1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ2 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ2 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
580 ;; to be able to use when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
649 switch (which_alternative)
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
657 [(set_attr "type" "neon_logic<q>")]
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
671 switch (which_alternative)
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
679 [(set_attr "type" "neon_logic<q>")]
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
740 ;; Compare to *anddi_notdi_di.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
804 ; Split negdi2_neon for vfp registers
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
820 ; Split negdi2_neon for core registers
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
875 ; TODO: V2DI shifts are current disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
885 switch (which_alternative)
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
890 VALID_NEON_QREG_MODE (<MODE>mode),
892 default: gcc_unreachable ();
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
908 [(set_attr "type" "neon_shift_imm<q>")]
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
921 [(set_attr "type" "neon_shift_imm<q>")]
924 ; Used for implementing logical shift-right, which is a left-shift by a negative
925 ; amount, with signed operands. This is essentially the same as ashl<mode>3
926 ; above, but using an unspec in case GCC tries anything tricky with negative
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
958 if (s_register_operand (operands[2], <MODE>mode))
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
975 if (s_register_operand (operands[2], <MODE>mode))
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitalized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1027 "TARGET_NEON && reload_completed"
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1033 if (CONST_INT_P (operands[2]))
1035 if (INTVAL (operands[2]) < 1)
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1071 ; The shift amount needs to be negated for right-shifts
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1082 ; The shift amount needs to be negated for right-shifts
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1125 "TARGET_NEON && reload_completed"
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1131 if (CONST_INT_P (operands[2]))
1133 if (INTVAL (operands[2]) < 1)
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1175 ;; Widening operations
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "%w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "%w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1197 ;; Helpers for quad-word reduction operations
1199 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1200 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1201 ; N/2-element vector.
1203 (define_insn "quad_halves_<code>v4si"
1204 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1206 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1207 (parallel [(const_int 0) (const_int 1)]))
1208 (vec_select:V2SI (match_dup 1)
1209 (parallel [(const_int 2) (const_int 3)]))))]
1211 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1212 [(set_attr "vqh_mnem" "<VQH_mnem>")
1213 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1216 (define_insn "quad_halves_<code>v4sf"
1217 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1219 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1220 (parallel [(const_int 0) (const_int 1)]))
1221 (vec_select:V2SF (match_dup 1)
1222 (parallel [(const_int 2) (const_int 3)]))))]
1223 "TARGET_NEON && flag_unsafe_math_optimizations"
1224 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1225 [(set_attr "vqh_mnem" "<VQH_mnem>")
1226 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1229 (define_insn "quad_halves_<code>v8hi"
1230 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1232 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1233 (parallel [(const_int 0) (const_int 1)
1234 (const_int 2) (const_int 3)]))
1235 (vec_select:V4HI (match_dup 1)
1236 (parallel [(const_int 4) (const_int 5)
1237 (const_int 6) (const_int 7)]))))]
1239 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1240 [(set_attr "vqh_mnem" "<VQH_mnem>")
1241 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1244 (define_insn "quad_halves_<code>v16qi"
1245 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1247 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1248 (parallel [(const_int 0) (const_int 1)
1249 (const_int 2) (const_int 3)
1250 (const_int 4) (const_int 5)
1251 (const_int 6) (const_int 7)]))
1252 (vec_select:V8QI (match_dup 1)
1253 (parallel [(const_int 8) (const_int 9)
1254 (const_int 10) (const_int 11)
1255 (const_int 12) (const_int 13)
1256 (const_int 14) (const_int 15)]))))]
1258 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1259 [(set_attr "vqh_mnem" "<VQH_mnem>")
1260 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1263 (define_expand "move_hi_quad_<mode>"
1264 [(match_operand:ANY128 0 "s_register_operand" "")
1265 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1268 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1269 GET_MODE_SIZE (<V_HALF>mode)),
1274 (define_expand "move_lo_quad_<mode>"
1275 [(match_operand:ANY128 0 "s_register_operand" "")
1276 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1279 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1285 ;; Reduction operations
1287 (define_expand "reduc_plus_scal_<mode>"
1288 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1289 (match_operand:VD 1 "s_register_operand" "")]
1290 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1292 rtx vec = gen_reg_rtx (<MODE>mode);
1293 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1294 &gen_neon_vpadd_internal<mode>);
1295 /* The same result is actually computed into every element. */
1296 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1300 (define_expand "reduc_plus_scal_<mode>"
1301 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1302 (match_operand:VQ 1 "s_register_operand" "")]
1303 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1304 && !BYTES_BIG_ENDIAN"
1306 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1308 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1309 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1314 (define_expand "reduc_plus_scal_v2di"
1315 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1316 (match_operand:V2DI 1 "s_register_operand" "")]
1317 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1319 rtx vec = gen_reg_rtx (V2DImode);
1321 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1322 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1327 (define_insn "arm_reduc_plus_internal_v2di"
1328 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1329 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1332 "vadd.i64\t%e0, %e1, %f1"
1333 [(set_attr "type" "neon_add_q")]
1336 (define_expand "reduc_smin_scal_<mode>"
1337 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1338 (match_operand:VD 1 "s_register_operand" "")]
1339 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1341 rtx vec = gen_reg_rtx (<MODE>mode);
1343 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1344 &gen_neon_vpsmin<mode>);
1345 /* The result is computed into every element of the vector. */
1346 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1350 (define_expand "reduc_smin_scal_<mode>"
1351 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1352 (match_operand:VQ 1 "s_register_operand" "")]
1353 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1354 && !BYTES_BIG_ENDIAN"
1356 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1358 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1359 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1364 (define_expand "reduc_smax_scal_<mode>"
1365 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1366 (match_operand:VD 1 "s_register_operand" "")]
1367 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1369 rtx vec = gen_reg_rtx (<MODE>mode);
1370 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1371 &gen_neon_vpsmax<mode>);
1372 /* The result is computed into every element of the vector. */
1373 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1377 (define_expand "reduc_smax_scal_<mode>"
1378 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1379 (match_operand:VQ 1 "s_register_operand" "")]
1380 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1381 && !BYTES_BIG_ENDIAN"
1383 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1385 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1386 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1391 (define_expand "reduc_umin_scal_<mode>"
1392 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1393 (match_operand:VDI 1 "s_register_operand" "")]
1396 rtx vec = gen_reg_rtx (<MODE>mode);
1397 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1398 &gen_neon_vpumin<mode>);
1399 /* The result is computed into every element of the vector. */
1400 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1404 (define_expand "reduc_umin_scal_<mode>"
1405 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1406 (match_operand:VQI 1 "s_register_operand" "")]
1407 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1409 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1411 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1412 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1417 (define_expand "reduc_umax_scal_<mode>"
1418 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1419 (match_operand:VDI 1 "s_register_operand" "")]
1422 rtx vec = gen_reg_rtx (<MODE>mode);
1423 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1424 &gen_neon_vpumax<mode>);
1425 /* The result is computed into every element of the vector. */
1426 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1430 (define_expand "reduc_umax_scal_<mode>"
1431 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1432 (match_operand:VQI 1 "s_register_operand" "")]
1433 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1435 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1437 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1438 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1443 (define_insn "neon_vpadd_internal<mode>"
1444 [(set (match_operand:VD 0 "s_register_operand" "=w")
1445 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1446 (match_operand:VD 2 "s_register_operand" "w")]
1449 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1450 ;; Assume this schedules like vadd.
1452 (if_then_else (match_test "<Is_float_mode>")
1453 (const_string "neon_fp_reduc_add_s<q>")
1454 (const_string "neon_reduc_add<q>")))]
1457 (define_insn "neon_vpsmin<mode>"
1458 [(set (match_operand:VD 0 "s_register_operand" "=w")
1459 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1460 (match_operand:VD 2 "s_register_operand" "w")]
1463 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1465 (if_then_else (match_test "<Is_float_mode>")
1466 (const_string "neon_fp_reduc_minmax_s<q>")
1467 (const_string "neon_reduc_minmax<q>")))]
1470 (define_insn "neon_vpsmax<mode>"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")]
1476 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1478 (if_then_else (match_test "<Is_float_mode>")
1479 (const_string "neon_fp_reduc_minmax_s<q>")
1480 (const_string "neon_reduc_minmax<q>")))]
1483 (define_insn "neon_vpumin<mode>"
1484 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1485 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1486 (match_operand:VDI 2 "s_register_operand" "w")]
1489 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1490 [(set_attr "type" "neon_reduc_minmax<q>")]
1493 (define_insn "neon_vpumax<mode>"
1494 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1495 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1496 (match_operand:VDI 2 "s_register_operand" "w")]
1499 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1500 [(set_attr "type" "neon_reduc_minmax<q>")]
1503 ;; Saturating arithmetic
1505 ; NOTE: Neon supports many more saturating variants of instructions than the
1506 ; following, but these are all GCC currently understands.
1507 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1508 ; yet either, although these patterns may be used by intrinsics when they're
1511 (define_insn "*ss_add<mode>_neon"
1512 [(set (match_operand:VD 0 "s_register_operand" "=w")
1513 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1514 (match_operand:VD 2 "s_register_operand" "w")))]
1516 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1517 [(set_attr "type" "neon_qadd<q>")]
1520 (define_insn "*us_add<mode>_neon"
1521 [(set (match_operand:VD 0 "s_register_operand" "=w")
1522 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1523 (match_operand:VD 2 "s_register_operand" "w")))]
1525 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1526 [(set_attr "type" "neon_qadd<q>")]
1529 (define_insn "*ss_sub<mode>_neon"
1530 [(set (match_operand:VD 0 "s_register_operand" "=w")
1531 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1532 (match_operand:VD 2 "s_register_operand" "w")))]
1534 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1535 [(set_attr "type" "neon_qsub<q>")]
1538 (define_insn "*us_sub<mode>_neon"
1539 [(set (match_operand:VD 0 "s_register_operand" "=w")
1540 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1541 (match_operand:VD 2 "s_register_operand" "w")))]
1543 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1544 [(set_attr "type" "neon_qsub<q>")]
1547 ;; Conditional instructions. These are comparisons with conditional moves for
1548 ;; vectors. They perform the assignment:
1550 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1552 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1555 (define_expand "vcond<mode><mode>"
1556 [(set (match_operand:VDQW 0 "s_register_operand" "")
1558 (match_operator 3 "comparison_operator"
1559 [(match_operand:VDQW 4 "s_register_operand" "")
1560 (match_operand:VDQW 5 "nonmemory_operand" "")])
1561 (match_operand:VDQW 1 "s_register_operand" "")
1562 (match_operand:VDQW 2 "s_register_operand" "")))]
1563 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1566 int use_zero_form = 0;
1567 int swap_bsl_operands = 0;
1568 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1569 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1571 rtx (*base_comparison) (rtx, rtx, rtx);
1572 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1574 switch (GET_CODE (operands[3]))
1581 if (operands[5] == CONST0_RTX (<MODE>mode))
1588 if (!REG_P (operands[5]))
1589 operands[5] = force_reg (<MODE>mode, operands[5]);
1592 switch (GET_CODE (operands[3]))
1602 base_comparison = gen_neon_vcge<mode>;
1603 complimentary_comparison = gen_neon_vcgt<mode>;
1611 base_comparison = gen_neon_vcgt<mode>;
1612 complimentary_comparison = gen_neon_vcge<mode>;
1617 base_comparison = gen_neon_vceq<mode>;
1618 complimentary_comparison = gen_neon_vceq<mode>;
1624 switch (GET_CODE (operands[3]))
1631 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1632 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1638 Note that there also exist direct comparison against 0 forms,
1639 so catch those as a special case. */
1643 switch (GET_CODE (operands[3]))
1646 base_comparison = gen_neon_vclt<mode>;
1649 base_comparison = gen_neon_vcle<mode>;
1652 /* Do nothing, other zero form cases already have the correct
1659 emit_insn (base_comparison (mask, operands[4], operands[5]));
1661 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1668 /* Vector compare returns false for lanes which are unordered, so if we use
1669 the inverse of the comparison we actually want to emit, then
1670 swap the operands to BSL, we will end up with the correct result.
1671 Note that a NE NaN and NaN NE b are true for all a, b.
1673 Our transformations are:
1678 a NE b -> !(a EQ b) */
1681 emit_insn (base_comparison (mask, operands[4], operands[5]));
1683 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1685 swap_bsl_operands = 1;
1688 /* We check (a > b || b > a). combining these comparisons give us
1689 true iff !(a != b && a ORDERED b), swapping the operands to BSL
1690 will then give us (a == b || a UNORDERED b) as intended. */
1692 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1693 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1694 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1695 swap_bsl_operands = 1;
1698 /* Operands are ORDERED iff (a > b || b >= a).
1699 Swapping the operands to BSL will give the UNORDERED case. */
1700 swap_bsl_operands = 1;
1703 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1704 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1705 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1711 if (swap_bsl_operands)
1712 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1715 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1720 (define_expand "vcondu<mode><mode>"
1721 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1723 (match_operator 3 "arm_comparison_operator"
1724 [(match_operand:VDQIW 4 "s_register_operand" "")
1725 (match_operand:VDQIW 5 "s_register_operand" "")])
1726 (match_operand:VDQIW 1 "s_register_operand" "")
1727 (match_operand:VDQIW 2 "s_register_operand" "")))]
1731 int inverse = 0, immediate_zero = 0;
1733 mask = gen_reg_rtx (<V_cmp_result>mode);
1735 if (operands[5] == CONST0_RTX (<MODE>mode))
1737 else if (!REG_P (operands[5]))
1738 operands[5] = force_reg (<MODE>mode, operands[5]);
1740 switch (GET_CODE (operands[3]))
1743 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1747 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1751 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1756 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1758 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1763 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1765 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1769 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1778 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1781 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1787 ;; Patterns for builtins.
1789 ; good for plain vadd, vaddq.
1791 (define_expand "neon_vadd<mode>"
1792 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1793 (match_operand:VCVTF 1 "s_register_operand" "w")
1794 (match_operand:VCVTF 2 "s_register_operand" "w")]
1797 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1798 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1800 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1805 ; Note that NEON operations don't support the full IEEE 754 standard: in
1806 ; particular, denormal values are flushed to zero. This means that GCC cannot
1807 ; use those instructions for autovectorization, etc. unless
1808 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1809 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1810 ; header) must work in either case: if -funsafe-math-optimizations is given,
1811 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1812 ; expand to unspecs (which may potentially limit the extent to which they might
1813 ; be optimized by generic code).
1815 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1817 (define_insn "neon_vadd<mode>_unspec"
1818 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1819 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1820 (match_operand:VCVTF 2 "s_register_operand" "w")]
1823 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1825 (if_then_else (match_test "<Is_float_mode>")
1826 (const_string "neon_fp_addsub_s<q>")
1827 (const_string "neon_add<q>")))]
1830 (define_insn "neon_vaddl<sup><mode>"
1831 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1832 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1833 (match_operand:VDI 2 "s_register_operand" "w")]
1836 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1837 [(set_attr "type" "neon_add_long")]
1840 (define_insn "neon_vaddw<sup><mode>"
1841 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1842 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1843 (match_operand:VDI 2 "s_register_operand" "w")]
1846 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1847 [(set_attr "type" "neon_add_widen")]
1852 (define_insn "neon_v<r>hadd<sup><mode>"
1853 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1854 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1855 (match_operand:VDQIW 2 "s_register_operand" "w")]
1858 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1859 [(set_attr "type" "neon_add_halve_q")]
1862 (define_insn "neon_vqadd<sup><mode>"
1863 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1864 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1865 (match_operand:VDQIX 2 "s_register_operand" "w")]
1868 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1869 [(set_attr "type" "neon_qadd<q>")]
1872 (define_insn "neon_v<r>addhn<mode>"
1873 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1874 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1875 (match_operand:VN 2 "s_register_operand" "w")]
1878 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1879 [(set_attr "type" "neon_add_halve_narrow_q")]
1882 ;; Polynomial and Float multiplication.
1883 (define_insn "neon_vmul<pf><mode>"
1884 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1885 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1886 (match_operand:VPF 2 "s_register_operand" "w")]
1889 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1891 (if_then_else (match_test "<Is_float_mode>")
1892 (const_string "neon_fp_mul_s<q>")
1893 (const_string "neon_mul_<V_elem_ch><q>")))]
1896 (define_expand "neon_vmla<mode>"
1897 [(match_operand:VDQW 0 "s_register_operand" "=w")
1898 (match_operand:VDQW 1 "s_register_operand" "0")
1899 (match_operand:VDQW 2 "s_register_operand" "w")
1900 (match_operand:VDQW 3 "s_register_operand" "w")]
1903 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1904 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1905 operands[2], operands[3]));
1907 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1908 operands[2], operands[3]));
1912 (define_expand "neon_vfma<VCVTF:mode>"
1913 [(match_operand:VCVTF 0 "s_register_operand")
1914 (match_operand:VCVTF 1 "s_register_operand")
1915 (match_operand:VCVTF 2 "s_register_operand")
1916 (match_operand:VCVTF 3 "s_register_operand")]
1917 "TARGET_NEON && TARGET_FMA"
1919 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1924 (define_expand "neon_vfms<VCVTF:mode>"
1925 [(match_operand:VCVTF 0 "s_register_operand")
1926 (match_operand:VCVTF 1 "s_register_operand")
1927 (match_operand:VCVTF 2 "s_register_operand")
1928 (match_operand:VCVTF 3 "s_register_operand")]
1929 "TARGET_NEON && TARGET_FMA"
1931 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1936 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1938 (define_insn "neon_vmla<mode>_unspec"
1939 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1940 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1941 (match_operand:VDQW 2 "s_register_operand" "w")
1942 (match_operand:VDQW 3 "s_register_operand" "w")]
1945 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1947 (if_then_else (match_test "<Is_float_mode>")
1948 (const_string "neon_fp_mla_s<q>")
1949 (const_string "neon_mla_<V_elem_ch><q>")))]
1952 (define_insn "neon_vmlal<sup><mode>"
1953 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1954 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1955 (match_operand:VW 2 "s_register_operand" "w")
1956 (match_operand:VW 3 "s_register_operand" "w")]
1959 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
1960 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
1963 (define_expand "neon_vmls<mode>"
1964 [(match_operand:VDQW 0 "s_register_operand" "=w")
1965 (match_operand:VDQW 1 "s_register_operand" "0")
1966 (match_operand:VDQW 2 "s_register_operand" "w")
1967 (match_operand:VDQW 3 "s_register_operand" "w")]
1970 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1971 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
1972 operands[1], operands[2], operands[3]));
1974 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
1975 operands[2], operands[3]));
1979 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1981 (define_insn "neon_vmls<mode>_unspec"
1982 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1983 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1984 (match_operand:VDQW 2 "s_register_operand" "w")
1985 (match_operand:VDQW 3 "s_register_operand" "w")]
1988 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1990 (if_then_else (match_test "<Is_float_mode>")
1991 (const_string "neon_fp_mla_s<q>")
1992 (const_string "neon_mla_<V_elem_ch><q>")))]
1995 (define_insn "neon_vmlsl<sup><mode>"
1996 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1997 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1998 (match_operand:VW 2 "s_register_operand" "w")
1999 (match_operand:VW 3 "s_register_operand" "w")]
2002 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2003 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2006 ;; vqdmulh, vqrdmulh
2007 (define_insn "neon_vq<r>dmulh<mode>"
2008 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2009 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2010 (match_operand:VMDQI 2 "s_register_operand" "w")]
2013 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2014 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2017 ;; vqrdmlah, vqrdmlsh
2018 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2019 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2020 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2021 (match_operand:VMDQI 2 "s_register_operand" "w")
2022 (match_operand:VMDQI 3 "s_register_operand" "w")]
2025 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2026 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2029 (define_insn "neon_vqdmlal<mode>"
2030 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2031 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2032 (match_operand:VMDI 2 "s_register_operand" "w")
2033 (match_operand:VMDI 3 "s_register_operand" "w")]
2036 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2037 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2040 (define_insn "neon_vqdmlsl<mode>"
2041 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2042 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2043 (match_operand:VMDI 2 "s_register_operand" "w")
2044 (match_operand:VMDI 3 "s_register_operand" "w")]
2047 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2048 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2051 (define_insn "neon_vmull<sup><mode>"
2052 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2053 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2054 (match_operand:VW 2 "s_register_operand" "w")]
2057 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2058 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2061 (define_insn "neon_vqdmull<mode>"
2062 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2063 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2064 (match_operand:VMDI 2 "s_register_operand" "w")]
2067 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2068 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2071 (define_expand "neon_vsub<mode>"
2072 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2073 (match_operand:VCVTF 1 "s_register_operand" "w")
2074 (match_operand:VCVTF 2 "s_register_operand" "w")]
2077 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2078 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2080 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2085 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2087 (define_insn "neon_vsub<mode>_unspec"
2088 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2089 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2090 (match_operand:VCVTF 2 "s_register_operand" "w")]
2093 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2095 (if_then_else (match_test "<Is_float_mode>")
2096 (const_string "neon_fp_addsub_s<q>")
2097 (const_string "neon_sub<q>")))]
2100 (define_insn "neon_vsubl<sup><mode>"
2101 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2102 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2103 (match_operand:VDI 2 "s_register_operand" "w")]
2106 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2107 [(set_attr "type" "neon_sub_long")]
2110 (define_insn "neon_vsubw<sup><mode>"
2111 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2112 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2113 (match_operand:VDI 2 "s_register_operand" "w")]
2116 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2117 [(set_attr "type" "neon_sub_widen")]
2120 (define_insn "neon_vqsub<sup><mode>"
2121 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2122 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2123 (match_operand:VDQIX 2 "s_register_operand" "w")]
2126 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2127 [(set_attr "type" "neon_qsub<q>")]
2130 (define_insn "neon_vhsub<sup><mode>"
2131 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2132 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2133 (match_operand:VDQIW 2 "s_register_operand" "w")]
2136 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2137 [(set_attr "type" "neon_sub_halve<q>")]
2140 (define_insn "neon_v<r>subhn<mode>"
2141 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2142 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2143 (match_operand:VN 2 "s_register_operand" "w")]
2146 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2147 [(set_attr "type" "neon_sub_halve_narrow_q")]
2150 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2151 ;; without unsafe math optimizations.
2152 (define_expand "neon_vc<cmp_op><mode>"
2153 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2155 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2156 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2159 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2161 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2162 && !flag_unsafe_math_optimizations)
2164 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2165 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2166 whereas this expander iterates over the integer modes as well,
2167 but we will never expand to UNSPECs for the integer comparisons. */
2171 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2176 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2185 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2192 (define_insn "neon_vc<cmp_op><mode>_insn"
2193 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2195 (COMPARISONS:<V_cmp_result>
2196 (match_operand:VDQW 1 "s_register_operand" "w,w")
2197 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2198 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2199 && !flag_unsafe_math_optimizations)"
2202 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2204 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2205 ? "f" : "<cmp_type>",
2206 which_alternative == 0
2207 ? "%<V_reg>2" : "#0");
2208 output_asm_insn (pattern, operands);
2212 (if_then_else (match_operand 2 "zero_operand")
2213 (const_string "neon_compare_zero<q>")
2214 (const_string "neon_compare<q>")))]
2217 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2218 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2219 (unspec:<V_cmp_result>
2220 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2221 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2226 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2228 which_alternative == 0
2229 ? "%<V_reg>2" : "#0");
2230 output_asm_insn (pattern, operands);
2233 [(set_attr "type" "neon_fp_compare_s<q>")]
2236 (define_insn "neon_vc<cmp_op>u<mode>"
2237 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2239 (GTUGEU:<V_cmp_result>
2240 (match_operand:VDQIW 1 "s_register_operand" "w")
2241 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2243 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2244 [(set_attr "type" "neon_compare<q>")]
2247 (define_expand "neon_vca<cmp_op><mode>"
2248 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2250 (GTGE:<V_cmp_result>
2251 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2252 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2255 if (flag_unsafe_math_optimizations)
2256 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2259 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2266 (define_insn "neon_vca<cmp_op><mode>_insn"
2267 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2269 (GTGE:<V_cmp_result>
2270 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2271 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2272 "TARGET_NEON && flag_unsafe_math_optimizations"
2273 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2274 [(set_attr "type" "neon_fp_compare_s<q>")]
2277 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2278 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2279 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2280 (match_operand:VCVTF 2 "s_register_operand" "w")]
2283 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2284 [(set_attr "type" "neon_fp_compare_s<q>")]
2287 (define_insn "neon_vtst<mode>"
2288 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2289 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2290 (match_operand:VDQIW 2 "s_register_operand" "w")]
2293 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2294 [(set_attr "type" "neon_tst<q>")]
2297 (define_insn "neon_vabd<sup><mode>"
2298 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2299 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2300 (match_operand:VDQIW 2 "s_register_operand" "w")]
2303 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2304 [(set_attr "type" "neon_abd<q>")]
2307 (define_insn "neon_vabdf<mode>"
2308 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2309 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2310 (match_operand:VCVTF 2 "s_register_operand" "w")]
2313 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2314 [(set_attr "type" "neon_fp_abd_s<q>")]
2317 (define_insn "neon_vabdl<sup><mode>"
2318 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2319 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2320 (match_operand:VW 2 "s_register_operand" "w")]
2323 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2324 [(set_attr "type" "neon_abd_long")]
2327 (define_insn "neon_vaba<sup><mode>"
2328 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2329 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2330 (match_operand:VDQIW 3 "s_register_operand" "w")]
2332 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2334 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2335 [(set_attr "type" "neon_arith_acc<q>")]
2338 (define_insn "neon_vabal<sup><mode>"
2339 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2340 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2341 (match_operand:VW 3 "s_register_operand" "w")]
2343 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2345 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2346 [(set_attr "type" "neon_arith_acc<q>")]
2349 (define_insn "neon_v<maxmin><sup><mode>"
2350 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2351 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2352 (match_operand:VDQIW 2 "s_register_operand" "w")]
2355 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2356 [(set_attr "type" "neon_minmax<q>")]
2359 (define_insn "neon_v<maxmin>f<mode>"
2360 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2361 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2362 (match_operand:VCVTF 2 "s_register_operand" "w")]
2365 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2366 [(set_attr "type" "neon_fp_minmax_s<q>")]
2369 (define_expand "neon_vpadd<mode>"
2370 [(match_operand:VD 0 "s_register_operand" "=w")
2371 (match_operand:VD 1 "s_register_operand" "w")
2372 (match_operand:VD 2 "s_register_operand" "w")]
2375 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2380 (define_insn "neon_vpaddl<sup><mode>"
2381 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2382 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2385 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2386 [(set_attr "type" "neon_reduc_add_long")]
2389 (define_insn "neon_vpadal<sup><mode>"
2390 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2391 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2392 (match_operand:VDQIW 2 "s_register_operand" "w")]
2395 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2396 [(set_attr "type" "neon_reduc_add_acc")]
2399 (define_insn "neon_vp<maxmin><sup><mode>"
2400 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2401 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2402 (match_operand:VDI 2 "s_register_operand" "w")]
2405 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2406 [(set_attr "type" "neon_reduc_minmax<q>")]
2409 (define_insn "neon_vp<maxmin>f<mode>"
2410 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2411 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2412 (match_operand:VCVTF 2 "s_register_operand" "w")]
2415 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2416 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
2419 (define_insn "neon_vrecps<mode>"
2420 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2421 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2422 (match_operand:VCVTF 2 "s_register_operand" "w")]
2425 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2426 [(set_attr "type" "neon_fp_recps_s<q>")]
2429 (define_insn "neon_vrsqrts<mode>"
2430 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2431 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2432 (match_operand:VCVTF 2 "s_register_operand" "w")]
2435 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2436 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2439 (define_expand "neon_vabs<mode>"
2440 [(match_operand:VDQW 0 "s_register_operand" "")
2441 (match_operand:VDQW 1 "s_register_operand" "")]
2444 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2448 (define_insn "neon_vqabs<mode>"
2449 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2450 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2453 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2454 [(set_attr "type" "neon_qabs<q>")]
2457 (define_insn "neon_bswap<mode>"
2458 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2459 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2461 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2462 [(set_attr "type" "neon_rev<q>")]
2465 (define_expand "neon_vneg<mode>"
2466 [(match_operand:VDQW 0 "s_register_operand" "")
2467 (match_operand:VDQW 1 "s_register_operand" "")]
2470 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
2474 (define_expand "neon_copysignf<mode>"
2475 [(match_operand:VCVTF 0 "register_operand")
2476 (match_operand:VCVTF 1 "register_operand")
2477 (match_operand:VCVTF 2 "register_operand")]
2481 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2482 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2483 rtvec v = rtvec_alloc (n_elt);
2485 /* Create bitmask for vector select. */
2486 for (i = 0; i < n_elt; ++i)
2487 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
2489 emit_move_insn (v_bitmask,
2490 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2491 emit_move_insn (operands[0], operands[2]);
2492 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2493 <VCVTF:V_cmp_result>mode, 0);
2494 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
2501 (define_insn "neon_vqneg<mode>"
2502 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2503 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2506 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2507 [(set_attr "type" "neon_qneg<q>")]
2510 (define_insn "neon_vcls<mode>"
2511 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2512 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2515 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2516 [(set_attr "type" "neon_cls<q>")]
2519 (define_insn "clz<mode>2"
2520 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2521 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2523 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2524 [(set_attr "type" "neon_cnt<q>")]
2527 (define_expand "neon_vclz<mode>"
2528 [(match_operand:VDQIW 0 "s_register_operand" "")
2529 (match_operand:VDQIW 1 "s_register_operand" "")]
2532 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
2536 (define_insn "popcount<mode>2"
2537 [(set (match_operand:VE 0 "s_register_operand" "=w")
2538 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2540 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2541 [(set_attr "type" "neon_cnt<q>")]
2544 (define_expand "neon_vcnt<mode>"
2545 [(match_operand:VE 0 "s_register_operand" "=w")
2546 (match_operand:VE 1 "s_register_operand" "w")]
2549 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
2553 (define_insn "neon_vrecpe<mode>"
2554 [(set (match_operand:V32 0 "s_register_operand" "=w")
2555 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2558 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2559 [(set_attr "type" "neon_fp_recpe_s<q>")]
2562 (define_insn "neon_vrsqrte<mode>"
2563 [(set (match_operand:V32 0 "s_register_operand" "=w")
2564 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2567 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2568 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
2571 (define_expand "neon_vmvn<mode>"
2572 [(match_operand:VDQIW 0 "s_register_operand" "")
2573 (match_operand:VDQIW 1 "s_register_operand" "")]
2576 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
2580 (define_insn "neon_vget_lane<mode>_sext_internal"
2581 [(set (match_operand:SI 0 "s_register_operand" "=r")
2583 (vec_select:<V_elem>
2584 (match_operand:VD 1 "s_register_operand" "w")
2585 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2588 if (BYTES_BIG_ENDIAN)
2590 int elt = INTVAL (operands[2]);
2591 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2592 operands[2] = GEN_INT (elt);
2594 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2596 [(set_attr "type" "neon_to_gp")]
2599 (define_insn "neon_vget_lane<mode>_zext_internal"
2600 [(set (match_operand:SI 0 "s_register_operand" "=r")
2602 (vec_select:<V_elem>
2603 (match_operand:VD 1 "s_register_operand" "w")
2604 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2607 if (BYTES_BIG_ENDIAN)
2609 int elt = INTVAL (operands[2]);
2610 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2611 operands[2] = GEN_INT (elt);
2613 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2615 [(set_attr "type" "neon_to_gp")]
2618 (define_insn "neon_vget_lane<mode>_sext_internal"
2619 [(set (match_operand:SI 0 "s_register_operand" "=r")
2621 (vec_select:<V_elem>
2622 (match_operand:VQ2 1 "s_register_operand" "w")
2623 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2627 int regno = REGNO (operands[1]);
2628 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2629 unsigned int elt = INTVAL (operands[2]);
2630 unsigned int elt_adj = elt % halfelts;
2632 if (BYTES_BIG_ENDIAN)
2633 elt_adj = halfelts - 1 - elt_adj;
2635 ops[0] = operands[0];
2636 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2637 ops[2] = GEN_INT (elt_adj);
2638 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2642 [(set_attr "type" "neon_to_gp_q")]
2645 (define_insn "neon_vget_lane<mode>_zext_internal"
2646 [(set (match_operand:SI 0 "s_register_operand" "=r")
2648 (vec_select:<V_elem>
2649 (match_operand:VQ2 1 "s_register_operand" "w")
2650 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2654 int regno = REGNO (operands[1]);
2655 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2656 unsigned int elt = INTVAL (operands[2]);
2657 unsigned int elt_adj = elt % halfelts;
2659 if (BYTES_BIG_ENDIAN)
2660 elt_adj = halfelts - 1 - elt_adj;
2662 ops[0] = operands[0];
2663 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2664 ops[2] = GEN_INT (elt_adj);
2665 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2669 [(set_attr "type" "neon_to_gp_q")]
2672 (define_expand "neon_vget_lane<mode>"
2673 [(match_operand:<V_ext> 0 "s_register_operand" "")
2674 (match_operand:VDQW 1 "s_register_operand" "")
2675 (match_operand:SI 2 "immediate_operand" "")]
2678 if (BYTES_BIG_ENDIAN)
2680 /* The intrinsics are defined in terms of a model where the
2681 element ordering in memory is vldm order, whereas the generic
2682 RTL is defined in terms of a model where the element ordering
2683 in memory is array order. Convert the lane number to conform
2685 unsigned int elt = INTVAL (operands[2]);
2686 unsigned int reg_nelts
2687 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2688 elt ^= reg_nelts - 1;
2689 operands[2] = GEN_INT (elt);
2692 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2693 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2695 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
2701 (define_expand "neon_vget_laneu<mode>"
2702 [(match_operand:<V_ext> 0 "s_register_operand" "")
2703 (match_operand:VDQIW 1 "s_register_operand" "")
2704 (match_operand:SI 2 "immediate_operand" "")]
2707 if (BYTES_BIG_ENDIAN)
2709 /* The intrinsics are defined in terms of a model where the
2710 element ordering in memory is vldm order, whereas the generic
2711 RTL is defined in terms of a model where the element ordering
2712 in memory is array order. Convert the lane number to conform
2714 unsigned int elt = INTVAL (operands[2]);
2715 unsigned int reg_nelts
2716 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2717 elt ^= reg_nelts - 1;
2718 operands[2] = GEN_INT (elt);
2721 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2722 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2724 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
2730 (define_expand "neon_vget_lanedi"
2731 [(match_operand:DI 0 "s_register_operand" "=r")
2732 (match_operand:DI 1 "s_register_operand" "w")
2733 (match_operand:SI 2 "immediate_operand" "")]
2736 emit_move_insn (operands[0], operands[1]);
2740 (define_expand "neon_vget_lanev2di"
2741 [(match_operand:DI 0 "s_register_operand" "")
2742 (match_operand:V2DI 1 "s_register_operand" "")
2743 (match_operand:SI 2 "immediate_operand" "")]
2748 if (BYTES_BIG_ENDIAN)
2750 /* The intrinsics are defined in terms of a model where the
2751 element ordering in memory is vldm order, whereas the generic
2752 RTL is defined in terms of a model where the element ordering
2753 in memory is array order. Convert the lane number to conform
2755 unsigned int elt = INTVAL (operands[2]);
2756 unsigned int reg_nelts = 2;
2757 elt ^= reg_nelts - 1;
2758 operands[2] = GEN_INT (elt);
2761 lane = INTVAL (operands[2]);
2762 gcc_assert ((lane ==0) || (lane == 1));
2763 emit_move_insn (operands[0], lane == 0
2764 ? gen_lowpart (DImode, operands[1])
2765 : gen_highpart (DImode, operands[1]));
2769 (define_expand "neon_vset_lane<mode>"
2770 [(match_operand:VDQ 0 "s_register_operand" "=w")
2771 (match_operand:<V_elem> 1 "s_register_operand" "r")
2772 (match_operand:VDQ 2 "s_register_operand" "0")
2773 (match_operand:SI 3 "immediate_operand" "i")]
2776 unsigned int elt = INTVAL (operands[3]);
2778 if (BYTES_BIG_ENDIAN)
2780 unsigned int reg_nelts
2781 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2782 elt ^= reg_nelts - 1;
2785 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2786 GEN_INT (1 << elt), operands[2]));
2790 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
2792 (define_expand "neon_vset_lanedi"
2793 [(match_operand:DI 0 "s_register_operand" "=w")
2794 (match_operand:DI 1 "s_register_operand" "r")
2795 (match_operand:DI 2 "s_register_operand" "0")
2796 (match_operand:SI 3 "immediate_operand" "i")]
2799 emit_move_insn (operands[0], operands[1]);
2803 (define_expand "neon_vcreate<mode>"
2804 [(match_operand:VD_RE 0 "s_register_operand" "")
2805 (match_operand:DI 1 "general_operand" "")]
2808 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2809 emit_move_insn (operands[0], src);
2813 (define_insn "neon_vdup_n<mode>"
2814 [(set (match_operand:VX 0 "s_register_operand" "=w")
2815 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2817 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2818 [(set_attr "type" "neon_from_gp<q>")]
2821 (define_insn "neon_vdup_n<mode>"
2822 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2823 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2826 vdup.<V_sz_elem>\t%<V_reg>0, %1
2827 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2828 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
2831 (define_expand "neon_vdup_ndi"
2832 [(match_operand:DI 0 "s_register_operand" "=w")
2833 (match_operand:DI 1 "s_register_operand" "r")]
2836 emit_move_insn (operands[0], operands[1]);
2841 (define_insn "neon_vdup_nv2di"
2842 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2843 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2846 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2847 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2848 [(set_attr "length" "8")
2849 (set_attr "type" "multiple")]
2852 (define_insn "neon_vdup_lane<mode>_internal"
2853 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2855 (vec_select:<V_elem>
2856 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2857 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2860 if (BYTES_BIG_ENDIAN)
2862 int elt = INTVAL (operands[2]);
2863 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2864 operands[2] = GEN_INT (elt);
2867 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2869 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2871 [(set_attr "type" "neon_dup<q>")]
2874 (define_expand "neon_vdup_lane<mode>"
2875 [(match_operand:VDQW 0 "s_register_operand" "=w")
2876 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2877 (match_operand:SI 2 "immediate_operand" "i")]
2880 if (BYTES_BIG_ENDIAN)
2882 unsigned int elt = INTVAL (operands[2]);
2883 unsigned int reg_nelts
2884 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
2885 elt ^= reg_nelts - 1;
2886 operands[2] = GEN_INT (elt);
2888 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2893 ; Scalar index is ignored, since only zero is valid here.
2894 (define_expand "neon_vdup_lanedi"
2895 [(match_operand:DI 0 "s_register_operand" "=w")
2896 (match_operand:DI 1 "s_register_operand" "w")
2897 (match_operand:SI 2 "immediate_operand" "i")]
2900 emit_move_insn (operands[0], operands[1]);
2904 ; Likewise for v2di, as the DImode second operand has only a single element.
2905 (define_expand "neon_vdup_lanev2di"
2906 [(match_operand:V2DI 0 "s_register_operand" "=w")
2907 (match_operand:DI 1 "s_register_operand" "w")
2908 (match_operand:SI 2 "immediate_operand" "i")]
2911 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2915 ; Disabled before reload because we don't want combine doing something silly,
2916 ; but used by the post-reload expansion of neon_vcombine.
2917 (define_insn "*neon_vswp<mode>"
2918 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2919 (match_operand:VDQX 1 "s_register_operand" "+w"))
2920 (set (match_dup 1) (match_dup 0))]
2921 "TARGET_NEON && reload_completed"
2922 "vswp\t%<V_reg>0, %<V_reg>1"
2923 [(set_attr "type" "neon_permute<q>")]
2926 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2928 ;; FIXME: A different implementation of this builtin could make it much
2929 ;; more likely that we wouldn't actually need to output anything (we could make
2930 ;; it so that the reg allocator puts things in the right places magically
2931 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
2933 (define_insn_and_split "neon_vcombine<mode>"
2934 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2935 (vec_concat:<V_DOUBLE>
2936 (match_operand:VDX 1 "s_register_operand" "w")
2937 (match_operand:VDX 2 "s_register_operand" "w")))]
2940 "&& reload_completed"
2943 neon_split_vcombine (operands);
2946 [(set_attr "type" "multiple")]
2949 (define_expand "neon_vget_high<mode>"
2950 [(match_operand:<V_HALF> 0 "s_register_operand")
2951 (match_operand:VQX 1 "s_register_operand")]
2954 emit_move_insn (operands[0],
2955 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
2956 GET_MODE_SIZE (<V_HALF>mode)));
2960 (define_expand "neon_vget_low<mode>"
2961 [(match_operand:<V_HALF> 0 "s_register_operand")
2962 (match_operand:VQX 1 "s_register_operand")]
2965 emit_move_insn (operands[0],
2966 simplify_gen_subreg (<V_HALF>mode, operands[1],
2971 (define_insn "float<mode><V_cvtto>2"
2972 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2973 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2974 "TARGET_NEON && !flag_rounding_math"
2975 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
2976 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
2979 (define_insn "floatuns<mode><V_cvtto>2"
2980 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2981 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2982 "TARGET_NEON && !flag_rounding_math"
2983 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
2984 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
2987 (define_insn "fix_trunc<mode><V_cvtto>2"
2988 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2989 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2991 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
2992 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
2995 (define_insn "fixuns_trunc<mode><V_cvtto>2"
2996 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2997 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2999 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3000 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3003 (define_insn "neon_vcvt<sup><mode>"
3004 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3005 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3008 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3009 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3012 (define_insn "neon_vcvt<sup><mode>"
3013 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3014 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3017 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3018 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3021 (define_insn "neon_vcvtv4sfv4hf"
3022 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3023 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3025 "TARGET_NEON && TARGET_FP16"
3026 "vcvt.f32.f16\t%q0, %P1"
3027 [(set_attr "type" "neon_fp_cvt_widen_h")]
3030 (define_insn "neon_vcvtv4hfv4sf"
3031 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3032 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3034 "TARGET_NEON && TARGET_FP16"
3035 "vcvt.f16.f32\t%P0, %q1"
3036 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
3039 (define_insn "neon_vcvt<sup>_n<mode>"
3040 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3041 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3042 (match_operand:SI 2 "immediate_operand" "i")]
3046 neon_const_bounds (operands[2], 1, 33);
3047 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3049 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3052 (define_insn "neon_vcvt<sup>_n<mode>"
3053 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3054 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3055 (match_operand:SI 2 "immediate_operand" "i")]
3059 neon_const_bounds (operands[2], 1, 33);
3060 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3062 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3065 (define_insn "neon_vmovn<mode>"
3066 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3067 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3070 "vmovn.<V_if_elem>\t%P0, %q1"
3071 [(set_attr "type" "neon_shift_imm_narrow_q")]
3074 (define_insn "neon_vqmovn<sup><mode>"
3075 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3076 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3079 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3080 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3083 (define_insn "neon_vqmovun<mode>"
3084 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3085 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3088 "vqmovun.<V_s_elem>\t%P0, %q1"
3089 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3092 (define_insn "neon_vmovl<sup><mode>"
3093 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3094 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3097 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3098 [(set_attr "type" "neon_shift_imm_long")]
3101 (define_insn "neon_vmul_lane<mode>"
3102 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3103 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3104 (match_operand:VMD 2 "s_register_operand"
3105 "<scalar_mul_constraint>")
3106 (match_operand:SI 3 "immediate_operand" "i")]
3110 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3113 (if_then_else (match_test "<Is_float_mode>")
3114 (const_string "neon_fp_mul_s_scalar<q>")
3115 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3118 (define_insn "neon_vmul_lane<mode>"
3119 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3120 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3121 (match_operand:<V_HALF> 2 "s_register_operand"
3122 "<scalar_mul_constraint>")
3123 (match_operand:SI 3 "immediate_operand" "i")]
3127 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3130 (if_then_else (match_test "<Is_float_mode>")
3131 (const_string "neon_fp_mul_s_scalar<q>")
3132 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3135 (define_insn "neon_vmull<sup>_lane<mode>"
3136 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3137 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3138 (match_operand:VMDI 2 "s_register_operand"
3139 "<scalar_mul_constraint>")
3140 (match_operand:SI 3 "immediate_operand" "i")]
3144 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3146 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
3149 (define_insn "neon_vqdmull_lane<mode>"
3150 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3151 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3152 (match_operand:VMDI 2 "s_register_operand"
3153 "<scalar_mul_constraint>")
3154 (match_operand:SI 3 "immediate_operand" "i")]
3155 UNSPEC_VQDMULL_LANE))]
3158 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3160 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
3163 (define_insn "neon_vq<r>dmulh_lane<mode>"
3164 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3165 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3166 (match_operand:<V_HALF> 2 "s_register_operand"
3167 "<scalar_mul_constraint>")
3168 (match_operand:SI 3 "immediate_operand" "i")]
3172 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3174 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3177 (define_insn "neon_vq<r>dmulh_lane<mode>"
3178 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3179 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3180 (match_operand:VMDI 2 "s_register_operand"
3181 "<scalar_mul_constraint>")
3182 (match_operand:SI 3 "immediate_operand" "i")]
3186 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3188 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3191 ;; vqrdmlah_lane, vqrdmlsh_lane
3192 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3193 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3194 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3195 (match_operand:VMQI 2 "s_register_operand" "w")
3196 (match_operand:<V_HALF> 3 "s_register_operand"
3197 "<scalar_mul_constraint>")
3198 (match_operand:SI 4 "immediate_operand" "i")]
3203 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3205 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
3208 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3209 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3210 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3211 (match_operand:VMDI 2 "s_register_operand" "w")
3212 (match_operand:VMDI 3 "s_register_operand"
3213 "<scalar_mul_constraint>")
3214 (match_operand:SI 4 "immediate_operand" "i")]
3219 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3221 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
3224 (define_insn "neon_vmla_lane<mode>"
3225 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3226 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3227 (match_operand:VMD 2 "s_register_operand" "w")
3228 (match_operand:VMD 3 "s_register_operand"
3229 "<scalar_mul_constraint>")
3230 (match_operand:SI 4 "immediate_operand" "i")]
3234 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3237 (if_then_else (match_test "<Is_float_mode>")
3238 (const_string "neon_fp_mla_s_scalar<q>")
3239 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3242 (define_insn "neon_vmla_lane<mode>"
3243 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3244 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3245 (match_operand:VMQ 2 "s_register_operand" "w")
3246 (match_operand:<V_HALF> 3 "s_register_operand"
3247 "<scalar_mul_constraint>")
3248 (match_operand:SI 4 "immediate_operand" "i")]
3252 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3255 (if_then_else (match_test "<Is_float_mode>")
3256 (const_string "neon_fp_mla_s_scalar<q>")
3257 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3260 (define_insn "neon_vmlal<sup>_lane<mode>"
3261 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3262 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3263 (match_operand:VMDI 2 "s_register_operand" "w")
3264 (match_operand:VMDI 3 "s_register_operand"
3265 "<scalar_mul_constraint>")
3266 (match_operand:SI 4 "immediate_operand" "i")]
3270 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3272 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3275 (define_insn "neon_vqdmlal_lane<mode>"
3276 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3277 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3278 (match_operand:VMDI 2 "s_register_operand" "w")
3279 (match_operand:VMDI 3 "s_register_operand"
3280 "<scalar_mul_constraint>")
3281 (match_operand:SI 4 "immediate_operand" "i")]
3282 UNSPEC_VQDMLAL_LANE))]
3285 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3287 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3290 (define_insn "neon_vmls_lane<mode>"
3291 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3292 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3293 (match_operand:VMD 2 "s_register_operand" "w")
3294 (match_operand:VMD 3 "s_register_operand"
3295 "<scalar_mul_constraint>")
3296 (match_operand:SI 4 "immediate_operand" "i")]
3300 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3303 (if_then_else (match_test "<Is_float_mode>")
3304 (const_string "neon_fp_mla_s_scalar<q>")
3305 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3308 (define_insn "neon_vmls_lane<mode>"
3309 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3310 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3311 (match_operand:VMQ 2 "s_register_operand" "w")
3312 (match_operand:<V_HALF> 3 "s_register_operand"
3313 "<scalar_mul_constraint>")
3314 (match_operand:SI 4 "immediate_operand" "i")]
3318 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3321 (if_then_else (match_test "<Is_float_mode>")
3322 (const_string "neon_fp_mla_s_scalar<q>")
3323 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3326 (define_insn "neon_vmlsl<sup>_lane<mode>"
3327 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3328 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3329 (match_operand:VMDI 2 "s_register_operand" "w")
3330 (match_operand:VMDI 3 "s_register_operand"
3331 "<scalar_mul_constraint>")
3332 (match_operand:SI 4 "immediate_operand" "i")]
3336 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3338 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3341 (define_insn "neon_vqdmlsl_lane<mode>"
3342 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3343 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3344 (match_operand:VMDI 2 "s_register_operand" "w")
3345 (match_operand:VMDI 3 "s_register_operand"
3346 "<scalar_mul_constraint>")
3347 (match_operand:SI 4 "immediate_operand" "i")]
3348 UNSPEC_VQDMLSL_LANE))]
3351 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3353 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3356 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3357 ; core register into a temp register, then use a scalar taken from that. This
3358 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3359 ; or extracted from another vector. The latter case it's currently better to
3360 ; use the "_lane" variant, and the former case can probably be implemented
3361 ; using vld1_lane, but that hasn't been done yet.
3363 (define_expand "neon_vmul_n<mode>"
3364 [(match_operand:VMD 0 "s_register_operand" "")
3365 (match_operand:VMD 1 "s_register_operand" "")
3366 (match_operand:<V_elem> 2 "s_register_operand" "")]
3369 rtx tmp = gen_reg_rtx (<MODE>mode);
3370 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3371 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3376 (define_expand "neon_vmul_n<mode>"
3377 [(match_operand:VMQ 0 "s_register_operand" "")
3378 (match_operand:VMQ 1 "s_register_operand" "")
3379 (match_operand:<V_elem> 2 "s_register_operand" "")]
3382 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3383 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3384 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3389 (define_expand "neon_vmulls_n<mode>"
3390 [(match_operand:<V_widen> 0 "s_register_operand" "")
3391 (match_operand:VMDI 1 "s_register_operand" "")
3392 (match_operand:<V_elem> 2 "s_register_operand" "")]
3395 rtx tmp = gen_reg_rtx (<MODE>mode);
3396 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3397 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
3402 (define_expand "neon_vmullu_n<mode>"
3403 [(match_operand:<V_widen> 0 "s_register_operand" "")
3404 (match_operand:VMDI 1 "s_register_operand" "")
3405 (match_operand:<V_elem> 2 "s_register_operand" "")]
3408 rtx tmp = gen_reg_rtx (<MODE>mode);
3409 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3410 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
3415 (define_expand "neon_vqdmull_n<mode>"
3416 [(match_operand:<V_widen> 0 "s_register_operand" "")
3417 (match_operand:VMDI 1 "s_register_operand" "")
3418 (match_operand:<V_elem> 2 "s_register_operand" "")]
3421 rtx tmp = gen_reg_rtx (<MODE>mode);
3422 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3423 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3428 (define_expand "neon_vqdmulh_n<mode>"
3429 [(match_operand:VMDI 0 "s_register_operand" "")
3430 (match_operand:VMDI 1 "s_register_operand" "")
3431 (match_operand:<V_elem> 2 "s_register_operand" "")]
3434 rtx tmp = gen_reg_rtx (<MODE>mode);
3435 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3436 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3441 (define_expand "neon_vqrdmulh_n<mode>"
3442 [(match_operand:VMDI 0 "s_register_operand" "")
3443 (match_operand:VMDI 1 "s_register_operand" "")
3444 (match_operand:<V_elem> 2 "s_register_operand" "")]
3447 rtx tmp = gen_reg_rtx (<MODE>mode);
3448 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3449 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3454 (define_expand "neon_vqdmulh_n<mode>"
3455 [(match_operand:VMQI 0 "s_register_operand" "")
3456 (match_operand:VMQI 1 "s_register_operand" "")
3457 (match_operand:<V_elem> 2 "s_register_operand" "")]
3460 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3461 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3462 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3467 (define_expand "neon_vqrdmulh_n<mode>"
3468 [(match_operand:VMQI 0 "s_register_operand" "")
3469 (match_operand:VMQI 1 "s_register_operand" "")
3470 (match_operand:<V_elem> 2 "s_register_operand" "")]
3473 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3474 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3475 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3480 (define_expand "neon_vmla_n<mode>"
3481 [(match_operand:VMD 0 "s_register_operand" "")
3482 (match_operand:VMD 1 "s_register_operand" "")
3483 (match_operand:VMD 2 "s_register_operand" "")
3484 (match_operand:<V_elem> 3 "s_register_operand" "")]
3487 rtx tmp = gen_reg_rtx (<MODE>mode);
3488 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3489 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3494 (define_expand "neon_vmla_n<mode>"
3495 [(match_operand:VMQ 0 "s_register_operand" "")
3496 (match_operand:VMQ 1 "s_register_operand" "")
3497 (match_operand:VMQ 2 "s_register_operand" "")
3498 (match_operand:<V_elem> 3 "s_register_operand" "")]
3501 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3502 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3503 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3508 (define_expand "neon_vmlals_n<mode>"
3509 [(match_operand:<V_widen> 0 "s_register_operand" "")
3510 (match_operand:<V_widen> 1 "s_register_operand" "")
3511 (match_operand:VMDI 2 "s_register_operand" "")
3512 (match_operand:<V_elem> 3 "s_register_operand" "")]
3515 rtx tmp = gen_reg_rtx (<MODE>mode);
3516 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3517 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
3522 (define_expand "neon_vmlalu_n<mode>"
3523 [(match_operand:<V_widen> 0 "s_register_operand" "")
3524 (match_operand:<V_widen> 1 "s_register_operand" "")
3525 (match_operand:VMDI 2 "s_register_operand" "")
3526 (match_operand:<V_elem> 3 "s_register_operand" "")]
3529 rtx tmp = gen_reg_rtx (<MODE>mode);
3530 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3531 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
3536 (define_expand "neon_vqdmlal_n<mode>"
3537 [(match_operand:<V_widen> 0 "s_register_operand" "")
3538 (match_operand:<V_widen> 1 "s_register_operand" "")
3539 (match_operand:VMDI 2 "s_register_operand" "")
3540 (match_operand:<V_elem> 3 "s_register_operand" "")]
3543 rtx tmp = gen_reg_rtx (<MODE>mode);
3544 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3545 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3550 (define_expand "neon_vmls_n<mode>"
3551 [(match_operand:VMD 0 "s_register_operand" "")
3552 (match_operand:VMD 1 "s_register_operand" "")
3553 (match_operand:VMD 2 "s_register_operand" "")
3554 (match_operand:<V_elem> 3 "s_register_operand" "")]
3557 rtx tmp = gen_reg_rtx (<MODE>mode);
3558 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3559 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3564 (define_expand "neon_vmls_n<mode>"
3565 [(match_operand:VMQ 0 "s_register_operand" "")
3566 (match_operand:VMQ 1 "s_register_operand" "")
3567 (match_operand:VMQ 2 "s_register_operand" "")
3568 (match_operand:<V_elem> 3 "s_register_operand" "")]
3571 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3572 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3573 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3578 (define_expand "neon_vmlsls_n<mode>"
3579 [(match_operand:<V_widen> 0 "s_register_operand" "")
3580 (match_operand:<V_widen> 1 "s_register_operand" "")
3581 (match_operand:VMDI 2 "s_register_operand" "")
3582 (match_operand:<V_elem> 3 "s_register_operand" "")]
3585 rtx tmp = gen_reg_rtx (<MODE>mode);
3586 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3587 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
3592 (define_expand "neon_vmlslu_n<mode>"
3593 [(match_operand:<V_widen> 0 "s_register_operand" "")
3594 (match_operand:<V_widen> 1 "s_register_operand" "")
3595 (match_operand:VMDI 2 "s_register_operand" "")
3596 (match_operand:<V_elem> 3 "s_register_operand" "")]
3599 rtx tmp = gen_reg_rtx (<MODE>mode);
3600 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3601 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
3606 (define_expand "neon_vqdmlsl_n<mode>"
3607 [(match_operand:<V_widen> 0 "s_register_operand" "")
3608 (match_operand:<V_widen> 1 "s_register_operand" "")
3609 (match_operand:VMDI 2 "s_register_operand" "")
3610 (match_operand:<V_elem> 3 "s_register_operand" "")]
3613 rtx tmp = gen_reg_rtx (<MODE>mode);
3614 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3615 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3620 (define_insn "neon_vext<mode>"
3621 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3622 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3623 (match_operand:VDQX 2 "s_register_operand" "w")
3624 (match_operand:SI 3 "immediate_operand" "i")]
3628 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3629 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3631 [(set_attr "type" "neon_ext<q>")]
3634 (define_insn "neon_vrev64<mode>"
3635 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3636 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3639 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3640 [(set_attr "type" "neon_rev<q>")]
3643 (define_insn "neon_vrev32<mode>"
3644 [(set (match_operand:VX 0 "s_register_operand" "=w")
3645 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3648 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3649 [(set_attr "type" "neon_rev<q>")]
3652 (define_insn "neon_vrev16<mode>"
3653 [(set (match_operand:VE 0 "s_register_operand" "=w")
3654 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3657 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3658 [(set_attr "type" "neon_rev<q>")]
3661 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3662 ; allocation. For an intrinsic of form:
3663 ; rD = vbsl_* (rS, rN, rM)
3664 ; We can use any of:
3665 ; vbsl rS, rN, rM (if D = S)
3666 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3667 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
3669 (define_insn "neon_vbsl<mode>_internal"
3670 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3671 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3672 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3673 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3677 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3678 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3679 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3680 [(set_attr "type" "neon_bsl<q>")]
3683 (define_expand "neon_vbsl<mode>"
3684 [(set (match_operand:VDQX 0 "s_register_operand" "")
3685 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3686 (match_operand:VDQX 2 "s_register_operand" "")
3687 (match_operand:VDQX 3 "s_register_operand" "")]
3691 /* We can't alias operands together if they have different modes. */
3692 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
3696 (define_insn "neon_v<shift_op><sup><mode>"
3697 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3698 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3699 (match_operand:VDQIX 2 "s_register_operand" "w")]
3702 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3703 [(set_attr "type" "neon_shift_imm<q>")]
3707 (define_insn "neon_v<shift_op><sup><mode>"
3708 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3709 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3710 (match_operand:VDQIX 2 "s_register_operand" "w")]
3713 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3714 [(set_attr "type" "neon_sat_shift_imm<q>")]
3718 (define_insn "neon_v<shift_op><sup>_n<mode>"
3719 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3720 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3721 (match_operand:SI 2 "immediate_operand" "i")]
3725 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3726 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3728 [(set_attr "type" "neon_shift_imm<q>")]
3731 ;; vshrn_n, vrshrn_n
3732 (define_insn "neon_v<shift_op>_n<mode>"
3733 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3734 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3735 (match_operand:SI 2 "immediate_operand" "i")]
3739 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3740 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3742 [(set_attr "type" "neon_shift_imm_narrow_q")]
3745 ;; vqshrn_n, vqrshrn_n
3746 (define_insn "neon_v<shift_op><sup>_n<mode>"
3747 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3748 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3749 (match_operand:SI 2 "immediate_operand" "i")]
3753 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3754 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3756 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3759 ;; vqshrun_n, vqrshrun_n
3760 (define_insn "neon_v<shift_op>_n<mode>"
3761 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3762 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3763 (match_operand:SI 2 "immediate_operand" "i")]
3767 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3768 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3770 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3773 (define_insn "neon_vshl_n<mode>"
3774 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3775 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3776 (match_operand:SI 2 "immediate_operand" "i")]
3780 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3781 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3783 [(set_attr "type" "neon_shift_imm<q>")]
3786 (define_insn "neon_vqshl_<sup>_n<mode>"
3787 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3788 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3789 (match_operand:SI 2 "immediate_operand" "i")]
3793 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3794 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3796 [(set_attr "type" "neon_sat_shift_imm<q>")]
3799 (define_insn "neon_vqshlu_n<mode>"
3800 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3801 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3802 (match_operand:SI 2 "immediate_operand" "i")]
3806 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3807 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
3809 [(set_attr "type" "neon_sat_shift_imm<q>")]
3812 (define_insn "neon_vshll<sup>_n<mode>"
3813 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3814 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3815 (match_operand:SI 2 "immediate_operand" "i")]
3819 /* The boundaries are: 0 < imm <= size. */
3820 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3821 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
3823 [(set_attr "type" "neon_shift_imm_long")]
3827 (define_insn "neon_v<shift_op><sup>_n<mode>"
3828 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3829 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3830 (match_operand:VDQIX 2 "s_register_operand" "w")
3831 (match_operand:SI 3 "immediate_operand" "i")]
3835 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3836 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3838 [(set_attr "type" "neon_shift_acc<q>")]
3841 (define_insn "neon_vsri_n<mode>"
3842 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3843 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3844 (match_operand:VDQIX 2 "s_register_operand" "w")
3845 (match_operand:SI 3 "immediate_operand" "i")]
3849 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3850 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3852 [(set_attr "type" "neon_shift_reg<q>")]
3855 (define_insn "neon_vsli_n<mode>"
3856 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3857 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3858 (match_operand:VDQIX 2 "s_register_operand" "w")
3859 (match_operand:SI 3 "immediate_operand" "i")]
3863 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3864 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3866 [(set_attr "type" "neon_shift_reg<q>")]
3869 (define_insn "neon_vtbl1v8qi"
3870 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3871 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3872 (match_operand:V8QI 2 "s_register_operand" "w")]
3875 "vtbl.8\t%P0, {%P1}, %P2"
3876 [(set_attr "type" "neon_tbl1")]
3879 (define_insn "neon_vtbl2v8qi"
3880 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3881 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3882 (match_operand:V8QI 2 "s_register_operand" "w")]
3887 int tabbase = REGNO (operands[1]);
3889 ops[0] = operands[0];
3890 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3891 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3892 ops[3] = operands[2];
3893 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3897 [(set_attr "type" "neon_tbl2")]
3900 (define_insn "neon_vtbl3v8qi"
3901 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3902 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3903 (match_operand:V8QI 2 "s_register_operand" "w")]
3908 int tabbase = REGNO (operands[1]);
3910 ops[0] = operands[0];
3911 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3912 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3913 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3914 ops[4] = operands[2];
3915 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3919 [(set_attr "type" "neon_tbl3")]
3922 (define_insn "neon_vtbl4v8qi"
3923 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3924 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3925 (match_operand:V8QI 2 "s_register_operand" "w")]
3930 int tabbase = REGNO (operands[1]);
3932 ops[0] = operands[0];
3933 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3934 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3935 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3936 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3937 ops[5] = operands[2];
3938 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3942 [(set_attr "type" "neon_tbl4")]
3945 ;; These three are used by the vec_perm infrastructure for V16QImode.
3946 (define_insn_and_split "neon_vtbl1v16qi"
3947 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3948 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3949 (match_operand:V16QI 2 "s_register_operand" "w")]
3953 "&& reload_completed"
3956 rtx op0, op1, op2, part0, part2;
3960 op1 = gen_lowpart (TImode, operands[1]);
3963 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3964 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3965 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3966 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3968 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3969 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3970 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3971 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3974 [(set_attr "type" "multiple")]
3977 (define_insn_and_split "neon_vtbl2v16qi"
3978 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3979 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
3980 (match_operand:V16QI 2 "s_register_operand" "w")]
3984 "&& reload_completed"
3987 rtx op0, op1, op2, part0, part2;
3994 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3995 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3996 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3997 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3999 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4000 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4001 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4002 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4005 [(set_attr "type" "multiple")]
4008 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4009 ;; handle quad-word input modes, producing octa-word output modes. But
4010 ;; that requires us to add support for octa-word vector modes in moves.
4011 ;; That seems overkill for this one use in vec_perm.
4012 (define_insn_and_split "neon_vcombinev16qi"
4013 [(set (match_operand:OI 0 "s_register_operand" "=w")
4014 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4015 (match_operand:V16QI 2 "s_register_operand" "w")]
4019 "&& reload_completed"
4022 neon_split_vcombine (operands);
4025 [(set_attr "type" "multiple")]
4028 (define_insn "neon_vtbx1v8qi"
4029 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4030 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4031 (match_operand:V8QI 2 "s_register_operand" "w")
4032 (match_operand:V8QI 3 "s_register_operand" "w")]
4035 "vtbx.8\t%P0, {%P2}, %P3"
4036 [(set_attr "type" "neon_tbl1")]
4039 (define_insn "neon_vtbx2v8qi"
4040 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4041 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4042 (match_operand:TI 2 "s_register_operand" "w")
4043 (match_operand:V8QI 3 "s_register_operand" "w")]
4048 int tabbase = REGNO (operands[2]);
4050 ops[0] = operands[0];
4051 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4052 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4053 ops[3] = operands[3];
4054 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4058 [(set_attr "type" "neon_tbl2")]
4061 (define_insn "neon_vtbx3v8qi"
4062 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4063 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4064 (match_operand:EI 2 "s_register_operand" "w")
4065 (match_operand:V8QI 3 "s_register_operand" "w")]
4070 int tabbase = REGNO (operands[2]);
4072 ops[0] = operands[0];
4073 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4074 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4075 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4076 ops[4] = operands[3];
4077 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4081 [(set_attr "type" "neon_tbl3")]
4084 (define_insn "neon_vtbx4v8qi"
4085 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4086 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4087 (match_operand:OI 2 "s_register_operand" "w")
4088 (match_operand:V8QI 3 "s_register_operand" "w")]
4093 int tabbase = REGNO (operands[2]);
4095 ops[0] = operands[0];
4096 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4097 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4098 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4099 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4100 ops[5] = operands[3];
4101 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4105 [(set_attr "type" "neon_tbl4")]
4108 (define_expand "neon_vtrn<mode>_internal"
4110 [(set (match_operand:VDQW 0 "s_register_operand" "")
4111 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4112 (match_operand:VDQW 2 "s_register_operand" "")]
4114 (set (match_operand:VDQW 3 "s_register_operand" "")
4115 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4120 ;; Note: Different operand numbering to handle tied registers correctly.
4121 (define_insn "*neon_vtrn<mode>_insn"
4122 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4123 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4124 (match_operand:VDQW 3 "s_register_operand" "2")]
4126 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4127 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4130 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4131 [(set_attr "type" "neon_permute<q>")]
4134 (define_expand "neon_vzip<mode>_internal"
4136 [(set (match_operand:VDQW 0 "s_register_operand" "")
4137 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4138 (match_operand:VDQW 2 "s_register_operand" "")]
4140 (set (match_operand:VDQW 3 "s_register_operand" "")
4141 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4146 ;; Note: Different operand numbering to handle tied registers correctly.
4147 (define_insn "*neon_vzip<mode>_insn"
4148 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4149 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4150 (match_operand:VDQW 3 "s_register_operand" "2")]
4152 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4153 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4156 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4157 [(set_attr "type" "neon_zip<q>")]
4160 (define_expand "neon_vuzp<mode>_internal"
4162 [(set (match_operand:VDQW 0 "s_register_operand" "")
4163 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4164 (match_operand:VDQW 2 "s_register_operand" "")]
4166 (set (match_operand:VDQW 3 "s_register_operand" "")
4167 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4172 ;; Note: Different operand numbering to handle tied registers correctly.
4173 (define_insn "*neon_vuzp<mode>_insn"
4174 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4175 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4176 (match_operand:VDQW 3 "s_register_operand" "2")]
4178 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4179 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4182 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4183 [(set_attr "type" "neon_zip<q>")]
4186 (define_expand "neon_vreinterpretv8qi<mode>"
4187 [(match_operand:V8QI 0 "s_register_operand" "")
4188 (match_operand:VD_RE 1 "s_register_operand" "")]
4191 neon_reinterpret (operands[0], operands[1]);
4195 (define_expand "neon_vreinterpretv4hi<mode>"
4196 [(match_operand:V4HI 0 "s_register_operand" "")
4197 (match_operand:VD_RE 1 "s_register_operand" "")]
4200 neon_reinterpret (operands[0], operands[1]);
4204 (define_expand "neon_vreinterpretv2si<mode>"
4205 [(match_operand:V2SI 0 "s_register_operand" "")
4206 (match_operand:VD_RE 1 "s_register_operand" "")]
4209 neon_reinterpret (operands[0], operands[1]);
4213 (define_expand "neon_vreinterpretv2sf<mode>"
4214 [(match_operand:V2SF 0 "s_register_operand" "")
4215 (match_operand:VD_RE 1 "s_register_operand" "")]
4218 neon_reinterpret (operands[0], operands[1]);
4222 (define_expand "neon_vreinterpretdi<mode>"
4223 [(match_operand:DI 0 "s_register_operand" "")
4224 (match_operand:VD_RE 1 "s_register_operand" "")]
4227 neon_reinterpret (operands[0], operands[1]);
4231 (define_expand "neon_vreinterpretti<mode>"
4232 [(match_operand:TI 0 "s_register_operand" "")
4233 (match_operand:VQXMOV 1 "s_register_operand" "")]
4236 neon_reinterpret (operands[0], operands[1]);
4241 (define_expand "neon_vreinterpretv16qi<mode>"
4242 [(match_operand:V16QI 0 "s_register_operand" "")
4243 (match_operand:VQXMOV 1 "s_register_operand" "")]
4246 neon_reinterpret (operands[0], operands[1]);
4250 (define_expand "neon_vreinterpretv8hi<mode>"
4251 [(match_operand:V8HI 0 "s_register_operand" "")
4252 (match_operand:VQXMOV 1 "s_register_operand" "")]
4255 neon_reinterpret (operands[0], operands[1]);
4259 (define_expand "neon_vreinterpretv4si<mode>"
4260 [(match_operand:V4SI 0 "s_register_operand" "")
4261 (match_operand:VQXMOV 1 "s_register_operand" "")]
4264 neon_reinterpret (operands[0], operands[1]);
4268 (define_expand "neon_vreinterpretv4sf<mode>"
4269 [(match_operand:V4SF 0 "s_register_operand" "")
4270 (match_operand:VQXMOV 1 "s_register_operand" "")]
4273 neon_reinterpret (operands[0], operands[1]);
4277 (define_expand "neon_vreinterpretv2di<mode>"
4278 [(match_operand:V2DI 0 "s_register_operand" "")
4279 (match_operand:VQXMOV 1 "s_register_operand" "")]
4282 neon_reinterpret (operands[0], operands[1]);
4286 (define_expand "vec_load_lanes<mode><mode>"
4287 [(set (match_operand:VDQX 0 "s_register_operand")
4288 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4292 (define_insn "neon_vld1<mode>"
4293 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4294 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4297 "vld1.<V_sz_elem>\t%h0, %A1"
4298 [(set_attr "type" "neon_load1_1reg<q>")]
4301 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4302 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
4304 (define_insn "neon_vld1_lane<mode>"
4305 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4306 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4307 (match_operand:VDX 2 "s_register_operand" "0")
4308 (match_operand:SI 3 "immediate_operand" "i")]
4312 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4313 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4314 operands[3] = GEN_INT (lane);
4316 return "vld1.<V_sz_elem>\t%P0, %A1";
4318 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4320 [(set_attr "type" "neon_load1_one_lane<q>")]
4323 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4324 ;; here on big endian targets.
4325 (define_insn "neon_vld1_lane<mode>"
4326 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4327 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4328 (match_operand:VQX 2 "s_register_operand" "0")
4329 (match_operand:SI 3 "immediate_operand" "i")]
4333 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4334 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4335 operands[3] = GEN_INT (lane);
4336 int regno = REGNO (operands[0]);
4337 if (lane >= max / 2)
4341 operands[3] = GEN_INT (lane);
4343 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4345 return "vld1.<V_sz_elem>\t%P0, %A1";
4347 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4349 [(set_attr "type" "neon_load1_one_lane<q>")]
4352 (define_insn "neon_vld1_dup<mode>"
4353 [(set (match_operand:VD 0 "s_register_operand" "=w")
4354 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4356 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4357 [(set_attr "type" "neon_load1_all_lanes<q>")]
4360 ;; Special case for DImode. Treat it exactly like a simple load.
4361 (define_expand "neon_vld1_dupdi"
4362 [(set (match_operand:DI 0 "s_register_operand" "")
4363 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
4369 (define_insn "neon_vld1_dup<mode>"
4370 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4371 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4374 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4376 [(set_attr "type" "neon_load1_all_lanes<q>")]
4379 (define_insn_and_split "neon_vld1_dupv2di"
4380 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4381 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4384 "&& reload_completed"
4387 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4388 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4389 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4392 [(set_attr "length" "8")
4393 (set_attr "type" "neon_load1_all_lanes_q")]
4396 (define_expand "vec_store_lanes<mode><mode>"
4397 [(set (match_operand:VDQX 0 "neon_struct_operand")
4398 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
4402 (define_insn "neon_vst1<mode>"
4403 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4404 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4407 "vst1.<V_sz_elem>\t%h1, %A0"
4408 [(set_attr "type" "neon_store1_1reg<q>")])
4410 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4411 ;; here on big endian targets.
4412 (define_insn "neon_vst1_lane<mode>"
4413 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4415 [(match_operand:VDX 1 "s_register_operand" "w")
4416 (match_operand:SI 2 "immediate_operand" "i")]
4420 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4421 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4422 operands[2] = GEN_INT (lane);
4424 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4428 [(set_attr "type" "neon_store1_one_lane<q>")]
4431 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4432 ;; here on big endian targets.
4433 (define_insn "neon_vst1_lane<mode>"
4434 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4436 [(match_operand:VQX 1 "s_register_operand" "w")
4437 (match_operand:SI 2 "immediate_operand" "i")]
4441 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4442 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4443 int regno = REGNO (operands[1]);
4444 if (lane >= max / 2)
4449 operands[2] = GEN_INT (lane);
4450 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4452 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4454 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4456 [(set_attr "type" "neon_store1_one_lane<q>")]
4459 (define_expand "vec_load_lanesti<mode>"
4460 [(set (match_operand:TI 0 "s_register_operand")
4461 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4462 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4466 (define_insn "neon_vld2<mode>"
4467 [(set (match_operand:TI 0 "s_register_operand" "=w")
4468 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4469 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4473 if (<V_sz_elem> == 64)
4474 return "vld1.64\t%h0, %A1";
4476 return "vld2.<V_sz_elem>\t%h0, %A1";
4479 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4480 (const_string "neon_load1_2reg<q>")
4481 (const_string "neon_load2_2reg<q>")))]
4484 (define_expand "vec_load_lanesoi<mode>"
4485 [(set (match_operand:OI 0 "s_register_operand")
4486 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4487 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4491 (define_insn "neon_vld2<mode>"
4492 [(set (match_operand:OI 0 "s_register_operand" "=w")
4493 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4494 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4497 "vld2.<V_sz_elem>\t%h0, %A1"
4498 [(set_attr "type" "neon_load2_2reg_q")])
4500 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4501 ;; here on big endian targets.
4502 (define_insn "neon_vld2_lane<mode>"
4503 [(set (match_operand:TI 0 "s_register_operand" "=w")
4504 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4505 (match_operand:TI 2 "s_register_operand" "0")
4506 (match_operand:SI 3 "immediate_operand" "i")
4507 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4511 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4512 int regno = REGNO (operands[0]);
4514 ops[0] = gen_rtx_REG (DImode, regno);
4515 ops[1] = gen_rtx_REG (DImode, regno + 2);
4516 ops[2] = operands[1];
4517 ops[3] = GEN_INT (lane);
4518 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4521 [(set_attr "type" "neon_load2_one_lane<q>")]
4524 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4525 ;; here on big endian targets.
4526 (define_insn "neon_vld2_lane<mode>"
4527 [(set (match_operand:OI 0 "s_register_operand" "=w")
4528 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4529 (match_operand:OI 2 "s_register_operand" "0")
4530 (match_operand:SI 3 "immediate_operand" "i")
4531 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4535 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4536 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4537 int regno = REGNO (operands[0]);
4539 if (lane >= max / 2)
4544 ops[0] = gen_rtx_REG (DImode, regno);
4545 ops[1] = gen_rtx_REG (DImode, regno + 4);
4546 ops[2] = operands[1];
4547 ops[3] = GEN_INT (lane);
4548 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4551 [(set_attr "type" "neon_load2_one_lane<q>")]
4554 (define_insn "neon_vld2_dup<mode>"
4555 [(set (match_operand:TI 0 "s_register_operand" "=w")
4556 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4557 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4561 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4562 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4564 return "vld1.<V_sz_elem>\t%h0, %A1";
4567 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4568 (const_string "neon_load2_all_lanes<q>")
4569 (const_string "neon_load1_1reg<q>")))]
4572 (define_expand "vec_store_lanesti<mode>"
4573 [(set (match_operand:TI 0 "neon_struct_operand")
4574 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4575 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4579 (define_insn "neon_vst2<mode>"
4580 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4581 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4582 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4586 if (<V_sz_elem> == 64)
4587 return "vst1.64\t%h1, %A0";
4589 return "vst2.<V_sz_elem>\t%h1, %A0";
4592 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4593 (const_string "neon_store1_2reg<q>")
4594 (const_string "neon_store2_one_lane<q>")))]
4597 (define_expand "vec_store_lanesoi<mode>"
4598 [(set (match_operand:OI 0 "neon_struct_operand")
4599 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4600 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4604 (define_insn "neon_vst2<mode>"
4605 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4606 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4607 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4610 "vst2.<V_sz_elem>\t%h1, %A0"
4611 [(set_attr "type" "neon_store2_4reg<q>")]
4614 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4615 ;; here on big endian targets.
4616 (define_insn "neon_vst2_lane<mode>"
4617 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4618 (unspec:<V_two_elem>
4619 [(match_operand:TI 1 "s_register_operand" "w")
4620 (match_operand:SI 2 "immediate_operand" "i")
4621 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4625 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4626 int regno = REGNO (operands[1]);
4628 ops[0] = operands[0];
4629 ops[1] = gen_rtx_REG (DImode, regno);
4630 ops[2] = gen_rtx_REG (DImode, regno + 2);
4631 ops[3] = GEN_INT (lane);
4632 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4635 [(set_attr "type" "neon_store2_one_lane<q>")]
4638 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4639 ;; here on big endian targets.
4640 (define_insn "neon_vst2_lane<mode>"
4641 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4642 (unspec:<V_two_elem>
4643 [(match_operand:OI 1 "s_register_operand" "w")
4644 (match_operand:SI 2 "immediate_operand" "i")
4645 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4649 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4650 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4651 int regno = REGNO (operands[1]);
4653 if (lane >= max / 2)
4658 ops[0] = operands[0];
4659 ops[1] = gen_rtx_REG (DImode, regno);
4660 ops[2] = gen_rtx_REG (DImode, regno + 4);
4661 ops[3] = GEN_INT (lane);
4662 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4665 [(set_attr "type" "neon_store2_one_lane<q>")]
4668 (define_expand "vec_load_lanesei<mode>"
4669 [(set (match_operand:EI 0 "s_register_operand")
4670 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4671 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4675 (define_insn "neon_vld3<mode>"
4676 [(set (match_operand:EI 0 "s_register_operand" "=w")
4677 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4678 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4682 if (<V_sz_elem> == 64)
4683 return "vld1.64\t%h0, %A1";
4685 return "vld3.<V_sz_elem>\t%h0, %A1";
4688 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4689 (const_string "neon_load1_3reg<q>")
4690 (const_string "neon_load3_3reg<q>")))]
4693 (define_expand "vec_load_lanesci<mode>"
4694 [(match_operand:CI 0 "s_register_operand")
4695 (match_operand:CI 1 "neon_struct_operand")
4696 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4699 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
4703 (define_expand "neon_vld3<mode>"
4704 [(match_operand:CI 0 "s_register_operand")
4705 (match_operand:CI 1 "neon_struct_operand")
4706 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4711 mem = adjust_address (operands[1], EImode, 0);
4712 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4713 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4714 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
4718 (define_insn "neon_vld3qa<mode>"
4719 [(set (match_operand:CI 0 "s_register_operand" "=w")
4720 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4721 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4725 int regno = REGNO (operands[0]);
4727 ops[0] = gen_rtx_REG (DImode, regno);
4728 ops[1] = gen_rtx_REG (DImode, regno + 4);
4729 ops[2] = gen_rtx_REG (DImode, regno + 8);
4730 ops[3] = operands[1];
4731 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4734 [(set_attr "type" "neon_load3_3reg<q>")]
4737 (define_insn "neon_vld3qb<mode>"
4738 [(set (match_operand:CI 0 "s_register_operand" "=w")
4739 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4740 (match_operand:CI 2 "s_register_operand" "0")
4741 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4745 int regno = REGNO (operands[0]);
4747 ops[0] = gen_rtx_REG (DImode, regno + 2);
4748 ops[1] = gen_rtx_REG (DImode, regno + 6);
4749 ops[2] = gen_rtx_REG (DImode, regno + 10);
4750 ops[3] = operands[1];
4751 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4754 [(set_attr "type" "neon_load3_3reg<q>")]
4757 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4758 ;; here on big endian targets.
4759 (define_insn "neon_vld3_lane<mode>"
4760 [(set (match_operand:EI 0 "s_register_operand" "=w")
4761 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4762 (match_operand:EI 2 "s_register_operand" "0")
4763 (match_operand:SI 3 "immediate_operand" "i")
4764 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4768 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
4769 int regno = REGNO (operands[0]);
4771 ops[0] = gen_rtx_REG (DImode, regno);
4772 ops[1] = gen_rtx_REG (DImode, regno + 2);
4773 ops[2] = gen_rtx_REG (DImode, regno + 4);
4774 ops[3] = operands[1];
4775 ops[4] = GEN_INT (lane);
4776 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4780 [(set_attr "type" "neon_load3_one_lane<q>")]
4783 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4784 ;; here on big endian targets.
4785 (define_insn "neon_vld3_lane<mode>"
4786 [(set (match_operand:CI 0 "s_register_operand" "=w")
4787 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4788 (match_operand:CI 2 "s_register_operand" "0")
4789 (match_operand:SI 3 "immediate_operand" "i")
4790 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4794 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4795 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4796 int regno = REGNO (operands[0]);
4798 if (lane >= max / 2)
4803 ops[0] = gen_rtx_REG (DImode, regno);
4804 ops[1] = gen_rtx_REG (DImode, regno + 4);
4805 ops[2] = gen_rtx_REG (DImode, regno + 8);
4806 ops[3] = operands[1];
4807 ops[4] = GEN_INT (lane);
4808 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4812 [(set_attr "type" "neon_load3_one_lane<q>")]
4815 (define_insn "neon_vld3_dup<mode>"
4816 [(set (match_operand:EI 0 "s_register_operand" "=w")
4817 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4818 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4822 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4824 int regno = REGNO (operands[0]);
4826 ops[0] = gen_rtx_REG (DImode, regno);
4827 ops[1] = gen_rtx_REG (DImode, regno + 2);
4828 ops[2] = gen_rtx_REG (DImode, regno + 4);
4829 ops[3] = operands[1];
4830 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4834 return "vld1.<V_sz_elem>\t%h0, %A1";
4837 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4838 (const_string "neon_load3_all_lanes<q>")
4839 (const_string "neon_load1_1reg<q>")))])
4841 (define_expand "vec_store_lanesei<mode>"
4842 [(set (match_operand:EI 0 "neon_struct_operand")
4843 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4844 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4848 (define_insn "neon_vst3<mode>"
4849 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4850 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4851 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4855 if (<V_sz_elem> == 64)
4856 return "vst1.64\t%h1, %A0";
4858 return "vst3.<V_sz_elem>\t%h1, %A0";
4861 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4862 (const_string "neon_store1_3reg<q>")
4863 (const_string "neon_store3_one_lane<q>")))])
4865 (define_expand "vec_store_lanesci<mode>"
4866 [(match_operand:CI 0 "neon_struct_operand")
4867 (match_operand:CI 1 "s_register_operand")
4868 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4871 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
4875 (define_expand "neon_vst3<mode>"
4876 [(match_operand:CI 0 "neon_struct_operand")
4877 (match_operand:CI 1 "s_register_operand")
4878 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4883 mem = adjust_address (operands[0], EImode, 0);
4884 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4885 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4886 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
4890 (define_insn "neon_vst3qa<mode>"
4891 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4892 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4893 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4897 int regno = REGNO (operands[1]);
4899 ops[0] = operands[0];
4900 ops[1] = gen_rtx_REG (DImode, regno);
4901 ops[2] = gen_rtx_REG (DImode, regno + 4);
4902 ops[3] = gen_rtx_REG (DImode, regno + 8);
4903 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4906 [(set_attr "type" "neon_store3_3reg<q>")]
4909 (define_insn "neon_vst3qb<mode>"
4910 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4911 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4912 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4916 int regno = REGNO (operands[1]);
4918 ops[0] = operands[0];
4919 ops[1] = gen_rtx_REG (DImode, regno + 2);
4920 ops[2] = gen_rtx_REG (DImode, regno + 6);
4921 ops[3] = gen_rtx_REG (DImode, regno + 10);
4922 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4925 [(set_attr "type" "neon_store3_3reg<q>")]
4928 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4929 ;; here on big endian targets.
4930 (define_insn "neon_vst3_lane<mode>"
4931 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4932 (unspec:<V_three_elem>
4933 [(match_operand:EI 1 "s_register_operand" "w")
4934 (match_operand:SI 2 "immediate_operand" "i")
4935 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4939 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4940 int regno = REGNO (operands[1]);
4942 ops[0] = operands[0];
4943 ops[1] = gen_rtx_REG (DImode, regno);
4944 ops[2] = gen_rtx_REG (DImode, regno + 2);
4945 ops[3] = gen_rtx_REG (DImode, regno + 4);
4946 ops[4] = GEN_INT (lane);
4947 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4951 [(set_attr "type" "neon_store3_one_lane<q>")]
4954 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4955 ;; here on big endian targets.
4956 (define_insn "neon_vst3_lane<mode>"
4957 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4958 (unspec:<V_three_elem>
4959 [(match_operand:CI 1 "s_register_operand" "w")
4960 (match_operand:SI 2 "immediate_operand" "i")
4961 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4965 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4966 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4967 int regno = REGNO (operands[1]);
4969 if (lane >= max / 2)
4974 ops[0] = operands[0];
4975 ops[1] = gen_rtx_REG (DImode, regno);
4976 ops[2] = gen_rtx_REG (DImode, regno + 4);
4977 ops[3] = gen_rtx_REG (DImode, regno + 8);
4978 ops[4] = GEN_INT (lane);
4979 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4983 [(set_attr "type" "neon_store3_one_lane<q>")]
4986 (define_expand "vec_load_lanesoi<mode>"
4987 [(set (match_operand:OI 0 "s_register_operand")
4988 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4989 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4993 (define_insn "neon_vld4<mode>"
4994 [(set (match_operand:OI 0 "s_register_operand" "=w")
4995 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4996 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5000 if (<V_sz_elem> == 64)
5001 return "vld1.64\t%h0, %A1";
5003 return "vld4.<V_sz_elem>\t%h0, %A1";
5006 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5007 (const_string "neon_load1_4reg<q>")
5008 (const_string "neon_load4_4reg<q>")))]
5011 (define_expand "vec_load_lanesxi<mode>"
5012 [(match_operand:XI 0 "s_register_operand")
5013 (match_operand:XI 1 "neon_struct_operand")
5014 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5017 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
5021 (define_expand "neon_vld4<mode>"
5022 [(match_operand:XI 0 "s_register_operand")
5023 (match_operand:XI 1 "neon_struct_operand")
5024 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5029 mem = adjust_address (operands[1], OImode, 0);
5030 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5031 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5032 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
5036 (define_insn "neon_vld4qa<mode>"
5037 [(set (match_operand:XI 0 "s_register_operand" "=w")
5038 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5039 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5043 int regno = REGNO (operands[0]);
5045 ops[0] = gen_rtx_REG (DImode, regno);
5046 ops[1] = gen_rtx_REG (DImode, regno + 4);
5047 ops[2] = gen_rtx_REG (DImode, regno + 8);
5048 ops[3] = gen_rtx_REG (DImode, regno + 12);
5049 ops[4] = operands[1];
5050 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5053 [(set_attr "type" "neon_load4_4reg<q>")]
5056 (define_insn "neon_vld4qb<mode>"
5057 [(set (match_operand:XI 0 "s_register_operand" "=w")
5058 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5059 (match_operand:XI 2 "s_register_operand" "0")
5060 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5064 int regno = REGNO (operands[0]);
5066 ops[0] = gen_rtx_REG (DImode, regno + 2);
5067 ops[1] = gen_rtx_REG (DImode, regno + 6);
5068 ops[2] = gen_rtx_REG (DImode, regno + 10);
5069 ops[3] = gen_rtx_REG (DImode, regno + 14);
5070 ops[4] = operands[1];
5071 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5074 [(set_attr "type" "neon_load4_4reg<q>")]
5077 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5078 ;; here on big endian targets.
5079 (define_insn "neon_vld4_lane<mode>"
5080 [(set (match_operand:OI 0 "s_register_operand" "=w")
5081 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5082 (match_operand:OI 2 "s_register_operand" "0")
5083 (match_operand:SI 3 "immediate_operand" "i")
5084 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5088 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5089 int regno = REGNO (operands[0]);
5091 ops[0] = gen_rtx_REG (DImode, regno);
5092 ops[1] = gen_rtx_REG (DImode, regno + 2);
5093 ops[2] = gen_rtx_REG (DImode, regno + 4);
5094 ops[3] = gen_rtx_REG (DImode, regno + 6);
5095 ops[4] = operands[1];
5096 ops[5] = GEN_INT (lane);
5097 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5101 [(set_attr "type" "neon_load4_one_lane<q>")]
5104 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5105 ;; here on big endian targets.
5106 (define_insn "neon_vld4_lane<mode>"
5107 [(set (match_operand:XI 0 "s_register_operand" "=w")
5108 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5109 (match_operand:XI 2 "s_register_operand" "0")
5110 (match_operand:SI 3 "immediate_operand" "i")
5111 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5115 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5116 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5117 int regno = REGNO (operands[0]);
5119 if (lane >= max / 2)
5124 ops[0] = gen_rtx_REG (DImode, regno);
5125 ops[1] = gen_rtx_REG (DImode, regno + 4);
5126 ops[2] = gen_rtx_REG (DImode, regno + 8);
5127 ops[3] = gen_rtx_REG (DImode, regno + 12);
5128 ops[4] = operands[1];
5129 ops[5] = GEN_INT (lane);
5130 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5134 [(set_attr "type" "neon_load4_one_lane<q>")]
5137 (define_insn "neon_vld4_dup<mode>"
5138 [(set (match_operand:OI 0 "s_register_operand" "=w")
5139 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5140 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5144 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5146 int regno = REGNO (operands[0]);
5148 ops[0] = gen_rtx_REG (DImode, regno);
5149 ops[1] = gen_rtx_REG (DImode, regno + 2);
5150 ops[2] = gen_rtx_REG (DImode, regno + 4);
5151 ops[3] = gen_rtx_REG (DImode, regno + 6);
5152 ops[4] = operands[1];
5153 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5158 return "vld1.<V_sz_elem>\t%h0, %A1";
5161 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5162 (const_string "neon_load4_all_lanes<q>")
5163 (const_string "neon_load1_1reg<q>")))]
5166 (define_expand "vec_store_lanesoi<mode>"
5167 [(set (match_operand:OI 0 "neon_struct_operand")
5168 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5169 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5173 (define_insn "neon_vst4<mode>"
5174 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5175 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5176 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5180 if (<V_sz_elem> == 64)
5181 return "vst1.64\t%h1, %A0";
5183 return "vst4.<V_sz_elem>\t%h1, %A0";
5186 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5187 (const_string "neon_store1_4reg<q>")
5188 (const_string "neon_store4_4reg<q>")))]
5191 (define_expand "vec_store_lanesxi<mode>"
5192 [(match_operand:XI 0 "neon_struct_operand")
5193 (match_operand:XI 1 "s_register_operand")
5194 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5197 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
5201 (define_expand "neon_vst4<mode>"
5202 [(match_operand:XI 0 "neon_struct_operand")
5203 (match_operand:XI 1 "s_register_operand")
5204 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5209 mem = adjust_address (operands[0], OImode, 0);
5210 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5211 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5212 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
5216 (define_insn "neon_vst4qa<mode>"
5217 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5218 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5219 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5223 int regno = REGNO (operands[1]);
5225 ops[0] = operands[0];
5226 ops[1] = gen_rtx_REG (DImode, regno);
5227 ops[2] = gen_rtx_REG (DImode, regno + 4);
5228 ops[3] = gen_rtx_REG (DImode, regno + 8);
5229 ops[4] = gen_rtx_REG (DImode, regno + 12);
5230 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5233 [(set_attr "type" "neon_store4_4reg<q>")]
5236 (define_insn "neon_vst4qb<mode>"
5237 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5238 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5239 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5243 int regno = REGNO (operands[1]);
5245 ops[0] = operands[0];
5246 ops[1] = gen_rtx_REG (DImode, regno + 2);
5247 ops[2] = gen_rtx_REG (DImode, regno + 6);
5248 ops[3] = gen_rtx_REG (DImode, regno + 10);
5249 ops[4] = gen_rtx_REG (DImode, regno + 14);
5250 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5253 [(set_attr "type" "neon_store4_4reg<q>")]
5256 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5257 ;; here on big endian targets.
5258 (define_insn "neon_vst4_lane<mode>"
5259 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5260 (unspec:<V_four_elem>
5261 [(match_operand:OI 1 "s_register_operand" "w")
5262 (match_operand:SI 2 "immediate_operand" "i")
5263 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5267 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5268 int regno = REGNO (operands[1]);
5270 ops[0] = operands[0];
5271 ops[1] = gen_rtx_REG (DImode, regno);
5272 ops[2] = gen_rtx_REG (DImode, regno + 2);
5273 ops[3] = gen_rtx_REG (DImode, regno + 4);
5274 ops[4] = gen_rtx_REG (DImode, regno + 6);
5275 ops[5] = GEN_INT (lane);
5276 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5280 [(set_attr "type" "neon_store4_one_lane<q>")]
5283 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5284 ;; here on big endian targets.
5285 (define_insn "neon_vst4_lane<mode>"
5286 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5287 (unspec:<V_four_elem>
5288 [(match_operand:XI 1 "s_register_operand" "w")
5289 (match_operand:SI 2 "immediate_operand" "i")
5290 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5294 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5295 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5296 int regno = REGNO (operands[1]);
5298 if (lane >= max / 2)
5303 ops[0] = operands[0];
5304 ops[1] = gen_rtx_REG (DImode, regno);
5305 ops[2] = gen_rtx_REG (DImode, regno + 4);
5306 ops[3] = gen_rtx_REG (DImode, regno + 8);
5307 ops[4] = gen_rtx_REG (DImode, regno + 12);
5308 ops[5] = GEN_INT (lane);
5309 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5313 [(set_attr "type" "neon_store4_4reg<q>")]
5316 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5317 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5318 (SE:<V_unpack> (vec_select:<V_HALF>
5319 (match_operand:VU 1 "register_operand" "w")
5320 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5321 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5322 "vmovl.<US><V_sz_elem> %q0, %e1"
5323 [(set_attr "type" "neon_shift_imm_long")]
5326 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5327 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5328 (SE:<V_unpack> (vec_select:<V_HALF>
5329 (match_operand:VU 1 "register_operand" "w")
5330 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5332 "vmovl.<US><V_sz_elem> %q0, %f1"
5333 [(set_attr "type" "neon_shift_imm_long")]
5336 (define_expand "vec_unpack<US>_hi_<mode>"
5337 [(match_operand:<V_unpack> 0 "register_operand" "")
5338 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5339 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5341 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5344 for (i = 0; i < (<V_mode_nunits>/2); i++)
5345 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5347 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5348 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
5355 (define_expand "vec_unpack<US>_lo_<mode>"
5356 [(match_operand:<V_unpack> 0 "register_operand" "")
5357 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5358 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5360 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5363 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5364 RTVEC_ELT (v, i) = GEN_INT (i);
5365 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5366 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
5373 (define_insn "neon_vec_<US>mult_lo_<mode>"
5374 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5375 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5376 (match_operand:VU 1 "register_operand" "w")
5377 (match_operand:VU 2 "vect_par_constant_low" "")))
5378 (SE:<V_unpack> (vec_select:<V_HALF>
5379 (match_operand:VU 3 "register_operand" "w")
5381 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5382 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5383 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5386 (define_expand "vec_widen_<US>mult_lo_<mode>"
5387 [(match_operand:<V_unpack> 0 "register_operand" "")
5388 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5389 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5390 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5392 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5395 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5396 RTVEC_ELT (v, i) = GEN_INT (i);
5397 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5399 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
5407 (define_insn "neon_vec_<US>mult_hi_<mode>"
5408 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5409 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5410 (match_operand:VU 1 "register_operand" "w")
5411 (match_operand:VU 2 "vect_par_constant_high" "")))
5412 (SE:<V_unpack> (vec_select:<V_HALF>
5413 (match_operand:VU 3 "register_operand" "w")
5415 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5416 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5417 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5420 (define_expand "vec_widen_<US>mult_hi_<mode>"
5421 [(match_operand:<V_unpack> 0 "register_operand" "")
5422 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5423 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5424 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5426 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5429 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5430 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5431 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5433 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
5442 (define_insn "neon_vec_<US>shiftl_<mode>"
5443 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5444 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5445 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5448 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5450 [(set_attr "type" "neon_shift_imm_long")]
5453 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5454 [(match_operand:<V_unpack> 0 "register_operand" "")
5455 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5456 (match_operand:SI 2 "immediate_operand" "i")]
5457 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5459 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5460 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
5466 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5467 [(match_operand:<V_unpack> 0 "register_operand" "")
5468 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5469 (match_operand:SI 2 "immediate_operand" "i")]
5470 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5472 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5473 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5474 GET_MODE_SIZE (<V_HALF>mode)),
5480 ;; Vectorize for non-neon-quad case
5481 (define_insn "neon_unpack<US>_<mode>"
5482 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5483 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5485 "vmovl.<US><V_sz_elem> %q0, %P1"
5486 [(set_attr "type" "neon_move")]
5489 (define_expand "vec_unpack<US>_lo_<mode>"
5490 [(match_operand:<V_double_width> 0 "register_operand" "")
5491 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5494 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5495 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5496 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5502 (define_expand "vec_unpack<US>_hi_<mode>"
5503 [(match_operand:<V_double_width> 0 "register_operand" "")
5504 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5507 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5508 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5509 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5515 (define_insn "neon_vec_<US>mult_<mode>"
5516 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5517 (mult:<V_widen> (SE:<V_widen>
5518 (match_operand:VDI 1 "register_operand" "w"))
5520 (match_operand:VDI 2 "register_operand" "w"))))]
5522 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5523 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5526 (define_expand "vec_widen_<US>mult_hi_<mode>"
5527 [(match_operand:<V_double_width> 0 "register_operand" "")
5528 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5529 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5532 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5533 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5534 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5541 (define_expand "vec_widen_<US>mult_lo_<mode>"
5542 [(match_operand:<V_double_width> 0 "register_operand" "")
5543 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5544 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5547 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5548 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5549 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5556 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5557 [(match_operand:<V_double_width> 0 "register_operand" "")
5558 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5559 (match_operand:SI 2 "immediate_operand" "i")]
5562 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5563 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5564 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5570 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5571 [(match_operand:<V_double_width> 0 "register_operand" "")
5572 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5573 (match_operand:SI 2 "immediate_operand" "i")]
5576 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5577 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5578 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5584 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5585 ; because the ordering of vector elements in Q registers is different from what
5586 ; the semantics of the instructions require.
5588 (define_insn "vec_pack_trunc_<mode>"
5589 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5590 (vec_concat:<V_narrow_pack>
5591 (truncate:<V_narrow>
5592 (match_operand:VN 1 "register_operand" "w"))
5593 (truncate:<V_narrow>
5594 (match_operand:VN 2 "register_operand" "w"))))]
5595 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5596 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5597 [(set_attr "type" "multiple")
5598 (set_attr "length" "8")]
5601 ;; For the non-quad case.
5602 (define_insn "neon_vec_pack_trunc_<mode>"
5603 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5604 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5605 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5606 "vmovn.i<V_sz_elem>\t%P0, %q1"
5607 [(set_attr "type" "neon_move_narrow_q")]
5610 (define_expand "vec_pack_trunc_<mode>"
5611 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5612 (match_operand:VSHFT 1 "register_operand" "")
5613 (match_operand:VSHFT 2 "register_operand")]
5614 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5616 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5618 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5619 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5620 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
5624 (define_insn "neon_vabd<mode>_2"
5625 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5626 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5627 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5628 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5629 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5631 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5632 (const_string "neon_fp_abd_s<q>")
5633 (const_string "neon_abd<q>")))]
5636 (define_insn "neon_vabd<mode>_3"
5637 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5638 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5639 (match_operand:VDQ 2 "s_register_operand" "w")]
5641 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5642 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5644 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5645 (const_string "neon_fp_abd_s<q>")
5646 (const_string "neon_abd<q>")))]
5649 ;; Copy from core-to-neon regs, then extend, not vice-versa
5652 [(set (match_operand:DI 0 "s_register_operand" "")
5653 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5654 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5655 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5656 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5658 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5662 [(set (match_operand:DI 0 "s_register_operand" "")
5663 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5664 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5665 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5666 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5668 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5672 [(set (match_operand:DI 0 "s_register_operand" "")
5673 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5674 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5675 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5676 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5678 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5682 [(set (match_operand:DI 0 "s_register_operand" "")
5683 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5684 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5685 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5686 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5688 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5692 [(set (match_operand:DI 0 "s_register_operand" "")
5693 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5694 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5695 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5696 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5698 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5702 [(set (match_operand:DI 0 "s_register_operand" "")
5703 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5704 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5705 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5706 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5708 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));