1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Declares the insn attribute "vqh_mnem" with the possible values vadd,
;; vmin and vmax; the default value is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX modes with nine constraint alternatives; at least
;; one of the two operands must be a register.  Alternative 2 (a "Dn"
;; immediate source) is checked with neon_immediate_valid_for_move and is
;; printed with one of the two vmov templates built in the first part of the
;; output code.  Alternatives 1 and 3 are delegated to output_move_neon,
;; alternatives 4 and 5 transfer between a core-register pair and a NEON
;; register with a single vmov, and the remaining alternatives (6-8) are
;; delegated to output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Move pattern for the VQXMOV modes with nine constraint alternatives; at
;; least one of the two operands must be a register.  Alternative 2 (a "Dn"
;; immediate source) is checked with neon_immediate_valid_for_move and is
;; printed with one of the two vmov templates built in the first part of the
;; output code.  Alternatives 1 and 3 are delegated to output_move_neon,
;; alternatives 4 and 5 each emit two vmov instructions (one per half of the
;; NEON register), and the remaining alternatives (6-8) are delegated to
;; output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Expander for TImode moves.  While new pseudos may still be created, the
;; source is forced into a register whenever the destination is not a REG.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves in the VSTRUCT modes.  While new pseudos may still be
;; created, the source is forced into a register whenever the destination is
;; not a REG.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move pattern for the VSTRUCT modes with three alternatives: register to
;; register, register to "Ut" memory, and "Ut" memory to register.  At least
;; one operand must be a register.  Alternatives 1 and 2 are printed by
;; output_move_neon; the insn length is computed at run time by
;; arm_attr_length_move_neon.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; NOTE(review): presumably the body of a define_split whose header line is
;; not visible here.  After reload, an EI-mode register-to-register copy is
;; rewritten as two sets: a TImode piece at the base register number and a
;; DImode piece at base + 4.  The pieces are handed to neon_disambiguate_copy,
;; which fills in the operands used by the replacement pattern.
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
;; NOTE(review): presumably the body of a define_split whose header line is
;; not visible here.  After reload, an OI-mode register-to-register copy is
;; rewritten as two TImode sets, at the base register number and at base + 4.
;; The pieces are handed to neon_disambiguate_copy, which fills in the
;; operands used by the replacement pattern.
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
;; NOTE(review): presumably the body of a define_split whose header line is
;; not visible here.  After reload, a CI-mode register-to-register copy is
;; rewritten as three TImode sets, at base, base + 4 and base + 8.  The
;; pieces are handed to neon_disambiguate_copy, which fills in the operands
;; used by the replacement pattern.
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
;; NOTE(review): presumably the body of a define_split whose header line is
;; not visible here.  After reload, an XI-mode register-to-register copy is
;; rewritten as four TImode sets, at base, base + 4, base + 8 and base + 12.
;; The pieces are handed to neon_disambiguate_copy, which fills in the
;; operands used by the replacement pattern.
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned moves in the VDQX modes, expressed as an
;; UNSPEC_MISALIGNED_ACCESS of the source.  Only enabled for NEON when the
;; target is not big-endian and unaligned_access is set.  If neither operand
;; is a register, operand 1 is forced into one.  The non-register operand is
;; then selected as adjust_mem, and its address is forced into a Pmode
;; register when neon_vector_mem_operand does not accept it.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode register (printed with %P) to a "Um"
;; memory operand, emitted as a single vst1.  Same enabling condition as the
;; movmisalign<mode> expander.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VDX-mode register (printed with %P) from a
;; neon_permissive_struct_operand, emitted as a single vld1.  Same enabling
;; condition as the movmisalign<mode> expander.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register (printed with %q) to a "Um"
;; memory operand, emitted as a single vst1.  Same enabling condition as the
;; movmisalign<mode> expander.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VQX-mode register (printed with %q) from a
;; neon_permissive_struct_operand, emitted as a single vld1.  Same enabling
;; condition as the movmisalign<mode> expander.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
;; Inserts scalar operand 1 into one lane of a VD-mode vector; operand 3 is
;; tied to the output.  Operand 2 is a bit mask: ffs (mask) - 1 gives the
;; lane index, which is mirrored (NUNITS - 1 - elt) when BYTES_BIG_ENDIAN.
;; Alternative 0 loads the lane from memory with vld1; alternative 1 moves it
;; from a core register with vmov.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Inserts scalar operand 1 into one lane of a VQ-mode vector; operand 3 is
;; tied to the output.  Operand 2 is a bit mask: ffs (mask) - 1 gives the
;; element index.  The index is split into a half selector (hi, added to the
;; destination register number) and a lane within that half, which is
;; mirrored when BYTES_BIG_ENDIAN.  Operand 0 is then rewritten as the
;; selected <V_HALF>mode register, so the same vld1 / vmov templates as the
;; VD variant are used.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Inserts DImode operand 1 into one element of a V2DI vector; operand 3 is
;; tied to the output.  Operand 2 is a bit mask: ffs (mask) - 1 gives the
;; element index, and operand 0 is rewritten as the DImode register at
;; REGNO + 2 * index.  Alternative 0 loads it with vld1.64; alternative 1
;; moves it from a core-register pair with vmov.
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for setting one element of a VDQ-mode vector.  The element index
;; in operand 2 is converted to the bit mask 1 << index and passed to
;; vec_set<mode>_internal, with operand 0 supplied both as the destination
;; and as the vector being updated.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
;; Extracts the lane given by immediate operand 2 from a VD-mode vector.
;; When BYTES_BIG_ENDIAN the lane index is mirrored (NUNITS - 1 - elt).
;; Alternative 0 stores the lane to memory with vst1; alternative 1 moves it
;; to a core register with vmov.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extracts the element given by immediate operand 2 from a VQ-mode vector.
;; The index is split into a half selector (hi, added to the source register
;; number) and a lane within that half, which is mirrored when
;; BYTES_BIG_ENDIAN.  Operand 1 is rewritten as the selected <V_HALF>mode
;; register; alternative 0 stores the lane with vst1 and alternative 1 moves
;; it to a core register with vmov.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extracts the DImode element given by immediate operand 2 from a V2DI
;; vector.  Operand 1 is rewritten as the DImode register at
;; REGNO + 2 * index; alternative 0 stores it with vst1.64 and alternative 1
;; moves it to a core-register pair with vmov.
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for initialising a VDQ-mode vector; all of the work is delegated
;; to neon_expand_vector_init (destination, initialiser).
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
;; Element-wise vector addition (vadd) for the VDQ modes.  Floating-point
;; modes are only matched under flag_unsafe_math_optimizations.  The "type"
;; attribute is neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
;; DImode addition with seven alternatives, in NEON ("w") or core ("r")
;; registers; the pattern clobbers the condition-code register.
;; Alternatives 0 and 3 emit vadd.i64 and are selected through the "arch"
;; attribute (neon_for_64bits / avoid_neon_for_64bits).  The remaining
;; alternatives are typed "multiple" with length 8 and clobber the
;; condition codes.
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Element-wise vector subtraction (vsub) for the VDQ modes.  Floating-point
;; modes are only matched under flag_unsafe_math_optimizations.  The "type"
;; attribute is neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
;; DImode subtraction with five alternatives; the pattern clobbers the
;; condition-code register.  Alternatives 0 and 4 use NEON registers and emit
;; vsub.i64 (selected through the "arch" attribute); alternatives 1-3 use
;; core registers and emit a subs / sbc pair on the low and high words.
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Element-wise vector multiplication (vmul) for the VDQW modes.
;; Floating-point modes are only matched under flag_unsafe_math_optimizations.
;; The "type" attribute is neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate (vmla) for the VDQW modes: operand 0 = operand 2 *
;; operand 3 + operand 1, with the accumulator (operand 1) tied to the
;; output register.  Floating-point modes are only matched under
;; flag_unsafe_math_optimizations.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-subtract (vmls) for the VDQW modes: operand 0 = operand 1 -
;; operand 2 * operand 3, with the accumulator (operand 1) tied to the
;; output register.  Floating-point modes are only matched under
;; flag_unsafe_math_optimizations.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
580 ;; phase can use it when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add (vfma) for the VCVTF modes: operand 0 = operand 1 *
;; operand 2 + operand 3, with operand 3 tied to the output.  This named
;; variant additionally requires flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add (vfma) for the VCVTF modes with the same RTL and
;; output template as fma<VCVTF:mode>4, but enabled whenever
;; TARGET_NEON && TARGET_FMA, without requiring
;; flag_unsafe_math_optimizations.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract (vfms) for the VCVTF modes: operand 0 =
;; (-operand 1) * operand 2 + operand 3, with operand 3 tied to the output.
;; This variant additionally requires flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract (vfms) for the VCVTF modes with the same RTL and
;; output template as *fmsub<VCVTF:mode>4, but enabled whenever
;; TARGET_NEON && TARGET_FMA, without requiring
;; flag_unsafe_math_optimizations.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vrint<variant>.f32 on a VCVTF-mode register, modelled as an unspec of
;; operand 1; one pattern is generated per NEON_VRINT variant.  Requires
;; TARGET_NEON && TARGET_FPU_ARMV8.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; vcvt<variant>.<su>32.f32: conversion of a VCVTF-mode vector to the
;; <V_cmp_result> integer mode, modelled as FIXUORS applied to an unspec of
;; operand 1.  Requires TARGET_NEON && TARGET_FPU_ARMV8; the insn is marked
;; as not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 is the register form (vorr);
;; alternative 1 takes a "Dl" immediate, with operand 1 tied to the output,
;; and is printed by neon_output_logic_immediate using the "vorr" mnemonic.
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
649 switch (which_alternative)
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
657 [(set_attr "type" "neon_logic<q>")]
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 is the register form (vand);
;; alternative 1 takes a "DL" immediate, with operand 1 tied to the output,
;; and is printed by neon_output_logic_immediate using the "vand" mnemonic
;; and a fourth argument of 1.
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
671 switch (which_alternative)
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
679 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT (vorn) for the VDQ modes: operand 0 = operand 1 | ~operand 2.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
;; DImode OR-NOT (operand 0 = operand 1 | ~operand 2) with one NEON-register
;; alternative and three core-register alternatives.  The split applies only
;; when operand 0 is not a VFP register.  The visible preparation code builds
;; the SImode high and low parts of all three operands for the two SImode
;; ior/not sets of the replacement pattern; it also contains a path that
;; emits one_cmpldi2 followed by iordi3 instead.
;; NOTE(review): the condition choosing between those two paths is not
;; visible here -- confirm against the full source.
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
;; Bit clear (vbic) for the VDQ modes: operand 0 = operand 1 & ~operand 2.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
740 ;; Compare to *anddi_notdi_di.
;; DImode bit clear (operand 0 = operand 1 & ~operand 2) with one
;; NEON-register alternative and two core-register alternatives; the
;; core-register alternatives are typed "multiple" with length 8.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
;; Bitwise exclusive OR (veor) for the VDQ modes, register operands only.
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement (vmvn) for the VDQ modes.
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
;; Element-wise absolute value (vabs) for the VDQW modes.  The "type"
;; attribute is neon_fp_abs_s<q> for float modes and neon_abs<q> otherwise.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
;; Element-wise negation (vneg) for the VDQW modes.  The "type" attribute is
;; neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
;; DImode negation with four alternatives: two in NEON ("w") registers and
;; two in core ("r") registers.  Operand 2 is a DImode scratch that is a real
;; NEON register only in alternative 1 ("&w") and unused ("X") elsewhere.
;; The pattern clobbers the condition-code register; it is typed "multiple"
;; with length 8.
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
804 ; Split negdi2_neon for vfp registers
;; NOTE(review): presumably the body of a define_split whose header line is
;; not visible here.  After reload, when operand 0 is a VFP register, the
;; negation becomes "temporary = 0" followed by "operand 0 = temporary -
;; operand 1" (with a CC clobber).  The temporary is the scratch operand 2,
;; or operand 0 itself when no scratch register was allocated.
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
820 ; Split negdi2_neon for core registers
;; NOTE(review): presumably the body of a define_split whose header line is
;; not visible here.  After reload on TARGET_32BIT, when operand 0 is a
;; general (core) register, the insn is rewritten as the same DImode
;; negation with only the CC clobber, i.e. without the scratch clobber.
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
;; Element-wise unsigned minimum (vmin.<V_u_elem>) for the VDQIW modes.
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
;; Element-wise unsigned maximum (vmax.<V_u_elem>) for the VDQIW modes.
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
;; Element-wise signed minimum (vmin.<V_s_elem>) for the VDQW modes.  The
;; "type" attribute is neon_fp_minmax_s<q> for float modes and
;; neon_minmax<q> otherwise.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
;; Element-wise signed maximum (vmax.<V_s_elem>) for the VDQW modes.  The
;; "type" attribute is neon_fp_minmax_s<q> for float modes and
;; neon_minmax<q> otherwise.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a
;; register operand (vshl.<V_s_elem>); alternative 1 shifts by a "Dn"
;; immediate and is printed by neon_output_shift_immediate with the "vshl"
;; mnemonic and sign character 'i'.
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
885 switch (which_alternative)
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
890 VALID_NEON_QREG_MODE (<MODE>mode),
892 default: gcc_unreachable ();
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Vector arithmetic right shift by a "Dn" immediate for the VDQIW modes,
;; printed by neon_output_shift_immediate with the "vshr" mnemonic and sign
;; character 's'.
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
908 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector logical right shift by a "Dn" immediate for the VDQIW modes,
;; printed by neon_output_shift_immediate with the "vshr" mnemonic and sign
;; character 'u'.
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
921 [(set_attr "type" "neon_shift_imm<q>")]
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
925 ; amount, with signed operands. This is essentially the same as vashl<mode>3
926 ; above, but using an unspec in case GCC tries anything tricky with negative
;; Register-count vshl.<V_s_elem> for the VDQI modes, wrapped in
;; UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander emits it with a negated
;; shift count.
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
;; Register-count vshl.<V_u_elem> for the VDQI modes, wrapped in
;; UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander emits it with a
;; negated shift count.
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for vector arithmetic right shift in the VDQIW modes.  When the
;; shift count is a register, it is negated into a fresh pseudo and
;; ashl<mode>3_signed is emitted with that negated count.  The
;; vashr<mode>3_imm call covers the remaining (immediate) case.
;; NOTE(review): the else between the two paths is not visible here.
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
958 if (s_register_operand (operands[2], <MODE>mode))
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expander for vector logical right shift in the VDQIW modes.  When the
;; shift count is a register, it is negated into a fresh pseudo and
;; ashl<mode>3_unsigned is emitted with that negated count.  The
;; vlshr<mode>3_imm call covers the remaining (immediate) case.
;; NOTE(review): the else between the two paths is not visible here.
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
975 if (s_register_operand (operands[2], <MODE>mode))
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
;; Places SImode operand 1 into lane 0 of the DImode NEON register operand 0.
;; Alternative 0 loads it from "Um" memory with vld1.32; alternative 1 moves
;; it from a core register with vmov.32.
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1027 "TARGET_NEON && reload_completed"
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1033 if (CONST_INT_P (operands[2]))
1035 if (INTVAL (operands[2]) < 1)
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1071 ; The shift amount needs to be negated for right-shifts
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1082 ; The shift amount needs to be negated for right-shifts
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1125 "TARGET_NEON && reload_completed"
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1131 if (CONST_INT_P (operands[2]))
1133 if (INTVAL (operands[2]) < 1)
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1175 ;; Widening operations
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "%w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "%w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1197 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
1198 ;; shift-count granularity. That's good enough for the middle-end's current
1201 ;; Note that it's not safe to perform such an operation in big-endian mode,
1202 ;; due to element-ordering issues.
1204 (define_expand "vec_shr_<mode>"
1205 [(match_operand:VDQ 0 "s_register_operand" "")
1206 (match_operand:VDQ 1 "s_register_operand" "")
1207 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1208 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1211 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1212 const int width = GET_MODE_BITSIZE (<MODE>mode);
1213 const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1214 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1215 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1217 if (num_bits == width)
1219 emit_move_insn (operands[0], operands[1]);
1223 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1224 operands[0] = gen_lowpart (bvecmode, operands[0]);
1225 operands[1] = gen_lowpart (bvecmode, operands[1]);
1227 emit_insn (gen_ext (operands[0], operands[1], zero_reg,
1228 GEN_INT (num_bits / BITS_PER_UNIT)));
1232 (define_expand "vec_shl_<mode>"
1233 [(match_operand:VDQ 0 "s_register_operand" "")
1234 (match_operand:VDQ 1 "s_register_operand" "")
1235 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1236 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1239 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1240 const int width = GET_MODE_BITSIZE (<MODE>mode);
1241 const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1242 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1243 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1247 emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
1251 num_bits = width - num_bits;
1253 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1254 operands[0] = gen_lowpart (bvecmode, operands[0]);
1255 operands[1] = gen_lowpart (bvecmode, operands[1]);
1257 emit_insn (gen_ext (operands[0], zero_reg, operands[1],
1258 GEN_INT (num_bits / BITS_PER_UNIT)));
1262 ;; Helpers for quad-word reduction operations
1264 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1265 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1266 ; N/2-element vector.
1268 (define_insn "quad_halves_<code>v4si"
1269 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1271 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1272 (parallel [(const_int 0) (const_int 1)]))
1273 (vec_select:V2SI (match_dup 1)
1274 (parallel [(const_int 2) (const_int 3)]))))]
1276 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1277 [(set_attr "vqh_mnem" "<VQH_mnem>")
1278 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1281 (define_insn "quad_halves_<code>v4sf"
1282 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1284 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1285 (parallel [(const_int 0) (const_int 1)]))
1286 (vec_select:V2SF (match_dup 1)
1287 (parallel [(const_int 2) (const_int 3)]))))]
1288 "TARGET_NEON && flag_unsafe_math_optimizations"
1289 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1290 [(set_attr "vqh_mnem" "<VQH_mnem>")
1291 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
; Quad-word reduction helper for V8HI: selects elements 0-3 and elements 4-7
; of operand 1 as two V4HI halves and combines them into the V4HI operand 0
; using the <VQH_mnem> instruction on 16-bit lanes.
; Operand 0 is only written by this pattern (it is never read via match_dup),
; so it takes the write-only "=w" constraint, matching the v4si and v4sf
; variants of this pattern.
1294 (define_insn "quad_halves_<code>v8hi"
1295 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1297 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1298 (parallel [(const_int 0) (const_int 1)
1299 (const_int 2) (const_int 3)]))
1300 (vec_select:V4HI (match_dup 1)
1301 (parallel [(const_int 4) (const_int 5)
1302 (const_int 6) (const_int 7)]))))]
1304 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1305 [(set_attr "vqh_mnem" "<VQH_mnem>")
1306 (set_attr "type" "neon_reduc_<VQH_type>_q")]
; Quad-word reduction helper for V16QI: selects elements 0-7 and elements
; 8-15 of operand 1 as two V8QI halves and combines them into the V8QI
; operand 0 using the <VQH_mnem> instruction on 8-bit lanes.
; Operand 0 is only written by this pattern (it is never read via match_dup),
; so it takes the write-only "=w" constraint, matching the v4si and v4sf
; variants of this pattern.
1309 (define_insn "quad_halves_<code>v16qi"
1310 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1312 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1313 (parallel [(const_int 0) (const_int 1)
1314 (const_int 2) (const_int 3)
1315 (const_int 4) (const_int 5)
1316 (const_int 6) (const_int 7)]))
1317 (vec_select:V8QI (match_dup 1)
1318 (parallel [(const_int 8) (const_int 9)
1319 (const_int 10) (const_int 11)
1320 (const_int 12) (const_int 13)
1321 (const_int 14) (const_int 15)]))))]
1323 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1324 [(set_attr "vqh_mnem" "<VQH_mnem>")
1325 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1328 (define_expand "move_hi_quad_<mode>"
1329 [(match_operand:ANY128 0 "s_register_operand" "")
1330 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1333 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1334 GET_MODE_SIZE (<V_HALF>mode)),
1339 (define_expand "move_lo_quad_<mode>"
1340 [(match_operand:ANY128 0 "s_register_operand" "")
1341 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1344 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1350 ;; Reduction operations
1352 (define_expand "reduc_plus_scal_<mode>"
1353 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1354 (match_operand:VD 1 "s_register_operand" "")]
1355 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1357 rtx vec = gen_reg_rtx (<MODE>mode);
1358 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1359 &gen_neon_vpadd_internal<mode>);
1360 /* The same result is actually computed into every element. */
1361 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1365 (define_expand "reduc_plus_scal_<mode>"
1366 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1367 (match_operand:VQ 1 "s_register_operand" "")]
1368 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1369 && !BYTES_BIG_ENDIAN"
1371 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1373 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1374 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1379 (define_expand "reduc_plus_scal_v2di"
1380 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1381 (match_operand:V2DI 1 "s_register_operand" "")]
1382 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1384 rtx vec = gen_reg_rtx (V2DImode);
1386 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1387 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1392 (define_insn "arm_reduc_plus_internal_v2di"
1393 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1394 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1396 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1397 "vadd.i64\t%e0, %e1, %f1"
1398 [(set_attr "type" "neon_add_q")]
1401 (define_expand "reduc_smin_scal_<mode>"
1402 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1403 (match_operand:VD 1 "s_register_operand" "")]
1404 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1406 rtx vec = gen_reg_rtx (<MODE>mode);
1408 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1409 &gen_neon_vpsmin<mode>);
1410 /* The result is computed into every element of the vector. */
1411 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1415 (define_expand "reduc_smin_scal_<mode>"
1416 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1417 (match_operand:VQ 1 "s_register_operand" "")]
1418 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1419 && !BYTES_BIG_ENDIAN"
1421 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1423 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1424 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1429 (define_expand "reduc_smax_scal_<mode>"
1430 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1431 (match_operand:VD 1 "s_register_operand" "")]
1432 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1434 rtx vec = gen_reg_rtx (<MODE>mode);
1435 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1436 &gen_neon_vpsmax<mode>);
1437 /* The result is computed into every element of the vector. */
1438 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1442 (define_expand "reduc_smax_scal_<mode>"
1443 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1444 (match_operand:VQ 1 "s_register_operand" "")]
1445 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1446 && !BYTES_BIG_ENDIAN"
1448 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1450 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1451 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1456 (define_expand "reduc_umin_scal_<mode>"
1457 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1458 (match_operand:VDI 1 "s_register_operand" "")]
1461 rtx vec = gen_reg_rtx (<MODE>mode);
1462 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1463 &gen_neon_vpumin<mode>);
1464 /* The result is computed into every element of the vector. */
1465 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1469 (define_expand "reduc_umin_scal_<mode>"
1470 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1471 (match_operand:VQI 1 "s_register_operand" "")]
1472 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1474 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1476 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1477 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1482 (define_expand "reduc_umax_scal_<mode>"
1483 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1484 (match_operand:VDI 1 "s_register_operand" "")]
1487 rtx vec = gen_reg_rtx (<MODE>mode);
1488 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1489 &gen_neon_vpumax<mode>);
1490 /* The result is computed into every element of the vector. */
1491 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1495 (define_expand "reduc_umax_scal_<mode>"
1496 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1497 (match_operand:VQI 1 "s_register_operand" "")]
1498 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1500 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1502 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1503 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1508 (define_insn "neon_vpadd_internal<mode>"
1509 [(set (match_operand:VD 0 "s_register_operand" "=w")
1510 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1511 (match_operand:VD 2 "s_register_operand" "w")]
1514 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1515 ;; Assume this schedules like vadd.
1517 (if_then_else (match_test "<Is_float_mode>")
1518 (const_string "neon_fp_reduc_add_s<q>")
1519 (const_string "neon_reduc_add<q>")))]
1522 (define_insn "neon_vpsmin<mode>"
1523 [(set (match_operand:VD 0 "s_register_operand" "=w")
1524 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1525 (match_operand:VD 2 "s_register_operand" "w")]
1528 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1530 (if_then_else (match_test "<Is_float_mode>")
1531 (const_string "neon_fp_reduc_minmax_s<q>")
1532 (const_string "neon_reduc_minmax<q>")))]
1535 (define_insn "neon_vpsmax<mode>"
1536 [(set (match_operand:VD 0 "s_register_operand" "=w")
1537 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1538 (match_operand:VD 2 "s_register_operand" "w")]
1541 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1543 (if_then_else (match_test "<Is_float_mode>")
1544 (const_string "neon_fp_reduc_minmax_s<q>")
1545 (const_string "neon_reduc_minmax<q>")))]
1548 (define_insn "neon_vpumin<mode>"
1549 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1550 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1551 (match_operand:VDI 2 "s_register_operand" "w")]
1554 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1555 [(set_attr "type" "neon_reduc_minmax<q>")]
1558 (define_insn "neon_vpumax<mode>"
1559 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1560 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1561 (match_operand:VDI 2 "s_register_operand" "w")]
1564 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1565 [(set_attr "type" "neon_reduc_minmax<q>")]
1568 ;; Saturating arithmetic
1570 ; NOTE: Neon supports many more saturating variants of instructions than the
1571 ; following, but these are all GCC currently understands.
1572 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1573 ; yet either, although these patterns may be used by intrinsics when they're
1576 (define_insn "*ss_add<mode>_neon"
1577 [(set (match_operand:VD 0 "s_register_operand" "=w")
1578 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1579 (match_operand:VD 2 "s_register_operand" "w")))]
1581 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1582 [(set_attr "type" "neon_qadd<q>")]
1585 (define_insn "*us_add<mode>_neon"
1586 [(set (match_operand:VD 0 "s_register_operand" "=w")
1587 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1588 (match_operand:VD 2 "s_register_operand" "w")))]
1590 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1591 [(set_attr "type" "neon_qadd<q>")]
1594 (define_insn "*ss_sub<mode>_neon"
1595 [(set (match_operand:VD 0 "s_register_operand" "=w")
1596 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1597 (match_operand:VD 2 "s_register_operand" "w")))]
1599 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1600 [(set_attr "type" "neon_qsub<q>")]
1603 (define_insn "*us_sub<mode>_neon"
1604 [(set (match_operand:VD 0 "s_register_operand" "=w")
1605 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1606 (match_operand:VD 2 "s_register_operand" "w")))]
1608 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1609 [(set_attr "type" "neon_qsub<q>")]
1612 ;; Conditional instructions. These are comparisons with conditional moves for
1613 ;; vectors. They perform the assignment:
1615 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1617 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1620 (define_expand "vcond<mode><mode>"
1621 [(set (match_operand:VDQW 0 "s_register_operand" "")
1623 (match_operator 3 "comparison_operator"
1624 [(match_operand:VDQW 4 "s_register_operand" "")
1625 (match_operand:VDQW 5 "nonmemory_operand" "")])
1626 (match_operand:VDQW 1 "s_register_operand" "")
1627 (match_operand:VDQW 2 "s_register_operand" "")))]
1628 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1630 HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
1632 rtx magic_rtx = GEN_INT (magic_word);
1634 int use_zero_form = 0;
1635 int swap_bsl_operands = 0;
1636 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1637 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1639 rtx (*base_comparison) (rtx, rtx, rtx, rtx);
1640 rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);
1642 switch (GET_CODE (operands[3]))
1649 if (operands[5] == CONST0_RTX (<MODE>mode))
1656 if (!REG_P (operands[5]))
1657 operands[5] = force_reg (<MODE>mode, operands[5]);
1660 switch (GET_CODE (operands[3]))
1670 base_comparison = gen_neon_vcge<mode>;
1671 complimentary_comparison = gen_neon_vcgt<mode>;
1679 base_comparison = gen_neon_vcgt<mode>;
1680 complimentary_comparison = gen_neon_vcge<mode>;
1685 base_comparison = gen_neon_vceq<mode>;
1686 complimentary_comparison = gen_neon_vceq<mode>;
1692 switch (GET_CODE (operands[3]))
1699 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1700 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1706 Note that there also exist direct comparison against 0 forms,
1707 so catch those as a special case. */
1711 switch (GET_CODE (operands[3]))
1714 base_comparison = gen_neon_vclt<mode>;
1717 base_comparison = gen_neon_vcle<mode>;
1720 /* Do nothing, other zero form cases already have the correct
1727 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1729 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1736 /* Vector compare returns false for lanes which are unordered, so if we use
1737 the inverse of the comparison we actually want to emit, then
1738 swap the operands to BSL, we will end up with the correct result.
1739 Note that a NE NaN and NaN NE b are true for all a, b.
1741 Our transformations are:
1746 a NE b -> !(a EQ b) */
1749 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1751 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1753 swap_bsl_operands = 1;
1756 /* We check (a > b || b > a). Combining these comparisons gives us
1757 true iff (a != b && a ORDERED b); swapping the operands to BSL
1758 will then give us (a == b || a UNORDERED b) as intended. */
1760 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx));
1761 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx));
1762 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1763 swap_bsl_operands = 1;
1766 /* Operands are ORDERED iff (a > b || b >= a).
1767 Swapping the operands to BSL will give the UNORDERED case. */
1768 swap_bsl_operands = 1;
1771 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx));
1772 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx));
1773 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1779 if (swap_bsl_operands)
1780 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1783 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1788 (define_expand "vcondu<mode><mode>"
1789 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1791 (match_operator 3 "arm_comparison_operator"
1792 [(match_operand:VDQIW 4 "s_register_operand" "")
1793 (match_operand:VDQIW 5 "s_register_operand" "")])
1794 (match_operand:VDQIW 1 "s_register_operand" "")
1795 (match_operand:VDQIW 2 "s_register_operand" "")))]
1799 int inverse = 0, immediate_zero = 0;
1801 mask = gen_reg_rtx (<V_cmp_result>mode);
1803 if (operands[5] == CONST0_RTX (<MODE>mode))
1805 else if (!REG_P (operands[5]))
1806 operands[5] = force_reg (<MODE>mode, operands[5]);
1808 switch (GET_CODE (operands[3]))
1811 emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
1816 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
1821 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1827 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
1830 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
1836 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
1839 emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
1844 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1854 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1857 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1863 ;; Patterns for builtins.
1865 ; good for plain vadd, vaddq.
1867 (define_expand "neon_vadd<mode>"
1868 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1869 (match_operand:VCVTF 1 "s_register_operand" "w")
1870 (match_operand:VCVTF 2 "s_register_operand" "w")
1871 (match_operand:SI 3 "immediate_operand" "i")]
1874 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1875 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1877 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1882 ; Note that NEON operations don't support the full IEEE 754 standard: in
1883 ; particular, denormal values are flushed to zero. This means that GCC cannot
1884 ; use those instructions for autovectorization, etc. unless
1885 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1886 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1887 ; header) must work in either case: if -funsafe-math-optimizations is given,
1888 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1889 ; expand to unspecs (which may potentially limit the extent to which they might
1890 ; be optimized by generic code).
1892 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1894 (define_insn "neon_vadd<mode>_unspec"
1895 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1896 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1897 (match_operand:VCVTF 2 "s_register_operand" "w")]
1900 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1902 (if_then_else (match_test "<Is_float_mode>")
1903 (const_string "neon_fp_addsub_s<q>")
1904 (const_string "neon_add<q>")))]
1907 ; operand 3 represents in bits:
1908 ; bit 0: signed (vs unsigned).
1909 ; bit 1: rounding (vs none).
1911 (define_insn "neon_vaddl<mode>"
1912 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1913 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1914 (match_operand:VDI 2 "s_register_operand" "w")
1915 (match_operand:SI 3 "immediate_operand" "i")]
1918 "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
1919 [(set_attr "type" "neon_add_long")]
1922 (define_insn "neon_vaddw<mode>"
1923 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1924 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1925 (match_operand:VDI 2 "s_register_operand" "w")
1926 (match_operand:SI 3 "immediate_operand" "i")]
1929 "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
1930 [(set_attr "type" "neon_add_widen")]
1935 (define_insn "neon_vhadd<mode>"
1936 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1937 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1938 (match_operand:VDQIW 2 "s_register_operand" "w")
1939 (match_operand:SI 3 "immediate_operand" "i")]
1942 "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1943 [(set_attr "type" "neon_add_halve_q")]
1946 (define_insn "neon_vqadd<mode>"
1947 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1948 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1949 (match_operand:VDQIX 2 "s_register_operand" "w")
1950 (match_operand:SI 3 "immediate_operand" "i")]
1953 "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1954 [(set_attr "type" "neon_qadd<q>")]
1957 (define_insn "neon_vaddhn<mode>"
1958 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1959 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1960 (match_operand:VN 2 "s_register_operand" "w")
1961 (match_operand:SI 3 "immediate_operand" "i")]
1964 "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
1965 [(set_attr "type" "neon_add_halve_narrow_q")]
1968 ;; We cannot replace this unspec with mul<mode>3 because of the odd
1969 ;; polynomial multiplication case that can be specified by operand 3.
1970 (define_insn "neon_vmul<mode>"
1971 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1972 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
1973 (match_operand:VDQW 2 "s_register_operand" "w")
1974 (match_operand:SI 3 "immediate_operand" "i")]
1977 "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1979 (if_then_else (match_test "<Is_float_mode>")
1980 (const_string "neon_fp_mul_s<q>")
1981 (const_string "neon_mul_<V_elem_ch><q>")))]
1984 (define_expand "neon_vmla<mode>"
1985 [(match_operand:VDQW 0 "s_register_operand" "=w")
1986 (match_operand:VDQW 1 "s_register_operand" "0")
1987 (match_operand:VDQW 2 "s_register_operand" "w")
1988 (match_operand:VDQW 3 "s_register_operand" "w")
1989 (match_operand:SI 4 "immediate_operand" "i")]
1992 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1993 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1994 operands[2], operands[3]));
1996 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1997 operands[2], operands[3]));
2001 (define_expand "neon_vfma<VCVTF:mode>"
2002 [(match_operand:VCVTF 0 "s_register_operand")
2003 (match_operand:VCVTF 1 "s_register_operand")
2004 (match_operand:VCVTF 2 "s_register_operand")
2005 (match_operand:VCVTF 3 "s_register_operand")
2006 (match_operand:SI 4 "immediate_operand")]
2007 "TARGET_NEON && TARGET_FMA"
2009 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2014 (define_expand "neon_vfms<VCVTF:mode>"
2015 [(match_operand:VCVTF 0 "s_register_operand")
2016 (match_operand:VCVTF 1 "s_register_operand")
2017 (match_operand:VCVTF 2 "s_register_operand")
2018 (match_operand:VCVTF 3 "s_register_operand")
2019 (match_operand:SI 4 "immediate_operand")]
2020 "TARGET_NEON && TARGET_FMA"
2022 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2027 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2029 (define_insn "neon_vmla<mode>_unspec"
2030 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2031 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2032 (match_operand:VDQW 2 "s_register_operand" "w")
2033 (match_operand:VDQW 3 "s_register_operand" "w")]
2036 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2038 (if_then_else (match_test "<Is_float_mode>")
2039 (const_string "neon_fp_mla_s<q>")
2040 (const_string "neon_mla_<V_elem_ch><q>")))]
2043 (define_insn "neon_vmlal<mode>"
2044 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2045 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2046 (match_operand:VW 2 "s_register_operand" "w")
2047 (match_operand:VW 3 "s_register_operand" "w")
2048 (match_operand:SI 4 "immediate_operand" "i")]
2051 "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2052 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2055 (define_expand "neon_vmls<mode>"
2056 [(match_operand:VDQW 0 "s_register_operand" "=w")
2057 (match_operand:VDQW 1 "s_register_operand" "0")
2058 (match_operand:VDQW 2 "s_register_operand" "w")
2059 (match_operand:VDQW 3 "s_register_operand" "w")
2060 (match_operand:SI 4 "immediate_operand" "i")]
2063 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2064 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2065 operands[1], operands[2], operands[3]));
2067 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2068 operands[2], operands[3]));
2072 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of VMLS on VDQW modes.  Operand 1 is tied to the destination
;; (constraint "0"); operands 2 and 3 are the sources printed in the template.
;; Float modes select neon_fp_mla_s<q>, other modes select
;; neon_mla_<V_elem_ch><q>.
2074 (define_insn "neon_vmls<mode>_unspec"
2075 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2076 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2077 (match_operand:VDQW 2 "s_register_operand" "w")
2078 (match_operand:VDQW 3 "s_register_operand" "w")]
2081 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2083 (if_then_else (match_test "<Is_float_mode>")
2084 (const_string "neon_fp_mla_s<q>")
2085 (const_string "neon_mla_<V_elem_ch><q>")))]
;; VMLSL: VW-mode sources (operands 2 and 3) with a <V_widen> result.
;; Operand 1 is tied to the destination.  Immediate operand 4 feeds the %T
;; modifier in the mnemonic's data-type suffix.
2088 (define_insn "neon_vmlsl<mode>"
2089 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2090 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2091 (match_operand:VW 2 "s_register_operand" "w")
2092 (match_operand:VW 3 "s_register_operand" "w")
2093 (match_operand:SI 4 "immediate_operand" "i")]
2096 "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2097 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; VQDMULH on VMDQI modes.  Immediate operand 3 feeds the %O modifier, which
;; sits inside the mnemonic ("vq%O3dmulh") -- presumably selecting the
;; rounding variant; confirm against the backend's operand printer.
2100 (define_insn "neon_vqdmulh<mode>"
2101 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2102 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2103 (match_operand:VMDQI 2 "s_register_operand" "w")
2104 (match_operand:SI 3 "immediate_operand" "i")]
2107 "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2108 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; VQDMLAL: VMDI-mode sources (operands 2 and 3) with a <V_widen> result.
;; Operand 1 is tied to the destination.  Immediate operand 4 is part of the
;; unspec but does not appear in the output template.
2111 (define_insn "neon_vqdmlal<mode>"
2112 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2113 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2114 (match_operand:VMDI 2 "s_register_operand" "w")
2115 (match_operand:VMDI 3 "s_register_operand" "w")
2116 (match_operand:SI 4 "immediate_operand" "i")]
2119 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2120 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; VQDMLSL: VMDI-mode sources (operands 2 and 3) with a <V_widen> result.
;; Operand 1 is tied to the destination.  Immediate operand 4 is part of the
;; unspec but does not appear in the output template.
2123 (define_insn "neon_vqdmlsl<mode>"
2124 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2125 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2126 (match_operand:VMDI 2 "s_register_operand" "w")
2127 (match_operand:VMDI 3 "s_register_operand" "w")
2128 (match_operand:SI 4 "immediate_operand" "i")]
2131 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2132 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; VMULL: two VW-mode sources producing a <V_widen> result.  Immediate
;; operand 3 feeds the %T modifier in the mnemonic's data-type suffix.
2135 (define_insn "neon_vmull<mode>"
2136 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2137 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2138 (match_operand:VW 2 "s_register_operand" "w")
2139 (match_operand:SI 3 "immediate_operand" "i")]
2142 "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2143 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; VQDMULL: two VMDI-mode sources producing a <V_widen> result.  Immediate
;; operand 3 is part of the unspec but is not used by the output template.
2146 (define_insn "neon_vqdmull<mode>"
2147 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2148 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2149 (match_operand:VMDI 2 "s_register_operand" "w")
2150 (match_operand:SI 3 "immediate_operand" "i")]
2153 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2154 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for neon_vsub on VCVTF modes.  When the mode is not a float mode,
;; or flag_unsafe_math_optimizations is set, it emits the generic sub<mode>3
;; pattern; the second call emits neon_vsub<mode>_unspec.  Operand 3 is not
;; passed to sub<mode>3.
2157 (define_expand "neon_vsub<mode>"
2158 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2159 (match_operand:VCVTF 1 "s_register_operand" "w")
2160 (match_operand:VCVTF 2 "s_register_operand" "w")
2161 (match_operand:SI 3 "immediate_operand" "i")]
2164 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2165 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2167 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2172 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of VSUB on VCVTF modes: operand 0 = operand 1 - operand 2 as
;; printed by the "vsub" template.  Float modes select neon_fp_addsub_s<q>,
;; other modes select neon_sub<q>.
2174 (define_insn "neon_vsub<mode>_unspec"
2175 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2176 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2177 (match_operand:VCVTF 2 "s_register_operand" "w")]
2180 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2182 (if_then_else (match_test "<Is_float_mode>")
2183 (const_string "neon_fp_addsub_s<q>")
2184 (const_string "neon_sub<q>")))]
;; VSUBL: two VDI-mode sources producing a <V_widen> result.  Immediate
;; operand 3 feeds the %T modifier in the mnemonic's data-type suffix.
2187 (define_insn "neon_vsubl<mode>"
2188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2189 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2190 (match_operand:VDI 2 "s_register_operand" "w")
2191 (match_operand:SI 3 "immediate_operand" "i")]
2194 "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2195 [(set_attr "type" "neon_sub_long")]
;; VSUBW: operand 1 is already in <V_widen> mode, operand 2 is a VDI-mode
;; source, and the result is in <V_widen> mode.  Immediate operand 3 feeds the
;; %T modifier in the mnemonic's data-type suffix.
2198 (define_insn "neon_vsubw<mode>"
2199 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2200 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2201 (match_operand:VDI 2 "s_register_operand" "w")
2202 (match_operand:SI 3 "immediate_operand" "i")]
2205 "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
2206 [(set_attr "type" "neon_sub_widen")]
;; VQSUB on VDQIX modes.  Immediate operand 3 feeds the %T modifier in the
;; mnemonic's data-type suffix.
2209 (define_insn "neon_vqsub<mode>"
2210 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2211 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2212 (match_operand:VDQIX 2 "s_register_operand" "w")
2213 (match_operand:SI 3 "immediate_operand" "i")]
2216 "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2217 [(set_attr "type" "neon_qsub<q>")]
;; VHSUB on VDQIW modes.  Immediate operand 3 feeds the %T modifier in the
;; mnemonic's data-type suffix.
2220 (define_insn "neon_vhsub<mode>"
2221 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2222 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2223 (match_operand:VDQIW 2 "s_register_operand" "w")
2224 (match_operand:SI 3 "immediate_operand" "i")]
2227 "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2228 [(set_attr "type" "neon_sub_halve<q>")]
;; VSUBHN: two VN-mode sources producing a <V_narrow> result.  Immediate
;; operand 3 feeds the %O modifier placed inside the mnemonic ("v%O3subhn")
;; -- presumably selecting the rounding variant; confirm against the
;; backend's operand printer.
2231 (define_insn "neon_vsubhn<mode>"
2232 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2233 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2234 (match_operand:VN 2 "s_register_operand" "w")
2235 (match_operand:SI 3 "immediate_operand" "i")]
2238 "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
2239 [(set_attr "type" "neon_sub_halve_narrow_q")]
;; VCEQ on VDQW modes with a <V_cmp_result> destination.  Two alternatives:
;; operand 2 in a register ("w"), or operand 2 matching constraint "Dz", in
;; which case the template prints a literal #0.  Attribute selection: float
;; modes use neon_fp_compare_s<q>; otherwise neon_compare_zero<q> when
;; operand 2 is a zero_operand and neon_compare<q> when it is not.
2242 (define_insn "neon_vceq<mode>"
2243 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2244 (unspec:<V_cmp_result>
2245 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2246 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2247 (match_operand:SI 3 "immediate_operand" "i,i")]
2251 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2252 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
2254 (if_then_else (match_test "<Is_float_mode>")
2255 (const_string "neon_fp_compare_s<q>")
2256 (if_then_else (match_operand 2 "zero_operand")
2257 (const_string "neon_compare_zero<q>")
2258 (const_string "neon_compare<q>"))))]
;; VCGE on VDQW modes with a <V_cmp_result> destination.  Two alternatives:
;; register operand 2, or a "Dz" operand 2 printed as a literal #0.
;; Immediate operand 3 feeds the %T modifier in the data-type suffix.
;; Attribute selection: float modes use neon_fp_compare_s<q>; otherwise
;; neon_compare_zero<q> for a zero_operand and neon_compare<q> if not.
2261 (define_insn "neon_vcge<mode>"
2262 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2263 (unspec:<V_cmp_result>
2264 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2265 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2266 (match_operand:SI 3 "immediate_operand" "i,i")]
2270 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2271 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2273 (if_then_else (match_test "<Is_float_mode>")
2274 (const_string "neon_fp_compare_s<q>")
2275 (if_then_else (match_operand 2 "zero_operand")
2276 (const_string "neon_compare_zero<q>")
2277 (const_string "neon_compare<q>"))))]
;; Register-only VCGE pattern on integer VDQIW modes (named "vcgeu").  The
;; emitted mnemonic is still "vcge"; immediate operand 3 feeds the %T
;; modifier in the data-type suffix.
2280 (define_insn "neon_vcgeu<mode>"
2281 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2282 (unspec:<V_cmp_result>
2283 [(match_operand:VDQIW 1 "s_register_operand" "w")
2284 (match_operand:VDQIW 2 "s_register_operand" "w")
2285 (match_operand:SI 3 "immediate_operand" "i")]
2288 "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2289 [(set_attr "type" "neon_compare<q>")]
;; VCGT on VDQW modes with a <V_cmp_result> destination.  Two alternatives:
;; register operand 2, or a "Dz" operand 2 printed as a literal #0.
;; Immediate operand 3 feeds the %T modifier in the data-type suffix.
;; Attribute selection: float modes use neon_fp_compare_s<q>; otherwise
;; neon_compare_zero<q> for a zero_operand and neon_compare<q> if not.
2292 (define_insn "neon_vcgt<mode>"
2293 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2294 (unspec:<V_cmp_result>
2295 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2296 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2297 (match_operand:SI 3 "immediate_operand" "i,i")]
2301 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2302 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2304 (if_then_else (match_test "<Is_float_mode>")
2305 (const_string "neon_fp_compare_s<q>")
2306 (if_then_else (match_operand 2 "zero_operand")
2307 (const_string "neon_compare_zero<q>")
2308 (const_string "neon_compare<q>"))))]
;; Register-only VCGT pattern on integer VDQIW modes (named "vcgtu").  The
;; emitted mnemonic is still "vcgt"; immediate operand 3 feeds the %T
;; modifier in the data-type suffix.
2311 (define_insn "neon_vcgtu<mode>"
2312 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2313 (unspec:<V_cmp_result>
2314 [(match_operand:VDQIW 1 "s_register_operand" "w")
2315 (match_operand:VDQIW 2 "s_register_operand" "w")
2316 (match_operand:SI 3 "immediate_operand" "i")]
2319 "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2320 [(set_attr "type" "neon_compare<q>")]
2323 ;; VCLE and VCLT only support comparisons with immediate zero (register
2324 ;; variants are VCGE and VCGT with operands reversed).
;; VCLE against zero: operand 2 must satisfy zero_operand ("Dz") and the
;; template hard-codes #0 as the second source.  Immediate operand 3 feeds
;; the %T modifier in the data-type suffix.  Float modes select
;; neon_fp_compare_s<q>; other modes go through the zero_operand test.
2326 (define_insn "neon_vcle<mode>"
2327 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2328 (unspec:<V_cmp_result>
2329 [(match_operand:VDQW 1 "s_register_operand" "w")
2330 (match_operand:VDQW 2 "zero_operand" "Dz")
2331 (match_operand:SI 3 "immediate_operand" "i")]
2334 "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2336 (if_then_else (match_test "<Is_float_mode>")
2337 (const_string "neon_fp_compare_s<q>")
2338 (if_then_else (match_operand 2 "zero_operand")
2339 (const_string "neon_compare_zero<q>")
2340 (const_string "neon_compare<q>"))))]
;; VCLT against zero: operand 2 must satisfy zero_operand ("Dz") and the
;; template hard-codes #0 as the second source.  Immediate operand 3 feeds
;; the %T modifier in the data-type suffix.  Float modes select
;; neon_fp_compare_s<q>; other modes go through the zero_operand test.
2343 (define_insn "neon_vclt<mode>"
2344 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2345 (unspec:<V_cmp_result>
2346 [(match_operand:VDQW 1 "s_register_operand" "w")
2347 (match_operand:VDQW 2 "zero_operand" "Dz")
2348 (match_operand:SI 3 "immediate_operand" "i")]
2351 "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2353 (if_then_else (match_test "<Is_float_mode>")
2354 (const_string "neon_fp_compare_s<q>")
2355 (if_then_else (match_operand 2 "zero_operand")
2356 (const_string "neon_compare_zero<q>")
2357 (const_string "neon_compare<q>"))))]
;; Pattern named "vcage" on VCVTF modes; the emitted mnemonic is "vacge".
;; The result is in <V_cmp_result> mode.  Immediate operand 3 is not used by
;; the template.
2360 (define_insn "neon_vcage<mode>"
2361 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2362 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2363 (match_operand:VCVTF 2 "s_register_operand" "w")
2364 (match_operand:SI 3 "immediate_operand" "i")]
2367 "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2368 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Pattern named "vcagt" on VCVTF modes; the emitted mnemonic is "vacgt".
;; The result is in <V_cmp_result> mode.  Immediate operand 3 is not used by
;; the template.
2371 (define_insn "neon_vcagt<mode>"
2372 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2373 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2374 (match_operand:VCVTF 2 "s_register_operand" "w")
2375 (match_operand:SI 3 "immediate_operand" "i")]
2378 "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2379 [(set_attr "type" "neon_fp_compare_s<q>")]
;; VTST on integer VDQIW modes; source and result share the same mode.
;; Immediate operand 3 is not used by the template.
2382 (define_insn "neon_vtst<mode>"
2383 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2384 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2385 (match_operand:VDQIW 2 "s_register_operand" "w")
2386 (match_operand:SI 3 "immediate_operand" "i")]
2389 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2390 [(set_attr "type" "neon_tst<q>")]
;; VABD on VDQW modes.  Immediate operand 3 feeds the %T modifier in the
;; data-type suffix.  Float modes select neon_fp_abd_s<q>, other modes
;; select neon_abd<q>.
2393 (define_insn "neon_vabd<mode>"
2394 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2395 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2396 (match_operand:VDQW 2 "s_register_operand" "w")
2397 (match_operand:SI 3 "immediate_operand" "i")]
2400 "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2402 (if_then_else (match_test "<Is_float_mode>")
2403 (const_string "neon_fp_abd_s<q>")
2404 (const_string "neon_abd<q>")))]
;; VABDL: two VW-mode sources producing a <V_widen> result.  Immediate
;; operand 3 feeds the %T modifier in the mnemonic's data-type suffix.
2407 (define_insn "neon_vabdl<mode>"
2408 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2409 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2410 (match_operand:VW 2 "s_register_operand" "w")
2411 (match_operand:SI 3 "immediate_operand" "i")]
2414 "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2415 [(set_attr "type" "neon_abd_long")]
;; VABA on integer VDQIW modes, modelled as a plus of an unspec over
;; operands 2, 3 and 4 with operand 1, which is tied to the destination.
;; Immediate operand 4 feeds the %T modifier in the data-type suffix.
2418 (define_insn "neon_vaba<mode>"
2419 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2420 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2421 (match_operand:VDQIW 3 "s_register_operand" "w")
2422 (match_operand:SI 4 "immediate_operand" "i")]
2424 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2426 "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2427 [(set_attr "type" "neon_arith_acc<q>")]
;; VABAL: a plus in <V_widen> mode of an unspec over VW-mode operands 2 and
;; 3 (plus immediate operand 4) with operand 1, which is tied to the
;; destination.  Operand 4 feeds the %T modifier in the data-type suffix.
2430 (define_insn "neon_vabal<mode>"
2431 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2432 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2433 (match_operand:VW 3 "s_register_operand" "w")
2434 (match_operand:SI 4 "immediate_operand" "i")]
2436 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2438 "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2439 [(set_attr "type" "neon_arith_acc<q>")]
;; VMAX on VDQW modes.  Immediate operand 3 feeds the %T modifier in the
;; data-type suffix.  Float modes select neon_fp_minmax_s<q>, other modes
;; select neon_minmax<q>.
2442 (define_insn "neon_vmax<mode>"
2443 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2444 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2445 (match_operand:VDQW 2 "s_register_operand" "w")
2446 (match_operand:SI 3 "immediate_operand" "i")]
2449 "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2451 (if_then_else (match_test "<Is_float_mode>")
2452 (const_string "neon_fp_minmax_s<q>")
2453 (const_string "neon_minmax<q>")))]
;; VMIN on VDQW modes.  Immediate operand 3 feeds the %T modifier in the
;; data-type suffix.  Float modes select neon_fp_minmax_s<q>, other modes
;; select neon_minmax<q>.
2456 (define_insn "neon_vmin<mode>"
2457 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2458 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2459 (match_operand:VDQW 2 "s_register_operand" "w")
2460 (match_operand:SI 3 "immediate_operand" "i")]
2463 "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2465 (if_then_else (match_test "<Is_float_mode>")
2466 (const_string "neon_fp_minmax_s<q>")
2467 (const_string "neon_minmax<q>")))]
;; Expander for neon_vpadd on VD modes: forwards to
;; neon_vpadd_internal<mode>, starting with operands 0 and 1.
2470 (define_expand "neon_vpadd<mode>"
2471 [(match_operand:VD 0 "s_register_operand" "=w")
2472 (match_operand:VD 1 "s_register_operand" "w")
2473 (match_operand:VD 2 "s_register_operand" "w")
2474 (match_operand:SI 3 "immediate_operand" "i")]
2477 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; VPADDL: a VDQIW-mode source producing a <V_double_width> result.
;; Immediate operand 2 feeds the %T modifier in the data-type suffix.
2482 (define_insn "neon_vpaddl<mode>"
2483 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2484 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")
2485 (match_operand:SI 2 "immediate_operand" "i")]
2488 "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2489 [(set_attr "type" "neon_reduc_add_long")]
;; VPADAL: operand 1 (in <V_double_width> mode) is tied to the destination,
;; operand 2 is the VDQIW-mode source.  Immediate operand 3 feeds the %T
;; modifier in the data-type suffix.
2492 (define_insn "neon_vpadal<mode>"
2493 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2494 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2495 (match_operand:VDQIW 2 "s_register_operand" "w")
2496 (match_operand:SI 3 "immediate_operand" "i")]
2499 "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2500 [(set_attr "type" "neon_reduc_add_acc")]
;; VPMAX on VD modes.  Immediate operand 3 feeds the %T modifier in the
;; data-type suffix.  Float modes select neon_fp_reduc_minmax_s<q>, other
;; modes select neon_reduc_minmax<q>.
2503 (define_insn "neon_vpmax<mode>"
2504 [(set (match_operand:VD 0 "s_register_operand" "=w")
2505 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2506 (match_operand:VD 2 "s_register_operand" "w")
2507 (match_operand:SI 3 "immediate_operand" "i")]
2510 "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2512 (if_then_else (match_test "<Is_float_mode>")
2513 (const_string "neon_fp_reduc_minmax_s<q>")
2514 (const_string "neon_reduc_minmax<q>")))]
;; VPMIN on VD modes.  Immediate operand 3 feeds the %T modifier in the
;; data-type suffix.  Float modes select neon_fp_reduc_minmax_s<q>, other
;; modes select neon_reduc_minmax<q>.
2517 (define_insn "neon_vpmin<mode>"
2518 [(set (match_operand:VD 0 "s_register_operand" "=w")
2519 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2520 (match_operand:VD 2 "s_register_operand" "w")
2521 (match_operand:SI 3 "immediate_operand" "i")]
2524 "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2526 (if_then_else (match_test "<Is_float_mode>")
2527 (const_string "neon_fp_reduc_minmax_s<q>")
2528 (const_string "neon_reduc_minmax<q>")))]
;; VRECPS on VCVTF modes, two register sources.  Immediate operand 3 is
;; not used by the template.
2531 (define_insn "neon_vrecps<mode>"
2532 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2533 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2534 (match_operand:VCVTF 2 "s_register_operand" "w")
2535 (match_operand:SI 3 "immediate_operand" "i")]
2538 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2539 [(set_attr "type" "neon_fp_recps_s<q>")]
;; VRSQRTS on VCVTF modes, two register sources.  Immediate operand 3 is
;; not used by the template.
2542 (define_insn "neon_vrsqrts<mode>"
2543 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2544 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2545 (match_operand:VCVTF 2 "s_register_operand" "w")
2546 (match_operand:SI 3 "immediate_operand" "i")]
2549 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2550 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for neon_vabs on VDQW modes: forwards operands 0 and 1 to the
;; generic abs<mode>2 pattern.  Operand 2 is not forwarded.
2553 (define_expand "neon_vabs<mode>"
2554 [(match_operand:VDQW 0 "s_register_operand" "")
2555 (match_operand:VDQW 1 "s_register_operand" "")
2556 (match_operand:SI 2 "immediate_operand" "")]
2559 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; VQABS on integer VDQIW modes.  Immediate operand 2 is not used by the
;; template.
2563 (define_insn "neon_vqabs<mode>"
2564 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2565 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2566 (match_operand:SI 2 "immediate_operand" "i")]
2569 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2570 [(set_attr "type" "neon_qabs<q>")]
;; Byte swap (bswap RTL code) on VDQHSD modes, emitted as
;; vrev<element size>.8.
2573 (define_insn "neon_bswap<mode>"
2574 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2575 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2577 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2578 [(set_attr "type" "neon_rev<q>")]
;; Expander for neon_vneg on VDQW modes: forwards operands 0 and 1 to the
;; generic neg<mode>2 pattern.  Operand 2 is not forwarded.
2581 (define_expand "neon_vneg<mode>"
2582 [(match_operand:VDQW 0 "s_register_operand" "")
2583 (match_operand:VDQW 1 "s_register_operand" "")
2584 (match_operand:SI 2 "immediate_operand" "")]
2587 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; Expander for vector copysign on VCVTF modes.  It builds a constant vector
;; in <V_cmp_result> mode holding 0x80000000 in every element, copies
;; operand 2 into operand 0, and then emits neon_vbsl with that mask (viewed
;; in <MODE>mode through a subreg) as the select operand.
2591 (define_expand "neon_copysignf<mode>"
2592 [(match_operand:VCVTF 0 "register_operand")
2593 (match_operand:VCVTF 1 "register_operand")
2594 (match_operand:VCVTF 2 "register_operand")]
2598 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2599 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2600 rtvec v = rtvec_alloc (n_elt);
2602 /* Create bitmask for vector select. gen_int_mode keeps each CONST_INT sign-extended for the element mode, as canonical RTL requires; a bare GEN_INT (0x80000000) is not canonical when HOST_WIDE_INT is wider than 32 bits. */
2603 for (i = 0; i < n_elt; ++i)
2604 RTVEC_ELT (v, i) = gen_int_mode (0x80000000, GET_MODE_INNER (<VCVTF:V_cmp_result>mode));
2606 emit_move_insn (v_bitmask,
2607 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2608 emit_move_insn (operands[0], operands[2]);
2609 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2610 <VCVTF:V_cmp_result>mode, 0);
2611 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; VQNEG on integer VDQIW modes.  Immediate operand 2 is not used by the
;; template.
2618 (define_insn "neon_vqneg<mode>"
2619 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2620 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2621 (match_operand:SI 2 "immediate_operand" "i")]
2624 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2625 [(set_attr "type" "neon_qneg<q>")]
;; VCLS on integer VDQIW modes.  Immediate operand 2 is not used by the
;; template.
2628 (define_insn "neon_vcls<mode>"
2629 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2630 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2631 (match_operand:SI 2 "immediate_operand" "i")]
2634 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2635 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz pattern for integer VDQIW modes, emitted as VCLZ.
2638 (define_insn "clz<mode>2"
2639 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2640 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2642 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2643 [(set_attr "type" "neon_cnt<q>")]
;; Expander for neon_vclz on VDQIW modes: forwards operands 0 and 1 to
;; clz<mode>2.  Operand 2 is not forwarded.
2646 (define_expand "neon_vclz<mode>"
2647 [(match_operand:VDQIW 0 "s_register_operand" "")
2648 (match_operand:VDQIW 1 "s_register_operand" "")
2649 (match_operand:SI 2 "immediate_operand" "")]
2652 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount pattern for VE modes, emitted as VCNT.
2656 (define_insn "popcount<mode>2"
2657 [(set (match_operand:VE 0 "s_register_operand" "=w")
2658 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2660 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2661 [(set_attr "type" "neon_cnt<q>")]
;; Expander for neon_vcnt on VE modes: forwards operands 0 and 1 to
;; popcount<mode>2.  Operand 2 is not forwarded.
2664 (define_expand "neon_vcnt<mode>"
2665 [(match_operand:VE 0 "s_register_operand" "=w")
2666 (match_operand:VE 1 "s_register_operand" "w")
2667 (match_operand:SI 2 "immediate_operand" "i")]
2670 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; VRECPE on V32 modes.  Immediate operand 2 is not used by the template.
2674 (define_insn "neon_vrecpe<mode>"
2675 [(set (match_operand:V32 0 "s_register_operand" "=w")
2676 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2677 (match_operand:SI 2 "immediate_operand" "i")]
2680 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2681 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; VRSQRTE on V32 modes.  Immediate operand 2 is not used by the template.
2684 (define_insn "neon_vrsqrte<mode>"
2685 [(set (match_operand:V32 0 "s_register_operand" "=w")
2686 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2687 (match_operand:SI 2 "immediate_operand" "i")]
2690 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2691 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for neon_vmvn on VDQIW modes: forwards operands 0 and 1 to the
;; generic one_cmpl<mode>2 pattern.  Operand 2 is not forwarded.
2694 (define_expand "neon_vmvn<mode>"
2695 [(match_operand:VDQIW 0 "s_register_operand" "")
2696 (match_operand:VDQIW 1 "s_register_operand" "")
2697 (match_operand:SI 2 "immediate_operand" "")]
2700 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Extract one lane of a VD-mode vector into an SI general register ("=r")
;; with vmov.s<size>.  When BYTES_BIG_ENDIAN, the lane number is mirrored
;; (NUNITS - 1 - lane) before the instruction is printed.
2704 (define_insn "neon_vget_lane<mode>_sext_internal"
2705 [(set (match_operand:SI 0 "s_register_operand" "=r")
2707 (vec_select:<V_elem>
2708 (match_operand:VD 1 "s_register_operand" "w")
2709 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2712 if (BYTES_BIG_ENDIAN)
2714 int elt = INTVAL (operands[2]);
2715 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2716 operands[2] = GEN_INT (elt);
2718 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2720 [(set_attr "type" "neon_to_gp")]
;; Extract one lane of a VD-mode vector into an SI general register ("=r")
;; with vmov.u<size>.  When BYTES_BIG_ENDIAN, the lane number is mirrored
;; (NUNITS - 1 - lane) before the instruction is printed.
2723 (define_insn "neon_vget_lane<mode>_zext_internal"
2724 [(set (match_operand:SI 0 "s_register_operand" "=r")
2726 (vec_select:<V_elem>
2727 (match_operand:VD 1 "s_register_operand" "w")
2728 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2731 if (BYTES_BIG_ENDIAN)
2733 int elt = INTVAL (operands[2]);
2734 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2735 operands[2] = GEN_INT (elt);
2737 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2739 [(set_attr "type" "neon_to_gp")]
;; VQ-mode variant of the vmov.s lane extract.  The lane is split into a
;; half-vector selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored when BYTES_BIG_ENDIAN).  The source is
;; rewritten as a <V_HALF>mode register at regno + 2 * (elt / halfelts)
;; and the instruction is printed through output_asm_insn.
2742 (define_insn "neon_vget_lane<mode>_sext_internal"
2743 [(set (match_operand:SI 0 "s_register_operand" "=r")
2745 (vec_select:<V_elem>
2746 (match_operand:VQ 1 "s_register_operand" "w")
2747 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2751 int regno = REGNO (operands[1]);
2752 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2753 unsigned int elt = INTVAL (operands[2]);
2754 unsigned int elt_adj = elt % halfelts;
2756 if (BYTES_BIG_ENDIAN)
2757 elt_adj = halfelts - 1 - elt_adj;
2759 ops[0] = operands[0];
2760 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2761 ops[2] = GEN_INT (elt_adj);
2762 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2766 [(set_attr "type" "neon_to_gp_q")]
;; VQ-mode variant of the vmov.u lane extract.  The lane is split into a
;; half-vector selector (elt / halfelts) and a lane within that half
;; (elt % halfelts, mirrored when BYTES_BIG_ENDIAN).  The source is
;; rewritten as a <V_HALF>mode register at regno + 2 * (elt / halfelts)
;; and the instruction is printed through output_asm_insn.
2769 (define_insn "neon_vget_lane<mode>_zext_internal"
2770 [(set (match_operand:SI 0 "s_register_operand" "=r")
2772 (vec_select:<V_elem>
2773 (match_operand:VQ 1 "s_register_operand" "w")
2774 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2778 int regno = REGNO (operands[1]);
2779 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2780 unsigned int elt = INTVAL (operands[2]);
2781 unsigned int elt_adj = elt % halfelts;
2783 if (BYTES_BIG_ENDIAN)
2784 elt_adj = halfelts - 1 - elt_adj;
2786 ops[0] = operands[0];
2787 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2788 ops[2] = GEN_INT (elt_adj);
2789 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2793 [(set_attr "type" "neon_to_gp_q")]
;; Expander for neon_vget_lane on VDQW modes.  Operand 2 is the lane and
;; operand 3 the info word ("magic").  The lane is bounds-checked against
;; the number of elements and, when BYTES_BIG_ENDIAN, XORed with
;; (64 / element bits) - 1.  If (magic & 3) == 3 or the element is 32 bits
;; wide, vec_extract<mode> is used; otherwise bit 0 of magic is tested to
;; choose between the _sext_internal and _zext_internal patterns.
2796 (define_expand "neon_vget_lane<mode>"
2797 [(match_operand:<V_ext> 0 "s_register_operand" "")
2798 (match_operand:VDQW 1 "s_register_operand" "")
2799 (match_operand:SI 2 "immediate_operand" "")
2800 (match_operand:SI 3 "immediate_operand" "")]
2803 HOST_WIDE_INT magic = INTVAL (operands[3]);
2806 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2808 if (BYTES_BIG_ENDIAN)
2810 /* The intrinsics are defined in terms of a model where the
2811 element ordering in memory is vldm order, whereas the generic
2812 RTL is defined in terms of a model where the element ordering
2813 in memory is array order. Convert the lane number to conform
2815 unsigned int elt = INTVAL (operands[2]);
2816 unsigned int reg_nelts
2817 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2818 elt ^= reg_nelts - 1;
2819 operands[2] = GEN_INT (elt);
2822 if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2823 insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
2826 if ((magic & 1) != 0)
2827 insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
2830 insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
2837 ; Operand 3 (info word) is ignored because it does nothing useful with 64-bit
; elements.
;; DImode lane extract: bounds-checks lane operand 2 with
;; neon_lane_bounds (operands[2], 0, 1) and then copies operand 1 to
;; operand 0 with a plain move.  Operand 3 is not read.
2840 (define_expand "neon_vget_lanedi"
2841 [(match_operand:DI 0 "s_register_operand" "=r")
2842 (match_operand:DI 1 "s_register_operand" "w")
2843 (match_operand:SI 2 "immediate_operand" "i")
2844 (match_operand:SI 3 "immediate_operand" "i")]
2847 neon_lane_bounds (operands[2], 0, 1);
2848 emit_move_insn (operands[0], operands[1]);
;; V2DI lane extract: switches on the lane number in operand 2.  The visible
;; arms move the low part or the high part of operand 1 (as DImode) into
;; operand 0, or call neon_lane_bounds (operands[2], 0, 1).  Operand 3 is
;; not read.
2852 (define_expand "neon_vget_lanev2di"
2853 [(match_operand:DI 0 "s_register_operand" "")
2854 (match_operand:V2DI 1 "s_register_operand" "")
2855 (match_operand:SI 2 "immediate_operand" "")
2856 (match_operand:SI 3 "immediate_operand" "")]
2859 switch (INTVAL (operands[2]))
2862 emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
2865 emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
2868 neon_lane_bounds (operands[2], 0, 1);
;; Expander for neon_vset_lane on VDQ modes.  Operand 1 is the new element,
;; operand 2 the input vector and operand 3 the lane.  The lane is
;; bounds-checked and, when BYTES_BIG_ENDIAN, XORed with
;; (64 / element bits) - 1.  It is passed to vec_set<mode>_internal as the
;; one-hot mask 1 << elt.
2874 (define_expand "neon_vset_lane<mode>"
2875 [(match_operand:VDQ 0 "s_register_operand" "=w")
2876 (match_operand:<V_elem> 1 "s_register_operand" "r")
2877 (match_operand:VDQ 2 "s_register_operand" "0")
2878 (match_operand:SI 3 "immediate_operand" "i")]
2881 unsigned int elt = INTVAL (operands[3]);
2882 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
2884 if (BYTES_BIG_ENDIAN)
2886 unsigned int reg_nelts
2887 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2888 elt ^= reg_nelts - 1;
2891 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2892 GEN_INT (1 << elt), operands[2]));
2896 ; See neon_vget_lanedi comment for why the lane carries no information here:
; operand 2 is unused and operand 3 is only bounds-checked.
;; DImode lane set: bounds-checks lane operand 3 with
;; neon_lane_bounds (operands[3], 0, 1) and then copies operand 1 to
;; operand 0 with a plain move.  Operand 2 is not read.
2898 (define_expand "neon_vset_lanedi"
2899 [(match_operand:DI 0 "s_register_operand" "=w")
2900 (match_operand:DI 1 "s_register_operand" "r")
2901 (match_operand:DI 2 "s_register_operand" "0")
2902 (match_operand:SI 3 "immediate_operand" "i")]
2905 neon_lane_bounds (operands[3], 0, 1);
2906 emit_move_insn (operands[0], operands[1]);
;; Expander for neon_vcreate on VDX modes: reinterprets DImode operand 1 in
;; <MODE>mode with gen_lowpart and moves it into operand 0.
2910 (define_expand "neon_vcreate<mode>"
2911 [(match_operand:VDX 0 "s_register_operand" "")
2912 (match_operand:DI 1 "general_operand" "")]
2915 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2916 emit_move_insn (operands[0], src);
;; vec_duplicate of a scalar held in a general register ("r") into a
;; VX-mode vector, emitted as VDUP.
2920 (define_insn "neon_vdup_n<mode>"
2921 [(set (match_operand:VX 0 "s_register_operand" "=w")
2922 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2924 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2925 [(set_attr "type" "neon_from_gp<q>")]
;; vec_duplicate into a V32-mode vector.  Two alternatives: the scalar in a
;; general register ("r", printed as %1, type neon_from_gp<q>) or under
;; constraint "t" (printed with %y1, type neon_dup<q>).
2928 (define_insn "neon_vdup_n<mode>"
2929 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2930 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2933 vdup.<V_sz_elem>\t%<V_reg>0, %1
2934 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2935 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode "duplicate": source and destination are both DImode, so the
;; expander emits a plain move of operand 1 into operand 0.
2938 (define_expand "neon_vdup_ndi"
2939 [(match_operand:DI 0 "s_register_operand" "=w")
2940 (match_operand:DI 1 "s_register_operand" "r")]
2943 emit_move_insn (operands[0], operands[1]);
;; vec_duplicate of a DImode value into V2DI, emitted as two VMOVs that
;; write %e0 and %f0.  The source is either under constraint "r" (printed
;; as %Q1, %R1) or "w" (printed as %P1).  Length is 8.
2948 (define_insn "neon_vdup_nv2di"
2949 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2950 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2953 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2954 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2955 [(set_attr "length" "8")
2956 (set_attr "type" "multiple")]
;; Duplicate one lane of a <V_double_vector_mode> source into every element
;; of a VDQW-mode destination.  When BYTES_BIG_ENDIAN, the lane number is
;; mirrored within the source mode before printing.  Two templates are
;; returned, differing only in whether the destination is printed with %P0
;; or %q0.
2959 (define_insn "neon_vdup_lane<mode>_internal"
2960 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2962 (vec_select:<V_elem>
2963 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2964 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2967 if (BYTES_BIG_ENDIAN)
2969 int elt = INTVAL (operands[2]);
2970 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2971 operands[2] = GEN_INT (elt);
2974 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2976 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2978 [(set_attr "type" "neon_dup<q>")]
;; Expander for neon_vdup_lane on VDQW modes.  Lane operand 2 is
;; bounds-checked against the element count of <V_double_vector_mode> and,
;; when BYTES_BIG_ENDIAN, XORed with (64 / element bits) - 1.  The expander
;; then emits neon_vdup_lane<mode>_internal.
2981 (define_expand "neon_vdup_lane<mode>"
2982 [(match_operand:VDQW 0 "s_register_operand" "=w")
2983 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2984 (match_operand:SI 2 "immediate_operand" "i")]
2987 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
2988 if (BYTES_BIG_ENDIAN)
2990 unsigned int elt = INTVAL (operands[2]);
2991 unsigned int reg_nelts
2992 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
2993 elt ^= reg_nelts - 1;
2994 operands[2] = GEN_INT (elt);
2996 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3001 ; Scalar index is ignored, since only zero is valid here.
;; DImode lane duplicate: bounds-checks lane operand 2 with
;; neon_lane_bounds (operands[2], 0, 1) and then copies operand 1 to
;; operand 0 with a plain move.
3002 (define_expand "neon_vdup_lanedi"
3003 [(match_operand:DI 0 "s_register_operand" "=w")
3004 (match_operand:DI 1 "s_register_operand" "w")
3005 (match_operand:SI 2 "immediate_operand" "i")]
3008 neon_lane_bounds (operands[2], 0, 1);
3009 emit_move_insn (operands[0], operands[1]);
3013 ; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI lane duplicate from a DImode source: bounds-checks lane operand 2
;; with neon_lane_bounds (operands[2], 0, 1) and then emits neon_vdup_nv2di
;; on operands 0 and 1.
3014 (define_expand "neon_vdup_lanev2di"
3015 [(match_operand:V2DI 0 "s_register_operand" "=w")
3016 (match_operand:DI 1 "s_register_operand" "w")
3017 (match_operand:SI 2 "immediate_operand" "i")]
3020 neon_lane_bounds (operands[2], 0, 1);
3021 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3025 ; Disabled before reload because we don't want combine doing something silly,
3026 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap on VDQX modes: two sets exchange operands 0 and 1, both of
;; which are read-write ("+w").  The pattern only matches when
;; reload_completed is set, and is emitted as VSWP.
3027 (define_insn "*neon_vswp<mode>"
3028 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3029 (match_operand:VDQX 1 "s_register_operand" "+w"))
3030 (set (match_dup 1) (match_dup 0))]
3031 "TARGET_NEON && reload_completed"
3032 "vswp\t%<V_reg>0, %<V_reg>1"
3033 [(set_attr "type" "neon_permute<q>")]
3036 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
3038 ;; FIXME: A different implementation of this builtin could make it much
3039 ;; more likely that we wouldn't actually need to output anything (we could make
3040 ;; it so that the reg allocator puts things in the right places magically
3041 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vec_concat of two VDX-mode operands into a <V_DOUBLE>-mode destination.
;; Defined as an insn-and-split: once reload_completed holds, the split
;; body calls neon_split_vcombine (operands).
3043 (define_insn_and_split "neon_vcombine<mode>"
3044 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3045 (vec_concat:<V_DOUBLE>
3046 (match_operand:VDX 1 "s_register_operand" "w")
3047 (match_operand:VDX 2 "s_register_operand" "w")))]
3050 "&& reload_completed"
3053 neon_split_vcombine (operands);
3056 [(set_attr "type" "multiple")]
;; Expander for neon_vget_high on VQX modes: moves into operand 0 the
;; <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode).
3059 (define_expand "neon_vget_high<mode>"
3060 [(match_operand:<V_HALF> 0 "s_register_operand")
3061 (match_operand:VQX 1 "s_register_operand")]
3064 emit_move_insn (operands[0],
3065 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3066 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for neon_vget_low on VQX modes: moves a <V_HALF>mode subreg of
;; operand 1 into operand 0.
3070 (define_expand "neon_vget_low<mode>"
3071 [(match_operand:<V_HALF> 0 "s_register_operand")
3072 (match_operand:VQX 1 "s_register_operand")]
3075 emit_move_insn (operands[0],
3076 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Standard-named signed int-to-float conversion (float RTL code) from
;; VCVTI modes to <V_CVTTO>, emitted as vcvt.f32.s32.  Disabled when
;; flag_rounding_math is set.
3081 (define_insn "float<mode><V_cvtto>2"
3082 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3083 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3084 "TARGET_NEON && !flag_rounding_math"
3085 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3086 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named unsigned int-to-float conversion (unsigned_float RTL code)
;; from VCVTI modes to <V_CVTTO>, emitted as vcvt.f32.u32.  Disabled when
;; flag_rounding_math is set.
3089 (define_insn "floatuns<mode><V_cvtto>2"
3090 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3091 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3092 "TARGET_NEON && !flag_rounding_math"
3093 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3094 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named float-to-signed-int conversion (fix RTL code) from VCVTF
;; modes to <V_CVTTO>, emitted as vcvt.s32.f32.
3097 (define_insn "fix_trunc<mode><V_cvtto>2"
3098 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3099 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3101 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3102 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Standard-named float-to-unsigned-int conversion (unsigned_fix RTL code)
;; from VCVTF modes to <V_CVTTO>, emitted as vcvt.u32.f32.
3105 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3106 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3107 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3109 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3110 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<mode> (VCVTF source): unspec conversion of the float vector
;; operand 1 into the <V_CVTTO> operand 0.  The SI immediate operand 2 is
;; only referenced through the %T2 modifier in the "vcvt.<..>32.f32" mnemonic.
;; NOTE(review): %T presumably prints a signedness letter chosen by the
;; immediate -- confirm against the ARM operand-printing code.
3113 (define_insn "neon_vcvt<mode>"
3114 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3115 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3116 (match_operand:SI 2 "immediate_operand" "i")]
3119 "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
3120 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<mode> (VCVTI source): unspec conversion of the integer vector
;; operand 1 into the <V_CVTTO> operand 0.  The SI immediate operand 2 is
;; only referenced through the %T2 modifier in the "vcvt.f32.<..>32" mnemonic.
;; NOTE(review): %T presumably prints a signedness letter chosen by the
;; immediate -- confirm against the ARM operand-printing code.
3123 (define_insn "neon_vcvt<mode>"
3124 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3125 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3126 (match_operand:SI 2 "immediate_operand" "i")]
3129 "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
3130 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; neon_vcvtv4sfv4hf: unspec conversion of the V4HF register operand 1 into
;; the V4SF register operand 0, output as vcvt.f32.f16 (quad destination,
;; double source).  Requires TARGET_NEON && TARGET_FP16.
3133 (define_insn "neon_vcvtv4sfv4hf"
3134 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3135 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3137 "TARGET_NEON && TARGET_FP16"
3138 "vcvt.f32.f16\t%q0, %P1"
3139 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; neon_vcvtv4hfv4sf: unspec conversion of the V4SF register operand 1 into
;; the V4HF register operand 0, output as vcvt.f16.f32 (double destination,
;; quad source).  Requires TARGET_NEON && TARGET_FP16.
3142 (define_insn "neon_vcvtv4hfv4sf"
3143 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3144 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3146 "TARGET_NEON && TARGET_FP16"
3147 "vcvt.f16.f32\t%P0, %q1"
3148 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; neon_vcvt_n<mode> (VCVTF source): unspec conversion of the float vector
;; operand 1 into <V_CVTTO> operand 0 with an extra immediate.  Operand 2 is
;; printed as the trailing "%2" of the instruction; operand 3 is referenced
;; only through the %T3 modifier in the mnemonic.
3151 (define_insn "neon_vcvt_n<mode>"
3152 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3153 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3154 (match_operand:SI 2 "immediate_operand" "i")
3155 (match_operand:SI 3 "immediate_operand" "i")]
/* Validate immediate operand 2 against the limits 1 and 33 before output.  */
3159 neon_const_bounds (operands[2], 1, 33);
3160 return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3162 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt_n<mode> (VCVTI source): unspec conversion of the integer vector
;; operand 1 into <V_CVTTO> operand 0 with an extra immediate.  Operand 2 is
;; printed as the trailing "%2" of the instruction; operand 3 is referenced
;; only through the %T3 modifier in the mnemonic.
3165 (define_insn "neon_vcvt_n<mode>"
3166 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3167 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3168 (match_operand:SI 2 "immediate_operand" "i")
3169 (match_operand:SI 3 "immediate_operand" "i")]
/* Validate immediate operand 2 against the limits 1 and 33 before output.  */
3173 neon_const_bounds (operands[2], 1, 33);
3174 return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2";
3176 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; neon_vmovn<mode>: unspec producing the <V_narrow> operand 0 from the VN
;; register operand 1, output as vmovn.<V_if_elem> (double destination, quad
;; source).  The SI immediate operand 2 is not referenced by the template.
3179 (define_insn "neon_vmovn<mode>"
3180 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3181 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3182 (match_operand:SI 2 "immediate_operand" "i")]
3185 "vmovn.<V_if_elem>\t%P0, %q1"
3186 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; neon_vqmovn<mode>: unspec producing the <V_narrow> operand 0 from the VN
;; register operand 1, output as vqmovn.  The SI immediate operand 2 is used
;; only through the %T2 modifier that forms the element-type suffix.
3189 (define_insn "neon_vqmovn<mode>"
3190 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3191 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3192 (match_operand:SI 2 "immediate_operand" "i")]
3195 "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
3196 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vqmovun<mode>: unspec producing the <V_narrow> operand 0 from the VN
;; register operand 1, output as vqmovun.<V_s_elem>.  The SI immediate
;; operand 2 is not referenced by the template.
3199 (define_insn "neon_vqmovun<mode>"
3200 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3201 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3202 (match_operand:SI 2 "immediate_operand" "i")]
3205 "vqmovun.<V_s_elem>\t%P0, %q1"
3206 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vmovl<mode>: unspec producing the <V_widen> operand 0 from the VW
;; register operand 1, output as vmovl (quad destination, double source).
;; The SI immediate operand 2 is used only through the %T2 modifier that
;; forms the element-type suffix.
3209 (define_insn "neon_vmovl<mode>"
3210 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3211 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3212 (match_operand:SI 2 "immediate_operand" "i")]
3215 "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
3216 [(set_attr "type" "neon_shift_imm_long")]
;; neon_vmul_lane<mode> (VMD form): multiply of the VMD vector operand 1 by
;; one lane of the VMD register operand 2, output as
;; "vmul.<V_if_elem> Dd, Dn, Dm[lane]".  Operand 3 is the lane index; the SI
;; immediate operand 4 is not referenced by the template.  The type
;; attribute depends on <Is_float_mode>.
3219 (define_insn "neon_vmul_lane<mode>"
3220 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3221 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3222 (match_operand:VMD 2 "s_register_operand"
3223 "<scalar_mul_constraint>")
3224 (match_operand:SI 3 "immediate_operand" "i")
3225 (match_operand:SI 4 "immediate_operand" "i")]
/* Check the lane index against the element count of the scalar vector.  */
3229 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3230 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3233 (if_then_else (match_test "<Is_float_mode>")
3234 (const_string "neon_fp_mul_s_scalar<q>")
3235 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmul_lane<mode> (VMQ form): multiply of the VMQ vector operand 1 by
;; one lane of operand 2, which has the half-width mode <V_HALF>, output as
;; "vmul.<V_if_elem> Qd, Qn, Dm[lane]".  Operand 3 is the lane index; the SI
;; immediate operand 4 is not referenced by the template.  The type
;; attribute depends on <Is_float_mode>.
3238 (define_insn "neon_vmul_lane<mode>"
3239 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3240 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3241 (match_operand:<V_HALF> 2 "s_register_operand"
3242 "<scalar_mul_constraint>")
3243 (match_operand:SI 3 "immediate_operand" "i")
3244 (match_operand:SI 4 "immediate_operand" "i")]
/* The lane indexes operand 2, so the bound is the <V_HALF> element count.  */
3248 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3249 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3252 (if_then_else (match_test "<Is_float_mode>")
3253 (const_string "neon_fp_mul_s_scalar<q>")
3254 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmull_lane<mode>: UNSPEC_VMULL_LANE producing the <V_widen> operand 0
;; from the VMDI vector operand 1 and one lane of the VMDI register
;; operand 2.  Operand 3 is the lane index; operand 4 is used only through
;; the %T4 modifier that forms the element-type suffix.
3257 (define_insn "neon_vmull_lane<mode>"
3258 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3259 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3260 (match_operand:VMDI 2 "s_register_operand"
3261 "<scalar_mul_constraint>")
3262 (match_operand:SI 3 "immediate_operand" "i")
3263 (match_operand:SI 4 "immediate_operand" "i")]
3264 UNSPEC_VMULL_LANE))]
/* Check the lane index against the element count of the scalar vector.  */
3267 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3268 return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3270 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; neon_vqdmull_lane<mode>: UNSPEC_VQDMULL_LANE producing the <V_widen>
;; operand 0 from the VMDI vector operand 1 and one lane of the VMDI register
;; operand 2, output as vqdmull.<V_s_elem>.  Operand 3 is the lane index; the
;; SI immediate operand 4 is not referenced by the template.
3273 (define_insn "neon_vqdmull_lane<mode>"
3274 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3275 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3276 (match_operand:VMDI 2 "s_register_operand"
3277 "<scalar_mul_constraint>")
3278 (match_operand:SI 3 "immediate_operand" "i")
3279 (match_operand:SI 4 "immediate_operand" "i")]
3280 UNSPEC_VQDMULL_LANE))]
/* Check the lane index against the element count of the scalar vector.  */
3283 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3284 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3286 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; neon_vqdmulh_lane<mode> (VMQI form): UNSPEC_VQDMULH_LANE combining the
;; VMQI vector operand 1 with one lane of operand 2, which has the
;; half-width mode <V_HALF>.  Operand 3 is the lane index; operand 4 is used
;; only through the %O4 and %T4 modifiers that build the mnemonic.
3289 (define_insn "neon_vqdmulh_lane<mode>"
3290 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3291 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3292 (match_operand:<V_HALF> 2 "s_register_operand"
3293 "<scalar_mul_constraint>")
3294 (match_operand:SI 3 "immediate_operand" "i")
3295 (match_operand:SI 4 "immediate_operand" "i")]
3296 UNSPEC_VQDMULH_LANE))]
/* The lane index selects an element of operand 2, whose mode is <V_HALF>,
   so bound it by that mode's element count rather than by <MODE>mode, which
   has twice as many elements (same bound as the VMQ neon_vmul_lane).  */
3299 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3300 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]";
3302 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; neon_vqdmulh_lane<mode> (VMDI form): UNSPEC_VQDMULH_LANE combining the
;; VMDI vector operand 1 with one lane of the VMDI register operand 2.
;; Operand 3 is the lane index; operand 4 is used only through the %O4 and
;; %T4 modifiers that build the mnemonic.
3305 (define_insn "neon_vqdmulh_lane<mode>"
3306 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3307 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3308 (match_operand:VMDI 2 "s_register_operand"
3309 "<scalar_mul_constraint>")
3310 (match_operand:SI 3 "immediate_operand" "i")
3311 (match_operand:SI 4 "immediate_operand" "i")]
3312 UNSPEC_VQDMULH_LANE))]
/* Check the lane index against the element count of the scalar vector.  */
3315 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3316 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]";
3318 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; neon_vmla_lane<mode> (VMD form): multiply-accumulate by scalar lane.
;; Operand 1 is the accumulator and is tied to the destination (constraint
;; "0"); operand 2 is the vector multiplicand; operand 3 supplies the scalar
;; lane selected by operand 4.  The SI immediate operand 5 is not referenced
;; by the template.  The type attribute depends on <Is_float_mode>.
3321 (define_insn "neon_vmla_lane<mode>"
3322 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3323 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3324 (match_operand:VMD 2 "s_register_operand" "w")
3325 (match_operand:VMD 3 "s_register_operand"
3326 "<scalar_mul_constraint>")
3327 (match_operand:SI 4 "immediate_operand" "i")
3328 (match_operand:SI 5 "immediate_operand" "i")]
/* Check the lane index against the element count of the scalar vector.  */
3332 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3333 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3336 (if_then_else (match_test "<Is_float_mode>")
3337 (const_string "neon_fp_mla_s_scalar<q>")
3338 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmla_lane<mode> (VMQ form): multiply-accumulate by scalar lane.
;; Operand 1 is the accumulator and is tied to the destination (constraint
;; "0"); operand 2 is the VMQ vector multiplicand; operand 3, of the
;; half-width mode <V_HALF>, supplies the scalar lane selected by operand 4.
;; The SI immediate operand 5 is not referenced by the template.  The type
;; attribute depends on <Is_float_mode>.
3341 (define_insn "neon_vmla_lane<mode>"
3342 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3343 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3344 (match_operand:VMQ 2 "s_register_operand" "w")
3345 (match_operand:<V_HALF> 3 "s_register_operand"
3346 "<scalar_mul_constraint>")
3347 (match_operand:SI 4 "immediate_operand" "i")
3348 (match_operand:SI 5 "immediate_operand" "i")]
/* The lane index selects an element of operand 3, whose mode is <V_HALF>,
   so bound it by that mode's element count rather than by <MODE>mode, which
   has twice as many elements (same bound as the VMQ neon_vmul_lane).  */
3352 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3353 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3356 (if_then_else (match_test "<Is_float_mode>")
3357 (const_string "neon_fp_mla_s_scalar<q>")
3358 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmlal_lane<mode>: UNSPEC_VMLAL_LANE, widening multiply-accumulate by
;; scalar lane.  Operand 1 is the <V_widen> accumulator tied to the
;; destination; operands 2 and 3 are VMDI registers, with operand 4 selecting
;; the lane of operand 3.  Operand 5 is used only through the %T5 modifier.
3361 (define_insn "neon_vmlal_lane<mode>"
3362 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3363 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3364 (match_operand:VMDI 2 "s_register_operand" "w")
3365 (match_operand:VMDI 3 "s_register_operand"
3366 "<scalar_mul_constraint>")
3367 (match_operand:SI 4 "immediate_operand" "i")
3368 (match_operand:SI 5 "immediate_operand" "i")]
3369 UNSPEC_VMLAL_LANE))]
/* Check the lane index against the element count of the scalar vector.  */
3372 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3373 return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3375 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; neon_vqdmlal_lane<mode>: UNSPEC_VQDMLAL_LANE, output as
;; vqdmlal.<V_s_elem>.  Operand 1 is the <V_widen> accumulator tied to the
;; destination; operands 2 and 3 are VMDI registers, with operand 4 selecting
;; the lane of operand 3.  Operand 5 is not referenced by the template.
3378 (define_insn "neon_vqdmlal_lane<mode>"
3379 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3380 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3381 (match_operand:VMDI 2 "s_register_operand" "w")
3382 (match_operand:VMDI 3 "s_register_operand"
3383 "<scalar_mul_constraint>")
3384 (match_operand:SI 4 "immediate_operand" "i")
3385 (match_operand:SI 5 "immediate_operand" "i")]
3386 UNSPEC_VQDMLAL_LANE))]
/* Check the lane index against the element count of the scalar vector.  */
3389 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3390 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3392 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; neon_vmls_lane<mode> (VMD form): multiply-subtract by scalar lane.
;; Operand 1 is the accumulator and is tied to the destination (constraint
;; "0"); operand 2 is the vector multiplicand; operand 3 supplies the scalar
;; lane selected by operand 4.  The SI immediate operand 5 is not referenced
;; by the template.  The type attribute depends on <Is_float_mode>.
3395 (define_insn "neon_vmls_lane<mode>"
3396 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3397 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3398 (match_operand:VMD 2 "s_register_operand" "w")
3399 (match_operand:VMD 3 "s_register_operand"
3400 "<scalar_mul_constraint>")
3401 (match_operand:SI 4 "immediate_operand" "i")
3402 (match_operand:SI 5 "immediate_operand" "i")]
/* Check the lane index against the element count of the scalar vector.  */
3406 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3407 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3410 (if_then_else (match_test "<Is_float_mode>")
3411 (const_string "neon_fp_mla_s_scalar<q>")
3412 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmls_lane<mode> (VMQ form): multiply-subtract by scalar lane.
;; Operand 1 is the accumulator and is tied to the destination (constraint
;; "0"); operand 2 is the VMQ vector multiplicand; operand 3, of the
;; half-width mode <V_HALF>, supplies the scalar lane selected by operand 4.
;; The SI immediate operand 5 is not referenced by the template.  The type
;; attribute depends on <Is_float_mode>.
3415 (define_insn "neon_vmls_lane<mode>"
3416 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3417 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3418 (match_operand:VMQ 2 "s_register_operand" "w")
3419 (match_operand:<V_HALF> 3 "s_register_operand"
3420 "<scalar_mul_constraint>")
3421 (match_operand:SI 4 "immediate_operand" "i")
3422 (match_operand:SI 5 "immediate_operand" "i")]
/* The lane index selects an element of operand 3, whose mode is <V_HALF>,
   so bound it by that mode's element count rather than by <MODE>mode, which
   has twice as many elements (same bound as the VMQ neon_vmul_lane).  */
3426 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3427 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3430 (if_then_else (match_test "<Is_float_mode>")
3431 (const_string "neon_fp_mla_s_scalar<q>")
3432 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmlsl_lane<mode>: UNSPEC_VMLSL_LANE, widening multiply-subtract by
;; scalar lane.  Operand 1 is the <V_widen> accumulator tied to the
;; destination; operands 2 and 3 are VMDI registers, with operand 4 selecting
;; the lane of operand 3.  Operand 5 is used only through the %T5 modifier.
3435 (define_insn "neon_vmlsl_lane<mode>"
3436 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3437 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3438 (match_operand:VMDI 2 "s_register_operand" "w")
3439 (match_operand:VMDI 3 "s_register_operand"
3440 "<scalar_mul_constraint>")
3441 (match_operand:SI 4 "immediate_operand" "i")
3442 (match_operand:SI 5 "immediate_operand" "i")]
3443 UNSPEC_VMLSL_LANE))]
/* Check the lane index against the element count of the scalar vector.  */
3446 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3447 return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3449 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; neon_vqdmlsl_lane<mode>: UNSPEC_VQDMLSL_LANE, output as
;; vqdmlsl.<V_s_elem>.  Operand 1 is the <V_widen> accumulator tied to the
;; destination; operands 2 and 3 are VMDI registers, with operand 4 selecting
;; the lane of operand 3.  Operand 5 is not referenced by the template.
3452 (define_insn "neon_vqdmlsl_lane<mode>"
3453 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3454 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3455 (match_operand:VMDI 2 "s_register_operand" "w")
3456 (match_operand:VMDI 3 "s_register_operand"
3457 "<scalar_mul_constraint>")
3458 (match_operand:SI 4 "immediate_operand" "i")
3459 (match_operand:SI 5 "immediate_operand" "i")]
3460 UNSPEC_VQDMLSL_LANE))]
/* Check the lane index against the element count of the scalar vector.  */
3463 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3464 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3466 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3469 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3470 ; core register into a temp register, then use a scalar taken from that. This
3471 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3472 ; or extracted from another vector. In the latter case it's currently better to
3473 ; use the "_lane" variant, and the former case can probably be implemented
3474 ; using vld1_lane, but that hasn't been done yet.
;; neon_vmul_n<mode> (VMD form): multiply the vector operand 1 by the scalar
;; register operand 2 (mode <V_elem>), result in operand 0.  Implemented by
;; inserting the scalar into lane 0 of a fresh <MODE>mode temporary and then
;; emitting neon_vmul_lane<mode> with lane index 0.  The last argument passed
;; to the lane pattern is const0_rtx; operand 3 is not forwarded.
3476 (define_expand "neon_vmul_n<mode>"
3477 [(match_operand:VMD 0 "s_register_operand" "")
3478 (match_operand:VMD 1 "s_register_operand" "")
3479 (match_operand:<V_elem> 2 "s_register_operand" "")
3480 (match_operand:SI 3 "immediate_operand" "")]
3483 rtx tmp = gen_reg_rtx (<MODE>mode);
3484 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3485 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3486 const0_rtx, const0_rtx));
;; neon_vmul_n<mode> (VMQ form): multiply the vector operand 1 by the scalar
;; register operand 2 (mode <V_elem>), result in operand 0.  Implemented by
;; inserting the scalar into lane 0 of a fresh <V_HALF>mode temporary and
;; then emitting neon_vmul_lane<mode> with lane index 0.  The last argument
;; passed to the lane pattern is const0_rtx; operand 3 is not forwarded.
3490 (define_expand "neon_vmul_n<mode>"
3491 [(match_operand:VMQ 0 "s_register_operand" "")
3492 (match_operand:VMQ 1 "s_register_operand" "")
3493 (match_operand:<V_elem> 2 "s_register_operand" "")
3494 (match_operand:SI 3 "immediate_operand" "")]
3497 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3498 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3499 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3500 const0_rtx, const0_rtx));
;; neon_vmull_n<mode>: widening multiply of the VMDI vector operand 1 by the
;; scalar register operand 2 (mode <V_elem>), result in the <V_widen>
;; operand 0.  The scalar is inserted into lane 0 of a fresh <MODE>mode
;; temporary, then neon_vmull_lane<mode> is emitted with lane index 0 and
;; operand 3 forwarded as its final argument.
3504 (define_expand "neon_vmull_n<mode>"
3505 [(match_operand:<V_widen> 0 "s_register_operand" "")
3506 (match_operand:VMDI 1 "s_register_operand" "")
3507 (match_operand:<V_elem> 2 "s_register_operand" "")
3508 (match_operand:SI 3 "immediate_operand" "")]
3511 rtx tmp = gen_reg_rtx (<MODE>mode);
3512 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3513 emit_insn (gen_neon_vmull_lane<mode> (operands[0], operands[1], tmp,
3514 const0_rtx, operands[3]));
;; neon_vqdmull_n<mode>: expands to neon_vqdmull_lane<mode>.  The scalar
;; register operand 2 (mode <V_elem>) is inserted into lane 0 of a fresh
;; <MODE>mode temporary, which is then used as the lane source with lane
;; index 0.  The last argument passed to the lane pattern is const0_rtx;
;; operand 3 is not forwarded.
3518 (define_expand "neon_vqdmull_n<mode>"
3519 [(match_operand:<V_widen> 0 "s_register_operand" "")
3520 (match_operand:VMDI 1 "s_register_operand" "")
3521 (match_operand:<V_elem> 2 "s_register_operand" "")
3522 (match_operand:SI 3 "immediate_operand" "")]
3525 rtx tmp = gen_reg_rtx (<MODE>mode);
3526 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3527 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3528 const0_rtx, const0_rtx));
;; neon_vqdmulh_n<mode> (VMDI form): expands to neon_vqdmulh_lane<mode>.  The
;; scalar register operand 2 (mode <V_elem>) is inserted into lane 0 of a
;; fresh <MODE>mode temporary, which is then used as the lane source with
;; lane index 0; operand 3 is forwarded as the final argument.
3532 (define_expand "neon_vqdmulh_n<mode>"
3533 [(match_operand:VMDI 0 "s_register_operand" "")
3534 (match_operand:VMDI 1 "s_register_operand" "")
3535 (match_operand:<V_elem> 2 "s_register_operand" "")
3536 (match_operand:SI 3 "immediate_operand" "")]
3539 rtx tmp = gen_reg_rtx (<MODE>mode);
3540 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3541 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3542 const0_rtx, operands[3]));
;; neon_vqdmulh_n<mode> (VMQI form): expands to neon_vqdmulh_lane<mode>.  The
;; scalar register operand 2 (mode <V_elem>) is inserted into lane 0 of a
;; fresh <V_HALF>mode temporary, which is then used as the lane source with
;; lane index 0; operand 3 is forwarded as the final argument.
3546 (define_expand "neon_vqdmulh_n<mode>"
3547 [(match_operand:VMQI 0 "s_register_operand" "")
3548 (match_operand:VMQI 1 "s_register_operand" "")
3549 (match_operand:<V_elem> 2 "s_register_operand" "")
3550 (match_operand:SI 3 "immediate_operand" "")]
3553 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3554 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3555 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3556 const0_rtx, operands[3]));
;; neon_vmla_n<mode> (VMD form): expands to neon_vmla_lane<mode>.  Operands 1
;; and 2 are passed through unchanged; the scalar register operand 3 (mode
;; <V_elem>) is inserted into lane 0 of a fresh <MODE>mode temporary, which
;; becomes the lane source with lane index 0.  Operand 4 is forwarded as the
;; final argument.
3560 (define_expand "neon_vmla_n<mode>"
3561 [(match_operand:VMD 0 "s_register_operand" "")
3562 (match_operand:VMD 1 "s_register_operand" "")
3563 (match_operand:VMD 2 "s_register_operand" "")
3564 (match_operand:<V_elem> 3 "s_register_operand" "")
3565 (match_operand:SI 4 "immediate_operand" "")]
3568 rtx tmp = gen_reg_rtx (<MODE>mode);
3569 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3570 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3571 tmp, const0_rtx, operands[4]));
;; neon_vmla_n<mode> (VMQ form): expands to neon_vmla_lane<mode>.  Operands 1
;; and 2 are passed through unchanged; the scalar register operand 3 (mode
;; <V_elem>) is inserted into lane 0 of a fresh <V_HALF>mode temporary, which
;; becomes the lane source with lane index 0.  Operand 4 is forwarded as the
;; final argument.
3575 (define_expand "neon_vmla_n<mode>"
3576 [(match_operand:VMQ 0 "s_register_operand" "")
3577 (match_operand:VMQ 1 "s_register_operand" "")
3578 (match_operand:VMQ 2 "s_register_operand" "")
3579 (match_operand:<V_elem> 3 "s_register_operand" "")
3580 (match_operand:SI 4 "immediate_operand" "")]
3583 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3584 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3585 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3586 tmp, const0_rtx, operands[4]));
;; neon_vmlal_n<mode>: expands to neon_vmlal_lane<mode>.  Operands 1
;; (<V_widen>) and 2 (VMDI) are passed through unchanged; the scalar register
;; operand 3 (mode <V_elem>) is inserted into lane 0 of a fresh <MODE>mode
;; temporary, which becomes the lane source with lane index 0.  Operand 4 is
;; forwarded as the final argument.
3590 (define_expand "neon_vmlal_n<mode>"
3591 [(match_operand:<V_widen> 0 "s_register_operand" "")
3592 (match_operand:<V_widen> 1 "s_register_operand" "")
3593 (match_operand:VMDI 2 "s_register_operand" "")
3594 (match_operand:<V_elem> 3 "s_register_operand" "")
3595 (match_operand:SI 4 "immediate_operand" "")]
3598 rtx tmp = gen_reg_rtx (<MODE>mode);
3599 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3600 emit_insn (gen_neon_vmlal_lane<mode> (operands[0], operands[1], operands[2],
3601 tmp, const0_rtx, operands[4]));
;; neon_vqdmlal_n<mode>: expands to neon_vqdmlal_lane<mode>.  Operands 1
;; (<V_widen>) and 2 (VMDI) are passed through unchanged; the scalar register
;; operand 3 (mode <V_elem>) is inserted into lane 0 of a fresh <MODE>mode
;; temporary, which becomes the lane source with lane index 0.  Operand 4 is
;; forwarded as the final argument.
3605 (define_expand "neon_vqdmlal_n<mode>"
3606 [(match_operand:<V_widen> 0 "s_register_operand" "")
3607 (match_operand:<V_widen> 1 "s_register_operand" "")
3608 (match_operand:VMDI 2 "s_register_operand" "")
3609 (match_operand:<V_elem> 3 "s_register_operand" "")
3610 (match_operand:SI 4 "immediate_operand" "")]
3613 rtx tmp = gen_reg_rtx (<MODE>mode);
3614 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3615 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3616 tmp, const0_rtx, operands[4]));
;; neon_vmls_n<mode> (VMD form): expands to neon_vmls_lane<mode>.  Operands 1
;; and 2 are passed through unchanged; the scalar register operand 3 (mode
;; <V_elem>) is inserted into lane 0 of a fresh <MODE>mode temporary, which
;; becomes the lane source with lane index 0.  Operand 4 is forwarded as the
;; final argument.
3620 (define_expand "neon_vmls_n<mode>"
3621 [(match_operand:VMD 0 "s_register_operand" "")
3622 (match_operand:VMD 1 "s_register_operand" "")
3623 (match_operand:VMD 2 "s_register_operand" "")
3624 (match_operand:<V_elem> 3 "s_register_operand" "")
3625 (match_operand:SI 4 "immediate_operand" "")]
3628 rtx tmp = gen_reg_rtx (<MODE>mode);
3629 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3630 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3631 tmp, const0_rtx, operands[4]));
;; neon_vmls_n<mode> (VMQ form): expands to neon_vmls_lane<mode>.  Operands 1
;; and 2 are passed through unchanged; the scalar register operand 3 (mode
;; <V_elem>) is inserted into lane 0 of a fresh <V_HALF>mode temporary, which
;; becomes the lane source with lane index 0.  Operand 4 is forwarded as the
;; final argument.
3635 (define_expand "neon_vmls_n<mode>"
3636 [(match_operand:VMQ 0 "s_register_operand" "")
3637 (match_operand:VMQ 1 "s_register_operand" "")
3638 (match_operand:VMQ 2 "s_register_operand" "")
3639 (match_operand:<V_elem> 3 "s_register_operand" "")
3640 (match_operand:SI 4 "immediate_operand" "")]
3643 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3644 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3645 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3646 tmp, const0_rtx, operands[4]));
;; neon_vmlsl_n<mode>: expands to neon_vmlsl_lane<mode>.  Operands 1
;; (<V_widen>) and 2 (VMDI) are passed through unchanged; the scalar register
;; operand 3 (mode <V_elem>) is inserted into lane 0 of a fresh <MODE>mode
;; temporary, which becomes the lane source with lane index 0.  Operand 4 is
;; forwarded as the final argument.
3650 (define_expand "neon_vmlsl_n<mode>"
3651 [(match_operand:<V_widen> 0 "s_register_operand" "")
3652 (match_operand:<V_widen> 1 "s_register_operand" "")
3653 (match_operand:VMDI 2 "s_register_operand" "")
3654 (match_operand:<V_elem> 3 "s_register_operand" "")
3655 (match_operand:SI 4 "immediate_operand" "")]
3658 rtx tmp = gen_reg_rtx (<MODE>mode);
3659 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3660 emit_insn (gen_neon_vmlsl_lane<mode> (operands[0], operands[1], operands[2],
3661 tmp, const0_rtx, operands[4]));
;; neon_vqdmlsl_n<mode>: expands to neon_vqdmlsl_lane<mode>.  Operands 1
;; (<V_widen>) and 2 (VMDI) are passed through unchanged; the scalar register
;; operand 3 (mode <V_elem>) is inserted into lane 0 of a fresh <MODE>mode
;; temporary, which becomes the lane source with lane index 0.  Operand 4 is
;; forwarded as the final argument.
3665 (define_expand "neon_vqdmlsl_n<mode>"
3666 [(match_operand:<V_widen> 0 "s_register_operand" "")
3667 (match_operand:<V_widen> 1 "s_register_operand" "")
3668 (match_operand:VMDI 2 "s_register_operand" "")
3669 (match_operand:<V_elem> 3 "s_register_operand" "")
3670 (match_operand:SI 4 "immediate_operand" "")]
3673 rtx tmp = gen_reg_rtx (<MODE>mode);
3674 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3675 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3676 tmp, const0_rtx, operands[4]));
;; neon_vext<mode>: unspec over two VDQX register operands (1 and 2) and an
;; SI immediate (operand 3), output as
;; "vext.<V_sz_elem> dest, src1, src2, #imm" with operand 3 printed as the
;; final immediate.
3680 (define_insn "neon_vext<mode>"
3681 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3682 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3683 (match_operand:VDQX 2 "s_register_operand" "w")
3684 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 0 and the number of elements in the mode.  */
3688 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3689 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3691 [(set_attr "type" "neon_ext<q>")]
;; neon_vrev64<mode>: unspec on the VDQ register operand 1, output as
;; vrev64.<V_sz_elem>.  The SI immediate operand 2 is not referenced by the
;; template.
3694 (define_insn "neon_vrev64<mode>"
3695 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3696 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
3697 (match_operand:SI 2 "immediate_operand" "i")]
3700 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3701 [(set_attr "type" "neon_rev<q>")]
;; neon_vrev32<mode>: unspec on the VX register operand 1, output as
;; vrev32.<V_sz_elem>.  The SI immediate operand 2 is not referenced by the
;; template.
3704 (define_insn "neon_vrev32<mode>"
3705 [(set (match_operand:VX 0 "s_register_operand" "=w")
3706 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")
3707 (match_operand:SI 2 "immediate_operand" "i")]
3710 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3711 [(set_attr "type" "neon_rev<q>")]
;; neon_vrev16<mode>: unspec on the VE register operand 1, output as
;; vrev16.<V_sz_elem>.  The SI immediate operand 2 is not referenced by the
;; template.
3714 (define_insn "neon_vrev16<mode>"
3715 [(set (match_operand:VE 0 "s_register_operand" "=w")
3716 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
3717 (match_operand:SI 2 "immediate_operand" "i")]
3720 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3721 [(set_attr "type" "neon_rev<q>")]
3724 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3725 ; allocation. For an intrinsic of form:
3726 ; rD = vbsl_* (rS, rN, rM)
3727 ; We can use any of:
3728 ; vbsl rS, rN, rM (if D = S)
3729 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3730 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; neon_vbsl<mode>_internal: bit-select insn with three alternatives that
;; differ in which input is tied to the destination (constraint "0"):
;;   alternative 0 ties operand 1 and outputs vbsl,
;;   alternative 1 ties operand 3 and outputs vbit,
;;   alternative 2 ties operand 2 and outputs vbif.
3732 (define_insn "neon_vbsl<mode>_internal"
3733 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3734 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3735 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3736 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3740 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3741 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3742 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3743 [(set_attr "type" "neon_bsl<q>")]
;; neon_vbsl<mode>: expander for the bit-select unspec.  Operand 1 arrives in
;; mode <V_cmp_result> while operands 0, 2 and 3 are VDQX; the preparation
;; statement replaces operand 1 with its <MODE>mode lowpart so that all
;; operands share one mode.
3746 (define_expand "neon_vbsl<mode>"
3747 [(set (match_operand:VDQX 0 "s_register_operand" "")
3748 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3749 (match_operand:VDQX 2 "s_register_operand" "")
3750 (match_operand:VDQX 3 "s_register_operand" "")]
3754 /* We can't alias operands together if they have different modes. */
3755 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; neon_vshl<mode>: unspec over the VDQIX register operands 1 and 2, printed
;; as "dest, op1, op2".  The SI immediate operand 3 is used only through the
;; %O3 and %T3 modifiers that build the mnemonic around "shl".
3758 (define_insn "neon_vshl<mode>"
3759 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3760 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3761 (match_operand:VDQIX 2 "s_register_operand" "w")
3762 (match_operand:SI 3 "immediate_operand" "i")]
3765 "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3766 [(set_attr "type" "neon_shift_imm<q>")]
;; neon_vqshl<mode>: unspec over the VDQIX register operands 1 and 2, printed
;; as "dest, op1, op2".  The SI immediate operand 3 is used only through the
;; %O3 and %T3 modifiers that build the mnemonic around "vq...shl".
3769 (define_insn "neon_vqshl<mode>"
3770 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3771 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3772 (match_operand:VDQIX 2 "s_register_operand" "w")
3773 (match_operand:SI 3 "immediate_operand" "i")]
3776 "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3777 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; neon_vshr_n<mode>: unspec on the VDQIX register operand 1 with the
;; immediate operand 2 printed as the trailing "%2" of the instruction.
;; Operand 3 is used only through the %O3 and %T3 modifiers that build the
;; mnemonic around "shr".
3780 (define_insn "neon_vshr_n<mode>"
3781 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3782 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3783 (match_operand:SI 2 "immediate_operand" "i")
3784 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 1 and the element bit width plus one.  */
3788 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3789 return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3791 [(set_attr "type" "neon_shift_imm<q>")]
;; neon_vshrn_n<mode>: unspec producing the <V_narrow> operand 0 from the VN
;; register operand 1, with the immediate operand 2 printed as the trailing
;; "%2".  Operand 3 is used only through the %O3 modifier in the mnemonic.
3794 (define_insn "neon_vshrn_n<mode>"
3795 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3796 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3797 (match_operand:SI 2 "immediate_operand" "i")
3798 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 1 and half the source element width plus one.  */
3802 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3803 return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2";
3805 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; neon_vqshrn_n<mode>: unspec producing the <V_narrow> operand 0 from the VN
;; register operand 1, with the immediate operand 2 printed as the trailing
;; "%2".  Operand 3 is used only through the %O3 and %T3 modifiers in the
;; mnemonic.
3808 (define_insn "neon_vqshrn_n<mode>"
3809 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3810 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3811 (match_operand:SI 2 "immediate_operand" "i")
3812 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 1 and half the source element width plus one.  */
3816 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3817 return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3819 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vqshrun_n<mode>: unspec producing the <V_narrow> operand 0 from the
;; VN register operand 1, with the immediate operand 2 printed as the
;; trailing "%2".  Operand 3 is used only through the %O3 and %T3 modifiers
;; in the mnemonic.
3822 (define_insn "neon_vqshrun_n<mode>"
3823 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3824 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3825 (match_operand:SI 2 "immediate_operand" "i")
3826 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 1 and half the source element width plus one.  */
3830 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3831 return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3833 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vshl_n<mode>: unspec on the VDQIX register operand 1 with the
;; immediate operand 2 printed as the trailing "%2", output as
;; vshl.<V_if_elem>.  Operand 3 is not referenced by the template.
3836 (define_insn "neon_vshl_n<mode>"
3837 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3838 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3839 (match_operand:SI 2 "immediate_operand" "i")
3840 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 0 and the element bit width.  */
3844 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3845 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3847 [(set_attr "type" "neon_shift_imm<q>")]
;; neon_vqshl_n<mode>: unspec on the VDQIX register operand 1 with the
;; immediate operand 2 printed as the trailing "%2".  Operand 3 is used only
;; through the %T3 modifier that forms the element-type suffix.
3850 (define_insn "neon_vqshl_n<mode>"
3851 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3852 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3853 (match_operand:SI 2 "immediate_operand" "i")
3854 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 0 and the element bit width.  */
3858 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3859 return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3861 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; neon_vqshlu_n<mode>: unspec on the VDQIX register operand 1 with the
;; immediate operand 2 printed as the trailing "%2", output as vqshlu.
;; Operand 3 is used only through the %T3 modifier that forms the
;; element-type suffix.
3864 (define_insn "neon_vqshlu_n<mode>"
3865 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3866 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3867 (match_operand:SI 2 "immediate_operand" "i")
3868 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 0 and the element bit width.  */
3872 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3873 return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3875 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; neon_vshll_n<mode>: unspec producing the <V_widen> operand 0 from the VW
;; register operand 1, with the immediate operand 2 printed as the trailing
;; "%2", output as vshll (quad destination, double source).  Operand 3 is
;; used only through the %T3 modifier that forms the element-type suffix.
;; NOTE(review): the in-code comment states "0 < imm" but the call below
;; passes 0 as the lower limit -- confirm which of the two is intended.
3878 (define_insn "neon_vshll_n<mode>"
3879 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3880 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3881 (match_operand:SI 2 "immediate_operand" "i")
3882 (match_operand:SI 3 "immediate_operand" "i")]
3886 /* The boundaries are: 0 < imm <= size. */
3887 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3888 return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
3890 [(set_attr "type" "neon_shift_imm_long")]
;; neon_vsra_n<mode>: shift-and-accumulate unspec.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 is the VDQIX source register;
;; operand 3 is the immediate printed as the trailing "%3".  Operand 4 is
;; used only through the %O4 and %T4 modifiers that build the mnemonic.
3893 (define_insn "neon_vsra_n<mode>"
3894 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3895 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3896 (match_operand:VDQIX 2 "s_register_operand" "w")
3897 (match_operand:SI 3 "immediate_operand" "i")
3898 (match_operand:SI 4 "immediate_operand" "i")]
/* Check the immediate against 1 and the element bit width plus one.  */
3902 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3903 return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3905 [(set_attr "type" "neon_shift_acc<q>")]
;; neon_vsri_n<mode>: unspec output as vsri.<V_sz_elem>.  Operand 1 is tied
;; to the destination (constraint "0"); operand 2 is the VDQIX source
;; register; operand 3 is the immediate printed as the trailing "%3".
3908 (define_insn "neon_vsri_n<mode>"
3909 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3910 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3911 (match_operand:VDQIX 2 "s_register_operand" "w")
3912 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 1 and the element bit width plus one.  */
3916 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3917 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3919 [(set_attr "type" "neon_shift_reg<q>")]
;; neon_vsli_n<mode>: unspec output as vsli.<V_sz_elem>.  Operand 1 is tied
;; to the destination (constraint "0"); operand 2 is the VDQIX source
;; register; operand 3 is the immediate printed as the trailing "%3".
3922 (define_insn "neon_vsli_n<mode>"
3923 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3924 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3925 (match_operand:VDQIX 2 "s_register_operand" "w")
3926 (match_operand:SI 3 "immediate_operand" "i")]
/* Check the immediate against 0 and the element bit width.  */
3930 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3931 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3933 [(set_attr "type" "neon_shift_reg<q>")]
;; neon_vtbl1v8qi: unspec output as "vtbl.8 Dd, {Dn}, Dm".  Operand 1 (V8QI)
;; is printed inside the register list and operand 2 (V8QI) after it.
3936 (define_insn "neon_vtbl1v8qi"
3937 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3938 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3939 (match_operand:V8QI 2 "s_register_operand" "w")]
3942 "vtbl.8\t%P0, {%P1}, %P2"
3943 [(set_attr "type" "neon_tbl1")]
;; neon_vtbl2v8qi: vtbl.8 with a two-register list.  Operand 1 is a TImode
;; register; the output code takes its register number and builds two V8QI
;; registers at tabbase and tabbase + 2 for the list, then prints the
;; instruction through output_asm_insn with operand 2 after the list.
3946 (define_insn "neon_vtbl2v8qi"
3947 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3948 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3949 (match_operand:V8QI 2 "s_register_operand" "w")]
3954 int tabbase = REGNO (operands[1]);
3956 ops[0] = operands[0];
3957 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3958 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3959 ops[3] = operands[2];
3960 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3964 [(set_attr "type" "neon_tbl2")]
;; neon_vtbl3v8qi: vtbl.8 with a three-register list.  Operand 1 is an EImode
;; register; the output code takes its register number and builds three V8QI
;; registers at tabbase, tabbase + 2 and tabbase + 4 for the list, then
;; prints the instruction through output_asm_insn with operand 2 after the
;; list.
3967 (define_insn "neon_vtbl3v8qi"
3968 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3969 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3970 (match_operand:V8QI 2 "s_register_operand" "w")]
3975 int tabbase = REGNO (operands[1]);
3977 ops[0] = operands[0];
3978 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3979 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3980 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3981 ops[4] = operands[2];
3982 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3986 [(set_attr "type" "neon_tbl3")]
;; neon_vtbl4v8qi: vtbl.8 with a four-register list.  Operand 1 is an OImode
;; register; the output code takes its register number and builds four V8QI
;; registers at tabbase, tabbase + 2, tabbase + 4 and tabbase + 6 for the
;; list, then prints the instruction through output_asm_insn with operand 2
;; after the list.
3989 (define_insn "neon_vtbl4v8qi"
3990 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3991 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3992 (match_operand:V8QI 2 "s_register_operand" "w")]
3997 int tabbase = REGNO (operands[1]);
3999 ops[0] = operands[0];
4000 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4001 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4002 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4003 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4004 ops[5] = operands[2];
4005 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4009 [(set_attr "type" "neon_tbl4")]
4012 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; neon_vtbl1v16qi: V16QI table lookup implemented as a post-reload split.
;; The destination is earlyclobber ("=&w").  The split views operand 1 as
;; TImode (op1) and emits two neon_vtbl2v8qi insns: one on the V8QI subregs
;; at the lowpart offset of op0 and op2, one on the subregs at the highpart
;; offset.  The insn type is "multiple".
4013 (define_insn_and_split "neon_vtbl1v16qi"
4014 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4015 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4016 (match_operand:V16QI 2 "s_register_operand" "w")]
4020 "&& reload_completed"
4023 rtx op0, op1, op2, part0, part2;
4027 op1 = gen_lowpart (TImode, operands[1]);
4030 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4031 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4032 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4033 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4035 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4036 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4037 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4038 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4041 [(set_attr "type" "multiple")]
;; neon_vtbl2v16qi: V16QI lookup with an OImode table operand 1, implemented
;; as a post-reload split.  The destination is earlyclobber ("=&w").  The
;; split emits two neon_vtbl2v8qi insns: one on the V8QI subregs at the
;; lowpart offset of op0 and op2, one on the subregs at the highpart offset.
;; The insn type is "multiple".
4044 (define_insn_and_split "neon_vtbl2v16qi"
4045 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4046 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4047 (match_operand:V16QI 2 "s_register_operand" "w")]
4051 "&& reload_completed"
4054 rtx op0, op1, op2, part0, part2;
4061 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4062 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4063 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4064 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4066 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4067 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4068 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4069 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4072 [(set_attr "type" "multiple")]
4075 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4076 ;; handle quad-word input modes, producing octa-word output modes. But
4077 ;; that requires us to add support for octa-word vector modes in moves.
4078 ;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI registers (operands 1 and 2) into one OImode
;; value (operand 0).  The insn is split after reload by calling
;; neon_split_vcombine on the operand array.
4079 (define_insn_and_split "neon_vcombinev16qi"
4080 [(set (match_operand:OI 0 "s_register_operand" "=w")
4081 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4082 (match_operand:V16QI 2 "s_register_operand" "w")]
4086 "&& reload_completed"
4089 neon_split_vcombine (operands);
4092 [(set_attr "type" "multiple")]
;; vtbx.8 with a one-register table.  Operand 1 is tied to the
;; destination (constraint "0"), operand 2 is the V8QI table and
;; operand 3 the V8QI selector.
4095 (define_insn "neon_vtbx1v8qi"
4096 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4097 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4098 (match_operand:V8QI 2 "s_register_operand" "w")
4099 (match_operand:V8QI 3 "s_register_operand" "w")]
4102 "vtbx.8\t%P0, {%P2}, %P3"
4103 [(set_attr "type" "neon_tbl1")]
;; vtbx.8 with a two-register table held in a TImode operand
;; (operand 2).  Operand 1 is tied to the destination and operand 3
;; is the V8QI selector.  The register list is built from the hard
;; register number of operand 2.
4106 (define_insn "neon_vtbx2v8qi"
4107 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4108 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4109 (match_operand:TI 2 "s_register_operand" "w")
4110 (match_operand:V8QI 3 "s_register_operand" "w")]
4115 int tabbase = REGNO (operands[2]);
4117 ops[0] = operands[0];
/* Table members are named as V8QI registers at tabbase and tabbase + 2.  */
4118 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4119 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4120 ops[3] = operands[3];
4121 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4125 [(set_attr "type" "neon_tbl2")]
;; vtbx.8 with a three-register table held in an EImode operand
;; (operand 2).  Operand 1 is tied to the destination and operand 3
;; is the V8QI selector.
4128 (define_insn "neon_vtbx3v8qi"
4129 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4130 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4131 (match_operand:EI 2 "s_register_operand" "w")
4132 (match_operand:V8QI 3 "s_register_operand" "w")]
4137 int tabbase = REGNO (operands[2]);
4139 ops[0] = operands[0];
/* Table members are named as V8QI registers at tabbase, +2 and +4.  */
4140 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4141 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4142 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4143 ops[4] = operands[3];
4144 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4148 [(set_attr "type" "neon_tbl3")]
;; vtbx.8 with a four-register table held in an OImode operand
;; (operand 2).  Operand 1 is tied to the destination and operand 3
;; is the V8QI selector.
4151 (define_insn "neon_vtbx4v8qi"
4152 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4153 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4154 (match_operand:OI 2 "s_register_operand" "w")
4155 (match_operand:V8QI 3 "s_register_operand" "w")]
4160 int tabbase = REGNO (operands[2]);
4162 ops[0] = operands[0];
/* Table members are named as V8QI registers at tabbase, +2, +4 and +6.  */
4163 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4164 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4165 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4166 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4167 ops[5] = operands[3];
4168 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4172 [(set_attr "type" "neon_tbl4")]
;; Expander for the two-result VTRN operation on VDQW modes.  Operands
;; 0 and 3 are the two results, each an unspec of the same inputs
;; (operands 1 and 2); the second result uses UNSPEC_VTRN2.
4175 (define_expand "neon_vtrn<mode>_internal"
4177 [(set (match_operand:VDQW 0 "s_register_operand" "")
4178 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4179 (match_operand:VDQW 2 "s_register_operand" "")]
4181 (set (match_operand:VDQW 3 "s_register_operand" "")
4182 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4187 ;; Note: Different operand numbering to handle tied registers correctly.
;; The vtrn instruction itself.  Input operand 1 is tied to output 0
;; and input operand 3 to output 2 (constraints "0" and "2"), so the
;; assembly template names only the two output registers.
4188 (define_insn "*neon_vtrn<mode>_insn"
4189 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4190 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4191 (match_operand:VDQW 3 "s_register_operand" "2")]
4193 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4194 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4197 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4198 [(set_attr "type" "neon_permute<q>")]
;; Expander for the two-result VZIP operation on VDQW modes.  Operands
;; 0 and 3 are the two results, each an unspec of the same inputs
;; (operands 1 and 2); the second result uses UNSPEC_VZIP2.
4201 (define_expand "neon_vzip<mode>_internal"
4203 [(set (match_operand:VDQW 0 "s_register_operand" "")
4204 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4205 (match_operand:VDQW 2 "s_register_operand" "")]
4207 (set (match_operand:VDQW 3 "s_register_operand" "")
4208 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4213 ;; Note: Different operand numbering to handle tied registers correctly.
;; The vzip instruction itself.  Input operand 1 is tied to output 0
;; and input operand 3 to output 2 (constraints "0" and "2"), so the
;; assembly template names only the two output registers.
4214 (define_insn "*neon_vzip<mode>_insn"
4215 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4216 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4217 (match_operand:VDQW 3 "s_register_operand" "2")]
4219 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4220 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4223 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4224 [(set_attr "type" "neon_zip<q>")]
;; Expander for the two-result VUZP operation on VDQW modes.  Operands
;; 0 and 3 are the two results, each an unspec of the same inputs
;; (operands 1 and 2); the second result uses UNSPEC_VUZP2.
4227 (define_expand "neon_vuzp<mode>_internal"
4229 [(set (match_operand:VDQW 0 "s_register_operand" "")
4230 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4231 (match_operand:VDQW 2 "s_register_operand" "")]
4233 (set (match_operand:VDQW 3 "s_register_operand" "")
4234 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4239 ;; Note: Different operand numbering to handle tied registers correctly.
;; The vuzp instruction itself.  Input operand 1 is tied to output 0
;; and input operand 3 to output 2 (constraints "0" and "2"), so the
;; assembly template names only the two output registers.
4240 (define_insn "*neon_vuzp<mode>_insn"
4241 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4242 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4243 (match_operand:VDQW 3 "s_register_operand" "2")]
4245 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4246 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4249 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4250 [(set_attr "type" "neon_zip<q>")]
;; Reinterpret a register of any VDX mode (operand 1) as V8QI
;; (operand 0) by calling neon_reinterpret.
4253 (define_expand "neon_vreinterpretv8qi<mode>"
4254 [(match_operand:V8QI 0 "s_register_operand" "")
4255 (match_operand:VDX 1 "s_register_operand" "")]
4258 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VDX mode (operand 1) as V4HI
;; (operand 0) by calling neon_reinterpret.
4262 (define_expand "neon_vreinterpretv4hi<mode>"
4263 [(match_operand:V4HI 0 "s_register_operand" "")
4264 (match_operand:VDX 1 "s_register_operand" "")]
4267 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VDX mode (operand 1) as V2SI
;; (operand 0) by calling neon_reinterpret.
4271 (define_expand "neon_vreinterpretv2si<mode>"
4272 [(match_operand:V2SI 0 "s_register_operand" "")
4273 (match_operand:VDX 1 "s_register_operand" "")]
4276 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VDX mode (operand 1) as V2SF
;; (operand 0) by calling neon_reinterpret.
4280 (define_expand "neon_vreinterpretv2sf<mode>"
4281 [(match_operand:V2SF 0 "s_register_operand" "")
4282 (match_operand:VDX 1 "s_register_operand" "")]
4285 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VDX mode (operand 1) as DI
;; (operand 0) by calling neon_reinterpret.
4289 (define_expand "neon_vreinterpretdi<mode>"
4290 [(match_operand:DI 0 "s_register_operand" "")
4291 (match_operand:VDX 1 "s_register_operand" "")]
4294 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as TI
;; (operand 0) by calling neon_reinterpret.
4298 (define_expand "neon_vreinterpretti<mode>"
4299 [(match_operand:TI 0 "s_register_operand" "")
4300 (match_operand:VQXMOV 1 "s_register_operand" "")]
4303 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V16QI
;; (operand 0) by calling neon_reinterpret.
4308 (define_expand "neon_vreinterpretv16qi<mode>"
4309 [(match_operand:V16QI 0 "s_register_operand" "")
4310 (match_operand:VQXMOV 1 "s_register_operand" "")]
4313 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V8HI
;; (operand 0) by calling neon_reinterpret.
4317 (define_expand "neon_vreinterpretv8hi<mode>"
4318 [(match_operand:V8HI 0 "s_register_operand" "")
4319 (match_operand:VQXMOV 1 "s_register_operand" "")]
4322 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V4SI
;; (operand 0) by calling neon_reinterpret.
4326 (define_expand "neon_vreinterpretv4si<mode>"
4327 [(match_operand:V4SI 0 "s_register_operand" "")
4328 (match_operand:VQXMOV 1 "s_register_operand" "")]
4331 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V4SF
;; (operand 0) by calling neon_reinterpret.
4335 (define_expand "neon_vreinterpretv4sf<mode>"
4336 [(match_operand:V4SF 0 "s_register_operand" "")
4337 (match_operand:VQXMOV 1 "s_register_operand" "")]
4340 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V2DI
;; (operand 0) by calling neon_reinterpret.
4344 (define_expand "neon_vreinterpretv2di<mode>"
4345 [(match_operand:V2DI 0 "s_register_operand" "")
4346 (match_operand:VQXMOV 1 "s_register_operand" "")]
4349 neon_reinterpret (operands[0], operands[1]);
;; Standard-name expander for a one-vector lane load: sets a VDQX
;; register (operand 0) from an unspec of a VDQX neon_struct_operand
;; memory (operand 1).
4353 (define_expand "vec_load_lanes<mode><mode>"
4354 [(set (match_operand:VDQX 0 "s_register_operand")
4355 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Load a whole VDQX vector (operand 0) from memory (operand 1, "Um")
;; with a single vld1.<V_sz_elem>.
4359 (define_insn "neon_vld1<mode>"
4360 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4361 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4364 "vld1.<V_sz_elem>\t%h0, %A1"
4365 [(set_attr "type" "neon_load1_1reg<q>")]
;; Load one element from memory (operand 1) into lane operand 3 of a
;; VDX register.  Operand 2 supplies the other lanes and is tied to
;; the destination.  Two templates exist: a plain whole-register vld1
;; and a single-lane form indexed by %c3.
4368 (define_insn "neon_vld1_lane<mode>"
4369 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4370 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4371 (match_operand:VDX 2 "s_register_operand" "0")
4372 (match_operand:SI 3 "immediate_operand" "i")]
4376 HOST_WIDE_INT lane = INTVAL (operands[3]);
4377 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
/* The lane index must lie in [0, number of vector elements).  */
4378 if (lane < 0 || lane >= max)
4379 error ("lane out of range");
4381 return "vld1.<V_sz_elem>\t%P0, %A1";
4383 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4385 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Quad-register variant of neon_vld1_lane.  The lane index is range
;; checked against the full VQX vector, lanes in the upper half
;; (lane >= max / 2) are handled separately, and the instruction is
;; printed on a <V_HALF>mode register with the lane from operands[3].
4388 (define_insn "neon_vld1_lane<mode>"
4389 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4390 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4391 (match_operand:VQX 2 "s_register_operand" "0")
4392 (match_operand:SI 3 "immediate_operand" "i")]
4396 HOST_WIDE_INT lane = INTVAL (operands[3]);
4397 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4398 int regno = REGNO (operands[0]);
4399 if (lane < 0 || lane >= max)
4400 error ("lane out of range");
4401 else if (lane >= max / 2)
/* Rewrite the operands so the templates below print a half-width
   register and the (possibly adjusted) lane number.  */
4405 operands[3] = GEN_INT (lane);
4407 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4409 return "vld1.<V_sz_elem>\t%P0, %A1";
4411 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4413 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one element from memory (operand 1) and duplicate it into
;; every lane of a VD register, using the all-lanes form of vld1.
4416 (define_insn "neon_vld1_dup<mode>"
4417 [(set (match_operand:VD 0 "s_register_operand" "=w")
4418 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4420 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4421 [(set_attr "type" "neon_load1_all_lanes<q>")]
4424 ;; Special case for DImode. Treat it exactly like a simple load.
;; DImode "dup" load: with a single element there is nothing to
;; duplicate, so this is expressed as a set of a DI register from an
;; unspec of the DI memory operand.
4425 (define_expand "neon_vld1_dupdi"
4426 [(set (match_operand:DI 0 "s_register_operand" "")
4427 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Load one element from memory (operand 1) and duplicate it into
;; every lane of a VQ register.  The template names both halves of
;; the destination (%e0 and %f0) in the all-lanes register list.
4433 (define_insn "neon_vld1_dup<mode>"
4434 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4435 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4438 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4440 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; Duplicate a DI memory value into both halves of a V2DI register.
;; After reload this is split into a neon_vld1_dupdi load of the low
;; DImode half followed by a move of that half into the high half;
;; the "length" attribute is 8.
4443 (define_insn_and_split "neon_vld1_dupv2di"
4444 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4445 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4448 "&& reload_completed"
/* Load the low half once, then copy it to the high half.  */
4451 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4452 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4453 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4456 [(set_attr "length" "8")
4457 (set_attr "type" "neon_load1_all_lanes_q")]
;; Standard-name expander for a one-vector lane store: sets a VDQX
;; neon_struct_operand memory (operand 0) from an unspec of a VDQX
;; register (operand 1).
4460 (define_expand "vec_store_lanes<mode><mode>"
4461 [(set (match_operand:VDQX 0 "neon_struct_operand")
4462 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Store a whole VDQX vector (operand 1) to memory (operand 0, "=Um")
;; with a single vst1.<V_sz_elem>.
4466 (define_insn "neon_vst1<mode>"
4467 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4468 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4471 "vst1.<V_sz_elem>\t%h1, %A0"
4472 [(set_attr "type" "neon_store1_1reg<q>")])
;; Store lane operand 2 of a VDX register (operand 1) to an
;; element-sized memory operand 0.  Two templates exist: a
;; whole-register vst1 and a single-lane form indexed by %c2.
4474 (define_insn "neon_vst1_lane<mode>"
4475 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4477 [(match_operand:VDX 1 "s_register_operand" "w")
4478 (match_operand:SI 2 "immediate_operand" "i")]
4482 HOST_WIDE_INT lane = INTVAL (operands[2]);
4483 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
/* The lane index must lie in [0, number of vector elements).  */
4484 if (lane < 0 || lane >= max)
4485 error ("lane out of range");
4487 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4489 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4491 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Quad-register variant of neon_vst1_lane.  The lane index is range
;; checked against the full VQX vector, lanes in the upper half
;; (lane >= max / 2) are handled separately, and the instruction is
;; printed on a <V_HALF>mode register with the lane from operands[2].
4494 (define_insn "neon_vst1_lane<mode>"
4495 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4497 [(match_operand:VQX 1 "s_register_operand" "w")
4498 (match_operand:SI 2 "immediate_operand" "i")]
4502 HOST_WIDE_INT lane = INTVAL (operands[2]);
4503 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4504 int regno = REGNO (operands[1]);
4505 if (lane < 0 || lane >= max)
4506 error ("lane out of range");
4507 else if (lane >= max / 2)
/* Rewrite the operands so the templates below print a half-width
   register and the (possibly adjusted) lane number.  */
4511 operands[2] = GEN_INT (lane);
4513 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4515 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4517 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4519 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Standard-name expander for a two-vector structure load into a
;; TImode register.  The inner VDX unspec of (const_int 0) carries no
;; data; it appears only to make the pattern depend on the vector
;; mode iterator.
4522 (define_expand "vec_load_lanesti<mode>"
4523 [(set (match_operand:TI 0 "s_register_operand")
4524 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4525 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load into a TImode register for VDX element
;; modes.  For 64-bit elements a vld1.64 is emitted instead of a
;; vld2, and the "type" attribute makes the same distinction.
4529 (define_insn "neon_vld2<mode>"
4530 [(set (match_operand:TI 0 "s_register_operand" "=w")
4531 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4532 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4536 if (<V_sz_elem> == 64)
4537 return "vld1.64\t%h0, %A1";
4539 return "vld2.<V_sz_elem>\t%h0, %A1";
4542 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4543 (const_string "neon_load1_2reg<q>")
4544 (const_string "neon_load2_2reg<q>")))]
;; Standard-name expander for a two-vector structure load of VQ
;; vectors into an OImode register.  The VQ unspec of (const_int 0)
;; is a dummy that carries only the mode iterator.
4547 (define_expand "vec_load_lanesoi<mode>"
4548 [(set (match_operand:OI 0 "s_register_operand")
4549 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4550 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load of VQ vectors into an OImode register
;; with a single vld2.<V_sz_elem>.
4554 (define_insn "neon_vld2<mode>"
4555 [(set (match_operand:OI 0 "s_register_operand" "=w")
4556 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4557 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4560 "vld2.<V_sz_elem>\t%h0, %A1"
4561 [(set_attr "type" "neon_load2_2reg_q")])
;; Load one two-element structure from memory (operand 1) into lane
;; operand 3 of both vectors of a TImode register pair.  Operand 2
;; supplies the existing contents and is tied to the destination.
4563 (define_insn "neon_vld2_lane<mode>"
4564 [(set (match_operand:TI 0 "s_register_operand" "=w")
4565 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4566 (match_operand:TI 2 "s_register_operand" "0")
4567 (match_operand:SI 3 "immediate_operand" "i")
4568 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4572 HOST_WIDE_INT lane = INTVAL (operands[3]);
4573 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4574 int regno = REGNO (operands[0]);
4576 if (lane < 0 || lane >= max)
4577 error ("lane out of range");
/* The two vectors are printed as DImode registers at regno and
   regno + 2.  */
4578 ops[0] = gen_rtx_REG (DImode, regno);
4579 ops[1] = gen_rtx_REG (DImode, regno + 2);
4580 ops[2] = operands[1];
4581 ops[3] = operands[3];
4582 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4585 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Quad-register variant of neon_vld2_lane on an OImode register
;; group.  The lane is range checked against the full VMQ vector and
;; lanes in the upper half (lane >= max / 2) are handled separately.
4588 (define_insn "neon_vld2_lane<mode>"
4589 [(set (match_operand:OI 0 "s_register_operand" "=w")
4590 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4591 (match_operand:OI 2 "s_register_operand" "0")
4592 (match_operand:SI 3 "immediate_operand" "i")
4593 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4597 HOST_WIDE_INT lane = INTVAL (operands[3]);
4598 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4599 int regno = REGNO (operands[0]);
4601 if (lane < 0 || lane >= max)
4602 error ("lane out of range");
4603 else if (lane >= max / 2)
/* The two registers are printed as DImode registers at regno and
   regno + 4; the lane comes from the local variable, not directly
   from operands[3].  */
4608 ops[0] = gen_rtx_REG (DImode, regno);
4609 ops[1] = gen_rtx_REG (DImode, regno + 4);
4610 ops[2] = operands[1];
4611 ops[3] = GEN_INT (lane);
4612 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4615 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load one two-element structure and duplicate it across all lanes
;; of a TImode register pair.  Modes with more than one element use
;; the all-lanes vld2 form; single-element modes use a plain vld1.
;; The "type" attribute follows the same split.
4618 (define_insn "neon_vld2_dup<mode>"
4619 [(set (match_operand:TI 0 "s_register_operand" "=w")
4620 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4621 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4625 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4626 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4628 return "vld1.<V_sz_elem>\t%h0, %A1";
4631 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4632 (const_string "neon_load2_all_lanes<q>")
4633 (const_string "neon_load1_1reg<q>")))]
;; Standard-name expander for a two-vector structure store from a
;; TImode register.  The VDX unspec of (const_int 0) is a dummy that
;; carries only the mode iterator.
4636 (define_expand "vec_store_lanesti<mode>"
4637 [(set (match_operand:TI 0 "neon_struct_operand")
4638 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4639 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store from a TImode register for VDX element
;; modes.  For 64-bit elements a vst1.64 is emitted instead of a vst2.
;; NOTE(review): the non-64-bit "type" is neon_store2_one_lane<q>
;; although this stores whole registers; the matching load uses
;; neon_load2_2reg<q>.  Possibly intended to be a two-register store
;; type -- confirm against the scheduling descriptions.
4643 (define_insn "neon_vst2<mode>"
4644 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4645 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4646 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4650 if (<V_sz_elem> == 64)
4651 return "vst1.64\t%h1, %A0";
4653 return "vst2.<V_sz_elem>\t%h1, %A0";
4656 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4657 (const_string "neon_store1_2reg<q>")
4658 (const_string "neon_store2_one_lane<q>")))]
;; Standard-name expander for a two-vector structure store of VQ
;; vectors from an OImode register.  The VQ unspec of (const_int 0)
;; is a dummy that carries only the mode iterator.
4661 (define_expand "vec_store_lanesoi<mode>"
4662 [(set (match_operand:OI 0 "neon_struct_operand")
4663 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4664 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store of VQ vectors from an OImode register
;; with a single vst2.<V_sz_elem>.
4668 (define_insn "neon_vst2<mode>"
4669 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4670 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4671 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4674 "vst2.<V_sz_elem>\t%h1, %A0"
4675 [(set_attr "type" "neon_store2_4reg<q>")]
;; Store lane operand 2 of both vectors of a TImode register pair
;; (operand 1) to a two-element memory operand 0.
4678 (define_insn "neon_vst2_lane<mode>"
4679 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4680 (unspec:<V_two_elem>
4681 [(match_operand:TI 1 "s_register_operand" "w")
4682 (match_operand:SI 2 "immediate_operand" "i")
4683 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4687 HOST_WIDE_INT lane = INTVAL (operands[2]);
4688 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4689 int regno = REGNO (operands[1]);
4691 if (lane < 0 || lane >= max)
4692 error ("lane out of range");
/* The two vectors are printed as DImode registers at regno and
   regno + 2.  */
4693 ops[0] = operands[0];
4694 ops[1] = gen_rtx_REG (DImode, regno);
4695 ops[2] = gen_rtx_REG (DImode, regno + 2);
4696 ops[3] = operands[2];
4697 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4700 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Quad-register variant of neon_vst2_lane on an OImode register
;; group.  The lane is range checked against the full VMQ vector and
;; lanes in the upper half (lane >= max / 2) are handled separately.
4703 (define_insn "neon_vst2_lane<mode>"
4704 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4705 (unspec:<V_two_elem>
4706 [(match_operand:OI 1 "s_register_operand" "w")
4707 (match_operand:SI 2 "immediate_operand" "i")
4708 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4712 HOST_WIDE_INT lane = INTVAL (operands[2]);
4713 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4714 int regno = REGNO (operands[1]);
4716 if (lane < 0 || lane >= max)
4717 error ("lane out of range");
4718 else if (lane >= max / 2)
/* The two registers are printed as DImode registers at regno and
   regno + 4; the lane comes from the local variable.  */
4723 ops[0] = operands[0];
4724 ops[1] = gen_rtx_REG (DImode, regno);
4725 ops[2] = gen_rtx_REG (DImode, regno + 4);
4726 ops[3] = GEN_INT (lane);
4727 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4730 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Standard-name expander for a three-vector structure load into an
;; EImode register.  The VDX unspec of (const_int 0) is a dummy that
;; carries only the mode iterator.
4733 (define_expand "vec_load_lanesei<mode>"
4734 [(set (match_operand:EI 0 "s_register_operand")
4735 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4736 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure load into an EImode register for VDX
;; element modes.  For 64-bit elements a vld1.64 is emitted instead
;; of a vld3, and the "type" attribute makes the same distinction.
4740 (define_insn "neon_vld3<mode>"
4741 [(set (match_operand:EI 0 "s_register_operand" "=w")
4742 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4743 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4747 if (<V_sz_elem> == 64)
4748 return "vld1.64\t%h0, %A1";
4750 return "vld3.<V_sz_elem>\t%h0, %A1";
4753 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4754 (const_string "neon_load1_3reg<q>")
4755 (const_string "neon_load3_3reg<q>")))]
;; Standard-name expander for a three-vector structure load of VQ
;; vectors into a CImode register; it simply forwards to the
;; neon_vld3<mode> expander.
4758 (define_expand "vec_load_lanesci<mode>"
4759 [(match_operand:CI 0 "s_register_operand")
4760 (match_operand:CI 1 "neon_struct_operand")
4761 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4764 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Three-vector structure load of VQ vectors into a CImode register,
;; expanded as two EImode-sized loads: neon_vld3qa from the start of
;; the memory operand and neon_vld3qb from GET_MODE_SIZE (EImode)
;; bytes further on.  The second insn also takes operands[0] as an
;; input.
4768 (define_expand "neon_vld3<mode>"
4769 [(match_operand:CI 0 "s_register_operand")
4770 (match_operand:CI 1 "neon_struct_operand")
4771 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
/* First EImode chunk of the CImode memory.  */
4776 mem = adjust_address (operands[1], EImode, 0);
4777 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
/* Second EImode chunk, immediately after the first.  */
4778 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4779 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of a quad-register vld3: loads an EImode memory chunk
;; into the registers at regno, regno + 4 and regno + 8 of the CImode
;; destination.
4783 (define_insn "neon_vld3qa<mode>"
4784 [(set (match_operand:CI 0 "s_register_operand" "=w")
4785 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4786 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4790 int regno = REGNO (operands[0]);
4792 ops[0] = gen_rtx_REG (DImode, regno);
4793 ops[1] = gen_rtx_REG (DImode, regno + 4);
4794 ops[2] = gen_rtx_REG (DImode, regno + 8);
4795 ops[3] = operands[1];
4796 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4799 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of a quad-register vld3: loads an EImode memory chunk
;; into the registers at regno + 2, regno + 6 and regno + 10 of the
;; CImode destination.  Operand 2 is tied to the destination
;; (constraint "0"), so the existing register contents are an input.
4802 (define_insn "neon_vld3qb<mode>"
4803 [(set (match_operand:CI 0 "s_register_operand" "=w")
4804 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4805 (match_operand:CI 2 "s_register_operand" "0")
4806 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4810 int regno = REGNO (operands[0]);
4812 ops[0] = gen_rtx_REG (DImode, regno + 2);
4813 ops[1] = gen_rtx_REG (DImode, regno + 6);
4814 ops[2] = gen_rtx_REG (DImode, regno + 10);
4815 ops[3] = operands[1];
4816 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4819 [(set_attr "type" "neon_load3_3reg<q>")]
;; Load one three-element structure from memory (operand 1) into lane
;; operand 3 of the three vectors of an EImode register group.
;; Operand 2 is tied to the destination.  The memory operand is
;; printed with plain %3, not the %A modifier used by the vld2 and
;; vld4 lane patterns.
4822 (define_insn "neon_vld3_lane<mode>"
4823 [(set (match_operand:EI 0 "s_register_operand" "=w")
4824 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4825 (match_operand:EI 2 "s_register_operand" "0")
4826 (match_operand:SI 3 "immediate_operand" "i")
4827 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4831 HOST_WIDE_INT lane = INTVAL (operands[3]);
4832 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4833 int regno = REGNO (operands[0]);
4835 if (lane < 0 || lane >= max)
4836 error ("lane out of range");
4837 ops[0] = gen_rtx_REG (DImode, regno);
4838 ops[1] = gen_rtx_REG (DImode, regno + 2);
4839 ops[2] = gen_rtx_REG (DImode, regno + 4);
4840 ops[3] = operands[1];
4841 ops[4] = operands[3];
4842 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4846 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Quad-register variant of neon_vld3_lane on a CImode register
;; group.  The lane is range checked against the full VMQ vector and
;; lanes in the upper half (lane >= max / 2) are handled separately.
;; The three registers are printed at regno, regno + 4 and regno + 8.
4849 (define_insn "neon_vld3_lane<mode>"
4850 [(set (match_operand:CI 0 "s_register_operand" "=w")
4851 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4852 (match_operand:CI 2 "s_register_operand" "0")
4853 (match_operand:SI 3 "immediate_operand" "i")
4854 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4858 HOST_WIDE_INT lane = INTVAL (operands[3]);
4859 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4860 int regno = REGNO (operands[0]);
4862 if (lane < 0 || lane >= max)
4863 error ("lane out of range");
4864 else if (lane >= max / 2)
4869 ops[0] = gen_rtx_REG (DImode, regno);
4870 ops[1] = gen_rtx_REG (DImode, regno + 4);
4871 ops[2] = gen_rtx_REG (DImode, regno + 8);
4872 ops[3] = operands[1];
4873 ops[4] = GEN_INT (lane);
4874 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4878 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load one three-element structure and duplicate it across all lanes
;; of an EImode register group.  Modes with more than one element use
;; the all-lanes vld3 form on the registers at regno, regno + 2 and
;; regno + 4; single-element modes use a plain vld1.  The "type"
;; attribute follows the same split.
4881 (define_insn "neon_vld3_dup<mode>"
4882 [(set (match_operand:EI 0 "s_register_operand" "=w")
4883 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4884 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4888 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4890 int regno = REGNO (operands[0]);
4892 ops[0] = gen_rtx_REG (DImode, regno);
4893 ops[1] = gen_rtx_REG (DImode, regno + 2);
4894 ops[2] = gen_rtx_REG (DImode, regno + 4);
4895 ops[3] = operands[1];
4896 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4900 return "vld1.<V_sz_elem>\t%h0, %A1";
4903 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4904 (const_string "neon_load3_all_lanes<q>")
4905 (const_string "neon_load1_1reg<q>")))])
;; Standard-name expander for a three-vector structure store from an
;; EImode register.  The VDX unspec of (const_int 0) is a dummy that
;; carries only the mode iterator.
4907 (define_expand "vec_store_lanesei<mode>"
4908 [(set (match_operand:EI 0 "neon_struct_operand")
4909 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4910 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure store from an EImode register for VDX
;; element modes.  For 64-bit elements a vst1.64 is emitted instead
;; of a vst3.
;; NOTE(review): the non-64-bit "type" is neon_store3_one_lane<q>
;; although this stores whole registers; the quad-register halves use
;; neon_store3_3reg<q>.  Possibly intended to be a three-register
;; store type -- confirm against the scheduling descriptions.
4914 (define_insn "neon_vst3<mode>"
4915 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4916 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4917 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4921 if (<V_sz_elem> == 64)
4922 return "vst1.64\t%h1, %A0";
4924 return "vst3.<V_sz_elem>\t%h1, %A0";
4927 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4928 (const_string "neon_store1_3reg<q>")
4929 (const_string "neon_store3_one_lane<q>")))])
;; Standard-name expander for a three-vector structure store of VQ
;; vectors from a CImode register; it simply forwards to the
;; neon_vst3<mode> expander.
4931 (define_expand "vec_store_lanesci<mode>"
4932 [(match_operand:CI 0 "neon_struct_operand")
4933 (match_operand:CI 1 "s_register_operand")
4934 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4937 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Three-vector structure store of VQ vectors from a CImode register,
;; expanded as two EImode-sized stores: neon_vst3qa to the start of
;; the memory operand and neon_vst3qb to GET_MODE_SIZE (EImode) bytes
;; further on.
4941 (define_expand "neon_vst3<mode>"
4942 [(match_operand:CI 0 "neon_struct_operand")
4943 (match_operand:CI 1 "s_register_operand")
4944 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
/* First EImode chunk of the CImode memory.  */
4949 mem = adjust_address (operands[0], EImode, 0);
4950 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
/* Second EImode chunk, immediately after the first.  */
4951 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4952 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of a quad-register vst3: stores the registers at regno,
;; regno + 4 and regno + 8 of the CImode source to an EImode memory
;; chunk.
4956 (define_insn "neon_vst3qa<mode>"
4957 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4958 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4959 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4963 int regno = REGNO (operands[1]);
4965 ops[0] = operands[0];
4966 ops[1] = gen_rtx_REG (DImode, regno);
4967 ops[2] = gen_rtx_REG (DImode, regno + 4);
4968 ops[3] = gen_rtx_REG (DImode, regno + 8);
4969 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4972 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of a quad-register vst3: stores the registers at
;; regno + 2, regno + 6 and regno + 10 of the CImode source to an
;; EImode memory chunk.
4975 (define_insn "neon_vst3qb<mode>"
4976 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4977 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4978 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4982 int regno = REGNO (operands[1]);
4984 ops[0] = operands[0];
4985 ops[1] = gen_rtx_REG (DImode, regno + 2);
4986 ops[2] = gen_rtx_REG (DImode, regno + 6);
4987 ops[3] = gen_rtx_REG (DImode, regno + 10);
4988 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4991 [(set_attr "type" "neon_store3_3reg<q>")]
;; Store lane operand 2 of the three vectors of an EImode register
;; group (operand 1) to a three-element memory operand 0.  The memory
;; operand is printed with plain %0, not the %A modifier used by the
;; vst2 lane patterns.
4994 (define_insn "neon_vst3_lane<mode>"
4995 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4996 (unspec:<V_three_elem>
4997 [(match_operand:EI 1 "s_register_operand" "w")
4998 (match_operand:SI 2 "immediate_operand" "i")
4999 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5003 HOST_WIDE_INT lane = INTVAL (operands[2]);
5004 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5005 int regno = REGNO (operands[1]);
5007 if (lane < 0 || lane >= max)
5008 error ("lane out of range");
5009 ops[0] = operands[0];
5010 ops[1] = gen_rtx_REG (DImode, regno);
5011 ops[2] = gen_rtx_REG (DImode, regno + 2);
5012 ops[3] = gen_rtx_REG (DImode, regno + 4);
5013 ops[4] = operands[2];
5014 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5018 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Quad-register variant of neon_vst3_lane on a CImode register
;; group.  The lane is range checked against the full VMQ vector and
;; lanes in the upper half (lane >= max / 2) are handled separately.
;; The three registers are printed at regno, regno + 4 and regno + 8.
5021 (define_insn "neon_vst3_lane<mode>"
5022 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5023 (unspec:<V_three_elem>
5024 [(match_operand:CI 1 "s_register_operand" "w")
5025 (match_operand:SI 2 "immediate_operand" "i")
5026 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5030 HOST_WIDE_INT lane = INTVAL (operands[2]);
5031 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5032 int regno = REGNO (operands[1]);
5034 if (lane < 0 || lane >= max)
5035 error ("lane out of range");
5036 else if (lane >= max / 2)
5041 ops[0] = operands[0];
5042 ops[1] = gen_rtx_REG (DImode, regno);
5043 ops[2] = gen_rtx_REG (DImode, regno + 4);
5044 ops[3] = gen_rtx_REG (DImode, regno + 8);
5045 ops[4] = GEN_INT (lane);
5046 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5050 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard-name expander for a four-vector structure load of VDX
;; vectors into an OImode register.  The VDX unspec of (const_int 0)
;; is a dummy that carries only the mode iterator.
5053 (define_expand "vec_load_lanesoi<mode>"
5054 [(set (match_operand:OI 0 "s_register_operand")
5055 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5056 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-vector structure load into an OImode register for VDX
;; element modes.  For 64-bit elements a vld1.64 is emitted instead
;; of a vld4, and the "type" attribute makes the same distinction.
5060 (define_insn "neon_vld4<mode>"
5061 [(set (match_operand:OI 0 "s_register_operand" "=w")
5062 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5063 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5067 if (<V_sz_elem> == 64)
5068 return "vld1.64\t%h0, %A1";
5070 return "vld4.<V_sz_elem>\t%h0, %A1";
5073 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5074 (const_string "neon_load1_4reg<q>")
5075 (const_string "neon_load4_4reg<q>")))]
;; Standard-name expander for a four-vector structure load of VQ
;; vectors into an XImode register; it simply forwards to the
;; neon_vld4<mode> expander.
5078 (define_expand "vec_load_lanesxi<mode>"
5079 [(match_operand:XI 0 "s_register_operand")
5080 (match_operand:XI 1 "neon_struct_operand")
5081 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5084 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Four-vector structure load of VQ vectors into an XImode register,
;; expanded as two OImode-sized loads: neon_vld4qa from the start of
;; the memory operand and neon_vld4qb from GET_MODE_SIZE (OImode)
;; bytes further on.  The second insn also takes operands[0] as an
;; input.
5088 (define_expand "neon_vld4<mode>"
5089 [(match_operand:XI 0 "s_register_operand")
5090 (match_operand:XI 1 "neon_struct_operand")
5091 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
/* First OImode chunk of the XImode memory.  */
5096 mem = adjust_address (operands[1], OImode, 0);
5097 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
/* Second OImode chunk, immediately after the first.  */
5098 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5099 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of a quad-register vld4: loads an OImode memory chunk
;; into the registers at regno, regno + 4, regno + 8 and regno + 12
;; of the XImode destination.
5103 (define_insn "neon_vld4qa<mode>"
5104 [(set (match_operand:XI 0 "s_register_operand" "=w")
5105 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5106 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5110 int regno = REGNO (operands[0]);
5112 ops[0] = gen_rtx_REG (DImode, regno);
5113 ops[1] = gen_rtx_REG (DImode, regno + 4);
5114 ops[2] = gen_rtx_REG (DImode, regno + 8);
5115 ops[3] = gen_rtx_REG (DImode, regno + 12);
5116 ops[4] = operands[1];
5117 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5120 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of a quad-register vld4: loads an OImode memory chunk
;; into the registers at regno + 2, regno + 6, regno + 10 and
;; regno + 14 of the XImode destination.  Operand 2 is tied to the
;; destination (constraint "0"), so the existing register contents
;; are an input.
5123 (define_insn "neon_vld4qb<mode>"
5124 [(set (match_operand:XI 0 "s_register_operand" "=w")
5125 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5126 (match_operand:XI 2 "s_register_operand" "0")
5127 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5131 int regno = REGNO (operands[0]);
5133 ops[0] = gen_rtx_REG (DImode, regno + 2);
5134 ops[1] = gen_rtx_REG (DImode, regno + 6);
5135 ops[2] = gen_rtx_REG (DImode, regno + 10);
5136 ops[3] = gen_rtx_REG (DImode, regno + 14);
5137 ops[4] = operands[1];
5138 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5141 [(set_attr "type" "neon_load4_4reg<q>")]
;; Load one four-element structure from memory (operand 1) into lane
;; operand 3 of the four vectors of an OImode register group.
;; Operand 2 is tied to the destination.  The registers are printed
;; at regno, regno + 2, regno + 4 and regno + 6.
5144 (define_insn "neon_vld4_lane<mode>"
5145 [(set (match_operand:OI 0 "s_register_operand" "=w")
5146 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5147 (match_operand:OI 2 "s_register_operand" "0")
5148 (match_operand:SI 3 "immediate_operand" "i")
5149 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5153 HOST_WIDE_INT lane = INTVAL (operands[3]);
5154 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5155 int regno = REGNO (operands[0]);
5157 if (lane < 0 || lane >= max)
5158 error ("lane out of range");
5159 ops[0] = gen_rtx_REG (DImode, regno);
5160 ops[1] = gen_rtx_REG (DImode, regno + 2);
5161 ops[2] = gen_rtx_REG (DImode, regno + 4);
5162 ops[3] = gen_rtx_REG (DImode, regno + 6);
5163 ops[4] = operands[1];
5164 ops[5] = operands[3];
5165 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5169 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Quad-register variant of neon_vld4_lane on an XImode register
;; group.  The lane is range checked against the full VMQ vector and
;; lanes in the upper half (lane >= max / 2) are handled separately.
;; The four registers are printed at regno, regno + 4, regno + 8 and
;; regno + 12.
5172 (define_insn "neon_vld4_lane<mode>"
5173 [(set (match_operand:XI 0 "s_register_operand" "=w")
5174 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5175 (match_operand:XI 2 "s_register_operand" "0")
5176 (match_operand:SI 3 "immediate_operand" "i")
5177 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5181 HOST_WIDE_INT lane = INTVAL (operands[3]);
5182 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5183 int regno = REGNO (operands[0]);
5185 if (lane < 0 || lane >= max)
5186 error ("lane out of range");
5187 else if (lane >= max / 2)
5192 ops[0] = gen_rtx_REG (DImode, regno);
5193 ops[1] = gen_rtx_REG (DImode, regno + 4);
5194 ops[2] = gen_rtx_REG (DImode, regno + 8);
5195 ops[3] = gen_rtx_REG (DImode, regno + 12);
5196 ops[4] = operands[1];
5197 ops[5] = GEN_INT (lane);
5198 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5202 [(set_attr "type" "neon_load4_one_lane<q>")]
;; neon_vld4_dup<mode> (mode iterator VDX): load a four-element structure
;; from the <V_four_elem> memory operand 1 into the OI register operand 0.
;; When the vector mode has more than one element, the output code names
;; the D registers at regno, regno + 2, regno + 4 and regno + 6 and prints
;; a vld4 using the "[]" register syntax on each of them; otherwise the
;; template falls back to vld1.<V_sz_elem> on the whole register list (%h0).
;; The "type" attribute makes the same split on <V_mode_nunits> > 1:
;; neon_load4_all_lanes<q> versus neon_load1_1reg<q>.
;; NOTE(review): the unspec code and insn condition are on lines not
;; visible in this excerpt.
5205 (define_insn "neon_vld4_dup<mode>"
5206 [(set (match_operand:OI 0 "s_register_operand" "=w")
5207 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5208 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5212 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5214 int regno = REGNO (operands[0]);
5216 ops[0] = gen_rtx_REG (DImode, regno);
5217 ops[1] = gen_rtx_REG (DImode, regno + 2);
5218 ops[2] = gen_rtx_REG (DImode, regno + 4);
5219 ops[3] = gen_rtx_REG (DImode, regno + 6);
5220 ops[4] = operands[1];
5221 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5226 return "vld1.<V_sz_elem>\t%h0, %A1";
5229 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5230 (const_string "neon_load4_all_lanes<q>")
5231 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanesoi<mode> (mode iterator VDX): expander whose pattern
;; stores the OI register operand 1 -- wrapped in an unspec together with a
;; UNSPEC_VSTRUCTDUMMY marker that carries the vector mode -- to the
;; neon_struct_operand memory operand 0.
;; NOTE(review): the unspec code and the enabling condition are on lines
;; not visible in this excerpt.
5234 (define_expand "vec_store_lanesoi<mode>"
5235 [(set (match_operand:OI 0 "neon_struct_operand")
5236 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5237 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; neon_vst4<mode>, OI-register form (mode iterator VDX): store the register
;; list held in operand 1 to the memory operand 0.
;; For 64-bit elements (<V_sz_elem> == 64) the template is vst1.64;
;; every other element size uses vst4.<V_sz_elem>.  The "type" attribute
;; follows the same test: neon_store1_4reg<q> for 64-bit elements,
;; neon_store4_4reg<q> otherwise.
;; NOTE(review): the unspec code and insn condition are on lines not
;; visible in this excerpt.
5241 (define_insn "neon_vst4<mode>"
5242 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5243 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5244 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5248 if (<V_sz_elem> == 64)
5249 return "vst1.64\t%h1, %A0";
5251 return "vst4.<V_sz_elem>\t%h1, %A0";
5254 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5255 (const_string "neon_store1_4reg<q>")
5256 (const_string "neon_store4_4reg<q>")))]
;; vec_store_lanesxi<mode> (mode iterator VQ): expander taking an XI memory
;; operand 0 and an XI register operand 1.  Its preparation code simply
;; forwards both operands to gen_neon_vst4<mode>.
;; NOTE(review): the enabling condition is on a line not visible in this
;; excerpt.
5259 (define_expand "vec_store_lanesxi<mode>"
5260 [(match_operand:XI 0 "neon_struct_operand")
5261 (match_operand:XI 1 "s_register_operand")
5262 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5265 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; neon_vst4<mode>, XI-register form (mode iterator VQ): expander that
;; splits the XI store into two OImode stores.  The memory operand 0 is
;; re-addressed as OImode at offset 0 for neon_vst4qa<mode>, then advanced
;; by GET_MODE_SIZE (OImode) bytes for neon_vst4qb<mode>; both insns
;; receive the full XI register operand 1.
5269 (define_expand "neon_vst4<mode>"
5270 [(match_operand:XI 0 "neon_struct_operand")
5271 (match_operand:XI 1 "s_register_operand")
5272 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5277 mem = adjust_address (operands[0], OImode, 0);
5278 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5279 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5280 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; neon_vst4qa<mode> (mode iterator VQ): first of the two OImode stores that
;; the XI neon_vst4<mode> expander emits.  From the XI register operand 1
;; it names the D registers at regno, regno + 4, regno + 8 and regno + 12
;; and stores them with a single vst4.<V_sz_elem> to memory operand 0.
;; NOTE(review): the unspec code and insn condition are on lines not
;; visible in this excerpt.
5284 (define_insn "neon_vst4qa<mode>"
5285 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5286 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5287 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5291 int regno = REGNO (operands[1]);
5293 ops[0] = operands[0];
5294 ops[1] = gen_rtx_REG (DImode, regno);
5295 ops[2] = gen_rtx_REG (DImode, regno + 4);
5296 ops[3] = gen_rtx_REG (DImode, regno + 8);
5297 ops[4] = gen_rtx_REG (DImode, regno + 12);
5298 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5301 [(set_attr "type" "neon_store4_4reg<q>")]
;; neon_vst4qb<mode> (mode iterator VQ): second of the two OImode stores
;; that the XI neon_vst4<mode> expander emits.  It is the counterpart of
;; neon_vst4qa<mode>, naming the remaining D registers of operand 1 --
;; regno + 2, regno + 6, regno + 10 and regno + 14 -- and storing them with
;; a single vst4.<V_sz_elem> to memory operand 0.
;; NOTE(review): the unspec code and insn condition are on lines not
;; visible in this excerpt.
5304 (define_insn "neon_vst4qb<mode>"
5305 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5306 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5307 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5311 int regno = REGNO (operands[1]);
5313 ops[0] = operands[0];
5314 ops[1] = gen_rtx_REG (DImode, regno + 2);
5315 ops[2] = gen_rtx_REG (DImode, regno + 6);
5316 ops[3] = gen_rtx_REG (DImode, regno + 10);
5317 ops[4] = gen_rtx_REG (DImode, regno + 14);
5318 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5321 [(set_attr "type" "neon_store4_4reg<q>")]
;; neon_vst4_lane<mode>, OI-register form (mode iterator VD): store one lane
;; of each of four D registers to the <V_four_elem> memory operand 0.
;; Operand 1 is the OI register group and operand 2 the immediate lane
;; index.  The output code reports "lane out of range" when the index is
;; negative or not below GET_MODE_NUNITS (<MODE>mode), then names the D
;; registers at regno, regno + 2, regno + 4 and regno + 6 and prints a vst4
;; that selects lane %c5 in each of them.
;; NOTE(review): the unspec code, the insn condition and the declaration
;; of ops[] are on lines not visible in this excerpt.
5324 (define_insn "neon_vst4_lane<mode>"
5325 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5326 (unspec:<V_four_elem>
5327 [(match_operand:OI 1 "s_register_operand" "w")
5328 (match_operand:SI 2 "immediate_operand" "i")
5329 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5333 HOST_WIDE_INT lane = INTVAL (operands[2]);
5334 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5335 int regno = REGNO (operands[1]);
5337 if (lane < 0 || lane >= max)
5338 error ("lane out of range");
5339 ops[0] = operands[0];
5340 ops[1] = gen_rtx_REG (DImode, regno);
5341 ops[2] = gen_rtx_REG (DImode, regno + 2);
5342 ops[3] = gen_rtx_REG (DImode, regno + 4);
5343 ops[4] = gen_rtx_REG (DImode, regno + 6);
5344 ops[5] = operands[2];
5345 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5349 [(set_attr "type" "neon_store4_one_lane<q>")]
;; neon_vst4_lane<mode>, XI-register form (mode iterator VMQ): store one
;; lane of each of four D registers to the <V_four_elem> memory operand 0.
;; Operand 1 is the XI register group and operand 2 the immediate lane
;; index, which is range-checked against GET_MODE_NUNITS (<MODE>mode)
;; ("lane out of range" is reported on failure).  The D registers named in
;; the template are regno, regno + 4, regno + 8 and regno + 12, and the
;; lane printed is GEN_INT (lane), i.e. the local value rather than
;; operand 2 itself.
;; NOTE(review): the body of the `lane >= max / 2' branch is not visible in
;; this excerpt; presumably it rebases lane and regno onto the upper D
;; register of each Q register -- confirm against the full file.
;; The "type" attribute is neon_store4_one_lane<q>: this is a single-lane
;; store, so it is classified like the OI-register form of this pattern
;; rather than as a whole-register vst4 (neon_store4_4reg<q>).
5352 (define_insn "neon_vst4_lane<mode>"
5353 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5354 (unspec:<V_four_elem>
5355 [(match_operand:XI 1 "s_register_operand" "w")
5356 (match_operand:SI 2 "immediate_operand" "i")
5357 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5361 HOST_WIDE_INT lane = INTVAL (operands[2]);
5362 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5363 int regno = REGNO (operands[1]);
5365 if (lane < 0 || lane >= max)
5366 error ("lane out of range");
5367 else if (lane >= max / 2)
5372 ops[0] = operands[0];
5373 ops[1] = gen_rtx_REG (DImode, regno);
5374 ops[2] = gen_rtx_REG (DImode, regno + 4);
5375 ops[3] = gen_rtx_REG (DImode, regno + 8);
5376 ops[4] = gen_rtx_REG (DImode, regno + 12);
5377 ops[5] = GEN_INT (lane);
5378 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5382 [(set_attr "type" "neon_store4_one_lane<q>")]
;; neon_vec_unpack<US>_lo_<mode> (mode iterator VU): extend (code SE) the
;; <V_HALF> part of operand 1 selected by operand 2 into the <V_unpack>
;; destination.  Operand 2 must satisfy vect_par_constant_low.  Emitted as
;; vmovl with the %e modifier on operand 1.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
5385 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5386 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5387 (SE:<V_unpack> (vec_select:<V_HALF>
5388 (match_operand:VU 1 "register_operand" "w")
5389 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5390 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5391 "vmovl.<US><V_sz_elem> %q0, %e1"
5392 [(set_attr "type" "neon_shift_imm_long")]
;; neon_vec_unpack<US>_hi_<mode> (mode iterator VU): extend (code SE) the
;; <V_HALF> part of operand 1 selected by operand 2 into the <V_unpack>
;; destination.  Operand 2 must satisfy vect_par_constant_high.  Emitted as
;; vmovl with the %f modifier on operand 1.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
5395 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5396 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5397 (SE:<V_unpack> (vec_select:<V_HALF>
5398 (match_operand:VU 1 "register_operand" "w")
5399 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5400 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5401 "vmovl.<US><V_sz_elem> %q0, %f1"
5402 [(set_attr "type" "neon_shift_imm_long")]
;; vec_unpack<US>_hi_<mode> (mode iterator VU): expander for the upper-half
;; unpack.  The preparation code builds a PARALLEL of <V_mode_nunits>/2
;; constant indices, <V_mode_nunits>/2 + i for i = 0 .. nunits/2 - 1, and
;; passes it on to gen_neon_vec_unpack<US>_hi_<mode>.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
;; NOTE(review): the declarations of t1 and i and the remaining arguments
;; of the emit_insn call are on lines not visible in this excerpt.
5405 (define_expand "vec_unpack<US>_hi_<mode>"
5406 [(match_operand:<V_unpack> 0 "register_operand" "")
5407 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5408 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5410 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5413 for (i = 0; i < (<V_mode_nunits>/2); i++)
5414 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5416 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5417 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; vec_unpack<US>_lo_<mode> (mode iterator VU): expander for the lower-half
;; unpack.  The preparation code builds a PARALLEL of <V_mode_nunits>/2
;; constant indices 0 .. nunits/2 - 1 and passes it on to
;; gen_neon_vec_unpack<US>_lo_<mode>.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
;; NOTE(review): the declarations of t1 and i and the remaining arguments
;; of the emit_insn call are on lines not visible in this excerpt.
5424 (define_expand "vec_unpack<US>_lo_<mode>"
5425 [(match_operand:<V_unpack> 0 "register_operand" "")
5426 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5427 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5429 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5432 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5433 RTVEC_ELT (v, i) = GEN_INT (i);
5434 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5435 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; neon_vec_<US>mult_lo_<mode> (mode iterator VU): widening multiply of the
;; selected <V_HALF> parts of operands 1 and 3.  Each half is extended
;; (code SE) to <V_unpack> before the mult; operand 2 is the selection
;; vector and must satisfy vect_par_constant_low.  Emitted as vmull with
;; the %e modifier on both inputs.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
;; NOTE(review): the selector of the second vec_select is on a line not
;; visible in this excerpt.
5442 (define_insn "neon_vec_<US>mult_lo_<mode>"
5443 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5444 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5445 (match_operand:VU 1 "register_operand" "w")
5446 (match_operand:VU 2 "vect_par_constant_low" "")))
5447 (SE:<V_unpack> (vec_select:<V_HALF>
5448 (match_operand:VU 3 "register_operand" "w")
5450 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5451 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5452 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vec_widen_<US>mult_lo_<mode> (mode iterator VU): expander for the
;; lower-half widening multiply of operands 1 and 2.  The preparation code
;; builds a PARALLEL of <V_mode_nunits>/2 constant indices
;; 0 .. nunits/2 - 1 and passes it on to gen_neon_vec_<US>mult_lo_<mode>.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
;; NOTE(review): the declarations of t1 and i and the remaining arguments
;; of the emit_insn call are on lines not visible in this excerpt.
5455 (define_expand "vec_widen_<US>mult_lo_<mode>"
5456 [(match_operand:<V_unpack> 0 "register_operand" "")
5457 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5458 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5459 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5461 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5464 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5465 RTVEC_ELT (v, i) = GEN_INT (i);
5466 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5468 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; neon_vec_<US>mult_hi_<mode> (mode iterator VU): widening multiply of the
;; selected <V_HALF> parts of operands 1 and 3.  Each half is extended
;; (code SE) to <V_unpack> before the mult; operand 2 is the selection
;; vector and must satisfy vect_par_constant_high.  Emitted as vmull with
;; the %f modifier on both inputs.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
;; NOTE(review): the selector of the second vec_select is on a line not
;; visible in this excerpt.
5476 (define_insn "neon_vec_<US>mult_hi_<mode>"
5477 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5478 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5479 (match_operand:VU 1 "register_operand" "w")
5480 (match_operand:VU 2 "vect_par_constant_high" "")))
5481 (SE:<V_unpack> (vec_select:<V_HALF>
5482 (match_operand:VU 3 "register_operand" "w")
5484 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5485 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5486 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vec_widen_<US>mult_hi_<mode> (mode iterator VU): expander for the
;; upper-half widening multiply of operands 1 and 2.  The preparation code
;; builds a PARALLEL of <V_mode_nunits>/2 constant indices,
;; <V_mode_nunits>/2 + i for i = 0 .. nunits/2 - 1, and passes it on to
;; gen_neon_vec_<US>mult_hi_<mode>.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
;; NOTE(review): the declarations of t1 and i and the remaining arguments
;; of the emit_insn call are on lines not visible in this excerpt.
5489 (define_expand "vec_widen_<US>mult_hi_<mode>"
5490 [(match_operand:<V_unpack> 0 "register_operand" "")
5491 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5492 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5493 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5495 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5498 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5499 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5500 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5502 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; neon_vec_<US>shiftl_<mode> (mode iterator VW): widening shift left.
;; The RTL is an SE extension to <V_widen> of (ashift operand 1 by
;; operand 2); operand 2 must satisfy
;; const_neon_scalar_shift_amount_operand.  Emitted as vshll from the D
;; register operand 1 (%P1) into the Q register operand 0 (%q0).
;; NOTE(review): the insn condition is on a line not visible in this
;; excerpt.
5511 (define_insn "neon_vec_<US>shiftl_<mode>"
5512 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5513 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5514 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5517 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5519 [(set_attr "type" "neon_shift_imm_long")]
;; vec_widen_<US>shiftl_lo_<mode> (mode iterator VU): expander for the
;; widening shift of the lower half.  It takes the <V_HALF>mode subreg of
;; operand 1 at byte offset 0 and hands it to
;; gen_neon_vec_<US>shiftl_<V_half>; operand 2 is the immediate shift count.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
;; NOTE(review): the final argument(s) of the emit_insn call are on lines
;; not visible in this excerpt.
5522 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5523 [(match_operand:<V_unpack> 0 "register_operand" "")
5524 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5525 (match_operand:SI 2 "immediate_operand" "i")]
5526 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5528 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5529 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; vec_widen_<US>shiftl_hi_<mode> (mode iterator VU): expander for the
;; widening shift of the upper half.  It takes the <V_HALF>mode subreg of
;; operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode) and hands it to
;; gen_neon_vec_<US>shiftl_<V_half>; operand 2 is the immediate shift count.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
;; NOTE(review): the final argument(s) of the emit_insn call are on lines
;; not visible in this excerpt.
5535 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5536 [(match_operand:<V_unpack> 0 "register_operand" "")
5537 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5538 (match_operand:SI 2 "immediate_operand" "i")]
5539 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5541 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5542 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5543 GET_MODE_SIZE (<V_HALF>mode)),
5549 ;; Vectorize for non-neon-quad case
;; neon_unpack<US>_<mode> (mode iterator VDI): extend (code SE) the whole
;; vector operand 1 into the <V_widen> destination.  Emitted as vmovl from
;; the D register operand 1 (%P1) into the Q register operand 0 (%q0).
;; The "type" attribute is neon_shift_imm_long, the same scheduling class
;; this file assigns to the other vmovl patterns
;; (neon_vec_unpack<US>_lo_<mode> and neon_vec_unpack<US>_hi_<mode>).
;; NOTE(review): the insn condition is on a line not visible in this
;; excerpt.
5550 (define_insn "neon_unpack<US>_<mode>"
5551 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5552 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5554 "vmovl.<US><V_sz_elem> %q0, %P1"
5555 [(set_attr "type" "neon_shift_imm_long")]
;; vec_unpack<US>_lo_<mode> (mode iterator VDI): expander for unpacking a
;; vector in the non-quad case.  The whole of operand 1 is first widened
;; into a fresh <V_widen>mode pseudo with gen_neon_unpack<US>_<mode>; then
;; gen_neon_vget_low<V_widen_l> copies the low part of that temporary into
;; operand 0.
;; NOTE(review): the enabling condition is on a line not visible in this
;; excerpt.
5558 (define_expand "vec_unpack<US>_lo_<mode>"
5559 [(match_operand:<V_double_width> 0 "register_operand" "")
5560 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5563 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5564 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5565 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; vec_unpack<US>_hi_<mode> (mode iterator VDI): expander for unpacking a
;; vector in the non-quad case.  The whole of operand 1 is first widened
;; into a fresh <V_widen>mode pseudo with gen_neon_unpack<US>_<mode>; then
;; gen_neon_vget_high<V_widen_l> copies the high part of that temporary
;; into operand 0.
;; NOTE(review): the enabling condition is on a line not visible in this
;; excerpt.
5571 (define_expand "vec_unpack<US>_hi_<mode>"
5572 [(match_operand:<V_double_width> 0 "register_operand" "")
5573 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5576 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5577 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5578 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; neon_vec_<US>mult_<mode> (mode iterator VDI): widening multiply of two
;; whole D-register vectors.  Operand 1 is extended (code SE) to <V_widen>
;; before the mult.  Emitted as vmull from %P1 and %P2 into the Q register
;; operand 0.
;; NOTE(review): the extension wrapped around operand 2 and the insn
;; condition are on lines not visible in this excerpt.
5584 (define_insn "neon_vec_<US>mult_<mode>"
5585 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5586 (mult:<V_widen> (SE:<V_widen>
5587 (match_operand:VDI 1 "register_operand" "w"))
5589 (match_operand:VDI 2 "register_operand" "w"))))]
5591 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5592 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vec_widen_<US>mult_hi_<mode> (mode iterator VDI): expander for the
;; widening multiply in the non-quad case.  The full widening product of
;; operands 1 and 2 is computed into a fresh <V_widen>mode pseudo with
;; gen_neon_vec_<US>mult_<mode>; then gen_neon_vget_high<V_widen_l> copies
;; the high part of that temporary into operand 0.
;; NOTE(review): the enabling condition is on a line not visible in this
;; excerpt.
5595 (define_expand "vec_widen_<US>mult_hi_<mode>"
5596 [(match_operand:<V_double_width> 0 "register_operand" "")
5597 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5598 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5601 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5602 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5603 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; vec_widen_<US>mult_lo_<mode> (mode iterator VDI): expander for the
;; widening multiply in the non-quad case.  The full widening product of
;; operands 1 and 2 is computed into a fresh <V_widen>mode pseudo with
;; gen_neon_vec_<US>mult_<mode>; then gen_neon_vget_low<V_widen_l> copies
;; the low part of that temporary into operand 0.
;; NOTE(review): the enabling condition is on a line not visible in this
;; excerpt.
5610 (define_expand "vec_widen_<US>mult_lo_<mode>"
5611 [(match_operand:<V_double_width> 0 "register_operand" "")
5612 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5613 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5616 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5617 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5618 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; vec_widen_<US>shiftl_hi_<mode> (mode iterator VDI): expander for the
;; widening shift left in the non-quad case.  Operand 1 is shifted by the
;; immediate operand 2 and widened into a fresh <V_widen>mode pseudo with
;; gen_neon_vec_<US>shiftl_<mode>; then gen_neon_vget_high<V_widen_l>
;; copies the high part of that temporary into operand 0.
;; NOTE(review): the enabling condition is on a line not visible in this
;; excerpt.
5625 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5626 [(match_operand:<V_double_width> 0 "register_operand" "")
5627 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5628 (match_operand:SI 2 "immediate_operand" "i")]
5631 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5632 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5633 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; vec_widen_<US>shiftl_lo_<mode> (mode iterator VDI): expander for the
;; widening shift left in the non-quad case.  Operand 1 is shifted by the
;; immediate operand 2 and widened into a fresh <V_widen>mode pseudo with
;; gen_neon_vec_<US>shiftl_<mode>; then gen_neon_vget_low<V_widen_l>
;; copies the low part of that temporary into operand 0.
;; NOTE(review): the enabling condition is on a line not visible in this
;; excerpt.
5639 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5640 [(match_operand:<V_double_width> 0 "register_operand" "")
5641 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5642 (match_operand:SI 2 "immediate_operand" "i")]
5645 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5646 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5647 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5653 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5654 ; because the ordering of vector elements in Q registers is different from what
5655 ; the semantics of the instructions require.
;; vec_pack_trunc_<mode> (mode iterator VN): truncate operands 1 and 2 to
;; <V_narrow> and concatenate the results into the <V_narrow_pack>
;; destination.  The template is two vmovn instructions: the first writes
;; %e0 from operand 1, the second writes %f0 from operand 2.  Operand 0 is
;; earlyclobber ("=&w"), which matches the template: part of operand 0 is
;; written before operand 2 is read.  The "length" attribute is 8 for the
;; two-instruction sequence.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
5657 (define_insn "vec_pack_trunc_<mode>"
5658 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5659 (vec_concat:<V_narrow_pack>
5660 (truncate:<V_narrow>
5661 (match_operand:VN 1 "register_operand" "w"))
5662 (truncate:<V_narrow>
5663 (match_operand:VN 2 "register_operand" "w"))))]
5664 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5665 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5666 [(set_attr "type" "multiple")
5667 (set_attr "length" "8")]
5670 ;; For the non-quad case.
;; neon_vec_pack_trunc_<mode> (mode iterator VN): truncate the Q-register
;; operand 1 to <V_narrow> in the D-register operand 0 with a single vmovn.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
5671 (define_insn "neon_vec_pack_trunc_<mode>"
5672 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5673 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5674 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5675 "vmovn.i<V_sz_elem>\t%P0, %q1"
5676 [(set_attr "type" "neon_move_narrow_q")]
;; vec_pack_trunc_<mode> (mode iterator VSHFT): expander for the pack in the
;; non-quad case.  Operands 1 and 2 are assembled into one fresh
;; <V_DOUBLE>mode pseudo -- operand 1 via gen_move_lo_quad_<V_double>,
;; operand 2 via gen_move_hi_quad_<V_double> -- and that temporary is then
;; narrowed into operand 0 with gen_neon_vec_pack_trunc_<V_double>.
;; Only enabled for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
5679 (define_expand "vec_pack_trunc_<mode>"
5680 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5681 (match_operand:VSHFT 1 "register_operand" "")
5682 (match_operand:VSHFT 2 "register_operand")]
5683 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5685 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5687 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5688 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5689 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; neon_vabd<mode>_2 (mode iterator VDQ): absolute difference expressed as
;; abs (minus operand 1, operand 2), emitted as vabd.<V_s_elem>.
;; For float modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The "type" attribute is
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
5693 (define_insn "neon_vabd<mode>_2"
5694 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5695 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5696 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5697 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5698 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5700 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5701 (const_string "neon_fp_abd_s<q>")
5702 (const_string "neon_abd<q>")))]
;; neon_vabd<mode>_3 (mode iterator VDQ): absolute difference expressed as
;; abs of an unspec over operands 1 and 2, emitted as vabd.<V_if_elem>.
;; For float modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The "type" attribute is
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
;; NOTE(review): the unspec code is on a line not visible in this excerpt.
5705 (define_insn "neon_vabd<mode>_3"
5706 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5707 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5708 (match_operand:VDQ 2 "s_register_operand" "w")]
5710 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5711 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5713 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5714 (const_string "neon_fp_abd_s<q>")
5715 (const_string "neon_abd<q>")))]
5718 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; Split for sign_extend SI -> DI.  It applies after reload when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  The replacement sequence
;; duplicates operand 1 into the destination register viewed as V2SI
;; (operands[2]), then shifts the DI register right arithmetically by 32.
;; NOTE(review): the line that introduces this pattern (presumably
;; define_split) is not visible in this excerpt.
5721 [(set (match_operand:DI 0 "s_register_operand" "")
5722 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5723 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5724 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5725 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5727 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; Split for sign_extend HI -> DI.  It applies after reload when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  The replacement sequence
;; duplicates operand 1 into the destination register viewed as V4HI
;; (operands[2]), then shifts the DI register right arithmetically by 48.
;; NOTE(review): the line that introduces this pattern (presumably
;; define_split) is not visible in this excerpt.
5731 [(set (match_operand:DI 0 "s_register_operand" "")
5732 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5733 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5734 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5735 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5737 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; Split for sign_extend QI -> DI.  It applies after reload when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  The replacement sequence
;; duplicates operand 1 into the destination register viewed as V8QI
;; (operands[2]), then shifts the DI register right arithmetically by 56.
;; NOTE(review): the line that introduces this pattern (presumably
;; define_split) is not visible in this excerpt.
5741 [(set (match_operand:DI 0 "s_register_operand" "")
5742 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5743 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5744 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5745 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5747 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; Split for zero_extend SI -> DI.  It applies after reload when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  The replacement sequence
;; duplicates operand 1 into the destination register viewed as V2SI
;; (operands[2]), then shifts the DI register right logically by 32.
;; NOTE(review): the line that introduces this pattern (presumably
;; define_split) is not visible in this excerpt.
5751 [(set (match_operand:DI 0 "s_register_operand" "")
5752 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5753 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5754 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5755 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5757 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; Split for zero_extend HI -> DI.  It applies after reload when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  The replacement sequence
;; duplicates operand 1 into the destination register viewed as V4HI
;; (operands[2]), then shifts the DI register right logically by 48.
;; NOTE(review): the line that introduces this pattern (presumably
;; define_split) is not visible in this excerpt.
5761 [(set (match_operand:DI 0 "s_register_operand" "")
5762 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5763 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5764 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5765 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5767 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; Split for zero_extend QI -> DI.  It applies after reload when the DI
;; destination is a VFP register (IS_VFP_REGNUM).  The replacement sequence
;; duplicates operand 1 into the destination register viewed as V8QI
;; (operands[2]), then shifts the DI register right logically by 56.
;; NOTE(review): the line that introduces this pattern (presumably
;; define_split) is not visible in this excerpt.
5771 [(set (match_operand:DI 0 "s_register_operand" "")
5772 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5773 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5774 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5775 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5777 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));