1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The attribute accepts the values vadd, vmin and vmax, and defaults to vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX modes.  The condition requires at least one of
;; the two operands to be a register.  Alternative 2 (source constraint Dn)
;; is checked with neon_immediate_valid_for_move and printed either as
;; vmov.f32 or as vmov.i<width> through the static template buffer.
;; Alternatives 1 and 3 are printed by output_move_neon, alternatives 4 and 5
;; are single vmov instructions that name the %Q/%R halves of the other
;; operand, and the remaining alternatives use output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Move pattern for the VQXMOV modes; same alternative layout as the VDX
;; variant.  The condition requires at least one operand to be a register.
;; Alternative 2 is checked with neon_immediate_valid_for_move and printed as
;; vmov.f32 or vmov.i<width> on the %q register.  Alternatives 4 and 5 each
;; emit two vmov instructions (one per %e/%f half), which is reflected in
;; their length of 8; the remaining alternatives use output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Expander for TImode moves.  While new pseudos may still be created, a
;; move whose destination is not a REG has its source forced into a register
;; first.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves of the VSTRUCT modes.  While new pseudos may still be
;; created, a move whose destination is not a REG has its source forced into
;; a register of the same mode first.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move insn for the VSTRUCT modes; at least one operand must be a register.
;; Alternatives 1 (store) and 2 (load) are printed by output_move_neon.  The
;; length attribute is not a constant: it is computed per insn by
;; arm_attr_length_move_neon.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; EImode register-to-register copy, rewritten after reload into two sets:
;; a TImode piece at the base register number and a DImode piece at base + 4.
;; The dest/src pieces are handed to neon_disambiguate_copy, which fills in
;; operands 0-3 used by the replacement pattern.
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping source/destination ranges are copied safely -- confirm.
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
;; OImode register-to-register copy, rewritten after reload into two TImode
;; sets at register offsets 0 and 4.  The pieces are handed to
;; neon_disambiguate_copy, which fills in operands 0-3 used by the
;; replacement pattern.
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
;; CImode register-to-register copy, rewritten after reload into three TImode
;; sets at register offsets 0, 4 and 8.  The pieces are handed to
;; neon_disambiguate_copy, which fills in operands 0-5 used by the
;; replacement pattern.
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
;; XImode register-to-register copy, rewritten after reload into four TImode
;; sets at register offsets 0, 4, 8 and 12.  The pieces are handed to
;; neon_disambiguate_copy, which fills in operands 0-7 used by the
;; replacement pattern.
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned vector moves (VDQX modes); only enabled for
;; little-endian NEON targets with unaligned_access.  If neither operand is
;; a register, operand 1 is forced into one.  Whichever operand is the memory
;; side (operand 1 when operand 0 is a register, otherwise operand 0) then
;; has its address forced into a Pmode register unless it already satisfies
;; neon_vector_mem_operand.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode register: emitted as a single vst1 of the
;; %P register to the address of operand 0.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load into a VDX-mode register: emitted as a single vld1 of the
;; %P register from the address of operand 1.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register: emitted as a single vst1 of the
;; %q register to the address of operand 0.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load into a VQX-mode register: emitted as a single vld1 of the
;; %q register from the address of operand 1.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of a VD-mode vector (operand 3 is
;; tied to the output).  Operand 2 is a bit mask: the lane number is
;; recovered as ffs (mask) - 1 and mirrored when BYTES_BIG_ENDIAN.
;; Alternative 0 loads the lane from memory with vld1; the other alternative
;; uses vmov from operand 1.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert scalar operand 1 into one lane of a VQ2-mode vector.  The lane
;; number is ffs (operand 2) - 1.  It is split into a lane within a half
;; (elem % half_elts, mirrored when BYTES_BIG_ENDIAN) and a register offset
;; (0 or 2) selecting the half; operand 0 is then replaced by the
;; <V_HALF>-mode register at that offset before printing vld1 (alternative 0)
;; or vmov.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ2 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert DImode operand 1 into one element of a V2DI vector.  The element
;; number is ffs (operand 2) - 1; operand 0 is replaced by the DImode
;; register at offset 2 * element, which is then loaded with vld1.64
;; (alternative 0) or written with a two-register vmov.
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for vec_set on the VDQ modes.  Converts the element index in
;; operand 2 into the mask 1 << index and emits vec_set<mode>_internal with
;; operand 0 as both the output and the vector being modified.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
;; Extract lane operand 2 of a VD-mode vector into operand 0.  The lane
;; number is used directly, mirrored when BYTES_BIG_ENDIAN.  Alternative 0
;; stores the lane to memory with vst1; the other alternative moves it to
;; operand 0 with vmov.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract lane operand 2 of a VQ2-mode vector into operand 0.  The lane is
;; split into a lane within a half (index % half_elts, mirrored when
;; BYTES_BIG_ENDIAN) and a register offset (0 or 2) selecting the half;
;; operand 1 is replaced by the <V_HALF>-mode register at that offset before
;; printing vst1 (alternative 0) or vmov.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ2 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract element operand 2 of a V2DI vector into DImode operand 0.
;; Operand 1 is replaced by the DImode register at offset 2 * index, which
;; is stored with vst1.64 (alternative 0) or moved out with a two-register
;; vmov.
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vector initialisation on the VDQ modes; all of the work is
;; delegated to neon_expand_vector_init (target, initialiser).
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
;; Vector add for the VDQ modes, emitted as vadd.  Float modes are only
;; matched when flag_unsafe_math_optimizations is set; the type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
;; DImode add with a CC clobber.  Alternatives 0 and 3 emit vadd.i64; they
;; have identical constraints and differ only in the arch attribute
;; (neon_for_64bits versus avoid_neon_for_64bits) and in the "?" penalty on
;; alternative 3.  The remaining alternatives have type "multiple" and
;; length 8.
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtract for the VDQ modes, emitted as vsub.  Float modes are only
;; matched when flag_unsafe_math_optimizations is set; the type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
;; DImode subtract with a CC clobber.  Alternatives 0 and 4 emit vsub.i64
;; and differ only in the arch attribute (neon_for_64bits versus
;; avoid_neon_for_64bits) and the "?" penalty.  Alternatives 1-3 emit a
;; subs/sbc pair on the low (%Q) and high (%R) words, which is why they are
;; marked as clobbering the condition codes and have length 8.
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiply for the VDQW modes, emitted as vmul.  Float modes are
;; only matched when flag_unsafe_math_optimizations is set; the type
;; attribute is neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, with the accumulator (operand 1) tied to the output.
;; Emitted as vmla.  Float modes are only matched when
;; flag_unsafe_math_optimizations is set.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with the accumulator (operand 1) tied to the output.  Emitted
;; as vmls.  Float modes are only matched when
;; flag_unsafe_math_optimizations is set.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
580 ;; phase can use them when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes (operand 1 * operand 2 + operand 3,
;; accumulator tied to the output), emitted as vfma.  This named variant
;; additionally requires flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and output as fma<mode>4, but enabled whenever TARGET_NEON and
;; TARGET_FMA hold, without requiring flag_unsafe_math_optimizations.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: (-operand 1) * operand 2 +
;; operand 3, accumulator tied to the output, emitted as vfms.  Requires
;; flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and output as *fmsub<mode>4, but enabled whenever TARGET_NEON
;; and TARGET_FMA hold, without requiring flag_unsafe_math_optimizations.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Vector round for the VCVTF modes, one pattern per NEON_VRINT variant.
;; Requires TARGET_FPU_ARMV8.  The emitted mnemonic always carries the .f32
;; suffix.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Float-to-integer conversion with an explicit rounding variant: the VCVTF
;; input goes through an unspec and the result is fixed (signed or unsigned,
;; via FIXUORS) into the <V_cmp_result> mode.  Requires TARGET_FPU_ARMV8 and
;; is marked as not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 is the register form (vorr);
;; alternative 1 takes an immediate (constraint Dl) with operand 1 tied to
;; the output and is printed by neon_output_logic_immediate.
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
649 switch (which_alternative)
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
657 [(set_attr "type" "neon_logic<q>")]
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 is the register form (vand);
;; alternative 1 takes an immediate (constraint DL) with operand 1 tied to
;; the output and is printed by neon_output_logic_immediate, called with
;; "vand" and a fourth argument of 1 (the ior pattern passes 0).
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
671 switch (which_alternative)
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
679 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2, emitted as
;; vorn with operand 2 as the inverted input.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.  The first alternative
;; has type neon_logic; the other three are typed "multiple" and are
;; restricted by the arch attribute (a, t2, t2).  The split condition only
;; applies when operand 0 is not a VFP register.  The replacement pattern
;; performs the operation separately on SImode pieces, which the preparation
;; code obtains with gen_lowpart/gen_highpart.
;; NOTE(review): the preparation code also contains a one_cmpldi2 + iordi3
;; sequence; confirm which guard selects between the two paths.
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
;; AND-NOT (bit clear) for the VDQ modes: operand 0 = operand 1 & ~operand 2,
;; emitted as vbic with operand 2 as the inverted input.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
740 ;; Compare to *anddi_notdi_di.
;; DImode AND-NOT: operand 0 = operand 1 & ~operand 2.  The first
;; alternative has type neon_logic; the other two are typed "multiple" with
;; length 8.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
;; Bitwise exclusive-OR for the VDQ modes, emitted as veor.
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
;; Bitwise NOT for the VDQ modes, emitted as vmvn (typed as neon_move<q>).
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
;; Absolute value for the VDQW modes, emitted as vabs.  The type attribute is
;; neon_fp_abs_s<q> for float modes and neon_abs<q> otherwise.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
;; Negation for the VDQW modes, emitted as vneg.  The type attribute is
;; neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.  The vashr and
;; vlshr expanders in this file use this pattern to negate register shift
;; counts.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
;; DImode negate.  The pattern clobbers a DImode scratch (only actually
;; allocated, as "&w", in the second alternative) and the condition codes.
;; All alternatives have length 8 and type "multiple"; the two splits that
;; follow handle the VFP-register and core-register cases.
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
804 ; Split negdi2_neon for vfp registers
; After reload, when operand 0 is a VFP register, the negate becomes
; "temp = 0; operand 0 = temp - operand 1".  The temp is the scratch
; (operand 2) if one was allocated as a register; otherwise operand 0 itself
; is used as the zeroed temp.
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
820 ; Split negdi2_neon for core registers
; After reload, when operand 0 is a general register, the pattern is
; rewritten to a plain DImode negate with only the CC clobber, i.e. the
; scratch clobber is dropped.
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
;; Unsigned element-wise minimum for the VDQIW modes, emitted as vmin with
;; the unsigned element suffix.
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned element-wise maximum for the VDQIW modes, emitted as vmax with
;; the unsigned element suffix.
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
;; Signed element-wise minimum for the VDQW modes, emitted as vmin with the
;; <V_s_elem> suffix.  The type attribute is neon_fp_minmax_s<q> for float
;; modes and neon_minmax<q> otherwise.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
;; Signed element-wise maximum for the VDQW modes, emitted as vmax with the
;; <V_s_elem> suffix.  The type attribute is neon_fp_minmax_s<q> for float
;; modes and neon_minmax<q> otherwise.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a vector
;; register (vshl); alternative 1 shifts by an immediate (constraint Dn) and
;; is printed by neon_output_shift_immediate with the "vshl" mnemonic and
;; the 'i' type character.
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
885 switch (which_alternative)
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
890 VALID_NEON_QREG_MODE (<MODE>mode),
892 default: gcc_unreachable ();
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift by an immediate for the VDQIW modes, printed by
;; neon_output_shift_immediate with the "vshr" mnemonic and the 's' (signed)
;; type character.  Used by the vashr<mode>3 expander for non-register
;; shift counts.
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
908 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift by an immediate for the VDQIW modes, printed by
;; neon_output_shift_immediate with the "vshr" mnemonic and the 'u'
;; (unsigned) type character.  Used by the vlshr<mode>3 expander for
;; non-register shift counts.
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
921 [(set_attr "type" "neon_shift_imm<q>")]
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
925 ; amount, with signed operands. This is essentially the same as vashl<mode>3
926 ; above, but using an unspec in case GCC tries anything tricky with negative
;; Register-count vshl with the signed element suffix, wrapped in
;; UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander emits this with a negated
;; shift count.
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
;; Register-count vshl with the unsigned element suffix, wrapped in
;; UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander emits this with a
;; negated shift count.
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for vector arithmetic right shift (VDQIW modes).  A register
;; shift count is negated into a fresh pseudo and applied with
;; ashl<mode>3_signed; any other shift count goes to vashr<mode>3_imm.
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
958 if (s_register_operand (operands[2], <MODE>mode))
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expander for vector logical right shift (VDQIW modes).  A register shift
;; count is negated into a fresh pseudo and applied with
;; ashl<mode>3_unsigned; any other shift count goes to vlshr<mode>3_imm.
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
975 if (s_register_operand (operands[2], <MODE>mode))
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
;; Alternative 0 loads the SImode count from memory into lane 0 of the
;; destination with vld1.32; alternative 1 moves it into lane 0 with
;; vmov.32.  The destination is modelled as a full DImode set via an unspec.
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; 64-bit left shift on NEON registers, only valid after reload.
;; Alternative 0 shifts by a constant, which the insn condition restricts
;; to the range 0..63; alternative 1 shifts by a count held in a NEON
;; register.  Both alternatives assemble to vshl.u64.
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; 64-bit left shift by an SImode count, with alternatives for NEON ("w")
;; and core ("r") destination registers.  The split condition is
;; "TARGET_NEON && reload_completed".  The split code does the following:
;;  - destination in a VFP register (IS_VFP_REGNUM): a constant count
;;    below 1 emits a plain gen_movdi, a constant count above 63 is
;;    replaced by 63, gen_neon_load_count places the count in scratch
;;    operand 5, and ashldi3_neon_noclobber is emitted without the clobbers;
;;  - core-register code: a constant shift by 1 whose source and
;;    destination either do not overlap or are the same register uses
;;    arm_ashldi3_1bit; arm_emit_coreregs_64bit_shift is called with
;;    scratch operands 3 and 4.
;; NOTE(review): confirm the exact if/else nesting against the full split
;; body before relying on this summary.
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1027 "TARGET_NEON && reload_completed"
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1033 if (CONST_INT_P (operands[2]))
1035 if (INTVAL (operands[2]) < 1)
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1071 ; The shift amount needs to be negated for right-shifts
;; vshl.s64 on D registers, wrapped in UNSPEC_ASHIFT_SIGNED and only valid
;; after reload.  Operand 2 is the DImode register holding the shift count.
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1082 ; The shift amount needs to be negated for right-shifts
;; vshl.u64 on D registers, wrapped in UNSPEC_ASHIFT_UNSIGNED and only
;; valid after reload.  Operand 2 is the DImode register holding the
;; shift count.
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
;; Arithmetic right shift of a 64-bit NEON register by a constant
;; (vshr.s64).  The insn condition requires reload_completed and accepts
;; counts in the range 1..64.
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
;; Logical right shift of a 64-bit NEON register by a constant (vshr.u64).
;; The insn condition requires reload_completed and accepts counts in the
;; range 1..64.
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
;; 64-bit right shifts (iterated over RSHIFTS) by an SImode count, for NEON
;; ("w") and core ("r") destination registers.  The split code does the
;; following:
;;  - VFP destination, constant count: a count below 1 emits gen_movdi, a
;;    count above 64 is replaced by 64, and the matching
;;    <shift>di3_neon_imm_noclobber insn is emitted without the clobbers;
;;  - VFP destination, register count: the count is negated into scratch
;;    operand 3, loaded into scratch operand 5 with neon_load_count, and a
;;    <shifttype>_shift_di3_neon insn (left shift by a negative amount) is
;;    emitted;
;;  - core-register code: a constant shift by 1 whose source and
;;    destination either do not overlap or are the same register uses
;;    arm_<shift>di3_1bit; arm_emit_coreregs_64bit_shift is called with
;;    scratch operands 3 and 4.
;; NOTE(review): confirm the exact if/else nesting against the full split
;; body before relying on this summary.
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1125 "TARGET_NEON && reload_completed"
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1131 if (CONST_INT_P (operands[2]))
1133 if (INTVAL (operands[2]) < 1)
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1175 ;; Widening operations
;; Adds the sign-extended VW-mode operand 1 to the <V_widen> accumulator
;; operand 2 with vaddw.<V_s_elem>.  The assembly template lists operand 2
;; (the wide value) before operand 1 (the narrow value).
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "%w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
;; Adds the zero-extended VW-mode operand 1 to the <V_widen> accumulator
;; operand 2 with vaddw.<V_u_elem>.  The assembly template lists operand 2
;; (the wide value) before operand 1 (the narrow value).
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "%w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1197 ;; Helpers for quad-word reduction operations
1199 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1200 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1201 ; N/2-element vector.
;; V4SI variant: combines elements 0-1 of operand 1 with elements 2-3 and
;; produces a V2SI result.  The template passes %e1 and %f1 as the two
;; source operands of <VQH_mnem>.<VQH_sign>32.
1203 (define_insn "quad_halves_<code>v4si"
1204 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1206 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1207 (parallel [(const_int 0) (const_int 1)]))
1208 (vec_select:V2SI (match_dup 1)
1209 (parallel [(const_int 2) (const_int 3)]))))]
1211 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1212 [(set_attr "vqh_mnem" "<VQH_mnem>")
1213 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF variant: combines elements 0-1 of operand 1 with elements 2-3 and
;; produces a V2SF result using <VQH_mnem>.f32.  Only enabled when
;; flag_unsafe_math_optimizations is set.
1216 (define_insn "quad_halves_<code>v4sf"
1217 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1219 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1220 (parallel [(const_int 0) (const_int 1)]))
1221 (vec_select:V2SF (match_dup 1)
1222 (parallel [(const_int 2) (const_int 3)]))))]
1223 "TARGET_NEON && flag_unsafe_math_optimizations"
1224 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1225 [(set_attr "vqh_mnem" "<VQH_mnem>")
1226 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant: combines elements 0-3 of operand 1 with elements 4-7 and
;; produces a V4HI result using <VQH_mnem>.<VQH_sign>16.
;; Operand 0 is only written by this pattern, so it uses the "=w"
;; constraint, matching the v4si and v4sf variants.
1229 (define_insn "quad_halves_<code>v8hi"
1230 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1232 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1233 (parallel [(const_int 0) (const_int 1)
1234 (const_int 2) (const_int 3)]))
1235 (vec_select:V4HI (match_dup 1)
1236 (parallel [(const_int 4) (const_int 5)
1237 (const_int 6) (const_int 7)]))))]
1239 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1240 [(set_attr "vqh_mnem" "<VQH_mnem>")
1241 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant: combines elements 0-7 of operand 1 with elements 8-15
;; and produces a V8QI result using <VQH_mnem>.<VQH_sign>8.
;; Operand 0 is only written by this pattern, so it uses the "=w"
;; constraint, matching the v4si and v4sf variants.
1244 (define_insn "quad_halves_<code>v16qi"
1245 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1247 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1248 (parallel [(const_int 0) (const_int 1)
1249 (const_int 2) (const_int 3)
1250 (const_int 4) (const_int 5)
1251 (const_int 6) (const_int 7)]))
1252 (vec_select:V8QI (match_dup 1)
1253 (parallel [(const_int 8) (const_int 9)
1254 (const_int 10) (const_int 11)
1255 (const_int 12) (const_int 13)
1256 (const_int 14) (const_int 15)]))))]
1258 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1259 [(set_attr "vqh_mnem" "<VQH_mnem>")
1260 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Expander taking a 128-bit (ANY128) operand 0 and a <V_HALF> operand 1.
;; It calls emit_move_insn on a <V_HALF>mode subreg of operand 0 taken at
;; byte offset GET_MODE_SIZE (<V_HALF>mode), i.e. starting one half-vector
;; into the register.
1263 (define_expand "move_hi_quad_<mode>"
1264 [(match_operand:ANY128 0 "s_register_operand" "")
1265 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1268 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1269 GET_MODE_SIZE (<V_HALF>mode)),
;; Expander taking a 128-bit (ANY128) operand 0 and a <V_HALF> operand 1.
;; Like move_hi_quad_<mode>, it calls emit_move_insn on a <V_HALF>mode
;; subreg of operand 0.
1274 (define_expand "move_lo_quad_<mode>"
1275 [(match_operand:ANY128 0 "s_register_operand" "")
1276 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1279 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1285 ;; Reduction operations
;; Sum reduction of a 64-bit (VD) vector to a scalar.  neon_pairwise_reduce
;; is driven with neon_vpadd_internal<mode> into a temporary vector, and
;; element 0 of that vector is extracted into operand 0.  Floating-point
;; modes additionally require flag_unsafe_math_optimizations.
1287 (define_expand "reduc_plus_scal_<mode>"
1288 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1289 (match_operand:VD 1 "s_register_operand" "")]
1290 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1292 rtx vec = gen_reg_rtx (<MODE>mode);
1293 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1294 &gen_neon_vpadd_internal<mode>);
1295 /* The same result is actually computed into every element. */
1296 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Sum reduction of a 128-bit (VQ) vector to a scalar.  The two halves are
;; first added into a <V_HALF>mode temporary with quad_halves_plus<mode>,
;; then the half-width reduc_plus_scal_<V_half> expander finishes the job.
;; Disabled for big-endian (!BYTES_BIG_ENDIAN).
1300 (define_expand "reduc_plus_scal_<mode>"
1301 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1302 (match_operand:VQ 1 "s_register_operand" "")]
1303 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1304 && !BYTES_BIG_ENDIAN"
1306 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1308 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1309 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction of a V2DI vector to a DImode scalar.  The sum is computed
;; into a V2DI temporary by arm_reduc_plus_internal_v2di and element 0 is
;; then extracted into operand 0.  Disabled for big-endian.
1314 (define_expand "reduc_plus_scal_v2di"
1315 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1316 (match_operand:V2DI 1 "s_register_operand" "")]
1317 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1319 rtx vec = gen_reg_rtx (V2DImode);
1321 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1322 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
;; Unspec insn behind reduc_plus_scal_v2di: a single vadd.i64 whose sources
;; are %e1 and %f1 of the V2DI input and whose destination is %e0 of the
;; V2DI output.  Disabled for big-endian.
1327 (define_insn "arm_reduc_plus_internal_v2di"
1328 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1329 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1332 "vadd.i64\t%e0, %e1, %f1"
1333 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of a 64-bit (VD) vector to a scalar.
;; neon_pairwise_reduce is driven with neon_vpsmin<mode> into a temporary
;; vector, and element 0 is extracted into operand 0.  Floating-point
;; modes additionally require flag_unsafe_math_optimizations.
1336 (define_expand "reduc_smin_scal_<mode>"
1337 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1338 (match_operand:VD 1 "s_register_operand" "")]
1339 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1341 rtx vec = gen_reg_rtx (<MODE>mode);
1343 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1344 &gen_neon_vpsmin<mode>);
1345 /* The result is computed into every element of the vector. */
1346 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction of a 128-bit (VQ) vector to a scalar.  The two
;; halves are combined into a <V_HALF>mode temporary with
;; quad_halves_smin<mode>, then reduc_smin_scal_<V_half> reduces that.
;; Disabled for big-endian.
1350 (define_expand "reduc_smin_scal_<mode>"
1351 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1352 (match_operand:VQ 1 "s_register_operand" "")]
1353 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1354 && !BYTES_BIG_ENDIAN"
1356 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1358 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1359 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction of a 64-bit (VD) vector to a scalar.
;; neon_pairwise_reduce is driven with neon_vpsmax<mode> into a temporary
;; vector, and element 0 is extracted into operand 0.  Floating-point
;; modes additionally require flag_unsafe_math_optimizations.
1364 (define_expand "reduc_smax_scal_<mode>"
1365 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1366 (match_operand:VD 1 "s_register_operand" "")]
1367 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1369 rtx vec = gen_reg_rtx (<MODE>mode);
1370 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1371 &gen_neon_vpsmax<mode>);
1372 /* The result is computed into every element of the vector. */
1373 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction of a 128-bit (VQ) vector to a scalar.  The two
;; halves are combined into a <V_HALF>mode temporary with
;; quad_halves_smax<mode>, then reduc_smax_scal_<V_half> reduces that.
;; Disabled for big-endian.
1377 (define_expand "reduc_smax_scal_<mode>"
1378 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1379 (match_operand:VQ 1 "s_register_operand" "")]
1380 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1381 && !BYTES_BIG_ENDIAN"
1383 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1385 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1386 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction of a 64-bit integer (VDI) vector to a scalar.
;; neon_pairwise_reduce is driven with neon_vpumin<mode> into a temporary
;; vector, and element 0 is extracted into operand 0.
1391 (define_expand "reduc_umin_scal_<mode>"
1392 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1393 (match_operand:VDI 1 "s_register_operand" "")]
1396 rtx vec = gen_reg_rtx (<MODE>mode);
1397 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1398 &gen_neon_vpumin<mode>);
1399 /* The result is computed into every element of the vector. */
1400 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction of a 128-bit integer (VQI) vector to a
;; scalar.  The two halves are combined into a <V_HALF>mode temporary with
;; quad_halves_umin<mode>, then reduc_umin_scal_<V_half> reduces that.
;; Disabled for big-endian.
1404 (define_expand "reduc_umin_scal_<mode>"
1405 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1406 (match_operand:VQI 1 "s_register_operand" "")]
1407 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1409 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1411 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1412 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction of a 64-bit integer (VDI) vector to a scalar.
;; neon_pairwise_reduce is driven with neon_vpumax<mode> into a temporary
;; vector, and element 0 is extracted into operand 0.
1417 (define_expand "reduc_umax_scal_<mode>"
1418 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1419 (match_operand:VDI 1 "s_register_operand" "")]
1422 rtx vec = gen_reg_rtx (<MODE>mode);
1423 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1424 &gen_neon_vpumax<mode>);
1425 /* The result is computed into every element of the vector. */
1426 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction of a 128-bit integer (VQI) vector to a
;; scalar.  The two halves are combined into a <V_HALF>mode temporary with
;; quad_halves_umax<mode>, then reduc_umax_scal_<V_half> reduces that.
;; Disabled for big-endian.
1430 (define_expand "reduc_umax_scal_<mode>"
1431 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1432 (match_operand:VQI 1 "s_register_operand" "")]
1433 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1435 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1437 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1438 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add (vpadd) of two VD-mode registers, expressed as an unspec.
;; The trailing if_then_else chooses between a floating-point and an
;; integer scheduling string based on <Is_float_mode>.
1443 (define_insn "neon_vpadd_internal<mode>"
1444 [(set (match_operand:VD 0 "s_register_operand" "=w")
1445 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1446 (match_operand:VD 2 "s_register_operand" "w")]
1449 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1450 ;; Assume this schedules like vadd.
1452 (if_then_else (match_test "<Is_float_mode>")
1453 (const_string "neon_fp_reduc_add_s<q>")
1454 (const_string "neon_reduc_add<q>")))]
;; Pairwise minimum (vpmin.<V_s_elem>) of two VD-mode registers, expressed
;; as an unspec.  The trailing if_then_else chooses between a
;; floating-point and an integer scheduling string based on <Is_float_mode>.
1457 (define_insn "neon_vpsmin<mode>"
1458 [(set (match_operand:VD 0 "s_register_operand" "=w")
1459 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1460 (match_operand:VD 2 "s_register_operand" "w")]
1463 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1465 (if_then_else (match_test "<Is_float_mode>")
1466 (const_string "neon_fp_reduc_minmax_s<q>")
1467 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise maximum (vpmax.<V_s_elem>) of two VD-mode registers, expressed
;; as an unspec.  The trailing if_then_else chooses between a
;; floating-point and an integer scheduling string based on <Is_float_mode>.
1470 (define_insn "neon_vpsmax<mode>"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")]
1476 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1478 (if_then_else (match_test "<Is_float_mode>")
1479 (const_string "neon_fp_reduc_minmax_s<q>")
1480 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise minimum with the unsigned element suffix (vpmin.<V_u_elem>) on
;; two VDI-mode registers, expressed as an unspec.
1483 (define_insn "neon_vpumin<mode>"
1484 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1485 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1486 (match_operand:VDI 2 "s_register_operand" "w")]
1489 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1490 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise maximum with the unsigned element suffix (vpmax.<V_u_elem>) on
;; two VDI-mode registers, expressed as an unspec.
1493 (define_insn "neon_vpumax<mode>"
1494 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1495 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1496 (match_operand:VDI 2 "s_register_operand" "w")]
1499 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1500 [(set_attr "type" "neon_reduc_minmax<q>")]
1503 ;; Saturating arithmetic
1505 ; NOTE: Neon supports many more saturating variants of instructions than the
1506 ; following, but these are all GCC currently understands.
1507 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1508 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) on VD modes, emitted as
;; vqadd.<V_s_elem> on D registers.
1511 (define_insn "*ss_add<mode>_neon"
1512 [(set (match_operand:VD 0 "s_register_operand" "=w")
1513 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1514 (match_operand:VD 2 "s_register_operand" "w")))]
1516 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1517 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) on VD modes, emitted as
;; vqadd.<V_u_elem> on D registers.
1520 (define_insn "*us_add<mode>_neon"
1521 [(set (match_operand:VD 0 "s_register_operand" "=w")
1522 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1523 (match_operand:VD 2 "s_register_operand" "w")))]
1525 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1526 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) on VD modes, emitted as
;; vqsub.<V_s_elem> on D registers.
1529 (define_insn "*ss_sub<mode>_neon"
1530 [(set (match_operand:VD 0 "s_register_operand" "=w")
1531 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1532 (match_operand:VD 2 "s_register_operand" "w")))]
1534 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1535 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) on VD modes, emitted as
;; vqsub.<V_u_elem> on D registers.
1538 (define_insn "*us_sub<mode>_neon"
1539 [(set (match_operand:VD 0 "s_register_operand" "=w")
1540 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1541 (match_operand:VD 2 "s_register_operand" "w")))]
1543 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1544 [(set_attr "type" "neon_qsub<q>")]
1547 ;; Conditional instructions. These are comparisons with conditional moves for
1548 ;; vectors. They perform the assignment:
1550 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1552 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Expands a vector conditional select.  A comparison mask is built in a
;; <V_cmp_result>mode pseudo from vcge/vcgt/vceq (or vclt/vcle for the
;; compare-against-zero forms) and handed to neon_vbsl, which chooses
;; between operands 1 and 2.  When the inverse comparison was emitted,
;; swap_bsl_operands exchanges the two selected values instead of
;; inverting the mask.  Floating-point modes require
;; flag_unsafe_math_optimizations.
1555 (define_expand "vcond<mode><mode>"
1556 [(set (match_operand:VDQW 0 "s_register_operand" "")
1558 (match_operator 3 "comparison_operator"
1559 [(match_operand:VDQW 4 "s_register_operand" "")
1560 (match_operand:VDQW 5 "nonmemory_operand" "")])
1561 (match_operand:VDQW 1 "s_register_operand" "")
1562 (match_operand:VDQW 2 "s_register_operand" "")))]
1563 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1566 int use_zero_form = 0;
1567 int swap_bsl_operands = 0;
1568 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1569 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1571 rtx (*base_comparison) (rtx, rtx, rtx);
1572 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1574 switch (GET_CODE (operands[3]))
1581 if (operands[5] == CONST0_RTX (<MODE>mode))
1588 if (!REG_P (operands[5]))
1589 operands[5] = force_reg (<MODE>mode, operands[5]);
1592 switch (GET_CODE (operands[3]))
1602 base_comparison = gen_neon_vcge<mode>;
1603 complimentary_comparison = gen_neon_vcgt<mode>;
1611 base_comparison = gen_neon_vcgt<mode>;
1612 complimentary_comparison = gen_neon_vcge<mode>;
1617 base_comparison = gen_neon_vceq<mode>;
1618 complimentary_comparison = gen_neon_vceq<mode>;
1624 switch (GET_CODE (operands[3]))
1631 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1632 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1638 Note that there also exist direct comparison against 0 forms,
1639 so catch those as a special case. */
1643 switch (GET_CODE (operands[3]))
1646 base_comparison = gen_neon_vclt<mode>;
1649 base_comparison = gen_neon_vcle<mode>;
1652 /* Do nothing, other zero form cases already have the correct
1659 emit_insn (base_comparison (mask, operands[4], operands[5]));
1661 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1668 /* Vector compare returns false for lanes which are unordered, so if we use
1669 the inverse of the comparison we actually want to emit, then
1670 swap the operands to BSL, we will end up with the correct result.
1671 Note that a NE NaN and NaN NE b are true for all a, b.
1673 Our transformations are:
1678 a NE b -> !(a EQ b) */
1681 emit_insn (base_comparison (mask, operands[4], operands[5]));
1683 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1685 swap_bsl_operands = 1;
1688 /* We check (a > b || b > a). Combining these comparisons gives us
1689 true iff (a != b && a ORDERED b), swapping the operands to BSL
1690 will then give us (a == b || a UNORDERED b) as intended. */
1692 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1693 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1694 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1695 swap_bsl_operands = 1;
1698 /* Operands are ORDERED iff (a > b || b >= a).
1699 Swapping the operands to BSL will give the UNORDERED case. */
1700 swap_bsl_operands = 1;
1703 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1704 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1705 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1711 if (swap_bsl_operands)
1712 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1715 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Unsigned counterpart of the vector conditional select, for VDQIW modes.
;; The mask is built in a <V_cmp_result>mode pseudo from vcgeu/vcgtu/vceq
;; (with operands 4 and 5 exchanged for some codes), and neon_vbsl then
;; selects between operands 1 and 2; one of the two vbsl calls passes
;; operand 2 first.
;; NOTE(review): the vcle/vclt calls below are the same generators that
;; vcond<mode><mode> uses for its compare-against-zero forms; confirm they
;; are what is intended for an unsigned comparison.
1720 (define_expand "vcondu<mode><mode>"
1721 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1723 (match_operator 3 "arm_comparison_operator"
1724 [(match_operand:VDQIW 4 "s_register_operand" "")
1725 (match_operand:VDQIW 5 "s_register_operand" "")])
1726 (match_operand:VDQIW 1 "s_register_operand" "")
1727 (match_operand:VDQIW 2 "s_register_operand" "")))]
1731 int inverse = 0, immediate_zero = 0;
1733 mask = gen_reg_rtx (<V_cmp_result>mode);
1735 if (operands[5] == CONST0_RTX (<MODE>mode))
1737 else if (!REG_P (operands[5]))
1738 operands[5] = force_reg (<MODE>mode, operands[5]);
1740 switch (GET_CODE (operands[3]))
1743 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1747 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1751 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1756 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1758 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1763 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1765 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1769 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1778 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1781 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1787 ;; Patterns for builtins.
1789 ; good for plain vadd, vaddq.
;; Builtin expander for floating-point vector add (VCVTF modes).  When the
;; mode is not floating point or flag_unsafe_math_optimizations is set, it
;; emits the generic add<mode>3; the neon_vadd<mode>_unspec call is the
;; other path.
1791 (define_expand "neon_vadd<mode>"
1792 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1793 (match_operand:VCVTF 1 "s_register_operand" "w")
1794 (match_operand:VCVTF 2 "s_register_operand" "w")]
1797 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1798 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1800 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1805 ; Note that NEON operations don't support the full IEEE 754 standard: in
1806 ; particular, denormal values are flushed to zero. This means that GCC cannot
1807 ; use those instructions for autovectorization, etc. unless
1808 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1809 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1810 ; header) must work in either case: if -funsafe-math-optimizations is given,
1811 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1812 ; expand to unspecs (which may potentially limit the extent to which they might
1813 ; be optimized by generic code).
1815 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vector add on VCVTF modes, assembled as
;; vadd.<V_if_elem>.  The trailing if_then_else chooses between a
;; floating-point and an integer scheduling string based on <Is_float_mode>.
1817 (define_insn "neon_vadd<mode>_unspec"
1818 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1819 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1820 (match_operand:VCVTF 2 "s_register_operand" "w")]
1823 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1825 (if_then_else (match_test "<Is_float_mode>")
1826 (const_string "neon_fp_addsub_s<q>")
1827 (const_string "neon_add<q>")))]
;; Long add (vaddl): two VDI-mode D-register inputs produce a <V_widen>
;; Q-register result.  <sup> supplies the signedness letter of the suffix.
1830 (define_insn "neon_vaddl<sup><mode>"
1831 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1832 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1833 (match_operand:VDI 2 "s_register_operand" "w")]
1836 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1837 [(set_attr "type" "neon_add_long")]
;; Wide add (vaddw): a <V_widen> Q-register operand 1 plus a VDI-mode
;; D-register operand 2, giving a <V_widen> result.
1840 (define_insn "neon_vaddw<sup><mode>"
1841 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1842 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1843 (match_operand:VDI 2 "s_register_operand" "w")]
1846 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1847 [(set_attr "type" "neon_add_widen")]
;; Halving add on VDQIW modes; the <r> iterator attribute inserts the
;; optional "r" of the v<r>hadd mnemonic and <sup> the signedness letter.
1852 (define_insn "neon_v<r>hadd<sup><mode>"
1853 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1854 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1855 (match_operand:VDQIW 2 "s_register_operand" "w")]
1858 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1859 [(set_attr "type" "neon_add_halve_q")]
;; Builtin form of saturating add (vqadd) on VDQIX modes, expressed as an
;; unspec; <sup> supplies the signedness letter of the suffix.
1862 (define_insn "neon_vqadd<sup><mode>"
1863 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1864 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1865 (match_operand:VDQIX 2 "s_register_operand" "w")]
1868 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1869 [(set_attr "type" "neon_qadd<q>")]
;; Add-and-narrow (v<r>addhn): two VN-mode Q-register inputs produce a
;; <V_narrow> D-register result.
1872 (define_insn "neon_v<r>addhn<mode>"
1873 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1874 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1875 (match_operand:VN 2 "s_register_operand" "w")]
1878 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1879 [(set_attr "type" "neon_add_halve_narrow_q")]
1882 ;; Polynomial and Float multiplication.
;; vmul on VPF modes, expressed as an unspec; <pf> supplies the type letter
;; of the suffix.  The trailing if_then_else chooses between a
;; floating-point and an integer scheduling string based on <Is_float_mode>.
1883 (define_insn "neon_vmul<pf><mode>"
1884 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1885 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1886 (match_operand:VPF 2 "s_register_operand" "w")]
1889 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1891 (if_then_else (match_test "<Is_float_mode>")
1892 (const_string "neon_fp_mul_s<q>")
1893 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Builtin expander for multiply-accumulate on VDQW modes.  When the mode
;; is not floating point or flag_unsafe_math_optimizations is set, it emits
;; mul<mode>3add<mode>_neon; the neon_vmla<mode>_unspec call is the other
;; path.  Both receive operands 0-3 unchanged.
1896 (define_expand "neon_vmla<mode>"
1897 [(match_operand:VDQW 0 "s_register_operand" "=w")
1898 (match_operand:VDQW 1 "s_register_operand" "0")
1899 (match_operand:VDQW 2 "s_register_operand" "w")
1900 (match_operand:VDQW 3 "s_register_operand" "w")]
1903 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1904 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1905 operands[2], operands[3]));
1907 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1908 operands[2], operands[3]));
;; Builtin expander for fused multiply-add on VCVTF modes; requires
;; TARGET_FMA.  It forwards to fma<mode>4_intrinsic, passing operands 2 and
;; 3 immediately after the destination.
1912 (define_expand "neon_vfma<VCVTF:mode>"
1913 [(match_operand:VCVTF 0 "s_register_operand")
1914 (match_operand:VCVTF 1 "s_register_operand")
1915 (match_operand:VCVTF 2 "s_register_operand")
1916 (match_operand:VCVTF 3 "s_register_operand")]
1917 "TARGET_NEON && TARGET_FMA"
1919 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for fused multiply-subtract on VCVTF modes; requires
;; TARGET_FMA.  It forwards to fmsub<mode>4_intrinsic, passing operands 2
;; and 3 immediately after the destination.
1924 (define_expand "neon_vfms<VCVTF:mode>"
1925 [(match_operand:VCVTF 0 "s_register_operand")
1926 (match_operand:VCVTF 1 "s_register_operand")
1927 (match_operand:VCVTF 2 "s_register_operand")
1928 (match_operand:VCVTF 3 "s_register_operand")]
1929 "TARGET_NEON && TARGET_FMA"
1931 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1936 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of multiply-accumulate on VDQW modes.  Operand 1 is tied to
;; the destination (constraint "0"), so the template only names operands
;; 0, 2 and 3.
1938 (define_insn "neon_vmla<mode>_unspec"
1939 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1940 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1941 (match_operand:VDQW 2 "s_register_operand" "w")
1942 (match_operand:VDQW 3 "s_register_operand" "w")]
1945 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1947 (if_then_else (match_test "<Is_float_mode>")
1948 (const_string "neon_fp_mla_s<q>")
1949 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-accumulate (vmlal): VW-mode operands 2 and 3 feed a
;; <V_widen> accumulator.  Operand 1 is tied to the destination
;; (constraint "0").
1952 (define_insn "neon_vmlal<sup><mode>"
1953 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1954 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1955 (match_operand:VW 2 "s_register_operand" "w")
1956 (match_operand:VW 3 "s_register_operand" "w")]
1959 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
1960 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Builtin expander for multiply-subtract on VDQW modes.  When the mode is
;; not floating point or flag_unsafe_math_optimizations is set, it emits
;; mul<mode>3neg<mode>add<mode>_neon; the neon_vmls<mode>_unspec call is
;; the other path.  Both receive operands 0-3 unchanged.
1963 (define_expand "neon_vmls<mode>"
1964 [(match_operand:VDQW 0 "s_register_operand" "=w")
1965 (match_operand:VDQW 1 "s_register_operand" "0")
1966 (match_operand:VDQW 2 "s_register_operand" "w")
1967 (match_operand:VDQW 3 "s_register_operand" "w")]
1970 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1971 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
1972 operands[1], operands[2], operands[3]));
1974 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
1975 operands[2], operands[3]));
1979 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of multiply-subtract on VDQW modes.  Operand 1 is tied to
;; the destination (constraint "0"), so the template only names operands
;; 0, 2 and 3.
1981 (define_insn "neon_vmls<mode>_unspec"
1982 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1983 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1984 (match_operand:VDQW 2 "s_register_operand" "w")
1985 (match_operand:VDQW 3 "s_register_operand" "w")]
1988 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1990 (if_then_else (match_test "<Is_float_mode>")
1991 (const_string "neon_fp_mla_s<q>")
1992 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Long multiply-subtract (vmlsl): VW-mode operands 2 and 3 feed a
;; <V_widen> accumulator.  Operand 1 is tied to the destination
;; (constraint "0").
1995 (define_insn "neon_vmlsl<sup><mode>"
1996 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1997 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1998 (match_operand:VW 2 "s_register_operand" "w")
1999 (match_operand:VW 3 "s_register_operand" "w")]
2002 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2003 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2006 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics named in the heading: the <r> iterator
;; attribute inserts the optional "r".  Operates on VMDQI modes with the
;; signed element suffix <V_s_elem>.
2007 (define_insn "neon_vq<r>dmulh<mode>"
2008 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2009 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2010 (match_operand:VMDQI 2 "s_register_operand" "w")]
2013 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2014 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2017 ;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics named in the heading: the
;; <VQRDMLH_AS:neon_rdma_as> attribute supplies the letter that
;; distinguishes them.  Operand 1 is tied to the destination
;; (constraint "0").
;; NOTE(review): the type attribute uses the "_long" scheduling class
;; although the operands are not widened; confirm this is intended.
2018 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2019 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2020 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2021 (match_operand:VMDQI 2 "s_register_operand" "w")
2022 (match_operand:VMDQI 3 "s_register_operand" "w")]
2025 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2026 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: VMDI-mode operands 2 and 3 feed a <V_widen> accumulator.
;; Operand 1 is tied to the destination (constraint "0").
2029 (define_insn "neon_vqdmlal<mode>"
2030 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2031 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2032 (match_operand:VMDI 2 "s_register_operand" "w")
2033 (match_operand:VMDI 3 "s_register_operand" "w")]
2036 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2037 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: VMDI-mode operands 2 and 3 feed a <V_widen> accumulator.
;; Operand 1 is tied to the destination (constraint "0").
2040 (define_insn "neon_vqdmlsl<mode>"
2041 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2042 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2043 (match_operand:VMDI 2 "s_register_operand" "w")
2044 (match_operand:VMDI 3 "s_register_operand" "w")]
2047 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2048 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Long multiply (vmull): two VW-mode D-register inputs produce a
;; <V_widen> Q-register result.
2051 (define_insn "neon_vmull<sup><mode>"
2052 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2053 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2054 (match_operand:VW 2 "s_register_operand" "w")]
2057 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2058 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI-mode D-register inputs produce a <V_widen> Q-register
;; result, using the signed element suffix <V_s_elem>.
2061 (define_insn "neon_vqdmull<mode>"
2062 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2063 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2064 (match_operand:VMDI 2 "s_register_operand" "w")]
2067 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2068 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Builtin expander for floating-point vector subtract (VCVTF modes).  When
;; the mode is not floating point or flag_unsafe_math_optimizations is set,
;; it emits the generic sub<mode>3; the neon_vsub<mode>_unspec call is the
;; other path.
2071 (define_expand "neon_vsub<mode>"
2072 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2073 (match_operand:VCVTF 1 "s_register_operand" "w")
2074 (match_operand:VCVTF 2 "s_register_operand" "w")]
2077 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2078 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2080 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2085 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; UNSPEC form of vsub on VCVTF.  The type attribute is neon_fp_addsub_s<q>
;; when <Is_float_mode> holds and neon_sub<q> otherwise.
2087 (define_insn "neon_vsub<mode>_unspec"
2088 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2089 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2090 (match_operand:VCVTF 2 "s_register_operand" "w")]
2093 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2095 (if_then_else (match_test "<Is_float_mode>")
2096 (const_string "neon_fp_addsub_s<q>")
2097 (const_string "neon_sub<q>")))]
;; Emits vsubl with the <sup> type prefix: two VDI sources, result in the
;; widened mode <V_widen>.
2100 (define_insn "neon_vsubl<sup><mode>"
2101 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2102 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2103 (match_operand:VDI 2 "s_register_operand" "w")]
2106 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2107 [(set_attr "type" "neon_sub_long")]
;; Emits vsubw with the <sup> type prefix.  Operand 1 is already in the
;; widened mode <V_widen>; operand 2 is the VDI source.
2110 (define_insn "neon_vsubw<sup><mode>"
2111 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2112 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2113 (match_operand:VDI 2 "s_register_operand" "w")]
2116 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2117 [(set_attr "type" "neon_sub_widen")]
;; Emits vqsub with the <sup> type prefix on VDQIX vectors; sources and
;; destination share the same mode.
2120 (define_insn "neon_vqsub<sup><mode>"
2121 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2122 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2123 (match_operand:VDQIX 2 "s_register_operand" "w")]
2126 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2127 [(set_attr "type" "neon_qsub<q>")]
;; Emits vhsub with the <sup> type prefix on VDQIW vectors; sources and
;; destination share the same mode.
2130 (define_insn "neon_vhsub<sup><mode>"
2131 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2132 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2133 (match_operand:VDQIW 2 "s_register_operand" "w")]
2136 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2137 [(set_attr "type" "neon_sub_halve<q>")]
;; Emits vsubhn or its <r>-prefixed variant: two VN sources, result in the
;; narrowed mode <V_narrow>.
2140 (define_insn "neon_v<r>subhn<mode>"
2141 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2142 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2143 (match_operand:VN 2 "s_register_operand" "w")]
2146 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2147 [(set_attr "type" "neon_sub_halve_narrow_q")]
2150 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2151 ;; without unsafe math optimizations.
;; Expander for the vc<cmp_op> comparisons over VDQW; operand 2 may be a
;; register or zero.  Float-vector modes without flag_unsafe_math_optimizations
;; are routed to the v2sf/v4sf *_insn_unspec patterns; the remaining emit
;; targets neon_vc<cmp_op><mode>_insn.
2152 (define_expand "neon_vc<cmp_op><mode>"
2153 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2155 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2156 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2159 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2161 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2162 && !flag_unsafe_math_optimizations)
2164 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2165 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2166 whereas this expander iterates over the integer modes as well,
2167 but we will never expand to UNSPECs for the integer comparisons. */
2171 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2176 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2185 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; RTL-comparison form of vc<cmp_op>.  The condition rejects float-vector
;; modes unless flag_unsafe_math_optimizations is set.  The assembly template
;; is built with sprintf: the type prefix is "f" for float-vector modes and
;; <cmp_type> otherwise, and the last operand is a register for alternative 0
;; or the literal #0 for the "Dz" alternative.  The type attribute is
;; neon_compare_zero<q> when operand 2 matches zero_operand, else
;; neon_compare<q>.
2192 (define_insn "neon_vc<cmp_op><mode>_insn"
2193 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2195 (COMPARISONS:<V_cmp_result>
2196 (match_operand:VDQW 1 "s_register_operand" "w,w")
2197 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2198 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2199 && !flag_unsafe_math_optimizations)"
2202 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2204 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2205 ? "f" : "<cmp_type>",
2206 which_alternative == 0
2207 ? "%<V_reg>2" : "#0");
2208 output_asm_insn (pattern, operands);
2212 (if_then_else (match_operand 2 "zero_operand")
2213 (const_string "neon_compare_zero<q>")
2214 (const_string "neon_compare<q>")))]
;; UNSPEC form of the float comparisons (VCVTF operands).  The template always
;; uses the ".f" type prefix; alternative 0 compares against a register and
;; the "Dz" alternative against the literal #0.
2217 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2218 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2219 (unspec:<V_cmp_result>
2220 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2221 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2226 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2228 which_alternative == 0
2229 ? "%<V_reg>2" : "#0");
2230 output_asm_insn (pattern, operands);
2233 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Comparisons from the GTUGEU iterator on VDQIW vectors; the template uses
;; the ".u" type prefix and both sources must be registers.
2236 (define_insn "neon_vc<cmp_op>u<mode>"
2237 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2239 (GTUGEU:<V_cmp_result>
2240 (match_operand:VDQIW 1 "s_register_operand" "w")
2241 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2243 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2244 [(set_attr "type" "neon_compare<q>")]
;; Expander for comparisons (GTGE iterator) of the absolute values of two
;; VCVTF operands.  With flag_unsafe_math_optimizations it emits
;; neon_vca<cmp_op><mode>_insn; the other emit visible here targets the
;; _insn_unspec pattern.
2247 (define_expand "neon_vca<cmp_op><mode>"
2248 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2250 (GTGE:<V_cmp_result>
2251 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2252 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2255 if (flag_unsafe_math_optimizations)
2256 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2259 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; RTL form of the absolute-value comparison, emitted as vac<cmp_op>.  Only
;; enabled when flag_unsafe_math_optimizations is set.
2266 (define_insn "neon_vca<cmp_op><mode>_insn"
2267 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2269 (GTGE:<V_cmp_result>
2270 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2271 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2272 "TARGET_NEON && flag_unsafe_math_optimizations"
2273 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2274 [(set_attr "type" "neon_fp_compare_s<q>")]
;; UNSPEC form of the absolute-value comparison, emitted as vac<cmp_op_unsp>
;; on two VCVTF register operands.
2277 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2278 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2279 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2280 (match_operand:VCVTF 2 "s_register_operand" "w")]
2283 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2284 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Emits vtst on two VDQIW register operands; the result has the same mode.
2287 (define_insn "neon_vtst<mode>"
2288 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2289 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2290 (match_operand:VDQIW 2 "s_register_operand" "w")]
2293 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2294 [(set_attr "type" "neon_tst<q>")]
;; Emits vabd with the <sup> type prefix on two VDQIW register operands.
2297 (define_insn "neon_vabd<sup><mode>"
2298 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2299 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2300 (match_operand:VDQIW 2 "s_register_operand" "w")]
2303 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2304 [(set_attr "type" "neon_abd<q>")]
;; Floating-point counterpart of neon_vabd: emits vabd on two VCVTF register
;; operands, with type attribute neon_fp_abd_s<q>.
2307 (define_insn "neon_vabdf<mode>"
2308 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2309 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2310 (match_operand:VCVTF 2 "s_register_operand" "w")]
2313 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2314 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Emits vabdl with the <sup> type prefix: two VW sources, result in the
;; widened mode <V_widen>.
2317 (define_insn "neon_vabdl<sup><mode>"
2318 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2319 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2320 (match_operand:VW 2 "s_register_operand" "w")]
2323 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2324 [(set_attr "type" "neon_abd_long")]
;; Emits vaba with the <sup> type prefix.  The RTL is a plus of an UNSPEC over
;; operands 2 and 3 and the accumulator in operand 1, which is tied to the
;; destination ("0").
2327 (define_insn "neon_vaba<sup><mode>"
2328 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2329 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2330 (match_operand:VDQIW 3 "s_register_operand" "w")]
2332 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2334 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2335 [(set_attr "type" "neon_arith_acc<q>")]
;; Emits vabal with the <sup> type prefix.  Like neon_vaba, but the UNSPEC of
;; the VW operands 2 and 3 and the accumulator (operand 1, tied to the
;; destination) are in the widened mode <V_widen>.
2338 (define_insn "neon_vabal<sup><mode>"
2339 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2340 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2341 (match_operand:VW 3 "s_register_operand" "w")]
2343 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2345 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2346 [(set_attr "type" "neon_arith_acc<q>")]
;; Integer vmax/vmin (chosen by <maxmin>) with the <sup> type prefix on two
;; VDQIW register operands.
2349 (define_insn "neon_v<maxmin><sup><mode>"
2350 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2351 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2352 (match_operand:VDQIW 2 "s_register_operand" "w")]
2355 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2356 [(set_attr "type" "neon_minmax<q>")]
;; Floating-point vmax/vmin (chosen by <maxmin>) on two VCVTF register
;; operands, expressed as an UNSPEC.
2359 (define_insn "neon_v<maxmin>f<mode>"
2360 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2361 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2362 (match_operand:VCVTF 2 "s_register_operand" "w")]
2365 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2366 [(set_attr "type" "neon_fp_minmax_s<q>")]
2369 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Standard-named <fmaxmin><mode>3 pattern on VCVTF vectors.  Requires
;; TARGET_NEON && TARGET_FPU_ARMV8 and emits the <fmaxmin_op> mnemonic.
2370 (define_insn "<fmaxmin><mode>3"
2371 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2372 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2373 (match_operand:VCVTF 2 "s_register_operand" "w")]
2375 "TARGET_NEON && TARGET_FPU_ARMV8"
2376 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2377 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Expander for neon_vpadd<mode> on VD vectors: forwards its operands to
;; neon_vpadd_internal<mode>.
2380 (define_expand "neon_vpadd<mode>"
2381 [(match_operand:VD 0 "s_register_operand" "=w")
2382 (match_operand:VD 1 "s_register_operand" "w")
2383 (match_operand:VD 2 "s_register_operand" "w")]
2386 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; Emits vpaddl with the <sup> type prefix: one VDQIW source, result in
;; <V_double_width>.
2391 (define_insn "neon_vpaddl<sup><mode>"
2392 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2393 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2396 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2397 [(set_attr "type" "neon_reduc_add_long")]
;; Emits vpadal with the <sup> type prefix.  Operand 1 is the <V_double_width>
;; accumulator tied to the destination ("0"); operand 2 is the VDQIW source.
2400 (define_insn "neon_vpadal<sup><mode>"
2401 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2402 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2403 (match_operand:VDQIW 2 "s_register_operand" "w")]
2406 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2407 [(set_attr "type" "neon_reduc_add_acc")]
;; Integer vpmax/vpmin (chosen by <maxmin>) with the <sup> type prefix on two
;; VDI register operands.
2410 (define_insn "neon_vp<maxmin><sup><mode>"
2411 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2412 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2413 (match_operand:VDI 2 "s_register_operand" "w")]
2416 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2417 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Floating-point vpmax/vpmin (chosen by <maxmin>) on two VCVTF register
;; operands.
2420 (define_insn "neon_vp<maxmin>f<mode>"
2421 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2422 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2423 (match_operand:VCVTF 2 "s_register_operand" "w")]
2426 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2427 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; Emits vrecps on two VCVTF register operands; the result has the same mode.
2430 (define_insn "neon_vrecps<mode>"
2431 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2432 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2433 (match_operand:VCVTF 2 "s_register_operand" "w")]
2436 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2437 [(set_attr "type" "neon_fp_recps_s<q>")]
;; Emits vrsqrts on two VCVTF register operands; the result has the same mode.
2440 (define_insn "neon_vrsqrts<mode>"
2441 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2442 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2443 (match_operand:VCVTF 2 "s_register_operand" "w")]
2446 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2447 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for neon_vabs<mode> over VDQW: forwards to the generic abs<mode>2
;; pattern.
2450 (define_expand "neon_vabs<mode>"
2451 [(match_operand:VDQW 0 "s_register_operand" "")
2452 (match_operand:VDQW 1 "s_register_operand" "")]
2455 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; Emits vqabs on a single VDQIW register operand.
2459 (define_insn "neon_vqabs<mode>"
2460 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2461 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2464 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2465 [(set_attr "type" "neon_qabs<q>")]
;; Byte swap (bswap RTL) of each element of a VDQHSD vector, emitted as
;; vrev<element size>.8.
2468 (define_insn "neon_bswap<mode>"
2469 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2470 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2472 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2473 [(set_attr "type" "neon_rev<q>")]
;; Expander for neon_vneg<mode> over VDQW: forwards to the generic neg<mode>2
;; pattern.
2476 (define_expand "neon_vneg<mode>"
2477 [(match_operand:VDQW 0 "s_register_operand" "")
2478 (match_operand:VDQW 1 "s_register_operand" "")]
2481 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; Expander for neon_copysignf<mode> over VCVTF.  It builds a constant vector
;; in the <V_cmp_result> mode with 0x80000000 in every element, moves operand 2
;; into the destination, reinterprets the mask in <MODE>mode through a subreg
;; at offset 0, and emits neon_vbsl<mode> with that mask as the selector.
;; NOTE(review): GEN_INT (0x80000000) is not sign-extended; if the element
;; mode is 32 bits wide, gen_int_mode may be the canonical way to build this
;; constant -- confirm.
2485 (define_expand "neon_copysignf<mode>"
2486 [(match_operand:VCVTF 0 "register_operand")
2487 (match_operand:VCVTF 1 "register_operand")
2488 (match_operand:VCVTF 2 "register_operand")]
2492 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2493 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2494 rtvec v = rtvec_alloc (n_elt);
2496 /* Create bitmask for vector select. */
2497 for (i = 0; i < n_elt; ++i)
2498 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
2500 emit_move_insn (v_bitmask,
2501 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2502 emit_move_insn (operands[0], operands[2]);
2503 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2504 <VCVTF:V_cmp_result>mode, 0);
2505 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; Emits vqneg on a single VDQIW register operand.
2512 (define_insn "neon_vqneg<mode>"
2513 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2514 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2517 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2518 [(set_attr "type" "neon_qneg<q>")]
;; Emits vcls on a single VDQIW register operand.
2521 (define_insn "neon_vcls<mode>"
2522 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2523 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2526 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2527 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz<mode>2 pattern for VDQIW vectors, emitted as vclz.
2530 (define_insn "clz<mode>2"
2531 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2532 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2534 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2535 [(set_attr "type" "neon_cnt<q>")]
;; Expander for neon_vclz<mode> over VDQIW: forwards to clz<mode>2.
2538 (define_expand "neon_vclz<mode>"
2539 [(match_operand:VDQIW 0 "s_register_operand" "")
2540 (match_operand:VDQIW 1 "s_register_operand" "")]
2543 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount<mode>2 pattern for VE vectors, emitted as vcnt.
2547 (define_insn "popcount<mode>2"
2548 [(set (match_operand:VE 0 "s_register_operand" "=w")
2549 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2551 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2552 [(set_attr "type" "neon_cnt<q>")]
;; Expander for neon_vcnt<mode> over VE: forwards to popcount<mode>2.
2555 (define_expand "neon_vcnt<mode>"
2556 [(match_operand:VE 0 "s_register_operand" "=w")
2557 (match_operand:VE 1 "s_register_operand" "w")]
2560 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; Emits vrecpe on a single V32 register operand, using the <V_u_elem> type
;; suffix.
2564 (define_insn "neon_vrecpe<mode>"
2565 [(set (match_operand:V32 0 "s_register_operand" "=w")
2566 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2569 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2570 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; Emits vrsqrte on a single V32 register operand, using the <V_u_elem> type
;; suffix.
2573 (define_insn "neon_vrsqrte<mode>"
2574 [(set (match_operand:V32 0 "s_register_operand" "=w")
2575 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2578 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2579 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for neon_vmvn<mode> over VDQIW: forwards to the generic
;; one_cmpl<mode>2 pattern.
2582 (define_expand "neon_vmvn<mode>"
2583 [(match_operand:VDQIW 0 "s_register_operand" "")
2584 (match_operand:VDQIW 1 "s_register_operand" "")]
2587 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; VD variant of the "sext" lane extract: selects the lane given by immediate
;; operand 2 from operand 1 into an SI core register ("=r") using vmov.s.
;; When BYTES_BIG_ENDIAN the lane index is mirrored (nunits - 1 - elt) before
;; the instruction is printed.
2591 (define_insn "neon_vget_lane<mode>_sext_internal"
2592 [(set (match_operand:SI 0 "s_register_operand" "=r")
2594 (vec_select:<V_elem>
2595 (match_operand:VD 1 "s_register_operand" "w")
2596 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2599 if (BYTES_BIG_ENDIAN)
2601 int elt = INTVAL (operands[2]);
2602 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2603 operands[2] = GEN_INT (elt);
2605 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2607 [(set_attr "type" "neon_to_gp")]
;; VD variant of the "zext" lane extract: same shape as the sext pattern but
;; printed with vmov.u.  When BYTES_BIG_ENDIAN the lane index is mirrored
;; (nunits - 1 - elt) before the instruction is printed.
2610 (define_insn "neon_vget_lane<mode>_zext_internal"
2611 [(set (match_operand:SI 0 "s_register_operand" "=r")
2613 (vec_select:<V_elem>
2614 (match_operand:VD 1 "s_register_operand" "w")
2615 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2618 if (BYTES_BIG_ENDIAN)
2620 int elt = INTVAL (operands[2]);
2621 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2622 operands[2] = GEN_INT (elt);
2624 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2626 [(set_attr "type" "neon_to_gp")]
;; VQ2 variant of the "sext" lane extract.  The lane is addressed through a
;; half-vector register: elt / halfelts picks the half (register number
;; regno + 2 * half, in <V_HALF>mode) and elt % halfelts is the index within
;; it, mirrored when BYTES_BIG_ENDIAN.  The vmov.s instruction is printed from
;; the rebuilt ops[] array rather than from the original operands.
2629 (define_insn "neon_vget_lane<mode>_sext_internal"
2630 [(set (match_operand:SI 0 "s_register_operand" "=r")
2632 (vec_select:<V_elem>
2633 (match_operand:VQ2 1 "s_register_operand" "w")
2634 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2638 int regno = REGNO (operands[1]);
2639 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2640 unsigned int elt = INTVAL (operands[2]);
2641 unsigned int elt_adj = elt % halfelts;
2643 if (BYTES_BIG_ENDIAN)
2644 elt_adj = halfelts - 1 - elt_adj;
2646 ops[0] = operands[0];
2647 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2648 ops[2] = GEN_INT (elt_adj);
2649 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2653 [(set_attr "type" "neon_to_gp_q")]
;; VQ2 variant of the "zext" lane extract.  Same register and lane computation
;; as the VQ2 sext pattern (half selected by elt / halfelts, index elt %
;; halfelts, mirrored when BYTES_BIG_ENDIAN), printed with vmov.u.
2656 (define_insn "neon_vget_lane<mode>_zext_internal"
2657 [(set (match_operand:SI 0 "s_register_operand" "=r")
2659 (vec_select:<V_elem>
2660 (match_operand:VQ2 1 "s_register_operand" "w")
2661 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2665 int regno = REGNO (operands[1]);
2666 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2667 unsigned int elt = INTVAL (operands[2]);
2668 unsigned int elt_adj = elt % halfelts;
2670 if (BYTES_BIG_ENDIAN)
2671 elt_adj = halfelts - 1 - elt_adj;
2673 ops[0] = operands[0];
2674 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2675 ops[2] = GEN_INT (elt_adj);
2676 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2680 [(set_attr "type" "neon_to_gp_q")]
;; Expander for neon_vget_lane<mode> over VDQW.  When BYTES_BIG_ENDIAN the
;; lane number is converted by XOR with (elements per 64 bits - 1).  Modes
;; with 32-bit elements use vec_extract<mode>; the other emit visible here
;; targets the _sext_internal pattern.
2683 (define_expand "neon_vget_lane<mode>"
2684 [(match_operand:<V_ext> 0 "s_register_operand" "")
2685 (match_operand:VDQW 1 "s_register_operand" "")
2686 (match_operand:SI 2 "immediate_operand" "")]
2689 if (BYTES_BIG_ENDIAN)
2691 /* The intrinsics are defined in terms of a model where the
2692 element ordering in memory is vldm order, whereas the generic
2693 RTL is defined in terms of a model where the element ordering
2694 in memory is array order. Convert the lane number to conform
2696 unsigned int elt = INTVAL (operands[2]);
2697 unsigned int reg_nelts
2698 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2699 elt ^= reg_nelts - 1;
2700 operands[2] = GEN_INT (elt);
2703 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2704 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2706 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Expander for neon_vget_laneu<mode> over VDQIW.  Same big-endian lane
;; conversion as neon_vget_lane<mode> (XOR with elements per 64 bits - 1).
;; Modes with 32-bit elements use vec_extract<mode>; the other emit visible
;; here targets the _zext_internal pattern.
2712 (define_expand "neon_vget_laneu<mode>"
2713 [(match_operand:<V_ext> 0 "s_register_operand" "")
2714 (match_operand:VDQIW 1 "s_register_operand" "")
2715 (match_operand:SI 2 "immediate_operand" "")]
2718 if (BYTES_BIG_ENDIAN)
2720 /* The intrinsics are defined in terms of a model where the
2721 element ordering in memory is vldm order, whereas the generic
2722 RTL is defined in terms of a model where the element ordering
2723 in memory is array order. Convert the lane number to conform
2725 unsigned int elt = INTVAL (operands[2]);
2726 unsigned int reg_nelts
2727 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2728 elt ^= reg_nelts - 1;
2729 operands[2] = GEN_INT (elt);
2732 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2733 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2735 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; Expander for neon_vget_lanedi: a plain move of operand 1 into operand 0.
;; The lane immediate (operand 2) is not used by the visible expansion.
2741 (define_expand "neon_vget_lanedi"
2742 [(match_operand:DI 0 "s_register_operand" "=r")
2743 (match_operand:DI 1 "s_register_operand" "w")
2744 (match_operand:SI 2 "immediate_operand" "")]
2747 emit_move_insn (operands[0], operands[1]);
;; Expander for neon_vget_lanev2di.  When BYTES_BIG_ENDIAN the lane is flipped
;; (XOR with 1).  The lane is asserted to be 0 or 1; lane 0 moves the DImode
;; low part of operand 1 and lane 1 moves its high part.
2751 (define_expand "neon_vget_lanev2di"
2752 [(match_operand:DI 0 "s_register_operand" "")
2753 (match_operand:V2DI 1 "s_register_operand" "")
2754 (match_operand:SI 2 "immediate_operand" "")]
2759 if (BYTES_BIG_ENDIAN)
2761 /* The intrinsics are defined in terms of a model where the
2762 element ordering in memory is vldm order, whereas the generic
2763 RTL is defined in terms of a model where the element ordering
2764 in memory is array order. Convert the lane number to conform
2766 unsigned int elt = INTVAL (operands[2]);
2767 unsigned int reg_nelts = 2;
2768 elt ^= reg_nelts - 1;
2769 operands[2] = GEN_INT (elt);
2772 lane = INTVAL (operands[2]);
2773 gcc_assert ((lane ==0) || (lane == 1));
2774 emit_move_insn (operands[0], lane == 0
2775 ? gen_lowpart (DImode, operands[1])
2776 : gen_highpart (DImode, operands[1]));
;; Expander for neon_vset_lane<mode> over VDQ.  Operand 1 is the scalar,
;; operand 2 the input vector and operand 3 the lane.  When BYTES_BIG_ENDIAN
;; the lane is converted by XOR with (elements per 64 bits - 1).  The lane is
;; then passed to vec_set<mode>_internal as a one-hot mask (1 << elt).
2780 (define_expand "neon_vset_lane<mode>"
2781 [(match_operand:VDQ 0 "s_register_operand" "=w")
2782 (match_operand:<V_elem> 1 "s_register_operand" "r")
2783 (match_operand:VDQ 2 "s_register_operand" "0")
2784 (match_operand:SI 3 "immediate_operand" "i")]
2787 unsigned int elt = INTVAL (operands[3]);
2789 if (BYTES_BIG_ENDIAN)
2791 unsigned int reg_nelts
2792 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2793 elt ^= reg_nelts - 1;
2796 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2797 GEN_INT (1 << elt), operands[2]));
2801 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; Expander for neon_vset_lanedi: a plain move of the scalar (operand 1) into
;; the destination; operands 2 and 3 are not used by the visible expansion.
2803 (define_expand "neon_vset_lanedi"
2804 [(match_operand:DI 0 "s_register_operand" "=w")
2805 (match_operand:DI 1 "s_register_operand" "r")
2806 (match_operand:DI 2 "s_register_operand" "0")
2807 (match_operand:SI 3 "immediate_operand" "i")]
2810 emit_move_insn (operands[0], operands[1]);
;; Expander for neon_vcreate<mode> over VD_RE: reinterprets the DI operand in
;; <MODE>mode with gen_lowpart and moves it into the destination.
2814 (define_expand "neon_vcreate<mode>"
2815 [(match_operand:VD_RE 0 "s_register_operand" "")
2816 (match_operand:DI 1 "general_operand" "")]
2819 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2820 emit_move_insn (operands[0], src);
;; VX variant of neon_vdup_n: vec_duplicate of a scalar held in a core
;; register ("r"), emitted as vdup.
2824 (define_insn "neon_vdup_n<mode>"
2825 [(set (match_operand:VX 0 "s_register_operand" "=w")
2826 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2828 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2829 [(set_attr "type" "neon_from_gp<q>")]
;; V32 variant of neon_vdup_n with two alternatives: the scalar in a core
;; register ("r", printed as %1, type neon_from_gp<q>) or in a register of
;; class "t" (printed as %y1, type neon_dup<q>).
2832 (define_insn "neon_vdup_n<mode>"
2833 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2834 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2837 vdup.<V_sz_elem>\t%<V_reg>0, %1
2838 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2839 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; Expander for neon_vdup_ndi: a plain move of operand 1 into operand 0.
2842 (define_expand "neon_vdup_ndi"
2843 [(match_operand:DI 0 "s_register_operand" "=w")
2844 (match_operand:DI 1 "s_register_operand" "r")]
2847 emit_move_insn (operands[0], operands[1]);
;; vec_duplicate of a DI value into V2DI, emitted as two vmov instructions
;; (length 8), one writing %e0 and one writing %f0.  Alternative 0 takes the
;; source from core registers (%Q1, %R1); alternative 1 from a "w" register.
2852 (define_insn "neon_vdup_nv2di"
2853 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2854 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2857 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2858 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2859 [(set_attr "length" "8")
2860 (set_attr "type" "multiple")]
;; Duplicates the lane selected by immediate operand 2 of operand 1 (mode
;; <V_double_vector_mode>) across a VDQW destination, emitted as vdup.  When
;; BYTES_BIG_ENDIAN the lane index is mirrored (nunits - 1 - elt).  The two
;; return templates differ only in printing the destination with %P0 or %q0.
2863 (define_insn "neon_vdup_lane<mode>_internal"
2864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2866 (vec_select:<V_elem>
2867 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2868 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2871 if (BYTES_BIG_ENDIAN)
2873 int elt = INTVAL (operands[2]);
2874 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2875 operands[2] = GEN_INT (elt);
2878 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2880 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2882 [(set_attr "type" "neon_dup<q>")]
;; Expander for neon_vdup_lane<mode> over VDQW.  When BYTES_BIG_ENDIAN the
;; lane is converted by XOR with (elements per 64 bits - 1), computed from
;; <V_double_vector_mode>; the operands are then forwarded to
;; neon_vdup_lane<mode>_internal.
2885 (define_expand "neon_vdup_lane<mode>"
2886 [(match_operand:VDQW 0 "s_register_operand" "=w")
2887 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2888 (match_operand:SI 2 "immediate_operand" "i")]
2891 if (BYTES_BIG_ENDIAN)
2893 unsigned int elt = INTVAL (operands[2]);
2894 unsigned int reg_nelts
2895 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
2896 elt ^= reg_nelts - 1;
2897 operands[2] = GEN_INT (elt);
2899 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2904 ; Scalar index is ignored, since only zero is valid here.
;; Expander for neon_vdup_lanedi: a plain move of operand 1 into operand 0;
;; the lane immediate (operand 2) is not used.
2905 (define_expand "neon_vdup_lanedi"
2906 [(match_operand:DI 0 "s_register_operand" "=w")
2907 (match_operand:DI 1 "s_register_operand" "w")
2908 (match_operand:SI 2 "immediate_operand" "i")]
2911 emit_move_insn (operands[0], operands[1]);
2915 ; Likewise for v2di, as the DImode second operand has only a single element.
;; Expander for neon_vdup_lanev2di: forwards operands 0 and 1 to
;; neon_vdup_nv2di; the lane immediate (operand 2) is not used.
2916 (define_expand "neon_vdup_lanev2di"
2917 [(match_operand:V2DI 0 "s_register_operand" "=w")
2918 (match_operand:DI 1 "s_register_operand" "w")
2919 (match_operand:SI 2 "immediate_operand" "i")]
2922 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2926 ; Disabled before reload because we don't want combine doing something silly,
2927 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap: two sets exchanging operands 0 and 1 (both "+w"), emitted as
;; a single vswp.  Only enabled once reload_completed is true.
2928 (define_insn "*neon_vswp<mode>"
2929 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2930 (match_operand:VDQX 1 "s_register_operand" "+w"))
2931 (set (match_dup 1) (match_dup 0))]
2932 "TARGET_NEON && reload_completed"
2933 "vswp\t%<V_reg>0, %<V_reg>1"
2934 [(set_attr "type" "neon_permute<q>")]
2937 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2939 ;; FIXME: A different implementation of this builtin could make it much
2940 ;; more likely that we wouldn't actually need to output anything (we could make
2941 ;; it so that the reg allocator puts things in the right places magically
2942 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vec_concat of two VDX operands into a <V_DOUBLE> destination.  The pattern
;; is split once reload_completed is true, with neon_split_vcombine doing the
;; work; its type attribute is "multiple".
2944 (define_insn_and_split "neon_vcombine<mode>"
2945 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2946 (vec_concat:<V_DOUBLE>
2947 (match_operand:VDX 1 "s_register_operand" "w")
2948 (match_operand:VDX 2 "s_register_operand" "w")))]
2951 "&& reload_completed"
2954 neon_split_vcombine (operands);
2957 [(set_attr "type" "multiple")]
;; Expander for neon_vget_high<mode> over VQX: moves the <V_HALF>mode subreg
;; of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode) into operand 0.
2960 (define_expand "neon_vget_high<mode>"
2961 [(match_operand:<V_HALF> 0 "s_register_operand")
2962 (match_operand:VQX 1 "s_register_operand")]
2965 emit_move_insn (operands[0],
2966 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
2967 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for neon_vget_low<mode> over VQX: moves a <V_HALF>mode subreg of
;; operand 1 into operand 0.
2971 (define_expand "neon_vget_low<mode>"
2972 [(match_operand:<V_HALF> 0 "s_register_operand")
2973 (match_operand:VQX 1 "s_register_operand")]
2976 emit_move_insn (operands[0],
2977 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Standard-named float conversion from VCVTI to <V_CVTTO>, emitted as
;; vcvt.f32.s32.  Disabled when flag_rounding_math is set.
2982 (define_insn "float<mode><V_cvtto>2"
2983 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2984 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2985 "TARGET_NEON && !flag_rounding_math"
2986 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
2987 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named unsigned_float conversion from VCVTI to <V_CVTTO>, emitted
;; as vcvt.f32.u32.  Disabled when flag_rounding_math is set.
2990 (define_insn "floatuns<mode><V_cvtto>2"
2991 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2992 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2993 "TARGET_NEON && !flag_rounding_math"
2994 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
2995 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named fix conversion from VCVTF to <V_CVTTO>, emitted as
;; vcvt.s32.f32.
2998 (define_insn "fix_trunc<mode><V_cvtto>2"
2999 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3000 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3002 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3003 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Standard-named unsigned_fix conversion from VCVTF to <V_CVTTO>, emitted as
;; vcvt.u32.f32.
3006 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3007 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3008 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3010 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3011 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; UNSPEC conversion with a VCVTF source, emitted as vcvt.<sup>32.f32.
3014 (define_insn "neon_vcvt<sup><mode>"
3015 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3016 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3019 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3020 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; UNSPEC conversion with a VCVTI source, emitted as vcvt.f32.<sup>32.
3023 (define_insn "neon_vcvt<sup><mode>"
3024 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3025 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3028 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3029 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Converts a V4HF vector to V4SF, emitted as vcvt.f32.f16.  Requires
;; TARGET_NEON && TARGET_FP16.
3032 (define_insn "neon_vcvtv4sfv4hf"
3033 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3034 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3036 "TARGET_NEON && TARGET_FP16"
3037 "vcvt.f32.f16\t%q0, %P1"
3038 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Converts a V4SF vector to V4HF, emitted as vcvt.f16.f32.  Requires
;; TARGET_NEON && TARGET_FP16.
3041 (define_insn "neon_vcvtv4hfv4sf"
3042 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3043 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3045 "TARGET_NEON && TARGET_FP16"
3046 "vcvt.f16.f32\t%P0, %q1"
3047 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Conversion with an extra immediate (operand 2) and a VCVTF source.  The
;; immediate is checked with neon_const_bounds (operands[2], 1, 33) and
;; printed as the third assembly operand of vcvt.<sup>32.f32.
;; NOTE(review): presumably the bounds are half-open, i.e. 1..32 -- confirm
;; against neon_const_bounds.
3050 (define_insn "neon_vcvt<sup>_n<mode>"
3051 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3052 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3053 (match_operand:SI 2 "immediate_operand" "i")]
3057 neon_const_bounds (operands[2], 1, 33);
3058 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3060 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Conversion with an extra immediate (operand 2) and a VCVTI source.  The
;; immediate is checked with neon_const_bounds (operands[2], 1, 33) and
;; printed as the third assembly operand of vcvt.f32.<sup>32.
;; NOTE(review): presumably the bounds are half-open, i.e. 1..32 -- confirm
;; against neon_const_bounds.
3063 (define_insn "neon_vcvt<sup>_n<mode>"
3064 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3065 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3066 (match_operand:SI 2 "immediate_operand" "i")]
3070 neon_const_bounds (operands[2], 1, 33);
3071 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3073 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Emits vmovn: one VN source, result in the narrowed mode <V_narrow>.
3076 (define_insn "neon_vmovn<mode>"
3077 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3078 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3081 "vmovn.<V_if_elem>\t%P0, %q1"
3082 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Emits vqmovn with the <sup> type prefix: one VN source, result in the
;; narrowed mode <V_narrow>.
3085 (define_insn "neon_vqmovn<sup><mode>"
3086 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3087 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3090 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3091 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Emits vqmovun: one VN source, result in the narrowed mode <V_narrow>.
3094 (define_insn "neon_vqmovun<mode>"
3095 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3096 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3099 "vqmovun.<V_s_elem>\t%P0, %q1"
3100 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Emits vmovl with the <sup> type prefix: one VW source, result in the
;; widened mode <V_widen>.
3103 (define_insn "neon_vmovl<sup><mode>"
3104 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3105 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3108 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3109 [(set_attr "type" "neon_shift_imm_long")]
;; VMD variant of vmul by lane: multiplies operand 1 by the lane of operand 2
;; given by immediate operand 3.  Operand 2 uses <scalar_mul_constraint>.  The
;; type attribute is neon_fp_mul_s_scalar<q> for float modes and
;; neon_mul_<V_elem_ch>_scalar<q> otherwise.
3112 (define_insn "neon_vmul_lane<mode>"
3113 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3114 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3115 (match_operand:VMD 2 "s_register_operand"
3116 "<scalar_mul_constraint>")
3117 (match_operand:SI 3 "immediate_operand" "i")]
3121 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3124 (if_then_else (match_test "<Is_float_mode>")
3125 (const_string "neon_fp_mul_s_scalar<q>")
3126 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; VMQ variant of vmul by lane: operand 1 and the destination are VMQ vectors
;; (printed with %q), while the lane source in operand 2 has the half-width
;; mode <V_HALF> (printed with %P).  Type attribute selection matches the VMD
;; variant.
3129 (define_insn "neon_vmul_lane<mode>"
3130 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3131 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3132 (match_operand:<V_HALF> 2 "s_register_operand"
3133 "<scalar_mul_constraint>")
3134 (match_operand:SI 3 "immediate_operand" "i")]
3138 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3141 (if_then_else (match_test "<Is_float_mode>")
3142 (const_string "neon_fp_mul_s_scalar<q>")
3143 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Emits vmull by lane with the <sup> type prefix: VMDI operand 1 times the
;; lane of VMDI operand 2 given by immediate operand 3, result in <V_widen>.
3146 (define_insn "neon_vmull<sup>_lane<mode>"
3147 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3148 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3149 (match_operand:VMDI 2 "s_register_operand"
3150 "<scalar_mul_constraint>")
3151 (match_operand:SI 3 "immediate_operand" "i")]
3155 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3157 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by scalar lane (UNSPEC_VQDMULL_LANE): VMDI operand 1 and lane
;; operand 3 of VMDI operand 2 produce a <V_widen> result (%q0).
3160 (define_insn "neon_vqdmull_lane<mode>"
3161 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3162 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3163 (match_operand:VMDI 2 "s_register_operand"
3164 "<scalar_mul_constraint>")
3165 (match_operand:SI 3 "immediate_operand" "i")]
3166 UNSPEC_VQDMULL_LANE))]
3169 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3171 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vq<r>dmulh by scalar lane, VMQI form: destination and operand 1 are
;; printed with %q; the scalar vector (operand 2) has mode <V_HALF> and
;; is printed with %P, indexed by the immediate operand 3.
3174 (define_insn "neon_vq<r>dmulh_lane<mode>"
3175 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3176 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3177 (match_operand:<V_HALF> 2 "s_register_operand"
3178 "<scalar_mul_constraint>")
3179 (match_operand:SI 3 "immediate_operand" "i")]
3183 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3185 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vq<r>dmulh by scalar lane, VMDI form: all vector operands are printed
;; with %P, and operand 3 selects the lane of operand 2.
;; The scheduling type deliberately has no "_q" suffix: the "_scalar_q"
;; class is used by the VMQI sibling of this pattern, whose destination is
;; printed with %q.
3188 (define_insn "neon_vq<r>dmulh_lane<mode>"
3189 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3190 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3191 (match_operand:VMDI 2 "s_register_operand"
3192 "<scalar_mul_constraint>")
3193 (match_operand:SI 3 "immediate_operand" "i")]
3197 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3199 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
3202 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI form.  Operand 1 is tied to the output (constraint "0") and is not
;; printed; the template uses operand 2 (%q2) and lane operand 4 of the
;; <V_HALF> operand 3 (%P3[%c4]).  <VQRDMLH_AS:neon_rdma_as> selects the
;; mnemonic letter.
3203 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3204 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3205 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3206 (match_operand:VMQI 2 "s_register_operand" "w")
3207 (match_operand:<V_HALF> 3 "s_register_operand"
3208 "<scalar_mul_constraint>")
3209 (match_operand:SI 4 "immediate_operand" "i")]
3214 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3216 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VMDI form of vqrdmlah_lane / vqrdmlsh_lane.  Operand 1 is tied to the
;; output (constraint "0"); the template prints operand 2 and lane
;; operand 4 of operand 3, all with %P.
3219 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3220 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3221 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3222 (match_operand:VMDI 2 "s_register_operand" "w")
3223 (match_operand:VMDI 3 "s_register_operand"
3224 "<scalar_mul_constraint>")
3225 (match_operand:SI 4 "immediate_operand" "i")]
3230 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3232 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; vmla by scalar lane, VMD modes.  Operand 1 is the accumulator, tied to
;; the output by constraint "0"; operand 2 is multiplied by lane operand 4
;; of operand 3.  The "type" attribute depends on <Is_float_mode>.
3235 (define_insn "neon_vmla_lane<mode>"
3236 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3237 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3238 (match_operand:VMD 2 "s_register_operand" "w")
3239 (match_operand:VMD 3 "s_register_operand"
3240 "<scalar_mul_constraint>")
3241 (match_operand:SI 4 "immediate_operand" "i")]
3245 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3248 (if_then_else (match_test "<Is_float_mode>")
3249 (const_string "neon_fp_mla_s_scalar<q>")
3250 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmla by scalar lane, VMQ modes.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is printed with %q and the <V_HALF> scalar
;; vector (operand 3) with %P, indexed by operand 4.
3253 (define_insn "neon_vmla_lane<mode>"
3254 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3255 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3256 (match_operand:VMQ 2 "s_register_operand" "w")
3257 (match_operand:<V_HALF> 3 "s_register_operand"
3258 "<scalar_mul_constraint>")
3259 (match_operand:SI 4 "immediate_operand" "i")]
3263 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3266 (if_then_else (match_test "<Is_float_mode>")
3267 (const_string "neon_fp_mla_s_scalar<q>")
3268 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlal by scalar lane.  The <V_widen> accumulator (operand 1) is tied to
;; the output; VMDI operand 2 is multiplied by lane operand 4 of VMDI
;; operand 3.  <sup> supplies the signedness letter.
3271 (define_insn "neon_vmlal<sup>_lane<mode>"
3272 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3273 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3274 (match_operand:VMDI 2 "s_register_operand" "w")
3275 (match_operand:VMDI 3 "s_register_operand"
3276 "<scalar_mul_constraint>")
3277 (match_operand:SI 4 "immediate_operand" "i")]
3281 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3283 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by scalar lane (UNSPEC_VQDMLAL_LANE).  The <V_widen>
;; accumulator (operand 1) is tied to the output; VMDI operand 2 and lane
;; operand 4 of VMDI operand 3 are the multiplicands.
3286 (define_insn "neon_vqdmlal_lane<mode>"
3287 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3288 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3289 (match_operand:VMDI 2 "s_register_operand" "w")
3290 (match_operand:VMDI 3 "s_register_operand"
3291 "<scalar_mul_constraint>")
3292 (match_operand:SI 4 "immediate_operand" "i")]
3293 UNSPEC_VQDMLAL_LANE))]
3296 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3298 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; vmls by scalar lane, VMD modes.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 and lane operand 4 of operand 3 are the
;; multiplicands.  Uses the same "type" classes as the vmla_lane pattern.
3301 (define_insn "neon_vmls_lane<mode>"
3302 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3303 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3304 (match_operand:VMD 2 "s_register_operand" "w")
3305 (match_operand:VMD 3 "s_register_operand"
3306 "<scalar_mul_constraint>")
3307 (match_operand:SI 4 "immediate_operand" "i")]
3311 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3314 (if_then_else (match_test "<Is_float_mode>")
3315 (const_string "neon_fp_mla_s_scalar<q>")
3316 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmls by scalar lane, VMQ modes.  Operand 1 is tied to the output;
;; operand 2 is printed with %q and the <V_HALF> scalar vector
;; (operand 3) with %P, indexed by operand 4.
3319 (define_insn "neon_vmls_lane<mode>"
3320 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3321 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3322 (match_operand:VMQ 2 "s_register_operand" "w")
3323 (match_operand:<V_HALF> 3 "s_register_operand"
3324 "<scalar_mul_constraint>")
3325 (match_operand:SI 4 "immediate_operand" "i")]
3329 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3332 (if_then_else (match_test "<Is_float_mode>")
3333 (const_string "neon_fp_mla_s_scalar<q>")
3334 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlsl by scalar lane.  The <V_widen> operand 1 is tied to the output;
;; VMDI operand 2 and lane operand 4 of VMDI operand 3 are the
;; multiplicands.  <sup> supplies the signedness letter.
3337 (define_insn "neon_vmlsl<sup>_lane<mode>"
3338 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3339 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3340 (match_operand:VMDI 2 "s_register_operand" "w")
3341 (match_operand:VMDI 3 "s_register_operand"
3342 "<scalar_mul_constraint>")
3343 (match_operand:SI 4 "immediate_operand" "i")]
3347 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3349 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by scalar lane (UNSPEC_VQDMLSL_LANE).  The <V_widen> operand 1
;; is tied to the output; VMDI operand 2 and lane operand 4 of VMDI
;; operand 3 are the multiplicands.
3352 (define_insn "neon_vqdmlsl_lane<mode>"
3353 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3354 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3355 (match_operand:VMDI 2 "s_register_operand" "w")
3356 (match_operand:VMDI 3 "s_register_operand"
3357 "<scalar_mul_constraint>")
3358 (match_operand:SI 4 "immediate_operand" "i")]
3359 UNSPEC_VQDMLSL_LANE))]
3362 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3364 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3367 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3368 ; core register into a temp register, then use a scalar taken from that. This
3369 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3370 ; or extracted from another vector. In the latter case it's currently better to
3371 ; use the "_lane" variant, and the former case can probably be implemented
3372 ; using vld1_lane, but that hasn't been done yet.
;; vmul_n, VMD modes: the <V_elem> scalar in operand 2 is placed in lane 0
;; of a fresh <MODE> temporary with vset_lane, and neon_vmul_lane<mode> is
;; then emitted with that temporary as the scalar vector.
3374 (define_expand "neon_vmul_n<mode>"
3375 [(match_operand:VMD 0 "s_register_operand" "")
3376 (match_operand:VMD 1 "s_register_operand" "")
3377 (match_operand:<V_elem> 2 "s_register_operand" "")]
3380 rtx tmp = gen_reg_rtx (<MODE>mode);
3381 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3382 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; vmul_n, VMQ modes: the scalar in operand 2 is placed in lane 0 of a
;; fresh <V_HALF> temporary with vset_lane, and neon_vmul_lane<mode> is
;; then emitted with that temporary as the scalar vector.
3387 (define_expand "neon_vmul_n<mode>"
3388 [(match_operand:VMQ 0 "s_register_operand" "")
3389 (match_operand:VMQ 1 "s_register_operand" "")
3390 (match_operand:<V_elem> 2 "s_register_operand" "")]
3393 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3394 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3395 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; vmull_n (the "s" variant): the scalar in operand 2 is placed in lane 0
;; of a fresh <MODE> temporary, then neon_vmulls_lane<mode> is emitted
;; with that temporary as the scalar vector.
3400 (define_expand "neon_vmulls_n<mode>"
3401 [(match_operand:<V_widen> 0 "s_register_operand" "")
3402 (match_operand:VMDI 1 "s_register_operand" "")
3403 (match_operand:<V_elem> 2 "s_register_operand" "")]
3406 rtx tmp = gen_reg_rtx (<MODE>mode);
3407 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3408 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; vmull_n (the "u" variant): the scalar in operand 2 is placed in lane 0
;; of a fresh <MODE> temporary, then neon_vmullu_lane<mode> is emitted
;; with that temporary as the scalar vector.
3413 (define_expand "neon_vmullu_n<mode>"
3414 [(match_operand:<V_widen> 0 "s_register_operand" "")
3415 (match_operand:VMDI 1 "s_register_operand" "")
3416 (match_operand:<V_elem> 2 "s_register_operand" "")]
3419 rtx tmp = gen_reg_rtx (<MODE>mode);
3420 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3421 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; vqdmull_n: the scalar in operand 2 is placed in lane 0 of a fresh
;; <MODE> temporary, then neon_vqdmull_lane<mode> is emitted with that
;; temporary as the scalar vector.
3426 (define_expand "neon_vqdmull_n<mode>"
3427 [(match_operand:<V_widen> 0 "s_register_operand" "")
3428 (match_operand:VMDI 1 "s_register_operand" "")
3429 (match_operand:<V_elem> 2 "s_register_operand" "")]
3432 rtx tmp = gen_reg_rtx (<MODE>mode);
3433 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3434 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; vqdmulh_n, VMDI modes: the scalar in operand 2 is placed in lane 0 of a
;; fresh <MODE> temporary, then neon_vqdmulh_lane<mode> is emitted with
;; that temporary as the scalar vector.
3439 (define_expand "neon_vqdmulh_n<mode>"
3440 [(match_operand:VMDI 0 "s_register_operand" "")
3441 (match_operand:VMDI 1 "s_register_operand" "")
3442 (match_operand:<V_elem> 2 "s_register_operand" "")]
3445 rtx tmp = gen_reg_rtx (<MODE>mode);
3446 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3447 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqrdmulh_n, VMDI modes: the scalar in operand 2 is placed in lane 0 of
;; a fresh <MODE> temporary, then neon_vqrdmulh_lane<mode> is emitted with
;; that temporary as the scalar vector.
3452 (define_expand "neon_vqrdmulh_n<mode>"
3453 [(match_operand:VMDI 0 "s_register_operand" "")
3454 (match_operand:VMDI 1 "s_register_operand" "")
3455 (match_operand:<V_elem> 2 "s_register_operand" "")]
3458 rtx tmp = gen_reg_rtx (<MODE>mode);
3459 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3460 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqdmulh_n, VMQI modes: the scalar in operand 2 is placed in lane 0 of a
;; fresh <V_HALF> temporary, then neon_vqdmulh_lane<mode> is emitted with
;; that temporary as the scalar vector.
3465 (define_expand "neon_vqdmulh_n<mode>"
3466 [(match_operand:VMQI 0 "s_register_operand" "")
3467 (match_operand:VMQI 1 "s_register_operand" "")
3468 (match_operand:<V_elem> 2 "s_register_operand" "")]
3471 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3472 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3473 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqrdmulh_n, VMQI modes: the scalar in operand 2 is placed in lane 0 of
;; a fresh <V_HALF> temporary, then neon_vqrdmulh_lane<mode> is emitted
;; with that temporary as the scalar vector.
3478 (define_expand "neon_vqrdmulh_n<mode>"
3479 [(match_operand:VMQI 0 "s_register_operand" "")
3480 (match_operand:VMQI 1 "s_register_operand" "")
3481 (match_operand:<V_elem> 2 "s_register_operand" "")]
3484 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3485 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3486 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vmla_n, VMD modes: the scalar in operand 3 is placed in lane 0 of a
;; fresh <MODE> temporary, then neon_vmla_lane<mode> is emitted with
;; operands 0, 1 and 2 passed through.
3491 (define_expand "neon_vmla_n<mode>"
3492 [(match_operand:VMD 0 "s_register_operand" "")
3493 (match_operand:VMD 1 "s_register_operand" "")
3494 (match_operand:VMD 2 "s_register_operand" "")
3495 (match_operand:<V_elem> 3 "s_register_operand" "")]
3498 rtx tmp = gen_reg_rtx (<MODE>mode);
3499 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3500 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; vmla_n, VMQ modes: the scalar in operand 3 is placed in lane 0 of a
;; fresh <V_HALF> temporary, then neon_vmla_lane<mode> is emitted with
;; operands 0, 1 and 2 passed through.
3505 (define_expand "neon_vmla_n<mode>"
3506 [(match_operand:VMQ 0 "s_register_operand" "")
3507 (match_operand:VMQ 1 "s_register_operand" "")
3508 (match_operand:VMQ 2 "s_register_operand" "")
3509 (match_operand:<V_elem> 3 "s_register_operand" "")]
3512 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3513 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3514 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; vmlal_n (the "s" variant): the scalar in operand 3 is placed in lane 0
;; of a fresh <MODE> temporary, then neon_vmlals_lane<mode> is emitted
;; with operands 0, 1 and 2 passed through.
3519 (define_expand "neon_vmlals_n<mode>"
3520 [(match_operand:<V_widen> 0 "s_register_operand" "")
3521 (match_operand:<V_widen> 1 "s_register_operand" "")
3522 (match_operand:VMDI 2 "s_register_operand" "")
3523 (match_operand:<V_elem> 3 "s_register_operand" "")]
3526 rtx tmp = gen_reg_rtx (<MODE>mode);
3527 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3528 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; vmlal_n (the "u" variant): the scalar in operand 3 is placed in lane 0
;; of a fresh <MODE> temporary, then neon_vmlalu_lane<mode> is emitted
;; with operands 0, 1 and 2 passed through.
3533 (define_expand "neon_vmlalu_n<mode>"
3534 [(match_operand:<V_widen> 0 "s_register_operand" "")
3535 (match_operand:<V_widen> 1 "s_register_operand" "")
3536 (match_operand:VMDI 2 "s_register_operand" "")
3537 (match_operand:<V_elem> 3 "s_register_operand" "")]
3540 rtx tmp = gen_reg_rtx (<MODE>mode);
3541 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3542 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; vqdmlal_n: the scalar in operand 3 is placed in lane 0 of a fresh
;; <MODE> temporary, then neon_vqdmlal_lane<mode> is emitted with
;; operands 0, 1 and 2 passed through.
3547 (define_expand "neon_vqdmlal_n<mode>"
3548 [(match_operand:<V_widen> 0 "s_register_operand" "")
3549 (match_operand:<V_widen> 1 "s_register_operand" "")
3550 (match_operand:VMDI 2 "s_register_operand" "")
3551 (match_operand:<V_elem> 3 "s_register_operand" "")]
3554 rtx tmp = gen_reg_rtx (<MODE>mode);
3555 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3556 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; vmls_n, VMD modes: the scalar in operand 3 is placed in lane 0 of a
;; fresh <MODE> temporary, then neon_vmls_lane<mode> is emitted with
;; operands 0, 1 and 2 passed through.
3561 (define_expand "neon_vmls_n<mode>"
3562 [(match_operand:VMD 0 "s_register_operand" "")
3563 (match_operand:VMD 1 "s_register_operand" "")
3564 (match_operand:VMD 2 "s_register_operand" "")
3565 (match_operand:<V_elem> 3 "s_register_operand" "")]
3568 rtx tmp = gen_reg_rtx (<MODE>mode);
3569 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3570 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; vmls_n, VMQ modes: the scalar in operand 3 is placed in lane 0 of a
;; fresh <V_HALF> temporary, then neon_vmls_lane<mode> is emitted with
;; operands 0, 1 and 2 passed through.
3575 (define_expand "neon_vmls_n<mode>"
3576 [(match_operand:VMQ 0 "s_register_operand" "")
3577 (match_operand:VMQ 1 "s_register_operand" "")
3578 (match_operand:VMQ 2 "s_register_operand" "")
3579 (match_operand:<V_elem> 3 "s_register_operand" "")]
3582 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3583 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3584 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; vmlsl_n (the "s" variant): the scalar in operand 3 is placed in lane 0
;; of a fresh <MODE> temporary, then neon_vmlsls_lane<mode> is emitted
;; with operands 0, 1 and 2 passed through.
3589 (define_expand "neon_vmlsls_n<mode>"
3590 [(match_operand:<V_widen> 0 "s_register_operand" "")
3591 (match_operand:<V_widen> 1 "s_register_operand" "")
3592 (match_operand:VMDI 2 "s_register_operand" "")
3593 (match_operand:<V_elem> 3 "s_register_operand" "")]
3596 rtx tmp = gen_reg_rtx (<MODE>mode);
3597 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3598 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; vmlsl_n (the "u" variant): the scalar in operand 3 is placed in lane 0
;; of a fresh <MODE> temporary, then neon_vmlslu_lane<mode> is emitted
;; with operands 0, 1 and 2 passed through.
3603 (define_expand "neon_vmlslu_n<mode>"
3604 [(match_operand:<V_widen> 0 "s_register_operand" "")
3605 (match_operand:<V_widen> 1 "s_register_operand" "")
3606 (match_operand:VMDI 2 "s_register_operand" "")
3607 (match_operand:<V_elem> 3 "s_register_operand" "")]
3610 rtx tmp = gen_reg_rtx (<MODE>mode);
3611 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3612 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; vqdmlsl_n: the scalar in operand 3 is placed in lane 0 of a fresh
;; <MODE> temporary, then neon_vqdmlsl_lane<mode> is emitted with
;; operands 0, 1 and 2 passed through.
3617 (define_expand "neon_vqdmlsl_n<mode>"
3618 [(match_operand:<V_widen> 0 "s_register_operand" "")
3619 (match_operand:<V_widen> 1 "s_register_operand" "")
3620 (match_operand:VMDI 2 "s_register_operand" "")
3621 (match_operand:<V_elem> 3 "s_register_operand" "")]
3624 rtx tmp = gen_reg_rtx (<MODE>mode);
3625 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3626 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on VDQX operands 1 and 2 with immediate operand 3.  Before the
;; template is returned, operand 3 is passed to neon_const_bounds with the
;; bounds 0 and GET_MODE_NUNITS of the mode.
3631 (define_insn "neon_vext<mode>"
3632 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3633 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3634 (match_operand:VDQX 2 "s_register_operand" "w")
3635 (match_operand:SI 3 "immediate_operand" "i")]
3639 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3640 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3642 [(set_attr "type" "neon_ext<q>")]
;; vrev64 for every mode in the VDQ iterator; <V_reg> selects the
;; register print modifier for the mode.
3645 (define_insn "neon_vrev64<mode>"
3646 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3647 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3650 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3651 [(set_attr "type" "neon_rev<q>")]
;; vrev32 for every mode in the VX iterator; <V_reg> selects the register
;; print modifier for the mode.
3654 (define_insn "neon_vrev32<mode>"
3655 [(set (match_operand:VX 0 "s_register_operand" "=w")
3656 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3659 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3660 [(set_attr "type" "neon_rev<q>")]
;; vrev16 for every mode in the VE iterator; <V_reg> selects the register
;; print modifier for the mode.
3663 (define_insn "neon_vrev16<mode>"
3664 [(set (match_operand:VE 0 "s_register_operand" "=w")
3665 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3668 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3669 [(set_attr "type" "neon_rev<q>")]
3672 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3673 ; allocation. For an intrinsic of form:
3674 ; rD = vbsl_* (rS, rN, rM)
3675 ; We can use any of:
3676 ; vbsl rS, rN, rM (if D = S)
3677 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3678 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Three alternatives, chosen by which input is tied to the output:
;;   alt 0: operand 1 tied -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied -> vbit %0, %2, %1
;;   alt 2: operand 2 tied -> vbif %0, %3, %1
3680 (define_insn "neon_vbsl<mode>_internal"
3681 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3682 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3683 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3684 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3688 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3689 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3690 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3691 [(set_attr "type" "neon_bsl<q>")]
;; Expander for vbsl.  Operand 1 arrives in mode <V_cmp_result>, while
;; operands 0, 2 and 3 are VDQX; operand 1 is converted to <MODE> with
;; gen_lowpart before the pattern is generated.
3694 (define_expand "neon_vbsl<mode>"
3695 [(set (match_operand:VDQX 0 "s_register_operand" "")
3696 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3697 (match_operand:VDQX 2 "s_register_operand" "")
3698 (match_operand:VDQX 3 "s_register_operand" "")]
3702 /* We can't alias operands together if they have different modes. */
3703 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift of VDQIX operand 1 by the per-element amounts held in the VDQIX
;; register operand 2; <shift_op> and <sup> select the mnemonic and the
;; signedness letter.
;; NOTE(review): the shift amount is a register here, yet the scheduling
;; type is "neon_shift_imm<q>" -- confirm this is intended.
3707 (define_insn "neon_v<shift_op><sup><mode>"
3708 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3709 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3710 (match_operand:VDQIX 2 "s_register_operand" "w")]
3713 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3714 [(set_attr "type" "neon_shift_imm<q>")]
;; Same operand layout as the preceding register-shift pattern, but typed
;; "neon_sat_shift_imm<q>" for scheduling.
;; NOTE(review): the shift amount is a register here, yet the scheduling
;; type is an "_imm" class -- confirm this is intended.
3718 (define_insn "neon_v<shift_op><sup><mode>"
3719 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3720 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3721 (match_operand:VDQIX 2 "s_register_operand" "w")]
3724 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3725 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift of VDQIX operand 1 by the immediate operand 2.  The immediate is
;; passed to neon_const_bounds with the bounds 1 and element-bits + 1
;; before the template is returned.
3729 (define_insn "neon_v<shift_op><sup>_n<mode>"
3730 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3731 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3732 (match_operand:SI 2 "immediate_operand" "i")]
3736 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3737 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3739 [(set_attr "type" "neon_shift_imm<q>")]
3742 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: VN operand 1 (%q1) to a <V_narrow>
;; result (%P0).  The immediate is passed to neon_const_bounds with the
;; bounds 1 and element-bits / 2 + 1.
3743 (define_insn "neon_v<shift_op>_n<mode>"
3744 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3745 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3746 (match_operand:SI 2 "immediate_operand" "i")]
3750 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3751 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3753 [(set_attr "type" "neon_shift_imm_narrow_q")]
3756 ;; vqshrn_n, vqrshrn_n
;; Narrowing shift by immediate with a <sup> signedness letter: VN
;; operand 1 (%q1) to a <V_narrow> result (%P0).  The immediate is passed
;; to neon_const_bounds with the bounds 1 and element-bits / 2 + 1.
3757 (define_insn "neon_v<shift_op><sup>_n<mode>"
3758 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3759 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3760 (match_operand:SI 2 "immediate_operand" "i")]
3764 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3765 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3767 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3770 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift by immediate using the <V_s_elem> suffix: VN operand 1
;; (%q1) to a <V_narrow> result (%P0).  The immediate is passed to
;; neon_const_bounds with the bounds 1 and element-bits / 2 + 1.
3771 (define_insn "neon_v<shift_op>_n<mode>"
3772 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3773 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3774 (match_operand:SI 2 "immediate_operand" "i")]
3778 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3779 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3781 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate.  Operand 2 is passed to neon_const_bounds with the
;; bounds 0 and element-bits before the template is returned.
3784 (define_insn "neon_vshl_n<mode>"
3785 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3786 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3787 (match_operand:SI 2 "immediate_operand" "i")]
3791 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3792 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3794 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate, with <sup> supplying the signedness letter.
;; Operand 2 is passed to neon_const_bounds with the bounds 0 and
;; element-bits.
3797 (define_insn "neon_vqshl_<sup>_n<mode>"
3798 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3799 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3800 (match_operand:SI 2 "immediate_operand" "i")]
3804 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3805 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3807 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate, using the <V_s_elem> suffix.  Operand 2 is passed
;; to neon_const_bounds with the bounds 0 and element-bits.
3810 (define_insn "neon_vqshlu_n<mode>"
3811 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3812 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3813 (match_operand:SI 2 "immediate_operand" "i")]
3817 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3818 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
3820 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vshll by immediate: VW operand 1 (%P1) to a <V_widen> result (%q0).
;; Unlike the non-widening vshl_n pattern, the upper bound passed to
;; neon_const_bounds is element-bits + 1.
3823 (define_insn "neon_vshll<sup>_n<mode>"
3824 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3825 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3826 (match_operand:SI 2 "immediate_operand" "i")]
3830 /* The bounds passed are 0 and size + 1, so imm == size is permitted.
   NOTE(review): this comment used to read "0 < imm <= size", but the
   lower bound given below is 0, not 1 -- confirm whether imm == 0 is
   meant to be accepted.  */
3831 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3832 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
3834 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-and-accumulate by immediate.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is the shifted input, and operand 3 is the
;; immediate, passed to neon_const_bounds with the bounds 1 and
;; element-bits + 1.
3838 (define_insn "neon_v<shift_op><sup>_n<mode>"
3839 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3840 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3841 (match_operand:VDQIX 2 "s_register_operand" "w")
3842 (match_operand:SI 3 "immediate_operand" "i")]
3846 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3847 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3849 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate.  Operand 1 is tied to the output (constraint "0");
;; operand 3 is passed to neon_const_bounds with the bounds 1 and
;; element-bits + 1.
;; NOTE(review): the shift amount is an immediate, yet the scheduling type
;; is "neon_shift_reg<q>" -- confirm this is intended.
3852 (define_insn "neon_vsri_n<mode>"
3853 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3854 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3855 (match_operand:VDQIX 2 "s_register_operand" "w")
3856 (match_operand:SI 3 "immediate_operand" "i")]
3860 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3861 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3863 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate.  Operand 1 is tied to the output (constraint "0");
;; operand 3 is passed to neon_const_bounds with the bounds 0 and
;; element-bits.
;; NOTE(review): the shift amount is an immediate, yet the scheduling type
;; is "neon_shift_reg<q>" -- confirm this is intended.
3866 (define_insn "neon_vsli_n<mode>"
3867 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3868 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3869 (match_operand:VDQIX 2 "s_register_operand" "w")
3870 (match_operand:SI 3 "immediate_operand" "i")]
3874 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3875 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3877 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl with a one-register table: operand 1 is the V8QI table, printed
;; inside the braces, and operand 2 is the V8QI index vector.
3880 (define_insn "neon_vtbl1v8qi"
3881 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3882 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3883 (match_operand:V8QI 2 "s_register_operand" "w")]
3886 "vtbl.8\t%P0, {%P1}, %P2"
3887 [(set_attr "type" "neon_tbl1")]
;; vtbl with a two-register table held in the TImode operand 1.  The
;; output code rebuilds the individual V8QI table registers from the hard
;; register number of operand 1, stepping by 2 for each V8QI register, and
;; prints the instruction itself with output_asm_insn.
3890 (define_insn "neon_vtbl2v8qi"
3891 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3892 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3893 (match_operand:V8QI 2 "s_register_operand" "w")]
3898 int tabbase = REGNO (operands[1]);
3900 ops[0] = operands[0];
3901 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3902 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3903 ops[3] = operands[2];
3904 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3908 [(set_attr "type" "neon_tbl2")]
;; vtbl with a three-register table held in the EImode operand 1.  The
;; three V8QI table registers are rebuilt from the hard register number of
;; operand 1 at offsets 0, 2 and 4; operand 2 is the index vector.
3911 (define_insn "neon_vtbl3v8qi"
3912 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3913 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3914 (match_operand:V8QI 2 "s_register_operand" "w")]
3919 int tabbase = REGNO (operands[1]);
3921 ops[0] = operands[0];
3922 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3923 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3924 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3925 ops[4] = operands[2];
3926 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3930 [(set_attr "type" "neon_tbl3")]
;; vtbl with a four-register table held in the OImode operand 1.  The four
;; V8QI table registers are rebuilt from the hard register number of
;; operand 1 at offsets 0, 2, 4 and 6; operand 2 is the index vector.
3933 (define_insn "neon_vtbl4v8qi"
3934 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3935 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3936 (match_operand:V8QI 2 "s_register_operand" "w")]
3941 int tabbase = REGNO (operands[1]);
3943 ops[0] = operands[0];
3944 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3945 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3946 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3947 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3948 ops[5] = operands[2];
3949 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3953 [(set_attr "type" "neon_tbl4")]
3956 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a single V16QI table (operand 1).  Once
;; reload_completed holds, it is split into two neon_vtbl2v8qi insns: one
;; for the low V8QI halves of the result and of the selector (operand 2),
;; one for the high halves.  Both use operand 1 viewed as TImode as their
;; table.  The output is marked earlyclobber ("=&w").
3957 (define_insn_and_split "neon_vtbl1v16qi"
3958 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3959 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3960 (match_operand:V16QI 2 "s_register_operand" "w")]
3964 "&& reload_completed"
3967 rtx op0, op1, op2, part0, part2;
3971 op1 = gen_lowpart (TImode, operands[1]);
3974 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3975 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3976 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3977 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3979 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3980 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3981 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3982 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3985 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table (operand 1).  Once
;; reload_completed holds, it is split into two V8QI lookups: one for the
;; low halves of the result and of the selector (operand 2), one for the
;; high halves.  The table operand is OImode, which is the table mode
;; declared by neon_vtbl4v8qi (neon_vtbl2v8qi declares a TImode table), so
;; the four-register generator is the one named here.
3988 (define_insn_and_split "neon_vtbl2v16qi"
3989 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3990 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
3991 (match_operand:V16QI 2 "s_register_operand" "w")]
3995 "&& reload_completed"
3998 rtx op0, op1, op2, part0, part2;
4005 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4006 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4007 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4008 emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4010 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4011 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4012 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4013 emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4016 [(set_attr "type" "multiple")]
4019 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4020 ;; handle quad-word input modes, producing octa-word output modes. But
4021 ;; that requires us to add support for octa-word vector modes in moves.
4022 ;; That seems overkill for this one use in vec_perm.
;; Combines the two V16QI operands 1 and 2 into the OImode operand 0.
;; Once reload_completed holds, the split is delegated to
;; neon_split_vcombine.
4023 (define_insn_and_split "neon_vcombinev16qi"
4024 [(set (match_operand:OI 0 "s_register_operand" "=w")
4025 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4026 (match_operand:V16QI 2 "s_register_operand" "w")]
4030 "&& reload_completed"
4033 neon_split_vcombine (operands);
4036 [(set_attr "type" "multiple")]
;; vtbx with a one-register table.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is the V8QI table and operand 3 the index
;; vector.
4039 (define_insn "neon_vtbx1v8qi"
4040 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4041 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4042 (match_operand:V8QI 2 "s_register_operand" "w")
4043 (match_operand:V8QI 3 "s_register_operand" "w")]
4046 "vtbx.8\t%P0, {%P2}, %P3"
4047 [(set_attr "type" "neon_tbl1")]
;; vtbx with a two-register table held in the TImode operand 2.  Operand 1
;; is tied to the output (constraint "0").  The V8QI table registers are
;; rebuilt from the hard register number of operand 2 at offsets 0 and 2;
;; operand 3 is the index vector.
4050 (define_insn "neon_vtbx2v8qi"
4051 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4052 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4053 (match_operand:TI 2 "s_register_operand" "w")
4054 (match_operand:V8QI 3 "s_register_operand" "w")]
4059 int tabbase = REGNO (operands[2]);
4061 ops[0] = operands[0];
4062 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4063 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4064 ops[3] = operands[3];
4065 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4069 [(set_attr "type" "neon_tbl2")]
;; vtbx with a three-register table held in the EImode operand 2.
;; Operand 1 is tied to the output (constraint "0").  The V8QI table
;; registers are rebuilt from the hard register number of operand 2 at
;; offsets 0, 2 and 4; operand 3 is the index vector.
4072 (define_insn "neon_vtbx3v8qi"
4073 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4074 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4075 (match_operand:EI 2 "s_register_operand" "w")
4076 (match_operand:V8QI 3 "s_register_operand" "w")]
4081 int tabbase = REGNO (operands[2]);
4083 ops[0] = operands[0];
4084 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4085 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4086 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4087 ops[4] = operands[3];
4088 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4092 [(set_attr "type" "neon_tbl3")]
;; vtbx with a four-register table held in the OImode operand 2.
;; Operand 1 is tied to the output (constraint "0").  The V8QI table
;; registers are rebuilt from the hard register number of operand 2 at
;; offsets 0, 2, 4 and 6; operand 3 is the index vector.
4095 (define_insn "neon_vtbx4v8qi"
4096 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4097 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4098 (match_operand:OI 2 "s_register_operand" "w")
4099 (match_operand:V8QI 3 "s_register_operand" "w")]
4104 int tabbase = REGNO (operands[2]);
4106 ops[0] = operands[0];
4107 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4108 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4109 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4110 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4111 ops[5] = operands[3];
4112 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4116 [(set_attr "type" "neon_tbl4")]
;; Expander producing two results from the same pair of inputs: operand 0
;; and operand 3 are both set from operands 1 and 2, the second set using
;; UNSPEC_VTRN2.
4119 (define_expand "neon_vtrn<mode>_internal"
4121 [(set (match_operand:VDQW 0 "s_register_operand" "")
4122 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4123 (match_operand:VDQW 2 "s_register_operand" "")]
4125 (set (match_operand:VDQW 3 "s_register_operand" "")
4126 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4131 ;; Note: Different operand numbering to handle tied registers correctly.
;; vtrn writes both of its registers.  Input operand 1 is tied to output
;; operand 0 (constraint "0") and input operand 3 to output operand 2
;; (constraint "2"); both outputs are earlyclobber ("=&w").  Only
;; operands 0 and 2 appear in the template.
4132 (define_insn "*neon_vtrn<mode>_insn"
4133 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4134 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4135 (match_operand:VDQW 3 "s_register_operand" "2")]
4137 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4138 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4141 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4142 [(set_attr "type" "neon_permute<q>")]
;; Expander producing two results from the same pair of inputs: operand 0
;; and operand 3 are both set from operands 1 and 2, the second set using
;; UNSPEC_VZIP2.
4145 (define_expand "neon_vzip<mode>_internal"
4147 [(set (match_operand:VDQW 0 "s_register_operand" "")
4148 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4149 (match_operand:VDQW 2 "s_register_operand" "")]
4151 (set (match_operand:VDQW 3 "s_register_operand" "")
4152 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4157 ;; Note: Different operand numbering to handle tied registers correctly.
;; vzip writes both of its registers.  Input operand 1 is tied to output
;; operand 0 (constraint "0") and input operand 3 to output operand 2
;; (constraint "2"); both outputs are earlyclobber ("=&w").  Only
;; operands 0 and 2 appear in the template.
4158 (define_insn "*neon_vzip<mode>_insn"
4159 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4160 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4161 (match_operand:VDQW 3 "s_register_operand" "2")]
4163 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4164 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4167 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4168 [(set_attr "type" "neon_zip<q>")]
;; Expander producing two results from the same pair of inputs: operand 0
;; and operand 3 are both set from operands 1 and 2, the second set using
;; UNSPEC_VUZP2.
4171 (define_expand "neon_vuzp<mode>_internal"
4173 [(set (match_operand:VDQW 0 "s_register_operand" "")
4174 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4175 (match_operand:VDQW 2 "s_register_operand" "")]
4177 (set (match_operand:VDQW 3 "s_register_operand" "")
4178 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4183 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 (both "=&w").  Input operand 1
;; is tied to output 0 (constraint "0") and input operand 3 to output 2
;; (constraint "2"), so the emitted vuzp names only registers 0 and 2.
;; Shares the "neon_zip<q>" type with the vzip insn.
4184 (define_insn "*neon_vuzp<mode>_insn"
4185 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4186 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4187 (match_operand:VDQW 3 "s_register_operand" "2")]
4189 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4190 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4193 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4194 [(set_attr "type" "neon_zip<q>")]
;; Reinterpret a register of any VD_RE mode (operand 1) as V8QI (operand 0).
;; The expansion is delegated to neon_reinterpret.
4197 (define_expand "neon_vreinterpretv8qi<mode>"
4198 [(match_operand:V8QI 0 "s_register_operand" "")
4199 (match_operand:VD_RE 1 "s_register_operand" "")]
4202 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VD_RE mode (operand 1) as V4HI (operand 0).
;; The expansion is delegated to neon_reinterpret.
4206 (define_expand "neon_vreinterpretv4hi<mode>"
4207 [(match_operand:V4HI 0 "s_register_operand" "")
4208 (match_operand:VD_RE 1 "s_register_operand" "")]
4211 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VD_RE mode (operand 1) as V2SI (operand 0).
;; The expansion is delegated to neon_reinterpret.
4215 (define_expand "neon_vreinterpretv2si<mode>"
4216 [(match_operand:V2SI 0 "s_register_operand" "")
4217 (match_operand:VD_RE 1 "s_register_operand" "")]
4220 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VD_RE mode (operand 1) as V2SF (operand 0).
;; The expansion is delegated to neon_reinterpret.
4224 (define_expand "neon_vreinterpretv2sf<mode>"
4225 [(match_operand:V2SF 0 "s_register_operand" "")
4226 (match_operand:VD_RE 1 "s_register_operand" "")]
4229 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VD_RE mode (operand 1) as DI (operand 0).
;; The expansion is delegated to neon_reinterpret.
4233 (define_expand "neon_vreinterpretdi<mode>"
4234 [(match_operand:DI 0 "s_register_operand" "")
4235 (match_operand:VD_RE 1 "s_register_operand" "")]
4238 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as TI (operand 0).
;; The expansion is delegated to neon_reinterpret.
4242 (define_expand "neon_vreinterpretti<mode>"
4243 [(match_operand:TI 0 "s_register_operand" "")
4244 (match_operand:VQXMOV 1 "s_register_operand" "")]
4247 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V16QI (operand 0).
;; The expansion is delegated to neon_reinterpret.
4252 (define_expand "neon_vreinterpretv16qi<mode>"
4253 [(match_operand:V16QI 0 "s_register_operand" "")
4254 (match_operand:VQXMOV 1 "s_register_operand" "")]
4257 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V8HI (operand 0).
;; The expansion is delegated to neon_reinterpret.
4261 (define_expand "neon_vreinterpretv8hi<mode>"
4262 [(match_operand:V8HI 0 "s_register_operand" "")
4263 (match_operand:VQXMOV 1 "s_register_operand" "")]
4266 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V4SI (operand 0).
;; The expansion is delegated to neon_reinterpret.
4270 (define_expand "neon_vreinterpretv4si<mode>"
4271 [(match_operand:V4SI 0 "s_register_operand" "")
4272 (match_operand:VQXMOV 1 "s_register_operand" "")]
4275 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V4SF (operand 0).
;; The expansion is delegated to neon_reinterpret.
4279 (define_expand "neon_vreinterpretv4sf<mode>"
4280 [(match_operand:V4SF 0 "s_register_operand" "")
4281 (match_operand:VQXMOV 1 "s_register_operand" "")]
4284 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a register of any VQXMOV mode (operand 1) as V2DI (operand 0).
;; The expansion is delegated to neon_reinterpret.
4288 (define_expand "neon_vreinterpretv2di<mode>"
4289 [(match_operand:V2DI 0 "s_register_operand" "")
4290 (match_operand:VQXMOV 1 "s_register_operand" "")]
4293 neon_reinterpret (operands[0], operands[1]);
;; vec_load_lanes expander for the VDQX modes: register operand 0 is set from
;; an unspec of structure memory operand 1 of the same mode.
4297 (define_expand "vec_load_lanes<mode><mode>"
4298 [(set (match_operand:VDQX 0 "s_register_operand")
4299 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Whole-vector load for the VDQX modes: register operand 0 ("=w") is loaded
;; from structure memory operand 1 ("Um") with a single vld1.
4303 (define_insn "neon_vld1<mode>"
4304 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4305 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4308 "vld1.<V_sz_elem>\t%h0, %A1"
4309 [(set_attr "type" "neon_load1_1reg<q>")]
4312 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4313 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order here (the output code converts operand 3 with
;; NEON_ENDIAN_LANE_N before printing it).
;;
;; VDX variant.  Operand 1 is a single <V_elem> in memory, operand 2 is the
;; incoming vector tied to the output register (constraint "0"), and operand
;; 3 is the immediate lane index.  Two vld1 templates are returned from: one
;; naming the whole register and one naming lane %c3 of it.
4315 (define_insn "neon_vld1_lane<mode>"
4316 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4317 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4318 (match_operand:VDX 2 "s_register_operand" "0")
4319 (match_operand:SI 3 "immediate_operand" "i")]
4323 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4324 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4325 operands[3] = GEN_INT (lane);
4327 return "vld1.<V_sz_elem>\t%P0, %A1";
4329 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4331 [(set_attr "type" "neon_load1_one_lane<q>")]
4334 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4335 ;; here on big endian targets.
;; VQX variant.  Operand 2 is the incoming vector tied to the output
;; (constraint "0").  The lane index is compared against max / 2 (half the
;; number of units) and operand 0 is rewritten as a <V_HALF>mode register at
;; regno before the vld1 template is printed.
;; NOTE(review): presumably this picks the D-register half that holds the
;; lane -- confirm against the full `lane >= max / 2' branch.
4336 (define_insn "neon_vld1_lane<mode>"
4337 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4338 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4339 (match_operand:VQX 2 "s_register_operand" "0")
4340 (match_operand:SI 3 "immediate_operand" "i")]
4344 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4345 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4346 operands[3] = GEN_INT (lane);
4347 int regno = REGNO (operands[0]);
4348 if (lane >= max / 2)
4352 operands[3] = GEN_INT (lane);
4354 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4356 return "vld1.<V_sz_elem>\t%P0, %A1";
4358 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4360 [(set_attr "type" "neon_load1_one_lane<q>")]
;; VD variant: operand 0 is the vec_duplicate of the single <V_elem> in
;; memory operand 1, emitted as vld1 with the all-lanes form {%P0[]}.
4363 (define_insn "neon_vld1_dup<mode>"
4364 [(set (match_operand:VD 0 "s_register_operand" "=w")
4365 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4367 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4368 [(set_attr "type" "neon_load1_all_lanes<q>")]
4371 ;; Special case for DImode. Treat it exactly like a simple load.
;; Operand 0 (DI register) is set from an unspec of DI memory operand 1.
4372 (define_expand "neon_vld1_dupdi"
4373 [(set (match_operand:DI 0 "s_register_operand" "")
4374 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; VQ variant: operand 0 is the vec_duplicate of the single <V_elem> in
;; memory operand 1.  One vld1 names both %e0[] and %f0[] in its register
;; list.
4380 (define_insn "neon_vld1_dup<mode>"
4381 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4382 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4385 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4387 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicate-load, matched as one insn and split once reload has
;; completed ("&& reload_completed").  The split loads the low DImode part of
;; operand 0 through gen_neon_vld1_dupdi, then copies that value into the high
;; DImode part with emit_move_insn.  The "length" attribute is 8.
4390 (define_insn_and_split "neon_vld1_dupv2di"
4391 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4392 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4395 "&& reload_completed"
4398 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4399 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4400 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4403 [(set_attr "length" "8")
4404 (set_attr "type" "neon_load1_all_lanes_q")]
;; vec_store_lanes expander for the VDQX modes: structure memory operand 0 is
;; set from an unspec of register operand 1 of the same mode.
4407 (define_expand "vec_store_lanes<mode><mode>"
4408 [(set (match_operand:VDQX 0 "neon_struct_operand")
4409 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Whole-vector store for the VDQX modes: register operand 1 ("w") is stored
;; to structure memory operand 0 ("=Um") with a single vst1.
4413 (define_insn "neon_vst1<mode>"
4414 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4415 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4418 "vst1.<V_sz_elem>\t%h1, %A0"
4419 [(set_attr "type" "neon_store1_1reg<q>")])
4421 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4422 ;; here on big endian targets.
;; VDX variant.  Stores into the <V_elem> memory operand 0 from vector
;; operand 1; operand 2 is the immediate lane index, converted with
;; NEON_ENDIAN_LANE_N.  Two vst1 templates are returned from: one naming the
;; whole register {%P1} and one naming lane %c2 of it.
4423 (define_insn "neon_vst1_lane<mode>"
4424 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4426 [(match_operand:VDX 1 "s_register_operand" "w")
4427 (match_operand:SI 2 "immediate_operand" "i")]
4431 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4432 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4433 operands[2] = GEN_INT (lane);
4435 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4437 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4439 [(set_attr "type" "neon_store1_one_lane<q>")]
4442 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4443 ;; here on big endian targets.
;; VQX variant.  The converted lane index is compared against max / 2 (half
;; the number of units); operand 2 is replaced by the resulting lane and
;; operand 1 is rewritten as a <V_HALF>mode register at regno before the vst1
;; template is printed.
4444 (define_insn "neon_vst1_lane<mode>"
4445 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4447 [(match_operand:VQX 1 "s_register_operand" "w")
4448 (match_operand:SI 2 "immediate_operand" "i")]
4452 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4453 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4454 int regno = REGNO (operands[1]);
4455 if (lane >= max / 2)
4460 operands[2] = GEN_INT (lane);
4461 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4463 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4465 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4467 [(set_attr "type" "neon_store1_one_lane<q>")]
;; vec_load_lanes expander producing a TI register (operand 0) from TI
;; structure memory (operand 1).  The dummy VDX unspec carries the element
;; mode so the pattern can be instantiated per VDX mode.
4470 (define_expand "vec_load_lanesti<mode>"
4471 [(set (match_operand:TI 0 "s_register_operand")
4472 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4473 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load into a TI register, per VDX mode.  When
;; <V_sz_elem> is 64 the insn is emitted as vld1.64, otherwise as vld2; the
;; "type" attribute makes the same distinction (neon_load1_2reg<q> versus
;; neon_load2_2reg<q>).
4477 (define_insn "neon_vld2<mode>"
4478 [(set (match_operand:TI 0 "s_register_operand" "=w")
4479 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4480 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4484 if (<V_sz_elem> == 64)
4485 return "vld1.64\t%h0, %A1";
4487 return "vld2.<V_sz_elem>\t%h0, %A1";
4490 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4491 (const_string "neon_load1_2reg<q>")
4492 (const_string "neon_load2_2reg<q>")))]
;; vec_load_lanes expander producing an OI register (operand 0) from OI
;; structure memory (operand 1), instantiated per VQ2 mode via the dummy
;; unspec.
4495 (define_expand "vec_load_lanesoi<mode>"
4496 [(set (match_operand:OI 0 "s_register_operand")
4497 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4498 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load into an OI register, per VQ2 mode, emitted as a
;; single vld2 over the register list %h0.
4502 (define_insn "neon_vld2<mode>"
4503 [(set (match_operand:OI 0 "s_register_operand" "=w")
4504 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4505 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4508 "vld2.<V_sz_elem>\t%h0, %A1"
4509 [(set_attr "type" "neon_load2_2reg_q")])
4511 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4512 ;; here on big endian targets.
;; TI variant (VD_LANE modes).  Operand 2 is the incoming TI value tied to
;; the output (constraint "0").  The two vectors are addressed as DImode
;; registers at regno and regno + 2, and vld2 is printed with the converted
;; lane index on both.
4513 (define_insn "neon_vld2_lane<mode>"
4514 [(set (match_operand:TI 0 "s_register_operand" "=w")
4515 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4516 (match_operand:TI 2 "s_register_operand" "0")
4517 (match_operand:SI 3 "immediate_operand" "i")
4518 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4522 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4523 int regno = REGNO (operands[0]);
4525 ops[0] = gen_rtx_REG (DImode, regno);
4526 ops[1] = gen_rtx_REG (DImode, regno + 2);
4527 ops[2] = operands[1];
4528 ops[3] = GEN_INT (lane);
4529 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4532 [(set_attr "type" "neon_load2_one_lane<q>")]
4535 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4536 ;; here on big endian targets.
;; OI variant (VQ_HS modes).  Operand 2 is the incoming OI value tied to the
;; output (constraint "0").  Lanes in the upper half (lane >= max / 2) are
;; handled before the registers are formed; the two vectors are then
;; addressed as DImode registers at regno and regno + 4.
4537 (define_insn "neon_vld2_lane<mode>"
4538 [(set (match_operand:OI 0 "s_register_operand" "=w")
4539 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4540 (match_operand:OI 2 "s_register_operand" "0")
4541 (match_operand:SI 3 "immediate_operand" "i")
4542 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4546 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4547 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4548 int regno = REGNO (operands[0]);
4550 if (lane >= max / 2)
4555 ops[0] = gen_rtx_REG (DImode, regno);
4556 ops[1] = gen_rtx_REG (DImode, regno + 4);
4557 ops[2] = operands[1];
4558 ops[3] = GEN_INT (lane);
4559 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4562 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load one <V_two_elem> structure from memory into a TI register, per VDX
;; mode.  For modes with more than one unit the all-lanes vld2 form
;; {%e0[], %f0[]} is used; otherwise a plain vld1 over %h0.  The "type"
;; attribute is selected on <V_mode_nunits> in the same way.
4565 (define_insn "neon_vld2_dup<mode>"
4566 [(set (match_operand:TI 0 "s_register_operand" "=w")
4567 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4568 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4572 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4573 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4575 return "vld1.<V_sz_elem>\t%h0, %A1";
4578 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4579 (const_string "neon_load2_all_lanes<q>")
4580 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanes expander storing a TI register (operand 1) to TI structure
;; memory (operand 0), instantiated per VDX mode via the dummy unspec.
4583 (define_expand "vec_store_lanesti<mode>"
4584 [(set (match_operand:TI 0 "neon_struct_operand")
4585 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4586 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store from a TI register, per VDX mode.  When
;; <V_sz_elem> is 64 the insn is emitted as vst1.64, otherwise as vst2.
;; NOTE(review): the non-64-bit "type" is neon_store2_one_lane<q> although
;; the template stores the whole register list %h1; the matching load pattern
;; neon_vld2<mode> uses neon_load2_2reg<q>.  Confirm whether a 2reg store
;; type was intended (affects scheduling only).
4590 (define_insn "neon_vst2<mode>"
4591 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4592 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4593 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4597 if (<V_sz_elem> == 64)
4598 return "vst1.64\t%h1, %A0";
4600 return "vst2.<V_sz_elem>\t%h1, %A0";
4603 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4604 (const_string "neon_store1_2reg<q>")
4605 (const_string "neon_store2_one_lane<q>")))]
;; vec_store_lanes expander storing an OI register (operand 1) to OI
;; structure memory (operand 0), instantiated per VQ2 mode via the dummy
;; unspec.
4608 (define_expand "vec_store_lanesoi<mode>"
4609 [(set (match_operand:OI 0 "neon_struct_operand")
4610 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4611 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store from an OI register, per VQ2 mode, emitted as a
;; single vst2 over the register list %h1.
4615 (define_insn "neon_vst2<mode>"
4616 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4617 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4618 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4621 "vst2.<V_sz_elem>\t%h1, %A0"
4622 [(set_attr "type" "neon_store2_4reg<q>")]
4625 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4626 ;; here on big endian targets.
;; TI variant (VD_LANE modes).  Stores one <V_two_elem> structure to memory
;; operand 0.  The two source vectors are addressed as DImode registers at
;; regno and regno + 2 of operand 1, and vst2 is printed with the converted
;; lane index on both.
4627 (define_insn "neon_vst2_lane<mode>"
4628 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4629 (unspec:<V_two_elem>
4630 [(match_operand:TI 1 "s_register_operand" "w")
4631 (match_operand:SI 2 "immediate_operand" "i")
4632 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4636 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4637 int regno = REGNO (operands[1]);
4639 ops[0] = operands[0];
4640 ops[1] = gen_rtx_REG (DImode, regno);
4641 ops[2] = gen_rtx_REG (DImode, regno + 2);
4642 ops[3] = GEN_INT (lane);
4643 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4646 [(set_attr "type" "neon_store2_one_lane<q>")]
4649 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4650 ;; here on big endian targets.
;; OI variant (VQ_HS modes).  Lanes in the upper half (lane >= max / 2) are
;; handled before the registers are formed; the two source vectors are then
;; addressed as DImode registers at regno and regno + 4 of operand 1.
4651 (define_insn "neon_vst2_lane<mode>"
4652 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4653 (unspec:<V_two_elem>
4654 [(match_operand:OI 1 "s_register_operand" "w")
4655 (match_operand:SI 2 "immediate_operand" "i")
4656 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4660 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4661 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4662 int regno = REGNO (operands[1]);
4664 if (lane >= max / 2)
4669 ops[0] = operands[0];
4670 ops[1] = gen_rtx_REG (DImode, regno);
4671 ops[2] = gen_rtx_REG (DImode, regno + 4);
4672 ops[3] = GEN_INT (lane);
4673 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4676 [(set_attr "type" "neon_store2_one_lane<q>")]
;; vec_load_lanes expander producing an EI register (operand 0) from EI
;; structure memory (operand 1), instantiated per VDX mode via the dummy
;; unspec.
4679 (define_expand "vec_load_lanesei<mode>"
4680 [(set (match_operand:EI 0 "s_register_operand")
4681 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4682 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure load into an EI register, per VDX mode.  When
;; <V_sz_elem> is 64 the insn is emitted as vld1.64, otherwise as vld3; the
;; "type" attribute makes the same distinction (neon_load1_3reg<q> versus
;; neon_load3_3reg<q>).
4686 (define_insn "neon_vld3<mode>"
4687 [(set (match_operand:EI 0 "s_register_operand" "=w")
4688 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4689 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4693 if (<V_sz_elem> == 64)
4694 return "vld1.64\t%h0, %A1";
4696 return "vld3.<V_sz_elem>\t%h0, %A1";
4699 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4700 (const_string "neon_load1_3reg<q>")
4701 (const_string "neon_load3_3reg<q>")))]
;; vec_load_lanes expander for a CI register (operand 0) loaded from CI
;; structure memory (operand 1), per VQ2 mode.  It simply emits
;; gen_neon_vld3<mode> on the same two operands.
4704 (define_expand "vec_load_lanesci<mode>"
4705 [(match_operand:CI 0 "s_register_operand")
4706 (match_operand:CI 1 "neon_struct_operand")
4707 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4710 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; CI (VQ2 modes) three-vector load, expanded as two EImode accesses:
;; neon_vld3qa on the memory at offset 0, then neon_vld3qb on the memory
;; GET_MODE_SIZE (EImode) bytes further on.  The second insn also receives
;; operands[0] as an input, so it extends the value written by the first.
4714 (define_expand "neon_vld3<mode>"
4715 [(match_operand:CI 0 "s_register_operand")
4716 (match_operand:CI 1 "neon_struct_operand")
4717 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4722 mem = adjust_address (operands[1], EImode, 0);
4723 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4724 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4725 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of the CI three-vector load: one vld3 from EI memory operand 1
;; into the DImode registers at regno, regno + 4 and regno + 8 of operand 0.
4729 (define_insn "neon_vld3qa<mode>"
4730 [(set (match_operand:CI 0 "s_register_operand" "=w")
4731 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4732 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4736 int regno = REGNO (operands[0]);
4738 ops[0] = gen_rtx_REG (DImode, regno);
4739 ops[1] = gen_rtx_REG (DImode, regno + 4);
4740 ops[2] = gen_rtx_REG (DImode, regno + 8);
4741 ops[3] = operands[1];
4742 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4745 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the CI three-vector load: one vld3 from EI memory operand 1
;; into the DImode registers at regno + 2, regno + 6 and regno + 10 of
;; operand 0.  Operand 2 is the incoming CI value, tied to the output
;; (constraint "0").
4748 (define_insn "neon_vld3qb<mode>"
4749 [(set (match_operand:CI 0 "s_register_operand" "=w")
4750 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4751 (match_operand:CI 2 "s_register_operand" "0")
4752 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4756 int regno = REGNO (operands[0]);
4758 ops[0] = gen_rtx_REG (DImode, regno + 2);
4759 ops[1] = gen_rtx_REG (DImode, regno + 6);
4760 ops[2] = gen_rtx_REG (DImode, regno + 10);
4761 ops[3] = operands[1];
4762 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4765 [(set_attr "type" "neon_load3_3reg<q>")]
4768 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4769 ;; here on big endian targets.
;; EI variant (VD_LANE modes).  Operand 2 is the incoming EI value tied to
;; the output (constraint "0").  The three vectors are addressed as DImode
;; registers at regno, regno + 2 and regno + 4, each printed with the
;; converted lane index.
;; NOTE(review): the address is printed with plain %3, not the %A modifier
;; used by the vld2/vld4 lane patterns -- presumably deliberate; confirm.
4770 (define_insn "neon_vld3_lane<mode>"
4771 [(set (match_operand:EI 0 "s_register_operand" "=w")
4772 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4773 (match_operand:EI 2 "s_register_operand" "0")
4774 (match_operand:SI 3 "immediate_operand" "i")
4775 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4779 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
4780 int regno = REGNO (operands[0]);
4782 ops[0] = gen_rtx_REG (DImode, regno);
4783 ops[1] = gen_rtx_REG (DImode, regno + 2);
4784 ops[2] = gen_rtx_REG (DImode, regno + 4);
4785 ops[3] = operands[1];
4786 ops[4] = GEN_INT (lane);
4787 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4791 [(set_attr "type" "neon_load3_one_lane<q>")]
4794 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4795 ;; here on big endian targets.
;; CI variant (VQ_HS modes).  Operand 2 is the incoming CI value tied to the
;; output (constraint "0").  Lanes in the upper half (lane >= max / 2) are
;; handled before the registers are formed; the three vectors are then
;; addressed as DImode registers at regno, regno + 4 and regno + 8.
;; NOTE(review): the address is printed with plain %3, not the %A modifier
;; used by the vld2/vld4 lane patterns -- presumably deliberate; confirm.
4796 (define_insn "neon_vld3_lane<mode>"
4797 [(set (match_operand:CI 0 "s_register_operand" "=w")
4798 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4799 (match_operand:CI 2 "s_register_operand" "0")
4800 (match_operand:SI 3 "immediate_operand" "i")
4801 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4805 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4806 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4807 int regno = REGNO (operands[0]);
4809 if (lane >= max / 2)
4814 ops[0] = gen_rtx_REG (DImode, regno);
4815 ops[1] = gen_rtx_REG (DImode, regno + 4);
4816 ops[2] = gen_rtx_REG (DImode, regno + 8);
4817 ops[3] = operands[1];
4818 ops[4] = GEN_INT (lane);
4819 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4823 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load one <V_three_elem> structure from memory into an EI register, per VDX
;; mode.  For modes with more than one unit the all-lanes vld3 form is
;; printed over the DImode registers at regno, regno + 2 and regno + 4;
;; otherwise a plain vld1 over %h0 is returned.  The "type" attribute is
;; selected on <V_mode_nunits> in the same way.
;; NOTE(review): the vld3 template prints the address with plain %3 while the
;; vld1 fallback uses %A1 -- presumably deliberate; confirm.
4826 (define_insn "neon_vld3_dup<mode>"
4827 [(set (match_operand:EI 0 "s_register_operand" "=w")
4828 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4829 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4833 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4835 int regno = REGNO (operands[0]);
4837 ops[0] = gen_rtx_REG (DImode, regno);
4838 ops[1] = gen_rtx_REG (DImode, regno + 2);
4839 ops[2] = gen_rtx_REG (DImode, regno + 4);
4840 ops[3] = operands[1];
4841 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4845 return "vld1.<V_sz_elem>\t%h0, %A1";
4848 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4849 (const_string "neon_load3_all_lanes<q>")
4850 (const_string "neon_load1_1reg<q>")))])
;; vec_store_lanes expander storing an EI register (operand 1) to EI
;; structure memory (operand 0), instantiated per VDX mode via the dummy
;; unspec.
4852 (define_expand "vec_store_lanesei<mode>"
4853 [(set (match_operand:EI 0 "neon_struct_operand")
4854 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4855 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure store from an EI register, per VDX mode.  When
;; <V_sz_elem> is 64 the insn is emitted as vst1.64, otherwise as vst3.
;; NOTE(review): the non-64-bit "type" is neon_store3_one_lane<q> although
;; the template stores the whole register list %h1; the matching load pattern
;; neon_vld3<mode> uses neon_load3_3reg<q>.  Confirm whether a 3reg store
;; type was intended (affects scheduling only).
4859 (define_insn "neon_vst3<mode>"
4860 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4861 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4862 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4866 if (<V_sz_elem> == 64)
4867 return "vst1.64\t%h1, %A0";
4869 return "vst3.<V_sz_elem>\t%h1, %A0";
4872 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4873 (const_string "neon_store1_3reg<q>")
4874 (const_string "neon_store3_one_lane<q>")))])
;; vec_store_lanes expander for a CI register (operand 1) stored to CI
;; structure memory (operand 0), per VQ2 mode.  It simply emits
;; gen_neon_vst3<mode> on the same two operands.
4876 (define_expand "vec_store_lanesci<mode>"
4877 [(match_operand:CI 0 "neon_struct_operand")
4878 (match_operand:CI 1 "s_register_operand")
4879 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4882 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; CI (VQ2 modes) three-vector store, expanded as two EImode accesses:
;; neon_vst3qa to the memory at offset 0, then neon_vst3qb to the memory
;; GET_MODE_SIZE (EImode) bytes further on.  Both take the same source
;; register operands[1].
4886 (define_expand "neon_vst3<mode>"
4887 [(match_operand:CI 0 "neon_struct_operand")
4888 (match_operand:CI 1 "s_register_operand")
4889 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4894 mem = adjust_address (operands[0], EImode, 0);
4895 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4896 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4897 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of the CI three-vector store: one vst3 to EI memory operand 0
;; from the DImode registers at regno, regno + 4 and regno + 8 of operand 1.
4901 (define_insn "neon_vst3qa<mode>"
4902 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4903 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4904 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4908 int regno = REGNO (operands[1]);
4910 ops[0] = operands[0];
4911 ops[1] = gen_rtx_REG (DImode, regno);
4912 ops[2] = gen_rtx_REG (DImode, regno + 4);
4913 ops[3] = gen_rtx_REG (DImode, regno + 8);
4914 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4917 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the CI three-vector store: one vst3 to EI memory operand 0
;; from the DImode registers at regno + 2, regno + 6 and regno + 10 of
;; operand 1.
4920 (define_insn "neon_vst3qb<mode>"
4921 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4922 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4923 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4927 int regno = REGNO (operands[1]);
4929 ops[0] = operands[0];
4930 ops[1] = gen_rtx_REG (DImode, regno + 2);
4931 ops[2] = gen_rtx_REG (DImode, regno + 6);
4932 ops[3] = gen_rtx_REG (DImode, regno + 10);
4933 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4936 [(set_attr "type" "neon_store3_3reg<q>")]
4939 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4940 ;; here on big endian targets.
;; EI variant (VD_LANE modes).  Stores one <V_three_elem> structure to memory
;; operand 0.  The three source vectors are addressed as DImode registers at
;; regno, regno + 2 and regno + 4 of operand 1, each printed with the
;; converted lane index.
;; NOTE(review): the address is printed with plain %0, not the %A modifier
;; used by the vst2/vst4 lane patterns -- presumably deliberate; confirm.
4941 (define_insn "neon_vst3_lane<mode>"
4942 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4943 (unspec:<V_three_elem>
4944 [(match_operand:EI 1 "s_register_operand" "w")
4945 (match_operand:SI 2 "immediate_operand" "i")
4946 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4950 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4951 int regno = REGNO (operands[1]);
4953 ops[0] = operands[0];
4954 ops[1] = gen_rtx_REG (DImode, regno);
4955 ops[2] = gen_rtx_REG (DImode, regno + 2);
4956 ops[3] = gen_rtx_REG (DImode, regno + 4);
4957 ops[4] = GEN_INT (lane);
4958 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4962 [(set_attr "type" "neon_store3_one_lane<q>")]
4965 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4966 ;; here on big endian targets.
;; CI variant (VQ_HS modes).  Lanes in the upper half (lane >= max / 2) are
;; handled before the registers are formed; the three source vectors are then
;; addressed as DImode registers at regno, regno + 4 and regno + 8 of
;; operand 1.
;; NOTE(review): the address is printed with plain %0, not the %A modifier
;; used by the vst2/vst4 lane patterns -- presumably deliberate; confirm.
4967 (define_insn "neon_vst3_lane<mode>"
4968 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4969 (unspec:<V_three_elem>
4970 [(match_operand:CI 1 "s_register_operand" "w")
4971 (match_operand:SI 2 "immediate_operand" "i")
4972 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4976 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4977 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4978 int regno = REGNO (operands[1]);
4980 if (lane >= max / 2)
4985 ops[0] = operands[0];
4986 ops[1] = gen_rtx_REG (DImode, regno);
4987 ops[2] = gen_rtx_REG (DImode, regno + 4);
4988 ops[3] = gen_rtx_REG (DImode, regno + 8);
4989 ops[4] = GEN_INT (lane);
4990 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4994 [(set_attr "type" "neon_store3_one_lane<q>")]
;; vec_load_lanes expander producing an OI register (operand 0) from OI
;; structure memory (operand 1), instantiated per VDX mode via the dummy
;; unspec.
4997 (define_expand "vec_load_lanesoi<mode>"
4998 [(set (match_operand:OI 0 "s_register_operand")
4999 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5000 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-vector structure load into an OI register, per VDX mode.  When
;; <V_sz_elem> is 64 the insn is emitted as vld1.64, otherwise as vld4; the
;; "type" attribute makes the same distinction (neon_load1_4reg<q> versus
;; neon_load4_4reg<q>).
5004 (define_insn "neon_vld4<mode>"
5005 [(set (match_operand:OI 0 "s_register_operand" "=w")
5006 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5007 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5011 if (<V_sz_elem> == 64)
5012 return "vld1.64\t%h0, %A1";
5014 return "vld4.<V_sz_elem>\t%h0, %A1";
5017 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5018 (const_string "neon_load1_4reg<q>")
5019 (const_string "neon_load4_4reg<q>")))]
;; vec_load_lanes expander for an XI register (operand 0) loaded from XI
;; structure memory (operand 1), per VQ2 mode.  It simply emits
;; gen_neon_vld4<mode> on the same two operands.
5022 (define_expand "vec_load_lanesxi<mode>"
5023 [(match_operand:XI 0 "s_register_operand")
5024 (match_operand:XI 1 "neon_struct_operand")
5025 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5028 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; XI (VQ2 modes) four-vector load, expanded as two OImode accesses:
;; neon_vld4qa on the memory at offset 0, then neon_vld4qb on the memory
;; GET_MODE_SIZE (OImode) bytes further on.  The second insn also receives
;; operands[0] as an input, so it extends the value written by the first.
5032 (define_expand "neon_vld4<mode>"
5033 [(match_operand:XI 0 "s_register_operand")
5034 (match_operand:XI 1 "neon_struct_operand")
5035 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5040 mem = adjust_address (operands[1], OImode, 0);
5041 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5042 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5043 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of the XI four-vector load: one vld4 from OI memory operand 1
;; into the DImode registers at regno, regno + 4, regno + 8 and regno + 12 of
;; operand 0.
5047 (define_insn "neon_vld4qa<mode>"
5048 [(set (match_operand:XI 0 "s_register_operand" "=w")
5049 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5050 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5054 int regno = REGNO (operands[0]);
5056 ops[0] = gen_rtx_REG (DImode, regno);
5057 ops[1] = gen_rtx_REG (DImode, regno + 4);
5058 ops[2] = gen_rtx_REG (DImode, regno + 8);
5059 ops[3] = gen_rtx_REG (DImode, regno + 12);
5060 ops[4] = operands[1];
5061 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5064 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the XI four-vector load: one vld4 from OI memory operand 1
;; into the DImode registers at regno + 2, regno + 6, regno + 10 and
;; regno + 14 of operand 0.  Operand 2 is the incoming XI value, tied to the
;; output (constraint "0").
5067 (define_insn "neon_vld4qb<mode>"
5068 [(set (match_operand:XI 0 "s_register_operand" "=w")
5069 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5070 (match_operand:XI 2 "s_register_operand" "0")
5071 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5075 int regno = REGNO (operands[0]);
5077 ops[0] = gen_rtx_REG (DImode, regno + 2);
5078 ops[1] = gen_rtx_REG (DImode, regno + 6);
5079 ops[2] = gen_rtx_REG (DImode, regno + 10);
5080 ops[3] = gen_rtx_REG (DImode, regno + 14);
5081 ops[4] = operands[1];
5082 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5085 [(set_attr "type" "neon_load4_4reg<q>")]
5088 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5089 ;; here on big endian targets.
;; OI variant (VD_LANE modes).  Operand 2 is the incoming OI value tied to
;; the output (constraint "0").  The four vectors are addressed as DImode
;; registers at regno, regno + 2, regno + 4 and regno + 6, each printed with
;; the converted lane index.
5090 (define_insn "neon_vld4_lane<mode>"
5091 [(set (match_operand:OI 0 "s_register_operand" "=w")
5092 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5093 (match_operand:OI 2 "s_register_operand" "0")
5094 (match_operand:SI 3 "immediate_operand" "i")
5095 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5099 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5100 int regno = REGNO (operands[0]);
5102 ops[0] = gen_rtx_REG (DImode, regno);
5103 ops[1] = gen_rtx_REG (DImode, regno + 2);
5104 ops[2] = gen_rtx_REG (DImode, regno + 4);
5105 ops[3] = gen_rtx_REG (DImode, regno + 6);
5106 ops[4] = operands[1];
5107 ops[5] = GEN_INT (lane);
5108 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5112 [(set_attr "type" "neon_load4_one_lane<q>")]
5115 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5116 ;; here on big endian targets.
;; XI variant (VQ_HS modes).  Operand 2 is the incoming XI value tied to the
;; output (constraint "0").  Lanes in the upper half (lane >= max / 2) are
;; handled before the registers are formed; the four vectors are then
;; addressed as DImode registers at regno, regno + 4, regno + 8 and
;; regno + 12.
5117 (define_insn "neon_vld4_lane<mode>"
5118 [(set (match_operand:XI 0 "s_register_operand" "=w")
5119 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5120 (match_operand:XI 2 "s_register_operand" "0")
5121 (match_operand:SI 3 "immediate_operand" "i")
5122 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5126 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5127 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5128 int regno = REGNO (operands[0]);
5130 if (lane >= max / 2)
5135 ops[0] = gen_rtx_REG (DImode, regno);
5136 ops[1] = gen_rtx_REG (DImode, regno + 4);
5137 ops[2] = gen_rtx_REG (DImode, regno + 8);
5138 ops[3] = gen_rtx_REG (DImode, regno + 12);
5139 ops[4] = operands[1];
5140 ops[5] = GEN_INT (lane);
5141 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5145 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load one <V_four_elem> structure from memory into an OI register, per VDX
;; mode.  For modes with more than one unit the all-lanes vld4 form is
;; printed over the DImode registers at regno, regno + 2, regno + 4 and
;; regno + 6; otherwise a plain vld1 over %h0 is returned.  The "type"
;; attribute is selected on <V_mode_nunits> in the same way.
5148 (define_insn "neon_vld4_dup<mode>"
5149 [(set (match_operand:OI 0 "s_register_operand" "=w")
5150 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5151 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5155 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5157 int regno = REGNO (operands[0]);
5159 ops[0] = gen_rtx_REG (DImode, regno);
5160 ops[1] = gen_rtx_REG (DImode, regno + 2);
5161 ops[2] = gen_rtx_REG (DImode, regno + 4);
5162 ops[3] = gen_rtx_REG (DImode, regno + 6);
5163 ops[4] = operands[1];
5164 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5169 return "vld1.<V_sz_elem>\t%h0, %A1";
5172 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5173 (const_string "neon_load4_all_lanes<q>")
5174 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanes expander storing an OI register (operand 1) to OI
;; structure memory (operand 0), instantiated per VDX mode via the dummy
;; unspec.
5177 (define_expand "vec_store_lanesoi<mode>"
5178 [(set (match_operand:OI 0 "neon_struct_operand")
5179 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5180 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-vector structure store from an OI register, per VDX mode.  When
;; <V_sz_elem> is 64 the insn is emitted as vst1.64, otherwise as vst4; the
;; "type" attribute makes the same distinction (neon_store1_4reg<q> versus
;; neon_store4_4reg<q>).
5184 (define_insn "neon_vst4<mode>"
5185 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5186 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5187 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5191 if (<V_sz_elem> == 64)
5192 return "vst1.64\t%h1, %A0";
5194 return "vst4.<V_sz_elem>\t%h1, %A0";
5197 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5198 (const_string "neon_store1_4reg<q>")
5199 (const_string "neon_store4_4reg<q>")))]
;; vec_store_lanes expander for an XI register (operand 1) stored to XI
;; structure memory (operand 0), per VQ2 mode.  It simply emits
;; gen_neon_vst4<mode> on the same two operands.
5202 (define_expand "vec_store_lanesxi<mode>"
5203 [(match_operand:XI 0 "neon_struct_operand")
5204 (match_operand:XI 1 "s_register_operand")
5205 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5208 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; XI (VQ2 modes) four-vector store, expanded as two OImode accesses:
;; neon_vst4qa to the memory at offset 0, then neon_vst4qb to the memory
;; GET_MODE_SIZE (OImode) bytes further on.  Both take the same source
;; register operands[1].
5212 (define_expand "neon_vst4<mode>"
5213 [(match_operand:XI 0 "neon_struct_operand")
5214 (match_operand:XI 1 "s_register_operand")
5215 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5220 mem = adjust_address (operands[0], OImode, 0);
5221 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5222 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5223 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of the XI four-vector store: one vst4 to OI memory operand 0
;; from the DImode registers at regno, regno + 4, regno + 8 and regno + 12 of
;; operand 1.
5227 (define_insn "neon_vst4qa<mode>"
5228 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5229 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5230 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5234 int regno = REGNO (operands[1]);
5236 ops[0] = operands[0];
5237 ops[1] = gen_rtx_REG (DImode, regno);
5238 ops[2] = gen_rtx_REG (DImode, regno + 4);
5239 ops[3] = gen_rtx_REG (DImode, regno + 8);
5240 ops[4] = gen_rtx_REG (DImode, regno + 12);
5241 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5244 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of the XI four-vector store: one vst4 to OI memory operand 0
;; from the DImode registers at regno + 2, regno + 6, regno + 10 and
;; regno + 14 of operand 1.
5247 (define_insn "neon_vst4qb<mode>"
5248 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5249 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5250 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5254 int regno = REGNO (operands[1]);
5256 ops[0] = operands[0];
5257 ops[1] = gen_rtx_REG (DImode, regno + 2);
5258 ops[2] = gen_rtx_REG (DImode, regno + 6);
5259 ops[3] = gen_rtx_REG (DImode, regno + 10);
5260 ops[4] = gen_rtx_REG (DImode, regno + 14);
5261 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5264 [(set_attr "type" "neon_store4_4reg<q>")]
5267 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5268 ;; here on big endian targets.
;; OI variant (VD_LANE modes).  Stores one <V_four_elem> structure to memory
;; operand 0.  The four source vectors are addressed as DImode registers at
;; regno, regno + 2, regno + 4 and regno + 6 of operand 1, each printed with
;; the converted lane index.
5269 (define_insn "neon_vst4_lane<mode>"
5270 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5271 (unspec:<V_four_elem>
5272 [(match_operand:OI 1 "s_register_operand" "w")
5273 (match_operand:SI 2 "immediate_operand" "i")
5274 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5278 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5279 int regno = REGNO (operands[1]);
5281 ops[0] = operands[0];
5282 ops[1] = gen_rtx_REG (DImode, regno);
5283 ops[2] = gen_rtx_REG (DImode, regno + 2);
5284 ops[3] = gen_rtx_REG (DImode, regno + 4);
5285 ops[4] = gen_rtx_REG (DImode, regno + 6);
5286 ops[5] = GEN_INT (lane);
5287 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5291 [(set_attr "type" "neon_store4_one_lane<q>")]
5294 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5295 ;; here on big endian targets.
;; VQ_HS variant: store one lane from each of four registers held in the
;; XImode operand 1 to the <V_four_elem> memory in operand 0.  Operand 2 is
;; the lane number; it is remapped through NEON_ENDIAN_LANE_N, and lanes at
;; or above half of GET_MODE_NUNITS (<MODE>mode) are handled by the
;; `lane >= max / 2' test.  The DImode registers named in the vst4 are
;; based at REGNO (operands[1]) + 0, 4, 8 and 12.
;; The template prints a single-lane vst4, so the scheduling type is
;; neon_store4_one_lane<q>, matching the VD_LANE variant of this pattern
;; (it was previously neon_store4_4reg<q>, the all-lanes store class).
5296 (define_insn "neon_vst4_lane<mode>"
5297 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5298 (unspec:<V_four_elem>
5299 [(match_operand:XI 1 "s_register_operand" "w")
5300 (match_operand:SI 2 "immediate_operand" "i")
5301 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5305 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5306 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5307 int regno = REGNO (operands[1]);
5309 if (lane >= max / 2)
5314 ops[0] = operands[0];
5315 ops[1] = gen_rtx_REG (DImode, regno);
5316 ops[2] = gen_rtx_REG (DImode, regno + 4);
5317 ops[3] = gen_rtx_REG (DImode, regno + 8);
5318 ops[4] = gen_rtx_REG (DImode, regno + 12);
5319 ops[5] = GEN_INT (lane);
5320 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5324 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Extend (SE) the half of VU operand 1 selected by operand 2, which must
;; satisfy vect_par_constant_low, into <V_unpack>.  Emits
;; vmovl.<US><V_sz_elem>.  Only enabled for little-endian NEON targets.
5327 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5328 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5329 (SE:<V_unpack> (vec_select:<V_HALF>
5330 (match_operand:VU 1 "register_operand" "w")
5331 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5332 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5333 "vmovl.<US><V_sz_elem> %q0, %e1"
5334 [(set_attr "type" "neon_shift_imm_long")]
;; Extend (SE) the half of VU operand 1 selected by operand 2, which must
;; satisfy vect_par_constant_high, into <V_unpack>.  Emits
;; vmovl.<US><V_sz_elem>.  Only enabled for little-endian NEON targets.
5337 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5338 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5339 (SE:<V_unpack> (vec_select:<V_HALF>
5340 (match_operand:VU 1 "register_operand" "w")
5341 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5342 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5343 "vmovl.<US><V_sz_elem> %q0, %f1"
5344 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for VU modes (little-endian NEON only).  Builds a PARALLEL whose
;; elements are the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_unpack<US>_hi_<mode> with operand 0 as the destination.
5347 (define_expand "vec_unpack<US>_hi_<mode>"
5348 [(match_operand:<V_unpack> 0 "register_operand" "")
5349 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5350 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5352 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5355 for (i = 0; i < (<V_mode_nunits>/2); i++)
5356 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5358 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5359 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for VU modes (little-endian NEON only).  Builds a PARALLEL whose
;; elements are the indices 0 .. <V_mode_nunits>/2-1 and emits
;; neon_vec_unpack<US>_lo_<mode> with operand 0 as the destination.
5366 (define_expand "vec_unpack<US>_lo_<mode>"
5367 [(match_operand:<V_unpack> 0 "register_operand" "")
5368 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5369 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5371 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5374 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5375 RTVEC_ELT (v, i) = GEN_INT (i);
5376 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5377 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of vector halves: the product of the SE-extended
;; vec_select of operand 1 and the SE-extended vec_select of operand 3.
;; Operand 2 must satisfy vect_par_constant_low.  Emits
;; vmull.<US><V_sz_elem>.  Only enabled for little-endian NEON targets.
5384 (define_insn "neon_vec_<US>mult_lo_<mode>"
5385 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5386 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5387 (match_operand:VU 1 "register_operand" "w")
5388 (match_operand:VU 2 "vect_par_constant_low" "")))
5389 (SE:<V_unpack> (vec_select:<V_HALF>
5390 (match_operand:VU 3 "register_operand" "w")
5392 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5393 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5394 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for VU modes (little-endian NEON only).  Builds a PARALLEL whose
;; elements are the indices 0 .. <V_mode_nunits>/2-1 and emits
;; neon_vec_<US>mult_lo_<mode> with operand 0 as the destination.
5397 (define_expand "vec_widen_<US>mult_lo_<mode>"
5398 [(match_operand:<V_unpack> 0 "register_operand" "")
5399 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5400 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5401 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5403 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5406 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5407 RTVEC_ELT (v, i) = GEN_INT (i);
5408 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5410 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of vector halves: the product of the SE-extended
;; vec_select of operand 1 and the SE-extended vec_select of operand 3.
;; Operand 2 must satisfy vect_par_constant_high.  Emits
;; vmull.<US><V_sz_elem>.  Only enabled for little-endian NEON targets.
5418 (define_insn "neon_vec_<US>mult_hi_<mode>"
5419 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5420 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5421 (match_operand:VU 1 "register_operand" "w")
5422 (match_operand:VU 2 "vect_par_constant_high" "")))
5423 (SE:<V_unpack> (vec_select:<V_HALF>
5424 (match_operand:VU 3 "register_operand" "w")
5426 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5427 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5428 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for VU modes (little-endian NEON only).  Builds a PARALLEL whose
;; elements are the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_<US>mult_hi_<mode> with operand 0 as the destination.
5431 (define_expand "vec_widen_<US>mult_hi_<mode>"
5432 [(match_operand:<V_unpack> 0 "register_operand" "")
5433 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5434 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5435 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5437 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5440 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5441 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5442 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5444 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left.  The RTL is the SE extension to <V_widen> of VW
;; operand 1 shifted left by operand 2, which must satisfy
;; const_neon_scalar_shift_amount_operand.  Emits vshll.<US><V_sz_elem>.
5453 (define_insn "neon_vec_<US>shiftl_<mode>"
5454 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5455 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5456 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5459 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5461 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for VU modes (little-endian NEON only).  Emits
;; neon_vec_<US>shiftl_<V_half> on the <V_HALF>mode subreg of operand 1
;; taken at byte offset 0, with operand 0 as the destination.
5464 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5465 [(match_operand:<V_unpack> 0 "register_operand" "")
5466 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5467 (match_operand:SI 2 "immediate_operand" "i")]
5468 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5470 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5471 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for VU modes (little-endian NEON only).  Emits
;; neon_vec_<US>shiftl_<V_half> on the <V_HALF>mode subreg of operand 1
;; taken at byte offset GET_MODE_SIZE (<V_HALF>mode), with operand 0 as
;; the destination.
5477 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5478 [(match_operand:<V_unpack> 0 "register_operand" "")
5479 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5480 (match_operand:SI 2 "immediate_operand" "i")]
5481 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5483 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5484 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5485 GET_MODE_SIZE (<V_HALF>mode)),
5491 ;; Vectorize for non-neon-quad case
;; Extend (SE) the whole VDI operand 1 into the <V_widen> operand 0.
;; Emits vmovl.<US><V_sz_elem>.
5492 (define_insn "neon_unpack<US>_<mode>"
5493 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5494 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5496 "vmovl.<US><V_sz_elem> %q0, %P1"
5497 [(set_attr "type" "neon_move")]
;; Expander for VDI modes.  Extends all of operand 1 into a fresh
;; <V_widen>mode temporary with neon_unpack<US>_<mode>, then copies part of
;; that temporary into operand 0 using neon_vget_low<V_widen_l>.
5500 (define_expand "vec_unpack<US>_lo_<mode>"
5501 [(match_operand:<V_double_width> 0 "register_operand" "")
5502 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5505 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5506 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5507 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for VDI modes.  Extends all of operand 1 into a fresh
;; <V_widen>mode temporary with neon_unpack<US>_<mode>, then copies part of
;; that temporary into operand 0 using neon_vget_high<V_widen_l>.
5513 (define_expand "vec_unpack<US>_hi_<mode>"
5514 [(match_operand:<V_double_width> 0 "register_operand" "")
5515 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5518 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5519 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5520 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI registers (operands 1 and 2) into the
;; <V_widen> operand 0.  Emits vmull.<US><V_sz_elem>.
5526 (define_insn "neon_vec_<US>mult_<mode>"
5527 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5528 (mult:<V_widen> (SE:<V_widen>
5529 (match_operand:VDI 1 "register_operand" "w"))
5531 (match_operand:VDI 2 "register_operand" "w"))))]
5533 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5534 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for VDI modes.  Computes the full widening product of operands
;; 1 and 2 into a fresh <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then copies part of that temporary into
;; operand 0 using neon_vget_high<V_widen_l>.
5537 (define_expand "vec_widen_<US>mult_hi_<mode>"
5538 [(match_operand:<V_double_width> 0 "register_operand" "")
5539 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5540 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5543 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5544 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5545 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for VDI modes.  Computes the full widening product of operands
;; 1 and 2 into a fresh <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then copies part of that temporary into
;; operand 0 using neon_vget_low<V_widen_l>.
5552 (define_expand "vec_widen_<US>mult_lo_<mode>"
5553 [(match_operand:<V_double_width> 0 "register_operand" "")
5554 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5555 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5558 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5559 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5560 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for VDI modes.  Performs the widening shift of operand 1 by
;; operand 2 into a fresh <V_widen>mode temporary with
;; neon_vec_<US>shiftl_<mode>, then copies part of that temporary into
;; operand 0 using neon_vget_high<V_widen_l>.
5567 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5568 [(match_operand:<V_double_width> 0 "register_operand" "")
5569 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5570 (match_operand:SI 2 "immediate_operand" "i")]
5573 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5574 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5575 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for VDI modes.  Performs the widening shift of operand 1 by
;; operand 2 into a fresh <V_widen>mode temporary with
;; neon_vec_<US>shiftl_<mode>, then copies part of that temporary into
;; operand 0 using neon_vget_low<V_widen_l>.
5581 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5582 [(match_operand:<V_double_width> 0 "register_operand" "")
5583 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5584 (match_operand:SI 2 "immediate_operand" "i")]
5587 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5588 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5589 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5595 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5596 ; because the ordering of vector elements in Q registers is different from what
5597 ; the semantics of the instructions require.
;; Truncate VN operands 1 and 2 to <V_narrow> and concatenate the results
;; into operand 0.  Implemented as two vmovn instructions (hence length 8),
;; one writing %e0 from operand 1 and one writing %f0 from operand 2.
;; Operand 0 is early-clobber ("=&w").  Little-endian NEON targets only.
5599 (define_insn "vec_pack_trunc_<mode>"
5600 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5601 (vec_concat:<V_narrow_pack>
5602 (truncate:<V_narrow>
5603 (match_operand:VN 1 "register_operand" "w"))
5604 (truncate:<V_narrow>
5605 (match_operand:VN 2 "register_operand" "w"))))]
5606 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5607 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5608 [(set_attr "type" "multiple")
5609 (set_attr "length" "8")]
5612 ;; For the non-quad case.
;; Truncate the single VN operand 1 to <V_narrow> with one vmovn.
;; Little-endian NEON targets only.
5613 (define_insn "neon_vec_pack_trunc_<mode>"
5614 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5615 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5616 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5617 "vmovn.i<V_sz_elem>\t%P0, %q1"
5618 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for VSHFT modes (little-endian NEON only).  Fills a fresh
;; <V_DOUBLE>mode temporary from operand 1 via move_lo_quad_<V_double> and
;; from operand 2 via move_hi_quad_<V_double>, then narrows the temporary
;; into operand 0 with neon_vec_pack_trunc_<V_double>.
5621 (define_expand "vec_pack_trunc_<mode>"
5622 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5623 (match_operand:VSHFT 1 "register_operand" "")
5624 (match_operand:VSHFT 2 "register_operand")]
5625 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5627 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5629 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5630 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5631 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference written as abs (minus op1 op2) on VDQ modes; emits
;; vabd.<V_s_elem>.  Enabled for NEON when the mode is not a float mode or
;; when flag_unsafe_math_optimizations is set.  The if_then_else picks
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
5635 (define_insn "neon_vabd<mode>_2"
5636 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5637 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5638 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5639 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5640 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5642 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5643 (const_string "neon_fp_abd_s<q>")
5644 (const_string "neon_abd<q>")))]
;; Absolute difference written as abs of an unspec over operands 1 and 2 on
;; VDQ modes; emits vabd.<V_if_elem>.  Enabled for NEON when the mode is
;; not a float mode or when flag_unsafe_math_optimizations is set.  The
;; if_then_else picks neon_fp_abd_s<q> for float modes and neon_abd<q>
;; otherwise.
5647 (define_insn "neon_vabd<mode>_3"
5648 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5649 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5650 (match_operand:VDQ 2 "s_register_operand" "w")]
5652 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5653 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5655 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5656 (const_string "neon_fp_abd_s<q>")
5657 (const_string "neon_abd<q>")))]
5660 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; SImode -> DImode sign extension.  After reload, when the destination is
;; a VFP register, the extension is rewritten as a vec_duplicate of
;; operand 1 into the V2SImode view of the destination register (operand 2)
;; followed by an arithmetic right shift of the DImode value by 32.
5663 [(set (match_operand:DI 0 "s_register_operand" "")
5664 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5665 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5666 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5667 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5669 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode sign extension.  After reload, when the destination is
;; a VFP register, the extension is rewritten as a vec_duplicate of
;; operand 1 into the V4HImode view of the destination register (operand 2)
;; followed by an arithmetic right shift of the DImode value by 48.
5673 [(set (match_operand:DI 0 "s_register_operand" "")
5674 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5675 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5676 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5677 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5679 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode -> DImode sign extension.  After reload, when the destination is
;; a VFP register, the extension is rewritten as a vec_duplicate of
;; operand 1 into the V8QImode view of the destination register (operand 2)
;; followed by an arithmetic right shift of the DImode value by 56.
5683 [(set (match_operand:DI 0 "s_register_operand" "")
5684 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5685 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5686 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5687 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5689 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SImode -> DImode zero extension.  After reload, when the destination is
;; a VFP register, the extension is rewritten as a vec_duplicate of
;; operand 1 into the V2SImode view of the destination register (operand 2)
;; followed by a logical right shift of the DImode value by 32.
5693 [(set (match_operand:DI 0 "s_register_operand" "")
5694 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5695 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5696 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5697 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5699 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode zero extension.  After reload, when the destination is
;; a VFP register, the extension is rewritten as a vec_duplicate of
;; operand 1 into the V4HImode view of the destination register (operand 2)
;; followed by a logical right shift of the DImode value by 48.
5703 [(set (match_operand:DI 0 "s_register_operand" "")
5704 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5705 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5706 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5707 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5709 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode -> DImode zero extension.  After reload, when the destination is
;; a VFP register, the extension is rewritten as a vec_duplicate of
;; operand 1 into the V8QImode view of the destination register (operand 2)
;; followed by a logical right shift of the DImode value by 56.
5713 [(set (match_operand:DI 0 "s_register_operand" "")
5714 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5715 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5716 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5717 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5719 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));