1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode vector register to memory.
;; Operand 0 is the destination memory reference (predicate
;; neon_permissive_struct_operand, constraint Um); operand 1 is the source
;; register (constraint w), wrapped in UNSPEC_MISALIGNED_ACCESS.
;; Only enabled for little-endian NEON targets with unaligned_access set.
;; Emits a single vst1 whose element size comes from <V_sz_elem>.
;; NOTE(review): VDX and <V_sz_elem> are defined outside this view; VDX
;; presumably covers the 64-bit (D-register) vector modes, given %P1 -- confirm.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode vector register to memory.
;; Operand 0 is the destination memory reference (predicate
;; neon_permissive_struct_operand, constraint Um); operand 1 is the source
;; register (constraint w), wrapped in UNSPEC_MISALIGNED_ACCESS.
;; Only enabled for little-endian NEON targets with unaligned_access set.
;; Emits a single vst1 on the register printed with %q1; the element size
;; comes from <V_sz_elem>.
;; NOTE(review): VQX and <V_sz_elem> are defined outside this view; VQX
;; presumably covers the 128-bit (Q-register) vector modes -- confirm.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VQX-mode vector register from memory.
;; Operand 0 is the destination register (constraint w); operand 1 is the
;; source memory reference (predicate neon_permissive_struct_operand),
;; wrapped in UNSPEC_MISALIGNED_ACCESS.
;; Only enabled for little-endian NEON targets with unaligned_access set.
;; Emits a single vld1 into the register printed with %q0.
;; The instruction is a load, so its scheduling type is the load type
;; (neon_load1_1reg<q>), in agreement with the VDX load pattern.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
580 ;; to be able to use when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
632 (define_insn "ior<mode>3"
633 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
634 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
635 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
638 switch (which_alternative)
640 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
641 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
642 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
643 default: gcc_unreachable ();
646 [(set_attr "type" "neon_logic<q>")]
649 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
650 ;; vorr. We support the pseudo-instruction vand instead, because that
651 ;; corresponds to the canonical form the middle-end expects to use for
652 ;; immediate bitwise-ANDs.
654 (define_insn "and<mode>3"
655 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
656 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
657 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
660 switch (which_alternative)
662 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
663 case 1: return neon_output_logic_immediate ("vand", &operands[2],
664 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
665 default: gcc_unreachable ();
668 [(set_attr "type" "neon_logic<q>")]
671 (define_insn "orn<mode>3_neon"
672 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
673 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
674 (match_operand:VDQ 1 "s_register_operand" "w")))]
676 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
677 [(set_attr "type" "neon_logic<q>")]
680 ;; TODO: investigate whether we should disable
681 ;; this and bicdi3_neon for the A8 in line with the other
683 (define_insn_and_split "orndi3_neon"
684 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
685 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
686 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
694 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
695 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
696 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
701 operands[3] = gen_highpart (SImode, operands[0]);
702 operands[0] = gen_lowpart (SImode, operands[0]);
703 operands[4] = gen_highpart (SImode, operands[2]);
704 operands[2] = gen_lowpart (SImode, operands[2]);
705 operands[5] = gen_highpart (SImode, operands[1]);
706 operands[1] = gen_lowpart (SImode, operands[1]);
710 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
711 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
715 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
716 (set_attr "length" "*,16,8,8")
717 (set_attr "arch" "any,a,t2,t2")]
720 (define_insn "bic<mode>3_neon"
721 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
722 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
723 (match_operand:VDQ 1 "s_register_operand" "w")))]
725 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
726 [(set_attr "type" "neon_logic<q>")]
729 ;; Compare to *anddi_notdi_di.
;; 64-bit bit-clear: operand 0 = operand 1 AND NOT operand 2.
;; Alternative 0 keeps everything in NEON (w) registers; alternatives 1 and 2
;; use core (r) registers with an early-clobbered output, tied to operand 1
;; or operand 2 respectively.
;; The '=' output modifier appears once, at the start of the operand 0
;; constraint string; it must not be repeated inside a later alternative.
730 (define_insn "bicdi3_neon"
731 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
732 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
733 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
739 [(set_attr "type" "neon_logic,multiple,multiple")
740 (set_attr "length" "*,8,8")]
743 (define_insn "xor<mode>3"
744 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
745 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
746 (match_operand:VDQ 2 "s_register_operand" "w")))]
748 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
749 [(set_attr "type" "neon_logic<q>")]
752 (define_insn "one_cmpl<mode>2"
753 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
754 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
756 "vmvn\t%<V_reg>0, %<V_reg>1"
757 [(set_attr "type" "neon_move<q>")]
760 (define_insn "abs<mode>2"
761 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
762 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
764 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
766 (if_then_else (match_test "<Is_float_mode>")
767 (const_string "neon_fp_abs_s<q>")
768 (const_string "neon_abs<q>")))]
771 (define_insn "neg<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_neg_s<q>")
779 (const_string "neon_neg<q>")))]
782 (define_insn "negdi2_neon"
783 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
784 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
785 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
786 (clobber (reg:CC CC_REGNUM))]
789 [(set_attr "length" "8")
790 (set_attr "type" "multiple")]
793 ; Split negdi2_neon for vfp registers
795 [(set (match_operand:DI 0 "s_register_operand" "")
796 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
797 (clobber (match_scratch:DI 2 ""))
798 (clobber (reg:CC CC_REGNUM))]
799 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
800 [(set (match_dup 2) (const_int 0))
801 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
802 (clobber (reg:CC CC_REGNUM))])]
804 if (!REG_P (operands[2]))
805 operands[2] = operands[0];
809 ; Split negdi2_neon for core registers
811 [(set (match_operand:DI 0 "s_register_operand" "")
812 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
813 (clobber (match_scratch:DI 2 ""))
814 (clobber (reg:CC CC_REGNUM))]
815 "TARGET_32BIT && reload_completed
816 && arm_general_register_operand (operands[0], DImode)"
817 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
818 (clobber (reg:CC CC_REGNUM))])]
822 (define_insn "*umin<mode>3_neon"
823 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
824 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
825 (match_operand:VDQIW 2 "s_register_operand" "w")))]
827 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
828 [(set_attr "type" "neon_minmax<q>")]
831 (define_insn "*umax<mode>3_neon"
832 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
833 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
834 (match_operand:VDQIW 2 "s_register_operand" "w")))]
836 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
837 [(set_attr "type" "neon_minmax<q>")]
840 (define_insn "*smin<mode>3_neon"
841 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
842 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
843 (match_operand:VDQW 2 "s_register_operand" "w")))]
845 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
847 (if_then_else (match_test "<Is_float_mode>")
848 (const_string "neon_fp_minmax_s<q>")
849 (const_string "neon_minmax<q>")))]
852 (define_insn "*smax<mode>3_neon"
853 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
854 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
855 (match_operand:VDQW 2 "s_register_operand" "w")))]
857 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
859 (if_then_else (match_test "<Is_float_mode>")
860 (const_string "neon_fp_minmax_s<q>")
861 (const_string "neon_minmax<q>")))]
864 ; TODO: V2DI shifts are currently disabled because there are bugs in the
865 ; generic vectorizer code. It ends up creating a V2DI constructor with
868 (define_insn "vashl<mode>3"
869 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
870 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
871 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
874 switch (which_alternative)
876 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
877 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
879 VALID_NEON_QREG_MODE (<MODE>mode),
881 default: gcc_unreachable ();
884 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
887 (define_insn "vashr<mode>3_imm"
888 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
889 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
890 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
893 return neon_output_shift_immediate ("vshr", 's', &operands[2],
894 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
897 [(set_attr "type" "neon_shift_imm<q>")]
900 (define_insn "vlshr<mode>3_imm"
901 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
902 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
903 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
906 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
907 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
910 [(set_attr "type" "neon_shift_imm<q>")]
913 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
914 ; amount, with signed operands. This is essentially the same as ashl<mode>3
915 ; above, but using an unspec in case GCC tries anything tricky with negative
918 (define_insn "ashl<mode>3_signed"
919 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
920 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
921 (match_operand:VDQI 2 "s_register_operand" "w")]
922 UNSPEC_ASHIFT_SIGNED))]
924 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
925 [(set_attr "type" "neon_shift_reg<q>")]
928 ; Used for implementing logical shift-right, which is a left-shift by a negative
929 ; amount, with unsigned operands.
931 (define_insn "ashl<mode>3_unsigned"
932 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
933 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
934 (match_operand:VDQI 2 "s_register_operand" "w")]
935 UNSPEC_ASHIFT_UNSIGNED))]
937 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
938 [(set_attr "type" "neon_shift_reg<q>")]
941 (define_expand "vashr<mode>3"
942 [(set (match_operand:VDQIW 0 "s_register_operand" "")
943 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
944 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
947 if (s_register_operand (operands[2], <MODE>mode))
949 rtx neg = gen_reg_rtx (<MODE>mode);
950 emit_insn (gen_neg<mode>2 (neg, operands[2]));
951 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
954 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
958 (define_expand "vlshr<mode>3"
959 [(set (match_operand:VDQIW 0 "s_register_operand" "")
960 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
961 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
964 if (s_register_operand (operands[2], <MODE>mode))
966 rtx neg = gen_reg_rtx (<MODE>mode);
967 emit_insn (gen_neg<mode>2 (neg, operands[2]));
968 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
971 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
977 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
978 ;; leaving the upper half uninitialized. This is OK since the shift
979 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
980 ;; data flow analysis however, we pretend the full register is set
982 (define_insn "neon_load_count"
983 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
984 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
988 vld1.32\t{%P0[0]}, %A1
990 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
993 (define_insn "ashldi3_neon_noclobber"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
996 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
997 "TARGET_NEON && reload_completed
998 && (!CONST_INT_P (operands[2])
999 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1001 vshl.u64\t%P0, %P1, %2
1002 vshl.u64\t%P0, %P1, %P2"
1003 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1006 (define_insn_and_split "ashldi3_neon"
1007 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1008 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1009 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1010 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1011 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1012 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1013 (clobber (reg:CC_C CC_REGNUM))]
1016 "TARGET_NEON && reload_completed"
1020 if (IS_VFP_REGNUM (REGNO (operands[0])))
1022 if (CONST_INT_P (operands[2]))
1024 if (INTVAL (operands[2]) < 1)
1026 emit_insn (gen_movdi (operands[0], operands[1]));
1029 else if (INTVAL (operands[2]) > 63)
1030 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1034 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1035 operands[2] = operands[5];
1038 /* Ditch the unnecessary clobbers. */
1039 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1044 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
1045 /* This clobbers CC. */
1046 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1048 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1049 operands[2], operands[3], operands[4]);
1053 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1054 (set_attr "opt" "*,*,speed,speed,*,*")
1055 (set_attr "type" "multiple")]
1058 ; The shift amount needs to be negated for right-shifts
1059 (define_insn "signed_shift_di3_neon"
1060 [(set (match_operand:DI 0 "s_register_operand" "=w")
1061 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1062 (match_operand:DI 2 "s_register_operand" " w")]
1063 UNSPEC_ASHIFT_SIGNED))]
1064 "TARGET_NEON && reload_completed"
1065 "vshl.s64\t%P0, %P1, %P2"
1066 [(set_attr "type" "neon_shift_reg")]
1069 ; The shift amount needs to be negated for right-shifts
1070 (define_insn "unsigned_shift_di3_neon"
1071 [(set (match_operand:DI 0 "s_register_operand" "=w")
1072 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1073 (match_operand:DI 2 "s_register_operand" " w")]
1074 UNSPEC_ASHIFT_UNSIGNED))]
1075 "TARGET_NEON && reload_completed"
1076 "vshl.u64\t%P0, %P1, %P2"
1077 [(set_attr "type" "neon_shift_reg")]
1080 (define_insn "ashrdi3_neon_imm_noclobber"
1081 [(set (match_operand:DI 0 "s_register_operand" "=w")
1082 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1083 (match_operand:DI 2 "const_int_operand" " i")))]
1084 "TARGET_NEON && reload_completed
1085 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1086 "vshr.s64\t%P0, %P1, %2"
1087 [(set_attr "type" "neon_shift_imm")]
1090 (define_insn "lshrdi3_neon_imm_noclobber"
1091 [(set (match_operand:DI 0 "s_register_operand" "=w")
1092 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1093 (match_operand:DI 2 "const_int_operand" " i")))]
1094 "TARGET_NEON && reload_completed
1095 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1096 "vshr.u64\t%P0, %P1, %2"
1097 [(set_attr "type" "neon_shift_imm")]
1102 (define_insn_and_split "<shift>di3_neon"
1103 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1104 (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1105 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1106 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1107 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1108 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1109 (clobber (reg:CC CC_REGNUM))]
1112 "TARGET_NEON && reload_completed"
1116 if (IS_VFP_REGNUM (REGNO (operands[0])))
1118 if (CONST_INT_P (operands[2]))
1120 if (INTVAL (operands[2]) < 1)
1122 emit_insn (gen_movdi (operands[0], operands[1]));
1125 else if (INTVAL (operands[2]) > 64)
1126 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1128 /* Ditch the unnecessary clobbers. */
1129 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1135 /* We must use a negative left-shift. */
1136 emit_insn (gen_negsi2 (operands[3], operands[2]));
1137 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1138 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1144 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
1145 /* This clobbers CC. */
1146 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1148 /* This clobbers CC (ASHIFTRT by register only). */
1149 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1150 operands[2], operands[3], operands[4]);
1155 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1156 (set_attr "opt" "*,*,speed,speed,*,*")
1157 (set_attr "type" "multiple")]
1160 ;; Widening operations
1162 (define_insn "widen_ssum<mode>3"
1163 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1164 (plus:<V_widen> (sign_extend:<V_widen>
1165 (match_operand:VW 1 "s_register_operand" "%w"))
1166 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1168 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1169 [(set_attr "type" "neon_add_widen")]
1172 (define_insn "widen_usum<mode>3"
1173 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1174 (plus:<V_widen> (zero_extend:<V_widen>
1175 (match_operand:VW 1 "s_register_operand" "%w"))
1176 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1178 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1179 [(set_attr "type" "neon_add_widen")]
1182 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
1183 ;; shift-count granularity. That's good enough for the middle-end's current
1186 ;; Note that it's not safe to perform such an operation in big-endian mode,
1187 ;; due to element-ordering issues.
1189 (define_expand "vec_shr_<mode>"
1190 [(match_operand:VDQ 0 "s_register_operand" "")
1191 (match_operand:VDQ 1 "s_register_operand" "")
1192 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1193 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1196 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1197 const int width = GET_MODE_BITSIZE (<MODE>mode);
1198 const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1199 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1200 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1202 if (num_bits == width)
1204 emit_move_insn (operands[0], operands[1]);
1208 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1209 operands[0] = gen_lowpart (bvecmode, operands[0]);
1210 operands[1] = gen_lowpart (bvecmode, operands[1]);
1212 emit_insn (gen_ext (operands[0], operands[1], zero_reg,
1213 GEN_INT (num_bits / BITS_PER_UNIT)));
1217 (define_expand "vec_shl_<mode>"
1218 [(match_operand:VDQ 0 "s_register_operand" "")
1219 (match_operand:VDQ 1 "s_register_operand" "")
1220 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1221 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1224 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1225 const int width = GET_MODE_BITSIZE (<MODE>mode);
1226 const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1227 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1228 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1232 emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
1236 num_bits = width - num_bits;
1238 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1239 operands[0] = gen_lowpart (bvecmode, operands[0]);
1240 operands[1] = gen_lowpart (bvecmode, operands[1]);
1242 emit_insn (gen_ext (operands[0], zero_reg, operands[1],
1243 GEN_INT (num_bits / BITS_PER_UNIT)));
1247 ;; Helpers for quad-word reduction operations
1249 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1250 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1251 ; N/2-element vector.
1253 (define_insn "quad_halves_<code>v4si"
1254 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1256 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1257 (parallel [(const_int 0) (const_int 1)]))
1258 (vec_select:V2SI (match_dup 1)
1259 (parallel [(const_int 2) (const_int 3)]))))]
1261 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1262 [(set_attr "vqh_mnem" "<VQH_mnem>")
1263 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1266 (define_insn "quad_halves_<code>v4sf"
1267 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1269 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1270 (parallel [(const_int 0) (const_int 1)]))
1271 (vec_select:V2SF (match_dup 1)
1272 (parallel [(const_int 2) (const_int 3)]))))]
1273 "TARGET_NEON && flag_unsafe_math_optimizations"
1274 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1275 [(set_attr "vqh_mnem" "<VQH_mnem>")
1276 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1279 (define_insn "quad_halves_<code>v8hi"
1280 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1282 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1283 (parallel [(const_int 0) (const_int 1)
1284 (const_int 2) (const_int 3)]))
1285 (vec_select:V4HI (match_dup 1)
1286 (parallel [(const_int 4) (const_int 5)
1287 (const_int 6) (const_int 7)]))))]
1289 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1290 [(set_attr "vqh_mnem" "<VQH_mnem>")
1291 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1294 (define_insn "quad_halves_<code>v16qi"
1295 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1297 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1298 (parallel [(const_int 0) (const_int 1)
1299 (const_int 2) (const_int 3)
1300 (const_int 4) (const_int 5)
1301 (const_int 6) (const_int 7)]))
1302 (vec_select:V8QI (match_dup 1)
1303 (parallel [(const_int 8) (const_int 9)
1304 (const_int 10) (const_int 11)
1305 (const_int 12) (const_int 13)
1306 (const_int 14) (const_int 15)]))))]
1308 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1309 [(set_attr "vqh_mnem" "<VQH_mnem>")
1310 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1313 (define_expand "move_hi_quad_<mode>"
1314 [(match_operand:ANY128 0 "s_register_operand" "")
1315 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1318 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1319 GET_MODE_SIZE (<V_HALF>mode)),
1324 (define_expand "move_lo_quad_<mode>"
1325 [(match_operand:ANY128 0 "s_register_operand" "")
1326 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1329 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1335 ;; Reduction operations
1337 (define_expand "reduc_splus_<mode>"
1338 [(match_operand:VD 0 "s_register_operand" "")
1339 (match_operand:VD 1 "s_register_operand" "")]
1340 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1342 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1343 &gen_neon_vpadd_internal<mode>);
1347 (define_expand "reduc_splus_<mode>"
1348 [(match_operand:VQ 0 "s_register_operand" "")
1349 (match_operand:VQ 1 "s_register_operand" "")]
1350 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1351 && !BYTES_BIG_ENDIAN"
1353 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1354 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1356 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1357 emit_insn (gen_reduc_splus_<V_half> (res_d, step1));
1358 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
1363 (define_insn "reduc_splus_v2di"
1364 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1365 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1367 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1368 "vadd.i64\t%e0, %e1, %f1"
1369 [(set_attr "type" "neon_add_q")]
1372 ;; NEON does not distinguish between signed and unsigned addition except on
1373 ;; widening operations.
1374 (define_expand "reduc_uplus_<mode>"
1375 [(match_operand:VDQI 0 "s_register_operand" "")
1376 (match_operand:VDQI 1 "s_register_operand" "")]
1377 "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)"
1379 emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
1383 (define_expand "reduc_smin_<mode>"
1384 [(match_operand:VD 0 "s_register_operand" "")
1385 (match_operand:VD 1 "s_register_operand" "")]
1386 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1388 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1389 &gen_neon_vpsmin<mode>);
1393 (define_expand "reduc_smin_<mode>"
1394 [(match_operand:VQ 0 "s_register_operand" "")
1395 (match_operand:VQ 1 "s_register_operand" "")]
1396 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1397 && !BYTES_BIG_ENDIAN"
1399 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1400 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1402 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1403 emit_insn (gen_reduc_smin_<V_half> (res_d, step1));
1404 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
1409 (define_expand "reduc_smax_<mode>"
1410 [(match_operand:VD 0 "s_register_operand" "")
1411 (match_operand:VD 1 "s_register_operand" "")]
1412 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1414 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1415 &gen_neon_vpsmax<mode>);
1419 (define_expand "reduc_smax_<mode>"
1420 [(match_operand:VQ 0 "s_register_operand" "")
1421 (match_operand:VQ 1 "s_register_operand" "")]
1422 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1423 && !BYTES_BIG_ENDIAN"
1425 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1426 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1428 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1429 emit_insn (gen_reduc_smax_<V_half> (res_d, step1));
1430 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
1435 (define_expand "reduc_umin_<mode>"
1436 [(match_operand:VDI 0 "s_register_operand" "")
1437 (match_operand:VDI 1 "s_register_operand" "")]
1440 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1441 &gen_neon_vpumin<mode>);
1445 (define_expand "reduc_umin_<mode>"
1446 [(match_operand:VQI 0 "s_register_operand" "")
1447 (match_operand:VQI 1 "s_register_operand" "")]
1448 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1450 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1451 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1453 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1454 emit_insn (gen_reduc_umin_<V_half> (res_d, step1));
1455 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
1460 (define_expand "reduc_umax_<mode>"
1461 [(match_operand:VDI 0 "s_register_operand" "")
1462 (match_operand:VDI 1 "s_register_operand" "")]
1465 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1466 &gen_neon_vpumax<mode>);
1470 (define_expand "reduc_umax_<mode>"
1471 [(match_operand:VQI 0 "s_register_operand" "")
1472 (match_operand:VQI 1 "s_register_operand" "")]
1473 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1475 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1476 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1478 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1479 emit_insn (gen_reduc_umax_<V_half> (res_d, step1));
1480 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
1485 (define_insn "neon_vpadd_internal<mode>"
1486 [(set (match_operand:VD 0 "s_register_operand" "=w")
1487 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1488 (match_operand:VD 2 "s_register_operand" "w")]
1491 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1492 ;; Assume this schedules like vadd.
1494 (if_then_else (match_test "<Is_float_mode>")
1495 (const_string "neon_fp_reduc_add_s<q>")
1496 (const_string "neon_reduc_add<q>")))]
1499 (define_insn "neon_vpsmin<mode>"
1500 [(set (match_operand:VD 0 "s_register_operand" "=w")
1501 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1502 (match_operand:VD 2 "s_register_operand" "w")]
1505 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1507 (if_then_else (match_test "<Is_float_mode>")
1508 (const_string "neon_fp_reduc_minmax_s<q>")
1509 (const_string "neon_reduc_minmax<q>")))]
1512 (define_insn "neon_vpsmax<mode>"
1513 [(set (match_operand:VD 0 "s_register_operand" "=w")
1514 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1515 (match_operand:VD 2 "s_register_operand" "w")]
1518 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1520 (if_then_else (match_test "<Is_float_mode>")
1521 (const_string "neon_fp_reduc_minmax_s<q>")
1522 (const_string "neon_reduc_minmax<q>")))]
1525 (define_insn "neon_vpumin<mode>"
1526 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1527 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1528 (match_operand:VDI 2 "s_register_operand" "w")]
1531 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1532 [(set_attr "type" "neon_reduc_minmax<q>")]
1535 (define_insn "neon_vpumax<mode>"
1536 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1537 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1538 (match_operand:VDI 2 "s_register_operand" "w")]
1541 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1542 [(set_attr "type" "neon_reduc_minmax<q>")]
1545 ;; Saturating arithmetic
1547 ; NOTE: Neon supports many more saturating variants of instructions than the
1548 ; following, but these are all GCC currently understands.
1549 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1550 ; yet either, although these patterns may be used by intrinsics when they're
1553 (define_insn "*ss_add<mode>_neon"
1554 [(set (match_operand:VD 0 "s_register_operand" "=w")
1555 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1556 (match_operand:VD 2 "s_register_operand" "w")))]
1558 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1559 [(set_attr "type" "neon_qadd<q>")]
1562 (define_insn "*us_add<mode>_neon"
1563 [(set (match_operand:VD 0 "s_register_operand" "=w")
1564 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1565 (match_operand:VD 2 "s_register_operand" "w")))]
1567 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1568 [(set_attr "type" "neon_qadd<q>")]
1571 (define_insn "*ss_sub<mode>_neon"
1572 [(set (match_operand:VD 0 "s_register_operand" "=w")
1573 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1574 (match_operand:VD 2 "s_register_operand" "w")))]
1576 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1577 [(set_attr "type" "neon_qsub<q>")]
1580 (define_insn "*us_sub<mode>_neon"
1581 [(set (match_operand:VD 0 "s_register_operand" "=w")
1582 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1583 (match_operand:VD 2 "s_register_operand" "w")))]
1585 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1586 [(set_attr "type" "neon_qsub<q>")]
1589 ;; Conditional instructions. These are comparisons with conditional moves for
1590 ;; vectors. They perform the assignment:
1592 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1594 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1597 (define_expand "vcond<mode><mode>"
1598 [(set (match_operand:VDQW 0 "s_register_operand" "")
1600 (match_operator 3 "comparison_operator"
1601 [(match_operand:VDQW 4 "s_register_operand" "")
1602 (match_operand:VDQW 5 "nonmemory_operand" "")])
1603 (match_operand:VDQW 1 "s_register_operand" "")
1604 (match_operand:VDQW 2 "s_register_operand" "")))]
1605 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1607 HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
1609 rtx magic_rtx = GEN_INT (magic_word);
1611 int use_zero_form = 0;
1612 int swap_bsl_operands = 0;
1613 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1614 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1616 rtx (*base_comparison) (rtx, rtx, rtx, rtx);
1617 rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);
1619 switch (GET_CODE (operands[3]))
1626 if (operands[5] == CONST0_RTX (<MODE>mode))
1633 if (!REG_P (operands[5]))
1634 operands[5] = force_reg (<MODE>mode, operands[5]);
1637 switch (GET_CODE (operands[3]))
1647 base_comparison = gen_neon_vcge<mode>;
1648 complimentary_comparison = gen_neon_vcgt<mode>;
1656 base_comparison = gen_neon_vcgt<mode>;
1657 complimentary_comparison = gen_neon_vcge<mode>;
1662 base_comparison = gen_neon_vceq<mode>;
1663 complimentary_comparison = gen_neon_vceq<mode>;
1669 switch (GET_CODE (operands[3]))
1676 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1677 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1683 Note that there also exist direct comparison against 0 forms,
1684 so catch those as a special case. */
1688 switch (GET_CODE (operands[3]))
1691 base_comparison = gen_neon_vclt<mode>;
1694 base_comparison = gen_neon_vcle<mode>;
1697 /* Do nothing, other zero form cases already have the correct
1704 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1706 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1713 /* Vector compare returns false for lanes which are unordered, so if we use
1714 the inverse of the comparison we actually want to emit, then
1715 swap the operands to BSL, we will end up with the correct result.
1716 Note that a NE NaN and NaN NE b are true for all a, b.
1718 Our transformations are:
1723 a NE b -> !(a EQ b) */
1726 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1728 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1730 swap_bsl_operands = 1;
1733 /* We check (a > b || b > a). Combining these comparisons gives us
1734 true iff (a != b && a ORDERED b); swapping the operands to BSL
1735 will then give us (a == b || a UNORDERED b) as intended. */
1737 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx));
1738 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx));
1739 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1740 swap_bsl_operands = 1;
1743 /* Operands are ORDERED iff (a > b || b >= a).
1744 Swapping the operands to BSL will give the UNORDERED case. */
1745 swap_bsl_operands = 1;
1748 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx));
1749 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx));
1750 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1756 if (swap_bsl_operands)
1757 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1760 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1765 (define_expand "vcondu<mode><mode>"
1766 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1768 (match_operator 3 "arm_comparison_operator"
1769 [(match_operand:VDQIW 4 "s_register_operand" "")
1770 (match_operand:VDQIW 5 "s_register_operand" "")])
1771 (match_operand:VDQIW 1 "s_register_operand" "")
1772 (match_operand:VDQIW 2 "s_register_operand" "")))]
1776 int inverse = 0, immediate_zero = 0;
1778 mask = gen_reg_rtx (<V_cmp_result>mode);
1780 if (operands[5] == CONST0_RTX (<MODE>mode))
1782 else if (!REG_P (operands[5]))
1783 operands[5] = force_reg (<MODE>mode, operands[5]);
1785 switch (GET_CODE (operands[3]))
1788 emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
1793 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
1798 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1804 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
1807 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
1813 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
1816 emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
1821 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1831 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1834 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1840 ;; Patterns for builtins.
1842 ; good for plain vadd, vaddq.
1844 (define_expand "neon_vadd<mode>"
1845 [(match_operand:VDQX 0 "s_register_operand" "=w")
1846 (match_operand:VDQX 1 "s_register_operand" "w")
1847 (match_operand:VDQX 2 "s_register_operand" "w")
1848 (match_operand:SI 3 "immediate_operand" "i")]
1851 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1852 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1854 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1859 ; Note that NEON operations don't support the full IEEE 754 standard: in
1860 ; particular, denormal values are flushed to zero. This means that GCC cannot
1861 ; use those instructions for autovectorization, etc. unless
1862 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1863 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1864 ; header) must work in either case: if -funsafe-math-optimizations is given,
1865 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1866 ; expand to unspecs (which may potentially limit the extent to which they might
1867 ; be optimized by generic code).
1869 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1871 (define_insn "neon_vadd<mode>_unspec"
1872 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
1873 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
1874 (match_operand:VDQX 2 "s_register_operand" "w")]
1877 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1879 (if_then_else (match_test "<Is_float_mode>")
1880 (const_string "neon_fp_addsub_s<q>")
1881 (const_string "neon_add<q>")))]
1884 ; operand 3 represents in bits:
1885 ; bit 0: signed (vs unsigned).
1886 ; bit 1: rounding (vs none).
1888 (define_insn "neon_vaddl<mode>"
1889 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1890 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1891 (match_operand:VDI 2 "s_register_operand" "w")
1892 (match_operand:SI 3 "immediate_operand" "i")]
1895 "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
1896 [(set_attr "type" "neon_add_long")]
1899 (define_insn "neon_vaddw<mode>"
1900 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1901 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1902 (match_operand:VDI 2 "s_register_operand" "w")
1903 (match_operand:SI 3 "immediate_operand" "i")]
1906 "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
1907 [(set_attr "type" "neon_add_widen")]
1912 (define_insn "neon_vhadd<mode>"
1913 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1914 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1915 (match_operand:VDQIW 2 "s_register_operand" "w")
1916 (match_operand:SI 3 "immediate_operand" "i")]
1919 "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1920 [(set_attr "type" "neon_add_halve_q")]
1923 (define_insn "neon_vqadd<mode>"
1924 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1925 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1926 (match_operand:VDQIX 2 "s_register_operand" "w")
1927 (match_operand:SI 3 "immediate_operand" "i")]
1930 "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1931 [(set_attr "type" "neon_qadd<q>")]
1934 (define_insn "neon_vaddhn<mode>"
1935 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1936 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1937 (match_operand:VN 2 "s_register_operand" "w")
1938 (match_operand:SI 3 "immediate_operand" "i")]
1941 "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
1942 [(set_attr "type" "neon_add_halve_narrow_q")]
1945 ;; We cannot replace this unspec with mul<mode>3 because of the odd
1946 ;; polynomial multiplication case that can be specified by operand 3.
1947 (define_insn "neon_vmul<mode>"
1948 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1949 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
1950 (match_operand:VDQW 2 "s_register_operand" "w")
1951 (match_operand:SI 3 "immediate_operand" "i")]
1954 "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1956 (if_then_else (match_test "<Is_float_mode>")
1957 (const_string "neon_fp_mul_s<q>")
1958 (const_string "neon_mul_<V_elem_ch><q>")))]
1961 (define_expand "neon_vmla<mode>"
1962 [(match_operand:VDQW 0 "s_register_operand" "=w")
1963 (match_operand:VDQW 1 "s_register_operand" "0")
1964 (match_operand:VDQW 2 "s_register_operand" "w")
1965 (match_operand:VDQW 3 "s_register_operand" "w")
1966 (match_operand:SI 4 "immediate_operand" "i")]
1969 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1970 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1971 operands[2], operands[3]));
1973 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1974 operands[2], operands[3]));
1978 (define_expand "neon_vfma<VCVTF:mode>"
1979 [(match_operand:VCVTF 0 "s_register_operand")
1980 (match_operand:VCVTF 1 "s_register_operand")
1981 (match_operand:VCVTF 2 "s_register_operand")
1982 (match_operand:VCVTF 3 "s_register_operand")
1983 (match_operand:SI 4 "immediate_operand")]
1984 "TARGET_NEON && TARGET_FMA"
1986 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1991 (define_expand "neon_vfms<VCVTF:mode>"
1992 [(match_operand:VCVTF 0 "s_register_operand")
1993 (match_operand:VCVTF 1 "s_register_operand")
1994 (match_operand:VCVTF 2 "s_register_operand")
1995 (match_operand:VCVTF 3 "s_register_operand")
1996 (match_operand:SI 4 "immediate_operand")]
1997 "TARGET_NEON && TARGET_FMA"
1999 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2004 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2006 (define_insn "neon_vmla<mode>_unspec"
2007 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2008 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2009 (match_operand:VDQW 2 "s_register_operand" "w")
2010 (match_operand:VDQW 3 "s_register_operand" "w")]
2013 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2015 (if_then_else (match_test "<Is_float_mode>")
2016 (const_string "neon_fp_mla_s<q>")
2017 (const_string "neon_mla_<V_elem_ch><q>")))]
2020 (define_insn "neon_vmlal<mode>"
2021 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2022 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2023 (match_operand:VW 2 "s_register_operand" "w")
2024 (match_operand:VW 3 "s_register_operand" "w")
2025 (match_operand:SI 4 "immediate_operand" "i")]
2028 "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2029 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2032 (define_expand "neon_vmls<mode>"
2033 [(match_operand:VDQW 0 "s_register_operand" "=w")
2034 (match_operand:VDQW 1 "s_register_operand" "0")
2035 (match_operand:VDQW 2 "s_register_operand" "w")
2036 (match_operand:VDQW 3 "s_register_operand" "w")
2037 (match_operand:SI 4 "immediate_operand" "i")]
2040 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2041 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2042 operands[1], operands[2], operands[3]));
2044 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2045 operands[2], operands[3]));
2049 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2051 (define_insn "neon_vmls<mode>_unspec"
2052 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2053 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2054 (match_operand:VDQW 2 "s_register_operand" "w")
2055 (match_operand:VDQW 3 "s_register_operand" "w")]
2058 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2060 (if_then_else (match_test "<Is_float_mode>")
2061 (const_string "neon_fp_mla_s<q>")
2062 (const_string "neon_mla_<V_elem_ch><q>")))]
2065 (define_insn "neon_vmlsl<mode>"
2066 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2067 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2068 (match_operand:VW 2 "s_register_operand" "w")
2069 (match_operand:VW 3 "s_register_operand" "w")
2070 (match_operand:SI 4 "immediate_operand" "i")]
2073 "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2074 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2077 (define_insn "neon_vqdmulh<mode>"
2078 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2079 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2080 (match_operand:VMDQI 2 "s_register_operand" "w")
2081 (match_operand:SI 3 "immediate_operand" "i")]
2084 "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2085 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; vqdmlal: VMDI inputs (operands 2, 3) accumulated into a <V_widen>
;; register; operand 1 is tied to the destination.  Operand 4 is matched but
;; not referenced by the output template.
2088 (define_insn "neon_vqdmlal<mode>"
2089 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2090 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2091 (match_operand:VMDI 2 "s_register_operand" "w")
2092 (match_operand:VMDI 3 "s_register_operand" "w")
2093 (match_operand:SI 4 "immediate_operand" "i")]
2096 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2097 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: same operand layout as neon_vqdmlal<mode> (accumulator operand 1
;; tied to the destination, VMDI inputs in operands 2 and 3).  Operand 4 is
;; not referenced by the output template.
2100 (define_insn "neon_vqdmlsl<mode>"
2101 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2102 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2103 (match_operand:VMDI 2 "s_register_operand" "w")
2104 (match_operand:VMDI 3 "s_register_operand" "w")
2105 (match_operand:SI 4 "immediate_operand" "i")]
2108 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2109 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW-mode inputs, <V_widen> result printed as a Q register.
;; The mnemonic's type letter is printed from operand 3 via %T3.
2112 (define_insn "neon_vmull<mode>"
2113 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2114 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2115 (match_operand:VW 2 "s_register_operand" "w")
2116 (match_operand:SI 3 "immediate_operand" "i")]
2119 "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2120 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI inputs, <V_widen> result.  Operand 3 is matched but not
;; referenced by the output template.
2123 (define_insn "neon_vqdmull<mode>"
2124 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2125 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2126 (match_operand:VMDI 2 "s_register_operand" "w")
2127 (match_operand:SI 3 "immediate_operand" "i")]
2130 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2131 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for the vsub builtin.  For integer modes, or for float modes
;; when flag_unsafe_math_optimizations is set, it emits the generic
;; sub<mode>3 pattern; the remaining emit call targets
;; neon_vsub<mode>_unspec.
2134 (define_expand "neon_vsub<mode>"
2135 [(match_operand:VDQX 0 "s_register_operand" "=w")
2136 (match_operand:VDQX 1 "s_register_operand" "w")
2137 (match_operand:VDQX 2 "s_register_operand" "w")
2138 (match_operand:SI 3 "immediate_operand" "i")]
2141 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2142 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2144 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2149 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vector subtract (vsub on operands 0, 1, 2).  Float modes
;; are typed neon_fp_addsub_s<q>; all other modes neon_sub<q>.
2151 (define_insn "neon_vsub<mode>_unspec"
2152 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
2153 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
2154 (match_operand:VDQX 2 "s_register_operand" "w")]
2157 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2159 (if_then_else (match_test "<Is_float_mode>")
2160 (const_string "neon_fp_addsub_s<q>")
2161 (const_string "neon_sub<q>")))]
;; vsubl: two VDI inputs, <V_widen> result.  Type letter from operand 3 via
;; %T3.
2164 (define_insn "neon_vsubl<mode>"
2165 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2166 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2167 (match_operand:VDI 2 "s_register_operand" "w")
2168 (match_operand:SI 3 "immediate_operand" "i")]
2171 "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2172 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 is already in the <V_widen> mode (printed as %q1),
;; operand 2 is the VDI-mode input (printed as %P2).
2175 (define_insn "neon_vsubw<mode>"
2176 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2177 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2178 (match_operand:VDI 2 "s_register_operand" "w")
2179 (match_operand:SI 3 "immediate_operand" "i")]
2182 "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
2183 [(set_attr "type" "neon_sub_widen")]
;; vqsub on VDQIX modes; the type letter comes from operand 3 via %T3.
2186 (define_insn "neon_vqsub<mode>"
2187 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2188 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2189 (match_operand:VDQIX 2 "s_register_operand" "w")
2190 (match_operand:SI 3 "immediate_operand" "i")]
2193 "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2194 [(set_attr "type" "neon_qsub<q>")]
;; vhsub on VDQIW modes; the type letter comes from operand 3 via %T3.
2197 (define_insn "neon_vhsub<mode>"
2198 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2199 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2200 (match_operand:VDQIW 2 "s_register_operand" "w")
2201 (match_operand:SI 3 "immediate_operand" "i")]
2204 "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2205 [(set_attr "type" "neon_sub_halve<q>")]
;; vsubhn: VN-mode inputs printed as Q registers, <V_narrow> result printed
;; as a D register.  %O3 inserts operand-3-derived text after the leading
;; "v" (presumably 'r' for the rounding variant -- TODO confirm).
2208 (define_insn "neon_vsubhn<mode>"
2209 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2210 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2211 (match_operand:VN 2 "s_register_operand" "w")
2212 (match_operand:SI 3 "immediate_operand" "i")]
2215 "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
2216 [(set_attr "type" "neon_sub_halve_narrow_q")]
;; vceq producing a <V_cmp_result> vector.  Alternative 0 compares two
;; registers; alternative 1 (constraint Dz) compares operand 1 against #0.
;; Type: FP compare for float modes; otherwise compare-with-zero when
;; operand 2 satisfies zero_operand, else plain compare.
2219 (define_insn "neon_vceq<mode>"
2220 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2221 (unspec:<V_cmp_result>
2222 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2223 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2224 (match_operand:SI 3 "immediate_operand" "i,i")]
2228 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2229 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
2231 (if_then_else (match_test "<Is_float_mode>")
2232 (const_string "neon_fp_compare_s<q>")
2233 (if_then_else (match_operand 2 "zero_operand")
2234 (const_string "neon_compare_zero<q>")
2235 (const_string "neon_compare<q>"))))]
;; vcge producing a <V_cmp_result> vector.  Alternative 0 is
;; register/register, alternative 1 (Dz) compares against #0.  The type
;; letter is printed from operand 3 via %T3.  The "type" attribute is
;; selected the same way as in neon_vceq<mode>.
2238 (define_insn "neon_vcge<mode>"
2239 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2240 (unspec:<V_cmp_result>
2241 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2242 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2243 (match_operand:SI 3 "immediate_operand" "i,i")]
2247 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2248 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2250 (if_then_else (match_test "<Is_float_mode>")
2251 (const_string "neon_fp_compare_s<q>")
2252 (if_then_else (match_operand 2 "zero_operand")
2253 (const_string "neon_compare_zero<q>")
2254 (const_string "neon_compare<q>"))))]
;; Register-only vcge for the integer VDQIW modes (no compare-with-zero
;; alternative).  The type letter is printed from operand 3 via %T3.
2257 (define_insn "neon_vcgeu<mode>"
2258 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2259 (unspec:<V_cmp_result>
2260 [(match_operand:VDQIW 1 "s_register_operand" "w")
2261 (match_operand:VDQIW 2 "s_register_operand" "w")
2262 (match_operand:SI 3 "immediate_operand" "i")]
2265 "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2266 [(set_attr "type" "neon_compare<q>")]
;; vcgt producing a <V_cmp_result> vector.  Alternative 0 is
;; register/register, alternative 1 (Dz) compares against #0.  The type
;; letter is printed from operand 3 via %T3.
2269 (define_insn "neon_vcgt<mode>"
2270 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2271 (unspec:<V_cmp_result>
2272 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2273 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2274 (match_operand:SI 3 "immediate_operand" "i,i")]
2278 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2279 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2281 (if_then_else (match_test "<Is_float_mode>")
2282 (const_string "neon_fp_compare_s<q>")
2283 (if_then_else (match_operand 2 "zero_operand")
2284 (const_string "neon_compare_zero<q>")
2285 (const_string "neon_compare<q>"))))]
;; Register-only vcgt for the integer VDQIW modes (no compare-with-zero
;; alternative).  The type letter is printed from operand 3 via %T3.
2288 (define_insn "neon_vcgtu<mode>"
2289 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2290 (unspec:<V_cmp_result>
2291 [(match_operand:VDQIW 1 "s_register_operand" "w")
2292 (match_operand:VDQIW 2 "s_register_operand" "w")
2293 (match_operand:SI 3 "immediate_operand" "i")]
2296 "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2297 [(set_attr "type" "neon_compare<q>")]
2300 ;; VCLE and VCLT only support comparisons with immediate zero (register
2301 ;; variants are VCGE and VCGT with operands reversed).
;; vcle against zero only: operand 2 must satisfy zero_operand (constraint
;; Dz) and the template hard-codes #0 as the second source.
2303 (define_insn "neon_vcle<mode>"
2304 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2305 (unspec:<V_cmp_result>
2306 [(match_operand:VDQW 1 "s_register_operand" "w")
2307 (match_operand:VDQW 2 "zero_operand" "Dz")
2308 (match_operand:SI 3 "immediate_operand" "i")]
2311 "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2313 (if_then_else (match_test "<Is_float_mode>")
2314 (const_string "neon_fp_compare_s<q>")
2315 (if_then_else (match_operand 2 "zero_operand")
2316 (const_string "neon_compare_zero<q>")
2317 (const_string "neon_compare<q>"))))]
;; vclt against zero only: operand 2 must satisfy zero_operand (constraint
;; Dz) and the template hard-codes #0 as the second source.
2320 (define_insn "neon_vclt<mode>"
2321 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2322 (unspec:<V_cmp_result>
2323 [(match_operand:VDQW 1 "s_register_operand" "w")
2324 (match_operand:VDQW 2 "zero_operand" "Dz")
2325 (match_operand:SI 3 "immediate_operand" "i")]
2328 "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2330 (if_then_else (match_test "<Is_float_mode>")
2331 (const_string "neon_fp_compare_s<q>")
2332 (if_then_else (match_operand 2 "zero_operand")
2333 (const_string "neon_compare_zero<q>")
2334 (const_string "neon_compare<q>"))))]
;; vacge on the VCVTF float modes, result in <V_cmp_result>.  Operand 3 is
;; matched but not referenced by the output template.
2337 (define_insn "neon_vcage<mode>"
2338 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2339 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2340 (match_operand:VCVTF 2 "s_register_operand" "w")
2341 (match_operand:SI 3 "immediate_operand" "i")]
2344 "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2345 [(set_attr "type" "neon_fp_compare_s<q>")]
;; vacgt on the VCVTF float modes, result in <V_cmp_result>.  Operand 3 is
;; matched but not referenced by the output template.
2348 (define_insn "neon_vcagt<mode>"
2349 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2350 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2351 (match_operand:VCVTF 2 "s_register_operand" "w")
2352 (match_operand:SI 3 "immediate_operand" "i")]
2355 "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2356 [(set_attr "type" "neon_fp_compare_s<q>")]
;; vtst on VDQIW modes; the result has the same mode as the inputs.
;; Operand 3 is not referenced by the output template.
2359 (define_insn "neon_vtst<mode>"
2360 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2361 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2362 (match_operand:VDQIW 2 "s_register_operand" "w")
2363 (match_operand:SI 3 "immediate_operand" "i")]
2366 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2367 [(set_attr "type" "neon_tst<q>")]
;; vabd on VDQW modes; type letter from operand 3 via %T3.  Float modes are
;; typed neon_fp_abd_s<q>, integer modes neon_abd<q>.
2370 (define_insn "neon_vabd<mode>"
2371 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2372 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2373 (match_operand:VDQW 2 "s_register_operand" "w")
2374 (match_operand:SI 3 "immediate_operand" "i")]
2377 "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2379 (if_then_else (match_test "<Is_float_mode>")
2380 (const_string "neon_fp_abd_s<q>")
2381 (const_string "neon_abd<q>")))]
;; vabdl: two VW-mode inputs, <V_widen> result; type letter from operand 3
;; via %T3.
2384 (define_insn "neon_vabdl<mode>"
2385 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2386 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2387 (match_operand:VW 2 "s_register_operand" "w")
2388 (match_operand:SI 3 "immediate_operand" "i")]
2391 "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2392 [(set_attr "type" "neon_abd_long")]
;; vaba: the RTL is a plus of an unspec over operands 2, 3, 4 and the
;; accumulator operand 1, which is tied to the destination.  The template
;; prints operands 0, 2 and 3, with the type letter from operand 4.
2395 (define_insn "neon_vaba<mode>"
2396 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2397 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2398 (match_operand:VDQIW 3 "s_register_operand" "w")
2399 (match_operand:SI 4 "immediate_operand" "i")]
2401 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2403 "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2404 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal: like neon_vaba<mode> but with VW-mode inputs and a <V_widen>
;; accumulator (operand 1, tied to the destination).
2407 (define_insn "neon_vabal<mode>"
2408 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2409 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2410 (match_operand:VW 3 "s_register_operand" "w")
2411 (match_operand:SI 4 "immediate_operand" "i")]
2413 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2415 "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2416 [(set_attr "type" "neon_arith_acc<q>")]
;; vmax on VDQW modes; type letter from operand 3 via %T3.  Float modes are
;; typed neon_fp_minmax_s<q>, integer modes neon_minmax<q>.
2419 (define_insn "neon_vmax<mode>"
2420 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2421 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2422 (match_operand:VDQW 2 "s_register_operand" "w")
2423 (match_operand:SI 3 "immediate_operand" "i")]
2426 "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2428 (if_then_else (match_test "<Is_float_mode>")
2429 (const_string "neon_fp_minmax_s<q>")
2430 (const_string "neon_minmax<q>")))]
;; vmin on VDQW modes; type letter from operand 3 via %T3.  Float modes are
;; typed neon_fp_minmax_s<q>, integer modes neon_minmax<q>.
2433 (define_insn "neon_vmin<mode>"
2434 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2435 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2436 (match_operand:VDQW 2 "s_register_operand" "w")
2437 (match_operand:SI 3 "immediate_operand" "i")]
2440 "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2442 (if_then_else (match_test "<Is_float_mode>")
2443 (const_string "neon_fp_minmax_s<q>")
2444 (const_string "neon_minmax<q>")))]
;; Expander for the vpadd builtin on D-register (VD) modes; it forwards to
;; the neon_vpadd_internal<mode> pattern.
2447 (define_expand "neon_vpadd<mode>"
2448 [(match_operand:VD 0 "s_register_operand" "=w")
2449 (match_operand:VD 1 "s_register_operand" "w")
2450 (match_operand:VD 2 "s_register_operand" "w")
2451 (match_operand:SI 3 "immediate_operand" "i")]
2454 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: VDQIW input, <V_double_width> result; type letter from operand 2
;; via %T2.
2459 (define_insn "neon_vpaddl<mode>"
2460 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2461 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")
2462 (match_operand:SI 2 "immediate_operand" "i")]
2465 "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2466 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: <V_double_width> accumulator (operand 1) tied to the destination,
;; VDQIW input in operand 2; type letter from operand 3 via %T3.
2469 (define_insn "neon_vpadal<mode>"
2470 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2471 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2472 (match_operand:VDQIW 2 "s_register_operand" "w")
2473 (match_operand:SI 3 "immediate_operand" "i")]
2476 "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2477 [(set_attr "type" "neon_reduc_add_acc")]
;; vpmax on D-register (VD) modes; type letter from operand 3 via %T3.
;; Float modes use the FP reduction min/max type, others the integer one.
2480 (define_insn "neon_vpmax<mode>"
2481 [(set (match_operand:VD 0 "s_register_operand" "=w")
2482 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2483 (match_operand:VD 2 "s_register_operand" "w")
2484 (match_operand:SI 3 "immediate_operand" "i")]
2487 "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2489 (if_then_else (match_test "<Is_float_mode>")
2490 (const_string "neon_fp_reduc_minmax_s<q>")
2491 (const_string "neon_reduc_minmax<q>")))]
;; vpmin on D-register (VD) modes; type letter from operand 3 via %T3.
;; Float modes use the FP reduction min/max type, others the integer one.
2494 (define_insn "neon_vpmin<mode>"
2495 [(set (match_operand:VD 0 "s_register_operand" "=w")
2496 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2497 (match_operand:VD 2 "s_register_operand" "w")
2498 (match_operand:SI 3 "immediate_operand" "i")]
2501 "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2503 (if_then_else (match_test "<Is_float_mode>")
2504 (const_string "neon_fp_reduc_minmax_s<q>")
2505 (const_string "neon_reduc_minmax<q>")))]
;; vrecps on the VCVTF float modes.  Operand 3 is matched but not referenced
;; by the output template.
2508 (define_insn "neon_vrecps<mode>"
2509 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2510 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2511 (match_operand:VCVTF 2 "s_register_operand" "w")
2512 (match_operand:SI 3 "immediate_operand" "i")]
2515 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2516 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts on the VCVTF float modes.  Operand 3 is matched but not
;; referenced by the output template.
2519 (define_insn "neon_vrsqrts<mode>"
2520 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2521 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2522 (match_operand:VCVTF 2 "s_register_operand" "w")
2523 (match_operand:SI 3 "immediate_operand" "i")]
2526 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2527 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for the vabs builtin: emits the generic abs<mode>2 pattern on
;; operands 0 and 1.  Operand 2 is not used by the visible body.
2530 (define_expand "neon_vabs<mode>"
2531 [(match_operand:VDQW 0 "s_register_operand" "")
2532 (match_operand:VDQW 1 "s_register_operand" "")
2533 (match_operand:SI 2 "immediate_operand" "")]
2536 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs on VDQIW modes.  Operand 2 is matched but not referenced by the
;; output template.
2540 (define_insn "neon_vqabs<mode>"
2541 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2542 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2543 (match_operand:SI 2 "immediate_operand" "i")]
2546 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2547 [(set_attr "type" "neon_qabs<q>")]
;; Expander for the vneg builtin: emits the generic neg<mode>2 pattern on
;; operands 0 and 1.  Operand 2 is not used by the visible body.
2550 (define_expand "neon_vneg<mode>"
2551 [(match_operand:VDQW 0 "s_register_operand" "")
2552 (match_operand:VDQW 1 "s_register_operand" "")
2553 (match_operand:SI 2 "immediate_operand" "")]
2556 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; vqneg on VDQIW modes.  Operand 2 is matched but not referenced by the
;; output template.
2560 (define_insn "neon_vqneg<mode>"
2561 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2562 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2563 (match_operand:SI 2 "immediate_operand" "i")]
2566 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2567 [(set_attr "type" "neon_qneg<q>")]
;; vcls on VDQIW modes.  Operand 2 is matched but not referenced by the
;; output template.
2570 (define_insn "neon_vcls<mode>"
2571 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2572 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2573 (match_operand:SI 2 "immediate_operand" "i")]
2576 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2577 [(set_attr "type" "neon_cls<q>")]
;; clz<mode>2 for the VDQIW vector modes, expressed with the generic clz RTL
;; code and output as vclz.
2580 (define_insn "clz<mode>2"
2581 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2582 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2584 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2585 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vclz builtin: forwards operands 0 and 1 to clz<mode>2.
;; Operand 2 is not used by the visible body.
2588 (define_expand "neon_vclz<mode>"
2589 [(match_operand:VDQIW 0 "s_register_operand" "")
2590 (match_operand:VDQIW 1 "s_register_operand" "")
2591 (match_operand:SI 2 "immediate_operand" "")]
2594 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; popcount<mode>2 for the VE vector modes, expressed with the generic
;; popcount RTL code and output as vcnt.
2598 (define_insn "popcount<mode>2"
2599 [(set (match_operand:VE 0 "s_register_operand" "=w")
2600 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2602 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2603 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vcnt builtin: forwards operands 0 and 1 to
;; popcount<mode>2.  Operand 2 is not used by the visible body.
2606 (define_expand "neon_vcnt<mode>"
2607 [(match_operand:VE 0 "s_register_operand" "=w")
2608 (match_operand:VE 1 "s_register_operand" "w")
2609 (match_operand:SI 2 "immediate_operand" "i")]
2612 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; vrecpe on the V32 modes; the element suffix comes from <V_u_elem>.
;; Operand 2 is matched but not referenced by the output template.
2616 (define_insn "neon_vrecpe<mode>"
2617 [(set (match_operand:V32 0 "s_register_operand" "=w")
2618 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2619 (match_operand:SI 2 "immediate_operand" "i")]
2622 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2623 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrsqrte on the V32 modes; the element suffix comes from <V_u_elem>.
;; Operand 2 is matched but not referenced by the output template.
2626 (define_insn "neon_vrsqrte<mode>"
2627 [(set (match_operand:V32 0 "s_register_operand" "=w")
2628 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2629 (match_operand:SI 2 "immediate_operand" "i")]
2632 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2633 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for the vmvn builtin: forwards operands 0 and 1 to
;; one_cmpl<mode>2.  Operand 2 is not used by the visible body.
2636 (define_expand "neon_vmvn<mode>"
2637 [(match_operand:VDQIW 0 "s_register_operand" "")
2638 (match_operand:VDQIW 1 "s_register_operand" "")
2639 (match_operand:SI 2 "immediate_operand" "")]
2642 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; D-register (VD) lane extract into a core register using vmov.s<size>.
;; On big-endian targets the lane index is mirrored (nunits - 1 - elt)
;; before the instruction is printed.
2646 (define_insn "neon_vget_lane<mode>_sext_internal"
2647 [(set (match_operand:SI 0 "s_register_operand" "=r")
2649 (vec_select:<V_elem>
2650 (match_operand:VD 1 "s_register_operand" "w")
2651 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2654 if (BYTES_BIG_ENDIAN)
2656 int elt = INTVAL (operands[2]);
2657 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2658 operands[2] = GEN_INT (elt);
2660 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2662 [(set_attr "type" "neon_to_gp")]
;; D-register (VD) lane extract into a core register using vmov.u<size>.
;; On big-endian targets the lane index is mirrored (nunits - 1 - elt)
;; before the instruction is printed.
2665 (define_insn "neon_vget_lane<mode>_zext_internal"
2666 [(set (match_operand:SI 0 "s_register_operand" "=r")
2668 (vec_select:<V_elem>
2669 (match_operand:VD 1 "s_register_operand" "w")
2670 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2673 if (BYTES_BIG_ENDIAN)
2675 int elt = INTVAL (operands[2]);
2676 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2677 operands[2] = GEN_INT (elt);
2679 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2681 [(set_attr "type" "neon_to_gp")]
;; Q-register (VQ) lane extract using vmov.s<size>.  The lane is reduced
;; modulo half the element count and the instruction is printed against a
;; <V_HALF>-mode register numbered REGNO + 2 * (elt / halfelts).  On
;; big-endian targets the index is mirrored within that half.
2684 (define_insn "neon_vget_lane<mode>_sext_internal"
2685 [(set (match_operand:SI 0 "s_register_operand" "=r")
2687 (vec_select:<V_elem>
2688 (match_operand:VQ 1 "s_register_operand" "w")
2689 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2693 int regno = REGNO (operands[1]);
2694 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2695 unsigned int elt = INTVAL (operands[2]);
2696 unsigned int elt_adj = elt % halfelts;
2698 if (BYTES_BIG_ENDIAN)
2699 elt_adj = halfelts - 1 - elt_adj;
2701 ops[0] = operands[0];
2702 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2703 ops[2] = GEN_INT (elt_adj);
2704 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2708 [(set_attr "type" "neon_to_gp_q")]
;; Q-register (VQ) lane extract using vmov.u<size>.  The lane is reduced
;; modulo half the element count and the instruction is printed against a
;; <V_HALF>-mode register numbered REGNO + 2 * (elt / halfelts).  On
;; big-endian targets the index is mirrored within that half.
2711 (define_insn "neon_vget_lane<mode>_zext_internal"
2712 [(set (match_operand:SI 0 "s_register_operand" "=r")
2714 (vec_select:<V_elem>
2715 (match_operand:VQ 1 "s_register_operand" "w")
2716 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2720 int regno = REGNO (operands[1]);
2721 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2722 unsigned int elt = INTVAL (operands[2]);
2723 unsigned int elt_adj = elt % halfelts;
2725 if (BYTES_BIG_ENDIAN)
2726 elt_adj = halfelts - 1 - elt_adj;
2728 ops[0] = operands[0];
2729 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2730 ops[2] = GEN_INT (elt_adj);
2731 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2735 [(set_attr "type" "neon_to_gp_q")]
;; Expander for the vget_lane builtin.  Operand 2 is the lane number,
;; checked with neon_lane_bounds; operand 3 is read into "magic".  On
;; big-endian targets the lane is XORed with (elements per 64 bits - 1).
;; vec_extract<mode> is used when (magic & 3) == 3 or the elements are
;; 32 bits wide; otherwise bit 0 of magic selects the _sext_internal
;; pattern and the remaining call uses the _zext_internal pattern.
2738 (define_expand "neon_vget_lane<mode>"
2739 [(match_operand:<V_ext> 0 "s_register_operand" "")
2740 (match_operand:VDQW 1 "s_register_operand" "")
2741 (match_operand:SI 2 "immediate_operand" "")
2742 (match_operand:SI 3 "immediate_operand" "")]
2745 HOST_WIDE_INT magic = INTVAL (operands[3]);
2748 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2750 if (BYTES_BIG_ENDIAN)
2752 /* The intrinsics are defined in terms of a model where the
2753 element ordering in memory is vldm order, whereas the generic
2754 RTL is defined in terms of a model where the element ordering
2755 in memory is array order. Convert the lane number to conform
2757 unsigned int elt = INTVAL (operands[2]);
2758 unsigned int reg_nelts
2759 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2760 elt ^= reg_nelts - 1;
2761 operands[2] = GEN_INT (elt);
2764 if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2765 insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
2768 if ((magic & 1) != 0)
2769 insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
2772 insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
2779 ; Operand 3 (info word) is ignored because it does nothing useful with 64-bit elements.
;; DImode vget_lane: checks the lane number (operand 2) with
;; neon_lane_bounds using bounds 0 and 1, then copies operand 1 to
;; operand 0 with a plain move.
2782 (define_expand "neon_vget_lanedi"
2783 [(match_operand:DI 0 "s_register_operand" "=r")
2784 (match_operand:DI 1 "s_register_operand" "w")
2785 (match_operand:SI 2 "immediate_operand" "i")
2786 (match_operand:SI 3 "immediate_operand" "i")]
2789 neon_lane_bounds (operands[2], 0, 1);
2790 emit_move_insn (operands[0], operands[1]);
;; V2DI vget_lane: switches on the lane number (operand 2).  One arm moves
;; the low DImode part of operand 1, another the high DImode part, and a
;; further arm calls neon_lane_bounds (presumably the out-of-range case --
;; TODO confirm against the case labels).
2794 (define_expand "neon_vget_lanev2di"
2795 [(match_operand:DI 0 "s_register_operand" "")
2796 (match_operand:V2DI 1 "s_register_operand" "")
2797 (match_operand:SI 2 "immediate_operand" "")
2798 (match_operand:SI 3 "immediate_operand" "")]
2801 switch (INTVAL (operands[2]))
2804 emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
2807 emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
2810 neon_lane_bounds (operands[2], 0, 1);
;; Expander for the vset_lane builtin.  Operand 1 is the new element,
;; operand 2 the input vector, operand 3 the lane number.  The lane is
;; checked with neon_lane_bounds, remapped on big-endian targets (XOR with
;; elements per 64 bits - 1), and passed to vec_set<mode>_internal as the
;; one-hot mask (1 << elt).
2816 (define_expand "neon_vset_lane<mode>"
2817 [(match_operand:VDQ 0 "s_register_operand" "=w")
2818 (match_operand:<V_elem> 1 "s_register_operand" "r")
2819 (match_operand:VDQ 2 "s_register_operand" "0")
2820 (match_operand:SI 3 "immediate_operand" "i")]
2823 unsigned int elt = INTVAL (operands[3]);
2824 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
2826 if (BYTES_BIG_ENDIAN)
2828 unsigned int reg_nelts
2829 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2830 elt ^= reg_nelts - 1;
2833 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2834 GEN_INT (1 << elt), operands[2]));
2838 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; DImode vset_lane: checks the lane number (operand 3) with
;; neon_lane_bounds using bounds 0 and 1, then moves operand 1 into
;; operand 0.  Operand 2 is not used by the visible body.
2840 (define_expand "neon_vset_lanedi"
2841 [(match_operand:DI 0 "s_register_operand" "=w")
2842 (match_operand:DI 1 "s_register_operand" "r")
2843 (match_operand:DI 2 "s_register_operand" "0")
2844 (match_operand:SI 3 "immediate_operand" "i")]
2847 neon_lane_bounds (operands[3], 0, 1);
2848 emit_move_insn (operands[0], operands[1]);
;; Expander for the vcreate builtin: takes the DImode operand 1, views it in
;; <MODE> via gen_lowpart, and moves the result into operand 0.
2852 (define_expand "neon_vcreate<mode>"
2853 [(match_operand:VDX 0 "s_register_operand" "")
2854 (match_operand:DI 1 "general_operand" "")]
2857 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2858 emit_move_insn (operands[0], src);
;; vdup from a core register (constraint "r") into every element of a
;; VX-mode vector.
2862 (define_insn "neon_vdup_n<mode>"
2863 [(set (match_operand:VX 0 "s_register_operand" "=w")
2864 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2866 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2867 [(set_attr "type" "neon_from_gp<q>")]
;; vdup for the V32 modes with two alternatives: the source in a core
;; register ("r", type neon_from_gp<q>) or in a register matching
;; constraint "t", printed with %y1 (type neon_dup<q>).
2870 (define_insn "neon_vdup_n<mode>"
2871 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2872 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2875 vdup.<V_sz_elem>\t%<V_reg>0, %1
2876 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2877 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode vdup_n: there is a single element, so this is a plain move of
;; operand 1 into operand 0.
2880 (define_expand "neon_vdup_ndi"
2881 [(match_operand:DI 0 "s_register_operand" "=w")
2882 (match_operand:DI 1 "s_register_operand" "r")]
2885 emit_move_insn (operands[0], operands[1]);
;; V2DI vdup_n: two vmov instructions write the %e0 and %f0 parts of the
;; destination, from a core-register pair (%Q1, %R1) or from a "w" register
;; (%P1).  Length is 8 bytes and the type is "multiple".
2890 (define_insn "neon_vdup_nv2di"
2891 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2892 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2895 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2896 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2897 [(set_attr "length" "8")
2898 (set_attr "type" "multiple")]
;; vdup from one lane of operand 1 (a <V_double_vector_mode> register) into
;; every element of operand 0.  On big-endian targets the lane index is
;; mirrored.  Two templates are returned: one prints the destination as
;; %P0, the other as %q0.
2901 (define_insn "neon_vdup_lane<mode>_internal"
2902 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2904 (vec_select:<V_elem>
2905 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2906 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2909 if (BYTES_BIG_ENDIAN)
2911 int elt = INTVAL (operands[2]);
2912 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2913 operands[2] = GEN_INT (elt);
2916 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2918 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2920 [(set_attr "type" "neon_dup<q>")]
;; Expander for the vdup_lane builtin.  The lane number (operand 2) is
;; checked with neon_lane_bounds against the element count of
;; <V_double_vector_mode>, remapped on big-endian targets (XOR with elements
;; per 64 bits - 1), then passed to neon_vdup_lane<mode>_internal.
2923 (define_expand "neon_vdup_lane<mode>"
2924 [(match_operand:VDQW 0 "s_register_operand" "=w")
2925 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2926 (match_operand:SI 2 "immediate_operand" "i")]
2929 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
2930 if (BYTES_BIG_ENDIAN)
2932 unsigned int elt = INTVAL (operands[2]);
2933 unsigned int reg_nelts
2934 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
2935 elt ^= reg_nelts - 1;
2936 operands[2] = GEN_INT (elt);
2938 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2943 ; Scalar index is ignored, since only zero is valid here.
;; DImode vdup_lane: checks operand 2 with neon_lane_bounds (bounds 0 and 1)
;; and then moves operand 1 into operand 0.
2944 (define_expand "neon_vdup_lanedi"
2945 [(match_operand:DI 0 "s_register_operand" "=w")
2946 (match_operand:DI 1 "s_register_operand" "w")
2947 (match_operand:SI 2 "immediate_operand" "i")]
2950 neon_lane_bounds (operands[2], 0, 1);
2951 emit_move_insn (operands[0], operands[1]);
2955 ; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI vdup_lane: checks operand 2 with neon_lane_bounds (bounds 0 and 1)
;; and then emits neon_vdup_nv2di on operands 0 and 1.
2956 (define_expand "neon_vdup_lanev2di"
2957 [(match_operand:V2DI 0 "s_register_operand" "=w")
2958 (match_operand:DI 1 "s_register_operand" "w")
2959 (match_operand:SI 2 "immediate_operand" "i")]
2962 neon_lane_bounds (operands[2], 0, 1);
2963 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2967 ; Disabled before reload because we don't want combine doing something silly,
2968 ; but used by the post-reload expansion of neon_vcombine.
;; Swaps two vector registers with one vswp.  Both operands are read-write
;; ("+w"): the first set copies operand 1 to operand 0 and the second copies
;; operand 0 back to operand 1.  Enabled only when TARGET_NEON and
;; reload_completed hold.
2969 (define_insn "*neon_vswp<mode>"
2970 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2971 (match_operand:VDQX 1 "s_register_operand" "+w"))
2972 (set (match_dup 1) (match_dup 0))]
2973 "TARGET_NEON && reload_completed"
2974 "vswp\t%<V_reg>0, %<V_reg>1"
2975 [(set_attr "type" "neon_permute<q>")]
2978 ;; In this insn, operand 1 should be low, and operand 2 the high part of the destination vector.
2980 ;; FIXME: A different implementation of this builtin could make it much
2981 ;; more likely that we wouldn't actually need to output anything (we could make
2982 ;; it so that the reg allocator puts things in the right places magically
2983 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vcombine: vec_concat of two VDX-mode operands into a <V_DOUBLE> result.
;; The split condition adds reload_completed, and the split body calls
;; neon_split_vcombine on the operands.
2985 (define_insn_and_split "neon_vcombine<mode>"
2986 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2987 (vec_concat:<V_DOUBLE>
2988 (match_operand:VDX 1 "s_register_operand" "w")
2989 (match_operand:VDX 2 "s_register_operand" "w")))]
2992 "&& reload_completed"
2995 neon_split_vcombine (operands);
2998 [(set_attr "type" "multiple")]
;; vget_high: moves into operand 0 the <V_HALF>-mode subreg of operand 1 at
;; byte offset GET_MODE_SIZE (<V_HALF>mode).
3001 (define_expand "neon_vget_high<mode>"
3002 [(match_operand:<V_HALF> 0 "s_register_operand")
3003 (match_operand:VQX 1 "s_register_operand")]
3006 emit_move_insn (operands[0],
3007 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3008 GET_MODE_SIZE (<V_HALF>mode)));
;; vget_low: moves a <V_HALF>-mode subreg of operand 1 into operand 0.
3012 (define_expand "neon_vget_low<mode>"
3013 [(match_operand:<V_HALF> 0 "s_register_operand")
3014 (match_operand:VQX 1 "s_register_operand")]
3017 emit_move_insn (operands[0],
3018 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Vector int-to-float conversion using the generic "float" RTL code, output
;; as vcvt.f32.s32.  Enabled only when TARGET_NEON holds and
;; flag_rounding_math is off.
3023 (define_insn "float<mode><V_cvtto>2"
3024 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3025 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3026 "TARGET_NEON && !flag_rounding_math"
3027 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3028 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Vector int-to-float conversion using the "unsigned_float" RTL code,
;; output as vcvt.f32.u32.  Enabled only when TARGET_NEON holds and
;; flag_rounding_math is off.
3031 (define_insn "floatuns<mode><V_cvtto>2"
3032 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3033 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3034 "TARGET_NEON && !flag_rounding_math"
3035 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3036 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Vector float-to-int conversion using the "fix" RTL code, output as
;; vcvt.s32.f32.
3039 (define_insn "fix_trunc<mode><V_cvtto>2"
3040 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3041 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3043 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3044 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Vector float-to-int conversion using the "unsigned_fix" RTL code, output
;; as vcvt.u32.f32.
3047 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3048 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3049 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3051 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3052 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt builtin, float (VCVTF) source: the destination type letter is
;; printed from operand 2 via %T2 and the source type is fixed at f32.
3055 (define_insn "neon_vcvt<mode>"
3056 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3057 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3058 (match_operand:SI 2 "immediate_operand" "i")]
3061 "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
3062 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt builtin, integer (VCVTI) source: the destination type is fixed at
;; f32 and the source type letter is printed from operand 2 via %T2.
3065 (define_insn "neon_vcvt<mode>"
3066 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3067 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3068 (match_operand:SI 2 "immediate_operand" "i")]
3071 "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
3072 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; V4HF to V4SF conversion (vcvt.f32.f16): D-register source, Q-register
;; destination.  Requires both TARGET_NEON and TARGET_FP16.
3075 (define_insn "neon_vcvtv4sfv4hf"
3076 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3077 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3079 "TARGET_NEON && TARGET_FP16"
3080 "vcvt.f32.f16\t%q0, %P1"
3081 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; V4SF to V4HF conversion (vcvt.f16.f32): Q-register source, D-register
;; destination.  Requires both TARGET_NEON and TARGET_FP16.
3084 (define_insn "neon_vcvtv4hfv4sf"
3085 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3086 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3088 "TARGET_NEON && TARGET_FP16"
3089 "vcvt.f16.f32\t%P0, %q1"
3090 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; vcvt_n builtin, float (VCVTF) source.  Operand 2 is checked with
;; neon_const_bounds (1, 33) and printed as the instruction's last operand;
;; operand 3 supplies the destination type letter via %T3.
3093 (define_insn "neon_vcvt_n<mode>"
3094 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3095 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3096 (match_operand:SI 2 "immediate_operand" "i")
3097 (match_operand:SI 3 "immediate_operand" "i")]
3101 neon_const_bounds (operands[2], 1, 33);
3102 return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3104 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt_n builtin, integer (VCVTI) source.  Operand 2 is checked with
;; neon_const_bounds (1, 33) and printed as the instruction's last operand;
;; operand 3 supplies the source type letter via %T3.
3107 (define_insn "neon_vcvt_n<mode>"
3108 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3109 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3110 (match_operand:SI 2 "immediate_operand" "i")
3111 (match_operand:SI 3 "immediate_operand" "i")]
3115 neon_const_bounds (operands[2], 1, 33);
3116 return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2";
3118 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Narrowing move: "vmovn.<V_if_elem>" from VN-mode operand 1 (%q1) into the
;; <V_narrow> result (%P0).  Operand 2 is matched but is not referenced by
;; the output template.
3121 (define_insn "neon_vmovn<mode>"
3122 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3123 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3124 (match_operand:SI 2 "immediate_operand" "i")]
3127 "vmovn.<V_if_elem>\t%P0, %q1"
3128 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing move: "vqmovn.<T><size>" from VN-mode operand 1
;; (%q1) into the <V_narrow> result (%P0).  The type letter comes from the
;; %T2 modifier applied to immediate operand 2.
3131 (define_insn "neon_vqmovn<mode>"
3132 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3133 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3134 (match_operand:SI 2 "immediate_operand" "i")]
3137 "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
3138 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; "vqmovun.<V_s_elem>" from VN-mode operand 1 (%q1) into the <V_narrow>
;; result (%P0).  Operand 2 is matched but is not referenced by the output
;; template; the element type is fixed by <V_s_elem>.
3141 (define_insn "neon_vqmovun<mode>"
3142 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3143 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3144 (match_operand:SI 2 "immediate_operand" "i")]
3147 "vqmovun.<V_s_elem>\t%P0, %q1"
3148 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move: "vmovl.<T><size>" from VW-mode operand 1 (%P1) into the
;; <V_widen> result (%q0).  The type letter comes from the %T2 modifier
;; applied to immediate operand 2.
3151 (define_insn "neon_vmovl<mode>"
3152 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3153 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3154 (match_operand:SI 2 "immediate_operand" "i")]
3157 "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
3158 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by one lane, VMD form: "vmul.<V_if_elem> %P0, %P1, %P2[lane]".
;; The lane index (operand 3) is checked by neon_lane_bounds against the
;; element count of <MODE>, which is also the mode of the lane operand.
;; Operand 4 is matched but unused by the template.  The "type" attribute
;; picks the fp or integer scalar-multiply class via <Is_float_mode>.
3161 (define_insn "neon_vmul_lane<mode>"
3162 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3163 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3164 (match_operand:VMD 2 "s_register_operand"
3165 "<scalar_mul_constraint>")
3166 (match_operand:SI 3 "immediate_operand" "i")
3167 (match_operand:SI 4 "immediate_operand" "i")]
3171 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3172 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3175 (if_then_else (match_test "<Is_float_mode>")
3176 (const_string "neon_fp_mul_s_scalar<q>")
3177 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by one lane, VMQ form: "vmul.<V_if_elem> %q0, %q1, %P2[lane]".
;; The lane operand (operand 2) has mode <V_HALF>, so the lane index
;; (operand 3) is checked against the element count of <V_HALF>, not <MODE>.
;; Operand 4 is matched but unused by the template.  The "type" attribute
;; picks the fp or integer scalar-multiply class via <Is_float_mode>.
3180 (define_insn "neon_vmul_lane<mode>"
3181 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3182 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3183 (match_operand:<V_HALF> 2 "s_register_operand"
3184 "<scalar_mul_constraint>")
3185 (match_operand:SI 3 "immediate_operand" "i")
3186 (match_operand:SI 4 "immediate_operand" "i")]
3190 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3191 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3194 (if_then_else (match_test "<Is_float_mode>")
3195 (const_string "neon_fp_mul_s_scalar<q>")
3196 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Widening multiply by one lane (UNSPEC_VMULL_LANE): VMDI operands 1 and 2
;; produce a <V_widen> result via "vmull.<T><size> %q0, %P1, %P2[lane]".
;; The lane index (operand 3) is checked against the element count of
;; <MODE>; the type letter comes from %T4 on immediate operand 4.
3199 (define_insn "neon_vmull_lane<mode>"
3200 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3201 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3202 (match_operand:VMDI 2 "s_register_operand"
3203 "<scalar_mul_constraint>")
3204 (match_operand:SI 3 "immediate_operand" "i")
3205 (match_operand:SI 4 "immediate_operand" "i")]
3206 UNSPEC_VMULL_LANE))]
3209 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3210 return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3212 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply by one lane (UNSPEC_VQDMULL_LANE):
;; "vqdmull.<V_s_elem> %q0, %P1, %P2[lane]".  The lane index (operand 3) is
;; checked against the element count of <MODE>.  Operand 4 is matched but
;; unused by the template.
3215 (define_insn "neon_vqdmull_lane<mode>"
3216 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3217 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3218 (match_operand:VMDI 2 "s_register_operand"
3219 "<scalar_mul_constraint>")
3220 (match_operand:SI 3 "immediate_operand" "i")
3221 (match_operand:SI 4 "immediate_operand" "i")]
3222 UNSPEC_VQDMULL_LANE))]
3225 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3226 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3228 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; Saturating doubling multiply-high by one lane, VMQI form
;; (UNSPEC_VQDMULH_LANE): "vq<O>dmulh.<T><size> %q0, %q1, %P2[lane]".
;; The %O4 and %T4 modifiers are both driven by immediate operand 4.
;; The lane index (operand 3) selects an element of operand 2, whose mode is
;; <V_HALF>, so it is bounded by the element count of <V_HALF> rather than
;; of the full <MODE> vector (as the VMQ neon_vmul_lane pattern does).
3231 (define_insn "neon_vqdmulh_lane<mode>"
3232 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3233 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3234 (match_operand:<V_HALF> 2 "s_register_operand"
3235 "<scalar_mul_constraint>")
3236 (match_operand:SI 3 "immediate_operand" "i")
3237 (match_operand:SI 4 "immediate_operand" "i")]
3238 UNSPEC_VQDMULH_LANE))]
3241 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3242 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]";
3244 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Saturating doubling multiply-high by one lane, VMDI form
;; (UNSPEC_VQDMULH_LANE): "vq<O>dmulh.<T><size> %P0, %P1, %P2[lane]".
;; The %O4 and %T4 modifiers are both driven by immediate operand 4; the
;; lane index (operand 3) is checked against the element count of <MODE>.
3247 (define_insn "neon_vqdmulh_lane<mode>"
3248 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3249 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3250 (match_operand:VMDI 2 "s_register_operand"
3251 "<scalar_mul_constraint>")
3252 (match_operand:SI 3 "immediate_operand" "i")
3253 (match_operand:SI 4 "immediate_operand" "i")]
3254 UNSPEC_VQDMULH_LANE))]
3257 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3258 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]";
3260 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Multiply-accumulate by one lane, VMD form:
;; "vmla.<V_if_elem> %P0, %P2, %P3[lane]".  Operand 1 (the accumulator) is
;; tied to the output by the "0" constraint.  The lane index (operand 4) is
;; checked against the element count of <MODE>; operand 5 is unused by the
;; template.  The "type" attribute depends on <Is_float_mode>.
3263 (define_insn "neon_vmla_lane<mode>"
3264 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3265 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3266 (match_operand:VMD 2 "s_register_operand" "w")
3267 (match_operand:VMD 3 "s_register_operand"
3268 "<scalar_mul_constraint>")
3269 (match_operand:SI 4 "immediate_operand" "i")
3270 (match_operand:SI 5 "immediate_operand" "i")]
3274 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3275 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3278 (if_then_else (match_test "<Is_float_mode>")
3279 (const_string "neon_fp_mla_s_scalar<q>")
3280 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by one lane, VMQ form:
;; "vmla.<V_if_elem> %q0, %q2, %P3[lane]".  Operand 1 (the accumulator) is
;; tied to the output by the "0" constraint; operand 5 is unused by the
;; template.  The "type" attribute depends on <Is_float_mode>.
;; The lane index (operand 4) selects an element of operand 3, whose mode is
;; <V_HALF>, so it is bounded by the element count of <V_HALF> rather than
;; of the full <MODE> vector (as the VMQ neon_vmul_lane pattern does).
3283 (define_insn "neon_vmla_lane<mode>"
3284 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3285 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3286 (match_operand:VMQ 2 "s_register_operand" "w")
3287 (match_operand:<V_HALF> 3 "s_register_operand"
3288 "<scalar_mul_constraint>")
3289 (match_operand:SI 4 "immediate_operand" "i")
3290 (match_operand:SI 5 "immediate_operand" "i")]
3294 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3295 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3298 (if_then_else (match_test "<Is_float_mode>")
3299 (const_string "neon_fp_mla_s_scalar<q>")
3300 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by one lane (UNSPEC_VMLAL_LANE):
;; "vmlal.<T><size> %q0, %P2, %P3[lane]".  The <V_widen> accumulator
;; (operand 1) is tied to the output.  The lane index (operand 4) is checked
;; against the element count of <MODE>; %T5 reads immediate operand 5.
3303 (define_insn "neon_vmlal_lane<mode>"
3304 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3305 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3306 (match_operand:VMDI 2 "s_register_operand" "w")
3307 (match_operand:VMDI 3 "s_register_operand"
3308 "<scalar_mul_constraint>")
3309 (match_operand:SI 4 "immediate_operand" "i")
3310 (match_operand:SI 5 "immediate_operand" "i")]
3311 UNSPEC_VMLAL_LANE))]
3314 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3315 return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3317 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply-accumulate by one lane
;; (UNSPEC_VQDMLAL_LANE): "vqdmlal.<V_s_elem> %q0, %P2, %P3[lane]".  The
;; <V_widen> accumulator (operand 1) is tied to the output.  The lane index
;; (operand 4) is checked against the element count of <MODE>; operand 5 is
;; unused by the template.
3320 (define_insn "neon_vqdmlal_lane<mode>"
3321 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3322 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3323 (match_operand:VMDI 2 "s_register_operand" "w")
3324 (match_operand:VMDI 3 "s_register_operand"
3325 "<scalar_mul_constraint>")
3326 (match_operand:SI 4 "immediate_operand" "i")
3327 (match_operand:SI 5 "immediate_operand" "i")]
3328 UNSPEC_VQDMLAL_LANE))]
3331 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3332 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3334 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by one lane, VMD form:
;; "vmls.<V_if_elem> %P0, %P2, %P3[lane]".  Operand 1 (the accumulator) is
;; tied to the output by the "0" constraint.  The lane index (operand 4) is
;; checked against the element count of <MODE>; operand 5 is unused by the
;; template.  The "type" attribute depends on <Is_float_mode>.
3337 (define_insn "neon_vmls_lane<mode>"
3338 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3339 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3340 (match_operand:VMD 2 "s_register_operand" "w")
3341 (match_operand:VMD 3 "s_register_operand"
3342 "<scalar_mul_constraint>")
3343 (match_operand:SI 4 "immediate_operand" "i")
3344 (match_operand:SI 5 "immediate_operand" "i")]
3348 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3349 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3352 (if_then_else (match_test "<Is_float_mode>")
3353 (const_string "neon_fp_mla_s_scalar<q>")
3354 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by one lane, VMQ form:
;; "vmls.<V_if_elem> %q0, %q2, %P3[lane]".  Operand 1 (the accumulator) is
;; tied to the output by the "0" constraint; operand 5 is unused by the
;; template.  The "type" attribute depends on <Is_float_mode>.
;; The lane index (operand 4) selects an element of operand 3, whose mode is
;; <V_HALF>, so it is bounded by the element count of <V_HALF> rather than
;; of the full <MODE> vector (as the VMQ neon_vmul_lane pattern does).
3357 (define_insn "neon_vmls_lane<mode>"
3358 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3359 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3360 (match_operand:VMQ 2 "s_register_operand" "w")
3361 (match_operand:<V_HALF> 3 "s_register_operand"
3362 "<scalar_mul_constraint>")
3363 (match_operand:SI 4 "immediate_operand" "i")
3364 (match_operand:SI 5 "immediate_operand" "i")]
3368 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3369 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3372 (if_then_else (match_test "<Is_float_mode>")
3373 (const_string "neon_fp_mla_s_scalar<q>")
3374 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by one lane (UNSPEC_VMLSL_LANE):
;; "vmlsl.<T><size> %q0, %P2, %P3[lane]".  The <V_widen> accumulator
;; (operand 1) is tied to the output.  The lane index (operand 4) is checked
;; against the element count of <MODE>; %T5 reads immediate operand 5.
3377 (define_insn "neon_vmlsl_lane<mode>"
3378 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3379 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3380 (match_operand:VMDI 2 "s_register_operand" "w")
3381 (match_operand:VMDI 3 "s_register_operand"
3382 "<scalar_mul_constraint>")
3383 (match_operand:SI 4 "immediate_operand" "i")
3384 (match_operand:SI 5 "immediate_operand" "i")]
3385 UNSPEC_VMLSL_LANE))]
3388 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3389 return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3391 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply-subtract by one lane
;; (UNSPEC_VQDMLSL_LANE): "vqdmlsl.<V_s_elem> %q0, %P2, %P3[lane]".  The
;; <V_widen> accumulator (operand 1) is tied to the output.  The lane index
;; (operand 4) is checked against the element count of <MODE>; operand 5 is
;; unused by the template.
3394 (define_insn "neon_vqdmlsl_lane<mode>"
3395 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3396 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3397 (match_operand:VMDI 2 "s_register_operand" "w")
3398 (match_operand:VMDI 3 "s_register_operand"
3399 "<scalar_mul_constraint>")
3400 (match_operand:SI 4 "immediate_operand" "i")
3401 (match_operand:SI 5 "immediate_operand" "i")]
3402 UNSPEC_VQDMLSL_LANE))]
3405 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3406 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3408 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3411 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3412 ; core register into a temp register, then use a scalar taken from that. This
3413 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3414 ; or extracted from another vector. In the latter case it is currently better
3415 ; to use the "_lane" variant, and the former case can probably be implemented
3416 ; using vld1_lane, but that hasn't been done yet.
;; Expander for multiply-by-scalar, VMD form.  Allocates a fresh <MODE>
;; pseudo, emits neon_vset_lane<mode> with scalar operand 2 and const0_rtx
;; (presumably the lane number -- confirm against neon_vset_lane), then
;; emits neon_vmul_lane<mode> on lane 0.  const0_rtx is passed for the final
;; argument as well; operands[3] is not forwarded.
3418 (define_expand "neon_vmul_n<mode>"
3419 [(match_operand:VMD 0 "s_register_operand" "")
3420 (match_operand:VMD 1 "s_register_operand" "")
3421 (match_operand:<V_elem> 2 "s_register_operand" "")
3422 (match_operand:SI 3 "immediate_operand" "")]
3425 rtx tmp = gen_reg_rtx (<MODE>mode);
3426 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3427 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3428 const0_rtx, const0_rtx));
;; Expander for multiply-by-scalar, VMQ form.  The temporary holding the
;; scalar is a <V_HALF> pseudo: neon_vset_lane<V_half> is emitted with
;; scalar operand 2 and const0_rtx (presumably the lane number -- confirm),
;; then neon_vmul_lane<mode> is emitted on lane 0.  const0_rtx is passed for
;; the final argument as well; operands[3] is not forwarded.
3432 (define_expand "neon_vmul_n<mode>"
3433 [(match_operand:VMQ 0 "s_register_operand" "")
3434 (match_operand:VMQ 1 "s_register_operand" "")
3435 (match_operand:<V_elem> 2 "s_register_operand" "")
3436 (match_operand:SI 3 "immediate_operand" "")]
3439 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3440 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3441 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3442 const0_rtx, const0_rtx));
;; Expander for widening multiply-by-scalar.  Places scalar operand 2 into a
;; fresh <MODE> pseudo via neon_vset_lane<mode> (const0_rtx presumably being
;; the lane number -- confirm), then emits neon_vmull_lane<mode> on lane 0,
;; forwarding operands[3] as that pattern's final immediate.
3446 (define_expand "neon_vmull_n<mode>"
3447 [(match_operand:<V_widen> 0 "s_register_operand" "")
3448 (match_operand:VMDI 1 "s_register_operand" "")
3449 (match_operand:<V_elem> 2 "s_register_operand" "")
3450 (match_operand:SI 3 "immediate_operand" "")]
3453 rtx tmp = gen_reg_rtx (<MODE>mode);
3454 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3455 emit_insn (gen_neon_vmull_lane<mode> (operands[0], operands[1], tmp,
3456 const0_rtx, operands[3]));
;; Expander for saturating doubling widening multiply-by-scalar.  Places
;; scalar operand 2 into a fresh <MODE> pseudo via neon_vset_lane<mode>
;; (const0_rtx presumably being the lane number -- confirm), then emits
;; neon_vqdmull_lane<mode> on lane 0.  const0_rtx is passed for the final
;; argument; operands[3] is not forwarded.
3460 (define_expand "neon_vqdmull_n<mode>"
3461 [(match_operand:<V_widen> 0 "s_register_operand" "")
3462 (match_operand:VMDI 1 "s_register_operand" "")
3463 (match_operand:<V_elem> 2 "s_register_operand" "")
3464 (match_operand:SI 3 "immediate_operand" "")]
3467 rtx tmp = gen_reg_rtx (<MODE>mode);
3468 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3469 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3470 const0_rtx, const0_rtx));
;; Expander for saturating doubling multiply-high by scalar, VMDI form.
;; Places scalar operand 2 into a fresh <MODE> pseudo via
;; neon_vset_lane<mode> (const0_rtx presumably being the lane number --
;; confirm), then emits neon_vqdmulh_lane<mode> on lane 0, forwarding
;; operands[3] as that pattern's final immediate.
3474 (define_expand "neon_vqdmulh_n<mode>"
3475 [(match_operand:VMDI 0 "s_register_operand" "")
3476 (match_operand:VMDI 1 "s_register_operand" "")
3477 (match_operand:<V_elem> 2 "s_register_operand" "")
3478 (match_operand:SI 3 "immediate_operand" "")]
3481 rtx tmp = gen_reg_rtx (<MODE>mode);
3482 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3483 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3484 const0_rtx, operands[3]));
;; Expander for saturating doubling multiply-high by scalar, VMQI form.
;; The scalar (operand 2) goes into a fresh <V_HALF> pseudo via
;; neon_vset_lane<V_half> (const0_rtx presumably being the lane number --
;; confirm), then neon_vqdmulh_lane<mode> is emitted on lane 0, forwarding
;; operands[3] as that pattern's final immediate.
3488 (define_expand "neon_vqdmulh_n<mode>"
3489 [(match_operand:VMQI 0 "s_register_operand" "")
3490 (match_operand:VMQI 1 "s_register_operand" "")
3491 (match_operand:<V_elem> 2 "s_register_operand" "")
3492 (match_operand:SI 3 "immediate_operand" "")]
3495 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3496 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3497 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3498 const0_rtx, operands[3]));
;; Expander for multiply-accumulate by scalar, VMD form.  Places scalar
;; operand 3 into a fresh <MODE> pseudo via neon_vset_lane<mode> (const0_rtx
;; presumably being the lane number -- confirm), then emits
;; neon_vmla_lane<mode> on lane 0, forwarding operands[4] as the final
;; immediate.
3502 (define_expand "neon_vmla_n<mode>"
3503 [(match_operand:VMD 0 "s_register_operand" "")
3504 (match_operand:VMD 1 "s_register_operand" "")
3505 (match_operand:VMD 2 "s_register_operand" "")
3506 (match_operand:<V_elem> 3 "s_register_operand" "")
3507 (match_operand:SI 4 "immediate_operand" "")]
3510 rtx tmp = gen_reg_rtx (<MODE>mode);
3511 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3512 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3513 tmp, const0_rtx, operands[4]));
;; Expander for multiply-accumulate by scalar, VMQ form.  The scalar
;; (operand 3) goes into a fresh <V_HALF> pseudo via neon_vset_lane<V_half>
;; (const0_rtx presumably being the lane number -- confirm), then
;; neon_vmla_lane<mode> is emitted on lane 0, forwarding operands[4] as the
;; final immediate.
3517 (define_expand "neon_vmla_n<mode>"
3518 [(match_operand:VMQ 0 "s_register_operand" "")
3519 (match_operand:VMQ 1 "s_register_operand" "")
3520 (match_operand:VMQ 2 "s_register_operand" "")
3521 (match_operand:<V_elem> 3 "s_register_operand" "")
3522 (match_operand:SI 4 "immediate_operand" "")]
3525 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3526 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3527 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3528 tmp, const0_rtx, operands[4]));
;; Expander for widening multiply-accumulate by scalar.  Places scalar
;; operand 3 into a fresh <MODE> pseudo via neon_vset_lane<mode> (const0_rtx
;; presumably being the lane number -- confirm), then emits
;; neon_vmlal_lane<mode> on lane 0, forwarding operands[4] as the final
;; immediate.
3532 (define_expand "neon_vmlal_n<mode>"
3533 [(match_operand:<V_widen> 0 "s_register_operand" "")
3534 (match_operand:<V_widen> 1 "s_register_operand" "")
3535 (match_operand:VMDI 2 "s_register_operand" "")
3536 (match_operand:<V_elem> 3 "s_register_operand" "")
3537 (match_operand:SI 4 "immediate_operand" "")]
3540 rtx tmp = gen_reg_rtx (<MODE>mode);
3541 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3542 emit_insn (gen_neon_vmlal_lane<mode> (operands[0], operands[1], operands[2],
3543 tmp, const0_rtx, operands[4]));
;; Expander for saturating doubling widening multiply-accumulate by scalar.
;; Places scalar operand 3 into a fresh <MODE> pseudo via
;; neon_vset_lane<mode> (const0_rtx presumably being the lane number --
;; confirm), then emits neon_vqdmlal_lane<mode> on lane 0, forwarding
;; operands[4] as the final immediate.
3547 (define_expand "neon_vqdmlal_n<mode>"
3548 [(match_operand:<V_widen> 0 "s_register_operand" "")
3549 (match_operand:<V_widen> 1 "s_register_operand" "")
3550 (match_operand:VMDI 2 "s_register_operand" "")
3551 (match_operand:<V_elem> 3 "s_register_operand" "")
3552 (match_operand:SI 4 "immediate_operand" "")]
3555 rtx tmp = gen_reg_rtx (<MODE>mode);
3556 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3557 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3558 tmp, const0_rtx, operands[4]));
;; Expander for multiply-subtract by scalar, VMD form.  Places scalar
;; operand 3 into a fresh <MODE> pseudo via neon_vset_lane<mode> (const0_rtx
;; presumably being the lane number -- confirm), then emits
;; neon_vmls_lane<mode> on lane 0, forwarding operands[4] as the final
;; immediate.
3562 (define_expand "neon_vmls_n<mode>"
3563 [(match_operand:VMD 0 "s_register_operand" "")
3564 (match_operand:VMD 1 "s_register_operand" "")
3565 (match_operand:VMD 2 "s_register_operand" "")
3566 (match_operand:<V_elem> 3 "s_register_operand" "")
3567 (match_operand:SI 4 "immediate_operand" "")]
3570 rtx tmp = gen_reg_rtx (<MODE>mode);
3571 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3572 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3573 tmp, const0_rtx, operands[4]));
;; Expander for multiply-subtract by scalar, VMQ form.  The scalar
;; (operand 3) goes into a fresh <V_HALF> pseudo via neon_vset_lane<V_half>
;; (const0_rtx presumably being the lane number -- confirm), then
;; neon_vmls_lane<mode> is emitted on lane 0, forwarding operands[4] as the
;; final immediate.
3577 (define_expand "neon_vmls_n<mode>"
3578 [(match_operand:VMQ 0 "s_register_operand" "")
3579 (match_operand:VMQ 1 "s_register_operand" "")
3580 (match_operand:VMQ 2 "s_register_operand" "")
3581 (match_operand:<V_elem> 3 "s_register_operand" "")
3582 (match_operand:SI 4 "immediate_operand" "")]
3585 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3586 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3587 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3588 tmp, const0_rtx, operands[4]));
;; Expander for widening multiply-subtract by scalar.  Places scalar
;; operand 3 into a fresh <MODE> pseudo via neon_vset_lane<mode> (const0_rtx
;; presumably being the lane number -- confirm), then emits
;; neon_vmlsl_lane<mode> on lane 0, forwarding operands[4] as the final
;; immediate.
3592 (define_expand "neon_vmlsl_n<mode>"
3593 [(match_operand:<V_widen> 0 "s_register_operand" "")
3594 (match_operand:<V_widen> 1 "s_register_operand" "")
3595 (match_operand:VMDI 2 "s_register_operand" "")
3596 (match_operand:<V_elem> 3 "s_register_operand" "")
3597 (match_operand:SI 4 "immediate_operand" "")]
3600 rtx tmp = gen_reg_rtx (<MODE>mode);
3601 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3602 emit_insn (gen_neon_vmlsl_lane<mode> (operands[0], operands[1], operands[2],
3603 tmp, const0_rtx, operands[4]));
;; Expander for saturating doubling widening multiply-subtract by scalar.
;; Places scalar operand 3 into a fresh <MODE> pseudo via
;; neon_vset_lane<mode> (const0_rtx presumably being the lane number --
;; confirm), then emits neon_vqdmlsl_lane<mode> on lane 0, forwarding
;; operands[4] as the final immediate.
3607 (define_expand "neon_vqdmlsl_n<mode>"
3608 [(match_operand:<V_widen> 0 "s_register_operand" "")
3609 (match_operand:<V_widen> 1 "s_register_operand" "")
3610 (match_operand:VMDI 2 "s_register_operand" "")
3611 (match_operand:<V_elem> 3 "s_register_operand" "")
3612 (match_operand:SI 4 "immediate_operand" "")]
3615 rtx tmp = gen_reg_rtx (<MODE>mode);
3616 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3617 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3618 tmp, const0_rtx, operands[4]));
;; Vector extract: "vext.<V_sz_elem> dst, src1, src2, #imm" over VDQX.
;; The immediate (operand 3) is passed to neon_const_bounds with limits 0
;; and GET_MODE_NUNITS (<MODE>mode) before the template is returned.
3622 (define_insn "neon_vext<mode>"
3623 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3624 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3625 (match_operand:VDQX 2 "s_register_operand" "w")
3626 (match_operand:SI 3 "immediate_operand" "i")]
3630 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3631 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3633 [(set_attr "type" "neon_ext<q>")]
;; "vrev64.<V_sz_elem>" over the VDQ iterator.  Operand 2 is matched but is
;; not referenced by the output template.
3636 (define_insn "neon_vrev64<mode>"
3637 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3638 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
3639 (match_operand:SI 2 "immediate_operand" "i")]
3642 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3643 [(set_attr "type" "neon_rev<q>")]
;; "vrev32.<V_sz_elem>" over the VX iterator.  Operand 2 is matched but is
;; not referenced by the output template.
3646 (define_insn "neon_vrev32<mode>"
3647 [(set (match_operand:VX 0 "s_register_operand" "=w")
3648 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")
3649 (match_operand:SI 2 "immediate_operand" "i")]
3652 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3653 [(set_attr "type" "neon_rev<q>")]
;; "vrev16.<V_sz_elem>" over the VE iterator.  Operand 2 is matched but is
;; not referenced by the output template.
3656 (define_insn "neon_vrev16<mode>"
3657 [(set (match_operand:VE 0 "s_register_operand" "=w")
3658 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
3659 (match_operand:SI 2 "immediate_operand" "i")]
3662 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3663 [(set_attr "type" "neon_rev<q>")]
3666 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3667 ; allocation. For an intrinsic of form:
3668 ; rD = vbsl_* (rS, rN, rM)
3669 ; We can use any of:
3670 ; vbsl rS, rN, rM (if D = S)
3671 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3672 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select with three register-allocation alternatives, one per
;; input that may be tied to the output:
;;   alt 0: operand 1 tied ("0") -> vbsl dst, op2, op3
;;   alt 1: operand 3 tied ("0") -> vbit dst, op2, op1
;;   alt 2: operand 2 tied ("0") -> vbif dst, op3, op1
3674 (define_insn "neon_vbsl<mode>_internal"
3675 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3676 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3677 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3678 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3682 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3683 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3684 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3685 [(set_attr "type" "neon_bsl<q>")]
;; Expander for the vbsl intrinsic.  The selector (operand 1) arrives in
;; <V_cmp_result> mode and is re-viewed in <MODE> with gen_lowpart, so that
;; all three inputs share one mode (see the in-body comment).
3688 (define_expand "neon_vbsl<mode>"
3689 [(set (match_operand:VDQX 0 "s_register_operand" "")
3690 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3691 (match_operand:VDQX 2 "s_register_operand" "")
3692 (match_operand:VDQX 3 "s_register_operand" "")]
3696 /* We can't alias operands together if they have different modes. */
3697 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift by a register vector: "v<O>shl.<T><size> dst, src, shift" over
;; VDQIX.  Both the %O3 and %T3 modifiers are driven by immediate operand 3.
3700 (define_insn "neon_vshl<mode>"
3701 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3702 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3703 (match_operand:VDQIX 2 "s_register_operand" "w")
3704 (match_operand:SI 3 "immediate_operand" "i")]
3707 "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3708 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift by a register vector: "vq<O>shl.<T><size> dst, src,
;; shift" over VDQIX.  Both the %O3 and %T3 modifiers are driven by
;; immediate operand 3.
3711 (define_insn "neon_vqshl<mode>"
3712 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3713 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3714 (match_operand:VDQIX 2 "s_register_operand" "w")
3715 (match_operand:SI 3 "immediate_operand" "i")]
3718 "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3719 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift right by immediate: "v<O>shr.<T><size> dst, src, #imm" over VDQIX.
;; The shift count (operand 2) is passed to neon_const_bounds with limits 1
;; and neon_element_bits (<MODE>mode) + 1; %O3/%T3 read operand 3.
3722 (define_insn "neon_vshr_n<mode>"
3723 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3724 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3725 (match_operand:SI 2 "immediate_operand" "i")
3726 (match_operand:SI 3 "immediate_operand" "i")]
3730 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3731 return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3733 [(set_attr "type" "neon_shift_imm<q>")]
;; Narrowing shift right by immediate: "v<O>shrn.<V_if_elem> %P0, %q1,
;; #imm".  The shift count (operand 2) is passed to neon_const_bounds with
;; limits 1 and neon_element_bits (<MODE>mode) / 2 + 1, i.e. it is bounded
;; by half the source element width; %O3 reads operand 3.
3736 (define_insn "neon_vshrn_n<mode>"
3737 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3738 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3739 (match_operand:SI 2 "immediate_operand" "i")
3740 (match_operand:SI 3 "immediate_operand" "i")]
3744 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3745 return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2";
3747 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing shift right by immediate: "vq<O>shrn.<T><size> %P0,
;; %q1, #imm".  The shift count (operand 2) is passed to neon_const_bounds
;; with limits 1 and neon_element_bits (<MODE>mode) / 2 + 1; %O3/%T3 read
;; operand 3.
3750 (define_insn "neon_vqshrn_n<mode>"
3751 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3752 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3753 (match_operand:SI 2 "immediate_operand" "i")
3754 (match_operand:SI 3 "immediate_operand" "i")]
3758 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3759 return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3761 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; "vq<O>shrun.<T><size> %P0, %q1, #imm": narrowing shift right by
;; immediate, "un" variant.  The shift count (operand 2) is passed to
;; neon_const_bounds with limits 1 and neon_element_bits (<MODE>mode) / 2
;; + 1; %O3/%T3 read operand 3.
3764 (define_insn "neon_vqshrun_n<mode>"
3765 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3766 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3767 (match_operand:SI 2 "immediate_operand" "i")
3768 (match_operand:SI 3 "immediate_operand" "i")]
3772 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3773 return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3775 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Shift left by immediate: "vshl.<V_if_elem> dst, src, #imm" over VDQIX.
;; The shift count (operand 2) is passed to neon_const_bounds with limits 0
;; and neon_element_bits (<MODE>mode).  Operand 3 is unused by the template.
3778 (define_insn "neon_vshl_n<mode>"
3779 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3780 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3781 (match_operand:SI 2 "immediate_operand" "i")
3782 (match_operand:SI 3 "immediate_operand" "i")]
3786 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3787 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3789 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift left by immediate: "vqshl.<T><size> dst, src, #imm".
;; The shift count (operand 2) is passed to neon_const_bounds with limits 0
;; and neon_element_bits (<MODE>mode); %T3 reads operand 3.
3792 (define_insn "neon_vqshl_n<mode>"
3793 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3794 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3795 (match_operand:SI 2 "immediate_operand" "i")
3796 (match_operand:SI 3 "immediate_operand" "i")]
3800 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3801 return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3803 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; "vqshlu.<T><size> dst, src, #imm" over VDQIX.  The shift count
;; (operand 2) is passed to neon_const_bounds with limits 0 and
;; neon_element_bits (<MODE>mode); %T3 reads operand 3.
3806 (define_insn "neon_vqshlu_n<mode>"
3807 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3808 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3809 (match_operand:SI 2 "immediate_operand" "i")
3810 (match_operand:SI 3 "immediate_operand" "i")]
3814 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3815 return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3817 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by immediate: "vshll.<T><size> %q0, %P1, #imm".
;; The shift count (operand 2) is passed to neon_const_bounds with limits 0
;; and neon_element_bits (<MODE>mode) + 1; %T3 reads operand 3.
;; NOTE(review): the in-body comment states "0 < imm" while the lower limit
;; passed is 0 -- confirm against neon_const_bounds whether 0 is accepted.
3820 (define_insn "neon_vshll_n<mode>"
3821 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3822 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3823 (match_operand:SI 2 "immediate_operand" "i")
3824 (match_operand:SI 3 "immediate_operand" "i")]
3828 /* The boundaries are: 0 < imm <= size. */
3829 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3830 return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
3832 [(set_attr "type" "neon_shift_imm_long")]
;; Shift right and accumulate: "v<O>sra.<T><size> dst, src, #imm".  The
;; accumulator (operand 1) is tied to the output.  The shift count
;; (operand 3) is passed to neon_const_bounds with limits 1 and
;; neon_element_bits (<MODE>mode) + 1; %O4/%T4 read operand 4.
3835 (define_insn "neon_vsra_n<mode>"
3836 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3837 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3838 (match_operand:VDQIX 2 "s_register_operand" "w")
3839 (match_operand:SI 3 "immediate_operand" "i")
3840 (match_operand:SI 4 "immediate_operand" "i")]
3844 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3845 return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3847 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift right and insert: "vsri.<V_sz_elem> dst, src, #imm".  Operand 1 is
;; tied to the output.  The shift count (operand 3) is passed to
;; neon_const_bounds with limits 1 and neon_element_bits (<MODE>mode) + 1.
3850 (define_insn "neon_vsri_n<mode>"
3851 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3852 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3853 (match_operand:VDQIX 2 "s_register_operand" "w")
3854 (match_operand:SI 3 "immediate_operand" "i")]
3858 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3859 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3861 [(set_attr "type" "neon_shift_reg<q>")]
;; Shift left and insert: "vsli.<V_sz_elem> dst, src, #imm".  Operand 1 is
;; tied to the output.  The shift count (operand 3) is passed to
;; neon_const_bounds with limits 0 and neon_element_bits (<MODE>mode).
3864 (define_insn "neon_vsli_n<mode>"
3865 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3866 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3867 (match_operand:VDQIX 2 "s_register_operand" "w")
3868 (match_operand:SI 3 "immediate_operand" "i")]
3872 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3873 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3875 [(set_attr "type" "neon_shift_reg<q>")]
;; Table lookup with a one-register table: "vtbl.8 %P0, {%P1}, %P2", where
;; operand 1 is the V8QI table and operand 2 the V8QI index vector.
3878 (define_insn "neon_vtbl1v8qi"
3879 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3880 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3881 (match_operand:V8QI 2 "s_register_operand" "w")]
3884 "vtbl.8\t%P0, {%P1}, %P2"
3885 [(set_attr "type" "neon_tbl1")]
;; Table lookup with a two-register table held in a TI-mode operand.  The
;; output code splits the table into V8QI registers at REGNO (operands[1])
;; and REGNO + 2 and prints "vtbl.8 dst, {t0, t1}, idx" via output_asm_insn.
3888 (define_insn "neon_vtbl2v8qi"
3889 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3890 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3891 (match_operand:V8QI 2 "s_register_operand" "w")]
3896 int tabbase = REGNO (operands[1]);
3898 ops[0] = operands[0];
3899 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3900 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3901 ops[3] = operands[2];
3902 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3906 [(set_attr "type" "neon_tbl2")]
;; Table lookup with a three-register table held in an EI-mode operand.  The
;; output code splits the table into V8QI registers at REGNO (operands[1]),
;; +2 and +4 and prints "vtbl.8 dst, {t0, t1, t2}, idx" via output_asm_insn.
3909 (define_insn "neon_vtbl3v8qi"
3910 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3911 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3912 (match_operand:V8QI 2 "s_register_operand" "w")]
3917 int tabbase = REGNO (operands[1]);
3919 ops[0] = operands[0];
3920 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3921 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3922 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3923 ops[4] = operands[2];
3924 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3928 [(set_attr "type" "neon_tbl3")]
;; Table lookup with a four-register table held in an OI-mode operand.  The
;; output code splits the table into V8QI registers at REGNO (operands[1]),
;; +2, +4 and +6 and prints "vtbl.8 dst, {t0, t1, t2, t3}, idx" via
;; output_asm_insn.
3931 (define_insn "neon_vtbl4v8qi"
3932 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3933 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3934 (match_operand:V8QI 2 "s_register_operand" "w")]
3939 int tabbase = REGNO (operands[1]);
3941 ops[0] = operands[0];
3942 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3943 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3944 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3945 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3946 ops[5] = operands[2];
3947 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3951 [(set_attr "type" "neon_tbl4")]
3954 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table, split after reload into two
;; neon_vtbl2v8qi insns.  The table (operand 1) is viewed as TImode via
;; gen_lowpart; the low and then the high V8QI halves of the destination
;; and index vectors are obtained with simplify_subreg and one vtbl2 is
;; emitted per half.  The output uses an earlyclobber ("=&w") constraint.
3955 (define_insn_and_split "neon_vtbl1v16qi"
3956 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3957 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3958 (match_operand:V16QI 2 "s_register_operand" "w")]
3962 "&& reload_completed"
3965 rtx op0, op1, op2, part0, part2;
3969 op1 = gen_lowpart (TImode, operands[1]);
3972 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3973 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3974 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3975 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3977 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3978 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3979 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3980 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3983 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OI-mode table, split after reload into two
;; insns produced by gen_neon_vtbl2v8qi: one for the low and one for the
;; high V8QI half of the destination and index vectors (halves obtained
;; with simplify_subreg).  The output uses an earlyclobber ("=&w")
;; constraint.
3986 (define_insn_and_split "neon_vtbl2v16qi"
3987 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3988 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
3989 (match_operand:V16QI 2 "s_register_operand" "w")]
3993 "&& reload_completed"
3996 rtx op0, op1, op2, part0, part2;
4003 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4004 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4005 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4006 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4008 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4009 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4010 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4011 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4014 [(set_attr "type" "multiple")]
4017 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4018 ;; handle quad-word input modes, producing octa-word output modes. But
4019 ;; that requires us to add support for octa-word vector modes in moves.
4020 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI inputs into one OI-mode result.  The insn is split
;; after reload, where the work is delegated to neon_split_vcombine.
4021 (define_insn_and_split "neon_vcombinev16qi"
4022 [(set (match_operand:OI 0 "s_register_operand" "=w")
4023 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4024 (match_operand:V16QI 2 "s_register_operand" "w")]
4028 "&& reload_completed"
4031 neon_split_vcombine (operands);
4034 [(set_attr "type" "multiple")]
;; Table lookup extension with a one-register table: "vtbx.8 %P0, {%P2},
;; %P3".  Operand 1 is tied to the output by the "0" constraint.
4037 (define_insn "neon_vtbx1v8qi"
4038 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4039 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4040 (match_operand:V8QI 2 "s_register_operand" "w")
4041 (match_operand:V8QI 3 "s_register_operand" "w")]
4044 "vtbx.8\t%P0, {%P2}, %P3"
4045 [(set_attr "type" "neon_tbl1")]
;; Table lookup extension with a two-register table held in a TI-mode
;; operand 2.  Operand 1 is tied to the output.  The output code splits the
;; table into V8QI registers at REGNO (operands[2]) and REGNO + 2 and prints
;; "vtbx.8 dst, {t0, t1}, idx" via output_asm_insn.
4048 (define_insn "neon_vtbx2v8qi"
4049 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4050 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4051 (match_operand:TI 2 "s_register_operand" "w")
4052 (match_operand:V8QI 3 "s_register_operand" "w")]
4057 int tabbase = REGNO (operands[2]);
4059 ops[0] = operands[0];
4060 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4061 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4062 ops[3] = operands[3];
4063 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4067 [(set_attr "type" "neon_tbl2")]
;; Table lookup extension with a three-register table held in an EI-mode
;; operand 2.  Operand 1 is tied to the output.  The output code splits the
;; table into V8QI registers at REGNO (operands[2]), +2 and +4 and prints
;; "vtbx.8 dst, {t0, t1, t2}, idx" via output_asm_insn.
4070 (define_insn "neon_vtbx3v8qi"
4071 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4072 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4073 (match_operand:EI 2 "s_register_operand" "w")
4074 (match_operand:V8QI 3 "s_register_operand" "w")]
4079 int tabbase = REGNO (operands[2]);
4081 ops[0] = operands[0];
4082 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4083 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4084 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4085 ops[4] = operands[3];
4086 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4090 [(set_attr "type" "neon_tbl3")]
;; vtbx.8 with a four-register list.  The OI-mode operand 2 is re-expressed
;; as four V8QI registers at tabbase, tabbase + 2, tabbase + 4 and
;; tabbase + 6; operand 1 is tied to the output (constraint "0").
4093 (define_insn "neon_vtbx4v8qi"
4094 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4095 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4096 (match_operand:OI 2 "s_register_operand" "w")
4097 (match_operand:V8QI 3 "s_register_operand" "w")]
4102 int tabbase = REGNO (operands[2]);
4104 ops[0] = operands[0];
4105 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4106 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4107 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4108 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4109 ops[5] = operands[3];
4110 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4114 [(set_attr "type" "neon_tbl4")]
;; Two-result expander for vtrn: operand 0 is set from an unspec of operands
;; 1 and 2, and operand 3 is set from an UNSPEC_VTRN2 of the same two inputs.
4117 (define_expand "neon_vtrn<mode>_internal"
4119 [(set (match_operand:VDQW 0 "s_register_operand" "")
4120 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4121 (match_operand:VDQW 2 "s_register_operand" "")]
4123 (set (match_operand:VDQW 3 "s_register_operand" "")
4124 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
;; Matching insn for vtrn.  Input operand 1 is tied to output 0 (constraint
;; "0") and input operand 3 is tied to output 2 (constraint "2"), so the
;; assembly template names only %0 and %2.
4129 ;; Note: Different operand numbering to handle tied registers correctly.
4130 (define_insn "*neon_vtrn<mode>_insn"
4131 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4132 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4133 (match_operand:VDQW 3 "s_register_operand" "2")]
4135 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4136 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4139 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4140 [(set_attr "type" "neon_permute<q>")]
;; Expander that passes gen_neon_vtrn<mode>_internal together with operands
;; 0, 1 and 2 to neon_emit_pair_result_insn.  Operand 0 is an SImode register.
;; NOTE(review): presumably operand 0 addresses where the result pair is
;; written -- confirm against neon_emit_pair_result_insn.
4143 (define_expand "neon_vtrn<mode>"
4144 [(match_operand:SI 0 "s_register_operand" "r")
4145 (match_operand:VDQW 1 "s_register_operand" "w")
4146 (match_operand:VDQW 2 "s_register_operand" "w")]
4149 neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal,
4150 operands[0], operands[1], operands[2]);
;; Two-result expander for vzip: operand 0 is set from an unspec of operands
;; 1 and 2, and operand 3 is set from an UNSPEC_VZIP2 of the same two inputs.
4154 (define_expand "neon_vzip<mode>_internal"
4156 [(set (match_operand:VDQW 0 "s_register_operand" "")
4157 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4158 (match_operand:VDQW 2 "s_register_operand" "")]
4160 (set (match_operand:VDQW 3 "s_register_operand" "")
4161 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
;; Matching insn for vzip.  Input operand 1 is tied to output 0 (constraint
;; "0") and input operand 3 is tied to output 2 (constraint "2"), so the
;; assembly template names only %0 and %2.
4166 ;; Note: Different operand numbering to handle tied registers correctly.
4167 (define_insn "*neon_vzip<mode>_insn"
4168 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4169 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4170 (match_operand:VDQW 3 "s_register_operand" "2")]
4172 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4173 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4176 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4177 [(set_attr "type" "neon_zip<q>")]
;; Expander that passes gen_neon_vzip<mode>_internal together with operands
;; 0, 1 and 2 to neon_emit_pair_result_insn.  Operand 0 is an SImode register.
;; NOTE(review): presumably operand 0 addresses where the result pair is
;; written -- confirm against neon_emit_pair_result_insn.
4180 (define_expand "neon_vzip<mode>"
4181 [(match_operand:SI 0 "s_register_operand" "r")
4182 (match_operand:VDQW 1 "s_register_operand" "w")
4183 (match_operand:VDQW 2 "s_register_operand" "w")]
4186 neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal,
4187 operands[0], operands[1], operands[2]);
;; Two-result expander for vuzp: operand 0 is set from an unspec of operands
;; 1 and 2, and operand 3 is set from an UNSPEC_VUZP2 of the same two inputs.
4191 (define_expand "neon_vuzp<mode>_internal"
4193 [(set (match_operand:VDQW 0 "s_register_operand" "")
4194 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4195 (match_operand:VDQW 2 "s_register_operand" "")]
4197 (set (match_operand:VDQW 3 "s_register_operand" "")
4198 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
;; Matching insn for vuzp.  Input operand 1 is tied to output 0 (constraint
;; "0") and input operand 3 is tied to output 2 (constraint "2"), so the
;; assembly template names only %0 and %2.
4203 ;; Note: Different operand numbering to handle tied registers correctly.
4204 (define_insn "*neon_vuzp<mode>_insn"
4205 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4206 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4207 (match_operand:VDQW 3 "s_register_operand" "2")]
4209 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4210 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4213 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4214 [(set_attr "type" "neon_zip<q>")]
;; Expander that passes gen_neon_vuzp<mode>_internal together with operands
;; 0, 1 and 2 to neon_emit_pair_result_insn.  Operand 0 is an SImode register.
;; NOTE(review): presumably operand 0 addresses where the result pair is
;; written -- confirm against neon_emit_pair_result_insn.
4217 (define_expand "neon_vuzp<mode>"
4218 [(match_operand:SI 0 "s_register_operand" "r")
4219 (match_operand:VDQW 1 "s_register_operand" "w")
4220 (match_operand:VDQW 2 "s_register_operand" "w")]
4223 neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal,
4224 operands[0], operands[1], operands[2]);
;; Reinterpret a VDX-mode register (operand 1) as V8QI (operand 0) by calling
;; neon_reinterpret.
4228 (define_expand "neon_vreinterpretv8qi<mode>"
4229 [(match_operand:V8QI 0 "s_register_operand" "")
4230 (match_operand:VDX 1 "s_register_operand" "")]
4233 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V4HI (operand 0) by calling
;; neon_reinterpret.
4237 (define_expand "neon_vreinterpretv4hi<mode>"
4238 [(match_operand:V4HI 0 "s_register_operand" "")
4239 (match_operand:VDX 1 "s_register_operand" "")]
4242 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V2SI (operand 0) by calling
;; neon_reinterpret.
4246 (define_expand "neon_vreinterpretv2si<mode>"
4247 [(match_operand:V2SI 0 "s_register_operand" "")
4248 (match_operand:VDX 1 "s_register_operand" "")]
4251 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V2SF (operand 0) by calling
;; neon_reinterpret.
4255 (define_expand "neon_vreinterpretv2sf<mode>"
4256 [(match_operand:V2SF 0 "s_register_operand" "")
4257 (match_operand:VDX 1 "s_register_operand" "")]
4260 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as DI (operand 0) by calling
;; neon_reinterpret.
4264 (define_expand "neon_vreinterpretdi<mode>"
4265 [(match_operand:DI 0 "s_register_operand" "")
4266 (match_operand:VDX 1 "s_register_operand" "")]
4269 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as TI (operand 0) by calling
;; neon_reinterpret.
4273 (define_expand "neon_vreinterpretti<mode>"
4274 [(match_operand:TI 0 "s_register_operand" "")
4275 (match_operand:VQXMOV 1 "s_register_operand" "")]
4278 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V16QI (operand 0) by
;; calling neon_reinterpret.
4283 (define_expand "neon_vreinterpretv16qi<mode>"
4284 [(match_operand:V16QI 0 "s_register_operand" "")
4285 (match_operand:VQXMOV 1 "s_register_operand" "")]
4288 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V8HI (operand 0) by
;; calling neon_reinterpret.
4292 (define_expand "neon_vreinterpretv8hi<mode>"
4293 [(match_operand:V8HI 0 "s_register_operand" "")
4294 (match_operand:VQXMOV 1 "s_register_operand" "")]
4297 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SI (operand 0) by
;; calling neon_reinterpret.
4301 (define_expand "neon_vreinterpretv4si<mode>"
4302 [(match_operand:V4SI 0 "s_register_operand" "")
4303 (match_operand:VQXMOV 1 "s_register_operand" "")]
4306 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SF (operand 0) by
;; calling neon_reinterpret.
4310 (define_expand "neon_vreinterpretv4sf<mode>"
4311 [(match_operand:V4SF 0 "s_register_operand" "")
4312 (match_operand:VQXMOV 1 "s_register_operand" "")]
4315 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V2DI (operand 0) by
;; calling neon_reinterpret.
4319 (define_expand "neon_vreinterpretv2di<mode>"
4320 [(match_operand:V2DI 0 "s_register_operand" "")
4321 (match_operand:VQXMOV 1 "s_register_operand" "")]
4324 neon_reinterpret (operands[0], operands[1]);
;; Expander whose pattern sets a VDQX register (operand 0) from an unspec of
;; a neon_struct_operand of the same mode (operand 1).
4328 (define_expand "vec_load_lanes<mode><mode>"
4329 [(set (match_operand:VDQX 0 "s_register_operand")
4330 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Load a whole VDQX vector from a "Um" memory operand with
;; vld1.<V_sz_elem>.  The destination is printed with %h and the memory
;; operand with %A.
4334 (define_insn "neon_vld1<mode>"
4335 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4336 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4339 "vld1.<V_sz_elem>\t%h0, %A1"
4340 [(set_attr "type" "neon_load1_1reg<q>")]
;; Load one <V_elem> element from memory into a lane of a VDX vector.
;; Operand 2 is tied to the output (constraint "0"); operand 3 is the
;; immediate lane number.  error ("lane out of range") is reported when the
;; lane is negative or not below GET_MODE_NUNITS (<MODE>mode).  The output
;; code returns either a whole-register vld1 template or the single-lane
;; {%P0[%c3]} template.
4343 (define_insn "neon_vld1_lane<mode>"
4344 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4345 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4346 (match_operand:VDX 2 "s_register_operand" "0")
4347 (match_operand:SI 3 "immediate_operand" "i")]
4351 HOST_WIDE_INT lane = INTVAL (operands[3]);
4352 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4353 if (lane < 0 || lane >= max)
4354 error ("lane out of range");
4356 return "vld1.<V_sz_elem>\t%P0, %A1";
4358 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4360 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Quad-register variant of the single-lane load.  Operand 2 is tied to the
;; output; operand 3 is the immediate lane number, range-checked against
;; GET_MODE_NUNITS (<MODE>mode).  Lanes with lane >= max / 2 take a separate
;; branch; operand 3 is then replaced by GEN_INT (lane) and operand 0 by a
;; <V_HALF>mode register at regno before one of the two vld1 templates is
;; returned.
4363 (define_insn "neon_vld1_lane<mode>"
4364 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4365 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4366 (match_operand:VQX 2 "s_register_operand" "0")
4367 (match_operand:SI 3 "immediate_operand" "i")]
4371 HOST_WIDE_INT lane = INTVAL (operands[3]);
4372 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4373 int regno = REGNO (operands[0]);
4374 if (lane < 0 || lane >= max)
4375 error ("lane out of range");
4376 else if (lane >= max / 2)
4380 operands[3] = GEN_INT (lane);
4382 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4384 return "vld1.<V_sz_elem>\t%P0, %A1";
4386 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4388 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one <V_elem> element from memory and duplicate it across a VD vector
;; (vec_duplicate), emitted as vld1 with the {%P0[]} all-lanes register form.
4391 (define_insn "neon_vld1_dup<mode>"
4392 [(set (match_operand:VD 0 "s_register_operand" "=w")
4393 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4395 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4396 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; The pattern sets a DI register (operand 0) from an unspec of a DI
;; neon_struct_operand (operand 1).
4399 ;; Special case for DImode. Treat it exactly like a simple load.
4400 (define_expand "neon_vld1_dupdi"
4401 [(set (match_operand:DI 0 "s_register_operand" "")
4402 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Quad-register duplicate load: one <V_elem> element is loaded and
;; duplicated across a VQ vector (vec_duplicate).  The register list names
;; the destination twice, via %e0[] and %f0[].
4408 (define_insn "neon_vld1_dup<mode>"
4409 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4410 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4413 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4415 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; Duplicate a DI memory operand into both halves of a V2DI register.  The
;; insn is split once reload has completed: the low DImode part of operand 0
;; is loaded with gen_neon_vld1_dupdi and then copied to the high DImode part
;; with a move.  The "length" attribute is 8.
4418 (define_insn_and_split "neon_vld1_dupv2di"
4419 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4420 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4423 "&& reload_completed"
4426 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4427 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4428 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4431 [(set_attr "length" "8")
4432 (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander whose pattern sets a VDQX neon_struct_operand (operand 0) from an
;; unspec of a register of the same mode (operand 1).
4435 (define_expand "vec_store_lanes<mode><mode>"
4436 [(set (match_operand:VDQX 0 "neon_struct_operand")
4437 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Store a whole VDQX vector to a "Um" memory operand with
;; vst1.<V_sz_elem>.  The source register is printed with %h and the memory
;; operand with %A.
4441 (define_insn "neon_vst1<mode>"
4442 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4443 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4446 "vst1.<V_sz_elem>\t%h1, %A0"
4447 [(set_attr "type" "neon_store1_1reg<q>")])
;; Store one lane of a VDX vector (operand 1) to a <V_elem> memory operand.
;; Operand 2 is the immediate lane number; error ("lane out of range") is
;; reported when it is negative or not below GET_MODE_NUNITS (<MODE>mode).
;; The output code returns either a whole-register vst1 template or the
;; single-lane {%P1[%c2]} template.
4449 (define_insn "neon_vst1_lane<mode>"
4450 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4452 [(match_operand:VDX 1 "s_register_operand" "w")
4453 (match_operand:SI 2 "immediate_operand" "i")]
4457 HOST_WIDE_INT lane = INTVAL (operands[2]);
4458 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4459 if (lane < 0 || lane >= max)
4460 error ("lane out of range");
4462 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4464 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4466 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Quad-register variant of the single-lane store.  Operand 2 is the
;; immediate lane number, range-checked against GET_MODE_NUNITS
;; (<MODE>mode).  Lanes with lane >= max / 2 take a separate branch; operand
;; 2 is then replaced by GEN_INT (lane) and operand 1 by a <V_HALF>mode
;; register at regno before one of the two vst1 templates is returned.
4469 (define_insn "neon_vst1_lane<mode>"
4470 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4472 [(match_operand:VQX 1 "s_register_operand" "w")
4473 (match_operand:SI 2 "immediate_operand" "i")]
4477 HOST_WIDE_INT lane = INTVAL (operands[2]);
4478 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4479 int regno = REGNO (operands[1]);
4480 if (lane < 0 || lane >= max)
4481 error ("lane out of range");
4482 else if (lane >= max / 2)
4486 operands[2] = GEN_INT (lane);
4488 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4490 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4492 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4494 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander whose pattern sets a TI register from an unspec of a TI
;; neon_struct_operand.  The inner UNSPEC_VSTRUCTDUMMY unspec is in VDX mode.
;; NOTE(review): presumably the dummy exists only to attach the VDX mode
;; iterator to the pattern -- confirm.
4497 (define_expand "vec_load_lanesti<mode>"
4498 [(set (match_operand:TI 0 "s_register_operand")
4499 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4500 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure load into a TI register for VDX modes.  When
;; <V_sz_elem> is 64 the insn is emitted as vld1.64, otherwise as
;; vld2.<V_sz_elem>; the "type" attribute makes the same 64-bit distinction.
4504 (define_insn "neon_vld2<mode>"
4505 [(set (match_operand:TI 0 "s_register_operand" "=w")
4506 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4507 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4511 if (<V_sz_elem> == 64)
4512 return "vld1.64\t%h0, %A1";
4514 return "vld2.<V_sz_elem>\t%h0, %A1";
4517 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4518 (const_string "neon_load1_2reg<q>")
4519 (const_string "neon_load2_2reg<q>")))]
;; Expander whose pattern sets an OI register from an unspec of an OI
;; neon_struct_operand, with a VQ-mode UNSPEC_VSTRUCTDUMMY unspec.
4522 (define_expand "vec_load_lanesoi<mode>"
4523 [(set (match_operand:OI 0 "s_register_operand")
4524 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4525 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure load into an OI register for VQ modes, emitted as
;; a single vld2.<V_sz_elem> with the destination printed via %h.
4529 (define_insn "neon_vld2<mode>"
4530 [(set (match_operand:OI 0 "s_register_operand" "=w")
4531 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4532 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4535 "vld2.<V_sz_elem>\t%h0, %A1"
4536 [(set_attr "type" "neon_load2_2reg_q")])
;; Load one lane of a two-element structure into a TI register for VD modes.
;; Operand 2 is tied to the output; operand 3 is the immediate lane number,
;; range-checked against GET_MODE_NUNITS (<MODE>mode).  The two registers
;; named in the instruction are the DImode registers at regno and regno + 2.
4538 (define_insn "neon_vld2_lane<mode>"
4539 [(set (match_operand:TI 0 "s_register_operand" "=w")
4540 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4541 (match_operand:TI 2 "s_register_operand" "0")
4542 (match_operand:SI 3 "immediate_operand" "i")
4543 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4547 HOST_WIDE_INT lane = INTVAL (operands[3]);
4548 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4549 int regno = REGNO (operands[0]);
4551 if (lane < 0 || lane >= max)
4552 error ("lane out of range");
4553 ops[0] = gen_rtx_REG (DImode, regno);
4554 ops[1] = gen_rtx_REG (DImode, regno + 2);
4555 ops[2] = operands[1];
4556 ops[3] = operands[3];
4557 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4560 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load one lane of a two-element structure into an OI register for VMQ
;; modes.  Operand 2 is tied to the output; operand 3 is the immediate lane
;; number, range-checked against GET_MODE_NUNITS (<MODE>mode), and lanes with
;; lane >= max / 2 take a separate branch.  The two registers named in the
;; instruction are the DImode registers at regno and regno + 4, and the lane
;; printed is GEN_INT (lane).
4563 (define_insn "neon_vld2_lane<mode>"
4564 [(set (match_operand:OI 0 "s_register_operand" "=w")
4565 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4566 (match_operand:OI 2 "s_register_operand" "0")
4567 (match_operand:SI 3 "immediate_operand" "i")
4568 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4572 HOST_WIDE_INT lane = INTVAL (operands[3]);
4573 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4574 int regno = REGNO (operands[0]);
4576 if (lane < 0 || lane >= max)
4577 error ("lane out of range");
4578 else if (lane >= max / 2)
4583 ops[0] = gen_rtx_REG (DImode, regno);
4584 ops[1] = gen_rtx_REG (DImode, regno + 4);
4585 ops[2] = operands[1];
4586 ops[3] = GEN_INT (lane);
4587 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4590 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load a <V_two_elem> structure and replicate it into a TI register for VDX
;; modes.  Modes with more than one unit use the vld2 all-lanes form
;; {%e0[], %f0[]}; otherwise a plain vld1 of the whole register is emitted.
;; The "type" attribute is chosen on <V_mode_nunits> in the same way.
4593 (define_insn "neon_vld2_dup<mode>"
4594 [(set (match_operand:TI 0 "s_register_operand" "=w")
4595 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4596 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4600 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4601 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4603 return "vld1.<V_sz_elem>\t%h0, %A1";
4606 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4607 (const_string "neon_load2_all_lanes<q>")
4608 (const_string "neon_load1_1reg<q>")))]
;; Expander whose pattern sets a TI neon_struct_operand from an unspec of a
;; TI register, with a VDX-mode UNSPEC_VSTRUCTDUMMY unspec.
4611 (define_expand "vec_store_lanesti<mode>"
4612 [(set (match_operand:TI 0 "neon_struct_operand")
4613 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4614 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure store from a TI register for VDX modes.  When
;; <V_sz_elem> is 64 the insn is emitted as vst1.64, otherwise as
;; vst2.<V_sz_elem>.
;; NOTE(review): the non-64-bit "type" is neon_store2_one_lane<q> although
;; the whole register pair is stored; the matching load pattern uses
;; neon_load2_2reg<q>.  Confirm this scheduling type is intended.
4618 (define_insn "neon_vst2<mode>"
4619 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4620 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4621 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4625 if (<V_sz_elem> == 64)
4626 return "vst1.64\t%h1, %A0";
4628 return "vst2.<V_sz_elem>\t%h1, %A0";
4631 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4632 (const_string "neon_store1_2reg<q>")
4633 (const_string "neon_store2_one_lane<q>")))]
;; Expander whose pattern sets an OI neon_struct_operand from an unspec of an
;; OI register, with a VQ-mode UNSPEC_VSTRUCTDUMMY unspec.
4636 (define_expand "vec_store_lanesoi<mode>"
4637 [(set (match_operand:OI 0 "neon_struct_operand")
4638 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4639 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-register structure store from an OI register for VQ modes, emitted as
;; a single vst2.<V_sz_elem> with the source printed via %h.
4643 (define_insn "neon_vst2<mode>"
4644 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4645 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4646 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4649 "vst2.<V_sz_elem>\t%h1, %A0"
4650 [(set_attr "type" "neon_store2_4reg<q>")]
;; Store one lane of a two-element structure from a TI register for VD
;; modes.  Operand 2 is the immediate lane number, range-checked against
;; GET_MODE_NUNITS (<MODE>mode).  The two registers named in the instruction
;; are the DImode registers at regno and regno + 2.
4653 (define_insn "neon_vst2_lane<mode>"
4654 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4655 (unspec:<V_two_elem>
4656 [(match_operand:TI 1 "s_register_operand" "w")
4657 (match_operand:SI 2 "immediate_operand" "i")
4658 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4662 HOST_WIDE_INT lane = INTVAL (operands[2]);
4663 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4664 int regno = REGNO (operands[1]);
4666 if (lane < 0 || lane >= max)
4667 error ("lane out of range");
4668 ops[0] = operands[0];
4669 ops[1] = gen_rtx_REG (DImode, regno);
4670 ops[2] = gen_rtx_REG (DImode, regno + 2);
4671 ops[3] = operands[2];
4672 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4675 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Store one lane of a two-element structure from an OI register for VMQ
;; modes.  Operand 2 is the immediate lane number, range-checked against
;; GET_MODE_NUNITS (<MODE>mode), and lanes with lane >= max / 2 take a
;; separate branch.  The two registers named in the instruction are the
;; DImode registers at regno and regno + 4, and the lane printed is
;; GEN_INT (lane).
4678 (define_insn "neon_vst2_lane<mode>"
4679 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4680 (unspec:<V_two_elem>
4681 [(match_operand:OI 1 "s_register_operand" "w")
4682 (match_operand:SI 2 "immediate_operand" "i")
4683 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4687 HOST_WIDE_INT lane = INTVAL (operands[2]);
4688 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4689 int regno = REGNO (operands[1]);
4691 if (lane < 0 || lane >= max)
4692 error ("lane out of range");
4693 else if (lane >= max / 2)
4698 ops[0] = operands[0];
4699 ops[1] = gen_rtx_REG (DImode, regno);
4700 ops[2] = gen_rtx_REG (DImode, regno + 4);
4701 ops[3] = GEN_INT (lane);
4702 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4705 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander whose pattern sets an EI register from an unspec of an EI
;; neon_struct_operand, with a VDX-mode UNSPEC_VSTRUCTDUMMY unspec.
4708 (define_expand "vec_load_lanesei<mode>"
4709 [(set (match_operand:EI 0 "s_register_operand")
4710 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4711 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-register structure load into an EI register for VDX modes.  When
;; <V_sz_elem> is 64 the insn is emitted as vld1.64, otherwise as
;; vld3.<V_sz_elem>; the "type" attribute makes the same 64-bit distinction.
4715 (define_insn "neon_vld3<mode>"
4716 [(set (match_operand:EI 0 "s_register_operand" "=w")
4717 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4718 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4722 if (<V_sz_elem> == 64)
4723 return "vld1.64\t%h0, %A1";
4725 return "vld3.<V_sz_elem>\t%h0, %A1";
4728 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4729 (const_string "neon_load1_3reg<q>")
4730 (const_string "neon_load3_3reg<q>")))]
;; Expander for CI-mode structure loads (VQ modes): forwards operands 0 and 1
;; to gen_neon_vld3<mode>.
4733 (define_expand "vec_load_lanesci<mode>"
4734 [(match_operand:CI 0 "s_register_operand")
4735 (match_operand:CI 1 "neon_struct_operand")
4736 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4739 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; CI-mode vld3 for VQ modes, emitted as two insns.  neon_vld3qa loads from
;; the memory operand re-addressed as EImode at offset 0; neon_vld3qb loads
;; from the following EImode chunk (offset GET_MODE_SIZE (EImode)) and also
;; takes operand 0 as an input.
4743 (define_expand "neon_vld3<mode>"
4744 [(match_operand:CI 0 "s_register_operand")
4745 (match_operand:CI 1 "neon_struct_operand")
4746 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4751 mem = adjust_address (operands[1], EImode, 0);
4752 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4753 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4754 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of a CI-mode vld3: loads an EI-mode memory operand into the
;; DImode registers at regno, regno + 4 and regno + 8 of the CI destination.
4758 (define_insn "neon_vld3qa<mode>"
4759 [(set (match_operand:CI 0 "s_register_operand" "=w")
4760 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4761 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4765 int regno = REGNO (operands[0]);
4767 ops[0] = gen_rtx_REG (DImode, regno);
4768 ops[1] = gen_rtx_REG (DImode, regno + 4);
4769 ops[2] = gen_rtx_REG (DImode, regno + 8);
4770 ops[3] = operands[1];
4771 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4774 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of a CI-mode vld3: loads an EI-mode memory operand into the
;; DImode registers at regno + 2, regno + 6 and regno + 10.  Operand 2 is
;; tied to the output (constraint "0").
4777 (define_insn "neon_vld3qb<mode>"
4778 [(set (match_operand:CI 0 "s_register_operand" "=w")
4779 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4780 (match_operand:CI 2 "s_register_operand" "0")
4781 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4785 int regno = REGNO (operands[0]);
4787 ops[0] = gen_rtx_REG (DImode, regno + 2);
4788 ops[1] = gen_rtx_REG (DImode, regno + 6);
4789 ops[2] = gen_rtx_REG (DImode, regno + 10);
4790 ops[3] = operands[1];
4791 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4794 [(set_attr "type" "neon_load3_3reg<q>")]
;; Load one lane of a three-element structure into an EI register for VD
;; modes.  Operand 2 is tied to the output; operand 3 is the immediate lane
;; number, range-checked against GET_MODE_NUNITS (<MODE>mode).  The registers
;; named are the DImode registers at regno, regno + 2 and regno + 4.
;; NOTE(review): the address is printed with plain %3 here, whereas the vld2
;; and vld4 lane patterns use %A; presumably deliberate -- confirm.
4797 (define_insn "neon_vld3_lane<mode>"
4798 [(set (match_operand:EI 0 "s_register_operand" "=w")
4799 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4800 (match_operand:EI 2 "s_register_operand" "0")
4801 (match_operand:SI 3 "immediate_operand" "i")
4802 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4806 HOST_WIDE_INT lane = INTVAL (operands[3]);
4807 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4808 int regno = REGNO (operands[0]);
4810 if (lane < 0 || lane >= max)
4811 error ("lane out of range");
4812 ops[0] = gen_rtx_REG (DImode, regno);
4813 ops[1] = gen_rtx_REG (DImode, regno + 2);
4814 ops[2] = gen_rtx_REG (DImode, regno + 4);
4815 ops[3] = operands[1];
4816 ops[4] = operands[3];
4817 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4821 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load one lane of a three-element structure into a CI register for VMQ
;; modes.  Operand 2 is tied to the output; operand 3 is the immediate lane
;; number, range-checked against GET_MODE_NUNITS (<MODE>mode), and lanes with
;; lane >= max / 2 take a separate branch.  The registers named are the
;; DImode registers at regno, regno + 4 and regno + 8; the lane printed is
;; GEN_INT (lane).
;; NOTE(review): the address is printed with plain %3 here, whereas the vld2
;; and vld4 lane patterns use %A; presumably deliberate -- confirm.
4824 (define_insn "neon_vld3_lane<mode>"
4825 [(set (match_operand:CI 0 "s_register_operand" "=w")
4826 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4827 (match_operand:CI 2 "s_register_operand" "0")
4828 (match_operand:SI 3 "immediate_operand" "i")
4829 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4833 HOST_WIDE_INT lane = INTVAL (operands[3]);
4834 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4835 int regno = REGNO (operands[0]);
4837 if (lane < 0 || lane >= max)
4838 error ("lane out of range");
4839 else if (lane >= max / 2)
4844 ops[0] = gen_rtx_REG (DImode, regno);
4845 ops[1] = gen_rtx_REG (DImode, regno + 4);
4846 ops[2] = gen_rtx_REG (DImode, regno + 8);
4847 ops[3] = operands[1];
4848 ops[4] = GEN_INT (lane);
4849 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4853 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load a <V_three_elem> structure and replicate it into an EI register for
;; VDX modes.  Modes with more than one unit use the vld3 all-lanes form on
;; the DImode registers at regno, regno + 2 and regno + 4; otherwise a plain
;; vld1 of the whole register is returned.  The "type" attribute is chosen on
;; <V_mode_nunits> in the same way.
;; NOTE(review): the vld3 form prints the address with plain %3 while the
;; vld1 form uses %A1; presumably deliberate -- confirm.
4856 (define_insn "neon_vld3_dup<mode>"
4857 [(set (match_operand:EI 0 "s_register_operand" "=w")
4858 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4859 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4863 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4865 int regno = REGNO (operands[0]);
4867 ops[0] = gen_rtx_REG (DImode, regno);
4868 ops[1] = gen_rtx_REG (DImode, regno + 2);
4869 ops[2] = gen_rtx_REG (DImode, regno + 4);
4870 ops[3] = operands[1];
4871 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4875 return "vld1.<V_sz_elem>\t%h0, %A1";
4878 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4879 (const_string "neon_load3_all_lanes<q>")
4880 (const_string "neon_load1_1reg<q>")))])
;; Expander whose pattern sets an EI neon_struct_operand from an unspec of an
;; EI register, with a VDX-mode UNSPEC_VSTRUCTDUMMY unspec.
4882 (define_expand "vec_store_lanesei<mode>"
4883 [(set (match_operand:EI 0 "neon_struct_operand")
4884 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4885 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-register structure store from an EI register for VDX modes.  When
;; <V_sz_elem> is 64 the insn is emitted as vst1.64, otherwise as
;; vst3.<V_sz_elem>.
;; NOTE(review): the non-64-bit "type" is neon_store3_one_lane<q> although
;; all three registers are stored; the matching load pattern uses
;; neon_load3_3reg<q>.  Confirm this scheduling type is intended.
4889 (define_insn "neon_vst3<mode>"
4890 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4891 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4892 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4896 if (<V_sz_elem> == 64)
4897 return "vst1.64\t%h1, %A0";
4899 return "vst3.<V_sz_elem>\t%h1, %A0";
4902 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4903 (const_string "neon_store1_3reg<q>")
4904 (const_string "neon_store3_one_lane<q>")))])
;; Expander for CI-mode structure stores (VQ modes): forwards operands 0 and
;; 1 to gen_neon_vst3<mode>.
4906 (define_expand "vec_store_lanesci<mode>"
4907 [(match_operand:CI 0 "neon_struct_operand")
4908 (match_operand:CI 1 "s_register_operand")
4909 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4912 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; CI-mode vst3 for VQ modes, emitted as two insns.  neon_vst3qa stores to
;; the memory operand re-addressed as EImode at offset 0; neon_vst3qb stores
;; to the following EImode chunk (offset GET_MODE_SIZE (EImode)).  Both take
;; the full CI register operand 1 as their source.
4916 (define_expand "neon_vst3<mode>"
4917 [(match_operand:CI 0 "neon_struct_operand")
4918 (match_operand:CI 1 "s_register_operand")
4919 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4924 mem = adjust_address (operands[0], EImode, 0);
4925 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4926 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4927 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of a CI-mode vst3: stores the DImode registers at regno,
;; regno + 4 and regno + 8 of the CI source to an EI-mode memory operand.
4931 (define_insn "neon_vst3qa<mode>"
4932 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4933 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4934 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4938 int regno = REGNO (operands[1]);
4940 ops[0] = operands[0];
4941 ops[1] = gen_rtx_REG (DImode, regno);
4942 ops[2] = gen_rtx_REG (DImode, regno + 4);
4943 ops[3] = gen_rtx_REG (DImode, regno + 8);
4944 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4947 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of a CI-mode vst3: stores the DImode registers at regno + 2,
;; regno + 6 and regno + 10 of the CI source to an EI-mode memory operand.
4950 (define_insn "neon_vst3qb<mode>"
4951 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4952 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4953 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4957 int regno = REGNO (operands[1]);
4959 ops[0] = operands[0];
4960 ops[1] = gen_rtx_REG (DImode, regno + 2);
4961 ops[2] = gen_rtx_REG (DImode, regno + 6);
4962 ops[3] = gen_rtx_REG (DImode, regno + 10);
4963 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4966 [(set_attr "type" "neon_store3_3reg<q>")]
;; Store one lane of a three-element structure from an EI register for VD
;; modes.  Operand 2 is the immediate lane number, range-checked against
;; GET_MODE_NUNITS (<MODE>mode).  The registers named are the DImode
;; registers at regno, regno + 2 and regno + 4.
;; NOTE(review): the address is printed with plain %0 here, whereas the vst2
;; lane patterns use %A0; presumably deliberate -- confirm.
4969 (define_insn "neon_vst3_lane<mode>"
4970 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4971 (unspec:<V_three_elem>
4972 [(match_operand:EI 1 "s_register_operand" "w")
4973 (match_operand:SI 2 "immediate_operand" "i")
4974 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4978 HOST_WIDE_INT lane = INTVAL (operands[2]);
4979 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4980 int regno = REGNO (operands[1]);
4982 if (lane < 0 || lane >= max)
4983 error ("lane out of range");
4984 ops[0] = operands[0];
4985 ops[1] = gen_rtx_REG (DImode, regno);
4986 ops[2] = gen_rtx_REG (DImode, regno + 2);
4987 ops[3] = gen_rtx_REG (DImode, regno + 4);
4988 ops[4] = operands[2];
4989 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4993 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Store one lane of a three-element structure from a CI register for VMQ
;; modes.  Operand 2 is the immediate lane number, range-checked against
;; GET_MODE_NUNITS (<MODE>mode), and lanes with lane >= max / 2 take a
;; separate branch.  The registers named are the DImode registers at regno,
;; regno + 4 and regno + 8; the lane printed is GEN_INT (lane).
;; NOTE(review): the address is printed with plain %0 here, whereas the vst2
;; lane patterns use %A0; presumably deliberate -- confirm.
4996 (define_insn "neon_vst3_lane<mode>"
4997 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4998 (unspec:<V_three_elem>
4999 [(match_operand:CI 1 "s_register_operand" "w")
5000 (match_operand:SI 2 "immediate_operand" "i")
5001 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5005 HOST_WIDE_INT lane = INTVAL (operands[2]);
5006 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5007 int regno = REGNO (operands[1]);
5009 if (lane < 0 || lane >= max)
5010 error ("lane out of range");
5011 else if (lane >= max / 2)
5016 ops[0] = operands[0];
5017 ops[1] = gen_rtx_REG (DImode, regno);
5018 ops[2] = gen_rtx_REG (DImode, regno + 4);
5019 ops[3] = gen_rtx_REG (DImode, regno + 8);
5020 ops[4] = GEN_INT (lane);
5021 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5025 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander whose pattern sets an OI register from an unspec of an OI
;; neon_struct_operand, with a VDX-mode UNSPEC_VSTRUCTDUMMY unspec.
5028 (define_expand "vec_load_lanesoi<mode>"
5029 [(set (match_operand:OI 0 "s_register_operand")
5030 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5031 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-register structure load into an OI register for VDX modes.  When
;; <V_sz_elem> is 64 the insn is emitted as vld1.64, otherwise as
;; vld4.<V_sz_elem>; the "type" attribute makes the same 64-bit distinction.
5035 (define_insn "neon_vld4<mode>"
5036 [(set (match_operand:OI 0 "s_register_operand" "=w")
5037 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5038 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5042 if (<V_sz_elem> == 64)
5043 return "vld1.64\t%h0, %A1";
5045 return "vld4.<V_sz_elem>\t%h0, %A1";
5048 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5049 (const_string "neon_load1_4reg<q>")
5050 (const_string "neon_load4_4reg<q>")))]
;; Expander for XI-mode structure loads (VQ modes): forwards operands 0 and 1
;; to gen_neon_vld4<mode>.
5053 (define_expand "vec_load_lanesxi<mode>"
5054 [(match_operand:XI 0 "s_register_operand")
5055 (match_operand:XI 1 "neon_struct_operand")
5056 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5059 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; XI-mode vld4 for VQ modes, emitted as two insns.  neon_vld4qa loads from
;; the memory operand re-addressed as OImode at offset 0; neon_vld4qb loads
;; from the following OImode chunk (offset GET_MODE_SIZE (OImode)) and also
;; takes operand 0 as an input.
5063 (define_expand "neon_vld4<mode>"
5064 [(match_operand:XI 0 "s_register_operand")
5065 (match_operand:XI 1 "neon_struct_operand")
5066 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5071 mem = adjust_address (operands[1], OImode, 0);
5072 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5073 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5074 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of an XI-mode vld4: loads an OI-mode memory operand into the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12 of the XI
;; destination.
5078 (define_insn "neon_vld4qa<mode>"
5079 [(set (match_operand:XI 0 "s_register_operand" "=w")
5080 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5081 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5085 int regno = REGNO (operands[0]);
5087 ops[0] = gen_rtx_REG (DImode, regno);
5088 ops[1] = gen_rtx_REG (DImode, regno + 4);
5089 ops[2] = gen_rtx_REG (DImode, regno + 8);
5090 ops[3] = gen_rtx_REG (DImode, regno + 12);
5091 ops[4] = operands[1];
5092 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5095 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of an XI-mode vld4: loads an OI-mode memory operand into the
;; DImode registers at regno + 2, regno + 6, regno + 10 and regno + 14.
;; Operand 2 is tied to the output (constraint "0").
5098 (define_insn "neon_vld4qb<mode>"
5099 [(set (match_operand:XI 0 "s_register_operand" "=w")
5100 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5101 (match_operand:XI 2 "s_register_operand" "0")
5102 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5106 int regno = REGNO (operands[0]);
5108 ops[0] = gen_rtx_REG (DImode, regno + 2);
5109 ops[1] = gen_rtx_REG (DImode, regno + 6);
5110 ops[2] = gen_rtx_REG (DImode, regno + 10);
5111 ops[3] = gen_rtx_REG (DImode, regno + 14);
5112 ops[4] = operands[1];
5113 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5116 [(set_attr "type" "neon_load4_4reg<q>")]
;; Load one lane of a four-element structure into an OI register for VD
;; modes.  Operand 2 is tied to the output; operand 3 is the immediate lane
;; number, range-checked against GET_MODE_NUNITS (<MODE>mode).  The registers
;; named are the DImode registers at regno, regno + 2, regno + 4 and
;; regno + 6.
5119 (define_insn "neon_vld4_lane<mode>"
5120 [(set (match_operand:OI 0 "s_register_operand" "=w")
5121 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5122 (match_operand:OI 2 "s_register_operand" "0")
5123 (match_operand:SI 3 "immediate_operand" "i")
5124 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5128 HOST_WIDE_INT lane = INTVAL (operands[3]);
5129 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5130 int regno = REGNO (operands[0]);
5132 if (lane < 0 || lane >= max)
5133 error ("lane out of range");
5134 ops[0] = gen_rtx_REG (DImode, regno);
5135 ops[1] = gen_rtx_REG (DImode, regno + 2);
5136 ops[2] = gen_rtx_REG (DImode, regno + 4);
5137 ops[3] = gen_rtx_REG (DImode, regno + 6);
5138 ops[4] = operands[1];
5139 ops[5] = operands[3];
5140 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5144 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load one lane of a four-element structure into an XI register for VMQ
;; modes.  Operand 2 is tied to the output; operand 3 is the immediate lane
;; number, range-checked against GET_MODE_NUNITS (<MODE>mode), and lanes with
;; lane >= max / 2 take a separate branch.  The registers named are the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12; the lane
;; printed is GEN_INT (lane).
5147 (define_insn "neon_vld4_lane<mode>"
5148 [(set (match_operand:XI 0 "s_register_operand" "=w")
5149 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5150 (match_operand:XI 2 "s_register_operand" "0")
5151 (match_operand:SI 3 "immediate_operand" "i")
5152 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5156 HOST_WIDE_INT lane = INTVAL (operands[3]);
5157 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5158 int regno = REGNO (operands[0]);
5160 if (lane < 0 || lane >= max)
5161 error ("lane out of range");
5162 else if (lane >= max / 2)
5167 ops[0] = gen_rtx_REG (DImode, regno);
5168 ops[1] = gen_rtx_REG (DImode, regno + 4);
5169 ops[2] = gen_rtx_REG (DImode, regno + 8);
5170 ops[3] = gen_rtx_REG (DImode, regno + 12);
5171 ops[4] = operands[1];
5172 ops[5] = GEN_INT (lane);
5173 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5177 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load a <V_four_elem> structure and replicate it into an OI register for
;; VDX modes.  Modes with more than one unit use the vld4 all-lanes form on
;; the DImode registers at regno, regno + 2, regno + 4 and regno + 6;
;; otherwise a plain vld1 of the whole register is returned.  The "type"
;; attribute is chosen on <V_mode_nunits> in the same way.
5180 (define_insn "neon_vld4_dup<mode>"
5181 [(set (match_operand:OI 0 "s_register_operand" "=w")
5182 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5183 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5187 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5189 int regno = REGNO (operands[0]);
5191 ops[0] = gen_rtx_REG (DImode, regno);
5192 ops[1] = gen_rtx_REG (DImode, regno + 2);
5193 ops[2] = gen_rtx_REG (DImode, regno + 4);
5194 ops[3] = gen_rtx_REG (DImode, regno + 6);
5195 ops[4] = operands[1];
5196 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5201 return "vld1.<V_sz_elem>\t%h0, %A1";
5204 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5205 (const_string "neon_load4_all_lanes<q>")
5206 (const_string "neon_load1_1reg<q>")))]
;; Expander whose pattern sets an OI neon_struct_operand from an unspec of an
;; OI register, with a VDX-mode UNSPEC_VSTRUCTDUMMY unspec.
5209 (define_expand "vec_store_lanesoi<mode>"
5210 [(set (match_operand:OI 0 "neon_struct_operand")
5211 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5212 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-register structure store from an OI register for VDX modes.  When
;; <V_sz_elem> is 64 the insn is emitted as vst1.64, otherwise as
;; vst4.<V_sz_elem>; the "type" attribute makes the same 64-bit distinction.
5216 (define_insn "neon_vst4<mode>"
5217 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5218 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5219 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5223 if (<V_sz_elem> == 64)
5224 return "vst1.64\t%h1, %A0";
5226 return "vst4.<V_sz_elem>\t%h1, %A0";
5229 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5230 (const_string "neon_store1_4reg<q>")
5231 (const_string "neon_store4_4reg<q>")))]
;; Expander vec_store_lanesxi<mode> (VQ modes, XI-sized operands).
;; It does no work of its own: operands 0 (memory) and 1 (register group)
;; are forwarded unchanged to gen_neon_vst4<mode>.
5234 (define_expand "vec_store_lanesxi<mode>"
5235 [(match_operand:XI 0 "neon_struct_operand")
5236 (match_operand:XI 1 "s_register_operand")
5237 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5240 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; XI-sized (VQ modes) vst4 expander.  The store is split into two
;; OImode pieces: the memory operand is re-addressed as OImode at offset 0
;; for neon_vst4qa, then advanced by GET_MODE_SIZE (OImode) for
;; neon_vst4qb.  Both insns receive the complete XI register operand and
;; pick their own registers out of it.
5244 (define_expand "neon_vst4<mode>"
5245 [(match_operand:XI 0 "neon_struct_operand")
5246 (match_operand:XI 1 "s_register_operand")
5247 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5252 mem = adjust_address (operands[0], OImode, 0);
5253 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5254 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5255 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First OImode piece of an XI-sized vst4 (VQ modes).
;; Operand 0 is the OImode memory destination, operand 1 the XI register
;; group.  The DImode registers at REGNO + 0, + 4, + 8 and + 12 of
;; operand 1 are stored with a single "vst4.<size>".
5259 (define_insn "neon_vst4qa<mode>"
5260 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5261 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5262 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5266 int regno = REGNO (operands[1]);
5268 ops[0] = operands[0];
5269 ops[1] = gen_rtx_REG (DImode, regno);
5270 ops[2] = gen_rtx_REG (DImode, regno + 4);
5271 ops[3] = gen_rtx_REG (DImode, regno + 8);
5272 ops[4] = gen_rtx_REG (DImode, regno + 12);
5273 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5276 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second OImode piece of an XI-sized vst4 (VQ modes).
;; Same shape as neon_vst4qa, but the DImode registers used are the ones
;; at REGNO + 2, + 6, + 10 and + 14 of operand 1, i.e. the registers that
;; neon_vst4qa leaves out.
5279 (define_insn "neon_vst4qb<mode>"
5280 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5281 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5282 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5286 int regno = REGNO (operands[1]);
5288 ops[0] = operands[0];
5289 ops[1] = gen_rtx_REG (DImode, regno + 2);
5290 ops[2] = gen_rtx_REG (DImode, regno + 6);
5291 ops[3] = gen_rtx_REG (DImode, regno + 10);
5292 ops[4] = gen_rtx_REG (DImode, regno + 14);
5293 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5296 [(set_attr "type" "neon_store4_4reg<q>")]
;; Single-lane four-element structure store, OI register form (VD modes).
;; Operand 0: destination memory (<V_four_elem> mode).
;; Operand 1: OI register group holding the four source vectors.
;; Operand 2: SImode immediate lane number.
;; A lane outside [0, GET_MODE_NUNITS) is reported with
;; error ("lane out of range").  The stored registers are the DImode
;; registers at REGNO, + 2, + 4 and + 6 of operand 1, each indexed by the
;; lane taken directly from operand 2.
5299 (define_insn "neon_vst4_lane<mode>"
5300 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5301 (unspec:<V_four_elem>
5302 [(match_operand:OI 1 "s_register_operand" "w")
5303 (match_operand:SI 2 "immediate_operand" "i")
5304 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5308 HOST_WIDE_INT lane = INTVAL (operands[2]);
5309 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5310 int regno = REGNO (operands[1]);
5312 if (lane < 0 || lane >= max)
5313 error ("lane out of range");
5314 ops[0] = operands[0];
5315 ops[1] = gen_rtx_REG (DImode, regno);
5316 ops[2] = gen_rtx_REG (DImode, regno + 2);
5317 ops[3] = gen_rtx_REG (DImode, regno + 4);
5318 ops[4] = gen_rtx_REG (DImode, regno + 6);
5319 ops[5] = operands[2];
5320 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5324 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Single-lane four-element structure store, XI register form (VMQ modes).
;; Operand 0: destination memory (<V_four_elem> mode).
;; Operand 1: XI register group holding the four source vectors.
;; Operand 2: SImode immediate lane number.
;; The lane is range-checked against GET_MODE_NUNITS; lanes in the upper
;; half of the vector (lane >= max / 2) take a separate adjustment branch
;; before the registers are chosen -- presumably rebasing lane and regno
;; onto the other D register of each pair; TODO confirm.  The registers
;; printed are the DImode registers at REGNO, + 4, + 8 and + 12, and the
;; lane is rebuilt with GEN_INT from the local value rather than reused
;; from operand 2.
;; NOTE(review): the "type" attribute here is neon_store4_4reg<q>, while
;; the OI/VD variant of this pattern uses neon_store4_one_lane<q> --
;; confirm whether the difference is intentional.
5327 (define_insn "neon_vst4_lane<mode>"
5328 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5329 (unspec:<V_four_elem>
5330 [(match_operand:XI 1 "s_register_operand" "w")
5331 (match_operand:SI 2 "immediate_operand" "i")
5332 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5336 HOST_WIDE_INT lane = INTVAL (operands[2]);
5337 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5338 int regno = REGNO (operands[1]);
5340 if (lane < 0 || lane >= max)
5341 error ("lane out of range");
5342 else if (lane >= max / 2)
5347 ops[0] = operands[0];
5348 ops[1] = gen_rtx_REG (DImode, regno);
5349 ops[2] = gen_rtx_REG (DImode, regno + 4);
5350 ops[3] = gen_rtx_REG (DImode, regno + 8);
5351 ops[4] = gen_rtx_REG (DImode, regno + 12);
5352 ops[5] = GEN_INT (lane);
5353 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5357 [(set_attr "type" "neon_store4_4reg<q>")]
;; Expander neon_vand<mode> (VDQX modes).  Operand 2 may be anything
;; accepted by neon_inv_logic_op2.  Operand 3 (an SImode immediate) is
;; accepted but not forwarded: the operation is emitted through
;; gen_and<mode>3 on operands 0, 1 and 2 only.
5360 (define_expand "neon_vand<mode>"
5361 [(match_operand:VDQX 0 "s_register_operand" "")
5362 (match_operand:VDQX 1 "s_register_operand" "")
5363 (match_operand:VDQX 2 "neon_inv_logic_op2" "")
5364 (match_operand:SI 3 "immediate_operand" "")]
5367 emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2]));
;; Expander neon_vorr<mode> (VDQX modes).  Operand 2 may be anything
;; accepted by neon_logic_op2.  Operand 3 (an SImode immediate) is
;; accepted but not forwarded: the operation is emitted through
;; gen_ior<mode>3 on operands 0, 1 and 2 only.
5371 (define_expand "neon_vorr<mode>"
5372 [(match_operand:VDQX 0 "s_register_operand" "")
5373 (match_operand:VDQX 1 "s_register_operand" "")
5374 (match_operand:VDQX 2 "neon_logic_op2" "")
5375 (match_operand:SI 3 "immediate_operand" "")]
5378 emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2]));
;; Expander neon_veor<mode> (VDQX modes).  All three vector operands must
;; be registers.  Operand 3 (an SImode immediate) is accepted but not
;; forwarded: the operation is emitted through gen_xor<mode>3 on
;; operands 0, 1 and 2 only.
5382 (define_expand "neon_veor<mode>"
5383 [(match_operand:VDQX 0 "s_register_operand" "")
5384 (match_operand:VDQX 1 "s_register_operand" "")
5385 (match_operand:VDQX 2 "s_register_operand" "")
5386 (match_operand:SI 3 "immediate_operand" "")]
5389 emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2]));
;; Expander neon_vbic<mode> (VDQX modes).  Operand 2 may be anything
;; accepted by neon_logic_op2.  Operand 3 (an SImode immediate) is
;; accepted but not forwarded: the operation is emitted through
;; gen_bic<mode>3_neon on operands 0, 1 and 2 only.
5393 (define_expand "neon_vbic<mode>"
5394 [(match_operand:VDQX 0 "s_register_operand" "")
5395 (match_operand:VDQX 1 "s_register_operand" "")
5396 (match_operand:VDQX 2 "neon_logic_op2" "")
5397 (match_operand:SI 3 "immediate_operand" "")]
5400 emit_insn (gen_bic<mode>3_neon (operands[0], operands[1], operands[2]));
;; Expander neon_vorn<mode> (VDQX modes).  Operand 2 may be anything
;; accepted by neon_inv_logic_op2.  Operand 3 (an SImode immediate) is
;; accepted but not forwarded: the operation is emitted through
;; gen_orn<mode>3_neon on operands 0, 1 and 2 only.
5404 (define_expand "neon_vorn<mode>"
5405 [(match_operand:VDQX 0 "s_register_operand" "")
5406 (match_operand:VDQX 1 "s_register_operand" "")
5407 (match_operand:VDQX 2 "neon_inv_logic_op2" "")
5408 (match_operand:SI 3 "immediate_operand" "")]
5411 emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2]));
;; Widen one half of a VU-mode vector.  The half is chosen by operand 2,
;; a PARALLEL that must satisfy vect_par_constant_low; the selected
;; <V_HALF> elements are extended with the SE code to <V_unpack>.
;; Emitted as "vmovl" reading %e1.  Only available when
;; !BYTES_BIG_ENDIAN.
5415 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5416 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5417 (SE:<V_unpack> (vec_select:<V_HALF>
5418 (match_operand:VU 1 "register_operand" "w")
5419 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5420 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5421 "vmovl.<US><V_sz_elem> %q0, %e1"
5422 [(set_attr "type" "neon_shift_imm_long")]
;; Counterpart of neon_vec_unpack<US>_lo_<mode>: operand 2 must satisfy
;; vect_par_constant_high, and the "vmovl" reads %f1 instead of %e1.
;; Only available when !BYTES_BIG_ENDIAN.
5425 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5426 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5427 (SE:<V_unpack> (vec_select:<V_HALF>
5428 (match_operand:VU 1 "register_operand" "w")
5429 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5430 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5431 "vmovl.<US><V_sz_elem> %q0, %f1"
5432 [(set_attr "type" "neon_shift_imm_long")]
;; Expander vec_unpack<US>_hi_<mode> for VU modes (little-endian only).
;; It builds a PARALLEL holding the element indices NUNITS/2 ..
;; NUNITS - 1 (the "high" selector) and hands it, together with the
;; operands, to gen_neon_vec_unpack<US>_hi_<mode>.
5435 (define_expand "vec_unpack<US>_hi_<mode>"
5436 [(match_operand:<V_unpack> 0 "register_operand" "")
5437 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5438 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5440 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5443 for (i = 0; i < (<V_mode_nunits>/2); i++)
5444 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5446 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5447 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander vec_unpack<US>_lo_<mode> for VU modes (little-endian only).
;; It builds a PARALLEL holding the element indices 0 .. NUNITS/2 - 1
;; (the "low" selector) and hands it, together with the operands, to
;; gen_neon_vec_unpack<US>_lo_<mode>.
5454 (define_expand "vec_unpack<US>_lo_<mode>"
5455 [(match_operand:<V_unpack> 0 "register_operand" "")
5456 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5457 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5459 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5462 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5463 RTVEC_ELT (v, i) = GEN_INT (i);
5464 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5465 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of vector halves (VU modes, little-endian only).
;; Operands 1 and 3 are the source vectors; operand 2 is a PARALLEL that
;; must satisfy vect_par_constant_low.  Each selected half is extended
;; with SE and the products form the <V_unpack> result.  Emitted as
;; "vmull" on %e1 and %e3.
5472 (define_insn "neon_vec_<US>mult_lo_<mode>"
5473 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5474 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5475 (match_operand:VU 1 "register_operand" "w")
5476 (match_operand:VU 2 "vect_par_constant_low" "")))
5477 (SE:<V_unpack> (vec_select:<V_HALF>
5478 (match_operand:VU 3 "register_operand" "w")
5480 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5481 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5482 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_lo_<mode> for VU modes (little-endian
;; only).  It builds the "low" selector PARALLEL (indices 0 ..
;; NUNITS/2 - 1) and emits gen_neon_vec_<US>mult_lo_<mode> with it.
5485 (define_expand "vec_widen_<US>mult_lo_<mode>"
5486 [(match_operand:<V_unpack> 0 "register_operand" "")
5487 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5488 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5489 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5491 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5494 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5495 RTVEC_ELT (v, i) = GEN_INT (i);
5496 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5498 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Counterpart of neon_vec_<US>mult_lo_<mode>: operand 2 must satisfy
;; vect_par_constant_high, and the "vmull" reads %f1 and %f3.
;; Only available when !BYTES_BIG_ENDIAN.
5506 (define_insn "neon_vec_<US>mult_hi_<mode>"
5507 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5508 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5509 (match_operand:VU 1 "register_operand" "w")
5510 (match_operand:VU 2 "vect_par_constant_high" "")))
5511 (SE:<V_unpack> (vec_select:<V_HALF>
5512 (match_operand:VU 3 "register_operand" "w")
5514 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5515 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5516 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_hi_<mode> for VU modes (little-endian
;; only).  It builds the "high" selector PARALLEL (indices NUNITS/2 ..
;; NUNITS - 1) and emits gen_neon_vec_<US>mult_hi_<mode> with it.
5519 (define_expand "vec_widen_<US>mult_hi_<mode>"
5520 [(match_operand:<V_unpack> 0 "register_operand" "")
5521 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5522 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5523 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5525 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5528 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5529 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5530 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5532 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left for VW modes.  Operand 1 is the source vector and
;; operand 2 a constant shift amount that must satisfy
;; const_neon_scalar_shift_amount_operand.  The pattern is the SE
;; extension to <V_widen> of the ashift; the output is "vshll".
5541 (define_insn "neon_vec_<US>shiftl_<mode>"
5542 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5543 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5544 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5547 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5549 [(set_attr "type" "neon_shift_imm_long")]
;; Expander vec_widen_<US>shiftl_lo_<mode> for VU modes (little-endian
;; only).  It takes the <V_HALF>mode subreg of operand 1 at byte offset 0
;; and passes it to gen_neon_vec_<US>shiftl_<V_half>.
5552 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5553 [(match_operand:<V_unpack> 0 "register_operand" "")
5554 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5555 (match_operand:SI 2 "immediate_operand" "i")]
5556 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5558 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5559 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander vec_widen_<US>shiftl_hi_<mode> for VU modes (little-endian
;; only).  Same as the _lo_ expander, except that the <V_HALF>mode subreg
;; of operand 1 is taken at byte offset GET_MODE_SIZE (<V_HALF>mode),
;; i.e. the second half of the register.
5565 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5566 [(match_operand:<V_unpack> 0 "register_operand" "")
5567 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5568 (match_operand:SI 2 "immediate_operand" "i")]
5569 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5571 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5572 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5573 GET_MODE_SIZE (<V_HALF>mode)),
5579 ;; Vectorize for non-neon-quad case
;; Extends a whole VDI-mode vector (operand 1) to <V_widen> with the SE
;; code; emitted as a single "vmovl".
5580 (define_insn "neon_unpack<US>_<mode>"
5581 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5582 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5584 "vmovl.<US><V_sz_elem> %q0, %P1"
5585 [(set_attr "type" "neon_move")]
;; Expander vec_unpack<US>_lo_<mode> for VDI modes.  Operand 1 is widened
;; into a fresh <V_widen>mode pseudo with neon_unpack<US>_<mode>, and the
;; low half of that temporary is then extracted into operand 0 with
;; neon_vget_low<V_widen_l>.
5588 (define_expand "vec_unpack<US>_lo_<mode>"
5589 [(match_operand:<V_double_width> 0 "register_operand" "")
5590 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5593 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5594 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5595 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander vec_unpack<US>_hi_<mode> for VDI modes.  Operand 1 is widened
;; into a fresh <V_widen>mode pseudo with neon_unpack<US>_<mode>, and the
;; high half of that temporary is then extracted into operand 0 with
;; neon_vget_high<V_widen_l>.
5601 (define_expand "vec_unpack<US>_hi_<mode>"
5602 [(match_operand:<V_double_width> 0 "register_operand" "")
5603 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5606 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5607 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5608 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI-mode vectors (operands 1 and 2):
;; the operands are extended with SE and multiplied in <V_widen>.
;; Emitted as a single "vmull".
5614 (define_insn "neon_vec_<US>mult_<mode>"
5615 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5616 (mult:<V_widen> (SE:<V_widen>
5617 (match_operand:VDI 1 "register_operand" "w"))
5619 (match_operand:VDI 2 "register_operand" "w"))))]
5621 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5622 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander vec_widen_<US>mult_hi_<mode> for VDI modes.  The full
;; widening product of operands 1 and 2 is computed into a fresh
;; <V_widen>mode pseudo with neon_vec_<US>mult_<mode>; operand 0 then
;; receives its high half through neon_vget_high<V_widen_l>.
5625 (define_expand "vec_widen_<US>mult_hi_<mode>"
5626 [(match_operand:<V_double_width> 0 "register_operand" "")
5627 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5628 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5631 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5632 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5633 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander vec_widen_<US>mult_lo_<mode> for VDI modes.  The full
;; widening product of operands 1 and 2 is computed into a fresh
;; <V_widen>mode pseudo with neon_vec_<US>mult_<mode>; operand 0 then
;; receives its low half through neon_vget_low<V_widen_l>.
5640 (define_expand "vec_widen_<US>mult_lo_<mode>"
5641 [(match_operand:<V_double_width> 0 "register_operand" "")
5642 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5643 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5646 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5647 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5648 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander vec_widen_<US>shiftl_hi_<mode> for VDI modes.  The widening
;; shift of operand 1 by the immediate operand 2 is computed into a fresh
;; <V_widen>mode pseudo with neon_vec_<US>shiftl_<mode>; operand 0 then
;; receives its high half through neon_vget_high<V_widen_l>.
5655 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5656 [(match_operand:<V_double_width> 0 "register_operand" "")
5657 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5658 (match_operand:SI 2 "immediate_operand" "i")]
5661 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5662 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5663 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander vec_widen_<US>shiftl_lo_<mode> for VDI modes.  The widening
;; shift of operand 1 by the immediate operand 2 is computed into a fresh
;; <V_widen>mode pseudo with neon_vec_<US>shiftl_<mode>; operand 0 then
;; receives its low half through neon_vget_low<V_widen_l>.
5669 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5670 [(match_operand:<V_double_width> 0 "register_operand" "")
5671 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5672 (match_operand:SI 2 "immediate_operand" "i")]
5675 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5676 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5677 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5683 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5684 ; because the ordering of vector elements in Q registers is different from what
5685 ; the semantics of the instructions require.
;; Narrow two VN-mode vectors and concatenate the results: operand 1 is
;; truncated into %e0 and operand 2 into %f0, using two "vmovn"
;; instructions (hence length 8 and type "multiple").  The destination is
;; early-clobber ("=&w") because the first vmovn writes part of it before
;; operand 2 has been read.
5687 (define_insn "vec_pack_trunc_<mode>"
5688 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5689 (vec_concat:<V_narrow_pack>
5690 (truncate:<V_narrow>
5691 (match_operand:VN 1 "register_operand" "w"))
5692 (truncate:<V_narrow>
5693 (match_operand:VN 2 "register_operand" "w"))))]
5694 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5695 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5696 [(set_attr "type" "multiple")
5697 (set_attr "length" "8")]
5700 ;; For the non-quad case.
;; Truncates a single VN-mode vector (operand 1) to <V_narrow> with one
;; "vmovn".  Only available when !BYTES_BIG_ENDIAN.
5701 (define_insn "neon_vec_pack_trunc_<mode>"
5702 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5703 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5704 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5705 "vmovn.i<V_sz_elem>\t%P0, %q1"
5706 [(set_attr "type" "neon_move_narrow_q")]
;; Expander vec_pack_trunc_<mode> for VSHFT modes (little-endian only).
;; Operands 1 and 2 are placed into the low and high halves of a fresh
;; <V_DOUBLE>mode pseudo (move_lo_quad / move_hi_quad), and that temporary
;; is then narrowed into operand 0 with neon_vec_pack_trunc_<V_double>.
5709 (define_expand "vec_pack_trunc_<mode>"
5710 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5711 (match_operand:VSHFT 1 "register_operand" "")
5712 (match_operand:VSHFT 2 "register_operand")]
5713 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5715 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5717 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5718 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5719 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference written as abs (operand 1 - operand 2) for VDQ
;; modes, emitted as "vabd".  For floating-point modes the pattern is
;; enabled only under flag_unsafe_math_optimizations.  The "type"
;; attribute is neon_fp_abd_s for float modes and neon_abd otherwise.
5723 (define_insn "neon_vabd<mode>_2"
5724 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5725 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5726 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5727 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5728 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5730 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5731 (const_string "neon_fp_abd_s<q>")
5732 (const_string "neon_abd<q>")))]
;; Second "vabd" form for VDQ modes: here the abs wraps an unspec of
;; operands 1 and 2 rather than an explicit minus.  Same enabling
;; condition and "type" selection as neon_vabd<mode>_2; the mnemonic
;; suffix comes from <V_if_elem> instead of <V_s_elem>.
5735 (define_insn "neon_vabd<mode>_3"
5736 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5737 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5738 (match_operand:VDQ 2 "s_register_operand" "w")]
5740 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5741 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5743 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5744 (const_string "neon_fp_abd_s<q>")
5745 (const_string "neon_abd<q>")))]
5748 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; SImode -> DImode sign extension, split after reload when operand 0
;; satisfies IS_VFP_REGNUM.  The source is duplicated across a V2SImode
;; view of the destination register (operand 2, created in the
;; preparation code), then the DImode destination is arithmetically
;; shifted right by 32.
5751 [(set (match_operand:DI 0 "s_register_operand" "")
5752 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5753 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5754 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5755 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5757 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode sign extension, split after reload when operand 0
;; satisfies IS_VFP_REGNUM.  The source is duplicated across a V4HImode
;; view of the destination register (operand 2), then the DImode
;; destination is arithmetically shifted right by 48.
5761 [(set (match_operand:DI 0 "s_register_operand" "")
5762 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5763 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5764 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5765 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5767 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode -> DImode sign extension, split after reload when operand 0
;; satisfies IS_VFP_REGNUM.  The source is duplicated across a V8QImode
;; view of the destination register (operand 2), then the DImode
;; destination is arithmetically shifted right by 56.
5771 [(set (match_operand:DI 0 "s_register_operand" "")
5772 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5773 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5774 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5775 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5777 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SImode -> DImode zero extension, split after reload when operand 0
;; satisfies IS_VFP_REGNUM.  The source is duplicated across a V2SImode
;; view of the destination register (operand 2), then the DImode
;; destination is logically shifted right by 32.
5781 [(set (match_operand:DI 0 "s_register_operand" "")
5782 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5783 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5784 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5785 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5787 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode zero extension, split after reload when operand 0
;; satisfies IS_VFP_REGNUM.  The source is duplicated across a V4HImode
;; view of the destination register (operand 2), then the DImode
;; destination is logically shifted right by 48.
5791 [(set (match_operand:DI 0 "s_register_operand" "")
5792 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5793 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5794 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5795 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5797 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5801 [(set (match_operand:DI 0 "s_register_operand" "")
5802 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5803 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5804 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5805 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5807 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));