1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The permitted values are "vadd", "vmin" and "vmax"; an insn that does not
;; set the attribute explicitly gets the default, "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode vector.  Operand 1 is the source register
;; (s_register_operand, constraint "w"); operand 0 is the destination, matched
;; by neon_permissive_struct_operand with constraint "Um".  The source is
;; wrapped in UNSPEC_MISALIGNED_ACCESS so the pattern only matches moves that
;; were generated with that unspec.  The store is emitted as a single vst1 of
;; the register printed with %P1, with the element size taken from
;; <V_sz_elem>.  Enabled only for NEON targets that are not big-endian and
;; have unaligned_access set.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode vector.  Operand 1 is the source register
;; (s_register_operand, constraint "w"); operand 0 is the destination, matched
;; by neon_permissive_struct_operand with constraint "Um".  The source is
;; wrapped in UNSPEC_MISALIGNED_ACCESS so the pattern only matches moves that
;; were generated with that unspec.  The store is emitted as a single vst1 of
;; the register printed with %q1, with the element size taken from
;; <V_sz_elem>.  Enabled only for NEON targets that are not big-endian and
;; have unaligned_access set.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VQX-mode vector.  Operand 0 is the destination
;; register (s_register_operand, constraint "w"); operand 1 is the source,
;; matched by neon_permissive_struct_operand.  The source is wrapped in
;; UNSPEC_MISALIGNED_ACCESS so the pattern only matches moves that were
;; generated with that unspec.  The load is emitted as a single vld1 into the
;; register printed with %q0, with the element size taken from <V_sz_elem>.
;; Enabled only for NEON targets that are not big-endian and have
;; unaligned_access set.
;; The "type" attribute is a load type because the instruction emitted is
;; vld1; describing it as a store would mislead the scheduler.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply phase
580 ;; can use it when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
632 (define_insn "ior<mode>3"
633 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
634 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
635 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
638 switch (which_alternative)
640 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
641 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
642 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
643 default: gcc_unreachable ();
646 [(set_attr "type" "neon_logic<q>")]
649 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
650 ;; vorr. We support the pseudo-instruction vand instead, because that
651 ;; corresponds to the canonical form the middle-end expects to use for
652 ;; immediate bitwise-ANDs.
654 (define_insn "and<mode>3"
655 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
656 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
657 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
660 switch (which_alternative)
662 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
663 case 1: return neon_output_logic_immediate ("vand", &operands[2],
664 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
665 default: gcc_unreachable ();
668 [(set_attr "type" "neon_logic<q>")]
671 (define_insn "orn<mode>3_neon"
672 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
673 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
674 (match_operand:VDQ 1 "s_register_operand" "w")))]
676 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
677 [(set_attr "type" "neon_logic<q>")]
680 ;; TODO: investigate whether we should disable
681 ;; this and bicdi3_neon for the A8 in line with the other
683 (define_insn_and_split "orndi3_neon"
684 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
685 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
686 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
694 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
695 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
696 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
701 operands[3] = gen_highpart (SImode, operands[0]);
702 operands[0] = gen_lowpart (SImode, operands[0]);
703 operands[4] = gen_highpart (SImode, operands[2]);
704 operands[2] = gen_lowpart (SImode, operands[2]);
705 operands[5] = gen_highpart (SImode, operands[1]);
706 operands[1] = gen_lowpart (SImode, operands[1]);
710 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
711 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
715 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
716 (set_attr "length" "*,16,8,8")
717 (set_attr "arch" "any,a,t2,t2")]
720 (define_insn "bic<mode>3_neon"
721 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
722 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
723 (match_operand:VDQ 1 "s_register_operand" "w")))]
725 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
726 [(set_attr "type" "neon_logic<q>")]
729 ;; Compare to *anddi_notdi_di.
730 (define_insn "bicdi3_neon"
731 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
732 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
733 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
739 [(set_attr "type" "neon_logic,multiple,multiple")
740 (set_attr "length" "*,8,8")]
743 (define_insn "xor<mode>3"
744 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
745 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
746 (match_operand:VDQ 2 "s_register_operand" "w")))]
748 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
749 [(set_attr "type" "neon_logic<q>")]
752 (define_insn "one_cmpl<mode>2"
753 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
754 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
756 "vmvn\t%<V_reg>0, %<V_reg>1"
757 [(set_attr "type" "neon_move<q>")]
760 (define_insn "abs<mode>2"
761 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
762 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
764 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
766 (if_then_else (match_test "<Is_float_mode>")
767 (const_string "neon_fp_abs_s<q>")
768 (const_string "neon_abs<q>")))]
771 (define_insn "neg<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_neg_s<q>")
779 (const_string "neon_neg<q>")))]
782 (define_insn "negdi2_neon"
783 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
784 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
785 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
786 (clobber (reg:CC CC_REGNUM))]
789 [(set_attr "length" "8")
790 (set_attr "type" "multiple")]
793 ; Split negdi2_neon for vfp registers
795 [(set (match_operand:DI 0 "s_register_operand" "")
796 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
797 (clobber (match_scratch:DI 2 ""))
798 (clobber (reg:CC CC_REGNUM))]
799 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
800 [(set (match_dup 2) (const_int 0))
801 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
802 (clobber (reg:CC CC_REGNUM))])]
804 if (!REG_P (operands[2]))
805 operands[2] = operands[0];
809 ; Split negdi2_neon for core registers
811 [(set (match_operand:DI 0 "s_register_operand" "")
812 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
813 (clobber (match_scratch:DI 2 ""))
814 (clobber (reg:CC CC_REGNUM))]
815 "TARGET_32BIT && reload_completed
816 && arm_general_register_operand (operands[0], DImode)"
817 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
818 (clobber (reg:CC CC_REGNUM))])]
822 (define_insn "*umin<mode>3_neon"
823 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
824 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
825 (match_operand:VDQIW 2 "s_register_operand" "w")))]
827 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
828 [(set_attr "type" "neon_minmax<q>")]
831 (define_insn "*umax<mode>3_neon"
832 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
833 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
834 (match_operand:VDQIW 2 "s_register_operand" "w")))]
836 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
837 [(set_attr "type" "neon_minmax<q>")]
840 (define_insn "*smin<mode>3_neon"
841 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
842 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
843 (match_operand:VDQW 2 "s_register_operand" "w")))]
845 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
847 (if_then_else (match_test "<Is_float_mode>")
848 (const_string "neon_fp_minmax_s<q>")
849 (const_string "neon_minmax<q>")))]
852 (define_insn "*smax<mode>3_neon"
853 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
854 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
855 (match_operand:VDQW 2 "s_register_operand" "w")))]
857 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
859 (if_then_else (match_test "<Is_float_mode>")
860 (const_string "neon_fp_minmax_s<q>")
861 (const_string "neon_minmax<q>")))]
864 ; TODO: V2DI shifts are currently disabled because there are bugs in the
865 ; generic vectorizer code. It ends up creating a V2DI constructor with
868 (define_insn "vashl<mode>3"
869 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
870 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
871 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
874 switch (which_alternative)
876 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
877 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
879 VALID_NEON_QREG_MODE (<MODE>mode),
881 default: gcc_unreachable ();
884 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
887 (define_insn "vashr<mode>3_imm"
888 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
889 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
890 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
893 return neon_output_shift_immediate ("vshr", 's', &operands[2],
894 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
897 [(set_attr "type" "neon_shift_imm<q>")]
900 (define_insn "vlshr<mode>3_imm"
901 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
902 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
903 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
906 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
907 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
910 [(set_attr "type" "neon_shift_imm<q>")]
913 ; Used for implementing arithmetic shift-right, which is a left-shift by a
914 ; negative amount, with signed operands. This is essentially the same as vashl<mode>3
915 ; above, but using an unspec in case GCC tries anything tricky with negative
918 (define_insn "ashl<mode>3_signed"
919 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
920 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
921 (match_operand:VDQI 2 "s_register_operand" "w")]
922 UNSPEC_ASHIFT_SIGNED))]
924 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
925 [(set_attr "type" "neon_shift_reg<q>")]
928 ; Used for implementing logical shift-right, which is a left-shift by a negative
929 ; amount, with unsigned operands.
931 (define_insn "ashl<mode>3_unsigned"
932 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
933 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
934 (match_operand:VDQI 2 "s_register_operand" "w")]
935 UNSPEC_ASHIFT_UNSIGNED))]
937 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
938 [(set_attr "type" "neon_shift_reg<q>")]
941 (define_expand "vashr<mode>3"
942 [(set (match_operand:VDQIW 0 "s_register_operand" "")
943 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
944 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
947 if (s_register_operand (operands[2], <MODE>mode))
949 rtx neg = gen_reg_rtx (<MODE>mode);
950 emit_insn (gen_neg<mode>2 (neg, operands[2]));
951 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
954 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
958 (define_expand "vlshr<mode>3"
959 [(set (match_operand:VDQIW 0 "s_register_operand" "")
960 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
961 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
964 if (s_register_operand (operands[2], <MODE>mode))
966 rtx neg = gen_reg_rtx (<MODE>mode);
967 emit_insn (gen_neg<mode>2 (neg, operands[2]));
968 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
971 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
977 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
978 ;; leaving the upper half uninitialized. This is OK since the shift
979 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
980 ;; data flow analysis however, we pretend the full register is set
982 (define_insn "neon_load_count"
983 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
984 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
988 vld1.32\t{%P0[0]}, %A1
990 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift carried out in a NEON register with vshl.u64, by an
;; immediate (alternative 0) or by a count held in a NEON register
;; (alternative 1).  Matches only after reload; a constant count must lie
;; in the range 0..63.
993 (define_insn "ashldi3_neon_noclobber"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
996 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
997 "TARGET_NEON && reload_completed
998 && (!CONST_INT_P (operands[2])
999 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1001 vshl.u64\t%P0, %P1, %2
1002 vshl.u64\t%P0, %P1, %P2"
1003 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift with alternatives for NEON ("w") and core ("r")
;; registers.  It is split once reload has completed: when the destination
;; is a VFP/NEON register the shift is re-emitted as ashldi3_neon_noclobber,
;; otherwise the core-register 64-bit shift helpers are used.  Operands 3
;; and 4 are SImode scratches and operand 5 a DImode NEON scratch.
1006 (define_insn_and_split "ashldi3_neon"
1007 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1008 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1009 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1010 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1011 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1012 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1013 (clobber (reg:CC_C CC_REGNUM))]
1016 "TARGET_NEON && reload_completed"
1020 if (IS_VFP_REGNUM (REGNO (operands[0])))
1022 if (CONST_INT_P (operands[2]))
/* A constant count below 1 is emitted as a plain DImode move.  */
1024 if (INTVAL (operands[2]) < 1)
1026 emit_insn (gen_movdi (operands[0], operands[1]));
/* Constant counts above 63 are replaced by 63.  */
1029 else if (INTVAL (operands[2]) > 63)
1030 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
/* Move the count into the DImode NEON scratch and shift by that.  */
1034 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1035 operands[2] = operands[5];
1038 /* Ditch the unnecessary clobbers. */
1039 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1044 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
1045 /* This clobbers CC. */
1046 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1048 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1049 operands[2], operands[3], operands[4]);
1053 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1054 (set_attr "opt" "*,*,speed,speed,*,*")
1055 (set_attr "type" "multiple")]
1058 ; The shift amount needs to be negated for right-shifts
;; vshl.s64 of a DImode NEON register by a count held in another NEON
;; register, represented as UNSPEC_ASHIFT_SIGNED.  Matches only after reload.
1059 (define_insn "signed_shift_di3_neon"
1060 [(set (match_operand:DI 0 "s_register_operand" "=w")
1061 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1062 (match_operand:DI 2 "s_register_operand" " w")]
1063 UNSPEC_ASHIFT_SIGNED))]
1064 "TARGET_NEON && reload_completed"
1065 "vshl.s64\t%P0, %P1, %P2"
1066 [(set_attr "type" "neon_shift_reg")]
1069 ; The shift amount needs to be negated for right-shifts
;; vshl.u64 of a DImode NEON register by a count held in another NEON
;; register, represented as UNSPEC_ASHIFT_UNSIGNED.  Matches only after
;; reload.
1070 (define_insn "unsigned_shift_di3_neon"
1071 [(set (match_operand:DI 0 "s_register_operand" "=w")
1072 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1073 (match_operand:DI 2 "s_register_operand" " w")]
1074 UNSPEC_ASHIFT_UNSIGNED))]
1075 "TARGET_NEON && reload_completed"
1076 "vshl.u64\t%P0, %P1, %P2"
1077 [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic right shift by an immediate in NEON registers
;; (vshr.s64).  Matches only after reload and only for counts in 1..64.
1080 (define_insn "ashrdi3_neon_imm_noclobber"
1081 [(set (match_operand:DI 0 "s_register_operand" "=w")
1082 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1083 (match_operand:DI 2 "const_int_operand" " i")))]
1084 "TARGET_NEON && reload_completed
1085 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1086 "vshr.s64\t%P0, %P1, %2"
1087 [(set_attr "type" "neon_shift_imm")]
;; DImode logical right shift by an immediate in NEON registers
;; (vshr.u64).  Matches only after reload and only for counts in 1..64.
1090 (define_insn "lshrdi3_neon_imm_noclobber"
1091 [(set (match_operand:DI 0 "s_register_operand" "=w")
1092 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1093 (match_operand:DI 2 "const_int_operand" " i")))]
1094 "TARGET_NEON && reload_completed
1095 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1096 "vshr.u64\t%P0, %P1, %2"
1097 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (iterated over rshifts) with alternatives for NEON
;; ("w") and core ("r") registers, split once reload has completed.  For a
;; VFP/NEON destination a constant count goes to the matching
;; *_imm_noclobber pattern, while a register count is negated, loaded with
;; neon_load_count and applied through <shifttype>_shift_di3_neon.  The
;; remaining case uses the core-register 64-bit shift helpers.
1102 (define_insn_and_split "<shift>di3_neon"
1103 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1104 (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1105 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1106 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1107 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1108 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1109 (clobber (reg:CC CC_REGNUM))]
1112 "TARGET_NEON && reload_completed"
1116 if (IS_VFP_REGNUM (REGNO (operands[0])))
1118 if (CONST_INT_P (operands[2]))
/* A constant count below 1 is emitted as a plain DImode move.  */
1120 if (INTVAL (operands[2]) < 1)
1122 emit_insn (gen_movdi (operands[0], operands[1]));
/* Constant counts above 64 are replaced by 64.  */
1125 else if (INTVAL (operands[2]) > 64)
1126 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1128 /* Ditch the unnecessary clobbers. */
1129 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1135 /* We must use a negative left-shift. */
1136 emit_insn (gen_negsi2 (operands[3], operands[2]));
1137 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1138 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1144 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
1145 /* This clobbers CC. */
1146 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1148 /* This clobbers CC (ASHIFTRT by register only). */
1149 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1150 operands[2], operands[3], operands[4]);
1155 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1156 (set_attr "opt" "*,*,speed,speed,*,*")
1157 (set_attr "type" "multiple")]
1160 ;; Widening operations
;; Widening sum: operand 1 (mode VW) is sign-extended to <V_widen> and added
;; to the already-wide operand 2, emitted as vaddw.<V_s_elem>.
1162 (define_insn "widen_ssum<mode>3"
1163 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1164 (plus:<V_widen> (sign_extend:<V_widen>
1165 (match_operand:VW 1 "s_register_operand" "%w"))
1166 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1168 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1169 [(set_attr "type" "neon_add_widen")]
;; Widening sum: operand 1 (mode VW) is zero-extended to <V_widen> and added
;; to the already-wide operand 2, emitted as vaddw.<V_u_elem>.
1172 (define_insn "widen_usum<mode>3"
1173 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1174 (plus:<V_widen> (zero_extend:<V_widen>
1175 (match_operand:VW 1 "s_register_operand" "%w"))
1176 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1178 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1179 [(set_attr "type" "neon_add_widen")]
1182 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
1183 ;; shift-count granularity. That's good enough for the middle-end's current
1186 ;; Note that it's not safe to perform such an operation in big-endian mode,
1187 ;; due to element-ordering issues.
;; Whole-vector right shift by a constant multiple of 8 bits, little-endian
;; only.  The operands are viewed as byte vectors (V16QI for 128-bit modes,
;; V8QI otherwise) and combined with a zero register by the matching
;; neon_vext generator, using num_bits / BITS_PER_UNIT as the byte index.
1189 (define_expand "vec_shr_<mode>"
1190 [(match_operand:VDQ 0 "s_register_operand" "")
1191 (match_operand:VDQ 1 "s_register_operand" "")
1192 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1193 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1196 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1197 const int width = GET_MODE_BITSIZE (<MODE>mode);
1198 const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1199 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1200 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
/* A shift by the full vector width is emitted as a plain move.  */
1202 if (num_bits == width)
1204 emit_move_insn (operands[0], operands[1]);
1208 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1209 operands[0] = gen_lowpart (bvecmode, operands[0]);
1210 operands[1] = gen_lowpart (bvecmode, operands[1]);
1212 emit_insn (gen_ext (operands[0], operands[1], zero_reg,
1213 GEN_INT (num_bits / BITS_PER_UNIT)));
;; Whole-vector left shift by a constant multiple of 8 bits, little-endian
;; only.  The operands are viewed as byte vectors (V16QI for 128-bit modes,
;; V8QI otherwise); the zero register is the first neon_vext input and the
;; byte index is (width - num_bits) / BITS_PER_UNIT.
1217 (define_expand "vec_shl_<mode>"
1218 [(match_operand:VDQ 0 "s_register_operand" "")
1219 (match_operand:VDQ 1 "s_register_operand" "")
1220 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1221 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1224 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1225 const int width = GET_MODE_BITSIZE (<MODE>mode);
1226 const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1227 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1228 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1232 emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
/* VEXT takes the index counted from the other end of the vector.  */
1236 num_bits = width - num_bits;
1238 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1239 operands[0] = gen_lowpart (bvecmode, operands[0]);
1240 operands[1] = gen_lowpart (bvecmode, operands[1]);
1242 emit_insn (gen_ext (operands[0], zero_reg, operands[1],
1243 GEN_INT (num_bits / BITS_PER_UNIT)));
1247 ;; Helpers for quad-word reduction operations
1249 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1250 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1251 ; N/2-element vector.
;; V4SI instance: lanes 0-1 and lanes 2-3 of operand 1 are selected as two
;; V2SI values and combined by <VQH_mnem>.<VQH_sign>32, reading the %e and
;; %f parts of operand 1.
1253 (define_insn "quad_halves_<code>v4si"
1254 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1256 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1257 (parallel [(const_int 0) (const_int 1)]))
1258 (vec_select:V2SI (match_dup 1)
1259 (parallel [(const_int 2) (const_int 3)]))))]
1261 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1262 [(set_attr "vqh_mnem" "<VQH_mnem>")
1263 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF instance: lanes 0-1 and lanes 2-3 of operand 1 are selected as two
;; V2SF values and combined by <VQH_mnem>.f32.  Only available with
;; -funsafe-math-optimizations.
1266 (define_insn "quad_halves_<code>v4sf"
1267 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1269 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1270 (parallel [(const_int 0) (const_int 1)]))
1271 (vec_select:V2SF (match_dup 1)
1272 (parallel [(const_int 2) (const_int 3)]))))]
1273 "TARGET_NEON && flag_unsafe_math_optimizations"
1274 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1275 [(set_attr "vqh_mnem" "<VQH_mnem>")
1276 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI instance: lanes 0-3 and lanes 4-7 of operand 1 are selected as two
;; V4HI values and combined by <VQH_mnem>.<VQH_sign>16, reading the %e and
;; %f parts of operand 1.  Operand 0 is a pure output, so it uses the "="
;; constraint modifier like the V4SI and V4SF variants.
1279 (define_insn "quad_halves_<code>v8hi"
1280 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1282 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1283 (parallel [(const_int 0) (const_int 1)
1284 (const_int 2) (const_int 3)]))
1285 (vec_select:V4HI (match_dup 1)
1286 (parallel [(const_int 4) (const_int 5)
1287 (const_int 6) (const_int 7)]))))]
1289 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1290 [(set_attr "vqh_mnem" "<VQH_mnem>")
1291 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI instance: lanes 0-7 and lanes 8-15 of operand 1 are selected as two
;; V8QI values and combined by <VQH_mnem>.<VQH_sign>8, reading the %e and
;; %f parts of operand 1.  Operand 0 is a pure output, so it uses the "="
;; constraint modifier like the V4SI and V4SF variants.
1294 (define_insn "quad_halves_<code>v16qi"
1295 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1297 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1298 (parallel [(const_int 0) (const_int 1)
1299 (const_int 2) (const_int 3)
1300 (const_int 4) (const_int 5)
1301 (const_int 6) (const_int 7)]))
1302 (vec_select:V8QI (match_dup 1)
1303 (parallel [(const_int 8) (const_int 9)
1304 (const_int 10) (const_int 11)
1305 (const_int 12) (const_int 13)
1306 (const_int 14) (const_int 15)]))))]
1308 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1309 [(set_attr "vqh_mnem" "<VQH_mnem>")
1310 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Emits a move whose destination is the <V_HALF>mode subreg of the 128-bit
;; operand 0 at byte offset GET_MODE_SIZE (<V_HALF>mode).
1313 (define_expand "move_hi_quad_<mode>"
1314 [(match_operand:ANY128 0 "s_register_operand" "")
1315 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1318 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1319 GET_MODE_SIZE (<V_HALF>mode)),
;; Emits a move whose destination is a <V_HALF>mode subreg of the 128-bit
;; operand 0.  NOTE(review): presumably the subreg at byte offset 0, as the
;; counterpart of move_hi_quad_<mode> -- confirm.
1324 (define_expand "move_lo_quad_<mode>"
1325 [(match_operand:ANY128 0 "s_register_operand" "")
1326 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1329 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1335 ;; Reduction operations
;; Sum reduction of a 64-bit (VD) vector, delegated to neon_pairwise_reduce
;; with the neon_vpadd_internal<mode> generator.  Float modes are only
;; handled under -funsafe-math-optimizations.
1337 (define_expand "reduc_splus_<mode>"
1338 [(match_operand:VD 0 "s_register_operand" "")
1339 (match_operand:VD 1 "s_register_operand" "")]
1340 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1342 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1343 &gen_neon_vpadd_internal<mode>);
;; Sum reduction of a 128-bit (VQ) vector, little-endian only: the halves
;; are combined with quad_halves_plus<mode>, the half-width result is
;; reduced by reduc_splus_<V_half>, and move_lo_quad_<mode> places it in
;; operand 0.  Float modes need -funsafe-math-optimizations.
1347 (define_expand "reduc_splus_<mode>"
1348 [(match_operand:VQ 0 "s_register_operand" "")
1349 (match_operand:VQ 1 "s_register_operand" "")]
1350 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1351 && !BYTES_BIG_ENDIAN"
1353 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1354 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1356 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1357 emit_insn (gen_reduc_splus_<V_half> (res_d, step1));
1358 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; V2DI sum reduction, little-endian only: a single vadd.i64 of the %e and
;; %f parts of operand 1, written to the %e part of operand 0.
1363 (define_insn "reduc_splus_v2di"
1364 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1365 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1367 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1368 "vadd.i64\t%e0, %e1, %f1"
1369 [(set_attr "type" "neon_add_q")]
1372 ;; NEON does not distinguish between signed and unsigned addition except on
1373 ;; widening operations.
;; Unsigned sum reduction for integer vectors: forwards directly to
;; reduc_splus_<mode>.  Quad-register modes are little-endian only.
1374 (define_expand "reduc_uplus_<mode>"
1375 [(match_operand:VDQI 0 "s_register_operand" "")
1376 (match_operand:VDQI 1 "s_register_operand" "")]
1377 "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)"
1379 emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
;; Signed-minimum reduction of a 64-bit (VD) vector, delegated to
;; neon_pairwise_reduce with the neon_vpsmin<mode> generator.  Float modes
;; are only handled under -funsafe-math-optimizations.
1383 (define_expand "reduc_smin_<mode>"
1384 [(match_operand:VD 0 "s_register_operand" "")
1385 (match_operand:VD 1 "s_register_operand" "")]
1386 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1388 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1389 &gen_neon_vpsmin<mode>);
;; Signed-minimum reduction of a 128-bit (VQ) vector, little-endian only:
;; quad_halves_smin<mode> combines the halves, reduc_smin_<V_half> reduces
;; the half-width result, and move_lo_quad_<mode> places it in operand 0.
;; Float modes need -funsafe-math-optimizations.
1393 (define_expand "reduc_smin_<mode>"
1394 [(match_operand:VQ 0 "s_register_operand" "")
1395 (match_operand:VQ 1 "s_register_operand" "")]
1396 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1397 && !BYTES_BIG_ENDIAN"
1399 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1400 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1402 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1403 emit_insn (gen_reduc_smin_<V_half> (res_d, step1));
1404 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Signed-maximum reduction of a 64-bit (VD) vector, delegated to
;; neon_pairwise_reduce with the neon_vpsmax<mode> generator.  Float modes
;; are only handled under -funsafe-math-optimizations.
1409 (define_expand "reduc_smax_<mode>"
1410 [(match_operand:VD 0 "s_register_operand" "")
1411 (match_operand:VD 1 "s_register_operand" "")]
1412 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1414 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1415 &gen_neon_vpsmax<mode>);
;; Signed-maximum reduction of a 128-bit (VQ) vector, little-endian only:
;; quad_halves_smax<mode> combines the halves, reduc_smax_<V_half> reduces
;; the half-width result, and move_lo_quad_<mode> places it in operand 0.
;; Float modes need -funsafe-math-optimizations.
1419 (define_expand "reduc_smax_<mode>"
1420 [(match_operand:VQ 0 "s_register_operand" "")
1421 (match_operand:VQ 1 "s_register_operand" "")]
1422 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1423 && !BYTES_BIG_ENDIAN"
1425 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1426 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1428 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1429 emit_insn (gen_reduc_smax_<V_half> (res_d, step1));
1430 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Unsigned-minimum reduction of a 64-bit integer (VDI) vector, delegated
;; to neon_pairwise_reduce with the neon_vpumin<mode> generator.
1435 (define_expand "reduc_umin_<mode>"
1436 [(match_operand:VDI 0 "s_register_operand" "")
1437 (match_operand:VDI 1 "s_register_operand" "")]
1440 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1441 &gen_neon_vpumin<mode>);
;; Unsigned-minimum reduction of a 128-bit integer (VQI) vector,
;; little-endian only: quad_halves_umin<mode> combines the halves,
;; reduc_umin_<V_half> reduces the half-width result, and
;; move_lo_quad_<mode> places it in operand 0.
1445 (define_expand "reduc_umin_<mode>"
1446 [(match_operand:VQI 0 "s_register_operand" "")
1447 (match_operand:VQI 1 "s_register_operand" "")]
1448 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1450 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1451 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1453 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1454 emit_insn (gen_reduc_umin_<V_half> (res_d, step1));
1455 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Unsigned-maximum reduction of a 64-bit integer (VDI) vector, delegated
;; to neon_pairwise_reduce with the neon_vpumax<mode> generator.
1460 (define_expand "reduc_umax_<mode>"
1461 [(match_operand:VDI 0 "s_register_operand" "")
1462 (match_operand:VDI 1 "s_register_operand" "")]
1465 neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
1466 &gen_neon_vpumax<mode>);
;; Unsigned-maximum reduction of a 128-bit integer (VQI) vector,
;; little-endian only: quad_halves_umax<mode> combines the halves,
;; reduc_umax_<V_half> reduces the half-width result, and
;; move_lo_quad_<mode> places it in operand 0.
1470 (define_expand "reduc_umax_<mode>"
1471 [(match_operand:VQI 0 "s_register_operand" "")
1472 (match_operand:VQI 1 "s_register_operand" "")]
1473 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1475 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1476 rtx res_d = gen_reg_rtx (<V_HALF>mode);
1478 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1479 emit_insn (gen_reduc_umax_<V_half> (res_d, step1));
1480 emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
;; Pairwise add of two 64-bit (VD) vectors, emitted as vpadd.<V_if_elem>.
;; The scheduling type depends on whether the mode is floating-point.
1485 (define_insn "neon_vpadd_internal<mode>"
1486 [(set (match_operand:VD 0 "s_register_operand" "=w")
1487 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1488 (match_operand:VD 2 "s_register_operand" "w")]
1491 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1492 ;; Assume this schedules like vadd.
1494 (if_then_else (match_test "<Is_float_mode>")
1495 (const_string "neon_fp_reduc_add_s<q>")
1496 (const_string "neon_reduc_add<q>")))]
;; Pairwise minimum of two 64-bit (VD) vectors, emitted as
;; vpmin.<V_s_elem>.  The scheduling type depends on whether the mode is
;; floating-point.
1499 (define_insn "neon_vpsmin<mode>"
1500 [(set (match_operand:VD 0 "s_register_operand" "=w")
1501 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1502 (match_operand:VD 2 "s_register_operand" "w")]
1505 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1507 (if_then_else (match_test "<Is_float_mode>")
1508 (const_string "neon_fp_reduc_minmax_s<q>")
1509 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise maximum of two 64-bit (VD) vectors, emitted as
;; vpmax.<V_s_elem>.  The scheduling type depends on whether the mode is
;; floating-point.
1512 (define_insn "neon_vpsmax<mode>"
1513 [(set (match_operand:VD 0 "s_register_operand" "=w")
1514 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1515 (match_operand:VD 2 "s_register_operand" "w")]
1518 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1520 (if_then_else (match_test "<Is_float_mode>")
1521 (const_string "neon_fp_reduc_minmax_s<q>")
1522 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise minimum of two 64-bit integer (VDI) vectors, emitted as
;; vpmin.<V_u_elem>.
1525 (define_insn "neon_vpumin<mode>"
1526 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1527 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1528 (match_operand:VDI 2 "s_register_operand" "w")]
1531 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1532 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise maximum of two 64-bit integer (VDI) vectors, emitted as
;; vpmax.<V_u_elem>.
1535 (define_insn "neon_vpumax<mode>"
1536 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1537 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1538 (match_operand:VDI 2 "s_register_operand" "w")]
1541 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1542 [(set_attr "type" "neon_reduc_minmax<q>")]
1545 ;; Saturating arithmetic
1547 ; NOTE: Neon supports many more saturating variants of instructions than the
1548 ; following, but these are all GCC currently understands.
1549 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1550 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) on 64-bit (VD) vectors, emitted as
;; vqadd.<V_s_elem>.
1553 (define_insn "*ss_add<mode>_neon"
1554 [(set (match_operand:VD 0 "s_register_operand" "=w")
1555 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1556 (match_operand:VD 2 "s_register_operand" "w")))]
1558 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1559 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) on 64-bit (VD) vectors, emitted as
;; vqadd.<V_u_elem>.
1562 (define_insn "*us_add<mode>_neon"
1563 [(set (match_operand:VD 0 "s_register_operand" "=w")
1564 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1565 (match_operand:VD 2 "s_register_operand" "w")))]
1567 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1568 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) on 64-bit (VD) vectors, emitted as
;; vqsub.<V_s_elem>.
1571 (define_insn "*ss_sub<mode>_neon"
1572 [(set (match_operand:VD 0 "s_register_operand" "=w")
1573 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1574 (match_operand:VD 2 "s_register_operand" "w")))]
1576 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1577 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) on 64-bit (VD) vectors, emitted
;; as vqsub.<V_u_elem>.
1580 (define_insn "*us_sub<mode>_neon"
1581 [(set (match_operand:VD 0 "s_register_operand" "=w")
1582 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1583 (match_operand:VD 2 "s_register_operand" "w")))]
1585 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1586 [(set_attr "type" "neon_qsub<q>")]
1589 ;; Conditional instructions. These are comparisons with conditional moves for
1590 ;; vectors. They perform the assignment:
1592 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1594 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select.  A lane mask is built in <V_cmp_result>mode
;; from operands 4 and 5 using the vcge/vcgt/vceq generators (or the
;; vclt/vcle forms for a zero operand 5), and neon_vbsl<mode> then chooses
;; between operands 1 and 2.  Where the emitted mask is the inverse of the
;; requested comparison, the two vbsl data operands are exchanged instead
;; (swap_bsl_operands).  Float modes need -funsafe-math-optimizations.
1597 (define_expand "vcond<mode><mode>"
1598 [(set (match_operand:VDQW 0 "s_register_operand" "")
1600 (match_operator 3 "comparison_operator"
1601 [(match_operand:VDQW 4 "s_register_operand" "")
1602 (match_operand:VDQW 5 "nonmemory_operand" "")])
1603 (match_operand:VDQW 1 "s_register_operand" "")
1604 (match_operand:VDQW 2 "s_register_operand" "")))]
1605 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1607 HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
1609 rtx magic_rtx = GEN_INT (magic_word);
1611 int use_zero_form = 0;
1612 int swap_bsl_operands = 0;
1613 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1614 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1616 rtx (*base_comparison) (rtx, rtx, rtx, rtx);
1617 rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);
1619 switch (GET_CODE (operands[3]))
1626 if (operands[5] == CONST0_RTX (<MODE>mode))
1633 if (!REG_P (operands[5]))
1634 operands[5] = force_reg (<MODE>mode, operands[5]);
1637 switch (GET_CODE (operands[3]))
1647 base_comparison = gen_neon_vcge<mode>;
1648 complimentary_comparison = gen_neon_vcgt<mode>;
1656 base_comparison = gen_neon_vcgt<mode>;
1657 complimentary_comparison = gen_neon_vcge<mode>;
1662 base_comparison = gen_neon_vceq<mode>;
1663 complimentary_comparison = gen_neon_vceq<mode>;
1669 switch (GET_CODE (operands[3]))
1676 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1677 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1683 Note that there also exist direct comparison against 0 forms,
1684 so catch those as a special case. */
1688 switch (GET_CODE (operands[3]))
1691 base_comparison = gen_neon_vclt<mode>;
1694 base_comparison = gen_neon_vcle<mode>;
1697 /* Do nothing, other zero form cases already have the correct
1704 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1706 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1713 /* Vector compare returns false for lanes which are unordered, so if we use
1714 the inverse of the comparison we actually want to emit, then
1715 swap the operands to BSL, we will end up with the correct result.
1716 Note that a NE NaN and NaN NE b are true for all a, b.
1718 Our transformations are:
1723 a NE b -> !(a EQ b) */
1726 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1728 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1730 swap_bsl_operands = 1;
1733 /* We check (a > b || b > a). Combining these comparisons gives us
1734 true iff (a != b && a ORDERED b); swapping the operands to BSL
1735 will then give us (a == b || a UNORDERED b) as intended. */
1737 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx));
1738 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx));
1739 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1740 swap_bsl_operands = 1;
1743 /* Operands are ORDERED iff (a > b || b >= a).
1744 Swapping the operands to BSL will give the UNORDERED case. */
1745 swap_bsl_operands = 1;
1748 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx));
1749 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx));
1750 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1756 if (swap_bsl_operands)
1757 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1760 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Integer (VDQIW) vector conditional select.  A lane mask is produced by
;; one of the vcge/vcgt/vceq generators (or vcle/vclt), in some cases with
;; operands 4 and 5 exchanged, and neon_vbsl<mode> then chooses between
;; operands 1 and 2; one of the two vbsl calls passes operand 2 ahead of
;; operand 1.  A non-register, non-zero operand 5 is forced into a register.
1765 (define_expand "vcondu<mode><mode>"
1766 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1768 (match_operator 3 "arm_comparison_operator"
1769 [(match_operand:VDQIW 4 "s_register_operand" "")
1770 (match_operand:VDQIW 5 "s_register_operand" "")])
1771 (match_operand:VDQIW 1 "s_register_operand" "")
1772 (match_operand:VDQIW 2 "s_register_operand" "")))]
1776 int inverse = 0, immediate_zero = 0;
1778 mask = gen_reg_rtx (<V_cmp_result>mode);
1780 if (operands[5] == CONST0_RTX (<MODE>mode))
1782 else if (!REG_P (operands[5]))
1783 operands[5] = force_reg (<MODE>mode, operands[5]);
1785 switch (GET_CODE (operands[3]))
1788 emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
1793 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
1798 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1804 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
1807 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
1813 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
1816 emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
1821 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1831 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1834 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1840 ;; Patterns for builtins.
1842 ; good for plain vadd, vaddq.
;; Builtin expander for vadd on VCVTF modes.  The generic add<mode>3
;; pattern is used when the mode is not floating-point or
;; -funsafe-math-optimizations is enabled; otherwise the unspec form
;; neon_vadd<mode>_unspec is emitted.
1844 (define_expand "neon_vadd<mode>"
1845 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1846 (match_operand:VCVTF 1 "s_register_operand" "w")
1847 (match_operand:VCVTF 2 "s_register_operand" "w")
1848 (match_operand:SI 3 "immediate_operand" "i")]
1851 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1852 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1854 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1859 ; Note that NEON operations don't support the full IEEE 754 standard: in
1860 ; particular, denormal values are flushed to zero. This means that GCC cannot
1861 ; use those instructions for autovectorization, etc. unless
1862 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1863 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1864 ; header) must work in either case: if -funsafe-math-optimizations is given,
1865 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1866 ; expand to unspecs (which may potentially limit the extent to which they might
1867 ; be optimized by generic code).
1869 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vadd for VCVTF modes, emitted as vadd.<V_if_elem>.  The
;; scheduling type depends on whether the mode is floating-point.
1871 (define_insn "neon_vadd<mode>_unspec"
1872 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1873 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1874 (match_operand:VCVTF 2 "s_register_operand" "w")]
1877 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1879 (if_then_else (match_test "<Is_float_mode>")
1880 (const_string "neon_fp_addsub_s<q>")
1881 (const_string "neon_add<q>")))]
1884 ; operand 3 represents in bits:
1885 ; bit 0: signed (vs unsigned).
1886 ; bit 1: rounding (vs none).
;; vaddl builtin: two VDI operands produce a <V_widen> result.  The type
;; suffix in the template is printed from the immediate operand 3 via %T3.
1888 (define_insn "neon_vaddl<mode>"
1889 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1890 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1891 (match_operand:VDI 2 "s_register_operand" "w")
1892 (match_operand:SI 3 "immediate_operand" "i")]
1895 "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
1896 [(set_attr "type" "neon_add_long")]
;; vaddw builtin: a <V_widen> operand 1 and a VDI operand 2 produce a
;; <V_widen> result.  The type suffix is printed from the immediate
;; operand 3 via %T3.
1899 (define_insn "neon_vaddw<mode>"
1900 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1901 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1902 (match_operand:VDI 2 "s_register_operand" "w")
1903 (match_operand:SI 3 "immediate_operand" "i")]
1906 "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
1907 [(set_attr "type" "neon_add_widen")]
;; vhadd builtin on VDQIW vectors.  Both the mnemonic infix (%O3) and the
;; type suffix (%T3) are printed from the immediate operand 3.
1912 (define_insn "neon_vhadd<mode>"
1913 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1914 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1915 (match_operand:VDQIW 2 "s_register_operand" "w")
1916 (match_operand:SI 3 "immediate_operand" "i")]
1919 "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1920 [(set_attr "type" "neon_add_halve_q")]
;; vqadd builtin on VDQIX vectors.  The type suffix is printed from the
;; immediate operand 3 via %T3.
1923 (define_insn "neon_vqadd<mode>"
1924 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1925 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1926 (match_operand:VDQIX 2 "s_register_operand" "w")
1927 (match_operand:SI 3 "immediate_operand" "i")]
1930 "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1931 [(set_attr "type" "neon_qadd<q>")]
;; vaddhn builtin: two VN operands produce a <V_narrow> result.  The
;; mnemonic infix is printed from the immediate operand 3 via %O3.
1934 (define_insn "neon_vaddhn<mode>"
1935 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1936 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1937 (match_operand:VN 2 "s_register_operand" "w")
1938 (match_operand:SI 3 "immediate_operand" "i")]
1941 "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
1942 [(set_attr "type" "neon_add_halve_narrow_q")]
1945 ;; We cannot replace this unspec with mul<mode>3 because of the odd
1946 ;; polynomial multiplication case that can be specified by operand 3.
;; vmul builtin on VDQW vectors, kept as an unspec.  The type suffix is
;; printed from the immediate operand 3 via %F3, and the scheduling type
;; depends on whether the mode is floating-point.
1947 (define_insn "neon_vmul<mode>"
1948 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1949 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
1950 (match_operand:VDQW 2 "s_register_operand" "w")
1951 (match_operand:SI 3 "immediate_operand" "i")]
1954 "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1956 (if_then_else (match_test "<Is_float_mode>")
1957 (const_string "neon_fp_mul_s<q>")
1958 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Builtin expander for vmla on VDQW modes.  mul<mode>3add<mode>_neon is
;; used when the mode is not floating-point or -funsafe-math-optimizations
;; is enabled; otherwise the unspec form neon_vmla<mode>_unspec is emitted.
;; Operand 4 is not passed on to either pattern.
1961 (define_expand "neon_vmla<mode>"
1962 [(match_operand:VDQW 0 "s_register_operand" "=w")
1963 (match_operand:VDQW 1 "s_register_operand" "0")
1964 (match_operand:VDQW 2 "s_register_operand" "w")
1965 (match_operand:VDQW 3 "s_register_operand" "w")
1966 (match_operand:SI 4 "immediate_operand" "i")]
1969 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1970 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1971 operands[2], operands[3]));
1973 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1974 operands[2], operands[3]));
;; Builtin expander for vfma on VCVTF modes; requires TARGET_FMA.  Forwards
;; to fma<mode>4_intrinsic, passing operands 2 and 3 directly after the
;; destination.
1978 (define_expand "neon_vfma<VCVTF:mode>"
1979 [(match_operand:VCVTF 0 "s_register_operand")
1980 (match_operand:VCVTF 1 "s_register_operand")
1981 (match_operand:VCVTF 2 "s_register_operand")
1982 (match_operand:VCVTF 3 "s_register_operand")
1983 (match_operand:SI 4 "immediate_operand")]
1984 "TARGET_NEON && TARGET_FMA"
1986 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfms on VCVTF modes; requires TARGET_FMA.  Forwards
;; to fmsub<mode>4_intrinsic, passing operands 2 and 3 directly after the
;; destination.
1991 (define_expand "neon_vfms<VCVTF:mode>"
1992 [(match_operand:VCVTF 0 "s_register_operand")
1993 (match_operand:VCVTF 1 "s_register_operand")
1994 (match_operand:VCVTF 2 "s_register_operand")
1995 (match_operand:VCVTF 3 "s_register_operand")
1996 (match_operand:SI 4 "immediate_operand")]
1997 "TARGET_NEON && TARGET_FMA"
1999 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2004 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmla for VDQW modes.  Operand 1 is tied to the output
;; register ("0" constraint), so the template only names operands 0, 2
;; and 3.  The scheduling type depends on whether the mode is
;; floating-point.
2006 (define_insn "neon_vmla<mode>_unspec"
2007 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2008 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2009 (match_operand:VDQW 2 "s_register_operand" "w")
2010 (match_operand:VDQW 3 "s_register_operand" "w")]
2013 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2015 (if_then_else (match_test "<Is_float_mode>")
2016 (const_string "neon_fp_mla_s<q>")
2017 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal builtin: VW operands 2 and 3 feed a <V_widen> result whose input
;; operand 1 is tied to the output register.  The type suffix is printed
;; from the immediate operand 4 via %T4.
2020 (define_insn "neon_vmlal<mode>"
2021 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2022 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2023 (match_operand:VW 2 "s_register_operand" "w")
2024 (match_operand:VW 3 "s_register_operand" "w")
2025 (match_operand:SI 4 "immediate_operand" "i")]
2028 "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2029 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Builtin expander for vmls on VDQW modes.
;; mul<mode>3neg<mode>add<mode>_neon is used when the mode is not
;; floating-point or -funsafe-math-optimizations is enabled; otherwise the
;; unspec form neon_vmls<mode>_unspec is emitted.  Operand 4 is not passed
;; on to either pattern.
2032 (define_expand "neon_vmls<mode>"
2033 [(match_operand:VDQW 0 "s_register_operand" "=w")
2034 (match_operand:VDQW 1 "s_register_operand" "0")
2035 (match_operand:VDQW 2 "s_register_operand" "w")
2036 (match_operand:VDQW 3 "s_register_operand" "w")
2037 (match_operand:SI 4 "immediate_operand" "i")]
2040 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2041 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2042 operands[1], operands[2], operands[3]));
2044 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2045 operands[2], operands[3]));
2049 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmls for VDQW modes.  Operand 1 is tied to the output
;; register ("0" constraint), so the template only names operands 0, 2
;; and 3.  The scheduling type depends on whether the mode is
;; floating-point.
2051 (define_insn "neon_vmls<mode>_unspec"
2052 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2053 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2054 (match_operand:VDQW 2 "s_register_operand" "w")
2055 (match_operand:VDQW 3 "s_register_operand" "w")]
2058 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2060 (if_then_else (match_test "<Is_float_mode>")
2061 (const_string "neon_fp_mla_s<q>")
2062 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl builtin: VW operands 2 and 3 feed a <V_widen> result whose input
;; operand 1 is tied to the output register.  The type suffix is printed
;; from the immediate operand 4 via %T4.
2065 (define_insn "neon_vmlsl<mode>"
2066 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2067 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2068 (match_operand:VW 2 "s_register_operand" "w")
2069 (match_operand:VW 3 "s_register_operand" "w")
2070 (match_operand:SI 4 "immediate_operand" "i")]
2073 "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2074 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; vqdmulh builtin on VMDQI vectors, with the <V_s_elem> suffix.  The
;; mnemonic infix is printed from the immediate operand 3 via %O3.
2077 (define_insn "neon_vqdmulh<mode>"
2078 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2079 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2080 (match_operand:VMDQI 2 "s_register_operand" "w")
2081 (match_operand:SI 3 "immediate_operand" "i")]
2084 "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2085 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; vqdmlal builtin: VMDI operands 2 and 3 feed a <V_widen> result whose
;; input operand 1 is tied to the output register.  The immediate operand 4
;; does not appear in the template.
2088 (define_insn "neon_vqdmlal<mode>"
2089 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2090 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2091 (match_operand:VMDI 2 "s_register_operand" "w")
2092 (match_operand:VMDI 3 "s_register_operand" "w")
2093 (match_operand:SI 4 "immediate_operand" "i")]
2096 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2097 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Emits VQDMLSL.  Same operand layout as neon_vqdmlal<mode>: a <V_widen>
;; accumulator in operand 1 tied to the output, VMDI-mode sources in
;; operands 2 and 3, and an SI immediate (operand 4) that the template
;; does not print.
2100 (define_insn "neon_vqdmlsl<mode>"
2101 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2102 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2103 (match_operand:VMDI 2 "s_register_operand" "w")
2104 (match_operand:VMDI 3 "s_register_operand" "w")
2105 (match_operand:SI 4 "immediate_operand" "i")]
2108 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2109 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Emits VMULL: two VW-mode sources (operands 1 and 2) produce a <V_widen>
;; result printed as a %q register.  The type suffix is built by %T3 from
;; the SI immediate in operand 3.
2112 (define_insn "neon_vmull<mode>"
2113 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2114 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2115 (match_operand:VW 2 "s_register_operand" "w")
2116 (match_operand:SI 3 "immediate_operand" "i")]
2119 "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2120 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Emits VQDMULL: VMDI-mode sources in operands 1 and 2, <V_widen> result.
;; The SI immediate in operand 3 is matched but not used by the template,
;; which always prints the <V_s_elem> suffix.
2123 (define_insn "neon_vqdmull<mode>"
2124 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2125 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2126 (match_operand:VMDI 2 "s_register_operand" "w")
2127 (match_operand:SI 3 "immediate_operand" "i")]
2130 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2131 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for vector subtract over the VCVTF modes.  When the mode is not
;; a float mode, or flag_unsafe_math_optimizations is set, it emits the
;; generic sub<mode>3 pattern on operands 0-2; the other emit_insn call
;; goes through neon_vsub<mode>_unspec instead.
2134 (define_expand "neon_vsub<mode>"
2135 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2136 (match_operand:VCVTF 1 "s_register_operand" "w")
2137 (match_operand:VCVTF 2 "s_register_operand" "w")
2138 (match_operand:SI 3 "immediate_operand" "i")]
2141 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2142 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2144 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2149 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec variant of vector subtract over the VCVTF modes; emits
;; vsub.<V_if_elem> on operands 0, 1 and 2.  The trailing if_then_else
;; yields "neon_fp_addsub_s<q>" for float modes and "neon_sub<q>" otherwise.
2151 (define_insn "neon_vsub<mode>_unspec"
2152 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2153 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2154 (match_operand:VCVTF 2 "s_register_operand" "w")]
2157 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2159 (if_then_else (match_test "<Is_float_mode>")
2160 (const_string "neon_fp_addsub_s<q>")
2161 (const_string "neon_sub<q>")))]
;; Emits VSUBL: both sources (operands 1 and 2) are in a VDI mode and the
;; result is in the corresponding <V_widen> mode.  %T3 builds the type
;; suffix from the SI immediate in operand 3.
2164 (define_insn "neon_vsubl<mode>"
2165 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2166 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2167 (match_operand:VDI 2 "s_register_operand" "w")
2168 (match_operand:SI 3 "immediate_operand" "i")]
2171 "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2172 [(set_attr "type" "neon_sub_long")]
;; Emits VSUBW: operand 1 is already in the <V_widen> mode (printed as %q1)
;; while operand 2 is a VDI-mode source (printed as %P2).  %T3 builds the
;; type suffix from the SI immediate in operand 3.
2175 (define_insn "neon_vsubw<mode>"
2176 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2177 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2178 (match_operand:VDI 2 "s_register_operand" "w")
2179 (match_operand:SI 3 "immediate_operand" "i")]
2182 "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
2183 [(set_attr "type" "neon_sub_widen")]
;; Emits VQSUB over the VDQIX modes; all three vector operands share the
;; same mode and %T3 builds the type suffix from operand 3.
2186 (define_insn "neon_vqsub<mode>"
2187 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2188 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2189 (match_operand:VDQIX 2 "s_register_operand" "w")
2190 (match_operand:SI 3 "immediate_operand" "i")]
2193 "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2194 [(set_attr "type" "neon_qsub<q>")]
;; Emits VHSUB over the VDQIW modes; %T3 builds the type suffix from the
;; SI immediate in operand 3.
2197 (define_insn "neon_vhsub<mode>"
2198 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2199 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2200 (match_operand:VDQIW 2 "s_register_operand" "w")
2201 (match_operand:SI 3 "immediate_operand" "i")]
2204 "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2205 [(set_attr "type" "neon_sub_halve<q>")]
;; Emits v<x>subhn: VN-mode sources (printed as %q registers) produce a
;; <V_narrow> result (printed as %P0).  <x> is whatever %O3 prints for the
;; SI immediate in operand 3 (presumably the optional rounding letter --
;; confirm against the %O handling in arm.c).
2208 (define_insn "neon_vsubhn<mode>"
2209 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2210 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2211 (match_operand:VN 2 "s_register_operand" "w")
2212 (match_operand:SI 3 "immediate_operand" "i")]
2215 "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
2216 [(set_attr "type" "neon_sub_halve_narrow_q")]
;; Emits VCEQ with a <V_cmp_result> result.  Two alternatives: operand 2 in
;; a register ("w"), or operand 2 matching the "Dz" constraint, in which
;; case the template prints the literal #0.  Operand 3 is not printed.
;; The nested if_then_else yields "neon_fp_compare_s<q>" for float modes,
;; else "neon_compare_zero<q>" when operand 2 satisfies zero_operand, else
;; "neon_compare<q>".
2219 (define_insn "neon_vceq<mode>"
2220 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2221 (unspec:<V_cmp_result>
2222 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2223 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2224 (match_operand:SI 3 "immediate_operand" "i,i")]
2228 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2229 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
2231 (if_then_else (match_test "<Is_float_mode>")
2232 (const_string "neon_fp_compare_s<q>")
2233 (if_then_else (match_operand 2 "zero_operand")
2234 (const_string "neon_compare_zero<q>")
2235 (const_string "neon_compare<q>"))))]
;; Emits VCGE with a <V_cmp_result> result.  Alternative 0 compares two
;; registers; alternative 1 ("Dz") compares operand 1 against the literal
;; #0.  %T3 builds the type suffix from the SI immediate in operand 3.
;; The nested if_then_else yields "neon_fp_compare_s<q>" for float modes,
;; else "neon_compare_zero<q>" when operand 2 satisfies zero_operand, else
;; "neon_compare<q>".
2238 (define_insn "neon_vcge<mode>"
2239 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2240 (unspec:<V_cmp_result>
2241 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2242 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2243 (match_operand:SI 3 "immediate_operand" "i,i")]
2247 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2248 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2250 (if_then_else (match_test "<Is_float_mode>")
2251 (const_string "neon_fp_compare_s<q>")
2252 (if_then_else (match_operand 2 "zero_operand")
2253 (const_string "neon_compare_zero<q>")
2254 (const_string "neon_compare<q>"))))]
;; Register-only VCGE over the integer VDQIW modes (no compare-with-zero
;; alternative).  The mnemonic printed is still "vcge"; the type suffix
;; comes from %T3 applied to the SI immediate in operand 3.
2257 (define_insn "neon_vcgeu<mode>"
2258 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2259 (unspec:<V_cmp_result>
2260 [(match_operand:VDQIW 1 "s_register_operand" "w")
2261 (match_operand:VDQIW 2 "s_register_operand" "w")
2262 (match_operand:SI 3 "immediate_operand" "i")]
2265 "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2266 [(set_attr "type" "neon_compare<q>")]
;; Emits VCGT with a <V_cmp_result> result.  Alternative 0 compares two
;; registers; alternative 1 ("Dz") compares operand 1 against the literal
;; #0.  %T3 builds the type suffix from the SI immediate in operand 3.
;; The nested if_then_else yields "neon_fp_compare_s<q>" for float modes,
;; else "neon_compare_zero<q>" when operand 2 satisfies zero_operand, else
;; "neon_compare<q>".
2269 (define_insn "neon_vcgt<mode>"
2270 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2271 (unspec:<V_cmp_result>
2272 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2273 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2274 (match_operand:SI 3 "immediate_operand" "i,i")]
2278 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2279 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2281 (if_then_else (match_test "<Is_float_mode>")
2282 (const_string "neon_fp_compare_s<q>")
2283 (if_then_else (match_operand 2 "zero_operand")
2284 (const_string "neon_compare_zero<q>")
2285 (const_string "neon_compare<q>"))))]
;; Register-only VCGT over the integer VDQIW modes (no compare-with-zero
;; alternative).  The mnemonic printed is still "vcgt"; the type suffix
;; comes from %T3 applied to the SI immediate in operand 3.
2288 (define_insn "neon_vcgtu<mode>"
2289 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2290 (unspec:<V_cmp_result>
2291 [(match_operand:VDQIW 1 "s_register_operand" "w")
2292 (match_operand:VDQIW 2 "s_register_operand" "w")
2293 (match_operand:SI 3 "immediate_operand" "i")]
2296 "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2297 [(set_attr "type" "neon_compare<q>")]
2300 ;; VCLE and VCLT only support comparisons with immediate zero (register
2301 ;; variants are VCGE and VCGT with operands reversed).
;; Emits VCLE against the literal #0.  Operand 2 must satisfy zero_operand
;; and is never printed; %T3 builds the type suffix from operand 3.
;; NOTE(review): because operand 2's predicate is already zero_operand, the
;; final "neon_compare<q>" arm of the if_then_else looks unreachable.
2303 (define_insn "neon_vcle<mode>"
2304 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2305 (unspec:<V_cmp_result>
2306 [(match_operand:VDQW 1 "s_register_operand" "w")
2307 (match_operand:VDQW 2 "zero_operand" "Dz")
2308 (match_operand:SI 3 "immediate_operand" "i")]
2311 "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2313 (if_then_else (match_test "<Is_float_mode>")
2314 (const_string "neon_fp_compare_s<q>")
2315 (if_then_else (match_operand 2 "zero_operand")
2316 (const_string "neon_compare_zero<q>")
2317 (const_string "neon_compare<q>"))))]
;; Emits VCLT against the literal #0.  Operand 2 must satisfy zero_operand
;; and is never printed; %T3 builds the type suffix from operand 3.
;; NOTE(review): because operand 2's predicate is already zero_operand, the
;; final "neon_compare<q>" arm of the if_then_else looks unreachable.
2320 (define_insn "neon_vclt<mode>"
2321 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2322 (unspec:<V_cmp_result>
2323 [(match_operand:VDQW 1 "s_register_operand" "w")
2324 (match_operand:VDQW 2 "zero_operand" "Dz")
2325 (match_operand:SI 3 "immediate_operand" "i")]
2328 "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2330 (if_then_else (match_test "<Is_float_mode>")
2331 (const_string "neon_fp_compare_s<q>")
2332 (if_then_else (match_operand 2 "zero_operand")
2333 (const_string "neon_compare_zero<q>")
2334 (const_string "neon_compare<q>"))))]
;; Emits VACGE over the VCVTF modes with a <V_cmp_result> result.  The SI
;; immediate in operand 3 is matched but not used by the template.
2337 (define_insn "neon_vcage<mode>"
2338 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2339 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2340 (match_operand:VCVTF 2 "s_register_operand" "w")
2341 (match_operand:SI 3 "immediate_operand" "i")]
2344 "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2345 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Emits VACGT over the VCVTF modes with a <V_cmp_result> result.  The SI
;; immediate in operand 3 is matched but not used by the template.
2348 (define_insn "neon_vcagt<mode>"
2349 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2350 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2351 (match_operand:VCVTF 2 "s_register_operand" "w")
2352 (match_operand:SI 3 "immediate_operand" "i")]
2355 "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2356 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Emits VTST over the integer VDQIW modes; the result has the same mode as
;; the sources.  Operand 3 is matched but not used by the template.
2359 (define_insn "neon_vtst<mode>"
2360 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2361 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2362 (match_operand:VDQIW 2 "s_register_operand" "w")
2363 (match_operand:SI 3 "immediate_operand" "i")]
2366 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2367 [(set_attr "type" "neon_tst<q>")]
;; Emits VABD over the VDQW modes; %T3 builds the type suffix from the SI
;; immediate in operand 3.  The trailing if_then_else yields
;; "neon_fp_abd_s<q>" for float modes and "neon_abd<q>" otherwise.
2370 (define_insn "neon_vabd<mode>"
2371 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2372 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2373 (match_operand:VDQW 2 "s_register_operand" "w")
2374 (match_operand:SI 3 "immediate_operand" "i")]
2377 "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2379 (if_then_else (match_test "<Is_float_mode>")
2380 (const_string "neon_fp_abd_s<q>")
2381 (const_string "neon_abd<q>")))]
;; Emits VABDL: VW-mode sources in operands 1 and 2, <V_widen> result.
;; %T3 builds the type suffix from the SI immediate in operand 3.
2384 (define_insn "neon_vabdl<mode>"
2385 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2386 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2387 (match_operand:VW 2 "s_register_operand" "w")
2388 (match_operand:SI 3 "immediate_operand" "i")]
2391 "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2392 [(set_attr "type" "neon_abd_long")]
;; Emits VABA.  The RTL is a plus of an unspec over operands 2, 3 and 4 with
;; operand 1, and operand 1 is tied to the output register (constraint "0").
;; Only operands 0, 2 and 3 are printed; %T4 builds the type suffix.
2395 (define_insn "neon_vaba<mode>"
2396 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2397 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2398 (match_operand:VDQIW 3 "s_register_operand" "w")
2399 (match_operand:SI 4 "immediate_operand" "i")]
2401 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2403 "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2404 [(set_attr "type" "neon_arith_acc<q>")]
;; Emits VABAL.  Same shape as neon_vaba<mode>, but the sources (operands 2
;; and 3) are in a VW mode while the accumulator (operand 1, tied to the
;; output by constraint "0") and the result are in <V_widen>.
2407 (define_insn "neon_vabal<mode>"
2408 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2409 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2410 (match_operand:VW 3 "s_register_operand" "w")
2411 (match_operand:SI 4 "immediate_operand" "i")]
2413 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2415 "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2416 [(set_attr "type" "neon_arith_acc<q>")]
;; Emits VMAX over the VDQW modes; %T3 builds the type suffix from the SI
;; immediate in operand 3.  The trailing if_then_else yields
;; "neon_fp_minmax_s<q>" for float modes and "neon_minmax<q>" otherwise.
2419 (define_insn "neon_vmax<mode>"
2420 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2421 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2422 (match_operand:VDQW 2 "s_register_operand" "w")
2423 (match_operand:SI 3 "immediate_operand" "i")]
2426 "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2428 (if_then_else (match_test "<Is_float_mode>")
2429 (const_string "neon_fp_minmax_s<q>")
2430 (const_string "neon_minmax<q>")))]
;; Emits VMIN over the VDQW modes; %T3 builds the type suffix from the SI
;; immediate in operand 3.  The trailing if_then_else yields
;; "neon_fp_minmax_s<q>" for float modes and "neon_minmax<q>" otherwise.
2433 (define_insn "neon_vmin<mode>"
2434 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2435 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2436 (match_operand:VDQW 2 "s_register_operand" "w")
2437 (match_operand:SI 3 "immediate_operand" "i")]
2440 "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2442 (if_then_else (match_test "<Is_float_mode>")
2443 (const_string "neon_fp_minmax_s<q>")
2444 (const_string "neon_minmax<q>")))]
;; Expander for the VD-mode vpadd builtin.  It takes an extra SI immediate
;; (operand 3) and forwards to the neon_vpadd_internal<mode> pattern; the
;; remainder of that call's argument list is not visible in this listing.
2447 (define_expand "neon_vpadd<mode>"
2448 [(match_operand:VD 0 "s_register_operand" "=w")
2449 (match_operand:VD 1 "s_register_operand" "w")
2450 (match_operand:VD 2 "s_register_operand" "w")
2451 (match_operand:SI 3 "immediate_operand" "i")]
2454 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; Emits VPADDL: a single VDQIW-mode source produces a <V_double_width>
;; result.  %T2 builds the type suffix from the SI immediate in operand 2.
2459 (define_insn "neon_vpaddl<mode>"
2460 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2461 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")
2462 (match_operand:SI 2 "immediate_operand" "i")]
2465 "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2466 [(set_attr "type" "neon_reduc_add_long")]
;; Emits VPADAL.  Operand 1 is the <V_double_width> accumulator, tied to the
;; output by constraint "0"; operand 2 is the VDQIW-mode source.  %T3
;; builds the type suffix from the SI immediate in operand 3.
2469 (define_insn "neon_vpadal<mode>"
2470 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2471 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2472 (match_operand:VDQIW 2 "s_register_operand" "w")
2473 (match_operand:SI 3 "immediate_operand" "i")]
2476 "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2477 [(set_attr "type" "neon_reduc_add_acc")]
;; Emits VPMAX over the VD modes; %T3 builds the type suffix from the SI
;; immediate in operand 3.  The trailing if_then_else yields
;; "neon_fp_reduc_minmax_s<q>" for float modes and "neon_reduc_minmax<q>"
;; otherwise.
2480 (define_insn "neon_vpmax<mode>"
2481 [(set (match_operand:VD 0 "s_register_operand" "=w")
2482 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2483 (match_operand:VD 2 "s_register_operand" "w")
2484 (match_operand:SI 3 "immediate_operand" "i")]
2487 "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2489 (if_then_else (match_test "<Is_float_mode>")
2490 (const_string "neon_fp_reduc_minmax_s<q>")
2491 (const_string "neon_reduc_minmax<q>")))]
;; Emits VPMIN over the VD modes; %T3 builds the type suffix from the SI
;; immediate in operand 3.  The trailing if_then_else yields
;; "neon_fp_reduc_minmax_s<q>" for float modes and "neon_reduc_minmax<q>"
;; otherwise.
2494 (define_insn "neon_vpmin<mode>"
2495 [(set (match_operand:VD 0 "s_register_operand" "=w")
2496 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2497 (match_operand:VD 2 "s_register_operand" "w")
2498 (match_operand:SI 3 "immediate_operand" "i")]
2501 "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2503 (if_then_else (match_test "<Is_float_mode>")
2504 (const_string "neon_fp_reduc_minmax_s<q>")
2505 (const_string "neon_reduc_minmax<q>")))]
;; Emits VRECPS over the VCVTF modes.  The SI immediate in operand 3 is
;; matched but not used by the template.
2508 (define_insn "neon_vrecps<mode>"
2509 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2510 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2511 (match_operand:VCVTF 2 "s_register_operand" "w")
2512 (match_operand:SI 3 "immediate_operand" "i")]
2515 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2516 [(set_attr "type" "neon_fp_recps_s<q>")]
;; Emits VRSQRTS over the VCVTF modes.  The SI immediate in operand 3 is
;; matched but not used by the template.
2519 (define_insn "neon_vrsqrts<mode>"
2520 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2521 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2522 (match_operand:VCVTF 2 "s_register_operand" "w")
2523 (match_operand:SI 3 "immediate_operand" "i")]
2526 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2527 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for the vabs builtin over the VDQW modes: it simply emits the
;; standard abs<mode>2 pattern on operands 0 and 1.  The SI immediate in
;; operand 2 is accepted but not passed on.
2530 (define_expand "neon_vabs<mode>"
2531 [(match_operand:VDQW 0 "s_register_operand" "")
2532 (match_operand:VDQW 1 "s_register_operand" "")
2533 (match_operand:SI 2 "immediate_operand" "")]
2536 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; Emits VQABS over the integer VDQIW modes with the <V_s_elem> suffix.
;; Operand 2 is matched but not used by the template.
2540 (define_insn "neon_vqabs<mode>"
2541 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2542 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2543 (match_operand:SI 2 "immediate_operand" "i")]
2546 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2547 [(set_attr "type" "neon_qabs<q>")]
;; Implements the bswap RTL code for the VDQHSD modes by emitting
;; vrev<V_sz_elem>.8, i.e. the element size selects the VREV variant and
;; the data size is fixed at 8.
2550 (define_insn "neon_bswap<mode>"
2551 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2552 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2554 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2555 [(set_attr "type" "neon_rev<q>")]
;; Expander for the vneg builtin over the VDQW modes: it simply emits the
;; standard neg<mode>2 pattern on operands 0 and 1.  The SI immediate in
;; operand 2 is accepted but not passed on.
2558 (define_expand "neon_vneg<mode>"
2559 [(match_operand:VDQW 0 "s_register_operand" "")
2560 (match_operand:VDQW 1 "s_register_operand" "")
2561 (match_operand:SI 2 "immediate_operand" "")]
2564 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; Emits VQNEG over the integer VDQIW modes with the <V_s_elem> suffix.
;; Operand 2 is matched but not used by the template.
2568 (define_insn "neon_vqneg<mode>"
2569 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2570 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2571 (match_operand:SI 2 "immediate_operand" "i")]
2574 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2575 [(set_attr "type" "neon_qneg<q>")]
;; Emits VCLS over the integer VDQIW modes with the <V_s_elem> suffix.
;; Operand 2 is matched but not used by the template.
2578 (define_insn "neon_vcls<mode>"
2579 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2580 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2581 (match_operand:SI 2 "immediate_operand" "i")]
2584 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2585 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz pattern for the integer VDQIW modes, expressed with
;; the clz RTL code (not an unspec) and implemented by VCLZ.
2588 (define_insn "clz<mode>2"
2589 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2590 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2592 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2593 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vclz builtin: forwards operands 0 and 1 to the
;; clz<mode>2 pattern.  The SI immediate in operand 2 is not passed on.
2596 (define_expand "neon_vclz<mode>"
2597 [(match_operand:VDQIW 0 "s_register_operand" "")
2598 (match_operand:VDQIW 1 "s_register_operand" "")
2599 (match_operand:SI 2 "immediate_operand" "")]
2602 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount pattern for the VE modes, expressed with the
;; popcount RTL code and implemented by VCNT.
2606 (define_insn "popcount<mode>2"
2607 [(set (match_operand:VE 0 "s_register_operand" "=w")
2608 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2610 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2611 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vcnt builtin: forwards operands 0 and 1 to the
;; popcount<mode>2 pattern.  The SI immediate in operand 2 is not passed on.
2614 (define_expand "neon_vcnt<mode>"
2615 [(match_operand:VE 0 "s_register_operand" "=w")
2616 (match_operand:VE 1 "s_register_operand" "w")
2617 (match_operand:SI 2 "immediate_operand" "i")]
2620 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; Emits VRECPE over the V32 modes with the <V_u_elem> suffix.  Operand 2
;; is matched but not used by the template.
2624 (define_insn "neon_vrecpe<mode>"
2625 [(set (match_operand:V32 0 "s_register_operand" "=w")
2626 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2627 (match_operand:SI 2 "immediate_operand" "i")]
2630 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2631 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; Emits VRSQRTE over the V32 modes with the <V_u_elem> suffix.  Operand 2
;; is matched but not used by the template.
2634 (define_insn "neon_vrsqrte<mode>"
2635 [(set (match_operand:V32 0 "s_register_operand" "=w")
2636 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2637 (match_operand:SI 2 "immediate_operand" "i")]
2640 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2641 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for the vmvn builtin over the integer VDQIW modes: forwards
;; operands 0 and 1 to the standard one_cmpl<mode>2 pattern.  The SI
;; immediate in operand 2 is not passed on.
2644 (define_expand "neon_vmvn<mode>"
2645 [(match_operand:VDQIW 0 "s_register_operand" "")
2646 (match_operand:VDQIW 1 "s_register_operand" "")
2647 (match_operand:SI 2 "immediate_operand" "")]
2650 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; VD-mode lane extract into an SI core register ("=r") using vmov.s.  At
;; output time, on BYTES_BIG_ENDIAN targets, the lane index in operand 2 is
;; mirrored (GET_MODE_NUNITS - 1 - elt) before it is printed with %c2.
2654 (define_insn "neon_vget_lane<mode>_sext_internal"
2655 [(set (match_operand:SI 0 "s_register_operand" "=r")
2657 (vec_select:<V_elem>
2658 (match_operand:VD 1 "s_register_operand" "w")
2659 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2662 if (BYTES_BIG_ENDIAN)
2664 int elt = INTVAL (operands[2]);
2665 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2666 operands[2] = GEN_INT (elt);
2668 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2670 [(set_attr "type" "neon_to_gp")]
;; VD-mode lane extract into an SI core register ("=r") using vmov.u.  At
;; output time, on BYTES_BIG_ENDIAN targets, the lane index in operand 2 is
;; mirrored (GET_MODE_NUNITS - 1 - elt) before it is printed with %c2.
2673 (define_insn "neon_vget_lane<mode>_zext_internal"
2674 [(set (match_operand:SI 0 "s_register_operand" "=r")
2676 (vec_select:<V_elem>
2677 (match_operand:VD 1 "s_register_operand" "w")
2678 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2681 if (BYTES_BIG_ENDIAN)
2683 int elt = INTVAL (operands[2]);
2684 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2685 operands[2] = GEN_INT (elt);
2687 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2689 [(set_attr "type" "neon_to_gp")]
;; VQ-mode counterpart of the vmov.s lane extract.  The output code splits
;; the lane index into a half selector (elt / halfelts) and an index within
;; that half (elt % halfelts, mirrored inside the half on BYTES_BIG_ENDIAN).
;; It then builds a <V_HALF>mode REG at regno + 2 * half and prints the
;; instruction itself through output_asm_insn on a private ops[] array.
2692 (define_insn "neon_vget_lane<mode>_sext_internal"
2693 [(set (match_operand:SI 0 "s_register_operand" "=r")
2695 (vec_select:<V_elem>
2696 (match_operand:VQ 1 "s_register_operand" "w")
2697 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2701 int regno = REGNO (operands[1]);
2702 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2703 unsigned int elt = INTVAL (operands[2]);
2704 unsigned int elt_adj = elt % halfelts;
2706 if (BYTES_BIG_ENDIAN)
2707 elt_adj = halfelts - 1 - elt_adj;
2709 ops[0] = operands[0];
2710 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2711 ops[2] = GEN_INT (elt_adj);
2712 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2716 [(set_attr "type" "neon_to_gp_q")]
;; VQ-mode counterpart of the vmov.u lane extract.  The output code splits
;; the lane index into a half selector (elt / halfelts) and an index within
;; that half (elt % halfelts, mirrored inside the half on BYTES_BIG_ENDIAN).
;; It then builds a <V_HALF>mode REG at regno + 2 * half and prints the
;; instruction itself through output_asm_insn on a private ops[] array.
2719 (define_insn "neon_vget_lane<mode>_zext_internal"
2720 [(set (match_operand:SI 0 "s_register_operand" "=r")
2722 (vec_select:<V_elem>
2723 (match_operand:VQ 1 "s_register_operand" "w")
2724 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2728 int regno = REGNO (operands[1]);
2729 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2730 unsigned int elt = INTVAL (operands[2]);
2731 unsigned int elt_adj = elt % halfelts;
2733 if (BYTES_BIG_ENDIAN)
2734 elt_adj = halfelts - 1 - elt_adj;
2736 ops[0] = operands[0];
2737 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2738 ops[2] = GEN_INT (elt_adj);
2739 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2743 [(set_attr "type" "neon_to_gp_q")]
;; Expander for the vget_lane builtin over the VDQW modes.
;;   operand 0: result, in the <V_ext> mode
;;   operand 1: source vector
;;   operand 2: lane index, checked against [0, GET_MODE_NUNITS) by
;;              neon_lane_bounds
;;   operand 3: the "magic" word read into `magic`
;; On BYTES_BIG_ENDIAN the lane index is XORed with (reg_nelts - 1), where
;; reg_nelts is the number of elements in 64 bits.  If the low two bits of
;; magic are both set, or the element size is 32 bits, the generic
;; vec_extract<mode> pattern is used.  Otherwise bit 0 of magic selects the
;; _sext_internal pattern, and the remaining call uses _zext_internal.
2746 (define_expand "neon_vget_lane<mode>"
2747 [(match_operand:<V_ext> 0 "s_register_operand" "")
2748 (match_operand:VDQW 1 "s_register_operand" "")
2749 (match_operand:SI 2 "immediate_operand" "")
2750 (match_operand:SI 3 "immediate_operand" "")]
2753 HOST_WIDE_INT magic = INTVAL (operands[3]);
2756 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
2758 if (BYTES_BIG_ENDIAN)
2760 /* The intrinsics are defined in terms of a model where the
2761 element ordering in memory is vldm order, whereas the generic
2762 RTL is defined in terms of a model where the element ordering
2763 in memory is array order. Convert the lane number to conform
2765 unsigned int elt = INTVAL (operands[2]);
2766 unsigned int reg_nelts
2767 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2768 elt ^= reg_nelts - 1;
2769 operands[2] = GEN_INT (elt);
2772 if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
2773 insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
2776 if ((magic & 1) != 0)
2777 insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
2780 insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
2787 ; Operand 3 (info word) is ignored because it does nothing useful with 64-bit elements.
;; DImode vget_lane: the lane index (operand 2) is validated with
;; neon_lane_bounds (operands[2], 0, 1) and the expansion is then a plain
;; move of operand 1 into operand 0.  Operand 3 is not read.
2790 (define_expand "neon_vget_lanedi"
2791 [(match_operand:DI 0 "s_register_operand" "=r")
2792 (match_operand:DI 1 "s_register_operand" "w")
2793 (match_operand:SI 2 "immediate_operand" "i")
2794 (match_operand:SI 3 "immediate_operand" "i")]
2797 neon_lane_bounds (operands[2], 0, 1);
2798 emit_move_insn (operands[0], operands[1]);
;; V2DI vget_lane: switches on the lane index in operand 2 and moves either
;; the DImode low part or the DImode high part of operand 1 into operand 0.
;; The remaining arm calls neon_lane_bounds (operands[2], 0, 1); the case
;; labels themselves are not visible in this listing.
2802 (define_expand "neon_vget_lanev2di"
2803 [(match_operand:DI 0 "s_register_operand" "")
2804 (match_operand:V2DI 1 "s_register_operand" "")
2805 (match_operand:SI 2 "immediate_operand" "")
2806 (match_operand:SI 3 "immediate_operand" "")]
2809 switch (INTVAL (operands[2]))
2812 emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
2815 emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
2818 neon_lane_bounds (operands[2], 0, 1);
;; Expander for the vset_lane builtin over the VDQ modes.
;;   operand 0: result vector
;;   operand 1: scalar of mode <V_elem> to insert
;;   operand 2: input vector
;;   operand 3: lane index, checked against [0, GET_MODE_NUNITS) by
;;              neon_lane_bounds
;; On BYTES_BIG_ENDIAN the lane index is XORed with (reg_nelts - 1).  The
;; lane is handed to vec_set<mode>_internal as the one-hot mask 1 << elt,
;; followed by operand 2.
2824 (define_expand "neon_vset_lane<mode>"
2825 [(match_operand:VDQ 0 "s_register_operand" "=w")
2826 (match_operand:<V_elem> 1 "s_register_operand" "r")
2827 (match_operand:VDQ 2 "s_register_operand" "0")
2828 (match_operand:SI 3 "immediate_operand" "i")]
2831 unsigned int elt = INTVAL (operands[3]);
2832 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
2834 if (BYTES_BIG_ENDIAN)
2836 unsigned int reg_nelts
2837 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
2838 elt ^= reg_nelts - 1;
2841 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2842 GEN_INT (1 << elt), operands[2]));
2846 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; DImode vset_lane: the lane index (operand 3) is validated with
;; neon_lane_bounds (operands[3], 0, 1) and the expansion is then a plain
;; move of operand 1 into operand 0.  Operand 2 is not read by the body.
2848 (define_expand "neon_vset_lanedi"
2849 [(match_operand:DI 0 "s_register_operand" "=w")
2850 (match_operand:DI 1 "s_register_operand" "r")
2851 (match_operand:DI 2 "s_register_operand" "0")
2852 (match_operand:SI 3 "immediate_operand" "i")]
2855 neon_lane_bounds (operands[3], 0, 1);
2856 emit_move_insn (operands[0], operands[1]);
;; Expander for the vcreate builtin over the VDX modes: it reinterprets the
;; DImode operand 1 in <MODE>mode via gen_lowpart and moves the result
;; into operand 0.
2860 (define_expand "neon_vcreate<mode>"
2861 [(match_operand:VDX 0 "s_register_operand" "")
2862 (match_operand:DI 1 "general_operand" "")]
2865 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2866 emit_move_insn (operands[0], src);
;; VX-mode vec_duplicate of a <V_elem> scalar held in a core register
;; (constraint "r"); emits vdup.<V_sz_elem>.
2870 (define_insn "neon_vdup_n<mode>"
2871 [(set (match_operand:VX 0 "s_register_operand" "=w")
2872 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2874 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2875 [(set_attr "type" "neon_from_gp<q>")]
;; V32-mode vec_duplicate with two alternatives for the scalar source:
;; constraint "r" (printed as %1, type neon_from_gp<q>) and constraint "t"
;; (printed with the %y1 output code, type neon_dup<q>).
2878 (define_insn "neon_vdup_n<mode>"
2879 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2880 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2883 vdup.<V_sz_elem>\t%<V_reg>0, %1
2884 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2885 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode vdup_n: with a single element there is nothing to replicate, so
;; the expansion is a plain move of operand 1 into operand 0.
2888 (define_expand "neon_vdup_ndi"
2889 [(match_operand:DI 0 "s_register_operand" "=w")
2890 (match_operand:DI 1 "s_register_operand" "r")]
2893 emit_move_insn (operands[0], operands[1]);
;; V2DI vec_duplicate of a DImode value.  Each alternative emits two VMOVs,
;; one writing %e0 and one writing %f0: from a core register pair
;; (%Q1, %R1) for constraint "r", or from a NEON register (%P1) for
;; constraint "w".  Length is therefore 8 and the type is "multiple".
2898 (define_insn "neon_vdup_nv2di"
2899 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2900 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2903 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2904 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2905 [(set_attr "length" "8")
2906 (set_attr "type" "multiple")]
;; Duplicates one lane of a <V_double_vector_mode> source (operand 1) across
;; a VDQW-mode result.  On BYTES_BIG_ENDIAN the lane index is mirrored
;; (GET_MODE_NUNITS - 1 - elt) before printing.  Two templates exist, one
;; printing the destination as %P0 and one as %q0; the condition choosing
;; between them is not visible in this listing.
2909 (define_insn "neon_vdup_lane<mode>_internal"
2910 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2912 (vec_select:<V_elem>
2913 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2914 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2917 if (BYTES_BIG_ENDIAN)
2919 int elt = INTVAL (operands[2]);
2920 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2921 operands[2] = GEN_INT (elt);
2924 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2926 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2928 [(set_attr "type" "neon_dup<q>")]
;; Expander for the vdup_lane builtin.  The lane index (operand 2) is
;; checked against the element count of <V_double_vector_mode>; on
;; BYTES_BIG_ENDIAN it is XORed with (reg_nelts - 1).  The work is then
;; delegated to neon_vdup_lane<mode>_internal.
2931 (define_expand "neon_vdup_lane<mode>"
2932 [(match_operand:VDQW 0 "s_register_operand" "=w")
2933 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2934 (match_operand:SI 2 "immediate_operand" "i")]
2937 neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
2938 if (BYTES_BIG_ENDIAN)
2940 unsigned int elt = INTVAL (operands[2]);
2941 unsigned int reg_nelts
2942 = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
2943 elt ^= reg_nelts - 1;
2944 operands[2] = GEN_INT (elt);
2946 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2951 ; Scalar index is ignored, since only zero is valid here.
;; DImode vdup_lane: validates the lane index with
;; neon_lane_bounds (operands[2], 0, 1), then simply moves operand 1 into
;; operand 0.
2952 (define_expand "neon_vdup_lanedi"
2953 [(match_operand:DI 0 "s_register_operand" "=w")
2954 (match_operand:DI 1 "s_register_operand" "w")
2955 (match_operand:SI 2 "immediate_operand" "i")]
2958 neon_lane_bounds (operands[2], 0, 1);
2959 emit_move_insn (operands[0], operands[1]);
2963 ; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI vdup_lane from a DImode source: validates the lane index with
;; neon_lane_bounds (operands[2], 0, 1), then reuses neon_vdup_nv2di to
;; replicate operand 1 into both halves of operand 0.
2964 (define_expand "neon_vdup_lanev2di"
2965 [(match_operand:V2DI 0 "s_register_operand" "=w")
2966 (match_operand:DI 1 "s_register_operand" "w")
2967 (match_operand:SI 2 "immediate_operand" "i")]
2970 neon_lane_bounds (operands[2], 0, 1);
2971 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2975 ; Disabled before reload because we don't want combine doing something silly,
2976 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap over the VDQX modes, written as two sets that exchange
;; operands 0 and 1; both operands are read-write ("+w").  The insn
;; condition requires reload_completed, so it only matches after reload.
2977 (define_insn "*neon_vswp<mode>"
2978 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2979 (match_operand:VDQX 1 "s_register_operand" "+w"))
2980 (set (match_dup 1) (match_dup 0))]
2981 "TARGET_NEON && reload_completed"
2982 "vswp\t%<V_reg>0, %<V_reg>1"
2983 [(set_attr "type" "neon_permute<q>")]
2986 ;; In this insn, operand 1 should be low, and operand 2 the high part of the dest vector.
2988 ;; FIXME: A different implementation of this builtin could make it much
2989 ;; more likely that we wouldn't actually need to output anything (we could make
2990 ;; it so that the reg allocator puts things in the right places magically
2991 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vec_concat of two VDX-mode registers into a <V_DOUBLE> result.  This is
;; a define_insn_and_split: once reload has completed, the pattern is split
;; by calling neon_split_vcombine on the operands.
2993 (define_insn_and_split "neon_vcombine<mode>"
2994 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2995 (vec_concat:<V_DOUBLE>
2996 (match_operand:VDX 1 "s_register_operand" "w")
2997 (match_operand:VDX 2 "s_register_operand" "w")))]
3000 "&& reload_completed"
3003 neon_split_vcombine (operands);
3006 [(set_attr "type" "multiple")]
;; Expander for the vget_high builtin over the VQX modes: moves into
;; operand 0 the <V_HALF>mode subreg of operand 1 that starts at byte
;; offset GET_MODE_SIZE (<V_HALF>mode).
3009 (define_expand "neon_vget_high<mode>"
3010 [(match_operand:<V_HALF> 0 "s_register_operand")
3011 (match_operand:VQX 1 "s_register_operand")]
3014 emit_move_insn (operands[0],
3015 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3016 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for the vget_low builtin over the VQX modes: moves a
;; <V_HALF>mode subreg of operand 1 into operand 0.  The remaining
;; simplify_gen_subreg arguments (including the byte offset) are not
;; visible in this listing.
3020 (define_expand "neon_vget_low<mode>"
3021 [(match_operand:<V_HALF> 0 "s_register_operand")
3022 (match_operand:VQX 1 "s_register_operand")]
3025 emit_move_insn (operands[0],
3026 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Standard-named signed integer to float conversion (float RTL code) from
;; a VCVTI mode to <V_CVTTO>; emits vcvt.f32.s32.  Only enabled when
;; TARGET_NEON is set and flag_rounding_math is clear.
3031 (define_insn "float<mode><V_cvtto>2"
3032 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3033 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3034 "TARGET_NEON && !flag_rounding_math"
3035 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3036 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named unsigned integer to float conversion (unsigned_float RTL
;; code) from a VCVTI mode to <V_CVTTO>; emits vcvt.f32.u32.  Only enabled
;; when TARGET_NEON is set and flag_rounding_math is clear.
3039 (define_insn "floatuns<mode><V_cvtto>2"
3040 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3041 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3042 "TARGET_NEON && !flag_rounding_math"
3043 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3044 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named float to signed integer conversion (fix RTL code) from a
;; VCVTF mode to <V_CVTTO>; emits vcvt.s32.f32.
3047 (define_insn "fix_trunc<mode><V_cvtto>2"
3048 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3049 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3051 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3052 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Standard-named float to unsigned integer conversion (unsigned_fix RTL
;; code) from a VCVTF mode to <V_CVTTO>; emits vcvt.u32.f32.
3055 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3056 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3057 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3059 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3060 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Builtin float-to-integer VCVT (unspec form): VCVTF source, <V_CVTTO>
;; result.  The destination type prefix is produced by %T2 from the SI
;; immediate in operand 2 and is followed by "32"; the source type is
;; fixed at f32.
3063 (define_insn "neon_vcvt<mode>"
3064 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3065 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3066 (match_operand:SI 2 "immediate_operand" "i")]
3069 "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
3070 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Builtin integer-to-float VCVT (unspec form): VCVTI source, <V_CVTTO>
;; result.  The destination type is fixed at f32; the source type prefix
;; is produced by %T2 from the SI immediate in operand 2 and is followed
;; by "32".
3073 (define_insn "neon_vcvt<mode>"
3074 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3075 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3076 (match_operand:SI 2 "immediate_operand" "i")]
3079 "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
3080 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Converts a V4HF source (printed as %P1) to a V4SF result (printed as
;; %q0) with vcvt.f32.f16.  Requires both TARGET_NEON and TARGET_FP16.
3083 (define_insn "neon_vcvtv4sfv4hf"
3084 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3085 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3087 "TARGET_NEON && TARGET_FP16"
3088 "vcvt.f32.f16\t%q0, %P1"
3089 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Converts a V4SF source (printed as %q1) to a V4HF result (printed as
;; %P0) with vcvt.f16.f32.  Requires both TARGET_NEON and TARGET_FP16.
3092 (define_insn "neon_vcvtv4hfv4sf"
3093 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3094 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3096 "TARGET_NEON && TARGET_FP16"
3097 "vcvt.f16.f32\t%P0, %q1"
3098 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Float-to-integer VCVT with an extra immediate.  Operand 2 is passed to
;; neon_const_bounds (operands[2], 1, 33) when the insn is output and is
;; then printed as the final assembler operand.  %T3 builds the destination
;; type prefix from the SI immediate in operand 3.
3101 (define_insn "neon_vcvt_n<mode>"
3102 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3103 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3104 (match_operand:SI 2 "immediate_operand" "i")
3105 (match_operand:SI 3 "immediate_operand" "i")]
3109 neon_const_bounds (operands[2], 1, 33);
3110 return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3112 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Integer-to-float VCVT with an extra immediate.  Operand 2 is passed to
;; neon_const_bounds (operands[2], 1, 33) when the insn is output and is
;; then printed as the final assembler operand.  %T3 builds the source
;; type prefix from the SI immediate in operand 3.
3115 (define_insn "neon_vcvt_n<mode>"
3116 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3117 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3118 (match_operand:SI 2 "immediate_operand" "i")
3119 (match_operand:SI 3 "immediate_operand" "i")]
3123 neon_const_bounds (operands[2], 1, 33);
3124 return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2";
3126 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Narrowing move: a VN-mode source produces a <V_narrow>-mode result via
;; "vmovn.<V_if_elem>".  Operand 2 is an immediate that the template does not
;; reference.
3129 (define_insn "neon_vmovn<mode>"
3130 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3131 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3132 (match_operand:SI 2 "immediate_operand" "i")]
3135 "vmovn.<V_if_elem>\t%P0, %q1"
3136 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Narrowing move emitted as "vqmovn": a VN-mode source produces a
;; <V_narrow>-mode result.  Operand 2 is an immediate used only through the
;; %T2 modifier in the mnemonic.
3139 (define_insn "neon_vqmovn<mode>"
3140 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3141 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3142 (match_operand:SI 2 "immediate_operand" "i")]
3145 "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
3146 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Narrowing move emitted as "vqmovun.<V_s_elem>": a VN-mode source produces a
;; <V_narrow>-mode result.  Operand 2 is an immediate that the template does
;; not reference.
3149 (define_insn "neon_vqmovun<mode>"
3150 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3151 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3152 (match_operand:SI 2 "immediate_operand" "i")]
3155 "vqmovun.<V_s_elem>\t%P0, %q1"
3156 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move: a VW-mode source produces a <V_widen>-mode result via
;; "vmovl".  Operand 2 is an immediate used only through the %T2 modifier in
;; the mnemonic.
3159 (define_insn "neon_vmovl<mode>"
3160 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3161 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3162 (match_operand:SI 2 "immediate_operand" "i")]
3165 "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
3166 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply a VMD-mode vector (operand 1) by one lane of another VMD-mode
;; vector (operand 2); the lane number is operand 3.  The lane is checked with
;; neon_lane_bounds against the element count of <MODE>mode.  Operand 4 is an
;; immediate that the template does not reference.  The "type" attribute
;; depends on <Is_float_mode>.
3169 (define_insn "neon_vmul_lane<mode>"
3170 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3171 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3172 (match_operand:VMD 2 "s_register_operand"
3173 "<scalar_mul_constraint>")
3174 (match_operand:SI 3 "immediate_operand" "i")
3175 (match_operand:SI 4 "immediate_operand" "i")]
3179 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3180 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3183 (if_then_else (match_test "<Is_float_mode>")
3184 (const_string "neon_fp_mul_s_scalar<q>")
3185 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply a VMQ-mode vector (operand 1) by one lane of a <V_HALF>-mode
;; vector (operand 2); the lane number is operand 3.  Because the scalar
;; comes from the <V_HALF> operand, the lane is checked against the element
;; count of <V_HALF>mode.  Operand 4 is an immediate that the template does
;; not reference.  The "type" attribute depends on <Is_float_mode>.
3188 (define_insn "neon_vmul_lane<mode>"
3189 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3190 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3191 (match_operand:<V_HALF> 2 "s_register_operand"
3192 "<scalar_mul_constraint>")
3193 (match_operand:SI 3 "immediate_operand" "i")
3194 (match_operand:SI 4 "immediate_operand" "i")]
3198 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3199 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3202 (if_then_else (match_test "<Is_float_mode>")
3203 (const_string "neon_fp_mul_s_scalar<q>")
3204 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Widening multiply by scalar: VMDI-mode operand 1 times lane operand 3 of
;; VMDI-mode operand 2, producing a <V_widen>-mode result (UNSPEC_VMULL_LANE).
;; The lane is checked against the element count of <MODE>mode.  Operand 4 is
;; an immediate used only through the %T4 modifier in the mnemonic.
3207 (define_insn "neon_vmull_lane<mode>"
3208 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3209 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3210 (match_operand:VMDI 2 "s_register_operand"
3211 "<scalar_mul_constraint>")
3212 (match_operand:SI 3 "immediate_operand" "i")
3213 (match_operand:SI 4 "immediate_operand" "i")]
3214 UNSPEC_VMULL_LANE))]
3217 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3218 return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3220 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; "vqdmull" by scalar: VMDI-mode operand 1 and lane operand 3 of VMDI-mode
;; operand 2 produce a <V_widen>-mode result (UNSPEC_VQDMULL_LANE).  The lane
;; is checked against the element count of <MODE>mode.  Operand 4 is an
;; immediate that the template does not reference.
3223 (define_insn "neon_vqdmull_lane<mode>"
3224 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3225 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3226 (match_operand:VMDI 2 "s_register_operand"
3227 "<scalar_mul_constraint>")
3228 (match_operand:SI 3 "immediate_operand" "i")
3229 (match_operand:SI 4 "immediate_operand" "i")]
3230 UNSPEC_VQDMULL_LANE))]
3233 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3234 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3236 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; "vq[r]dmulh" by scalar, quad-word form: VMQI-mode operand 1 and lane
;; operand 3 of the <V_HALF>-mode operand 2 (UNSPEC_VQDMULH_LANE).  Operand 4
;; is an immediate used only through the %O4 and %T4 modifiers in the mnemonic.
;; The scalar lives in the <V_HALF> register, so the lane index is checked
;; against the element count of <V_HALF>mode (as neon_vmul_lane<mode> for VMQ
;; does); checking against <MODE>mode would admit lanes that do not exist in
;; the scalar register.
3239 (define_insn "neon_vqdmulh_lane<mode>"
3240 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3241 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3242 (match_operand:<V_HALF> 2 "s_register_operand"
3243 "<scalar_mul_constraint>")
3244 (match_operand:SI 3 "immediate_operand" "i")
3245 (match_operand:SI 4 "immediate_operand" "i")]
3246 UNSPEC_VQDMULH_LANE))]
3249 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3250 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]";
3252 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; "vq[r]dmulh" by scalar, double-word form: all vector operands are VMDI
;; mode (UNSPEC_VQDMULH_LANE).  The lane (operand 3) is checked against the
;; element count of <MODE>mode.  Operand 4 is an immediate used only through
;; the %O4 and %T4 modifiers in the mnemonic.
3255 (define_insn "neon_vqdmulh_lane<mode>"
3256 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3257 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3258 (match_operand:VMDI 2 "s_register_operand"
3259 "<scalar_mul_constraint>")
3260 (match_operand:SI 3 "immediate_operand" "i")
3261 (match_operand:SI 4 "immediate_operand" "i")]
3262 UNSPEC_VQDMULH_LANE))]
3265 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3266 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]";
3268 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Multiply-accumulate by scalar, double-word form.  Operand 1 is tied to the
;; output (constraint "0"); operand 2 is multiplied by lane operand 4 of
;; operand 3.  All vector operands are VMD mode and the lane is checked
;; against the element count of <MODE>mode.  Operand 5 is an immediate that
;; the template does not reference.
3271 (define_insn "neon_vmla_lane<mode>"
3272 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3273 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3274 (match_operand:VMD 2 "s_register_operand" "w")
3275 (match_operand:VMD 3 "s_register_operand"
3276 "<scalar_mul_constraint>")
3277 (match_operand:SI 4 "immediate_operand" "i")
3278 (match_operand:SI 5 "immediate_operand" "i")]
3282 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3283 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3286 (if_then_else (match_test "<Is_float_mode>")
3287 (const_string "neon_fp_mla_s_scalar<q>")
3288 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by scalar, quad-word form.  Operand 1 is tied to the
;; output (constraint "0"); VMQ-mode operand 2 is multiplied by lane operand 4
;; of the <V_HALF>-mode operand 3.  Operand 5 is an immediate that the
;; template does not reference.
;; The scalar lives in the <V_HALF> register, so the lane index is checked
;; against the element count of <V_HALF>mode (as neon_vmul_lane<mode> for VMQ
;; does); checking against <MODE>mode would admit lanes that do not exist in
;; the scalar register.
3291 (define_insn "neon_vmla_lane<mode>"
3292 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3293 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3294 (match_operand:VMQ 2 "s_register_operand" "w")
3295 (match_operand:<V_HALF> 3 "s_register_operand"
3296 "<scalar_mul_constraint>")
3297 (match_operand:SI 4 "immediate_operand" "i")
3298 (match_operand:SI 5 "immediate_operand" "i")]
3302 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3303 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3306 (if_then_else (match_test "<Is_float_mode>")
3307 (const_string "neon_fp_mla_s_scalar<q>")
3308 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by scalar (UNSPEC_VMLAL_LANE).  The
;; <V_widen>-mode operand 1 is tied to the output; VMDI-mode operand 2 is
;; multiplied by lane operand 4 of VMDI-mode operand 3.  The lane is checked
;; against the element count of <MODE>mode.  Operand 5 is an immediate used
;; only through the %T5 modifier in the mnemonic.
3311 (define_insn "neon_vmlal_lane<mode>"
3312 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3313 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3314 (match_operand:VMDI 2 "s_register_operand" "w")
3315 (match_operand:VMDI 3 "s_register_operand"
3316 "<scalar_mul_constraint>")
3317 (match_operand:SI 4 "immediate_operand" "i")
3318 (match_operand:SI 5 "immediate_operand" "i")]
3319 UNSPEC_VMLAL_LANE))]
3322 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3323 return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3325 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; "vqdmlal" by scalar (UNSPEC_VQDMLAL_LANE).  The <V_widen>-mode operand 1 is
;; tied to the output; VMDI-mode operand 2 is combined with lane operand 4 of
;; VMDI-mode operand 3.  The lane is checked against the element count of
;; <MODE>mode.  Operand 5 is an immediate that the template does not
;; reference.
3328 (define_insn "neon_vqdmlal_lane<mode>"
3329 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3330 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3331 (match_operand:VMDI 2 "s_register_operand" "w")
3332 (match_operand:VMDI 3 "s_register_operand"
3333 "<scalar_mul_constraint>")
3334 (match_operand:SI 4 "immediate_operand" "i")
3335 (match_operand:SI 5 "immediate_operand" "i")]
3336 UNSPEC_VQDMLAL_LANE))]
3339 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3340 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3342 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by scalar, double-word form.  Operand 1 is tied to the
;; output (constraint "0"); operand 2 is multiplied by lane operand 4 of
;; operand 3.  All vector operands are VMD mode and the lane is checked
;; against the element count of <MODE>mode.  Operand 5 is an immediate that
;; the template does not reference.
3345 (define_insn "neon_vmls_lane<mode>"
3346 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3347 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3348 (match_operand:VMD 2 "s_register_operand" "w")
3349 (match_operand:VMD 3 "s_register_operand"
3350 "<scalar_mul_constraint>")
3351 (match_operand:SI 4 "immediate_operand" "i")
3352 (match_operand:SI 5 "immediate_operand" "i")]
3356 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3357 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3360 (if_then_else (match_test "<Is_float_mode>")
3361 (const_string "neon_fp_mla_s_scalar<q>")
3362 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by scalar, quad-word form.  Operand 1 is tied to the
;; output (constraint "0"); VMQ-mode operand 2 is multiplied by lane operand 4
;; of the <V_HALF>-mode operand 3.  Operand 5 is an immediate that the
;; template does not reference.
;; The scalar lives in the <V_HALF> register, so the lane index is checked
;; against the element count of <V_HALF>mode (as neon_vmul_lane<mode> for VMQ
;; does); checking against <MODE>mode would admit lanes that do not exist in
;; the scalar register.
3365 (define_insn "neon_vmls_lane<mode>"
3366 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3367 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3368 (match_operand:VMQ 2 "s_register_operand" "w")
3369 (match_operand:<V_HALF> 3 "s_register_operand"
3370 "<scalar_mul_constraint>")
3371 (match_operand:SI 4 "immediate_operand" "i")
3372 (match_operand:SI 5 "immediate_operand" "i")]
3376 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
3377 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3380 (if_then_else (match_test "<Is_float_mode>")
3381 (const_string "neon_fp_mla_s_scalar<q>")
3382 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by scalar (UNSPEC_VMLSL_LANE).  The
;; <V_widen>-mode operand 1 is tied to the output; VMDI-mode operand 2 is
;; multiplied by lane operand 4 of VMDI-mode operand 3.  The lane is checked
;; against the element count of <MODE>mode.  Operand 5 is an immediate used
;; only through the %T5 modifier in the mnemonic.
3385 (define_insn "neon_vmlsl_lane<mode>"
3386 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3387 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3388 (match_operand:VMDI 2 "s_register_operand" "w")
3389 (match_operand:VMDI 3 "s_register_operand"
3390 "<scalar_mul_constraint>")
3391 (match_operand:SI 4 "immediate_operand" "i")
3392 (match_operand:SI 5 "immediate_operand" "i")]
3393 UNSPEC_VMLSL_LANE))]
3396 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3397 return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3399 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; "vqdmlsl" by scalar (UNSPEC_VQDMLSL_LANE).  The <V_widen>-mode operand 1 is
;; tied to the output; VMDI-mode operand 2 is combined with lane operand 4 of
;; VMDI-mode operand 3.  The lane is checked against the element count of
;; <MODE>mode.  Operand 5 is an immediate that the template does not
;; reference.
3402 (define_insn "neon_vqdmlsl_lane<mode>"
3403 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3404 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3405 (match_operand:VMDI 2 "s_register_operand" "w")
3406 (match_operand:VMDI 3 "s_register_operand"
3407 "<scalar_mul_constraint>")
3408 (match_operand:SI 4 "immediate_operand" "i")
3409 (match_operand:SI 5 "immediate_operand" "i")]
3410 UNSPEC_VQDMLSL_LANE))]
3413 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3414 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3416 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3419 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3420 ; core register into a temp register, then use a scalar taken from that. This
3421 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3422 ; or extracted from another vector. In the latter case it is currently better
3423 ; to use the "_lane" variant; the former case can probably be implemented
3424 ; using vld1_lane, but that hasn't been done yet.
;; Multiply a VMD-mode vector by a scalar held in a <V_elem>-mode register.
;; The scalar (operand 2) is inserted into lane 0 of a fresh <MODE>mode
;; temporary with neon_vset_lane, then neon_vmul_lane is emitted using lane 0.
;; Operand 3 is not forwarded; const0_rtx is passed in its place.
3426 (define_expand "neon_vmul_n<mode>"
3427 [(match_operand:VMD 0 "s_register_operand" "")
3428 (match_operand:VMD 1 "s_register_operand" "")
3429 (match_operand:<V_elem> 2 "s_register_operand" "")
3430 (match_operand:SI 3 "immediate_operand" "")]
3433 rtx tmp = gen_reg_rtx (<MODE>mode);
3434 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3435 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3436 const0_rtx, const0_rtx));
;; Multiply a VMQ-mode vector by a scalar held in a <V_elem>-mode register.
;; The scalar (operand 2) is inserted into lane 0 of a fresh <V_HALF>mode
;; temporary with neon_vset_lane, then neon_vmul_lane is emitted using lane 0.
;; Operand 3 is not forwarded; const0_rtx is passed in its place.
3440 (define_expand "neon_vmul_n<mode>"
3441 [(match_operand:VMQ 0 "s_register_operand" "")
3442 (match_operand:VMQ 1 "s_register_operand" "")
3443 (match_operand:<V_elem> 2 "s_register_operand" "")
3444 (match_operand:SI 3 "immediate_operand" "")]
3447 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3448 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3449 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3450 const0_rtx, const0_rtx));
;; Widening multiply of a VMDI-mode vector by a <V_elem>-mode scalar.  The
;; scalar is inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vmull_lane is emitted using lane 0 with operand 3 forwarded as its
;; last argument.
3454 (define_expand "neon_vmull_n<mode>"
3455 [(match_operand:<V_widen> 0 "s_register_operand" "")
3456 (match_operand:VMDI 1 "s_register_operand" "")
3457 (match_operand:<V_elem> 2 "s_register_operand" "")
3458 (match_operand:SI 3 "immediate_operand" "")]
3461 rtx tmp = gen_reg_rtx (<MODE>mode);
3462 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3463 emit_insn (gen_neon_vmull_lane<mode> (operands[0], operands[1], tmp,
3464 const0_rtx, operands[3]));
;; "vqdmull" of a VMDI-mode vector by a <V_elem>-mode scalar.  The scalar is
;; inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vqdmull_lane is emitted using lane 0.  Operand 3 is not forwarded;
;; const0_rtx is passed in its place.
3468 (define_expand "neon_vqdmull_n<mode>"
3469 [(match_operand:<V_widen> 0 "s_register_operand" "")
3470 (match_operand:VMDI 1 "s_register_operand" "")
3471 (match_operand:<V_elem> 2 "s_register_operand" "")
3472 (match_operand:SI 3 "immediate_operand" "")]
3475 rtx tmp = gen_reg_rtx (<MODE>mode);
3476 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3477 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3478 const0_rtx, const0_rtx));
;; "vqdmulh" of a VMDI-mode vector by a <V_elem>-mode scalar.  The scalar is
;; inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vqdmulh_lane is emitted using lane 0 with operand 3 forwarded as its
;; last argument.
3482 (define_expand "neon_vqdmulh_n<mode>"
3483 [(match_operand:VMDI 0 "s_register_operand" "")
3484 (match_operand:VMDI 1 "s_register_operand" "")
3485 (match_operand:<V_elem> 2 "s_register_operand" "")
3486 (match_operand:SI 3 "immediate_operand" "")]
3489 rtx tmp = gen_reg_rtx (<MODE>mode);
3490 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3491 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3492 const0_rtx, operands[3]));
;; "vqdmulh" of a VMQI-mode vector by a <V_elem>-mode scalar.  The scalar is
;; inserted into lane 0 of a fresh <V_HALF>mode temporary, then
;; neon_vqdmulh_lane is emitted using lane 0 with operand 3 forwarded as its
;; last argument.
3496 (define_expand "neon_vqdmulh_n<mode>"
3497 [(match_operand:VMQI 0 "s_register_operand" "")
3498 (match_operand:VMQI 1 "s_register_operand" "")
3499 (match_operand:<V_elem> 2 "s_register_operand" "")
3500 (match_operand:SI 3 "immediate_operand" "")]
3503 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3504 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3505 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3506 const0_rtx, operands[3]));
;; Multiply-accumulate of VMD-mode vectors by a <V_elem>-mode scalar.  The
;; scalar (operand 3) is inserted into lane 0 of a fresh <MODE>mode temporary,
;; then neon_vmla_lane is emitted using lane 0 with operand 4 forwarded as its
;; last argument.
3510 (define_expand "neon_vmla_n<mode>"
3511 [(match_operand:VMD 0 "s_register_operand" "")
3512 (match_operand:VMD 1 "s_register_operand" "")
3513 (match_operand:VMD 2 "s_register_operand" "")
3514 (match_operand:<V_elem> 3 "s_register_operand" "")
3515 (match_operand:SI 4 "immediate_operand" "")]
3518 rtx tmp = gen_reg_rtx (<MODE>mode);
3519 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3520 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3521 tmp, const0_rtx, operands[4]));
;; Multiply-accumulate of VMQ-mode vectors by a <V_elem>-mode scalar.  The
;; scalar (operand 3) is inserted into lane 0 of a fresh <V_HALF>mode
;; temporary, then neon_vmla_lane is emitted using lane 0 with operand 4
;; forwarded as its last argument.
3525 (define_expand "neon_vmla_n<mode>"
3526 [(match_operand:VMQ 0 "s_register_operand" "")
3527 (match_operand:VMQ 1 "s_register_operand" "")
3528 (match_operand:VMQ 2 "s_register_operand" "")
3529 (match_operand:<V_elem> 3 "s_register_operand" "")
3530 (match_operand:SI 4 "immediate_operand" "")]
3533 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3534 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3535 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3536 tmp, const0_rtx, operands[4]));
;; Widening multiply-accumulate by a <V_elem>-mode scalar.  The scalar
;; (operand 3) is inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vmlal_lane is emitted using lane 0 with operand 4 forwarded as its
;; last argument.
3540 (define_expand "neon_vmlal_n<mode>"
3541 [(match_operand:<V_widen> 0 "s_register_operand" "")
3542 (match_operand:<V_widen> 1 "s_register_operand" "")
3543 (match_operand:VMDI 2 "s_register_operand" "")
3544 (match_operand:<V_elem> 3 "s_register_operand" "")
3545 (match_operand:SI 4 "immediate_operand" "")]
3548 rtx tmp = gen_reg_rtx (<MODE>mode);
3549 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3550 emit_insn (gen_neon_vmlal_lane<mode> (operands[0], operands[1], operands[2],
3551 tmp, const0_rtx, operands[4]));
;; "vqdmlal" by a <V_elem>-mode scalar.  The scalar (operand 3) is inserted
;; into lane 0 of a fresh <MODE>mode temporary, then neon_vqdmlal_lane is
;; emitted using lane 0 with operand 4 forwarded as its last argument.
3555 (define_expand "neon_vqdmlal_n<mode>"
3556 [(match_operand:<V_widen> 0 "s_register_operand" "")
3557 (match_operand:<V_widen> 1 "s_register_operand" "")
3558 (match_operand:VMDI 2 "s_register_operand" "")
3559 (match_operand:<V_elem> 3 "s_register_operand" "")
3560 (match_operand:SI 4 "immediate_operand" "")]
3563 rtx tmp = gen_reg_rtx (<MODE>mode);
3564 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3565 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3566 tmp, const0_rtx, operands[4]));
;; Multiply-subtract of VMD-mode vectors by a <V_elem>-mode scalar.  The
;; scalar (operand 3) is inserted into lane 0 of a fresh <MODE>mode temporary,
;; then neon_vmls_lane is emitted using lane 0 with operand 4 forwarded as its
;; last argument.
3570 (define_expand "neon_vmls_n<mode>"
3571 [(match_operand:VMD 0 "s_register_operand" "")
3572 (match_operand:VMD 1 "s_register_operand" "")
3573 (match_operand:VMD 2 "s_register_operand" "")
3574 (match_operand:<V_elem> 3 "s_register_operand" "")
3575 (match_operand:SI 4 "immediate_operand" "")]
3578 rtx tmp = gen_reg_rtx (<MODE>mode);
3579 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3580 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3581 tmp, const0_rtx, operands[4]));
;; Multiply-subtract of VMQ-mode vectors by a <V_elem>-mode scalar.  The
;; scalar (operand 3) is inserted into lane 0 of a fresh <V_HALF>mode
;; temporary, then neon_vmls_lane is emitted using lane 0 with operand 4
;; forwarded as its last argument.
3585 (define_expand "neon_vmls_n<mode>"
3586 [(match_operand:VMQ 0 "s_register_operand" "")
3587 (match_operand:VMQ 1 "s_register_operand" "")
3588 (match_operand:VMQ 2 "s_register_operand" "")
3589 (match_operand:<V_elem> 3 "s_register_operand" "")
3590 (match_operand:SI 4 "immediate_operand" "")]
3593 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3594 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3595 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3596 tmp, const0_rtx, operands[4]));
;; Widening multiply-subtract by a <V_elem>-mode scalar.  The scalar
;; (operand 3) is inserted into lane 0 of a fresh <MODE>mode temporary, then
;; neon_vmlsl_lane is emitted using lane 0 with operand 4 forwarded as its
;; last argument.
3600 (define_expand "neon_vmlsl_n<mode>"
3601 [(match_operand:<V_widen> 0 "s_register_operand" "")
3602 (match_operand:<V_widen> 1 "s_register_operand" "")
3603 (match_operand:VMDI 2 "s_register_operand" "")
3604 (match_operand:<V_elem> 3 "s_register_operand" "")
3605 (match_operand:SI 4 "immediate_operand" "")]
3608 rtx tmp = gen_reg_rtx (<MODE>mode);
3609 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3610 emit_insn (gen_neon_vmlsl_lane<mode> (operands[0], operands[1], operands[2],
3611 tmp, const0_rtx, operands[4]));
;; "vqdmlsl" by a <V_elem>-mode scalar.  The scalar (operand 3) is inserted
;; into lane 0 of a fresh <MODE>mode temporary, then neon_vqdmlsl_lane is
;; emitted using lane 0 with operand 4 forwarded as its last argument.
3615 (define_expand "neon_vqdmlsl_n<mode>"
3616 [(match_operand:<V_widen> 0 "s_register_operand" "")
3617 (match_operand:<V_widen> 1 "s_register_operand" "")
3618 (match_operand:VMDI 2 "s_register_operand" "")
3619 (match_operand:<V_elem> 3 "s_register_operand" "")
3620 (match_operand:SI 4 "immediate_operand" "")]
3623 rtx tmp = gen_reg_rtx (<MODE>mode);
3624 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3625 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3626 tmp, const0_rtx, operands[4]));
;; "vext" on two VDQX-mode registers with an immediate (operand 3) printed as
;; the final instruction operand.  The immediate is range-checked with
;; neon_const_bounds against 0 and the element count of <MODE>mode.
3630 (define_insn "neon_vext<mode>"
3631 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3632 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3633 (match_operand:VDQX 2 "s_register_operand" "w")
3634 (match_operand:SI 3 "immediate_operand" "i")]
3638 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3639 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3641 [(set_attr "type" "neon_ext<q>")]
;; "vrev64.<V_sz_elem>" on a VDQ-mode register.  Operand 2 is an immediate
;; that the template does not reference.
3644 (define_insn "neon_vrev64<mode>"
3645 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3646 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
3647 (match_operand:SI 2 "immediate_operand" "i")]
3650 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3651 [(set_attr "type" "neon_rev<q>")]
;; "vrev32.<V_sz_elem>" on a VX-mode register.  Operand 2 is an immediate
;; that the template does not reference.
3654 (define_insn "neon_vrev32<mode>"
3655 [(set (match_operand:VX 0 "s_register_operand" "=w")
3656 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")
3657 (match_operand:SI 2 "immediate_operand" "i")]
3660 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3661 [(set_attr "type" "neon_rev<q>")]
;; "vrev16.<V_sz_elem>" on a VE-mode register.  Operand 2 is an immediate
;; that the template does not reference.
3664 (define_insn "neon_vrev16<mode>"
3665 [(set (match_operand:VE 0 "s_register_operand" "=w")
3666 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
3667 (match_operand:SI 2 "immediate_operand" "i")]
3670 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3671 [(set_attr "type" "neon_rev<q>")]
3674 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3675 ; allocation. For an intrinsic of form:
3676 ; rD = vbsl_* (rS, rN, rM)
3677 ; We can use any of:
3678 ; vbsl rS, rN, rM (if D = S)
3679 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3680 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select with three constraint alternatives, one per instruction:
;;   alternative 0: operand 1 tied to the output -> vbsl
;;   alternative 1: operand 3 tied to the output -> vbit
;;   alternative 2: operand 2 tied to the output -> vbif
;; Each template line below corresponds to the alternative in the same
;; position.
3682 (define_insn "neon_vbsl<mode>_internal"
3683 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3684 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3685 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3686 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3690 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3691 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3692 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3693 [(set_attr "type" "neon_bsl<q>")]
;; Expander for bitwise select.  Operand 1 arrives in <V_cmp_result> mode
;; while operands 2 and 3 are VDQX mode; operand 1 is re-expressed in
;; <MODE>mode with gen_lowpart so that all three inputs share one mode.
3696 (define_expand "neon_vbsl<mode>"
3697 [(set (match_operand:VDQX 0 "s_register_operand" "")
3698 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3699 (match_operand:VDQX 2 "s_register_operand" "")
3700 (match_operand:VDQX 3 "s_register_operand" "")]
3704 /* We can't alias operands together if they have different modes. */
3705 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift by register: VDQIX-mode operand 1 is shifted by VDQIX-mode
;; operand 2.  Operand 3 is an immediate used only through the %O3 and %T3
;; modifiers in the mnemonic.
3708 (define_insn "neon_vshl<mode>"
3709 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3710 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3711 (match_operand:VDQIX 2 "s_register_operand" "w")
3712 (match_operand:SI 3 "immediate_operand" "i")]
3715 "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3716 [(set_attr "type" "neon_shift_imm<q>")]
;; "vq[r]shl" by register: VDQIX-mode operand 1 is shifted by VDQIX-mode
;; operand 2.  Operand 3 is an immediate used only through the %O3 and %T3
;; modifiers in the mnemonic.
3719 (define_insn "neon_vqshl<mode>"
3720 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3721 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3722 (match_operand:VDQIX 2 "s_register_operand" "w")
3723 (match_operand:SI 3 "immediate_operand" "i")]
3726 "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3727 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift right by immediate.  The shift amount (operand 2) is range-checked
;; with neon_const_bounds using 1 and (element bits + 1).  Operand 3 is an
;; immediate used only through the %O3 and %T3 modifiers in the mnemonic.
3730 (define_insn "neon_vshr_n<mode>"
3731 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3732 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3733 (match_operand:SI 2 "immediate_operand" "i")
3734 (match_operand:SI 3 "immediate_operand" "i")]
3738 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3739 return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3741 [(set_attr "type" "neon_shift_imm<q>")]
;; Narrowing shift right by immediate: a VN-mode source produces a
;; <V_narrow>-mode result.  The shift amount (operand 2) is range-checked
;; using 1 and (element bits / 2 + 1).  Operand 3 is an immediate used only
;; through the %O3 modifier in the mnemonic.
3744 (define_insn "neon_vshrn_n<mode>"
3745 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3746 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3747 (match_operand:SI 2 "immediate_operand" "i")
3748 (match_operand:SI 3 "immediate_operand" "i")]
3752 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3753 return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2";
3755 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; "vq[r]shrn" by immediate: a VN-mode source produces a <V_narrow>-mode
;; result.  The shift amount (operand 2) is range-checked using 1 and
;; (element bits / 2 + 1).  Operand 3 is an immediate used only through the
;; %O3 and %T3 modifiers in the mnemonic.
3758 (define_insn "neon_vqshrn_n<mode>"
3759 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3760 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3761 (match_operand:SI 2 "immediate_operand" "i")
3762 (match_operand:SI 3 "immediate_operand" "i")]
3766 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3767 return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3769 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; "vq[r]shrun" by immediate: a VN-mode source produces a <V_narrow>-mode
;; result.  The shift amount (operand 2) is range-checked using 1 and
;; (element bits / 2 + 1).  Operand 3 is an immediate used only through the
;; %O3 and %T3 modifiers in the mnemonic.
3772 (define_insn "neon_vqshrun_n<mode>"
3773 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3774 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3775 (match_operand:SI 2 "immediate_operand" "i")
3776 (match_operand:SI 3 "immediate_operand" "i")]
3780 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3781 return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3783 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Shift left by immediate.  The shift amount (operand 2) is range-checked
;; with neon_const_bounds using 0 and the element bit width.  Operand 3 is an
;; immediate that the template does not reference.
3786 (define_insn "neon_vshl_n<mode>"
3787 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3788 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3789 (match_operand:SI 2 "immediate_operand" "i")
3790 (match_operand:SI 3 "immediate_operand" "i")]
3794 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3795 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3797 [(set_attr "type" "neon_shift_imm<q>")]
;; "vqshl" by immediate.  The shift amount (operand 2) is range-checked with
;; neon_const_bounds using 0 and the element bit width.  Operand 3 is an
;; immediate used only through the %T3 modifier in the mnemonic.
3800 (define_insn "neon_vqshl_n<mode>"
3801 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3802 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3803 (match_operand:SI 2 "immediate_operand" "i")
3804 (match_operand:SI 3 "immediate_operand" "i")]
3808 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3809 return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3811 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; "vqshlu" by immediate.  The shift amount (operand 2) is range-checked with
;; neon_const_bounds using 0 and the element bit width.  Operand 3 is an
;; immediate used only through the %T3 modifier in the mnemonic.
3814 (define_insn "neon_vqshlu_n<mode>"
3815 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3816 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3817 (match_operand:SI 2 "immediate_operand" "i")
3818 (match_operand:SI 3 "immediate_operand" "i")]
3822 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3823 return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3825 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by immediate: a VW-mode source produces a
;; <V_widen>-mode result.  The shift amount (operand 2) is range-checked
;; using 0 and (element bits + 1).  Operand 3 is an immediate used only
;; through the %T3 modifier in the mnemonic.
;; NOTE(review): the in-body comment says "0 < imm", yet the lower bound
;; passed is 0, whereas patterns whose minimum shift is 1 (e.g.
;; neon_vshr_n<mode>) pass 1 -- confirm whether imm == 0 is meant to be
;; accepted here.
3828 (define_insn "neon_vshll_n<mode>"
3829 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3830 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3831 (match_operand:SI 2 "immediate_operand" "i")
3832 (match_operand:SI 3 "immediate_operand" "i")]
3836 /* The boundaries are: 0 < imm <= size. */
3837 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3838 return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
3840 [(set_attr "type" "neon_shift_imm_long")]
;; Shift right by immediate and accumulate.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is the vector being shifted.  The shift amount
;; (operand 3) is range-checked using 1 and (element bits + 1).  Operand 4 is
;; an immediate used only through the %O4 and %T4 modifiers in the mnemonic.
3843 (define_insn "neon_vsra_n<mode>"
3844 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3845 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3846 (match_operand:VDQIX 2 "s_register_operand" "w")
3847 (match_operand:SI 3 "immediate_operand" "i")
3848 (match_operand:SI 4 "immediate_operand" "i")]
3852 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3853 return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3855 [(set_attr "type" "neon_shift_acc<q>")]
;; "vsri" by immediate.  Operand 1 is tied to the output (constraint "0");
;; operand 2 is the other vector input.  The shift amount (operand 3) is
;; range-checked using 1 and (element bits + 1).
3858 (define_insn "neon_vsri_n<mode>"
3859 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3860 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3861 (match_operand:VDQIX 2 "s_register_operand" "w")
3862 (match_operand:SI 3 "immediate_operand" "i")]
3866 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3867 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3869 [(set_attr "type" "neon_shift_reg<q>")]
;; "vsli" by immediate.  Operand 1 is tied to the output (constraint "0");
;; operand 2 is the other vector input.  The shift amount (operand 3) is
;; range-checked using 0 and the element bit width.
3872 (define_insn "neon_vsli_n<mode>"
3873 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3874 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3875 (match_operand:VDQIX 2 "s_register_operand" "w")
3876 (match_operand:SI 3 "immediate_operand" "i")]
3880 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3881 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3883 [(set_attr "type" "neon_shift_reg<q>")]
;; Table lookup with a single V8QI table register: operand 1 is printed as
;; the one-element register list and operand 2 as the final operand.
3886 (define_insn "neon_vtbl1v8qi"
3887 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3888 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3889 (match_operand:V8QI 2 "s_register_operand" "w")]
3892 "vtbl.8\t%P0, {%P1}, %P2"
3893 [(set_attr "type" "neon_tbl1")]
;; Table lookup with a two-register table held in a TImode operand.  The
;; output code builds V8QI register rtxes at REGNO (operands[1]) + 0 and + 2
;; for the register list and prints the instruction with output_asm_insn.
3896 (define_insn "neon_vtbl2v8qi"
3897 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3898 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3899 (match_operand:V8QI 2 "s_register_operand" "w")]
3904 int tabbase = REGNO (operands[1]);
3906 ops[0] = operands[0];
3907 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3908 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3909 ops[3] = operands[2];
3910 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3914 [(set_attr "type" "neon_tbl2")]
;; Table lookup with a three-register table held in an EImode operand.  The
;; output code builds V8QI register rtxes at REGNO (operands[1]) + 0, + 2 and
;; + 4 for the register list and prints the instruction with output_asm_insn.
3917 (define_insn "neon_vtbl3v8qi"
3918 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3919 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3920 (match_operand:V8QI 2 "s_register_operand" "w")]
3925 int tabbase = REGNO (operands[1]);
3927 ops[0] = operands[0];
3928 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3929 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3930 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3931 ops[4] = operands[2];
3932 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3936 [(set_attr "type" "neon_tbl3")]
;; Table lookup with a four-register table held in an OImode operand.  The
;; output code builds V8QI register rtxes at REGNO (operands[1]) + 0, + 2,
;; + 4 and + 6 for the register list and prints the instruction with
;; output_asm_insn.
3939 (define_insn "neon_vtbl4v8qi"
3940 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3941 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3942 (match_operand:V8QI 2 "s_register_operand" "w")]
3947 int tabbase = REGNO (operands[1]);
3949 ops[0] = operands[0];
3950 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3951 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3952 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3953 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3954 ops[5] = operands[2];
3955 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3959 [(set_attr "type" "neon_tbl4")]
3962 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table.  The output is earlyclobber ("=&w").
;; Once reload has completed the insn is split into two neon_vtbl2v8qi insns:
;; the table (operand 1) is viewed as TImode, and one lookup is emitted for
;; the low V8QI half and one for the high V8QI half of op0/op2.
3963 (define_insn_and_split "neon_vtbl1v16qi"
3964 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3965 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3966 (match_operand:V16QI 2 "s_register_operand" "w")]
3970 "&& reload_completed"
3973 rtx op0, op1, op2, part0, part2;
3977 op1 = gen_lowpart (TImode, operands[1]);
3980 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3981 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3982 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3983 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3985 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3986 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3987 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3988 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3991 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table.  The output is earlyclobber
;; ("=&w").  Once reload has completed the insn is split into two lookups,
;; one for the low V8QI half and one for the high V8QI half of op0/op2.
3994 (define_insn_and_split "neon_vtbl2v16qi"
3995 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3996 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
3997 (match_operand:V16QI 2 "s_register_operand" "w")]
4001 "&& reload_completed"
4004 rtx op0, op1, op2, part0, part2;
4011 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4012 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4013 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4014 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4016 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4017 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4018 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4019 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4022 [(set_attr "type" "multiple")]
4025 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4026 ;; handle quad-word input modes, producing octa-word output modes. But
4027 ;; that requires us to add support for octa-word vector modes in moves.
4028 ;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI registers into one OImode value.  Once reload has
;; completed the insn is split by calling neon_split_vcombine on the operands.
4029 (define_insn_and_split "neon_vcombinev16qi"
4030 [(set (match_operand:OI 0 "s_register_operand" "=w")
4031 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4032 (match_operand:V16QI 2 "s_register_operand" "w")]
4036 "&& reload_completed"
4039 neon_split_vcombine (operands);
4042 [(set_attr "type" "multiple")]
;; "vtbx" with a single V8QI table register.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is printed as the one-element register list
;; and operand 3 as the final operand.
4045 (define_insn "neon_vtbx1v8qi"
4046 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4047 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4048 (match_operand:V8QI 2 "s_register_operand" "w")
4049 (match_operand:V8QI 3 "s_register_operand" "w")]
4052 "vtbx.8\t%P0, {%P2}, %P3"
4053 [(set_attr "type" "neon_tbl1")]
;; "vtbx" with a two-register table held in a TImode operand.  Operand 1 is
;; tied to the output.  The output code builds V8QI register rtxes at
;; REGNO (operands[2]) + 0 and + 2 for the register list and prints the
;; instruction with output_asm_insn.
4056 (define_insn "neon_vtbx2v8qi"
4057 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4058 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4059 (match_operand:TI 2 "s_register_operand" "w")
4060 (match_operand:V8QI 3 "s_register_operand" "w")]
4065 int tabbase = REGNO (operands[2]);
4067 ops[0] = operands[0];
4068 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4069 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4070 ops[3] = operands[3];
4071 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4075 [(set_attr "type" "neon_tbl2")]
;; "vtbx" with a three-register table held in an EImode operand.  Operand 1
;; is tied to the output.  The output code builds V8QI register rtxes at
;; REGNO (operands[2]) + 0, + 2 and + 4 for the register list and prints the
;; instruction with output_asm_insn.
4078 (define_insn "neon_vtbx3v8qi"
4079 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4080 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4081 (match_operand:EI 2 "s_register_operand" "w")
4082 (match_operand:V8QI 3 "s_register_operand" "w")]
4087 int tabbase = REGNO (operands[2]);
4089 ops[0] = operands[0];
4090 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4091 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4092 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4093 ops[4] = operands[3];
4094 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4098 [(set_attr "type" "neon_tbl3")]
;; vtbx.8 with a four-register table held in an OImode operand (operand 2).
;; The table list is built from V8QI registers at REGNO (operands[2]) + 0,
;; + 2, + 4 and + 6; operand 3 is the index vector and operand 1 is tied to
;; the output.
4101 (define_insn "neon_vtbx4v8qi"
4102 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4103 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4104 (match_operand:OI 2 "s_register_operand" "w")
4105 (match_operand:V8QI 3 "s_register_operand" "w")]
4110 int tabbase = REGNO (operands[2]);
4112 ops[0] = operands[0];
4113 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4114 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4115 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4116 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4117 ops[5] = operands[3];
4118 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4122 [(set_attr "type" "neon_tbl4")]
;; Expander for the two-result transpose.  The pattern pairs two VDQW sets
;; over the same inputs (operands 1 and 2): operand 0 receives one unspec
;; result and operand 3 receives the UNSPEC_VTRN2 result.
4125 (define_expand "neon_vtrn<mode>_internal"
4127 [(set (match_operand:VDQW 0 "s_register_operand" "")
4128 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4129 (match_operand:VDQW 2 "s_register_operand" "")]
4131 (set (match_operand:VDQW 3 "s_register_operand" "")
4132 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4137 ;; Note: Different operand numbering to handle tied registers correctly.
;; Insn form of the transpose.  Outputs are operands 0 and 2; inputs 1 and 3
;; are tied to them through the "0" and "2" constraints, so the template
;; names only the two output registers.
4138 (define_insn "*neon_vtrn<mode>_insn"
4139 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4140 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4141 (match_operand:VDQW 3 "s_register_operand" "2")]
4143 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4144 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4147 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4148 [(set_attr "type" "neon_permute<q>")]
;; Expander for the two-result zip.  The pattern pairs two VDQW sets over
;; the same inputs (operands 1 and 2): operand 0 receives one unspec result
;; and operand 3 receives the UNSPEC_VZIP2 result.
4151 (define_expand "neon_vzip<mode>_internal"
4153 [(set (match_operand:VDQW 0 "s_register_operand" "")
4154 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4155 (match_operand:VDQW 2 "s_register_operand" "")]
4157 (set (match_operand:VDQW 3 "s_register_operand" "")
4158 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4163 ;; Note: Different operand numbering to handle tied registers correctly.
;; Insn form of the zip.  Outputs are operands 0 and 2; inputs 1 and 3 are
;; tied to them through the "0" and "2" constraints, so the template names
;; only the two output registers.
4164 (define_insn "*neon_vzip<mode>_insn"
4165 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4166 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4167 (match_operand:VDQW 3 "s_register_operand" "2")]
4169 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4170 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4173 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4174 [(set_attr "type" "neon_zip<q>")]
;; Expander for the two-result unzip.  The pattern pairs two VDQW sets over
;; the same inputs (operands 1 and 2): operand 0 receives one unspec result
;; and operand 3 receives the UNSPEC_VUZP2 result.
4177 (define_expand "neon_vuzp<mode>_internal"
4179 [(set (match_operand:VDQW 0 "s_register_operand" "")
4180 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4181 (match_operand:VDQW 2 "s_register_operand" "")]
4183 (set (match_operand:VDQW 3 "s_register_operand" "")
4184 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4189 ;; Note: Different operand numbering to handle tied registers correctly.
;; Insn form of the unzip.  Outputs are operands 0 and 2; inputs 1 and 3 are
;; tied to them through the "0" and "2" constraints, so the template names
;; only the two output registers.
4190 (define_insn "*neon_vuzp<mode>_insn"
4191 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4192 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4193 (match_operand:VDQW 3 "s_register_operand" "2")]
4195 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4196 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4199 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4200 [(set_attr "type" "neon_zip<q>")]
;; Reinterpret a VDX-mode register (operand 1) as V8QI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4203 (define_expand "neon_vreinterpretv8qi<mode>"
4204 [(match_operand:V8QI 0 "s_register_operand" "")
4205 (match_operand:VDX 1 "s_register_operand" "")]
4208 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V4HI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4212 (define_expand "neon_vreinterpretv4hi<mode>"
4213 [(match_operand:V4HI 0 "s_register_operand" "")
4214 (match_operand:VDX 1 "s_register_operand" "")]
4217 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V2SI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4221 (define_expand "neon_vreinterpretv2si<mode>"
4222 [(match_operand:V2SI 0 "s_register_operand" "")
4223 (match_operand:VDX 1 "s_register_operand" "")]
4226 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as V2SF (operand 0); the
;; expansion is delegated to neon_reinterpret.
4230 (define_expand "neon_vreinterpretv2sf<mode>"
4231 [(match_operand:V2SF 0 "s_register_operand" "")
4232 (match_operand:VDX 1 "s_register_operand" "")]
4235 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDX-mode register (operand 1) as DI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4239 (define_expand "neon_vreinterpretdi<mode>"
4240 [(match_operand:DI 0 "s_register_operand" "")
4241 (match_operand:VDX 1 "s_register_operand" "")]
4244 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as TI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4248 (define_expand "neon_vreinterpretti<mode>"
4249 [(match_operand:TI 0 "s_register_operand" "")
4250 (match_operand:VQXMOV 1 "s_register_operand" "")]
4253 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V16QI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4258 (define_expand "neon_vreinterpretv16qi<mode>"
4259 [(match_operand:V16QI 0 "s_register_operand" "")
4260 (match_operand:VQXMOV 1 "s_register_operand" "")]
4263 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V8HI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4267 (define_expand "neon_vreinterpretv8hi<mode>"
4268 [(match_operand:V8HI 0 "s_register_operand" "")
4269 (match_operand:VQXMOV 1 "s_register_operand" "")]
4272 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4276 (define_expand "neon_vreinterpretv4si<mode>"
4277 [(match_operand:V4SI 0 "s_register_operand" "")
4278 (match_operand:VQXMOV 1 "s_register_operand" "")]
4281 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SF (operand 0); the
;; expansion is delegated to neon_reinterpret.
4285 (define_expand "neon_vreinterpretv4sf<mode>"
4286 [(match_operand:V4SF 0 "s_register_operand" "")
4287 (match_operand:VQXMOV 1 "s_register_operand" "")]
4290 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQXMOV-mode register (operand 1) as V2DI (operand 0); the
;; expansion is delegated to neon_reinterpret.
4294 (define_expand "neon_vreinterpretv2di<mode>"
4295 [(match_operand:V2DI 0 "s_register_operand" "")
4296 (match_operand:VQXMOV 1 "s_register_operand" "")]
4299 neon_reinterpret (operands[0], operands[1]);
;; vec_load_lanes expander for a single vector: a VDQX register (operand 0)
;; is set from an unspec of a VDQX neon_struct_operand memory (operand 1).
4303 (define_expand "vec_load_lanes<mode><mode>"
4304 [(set (match_operand:VDQX 0 "s_register_operand")
4305 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Whole-vector load: emits vld1.<V_sz_elem> with the destination printed
;; through %h0 and the "Um" memory operand printed through %A1.
4309 (define_insn "neon_vld1<mode>"
4310 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4311 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4314 "vld1.<V_sz_elem>\t%h0, %A1"
4315 [(set_attr "type" "neon_load1_1reg<q>")]
;; Single-lane load into a VDX vector.  Operand 1 is a <V_elem> memory
;; operand, operand 3 the immediate lane number, and operand 2 is tied to
;; the output.  error ("lane out of range") is raised when the lane is
;; negative or not below GET_MODE_NUNITS (<MODE>mode).  Two templates are
;; returned: a whole-register vld1 and the indexed {%P0[%c3]} form.
;; NOTE(review): presumably the whole-register form is for one-element
;; modes -- confirm against the selecting condition.
4318 (define_insn "neon_vld1_lane<mode>"
4319 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4320 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4321 (match_operand:VDX 2 "s_register_operand" "0")
4322 (match_operand:SI 3 "immediate_operand" "i")]
4326 HOST_WIDE_INT lane = INTVAL (operands[3]);
4327 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4328 if (lane < 0 || lane >= max)
4329 error ("lane out of range");
4331 return "vld1.<V_sz_elem>\t%P0, %A1";
4333 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4335 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Single-lane load into a VQX (quad) vector.  The lane is range-checked
;; against GET_MODE_NUNITS (<MODE>mode); lanes at or above max / 2 take a
;; separate branch.  Before output, operands[3] is replaced with
;; GEN_INT (lane) and operands[0] with a <V_HALF>mode register at regno, so
;; the templates address a single half-width register through %P0.
4338 (define_insn "neon_vld1_lane<mode>"
4339 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4340 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4341 (match_operand:VQX 2 "s_register_operand" "0")
4342 (match_operand:SI 3 "immediate_operand" "i")]
4346 HOST_WIDE_INT lane = INTVAL (operands[3]);
4347 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4348 int regno = REGNO (operands[0]);
4349 if (lane < 0 || lane >= max)
4350 error ("lane out of range");
4351 else if (lane >= max / 2)
4355 operands[3] = GEN_INT (lane);
4357 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4359 return "vld1.<V_sz_elem>\t%P0, %A1";
4361 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4363 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one <V_elem> from memory and duplicate it across a VD vector
;; (vec_duplicate); emitted as the all-lanes form vld1 {%P0[]}.
4366 (define_insn "neon_vld1_dup<mode>"
4367 [(set (match_operand:VD 0 "s_register_operand" "=w")
4368 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4370 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4371 [(set_attr "type" "neon_load1_all_lanes<q>")]
4374 ;; Special case for DImode. Treat it exactly like a simple load.
;; DImode variant: a DI register (operand 0) is set from an unspec of a DI
;; neon_struct_operand memory (operand 1).
4375 (define_expand "neon_vld1_dupdi"
4376 [(set (match_operand:DI 0 "s_register_operand" "")
4377 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Load one <V_elem> from memory and duplicate it across a VQ (quad) vector;
;; the template names both halves of the destination, %e0[] and %f0[], in
;; one vld1 all-lanes instruction.
4383 (define_insn "neon_vld1_dup<mode>"
4384 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4385 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4388 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4390 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicate-load, split after reload into two steps: load the DI value
;; into the low part of operand 0 via gen_neon_vld1_dupdi, then copy that
;; low part into the high part with emit_move_insn.  Length is 8.
4393 (define_insn_and_split "neon_vld1_dupv2di"
4394 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4395 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4398 "&& reload_completed"
4401 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4402 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4403 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4406 [(set_attr "length" "8")
4407 (set_attr "type" "neon_load1_all_lanes_q")]
;; vec_store_lanes expander for a single vector: a VDQX neon_struct_operand
;; memory (operand 0) is set from an unspec of a VDQX register (operand 1).
4410 (define_expand "vec_store_lanes<mode><mode>"
4411 [(set (match_operand:VDQX 0 "neon_struct_operand")
4412 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Whole-vector store: emits vst1.<V_sz_elem> with the source register
;; printed through %h1 and the "Um" memory operand through %A0.
4416 (define_insn "neon_vst1<mode>"
4417 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4418 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4421 "vst1.<V_sz_elem>\t%h1, %A0"
4422 [(set_attr "type" "neon_store1_1reg<q>")])
;; Single-lane store from a VDX vector (operand 1) to a <V_elem> memory
;; operand (operand 0); operand 2 is the immediate lane number.
;; error ("lane out of range") is raised when the lane is negative or not
;; below GET_MODE_NUNITS (<MODE>mode).  Two templates are returned: a
;; whole-register {%P1} form and the indexed {%P1[%c2]} form.
4424 (define_insn "neon_vst1_lane<mode>"
4425 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4427 [(match_operand:VDX 1 "s_register_operand" "w")
4428 (match_operand:SI 2 "immediate_operand" "i")]
4432 HOST_WIDE_INT lane = INTVAL (operands[2]);
4433 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4434 if (lane < 0 || lane >= max)
4435 error ("lane out of range");
4437 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4439 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4441 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Single-lane store from a VQX (quad) vector.  The lane is range-checked
;; against GET_MODE_NUNITS (<MODE>mode); lanes at or above max / 2 take a
;; separate branch.  Before output, operands[2] is replaced with
;; GEN_INT (lane) and operands[1] with a <V_HALF>mode register at regno, so
;; the templates address a single half-width register through %P1.
4444 (define_insn "neon_vst1_lane<mode>"
4445 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4447 [(match_operand:VQX 1 "s_register_operand" "w")
4448 (match_operand:SI 2 "immediate_operand" "i")]
4452 HOST_WIDE_INT lane = INTVAL (operands[2]);
4453 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4454 int regno = REGNO (operands[1]);
4455 if (lane < 0 || lane >= max)
4456 error ("lane out of range");
4457 else if (lane >= max / 2)
4461 operands[2] = GEN_INT (lane);
4463 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4465 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4467 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4469 [(set_attr "type" "neon_store1_one_lane<q>")]
;; vec_load_lanes expander for a two-vector structure: a TI register
;; (operand 0) is set from an unspec of a TI memory operand.  The VDX dummy
;; unspec (UNSPEC_VSTRUCTDUMMY) carries only the element mode iterator.
4472 (define_expand "vec_load_lanesti<mode>"
4473 [(set (match_operand:TI 0 "s_register_operand")
4474 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4475 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load into a TI register.  For 64-bit elements
;; (<V_sz_elem> == 64) it returns vld1.64, otherwise vld2.<V_sz_elem>.  The
;; type attribute makes the same test, choosing neon_load1_2reg<q> or
;; neon_load2_2reg<q>.
4479 (define_insn "neon_vld2<mode>"
4480 [(set (match_operand:TI 0 "s_register_operand" "=w")
4481 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4482 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4486 if (<V_sz_elem> == 64)
4487 return "vld1.64\t%h0, %A1";
4489 return "vld2.<V_sz_elem>\t%h0, %A1";
4492 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4493 (const_string "neon_load1_2reg<q>")
4494 (const_string "neon_load2_2reg<q>")))]
;; vec_load_lanes expander for a two-vector structure of quad vectors: an
;; OI register (operand 0) is set from an unspec of an OI memory operand.
;; The VQ dummy unspec carries only the element mode iterator.
4497 (define_expand "vec_load_lanesoi<mode>"
4498 [(set (match_operand:OI 0 "s_register_operand")
4499 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4500 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load into an OI register for VQ modes; always
;; emitted as a single vld2.<V_sz_elem>.
4504 (define_insn "neon_vld2<mode>"
4505 [(set (match_operand:OI 0 "s_register_operand" "=w")
4506 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4507 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4510 "vld2.<V_sz_elem>\t%h0, %A1"
4511 [(set_attr "type" "neon_load2_2reg_q")])
;; Load one two-element structure into lane operand 3 of a TI register
;; pair; operand 2 is tied to the output.  The two destination registers are
;; built as DImode registers at regno and regno + 2.
;; NOTE(review): error ("lane out of range") is raised for a bad lane, but
;; the instruction is still emitted with the unmodified lane value.
4513 (define_insn "neon_vld2_lane<mode>"
4514 [(set (match_operand:TI 0 "s_register_operand" "=w")
4515 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4516 (match_operand:TI 2 "s_register_operand" "0")
4517 (match_operand:SI 3 "immediate_operand" "i")
4518 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4522 HOST_WIDE_INT lane = INTVAL (operands[3]);
4523 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4524 int regno = REGNO (operands[0]);
4526 if (lane < 0 || lane >= max)
4527 error ("lane out of range");
4528 ops[0] = gen_rtx_REG (DImode, regno);
4529 ops[1] = gen_rtx_REG (DImode, regno + 2);
4530 ops[2] = operands[1];
4531 ops[3] = operands[3];
4532 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4535 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load one two-element structure into a lane of an OI register group (VMQ
;; modes); operand 2 is tied to the output.  The lane is range-checked, and
;; lanes at or above max / 2 take a separate branch.  The destination
;; registers are built as DImode registers at regno and regno + 4, and the
;; lane is passed to the template as GEN_INT (lane).
4538 (define_insn "neon_vld2_lane<mode>"
4539 [(set (match_operand:OI 0 "s_register_operand" "=w")
4540 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4541 (match_operand:OI 2 "s_register_operand" "0")
4542 (match_operand:SI 3 "immediate_operand" "i")
4543 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4547 HOST_WIDE_INT lane = INTVAL (operands[3]);
4548 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4549 int regno = REGNO (operands[0]);
4551 if (lane < 0 || lane >= max)
4552 error ("lane out of range");
4553 else if (lane >= max / 2)
4558 ops[0] = gen_rtx_REG (DImode, regno);
4559 ops[1] = gen_rtx_REG (DImode, regno + 4);
4560 ops[2] = operands[1];
4561 ops[3] = GEN_INT (lane);
4562 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4565 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load one two-element structure and replicate it to all lanes of a TI
;; register pair.  Modes with more than one unit use the all-lanes vld2
;; form {%e0[], %f0[]}; otherwise a plain vld1 is returned.  The type
;; attribute makes the same split on <V_mode_nunits>.
4568 (define_insn "neon_vld2_dup<mode>"
4569 [(set (match_operand:TI 0 "s_register_operand" "=w")
4570 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4571 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4575 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4576 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4578 return "vld1.<V_sz_elem>\t%h0, %A1";
4581 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4582 (const_string "neon_load2_all_lanes<q>")
4583 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanes expander for a two-vector structure: a TI memory operand
;; (operand 0) is set from an unspec of a TI register.  The VDX dummy unspec
;; carries only the element mode iterator.
4586 (define_expand "vec_store_lanesti<mode>"
4587 [(set (match_operand:TI 0 "neon_struct_operand")
4588 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4589 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store from a TI register.  For 64-bit elements it
;; returns vst1.64, otherwise vst2.<V_sz_elem>.
;; NOTE(review): the non-64-bit case is typed neon_store2_one_lane<q>
;; although the template stores the whole structure -- confirm this
;; scheduling type is intended.
4593 (define_insn "neon_vst2<mode>"
4594 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4595 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4596 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4600 if (<V_sz_elem> == 64)
4601 return "vst1.64\t%h1, %A0";
4603 return "vst2.<V_sz_elem>\t%h1, %A0";
4606 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4607 (const_string "neon_store1_2reg<q>")
4608 (const_string "neon_store2_one_lane<q>")))]
;; vec_store_lanes expander for a two-vector structure of quad vectors: an
;; OI memory operand (operand 0) is set from an unspec of an OI register.
;; The VQ dummy unspec carries only the element mode iterator.
4611 (define_expand "vec_store_lanesoi<mode>"
4612 [(set (match_operand:OI 0 "neon_struct_operand")
4613 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4614 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store from an OI register for VQ modes; always
;; emitted as a single vst2.<V_sz_elem>.
4618 (define_insn "neon_vst2<mode>"
4619 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4620 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4621 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4624 "vst2.<V_sz_elem>\t%h1, %A0"
4625 [(set_attr "type" "neon_store2_4reg<q>")]
;; Store lane operand 2 of a TI register pair as one two-element structure
;; to a <V_two_elem> memory operand.  The two source registers are built as
;; DImode registers at regno and regno + 2.  The lane is range-checked
;; against GET_MODE_NUNITS (<MODE>mode) with error ("lane out of range").
4628 (define_insn "neon_vst2_lane<mode>"
4629 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4630 (unspec:<V_two_elem>
4631 [(match_operand:TI 1 "s_register_operand" "w")
4632 (match_operand:SI 2 "immediate_operand" "i")
4633 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4637 HOST_WIDE_INT lane = INTVAL (operands[2]);
4638 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4639 int regno = REGNO (operands[1]);
4641 if (lane < 0 || lane >= max)
4642 error ("lane out of range");
4643 ops[0] = operands[0];
4644 ops[1] = gen_rtx_REG (DImode, regno);
4645 ops[2] = gen_rtx_REG (DImode, regno + 2);
4646 ops[3] = operands[2];
4647 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4650 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Store one lane of an OI register group (VMQ modes) as a two-element
;; structure.  The lane is range-checked, and lanes at or above max / 2
;; take a separate branch.  The source registers are built as DImode
;; registers at regno and regno + 4, and the lane is passed to the template
;; as GEN_INT (lane).
4653 (define_insn "neon_vst2_lane<mode>"
4654 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4655 (unspec:<V_two_elem>
4656 [(match_operand:OI 1 "s_register_operand" "w")
4657 (match_operand:SI 2 "immediate_operand" "i")
4658 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4662 HOST_WIDE_INT lane = INTVAL (operands[2]);
4663 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4664 int regno = REGNO (operands[1]);
4666 if (lane < 0 || lane >= max)
4667 error ("lane out of range");
4668 else if (lane >= max / 2)
4673 ops[0] = operands[0];
4674 ops[1] = gen_rtx_REG (DImode, regno);
4675 ops[2] = gen_rtx_REG (DImode, regno + 4);
4676 ops[3] = GEN_INT (lane);
4677 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4680 [(set_attr "type" "neon_store2_one_lane<q>")]
;; vec_load_lanes expander for a three-vector structure: an EI register
;; (operand 0) is set from an unspec of an EI memory operand.  The VDX dummy
;; unspec carries only the element mode iterator.
4683 (define_expand "vec_load_lanesei<mode>"
4684 [(set (match_operand:EI 0 "s_register_operand")
4685 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4686 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure load into an EI register.  For 64-bit elements it
;; returns vld1.64, otherwise vld3.<V_sz_elem>.  The type attribute makes
;; the same test, choosing neon_load1_3reg<q> or neon_load3_3reg<q>.
4690 (define_insn "neon_vld3<mode>"
4691 [(set (match_operand:EI 0 "s_register_operand" "=w")
4692 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4693 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4697 if (<V_sz_elem> == 64)
4698 return "vld1.64\t%h0, %A1";
4700 return "vld3.<V_sz_elem>\t%h0, %A1";
4703 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4704 (const_string "neon_load1_3reg<q>")
4705 (const_string "neon_load3_3reg<q>")))]
;; vec_load_lanes expander for three quad vectors (CI mode); forwards both
;; operands unchanged to gen_neon_vld3<mode>.
4708 (define_expand "vec_load_lanesci<mode>"
4709 [(match_operand:CI 0 "s_register_operand")
4710 (match_operand:CI 1 "neon_struct_operand")
4711 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4714 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Quad-vector vld3 expander.  The CI-mode memory is accessed as two EImode
;; pieces: neon_vld3qa loads from offset 0, then neon_vld3qb loads from
;; offset GET_MODE_SIZE (EImode).  The second call passes operands[0] again
;; as its tied input.
4718 (define_expand "neon_vld3<mode>"
4719 [(match_operand:CI 0 "s_register_operand")
4720 (match_operand:CI 1 "neon_struct_operand")
4721 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4726 mem = adjust_address (operands[1], EImode, 0);
4727 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4728 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4729 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of a quad-vector vld3: loads an EImode memory operand into
;; the DImode registers at regno, regno + 4 and regno + 8 of the CI
;; destination.
4733 (define_insn "neon_vld3qa<mode>"
4734 [(set (match_operand:CI 0 "s_register_operand" "=w")
4735 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4736 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4740 int regno = REGNO (operands[0]);
4742 ops[0] = gen_rtx_REG (DImode, regno);
4743 ops[1] = gen_rtx_REG (DImode, regno + 4);
4744 ops[2] = gen_rtx_REG (DImode, regno + 8);
4745 ops[3] = operands[1];
4746 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4749 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of a quad-vector vld3: loads an EImode memory operand into
;; the DImode registers at regno + 2, regno + 6 and regno + 10.  Operand 2
;; (the CI value so far) is tied to the output.
4752 (define_insn "neon_vld3qb<mode>"
4753 [(set (match_operand:CI 0 "s_register_operand" "=w")
4754 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4755 (match_operand:CI 2 "s_register_operand" "0")
4756 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4760 int regno = REGNO (operands[0]);
4762 ops[0] = gen_rtx_REG (DImode, regno + 2);
4763 ops[1] = gen_rtx_REG (DImode, regno + 6);
4764 ops[2] = gen_rtx_REG (DImode, regno + 10);
4765 ops[3] = operands[1];
4766 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4769 [(set_attr "type" "neon_load3_3reg<q>")]
;; Load one three-element structure into lane operand 3 of an EI register
;; group; operand 2 is tied to the output.  The destination registers are
;; built as DImode registers at regno, regno + 2 and regno + 4.
;; NOTE(review): the memory operand is printed with plain %3, not %A3 as in
;; the vld2/vld4 lane patterns -- presumably deliberate; confirm.
4772 (define_insn "neon_vld3_lane<mode>"
4773 [(set (match_operand:EI 0 "s_register_operand" "=w")
4774 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4775 (match_operand:EI 2 "s_register_operand" "0")
4776 (match_operand:SI 3 "immediate_operand" "i")
4777 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4781 HOST_WIDE_INT lane = INTVAL (operands[3]);
4782 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4783 int regno = REGNO (operands[0]);
4785 if (lane < 0 || lane >= max)
4786 error ("lane out of range");
4787 ops[0] = gen_rtx_REG (DImode, regno);
4788 ops[1] = gen_rtx_REG (DImode, regno + 2);
4789 ops[2] = gen_rtx_REG (DImode, regno + 4);
4790 ops[3] = operands[1];
4791 ops[4] = operands[3];
4792 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4796 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load one three-element structure into a lane of a CI register group (VMQ
;; modes); operand 2 is tied to the output.  The lane is range-checked, and
;; lanes at or above max / 2 take a separate branch.  The destination
;; registers are DImode registers at regno, regno + 4 and regno + 8; the
;; lane is passed as GEN_INT (lane) and the memory operand is printed with
;; plain %3.
4799 (define_insn "neon_vld3_lane<mode>"
4800 [(set (match_operand:CI 0 "s_register_operand" "=w")
4801 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4802 (match_operand:CI 2 "s_register_operand" "0")
4803 (match_operand:SI 3 "immediate_operand" "i")
4804 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4808 HOST_WIDE_INT lane = INTVAL (operands[3]);
4809 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4810 int regno = REGNO (operands[0]);
4812 if (lane < 0 || lane >= max)
4813 error ("lane out of range");
4814 else if (lane >= max / 2)
4819 ops[0] = gen_rtx_REG (DImode, regno);
4820 ops[1] = gen_rtx_REG (DImode, regno + 4);
4821 ops[2] = gen_rtx_REG (DImode, regno + 8);
4822 ops[3] = operands[1];
4823 ops[4] = GEN_INT (lane);
4824 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4828 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load one three-element structure and replicate it to all lanes of an EI
;; register group.  Modes with more than one unit emit the all-lanes vld3
;; form on DImode registers at regno, regno + 2 and regno + 4, with the
;; memory operand printed as plain %3; otherwise a plain vld1 is returned.
;; The type attribute makes the same split on <V_mode_nunits>.
4831 (define_insn "neon_vld3_dup<mode>"
4832 [(set (match_operand:EI 0 "s_register_operand" "=w")
4833 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4834 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4838 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4840 int regno = REGNO (operands[0]);
4842 ops[0] = gen_rtx_REG (DImode, regno);
4843 ops[1] = gen_rtx_REG (DImode, regno + 2);
4844 ops[2] = gen_rtx_REG (DImode, regno + 4);
4845 ops[3] = operands[1];
4846 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4850 return "vld1.<V_sz_elem>\t%h0, %A1";
4853 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4854 (const_string "neon_load3_all_lanes<q>")
4855 (const_string "neon_load1_1reg<q>")))])
;; vec_store_lanes expander for a three-vector structure: an EI memory
;; operand (operand 0) is set from an unspec of an EI register.  The VDX
;; dummy unspec carries only the element mode iterator.
4857 (define_expand "vec_store_lanesei<mode>"
4858 [(set (match_operand:EI 0 "neon_struct_operand")
4859 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4860 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure store from an EI register.  For 64-bit elements
;; it returns vst1.64, otherwise vst3.<V_sz_elem>.
;; NOTE(review): the non-64-bit case is typed neon_store3_one_lane<q>
;; although the template stores the whole structure -- confirm this
;; scheduling type is intended.
4864 (define_insn "neon_vst3<mode>"
4865 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4866 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4867 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4871 if (<V_sz_elem> == 64)
4872 return "vst1.64\t%h1, %A0";
4874 return "vst3.<V_sz_elem>\t%h1, %A0";
4877 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4878 (const_string "neon_store1_3reg<q>")
4879 (const_string "neon_store3_one_lane<q>")))])
;; vec_store_lanes expander for three quad vectors (CI mode); forwards both
;; operands unchanged to gen_neon_vst3<mode>.
4881 (define_expand "vec_store_lanesci<mode>"
4882 [(match_operand:CI 0 "neon_struct_operand")
4883 (match_operand:CI 1 "s_register_operand")
4884 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4887 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Quad-vector vst3 expander.  The CI-mode memory is written as two EImode
;; pieces: neon_vst3qa stores to offset 0, then neon_vst3qb stores to offset
;; GET_MODE_SIZE (EImode).  Both take the same CI source register.
4891 (define_expand "neon_vst3<mode>"
4892 [(match_operand:CI 0 "neon_struct_operand")
4893 (match_operand:CI 1 "s_register_operand")
4894 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4899 mem = adjust_address (operands[0], EImode, 0);
4900 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4901 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4902 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of a quad-vector vst3: stores the DImode registers at regno,
;; regno + 4 and regno + 8 of the CI source to an EImode memory operand.
4906 (define_insn "neon_vst3qa<mode>"
4907 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4908 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4909 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4913 int regno = REGNO (operands[1]);
4915 ops[0] = operands[0];
4916 ops[1] = gen_rtx_REG (DImode, regno);
4917 ops[2] = gen_rtx_REG (DImode, regno + 4);
4918 ops[3] = gen_rtx_REG (DImode, regno + 8);
4919 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4922 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of a quad-vector vst3: stores the DImode registers at
;; regno + 2, regno + 6 and regno + 10 of the CI source to an EImode memory
;; operand.
4925 (define_insn "neon_vst3qb<mode>"
4926 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4927 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4928 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4932 int regno = REGNO (operands[1]);
4934 ops[0] = operands[0];
4935 ops[1] = gen_rtx_REG (DImode, regno + 2);
4936 ops[2] = gen_rtx_REG (DImode, regno + 6);
4937 ops[3] = gen_rtx_REG (DImode, regno + 10);
4938 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4941 [(set_attr "type" "neon_store3_3reg<q>")]
;; Store lane operand 2 of an EI register group as one three-element
;; structure.  The source registers are built as DImode registers at regno,
;; regno + 2 and regno + 4.
;; NOTE(review): the memory operand is printed with plain %0, not %A0 as in
;; the vst2 lane patterns -- presumably deliberate; confirm.
4944 (define_insn "neon_vst3_lane<mode>"
4945 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4946 (unspec:<V_three_elem>
4947 [(match_operand:EI 1 "s_register_operand" "w")
4948 (match_operand:SI 2 "immediate_operand" "i")
4949 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4953 HOST_WIDE_INT lane = INTVAL (operands[2]);
4954 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4955 int regno = REGNO (operands[1]);
4957 if (lane < 0 || lane >= max)
4958 error ("lane out of range");
4959 ops[0] = operands[0];
4960 ops[1] = gen_rtx_REG (DImode, regno);
4961 ops[2] = gen_rtx_REG (DImode, regno + 2);
4962 ops[3] = gen_rtx_REG (DImode, regno + 4);
4963 ops[4] = operands[2];
4964 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4968 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Store one lane of a CI register group (VMQ modes) as a three-element
;; structure.  The lane is range-checked, and lanes at or above max / 2
;; take a separate branch.  The source registers are DImode registers at
;; regno, regno + 4 and regno + 8; the lane is passed as GEN_INT (lane) and
;; the memory operand is printed with plain %0.
4971 (define_insn "neon_vst3_lane<mode>"
4972 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4973 (unspec:<V_three_elem>
4974 [(match_operand:CI 1 "s_register_operand" "w")
4975 (match_operand:SI 2 "immediate_operand" "i")
4976 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4980 HOST_WIDE_INT lane = INTVAL (operands[2]);
4981 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4982 int regno = REGNO (operands[1]);
4984 if (lane < 0 || lane >= max)
4985 error ("lane out of range");
4986 else if (lane >= max / 2)
4991 ops[0] = operands[0];
4992 ops[1] = gen_rtx_REG (DImode, regno);
4993 ops[2] = gen_rtx_REG (DImode, regno + 4);
4994 ops[3] = gen_rtx_REG (DImode, regno + 8);
4995 ops[4] = GEN_INT (lane);
4996 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5000 [(set_attr "type" "neon_store3_one_lane<q>")]
;; vec_load_lanes expander for a four-vector structure of VDX vectors: an
;; OI register (operand 0) is set from an unspec of an OI memory operand.
;; The VDX dummy unspec carries only the element mode iterator.
5003 (define_expand "vec_load_lanesoi<mode>"
5004 [(set (match_operand:OI 0 "s_register_operand")
5005 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5006 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-vector structure load into an OI register.  For 64-bit elements it
;; returns vld1.64, otherwise vld4.<V_sz_elem>.  The type attribute makes
;; the same test, choosing neon_load1_4reg<q> or neon_load4_4reg<q>.
5010 (define_insn "neon_vld4<mode>"
5011 [(set (match_operand:OI 0 "s_register_operand" "=w")
5012 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5013 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5017 if (<V_sz_elem> == 64)
5018 return "vld1.64\t%h0, %A1";
5020 return "vld4.<V_sz_elem>\t%h0, %A1";
5023 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5024 (const_string "neon_load1_4reg<q>")
5025 (const_string "neon_load4_4reg<q>")))]
;; vec_load_lanes expander for four quad vectors (XI mode); forwards both
;; operands unchanged to gen_neon_vld4<mode>.
5028 (define_expand "vec_load_lanesxi<mode>"
5029 [(match_operand:XI 0 "s_register_operand")
5030 (match_operand:XI 1 "neon_struct_operand")
5031 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5034 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Quad-vector vld4 expander.  The XI-mode memory is accessed as two OImode
;; pieces: neon_vld4qa loads from offset 0, then neon_vld4qb loads from
;; offset GET_MODE_SIZE (OImode).  The second call passes operands[0] again
;; as its tied input.
5038 (define_expand "neon_vld4<mode>"
5039 [(match_operand:XI 0 "s_register_operand")
5040 (match_operand:XI 1 "neon_struct_operand")
5041 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5046 mem = adjust_address (operands[1], OImode, 0);
5047 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5048 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5049 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of a quad-vector vld4: loads an OImode memory operand into
;; the DImode registers at regno, regno + 4, regno + 8 and regno + 12 of
;; the XI destination.
5053 (define_insn "neon_vld4qa<mode>"
5054 [(set (match_operand:XI 0 "s_register_operand" "=w")
5055 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5056 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5060 int regno = REGNO (operands[0]);
5062 ops[0] = gen_rtx_REG (DImode, regno);
5063 ops[1] = gen_rtx_REG (DImode, regno + 4);
5064 ops[2] = gen_rtx_REG (DImode, regno + 8);
5065 ops[3] = gen_rtx_REG (DImode, regno + 12);
5066 ops[4] = operands[1];
5067 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5070 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of a quad-vector vld4: loads an OImode memory operand into
;; the DImode registers at regno + 2, regno + 6, regno + 10 and regno + 14.
;; Operand 2 (the XI value so far) is tied to the output.
5073 (define_insn "neon_vld4qb<mode>"
5074 [(set (match_operand:XI 0 "s_register_operand" "=w")
5075 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5076 (match_operand:XI 2 "s_register_operand" "0")
5077 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5081 int regno = REGNO (operands[0]);
5083 ops[0] = gen_rtx_REG (DImode, regno + 2);
5084 ops[1] = gen_rtx_REG (DImode, regno + 6);
5085 ops[2] = gen_rtx_REG (DImode, regno + 10);
5086 ops[3] = gen_rtx_REG (DImode, regno + 14);
5087 ops[4] = operands[1];
5088 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5091 [(set_attr "type" "neon_load4_4reg<q>")]
;; Load one four-element structure into lane operand 3 of an OI register
;; group; operand 2 is tied to the output.  The destination registers are
;; built as DImode registers at regno, regno + 2, regno + 4 and regno + 6.
;; The lane is range-checked against GET_MODE_NUNITS (<MODE>mode) with
;; error ("lane out of range").
5094 (define_insn "neon_vld4_lane<mode>"
5095 [(set (match_operand:OI 0 "s_register_operand" "=w")
5096 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5097 (match_operand:OI 2 "s_register_operand" "0")
5098 (match_operand:SI 3 "immediate_operand" "i")
5099 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5103 HOST_WIDE_INT lane = INTVAL (operands[3]);
5104 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5105 int regno = REGNO (operands[0]);
5107 if (lane < 0 || lane >= max)
5108 error ("lane out of range");
5109 ops[0] = gen_rtx_REG (DImode, regno);
5110 ops[1] = gen_rtx_REG (DImode, regno + 2);
5111 ops[2] = gen_rtx_REG (DImode, regno + 4);
5112 ops[3] = gen_rtx_REG (DImode, regno + 6);
5113 ops[4] = operands[1];
5114 ops[5] = operands[3];
5115 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5119 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load one four-element structure into a lane of an XI register group (VMQ
;; modes); operand 2 is tied to the output.  The lane is range-checked, and
;; lanes at or above max / 2 take a separate branch.  The destination
;; registers are DImode registers at regno, regno + 4, regno + 8 and
;; regno + 12, and the lane is passed as GEN_INT (lane).
5122 (define_insn "neon_vld4_lane<mode>"
5123 [(set (match_operand:XI 0 "s_register_operand" "=w")
5124 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5125 (match_operand:XI 2 "s_register_operand" "0")
5126 (match_operand:SI 3 "immediate_operand" "i")
5127 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5131 HOST_WIDE_INT lane = INTVAL (operands[3]);
5132 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5133 int regno = REGNO (operands[0]);
5135 if (lane < 0 || lane >= max)
5136 error ("lane out of range");
5137 else if (lane >= max / 2)
5142 ops[0] = gen_rtx_REG (DImode, regno);
5143 ops[1] = gen_rtx_REG (DImode, regno + 4);
5144 ops[2] = gen_rtx_REG (DImode, regno + 8);
5145 ops[3] = gen_rtx_REG (DImode, regno + 12);
5146 ops[4] = operands[1];
5147 ops[5] = GEN_INT (lane);
5148 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5152 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load one four-element structure and replicate it to all lanes of an OI
;; register group.  Modes with more than one unit emit the all-lanes vld4
;; form on DImode registers at regno, regno + 2, regno + 4 and regno + 6;
;; otherwise a plain vld1 is returned.  The type attribute makes the same
;; split on <V_mode_nunits>.
5155 (define_insn "neon_vld4_dup<mode>"
5156 [(set (match_operand:OI 0 "s_register_operand" "=w")
5157 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5158 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5162 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5164 int regno = REGNO (operands[0]);
5166 ops[0] = gen_rtx_REG (DImode, regno);
5167 ops[1] = gen_rtx_REG (DImode, regno + 2);
5168 ops[2] = gen_rtx_REG (DImode, regno + 4);
5169 ops[3] = gen_rtx_REG (DImode, regno + 6);
5170 ops[4] = operands[1];
5171 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5176 return "vld1.<V_sz_elem>\t%h0, %A1";
5179 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5180 (const_string "neon_load4_all_lanes<q>")
5181 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanes expander for a four-vector structure of VDX vectors: an
;; OI memory operand (operand 0) is set from an unspec of an OI register.
;; The VDX dummy unspec carries only the element mode iterator.
5184 (define_expand "vec_store_lanesoi<mode>"
5185 [(set (match_operand:OI 0 "neon_struct_operand")
5186 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5187 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-vector structure store from an OI register.  For 64-bit elements it
;; returns vst1.64, otherwise vst4.<V_sz_elem>.  The type attribute makes
;; the same test, choosing neon_store1_4reg<q> or neon_store4_4reg<q>.
5191 (define_insn "neon_vst4<mode>"
5192 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5193 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5194 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5198 if (<V_sz_elem> == 64)
5199 return "vst1.64\t%h1, %A0";
5201 return "vst4.<V_sz_elem>\t%h1, %A0";
5204 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5205 (const_string "neon_store1_4reg<q>")
5206 (const_string "neon_store4_4reg<q>")))]
;; vec_store_lanes expander for four quad vectors (XI mode); forwards both
;; operands unchanged to gen_neon_vst4<mode>.
5209 (define_expand "vec_store_lanesxi<mode>"
5210 [(match_operand:XI 0 "neon_struct_operand")
5211 (match_operand:XI 1 "s_register_operand")
5212 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5215 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; Quad-vector vst4 expander.  The XI-mode memory is written as two OImode
;; pieces: neon_vst4qa stores to offset 0, then neon_vst4qb stores to offset
;; GET_MODE_SIZE (OImode).  Both take the same XI source register.
5219 (define_expand "neon_vst4<mode>"
5220 [(match_operand:XI 0 "neon_struct_operand")
5221 (match_operand:XI 1 "s_register_operand")
5222 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5227 mem = adjust_address (operands[0], OImode, 0);
5228 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5229 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5230 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of a quad-register vst4.  Stores, with a single vst4, the
;; four DImode registers built at REGNO, REGNO + 4, REGNO + 8 and
;; REGNO + 12 of the XImode operand 1 to the OImode memory operand 0.
;; NOTE(review): presumably these are the low D halves of the four Q
;; registers -- confirm against the VFP register numbering.
5234 (define_insn "neon_vst4qa<mode>"
5235 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5236 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5237 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5241 int regno = REGNO (operands[1]);
5243 ops[0] = operands[0];
5244 ops[1] = gen_rtx_REG (DImode, regno);
5245 ops[2] = gen_rtx_REG (DImode, regno + 4);
5246 ops[3] = gen_rtx_REG (DImode, regno + 8);
5247 ops[4] = gen_rtx_REG (DImode, regno + 12);
5248 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5251 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of a quad-register vst4.  Stores, with a single vst4, the
;; four DImode registers built at REGNO + 2, REGNO + 6, REGNO + 10 and
;; REGNO + 14 of the XImode operand 1 to the OImode memory operand 0.
;; NOTE(review): presumably these are the high D halves of the four Q
;; registers -- confirm against the VFP register numbering.
5254 (define_insn "neon_vst4qb<mode>"
5255 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5256 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5257 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5261 int regno = REGNO (operands[1]);
5263 ops[0] = operands[0];
5264 ops[1] = gen_rtx_REG (DImode, regno + 2);
5265 ops[2] = gen_rtx_REG (DImode, regno + 6);
5266 ops[3] = gen_rtx_REG (DImode, regno + 10);
5267 ops[4] = gen_rtx_REG (DImode, regno + 14);
5268 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5271 [(set_attr "type" "neon_store4_4reg<q>")]
;; Single-lane vst4 for D-register (VD) modes.  Operand 2 is the lane
;; index; it is checked against [0, GET_MODE_NUNITS) and "lane out of
;; range" is reported otherwise.  The lane is stored from the four DImode
;; registers built at REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 of the
;; OImode operand 1 to the memory operand 0.
5274 (define_insn "neon_vst4_lane<mode>"
5275 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5276 (unspec:<V_four_elem>
5277 [(match_operand:OI 1 "s_register_operand" "w")
5278 (match_operand:SI 2 "immediate_operand" "i")
5279 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5283 HOST_WIDE_INT lane = INTVAL (operands[2]);
5284 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5285 int regno = REGNO (operands[1]);
5287 if (lane < 0 || lane >= max)
5288 error ("lane out of range");
5289 ops[0] = operands[0];
5290 ops[1] = gen_rtx_REG (DImode, regno);
5291 ops[2] = gen_rtx_REG (DImode, regno + 2);
5292 ops[3] = gen_rtx_REG (DImode, regno + 4);
5293 ops[4] = gen_rtx_REG (DImode, regno + 6);
5294 ops[5] = operands[2];
5295 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5299 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Single-lane vst4 for quad-register (VMQ) modes.  Operand 2 is the lane
;; index; it is checked against [0, GET_MODE_NUNITS) and "lane out of
;; range" is reported otherwise.  Lanes in the upper half
;; (lane >= max / 2) are handled by a separate branch before the operands
;; are built; the lane actually printed is the local LANE value rather
;; than operand 2 itself.  The registers used are the DImode registers
;; built at REGNO, REGNO + 4, REGNO + 8 and REGNO + 12.
;; NOTE(review): the D-register neon_vst4_lane<mode> pattern uses
;; neon_store4_one_lane<q> for its "type"; this one uses
;; neon_store4_4reg<q> -- confirm that the difference is intended.
5302 (define_insn "neon_vst4_lane<mode>"
5303 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5304 (unspec:<V_four_elem>
5305 [(match_operand:XI 1 "s_register_operand" "w")
5306 (match_operand:SI 2 "immediate_operand" "i")
5307 (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5311 HOST_WIDE_INT lane = INTVAL (operands[2]);
5312 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5313 int regno = REGNO (operands[1]);
5315 if (lane < 0 || lane >= max)
5316 error ("lane out of range");
5317 else if (lane >= max / 2)
5322 ops[0] = operands[0];
5323 ops[1] = gen_rtx_REG (DImode, regno);
5324 ops[2] = gen_rtx_REG (DImode, regno + 4);
5325 ops[3] = gen_rtx_REG (DImode, regno + 8);
5326 ops[4] = gen_rtx_REG (DImode, regno + 12);
5327 ops[5] = GEN_INT (lane);
5328 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5332 [(set_attr "type" "neon_store4_4reg<q>")]
;; Widen the low half of the VU vector operand 1: the elements selected by
;; the low-half PARALLEL operand 2 are extended (code iterator SE) into
;; the <V_unpack> destination with a single vmovl reading %e1.
;; Little-endian only.
5335 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5336 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5337 (SE:<V_unpack> (vec_select:<V_HALF>
5338 (match_operand:VU 1 "register_operand" "w")
5339 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5340 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5341 "vmovl.<US><V_sz_elem> %q0, %e1"
5342 [(set_attr "type" "neon_shift_imm_long")]
;; Widen the high half of the VU vector operand 1: the elements selected
;; by the high-half PARALLEL operand 2 are extended (code iterator SE)
;; into the <V_unpack> destination with a single vmovl reading %f1.
;; Little-endian only.
5345 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5346 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5347 (SE:<V_unpack> (vec_select:<V_HALF>
5348 (match_operand:VU 1 "register_operand" "w")
5349 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5350 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5351 "vmovl.<US><V_sz_elem> %q0, %f1"
5352 [(set_attr "type" "neon_shift_imm_long")]
;; Expander that widens the high half of the VU vector operand 1 into
;; operand 0.  It builds a PARALLEL holding the indices
;; <V_mode_nunits>/2 .. <V_mode_nunits>-1 (the upper half of the lanes)
;; and hands it to neon_vec_unpack<US>_hi_<mode> as the lane selector.
;; Little-endian only.
5355 (define_expand "vec_unpack<US>_hi_<mode>"
5356 [(match_operand:<V_unpack> 0 "register_operand" "")
5357 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5358 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5360 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5363 for (i = 0; i < (<V_mode_nunits>/2); i++)
5364 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5366 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5367 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander that widens the low half of the VU vector operand 1 into
;; operand 0.  It builds a PARALLEL holding the indices
;; 0 .. <V_mode_nunits>/2 - 1 (the lower half of the lanes) and hands it
;; to neon_vec_unpack<US>_lo_<mode> as the lane selector.
;; Little-endian only.
5374 (define_expand "vec_unpack<US>_lo_<mode>"
5375 [(match_operand:<V_unpack> 0 "register_operand" "")
5376 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5377 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5379 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5382 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5383 RTVEC_ELT (v, i) = GEN_INT (i);
5384 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5385 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of the low halves of the VU vectors operand 1 and
;; operand 3.  Each half is extended (code iterator SE) to <V_unpack>
;; before the multiply; the whole operation is one vmull reading %e1 and
;; %e3.  Operand 2 is the low-half lane selector.  Little-endian only.
5392 (define_insn "neon_vec_<US>mult_lo_<mode>"
5393 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5394 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5395 (match_operand:VU 1 "register_operand" "w")
5396 (match_operand:VU 2 "vect_par_constant_low" "")))
5397 (SE:<V_unpack> (vec_select:<V_HALF>
5398 (match_operand:VU 3 "register_operand" "w")
5400 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5401 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5402 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the low halves of the VU vectors
;; operand 1 and operand 2.  It builds a PARALLEL holding the indices
;; 0 .. <V_mode_nunits>/2 - 1 and emits neon_vec_<US>mult_lo_<mode>.
;; Little-endian only.
5405 (define_expand "vec_widen_<US>mult_lo_<mode>"
5406 [(match_operand:<V_unpack> 0 "register_operand" "")
5407 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5408 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5409 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5411 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5414 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5415 RTVEC_ELT (v, i) = GEN_INT (i);
5416 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5418 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves of the VU vectors operand 1 and
;; operand 3.  Each half is extended (code iterator SE) to <V_unpack>
;; before the multiply; the whole operation is one vmull reading %f1 and
;; %f3.  Operand 2 is the high-half lane selector.  Little-endian only.
5426 (define_insn "neon_vec_<US>mult_hi_<mode>"
5427 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5428 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5429 (match_operand:VU 1 "register_operand" "w")
5430 (match_operand:VU 2 "vect_par_constant_high" "")))
5431 (SE:<V_unpack> (vec_select:<V_HALF>
5432 (match_operand:VU 3 "register_operand" "w")
5434 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5435 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5436 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the high halves of the VU vectors
;; operand 1 and operand 2.  It builds a PARALLEL holding the indices
;; <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_<US>mult_hi_<mode>.  Little-endian only.
5439 (define_expand "vec_widen_<US>mult_hi_<mode>"
5440 [(match_operand:<V_unpack> 0 "register_operand" "")
5441 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5442 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5443 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5445 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5448 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5449 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5450 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5452 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left: the VW vector operand 1 is shifted left by the
;; constant operand 2 and extended (code iterator SE) to <V_widen>.
;; Emitted as a single vshll from the D register %P1 into the Q register
;; %q0.
5461 (define_insn "neon_vec_<US>shiftl_<mode>"
5462 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5463 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5464 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5467 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5469 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for a widening shift left of the low half of the VU vector
;; operand 1 by the immediate operand 2.  The low half is taken as a
;; <V_HALF>mode subreg at byte offset 0 and passed to
;; neon_vec_<US>shiftl_<V_half>.  Little-endian only.
5472 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5473 [(match_operand:<V_unpack> 0 "register_operand" "")
5474 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5475 (match_operand:SI 2 "immediate_operand" "i")]
5476 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5478 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5479 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for a widening shift left of the high half of the VU vector
;; operand 1 by the immediate operand 2.  The high half is taken as a
;; <V_HALF>mode subreg at byte offset GET_MODE_SIZE (<V_HALF>mode) and
;; passed to neon_vec_<US>shiftl_<V_half>.  Little-endian only.
5485 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5486 [(match_operand:<V_unpack> 0 "register_operand" "")
5487 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5488 (match_operand:SI 2 "immediate_operand" "i")]
5489 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5491 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5492 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5493 GET_MODE_SIZE (<V_HALF>mode)),
5499 ;; Vectorize for non-neon-quad case
;; Extend (code iterator SE) every element of the D-register vector
;; operand 1 (VDI modes) into the <V_widen> destination with one vmovl
;; from %P1 to %q0.
5500 (define_insn "neon_unpack<US>_<mode>"
5501 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5502 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5504 "vmovl.<US><V_sz_elem> %q0, %P1"
5505 [(set_attr "type" "neon_move")]
;; D-register (VDI) form of the low-half unpack.  The whole input vector
;; operand 1 is first widened into a fresh <V_widen> pseudo with
;; neon_unpack<US>_<mode>; the low half of that result is then copied to
;; operand 0 with neon_vget_low<V_widen_l>.
5508 (define_expand "vec_unpack<US>_lo_<mode>"
5509 [(match_operand:<V_double_width> 0 "register_operand" "")
5510 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5513 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5514 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5515 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; D-register (VDI) form of the high-half unpack.  The whole input vector
;; operand 1 is first widened into a fresh <V_widen> pseudo with
;; neon_unpack<US>_<mode>; the high half of that result is then copied to
;; operand 0 with neon_vget_high<V_widen_l>.
5521 (define_expand "vec_unpack<US>_hi_<mode>"
5522 [(match_operand:<V_double_width> 0 "register_operand" "")
5523 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5526 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5527 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5528 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of the two D-register vectors operand 1 and
;; operand 2 (VDI modes) into the <V_widen> destination, emitted as one
;; vmull from %P1 and %P2 into %q0.
5534 (define_insn "neon_vec_<US>mult_<mode>"
5535 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5536 (mult:<V_widen> (SE:<V_widen>
5537 (match_operand:VDI 1 "register_operand" "w"))
5539 (match_operand:VDI 2 "register_operand" "w"))))]
5541 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5542 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; D-register (VDI) form of the high-half widening multiply.  The full
;; widening product of operand 1 and operand 2 is computed into a fresh
;; <V_widen> pseudo with neon_vec_<US>mult_<mode>; its high half is then
;; copied to operand 0 with neon_vget_high<V_widen_l>.
5545 (define_expand "vec_widen_<US>mult_hi_<mode>"
5546 [(match_operand:<V_double_width> 0 "register_operand" "")
5547 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5548 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5551 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5552 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5553 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; D-register (VDI) form of the low-half widening multiply.  The full
;; widening product of operand 1 and operand 2 is computed into a fresh
;; <V_widen> pseudo with neon_vec_<US>mult_<mode>; its low half is then
;; copied to operand 0 with neon_vget_low<V_widen_l>.
5560 (define_expand "vec_widen_<US>mult_lo_<mode>"
5561 [(match_operand:<V_double_width> 0 "register_operand" "")
5562 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5563 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5566 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5567 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5568 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; D-register (VDI) form of the high-half widening shift left.  Operand 1
;; is shifted by the immediate operand 2 and widened into a fresh
;; <V_widen> pseudo with neon_vec_<US>shiftl_<mode>; the high half of
;; that result is then copied to operand 0 with
;; neon_vget_high<V_widen_l>.
5575 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5576 [(match_operand:<V_double_width> 0 "register_operand" "")
5577 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5578 (match_operand:SI 2 "immediate_operand" "i")]
5581 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5582 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5583 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; D-register (VDI) form of the low-half widening shift left.  Operand 1
;; is shifted by the immediate operand 2 and widened into a fresh
;; <V_widen> pseudo with neon_vec_<US>shiftl_<mode>; the low half of that
;; result is then copied to operand 0 with neon_vget_low<V_widen_l>.
5589 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5590 [(match_operand:<V_double_width> 0 "register_operand" "")
5591 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5592 (match_operand:SI 2 "immediate_operand" "i")]
5595 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5596 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5597 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5603 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5604 ; because the ordering of vector elements in Q registers is different from what
5605 ; the semantics of the instructions require.
;; Pack two VN vectors into one: operand 1 and operand 2 are each
;; truncated to <V_narrow> and concatenated into the <V_narrow_pack>
;; destination.  Two vmovn instructions are emitted (hence length 8 and
;; type "multiple"), writing %e0 from operand 1 and %f0 from operand 2.
;; The destination is marked earlyclobber ("=&w").  Little-endian only.
5607 (define_insn "vec_pack_trunc_<mode>"
5608 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5609 (vec_concat:<V_narrow_pack>
5610 (truncate:<V_narrow>
5611 (match_operand:VN 1 "register_operand" "w"))
5612 (truncate:<V_narrow>
5613 (match_operand:VN 2 "register_operand" "w"))))]
5614 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5615 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5616 [(set_attr "type" "multiple")
5617 (set_attr "length" "8")]
5620 ;; For the non-quad case.
;; Truncate every element of the VN vector operand 1 to <V_narrow> with a
;; single vmovn from %q1 into %P0.  Little-endian only.
5621 (define_insn "neon_vec_pack_trunc_<mode>"
5622 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5623 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5624 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5625 "vmovn.i<V_sz_elem>\t%P0, %q1"
5626 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for packing two VSHFT vectors with truncation.  Operand 1 and
;; operand 2 are placed in the low and high halves of a fresh
;; <V_DOUBLE>mode pseudo (move_lo_quad / move_hi_quad), which is then
;; narrowed into operand 0 by neon_vec_pack_trunc_<V_double>.
;; Little-endian only.
5629 (define_expand "vec_pack_trunc_<mode>"
5630 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5631 (match_operand:VSHFT 1 "register_operand" "")
5632 (match_operand:VSHFT 2 "register_operand")]
5633 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5635 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5637 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5638 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5639 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference written as (abs (minus op1 op2)) over the VDQ
;; modes, emitted as one vabd.  Floating-point modes are only matched
;; when flag_unsafe_math_optimizations is set.  The "type" attribute is
;; neon_fp_abd_s for float modes and neon_abd otherwise.
5643 (define_insn "neon_vabd<mode>_2"
5644 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5645 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5646 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5647 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5648 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5650 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5651 (const_string "neon_fp_abd_s<q>")
5652 (const_string "neon_abd<q>")))]
;; Absolute difference written as abs applied to an unspec of operand 1
;; and operand 2 over the VDQ modes, emitted as one vabd.  Floating-point
;; modes are only matched when flag_unsafe_math_optimizations is set.
;; The "type" attribute is neon_fp_abd_s for float modes and neon_abd
;; otherwise.
5655 (define_insn "neon_vabd<mode>_3"
5656 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5657 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5658 (match_operand:VDQ 2 "s_register_operand" "w")]
5660 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5661 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5663 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5664 (const_string "neon_fp_abd_s<q>")
5665 (const_string "neon_abd<q>")))]
5668 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; SImode -> DImode sign extension.  After reload, when the destination is
;; a VFP register, replace it with: duplicate operand 1 across a V2SImode
;; view of the destination register (operand 2), then arithmetic-shift
;; the DImode destination right by 32.
5671 [(set (match_operand:DI 0 "s_register_operand" "")
5672 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5673 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5674 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5675 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5677 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode sign extension.  After reload, when the destination is
;; a VFP register, replace it with: duplicate operand 1 across a V4HImode
;; view of the destination register (operand 2), then arithmetic-shift
;; the DImode destination right by 48.
5681 [(set (match_operand:DI 0 "s_register_operand" "")
5682 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5683 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5684 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5685 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5687 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode -> DImode sign extension.  After reload, when the destination is
;; a VFP register, replace it with: duplicate operand 1 across a V8QImode
;; view of the destination register (operand 2), then arithmetic-shift
;; the DImode destination right by 56.
5691 [(set (match_operand:DI 0 "s_register_operand" "")
5692 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5693 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5694 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5695 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5697 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SImode -> DImode zero extension.  After reload, when the destination is
;; a VFP register, replace it with: duplicate operand 1 across a V2SImode
;; view of the destination register (operand 2), then logical-shift the
;; DImode destination right by 32.
5701 [(set (match_operand:DI 0 "s_register_operand" "")
5702 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5703 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5704 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5705 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5707 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode -> DImode zero extension.  After reload, when the destination is
;; a VFP register, replace it with: duplicate operand 1 across a V4HImode
;; view of the destination register (operand 2), then logical-shift the
;; DImode destination right by 48.
5711 [(set (match_operand:DI 0 "s_register_operand" "")
5712 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5713 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5714 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5715 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5717 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode -> DImode zero extension.  After reload, when the destination is
;; a VFP register, replace it with: duplicate operand 1 across a V8QImode
;; view of the destination register (operand 2), then logical-shift the
;; DImode destination right by 56.
5721 [(set (match_operand:DI 0 "s_register_operand" "")
5722 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5723 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5724 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5725 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5727 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));