1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The permitted values are "vadd", "vmin" and "vmax"; the default is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX modes; at least one operand must be a register.
;; Nine alternatives:
;;   0     register-to-register vmov.
;;   1, 3  emitted through output_move_neon.
;;   2     "Dn" immediate: validated by neon_immediate_valid_for_move, then
;;         emitted as either vmov.f32 or vmov.i<width> (the latter is built
;;         in the static buffer templ).
;;   4, 5  two-core-register forms of vmov (%Q/%R name the two halves).
;;   6-8   emitted through output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Move pattern for the VQXMOV modes; at least one operand must be a
;; register.  The alternatives mirror the VDX move pattern:
;;   0     register-to-register vmov on %q registers.
;;   1, 3  emitted through output_move_neon.
;;   2     "Dn" immediate: validated by neon_immediate_valid_for_move, then
;;         emitted as either vmov.f32 or vmov.i<width>.
;;   4, 5  a pair of vmov instructions, one per half of the quad register
;;         (%e/%f), each paired with two core registers.
;;   6-8   emitted through output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Expander for TImode moves.  While pseudos can still be created, a move
;; whose destination is not a register has its source forced into a TImode
;; register first.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves in the VSTRUCT modes.  While pseudos can still be
;; created, a move whose destination is not a register has its source forced
;; into a register of the same mode first.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move insn for the VSTRUCT modes; at least one operand must be a register.
;; Alternatives 1 and 2 (typed as a store and a load respectively) are
;; emitted through output_move_neon.  The length attribute is not a constant:
;; it is computed per insn by arm_attr_length_move_neon.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
147 switch (which_alternative)
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload, split an EI-mode register-to-register copy into two sets:
;; a TImode piece at the base register number and a DImode piece four
;; register numbers further on.  The pieces are handed to
;; neon_disambiguate_copy, which fills in operands 0-3 for the replacement
;; sets (presumably ordering them so overlapping copies are safe -- confirm
;; against its definition).
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
173 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, split an OI-mode register-to-register copy into two TImode
;; sets, the second starting four register numbers after the first.  The
;; pieces are handed to neon_disambiguate_copy, which fills in operands 0-3
;; (presumably ordering them so overlapping copies are safe -- confirm
;; against its definition).
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, split a CI-mode register-to-register copy into three TImode
;; sets at register offsets 0, 4 and 8.  The pieces are handed to
;; neon_disambiguate_copy, which fills in operands 0-5 (presumably ordering
;; them so overlapping copies are safe -- confirm against its definition).
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
214 neon_disambiguate_copy (operands, dest, src, 3);
;; After reload, split an XI-mode register-to-register copy into four TImode
;; sets at register offsets 0, 4, 8 and 12.  The pieces are handed to
;; neon_disambiguate_copy, which fills in operands 0-7 (presumably ordering
;; them so overlapping copies are safe -- confirm against its definition).
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
239 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned vector moves in the VDQX modes, represented as an
;; UNSPEC_MISALIGNED_ACCESS of the source.  Only enabled for little-endian
;; NEON targets with unaligned_access.  After making sure at least one
;; operand is a register, the non-register operand is taken as the memory
;; reference and its address is forced into a Pmode register if
;; neon_vector_mem_operand does not accept it as is.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
259 adjust_mem = operands[0];
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode register: a single vst1 of the D register
;; (%P1) to the address of operand 0.  Little-endian only, and only when
;; unaligned_access is enabled.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load into a VDX-mode register: a single vld1 into the D
;; register (%P0) from the address of operand 1.  Little-endian only, and
;; only when unaligned_access is enabled.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register: a single vst1 of the Q register
;; (%q1) to the address of operand 0.  Little-endian only, and only when
;; unaligned_access is enabled.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load into a VQX-mode register: a single vld1 into the Q
;; register (%q0) from the address of operand 1.  Little-endian only, and
;; only when unaligned_access is enabled.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert scalar operand 1 into one lane of the VD-mode vector operand 3
;; (tied to the destination).  Operand 2 is a bit mask rather than an index:
;; ffs () of it, minus one, gives the lane, which is mirrored within the
;; vector when BYTES_BIG_ENDIAN.  Alternative 0 loads the lane with vld1;
;; alternative 1 moves it in from an "r" register with vmov.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Quad-register variant of the lane insert.  The lane number obtained from
;; the bit mask in operand 2 (ffs () minus one) is split into a lane within
;; half of the vector (elt) and a register offset hi = (elem / half_elts) * 2.
;; Operand 0 is rewritten to the <V_HALF>mode register regno + hi before the
;; instruction is printed, so the emitted vld1/vmov addresses a single
;; D register.  elt is mirrored within the half when BYTES_BIG_ENDIAN.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ2 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Lane insert for V2DI.  The lane number comes from the bit mask in
;; operand 2 (ffs () minus one); operand 0 is rewritten to the DImode
;; register at regno + 2 * elem, so the whole 64-bit element is written
;; with either vld1.64 (alternative 0) or a vmov from the register pair
;; %Q1/%R1 (alternative 1).
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
363 operands[0] = gen_rtx_REG (DImode, regno);
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
368 return "vmov\t%P0, %Q1, %R1";
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for vec_set in the VDQ modes.  Converts the lane index in
;; operand 2 into a single-bit mask (1 << index) and emits the matching
;; vec_set<mode>_internal pattern, passing operand 0 both as the destination
;; and as the vector being modified.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
;; Extract the lane given by immediate operand 2 from the VD-mode vector
;; operand 1.  Alternative 0 stores the lane with vst1; alternative 1 moves
;; it to an "r" register with vmov.  The lane index is mirrored within the
;; vector when BYTES_BIG_ENDIAN.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
392 if (BYTES_BIG_ENDIAN)
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Quad-register variant of the lane extract.  The lane index in operand 2
;; is split into a lane within half of the vector (elt) and a register
;; offset hi = (index / half_elts) * 2; operand 1 is rewritten to the
;; <V_HALF>mode register regno + hi so the emitted vst1/vmov addresses a
;; single D register.  elt is mirrored within the half when
;; BYTES_BIG_ENDIAN.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
410 (match_operand:VQ2 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Lane extract for V2DI.  Operand 1 is rewritten to the DImode register at
;; regno + 2 * index, and that whole register is either stored with vst1.64
;; (alternative 0) or moved to the register pair %Q0/%R0 with vmov
;; (alternative 1).
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
442 operands[1] = gen_rtx_REG (DImode, regno);
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vec_init in the VDQ modes.  All of the work is delegated to
;; neon_expand_vector_init, called with the destination (operand 0) and the
;; initializer (operand 1).
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
457 neon_expand_vector_init (operands[0], operands[1]);
461 ;; Doubleword and quadword arithmetic.
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition for the VDQ modes, emitted as vadd.  Floating-point modes
;; only match when flag_unsafe_math_optimizations is set.  The "type"
;; attribute is neon_fp_addsub_s<q> for float modes and neon_add<q>
;; otherwise.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
;; 64-bit integer addition with seven alternatives; the pattern clobbers the
;; condition-code register.  Alternatives 0 and 3 emit vadd.i64 on "w"
;; registers and differ only in their "arch" attribute (neon_for_64bits
;; versus avoid_neon_for_64bits).  The remaining alternatives use "r"
;; registers, are typed "multiple", have length 8 and are marked as
;; clobbering the conditions.
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
485 switch (which_alternative)
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
494 default: gcc_unreachable ();
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtraction for the VDQ modes, emitted as vsub.  Floating-point
;; modes only match when flag_unsafe_math_optimizations is set.  The "type"
;; attribute is neon_fp_addsub_s<q> for float modes and neon_sub<q>
;; otherwise.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
;; 64-bit integer subtraction with five alternatives; the pattern clobbers
;; the condition-code register.  Alternatives 0 and 4 emit vsub.i64 on "w"
;; registers and differ only in their "arch" attribute.  Alternatives 1-3
;; work on "r" registers and emit a subs on the low words (%Q) followed by
;; an sbc on the high words (%R), hence length 8 and "conds" clob.
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
523 switch (which_alternative)
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiplication for the VDQW modes, emitted as vmul.
;; Floating-point modes only match when flag_unsafe_math_optimizations is
;; set.  The "type" attribute is neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, with the addend (operand 1) tied to the destination.
;; Emitted as vmla.  Floating-point modes only match when
;; flag_unsafe_math_optimizations is set.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with the minuend (operand 1) tied to the destination.  Emitted
;; as vmls.  Floating-point modes only match when
;; flag_unsafe_math_optimizations is set.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
580 ;; to be able to use when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes: operand 0 = operand 1 * operand 2
;; + operand 3, with the addend tied to the destination.  This is the named
;; variant that additionally requires flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and output as fma<VCVTF:mode>4, but the condition does not
;; require flag_unsafe_math_optimizations: only TARGET_NEON and TARGET_FMA
;; are needed.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, with operand 3 tied to the destination.  Emitted
;; as vfms.  This variant requires flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and output as *fmsub<VCVTF:mode>4, but the condition does not
;; require flag_unsafe_math_optimizations: only TARGET_NEON and TARGET_FMA
;; are needed.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Rounding of a VCVTF-mode vector, one pattern per NEON_VRINT variant.
;; Requires TARGET_FPU_ARMV8 in addition to TARGET_NEON and is emitted as
;; vrint<nvrint_variant> with an .f32 element size.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Float-to-integer conversion of a VCVTF-mode vector, one pattern per
;; NEON_VCVT variant and per signedness (FIXUORS).  The result has mode
;; <V_cmp_result>.  Requires TARGET_FPU_ARMV8, is emitted as
;; vcvt<nvrint_variant>.<su>32.f32, and is explicitly marked as not
;; predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 is the three-register vorr.
;; Alternative 1 takes a "Dl" immediate as operand 2 with operand 1 tied to
;; the destination; its assembly is produced by neon_output_logic_immediate,
;; called with mnemonic "vorr" and a fourth argument of 0.
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
649 switch (which_alternative)
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
657 [(set_attr "type" "neon_logic<q>")]
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 is the three-register vand.
;; Alternative 1 takes a "DL" immediate as operand 2 with operand 1 tied to
;; the destination; its assembly is produced by neon_output_logic_immediate,
;; called with mnemonic "vand" and a fourth argument of 1.
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
671 switch (which_alternative)
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
679 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2.  Note that
;; the complemented input is operand 2, which is printed last in the vorn
;; instruction.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
;; DI-mode OR-NOT (operand 0 = operand 1 | ~operand 2) as an
;; insn-and-split with four alternatives.  Alternative 0 uses "w" registers
;; and is typed neon_logic; the other three use "r" registers, are typed
;; "multiple" and are restricted by the "arch" attribute (a, t2, t2).
;; The split applies when operand 0 is not a VFP register.  The preparation
;; code rewrites operands 0-5 into the SImode low and high parts of the
;; three DI operands for the two SImode replacement sets; it also contains a
;; path that emits one_cmpldi2 followed by iordi3.
;; NOTE(review): the condition choosing between those two paths is not
;; visible in this chunk -- confirm against the full file.
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
;; AND-NOT (bit clear) for the VDQ modes: operand 0 = operand 1 & ~operand 2.
;; The complemented input is operand 2, which is printed last in the vbic
;; instruction.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
740 ;; Compare to *anddi_notdi_di.
;; DI-mode AND-NOT (operand 0 = operand 1 & ~operand 2) with three
;; alternatives.  Alternative 0 uses "w" registers and is typed neon_logic;
;; alternatives 1 and 2 use "r" registers, are typed "multiple" and have
;; length 8.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
;; Bitwise exclusive OR for the VDQ modes, emitted as a three-register veor.
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement for the VDQ modes, emitted as vmvn.  Note that the
;; "type" attribute is neon_move<q>, not neon_logic<q>.
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
;; Element-wise absolute value for the VDQW modes, emitted as vabs.  The
;; "type" attribute is neon_fp_abs_s<q> for float modes and neon_abs<q>
;; otherwise.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
;; Element-wise negation for the VDQW modes, emitted as vneg.  The "type"
;; attribute is neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
;; DI-mode negation with four alternatives (two on "w" registers, two on
;; "r" registers).  The pattern clobbers the condition-code register and a
;; DI scratch (operand 2); the scratch is only a real "w" register in
;; alternative 1 and is "X" elsewhere.  Every alternative has length 8 and
;; type "multiple".
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
804 ; Split negdi2_neon for vfp registers
;; Applies after reload when operand 0 is a VFP register.  The negation is
;; rewritten as "operand 2 = 0; operand 0 = operand 2 - operand 1" (the
;; subtraction keeps the CC clobber).  If no scratch register was allocated
;; (operand 2 is not a REG), operand 0 itself serves as the zeroed
;; temporary.
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
820 ; Split negdi2_neon for core registers
;; Applies after reload under TARGET_32BIT when operand 0 satisfies
;; arm_general_register_operand.  The insn is re-emitted as a plain DI-mode
;; negation with only the CC clobber, i.e. the DI scratch is dropped.
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
;; Unsigned element-wise minimum for the VDQIW modes, emitted as vmin with
;; the unsigned element suffix <V_u_elem>.
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned element-wise maximum for the VDQIW modes, emitted as vmax with
;; the unsigned element suffix <V_u_elem>.
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
;; Signed element-wise minimum for the VDQW modes, emitted as vmin with the
;; <V_s_elem> suffix.  The "type" attribute is neon_fp_minmax_s<q> for float
;; modes and neon_minmax<q> otherwise.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
;; Signed element-wise maximum for the VDQW modes, emitted as vmax with the
;; <V_s_elem> suffix.  The "type" attribute is neon_fp_minmax_s<q> for float
;; modes and neon_minmax<q> otherwise.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector shift left for the VDQIW modes.  Alternative 0 shifts by a vector
;; register and is emitted directly as vshl.<V_s_elem>.  Alternative 1
;; shifts by a "Dn" immediate; its assembly is produced by
;; neon_output_shift_immediate, called with mnemonic "vshl" and sign
;; character 'i'.
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
885 switch (which_alternative)
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
890 VALID_NEON_QREG_MODE (<MODE>mode),
892 default: gcc_unreachable ();
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic shift right of a VDQIW vector by a "Dn" immediate accepted by
;; imm_for_neon_rshift_operand.  The assembly is produced by
;; neon_output_shift_immediate, called with mnemonic "vshr" and sign
;; character 's'.
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
908 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical shift right of a VDQIW vector by a "Dn" immediate accepted by
;; imm_for_neon_rshift_operand.  The assembly is produced by
;; neon_output_shift_immediate, called with mnemonic "vshr" and sign
;; character 'u'.
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
921 [(set_attr "type" "neon_shift_imm<q>")]
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a
925 ; negative amount, with signed operands. This is essentially the same as
926 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky with negative
;; Register-amount vshl with the signed element suffix <V_s_elem>, wrapped
;; in UNSPEC_ASHIFT_SIGNED rather than an ashift RTL code.
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
;; Register-amount vshl with the unsigned element suffix <V_u_elem>, wrapped
;; in UNSPEC_ASHIFT_UNSIGNED rather than an ashift RTL code.
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for vector arithmetic shift right in the VDQIW modes.  When the
;; shift amount is a register, it is negated into a fresh pseudo and the
;; shift is emitted as ashl<mode>3_signed with that negated amount.  The
;; vashr<mode>3_imm pattern handles the remaining (immediate) case.
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
958 if (s_register_operand (operands[2], <MODE>mode))
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expander for vector logical shift right in the VDQIW modes.  When the
;; shift amount is a register, it is negated into a fresh pseudo and the
;; shift is emitted as ashl<mode>3_unsigned with that negated amount.  The
;; vlshr<mode>3_imm pattern handles the remaining (immediate) case.
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
975 if (s_register_operand (operands[2], <MODE>mode))
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
;; Load an SImode value into lane 0 of a DI-mode NEON register.
;; Alternative 0 loads it with vld1.32; alternative 1 moves it from an "r"
;; register with vmov.32.
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; 64-bit left shift in a "w" register with no clobbers: alternative 0 shifts
;; by an immediate, alternative 1 by a count held in another "w" register.
;; Matches only after reload, and a constant count must satisfy
;; 0 <= count < 64.
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; 64-bit left shift with alternatives for both "w" and "r" registers; split
;; once reload has completed.  When operand 0 is a VFP/NEON register
;; (IS_VFP_REGNUM) the split code emits a plain DImode move for constant
;; counts below 1, clamps constant counts above 63 to 63, loads a count into
;; scratch operand 5 with neon_load_count, and emits ashldi3_neon_noclobber.
;; Otherwise a shift by constant 1 whose source and destination either do not
;; overlap or are the same register uses arm_ashldi3_1bit, and the remaining
;; cases go through arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1027 "TARGET_NEON && reload_completed"
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1033 if (CONST_INT_P (operands[2]))
1035 if (INTVAL (operands[2]) < 1)
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1071 ; The shift amount needs to be negated for right-shifts
;; 64-bit shift by a register count, emitted as vshl.s64 and represented as
;; UNSPEC_ASHIFT_SIGNED.  Matches only after reload.
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1082 ; The shift amount needs to be negated for right-shifts
;; 64-bit shift by a register count, emitted as vshl.u64 and represented as
;; UNSPEC_ASHIFT_UNSIGNED.  Matches only after reload.
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
;; 64-bit arithmetic right shift by an immediate (vshr.s64), with no clobbers.
;; Matches only after reload and only for counts in the range 1..64.
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
;; 64-bit logical right shift by an immediate (vshr.u64), with no clobbers.
;; Matches only after reload and only for counts in the range 1..64.
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
;; 64-bit right shifts (iterated over RSHIFTS) with alternatives for both "w"
;; and "r" registers; split once reload has completed.  When operand 0 is a
;; VFP/NEON register (IS_VFP_REGNUM) the split code handles a constant count
;; by emitting a DImode move when it is below 1, clamping it to 64 when it is
;; above 64, and emitting <shift>di3_neon_imm_noclobber.  For a register count
;; it negates the count into scratch operand 3, loads that into scratch
;; operand 5 with neon_load_count, and emits <shifttype>_shift_di3_neon.
;; Otherwise a shift by constant 1 whose source and destination either do not
;; overlap or are the same register uses arm_<shift>di3_1bit, and the remaining
;; cases go through arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1125 "TARGET_NEON && reload_completed"
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1131 if (CONST_INT_P (operands[2]))
1133 if (INTVAL (operands[2]) < 1)
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1175 ;; Widening operations
;; Signed widening sum: operand 1 (VW mode) is sign-extended to <V_widen> and
;; added to the wide operand 2.  Emitted as vaddw.<V_s_elem> with the wide
;; operand first (%q2) and the narrow operand second (%P1).
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "%w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
;; Unsigned widening sum: operand 1 (VW mode) is zero-extended to <V_widen>
;; and added to the wide operand 2.  Emitted as vaddw.<V_u_elem> with the wide
;; operand first (%q2) and the narrow operand second (%P1).
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "%w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1197 ;; Helpers for quad-word reduction operations
1199 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1200 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1201 ; N/2-element vector.
;; V4SI variant: combines elements {0,1} of operand 1 with elements {2,3},
;; producing a V2SI result.  The template passes %e1 and %f1 as the two source
;; halves, and the mnemonic and signedness come from <VQH_mnem> / <VQH_sign>.
1203 (define_insn "quad_halves_<code>v4si"
1204 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1206 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1207 (parallel [(const_int 0) (const_int 1)]))
1208 (vec_select:V2SI (match_dup 1)
1209 (parallel [(const_int 2) (const_int 3)]))))]
1211 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1212 [(set_attr "vqh_mnem" "<VQH_mnem>")
1213 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF variant of the quad-halves helper: combines elements {0,1} of operand 1
;; with elements {2,3}, producing a V2SF result via <VQH_mnem>.f32.  Only
;; enabled when flag_unsafe_math_optimizations is set.
1216 (define_insn "quad_halves_<code>v4sf"
1217 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1219 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1220 (parallel [(const_int 0) (const_int 1)]))
1221 (vec_select:V2SF (match_dup 1)
1222 (parallel [(const_int 2) (const_int 3)]))))]
1223 "TARGET_NEON && flag_unsafe_math_optimizations"
1224 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1225 [(set_attr "vqh_mnem" "<VQH_mnem>")
1226 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant of the quad-halves helper: combines elements {0..3} of
;; operand 1 with elements {4..7}, producing a V4HI result.  Operand 0 is only
;; written by this pattern, so it uses the write-only "=w" constraint, matching
;; the v4si and v4sf variants ("+w" would mark it as an input as well).
1229 (define_insn "quad_halves_<code>v8hi"
1230 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1232 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1233 (parallel [(const_int 0) (const_int 1)
1234 (const_int 2) (const_int 3)]))
1235 (vec_select:V4HI (match_dup 1)
1236 (parallel [(const_int 4) (const_int 5)
1237 (const_int 6) (const_int 7)]))))]
1239 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1240 [(set_attr "vqh_mnem" "<VQH_mnem>")
1241 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant of the quad-halves helper: combines elements {0..7} of
;; operand 1 with elements {8..15}, producing a V8QI result.  Operand 0 is only
;; written by this pattern, so it uses the write-only "=w" constraint, matching
;; the v4si and v4sf variants ("+w" would mark it as an input as well).
1244 (define_insn "quad_halves_<code>v16qi"
1245 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1247 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1248 (parallel [(const_int 0) (const_int 1)
1249 (const_int 2) (const_int 3)
1250 (const_int 4) (const_int 5)
1251 (const_int 6) (const_int 7)]))
1252 (vec_select:V8QI (match_dup 1)
1253 (parallel [(const_int 8) (const_int 9)
1254 (const_int 10) (const_int 11)
1255 (const_int 12) (const_int 13)
1256 (const_int 14) (const_int 15)]))))]
1258 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1259 [(set_attr "vqh_mnem" "<VQH_mnem>")
1260 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Emits a move whose destination is the <V_HALF>mode subreg of operand 0 at
;; byte offset GET_MODE_SIZE (<V_HALF>mode).
;; NOTE(review): the moved value is presumably operand 1 -- confirm.
1263 (define_expand "move_hi_quad_<mode>"
1264 [(match_operand:ANY128 0 "s_register_operand" "")
1265 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1268 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1269 GET_MODE_SIZE (<V_HALF>mode)),
;; Emits a move whose destination is a <V_HALF>mode subreg of operand 0.
;; NOTE(review): presumably the low half (offset 0) receiving operand 1, by
;; analogy with move_hi_quad_<mode> -- confirm.
1274 (define_expand "move_lo_quad_<mode>"
1275 [(match_operand:ANY128 0 "s_register_operand" "")
1276 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1279 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1285 ;; Reduction operations
;; Sum reduction for VD modes.  neon_pairwise_reduce is called with the
;; neon_vpadd_internal<mode> generator, writing into a fresh vector pseudo, and
;; element 0 of that vector is then extracted into operand 0.  Float modes
;; additionally require flag_unsafe_math_optimizations.
1287 (define_expand "reduc_plus_scal_<mode>"
1288 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1289 (match_operand:VD 1 "s_register_operand" "")]
1290 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1292 rtx vec = gen_reg_rtx (<MODE>mode);
1293 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1294 &gen_neon_vpadd_internal<mode>);
1295 /* The same result is actually computed into every element. */
1296 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Sum reduction for VQ modes.  quad_halves_plus<mode> first combines the two
;; halves into a <V_HALF>mode pseudo, which is then reduced with
;; reduc_plus_scal_<V_half>.  Not available when BYTES_BIG_ENDIAN; float modes
;; additionally require flag_unsafe_math_optimizations.
1300 (define_expand "reduc_plus_scal_<mode>"
1301 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1302 (match_operand:VQ 1 "s_register_operand" "")]
1303 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1304 && !BYTES_BIG_ENDIAN"
1306 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1308 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1309 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction for V2DI.  arm_reduc_plus_internal_v2di computes into a fresh
;; V2DI pseudo and element 0 of that pseudo is extracted into operand 0.  Not
;; available when BYTES_BIG_ENDIAN.
1314 (define_expand "reduc_plus_scal_v2di"
1315 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1316 (match_operand:V2DI 1 "s_register_operand" "")]
1317 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1319 rtx vec = gen_reg_rtx (V2DImode);
1321 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1322 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
;; Helper emitted by reduc_plus_scal_v2di: a single vadd.i64 whose destination
;; is %e0 and whose sources are %e1 and %f1 of the V2DI input.  Not available
;; when BYTES_BIG_ENDIAN.
1327 (define_insn "arm_reduc_plus_internal_v2di"
1328 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1329 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1332 "vadd.i64\t%e0, %e1, %f1"
1333 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction for VD modes.  neon_pairwise_reduce is called with
;; the neon_vpsmin<mode> generator, and element 0 of the resulting vector is
;; extracted into operand 0.  Float modes additionally require
;; flag_unsafe_math_optimizations.
1336 (define_expand "reduc_smin_scal_<mode>"
1337 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1338 (match_operand:VD 1 "s_register_operand" "")]
1339 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1341 rtx vec = gen_reg_rtx (<MODE>mode);
1343 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1344 &gen_neon_vpsmin<mode>);
1345 /* The result is computed into every element of the vector. */
1346 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction for VQ modes.  quad_halves_smin<mode> combines the
;; two halves into a <V_HALF>mode pseudo, which is then reduced with
;; reduc_smin_scal_<V_half>.  Not available when BYTES_BIG_ENDIAN; float modes
;; additionally require flag_unsafe_math_optimizations.
1350 (define_expand "reduc_smin_scal_<mode>"
1351 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1352 (match_operand:VQ 1 "s_register_operand" "")]
1353 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1354 && !BYTES_BIG_ENDIAN"
1356 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1358 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1359 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction for VD modes.  neon_pairwise_reduce is called with
;; the neon_vpsmax<mode> generator, and element 0 of the resulting vector is
;; extracted into operand 0.  Float modes additionally require
;; flag_unsafe_math_optimizations.
1364 (define_expand "reduc_smax_scal_<mode>"
1365 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1366 (match_operand:VD 1 "s_register_operand" "")]
1367 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1369 rtx vec = gen_reg_rtx (<MODE>mode);
1370 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1371 &gen_neon_vpsmax<mode>);
1372 /* The result is computed into every element of the vector. */
1373 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction for VQ modes.  quad_halves_smax<mode> combines the
;; two halves into a <V_HALF>mode pseudo, which is then reduced with
;; reduc_smax_scal_<V_half>.  Not available when BYTES_BIG_ENDIAN; float modes
;; additionally require flag_unsafe_math_optimizations.
1377 (define_expand "reduc_smax_scal_<mode>"
1378 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1379 (match_operand:VQ 1 "s_register_operand" "")]
1380 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1381 && !BYTES_BIG_ENDIAN"
1383 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1385 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1386 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction for VDI modes.  neon_pairwise_reduce is called
;; with the neon_vpumin<mode> generator, and element 0 of the resulting vector
;; is extracted into operand 0.
1391 (define_expand "reduc_umin_scal_<mode>"
1392 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1393 (match_operand:VDI 1 "s_register_operand" "")]
1396 rtx vec = gen_reg_rtx (<MODE>mode);
1397 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1398 &gen_neon_vpumin<mode>);
1399 /* The result is computed into every element of the vector. */
1400 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction for VQI modes.  quad_halves_umin<mode> combines
;; the two halves into a <V_HALF>mode pseudo, which is then reduced with
;; reduc_umin_scal_<V_half>.  Not available when BYTES_BIG_ENDIAN.
1404 (define_expand "reduc_umin_scal_<mode>"
1405 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1406 (match_operand:VQI 1 "s_register_operand" "")]
1407 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1409 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1411 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1412 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction for VDI modes.  neon_pairwise_reduce is called
;; with the neon_vpumax<mode> generator, and element 0 of the resulting vector
;; is extracted into operand 0.
1417 (define_expand "reduc_umax_scal_<mode>"
1418 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1419 (match_operand:VDI 1 "s_register_operand" "")]
1422 rtx vec = gen_reg_rtx (<MODE>mode);
1423 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1424 &gen_neon_vpumax<mode>);
1425 /* The result is computed into every element of the vector. */
1426 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction for VQI modes.  quad_halves_umax<mode> combines
;; the two halves into a <V_HALF>mode pseudo, which is then reduced with
;; reduc_umax_scal_<V_half>.  Not available when BYTES_BIG_ENDIAN.
1430 (define_expand "reduc_umax_scal_<mode>"
1431 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1432 (match_operand:VQI 1 "s_register_operand" "")]
1433 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1435 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1437 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1438 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add on VD modes, emitted as vpadd.<V_if_elem> and represented as
;; an unspec.  The scheduling type is selected by <Is_float_mode>.  Passed as
;; the generator to neon_pairwise_reduce by reduc_plus_scal_<mode>.
1443 (define_insn "neon_vpadd_internal<mode>"
1444 [(set (match_operand:VD 0 "s_register_operand" "=w")
1445 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1446 (match_operand:VD 2 "s_register_operand" "w")]
1449 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1450 ;; Assume this schedules like vadd.
1452 (if_then_else (match_test "<Is_float_mode>")
1453 (const_string "neon_fp_reduc_add_s<q>")
1454 (const_string "neon_reduc_add<q>")))]
;; Pairwise signed minimum on VD modes, emitted as vpmin.<V_s_elem> and
;; represented as an unspec.  The scheduling type is selected by
;; <Is_float_mode>.
1457 (define_insn "neon_vpsmin<mode>"
1458 [(set (match_operand:VD 0 "s_register_operand" "=w")
1459 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1460 (match_operand:VD 2 "s_register_operand" "w")]
1463 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1465 (if_then_else (match_test "<Is_float_mode>")
1466 (const_string "neon_fp_reduc_minmax_s<q>")
1467 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum on VD modes, emitted as vpmax.<V_s_elem> and
;; represented as an unspec.  The scheduling type is selected by
;; <Is_float_mode>.
1470 (define_insn "neon_vpsmax<mode>"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")]
1476 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1478 (if_then_else (match_test "<Is_float_mode>")
1479 (const_string "neon_fp_reduc_minmax_s<q>")
1480 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum on VDI (integer) modes, emitted as
;; vpmin.<V_u_elem> and represented as an unspec.
1483 (define_insn "neon_vpumin<mode>"
1484 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1485 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1486 (match_operand:VDI 2 "s_register_operand" "w")]
1489 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1490 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum on VDI (integer) modes, emitted as
;; vpmax.<V_u_elem> and represented as an unspec.
1493 (define_insn "neon_vpumax<mode>"
1494 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1495 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1496 (match_operand:VDI 2 "s_register_operand" "w")]
1499 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1500 [(set_attr "type" "neon_reduc_minmax<q>")]
;; *ss_add<mode>_neon: signed saturating add (ss_plus) on VD modes, emitted as
;; vqadd.<V_s_elem>.
1503 ;; Saturating arithmetic
1505 ; NOTE: Neon supports many more saturating variants of instructions than the
1506 ; following, but these are all GCC currently understands.
1507 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1508 ; yet either, although these patterns may be used by intrinsics when they're
1511 (define_insn "*ss_add<mode>_neon"
1512 [(set (match_operand:VD 0 "s_register_operand" "=w")
1513 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1514 (match_operand:VD 2 "s_register_operand" "w")))]
1516 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1517 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) on VD modes, emitted as vqadd.<V_u_elem>.
1520 (define_insn "*us_add<mode>_neon"
1521 [(set (match_operand:VD 0 "s_register_operand" "=w")
1522 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1523 (match_operand:VD 2 "s_register_operand" "w")))]
1525 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1526 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) on VD modes, emitted as
;; vqsub.<V_s_elem>.
1529 (define_insn "*ss_sub<mode>_neon"
1530 [(set (match_operand:VD 0 "s_register_operand" "=w")
1531 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1532 (match_operand:VD 2 "s_register_operand" "w")))]
1534 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1535 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) on VD modes, emitted as
;; vqsub.<V_u_elem>.
1538 (define_insn "*us_sub<mode>_neon"
1539 [(set (match_operand:VD 0 "s_register_operand" "=w")
1540 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1541 (match_operand:VD 2 "s_register_operand" "w")))]
1543 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1544 [(set_attr "type" "neon_qsub<q>")]
;; vcond<mode><mode>: the expander builds a lane mask in a <V_cmp_result>mode
;; pseudo using vcge/vcgt/vceq (vclt/vcle are also selected for the
;; compare-against-zero forms) and then selects between operands 1 and 2 with
;; neon_vbsl<mode>.  When swap_bsl_operands is set, operand 2 is passed to
;; neon_vbsl<mode> ahead of operand 1, because the emitted mask is the inverse
;; of the requested comparison.  Float modes require
;; flag_unsafe_math_optimizations.
1547 ;; Conditional instructions. These are comparisons with conditional moves for
1548 ;; vectors. They perform the assignment:
1550 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1552 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1555 (define_expand "vcond<mode><mode>"
1556 [(set (match_operand:VDQW 0 "s_register_operand" "")
1558 (match_operator 3 "comparison_operator"
1559 [(match_operand:VDQW 4 "s_register_operand" "")
1560 (match_operand:VDQW 5 "nonmemory_operand" "")])
1561 (match_operand:VDQW 1 "s_register_operand" "")
1562 (match_operand:VDQW 2 "s_register_operand" "")))]
1563 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1566 int use_zero_form = 0;
1567 int swap_bsl_operands = 0;
1568 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1569 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1571 rtx (*base_comparison) (rtx, rtx, rtx);
1572 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1574 switch (GET_CODE (operands[3]))
1581 if (operands[5] == CONST0_RTX (<MODE>mode))
1588 if (!REG_P (operands[5]))
1589 operands[5] = force_reg (<MODE>mode, operands[5]);
1592 switch (GET_CODE (operands[3]))
1602 base_comparison = gen_neon_vcge<mode>;
1603 complimentary_comparison = gen_neon_vcgt<mode>;
1611 base_comparison = gen_neon_vcgt<mode>;
1612 complimentary_comparison = gen_neon_vcge<mode>;
1617 base_comparison = gen_neon_vceq<mode>;
1618 complimentary_comparison = gen_neon_vceq<mode>;
1624 switch (GET_CODE (operands[3]))
1631 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1632 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1638 Note that there also exist direct comparison against 0 forms,
1639 so catch those as a special case. */
1643 switch (GET_CODE (operands[3]))
1646 base_comparison = gen_neon_vclt<mode>;
1649 base_comparison = gen_neon_vcle<mode>;
1652 /* Do nothing, other zero form cases already have the correct
1659 emit_insn (base_comparison (mask, operands[4], operands[5]));
1661 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1668 /* Vector compare returns false for lanes which are unordered, so if we use
1669 the inverse of the comparison we actually want to emit, then
1670 swap the operands to BSL, we will end up with the correct result.
1671 Note that a NE NaN and NaN NE b are true for all a, b.
1673 Our transformations are:
1678 a NE b -> !(a EQ b) */
1681 emit_insn (base_comparison (mask, operands[4], operands[5]));
1683 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1685 swap_bsl_operands = 1;
1688 /* We check (a > b || b > a). combining these comparisons give us
1689 true iff !(a != b && a ORDERED b), swapping the operands to BSL
1690 will then give us (a == b || a UNORDERED b) as intended. */
1692 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1693 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1694 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1695 swap_bsl_operands = 1;
1698 /* Operands are ORDERED iff (a > b || b >= a).
1699 Swapping the operands to BSL will give the UNORDERED case. */
1700 swap_bsl_operands = 1;
1703 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1704 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1705 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1711 if (swap_bsl_operands)
1712 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1715 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Vector conditional select for integer modes (VDQIW) with an
;; arm_comparison_operator.  A lane mask is produced in a <V_cmp_result>mode
;; pseudo with vcgeu/vcgtu/vceq, with operands 4 and 5 exchanged for some
;; comparison codes, and neon_vbsl<mode> then selects between operands 1 and 2
;; (passed in either order).
;; NOTE(review): operand 5 is matched with s_register_operand, so the
;; CONST0_RTX test and the vcle/vclt calls that appear to belong to the
;; immediate_zero case look unreachable -- confirm.
1720 (define_expand "vcondu<mode><mode>"
1721 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1723 (match_operator 3 "arm_comparison_operator"
1724 [(match_operand:VDQIW 4 "s_register_operand" "")
1725 (match_operand:VDQIW 5 "s_register_operand" "")])
1726 (match_operand:VDQIW 1 "s_register_operand" "")
1727 (match_operand:VDQIW 2 "s_register_operand" "")))]
1731 int inverse = 0, immediate_zero = 0;
1733 mask = gen_reg_rtx (<V_cmp_result>mode);
1735 if (operands[5] == CONST0_RTX (<MODE>mode))
1737 else if (!REG_P (operands[5]))
1738 operands[5] = force_reg (<MODE>mode, operands[5]);
1740 switch (GET_CODE (operands[3]))
1743 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1747 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1751 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1756 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1758 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1763 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1765 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1769 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1778 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1781 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1787 ;; Patterns for builtins.
1789 ; good for plain vadd, vaddq.
;; Intrinsic expander for vector add.  The generic add<mode>3 pattern is used
;; when the mode is not floating point or flag_unsafe_math_optimizations is
;; set; otherwise neon_vadd<mode>_unspec is emitted.
1791 (define_expand "neon_vadd<mode>"
1792 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1793 (match_operand:VCVTF 1 "s_register_operand" "w")
1794 (match_operand:VCVTF 2 "s_register_operand" "w")]
1797 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1798 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1800 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1805 ; Note that NEON operations don't support the full IEEE 754 standard: in
1806 ; particular, denormal values are flushed to zero. This means that GCC cannot
1807 ; use those instructions for autovectorization, etc. unless
1808 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1809 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1810 ; header) must work in either case: if -funsafe-math-optimizations is given,
1811 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1812 ; expand to unspecs (which may potentially limit the extent to which they might
1813 ; be optimized by generic code).
1815 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vadd.<V_if_elem> on operands 1 and 2, represented as an unspec; the
;; scheduling type is selected by <Is_float_mode>.
1817 (define_insn "neon_vadd<mode>_unspec"
1818 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1819 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1820 (match_operand:VCVTF 2 "s_register_operand" "w")]
1823 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1825 (if_then_else (match_test "<Is_float_mode>")
1826 (const_string "neon_fp_addsub_s<q>")
1827 (const_string "neon_add<q>")))]
;; Intrinsic pattern for vaddl: two VDI inputs (%P1, %P2) produce a <V_widen>
;; result (%q0); <sup> selects the signedness letter in the mnemonic suffix.
1830 (define_insn "neon_vaddl<sup><mode>"
1831 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1832 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1833 (match_operand:VDI 2 "s_register_operand" "w")]
1836 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1837 [(set_attr "type" "neon_add_long")]
;; Intrinsic pattern for vaddw: a <V_widen> input (%q1) and a VDI input (%P2)
;; produce a <V_widen> result (%q0).
1840 (define_insn "neon_vaddw<sup><mode>"
1841 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1842 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1843 (match_operand:VDI 2 "s_register_operand" "w")]
1846 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1847 [(set_attr "type" "neon_add_widen")]
;; Intrinsic pattern for vhadd / vrhadd (<r> selects the rounding form) on
;; VDQIW modes.  The scheduling type uses the <q> suffix so that D-register
;; modes get "neon_add_halve" and Q-register modes get "neon_add_halve_q",
;; in the same way neon_vhsub<sup><mode> uses "neon_sub_halve<q>".
1852 (define_insn "neon_v<r>hadd<sup><mode>"
1853 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1854 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1855 (match_operand:VDQIW 2 "s_register_operand" "w")]
1858 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1859 [(set_attr "type" "neon_add_halve<q>")]
;; Intrinsic pattern for saturating add (vqadd) on VDQIX modes, represented
;; as an unspec; <sup> selects the signedness letter in the mnemonic suffix.
1862 (define_insn "neon_vqadd<sup><mode>"
1863 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1864 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1865 (match_operand:VDQIX 2 "s_register_operand" "w")]
1868 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1869 [(set_attr "type" "neon_qadd<q>")]
;; Intrinsic pattern for vaddhn / vraddhn (<r> selects the rounding form): two
;; VN inputs (%q1, %q2) produce a <V_narrow> result (%P0).
1872 (define_insn "neon_v<r>addhn<mode>"
1873 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1874 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1875 (match_operand:VN 2 "s_register_operand" "w")]
1878 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1879 [(set_attr "type" "neon_add_halve_narrow_q")]
1882 ;; Polynomial and Float multiplication.
;; Emits vmul.<pf><size> on operands 1 and 2, represented as an unspec; the
;; scheduling type is selected by <Is_float_mode>.
1883 (define_insn "neon_vmul<pf><mode>"
1884 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1885 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1886 (match_operand:VPF 2 "s_register_operand" "w")]
1889 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1891 (if_then_else (match_test "<Is_float_mode>")
1892 (const_string "neon_fp_mul_s<q>")
1893 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Intrinsic expander for multiply-accumulate.  mul<mode>3add<mode>_neon is
;; used when the mode is not floating point or flag_unsafe_math_optimizations
;; is set; otherwise neon_vmla<mode>_unspec is emitted.  Both receive operands
;; 0..3 unchanged.
1896 (define_expand "neon_vmla<mode>"
1897 [(match_operand:VDQW 0 "s_register_operand" "=w")
1898 (match_operand:VDQW 1 "s_register_operand" "0")
1899 (match_operand:VDQW 2 "s_register_operand" "w")
1900 (match_operand:VDQW 3 "s_register_operand" "w")]
1903 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1904 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1905 operands[2], operands[3]));
1907 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1908 operands[2], operands[3]));
;; Intrinsic expander for fused multiply-add; requires TARGET_NEON and
;; TARGET_FMA.  Forwards to fma<mode>4_intrinsic, passing operand 0 followed by
;; operands 2 and 3.
;; NOTE(review): the final argument is presumably operand 1 -- confirm.
1912 (define_expand "neon_vfma<VCVTF:mode>"
1913 [(match_operand:VCVTF 0 "s_register_operand")
1914 (match_operand:VCVTF 1 "s_register_operand")
1915 (match_operand:VCVTF 2 "s_register_operand")
1916 (match_operand:VCVTF 3 "s_register_operand")]
1917 "TARGET_NEON && TARGET_FMA"
1919 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Intrinsic expander for fused multiply-subtract; requires TARGET_NEON and
;; TARGET_FMA.  Forwards to fmsub<mode>4_intrinsic, passing operand 0 followed
;; by operands 2 and 3.
;; NOTE(review): the final argument is presumably operand 1 -- confirm.
1924 (define_expand "neon_vfms<VCVTF:mode>"
1925 [(match_operand:VCVTF 0 "s_register_operand")
1926 (match_operand:VCVTF 1 "s_register_operand")
1927 (match_operand:VCVTF 2 "s_register_operand")
1928 (match_operand:VCVTF 3 "s_register_operand")]
1929 "TARGET_NEON && TARGET_FMA"
1931 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1936 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vmla.<V_if_elem> %0, %2, %3.  Operand 1 is tied to the destination
;; through the "0" constraint and does not appear in the template.  The
;; scheduling type is selected by <Is_float_mode>.
1938 (define_insn "neon_vmla<mode>_unspec"
1939 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1940 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1941 (match_operand:VDQW 2 "s_register_operand" "w")
1942 (match_operand:VDQW 3 "s_register_operand" "w")]
1945 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1947 (if_then_else (match_test "<Is_float_mode>")
1948 (const_string "neon_fp_mla_s<q>")
1949 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Intrinsic pattern for vmlal: VW inputs %P2 and %P3 with a <V_widen>
;; destination %q0.  Operand 1 is tied to the destination through the "0"
;; constraint.
1952 (define_insn "neon_vmlal<sup><mode>"
1953 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1954 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1955 (match_operand:VW 2 "s_register_operand" "w")
1956 (match_operand:VW 3 "s_register_operand" "w")]
1959 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
1960 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Intrinsic expander for multiply-subtract.  mul<mode>3neg<mode>add<mode>_neon
;; is used when the mode is not floating point or
;; flag_unsafe_math_optimizations is set; otherwise neon_vmls<mode>_unspec is
;; emitted.  Both receive operands 0..3 unchanged.
1963 (define_expand "neon_vmls<mode>"
1964 [(match_operand:VDQW 0 "s_register_operand" "=w")
1965 (match_operand:VDQW 1 "s_register_operand" "0")
1966 (match_operand:VDQW 2 "s_register_operand" "w")
1967 (match_operand:VDQW 3 "s_register_operand" "w")]
1970 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1971 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
1972 operands[1], operands[2], operands[3]));
1974 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
1975 operands[2], operands[3]));
1979 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vmls.<V_if_elem> %0, %2, %3.  Operand 1 is tied to the destination
;; through the "0" constraint and does not appear in the template.  The
;; scheduling type is selected by <Is_float_mode>.
1981 (define_insn "neon_vmls<mode>_unspec"
1982 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1983 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1984 (match_operand:VDQW 2 "s_register_operand" "w")
1985 (match_operand:VDQW 3 "s_register_operand" "w")]
1988 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1990 (if_then_else (match_test "<Is_float_mode>")
1991 (const_string "neon_fp_mla_s<q>")
1992 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Intrinsic pattern for vmlsl: VW inputs %P2 and %P3 with a <V_widen>
;; destination %q0.  Operand 1 is tied to the destination through the "0"
;; constraint.
1995 (define_insn "neon_vmlsl<sup><mode>"
1996 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1997 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1998 (match_operand:VW 2 "s_register_operand" "w")
1999 (match_operand:VW 3 "s_register_operand" "w")]
2002 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2003 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2006 ;; vqdmulh, vqrdmulh
;; Emits vq<r>dmulh.<V_s_elem> on operands 1 and 2 for VMDQI modes, represented
;; as an unspec; <r> selects the rounding form.
2007 (define_insn "neon_vq<r>dmulh<mode>"
2008 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2009 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2010 (match_operand:VMDQI 2 "s_register_operand" "w")]
2013 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2014 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; Intrinsic pattern for vqdmlal: VMDI inputs %P2 and %P3 with a <V_widen>
;; destination %q0.  Operand 1 is tied to the destination through the "0"
;; constraint.
2017 (define_insn "neon_vqdmlal<mode>"
2018 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2019 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2020 (match_operand:VMDI 2 "s_register_operand" "w")
2021 (match_operand:VMDI 3 "s_register_operand" "w")]
2024 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2025 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Intrinsic pattern for vqdmlsl: VMDI inputs %P2 and %P3 with a <V_widen>
;; destination %q0.  Operand 1 is tied to the destination through the "0"
;; constraint.
2028 (define_insn "neon_vqdmlsl<mode>"
2029 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2030 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2031 (match_operand:VMDI 2 "s_register_operand" "w")
2032 (match_operand:VMDI 3 "s_register_operand" "w")]
2035 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2036 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Intrinsic pattern for vmull: two VW inputs (%P1, %P2) produce a <V_widen>
;; result (%q0); <sup> selects the signedness letter in the mnemonic suffix.
2039 (define_insn "neon_vmull<sup><mode>"
2040 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2041 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2042 (match_operand:VW 2 "s_register_operand" "w")]
2045 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2046 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Intrinsic pattern for vqdmull: two VMDI inputs (%P1, %P2) produce a
;; <V_widen> result (%q0).
2049 (define_insn "neon_vqdmull<mode>"
2050 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2051 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2052 (match_operand:VMDI 2 "s_register_operand" "w")]
2055 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2056 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Intrinsic expander for vector subtract.  The generic sub<mode>3 pattern is
;; used when the mode is not floating point or flag_unsafe_math_optimizations
;; is set; otherwise neon_vsub<mode>_unspec is emitted.
2059 (define_expand "neon_vsub<mode>"
2060 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2061 (match_operand:VCVTF 1 "s_register_operand" "w")
2062 (match_operand:VCVTF 2 "s_register_operand" "w")]
2065 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2066 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2068 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2073 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Emits vsub.<V_if_elem> on operands 1 and 2, represented as an unspec; the
;; scheduling type is selected by <Is_float_mode>.
2075 (define_insn "neon_vsub<mode>_unspec"
2076 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2077 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2078 (match_operand:VCVTF 2 "s_register_operand" "w")]
2081 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2083 (if_then_else (match_test "<Is_float_mode>")
2084 (const_string "neon_fp_addsub_s<q>")
2085 (const_string "neon_sub<q>")))]
;; Emits VSUBL.<sup><size>: two VDI-mode sources produce a <V_widen> result.
2088 (define_insn "neon_vsubl<sup><mode>"
2089 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2090 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2091 (match_operand:VDI 2 "s_register_operand" "w")]
2094 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2095 [(set_attr "type" "neon_sub_long")]
;; Emits VSUBW.<sup><size>: operand 1 is already in the widened mode,
;; operand 2 is a VDI-mode source, and the result is <V_widen>.
2098 (define_insn "neon_vsubw<sup><mode>"
2099 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2100 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2101 (match_operand:VDI 2 "s_register_operand" "w")]
2104 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2105 [(set_attr "type" "neon_sub_widen")]
;; Emits VQSUB.<sup><size> over the VDQIX modes; all three operands share
;; the same vector mode.
2108 (define_insn "neon_vqsub<sup><mode>"
2109 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2110 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2111 (match_operand:VDQIX 2 "s_register_operand" "w")]
2114 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2115 [(set_attr "type" "neon_qsub<q>")]
;; Emits VHSUB.<sup><size> over the VDQIW modes; all three operands share
;; the same vector mode.
2118 (define_insn "neon_vhsub<sup><mode>"
2119 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2120 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2121 (match_operand:VDQIW 2 "s_register_operand" "w")]
2124 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2125 [(set_attr "type" "neon_sub_halve<q>")]
;; Emits V<r>SUBHN: two VN-mode sources produce a <V_narrow> result.  <r>
;; is substituted into both the pattern name and the mnemonic.
2128 (define_insn "neon_v<r>subhn<mode>"
2129 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2130 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2131 (match_operand:VN 2 "s_register_operand" "w")]
2134 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2135 [(set_attr "type" "neon_sub_halve_narrow_q")]
2138 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2139 ;; without unsafe math optimizations.
;; Expander over COMPARISONS and the VDQW modes.  Operand 2 may be a
;; register or zero (reg_or_zero_operand).  The body tests for a
;; vector-float mode with -funsafe-math-optimizations off; the calls visible
;; below emit the v2sf/v4sf _insn_unspec patterns and the generic _insn
;; pattern.
2140 (define_expand "neon_vc<cmp_op><mode>"
2141 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2143 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2144 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2147 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2149 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2150 && !flag_unsafe_math_optimizations)
2152 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2153 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2154 whereas this expander iterates over the integer modes as well,
2155 but we will never expand to UNSPECs for the integer comparisons. */
2159 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2164 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2173 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; RTL-comparison form of the vector compare.  It is enabled only when the
;; mode is not vector-float or -funsafe-math-optimizations is on.  The
;; output code builds the mnemonic with sprintf: the type letter is "f" for
;; vector-float modes and <cmp_type> otherwise.  Alternative 0 prints
;; operand 2 as a register; the other alternative prints "#0".  The type
;; attribute is neon_compare_zero<q> when operand 2 matches zero_operand.
2180 (define_insn "neon_vc<cmp_op><mode>_insn"
2181 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2183 (COMPARISONS:<V_cmp_result>
2184 (match_operand:VDQW 1 "s_register_operand" "w,w")
2185 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2186 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2187 && !flag_unsafe_math_optimizations)"
2190 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2192 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2193 ? "f" : "<cmp_type>",
2194 which_alternative == 0
2195 ? "%<V_reg>2" : "#0");
2196 output_asm_insn (pattern, operands);
2200 (if_then_else (match_operand 2 "zero_operand")
2201 (const_string "neon_compare_zero<q>")
2202 (const_string "neon_compare<q>")))]
;; UNSPEC form of the vector compare for the VCVTF (float) modes.  The
;; mnemonic always uses the ".f" type letter.  Alternative 0 prints operand
;; 2 as a register; the other alternative prints "#0".
2205 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2206 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2207 (unspec:<V_cmp_result>
2208 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2209 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2214 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2216 which_alternative == 0
2217 ? "%<V_reg>2" : "#0");
2218 output_asm_insn (pattern, operands);
2221 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unsigned integer vector compare over the GTUGEU codes and VDQIW modes;
;; emits VC<cmp_op>.u<size> with register operands only.
2224 (define_insn "neon_vc<cmp_op>u<mode>"
2225 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2227 (GTUGEU:<V_cmp_result>
2228 (match_operand:VDQIW 1 "s_register_operand" "w")
2229 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2231 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2232 [(set_attr "type" "neon_compare<q>")]
;; Expander for compares of absolute values (GTGE codes, VCVTF modes).
;; With -funsafe-math-optimizations it emits the RTL-based _insn pattern;
;; the other call visible below emits the _insn_unspec pattern.
2235 (define_expand "neon_vca<cmp_op><mode>"
2236 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2238 (GTGE:<V_cmp_result>
2239 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2240 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2243 if (flag_unsafe_math_optimizations)
2244 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2247 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; RTL form of the absolute compare: a GTGE comparison of abs (operand 1)
;; with abs (operand 2).  Only enabled under -funsafe-math-optimizations.
;; Emits VAC<cmp_op>.
2254 (define_insn "neon_vca<cmp_op><mode>_insn"
2255 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2257 (GTGE:<V_cmp_result>
2258 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2259 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2260 "TARGET_NEON && flag_unsafe_math_optimizations"
2261 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2262 [(set_attr "type" "neon_fp_compare_s<q>")]
;; UNSPEC form of the absolute compare for the VCVTF modes; emits
;; VAC<cmp_op_unsp> on operands 1 and 2.
2265 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2266 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2267 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2268 (match_operand:VCVTF 2 "s_register_operand" "w")]
2271 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2272 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Emits VTST.<size> over the VDQIW modes; the result has the same mode as
;; the two sources.
2275 (define_insn "neon_vtst<mode>"
2276 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2277 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2278 (match_operand:VDQIW 2 "s_register_operand" "w")]
2281 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2282 [(set_attr "type" "neon_tst<q>")]
;; Integer VABD.<sup><size> over the VDQIW modes.
2285 (define_insn "neon_vabd<sup><mode>"
2286 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2287 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2288 (match_operand:VDQIW 2 "s_register_operand" "w")]
2291 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2292 [(set_attr "type" "neon_abd<q>")]
;; Floating-point VABD over the VCVTF modes (type neon_fp_abd_s<q>).
2295 (define_insn "neon_vabdf<mode>"
2296 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2297 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2298 (match_operand:VCVTF 2 "s_register_operand" "w")]
2301 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2302 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Emits VABDL.<sup><size>: two VW-mode sources produce a <V_widen> result.
2305 (define_insn "neon_vabdl<sup><mode>"
2306 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2307 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2308 (match_operand:VW 2 "s_register_operand" "w")]
2311 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2312 [(set_attr "type" "neon_abd_long")]
;; Emits VABA.<sup><size>.  The RTL is a plus of an unspec over operands 2
;; and 3 with operand 1; operand 1 is the accumulator, tied to the output
;; by its "0" constraint, so the template prints only operands 0, 2 and 3.
2315 (define_insn "neon_vaba<sup><mode>"
2316 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2317 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2318 (match_operand:VDQIW 3 "s_register_operand" "w")]
2320 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2322 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2323 [(set_attr "type" "neon_arith_acc<q>")]
;; Emits VABAL.<sup><size>: the widening counterpart of the vaba pattern.
;; Operands 2 and 3 are VW-mode sources; operand 1 is the <V_widen>
;; accumulator tied to the output.
2326 (define_insn "neon_vabal<sup><mode>"
2327 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2328 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2329 (match_operand:VW 3 "s_register_operand" "w")]
2331 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2333 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2334 [(set_attr "type" "neon_arith_acc<q>")]
;; Integer element-wise V<maxmin>.<sup><size> over the VDQIW modes;
;; <maxmin> selects the mnemonic.
2337 (define_insn "neon_v<maxmin><sup><mode>"
2338 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2339 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2340 (match_operand:VDQIW 2 "s_register_operand" "w")]
2343 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2344 [(set_attr "type" "neon_minmax<q>")]
;; Floating-point V<maxmin> over the VCVTF modes (type neon_fp_minmax_s<q>).
2347 (define_insn "neon_v<maxmin>f<mode>"
2348 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2349 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2350 (match_operand:VCVTF 2 "s_register_operand" "w")]
2353 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2354 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Expander for the vpadd intrinsic over the VD modes; it forwards its
;; operands to the neon_vpadd_internal<mode> pattern.
2357 (define_expand "neon_vpadd<mode>"
2358 [(match_operand:VD 0 "s_register_operand" "=w")
2359 (match_operand:VD 1 "s_register_operand" "w")
2360 (match_operand:VD 2 "s_register_operand" "w")]
2363 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; Emits VPADDL.<sup><size>: one VDQIW-mode source produces a
;; <V_double_width> result.
2368 (define_insn "neon_vpaddl<sup><mode>"
2369 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2370 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2373 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2374 [(set_attr "type" "neon_reduc_add_long")]
;; Emits VPADAL.<sup><size>.  Operand 1 is the <V_double_width> accumulator
;; tied to the output, so the template prints only operands 0 and 2.
2377 (define_insn "neon_vpadal<sup><mode>"
2378 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2379 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2380 (match_operand:VDQIW 2 "s_register_operand" "w")]
2383 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2384 [(set_attr "type" "neon_reduc_add_acc")]
;; Integer VP<maxmin>.<sup><size> over the VDI modes.
2387 (define_insn "neon_vp<maxmin><sup><mode>"
2388 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2389 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2390 (match_operand:VDI 2 "s_register_operand" "w")]
2393 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2394 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Floating-point VP<maxmin> over the VCVTF modes
;; (type neon_fp_reduc_minmax_s<q>).
2397 (define_insn "neon_vp<maxmin>f<mode>"
2398 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2399 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2400 (match_operand:VCVTF 2 "s_register_operand" "w")]
2403 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2404 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; Emits VRECPS over the VCVTF modes (type neon_fp_recps_s<q>).
2407 (define_insn "neon_vrecps<mode>"
2408 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2409 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2410 (match_operand:VCVTF 2 "s_register_operand" "w")]
2413 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2414 [(set_attr "type" "neon_fp_recps_s<q>")]
;; Emits VRSQRTS over the VCVTF modes (type neon_fp_rsqrts_s<q>).
2417 (define_insn "neon_vrsqrts<mode>"
2418 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2419 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2420 (match_operand:VCVTF 2 "s_register_operand" "w")]
2423 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2424 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for the vabs intrinsic over the VDQW modes; it simply emits
;; the standard abs<mode>2 pattern.
2427 (define_expand "neon_vabs<mode>"
2428 [(match_operand:VDQW 0 "s_register_operand" "")
2429 (match_operand:VDQW 1 "s_register_operand" "")]
2432 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; Emits VQABS over the VDQIW modes (single source operand).
2436 (define_insn "neon_vqabs<mode>"
2437 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2438 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2441 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2442 [(set_attr "type" "neon_qabs<q>")]
;; Vector byte swap (bswap RTL code) over the VDQHSD modes, implemented
;; with VREV<element size>.8.
2445 (define_insn "neon_bswap<mode>"
2446 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2447 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2449 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2450 [(set_attr "type" "neon_rev<q>")]
;; Expander for the vneg intrinsic over the VDQW modes; it simply emits
;; the standard neg<mode>2 pattern.
2453 (define_expand "neon_vneg<mode>"
2454 [(match_operand:VDQW 0 "s_register_operand" "")
2455 (match_operand:VDQW 1 "s_register_operand" "")]
2458 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; Expander for vector copysign over the VCVTF modes.  It builds a
;; CONST_VECTOR in the <V_cmp_result> integer mode whose elements are all
;; 0x80000000, loads it into a fresh register, copies operand 2 into
;; operand 0, and then emits neon_vbsl<mode> with the mask reinterpreted in
;; <MODE>mode through a subreg.  The trailing vbsl arguments are not
;; visible in this excerpt.
;; NOTE(review): GEN_INT (0x80000000) is not sign-extended for a 32-bit
;; element; confirm whether gen_int_mode (..., SImode) is needed to get a
;; canonical CONST_INT.
2462 (define_expand "neon_copysignf<mode>"
2463 [(match_operand:VCVTF 0 "register_operand")
2464 (match_operand:VCVTF 1 "register_operand")
2465 (match_operand:VCVTF 2 "register_operand")]
2469 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2470 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2471 rtvec v = rtvec_alloc (n_elt);
2473 /* Create bitmask for vector select. */
2474 for (i = 0; i < n_elt; ++i)
2475 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
2477 emit_move_insn (v_bitmask,
2478 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2479 emit_move_insn (operands[0], operands[2]);
2480 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2481 <VCVTF:V_cmp_result>mode, 0);
2482 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; Emits VQNEG over the VDQIW modes (single source operand).
2489 (define_insn "neon_vqneg<mode>"
2490 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2491 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2494 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2495 [(set_attr "type" "neon_qneg<q>")]
;; Emits VCLS over the VDQIW modes (single source operand).
2498 (define_insn "neon_vcls<mode>"
2499 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2500 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2503 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2504 [(set_attr "type" "neon_cls<q>")]
;; Standard-named clz<mode>2 pattern for the VDQIW modes: the clz RTL code
;; is implemented with VCLZ.
2507 (define_insn "clz<mode>2"
2508 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2509 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2511 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2512 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vclz intrinsic; it forwards to the clz<mode>2 pattern.
2515 (define_expand "neon_vclz<mode>"
2516 [(match_operand:VDQIW 0 "s_register_operand" "")
2517 (match_operand:VDQIW 1 "s_register_operand" "")]
2520 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Standard-named popcount<mode>2 pattern for the VE modes: the popcount
;; RTL code is implemented with VCNT.
2524 (define_insn "popcount<mode>2"
2525 [(set (match_operand:VE 0 "s_register_operand" "=w")
2526 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2528 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2529 [(set_attr "type" "neon_cnt<q>")]
;; Expander for the vcnt intrinsic; it forwards to the popcount<mode>2
;; pattern.
2532 (define_expand "neon_vcnt<mode>"
2533 [(match_operand:VE 0 "s_register_operand" "=w")
2534 (match_operand:VE 1 "s_register_operand" "w")]
2537 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; Emits VRECPE over the V32 modes; the element suffix comes from
;; <V_u_elem>.
2541 (define_insn "neon_vrecpe<mode>"
2542 [(set (match_operand:V32 0 "s_register_operand" "=w")
2543 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2546 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2547 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; Emits VRSQRTE over the V32 modes; the element suffix comes from
;; <V_u_elem>.
2550 (define_insn "neon_vrsqrte<mode>"
2551 [(set (match_operand:V32 0 "s_register_operand" "=w")
2552 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2555 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2556 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for the vmvn intrinsic over the VDQIW modes; it forwards to
;; the standard one_cmpl<mode>2 pattern.
2559 (define_expand "neon_vmvn<mode>"
2560 [(match_operand:VDQIW 0 "s_register_operand" "")
2561 (match_operand:VDQIW 1 "s_register_operand" "")]
2564 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Extracts one lane of a VD-mode vector into a core register ("=r", SImode)
;; using VMOV.s<size>.  On big-endian targets the lane number is mirrored
;; (nunits - 1 - lane) before it is printed.
2568 (define_insn "neon_vget_lane<mode>_sext_internal"
2569 [(set (match_operand:SI 0 "s_register_operand" "=r")
2571 (vec_select:<V_elem>
2572 (match_operand:VD 1 "s_register_operand" "w")
2573 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2576 if (BYTES_BIG_ENDIAN)
2578 int elt = INTVAL (operands[2]);
2579 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2580 operands[2] = GEN_INT (elt);
2582 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2584 [(set_attr "type" "neon_to_gp")]
;; Same as the VD sext_internal pattern but prints VMOV.u<size>.  On
;; big-endian targets the lane number is mirrored (nunits - 1 - lane)
;; before it is printed.
2587 (define_insn "neon_vget_lane<mode>_zext_internal"
2588 [(set (match_operand:SI 0 "s_register_operand" "=r")
2590 (vec_select:<V_elem>
2591 (match_operand:VD 1 "s_register_operand" "w")
2592 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2595 if (BYTES_BIG_ENDIAN)
2597 int elt = INTVAL (operands[2]);
2598 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2599 operands[2] = GEN_INT (elt);
2601 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2603 [(set_attr "type" "neon_to_gp")]
;; VQ2-mode variant of sext_internal.  The lane is located inside one
;; <V_HALF> half of the source: the half is selected as hard register
;; regno + 2 * (lane / halfelts) and the index within it is
;; lane % halfelts, mirrored on big-endian targets.  The instruction is
;; then printed through a local ops[] array with VMOV.s<size>.
2606 (define_insn "neon_vget_lane<mode>_sext_internal"
2607 [(set (match_operand:SI 0 "s_register_operand" "=r")
2609 (vec_select:<V_elem>
2610 (match_operand:VQ2 1 "s_register_operand" "w")
2611 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2615 int regno = REGNO (operands[1]);
2616 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2617 unsigned int elt = INTVAL (operands[2]);
2618 unsigned int elt_adj = elt % halfelts;
2620 if (BYTES_BIG_ENDIAN)
2621 elt_adj = halfelts - 1 - elt_adj;
2623 ops[0] = operands[0];
2624 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2625 ops[2] = GEN_INT (elt_adj);
2626 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2630 [(set_attr "type" "neon_to_gp_q")]
;; VQ2-mode variant of zext_internal.  The half-register selection and the
;; lane adjustment match the VQ2 sext_internal pattern; only the mnemonic
;; differs (VMOV.u<size>).
2633 (define_insn "neon_vget_lane<mode>_zext_internal"
2634 [(set (match_operand:SI 0 "s_register_operand" "=r")
2636 (vec_select:<V_elem>
2637 (match_operand:VQ2 1 "s_register_operand" "w")
2638 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2642 int regno = REGNO (operands[1]);
2643 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2644 unsigned int elt = INTVAL (operands[2]);
2645 unsigned int elt_adj = elt % halfelts;
2647 if (BYTES_BIG_ENDIAN)
2648 elt_adj = halfelts - 1 - elt_adj;
2650 ops[0] = operands[0];
2651 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2652 ops[2] = GEN_INT (elt_adj);
2653 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2657 [(set_attr "type" "neon_to_gp_q")]
;; Expander for the vget_lane intrinsic over the VDQW modes.  On
;; big-endian targets the lane number is XORed with (elements per 64 bits
;; - 1).  For 32-bit elements it emits vec_extract<mode>; the other call
;; visible below emits the sext_internal pattern.
2660 (define_expand "neon_vget_lane<mode>"
2661 [(match_operand:<V_ext> 0 "s_register_operand" "")
2662 (match_operand:VDQW 1 "s_register_operand" "")
2663 (match_operand:SI 2 "immediate_operand" "")]
2666 if (BYTES_BIG_ENDIAN)
2668 /* The intrinsics are defined in terms of a model where the
2669 element ordering in memory is vldm order, whereas the generic
2670 RTL is defined in terms of a model where the element ordering
2671 in memory is array order. Convert the lane number to conform
2673 unsigned int elt = INTVAL (operands[2]);
2674 unsigned int reg_nelts
2675 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2676 elt ^= reg_nelts - 1;
2677 operands[2] = GEN_INT (elt);
2680 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2681 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2683 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Unsigned counterpart of the vget_lane expander, over the VDQIW modes.
;; The big-endian lane adjustment and the 32-bit vec_extract<mode> path
;; are the same; the other call visible below emits the zext_internal
;; pattern.
2689 (define_expand "neon_vget_laneu<mode>"
2690 [(match_operand:<V_ext> 0 "s_register_operand" "")
2691 (match_operand:VDQIW 1 "s_register_operand" "")
2692 (match_operand:SI 2 "immediate_operand" "")]
2695 if (BYTES_BIG_ENDIAN)
2697 /* The intrinsics are defined in terms of a model where the
2698 element ordering in memory is vldm order, whereas the generic
2699 RTL is defined in terms of a model where the element ordering
2700 in memory is array order. Convert the lane number to conform
2702 unsigned int elt = INTVAL (operands[2]);
2703 unsigned int reg_nelts
2704 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2705 elt ^= reg_nelts - 1;
2706 operands[2] = GEN_INT (elt);
2709 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2710 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2712 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; DImode vget_lane: the visible body is a plain move of operand 1 into
;; operand 0 and does not read the lane operand (operand 2).
2718 (define_expand "neon_vget_lanedi"
2719 [(match_operand:DI 0 "s_register_operand" "=r")
2720 (match_operand:DI 1 "s_register_operand" "w")
2721 (match_operand:SI 2 "immediate_operand" "")]
2724 emit_move_insn (operands[0], operands[1]);
;; V2DI vget_lane.  On big-endian targets the lane number is XORed with 1.
;; The lane is then asserted to be 0 or 1, and the low (lane 0) or high
;; (lane 1) DImode part of operand 1 is moved into operand 0.
2728 (define_expand "neon_vget_lanev2di"
2729 [(match_operand:DI 0 "s_register_operand" "")
2730 (match_operand:V2DI 1 "s_register_operand" "")
2731 (match_operand:SI 2 "immediate_operand" "")]
2736 if (BYTES_BIG_ENDIAN)
2738 /* The intrinsics are defined in terms of a model where the
2739 element ordering in memory is vldm order, whereas the generic
2740 RTL is defined in terms of a model where the element ordering
2741 in memory is array order. Convert the lane number to conform
2743 unsigned int elt = INTVAL (operands[2]);
2744 unsigned int reg_nelts = 2;
2745 elt ^= reg_nelts - 1;
2746 operands[2] = GEN_INT (elt);
2749 lane = INTVAL (operands[2]);
2750 gcc_assert ((lane ==0) || (lane == 1));
2751 emit_move_insn (operands[0], lane == 0
2752 ? gen_lowpart (DImode, operands[1])
2753 : gen_highpart (DImode, operands[1]));
;; Expander for the vset_lane intrinsic over the VDQ modes.  Operand 1 is
;; the scalar, operand 2 the input vector and operand 3 the lane.  On
;; big-endian targets the lane is XORed with (elements per 64 bits - 1).
;; The lane is passed to vec_set<mode>_internal as a one-hot mask
;; (1 << lane).
2757 (define_expand "neon_vset_lane<mode>"
2758 [(match_operand:VDQ 0 "s_register_operand" "=w")
2759 (match_operand:<V_elem> 1 "s_register_operand" "r")
2760 (match_operand:VDQ 2 "s_register_operand" "0")
2761 (match_operand:SI 3 "immediate_operand" "i")]
2764 unsigned int elt = INTVAL (operands[3]);
2766 if (BYTES_BIG_ENDIAN)
2768 unsigned int reg_nelts
2769 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2770 elt ^= reg_nelts - 1;
2773 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2774 GEN_INT (1 << elt), operands[2]));
2778 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; The visible body is a plain move of operand 1 (the scalar) into
;; operand 0.
2780 (define_expand "neon_vset_lanedi"
2781 [(match_operand:DI 0 "s_register_operand" "=w")
2782 (match_operand:DI 1 "s_register_operand" "r")
2783 (match_operand:DI 2 "s_register_operand" "0")
2784 (match_operand:SI 3 "immediate_operand" "i")]
2787 emit_move_insn (operands[0], operands[1]);
;; Expander for the vcreate intrinsic over the VD_RE modes: the DImode
;; operand 1 is reinterpreted in <MODE>mode with gen_lowpart and moved into
;; operand 0.
2791 (define_expand "neon_vcreate<mode>"
2792 [(match_operand:VD_RE 0 "s_register_operand" "")
2793 (match_operand:DI 1 "general_operand" "")]
2796 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2797 emit_move_insn (operands[0], src);
;; vec_duplicate of a scalar in a core register ("r") over the VX modes;
;; emits VDUP.<size>.
2801 (define_insn "neon_vdup_n<mode>"
2802 [(set (match_operand:VX 0 "s_register_operand" "=w")
2803 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2805 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2806 [(set_attr "type" "neon_from_gp<q>")]
;; vec_duplicate over the V32 modes with two alternatives: the scalar in a
;; core register ("r", printed as %1) or in a register matching the "t"
;; constraint (printed as %y1).
2809 (define_insn "neon_vdup_n<mode>"
2810 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2811 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2814 vdup.<V_sz_elem>\t%<V_reg>0, %1
2815 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2816 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode vdup_n: the visible body is a plain move of operand 1 into
;; operand 0.
2819 (define_expand "neon_vdup_ndi"
2820 [(match_operand:DI 0 "s_register_operand" "=w")
2821 (match_operand:DI 1 "s_register_operand" "r")]
2824 emit_move_insn (operands[0], operands[1]);
;; vec_duplicate of a DImode value into V2DI.  Each alternative prints two
;; VMOVs, one to %e0 and one to %f0, which is why the length attribute is 8.
;; The source is a core register pair (%Q1, %R1) in the first alternative
;; and a "w" register (%P1) in the second.
2829 (define_insn "neon_vdup_nv2di"
2830 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2831 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2834 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2835 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2836 [(set_attr "length" "8")
2837 (set_attr "type" "multiple")]
;; Duplicates one lane of a <V_double_vector_mode> source across a
;; VDQW-mode result using VDUP.<size>.  On big-endian targets the lane
;; number is mirrored within the source vector.  Two templates are
;; returned, one printing %P0 and one printing %q0; the condition that
;; selects between them is not visible in this excerpt.
2840 (define_insn "neon_vdup_lane<mode>_internal"
2841 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2843 (vec_select:<V_elem>
2844 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2845 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2848 if (BYTES_BIG_ENDIAN)
2850 int elt = INTVAL (operands[2]);
2851 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2852 operands[2] = GEN_INT (elt);
2855 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2857 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2859 [(set_attr "type" "neon_dup<q>")]
;; Expander for the vdup_lane intrinsic.  On big-endian targets the lane
;; number is XORed with (elements per 64 bits of the source mode - 1).  It
;; then emits neon_vdup_lane<mode>_internal.
2862 (define_expand "neon_vdup_lane<mode>"
2863 [(match_operand:VDQW 0 "s_register_operand" "=w")
2864 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2865 (match_operand:SI 2 "immediate_operand" "i")]
2868 if (BYTES_BIG_ENDIAN)
2870 unsigned int elt = INTVAL (operands[2]);
2871 unsigned int reg_nelts
2872 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
2873 elt ^= reg_nelts - 1;
2874 operands[2] = GEN_INT (elt);
2876 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2881 ; Scalar index is ignored, since only zero is valid here.
;; The visible body is a plain move of operand 1 into operand 0.
2882 (define_expand "neon_vdup_lanedi"
2883 [(match_operand:DI 0 "s_register_operand" "=w")
2884 (match_operand:DI 1 "s_register_operand" "w")
2885 (match_operand:SI 2 "immediate_operand" "i")]
2888 emit_move_insn (operands[0], operands[1]);
2892 ; Likewise for v2di, as the DImode second operand has only a single element.
;; Forwards operands 0 and 1 to neon_vdup_nv2di; operand 2 is not read.
2893 (define_expand "neon_vdup_lanev2di"
2894 [(match_operand:V2DI 0 "s_register_operand" "=w")
2895 (match_operand:DI 1 "s_register_operand" "w")
2896 (match_operand:SI 2 "immediate_operand" "i")]
2899 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2903 ; Disabled before reload because we don't want combine doing something silly,
2904 ; but used by the post-reload expansion of neon_vcombine.
;; Swaps two VDQX-mode registers with VSWP.  Both operands are
;; read-write ("+w"), and the pattern is two sets that exchange them.  The
;; insn condition requires reload_completed.
2905 (define_insn "*neon_vswp<mode>"
2906 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2907 (match_operand:VDQX 1 "s_register_operand" "+w"))
2908 (set (match_dup 1) (match_dup 0))]
2909 "TARGET_NEON && reload_completed"
2910 "vswp\t%<V_reg>0, %<V_reg>1"
2911 [(set_attr "type" "neon_permute<q>")]
2914 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; destination vector (operand 0, built by the vec_concat below).
2916 ;; FIXME: A different implementation of this builtin could make it much
2917 ;; more likely that we wouldn't actually need to output anything (we could make
2918 ;; it so that the reg allocator puts things in the right places magically
2919 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; define_insn_and_split: the split is enabled once reload has completed
;; and is carried out by neon_split_vcombine (operands).
2921 (define_insn_and_split "neon_vcombine<mode>"
2922 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2923 (vec_concat:<V_DOUBLE>
2924 (match_operand:VDX 1 "s_register_operand" "w")
2925 (match_operand:VDX 2 "s_register_operand" "w")))]
2928 "&& reload_completed"
2931 neon_split_vcombine (operands);
2934 [(set_attr "type" "multiple")]
;; Expander for the vget_high intrinsic over the VQX modes: it moves a
;; <V_HALF> subreg of operand 1, taken at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), into operand 0.
2937 (define_expand "neon_vget_high<mode>"
2938 [(match_operand:<V_HALF> 0 "s_register_operand")
2939 (match_operand:VQX 1 "s_register_operand")]
2942 emit_move_insn (operands[0],
2943 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
2944 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for the vget_low intrinsic over the VQX modes: it moves a
;; <V_HALF> subreg of operand 1 into operand 0.  The subreg offset
;; argument is not visible in this excerpt.
2948 (define_expand "neon_vget_low<mode>"
2949 [(match_operand:<V_HALF> 0 "s_register_operand")
2950 (match_operand:VQX 1 "s_register_operand")]
2953 emit_move_insn (operands[0],
2954 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Standard-named signed integer to float vector conversion (VCVTI source,
;; <V_CVTTO> result); emits VCVT.f32.s32.  Disabled under -frounding-math.
2959 (define_insn "float<mode><V_cvtto>2"
2960 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2961 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2962 "TARGET_NEON && !flag_rounding_math"
2963 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
2964 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named unsigned integer to float vector conversion; emits
;; VCVT.f32.u32.  Disabled under -frounding-math.
2967 (define_insn "floatuns<mode><V_cvtto>2"
2968 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2969 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2970 "TARGET_NEON && !flag_rounding_math"
2971 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
2972 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Standard-named float to signed integer vector conversion (fix RTL code,
;; VCVTF source); emits VCVT.s32.f32.
2975 (define_insn "fix_trunc<mode><V_cvtto>2"
2976 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2977 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2979 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
2980 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Standard-named float to unsigned integer vector conversion
;; (unsigned_fix RTL code); emits VCVT.u32.f32.
2983 (define_insn "fixuns_trunc<mode><V_cvtto>2"
2984 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2985 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2987 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
2988 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; UNSPEC vcvt intrinsic, float to integer direction (VCVTF source); emits
;; VCVT.<sup>32.f32.
2991 (define_insn "neon_vcvt<sup><mode>"
2992 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2993 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
2996 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
2997 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; UNSPEC vcvt intrinsic, integer to float direction (VCVTI source); emits
;; VCVT.f32.<sup>32.
3000 (define_insn "neon_vcvt<sup><mode>"
3001 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3002 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3005 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3006 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Converts V4HF to V4SF with VCVT.f32.f16.  Requires TARGET_FP16 in
;; addition to TARGET_NEON.
3009 (define_insn "neon_vcvtv4sfv4hf"
3010 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3011 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3013 "TARGET_NEON && TARGET_FP16"
3014 "vcvt.f32.f16\t%q0, %P1"
3015 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Converts V4SF to V4HF with VCVT.f16.f32.  Requires TARGET_FP16 in
;; addition to TARGET_NEON.
3018 (define_insn "neon_vcvtv4hfv4sf"
3019 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3020 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3022 "TARGET_NEON && TARGET_FP16"
3023 "vcvt.f16.f32\t%P0, %q1"
3024 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; vcvt_n intrinsic, float to integer direction.  Operand 2 is an immediate
;; that is checked with neon_const_bounds (operands[2], 1, 33) and printed
;; as the third assembler operand.  Whether the upper bound is inclusive is
;; decided by neon_const_bounds, which is defined elsewhere.
3027 (define_insn "neon_vcvt<sup>_n<mode>"
3028 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3029 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3030 (match_operand:SI 2 "immediate_operand" "i")]
3034 neon_const_bounds (operands[2], 1, 33);
3035 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3037 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; vcvt_n intrinsic, integer to float direction.  Operand 2 is an immediate
;; that is checked with neon_const_bounds (operands[2], 1, 33) and printed
;; as the third assembler operand.
3040 (define_insn "neon_vcvt<sup>_n<mode>"
3041 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3042 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3043 (match_operand:SI 2 "immediate_operand" "i")]
3047 neon_const_bounds (operands[2], 1, 33);
3048 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3050 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Emits VMOVN: a VN-mode source produces a <V_narrow> result.
3053 (define_insn "neon_vmovn<mode>"
3054 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3055 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3058 "vmovn.<V_if_elem>\t%P0, %q1"
3059 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Emits VQMOVN.<sup><size>: a VN-mode source produces a <V_narrow> result.
3062 (define_insn "neon_vqmovn<sup><mode>"
3063 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3064 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3067 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3068 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Emits VQMOVUN: a VN-mode source produces a <V_narrow> result.
3071 (define_insn "neon_vqmovun<mode>"
3072 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3073 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3076 "vqmovun.<V_s_elem>\t%P0, %q1"
3077 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Emits VMOVL.<sup><size>: a VW-mode source produces a <V_widen> result.
3080 (define_insn "neon_vmovl<sup><mode>"
3081 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3082 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3085 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3086 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by scalar lane over the VMD modes.  Operand 2 holds the scalar
;; vector and is constrained by <scalar_mul_constraint>; operand 3 is the
;; immediate lane index printed as [%c3].  The type attribute depends on
;; whether the mode is a float mode.
3089 (define_insn "neon_vmul_lane<mode>"
3090 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3091 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3092 (match_operand:VMD 2 "s_register_operand"
3093 "<scalar_mul_constraint>")
3094 (match_operand:SI 3 "immediate_operand" "i")]
3098 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3101 (if_then_else (match_test "<Is_float_mode>")
3102 (const_string "neon_fp_mul_s_scalar<q>")
3103 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; VMQ-mode variant of multiply by scalar lane.  Operands 0 and 1 are
;; printed with %q, while the scalar vector (operand 2) has the <V_HALF>
;; mode and is printed with %P.
3106 (define_insn "neon_vmul_lane<mode>"
3107 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3108 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3109 (match_operand:<V_HALF> 2 "s_register_operand"
3110 "<scalar_mul_constraint>")
3111 (match_operand:SI 3 "immediate_operand" "i")]
3115 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3118 (if_then_else (match_test "<Is_float_mode>")
3119 (const_string "neon_fp_mul_s_scalar<q>")
3120 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Widening multiply by scalar lane: VMDI-mode sources produce a <V_widen>
;; result.  Operand 3 is the immediate lane index of operand 2.
3123 (define_insn "neon_vmull<sup>_lane<mode>"
3124 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3125 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3126 (match_operand:VMDI 2 "s_register_operand"
3127 "<scalar_mul_constraint>")
3128 (match_operand:SI 3 "immediate_operand" "i")]
3132 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3134 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; VQDMULL by scalar lane (UNSPEC_VQDMULL_LANE): VMDI-mode sources produce
;; a <V_widen> result.  Operand 3 is the immediate lane index of operand 2.
3137 (define_insn "neon_vqdmull_lane<mode>"
3138 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3139 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3140 (match_operand:VMDI 2 "s_register_operand"
3141 "<scalar_mul_constraint>")
3142 (match_operand:SI 3 "immediate_operand" "i")]
3143 UNSPEC_VQDMULL_LANE))]
3146 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3148 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; VQ<r>DMULH by scalar lane over the VMQI modes.  Operands 0 and 1 are
;; printed with %q; the scalar vector (operand 2) has the <V_HALF> mode
;; and is printed with %P.
3151 (define_insn "neon_vq<r>dmulh_lane<mode>"
3152 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3153 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3154 (match_operand:<V_HALF> 2 "s_register_operand"
3155 "<scalar_mul_constraint>")
3156 (match_operand:SI 3 "immediate_operand" "i")]
3160 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3162 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vq<r>dmulh by scalar lane for the VMDI modes; all three vector operands
;; are VMDI and are printed with %P.
;; NOTE(review): the type attribute ends in "_scalar_q", identical to the
;; VMQI variant of this pattern -- confirm that the "_q" suffix is intended
;; for this variant.
3165 (define_insn "neon_vq<r>dmulh_lane<mode>"
3166 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3167 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3168 (match_operand:VMDI 2 "s_register_operand"
3169 "<scalar_mul_constraint>")
3170 (match_operand:SI 3 "immediate_operand" "i")]
3174 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3176 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Multiply-accumulate by scalar lane for the VMD modes.  Operand 1 is tied
;; to the output (constraint "0"); the product uses operand 2 and lane %c4
;; of operand 3.  The type attribute depends on <Is_float_mode>.
3179 (define_insn "neon_vmla_lane<mode>"
3180 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3181 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3182 (match_operand:VMD 2 "s_register_operand" "w")
3183 (match_operand:VMD 3 "s_register_operand"
3184 "<scalar_mul_constraint>")
3185 (match_operand:SI 4 "immediate_operand" "i")]
3189 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3192 (if_then_else (match_test "<Is_float_mode>")
3193 (const_string "neon_fp_mla_s_scalar<q>")
3194 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by scalar lane for the VMQ modes.  Operand 1 is tied
;; to the output; the product uses the VMQ operand 2 and lane %c4 of the
;; <V_HALF>-mode operand 3.
3197 (define_insn "neon_vmla_lane<mode>"
3198 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3199 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3200 (match_operand:VMQ 2 "s_register_operand" "w")
3201 (match_operand:<V_HALF> 3 "s_register_operand"
3202 "<scalar_mul_constraint>")
3203 (match_operand:SI 4 "immediate_operand" "i")]
3207 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3210 (if_then_else (match_test "<Is_float_mode>")
3211 (const_string "neon_fp_mla_s_scalar<q>")
3212 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by scalar lane.  The <V_widen> accumulator
;; (operand 1) is tied to the output; the VMDI operand 2 is multiplied by
;; lane %c4 of the VMDI operand 3.
3215 (define_insn "neon_vmlal<sup>_lane<mode>"
3216 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3217 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3218 (match_operand:VMDI 2 "s_register_operand" "w")
3219 (match_operand:VMDI 3 "s_register_operand"
3220 "<scalar_mul_constraint>")
3221 (match_operand:SI 4 "immediate_operand" "i")]
3225 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3227 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by scalar lane (UNSPEC_VQDMLAL_LANE).  The <V_widen> accumulator
;; (operand 1) is tied to the output; operand 2 and lane %c4 of operand 3
;; are the VMDI multiplicands.
3230 (define_insn "neon_vqdmlal_lane<mode>"
3231 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3232 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3233 (match_operand:VMDI 2 "s_register_operand" "w")
3234 (match_operand:VMDI 3 "s_register_operand"
3235 "<scalar_mul_constraint>")
3236 (match_operand:SI 4 "immediate_operand" "i")]
3237 UNSPEC_VQDMLAL_LANE))]
3240 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3242 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by scalar lane for the VMD modes.  Operand 1 is tied to
;; the output; emits vmls with operand 2 and lane %c4 of operand 3.  The
;; type attribute uses the same "mla" scheduling classes as the vmla
;; patterns.
3245 (define_insn "neon_vmls_lane<mode>"
3246 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3247 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3248 (match_operand:VMD 2 "s_register_operand" "w")
3249 (match_operand:VMD 3 "s_register_operand"
3250 "<scalar_mul_constraint>")
3251 (match_operand:SI 4 "immediate_operand" "i")]
3255 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3258 (if_then_else (match_test "<Is_float_mode>")
3259 (const_string "neon_fp_mla_s_scalar<q>")
3260 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by scalar lane for the VMQ modes.  Operand 1 is tied to
;; the output; emits vmls with the VMQ operand 2 and lane %c4 of the
;; <V_HALF>-mode operand 3.
3263 (define_insn "neon_vmls_lane<mode>"
3264 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3265 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3266 (match_operand:VMQ 2 "s_register_operand" "w")
3267 (match_operand:<V_HALF> 3 "s_register_operand"
3268 "<scalar_mul_constraint>")
3269 (match_operand:SI 4 "immediate_operand" "i")]
3273 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3276 (if_then_else (match_test "<Is_float_mode>")
3277 (const_string "neon_fp_mla_s_scalar<q>")
3278 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by scalar lane.  The <V_widen> accumulator
;; (operand 1) is tied to the output; the VMDI operand 2 is multiplied by
;; lane %c4 of the VMDI operand 3.
3281 (define_insn "neon_vmlsl<sup>_lane<mode>"
3282 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3283 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3284 (match_operand:VMDI 2 "s_register_operand" "w")
3285 (match_operand:VMDI 3 "s_register_operand"
3286 "<scalar_mul_constraint>")
3287 (match_operand:SI 4 "immediate_operand" "i")]
3291 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3293 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by scalar lane (UNSPEC_VQDMLSL_LANE).  The <V_widen> accumulator
;; (operand 1) is tied to the output; operand 2 and lane %c4 of operand 3
;; are the VMDI multiplicands.
3296 (define_insn "neon_vqdmlsl_lane<mode>"
3297 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3298 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3299 (match_operand:VMDI 2 "s_register_operand" "w")
3300 (match_operand:VMDI 3 "s_register_operand"
3301 "<scalar_mul_constraint>")
3302 (match_operand:SI 4 "immediate_operand" "i")]
3303 UNSPEC_VQDMLSL_LANE))]
3306 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3308 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3311 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3312 ; core register into a temp register, then use a scalar taken from that. This
3313 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3314 ; or extracted from another vector. In the latter case it's currently better to
3315 ; use the "_lane" variant, and the former case can probably be implemented
3316 ; using vld1_lane, but that hasn't been done yet.
;; vmul by a scalar register for the VMD modes.  Allocates a fresh <MODE>mode
;; temporary, passes the <V_elem> scalar (operand 2) to
;; gen_neon_vset_lane<mode> with lane const0_rtx, then uses the temporary as
;; the scalar vector of gen_neon_vmul_lane<mode>.  tmp is passed as both the
;; first and third argument of the vset_lane call -- presumably only lane 0
;; is given a defined value.
3318 (define_expand "neon_vmul_n<mode>"
3319 [(match_operand:VMD 0 "s_register_operand" "")
3320 (match_operand:VMD 1 "s_register_operand" "")
3321 (match_operand:<V_elem> 2 "s_register_operand" "")]
3324 rtx tmp = gen_reg_rtx (<MODE>mode);
3325 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3326 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; vmul by a scalar register for the VMQ modes.  The temporary has the
;; half-width mode <V_HALF>; the <V_elem> scalar (operand 2) is passed to
;; gen_neon_vset_lane<V_half> with lane const0_rtx and the temporary is then
;; handed to gen_neon_vmul_lane<mode>.
3331 (define_expand "neon_vmul_n<mode>"
3332 [(match_operand:VMQ 0 "s_register_operand" "")
3333 (match_operand:VMQ 1 "s_register_operand" "")
3334 (match_operand:<V_elem> 2 "s_register_operand" "")]
3337 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3338 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3339 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Widening multiply by a scalar register, "s" variant.  The <V_elem> scalar
;; (operand 2) is passed to gen_neon_vset_lane<mode> with lane const0_rtx on
;; a fresh <MODE>mode temporary, which is then handed to
;; gen_neon_vmulls_lane<mode>; the result (operand 0) has mode <V_widen>.
3344 (define_expand "neon_vmulls_n<mode>"
3345 [(match_operand:<V_widen> 0 "s_register_operand" "")
3346 (match_operand:VMDI 1 "s_register_operand" "")
3347 (match_operand:<V_elem> 2 "s_register_operand" "")]
3350 rtx tmp = gen_reg_rtx (<MODE>mode);
3351 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3352 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Widening multiply by a scalar register, "u" variant.  Same sequence as
;; neon_vmulls_n<mode>, but the temporary is handed to
;; gen_neon_vmullu_lane<mode>.
3357 (define_expand "neon_vmullu_n<mode>"
3358 [(match_operand:<V_widen> 0 "s_register_operand" "")
3359 (match_operand:VMDI 1 "s_register_operand" "")
3360 (match_operand:<V_elem> 2 "s_register_operand" "")]
3363 rtx tmp = gen_reg_rtx (<MODE>mode);
3364 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3365 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; vqdmull by a scalar register.  The <V_elem> scalar (operand 2) is passed
;; to gen_neon_vset_lane<mode> with lane const0_rtx on a fresh <MODE>mode
;; temporary, which is then handed to gen_neon_vqdmull_lane<mode>; the
;; result (operand 0) has mode <V_widen>.
3370 (define_expand "neon_vqdmull_n<mode>"
3371 [(match_operand:<V_widen> 0 "s_register_operand" "")
3372 (match_operand:VMDI 1 "s_register_operand" "")
3373 (match_operand:<V_elem> 2 "s_register_operand" "")]
3376 rtx tmp = gen_reg_rtx (<MODE>mode);
3377 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3378 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; vqdmulh by a scalar register for the VMDI modes.  The <V_elem> scalar
;; (operand 2) is passed to gen_neon_vset_lane<mode> with lane const0_rtx on
;; a fresh <MODE>mode temporary, which is then handed to
;; gen_neon_vqdmulh_lane<mode>.
3383 (define_expand "neon_vqdmulh_n<mode>"
3384 [(match_operand:VMDI 0 "s_register_operand" "")
3385 (match_operand:VMDI 1 "s_register_operand" "")
3386 (match_operand:<V_elem> 2 "s_register_operand" "")]
3389 rtx tmp = gen_reg_rtx (<MODE>mode);
3390 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3391 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqrdmulh by a scalar register for the VMDI modes.  Same sequence as the
;; VMDI neon_vqdmulh_n<mode> expander, but the temporary is handed to
;; gen_neon_vqrdmulh_lane<mode>.
3396 (define_expand "neon_vqrdmulh_n<mode>"
3397 [(match_operand:VMDI 0 "s_register_operand" "")
3398 (match_operand:VMDI 1 "s_register_operand" "")
3399 (match_operand:<V_elem> 2 "s_register_operand" "")]
3402 rtx tmp = gen_reg_rtx (<MODE>mode);
3403 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3404 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqdmulh by a scalar register for the VMQI modes.  The temporary has the
;; half-width mode <V_HALF>; the <V_elem> scalar (operand 2) is passed to
;; gen_neon_vset_lane<V_half> with lane const0_rtx and the temporary is then
;; handed to gen_neon_vqdmulh_lane<mode>.
3409 (define_expand "neon_vqdmulh_n<mode>"
3410 [(match_operand:VMQI 0 "s_register_operand" "")
3411 (match_operand:VMQI 1 "s_register_operand" "")
3412 (match_operand:<V_elem> 2 "s_register_operand" "")]
3415 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3416 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3417 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqrdmulh by a scalar register for the VMQI modes.  Same sequence as the
;; VMQI neon_vqdmulh_n<mode> expander, but the <V_HALF> temporary is handed
;; to gen_neon_vqrdmulh_lane<mode>.
3422 (define_expand "neon_vqrdmulh_n<mode>"
3423 [(match_operand:VMQI 0 "s_register_operand" "")
3424 (match_operand:VMQI 1 "s_register_operand" "")
3425 (match_operand:<V_elem> 2 "s_register_operand" "")]
3428 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3429 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3430 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vmla by a scalar register for the VMD modes.  The <V_elem> scalar
;; (operand 3) is passed to gen_neon_vset_lane<mode> with lane const0_rtx on
;; a fresh <MODE>mode temporary; operands 0-2 are forwarded unchanged to
;; gen_neon_vmla_lane<mode>.
3435 (define_expand "neon_vmla_n<mode>"
3436 [(match_operand:VMD 0 "s_register_operand" "")
3437 (match_operand:VMD 1 "s_register_operand" "")
3438 (match_operand:VMD 2 "s_register_operand" "")
3439 (match_operand:<V_elem> 3 "s_register_operand" "")]
3442 rtx tmp = gen_reg_rtx (<MODE>mode);
3443 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3444 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; vmla by a scalar register for the VMQ modes.  The temporary has the
;; half-width mode <V_HALF>; the <V_elem> scalar (operand 3) is passed to
;; gen_neon_vset_lane<V_half> with lane const0_rtx, and operands 0-2 are
;; forwarded unchanged to gen_neon_vmla_lane<mode>.
3449 (define_expand "neon_vmla_n<mode>"
3450 [(match_operand:VMQ 0 "s_register_operand" "")
3451 (match_operand:VMQ 1 "s_register_operand" "")
3452 (match_operand:VMQ 2 "s_register_operand" "")
3453 (match_operand:<V_elem> 3 "s_register_operand" "")]
3456 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3457 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3458 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Widening multiply-accumulate by a scalar register, "s" variant.  Operands
;; 0 and 1 have mode <V_widen>; the <V_elem> scalar (operand 3) is passed to
;; gen_neon_vset_lane<mode> with lane const0_rtx on a fresh <MODE>mode
;; temporary before the call to gen_neon_vmlals_lane<mode>.
3463 (define_expand "neon_vmlals_n<mode>"
3464 [(match_operand:<V_widen> 0 "s_register_operand" "")
3465 (match_operand:<V_widen> 1 "s_register_operand" "")
3466 (match_operand:VMDI 2 "s_register_operand" "")
3467 (match_operand:<V_elem> 3 "s_register_operand" "")]
3470 rtx tmp = gen_reg_rtx (<MODE>mode);
3471 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3472 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Widening multiply-accumulate by a scalar register, "u" variant.  Same
;; sequence as neon_vmlals_n<mode>, but ending in
;; gen_neon_vmlalu_lane<mode>.
3477 (define_expand "neon_vmlalu_n<mode>"
3478 [(match_operand:<V_widen> 0 "s_register_operand" "")
3479 (match_operand:<V_widen> 1 "s_register_operand" "")
3480 (match_operand:VMDI 2 "s_register_operand" "")
3481 (match_operand:<V_elem> 3 "s_register_operand" "")]
3484 rtx tmp = gen_reg_rtx (<MODE>mode);
3485 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3486 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; vqdmlal by a scalar register.  Operands 0 and 1 have mode <V_widen>; the
;; <V_elem> scalar (operand 3) is passed to gen_neon_vset_lane<mode> with
;; lane const0_rtx on a fresh <MODE>mode temporary before the call to
;; gen_neon_vqdmlal_lane<mode>.
3491 (define_expand "neon_vqdmlal_n<mode>"
3492 [(match_operand:<V_widen> 0 "s_register_operand" "")
3493 (match_operand:<V_widen> 1 "s_register_operand" "")
3494 (match_operand:VMDI 2 "s_register_operand" "")
3495 (match_operand:<V_elem> 3 "s_register_operand" "")]
3498 rtx tmp = gen_reg_rtx (<MODE>mode);
3499 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3500 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; vmls by a scalar register for the VMD modes.  The <V_elem> scalar
;; (operand 3) is passed to gen_neon_vset_lane<mode> with lane const0_rtx on
;; a fresh <MODE>mode temporary; operands 0-2 are forwarded unchanged to
;; gen_neon_vmls_lane<mode>.
3505 (define_expand "neon_vmls_n<mode>"
3506 [(match_operand:VMD 0 "s_register_operand" "")
3507 (match_operand:VMD 1 "s_register_operand" "")
3508 (match_operand:VMD 2 "s_register_operand" "")
3509 (match_operand:<V_elem> 3 "s_register_operand" "")]
3512 rtx tmp = gen_reg_rtx (<MODE>mode);
3513 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3514 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; vmls by a scalar register for the VMQ modes.  The temporary has the
;; half-width mode <V_HALF>; the <V_elem> scalar (operand 3) is passed to
;; gen_neon_vset_lane<V_half> with lane const0_rtx, and operands 0-2 are
;; forwarded unchanged to gen_neon_vmls_lane<mode>.
3519 (define_expand "neon_vmls_n<mode>"
3520 [(match_operand:VMQ 0 "s_register_operand" "")
3521 (match_operand:VMQ 1 "s_register_operand" "")
3522 (match_operand:VMQ 2 "s_register_operand" "")
3523 (match_operand:<V_elem> 3 "s_register_operand" "")]
3526 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3527 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3528 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Widening multiply-subtract by a scalar register, "s" variant.  Operands 0
;; and 1 have mode <V_widen>; the <V_elem> scalar (operand 3) is passed to
;; gen_neon_vset_lane<mode> with lane const0_rtx on a fresh <MODE>mode
;; temporary before the call to gen_neon_vmlsls_lane<mode>.
3533 (define_expand "neon_vmlsls_n<mode>"
3534 [(match_operand:<V_widen> 0 "s_register_operand" "")
3535 (match_operand:<V_widen> 1 "s_register_operand" "")
3536 (match_operand:VMDI 2 "s_register_operand" "")
3537 (match_operand:<V_elem> 3 "s_register_operand" "")]
3540 rtx tmp = gen_reg_rtx (<MODE>mode);
3541 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3542 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Widening multiply-subtract by a scalar register, "u" variant.  Same
;; sequence as neon_vmlsls_n<mode>, but ending in
;; gen_neon_vmlslu_lane<mode>.
3547 (define_expand "neon_vmlslu_n<mode>"
3548 [(match_operand:<V_widen> 0 "s_register_operand" "")
3549 (match_operand:<V_widen> 1 "s_register_operand" "")
3550 (match_operand:VMDI 2 "s_register_operand" "")
3551 (match_operand:<V_elem> 3 "s_register_operand" "")]
3554 rtx tmp = gen_reg_rtx (<MODE>mode);
3555 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3556 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; vqdmlsl by a scalar register.  Operands 0 and 1 have mode <V_widen>; the
;; <V_elem> scalar (operand 3) is passed to gen_neon_vset_lane<mode> with
;; lane const0_rtx on a fresh <MODE>mode temporary before the call to
;; gen_neon_vqdmlsl_lane<mode>.
3561 (define_expand "neon_vqdmlsl_n<mode>"
3562 [(match_operand:<V_widen> 0 "s_register_operand" "")
3563 (match_operand:<V_widen> 1 "s_register_operand" "")
3564 (match_operand:VMDI 2 "s_register_operand" "")
3565 (match_operand:<V_elem> 3 "s_register_operand" "")]
3568 rtx tmp = gen_reg_rtx (<MODE>mode);
3569 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3570 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on two VDQX registers with an immediate (operand 3).  The immediate
;; is checked with neon_const_bounds against 0 and the number of units in
;; <MODE>mode before the vext.<V_sz_elem> template is returned.
3575 (define_insn "neon_vext<mode>"
3576 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3577 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3578 (match_operand:VDQX 2 "s_register_operand" "w")
3579 (match_operand:SI 3 "immediate_operand" "i")]
3583 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3584 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3586 [(set_attr "type" "neon_ext<q>")]
;; vrev64.<V_sz_elem> on a single VDQ register operand.
3589 (define_insn "neon_vrev64<mode>"
3590 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3591 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3594 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3595 [(set_attr "type" "neon_rev<q>")]
;; vrev32.<V_sz_elem> on a single register operand; restricted to the VX
;; mode iterator.
3598 (define_insn "neon_vrev32<mode>"
3599 [(set (match_operand:VX 0 "s_register_operand" "=w")
3600 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3603 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3604 [(set_attr "type" "neon_rev<q>")]
;; vrev16.<V_sz_elem> on a single register operand; restricted to the VE
;; mode iterator.
3607 (define_insn "neon_vrev16<mode>"
3608 [(set (match_operand:VE 0 "s_register_operand" "=w")
3609 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3612 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3613 [(set_attr "type" "neon_rev<q>")]
3616 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3617 ; allocation. For an intrinsic of form:
3618 ; rD = vbsl_* (rS, rN, rM)
3619 ; We can use any of:
3620 ; vbsl rS, rN, rM (if D = S)
3621 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3622 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select with three register alternatives, one per input that may
;; be tied to the output:
;;   alternative 0 ties operand 1 to the output and emits vbsl;
;;   alternative 1 ties operand 3 to the output and emits vbit;
;;   alternative 2 ties operand 2 to the output and emits vbif.
3624 (define_insn "neon_vbsl<mode>_internal"
3625 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3626 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3627 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3628 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3632 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3633 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3634 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3635 [(set_attr "type" "neon_bsl<q>")]
;; Expander for vbsl.  Operand 1 arrives in mode <V_cmp_result>, while the
;; other operands are VDQX; it is re-expressed in <MODE>mode with
;; gen_lowpart so that all operands share one mode.
3638 (define_expand "neon_vbsl<mode>"
3639 [(set (match_operand:VDQX 0 "s_register_operand" "")
3640 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3641 (match_operand:VDQX 2 "s_register_operand" "")
3642 (match_operand:VDQX 3 "s_register_operand" "")]
3646 /* We can't alias operands together if they have different modes. */
3647 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift of a VDQIX vector by a per-element shift count held in a second
;; VDQIX register (operand 2).
;; NOTE(review): the type attribute is "neon_shift_imm<q>" although the
;; shift count is a register operand -- confirm this scheduling class is
;; intended.
3651 (define_insn "neon_v<shift_op><sup><mode>"
3652 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3653 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3654 (match_operand:VDQIX 2 "s_register_operand" "w")]
3657 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3658 [(set_attr "type" "neon_shift_imm<q>")]
;; Same operand shape and output template as the preceding register-shift
;; pattern, but with the saturating-shift type attribute.
;; NOTE(review): presumably a different unspec iterator keeps the generated
;; names distinct from the preceding pattern -- confirm.
3662 (define_insn "neon_v<shift_op><sup><mode>"
3663 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3664 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3665 (match_operand:VDQIX 2 "s_register_operand" "w")]
3668 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3669 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift of a VDQIX vector by an immediate (operand 2).  The immediate is
;; checked with neon_const_bounds against 1 and element-bits + 1 before the
;; template is returned.
3673 (define_insn "neon_v<shift_op><sup>_n<mode>"
3674 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3675 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3676 (match_operand:SI 2 "immediate_operand" "i")]
3680 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3681 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3683 [(set_attr "type" "neon_shift_imm<q>")]
3686 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: VN source, <V_narrow> result.  The
;; immediate is checked against 1 and element-bits / 2 + 1, where the
;; element size is that of the source mode.
3687 (define_insn "neon_v<shift_op>_n<mode>"
3688 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3689 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3690 (match_operand:SI 2 "immediate_operand" "i")]
3694 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3695 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3697 [(set_attr "type" "neon_shift_imm_narrow_q")]
3700 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing shift by immediate: VN source, <V_narrow> result,
;; with the <sup> signedness prefix in the mnemonic.  The immediate is
;; checked against 1 and element-bits / 2 + 1.
3701 (define_insn "neon_v<shift_op><sup>_n<mode>"
3702 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3703 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3704 (match_operand:SI 2 "immediate_operand" "i")]
3708 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3709 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3711 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3714 ;; vqshrun_n, vqrshrun_n
;; Saturating narrowing shift by immediate using the <V_s_elem> element
;; suffix: VN source, <V_narrow> result.  The immediate is checked against
;; 1 and element-bits / 2 + 1.
3715 (define_insn "neon_v<shift_op>_n<mode>"
3716 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3717 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3718 (match_operand:SI 2 "immediate_operand" "i")]
3722 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3723 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3725 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by an immediate (operand 2).  The immediate is checked with
;; neon_const_bounds against 0 and the element size in bits.
3728 (define_insn "neon_vshl_n<mode>"
3729 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3730 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3731 (match_operand:SI 2 "immediate_operand" "i")]
3735 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3736 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3738 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by an immediate (operand 2), with the <sup> signedness prefix.  The
;; immediate is checked against 0 and the element size in bits.
3741 (define_insn "neon_vqshl_<sup>_n<mode>"
3742 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3743 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3744 (match_operand:SI 2 "immediate_operand" "i")]
3748 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3749 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3751 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by an immediate (operand 2).  The immediate is checked against 0
;; and the element size in bits.
3754 (define_insn "neon_vqshlu_n<mode>"
3755 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3756 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3757 (match_operand:SI 2 "immediate_operand" "i")]
3761 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3762 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
3764 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by an immediate: VW source, <V_widen> result.
;; NOTE(review): the comment in the body states "0 < imm <= size", but the
;; neon_const_bounds call passes a lower bound of 0, whereas the patterns in
;; this file that exclude a zero shift pass 1 -- confirm whether an
;; immediate of 0 is meant to be accepted here.
3767 (define_insn "neon_vshll<sup>_n<mode>"
3768 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3769 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3770 (match_operand:SI 2 "immediate_operand" "i")]
3774 /* The boundaries are: 0 < imm <= size. */
3775 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3776 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
3778 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-by-immediate and accumulate.  Operand 1 is tied to the output
;; (constraint "0"), operand 2 is the vector being shifted and operand 3 the
;; immediate, which is checked against 1 and element-bits + 1.
3782 (define_insn "neon_v<shift_op><sup>_n<mode>"
3783 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3784 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3785 (match_operand:VDQIX 2 "s_register_operand" "w")
3786 (match_operand:SI 3 "immediate_operand" "i")]
3790 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3791 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3793 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by an immediate.  Operand 1 is tied to the output, operand 2 is the
;; vector source and operand 3 the immediate, which is checked against 1 and
;; element-bits + 1.
;; NOTE(review): the type attribute is "neon_shift_reg<q>" although the
;; shift amount is an immediate -- confirm this scheduling class is intended.
3796 (define_insn "neon_vsri_n<mode>"
3797 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3798 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3799 (match_operand:VDQIX 2 "s_register_operand" "w")
3800 (match_operand:SI 3 "immediate_operand" "i")]
3804 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3805 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3807 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by an immediate.  Operand 1 is tied to the output, operand 2 is the
;; vector source and operand 3 the immediate, which is checked against 0 and
;; the element size in bits.
;; NOTE(review): the type attribute is "neon_shift_reg<q>" although the
;; shift amount is an immediate -- confirm this scheduling class is intended.
3810 (define_insn "neon_vsli_n<mode>"
3811 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3812 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3813 (match_operand:VDQIX 2 "s_register_operand" "w")
3814 (match_operand:SI 3 "immediate_operand" "i")]
3818 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3819 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3821 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl.8 with a one-register table: operand 1 is the V8QI table (printed
;; inside braces) and operand 2 the V8QI index vector.
3824 (define_insn "neon_vtbl1v8qi"
3825 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3826 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3827 (match_operand:V8QI 2 "s_register_operand" "w")]
3830 "vtbl.8\t%P0, {%P1}, %P2"
3831 [(set_attr "type" "neon_tbl1")]
;; vtbl.8 with a two-register table held in the TImode operand 1.  The
;; output code builds V8QI register rtxes at REGNO (operands[1]) and at that
;; number + 2 to name the individual table registers, then prints the insn
;; through output_asm_insn.
3834 (define_insn "neon_vtbl2v8qi"
3835 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3836 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3837 (match_operand:V8QI 2 "s_register_operand" "w")]
3842 int tabbase = REGNO (operands[1]);
3844 ops[0] = operands[0];
3845 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3846 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3847 ops[3] = operands[2];
3848 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3852 [(set_attr "type" "neon_tbl2")]
;; vtbl.8 with a three-register table held in the EImode operand 1.  The
;; individual table registers are named as V8QI registers at the base
;; register number and at base + 2 and base + 4.
3855 (define_insn "neon_vtbl3v8qi"
3856 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3857 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3858 (match_operand:V8QI 2 "s_register_operand" "w")]
3863 int tabbase = REGNO (operands[1]);
3865 ops[0] = operands[0];
3866 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3867 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3868 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3869 ops[4] = operands[2];
3870 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3874 [(set_attr "type" "neon_tbl3")]
;; vtbl.8 with a four-register table held in the OImode operand 1.  The
;; individual table registers are named as V8QI registers at the base
;; register number and at base + 2, base + 4 and base + 6.
3877 (define_insn "neon_vtbl4v8qi"
3878 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3879 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3880 (match_operand:V8QI 2 "s_register_operand" "w")]
3885 int tabbase = REGNO (operands[1]);
3887 ops[0] = operands[0];
3888 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3889 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3890 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3891 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3892 ops[5] = operands[2];
3893 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3897 [(set_attr "type" "neon_tbl4")]
3900 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table, split once reload has completed.
;; The table (operand 1) is viewed as TImode via gen_lowpart, and
;; gen_neon_vtbl2v8qi is emitted twice: first on the low V8QI halves of op0
;; and op2, then on their high halves.  The destination is earlyclobber
;; ("=&w").
3901 (define_insn_and_split "neon_vtbl1v16qi"
3902 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3903 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3904 (match_operand:V16QI 2 "s_register_operand" "w")]
3908 "&& reload_completed"
3911 rtx op0, op1, op2, part0, part2;
3915 op1 = gen_lowpart (TImode, operands[1]);
3918 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3919 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3920 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3921 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3923 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3924 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3925 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3926 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3929 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table, split once reload has completed
;; into two V8QI lookups: first on the low halves of op0 and op2, then on
;; their high halves.  The destination is earlyclobber ("=&w").
;; NOTE(review): the table operand here is OImode, yet both halves are
;; emitted through gen_neon_vtbl2v8qi, whose table operand is declared
;; TImode; neon_vtbl4v8qi is the pattern that takes an OImode table.
;; Confirm that the generator used is the intended one.
3932 (define_insn_and_split "neon_vtbl2v16qi"
3933 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3934 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
3935 (match_operand:V16QI 2 "s_register_operand" "w")]
3939 "&& reload_completed"
3942 rtx op0, op1, op2, part0, part2;
3949 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3950 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3951 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3952 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3954 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3955 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3956 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3957 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3960 [(set_attr "type" "multiple")]
3963 ;; ??? Logically we should extend the regular neon_vcombine pattern to
3964 ;; handle quad-word input modes, producing octa-word output modes. But
3965 ;; that requires us to add support for octa-word vector modes in moves.
3966 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers into one OImode value.  Once reload has
;; completed the pattern is split by calling neon_split_vcombine on the
;; operand array.
3967 (define_insn_and_split "neon_vcombinev16qi"
3968 [(set (match_operand:OI 0 "s_register_operand" "=w")
3969 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
3970 (match_operand:V16QI 2 "s_register_operand" "w")]
3974 "&& reload_completed"
3977 neon_split_vcombine (operands);
3980 [(set_attr "type" "multiple")]
;; vtbx.8 with a one-register table.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is the V8QI table and operand 3 the index
;; vector.
3983 (define_insn "neon_vtbx1v8qi"
3984 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3985 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
3986 (match_operand:V8QI 2 "s_register_operand" "w")
3987 (match_operand:V8QI 3 "s_register_operand" "w")]
3990 "vtbx.8\t%P0, {%P2}, %P3"
3991 [(set_attr "type" "neon_tbl1")]
;; vtbx.8 with a two-register table held in the TImode operand 2.  Operand
;; 1 is tied to the output; the table registers are named as V8QI registers
;; at REGNO (operands[2]) and at that number + 2.
3994 (define_insn "neon_vtbx2v8qi"
3995 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3996 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
3997 (match_operand:TI 2 "s_register_operand" "w")
3998 (match_operand:V8QI 3 "s_register_operand" "w")]
4003 int tabbase = REGNO (operands[2]);
4005 ops[0] = operands[0];
4006 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4007 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4008 ops[3] = operands[3];
4009 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4013 [(set_attr "type" "neon_tbl2")]
;; vtbx.8 with a three-register table held in the EImode operand 2.  Operand
;; 1 is tied to the output; the table registers are named as V8QI registers
;; at the base register number and at base + 2 and base + 4.
4016 (define_insn "neon_vtbx3v8qi"
4017 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4018 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4019 (match_operand:EI 2 "s_register_operand" "w")
4020 (match_operand:V8QI 3 "s_register_operand" "w")]
4025 int tabbase = REGNO (operands[2]);
4027 ops[0] = operands[0];
4028 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4029 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4030 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4031 ops[4] = operands[3];
4032 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4036 [(set_attr "type" "neon_tbl3")]
;; vtbx.8 with a four-register table held in the OImode operand 2.  Operand
;; 1 is tied to the output; the table registers are named as V8QI registers
;; at the base register number and at base + 2, base + 4 and base + 6.
4039 (define_insn "neon_vtbx4v8qi"
4040 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4041 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4042 (match_operand:OI 2 "s_register_operand" "w")
4043 (match_operand:V8QI 3 "s_register_operand" "w")]
4048 int tabbase = REGNO (operands[2]);
4050 ops[0] = operands[0];
4051 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4052 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4053 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4054 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4055 ops[5] = operands[3];
4056 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4060 [(set_attr "type" "neon_tbl4")]
;; Expander producing the two results of a vtrn: operands 0 and 3 are the
;; outputs, both computed from the same inputs (operands 1 and 2); the
;; second set uses UNSPEC_VTRN2.
4063 (define_expand "neon_vtrn<mode>_internal"
4065 [(set (match_operand:VDQW 0 "s_register_operand" "")
4066 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4067 (match_operand:VDQW 2 "s_register_operand" "")]
4069 (set (match_operand:VDQW 3 "s_register_operand" "")
4070 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4075 ;; Note: Different operand numbering to handle tied registers correctly.
;; Two-output vtrn insn.  Input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2, so the instruction is printed with just
;; the two output registers.  Both outputs are earlyclobber ("=&w").
4076 (define_insn "*neon_vtrn<mode>_insn"
4077 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4078 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4079 (match_operand:VDQW 3 "s_register_operand" "2")]
4081 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4082 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4085 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4086 [(set_attr "type" "neon_permute<q>")]
;; Expander producing the two results of a vzip: operands 0 and 3 are the
;; outputs, both computed from the same inputs (operands 1 and 2); the
;; second set uses UNSPEC_VZIP2.
4089 (define_expand "neon_vzip<mode>_internal"
4091 [(set (match_operand:VDQW 0 "s_register_operand" "")
4092 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4093 (match_operand:VDQW 2 "s_register_operand" "")]
4095 (set (match_operand:VDQW 3 "s_register_operand" "")
4096 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4101 ;; Note: Different operand numbering to handle tied registers correctly.
;; Two-output vzip insn.  Input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2, so the instruction is printed with just
;; the two output registers.  Both outputs are earlyclobber ("=&w").
4102 (define_insn "*neon_vzip<mode>_insn"
4103 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4104 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4105 (match_operand:VDQW 3 "s_register_operand" "2")]
4107 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4108 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4111 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4112 [(set_attr "type" "neon_zip<q>")]
;; Expander producing the two results of a vuzp: operands 0 and 3 are the
;; outputs, both computed from the same inputs (operands 1 and 2); the
;; second set uses UNSPEC_VUZP2.
4115 (define_expand "neon_vuzp<mode>_internal"
4117 [(set (match_operand:VDQW 0 "s_register_operand" "")
4118 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4119 (match_operand:VDQW 2 "s_register_operand" "")]
4121 (set (match_operand:VDQW 3 "s_register_operand" "")
4122 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4127 ;; Note: Different operand numbering to handle tied registers correctly.
;; Two-output vuzp insn.  Input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2, so the instruction is printed with just
;; the two output registers.  Both outputs are earlyclobber ("=&w").
4128 (define_insn "*neon_vuzp<mode>_insn"
4129 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4130 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4131 (match_operand:VDQW 3 "s_register_operand" "2")]
4133 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4134 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4137 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4138 [(set_attr "type" "neon_zip<q>")]
;; Reinterpret any VD_RE-mode register as V8QI; the work is delegated to
;; neon_reinterpret (destination, source).
4141 (define_expand "neon_vreinterpretv8qi<mode>"
4142 [(match_operand:V8QI 0 "s_register_operand" "")
4143 (match_operand:VD_RE 1 "s_register_operand" "")]
4146 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VD_RE-mode register as V4HI; the work is delegated to
;; neon_reinterpret (destination, source).
4150 (define_expand "neon_vreinterpretv4hi<mode>"
4151 [(match_operand:V4HI 0 "s_register_operand" "")
4152 (match_operand:VD_RE 1 "s_register_operand" "")]
4155 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VD_RE-mode register as V2SI; the work is delegated to
;; neon_reinterpret (destination, source).
4159 (define_expand "neon_vreinterpretv2si<mode>"
4160 [(match_operand:V2SI 0 "s_register_operand" "")
4161 (match_operand:VD_RE 1 "s_register_operand" "")]
4164 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VD_RE-mode register as V2SF; the work is delegated to
;; neon_reinterpret (destination, source).
4168 (define_expand "neon_vreinterpretv2sf<mode>"
4169 [(match_operand:V2SF 0 "s_register_operand" "")
4170 (match_operand:VD_RE 1 "s_register_operand" "")]
4173 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VD_RE-mode register as DI; the work is delegated to
;; neon_reinterpret (destination, source).
4177 (define_expand "neon_vreinterpretdi<mode>"
4178 [(match_operand:DI 0 "s_register_operand" "")
4179 (match_operand:VD_RE 1 "s_register_operand" "")]
4182 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register as TI; the work is delegated to
;; neon_reinterpret (destination, source).
4186 (define_expand "neon_vreinterpretti<mode>"
4187 [(match_operand:TI 0 "s_register_operand" "")
4188 (match_operand:VQXMOV 1 "s_register_operand" "")]
4191 neon_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQXMOV-mode register as V16QI; the work is delegated to
;; neon_reinterpret (destination, source).
4196 (define_expand "neon_vreinterpretv16qi<mode>"
4197 [(match_operand:V16QI 0 "s_register_operand" "")
4198 (match_operand:VQXMOV 1 "s_register_operand" "")]
4201 neon_reinterpret (operands[0], operands[1]);
;; Expander: operand 0 is a V8HI register, operand 1 a VQXMOV-mode register.
;; The preparation code passes both operands to neon_reinterpret.
4205 (define_expand "neon_vreinterpretv8hi<mode>"
4206 [(match_operand:V8HI 0 "s_register_operand" "")
4207 (match_operand:VQXMOV 1 "s_register_operand" "")]
4210 neon_reinterpret (operands[0], operands[1]);
;; Expander: operand 0 is a V4SI register, operand 1 a VQXMOV-mode register.
;; The preparation code passes both operands to neon_reinterpret.
4214 (define_expand "neon_vreinterpretv4si<mode>"
4215 [(match_operand:V4SI 0 "s_register_operand" "")
4216 (match_operand:VQXMOV 1 "s_register_operand" "")]
4219 neon_reinterpret (operands[0], operands[1]);
;; Expander: operand 0 is a V4SF register, operand 1 a VQXMOV-mode register.
;; The preparation code passes both operands to neon_reinterpret.
4223 (define_expand "neon_vreinterpretv4sf<mode>"
4224 [(match_operand:V4SF 0 "s_register_operand" "")
4225 (match_operand:VQXMOV 1 "s_register_operand" "")]
4228 neon_reinterpret (operands[0], operands[1]);
;; Expander: operand 0 is a V2DI register, operand 1 a VQXMOV-mode register.
;; The preparation code passes both operands to neon_reinterpret.
4232 (define_expand "neon_vreinterpretv2di<mode>"
4233 [(match_operand:V2DI 0 "s_register_operand" "")
4234 (match_operand:VQXMOV 1 "s_register_operand" "")]
4237 neon_reinterpret (operands[0], operands[1]);
;; Expander that sets VDQX register operand 0 from an unspec wrapping the
;; VDQX neon_struct_operand 1.  The unspec code and the enabling condition
;; are not visible in this listing.
4241 (define_expand "vec_load_lanes<mode><mode>"
4242 [(set (match_operand:VDQX 0 "s_register_operand")
4243 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Loads VDQX register operand 0 ("=w") from memory operand 1 ("Um") with a
;; single vld1.<V_sz_elem>; scheduling type is neon_load1_1reg<q>.
4247 (define_insn "neon_vld1<mode>"
4248 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4249 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4252 "vld1.<V_sz_elem>\t%h0, %A1"
4253 [(set_attr "type" "neon_load1_1reg<q>")]
4256 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4257 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order in the output code below by applying NEON_ENDIAN_LANE_N to the
;; lane operand before it is printed.
;;
;; VDX variant: operand 1 is the <V_elem> memory source, operand 2 is tied
;; to the output (constraint "0") and operand 3 is the immediate lane.  Two
;; templates are visible, a whole-register vld1 and a single-lane vld1; the
;; test selecting between them is not visible in this listing (presumably it
;; is on `max' -- TODO confirm).
4259 (define_insn "neon_vld1_lane<mode>"
4260 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4261 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4262 (match_operand:VDX 2 "s_register_operand" "0")
4263 (match_operand:SI 3 "immediate_operand" "i")]
4267 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4268 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4269 operands[3] = GEN_INT (lane);
4271 return "vld1.<V_sz_elem>\t%P0, %A1";
4273 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4275 [(set_attr "type" "neon_load1_one_lane<q>")]
4278 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4279 ;; here on big endian targets.
;; VQX variant.  The output code rewrites operand 0 as a <V_HALF>mode
;; register at `regno' and rewrites operand 3 with the computed lane before
;; returning a vld1 template on %P0.  Lanes with lane >= max / 2 take a
;; separate path whose body is not visible in this listing (presumably it
;; adjusts `lane' and `regno' -- TODO confirm).
4280 (define_insn "neon_vld1_lane<mode>"
4281 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4282 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4283 (match_operand:VQX 2 "s_register_operand" "0")
4284 (match_operand:SI 3 "immediate_operand" "i")]
4288 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4289 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4290 operands[3] = GEN_INT (lane);
4291 int regno = REGNO (operands[0]);
4292 if (lane >= max / 2)
4296 operands[3] = GEN_INT (lane);
4298 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4300 return "vld1.<V_sz_elem>\t%P0, %A1";
4302 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4304 [(set_attr "type" "neon_load1_one_lane<q>")]
;; VD variant: operand 0 is the vec_duplicate of the <V_elem> memory operand
;; 1, emitted as a vld1 using the all-lanes register syntax {%P0[]}.
4307 (define_insn "neon_vld1_dup<mode>"
4308 [(set (match_operand:VD 0 "s_register_operand" "=w")
4309 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4311 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4312 [(set_attr "type" "neon_load1_all_lanes<q>")]
4315 ;; Special case for DImode. Treat it exactly like a simple load.
;; Expander: DI register operand 0 is set from an unspec of the DI
;; neon_struct_operand 1 (the unspec code is not visible in this listing).
4316 (define_expand "neon_vld1_dupdi"
4317 [(set (match_operand:DI 0 "s_register_operand" "")
4318 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; VQ variant: operand 0 is the vec_duplicate of the <V_elem> memory operand
;; 1.  The single vld1 names both %e0 and %f0 with the all-lanes syntax.
4324 (define_insn "neon_vld1_dup<mode>"
4325 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4326 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4329 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4331 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicate load, split once reload has completed.  The split loads
;; the low DImode part of operand 0 via neon_vld1_dupdi and then copies that
;; value into the high DImode part with a move.  Declared length is 8.
4334 (define_insn_and_split "neon_vld1_dupv2di"
4335 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4336 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4339 "&& reload_completed"
4342 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4343 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4344 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4347 [(set_attr "length" "8")
4348 (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander that sets the VDQX neon_struct_operand 0 from an unspec wrapping
;; VDQX register operand 1.  The unspec code and the enabling condition are
;; not visible in this listing.
4351 (define_expand "vec_store_lanes<mode><mode>"
4352 [(set (match_operand:VDQX 0 "neon_struct_operand")
4353 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Stores VDQX register operand 1 ("w") to memory operand 0 ("=Um") with a
;; single vst1.<V_sz_elem>; scheduling type is neon_store1_1reg<q>.
4357 (define_insn "neon_vst1<mode>"
4358 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4359 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4362 "vst1.<V_sz_elem>\t%h1, %A0"
4363 [(set_attr "type" "neon_store1_1reg<q>")])
4365 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4366 ;; here on big endian targets.
;; VDX variant: stores to the <V_elem> memory operand 0 from register
;; operand 1, with operand 2 the immediate lane (mapped through
;; NEON_ENDIAN_LANE_N).  Two templates are visible, a whole-register vst1
;; and a single-lane vst1; the test selecting between them is not visible
;; in this listing (presumably it is on `max' -- TODO confirm).
4367 (define_insn "neon_vst1_lane<mode>"
4368 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4370 [(match_operand:VDX 1 "s_register_operand" "w")
4371 (match_operand:SI 2 "immediate_operand" "i")]
4375 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4376 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4377 operands[2] = GEN_INT (lane);
4379 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4381 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4383 [(set_attr "type" "neon_store1_one_lane<q>")]
4386 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4387 ;; here on big endian targets.
;; VQX variant.  The output code rewrites operand 1 as a <V_HALF>mode
;; register at `regno' and operand 2 as the computed lane before returning
;; a vst1 template on %P1.  Lanes with lane >= max / 2 take a separate path
;; whose body is not visible in this listing (presumably it adjusts `lane'
;; and `regno' -- TODO confirm).
4388 (define_insn "neon_vst1_lane<mode>"
4389 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4391 [(match_operand:VQX 1 "s_register_operand" "w")
4392 (match_operand:SI 2 "immediate_operand" "i")]
4396 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4397 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4398 int regno = REGNO (operands[1]);
4399 if (lane >= max / 2)
4404 operands[2] = GEN_INT (lane);
4405 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4407 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4409 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4411 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander that sets TI register operand 0 from an unspec of the TI
;; neon_struct_operand 1.  The inner VDX UNSPEC_VSTRUCTDUMMY carries no
;; operand of its own; it only mentions the <mode> iterator in the pattern.
4414 (define_expand "vec_load_lanesti<mode>"
4415 [(set (match_operand:TI 0 "s_register_operand")
4416 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4417 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VDX variant: loads TI register operand 0 from TI memory operand 1.
;; When <V_sz_elem> is 64 a vld1.64 is emitted instead of vld2, and the
;; type attribute makes the same distinction (neon_load1_2reg<q> versus
;; neon_load2_2reg<q>).
4421 (define_insn "neon_vld2<mode>"
4422 [(set (match_operand:TI 0 "s_register_operand" "=w")
4423 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4424 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4428 if (<V_sz_elem> == 64)
4429 return "vld1.64\t%h0, %A1";
4431 return "vld2.<V_sz_elem>\t%h0, %A1";
4434 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4435 (const_string "neon_load1_2reg<q>")
4436 (const_string "neon_load2_2reg<q>")))]
;; VQ2 variant: expander that sets OI register operand 0 from an unspec of
;; the OI neon_struct_operand 1 (unspec code not visible in this listing).
4439 (define_expand "vec_load_lanesoi<mode>"
4440 [(set (match_operand:OI 0 "s_register_operand")
4441 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4442 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VQ2 variant: loads OI register operand 0 from OI memory operand 1 with a
;; single vld2.<V_sz_elem>; scheduling type is neon_load2_2reg_q.
4446 (define_insn "neon_vld2<mode>"
4447 [(set (match_operand:OI 0 "s_register_operand" "=w")
4448 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4449 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4452 "vld2.<V_sz_elem>\t%h0, %A1"
4453 [(set_attr "type" "neon_load2_2reg_q")])
4455 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4456 ;; here on big endian targets.
;; VD_LANE variant on a TI value.  Operand 2 is tied to the output
;; (constraint "0"); operand 3 is the immediate lane, mapped through
;; NEON_ENDIAN_LANE_N.  The output code builds DImode registers at regno and
;; regno + 2 and prints one vld2 naming the same lane of each.
4457 (define_insn "neon_vld2_lane<mode>"
4458 [(set (match_operand:TI 0 "s_register_operand" "=w")
4459 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4460 (match_operand:TI 2 "s_register_operand" "0")
4461 (match_operand:SI 3 "immediate_operand" "i")
4462 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4466 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4467 int regno = REGNO (operands[0]);
4469 ops[0] = gen_rtx_REG (DImode, regno);
4470 ops[1] = gen_rtx_REG (DImode, regno + 2);
4471 ops[2] = operands[1];
4472 ops[3] = GEN_INT (lane);
4473 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4476 [(set_attr "type" "neon_load2_one_lane<q>")]
4479 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4480 ;; here on big endian targets.
;; VQ_HS variant on an OI value.  The output code builds DImode registers
;; at regno and regno + 4 and prints one vld2 naming the same lane of each.
;; Lanes with lane >= max / 2 take a separate path whose body is not visible
;; in this listing (presumably it adjusts `lane' and `regno' -- TODO
;; confirm).
4481 (define_insn "neon_vld2_lane<mode>"
4482 [(set (match_operand:OI 0 "s_register_operand" "=w")
4483 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4484 (match_operand:OI 2 "s_register_operand" "0")
4485 (match_operand:SI 3 "immediate_operand" "i")
4486 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4490 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4491 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4492 int regno = REGNO (operands[0]);
4494 if (lane >= max / 2)
4499 ops[0] = gen_rtx_REG (DImode, regno);
4500 ops[1] = gen_rtx_REG (DImode, regno + 4);
4501 ops[2] = operands[1];
4502 ops[3] = GEN_INT (lane);
4503 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4506 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Loads TI register operand 0 from the <V_two_elem> memory operand 1.
;; Modes with more than one unit use vld2 with the all-lanes syntax;
;; single-unit modes fall back to a plain vld1.  The type attribute makes
;; the same distinction on <V_mode_nunits>.
4509 (define_insn "neon_vld2_dup<mode>"
4510 [(set (match_operand:TI 0 "s_register_operand" "=w")
4511 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4512 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4516 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4517 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4519 return "vld1.<V_sz_elem>\t%h0, %A1";
4522 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4523 (const_string "neon_load2_all_lanes<q>")
4524 (const_string "neon_load1_1reg<q>")))]
;; Expander that sets the TI neon_struct_operand 0 from an unspec of TI
;; register operand 1 (unspec code not visible in this listing).
4527 (define_expand "vec_store_lanesti<mode>"
4528 [(set (match_operand:TI 0 "neon_struct_operand")
4529 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4530 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VDX variant: stores TI register operand 1 to TI memory operand 0.
;; When <V_sz_elem> is 64 a vst1.64 is emitted instead of vst2.
;; NOTE(review): for the non-64-bit case the type attribute is
;; neon_store2_one_lane<q> although the template is a whole-register vst2;
;; the matching load, neon_vld2<mode>, uses neon_load2_2reg<q>.  Confirm
;; whether a whole-register store type was intended.
4534 (define_insn "neon_vst2<mode>"
4535 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4536 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4537 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4541 if (<V_sz_elem> == 64)
4542 return "vst1.64\t%h1, %A0";
4544 return "vst2.<V_sz_elem>\t%h1, %A0";
4547 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4548 (const_string "neon_store1_2reg<q>")
4549 (const_string "neon_store2_one_lane<q>")))]
;; VQ2 variant: expander that sets the OI neon_struct_operand 0 from an
;; unspec of OI register operand 1 (unspec code not visible in this listing).
4552 (define_expand "vec_store_lanesoi<mode>"
4553 [(set (match_operand:OI 0 "neon_struct_operand")
4554 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4555 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VQ2 variant: stores OI register operand 1 to OI memory operand 0 with a
;; single vst2.<V_sz_elem>; scheduling type is neon_store2_4reg<q>.
4559 (define_insn "neon_vst2<mode>"
4560 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4561 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4562 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4565 "vst2.<V_sz_elem>\t%h1, %A0"
4566 [(set_attr "type" "neon_store2_4reg<q>")]
4569 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4570 ;; here on big endian targets.
;; VD_LANE variant on a TI value.  Operand 2 is the immediate lane, mapped
;; through NEON_ENDIAN_LANE_N.  The output code builds DImode registers at
;; regno and regno + 2 of operand 1 and prints one vst2 naming the same lane
;; of each, storing to the <V_two_elem> memory operand 0.
4571 (define_insn "neon_vst2_lane<mode>"
4572 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4573 (unspec:<V_two_elem>
4574 [(match_operand:TI 1 "s_register_operand" "w")
4575 (match_operand:SI 2 "immediate_operand" "i")
4576 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4580 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4581 int regno = REGNO (operands[1]);
4583 ops[0] = operands[0];
4584 ops[1] = gen_rtx_REG (DImode, regno);
4585 ops[2] = gen_rtx_REG (DImode, regno + 2);
4586 ops[3] = GEN_INT (lane);
4587 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4590 [(set_attr "type" "neon_store2_one_lane<q>")]
4593 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4594 ;; here on big endian targets.
;; VQ_HS variant on an OI value.  The output code builds DImode registers
;; at regno and regno + 4 of operand 1 and prints one vst2 naming the same
;; lane of each.  Lanes with lane >= max / 2 take a separate path whose body
;; is not visible in this listing (presumably it adjusts `lane' and `regno'
;; -- TODO confirm).
4595 (define_insn "neon_vst2_lane<mode>"
4596 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4597 (unspec:<V_two_elem>
4598 [(match_operand:OI 1 "s_register_operand" "w")
4599 (match_operand:SI 2 "immediate_operand" "i")
4600 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4604 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4605 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4606 int regno = REGNO (operands[1]);
4608 if (lane >= max / 2)
4613 ops[0] = operands[0];
4614 ops[1] = gen_rtx_REG (DImode, regno);
4615 ops[2] = gen_rtx_REG (DImode, regno + 4);
4616 ops[3] = GEN_INT (lane);
4617 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4620 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander that sets EI register operand 0 from an unspec of the EI
;; neon_struct_operand 1 (unspec code not visible in this listing).
4623 (define_expand "vec_load_lanesei<mode>"
4624 [(set (match_operand:EI 0 "s_register_operand")
4625 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4626 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VDX variant: loads EI register operand 0 from EI memory operand 1.
;; When <V_sz_elem> is 64 a vld1.64 is emitted instead of vld3, and the
;; type attribute makes the same distinction (neon_load1_3reg<q> versus
;; neon_load3_3reg<q>).
4630 (define_insn "neon_vld3<mode>"
4631 [(set (match_operand:EI 0 "s_register_operand" "=w")
4632 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4633 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4637 if (<V_sz_elem> == 64)
4638 return "vld1.64\t%h0, %A1";
4640 return "vld3.<V_sz_elem>\t%h0, %A1";
4643 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4644 (const_string "neon_load1_3reg<q>")
4645 (const_string "neon_load3_3reg<q>")))]
;; Expander for CI register operand 0 and CI memory operand 1; the
;; preparation code simply emits neon_vld3<mode> on the same two operands.
4648 (define_expand "vec_load_lanesci<mode>"
4649 [(match_operand:CI 0 "s_register_operand")
4650 (match_operand:CI 1 "neon_struct_operand")
4651 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4654 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; VQ2 expander for a CI-mode vld3, emitted as two instructions.  The memory
;; operand is re-addressed as EImode at offset 0 for neon_vld3qa and at
;; offset GET_MODE_SIZE (EImode) for neon_vld3qb; the second insn also takes
;; operands[0] as an input.
4658 (define_expand "neon_vld3<mode>"
4659 [(match_operand:CI 0 "s_register_operand")
4660 (match_operand:CI 1 "neon_struct_operand")
4661 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4666 mem = adjust_address (operands[1], EImode, 0);
4667 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4668 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4669 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of the CI-mode vld3: loads from EI memory operand 1 into the
;; DImode registers at regno, regno + 4 and regno + 8 of operand 0.
4673 (define_insn "neon_vld3qa<mode>"
4674 [(set (match_operand:CI 0 "s_register_operand" "=w")
4675 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4676 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4680 int regno = REGNO (operands[0]);
4682 ops[0] = gen_rtx_REG (DImode, regno);
4683 ops[1] = gen_rtx_REG (DImode, regno + 4);
4684 ops[2] = gen_rtx_REG (DImode, regno + 8);
4685 ops[3] = operands[1];
4686 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4689 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the CI-mode vld3: operand 2 is tied to the output
;; (constraint "0").  Loads from EI memory operand 1 into the DImode
;; registers at regno + 2, regno + 6 and regno + 10 of operand 0.
4692 (define_insn "neon_vld3qb<mode>"
4693 [(set (match_operand:CI 0 "s_register_operand" "=w")
4694 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4695 (match_operand:CI 2 "s_register_operand" "0")
4696 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4700 int regno = REGNO (operands[0]);
4702 ops[0] = gen_rtx_REG (DImode, regno + 2);
4703 ops[1] = gen_rtx_REG (DImode, regno + 6);
4704 ops[2] = gen_rtx_REG (DImode, regno + 10);
4705 ops[3] = operands[1];
4706 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4709 [(set_attr "type" "neon_load3_3reg<q>")]
4712 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4713 ;; here on big endian targets.
;; VD_LANE variant on an EI value.  Operand 2 is tied to the output; operand
;; 3 is the immediate lane, mapped through NEON_ENDIAN_LANE_N.  The output
;; code builds DImode registers at regno, regno + 2 and regno + 4 and prints
;; one vld3 naming the same lane of each.
;; NOTE(review): the memory operand is printed with plain %3 here, whereas
;; the vld2/vld4 lane patterns use the %A modifier -- presumably deliberate,
;; confirm.
4714 (define_insn "neon_vld3_lane<mode>"
4715 [(set (match_operand:EI 0 "s_register_operand" "=w")
4716 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4717 (match_operand:EI 2 "s_register_operand" "0")
4718 (match_operand:SI 3 "immediate_operand" "i")
4719 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4723 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
4724 int regno = REGNO (operands[0]);
4726 ops[0] = gen_rtx_REG (DImode, regno);
4727 ops[1] = gen_rtx_REG (DImode, regno + 2);
4728 ops[2] = gen_rtx_REG (DImode, regno + 4);
4729 ops[3] = operands[1];
4730 ops[4] = GEN_INT (lane);
4731 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4735 [(set_attr "type" "neon_load3_one_lane<q>")]
4738 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4739 ;; here on big endian targets.
;; VQ_HS variant on a CI value.  The output code builds DImode registers at
;; regno, regno + 4 and regno + 8 and prints one vld3 naming the same lane
;; of each; the memory operand is printed with plain %3.  Lanes with
;; lane >= max / 2 take a separate path whose body is not visible in this
;; listing (presumably it adjusts `lane' and `regno' -- TODO confirm).
4740 (define_insn "neon_vld3_lane<mode>"
4741 [(set (match_operand:CI 0 "s_register_operand" "=w")
4742 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4743 (match_operand:CI 2 "s_register_operand" "0")
4744 (match_operand:SI 3 "immediate_operand" "i")
4745 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4749 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4750 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4751 int regno = REGNO (operands[0]);
4753 if (lane >= max / 2)
4758 ops[0] = gen_rtx_REG (DImode, regno);
4759 ops[1] = gen_rtx_REG (DImode, regno + 4);
4760 ops[2] = gen_rtx_REG (DImode, regno + 8);
4761 ops[3] = operands[1];
4762 ops[4] = GEN_INT (lane);
4763 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4767 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Loads EI register operand 0 from the <V_three_elem> memory operand 1.
;; Modes with more than one unit print a vld3 with the all-lanes syntax on
;; the DImode registers at regno, regno + 2 and regno + 4 (memory printed
;; with plain %3); single-unit modes fall back to a plain vld1.  The type
;; attribute makes the same distinction on <V_mode_nunits>.
4770 (define_insn "neon_vld3_dup<mode>"
4771 [(set (match_operand:EI 0 "s_register_operand" "=w")
4772 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4773 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4777 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4779 int regno = REGNO (operands[0]);
4781 ops[0] = gen_rtx_REG (DImode, regno);
4782 ops[1] = gen_rtx_REG (DImode, regno + 2);
4783 ops[2] = gen_rtx_REG (DImode, regno + 4);
4784 ops[3] = operands[1];
4785 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4789 return "vld1.<V_sz_elem>\t%h0, %A1";
4792 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4793 (const_string "neon_load3_all_lanes<q>")
4794 (const_string "neon_load1_1reg<q>")))])
;; Expander that sets the EI neon_struct_operand 0 from an unspec of EI
;; register operand 1 (unspec code not visible in this listing).
4796 (define_expand "vec_store_lanesei<mode>"
4797 [(set (match_operand:EI 0 "neon_struct_operand")
4798 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4799 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VDX variant: stores EI register operand 1 to EI memory operand 0.
;; When <V_sz_elem> is 64 a vst1.64 is emitted instead of vst3.
;; NOTE(review): for the non-64-bit case the type attribute is
;; neon_store3_one_lane<q> although the template is a whole-register vst3;
;; the matching load, neon_vld3<mode>, uses neon_load3_3reg<q>.  Confirm
;; whether a whole-register store type was intended.
4803 (define_insn "neon_vst3<mode>"
4804 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4805 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4806 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4810 if (<V_sz_elem> == 64)
4811 return "vst1.64\t%h1, %A0";
4813 return "vst3.<V_sz_elem>\t%h1, %A0";
4816 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4817 (const_string "neon_store1_3reg<q>")
4818 (const_string "neon_store3_one_lane<q>")))])
;; Expander for CI memory operand 0 and CI register operand 1; the
;; preparation code simply emits neon_vst3<mode> on the same two operands.
4820 (define_expand "vec_store_lanesci<mode>"
4821 [(match_operand:CI 0 "neon_struct_operand")
4822 (match_operand:CI 1 "s_register_operand")
4823 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4826 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; VQ2 expander for a CI-mode vst3, emitted as two instructions.  The memory
;; operand is re-addressed as EImode at offset 0 for neon_vst3qa and at
;; offset GET_MODE_SIZE (EImode) for neon_vst3qb; both take operands[1] as
;; the register source.
4830 (define_expand "neon_vst3<mode>"
4831 [(match_operand:CI 0 "neon_struct_operand")
4832 (match_operand:CI 1 "s_register_operand")
4833 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4838 mem = adjust_address (operands[0], EImode, 0);
4839 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4840 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4841 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of the CI-mode vst3: stores the DImode registers at regno,
;; regno + 4 and regno + 8 of operand 1 to EI memory operand 0.
4845 (define_insn "neon_vst3qa<mode>"
4846 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4847 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4848 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4852 int regno = REGNO (operands[1]);
4854 ops[0] = operands[0];
4855 ops[1] = gen_rtx_REG (DImode, regno);
4856 ops[2] = gen_rtx_REG (DImode, regno + 4);
4857 ops[3] = gen_rtx_REG (DImode, regno + 8);
4858 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4861 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the CI-mode vst3: stores the DImode registers at
;; regno + 2, regno + 6 and regno + 10 of operand 1 to EI memory operand 0.
4864 (define_insn "neon_vst3qb<mode>"
4865 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4866 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4867 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4871 int regno = REGNO (operands[1]);
4873 ops[0] = operands[0];
4874 ops[1] = gen_rtx_REG (DImode, regno + 2);
4875 ops[2] = gen_rtx_REG (DImode, regno + 6);
4876 ops[3] = gen_rtx_REG (DImode, regno + 10);
4877 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4880 [(set_attr "type" "neon_store3_3reg<q>")]
4883 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4884 ;; here on big endian targets.
;; VD_LANE variant on an EI value.  Operand 2 is the immediate lane, mapped
;; through NEON_ENDIAN_LANE_N.  The output code builds DImode registers at
;; regno, regno + 2 and regno + 4 of operand 1 and prints one vst3 naming
;; the same lane of each; the memory operand is printed with plain %0.
4885 (define_insn "neon_vst3_lane<mode>"
4886 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4887 (unspec:<V_three_elem>
4888 [(match_operand:EI 1 "s_register_operand" "w")
4889 (match_operand:SI 2 "immediate_operand" "i")
4890 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4894 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4895 int regno = REGNO (operands[1]);
4897 ops[0] = operands[0];
4898 ops[1] = gen_rtx_REG (DImode, regno);
4899 ops[2] = gen_rtx_REG (DImode, regno + 2);
4900 ops[3] = gen_rtx_REG (DImode, regno + 4);
4901 ops[4] = GEN_INT (lane);
4902 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4906 [(set_attr "type" "neon_store3_one_lane<q>")]
4909 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4910 ;; here on big endian targets.
;; VQ_HS variant on a CI value.  The output code builds DImode registers at
;; regno, regno + 4 and regno + 8 of operand 1 and prints one vst3 naming
;; the same lane of each; the memory operand is printed with plain %0.
;; Lanes with lane >= max / 2 take a separate path whose body is not visible
;; in this listing (presumably it adjusts `lane' and `regno' -- TODO
;; confirm).
4911 (define_insn "neon_vst3_lane<mode>"
4912 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4913 (unspec:<V_three_elem>
4914 [(match_operand:CI 1 "s_register_operand" "w")
4915 (match_operand:SI 2 "immediate_operand" "i")
4916 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4920 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4921 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4922 int regno = REGNO (operands[1]);
4924 if (lane >= max / 2)
4929 ops[0] = operands[0];
4930 ops[1] = gen_rtx_REG (DImode, regno);
4931 ops[2] = gen_rtx_REG (DImode, regno + 4);
4932 ops[3] = gen_rtx_REG (DImode, regno + 8);
4933 ops[4] = GEN_INT (lane);
4934 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4938 [(set_attr "type" "neon_store3_one_lane<q>")]
;; VDX variant: expander that sets OI register operand 0 from an unspec of
;; the OI neon_struct_operand 1 (unspec code not visible in this listing).
4941 (define_expand "vec_load_lanesoi<mode>"
4942 [(set (match_operand:OI 0 "s_register_operand")
4943 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4944 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VDX variant: loads OI register operand 0 from OI memory operand 1.
;; When <V_sz_elem> is 64 a vld1.64 is emitted instead of vld4, and the
;; type attribute makes the same distinction (neon_load1_4reg<q> versus
;; neon_load4_4reg<q>).
4948 (define_insn "neon_vld4<mode>"
4949 [(set (match_operand:OI 0 "s_register_operand" "=w")
4950 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4951 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4955 if (<V_sz_elem> == 64)
4956 return "vld1.64\t%h0, %A1";
4958 return "vld4.<V_sz_elem>\t%h0, %A1";
4961 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4962 (const_string "neon_load1_4reg<q>")
4963 (const_string "neon_load4_4reg<q>")))]
;; Expander for XI register operand 0 and XI memory operand 1; the
;; preparation code simply emits neon_vld4<mode> on the same two operands.
4966 (define_expand "vec_load_lanesxi<mode>"
4967 [(match_operand:XI 0 "s_register_operand")
4968 (match_operand:XI 1 "neon_struct_operand")
4969 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4972 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; VQ2 expander for an XI-mode vld4, emitted as two instructions.  The
;; memory operand is re-addressed as OImode at offset 0 for neon_vld4qa and
;; at offset GET_MODE_SIZE (OImode) for neon_vld4qb; the second insn also
;; takes operands[0] as an input.
4976 (define_expand "neon_vld4<mode>"
4977 [(match_operand:XI 0 "s_register_operand")
4978 (match_operand:XI 1 "neon_struct_operand")
4979 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4984 mem = adjust_address (operands[1], OImode, 0);
4985 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
4986 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
4987 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of the XI-mode vld4: loads from OI memory operand 1 into the
;; DImode registers at regno, regno + 4, regno + 8 and regno + 12 of
;; operand 0.
4991 (define_insn "neon_vld4qa<mode>"
4992 [(set (match_operand:XI 0 "s_register_operand" "=w")
4993 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
4994 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4998 int regno = REGNO (operands[0]);
5000 ops[0] = gen_rtx_REG (DImode, regno);
5001 ops[1] = gen_rtx_REG (DImode, regno + 4);
5002 ops[2] = gen_rtx_REG (DImode, regno + 8);
5003 ops[3] = gen_rtx_REG (DImode, regno + 12);
5004 ops[4] = operands[1];
5005 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5008 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the XI-mode vld4: operand 2 is tied to the output
;; (constraint "0").  Loads from OI memory operand 1 into the DImode
;; registers at regno + 2, regno + 6, regno + 10 and regno + 14 of
;; operand 0.
5011 (define_insn "neon_vld4qb<mode>"
5012 [(set (match_operand:XI 0 "s_register_operand" "=w")
5013 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5014 (match_operand:XI 2 "s_register_operand" "0")
5015 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5019 int regno = REGNO (operands[0]);
5021 ops[0] = gen_rtx_REG (DImode, regno + 2);
5022 ops[1] = gen_rtx_REG (DImode, regno + 6);
5023 ops[2] = gen_rtx_REG (DImode, regno + 10);
5024 ops[3] = gen_rtx_REG (DImode, regno + 14);
5025 ops[4] = operands[1];
5026 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5029 [(set_attr "type" "neon_load4_4reg<q>")]
5032 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5033 ;; here on big endian targets.
;; VD_LANE variant on an OI value.  Operand 2 is tied to the output; operand
;; 3 is the immediate lane, mapped through NEON_ENDIAN_LANE_N.  The output
;; code builds DImode registers at regno, regno + 2, regno + 4 and regno + 6
;; and prints one vld4 naming the same lane of each.
5034 (define_insn "neon_vld4_lane<mode>"
5035 [(set (match_operand:OI 0 "s_register_operand" "=w")
5036 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5037 (match_operand:OI 2 "s_register_operand" "0")
5038 (match_operand:SI 3 "immediate_operand" "i")
5039 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5043 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5044 int regno = REGNO (operands[0]);
5046 ops[0] = gen_rtx_REG (DImode, regno);
5047 ops[1] = gen_rtx_REG (DImode, regno + 2);
5048 ops[2] = gen_rtx_REG (DImode, regno + 4);
5049 ops[3] = gen_rtx_REG (DImode, regno + 6);
5050 ops[4] = operands[1];
5051 ops[5] = GEN_INT (lane);
5052 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5056 [(set_attr "type" "neon_load4_one_lane<q>")]
5059 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5060 ;; here on big endian targets.
;; VQ_HS variant on an XI value.  The output code builds DImode registers at
;; regno, regno + 4, regno + 8 and regno + 12 and prints one vld4 naming the
;; same lane of each.  Lanes with lane >= max / 2 take a separate path whose
;; body is not visible in this listing (presumably it adjusts `lane' and
;; `regno' -- TODO confirm).
5061 (define_insn "neon_vld4_lane<mode>"
5062 [(set (match_operand:XI 0 "s_register_operand" "=w")
5063 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5064 (match_operand:XI 2 "s_register_operand" "0")
5065 (match_operand:SI 3 "immediate_operand" "i")
5066 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5070 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5071 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5072 int regno = REGNO (operands[0]);
5074 if (lane >= max / 2)
5079 ops[0] = gen_rtx_REG (DImode, regno);
5080 ops[1] = gen_rtx_REG (DImode, regno + 4);
5081 ops[2] = gen_rtx_REG (DImode, regno + 8);
5082 ops[3] = gen_rtx_REG (DImode, regno + 12);
5083 ops[4] = operands[1];
5084 ops[5] = GEN_INT (lane);
5085 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5089 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Loads OI register operand 0 from the <V_four_elem> memory operand 1.
;; Modes with more than one unit print a vld4 with the all-lanes syntax on
;; the DImode registers at regno, regno + 2, regno + 4 and regno + 6;
;; single-unit modes fall back to a plain vld1.  The type attribute makes
;; the same distinction on <V_mode_nunits>.
5092 (define_insn "neon_vld4_dup<mode>"
5093 [(set (match_operand:OI 0 "s_register_operand" "=w")
5094 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5095 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5099 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5101 int regno = REGNO (operands[0]);
5103 ops[0] = gen_rtx_REG (DImode, regno);
5104 ops[1] = gen_rtx_REG (DImode, regno + 2);
5105 ops[2] = gen_rtx_REG (DImode, regno + 4);
5106 ops[3] = gen_rtx_REG (DImode, regno + 6);
5107 ops[4] = operands[1];
5108 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5113 return "vld1.<V_sz_elem>\t%h0, %A1";
5116 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5117 (const_string "neon_load4_all_lanes<q>")
5118 (const_string "neon_load1_1reg<q>")))]
;; VDX variant: expander that sets the OI neon_struct_operand 0 from an
;; unspec of OI register operand 1 (unspec code not visible in this listing).
5121 (define_expand "vec_store_lanesoi<mode>"
5122 [(set (match_operand:OI 0 "neon_struct_operand")
5123 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5124 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; VDX variant: stores OI register operand 1 to OI memory operand 0.
;; When <V_sz_elem> is 64 a vst1.64 is emitted instead of vst4, and the
;; type attribute makes the same distinction (neon_store1_4reg<q> versus
;; neon_store4_4reg<q>).
5128 (define_insn "neon_vst4<mode>"
5129 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5130 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5131 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5135 if (<V_sz_elem> == 64)
5136 return "vst1.64\t%h1, %A0";
5138 return "vst4.<V_sz_elem>\t%h1, %A0";
5141 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5142 (const_string "neon_store1_4reg<q>")
5143 (const_string "neon_store4_4reg<q>")))]
;; Expander for XI memory operand 0 and XI register operand 1; the
;; preparation code simply emits neon_vst4<mode> on the same two operands.
5146 (define_expand "vec_store_lanesxi<mode>"
5147 [(match_operand:XI 0 "neon_struct_operand")
5148 (match_operand:XI 1 "s_register_operand")
5149 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5152 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; VQ2 expander for an XI-mode vst4, emitted as two instructions.  The
;; memory operand is re-addressed as OImode at offset 0 for neon_vst4qa and
;; at offset GET_MODE_SIZE (OImode) for neon_vst4qb; both take operands[1]
;; as the register source.
5156 (define_expand "neon_vst4<mode>"
5157 [(match_operand:XI 0 "neon_struct_operand")
5158 (match_operand:XI 1 "s_register_operand")
5159 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5164 mem = adjust_address (operands[0], OImode, 0);
5165 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5166 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5167 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of the XI-mode vst4: stores the DImode registers at regno,
;; regno + 4, regno + 8 and regno + 12 of operand 1 to OI memory operand 0.
5171 (define_insn "neon_vst4qa<mode>"
5172 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5173 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5174 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5178 int regno = REGNO (operands[1]);
5180 ops[0] = operands[0];
5181 ops[1] = gen_rtx_REG (DImode, regno);
5182 ops[2] = gen_rtx_REG (DImode, regno + 4);
5183 ops[3] = gen_rtx_REG (DImode, regno + 8);
5184 ops[4] = gen_rtx_REG (DImode, regno + 12);
5185 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5188 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of the XI-mode vst4: stores the DImode registers at
;; regno + 2, regno + 6, regno + 10 and regno + 14 of operand 1 to OI memory
;; operand 0.
5191 (define_insn "neon_vst4qb<mode>"
5192 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5193 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5194 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5198 int regno = REGNO (operands[1]);
5200 ops[0] = operands[0];
5201 ops[1] = gen_rtx_REG (DImode, regno + 2);
5202 ops[2] = gen_rtx_REG (DImode, regno + 6);
5203 ops[3] = gen_rtx_REG (DImode, regno + 10);
5204 ops[4] = gen_rtx_REG (DImode, regno + 14);
5205 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5208 [(set_attr "type" "neon_store4_4reg<q>")]
5211 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5212 ;; here on big endian targets.
;; VD_LANE variant on an OI value.  Operand 2 is the immediate lane, mapped
;; through NEON_ENDIAN_LANE_N.  The output code builds DImode registers at
;; regno, regno + 2, regno + 4 and regno + 6 of operand 1 and prints one
;; vst4 naming the same lane of each.
5213 (define_insn "neon_vst4_lane<mode>"
5214 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5215 (unspec:<V_four_elem>
5216 [(match_operand:OI 1 "s_register_operand" "w")
5217 (match_operand:SI 2 "immediate_operand" "i")
5218 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5222 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5223 int regno = REGNO (operands[1]);
5225 ops[0] = operands[0];
5226 ops[1] = gen_rtx_REG (DImode, regno);
5227 ops[2] = gen_rtx_REG (DImode, regno + 2);
5228 ops[3] = gen_rtx_REG (DImode, regno + 4);
5229 ops[4] = gen_rtx_REG (DImode, regno + 6);
5230 ops[5] = GEN_INT (lane);
5231 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5235 [(set_attr "type" "neon_store4_one_lane<q>")]
5238 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5239 ;; here on big endian targets.
;; VQ_HS variant on an XI value.  The output code builds DImode registers at
;; regno, regno + 4, regno + 8 and regno + 12 of operand 1 and prints one
;; vst4 naming the same lane of each.  Lanes with lane >= max / 2 take a
;; separate path whose body is not visible in this listing (presumably it
;; adjusts `lane' and `regno' -- TODO confirm).
;; NOTE(review): the type attribute here is neon_store4_4reg<q>, whereas the
;; OI-mode neon_vst4_lane<mode> uses neon_store4_one_lane<q>.  Confirm
;; whether the one-lane type was intended.
5240 (define_insn "neon_vst4_lane<mode>"
5241 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5242 (unspec:<V_four_elem>
5243 [(match_operand:XI 1 "s_register_operand" "w")
5244 (match_operand:SI 2 "immediate_operand" "i")
5245 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5249 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5250 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5251 int regno = REGNO (operands[1]);
5253 if (lane >= max / 2)
5258 ops[0] = operands[0];
5259 ops[1] = gen_rtx_REG (DImode, regno);
5260 ops[2] = gen_rtx_REG (DImode, regno + 4);
5261 ops[3] = gen_rtx_REG (DImode, regno + 8);
5262 ops[4] = gen_rtx_REG (DImode, regno + 12);
5263 ops[5] = GEN_INT (lane);
5264 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5268 [(set_attr "type" "neon_store4_4reg<q>")]
;; Widening unpack of the half of operand 1 selected by operand 2, which
;; must satisfy vect_par_constant_low.  Emits vmovl from %e1 into %q0.
;; Enabled only for TARGET_NEON && !BYTES_BIG_ENDIAN.
5271 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5272 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5273 (SE:<V_unpack> (vec_select:<V_HALF>
5274 (match_operand:VU 1 "register_operand" "w")
5275 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5276 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5277 "vmovl.<US><V_sz_elem> %q0, %e1"
5278 [(set_attr "type" "neon_shift_imm_long")]
;; Counterpart of the _lo_ pattern: widen the half of VU vector operand 1
;; picked out by operand 2 (a vect_par_constant_high selector) into the
;; <V_unpack> result.  Emits vmovl on the %f1 half of the source.  Enabled
;; only for little-endian NEON (TARGET_NEON && !BYTES_BIG_ENDIAN).
5281 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5282 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5283 (SE:<V_unpack> (vec_select:<V_HALF>
5284 (match_operand:VU 1 "register_operand" "w")
5285 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5286 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5287 "vmovl.<US><V_sz_elem> %q0, %f1"
5288 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for unpacking the high half of a VU vector (little-endian NEON
;; only).  Builds a PARALLEL holding the element indices
;; <V_mode_nunits>/2 ... <V_mode_nunits>-1 and hands it, together with the
;; operands, to gen_neon_vec_unpack<US>_hi_<mode>.
5291 (define_expand "vec_unpack<US>_hi_<mode>"
5292 [(match_operand:<V_unpack> 0 "register_operand" "")
5293 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5294 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5296 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5299 for (i = 0; i < (<V_mode_nunits>/2); i++)
5300 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5302 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5303 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for unpacking the low half of a VU vector (little-endian NEON
;; only).  Builds a PARALLEL holding the element indices
;; 0 ... <V_mode_nunits>/2-1 and hands it, together with the operands, to
;; gen_neon_vec_unpack<US>_lo_<mode>.
5310 (define_expand "vec_unpack<US>_lo_<mode>"
5311 [(match_operand:<V_unpack> 0 "register_operand" "")
5312 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5313 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5315 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5318 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5319 RTVEC_ELT (v, i) = GEN_INT (i);
5320 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5321 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of vector halves: operands 1 and 3 are VU registers,
;; each narrowed to <V_HALF> by a vec_select, extended with SE to <V_unpack>
;; and multiplied.  Operand 2 is a vect_par_constant_low selector.  Emits
;; vmull on the %e1 and %e3 halves.  Little-endian NEON only.
5328 (define_insn "neon_vec_<US>mult_lo_<mode>"
5329 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5330 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5331 (match_operand:VU 1 "register_operand" "w")
5332 (match_operand:VU 2 "vect_par_constant_low" "")))
5333 (SE:<V_unpack> (vec_select:<V_HALF>
5334 (match_operand:VU 3 "register_operand" "w")
5336 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5337 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5338 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the low halves of two VU vectors
;; (little-endian NEON only).  Builds a PARALLEL of the element indices
;; 0 ... <V_mode_nunits>/2-1 and emits gen_neon_vec_<US>mult_lo_<mode>.
5341 (define_expand "vec_widen_<US>mult_lo_<mode>"
5342 [(match_operand:<V_unpack> 0 "register_operand" "")
5343 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5344 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5345 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5347 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5350 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5351 RTVEC_ELT (v, i) = GEN_INT (i);
5352 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5354 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of vector halves, high-half counterpart of the _lo_
;; pattern: operands 1 and 3 are VU registers, each narrowed to <V_HALF> by a
;; vec_select, extended with SE and multiplied.  Operand 2 is a
;; vect_par_constant_high selector.  Emits vmull on the %f1 and %f3 halves.
;; Little-endian NEON only.
5362 (define_insn "neon_vec_<US>mult_hi_<mode>"
5363 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5364 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5365 (match_operand:VU 1 "register_operand" "w")
5366 (match_operand:VU 2 "vect_par_constant_high" "")))
5367 (SE:<V_unpack> (vec_select:<V_HALF>
5368 (match_operand:VU 3 "register_operand" "w")
5370 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5371 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5372 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for a widening multiply of the high halves of two VU vectors
;; (little-endian NEON only).  Builds a PARALLEL of the element indices
;; <V_mode_nunits>/2 ... <V_mode_nunits>-1 and emits
;; gen_neon_vec_<US>mult_hi_<mode>.
5375 (define_expand "vec_widen_<US>mult_hi_<mode>"
5376 [(match_operand:<V_unpack> 0 "register_operand" "")
5377 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5378 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5379 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5381 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5384 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5385 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5386 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5388 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left: VW register operand 1 is shifted left by operand 2
;; (which must satisfy const_neon_scalar_shift_amount_operand) and the result
;; is extended with SE to <V_widen>.  Emits vshll with the D-register source
;; %P1 and the quad destination %q0.
5397 (define_insn "neon_vec_<US>shiftl_<mode>"
5398 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5399 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5400 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5403 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5405 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for a widening shift left of the low half of a VU vector
;; (little-endian NEON only).  The low half is obtained as a <V_HALF>mode
;; subreg of operand 1 at byte offset 0 and passed to
;; gen_neon_vec_<US>shiftl_<V_half>.
5408 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5409 [(match_operand:<V_unpack> 0 "register_operand" "")
5410 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5411 (match_operand:SI 2 "immediate_operand" "i")]
5412 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5414 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5415 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for a widening shift left of the high half of a VU vector
;; (little-endian NEON only).  The high half is obtained as a <V_HALF>mode
;; subreg of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode) and passed
;; to gen_neon_vec_<US>shiftl_<V_half>.
5421 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5422 [(match_operand:<V_unpack> 0 "register_operand" "")
5423 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5424 (match_operand:SI 2 "immediate_operand" "i")]
5425 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5427 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5428 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5429 GET_MODE_SIZE (<V_HALF>mode)),
5435 ;; Vectorize for non-neon-quad case
;; Extend (SE) a whole VDI vector in operand 1 to the <V_widen> result with a
;; single vmovl from the D register %P1 into the quad register %q0.
;; NOTE(review): the "type" here is neon_move, while the other vmovl patterns
;; in this file (neon_vec_unpack<US>_lo/hi_<mode>) use neon_shift_imm_long --
;; confirm which scheduling type is intended.
5436 (define_insn "neon_unpack<US>_<mode>"
5437 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5438 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5440 "vmovl.<US><V_sz_elem> %q0, %P1"
5441 [(set_attr "type" "neon_move")]
;; VDI (D-register) variant of vec_unpack<US>_lo: widen the whole input
;; vector into a fresh <V_widen>mode temporary with neon_unpack<US>_<mode>,
;; then copy the low half of that temporary into operand 0 with
;; neon_vget_low<V_widen_l>.
5444 (define_expand "vec_unpack<US>_lo_<mode>"
5445 [(match_operand:<V_double_width> 0 "register_operand" "")
5446 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5449 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5450 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5451 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; VDI (D-register) variant of vec_unpack<US>_hi: widen the whole input
;; vector into a fresh <V_widen>mode temporary with neon_unpack<US>_<mode>,
;; then copy the high half of that temporary into operand 0 with
;; neon_vget_high<V_widen_l>.
5457 (define_expand "vec_unpack<US>_hi_<mode>"
5458 [(match_operand:<V_double_width> 0 "register_operand" "")
5459 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5462 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5463 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5464 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI vectors: operands 1 and 2 are extended
;; to <V_widen> and multiplied.  Emits vmull with D-register sources %P1 and
;; %P2 and the quad destination %q0.
5470 (define_insn "neon_vec_<US>mult_<mode>"
5471 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5472 (mult:<V_widen> (SE:<V_widen>
5473 (match_operand:VDI 1 "register_operand" "w"))
5475 (match_operand:VDI 2 "register_operand" "w"))))]
5477 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5478 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; VDI (D-register) variant of vec_widen_<US>mult_hi: perform the full
;; widening multiply into a fresh <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then copy the high half of the product into
;; operand 0 with neon_vget_high<V_widen_l>.
5481 (define_expand "vec_widen_<US>mult_hi_<mode>"
5482 [(match_operand:<V_double_width> 0 "register_operand" "")
5483 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5484 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5487 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5488 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5489 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; VDI (D-register) variant of vec_widen_<US>mult_lo: perform the full
;; widening multiply into a fresh <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then copy the low half of the product into
;; operand 0 with neon_vget_low<V_widen_l>.
5496 (define_expand "vec_widen_<US>mult_lo_<mode>"
5497 [(match_operand:<V_double_width> 0 "register_operand" "")
5498 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5499 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5502 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5503 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5504 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; VDI (D-register) variant of vec_widen_<US>shiftl_hi: perform the full
;; widening shift left (by immediate operand 2) into a fresh <V_widen>mode
;; temporary with neon_vec_<US>shiftl_<mode>, then copy the high half of the
;; result into operand 0 with neon_vget_high<V_widen_l>.
5511 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5512 [(match_operand:<V_double_width> 0 "register_operand" "")
5513 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5514 (match_operand:SI 2 "immediate_operand" "i")]
5517 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5518 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5519 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; VDI (D-register) variant of vec_widen_<US>shiftl_lo: perform the full
;; widening shift left (by immediate operand 2) into a fresh <V_widen>mode
;; temporary with neon_vec_<US>shiftl_<mode>, then copy the low half of the
;; result into operand 0 with neon_vget_low<V_widen_l>.
5525 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5526 [(match_operand:<V_double_width> 0 "register_operand" "")
5527 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5528 (match_operand:SI 2 "immediate_operand" "i")]
5531 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5532 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5533 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5539 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5540 ; because the ordering of vector elements in Q registers is different from what
5541 ; the semantics of the instructions require.
;; Pack two VN vectors into one narrower-element vector: operands 1 and 2 are
;; each truncated to <V_narrow> and concatenated into <V_narrow_pack>.
;; Emitted as two vmovn instructions (hence "length" 8 and type "multiple"):
;; the first writes the %e0 half from %q1, the second writes the %f0 half
;; from %q2.  Operand 0 is earlyclobber ("=&w"); note that the template
;; writes %e0 before it reads %q2.  Little-endian NEON only.
5543 (define_insn "vec_pack_trunc_<mode>"
5544 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5545 (vec_concat:<V_narrow_pack>
5546 (truncate:<V_narrow>
5547 (match_operand:VN 1 "register_operand" "w"))
5548 (truncate:<V_narrow>
5549 (match_operand:VN 2 "register_operand" "w"))))]
5550 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5551 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5552 [(set_attr "type" "multiple")
5553 (set_attr "length" "8")]
5556 ;; For the non-quad case.
;; Truncate every element of VN vector operand 1 to <V_narrow> with a single
;; vmovn from the quad register %q1 into the D register %P0.
;; Little-endian NEON only.
5557 (define_insn "neon_vec_pack_trunc_<mode>"
5558 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5559 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5560 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5561 "vmovn.i<V_sz_elem>\t%P0, %q1"
5562 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for packing two VSHFT vectors (operands 1 and 2) into one
;; <V_narrow_pack> result (little-endian NEON only).  The inputs are first
;; combined into a fresh <V_DOUBLE>mode temporary -- operand 1 via
;; move_lo_quad, operand 2 via move_hi_quad -- and that temporary is then
;; narrowed into operand 0 with neon_vec_pack_trunc_<V_double>.
5565 (define_expand "vec_pack_trunc_<mode>"
5566 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5567 (match_operand:VSHFT 1 "register_operand" "")
5568 (match_operand:VSHFT 2 "register_operand")]
5569 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5571 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5573 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5574 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5575 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference written as abs (minus (op1, op2)) on VDQ vectors,
;; emitted as vabd.<V_s_elem>.  For floating-point modes the pattern is only
;; enabled under flag_unsafe_math_optimizations.  The "type" attribute is
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
5579 (define_insn "neon_vabd<mode>_2"
5580 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5581 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5582 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5583 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5584 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5586 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5587 (const_string "neon_fp_abd_s<q>")
5588 (const_string "neon_abd<q>")))]
;; Absolute difference where the subtraction is represented as an unspec of
;; operands 1 and 2 wrapped in abs, emitted as vabd.<V_if_elem>.  For
;; floating-point modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The "type" attribute is neon_fp_abd_s<q>
;; for float modes and neon_abd<q> otherwise.
5591 (define_insn "neon_vabd<mode>_3"
5592 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5593 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5594 (match_operand:VDQ 2 "s_register_operand" "w")]
5596 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5597 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5599 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5600 (const_string "neon_fp_abd_s<q>")
5601 (const_string "neon_abd<q>")))]
5604 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; SI -> DI sign extension.  After reload, when operand 0 is a VFP register,
;; the extension is rewritten as two steps: duplicate operand 1 into the
;; destination viewed as V2SI (operand 2, same register number), then
;; arithmetic-shift the DI destination right by 32.
5607 [(set (match_operand:DI 0 "s_register_operand" "")
5608 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5609 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5610 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5611 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5613 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HI -> DI sign extension.  After reload, when operand 0 is a VFP register,
;; the extension is rewritten as two steps: duplicate operand 1 into the
;; destination viewed as V4HI (operand 2, same register number), then
;; arithmetic-shift the DI destination right by 48.
5617 [(set (match_operand:DI 0 "s_register_operand" "")
5618 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5619 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5620 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5621 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5623 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QI -> DI sign extension.  After reload, when operand 0 is a VFP register,
;; the extension is rewritten as two steps: duplicate operand 1 into the
;; destination viewed as V8QI (operand 2, same register number), then
;; arithmetic-shift the DI destination right by 56.
5627 [(set (match_operand:DI 0 "s_register_operand" "")
5628 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5629 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5630 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5631 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5633 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SI -> DI zero extension.  After reload, when operand 0 is a VFP register,
;; the extension is rewritten as two steps: duplicate operand 1 into the
;; destination viewed as V2SI (operand 2, same register number), then
;; logical-shift the DI destination right by 32.
5637 [(set (match_operand:DI 0 "s_register_operand" "")
5638 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5639 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5640 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5641 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5643 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HI -> DI zero extension.  After reload, when operand 0 is a VFP register,
;; the extension is rewritten as two steps: duplicate operand 1 into the
;; destination viewed as V4HI (operand 2, same register number), then
;; logical-shift the DI destination right by 48.
5647 [(set (match_operand:DI 0 "s_register_operand" "")
5648 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5649 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5650 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5651 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5653 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QI -> DI zero extension.  After reload, when operand 0 is a VFP register,
;; the extension is rewritten as two steps: duplicate operand 1 into the
;; destination viewed as V8QI (operand 2, same register number), then
;; logical-shift the DI destination right by 56.
5657 [(set (match_operand:DI 0 "s_register_operand" "")
5658 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5659 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5660 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5661 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5663 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));