re PR target/80846 (auto-vectorized AVX2 horizontal sum should narrow to 128b right...
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Declares the values vadd, vmin and vmax; the default is vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for VDX modes.  Enabled under TARGET_NEON when at least one
;; of the two operands satisfies register_operand.  Nine alternatives.
;; Output, by which_alternative:
;;   0      "vmov %P0, %P1"
;;   1, 3   output_move_neon (operands)
;;   2      immediate move: neon_immediate_valid_for_move is passed
;;          &operands[1] and &width and must succeed (gcc_assert); width == 0
;;          emits vmov.f32, otherwise "vmov.i<width>" is formatted into the
;;          static buffer templ
;;   4, 5   vmov forms that name a register pair through %Q/%R
;;   6..8   output_move_double (operands, true, NULL)
;; The type, length and pool-range attributes are listed per alternative.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
38 static char templ[40];
39
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
61 }
62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for VQXMOV modes.  Same enabling condition and alternative
;; layout as the VDX move pattern, but the templates print operands with %q.
;; Output, by which_alternative:
;;   0      "vmov %q0, %q1"
;;   1, 3   output_move_neon (operands)
;;   2      immediate move, checked by neon_immediate_valid_for_move;
;;          width == 0 emits vmov.f32, otherwise "vmov.i<width>" is formatted
;;          into the static buffer templ
;;   4, 5   two vmov instructions joined by "\;", one per %e/%f half
;;   6..8   output_move_quad (operands)
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
76 "TARGET_NEON
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
79 {
80 if (which_alternative == 2)
81 {
82 int width, is_valid;
83 static char templ[40];
84
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
87
88 gcc_assert (is_valid != 0);
89
90 if (width == 0)
91 return "vmov.f32\t%q0, %1 @ <mode>";
92 else
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
94
95 return templ;
96 }
97
98 switch (which_alternative)
99 {
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
106 }
107 }
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; Expander for TImode moves, enabled under TARGET_NEON.  While new pseudos
;; can still be created, the source is forced into a register whenever the
;; destination is not a REG.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
119 "TARGET_NEON"
120 {
121 if (can_create_pseudo_p ())
122 {
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
125 }
126 })
127
;; Expander for VSTRUCT-mode moves, enabled under TARGET_NEON.  While new
;; pseudos can still be created, the source is forced into a register
;; whenever the destination is not a REG.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
131 "TARGET_NEON"
132 {
133 if (can_create_pseudo_p ())
134 {
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
137 }
138 })
139
;; Expander for V4HF moves, enabled under TARGET_NEON && TARGET_FP16.  Both
;; operands use the s_register_operand predicate.  While new pseudos can
;; still be created, the source is forced into a register whenever the
;; destination is not a REG (rationale in the comment inside the body).
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
144 {
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
147 this case. */
148 if (can_create_pseudo_p ())
149 {
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
152 }
153 })
154
;; Expander for V8HF moves, enabled under TARGET_NEON && TARGET_FP16.  The
;; operand predicates are empty strings, unlike movv4hf.  While new pseudos
;; can still be created, the source is forced into a register whenever the
;; destination is not a REG (rationale in the comment inside the body).
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
159 {
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
162 this case. */
163 if (can_create_pseudo_p ())
164 {
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
167 }
168 })
169
;; Move pattern for VSTRUCT modes; at least one operand must satisfy
;; register_operand.  Alternative 0 returns "#", i.e. the insn is expected to
;; be split rather than printed; alternatives 1 and 2 go through
;; output_move_neon.  The length attribute is computed by
;; arm_attr_length_move_neon (insn).
;; NOTE(review): presumably alternative 0 is handled by the EI/OI/CI/XI
;; define_splits in this file -- confirm VSTRUCT covers exactly those modes.
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
173 "TARGET_NEON
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
176 {
177 switch (which_alternative)
178 {
179 case 0: return "#";
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
182 }
183 }
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
186
;; After reload, split an EImode register-to-register copy into two sets.
;; The pieces are a TImode register at the base register number and a DImode
;; register at base + 4, built for both destination and source and handed to
;; neon_disambiguate_copy (operands, dest, src, 2).
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping copies are safe and fills operands[0..3] -- confirm in arm.c.
187 (define_split
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
193 {
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
196 rtx dest[2], src[2];
197
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
202
203 neon_disambiguate_copy (operands, dest, src, 2);
204 })
205
;; After reload, split an OImode register-to-register copy into two sets.
;; The pieces are TImode registers at the base register number and at
;; base + 4, handed to neon_disambiguate_copy (operands, dest, src, 2).
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping copies are safe and fills operands[0..3] -- confirm in arm.c.
206 (define_split
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
212 {
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
215 rtx dest[2], src[2];
216
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
221
222 neon_disambiguate_copy (operands, dest, src, 2);
223 })
224
;; After reload, split a CImode register-to-register copy into three sets.
;; The pieces are TImode registers at base, base + 4 and base + 8, handed to
;; neon_disambiguate_copy (operands, dest, src, 3).
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping copies are safe and fills operands[0..5] -- confirm in arm.c.
225 (define_split
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
232 {
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
235 rtx dest[3], src[3];
236
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
243
244 neon_disambiguate_copy (operands, dest, src, 3);
245 })
246
;; After reload, split an XImode register-to-register copy into four sets.
;; The pieces are TImode registers at base, base + 4, base + 8 and base + 12,
;; handed to neon_disambiguate_copy (operands, dest, src, 4).
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping copies are safe and fills operands[0..7] -- confirm in arm.c.
247 (define_split
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
255 {
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
258 rtx dest[4], src[4];
259
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
268
269 neon_disambiguate_copy (operands, dest, src, 4);
270 })
271
;; Expander for misaligned VDQX moves, expressed as a set from an
;; UNSPEC_MISALIGNED_ACCESS unspec.  Enabled under
;; TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access.
;; If neither operand is an s_register_operand, operand 1 is forced into a
;; register.  The operand opposite the register one is then taken as
;; adjust_mem, and its address is forced into a Pmode register when
;; neon_vector_mem_operand (adjust_mem, 2, true) rejects it.
;; NOTE(review): XEXP is applied to adjust_mem, which assumes it is a MEM;
;; a register-to-register use would pick operand 1 -- confirm callers never
;; pass two registers.
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
277 {
278 rtx adjust_mem;
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
285
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
288 else
289 adjust_mem = operands[0];
290
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
294
295 })
296
;; Misaligned store of a VDX register (operand 1) to operand 0, emitted as
;; vst1.<V_sz_elem> with the register printed through %P and the address
;; through %A.  Same enabling condition as the movmisalign<mode> expander.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
304
;; Misaligned load of operand 1 into a VDX register (operand 0), emitted as
;; vld1.<V_sz_elem> with the register printed through %P and the address
;; through %A.  Same enabling condition as the movmisalign<mode> expander.
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
308 " Um")]
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
313
;; Misaligned store of a VQX register (operand 1) to operand 0, emitted as
;; vst1.<V_sz_elem> with the register printed through %q and the address
;; through %A.  Same enabling condition as the movmisalign<mode> expander.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
321
;; Misaligned load of operand 1 into a VQX register (operand 0), emitted as
;; vld1.<V_sz_elem> with the register printed through %q and the address
;; through %A.  Same enabling condition as the movmisalign<mode> expander.
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
325 " Um")]
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
330
;; Insert scalar operand 1 into one lane of a VD_LANE vector; operand 3 is
;; tied to operand 0 and supplies the other lanes.  Operand 2 is the
;; vec_merge mask: the lane index is the position of its lowest set bit
;; (ffs - 1), mirrored when BYTES_BIG_ENDIAN.  operands[2] is then replaced
;; by that lane index for printing.
;; Alternative 0 loads the lane from operand 1 with vld1; alternative 1
;; moves it from operand 1 with vmov.
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
333 (vec_merge:VD_LANE
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
338 "TARGET_NEON"
339 {
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
344
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
347 else
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
349 }
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
351
;; Insert scalar operand 1 into one lane of a VQ2 vector; operand 3 is tied
;; to operand 0.  The element index is the position of the lowest set bit of
;; operand 2 (ffs - 1).  It is split into a lane within one half (elt,
;; mirrored within the half when BYTES_BIG_ENDIAN) and a register offset hi
;; of 0 or 2 selecting the half.  operands[0] is rewritten to the
;; <V_HALF>mode register regno + hi and operands[2] to the lane, so the
;; templates address a single half with %P0.
;; Alternative 0 uses vld1 from operand 1; alternative 1 uses vmov.
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
354 (vec_merge:VQ2
355 (vec_duplicate:VQ2
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
359 "TARGET_NEON"
360 {
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
366
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
369
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
372
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
375 else
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
377 }
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
379 )
380
;; Insert DImode operand 1 into one element of a V2DI vector; operand 3 is
;; tied to operand 0.  The element index is the position of the lowest set
;; bit of operand 2 (ffs - 1); operands[0] is rewritten to the DImode
;; register at REGNO + 2 * index.
;; Alternative 0 uses vld1.64 from operand 1; alternative 1 uses a vmov that
;; names operand 1 through %Q1/%R1.
;; NOTE(review): the vld1.64 template prints %P0 without braces, unlike the
;; vst1.64 template in vec_extractv2didi -- confirm the assembler accepts it.
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
383 (vec_merge:V2DI
384 (vec_duplicate:V2DI
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
388 "TARGET_NEON"
389 {
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
392
393 operands[0] = gen_rtx_REG (DImode, regno);
394
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
397 else
398 return "vmov\t%P0, %Q1, %R1";
399 }
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
401 )
402
;; Expander for vec_set on VDQ modes.  Operand 2 is an element index; it is
;; converted to the single-bit mask HOST_WIDE_INT_1 << index and passed to
;; vec_set<mode>_internal, with operand 0 given both as the destination and
;; as the vector supplying the remaining lanes.
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
407 "TARGET_NEON"
408 {
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
412 DONE;
413 })
414
;; Extract element operand 2 of a VD_LANE vector (operand 1) into operand 0.
;; When BYTES_BIG_ENDIAN the lane index is mirrored before printing.
;; Alternative 0 stores the lane to operand 0 with vst1; alternative 1 moves
;; it to operand 0 with vmov.<V_uf_sclr>.
415 (define_insn "vec_extract<mode><V_elem_l>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
417 (vec_select:<V_elem>
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
420 "TARGET_NEON"
421 {
422 if (BYTES_BIG_ENDIAN)
423 {
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
427 }
428
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
431 else
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
433 }
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
435 )
436
;; Extract element operand 2 of a VQ2 vector (operand 1) into operand 0.
;; The index is split into a lane within one half (elt, mirrored within the
;; half when BYTES_BIG_ENDIAN) and a register offset hi of 0 or 2.
;; operands[1] is rewritten to the <V_HALF>mode register regno + hi and
;; operands[2] to the lane, so the templates address a single half with %P1.
;; Alternative 0 uses vst1 to operand 0; alternative 1 uses vmov.<V_uf_sclr>.
437 (define_insn "vec_extract<mode><V_elem_l>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
439 (vec_select:<V_elem>
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
442 "TARGET_NEON"
443 {
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
448
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
451
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
454
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
457 else
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
459 }
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
461 )
462
;; Extract element operand 2 of a V2DI vector (operand 1) into DImode
;; operand 0.  operands[1] is rewritten to the DImode register at
;; REGNO + 2 * index.  Alternative 0 stores it with vst1.64; alternative 1
;; uses a vmov that names operand 0 through %Q0/%R0.
463 (define_insn "vec_extractv2didi"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
465 (vec_select:DI
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
468 "TARGET_NEON"
469 {
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
471
472 operands[1] = gen_rtx_REG (DImode, regno);
473
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
476 else
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
478 }
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
480 )
481
;; Expander for vec_init on VDQ modes, enabled under TARGET_NEON.  All work
;; is delegated to neon_expand_vector_init (operands[0], operands[1]).
482 (define_expand "vec_init<mode><V_elem_l>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
485 "TARGET_NEON"
486 {
487 neon_expand_vector_init (operands[0], operands[1]);
488 DONE;
489 })
490
491 ;; Doubleword and quadword arithmetic.
492
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
495
;; Vector add for VDQ modes, emitted as vadd.<V_if_elem>.  Enabled under
;; TARGET_NEON; float modes additionally require
;; flag_unsafe_math_optimizations.  The type attribute is neon_fp_addsub_s<q>
;; for float modes and neon_add<q> otherwise.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
502 [(set (attr "type")
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
506 )
507
508 ;; As with SFmode, full support for HFmode vector arithmetic is only available
509 ;; when flag_unsafe_math_optimizations is enabled.
510
;; Vector add for VH modes, emitted as vadd.<V_if_elem>.  Enabled only under
;; TARGET_NEON_FP16INST && flag_unsafe_math_optimizations.
511 (define_insn "add<mode>3"
512 [(set
513 (match_operand:VH 0 "s_register_operand" "=w")
514 (plus:VH
515 (match_operand:VH 1 "s_register_operand" "w")
516 (match_operand:VH 2 "s_register_operand" "w")))]
517 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
518 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
519 [(set (attr "type")
520 (if_then_else (match_test "<Is_float_mode>")
521 (const_string "neon_fp_addsub_s<q>")
522 (const_string "neon_add<q>")))]
523 )
524
;; Same RTL and output template as add<mode>3 for VH modes, but enabled
;; under TARGET_NEON_FP16INST alone, without the
;; flag_unsafe_math_optimizations requirement.
525 (define_insn "add<mode>3_fp16"
526 [(set
527 (match_operand:VH 0 "s_register_operand" "=w")
528 (plus:VH
529 (match_operand:VH 1 "s_register_operand" "w")
530 (match_operand:VH 2 "s_register_operand" "w")))]
531 "TARGET_NEON_FP16INST"
532 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
533 [(set (attr "type")
534 (if_then_else (match_test "<Is_float_mode>")
535 (const_string "neon_fp_addsub_s<q>")
536 (const_string "neon_add<q>")))]
537 )
538
;; DImode add with a clobber of the CC register; seven alternatives.
;; Alternatives 0 and 3 emit vadd.i64.  Alternatives 1, 2, 4, 5 and 6 return
;; "#", i.e. they are expected to be split (the splitter is not in this
;; section); they are the ones marked conds "clob" and length 8.
;; The arch attribute ties alternative 0 to neon_for_64bits and alternative 3
;; to avoid_neon_for_64bits.
539 (define_insn "adddi3_neon"
540 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
541 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
542 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
543 (clobber (reg:CC CC_REGNUM))]
544 "TARGET_NEON"
545 {
546 switch (which_alternative)
547 {
548 case 0: /* fall through */
549 case 3: return "vadd.i64\t%P0, %P1, %P2";
550 case 1: return "#";
551 case 2: return "#";
552 case 4: return "#";
553 case 5: return "#";
554 case 6: return "#";
555 default: gcc_unreachable ();
556 }
557 }
558 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
559 multiple,multiple,multiple")
560 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
561 (set_attr "length" "*,8,8,*,8,8,8")
562 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
563 )
564
;; Vector subtract for VDQ modes, emitted as vsub.<V_if_elem>.  Enabled under
;; TARGET_NEON; float modes additionally require
;; flag_unsafe_math_optimizations.  The type attribute is neon_fp_addsub_s<q>
;; for float modes and neon_sub<q> otherwise.
565 (define_insn "*sub<mode>3_neon"
566 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
567 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
568 (match_operand:VDQ 2 "s_register_operand" "w")))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
571 [(set (attr "type")
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_addsub_s<q>")
574 (const_string "neon_sub<q>")))]
575 )
576
;; Vector subtract for VH modes, emitted as vsub.<V_if_elem>.  Enabled only
;; under TARGET_NEON_FP16INST && flag_unsafe_math_optimizations.
;; NOTE(review): the type attribute is the fixed string neon_sub<q>, whereas
;; the VH add patterns select neon_fp_addsub_s<q> for float modes -- confirm
;; this scheduling type is intended.
577 (define_insn "sub<mode>3"
578 [(set
579 (match_operand:VH 0 "s_register_operand" "=w")
580 (minus:VH
581 (match_operand:VH 1 "s_register_operand" "w")
582 (match_operand:VH 2 "s_register_operand" "w")))]
583 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
584 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
585 [(set_attr "type" "neon_sub<q>")]
586 )
587
;; Same RTL and output template as sub<mode>3 for VH modes, but enabled
;; under TARGET_NEON_FP16INST alone, without the
;; flag_unsafe_math_optimizations requirement.
;; NOTE(review): the type attribute is the fixed string neon_sub<q>, whereas
;; add<mode>3_fp16 selects neon_fp_addsub_s<q> for float modes -- confirm
;; this scheduling type is intended.
588 (define_insn "sub<mode>3_fp16"
589 [(set
590 (match_operand:VH 0 "s_register_operand" "=w")
591 (minus:VH
592 (match_operand:VH 1 "s_register_operand" "w")
593 (match_operand:VH 2 "s_register_operand" "w")))]
594 "TARGET_NEON_FP16INST"
595 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
596 [(set_attr "type" "neon_sub<q>")]
597 )
598
;; DImode subtract with a clobber of the CC register; five alternatives.
;; Alternatives 0 and 4 emit vsub.i64.  Alternatives 1 to 3 emit a subs/sbc
;; pair on the %Q (first) and %R (second) parts of the operands; they are
;; the ones marked conds "clob" and length 8.
;; The arch attribute ties alternative 0 to neon_for_64bits and alternative 4
;; to avoid_neon_for_64bits.
599 (define_insn "subdi3_neon"
600 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
601 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
602 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
603 (clobber (reg:CC CC_REGNUM))]
604 "TARGET_NEON"
605 {
606 switch (which_alternative)
607 {
608 case 0: /* fall through */
609 case 4: return "vsub.i64\t%P0, %P1, %P2";
610 case 1: /* fall through */
611 case 2: /* fall through */
612 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
613 default: gcc_unreachable ();
614 }
615 }
616 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
617 (set_attr "conds" "*,clob,clob,clob,*")
618 (set_attr "length" "*,8,8,8,*")
619 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
620 )
621
;; Vector multiply for VDQW modes, emitted as vmul.<V_if_elem>.  Enabled
;; under TARGET_NEON; float modes additionally require
;; flag_unsafe_math_optimizations.  The type attribute is neon_fp_mul_s<q>
;; for float modes and neon_mul_<V_elem_ch><q> otherwise.
622 (define_insn "*mul<mode>3_neon"
623 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
624 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
625 (match_operand:VDQW 2 "s_register_operand" "w")))]
626 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
627 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
628 [(set (attr "type")
629 (if_then_else (match_test "<Is_float_mode>")
630 (const_string "neon_fp_mul_s<q>")
631 (const_string "neon_mul_<V_elem_ch><q>")))]
632 )
633
;; Multiply-accumulate for VDQW modes: operand 0 = operand 2 * operand 3 +
;; operand 1, with operand 1 tied to operand 0 (constraint "0").  Emitted as
;; vmla.<V_if_elem>.  Float modes additionally require
;; flag_unsafe_math_optimizations.
634 (define_insn "mul<mode>3add<mode>_neon"
635 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
636 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
637 (match_operand:VDQW 3 "s_register_operand" "w"))
638 (match_operand:VDQW 1 "s_register_operand" "0")))]
639 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
640 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
641 [(set (attr "type")
642 (if_then_else (match_test "<Is_float_mode>")
643 (const_string "neon_fp_mla_s<q>")
644 (const_string "neon_mla_<V_elem_ch><q>")))]
645 )
646
;; Multiply-accumulate for VH modes: operand 0 = operand 2 * operand 3 +
;; operand 1, with operand 1 tied to operand 0.  Emitted as vmla.f16.
;; Enabled under TARGET_NEON_FP16INST, with the same
;; (!<Is_float_mode> || flag_unsafe_math_optimizations) test as the VDQW
;; variant.
647 (define_insn "mul<mode>3add<mode>_neon"
648 [(set (match_operand:VH 0 "s_register_operand" "=w")
649 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
650 (match_operand:VH 3 "s_register_operand" "w"))
651 (match_operand:VH 1 "s_register_operand" "0")))]
652 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
653 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
654 [(set_attr "type" "neon_fp_mla_s<q>")]
655 )
656
;; Multiply-subtract for VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with operand 1 tied to operand 0.  Emitted as
;; vmls.<V_if_elem>.  Float modes additionally require
;; flag_unsafe_math_optimizations.
657 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
658 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
659 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
660 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
661 (match_operand:VDQW 3 "s_register_operand" "w"))))]
662 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
663 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
664 [(set (attr "type")
665 (if_then_else (match_test "<Is_float_mode>")
666 (const_string "neon_fp_mla_s<q>")
667 (const_string "neon_mla_<V_elem_ch><q>")))]
668 )
669
670 ;; Fused multiply-accumulate
671 ;; We define each insn twice here:
672 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
673 ;; phase can use it when converting to FMA.
674 ;; 2: without flag_unsafe_math_optimizations, for the intrinsics to use.
;; Fused multiply-add for VCVTF modes: fma (operand 1, operand 2, operand 3)
;; with operand 3 tied to operand 0.  Emitted as vfma.<V_if_elem>.  Enabled
;; under TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations.
675 (define_insn "fma<VCVTF:mode>4"
676 [(set (match_operand:VCVTF 0 "register_operand" "=w")
677 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
678 (match_operand:VCVTF 2 "register_operand" "w")
679 (match_operand:VCVTF 3 "register_operand" "0")))]
680 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
681 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
682 [(set_attr "type" "neon_fp_mla_s<q>")]
683 )
684
;; Same RTL and template as fma<VCVTF:mode>4, but enabled under
;; TARGET_NEON && TARGET_FMA without requiring
;; flag_unsafe_math_optimizations.
685 (define_insn "fma<VCVTF:mode>4_intrinsic"
686 [(set (match_operand:VCVTF 0 "register_operand" "=w")
687 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
688 (match_operand:VCVTF 2 "register_operand" "w")
689 (match_operand:VCVTF 3 "register_operand" "0")))]
690 "TARGET_NEON && TARGET_FMA"
691 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
692 [(set_attr "type" "neon_fp_mla_s<q>")]
693 )
694
;; Fused multiply-add for VH modes: fma (operand 1, operand 2, operand 3)
;; with operand 3 tied to operand 0.  Emitted as vfma.<V_if_elem>.  Enabled
;; under TARGET_NEON_FP16INST && flag_unsafe_math_optimizations.
695 (define_insn "fma<VH:mode>4"
696 [(set (match_operand:VH 0 "register_operand" "=w")
697 (fma:VH
698 (match_operand:VH 1 "register_operand" "w")
699 (match_operand:VH 2 "register_operand" "w")
700 (match_operand:VH 3 "register_operand" "0")))]
701 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
702 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703 [(set_attr "type" "neon_fp_mla_s<q>")]
704 )
705
;; Same RTL and template as fma<VH:mode>4, but enabled under
;; TARGET_NEON_FP16INST without requiring flag_unsafe_math_optimizations.
706 (define_insn "fma<VH:mode>4_intrinsic"
707 [(set (match_operand:VH 0 "register_operand" "=w")
708 (fma:VH
709 (match_operand:VH 1 "register_operand" "w")
710 (match_operand:VH 2 "register_operand" "w")
711 (match_operand:VH 3 "register_operand" "0")))]
712 "TARGET_NEON_FP16INST"
713 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
714 [(set_attr "type" "neon_fp_mla_s<q>")]
715 )
716
;; Fused multiply-subtract for VCVTF modes: fma (-operand 1, operand 2,
;; operand 3) with operand 3 tied to operand 0.  Emitted as
;; vfms.<V_if_elem>.  Enabled under
;; TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations.
717 (define_insn "*fmsub<VCVTF:mode>4"
718 [(set (match_operand:VCVTF 0 "register_operand" "=w")
719 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
720 (match_operand:VCVTF 2 "register_operand" "w")
721 (match_operand:VCVTF 3 "register_operand" "0")))]
722 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
723 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724 [(set_attr "type" "neon_fp_mla_s<q>")]
725 )
726
;; Same RTL and template as *fmsub<VCVTF:mode>4, but named and enabled under
;; TARGET_NEON && TARGET_FMA without requiring
;; flag_unsafe_math_optimizations.
727 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
728 [(set (match_operand:VCVTF 0 "register_operand" "=w")
729 (fma:VCVTF
730 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
731 (match_operand:VCVTF 2 "register_operand" "w")
732 (match_operand:VCVTF 3 "register_operand" "0")))]
733 "TARGET_NEON && TARGET_FMA"
734 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735 [(set_attr "type" "neon_fp_mla_s<q>")]
736 )
737
;; Fused multiply-subtract for VH modes: fma (-operand 1, operand 2,
;; operand 3) with operand 3 tied to operand 0.  Emitted as
;; vfms.<V_if_elem>.  Enabled under TARGET_NEON_FP16INST.
738 (define_insn "fmsub<VH:mode>4_intrinsic"
739 [(set (match_operand:VH 0 "register_operand" "=w")
740 (fma:VH
741 (neg:VH (match_operand:VH 1 "register_operand" "w"))
742 (match_operand:VH 2 "register_operand" "w")
743 (match_operand:VH 3 "register_operand" "0")))]
744 "TARGET_NEON_FP16INST"
745 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
746 [(set_attr "type" "neon_fp_mla_s<q>")]
747 )
748
;; vrint<variant> on VCVTF modes, expressed as a NEON_VRINT unspec of
;; operand 1.  The template hard-codes the .f32 suffix.  Enabled under
;; TARGET_NEON && TARGET_VFP5.
749 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
750 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
751 (unspec:VCVTF [(match_operand:VCVTF 1
752 "s_register_operand" "w")]
753 NEON_VRINT))]
754 "TARGET_NEON && TARGET_VFP5"
755 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
756 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
757 )
758
;; vcvt<variant> from a VCVTF vector to <V_cmp_result>: a FIXUORS code
;; applied to a NEON_VCVT unspec of operand 1.  The template hard-codes the
;; <su>32.f32 suffix.  Enabled under TARGET_NEON && TARGET_VFP5 and marked
;; predicable "no".
759 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
760 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
761 (FIXUORS:<V_cmp_result> (unspec:VCVTF
762 [(match_operand:VCVTF 1 "register_operand" "w")]
763 NEON_VCVT)))]
764 "TARGET_NEON && TARGET_VFP5"
765 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
766 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
767 (set_attr "predicable" "no")]
768 )
769
;; Bitwise OR for VDQ modes; two alternatives.  Alternative 0 emits a
;; three-register vorr.  Alternative 1 (operand 1 tied to operand 0, operand
;; 2 under constraint Dl) delegates to neon_output_logic_immediate with
;; mnemonic "vorr", fourth argument 0 and VALID_NEON_QREG_MODE (<MODE>mode)
;; as the last argument.
770 (define_insn "ior<mode>3"
771 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
772 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
773 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
774 "TARGET_NEON"
775 {
776 switch (which_alternative)
777 {
778 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
779 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
780 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
781 default: gcc_unreachable ();
782 }
783 }
784 [(set_attr "type" "neon_logic<q>")]
785 )
786
787 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
788 ;; vorr. We support the pseudo-instruction vand instead, because that
789 ;; corresponds to the canonical form the middle-end expects to use for
790 ;; immediate bitwise-ANDs.
791
;; Bitwise AND for VDQ modes; two alternatives.  Alternative 0 emits a
;; three-register vand.  Alternative 1 (operand 1 tied to operand 0, operand
;; 2 under constraint DL) delegates to neon_output_logic_immediate with
;; mnemonic "vand", fourth argument 1 and VALID_NEON_QREG_MODE (<MODE>mode)
;; as the last argument.
792 (define_insn "and<mode>3"
793 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
794 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
795 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
796 "TARGET_NEON"
797 {
798 switch (which_alternative)
799 {
800 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
801 case 1: return neon_output_logic_immediate ("vand", &operands[2],
802 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
803 default: gcc_unreachable ();
804 }
805 }
806 [(set_attr "type" "neon_logic<q>")]
807 )
808
;; OR-NOT for VDQ modes: operand 0 = (~operand 2) | operand 1, emitted as
;; vorn.  Note the operand numbering: the inverted input is operand 2, so
;; the template prints %1 before %2.
809 (define_insn "orn<mode>3_neon"
810 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
811 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
812 (match_operand:VDQ 1 "s_register_operand" "w")))]
813 "TARGET_NEON"
814 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
815 [(set_attr "type" "neon_logic<q>")]
816 )
817
818 ;; TODO: investigate whether we should disable
819 ;; this and bicdi3_neon for the A8 in line with the other
820 ;; changes above.
;; DImode OR-NOT: operand 0 = (~operand 2) | operand 1; four alternatives.
;; Alternative 0 emits vorn on %P registers; alternatives 1 to 3 return "#"
;; and are split.  The split applies after reload when operand 0 is not a
;; VFP register:
;;   - under TARGET_THUMB2, operands 0, 1 and 2 are replaced by their SImode
;;     low parts and operands 3, 5 and 4 receive the matching high parts, so
;;     the two SImode ior/not sets in the split template cover both halves;
;;   - otherwise gen_one_cmpldi2 writes ~operand 2 into operand 0, gen_iordi3
;;     ORs operand 1 into it, and the split finishes with DONE.
;; The arch attribute marks the alternatives as any, a, t2, t2.
821 (define_insn_and_split "orndi3_neon"
822 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
823 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
824 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
825 "TARGET_NEON"
826 "@
827 vorn\t%P0, %P1, %P2
828 #
829 #
830 #"
831 "reload_completed &&
832 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
833 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
834 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
835 "
836 {
837 if (TARGET_THUMB2)
838 {
839 operands[3] = gen_highpart (SImode, operands[0]);
840 operands[0] = gen_lowpart (SImode, operands[0]);
841 operands[4] = gen_highpart (SImode, operands[2]);
842 operands[2] = gen_lowpart (SImode, operands[2]);
843 operands[5] = gen_highpart (SImode, operands[1]);
844 operands[1] = gen_lowpart (SImode, operands[1]);
845 }
846 else
847 {
848 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
849 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
850 DONE;
851 }
852 }"
853 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
854 (set_attr "length" "*,16,8,8")
855 (set_attr "arch" "any,a,t2,t2")]
856 )
857
;; Bit-clear for VDQ modes: operand 0 = (~operand 2) & operand 1, emitted as
;; vbic.  The inverted input is operand 2, so the template prints %1 before
;; %2.
858 (define_insn "bic<mode>3_neon"
859 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
860 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
861 (match_operand:VDQ 1 "s_register_operand" "w")))]
862 "TARGET_NEON"
863 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
864 [(set_attr "type" "neon_logic<q>")]
865 )
866
867 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear: operand 0 = (~operand 2) & operand 1; three
;; alternatives.  Alternative 0 emits vbic on %P registers; alternatives 1
;; and 2 return "#" (length 8) and are expected to be split by a pattern
;; that is not in this section.
868 (define_insn "bicdi3_neon"
869 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
870 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
871 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
872 "TARGET_NEON"
873 "@
874 vbic\t%P0, %P1, %P2
875 #
876 #"
877 [(set_attr "type" "neon_logic,multiple,multiple")
878 (set_attr "length" "*,8,8")]
879 )
880
;; Bitwise XOR for VDQ modes, emitted as a three-register veor.  Enabled
;; under TARGET_NEON.
881 (define_insn "xor<mode>3"
882 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
883 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
884 (match_operand:VDQ 2 "s_register_operand" "w")))]
885 "TARGET_NEON"
886 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887 [(set_attr "type" "neon_logic<q>")]
888 )
889
;; Bitwise NOT for VDQ modes, emitted as vmvn.  Enabled under TARGET_NEON;
;; the type attribute is neon_move<q>.
890 (define_insn "one_cmpl<mode>2"
891 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
892 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
893 "TARGET_NEON"
894 "vmvn\t%<V_reg>0, %<V_reg>1"
895 [(set_attr "type" "neon_move<q>")]
896 )
897
;; Absolute value for VDQW modes, emitted as vabs.<V_s_elem>.  Enabled under
;; TARGET_NEON.  The type attribute is neon_fp_abs_s<q> for float modes and
;; neon_abs<q> otherwise.
898 (define_insn "abs<mode>2"
899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
900 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
901 "TARGET_NEON"
902 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
903 [(set (attr "type")
904 (if_then_else (match_test "<Is_float_mode>")
905 (const_string "neon_fp_abs_s<q>")
906 (const_string "neon_abs<q>")))]
907 )
908
;; Vector negation for each mode in the VDQW iterator, emitted as
;; vneg.<V_s_elem>.  The "type" attribute is selected by <Is_float_mode>:
;; neon_fp_neg_s<q> when it is true, neon_neg<q> otherwise.
909 (define_insn "neg<mode>2"
910 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
911 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
912 "TARGET_NEON"
913 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
914 [(set (attr "type")
915 (if_then_else (match_test "<Is_float_mode>")
916 (const_string "neon_fp_neg_s<q>")
917 (const_string "neon_neg<q>")))]
918 )
919
;; DImode negation.  The output template is always "#", so this insn must be
;; split before final output (see the two negdi2_neon define_splits that
;; follow in this file).  It clobbers CC_REGNUM and a DImode scratch
;; (operand 2); only the second alternative asks for a real "&w" scratch
;; register, the others use "X".
920 (define_insn "negdi2_neon"
921 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
922 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
923 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
924 (clobber (reg:CC CC_REGNUM))]
925 "TARGET_NEON"
926 "#"
927 [(set_attr "length" "8")
928 (set_attr "type" "multiple")]
929 )
930
931 ; Split negdi2_neon for vfp registers
;; After reload, when operand 0 is a VFP register (IS_VFP_REGNUM), rewrite
;; the negation as two insns: set the scratch (operand 2) to zero, then
;; compute operand 0 = scratch - operand 1 (with a CC clobber).
;; If no scratch register was allocated (operand 2 is not a REG), operand 0
;; itself is used to hold the zero.
932 (define_split
933 [(set (match_operand:DI 0 "s_register_operand" "")
934 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
935 (clobber (match_scratch:DI 2 ""))
936 (clobber (reg:CC CC_REGNUM))]
937 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
938 [(set (match_dup 2) (const_int 0))
939 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
940 (clobber (reg:CC CC_REGNUM))])]
941 {
942 if (!REG_P (operands[2]))
943 operands[2] = operands[0];
944 }
945 )
946
947 ; Split negdi2_neon for core registers
;; After reload on TARGET_32BIT, when operand 0 satisfies
;; arm_general_register_operand, re-emit the negation as a parallel that
;; keeps only the CC clobber; the DImode scratch (operand 2) is dropped.
948 (define_split
949 [(set (match_operand:DI 0 "s_register_operand" "")
950 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
951 (clobber (match_scratch:DI 2 ""))
952 (clobber (reg:CC CC_REGNUM))]
953 "TARGET_32BIT && reload_completed
954 && arm_general_register_operand (operands[0], DImode)"
955 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
956 (clobber (reg:CC CC_REGNUM))])]
957 ""
958 )
959
;; One insn per code in the ABSNEG iterator and mode in the VH iterator,
;; enabled by TARGET_NEON_FP16INST.  Emitted as v<absneg_str>.<V_s_elem>.
960 (define_insn "<absneg_str><mode>2"
961 [(set (match_operand:VH 0 "s_register_operand" "=w")
962 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
963 "TARGET_NEON_FP16INST"
964 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
965 [(set_attr "type" "neon_abs<q>")]
966 )
967
;; Expander for the ABSNEG codes on VH modes under TARGET_NEON_FP16INST.
;; It forwards its two operands unchanged to the <absneg_str><mode>2 insn.
968 (define_expand "neon_v<absneg_str><mode>"
969 [(set
970 (match_operand:VH 0 "s_register_operand")
971 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
972 "TARGET_NEON_FP16INST"
973 {
974 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
975 DONE;
976 })
977
;; One insn per unspec in the FP16_RND iterator and mode in the VH iterator,
;; enabled by TARGET_NEON_FP16INST.  The mnemonic comes from <fp16_rnd_insn>.
978 (define_insn "neon_v<fp16_rnd_str><mode>"
979 [(set (match_operand:VH 0 "s_register_operand" "=w")
980 (unspec:VH
981 [(match_operand:VH 1 "s_register_operand" "w")]
982 FP16_RND))]
983 "TARGET_NEON_FP16INST"
984 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
985 [(set_attr "type" "neon_fp_round_s<q>")]
986 )
987
;; UNSPEC_VRSQRTE applied to a VH-mode vector, enabled by
;; TARGET_NEON_FP16INST and emitted as vrsqrte.f16.
988 (define_insn "neon_vrsqrte<mode>"
989 [(set (match_operand:VH 0 "s_register_operand" "=w")
990 (unspec:VH
991 [(match_operand:VH 1 "s_register_operand" "w")]
992 UNSPEC_VRSQRTE))]
993 "TARGET_NEON_FP16INST"
994 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
995 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
996 )
997
;; Element-wise unsigned minimum of operands 1 and 2 for each mode in the
;; VDQIW iterator, emitted as vmin.<V_u_elem>.
998 (define_insn "*umin<mode>3_neon"
999 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1000 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1001 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1002 "TARGET_NEON"
1003 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1004 [(set_attr "type" "neon_minmax<q>")]
1005 )
1006
;; Element-wise unsigned maximum of operands 1 and 2 for each mode in the
;; VDQIW iterator, emitted as vmax.<V_u_elem>.
1007 (define_insn "*umax<mode>3_neon"
1008 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1009 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1010 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1011 "TARGET_NEON"
1012 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1013 [(set_attr "type" "neon_minmax<q>")]
1014 )
1015
;; Element-wise smin of operands 1 and 2 for each mode in the VDQW iterator,
;; emitted as vmin.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q>
;; when <Is_float_mode> is true and neon_minmax<q> otherwise.
1016 (define_insn "*smin<mode>3_neon"
1017 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1018 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1019 (match_operand:VDQW 2 "s_register_operand" "w")))]
1020 "TARGET_NEON"
1021 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1022 [(set (attr "type")
1023 (if_then_else (match_test "<Is_float_mode>")
1024 (const_string "neon_fp_minmax_s<q>")
1025 (const_string "neon_minmax<q>")))]
1026 )
1027
;; Element-wise smax of operands 1 and 2 for each mode in the VDQW iterator,
;; emitted as vmax.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q>
;; when <Is_float_mode> is true and neon_minmax<q> otherwise.
1028 (define_insn "*smax<mode>3_neon"
1029 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1030 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1031 (match_operand:VDQW 2 "s_register_operand" "w")))]
1032 "TARGET_NEON"
1033 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1034 [(set (attr "type")
1035 (if_then_else (match_test "<Is_float_mode>")
1036 (const_string "neon_fp_minmax_s<q>")
1037 (const_string "neon_minmax<q>")))]
1038 )
1039
1040 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1041 ; generic vectorizer code. It ends up creating a V2DI constructor with
1042 ; SImode elements.
1043
;; Vector left shift for each mode in the VDQIW iterator.
;; Alternative 0: per-element shift counts in a register, emitted as
;; vshl.<V_s_elem>.
;; Alternative 1: immediate shift count ("Dn"); the assembler text is built
;; by neon_output_shift_immediate with mnemonic "vshl" and sign 'i'.
1044 (define_insn "vashl<mode>3"
1045 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1046 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1047 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1048 "TARGET_NEON"
1049 {
1050 switch (which_alternative)
1051 {
1052 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1053 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1054 <MODE>mode,
1055 VALID_NEON_QREG_MODE (<MODE>mode),
1056 true);
1057 default: gcc_unreachable ();
1058 }
1059 }
1060 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1061 )
1062
;; Vector arithmetic shift right by an immediate accepted by
;; imm_for_neon_rshift_operand.  The assembler text is built by
;; neon_output_shift_immediate with mnemonic "vshr" and sign 's'.
1063 (define_insn "vashr<mode>3_imm"
1064 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1065 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1066 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1067 "TARGET_NEON"
1068 {
1069 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1070 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1071 false);
1072 }
1073 [(set_attr "type" "neon_shift_imm<q>")]
1074 )
1075
;; Vector logical shift right by an immediate accepted by
;; imm_for_neon_rshift_operand.  The assembler text is built by
;; neon_output_shift_immediate with mnemonic "vshr" and sign 'u'.
1076 (define_insn "vlshr<mode>3_imm"
1077 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1078 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1079 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1080 "TARGET_NEON"
1081 {
1082 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1083 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1084 false);
1085 }
1086 [(set_attr "type" "neon_shift_imm<q>")]
1087 )
1088
1089 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1090 ; negative amount, with signed operands. This is essentially the same as
1091 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
1092 ; with negative shift amounts.
1093
;; vshl.<V_s_elem> by a register of per-element counts, represented as
;; UNSPEC_ASHIFT_SIGNED rather than an ashift rtx.  The vashr<mode>3
;; expander emits this with a negated shift count.
1094 (define_insn "ashl<mode>3_signed"
1095 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1096 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1097 (match_operand:VDQI 2 "s_register_operand" "w")]
1098 UNSPEC_ASHIFT_SIGNED))]
1099 "TARGET_NEON"
1100 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1101 [(set_attr "type" "neon_shift_reg<q>")]
1102 )
1103
1104 ; Used for implementing logical shift-right, which is a left-shift by a negative
1105 ; amount, with unsigned operands.
1106
;; vshl.<V_u_elem> by a register of per-element counts, represented as
;; UNSPEC_ASHIFT_UNSIGNED rather than an ashift rtx.  The vlshr<mode>3
;; expander emits this with a negated shift count.
1107 (define_insn "ashl<mode>3_unsigned"
1108 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1109 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1110 (match_operand:VDQI 2 "s_register_operand" "w")]
1111 UNSPEC_ASHIFT_UNSIGNED))]
1112 "TARGET_NEON"
1113 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1114 [(set_attr "type" "neon_shift_reg<q>")]
1115 )
1116
;; Expander for vector arithmetic shift right.
;; Register shift count: negate it into a fresh pseudo and emit
;; ashl<mode>3_signed with the negated count.
;; Otherwise (immediate count): emit vashr<mode>3_imm directly.
1117 (define_expand "vashr<mode>3"
1118 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1119 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1120 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1121 "TARGET_NEON"
1122 {
1123 if (s_register_operand (operands[2], <MODE>mode))
1124 {
1125 rtx neg = gen_reg_rtx (<MODE>mode);
1126 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1127 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1128 }
1129 else
1130 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1131 DONE;
1132 })
1133
;; Expander for vector logical shift right.
;; Register shift count: negate it into a fresh pseudo and emit
;; ashl<mode>3_unsigned with the negated count.
;; Otherwise (immediate count): emit vlshr<mode>3_imm directly.
1134 (define_expand "vlshr<mode>3"
1135 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1136 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1137 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1138 "TARGET_NEON"
1139 {
1140 if (s_register_operand (operands[2], <MODE>mode))
1141 {
1142 rtx neg = gen_reg_rtx (<MODE>mode);
1143 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1144 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1145 }
1146 else
1147 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1148 DONE;
1149 })
1150
1151 ;; 64-bit shifts
1152
1153 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1154 ;; leaving the upper half uninitialized. This is OK since the shift
1155 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1156 ;; data flow analysis however, we pretend the full register is set
1157 ;; using an unspec.
;; Put SImode operand 1 into lane 0 of the DImode NEON register operand 0.
;; Alternative 0 loads from memory ("Um") with vld1.32; alternative 1 copies
;; from a core register ("r") with vmov.32.  The result is modelled as
;; UNSPEC_LOAD_COUNT of the whole DImode register.
1158 (define_insn "neon_load_count"
1159 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1160 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1161 UNSPEC_LOAD_COUNT))]
1162 "TARGET_NEON"
1163 "@
1164 vld1.32\t{%P0[0]}, %A1
1165 vmov.32\t%P0[0], %1"
1166 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1167 )
1168
;; DImode left shift in NEON registers with no clobbers, valid only after
;; reload.  The count is either a constant in the range [0, 63] or a DImode
;; "w" register; both alternatives emit vshl.u64.
1169 (define_insn "ashldi3_neon_noclobber"
1170 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1171 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1172 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1173 "TARGET_NEON && reload_completed
1174 && (!CONST_INT_P (operands[2])
1175 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1176 "@
1177 vshl.u64\t%P0, %P1, %2
1178 vshl.u64\t%P0, %P1, %P2"
1179 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1180 )
1181
;; DImode left shift that may be allocated to NEON ("w") or core ("r")
;; registers.  The insn outputs "#" and is split after reload:
;;
;; * Operand 0 in a VFP register:
;;   - constant count below 1: emit a plain DImode move;
;;   - constant count above 63: replace the count with 63;
;;   - non-constant count: load it into scratch operand 5 with
;;     neon_load_count and shift by that register;
;;   then emit ashldi3_neon_noclobber.
;; * Otherwise (core registers): a shift by exactly 1 uses
;;   arm_ashldi3_1bit, and every other case is handled by
;;   arm_emit_coreregs_64bit_shift using scratch operands 3 and 4.
;;
;; The "arch" and "opt" attributes select which alternatives are available.
1182 (define_insn_and_split "ashldi3_neon"
1183 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w")
1184 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w")
1185 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i")))
1186 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X"))
1187 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X"))
1188 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X"))
1189 (clobber (reg:CC_C CC_REGNUM))]
1190 "TARGET_NEON"
1191 "#"
1192 "TARGET_NEON && reload_completed"
1193 [(const_int 0)]
1194 "
1195 {
1196 if (IS_VFP_REGNUM (REGNO (operands[0])))
1197 {
1198 if (CONST_INT_P (operands[2]))
1199 {
1200 if (INTVAL (operands[2]) < 1)
1201 {
1202 emit_insn (gen_movdi (operands[0], operands[1]));
1203 DONE;
1204 }
1205 else if (INTVAL (operands[2]) > 63)
1206 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1207 }
1208 else
1209 {
1210 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1211 operands[2] = operands[5];
1212 }
1213
1214 /* Ditch the unnecessary clobbers. */
1215 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1216 operands[2]));
1217 }
1218 else
1219 {
1220 /* The shift expanders support either full overlap or no overlap. */
1221 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1222 || REGNO (operands[0]) == REGNO (operands[1]));
1223
1224 if (operands[2] == CONST1_RTX (SImode))
1225 /* This clobbers CC. */
1226 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1227 else
1228 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1229 operands[2], operands[3], operands[4]);
1230 }
1231 DONE;
1232 }"
1233 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1234 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1235 (set_attr "type" "multiple")]
1236 )
1237
1238 ; The shift amount needs to be negated for right-shifts
;; vshl.s64 of DImode operand 1 by the count in DImode register operand 2,
;; modelled as UNSPEC_ASHIFT_SIGNED.  Only valid after reload.
1239 (define_insn "signed_shift_di3_neon"
1240 [(set (match_operand:DI 0 "s_register_operand" "=w")
1241 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1242 (match_operand:DI 2 "s_register_operand" " w")]
1243 UNSPEC_ASHIFT_SIGNED))]
1244 "TARGET_NEON && reload_completed"
1245 "vshl.s64\t%P0, %P1, %P2"
1246 [(set_attr "type" "neon_shift_reg")]
1247 )
1248
1249 ; The shift amount needs to be negated for right-shifts
;; vshl.u64 of DImode operand 1 by the count in DImode register operand 2,
;; modelled as UNSPEC_ASHIFT_UNSIGNED.  Only valid after reload.
1250 (define_insn "unsigned_shift_di3_neon"
1251 [(set (match_operand:DI 0 "s_register_operand" "=w")
1252 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1253 (match_operand:DI 2 "s_register_operand" " w")]
1254 UNSPEC_ASHIFT_UNSIGNED))]
1255 "TARGET_NEON && reload_completed"
1256 "vshl.u64\t%P0, %P1, %P2"
1257 [(set_attr "type" "neon_shift_reg")]
1258 )
1259
;; DImode arithmetic shift right in a NEON register by a constant in the
;; range [1, 64], with no clobbers.  Only valid after reload; emitted as
;; vshr.s64.
1260 (define_insn "ashrdi3_neon_imm_noclobber"
1261 [(set (match_operand:DI 0 "s_register_operand" "=w")
1262 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1263 (match_operand:DI 2 "const_int_operand" " i")))]
1264 "TARGET_NEON && reload_completed
1265 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1266 "vshr.s64\t%P0, %P1, %2"
1267 [(set_attr "type" "neon_shift_imm")]
1268 )
1269
;; DImode logical shift right in a NEON register by a constant in the
;; range [1, 64], with no clobbers.  Only valid after reload; emitted as
;; vshr.u64.
1270 (define_insn "lshrdi3_neon_imm_noclobber"
1271 [(set (match_operand:DI 0 "s_register_operand" "=w")
1272 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1273 (match_operand:DI 2 "const_int_operand" " i")))]
1274 "TARGET_NEON && reload_completed
1275 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1276 "vshr.u64\t%P0, %P1, %2"
1277 [(set_attr "type" "neon_shift_imm")]
1278 )
1279
1280 ;; ashrdi3_neon
1281 ;; lshrdi3_neon
;; DImode right shifts, one insn per code in the RSHIFTS iterator, that may
;; be allocated to NEON ("w") or core ("r") registers.  The insn outputs "#"
;; and is split after reload:
;;
;; * Operand 0 in a VFP register:
;;   - constant count below 1: emit a plain DImode move;
;;   - constant count above 64: replace the count with 64;
;;   - any other constant: emit <shift>di3_neon_imm_noclobber;
;;   - register count: negate it into scratch operand 3, load that into
;;     scratch operand 5 with neon_load_count, and emit
;;     <shifttype>_shift_di3_neon.
;; * Otherwise (core registers): a shift by exactly 1 uses
;;   arm_<shift>di3_1bit, and every other case is handled by
;;   arm_emit_coreregs_64bit_shift using scratch operands 3 and 4.
1282 (define_insn_and_split "<shift>di3_neon"
1283 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w")
1284 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1285 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1286 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1287 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1288 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1289 (clobber (reg:CC CC_REGNUM))]
1290 "TARGET_NEON"
1291 "#"
1292 "TARGET_NEON && reload_completed"
1293 [(const_int 0)]
1294 "
1295 {
1296 if (IS_VFP_REGNUM (REGNO (operands[0])))
1297 {
1298 if (CONST_INT_P (operands[2]))
1299 {
1300 if (INTVAL (operands[2]) < 1)
1301 {
1302 emit_insn (gen_movdi (operands[0], operands[1]));
1303 DONE;
1304 }
1305 else if (INTVAL (operands[2]) > 64)
1306 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1307
1308 /* Ditch the unnecessary clobbers. */
1309 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1310 operands[1],
1311 operands[2]));
1312 }
1313 else
1314 {
1315 /* We must use a negative left-shift. */
1316 emit_insn (gen_negsi2 (operands[3], operands[2]));
1317 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1318 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1319 operands[5]));
1320 }
1321 }
1322 else
1323 {
1324 /* The shift expanders support either full overlap or no overlap. */
1325 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1326 || REGNO (operands[0]) == REGNO (operands[1]));
1327
1328 if (operands[2] == CONST1_RTX (SImode))
1329 /* This clobbers CC. */
1330 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1331 else
1332 /* This clobbers CC (ASHIFTRT by register only). */
1333 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1334 operands[2], operands[3], operands[4]);
1335 }
1336
1337 DONE;
1338 }"
1339 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1340 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1341 (set_attr "type" "multiple")]
1342 )
1343
1344 ;; Widening operations
1345
;; Expander for operand 0 = sign_extend (operand 1) + operand 2, where
;; operand 1 has a VQI mode and the accumulator has mode <V_double_width>.
;; Operand 2 is first copied into operand 0 (unless they are the same rtx);
;; then the two halves of operand 1 are accumulated into operand 0 by
;; vec_sel_widen_ssum_lo and vec_sel_widen_ssum_hi, using the half-selector
;; parallels returned by arm_simd_vect_par_cnst_half.
1346 (define_expand "widen_ssum<mode>3"
1347 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1348 (plus:<V_double_width>
1349 (sign_extend:<V_double_width>
1350 (match_operand:VQI 1 "s_register_operand" ""))
1351 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1352 "TARGET_NEON"
1353 {
1354 machine_mode mode = GET_MODE (operands[1]);
1355 rtx p1, p2;
1356
1357 p1 = arm_simd_vect_par_cnst_half (mode, false);
1358 p2 = arm_simd_vect_par_cnst_half (mode, true);
1359
1360 if (operands[0] != operands[2])
1361 emit_move_insn (operands[0], operands[2]);
1362
1363 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1364 operands[1],
1365 p1,
1366 operands[0]));
1367 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1368 operands[1],
1369 p2,
1370 operands[0]));
1371 DONE;
1372 }
1373 )
1374
;; operand 0 = operand 3 + sign_extend (half of operand 1), where the half is
;; chosen by a selector satisfying vect_par_constant_low and operand 3 is
;; tied to operand 0.  Emitted as vaddw.<V_s_elem>, using %f1 when
;; BYTES_BIG_ENDIAN and %e1 otherwise.
1375 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1376 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1377 (plus:<V_double_width>
1378 (sign_extend:<V_double_width>
1379 (vec_select:<V_HALF>
1380 (match_operand:VQI 1 "s_register_operand" "%w")
1381 (match_operand:VQI 2 "vect_par_constant_low" "")))
1382 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1383 "TARGET_NEON"
1384 {
1385 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1386 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1387 }
1388 [(set_attr "type" "neon_add_widen")])
1389
;; operand 0 = operand 3 + sign_extend (half of operand 1), where the half is
;; chosen by a selector satisfying vect_par_constant_high and operand 3 is
;; tied to operand 0.  Emitted as vaddw.<V_s_elem>, using %e1 when
;; BYTES_BIG_ENDIAN and %f1 otherwise.
1390 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1391 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1392 (plus:<V_double_width>
1393 (sign_extend:<V_double_width>
1394 (vec_select:<V_HALF>
1395 (match_operand:VQI 1 "s_register_operand" "%w")
1396 (match_operand:VQI 2 "vect_par_constant_high" "")))
1397 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1398 "TARGET_NEON"
1399 {
1400 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1401 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1402 }
1403 [(set_attr "type" "neon_add_widen")])
1404
;; operand 0 = sign_extend (operand 1) + operand 2 for each mode in the VW
;; iterator, with the result and accumulator in mode <V_widen>.  Emitted as
;; a single vaddw.<V_s_elem>.
1405 (define_insn "widen_ssum<mode>3"
1406 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1407 (plus:<V_widen>
1408 (sign_extend:<V_widen>
1409 (match_operand:VW 1 "s_register_operand" "%w"))
1410 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1411 "TARGET_NEON"
1412 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1413 [(set_attr "type" "neon_add_widen")]
1414 )
1415
;; Expander for operand 0 = zero_extend (operand 1) + operand 2, where
;; operand 1 has a VQI mode and the accumulator has mode <V_double_width>.
;; Operand 2 is first copied into operand 0 (unless they are the same rtx);
;; then the two halves of operand 1 are accumulated into operand 0 by
;; vec_sel_widen_usum_lo and vec_sel_widen_usum_hi, using the half-selector
;; parallels returned by arm_simd_vect_par_cnst_half.
1416 (define_expand "widen_usum<mode>3"
1417 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1418 (plus:<V_double_width>
1419 (zero_extend:<V_double_width>
1420 (match_operand:VQI 1 "s_register_operand" ""))
1421 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1422 "TARGET_NEON"
1423 {
1424 machine_mode mode = GET_MODE (operands[1]);
1425 rtx p1, p2;
1426
1427 p1 = arm_simd_vect_par_cnst_half (mode, false);
1428 p2 = arm_simd_vect_par_cnst_half (mode, true);
1429
1430 if (operands[0] != operands[2])
1431 emit_move_insn (operands[0], operands[2]);
1432
1433 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1434 operands[1],
1435 p1,
1436 operands[0]));
1437 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1438 operands[1],
1439 p2,
1440 operands[0]));
1441 DONE;
1442 }
1443 )
1444
;; operand 0 = operand 3 + zero_extend (half of operand 1), where the half is
;; chosen by a selector satisfying vect_par_constant_low and operand 3 is
;; tied to operand 0.  Emitted as vaddw.<V_u_elem>, using %f1 when
;; BYTES_BIG_ENDIAN and %e1 otherwise.
1445 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1446 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1447 (plus:<V_double_width>
1448 (zero_extend:<V_double_width>
1449 (vec_select:<V_HALF>
1450 (match_operand:VQI 1 "s_register_operand" "%w")
1451 (match_operand:VQI 2 "vect_par_constant_low" "")))
1452 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1453 "TARGET_NEON"
1454 {
1455 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1456 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1457 }
1458 [(set_attr "type" "neon_add_widen")])
1459
;; operand 0 = operand 3 + zero_extend (half of operand 1), where the half is
;; chosen by a selector satisfying vect_par_constant_high and operand 3 is
;; tied to operand 0.  Emitted as vaddw.<V_u_elem>, using %e1 when
;; BYTES_BIG_ENDIAN and %f1 otherwise.
1460 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1461 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1462 (plus:<V_double_width>
1463 (zero_extend:<V_double_width>
1464 (vec_select:<V_HALF>
1465 (match_operand:VQI 1 "s_register_operand" "%w")
1466 (match_operand:VQI 2 "vect_par_constant_high" "")))
1467 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1468 "TARGET_NEON"
1469 {
1470 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1471 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1472 }
1473 [(set_attr "type" "neon_add_widen")])
1474
;; operand 0 = zero_extend (operand 1) + operand 2 for each mode in the VW
;; iterator, with the result and accumulator in mode <V_widen>.  Emitted as
;; a single vaddw.<V_u_elem>.
1475 (define_insn "widen_usum<mode>3"
1476 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1477 (plus:<V_widen> (zero_extend:<V_widen>
1478 (match_operand:VW 1 "s_register_operand" "%w"))
1479 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1480 "TARGET_NEON"
1481 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1482 [(set_attr "type" "neon_add_widen")]
1483 )
1484
1485 ;; Helpers for quad-word reduction operations
1486
1487 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1488 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1489 ; N/2-element vector.
1490
;; For each code in VQH_OPS: V2SI operand 0 = <code> (elements 0-1 of V4SI
;; operand 1, elements 2-3 of the same operand).  Emitted as one
;; <VQH_mnem>.<VQH_sign>32 on %e1 and %f1.
1491 (define_insn "quad_halves_<code>v4si"
1492 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1493 (VQH_OPS:V2SI
1494 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1495 (parallel [(const_int 0) (const_int 1)]))
1496 (vec_select:V2SI (match_dup 1)
1497 (parallel [(const_int 2) (const_int 3)]))))]
1498 "TARGET_NEON"
1499 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1500 [(set_attr "vqh_mnem" "<VQH_mnem>")
1501 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1502 )
1503
;; For each code in VQHS_OPS: V2SF operand 0 = <code> (elements 0-1 of V4SF
;; operand 1, elements 2-3 of the same operand).  Only enabled with
;; flag_unsafe_math_optimizations.  Emitted as one <VQH_mnem>.f32 on %e1
;; and %f1.
1504 (define_insn "quad_halves_<code>v4sf"
1505 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1506 (VQHS_OPS:V2SF
1507 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1508 (parallel [(const_int 0) (const_int 1)]))
1509 (vec_select:V2SF (match_dup 1)
1510 (parallel [(const_int 2) (const_int 3)]))))]
1511 "TARGET_NEON && flag_unsafe_math_optimizations"
1512 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1513 [(set_attr "vqh_mnem" "<VQH_mnem>")
1514 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1515 )
1516
;; For each code in VQH_OPS: V4HI operand 0 = <code> (elements 0-3 of V8HI
;; operand 1, elements 4-7 of the same operand).  Emitted as one
;; <VQH_mnem>.<VQH_sign>16 on %e1 and %f1.
;; Operand 0 is write-only (it appears only as the SET destination and the
;; template never reads %P0), so its constraint is "=w", matching the v4si
;; and v4sf variants, rather than the read-write "+w".
1517 (define_insn "quad_halves_<code>v8hi"
1518 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1519 (VQH_OPS:V4HI
1520 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1521 (parallel [(const_int 0) (const_int 1)
1522 (const_int 2) (const_int 3)]))
1523 (vec_select:V4HI (match_dup 1)
1524 (parallel [(const_int 4) (const_int 5)
1525 (const_int 6) (const_int 7)]))))]
1526 "TARGET_NEON"
1527 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1528 [(set_attr "vqh_mnem" "<VQH_mnem>")
1529 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1530 )
1531
;; For each code in VQH_OPS: V8QI operand 0 = <code> (elements 0-7 of V16QI
;; operand 1, elements 8-15 of the same operand).  Emitted as one
;; <VQH_mnem>.<VQH_sign>8 on %e1 and %f1.
;; Operand 0 is write-only (it appears only as the SET destination and the
;; template never reads %P0), so its constraint is "=w", matching the v4si
;; and v4sf variants, rather than the read-write "+w".
1532 (define_insn "quad_halves_<code>v16qi"
1533 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1534 (VQH_OPS:V8QI
1535 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1536 (parallel [(const_int 0) (const_int 1)
1537 (const_int 2) (const_int 3)
1538 (const_int 4) (const_int 5)
1539 (const_int 6) (const_int 7)]))
1540 (vec_select:V8QI (match_dup 1)
1541 (parallel [(const_int 8) (const_int 9)
1542 (const_int 10) (const_int 11)
1543 (const_int 12) (const_int 13)
1544 (const_int 14) (const_int 15)]))))]
1545 "TARGET_NEON"
1546 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1547 [(set_attr "vqh_mnem" "<VQH_mnem>")
1548 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1549 )
1550
;; Move <V_HALF>-mode operand 1 into the <V_HALF> subreg of operand 0 that
;; starts at byte offset GET_MODE_SIZE (<V_HALF>mode).
1551 (define_expand "move_hi_quad_<mode>"
1552 [(match_operand:ANY128 0 "s_register_operand" "")
1553 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1554 "TARGET_NEON"
1555 {
1556 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1557 GET_MODE_SIZE (<V_HALF>mode)),
1558 operands[1]);
1559 DONE;
1560 })
1561
;; Move <V_HALF>-mode operand 1 into the <V_HALF> subreg of operand 0 that
;; starts at byte offset 0.
1562 (define_expand "move_lo_quad_<mode>"
1563 [(match_operand:ANY128 0 "s_register_operand" "")
1564 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1565 "TARGET_NEON"
1566 {
1567 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1568 <MODE>mode, 0),
1569 operands[1]);
1570 DONE;
1571 })
1572
1573 ;; Reduction operations
1574
;; Sum reduction of a VD-mode vector (operand 1) to a scalar (operand 0).
;; Float modes are only handled with flag_unsafe_math_optimizations.
;; neon_pairwise_reduce is called with the neon_vpadd_internal<mode>
;; generator, then element 0 of the result vector is extracted.
1575 (define_expand "reduc_plus_scal_<mode>"
1576 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1577 (match_operand:VD 1 "s_register_operand" "")]
1578 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1579 {
1580 rtx vec = gen_reg_rtx (<MODE>mode);
1581 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1582 &gen_neon_vpadd_internal<mode>);
1583 /* The same result is actually computed into every element. */
1584 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1585 DONE;
1586 })
1587
;; Sum reduction of a VQ-mode vector (operand 1) to a scalar (operand 0).
;; Not available when BYTES_BIG_ENDIAN; float modes additionally require
;; flag_unsafe_math_optimizations.  The two halves are first combined into
;; a <V_HALF> vector with quad_halves_plus<mode>, which is then reduced by
;; reduc_plus_scal_<V_half>.
1588 (define_expand "reduc_plus_scal_<mode>"
1589 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1590 (match_operand:VQ 1 "s_register_operand" "")]
1591 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1592 && !BYTES_BIG_ENDIAN"
1593 {
1594 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1595
1596 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1597 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1598
1599 DONE;
1600 })
1601
;; Sum reduction of a V2DI vector (operand 1) to a DImode scalar
;; (operand 0).  Not available when BYTES_BIG_ENDIAN.
;; arm_reduc_plus_internal_v2di computes the sum into a temporary V2DI
;; register, and element 0 of that register is then extracted.
;; NOTE(review): operand 0 carries an "=w" constraint, unlike the other
;; reduc_*_scal expanders in this file, which use "" -- confirm whether
;; this is intentional.
1602 (define_expand "reduc_plus_scal_v2di"
1603 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1604 (match_operand:V2DI 1 "s_register_operand" "")]
1605 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1606 {
1607 rtx vec = gen_reg_rtx (V2DImode);
1608
1609 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1610 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1611
1612 DONE;
1613 })
1614
;; Adds the two 64-bit halves of V2DI operand 1 (%e1 and %f1) with vadd.i64,
;; modelled as UNSPEC_VPADD.  The template writes only %e0 of operand 0.
;; Not available when BYTES_BIG_ENDIAN.
1615 (define_insn "arm_reduc_plus_internal_v2di"
1616 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1617 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1618 UNSPEC_VPADD))]
1619 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1620 "vadd.i64\t%e0, %e1, %f1"
1621 [(set_attr "type" "neon_add_q")]
1622 )
1623
;; smin reduction of a VD-mode vector (operand 1) to a scalar (operand 0).
;; Float modes are only handled with flag_unsafe_math_optimizations.
;; neon_pairwise_reduce is called with the neon_vpsmin<mode> generator,
;; then element 0 of the result vector is extracted.
1624 (define_expand "reduc_smin_scal_<mode>"
1625 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1626 (match_operand:VD 1 "s_register_operand" "")]
1627 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1628 {
1629 rtx vec = gen_reg_rtx (<MODE>mode);
1630
1631 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1632 &gen_neon_vpsmin<mode>);
1633 /* The result is computed into every element of the vector. */
1634 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1635 DONE;
1636 })
1637
;; smin reduction of a VQ-mode vector (operand 1) to a scalar (operand 0).
;; Not available when BYTES_BIG_ENDIAN; float modes additionally require
;; flag_unsafe_math_optimizations.  The two halves are first combined with
;; quad_halves_smin<mode>, and the <V_HALF> result is then reduced by
;; reduc_smin_scal_<V_half>.
1638 (define_expand "reduc_smin_scal_<mode>"
1639 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1640 (match_operand:VQ 1 "s_register_operand" "")]
1641 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1642 && !BYTES_BIG_ENDIAN"
1643 {
1644 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1645
1646 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1647 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1648
1649 DONE;
1650 })
1651
;; smax reduction of a VD-mode vector (operand 1) to a scalar (operand 0).
;; Float modes are only handled with flag_unsafe_math_optimizations.
;; neon_pairwise_reduce is called with the neon_vpsmax<mode> generator,
;; then element 0 of the result vector is extracted.
1652 (define_expand "reduc_smax_scal_<mode>"
1653 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1654 (match_operand:VD 1 "s_register_operand" "")]
1655 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1656 {
1657 rtx vec = gen_reg_rtx (<MODE>mode);
1658 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1659 &gen_neon_vpsmax<mode>);
1660 /* The result is computed into every element of the vector. */
1661 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1662 DONE;
1663 })
1664
;; smax reduction of a VQ-mode vector (operand 1) to a scalar (operand 0).
;; Not available when BYTES_BIG_ENDIAN; float modes additionally require
;; flag_unsafe_math_optimizations.  The two halves are first combined with
;; quad_halves_smax<mode>, and the <V_HALF> result is then reduced by
;; reduc_smax_scal_<V_half>.
1665 (define_expand "reduc_smax_scal_<mode>"
1666 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1667 (match_operand:VQ 1 "s_register_operand" "")]
1668 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1669 && !BYTES_BIG_ENDIAN"
1670 {
1671 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1672
1673 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1674 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1675
1676 DONE;
1677 })
1678
;; umin reduction of a VDI-mode vector (operand 1) to a scalar (operand 0).
;; neon_pairwise_reduce is called with the neon_vpumin<mode> generator,
;; then element 0 of the result vector is extracted.
1679 (define_expand "reduc_umin_scal_<mode>"
1680 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1681 (match_operand:VDI 1 "s_register_operand" "")]
1682 "TARGET_NEON"
1683 {
1684 rtx vec = gen_reg_rtx (<MODE>mode);
1685 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1686 &gen_neon_vpumin<mode>);
1687 /* The result is computed into every element of the vector. */
1688 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1689 DONE;
1690 })
1691
;; umin reduction of a VQI-mode vector (operand 1) to a scalar (operand 0).
;; Not available when BYTES_BIG_ENDIAN.  The two halves are first combined
;; with quad_halves_umin<mode>, and the <V_HALF> result is then reduced by
;; reduc_umin_scal_<V_half>.
1692 (define_expand "reduc_umin_scal_<mode>"
1693 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1694 (match_operand:VQI 1 "s_register_operand" "")]
1695 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1696 {
1697 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1698
1699 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1700 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1701
1702 DONE;
1703 })
1704
;; umax reduction of a VDI-mode vector (operand 1) to a scalar (operand 0).
;; neon_pairwise_reduce is called with the neon_vpumax<mode> generator,
;; then element 0 of the result vector is extracted.
1705 (define_expand "reduc_umax_scal_<mode>"
1706 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1707 (match_operand:VDI 1 "s_register_operand" "")]
1708 "TARGET_NEON"
1709 {
1710 rtx vec = gen_reg_rtx (<MODE>mode);
1711 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1712 &gen_neon_vpumax<mode>);
1713 /* The result is computed into every element of the vector. */
1714 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1715 DONE;
1716 })
1717
;; umax reduction of a VQI-mode vector (operand 1) to a scalar (operand 0).
;; Not available when BYTES_BIG_ENDIAN.  The two halves are first combined
;; with quad_halves_umax<mode>, and the <V_HALF> result is then reduced by
;; reduc_umax_scal_<V_half>.
1718 (define_expand "reduc_umax_scal_<mode>"
1719 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1720 (match_operand:VQI 1 "s_register_operand" "")]
1721 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1722 {
1723 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1724
1725 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1726 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1727
1728 DONE;
1729 })
1730
;; vpadd.<V_if_elem> on two VD-mode operands, modelled as UNSPEC_VPADD.
;; The "type" attribute is neon_fp_reduc_add_s<q> when <Is_float_mode> is
;; true and neon_reduc_add<q> otherwise.
1731 (define_insn "neon_vpadd_internal<mode>"
1732 [(set (match_operand:VD 0 "s_register_operand" "=w")
1733 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1734 (match_operand:VD 2 "s_register_operand" "w")]
1735 UNSPEC_VPADD))]
1736 "TARGET_NEON"
1737 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1738 ;; Assume this schedules like vadd.
1739 [(set (attr "type")
1740 (if_then_else (match_test "<Is_float_mode>")
1741 (const_string "neon_fp_reduc_add_s<q>")
1742 (const_string "neon_reduc_add<q>")))]
1743 )
1744
;; vpadd.f16 on two V4HF operands, modelled as UNSPEC_VPADD and enabled by
;; TARGET_NEON_FP16INST.
1745 (define_insn "neon_vpaddv4hf"
1746 [(set
1747 (match_operand:V4HF 0 "s_register_operand" "=w")
1748 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1749 (match_operand:V4HF 2 "s_register_operand" "w")]
1750 UNSPEC_VPADD))]
1751 "TARGET_NEON_FP16INST"
1752 "vpadd.f16\t%P0, %P1, %P2"
1753 [(set_attr "type" "neon_reduc_add")]
1754 )
1755
;; vpmin.<V_s_elem> on two VD-mode operands, modelled as UNSPEC_VPSMIN.
;; The "type" attribute is neon_fp_reduc_minmax_s<q> when <Is_float_mode>
;; is true and neon_reduc_minmax<q> otherwise.
1756 (define_insn "neon_vpsmin<mode>"
1757 [(set (match_operand:VD 0 "s_register_operand" "=w")
1758 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1759 (match_operand:VD 2 "s_register_operand" "w")]
1760 UNSPEC_VPSMIN))]
1761 "TARGET_NEON"
1762 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1763 [(set (attr "type")
1764 (if_then_else (match_test "<Is_float_mode>")
1765 (const_string "neon_fp_reduc_minmax_s<q>")
1766 (const_string "neon_reduc_minmax<q>")))]
1767 )
1768
;; vpmax.<V_s_elem> on two VD-mode operands, modelled as UNSPEC_VPSMAX.
;; The "type" attribute is neon_fp_reduc_minmax_s<q> when <Is_float_mode>
;; is true and neon_reduc_minmax<q> otherwise.
1769 (define_insn "neon_vpsmax<mode>"
1770 [(set (match_operand:VD 0 "s_register_operand" "=w")
1771 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1772 (match_operand:VD 2 "s_register_operand" "w")]
1773 UNSPEC_VPSMAX))]
1774 "TARGET_NEON"
1775 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1776 [(set (attr "type")
1777 (if_then_else (match_test "<Is_float_mode>")
1778 (const_string "neon_fp_reduc_minmax_s<q>")
1779 (const_string "neon_reduc_minmax<q>")))]
1780 )
1781
;; vpmin.<V_u_elem> on two VDI-mode operands, modelled as UNSPEC_VPUMIN.
1782 (define_insn "neon_vpumin<mode>"
1783 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1784 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1785 (match_operand:VDI 2 "s_register_operand" "w")]
1786 UNSPEC_VPUMIN))]
1787 "TARGET_NEON"
1788 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1789 [(set_attr "type" "neon_reduc_minmax<q>")]
1790 )
1791
;; vpmax.<V_u_elem> on two VDI-mode operands, modelled as UNSPEC_VPUMAX.
1792 (define_insn "neon_vpumax<mode>"
1793 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1794 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1795 (match_operand:VDI 2 "s_register_operand" "w")]
1796 UNSPEC_VPUMAX))]
1797 "TARGET_NEON"
1798 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1799 [(set_attr "type" "neon_reduc_minmax<q>")]
1800 )
1801
1802 ;; Saturating arithmetic
1803
1804 ; NOTE: Neon supports many more saturating variants of instructions than the
1805 ; following, but these are all GCC currently understands.
1806 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1807 ; yet either, although these patterns may be used by intrinsics when they're
1808 ; added.
1809
;; Signed saturating add (ss_plus) on the VD modes, emitted as vqadd with
;; the <V_s_elem> element suffix.
1810 (define_insn "*ss_add<mode>_neon"
1811 [(set (match_operand:VD 0 "s_register_operand" "=w")
1812 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1813 (match_operand:VD 2 "s_register_operand" "w")))]
1814 "TARGET_NEON"
1815 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1816 [(set_attr "type" "neon_qadd<q>")]
1817 )
1818
;; Unsigned saturating add (us_plus) on the VD modes, emitted as vqadd with
;; the <V_u_elem> element suffix.
1819 (define_insn "*us_add<mode>_neon"
1820 [(set (match_operand:VD 0 "s_register_operand" "=w")
1821 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1822 (match_operand:VD 2 "s_register_operand" "w")))]
1823 "TARGET_NEON"
1824 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1825 [(set_attr "type" "neon_qadd<q>")]
1826 )
1827
;; Signed saturating subtract (ss_minus) on the VD modes, emitted as vqsub
;; with the <V_s_elem> element suffix.
1828 (define_insn "*ss_sub<mode>_neon"
1829 [(set (match_operand:VD 0 "s_register_operand" "=w")
1830 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1831 (match_operand:VD 2 "s_register_operand" "w")))]
1832 "TARGET_NEON"
1833 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1834 [(set_attr "type" "neon_qsub<q>")]
1835 )
1836
;; Unsigned saturating subtract (us_minus) on the VD modes, emitted as vqsub
;; with the <V_u_elem> element suffix.
1837 (define_insn "*us_sub<mode>_neon"
1838 [(set (match_operand:VD 0 "s_register_operand" "=w")
1839 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1840 (match_operand:VD 2 "s_register_operand" "w")))]
1841 "TARGET_NEON"
1842 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1843 [(set_attr "type" "neon_qsub<q>")]
1844 )
1845
1846 ;; Conditional instructions. These are comparisons with conditional moves for
1847 ;; vectors. They perform the assignment:
1848 ;;
1849 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1850 ;;
1851 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1852 ;; element-wise.
1853
;; Expander for the VDQW modes.  It is disabled for float modes unless
;; flag_unsafe_math_optimizations is set.  A comparison mask is built with
;; vcge/vcgt/vceq (or the compare-against-zero forms vclt/vcle) and the
;; result is selected with vbsl; some codes are implemented by emitting the
;; inverse comparison and swapping the two vbsl data operands.
1854 (define_expand "vcond<mode><mode>"
1855 [(set (match_operand:VDQW 0 "s_register_operand" "")
1856 (if_then_else:VDQW
1857 (match_operator 3 "comparison_operator"
1858 [(match_operand:VDQW 4 "s_register_operand" "")
1859 (match_operand:VDQW 5 "nonmemory_operand" "")])
1860 (match_operand:VDQW 1 "s_register_operand" "")
1861 (match_operand:VDQW 2 "s_register_operand" "")))]
1862 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1863 {
1864 int inverse = 0;
1865 int use_zero_form = 0;
1866 int swap_bsl_operands = 0;
1867 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1868 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1869
1870 rtx (*base_comparison) (rtx, rtx, rtx);
1871 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1872
/* Step 1: decide whether operand 5 may stay as a constant zero.  Only the
   codes listed below keep it; for every other code, or for a non-zero
   operand, it is forced into a register.  */
1873 switch (GET_CODE (operands[3]))
1874 {
1875 case GE:
1876 case GT:
1877 case LE:
1878 case LT:
1879 case EQ:
1880 if (operands[5] == CONST0_RTX (<MODE>mode))
1881 {
1882 use_zero_form = 1;
1883 break;
1884 }
1885 /* Fall through. */
1886 default:
1887 if (!REG_P (operands[5]))
1888 operands[5] = force_reg (<MODE>mode, operands[5]);
1889 }
1890
/* Step 2: pick the generator pair for this code.  INVERSE is set for the
   "less than" flavoured codes, which are handled through the greater-than
   generators.  */
1891 switch (GET_CODE (operands[3]))
1892 {
1893 case LT:
1894 case UNLT:
1895 inverse = 1;
1896 /* Fall through. */
1897 case GE:
1898 case UNGE:
1899 case ORDERED:
1900 case UNORDERED:
1901 base_comparison = gen_neon_vcge<mode>;
1902 complimentary_comparison = gen_neon_vcgt<mode>;
1903 break;
1904 case LE:
1905 case UNLE:
1906 inverse = 1;
1907 /* Fall through. */
1908 case GT:
1909 case UNGT:
1910 base_comparison = gen_neon_vcgt<mode>;
1911 complimentary_comparison = gen_neon_vcge<mode>;
1912 break;
1913 case EQ:
1914 case NE:
1915 case UNEQ:
1916 base_comparison = gen_neon_vceq<mode>;
1917 complimentary_comparison = gen_neon_vceq<mode>;
1918 break;
1919 default:
1920 gcc_unreachable ();
1921 }
1922
/* Step 3: emit the comparison(s) that compute MASK, and record whether the
   vbsl data operands must be swapped.  */
1923 switch (GET_CODE (operands[3]))
1924 {
1925 case LT:
1926 case LE:
1927 case GT:
1928 case GE:
1929 case EQ:
1930 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1931 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1932 a GE b -> a GE b
1933 a GT b -> a GT b
1934 a LE b -> b GE a
1935 a LT b -> b GT a
1936 a EQ b -> a EQ b
1937 Note that there also exist direct comparison against 0 forms,
1938 so catch those as a special case. */
1939 if (use_zero_form)
1940 {
1941 inverse = 0;
1942 switch (GET_CODE (operands[3]))
1943 {
1944 case LT:
1945 base_comparison = gen_neon_vclt<mode>;
1946 break;
1947 case LE:
1948 base_comparison = gen_neon_vcle<mode>;
1949 break;
1950 default:
1951 /* Do nothing, other zero form cases already have the correct
1952 base_comparison. */
1953 break;
1954 }
1955 }
1956
1957 if (!inverse)
1958 emit_insn (base_comparison (mask, operands[4], operands[5]));
1959 else
1960 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1961 break;
1962 case UNLT:
1963 case UNLE:
1964 case UNGT:
1965 case UNGE:
1966 case NE:
1967 /* Vector compare returns false for lanes which are unordered, so if we use
1968 the inverse of the comparison we actually want to emit, then
1969 swap the operands to BSL, we will end up with the correct result.
1970 Note that a NE NaN and NaN NE b are true for all a, b.
1971
1972 Our transformations are:
1973 a UNGE b -> !(b GT a)
1974 a UNGT b -> !(b GE a)
1975 a UNLE b -> !(a GT b)
1976 a UNLT b -> !(a GE b)
1977 a NE b -> !(a EQ b) */
1978
1979 if (inverse)
1980 emit_insn (base_comparison (mask, operands[4], operands[5]));
1981 else
1982 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1983
1984 swap_bsl_operands = 1;
1985 break;
1986 case UNEQ:
1987 /* We check (a > b || b > a). Combining these comparisons gives us
1988 true iff (a != b && a ORDERED b); swapping the operands to BSL
1989 will then give us (a == b || a UNORDERED b) as intended. */
1990
1991 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1992 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1993 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1994 swap_bsl_operands = 1;
1995 break;
1996 case UNORDERED:
1997 /* Operands are ORDERED iff (a > b || b >= a).
1998 Swapping the operands to BSL will give the UNORDERED case. */
1999 swap_bsl_operands = 1;
2000 /* Fall through. */
2001 case ORDERED:
2002 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
2003 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
2004 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2005 break;
2006 default:
2007 gcc_unreachable ();
2008 }
2009
/* Step 4: select between operands 1 and 2 under MASK; the data operands are
   exchanged when MASK holds the inverse of the requested condition.  */
2010 if (swap_bsl_operands)
2011 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2012 operands[1]));
2013 else
2014 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2015 operands[2]));
2016 DONE;
2017 })
2018
;; Unsigned variant for the integer VDQIW modes:
;;   Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2
;; The comparison mask is built with vcgeu, vcgtu or vceq and the result is
;; selected with vbsl.  LEU and LTU are obtained by swapping the comparison
;; operands of GEU and GTU; NE computes EQ and swaps the vbsl data operands.
2019 (define_expand "vcondu<mode><mode>"
2020 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2021 (if_then_else:VDQIW
2022 (match_operator 3 "arm_comparison_operator"
2023 [(match_operand:VDQIW 4 "s_register_operand" "")
2024 (match_operand:VDQIW 5 "s_register_operand" "")])
2025 (match_operand:VDQIW 1 "s_register_operand" "")
2026 (match_operand:VDQIW 2 "s_register_operand" "")))]
2027 "TARGET_NEON"
2028 {
2029 rtx mask;
2030 int inverse = 0;
2031
2032 mask = gen_reg_rtx (<V_cmp_result>mode);
2033
/* Always compare against a register.  The compare-against-#0 generators
   (vcle, vclt) implement the signed LE/LT codes and would give the wrong
   answer for LEU/LTU, and the vcgeu/vcgtu patterns accept registers
   only.  */
2036 if (!REG_P (operands[5]))
2037 operands[5] = force_reg (<MODE>mode, operands[5]);
2038
2039 switch (GET_CODE (operands[3]))
2040 {
2041 case GEU:
2042 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2043 break;
2044
2045 case GTU:
2046 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2047 break;
2048
2049 case EQ:
2050 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2051 break;
2052
2053 case LEU:
/* a LEU b <=> b GEU a.  */
2057 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2058 break;
2059
2060 case LTU:
/* a LTU b <=> b GTU a.  */
2064 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2065 break;
2066
2067 case NE:
/* Compute EQ and exchange the vbsl data operands below.  */
2068 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2069 inverse = 1;
2070 break;
2071
2072 default:
2073 gcc_unreachable ();
2074 }
2075
2076 if (inverse)
2077 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2078 operands[1]));
2079 else
2080 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2081 operands[2]));
2082
2083 DONE;
2084 })
2085
2086 ;; Patterns for builtins.
2087
2088 ; good for plain vadd, vaddq.
2089
;; Builtin expander for vadd on the VCVTF modes.  The generic add<mode>3
;; pattern is used when the mode is not a float mode or when
;; flag_unsafe_math_optimizations is set; otherwise the UNSPEC form is
;; emitted.
2090 (define_expand "neon_vadd<mode>"
2091 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2092 (match_operand:VCVTF 1 "s_register_operand" "w")
2093 (match_operand:VCVTF 2 "s_register_operand" "w")]
2094 "TARGET_NEON"
2095 {
2096 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2097 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2098 else
2099 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2100 operands[2]));
2101 DONE;
2102 })
2103
;; Builtin expander for vadd on the VH modes; it always forwards to
;; add<mode>3_fp16 and requires TARGET_NEON_FP16INST.
2104 (define_expand "neon_vadd<mode>"
2105 [(match_operand:VH 0 "s_register_operand")
2106 (match_operand:VH 1 "s_register_operand")
2107 (match_operand:VH 2 "s_register_operand")]
2108 "TARGET_NEON_FP16INST"
2109 {
2110 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
2111 DONE;
2112 })
2113
;; Builtin expander for vsub on the VH modes; it always forwards to
;; sub<mode>3_fp16 and requires TARGET_NEON_FP16INST.
2114 (define_expand "neon_vsub<mode>"
2115 [(match_operand:VH 0 "s_register_operand")
2116 (match_operand:VH 1 "s_register_operand")
2117 (match_operand:VH 2 "s_register_operand")]
2118 "TARGET_NEON_FP16INST"
2119 {
2120 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2121 DONE;
2122 })
2123
2124 ; Note that NEON operations don't support the full IEEE 754 standard: in
2125 ; particular, denormal values are flushed to zero. This means that GCC cannot
2126 ; use those instructions for autovectorization, etc. unless
2127 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2128 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2129 ; header) must work in either case: if -funsafe-math-optimizations is given,
2130 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2131 ; expand to unspecs (which may potentially limit the extent to which they might
2132 ; be optimized by generic code).
2133
2134 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2135
;; vadd wrapped in UNSPEC_VADD.  The element suffix comes from <V_if_elem>
;; and the register operand modifier from <V_reg>.
2136 (define_insn "neon_vadd<mode>_unspec"
2137 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2138 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2139 (match_operand:VCVTF 2 "s_register_operand" "w")]
2140 UNSPEC_VADD))]
2141 "TARGET_NEON"
2142 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2143 [(set (attr "type")
2144 (if_then_else (match_test "<Is_float_mode>")
2145 (const_string "neon_fp_addsub_s<q>")
2146 (const_string "neon_add<q>")))]
2147 )
2148
;; vaddl: both inputs have a VDI mode and the result has the corresponding
;; <V_widen> mode.  <sup> (from the VADDL iterator) supplies the type letter
;; of the element suffix.
2149 (define_insn "neon_vaddl<sup><mode>"
2150 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2151 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2152 (match_operand:VDI 2 "s_register_operand" "w")]
2153 VADDL))]
2154 "TARGET_NEON"
2155 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2156 [(set_attr "type" "neon_add_long")]
2157 )
2158
;; vaddw: operand 1 and the result have the <V_widen> mode, operand 2 has
;; the VDI mode.
2159 (define_insn "neon_vaddw<sup><mode>"
2160 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2161 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2162 (match_operand:VDI 2 "s_register_operand" "w")]
2163 VADDW))]
2164 "TARGET_NEON"
2165 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2166 [(set_attr "type" "neon_add_widen")]
2167 )
2168
2169 ; vhadd and vrhadd.
2170
;; Halving add (vhadd) and rounding halving add (vrhadd) on the VDQIW modes.
;; The scheduling type carries the <q> suffix so that the D-register and
;; Q-register forms are classified separately, as neon_vhsub<sup><mode>
;; already does with neon_sub_halve<q>.
2171 (define_insn "neon_v<r>hadd<sup><mode>"
2172 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2173 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2174 (match_operand:VDQIW 2 "s_register_operand" "w")]
2175 VHADD))]
2176 "TARGET_NEON"
2177 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2178 [(set_attr "type" "neon_add_halve<q>")]
2179 )
2180
;; Builtin form of vqadd on the VDQIX modes, expressed as an unspec drawn
;; from the VQADD iterator.
2181 (define_insn "neon_vqadd<sup><mode>"
2182 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2183 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2184 (match_operand:VDQIX 2 "s_register_operand" "w")]
2185 VQADD))]
2186 "TARGET_NEON"
2187 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2188 [(set_attr "type" "neon_qadd<q>")]
2189 )
2190
;; vaddhn / vraddhn: both inputs have a VN mode (printed with %q) and the
;; result has the corresponding <V_narrow> mode (printed with %P).
2191 (define_insn "neon_v<r>addhn<mode>"
2192 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2193 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2194 (match_operand:VN 2 "s_register_operand" "w")]
2195 VADDHN))]
2196 "TARGET_NEON"
2197 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2198 [(set_attr "type" "neon_add_halve_narrow_q")]
2199 )
2200
2201 ;; Polynomial and Float multiplication.
;; Expressed as UNSPEC_VMUL over the VPF modes; <pf> supplies the type
;; letter of the element suffix.  The scheduling type is the FP multiply
;; type for float modes and the integer multiply type otherwise.
2202 (define_insn "neon_vmul<pf><mode>"
2203 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2204 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2205 (match_operand:VPF 2 "s_register_operand" "w")]
2206 UNSPEC_VMUL))]
2207 "TARGET_NEON"
2208 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2209 [(set (attr "type")
2210 (if_then_else (match_test "<Is_float_mode>")
2211 (const_string "neon_fp_mul_s<q>")
2212 (const_string "neon_mul_<V_elem_ch><q>")))]
2213 )
2214
;; Standard-named multiply on the VH modes (vmul.f16).  Only enabled when
;; both TARGET_NEON_FP16INST and flag_unsafe_math_optimizations hold.
2215 (define_insn "mul<mode>3"
2216 [(set
2217 (match_operand:VH 0 "s_register_operand" "=w")
2218 (mult:VH
2219 (match_operand:VH 1 "s_register_operand" "w")
2220 (match_operand:VH 2 "s_register_operand" "w")))]
2221 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2222 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2223 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2224 )
2225
;; Builtin multiply on the VH modes (vmul.f16).  Same RTL and output as
;; mul<mode>3 for VH, but it does not require
;; flag_unsafe_math_optimizations.
2226 (define_insn "neon_vmulf<mode>"
2227 [(set
2228 (match_operand:VH 0 "s_register_operand" "=w")
2229 (mult:VH
2230 (match_operand:VH 1 "s_register_operand" "w")
2231 (match_operand:VH 2 "s_register_operand" "w")))]
2232 "TARGET_NEON_FP16INST"
2233 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2234 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2235 )
2236
;; Builtin expander for vmla on the VDQW modes.  The mul<mode>3add<mode>_neon
;; pattern is used when the mode is not a float mode or when
;; flag_unsafe_math_optimizations is set; otherwise the UNSPEC form is
;; emitted.
2237 (define_expand "neon_vmla<mode>"
2238 [(match_operand:VDQW 0 "s_register_operand" "=w")
2239 (match_operand:VDQW 1 "s_register_operand" "0")
2240 (match_operand:VDQW 2 "s_register_operand" "w")
2241 (match_operand:VDQW 3 "s_register_operand" "w")]
2242 "TARGET_NEON"
2243 {
2244 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2245 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2246 operands[2], operands[3]));
2247 else
2248 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2249 operands[2], operands[3]));
2250 DONE;
2251 })
2252
;; Builtin expander for vfma on the VCVTF modes; requires TARGET_FMA.
;; Forwards to fma<mode>4_intrinsic with operand 1 moved to the last
;; position, after operands 2 and 3 (presumably the addend of the fused
;; operation -- confirm against fma<mode>4_intrinsic).
2253 (define_expand "neon_vfma<VCVTF:mode>"
2254 [(match_operand:VCVTF 0 "s_register_operand")
2255 (match_operand:VCVTF 1 "s_register_operand")
2256 (match_operand:VCVTF 2 "s_register_operand")
2257 (match_operand:VCVTF 3 "s_register_operand")]
2258 "TARGET_NEON && TARGET_FMA"
2259 {
2260 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2261 operands[1]));
2262 DONE;
2263 })
2264
;; Builtin expander for vfma on the VH modes; requires TARGET_NEON_FP16INST.
;; Forwards to fma<mode>4_intrinsic with operand 1 moved to the last
;; position, after operands 2 and 3.
2265 (define_expand "neon_vfma<VH:mode>"
2266 [(match_operand:VH 0 "s_register_operand")
2267 (match_operand:VH 1 "s_register_operand")
2268 (match_operand:VH 2 "s_register_operand")
2269 (match_operand:VH 3 "s_register_operand")]
2270 "TARGET_NEON_FP16INST"
2271 {
2272 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2273 operands[1]));
2274 DONE;
2275 })
2276
;; Builtin expander for vfms on the VCVTF modes; requires TARGET_FMA.
;; Forwards to fmsub<mode>4_intrinsic with operand 1 moved to the last
;; position, after operands 2 and 3.
2277 (define_expand "neon_vfms<VCVTF:mode>"
2278 [(match_operand:VCVTF 0 "s_register_operand")
2279 (match_operand:VCVTF 1 "s_register_operand")
2280 (match_operand:VCVTF 2 "s_register_operand")
2281 (match_operand:VCVTF 3 "s_register_operand")]
2282 "TARGET_NEON && TARGET_FMA"
2283 {
2284 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2285 operands[1]));
2286 DONE;
2287 })
2288
;; Builtin expander for vfms on the VH modes; requires TARGET_NEON_FP16INST.
;; Forwards to fmsub<mode>4_intrinsic with operand 1 moved to the last
;; position, after operands 2 and 3.
2289 (define_expand "neon_vfms<VH:mode>"
2290 [(match_operand:VH 0 "s_register_operand")
2291 (match_operand:VH 1 "s_register_operand")
2292 (match_operand:VH 2 "s_register_operand")
2293 (match_operand:VH 3 "s_register_operand")]
2294 "TARGET_NEON_FP16INST"
2295 {
2296 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2297 operands[1]));
2298 DONE;
2299 })
2300
2301 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2302
;; vmla wrapped in UNSPEC_VMLA.  Operand 1 is tied to the destination by the
;; "0" constraint, so only operands 0, 2 and 3 appear in the output template.
2303 (define_insn "neon_vmla<mode>_unspec"
2304 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2305 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2306 (match_operand:VDQW 2 "s_register_operand" "w")
2307 (match_operand:VDQW 3 "s_register_operand" "w")]
2308 UNSPEC_VMLA))]
2309 "TARGET_NEON"
2310 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2311 [(set (attr "type")
2312 (if_then_else (match_test "<Is_float_mode>")
2313 (const_string "neon_fp_mla_s<q>")
2314 (const_string "neon_mla_<V_elem_ch><q>")))]
2315 )
2316
;; vmlal: operand 1 (tied to the destination) and the result have the
;; <V_widen> mode; operands 2 and 3 have a VW mode.
2317 (define_insn "neon_vmlal<sup><mode>"
2318 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2319 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2320 (match_operand:VW 2 "s_register_operand" "w")
2321 (match_operand:VW 3 "s_register_operand" "w")]
2322 VMLAL))]
2323 "TARGET_NEON"
2324 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2325 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2326 )
2327
;; Builtin expander for vmls on the VDQW modes.  The
;; mul<mode>3neg<mode>add<mode>_neon pattern is used when the mode is not a
;; float mode or when flag_unsafe_math_optimizations is set; otherwise the
;; UNSPEC form is emitted.
2328 (define_expand "neon_vmls<mode>"
2329 [(match_operand:VDQW 0 "s_register_operand" "=w")
2330 (match_operand:VDQW 1 "s_register_operand" "0")
2331 (match_operand:VDQW 2 "s_register_operand" "w")
2332 (match_operand:VDQW 3 "s_register_operand" "w")]
2333 "TARGET_NEON"
2334 {
2335 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2336 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2337 operands[1], operands[2], operands[3]));
2338 else
2339 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2340 operands[2], operands[3]));
2341 DONE;
2342 })
2343
2344 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2345
;; vmls wrapped in UNSPEC_VMLS.  Operand 1 is tied to the destination by the
;; "0" constraint, so only operands 0, 2 and 3 appear in the output template.
2346 (define_insn "neon_vmls<mode>_unspec"
2347 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2348 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2349 (match_operand:VDQW 2 "s_register_operand" "w")
2350 (match_operand:VDQW 3 "s_register_operand" "w")]
2351 UNSPEC_VMLS))]
2352 "TARGET_NEON"
2353 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2354 [(set (attr "type")
2355 (if_then_else (match_test "<Is_float_mode>")
2356 (const_string "neon_fp_mla_s<q>")
2357 (const_string "neon_mla_<V_elem_ch><q>")))]
2358 )
2359
;; vmlsl: operand 1 (tied to the destination) and the result have the
;; <V_widen> mode; operands 2 and 3 have a VW mode.
2360 (define_insn "neon_vmlsl<sup><mode>"
2361 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2362 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2363 (match_operand:VW 2 "s_register_operand" "w")
2364 (match_operand:VW 3 "s_register_operand" "w")]
2365 VMLSL))]
2366 "TARGET_NEON"
2367 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2368 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2369 )
2370
2371 ;; vqdmulh, vqrdmulh
;; Both variants share this pattern over the VMDQI modes; <r> selects the
;; rounding mnemonic and the unspec comes from the VQDMULH iterator.
2372 (define_insn "neon_vq<r>dmulh<mode>"
2373 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2374 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2375 (match_operand:VMDQI 2 "s_register_operand" "w")]
2376 VQDMULH))]
2377 "TARGET_NEON"
2378 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2379 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2380 )
2381
2382 ;; vqrdmlah, vqrdmlsh
;; Requires TARGET_NEON_RDMA.  Operand 1 is tied to the destination by the
;; "0" constraint, so only operands 0, 2 and 3 appear in the output
;; template; <VQRDMLH_AS:neon_rdma_as> selects the mnemonic.
2383 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2384 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2385 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2386 (match_operand:VMDQI 2 "s_register_operand" "w")
2387 (match_operand:VMDQI 3 "s_register_operand" "w")]
2388 VQRDMLH_AS))]
2389 "TARGET_NEON_RDMA"
2390 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2391 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2392 )
2393
;; vqdmlal: operand 1 (tied to the destination) and the result have the
;; <V_widen> mode; operands 2 and 3 have a VMDI mode.
2394 (define_insn "neon_vqdmlal<mode>"
2395 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2396 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2397 (match_operand:VMDI 2 "s_register_operand" "w")
2398 (match_operand:VMDI 3 "s_register_operand" "w")]
2399 UNSPEC_VQDMLAL))]
2400 "TARGET_NEON"
2401 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2402 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2403 )
2404
;; vqdmlsl: operand 1 (tied to the destination) and the result have the
;; <V_widen> mode; operands 2 and 3 have a VMDI mode.
2405 (define_insn "neon_vqdmlsl<mode>"
2406 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2407 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2408 (match_operand:VMDI 2 "s_register_operand" "w")
2409 (match_operand:VMDI 3 "s_register_operand" "w")]
2410 UNSPEC_VQDMLSL))]
2411 "TARGET_NEON"
2412 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2413 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2414 )
2415
;; vmull: both inputs have a VW mode and the result has the corresponding
;; <V_widen> mode.
2416 (define_insn "neon_vmull<sup><mode>"
2417 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2418 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2419 (match_operand:VW 2 "s_register_operand" "w")]
2420 VMULL))]
2421 "TARGET_NEON"
2422 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2423 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2424 )
2425
;; vqdmull: both inputs have a VMDI mode and the result has the
;; corresponding <V_widen> mode.
2426 (define_insn "neon_vqdmull<mode>"
2427 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2428 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2429 (match_operand:VMDI 2 "s_register_operand" "w")]
2430 UNSPEC_VQDMULL))]
2431 "TARGET_NEON"
2432 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2433 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2434 )
2435
;; Builtin expander for vsub on the VCVTF modes.  The generic sub<mode>3
;; pattern is used when the mode is not a float mode or when
;; flag_unsafe_math_optimizations is set; otherwise the UNSPEC form is
;; emitted.
2436 (define_expand "neon_vsub<mode>"
2437 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2438 (match_operand:VCVTF 1 "s_register_operand" "w")
2439 (match_operand:VCVTF 2 "s_register_operand" "w")]
2440 "TARGET_NEON"
2441 {
2442 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2443 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2444 else
2445 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2446 operands[2]));
2447 DONE;
2448 })
2449
2450 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2451
;; vsub wrapped in UNSPEC_VSUB.  The element suffix comes from <V_if_elem>
;; and the register operand modifier from <V_reg>.
2452 (define_insn "neon_vsub<mode>_unspec"
2453 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2454 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2455 (match_operand:VCVTF 2 "s_register_operand" "w")]
2456 UNSPEC_VSUB))]
2457 "TARGET_NEON"
2458 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2459 [(set (attr "type")
2460 (if_then_else (match_test "<Is_float_mode>")
2461 (const_string "neon_fp_addsub_s<q>")
2462 (const_string "neon_sub<q>")))]
2463 )
2464
;; vsubl: both inputs have a VDI mode and the result has the corresponding
;; <V_widen> mode.
2465 (define_insn "neon_vsubl<sup><mode>"
2466 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2467 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2468 (match_operand:VDI 2 "s_register_operand" "w")]
2469 VSUBL))]
2470 "TARGET_NEON"
2471 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2472 [(set_attr "type" "neon_sub_long")]
2473 )
2474
;; vsubw: operand 1 and the result have the <V_widen> mode, operand 2 has
;; the VDI mode.
2475 (define_insn "neon_vsubw<sup><mode>"
2476 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2477 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2478 (match_operand:VDI 2 "s_register_operand" "w")]
2479 VSUBW))]
2480 "TARGET_NEON"
2481 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2482 [(set_attr "type" "neon_sub_widen")]
2483 )
2484
;; Builtin form of vqsub on the VDQIX modes, expressed as an unspec drawn
;; from the VQSUB iterator.
2485 (define_insn "neon_vqsub<sup><mode>"
2486 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2487 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2488 (match_operand:VDQIX 2 "s_register_operand" "w")]
2489 VQSUB))]
2490 "TARGET_NEON"
2491 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2492 [(set_attr "type" "neon_qsub<q>")]
2493 )
2494
;; vhsub on the VDQIW modes, expressed as an unspec drawn from the VHSUB
;; iterator.
2495 (define_insn "neon_vhsub<sup><mode>"
2496 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2497 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2498 (match_operand:VDQIW 2 "s_register_operand" "w")]
2499 VHSUB))]
2500 "TARGET_NEON"
2501 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2502 [(set_attr "type" "neon_sub_halve<q>")]
2503 )
2504
;; vsubhn / vrsubhn: both inputs have a VN mode (printed with %q) and the
;; result has the corresponding <V_narrow> mode (printed with %P).
2505 (define_insn "neon_v<r>subhn<mode>"
2506 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2507 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2508 (match_operand:VN 2 "s_register_operand" "w")]
2509 VSUBHN))]
2510 "TARGET_NEON"
2511 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2512 [(set_attr "type" "neon_sub_halve_narrow_q")]
2513 )
2514
2515 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2516 ;; without unsafe math optimizations.
;; The expander always finishes with DONE, so the RTL template below only
;; declares the operands; the emitted insn is chosen in the C body.
2517 (define_expand "neon_vc<cmp_op><mode>"
2518 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2519 (neg:<V_cmp_result>
2520 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2521 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2522 "TARGET_NEON"
2523 {
2524 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2525 are enabled. */
2526 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2527 && !flag_unsafe_math_optimizations)
2528 {
2529 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2530 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2531 whereas this expander iterates over the integer modes as well,
2532 but we will never expand to UNSPECs for the integer comparisons. */
2533 switch (<MODE>mode)
2534 {
2535 case V2SFmode:
2536 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2537 operands[1],
2538 operands[2]));
2539 break;
2540 case V4SFmode:
2541 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2542 operands[1],
2543 operands[2]));
2544 break;
2545 default:
2546 gcc_unreachable ();
2547 }
2548 }
2549 else
/* Integer modes, or float modes under -funsafe-math-optimizations, use
   the plain comparison pattern.  */
2550 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2551 operands[1],
2552 operands[2]));
2553 DONE;
2554 }
2555 )
2556
;; Comparison on the VDQW modes written with the COMPARISONS RTL codes.  It
;; is not available for float modes unless flag_unsafe_math_optimizations is
;; set.  Alternative 0 compares two registers; alternative 1 compares
;; against zero and prints "#0" as the last operand.
2557 (define_insn "neon_vc<cmp_op><mode>_insn"
2558 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2559 (neg:<V_cmp_result>
2560 (COMPARISONS:<V_cmp_result>
2561 (match_operand:VDQW 1 "s_register_operand" "w,w")
2562 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2563 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2564 && !flag_unsafe_math_optimizations)"
2565 {
/* Build the assembler template at output time: the type letter is "f"
   for float modes and <cmp_type> otherwise, and the last operand depends
   on the matched alternative.  The instruction is printed here, so an
   empty template is returned.  */
2566 char pattern[100];
2567 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2568 " %%<V_reg>1, %s",
2569 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2570 ? "f" : "<cmp_type>",
2571 which_alternative == 0
2572 ? "%<V_reg>2" : "#0");
2573 output_asm_insn (pattern, operands);
2574 return "";
2575 }
2576 [(set (attr "type")
2577 (if_then_else (match_operand 2 "zero_operand")
2578 (const_string "neon_compare_zero<q>")
2579 (const_string "neon_compare<q>")))]
2580 )
2581
;; Float comparison on the VCVTF modes expressed as an unspec from the
;; NEON_VCMP iterator.  Alternative 0 compares two registers; alternative 1
;; compares against zero and prints "#0" as the last operand.
2582 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2583 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2584 (unspec:<V_cmp_result>
2585 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2586 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2587 NEON_VCMP))]
2588 "TARGET_NEON"
2589 {
/* The instruction is printed here, so an empty template is returned.  */
2590 char pattern[100];
2591 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2592 " %%<V_reg>1, %s",
2593 which_alternative == 0
2594 ? "%<V_reg>2" : "#0");
2595 output_asm_insn (pattern, operands);
2596 return "";
2597 }
2598 [(set_attr "type" "neon_fp_compare_s<q>")]
2599 )
2600
;; Builtin comparison expander for the VH modes; requires
;; TARGET_NEON_FP16INST.  It emits the UNSPEC pattern unless
;; flag_unsafe_math_optimizations is set.
2601 (define_expand "neon_vc<cmp_op><mode>"
2602 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2603 (neg:<V_cmp_result>
2604 (COMPARISONS:VH
2605 (match_operand:VH 1 "s_register_operand")
2606 (match_operand:VH 2 "reg_or_zero_operand")))]
2607 "TARGET_NEON_FP16INST"
2608 {
2609 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2610 are enabled. */
2611 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2612 && !flag_unsafe_math_optimizations)
2613 emit_insn
2614 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2615 (operands[0], operands[1], operands[2]));
2616 else
2617 emit_insn
2618 (gen_neon_vc<cmp_op><mode>_fp16insn
2619 (operands[0], operands[1], operands[2]));
2620 DONE;
2621 })
2622
;; Comparison on the VH modes written with the COMPARISONS RTL codes.  It is
;; not available for float modes unless flag_unsafe_math_optimizations is
;; set.  Alternative 0 compares two registers; alternative 1 compares
;; against zero and prints "#0" as the last operand.
2623 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2624 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2625 (neg:<V_cmp_result>
2626 (COMPARISONS:<V_cmp_result>
2627 (match_operand:VH 1 "s_register_operand" "w,w")
2628 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2629 "TARGET_NEON_FP16INST
2630 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2631 && !flag_unsafe_math_optimizations)"
2632 {
/* The instruction is printed here, so an empty template is returned.  */
2633 char pattern[100];
2634 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2635 " %%<V_reg>1, %s",
2636 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2637 ? "f" : "<cmp_type>",
2638 which_alternative == 0
2639 ? "%<V_reg>2" : "#0");
2640 output_asm_insn (pattern, operands);
2641 return "";
2642 }
2643 [(set (attr "type")
2644 (if_then_else (match_operand 2 "zero_operand")
2645 (const_string "neon_compare_zero<q>")
2646 (const_string "neon_compare<q>")))])
2647
;; Comparison on the VH modes expressed as an unspec from the NEON_VCMP
;; iterator.  Alternative 0 compares two registers; alternative 1 compares
;; against zero and prints "#0" as the last operand.
2648 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2649 [(set
2650 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2651 (unspec:<V_cmp_result>
2652 [(match_operand:VH 1 "s_register_operand" "w,w")
2653 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2654 NEON_VCMP))]
2655 "TARGET_NEON_FP16INST"
2656 {
/* The instruction is printed here, so an empty template is returned.  */
2657 char pattern[100];
2658 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2659 " %%<V_reg>1, %s",
2660 which_alternative == 0
2661 ? "%<V_reg>2" : "#0");
2662 output_asm_insn (pattern, operands);
2663 return "";
2664 }
2665 [(set_attr "type" "neon_fp_compare_s<q>")])
2666
;; Unsigned comparisons (the GTUGEU codes) on the integer VDQIW modes,
;; printed with a ".u" element suffix.  Both inputs must be registers; there
;; is no compare-against-zero alternative.
2667 (define_insn "neon_vc<cmp_op>u<mode>"
2668 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2669 (neg:<V_cmp_result>
2670 (GTUGEU:<V_cmp_result>
2671 (match_operand:VDQIW 1 "s_register_operand" "w")
2672 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2673 "TARGET_NEON"
2674 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2675 [(set_attr "type" "neon_compare<q>")]
2676 )
2677
;; Builtin expander for the absolute-value comparisons (GTGE codes applied
;; to abs of each input) on the VCVTF modes.  The RTL form is used when
;; flag_unsafe_math_optimizations is set, the UNSPEC form otherwise.
2678 (define_expand "neon_vca<cmp_op><mode>"
2679 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2680 (neg:<V_cmp_result>
2681 (GTGE:<V_cmp_result>
2682 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2683 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2684 "TARGET_NEON"
2685 {
2686 if (flag_unsafe_math_optimizations)
2687 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2688 operands[2]));
2689 else
2690 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2691 operands[1],
2692 operands[2]));
2693 DONE;
2694 }
2695 )
2696
;; Absolute-value comparison on the VCVTF modes in plain RTL (GTGE codes on
;; abs of each input), emitted as vac<cmp_op>.  Only available when
;; flag_unsafe_math_optimizations is set.
2697 (define_insn "neon_vca<cmp_op><mode>_insn"
2698 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2699 (neg:<V_cmp_result>
2700 (GTGE:<V_cmp_result>
2701 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2702 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2703 "TARGET_NEON && flag_unsafe_math_optimizations"
2704 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2705 [(set_attr "type" "neon_fp_compare_s<q>")]
2706 )
2707
;; Absolute-value comparison on the VCVTF modes expressed as an unspec from
;; the NEON_VACMP iterator, emitted as vac<cmp_op_unsp>.
2708 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2709 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2710 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2711 (match_operand:VCVTF 2 "s_register_operand" "w")]
2712 NEON_VACMP))]
2713 "TARGET_NEON"
2714 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2715 [(set_attr "type" "neon_fp_compare_s<q>")]
2716 )
2717
;; Builtin expander for the absolute-value comparisons (GLTE codes applied
;; to abs of each input) on the VH modes; requires TARGET_NEON_FP16INST.
;; The RTL form is used when flag_unsafe_math_optimizations is set, the
;; UNSPEC form otherwise.
2718 (define_expand "neon_vca<cmp_op><mode>"
2719 [(set
2720 (match_operand:<V_cmp_result> 0 "s_register_operand")
2721 (neg:<V_cmp_result>
2722 (GLTE:<V_cmp_result>
2723 (abs:VH (match_operand:VH 1 "s_register_operand"))
2724 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2725 "TARGET_NEON_FP16INST"
2726 {
2727 if (flag_unsafe_math_optimizations)
2728 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2729 (operands[0], operands[1], operands[2]));
2730 else
2731 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2732 (operands[0], operands[1], operands[2]));
2733 DONE;
2734 })
2735
;; Generic-RTL form of the absolute-value compare on VH vectors (negated
;; GLTE comparison of the two ABS operands).  Requires both
;; TARGET_NEON_FP16INST and -funsafe-math-optimizations.
2736 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2737 [(set
2738 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2739 (neg:<V_cmp_result>
2740 (GLTE:<V_cmp_result>
2741 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2742 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2743 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2744 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2745 [(set_attr "type" "neon_fp_compare_s<q>")]
2746 )
2747
;; Unspec (NEON_VAGLTE) form of the absolute-value compare on VH vectors,
;; used by the neon_vca<cmp_op><mode> expander when
;; -funsafe-math-optimizations is off.  The pattern operates on VH vectors
;; and is only generated by an expander gated on TARGET_NEON_FP16INST, so it
;; carries the same condition rather than plain TARGET_NEON; otherwise the
;; insn would be recognisable on targets without the FP16 instructions.
(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          NEON_VAGLTE))]
  "TARGET_NEON_FP16INST"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2758
;; Compare-against-zero expander for VH vectors: forwards to the
;; two-operand neon_vc<cmp_op><mode> generator with an all-zero constant of
;; the vector mode as the second comparison operand.
(define_expand "neon_vc<cmp_op>z<mode>"
  [(set
    (match_operand:<V_cmp_result> 0 "s_register_operand")
    (COMPARISONS:<V_cmp_result>
     (match_operand:VH 1 "s_register_operand")
     (const_int 0)))]
  "TARGET_NEON_FP16INST"
{
  rtx zero = CONST0_RTX (<MODE>mode);

  emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1], zero));
  DONE;
})
2771
;; VTST on VDQIW vectors, modelled as UNSPEC_VTST of operands 1 and 2.
;; Emits vtst.<V_sz_elem> with all three operands printed via %<V_reg>.
2772 (define_insn "neon_vtst<mode>"
2773 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2774 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2775 (match_operand:VDQIW 2 "s_register_operand" "w")]
2776 UNSPEC_VTST))]
2777 "TARGET_NEON"
2778 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2779 [(set_attr "type" "neon_tst<q>")]
2780 )
2781
;; VABD on VDQIW vectors, modelled with the VABD unspec.  The <sup>
;; attribute appears both in the pattern name and as the type prefix of the
;; emitted mnemonic.
2782 (define_insn "neon_vabd<sup><mode>"
2783 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2784 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2785 (match_operand:VDQIW 2 "s_register_operand" "w")]
2786 VABD))]
2787 "TARGET_NEON"
2788 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2789 [(set_attr "type" "neon_abd<q>")]
2790 )
2791
;; VABD on VH vectors, modelled as UNSPEC_VABD_F.  Requires
;; TARGET_NEON_FP16INST.
2792 (define_insn "neon_vabd<mode>"
2793 [(set (match_operand:VH 0 "s_register_operand" "=w")
2794 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2795 (match_operand:VH 2 "s_register_operand" "w")]
2796 UNSPEC_VABD_F))]
2797 "TARGET_NEON_FP16INST"
2798 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2799 [(set_attr "type" "neon_abd<q>")]
2800 )
2801
;; VABD on VCVTF vectors, modelled as UNSPEC_VABD_F (the same unspec as the
;; VH variant neon_vabd<mode>, but enabled under plain TARGET_NEON).
2802 (define_insn "neon_vabdf<mode>"
2803 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2804 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2805 (match_operand:VCVTF 2 "s_register_operand" "w")]
2806 UNSPEC_VABD_F))]
2807 "TARGET_NEON"
2808 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2809 [(set_attr "type" "neon_fp_abd_s<q>")]
2810 )
2811
;; VABDL: takes two VW vectors and produces a result in the <V_widen> mode.
;; The destination is printed with %q and the sources with %P.
2812 (define_insn "neon_vabdl<sup><mode>"
2813 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2814 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2815 (match_operand:VW 2 "s_register_operand" "w")]
2816 VABDL))]
2817 "TARGET_NEON"
2818 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2819 [(set_attr "type" "neon_abd_long")]
2820 )
2821
;; VABA: adds the VABD unspec of operands 2 and 3 to operand 1.  Operand 1
;; is tied to the output register (constraint "0"), which is why the
;; assembly template only names operands 0, 2 and 3.
2822 (define_insn "neon_vaba<sup><mode>"
2823 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2824 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2825 (match_operand:VDQIW 3 "s_register_operand" "w")]
2826 VABD)
2827 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2828 "TARGET_NEON"
2829 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2830 [(set_attr "type" "neon_arith_acc<q>")]
2831 )
2832
;; VABAL: adds the VABDL unspec of the VW operands 2 and 3 to the
;; <V_widen> accumulator in operand 1, which is tied to the output
;; (constraint "0").
2833 (define_insn "neon_vabal<sup><mode>"
2834 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2835 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2836 (match_operand:VW 3 "s_register_operand" "w")]
2837 VABDL)
2838 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2839 "TARGET_NEON"
2840 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2841 [(set_attr "type" "neon_arith_acc<q>")]
2842 )
2843
;; Element-wise max/min on VDQIW vectors.  The VMAXMIN unspec iterator
;; selects the operation (<maxmin>) and the type prefix (<sup>) used in the
;; mnemonic.
2844 (define_insn "neon_v<maxmin><sup><mode>"
2845 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2846 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2847 (match_operand:VDQIW 2 "s_register_operand" "w")]
2848 VMAXMIN))]
2849 "TARGET_NEON"
2850 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2851 [(set_attr "type" "neon_minmax<q>")]
2852 )
2853
;; v<maxmin> on VCVTF vectors via the VMAXMINF unspec; enabled under
;; TARGET_NEON.
2854 (define_insn "neon_v<maxmin>f<mode>"
2855 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2856 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2857 (match_operand:VCVTF 2 "s_register_operand" "w")]
2858 VMAXMINF))]
2859 "TARGET_NEON"
2860 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2861 [(set_attr "type" "neon_fp_minmax_s<q>")]
2862 )
2863
;; VH counterpart of the VCVTF neon_v<maxmin>f<mode> pattern: same
;; VMAXMINF unspec and template, but gated on TARGET_NEON_FP16INST.
2864 (define_insn "neon_v<maxmin>f<mode>"
2865 [(set (match_operand:VH 0 "s_register_operand" "=w")
2866 (unspec:VH
2867 [(match_operand:VH 1 "s_register_operand" "w")
2868 (match_operand:VH 2 "s_register_operand" "w")]
2869 VMAXMINF))]
2870 "TARGET_NEON_FP16INST"
2871 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2872 [(set_attr "type" "neon_fp_minmax_s<q>")]
2873 )
2874
;; vp<maxmin>.f16 on V4HF operands (VPMAXMINF unspec).  Defined for the
;; single mode V4HF only, with every operand printed via %P; requires
;; TARGET_NEON_FP16INST.
2875 (define_insn "neon_vp<maxmin>fv4hf"
2876 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2877 (unspec:V4HF
2878 [(match_operand:V4HF 1 "s_register_operand" "w")
2879 (match_operand:V4HF 2 "s_register_operand" "w")]
2880 VPMAXMINF))]
2881 "TARGET_NEON_FP16INST"
2882 "vp<maxmin>.f16\t%P0, %P1, %P2"
2883 [(set_attr "type" "neon_reduc_minmax")]
2884 )
2885
;; <fmaxmin_op> on VH vectors via the VMAXMINFNM unspec; requires
;; TARGET_NEON_FP16INST.
2886 (define_insn "neon_<fmaxmin_op><mode>"
2887 [(set
2888 (match_operand:VH 0 "s_register_operand" "=w")
2889 (unspec:VH
2890 [(match_operand:VH 1 "s_register_operand" "w")
2891 (match_operand:VH 2 "s_register_operand" "w")]
2892 VMAXMINFNM))]
2893 "TARGET_NEON_FP16INST"
2894 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2895 [(set_attr "type" "neon_fp_minmax_s<q>")]
2896 )
2897
2898 ;; v<maxmin>nm intrinsics.
;; VCVTF variant of the VMAXMINFNM pattern; needs TARGET_VFP5 in addition
;; to TARGET_NEON.
2899 (define_insn "neon_<fmaxmin_op><mode>"
2900 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2901 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2902 (match_operand:VCVTF 2 "s_register_operand" "w")]
2903 VMAXMINFNM))]
2904 "TARGET_NEON && TARGET_VFP5"
2905 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2906 [(set_attr "type" "neon_fp_minmax_s<q>")]
2907 )
2908
2909 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL, condition and output template as the VCVTF
;; neon_<fmaxmin_op><mode> pattern; only the pattern name differs
;; (<fmaxmin><mode>3).
2910 (define_insn "<fmaxmin><mode>3"
2911 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2912 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2913 (match_operand:VCVTF 2 "s_register_operand" "w")]
2914 VMAXMINFNM))]
2915 "TARGET_NEON && TARGET_VFP5"
2916 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2917 [(set_attr "type" "neon_fp_minmax_s<q>")]
2918 )
2919
;; Builtin entry point for VPADD on VD vectors: hands all three operands
;; unchanged to the neon_vpadd_internal<mode> generator.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand" "=w")
   (match_operand:VD 1 "s_register_operand" "w")
   (match_operand:VD 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx result = operands[0];

  emit_insn (gen_neon_vpadd_internal<mode> (result, operands[1], operands[2]));
  DONE;
})
2930
;; VPADDL: single VDQIW source, result in the <V_double_width> mode,
;; modelled with the VPADDL unspec.
2931 (define_insn "neon_vpaddl<sup><mode>"
2932 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2933 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2934 VPADDL))]
2935 "TARGET_NEON"
2936 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2937 [(set_attr "type" "neon_reduc_add_long")]
2938 )
2939
;; VPADAL: operand 1 is the <V_double_width> accumulator, tied to the output
;; (constraint "0"); operand 2 is the VDQIW source.  Only operands 0 and 2
;; appear in the assembly template.
2940 (define_insn "neon_vpadal<sup><mode>"
2941 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2942 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2943 (match_operand:VDQIW 2 "s_register_operand" "w")]
2944 VPADAL))]
2945 "TARGET_NEON"
2946 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2947 [(set_attr "type" "neon_reduc_add_acc")]
2948 )
2949
;; vp<maxmin> on VDI vectors via the VPMAXMIN unspec; <sup> supplies the
;; type prefix of the mnemonic.
2950 (define_insn "neon_vp<maxmin><sup><mode>"
2951 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2952 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2953 (match_operand:VDI 2 "s_register_operand" "w")]
2954 VPMAXMIN))]
2955 "TARGET_NEON"
2956 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2957 [(set_attr "type" "neon_reduc_minmax<q>")]
2958 )
2959
;; vp<maxmin> on VCVTF vectors via the VPMAXMINF unspec.
2960 (define_insn "neon_vp<maxmin>f<mode>"
2961 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2962 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2963 (match_operand:VCVTF 2 "s_register_operand" "w")]
2964 VPMAXMINF))]
2965 "TARGET_NEON"
2966 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2967 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
2968 )
2969
;; VRECPS on VCVTF vectors, modelled as UNSPEC_VRECPS of operands 1 and 2.
2970 (define_insn "neon_vrecps<mode>"
2971 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2972 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2973 (match_operand:VCVTF 2 "s_register_operand" "w")]
2974 UNSPEC_VRECPS))]
2975 "TARGET_NEON"
2976 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2977 [(set_attr "type" "neon_fp_recps_s<q>")]
2978 )
2979
;; VH counterpart of the VCVTF neon_vrecps<mode> pattern; gated on
;; TARGET_NEON_FP16INST.
2980 (define_insn "neon_vrecps<mode>"
2981 [(set
2982 (match_operand:VH 0 "s_register_operand" "=w")
2983 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2984 (match_operand:VH 2 "s_register_operand" "w")]
2985 UNSPEC_VRECPS))]
2986 "TARGET_NEON_FP16INST"
2987 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2988 [(set_attr "type" "neon_fp_recps_s<q>")]
2989 )
2990
;; VRSQRTS on VCVTF vectors, modelled as UNSPEC_VRSQRTS of operands 1 and 2.
2991 (define_insn "neon_vrsqrts<mode>"
2992 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2993 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2994 (match_operand:VCVTF 2 "s_register_operand" "w")]
2995 UNSPEC_VRSQRTS))]
2996 "TARGET_NEON"
2997 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2998 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2999 )
3000
;; VH counterpart of the VCVTF neon_vrsqrts<mode> pattern; gated on
;; TARGET_NEON_FP16INST.
3001 (define_insn "neon_vrsqrts<mode>"
3002 [(set
3003 (match_operand:VH 0 "s_register_operand" "=w")
3004 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3005 (match_operand:VH 2 "s_register_operand" "w")]
3006 UNSPEC_VRSQRTS))]
3007 "TARGET_NEON_FP16INST"
3008 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3009 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3010 )
3011
;; Builtin entry point for vector ABS on VDQW modes: delegates to the
;; abs<mode>2 generator.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_abs<mode>2 (dest, src));
  DONE;
})
3020
;; VQABS on VDQIW vectors, modelled as UNSPEC_VQABS of operand 1.
3021 (define_insn "neon_vqabs<mode>"
3022 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3023 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3024 UNSPEC_VQABS))]
3025 "TARGET_NEON"
3026 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3027 [(set_attr "type" "neon_qabs<q>")]
3028 )
3029
;; Generic bswap RTL on VDQHSD vectors, emitted as vrev<V_sz_elem>.8.
3030 (define_insn "neon_bswap<mode>"
3031 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3032 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3033 "TARGET_NEON"
3034 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3035 [(set_attr "type" "neon_rev<q>")]
3036 )
3037
;; Builtin entry point for vector negation on VDQW modes: delegates to the
;; neg<mode>2 generator.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neg<mode>2 (dest, src));
  DONE;
})
3046
;; copysign expander for VCVTF vectors.  Builds an integer vector whose
;; elements are all 0x80000000, copies operand 2 into operand 0, and then
;; emits neon_vbsl with that mask, operand 0 and operand 1.  Presumably
;; vbsl takes the masked (sign) bits from operand 0 and the rest from
;; operand 1 -- confirm against the neon_vbsl<mode> pattern.
;;
;; The mask elements are created with gen_int_mode so the CONST_INT is
;; sign-extended for the element mode; a plain GEN_INT (0x80000000) is not
;; in canonical form for a 32-bit mode when HOST_WIDE_INT is 64 bits wide.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
  "{
     rtx v_bitmask_cast;
     rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
     int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
     rtvec v = rtvec_alloc (n_elt);
     /* Canonical (sign-extended) constant with only the top bit of the
        integer element set.  */
     rtx sign_bit
       = gen_int_mode (0x80000000,
                       GET_MODE_INNER (<VCVTF:V_cmp_result>mode));

     /* Create bitmask for vector select.  */
     for (i = 0; i < n_elt; ++i)
       RTVEC_ELT (v, i) = sign_bit;

     emit_move_insn (v_bitmask,
                     gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
     emit_move_insn (operands[0], operands[2]);
     /* Reinterpret the integer mask in the floating-point vector mode.  */
     v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                           <VCVTF:V_cmp_result>mode, 0);
     emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
                                     operands[1]));

     DONE;
  }"
)
3073
;; VQNEG on VDQIW vectors, modelled as UNSPEC_VQNEG of operand 1.
3074 (define_insn "neon_vqneg<mode>"
3075 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3076 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3077 UNSPEC_VQNEG))]
3078 "TARGET_NEON"
3079 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3080 [(set_attr "type" "neon_qneg<q>")]
3081 )
3082
;; VCLS on VDQIW vectors, modelled as UNSPEC_VCLS of operand 1.
3083 (define_insn "neon_vcls<mode>"
3084 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3085 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3086 UNSPEC_VCLS))]
3087 "TARGET_NEON"
3088 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3089 [(set_attr "type" "neon_cls<q>")]
3090 )
3091
;; Generic clz RTL on VDQIW vectors, emitted as vclz.  The neon_vclz<mode>
;; expander calls this pattern's generator.
3092 (define_insn "clz<mode>2"
3093 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3094 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3095 "TARGET_NEON"
3096 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3097 [(set_attr "type" "neon_cnt<q>")]
3098 )
3099
;; Builtin entry point for VCLZ on VDQIW modes: delegates to the clz<mode>2
;; generator.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_clz<mode>2 (dest, src));
  DONE;
})
3108
;; Generic popcount RTL on VE vectors, emitted as vcnt.  The
;; neon_vcnt<mode> expander calls this pattern's generator.
3109 (define_insn "popcount<mode>2"
3110 [(set (match_operand:VE 0 "s_register_operand" "=w")
3111 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3112 "TARGET_NEON"
3113 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3114 [(set_attr "type" "neon_cnt<q>")]
3115 )
3116
;; Builtin entry point for VCNT on VE modes: delegates to the
;; popcount<mode>2 generator.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand" "=w")
   (match_operand:VE 1 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_popcount<mode>2 (dest, src));
  DONE;
})
3125
;; VRECPE on VH vectors (UNSPEC_VRECPE), always emitted as vrecpe.f16;
;; requires TARGET_NEON_FP16INST.
3126 (define_insn "neon_vrecpe<mode>"
3127 [(set (match_operand:VH 0 "s_register_operand" "=w")
3128 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3129 UNSPEC_VRECPE))]
3130 "TARGET_NEON_FP16INST"
3131 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3132 [(set_attr "type" "neon_fp_recpe_s<q>")]
3133 )
3134
;; VRECPE on V32 vectors (UNSPEC_VRECPE); the element suffix comes from
;; <V_u_elem>.
3135 (define_insn "neon_vrecpe<mode>"
3136 [(set (match_operand:V32 0 "s_register_operand" "=w")
3137 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3138 UNSPEC_VRECPE))]
3139 "TARGET_NEON"
3140 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3141 [(set_attr "type" "neon_fp_recpe_s<q>")]
3142 )
3143
;; VRSQRTE on V32 vectors (UNSPEC_VRSQRTE); the element suffix comes from
;; <V_u_elem>.
3144 (define_insn "neon_vrsqrte<mode>"
3145 [(set (match_operand:V32 0 "s_register_operand" "=w")
3146 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3147 UNSPEC_VRSQRTE))]
3148 "TARGET_NEON"
3149 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3150 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3151 )
3152
;; Builtin entry point for VMVN on VDQIW modes: delegates to the
;; one_cmpl<mode>2 generator.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_one_cmpl<mode>2 (dest, src));
  DONE;
})
3161
;; Extract one element of a VD vector into an SImode core register with
;; sign extension (vmov.s<size>).  Operand 2 is the lane index; on
;; big-endian targets it is mirrored within the vector before printing.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Count the lane from the other end of the vector.  */
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
3180
;; Extract one element of a VD vector into an SImode core register with
;; zero extension (vmov.u<size>).  Operand 2 is the lane index; on
;; big-endian targets it is mirrored within the vector before printing.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Count the lane from the other end of the vector.  */
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
3199
;; Sign-extending lane extract from a VQ2 vector.  The lane index is split
;; into (a) which half of the vector holds the lane and (b) the lane's
;; position inside that half; the instruction is then printed against a
;; <V_HALF>-mode register built from operand 1's register number.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  unsigned int half_nelts = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % half_nelts;
  int base_regno = REGNO (operands[1]);
  rtx xops[3];

  /* On big-endian, mirror the index within the selected half.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = half_nelts - 1 - lane_in_half;

  xops[0] = operands[0];
  /* The register number advances by 2 for the second half.  */
  xops[1] = gen_rtx_REG (<V_HALF>mode, base_regno + 2 * (lane / half_nelts));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything was already printed by output_asm_insn.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3226
;; Zero-extending lane extract from a VQ2 vector.  The lane index is split
;; into (a) which half of the vector holds the lane and (b) the lane's
;; position inside that half; the instruction is then printed against a
;; <V_HALF>-mode register built from operand 1's register number.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  unsigned int half_nelts = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % half_nelts;
  int base_regno = REGNO (operands[1]);
  rtx xops[3];

  /* On big-endian, mirror the index within the selected half.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = half_nelts - 1 - lane_in_half;

  xops[0] = operands[0];
  /* The register number advances by 2 for the second half.  */
  xops[1] = gen_rtx_REG (<V_HALF>mode, base_regno + 2 * (lane / half_nelts));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything was already printed by output_asm_insn.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3253
;; vget_lane builtin expander for VDQW modes.  After the big-endian lane
;; fix-up, vectors with 32-bit elements go through vec_extract; all other
;; element sizes use the sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers assume vldm element ordering in memory,
         while generic RTL assumes array ordering.  Translate the lane
         number from the former to the latter by flipping it within one
         64-bit group of elements.  */
      unsigned int nelts_per_reg = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= nelts_per_reg - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3283
;; Unsigned vget_lane builtin expander for VDQIW modes.  After the
;; big-endian lane fix-up, vectors with 32-bit elements go through
;; vec_extract; all other element sizes use the zero-extending internal
;; pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers assume vldm element ordering in memory,
         while generic RTL assumes array ordering.  Translate the lane
         number from the former to the latter by flipping it within one
         64-bit group of elements.  */
      unsigned int nelts_per_reg = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= nelts_per_reg - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3313
;; vget_lane for a DImode source.  The lane operand (operand 2) is not
;; examined; the expander simply moves operand 1 into operand 0.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=r")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
3323
;; vget_lane for V2DI: after the big-endian lane fix-up, lane 0 is read as
;; the low DImode part of operand 1 and lane 1 as the high part.  Any other
;; lane value trips the assertion.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  int lane;
  rtx half;

  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers assume vldm element ordering in memory,
         while generic RTL assumes array ordering.  With two elements,
         translating between the two just toggles the low bit.  */
      unsigned int flipped = INTVAL (operands[2]);

      flipped ^= 1;
      operands[2] = GEN_INT (flipped);
    }

  lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  if (lane == 0)
    half = gen_lowpart (DImode, operands[1]);
  else
    half = gen_highpart (DImode, operands[1]);

  emit_move_insn (operands[0], half);
  DONE;
})
3352
;; vset_lane builtin expander for VDQ modes.  Operand 1 is the new element,
;; operand 2 the input vector and operand 3 the lane.  The lane is passed
;; to vec_set<mode>_internal as a one-hot mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "=w")
   (match_operand:<V_elem> 1 "s_register_operand" "r")
   (match_operand:VDQ 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);
  rtx lane_mask;

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane within one 64-bit group of elements.  */
      unsigned int nelts_per_reg = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);

      lane ^= nelts_per_reg - 1;
    }

  lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         lane_mask, operands[2]));
  DONE;
})
3373
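3374 ; Operands 2 & 3 are ignored here, as the lane operand is in neon_vget_lanedi: a DImode value has only a single element, so the expander just moves operand 1 into operand 0.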
3375
;; vset_lane for a DImode destination.  Only operands 0 and 1 are used: the
;; new value in operand 1 is moved straight into operand 0.
(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")
   (match_operand:DI 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx new_value = operands[1];

  emit_move_insn (dest, new_value);
  DONE;
})
3386
;; vcreate builtin expander: views the DImode operand 1 in the VD_RE vector
;; mode via gen_lowpart and moves it into operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand" "")
   (match_operand:DI 1 "general_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
3396
;; vec_duplicate of a core-register scalar (constraint "r") into every
;; element of a VX vector, emitted as vdup.<size>.
3397 (define_insn "neon_vdup_n<mode>"
3398 [(set (match_operand:VX 0 "s_register_operand" "=w")
3399 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3400 "TARGET_NEON"
3401 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3402 [(set_attr "type" "neon_from_gp<q>")]
3403 )
3404
;; vec_duplicate of an HFmode value held in a core register into a V4HF
;; vector; emitted as vdup.16 with the destination printed via %P.
3405 (define_insn "neon_vdup_nv4hf"
3406 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3407 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3408 "TARGET_NEON"
3409 "vdup.16\t%P0, %1"
3410 [(set_attr "type" "neon_from_gp")]
3411 )
3412
;; vec_duplicate of an HFmode value held in a core register into a V8HF
;; vector; emitted as vdup.16 with the destination printed via %q.
3413 (define_insn "neon_vdup_nv8hf"
3414 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3415 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3416 "TARGET_NEON"
3417 "vdup.16\t%q0, %1"
3418 [(set_attr "type" "neon_from_gp_q")]
3419 )
3420
;; vec_duplicate for V32 vectors with two alternatives: the scalar in a
;; register matching "r" (printed as %1) or one matching "t" (printed as
;; %y1).  The "type" attribute differs per alternative.
3421 (define_insn "neon_vdup_n<mode>"
3422 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3423 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3424 "TARGET_NEON"
3425 "@
3426 vdup.<V_sz_elem>\t%<V_reg>0, %1
3427 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3428 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3429 )
3430
;; vdup_n for DImode: both operands are DImode, so the expansion is a plain
;; move of operand 1 into operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
}
)
3440
;; vec_duplicate of a DImode value into V2DI, emitted as two vmov
;; instructions (hence "length" 8): one writes %e0 and the other %f0.  The
;; first alternative takes the source from core registers (%Q1/%R1), the
;; second from a "w" register (%P1).
3441 (define_insn "neon_vdup_nv2di"
3442 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3443 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3444 "TARGET_NEON"
3445 "@
3446 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3447 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3448 [(set_attr "length" "8")
3449 (set_attr "type" "multiple")]
3450 )
3451
;; Duplicate one lane of a <V_double_vector_mode> source into every element
;; of a VDQW destination.  On big-endian targets the lane index is mirrored
;; within the source vector before printing.  <Is_d_reg> selects whether the
;; destination is printed with %P or %q.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Count the lane from the other end of the source vector.  */
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }

  if (!(<Is_d_reg>))
    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
  return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
3473
;; VH counterpart of the VDQW neon_vdup_lane<mode>_internal pattern; it
;; additionally requires TARGET_FP16.  On big-endian targets the lane index
;; is mirrored within the source vector before printing, and <Is_d_reg>
;; selects whether the destination is printed with %P or %q.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (vec_duplicate:VH
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON && TARGET_FP16"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Count the lane from the other end of the source vector.  */
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }

  if (!(<Is_d_reg>))
    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
  return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
3495
;; vdup_lane builtin expander for VDQW modes: applies the big-endian lane
;; fix-up (flip within one 64-bit group of source elements) and then emits
;; the matching _internal pattern.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int nelts_per_reg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= nelts_per_reg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3514
;; vdup_lane builtin expander for VH modes (requires TARGET_FP16): applies
;; the big-endian lane fix-up (flip within one 64-bit group of source
;; elements) and then emits the matching _internal pattern.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && TARGET_FP16"
{
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int nelts_per_reg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= nelts_per_reg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3533
3534 ; Scalar index is ignored, since only zero is valid here.
;; Expands to a plain move of operand 1 into operand 0; operand 2 is never
;; read.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
3544
3545 ; Likewise for v2di, as the DImode second operand has only a single element.
;; Expands through neon_vdup_nv2di, duplicating the DImode operand 1 into
;; the V2DI operand 0; operand 2 is never read.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx scalar = operands[1];

  emit_insn (gen_neon_vdup_nv2di (dest, scalar));
  DONE;
})
3555
3556 ; Disabled before reload because we don't want combine doing something silly,
3557 ; but used by the post-reload expansion of neon_vcombine.
;; Two sets in one pattern exchange operands 0 and 1, both of which are
;; read-write ("+w").  The reload_completed test in the condition is what
;; keeps the pattern from matching before reload.
3558 (define_insn "*neon_vswp<mode>"
3559 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3560 (match_operand:VDQX 1 "s_register_operand" "+w"))
3561 (set (match_dup 1) (match_dup 0))]
3562 "TARGET_NEON && reload_completed"
3563 "vswp\t%<V_reg>0, %<V_reg>1"
3564 [(set_attr "type" "neon_permute<q>")]
3565 )
3566
3567 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3568 ;; dest vector.
3569 ;; FIXME: A different implementation of this builtin could make it much
3570 ;; more likely that we wouldn't actually need to output anything (we could make
3571 ;; it so that the reg allocator puts things in the right places magically
3572 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
3573
;; vec_concat of two VDX operands into a <V_DOUBLE> result.  The insn has
;; no assembly of its own ("#"): it must be split, which happens once
;; reload has completed, with neon_split_vcombine emitting the replacement
;; instructions.
3574 (define_insn_and_split "neon_vcombine<mode>"
3575 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3576 (vec_concat:<V_DOUBLE>
3577 (match_operand:VDX 1 "s_register_operand" "w")
3578 (match_operand:VDX 2 "s_register_operand" "w")))]
3579 "TARGET_NEON"
3580 "#"
3581 "&& reload_completed"
3582 [(const_int 0)]
3583 {
3584 neon_split_vcombine (operands);
3585 DONE;
3586 }
3587 [(set_attr "type" "multiple")]
3588 )
3589
;; vget_high builtin expander: takes the <V_HALF>-mode subreg of the VQX
;; operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode) and moves it into
;; operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx upper_half = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                        GET_MODE_SIZE (<V_HALF>mode));

  emit_move_insn (operands[0], upper_half);
  DONE;
})
3600
;; vget_low builtin expander: takes the <V_HALF>-mode subreg of the VQX
;; operand 1 at byte offset 0 and moves it into operand 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lower_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                        <MODE>mode, 0);

  emit_move_insn (operands[0], lower_half);
  DONE;
})
3611
;; Signed integer to float conversion (generic "float" RTL) from a VCVTI
;; vector to <V_CVTTO>, emitted as vcvt.f32.s32.  Disabled under
;; -frounding-math.
3612 (define_insn "float<mode><V_cvtto>2"
3613 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3614 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3615 "TARGET_NEON && !flag_rounding_math"
3616 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3617 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3618 )
3619
;; Unsigned integer to float conversion (generic "unsigned_float" RTL) from
;; a VCVTI vector to <V_CVTTO>, emitted as vcvt.f32.u32.  Disabled under
;; -frounding-math.
3620 (define_insn "floatuns<mode><V_cvtto>2"
3621 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3622 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3623 "TARGET_NEON && !flag_rounding_math"
3624 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3625 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3626 )
3627
;; Float to signed integer conversion (generic "fix" RTL) from a VCVTF
;; vector to <V_CVTTO>, emitted as vcvt.s32.f32.
3628 (define_insn "fix_trunc<mode><V_cvtto>2"
3629 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3630 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3631 "TARGET_NEON"
3632 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3633 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3634 )
3635
;; Float to unsigned integer conversion (generic "unsigned_fix" RTL) from a
;; VCVTF vector to <V_CVTTO>, emitted as vcvt.u32.f32.
3636 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3637 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3638 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3639 "TARGET_NEON"
3640 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3641 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3642 )
3643
;; Builtin vcvt from a VCVTF vector to <V_CVTTO>, modelled with the VCVT_US
;; unspec; <sup> supplies the type prefix of the 32-bit destination.
3644 (define_insn "neon_vcvt<sup><mode>"
3645 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3646 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3647 VCVT_US))]
3648 "TARGET_NEON"
3649 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3650 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3651 )
3652
;; Builtin vcvt from a VCVTI vector to <V_CVTTO>, modelled with the VCVT_US
;; unspec; <sup> supplies the type prefix of the 32-bit source.
3653 (define_insn "neon_vcvt<sup><mode>"
3654 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3655 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3656 VCVT_US))]
3657 "TARGET_NEON"
3658 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3659 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3660 )
3661
;; Conversion from V4HF to V4SF (UNSPEC_VCVT), emitted as vcvt.f32.f16 with
;; the destination printed via %q and the source via %P.  Requires
;; TARGET_FP16.
3662 (define_insn "neon_vcvtv4sfv4hf"
3663 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3664 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3665 UNSPEC_VCVT))]
3666 "TARGET_NEON && TARGET_FP16"
3667 "vcvt.f32.f16\t%q0, %P1"
3668 [(set_attr "type" "neon_fp_cvt_widen_h")]
3669 )
3670
;; Narrow a V4SF vector (Q register) to V4HF (D register).
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4SF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
3679
;; vcvt intrinsic patterns for 16-bit elements (TARGET_NEON_FP16INST).
;; First pattern: <sup>16 source, f16 result.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)

;; Second pattern: f16 source, <sup>16 result.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3701
;; vcvt_n patterns: conversions that take an extra immediate (operand 2),
;; which is printed as the third assembler operand.  Each pattern validates
;; the immediate with arm_const_bounds before emitting the instruction.
;; NOTE(review): the 32-bit variants pass bounds (1, 33) while the 16-bit
;; variants pass (0, 17); confirm that a lower bound of 0 is intended for
;; the FP16 forms.

;; f32 source, <sup>32 result.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

;; f16 source, <sup>16 result.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)

;; <sup>32 source, f32 result.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; <sup>16 source, f16 result.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
3755
;; f16 to <sup>16 conversions whose mnemonic suffix is supplied by
;; <vcvth_op> (iterating over VCVT_HF_US).
(define_insn "neon_vcvt<vcvth_op><sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_HF_US))]
  "TARGET_NEON_FP16INST"
  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3766
;; Narrowing move: Q-register source, D-register <V_narrow> result.
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3775
;; Saturating narrowing move (vqmovn), signedness selected by <sup>.
(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3784
;; Saturating narrowing move emitted as vqmovun.
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3793
;; Lengthening move: D-register source, Q-register <V_widen> result.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")]
          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
3802
;; Multiply by scalar lane.  Operand 2 is the D register holding the scalar
;; and operand 3 is the lane index, printed as %P2[%c3].

;; D-register form.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "w")
           (match_operand:VMD 2 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)

;; Q-register form; the scalar comes from a <V_HALF> register.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)

;; Half-precision form; the scalar always comes from a V4HF register.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
)
3848
;; Widening multiply by scalar lane: D-register inputs, Q-register result.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VMULL_LANE))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
3862
;; vqdmull by scalar lane: D-register inputs, Q-register result.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
3876
;; vqdmulh / vqrdmulh by scalar lane, Q-register form.  The scalar comes
;; from a <V_HALF> register (operand 2), lane index in operand 3.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3890
;; vqdmulh / vqrdmulh by scalar lane, D-register form.  Operand 2 holds the
;; scalar and operand 3 is the lane index.
;; All operands are printed as D registers (%P), so the scheduling type is
;; the non-"_q" variant; the Q-register pattern of the same name uses
;; "neon_sat_mul_<V_elem_ch>_scalar_q".
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)
3904
;; vqrdmlah_lane, vqrdmlsh_lane
;; Operand 1 is the accumulator and is tied to the destination ("0"), so
;; only operands 0, 2, 3 and the lane index 4 appear in the assembly.

;; Q-register form; the scalar comes from a <V_HALF> register.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "0")
           (match_operand:VMQI 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)

;; D-register form.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)
3937
;; Multiply-accumulate by scalar lane.  Operand 1 is the accumulator, tied
;; to the destination; operand 3 holds the scalar and operand 4 the lane.

;; D-register form.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

;; Q-register form; the scalar comes from a <V_HALF> register.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3973
;; Widening multiply-accumulate by scalar lane.  Operand 1 is the
;; <V_widen> accumulator, tied to the destination.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLAL_LANE))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3988
;; vqdmlal by scalar lane.  Operand 1 is the <V_widen> accumulator, tied to
;; the destination.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4003
;; Multiply-subtract by scalar lane.  Operand 1 is the accumulator, tied to
;; the destination; operand 3 holds the scalar and operand 4 the lane.

;; D-register form.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

;; Q-register form; the scalar comes from a <V_HALF> register.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4039
;; Widening multiply-subtract by scalar lane.  Operand 1 is the <V_widen>
;; accumulator, tied to the destination.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLSL_LANE))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4054
;; vqdmlsl by scalar lane.  Operand 1 is the <V_widen> accumulator, tied to
;; the destination.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4069
; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that.  This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector.  In the latter case it's currently better
; to use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.
4076
;; Multiply by a scalar held in a register (operand 2).  Each expander
;; inserts the scalar at lane index 0 of a fresh vector pseudo via
;; neon_vset_lane and then emits the matching by-lane multiply on lane 0.

;; D-register form.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})

;; Q-register form; the scalar lives in a <V_HALF> temporary.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})

;; Half-precision form; the scalar lives in a V4HF temporary.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx scalar_vec = gen_reg_rtx (V4HFmode);
  emit_insn (gen_neon_vset_lanev4hf (scalar_vec, operands[2], scalar_vec,
                                     const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4115
;; Widening multiply by a register scalar, expanded through
;; neon_vmulls_lane on lane 0 of a temporary vector.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], scalar_vec,
                                         const0_rtx));
  DONE;
})
4128
;; Widening multiply by a register scalar, expanded through
;; neon_vmullu_lane on lane 0 of a temporary vector.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], scalar_vec,
                                         const0_rtx));
  DONE;
})
4141
;; vqdmull by a register scalar, expanded through neon_vqdmull_lane on
;; lane 0 of a temporary vector.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4154
;; vqdmulh by a register scalar (D-register form), expanded through
;; neon_vqdmulh_lane on lane 0 of a temporary vector.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4167
;; vqrdmulh by a register scalar (D-register form), expanded through
;; neon_vqrdmulh_lane on lane 0 of a temporary vector.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})
4180
;; vqdmulh by a register scalar (Q-register form).  The scalar is placed in
;; a <V_HALF> temporary and consumed via neon_vqdmulh_lane on lane 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4193
;; vqrdmulh by a register scalar (Q-register form).  The scalar is placed
;; in a <V_HALF> temporary and consumed via neon_vqrdmulh_lane on lane 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})
4206
;; Multiply-accumulate by a register scalar (operand 3).  The scalar is
;; inserted at lane index 0 of a temporary vector, which is then passed
;; to neon_vmla_lane together with the first three operands.

;; D-register form.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

;; Q-register form; the scalar lives in a <V_HALF> temporary.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})
4234
;; Widening multiply-accumulate by a register scalar, expanded through
;; neon_vmlals_lane on lane 0 of a temporary vector.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4248
;; Widening multiply-accumulate by a register scalar, expanded through
;; neon_vmlalu_lane on lane 0 of a temporary vector.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4262
;; vqdmlal by a register scalar, expanded through neon_vqdmlal_lane on
;; lane 0 of a temporary vector.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})
4276
;; Multiply-subtract by a register scalar (operand 3).  The scalar is
;; inserted at lane index 0 of a temporary vector, which is then passed
;; to neon_vmls_lane together with the first three operands.

;; D-register form.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

;; Q-register form; the scalar lives in a <V_HALF> temporary.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})
4304
;; Widening multiply-subtract by a register scalar, expanded through
;; neon_vmlsls_lane on lane 0 of a temporary vector.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4318
;; Widening multiply-subtract by a register scalar, expanded through
;; neon_vmlslu_lane on lane 0 of a temporary vector.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4332
;; vqdmlsl by a register scalar, expanded through neon_vqdmlsl_lane on
;; lane 0 of a temporary vector.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})
4346
;; vext: two vector inputs plus an immediate (operand 3).  The immediate is
;; checked against 0 and the number of units in the mode before output.
(define_insn "neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")
           (match_operand:VDQX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VEXT))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
4360
;; vrev64 on any VDQ mode.
(define_insn "neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ
          [(match_operand:VDQ 1 "s_register_operand" "w")]
          UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4369
;; vrev32 on any VX mode.
(define_insn "neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX
          [(match_operand:VX 1 "s_register_operand" "w")]
          UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4378
;; vrev16 on any VE mode.
(define_insn "neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE
          [(match_operand:VE 1 "s_register_operand" "w")]
          UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4387
; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
; allocation.  For an intrinsic of the form:
;   rD = vbsl_* (rS, rN, rM)
; we can use any of:
;   vbsl rS, rN, rM  (if D = S)
;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
;
; The three constraint alternatives below tie the destination to operand 1
; (rS), operand 3 (rM) and operand 2 (rN) respectively, matching the three
; output templates in order.

(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
           (match_operand:VDQX 2 "s_register_operand" " w,w,0")
           (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)
4409
;; Expander for the vbsl intrinsic.  Operand 1 arrives in <V_cmp_result>
;; mode and is re-expressed in the data mode before the UNSPEC_VBSL set is
;; generated.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:<V_cmp_result> 1 "s_register_operand")
           (match_operand:VDQX 2 "s_register_operand")
           (match_operand:VDQX 3 "s_register_operand")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands with different modes cannot be tied to one another, so view
     operand 1 in the same mode as the other operands.  */
  operands[1] = gen_lowpart (<MODE>mode, operands[1]);
})
4421
;; vshl, vrshl
;; Shift with the shift amounts supplied in a vector register (operand 2).
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4432
;; vqshl, vqrshl
;; Saturating shift with the shift amounts supplied in a vector register
;; (operand 2).
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4443
;; vshr_n, vrshr_n
;; Right shift by immediate (operand 2).  The immediate is validated with
;; bounds 1 and element-bits + 1 before the instruction is output.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHR_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4457
;; vshrn_n, vrshrn_n
;; Narrowing right shift by immediate: Q-register source, D-register
;; result.  The immediate is validated with bounds 1 and half the source
;; element width + 1.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHRN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
4471
;; vqshrn_n, vqrshrn_n
;; Saturating narrowing right shift by immediate: Q-register source,
;; D-register result.  The immediate is validated with bounds 1 and half
;; the source element width + 1.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4485
;; vqshrun_n, vqrshrun_n
;; Narrowing right shift by immediate emitted with the <V_s_elem> suffix:
;; Q-register source, D-register result.  The immediate is validated with
;; bounds 1 and half the source element width + 1.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRUN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4499
;; Left shift by immediate (operand 2).  The immediate is validated with
;; bounds 0 and the element width.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4512
;; Saturating left shift by immediate (operand 2).  The immediate is
;; validated with bounds 0 and the element width.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHL_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4525
;; vqshlu by immediate (operand 2).  The immediate is validated with bounds
;; 0 and the element width.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4538
;; Widening left shift by immediate: D-register source, Q-register result.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHLL_N))]
  "TARGET_NEON"
{
  /* The check is made with bounds 0 and size + 1.  Assuming
     arm_const_bounds treats the low bound as inclusive and the high bound
     as exclusive (TODO confirm in arm.c), this accepts 0 <= imm <= size.  */
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
4552
4553 ;; vsra_n, vrsra_n
;; Operand 1 is tied to the destination ("0" constraint); operand 2 is the
;; vector that is printed as the source and operand 3 the immediate count,
;; which is passed to arm_const_bounds with limits 1 and element bits + 1.
4554 (define_insn "neon_v<shift_op><sup>_n<mode>"
4555 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4556 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4557 (match_operand:VDQIX 2 "s_register_operand" "w")
4558 (match_operand:SI 3 "immediate_operand" "i")]
4559 VSRA_N))]
4560 "TARGET_NEON"
4561 {
4562 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4563 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4564 }
4565 [(set_attr "type" "neon_shift_acc<q>")]
4566 )
4567
;; vsri with an immediate count.  Operand 1 is tied to the destination,
;; operand 2 is the printed source, and operand 3 is the immediate, passed
;; to arm_const_bounds with limits 1 and element bits + 1.
4568 (define_insn "neon_vsri_n<mode>"
4569 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4570 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4571 (match_operand:VDQIX 2 "s_register_operand" "w")
4572 (match_operand:SI 3 "immediate_operand" "i")]
4573 UNSPEC_VSRI))]
4574 "TARGET_NEON"
4575 {
4576 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4577 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4578 }
4579 [(set_attr "type" "neon_shift_reg<q>")]
4580 )
4581
;; vsli with an immediate count.  Operand 1 is tied to the destination,
;; operand 2 is the printed source, and operand 3 is the immediate, passed
;; to arm_const_bounds with limits 0 and the element width in bits.
4582 (define_insn "neon_vsli_n<mode>"
4583 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4584 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4585 (match_operand:VDQIX 2 "s_register_operand" "w")
4586 (match_operand:SI 3 "immediate_operand" "i")]
4587 UNSPEC_VSLI))]
4588 "TARGET_NEON"
4589 {
4590 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4591 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4592 }
4593 [(set_attr "type" "neon_shift_reg<q>")]
4594 )
4595
;; vtbl.8 with a one-register table: operand 1 is printed inside the
;; register-list braces and operand 2 after them.
4596 (define_insn "neon_vtbl1v8qi"
4597 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4598 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4599 (match_operand:V8QI 2 "s_register_operand" "w")]
4600 UNSPEC_VTBL))]
4601 "TARGET_NEON"
4602 "vtbl.8\t%P0, {%P1}, %P2"
4603 [(set_attr "type" "neon_tbl1")]
4604 )
4605
;; vtbl.8 with a two-register table.  The table arrives as one TImode
;; operand, so the output code builds V8QImode register rtxes for its two
;; parts (REGNO and REGNO + 2) and prints them as an explicit register list.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[1]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  /* Consecutive table registers are two register numbers apart.  */
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[3] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4626
;; vtbl.8 with a three-register table held in one EImode operand.  The
;; output code names the three V8QImode parts (REGNO, REGNO + 2, REGNO + 4)
;; individually for the assembler register list.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[1]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  /* Consecutive table registers are two register numbers apart.  */
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[4] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4648
;; vtbl.8 with a four-register table held in one OImode operand.  The
;; output code names the four V8QImode parts (REGNO, +2, +4, +6)
;; individually for the assembler register list.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[1]);
  rtx asm_ops[6];

  asm_ops[0] = operands[0];
  /* Consecutive table registers are two register numbers apart.  */
  for (int i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[5] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4671
;; These three are used by the vec_perm infrastructure for V16QImode.
;; This one has no assembler output of its own ("#"): after reload it is
;; split into two neon_vtbl2v8qi insns, one for each V8QImode half of the
;; destination and selector, both reading operand 1 viewed as TImode.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = gen_lowpart (TImode, operands[1]);
  unsigned half_ofs[2];

  half_ofs[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_ofs[1] = subreg_highpart_offset (V8QImode, V16QImode);

  /* Low half first, then high half.  */
  for (int half = 0; half < 2; half++)
    {
      rtx dest_half = simplify_subreg (V8QImode, operands[0], V16QImode,
                                       half_ofs[half]);
      rtx sel_half = simplify_subreg (V8QImode, operands[2], V16QImode,
                                      half_ofs[half]);
      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
4703
;; V16QImode table lookup through an OImode table.  There is no assembler
;; output of its own ("#"): after reload the insn is split into two V8QImode
;; lookups, one per half of the destination and selector, both reading the
;; whole OImode table.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  /* The table operand is OImode, which is what neon_vtbl4v8qi declares
     for its table (neon_vtbl2v8qi declares TImode), so use the generator
     whose pattern actually matches the emitted RTL.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));

  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  DONE;
}
  [(set_attr "type" "multiple")]
)
4734
4735 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4736 ;; handle quad-word input modes, producing octa-word output modes. But
4737 ;; that requires us to add support for octa-word vector modes in moves.
4738 ;; That seems overkill for this one use in vec_perm.
;; The insn has no assembler output of its own ("#"); once reload has
;; completed it is split by calling neon_split_vcombine on the operands.
4739 (define_insn_and_split "neon_vcombinev16qi"
4740 [(set (match_operand:OI 0 "s_register_operand" "=w")
4741 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4742 (match_operand:V16QI 2 "s_register_operand" "w")]
4743 UNSPEC_VCONCAT))]
4744 "TARGET_NEON"
4745 "#"
4746 "&& reload_completed"
4747 [(const_int 0)]
4748 {
4749 neon_split_vcombine (operands);
4750 DONE;
4751 }
4752 [(set_attr "type" "multiple")]
4753 )
4754
;; vtbx.8 with a one-register table.  Operand 1 is tied to the destination
;; ("0" constraint) and is not printed; operand 2 is the table register and
;; operand 3 the vector printed after the register list.
4755 (define_insn "neon_vtbx1v8qi"
4756 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4757 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4758 (match_operand:V8QI 2 "s_register_operand" "w")
4759 (match_operand:V8QI 3 "s_register_operand" "w")]
4760 UNSPEC_VTBX))]
4761 "TARGET_NEON"
4762 "vtbx.8\t%P0, {%P2}, %P3"
4763 [(set_attr "type" "neon_tbl1")]
4764 )
4765
;; vtbx.8 with a two-register table held in one TImode operand (operand 2).
;; Operand 1 is tied to the destination.  The output code names the two
;; V8QImode table parts (REGNO and REGNO + 2) for the register list.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[2]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  /* Consecutive table registers are two register numbers apart.  */
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[3] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4787
;; vtbx.8 with a three-register table held in one EImode operand (operand
;; 2).  Operand 1 is tied to the destination.  The output code names the
;; three V8QImode table parts (REGNO, +2, +4) for the register list.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[2]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  /* Consecutive table registers are two register numbers apart.  */
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[4] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4810
;; vtbx.8 with a four-register table held in one OImode operand (operand
;; 2).  Operand 1 is tied to the destination.  The output code names the
;; four V8QImode table parts (REGNO, +2, +4, +6) for the register list.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:OI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[2]);
  rtx asm_ops[6];

  asm_ops[0] = operands[0];
  /* Consecutive table registers are two register numbers apart.  */
  for (int i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[5] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4834
;; Expander producing a two-set parallel: operand 0 receives the
;; UNSPEC_VTRN1 result and operand 3 the UNSPEC_VTRN2 result of the same
;; two inputs (operands 1 and 2).  There is no preparation code.
4835 (define_expand "neon_vtrn<mode>_internal"
4836 [(parallel
4837 [(set (match_operand:VDQWH 0 "s_register_operand")
4838 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4839 (match_operand:VDQWH 2 "s_register_operand")]
4840 UNSPEC_VTRN1))
4841 (set (match_operand:VDQWH 3 "s_register_operand")
4842 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4843 "TARGET_NEON"
4844 ""
4845 )
4846
4847 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2; input 1 is tied to output 0 and
;; input 3 to output 2 through the "0" and "2" constraints, and both outputs
;; carry the "&" modifier.  Only the two output registers are printed.
4848 (define_insn "*neon_vtrn<mode>_insn"
4849 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4850 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4851 (match_operand:VDQWH 3 "s_register_operand" "2")]
4852 UNSPEC_VTRN1))
4853 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4854 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4855 UNSPEC_VTRN2))]
4856 "TARGET_NEON"
4857 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4858 [(set_attr "type" "neon_permute<q>")]
4859 )
4860
;; Expander producing a two-set parallel: operand 0 receives the
;; UNSPEC_VZIP1 result and operand 3 the UNSPEC_VZIP2 result of the same
;; two inputs (operands 1 and 2).  There is no preparation code.
4861 (define_expand "neon_vzip<mode>_internal"
4862 [(parallel
4863 [(set (match_operand:VDQWH 0 "s_register_operand")
4864 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4865 (match_operand:VDQWH 2 "s_register_operand")]
4866 UNSPEC_VZIP1))
4867 (set (match_operand:VDQWH 3 "s_register_operand")
4868 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4869 "TARGET_NEON"
4870 ""
4871 )
4872
4873 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2; input 1 is tied to output 0 and
;; input 3 to output 2 through the "0" and "2" constraints, and both outputs
;; carry the "&" modifier.  Only the two output registers are printed.
4874 (define_insn "*neon_vzip<mode>_insn"
4875 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4876 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4877 (match_operand:VDQWH 3 "s_register_operand" "2")]
4878 UNSPEC_VZIP1))
4879 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4880 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4881 UNSPEC_VZIP2))]
4882 "TARGET_NEON"
4883 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4884 [(set_attr "type" "neon_zip<q>")]
4885 )
4886
;; Expander producing a two-set parallel: operand 0 receives the
;; UNSPEC_VUZP1 result and operand 3 the UNSPEC_VUZP2 result of the same
;; two inputs (operands 1 and 2).  There is no preparation code.  Operand 3
;; is written without a constraint string, like the other operands here and
;; like the vtrn/vzip expanders.
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VUZP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
4898
4899 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2; input 1 is tied to output 0 and
;; input 3 to output 2 through the "0" and "2" constraints, and both outputs
;; carry the "&" modifier.  Only the two output registers are printed.
4900 (define_insn "*neon_vuzp<mode>_insn"
4901 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4902 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4903 (match_operand:VDQWH 3 "s_register_operand" "2")]
4904 UNSPEC_VUZP1))
4905 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4906 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4907 UNSPEC_VUZP2))]
4908 "TARGET_NEON"
4909 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4910 [(set_attr "type" "neon_zip<q>")]
4911 )
4912
;; Expander with no preparation code.  Its RTL is the same UNSPEC_VLD1 set
;; over VDQX modes that the neon_vld1<mode> insn matches.
4913 (define_expand "vec_load_lanes<mode><mode>"
4914 [(set (match_operand:VDQX 0 "s_register_operand")
4915 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4916 UNSPEC_VLD1))]
4917 "TARGET_NEON")
4918
;; Whole-vector vld1: operand 1 is a memory operand accepted by
;; neon_struct_operand ("Um" constraint), printed with %A; the destination
;; register is printed with %h.
4919 (define_insn "neon_vld1<mode>"
4920 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4921 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4922 UNSPEC_VLD1))]
4923 "TARGET_NEON"
4924 "vld1.<V_sz_elem>\t%h0, %A1"
4925 [(set_attr "type" "neon_load1_1reg<q>")]
4926 )
4927
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  They are converted back to architectural lane
;; order here, through NEON_ENDIAN_LANE_N, before the instruction is printed.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));

  /* Rewrite the lane operand so that %c3 prints the converted number.  */
  operands[3] = GEN_INT (arch_lane);

  /* With a single unit the template carries no lane index.  */
  return (GET_MODE_NUNITS (<MODE>mode) == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4949
;; See the comment on the VDX neon_vld1_lane pattern for the reason why the
;; lane numbers are reversed here on big-endian targets.
;; For these VQX modes the lane is mapped onto one <V_HALF>mode half of the
;; destination: lanes at or above half the unit count select the register
;; two numbers higher and are rebased to that half.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);

  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }

  /* Set the printed lane once, after any rebasing, in the same way as
     the VQX neon_vst1_lane pattern does.  */
  operands[3] = GEN_INT (lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, regno);

  /* With two units each half holds one element, so no lane index.  */
  if (max == 2)
    return "vld1.<V_sz_elem>\t%P0, %A1";
  else
    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4978
;; vld1 to all lanes for VD_LANE modes: the RTL is a vec_duplicate of the
;; <V_elem> memory operand and the template uses the "[]" lane syntax.
4979 (define_insn "neon_vld1_dup<mode>"
4980 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4981 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4982 "TARGET_NEON"
4983 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4984 [(set_attr "type" "neon_load1_all_lanes<q>")]
4985 )
4986
4987 ;; Special case for DImode. Treat it exactly like a simple load.
;; The expansion is an UNSPEC_VLD1 set on DImode operands, with no
;; preparation code.
4988 (define_expand "neon_vld1_dupdi"
4989 [(set (match_operand:DI 0 "s_register_operand" "")
4990 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
4991 UNSPEC_VLD1))]
4992 "TARGET_NEON"
4993 ""
4994 )
4995
;; vld1 to all lanes for VQ2 modes: the template names two registers for
;; the destination (%e0 and %f0), each with the "[]" lane syntax.
4996 (define_insn "neon_vld1_dup<mode>"
4997 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
4998 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4999 "TARGET_NEON"
5000 {
5001 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5002 }
5003 [(set_attr "type" "neon_load1_all_lanes<q>")]
5004 )
5005
;; V2DI duplicate-load.  There is no assembler output of its own ("#"):
;; after reload it is split into a neon_vld1_dupdi load of the low DImode
;; part of the destination followed by a move of that part into the high
;; DImode part.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);
  emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));

  /* Copy the freshly loaded low part into the high part.  */
  rtx high_half = gen_highpart (DImode, operands[0]);
  emit_move_insn (high_half, low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
5022
;; Expander with no preparation code.  Its RTL is the same UNSPEC_VST1 set
;; over VDQX modes that the neon_vst1<mode> insn matches.
5023 (define_expand "vec_store_lanes<mode><mode>"
5024 [(set (match_operand:VDQX 0 "neon_struct_operand")
5025 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5026 UNSPEC_VST1))]
5027 "TARGET_NEON")
5028
;; Whole-vector vst1: operand 0 is a memory operand accepted by
;; neon_struct_operand ("Um" constraint), printed with %A; the source
;; register is printed with %h.
5029 (define_insn "neon_vst1<mode>"
5030 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5031 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5032 UNSPEC_VST1))]
5033 "TARGET_NEON"
5034 "vst1.<V_sz_elem>\t%h1, %A0"
5035 [(set_attr "type" "neon_store1_1reg<q>")])
5036
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));

  /* Rewrite the lane operand so that %c2 prints the converted number.  */
  operands[2] = GEN_INT (arch_lane);

  /* With a single unit the template carries no lane index.  */
  return (GET_MODE_NUNITS (<MODE>mode) == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5057
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; For these VQX modes the lane is mapped onto one <V_HALF>mode half of the
;; source: lanes at or above half the unit count select the register two
;; numbers higher and are rebased to that half.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_in_half
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half_units = nunits / 2;
  int half_regno = REGNO (operands[1]);

  if (lane_in_half >= half_units)
    {
      half_regno += 2;
      lane_in_half -= half_units;
    }

  operands[2] = GEN_INT (lane_in_half);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two units each half holds one element, so no lane index.  */
  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5085
;; Expander with no preparation code.  Its RTL is the UNSPEC_VLD2 set on
;; TImode operands, with a VDX-mode UNSPEC_VSTRUCTDUMMY carrying the vector
;; mode, that the TImode neon_vld2<mode> insn matches.
5086 (define_expand "vec_load_lanesti<mode>"
5087 [(set (match_operand:TI 0 "s_register_operand")
5088 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5089 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5090 UNSPEC_VLD2))]
5091 "TARGET_NEON")
5092
;; Two-vector structure load into a TImode register value.  When
;; <V_sz_elem> is 64 the output code prints vld1.64 instead of vld2, and the
;; "type" attribute switches to neon_load1_2reg<q> on the same condition.
5093 (define_insn "neon_vld2<mode>"
5094 [(set (match_operand:TI 0 "s_register_operand" "=w")
5095 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5096 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5097 UNSPEC_VLD2))]
5098 "TARGET_NEON"
5099 {
5100 if (<V_sz_elem> == 64)
5101 return "vld1.64\t%h0, %A1";
5102 else
5103 return "vld2.<V_sz_elem>\t%h0, %A1";
5104 }
5105 [(set (attr "type")
5106 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5107 (const_string "neon_load1_2reg<q>")
5108 (const_string "neon_load2_2reg<q>")))]
5109 )
5110
;; Expander with no preparation code.  Its RTL is the UNSPEC_VLD2 set on
;; OImode operands, with a VQ2-mode UNSPEC_VSTRUCTDUMMY carrying the vector
;; mode, that the OImode neon_vld2<mode> insn matches.
5111 (define_expand "vec_load_lanesoi<mode>"
5112 [(set (match_operand:OI 0 "s_register_operand")
5113 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5114 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5115 UNSPEC_VLD2))]
5116 "TARGET_NEON")
5117
;; Two-vector structure load into an OImode register value (VQ2 vector
;; modes).  Always printed as vld2 with the destination given by %h.
5118 (define_insn "neon_vld2<mode>"
5119 [(set (match_operand:OI 0 "s_register_operand" "=w")
5120 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5121 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5122 UNSPEC_VLD2))]
5123 "TARGET_NEON"
5124 "vld2.<V_sz_elem>\t%h0, %A1"
5125 [(set_attr "type" "neon_load2_2reg_q")])
5126
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; The TImode destination is printed as two DImode registers, REGNO and
;; REGNO + 2, each with the same lane index.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int base_regno = REGNO (operands[0]);
  rtx asm_ops[4];

  for (int i = 0; i < 2; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 2 * i);
  asm_ops[2] = operands[1];
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5150
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; The OImode destination is printed as two DImode registers four register
;; numbers apart.  Lanes at or above half the unit count are rebased and
;; move the starting register up by two.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_in_half
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const HOST_WIDE_INT half_units = GET_MODE_NUNITS (<MODE>mode) / 2;
  int base_regno = REGNO (operands[0]);
  rtx asm_ops[4];

  if (lane_in_half >= half_units)
    {
      base_regno += 2;
      lane_in_half -= half_units;
    }

  for (int i = 0; i < 2; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 4 * i);
  asm_ops[2] = operands[1];
  asm_ops[3] = GEN_INT (lane_in_half);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5180
;; Two-vector load to all lanes.  When the vector mode has more than one
;; unit the output is vld2 with "[]" on both registers (%e0, %f0); otherwise
;; a plain vld1 is printed.  The "type" attribute makes the same
;; distinction using <V_mode_nunits>.
5181 (define_insn "neon_vld2_dup<mode>"
5182 [(set (match_operand:TI 0 "s_register_operand" "=w")
5183 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5184 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5185 UNSPEC_VLD2_DUP))]
5186 "TARGET_NEON"
5187 {
5188 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5189 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5190 else
5191 return "vld1.<V_sz_elem>\t%h0, %A1";
5192 }
5193 [(set (attr "type")
5194 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5195 (const_string "neon_load2_all_lanes<q>")
5196 (const_string "neon_load1_1reg<q>")))]
5197 )
5198
;; Expander with no preparation code.  Its RTL is the UNSPEC_VST2 set on
;; TImode operands, with a VDX-mode UNSPEC_VSTRUCTDUMMY carrying the vector
;; mode, that the TImode neon_vst2<mode> insn matches.
5199 (define_expand "vec_store_lanesti<mode>"
5200 [(set (match_operand:TI 0 "neon_struct_operand")
5201 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5202 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5203 UNSPEC_VST2))]
5204 "TARGET_NEON")
5205
;; Two-vector structure store from a TImode register value.  When
;; <V_sz_elem> is 64 the output code prints vst1.64 instead of vst2, and the
;; "type" attribute switches on the same condition.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  ;; This is a whole-register store, not a single-lane one, so it is typed
  ;; neon_store2_2reg<q>, mirroring neon_load2_2reg<q> on the TImode
  ;; neon_vld2 pattern.  neon_store2_one_lane<q> belongs to neon_vst2_lane.
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_2reg<q>")))]
)
5223
;; Expander with no preparation code.  Its RTL is the UNSPEC_VST2 set on
;; OImode operands, with a VQ2-mode UNSPEC_VSTRUCTDUMMY carrying the vector
;; mode, that the OImode neon_vst2<mode> insn matches.
5224 (define_expand "vec_store_lanesoi<mode>"
5225 [(set (match_operand:OI 0 "neon_struct_operand")
5226 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5227 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5228 UNSPEC_VST2))]
5229 "TARGET_NEON")
5230
;; Two-vector structure store from an OImode register value (VQ2 vector
;; modes).  Always printed as vst2 with the source given by %h.
5231 (define_insn "neon_vst2<mode>"
5232 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5233 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5234 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5235 UNSPEC_VST2))]
5236 "TARGET_NEON"
5237 "vst2.<V_sz_elem>\t%h1, %A0"
5238 [(set_attr "type" "neon_store2_4reg<q>")]
5239 )
5240
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; The TImode source is printed as two DImode registers, REGNO and
;; REGNO + 2, each with the same lane index.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int base_regno = REGNO (operands[1]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (DImode, base_regno + 2 * i);
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5264
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; The OImode source is printed as two DImode registers four register
;; numbers apart.  Lanes at or above half the unit count are rebased and
;; move the starting register up by two.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_in_half
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const HOST_WIDE_INT half_units = GET_MODE_NUNITS (<MODE>mode) / 2;
  int base_regno = REGNO (operands[1]);
  rtx asm_ops[4];

  if (lane_in_half >= half_units)
    {
      base_regno += 2;
      lane_in_half -= half_units;
    }

  asm_ops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (DImode, base_regno + 4 * i);
  asm_ops[3] = GEN_INT (lane_in_half);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5294
;; Expander with no preparation code.  Its RTL is the UNSPEC_VLD3 set on
;; EImode operands, with a VDX-mode UNSPEC_VSTRUCTDUMMY carrying the vector
;; mode, that the EImode neon_vld3<mode> insn matches.
5295 (define_expand "vec_load_lanesei<mode>"
5296 [(set (match_operand:EI 0 "s_register_operand")
5297 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5298 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5299 UNSPEC_VLD3))]
5300 "TARGET_NEON")
5301
;; Three-vector structure load into an EImode register value.  When
;; <V_sz_elem> is 64 the output code prints vld1.64 instead of vld3, and the
;; "type" attribute switches to neon_load1_3reg<q> on the same condition.
5302 (define_insn "neon_vld3<mode>"
5303 [(set (match_operand:EI 0 "s_register_operand" "=w")
5304 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5305 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5306 UNSPEC_VLD3))]
5307 "TARGET_NEON"
5308 {
5309 if (<V_sz_elem> == 64)
5310 return "vld1.64\t%h0, %A1";
5311 else
5312 return "vld3.<V_sz_elem>\t%h0, %A1";
5313 }
5314 [(set (attr "type")
5315 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5316 (const_string "neon_load1_3reg<q>")
5317 (const_string "neon_load3_3reg<q>")))]
5318 )
5319
;; Expander for CImode structure loads (VQ2 vector modes).  It has no RTL
;; set of its own: the preparation code emits gen_neon_vld3<mode> on
;; operands 0 and 1 and finishes with DONE.
5320 (define_expand "vec_load_lanesci<mode>"
5321 [(match_operand:CI 0 "s_register_operand")
5322 (match_operand:CI 1 "neon_struct_operand")
5323 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324 "TARGET_NEON"
5325 {
5326 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
5327 DONE;
5328 })
5329
;; CImode three-vector load for VQ2 modes, done as two EImode-sized loads:
;; neon_vld3qa on the first EImode chunk of the memory operand, then
;; neon_vld3qb on the chunk GET_MODE_SIZE (EImode) bytes further on, with
;; the destination also passed as the tied input of the second insn.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_chunk = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_chunk));

  rtx second_chunk = adjust_address (first_chunk, EImode,
                                     GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_chunk, operands[0]));
  DONE;
})
5344
;; First half of the CImode vld3 sequence: a vld3 into the three DImode
;; registers at REGNO, REGNO + 4 and REGNO + 8 of the destination.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[0]);
  rtx asm_ops[4];

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 4 * i);
  asm_ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5363
;; Second half of the CImode vld3 sequence: a vld3 into the three DImode
;; registers at REGNO + 2, REGNO + 6 and REGNO + 10 of the destination.
;; Operand 2 is the destination's previous value, tied with "0".
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[0]) + 2;
  rtx asm_ops[4];

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 4 * i);
  asm_ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5383
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; The EImode destination is printed as three DImode registers, REGNO,
;; REGNO + 2 and REGNO + 4, each with the same lane index.  The memory
;; operand is printed with plain %3 here, not %A.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int base_regno = REGNO (operands[0]);
  rtx asm_ops[5];

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 2 * i);
  asm_ops[3] = operands[1];
  asm_ops[4] = GEN_INT (arch_lane);

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5409
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; The CImode destination is printed as three DImode registers four
;; register numbers apart.  Lanes at or above half the unit count are
;; rebased and move the starting register up by two.  The memory operand is
;; printed with plain %3 here, not %A.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_in_half
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const HOST_WIDE_INT half_units = GET_MODE_NUNITS (<MODE>mode) / 2;
  int base_regno = REGNO (operands[0]);
  rtx asm_ops[5];

  if (lane_in_half >= half_units)
    {
      base_regno += 2;
      lane_in_half -= half_units;
    }

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 4 * i);
  asm_ops[3] = operands[1];
  asm_ops[4] = GEN_INT (lane_in_half);

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5441
;; Three-vector load to all lanes.  When the vector mode has a single unit
;; a plain vld1 is printed; otherwise vld3 with "[]" on the three DImode
;; registers REGNO, REGNO + 2 and REGNO + 4.  The "type" attribute makes
;; the same distinction using <V_mode_nunits>.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  /* Single-unit modes need no duplication.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  const int base_regno = REGNO (operands[0]);
  rtx asm_ops[4];

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 2 * i);
  asm_ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", asm_ops);
  return "";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])
5467
;; Expander for an EI-mode structure store: sets a neon_struct_operand
;; memory destination from an UNSPEC_VST3 of an EI register.  There are no
;; preparation statements, so the RTL template below is emitted as written.
;; The UNSPEC_VSTRUCTDUMMY element holds only (const_int 0) and carries the
;; iterated VDX mode.
5468 (define_expand "vec_store_lanesei<mode>"
5469 [(set (match_operand:EI 0 "neon_struct_operand")
5470 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5471 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5472 UNSPEC_VST3))]
5473 "TARGET_NEON")
5474
;; Store an EI-mode register group to memory (VDX modes).  Elements of 64
;; bits are written with vst1.64; every other element size uses vst3 on
;; the whole register list.
;;
;; The non-64-bit case is a whole-register vst3, so its scheduling type is
;; the three-register store type, in line with neon_vst3qa/neon_vst3qb
;; (neon_store3_3reg<q>) and neon_vst4<mode> (neon_store4_4reg<q>), rather
;; than the single-lane store type.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  /* 64-bit elements are stored with vst1.64, all others with vst3.  */
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst3.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_3reg<q>")
                      (const_string "neon_store3_3reg<q>")))])
5491
;; CI-mode structure store for the VQ2 modes: forwards both operands
;; unchanged to the neon_vst3 expander for the same mode.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_mem = operands[0];
  rtx src_regs = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest_mem, src_regs));
  DONE;
})
5501
;; CI-mode vst3 for the VQ2 modes, emitted as two EI-mode stores:
;; neon_vst3qa at offset 0 of the destination, then neon_vst3qb at the
;; following EImode-sized slot.  Both take the full CI source register.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode slot of the destination.  */
  rtx first_mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_mem, operands[1]));

  /* Second slot, one EImode size beyond the first.  */
  rtx second_mem = adjust_address (first_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_mem, operands[1]));
  DONE;
})
5516
;; First half of a CI-mode vst3: stores the DImode registers at offsets
;; 0, 4 and 8 from the first register number of operand 1.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5535
;; Second half of a CI-mode vst3: stores the DImode registers at offsets
;; 2, 6 and 10 from the first register number of operand 1.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  /* Start two register numbers past the base of the source group.  */
  const int base = REGNO (operands[1]) + 2;
  rtx xops[4];

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5554
;; Single-lane vst3 from an EI-mode register group (VD_LANE modes).
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  /* Consecutive list registers are two register numbers apart.  */
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = GEN_INT (lane_idx);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5580
;; Single-lane vst3 from a CI-mode register group (VQ_HS modes).
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[1]);
  rtx xops[5];

  /* A lane in the upper half of the vector is reached through the
     registers two numbers further on, with the index rebased.  */
  if (lane_idx >= half)
    {
      lane_idx -= half;
      base += 2;
    }

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = GEN_INT (lane_idx);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5612
;; Expander for an OI-mode structure load: sets an OI register from an
;; UNSPEC_VLD4 of a neon_struct_operand memory source.  There are no
;; preparation statements, so the RTL template below is emitted as written.
;; The UNSPEC_VSTRUCTDUMMY element holds only (const_int 0) and carries the
;; iterated VDX mode.
5613 (define_expand "vec_load_lanesoi<mode>"
5614 [(set (match_operand:OI 0 "s_register_operand")
5615 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5616 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5617 UNSPEC_VLD4))]
5618 "TARGET_NEON")
5619
;; Load an OI-mode register group from memory (VDX modes).  Elements of
;; 64 bits are read with vld1.64; every other element size uses vld4 on
;; the whole register list.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  /* 64-bit elements are loaded with vld1.64, all others with vld4.  */
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_4reg<q>")
                      (const_string "neon_load4_4reg<q>")))]
)
5637
;; XI-mode structure load for the VQ2 modes: forwards both operands
;; unchanged to the neon_vld4 expander for the same mode.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_regs = operands[0];
  rtx src_mem = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest_regs, src_mem));
  DONE;
})
5647
;; XI-mode vld4 for the VQ2 modes, emitted as two OI-mode loads:
;; neon_vld4qa from offset 0 of the source, then neon_vld4qb from the
;; following OImode-sized slot.  The second load also takes the
;; destination as an input operand.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode slot of the source.  */
  rtx first_mem = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_mem));

  /* Second slot, one OImode size beyond the first.  */
  rtx second_mem = adjust_address (first_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_mem, operands[0]));
  DONE;
})
5662
;; First half of an XI-mode vld4: loads the DImode registers at offsets
;; 0, 4, 8 and 12 from the first register number of operand 0.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5682
;; Second half of an XI-mode vld4: loads the DImode registers at offsets
;; 2, 6, 10 and 14 from the first register number of operand 0.  Operand 2
;; is tied to the destination by its "0" constraint.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  /* Start two register numbers past the base of the destination group.  */
  const int base = REGNO (operands[0]) + 2;
  rtx xops[5];

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5703
;; Single-lane vld4 into an OI-mode register group (VD_LANE modes).
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int base = REGNO (operands[0]);
  rtx xops[6];

  /* Consecutive list registers are two register numbers apart.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane_idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5730
;; Single-lane vld4 into an XI-mode register group (VQ_HS modes).
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[6];

  /* A lane in the upper half of the vector is reached through the
     registers two numbers further on, with the index rebased.  */
  if (lane_idx >= half)
    {
      lane_idx -= half;
      base += 2;
    }

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane_idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5763
;; vld4 using the empty-bracket "[]" lane syntax on four DImode registers
;; taken from the OI destination.  Modes with a single element per vector
;; are emitted as a vld1 of the whole register list instead.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  /* Single-element modes take the vld1 form.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  const int base = REGNO (operands[0]);
  rtx xops[5];

  /* Consecutive list registers are two register numbers apart.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   xops);
  return "";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load4_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
5792
;; Expander for an OI-mode structure store: sets a neon_struct_operand
;; memory destination from an UNSPEC_VST4 of an OI register.  There are no
;; preparation statements, so the RTL template below is emitted as written.
;; The UNSPEC_VSTRUCTDUMMY element holds only (const_int 0) and carries the
;; iterated VDX mode.
5793 (define_expand "vec_store_lanesoi<mode>"
5794 [(set (match_operand:OI 0 "neon_struct_operand")
5795 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5796 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5797 UNSPEC_VST4))]
5798 "TARGET_NEON")
5799
;; Store an OI-mode register group to memory (VDX modes).  Elements of 64
;; bits are written with vst1.64; every other element size uses vst4 on
;; the whole register list.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  /* 64-bit elements are stored with vst1.64, all others with vst4.  */
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst4.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_4reg<q>")
                      (const_string "neon_store4_4reg<q>")))]
)
5817
;; XI-mode structure store for the VQ2 modes: forwards both operands
;; unchanged to the neon_vst4 expander for the same mode.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_mem = operands[0];
  rtx src_regs = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest_mem, src_regs));
  DONE;
})
5827
;; XI-mode vst4 for the VQ2 modes, emitted as two OI-mode stores:
;; neon_vst4qa at offset 0 of the destination, then neon_vst4qb at the
;; following OImode-sized slot.  Both take the full XI source register.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode slot of the destination.  */
  rtx first_mem = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_mem, operands[1]));

  /* Second slot, one OImode size beyond the first.  */
  rtx second_mem = adjust_address (first_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_mem, operands[1]));
  DONE;
})
5842
;; First half of an XI-mode vst4: stores the DImode registers at offsets
;; 0, 4, 8 and 12 from the first register number of operand 1.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5862
;; Second half of an XI-mode vst4: stores the DImode registers at offsets
;; 2, 6, 10 and 14 from the first register number of operand 1.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  /* Start two register numbers past the base of the source group.  */
  const int base = REGNO (operands[1]) + 2;
  rtx xops[5];

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5882
;; Single-lane vst4 from an OI-mode register group (VD_LANE modes).
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int base = REGNO (operands[1]);
  rtx xops[6];

  xops[0] = operands[0];
  /* Consecutive list registers are two register numbers apart.  */
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[5] = GEN_INT (lane_idx);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5909
;; Single-lane vst4 from an XI-mode register group (VQ_HS modes).
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
;;
;; This stores one lane, so its scheduling type is the one-lane store
;; type, in line with the VD_LANE variant of this pattern and with
;; neon_vst3_lane, rather than the whole-register neon_store4_4reg type.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];

  /* A lane in the upper half of the vector is reached through the
     registers two numbers further on, with the index rebased.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }

  ops[0] = operands[0];
  /* The four DImode registers of the list are four numbers apart.  */
  for (int i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, regno + 4 * i);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5942
;; Extend (SE code iterator) the half of operand 1 that is selected by the
;; vect_par_constant_low parallel in operand 2, producing a <V_unpack>
;; result.  Emitted as vmovl on the %e1 form of the source.  Only enabled
;; when !BYTES_BIG_ENDIAN.
5943 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5944 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5945 (SE:<V_unpack> (vec_select:<V_HALF>
5946 (match_operand:VU 1 "register_operand" "w")
5947 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5948 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5949 "vmovl.<US><V_sz_elem> %q0, %e1"
5950 [(set_attr "type" "neon_shift_imm_long")]
5951 )
5952
;; Extend (SE code iterator) the half of operand 1 that is selected by the
;; vect_par_constant_high parallel in operand 2, producing a <V_unpack>
;; result.  Emitted as vmovl on the %f1 form of the source.  Only enabled
;; when !BYTES_BIG_ENDIAN.
5953 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5954 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5955 (SE:<V_unpack> (vec_select:<V_HALF>
5956 (match_operand:VU 1 "register_operand" "w")
5957 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5958 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5959 "vmovl.<US><V_sz_elem> %q0, %f1"
5960 [(set_attr "type" "neon_shift_imm_long")]
5961 )
5962
;; Unpack the high half of a VU-mode vector: builds a PARALLEL holding the
;; indices of the upper half of the lanes and passes it as the selector
;; operand of neon_vec_unpack<US>_hi_<mode>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  /* Selector entries are half, half + 1, ... up to the last lane.  */
  for (int lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (half + lane);

  rtx hi_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 hi_par));
  DONE;
}
)
5981
;; Unpack the low half of a VU-mode vector: builds a PARALLEL holding the
;; indices of the lower half of the lanes and passes it as the selector
;; operand of neon_vec_unpack<US>_lo_<mode>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  /* Selector entries are 0, 1, ... up to half - 1.  */
  for (int lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (lane);

  rtx lo_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 lo_par));
  DONE;
}
)
5999
;; Widening multiply of the halves of operands 1 and 3 selected by the
;; vect_par_constant_low parallel in operand 2; the same selector is
;; applied to both inputs through match_dup, and each half is extended with
;; the SE code before the multiply.  Emitted as vmull on the %e forms of
;; the two sources.  Only enabled when !BYTES_BIG_ENDIAN.
6000 (define_insn "neon_vec_<US>mult_lo_<mode>"
6001 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6002 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6003 (match_operand:VU 1 "register_operand" "w")
6004 (match_operand:VU 2 "vect_par_constant_low" "")))
6005 (SE:<V_unpack> (vec_select:<V_HALF>
6006 (match_operand:VU 3 "register_operand" "w")
6007 (match_dup 2)))))]
6008 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6009 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6010 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6011 )
6012
;; Widening multiply of the low halves of two VU-mode vectors: builds a
;; PARALLEL of the lower-half lane indices and emits
;; neon_vec_<US>mult_lo_<mode> with it as the selector operand.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  /* Selector entries are 0, 1, ... up to half - 1.  */
  for (int lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (lane);

  rtx lo_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                               operands[1],
                                               lo_par,
                                               operands[2]));
  DONE;
}
)
6033
;; Widening multiply of the halves of operands 1 and 3 selected by the
;; vect_par_constant_high parallel in operand 2; the same selector is
;; applied to both inputs through match_dup, and each half is extended with
;; the SE code before the multiply.  Emitted as vmull on the %f forms of
;; the two sources.  Only enabled when !BYTES_BIG_ENDIAN.
6034 (define_insn "neon_vec_<US>mult_hi_<mode>"
6035 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6036 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6037 (match_operand:VU 1 "register_operand" "w")
6038 (match_operand:VU 2 "vect_par_constant_high" "")))
6039 (SE:<V_unpack> (vec_select:<V_HALF>
6040 (match_operand:VU 3 "register_operand" "w")
6041 (match_dup 2)))))]
6042 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6043 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6044 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6045 )
6046
;; Widening multiply of the high halves of two VU-mode vectors: builds a
;; PARALLEL of the upper-half lane indices and emits
;; neon_vec_<US>mult_hi_<mode> with it as the selector operand.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  /* Selector entries are half, half + 1, ... up to the last lane.  */
  for (int lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (half + lane);

  rtx hi_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                               operands[1],
                                               hi_par,
                                               operands[2]));
  DONE;
}
)
6068
;; Widening shift left: operand 1 (a VW-mode vector) is shifted left by
;; the constant in operand 2 and extended with the SE code to <V_widen>.
;; Emitted as a single vshll.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (ashift:VW (match_operand:VW 1 "register_operand" "w")
                     (match_operand:<V_innermode> 2
                       "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
6079
;; Widening shift left of the low half of a VU-mode vector: takes the
;; <V_HALF> subreg of operand 1 at byte offset 0 and feeds it to the
;; half-width neon_vec_<US>shiftl pattern.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  /* Half-width view of the source starting at byte 0.  */
  rtx lo_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], lo_half,
                                                operands[2]));
  DONE;
}
)
6092
;; Widening shift left of the high half of a VU-mode vector: takes the
;; <V_HALF> subreg of operand 1 at a byte offset of one <V_HALF> size and
;; feeds it to the half-width neon_vec_<US>shiftl pattern.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  /* Half-width view of the source starting one half-vector in.  */
  rtx hi_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                           GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], hi_half,
                                                operands[2]));
  DONE;
}
)
6106
6107 ;; Vectorize for non-neon-quad case
;; Extend (SE code iterator) a whole VDI-mode vector in operand 1 to its
;; <V_widen> mode.  Emitted as vmovl from the %P1 form of the source to
;; the %q0 form of the destination.
6108 (define_insn "neon_unpack<US>_<mode>"
6109 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6110 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6111 "TARGET_NEON"
6112 "vmovl.<US><V_sz_elem> %q0, %P1"
6113 [(set_attr "type" "neon_move")]
6114 )
6115
;; Unpack-low for the VDI modes: extends the whole input into a fresh
;; <V_widen> register with neon_unpack<US>_<mode>, then extracts the low
;; part of that register into operand 0 with neon_vget_low.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  /* Scratch register that receives the fully widened vector.  */
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
}
)
6128
;; Unpack-high for the VDI modes: extends the whole input into a fresh
;; <V_widen> register with neon_unpack<US>_<mode>, then extracts the high
;; part of that register into operand 0 with neon_vget_high.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  /* Scratch register that receives the fully widened vector.  */
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
}
)
6141
;; Widening multiply of two whole VDI-mode vectors: both inputs are
;; extended with the SE code to <V_widen> and multiplied.  Emitted as
;; vmull on the %P forms of the sources, writing the %q0 form of the
;; destination.
6142 (define_insn "neon_vec_<US>mult_<mode>"
6143 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6144 (mult:<V_widen> (SE:<V_widen>
6145 (match_operand:VDI 1 "register_operand" "w"))
6146 (SE:<V_widen>
6147 (match_operand:VDI 2 "register_operand" "w"))))]
6148 "TARGET_NEON"
6149 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6150 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6151 )
6152
;; Widening multiply, high part, for the VDI modes: multiplies the two
;; inputs into a fresh <V_widen> register with neon_vec_<US>mult_<mode>,
;; then extracts the high part into operand 0 with neon_vget_high.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  /* Scratch register that receives the full widened product.  */
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                            operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
}
)
6167
;; Widening multiply, low part, for the VDI modes: multiplies the two
;; inputs into a fresh <V_widen> register with neon_vec_<US>mult_<mode>,
;; then extracts the low part into operand 0 with neon_vget_low.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  /* Scratch register that receives the full widened product.  */
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                            operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
  DONE;
}
)
6182
;; Widening shift left, high part, for the VDI modes: shifts the input
;; into a fresh <V_widen> register with neon_vec_<US>shiftl_<mode>, then
;; extracts the high part into operand 0 with neon_vget_high.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  /* Scratch register that receives the full widened shift result.  */
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                              operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6196
;; Widening shift left, low part, for the VDI modes: shifts the input
;; into a fresh <V_widen> register with neon_vec_<US>shiftl_<mode>, then
;; extracts the low part into operand 0 with neon_vget_low.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  /* Scratch register that receives the full widened shift result.  */
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                              operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6210
6211 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6212 ; because the ordering of vector elements in Q registers is different from what
6213 ; the semantics of the instructions require.
6214
;; Truncate two VN-mode vectors to <V_narrow> and concatenate them into
;; one <V_narrow_pack> result.  Emitted as two vmovn instructions, writing
;; the %e0 and %f0 forms of the destination from operands 1 and 2
;; respectively; the "length" attribute is 8 to cover both.  The output
;; carries the "&" earlyclobber modifier, so it is not allocated on top of
;; either input.  Only enabled when !BYTES_BIG_ENDIAN.
6215 (define_insn "vec_pack_trunc_<mode>"
6216 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6217 (vec_concat:<V_narrow_pack>
6218 (truncate:<V_narrow>
6219 (match_operand:VN 1 "register_operand" "w"))
6220 (truncate:<V_narrow>
6221 (match_operand:VN 2 "register_operand" "w"))))]
6222 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6223 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6224 [(set_attr "type" "multiple")
6225 (set_attr "length" "8")]
6226 )
6227
6228 ;; For the non-quad case.
;; Truncate a single VN-mode vector in operand 1 to <V_narrow>.  Emitted
;; as one vmovn from the %q1 form of the source to the %P0 form of the
;; destination.  Only enabled when !BYTES_BIG_ENDIAN.
6229 (define_insn "neon_vec_pack_trunc_<mode>"
6230 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6231 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6232 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6233 "vmovn.i<V_sz_elem>\t%P0, %q1"
6234 [(set_attr "type" "neon_move_narrow_q")]
6235 )
6236
;; Pack-truncate for the VSHFT modes: places operand 1 in the low half and
;; operand 2 in the high half of a fresh <V_DOUBLE> register, then narrows
;; that register into operand 0 with neon_vec_pack_trunc.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
   (match_operand:VSHFT 1 "register_operand" "")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  /* Scratch register that holds both inputs side by side.  */
  rtx combined = gen_reg_rtx (<V_DOUBLE>mode);
  rtx lo_src = operands[1];
  rtx hi_src = operands[2];

  emit_insn (gen_move_lo_quad_<V_double> (combined, lo_src));
  emit_insn (gen_move_hi_quad_<V_double> (combined, hi_src));
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], combined));
  DONE;
})
6250
;; Absolute difference written as (abs (minus a b)) over the VDQ modes,
;; emitted as a single vabd.  For floating-point modes the pattern is only
;; enabled under flag_unsafe_math_optimizations; the "type" attribute picks
;; the floating-point or integer variant from <Is_float_mode>.
6251 (define_insn "neon_vabd<mode>_2"
6252 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6253 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
6254 (match_operand:VDQ 2 "s_register_operand" "w"))))]
6255 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6256 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6257 [(set (attr "type")
6258 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6259 (const_string "neon_fp_abd_s<q>")
6260 (const_string "neon_abd<q>")))]
6261 )
6262
;; Absolute difference written as abs of an UNSPEC_VSUB of the two inputs
;; over the VDQ modes, emitted as a single vabd.  Unlike neon_vabd<mode>_2
;; the element suffix comes from <V_if_elem> rather than <V_s_elem>.  For
;; floating-point modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.
6263 (define_insn "neon_vabd<mode>_3"
6264 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6265 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
6266 (match_operand:VDQ 2 "s_register_operand" "w")]
6267 UNSPEC_VSUB)))]
6268 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6269 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6270 [(set (attr "type")
6271 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6272 (const_string "neon_fp_abd_s<q>")
6273 (const_string "neon_abd<q>")))]
6274 )
6275
6276 ;; Copy from core-to-neon regs, then extend, not vice-versa
6277
;; After reload, split an SI->DI sign extension whose destination is a
;; VFP register: duplicate the source into both lanes of the destination
;; viewed as V2SI, then arithmetic-shift the DI value right by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination register in V2SImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
6287
;; After reload, split an HI->DI sign extension whose destination is a
;; VFP register: duplicate the source into every lane of the destination
;; viewed as V4HI, then arithmetic-shift the DI value right by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination register in V4HImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
6297
;; After reload, split a QI->DI sign extension whose destination is a
;; VFP register: duplicate the source into every lane of the destination
;; viewed as V8QI, then arithmetic-shift the DI value right by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination register in V8QImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})
6307
;; After reload, split an SI->DI zero extension whose destination is a
;; VFP register: duplicate the source into both lanes of the destination
;; viewed as V2SI, then logical-shift the DI value right by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination register in V2SImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
6317
;; After reload, split an HI->DI zero extension whose destination is a
;; VFP register: duplicate the source into every lane of the destination
;; viewed as V4HI, then logical-shift the DI value right by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination register in V4HImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
6327
;; After reload, split a QI->DI zero extension whose destination is a
;; VFP register: duplicate the source into every lane of the destination
;; viewed as V8QI, then logical-shift the DI value right by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination register in V8QImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})