Remove dead pattern on ARM.
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Permitted values are "vadd", "vmin" and "vmax"; the default is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the VDX modes.  The insn condition requires that at
;; least one of the two operands is a register.  The output code picks the
;; assembler text per constraint alternative.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDX 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r, ?Us")
        (match_operand:VDX 1 "general_operand"
          " w,w, Dn,Uni, w, r, Usi,r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Holds the immediate-move template; static because the returned
     pointer must stay valid after this code returns.  */
  static char imm_buf[40];
  int elt_width;

  switch (which_alternative)
    {
    case 0:
      return "vmov\t%P0, %P1 @ <mode>";

    case 1:
    case 3:
      return output_move_neon (operands);

    case 2:
      {
        /* Immediate source: the helper rewrites operands[1] and reports
           the element width to print.  */
        int valid_imm
          = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &elt_width);

        gcc_assert (valid_imm != 0);

        /* A width of zero selects the vmov.f32 spelling.  */
        if (elt_width == 0)
          return "vmov.f32\t%P0, %1 @ <mode>";

        sprintf (imm_buf, "vmov.i%d\t%%P0, %%x1 @ <mode>", elt_width);
        return imm_buf;
      }

    case 4:
      return "vmov\t%Q0, %R0, %P1 @ <mode>";

    case 5:
      return "vmov\t%P0, %Q1, %R1 @ <mode>";

    default:
      return output_move_double (operands, true, NULL);
    }
}
  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                     neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\
                     neon_load1_2reg, neon_store1_2reg")
   (set_attr "length" "4,4,4,4,4,4,8,8")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*")])
70
;; Move pattern for the VQXMOV modes.  The insn condition requires that at
;; least one of the two operands is a register.  The output code picks the
;; assembler text per constraint alternative.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dn,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Holds the immediate-move template; static because the returned
     pointer must stay valid after this code returns.  */
  static char imm_buf[40];
  int elt_width;

  switch (which_alternative)
    {
    case 0:
      return "vmov\t%q0, %q1 @ <mode>";

    case 1:
    case 3:
      return output_move_neon (operands);

    case 2:
      {
        /* Immediate source: the helper rewrites operands[1] and reports
           the element width to print.  */
        int valid_imm
          = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &elt_width);

        gcc_assert (valid_imm != 0);

        /* A width of zero selects the vmov.f32 spelling.  */
        if (elt_width == 0)
          return "vmov.f32\t%q0, %1 @ <mode>";

        sprintf (imm_buf, "vmov.i%d\t%%q0, %%1 @ <mode>", elt_width);
        return imm_buf;
      }

    case 4:
      return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";

    case 5:
      return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";

    default:
      return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
                     mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; TImode move expander.  While new pseudos may still be created, a move
;; whose destination is not a REG gets its source forced into a register.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (match_operand:TI 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (TImode, operands[1]);
})
127
;; Move expander for the VSTRUCT modes.  While new pseudos may still be
;; created, a move whose destination is not a REG gets its source forced
;; into a register.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
        (match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
139
;; V4HF move expander, available when both TARGET_NEON and TARGET_FP16 hold.
(define_expand "movv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand")
        (match_operand:V4HF 1 "s_register_operand"))]
  "TARGET_NEON && TARGET_FP16"
{
  /* force_reg is needed here: otherwise TARGET_CAN_CHANGE_MODE_CLASS
     triggers an ICE on big-endian, where the subregs cannot be
     extracted.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (V4HFmode, operands[1]);
})
154
;; V8HF move expander, available when both TARGET_NEON and TARGET_FP16 hold.
(define_expand "movv8hf"
  [(set (match_operand:V8HF 0 "")
        (match_operand:V8HF 1 ""))]
  "TARGET_NEON && TARGET_FP16"
{
  /* force_reg is needed here: otherwise TARGET_CAN_CHANGE_MODE_CLASS
     triggers an ICE on big-endian, where the subregs cannot be
     extracted.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (V8HFmode, operands[1]);
})
169
;; Move insn for the VSTRUCT modes; at least one operand must be a
;; register.  The register-to-register alternative is output as "#";
;; the two memory alternatives go through output_move_neon.  The length
;; attribute is computed by arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 0)
    return "#";

  if (which_alternative == 1 || which_alternative == 2)
    return output_move_neon (operands);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
186
;; After reload, split an EImode register-to-register move into a TImode
;; piece at the base register and a DImode piece at base + 4.
;; neon_disambiguate_copy fills in the operands of the two emitted sets.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  rtx dst_parts[2], src_parts[2];
  int dst_base = REGNO (operands[0]);
  int src_base = REGNO (operands[1]);

  /* First piece: TImode at the base register.  */
  dst_parts[0] = gen_rtx_REG (TImode, dst_base);
  src_parts[0] = gen_rtx_REG (TImode, src_base);

  /* Second piece: DImode, four register numbers further on.  */
  dst_parts[1] = gen_rtx_REG (DImode, dst_base + 4);
  src_parts[1] = gen_rtx_REG (DImode, src_base + 4);

  neon_disambiguate_copy (operands, dst_parts, src_parts, 2);
})
205
;; After reload, split an OImode register-to-register move into two TImode
;; pieces at register offsets 0 and 4.  neon_disambiguate_copy fills in the
;; operands of the emitted sets.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  rtx dst_parts[2], src_parts[2];
  int dst_base = REGNO (operands[0]);
  int src_base = REGNO (operands[1]);
  int i;

  for (i = 0; i < 2; i++)
    {
      dst_parts[i] = gen_rtx_REG (TImode, dst_base + 4 * i);
      src_parts[i] = gen_rtx_REG (TImode, src_base + 4 * i);
    }

  neon_disambiguate_copy (operands, dst_parts, src_parts, 2);
})
224
;; After reload, split a CImode register-to-register move into three TImode
;; pieces at register offsets 0, 4 and 8.  neon_disambiguate_copy fills in
;; the operands of the emitted sets.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  rtx dst_parts[3], src_parts[3];
  int dst_base = REGNO (operands[0]);
  int src_base = REGNO (operands[1]);
  int i;

  for (i = 0; i < 3; i++)
    {
      dst_parts[i] = gen_rtx_REG (TImode, dst_base + 4 * i);
      src_parts[i] = gen_rtx_REG (TImode, src_base + 4 * i);
    }

  neon_disambiguate_copy (operands, dst_parts, src_parts, 3);
})
246
;; After reload, split an XImode register-to-register move into four TImode
;; pieces at register offsets 0, 4, 8 and 12.  neon_disambiguate_copy fills
;; in the operands of the emitted sets.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  rtx dst_parts[4], src_parts[4];
  int dst_base = REGNO (operands[0]);
  int src_base = REGNO (operands[1]);
  int i;

  for (i = 0; i < 4; i++)
    {
      dst_parts[i] = gen_rtx_REG (TImode, dst_base + 4 * i);
      src_parts[i] = gen_rtx_REG (TImode, src_base + 4 * i);
    }

  neon_disambiguate_copy (operands, dst_parts, src_parts, 4);
})
271
;; Misaligned vector move expander for the VDQX modes.  Only enabled for
;; little-endian NEON targets with unaligned_access.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
                     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  const bool dest_is_reg = s_register_operand (operands[0], <MODE>mode);

  /* Expansion of this pattern must not fail.  When neither operand is a
     register (e.g. memory := constant, which the auto-vectorizer can
     create), load operand 1 into a register first.  */
  if (!dest_is_reg && !s_register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* The operand whose address may need fixing is the one opposite a
     register destination, otherwise the destination itself.  */
  rtx mem_operand = dest_is_reg ? operands[1] : operands[0];

  /* Legitimize address.  */
  if (!neon_vector_mem_operand (mem_operand, 2, true))
    XEXP (mem_operand, 0) = force_reg (Pmode, XEXP (mem_operand, 0));
})
296
;; Misaligned store of a VDX-mode register (operand 1) to memory (operand 0),
;; emitted as vst1.  Only enabled for little-endian NEON targets with
;; unaligned_access.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
304
;; Misaligned load of a VDX-mode register (operand 0) from memory (operand 1),
;; emitted as vld1.  Only enabled for little-endian NEON targets with
;; unaligned_access.
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
308 " Um")]
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
313
;; Misaligned store of a VQX-mode register (operand 1) to memory (operand 0),
;; emitted as vst1 with the %q register spelling.  Only enabled for
;; little-endian NEON targets with unaligned_access.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
321
;; Misaligned load of a VQX-mode register (operand 0) from memory (operand 1),
;; emitted as vld1 with the %q register spelling.  Only enabled for
;; little-endian NEON targets with unaligned_access.
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
325 " Um")]
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
330
;; Insert scalar operand 1 into one lane of a VD_LANE vector.  Operand 2 is
;; a bit mask; the lane index is the position of its lowest set bit.
;; Operand 3 is tied to the destination.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
        (vec_merge:VD_LANE
          (vec_duplicate:VD_LANE
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  int lane = ffs ((int) INTVAL (operands[2])) - 1;

  /* Lanes are counted from the other end on big-endian.  */
  if (BYTES_BIG_ENDIAN)
    lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;

  /* Replace the mask with the lane number that %c2 prints.  */
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
351
;; Insert scalar operand 1 into one lane of a VQ2 vector.  Operand 2 is a
;; bit mask; the element index is the position of its lowest set bit.  The
;; output code retargets operand 0 at the <V_HALF>-mode register that holds
;; the element and prints the lane number within that half.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT index = ffs ((int) INTVAL (operands[2])) - 1;
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = index % lanes_per_half;
  /* Register-number offset of the half containing the element: 0 or 2.  */
  int half_offset = (index / lanes_per_half) * 2;
  int base_regno = REGNO (operands[0]);

  /* Lanes are counted from the other end on big-endian.  */
  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[0] = gen_rtx_REG (<V_HALF>mode, base_regno + half_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)
380
;; Insert DImode operand 1 into one element of a V2DI vector.  Operand 2 is
;; a bit mask; the element index is the position of its lowest set bit.  The
;; output code retargets operand 0 at the DImode register for that element
;; (base register number plus twice the index).
(define_insn "vec_setv2di_internal"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT index = ffs ((int) INTVAL (operands[2])) - 1;
  int elt_regno = REGNO (operands[0]) + 2 * index;

  operands[0] = gen_rtx_REG (DImode, elt_regno);

  return (which_alternative == 0
          ? "vld1.64\t%P0, %A1"
          : "vmov\t%P0, %Q1, %R1");
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)
402
;; Standard-name vec_set expander for the VDQ modes.  Operand 2 is a lane
;; index; it is converted to the single-bit mask that the
;; vec_set<mode>_internal pattern takes, with operand 0 passed as both
;; destination and merge input.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand:<V_elem> 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx lane_mask = GEN_INT (HOST_WIDE_INT_1 << INTVAL (operands[2]));

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         lane_mask, operands[0]));
  DONE;
})
414
;; Extract the lane selected by operand 2 from a VD_LANE vector (operand 1)
;; into memory or a core register (operand 0).
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  /* Lanes are counted from the other end on big-endian.  */
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);

      lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (lane);
    }

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
436
;; Extract the element selected by operand 2 from a VQ2 vector (operand 1)
;; into memory or a core register (operand 0).  The output code retargets
;; operand 1 at the <V_HALF>-mode register that holds the element and
;; prints the lane number within that half.
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = INTVAL (operands[2]) % lanes_per_half;
  /* Register-number offset of the half containing the element: 0 or 2.  */
  int half_offset = (INTVAL (operands[2]) / lanes_per_half) * 2;
  int base_regno = REGNO (operands[1]);

  /* Lanes are counted from the other end on big-endian.  */
  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[1] = gen_rtx_REG (<V_HALF>mode, base_regno + half_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
462
;; Extract the DImode element selected by operand 2 from a V2DI vector
;; (operand 1).  The output code retargets operand 1 at the DImode register
;; for that element (base register number plus twice the index).
(define_insn "vec_extractv2didi"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int elt_regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, elt_regno);

  return (which_alternative == 0
          ? "vst1.64\t{%P1}, %A0 @ v2di"
          : "vmov\t%Q0, %R0, %P1 @ v2di");
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)
481
;; Vector initialisation expander for the VDQ modes.  All of the work is
;; delegated to neon_expand_vector_init, which receives the destination
;; (operand 0) and the initialiser (operand 1); DONE means nothing from the
;; RTL template itself is emitted.
482 (define_expand "vec_init<mode><V_elem_l>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
485 "TARGET_NEON"
486 {
487 neon_expand_vector_init (operands[0], operands[1]);
488 DONE;
489 })
490
491 ;; Doubleword and quadword arithmetic.
492
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
495
;; Vector add (vadd) for the VDQ modes.  Float modes are only accepted when
;; flag_unsafe_math_optimizations is set.  The type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
502 [(set (attr "type")
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
506 )
507
508 ;; As with SFmode, full support for HFmode vector arithmetic is only available
509 ;; when flag_unsafe_math_optimizations is enabled.
510
;; Standard-name vector add for the VH modes.  Requires TARGET_NEON_FP16INST
;; and flag_unsafe_math_optimizations.
511 (define_insn "add<mode>3"
512 [(set
513 (match_operand:VH 0 "s_register_operand" "=w")
514 (plus:VH
515 (match_operand:VH 1 "s_register_operand" "w")
516 (match_operand:VH 2 "s_register_operand" "w")))]
517 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
518 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
519 [(set (attr "type")
520 (if_then_else (match_test "<Is_float_mode>")
521 (const_string "neon_fp_addsub_s<q>")
522 (const_string "neon_add<q>")))]
523 )
524
;; Vector add for the VH modes with the same RTL and output template as
;; add<mode>3 for VH, but conditioned on TARGET_NEON_FP16INST alone (no
;; flag_unsafe_math_optimizations requirement).
525 (define_insn "add<mode>3_fp16"
526 [(set
527 (match_operand:VH 0 "s_register_operand" "=w")
528 (plus:VH
529 (match_operand:VH 1 "s_register_operand" "w")
530 (match_operand:VH 2 "s_register_operand" "w")))]
531 "TARGET_NEON_FP16INST"
532 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
533 [(set (attr "type")
534 (if_then_else (match_test "<Is_float_mode>")
535 (const_string "neon_fp_addsub_s<q>")
536 (const_string "neon_add<q>")))]
537 )
538
;; DImode add with both NEON and core-register alternatives; clobbers the
;; condition-code register.  Alternatives 0 and 3 emit vadd.i64; every
;; other alternative is output as "#".
(define_insn "adddi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
                 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0:
    case 3:
      return "vadd.i64\t%P0, %P1, %P2";

    case 1:
    case 2:
    case 4:
    case 5:
    case 6:
      return "#";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
                     multiple,multiple,multiple")
   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
   (set_attr "length" "*,8,8,*,8,8,8")
   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)
564
;; Vector subtract (vsub) for the VDQ modes.  Float modes are only accepted
;; when flag_unsafe_math_optimizations is set.  The type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
565 (define_insn "*sub<mode>3_neon"
566 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
567 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
568 (match_operand:VDQ 2 "s_register_operand" "w")))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
571 [(set (attr "type")
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_addsub_s<q>")
574 (const_string "neon_sub<q>")))]
575 )
576
;; Standard-name vector subtract for the VH modes.  Requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
;; The type attribute uses the floating-point add/sub class, as the VH
;; add<mode>3 pattern and the float case of *sub<mode>3_neon do, rather
;; than the integer neon_sub<q> class.
(define_insn "sub<mode>3"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (minus:VH
      (match_operand:VH 1 "s_register_operand" "w")
      (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_addsub_s<q>")]
)
587
;; Vector subtract for the VH modes, conditioned on TARGET_NEON_FP16INST
;; alone (no flag_unsafe_math_optimizations requirement).
;; The type attribute uses the floating-point add/sub class, as the VH
;; add<mode>3_fp16 pattern does, rather than the integer neon_sub<q> class.
(define_insn "sub<mode>3_fp16"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (minus:VH
      (match_operand:VH 1 "s_register_operand" "w")
      (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_addsub_s<q>")]
)
598
;; DImode subtract with both NEON and core-register alternatives; clobbers
;; the condition-code register.  Alternatives 0 and 4 emit vsub.i64;
;; alternatives 1-3 emit a subs/sbc pair on the two word halves.
(define_insn "subdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
        (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
                  (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  if (which_alternative == 0 || which_alternative == 4)
    return "vsub.i64\t%P0, %P1, %P2";

  if (which_alternative >= 1 && which_alternative <= 3)
    return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";

  gcc_unreachable ();
}
  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
   (set_attr "conds" "*,clob,clob,clob,*")
   (set_attr "length" "*,8,8,8,*")
   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)
621
;; Vector multiply (vmul) for the VDQW modes.  Float modes are only accepted
;; when flag_unsafe_math_optimizations is set.  The type attribute is
;; neon_fp_mul_s<q> for float modes and neon_mul_<V_elem_ch><q> otherwise.
622 (define_insn "*mul<mode>3_neon"
623 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
624 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
625 (match_operand:VDQW 2 "s_register_operand" "w")))]
626 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
627 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
628 [(set (attr "type")
629 (if_then_else (match_test "<Is_float_mode>")
630 (const_string "neon_fp_mul_s<q>")
631 (const_string "neon_mul_<V_elem_ch><q>")))]
632 )
633
;; Multiply-accumulate (vmla) for the VDQW modes: operand 0 = operand 2 *
;; operand 3 + operand 1, with the accumulator (operand 1) tied to the
;; destination.  Float modes are only accepted when
;; flag_unsafe_math_optimizations is set.
634 (define_insn "mul<mode>3add<mode>_neon"
635 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
636 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
637 (match_operand:VDQW 3 "s_register_operand" "w"))
638 (match_operand:VDQW 1 "s_register_operand" "0")))]
639 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
640 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
641 [(set (attr "type")
642 (if_then_else (match_test "<Is_float_mode>")
643 (const_string "neon_fp_mla_s<q>")
644 (const_string "neon_mla_<V_elem_ch><q>")))]
645 )
646
;; Multiply-accumulate (vmla.f16) for the VH modes: operand 0 = operand 2 *
;; operand 3 + operand 1, with the accumulator (operand 1) tied to the
;; destination.  Requires TARGET_NEON_FP16INST.
647 (define_insn "mul<mode>3add<mode>_neon"
648 [(set (match_operand:VH 0 "s_register_operand" "=w")
649 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
650 (match_operand:VH 3 "s_register_operand" "w"))
651 (match_operand:VH 1 "s_register_operand" "0")))]
652 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
653 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
654 [(set_attr "type" "neon_fp_mla_s<q>")]
655 )
656
;; Multiply-subtract (vmls) for the VDQW modes: operand 0 = operand 1 -
;; operand 2 * operand 3, with operand 1 tied to the destination.  Float
;; modes are only accepted when flag_unsafe_math_optimizations is set.
657 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
658 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
659 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
660 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
661 (match_operand:VDQW 3 "s_register_operand" "w"))))]
662 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
663 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
664 [(set (attr "type")
665 (if_then_else (match_test "<Is_float_mode>")
666 (const_string "neon_fp_mla_s<q>")
667 (const_string "neon_mla_<V_elem_ch><q>")))]
668 )
669
670 ;; Fused multiply-accumulate
671 ;; We define each insn twice here:
672 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
673 ;; phase can use it when converting to FMA.
674 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Standard-name fused multiply-add (vfma) for the VCVTF modes; the addend
;; (operand 3) is tied to the destination.  Requires TARGET_FMA and
;; flag_unsafe_math_optimizations.
675 (define_insn "fma<VCVTF:mode>4"
676 [(set (match_operand:VCVTF 0 "register_operand" "=w")
677 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
678 (match_operand:VCVTF 2 "register_operand" "w")
679 (match_operand:VCVTF 3 "register_operand" "0")))]
680 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
681 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
682 [(set_attr "type" "neon_fp_mla_s<q>")]
683 )
684
;; Intrinsic variant of the VCVTF fused multiply-add: same RTL and output,
;; but without the flag_unsafe_math_optimizations requirement.
685 (define_insn "fma<VCVTF:mode>4_intrinsic"
686 [(set (match_operand:VCVTF 0 "register_operand" "=w")
687 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
688 (match_operand:VCVTF 2 "register_operand" "w")
689 (match_operand:VCVTF 3 "register_operand" "0")))]
690 "TARGET_NEON && TARGET_FMA"
691 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
692 [(set_attr "type" "neon_fp_mla_s<q>")]
693 )
694
;; Standard-name fused multiply-add (vfma) for the VH modes; the addend
;; (operand 3) is tied to the destination.  Requires TARGET_NEON_FP16INST
;; and flag_unsafe_math_optimizations.
695 (define_insn "fma<VH:mode>4"
696 [(set (match_operand:VH 0 "register_operand" "=w")
697 (fma:VH
698 (match_operand:VH 1 "register_operand" "w")
699 (match_operand:VH 2 "register_operand" "w")
700 (match_operand:VH 3 "register_operand" "0")))]
701 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
702 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703 [(set_attr "type" "neon_fp_mla_s<q>")]
704 )
705
;; Intrinsic variant of the VH fused multiply-add: same RTL and output, but
;; conditioned on TARGET_NEON_FP16INST alone.
706 (define_insn "fma<VH:mode>4_intrinsic"
707 [(set (match_operand:VH 0 "register_operand" "=w")
708 (fma:VH
709 (match_operand:VH 1 "register_operand" "w")
710 (match_operand:VH 2 "register_operand" "w")
711 (match_operand:VH 3 "register_operand" "0")))]
712 "TARGET_NEON_FP16INST"
713 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
714 [(set_attr "type" "neon_fp_mla_s<q>")]
715 )
716
;; Fused multiply-subtract (vfms) for the VCVTF modes, matched as an fma
;; whose first multiplicand is negated; the addend (operand 3) is tied to
;; the destination.  Requires TARGET_FMA and flag_unsafe_math_optimizations.
717 (define_insn "*fmsub<VCVTF:mode>4"
718 [(set (match_operand:VCVTF 0 "register_operand" "=w")
719 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
720 (match_operand:VCVTF 2 "register_operand" "w")
721 (match_operand:VCVTF 3 "register_operand" "0")))]
722 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
723 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724 [(set_attr "type" "neon_fp_mla_s<q>")]
725 )
726
;; Intrinsic variant of the VCVTF fused multiply-subtract: same RTL and
;; output as *fmsub<VCVTF:mode>4, but without the
;; flag_unsafe_math_optimizations requirement.
727 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
728 [(set (match_operand:VCVTF 0 "register_operand" "=w")
729 (fma:VCVTF
730 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
731 (match_operand:VCVTF 2 "register_operand" "w")
732 (match_operand:VCVTF 3 "register_operand" "0")))]
733 "TARGET_NEON && TARGET_FMA"
734 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735 [(set_attr "type" "neon_fp_mla_s<q>")]
736 )
737
;; Fused multiply-subtract (vfms) for the VH modes, matched as an fma whose
;; first multiplicand is negated; the addend (operand 3) is tied to the
;; destination.  Conditioned on TARGET_NEON_FP16INST alone.
738 (define_insn "fmsub<VH:mode>4_intrinsic"
739 [(set (match_operand:VH 0 "register_operand" "=w")
740 (fma:VH
741 (neg:VH (match_operand:VH 1 "register_operand" "w"))
742 (match_operand:VH 2 "register_operand" "w")
743 (match_operand:VH 3 "register_operand" "0")))]
744 "TARGET_NEON_FP16INST"
745 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
746 [(set_attr "type" "neon_fp_mla_s<q>")]
747 )
748
;; vrint<variant>.f32 for the VCVTF modes, one pattern per NEON_VRINT unspec.
;; Requires TARGET_NEON and TARGET_VFP5.
749 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
750 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
751 (unspec:VCVTF [(match_operand:VCVTF 1
752 "s_register_operand" "w")]
753 NEON_VRINT))]
754 "TARGET_NEON && TARGET_VFP5"
755 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
756 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
757 )
758
;; vcvt<variant>.<su>32.f32: a FIXUORS conversion applied to a NEON_VCVT
;; unspec of a VCVTF operand, producing a <V_cmp_result>-mode value.
;; Requires TARGET_NEON and TARGET_VFP5; marked as not predicable.
759 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
760 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
761 (FIXUORS:<V_cmp_result> (unspec:VCVTF
762 [(match_operand:VCVTF 1 "register_operand" "w")]
763 NEON_VCVT)))]
764 "TARGET_NEON && TARGET_VFP5"
765 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
766 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
767 (set_attr "predicable" "no")]
768 )
769
;; Vector inclusive-or for the VDQ modes.  Alternative 0 takes a register
;; second operand and emits vorr directly; alternative 1 takes an immediate
;; and lets neon_output_logic_immediate build the instruction text.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vorr", &operands[2], <MODE>mode, 0,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
786
787 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
788 ;; vorr. We support the pseudo-instruction vand instead, because that
789 ;; corresponds to the canonical form the middle-end expects to use for
790 ;; immediate bitwise-ANDs.
791
;; Vector AND for the VDQ modes.  Alternative 0 takes a register second
;; operand and emits vand directly; alternative 1 takes an immediate and
;; lets neon_output_logic_immediate build the instruction text.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vand", &operands[2], <MODE>mode, 1,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
808
;; Vector OR-NOT (vorn) for the VDQ modes: operand 0 = operand 1 | ~operand 2.
809 (define_insn "orn<mode>3_neon"
810 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
811 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
812 (match_operand:VDQ 1 "s_register_operand" "w")))]
813 "TARGET_NEON"
814 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
815 [(set_attr "type" "neon_logic<q>")]
816 )
817
818 ;; TODO: investigate whether we should disable
819 ;; this and bicdi3_neon for the A8 in line with the other
820 ;; changes above.
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.  Alternative 0 emits
;; vorn directly; the other alternatives are output as "#" and are split
;; after reload when operand 0 is not a VFP register.  For TARGET_THUMB2 the
;; split yields two SImode or-not sets on the low and high parts; otherwise
;; the preparation code emits gen_one_cmpldi2 followed by gen_iordi3 and
;; finishes with DONE.
821 (define_insn_and_split "orndi3_neon"
822 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
823 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
824 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
825 "TARGET_NEON"
826 "@
827 vorn\t%P0, %P1, %P2
828 #
829 #
830 #"
831 "reload_completed &&
832 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
833 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
834 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
835 "
836 {
837 if (TARGET_THUMB2)
838 {
839 operands[3] = gen_highpart (SImode, operands[0]);
840 operands[0] = gen_lowpart (SImode, operands[0]);
841 operands[4] = gen_highpart (SImode, operands[2]);
842 operands[2] = gen_lowpart (SImode, operands[2]);
843 operands[5] = gen_highpart (SImode, operands[1]);
844 operands[1] = gen_lowpart (SImode, operands[1]);
845 }
846 else
847 {
848 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
849 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
850 DONE;
851 }
852 }"
853 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
854 (set_attr "length" "*,16,8,8")
855 (set_attr "arch" "any,a,t2,t2")]
856 )
857
;; Vector bit-clear (vbic) for the VDQ modes: operand 0 = operand 1 &
;; ~operand 2.
858 (define_insn "bic<mode>3_neon"
859 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
860 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
861 (match_operand:VDQ 1 "s_register_operand" "w")))]
862 "TARGET_NEON"
863 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
864 [(set_attr "type" "neon_logic<q>")]
865 )
866
867 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear: operand 0 = operand 1 & ~operand 2.  Alternative 0
;; emits vbic directly; the two core-register alternatives are output as "#".
;; NOTE(review): no splitter for the "#" alternatives is visible in this
;; part of the file; presumably one is defined elsewhere -- confirm.
868 (define_insn "bicdi3_neon"
869 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
870 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
871 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
872 "TARGET_NEON"
873 "@
874 vbic\t%P0, %P1, %P2
875 #
876 #"
877 [(set_attr "type" "neon_logic,multiple,multiple")
878 (set_attr "length" "*,8,8")]
879 )
880
;; Vector exclusive-or (veor) for the VDQ modes; register operands only.
881 (define_insn "xor<mode>3"
882 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
883 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
884 (match_operand:VDQ 2 "s_register_operand" "w")))]
885 "TARGET_NEON"
886 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887 [(set_attr "type" "neon_logic<q>")]
888 )
889
;; Vector bitwise complement (vmvn) for the VDQ modes.
890 (define_insn "one_cmpl<mode>2"
891 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
892 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
893 "TARGET_NEON"
894 "vmvn\t%<V_reg>0, %<V_reg>1"
895 [(set_attr "type" "neon_move<q>")]
896 )
897
;; Vector absolute value (vabs) for the VDQW modes.  The type attribute is
;; neon_fp_abs_s<q> for float modes and neon_abs<q> otherwise.
898 (define_insn "abs<mode>2"
899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
900 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
901 "TARGET_NEON"
902 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
903 [(set (attr "type")
904 (if_then_else (match_test "<Is_float_mode>")
905 (const_string "neon_fp_abs_s<q>")
906 (const_string "neon_abs<q>")))]
907 )
908
;; Vector negate (vneg) for the VDQW modes.  The type attribute is
;; neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.
909 (define_insn "neg<mode>2"
910 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
911 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
912 "TARGET_NEON"
913 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
914 [(set (attr "type")
915 (if_then_else (match_test "<Is_float_mode>")
916 (const_string "neon_fp_neg_s<q>")
917 (const_string "neon_neg<q>")))]
918 )
919
;; DImode negation placeholder.  The output template is always "#", so every
;; alternative is expected to be rewritten by the two define_splits that
;; follow (one for VFP registers, one for core registers).
;; Alternatives 0 and 1 use "w" registers (alternative 1 additionally asks for
;; a "w" scratch in operand 2); alternatives 2 and 3 use "r" registers.
;; The pattern clobbers the CC register.
920 (define_insn "negdi2_neon"
921 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
922 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
923 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
924 (clobber (reg:CC CC_REGNUM))]
925 "TARGET_NEON"
926 "#"
927 [(set_attr "length" "8")
928 (set_attr "type" "multiple")]
929 )
930
931 ; Split negdi2_neon for vfp registers
;; Applies after reload when operand 0 is a VFP register.  The negation is
;; rewritten as 0 - operand 1: operand 2 is first set to zero, then the DImode
;; subtraction is emitted with the CC clobber kept.
;; If operand 2 is not a REG (no scratch register was allocated), the
;; preparation code substitutes operand 0 as the zeroed temporary.
932 (define_split
933 [(set (match_operand:DI 0 "s_register_operand" "")
934 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
935 (clobber (match_scratch:DI 2 ""))
936 (clobber (reg:CC CC_REGNUM))]
937 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
938 [(set (match_dup 2) (const_int 0))
939 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
940 (clobber (reg:CC CC_REGNUM))])]
941 {
942 if (!REG_P (operands[2]))
943 operands[2] = operands[0];
944 }
945 )
946
947 ; Split negdi2_neon for core registers
;; Applies after reload (TARGET_32BIT) when operand 0 satisfies
;; arm_general_register_operand.  The replacement is the same DImode negation
;; with only the CC clobber, i.e. the DImode scratch (operand 2) is dropped.
;; NOTE(review): the resulting insn is presumably matched by a core-register
;; negdi2 pattern defined outside this file section -- confirm.
948 (define_split
949 [(set (match_operand:DI 0 "s_register_operand" "")
950 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
951 (clobber (match_scratch:DI 2 ""))
952 (clobber (reg:CC CC_REGNUM))]
953 "TARGET_32BIT && reload_completed
954 && arm_general_register_operand (operands[0], DImode)"
955 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
956 (clobber (reg:CC CC_REGNUM))])]
957 ""
958 )
959
;; Absolute value / negation (ABSNEG code iterator) for the VH iterator modes,
;; enabled by TARGET_NEON_FP16INST.  The mnemonic is "v" followed by
;; <absneg_str>.  Both operations share the neon_abs<q> "type" attribute.
960 (define_insn "<absneg_str><mode>2"
961 [(set (match_operand:VH 0 "s_register_operand" "=w")
962 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
963 "TARGET_NEON_FP16INST"
964 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
965 [(set_attr "type" "neon_abs<q>")]
966 )
967
;; Expander named neon_v<absneg_str><mode> for the VH iterator modes.  It
;; performs no transformation of its own: it emits the <absneg_str><mode>2
;; insn with the same two operands and finishes with DONE.
968 (define_expand "neon_v<absneg_str><mode>"
969 [(set
970 (match_operand:VH 0 "s_register_operand")
971 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
972 "TARGET_NEON_FP16INST"
973 {
974 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
975 DONE;
976 })
977
;; Rounding operations on the VH iterator modes, one pattern per member of the
;; FP16_RND unspec iterator.  The mnemonic comes from <fp16_rnd_insn>.
;; Enabled by TARGET_NEON_FP16INST.
978 (define_insn "neon_v<fp16_rnd_str><mode>"
979 [(set (match_operand:VH 0 "s_register_operand" "=w")
980 (unspec:VH
981 [(match_operand:VH 1 "s_register_operand" "w")]
982 FP16_RND))]
983 "TARGET_NEON_FP16INST"
984 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
985 [(set_attr "type" "neon_fp_round_s<q>")]
986 )
987
;; UNSPEC_VRSQRTE on the VH iterator modes, emitted as "vrsqrte.f16" (the
;; element suffix is hard-coded rather than taken from a mode attribute).
;; Enabled by TARGET_NEON_FP16INST.
988 (define_insn "neon_vrsqrte<mode>"
989 [(set (match_operand:VH 0 "s_register_operand" "=w")
990 (unspec:VH
991 [(match_operand:VH 1 "s_register_operand" "w")]
992 UNSPEC_VRSQRTE))]
993 "TARGET_NEON_FP16INST"
994 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
995 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
996 )
997
;; Unsigned vector minimum for the VDQIW iterator modes, emitted as VMIN with
;; the unsigned element suffix <V_u_elem>.
998 (define_insn "*umin<mode>3_neon"
999 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1000 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1001 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1002 "TARGET_NEON"
1003 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1004 [(set_attr "type" "neon_minmax<q>")]
1005 )
1006
;; Unsigned vector maximum for the VDQIW iterator modes, emitted as VMAX with
;; the unsigned element suffix <V_u_elem>.
1007 (define_insn "*umax<mode>3_neon"
1008 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1009 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1010 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1011 "TARGET_NEON"
1012 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1013 [(set_attr "type" "neon_minmax<q>")]
1014 )
1015
;; Signed vector minimum (RTL smin) for the VDQW iterator modes, emitted as
;; VMIN with the <V_s_elem> element suffix.  The "type" attribute is
;; neon_fp_minmax_s<q> when <Is_float_mode> holds and neon_minmax<q> otherwise.
1016 (define_insn "*smin<mode>3_neon"
1017 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1018 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1019 (match_operand:VDQW 2 "s_register_operand" "w")))]
1020 "TARGET_NEON"
1021 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1022 [(set (attr "type")
1023 (if_then_else (match_test "<Is_float_mode>")
1024 (const_string "neon_fp_minmax_s<q>")
1025 (const_string "neon_minmax<q>")))]
1026 )
1027
;; Signed vector maximum (RTL smax) for the VDQW iterator modes, emitted as
;; VMAX with the <V_s_elem> element suffix.  The "type" attribute is
;; neon_fp_minmax_s<q> when <Is_float_mode> holds and neon_minmax<q> otherwise.
1028 (define_insn "*smax<mode>3_neon"
1029 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1030 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1031 (match_operand:VDQW 2 "s_register_operand" "w")))]
1032 "TARGET_NEON"
1033 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1034 [(set (attr "type")
1035 (if_then_else (match_test "<Is_float_mode>")
1036 (const_string "neon_fp_minmax_s<q>")
1037 (const_string "neon_minmax<q>")))]
1038 )
1039
1040 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1041 ; generic vectorizer code. It ends up creating a V2DI constructor with
1042 ; SImode elements.
1043
;; Vector left shift for the VDQIW iterator modes.
;; Alternative 0 takes the shift counts from a register (operand 2, "w") and
;; emits VSHL directly; alternative 1 takes an immediate ("Dn") and lets
;; neon_output_shift_immediate build the output template.
1044 (define_insn "vashl<mode>3"
1045 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1046 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1047 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1048 "TARGET_NEON"
1049 {
/* Register shift count.  */
1050 if (which_alternative == 0)
1051 return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
/* Immediate shift count.  */
1052 if (which_alternative == 1)
1053 return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1054 <MODE>mode,
1055 VALID_NEON_QREG_MODE (<MODE>mode),
1056 true);
/* Only two alternatives exist.  */
1057 gcc_unreachable ();
1059 }
1060 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1061 )
1062
;; Arithmetic right shift by an immediate for the VDQIW iterator modes.
;; The output template is produced by neon_output_shift_immediate, called with
;; mnemonic "vshr" and sign character 's'.
;; NOTE(review): the final `false' argument presumably tells the helper this
;; is not a left shift (vashl<mode>3 passes `true') -- confirm against the
;; helper's definition.
1063 (define_insn "vashr<mode>3_imm"
1064 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1065 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1066 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1067 "TARGET_NEON"
1068 {
1069 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1070 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1071 false);
1072 }
1073 [(set_attr "type" "neon_shift_imm<q>")]
1074 )
1075
;; Logical right shift by an immediate for the VDQIW iterator modes.
;; The output template is produced by neon_output_shift_immediate, called with
;; mnemonic "vshr" and sign character 'u'.
;; NOTE(review): the final `false' argument presumably tells the helper this
;; is not a left shift (vashl<mode>3 passes `true') -- confirm against the
;; helper's definition.
1076 (define_insn "vlshr<mode>3_imm"
1077 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1078 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1079 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1080 "TARGET_NEON"
1081 {
1082 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1083 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1084 false);
1085 }
1086 [(set_attr "type" "neon_shift_imm<q>")]
1087 )
1088
1089 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1090 ; negative amount, with signed operands. This is essentially the same as
1091 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
1092 ; with negative shift amounts.
1093
;; VSHL with a register shift count and the signed element suffix <V_s_elem>,
;; wrapped in UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander emits this insn
;; with a negated count operand.
1094 (define_insn "ashl<mode>3_signed"
1095 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1096 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1097 (match_operand:VDQI 2 "s_register_operand" "w")]
1098 UNSPEC_ASHIFT_SIGNED))]
1099 "TARGET_NEON"
1100 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1101 [(set_attr "type" "neon_shift_reg<q>")]
1102 )
1103
1104 ; Used for implementing logical shift-right, which is a left-shift by a negative
1105 ; amount, with unsigned operands.
1106
;; VSHL with a register shift count and the unsigned element suffix
;; <V_u_elem>, wrapped in UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander
;; emits this insn with a negated count operand.
1107 (define_insn "ashl<mode>3_unsigned"
1108 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1109 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1110 (match_operand:VDQI 2 "s_register_operand" "w")]
1111 UNSPEC_ASHIFT_UNSIGNED))]
1112 "TARGET_NEON"
1113 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1114 [(set_attr "type" "neon_shift_reg<q>")]
1115 )
1116
;; Arithmetic right shift expander for the VDQIW iterator modes.
;; An immediate count goes straight to vashr<mode>3_imm.  A register count is
;; negated into a fresh pseudo and fed to ashl<mode>3_signed, i.e. the right
;; shift is performed as a left shift by the negated amount.
1117 (define_expand "vashr<mode>3"
1118 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1119 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1120 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1121 "TARGET_NEON"
1122 {
1123 if (!s_register_operand (operands[2], <MODE>mode))
1124 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1125 else
1126 {
1127 rtx neg_count = gen_reg_rtx (<MODE>mode);
1128 emit_insn (gen_neg<mode>2 (neg_count, operands[2]));
1129 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg_count));
1130 }
1131 DONE;
1132 })
1133
;; Logical right shift expander for the VDQIW iterator modes.
;; An immediate count goes straight to vlshr<mode>3_imm.  A register count is
;; negated into a fresh pseudo and fed to ashl<mode>3_unsigned, i.e. the right
;; shift is performed as a left shift by the negated amount.
1134 (define_expand "vlshr<mode>3"
1135 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1136 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1137 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1138 "TARGET_NEON"
1139 {
1140 if (!s_register_operand (operands[2], <MODE>mode))
1141 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1142 else
1143 {
1144 rtx neg_count = gen_reg_rtx (<MODE>mode);
1145 emit_insn (gen_neg<mode>2 (neg_count, operands[2]));
1146 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg_count));
1147 }
1148 DONE;
1149 })
1150
1151 ;; 64-bit shifts
1152
1153 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1154 ;; leaving the upper half uninitialized. This is OK since the shift
1155 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1156 ;; data flow analysis however, we pretend the full register is set
1157 ;; using an unspec.
;; Alternative 0 loads the SImode count from memory ("Um") with VLD1.32 into
;; lane 0 of the destination; alternative 1 copies it from a core register
;; ("r") with VMOV.32 into lane 0.
1158 (define_insn "neon_load_count"
1159 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1160 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1161 UNSPEC_LOAD_COUNT))]
1162 "TARGET_NEON"
1163 "@
1164 vld1.32\t{%P0[0]}, %A1
1165 vmov.32\t%P0[0], %1"
1166 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1167 )
1168
;; DImode left shift in NEON registers without any clobbers; only available
;; after reload.  Alternative 0 shifts by an immediate, which the insn
;; condition restricts to 0..63; alternative 1 shifts by a count held in a
;; "w" register.  Both emit VSHL.U64.
1169 (define_insn "ashldi3_neon_noclobber"
1170 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1171 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1172 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1173 "TARGET_NEON && reload_completed
1174 && (!CONST_INT_P (operands[2])
1175 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1176 "@
1177 vshl.u64\t%P0, %P1, %2
1178 vshl.u64\t%P0, %P1, %P2"
1179 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1180 )
1181
;; DImode left shift that may be allocated to either NEON ("w") or core ("r")
;; registers; the insn always outputs "#" and is split after reload.
;;
;; Split, destination in a VFP register:
;;   - constant count below 1: emit a plain DImode move and stop;
;;   - constant count above 63: replace the count with 63;
;;   - non-constant count: load it into the DImode scratch (operand 5) with
;;     neon_load_count and use that register as the count;
;;   then emit ashldi3_neon_noclobber.
;; Split, destination in a core register:
;;   assert that operands 0 and 1 either fully overlap or do not overlap at
;;   all, then call arm_emit_coreregs_64bit_shift with the two SImode
;;   scratches (operands 3 and 4).
1182 (define_insn_and_split "ashldi3_neon"
1183 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
1184 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
1185 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
1186 (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
1187 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1188 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
1189 (clobber (reg:CC_C CC_REGNUM))]
1190 "TARGET_NEON"
1191 "#"
1192 "TARGET_NEON && reload_completed"
1193 [(const_int 0)]
1194 "
1195 {
1196 if (IS_VFP_REGNUM (REGNO (operands[0])))
1197 {
1198 if (CONST_INT_P (operands[2]))
1199 {
1200 if (INTVAL (operands[2]) < 1)
1201 {
1202 emit_insn (gen_movdi (operands[0], operands[1]));
1203 DONE;
1204 }
1205 else if (INTVAL (operands[2]) > 63)
1206 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1207 }
1208 else
1209 {
1210 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1211 operands[2] = operands[5];
1212 }
1213
1214 /* Ditch the unnecessary clobbers. */
1215 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1216 operands[2]));
1217 }
1218 else
1219 {
1220 /* The shift expanders support either full overlap or no overlap. */
1221 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1222 || REGNO (operands[0]) == REGNO (operands[1]));
1223
1224 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1225 operands[2], operands[3], operands[4]);
1226 }
1227 DONE;
1228 }"
1229 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1230 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1231 (set_attr "type" "multiple")]
1232 )
1233
1234 ; The shift amount needs to be negated for right-shifts
;; DImode VSHL.S64 with a register count, wrapped in UNSPEC_ASHIFT_SIGNED.
;; Only available after reload; emitted by the <shift>di3_neon split for a
;; non-constant count.
1235 (define_insn "signed_shift_di3_neon"
1236 [(set (match_operand:DI 0 "s_register_operand" "=w")
1237 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1238 (match_operand:DI 2 "s_register_operand" " w")]
1239 UNSPEC_ASHIFT_SIGNED))]
1240 "TARGET_NEON && reload_completed"
1241 "vshl.s64\t%P0, %P1, %P2"
1242 [(set_attr "type" "neon_shift_reg")]
1243 )
1244
1245 ; The shift amount needs to be negated for right-shifts
;; DImode VSHL.U64 with a register count, wrapped in UNSPEC_ASHIFT_UNSIGNED.
;; Only available after reload; emitted by the <shift>di3_neon split for a
;; non-constant count.
1246 (define_insn "unsigned_shift_di3_neon"
1247 [(set (match_operand:DI 0 "s_register_operand" "=w")
1248 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1249 (match_operand:DI 2 "s_register_operand" " w")]
1250 UNSPEC_ASHIFT_UNSIGNED))]
1251 "TARGET_NEON && reload_completed"
1252 "vshl.u64\t%P0, %P1, %P2"
1253 [(set_attr "type" "neon_shift_reg")]
1254 )
1255
;; DImode arithmetic right shift by an immediate in NEON registers, without
;; clobbers.  Only available after reload; the insn condition restricts the
;; count to 1..64.  Emits VSHR.S64.
1256 (define_insn "ashrdi3_neon_imm_noclobber"
1257 [(set (match_operand:DI 0 "s_register_operand" "=w")
1258 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1259 (match_operand:DI 2 "const_int_operand" " i")))]
1260 "TARGET_NEON && reload_completed
1261 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1262 "vshr.s64\t%P0, %P1, %2"
1263 [(set_attr "type" "neon_shift_imm")]
1264 )
1265
;; DImode logical right shift by an immediate in NEON registers, without
;; clobbers.  Only available after reload; the insn condition restricts the
;; count to 1..64.  Emits VSHR.U64.
1266 (define_insn "lshrdi3_neon_imm_noclobber"
1267 [(set (match_operand:DI 0 "s_register_operand" "=w")
1268 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1269 (match_operand:DI 2 "const_int_operand" " i")))]
1270 "TARGET_NEON && reload_completed
1271 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1272 "vshr.u64\t%P0, %P1, %2"
1273 [(set_attr "type" "neon_shift_imm")]
1274 )
1275
1276 ;; ashrdi3_neon
1277 ;; lshrdi3_neon
;; DImode right shifts (RSHIFTS code iterator) that may be allocated to either
;; NEON ("w") or core ("r") registers; the insn always outputs "#" and is
;; split after reload.
;;
;; Split, destination in a VFP register:
;;   - constant count below 1: emit a plain DImode move and stop;
;;   - constant count above 64: replace the count with 64;
;;   - other constant counts: emit <shift>di3_neon_imm_noclobber;
;;   - non-constant count: negate it into the SImode scratch (operand 3),
;;     load that into the DImode scratch (operand 5) with neon_load_count and
;;     emit <shifttype>_shift_di3_neon, i.e. a left shift by the negated count.
;; Split, destination in a core register:
;;   assert that operands 0 and 1 either fully overlap or do not overlap at
;;   all, then call arm_emit_coreregs_64bit_shift with scratches 3 and 4.
1278 (define_insn_and_split "<shift>di3_neon"
1279 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
1280 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1281 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1282 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1283 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1284 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1285 (clobber (reg:CC CC_REGNUM))]
1286 "TARGET_NEON"
1287 "#"
1288 "TARGET_NEON && reload_completed"
1289 [(const_int 0)]
1290 "
1291 {
1292 if (IS_VFP_REGNUM (REGNO (operands[0])))
1293 {
1294 if (CONST_INT_P (operands[2]))
1295 {
1296 if (INTVAL (operands[2]) < 1)
1297 {
1298 emit_insn (gen_movdi (operands[0], operands[1]));
1299 DONE;
1300 }
1301 else if (INTVAL (operands[2]) > 64)
1302 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1303
1304 /* Ditch the unnecessary clobbers. */
1305 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1306 operands[1],
1307 operands[2]));
1308 }
1309 else
1310 {
1311 /* We must use a negative left-shift. */
1312 emit_insn (gen_negsi2 (operands[3], operands[2]));
1313 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1314 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1315 operands[5]));
1316 }
1317 }
1318 else
1319 {
1320 /* The shift expanders support either full overlap or no overlap. */
1321 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1322 || REGNO (operands[0]) == REGNO (operands[1]));
1323
1324 /* This clobbers CC (ASHIFTRT by register only). */
1325 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1326 operands[2], operands[3], operands[4]);
1327 }
1328
1329 DONE;
1330 }"
1331 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1332 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1333 (set_attr "type" "multiple")]
1334 )
1335
1336 ;; Widening operations
1337
;; Signed widening sum for the VQI iterator modes:
;; operand 0 = sign_extend (operand 1) + operand 2, in <V_double_width>.
;; The accumulator (operand 2) is first copied into operand 0 if they differ;
;; the low and the high half of operand 1 are then added in with two
;; vec_sel_widen_ssum_{lo,hi} insns that both accumulate into operand 0.
1338 (define_expand "widen_ssum<mode>3"
1339 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1340 (plus:<V_double_width>
1341 (sign_extend:<V_double_width>
1342 (match_operand:VQI 1 "s_register_operand" ""))
1343 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1344 "TARGET_NEON"
1345 {
1346 machine_mode vmode = GET_MODE (operands[1]);
/* Lane selectors for the two halves of the input vector.  */
1347 rtx lo_sel = arm_simd_vect_par_cnst_half (vmode, false);
1348 rtx hi_sel = arm_simd_vect_par_cnst_half (vmode, true);
1349 rtx acc = operands[0];
1350
/* Seed the destination with the incoming accumulator.  */
1352 if (acc != operands[2])
1353 emit_move_insn (acc, operands[2]);
1354
1355 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (acc, operands[1],
1356 lo_sel, acc));
1359 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (acc, operands[1],
1360 hi_sel, acc));
1363 DONE;
1364 }
1365 )
1366
;; Signed widening add of the low half of operand 1 (selected by the
;; vect_par_constant_low operand 2) into the accumulator, which is tied to the
;; destination (operand 3 has constraint "0").  Emits VADDW on %e1 for
;; little-endian and on %f1 when BYTES_BIG_ENDIAN.
1367 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1368 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1369 (plus:<V_double_width>
1370 (sign_extend:<V_double_width>
1371 (vec_select:<V_HALF>
1372 (match_operand:VQI 1 "s_register_operand" "%w")
1373 (match_operand:VQI 2 "vect_par_constant_low" "")))
1374 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1375 "TARGET_NEON"
1376 {
1377 return !BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1"
1378 : "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1379 }
1380 [(set_attr "type" "neon_add_widen")])
1381
;; Signed widening add of the high half of operand 1 (selected by the
;; vect_par_constant_high operand 2) into the accumulator, which is tied to
;; the destination (operand 3 has constraint "0").  Emits VADDW on %f1 for
;; little-endian and on %e1 when BYTES_BIG_ENDIAN.
1382 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1383 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1384 (plus:<V_double_width>
1385 (sign_extend:<V_double_width>
1386 (vec_select:<V_HALF>
1387 (match_operand:VQI 1 "s_register_operand" "%w")
1388 (match_operand:VQI 2 "vect_par_constant_high" "")))
1389 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1390 "TARGET_NEON"
1391 {
1392 return !BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1"
1393 : "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1394 }
1395 [(set_attr "type" "neon_add_widen")])
1396
;; Signed widening sum for the VW iterator modes:
;; operand 0 = sign_extend (operand 1) + operand 2, in <V_widen>.
;; Emitted as a single VADDW whose wide source is operand 2 (%q2) and whose
;; narrow source is operand 1 (%P1).
1397 (define_insn "widen_ssum<mode>3"
1398 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1399 (plus:<V_widen>
1400 (sign_extend:<V_widen>
1401 (match_operand:VW 1 "s_register_operand" "%w"))
1402 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1403 "TARGET_NEON"
1404 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1405 [(set_attr "type" "neon_add_widen")]
1406 )
1407
;; Unsigned widening sum for the VQI iterator modes:
;; operand 0 = zero_extend (operand 1) + operand 2, in <V_double_width>.
;; The accumulator (operand 2) is first copied into operand 0 if they differ;
;; the low and the high half of operand 1 are then added in with two
;; vec_sel_widen_usum_{lo,hi} insns that both accumulate into operand 0.
1408 (define_expand "widen_usum<mode>3"
1409 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1410 (plus:<V_double_width>
1411 (zero_extend:<V_double_width>
1412 (match_operand:VQI 1 "s_register_operand" ""))
1413 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1414 "TARGET_NEON"
1415 {
1416 machine_mode vmode = GET_MODE (operands[1]);
/* Lane selectors for the two halves of the input vector.  */
1417 rtx lo_sel = arm_simd_vect_par_cnst_half (vmode, false);
1418 rtx hi_sel = arm_simd_vect_par_cnst_half (vmode, true);
1419 rtx acc = operands[0];
1420
/* Seed the destination with the incoming accumulator.  */
1422 if (acc != operands[2])
1423 emit_move_insn (acc, operands[2]);
1424
1425 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (acc, operands[1],
1426 lo_sel, acc));
1429 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (acc, operands[1],
1430 hi_sel, acc));
1433 DONE;
1434 }
1435 )
1436
;; Unsigned widening add of the low half of operand 1 (selected by the
;; vect_par_constant_low operand 2) into the accumulator, which is tied to the
;; destination (operand 3 has constraint "0").  Emits VADDW on %e1 for
;; little-endian and on %f1 when BYTES_BIG_ENDIAN.
1437 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1438 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1439 (plus:<V_double_width>
1440 (zero_extend:<V_double_width>
1441 (vec_select:<V_HALF>
1442 (match_operand:VQI 1 "s_register_operand" "%w")
1443 (match_operand:VQI 2 "vect_par_constant_low" "")))
1444 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1445 "TARGET_NEON"
1446 {
1447 return !BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1"
1448 : "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1449 }
1450 [(set_attr "type" "neon_add_widen")])
1451
;; Unsigned widening add of the high half of operand 1 (selected by the
;; vect_par_constant_high operand 2) into the accumulator, which is tied to
;; the destination (operand 3 has constraint "0").  Emits VADDW on %f1 for
;; little-endian and on %e1 when BYTES_BIG_ENDIAN.
1452 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1453 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1454 (plus:<V_double_width>
1455 (zero_extend:<V_double_width>
1456 (vec_select:<V_HALF>
1457 (match_operand:VQI 1 "s_register_operand" "%w")
1458 (match_operand:VQI 2 "vect_par_constant_high" "")))
1459 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1460 "TARGET_NEON"
1461 {
1462 return !BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1"
1463 : "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1464 }
1465 [(set_attr "type" "neon_add_widen")])
1466
;; Unsigned widening sum for the VW iterator modes:
;; operand 0 = zero_extend (operand 1) + operand 2, in <V_widen>.
;; Emitted as a single VADDW whose wide source is operand 2 (%q2) and whose
;; narrow source is operand 1 (%P1).
1467 (define_insn "widen_usum<mode>3"
1468 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1469 (plus:<V_widen> (zero_extend:<V_widen>
1470 (match_operand:VW 1 "s_register_operand" "%w"))
1471 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1472 "TARGET_NEON"
1473 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1474 [(set_attr "type" "neon_add_widen")]
1475 )
1476
1477 ;; Helpers for quad-word reduction operations
1478
1479 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1480 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1481 ; N/2-element vector.
1482
;; V4SI instance: applies each VQH_OPS operation to lanes {0,1} and lanes
;; {2,3} of operand 1, giving a V2SI result.  The template operates on the
;; two D-register halves of operand 1 (%e1 and %f1).
1483 (define_insn "quad_halves_<code>v4si"
1484 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1485 (VQH_OPS:V2SI
1486 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1487 (parallel [(const_int 0) (const_int 1)]))
1488 (vec_select:V2SI (match_dup 1)
1489 (parallel [(const_int 2) (const_int 3)]))))]
1490 "TARGET_NEON"
1491 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1492 [(set_attr "vqh_mnem" "<VQH_mnem>")
1493 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1494 )
1495
;; V4SF instance: applies each VQHS_OPS operation to lanes {0,1} and lanes
;; {2,3} of operand 1, giving a V2SF result.  Unlike the integer variants this
;; pattern additionally requires flag_unsafe_math_optimizations.
1496 (define_insn "quad_halves_<code>v4sf"
1497 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1498 (VQHS_OPS:V2SF
1499 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1500 (parallel [(const_int 0) (const_int 1)]))
1501 (vec_select:V2SF (match_dup 1)
1502 (parallel [(const_int 2) (const_int 3)]))))]
1503 "TARGET_NEON && flag_unsafe_math_optimizations"
1504 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1505 [(set_attr "vqh_mnem" "<VQH_mnem>")
1506 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1507 )
1508
;; V8HI instance: applies each VQH_OPS operation to lanes {0..3} and lanes
;; {4..7} of operand 1, giving a V4HI result.  The template operates on the
;; two D-register halves of operand 1 (%e1 and %f1).
;; Operand 0 is only written by this insn, so it uses the output-only "="
;; modifier, matching the V4SI and V4SF variants.
1509 (define_insn "quad_halves_<code>v8hi"
1510 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1511 (VQH_OPS:V4HI
1512 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1513 (parallel [(const_int 0) (const_int 1)
1514 (const_int 2) (const_int 3)]))
1515 (vec_select:V4HI (match_dup 1)
1516 (parallel [(const_int 4) (const_int 5)
1517 (const_int 6) (const_int 7)]))))]
1518 "TARGET_NEON"
1519 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1520 [(set_attr "vqh_mnem" "<VQH_mnem>")
1521 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1522 )
1523
;; V16QI instance: applies each VQH_OPS operation to lanes {0..7} and lanes
;; {8..15} of operand 1, giving a V8QI result.  The template operates on the
;; two D-register halves of operand 1 (%e1 and %f1).
;; Operand 0 is only written by this insn, so it uses the output-only "="
;; modifier, matching the V4SI and V4SF variants.
1524 (define_insn "quad_halves_<code>v16qi"
1525 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1526 (VQH_OPS:V8QI
1527 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1528 (parallel [(const_int 0) (const_int 1)
1529 (const_int 2) (const_int 3)
1530 (const_int 4) (const_int 5)
1531 (const_int 6) (const_int 7)]))
1532 (vec_select:V8QI (match_dup 1)
1533 (parallel [(const_int 8) (const_int 9)
1534 (const_int 10) (const_int 11)
1535 (const_int 12) (const_int 13)
1536 (const_int 14) (const_int 15)]))))]
1537 "TARGET_NEON"
1538 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1539 [(set_attr "vqh_mnem" "<VQH_mnem>")
1540 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1541 )
1542
;; Store the half-width vector operand 1 into the part of the 128-bit operand
;; 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), using a subreg
;; move.
1543 (define_expand "move_hi_quad_<mode>"
1544 [(match_operand:ANY128 0 "s_register_operand" "")
1545 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1546 "TARGET_NEON"
1547 {
1548 rtx hi_half = simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1549 GET_MODE_SIZE (<V_HALF>mode));
1550 emit_move_insn (hi_half, operands[1]);
1551 DONE;
1552 })
1553
;; Store the half-width vector operand 1 into the part of the 128-bit operand
;; 0 that starts at byte offset 0, using a subreg move.
1554 (define_expand "move_lo_quad_<mode>"
1555 [(match_operand:ANY128 0 "s_register_operand" "")
1556 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1557 "TARGET_NEON"
1558 {
1559 rtx lo_half = simplify_gen_subreg (<V_HALF>mode, operands[0],
1560 <MODE>mode, 0);
1561 emit_move_insn (lo_half, operands[1]);
1562 DONE;
1563 })
1564
1565 ;; Reduction operations
1566
;; Sum reduction of a VD-iterator vector to a scalar.  Float modes are only
;; handled under flag_unsafe_math_optimizations.  The reduction is done by
;; neon_pairwise_reduce with the neon_vpadd_internal generator, and the scalar
;; is then extracted from element 0.
1567 (define_expand "reduc_plus_scal_<mode>"
1568 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1569 (match_operand:VD 1 "s_register_operand" "")]
1570 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1571 {
1572 rtx reduced = gen_reg_rtx (<MODE>mode);
1573 neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
1574 &gen_neon_vpadd_internal<mode>);
1575 /* Every element of REDUCED holds the same result; read element 0.  */
1576 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced, const0_rtx));
1577 DONE;
1578 })
1579
;; Sum reduction of a VQ-iterator vector to a scalar (not available when
;; BYTES_BIG_ENDIAN; float modes need flag_unsafe_math_optimizations).
;; First the two halves are added with quad_halves_plus<mode>, then the
;; half-width result is reduced with reduc_plus_scal_<V_half>.
1580 (define_expand "reduc_plus_scal_<mode>"
1581 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1582 (match_operand:VQ 1 "s_register_operand" "")]
1583 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1584 && !BYTES_BIG_ENDIAN"
1585 {
1586 rtx halves_sum = gen_reg_rtx (<V_HALF>mode);
1587
1588 emit_insn (gen_quad_halves_plus<mode> (halves_sum, operands[1]));
1589 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], halves_sum));
1590
1591 DONE;
1592 })
1593
;; Sum reduction of a V2DI vector to a DImode scalar (not available when
;; BYTES_BIG_ENDIAN).  arm_reduc_plus_internal_v2di computes the sum into a
;; temporary vector and element 0 of that vector is then extracted.
;; The operand constraint is left empty, matching the other reduc_*
;; expanders in this file.
1594 (define_expand "reduc_plus_scal_v2di"
1595 [(match_operand:DI 0 "nonimmediate_operand" "")
1596 (match_operand:V2DI 1 "s_register_operand" "")]
1597 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1598 {
1599 rtx vec = gen_reg_rtx (V2DImode);
1600
1601 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1602 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1603
1604 DONE;
1605 })
1606
;; Helper insn for reduc_plus_scal_v2di, expressed as UNSPEC_VPADD on V2DI.
;; The template adds the two D-register halves of operand 1 (%e1, %f1) with
;; VADD.I64 and writes the result to the %e0 half of operand 0 only.
;; Not available when BYTES_BIG_ENDIAN.
1607 (define_insn "arm_reduc_plus_internal_v2di"
1608 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1609 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1610 UNSPEC_VPADD))]
1611 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1612 "vadd.i64\t%e0, %e1, %f1"
1613 [(set_attr "type" "neon_add_q")]
1614 )
1615
;; Signed-minimum reduction of a VD-iterator vector to a scalar.  Float modes
;; are only handled under flag_unsafe_math_optimizations.  The reduction is
;; done by neon_pairwise_reduce with the neon_vpsmin generator, and the scalar
;; is then extracted from element 0.
1616 (define_expand "reduc_smin_scal_<mode>"
1617 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1618 (match_operand:VD 1 "s_register_operand" "")]
1619 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1620 {
1621 rtx reduced = gen_reg_rtx (<MODE>mode);
1622
1623 neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
1624 &gen_neon_vpsmin<mode>);
1625 /* Every element of REDUCED holds the result; read element 0.  */
1626 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced, const0_rtx));
1627 DONE;
1628 })
1629
;; Signed-minimum reduction of a VQ-iterator vector to a scalar (not available
;; when BYTES_BIG_ENDIAN; float modes need flag_unsafe_math_optimizations).
;; First the two halves are combined with quad_halves_smin<mode>, then the
;; half-width result is reduced with reduc_smin_scal_<V_half>.
1630 (define_expand "reduc_smin_scal_<mode>"
1631 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1632 (match_operand:VQ 1 "s_register_operand" "")]
1633 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1634 && !BYTES_BIG_ENDIAN"
1635 {
1636 rtx halves_min = gen_reg_rtx (<V_HALF>mode);
1637
1638 emit_insn (gen_quad_halves_smin<mode> (halves_min, operands[1]));
1639 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], halves_min));
1640
1641 DONE;
1642 })
1643
;; Signed-maximum reduction of a VD-iterator vector to a scalar.  Float modes
;; are only handled under flag_unsafe_math_optimizations.  The reduction is
;; done by neon_pairwise_reduce with the neon_vpsmax generator, and the scalar
;; is then extracted from element 0.
1644 (define_expand "reduc_smax_scal_<mode>"
1645 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1646 (match_operand:VD 1 "s_register_operand" "")]
1647 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1648 {
1649 rtx reduced = gen_reg_rtx (<MODE>mode);
1650 neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
1651 &gen_neon_vpsmax<mode>);
1652 /* Every element of REDUCED holds the result; read element 0.  */
1653 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced, const0_rtx));
1654 DONE;
1655 })
1656
;; Signed-maximum reduction of a VQ-iterator vector to a scalar (not available
;; when BYTES_BIG_ENDIAN; float modes need flag_unsafe_math_optimizations).
;; First the two halves are combined with quad_halves_smax<mode>, then the
;; half-width result is reduced with reduc_smax_scal_<V_half>.
1657 (define_expand "reduc_smax_scal_<mode>"
1658 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1659 (match_operand:VQ 1 "s_register_operand" "")]
1660 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1661 && !BYTES_BIG_ENDIAN"
1662 {
1663 rtx halves_max = gen_reg_rtx (<V_HALF>mode);
1664
1665 emit_insn (gen_quad_halves_smax<mode> (halves_max, operands[1]));
1666 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], halves_max));
1667
1668 DONE;
1669 })
1670
;; Unsigned-minimum reduction of a VDI-iterator vector to a scalar.  The
;; reduction is done by neon_pairwise_reduce with the neon_vpumin generator,
;; and the scalar is then extracted from element 0.
1671 (define_expand "reduc_umin_scal_<mode>"
1672 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1673 (match_operand:VDI 1 "s_register_operand" "")]
1674 "TARGET_NEON"
1675 {
1676 rtx reduced = gen_reg_rtx (<MODE>mode);
1677 neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
1678 &gen_neon_vpumin<mode>);
1679 /* Every element of REDUCED holds the result; read element 0.  */
1680 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced, const0_rtx));
1681 DONE;
1682 })
1683
;; Unsigned-minimum reduction of a VQI-iterator vector to a scalar (not
;; available when BYTES_BIG_ENDIAN).  First the two halves are combined with
;; quad_halves_umin<mode>, then the half-width result is reduced with
;; reduc_umin_scal_<V_half>.
1684 (define_expand "reduc_umin_scal_<mode>"
1685 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1686 (match_operand:VQI 1 "s_register_operand" "")]
1687 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1688 {
1689 rtx halves_min = gen_reg_rtx (<V_HALF>mode);
1690
1691 emit_insn (gen_quad_halves_umin<mode> (halves_min, operands[1]));
1692 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], halves_min));
1693
1694 DONE;
1695 })
1696
;; Unsigned-maximum reduction of a VDI-iterator vector to a scalar.  The
;; reduction is done by neon_pairwise_reduce with the neon_vpumax generator,
;; and the scalar is then extracted from element 0.
1697 (define_expand "reduc_umax_scal_<mode>"
1698 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1699 (match_operand:VDI 1 "s_register_operand" "")]
1700 "TARGET_NEON"
1701 {
1702 rtx reduced = gen_reg_rtx (<MODE>mode);
1703 neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
1704 &gen_neon_vpumax<mode>);
1705 /* Every element of REDUCED holds the result; read element 0.  */
1706 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced, const0_rtx));
1707 DONE;
1708 })
1709
;; Unsigned-maximum reduction of a VQI-iterator vector to a scalar (not
;; available when BYTES_BIG_ENDIAN).  First the two halves are combined with
;; quad_halves_umax<mode>, then the half-width result is reduced with
;; reduc_umax_scal_<V_half>.
1710 (define_expand "reduc_umax_scal_<mode>"
1711 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1712 (match_operand:VQI 1 "s_register_operand" "")]
1713 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1714 {
1715 rtx halves_max = gen_reg_rtx (<V_HALF>mode);
1716
1717 emit_insn (gen_quad_halves_umax<mode> (halves_max, operands[1]));
1718 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], halves_max));
1719
1720 DONE;
1721 })
1722
;; Pairwise add of two VD-iterator vectors, expressed as UNSPEC_VPADD and
;; emitted as VPADD with the <V_if_elem> element suffix.  Passed as the
;; generator to neon_pairwise_reduce by the reduc_plus_scal_<mode> expander.
1723 (define_insn "neon_vpadd_internal<mode>"
1724 [(set (match_operand:VD 0 "s_register_operand" "=w")
1725 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1726 (match_operand:VD 2 "s_register_operand" "w")]
1727 UNSPEC_VPADD))]
1728 "TARGET_NEON"
1729 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1730 ;; Assume this schedules like vadd.
1731 [(set (attr "type")
1732 (if_then_else (match_test "<Is_float_mode>")
1733 (const_string "neon_fp_reduc_add_s<q>")
1734 (const_string "neon_reduc_add<q>")))]
1735 )
1736
;; Pairwise add of two V4HF vectors (UNSPEC_VPADD), emitted as VPADD.F16.
;; Enabled by TARGET_NEON_FP16INST.
;; NOTE(review): the "type" attribute is the integer neon_reduc_add rather
;; than a floating-point reduction type -- confirm this is intended.
1737 (define_insn "neon_vpaddv4hf"
1738 [(set
1739 (match_operand:V4HF 0 "s_register_operand" "=w")
1740 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1741 (match_operand:V4HF 2 "s_register_operand" "w")]
1742 UNSPEC_VPADD))]
1743 "TARGET_NEON_FP16INST"
1744 "vpadd.f16\t%P0, %P1, %P2"
1745 [(set_attr "type" "neon_reduc_add")]
1746 )
1747
;; Pairwise signed minimum of two VD-iterator vectors (UNSPEC_VPSMIN), emitted
;; as VPMIN with the <V_s_elem> element suffix.  Passed as the generator to
;; neon_pairwise_reduce by the reduc_smin_scal_<mode> expander.
1748 (define_insn "neon_vpsmin<mode>"
1749 [(set (match_operand:VD 0 "s_register_operand" "=w")
1750 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1751 (match_operand:VD 2 "s_register_operand" "w")]
1752 UNSPEC_VPSMIN))]
1753 "TARGET_NEON"
1754 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1755 [(set (attr "type")
1756 (if_then_else (match_test "<Is_float_mode>")
1757 (const_string "neon_fp_reduc_minmax_s<q>")
1758 (const_string "neon_reduc_minmax<q>")))]
1759 )
1760
;; Pairwise signed maximum of two VD-iterator vectors (UNSPEC_VPSMAX), emitted
;; as VPMAX with the <V_s_elem> element suffix.  Passed as the generator to
;; neon_pairwise_reduce by the reduc_smax_scal_<mode> expander.
1761 (define_insn "neon_vpsmax<mode>"
1762 [(set (match_operand:VD 0 "s_register_operand" "=w")
1763 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1764 (match_operand:VD 2 "s_register_operand" "w")]
1765 UNSPEC_VPSMAX))]
1766 "TARGET_NEON"
1767 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1768 [(set (attr "type")
1769 (if_then_else (match_test "<Is_float_mode>")
1770 (const_string "neon_fp_reduc_minmax_s<q>")
1771 (const_string "neon_reduc_minmax<q>")))]
1772 )
1773
;; Pairwise unsigned minimum of two VDI-iterator vectors (UNSPEC_VPUMIN),
;; emitted as VPMIN with the <V_u_elem> element suffix.  Passed as the
;; generator to neon_pairwise_reduce by the reduc_umin_scal_<mode> expander.
1774 (define_insn "neon_vpumin<mode>"
1775 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1776 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1777 (match_operand:VDI 2 "s_register_operand" "w")]
1778 UNSPEC_VPUMIN))]
1779 "TARGET_NEON"
1780 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1781 [(set_attr "type" "neon_reduc_minmax<q>")]
1782 )
1783
;; Pairwise unsigned maximum of two VDI-iterator vectors (UNSPEC_VPUMAX),
;; emitted as VPMAX with the <V_u_elem> element suffix.  Passed as the
;; generator to neon_pairwise_reduce by the reduc_umax_scal_<mode> expander.
1784 (define_insn "neon_vpumax<mode>"
1785 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1786 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1787 (match_operand:VDI 2 "s_register_operand" "w")]
1788 UNSPEC_VPUMAX))]
1789 "TARGET_NEON"
1790 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1791 [(set_attr "type" "neon_reduc_minmax<q>")]
1792 )
1793
1794 ;; Saturating arithmetic
1795
1796 ; NOTE: Neon supports many more saturating variants of instructions than the
1797 ; following, but these are all GCC currently understands.
1798 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1799 ; yet either, although these patterns may be used by intrinsics when they're
1800 ; added.
1801
;; Signed saturating add (RTL ss_plus) for the VD iterator modes, emitted as
;; VQADD with the signed element suffix <V_s_elem>.
1802 (define_insn "*ss_add<mode>_neon"
1803 [(set (match_operand:VD 0 "s_register_operand" "=w")
1804 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1805 (match_operand:VD 2 "s_register_operand" "w")))]
1806 "TARGET_NEON"
1807 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1808 [(set_attr "type" "neon_qadd<q>")]
1809 )
1810
;; Unsigned saturating add: matches canonical us_plus RTL on VD modes and
;; emits vqadd with the <V_u_elem> suffix.
1811 (define_insn "*us_add<mode>_neon"
1812 [(set (match_operand:VD 0 "s_register_operand" "=w")
1813 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1814 (match_operand:VD 2 "s_register_operand" "w")))]
1815 "TARGET_NEON"
1816 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1817 [(set_attr "type" "neon_qadd<q>")]
1818 )
1819
;; Signed saturating subtract: matches canonical ss_minus RTL on VD modes
;; (operand 1 minus operand 2) and emits vqsub with the <V_s_elem> suffix.
1820 (define_insn "*ss_sub<mode>_neon"
1821 [(set (match_operand:VD 0 "s_register_operand" "=w")
1822 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1823 (match_operand:VD 2 "s_register_operand" "w")))]
1824 "TARGET_NEON"
1825 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1826 [(set_attr "type" "neon_qsub<q>")]
1827 )
1828
;; Unsigned saturating subtract: matches canonical us_minus RTL on VD modes
;; (operand 1 minus operand 2) and emits vqsub with the <V_u_elem> suffix.
1829 (define_insn "*us_sub<mode>_neon"
1830 [(set (match_operand:VD 0 "s_register_operand" "=w")
1831 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1832 (match_operand:VD 2 "s_register_operand" "w")))]
1833 "TARGET_NEON"
1834 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1835 [(set_attr "type" "neon_qsub<q>")]
1836 )
1837
1838 ;; Conditional instructions. These are comparisons with conditional moves for
1839 ;; vectors. They perform the assignment:
1840 ;;
1841 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1842 ;;
1843 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1844 ;; element-wise.
1845
;; Vector conditional select for VDQW modes.  Operand 3 is the comparison
;; applied element-wise to operands 4 and 5; operand 0 receives operand 1
;; where it holds and operand 2 elsewhere.  The expander builds a lane mask
;; in a fresh <V_cmp_result> register from vcge/vcgt/vceq (or the vclt/vcle
;; compare-against-zero forms) and then emits neon_vbsl, swapping the two
;; data operands when the emitted mask is the inverse of the requested
;; condition.  Float modes are only handled under
;; flag_unsafe_math_optimizations.
1846 (define_expand "vcond<mode><mode>"
1847 [(set (match_operand:VDQW 0 "s_register_operand" "")
1848 (if_then_else:VDQW
1849 (match_operator 3 "comparison_operator"
1850 [(match_operand:VDQW 4 "s_register_operand" "")
1851 (match_operand:VDQW 5 "nonmemory_operand" "")])
1852 (match_operand:VDQW 1 "s_register_operand" "")
1853 (match_operand:VDQW 2 "s_register_operand" "")))]
1854 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1855 {
/* inverse: emit the comparison with swapped operands (LT/LE families).
   use_zero_form: operand 5 is the zero vector and the comparison code has
   a direct compare-with-zero instruction.
   swap_bsl_operands: the mask computed is the negation of the condition,
   so the select operands are exchanged instead of inverting the mask.  */
1856 int inverse = 0;
1857 int use_zero_form = 0;
1858 int swap_bsl_operands = 0;
1859 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1860 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1861
1862 rtx (*base_comparison) (rtx, rtx, rtx);
1863 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1864
/* Step 1: keep a constant-zero operand 5 only for the codes that have a
   zero form; in every other case make sure operand 5 is a register.  */
1865 switch (GET_CODE (operands[3]))
1866 {
1867 case GE:
1868 case GT:
1869 case LE:
1870 case LT:
1871 case EQ:
1872 if (operands[5] == CONST0_RTX (<MODE>mode))
1873 {
1874 use_zero_form = 1;
1875 break;
1876 }
1877 /* Fall through. */
1878 default:
1879 if (!REG_P (operands[5]))
1880 operands[5] = force_reg (<MODE>mode, operands[5]);
1881 }
1882
/* Step 2: pick the generator for the comparison itself (base) and for the
   comparison used when the operands are exchanged (complimentary), and
   record whether the code belongs to a "less than" family (inverse).  */
1883 switch (GET_CODE (operands[3]))
1884 {
1885 case LT:
1886 case UNLT:
1887 inverse = 1;
1888 /* Fall through. */
1889 case GE:
1890 case UNGE:
1891 case ORDERED:
1892 case UNORDERED:
1893 base_comparison = gen_neon_vcge<mode>;
1894 complimentary_comparison = gen_neon_vcgt<mode>;
1895 break;
1896 case LE:
1897 case UNLE:
1898 inverse = 1;
1899 /* Fall through. */
1900 case GT:
1901 case UNGT:
1902 base_comparison = gen_neon_vcgt<mode>;
1903 complimentary_comparison = gen_neon_vcge<mode>;
1904 break;
1905 case EQ:
1906 case NE:
1907 case UNEQ:
1908 base_comparison = gen_neon_vceq<mode>;
1909 complimentary_comparison = gen_neon_vceq<mode>;
1910 break;
1911 default:
1912 gcc_unreachable ();
1913 }
1914
/* Step 3: emit the comparison(s) that compute the mask.  */
1915 switch (GET_CODE (operands[3]))
1916 {
1917 case LT:
1918 case LE:
1919 case GT:
1920 case GE:
1921 case EQ:
1922 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1923 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1924 a GE b -> a GE b
1925 a GT b -> a GT b
1926 a LE b -> b GE a
1927 a LT b -> b GT a
1928 a EQ b -> a EQ b
1929 Note that there also exist direct comparison against 0 forms,
1930 so catch those as a special case. */
1931 if (use_zero_form)
1932 {
1933 inverse = 0;
1934 switch (GET_CODE (operands[3]))
1935 {
1936 case LT:
1937 base_comparison = gen_neon_vclt<mode>;
1938 break;
1939 case LE:
1940 base_comparison = gen_neon_vcle<mode>;
1941 break;
1942 default:
1943 /* Do nothing, other zero form cases already have the correct
1944 base_comparison. */
1945 break;
1946 }
1947 }
1948
1949 if (!inverse)
1950 emit_insn (base_comparison (mask, operands[4], operands[5]));
1951 else
1952 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1953 break;
1954 case UNLT:
1955 case UNLE:
1956 case UNGT:
1957 case UNGE:
1958 case NE:
1959 /* Vector compare returns false for lanes which are unordered, so if we use
1960 the inverse of the comparison we actually want to emit, then
1961 swap the operands to BSL, we will end up with the correct result.
1962 Note that a NE NaN and NaN NE b are true for all a, b.
1963
1964 Our transformations are:
1965 a UNGE b -> !(b GT a)
1966 a UNGT b -> !(b GE a)
1967 a UNLE b -> !(a GT b)
1968 a UNLT b -> !(a GE b)
1969 a NE b -> !(a EQ b) */
1970
1971 if (inverse)
1972 emit_insn (base_comparison (mask, operands[4], operands[5]));
1973 else
1974 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1975
1976 swap_bsl_operands = 1;
1977 break;
1978 case UNEQ:
1979 /* We check (a > b || b > a). Combining these comparisons gives us
1980 true iff (a != b && a ORDERED b); swapping the operands to BSL
1981 will then give us (a == b || a UNORDERED b) as intended. */
1982
1983 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1984 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1985 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1986 swap_bsl_operands = 1;
1987 break;
1988 case UNORDERED:
1989 /* Operands are ORDERED iff (a > b || b >= a).
1990 Swapping the operands to BSL will give the UNORDERED case. */
1991 swap_bsl_operands = 1;
1992 /* Fall through. */
1993 case ORDERED:
1994 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1995 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1996 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1997 break;
1998 default:
1999 gcc_unreachable ();
2000 }
2001
/* Step 4: select between operands 1 and 2 under the mask; the operands
   are exchanged when the mask encodes the negated condition.  */
2002 if (swap_bsl_operands)
2003 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2004 operands[1]));
2005 else
2006 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2007 operands[2]));
2008 DONE;
2009 })
2010
;; Vector conditional select for integer VDQIW modes with the comparison
;; codes handled below (GEU, GTU, LEU, LTU, EQ, NE).  A lane mask is
;; computed into a fresh <V_cmp_result> register and neon_vbsl then picks
;; operand 1 or operand 2 per lane.  LEU/LTU are implemented by exchanging
;; the comparison operands of vcgeu/vcgtu; NE is implemented as EQ with the
;; vbsl data operands exchanged.
;; NOTE(review): operand 5 is declared s_register_operand, so the
;; CONST0_RTX test below looks unreachable when operands are legitimized
;; against the predicates -- confirm.  If that path can be taken, note that
;; it uses the vcle/vclt generators for the unsigned LEU/LTU codes; verify
;; that this is intended.
2011 (define_expand "vcondu<mode><mode>"
2012 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2013 (if_then_else:VDQIW
2014 (match_operator 3 "arm_comparison_operator"
2015 [(match_operand:VDQIW 4 "s_register_operand" "")
2016 (match_operand:VDQIW 5 "s_register_operand" "")])
2017 (match_operand:VDQIW 1 "s_register_operand" "")
2018 (match_operand:VDQIW 2 "s_register_operand" "")))]
2019 "TARGET_NEON"
2020 {
2021 rtx mask;
/* inverse: the mask holds the negated condition, so vbsl operands are
   exchanged.  immediate_zero: operand 5 is the zero vector.  */
2022 int inverse = 0, immediate_zero = 0;
2023
2024 mask = gen_reg_rtx (<V_cmp_result>mode);
2025
2026 if (operands[5] == CONST0_RTX (<MODE>mode))
2027 immediate_zero = 1;
2028 else if (!REG_P (operands[5]))
2029 operands[5] = force_reg (<MODE>mode, operands[5]);
2030
2031 switch (GET_CODE (operands[3]))
2032 {
2033 case GEU:
2034 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2035 break;
2036
2037 case GTU:
2038 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2039 break;
2040
2041 case EQ:
2042 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2043 break;
2044
2045 case LEU:
/* a LEU b is emitted as b GEU a unless the zero form is used.  */
2046 if (immediate_zero)
2047 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2048 else
2049 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2050 break;
2051
2052 case LTU:
/* a LTU b is emitted as b GTU a unless the zero form is used.  */
2053 if (immediate_zero)
2054 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2055 else
2056 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2057 break;
2058
2059 case NE:
/* Compute the EQ mask and select with the data operands exchanged.  */
2060 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2061 inverse = 1;
2062 break;
2063
2064 default:
2065 gcc_unreachable ();
2066 }
2067
2068 if (inverse)
2069 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2070 operands[1]));
2071 else
2072 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2073 operands[2]));
2074
2075 DONE;
2076 })
2077
2078 ;; Patterns for builtins.
2079
2080 ; good for plain vadd, vaddq.
2081
;; Builtin expander for vadd on VCVTF modes.  When the mode is not a float
;; mode, or flag_unsafe_math_optimizations is set, it emits the canonical
;; add<mode>3 pattern; otherwise it emits neon_vadd<mode>_unspec so the
;; addition is represented as an unspec.
2082 (define_expand "neon_vadd<mode>"
2083 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2084 (match_operand:VCVTF 1 "s_register_operand" "w")
2085 (match_operand:VCVTF 2 "s_register_operand" "w")]
2086 "TARGET_NEON"
2087 {
2088 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2089 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2090 else
2091 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2092 operands[2]));
2093 DONE;
2094 })
2095
;; Builtin expander for vadd on the VH modes, available under
;; TARGET_NEON_FP16INST.  It always forwards to add<mode>3_fp16, with no
;; dependence on flag_unsafe_math_optimizations.
2096 (define_expand "neon_vadd<mode>"
2097 [(match_operand:VH 0 "s_register_operand")
2098 (match_operand:VH 1 "s_register_operand")
2099 (match_operand:VH 2 "s_register_operand")]
2100 "TARGET_NEON_FP16INST"
2101 {
2102 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
2103 DONE;
2104 })
2105
;; Builtin expander for vsub on the VH modes, available under
;; TARGET_NEON_FP16INST.  It always forwards to sub<mode>3_fp16.
2106 (define_expand "neon_vsub<mode>"
2107 [(match_operand:VH 0 "s_register_operand")
2108 (match_operand:VH 1 "s_register_operand")
2109 (match_operand:VH 2 "s_register_operand")]
2110 "TARGET_NEON_FP16INST"
2111 {
2112 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2113 DONE;
2114 })
2115
2116 ; Note that NEON operations don't support the full IEEE 754 standard: in
2117 ; particular, denormal values are flushed to zero. This means that GCC cannot
2118 ; use those instructions for autovectorization, etc. unless
2119 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2120 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2121 ; header) must work in either case: if -funsafe-math-optimizations is given,
2122 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2123 ; expand to unspecs (which may potentially limit the extent to which they might
2124 ; be optimized by generic code).
2125
2126 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2127
;; Unspec form of vector add on VCVTF modes (UNSPEC_VADD), emitted as
;; vadd.<V_if_elem>.  The "type" attribute is selected by <Is_float_mode>.
2128 (define_insn "neon_vadd<mode>_unspec"
2129 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2130 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2131 (match_operand:VCVTF 2 "s_register_operand" "w")]
2132 UNSPEC_VADD))]
2133 "TARGET_NEON"
2134 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2135 [(set (attr "type")
2136 (if_then_else (match_test "<Is_float_mode>")
2137 (const_string "neon_fp_addsub_s<q>")
2138 (const_string "neon_add<q>")))]
2139 )
2140
;; Long add: both inputs are VDI-mode registers and the result is in the
;; corresponding <V_widen> mode.  Iterated over the VADDL unspecs, with
;; <sup> supplying the signedness letter of the vaddl mnemonic suffix.
2141 (define_insn "neon_vaddl<sup><mode>"
2142 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2143 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2144 (match_operand:VDI 2 "s_register_operand" "w")]
2145 VADDL))]
2146 "TARGET_NEON"
2147 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2148 [(set_attr "type" "neon_add_long")]
2149 )
2150
;; Wide add: operand 1 and the result are in <V_widen> mode while operand 2
;; is a VDI-mode register.  Iterated over the VADDW unspecs, with <sup>
;; supplying the signedness letter of the vaddw mnemonic suffix.
2151 (define_insn "neon_vaddw<sup><mode>"
2152 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2153 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2154 (match_operand:VDI 2 "s_register_operand" "w")]
2155 VADDW))]
2156 "TARGET_NEON"
2157 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2158 [(set_attr "type" "neon_add_widen")]
2159 )
2160
2161 ; vhadd and vrhadd.
2162
;; Halving add family on VDQIW modes, iterated over the VHADD unspecs.
;; <r> and <sup> are filled in per unspec to form the v<r>hadd mnemonic and
;; its signedness suffix (vhadd and vrhadd, per the comment above).
2163 (define_insn "neon_v<r>hadd<sup><mode>"
2164 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2165 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2166 (match_operand:VDQIW 2 "s_register_operand" "w")]
2167 VHADD))]
2168 "TARGET_NEON"
2169 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2170 [(set_attr "type" "neon_add_halve_q")]
2171 )
2172
;; Saturating add intrinsic pattern on VDQIX modes, iterated over the VQADD
;; unspecs; <sup> supplies the signedness letter of the vqadd suffix.
2173 (define_insn "neon_vqadd<sup><mode>"
2174 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2175 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2176 (match_operand:VDQIX 2 "s_register_operand" "w")]
2177 VQADD))]
2178 "TARGET_NEON"
2179 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2180 [(set_attr "type" "neon_qadd<q>")]
2181 )
2182
;; Narrowing add family: both inputs are VN-mode registers and the result
;; is in the corresponding <V_narrow> mode.  Iterated over the VADDHN
;; unspecs, with <r> filled in per unspec to form the v<r>addhn mnemonic.
2183 (define_insn "neon_v<r>addhn<mode>"
2184 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2185 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2186 (match_operand:VN 2 "s_register_operand" "w")]
2187 VADDHN))]
2188 "TARGET_NEON"
2189 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2190 [(set_attr "type" "neon_add_halve_narrow_q")]
2191 )
2192
2193 ;; Polynomial and Float multiplication.
;; Unspec multiply (UNSPEC_VMUL) on VPF modes, emitted as vmul with a
;; <pf>-prefixed element-size suffix.  The "type" attribute is selected by
;; <Is_float_mode>.
2194 (define_insn "neon_vmul<pf><mode>"
2195 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2196 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2197 (match_operand:VPF 2 "s_register_operand" "w")]
2198 UNSPEC_VMUL))]
2199 "TARGET_NEON"
2200 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2201 [(set (attr "type")
2202 (if_then_else (match_test "<Is_float_mode>")
2203 (const_string "neon_fp_mul_s<q>")
2204 (const_string "neon_mul_<V_elem_ch><q>")))]
2205 )
2206
;; Standard-named multiply on the VH modes using canonical mult RTL,
;; emitted as vmul.f16.  Enabled only when both TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations hold.
2207 (define_insn "mul<mode>3"
2208 [(set
2209 (match_operand:VH 0 "s_register_operand" "=w")
2210 (mult:VH
2211 (match_operand:VH 1 "s_register_operand" "w")
2212 (match_operand:VH 2 "s_register_operand" "w")))]
2213 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2214 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2215 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2216 )
2217
;; Multiply on the VH modes using the same mult:VH template and vmul.f16
;; output as mul<mode>3, but conditioned only on TARGET_NEON_FP16INST (no
;; flag_unsafe_math_optimizations requirement).
2218 (define_insn "neon_vmulf<mode>"
2219 [(set
2220 (match_operand:VH 0 "s_register_operand" "=w")
2221 (mult:VH
2222 (match_operand:VH 1 "s_register_operand" "w")
2223 (match_operand:VH 2 "s_register_operand" "w")))]
2224 "TARGET_NEON_FP16INST"
2225 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2226 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2227 )
2228
;; Builtin expander for vmla on VDQW modes.  When the mode is not a float
;; mode, or flag_unsafe_math_optimizations is set, it emits the
;; mul<mode>3add<mode>_neon pattern; otherwise it emits
;; neon_vmla<mode>_unspec.  Operands are forwarded in the same order.
2229 (define_expand "neon_vmla<mode>"
2230 [(match_operand:VDQW 0 "s_register_operand" "=w")
2231 (match_operand:VDQW 1 "s_register_operand" "0")
2232 (match_operand:VDQW 2 "s_register_operand" "w")
2233 (match_operand:VDQW 3 "s_register_operand" "w")]
2234 "TARGET_NEON"
2235 {
2236 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2237 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2238 operands[2], operands[3]));
2239 else
2240 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2241 operands[2], operands[3]));
2242 DONE;
2243 })
2244
;; Builtin expander for vfma on VCVTF modes (requires TARGET_FMA).
;; Forwards to fma<mode>4_intrinsic with the operands reordered to
;; (0, 2, 3, 1), i.e. builtin operand 1 becomes the last argument.
2245 (define_expand "neon_vfma<VCVTF:mode>"
2246 [(match_operand:VCVTF 0 "s_register_operand")
2247 (match_operand:VCVTF 1 "s_register_operand")
2248 (match_operand:VCVTF 2 "s_register_operand")
2249 (match_operand:VCVTF 3 "s_register_operand")]
2250 "TARGET_NEON && TARGET_FMA"
2251 {
2252 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2253 operands[1]));
2254 DONE;
2255 })
2256
;; Builtin expander for vfma on the VH modes (TARGET_NEON_FP16INST).
;; Forwards to fma<mode>4_intrinsic with the operands reordered to
;; (0, 2, 3, 1), i.e. builtin operand 1 becomes the last argument.
2257 (define_expand "neon_vfma<VH:mode>"
2258 [(match_operand:VH 0 "s_register_operand")
2259 (match_operand:VH 1 "s_register_operand")
2260 (match_operand:VH 2 "s_register_operand")
2261 (match_operand:VH 3 "s_register_operand")]
2262 "TARGET_NEON_FP16INST"
2263 {
2264 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2265 operands[1]));
2266 DONE;
2267 })
2268
;; Builtin expander for vfms on VCVTF modes (requires TARGET_FMA).
;; Forwards to fmsub<mode>4_intrinsic with the operands reordered to
;; (0, 2, 3, 1), i.e. builtin operand 1 becomes the last argument.
2269 (define_expand "neon_vfms<VCVTF:mode>"
2270 [(match_operand:VCVTF 0 "s_register_operand")
2271 (match_operand:VCVTF 1 "s_register_operand")
2272 (match_operand:VCVTF 2 "s_register_operand")
2273 (match_operand:VCVTF 3 "s_register_operand")]
2274 "TARGET_NEON && TARGET_FMA"
2275 {
2276 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2277 operands[1]));
2278 DONE;
2279 })
2280
;; Builtin expander for vfms on the VH modes (TARGET_NEON_FP16INST).
;; Forwards to fmsub<mode>4_intrinsic with the operands reordered to
;; (0, 2, 3, 1), i.e. builtin operand 1 becomes the last argument.
2281 (define_expand "neon_vfms<VH:mode>"
2282 [(match_operand:VH 0 "s_register_operand")
2283 (match_operand:VH 1 "s_register_operand")
2284 (match_operand:VH 2 "s_register_operand")
2285 (match_operand:VH 3 "s_register_operand")]
2286 "TARGET_NEON_FP16INST"
2287 {
2288 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2289 operands[1]));
2290 DONE;
2291 })
2292
2293 ;; The expand RTL structure here is not important.
2294 ;; We use the gen_* functions anyway.
2295 ;; We just need something to wrap the iterators around.
2296
;; Builtin expander for the non-lane vfmal/vfmsl low/high forms
;; (TARGET_FP16FML).  It builds one half-selecting parallel with
;; arm_simd_vect_par_cnst_half and passes it twice, as operands 4 and 5 of
;; the matching vfm<vfml_op>l_<vfml_half><mode>_intrinsic insn.
2297 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2298 [(set (match_operand:VCVTF 0 "s_register_operand")
2299 (unspec:VCVTF
2300 [(match_operand:VCVTF 1 "s_register_operand")
2301 (PLUSMINUS:<VFML>
2302 (match_operand:<VFML> 2 "s_register_operand")
2303 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2304 "TARGET_FP16FML"
2305 {
2306 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2307 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2308 operands[1],
2309 operands[2],
2310 operands[3],
2311 half, half));
2312 DONE;
2313 })
2314
;; vfmal, low halves: an fma whose two multiplicands are the float-extended
;; vec_selects of operands 2 and 3 (selection parallels must satisfy
;; vect_par_constant_low) and whose addend is operand 1, tied to the output
;; by the "0" constraint.
2315 (define_insn "vfmal_low<mode>_intrinsic"
2316 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2317 (fma:VCVTF
2318 (float_extend:VCVTF
2319 (vec_select:<VFMLSEL>
2320 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2321 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2322 (float_extend:VCVTF
2323 (vec_select:<VFMLSEL>
2324 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2325 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2326 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2327 "TARGET_FP16FML"
2328 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2329 [(set_attr "type" "neon_fp_mla_s<q>")]
2330 )
2331
;; vfmsl, high halves: like the vfmal high form but the selected half of
;; operand 2 is negated before being float-extended.  Selection parallels
;; must satisfy vect_par_constant_high; operand 1 is the addend, tied to
;; the output by the "0" constraint.
2332 (define_insn "vfmsl_high<mode>_intrinsic"
2333 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2334 (fma:VCVTF
2335 (float_extend:VCVTF
2336 (neg:<VFMLSEL>
2337 (vec_select:<VFMLSEL>
2338 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2339 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2340 (float_extend:VCVTF
2341 (vec_select:<VFMLSEL>
2342 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2343 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2344 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2345 "TARGET_FP16FML"
2346 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2347 [(set_attr "type" "neon_fp_mla_s<q>")]
2348 )
2349
;; vfmal, high halves: an fma whose two multiplicands are the float-extended
;; vec_selects of operands 2 and 3 (selection parallels must satisfy
;; vect_par_constant_high) and whose addend is operand 1, tied to the
;; output by the "0" constraint.
2350 (define_insn "vfmal_high<mode>_intrinsic"
2351 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2352 (fma:VCVTF
2353 (float_extend:VCVTF
2354 (vec_select:<VFMLSEL>
2355 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2356 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2357 (float_extend:VCVTF
2358 (vec_select:<VFMLSEL>
2359 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2360 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2361 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2362 "TARGET_FP16FML"
2363 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2364 [(set_attr "type" "neon_fp_mla_s<q>")]
2365 )
2366
;; vfmsl, low halves: like the vfmal low form but the selected half of
;; operand 2 is negated before being float-extended.  Selection parallels
;; must satisfy vect_par_constant_low; operand 1 is the addend, tied to the
;; output by the "0" constraint.
2367 (define_insn "vfmsl_low<mode>_intrinsic"
2368 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2369 (fma:VCVTF
2370 (float_extend:VCVTF
2371 (neg:<VFMLSEL>
2372 (vec_select:<VFMLSEL>
2373 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2374 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2375 (float_extend:VCVTF
2376 (vec_select:<VFMLSEL>
2377 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2378 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2379 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2380 "TARGET_FP16FML"
2381 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2382 [(set_attr "type" "neon_fp_mla_s<q>")]
2383 )
2384
;; Builtin expander for the by-lane vfmal/vfmsl forms where operands 2 and
;; 3 share the <VFML> mode (TARGET_FP16FML).  Operand 4 is the constant lane
;; number; it is mapped through NEON_ENDIAN_LANE_N and passed, together
;; with the half-selecting parallel, to the matching
;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic insn.
;; NOTE(review): the insn output code applies NEON_ENDIAN_LANE_N to its
;; operand 5 again when printing -- confirm this double mapping is intended.
2385 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2386 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2387 (unspec:VCVTF
2388 [(match_operand:VCVTF 1 "s_register_operand")
2389 (PLUSMINUS:<VFML>
2390 (match_operand:<VFML> 2 "s_register_operand")
2391 (match_operand:<VFML> 3 "s_register_operand"))
2392 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2393 "TARGET_FP16FML"
2394 {
2395 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2396 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2397 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2398 (operands[0], operands[1],
2399 operands[2], operands[3],
2400 half, lane));
2401 DONE;
2402 })
2403
;; By-lane vfmal, low half of operand 2: the second multiplicand is one HF
;; element of operand 3 (lane given by operand 5) duplicated across a
;; <VFMLSEL> vector and float-extended.  Operand 1 is the addend, tied to
;; the output by the "0" constraint.
2404 (define_insn "vfmal_lane_low<mode>_intrinsic"
2405 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2406 (fma:VCVTF
2407 (float_extend:VCVTF
2408 (vec_select:<VFMLSEL>
2409 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2410 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2411 (float_extend:VCVTF
2412 (vec_duplicate:<VFMLSEL>
2413 (vec_select:HF
2414 (match_operand:<VFML> 3 "s_register_operand" "x")
2415 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2416 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2417 "TARGET_FP16FML"
2418 {
2419 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
/* A lane index at or beyond the element count of <VFMLSEL> is rebased by
   that count and operand 3 is printed with <V_hi>; otherwise the index is
   used as is and operand 3 is printed with <V_lo>.  */
2420 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2421 {
2422 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2423 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2424 }
2425 else
2426 {
2427 operands[5] = GEN_INT (lane);
2428 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2429 }
2430 }
2431 [(set_attr "type" "neon_fp_mla_s<q>")]
2432 )
2433
;; Builtin expander for the by-lane vfmal/vfmsl forms where operand 3 has
;; the <VFMLSEL2> mode rather than the <VFML> mode of operand 2
;; (TARGET_FP16FML).  The lane number in operand 4 is mapped through
;; NEON_ENDIAN_LANE_N using <VFMLSEL2>mode and passed, with the
;; half-selecting parallel for operand 2, to the matching *_intrinsic insn.
;; NOTE(review): the insn output code applies NEON_ENDIAN_LANE_N to its
;; operand 5 again when printing -- confirm this double mapping is intended.
2434 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2435 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2436 (unspec:VCVTF
2437 [(match_operand:VCVTF 1 "s_register_operand")
2438 (PLUSMINUS:<VFML>
2439 (match_operand:<VFML> 2 "s_register_operand")
2440 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2441 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2442 "TARGET_FP16FML"
2443 {
2444 rtx lane
2445 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2446 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2447 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2448 (operands[0], operands[1], operands[2], operands[3],
2449 half, lane));
2450 DONE;
2451 })
2452
2453 ;; Used to implement the intrinsics:
2454 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2455 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2456 ;; Needs a bit of care to get the modes of the different sub-expressions right
2457 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2458 ;; S or D subregister to select the appropriate lane from.
2459
;; By-lane vfmal, low half of operand 2, with operand 3 in <VFMLSEL2> mode.
;; The output code splits the lane index into a register offset and an
;; index within a <VFMLSEL>-sized register before printing.
2460 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2461 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2462 (fma:VCVTF
2463 (float_extend:VCVTF
2464 (vec_select:<VFMLSEL>
2465 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2466 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2467 (float_extend:VCVTF
2468 (vec_duplicate:<VFMLSEL>
2469 (vec_select:HF
2470 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2471 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2472 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2473 "TARGET_FP16FML"
2474 {
2475 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
/* new_lane is the index inside one <VFMLSEL>-sized register; regdiff is
   how many such registers past REGNO (operands[3]) that register is.  */
2476 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2477 int new_lane = lane % elts_per_reg;
2478 int regdiff = lane / elts_per_reg;
2479 operands[5] = GEN_INT (new_lane);
2480 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2481 because we want the print_operand code to print the appropriate
2482 S or D register prefix. */
2483 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2484 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2485 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2486 }
2487 [(set_attr "type" "neon_fp_mla_s<q>")]
2488 )
2489
2490 ;; Used to implement the intrinsics:
2491 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2492 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2493 ;; Needs a bit of care to get the modes of the different sub-expressions right
2494 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2495 ;; S or D subregister to select the appropriate lane from.
2496
;; By-lane vfmal, high half of operand 2, with operand 3 in <VFMLSEL2>
;; mode.  Only operand 3 is re-created in <VFMLSEL> mode here; operand 2 is
;; printed through <V_hi>.
2497 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2498 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2499 (fma:VCVTF
2500 (float_extend:VCVTF
2501 (vec_select:<VFMLSEL>
2502 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2503 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2504 (float_extend:VCVTF
2505 (vec_duplicate:<VFMLSEL>
2506 (vec_select:HF
2507 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2508 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2509 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2510 "TARGET_FP16FML"
2511 {
2512 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
/* new_lane is the index inside one <VFMLSEL>-sized register; regdiff is
   how many such registers past REGNO (operands[3]) that register is.  */
2513 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2514 int new_lane = lane % elts_per_reg;
2515 int regdiff = lane / elts_per_reg;
2516 operands[5] = GEN_INT (new_lane);
2517 /* We re-create operands[3] in the halved VFMLSEL mode
2518 because we've calculated the correct half-width subreg to extract
2519 the lane from and we want to print *that* subreg instead. */
2520 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2521 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2522 }
2523 [(set_attr "type" "neon_fp_mla_s<q>")]
2524 )
2525
;; By-lane vfmal, high half of operand 2: the second multiplicand is one HF
;; element of operand 3 (lane given by operand 5) duplicated across a
;; <VFMLSEL> vector and float-extended.  Operand 1 is the addend, tied to
;; the output by the "0" constraint.
2526 (define_insn "vfmal_lane_high<mode>_intrinsic"
2527 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2528 (fma:VCVTF
2529 (float_extend:VCVTF
2530 (vec_select:<VFMLSEL>
2531 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2532 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2533 (float_extend:VCVTF
2534 (vec_duplicate:<VFMLSEL>
2535 (vec_select:HF
2536 (match_operand:<VFML> 3 "s_register_operand" "x")
2537 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2538 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2539 "TARGET_FP16FML"
2540 {
2541 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
/* A lane index at or beyond the element count of <VFMLSEL> is rebased by
   that count and operand 3 is printed with <V_hi>; otherwise the index is
   used as is and operand 3 is printed with <V_lo>.  */
2542 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2543 {
2544 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2545 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2546 }
2547 else
2548 {
2549 operands[5] = GEN_INT (lane);
2550 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2551 }
2552 }
2553 [(set_attr "type" "neon_fp_mla_s<q>")]
2554 )
2555
;; By-lane vfmsl, low half of operand 2: as the by-lane vfmal low form but
;; the selected half of operand 2 is negated before being float-extended.
;; Operand 1 is the addend, tied to the output by the "0" constraint.
2556 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2557 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2558 (fma:VCVTF
2559 (float_extend:VCVTF
2560 (neg:<VFMLSEL>
2561 (vec_select:<VFMLSEL>
2562 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2563 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2564 (float_extend:VCVTF
2565 (vec_duplicate:<VFMLSEL>
2566 (vec_select:HF
2567 (match_operand:<VFML> 3 "s_register_operand" "x")
2568 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2569 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2570 "TARGET_FP16FML"
2571 {
2572 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
/* A lane index at or beyond the element count of <VFMLSEL> is rebased by
   that count and operand 3 is printed with <V_hi>; otherwise the index is
   used as is and operand 3 is printed with <V_lo>.  */
2573 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2574 {
2575 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2576 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2577 }
2578 else
2579 {
2580 operands[5] = GEN_INT (lane);
2581 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2582 }
2583 }
2584 [(set_attr "type" "neon_fp_mla_s<q>")]
2585 )
2586
2587 ;; Used to implement the intrinsics:
2588 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2589 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2590 ;; Needs a bit of care to get the modes of the different sub-expressions right
2591 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2592 ;; S or D subregister to select the appropriate lane from.
2593
;; By-lane vfmsl, low half of operand 2 (negated), with operand 3 in
;; <VFMLSEL2> mode.  The output code splits the lane index into a register
;; offset and an index within a <VFMLSEL>-sized register before printing.
2594 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2595 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2596 (fma:VCVTF
2597 (float_extend:VCVTF
2598 (neg:<VFMLSEL>
2599 (vec_select:<VFMLSEL>
2600 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2601 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2602 (float_extend:VCVTF
2603 (vec_duplicate:<VFMLSEL>
2604 (vec_select:HF
2605 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2606 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2607 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2608 "TARGET_FP16FML"
2609 {
2610 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
/* new_lane is the index inside one <VFMLSEL>-sized register; regdiff is
   how many such registers past REGNO (operands[3]) that register is.  */
2611 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2612 int new_lane = lane % elts_per_reg;
2613 int regdiff = lane / elts_per_reg;
2614 operands[5] = GEN_INT (new_lane);
2615 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2616 because we want the print_operand code to print the appropriate
2617 S or D register prefix. */
2618 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2619 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2620 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2621 }
2622 [(set_attr "type" "neon_fp_mla_s<q>")]
2623 )
2624
2625 ;; Used to implement the intrinsics:
2626 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2627 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2628 ;; Needs a bit of care to get the modes of the different sub-expressions right
2629 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2630 ;; S or D subregister to select the appropriate lane from.
2631
;; By-lane vfmsl, high half of operand 2 (negated), with operand 3 in
;; <VFMLSEL2> mode.  Only operand 3 is re-created in <VFMLSEL> mode here;
;; operand 2 is printed through <V_hi>.
2632 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2633 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2634 (fma:VCVTF
2635 (float_extend:VCVTF
2636 (neg:<VFMLSEL>
2637 (vec_select:<VFMLSEL>
2638 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2639 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2640 (float_extend:VCVTF
2641 (vec_duplicate:<VFMLSEL>
2642 (vec_select:HF
2643 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2644 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2645 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2646 "TARGET_FP16FML"
2647 {
2648 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
/* new_lane is the index inside one <VFMLSEL>-sized register; regdiff is
   how many such registers past REGNO (operands[3]) that register is.  */
2649 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2650 int new_lane = lane % elts_per_reg;
2651 int regdiff = lane / elts_per_reg;
2652 operands[5] = GEN_INT (new_lane);
2653 /* We re-create operands[3] in the halved VFMLSEL mode
2654 because we've calculated the correct half-width subreg to extract
2655 the lane from and we want to print *that* subreg instead. */
2656 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2657 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2658 }
2659 [(set_attr "type" "neon_fp_mla_s<q>")]
2660 )
2661
;; By-lane vfmsl, high half of operand 2: as the by-lane vfmal high form
;; but the selected half of operand 2 is negated before being
;; float-extended.  Operand 1 is the addend, tied to the output by the "0"
;; constraint.
2662 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2663 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2664 (fma:VCVTF
2665 (float_extend:VCVTF
2666 (neg:<VFMLSEL>
2667 (vec_select:<VFMLSEL>
2668 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2669 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2670 (float_extend:VCVTF
2671 (vec_duplicate:<VFMLSEL>
2672 (vec_select:HF
2673 (match_operand:<VFML> 3 "s_register_operand" "x")
2674 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2675 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2676 "TARGET_FP16FML"
2677 {
2678 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
/* A lane index at or beyond the element count of <VFMLSEL> is rebased by
   that count and operand 3 is printed with <V_hi>; otherwise the index is
   used as is and operand 3 is printed with <V_lo>.  */
2679 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2680 {
2681 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2682 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2683 }
2684 else
2685 {
2686 operands[5] = GEN_INT (lane);
2687 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2688 }
2689 }
2690 [(set_attr "type" "neon_fp_mla_s<q>")]
2691 )
2692
;; Multiply-accumulate expressed as UNSPEC_VMLA.  Used for intrinsics when
;; flag_unsafe_math_optimizations is false.  Operand 1 (the accumulator) is
;; tied to the destination.
(define_insn "neon_vmla<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s<q>")
          (const_string "neon_mla_<V_elem_ch><q>")))]
)
2708
;; vmlal: VW-mode sources (operands 2 and 3) accumulated into a <V_widen>
;; destination; operand 1 is the accumulator, tied to operand 0.
(define_insn "neon_vmlal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2719
;; Expander for the vmls intrinsic.  Floating-point modes without
;; -funsafe-math-optimizations go through the UNSPEC insn; everything else
;; uses the generic multiply-negate-add pattern.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
                                                      operands[1],
                                                      operands[2],
                                                      operands[3]));
  DONE;
})
2735
;; Multiply-subtract expressed as UNSPEC_VMLS.  Used for intrinsics when
;; flag_unsafe_math_optimizations is false.  Operand 1 (the accumulator) is
;; tied to the destination.
(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s<q>")
          (const_string "neon_mla_<V_elem_ch><q>")))]
)
2751
;; vmlsl: VW-mode sources (operands 2 and 3) with a <V_widen> accumulator
;; (operand 1) tied to the destination.
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2762
;; vqdmulh and vqrdmulh; the VQDMULH iterator supplies the optional <r>.
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "w")
           (match_operand:VMDQI 2 "s_register_operand" "w")]
          VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)
2773
;; vqrdmlah and vqrdmlsh.  Only available under TARGET_NEON_RDMA.  Operand 1
;; is the accumulator and is tied to the destination.
;; NOTE(review): the type attribute uses the _long variant although the
;; operands all share one mode - confirm this is intended.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "0")
           (match_operand:VMDQI 2 "s_register_operand" "w")
           (match_operand:VMDQI 3 "s_register_operand" "w")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2785
;; vqdmlal: VMDI-mode sources (operands 2 and 3) with a <V_widen>
;; accumulator (operand 1) tied to the destination.
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2796
;; vqdmlsl: VMDI-mode sources (operands 2 and 3) with a <V_widen>
;; accumulator (operand 1) tied to the destination.
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2807
;; vmull: two VW-mode sources producing a <V_widen> result.
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
2817
;; vqdmull: two VMDI-mode sources producing a <V_widen> result.
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand" "w")]
          UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)
2827
;; Expander for the vsub intrinsic.  Floating-point modes without
;; -funsafe-math-optimizations go through the UNSPEC insn; otherwise the
;; standard sub<mode>3 pattern is used.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
2841
;; Subtraction expressed as UNSPEC_VSUB.  Used for intrinsics when
;; flag_unsafe_math_optimizations is false.
(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_addsub_s<q>")
          (const_string "neon_sub<q>")))]
)
2856
;; vsubl: two VDI-mode sources producing a <V_widen> result.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)
2866
;; vsubw: a <V_widen> first source and a VDI-mode second source producing a
;; <V_widen> result.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)
2876
;; vqsub over the VDQIX modes; signedness comes from the VQSUB iterator.
(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)
2886
;; vhsub over the VDQIW modes; signedness comes from the VHSUB iterator.
(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)
2896
;; vsubhn and vrsubhn: two VN-mode sources producing a <V_narrow> result.
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)
2906
;; Vector comparison expander for the VDQW modes.  A floating-point mode
;; compiled without unsafe math optimizations expands to an UNSPEC pattern;
;; every other case expands to the RTL comparison insn.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
   (neg:<V_cmp_result>
     (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
                       (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
  "TARGET_NEON"
  {
    /* Integer comparisons, and FP comparisons when
       -funsafe-math-optimizations is enabled, use the plain insn.  */
    if (GET_MODE_CLASS (<MODE>mode) != MODE_VECTOR_FLOAT
        || flag_unsafe_math_optimizations)
      {
        emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
                                                   operands[1],
                                                   operands[2]));
        DONE;
      }

    /* The UNSPEC generators exist only for the float modes, while this
       expander is also instantiated for the integer modes.  So the
       generator is named per mode here instead of through <mode>; the
       integer instantiations never reach this point.  */
    switch (<MODE>mode)
      {
      case E_V2SFmode:
        emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                        operands[1],
                                                        operands[2]));
        break;
      case E_V4SFmode:
        emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                        operands[1],
                                                        operands[2]));
        break;
      default:
        gcc_unreachable ();
      }
    DONE;
  }
)
2948
;; RTL form of the vector comparison.  Not available for floating-point
;; modes unless flag_unsafe_math_optimizations is set.  Alternative 0
;; compares against a register, alternative 1 against zero.
(define_insn "neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
  {
    /* Type letter placed in front of the element size.  */
    const char *elem_kind
      = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
         ? "f" : "<cmp_type>");
    /* Second source: a register, or the immediate zero.  */
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
    char asm_template[100];

    sprintf (asm_template, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
             " %%<V_reg>1, %s",
             elem_kind, rhs);
    output_asm_insn (asm_template, operands);
    return "";
  }
  [(set (attr "type")
        (if_then_else
          (match_operand 2 "zero_operand")
          (const_string "neon_compare_zero<q>")
          (const_string "neon_compare<q>")))]
)
2973
;; UNSPEC form of the floating-point vector comparison (NEON_VCMP
;; iterator).  Alternative 0 compares against a register, alternative 1
;; against zero.
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
  {
    /* Second source: a register, or the immediate zero.  */
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
    char asm_template[100];

    sprintf (asm_template, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
             " %%<V_reg>1, %s",
             rhs);
    output_asm_insn (asm_template, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2992
;; Vector comparison expander for the VH modes (TARGET_NEON_FP16INST).
;; Chooses between the RTL insn and the UNSPEC insn the same way on
;; flag_unsafe_math_optimizations.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
   (neg:<V_cmp_result>
     (COMPARISONS:VH
       (match_operand:VH 1 "s_register_operand")
       (match_operand:VH 2 "reg_or_zero_operand")))]
  "TARGET_NEON_FP16INST"
{
  /* The plain insn is used unless this is an FP comparison compiled
     without -funsafe-math-optimizations.  */
  if (GET_MODE_CLASS (<MODE>mode) != MODE_VECTOR_FLOAT
      || flag_unsafe_math_optimizations)
    emit_insn
      (gen_neon_vc<cmp_op><mode>_fp16insn
       (operands[0], operands[1], operands[2]));
  else
    emit_insn
      (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
       (operands[0], operands[1], operands[2]));
  DONE;
})
3014
;; RTL form of the VH-mode vector comparison.  Not available for
;; floating-point modes unless flag_unsafe_math_optimizations is set.
;; Alternative 0 compares against a register, alternative 1 against zero.
(define_insn "neon_vc<cmp_op><mode>_fp16insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VH 1 "s_register_operand" "w,w")
            (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON_FP16INST
   && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
        && !flag_unsafe_math_optimizations)"
  {
    /* Type letter placed in front of the element size.  */
    const char *elem_kind
      = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
         ? "f" : "<cmp_type>");
    /* Second source: a register, or the immediate zero.  */
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
    char asm_template[100];

    sprintf (asm_template, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
             " %%<V_reg>1, %s",
             elem_kind, rhs);
    output_asm_insn (asm_template, operands);
    return "";
  }
  [(set (attr "type")
        (if_then_else
          (match_operand 2 "zero_operand")
          (const_string "neon_compare_zero<q>")
          (const_string "neon_compare<q>")))]
)
3039
;; UNSPEC form of the VH-mode vector comparison (NEON_VCMP iterator).
;; Alternative 0 compares against a register, alternative 1 against zero.
(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VH 1 "s_register_operand" "w,w")
           (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON_FP16INST"
  {
    /* Second source: a register, or the immediate zero.  */
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
    char asm_template[100];

    sprintf (asm_template, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
             " %%<V_reg>1, %s",
             rhs);
    output_asm_insn (asm_template, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
3058
;; Unsigned vector comparisons (GTUGEU iterator) over the VDQIW modes;
;; both sources are registers.
(define_insn "neon_vc<cmp_op>u<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
            (match_operand:VDQIW 1 "s_register_operand" "w")
            (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)
3069
;; Absolute-value comparison expander for the VCVTF modes.  Expands to the
;; UNSPEC insn unless flag_unsafe_math_optimizations is set, in which case
;; the RTL insn is used.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
  {
    if (!flag_unsafe_math_optimizations)
      emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
                                                         operands[1],
                                                         operands[2]));
    else
      emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
                                                  operands[2]));
    DONE;
  }
)
3088
;; RTL form of the absolute-value comparison; requires
;; flag_unsafe_math_optimizations.
(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
3099
;; UNSPEC form of the absolute-value comparison (NEON_VACMP iterator).
(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          NEON_VACMP))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
3109
;; Absolute-value comparison expander for the VH modes
;; (TARGET_NEON_FP16INST).  Expands to the UNSPEC insn unless
;; flag_unsafe_math_optimizations is set.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GLTE:<V_cmp_result>
            (abs:VH (match_operand:VH 1 "s_register_operand"))
            (abs:VH (match_operand:VH 2 "s_register_operand")))))]
  "TARGET_NEON_FP16INST"
{
  if (!flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
               (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
               (operands[0], operands[1], operands[2]));
  DONE;
})
3127
;; RTL form of the VH-mode absolute-value comparison; requires
;; flag_unsafe_math_optimizations.
(define_insn "neon_vca<cmp_op><mode>_fp16insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GLTE:<V_cmp_result>
            (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
            (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
3139
;; UNSPEC form of the VH-mode absolute-value comparison (NEON_VAGLTE
;; iterator).  The condition is TARGET_NEON_FP16INST, matching the
;; neon_vca<cmp_op><mode> VH expander that generates this insn and the
;; neon_vca<cmp_op><mode>_fp16insn pattern, so the insn cannot be matched
;; on a target that only has plain TARGET_NEON.
(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          NEON_VAGLTE))]
  "TARGET_NEON_FP16INST"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
3150
;; Compare-against-zero expander for the VH modes: forwards to
;; neon_vc<cmp_op><mode> with a zero vector as the second source.
(define_expand "neon_vc<cmp_op>z<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (COMPARISONS:<V_cmp_result>
          (match_operand:VH 1 "s_register_operand")
          (const_int 0)))]
  "TARGET_NEON_FP16INST"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);

  emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1], zero_vec));
  DONE;
})
3163
;; vtst over the VDQIW modes, expressed as UNSPEC_VTST.
(define_insn "neon_vtst<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          UNSPEC_VTST))]
  "TARGET_NEON"
  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_tst<q>")]
)
3173
;; vabd over the VDQIW modes; signedness comes from the VABD iterator.
(define_insn "neon_vabd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VABD))]
  "TARGET_NEON"
  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)
3183
;; vabd over the VH modes (UNSPEC_VABD_F), under TARGET_NEON_FP16INST.
;; NOTE(review): the type attribute is neon_abd<q>, whereas the VCVTF
;; pattern neon_vabdf<mode> uses neon_fp_abd_s<q> - confirm this is
;; intended.
(define_insn "neon_vabd<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          UNSPEC_VABD_F))]
  "TARGET_NEON_FP16INST"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)
3193
;; vabd over the VCVTF modes (UNSPEC_VABD_F).
(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VABD_F))]
  "TARGET_NEON"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
3203
;; vabdl: two VW-mode sources producing a <V_widen> result.
(define_insn "neon_vabdl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VABDL))]
  "TARGET_NEON"
  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_abd_long")]
)
3213
;; vaba: the VABD unspec of operands 2 and 3 added to the accumulator in
;; operand 1, which is tied to the destination.
(define_insn "neon_vaba<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (plus:VDQIW
          (unspec:VDQIW
            [(match_operand:VDQIW 2 "s_register_operand" "w")
             (match_operand:VDQIW 3 "s_register_operand" "w")]
            VABD)
          (match_operand:VDQIW 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
3224
;; vabal: the VABDL unspec of VW-mode operands 2 and 3 added to the
;; <V_widen> accumulator in operand 1, which is tied to the destination.
(define_insn "neon_vabal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (unspec:<V_widen>
            [(match_operand:VW 2 "s_register_operand" "w")
             (match_operand:VW 3 "s_register_operand" "w")]
            VABDL)
          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
3235
;; vmax and vmin over the VDQIW modes (VMAXMIN iterator).
(define_insn "neon_v<maxmin><sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VMAXMIN))]
  "TARGET_NEON"
  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)
3245
;; vmax and vmin over the VCVTF modes (VMAXMINF iterator).
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINF))]
  "TARGET_NEON"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
3255
;; vmax and vmin over the VH modes (VMAXMINF iterator), under
;; TARGET_NEON_FP16INST.
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          VMAXMINF))]
  "TARGET_NEON_FP16INST"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
3266
;; vpmax and vpmin on V4HF (VPMAXMINF iterator), under
;; TARGET_NEON_FP16INST.
(define_insn "neon_vp<maxmin>fv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4HF 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand" "w")]
          VPMAXMINF))]
  "TARGET_NEON_FP16INST"
  "vp<maxmin>.f16\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax")]
)
3277
;; <fmaxmin_op> over the VH modes (VMAXMINFNM iterator), under
;; TARGET_NEON_FP16INST.
(define_insn "neon_<fmaxmin_op><mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          VMAXMINFNM))]
  "TARGET_NEON_FP16INST"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
3289
;; v<maxmin>nm intrinsics over the VCVTF modes; needs TARGET_VFP5 in
;; addition to TARGET_NEON.
(define_insn "neon_<fmaxmin_op><mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINFNM))]
  "TARGET_NEON && TARGET_VFP5"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
3300
;; Vector forms for the IEEE-754 fmax()/fmin() functions.  Same RTL and
;; output template as neon_<fmaxmin_op><mode>, under the standard
;; <fmaxmin><mode>3 name.
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINFNM))]
  "TARGET_NEON && TARGET_VFP5"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
3311
;; Expander for the vpadd intrinsic: forwards its three operands to
;; neon_vpadd_internal<mode>.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand" "=w")
   (match_operand:VD 1 "s_register_operand" "w")
   (match_operand:VD 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  emit_insn (gen_neon_vpadd_internal<mode> (dest, lhs, rhs));
  DONE;
})
3322
;; vpaddl: a VDQIW-mode source producing a <V_double_width> result.
(define_insn "neon_vpaddl<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          VPADDL))]
  "TARGET_NEON"
  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_reduc_add_long")]
)
3331
;; vpadal: a VDQIW-mode source (operand 2) with a <V_double_width>
;; accumulator (operand 1) tied to the destination.
(define_insn "neon_vpadal<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:<V_double_width> 1 "s_register_operand" "0")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VPADAL))]
  "TARGET_NEON"
  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_reduc_add_acc")]
)
3341
;; vpmax and vpmin over the VDI modes (VPMAXMIN iterator).
(define_insn "neon_vp<maxmin><sup><mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VPMAXMIN))]
  "TARGET_NEON"
  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
3351
;; vpmax and vpmin over the VCVTF modes (VPMAXMINF iterator).
(define_insn "neon_vp<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VPMAXMINF))]
  "TARGET_NEON"
  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
)
3361
;; vrecps over the VCVTF modes (UNSPEC_VRECPS).
(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRECPS))]
  "TARGET_NEON"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)
3371
;; vrecps over the VH modes (UNSPEC_VRECPS), under TARGET_NEON_FP16INST.
(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          UNSPEC_VRECPS))]
  "TARGET_NEON_FP16INST"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)
3382
;; vrsqrts over the VCVTF modes (UNSPEC_VRSQRTS).
(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRSQRTS))]
  "TARGET_NEON"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)
3392
;; vrsqrts over the VH modes (UNSPEC_VRSQRTS), under TARGET_NEON_FP16INST.
(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          UNSPEC_VRSQRTS))]
  "TARGET_NEON_FP16INST"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)
3403
;; Expander for the vabs intrinsic: forwards to the standard abs<mode>2
;; pattern.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_abs<mode>2 (dest, src));
  DONE;
})
3412
;; vqabs over the VDQIW modes (UNSPEC_VQABS).
(define_insn "neon_vqabs<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQABS))]
  "TARGET_NEON"
  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qabs<q>")]
)
3421
;; bswap over the VDQHSD modes, emitted as vrev<V_sz_elem>.8.
(define_insn "neon_bswap<mode>"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD
          (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3429
;; Expander for the vneg intrinsic: forwards to the standard neg<mode>2
;; pattern.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neg<mode>2 (dest, src));
  DONE;
})
3438
;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the accumulator (tied to the destination); operands 2 and 3
;; are the <VSI2QI> sources.
(define_insn "neon_<sup>dot<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (match_operand:VCVTI 1 "register_operand" "0")
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 2 "register_operand" "w")
             (match_operand:<VSI2QI> 3 "register_operand" "w")]
            DOTPROD)))]
  "TARGET_DOTPROD"
  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_dot")]
)
3452
;; These instructions map to the __builtins for the Dot Product indexed
;; operations.  Operand 3 is a V8QI register and operand 4 the lane
;; immediate, which is passed through NEON_ENDIAN_LANE_N before printing.
(define_insn "neon_<sup>dot_lane<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (match_operand:VCVTI 1 "register_operand" "0")
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 2 "register_operand" "w")
             (match_operand:V8QI 3 "register_operand" "t")
             (match_operand:SI 4 "immediate_operand" "i")]
            DOTPROD)))]
  "TARGET_DOTPROD"
  {
    int lane = NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4]));

    operands[4] = GEN_INT (lane);
    return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
  }
  [(set_attr "type" "neon_dot")]
)
3471
3472 ;; These expands map to the Dot Product optab the vectorizer checks for.
3473 ;; The auto-vectorizer expects a dot product builtin that also does an
3474 ;; accumulation into the provided register.
3475 ;; Given the following pattern
3476 ;;
3477 ;; for (i=0; i<len; i++) {
3478 ;; c = a[i] * b[i];
3479 ;; r += c;
3480 ;; }
;; return r;
3482 ;;
3483 ;; This can be auto-vectorized to
3484 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3485 ;;
3486 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3487 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3488 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3489 ;; ...
3490 ;;
3491 ;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sup>dot_prod<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand")
        (plus:VCVTI
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 1 "register_operand")
             (match_operand:<VSI2QI> 2 "register_operand")]
            DOTPROD)
          (match_operand:VCVTI 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  /* Operand 3 is the accumulator supplied by the caller.  The dot product
     is accumulated into it in place and the result is then copied to
     operand 0.  */
  rtx acc = operands[3];

  emit_insn (gen_neon_<sup>dot<vsi2qi> (acc, acc, operands[1], operands[2]));
  emit_insn (gen_rtx_SET (operands[0], acc));
  DONE;
})
3508
;; copysign for the VCVTF modes, built from a bit-select: operand 0 is
;; first set to operand 2, then neon_vbsl is emitted with a mask that has
;; only bit 31 of every element set, operand 0 and operand 1.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
{
  rtx v_bitmask_cast;
  rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
  /* Build the element constant with gen_int_mode so that it is in the
     canonical sign-extended form for SImode.  A plain GEN_INT of
     0x80000000 is not canonical when HOST_WIDE_INT is wider than
     32 bits.  */
  rtx c = gen_int_mode (0x80000000, SImode);

  emit_move_insn (v_bitmask,
                  gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
  emit_move_insn (operands[0], operands[2]);
  /* View the integer mask in the floating-point vector mode expected by
     the vbsl pattern.  */
  v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                        <VCVTF:V_cmp_result>mode, 0);
  emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
                                  operands[1]));

  DONE;
})
3530
;; vqneg over the VDQIW modes (UNSPEC_VQNEG).
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)
3539
;; vcls over the VDQIW modes (UNSPEC_VCLS).
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)
3548
;; Standard clz<mode>2 pattern for the VDQIW modes, emitted as vclz.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
3556
;; Expander for the vclz intrinsic: forwards to clz<mode>2.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_clz<mode>2 (dest, src));
  DONE;
})
3565
;; Standard popcount<mode>2 pattern for the VE modes, emitted as vcnt.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE
          (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
3573
;; neon_vcnt<mode>: expander that simply emits popcount<mode>2 with the
;; same two operands.
3574 (define_expand "neon_vcnt<mode>"
3575 [(match_operand:VE 0 "s_register_operand" "=w")
3576 (match_operand:VE 1 "s_register_operand" "w")]
3577 "TARGET_NEON"
3578 {
3579 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3580 DONE;
3581 })
3582
;; neon_vrecpe<mode>, VH variant: UNSPEC_VRECPE on the VH modes, enabled
;; only under TARGET_NEON_FP16INST, always printed with the fixed ".f16"
;; suffix.
3583 (define_insn "neon_vrecpe<mode>"
3584 [(set (match_operand:VH 0 "s_register_operand" "=w")
3585 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3586 UNSPEC_VRECPE))]
3587 "TARGET_NEON_FP16INST"
3588 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3589 [(set_attr "type" "neon_fp_recpe_s<q>")]
3590 )
3591
;; neon_vrecpe<mode>, V32 variant: UNSPEC_VRECPE on the V32 modes under
;; plain TARGET_NEON; the element suffix comes from <V_u_elem>.
3592 (define_insn "neon_vrecpe<mode>"
3593 [(set (match_operand:V32 0 "s_register_operand" "=w")
3594 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3595 UNSPEC_VRECPE))]
3596 "TARGET_NEON"
3597 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3598 [(set_attr "type" "neon_fp_recpe_s<q>")]
3599 )
3600
;; neon_vrsqrte<mode>: UNSPEC_VRSQRTE on the V32 modes; emits
;; "vrsqrte.<V_u_elem>" with scheduling type neon_fp_rsqrte_s<q>.
3601 (define_insn "neon_vrsqrte<mode>"
3602 [(set (match_operand:V32 0 "s_register_operand" "=w")
3603 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3604 UNSPEC_VRSQRTE))]
3605 "TARGET_NEON"
3606 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3607 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3608 )
3609
;; neon_vmvn<mode>: expander that forwards both operands to the
;; one_cmpl<mode>2 pattern (defined outside this section).
3610 (define_expand "neon_vmvn<mode>"
3611 [(match_operand:VDQIW 0 "s_register_operand" "")
3612 (match_operand:VDQIW 1 "s_register_operand" "")]
3613 "TARGET_NEON"
3614 {
3615 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3616 DONE;
3617 })
3618
;; neon_vget_lane<mode>_sext_internal, VD variant: matches a sign_extend to
;; SImode of one element selected (vec_select) from operand 1 by the
;; immediate lane in operand 2, and prints "vmov.s<V_sz_elem>".
;; On big-endian the lane is mirrored at output time
;; (nunits - 1 - lane) and operands[2] is overwritten with the result.
3619 (define_insn "neon_vget_lane<mode>_sext_internal"
3620 [(set (match_operand:SI 0 "s_register_operand" "=r")
3621 (sign_extend:SI
3622 (vec_select:<V_elem>
3623 (match_operand:VD 1 "s_register_operand" "w")
3624 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3625 "TARGET_NEON"
3626 {
3627 if (BYTES_BIG_ENDIAN)
3628 {
3629 int elt = INTVAL (operands[2]);
3630 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3631 operands[2] = GEN_INT (elt);
3632 }
3633 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3634 }
3635 [(set_attr "type" "neon_to_gp")]
3636 )
3637
;; neon_vget_lane<mode>_zext_internal, VD variant: identical to the
;; sign-extending VD pattern except that the RTL uses zero_extend and the
;; mnemonic is "vmov.u<V_sz_elem>".  The big-endian lane mirroring
;; (nunits - 1 - lane) is performed the same way at output time.
3638 (define_insn "neon_vget_lane<mode>_zext_internal"
3639 [(set (match_operand:SI 0 "s_register_operand" "=r")
3640 (zero_extend:SI
3641 (vec_select:<V_elem>
3642 (match_operand:VD 1 "s_register_operand" "w")
3643 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3644 "TARGET_NEON"
3645 {
3646 if (BYTES_BIG_ENDIAN)
3647 {
3648 int elt = INTVAL (operands[2]);
3649 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3650 operands[2] = GEN_INT (elt);
3651 }
3652 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3653 }
3654 [(set_attr "type" "neon_to_gp")]
3655 )
3656
;; neon_vget_lane<mode>_sext_internal, VQ2 variant: sign-extending lane
;; read from a VQ2-mode register.  The output code splits the lane index:
;;   elt / halfelts  picks which <V_HALF>-mode half to read (register
;;                   number regno + 2 * that quotient), and
;;   elt % halfelts  is the lane inside that half, mirrored within the
;;                   half (halfelts - 1 - lane) on big-endian.
;; The instruction is written with output_asm_insn on a private ops[]
;; array, so the pattern itself returns an empty template.
3657 (define_insn "neon_vget_lane<mode>_sext_internal"
3658 [(set (match_operand:SI 0 "s_register_operand" "=r")
3659 (sign_extend:SI
3660 (vec_select:<V_elem>
3661 (match_operand:VQ2 1 "s_register_operand" "w")
3662 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3663 "TARGET_NEON"
3664 {
3665 rtx ops[3];
3666 int regno = REGNO (operands[1]);
3667 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3668 unsigned int elt = INTVAL (operands[2]);
3669 unsigned int elt_adj = elt % halfelts;
3670
3671 if (BYTES_BIG_ENDIAN)
3672 elt_adj = halfelts - 1 - elt_adj;
3673
3674 ops[0] = operands[0];
3675 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3676 ops[2] = GEN_INT (elt_adj);
3677 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3678
3679 return "";
3680 }
3681 [(set_attr "type" "neon_to_gp_q")]
3682 )
3683
;; neon_vget_lane<mode>_zext_internal, VQ2 variant: zero-extending twin of
;; the VQ2 sign-extending pattern.  The half selection
;; (regno + 2 * (elt / halfelts)), the in-half lane (elt % halfelts) and
;; the big-endian mirroring are the same; only the RTL code (zero_extend)
;; and the mnemonic ("vmov.u<V_sz_elem>") differ.
3684 (define_insn "neon_vget_lane<mode>_zext_internal"
3685 [(set (match_operand:SI 0 "s_register_operand" "=r")
3686 (zero_extend:SI
3687 (vec_select:<V_elem>
3688 (match_operand:VQ2 1 "s_register_operand" "w")
3689 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3690 "TARGET_NEON"
3691 {
3692 rtx ops[3];
3693 int regno = REGNO (operands[1]);
3694 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3695 unsigned int elt = INTVAL (operands[2]);
3696 unsigned int elt_adj = elt % halfelts;
3697
3698 if (BYTES_BIG_ENDIAN)
3699 elt_adj = halfelts - 1 - elt_adj;
3700
3701 ops[0] = operands[0];
3702 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3703 ops[2] = GEN_INT (elt_adj);
3704 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3705
3706 return "";
3707 }
3708 [(set_attr "type" "neon_to_gp_q")]
3709 )
3710
;; neon_vget_lane<mode>: expander for the VDQW modes.
;;   operand 0: <V_ext>-mode destination register
;;   operand 1: source vector
;;   operand 2: immediate lane number
;; On big-endian the lane number is first XORed with
;; (elements per 64 bits - 1), as explained by the comment in the body.
;; Vectors with 32-bit elements are then handled by
;; vec_extract<mode><V_elem_l>; all other element sizes go through the
;; sign-extending neon_vget_lane<mode>_sext_internal pattern.
3711 (define_expand "neon_vget_lane<mode>"
3712 [(match_operand:<V_ext> 0 "s_register_operand" "")
3713 (match_operand:VDQW 1 "s_register_operand" "")
3714 (match_operand:SI 2 "immediate_operand" "")]
3715 "TARGET_NEON"
3716 {
3717 if (BYTES_BIG_ENDIAN)
3718 {
3719 /* The intrinsics are defined in terms of a model where the
3720 element ordering in memory is vldm order, whereas the generic
3721 RTL is defined in terms of a model where the element ordering
3722 in memory is array order. Convert the lane number to conform
3723 to this model. */
3724 unsigned int elt = INTVAL (operands[2]);
3725 unsigned int reg_nelts
3726 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3727 elt ^= reg_nelts - 1;
3728 operands[2] = GEN_INT (elt);
3729 }
3730
3731 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3732 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3733 operands[2]));
3734 else
3735 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3736 operands[1],
3737 operands[2]));
3738 DONE;
3739 })
3740
;; neon_vget_laneu<mode>: unsigned counterpart of neon_vget_lane<mode>,
;; defined over the VDQIW modes.  The big-endian lane conversion and the
;; 32-bit-element shortcut through vec_extract<mode><V_elem_l> are the
;; same; narrower elements use the zero-extending
;; neon_vget_lane<mode>_zext_internal pattern instead of the sext one.
3741 (define_expand "neon_vget_laneu<mode>"
3742 [(match_operand:<V_ext> 0 "s_register_operand" "")
3743 (match_operand:VDQIW 1 "s_register_operand" "")
3744 (match_operand:SI 2 "immediate_operand" "")]
3745 "TARGET_NEON"
3746 {
3747 if (BYTES_BIG_ENDIAN)
3748 {
3749 /* The intrinsics are defined in terms of a model where the
3750 element ordering in memory is vldm order, whereas the generic
3751 RTL is defined in terms of a model where the element ordering
3752 in memory is array order. Convert the lane number to conform
3753 to this model. */
3754 unsigned int elt = INTVAL (operands[2]);
3755 unsigned int reg_nelts
3756 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3757 elt ^= reg_nelts - 1;
3758 operands[2] = GEN_INT (elt);
3759 }
3760
3761 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3762 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3763 operands[2]));
3764 else
3765 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3766 operands[1],
3767 operands[2]));
3768 DONE;
3769 })
3770
;; neon_vget_lanedi: DImode source and destination.  The body is a plain
;; move of operand 1 into operand 0; the lane number (operand 2) is
;; accepted but never read.
3771 (define_expand "neon_vget_lanedi"
3772 [(match_operand:DI 0 "s_register_operand" "=r")
3773 (match_operand:DI 1 "s_register_operand" "w")
3774 (match_operand:SI 2 "immediate_operand" "")]
3775 "TARGET_NEON"
3776 {
3777 emit_move_insn (operands[0], operands[1]);
3778 DONE;
3779 })
3780
;; neon_vget_lanev2di: extract one DImode element from a V2DI register.
;; On big-endian the lane is flipped (XOR with 1, since reg_nelts is 2).
;; The resulting lane must be 0 or 1 (asserted); lane 0 is read with
;; gen_lowpart and lane 1 with gen_highpart, then moved into operand 0.
3781 (define_expand "neon_vget_lanev2di"
3782 [(match_operand:DI 0 "s_register_operand" "")
3783 (match_operand:V2DI 1 "s_register_operand" "")
3784 (match_operand:SI 2 "immediate_operand" "")]
3785 "TARGET_NEON"
3786 {
3787 int lane;
3788
3789 if (BYTES_BIG_ENDIAN)
3790 {
3791 /* The intrinsics are defined in terms of a model where the
3792 element ordering in memory is vldm order, whereas the generic
3793 RTL is defined in terms of a model where the element ordering
3794 in memory is array order. Convert the lane number to conform
3795 to this model. */
3796 unsigned int elt = INTVAL (operands[2]);
3797 unsigned int reg_nelts = 2;
3798 elt ^= reg_nelts - 1;
3799 operands[2] = GEN_INT (elt);
3800 }
3801
3802 lane = INTVAL (operands[2]);
3803 gcc_assert ((lane ==0) || (lane == 1));
3804 emit_move_insn (operands[0], lane == 0
3805 ? gen_lowpart (DImode, operands[1])
3806 : gen_highpart (DImode, operands[1]));
3807 DONE;
3808 })
3809
;; neon_vset_lane<mode>: expander for the VDQ modes.
;;   operand 0: destination vector
;;   operand 1: <V_elem>-mode scalar to insert
;;   operand 2: vector supplying the remaining lanes
;;   operand 3: immediate lane number
;; On big-endian the lane is XORed with (elements per 64 bits - 1).  The
;; lane is then handed to vec_set<mode>_internal as a one-hot mask
;; (1 << lane) rather than as an index.
3810 (define_expand "neon_vset_lane<mode>"
3811 [(match_operand:VDQ 0 "s_register_operand" "=w")
3812 (match_operand:<V_elem> 1 "s_register_operand" "r")
3813 (match_operand:VDQ 2 "s_register_operand" "0")
3814 (match_operand:SI 3 "immediate_operand" "i")]
3815 "TARGET_NEON"
3816 {
3817 unsigned int elt = INTVAL (operands[3]);
3818
3819 if (BYTES_BIG_ENDIAN)
3820 {
3821 unsigned int reg_nelts
3822 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3823 elt ^= reg_nelts - 1;
3824 }
3825
3826 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3827 GEN_INT (1 << elt), operands[2]));
3828 DONE;
3829 })
3830
3831 ; Operands 2 & 3 are ignored: DImode holds a single element, so the new value (operand 1) replaces the whole destination.
3832
;; neon_vset_lanedi: the body only moves operand 1 (the new DImode value)
;; into operand 0; the old vector (operand 2) and the lane number
;; (operand 3) are not read.
3833 (define_expand "neon_vset_lanedi"
3834 [(match_operand:DI 0 "s_register_operand" "=w")
3835 (match_operand:DI 1 "s_register_operand" "r")
3836 (match_operand:DI 2 "s_register_operand" "0")
3837 (match_operand:SI 3 "immediate_operand" "i")]
3838 "TARGET_NEON"
3839 {
3840 emit_move_insn (operands[0], operands[1]);
3841 DONE;
3842 })
3843
;; neon_vcreate<mode>: for each VD_RE mode, reinterprets the DImode
;; operand 1 in <MODE>mode with gen_lowpart and moves it into operand 0.
;; No conversion instruction is emitted by this expander itself.
3844 (define_expand "neon_vcreate<mode>"
3845 [(match_operand:VD_RE 0 "s_register_operand" "")
3846 (match_operand:DI 1 "general_operand" "")]
3847 "TARGET_NEON"
3848 {
3849 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3850 emit_move_insn (operands[0], src);
3851 DONE;
3852 })
3853
;; neon_vdup_n<mode>, VX variant: vec_duplicate of a <V_elem>-mode scalar
;; held under the "r" constraint; emits one "vdup.<V_sz_elem>".
3854 (define_insn "neon_vdup_n<mode>"
3855 [(set (match_operand:VX 0 "s_register_operand" "=w")
3856 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3857 "TARGET_NEON"
3858 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3859 [(set_attr "type" "neon_from_gp<q>")]
3860 )
3861
;; neon_vdup_nv4hf: vec_duplicate of an HFmode scalar ("r" constraint)
;; into a V4HF register, printed as "vdup.16" with a %P destination.
3862 (define_insn "neon_vdup_nv4hf"
3863 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3864 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3865 "TARGET_NEON"
3866 "vdup.16\t%P0, %1"
3867 [(set_attr "type" "neon_from_gp")]
3868 )
3869
;; neon_vdup_nv8hf: as neon_vdup_nv4hf but for V8HF; the destination is
;; printed with %q and the scheduling type is neon_from_gp_q.
3870 (define_insn "neon_vdup_nv8hf"
3871 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3872 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3873 "TARGET_NEON"
3874 "vdup.16\t%q0, %1"
3875 [(set_attr "type" "neon_from_gp_q")]
3876 )
3877
;; neon_vdup_n<mode>, V32 variant: two alternatives for the scalar source.
;;   alt 0 ("r"): printed as plain %1, type neon_from_gp<q>.
;;   alt 1 ("t"): printed with the %y1 modifier, type neon_dup<q>.
3878 (define_insn "neon_vdup_n<mode>"
3879 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3880 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3881 "TARGET_NEON"
3882 "@
3883 vdup.<V_sz_elem>\t%<V_reg>0, %1
3884 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3885 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3886 )
3887
;; neon_vdup_ndi: both operands are DImode, so the expander is just a move
;; of operand 1 into operand 0.
3888 (define_expand "neon_vdup_ndi"
3889 [(match_operand:DI 0 "s_register_operand" "=w")
3890 (match_operand:DI 1 "s_register_operand" "r")]
3891 "TARGET_NEON"
3892 {
3893 emit_move_insn (operands[0], operands[1]);
3894 DONE;
3895 }
3896 )
3897
;; neon_vdup_nv2di: vec_duplicate of a DImode value into V2DI.  Each
;; alternative prints two vmov instructions, one targeting %e0 and one
;; targeting %f0:
;;   alt 0 ("r"): source printed as the pair %Q1, %R1.
;;   alt 1 ("w"): source printed as %P1.
;; Length is declared as 8 and the type as "multiple" because two
;; instructions are output.
3898 (define_insn "neon_vdup_nv2di"
3899 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3900 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3901 "TARGET_NEON"
3902 "@
3903 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3904 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3905 [(set_attr "length" "8")
3906 (set_attr "type" "multiple")]
3907 )
3908
;; neon_vdup_lane<mode>_internal, VDQW variant: vec_duplicate of one lane
;; (operand 2) selected from operand 1, whose mode is
;; <V_double_vector_mode>.  On big-endian the lane is mirrored at output
;; time (nunits of the source mode - 1 - lane).  <Is_d_reg> picks whether
;; the destination is printed with %P0 or %q0; the source is always %P1.
3909 (define_insn "neon_vdup_lane<mode>_internal"
3910 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3911 (vec_duplicate:VDQW
3912 (vec_select:<V_elem>
3913 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3914 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3915 "TARGET_NEON"
3916 {
3917 if (BYTES_BIG_ENDIAN)
3918 {
3919 int elt = INTVAL (operands[2]);
3920 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3921 operands[2] = GEN_INT (elt);
3922 }
3923 if (<Is_d_reg>)
3924 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3925 else
3926 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3927 }
3928 [(set_attr "type" "neon_dup<q>")]
3929 )
3930
;; neon_vdup_lane<mode>_internal, VH variant: same RTL shape and output
;; code as the VDQW variant, instantiated for the VH modes and
;; additionally requiring TARGET_FP16.
3931 (define_insn "neon_vdup_lane<mode>_internal"
3932 [(set (match_operand:VH 0 "s_register_operand" "=w")
3933 (vec_duplicate:VH
3934 (vec_select:<V_elem>
3935 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3936 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3937 "TARGET_NEON && TARGET_FP16"
3938 {
3939 if (BYTES_BIG_ENDIAN)
3940 {
3941 int elt = INTVAL (operands[2]);
3942 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3943 operands[2] = GEN_INT (elt);
3944 }
3945 if (<Is_d_reg>)
3946 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3947 else
3948 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3949 }
3950 [(set_attr "type" "neon_dup<q>")]
3951 )
3952
;; neon_vdup_lane<mode>, VDQW variant: expander in front of
;; neon_vdup_lane<mode>_internal.  On big-endian it XORs the lane number
;; with (elements per 64 bits of the source mode - 1) before emitting the
;; internal insn; otherwise the operands are passed through unchanged.
;; Note that the internal insn applies its own big-endian mirroring again
;; when it prints the instruction.
3953 (define_expand "neon_vdup_lane<mode>"
3954 [(match_operand:VDQW 0 "s_register_operand" "=w")
3955 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3956 (match_operand:SI 2 "immediate_operand" "i")]
3957 "TARGET_NEON"
3958 {
3959 if (BYTES_BIG_ENDIAN)
3960 {
3961 unsigned int elt = INTVAL (operands[2]);
3962 unsigned int reg_nelts
3963 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3964 elt ^= reg_nelts - 1;
3965 operands[2] = GEN_INT (elt);
3966 }
3967 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3968 operands[2]));
3969 DONE;
3970 })
3971
;; neon_vdup_lane<mode>, VH variant: same body as the VDQW expander
;; (big-endian lane XOR, then neon_vdup_lane<mode>_internal), instantiated
;; for the VH modes and additionally requiring TARGET_FP16.
3972 (define_expand "neon_vdup_lane<mode>"
3973 [(match_operand:VH 0 "s_register_operand")
3974 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3975 (match_operand:SI 2 "immediate_operand")]
3976 "TARGET_NEON && TARGET_FP16"
3977 {
3978 if (BYTES_BIG_ENDIAN)
3979 {
3980 unsigned int elt = INTVAL (operands[2]);
3981 unsigned int reg_nelts
3982 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3983 elt ^= reg_nelts - 1;
3984 operands[2] = GEN_INT (elt);
3985 }
3986 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3987 operands[2]));
3988 DONE;
3989 })
3990
3991 ; Scalar index is ignored, since only zero is valid here.
;; neon_vdup_lanedi: DImode in, DImode out; implemented as a plain move of
;; operand 1 into operand 0.  Operand 2 is not read.
3992 (define_expand "neon_vdup_lanedi"
3993 [(match_operand:DI 0 "s_register_operand" "=w")
3994 (match_operand:DI 1 "s_register_operand" "w")
3995 (match_operand:SI 2 "immediate_operand" "i")]
3996 "TARGET_NEON"
3997 {
3998 emit_move_insn (operands[0], operands[1]);
3999 DONE;
4000 })
4001
4002 ; Likewise for v2di, as the DImode second operand has only a single element.
;; neon_vdup_lanev2di: forwards operands 0 and 1 to neon_vdup_nv2di.
;; Operand 2 (the lane) is not read.
4003 (define_expand "neon_vdup_lanev2di"
4004 [(match_operand:V2DI 0 "s_register_operand" "=w")
4005 (match_operand:DI 1 "s_register_operand" "w")
4006 (match_operand:SI 2 "immediate_operand" "i")]
4007 "TARGET_NEON"
4008 {
4009 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4010 DONE;
4011 })
4012
4013 ; Disabled before reload because we don't want combine doing something silly,
4014 ; but used by the post-reload expansion of neon_vcombine.
;; *neon_vswp<mode>: two sets in one pattern, operand 0 <- operand 1 and
;; operand 1 <- operand 0, i.e. a register swap, printed as a single
;; "vswp".  Both operands use the "+w" (read-write) constraint.  The
;; condition includes reload_completed, so the pattern can only match
;; after reload.
4015 (define_insn "*neon_vswp<mode>"
4016 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4017 (match_operand:VDQX 1 "s_register_operand" "+w"))
4018 (set (match_dup 1) (match_dup 0))]
4019 "TARGET_NEON && reload_completed"
4020 "vswp\t%<V_reg>0, %<V_reg>1"
4021 [(set_attr "type" "neon_permute<q>")]
4022 )
4023
4024 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4025 ;; dest vector.
4026 ;; FIXME: A different implementation of this builtin could make it much
4027 ;; more likely that we wouldn't actually need to output anything (we could make
4028 ;; it so that the reg allocator puts things in the right places magically
4029 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
4030
;; neon_vcombine<mode>: vec_concat of two VDX-mode registers into a
;; <V_DOUBLE>-mode register.  The insn template is "#", so it is never
;; printed directly; once reload has completed it is split by calling
;; neon_split_vcombine (operands), which emits the replacement code.
4031 (define_insn_and_split "neon_vcombine<mode>"
4032 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4033 (vec_concat:<V_DOUBLE>
4034 (match_operand:VDX 1 "s_register_operand" "w")
4035 (match_operand:VDX 2 "s_register_operand" "w")))]
4036 "TARGET_NEON"
4037 "#"
4038 "&& reload_completed"
4039 [(const_int 0)]
4040 {
4041 neon_split_vcombine (operands);
4042 DONE;
4043 }
4044 [(set_attr "type" "multiple")]
4045 )
4046
;; neon_vget_high<mode>: moves into operand 0 the <V_HALF>-mode subreg of
;; operand 1 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode),
;; i.e. the half that follows the one returned by neon_vget_low<mode>.
4047 (define_expand "neon_vget_high<mode>"
4048 [(match_operand:<V_HALF> 0 "s_register_operand")
4049 (match_operand:VQX 1 "s_register_operand")]
4050 "TARGET_NEON"
4051 {
4052 emit_move_insn (operands[0],
4053 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4054 GET_MODE_SIZE (<V_HALF>mode)));
4055 DONE;
4056 })
4057
;; neon_vget_low<mode>: moves into operand 0 the <V_HALF>-mode subreg of
;; operand 1 at byte offset 0.
4058 (define_expand "neon_vget_low<mode>"
4059 [(match_operand:<V_HALF> 0 "s_register_operand")
4060 (match_operand:VQX 1 "s_register_operand")]
4061 "TARGET_NEON"
4062 {
4063 emit_move_insn (operands[0],
4064 simplify_gen_subreg (<V_HALF>mode, operands[1],
4065 <MODE>mode, 0));
4066 DONE;
4067 })
4068
;; float<mode><V_cvtto>2: generic (float ...) conversion from a VCVTI mode
;; to <V_CVTTO>, printed as "vcvt.f32.s32".  The pattern is disabled when
;; flag_rounding_math is set.
4069 (define_insn "float<mode><V_cvtto>2"
4070 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4071 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4072 "TARGET_NEON && !flag_rounding_math"
4073 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4074 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4075 )
4076
;; floatuns<mode><V_cvtto>2: unsigned counterpart of float<mode><V_cvtto>2
;; using (unsigned_float ...), printed as "vcvt.f32.u32".  Also disabled
;; when flag_rounding_math is set.
4077 (define_insn "floatuns<mode><V_cvtto>2"
4078 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4079 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4080 "TARGET_NEON && !flag_rounding_math"
4081 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4082 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4083 )
4084
;; fix_trunc<mode><V_cvtto>2: generic (fix ...) conversion from a VCVTF
;; mode to <V_CVTTO>, printed as "vcvt.s32.f32".  Unlike the float
;; patterns above, the condition is plain TARGET_NEON.
4085 (define_insn "fix_trunc<mode><V_cvtto>2"
4086 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4087 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4088 "TARGET_NEON"
4089 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4090 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4091 )
4092
;; fixuns_trunc<mode><V_cvtto>2: unsigned counterpart of
;; fix_trunc<mode><V_cvtto>2 using (unsigned_fix ...), printed as
;; "vcvt.u32.f32".
4093 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4094 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4095 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4096 "TARGET_NEON"
4097 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4098 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4099 )
4100
;; neon_vcvt<sup><mode>, VCVTF source: unspec form (iterated over VCVT_US)
;; converting a VCVTF mode to <V_CVTTO>.  <sup> supplies the signedness
;; letter in "vcvt.<sup>32.f32".
4101 (define_insn "neon_vcvt<sup><mode>"
4102 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4103 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4104 VCVT_US))]
4105 "TARGET_NEON"
4106 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4107 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4108 )
4109
;; neon_vcvt<sup><mode>, VCVTI source: the opposite direction of the VCVTF
;; variant; converts a VCVTI mode to <V_CVTTO> and prints
;; "vcvt.f32.<sup>32".
4110 (define_insn "neon_vcvt<sup><mode>"
4111 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4112 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4113 VCVT_US))]
4114 "TARGET_NEON"
4115 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4116 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4117 )
4118
;; neon_vcvtv4sfv4hf: UNSPEC_VCVT from V4HF (printed %P1) to V4SF
;; (printed %q0) as "vcvt.f32.f16".  Requires TARGET_FP16 in addition to
;; TARGET_NEON.
4119 (define_insn "neon_vcvtv4sfv4hf"
4120 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4121 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4122 UNSPEC_VCVT))]
4123 "TARGET_NEON && TARGET_FP16"
4124 "vcvt.f32.f16\t%q0, %P1"
4125 [(set_attr "type" "neon_fp_cvt_widen_h")]
4126 )
4127
;; neon_vcvtv4hfv4sf: reverse of neon_vcvtv4sfv4hf; UNSPEC_VCVT from V4SF
;; (printed %q1) to V4HF (printed %P0) as "vcvt.f16.f32".
4128 (define_insn "neon_vcvtv4hfv4sf"
4129 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4130 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4131 UNSPEC_VCVT))]
4132 "TARGET_NEON && TARGET_FP16"
4133 "vcvt.f16.f32\t%P0, %q1"
4134 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
4135 )
4136
;; neon_vcvt<sup><mode>, VCVTHI source: 16-bit counterpart of the VCVTI
;; variant, enabled under TARGET_NEON_FP16INST; converts a VCVTHI mode to
;; <VH_CVTTO> and prints "vcvt.f16.<sup>16".
4137 (define_insn "neon_vcvt<sup><mode>"
4138 [(set
4139 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4140 (unspec:<VH_CVTTO>
4141 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4142 VCVT_US))]
4143 "TARGET_NEON_FP16INST"
4144 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4145 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4146 )
4147
;; neon_vcvt<sup><mode>, VH source: 16-bit counterpart of the VCVTF
;; variant, enabled under TARGET_NEON_FP16INST; converts a VH mode to
;; <VH_CVTTO> and prints "vcvt.<sup>16.f16".
4148 (define_insn "neon_vcvt<sup><mode>"
4149 [(set
4150 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4151 (unspec:<VH_CVTTO>
4152 [(match_operand:VH 1 "s_register_operand" "w")]
4153 VCVT_US))]
4154 "TARGET_NEON_FP16INST"
4155 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4156 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4157 )
4158
;; neon_vcvt<sup>_n<mode>, VCVTF source: VCVT_US_N unspec taking an extra
;; immediate (operand 2) that is printed as the third assembly operand of
;; "vcvt.<sup>32.f32".  Before printing, operand 2 is range-checked with
;; arm_const_bounds (operands[2], 1, 33).
4159 (define_insn "neon_vcvt<sup>_n<mode>"
4160 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4161 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4162 (match_operand:SI 2 "immediate_operand" "i")]
4163 VCVT_US_N))]
4164 "TARGET_NEON"
4165 {
4166 arm_const_bounds (operands[2], 1, 33);
4167 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4168 }
4169 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4170 )
4171
;; neon_vcvt<sup>_n<mode>, VH source: 16-bit counterpart of the VCVTF
;; variant under TARGET_NEON_FP16INST; prints "vcvt.<sup>16.f16" with the
;; immediate operand 2 as third assembly operand.
;; NOTE(review): the bounds passed here are (0, 17) whereas the 32-bit
;; variants pass (1, 33); confirm that a lower bound of 0 is intended.
4172 (define_insn "neon_vcvt<sup>_n<mode>"
4173 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4174 (unspec:<VH_CVTTO>
4175 [(match_operand:VH 1 "s_register_operand" "w")
4176 (match_operand:SI 2 "immediate_operand" "i")]
4177 VCVT_US_N))]
4178 "TARGET_NEON_FP16INST"
4179 {
4180 arm_const_bounds (operands[2], 0, 17);
4181 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4182 }
4183 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4184 )
4185
;; neon_vcvt<sup>_n<mode>, VCVTI source: the opposite direction of the
;; VCVTF "_n" variant; prints "vcvt.f32.<sup>32" with the immediate
;; operand 2 after it has been checked with
;; arm_const_bounds (operands[2], 1, 33).
4186 (define_insn "neon_vcvt<sup>_n<mode>"
4187 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4188 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4189 (match_operand:SI 2 "immediate_operand" "i")]
4190 VCVT_US_N))]
4191 "TARGET_NEON"
4192 {
4193 arm_const_bounds (operands[2], 1, 33);
4194 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4195 }
4196 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4197 )
4198
;; neon_vcvt<sup>_n<mode>, VCVTHI source: 16-bit counterpart of the VCVTI
;; "_n" variant under TARGET_NEON_FP16INST; prints "vcvt.f16.<sup>16"
;; with the immediate operand 2 as third assembly operand.
;; NOTE(review): the bounds passed here are (0, 17) whereas the 32-bit
;; variants pass (1, 33); confirm that a lower bound of 0 is intended.
4199 (define_insn "neon_vcvt<sup>_n<mode>"
4200 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4201 (unspec:<VH_CVTTO>
4202 [(match_operand:VCVTHI 1 "s_register_operand" "w")
4203 (match_operand:SI 2 "immediate_operand" "i")]
4204 VCVT_US_N))]
4205 "TARGET_NEON_FP16INST"
4206 {
4207 arm_const_bounds (operands[2], 0, 17);
4208 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4209 }
4210 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4211 )
4212
;; neon_vcvt<vcvth_op><sup><mode>: VCVT_HF_US unspec on the VH modes under
;; TARGET_NEON_FP16INST.  <vcvth_op> is appended directly to the "vcvt"
;; mnemonic and <sup> gives the signedness letter of the 16-bit result.
4213 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4214 [(set
4215 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4216 (unspec:<VH_CVTTO>
4217 [(match_operand:VH 1 "s_register_operand" "w")]
4218 VCVT_HF_US))]
4219 "TARGET_NEON_FP16INST"
4220 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4221 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4222 )
4223
;; neon_vmovn<mode>: UNSPEC_VMOVN from a VN mode (printed %q1) to its
;; <V_narrow> mode (printed %P0) as "vmovn.<V_if_elem>".
4224 (define_insn "neon_vmovn<mode>"
4225 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4226 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4227 UNSPEC_VMOVN))]
4228 "TARGET_NEON"
4229 "vmovn.<V_if_elem>\t%P0, %q1"
4230 [(set_attr "type" "neon_shift_imm_narrow_q")]
4231 )
4232
;; neon_vqmovn<sup><mode>: same operand shape as neon_vmovn<mode>, iterated
;; over the VQMOVN unspecs; <sup> supplies the signedness letter in
;; "vqmovn.<sup><V_sz_elem>".
4233 (define_insn "neon_vqmovn<sup><mode>"
4234 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4235 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4236 VQMOVN))]
4237 "TARGET_NEON"
4238 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4239 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4240 )
4241
;; neon_vqmovun<mode>: same operand shape as neon_vmovn<mode> with
;; UNSPEC_VQMOVUN; prints "vqmovun.<V_s_elem>".
4242 (define_insn "neon_vqmovun<mode>"
4243 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4244 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4245 UNSPEC_VQMOVUN))]
4246 "TARGET_NEON"
4247 "vqmovun.<V_s_elem>\t%P0, %q1"
4248 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4249 )
4250
;; neon_vmovl<sup><mode>: VMOVL unspec from a VW mode (printed %P1) to its
;; <V_widen> mode (printed %q0) as "vmovl.<sup><V_sz_elem>".
4251 (define_insn "neon_vmovl<sup><mode>"
4252 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4253 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4254 VMOVL))]
4255 "TARGET_NEON"
4256 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4257 [(set_attr "type" "neon_shift_imm_long")]
4258 )
4259
;; neon_vmul_lane<mode>, VMD variant: UNSPEC_VMUL_LANE of operand 1 by
;; lane operand 3 of operand 2.  Operand 2 uses the mode-dependent
;; <scalar_mul_constraint>.  All registers are printed with %P.  The
;; scheduling type is the fp or the integer "scalar" multiply type
;; depending on <Is_float_mode>.
4260 (define_insn "neon_vmul_lane<mode>"
4261 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4262 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4263 (match_operand:VMD 2 "s_register_operand"
4264 "<scalar_mul_constraint>")
4265 (match_operand:SI 3 "immediate_operand" "i")]
4266 UNSPEC_VMUL_LANE))]
4267 "TARGET_NEON"
4268 {
4269 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4270 }
4271 [(set (attr "type")
4272 (if_then_else (match_test "<Is_float_mode>")
4273 (const_string "neon_fp_mul_s_scalar<q>")
4274 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4275 )
4276
;; neon_vmul_lane<mode>, VMQ variant: as the VMD variant, but operands 0
;; and 1 are VMQ-mode (printed %q) while the scalar source, operand 2, is
;; in the <V_HALF> mode (printed %P).
4277 (define_insn "neon_vmul_lane<mode>"
4278 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4279 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4280 (match_operand:<V_HALF> 2 "s_register_operand"
4281 "<scalar_mul_constraint>")
4282 (match_operand:SI 3 "immediate_operand" "i")]
4283 UNSPEC_VMUL_LANE))]
4284 "TARGET_NEON"
4285 {
4286 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4287 }
4288 [(set (attr "type")
4289 (if_then_else (match_test "<Is_float_mode>")
4290 (const_string "neon_fp_mul_s_scalar<q>")
4291 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4292 )
4293
;; neon_vmul_lane<mode>, VH variant: enabled under TARGET_NEON_FP16INST.
;; The scalar source (operand 2) is always V4HF regardless of the VH mode
;; of operands 0 and 1, and the mnemonic is the fixed "vmul.f16".
4294 (define_insn "neon_vmul_lane<mode>"
4295 [(set (match_operand:VH 0 "s_register_operand" "=w")
4296 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4297 (match_operand:V4HF 2 "s_register_operand"
4298 "<scalar_mul_constraint>")
4299 (match_operand:SI 3 "immediate_operand" "i")]
4300 UNSPEC_VMUL_LANE))]
4301 "TARGET_NEON_FP16INST"
4302 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4303 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
4304 )
4305
;; neon_vmull<sup>_lane<mode>: VMULL_LANE unspec.  Both sources are VMDI
;; mode (printed %P) and the result is the <V_widen> mode (printed %q);
;; operand 3 selects the lane of operand 2.
4306 (define_insn "neon_vmull<sup>_lane<mode>"
4307 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4308 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4309 (match_operand:VMDI 2 "s_register_operand"
4310 "<scalar_mul_constraint>")
4311 (match_operand:SI 3 "immediate_operand" "i")]
4312 VMULL_LANE))]
4313 "TARGET_NEON"
4314 {
4315 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4316 }
4317 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
4318 )
4319
;; neon_vqdmull_lane<mode>: same operand shape as neon_vmull<sup>_lane<mode>
;; with UNSPEC_VQDMULL_LANE; prints "vqdmull.<V_s_elem>".
4320 (define_insn "neon_vqdmull_lane<mode>"
4321 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4322 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4323 (match_operand:VMDI 2 "s_register_operand"
4324 "<scalar_mul_constraint>")
4325 (match_operand:SI 3 "immediate_operand" "i")]
4326 UNSPEC_VQDMULL_LANE))]
4327 "TARGET_NEON"
4328 {
4329 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4330 }
4331 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
4332 )
4333
;; neon_vq<r>dmulh_lane<mode>, VMQI variant: VQDMULH_LANE unspec; <r>
;; selects between the two mnemonics generated from this template.
;; Operands 0 and 1 are VMQI (printed %q); the scalar source, operand 2,
;; is <V_HALF> mode (printed %P) with lane operand 3.
4334 (define_insn "neon_vq<r>dmulh_lane<mode>"
4335 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4336 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4337 (match_operand:<V_HALF> 2 "s_register_operand"
4338 "<scalar_mul_constraint>")
4339 (match_operand:SI 3 "immediate_operand" "i")]
4340 VQDMULH_LANE))]
4341 "TARGET_NEON"
4342 {
4343 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4344 }
4345 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4346 )
4347
;; neon_vq<r>dmulh_lane<mode>, VMDI variant: as the VMQI variant but every
;; vector operand is VMDI mode and printed with %P.
;; NOTE(review): the "type" string ends in "_scalar_q", identical to the
;; VMQI variant, although this form prints %P registers; confirm whether
;; the non-"_q" scheduling type was intended here.
4348 (define_insn "neon_vq<r>dmulh_lane<mode>"
4349 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4350 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4351 (match_operand:VMDI 2 "s_register_operand"
4352 "<scalar_mul_constraint>")
4353 (match_operand:SI 3 "immediate_operand" "i")]
4354 VQDMULH_LANE))]
4355 "TARGET_NEON"
4356 {
4357 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4358 }
4359 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4360 )
4361
4362 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI variant, enabled under TARGET_NEON_RDMA.  Operand 1 is tied to the
;; destination ("0" constraint) and does not appear in the assembly
;; template; operand 2 is the vector source (%q2) and operand 3 the
;; <V_HALF>-mode scalar source (%P3) indexed by lane operand 4.
;; <VQRDMLH_AS:neon_rdma_as> fills in the letter between "vqrdml" and "h".
4363 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4364 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4365 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4366 (match_operand:VMQI 2 "s_register_operand" "w")
4367 (match_operand:<V_HALF> 3 "s_register_operand"
4368 "<scalar_mul_constraint>")
4369 (match_operand:SI 4 "immediate_operand" "i")]
4370 VQRDMLH_AS))]
4371 "TARGET_NEON_RDMA"
4372 {
4373 return
4374 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4375 }
4376 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
4377 )
4378
;; VMDI variant of the pattern above: all vector operands are VMDI mode and
;; printed with %P.  Operand 1 is again tied to the destination and
;; absent from the template.  The "type" string here has no <q> suffix.
4379 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4380 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4381 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4382 (match_operand:VMDI 2 "s_register_operand" "w")
4383 (match_operand:VMDI 3 "s_register_operand"
4384 "<scalar_mul_constraint>")
4385 (match_operand:SI 4 "immediate_operand" "i")]
4386 VQRDMLH_AS))]
4387 "TARGET_NEON_RDMA"
4388 {
4389 return
4390 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4391 }
4392 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
4393 )
4394
;; neon_vmla_lane<mode>, VMD variant: UNSPEC_VMLA_LANE.  Operand 1 is tied
;; to the destination ("0") and is not printed; the template uses operand
;; 2 and lane operand 4 of operand 3, all with %P.  The scheduling type
;; is selected by <Is_float_mode>.
4395 (define_insn "neon_vmla_lane<mode>"
4396 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4397 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4398 (match_operand:VMD 2 "s_register_operand" "w")
4399 (match_operand:VMD 3 "s_register_operand"
4400 "<scalar_mul_constraint>")
4401 (match_operand:SI 4 "immediate_operand" "i")]
4402 UNSPEC_VMLA_LANE))]
4403 "TARGET_NEON"
4404 {
4405 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4406 }
4407 [(set (attr "type")
4408 (if_then_else (match_test "<Is_float_mode>")
4409 (const_string "neon_fp_mla_s_scalar<q>")
4410 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4411 )
4412
;; neon_vmla_lane<mode>, VMQ variant: as the VMD variant, but operands 0-2
;; are VMQ mode (printed %q) and the scalar source, operand 3, is
;; <V_HALF> mode (printed %P).
4413 (define_insn "neon_vmla_lane<mode>"
4414 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4415 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4416 (match_operand:VMQ 2 "s_register_operand" "w")
4417 (match_operand:<V_HALF> 3 "s_register_operand"
4418 "<scalar_mul_constraint>")
4419 (match_operand:SI 4 "immediate_operand" "i")]
4420 UNSPEC_VMLA_LANE))]
4421 "TARGET_NEON"
4422 {
4423 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4424 }
4425 [(set (attr "type")
4426 (if_then_else (match_test "<Is_float_mode>")
4427 (const_string "neon_fp_mla_s_scalar<q>")
4428 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4429 )
4430
;; neon_vmlal<sup>_lane<mode>: VMLAL_LANE unspec.  The accumulator
;; (operand 1, tied to the destination) and the result are <V_widen> mode
;; (printed %q0); the two multiplicands are VMDI mode (printed %P), with
;; operand 4 selecting the lane of operand 3.
4431 (define_insn "neon_vmlal<sup>_lane<mode>"
4432 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4433 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4434 (match_operand:VMDI 2 "s_register_operand" "w")
4435 (match_operand:VMDI 3 "s_register_operand"
4436 "<scalar_mul_constraint>")
4437 (match_operand:SI 4 "immediate_operand" "i")]
4438 VMLAL_LANE))]
4439 "TARGET_NEON"
4440 {
4441 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4442 }
4443 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4444 )
4445
;; neon_vqdmlal_lane<mode>: same operand shape as
;; neon_vmlal<sup>_lane<mode> with UNSPEC_VQDMLAL_LANE; prints
;; "vqdmlal.<V_s_elem>".
4446 (define_insn "neon_vqdmlal_lane<mode>"
4447 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4448 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4449 (match_operand:VMDI 2 "s_register_operand" "w")
4450 (match_operand:VMDI 3 "s_register_operand"
4451 "<scalar_mul_constraint>")
4452 (match_operand:SI 4 "immediate_operand" "i")]
4453 UNSPEC_VQDMLAL_LANE))]
4454 "TARGET_NEON"
4455 {
4456 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4457 }
4458 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4459 )
4460
;; neon_vmls_lane<mode>, VMD variant: mirrors neon_vmla_lane<mode> (VMD)
;; with UNSPEC_VMLS_LANE and the "vmls" mnemonic.  It uses the same "mla"
;; scheduling type strings as the vmla pattern.
4461 (define_insn "neon_vmls_lane<mode>"
4462 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4463 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4464 (match_operand:VMD 2 "s_register_operand" "w")
4465 (match_operand:VMD 3 "s_register_operand"
4466 "<scalar_mul_constraint>")
4467 (match_operand:SI 4 "immediate_operand" "i")]
4468 UNSPEC_VMLS_LANE))]
4469 "TARGET_NEON"
4470 {
4471 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4472 }
4473 [(set (attr "type")
4474 (if_then_else (match_test "<Is_float_mode>")
4475 (const_string "neon_fp_mla_s_scalar<q>")
4476 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4477 )
4478
;; neon_vmls_lane<mode>, VMQ variant: mirrors neon_vmla_lane<mode> (VMQ)
;; with UNSPEC_VMLS_LANE and the "vmls" mnemonic; the scalar source,
;; operand 3, is <V_HALF> mode.
4479 (define_insn "neon_vmls_lane<mode>"
4480 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4481 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4482 (match_operand:VMQ 2 "s_register_operand" "w")
4483 (match_operand:<V_HALF> 3 "s_register_operand"
4484 "<scalar_mul_constraint>")
4485 (match_operand:SI 4 "immediate_operand" "i")]
4486 UNSPEC_VMLS_LANE))]
4487 "TARGET_NEON"
4488 {
4489 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4490 }
4491 [(set (attr "type")
4492 (if_then_else (match_test "<Is_float_mode>")
4493 (const_string "neon_fp_mla_s_scalar<q>")
4494 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4495 )
4496
;; neon_vmlsl<sup>_lane<mode>: mirrors neon_vmlal<sup>_lane<mode> with the
;; VMLSL_LANE unspecs and the "vmlsl" mnemonic.
4497 (define_insn "neon_vmlsl<sup>_lane<mode>"
4498 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4499 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4500 (match_operand:VMDI 2 "s_register_operand" "w")
4501 (match_operand:VMDI 3 "s_register_operand"
4502 "<scalar_mul_constraint>")
4503 (match_operand:SI 4 "immediate_operand" "i")]
4504 VMLSL_LANE))]
4505 "TARGET_NEON"
4506 {
4507 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4508 }
4509 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4510 )
4511
;; neon_vqdmlsl_lane<mode>: mirrors neon_vqdmlal_lane<mode> with
;; UNSPEC_VQDMLSL_LANE and the "vqdmlsl" mnemonic.
4512 (define_insn "neon_vqdmlsl_lane<mode>"
4513 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4514 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4515 (match_operand:VMDI 2 "s_register_operand" "w")
4516 (match_operand:VMDI 3 "s_register_operand"
4517 "<scalar_mul_constraint>")
4518 (match_operand:SI 4 "immediate_operand" "i")]
4519 UNSPEC_VQDMLSL_LANE))]
4520 "TARGET_NEON"
4521 {
4522 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4523 }
4524 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4525 )
4526
4527 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4528 ; core register into a temp register, then use a scalar taken from that. This
4529 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4530 ; or extracted from another vector. In the latter case it's currently better to
4531 ; use the "_lane" variant, and the former case can probably be implemented
4532 ; using vld1_lane, but that hasn't been done yet.
4533
;; Expander for multiply-by-scalar on the VMD modes.  The scalar (operand 2,
;; mode <V_elem>) is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane, then neon_vmul_lane is emitted selecting lane 0.
;; Note the pseudo is also passed as the input vector of the lane set.
4534 (define_expand "neon_vmul_n<mode>"
4535 [(match_operand:VMD 0 "s_register_operand" "")
4536 (match_operand:VMD 1 "s_register_operand" "")
4537 (match_operand:<V_elem> 2 "s_register_operand" "")]
4538 "TARGET_NEON"
4539 {
4540 rtx tmp = gen_reg_rtx (<MODE>mode);
4541 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4542 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4543 const0_rtx));
4544 DONE;
4545 })
4546
;; Expander for multiply-by-scalar on the VMQ modes.  The scalar (operand 2)
;; is written into lane 0 of a fresh <V_HALF> pseudo via the <V_half>
;; variant of neon_vset_lane, then neon_vmul_lane is emitted selecting
;; lane 0 of that half-width vector.
4547 (define_expand "neon_vmul_n<mode>"
4548 [(match_operand:VMQ 0 "s_register_operand" "")
4549 (match_operand:VMQ 1 "s_register_operand" "")
4550 (match_operand:<V_elem> 2 "s_register_operand" "")]
4551 "TARGET_NEON"
4552 {
4553 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4554 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4555 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4556 const0_rtx));
4557 DONE;
4558 })
4559
;; Expander for multiply-by-scalar on the VH modes; enabled only under
;; TARGET_NEON_FP16INST.  Whatever VH mode is being expanded, the scalar is
;; placed in lane 0 of a V4HFmode pseudo (neon_vset_lanev4hf) before
;; neon_vmul_lane is emitted with lane 0.
4560 (define_expand "neon_vmul_n<mode>"
4561 [(match_operand:VH 0 "s_register_operand")
4562 (match_operand:VH 1 "s_register_operand")
4563 (match_operand:<V_elem> 2 "s_register_operand")]
4564 "TARGET_NEON_FP16INST"
4565 {
4566 rtx tmp = gen_reg_rtx (V4HFmode);
4567 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4568 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4569 const0_rtx));
4570 DONE;
4571 })
4572
;; Expander for neon_vmulls_n: VMDI input, <V_widen> result.  The scalar
;; (operand 2) is set into lane 0 of a fresh <MODE> pseudo and
;; neon_vmulls_lane is emitted with lane 0.
4573 (define_expand "neon_vmulls_n<mode>"
4574 [(match_operand:<V_widen> 0 "s_register_operand" "")
4575 (match_operand:VMDI 1 "s_register_operand" "")
4576 (match_operand:<V_elem> 2 "s_register_operand" "")]
4577 "TARGET_NEON"
4578 {
4579 rtx tmp = gen_reg_rtx (<MODE>mode);
4580 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4581 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4582 const0_rtx));
4583 DONE;
4584 })
4585
;; Expander for neon_vmullu_n: VMDI input, <V_widen> result.  The scalar
;; (operand 2) is set into lane 0 of a fresh <MODE> pseudo and
;; neon_vmullu_lane is emitted with lane 0.
4586 (define_expand "neon_vmullu_n<mode>"
4587 [(match_operand:<V_widen> 0 "s_register_operand" "")
4588 (match_operand:VMDI 1 "s_register_operand" "")
4589 (match_operand:<V_elem> 2 "s_register_operand" "")]
4590 "TARGET_NEON"
4591 {
4592 rtx tmp = gen_reg_rtx (<MODE>mode);
4593 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4594 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4595 const0_rtx));
4596 DONE;
4597 })
4598
;; Expander for neon_vqdmull_n: VMDI input, <V_widen> result.  The scalar
;; (operand 2) is set into lane 0 of a fresh <MODE> pseudo and
;; neon_vqdmull_lane is emitted with lane 0.
4599 (define_expand "neon_vqdmull_n<mode>"
4600 [(match_operand:<V_widen> 0 "s_register_operand" "")
4601 (match_operand:VMDI 1 "s_register_operand" "")
4602 (match_operand:<V_elem> 2 "s_register_operand" "")]
4603 "TARGET_NEON"
4604 {
4605 rtx tmp = gen_reg_rtx (<MODE>mode);
4606 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4607 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4608 const0_rtx));
4609 DONE;
4610 })
4611
;; Expander for neon_vqdmulh_n on the VMDI modes.  The scalar (operand 2) is
;; set into lane 0 of a fresh <MODE> pseudo and neon_vqdmulh_lane is emitted
;; with lane 0.
4612 (define_expand "neon_vqdmulh_n<mode>"
4613 [(match_operand:VMDI 0 "s_register_operand" "")
4614 (match_operand:VMDI 1 "s_register_operand" "")
4615 (match_operand:<V_elem> 2 "s_register_operand" "")]
4616 "TARGET_NEON"
4617 {
4618 rtx tmp = gen_reg_rtx (<MODE>mode);
4619 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4620 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4621 const0_rtx));
4622 DONE;
4623 })
4624
;; Expander for neon_vqrdmulh_n on the VMDI modes.  The scalar (operand 2)
;; is set into lane 0 of a fresh <MODE> pseudo and neon_vqrdmulh_lane is
;; emitted with lane 0.
4625 (define_expand "neon_vqrdmulh_n<mode>"
4626 [(match_operand:VMDI 0 "s_register_operand" "")
4627 (match_operand:VMDI 1 "s_register_operand" "")
4628 (match_operand:<V_elem> 2 "s_register_operand" "")]
4629 "TARGET_NEON"
4630 {
4631 rtx tmp = gen_reg_rtx (<MODE>mode);
4632 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4633 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4634 const0_rtx));
4635 DONE;
4636 })
4637
;; Expander for neon_vqdmulh_n on the VMQI modes.  The scalar (operand 2) is
;; set into lane 0 of a fresh <V_HALF> pseudo (via the <V_half> lane-set
;; pattern) and neon_vqdmulh_lane is emitted with lane 0.
4638 (define_expand "neon_vqdmulh_n<mode>"
4639 [(match_operand:VMQI 0 "s_register_operand" "")
4640 (match_operand:VMQI 1 "s_register_operand" "")
4641 (match_operand:<V_elem> 2 "s_register_operand" "")]
4642 "TARGET_NEON"
4643 {
4644 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4645 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4646 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4647 const0_rtx));
4648 DONE;
4649 })
4650
;; Expander for neon_vqrdmulh_n on the VMQI modes.  The scalar (operand 2)
;; is set into lane 0 of a fresh <V_HALF> pseudo (via the <V_half> lane-set
;; pattern) and neon_vqrdmulh_lane is emitted with lane 0.
4651 (define_expand "neon_vqrdmulh_n<mode>"
4652 [(match_operand:VMQI 0 "s_register_operand" "")
4653 (match_operand:VMQI 1 "s_register_operand" "")
4654 (match_operand:<V_elem> 2 "s_register_operand" "")]
4655 "TARGET_NEON"
4656 {
4657 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4658 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4659 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4660 const0_rtx));
4661 DONE;
4662 })
4663
;; Expander for multiply-accumulate by scalar on the VMD modes.  The scalar
;; (operand 3) is set into lane 0 of a fresh <MODE> pseudo; operands 0-2 are
;; forwarded unchanged to neon_vmla_lane with lane 0.
4664 (define_expand "neon_vmla_n<mode>"
4665 [(match_operand:VMD 0 "s_register_operand" "")
4666 (match_operand:VMD 1 "s_register_operand" "")
4667 (match_operand:VMD 2 "s_register_operand" "")
4668 (match_operand:<V_elem> 3 "s_register_operand" "")]
4669 "TARGET_NEON"
4670 {
4671 rtx tmp = gen_reg_rtx (<MODE>mode);
4672 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4673 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4674 tmp, const0_rtx));
4675 DONE;
4676 })
4677
;; Expander for multiply-accumulate by scalar on the VMQ modes.  The scalar
;; (operand 3) is set into lane 0 of a fresh <V_HALF> pseudo; operands 0-2
;; are forwarded unchanged to neon_vmla_lane with lane 0.
4678 (define_expand "neon_vmla_n<mode>"
4679 [(match_operand:VMQ 0 "s_register_operand" "")
4680 (match_operand:VMQ 1 "s_register_operand" "")
4681 (match_operand:VMQ 2 "s_register_operand" "")
4682 (match_operand:<V_elem> 3 "s_register_operand" "")]
4683 "TARGET_NEON"
4684 {
4685 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4686 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4687 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4688 tmp, const0_rtx));
4689 DONE;
4690 })
4691
;; Expander for neon_vmlals_n: operands 0 and 1 are <V_widen>, operand 2 is
;; VMDI.  The scalar (operand 3) is set into lane 0 of a fresh <MODE>
;; pseudo and neon_vmlals_lane is emitted with lane 0.
4692 (define_expand "neon_vmlals_n<mode>"
4693 [(match_operand:<V_widen> 0 "s_register_operand" "")
4694 (match_operand:<V_widen> 1 "s_register_operand" "")
4695 (match_operand:VMDI 2 "s_register_operand" "")
4696 (match_operand:<V_elem> 3 "s_register_operand" "")]
4697 "TARGET_NEON"
4698 {
4699 rtx tmp = gen_reg_rtx (<MODE>mode);
4700 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4701 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4702 tmp, const0_rtx));
4703 DONE;
4704 })
4705
;; Expander for neon_vmlalu_n: operands 0 and 1 are <V_widen>, operand 2 is
;; VMDI.  The scalar (operand 3) is set into lane 0 of a fresh <MODE>
;; pseudo and neon_vmlalu_lane is emitted with lane 0.
4706 (define_expand "neon_vmlalu_n<mode>"
4707 [(match_operand:<V_widen> 0 "s_register_operand" "")
4708 (match_operand:<V_widen> 1 "s_register_operand" "")
4709 (match_operand:VMDI 2 "s_register_operand" "")
4710 (match_operand:<V_elem> 3 "s_register_operand" "")]
4711 "TARGET_NEON"
4712 {
4713 rtx tmp = gen_reg_rtx (<MODE>mode);
4714 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4715 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4716 tmp, const0_rtx));
4717 DONE;
4718 })
4719
;; Expander for neon_vqdmlal_n: operands 0 and 1 are <V_widen>, operand 2 is
;; VMDI.  The scalar (operand 3) is set into lane 0 of a fresh <MODE>
;; pseudo and neon_vqdmlal_lane is emitted with lane 0.
4720 (define_expand "neon_vqdmlal_n<mode>"
4721 [(match_operand:<V_widen> 0 "s_register_operand" "")
4722 (match_operand:<V_widen> 1 "s_register_operand" "")
4723 (match_operand:VMDI 2 "s_register_operand" "")
4724 (match_operand:<V_elem> 3 "s_register_operand" "")]
4725 "TARGET_NEON"
4726 {
4727 rtx tmp = gen_reg_rtx (<MODE>mode);
4728 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4729 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4730 tmp, const0_rtx));
4731 DONE;
4732 })
4733
;; Expander for multiply-subtract by scalar on the VMD modes.  The scalar
;; (operand 3) is set into lane 0 of a fresh <MODE> pseudo; operands 0-2 are
;; forwarded unchanged to neon_vmls_lane with lane 0.
4734 (define_expand "neon_vmls_n<mode>"
4735 [(match_operand:VMD 0 "s_register_operand" "")
4736 (match_operand:VMD 1 "s_register_operand" "")
4737 (match_operand:VMD 2 "s_register_operand" "")
4738 (match_operand:<V_elem> 3 "s_register_operand" "")]
4739 "TARGET_NEON"
4740 {
4741 rtx tmp = gen_reg_rtx (<MODE>mode);
4742 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4743 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4744 tmp, const0_rtx));
4745 DONE;
4746 })
4747
;; Expander for multiply-subtract by scalar on the VMQ modes.  The scalar
;; (operand 3) is set into lane 0 of a fresh <V_HALF> pseudo; operands 0-2
;; are forwarded unchanged to neon_vmls_lane with lane 0.
4748 (define_expand "neon_vmls_n<mode>"
4749 [(match_operand:VMQ 0 "s_register_operand" "")
4750 (match_operand:VMQ 1 "s_register_operand" "")
4751 (match_operand:VMQ 2 "s_register_operand" "")
4752 (match_operand:<V_elem> 3 "s_register_operand" "")]
4753 "TARGET_NEON"
4754 {
4755 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4756 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4757 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4758 tmp, const0_rtx));
4759 DONE;
4760 })
4761
;; Expander for neon_vmlsls_n: operands 0 and 1 are <V_widen>, operand 2 is
;; VMDI.  The scalar (operand 3) is set into lane 0 of a fresh <MODE>
;; pseudo and neon_vmlsls_lane is emitted with lane 0.
4762 (define_expand "neon_vmlsls_n<mode>"
4763 [(match_operand:<V_widen> 0 "s_register_operand" "")
4764 (match_operand:<V_widen> 1 "s_register_operand" "")
4765 (match_operand:VMDI 2 "s_register_operand" "")
4766 (match_operand:<V_elem> 3 "s_register_operand" "")]
4767 "TARGET_NEON"
4768 {
4769 rtx tmp = gen_reg_rtx (<MODE>mode);
4770 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4771 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4772 tmp, const0_rtx));
4773 DONE;
4774 })
4775
;; Expander for neon_vmlslu_n: operands 0 and 1 are <V_widen>, operand 2 is
;; VMDI.  The scalar (operand 3) is set into lane 0 of a fresh <MODE>
;; pseudo and neon_vmlslu_lane is emitted with lane 0.
4776 (define_expand "neon_vmlslu_n<mode>"
4777 [(match_operand:<V_widen> 0 "s_register_operand" "")
4778 (match_operand:<V_widen> 1 "s_register_operand" "")
4779 (match_operand:VMDI 2 "s_register_operand" "")
4780 (match_operand:<V_elem> 3 "s_register_operand" "")]
4781 "TARGET_NEON"
4782 {
4783 rtx tmp = gen_reg_rtx (<MODE>mode);
4784 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4785 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4786 tmp, const0_rtx));
4787 DONE;
4788 })
4789
;; Expander for neon_vqdmlsl_n: operands 0 and 1 are <V_widen>, operand 2 is
;; VMDI.  The scalar (operand 3) is set into lane 0 of a fresh <MODE>
;; pseudo and neon_vqdmlsl_lane is emitted with lane 0.
4790 (define_expand "neon_vqdmlsl_n<mode>"
4791 [(match_operand:<V_widen> 0 "s_register_operand" "")
4792 (match_operand:<V_widen> 1 "s_register_operand" "")
4793 (match_operand:VMDI 2 "s_register_operand" "")
4794 (match_operand:<V_elem> 3 "s_register_operand" "")]
4795 "TARGET_NEON"
4796 {
4797 rtx tmp = gen_reg_rtx (<MODE>mode);
4798 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4799 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4800 tmp, const0_rtx));
4801 DONE;
4802 })
4803
;; vext on the VDQX modes (UNSPEC_VEXT): two vector inputs plus an immediate
;; (operand 3).  The immediate is passed to arm_const_bounds with bounds 0
;; and GET_MODE_NUNITS (<MODE>mode) before the instruction is printed.
4804 (define_insn "neon_vext<mode>"
4805 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4806 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4807 (match_operand:VDQX 2 "s_register_operand" "w")
4808 (match_operand:SI 3 "immediate_operand" "i")]
4809 UNSPEC_VEXT))]
4810 "TARGET_NEON"
4811 {
4812 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4813 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4814 }
4815 [(set_attr "type" "neon_ext<q>")]
4816 )
4817
;; vrev64 on the VDQ modes (UNSPEC_VREV64): single vector input, element
;; size taken from <V_sz_elem>.
4818 (define_insn "neon_vrev64<mode>"
4819 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4820 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4821 UNSPEC_VREV64))]
4822 "TARGET_NEON"
4823 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4824 [(set_attr "type" "neon_rev<q>")]
4825 )
4826
;; vrev32 on the VX modes (UNSPEC_VREV32): single vector input, element
;; size taken from <V_sz_elem>.
4827 (define_insn "neon_vrev32<mode>"
4828 [(set (match_operand:VX 0 "s_register_operand" "=w")
4829 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4830 UNSPEC_VREV32))]
4831 "TARGET_NEON"
4832 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4833 [(set_attr "type" "neon_rev<q>")]
4834 )
4835
;; vrev16 on the VE modes (UNSPEC_VREV16): single vector input, element
;; size taken from <V_sz_elem>.
4836 (define_insn "neon_vrev16<mode>"
4837 [(set (match_operand:VE 0 "s_register_operand" "=w")
4838 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4839 UNSPEC_VREV16))]
4840 "TARGET_NEON"
4841 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4842 [(set_attr "type" "neon_rev<q>")]
4843 )
4844
4845 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4846 ; allocation. For an intrinsic of form:
4847 ; rD = vbsl_* (rS, rN, rM)
4848 ; We can use any of:
4849 ; vbsl rS, rN, rM (if D = S)
4850 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4851 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
4852
;; Bit-select insn with three alternatives, one per input that may be tied
;; to the destination: operand 1 tied -> vbsl, operand 3 tied -> vbit,
;; operand 2 tied -> vbif.  Each template names the two untied inputs.
4853 (define_insn "neon_vbsl<mode>_internal"
4854 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4855 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4856 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4857 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4858 UNSPEC_VBSL))]
4859 "TARGET_NEON"
4860 "@
4861 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4862 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4863 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4864 [(set_attr "type" "neon_bsl<q>")]
4865 )
4866
;; Expander for the vbsl builtin.  The selector (operand 1) arrives in mode
;; <V_cmp_result>; the preparation code replaces it with its <MODE>mode
;; lowpart so all three unspec inputs share one mode.  The body only
;; rewrites operands[1] and contains no DONE.
4867 (define_expand "neon_vbsl<mode>"
4868 [(set (match_operand:VDQX 0 "s_register_operand" "")
4869 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4870 (match_operand:VDQX 2 "s_register_operand" "")
4871 (match_operand:VDQX 3 "s_register_operand" "")]
4872 UNSPEC_VBSL))]
4873 "TARGET_NEON"
4874 {
4875 /* We can't alias operands together if they have different modes. */
4876 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4877 })
4878
4879 ;; vshl, vrshl
;; Register-register shift on the VDQIX modes, iterated over VSHL.  Both
;; inputs are vector registers; the mnemonic comes from <shift_op> and the
;; data-type prefix from <sup>.
4880 (define_insn "neon_v<shift_op><sup><mode>"
4881 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4882 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4883 (match_operand:VDQIX 2 "s_register_operand" "w")]
4884 VSHL))]
4885 "TARGET_NEON"
4886 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4887 [(set_attr "type" "neon_shift_imm<q>")]
4888 )
4889
4890 ;; vqshl, vqrshl
;; Register-register shift on the VDQIX modes, iterated over VQSHL.  Same
;; operand layout and output template as the VSHL pattern, but with the
;; "neon_sat_shift_imm<q>" type.
4891 (define_insn "neon_v<shift_op><sup><mode>"
4892 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4893 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4894 (match_operand:VDQIX 2 "s_register_operand" "w")]
4895 VQSHL))]
4896 "TARGET_NEON"
4897 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4898 [(set_attr "type" "neon_sat_shift_imm<q>")]
4899 )
4900
4901 ;; vshr_n, vrshr_n
;; Shift by immediate on the VDQIX modes, iterated over VSHR_N.  The
;; immediate (operand 2) is passed to arm_const_bounds with bounds 1 and
;; neon_element_bits (<MODE>mode) + 1 before printing.
4902 (define_insn "neon_v<shift_op><sup>_n<mode>"
4903 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4904 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4905 (match_operand:SI 2 "immediate_operand" "i")]
4906 VSHR_N))]
4907 "TARGET_NEON"
4908 {
4909 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4910 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4911 }
4912 [(set_attr "type" "neon_shift_imm<q>")]
4913 )
4914
4915 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate, iterated over VSHRN_N: VN input (printed
;; %q), <V_narrow> result (printed %P).  The immediate is passed to
;; arm_const_bounds with bounds 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4916 (define_insn "neon_v<shift_op>_n<mode>"
4917 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4918 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4919 (match_operand:SI 2 "immediate_operand" "i")]
4920 VSHRN_N))]
4921 "TARGET_NEON"
4922 {
4923 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4924 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4925 }
4926 [(set_attr "type" "neon_shift_imm_narrow_q")]
4927 )
4928
4929 ;; vqshrn_n, vqrshrn_n
;; Narrowing shift by immediate, iterated over VQSHRN_N: VN input,
;; <V_narrow> result, data-type prefix from <sup>.  The immediate is passed
;; to arm_const_bounds with bounds 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4930 (define_insn "neon_v<shift_op><sup>_n<mode>"
4931 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4932 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4933 (match_operand:SI 2 "immediate_operand" "i")]
4934 VQSHRN_N))]
4935 "TARGET_NEON"
4936 {
4937 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4938 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4939 }
4940 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4941 )
4942
4943 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift by immediate, iterated over VQSHRUN_N: VN input,
;; <V_narrow> result, element type printed from <V_s_elem>.  The immediate
;; is passed to arm_const_bounds with bounds 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1.
4944 (define_insn "neon_v<shift_op>_n<mode>"
4945 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4946 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4947 (match_operand:SI 2 "immediate_operand" "i")]
4948 VQSHRUN_N))]
4949 "TARGET_NEON"
4950 {
4951 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4952 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4953 }
4954 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4955 )
4956
;; vshl by immediate on the VDQIX modes (UNSPEC_VSHL_N).  The immediate
;; (operand 2) is passed to arm_const_bounds with bounds 0 and
;; neon_element_bits (<MODE>mode) before printing.
4957 (define_insn "neon_vshl_n<mode>"
4958 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4959 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4960 (match_operand:SI 2 "immediate_operand" "i")]
4961 UNSPEC_VSHL_N))]
4962 "TARGET_NEON"
4963 {
4964 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4965 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4966 }
4967 [(set_attr "type" "neon_shift_imm<q>")]
4968 )
4969
;; vqshl by immediate on the VDQIX modes, iterated over VQSHL_N.  The
;; immediate (operand 2) is passed to arm_const_bounds with bounds 0 and
;; neon_element_bits (<MODE>mode); the data-type prefix comes from <sup>.
4970 (define_insn "neon_vqshl_<sup>_n<mode>"
4971 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4972 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4973 (match_operand:SI 2 "immediate_operand" "i")]
4974 VQSHL_N))]
4975 "TARGET_NEON"
4976 {
4977 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4978 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4979 }
4980 [(set_attr "type" "neon_sat_shift_imm<q>")]
4981 )
4982
;; vqshlu by immediate on the VDQIX modes (UNSPEC_VQSHLU_N).  The immediate
;; (operand 2) is passed to arm_const_bounds with bounds 0 and
;; neon_element_bits (<MODE>mode); the element type is printed from
;; <V_s_elem>.
4983 (define_insn "neon_vqshlu_n<mode>"
4984 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4985 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4986 (match_operand:SI 2 "immediate_operand" "i")]
4987 UNSPEC_VQSHLU_N))]
4988 "TARGET_NEON"
4989 {
4990 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4991 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4992 }
4993 [(set_attr "type" "neon_sat_shift_imm<q>")]
4994 )
4995
;; Widening shift-left by immediate, iterated over VSHLL_N: VW input
;; (printed %P), <V_widen> result (printed %q).
;; NOTE(review): the in-body comment states 0 < imm, yet the lower bound
;; handed to arm_const_bounds is 0, whereas the right-shift patterns in this
;; file (whose minimum shift is 1) pass 1 -- confirm whether imm == 0 is
;; meant to be accepted here and update the comment or the bound.
4996 (define_insn "neon_vshll<sup>_n<mode>"
4997 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4998 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4999 (match_operand:SI 2 "immediate_operand" "i")]
5000 VSHLL_N))]
5001 "TARGET_NEON"
5002 {
5003 /* The boundaries are: 0 < imm <= size. */
5004 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
5005 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
5006 }
5007 [(set_attr "type" "neon_shift_imm_long")]
5008 )
5009
5010 ;; vsra_n, vrsra_n
;; Shift-and-accumulate by immediate on the VDQIX modes, iterated over
;; VSRA_N.  Operand 1 is tied to the destination ("0"); the template names
;; operand 2 and the immediate operand 3, which is passed to
;; arm_const_bounds with bounds 1 and neon_element_bits (<MODE>mode) + 1.
5011 (define_insn "neon_v<shift_op><sup>_n<mode>"
5012 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5013 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5014 (match_operand:VDQIX 2 "s_register_operand" "w")
5015 (match_operand:SI 3 "immediate_operand" "i")]
5016 VSRA_N))]
5017 "TARGET_NEON"
5018 {
5019 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5020 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5021 }
5022 [(set_attr "type" "neon_shift_acc<q>")]
5023 )
5024
;; vsri by immediate on the VDQIX modes (UNSPEC_VSRI).  Operand 1 is tied
;; to the destination ("0"); the immediate (operand 3) is passed to
;; arm_const_bounds with bounds 1 and neon_element_bits (<MODE>mode) + 1.
5025 (define_insn "neon_vsri_n<mode>"
5026 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5027 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5028 (match_operand:VDQIX 2 "s_register_operand" "w")
5029 (match_operand:SI 3 "immediate_operand" "i")]
5030 UNSPEC_VSRI))]
5031 "TARGET_NEON"
5032 {
5033 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5034 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5035 }
5036 [(set_attr "type" "neon_shift_reg<q>")]
5037 )
5038
;; vsli by immediate on the VDQIX modes (UNSPEC_VSLI).  Operand 1 is tied
;; to the destination ("0"); the immediate (operand 3) is passed to
;; arm_const_bounds with bounds 0 and neon_element_bits (<MODE>mode).
5039 (define_insn "neon_vsli_n<mode>"
5040 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5041 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5042 (match_operand:VDQIX 2 "s_register_operand" "w")
5043 (match_operand:SI 3 "immediate_operand" "i")]
5044 UNSPEC_VSLI))]
5045 "TARGET_NEON"
5046 {
5047 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
5048 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5049 }
5050 [(set_attr "type" "neon_shift_reg<q>")]
5051 )
5052
;; vtbl.8 with a one-register table: operand 1 is the V8QI table (printed
;; inside the braces), operand 2 the V8QI index vector.
5053 (define_insn "neon_vtbl1v8qi"
5054 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5055 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
5056 (match_operand:V8QI 2 "s_register_operand" "w")]
5057 UNSPEC_VTBL))]
5058 "TARGET_NEON"
5059 "vtbl.8\t%P0, {%P1}, %P2"
5060 [(set_attr "type" "neon_tbl1")]
5061 )
5062
;; vtbl.8 with a two-register table held in the TImode operand 1.  The
;; output code builds V8QImode REGs at REGNO (operands[1]) and at that
;; number + 2 for the register list, prints the instruction itself with
;; output_asm_insn, and returns an empty template.
5063 (define_insn "neon_vtbl2v8qi"
5064 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5065 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
5066 (match_operand:V8QI 2 "s_register_operand" "w")]
5067 UNSPEC_VTBL))]
5068 "TARGET_NEON"
5069 {
5070 rtx ops[4];
5071 int tabbase = REGNO (operands[1]);
5072
5073 ops[0] = operands[0];
5074 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5075 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5076 ops[3] = operands[2];
5077 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
5078
5079 return "";
5080 }
5081 [(set_attr "type" "neon_tbl2")]
5082 )
5083
;; vtbl.8 with a three-register table held in the EImode operand 1.  The
;; output code builds V8QImode REGs at REGNO (operands[1]) + 0, 2 and 4 for
;; the register list, prints the instruction with output_asm_insn, and
;; returns an empty template.
5084 (define_insn "neon_vtbl3v8qi"
5085 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5086 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
5087 (match_operand:V8QI 2 "s_register_operand" "w")]
5088 UNSPEC_VTBL))]
5089 "TARGET_NEON"
5090 {
5091 rtx ops[5];
5092 int tabbase = REGNO (operands[1]);
5093
5094 ops[0] = operands[0];
5095 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5096 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5097 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5098 ops[4] = operands[2];
5099 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5100
5101 return "";
5102 }
5103 [(set_attr "type" "neon_tbl3")]
5104 )
5105
;; vtbl.8 with a four-register table held in the OImode operand 1.  The
;; output code builds V8QImode REGs at REGNO (operands[1]) + 0, 2, 4 and 6
;; for the register list, prints the instruction with output_asm_insn, and
;; returns an empty template.
5106 (define_insn "neon_vtbl4v8qi"
5107 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5108 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5109 (match_operand:V8QI 2 "s_register_operand" "w")]
5110 UNSPEC_VTBL))]
5111 "TARGET_NEON"
5112 {
5113 rtx ops[6];
5114 int tabbase = REGNO (operands[1]);
5115
5116 ops[0] = operands[0];
5117 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5118 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5119 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5120 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5121 ops[5] = operands[2];
5122 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5123
5124 return "";
5125 }
5126 [(set_attr "type" "neon_tbl4")]
5127 )
5128
5129 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a single V16QI table.  Emitted as "#" and split
;; once reload has completed: the table (operand 1) is re-expressed as
;; TImode with gen_lowpart, and neon_vtbl2v8qi is emitted twice, first on
;; the low V8QI halves of the destination and index vector, then on the
;; high halves.  The destination is earlyclobber ("=&w").
5130 (define_insn_and_split "neon_vtbl1v16qi"
5131 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5132 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5133 (match_operand:V16QI 2 "s_register_operand" "w")]
5134 UNSPEC_VTBL))]
5135 "TARGET_NEON"
5136 "#"
5137 "&& reload_completed"
5138 [(const_int 0)]
5139 {
5140 rtx op0, op1, op2, part0, part2;
5141 unsigned ofs;
5142
5143 op0 = operands[0];
5144 op1 = gen_lowpart (TImode, operands[1]);
5145 op2 = operands[2];
5146
5147 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5148 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5149 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5150 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5151
5152 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5153 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5154 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5155 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5156 DONE;
5157 }
5158 [(set_attr "type" "multiple")]
5159 )
5160
;; V16QI table lookup with a table held in the OImode operand 1.  Emitted
;; as "#" and split once reload has completed into two V8QI lookups, first
;; on the low halves of the destination and index vector, then on the high
;; halves.  The destination is earlyclobber ("=&w").
;; The half lookups use neon_vtbl4v8qi, whose table operand is OImode like
;; operand 1 here; neon_vtbl2v8qi takes a TImode table and so does not
;; accept this operand.
5161 (define_insn_and_split "neon_vtbl2v16qi"
5162 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5163 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5164 (match_operand:V16QI 2 "s_register_operand" "w")]
5165 UNSPEC_VTBL))]
5166 "TARGET_NEON"
5167 "#"
5168 "&& reload_completed"
5169 [(const_int 0)]
5170 {
5171 rtx op0, op1, op2, part0, part2;
5172 unsigned ofs;
5173
5174 op0 = operands[0];
5175 op1 = operands[1];
5176 op2 = operands[2];
5177
5178 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5179 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5180 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5181 emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
5182
5183 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5184 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5185 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5186 emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
5187 DONE;
5188 }
5189 [(set_attr "type" "multiple")]
5190 )
5191
5192 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5193 ;; handle quad-word input modes, producing octa-word output modes. But
5194 ;; that requires us to add support for octa-word vector modes in moves.
5195 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers into one OImode value (UNSPEC_VCONCAT).
;; Emitted as "#" and split once reload has completed by calling
;; neon_split_vcombine on the operands.
5196 (define_insn_and_split "neon_vcombinev16qi"
5197 [(set (match_operand:OI 0 "s_register_operand" "=w")
5198 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5199 (match_operand:V16QI 2 "s_register_operand" "w")]
5200 UNSPEC_VCONCAT))]
5201 "TARGET_NEON"
5202 "#"
5203 "&& reload_completed"
5204 [(const_int 0)]
5205 {
5206 neon_split_vcombine (operands);
5207 DONE;
5208 }
5209 [(set_attr "type" "multiple")]
5210 )
5211
;; vtbx.8 with a one-register table.  Operand 1 is tied to the destination
;; ("0"); operand 2 is the V8QI table (printed inside the braces) and
;; operand 3 the V8QI index vector.
5212 (define_insn "neon_vtbx1v8qi"
5213 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5214 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5215 (match_operand:V8QI 2 "s_register_operand" "w")
5216 (match_operand:V8QI 3 "s_register_operand" "w")]
5217 UNSPEC_VTBX))]
5218 "TARGET_NEON"
5219 "vtbx.8\t%P0, {%P2}, %P3"
5220 [(set_attr "type" "neon_tbl1")]
5221 )
5222
;; vtbx.8 with a two-register table held in the TImode operand 2; operand 1
;; is tied to the destination ("0").  The output code builds V8QImode REGs
;; at REGNO (operands[2]) + 0 and 2 for the register list, prints the
;; instruction with output_asm_insn, and returns an empty template.
5223 (define_insn "neon_vtbx2v8qi"
5224 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5225 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5226 (match_operand:TI 2 "s_register_operand" "w")
5227 (match_operand:V8QI 3 "s_register_operand" "w")]
5228 UNSPEC_VTBX))]
5229 "TARGET_NEON"
5230 {
5231 rtx ops[4];
5232 int tabbase = REGNO (operands[2]);
5233
5234 ops[0] = operands[0];
5235 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5236 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5237 ops[3] = operands[3];
5238 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5239
5240 return "";
5241 }
5242 [(set_attr "type" "neon_tbl2")]
5243 )
5244
;; vtbx.8 with a three-register table held in the EImode operand 2; operand
;; 1 is tied to the destination ("0").  The output code builds V8QImode
;; REGs at REGNO (operands[2]) + 0, 2 and 4 for the register list, prints
;; the instruction with output_asm_insn, and returns an empty template.
5245 (define_insn "neon_vtbx3v8qi"
5246 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5247 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5248 (match_operand:EI 2 "s_register_operand" "w")
5249 (match_operand:V8QI 3 "s_register_operand" "w")]
5250 UNSPEC_VTBX))]
5251 "TARGET_NEON"
5252 {
5253 rtx ops[5];
5254 int tabbase = REGNO (operands[2]);
5255
5256 ops[0] = operands[0];
5257 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5258 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5259 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5260 ops[4] = operands[3];
5261 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5262
5263 return "";
5264 }
5265 [(set_attr "type" "neon_tbl3")]
5266 )
5267
;; vtbx.8 with a four-register table held in the OImode operand 2; operand
;; 1 is tied to the destination ("0").  The output code builds V8QImode
;; REGs at REGNO (operands[2]) + 0, 2, 4 and 6 for the register list,
;; prints the instruction with output_asm_insn, and returns an empty
;; template.
5268 (define_insn "neon_vtbx4v8qi"
5269 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5270 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5271 (match_operand:OI 2 "s_register_operand" "w")
5272 (match_operand:V8QI 3 "s_register_operand" "w")]
5273 UNSPEC_VTBX))]
5274 "TARGET_NEON"
5275 {
5276 rtx ops[6];
5277 int tabbase = REGNO (operands[2]);
5278
5279 ops[0] = operands[0];
5280 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5281 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5282 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5283 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5284 ops[5] = operands[3];
5285 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5286
5287 return "";
5288 }
5289 [(set_attr "type" "neon_tbl4")]
5290 )
5291
;; Expander producing a two-set parallel for vtrn on the VDQWH modes:
;; operand 0 receives UNSPEC_VTRN1 and operand 3 receives UNSPEC_VTRN2 of
;; the same two inputs (operands 1 and 2).  The preparation string is empty.
5292 (define_expand "neon_vtrn<mode>_internal"
5293 [(parallel
5294 [(set (match_operand:VDQWH 0 "s_register_operand")
5295 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5296 (match_operand:VDQWH 2 "s_register_operand")]
5297 UNSPEC_VTRN1))
5298 (set (match_operand:VDQWH 3 "s_register_operand")
5299 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5300 "TARGET_NEON"
5301 ""
5302 )
5303
5304 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the two-output vtrn parallel.  Operands are numbered so
;; that the outputs are 0 and 2 and the inputs are 1 (tied to output 0) and
;; 3 (tied to output 2); both outputs are earlyclobber.  Only the two
;; output registers appear in the printed instruction.
5305 (define_insn "*neon_vtrn<mode>_insn"
5306 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5307 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5308 (match_operand:VDQWH 3 "s_register_operand" "2")]
5309 UNSPEC_VTRN1))
5310 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5311 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5312 UNSPEC_VTRN2))]
5313 "TARGET_NEON"
5314 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5315 [(set_attr "type" "neon_permute<q>")]
5316 )
5317
;; Expander producing a two-set parallel for vzip on the VDQWH modes:
;; operand 0 receives UNSPEC_VZIP1 and operand 3 receives UNSPEC_VZIP2 of
;; the same two inputs (operands 1 and 2).  The preparation string is empty.
5318 (define_expand "neon_vzip<mode>_internal"
5319 [(parallel
5320 [(set (match_operand:VDQWH 0 "s_register_operand")
5321 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5322 (match_operand:VDQWH 2 "s_register_operand")]
5323 UNSPEC_VZIP1))
5324 (set (match_operand:VDQWH 3 "s_register_operand")
5325 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5326 "TARGET_NEON"
5327 ""
5328 )
5329
5330 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the two-output vzip parallel.  Outputs are operands 0
;; and 2; inputs are operand 1 (tied to output 0) and operand 3 (tied to
;; output 2); both outputs are earlyclobber.  Only the two output registers
;; appear in the printed instruction.
5331 (define_insn "*neon_vzip<mode>_insn"
5332 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5333 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5334 (match_operand:VDQWH 3 "s_register_operand" "2")]
5335 UNSPEC_VZIP1))
5336 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5337 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5338 UNSPEC_VZIP2))]
5339 "TARGET_NEON"
5340 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5341 [(set_attr "type" "neon_zip<q>")]
5342 )
5343
;; Expander producing a two-set parallel for vuzp on the VDQWH modes:
;; operand 0 receives UNSPEC_VUZP1 and operand 3 receives UNSPEC_VUZP2 of
;; the same two inputs (operands 1 and 2).  The preparation string is empty.
5344 (define_expand "neon_vuzp<mode>_internal"
5345 [(parallel
5346 [(set (match_operand:VDQWH 0 "s_register_operand")
5347 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5348 (match_operand:VDQWH 2 "s_register_operand")]
5349 UNSPEC_VUZP1))
5350 (set (match_operand:VDQWH 3 "s_register_operand" "")
5351 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5352 "TARGET_NEON"
5353 ""
5354 )
5355
5356 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the two-output vuzp parallel.  Outputs are operands 0
;; and 2; inputs are operand 1 (tied to output 0) and operand 3 (tied to
;; output 2); both outputs are earlyclobber.  Only the two output registers
;; appear in the printed instruction.
5357 (define_insn "*neon_vuzp<mode>_insn"
5358 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5359 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5360 (match_operand:VDQWH 3 "s_register_operand" "2")]
5361 UNSPEC_VUZP1))
5362 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5363 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5364 UNSPEC_VUZP2))]
5365 "TARGET_NEON"
5366 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5367 [(set_attr "type" "neon_zip<q>")]
5368 )
5369
;; Expander with no preparation statements: produces a set of a VDQX
;; register from an UNSPEC_VLD1 of a neon_struct_operand of the same mode.
5370 (define_expand "vec_load_lanes<mode><mode>"
5371 [(set (match_operand:VDQX 0 "s_register_operand")
5372 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
5373 UNSPEC_VLD1))]
5374 "TARGET_NEON")
5375
;; Whole-vector vld1 on the VDQX modes (UNSPEC_VLD1): loads operand 0 from
;; the "Um" memory operand 1; the register list is printed with %h and the
;; address with %A.
5376 (define_insn "neon_vld1<mode>"
5377 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5378 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5379 UNSPEC_VLD1))]
5380 "TARGET_NEON"
5381 "vld1.<V_sz_elem>\t%h0, %A1"
5382 [(set_attr "type" "neon_load1_1reg<q>")]
5383 )
5384
5385 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5386 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
5387 ;; lane order here.
;; Single-lane vld1 on the VDX modes: loads one <V_elem> from memory operand
;; 1 into lane operand 3 of operand 2, which is tied to the destination.
;; The lane index is remapped with NEON_ENDIAN_LANE_N and stored back into
;; operands[3] before printing.  When the mode has a single unit, a plain
;; whole-register vld1 is emitted instead of the lane form.
5388 (define_insn "neon_vld1_lane<mode>"
5389 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5390 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5391 (match_operand:VDX 2 "s_register_operand" "0")
5392 (match_operand:SI 3 "immediate_operand" "i")]
5393 UNSPEC_VLD1_LANE))]
5394 "TARGET_NEON"
5395 {
5396 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5397 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5398 operands[3] = GEN_INT (lane);
5399 if (max == 1)
5400 return "vld1.<V_sz_elem>\t%P0, %A1";
5401 else
5402 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5403 }
5404 [(set_attr "type" "neon_load1_one_lane<q>")]
5405 )
5406
;; Load a single element into one lane of a Q-register vector; operand 2
;; is tied to the output register.
;; See the comment on the D-register neon_vld1_lane pattern for the reason
;; why the lane numbers are reversed here on big-endian targets.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);

  /* A lane in the upper half of the vector is addressed through the
     register two hard-register numbers up, with the lane index rebased
     to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }

  /* Store the final lane number once, after any adjustment (the same
     shape as neon_vst1_lane), instead of writing operands[3] twice.  */
  operands[3] = GEN_INT (lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, regno);

  /* With only two elements each half holds a single element, so no lane
     is named.  */
  if (max == 2)
    return "vld1.<V_sz_elem>\t%P0, %A1";
  else
    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
5435
;; Load one element and replicate it to every lane of a D register
;; (vec_duplicate of the memory operand).
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5443
;; DImode special case of the "dup" load: there is nothing to replicate,
;; so it is expanded exactly like a simple UNSPEC_VLD1 load.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI
          [(match_operand:DI 1 "neon_struct_operand" "")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
5452
;; Load one element and replicate it to every lane of a Q register; the
;; template names both halves of the destination (%e0 and %f0).
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
{
  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5462
;; V2DI "dup" load.  Never printed directly ("#"): after reload it is split
;; into a DImode load of the low half followed by a copy of that half into
;; the high half.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Load into the low DImode half first ...  */
  rtx lo_half = gen_lowpart (DImode, operands[0]);
  emit_insn (gen_neon_vld1_dupdi (lo_half, operands[1]));

  /* ... then duplicate it into the high half.  */
  rtx hi_half = gen_highpart (DImode, operands[0]);
  emit_move_insn (hi_half, lo_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
5479
;; Single-vector "store lanes": expands to the UNSPEC_VST1 form that
;; neon_vst1<mode> matches.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand")]
          UNSPEC_VST1))]
  "TARGET_NEON")
5485
;; Store one whole D- or Q-register vector to a structure memory operand
;; with a single vst1.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")]
          UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
5493
;; Store one lane of a D-register vector to memory.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* %c2 in the template must print the architectural lane number.  */
  operands[2] = GEN_INT (arch_lane);

  /* A one-element vector is stored without naming a lane.  */
  return (nunits == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5514
;; Store one lane of a Q-register vector to memory.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int half_regno = REGNO (operands[1]);

  /* A lane in the upper half is reached through the register two
     hard-register numbers up, with the index rebased to that half.  */
  if (arch_lane >= half)
    {
      half_regno += 2;
      arch_lane -= half;
    }

  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);
  operands[2] = GEN_INT (arch_lane);

  /* With two elements each half holds exactly one, so no lane is named.  */
  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5542
;; Two-vector "load lanes" into a TImode register group; expands to the
;; UNSPEC_VLD2 form matched by the D-register neon_vld2<mode>.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5549
;; Two-element structure load into a TImode register group.  For 64-bit
;; elements a vld1.64 of the register list is emitted instead of vld2,
;; and the "type" attribute follows the same split.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld2.<V_sz_elem>\t%h0, %A1";

  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_2reg<q>")
          (const_string "neon_load2_2reg<q>")))]
)
5567
;; Two-vector "load lanes" of Q-register vectors into an OImode register
;; group; expands to the UNSPEC_VLD2 form matched by the Q-register
;; neon_vld2<mode>.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5574
;; Two-element structure load into an OImode register group (Q-register
;; vectors), printed as a single vld2 of the register list.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])
5583
;; Load a two-element structure into one lane of each of two D registers;
;; operand 2 is tied to the output register group.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:TI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[4];

  /* xops[0..1]: the two registers of the group, 2 regnos apart.  */
  for (int i = 0; i < 2; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[2] = operands[1];
  xops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5607
;; Load a two-element structure into one lane of each of two Q-register
;; vectors held in an OImode group; operand 2 is tied to the output.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[4];

  /* Upper-half lanes live two regnos further up; rebase the index.  */
  if (arch_lane >= half)
    {
      base += 2;
      arch_lane -= half;
    }

  /* xops[0..1]: same half of each vector, 4 regnos apart.  */
  for (int i = 0; i < 2; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[2] = operands[1];
  xops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5637
;; Load a two-element structure and replicate it to all lanes of two
;; D registers.  Single-element modes are emitted as a vld1 of the
;; register list instead; the "type" attribute follows the same split.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
    return "vld1.<V_sz_elem>\t%h0, %A1";

  return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load2_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))]
)
5655
;; Two-vector "store lanes" from a TImode register group; expands to the
;; UNSPEC_VST2 form matched by the D-register neon_vst2<mode>.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5662
;; Two-element structure store from a TImode register group.  For 64-bit
;; elements a vst1.64 of the register list is emitted instead of vst2.
;;
;; The non-64-bit case stores two whole registers, so it is typed as a
;; two-register vst2 (mirroring neon_load2_2reg<q> on the load side and
;; neon_store4_4reg<q> on neon_vst4) rather than as a one-lane store.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_2reg<q>")))]
)
5680
;; Two-vector "store lanes" of Q-register vectors from an OImode register
;; group; expands to the UNSPEC_VST2 form matched by the Q-register
;; neon_vst2<mode>.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5687
;; Two-element structure store from an OImode register group (Q-register
;; vectors), printed as a single vst2 of the register list.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
5697
;; Store one lane from each of two D registers as a two-element structure.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  /* xops[1..2]: the two registers of the group, 2 regnos apart.  */
  for (int i = 0; i < 2; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5721
;; Store one lane from each of two Q-register vectors (OImode group) as a
;; two-element structure.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[1]);
  rtx xops[4];

  /* Upper-half lanes live two regnos further up; rebase the index.  */
  if (arch_lane >= half)
    {
      base += 2;
      arch_lane -= half;
    }

  xops[0] = operands[0];
  /* xops[1..2]: same half of each vector, 4 regnos apart.  */
  for (int i = 0; i < 2; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5751
;; Three-vector "load lanes" into an EImode register group; expands to the
;; UNSPEC_VLD3 form matched by the D-register neon_vld3<mode>.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON")
5758
;; Three-element structure load into an EImode register group.  For 64-bit
;; elements a vld1.64 of the register list is emitted instead of vld3,
;; and the "type" attribute follows the same split.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld3.<V_sz_elem>\t%h0, %A1";

  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_3reg<q>")
          (const_string "neon_load3_3reg<q>")))]
)
5776
;; Three-vector "load lanes" of Q-register vectors into a CImode register
;; group: simply forwards to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
5786
;; Three-element structure load of Q-register vectors.  The CImode access
;; is split into two EImode pieces: neon_vld3qa reads the piece at
;; offset 0, then neon_vld3qb reads the piece GET_MODE_SIZE (EImode) bytes
;; further on, taking the partially written destination as an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem_a = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], mem_a));

  rtx mem_b = adjust_address (mem_a, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], mem_b, operands[0]));
  DONE;
})
5801
;; First half of a Q-register vld3: fills the registers at regno,
;; regno + 4 and regno + 8 of the CImode group from an EImode memory piece.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  int base = REGNO (operands[0]);
  rtx xops[4];

  /* xops[0..2]: destination registers, 4 regnos apart.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5820
;; Second half of a Q-register vld3: fills the registers at regno + 2,
;; regno + 6 and regno + 10 of the CImode group.  Operand 2 (tied to the
;; output) carries the registers already written by neon_vld3qa.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  int base = REGNO (operands[0]) + 2;
  rtx xops[4];

  /* xops[0..2]: destination registers, 4 regnos apart.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5840
;; Load a three-element structure into one lane of each of three
;; D registers; operand 2 is tied to the output register group.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; Note that the memory operand is printed with plain %3, not %A3 as in
;; the vld2/vld4 lane patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:EI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[5];

  /* xops[0..2]: the three registers of the group, 2 regnos apart.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (arch_lane);

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5866
;; Load a three-element structure into one lane of each of three
;; Q-register vectors (CImode group); operand 2 is tied to the output.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; Note that the memory operand is printed with plain %3, not %A3 as in
;; the vld2/vld4 lane patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[5];

  /* Upper-half lanes live two regnos further up; rebase the index.  */
  if (arch_lane >= half)
    {
      base += 2;
      arch_lane -= half;
    }

  /* xops[0..2]: same half of each vector, 4 regnos apart.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (arch_lane);

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5898
;; Load a three-element structure and replicate it to all lanes of three
;; D registers.  Single-element modes are emitted as a vld1 of the
;; register list instead; the "type" attribute follows the same split.
;; Note that the vld3 form prints the memory operand with plain %3.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
    return "vld1.<V_sz_elem>\t%h0, %A1";

  int base = REGNO (operands[0]);
  rtx xops[4];

  /* xops[0..2]: the three registers of the group, 2 regnos apart.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", xops);
  return "";
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load3_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))])
5924
;; Three-vector "store lanes" from an EImode register group; expands to
;; the UNSPEC_VST3 form matched by the D-register neon_vst3<mode>.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON")
5931
;; Three-element structure store from an EImode register group.  For
;; 64-bit elements a vst1.64 of the register list is emitted instead of
;; vst3.
;;
;; The non-64-bit case stores three whole registers, so it is typed as a
;; three-register vst3 (mirroring neon_load3_3reg<q> on the load side and
;; the neon_vst3qa/neon_vst3qb patterns) rather than as a one-lane store.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])
5948
;; Three-vector "store lanes" of Q-register vectors from a CImode register
;; group: simply forwards to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
})
5958
;; Three-element structure store of Q-register vectors.  The CImode access
;; is split into two EImode pieces: neon_vst3qa writes the piece at
;; offset 0, then neon_vst3qb writes the piece GET_MODE_SIZE (EImode)
;; bytes further on.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem_a = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (mem_a, operands[1]));

  rtx mem_b = adjust_address (mem_a, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (mem_b, operands[1]));
  DONE;
})
5973
;; First half of a Q-register vst3: stores the registers at regno,
;; regno + 4 and regno + 8 of the CImode group to an EImode memory piece.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:CI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3A))]
  "TARGET_NEON"
{
  int base = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  /* xops[1..3]: source registers, 4 regnos apart.  */
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5992
;; Second half of a Q-register vst3: stores the registers at regno + 2,
;; regno + 6 and regno + 10 of the CImode group to an EImode memory piece.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:CI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3B))]
  "TARGET_NEON"
{
  int base = REGNO (operands[1]) + 2;
  rtx xops[4];

  xops[0] = operands[0];
  /* xops[1..3]: source registers, 4 regnos apart.  */
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
6011
;; Store one lane from each of three D registers as a three-element
;; structure.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; Note that the memory operand is printed with plain %0, not %A0 as in
;; the vst2/vst4 lane patterns.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  /* xops[1..3]: the three registers of the group, 2 regnos apart.  */
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = GEN_INT (arch_lane);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
6037
;; Store one lane from each of three Q-register vectors (CImode group) as
;; a three-element structure.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;; Note that the memory operand is printed with plain %0, not %A0 as in
;; the vst2/vst4 lane patterns.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[1]);
  rtx xops[5];

  /* Upper-half lanes live two regnos further up; rebase the index.  */
  if (arch_lane >= half)
    {
      base += 2;
      arch_lane -= half;
    }

  xops[0] = operands[0];
  /* xops[1..3]: same half of each vector, 4 regnos apart.  */
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = GEN_INT (arch_lane);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
6069
;; Four-vector "load lanes" of D-register vectors into an OImode register
;; group; expands to the UNSPEC_VLD4 form matched by the D-register
;; neon_vld4<mode>.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4))]
  "TARGET_NEON")
6076
;; Four-element structure load into an OImode register group.  For 64-bit
;; elements a vld1.64 of the register list is emitted instead of vld4,
;; and the "type" attribute follows the same split.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld4.<V_sz_elem>\t%h0, %A1";

  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_4reg<q>")
          (const_string "neon_load4_4reg<q>")))]
)
6094
;; Four-vector "load lanes" of Q-register vectors into an XImode register
;; group: simply forwards to the neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})
6104
;; Four-element structure load of Q-register vectors.  The XImode access
;; is split into two OImode pieces: neon_vld4qa reads the piece at
;; offset 0, then neon_vld4qb reads the piece GET_MODE_SIZE (OImode) bytes
;; further on, taking the partially written destination as an input.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem_a = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], mem_a));

  rtx mem_b = adjust_address (mem_a, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], mem_b, operands[0]));
  DONE;
})
6119
;; First half of a Q-register vld4: fills the registers at regno,
;; regno + 4, regno + 8 and regno + 12 of the XImode group from an OImode
;; memory piece.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  int base = REGNO (operands[0]);
  rtx xops[5];

  /* xops[0..3]: destination registers, 4 regnos apart.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
6139
;; Second half of a Q-register vld4: fills the registers at regno + 2,
;; regno + 6, regno + 10 and regno + 14 of the XImode group.  Operand 2
;; (tied to the output) carries the registers already written by
;; neon_vld4qa.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (match_operand:XI 2 "s_register_operand" "0")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  int base = REGNO (operands[0]) + 2;
  rtx xops[5];

  /* xops[0..3]: destination registers, 4 regnos apart.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
6160
;; Load a four-element structure into one lane of each of four
;; D registers; operand 2 is tied to the output register group.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[6];

  /* xops[0..3]: the four registers of the group, 2 regnos apart.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (arch_lane);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
6187
;; Load a four-element structure into one lane of each of four
;; Q-register vectors (XImode group); operand 2 is tied to the output.
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (match_operand:XI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[6];

  /* Upper-half lanes live two regnos further up; rebase the index.  */
  if (arch_lane >= half)
    {
      base += 2;
      arch_lane -= half;
    }

  /* xops[0..3]: same half of each vector, 4 regnos apart.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (arch_lane);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
6220
;; Load a four-element structure and replicate it to all lanes of four
;; D registers.  Single-element modes are emitted as a vld1 of the
;; register list instead; the "type" attribute follows the same split.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
    return "vld1.<V_sz_elem>\t%h0, %A1";

  int base = REGNO (operands[0]);
  rtx xops[5];

  /* xops[0..3]: the four registers of the group, 2 regnos apart.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   xops);
  return "";
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load4_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))]
)
6249
;; Four-vector "store lanes" of D-register vectors from an OImode register
;; group; expands to the UNSPEC_VST4 form matched by the D-register
;; neon_vst4<mode>.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4))]
  "TARGET_NEON")
6256
;; Four-element structure store from an OImode register group.  For 64-bit
;; elements a vst1.64 of the register list is emitted instead of vst4,
;; and the "type" attribute follows the same split.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vst4.<V_sz_elem>\t%h1, %A0";

  return "vst1.64\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_store1_4reg<q>")
          (const_string "neon_store4_4reg<q>")))]
)
6274
;; Four-vector "store lanes" of Q-register vectors from an XImode register
;; group: simply forwards to the neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest, src));
  DONE;
})
6284
;; Four-element structure store of Q-register vectors.  The XImode access
;; is split into two OImode pieces: neon_vst4qa writes the piece at
;; offset 0, then neon_vst4qb writes the piece GET_MODE_SIZE (OImode)
;; bytes further on.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem_a = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (mem_a, operands[1]));

  rtx mem_b = adjust_address (mem_a, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (mem_b, operands[1]));
  DONE;
})
6299
;; First half of a Q-register vst4: stores the registers at regno,
;; regno + 4, regno + 8 and regno + 12 of the XImode group to an OImode
;; memory piece.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:XI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4A))]
  "TARGET_NEON"
{
  int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  /* xops[1..4]: source registers, 4 regnos apart.  */
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6319
;; Second of the two stores emitted by the XImode neon_vst4 expander
;; (UNSPEC_VST4B).  Same shape as neon_vst4qa, but the register list uses the
;; DImode registers at REGNO (operand 1) + 2, + 6, + 10 and + 14.
;; NOTE(review): these offsets presumably name the other D half of each Q
;; register in the backend's register numbering -- confirm.
6320 (define_insn "neon_vst4qb<mode>"
6321 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6322 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6323 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6324 UNSPEC_VST4B))]
6325 "TARGET_NEON"
6326 {
6327 int regno = REGNO (operands[1]);
/* ops[0] is the address; ops[1..4] are the four registers in the list.  */
6328 rtx ops[5];
6329 ops[0] = operands[0];
6330 ops[1] = gen_rtx_REG (DImode, regno + 2);
6331 ops[2] = gen_rtx_REG (DImode, regno + 6);
6332 ops[3] = gen_rtx_REG (DImode, regno + 10);
6333 ops[4] = gen_rtx_REG (DImode, regno + 14);
/* The instruction text is printed here, so the template returned is empty.  */
6334 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6335 return "";
6336 }
6337 [(set_attr "type" "neon_store4_4reg<q>")]
6338 )
6339
6340 ;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
6341 ;; here on big endian targets.
;; Single-lane vst4 for the VD_LANE modes: the data is the OImode operand 1
;; and the lane is the immediate operand 2.  The register list is made of the
;; DImode registers at REGNO (operand 1) + 0, + 2, + 4 and + 6, all indexed by
;; the same lane.
6342 (define_insn "neon_vst4_lane<mode>"
6343 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6344 (unspec:<V_four_elem>
6345 [(match_operand:OI 1 "s_register_operand" "w")
6346 (match_operand:SI 2 "immediate_operand" "i")
6347 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6348 UNSPEC_VST4_LANE))]
6349 "TARGET_NEON"
6350 {
/* Lane number after the NEON_ENDIAN_LANE_N adjustment.  */
6351 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6352 int regno = REGNO (operands[1]);
/* ops[0] is the address, ops[1..4] the list registers, ops[5] the lane.  */
6353 rtx ops[6];
6354 ops[0] = operands[0];
6355 ops[1] = gen_rtx_REG (DImode, regno);
6356 ops[2] = gen_rtx_REG (DImode, regno + 2);
6357 ops[3] = gen_rtx_REG (DImode, regno + 4);
6358 ops[4] = gen_rtx_REG (DImode, regno + 6);
6359 ops[5] = GEN_INT (lane);
/* The instruction text is printed here, so the template returned is empty.  */
6360 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6361 ops);
6362 return "";
6363 }
6364 [(set_attr "type" "neon_store4_one_lane<q>")]
6365 )
6366
6367 ;; See comment on neon_vld1_lane for reason why the lane numbers are reversed
6368 ;; here on big endian targets.
;; Single-lane vst4 for the VQ_HS modes: the data is the XImode operand 1 and
;; the lane is the immediate operand 2.  A lane in the upper half of the
;; vector (lane >= GET_MODE_NUNITS / 2) is rebased by nunits / 2 and the base
;; register number is advanced by 2; the register list is then the DImode
;; registers at base + 0, + 4, + 8 and + 12, all indexed by the same lane.
6369 (define_insn "neon_vst4_lane<mode>"
6370 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6371 (unspec:<V_four_elem>
6372 [(match_operand:XI 1 "s_register_operand" "w")
6373 (match_operand:SI 2 "immediate_operand" "i")
6374 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6375 UNSPEC_VST4_LANE))]
6376 "TARGET_NEON"
6377 {
/* Lane number after the NEON_ENDIAN_LANE_N adjustment.  */
6378 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6379 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6380 int regno = REGNO (operands[1]);
/* ops[0] is the address, ops[1..4] the list registers, ops[5] the lane.  */
6381 rtx ops[6];
/* Upper-half lanes: rebase the lane index and start from regno + 2.  */
6382 if (lane >= max / 2)
6383 {
6384 lane -= max / 2;
6385 regno += 2;
6386 }
6387 ops[0] = operands[0];
6388 ops[1] = gen_rtx_REG (DImode, regno);
6389 ops[2] = gen_rtx_REG (DImode, regno + 4);
6390 ops[3] = gen_rtx_REG (DImode, regno + 8);
6391 ops[4] = gen_rtx_REG (DImode, regno + 12);
6392 ops[5] = GEN_INT (lane);
/* The instruction text is printed here, so the template returned is empty.  */
6393 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6394 ops);
6395 return "";
6396 }
;; This prints a one-lane store, so it is classified as neon_store4_one_lane
;; (as the OImode neon_vst4_lane pattern is) rather than as a full four-register
;; neon_store4_4reg store.
6397 [(set_attr "type" "neon_store4_one_lane<q>")]
6398 )
6399
;; Extend (SE) the <V_HALF> part of operand 1 selected by operand 2, which
;; must satisfy vect_par_constant_low, into the <V_unpack> operand 0.  Only
;; enabled when !BYTES_BIG_ENDIAN.  Printed as vmovl with the %e1 operand form.
6400 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6401 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6402 (SE:<V_unpack> (vec_select:<V_HALF>
6403 (match_operand:VU 1 "register_operand" "w")
6404 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6405 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6406 "vmovl.<US><V_sz_elem> %q0, %e1"
6407 [(set_attr "type" "neon_shift_imm_long")]
6408 )
6409
;; Extend (SE) the <V_HALF> part of operand 1 selected by operand 2, which
;; must satisfy vect_par_constant_high, into the <V_unpack> operand 0.  Only
;; enabled when !BYTES_BIG_ENDIAN.  Printed as vmovl with the %f1 operand form.
6410 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6411 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6412 (SE:<V_unpack> (vec_select:<V_HALF>
6413 (match_operand:VU 1 "register_operand" "w")
6414 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6415 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6416 "vmovl.<US><V_sz_elem> %q0, %f1"
6417 [(set_attr "type" "neon_shift_imm_long")]
6418 )
6419
;; Expander for vec_unpack<US>_hi_<mode> on the VU modes (little-endian
;; only).  Builds a PARALLEL holding the element indices
;; <V_mode_nunits>/2 ... <V_mode_nunits>-1 and passes it as the selector
;; operand of neon_vec_unpack<US>_hi_<mode>.
6420 (define_expand "vec_unpack<US>_hi_<mode>"
6421 [(match_operand:<V_unpack> 0 "register_operand" "")
6422 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6423 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6424 {
6425 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6426 rtx t1;
6427 int i;
/* Selector entry i names element nunits/2 + i, i.e. the upper indices.  */
6428 for (i = 0; i < (<V_mode_nunits>/2); i++)
6429 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6430
6431 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6432 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
6433 operands[1],
6434 t1));
6435 DONE;
6436 }
6437 )
6438
;; Expander for vec_unpack<US>_lo_<mode> on the VU modes (little-endian
;; only).  Builds a PARALLEL holding the element indices
;; 0 ... <V_mode_nunits>/2-1 and passes it as the selector operand of
;; neon_vec_unpack<US>_lo_<mode>.
6439 (define_expand "vec_unpack<US>_lo_<mode>"
6440 [(match_operand:<V_unpack> 0 "register_operand" "")
6441 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
6442 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6443 {
6444 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6445 rtx t1;
6446 int i;
/* Selector entry i names element i, i.e. the lower indices.  */
6447 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6448 RTVEC_ELT (v, i) = GEN_INT (i);
6449 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6450 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
6451 operands[1],
6452 t1));
6453 DONE;
6454 }
6455 )
6456
;; Widening multiply of the parts of operands 1 and 3 selected by operand 2
;; (vect_par_constant_low; the second vec_select reuses it via match_dup).
;; Each selected half is extended with SE before the multiply.  Only enabled
;; when !BYTES_BIG_ENDIAN.  Printed as vmull with the %e1 and %e3 operand forms.
6457 (define_insn "neon_vec_<US>mult_lo_<mode>"
6458 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6459 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6460 (match_operand:VU 1 "register_operand" "w")
6461 (match_operand:VU 2 "vect_par_constant_low" "")))
6462 (SE:<V_unpack> (vec_select:<V_HALF>
6463 (match_operand:VU 3 "register_operand" "w")
6464 (match_dup 2)))))]
6465 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6466 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6467 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6468 )
6469
;; Expander for vec_widen_<US>mult_lo_<mode> on the VU modes (little-endian
;; only).  Builds a PARALLEL of the indices 0 ... <V_mode_nunits>/2-1 and
;; emits neon_vec_<US>mult_lo_<mode> with it as the selector.
6470 (define_expand "vec_widen_<US>mult_lo_<mode>"
6471 [(match_operand:<V_unpack> 0 "register_operand" "")
6472 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6473 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6474 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6475 {
6476 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6477 rtx t1;
6478 int i;
/* Selector entry i names element i, i.e. the lower indices.  */
6479 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6480 RTVEC_ELT (v, i) = GEN_INT (i);
6481 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6482
/* The insn takes the selector as its operand 2, so this expander's second
   multiplicand (operands[2]) is passed last, as the insn's operand 3.  */
6483 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6484 operands[1],
6485 t1,
6486 operands[2]));
6487 DONE;
6488 }
6489 )
6490
;; Widening multiply of the parts of operands 1 and 3 selected by operand 2
;; (vect_par_constant_high; the second vec_select reuses it via match_dup).
;; Each selected half is extended with SE before the multiply.  Only enabled
;; when !BYTES_BIG_ENDIAN.  Printed as vmull with the %f1 and %f3 operand forms.
6491 (define_insn "neon_vec_<US>mult_hi_<mode>"
6492 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6493 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6494 (match_operand:VU 1 "register_operand" "w")
6495 (match_operand:VU 2 "vect_par_constant_high" "")))
6496 (SE:<V_unpack> (vec_select:<V_HALF>
6497 (match_operand:VU 3 "register_operand" "w")
6498 (match_dup 2)))))]
6499 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6500 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6501 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6502 )
6503
;; Expander for vec_widen_<US>mult_hi_<mode> on the VU modes (little-endian
;; only).  Builds a PARALLEL of the indices
;; <V_mode_nunits>/2 ... <V_mode_nunits>-1 and emits
;; neon_vec_<US>mult_hi_<mode> with it as the selector.
6504 (define_expand "vec_widen_<US>mult_hi_<mode>"
6505 [(match_operand:<V_unpack> 0 "register_operand" "")
6506 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6507 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6508 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6509 {
6510 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6511 rtx t1;
6512 int i;
/* Selector entry i names element nunits/2 + i, i.e. the upper indices.  */
6513 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6514 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6515 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6516
/* The insn takes the selector as its operand 2, so this expander's second
   multiplicand (operands[2]) is passed last, as the insn's operand 3.  */
6517 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6518 operands[1],
6519 t1,
6520 operands[2]));
6521 DONE;
6522
6523 }
6524 )
6525
;; Widening shift left on the VW modes: operand 1 is shifted left by the
;; constant operand 2 (const_neon_scalar_shift_amount_operand) and the result
;; is extended with SE to <V_widen>.  Printed as vshll.
6526 (define_insn "neon_vec_<US>shiftl_<mode>"
6527 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6528 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6529 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6530 "TARGET_NEON"
6531 {
6532 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6533 }
6534 [(set_attr "type" "neon_shift_imm_long")]
6535 )
6536
;; Expander for vec_widen_<US>shiftl_lo_<mode> on the VU modes (little-endian
;; only).  Takes the <V_HALF>mode subreg of operand 1 at byte offset 0 and
;; feeds it, with the shift amount operand 2, to neon_vec_<US>shiftl_<V_half>.
6537 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6538 [(match_operand:<V_unpack> 0 "register_operand" "")
6539 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6540 (match_operand:SI 2 "immediate_operand" "i")]
6541 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6542 {
6543 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6544 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6545 operands[2]));
6546 DONE;
6547 }
6548 )
6549
;; Expander for vec_widen_<US>shiftl_hi_<mode> on the VU modes (little-endian
;; only).  Takes the <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and feeds it, with the shift amount operand 2,
;; to neon_vec_<US>shiftl_<V_half>.
6550 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6551 [(match_operand:<V_unpack> 0 "register_operand" "")
6552 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6553 (match_operand:SI 2 "immediate_operand" "i")]
6554 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6555 {
6556 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6557 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6558 GET_MODE_SIZE (<V_HALF>mode)),
6559 operands[2]));
6560 DONE;
6561 }
6562 )
6563
6564 ;; Vectorize for the non-neon-quad case.
;; Extend (SE) the whole VDI-mode operand 1 into the <V_widen> operand 0,
;; printed as vmovl.
;; NOTE(review): the "type" here is neon_move, whereas the
;; neon_vec_unpack<US>_{lo,hi}_<mode> patterns that print the same vmovl
;; mnemonic use neon_shift_imm_long -- confirm which is intended.
6565 (define_insn "neon_unpack<US>_<mode>"
6566 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6567 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6568 "TARGET_NEON"
6569 "vmovl.<US><V_sz_elem> %q0, %P1"
6570 [(set_attr "type" "neon_move")]
6571 )
6572
;; Expander for vec_unpack<US>_lo_<mode> on the VDI modes.  Extends all of
;; operand 1 into a fresh <V_widen>mode pseudo with neon_unpack<US>_<mode>,
;; then fills operand 0 from that pseudo via gen_neon_vget_low<V_widen_l>.
6573 (define_expand "vec_unpack<US>_lo_<mode>"
6574 [(match_operand:<V_double_width> 0 "register_operand" "")
6575 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6576 "TARGET_NEON"
6577 {
6578 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6579 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6580 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6581
6582 DONE;
6583 }
6584 )
6585
;; Expander for vec_unpack<US>_hi_<mode> on the VDI modes.  Extends all of
;; operand 1 into a fresh <V_widen>mode pseudo with neon_unpack<US>_<mode>,
;; then fills operand 0 from that pseudo via gen_neon_vget_high<V_widen_l>.
6586 (define_expand "vec_unpack<US>_hi_<mode>"
6587 [(match_operand:<V_double_width> 0 "register_operand" "")
6588 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6589 "TARGET_NEON"
6590 {
6591 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6592 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6593 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6594
6595 DONE;
6596 }
6597 )
6598
;; Widening multiply on the VDI modes: operands 1 and 2 are each extended
;; with SE to <V_widen> and multiplied into operand 0.  Printed as vmull.
6599 (define_insn "neon_vec_<US>mult_<mode>"
6600 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6601 (mult:<V_widen> (SE:<V_widen>
6602 (match_operand:VDI 1 "register_operand" "w"))
6603 (SE:<V_widen>
6604 (match_operand:VDI 2 "register_operand" "w"))))]
6605 "TARGET_NEON"
6606 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6607 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6608 )
6609
;; Expander for vec_widen_<US>mult_hi_<mode> on the VDI modes.  Computes the
;; full widening product into a fresh <V_widen>mode pseudo with
;; neon_vec_<US>mult_<mode>, then fills operand 0 from that pseudo via
;; gen_neon_vget_high<V_widen_l>.
6610 (define_expand "vec_widen_<US>mult_hi_<mode>"
6611 [(match_operand:<V_double_width> 0 "register_operand" "")
6612 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6613 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6614 "TARGET_NEON"
6615 {
6616 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6617 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6618 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6619
6620 DONE;
6621
6622 }
6623 )
6624
;; Expander for vec_widen_<US>mult_lo_<mode> on the VDI modes.  Computes the
;; full widening product into a fresh <V_widen>mode pseudo with
;; neon_vec_<US>mult_<mode>, then fills operand 0 from that pseudo via
;; gen_neon_vget_low<V_widen_l>.
6625 (define_expand "vec_widen_<US>mult_lo_<mode>"
6626 [(match_operand:<V_double_width> 0 "register_operand" "")
6627 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6628 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6629 "TARGET_NEON"
6630 {
6631 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6632 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6633 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6634
6635 DONE;
6636
6637 }
6638 )
6639
;; Expander for vec_widen_<US>shiftl_hi_<mode> on the VDI modes.  Performs
;; the widening shift of all of operand 1 into a fresh <V_widen>mode pseudo
;; with neon_vec_<US>shiftl_<mode>, then fills operand 0 from that pseudo via
;; gen_neon_vget_high<V_widen_l>.
6640 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6641 [(match_operand:<V_double_width> 0 "register_operand" "")
6642 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6643 (match_operand:SI 2 "immediate_operand" "i")]
6644 "TARGET_NEON"
6645 {
6646 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6647 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6648 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6649
6650 DONE;
6651 }
6652 )
6653
;; Expander for vec_widen_<US>shiftl_lo_<mode> on the VDI modes.  Performs
;; the widening shift of all of operand 1 into a fresh <V_widen>mode pseudo
;; with neon_vec_<US>shiftl_<mode>, then fills operand 0 from that pseudo via
;; gen_neon_vget_low<V_widen_l>.
6654 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6655 [(match_operand:<V_double_width> 0 "register_operand" "")
6656 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6657 (match_operand:SI 2 "immediate_operand" "i")]
6658 "TARGET_NEON"
6659 {
6660 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6661 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6662 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6663
6664 DONE;
6665 }
6666 )
6667
6668 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6669 ; because the ordering of vector elements in Q registers is different from what
6670 ; the semantics of the instructions require.
6671
;; Pack two VN-mode vectors into one <V_narrow_pack> vector: operand 0 is the
;; vec_concat of the truncations of operands 1 and 2.  Only enabled when
;; !BYTES_BIG_ENDIAN.  The template prints two vmovn instructions, the first
;; writing %e0 from operand 1 and the second writing %f0 from operand 2, so
;; the "length" attribute is 8.  The "=&w" constraint marks operand 0 as
;; earlyclobber; note that %e0 is written before operand 2 is read.
6672 (define_insn "vec_pack_trunc_<mode>"
6673 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6674 (vec_concat:<V_narrow_pack>
6675 (truncate:<V_narrow>
6676 (match_operand:VN 1 "register_operand" "w"))
6677 (truncate:<V_narrow>
6678 (match_operand:VN 2 "register_operand" "w"))))]
6679 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6680 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6681 [(set_attr "type" "multiple")
6682 (set_attr "length" "8")]
6683 )
6684
6685 ;; For the non-quad case.
;; Truncate the VN-mode operand 1 into the <V_narrow> operand 0 with a single
;; vmovn.  Only enabled when !BYTES_BIG_ENDIAN.
6686 (define_insn "neon_vec_pack_trunc_<mode>"
6687 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6688 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6689 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6690 "vmovn.i<V_sz_elem>\t%P0, %q1"
6691 [(set_attr "type" "neon_move_narrow_q")]
6692 )
6693
;; Expander for vec_pack_trunc_<mode> on the VSHFT modes (little-endian
;; only).  Operands 1 and 2 are placed into one fresh <V_DOUBLE>mode pseudo
;; through gen_move_lo_quad_<V_double> and gen_move_hi_quad_<V_double>
;; respectively, and that pseudo is then narrowed into operand 0 with
;; neon_vec_pack_trunc_<V_double>.
6694 (define_expand "vec_pack_trunc_<mode>"
6695 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6696 (match_operand:VSHFT 1 "register_operand" "")
6697 (match_operand:VSHFT 2 "register_operand")]
6698 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6699 {
6700 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6701
6702 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6703 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6704 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6705 DONE;
6706 })
6707
;; Match abs (operand 1 - operand 2) on the VF modes, with the subtraction
;; written as a plain minus, and print it as one vabd.  Only enabled under
;; flag_unsafe_math_optimizations.
6708 (define_insn "neon_vabd<mode>_2"
6709 [(set (match_operand:VF 0 "s_register_operand" "=w")
6710 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6711 (match_operand:VF 2 "s_register_operand" "w"))))]
6712 "TARGET_NEON && flag_unsafe_math_optimizations"
6713 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6714 [(set_attr "type" "neon_fp_abd_s<q>")]
6715 )
6716
;; Match abs of an UNSPEC_VSUB of operands 1 and 2 on the VF modes and print
;; it as one vabd.  Only enabled under flag_unsafe_math_optimizations.  The
;; element suffix here comes from <V_if_elem>, whereas neon_vabd<mode>_2 uses
;; <V_s_elem>.
6717 (define_insn "neon_vabd<mode>_3"
6718 [(set (match_operand:VF 0 "s_register_operand" "=w")
6719 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6720 (match_operand:VF 2 "s_register_operand" "w")]
6721 UNSPEC_VSUB)))]
6722 "TARGET_NEON && flag_unsafe_math_optimizations"
6723 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6724 [(set_attr "type" "neon_fp_abd_s<q>")]
6725 )
6726
6727 ;; Copy from core-to-neon regs, then extend, not vice-versa
6728
;; After reload, when the DImode destination is a VFP register, split
;; sign_extend:DI of an SImode register into two sets: duplicate operand 1
;; across the V2SImode view of the destination register, then arithmetic
;; shift the DImode destination right by 32.
6729 (define_split
6730 [(set (match_operand:DI 0 "s_register_operand" "")
6731 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6732 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6733 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6734 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6735 {
/* Operand 2 is the same hard register as operand 0, viewed as V2SImode.  */
6736 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6737 })
6738
;; After reload, when the DImode destination is a VFP register, split
;; sign_extend:DI of an HImode register into two sets: duplicate operand 1
;; across the V4HImode view of the destination register, then arithmetic
;; shift the DImode destination right by 48.
6739 (define_split
6740 [(set (match_operand:DI 0 "s_register_operand" "")
6741 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6742 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6743 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6744 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6745 {
/* Operand 2 is the same hard register as operand 0, viewed as V4HImode.  */
6746 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6747 })
6748
;; After reload, when the DImode destination is a VFP register, split
;; sign_extend:DI of a QImode register into two sets: duplicate operand 1
;; across the V8QImode view of the destination register, then arithmetic
;; shift the DImode destination right by 56.
6749 (define_split
6750 [(set (match_operand:DI 0 "s_register_operand" "")
6751 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6752 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6753 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6754 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6755 {
/* Operand 2 is the same hard register as operand 0, viewed as V8QImode.  */
6756 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6757 })
6758
;; After reload, when the DImode destination is a VFP register, split
;; zero_extend:DI of an SImode register into two sets: duplicate operand 1
;; across the V2SImode view of the destination register, then logical shift
;; the DImode destination right by 32.
6759 (define_split
6760 [(set (match_operand:DI 0 "s_register_operand" "")
6761 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6762 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6763 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6764 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6765 {
/* Operand 2 is the same hard register as operand 0, viewed as V2SImode.  */
6766 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6767 })
6768
;; After reload, when the DImode destination is a VFP register, split
;; zero_extend:DI of an HImode register into two sets: duplicate operand 1
;; across the V4HImode view of the destination register, then logical shift
;; the DImode destination right by 48.
6769 (define_split
6770 [(set (match_operand:DI 0 "s_register_operand" "")
6771 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6772 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6773 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6774 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6775 {
/* Operand 2 is the same hard register as operand 0, viewed as V4HImode.  */
6776 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6777 })
6778
;; After reload, when the DImode destination is a VFP register, split
;; zero_extend:DI of a QImode register into two sets: duplicate operand 1
;; across the V8QImode view of the destination register, then logical shift
;; the DImode destination right by 56.
6779 (define_split
6780 [(set (match_operand:DI 0 "s_register_operand" "")
6781 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6782 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6783 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6784 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6785 {
/* Operand 2 is the same hard register as operand 0, viewed as V8QImode.  */
6786 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6787 })