Arm: Update Armv8.4-a's FP16 FML intrinsics
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; It takes one of the values vadd, vmin or vmax and defaults to vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for VDX-mode values.  It matches only when at least one
;; operand is a register.  Alternative 2 (Dn immediate) prints a vmov whose
;; element width is reported by neon_immediate_valid_for_move; a width of 0
;; selects the vmov.f32 form.  Alternatives 1 and 3 are printed by
;; output_move_neon, alternative 8 returns "#", and alternatives 6 and 7
;; fall through to output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r, ?Us,*r")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, Usi,r,*r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
/* Static because the returned template must outlive this block.  */
38 static char templ[40];
39
/* May rewrite operands[1] and sets width for the immediate form.  */
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
/* Alternative 2 was fully handled above.  */
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 case 8: return "#";
61 default: return output_move_double (operands, true, NULL);
62 }
63 }
64 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
65 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\
66 neon_load1_2reg, neon_store1_2reg, multiple")
67 (set_attr "length" "4,4,4,4,4,4,8,8,8")
68 (set_attr "arm_pool_range" "*,*,*,1020,*,*,1020,*,*")
69 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,1018,*,*")
70 (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*,*")])
71
;; Move pattern for VQXMOV-mode values.  It matches only when at least one
;; operand is a register.  Alternative 2 (Dn immediate) prints a vmov whose
;; element width is reported by neon_immediate_valid_for_move; a width of 0
;; selects the vmov.f32 form.  Alternatives 4 and 5 each print two vmov
;; instructions, one per half (%e/%f).  Alternatives 1 and 3 are printed by
;; output_move_neon and alternatives 6-8 by output_move_quad.
72 (define_insn "*neon_mov<mode>"
73 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
74 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
75 (match_operand:VQXMOV 1 "general_operand"
76 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 "TARGET_NEON
78 && (register_operand (operands[0], <MODE>mode)
79 || register_operand (operands[1], <MODE>mode))"
80 {
81 if (which_alternative == 2)
82 {
83 int width, is_valid;
/* Static because the returned template must outlive this block.  */
84 static char templ[40];
85
/* May rewrite operands[1] and sets width for the immediate form.  */
86 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
87 &operands[1], &width);
88
89 gcc_assert (is_valid != 0);
90
91 if (width == 0)
92 return "vmov.f32\t%q0, %1 @ <mode>";
93 else
94 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
95
96 return templ;
97 }
98
99 switch (which_alternative)
100 {
101 case 0: return "vmov\t%q0, %q1 @ <mode>";
102 case 1: case 3: return output_move_neon (operands);
/* Alternative 2 was fully handled above.  */
103 case 2: gcc_unreachable ();
104 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
105 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
106 default: return output_move_quad (operands);
107 }
108 }
109 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
110 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
111 mov_reg,neon_load1_4reg,neon_store1_4reg")
112 (set_attr "length" "4,8,4,8,8,8,16,8,16")
113 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
114 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
115 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116
117 /* We define these mov expanders to match the standard mov$a optab to prevent
118 the mid-end from trying to do a subreg for these modes, which is the most
119 inefficient way to expand the move. Also, big-endian subregs aren't
120 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
121 Without these RTL generation patterns the mid-end would attempt to take a
122 subreg and may ICE if it can't. */
123
;; TImode move expander.  While pseudos can still be created, a move whose
;; destination is not a REG has its source forced into a register first.
124 (define_expand "movti"
125 [(set (match_operand:TI 0 "nonimmediate_operand" "")
126 (match_operand:TI 1 "general_operand" ""))]
127 "TARGET_NEON"
128 {
129 if (can_create_pseudo_p ())
130 {
131 if (!REG_P (operands[0]))
132 operands[1] = force_reg (TImode, operands[1]);
133 }
134 })
135
;; Move expander for the VSTRUCT modes.  While pseudos can still be created,
;; a move whose destination is not a REG has its source forced into a
;; register first.
136 (define_expand "mov<mode>"
137 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
138 (match_operand:VSTRUCT 1 "general_operand" ""))]
139 "TARGET_NEON"
140 {
141 if (can_create_pseudo_p ())
142 {
143 if (!REG_P (operands[0]))
144 operands[1] = force_reg (<MODE>mode, operands[1]);
145 }
146 })
147
;; Move expander for the VH modes; both operands use the
;; s_register_operand predicate.  While pseudos can still be created, a
;; destination that is not a REG causes the source to be forced into a
;; register.
148 (define_expand "mov<mode>"
149 [(set (match_operand:VH 0 "s_register_operand")
150 (match_operand:VH 1 "s_register_operand"))]
151 "TARGET_NEON"
152 {
153 if (can_create_pseudo_p ())
154 {
155 if (!REG_P (operands[0]))
156 operands[1] = force_reg (<MODE>mode, operands[1]);
157 }
158 })
159
;; Move pattern for the VSTRUCT modes.  It matches only when at least one
;; operand is a register.  The register-to-register alternative returns "#"
;; (presumably handled by the define_splits that follow -- confirm that they
;; cover every VSTRUCT mode).  The store and load alternatives are printed
;; by output_move_neon.  The length attribute is computed by
;; arm_attr_length_move_neon.
160 (define_insn "*neon_mov<mode>"
161 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
162 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
163 "TARGET_NEON
164 && (register_operand (operands[0], <MODE>mode)
165 || register_operand (operands[1], <MODE>mode))"
166 {
167 switch (which_alternative)
168 {
169 case 0: return "#";
170 case 1: case 2: return output_move_neon (operands);
171 default: gcc_unreachable ();
172 }
173 }
174 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
175 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
176
;; After reload, split an EImode register-to-register copy into two moves:
;; a TImode piece at the base register number and a DImode piece at register
;; offset 4.  neon_disambiguate_copy fills in operands[0..3] from the
;; dest/src arrays (presumably ordering the pieces so that overlapping
;; copies are safe -- confirm against its definition).
177 (define_split
178 [(set (match_operand:EI 0 "s_register_operand" "")
179 (match_operand:EI 1 "s_register_operand" ""))]
180 "TARGET_NEON && reload_completed"
181 [(set (match_dup 0) (match_dup 1))
182 (set (match_dup 2) (match_dup 3))]
183 {
184 int rdest = REGNO (operands[0]);
185 int rsrc = REGNO (operands[1]);
186 rtx dest[2], src[2];
187
188 dest[0] = gen_rtx_REG (TImode, rdest);
189 src[0] = gen_rtx_REG (TImode, rsrc);
190 dest[1] = gen_rtx_REG (DImode, rdest + 4);
191 src[1] = gen_rtx_REG (DImode, rsrc + 4);
192
193 neon_disambiguate_copy (operands, dest, src, 2);
194 })
195
;; After reload, split an OImode register-to-register copy into two TImode
;; moves at register offsets 0 and 4.  neon_disambiguate_copy fills in
;; operands[0..3] from the dest/src arrays (presumably ordering the pieces
;; so that overlapping copies are safe -- confirm against its definition).
196 (define_split
197 [(set (match_operand:OI 0 "s_register_operand" "")
198 (match_operand:OI 1 "s_register_operand" ""))]
199 "TARGET_NEON && reload_completed"
200 [(set (match_dup 0) (match_dup 1))
201 (set (match_dup 2) (match_dup 3))]
202 {
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
205 rtx dest[2], src[2];
206
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211
212 neon_disambiguate_copy (operands, dest, src, 2);
213 })
214
;; After reload, split a CImode register-to-register copy into three TImode
;; moves at register offsets 0, 4 and 8.  neon_disambiguate_copy fills in
;; operands[0..5] from the dest/src arrays (presumably ordering the pieces
;; so that overlapping copies are safe -- confirm against its definition).
215 (define_split
216 [(set (match_operand:CI 0 "s_register_operand" "")
217 (match_operand:CI 1 "s_register_operand" ""))]
218 "TARGET_NEON && reload_completed"
219 [(set (match_dup 0) (match_dup 1))
220 (set (match_dup 2) (match_dup 3))
221 (set (match_dup 4) (match_dup 5))]
222 {
223 int rdest = REGNO (operands[0]);
224 int rsrc = REGNO (operands[1]);
225 rtx dest[3], src[3];
226
227 dest[0] = gen_rtx_REG (TImode, rdest);
228 src[0] = gen_rtx_REG (TImode, rsrc);
229 dest[1] = gen_rtx_REG (TImode, rdest + 4);
230 src[1] = gen_rtx_REG (TImode, rsrc + 4);
231 dest[2] = gen_rtx_REG (TImode, rdest + 8);
232 src[2] = gen_rtx_REG (TImode, rsrc + 8);
233
234 neon_disambiguate_copy (operands, dest, src, 3);
235 })
236
;; After reload, split an XImode register-to-register copy into four TImode
;; moves at register offsets 0, 4, 8 and 12.  neon_disambiguate_copy fills
;; in operands[0..7] from the dest/src arrays (presumably ordering the
;; pieces so that overlapping copies are safe -- confirm against its
;; definition).
237 (define_split
238 [(set (match_operand:XI 0 "s_register_operand" "")
239 (match_operand:XI 1 "s_register_operand" ""))]
240 "TARGET_NEON && reload_completed"
241 [(set (match_dup 0) (match_dup 1))
242 (set (match_dup 2) (match_dup 3))
243 (set (match_dup 4) (match_dup 5))
244 (set (match_dup 6) (match_dup 7))]
245 {
246 int rdest = REGNO (operands[0]);
247 int rsrc = REGNO (operands[1]);
248 rtx dest[4], src[4];
249
250 dest[0] = gen_rtx_REG (TImode, rdest);
251 src[0] = gen_rtx_REG (TImode, rsrc);
252 dest[1] = gen_rtx_REG (TImode, rdest + 4);
253 src[1] = gen_rtx_REG (TImode, rsrc + 4);
254 dest[2] = gen_rtx_REG (TImode, rdest + 8);
255 src[2] = gen_rtx_REG (TImode, rsrc + 8);
256 dest[3] = gen_rtx_REG (TImode, rdest + 12);
257 src[3] = gen_rtx_REG (TImode, rsrc + 12);
258
259 neon_disambiguate_copy (operands, dest, src, 4);
260 })
261
;; Misaligned vector move expander for the VDQX modes, wrapped in
;; UNSPEC_MISALIGNED_ACCESS.  Enabled only for little-endian targets with
;; unaligned_access.  The body ensures that one operand is a register and
;; that the address of the other operand is acceptable to
;; neon_vector_mem_operand.
262 (define_expand "movmisalign<mode>"
263 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
264 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
265 UNSPEC_MISALIGNED_ACCESS))]
266 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
267 {
268 rtx adjust_mem;
269 /* This pattern is not permitted to fail during expansion: if both arguments
270 are non-registers (e.g. memory := constant, which can be created by the
271 auto-vectorizer), force operand 1 into a register. */
272 if (!s_register_operand (operands[0], <MODE>mode)
273 && !s_register_operand (operands[1], <MODE>mode))
274 operands[1] = force_reg (<MODE>mode, operands[1]);
275
/* Pick whichever operand is not the register destination.  */
276 if (s_register_operand (operands[0], <MODE>mode))
277 adjust_mem = operands[1];
278 else
279 adjust_mem = operands[0];
280
281 /* Legitimize address. */
282 if (!neon_vector_mem_operand (adjust_mem, 2, true))
283 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
284
285 })
286
;; Misaligned store of a VDX-mode register, printed as a vst1 of the single
;; register %P1 to the address %A0.
287 (define_insn "*movmisalign<mode>_neon_store"
288 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
289 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
290 UNSPEC_MISALIGNED_ACCESS))]
291 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
292 "vst1.<V_sz_elem>\t{%P1}, %A0"
293 [(set_attr "type" "neon_store1_1reg<q>")])
294
;; Misaligned load into a VDX-mode register, printed as a vld1 of the single
;; register %P0 from the address %A1.
295 (define_insn "*movmisalign<mode>_neon_load"
296 [(set (match_operand:VDX 0 "s_register_operand" "=w")
297 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
298 " Um")]
299 UNSPEC_MISALIGNED_ACCESS))]
300 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
301 "vld1.<V_sz_elem>\t{%P0}, %A1"
302 [(set_attr "type" "neon_load1_1reg<q>")])
303
;; Misaligned store of a VQX-mode register, printed as a vst1 of %q1 to the
;; address %A0.
304 (define_insn "*movmisalign<mode>_neon_store"
305 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
306 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
307 UNSPEC_MISALIGNED_ACCESS))]
308 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
309 "vst1.<V_sz_elem>\t{%q1}, %A0"
310 [(set_attr "type" "neon_store1_1reg<q>")])
311
;; Misaligned load into a VQX-mode register, printed as a vld1 of %q0 from
;; the address %A1.
312 (define_insn "*movmisalign<mode>_neon_load"
313 [(set (match_operand:VQX 0 "s_register_operand" "=w")
314 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
315 " Um")]
316 UNSPEC_MISALIGNED_ACCESS))]
317 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
318 "vld1.<V_sz_elem>\t{%q0}, %A1"
319 [(set_attr "type" "neon_load1_1reg<q>")])
320
;; Insert a scalar (operand 1, from memory or a core register) into one lane
;; of a VD_LANE vector.  Operand 3 is the incoming vector, tied to the
;; output.  Operand 2 is the vec_merge mask; its lowest set bit selects the
;; lane, and the lane index is mirrored on big-endian targets.
321 (define_insn "vec_set<mode>_internal"
322 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
323 (vec_merge:VD_LANE
324 (vec_duplicate:VD_LANE
325 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
326 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
327 (match_operand:SI 2 "immediate_operand" "i,i")))]
328 "TARGET_NEON"
329 {
/* Convert the merge mask into a lane index.  */
330 int elt = ffs ((int) INTVAL (operands[2])) - 1;
331 if (BYTES_BIG_ENDIAN)
332 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
333 operands[2] = GEN_INT (elt);
334
335 if (which_alternative == 0)
336 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
337 else
338 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
339 }
340 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
341
;; Insert a scalar (operand 1, from memory or a core register) into one lane
;; of a VQ2 vector.  The lowest set bit of the merge mask (operand 2)
;; selects the lane.  The lane is then expressed relative to one <V_HALF>
;; half of the register: operand 0 is rewritten to that half (register
;; number + 0 or + 2) and operand 2 to the index within it, mirrored on
;; big-endian targets.
342 (define_insn "vec_set<mode>_internal"
343 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
344 (vec_merge:VQ2
345 (vec_duplicate:VQ2
346 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
347 (match_operand:VQ2 3 "s_register_operand" "0,0")
348 (match_operand:SI 2 "immediate_operand" "i,i")))]
349 "TARGET_NEON"
350 {
/* Convert the merge mask into a lane index.  */
351 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
352 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
353 int elt = elem % half_elts;
/* Register-number offset of the half holding the lane: 0 or 2.  */
354 int hi = (elem / half_elts) * 2;
355 int regno = REGNO (operands[0]);
356
357 if (BYTES_BIG_ENDIAN)
358 elt = half_elts - 1 - elt;
359
360 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
361 operands[2] = GEN_INT (elt);
362
363 if (which_alternative == 0)
364 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
365 else
366 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
367 }
368 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
369 )
370
;; Insert a DImode scalar into one element of a V2DI vector.  The lowest set
;; bit of the merge mask (operand 2) selects the element.  Operand 0 is
;; rewritten to the DImode register at register number + 2 * element, which
;; is then loaded with vld1.64 or written from a core-register pair with
;; vmov.
371 (define_insn "vec_setv2di_internal"
372 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
373 (vec_merge:V2DI
374 (vec_duplicate:V2DI
375 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
376 (match_operand:V2DI 3 "s_register_operand" "0,0")
377 (match_operand:SI 2 "immediate_operand" "i,i")))]
378 "TARGET_NEON"
379 {
380 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
381 int regno = REGNO (operands[0]) + 2 * elem;
382
383 operands[0] = gen_rtx_REG (DImode, regno);
384
385 if (which_alternative == 0)
386 return "vld1.64\t%P0, %A1";
387 else
388 return "vmov\t%P0, %Q1, %R1";
389 }
390 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
391 )
392
;; vec_set expander for the VDQ modes.  It converts the element index in
;; operand 2 into a single-bit mask (1 << index) and emits
;; vec_set<mode>_internal with operand 0 as both the destination and the
;; incoming vector.
393 (define_expand "vec_set<mode>"
394 [(match_operand:VDQ 0 "s_register_operand" "")
395 (match_operand:<V_elem> 1 "s_register_operand" "")
396 (match_operand:SI 2 "immediate_operand" "")]
397 "TARGET_NEON"
398 {
399 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
400 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
401 GEN_INT (elem), operands[0]));
402 DONE;
403 })
404
;; Extract lane operand 2 of a VD_LANE vector either to memory (vst1 of a
;; single lane) or to a core register (vmov).  The lane index is mirrored on
;; big-endian targets.
405 (define_insn "vec_extract<mode><V_elem_l>"
406 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
407 (vec_select:<V_elem>
408 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
409 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
410 "TARGET_NEON"
411 {
412 if (BYTES_BIG_ENDIAN)
413 {
414 int elt = INTVAL (operands[2]);
415 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
416 operands[2] = GEN_INT (elt);
417 }
418
419 if (which_alternative == 0)
420 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
421 else
422 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
423 }
424 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
425 )
426
;; Extract lane operand 2 of a VQ2 vector either to memory or to a core
;; register.  The lane is expressed relative to one <V_HALF> half of the
;; source: operand 1 is rewritten to that half (register number + 0 or + 2)
;; and operand 2 to the index within it, mirrored on big-endian targets.
427 (define_insn "vec_extract<mode><V_elem_l>"
428 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
429 (vec_select:<V_elem>
430 (match_operand:VQ2 1 "s_register_operand" "w,w")
431 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
432 "TARGET_NEON"
433 {
434 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
435 int elt = INTVAL (operands[2]) % half_elts;
/* Register-number offset of the half holding the lane: 0 or 2.  */
436 int hi = (INTVAL (operands[2]) / half_elts) * 2;
437 int regno = REGNO (operands[1]);
438
439 if (BYTES_BIG_ENDIAN)
440 elt = half_elts - 1 - elt;
441
442 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
443 operands[2] = GEN_INT (elt);
444
445 if (which_alternative == 0)
446 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
447 else
448 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
449 }
450 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
451 )
452
;; Extract element operand 2 of a V2DI vector as a DImode value.  Operand 1
;; is rewritten to the DImode register at register number + 2 * index, which
;; is then stored with vst1.64 or moved to a core-register pair with vmov.
453 (define_insn "vec_extractv2didi"
454 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
455 (vec_select:DI
456 (match_operand:V2DI 1 "s_register_operand" "w,w")
457 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
458 "TARGET_NEON"
459 {
460 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
461
462 operands[1] = gen_rtx_REG (DImode, regno);
463
464 if (which_alternative == 0)
465 return "vst1.64\t{%P1}, %A0 @ v2di";
466 else
467 return "vmov\t%Q0, %R0, %P1 @ v2di";
468 }
469 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
470 )
471
;; vec_init expander for the VDQ modes; all of the work is delegated to
;; neon_expand_vector_init.
472 (define_expand "vec_init<mode><V_elem_l>"
473 [(match_operand:VDQ 0 "s_register_operand" "")
474 (match_operand 1 "" "")]
475 "TARGET_NEON"
476 {
477 neon_expand_vector_init (operands[0], operands[1]);
478 DONE;
479 })
480
481 ;; Doubleword and quadword arithmetic.
482
483 ;; NOTE: some other instructions also support 64-bit integer
484 ;; element size, which we could potentially use for "long long" operations.
485
;; Vector add for the VDQ modes.  Floating-point modes are accepted only
;; under flag_unsafe_math_optimizations.  The type attribute distinguishes
;; floating-point from integer adds.
486 (define_insn "*add<mode>3_neon"
487 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
488 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
489 (match_operand:VDQ 2 "s_register_operand" "w")))]
490 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
491 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
492 [(set (attr "type")
493 (if_then_else (match_test "<Is_float_mode>")
494 (const_string "neon_fp_addsub_s<q>")
495 (const_string "neon_add<q>")))]
496 )
497
498 ;; As with SFmode, full support for HFmode vector arithmetic is only available
499 ;; when flag_unsafe_math_optimizations is enabled.
500
;; Standard-name vector add for the VH modes.  Requires TARGET_NEON_FP16INST
;; and flag_unsafe_math_optimizations.
501 (define_insn "add<mode>3"
502 [(set
503 (match_operand:VH 0 "s_register_operand" "=w")
504 (plus:VH
505 (match_operand:VH 1 "s_register_operand" "w")
506 (match_operand:VH 2 "s_register_operand" "w")))]
507 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
508 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
509 [(set (attr "type")
510 (if_then_else (match_test "<Is_float_mode>")
511 (const_string "neon_fp_addsub_s<q>")
512 (const_string "neon_add<q>")))]
513 )
514
;; Vector add for the VH modes that requires only TARGET_NEON_FP16INST, i.e.
;; it does not depend on flag_unsafe_math_optimizations.
515 (define_insn "add<mode>3_fp16"
516 [(set
517 (match_operand:VH 0 "s_register_operand" "=w")
518 (plus:VH
519 (match_operand:VH 1 "s_register_operand" "w")
520 (match_operand:VH 2 "s_register_operand" "w")))]
521 "TARGET_NEON_FP16INST"
522 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
523 [(set (attr "type")
524 (if_then_else (match_test "<Is_float_mode>")
525 (const_string "neon_fp_addsub_s<q>")
526 (const_string "neon_add<q>")))]
527 )
528
;; DImode add that clobbers the condition-code register.  Alternatives 0 and
;; 3 use NEON registers and print vadd.i64; the "arch" attribute selects
;; between them (neon_for_64bits vs. avoid_neon_for_64bits).  The
;; core-register alternatives return "#" and have length 8.
529 (define_insn "adddi3_neon"
530 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
531 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
532 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
533 (clobber (reg:CC CC_REGNUM))]
534 "TARGET_NEON"
535 {
536 switch (which_alternative)
537 {
538 case 0: /* fall through */
539 case 3: return "vadd.i64\t%P0, %P1, %P2";
540 case 1: return "#";
541 case 2: return "#";
542 case 4: return "#";
543 case 5: return "#";
544 case 6: return "#";
545 default: gcc_unreachable ();
546 }
547 }
548 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
549 multiple,multiple,multiple")
550 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
551 (set_attr "length" "*,8,8,*,8,8,8")
552 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
553 )
554
;; Vector subtract for the VDQ modes.  Floating-point modes are accepted
;; only under flag_unsafe_math_optimizations.  The type attribute
;; distinguishes floating-point from integer subtracts.
555 (define_insn "*sub<mode>3_neon"
556 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
557 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
558 (match_operand:VDQ 2 "s_register_operand" "w")))]
559 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
560 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
561 [(set (attr "type")
562 (if_then_else (match_test "<Is_float_mode>")
563 (const_string "neon_fp_addsub_s<q>")
564 (const_string "neon_sub<q>")))]
565 )
566
;; Standard-name vector subtract for the VH (half-precision) modes.
;; Requires TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
;; The insn is a floating-point subtract, so it is typed
;; neon_fp_addsub_s<q> -- the same class that *sub<mode>3_neon uses for
;; float modes -- rather than the integer class neon_sub<q>.  The type
;; attribute only affects scheduling.
567 (define_insn "sub<mode>3"
568 [(set
569 (match_operand:VH 0 "s_register_operand" "=w")
570 (minus:VH
571 (match_operand:VH 1 "s_register_operand" "w")
572 (match_operand:VH 2 "s_register_operand" "w")))]
573 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
574 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
575 [(set_attr "type" "neon_fp_addsub_s<q>")]
576 )
577
;; Vector subtract for the VH (half-precision) modes that requires only
;; TARGET_NEON_FP16INST, i.e. it does not depend on
;; flag_unsafe_math_optimizations.  The insn is a floating-point subtract,
;; so it is typed neon_fp_addsub_s<q> -- the same class that
;; *sub<mode>3_neon uses for float modes -- rather than the integer class
;; neon_sub<q>.  The type attribute only affects scheduling.
578 (define_insn "sub<mode>3_fp16"
579 [(set
580 (match_operand:VH 0 "s_register_operand" "=w")
581 (minus:VH
582 (match_operand:VH 1 "s_register_operand" "w")
583 (match_operand:VH 2 "s_register_operand" "w")))]
584 "TARGET_NEON_FP16INST"
585 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
586 [(set_attr "type" "neon_fp_addsub_s<q>")]
587 )
588
;; DImode subtract that clobbers the condition-code register.  Alternatives
;; 0 and 4 use NEON registers and print vsub.i64; the "arch" attribute
;; selects between them.  The core-register alternatives print a subs/sbc
;; pair on the low (%Q) and high (%R) words.
589 (define_insn "subdi3_neon"
590 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
591 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
592 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
593 (clobber (reg:CC CC_REGNUM))]
594 "TARGET_NEON"
595 {
596 switch (which_alternative)
597 {
598 case 0: /* fall through */
599 case 4: return "vsub.i64\t%P0, %P1, %P2";
600 case 1: /* fall through */
601 case 2: /* fall through */
602 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
603 default: gcc_unreachable ();
604 }
605 }
606 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
607 (set_attr "conds" "*,clob,clob,clob,*")
608 (set_attr "length" "*,8,8,8,*")
609 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
610 )
611
;; Vector multiply for the VDQW modes.  Floating-point modes are accepted
;; only under flag_unsafe_math_optimizations.  The type attribute
;; distinguishes floating-point from integer multiplies.
612 (define_insn "*mul<mode>3_neon"
613 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
614 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
615 (match_operand:VDQW 2 "s_register_operand" "w")))]
616 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
617 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
618 [(set (attr "type")
619 (if_then_else (match_test "<Is_float_mode>")
620 (const_string "neon_fp_mul_s<q>")
621 (const_string "neon_mul_<V_elem_ch><q>")))]
622 )
623
624 /* Perform division using multiply-by-reciprocal.
625 The reciprocal is calculated using the Newton-Raphson method.
626 Enabled with -funsafe-math-optimizations -freciprocal-math
627 and disabled for -Os since it increases code size. */
628
;; Vector division expander for the VCVTF modes.  Enabled when
;; flag_reciprocal_math is set and the code is not being optimized for size.
;; It emits a vrecpe estimate of 1/operand 2, refines it with two
;; vrecps + multiply steps, then multiplies operand 1 by the result.
629 (define_expand "div<mode>3"
630 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
631 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")
632 (match_operand:VCVTF 2 "s_register_operand" "w")))]
633 "TARGET_NEON && !optimize_size
634 && flag_reciprocal_math"
635 {
636 rtx rec = gen_reg_rtx (<MODE>mode);
637 rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
638
639 /* Reciprocal estimate. */
640 emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
641
642 /* Perform 2 iterations of the Newton-Raphson method. */
643 for (int i = 0; i < 2; i++)
644 {
645 emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
646 emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
647 }
648
649 /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec. */
650 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
651 DONE;
652 }
653 )
654
655
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 1 +
;; operand 2 * operand 3, with the accumulator (operand 1) tied to the
;; output.  Floating-point modes are accepted only under
;; flag_unsafe_math_optimizations.
656 (define_insn "mul<mode>3add<mode>_neon"
657 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
658 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
659 (match_operand:VDQW 3 "s_register_operand" "w"))
660 (match_operand:VDQW 1 "s_register_operand" "0")))]
661 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
662 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
663 [(set (attr "type")
664 (if_then_else (match_test "<Is_float_mode>")
665 (const_string "neon_fp_mla_s<q>")
666 (const_string "neon_mla_<V_elem_ch><q>")))]
667 )
668
;; Multiply-accumulate for the VH modes, printed as vmla.f16.  The
;; accumulator (operand 1) is tied to the output.  Requires
;; TARGET_NEON_FP16INST.
669 (define_insn "mul<mode>3add<mode>_neon"
670 [(set (match_operand:VH 0 "s_register_operand" "=w")
671 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
672 (match_operand:VH 3 "s_register_operand" "w"))
673 (match_operand:VH 1 "s_register_operand" "0")))]
674 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
675 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
676 [(set_attr "type" "neon_fp_mla_s<q>")]
677 )
678
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 -
;; operand 2 * operand 3, with operand 1 tied to the output.  Floating-point
;; modes are accepted only under flag_unsafe_math_optimizations.
679 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
680 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
681 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
682 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
683 (match_operand:VDQW 3 "s_register_operand" "w"))))]
684 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
685 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
686 [(set (attr "type")
687 (if_then_else (match_test "<Is_float_mode>")
688 (const_string "neon_fp_mla_s<q>")
689 (const_string "neon_mla_<V_elem_ch><q>")))]
690 )
691
692 ;; Fused multiply-accumulate
693 ;; We define each insn twice here:
694 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
695 ;; to be able to use when converting to FMA.
696 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Standard-name fused multiply-add for the VCVTF modes, with the addend
;; (operand 3) tied to the output.  Requires TARGET_FMA and
;; flag_unsafe_math_optimizations.
697 (define_insn "fma<VCVTF:mode>4"
698 [(set (match_operand:VCVTF 0 "register_operand" "=w")
699 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
700 (match_operand:VCVTF 2 "register_operand" "w")
701 (match_operand:VCVTF 3 "register_operand" "0")))]
702 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
703 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
704 [(set_attr "type" "neon_fp_mla_s<q>")]
705 )
706
;; Fused multiply-add for the VCVTF modes without the
;; flag_unsafe_math_optimizations requirement; the addend (operand 3) is
;; tied to the output.
707 (define_insn "fma<VCVTF:mode>4_intrinsic"
708 [(set (match_operand:VCVTF 0 "register_operand" "=w")
709 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
710 (match_operand:VCVTF 2 "register_operand" "w")
711 (match_operand:VCVTF 3 "register_operand" "0")))]
712 "TARGET_NEON && TARGET_FMA"
713 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
714 [(set_attr "type" "neon_fp_mla_s<q>")]
715 )
716
;; Standard-name fused multiply-add for the VH modes, with the addend
;; (operand 3) tied to the output.  Requires TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations.
717 (define_insn "fma<VH:mode>4"
718 [(set (match_operand:VH 0 "register_operand" "=w")
719 (fma:VH
720 (match_operand:VH 1 "register_operand" "w")
721 (match_operand:VH 2 "register_operand" "w")
722 (match_operand:VH 3 "register_operand" "0")))]
723 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
724 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
725 [(set_attr "type" "neon_fp_mla_s<q>")]
726 )
727
;; Fused multiply-add for the VH modes without the
;; flag_unsafe_math_optimizations requirement; the addend (operand 3) is
;; tied to the output.
728 (define_insn "fma<VH:mode>4_intrinsic"
729 [(set (match_operand:VH 0 "register_operand" "=w")
730 (fma:VH
731 (match_operand:VH 1 "register_operand" "w")
732 (match_operand:VH 2 "register_operand" "w")
733 (match_operand:VH 3 "register_operand" "0")))]
734 "TARGET_NEON_FP16INST"
735 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
736 [(set_attr "type" "neon_fp_mla_s<q>")]
737 )
738
;; Fused multiply-subtract for the VCVTF modes: an fma whose first
;; multiplicand is negated, printed as vfms.  Requires TARGET_FMA and
;; flag_unsafe_math_optimizations.
739 (define_insn "*fmsub<VCVTF:mode>4"
740 [(set (match_operand:VCVTF 0 "register_operand" "=w")
741 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
742 (match_operand:VCVTF 2 "register_operand" "w")
743 (match_operand:VCVTF 3 "register_operand" "0")))]
744 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
745 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
746 [(set_attr "type" "neon_fp_mla_s<q>")]
747 )
748
;; Fused multiply-subtract for the VCVTF modes without the
;; flag_unsafe_math_optimizations requirement; printed as vfms.
749 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
750 [(set (match_operand:VCVTF 0 "register_operand" "=w")
751 (fma:VCVTF
752 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
753 (match_operand:VCVTF 2 "register_operand" "w")
754 (match_operand:VCVTF 3 "register_operand" "0")))]
755 "TARGET_NEON && TARGET_FMA"
756 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
757 [(set_attr "type" "neon_fp_mla_s<q>")]
758 )
759
;; Fused multiply-subtract for the VH modes; printed as vfms.  Requires only
;; TARGET_NEON_FP16INST.
760 (define_insn "fmsub<VH:mode>4_intrinsic"
761 [(set (match_operand:VH 0 "register_operand" "=w")
762 (fma:VH
763 (neg:VH (match_operand:VH 1 "register_operand" "w"))
764 (match_operand:VH 2 "register_operand" "w")
765 (match_operand:VH 3 "register_operand" "0")))]
766 "TARGET_NEON_FP16INST"
767 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
768 [(set_attr "type" "neon_fp_mla_s<q>")]
769 )
770
;; Vector round-to-integral for the VCVTF modes, one pattern per NEON_VRINT
;; unspec; printed as vrint<variant>.f32.  Requires TARGET_VFP5.
771 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
772 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
773 (unspec:VCVTF [(match_operand:VCVTF 1
774 "s_register_operand" "w")]
775 NEON_VRINT))]
776 "TARGET_NEON && TARGET_VFP5"
777 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
778 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
779 )
780
;; Float-to-integer vector conversion (signed or unsigned, via FIXUORS) of a
;; NEON_VCVT unspec; printed as vcvt<variant>.<su>32.f32.  Requires
;; TARGET_VFP5 and is marked as not predicable.
781 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
782 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
783 (FIXUORS:<V_cmp_result> (unspec:VCVTF
784 [(match_operand:VCVTF 1 "register_operand" "w")]
785 NEON_VCVT)))]
786 "TARGET_NEON && TARGET_VFP5"
787 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
788 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
789 (set_attr "predicable" "no")]
790 )
791
;; Vector inclusive OR for the VDQ modes.  Alternative 0 is
;; register-register vorr.  Alternative 1 takes a Dl immediate with operand
;; 1 tied to the output and is printed by neon_output_logic_immediate.
792 (define_insn "ior<mode>3"
793 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
794 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
795 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
796 "TARGET_NEON"
797 {
798 switch (which_alternative)
799 {
800 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
801 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
802 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
803 default: gcc_unreachable ();
804 }
805 }
806 [(set_attr "type" "neon_logic<q>")]
807 )
808
809 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
810 ;; vorr. We support the pseudo-instruction vand instead, because that
811 ;; corresponds to the canonical form the middle-end expects to use for
812 ;; immediate bitwise-ANDs.
813
;; Vector AND for the VDQ modes.  Alternative 0 is register-register vand.
;; Alternative 1 takes a DL immediate with operand 1 tied to the output and
;; is printed by neon_output_logic_immediate, called with "vand" and its
;; fourth argument set to 1 (ior<mode>3 passes 0).
814 (define_insn "and<mode>3"
815 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
816 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
817 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
818 "TARGET_NEON"
819 {
820 switch (which_alternative)
821 {
822 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
823 case 1: return neon_output_logic_immediate ("vand", &operands[2],
824 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
825 default: gcc_unreachable ();
826 }
827 }
828 [(set_attr "type" "neon_logic<q>")]
829 )
830
;; Vector OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2.
831 (define_insn "orn<mode>3_neon"
832 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
833 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
834 (match_operand:VDQ 1 "s_register_operand" "w")))]
835 "TARGET_NEON"
836 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
837 [(set_attr "type" "neon_logic<q>")]
838 )
839
840 ;; TODO: investigate whether we should disable
841 ;; this and bicdi3_neon for the A8 in line with the other
842 ;; changes above.
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.  Alternative 0 uses
;; NEON registers (vorn).  The core-register alternatives are split after
;; reload when the destination is not a VFP register.  On Thumb-2 the split
;; produces two SImode or-not operations on the low and high halves.
;; Otherwise it emits one_cmpldi2 into operand 0 followed by iordi3 with
;; operand 1.
843 (define_insn_and_split "orndi3_neon"
844 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
845 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
846 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
847 "TARGET_NEON"
848 "@
849 vorn\t%P0, %P1, %P2
850 #
851 #
852 #"
853 "reload_completed &&
854 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
855 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
856 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
857 "
858 {
859 if (TARGET_THUMB2)
860 {
861 operands[3] = gen_highpart (SImode, operands[0]);
862 operands[0] = gen_lowpart (SImode, operands[0]);
863 operands[4] = gen_highpart (SImode, operands[2]);
864 operands[2] = gen_lowpart (SImode, operands[2]);
865 operands[5] = gen_highpart (SImode, operands[1]);
866 operands[1] = gen_lowpart (SImode, operands[1]);
867 }
868 else
869 {
870 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
871 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
872 DONE;
873 }
874 }"
875 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
876 (set_attr "length" "*,16,8,8")
877 (set_attr "arch" "any,a,t2,t2")]
878 )
879
;; Vector bit-clear for the VDQ modes: operand 0 = operand 1 & ~operand 2.
880 (define_insn "bic<mode>3_neon"
881 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
882 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
883 (match_operand:VDQ 1 "s_register_operand" "w")))]
884 "TARGET_NEON"
885 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
886 [(set_attr "type" "neon_logic<q>")]
887 )
888
889 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear: operand 0 = operand 1 & ~operand 2.  Alternative 0 uses
;; NEON registers (vbic).  The two core-register alternatives return "#"
;; with length 8; no split is defined in this pattern, so they are
;; presumably split elsewhere -- confirm.
890 (define_insn "bicdi3_neon"
891 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
892 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
893 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
894 "TARGET_NEON"
895 "@
896 vbic\t%P0, %P1, %P2
897 #
898 #"
899 [(set_attr "type" "neon_logic,multiple,multiple")
900 (set_attr "length" "*,8,8")]
901 )
902
;; Vector exclusive OR for the VDQ modes, printed as veor.
903 (define_insn "xor<mode>3"
904 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
905 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
906 (match_operand:VDQ 2 "s_register_operand" "w")))]
907 "TARGET_NEON"
908 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
909 [(set_attr "type" "neon_logic<q>")]
910 )
911
;; Vector one's complement for the VDQ modes: operand 0 = ~operand 1 (vmvn).
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (not:VDQ
          (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmvn\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_move<q>")]
)
919
;; Element-wise absolute value for the VDQW modes (vabs).
;; The scheduling type depends on whether the mode is a float mode.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (abs:VDQW
          (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_abs_s<q>")
                      (const_string "neon_abs<q>")))]
)
930
;; Element-wise negation for the VDQW modes (vneg).
;; The scheduling type depends on whether the mode is a float mode.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (neg:VDQW
          (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_neg_s<q>")
                      (const_string "neon_neg<q>")))]
)
941
;; 64-bit negation placeholder.  Every alternative is emitted as "#",
;; so the insn is always replaced by one of the two define_splits that
;; follow (VFP registers or core registers).  It clobbers a DImode
;; scratch (a "w" register only in the second alternative) and the
;; condition-code register.
(define_insn "negdi2_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
        (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
   (clobber (match_scratch:DI 2 "= X,&w,X, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
952
;; Split negdi2_neon when the destination is a VFP register:
;; load zero into a temporary and compute temporary - operand 1.
;; If no scratch register was allocated for operand 2, the destination
;; itself is used as the temporary.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (neg:DI (match_operand:DI 1 "s_register_operand" "")))
   (clobber (match_scratch:DI 2 ""))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (const_int 0))
   (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
              (clobber (reg:CC CC_REGNUM))])]
  {
    /* Fall back to the destination when the scratch is not a register.  */
    operands[2] = REG_P (operands[2]) ? operands[2] : operands[0];
  }
)
968
;; Split negdi2_neon when the destination is a general (core) register:
;; drop the unused DImode scratch and keep only the negation together
;; with the condition-code clobber.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (neg:DI (match_operand:DI 1 "s_register_operand" "")))
   (clobber (match_scratch:DI 2 ""))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_32BIT && reload_completed
   && arm_general_register_operand (operands[0], DImode)"
  [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
              (clobber (reg:CC CC_REGNUM))])]
  ""
)
981
;; Absolute value / negation (one pattern per ABSNEG code) for the VH
;; modes; requires TARGET_NEON_FP16INST.
(define_insn "<absneg_str><mode>2"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (ABSNEG:VH
          (match_operand:VH 1 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_abs<q>")]
)
989
;; Expander named neon_v<absneg_str><mode> for the VH modes; it simply
;; forwards to the <absneg_str><mode>2 instruction pattern.
(define_expand "neon_v<absneg_str><mode>"
  [(set (match_operand:VH 0 "s_register_operand")
        (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
  "TARGET_NEON_FP16INST"
  {
    rtx dest = operands[0];
    rtx src = operands[1];

    emit_insn (gen_<absneg_str><mode>2 (dest, src));
    DONE;
  }
)
999
;; Rounding operations on the VH modes, one pattern per FP16_RND unspec.
;; The mnemonic is supplied by the <fp16_rnd_insn> attribute.
(define_insn "neon_v<fp16_rnd_str><mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
                   FP16_RND))]
  "TARGET_NEON_FP16INST"
  "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_s<q>")]
)
1009
;; vrsqrte.f16 on the VH modes, represented as UNSPEC_VRSQRTE.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
                   UNSPEC_VRSQRTE))]
  "TARGET_NEON_FP16INST"
  "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
1019
;; Element-wise unsigned minimum for the VDQIW modes (vmin.<V_u_elem>).
(define_insn "*umin<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umin:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")
          (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)
1028
;; Element-wise unsigned maximum for the VDQIW modes (vmax.<V_u_elem>).
(define_insn "*umax<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umax:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")
          (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)
1037
;; Element-wise signed minimum for the VDQW modes (vmin.<V_s_elem>).
;; The scheduling type depends on whether the mode is a float mode.
(define_insn "*smin<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smin:VDQW
          (match_operand:VDQW 1 "s_register_operand" "w")
          (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_minmax_s<q>")
                      (const_string "neon_minmax<q>")))]
)
1049
;; Element-wise signed maximum for the VDQW modes (vmax.<V_s_elem>).
;; The scheduling type depends on whether the mode is a float mode.
(define_insn "*smax<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smax:VDQW
          (match_operand:VDQW 1 "s_register_operand" "w")
          (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_minmax_s<q>")
                      (const_string "neon_minmax<q>")))]
)
1061
1062 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1063 ; generic vectorizer code. It ends up creating a V2DI constructor with
1064 ; SImode elements.
1065
;; Vector left shift for the VDQIW modes.
;; Alternative 0 takes the shift counts from a register; alternative 1
;; takes an immediate ("Dn") and delegates the output text to
;; neon_output_shift_immediate.
(define_insn "vashl<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
        (ashift:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w,w")
          (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
  "TARGET_NEON"
  {
    if (which_alternative == 0)
      return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

    if (which_alternative == 1)
      {
        int quad = VALID_NEON_QREG_MODE (<MODE>mode);
        return neon_output_shift_immediate ("vshl", 'i', &operands[2],
                                            <MODE>mode, quad, true);
      }

    gcc_unreachable ();
  }
  [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
)
1084
;; Arithmetic right shift by an immediate for the VDQIW modes.
;; The output text comes from neon_output_shift_immediate, called with
;; mnemonic "vshr" and sign character 's'.
(define_insn "vashr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (ashiftrt:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")
          (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
  "TARGET_NEON"
  {
    int quad = VALID_NEON_QREG_MODE (<MODE>mode);

    return neon_output_shift_immediate ("vshr", 's', &operands[2],
                                        <MODE>mode, quad, false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)
1097
;; Logical right shift by an immediate for the VDQIW modes.
;; The output text comes from neon_output_shift_immediate, called with
;; mnemonic "vshr" and sign character 'u'.
(define_insn "vlshr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (lshiftrt:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")
          (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
  "TARGET_NEON"
  {
    int quad = VALID_NEON_QREG_MODE (<MODE>mode);

    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
                                        <MODE>mode, quad, false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)
1110
; Used by vashr<mode>3 to implement an arithmetic shift-right by a
; register amount, which is a left-shift by a negative amount with signed
; operands.  This is essentially the same as the register alternative of
; vashl<mode>3 above, but using an unspec in case GCC tries anything
; tricky with negative shift amounts.

(define_insn "ashl<mode>3_signed"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI
          [(match_operand:VDQI 1 "s_register_operand" "w")
           (match_operand:VDQI 2 "s_register_operand" "w")]
          UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON"
  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)
1125
; Used by vlshr<mode>3 to implement a logical shift-right by a register
; amount, which is a left-shift by a negative amount with unsigned
; operands.

(define_insn "ashl<mode>3_unsigned"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI
          [(match_operand:VDQI 1 "s_register_operand" "w")
           (match_operand:VDQI 2 "s_register_operand" "w")]
          UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON"
  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)
1138
;; Arithmetic right shift expander for the VDQIW modes.
;; An immediate count goes straight to vashr<mode>3_imm.  A register
;; count is negated into a fresh register and used as a signed
;; left shift (ashl<mode>3_signed).
(define_expand "vashr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (ashiftrt:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
  {
    if (!s_register_operand (operands[2], <MODE>mode))
      emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1],
                                       operands[2]));
    else
      {
        rtx neg_count = gen_reg_rtx (<MODE>mode);

        emit_insn (gen_neg<mode>2 (neg_count, operands[2]));
        emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1],
                                           neg_count));
      }
    DONE;
  }
)
1155
;; Logical right shift expander for the VDQIW modes.
;; An immediate count goes straight to vlshr<mode>3_imm.  A register
;; count is negated into a fresh register and used as an unsigned
;; left shift (ashl<mode>3_unsigned).
(define_expand "vlshr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (lshiftrt:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
  {
    if (!s_register_operand (operands[2], <MODE>mode))
      emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1],
                                       operands[2]));
    else
      {
        rtx neg_count = gen_reg_rtx (<MODE>mode);

        emit_insn (gen_neg<mode>2 (neg_count, operands[2]));
        emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1],
                                             neg_count));
      }
    DONE;
  }
)
1172
1173 ;; 64-bit shifts
1174
;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
;; leaving the upper half uninitialized.  This is OK since the shift
;; instruction only looks at the low 8 bits anyway.  To avoid confusing
;; data flow analysis however, we pretend the full register is set
;; using an unspec.
;; Alternative 0 (constraint "Um") is emitted as vld1.32, alternative 1
;; (core register) as vmov.32; both write lane 0 only.
(define_insn "neon_load_count"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
                   UNSPEC_LOAD_COUNT))]
  "TARGET_NEON"
  "@
   vld1.32\t{%P0[0]}, %A1
   vmov.32\t%P0[0], %1"
  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
)
1190
;; 64-bit left shift in a NEON register without any clobbers.
;; Only valid after reload.  The count is either an immediate in the
;; range 0..63 (alternative 0) or a NEON register (alternative 1).
(define_insn "ashldi3_neon_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (ashift:DI
          (match_operand:DI 1 "s_register_operand" " w,w")
          (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
  "TARGET_NEON && reload_completed
   && (!CONST_INT_P (operands[2])
       || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
  "@
   vshl.u64\t%P0, %P1, %2
   vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
)
1203
;; 64-bit left shift with the operands in either NEON or core registers.
;; Always emitted as "#" and split after reload:
;;  - NEON destination, constant count: a count below 1 becomes a plain
;;    move, a count above 63 is replaced by 63, and the shift is emitted
;;    through ashldi3_neon_noclobber.
;;  - NEON destination, variable count: the count is first loaded into
;;    the DImode scratch (operand 5) with neon_load_count.
;;  - Core-register destination: handed to arm_emit_coreregs_64bit_shift
;;    together with the two SImode scratches (operands 3 and 4).
(define_insn_and_split "ashldi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
        (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
                   (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
   (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
   (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
   (clobber (reg:CC_C CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
  {
    if (!IS_VFP_REGNUM (REGNO (operands[0])))
      {
        /* The shift expanders support either full overlap or no
           overlap.  */
        gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
                    || REGNO (operands[0]) == REGNO (operands[1]));

        arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
                                       operands[2], operands[3],
                                       operands[4]);
        DONE;
      }

    if (!CONST_INT_P (operands[2]))
      {
        /* Variable count: move it into the NEON scratch register.  */
        emit_insn (gen_neon_load_count (operands[5], operands[2]));
        operands[2] = operands[5];
      }
    else if (INTVAL (operands[2]) < 1)
      {
        /* Nothing to shift: just copy the source.  */
        emit_insn (gen_movdi (operands[0], operands[1]));
        DONE;
      }
    else if (INTVAL (operands[2]) > 63)
      operands[2] = GEN_INT (63);

    /* Ditch the unnecessary clobbers.  */
    emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
                                           operands[2]));
    DONE;
  }
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,speed,*,*")
   (set_attr "type" "multiple")]
)
1255
; 64-bit signed shift by a register count (vshl.s64), kept as an unspec.
; The shift amount needs to be negated for right-shifts.
(define_insn "signed_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI
          [(match_operand:DI 1 "s_register_operand" " w")
           (match_operand:DI 2 "s_register_operand" " w")]
          UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.s64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)
1266
; 64-bit unsigned shift by a register count (vshl.u64), kept as an unspec.
; The shift amount needs to be negated for right-shifts.
(define_insn "unsigned_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI
          [(match_operand:DI 1 "s_register_operand" " w")
           (match_operand:DI 2 "s_register_operand" " w")]
          UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)
1277
;; 64-bit arithmetic right shift in a NEON register by an immediate
;; in the range 1..64, without clobbers.  Only valid after reload.
(define_insn "ashrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (ashiftrt:DI
          (match_operand:DI 1 "s_register_operand" " w")
          (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.s64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)
1287
;; 64-bit logical right shift in a NEON register by an immediate
;; in the range 1..64, without clobbers.  Only valid after reload.
(define_insn "lshrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (lshiftrt:DI
          (match_operand:DI 1 "s_register_operand" " w")
          (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.u64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)
1297
;; ashrdi3_neon
;; lshrdi3_neon
;; 64-bit right shifts (one pattern per RSHIFTS code) with the operands
;; in either NEON or core registers.  Always emitted as "#" and split
;; after reload:
;;  - NEON destination, constant count: a count below 1 becomes a plain
;;    move, a count above 64 is replaced by 64, and the shift is emitted
;;    through <shift>di3_neon_imm_noclobber.
;;  - NEON destination, variable count: the count is negated into
;;    operand 3, loaded into the DImode scratch (operand 5) and used as
;;    a left shift by a negative amount.
;;  - Core-register destination: handed to arm_emit_coreregs_64bit_shift.
(define_insn_and_split "<shift>di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
        (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
                    (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
   (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
   (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
  {
    if (!IS_VFP_REGNUM (REGNO (operands[0])))
      {
        /* The shift expanders support either full overlap or no
           overlap.  */
        gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
                    || REGNO (operands[0]) == REGNO (operands[1]));

        /* This clobbers CC (ASHIFTRT by register only).  */
        arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
                                       operands[2], operands[3],
                                       operands[4]);
        DONE;
      }

    if (CONST_INT_P (operands[2]))
      {
        if (INTVAL (operands[2]) < 1)
          {
            /* Nothing to shift: just copy the source.  */
            emit_insn (gen_movdi (operands[0], operands[1]));
            DONE;
          }

        if (INTVAL (operands[2]) > 64)
          operands[2] = GEN_INT (64);

        /* Ditch the unnecessary clobbers.  */
        emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
                                                      operands[1],
                                                      operands[2]));
      }
    else
      {
        /* We must use a negative left-shift.  */
        emit_insn (gen_negsi2 (operands[3], operands[2]));
        emit_insn (gen_neon_load_count (operands[5], operands[3]));
        emit_insn (gen_<shifttype>_shift_di3_neon (operands[0],
                                                   operands[1],
                                                   operands[5]));
      }

    DONE;
  }
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,speed,*,*")
   (set_attr "type" "multiple")]
)
1357
1358 ;; Widening operations
1359
;; Signed widening sum of a VQI vector (operand 1) into a double-width
;; accumulator (operand 2), with the result in operand 0.
;; The accumulator is copied to the destination if they differ, then the
;; two halves of operand 1 are added in by the _lo and _hi patterns.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
        (plus:<V_double_width>
          (sign_extend:<V_double_width>
            (match_operand:VQI 1 "s_register_operand" ""))
          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
  "TARGET_NEON"
  {
    machine_mode src_mode = GET_MODE (operands[1]);
    rtx lo_lanes = arm_simd_vect_par_cnst_half (src_mode, false);
    rtx hi_lanes = arm_simd_vect_par_cnst_half (src_mode, true);

    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         lo_lanes,
                                                         operands[0]));
    emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         hi_lanes,
                                                         operands[0]));
    DONE;
  }
)
1388
;; Sign-extend the lanes of operand 1 selected by a "low" parallel
;; (operand 2) and add them to the accumulator (operand 3, tied to the
;; destination).  The half-register modifier in the template is swapped
;; between %e1 and %f1 for big-endian.
(define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (sign_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "%w")
              (match_operand:VQI 2 "vect_par_constant_low" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
  {
    if (BYTES_BIG_ENDIAN)
      return "vaddw.<V_s_elem>\t%q0, %q3, %f1";

    return "vaddw.<V_s_elem>\t%q0, %q3, %e1";
  }
  [(set_attr "type" "neon_add_widen")]
)
1403
;; Sign-extend the lanes of operand 1 selected by a "high" parallel
;; (operand 2) and add them to the accumulator (operand 3, tied to the
;; destination).  The half-register modifier in the template is swapped
;; between %f1 and %e1 for big-endian.
(define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (sign_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "%w")
              (match_operand:VQI 2 "vect_par_constant_high" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
  {
    if (BYTES_BIG_ENDIAN)
      return "vaddw.<V_s_elem>\t%q0, %q3, %e1";

    return "vaddw.<V_s_elem>\t%q0, %q3, %f1";
  }
  [(set_attr "type" "neon_add_widen")]
)
1418
;; Signed widening sum for the VW modes: sign-extend operand 1 to
;; <V_widen> and add the accumulator (operand 2), as one vaddw.
(define_insn "widen_ssum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (sign_extend:<V_widen>
            (match_operand:VW 1 "s_register_operand" "%w"))
          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)
1429
;; Unsigned widening sum of a VQI vector (operand 1) into a double-width
;; accumulator (operand 2), with the result in operand 0.
;; The accumulator is copied to the destination if they differ, then the
;; two halves of operand 1 are added in by the _lo and _hi patterns.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
        (plus:<V_double_width>
          (zero_extend:<V_double_width>
            (match_operand:VQI 1 "s_register_operand" ""))
          (match_operand:<V_double_width> 2 "s_register_operand" "")))]
  "TARGET_NEON"
  {
    machine_mode src_mode = GET_MODE (operands[1]);
    rtx lo_lanes = arm_simd_vect_par_cnst_half (src_mode, false);
    rtx hi_lanes = arm_simd_vect_par_cnst_half (src_mode, true);

    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         lo_lanes,
                                                         operands[0]));
    emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         hi_lanes,
                                                         operands[0]));
    DONE;
  }
)
1458
;; Zero-extend the lanes of operand 1 selected by a "low" parallel
;; (operand 2) and add them to the accumulator (operand 3, tied to the
;; destination).  The half-register modifier in the template is swapped
;; between %e1 and %f1 for big-endian.
(define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (zero_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "%w")
              (match_operand:VQI 2 "vect_par_constant_low" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
  {
    if (BYTES_BIG_ENDIAN)
      return "vaddw.<V_u_elem>\t%q0, %q3, %f1";

    return "vaddw.<V_u_elem>\t%q0, %q3, %e1";
  }
  [(set_attr "type" "neon_add_widen")]
)
1473
;; Zero-extend the lanes of operand 1 selected by a "high" parallel
;; (operand 2) and add them to the accumulator (operand 3, tied to the
;; destination).  The half-register modifier in the template is swapped
;; between %f1 and %e1 for big-endian.
(define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (zero_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "%w")
              (match_operand:VQI 2 "vect_par_constant_high" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
  {
    if (BYTES_BIG_ENDIAN)
      return "vaddw.<V_u_elem>\t%q0, %q3, %e1";

    return "vaddw.<V_u_elem>\t%q0, %q3, %f1";
  }
  [(set_attr "type" "neon_add_widen")]
)
1488
;; Unsigned widening sum for the VW modes: zero-extend operand 1 to
;; <V_widen> and add the accumulator (operand 2), as one vaddw.
(define_insn "widen_usum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (zero_extend:<V_widen>
            (match_operand:VW 1 "s_register_operand" "%w"))
          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)
1498
1499 ;; Helpers for quad-word reduction operations
1500
1501 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1502 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1503 ; N/2-element vector.
1504
;; V4SI variant: combine lanes 0-1 of operand 1 with lanes 2-3 of the
;; same register using a VQH_OPS operation, giving a V2SI result.
(define_insn "quad_halves_<code>v4si"
  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
        (VQH_OPS:V2SI
          (vec_select:V2SI
            (match_operand:V4SI 1 "s_register_operand" "w")
            (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SI
            (match_dup 1)
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1517
;; V4SF variant: combine lanes 0-1 of operand 1 with lanes 2-3 of the
;; same register using a VQHS_OPS operation, giving a V2SF result.
;; Only enabled with flag_unsafe_math_optimizations.
(define_insn "quad_halves_<code>v4sf"
  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
        (VQHS_OPS:V2SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "s_register_operand" "w")
            (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SF
            (match_dup 1)
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "<VQH_mnem>.f32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
)
1530
;; V8HI variant: combine lanes 0-3 of operand 1 with lanes 4-7 of the
;; same register using a VQH_OPS operation, giving a V4HI result.
;; Operand 0 is only written, never read, so it uses the output-only
;; modifier "=" (as the V4SI and V4SF variants do) rather than the
;; read-write modifier "+".
(define_insn "quad_halves_<code>v8hi"
  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
        (VQH_OPS:V4HI
          (vec_select:V4HI
            (match_operand:V8HI 1 "s_register_operand" "w")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (vec_select:V4HI
            (match_dup 1)
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1545
;; V16QI variant: combine lanes 0-7 of operand 1 with lanes 8-15 of the
;; same register using a VQH_OPS operation, giving a V8QI result.
;; Operand 0 is only written, never read, so it uses the output-only
;; modifier "=" (as the V4SI and V4SF variants do) rather than the
;; read-write modifier "+".
(define_insn "quad_halves_<code>v16qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (VQH_OPS:V8QI
          (vec_select:V8QI
            (match_operand:V16QI 1 "s_register_operand" "w")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (vec_select:V8QI
            (match_dup 1)
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1564
;; Store the half-width vector operand 1 into the <V_HALF> subreg of the
;; 128-bit operand 0 that starts at byte offset
;; GET_MODE_SIZE (<V_HALF>mode).
(define_expand "move_hi_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand" "")
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
  {
    rtx hi_part = simplify_gen_subreg (<V_HALF>mode, operands[0],
                                       <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode));

    emit_move_insn (hi_part, operands[1]);
    DONE;
  }
)
1575
;; Store the half-width vector operand 1 into the <V_HALF> subreg of the
;; 128-bit operand 0 that starts at byte offset 0.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand" "")
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
  {
    rtx lo_part = simplify_gen_subreg (<V_HALF>mode, operands[0],
                                       <MODE>mode, 0);

    emit_move_insn (lo_part, operands[1]);
    DONE;
  }
)
1586
1587 ;; Reduction operations
1588
;; Sum reduction of a 64-bit (VD) vector to a scalar.
;; For float modes this needs flag_unsafe_math_optimizations.
;; neon_pairwise_reduce is given the vpadd generator, and element 0 of
;; the reduced vector is extracted into operand 0.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  {
    rtx reduced = gen_reg_rtx (<MODE>mode);

    neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                          &gen_neon_vpadd_internal<mode>);

    /* The same result is actually computed into every element.  */
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                                const0_rtx));
    DONE;
  }
)
1601
;; Sum reduction of a 128-bit (VQ) vector to a scalar; little-endian
;; only, and float modes need flag_unsafe_math_optimizations.
;; The two halves are first added into a half-width vector, which is
;; then reduced by the <V_half> expander.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
  {
    rtx halves = gen_reg_rtx (<V_HALF>mode);

    emit_insn (gen_quad_halves_plus<mode> (halves, operands[1]));
    emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], halves));
    DONE;
  }
)
1615
;; Sum reduction of a V2DI vector to a DImode scalar; little-endian only.
;; arm_reduc_plus_internal_v2di computes the sum, then element 0 of its
;; result is extracted into operand 0.
(define_expand "reduc_plus_scal_v2di"
  [(match_operand:DI 0 "nonimmediate_operand" "=w")
   (match_operand:V2DI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtx summed = gen_reg_rtx (V2DImode);

    emit_insn (gen_arm_reduc_plus_internal_v2di (summed, operands[1]));
    emit_insn (gen_vec_extractv2didi (operands[0], summed, const0_rtx));
    DONE;
  }
)
1628
;; Helper for reduc_plus_scal_v2di, little-endian only.
;; Modelled as UNSPEC_VPADD on the whole V2DI register; the template is
;; a single vadd.i64 of %e1 and %f1 into %e0.
(define_insn "arm_reduc_plus_internal_v2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
                     UNSPEC_VPADD))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vadd.i64\t%e0, %e1, %f1"
  [(set_attr "type" "neon_add_q")]
)
1637
;; Signed-minimum reduction of a 64-bit (VD) vector to a scalar.
;; For float modes this needs flag_unsafe_math_optimizations.
;; neon_pairwise_reduce is given the vpsmin generator, and element 0 of
;; the reduced vector is extracted into operand 0.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  {
    rtx reduced = gen_reg_rtx (<MODE>mode);

    neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                          &gen_neon_vpsmin<mode>);

    /* The result is computed into every element of the vector.  */
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                                const0_rtx));
    DONE;
  }
)
1651
;; Signed-minimum reduction of a 128-bit (VQ) vector to a scalar;
;; little-endian only, and float modes need
;; flag_unsafe_math_optimizations.  The two halves are combined into a
;; half-width vector, which is then reduced by the <V_half> expander.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
  {
    rtx halves = gen_reg_rtx (<V_HALF>mode);

    emit_insn (gen_quad_halves_smin<mode> (halves, operands[1]));
    emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], halves));
    DONE;
  }
)
1665
;; Signed-maximum reduction of a 64-bit (VD) vector to a scalar.
;; For float modes this needs flag_unsafe_math_optimizations.
;; neon_pairwise_reduce is given the vpsmax generator, and element 0 of
;; the reduced vector is extracted into operand 0.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  {
    rtx reduced = gen_reg_rtx (<MODE>mode);

    neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                          &gen_neon_vpsmax<mode>);

    /* The result is computed into every element of the vector.  */
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                                const0_rtx));
    DONE;
  }
)
1678
;; Signed-maximum reduction of a 128-bit (VQ) vector to a scalar;
;; little-endian only, and float modes need
;; flag_unsafe_math_optimizations.  The two halves are combined into a
;; half-width vector, which is then reduced by the <V_half> expander.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
  {
    rtx halves = gen_reg_rtx (<V_HALF>mode);

    emit_insn (gen_quad_halves_smax<mode> (halves, operands[1]));
    emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], halves));
    DONE;
  }
)
1692
;; Unsigned-minimum reduction of a 64-bit integer (VDI) vector to a
;; scalar.  neon_pairwise_reduce is given the vpumin generator, and
;; element 0 of the reduced vector is extracted into operand 0.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
  {
    rtx reduced = gen_reg_rtx (<MODE>mode);

    neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                          &gen_neon_vpumin<mode>);

    /* The result is computed into every element of the vector.  */
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                                const0_rtx));
    DONE;
  }
)
1705
;; Unsigned-minimum reduction of a 128-bit integer (VQI) vector to a
;; scalar; little-endian only.  The two halves are combined into a
;; half-width vector, which is then reduced by the <V_half> expander.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtx halves = gen_reg_rtx (<V_HALF>mode);

    emit_insn (gen_quad_halves_umin<mode> (halves, operands[1]));
    emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], halves));
    DONE;
  }
)
1718
;; Unsigned-maximum reduction of a 64-bit integer (VDI) vector to a
;; scalar.  neon_pairwise_reduce is given the vpumax generator, and
;; element 0 of the reduced vector is extracted into operand 0.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
  {
    rtx reduced = gen_reg_rtx (<MODE>mode);

    neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                          &gen_neon_vpumax<mode>);

    /* The result is computed into every element of the vector.  */
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                                const0_rtx));
    DONE;
  }
)
1731
;; Unsigned-maximum reduction of a 128-bit integer (VQI) vector to a
;; scalar; little-endian only.  The two halves are combined into a
;; half-width vector, which is then reduced by the <V_half> expander.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtx halves = gen_reg_rtx (<V_HALF>mode);

    emit_insn (gen_quad_halves_umax<mode> (halves, operands[1]));
    emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], halves));
    DONE;
  }
)
1744
;; vpadd on two 64-bit (VD) vectors, represented as UNSPEC_VPADD.
;; Used as the step generator by the reduc_plus_scal_<mode> expander.
(define_insn "neon_vpadd_internal<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD
          [(match_operand:VD 1 "s_register_operand" "w")
           (match_operand:VD 2 "s_register_operand" "w")]
          UNSPEC_VPADD))]
  "TARGET_NEON"
  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
  ;; Assume this schedules like vadd.
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_add_s<q>")
                      (const_string "neon_reduc_add<q>")))]
)
1758
;; vpadd.f16 on two V4HF vectors, represented as UNSPEC_VPADD;
;; requires TARGET_NEON_FP16INST.
(define_insn "neon_vpaddv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4HF 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand" "w")]
          UNSPEC_VPADD))]
  "TARGET_NEON_FP16INST"
  "vpadd.f16\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_add")]
)
1769
;; vpmin with signed element type on two 64-bit (VD) vectors,
;; represented as UNSPEC_VPSMIN.  The scheduling type depends on whether
;; the mode is a float mode.
(define_insn "neon_vpsmin<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD
          [(match_operand:VD 1 "s_register_operand" "w")
           (match_operand:VD 2 "s_register_operand" "w")]
          UNSPEC_VPSMIN))]
  "TARGET_NEON"
  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)
1782
;; vpmax with signed element type on two 64-bit (VD) vectors,
;; represented as UNSPEC_VPSMAX.  The scheduling type depends on whether
;; the mode is a float mode.
(define_insn "neon_vpsmax<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD
          [(match_operand:VD 1 "s_register_operand" "w")
           (match_operand:VD 2 "s_register_operand" "w")]
          UNSPEC_VPSMAX))]
  "TARGET_NEON"
  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)
1795
;; vpmin with unsigned element type on two 64-bit integer (VDI) vectors,
;; represented as UNSPEC_VPUMIN.
(define_insn "neon_vpumin<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          UNSPEC_VPUMIN))]
  "TARGET_NEON"
  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
1805
;; Pairwise maximum over the VDI modes, represented as UNSPEC_VPUMAX and
;; emitted as vpmax with the <V_u_elem> element suffix.
1806 (define_insn "neon_vpumax<mode>"
1807 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1808 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1809 (match_operand:VDI 2 "s_register_operand" "w")]
1810 UNSPEC_VPUMAX))]
1811 "TARGET_NEON"
1812 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1813 [(set_attr "type" "neon_reduc_minmax<q>")]
1814 )
1815
1816 ;; Saturating arithmetic
1817
1818 ; NOTE: Neon supports many more saturating variants of instructions than the
1819 ; following, but these are all GCC currently understands.
1820 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1821 ; yet either, although these patterns may be used by intrinsics when they're
1822 ; added.
1823
;; Saturating add expressed with the ss_plus RTL code on VD modes; emitted
;; as vqadd with the <V_s_elem> element suffix.
1824 (define_insn "*ss_add<mode>_neon"
1825 [(set (match_operand:VD 0 "s_register_operand" "=w")
1826 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1827 (match_operand:VD 2 "s_register_operand" "w")))]
1828 "TARGET_NEON"
1829 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1830 [(set_attr "type" "neon_qadd<q>")]
1831 )
1832
;; Saturating add expressed with the us_plus RTL code on VD modes; emitted
;; as vqadd with the <V_u_elem> element suffix.
1833 (define_insn "*us_add<mode>_neon"
1834 [(set (match_operand:VD 0 "s_register_operand" "=w")
1835 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1836 (match_operand:VD 2 "s_register_operand" "w")))]
1837 "TARGET_NEON"
1838 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1839 [(set_attr "type" "neon_qadd<q>")]
1840 )
1841
;; Saturating subtract expressed with the ss_minus RTL code on VD modes;
;; emitted as vqsub with the <V_s_elem> element suffix.
1842 (define_insn "*ss_sub<mode>_neon"
1843 [(set (match_operand:VD 0 "s_register_operand" "=w")
1844 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1845 (match_operand:VD 2 "s_register_operand" "w")))]
1846 "TARGET_NEON"
1847 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1848 [(set_attr "type" "neon_qsub<q>")]
1849 )
1850
;; Saturating subtract expressed with the us_minus RTL code on VD modes;
;; emitted as vqsub with the <V_u_elem> element suffix.
1851 (define_insn "*us_sub<mode>_neon"
1852 [(set (match_operand:VD 0 "s_register_operand" "=w")
1853 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1854 (match_operand:VD 2 "s_register_operand" "w")))]
1855 "TARGET_NEON"
1856 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1857 [(set_attr "type" "neon_qsub<q>")]
1858 )
1859
1860 ;; Conditional instructions. These are comparisons with conditional moves for
1861 ;; vectors. They perform the assignment:
1862 ;;
1863 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1864 ;;
1865 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1866 ;; element-wise.
1867
;; Expander for the vector conditional select over the VDQW modes.
;; It emits one or two Neon comparisons that build a lane mask in
;; <V_cmp_result> mode, then a gen_neon_vbsl that picks operands[1] or
;; operands[2] per lane.  Three switch statements on the comparison code:
;;   1. decide whether the compare-against-zero form can be used, otherwise
;;      force operands[5] into a register;
;;   2. choose base_comparison / complimentary_comparison and whether the
;;      comparison is to be emitted with swapped operands (inverse);
;;   3. emit the comparison(s) and decide whether the two data operands of
;;      the final vbsl must be swapped (swap_bsl_operands).
;; Float modes are only handled when flag_unsafe_math_optimizations is set.
1868 (define_expand "vcond<mode><mode>"
1869 [(set (match_operand:VDQW 0 "s_register_operand" "")
1870 (if_then_else:VDQW
1871 (match_operator 3 "comparison_operator"
1872 [(match_operand:VDQW 4 "s_register_operand" "")
1873 (match_operand:VDQW 5 "nonmemory_operand" "")])
1874 (match_operand:VDQW 1 "s_register_operand" "")
1875 (match_operand:VDQW 2 "s_register_operand" "")))]
1876 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1877 {
1878 int inverse = 0;
1879 int use_zero_form = 0;
1880 int swap_bsl_operands = 0;
1881 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1882 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1883
1884 rtx (*base_comparison) (rtx, rtx, rtx);
1885 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1886
1887 switch (GET_CODE (operands[3]))
1888 {
1889 case GE:
1890 case GT:
1891 case LE:
1892 case LT:
1893 case EQ:
1894 if (operands[5] == CONST0_RTX (<MODE>mode))
1895 {
1896 use_zero_form = 1;
1897 break;
1898 }
1899 /* Fall through. */
1900 default:
1901 if (!REG_P (operands[5]))
1902 operands[5] = force_reg (<MODE>mode, operands[5]);
1903 }
1904
1905 switch (GET_CODE (operands[3]))
1906 {
1907 case LT:
1908 case UNLT:
1909 inverse = 1;
1910 /* Fall through. */
1911 case GE:
1912 case UNGE:
1913 case ORDERED:
1914 case UNORDERED:
1915 base_comparison = gen_neon_vcge<mode>;
1916 complimentary_comparison = gen_neon_vcgt<mode>;
1917 break;
1918 case LE:
1919 case UNLE:
1920 inverse = 1;
1921 /* Fall through. */
1922 case GT:
1923 case UNGT:
1924 base_comparison = gen_neon_vcgt<mode>;
1925 complimentary_comparison = gen_neon_vcge<mode>;
1926 break;
1927 case EQ:
1928 case NE:
1929 case UNEQ:
1930 base_comparison = gen_neon_vceq<mode>;
1931 complimentary_comparison = gen_neon_vceq<mode>;
1932 break;
1933 default:
1934 gcc_unreachable ();
1935 }
1936
1937 switch (GET_CODE (operands[3]))
1938 {
1939 case LT:
1940 case LE:
1941 case GT:
1942 case GE:
1943 case EQ:
1944 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1945 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1946 a GE b -> a GE b
1947 a GT b -> a GT b
1948 a LE b -> b GE a
1949 a LT b -> b GT a
1950 a EQ b -> a EQ b
1951 Note that there also exist direct comparison against 0 forms,
1952 so catch those as a special case. */
1953 if (use_zero_form)
1954 {
1955 inverse = 0;
1956 switch (GET_CODE (operands[3]))
1957 {
1958 case LT:
1959 base_comparison = gen_neon_vclt<mode>;
1960 break;
1961 case LE:
1962 base_comparison = gen_neon_vcle<mode>;
1963 break;
1964 default:
1965 /* Do nothing, other zero form cases already have the correct
1966 base_comparison. */
1967 break;
1968 }
1969 }
1970
1971 if (!inverse)
1972 emit_insn (base_comparison (mask, operands[4], operands[5]));
1973 else
1974 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1975 break;
1976 case UNLT:
1977 case UNLE:
1978 case UNGT:
1979 case UNGE:
1980 case NE:
1981 /* Vector compare returns false for lanes which are unordered, so if we use
1982 the inverse of the comparison we actually want to emit, then
1983 swap the operands to BSL, we will end up with the correct result.
1984 Note that a NE NaN and NaN NE b are true for all a, b.
1985
1986 Our transformations are:
1987 a UNGE b -> !(b GT a)
1988 a UNGT b -> !(b GE a)
1989 a UNLE b -> !(a GT b)
1990 a UNLT b -> !(a GE b)
1991 a NE b -> !(a EQ b) */
1992
1993 if (inverse)
1994 emit_insn (base_comparison (mask, operands[4], operands[5]));
1995 else
1996 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1997
1998 swap_bsl_operands = 1;
1999 break;
2000 case UNEQ:
2001 /* We check (a > b || b > a). Combining these comparisons gives us
2002 true iff (a != b && a ORDERED b); swapping the operands to BSL
2003 will then give us (a == b || a UNORDERED b) as intended. */
2004
2005 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
2006 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
2007 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2008 swap_bsl_operands = 1;
2009 break;
2010 case UNORDERED:
2011 /* Operands are ORDERED iff (a > b || b >= a).
2012 Swapping the operands to BSL will give the UNORDERED case. */
2013 swap_bsl_operands = 1;
2014 /* Fall through. */
2015 case ORDERED:
2016 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
2017 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
2018 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2019 break;
2020 default:
2021 gcc_unreachable ();
2022 }
2023
2024 if (swap_bsl_operands)
2025 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2026 operands[1]));
2027 else
2028 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2029 operands[2]));
2030 DONE;
2031 })
2032
;; Expander for the vector conditional select with unsigned comparisons
;; over the VDQIW modes.  Handles GEU, GTU, EQ, LEU, LTU and NE; any other
;; code hits gcc_unreachable.  A single comparison builds the lane mask in
;; <V_cmp_result> mode and gen_neon_vbsl then selects between operands[1]
;; and operands[2].  LEU and LTU are emitted as GEU/GTU with the comparison
;; operands swapped; NE is emitted as EQ with the vbsl data operands swapped.
;; NOTE(review): operand 5 uses the s_register_operand predicate, so the
;; CONST0_RTX test (and hence immediate_zero) looks unreachable when the
;; expander is entered through the predicate -- confirm.
;; NOTE(review): the immediate_zero arms of LEU/LTU call gen_neon_vcle /
;; gen_neon_vclt rather than a "u" variant; verify these give the intended
;; unsigned result before relying on that path.
2033 (define_expand "vcondu<mode><mode>"
2034 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2035 (if_then_else:VDQIW
2036 (match_operator 3 "arm_comparison_operator"
2037 [(match_operand:VDQIW 4 "s_register_operand" "")
2038 (match_operand:VDQIW 5 "s_register_operand" "")])
2039 (match_operand:VDQIW 1 "s_register_operand" "")
2040 (match_operand:VDQIW 2 "s_register_operand" "")))]
2041 "TARGET_NEON"
2042 {
2043 rtx mask;
2044 int inverse = 0, immediate_zero = 0;
2045
2046 mask = gen_reg_rtx (<V_cmp_result>mode);
2047
2048 if (operands[5] == CONST0_RTX (<MODE>mode))
2049 immediate_zero = 1;
2050 else if (!REG_P (operands[5]))
2051 operands[5] = force_reg (<MODE>mode, operands[5]);
2052
2053 switch (GET_CODE (operands[3]))
2054 {
2055 case GEU:
2056 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2057 break;
2058
2059 case GTU:
2060 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2061 break;
2062
2063 case EQ:
2064 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2065 break;
2066
2067 case LEU:
2068 if (immediate_zero)
2069 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2070 else
2071 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2072 break;
2073
2074 case LTU:
2075 if (immediate_zero)
2076 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2077 else
2078 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2079 break;
2080
2081 case NE:
2082 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2083 inverse = 1;
2084 break;
2085
2086 default:
2087 gcc_unreachable ();
2088 }
2089
2090 if (inverse)
2091 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2092 operands[1]));
2093 else
2094 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2095 operands[2]));
2096
2097 DONE;
2098 })
2099
2100 ;; Patterns for builtins.
2101
2102 ; good for plain vadd, vaddq.
2103
;; Builtin expander for vector add over the VCVTF modes.  When the mode is
;; not a float mode, or flag_unsafe_math_optimizations is set, it emits
;; the add<mode>3 pattern; otherwise it emits neon_vadd<mode>_unspec.
2104 (define_expand "neon_vadd<mode>"
2105 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2106 (match_operand:VCVTF 1 "s_register_operand" "w")
2107 (match_operand:VCVTF 2 "s_register_operand" "w")]
2108 "TARGET_NEON"
2109 {
2110 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2111 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2112 else
2113 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2114 operands[2]));
2115 DONE;
2116 })
2117
;; Builtin expander for vector add over the VH modes; always forwards to
;; the add<mode>3_fp16 pattern.  Requires TARGET_NEON_FP16INST.
2118 (define_expand "neon_vadd<mode>"
2119 [(match_operand:VH 0 "s_register_operand")
2120 (match_operand:VH 1 "s_register_operand")
2121 (match_operand:VH 2 "s_register_operand")]
2122 "TARGET_NEON_FP16INST"
2123 {
2124 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
2125 DONE;
2126 })
2127
;; Builtin expander for vector subtract over the VH modes; always forwards
;; to the sub<mode>3_fp16 pattern.  Requires TARGET_NEON_FP16INST.
2128 (define_expand "neon_vsub<mode>"
2129 [(match_operand:VH 0 "s_register_operand")
2130 (match_operand:VH 1 "s_register_operand")
2131 (match_operand:VH 2 "s_register_operand")]
2132 "TARGET_NEON_FP16INST"
2133 {
2134 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2135 DONE;
2136 })
2137
2138 ; Note that NEON operations don't support the full IEEE 754 standard: in
2139 ; particular, denormal values are flushed to zero. This means that GCC cannot
2140 ; use those instructions for autovectorization, etc. unless
2141 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2142 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2143 ; header) must work in either case: if -funsafe-math-optimizations is given,
2144 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2145 ; expand to unspecs (which may potentially limit the extent to which they might
2146 ; be optimized by generic code).
2147
2148 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2149
;; Vector add over the VCVTF modes kept opaque as an UNSPEC_VADD unspec;
;; emitted as vadd with the <V_if_elem> element suffix.  The scheduling
;; type is selected by <Is_float_mode>.
2150 (define_insn "neon_vadd<mode>_unspec"
2151 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2152 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2153 (match_operand:VCVTF 2 "s_register_operand" "w")]
2154 UNSPEC_VADD))]
2155 "TARGET_NEON"
2156 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2157 [(set (attr "type")
2158 (if_then_else (match_test "<Is_float_mode>")
2159 (const_string "neon_fp_addsub_s<q>")
2160 (const_string "neon_add<q>")))]
2161 )
2162
;; Long add: two VDI inputs produce a <V_widen> result, represented with
;; the VADDL unspec iterator and emitted as vaddl.  The result is printed
;; with %q and the inputs with %P.
2163 (define_insn "neon_vaddl<sup><mode>"
2164 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2165 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2166 (match_operand:VDI 2 "s_register_operand" "w")]
2167 VADDL))]
2168 "TARGET_NEON"
2169 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2170 [(set_attr "type" "neon_add_long")]
2171 )
2172
;; Wide add: a <V_widen> input plus a VDI input produce a <V_widen>
;; result, represented with the VADDW unspec iterator and emitted as vaddw.
2173 (define_insn "neon_vaddw<sup><mode>"
2174 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2175 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2176 (match_operand:VDI 2 "s_register_operand" "w")]
2177 VADDW))]
2178 "TARGET_NEON"
2179 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2180 [(set_attr "type" "neon_add_widen")]
2181 )
2182
2183 ; vhadd and vrhadd.
2184
;; Halving add over the VDQIW modes, represented with the VHADD unspec
;; iterator.  The <r> attribute selects between the vhadd and vrhadd
;; mnemonics and <sup> supplies the signedness prefix of the element type.
2185 (define_insn "neon_v<r>hadd<sup><mode>"
2186 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2187 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2188 (match_operand:VDQIW 2 "s_register_operand" "w")]
2189 VHADD))]
2190 "TARGET_NEON"
2191 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2192 [(set_attr "type" "neon_add_halve_q")]
2193 )
2194
;; Saturating add for the builtins over the VDQIX modes, represented with
;; the VQADD unspec iterator and emitted as vqadd.
2195 (define_insn "neon_vqadd<sup><mode>"
2196 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2197 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2198 (match_operand:VDQIX 2 "s_register_operand" "w")]
2199 VQADD))]
2200 "TARGET_NEON"
2201 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2202 [(set_attr "type" "neon_qadd<q>")]
2203 )
2204
;; Add-and-narrow: two VN inputs produce a <V_narrow> result, represented
;; with the VADDHN unspec iterator.  The <r> attribute selects between the
;; vaddhn and vraddhn mnemonics.
2205 (define_insn "neon_v<r>addhn<mode>"
2206 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2207 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2208 (match_operand:VN 2 "s_register_operand" "w")]
2209 VADDHN))]
2210 "TARGET_NEON"
2211 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2212 [(set_attr "type" "neon_add_halve_narrow_q")]
2213 )
2214
2215 ;; Polynomial and Float multiplication.
;; Multiply over the VPF modes kept opaque as an UNSPEC_VMUL unspec;
;; emitted as vmul with the <pf> type letter and the element size.  The
;; scheduling type is selected by <Is_float_mode>.
2216 (define_insn "neon_vmul<pf><mode>"
2217 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2218 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2219 (match_operand:VPF 2 "s_register_operand" "w")]
2220 UNSPEC_VMUL))]
2221 "TARGET_NEON"
2222 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2223 [(set (attr "type")
2224 (if_then_else (match_test "<Is_float_mode>")
2225 (const_string "neon_fp_mul_s<q>")
2226 (const_string "neon_mul_<V_elem_ch><q>")))]
2227 )
2228
;; Canonical (mult) multiply over the VH modes, emitted as vmul.f16.
;; Only available when both TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations hold.
2229 (define_insn "mul<mode>3"
2230 [(set
2231 (match_operand:VH 0 "s_register_operand" "=w")
2232 (mult:VH
2233 (match_operand:VH 1 "s_register_operand" "w")
2234 (match_operand:VH 2 "s_register_operand" "w")))]
2235 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2236 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2237 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2238 )
2239
;; Multiply over the VH modes, emitted as vmul.f16.  Uses the same (mult)
;; RTL as the VH mul<mode>3 pattern but only requires TARGET_NEON_FP16INST,
;; with no dependence on flag_unsafe_math_optimizations.
2240 (define_insn "neon_vmulf<mode>"
2241 [(set
2242 (match_operand:VH 0 "s_register_operand" "=w")
2243 (mult:VH
2244 (match_operand:VH 1 "s_register_operand" "w")
2245 (match_operand:VH 2 "s_register_operand" "w")))]
2246 "TARGET_NEON_FP16INST"
2247 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2248 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2249 )
2250
;; Builtin expander for multiply-accumulate over the VDQW modes.  When the
;; mode is not a float mode, or flag_unsafe_math_optimizations is set, it
;; emits mul<mode>3add<mode>_neon; otherwise it emits
;; neon_vmla<mode>_unspec.  Operands are passed through in the same order.
2251 (define_expand "neon_vmla<mode>"
2252 [(match_operand:VDQW 0 "s_register_operand" "=w")
2253 (match_operand:VDQW 1 "s_register_operand" "0")
2254 (match_operand:VDQW 2 "s_register_operand" "w")
2255 (match_operand:VDQW 3 "s_register_operand" "w")]
2256 "TARGET_NEON"
2257 {
2258 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2259 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2260 operands[2], operands[3]));
2261 else
2262 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2263 operands[2], operands[3]));
2264 DONE;
2265 })
2266
;; Builtin expander for fused multiply-add over the VCVTF modes.  Forwards
;; to fma<mode>4_intrinsic with the operands reordered: operands[2] and
;; operands[3] come first and operands[1] is passed last (presumably the
;; accumulator -- confirm against fma<mode>4_intrinsic).
2267 (define_expand "neon_vfma<VCVTF:mode>"
2268 [(match_operand:VCVTF 0 "s_register_operand")
2269 (match_operand:VCVTF 1 "s_register_operand")
2270 (match_operand:VCVTF 2 "s_register_operand")
2271 (match_operand:VCVTF 3 "s_register_operand")]
2272 "TARGET_NEON && TARGET_FMA"
2273 {
2274 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2275 operands[1]));
2276 DONE;
2277 })
2278
;; Builtin expander for fused multiply-add over the VH modes, enabled by
;; TARGET_NEON_FP16INST.  Forwards to fma<mode>4_intrinsic with
;; operands[2] and operands[3] first and operands[1] passed last.
2279 (define_expand "neon_vfma<VH:mode>"
2280 [(match_operand:VH 0 "s_register_operand")
2281 (match_operand:VH 1 "s_register_operand")
2282 (match_operand:VH 2 "s_register_operand")
2283 (match_operand:VH 3 "s_register_operand")]
2284 "TARGET_NEON_FP16INST"
2285 {
2286 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2287 operands[1]));
2288 DONE;
2289 })
2290
;; Builtin expander for fused multiply-subtract over the VCVTF modes.
;; Forwards to fmsub<mode>4_intrinsic with operands[2] and operands[3]
;; first and operands[1] passed last.
2291 (define_expand "neon_vfms<VCVTF:mode>"
2292 [(match_operand:VCVTF 0 "s_register_operand")
2293 (match_operand:VCVTF 1 "s_register_operand")
2294 (match_operand:VCVTF 2 "s_register_operand")
2295 (match_operand:VCVTF 3 "s_register_operand")]
2296 "TARGET_NEON && TARGET_FMA"
2297 {
2298 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2299 operands[1]));
2300 DONE;
2301 })
2302
;; Builtin expander for fused multiply-subtract over the VH modes, enabled
;; by TARGET_NEON_FP16INST.  Forwards to fmsub<mode>4_intrinsic with
;; operands[2] and operands[3] first and operands[1] passed last.
2303 (define_expand "neon_vfms<VH:mode>"
2304 [(match_operand:VH 0 "s_register_operand")
2305 (match_operand:VH 1 "s_register_operand")
2306 (match_operand:VH 2 "s_register_operand")
2307 (match_operand:VH 3 "s_register_operand")]
2308 "TARGET_NEON_FP16INST"
2309 {
2310 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2311 operands[1]));
2312 DONE;
2313 })
2314
2315 ;; The expand RTL structure here is not important.
2316 ;; We use the gen_* functions anyway.
2317 ;; We just need something to wrap the iterators around.
2318
;; Expander for the non-lane vfmal/vfmsl builtins, iterated over
;; <vfml_op>, <vfml_half> and the VCVTF modes.  It builds the half-selecting
;; constant with arm_simd_vect_par_cnst_half and passes it twice (once per
;; multiplicand) to the matching vfm<vfml_op>l_<vfml_half><mode>_intrinsic
;; pattern.
2319 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2320 [(set (match_operand:VCVTF 0 "s_register_operand")
2321 (unspec:VCVTF
2322 [(match_operand:VCVTF 1 "s_register_operand")
2323 (PLUSMINUS:<VFML>
2324 (match_operand:<VFML> 2 "s_register_operand")
2325 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2326 "TARGET_FP16FML"
2327 {
2328 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2329 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2330 operands[1],
2331 operands[2],
2332 operands[3],
2333 half, half));
2334 DONE;
2335 })
2336
;; vfmal on the low halves: an fma whose multiplicands are the
;; float-extended low-half vec_selects of operands 2 and 3 (operands 4 and 5
;; must satisfy vect_par_constant_low) and whose addend is operand 1, tied
;; to the output by the "0" constraint.  Inputs are printed with %<V_lo>.
2337 (define_insn "vfmal_low<mode>_intrinsic"
2338 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2339 (fma:VCVTF
2340 (float_extend:VCVTF
2341 (vec_select:<VFMLSEL>
2342 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2343 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2344 (float_extend:VCVTF
2345 (vec_select:<VFMLSEL>
2346 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2347 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2348 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2349 "TARGET_FP16FML"
2350 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2351 [(set_attr "type" "neon_fp_mla_s<q>")]
2352 )
2353
;; vfmsl on the high halves: as the vfmal high-half pattern, but the
;; selected half of operand 2 is wrapped in (neg ...) before being
;; float-extended.  Operand 1 is the addend, tied to the output by "0".
;; Inputs are printed with %<V_hi>.
2354 (define_insn "vfmsl_high<mode>_intrinsic"
2355 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2356 (fma:VCVTF
2357 (float_extend:VCVTF
2358 (neg:<VFMLSEL>
2359 (vec_select:<VFMLSEL>
2360 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2361 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2362 (float_extend:VCVTF
2363 (vec_select:<VFMLSEL>
2364 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2365 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2366 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2367 "TARGET_FP16FML"
2368 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2369 [(set_attr "type" "neon_fp_mla_s<q>")]
2370 )
2371
;; vfmal on the high halves: an fma whose multiplicands are the
;; float-extended high-half vec_selects of operands 2 and 3 (operands 4 and
;; 5 must satisfy vect_par_constant_high) and whose addend is operand 1,
;; tied to the output by the "0" constraint.  Inputs are printed with
;; %<V_hi>.
2372 (define_insn "vfmal_high<mode>_intrinsic"
2373 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2374 (fma:VCVTF
2375 (float_extend:VCVTF
2376 (vec_select:<VFMLSEL>
2377 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2378 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2379 (float_extend:VCVTF
2380 (vec_select:<VFMLSEL>
2381 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2382 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2383 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2384 "TARGET_FP16FML"
2385 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2386 [(set_attr "type" "neon_fp_mla_s<q>")]
2387 )
2388
;; vfmsl on the low halves: as the vfmal low-half pattern, but the selected
;; half of operand 2 is wrapped in (neg ...) before being float-extended.
;; Operand 1 is the addend, tied to the output by "0".  Inputs are printed
;; with %<V_lo>.
2389 (define_insn "vfmsl_low<mode>_intrinsic"
2390 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2391 (fma:VCVTF
2392 (float_extend:VCVTF
2393 (neg:<VFMLSEL>
2394 (vec_select:<VFMLSEL>
2395 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2396 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2397 (float_extend:VCVTF
2398 (vec_select:<VFMLSEL>
2399 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2400 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2401 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2402 "TARGET_FP16FML"
2403 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2404 [(set_attr "type" "neon_fp_mla_s<q>")]
2405 )
2406
;; Expander for the by-lane vfmal/vfmsl builtins where both multiplicands
;; have the same <VFML> mode.  operands[4] is the lane number as a
;; const_int; it is remapped with NEON_ENDIAN_LANE_N for <VFML>mode, and the
;; half-selecting constant for operand 2 is built with
;; arm_simd_vect_par_cnst_half.  Both are passed to the matching
;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic pattern.
2407 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2408 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2409 (unspec:VCVTF
2410 [(match_operand:VCVTF 1 "s_register_operand")
2411 (PLUSMINUS:<VFML>
2412 (match_operand:<VFML> 2 "s_register_operand")
2413 (match_operand:<VFML> 3 "s_register_operand"))
2414 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2415 "TARGET_FP16FML"
2416 {
2417 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2418 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2419 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2420 (operands[0], operands[1],
2421 operands[2], operands[3],
2422 half, lane));
2423 DONE;
2424 })
2425
;; By-lane vfmal on the low half of operand 2.  The second multiplicand is
;; a single HF element of operand 3 (lane given by operand 5), duplicated
;; across <VFMLSEL> and float-extended; operand 1 is the addend, tied to the
;; output by "0".
;; At output time the lane is passed through NEON_ENDIAN_LANE_N again.  If
;; it lies beyond the first GET_MODE_NUNITS (<VFMLSEL>mode) elements, the
;; index is rebased and operand 3 is printed with %<V_hi>; otherwise it is
;; printed with %<V_lo>.
2426 (define_insn "vfmal_lane_low<mode>_intrinsic"
2427 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2428 (fma:VCVTF
2429 (float_extend:VCVTF
2430 (vec_select:<VFMLSEL>
2431 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2432 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2433 (float_extend:VCVTF
2434 (vec_duplicate:<VFMLSEL>
2435 (vec_select:HF
2436 (match_operand:<VFML> 3 "s_register_operand" "x")
2437 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2438 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2439 "TARGET_FP16FML"
2440 {
2441 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2442 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2443 {
2444 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2445 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2446 }
2447 else
2448 {
2449 operands[5] = GEN_INT (lane);
2450 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2451 }
2452 }
2453 [(set_attr "type" "neon_fp_mla_s<q>")]
2454 )
2455
;; Expander for the by-lane vfmal/vfmsl builtins where the lane source
;; (operand 3) has mode <VFMLSEL2> rather than <VFML>.  The lane number in
;; operands[4] is remapped with NEON_ENDIAN_LANE_N for <VFMLSEL2>mode, while
;; the half-selecting constant for operand 2 is built for <VFML>mode.
2456 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2457 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2458 (unspec:VCVTF
2459 [(match_operand:VCVTF 1 "s_register_operand")
2460 (PLUSMINUS:<VFML>
2461 (match_operand:<VFML> 2 "s_register_operand")
2462 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2463 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2464 "TARGET_FP16FML"
2465 {
2466 rtx lane
2467 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2468 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2469 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2470 (operands[0], operands[1], operands[2], operands[3],
2471 half, lane));
2472 DONE;
2473 })
2474
2475 ;; Used to implement the intrinsics:
2476 ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2477 ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2478 ;; Needs a bit of care to get the modes of the different sub-expressions right
2479 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2480 ;; S or D subregister to select the appropriate lane from.
2481
;; Output logic: the lane is split into a register offset (lane /
;; elts_per_reg, added to REGNO of operand 3) and an index within that
;; register (lane % elts_per_reg), where elts_per_reg is the element count
;; of <VFMLSEL>mode.  Operands 2 and 3 are rebuilt as <VFMLSEL>mode hard
;; registers so that %<V_lane_reg> prints the desired register name.
2482 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2483 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2484 (fma:VCVTF
2485 (float_extend:VCVTF
2486 (vec_select:<VFMLSEL>
2487 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2488 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2489 (float_extend:VCVTF
2490 (vec_duplicate:<VFMLSEL>
2491 (vec_select:HF
2492 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2493 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2494 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2495 "TARGET_FP16FML"
2496 {
2497 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2498 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2499 int new_lane = lane % elts_per_reg;
2500 int regdiff = lane / elts_per_reg;
2501 operands[5] = GEN_INT (new_lane);
2502 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2503 because we want the print_operand code to print the appropriate
2504 S or D register prefix. */
2505 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2506 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2507 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2508 }
2509 [(set_attr "type" "neon_fp_mla_s<q>")]
2510 )
2511
2512 ;; Used to implement the intrinsics:
2513 ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2514 ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2515 ;; Needs a bit of care to get the modes of the different sub-expressions right
2516 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2517 ;; S or D subregister to select the appropriate lane from.
2518
;; Output logic: the lane is split into a register offset (lane /
;; elts_per_reg, added to REGNO of operand 3) and an index within that
;; register (lane % elts_per_reg).  Only operand 3 is rebuilt in
;; <VFMLSEL>mode; operand 2 keeps its mode and is printed with %<V_hi>.
2519 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2520 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2521 (fma:VCVTF
2522 (float_extend:VCVTF
2523 (vec_select:<VFMLSEL>
2524 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2525 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2526 (float_extend:VCVTF
2527 (vec_duplicate:<VFMLSEL>
2528 (vec_select:HF
2529 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2530 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2531 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2532 "TARGET_FP16FML"
2533 {
2534 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2535 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2536 int new_lane = lane % elts_per_reg;
2537 int regdiff = lane / elts_per_reg;
2538 operands[5] = GEN_INT (new_lane);
2539 /* We re-create operands[3] in the halved VFMLSEL mode
2540 because we've calculated the correct half-width subreg to extract
2541 the lane from and we want to print *that* subreg instead. */
2542 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2543 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2544 }
2545 [(set_attr "type" "neon_fp_mla_s<q>")]
2546 )
2547
;; By-lane vfmal on the high half of operand 2.  The second multiplicand is
;; a single HF element of operand 3 (lane given by operand 5), duplicated
;; across <VFMLSEL> and float-extended; operand 1 is the addend, tied to the
;; output by "0".
;; At output time the lane is passed through NEON_ENDIAN_LANE_N again.  If
;; it lies beyond the first GET_MODE_NUNITS (<VFMLSEL>mode) elements, the
;; index is rebased and operand 3 is printed with %<V_hi>; otherwise it is
;; printed with %<V_lo>.
2548 (define_insn "vfmal_lane_high<mode>_intrinsic"
2549 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2550 (fma:VCVTF
2551 (float_extend:VCVTF
2552 (vec_select:<VFMLSEL>
2553 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2554 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2555 (float_extend:VCVTF
2556 (vec_duplicate:<VFMLSEL>
2557 (vec_select:HF
2558 (match_operand:<VFML> 3 "s_register_operand" "x")
2559 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2560 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2561 "TARGET_FP16FML"
2562 {
2563 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2564 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2565 {
2566 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2567 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2568 }
2569 else
2570 {
2571 operands[5] = GEN_INT (lane);
2572 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2573 }
2574 }
2575 [(set_attr "type" "neon_fp_mla_s<q>")]
2576 )
2577
;; By-lane vfmsl on the low half of operand 2: the selected low half is
;; negated before being float-extended, and the second multiplicand is a
;; single HF element of operand 3 (lane given by operand 5) duplicated
;; across <VFMLSEL>.  Operand 1 is the addend, tied to the output by "0".
;; At output time the lane is passed through NEON_ENDIAN_LANE_N again.  If
;; it lies beyond the first GET_MODE_NUNITS (<VFMLSEL>mode) elements, the
;; index is rebased and operand 3 is printed with %<V_hi>; otherwise it is
;; printed with %<V_lo>.
2578 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2579 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2580 (fma:VCVTF
2581 (float_extend:VCVTF
2582 (neg:<VFMLSEL>
2583 (vec_select:<VFMLSEL>
2584 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2585 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2586 (float_extend:VCVTF
2587 (vec_duplicate:<VFMLSEL>
2588 (vec_select:HF
2589 (match_operand:<VFML> 3 "s_register_operand" "x")
2590 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2591 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2592 "TARGET_FP16FML"
2593 {
2594 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2595 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2596 {
2597 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2598 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2599 }
2600 else
2601 {
2602 operands[5] = GEN_INT (lane);
2603 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2604 }
2605 }
2606 [(set_attr "type" "neon_fp_mla_s<q>")]
2607 )
2608
2609 ;; Used to implement the intrinsics:
2610 ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2611 ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2612 ;; Needs a bit of care to get the modes of the different sub-expressions right
2613 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2614 ;; S or D subregister to select the appropriate lane from.
2615
;; Output logic: the lane is split into a register offset (lane /
;; elts_per_reg, added to REGNO of operand 3) and an index within that
;; register (lane % elts_per_reg), where elts_per_reg is the element count
;; of <VFMLSEL>mode.  Operands 2 and 3 are rebuilt as <VFMLSEL>mode hard
;; registers so that %<V_lane_reg> prints the desired register name.
2616 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2617 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2618 (fma:VCVTF
2619 (float_extend:VCVTF
2620 (neg:<VFMLSEL>
2621 (vec_select:<VFMLSEL>
2622 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2623 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2624 (float_extend:VCVTF
2625 (vec_duplicate:<VFMLSEL>
2626 (vec_select:HF
2627 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2628 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2629 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2630 "TARGET_FP16FML"
2631 {
2632 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2633 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2634 int new_lane = lane % elts_per_reg;
2635 int regdiff = lane / elts_per_reg;
2636 operands[5] = GEN_INT (new_lane);
2637 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2638 because we want the print_operand code to print the appropriate
2639 S or D register prefix. */
2640 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2641 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2642 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2643 }
2644 [(set_attr "type" "neon_fp_mla_s<q>")]
2645 )
2646
2647 ;; Used to implement the intrinsics:
2648 ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2649 ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2650 ;; Needs a bit of care to get the modes of the different sub-expressions right
2651 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2652 ;; S or D subregister to select the appropriate lane from.
2653
;; Output logic: the lane is split into a register offset (lane /
;; elts_per_reg, added to REGNO of operand 3) and an index within that
;; register (lane % elts_per_reg).  Only operand 3 is rebuilt in
;; <VFMLSEL>mode; operand 2 keeps its mode and is printed with %<V_hi>.
2654 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2655 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2656 (fma:VCVTF
2657 (float_extend:VCVTF
2658 (neg:<VFMLSEL>
2659 (vec_select:<VFMLSEL>
2660 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2661 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2662 (float_extend:VCVTF
2663 (vec_duplicate:<VFMLSEL>
2664 (vec_select:HF
2665 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2666 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2667 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2668 "TARGET_FP16FML"
2669 {
2670 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2671 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2672 int new_lane = lane % elts_per_reg;
2673 int regdiff = lane / elts_per_reg;
2674 operands[5] = GEN_INT (new_lane);
2675 /* We re-create operands[3] in the halved VFMLSEL mode
2676 because we've calculated the correct half-width subreg to extract
2677 the lane from and we want to print *that* subreg instead. */
2678 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2679 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2680 }
2681 [(set_attr "type" "neon_fp_mla_s<q>")]
2682 )
2683
;; VFMSL by lane using the "high" selection of operand 2, where the lane
;; source (operand 3) has the same <VFML> mode as operand 2.  A lane number
;; at or beyond GET_MODE_NUNITS (<VFMLSEL>mode) is rebased by that amount and
;; printed with %<V_hi>3; smaller lane numbers are printed with %<V_lo>3.
2684 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2685 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2686 (fma:VCVTF
2687 (float_extend:VCVTF
2688 (neg:<VFMLSEL>
2689 (vec_select:<VFMLSEL>
2690 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2691 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2692 (float_extend:VCVTF
2693 (vec_duplicate:<VFMLSEL>
2694 (vec_select:HF
2695 (match_operand:<VFML> 3 "s_register_operand" "x")
2696 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2697 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2698 "TARGET_FP16FML"
2699 {
2700 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
/* Lane lives past the first <VFMLSEL>-sized group of elements: rebase the
   index and select the %<V_hi> register of operand 3.  */
2701 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2702 {
2703 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2704 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2705 }
2706 else
2707 {
2708 operands[5] = GEN_INT (lane);
2709 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2710 }
2711 }
2712 [(set_attr "type" "neon_fp_mla_s<q>")]
2713 )
2714
2715 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2716
;; Multiply-accumulate expressed as UNSPEC_VMLA over VDQW modes.  Emits
;; "vmla" with the accumulator (operand 1) tied to the destination by the "0"
;; constraint.  The "type" attribute is selected by <Is_float_mode>.
2717 (define_insn "neon_vmla<mode>_unspec"
2718 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2719 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2720 (match_operand:VDQW 2 "s_register_operand" "w")
2721 (match_operand:VDQW 3 "s_register_operand" "w")]
2722 UNSPEC_VMLA))]
2723 "TARGET_NEON"
2724 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2725 [(set (attr "type")
2726 (if_then_else (match_test "<Is_float_mode>")
2727 (const_string "neon_fp_mla_s<q>")
2728 (const_string "neon_mla_<V_elem_ch><q>")))]
2729 )
2730
;; Emits "vmlal": operands 2 and 3 are VW-mode registers (printed with %P),
;; and the <V_widen>-mode accumulator (operand 1) is tied to the destination
;; (printed with %q).  Modelled as an unspec over the VMLAL iterator.
2731 (define_insn "neon_vmlal<sup><mode>"
2732 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2733 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2734 (match_operand:VW 2 "s_register_operand" "w")
2735 (match_operand:VW 3 "s_register_operand" "w")]
2736 VMLAL))]
2737 "TARGET_NEON"
2738 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2739 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2740 )
2741
;; Expander for multiply-subtract over VDQW modes.  For non-float modes, or
;; when flag_unsafe_math_optimizations is set, it emits the
;; mul<mode>3neg<mode>add<mode>_neon pattern; otherwise it emits the
;; neon_vmls<mode>_unspec pattern.  Always finishes with DONE, so the operand
;; list here only declares the operands.
2742 (define_expand "neon_vmls<mode>"
2743 [(match_operand:VDQW 0 "s_register_operand" "=w")
2744 (match_operand:VDQW 1 "s_register_operand" "0")
2745 (match_operand:VDQW 2 "s_register_operand" "w")
2746 (match_operand:VDQW 3 "s_register_operand" "w")]
2747 "TARGET_NEON"
2748 {
2749 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2750 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2751 operands[1], operands[2], operands[3]));
2752 else
2753 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2754 operands[2], operands[3]));
2755 DONE;
2756 })
2757
2758 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2759
;; Multiply-subtract expressed as UNSPEC_VMLS over VDQW modes.  Emits "vmls"
;; with the accumulator (operand 1) tied to the destination by the "0"
;; constraint.  The "type" attribute is selected by <Is_float_mode>.
2760 (define_insn "neon_vmls<mode>_unspec"
2761 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2762 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2763 (match_operand:VDQW 2 "s_register_operand" "w")
2764 (match_operand:VDQW 3 "s_register_operand" "w")]
2765 UNSPEC_VMLS))]
2766 "TARGET_NEON"
2767 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2768 [(set (attr "type")
2769 (if_then_else (match_test "<Is_float_mode>")
2770 (const_string "neon_fp_mla_s<q>")
2771 (const_string "neon_mla_<V_elem_ch><q>")))]
2772 )
2773
;; Emits "vmlsl": operands 2 and 3 are VW-mode registers (printed with %P),
;; and the <V_widen>-mode accumulator (operand 1) is tied to the destination
;; (printed with %q).  Modelled as an unspec over the VMLSL iterator.
2774 (define_insn "neon_vmlsl<sup><mode>"
2775 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2776 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2777 (match_operand:VW 2 "s_register_operand" "w")
2778 (match_operand:VW 3 "s_register_operand" "w")]
2779 VMLSL))]
2780 "TARGET_NEON"
2781 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2782 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2783 )
2784
2785 ;; vqdmulh, vqrdmulh
;; Emits "vq<r>dmulh" on two VMDQI-mode register operands; the operation is
;; modelled as an unspec over the VQDMULH iterator.
2786 (define_insn "neon_vq<r>dmulh<mode>"
2787 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2788 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2789 (match_operand:VMDQI 2 "s_register_operand" "w")]
2790 VQDMULH))]
2791 "TARGET_NEON"
2792 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2793 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2794 )
2795
2796 ;; vqrdmlah, vqrdmlsh
;; Emits "vqrdml<...>h" (mnemonic infix taken from the VQRDMLH_AS iterator)
;; with the accumulator (operand 1) tied to the destination.  Only enabled
;; under TARGET_NEON_RDMA.
;; NOTE(review): the "type" attribute carries a "_long" suffix although all
;; operands share the VMDQI mode -- confirm this is the intended scheduling
;; class.
2797 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2798 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2799 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2800 (match_operand:VMDQI 2 "s_register_operand" "w")
2801 (match_operand:VMDQI 3 "s_register_operand" "w")]
2802 VQRDMLH_AS))]
2803 "TARGET_NEON_RDMA"
2804 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2805 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2806 )
2807
;; Emits "vqdmlal": VMDI-mode operands 2 and 3 (printed with %P) and a
;; <V_widen>-mode accumulator (operand 1) tied to the destination (printed
;; with %q).  Modelled as UNSPEC_VQDMLAL.
2808 (define_insn "neon_vqdmlal<mode>"
2809 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2810 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2811 (match_operand:VMDI 2 "s_register_operand" "w")
2812 (match_operand:VMDI 3 "s_register_operand" "w")]
2813 UNSPEC_VQDMLAL))]
2814 "TARGET_NEON"
2815 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2816 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2817 )
2818
;; Emits "vqdmlsl": VMDI-mode operands 2 and 3 (printed with %P) and a
;; <V_widen>-mode accumulator (operand 1) tied to the destination (printed
;; with %q).  Modelled as UNSPEC_VQDMLSL.
2819 (define_insn "neon_vqdmlsl<mode>"
2820 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2821 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2822 (match_operand:VMDI 2 "s_register_operand" "w")
2823 (match_operand:VMDI 3 "s_register_operand" "w")]
2824 UNSPEC_VQDMLSL))]
2825 "TARGET_NEON"
2826 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2827 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2828 )
2829
;; Emits "vmull": two VW-mode sources (printed with %P) producing a
;; <V_widen>-mode result (printed with %q).  Modelled as an unspec over the
;; VMULL iterator.
2830 (define_insn "neon_vmull<sup><mode>"
2831 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2832 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2833 (match_operand:VW 2 "s_register_operand" "w")]
2834 VMULL))]
2835 "TARGET_NEON"
2836 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2837 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2838 )
2839
;; Emits "vqdmull": two VMDI-mode sources (printed with %P) producing a
;; <V_widen>-mode result (printed with %q).  Modelled as UNSPEC_VQDMULL.
2840 (define_insn "neon_vqdmull<mode>"
2841 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2842 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2843 (match_operand:VMDI 2 "s_register_operand" "w")]
2844 UNSPEC_VQDMULL))]
2845 "TARGET_NEON"
2846 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2847 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2848 )
2849
;; Expander for subtraction over the VCVTF modes.  For non-float modes, or
;; when flag_unsafe_math_optimizations is set, it emits sub<mode>3; otherwise
;; it emits neon_vsub<mode>_unspec.  Always finishes with DONE.
2850 (define_expand "neon_vsub<mode>"
2851 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2852 (match_operand:VCVTF 1 "s_register_operand" "w")
2853 (match_operand:VCVTF 2 "s_register_operand" "w")]
2854 "TARGET_NEON"
2855 {
2856 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2857 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2858 else
2859 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2860 operands[2]));
2861 DONE;
2862 })
2863
2864 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2865
;; Subtraction expressed as UNSPEC_VSUB over the VCVTF modes.  Emits "vsub";
;; the "type" attribute is selected by <Is_float_mode>.
2866 (define_insn "neon_vsub<mode>_unspec"
2867 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2868 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2869 (match_operand:VCVTF 2 "s_register_operand" "w")]
2870 UNSPEC_VSUB))]
2871 "TARGET_NEON"
2872 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2873 [(set (attr "type")
2874 (if_then_else (match_test "<Is_float_mode>")
2875 (const_string "neon_fp_addsub_s<q>")
2876 (const_string "neon_sub<q>")))]
2877 )
2878
;; Emits "vsubl": two VDI-mode sources (printed with %P) producing a
;; <V_widen>-mode result (printed with %q).  Modelled as an unspec over the
;; VSUBL iterator.
2879 (define_insn "neon_vsubl<sup><mode>"
2880 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2881 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2882 (match_operand:VDI 2 "s_register_operand" "w")]
2883 VSUBL))]
2884 "TARGET_NEON"
2885 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2886 [(set_attr "type" "neon_sub_long")]
2887 )
2888
;; Emits "vsubw": operand 1 and the result are <V_widen>-mode (printed with
;; %q) while operand 2 is VDI-mode (printed with %P).  Modelled as an unspec
;; over the VSUBW iterator.
2889 (define_insn "neon_vsubw<sup><mode>"
2890 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2891 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2892 (match_operand:VDI 2 "s_register_operand" "w")]
2893 VSUBW))]
2894 "TARGET_NEON"
2895 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2896 [(set_attr "type" "neon_sub_widen")]
2897 )
2898
;; Emits "vqsub" on two VDQIX-mode register operands; modelled as an unspec
;; over the VQSUB iterator.
2899 (define_insn "neon_vqsub<sup><mode>"
2900 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2901 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2902 (match_operand:VDQIX 2 "s_register_operand" "w")]
2903 VQSUB))]
2904 "TARGET_NEON"
2905 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2906 [(set_attr "type" "neon_qsub<q>")]
2907 )
2908
;; Emits "vhsub" on two VDQIW-mode register operands; modelled as an unspec
;; over the VHSUB iterator.
2909 (define_insn "neon_vhsub<sup><mode>"
2910 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2911 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2912 (match_operand:VDQIW 2 "s_register_operand" "w")]
2913 VHSUB))]
2914 "TARGET_NEON"
2915 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2916 [(set_attr "type" "neon_sub_halve<q>")]
2917 )
2918
;; Emits "v<r>subhn": two VN-mode sources (printed with %q) producing a
;; <V_narrow>-mode result (printed with %P).  Modelled as an unspec over the
;; VSUBHN iterator.
2919 (define_insn "neon_v<r>subhn<mode>"
2920 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2921 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2922 (match_operand:VN 2 "s_register_operand" "w")]
2923 VSUBHN))]
2924 "TARGET_NEON"
2925 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2926 [(set_attr "type" "neon_sub_halve_narrow_q")]
2927 )
2928
2929 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2930 ;; without unsafe math optimizations.
;; Expander for vector comparisons over the VDQW modes.  Operand 2 may be a
;; register or zero (reg_or_zero_operand).  For vector-float modes without
;; flag_unsafe_math_optimizations it dispatches by hand to the V2SF/V4SF
;; "_insn_unspec" patterns; in every other case it emits
;; neon_vc<cmp_op><mode>_insn.  Always finishes with DONE.
2931 (define_expand "neon_vc<cmp_op><mode>"
2932 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2933 (neg:<V_cmp_result>
2934 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2935 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2936 "TARGET_NEON"
2937 {
2938 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2939 are enabled. */
2940 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2941 && !flag_unsafe_math_optimizations)
2942 {
2943 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2944 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2945 whereas this expander iterates over the integer modes as well,
2946 but we will never expand to UNSPECs for the integer comparisons. */
2947 switch (<MODE>mode)
2948 {
2949 case E_V2SFmode:
2950 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2951 operands[1],
2952 operands[2]));
2953 break;
2954 case E_V4SFmode:
2955 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2956 operands[1],
2957 operands[2]));
2958 break;
2959 default:
/* Only the two float modes handled above can reach this switch.  */
2960 gcc_unreachable ();
2961 }
2962 }
2963 else
2964 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2965 operands[1],
2966 operands[2]));
2967 DONE;
2968 }
2969 )
2970
;; RTL (non-unspec) form of the VDQW comparisons.  It is disabled for
;; vector-float modes unless flag_unsafe_math_optimizations is set.
;; Alternative 0 compares against a register, alternative 1 against "#0".
;; The assembler template is built at output time so that the type letter
;; ("f" or <cmp_type>) and the last operand can vary.
2971 (define_insn "neon_vc<cmp_op><mode>_insn"
2972 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2973 (neg:<V_cmp_result>
2974 (COMPARISONS:<V_cmp_result>
2975 (match_operand:VDQW 1 "s_register_operand" "w,w")
2976 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2977 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2978 && !flag_unsafe_math_optimizations)"
2979 {
2980 char pattern[100];
2981 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2982 " %%<V_reg>1, %s",
2983 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2984 ? "f" : "<cmp_type>",
2985 which_alternative == 0
2986 ? "%<V_reg>2" : "#0");
/* The instruction has been printed by output_asm_insn, so an empty
   template is returned.  */
2987 output_asm_insn (pattern, operands);
2988 return "";
2989 }
2990 [(set (attr "type")
2991 (if_then_else (match_operand 2 "zero_operand")
2992 (const_string "neon_compare_zero<q>")
2993 (const_string "neon_compare<q>")))]
2994 )
2995
;; Unspec form (NEON_VCMP iterator) of the comparisons over the VCVTF modes.
;; Alternative 0 compares against a register, alternative 1 against "#0".
;; The template is assembled with sprintf and printed via output_asm_insn.
2996 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2997 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2998 (unspec:<V_cmp_result>
2999 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
3000 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
3001 NEON_VCMP))]
3002 "TARGET_NEON"
3003 {
3004 char pattern[100];
3005 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3006 " %%<V_reg>1, %s",
3007 which_alternative == 0
3008 ? "%<V_reg>2" : "#0");
3009 output_asm_insn (pattern, operands);
3010 return "";
3011 }
3012 [(set_attr "type" "neon_fp_compare_s<q>")]
3013 )
3014
;; Expander for vector comparisons over the VH modes, enabled under
;; TARGET_NEON_FP16INST.  For vector-float modes without
;; flag_unsafe_math_optimizations it emits the "_fp16insn_unspec" pattern,
;; otherwise the "_fp16insn" pattern.  Always finishes with DONE.
3015 (define_expand "neon_vc<cmp_op><mode>"
3016 [(match_operand:<V_cmp_result> 0 "s_register_operand")
3017 (neg:<V_cmp_result>
3018 (COMPARISONS:VH
3019 (match_operand:VH 1 "s_register_operand")
3020 (match_operand:VH 2 "reg_or_zero_operand")))]
3021 "TARGET_NEON_FP16INST"
3022 {
3023 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
3024 are enabled. */
3025 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3026 && !flag_unsafe_math_optimizations)
3027 emit_insn
3028 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
3029 (operands[0], operands[1], operands[2]));
3030 else
3031 emit_insn
3032 (gen_neon_vc<cmp_op><mode>_fp16insn
3033 (operands[0], operands[1], operands[2]));
3034 DONE;
3035 })
3036
;; RTL (non-unspec) form of the VH comparisons, enabled under
;; TARGET_NEON_FP16INST and disabled for vector-float modes unless
;; flag_unsafe_math_optimizations is set.  Alternative 0 compares against a
;; register, alternative 1 against "#0".  The template is assembled with
;; sprintf and printed via output_asm_insn.
3037 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
3038 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3039 (neg:<V_cmp_result>
3040 (COMPARISONS:<V_cmp_result>
3041 (match_operand:VH 1 "s_register_operand" "w,w")
3042 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3043 "TARGET_NEON_FP16INST
3044 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3045 && !flag_unsafe_math_optimizations)"
3046 {
3047 char pattern[100];
3048 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3049 " %%<V_reg>1, %s",
3050 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3051 ? "f" : "<cmp_type>",
3052 which_alternative == 0
3053 ? "%<V_reg>2" : "#0");
3054 output_asm_insn (pattern, operands);
3055 return "";
3056 }
3057 [(set (attr "type")
3058 (if_then_else (match_operand 2 "zero_operand")
3059 (const_string "neon_compare_zero<q>")
3060 (const_string "neon_compare<q>")))])
3061
;; Unspec form (NEON_VCMP iterator) of the VH comparisons, enabled under
;; TARGET_NEON_FP16INST.  Alternative 0 compares against a register,
;; alternative 1 against "#0".  The template is assembled with sprintf and
;; printed via output_asm_insn.
3062 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3063 [(set
3064 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3065 (unspec:<V_cmp_result>
3066 [(match_operand:VH 1 "s_register_operand" "w,w")
3067 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3068 NEON_VCMP))]
3069 "TARGET_NEON_FP16INST"
3070 {
3071 char pattern[100];
3072 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3073 " %%<V_reg>1, %s",
3074 which_alternative == 0
3075 ? "%<V_reg>2" : "#0");
3076 output_asm_insn (pattern, operands);
3077 return "";
3078 }
3079 [(set_attr "type" "neon_fp_compare_s<q>")])
3080
;; Comparisons from the GTUGEU iterator over the VDQIW modes; emits
;; "vc<cmp_op>.u<size>" on two register operands.
3081 (define_insn "neon_vc<cmp_op>u<mode>"
3082 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3083 (neg:<V_cmp_result>
3084 (GTUGEU:<V_cmp_result>
3085 (match_operand:VDQIW 1 "s_register_operand" "w")
3086 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3087 "TARGET_NEON"
3088 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3089 [(set_attr "type" "neon_compare<q>")]
3090 )
3091
;; Expander for comparisons of absolute values (GTGE iterator) over the
;; VCVTF modes.  With flag_unsafe_math_optimizations it emits the "_insn"
;; pattern, otherwise the "_insn_unspec" pattern.  Always finishes with DONE.
3092 (define_expand "neon_vca<cmp_op><mode>"
3093 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3094 (neg:<V_cmp_result>
3095 (GTGE:<V_cmp_result>
3096 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3097 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3098 "TARGET_NEON"
3099 {
3100 if (flag_unsafe_math_optimizations)
3101 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3102 operands[2]));
3103 else
3104 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
3105 operands[1],
3106 operands[2]));
3107 DONE;
3108 }
3109 )
3110
;; RTL form of the absolute-value comparisons over the VCVTF modes; only
;; enabled when flag_unsafe_math_optimizations is set.  Emits "vac<cmp_op>".
3111 (define_insn "neon_vca<cmp_op><mode>_insn"
3112 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3113 (neg:<V_cmp_result>
3114 (GTGE:<V_cmp_result>
3115 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3116 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3117 "TARGET_NEON && flag_unsafe_math_optimizations"
3118 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3119 [(set_attr "type" "neon_fp_compare_s<q>")]
3120 )
3121
;; Unspec form (NEON_VACMP iterator) of the absolute-value comparisons over
;; the VCVTF modes.  Emits "vac<cmp_op_unsp>".
3122 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3123 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3124 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3125 (match_operand:VCVTF 2 "s_register_operand" "w")]
3126 NEON_VACMP))]
3127 "TARGET_NEON"
3128 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3129 [(set_attr "type" "neon_fp_compare_s<q>")]
3130 )
3131
;; Expander for comparisons of absolute values (GLTE iterator) over the VH
;; modes, enabled under TARGET_NEON_FP16INST.  With
;; flag_unsafe_math_optimizations it emits the "_fp16insn" pattern, otherwise
;; the "_fp16insn_unspec" pattern.  Always finishes with DONE.
3132 (define_expand "neon_vca<cmp_op><mode>"
3133 [(set
3134 (match_operand:<V_cmp_result> 0 "s_register_operand")
3135 (neg:<V_cmp_result>
3136 (GLTE:<V_cmp_result>
3137 (abs:VH (match_operand:VH 1 "s_register_operand"))
3138 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3139 "TARGET_NEON_FP16INST"
3140 {
3141 if (flag_unsafe_math_optimizations)
3142 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3143 (operands[0], operands[1], operands[2]));
3144 else
3145 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3146 (operands[0], operands[1], operands[2]));
3147 DONE;
3148 })
3149
;; RTL form of the absolute-value comparisons over the VH modes; enabled
;; under TARGET_NEON_FP16INST and only when flag_unsafe_math_optimizations is
;; set.  Emits "vac<cmp_op>".
3150 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3151 [(set
3152 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3153 (neg:<V_cmp_result>
3154 (GLTE:<V_cmp_result>
3155 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3156 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3157 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3158 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3159 [(set_attr "type" "neon_fp_compare_s<q>")]
3160 )
3161
;; Unspec form (NEON_VAGLTE iterator) of the absolute-value comparisons over
;; the VH modes.  Emits "vac<cmp_op_unsp>".
;; The insn condition is TARGET_NEON_FP16INST, matching the VH expander that
;; generates this pattern and the other "_fp16insn" patterns: the operands
;; are half-precision vectors, so plain TARGET_NEON is not sufficient.
3162 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3163 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3164 (unspec:<V_cmp_result>
3165 [(match_operand:VH 1 "s_register_operand" "w")
3166 (match_operand:VH 2 "s_register_operand" "w")]
3167 NEON_VAGLTE))]
3168 "TARGET_NEON_FP16INST"
3169 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3170 [(set_attr "type" "neon_fp_compare_s<q>")]
3171 )
3172
;; Compare-against-zero expander for the VH modes, enabled under
;; TARGET_NEON_FP16INST.  It forwards to gen_neon_vc<cmp_op><mode> with
;; CONST0_RTX (<MODE>mode) as the second comparison operand, then finishes
;; with DONE.
3173 (define_expand "neon_vc<cmp_op>z<mode>"
3174 [(set
3175 (match_operand:<V_cmp_result> 0 "s_register_operand")
3176 (COMPARISONS:<V_cmp_result>
3177 (match_operand:VH 1 "s_register_operand")
3178 (const_int 0)))]
3179 "TARGET_NEON_FP16INST"
3180 {
3181 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3182 CONST0_RTX (<MODE>mode)));
3183 DONE;
3184 })
3185
;; Emits "vtst" on two VDQIW-mode register operands; modelled as UNSPEC_VTST.
3186 (define_insn "neon_vtst<mode>"
3187 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3188 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3189 (match_operand:VDQIW 2 "s_register_operand" "w")]
3190 UNSPEC_VTST))]
3191 "TARGET_NEON"
3192 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3193 [(set_attr "type" "neon_tst<q>")]
3194 )
3195
;; Emits "vabd" on two VDQIW-mode register operands; modelled as an unspec
;; over the VABD iterator.
3196 (define_insn "neon_vabd<sup><mode>"
3197 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3198 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3199 (match_operand:VDQIW 2 "s_register_operand" "w")]
3200 VABD))]
3201 "TARGET_NEON"
3202 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3203 [(set_attr "type" "neon_abd<q>")]
3204 )
3205
;; Emits "vabd" on two VH-mode register operands, enabled under
;; TARGET_NEON_FP16INST; modelled as UNSPEC_VABD_F.
;; NOTE(review): the "type" attribute is "neon_abd<q>", whereas the VCVTF
;; pattern neon_vabdf<mode> (same UNSPEC_VABD_F) uses "neon_fp_abd_s<q>" --
;; confirm which scheduling class is intended here.
3206 (define_insn "neon_vabd<mode>"
3207 [(set (match_operand:VH 0 "s_register_operand" "=w")
3208 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3209 (match_operand:VH 2 "s_register_operand" "w")]
3210 UNSPEC_VABD_F))]
3211 "TARGET_NEON_FP16INST"
3212 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3213 [(set_attr "type" "neon_abd<q>")]
3214 )
3215
;; Emits "vabd" on two VCVTF-mode register operands; modelled as
;; UNSPEC_VABD_F.
3216 (define_insn "neon_vabdf<mode>"
3217 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3218 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3219 (match_operand:VCVTF 2 "s_register_operand" "w")]
3220 UNSPEC_VABD_F))]
3221 "TARGET_NEON"
3222 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3223 [(set_attr "type" "neon_fp_abd_s<q>")]
3224 )
3225
;; Emits "vabdl": two VW-mode sources (printed with %P) producing a
;; <V_widen>-mode result (printed with %q).  Modelled as an unspec over the
;; VABDL iterator.
3226 (define_insn "neon_vabdl<sup><mode>"
3227 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3228 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3229 (match_operand:VW 2 "s_register_operand" "w")]
3230 VABDL))]
3231 "TARGET_NEON"
3232 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3233 [(set_attr "type" "neon_abd_long")]
3234 )
3235
;; Emits "vaba": described as a PLUS of a VABD-iterator unspec of operands 2
;; and 3 with operand 1, where operand 1 is tied to the destination by the
;; "0" constraint.
3236 (define_insn "neon_vaba<sup><mode>"
3237 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3238 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3239 (match_operand:VDQIW 3 "s_register_operand" "w")]
3240 VABD)
3241 (match_operand:VDQIW 1 "s_register_operand" "0")))]
3242 "TARGET_NEON"
3243 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3244 [(set_attr "type" "neon_arith_acc<q>")]
3245 )
3246
;; Emits "vabal": described as a PLUS of a VABDL-iterator unspec of the
;; VW-mode operands 2 and 3 with the <V_widen>-mode operand 1, where operand
;; 1 is tied to the destination by the "0" constraint.
3247 (define_insn "neon_vabal<sup><mode>"
3248 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3249 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3250 (match_operand:VW 3 "s_register_operand" "w")]
3251 VABDL)
3252 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3253 "TARGET_NEON"
3254 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3255 [(set_attr "type" "neon_arith_acc<q>")]
3256 )
3257
;; Emits "v<maxmin>" on two VDQIW-mode register operands; modelled as an
;; unspec over the VMAXMIN iterator.
3258 (define_insn "neon_v<maxmin><sup><mode>"
3259 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3260 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3261 (match_operand:VDQIW 2 "s_register_operand" "w")]
3262 VMAXMIN))]
3263 "TARGET_NEON"
3264 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3265 [(set_attr "type" "neon_minmax<q>")]
3266 )
3267
;; Emits "v<maxmin>" on two VCVTF-mode register operands; modelled as an
;; unspec over the VMAXMINF iterator.
3268 (define_insn "neon_v<maxmin>f<mode>"
3269 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3270 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3271 (match_operand:VCVTF 2 "s_register_operand" "w")]
3272 VMAXMINF))]
3273 "TARGET_NEON"
3274 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3275 [(set_attr "type" "neon_fp_minmax_s<q>")]
3276 )
3277
;; VH-mode counterpart of the VMAXMINF pattern: emits "v<maxmin>" on two
;; VH-mode register operands, enabled under TARGET_NEON_FP16INST.
3278 (define_insn "neon_v<maxmin>f<mode>"
3279 [(set (match_operand:VH 0 "s_register_operand" "=w")
3280 (unspec:VH
3281 [(match_operand:VH 1 "s_register_operand" "w")
3282 (match_operand:VH 2 "s_register_operand" "w")]
3283 VMAXMINF))]
3284 "TARGET_NEON_FP16INST"
3285 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3286 [(set_attr "type" "neon_fp_minmax_s<q>")]
3287 )
3288
;; Emits "vp<maxmin>.f16" on two V4HF register operands (printed with %P),
;; enabled under TARGET_NEON_FP16INST; modelled as an unspec over the
;; VPMAXMINF iterator.  Defined for the V4HF mode only.
3289 (define_insn "neon_vp<maxmin>fv4hf"
3290 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3291 (unspec:V4HF
3292 [(match_operand:V4HF 1 "s_register_operand" "w")
3293 (match_operand:V4HF 2 "s_register_operand" "w")]
3294 VPMAXMINF))]
3295 "TARGET_NEON_FP16INST"
3296 "vp<maxmin>.f16\t%P0, %P1, %P2"
3297 [(set_attr "type" "neon_reduc_minmax")]
3298 )
3299
;; Emits "<fmaxmin_op>" on two VH-mode register operands, enabled under
;; TARGET_NEON_FP16INST; modelled as an unspec over the VMAXMINFNM iterator.
3300 (define_insn "neon_<fmaxmin_op><mode>"
3301 [(set
3302 (match_operand:VH 0 "s_register_operand" "=w")
3303 (unspec:VH
3304 [(match_operand:VH 1 "s_register_operand" "w")
3305 (match_operand:VH 2 "s_register_operand" "w")]
3306 VMAXMINFNM))]
3307 "TARGET_NEON_FP16INST"
3308 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3309 [(set_attr "type" "neon_fp_minmax_s<q>")]
3310 )
3311
3312 ;; v<maxmin>nm intrinsics.
;; Emits "<fmaxmin_op>" on two VCVTF-mode register operands; requires both
;; TARGET_NEON and TARGET_VFP5.  Modelled as an unspec over the VMAXMINFNM
;; iterator.
3313 (define_insn "neon_<fmaxmin_op><mode>"
3314 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3315 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3316 (match_operand:VCVTF 2 "s_register_operand" "w")]
3317 VMAXMINFNM))]
3318 "TARGET_NEON && TARGET_VFP5"
3319 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3320 [(set_attr "type" "neon_fp_minmax_s<q>")]
3321 )
3322
3323 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL, condition and assembler output as the VCVTF
;; neon_<fmaxmin_op><mode> pattern, exposed under the name <fmaxmin><mode>3.
3324 (define_insn "<fmaxmin><mode>3"
3325 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3326 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3327 (match_operand:VCVTF 2 "s_register_operand" "w")]
3328 VMAXMINFNM))]
3329 "TARGET_NEON && TARGET_VFP5"
3330 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3331 [(set_attr "type" "neon_fp_minmax_s<q>")]
3332 )
3333
;; Expander over the VD modes that simply forwards its three operands to
;; neon_vpadd_internal<mode> and finishes with DONE.
3334 (define_expand "neon_vpadd<mode>"
3335 [(match_operand:VD 0 "s_register_operand" "=w")
3336 (match_operand:VD 1 "s_register_operand" "w")
3337 (match_operand:VD 2 "s_register_operand" "w")]
3338 "TARGET_NEON"
3339 {
3340 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3341 operands[2]));
3342 DONE;
3343 })
3344
;; Emits "vpaddl": one VDQIW-mode source producing a <V_double_width>-mode
;; result.  Modelled as an unspec over the VPADDL iterator.
3345 (define_insn "neon_vpaddl<sup><mode>"
3346 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3347 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3348 VPADDL))]
3349 "TARGET_NEON"
3350 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3351 [(set_attr "type" "neon_reduc_add_long")]
3352 )
3353
;; Emits "vpadal": the <V_double_width>-mode accumulator (operand 1) is tied
;; to the destination by the "0" constraint and operand 2 is the VDQIW-mode
;; source.  Modelled as an unspec over the VPADAL iterator.
3354 (define_insn "neon_vpadal<sup><mode>"
3355 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3356 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3357 (match_operand:VDQIW 2 "s_register_operand" "w")]
3358 VPADAL))]
3359 "TARGET_NEON"
3360 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3361 [(set_attr "type" "neon_reduc_add_acc")]
3362 )
3363
;; Emits "vp<maxmin>" on two VDI-mode register operands; modelled as an
;; unspec over the VPMAXMIN iterator.
3364 (define_insn "neon_vp<maxmin><sup><mode>"
3365 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3366 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3367 (match_operand:VDI 2 "s_register_operand" "w")]
3368 VPMAXMIN))]
3369 "TARGET_NEON"
3370 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3371 [(set_attr "type" "neon_reduc_minmax<q>")]
3372 )
3373
;; Emits "vp<maxmin>" on two VCVTF-mode register operands; modelled as an
;; unspec over the VPMAXMINF iterator.
3374 (define_insn "neon_vp<maxmin>f<mode>"
3375 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3376 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3377 (match_operand:VCVTF 2 "s_register_operand" "w")]
3378 VPMAXMINF))]
3379 "TARGET_NEON"
3380 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3381 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
3382 )
3383
;; Emits "vrecps" on two VCVTF-mode register operands; modelled as
;; UNSPEC_VRECPS.
3384 (define_insn "neon_vrecps<mode>"
3385 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3386 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3387 (match_operand:VCVTF 2 "s_register_operand" "w")]
3388 UNSPEC_VRECPS))]
3389 "TARGET_NEON"
3390 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3391 [(set_attr "type" "neon_fp_recps_s<q>")]
3392 )
3393
;; VH-mode counterpart of the UNSPEC_VRECPS pattern: emits "vrecps" on two
;; VH-mode register operands, enabled under TARGET_NEON_FP16INST.
3394 (define_insn "neon_vrecps<mode>"
3395 [(set
3396 (match_operand:VH 0 "s_register_operand" "=w")
3397 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3398 (match_operand:VH 2 "s_register_operand" "w")]
3399 UNSPEC_VRECPS))]
3400 "TARGET_NEON_FP16INST"
3401 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3402 [(set_attr "type" "neon_fp_recps_s<q>")]
3403 )
3404
;; Emits "vrsqrts" on two VCVTF-mode register operands; modelled as
;; UNSPEC_VRSQRTS.
3405 (define_insn "neon_vrsqrts<mode>"
3406 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3407 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3408 (match_operand:VCVTF 2 "s_register_operand" "w")]
3409 UNSPEC_VRSQRTS))]
3410 "TARGET_NEON"
3411 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3412 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3413 )
3414
;; VH-mode counterpart of the UNSPEC_VRSQRTS pattern: emits "vrsqrts" on two
;; VH-mode register operands, enabled under TARGET_NEON_FP16INST.
3415 (define_insn "neon_vrsqrts<mode>"
3416 [(set
3417 (match_operand:VH 0 "s_register_operand" "=w")
3418 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3419 (match_operand:VH 2 "s_register_operand" "w")]
3420 UNSPEC_VRSQRTS))]
3421 "TARGET_NEON_FP16INST"
3422 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3423 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3424 )
3425
;; Expander over the VDQW modes that forwards to abs<mode>2 and finishes
;; with DONE.
3426 (define_expand "neon_vabs<mode>"
3427 [(match_operand:VDQW 0 "s_register_operand" "")
3428 (match_operand:VDQW 1 "s_register_operand" "")]
3429 "TARGET_NEON"
3430 {
3431 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3432 DONE;
3433 })
3434
;; Emits "vqabs" on one VDQIW-mode register operand; modelled as
;; UNSPEC_VQABS.
3435 (define_insn "neon_vqabs<mode>"
3436 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3437 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3438 UNSPEC_VQABS))]
3439 "TARGET_NEON"
3440 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3441 [(set_attr "type" "neon_qabs<q>")]
3442 )
3443
;; Implements the RTL bswap operation over the VDQHSD modes by emitting
;; "vrev<V_sz_elem>.8".
3444 (define_insn "neon_bswap<mode>"
3445 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3446 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3447 "TARGET_NEON"
3448 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3449 [(set_attr "type" "neon_rev<q>")]
3450 )
3451
;; Expander over the VDQW modes that forwards to neg<mode>2 and finishes
;; with DONE.
3452 (define_expand "neon_vneg<mode>"
3453 [(match_operand:VDQW 0 "s_register_operand" "")
3454 (match_operand:VDQW 1 "s_register_operand" "")]
3455 "TARGET_NEON"
3456 {
3457 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3458 DONE;
3459 })
3460
3461
3462 ;; The vcadd and vcmla patterns are made UNSPEC explicitly due to the
3463 ;; fact that their usage needs to guarantee that the source vectors are
3464 ;; contiguous. It would be wrong to describe the operation without being able
3465 ;; to describe the permute that is also required, but even if that is done
3466 ;; the permute would have been created as a LOAD_LANES which means the values
3467 ;; in the registers are in the wrong order.
;; Emits "vcadd" on two VF-mode register operands with the rotation given as
;; the immediate "#<rot>"; enabled under TARGET_COMPLEX and modelled as an
;; unspec over the VCADD iterator.
3468 (define_insn "neon_vcadd<rot><mode>"
3469 [(set (match_operand:VF 0 "register_operand" "=w")
3470 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
3471 (match_operand:VF 2 "register_operand" "w")]
3472 VCADD))]
3473 "TARGET_COMPLEX"
3474 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
3475 [(set_attr "type" "neon_fcadd")]
3476 )
3477
;; Emits "vcmla" with rotation "#<rot>": described as a PLUS of the
;; accumulator (operand 1, tied to the destination by "0") and a
;; VCMLA-iterator unspec of operands 2 and 3.  Enabled under TARGET_COMPLEX.
3478 (define_insn "neon_vcmla<rot><mode>"
3479 [(set (match_operand:VF 0 "register_operand" "=w")
3480 (plus:VF (match_operand:VF 1 "register_operand" "0")
3481 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
3482 (match_operand:VF 3 "register_operand" "w")]
3483 VCMLA)))]
3484 "TARGET_COMPLEX"
3485 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
3486 [(set_attr "type" "neon_fcmla")]
3487 )
3488
;; By-lane "vcmla" where the lane source (operand 3) has the same VF mode as
;; the other vector operands; operand 4 is the constant lane number.  The
;; operand array is replaced by the result of
;; neon_vcmla_lane_prepare_operands before printing, and the template then
;; prints operand 3 as "d%c3[%c4]".
;; NOTE(review): presumably the helper rewrites operands 3 and 4 into a
;; D-register number and a lane within it -- confirm against its definition.
3489 (define_insn "neon_vcmla_lane<rot><mode>"
3490 [(set (match_operand:VF 0 "s_register_operand" "=w")
3491 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
3492 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
3493 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
3494 (match_operand:SI 4 "const_int_operand" "n")]
3495 VCMLA)))]
3496 "TARGET_COMPLEX"
3497 {
3498 operands = neon_vcmla_lane_prepare_operands (operands);
3499 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3500 }
3501 [(set_attr "type" "neon_fcmla")]
3502 )
3503
;; By-lane "vcmla" over the VDF modes where the lane source (operand 3) has
;; the <V_DOUBLE> mode of the other vector operands; operand 4 is the
;; constant lane number.  Operands are passed through
;; neon_vcmla_lane_prepare_operands before the template prints operand 3 as
;; "d%c3[%c4]".
3504 (define_insn "neon_vcmla_laneq<rot><mode>"
3505 [(set (match_operand:VDF 0 "s_register_operand" "=w")
3506 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3507 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3508 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3509 (match_operand:SI 4 "const_int_operand" "n")]
3510 VCMLA)))]
3511 "TARGET_COMPLEX"
3512 {
3513 operands = neon_vcmla_lane_prepare_operands (operands);
3514 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3515 }
3516 [(set_attr "type" "neon_fcmla")]
3517 )
3518
;; By-lane "vcmla" over the VQ_HSF modes where the lane source (operand 3)
;; has the <V_HALF> mode of the other vector operands; operand 4 is the
;; constant lane number.  Operands are passed through
;; neon_vcmla_lane_prepare_operands before the template prints operand 3 as
;; "d%c3[%c4]".
3519 (define_insn "neon_vcmlaq_lane<rot><mode>"
3520 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3521 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3522 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3523 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3524 (match_operand:SI 4 "const_int_operand" "n")]
3525 VCMLA)))]
3526 "TARGET_COMPLEX"
3527 {
3528 operands = neon_vcmla_lane_prepare_operands (operands);
3529 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3530 }
3531 [(set_attr "type" "neon_fcmla")]
3532 )
3533
3534
3535 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the VCVTI-mode accumulator, tied to the destination by the
;; "0" constraint; operands 2 and 3 are the <VSI2QI>-mode inputs of the
;; DOTPROD-iterator unspec.  Emits "v<sup>dot"; enabled under TARGET_DOTPROD.
3536 (define_insn "neon_<sup>dot<vsi2qi>"
3537 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3538 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3539 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3540 "register_operand" "w")
3541 (match_operand:<VSI2QI> 3
3542 "register_operand" "w")]
3543 DOTPROD)))]
3544 "TARGET_DOTPROD"
3545 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3546 [(set_attr "type" "neon_dot<q>")]
3547 )
3548
3549 ;; These instructions map to the __builtins for the Dot Product
3550 ;; indexed operations.
;; Operand 1 is the accumulator (tied to the output), operand 2 the <VSI2QI>
;; vector input, operand 3 a V8QI register (constraint "t") supplying the
;; indexed element and operand 4 the lane number.  The lane number is mapped
;; through NEON_ENDIAN_LANE_N for V8QImode before the instruction is printed.
3551 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3552 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3553 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3554 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3555 "register_operand" "w")
3556 (match_operand:V8QI 3 "register_operand" "t")
3557 (match_operand:SI 4 "immediate_operand" "i")]
3558 DOTPROD)))]
3559 "TARGET_DOTPROD"
3560 {
3561 operands[4]
3562 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3563 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3564 }
3565 [(set_attr "type" "neon_dot<q>")]
3566 )
3567
3568 ;; These expands map to the Dot Product optab the vectorizer checks for.
3569 ;; The auto-vectorizer expects a dot product builtin that also does an
3570 ;; accumulation into the provided register.
3571 ;; Given the following pattern
3572 ;;
3573 ;; for (i=0; i<len; i++) {
3574 ;; c = a[i] * b[i];
3575 ;; r += c;
3576 ;; }
3577 ;; return r;
3578 ;;
3579 ;; This can be auto-vectorized to
3580 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3581 ;;
3582 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3583 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3584 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3585 ;; ...
3586 ;;
3587 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Expander operands: 0 is the result, 1 and 2 are the <VSI2QI> inputs and 3
;; is the accumulator.  The expansion emits neon_<sup>dot<vsi2qi> with
;; operand 3 as both destination and accumulator, then copies operand 3 into
;; operand 0.
3588 (define_expand "<sup>dot_prod<vsi2qi>"
3589 [(set (match_operand:VCVTI 0 "register_operand")
3590 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3591 "register_operand")
3592 (match_operand:<VSI2QI> 2
3593 "register_operand")]
3594 DOTPROD)
3595 (match_operand:VCVTI 3 "register_operand")))]
3596 "TARGET_DOTPROD"
3597 {
3598 emit_insn (
3599 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3600 operands[2]));
3601 emit_insn (gen_rtx_SET (operands[0], operands[3]));
3602 DONE;
3603 })
3604
;; Expander for copysign on the VCVTF modes.  It builds a <V_cmp_result>
;; vector whose elements are all 0x80000000 (SImode), copies operand 2 into
;; operand 0, views the mask in <MODE> via a subreg and emits
;; neon_vbsl<mode> (operands[0], mask, operands[0], operands[1]).
;; NOTE(review): presumably the bit-select keeps the sign bit of operand 2
;; and takes the remaining bits from operand 1 -- confirm against
;; neon_vbsl<mode>.
3605 (define_expand "neon_copysignf<mode>"
3606 [(match_operand:VCVTF 0 "register_operand")
3607 (match_operand:VCVTF 1 "register_operand")
3608 (match_operand:VCVTF 2 "register_operand")]
3609 "TARGET_NEON"
3610 "{
3611 rtx v_bitmask_cast;
3612 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3613 rtx c = gen_int_mode (0x80000000, SImode);
3614
3615 emit_move_insn (v_bitmask,
3616 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3617 emit_move_insn (operands[0], operands[2]);
3618 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3619 <VCVTF:V_cmp_result>mode, 0);
3620 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3621 operands[1]));
3622
3623 DONE;
3624 }"
3625 )
3626
;; vqneg on the VDQIW modes, modelled as UNSPEC_VQNEG of operand 1 and
;; printed with the <V_s_elem> element suffix.
3627 (define_insn "neon_vqneg<mode>"
3628 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3629 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3630 UNSPEC_VQNEG))]
3631 "TARGET_NEON"
3632 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3633 [(set_attr "type" "neon_qneg<q>")]
3634 )
3635
;; vcls on the VDQIW modes, modelled as UNSPEC_VCLS of operand 1 and printed
;; with the <V_s_elem> element suffix.
3636 (define_insn "neon_vcls<mode>"
3637 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3638 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3639 UNSPEC_VCLS))]
3640 "TARGET_NEON"
3641 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3642 [(set_attr "type" "neon_cls<q>")]
3643 )
3644
;; Vector count-leading-zeros: the clz RTL code on the VDQIW modes, emitted
;; as vclz with the <V_if_elem> element suffix.
3645 (define_insn "clz<mode>2"
3646 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3647 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3648 "TARGET_NEON"
3649 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3650 [(set_attr "type" "neon_cnt<q>")]
3651 )
3652
;; Expander named neon_vclz<mode>; it simply forwards operands 0 and 1 to
;; the clz<mode>2 pattern.
3653 (define_expand "neon_vclz<mode>"
3654 [(match_operand:VDQIW 0 "s_register_operand" "")
3655 (match_operand:VDQIW 1 "s_register_operand" "")]
3656 "TARGET_NEON"
3657 {
3658 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3659 DONE;
3660 })
3661
;; Vector population count: the popcount RTL code on the VE modes, emitted
;; as vcnt with the <V_sz_elem> element size.
3662 (define_insn "popcount<mode>2"
3663 [(set (match_operand:VE 0 "s_register_operand" "=w")
3664 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3665 "TARGET_NEON"
3666 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3667 [(set_attr "type" "neon_cnt<q>")]
3668 )
3669
;; Expander named neon_vcnt<mode>; it simply forwards operands 0 and 1 to
;; the popcount<mode>2 pattern.
3670 (define_expand "neon_vcnt<mode>"
3671 [(match_operand:VE 0 "s_register_operand" "=w")
3672 (match_operand:VE 1 "s_register_operand" "w")]
3673 "TARGET_NEON"
3674 {
3675 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3676 DONE;
3677 })
3678
;; vrecpe.f16 on the VH modes, modelled as UNSPEC_VRECPE of operand 1.
;; Requires TARGET_NEON_FP16INST.
3679 (define_insn "neon_vrecpe<mode>"
3680 [(set (match_operand:VH 0 "s_register_operand" "=w")
3681 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3682 UNSPEC_VRECPE))]
3683 "TARGET_NEON_FP16INST"
3684 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3685 [(set_attr "type" "neon_fp_recpe_s<q>")]
3686 )
3687
;; vrecpe on the V32 modes, modelled as UNSPEC_VRECPE of operand 1 and
;; printed with the <V_u_elem> element suffix.
3688 (define_insn "neon_vrecpe<mode>"
3689 [(set (match_operand:V32 0 "s_register_operand" "=w")
3690 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3691 UNSPEC_VRECPE))]
3692 "TARGET_NEON"
3693 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3694 [(set_attr "type" "neon_fp_recpe_s<q>")]
3695 )
3696
;; vrsqrte on the V32 modes, modelled as UNSPEC_VRSQRTE of operand 1 and
;; printed with the <V_u_elem> element suffix.
3697 (define_insn "neon_vrsqrte<mode>"
3698 [(set (match_operand:V32 0 "s_register_operand" "=w")
3699 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3700 UNSPEC_VRSQRTE))]
3701 "TARGET_NEON"
3702 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3703 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3704 )
3705
;; Expander named neon_vmvn<mode>; it simply forwards operands 0 and 1 to
;; the one_cmpl<mode>2 pattern.
3706 (define_expand "neon_vmvn<mode>"
3707 [(match_operand:VDQIW 0 "s_register_operand" "")
3708 (match_operand:VDQIW 1 "s_register_operand" "")]
3709 "TARGET_NEON"
3710 {
3711 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3712 DONE;
3713 })
3714
;; Extract lane operand 2 of a VD-mode register (operand 1) and sign-extend
;; it to SImode in operand 0 (constraint "r"), using vmov.s<size>.  When
;; BYTES_BIG_ENDIAN the lane number is mirrored (nunits - 1 - lane) before
;; the instruction is printed.
3715 (define_insn "neon_vget_lane<mode>_sext_internal"
3716 [(set (match_operand:SI 0 "s_register_operand" "=r")
3717 (sign_extend:SI
3718 (vec_select:<V_elem>
3719 (match_operand:VD 1 "s_register_operand" "w")
3720 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3721 "TARGET_NEON"
3722 {
3723 if (BYTES_BIG_ENDIAN)
3724 {
3725 int elt = INTVAL (operands[2]);
3726 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3727 operands[2] = GEN_INT (elt);
3728 }
3729 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3730 }
3731 [(set_attr "type" "neon_to_gp")]
3732 )
3733
;; Extract lane operand 2 of a VD-mode register (operand 1) and zero-extend
;; it to SImode in operand 0 (constraint "r"), using vmov.u<size>.  When
;; BYTES_BIG_ENDIAN the lane number is mirrored (nunits - 1 - lane) before
;; the instruction is printed.
3734 (define_insn "neon_vget_lane<mode>_zext_internal"
3735 [(set (match_operand:SI 0 "s_register_operand" "=r")
3736 (zero_extend:SI
3737 (vec_select:<V_elem>
3738 (match_operand:VD 1 "s_register_operand" "w")
3739 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3740 "TARGET_NEON"
3741 {
3742 if (BYTES_BIG_ENDIAN)
3743 {
3744 int elt = INTVAL (operands[2]);
3745 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3746 operands[2] = GEN_INT (elt);
3747 }
3748 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3749 }
3750 [(set_attr "type" "neon_to_gp")]
3751 )
3752
;; Sign-extending lane extract for the VQ2 modes.  The lane is reduced to an
;; index within one half of the vector (elt % halfelts, mirrored when
;; BYTES_BIG_ENDIAN) and the source is rewritten as the <V_HALF>-mode
;; register numbered regno + 2 * (elt / halfelts).  The vmov.s<size> is
;; printed directly with output_asm_insn, so the returned template is empty.
3753 (define_insn "neon_vget_lane<mode>_sext_internal"
3754 [(set (match_operand:SI 0 "s_register_operand" "=r")
3755 (sign_extend:SI
3756 (vec_select:<V_elem>
3757 (match_operand:VQ2 1 "s_register_operand" "w")
3758 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3759 "TARGET_NEON"
3760 {
3761 rtx ops[3];
3762 int regno = REGNO (operands[1]);
3763 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3764 unsigned int elt = INTVAL (operands[2]);
3765 unsigned int elt_adj = elt % halfelts;
3766
3767 if (BYTES_BIG_ENDIAN)
3768 elt_adj = halfelts - 1 - elt_adj;
3769
3770 ops[0] = operands[0];
3771 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3772 ops[2] = GEN_INT (elt_adj);
3773 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3774
3775 return "";
3776 }
3777 [(set_attr "type" "neon_to_gp_q")]
3778 )
3779
;; Zero-extending lane extract for the VQ2 modes.  The lane is reduced to an
;; index within one half of the vector (elt % halfelts, mirrored when
;; BYTES_BIG_ENDIAN) and the source is rewritten as the <V_HALF>-mode
;; register numbered regno + 2 * (elt / halfelts).  The vmov.u<size> is
;; printed directly with output_asm_insn, so the returned template is empty.
3780 (define_insn "neon_vget_lane<mode>_zext_internal"
3781 [(set (match_operand:SI 0 "s_register_operand" "=r")
3782 (zero_extend:SI
3783 (vec_select:<V_elem>
3784 (match_operand:VQ2 1 "s_register_operand" "w")
3785 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3786 "TARGET_NEON"
3787 {
3788 rtx ops[3];
3789 int regno = REGNO (operands[1]);
3790 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3791 unsigned int elt = INTVAL (operands[2]);
3792 unsigned int elt_adj = elt % halfelts;
3793
3794 if (BYTES_BIG_ENDIAN)
3795 elt_adj = halfelts - 1 - elt_adj;
3796
3797 ops[0] = operands[0];
3798 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3799 ops[2] = GEN_INT (elt_adj);
3800 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3801
3802 return "";
3803 }
3804 [(set_attr "type" "neon_to_gp_q")]
3805 )
3806
;; Expander for the signed lane-extract builtin on the VDQW modes.  When
;; BYTES_BIG_ENDIAN the lane number is XORed with (elements per 64 bits - 1).
;; Modes with 32-bit elements are expanded through vec_extract<mode>...;
;; all other element sizes go through the sign-extending internal pattern.
3807 (define_expand "neon_vget_lane<mode>"
3808 [(match_operand:<V_ext> 0 "s_register_operand" "")
3809 (match_operand:VDQW 1 "s_register_operand" "")
3810 (match_operand:SI 2 "immediate_operand" "")]
3811 "TARGET_NEON"
3812 {
3813 if (BYTES_BIG_ENDIAN)
3814 {
3815 /* The intrinsics are defined in terms of a model where the
3816 element ordering in memory is vldm order, whereas the generic
3817 RTL is defined in terms of a model where the element ordering
3818 in memory is array order. Convert the lane number to conform
3819 to this model. */
3820 unsigned int elt = INTVAL (operands[2]);
3821 unsigned int reg_nelts
3822 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3823 elt ^= reg_nelts - 1;
3824 operands[2] = GEN_INT (elt);
3825 }
3826
3827 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3828 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3829 operands[2]));
3830 else
3831 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3832 operands[1],
3833 operands[2]));
3834 DONE;
3835 })
3836
;; Expander for the unsigned lane-extract builtin on the VDQIW modes.  When
;; BYTES_BIG_ENDIAN the lane number is XORed with (elements per 64 bits - 1).
;; Modes with 32-bit elements are expanded through vec_extract<mode>...;
;; all other element sizes go through the zero-extending internal pattern.
3837 (define_expand "neon_vget_laneu<mode>"
3838 [(match_operand:<V_ext> 0 "s_register_operand" "")
3839 (match_operand:VDQIW 1 "s_register_operand" "")
3840 (match_operand:SI 2 "immediate_operand" "")]
3841 "TARGET_NEON"
3842 {
3843 if (BYTES_BIG_ENDIAN)
3844 {
3845 /* The intrinsics are defined in terms of a model where the
3846 element ordering in memory is vldm order, whereas the generic
3847 RTL is defined in terms of a model where the element ordering
3848 in memory is array order. Convert the lane number to conform
3849 to this model. */
3850 unsigned int elt = INTVAL (operands[2]);
3851 unsigned int reg_nelts
3852 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3853 elt ^= reg_nelts - 1;
3854 operands[2] = GEN_INT (elt);
3855 }
3856
3857 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3858 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3859 operands[2]));
3860 else
3861 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3862 operands[1],
3863 operands[2]));
3864 DONE;
3865 })
3866
;; Lane extract for a DImode "vector": the expansion is a plain move of
;; operand 1 into operand 0.  The lane index (operand 2) is not used.
3867 (define_expand "neon_vget_lanedi"
3868 [(match_operand:DI 0 "s_register_operand" "=r")
3869 (match_operand:DI 1 "s_register_operand" "w")
3870 (match_operand:SI 2 "immediate_operand" "")]
3871 "TARGET_NEON"
3872 {
3873 emit_move_insn (operands[0], operands[1]);
3874 DONE;
3875 })
3876
;; Lane extract from a V2DI register into a DImode register.  When
;; BYTES_BIG_ENDIAN the lane number is flipped (lane ^ 1).  The lane must
;; then be 0 or 1 (asserted); lane 0 moves the low part of operand 1 and
;; lane 1 moves the high part.
3877 (define_expand "neon_vget_lanev2di"
3878 [(match_operand:DI 0 "s_register_operand" "")
3879 (match_operand:V2DI 1 "s_register_operand" "")
3880 (match_operand:SI 2 "immediate_operand" "")]
3881 "TARGET_NEON"
3882 {
3883 int lane;
3884
3885 if (BYTES_BIG_ENDIAN)
3886 {
3887 /* The intrinsics are defined in terms of a model where the
3888 element ordering in memory is vldm order, whereas the generic
3889 RTL is defined in terms of a model where the element ordering
3890 in memory is array order. Convert the lane number to conform
3891 to this model. */
3892 unsigned int elt = INTVAL (operands[2]);
3893 unsigned int reg_nelts = 2;
3894 elt ^= reg_nelts - 1;
3895 operands[2] = GEN_INT (elt);
3896 }
3897
3898 lane = INTVAL (operands[2]);
3899 gcc_assert ((lane ==0) || (lane == 1));
3900 emit_move_insn (operands[0], lane == 0
3901 ? gen_lowpart (DImode, operands[1])
3902 : gen_highpart (DImode, operands[1]));
3903 DONE;
3904 })
3905
;; Lane insert for the VDQ modes: operand 1 is the new element, operand 2 the
;; original vector and operand 3 the lane number.  When BYTES_BIG_ENDIAN the
;; lane number is XORed with (elements per 64 bits - 1).  The lane is handed
;; to vec_set<mode>_internal as the constant (1 << lane).
3906 (define_expand "neon_vset_lane<mode>"
3907 [(match_operand:VDQ 0 "s_register_operand" "=w")
3908 (match_operand:<V_elem> 1 "s_register_operand" "r")
3909 (match_operand:VDQ 2 "s_register_operand" "0")
3910 (match_operand:SI 3 "immediate_operand" "i")]
3911 "TARGET_NEON"
3912 {
3913 unsigned int elt = INTVAL (operands[3]);
3914
3915 if (BYTES_BIG_ENDIAN)
3916 {
3917 unsigned int reg_nelts
3918 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3919 elt ^= reg_nelts - 1;
3920 }
3921
3922 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3923 GEN_INT (1 << elt), operands[2]));
3924 DONE;
3925 })
3926
3927 ; As with neon_vget_lanedi, a DImode value has only a single lane, so
; operands 2 & 3 (the original value and the lane index) are ignored.
3928
;; Lane insert for a DImode "vector": the expansion is a plain move of the
;; new value (operand 1) into operand 0; operands 2 and 3 are not used.
3929 (define_expand "neon_vset_lanedi"
3930 [(match_operand:DI 0 "s_register_operand" "=w")
3931 (match_operand:DI 1 "s_register_operand" "r")
3932 (match_operand:DI 2 "s_register_operand" "0")
3933 (match_operand:SI 3 "immediate_operand" "i")]
3934 "TARGET_NEON"
3935 {
3936 emit_move_insn (operands[0], operands[1]);
3937 DONE;
3938 })
3939
;; Create a VD_RE-mode vector from a DImode value: operand 1 is viewed in
;; <MODE> with gen_lowpart and moved into operand 0.
3940 (define_expand "neon_vcreate<mode>"
3941 [(match_operand:VD_RE 0 "s_register_operand" "")
3942 (match_operand:DI 1 "general_operand" "")]
3943 "TARGET_NEON"
3944 {
3945 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3946 emit_move_insn (operands[0], src);
3947 DONE;
3948 })
3949
;; Duplicate a scalar held in a register with constraint "r" (operand 1)
;; into every element of a VX-mode vector, using vdup.<size>.
3950 (define_insn "neon_vdup_n<mode>"
3951 [(set (match_operand:VX 0 "s_register_operand" "=w")
3952 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3953 "TARGET_NEON"
3954 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3955 [(set_attr "type" "neon_from_gp<q>")]
3956 )
3957
;; Duplicate an HFmode scalar held in a register with constraint "r" into
;; every element of a V4HF vector, using vdup.16 on a D register (%P0).
3958 (define_insn "neon_vdup_nv4hf"
3959 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3960 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3961 "TARGET_NEON"
3962 "vdup.16\t%P0, %1"
3963 [(set_attr "type" "neon_from_gp")]
3964 )
3965
;; Duplicate an HFmode scalar held in a register with constraint "r" into
;; every element of a V8HF vector, using vdup.16 on a Q register (%q0).
3966 (define_insn "neon_vdup_nv8hf"
3967 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3968 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3969 "TARGET_NEON"
3970 "vdup.16\t%q0, %1"
3971 [(set_attr "type" "neon_from_gp_q")]
3972 )
3973
;; Duplicate a scalar into every element of a V32-mode vector.  Two
;; alternatives: the scalar in a register with constraint "r" (printed as
;; %1, type neon_from_gp<q>) or with constraint "t" (printed as %y1, type
;; neon_dup<q>).
3974 (define_insn "neon_vdup_n<mode>"
3975 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3976 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3977 "TARGET_NEON"
3978 "@
3979 vdup.<V_sz_elem>\t%<V_reg>0, %1
3980 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3981 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3982 )
3983
;; Duplicate for a DImode "vector": the expansion is a plain move of
;; operand 1 into operand 0.
3984 (define_expand "neon_vdup_ndi"
3985 [(match_operand:DI 0 "s_register_operand" "=w")
3986 (match_operand:DI 1 "s_register_operand" "r")]
3987 "TARGET_NEON"
3988 {
3989 emit_move_insn (operands[0], operands[1]);
3990 DONE;
3991 }
3992 )
3993
;; Duplicate a DImode value into both elements of a V2DI vector.  Each
;; alternative emits two vmov instructions (length 8), one writing %e0 and
;; one writing %f0; the source is either a register with constraint "r"
;; (printed as the pair %Q1, %R1) or one with constraint "w" (printed as
;; %P1).
3994 (define_insn "neon_vdup_nv2di"
3995 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3996 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3997 "TARGET_NEON"
3998 "@
3999 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
4000 vmov\t%e0, %P1\;vmov\t%f0, %P1"
4001 [(set_attr "length" "8")
4002 (set_attr "type" "multiple")]
4003 )
4004
;; Duplicate lane operand 2 of a <V_double_vector_mode> register (operand 1)
;; into every element of a VDQW-mode vector.  When BYTES_BIG_ENDIAN the lane
;; number is mirrored (nunits - 1 - lane) before printing.  The destination
;; is printed as a D register (%P0) when <Is_d_reg> is true and as a Q
;; register (%q0) otherwise.
4005 (define_insn "neon_vdup_lane<mode>_internal"
4006 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4007 (vec_duplicate:VDQW
4008 (vec_select:<V_elem>
4009 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
4010 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4011 "TARGET_NEON"
4012 {
4013 if (BYTES_BIG_ENDIAN)
4014 {
4015 int elt = INTVAL (operands[2]);
4016 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
4017 operands[2] = GEN_INT (elt);
4018 }
4019 if (<Is_d_reg>)
4020 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
4021 else
4022 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
4023 }
4024 [(set_attr "type" "neon_dup<q>")]
4025 )
4026
;; Duplicate lane operand 2 of a <V_double_vector_mode> register (operand 1)
;; into every element of a VH-mode vector; requires TARGET_NEON and
;; TARGET_FP16.  When BYTES_BIG_ENDIAN the lane number is mirrored
;; (nunits - 1 - lane) before printing.  The destination is printed as a D
;; register (%P0) when <Is_d_reg> is true and as a Q register (%q0)
;; otherwise.
4027 (define_insn "neon_vdup_lane<mode>_internal"
4028 [(set (match_operand:VH 0 "s_register_operand" "=w")
4029 (vec_duplicate:VH
4030 (vec_select:<V_elem>
4031 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
4032 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4033 "TARGET_NEON && TARGET_FP16"
4034 {
4035 if (BYTES_BIG_ENDIAN)
4036 {
4037 int elt = INTVAL (operands[2]);
4038 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
4039 operands[2] = GEN_INT (elt);
4040 }
4041 if (<Is_d_reg>)
4042 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
4043 else
4044 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
4045 }
4046 [(set_attr "type" "neon_dup<q>")]
4047 )
4048
;; Expander for the duplicate-lane builtin on the VDQW modes.  When
;; BYTES_BIG_ENDIAN the lane number is XORed with (elements per 64 bits - 1)
;; before the operands are passed to neon_vdup_lane<mode>_internal.
4049 (define_expand "neon_vdup_lane<mode>"
4050 [(match_operand:VDQW 0 "s_register_operand" "=w")
4051 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
4052 (match_operand:SI 2 "immediate_operand" "i")]
4053 "TARGET_NEON"
4054 {
4055 if (BYTES_BIG_ENDIAN)
4056 {
4057 unsigned int elt = INTVAL (operands[2]);
4058 unsigned int reg_nelts
4059 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
4060 elt ^= reg_nelts - 1;
4061 operands[2] = GEN_INT (elt);
4062 }
4063 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
4064 operands[2]));
4065 DONE;
4066 })
4067
;; Expander for the duplicate-lane builtin on the VH modes; requires
;; TARGET_NEON and TARGET_FP16.  When BYTES_BIG_ENDIAN the lane number is
;; XORed with (elements per 64 bits - 1) before the operands are passed to
;; neon_vdup_lane<mode>_internal.
4068 (define_expand "neon_vdup_lane<mode>"
4069 [(match_operand:VH 0 "s_register_operand")
4070 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
4071 (match_operand:SI 2 "immediate_operand")]
4072 "TARGET_NEON && TARGET_FP16"
4073 {
4074 if (BYTES_BIG_ENDIAN)
4075 {
4076 unsigned int elt = INTVAL (operands[2]);
4077 unsigned int reg_nelts
4078 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
4079 elt ^= reg_nelts - 1;
4080 operands[2] = GEN_INT (elt);
4081 }
4082 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
4083 operands[2]));
4084 DONE;
4085 })
4086
4087 ; Scalar index is ignored, since only zero is valid here.
;; Duplicate-lane for a DImode "vector": the expansion is a plain move of
;; operand 1 into operand 0; operand 2 is not used.
4088 (define_expand "neon_vdup_lanedi"
4089 [(match_operand:DI 0 "s_register_operand" "=w")
4090 (match_operand:DI 1 "s_register_operand" "w")
4091 (match_operand:SI 2 "immediate_operand" "i")]
4092 "TARGET_NEON"
4093 {
4094 emit_move_insn (operands[0], operands[1]);
4095 DONE;
4096 })
4097
4098 ; Likewise for v2di, as the DImode second operand has only a single element.
;; Duplicate-lane producing a V2DI vector from a DImode source: the
;; expansion forwards operands 0 and 1 to neon_vdup_nv2di; operand 2 is not
;; used.
4099 (define_expand "neon_vdup_lanev2di"
4100 [(match_operand:V2DI 0 "s_register_operand" "=w")
4101 (match_operand:DI 1 "s_register_operand" "w")
4102 (match_operand:SI 2 "immediate_operand" "i")]
4103 "TARGET_NEON"
4104 {
4105 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4106 DONE;
4107 })
4108
4109 ; Disabled before reload because we don't want combine doing something silly,
4110 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap for the VDQX modes, written as two sets that exchange
;; operands 0 and 1 (both "+w", i.e. read and written) and emitted as a
;; single vswp.  The condition restricts matching to reload_completed.
4111 (define_insn "*neon_vswp<mode>"
4112 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4113 (match_operand:VDQX 1 "s_register_operand" "+w"))
4114 (set (match_dup 1) (match_dup 0))]
4115 "TARGET_NEON && reload_completed"
4116 "vswp\t%<V_reg>0, %<V_reg>1"
4117 [(set_attr "type" "neon_permute<q>")]
4118 )
4119
4120 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4121 ;; dest vector.
4122 ;; FIXME: A different implementation of this builtin could make it much
4123 ;; more likely that we wouldn't actually need to output anything (we could make
4124 ;; it so that the reg allocator puts things in the right places magically
4125 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
4126
;; Concatenate two VDX-mode registers (operands 1 and 2) into a <V_DOUBLE>
;; register with vec_concat.  The insn template is "#", so it is never
;; printed directly; once reload_completed holds it is split by calling
;; neon_split_vcombine on the operands.
4127 (define_insn_and_split "neon_vcombine<mode>"
4128 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4129 (vec_concat:<V_DOUBLE>
4130 (match_operand:VDX 1 "s_register_operand" "w")
4131 (match_operand:VDX 2 "s_register_operand" "w")))]
4132 "TARGET_NEON"
4133 "#"
4134 "&& reload_completed"
4135 [(const_int 0)]
4136 {
4137 neon_split_vcombine (operands);
4138 DONE;
4139 }
4140 [(set_attr "type" "multiple")]
4141 )
4142
;; Move one <V_HALF>-mode half of a VQX-mode register (operand 1) into
;; operand 0, taken as a subreg at byte offset GET_MODE_SIZE (<V_HALF>mode).
4143 (define_expand "neon_vget_high<mode>"
4144 [(match_operand:<V_HALF> 0 "s_register_operand")
4145 (match_operand:VQX 1 "s_register_operand")]
4146 "TARGET_NEON"
4147 {
4148 emit_move_insn (operands[0],
4149 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4150 GET_MODE_SIZE (<V_HALF>mode)));
4151 DONE;
4152 })
4153
;; Move one <V_HALF>-mode half of a VQX-mode register (operand 1) into
;; operand 0, taken as a subreg at byte offset 0.
4154 (define_expand "neon_vget_low<mode>"
4155 [(match_operand:<V_HALF> 0 "s_register_operand")
4156 (match_operand:VQX 1 "s_register_operand")]
4157 "TARGET_NEON"
4158 {
4159 emit_move_insn (operands[0],
4160 simplify_gen_subreg (<V_HALF>mode, operands[1],
4161 <MODE>mode, 0));
4162 DONE;
4163 })
4164
;; Signed integer to floating-point conversion (float RTL code) from a
;; VCVTI-mode vector to <V_CVTTO>, emitted as vcvt.f32.s32.  Only enabled
;; when flag_rounding_math is not set.
4165 (define_insn "float<mode><V_cvtto>2"
4166 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4167 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4168 "TARGET_NEON && !flag_rounding_math"
4169 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4170 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4171 )
4172
;; Unsigned integer to floating-point conversion (unsigned_float RTL code)
;; from a VCVTI-mode vector to <V_CVTTO>, emitted as vcvt.f32.u32.  Only
;; enabled when flag_rounding_math is not set.
4173 (define_insn "floatuns<mode><V_cvtto>2"
4174 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4175 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4176 "TARGET_NEON && !flag_rounding_math"
4177 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4178 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4179 )
4180
;; Floating-point to signed integer conversion (fix RTL code) from a
;; VCVTF-mode vector to <V_CVTTO>, emitted as vcvt.s32.f32.
4181 (define_insn "fix_trunc<mode><V_cvtto>2"
4182 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4183 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4184 "TARGET_NEON"
4185 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4186 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4187 )
4188
;; Floating-point to unsigned integer conversion (unsigned_fix RTL code)
;; from a VCVTF-mode vector to <V_CVTTO>, emitted as vcvt.u32.f32.
4189 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4190 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4191 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4192 "TARGET_NEON"
4193 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4194 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4195 )
4196
;; Builtin form of the f32 -> 32-bit integer conversion for the VCVTF modes,
;; modelled with the VCVT_US unspecs; <sup> supplies the signedness letter
;; in the vcvt.<sup>32.f32 mnemonic.
4197 (define_insn "neon_vcvt<sup><mode>"
4198 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4199 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4200 VCVT_US))]
4201 "TARGET_NEON"
4202 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4203 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4204 )
4205
;; Builtin form of the 32-bit integer -> f32 conversion for the VCVTI modes,
;; modelled with the VCVT_US unspecs; <sup> supplies the signedness letter
;; in the vcvt.f32.<sup>32 mnemonic.
4206 (define_insn "neon_vcvt<sup><mode>"
4207 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4208 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4209 VCVT_US))]
4210 "TARGET_NEON"
4211 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4212 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4213 )
4214
;; Widening conversion from a V4HF D register to a V4SF Q register
;; (vcvt.f32.f16), modelled as UNSPEC_VCVT.  Requires TARGET_NEON and
;; TARGET_FP16.
4215 (define_insn "neon_vcvtv4sfv4hf"
4216 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4217 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4218 UNSPEC_VCVT))]
4219 "TARGET_NEON && TARGET_FP16"
4220 "vcvt.f32.f16\t%q0, %P1"
4221 [(set_attr "type" "neon_fp_cvt_widen_h")]
4222 )
4223
;; Narrowing conversion from a V4SF Q register to a V4HF D register
;; (vcvt.f16.f32), modelled as UNSPEC_VCVT.  Requires TARGET_NEON and
;; TARGET_FP16.
4224 (define_insn "neon_vcvtv4hfv4sf"
4225 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4226 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4227 UNSPEC_VCVT))]
4228 "TARGET_NEON && TARGET_FP16"
4229 "vcvt.f16.f32\t%P0, %q1"
4230 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
4231 )
4232
;; 16-bit integer -> f16 conversion for the VCVTHI modes, modelled with the
;; VCVT_US unspecs and emitted as vcvt.f16.<sup>16.  Requires
;; TARGET_NEON_FP16INST.
4233 (define_insn "neon_vcvt<sup><mode>"
4234 [(set
4235 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4236 (unspec:<VH_CVTTO>
4237 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4238 VCVT_US))]
4239 "TARGET_NEON_FP16INST"
4240 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4241 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4242 )
4243
;; f16 -> 16-bit integer conversion for the VH modes, modelled with the
;; VCVT_US unspecs and emitted as vcvt.<sup>16.f16.  Requires
;; TARGET_NEON_FP16INST.
4244 (define_insn "neon_vcvt<sup><mode>"
4245 [(set
4246 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4247 (unspec:<VH_CVTTO>
4248 [(match_operand:VH 1 "s_register_operand" "w")]
4249 VCVT_US))]
4250 "TARGET_NEON_FP16INST"
4251 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4252 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4253 )
4254
;; f32 -> 32-bit conversion with an immediate third assembler operand
;; (operand 2) for the VCVTF modes, modelled with the VCVT_US_N unspecs.
;; Operand 2 is checked with arm_const_bounds (operands[2], 1, 33) before
;; the template is returned.
4255 (define_insn "neon_vcvt<sup>_n<mode>"
4256 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4257 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4258 (match_operand:SI 2 "immediate_operand" "i")]
4259 VCVT_US_N))]
4260 "TARGET_NEON"
4261 {
4262 arm_const_bounds (operands[2], 1, 33);
4263 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4264 }
4265 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4266 )
4267
;; f16 -> 16-bit conversion with an immediate third assembler operand
;; (operand 2) for the VH modes, modelled with the VCVT_US_N unspecs.
;; Requires TARGET_NEON_FP16INST.  Operand 2 is checked with
;; arm_const_bounds (operands[2], 0, 17) before the template is returned.
;; NOTE(review): the lower bound here is 0, whereas the 32-bit _n patterns
;; in this file pass 1 -- confirm that 0 is intended.
4268 (define_insn "neon_vcvt<sup>_n<mode>"
4269 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4270 (unspec:<VH_CVTTO>
4271 [(match_operand:VH 1 "s_register_operand" "w")
4272 (match_operand:SI 2 "immediate_operand" "i")]
4273 VCVT_US_N))]
4274 "TARGET_NEON_FP16INST"
4275 {
4276 arm_const_bounds (operands[2], 0, 17);
4277 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4278 }
4279 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4280 )
4281
;; 32-bit -> f32 conversion with an immediate third assembler operand
;; (operand 2) for the VCVTI modes, modelled with the VCVT_US_N unspecs.
;; Operand 2 is checked with arm_const_bounds (operands[2], 1, 33) before
;; the template is returned.
4282 (define_insn "neon_vcvt<sup>_n<mode>"
4283 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4284 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4285 (match_operand:SI 2 "immediate_operand" "i")]
4286 VCVT_US_N))]
4287 "TARGET_NEON"
4288 {
4289 arm_const_bounds (operands[2], 1, 33);
4290 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4291 }
4292 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4293 )
4294
;; 16-bit -> f16 conversion with an immediate third assembler operand
;; (operand 2) for the VCVTHI modes, modelled with the VCVT_US_N unspecs.
;; Requires TARGET_NEON_FP16INST.  Operand 2 is checked with
;; arm_const_bounds (operands[2], 0, 17) before the template is returned.
;; NOTE(review): the lower bound here is 0, whereas the 32-bit _n patterns
;; in this file pass 1 -- confirm that 0 is intended.
4295 (define_insn "neon_vcvt<sup>_n<mode>"
4296 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4297 (unspec:<VH_CVTTO>
4298 [(match_operand:VCVTHI 1 "s_register_operand" "w")
4299 (match_operand:SI 2 "immediate_operand" "i")]
4300 VCVT_US_N))]
4301 "TARGET_NEON_FP16INST"
4302 {
4303 arm_const_bounds (operands[2], 0, 17);
4304 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4305 }
4306 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4307 )
4308
;; f16 -> 16-bit integer conversions for the VH modes selected by the
;; VCVT_HF_US unspecs; <vcvth_op> is appended to the vcvt mnemonic and <sup>
;; supplies the signedness letter.  Requires TARGET_NEON_FP16INST.
4309 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4310 [(set
4311 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4312 (unspec:<VH_CVTTO>
4313 [(match_operand:VH 1 "s_register_operand" "w")]
4314 VCVT_HF_US))]
4315 "TARGET_NEON_FP16INST"
4316 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4317 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4318 )
4319
;; vmovn: narrow a VN-mode Q register (operand 1, %q1) into a <V_narrow>
;; D register (operand 0, %P0), modelled as UNSPEC_VMOVN.
4320 (define_insn "neon_vmovn<mode>"
4321 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4322 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4323 UNSPEC_VMOVN))]
4324 "TARGET_NEON"
4325 "vmovn.<V_if_elem>\t%P0, %q1"
4326 [(set_attr "type" "neon_shift_imm_narrow_q")]
4327 )
4328
;; vqmovn: narrow a VN-mode Q register (operand 1) into a <V_narrow> D
;; register (operand 0), modelled with the VQMOVN unspecs; <sup> supplies
;; the signedness letter of the element suffix.
4329 (define_insn "neon_vqmovn<sup><mode>"
4330 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4331 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4332 VQMOVN))]
4333 "TARGET_NEON"
4334 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4335 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4336 )
4337
;; vqmovun: narrow a VN-mode Q register (operand 1) into a <V_narrow> D
;; register (operand 0), modelled as UNSPEC_VQMOVUN and printed with the
;; <V_s_elem> element suffix.
4338 (define_insn "neon_vqmovun<mode>"
4339 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4340 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4341 UNSPEC_VQMOVUN))]
4342 "TARGET_NEON"
4343 "vqmovun.<V_s_elem>\t%P0, %q1"
4344 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4345 )
4346
;; vmovl: widen a VW-mode D register (operand 1, %P1) into a <V_widen> Q
;; register (operand 0, %q0), modelled with the VMOVL unspecs; <sup>
;; supplies the signedness letter of the element suffix.
4347 (define_insn "neon_vmovl<sup><mode>"
4348 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4349 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4350 VMOVL))]
4351 "TARGET_NEON"
4352 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4353 [(set_attr "type" "neon_shift_imm_long")]
4354 )
4355
;; Multiply a VMD-mode D register (operand 1) by lane operand 3 of another
;; VMD-mode register (operand 2, constraint <scalar_mul_constraint>).  The
;; type attribute is chosen by <Is_float_mode>: floating-point or integer
;; scalar multiply.
4356 (define_insn "neon_vmul_lane<mode>"
4357 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4358 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4359 (match_operand:VMD 2 "s_register_operand"
4360 "<scalar_mul_constraint>")
4361 (match_operand:SI 3 "immediate_operand" "i")]
4362 UNSPEC_VMUL_LANE))]
4363 "TARGET_NEON"
4364 {
4365 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4366 }
4367 [(set (attr "type")
4368 (if_then_else (match_test "<Is_float_mode>")
4369 (const_string "neon_fp_mul_s_scalar<q>")
4370 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4371 )
4372
;; Multiply a VMQ-mode Q register (operand 1) by lane operand 3 of a
;; <V_HALF>-mode D register (operand 2, constraint <scalar_mul_constraint>).
;; The type attribute is chosen by <Is_float_mode>: floating-point or
;; integer scalar multiply.
4373 (define_insn "neon_vmul_lane<mode>"
4374 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4375 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4376 (match_operand:<V_HALF> 2 "s_register_operand"
4377 "<scalar_mul_constraint>")
4378 (match_operand:SI 3 "immediate_operand" "i")]
4379 UNSPEC_VMUL_LANE))]
4380 "TARGET_NEON"
4381 {
4382 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4383 }
4384 [(set (attr "type")
4385 (if_then_else (match_test "<Is_float_mode>")
4386 (const_string "neon_fp_mul_s_scalar<q>")
4387 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4388 )
4389
;; vmul.f16 by lane for the VH modes: operand 1 is multiplied by lane
;; operand 3 of a V4HF register (operand 2).  Requires
;; TARGET_NEON_FP16INST.
4390 (define_insn "neon_vmul_lane<mode>"
4391 [(set (match_operand:VH 0 "s_register_operand" "=w")
4392 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4393 (match_operand:V4HF 2 "s_register_operand"
4394 "<scalar_mul_constraint>")
4395 (match_operand:SI 3 "immediate_operand" "i")]
4396 UNSPEC_VMUL_LANE))]
4397 "TARGET_NEON_FP16INST"
4398 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4399 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
4400 )
4401
;; vmull by lane: multiply a VMDI-mode D register (operand 1) by lane
;; operand 3 of operand 2, producing a <V_widen> Q register.  Modelled with
;; the VMULL_LANE unspecs; <sup> supplies the signedness letter.
4402 (define_insn "neon_vmull<sup>_lane<mode>"
4403 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4404 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4405 (match_operand:VMDI 2 "s_register_operand"
4406 "<scalar_mul_constraint>")
4407 (match_operand:SI 3 "immediate_operand" "i")]
4408 VMULL_LANE))]
4409 "TARGET_NEON"
4410 {
4411 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4412 }
4413 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
4414 )
4415
;; vqdmull by lane: operand 1 (a VMDI-mode D register) and lane operand 3 of
;; operand 2 produce a <V_widen> Q register, modelled as
;; UNSPEC_VQDMULL_LANE.
4416 (define_insn "neon_vqdmull_lane<mode>"
4417 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4418 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4419 (match_operand:VMDI 2 "s_register_operand"
4420 "<scalar_mul_constraint>")
4421 (match_operand:SI 3 "immediate_operand" "i")]
4422 UNSPEC_VQDMULL_LANE))]
4423 "TARGET_NEON"
4424 {
4425 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4426 }
4427 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
4428 )
4429
;; vqdmulh / vqrdmulh by lane (selected by <r>) for the VMQI modes: a Q
;; register (operand 1) and lane operand 3 of a <V_HALF>-mode D register
;; (operand 2), modelled with the VQDMULH_LANE unspecs.
4430 (define_insn "neon_vq<r>dmulh_lane<mode>"
4431 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4432 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4433 (match_operand:<V_HALF> 2 "s_register_operand"
4434 "<scalar_mul_constraint>")
4435 (match_operand:SI 3 "immediate_operand" "i")]
4436 VQDMULH_LANE))]
4437 "TARGET_NEON"
4438 {
4439 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4440 }
4441 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4442 )
4443
;; vqdmulh / vqrdmulh by lane (selected by <r>) for the VMDI modes: a D
;; register (operand 1) and lane operand 3 of another VMDI-mode register
;; (operand 2), modelled with the VQDMULH_LANE unspecs.
;; NOTE(review): the type attribute ends in "_scalar_q" although this
;; variant prints D registers (%P0, %P1) -- confirm that is intended.
4444 (define_insn "neon_vq<r>dmulh_lane<mode>"
4445 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4446 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4447 (match_operand:VMDI 2 "s_register_operand"
4448 "<scalar_mul_constraint>")
4449 (match_operand:SI 3 "immediate_operand" "i")]
4450 VQDMULH_LANE))]
4451 "TARGET_NEON"
4452 {
4453 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4454 }
4455 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4456 )
4457
4458 ;; vqrdmlah_lane, vqrdmlsh_lane
;; Q-register (VMQI) form.  Operand 1 is the accumulator, tied to the
;; output; operand 2 is the vector input and operand 3 a <V_HALF>-mode D
;; register supplying lane operand 4.  Requires TARGET_NEON_RDMA.
4459 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4460 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4461 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4462 (match_operand:VMQI 2 "s_register_operand" "w")
4463 (match_operand:<V_HALF> 3 "s_register_operand"
4464 "<scalar_mul_constraint>")
4465 (match_operand:SI 4 "immediate_operand" "i")]
4466 VQRDMLH_AS))]
4467 "TARGET_NEON_RDMA"
4468 {
4469 return
4470 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4471 }
4472 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
4473 )
4474
;; D-register (VMDI) form of vqrdmlah / vqrdmlsh by lane.  Operand 1 is the
;; accumulator, tied to the output; operand 2 is the vector input and
;; operand 3 the register supplying lane operand 4.  Requires
;; TARGET_NEON_RDMA.
4475 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4476 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4477 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4478 (match_operand:VMDI 2 "s_register_operand" "w")
4479 (match_operand:VMDI 3 "s_register_operand"
4480 "<scalar_mul_constraint>")
4481 (match_operand:SI 4 "immediate_operand" "i")]
4482 VQRDMLH_AS))]
4483 "TARGET_NEON_RDMA"
4484 {
4485 return
4486 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4487 }
4488 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
4489 )
4490
;; vmla by lane for the VMD modes (D registers).  Operand 1 is the
;; accumulator, tied to the output; operand 2 is the vector input and
;; operand 3 the register supplying lane operand 4.  The type attribute is
;; chosen by <Is_float_mode>.
4491 (define_insn "neon_vmla_lane<mode>"
4492 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4493 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4494 (match_operand:VMD 2 "s_register_operand" "w")
4495 (match_operand:VMD 3 "s_register_operand"
4496 "<scalar_mul_constraint>")
4497 (match_operand:SI 4 "immediate_operand" "i")]
4498 UNSPEC_VMLA_LANE))]
4499 "TARGET_NEON"
4500 {
4501 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4502 }
4503 [(set (attr "type")
4504 (if_then_else (match_test "<Is_float_mode>")
4505 (const_string "neon_fp_mla_s_scalar<q>")
4506 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4507 )
4508
;; vmla_lane: 128-bit (Q-register) form.  Operand 1 is tied to the result;
;; operand 3 is a half-width vector printed as a D register, and operand 4
;; selects the lane used as scalar.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s_scalar<q>")
          (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4526
;; vmlal_lane: the result and the tied accumulator (operand 1) use the
;; widened mode and are printed as Q registers; operands 2 and 3 are
;; D registers, with lane %c4 of operand 3 as the scalar.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLAL_LANE))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4541
;; vqdmlal_lane: the widened accumulator (operand 1) is tied to the Q-register
;; result; lane %c4 of operand 3 provides the scalar.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4556
;; vmls_lane: 64-bit (D-register) form.  Operand 1 is tied to the result;
;; the scalar is lane %c4 of operand 3.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s_scalar<q>")
          (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4574
;; vmls_lane: 128-bit (Q-register) form.  Operand 1 is tied to the result;
;; operand 3 is a half-width vector printed as a D register, and operand 4
;; selects the lane used as scalar.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s_scalar<q>")
          (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4592
;; vmlsl_lane: the widened accumulator (operand 1) is tied to the Q-register
;; result; operands 2 and 3 are D registers, with lane %c4 of operand 3 as
;; the scalar.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLSL_LANE))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4607
;; vqdmlsl_lane: the widened accumulator (operand 1) is tied to the Q-register
;; result; lane %c4 of operand 3 provides the scalar.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4622
; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that.  This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector.  In the latter case it's currently better
; to use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.
4629
;; vmul_n, 64-bit vectors.  The scalar (operand 2) is placed in a fresh
;; vector register via the vset_lane pattern with index 0, and that register
;; is then handed to the by-lane multiply with lane index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4642
;; vmul_n, 128-bit vectors.  The scalar (operand 2) is placed in a fresh
;; half-width vector register via vset_lane with index 0; the by-lane
;; multiply then reads lane 0 of that register.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4655
;; vmul_n, half-precision vectors.  The scalar (operand 2) always goes
;; through a V4HF temporary, whatever the width of the vector operands.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx scalar_vec = gen_reg_rtx (V4HFmode);

  emit_insn (gen_neon_vset_lanev4hf (scalar_vec, operands[2], scalar_vec,
                                     const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4668
;; vmulls_n: result in the widened mode.  The scalar (operand 2) is moved
;; into a temporary vector with vset_lane (index 0), then vmulls_lane is
;; emitted with lane index 0.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1],
                                         scalar_vec, const0_rtx));
  DONE;
})
4681
;; vmullu_n: result in the widened mode.  The scalar (operand 2) is moved
;; into a temporary vector with vset_lane (index 0), then vmullu_lane is
;; emitted with lane index 0.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1],
                                         scalar_vec, const0_rtx));
  DONE;
})
4694
;; vqdmull_n: result in the widened mode.  The scalar (operand 2) is moved
;; into a temporary vector with vset_lane (index 0), then vqdmull_lane is
;; emitted with lane index 0.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4707
;; vqdmulh_n, 64-bit vectors.  The scalar (operand 2) is moved into a
;; temporary vector with vset_lane (index 0), then vqdmulh_lane is emitted
;; with lane index 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4720
;; vqrdmulh_n, 64-bit vectors.  The scalar (operand 2) is moved into a
;; temporary vector with vset_lane (index 0), then vqrdmulh_lane is emitted
;; with lane index 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})
4733
;; vqdmulh_n, 128-bit vectors.  The scalar (operand 2) is moved into a
;; half-width temporary vector with vset_lane (index 0), then vqdmulh_lane
;; is emitted with lane index 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4746
;; vqrdmulh_n, 128-bit vectors.  The scalar (operand 2) is moved into a
;; half-width temporary vector with vset_lane (index 0), then vqrdmulh_lane
;; is emitted with lane index 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})
4759
;; vmla_n, 64-bit vectors.  The scalar (operand 3) is moved into a temporary
;; vector with vset_lane (index 0); vmla_lane is then emitted with operands
;; 0-2 passed through unchanged and lane index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})
4773
;; vmla_n, 128-bit vectors.  The scalar (operand 3) is moved into a
;; half-width temporary vector with vset_lane (index 0); vmla_lane is then
;; emitted with operands 0-2 passed through unchanged and lane index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})
4787
;; vmlals_n: result and accumulator (operand 1) in the widened mode.  The
;; scalar (operand 3) is moved into a temporary vector with vset_lane
;; (index 0), then vmlals_lane is emitted with lane index 0.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4801
;; vmlalu_n: result and accumulator (operand 1) in the widened mode.  The
;; scalar (operand 3) is moved into a temporary vector with vset_lane
;; (index 0), then vmlalu_lane is emitted with lane index 0.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4815
;; vqdmlal_n: result and accumulator (operand 1) in the widened mode.  The
;; scalar (operand 3) is moved into a temporary vector with vset_lane
;; (index 0), then vqdmlal_lane is emitted with lane index 0.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})
4829
;; vmls_n, 64-bit vectors.  The scalar (operand 3) is moved into a temporary
;; vector with vset_lane (index 0); vmls_lane is then emitted with operands
;; 0-2 passed through unchanged and lane index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})
4843
;; vmls_n, 128-bit vectors.  The scalar (operand 3) is moved into a
;; half-width temporary vector with vset_lane (index 0); vmls_lane is then
;; emitted with operands 0-2 passed through unchanged and lane index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})
4857
;; vmlsls_n: result and accumulator (operand 1) in the widened mode.  The
;; scalar (operand 3) is moved into a temporary vector with vset_lane
;; (index 0), then vmlsls_lane is emitted with lane index 0.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4871
;; vmlslu_n: result and accumulator (operand 1) in the widened mode.  The
;; scalar (operand 3) is moved into a temporary vector with vset_lane
;; (index 0), then vmlslu_lane is emitted with lane index 0.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4885
;; vqdmlsl_n: result and accumulator (operand 1) in the widened mode.  The
;; scalar (operand 3) is moved into a temporary vector with vset_lane
;; (index 0), then vqdmlsl_lane is emitted with lane index 0.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})
4899
;; vext: operands 1 and 2 are the two source vectors, operand 3 is the
;; immediate.  The immediate is checked at output time against the limits
;; 0 and the number of units in the mode.
(define_insn "@neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")
           (match_operand:VDQX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VEXT))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT unit_count = GET_MODE_NUNITS (<MODE>mode);

  arm_const_bounds (operands[3], 0, unit_count);
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
4913
;; vrev64: single-source permute over all VDQ modes.
(define_insn "@neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ
          [(match_operand:VDQ 1 "s_register_operand" "w")]
          UNSPEC_VREV64))]
  "TARGET_NEON"
{
  return "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1";
}
  [(set_attr "type" "neon_rev<q>")]
)
4922
;; vrev32: single-source permute, restricted to the VX modes.
(define_insn "@neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX
          [(match_operand:VX 1 "s_register_operand" "w")]
          UNSPEC_VREV32))]
  "TARGET_NEON"
{
  return "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1";
}
  [(set_attr "type" "neon_rev<q>")]
)
4931
;; vrev16: single-source permute, restricted to the VE modes.
(define_insn "@neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE
          [(match_operand:VE 1 "s_register_operand" "w")]
          UNSPEC_VREV16))]
  "TARGET_NEON"
{
  return "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1";
}
  [(set_attr "type" "neon_rev<q>")]
)
4940
; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
; allocation.  For an intrinsic of form:
;   rD = vbsl_* (rS, rN, rM)
; We can use any of:
;   vbsl rS, rN, rM  (if D = S)
;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
;
; The three constraint alternatives below tie the destination to operand 1,
; operand 3 and operand 2 respectively, and select the matching mnemonic.

(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
           (match_operand:VDQX 2 "s_register_operand" " w,w,0")
           (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0:
      /* Destination tied to operand 1.  */
      return "vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3";
    case 1:
      /* Destination tied to operand 3.  */
      return "vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1";
    case 2:
      /* Destination tied to operand 2.  */
      return "vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_bsl<q>")]
)
4962
;; Expander for vbsl.  Operand 1 arrives in the comparison-result mode and
;; is re-expressed in the data mode before the UNSPEC_VBSL rtx is emitted.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:<V_cmp_result> 1 "s_register_operand")
           (match_operand:VDQX 2 "s_register_operand")
           (match_operand:VDQX 3 "s_register_operand")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands can only be tied to one another when their modes agree, so
     view operand 1 in the same mode as the other operands.  */
  rtx mask_in_data_mode = gen_lowpart (<MODE>mode, operands[1]);
  operands[1] = mask_in_data_mode;
})
4974
;; vshl, vrshl: shift with the shift amounts held in a vector register
;; (operand 2).
;; NOTE(review): the type attribute names an immediate-shift class although
;; operand 2 is a register -- confirm this is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VSHL))]
  "TARGET_NEON"
{
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4985
;; vqshl, vqrshl: shift with the shift amounts held in a vector register
;; (operand 2).
;; NOTE(review): the type attribute names an immediate-shift class although
;; operand 2 is a register -- confirm this is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSHL))]
  "TARGET_NEON"
{
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4996
;; vshr_n, vrshr_n: shift by the immediate in operand 2.  The immediate is
;; checked at output time against the limits 1 and element-bits + 1.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHR_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elem_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
5010
;; vshrn_n, vrshrn_n: the Q-register source (operand 1) is shifted by the
;; immediate in operand 2 into a D-register result of the narrow mode.  The
;; immediate is checked against the limits 1 and element-bits / 2 + 1.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHRN_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elem_bits / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
5024
;; vqshrn_n, vqrshrn_n: the Q-register source (operand 1) is shifted by the
;; immediate in operand 2 into a D-register result of the narrow mode.  The
;; immediate is checked against the limits 1 and element-bits / 2 + 1.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRN_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elem_bits / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
5038
;; vqshrun_n, vqrshrun_n: the Q-register source (operand 1) is shifted by the
;; immediate in operand 2 into a D-register result of the narrow mode.  The
;; immediate is checked against the limits 1 and element-bits / 2 + 1.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRUN_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elem_bits / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
5052
;; vshl_n: shift by the immediate in operand 2.  The immediate is checked
;; at output time against the limits 0 and element-bits.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elem_bits);
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
5065
;; vqshl_n: shift by the immediate in operand 2.  The immediate is checked
;; at output time against the limits 0 and element-bits.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHL_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elem_bits);
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
5078
;; vqshlu_n: shift by the immediate in operand 2.  The immediate is checked
;; at output time against the limits 0 and element-bits.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elem_bits);
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
5091
;; vshll_n: the D-register source (operand 1) is shifted by the immediate in
;; operand 2 into a Q-register result of the widened mode.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHLL_N))]
  "TARGET_NEON"
{
  /* The note previously attached here gave the boundaries as
     "0 < imm <= size".  The check below passes 0 as the lower limit and
     size + 1 as the upper limit.
     NOTE(review): confirm whether arm_const_bounds treats the lower limit
     as inclusive; if so a shift amount of 0 is accepted as well.  */
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elem_bits + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
5105
;; vsra_n, vrsra_n: operand 1 (the accumulator) is tied to the result;
;; operand 2 is shifted by the immediate in operand 3.  The immediate is
;; checked against the limits 1 and element-bits + 1.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          VSRA_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[3], 1, elem_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
5120
;; vsri_n: operand 1 is tied to the result; operand 2 is the other vector
;; source and operand 3 the immediate, checked against the limits 1 and
;; element-bits + 1.
;; NOTE(review): the type attribute names a register-shift class although
;; the shift amount is an immediate -- confirm this is intended.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSRI))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[3], 1, elem_bits + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
5134
;; vsli_n: operand 1 is tied to the result; operand 2 is the other vector
;; source and operand 3 the immediate, checked against the limits 0 and
;; element-bits.
;; NOTE(review): the type attribute names a register-shift class although
;; the shift amount is an immediate -- confirm this is intended.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSLI))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[3], 0, elem_bits);
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
5148
;; vtbl with a one-register table: operand 1 is printed inside the register
;; list, operand 2 after it.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  return "vtbl.8\t%P0, {%P1}, %P2";
}
  [(set_attr "type" "neon_tbl1")]
)
5158
;; vtbl with a two-register table held in a TImode operand.  The table is
;; printed as two V8QI registers at register numbers base and base + 2.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  int first_regno = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  /* Table registers, in list order.  */
  for (int i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[3] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", xops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
5179
;; vtbl with a three-register table held in an EImode operand.  The table is
;; printed as three V8QI registers at register numbers base, base + 2 and
;; base + 4.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  int first_regno = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  /* Table registers, in list order.  */
  for (int i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[4] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", xops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
5201
;; vtbl with a four-register table held in an OImode operand.  The table is
;; printed as four V8QI registers at register numbers base, base + 2,
;; base + 4 and base + 6.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  int first_regno = REGNO (operands[1]);
  rtx xops[6];

  xops[0] = operands[0];
  /* Table registers, in list order.  */
  for (int i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[5] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
5224
;; These three are used by the vec_perm infrastructure for V16QImode.
;;
;; V16QI table lookup with a V16QI table.  After reload this is split into
;; two neon_vtbl2v8qi insns, one per 64-bit half of the destination and of
;; the selector; both use the whole table viewed as TImode.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = gen_lowpart (TImode, operands[1]);

  /* Pass 0 handles the low halves, pass 1 the high halves.  */
  for (int half = 0; half < 2; half++)
    {
      unsigned byte_ofs;

      if (half == 0)
        byte_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
      else
        byte_ofs = subreg_highpart_offset (V8QImode, V16QImode);

      rtx dest_half = simplify_subreg (V8QImode, operands[0], V16QImode,
                                       byte_ofs);
      rtx sel_half = simplify_subreg (V8QImode, operands[2], V16QImode,
                                      byte_ofs);
      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
5256
;; V16QI table lookup with a two-Q-register (OImode) table.  After reload
;; this is split into two D-register lookups, one for the low and one for
;; the high 64-bit half of the destination and selector.  Each lookup uses
;; the complete OImode table, so the four-register pattern neon_vtbl4v8qi
;; (whose table operand is OImode) must be used; neon_vtbl2v8qi takes a
;; TImode table and would not match operand 1.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  /* Low half of the result from the low half of the selector.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));

  /* High half of the result from the high half of the selector.  */
  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  DONE;
}
  [(set_attr "type" "multiple")]
)
5287
;; ??? Logically we should extend the regular neon_vcombine pattern to
;; handle quad-word input modes, producing octa-word output modes.  But
;; that requires us to add support for octa-word vector modes in moves.
;; That seems overkill for this one use in vec_perm.
;;
;; Two V16QI inputs are combined into one OImode value.  No assembly is
;; emitted directly ("#"); after reload the work is handed to
;; neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* The helper emits whatever insns are needed for the split.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
5307
;; vtbx with a one-register table.  Operand 1 is tied to the result
;; register; operand 2 is printed inside the register list and operand 3
;; after it.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  return "vtbx.8\t%P0, {%P2}, %P3";
}
  [(set_attr "type" "neon_tbl1")]
)
5318
;; vtbx with a two-register table held in a TImode operand (operand 2).
;; Operand 1 is tied to the result register.  The table is printed as two
;; V8QI registers at register numbers base and base + 2.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:TI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  int first_regno = REGNO (operands[2]);
  rtx xops[4];

  xops[0] = operands[0];
  /* Table registers, in list order.  */
  for (int i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[3] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", xops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
5340
;; vtbx with a three-register table held in an EImode operand (operand 2).
;; Operand 1 is tied to the result register.  The table is printed as three
;; V8QI registers at register numbers base, base + 2 and base + 4.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:EI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  int first_regno = REGNO (operands[2]);
  rtx xops[5];

  xops[0] = operands[0];
  /* Table registers, in list order.  */
  for (int i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[4] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", xops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
5363
;; vtbx with a four-register table held in an OImode operand (operand 2).
;; Operand 1 is tied to the result register.  The table is printed as four
;; V8QI registers at register numbers base, base + 2, base + 4 and base + 6.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:OI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  int first_regno = REGNO (operands[2]);
  rtx xops[6];

  xops[0] = operands[0];
  /* Table registers, in list order.  */
  for (int i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[5] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
5387
;; Expander for VTRN.  It emits one parallel with two sets: operand 0
;; receives UNSPEC_VTRN1 of operands 1 and 2, and operand 3 receives
;; UNSPEC_VTRN2 of the same two inputs.  The preparation statement is
;; empty, so the RTL template is emitted exactly as written.
5388 (define_expand "@neon_vtrn<mode>_internal"
5389 [(parallel
5390 [(set (match_operand:VDQWH 0 "s_register_operand")
5391 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5392 (match_operand:VDQWH 2 "s_register_operand")]
5393 UNSPEC_VTRN1))
5394 (set (match_operand:VDQWH 3 "s_register_operand")
5395 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5396 "TARGET_NEON"
5397 ""
5398 )
5399
5400 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 and the two inputs are operands
;; 1 and 3.  Input 1 is tied to output 0 and input 3 is tied to output 2
;; (constraints "0" and "2"); both outputs are earlyclobber ("=&w").  The
;; assembler template therefore only names operands 0 and 2.
5401 (define_insn "*neon_vtrn<mode>_insn"
5402 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5403 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5404 (match_operand:VDQWH 3 "s_register_operand" "2")]
5405 UNSPEC_VTRN1))
5406 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5407 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5408 UNSPEC_VTRN2))]
5409 "TARGET_NEON"
5410 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5411 [(set_attr "type" "neon_permute<q>")]
5412 )
5413
;; Expander for VZIP.  It emits one parallel with two sets: operand 0
;; receives UNSPEC_VZIP1 of operands 1 and 2, and operand 3 receives
;; UNSPEC_VZIP2 of the same two inputs.  The preparation statement is
;; empty, so the RTL template is emitted exactly as written.
5414 (define_expand "@neon_vzip<mode>_internal"
5415 [(parallel
5416 [(set (match_operand:VDQWH 0 "s_register_operand")
5417 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5418 (match_operand:VDQWH 2 "s_register_operand")]
5419 UNSPEC_VZIP1))
5420 (set (match_operand:VDQWH 3 "s_register_operand")
5421 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5422 "TARGET_NEON"
5423 ""
5424 )
5425
5426 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 and the two inputs are operands
;; 1 and 3.  Input 1 is tied to output 0 and input 3 is tied to output 2
;; (constraints "0" and "2"); both outputs are earlyclobber ("=&w").  The
;; assembler template therefore only names operands 0 and 2.
5427 (define_insn "*neon_vzip<mode>_insn"
5428 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5429 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5430 (match_operand:VDQWH 3 "s_register_operand" "2")]
5431 UNSPEC_VZIP1))
5432 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5433 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5434 UNSPEC_VZIP2))]
5435 "TARGET_NEON"
5436 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5437 [(set_attr "type" "neon_zip<q>")]
5438 )
5439
;; Expander for VUZP.  It emits one parallel with two sets: operand 0
;; receives UNSPEC_VUZP1 of operands 1 and 2, and operand 3 receives
;; UNSPEC_VUZP2 of the same two inputs.  The preparation statement is
;; empty, so the RTL template is emitted exactly as written.
;; NOTE(review): operand 3 carries an explicit empty constraint string,
;; unlike the other operands of this expander.
5440 (define_expand "@neon_vuzp<mode>_internal"
5441 [(parallel
5442 [(set (match_operand:VDQWH 0 "s_register_operand")
5443 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5444 (match_operand:VDQWH 2 "s_register_operand")]
5445 UNSPEC_VUZP1))
5446 (set (match_operand:VDQWH 3 "s_register_operand" "")
5447 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5448 "TARGET_NEON"
5449 ""
5450 )
5451
5452 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 and the two inputs are operands
;; 1 and 3.  Input 1 is tied to output 0 and input 3 is tied to output 2
;; (constraints "0" and "2"); both outputs are earlyclobber ("=&w").  The
;; assembler template therefore only names operands 0 and 2.
5453 (define_insn "*neon_vuzp<mode>_insn"
5454 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5455 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5456 (match_operand:VDQWH 3 "s_register_operand" "2")]
5457 UNSPEC_VUZP1))
5458 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5459 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5460 UNSPEC_VUZP2))]
5461 "TARGET_NEON"
5462 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5463 [(set_attr "type" "neon_zip<q>")]
5464 )
5465
;; Single-vector vec_load_lanes expander.  It has no preparation code and
;; generates a set of register operand 0 from UNSPEC_VLD1 of structure
;; memory operand 1, the same RTL shape as the neon_vld1<mode> insn.
5466 (define_expand "vec_load_lanes<mode><mode>"
5467 [(set (match_operand:VDQX 0 "s_register_operand")
5468 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
5469 UNSPEC_VLD1))]
5470 "TARGET_NEON")
5471
;; Whole-vector VLD1: loads register operand 0 from structure memory
;; operand 1 ("Um" constraint) using the element size of the mode.
;; %h and %A are operand-print modifiers implemented by the ARM backend
;; outside this file.
5472 (define_insn "neon_vld1<mode>"
5473 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5474 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5475 UNSPEC_VLD1))]
5476 "TARGET_NEON"
5477 "vld1.<V_sz_elem>\t%h0, %A1"
5478 [(set_attr "type" "neon_load1_1reg<q>")]
5479 )
5480
5481 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5482 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
5483 ;; lane order here.
;; D-register (VDX) form.  Operand 1 is the element in memory, operand 2 is
;; the previous vector value tied to the destination, and operand 3 is the
;; lane number.  The lane is passed through NEON_ENDIAN_LANE_N and written
;; back to operands[3] before printing.  When the mode has a single element
;; the instruction is printed without a lane index.
5484 (define_insn "neon_vld1_lane<mode>"
5485 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5486 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5487 (match_operand:VDX 2 "s_register_operand" "0")
5488 (match_operand:SI 3 "immediate_operand" "i")]
5489 UNSPEC_VLD1_LANE))]
5490 "TARGET_NEON"
5491 {
5492 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5493 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5494 operands[3] = GEN_INT (lane);
5495 if (max == 1)
5496 return "vld1.<V_sz_elem>\t%P0, %A1";
5497 else
5498 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5499 }
5500 [(set_attr "type" "neon_load1_one_lane<q>")]
5501 )
5502
5503 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5504 ;; here on big endian targets.
;; Q-register (VQX) form.  The output code narrows the access to one half of
;; the destination: when the converted lane falls in the upper half of the
;; vector, the lane is reduced by max / 2 and the register number is
;; advanced by 2.  Operand 0 is then replaced by a <V_HALF>mode register so
;; that %P0 names that half.  When the full vector has two elements the
;; instruction is printed without a lane index.
5505 (define_insn "neon_vld1_lane<mode>"
5506 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5507 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5508 (match_operand:VQX 2 "s_register_operand" "0")
5509 (match_operand:SI 3 "immediate_operand" "i")]
5510 UNSPEC_VLD1_LANE))]
5511 "TARGET_NEON"
5512 {
5513 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5514 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
/* NOTE(review): operands[3] is assigned here and again inside the branch
   below; the first assignment only matters when the branch is not taken.  */
5515 operands[3] = GEN_INT (lane);
5516 int regno = REGNO (operands[0]);
5517 if (lane >= max / 2)
5518 {
5519 lane -= max / 2;
5520 regno += 2;
5521 operands[3] = GEN_INT (lane);
5522 }
5523 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5524 if (max == 2)
5525 return "vld1.<V_sz_elem>\t%P0, %A1";
5526 else
5527 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5528 }
5529 [(set_attr "type" "neon_load1_one_lane<q>")]
5530 )
5531
;; VLD1 to all lanes, D-register (VD_LANE) form.  The RTL is a plain
;; vec_duplicate of the element in memory operand 1; the template uses the
;; "[]" all-lanes syntax on the destination register.
5532 (define_insn "neon_vld1_dup<mode>"
5533 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5534 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5535 "TARGET_NEON"
5536 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5537 [(set_attr "type" "neon_load1_all_lanes<q>")]
5538 )
5539
5540 ;; Special case for DImode. Treat it exactly like a simple load.
;; The expander has an empty preparation statement and emits a DImode set
;; from UNSPEC_VLD1 of memory operand 1 rather than a vec_duplicate.
5541 (define_expand "neon_vld1_dupdi"
5542 [(set (match_operand:DI 0 "s_register_operand" "")
5543 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
5544 UNSPEC_VLD1))]
5545 "TARGET_NEON"
5546 ""
5547 )
5548
;; VLD1 to all lanes, Q-register (VQ2) form.  A single instruction names two
;; registers derived from operand 0 via the %e and %f modifiers, each with
;; the "[]" all-lanes syntax.
;; NOTE(review): %e/%f presumably print the low and high D halves of the
;; Q register -- confirm in the backend's operand printer.
5549 (define_insn "neon_vld1_dup<mode>"
5550 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5551 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5552 "TARGET_NEON"
5553 {
5554 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5555 }
5556 [(set_attr "type" "neon_load1_all_lanes<q>")]
5557 )
5558
;; V2DI duplicate-load.  The output template is "#", so the insn is never
;; printed directly and must be split.  Once reload has completed, the split
;; loads the low DImode part of operand 0 with neon_vld1_dupdi and then
;; copies that value into the high DImode part with an ordinary move.  The
;; "length" attribute of 8 accounts for the two resulting instructions.
5559 (define_insn_and_split "neon_vld1_dupv2di"
5560 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5561 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5562 "TARGET_NEON"
5563 "#"
5564 "&& reload_completed"
5565 [(const_int 0)]
5566 {
5567 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5568 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5569 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5570 DONE;
5571 }
5572 [(set_attr "length" "8")
5573 (set_attr "type" "neon_load1_all_lanes_q")]
5574 )
5575
;; Single-vector vec_store_lanes expander.  It has no preparation code and
;; generates a set of structure memory operand 0 from UNSPEC_VST1 of
;; register operand 1, the same RTL shape as the neon_vst1<mode> insn.
5576 (define_expand "vec_store_lanes<mode><mode>"
5577 [(set (match_operand:VDQX 0 "neon_struct_operand")
5578 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5579 UNSPEC_VST1))]
5580 "TARGET_NEON")
5581
;; Whole-vector VST1: stores register operand 1 to structure memory
;; operand 0 ("Um" constraint) using the element size of the mode.
5582 (define_insn "neon_vst1<mode>"
5583 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5584 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5585 UNSPEC_VST1))]
5586 "TARGET_NEON"
5587 "vst1.<V_sz_elem>\t%h1, %A0"
5588 [(set_attr "type" "neon_store1_1reg<q>")])
5589
5590 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5591 ;; here on big endian targets.
;; D-register (VDX) form.  Stores one element of operand 1, selected by lane
;; operand 2, to the <V_elem>-mode memory operand 0.  The lane is passed
;; through NEON_ENDIAN_LANE_N and written back to operands[2] before
;; printing.  When the mode has a single element the instruction is printed
;; without a lane index.
5592 (define_insn "neon_vst1_lane<mode>"
5593 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5594 (unspec:<V_elem>
5595 [(match_operand:VDX 1 "s_register_operand" "w")
5596 (match_operand:SI 2 "immediate_operand" "i")]
5597 UNSPEC_VST1_LANE))]
5598 "TARGET_NEON"
5599 {
5600 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5601 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5602 operands[2] = GEN_INT (lane);
5603 if (max == 1)
5604 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5605 else
5606 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5607 }
5608 [(set_attr "type" "neon_store1_one_lane<q>")]
5609 )
5610
5611 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5612 ;; here on big endian targets.
;; Q-register (VQX) form.  The output code narrows the access to one half of
;; the source: when the converted lane falls in the upper half of the
;; vector, the lane is reduced by max / 2 and the register number is
;; advanced by 2.  Operand 1 is then replaced by a <V_HALF>mode register so
;; that %P1 names that half.  When the full vector has two elements the
;; instruction is printed without a lane index.
5613 (define_insn "neon_vst1_lane<mode>"
5614 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5615 (unspec:<V_elem>
5616 [(match_operand:VQX 1 "s_register_operand" "w")
5617 (match_operand:SI 2 "immediate_operand" "i")]
5618 UNSPEC_VST1_LANE))]
5619 "TARGET_NEON"
5620 {
5621 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5622 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5623 int regno = REGNO (operands[1]);
5624 if (lane >= max / 2)
5625 {
5626 lane -= max / 2;
5627 regno += 2;
5628 }
5629 operands[2] = GEN_INT (lane);
5630 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5631 if (max == 2)
5632 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5633 else
5634 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5635 }
5636 [(set_attr "type" "neon_store1_one_lane<q>")]
5637 )
5638
;; Two-vector vec_load_lanes expander for D-register (VDX) element modes.
;; The result is a TI-mode register group.  The inner UNSPEC_VSTRUCTDUMMY
;; unspec holds only a constant 0 and exists to carry the iterated vector
;; mode into the pattern.  Same RTL shape as the TI neon_vld2<mode> insn.
5639 (define_expand "vec_load_lanesti<mode>"
5640 [(set (match_operand:TI 0 "s_register_operand")
5641 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5642 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5643 UNSPEC_VLD2))]
5644 "TARGET_NEON")
5645
;; Two-vector structure load into a TI-mode register group (VDX modes).
;; For 64-bit elements the output code emits vld1.64 instead of vld2, and
;; the "type" attribute switches between the load1 and load2 two-register
;; classes on the same element-size test.
5646 (define_insn "neon_vld2<mode>"
5647 [(set (match_operand:TI 0 "s_register_operand" "=w")
5648 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5649 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5650 UNSPEC_VLD2))]
5651 "TARGET_NEON"
5652 {
5653 if (<V_sz_elem> == 64)
5654 return "vld1.64\t%h0, %A1";
5655 else
5656 return "vld2.<V_sz_elem>\t%h0, %A1";
5657 }
5658 [(set (attr "type")
5659 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5660 (const_string "neon_load1_2reg<q>")
5661 (const_string "neon_load2_2reg<q>")))]
5662 )
5663
;; Two-vector vec_load_lanes expander for Q-register (VQ2) element modes.
;; The result is an OI-mode register group; the UNSPEC_VSTRUCTDUMMY unspec
;; only carries the iterated vector mode.  Same RTL shape as the OI
;; neon_vld2<mode> insn.
5664 (define_expand "vec_load_lanesoi<mode>"
5665 [(set (match_operand:OI 0 "s_register_operand")
5666 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5667 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5668 UNSPEC_VLD2))]
5669 "TARGET_NEON")
5670
;; Two-vector structure load into an OI-mode register group (VQ2 modes),
;; printed as a single vld2 with the register list produced by %h0.
5671 (define_insn "neon_vld2<mode>"
5672 [(set (match_operand:OI 0 "s_register_operand" "=w")
5673 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5674 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5675 UNSPEC_VLD2))]
5676 "TARGET_NEON"
5677 "vld2.<V_sz_elem>\t%h0, %A1"
5678 [(set_attr "type" "neon_load2_2reg_q")])
5679
5680 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5681 ;; here on big endian targets.
;; VLD2 to one lane, TI register group (VD_LANE modes).  Operand 2 is the
;; previous group value tied to the destination and operand 3 is the lane.
;; The output code names two DImode registers at regno and regno + 2 and
;; prints both with the converted lane index.
5682 (define_insn "neon_vld2_lane<mode>"
5683 [(set (match_operand:TI 0 "s_register_operand" "=w")
5684 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5685 (match_operand:TI 2 "s_register_operand" "0")
5686 (match_operand:SI 3 "immediate_operand" "i")
5687 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5688 UNSPEC_VLD2_LANE))]
5689 "TARGET_NEON"
5690 {
5691 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5692 int regno = REGNO (operands[0]);
5693 rtx ops[4];
5694 ops[0] = gen_rtx_REG (DImode, regno);
5695 ops[1] = gen_rtx_REG (DImode, regno + 2);
5696 ops[2] = operands[1];
5697 ops[3] = GEN_INT (lane);
5698 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5699 return "";
5700 }
5701 [(set_attr "type" "neon_load2_one_lane<q>")]
5702 )
5703
5704 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5705 ;; here on big endian targets.
;; VLD2 to one lane, OI register group (VQ_HS modes).  When the converted
;; lane falls in the upper half of the vector, the lane is reduced by
;; max / 2 and the base register number is advanced by 2.  The two DImode
;; registers printed are then regno and regno + 4.
5706 (define_insn "neon_vld2_lane<mode>"
5707 [(set (match_operand:OI 0 "s_register_operand" "=w")
5708 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5709 (match_operand:OI 2 "s_register_operand" "0")
5710 (match_operand:SI 3 "immediate_operand" "i")
5711 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5712 UNSPEC_VLD2_LANE))]
5713 "TARGET_NEON"
5714 {
5715 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5716 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5717 int regno = REGNO (operands[0]);
5718 rtx ops[4];
5719 if (lane >= max / 2)
5720 {
5721 lane -= max / 2;
5722 regno += 2;
5723 }
5724 ops[0] = gen_rtx_REG (DImode, regno);
5725 ops[1] = gen_rtx_REG (DImode, regno + 4);
5726 ops[2] = operands[1];
5727 ops[3] = GEN_INT (lane);
5728 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5729 return "";
5730 }
5731 [(set_attr "type" "neon_load2_one_lane<q>")]
5732 )
5733
;; VLD2 to all lanes, TI register group (VDX modes).  For modes with more
;; than one element the "[]" all-lanes form of vld2 is emitted; for
;; single-element modes a plain vld1 of the register list is emitted
;; instead.  The "type" attribute makes the same distinction using
;; <V_mode_nunits>.
5734 (define_insn "neon_vld2_dup<mode>"
5735 [(set (match_operand:TI 0 "s_register_operand" "=w")
5736 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5737 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5738 UNSPEC_VLD2_DUP))]
5739 "TARGET_NEON"
5740 {
5741 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5742 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5743 else
5744 return "vld1.<V_sz_elem>\t%h0, %A1";
5745 }
5746 [(set (attr "type")
5747 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5748 (const_string "neon_load2_all_lanes<q>")
5749 (const_string "neon_load1_1reg<q>")))]
5750 )
5751
;; Two-vector vec_store_lanes expander for D-register (VDX) element modes.
;; The source is a TI-mode register group; the UNSPEC_VSTRUCTDUMMY unspec
;; only carries the iterated vector mode.  Same RTL shape as the TI
;; neon_vst2<mode> insn.
5752 (define_expand "vec_store_lanesti<mode>"
5753 [(set (match_operand:TI 0 "neon_struct_operand")
5754 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5755 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5756 UNSPEC_VST2))]
5757 "TARGET_NEON")
5758
;; Two-vector structure store from a TI-mode register group (VDX modes).
;; For 64-bit elements the output code emits vst1.64 instead of vst2.
;; NOTE(review): the non-64-bit "type" is "neon_store2_one_lane<q>" even
;; though this pattern stores whole registers -- confirm this scheduling
;; class is intended.
5759 (define_insn "neon_vst2<mode>"
5760 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5761 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5762 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5763 UNSPEC_VST2))]
5764 "TARGET_NEON"
5765 {
5766 if (<V_sz_elem> == 64)
5767 return "vst1.64\t%h1, %A0";
5768 else
5769 return "vst2.<V_sz_elem>\t%h1, %A0";
5770 }
5771 [(set (attr "type")
5772 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5773 (const_string "neon_store1_2reg<q>")
5774 (const_string "neon_store2_one_lane<q>")))]
5775 )
5776
;; Two-vector vec_store_lanes expander for Q-register (VQ2) element modes.
;; The source is an OI-mode register group; the UNSPEC_VSTRUCTDUMMY unspec
;; only carries the iterated vector mode.  Same RTL shape as the OI
;; neon_vst2<mode> insn.
5777 (define_expand "vec_store_lanesoi<mode>"
5778 [(set (match_operand:OI 0 "neon_struct_operand")
5779 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5780 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5781 UNSPEC_VST2))]
5782 "TARGET_NEON")
5783
;; Two-vector structure store from an OI-mode register group (VQ2 modes),
;; printed as a single vst2 with the register list produced by %h1.
5784 (define_insn "neon_vst2<mode>"
5785 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5786 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5787 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5788 UNSPEC_VST2))]
5789 "TARGET_NEON"
5790 "vst2.<V_sz_elem>\t%h1, %A0"
5791 [(set_attr "type" "neon_store2_4reg<q>")]
5792 )
5793
5794 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5795 ;; here on big endian targets.
;; VST2 from one lane, TI register group (VD_LANE modes).  Operand 2 is the
;; lane.  The output code names two DImode registers at regno and regno + 2
;; and prints both with the converted lane index.
5796 (define_insn "neon_vst2_lane<mode>"
5797 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5798 (unspec:<V_two_elem>
5799 [(match_operand:TI 1 "s_register_operand" "w")
5800 (match_operand:SI 2 "immediate_operand" "i")
5801 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5802 UNSPEC_VST2_LANE))]
5803 "TARGET_NEON"
5804 {
5805 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5806 int regno = REGNO (operands[1]);
5807 rtx ops[4];
5808 ops[0] = operands[0];
5809 ops[1] = gen_rtx_REG (DImode, regno);
5810 ops[2] = gen_rtx_REG (DImode, regno + 2);
5811 ops[3] = GEN_INT (lane);
5812 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5813 return "";
5814 }
5815 [(set_attr "type" "neon_store2_one_lane<q>")]
5816 )
5817
5818 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5819 ;; here on big endian targets.
;; VST2 from one lane, OI register group (VQ_HS modes).  When the converted
;; lane falls in the upper half of the vector, the lane is reduced by
;; max / 2 and the base register number is advanced by 2.  The two DImode
;; registers printed are then regno and regno + 4.
5820 (define_insn "neon_vst2_lane<mode>"
5821 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5822 (unspec:<V_two_elem>
5823 [(match_operand:OI 1 "s_register_operand" "w")
5824 (match_operand:SI 2 "immediate_operand" "i")
5825 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5826 UNSPEC_VST2_LANE))]
5827 "TARGET_NEON"
5828 {
5829 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5830 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5831 int regno = REGNO (operands[1]);
5832 rtx ops[4];
5833 if (lane >= max / 2)
5834 {
5835 lane -= max / 2;
5836 regno += 2;
5837 }
5838 ops[0] = operands[0];
5839 ops[1] = gen_rtx_REG (DImode, regno);
5840 ops[2] = gen_rtx_REG (DImode, regno + 4);
5841 ops[3] = GEN_INT (lane);
5842 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5843 return "";
5844 }
5845 [(set_attr "type" "neon_store2_one_lane<q>")]
5846 )
5847
;; Three-vector vec_load_lanes expander for D-register (VDX) element modes.
;; The result is an EI-mode register group; the UNSPEC_VSTRUCTDUMMY unspec
;; only carries the iterated vector mode.  Same RTL shape as the EI
;; neon_vld3<mode> insn.
5848 (define_expand "vec_load_lanesei<mode>"
5849 [(set (match_operand:EI 0 "s_register_operand")
5850 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5851 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5852 UNSPEC_VLD3))]
5853 "TARGET_NEON")
5854
;; Three-vector structure load into an EI-mode register group (VDX modes).
;; For 64-bit elements the output code emits vld1.64 instead of vld3, and
;; the "type" attribute switches between the load1 and load3 three-register
;; classes on the same element-size test.
5855 (define_insn "neon_vld3<mode>"
5856 [(set (match_operand:EI 0 "s_register_operand" "=w")
5857 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5858 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5859 UNSPEC_VLD3))]
5860 "TARGET_NEON"
5861 {
5862 if (<V_sz_elem> == 64)
5863 return "vld1.64\t%h0, %A1";
5864 else
5865 return "vld3.<V_sz_elem>\t%h0, %A1";
5866 }
5867 [(set (attr "type")
5868 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5869 (const_string "neon_load1_3reg<q>")
5870 (const_string "neon_load3_3reg<q>")))]
5871 )
5872
;; Three-vector vec_load_lanes expander for Q-register (VQ2) element modes.
;; The RTL template only declares the operands; the preparation code
;; forwards both operands to gen_neon_vld3<mode> and finishes with DONE, so
;; nothing from the template itself is emitted.
5873 (define_expand "vec_load_lanesci<mode>"
5874 [(match_operand:CI 0 "s_register_operand")
5875 (match_operand:CI 1 "neon_struct_operand")
5876 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5877 "TARGET_NEON"
5878 {
5879 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
5880 DONE;
5881 })
5882
;; Q-register (VQ2) three-vector load, expanded as two instructions.  The
;; CI-mode memory operand is re-addressed as two consecutive EI-mode
;; references: the first at offset 0 is loaded by neon_vld3qa, the second at
;; GET_MODE_SIZE (EImode) is loaded by neon_vld3qb.  The second insn takes
;; operands[0] as an input too, so it is tied to the first insn's result.
5883 (define_expand "neon_vld3<mode>"
5884 [(match_operand:CI 0 "s_register_operand")
5885 (match_operand:CI 1 "neon_struct_operand")
5886 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5887 "TARGET_NEON"
5888 {
5889 rtx mem;
5890
5891 mem = adjust_address (operands[1], EImode, 0);
5892 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5893 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5894 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
5895 DONE;
5896 })
5897
;; First half of the Q-register three-vector load.  Reads an EI-mode memory
;; block into the DImode registers at regno, regno + 4 and regno + 8 of the
;; CI-mode destination group.
;; NOTE(review): these offsets presumably address the low D half of each of
;; the three Q registers -- confirm against the backend's numbering.
5898 (define_insn "neon_vld3qa<mode>"
5899 [(set (match_operand:CI 0 "s_register_operand" "=w")
5900 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5901 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5902 UNSPEC_VLD3A))]
5903 "TARGET_NEON"
5904 {
5905 int regno = REGNO (operands[0]);
5906 rtx ops[4];
5907 ops[0] = gen_rtx_REG (DImode, regno);
5908 ops[1] = gen_rtx_REG (DImode, regno + 4);
5909 ops[2] = gen_rtx_REG (DImode, regno + 8);
5910 ops[3] = operands[1];
5911 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5912 return "";
5913 }
5914 [(set_attr "type" "neon_load3_3reg<q>")]
5915 )
5916
;; Second half of the Q-register three-vector load.  Operand 2 is the
;; partially loaded CI group, tied to the destination (constraint "0").
;; Reads an EI-mode memory block into the DImode registers at regno + 2,
;; regno + 6 and regno + 10.
;; NOTE(review): these offsets presumably address the high D half of each
;; of the three Q registers -- confirm against the backend's numbering.
5917 (define_insn "neon_vld3qb<mode>"
5918 [(set (match_operand:CI 0 "s_register_operand" "=w")
5919 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5920 (match_operand:CI 2 "s_register_operand" "0")
5921 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5922 UNSPEC_VLD3B))]
5923 "TARGET_NEON"
5924 {
5925 int regno = REGNO (operands[0]);
5926 rtx ops[4];
5927 ops[0] = gen_rtx_REG (DImode, regno + 2);
5928 ops[1] = gen_rtx_REG (DImode, regno + 6);
5929 ops[2] = gen_rtx_REG (DImode, regno + 10);
5930 ops[3] = operands[1];
5931 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5932 return "";
5933 }
5934 [(set_attr "type" "neon_load3_3reg<q>")]
5935 )
5936
5937 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5938 ;; here on big endian targets.
;; VLD3 to one lane, EI register group (VD_LANE modes).  Operand 2 is the
;; previous group value tied to the destination and operand 3 is the lane.
;; The output code names three DImode registers at regno, regno + 2 and
;; regno + 4 and prints each with the converted lane index.
;; NOTE(review): the address is printed with plain %3 here, whereas the
;; vld2/vld4 lane patterns use the %A modifier -- presumably deliberate;
;; confirm before changing.
5939 (define_insn "neon_vld3_lane<mode>"
5940 [(set (match_operand:EI 0 "s_register_operand" "=w")
5941 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5942 (match_operand:EI 2 "s_register_operand" "0")
5943 (match_operand:SI 3 "immediate_operand" "i")
5944 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5945 UNSPEC_VLD3_LANE))]
5946 "TARGET_NEON"
5947 {
5948 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5949 int regno = REGNO (operands[0]);
5950 rtx ops[5];
5951 ops[0] = gen_rtx_REG (DImode, regno);
5952 ops[1] = gen_rtx_REG (DImode, regno + 2);
5953 ops[2] = gen_rtx_REG (DImode, regno + 4);
5954 ops[3] = operands[1];
5955 ops[4] = GEN_INT (lane);
5956 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5957 ops);
5958 return "";
5959 }
5960 [(set_attr "type" "neon_load3_one_lane<q>")]
5961 )
5962
5963 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5964 ;; here on big endian targets.
;; VLD3 to one lane, CI register group (VQ_HS modes).  When the converted
;; lane falls in the upper half of the vector, the lane is reduced by
;; max / 2 and the base register number is advanced by 2.  The three DImode
;; registers printed are then regno, regno + 4 and regno + 8.
;; NOTE(review): the address is printed with plain %3 rather than the %A
;; modifier -- presumably deliberate; confirm before changing.
5965 (define_insn "neon_vld3_lane<mode>"
5966 [(set (match_operand:CI 0 "s_register_operand" "=w")
5967 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5968 (match_operand:CI 2 "s_register_operand" "0")
5969 (match_operand:SI 3 "immediate_operand" "i")
5970 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5971 UNSPEC_VLD3_LANE))]
5972 "TARGET_NEON"
5973 {
5974 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5975 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5976 int regno = REGNO (operands[0]);
5977 rtx ops[5];
5978 if (lane >= max / 2)
5979 {
5980 lane -= max / 2;
5981 regno += 2;
5982 }
5983 ops[0] = gen_rtx_REG (DImode, regno);
5984 ops[1] = gen_rtx_REG (DImode, regno + 4);
5985 ops[2] = gen_rtx_REG (DImode, regno + 8);
5986 ops[3] = operands[1];
5987 ops[4] = GEN_INT (lane);
5988 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5989 ops);
5990 return "";
5991 }
5992 [(set_attr "type" "neon_load3_one_lane<q>")]
5993 )
5994
;; VLD3 to all lanes, EI register group (VDX modes).  For modes with more
;; than one element the output code prints three DImode registers (regno,
;; regno + 2, regno + 4) with the "[]" all-lanes syntax; for single-element
;; modes it emits a plain vld1 of the register list instead.  The "type"
;; attribute makes the same distinction using <V_mode_nunits>.
;; NOTE(review): the multi-element form prints the address with plain %3
;; while the single-element form uses %A1 -- presumably deliberate; confirm.
5995 (define_insn "neon_vld3_dup<mode>"
5996 [(set (match_operand:EI 0 "s_register_operand" "=w")
5997 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5998 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5999 UNSPEC_VLD3_DUP))]
6000 "TARGET_NEON"
6001 {
6002 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6003 {
6004 int regno = REGNO (operands[0]);
6005 rtx ops[4];
6006 ops[0] = gen_rtx_REG (DImode, regno);
6007 ops[1] = gen_rtx_REG (DImode, regno + 2);
6008 ops[2] = gen_rtx_REG (DImode, regno + 4);
6009 ops[3] = operands[1];
6010 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
6011 return "";
6012 }
6013 else
6014 return "vld1.<V_sz_elem>\t%h0, %A1";
6015 }
6016 [(set (attr "type")
6017 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6018 (const_string "neon_load3_all_lanes<q>")
6019 (const_string "neon_load1_1reg<q>")))])
6020
;; Three-vector vec_store_lanes expander for D-register (VDX) element modes.
;; The source is an EI-mode register group; the UNSPEC_VSTRUCTDUMMY unspec
;; only carries the iterated vector mode.  Same RTL shape as the EI
;; neon_vst3<mode> insn.
6021 (define_expand "vec_store_lanesei<mode>"
6022 [(set (match_operand:EI 0 "neon_struct_operand")
6023 (unspec:EI [(match_operand:EI 1 "s_register_operand")
6024 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6025 UNSPEC_VST3))]
6026 "TARGET_NEON")
6027
;; Three-vector structure store from an EI-mode register group (VDX modes).
;; For 64-bit elements the output code emits vst1.64 instead of vst3.
;; NOTE(review): the non-64-bit "type" is "neon_store3_one_lane<q>" even
;; though this pattern stores whole registers -- confirm this scheduling
;; class is intended.
6028 (define_insn "neon_vst3<mode>"
6029 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6030 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
6031 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6032 UNSPEC_VST3))]
6033 "TARGET_NEON"
6034 {
6035 if (<V_sz_elem> == 64)
6036 return "vst1.64\t%h1, %A0";
6037 else
6038 return "vst3.<V_sz_elem>\t%h1, %A0";
6039 }
6040 [(set (attr "type")
6041 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6042 (const_string "neon_store1_3reg<q>")
6043 (const_string "neon_store3_one_lane<q>")))])
6044
;; Three-vector vec_store_lanes expander for Q-register (VQ2) element modes.
;; The RTL template only declares the operands; the preparation code
;; forwards both operands to gen_neon_vst3<mode> and finishes with DONE, so
;; nothing from the template itself is emitted.
6045 (define_expand "vec_store_lanesci<mode>"
6046 [(match_operand:CI 0 "neon_struct_operand")
6047 (match_operand:CI 1 "s_register_operand")
6048 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6049 "TARGET_NEON"
6050 {
6051 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
6052 DONE;
6053 })
6054
;; Q-register (VQ2) three-vector store, expanded as two instructions.  The
;; CI-mode memory operand is re-addressed as two consecutive EI-mode
;; references: the first at offset 0 is written by neon_vst3qa, the second
;; at GET_MODE_SIZE (EImode) is written by neon_vst3qb.  Both insns read the
;; same CI-mode source group.
6055 (define_expand "neon_vst3<mode>"
6056 [(match_operand:CI 0 "neon_struct_operand")
6057 (match_operand:CI 1 "s_register_operand")
6058 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6059 "TARGET_NEON"
6060 {
6061 rtx mem;
6062
6063 mem = adjust_address (operands[0], EImode, 0);
6064 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
6065 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
6066 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
6067 DONE;
6068 })
6069
;; First half of the Q-register three-vector store.  Writes an EI-mode
;; memory block from the DImode registers at regno, regno + 4 and regno + 8
;; of the CI-mode source group.
;; NOTE(review): these offsets presumably address the low D half of each of
;; the three Q registers -- confirm against the backend's numbering.
6070 (define_insn "neon_vst3qa<mode>"
6071 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6072 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
6073 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6074 UNSPEC_VST3A))]
6075 "TARGET_NEON"
6076 {
6077 int regno = REGNO (operands[1]);
6078 rtx ops[4];
6079 ops[0] = operands[0];
6080 ops[1] = gen_rtx_REG (DImode, regno);
6081 ops[2] = gen_rtx_REG (DImode, regno + 4);
6082 ops[3] = gen_rtx_REG (DImode, regno + 8);
6083 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6084 return "";
6085 }
6086 [(set_attr "type" "neon_store3_3reg<q>")]
6087 )
6088
;; Second half of the Q-register three-vector store.  Writes an EI-mode
;; memory block from the DImode registers at regno + 2, regno + 6 and
;; regno + 10 of the CI-mode source group.
;; NOTE(review): these offsets presumably address the high D half of each
;; of the three Q registers -- confirm against the backend's numbering.
6089 (define_insn "neon_vst3qb<mode>"
6090 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6091 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
6092 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6093 UNSPEC_VST3B))]
6094 "TARGET_NEON"
6095 {
6096 int regno = REGNO (operands[1]);
6097 rtx ops[4];
6098 ops[0] = operands[0];
6099 ops[1] = gen_rtx_REG (DImode, regno + 2);
6100 ops[2] = gen_rtx_REG (DImode, regno + 6);
6101 ops[3] = gen_rtx_REG (DImode, regno + 10);
6102 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6103 return "";
6104 }
6105 [(set_attr "type" "neon_store3_3reg<q>")]
6106 )
6107
6108 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6109 ;; here on big endian targets.
;; VST3 from one lane, EI register group (VD_LANE modes).  Operand 2 is the
;; lane.  The output code names three DImode registers at regno, regno + 2
;; and regno + 4 and prints each with the converted lane index.
;; NOTE(review): the address is printed with plain %0 here, whereas the
;; vst2 lane patterns use the %A modifier -- presumably deliberate; confirm
;; before changing.
6110 (define_insn "neon_vst3_lane<mode>"
6111 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6112 (unspec:<V_three_elem>
6113 [(match_operand:EI 1 "s_register_operand" "w")
6114 (match_operand:SI 2 "immediate_operand" "i")
6115 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6116 UNSPEC_VST3_LANE))]
6117 "TARGET_NEON"
6118 {
6119 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6120 int regno = REGNO (operands[1]);
6121 rtx ops[5];
6122 ops[0] = operands[0];
6123 ops[1] = gen_rtx_REG (DImode, regno);
6124 ops[2] = gen_rtx_REG (DImode, regno + 2);
6125 ops[3] = gen_rtx_REG (DImode, regno + 4);
6126 ops[4] = GEN_INT (lane);
6127 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6128 ops);
6129 return "";
6130 }
6131 [(set_attr "type" "neon_store3_one_lane<q>")]
6132 )
6133
6134 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6135 ;; here on big endian targets.
;; VST3 from one lane, CI register group (VQ_HS modes).  When the converted
;; lane falls in the upper half of the vector, the lane is reduced by
;; max / 2 and the base register number is advanced by 2.  The three DImode
;; registers printed are then regno, regno + 4 and regno + 8.
;; NOTE(review): the address is printed with plain %0 rather than the %A
;; modifier -- presumably deliberate; confirm before changing.
6136 (define_insn "neon_vst3_lane<mode>"
6137 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6138 (unspec:<V_three_elem>
6139 [(match_operand:CI 1 "s_register_operand" "w")
6140 (match_operand:SI 2 "immediate_operand" "i")
6141 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6142 UNSPEC_VST3_LANE))]
6143 "TARGET_NEON"
6144 {
6145 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6146 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6147 int regno = REGNO (operands[1]);
6148 rtx ops[5];
6149 if (lane >= max / 2)
6150 {
6151 lane -= max / 2;
6152 regno += 2;
6153 }
6154 ops[0] = operands[0];
6155 ops[1] = gen_rtx_REG (DImode, regno);
6156 ops[2] = gen_rtx_REG (DImode, regno + 4);
6157 ops[3] = gen_rtx_REG (DImode, regno + 8);
6158 ops[4] = GEN_INT (lane);
6159 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6160 ops);
6161 return "";
6162 }
6163 [(set_attr "type" "neon_store3_one_lane<q>")]
6164 )
6165
;; Four-vector vec_load_lanes expander for D-register (VDX) element modes.
;; The result is an OI-mode register group and the unspec is UNSPEC_VLD4;
;; the UNSPEC_VSTRUCTDUMMY unspec only carries the iterated vector mode.
;; Same RTL shape as the OI neon_vld4<mode> insn.
6166 (define_expand "vec_load_lanesoi<mode>"
6167 [(set (match_operand:OI 0 "s_register_operand")
6168 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6169 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6170 UNSPEC_VLD4))]
6171 "TARGET_NEON")
6172
;; Four-vector structure load into an OI-mode register group (VDX modes).
;; For 64-bit elements the output code emits vld1.64 instead of vld4, and
;; the "type" attribute switches between the load1 and load4 four-register
;; classes on the same element-size test.
6173 (define_insn "neon_vld4<mode>"
6174 [(set (match_operand:OI 0 "s_register_operand" "=w")
6175 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6176 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6177 UNSPEC_VLD4))]
6178 "TARGET_NEON"
6179 {
6180 if (<V_sz_elem> == 64)
6181 return "vld1.64\t%h0, %A1";
6182 else
6183 return "vld4.<V_sz_elem>\t%h0, %A1";
6184 }
6185 [(set (attr "type")
6186 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6187 (const_string "neon_load1_4reg<q>")
6188 (const_string "neon_load4_4reg<q>")))]
6189 )
6190
;; Four-vector vec_load_lanes expander for Q-register (VQ2) element modes.
;; The RTL template only declares the operands; the preparation code
;; forwards both operands to gen_neon_vld4<mode> and finishes with DONE, so
;; nothing from the template itself is emitted.
6191 (define_expand "vec_load_lanesxi<mode>"
6192 [(match_operand:XI 0 "s_register_operand")
6193 (match_operand:XI 1 "neon_struct_operand")
6194 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6195 "TARGET_NEON"
6196 {
6197 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
6198 DONE;
6199 })
6200
;; Q-register (VQ2) four-vector load, expanded as two instructions.  The
;; XI-mode memory operand is re-addressed as two consecutive OI-mode
;; references: the first at offset 0 is loaded by neon_vld4qa, the second at
;; GET_MODE_SIZE (OImode) is loaded by neon_vld4qb.  The second insn takes
;; operands[0] as an input too, so it is tied to the first insn's result.
6201 (define_expand "neon_vld4<mode>"
6202 [(match_operand:XI 0 "s_register_operand")
6203 (match_operand:XI 1 "neon_struct_operand")
6204 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6205 "TARGET_NEON"
6206 {
6207 rtx mem;
6208
6209 mem = adjust_address (operands[1], OImode, 0);
6210 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6211 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6212 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
6213 DONE;
6214 })
6215
;; First half of the Q-register four-vector load.  Reads an OI-mode memory
;; block into the DImode registers at regno, regno + 4, regno + 8 and
;; regno + 12 of the XI-mode destination group.
;; NOTE(review): these offsets presumably address the low D half of each of
;; the four Q registers -- confirm against the backend's numbering.
6216 (define_insn "neon_vld4qa<mode>"
6217 [(set (match_operand:XI 0 "s_register_operand" "=w")
6218 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6219 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6220 UNSPEC_VLD4A))]
6221 "TARGET_NEON"
6222 {
6223 int regno = REGNO (operands[0]);
6224 rtx ops[5];
6225 ops[0] = gen_rtx_REG (DImode, regno);
6226 ops[1] = gen_rtx_REG (DImode, regno + 4);
6227 ops[2] = gen_rtx_REG (DImode, regno + 8);
6228 ops[3] = gen_rtx_REG (DImode, regno + 12);
6229 ops[4] = operands[1];
6230 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6231 return "";
6232 }
6233 [(set_attr "type" "neon_load4_4reg<q>")]
6234 )
6235
;; Second half of the Q-register four-vector load.  Operand 2 is the
;; partially loaded XI group, tied to the destination (constraint "0").
;; Reads an OI-mode memory block into the DImode registers at regno + 2,
;; regno + 6, regno + 10 and regno + 14.
;; NOTE(review): these offsets presumably address the high D half of each
;; of the four Q registers -- confirm against the backend's numbering.
6236 (define_insn "neon_vld4qb<mode>"
6237 [(set (match_operand:XI 0 "s_register_operand" "=w")
6238 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6239 (match_operand:XI 2 "s_register_operand" "0")
6240 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6241 UNSPEC_VLD4B))]
6242 "TARGET_NEON"
6243 {
6244 int regno = REGNO (operands[0]);
6245 rtx ops[5];
6246 ops[0] = gen_rtx_REG (DImode, regno + 2);
6247 ops[1] = gen_rtx_REG (DImode, regno + 6);
6248 ops[2] = gen_rtx_REG (DImode, regno + 10);
6249 ops[3] = gen_rtx_REG (DImode, regno + 14);
6250 ops[4] = operands[1];
6251 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6252 return "";
6253 }
6254 [(set_attr "type" "neon_load4_4reg<q>")]
6255 )
6256
6257 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6258 ;; here on big endian targets.
;; VLD4 to one lane, OI register group (VD_LANE modes).  Operand 2 is the
;; previous group value tied to the destination and operand 3 is the lane.
;; The output code names four DImode registers at regno, regno + 2,
;; regno + 4 and regno + 6 and prints each with the converted lane index.
6259 (define_insn "neon_vld4_lane<mode>"
6260 [(set (match_operand:OI 0 "s_register_operand" "=w")
6261 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6262 (match_operand:OI 2 "s_register_operand" "0")
6263 (match_operand:SI 3 "immediate_operand" "i")
6264 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6265 UNSPEC_VLD4_LANE))]
6266 "TARGET_NEON"
6267 {
6268 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6269 int regno = REGNO (operands[0]);
6270 rtx ops[6];
6271 ops[0] = gen_rtx_REG (DImode, regno);
6272 ops[1] = gen_rtx_REG (DImode, regno + 2);
6273 ops[2] = gen_rtx_REG (DImode, regno + 4);
6274 ops[3] = gen_rtx_REG (DImode, regno + 6);
6275 ops[4] = operands[1];
6276 ops[5] = GEN_INT (lane);
6277 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6278 ops);
6279 return "";
6280 }
6281 [(set_attr "type" "neon_load4_one_lane<q>")]
6282 )
6283
6284 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6285 ;; here on big endian targets.
;; VLD4 to one lane, XI register group (VQ_HS modes).  When the converted
;; lane falls in the upper half of the vector, the lane is reduced by
;; max / 2 and the base register number is advanced by 2.  The four DImode
;; registers printed are then regno, regno + 4, regno + 8 and regno + 12.
6286 (define_insn "neon_vld4_lane<mode>"
6287 [(set (match_operand:XI 0 "s_register_operand" "=w")
6288 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6289 (match_operand:XI 2 "s_register_operand" "0")
6290 (match_operand:SI 3 "immediate_operand" "i")
6291 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6292 UNSPEC_VLD4_LANE))]
6293 "TARGET_NEON"
6294 {
6295 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6296 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6297 int regno = REGNO (operands[0]);
6298 rtx ops[6];
6299 if (lane >= max / 2)
6300 {
6301 lane -= max / 2;
6302 regno += 2;
6303 }
6304 ops[0] = gen_rtx_REG (DImode, regno);
6305 ops[1] = gen_rtx_REG (DImode, regno + 4);
6306 ops[2] = gen_rtx_REG (DImode, regno + 8);
6307 ops[3] = gen_rtx_REG (DImode, regno + 12);
6308 ops[4] = operands[1];
6309 ops[5] = GEN_INT (lane);
6310 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6311 ops);
6312 return "";
6313 }
6314 [(set_attr "type" "neon_load4_one_lane<q>")]
6315 )
6316
;; VLD4 "dup" form for VDX modes.  Modes with more than one element emit
;; vld4 with the all-lanes "[]" syntax on four DImode registers; modes with a
;; single element emit vld1 on the whole destination instead.  The "type"
;; attribute makes the same distinction via V_mode_nunits.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  const int first = REGNO (operands[0]);
  rtx xops[5];
  int i;

  /* Single-element modes use the vld1 form.  */
  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* Four registers at hard-register offsets 0, 2, 4 and 6.  */
  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, first + 2 * i);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   xops);
  return "";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load4_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
6345
;; Named expander for storing an OImode register value to a structure memory
;; operand for VDX modes.  It has no preparation statements: the RTL template
;; below (an UNSPEC_VST4 set) is emitted as written.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4))]
  "TARGET_NEON"
)
6352
;; Store an OImode register value to memory for VDX modes.  When the element
;; size is 64 bits the vst1.64 form is used, otherwise vst4 with the mode's
;; element size; the "type" attribute follows the same split.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst4.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_4reg<q>")
                      (const_string "neon_store4_4reg<q>")))]
)
6370
;; Named expander for storing an XImode register value for VQ2 modes.  It
;; simply forwards its two operands to the neon_vst4 expander for the same
;; mode.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_mem = operands[0];
  rtx src_regs = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest_mem, src_regs));
  DONE;
}
)
6380
;; Expand an XImode VST4 (VQ2 modes) into two OImode stores: neon_vst4qa to
;; the start of the destination, then neon_vst4qb to the address one OImode
;; size further on.  Both receive the full XImode source register.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode-sized part of the destination.  */
  rtx first_part = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_part, operands[1]));

  /* Second part, GET_MODE_SIZE (OImode) bytes after the first.  */
  rtx second_part = adjust_address (first_part, OImode,
                                    GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_part, operands[1]));
  DONE;
}
)
6395
;; "a" part of the XImode VST4: store to an OImode memory operand the four
;; DImode registers of the XImode source found at hard-register offsets
;; 0, 4, 8 and 12 from its first register.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];
  int i;

  xops[0] = operands[0];
  /* Registers base, base + 4, base + 8 and base + 12.  */
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6415
;; "b" part of the XImode VST4: store to an OImode memory operand the four
;; DImode registers of the XImode source found at hard-register offsets
;; 2, 6, 10 and 14 from its first register.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[5];
  int i;

  xops[0] = operands[0];
  /* Registers base, base + 4, base + 8 and base + 12.  */
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6435
;; Store a single lane from each of four DImode registers of an OImode value
;; (VD_LANE modes).  The lane number from operand 2 is passed through
;; NEON_ENDIAN_LANE_N before it is printed; see the comment on neon_vld1_lane
;; for the reason why the lane numbers are reversed on big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const int first = REGNO (operands[1]);
  rtx xops[6];
  int i;

  xops[0] = operands[0];
  /* Four registers at hard-register offsets 0, 2, 4 and 6.  */
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, first + 2 * i);
  xops[5] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
6462
;; Store a single lane from four registers of an XImode value (VQ_HS modes).
;; The lane number from operand 2 is passed through NEON_ENDIAN_LANE_N; see
;; the comment on neon_vld1_lane for the reason why the lane numbers are
;; reversed on big-endian targets.  A lane in the upper half of the vector is
;; rebased to the lower half and the register set is moved on by two
;; hard-register numbers.
;;
;; This is a one-lane store, so it is typed neon_store4_one_lane<q> like the
;; D-register neon_vst4_lane pattern, not as a whole-register
;; neon_store4_4reg<q> store.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int first = REGNO (operands[1]);
  rtx xops[6];
  int i;

  if (lane >= half)
    {
      lane -= half;
      first += 2;
    }

  xops[0] = operands[0];
  /* Four registers at hard-register offsets 0, 4, 8 and 12.  */
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, first + 4 * i);
  xops[5] = GEN_INT (lane);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
6495
;; Extend (SE) the half of operand 1 selected by operand 2, which must
;; satisfy vect_par_constant_low, producing a <V_unpack> result with one
;; vmovl.  Only enabled when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6505
;; Extend (SE) the half of operand 1 selected by operand 2, which must
;; satisfy vect_par_constant_high, producing a <V_unpack> result with one
;; vmovl.  Only enabled when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6515
;; Unpack the high half of a VU-mode vector.  Builds a PARALLEL holding the
;; element indices nunits/2 .. nunits-1 and hands it, together with the two
;; operands, to neon_vec_unpack<US>_hi_<mode>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec indices = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (indices, k) = GEN_INT (half + k);

  rtx selector = gen_rtx_PARALLEL (<MODE>mode, indices);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], operands[1],
                                                 selector));
  DONE;
}
)
6534
;; Unpack the low half of a VU-mode vector.  Builds a PARALLEL holding the
;; element indices 0 .. nunits/2-1 and hands it, together with the two
;; operands, to neon_vec_unpack<US>_lo_<mode>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec indices = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (indices, k) = GEN_INT (k);

  rtx selector = gen_rtx_PARALLEL (<MODE>mode, indices);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], operands[1],
                                                 selector));
  DONE;
}
)
6552
;; Widening multiply of the halves of operands 1 and 3 selected by operand 2
;; (a vect_par_constant_low parallel, reused for both inputs via match_dup).
;; Each half is extended (SE) to <V_unpack> before the multiply; emitted as a
;; single vmull.  Only enabled when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_low" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6565
;; Widening multiply of the low halves of two VU-mode vectors.  Builds a
;; PARALLEL of the indices 0 .. nunits/2-1 and emits
;; neon_vec_<US>mult_lo_<mode>, passing the parallel as that pattern's
;; operand 2 and the second input as its operand 3.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec indices = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (indices, k) = GEN_INT (k);

  rtx selector = gen_rtx_PARALLEL (<MODE>mode, indices);
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], operands[1],
                                               selector, operands[2]));
  DONE;
}
)
6586
;; Widening multiply of the halves of operands 1 and 3 selected by operand 2
;; (a vect_par_constant_high parallel, reused for both inputs via match_dup).
;; Each half is extended (SE) to <V_unpack> before the multiply; emitted as a
;; single vmull.  Only enabled when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_high" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6599
;; Widening multiply of the high halves of two VU-mode vectors.  Builds a
;; PARALLEL of the indices nunits/2 .. nunits-1 and emits
;; neon_vec_<US>mult_hi_<mode>, passing the parallel as that pattern's
;; operand 2 and the second input as its operand 3.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec indices = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (indices, k) = GEN_INT (half + k);

  rtx selector = gen_rtx_PARALLEL (<MODE>mode, indices);
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], operands[1],
                                               selector, operands[2]));
  DONE;
}
)
6621
;; Widening shift left: shift a VW-mode register by the constant in operand 2
;; and extend (SE) the result to <V_widen>.  Emitted as a single vshll.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (ashift:VW
            (match_operand:VW 1 "register_operand" "w")
            (match_operand:<V_innermode> 2
              "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
6632
;; Widening shift left of the low half of a VU-mode vector.  The half is
;; taken as the <V_HALF>-mode subreg of operand 1 at byte offset 0 and passed
;; to neon_vec_<US>shiftl_<V_half> together with the shift amount.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx low_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                      <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], low_half,
                                                operands[2]));
  DONE;
}
)
6645
;; Widening shift left of the high half of a VU-mode vector.  The half is
;; taken as the <V_HALF>-mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and passed to neon_vec_<US>shiftl_<V_half>
;; together with the shift amount.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx high_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                       <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], high_half,
                                                operands[2]));
  DONE;
}
)
6659
6660 ;; Vectorize for non-neon-quad case
;; Extend (SE) a whole VDI-mode register to <V_widen> with a single vmovl.
;; NOTE(review): the vmovl patterns neon_vec_unpack<US>_lo/hi_<mode> are typed
;; neon_shift_imm_long; confirm that neon_move is intended here.
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)
6668
;; Unpack-low for VDI modes: widen the whole input into a fresh <V_widen>
;; pseudo with neon_unpack<US>_<mode>, then copy its low part into operand 0
;; with neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
}
)
6681
;; Unpack-high for VDI modes: widen the whole input into a fresh <V_widen>
;; pseudo with neon_unpack<US>_<mode>, then copy its high part into operand 0
;; with neon_vget_high<V_widen_l>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
}
)
6694
;; Widening multiply of two whole VDI-mode registers: both inputs are
;; extended (SE) to <V_widen> and multiplied.  Emitted as a single vmull.
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (mult:<V_widen>
          (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w"))
          (SE:<V_widen> (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6705
;; Widening multiply, high part, for VDI modes: compute the full <V_widen>
;; product into a fresh pseudo with neon_vec_<US>mult_<mode>, then copy its
;; high part into operand 0 with neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                            operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
}
)
6720
;; Widening multiply, low part, for VDI modes: compute the full <V_widen>
;; product into a fresh pseudo with neon_vec_<US>mult_<mode>, then copy its
;; low part into operand 0 with neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                            operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
  DONE;
}
)
6735
;; Widening shift left, high part, for VDI modes: compute the full <V_widen>
;; result into a fresh pseudo with neon_vec_<US>shiftl_<mode>, then copy its
;; high part into operand 0 with neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                              operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6749
;; Widening shift left, low part, for VDI modes: compute the full <V_widen>
;; result into a fresh pseudo with neon_vec_<US>shiftl_<mode>, then copy its
;; low part into operand 0 with neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                              operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6763
6764 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6765 ; because the ordering of vector elements in Q registers is different from what
6766 ; the semantics of the instructions require.
6767
;; Pack two VN-mode vectors into one <V_narrow_pack> result by truncating
;; each input to <V_narrow> and concatenating them.  Two vmovn instructions
;; are emitted (hence length 8), one per half of the destination; the
;; destination is earlyclobber ("=&w").  Only enabled when !BYTES_BIG_ENDIAN.
(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
        (vec_concat:<V_narrow_pack>
          (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w"))
          (truncate:<V_narrow> (match_operand:VN 2 "register_operand" "w"))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
6780
6781 ;; For the non-quad case.
;; Truncate one VN-mode register to <V_narrow> with a single vmovn.
;; Only enabled when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
        (truncate:<V_narrow>
          (match_operand:VN 1 "register_operand" "w")))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_move_narrow_q")]
)
6789
;; Pack-and-truncate for VSHFT modes: place operand 1 in the low half and
;; operand 2 in the high half of a fresh <V_DOUBLE>-mode pseudo (via
;; move_lo_quad / move_hi_quad), then truncate that pseudo into operand 0
;; with neon_vec_pack_trunc_<V_double>.  Only enabled when !BYTES_BIG_ENDIAN.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand")
   (match_operand:VSHFT 1 "register_operand")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx combined = gen_reg_rtx (<V_DOUBLE>mode);

  emit_insn (gen_move_lo_quad_<V_double> (combined, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (combined, operands[2]));
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], combined));
  DONE;
}
)
6803
;; Match abs (op1 - op2) on VF modes, written with an RTL minus, and emit a
;; single vabd.  Only enabled under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_2"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (abs:VF
          (minus:VF (match_operand:VF 1 "s_register_operand" "w")
                    (match_operand:VF 2 "s_register_operand" "w"))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
6812
;; Match abs of an UNSPEC_VSUB of two VF-mode registers and emit a single
;; vabd.  Only enabled under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_3"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (abs:VF
          (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
                      (match_operand:VF 2 "s_register_operand" "w")]
                     UNSPEC_VSUB)))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
6822
6823 ;; Copy from core-to-neon regs, then extend, not vice-versa
6824
;; After reload, when the DImode destination is a VFP register
;; (IS_VFP_REGNUM), split sign_extend SI->DI into: duplicate the source
;; across a V2SI view of the destination register, then arithmetic-shift the
;; DImode destination right by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination register viewed in V2SImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
}
)
6834
;; After reload, when the DImode destination is a VFP register
;; (IS_VFP_REGNUM), split sign_extend HI->DI into: duplicate the source
;; across a V4HI view of the destination register, then arithmetic-shift the
;; DImode destination right by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination register viewed in V4HImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
}
)
6844
;; After reload, when the DImode destination is a VFP register
;; (IS_VFP_REGNUM), split sign_extend QI->DI into: duplicate the source
;; across a V8QI view of the destination register, then arithmetic-shift the
;; DImode destination right by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination register viewed in V8QImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
}
)
6854
;; After reload, when the DImode destination is a VFP register
;; (IS_VFP_REGNUM), split zero_extend SI->DI into: duplicate the source
;; across a V2SI view of the destination register, then logical-shift the
;; DImode destination right by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination register viewed in V2SImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
}
)
6864
;; After reload, when the DImode destination is a VFP register
;; (IS_VFP_REGNUM), split zero_extend HI->DI into: duplicate the source
;; across a V4HI view of the destination register, then logical-shift the
;; DImode destination right by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination register viewed in V4HImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
}
)
6874
;; After reload, when the DImode destination is a VFP register
;; (IS_VFP_REGNUM), split zero_extend QI->DI into: duplicate the source
;; across a V8QI view of the destination register, then logical-shift the
;; DImode destination right by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination register viewed in V8QImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
}
)