Update fall through pattern for FP16 patterns in ARM.
gcc/config/arm/neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, Usi,r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
38 static char templ[40];
39
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
61 }
62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*")])
70
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
76 "TARGET_NEON
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
79 {
80 if (which_alternative == 2)
81 {
82 int width, is_valid;
83 static char templ[40];
84
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
87
88 gcc_assert (is_valid != 0);
89
90 if (width == 0)
91 return "vmov.f32\t%q0, %1 @ <mode>";
92 else
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
94
95 return templ;
96 }
97
98 switch (which_alternative)
99 {
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
106 }
107 }
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
116 /* We define these mov expanders to match the standard mov$a optab to prevent
117 the mid-end from trying to do a subreg for these modes, which is the most
118 inefficient way to expand the move. Also, big-endian subregs aren't
119 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
120 Without these RTL generation patterns the mid-end would attempt to take a
121 subreg and may ICE if it can't. */
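;; As an illustrative sketch (hypothetical user code, not taken from this
;; file): a value of one of the VSTRUCT modes, such as the OImode result of
;; vld2q_s32, is moved through the mov<mode> expander below instead of being
;; decomposed into subregs by the mid-end:
;;
;;   #include <arm_neon.h>
;;   void copy_pair (const int32_t *p, int32_t *q)
;;   {
;;     int32x4x2_t t = vld2q_s32 (p);   /* t occupies an OImode register group */
;;     vst2q_s32 (q, t);                /* moving/using t goes via mov<mode> */
;;   }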
122
123 (define_expand "movti"
124 [(set (match_operand:TI 0 "nonimmediate_operand" "")
125 (match_operand:TI 1 "general_operand" ""))]
126 "TARGET_NEON"
127 {
128 if (can_create_pseudo_p ())
129 {
130 if (!REG_P (operands[0]))
131 operands[1] = force_reg (TImode, operands[1]);
132 }
133 })
134
135 (define_expand "mov<mode>"
136 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
137 (match_operand:VSTRUCT 1 "general_operand" ""))]
138 "TARGET_NEON"
139 {
140 if (can_create_pseudo_p ())
141 {
142 if (!REG_P (operands[0]))
143 operands[1] = force_reg (<MODE>mode, operands[1]);
144 }
145 })
146
147 (define_expand "mov<mode>"
148 [(set (match_operand:VH 0 "s_register_operand")
149 (match_operand:VH 1 "s_register_operand"))]
150 "TARGET_NEON"
151 {
152 if (can_create_pseudo_p ())
153 {
154 if (!REG_P (operands[0]))
155 operands[1] = force_reg (<MODE>mode, operands[1]);
156 }
157 })
158
159 (define_insn "*neon_mov<mode>"
160 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
161 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
162 "TARGET_NEON
163 && (register_operand (operands[0], <MODE>mode)
164 || register_operand (operands[1], <MODE>mode))"
165 {
166 switch (which_alternative)
167 {
168 case 0: return "#";
169 case 1: case 2: return output_move_neon (operands);
170 default: gcc_unreachable ();
171 }
172 }
173 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
174 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
175
176 (define_split
177 [(set (match_operand:EI 0 "s_register_operand" "")
178 (match_operand:EI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
182 {
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
185 rtx dest[2], src[2];
186
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (DImode, rdest + 4);
190 src[1] = gen_rtx_REG (DImode, rsrc + 4);
191
192 neon_disambiguate_copy (operands, dest, src, 2);
193 })
194
195 (define_split
196 [(set (match_operand:OI 0 "s_register_operand" "")
197 (match_operand:OI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))]
201 {
202 int rdest = REGNO (operands[0]);
203 int rsrc = REGNO (operands[1]);
204 rtx dest[2], src[2];
205
206 dest[0] = gen_rtx_REG (TImode, rdest);
207 src[0] = gen_rtx_REG (TImode, rsrc);
208 dest[1] = gen_rtx_REG (TImode, rdest + 4);
209 src[1] = gen_rtx_REG (TImode, rsrc + 4);
210
211 neon_disambiguate_copy (operands, dest, src, 2);
212 })
213
214 (define_split
215 [(set (match_operand:CI 0 "s_register_operand" "")
216 (match_operand:CI 1 "s_register_operand" ""))]
217 "TARGET_NEON && reload_completed"
218 [(set (match_dup 0) (match_dup 1))
219 (set (match_dup 2) (match_dup 3))
220 (set (match_dup 4) (match_dup 5))]
221 {
222 int rdest = REGNO (operands[0]);
223 int rsrc = REGNO (operands[1]);
224 rtx dest[3], src[3];
225
226 dest[0] = gen_rtx_REG (TImode, rdest);
227 src[0] = gen_rtx_REG (TImode, rsrc);
228 dest[1] = gen_rtx_REG (TImode, rdest + 4);
229 src[1] = gen_rtx_REG (TImode, rsrc + 4);
230 dest[2] = gen_rtx_REG (TImode, rdest + 8);
231 src[2] = gen_rtx_REG (TImode, rsrc + 8);
232
233 neon_disambiguate_copy (operands, dest, src, 3);
234 })
235
236 (define_split
237 [(set (match_operand:XI 0 "s_register_operand" "")
238 (match_operand:XI 1 "s_register_operand" ""))]
239 "TARGET_NEON && reload_completed"
240 [(set (match_dup 0) (match_dup 1))
241 (set (match_dup 2) (match_dup 3))
242 (set (match_dup 4) (match_dup 5))
243 (set (match_dup 6) (match_dup 7))]
244 {
245 int rdest = REGNO (operands[0]);
246 int rsrc = REGNO (operands[1]);
247 rtx dest[4], src[4];
248
249 dest[0] = gen_rtx_REG (TImode, rdest);
250 src[0] = gen_rtx_REG (TImode, rsrc);
251 dest[1] = gen_rtx_REG (TImode, rdest + 4);
252 src[1] = gen_rtx_REG (TImode, rsrc + 4);
253 dest[2] = gen_rtx_REG (TImode, rdest + 8);
254 src[2] = gen_rtx_REG (TImode, rsrc + 8);
255 dest[3] = gen_rtx_REG (TImode, rdest + 12);
256 src[3] = gen_rtx_REG (TImode, rsrc + 12);
257
258 neon_disambiguate_copy (operands, dest, src, 4);
259 })
260
261 (define_expand "movmisalign<mode>"
262 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
263 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
264 UNSPEC_MISALIGNED_ACCESS))]
265 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
266 {
267 rtx adjust_mem;
268 /* This pattern is not permitted to fail during expansion: if both arguments
269 are non-registers (e.g. memory := constant, which can be created by the
270 auto-vectorizer), force operand 1 into a register. */
271 if (!s_register_operand (operands[0], <MODE>mode)
272 && !s_register_operand (operands[1], <MODE>mode))
273 operands[1] = force_reg (<MODE>mode, operands[1]);
274
275 if (s_register_operand (operands[0], <MODE>mode))
276 adjust_mem = operands[1];
277 else
278 adjust_mem = operands[0];
279
280 /* Legitimize address. */
281 if (!neon_vector_mem_operand (adjust_mem, 2, true))
282 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
283
284 })
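;; A minimal illustration (assumes -O2 -ftree-vectorize -mfpu=neon
;; -munaligned-access on a little-endian target; not taken from this file):
;;
;;   void add1 (unsigned char *p)      /* p may be arbitrarily aligned */
;;   {
;;     for (int i = 0; i < 16; i++)
;;       p[i] += 1;
;;   }
;;
;; The auto-vectorizer can use movmisalign<mode> for the accesses to *p,
;; which the insns below emit as vld1.8/vst1.8 with the default element
;; alignment instead of requiring a doubleword-aligned access.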
285
286 (define_insn "*movmisalign<mode>_neon_store"
287 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
288 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
289 UNSPEC_MISALIGNED_ACCESS))]
290 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
291 "vst1.<V_sz_elem>\t{%P1}, %A0"
292 [(set_attr "type" "neon_store1_1reg<q>")])
293
294 (define_insn "*movmisalign<mode>_neon_load"
295 [(set (match_operand:VDX 0 "s_register_operand" "=w")
296 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
297 " Um")]
298 UNSPEC_MISALIGNED_ACCESS))]
299 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
300 "vld1.<V_sz_elem>\t{%P0}, %A1"
301 [(set_attr "type" "neon_load1_1reg<q>")])
302
303 (define_insn "*movmisalign<mode>_neon_store"
304 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
305 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
306 UNSPEC_MISALIGNED_ACCESS))]
307 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
308 "vst1.<V_sz_elem>\t{%q1}, %A0"
309 [(set_attr "type" "neon_store1_1reg<q>")])
310
311 (define_insn "*movmisalign<mode>_neon_load"
312 [(set (match_operand:VQX 0 "s_register_operand" "=w")
313 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
314 " Um")]
315 UNSPEC_MISALIGNED_ACCESS))]
316 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
317 "vld1.<V_sz_elem>\t{%q0}, %A1"
318 [(set_attr "type" "neon_load1_1reg<q>")])
319
320 (define_insn "vec_set<mode>_internal"
321 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
322 (vec_merge:VD_LANE
323 (vec_duplicate:VD_LANE
324 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
325 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
326 (match_operand:SI 2 "immediate_operand" "i,i")))]
327 "TARGET_NEON"
328 {
329 int elt = ffs ((int) INTVAL (operands[2])) - 1;
330 if (BYTES_BIG_ENDIAN)
331 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
332 operands[2] = GEN_INT (elt);
333
334 if (which_alternative == 0)
335 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
336 else
337 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
338 }
339 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
340
341 (define_insn "vec_set<mode>_internal"
342 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
343 (vec_merge:VQ2
344 (vec_duplicate:VQ2
345 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
346 (match_operand:VQ2 3 "s_register_operand" "0,0")
347 (match_operand:SI 2 "immediate_operand" "i,i")))]
348 "TARGET_NEON"
349 {
350 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
351 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
352 int elt = elem % half_elts;
353 int hi = (elem / half_elts) * 2;
354 int regno = REGNO (operands[0]);
355
356 if (BYTES_BIG_ENDIAN)
357 elt = half_elts - 1 - elt;
358
359 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
360 operands[2] = GEN_INT (elt);
361
362 if (which_alternative == 0)
363 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
364 else
365 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
366 }
367 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
368 )
369
370 (define_insn "vec_setv2di_internal"
371 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
372 (vec_merge:V2DI
373 (vec_duplicate:V2DI
374 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
375 (match_operand:V2DI 3 "s_register_operand" "0,0")
376 (match_operand:SI 2 "immediate_operand" "i,i")))]
377 "TARGET_NEON"
378 {
379 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
380 int regno = REGNO (operands[0]) + 2 * elem;
381
382 operands[0] = gen_rtx_REG (DImode, regno);
383
384 if (which_alternative == 0)
385 return "vld1.64\t%P0, %A1";
386 else
387 return "vmov\t%P0, %Q1, %R1";
388 }
389 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
390 )
391
392 (define_expand "vec_set<mode>"
393 [(match_operand:VDQ 0 "s_register_operand" "")
394 (match_operand:<V_elem> 1 "s_register_operand" "")
395 (match_operand:SI 2 "immediate_operand" "")]
396 "TARGET_NEON"
397 {
398 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
399 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
400 GEN_INT (elem), operands[0]));
401 DONE;
402 })
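;; Illustrative only (a sketch using the generic vector extension, not code
;; from this file): storing to a single lane of a vector that lives in a
;; register, e.g.
;;
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;   v4si set_lane1 (v4si v, int x) { v[1] = x; return v; }
;;
;; is normally expanded through vec_set<mode>; the expander above turns the
;; lane number 1 into the one-hot mask (1 << 1) that the *_internal patterns
;; expect in operand 2 of the vec_merge.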
403
404 (define_insn "vec_extract<mode><V_elem_l>"
405 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
406 (vec_select:<V_elem>
407 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
408 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
409 "TARGET_NEON"
410 {
411 if (BYTES_BIG_ENDIAN)
412 {
413 int elt = INTVAL (operands[2]);
414 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
415 operands[2] = GEN_INT (elt);
416 }
417
418 if (which_alternative == 0)
419 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
420 else
421 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
422 }
423 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
424 )
425
426 (define_insn "vec_extract<mode><V_elem_l>"
427 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
428 (vec_select:<V_elem>
429 (match_operand:VQ2 1 "s_register_operand" "w,w")
430 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
431 "TARGET_NEON"
432 {
433 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
434 int elt = INTVAL (operands[2]) % half_elts;
435 int hi = (INTVAL (operands[2]) / half_elts) * 2;
436 int regno = REGNO (operands[1]);
437
438 if (BYTES_BIG_ENDIAN)
439 elt = half_elts - 1 - elt;
440
441 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
442 operands[2] = GEN_INT (elt);
443
444 if (which_alternative == 0)
445 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
446 else
447 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
448 }
449 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
450 )
451
452 (define_insn "vec_extractv2didi"
453 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
454 (vec_select:DI
455 (match_operand:V2DI 1 "s_register_operand" "w,w")
456 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
457 "TARGET_NEON"
458 {
459 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
460
461 operands[1] = gen_rtx_REG (DImode, regno);
462
463 if (which_alternative == 0)
464 return "vst1.64\t{%P1}, %A0 @ v2di";
465 else
466 return "vmov\t%Q0, %R0, %P1 @ v2di";
467 }
468 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
469 )
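;; The read counterpart, for illustration (again generic vector code, not
;; from this file):
;;
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;   int get_lane2 (v4si v) { return v[2]; }
;;
;; goes through vec_extract<mode><V_elem_l> and becomes either a single-lane
;; vst1 (when the result is stored directly to memory) or a "vmov r?, d?[?]"
;; to a core register, matching the two alternatives of the insns above.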
470
471 (define_expand "vec_init<mode><V_elem_l>"
472 [(match_operand:VDQ 0 "s_register_operand" "")
473 (match_operand 1 "" "")]
474 "TARGET_NEON"
475 {
476 neon_expand_vector_init (operands[0], operands[1]);
477 DONE;
478 })
479
480 ;; Doubleword and quadword arithmetic.
481
482 ;; NOTE: some other instructions also support 64-bit integer
483 ;; element size, which we could potentially use for "long long" operations.
484
485 (define_insn "*add<mode>3_neon"
486 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
487 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
488 (match_operand:VDQ 2 "s_register_operand" "w")))]
489 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
490 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
491 [(set (attr "type")
492 (if_then_else (match_test "<Is_float_mode>")
493 (const_string "neon_fp_addsub_s<q>")
494 (const_string "neon_add<q>")))]
495 )
496
497 ;; As with SFmode, full support for HFmode vector arithmetic is only available
498 ;; when -funsafe-math-optimizations is enabled.
499
500 (define_insn "add<mode>3"
501 [(set
502 (match_operand:VH 0 "s_register_operand" "=w")
503 (plus:VH
504 (match_operand:VH 1 "s_register_operand" "w")
505 (match_operand:VH 2 "s_register_operand" "w")))]
506 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
507 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
508 [(set (attr "type")
509 (if_then_else (match_test "<Is_float_mode>")
510 (const_string "neon_fp_addsub_s<q>")
511 (const_string "neon_add<q>")))]
512 )
513
514 (define_insn "add<mode>3_fp16"
515 [(set
516 (match_operand:VH 0 "s_register_operand" "=w")
517 (plus:VH
518 (match_operand:VH 1 "s_register_operand" "w")
519 (match_operand:VH 2 "s_register_operand" "w")))]
520 "TARGET_NEON_FP16INST"
521 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
522 [(set (attr "type")
523 (if_then_else (match_test "<Is_float_mode>")
524 (const_string "neon_fp_addsub_s<q>")
525 (const_string "neon_add<q>")))]
526 )
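;; A brief illustration (assumes a target with the ARMv8.2-A FP16 extension,
;; e.g. -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8 -mfloat-abi=hard; this is
;; a sketch, not code from this file):
;;
;;   #include <arm_neon.h>
;;   float16x4_t add_h (float16x4_t a, float16x4_t b)
;;   {
;;     return vadd_f16 (a, b);          /* vadd.f16 on D registers */
;;   }
;;
;; The intrinsic is implemented via add<mode>3_fp16, which is available
;; whenever TARGET_NEON_FP16INST holds, whereas the generic add<mode>3 above
;; is only enabled with -funsafe-math-optimizations, mirroring the SFmode
;; restriction described in the comment above.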
527
528 (define_insn "adddi3_neon"
529 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
530 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
531 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
532 (clobber (reg:CC CC_REGNUM))]
533 "TARGET_NEON"
534 {
535 switch (which_alternative)
536 {
537 case 0: /* fall through */
538 case 3: return "vadd.i64\t%P0, %P1, %P2";
539 case 1: return "#";
540 case 2: return "#";
541 case 4: return "#";
542 case 5: return "#";
543 case 6: return "#";
544 default: gcc_unreachable ();
545 }
546 }
547 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
548 multiple,multiple,multiple")
549 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
550 (set_attr "length" "*,8,8,*,8,8,8")
551 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
552 )
553
554 (define_insn "*sub<mode>3_neon"
555 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
556 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
557 (match_operand:VDQ 2 "s_register_operand" "w")))]
558 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
559 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
560 [(set (attr "type")
561 (if_then_else (match_test "<Is_float_mode>")
562 (const_string "neon_fp_addsub_s<q>")
563 (const_string "neon_sub<q>")))]
564 )
565
566 (define_insn "sub<mode>3"
567 [(set
568 (match_operand:VH 0 "s_register_operand" "=w")
569 (minus:VH
570 (match_operand:VH 1 "s_register_operand" "w")
571 (match_operand:VH 2 "s_register_operand" "w")))]
572 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
573 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
574 [(set_attr "type" "neon_sub<q>")]
575 )
576
577 (define_insn "sub<mode>3_fp16"
578 [(set
579 (match_operand:VH 0 "s_register_operand" "=w")
580 (minus:VH
581 (match_operand:VH 1 "s_register_operand" "w")
582 (match_operand:VH 2 "s_register_operand" "w")))]
583 "TARGET_NEON_FP16INST"
584 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
585 [(set_attr "type" "neon_sub<q>")]
586 )
587
588 (define_insn "subdi3_neon"
589 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
590 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
591 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
592 (clobber (reg:CC CC_REGNUM))]
593 "TARGET_NEON"
594 {
595 switch (which_alternative)
596 {
597 case 0: /* fall through */
598 case 4: return "vsub.i64\t%P0, %P1, %P2";
599 case 1: /* fall through */
600 case 2: /* fall through */
601 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
602 default: gcc_unreachable ();
603 }
604 }
605 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
606 (set_attr "conds" "*,clob,clob,clob,*")
607 (set_attr "length" "*,8,8,8,*")
608 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
609 )
610
611 (define_insn "*mul<mode>3_neon"
612 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
613 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
614 (match_operand:VDQW 2 "s_register_operand" "w")))]
615 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
616 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
617 [(set (attr "type")
618 (if_then_else (match_test "<Is_float_mode>")
619 (const_string "neon_fp_mul_s<q>")
620 (const_string "neon_mul_<V_elem_ch><q>")))]
621 )
622
623 (define_insn "mul<mode>3add<mode>_neon"
624 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
625 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
626 (match_operand:VDQW 3 "s_register_operand" "w"))
627 (match_operand:VDQW 1 "s_register_operand" "0")))]
628 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
629 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
630 [(set (attr "type")
631 (if_then_else (match_test "<Is_float_mode>")
632 (const_string "neon_fp_mla_s<q>")
633 (const_string "neon_mla_<V_elem_ch><q>")))]
634 )
635
636 (define_insn "mul<mode>3add<mode>_neon"
637 [(set (match_operand:VH 0 "s_register_operand" "=w")
638 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
639 (match_operand:VH 3 "s_register_operand" "w"))
640 (match_operand:VH 1 "s_register_operand" "0")))]
641 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
642 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
643 [(set_attr "type" "neon_fp_mla_s<q>")]
644 )
645
646 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
647 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
648 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
649 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
650 (match_operand:VDQW 3 "s_register_operand" "w"))))]
651 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
652 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
653 [(set (attr "type")
654 (if_then_else (match_test "<Is_float_mode>")
655 (const_string "neon_fp_mla_s<q>")
656 (const_string "neon_mla_<V_elem_ch><q>")))]
657 )
658
659 ;; Fused multiply-accumulate
660 ;; We define each insn twice here:
661 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
662 ;; phase can use it when converting to FMA.
663 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
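;; As a sketch (hypothetical user code; assumes -mfpu=neon-vfpv4 or later):
;;
;;   #include <arm_neon.h>
;;   float32x4_t do_fma (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmaq_f32 (acc, a, b);    /* acc + a * b with a single rounding */
;;   }
;;
;; The intrinsic relies on the *_intrinsic variants defined below, which do
;; not require -funsafe-math-optimizations, while the standard fma<mode>4
;; names are only exposed to the compiler's own transformations under unsafe
;; math.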
664 (define_insn "fma<VCVTF:mode>4"
665 [(set (match_operand:VCVTF 0 "register_operand" "=w")
666 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
667 (match_operand:VCVTF 2 "register_operand" "w")
668 (match_operand:VCVTF 3 "register_operand" "0")))]
669 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
670 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
671 [(set_attr "type" "neon_fp_mla_s<q>")]
672 )
673
674 (define_insn "fma<VCVTF:mode>4_intrinsic"
675 [(set (match_operand:VCVTF 0 "register_operand" "=w")
676 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
677 (match_operand:VCVTF 2 "register_operand" "w")
678 (match_operand:VCVTF 3 "register_operand" "0")))]
679 "TARGET_NEON && TARGET_FMA"
680 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
681 [(set_attr "type" "neon_fp_mla_s<q>")]
682 )
683
684 (define_insn "fma<VH:mode>4"
685 [(set (match_operand:VH 0 "register_operand" "=w")
686 (fma:VH
687 (match_operand:VH 1 "register_operand" "w")
688 (match_operand:VH 2 "register_operand" "w")
689 (match_operand:VH 3 "register_operand" "0")))]
690 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
691 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
692 [(set_attr "type" "neon_fp_mla_s<q>")]
693 )
694
695 (define_insn "fma<VH:mode>4_intrinsic"
696 [(set (match_operand:VH 0 "register_operand" "=w")
697 (fma:VH
698 (match_operand:VH 1 "register_operand" "w")
699 (match_operand:VH 2 "register_operand" "w")
700 (match_operand:VH 3 "register_operand" "0")))]
701 "TARGET_NEON_FP16INST"
702 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703 [(set_attr "type" "neon_fp_mla_s<q>")]
704 )
705
706 (define_insn "*fmsub<VCVTF:mode>4"
707 [(set (match_operand:VCVTF 0 "register_operand" "=w")
708 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
709 (match_operand:VCVTF 2 "register_operand" "w")
710 (match_operand:VCVTF 3 "register_operand" "0")))]
711 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
712 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
713 [(set_attr "type" "neon_fp_mla_s<q>")]
714 )
715
716 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
717 [(set (match_operand:VCVTF 0 "register_operand" "=w")
718 (fma:VCVTF
719 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
720 (match_operand:VCVTF 2 "register_operand" "w")
721 (match_operand:VCVTF 3 "register_operand" "0")))]
722 "TARGET_NEON && TARGET_FMA"
723 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724 [(set_attr "type" "neon_fp_mla_s<q>")]
725 )
726
727 (define_insn "fmsub<VH:mode>4_intrinsic"
728 [(set (match_operand:VH 0 "register_operand" "=w")
729 (fma:VH
730 (neg:VH (match_operand:VH 1 "register_operand" "w"))
731 (match_operand:VH 2 "register_operand" "w")
732 (match_operand:VH 3 "register_operand" "0")))]
733 "TARGET_NEON_FP16INST"
734 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735 [(set_attr "type" "neon_fp_mla_s<q>")]
736 )
737
738 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
739 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
740 (unspec:VCVTF [(match_operand:VCVTF 1
741 "s_register_operand" "w")]
742 NEON_VRINT))]
743 "TARGET_NEON && TARGET_VFP5"
744 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
745 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
746 )
747
748 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
749 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
750 (FIXUORS:<V_cmp_result> (unspec:VCVTF
751 [(match_operand:VCVTF 1 "register_operand" "w")]
752 NEON_VCVT)))]
753 "TARGET_NEON && TARGET_VFP5"
754 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
755 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
756 (set_attr "predicable" "no")]
757 )
758
759 (define_insn "ior<mode>3"
760 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
761 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
762 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
763 "TARGET_NEON"
764 {
765 switch (which_alternative)
766 {
767 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
768 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
769 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
770 default: gcc_unreachable ();
771 }
772 }
773 [(set_attr "type" "neon_logic<q>")]
774 )
775
776 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
777 ;; vorr. We support the pseudo-instruction vand instead, because that
778 ;; corresponds to the canonical form the middle-end expects to use for
779 ;; immediate bitwise-ANDs.
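;; For example (an illustrative sketch, not from this file): masking each
;; lane with a constant whose complement is a valid Neon modified immediate,
;;
;;   typedef unsigned int v4su __attribute__ ((vector_size (16)));
;;   v4su clear_low_byte (v4su x) { return x & 0xffffff00u; }
;;
;; can match the immediate alternative of and<mode>3 below; the compiler
;; prints the canonical vand form with that immediate and leaves it to the
;; assembler to encode the operation as the equivalent vbic.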
780
781 (define_insn "and<mode>3"
782 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
783 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
784 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
785 "TARGET_NEON"
786 {
787 switch (which_alternative)
788 {
789 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
790 case 1: return neon_output_logic_immediate ("vand", &operands[2],
791 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
792 default: gcc_unreachable ();
793 }
794 }
795 [(set_attr "type" "neon_logic<q>")]
796 )
797
798 (define_insn "orn<mode>3_neon"
799 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
800 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
801 (match_operand:VDQ 1 "s_register_operand" "w")))]
802 "TARGET_NEON"
803 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
804 [(set_attr "type" "neon_logic<q>")]
805 )
806
807 ;; TODO: investigate whether we should disable
808 ;; this and bicdi3_neon for the A8 in line with the other
809 ;; changes above.
810 (define_insn_and_split "orndi3_neon"
811 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
812 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
813 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
814 "TARGET_NEON"
815 "@
816 vorn\t%P0, %P1, %P2
817 #
818 #
819 #"
820 "reload_completed &&
821 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
822 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
823 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
824 "
825 {
826 if (TARGET_THUMB2)
827 {
828 operands[3] = gen_highpart (SImode, operands[0]);
829 operands[0] = gen_lowpart (SImode, operands[0]);
830 operands[4] = gen_highpart (SImode, operands[2]);
831 operands[2] = gen_lowpart (SImode, operands[2]);
832 operands[5] = gen_highpart (SImode, operands[1]);
833 operands[1] = gen_lowpart (SImode, operands[1]);
834 }
835 else
836 {
837 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
838 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
839 DONE;
840 }
841 }"
842 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
843 (set_attr "length" "*,16,8,8")
844 (set_attr "arch" "any,a,t2,t2")]
845 )
846
847 (define_insn "bic<mode>3_neon"
848 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
849 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
850 (match_operand:VDQ 1 "s_register_operand" "w")))]
851 "TARGET_NEON"
852 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
853 [(set_attr "type" "neon_logic<q>")]
854 )
855
856 ;; Compare to *anddi_notdi_di.
857 (define_insn "bicdi3_neon"
858 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
859 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
860 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
861 "TARGET_NEON"
862 "@
863 vbic\t%P0, %P1, %P2
864 #
865 #"
866 [(set_attr "type" "neon_logic,multiple,multiple")
867 (set_attr "length" "*,8,8")]
868 )
869
870 (define_insn "xor<mode>3"
871 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
872 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
873 (match_operand:VDQ 2 "s_register_operand" "w")))]
874 "TARGET_NEON"
875 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
876 [(set_attr "type" "neon_logic<q>")]
877 )
878
879 (define_insn "one_cmpl<mode>2"
880 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
881 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
882 "TARGET_NEON"
883 "vmvn\t%<V_reg>0, %<V_reg>1"
884 [(set_attr "type" "neon_move<q>")]
885 )
886
887 (define_insn "abs<mode>2"
888 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
889 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
890 "TARGET_NEON"
891 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
892 [(set (attr "type")
893 (if_then_else (match_test "<Is_float_mode>")
894 (const_string "neon_fp_abs_s<q>")
895 (const_string "neon_abs<q>")))]
896 )
897
898 (define_insn "neg<mode>2"
899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
900 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
901 "TARGET_NEON"
902 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
903 [(set (attr "type")
904 (if_then_else (match_test "<Is_float_mode>")
905 (const_string "neon_fp_neg_s<q>")
906 (const_string "neon_neg<q>")))]
907 )
908
909 (define_insn "negdi2_neon"
910 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
911 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
912 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
913 (clobber (reg:CC CC_REGNUM))]
914 "TARGET_NEON"
915 "#"
916 [(set_attr "length" "8")
917 (set_attr "type" "multiple")]
918 )
919
920 ; Split negdi2_neon for vfp registers
921 (define_split
922 [(set (match_operand:DI 0 "s_register_operand" "")
923 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
924 (clobber (match_scratch:DI 2 ""))
925 (clobber (reg:CC CC_REGNUM))]
926 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
927 [(set (match_dup 2) (const_int 0))
928 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
929 (clobber (reg:CC CC_REGNUM))])]
930 {
931 if (!REG_P (operands[2]))
932 operands[2] = operands[0];
933 }
934 )
935
936 ; Split negdi2_neon for core registers
937 (define_split
938 [(set (match_operand:DI 0 "s_register_operand" "")
939 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
940 (clobber (match_scratch:DI 2 ""))
941 (clobber (reg:CC CC_REGNUM))]
942 "TARGET_32BIT && reload_completed
943 && arm_general_register_operand (operands[0], DImode)"
944 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
945 (clobber (reg:CC CC_REGNUM))])]
946 ""
947 )
948
949 (define_insn "<absneg_str><mode>2"
950 [(set (match_operand:VH 0 "s_register_operand" "=w")
951 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
952 "TARGET_NEON_FP16INST"
953 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
954 [(set_attr "type" "neon_abs<q>")]
955 )
956
957 (define_expand "neon_v<absneg_str><mode>"
958 [(set
959 (match_operand:VH 0 "s_register_operand")
960 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
961 "TARGET_NEON_FP16INST"
962 {
963 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
964 DONE;
965 })
966
967 (define_insn "neon_v<fp16_rnd_str><mode>"
968 [(set (match_operand:VH 0 "s_register_operand" "=w")
969 (unspec:VH
970 [(match_operand:VH 1 "s_register_operand" "w")]
971 FP16_RND))]
972 "TARGET_NEON_FP16INST"
973 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
974 [(set_attr "type" "neon_fp_round_s<q>")]
975 )
976
977 (define_insn "neon_vrsqrte<mode>"
978 [(set (match_operand:VH 0 "s_register_operand" "=w")
979 (unspec:VH
980 [(match_operand:VH 1 "s_register_operand" "w")]
981 UNSPEC_VRSQRTE))]
982 "TARGET_NEON_FP16INST"
983 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
984 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
985 )
986
987 (define_insn "*umin<mode>3_neon"
988 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
989 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
990 (match_operand:VDQIW 2 "s_register_operand" "w")))]
991 "TARGET_NEON"
992 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
993 [(set_attr "type" "neon_minmax<q>")]
994 )
995
996 (define_insn "*umax<mode>3_neon"
997 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
998 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
999 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1000 "TARGET_NEON"
1001 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1002 [(set_attr "type" "neon_minmax<q>")]
1003 )
1004
1005 (define_insn "*smin<mode>3_neon"
1006 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1007 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1008 (match_operand:VDQW 2 "s_register_operand" "w")))]
1009 "TARGET_NEON"
1010 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1011 [(set (attr "type")
1012 (if_then_else (match_test "<Is_float_mode>")
1013 (const_string "neon_fp_minmax_s<q>")
1014 (const_string "neon_minmax<q>")))]
1015 )
1016
1017 (define_insn "*smax<mode>3_neon"
1018 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1019 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1020 (match_operand:VDQW 2 "s_register_operand" "w")))]
1021 "TARGET_NEON"
1022 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1023 [(set (attr "type")
1024 (if_then_else (match_test "<Is_float_mode>")
1025 (const_string "neon_fp_minmax_s<q>")
1026 (const_string "neon_minmax<q>")))]
1027 )
1028
1029 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1030 ; generic vectorizer code. It ends up creating a V2DI constructor with
1031 ; SImode elements.
1032
1033 (define_insn "vashl<mode>3"
1034 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1035 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1036 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1037 "TARGET_NEON"
1038 {
1039 switch (which_alternative)
1040 {
1041 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1042 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1043 <MODE>mode,
1044 VALID_NEON_QREG_MODE (<MODE>mode),
1045 true);
1046 default: gcc_unreachable ();
1047 }
1048 }
1049 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1050 )
1051
1052 (define_insn "vashr<mode>3_imm"
1053 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1054 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1055 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1056 "TARGET_NEON"
1057 {
1058 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1059 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1060 false);
1061 }
1062 [(set_attr "type" "neon_shift_imm<q>")]
1063 )
1064
1065 (define_insn "vlshr<mode>3_imm"
1066 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1067 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1068 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1069 "TARGET_NEON"
1070 {
1071 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1072 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1073 false);
1074 }
1075 [(set_attr "type" "neon_shift_imm<q>")]
1076 )
1077
1078 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1079 ; negative amount, with signed operands. This is essentially the same as
1080 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
1081 ; with negative shift amounts.
1082
1083 (define_insn "ashl<mode>3_signed"
1084 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1085 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1086 (match_operand:VDQI 2 "s_register_operand" "w")]
1087 UNSPEC_ASHIFT_SIGNED))]
1088 "TARGET_NEON"
1089 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1090 [(set_attr "type" "neon_shift_reg<q>")]
1091 )
1092
1093 ; Used for implementing logical shift-right, which is a left-shift by a negative
1094 ; amount, with unsigned operands.
1095
1096 (define_insn "ashl<mode>3_unsigned"
1097 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1098 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1099 (match_operand:VDQI 2 "s_register_operand" "w")]
1100 UNSPEC_ASHIFT_UNSIGNED))]
1101 "TARGET_NEON"
1102 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1103 [(set_attr "type" "neon_shift_reg<q>")]
1104 )
1105
1106 (define_expand "vashr<mode>3"
1107 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1108 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1109 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1110 "TARGET_NEON"
1111 {
1112 if (s_register_operand (operands[2], <MODE>mode))
1113 {
1114 rtx neg = gen_reg_rtx (<MODE>mode);
1115 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1116 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1117 }
1118 else
1119 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1120 DONE;
1121 })
1122
1123 (define_expand "vlshr<mode>3"
1124 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1125 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1126 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1127 "TARGET_NEON"
1128 {
1129 if (s_register_operand (operands[2], <MODE>mode))
1130 {
1131 rtx neg = gen_reg_rtx (<MODE>mode);
1132 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1133 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1134 }
1135 else
1136 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1137 DONE;
1138 })
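; The same trick is visible at the intrinsics level (illustrative only; the
; exact assembly depends on the target options):
;
;   #include <arm_neon.h>
;   int32x4_t shr3 (int32x4_t x)
;   {
;     return vshlq_s32 (x, vdupq_n_s32 (-3));   /* vshl.s32 by -3 == ">> 3" */
;   }
;
; VSHL with a negative per-lane count shifts right, which is why the
; vashr<mode>3 and vlshr<mode>3 expanders above negate a register shift
; amount and then use ashl<mode>3_signed or ashl<mode>3_unsigned.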
1139
1140 ;; 64-bit shifts
1141
1142 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1143 ;; leaving the upper half uninitialized. This is OK since the shift
1144 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1145 ;; data flow analysis, however, we pretend the full register is set
1146 ;; using an unspec.
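;; For example (hypothetical, for illustration only): a plain DImode shift
;; such as
;;
;;   unsigned long long shl (unsigned long long x, int n) { return x << n; }
;;
;; may, when the operands end up in NEON registers (the neon_for_64bits
;; tuning), load n with neon_load_count (vld1.32 or vmov.32 into one 32-bit
;; lane) and then shift with vshl.u64, only the bottom byte of the count
;; being significant.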
1147 (define_insn "neon_load_count"
1148 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1149 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1150 UNSPEC_LOAD_COUNT))]
1151 "TARGET_NEON"
1152 "@
1153 vld1.32\t{%P0[0]}, %A1
1154 vmov.32\t%P0[0], %1"
1155 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1156 )
1157
1158 (define_insn "ashldi3_neon_noclobber"
1159 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1160 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1161 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1162 "TARGET_NEON && reload_completed
1163 && (!CONST_INT_P (operands[2])
1164 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1165 "@
1166 vshl.u64\t%P0, %P1, %2
1167 vshl.u64\t%P0, %P1, %P2"
1168 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1169 )
1170
1171 (define_insn_and_split "ashldi3_neon"
1172 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
1173 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
1174 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
1175 (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
1176 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1177 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
1178 (clobber (reg:CC_C CC_REGNUM))]
1179 "TARGET_NEON"
1180 "#"
1181 "TARGET_NEON && reload_completed"
1182 [(const_int 0)]
1183 "
1184 {
1185 if (IS_VFP_REGNUM (REGNO (operands[0])))
1186 {
1187 if (CONST_INT_P (operands[2]))
1188 {
1189 if (INTVAL (operands[2]) < 1)
1190 {
1191 emit_insn (gen_movdi (operands[0], operands[1]));
1192 DONE;
1193 }
1194 else if (INTVAL (operands[2]) > 63)
1195 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1196 }
1197 else
1198 {
1199 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1200 operands[2] = operands[5];
1201 }
1202
1203 /* Ditch the unnecessary clobbers. */
1204 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1205 operands[2]));
1206 }
1207 else
1208 {
1209 /* The shift expanders support either full overlap or no overlap. */
1210 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1211 || REGNO (operands[0]) == REGNO (operands[1]));
1212
1213 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1214 operands[2], operands[3], operands[4]);
1215 }
1216 DONE;
1217 }"
1218 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1219 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1220 (set_attr "type" "multiple")]
1221 )
1222
1223 ; The shift amount needs to be negated for right-shifts
1224 (define_insn "signed_shift_di3_neon"
1225 [(set (match_operand:DI 0 "s_register_operand" "=w")
1226 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1227 (match_operand:DI 2 "s_register_operand" " w")]
1228 UNSPEC_ASHIFT_SIGNED))]
1229 "TARGET_NEON && reload_completed"
1230 "vshl.s64\t%P0, %P1, %P2"
1231 [(set_attr "type" "neon_shift_reg")]
1232 )
1233
1234 ; The shift amount needs to be negated for right-shifts
1235 (define_insn "unsigned_shift_di3_neon"
1236 [(set (match_operand:DI 0 "s_register_operand" "=w")
1237 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1238 (match_operand:DI 2 "s_register_operand" " w")]
1239 UNSPEC_ASHIFT_UNSIGNED))]
1240 "TARGET_NEON && reload_completed"
1241 "vshl.u64\t%P0, %P1, %P2"
1242 [(set_attr "type" "neon_shift_reg")]
1243 )
1244
1245 (define_insn "ashrdi3_neon_imm_noclobber"
1246 [(set (match_operand:DI 0 "s_register_operand" "=w")
1247 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1248 (match_operand:DI 2 "const_int_operand" " i")))]
1249 "TARGET_NEON && reload_completed
1250 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1251 "vshr.s64\t%P0, %P1, %2"
1252 [(set_attr "type" "neon_shift_imm")]
1253 )
1254
1255 (define_insn "lshrdi3_neon_imm_noclobber"
1256 [(set (match_operand:DI 0 "s_register_operand" "=w")
1257 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1258 (match_operand:DI 2 "const_int_operand" " i")))]
1259 "TARGET_NEON && reload_completed
1260 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1261 "vshr.u64\t%P0, %P1, %2"
1262 [(set_attr "type" "neon_shift_imm")]
1263 )
1264
1265 ;; ashrdi3_neon
1266 ;; lshrdi3_neon
1267 (define_insn_and_split "<shift>di3_neon"
1268 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
1269 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1270 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1271 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1272 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1273 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1274 (clobber (reg:CC CC_REGNUM))]
1275 "TARGET_NEON"
1276 "#"
1277 "TARGET_NEON && reload_completed"
1278 [(const_int 0)]
1279 "
1280 {
1281 if (IS_VFP_REGNUM (REGNO (operands[0])))
1282 {
1283 if (CONST_INT_P (operands[2]))
1284 {
1285 if (INTVAL (operands[2]) < 1)
1286 {
1287 emit_insn (gen_movdi (operands[0], operands[1]));
1288 DONE;
1289 }
1290 else if (INTVAL (operands[2]) > 64)
1291 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1292
1293 /* Ditch the unnecessary clobbers. */
1294 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1295 operands[1],
1296 operands[2]));
1297 }
1298 else
1299 {
1300 /* We must use a negative left-shift. */
1301 emit_insn (gen_negsi2 (operands[3], operands[2]));
1302 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1303 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1304 operands[5]));
1305 }
1306 }
1307 else
1308 {
1309 /* The shift expanders support either full overlap or no overlap. */
1310 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1311 || REGNO (operands[0]) == REGNO (operands[1]));
1312
1313 /* This clobbers CC (ASHIFTRT by register only). */
1314 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1315 operands[2], operands[3], operands[4]);
1316 }
1317
1318 DONE;
1319 }"
1320 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1321 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1322 (set_attr "type" "multiple")]
1323 )
1324
1325 ;; Widening operations
1326
1327 (define_expand "widen_ssum<mode>3"
1328 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1329 (plus:<V_double_width>
1330 (sign_extend:<V_double_width>
1331 (match_operand:VQI 1 "s_register_operand" ""))
1332 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1333 "TARGET_NEON"
1334 {
1335 machine_mode mode = GET_MODE (operands[1]);
1336 rtx p1, p2;
1337
1338 p1 = arm_simd_vect_par_cnst_half (mode, false);
1339 p2 = arm_simd_vect_par_cnst_half (mode, true);
1340
1341 if (operands[0] != operands[2])
1342 emit_move_insn (operands[0], operands[2]);
1343
1344 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1345 operands[1],
1346 p1,
1347 operands[0]));
1348 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1349 operands[1],
1350 p2,
1351 operands[0]));
1352 DONE;
1353 }
1354 )
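;; Roughly equivalent intrinsics-level code for the V8HI case (a sketch, not
;; taken from this file):
;;
;;   #include <arm_neon.h>
;;   int32x4_t widen_sum (int32x4_t acc, int16x8_t x)
;;   {
;;     acc = vaddw_s16 (acc, vget_low_s16 (x));    /* vaddw.s16, low half  */
;;     acc = vaddw_s16 (acc, vget_high_s16 (x));   /* vaddw.s16, high half */
;;     return acc;
;;   }
;;
;; which matches the two vec_sel_widen_ssum_* instructions emitted by the
;; expander above.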
1355
1356 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1357 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1358 (plus:<V_double_width>
1359 (sign_extend:<V_double_width>
1360 (vec_select:<V_HALF>
1361 (match_operand:VQI 1 "s_register_operand" "%w")
1362 (match_operand:VQI 2 "vect_par_constant_low" "")))
1363 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1364 "TARGET_NEON"
1365 {
1366 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1367 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1368 }
1369 [(set_attr "type" "neon_add_widen")])
1370
1371 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1372 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1373 (plus:<V_double_width>
1374 (sign_extend:<V_double_width>
1375 (vec_select:<V_HALF>
1376 (match_operand:VQI 1 "s_register_operand" "%w")
1377 (match_operand:VQI 2 "vect_par_constant_high" "")))
1378 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1379 "TARGET_NEON"
1380 {
1381 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1382 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1383 }
1384 [(set_attr "type" "neon_add_widen")])
1385
1386 (define_insn "widen_ssum<mode>3"
1387 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1388 (plus:<V_widen>
1389 (sign_extend:<V_widen>
1390 (match_operand:VW 1 "s_register_operand" "%w"))
1391 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1392 "TARGET_NEON"
1393 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1394 [(set_attr "type" "neon_add_widen")]
1395 )
1396
1397 (define_expand "widen_usum<mode>3"
1398 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1399 (plus:<V_double_width>
1400 (zero_extend:<V_double_width>
1401 (match_operand:VQI 1 "s_register_operand" ""))
1402 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1403 "TARGET_NEON"
1404 {
1405 machine_mode mode = GET_MODE (operands[1]);
1406 rtx p1, p2;
1407
1408 p1 = arm_simd_vect_par_cnst_half (mode, false);
1409 p2 = arm_simd_vect_par_cnst_half (mode, true);
1410
1411 if (operands[0] != operands[2])
1412 emit_move_insn (operands[0], operands[2]);
1413
1414 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1415 operands[1],
1416 p1,
1417 operands[0]));
1418 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1419 operands[1],
1420 p2,
1421 operands[0]));
1422 DONE;
1423 }
1424 )
1425
1426 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1427 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1428 (plus:<V_double_width>
1429 (zero_extend:<V_double_width>
1430 (vec_select:<V_HALF>
1431 (match_operand:VQI 1 "s_register_operand" "%w")
1432 (match_operand:VQI 2 "vect_par_constant_low" "")))
1433 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1434 "TARGET_NEON"
1435 {
1436 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1437 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1438 }
1439 [(set_attr "type" "neon_add_widen")])
1440
1441 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1442 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1443 (plus:<V_double_width>
1444 (zero_extend:<V_double_width>
1445 (vec_select:<V_HALF>
1446 (match_operand:VQI 1 "s_register_operand" "%w")
1447 (match_operand:VQI 2 "vect_par_constant_high" "")))
1448 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1449 "TARGET_NEON"
1450 {
1451 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1452 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1453 }
1454 [(set_attr "type" "neon_add_widen")])
1455
1456 (define_insn "widen_usum<mode>3"
1457 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1458 (plus:<V_widen> (zero_extend:<V_widen>
1459 (match_operand:VW 1 "s_register_operand" "%w"))
1460 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1461 "TARGET_NEON"
1462 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1463 [(set_attr "type" "neon_add_widen")]
1464 )
1465
1466 ;; Helpers for quad-word reduction operations
1467
1468 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1469 ; operand[1] to the high N/2 elements of the same vector. Put the result in
1470 ; operand[0], an N/2-element vector.
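; For instance (illustrative only), quad_halves_plusv4si computes the same
; value as the intrinsic sequence
;
;   #include <arm_neon.h>
;   int32x2_t halves_plus (int32x4_t q)
;   {
;     return vadd_s32 (vget_low_s32 (q), vget_high_s32 (q));  /* vadd.i32 */
;   }
;
; and is used as the first step of the quad-word reductions further below.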
1471
1472 (define_insn "quad_halves_<code>v4si"
1473 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1474 (VQH_OPS:V2SI
1475 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1476 (parallel [(const_int 0) (const_int 1)]))
1477 (vec_select:V2SI (match_dup 1)
1478 (parallel [(const_int 2) (const_int 3)]))))]
1479 "TARGET_NEON"
1480 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1481 [(set_attr "vqh_mnem" "<VQH_mnem>")
1482 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1483 )
1484
1485 (define_insn "quad_halves_<code>v4sf"
1486 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1487 (VQHS_OPS:V2SF
1488 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1489 (parallel [(const_int 0) (const_int 1)]))
1490 (vec_select:V2SF (match_dup 1)
1491 (parallel [(const_int 2) (const_int 3)]))))]
1492 "TARGET_NEON && flag_unsafe_math_optimizations"
1493 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1494 [(set_attr "vqh_mnem" "<VQH_mnem>")
1495 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1496 )
1497
1498 (define_insn "quad_halves_<code>v8hi"
1499 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1500 (VQH_OPS:V4HI
1501 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1502 (parallel [(const_int 0) (const_int 1)
1503 (const_int 2) (const_int 3)]))
1504 (vec_select:V4HI (match_dup 1)
1505 (parallel [(const_int 4) (const_int 5)
1506 (const_int 6) (const_int 7)]))))]
1507 "TARGET_NEON"
1508 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1509 [(set_attr "vqh_mnem" "<VQH_mnem>")
1510 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1511 )
1512
1513 (define_insn "quad_halves_<code>v16qi"
1514 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1515 (VQH_OPS:V8QI
1516 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1517 (parallel [(const_int 0) (const_int 1)
1518 (const_int 2) (const_int 3)
1519 (const_int 4) (const_int 5)
1520 (const_int 6) (const_int 7)]))
1521 (vec_select:V8QI (match_dup 1)
1522 (parallel [(const_int 8) (const_int 9)
1523 (const_int 10) (const_int 11)
1524 (const_int 12) (const_int 13)
1525 (const_int 14) (const_int 15)]))))]
1526 "TARGET_NEON"
1527 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1528 [(set_attr "vqh_mnem" "<VQH_mnem>")
1529 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1530 )
1531
1532 (define_expand "move_hi_quad_<mode>"
1533 [(match_operand:ANY128 0 "s_register_operand" "")
1534 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1535 "TARGET_NEON"
1536 {
1537 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1538 GET_MODE_SIZE (<V_HALF>mode)),
1539 operands[1]);
1540 DONE;
1541 })
1542
1543 (define_expand "move_lo_quad_<mode>"
1544 [(match_operand:ANY128 0 "s_register_operand" "")
1545 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1546 "TARGET_NEON"
1547 {
1548 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1549 <MODE>mode, 0),
1550 operands[1]);
1551 DONE;
1552 })
1553
1554 ;; Reduction operations
1555
1556 (define_expand "reduc_plus_scal_<mode>"
1557 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1558 (match_operand:VD 1 "s_register_operand" "")]
1559 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1560 {
1561 rtx vec = gen_reg_rtx (<MODE>mode);
1562 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1563 &gen_neon_vpadd_internal<mode>);
1564 /* The same result is actually computed into every element. */
1565 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1566 DONE;
1567 })
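;; A rough intrinsics-level equivalent for V2SI (a sketch, not from the
;; sources):
;;
;;   #include <arm_neon.h>
;;   int32_t sum_v2si (int32x2_t v)
;;   {
;;     int32x2_t t = vpadd_s32 (v, v);   /* both lanes now hold v[0] + v[1] */
;;     return vget_lane_s32 (t, 0);
;;   }
;;
;; neon_pairwise_reduce repeats the vpadd step for wider doubleword modes,
;; and the final value is read from lane 0 as above.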
1568
1569 (define_expand "reduc_plus_scal_<mode>"
1570 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1571 (match_operand:VQ 1 "s_register_operand" "")]
1572 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1573 && !BYTES_BIG_ENDIAN"
1574 {
1575 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1576
1577 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1578 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1579
1580 DONE;
1581 })
1582
1583 (define_expand "reduc_plus_scal_v2di"
1584 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1585 (match_operand:V2DI 1 "s_register_operand" "")]
1586 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1587 {
1588 rtx vec = gen_reg_rtx (V2DImode);
1589
1590 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1591 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1592
1593 DONE;
1594 })
1595
1596 (define_insn "arm_reduc_plus_internal_v2di"
1597 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1598 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1599 UNSPEC_VPADD))]
1600 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1601 "vadd.i64\t%e0, %e1, %f1"
1602 [(set_attr "type" "neon_add_q")]
1603 )
1604
1605 (define_expand "reduc_smin_scal_<mode>"
1606 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1607 (match_operand:VD 1 "s_register_operand" "")]
1608 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1609 {
1610 rtx vec = gen_reg_rtx (<MODE>mode);
1611
1612 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1613 &gen_neon_vpsmin<mode>);
1614 /* The result is computed into every element of the vector. */
1615 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1616 DONE;
1617 })
1618
1619 (define_expand "reduc_smin_scal_<mode>"
1620 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1621 (match_operand:VQ 1 "s_register_operand" "")]
1622 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1623 && !BYTES_BIG_ENDIAN"
1624 {
1625 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1626
1627 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1628 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1629
1630 DONE;
1631 })
1632
1633 (define_expand "reduc_smax_scal_<mode>"
1634 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1635 (match_operand:VD 1 "s_register_operand" "")]
1636 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1637 {
1638 rtx vec = gen_reg_rtx (<MODE>mode);
1639 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1640 &gen_neon_vpsmax<mode>);
1641 /* The result is computed into every element of the vector. */
1642 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1643 DONE;
1644 })
1645
1646 (define_expand "reduc_smax_scal_<mode>"
1647 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1648 (match_operand:VQ 1 "s_register_operand" "")]
1649 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1650 && !BYTES_BIG_ENDIAN"
1651 {
1652 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1653
1654 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1655 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1656
1657 DONE;
1658 })
1659
1660 (define_expand "reduc_umin_scal_<mode>"
1661 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1662 (match_operand:VDI 1 "s_register_operand" "")]
1663 "TARGET_NEON"
1664 {
1665 rtx vec = gen_reg_rtx (<MODE>mode);
1666 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1667 &gen_neon_vpumin<mode>);
1668 /* The result is computed into every element of the vector. */
1669 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1670 DONE;
1671 })
1672
1673 (define_expand "reduc_umin_scal_<mode>"
1674 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1675 (match_operand:VQI 1 "s_register_operand" "")]
1676 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1677 {
1678 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1679
1680 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1681 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1682
1683 DONE;
1684 })
1685
1686 (define_expand "reduc_umax_scal_<mode>"
1687 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1688 (match_operand:VDI 1 "s_register_operand" "")]
1689 "TARGET_NEON"
1690 {
1691 rtx vec = gen_reg_rtx (<MODE>mode);
1692 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1693 &gen_neon_vpumax<mode>);
1694 /* The result is computed into every element of the vector. */
1695 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1696 DONE;
1697 })
1698
1699 (define_expand "reduc_umax_scal_<mode>"
1700 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1701 (match_operand:VQI 1 "s_register_operand" "")]
1702 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1703 {
1704 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1705
1706 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1707 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1708
1709 DONE;
1710 })
1711
1712 (define_insn "neon_vpadd_internal<mode>"
1713 [(set (match_operand:VD 0 "s_register_operand" "=w")
1714 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1715 (match_operand:VD 2 "s_register_operand" "w")]
1716 UNSPEC_VPADD))]
1717 "TARGET_NEON"
1718 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1719 ;; Assume this schedules like vadd.
1720 [(set (attr "type")
1721 (if_then_else (match_test "<Is_float_mode>")
1722 (const_string "neon_fp_reduc_add_s<q>")
1723 (const_string "neon_reduc_add<q>")))]
1724 )
1725
1726 (define_insn "neon_vpaddv4hf"
1727 [(set
1728 (match_operand:V4HF 0 "s_register_operand" "=w")
1729 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1730 (match_operand:V4HF 2 "s_register_operand" "w")]
1731 UNSPEC_VPADD))]
1732 "TARGET_NEON_FP16INST"
1733 "vpadd.f16\t%P0, %P1, %P2"
1734 [(set_attr "type" "neon_reduc_add")]
1735 )
1736
1737 (define_insn "neon_vpsmin<mode>"
1738 [(set (match_operand:VD 0 "s_register_operand" "=w")
1739 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1740 (match_operand:VD 2 "s_register_operand" "w")]
1741 UNSPEC_VPSMIN))]
1742 "TARGET_NEON"
1743 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1744 [(set (attr "type")
1745 (if_then_else (match_test "<Is_float_mode>")
1746 (const_string "neon_fp_reduc_minmax_s<q>")
1747 (const_string "neon_reduc_minmax<q>")))]
1748 )
1749
1750 (define_insn "neon_vpsmax<mode>"
1751 [(set (match_operand:VD 0 "s_register_operand" "=w")
1752 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1753 (match_operand:VD 2 "s_register_operand" "w")]
1754 UNSPEC_VPSMAX))]
1755 "TARGET_NEON"
1756 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1757 [(set (attr "type")
1758 (if_then_else (match_test "<Is_float_mode>")
1759 (const_string "neon_fp_reduc_minmax_s<q>")
1760 (const_string "neon_reduc_minmax<q>")))]
1761 )
1762
1763 (define_insn "neon_vpumin<mode>"
1764 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1765 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1766 (match_operand:VDI 2 "s_register_operand" "w")]
1767 UNSPEC_VPUMIN))]
1768 "TARGET_NEON"
1769 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1770 [(set_attr "type" "neon_reduc_minmax<q>")]
1771 )
1772
1773 (define_insn "neon_vpumax<mode>"
1774 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1775 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1776 (match_operand:VDI 2 "s_register_operand" "w")]
1777 UNSPEC_VPUMAX))]
1778 "TARGET_NEON"
1779 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1780 [(set_attr "type" "neon_reduc_minmax<q>")]
1781 )
1782
1783 ;; Saturating arithmetic
1784
1785 ; NOTE: Neon supports many more saturating variants of instructions than the
1786 ; following, but these are all that GCC currently understands.
1787 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1788 ; yet either, although these patterns may be used by intrinsics when they're
1789 ; added.
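;
; For example, with signed 8-bit saturation 100 + 100 gives 127 rather than
; wrapping to -56.  A rough arm_neon.h illustration (not part of this file):
;
;   int8x8_t c = vqadd_s8 (vdup_n_s8 (100), vdup_n_s8 (100)); /* each lane: 127 */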
1790
1791 (define_insn "*ss_add<mode>_neon"
1792 [(set (match_operand:VD 0 "s_register_operand" "=w")
1793 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1794 (match_operand:VD 2 "s_register_operand" "w")))]
1795 "TARGET_NEON"
1796 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1797 [(set_attr "type" "neon_qadd<q>")]
1798 )
1799
1800 (define_insn "*us_add<mode>_neon"
1801 [(set (match_operand:VD 0 "s_register_operand" "=w")
1802 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1803 (match_operand:VD 2 "s_register_operand" "w")))]
1804 "TARGET_NEON"
1805 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1806 [(set_attr "type" "neon_qadd<q>")]
1807 )
1808
1809 (define_insn "*ss_sub<mode>_neon"
1810 [(set (match_operand:VD 0 "s_register_operand" "=w")
1811 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1812 (match_operand:VD 2 "s_register_operand" "w")))]
1813 "TARGET_NEON"
1814 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1815 [(set_attr "type" "neon_qsub<q>")]
1816 )
1817
1818 (define_insn "*us_sub<mode>_neon"
1819 [(set (match_operand:VD 0 "s_register_operand" "=w")
1820 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1821 (match_operand:VD 2 "s_register_operand" "w")))]
1822 "TARGET_NEON"
1823 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1824 [(set_attr "type" "neon_qsub<q>")]
1825 )
1826
1827 ;; Conditional instructions. These are comparisons with conditional moves for
1828 ;; vectors. They perform the assignment:
1829 ;;
1830 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1831 ;;
1832 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1833 ;; element-wise.
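;;
;; A rough scalar model of what the expanders below implement (illustrative
;; only):
;;
;;   for (i = 0; i < n_elements; i++)
;;     Vop0[i] = compare (Vop4[i], Vop5[i]) ? Vop1[i] : Vop2[i];
;;
;; i.e. a vector comparison builds an all-ones/all-zeros mask per element and
;; a VBSL then selects between Vop1 and Vop2 under that mask.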
1834
1835 (define_expand "vcond<mode><mode>"
1836 [(set (match_operand:VDQW 0 "s_register_operand" "")
1837 (if_then_else:VDQW
1838 (match_operator 3 "comparison_operator"
1839 [(match_operand:VDQW 4 "s_register_operand" "")
1840 (match_operand:VDQW 5 "nonmemory_operand" "")])
1841 (match_operand:VDQW 1 "s_register_operand" "")
1842 (match_operand:VDQW 2 "s_register_operand" "")))]
1843 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1844 {
1845 int inverse = 0;
1846 int use_zero_form = 0;
1847 int swap_bsl_operands = 0;
1848 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1849 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1850
1851 rtx (*base_comparison) (rtx, rtx, rtx);
1852 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1853
1854 switch (GET_CODE (operands[3]))
1855 {
1856 case GE:
1857 case GT:
1858 case LE:
1859 case LT:
1860 case EQ:
1861 if (operands[5] == CONST0_RTX (<MODE>mode))
1862 {
1863 use_zero_form = 1;
1864 break;
1865 }
1866 /* Fall through. */
1867 default:
1868 if (!REG_P (operands[5]))
1869 operands[5] = force_reg (<MODE>mode, operands[5]);
1870 }
1871
1872 switch (GET_CODE (operands[3]))
1873 {
1874 case LT:
1875 case UNLT:
1876 inverse = 1;
1877 /* Fall through. */
1878 case GE:
1879 case UNGE:
1880 case ORDERED:
1881 case UNORDERED:
1882 base_comparison = gen_neon_vcge<mode>;
1883 complimentary_comparison = gen_neon_vcgt<mode>;
1884 break;
1885 case LE:
1886 case UNLE:
1887 inverse = 1;
1888 /* Fall through. */
1889 case GT:
1890 case UNGT:
1891 base_comparison = gen_neon_vcgt<mode>;
1892 complimentary_comparison = gen_neon_vcge<mode>;
1893 break;
1894 case EQ:
1895 case NE:
1896 case UNEQ:
1897 base_comparison = gen_neon_vceq<mode>;
1898 complimentary_comparison = gen_neon_vceq<mode>;
1899 break;
1900 default:
1901 gcc_unreachable ();
1902 }
1903
1904 switch (GET_CODE (operands[3]))
1905 {
1906 case LT:
1907 case LE:
1908 case GT:
1909 case GE:
1910 case EQ:
1911 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1912 Since a LT b <=> b GT a and a LE b <=> b GE a, our transformations are:
1913 a GE b -> a GE b
1914 a GT b -> a GT b
1915 a LE b -> b GE a
1916 a LT b -> b GT a
1917 a EQ b -> a EQ b
1918 Note that there also exist direct comparison against 0 forms,
1919 so catch those as a special case. */
1920 if (use_zero_form)
1921 {
1922 inverse = 0;
1923 switch (GET_CODE (operands[3]))
1924 {
1925 case LT:
1926 base_comparison = gen_neon_vclt<mode>;
1927 break;
1928 case LE:
1929 base_comparison = gen_neon_vcle<mode>;
1930 break;
1931 default:
1932 /* Do nothing, other zero form cases already have the correct
1933 base_comparison. */
1934 break;
1935 }
1936 }
1937
1938 if (!inverse)
1939 emit_insn (base_comparison (mask, operands[4], operands[5]));
1940 else
1941 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1942 break;
1943 case UNLT:
1944 case UNLE:
1945 case UNGT:
1946 case UNGE:
1947 case NE:
1948 /* A vector compare returns false for lanes which are unordered, so if we
1949 emit the inverse of the comparison we actually want and then swap the
1950 operands to BSL, we will end up with the correct result.
1951 Note that a NE NaN and NaN NE b are true for all a, b.
1952
1953 Our transformations are:
1954 a GE b -> !(b GT a)
1955 a GT b -> !(b GE a)
1956 a LE b -> !(a GT b)
1957 a LT b -> !(a GE b)
1958 a NE b -> !(a EQ b) */
1959
1960 if (inverse)
1961 emit_insn (base_comparison (mask, operands[4], operands[5]));
1962 else
1963 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1964
1965 swap_bsl_operands = 1;
1966 break;
1967 case UNEQ:
1968 /* We check (a > b || b > a), which is true iff (a != b && a ORDERED b).
1969 Swapping the operands to BSL therefore selects on the negation,
1970 (a == b || a UNORDERED b), as intended. */
1971
1972 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1973 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1974 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1975 swap_bsl_operands = 1;
1976 break;
1977 case UNORDERED:
1978 /* Operands are ORDERED iff (a > b || b >= a).
1979 Swapping the operands to BSL will give the UNORDERED case. */
1980 swap_bsl_operands = 1;
1981 /* Fall through. */
1982 case ORDERED:
1983 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1984 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1985 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1986 break;
1987 default:
1988 gcc_unreachable ();
1989 }
1990
1991 if (swap_bsl_operands)
1992 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1993 operands[1]));
1994 else
1995 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1996 operands[2]));
1997 DONE;
1998 })
1999
2000 (define_expand "vcondu<mode><mode>"
2001 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2002 (if_then_else:VDQIW
2003 (match_operator 3 "arm_comparison_operator"
2004 [(match_operand:VDQIW 4 "s_register_operand" "")
2005 (match_operand:VDQIW 5 "s_register_operand" "")])
2006 (match_operand:VDQIW 1 "s_register_operand" "")
2007 (match_operand:VDQIW 2 "s_register_operand" "")))]
2008 "TARGET_NEON"
2009 {
2010 rtx mask;
2011 int inverse = 0, immediate_zero = 0;
2012
2013 mask = gen_reg_rtx (<V_cmp_result>mode);
2014
2015 if (operands[5] == CONST0_RTX (<MODE>mode))
2016 immediate_zero = 1;
2017 else if (!REG_P (operands[5]))
2018 operands[5] = force_reg (<MODE>mode, operands[5]);
2019
2020 switch (GET_CODE (operands[3]))
2021 {
2022 case GEU:
2023 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2024 break;
2025
2026 case GTU:
2027 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2028 break;
2029
2030 case EQ:
2031 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2032 break;
2033
2034 case LEU:
2035 if (immediate_zero)
2036 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2037 else
2038 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2039 break;
2040
2041 case LTU:
2042 if (immediate_zero)
2043 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2044 else
2045 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2046 break;
2047
2048 case NE:
2049 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2050 inverse = 1;
2051 break;
2052
2053 default:
2054 gcc_unreachable ();
2055 }
2056
2057 if (inverse)
2058 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2059 operands[1]));
2060 else
2061 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2062 operands[2]));
2063
2064 DONE;
2065 })
2066
2067 ;; Patterns for builtins.
2068
2069 ; Good for plain vadd, vaddq.
2070
2071 (define_expand "neon_vadd<mode>"
2072 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2073 (match_operand:VCVTF 1 "s_register_operand" "w")
2074 (match_operand:VCVTF 2 "s_register_operand" "w")]
2075 "TARGET_NEON"
2076 {
2077 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2078 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2079 else
2080 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2081 operands[2]));
2082 DONE;
2083 })
2084
2085 (define_expand "neon_vadd<mode>"
2086 [(match_operand:VH 0 "s_register_operand")
2087 (match_operand:VH 1 "s_register_operand")
2088 (match_operand:VH 2 "s_register_operand")]
2089 "TARGET_NEON_FP16INST"
2090 {
2091 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
2092 DONE;
2093 })
2094
2095 (define_expand "neon_vsub<mode>"
2096 [(match_operand:VH 0 "s_register_operand")
2097 (match_operand:VH 1 "s_register_operand")
2098 (match_operand:VH 2 "s_register_operand")]
2099 "TARGET_NEON_FP16INST"
2100 {
2101 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2102 DONE;
2103 })
2104
2105 ; Note that NEON operations don't support the full IEEE 754 standard: in
2106 ; particular, denormal values are flushed to zero. This means that GCC cannot
2107 ; use those instructions for autovectorization, etc. unless
2108 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2109 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2110 ; header) must work in either case: if -funsafe-math-optimizations is given,
2111 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2112 ; expand to unspecs (which may limit the extent to which they can be
2113 ; optimized by generic code).
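;
; As a rough illustration (not part of this file), vadd_f32 from arm_neon.h
; must always assemble to VADD.F32:
;
;   float32x2_t r = vadd_f32 (a, b);
;
; With -funsafe-math-optimizations this goes through the canonical add<mode>3
; pattern; otherwise it expands to the unspec pattern below.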
2114
2115 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2116
2117 (define_insn "neon_vadd<mode>_unspec"
2118 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2119 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2120 (match_operand:VCVTF 2 "s_register_operand" "w")]
2121 UNSPEC_VADD))]
2122 "TARGET_NEON"
2123 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2124 [(set (attr "type")
2125 (if_then_else (match_test "<Is_float_mode>")
2126 (const_string "neon_fp_addsub_s<q>")
2127 (const_string "neon_add<q>")))]
2128 )
2129
2130 (define_insn "neon_vaddl<sup><mode>"
2131 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2132 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2133 (match_operand:VDI 2 "s_register_operand" "w")]
2134 VADDL))]
2135 "TARGET_NEON"
2136 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2137 [(set_attr "type" "neon_add_long")]
2138 )
2139
2140 (define_insn "neon_vaddw<sup><mode>"
2141 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2142 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2143 (match_operand:VDI 2 "s_register_operand" "w")]
2144 VADDW))]
2145 "TARGET_NEON"
2146 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2147 [(set_attr "type" "neon_add_widen")]
2148 )
2149
2150 ; vhadd and vrhadd.
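;
; A rough per-element model of the two variants (illustrative only), with the
; addition done at double width so it cannot overflow:
;
;   vhadd:  r[i] = (a[i] + b[i]) >> 1;      /* truncating halving add */
;   vrhadd: r[i] = (a[i] + b[i] + 1) >> 1;  /* rounding halving add */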
2151
2152 (define_insn "neon_v<r>hadd<sup><mode>"
2153 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2154 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2155 (match_operand:VDQIW 2 "s_register_operand" "w")]
2156 VHADD))]
2157 "TARGET_NEON"
2158 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2159 [(set_attr "type" "neon_add_halve_q")]
2160 )
2161
2162 (define_insn "neon_vqadd<sup><mode>"
2163 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2164 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2165 (match_operand:VDQIX 2 "s_register_operand" "w")]
2166 VQADD))]
2167 "TARGET_NEON"
2168 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2169 [(set_attr "type" "neon_qadd<q>")]
2170 )
2171
2172 (define_insn "neon_v<r>addhn<mode>"
2173 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2174 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2175 (match_operand:VN 2 "s_register_operand" "w")]
2176 VADDHN))]
2177 "TARGET_NEON"
2178 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2179 [(set_attr "type" "neon_add_halve_narrow_q")]
2180 )
2181
2182 ;; Polynomial and Float multiplication.
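;; For the polynomial (p8) variant the multiply is carry-less: each result
;; byte is the low 8 bits of the GF(2) product of the corresponding input
;; bytes.  A rough scalar model using a hypothetical helper (illustrative
;; only, assumes <stdint.h>):
;;
;;   static uint8_t pmul8 (uint8_t a, uint8_t b)
;;   {
;;     uint16_t r = 0;
;;     for (int i = 0; i < 8; i++)
;;       if (b & (1 << i))
;;         r ^= (uint16_t) a << i;   /* XOR instead of add: no carries */
;;     return (uint8_t) r;           /* vmul.p8 keeps the low 8 bits */
;;   }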
2183 (define_insn "neon_vmul<pf><mode>"
2184 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2185 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2186 (match_operand:VPF 2 "s_register_operand" "w")]
2187 UNSPEC_VMUL))]
2188 "TARGET_NEON"
2189 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2190 [(set (attr "type")
2191 (if_then_else (match_test "<Is_float_mode>")
2192 (const_string "neon_fp_mul_s<q>")
2193 (const_string "neon_mul_<V_elem_ch><q>")))]
2194 )
2195
2196 (define_insn "mul<mode>3"
2197 [(set
2198 (match_operand:VH 0 "s_register_operand" "=w")
2199 (mult:VH
2200 (match_operand:VH 1 "s_register_operand" "w")
2201 (match_operand:VH 2 "s_register_operand" "w")))]
2202 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2203 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2204 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2205 )
2206
2207 (define_insn "neon_vmulf<mode>"
2208 [(set
2209 (match_operand:VH 0 "s_register_operand" "=w")
2210 (mult:VH
2211 (match_operand:VH 1 "s_register_operand" "w")
2212 (match_operand:VH 2 "s_register_operand" "w")))]
2213 "TARGET_NEON_FP16INST"
2214 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2215 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2216 )
2217
2218 (define_expand "neon_vmla<mode>"
2219 [(match_operand:VDQW 0 "s_register_operand" "=w")
2220 (match_operand:VDQW 1 "s_register_operand" "0")
2221 (match_operand:VDQW 2 "s_register_operand" "w")
2222 (match_operand:VDQW 3 "s_register_operand" "w")]
2223 "TARGET_NEON"
2224 {
2225 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2226 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2227 operands[2], operands[3]));
2228 else
2229 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2230 operands[2], operands[3]));
2231 DONE;
2232 })
2233
2234 (define_expand "neon_vfma<VCVTF:mode>"
2235 [(match_operand:VCVTF 0 "s_register_operand")
2236 (match_operand:VCVTF 1 "s_register_operand")
2237 (match_operand:VCVTF 2 "s_register_operand")
2238 (match_operand:VCVTF 3 "s_register_operand")]
2239 "TARGET_NEON && TARGET_FMA"
2240 {
2241 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2242 operands[1]));
2243 DONE;
2244 })
2245
2246 (define_expand "neon_vfma<VH:mode>"
2247 [(match_operand:VH 0 "s_register_operand")
2248 (match_operand:VH 1 "s_register_operand")
2249 (match_operand:VH 2 "s_register_operand")
2250 (match_operand:VH 3 "s_register_operand")]
2251 "TARGET_NEON_FP16INST"
2252 {
2253 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2254 operands[1]));
2255 DONE;
2256 })
2257
2258 (define_expand "neon_vfms<VCVTF:mode>"
2259 [(match_operand:VCVTF 0 "s_register_operand")
2260 (match_operand:VCVTF 1 "s_register_operand")
2261 (match_operand:VCVTF 2 "s_register_operand")
2262 (match_operand:VCVTF 3 "s_register_operand")]
2263 "TARGET_NEON && TARGET_FMA"
2264 {
2265 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2266 operands[1]));
2267 DONE;
2268 })
2269
2270 (define_expand "neon_vfms<VH:mode>"
2271 [(match_operand:VH 0 "s_register_operand")
2272 (match_operand:VH 1 "s_register_operand")
2273 (match_operand:VH 2 "s_register_operand")
2274 (match_operand:VH 3 "s_register_operand")]
2275 "TARGET_NEON_FP16INST"
2276 {
2277 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2278 operands[1]));
2279 DONE;
2280 })
2281
2282 ;; The expand RTL structure here is not important.
2283 ;; We use the gen_* functions anyway.
2284 ;; We just need something to wrap the iterators around.
2285
2286 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2287 [(set (match_operand:VCVTF 0 "s_register_operand")
2288 (unspec:VCVTF
2289 [(match_operand:VCVTF 1 "s_register_operand")
2290 (PLUSMINUS:<VFML>
2291 (match_operand:<VFML> 2 "s_register_operand")
2292 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2293 "TARGET_FP16FML"
2294 {
2295 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2296 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2297 operands[1],
2298 operands[2],
2299 operands[3],
2300 half, half));
2301 DONE;
2302 })
2303
2304 (define_insn "vfmal_low<mode>_intrinsic"
2305 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2306 (fma:VCVTF
2307 (float_extend:VCVTF
2308 (vec_select:<VFMLSEL>
2309 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2310 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2311 (float_extend:VCVTF
2312 (vec_select:<VFMLSEL>
2313 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2314 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2315 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2316 "TARGET_FP16FML"
2317 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2318 [(set_attr "type" "neon_fp_mla_s<q>")]
2319 )
2320
2321 (define_insn "vfmsl_high<mode>_intrinsic"
2322 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2323 (fma:VCVTF
2324 (float_extend:VCVTF
2325 (neg:<VFMLSEL>
2326 (vec_select:<VFMLSEL>
2327 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2328 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2329 (float_extend:VCVTF
2330 (vec_select:<VFMLSEL>
2331 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2332 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2333 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2334 "TARGET_FP16FML"
2335 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2336 [(set_attr "type" "neon_fp_mla_s<q>")]
2337 )
2338
2339 (define_insn "vfmal_high<mode>_intrinsic"
2340 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2341 (fma:VCVTF
2342 (float_extend:VCVTF
2343 (vec_select:<VFMLSEL>
2344 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2345 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2346 (float_extend:VCVTF
2347 (vec_select:<VFMLSEL>
2348 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2349 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2350 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2351 "TARGET_FP16FML"
2352 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2353 [(set_attr "type" "neon_fp_mla_s<q>")]
2354 )
2355
2356 (define_insn "vfmsl_low<mode>_intrinsic"
2357 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2358 (fma:VCVTF
2359 (float_extend:VCVTF
2360 (neg:<VFMLSEL>
2361 (vec_select:<VFMLSEL>
2362 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2363 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2364 (float_extend:VCVTF
2365 (vec_select:<VFMLSEL>
2366 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2367 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2368 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2369 "TARGET_FP16FML"
2370 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2371 [(set_attr "type" "neon_fp_mla_s<q>")]
2372 )
2373
2374 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2375 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2376 (unspec:VCVTF
2377 [(match_operand:VCVTF 1 "s_register_operand")
2378 (PLUSMINUS:<VFML>
2379 (match_operand:<VFML> 2 "s_register_operand")
2380 (match_operand:<VFML> 3 "s_register_operand"))
2381 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2382 "TARGET_FP16FML"
2383 {
2384 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2385 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2386 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2387 (operands[0], operands[1],
2388 operands[2], operands[3],
2389 half, lane));
2390 DONE;
2391 })
2392
2393 (define_insn "vfmal_lane_low<mode>_intrinsic"
2394 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2395 (fma:VCVTF
2396 (float_extend:VCVTF
2397 (vec_select:<VFMLSEL>
2398 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2399 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2400 (float_extend:VCVTF
2401 (vec_duplicate:<VFMLSEL>
2402 (vec_select:HF
2403 (match_operand:<VFML> 3 "s_register_operand" "x")
2404 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2405 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2406 "TARGET_FP16FML"
2407 {
2408 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2409 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2410 {
2411 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2412 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2413 }
2414 else
2415 {
2416 operands[5] = GEN_INT (lane);
2417 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2418 }
2419 }
2420 [(set_attr "type" "neon_fp_mla_s<q>")]
2421 )
2422
2423 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2424 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2425 (unspec:VCVTF
2426 [(match_operand:VCVTF 1 "s_register_operand")
2427 (PLUSMINUS:<VFML>
2428 (match_operand:<VFML> 2 "s_register_operand")
2429 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2430 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2431 "TARGET_FP16FML"
2432 {
2433 rtx lane
2434 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2435 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2436 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2437 (operands[0], operands[1], operands[2], operands[3],
2438 half, lane));
2439 DONE;
2440 })
2441
2442 ;; Used to implement the intrinsics:
2443 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2444 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2445 ;; Needs a bit of care to get the modes of the different sub-expressions right
2446 ;; due to 'a' and 'b' having different sizes, and to make sure we use the right
2447 ;; S or D subregister to select the appropriate lane from.
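;;
;; For example (illustrative arithmetic only), if the selected half-width mode
;; holds elts_per_reg = 4 elements and the intrinsic is passed lane = 6, the
;; code below computes regdiff = 6 / 4 = 1 and new_lane = 6 % 4 = 2, i.e. the
;; lane is element 2 of the next-higher subregister of operand 3.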
2448
2449 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2450 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2451 (fma:VCVTF
2452 (float_extend:VCVTF
2453 (vec_select:<VFMLSEL>
2454 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2455 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2456 (float_extend:VCVTF
2457 (vec_duplicate:<VFMLSEL>
2458 (vec_select:HF
2459 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2460 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2461 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2462 "TARGET_FP16FML"
2463 {
2464 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2465 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2466 int new_lane = lane % elts_per_reg;
2467 int regdiff = lane / elts_per_reg;
2468 operands[5] = GEN_INT (new_lane);
2469 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2470 because we want the print_operand code to print the appropriate
2471 S or D register prefix. */
2472 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2473 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2474 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2475 }
2476 [(set_attr "type" "neon_fp_mla_s<q>")]
2477 )
2478
2479 ;; Used to implement the intrinsics:
2480 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2481 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2482 ;; Needs a bit of care to get the modes of the different sub-expressions right
2483 ;; due to 'a' and 'b' having different sizes, and to make sure we use the right
2484 ;; S or D subregister to select the appropriate lane from.
2485
2486 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2487 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2488 (fma:VCVTF
2489 (float_extend:VCVTF
2490 (vec_select:<VFMLSEL>
2491 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2492 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2493 (float_extend:VCVTF
2494 (vec_duplicate:<VFMLSEL>
2495 (vec_select:HF
2496 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2497 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2498 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2499 "TARGET_FP16FML"
2500 {
2501 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2502 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2503 int new_lane = lane % elts_per_reg;
2504 int regdiff = lane / elts_per_reg;
2505 operands[5] = GEN_INT (new_lane);
2506 /* We re-create operands[3] in the halved VFMLSEL mode
2507 because we've calculated the correct half-width subreg to extract
2508 the lane from and we want to print *that* subreg instead. */
2509 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2510 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2511 }
2512 [(set_attr "type" "neon_fp_mla_s<q>")]
2513 )
2514
2515 (define_insn "vfmal_lane_high<mode>_intrinsic"
2516 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2517 (fma:VCVTF
2518 (float_extend:VCVTF
2519 (vec_select:<VFMLSEL>
2520 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2521 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2522 (float_extend:VCVTF
2523 (vec_duplicate:<VFMLSEL>
2524 (vec_select:HF
2525 (match_operand:<VFML> 3 "s_register_operand" "x")
2526 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2527 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2528 "TARGET_FP16FML"
2529 {
2530 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2531 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2532 {
2533 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2534 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2535 }
2536 else
2537 {
2538 operands[5] = GEN_INT (lane);
2539 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2540 }
2541 }
2542 [(set_attr "type" "neon_fp_mla_s<q>")]
2543 )
2544
2545 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2546 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2547 (fma:VCVTF
2548 (float_extend:VCVTF
2549 (neg:<VFMLSEL>
2550 (vec_select:<VFMLSEL>
2551 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2552 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2553 (float_extend:VCVTF
2554 (vec_duplicate:<VFMLSEL>
2555 (vec_select:HF
2556 (match_operand:<VFML> 3 "s_register_operand" "x")
2557 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2558 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2559 "TARGET_FP16FML"
2560 {
2561 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2562 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2563 {
2564 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2565 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2566 }
2567 else
2568 {
2569 operands[5] = GEN_INT (lane);
2570 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2571 }
2572 }
2573 [(set_attr "type" "neon_fp_mla_s<q>")]
2574 )
2575
2576 ;; Used to implement the intrinsics:
2577 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2578 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2579 ;; Needs a bit of care to get the modes of the different sub-expressions right
2580 ;; due to 'a' and 'b' having different sizes, and to make sure we use the right
2581 ;; S or D subregister to select the appropriate lane from.
2582
2583 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2584 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2585 (fma:VCVTF
2586 (float_extend:VCVTF
2587 (neg:<VFMLSEL>
2588 (vec_select:<VFMLSEL>
2589 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2590 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2591 (float_extend:VCVTF
2592 (vec_duplicate:<VFMLSEL>
2593 (vec_select:HF
2594 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2595 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2596 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2597 "TARGET_FP16FML"
2598 {
2599 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2600 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2601 int new_lane = lane % elts_per_reg;
2602 int regdiff = lane / elts_per_reg;
2603 operands[5] = GEN_INT (new_lane);
2604 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2605 because we want the print_operand code to print the appropriate
2606 S or D register prefix. */
2607 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2608 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2609 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2610 }
2611 [(set_attr "type" "neon_fp_mla_s<q>")]
2612 )
2613
2614 ;; Used to implement the intrinsics:
2615 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2616 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2617 ;; Needs a bit of care to get the modes of the different sub-expressions right
2618 ;; due to 'a' and 'b' having different sizes, and to make sure we use the right
2619 ;; S or D subregister to select the appropriate lane from.
2620
2621 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2622 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2623 (fma:VCVTF
2624 (float_extend:VCVTF
2625 (neg:<VFMLSEL>
2626 (vec_select:<VFMLSEL>
2627 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2628 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2629 (float_extend:VCVTF
2630 (vec_duplicate:<VFMLSEL>
2631 (vec_select:HF
2632 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2633 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2634 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2635 "TARGET_FP16FML"
2636 {
2637 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2638 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2639 int new_lane = lane % elts_per_reg;
2640 int regdiff = lane / elts_per_reg;
2641 operands[5] = GEN_INT (new_lane);
2642 /* We re-create operands[3] in the halved VFMLSEL mode
2643 because we've calculated the correct half-width subreg to extract
2644 the lane from and we want to print *that* subreg instead. */
2645 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2646 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2647 }
2648 [(set_attr "type" "neon_fp_mla_s<q>")]
2649 )
2650
2651 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2652 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2653 (fma:VCVTF
2654 (float_extend:VCVTF
2655 (neg:<VFMLSEL>
2656 (vec_select:<VFMLSEL>
2657 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2658 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2659 (float_extend:VCVTF
2660 (vec_duplicate:<VFMLSEL>
2661 (vec_select:HF
2662 (match_operand:<VFML> 3 "s_register_operand" "x")
2663 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2664 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2665 "TARGET_FP16FML"
2666 {
2667 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2668 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2669 {
2670 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2671 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2672 }
2673 else
2674 {
2675 operands[5] = GEN_INT (lane);
2676 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2677 }
2678 }
2679 [(set_attr "type" "neon_fp_mla_s<q>")]
2680 )
2681
2682 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2683
2684 (define_insn "neon_vmla<mode>_unspec"
2685 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2686 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2687 (match_operand:VDQW 2 "s_register_operand" "w")
2688 (match_operand:VDQW 3 "s_register_operand" "w")]
2689 UNSPEC_VMLA))]
2690 "TARGET_NEON"
2691 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2692 [(set (attr "type")
2693 (if_then_else (match_test "<Is_float_mode>")
2694 (const_string "neon_fp_mla_s<q>")
2695 (const_string "neon_mla_<V_elem_ch><q>")))]
2696 )
2697
2698 (define_insn "neon_vmlal<sup><mode>"
2699 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2700 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2701 (match_operand:VW 2 "s_register_operand" "w")
2702 (match_operand:VW 3 "s_register_operand" "w")]
2703 VMLAL))]
2704 "TARGET_NEON"
2705 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2706 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2707 )
2708
2709 (define_expand "neon_vmls<mode>"
2710 [(match_operand:VDQW 0 "s_register_operand" "=w")
2711 (match_operand:VDQW 1 "s_register_operand" "0")
2712 (match_operand:VDQW 2 "s_register_operand" "w")
2713 (match_operand:VDQW 3 "s_register_operand" "w")]
2714 "TARGET_NEON"
2715 {
2716 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2717 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2718 operands[1], operands[2], operands[3]));
2719 else
2720 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2721 operands[2], operands[3]));
2722 DONE;
2723 })
2724
2725 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2726
2727 (define_insn "neon_vmls<mode>_unspec"
2728 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2729 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2730 (match_operand:VDQW 2 "s_register_operand" "w")
2731 (match_operand:VDQW 3 "s_register_operand" "w")]
2732 UNSPEC_VMLS))]
2733 "TARGET_NEON"
2734 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2735 [(set (attr "type")
2736 (if_then_else (match_test "<Is_float_mode>")
2737 (const_string "neon_fp_mla_s<q>")
2738 (const_string "neon_mla_<V_elem_ch><q>")))]
2739 )
2740
2741 (define_insn "neon_vmlsl<sup><mode>"
2742 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2743 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2744 (match_operand:VW 2 "s_register_operand" "w")
2745 (match_operand:VW 3 "s_register_operand" "w")]
2746 VMLSL))]
2747 "TARGET_NEON"
2748 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2749 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2750 )
2751
2752 ;; vqdmulh, vqrdmulh
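;; A rough per-element model for 16-bit lanes (illustrative only; sat16 () is
;; a hypothetical helper clamping to [-32768, 32767]):
;;
;;   vqdmulh:  r[i] = sat16 ((2 * a[i] * b[i]) >> 16);
;;   vqrdmulh: r[i] = sat16 ((2 * a[i] * b[i] + 0x8000) >> 16);
;;
;; with the products computed at full precision; only -32768 * -32768 actually
;; saturates.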
2753 (define_insn "neon_vq<r>dmulh<mode>"
2754 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2755 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2756 (match_operand:VMDQI 2 "s_register_operand" "w")]
2757 VQDMULH))]
2758 "TARGET_NEON"
2759 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2760 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2761 )
2762
2763 ;; vqrdmlah, vqrdmlsh
2764 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2765 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2766 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2767 (match_operand:VMDQI 2 "s_register_operand" "w")
2768 (match_operand:VMDQI 3 "s_register_operand" "w")]
2769 VQRDMLH_AS))]
2770 "TARGET_NEON_RDMA"
2771 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2772 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2773 )
2774
2775 (define_insn "neon_vqdmlal<mode>"
2776 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2777 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2778 (match_operand:VMDI 2 "s_register_operand" "w")
2779 (match_operand:VMDI 3 "s_register_operand" "w")]
2780 UNSPEC_VQDMLAL))]
2781 "TARGET_NEON"
2782 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2783 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2784 )
2785
2786 (define_insn "neon_vqdmlsl<mode>"
2787 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2788 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2789 (match_operand:VMDI 2 "s_register_operand" "w")
2790 (match_operand:VMDI 3 "s_register_operand" "w")]
2791 UNSPEC_VQDMLSL))]
2792 "TARGET_NEON"
2793 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2794 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2795 )
2796
2797 (define_insn "neon_vmull<sup><mode>"
2798 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2799 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2800 (match_operand:VW 2 "s_register_operand" "w")]
2801 VMULL))]
2802 "TARGET_NEON"
2803 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2804 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2805 )
2806
2807 (define_insn "neon_vqdmull<mode>"
2808 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2809 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2810 (match_operand:VMDI 2 "s_register_operand" "w")]
2811 UNSPEC_VQDMULL))]
2812 "TARGET_NEON"
2813 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2814 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2815 )
2816
2817 (define_expand "neon_vsub<mode>"
2818 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2819 (match_operand:VCVTF 1 "s_register_operand" "w")
2820 (match_operand:VCVTF 2 "s_register_operand" "w")]
2821 "TARGET_NEON"
2822 {
2823 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2824 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2825 else
2826 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2827 operands[2]));
2828 DONE;
2829 })
2830
2831 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2832
2833 (define_insn "neon_vsub<mode>_unspec"
2834 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2835 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2836 (match_operand:VCVTF 2 "s_register_operand" "w")]
2837 UNSPEC_VSUB))]
2838 "TARGET_NEON"
2839 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2840 [(set (attr "type")
2841 (if_then_else (match_test "<Is_float_mode>")
2842 (const_string "neon_fp_addsub_s<q>")
2843 (const_string "neon_sub<q>")))]
2844 )
2845
2846 (define_insn "neon_vsubl<sup><mode>"
2847 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2848 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2849 (match_operand:VDI 2 "s_register_operand" "w")]
2850 VSUBL))]
2851 "TARGET_NEON"
2852 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2853 [(set_attr "type" "neon_sub_long")]
2854 )
2855
2856 (define_insn "neon_vsubw<sup><mode>"
2857 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2858 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2859 (match_operand:VDI 2 "s_register_operand" "w")]
2860 VSUBW))]
2861 "TARGET_NEON"
2862 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2863 [(set_attr "type" "neon_sub_widen")]
2864 )
2865
2866 (define_insn "neon_vqsub<sup><mode>"
2867 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2868 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2869 (match_operand:VDQIX 2 "s_register_operand" "w")]
2870 VQSUB))]
2871 "TARGET_NEON"
2872 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2873 [(set_attr "type" "neon_qsub<q>")]
2874 )
2875
2876 (define_insn "neon_vhsub<sup><mode>"
2877 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2878 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2879 (match_operand:VDQIW 2 "s_register_operand" "w")]
2880 VHSUB))]
2881 "TARGET_NEON"
2882 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2883 [(set_attr "type" "neon_sub_halve<q>")]
2884 )
2885
2886 (define_insn "neon_v<r>subhn<mode>"
2887 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2888 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2889 (match_operand:VN 2 "s_register_operand" "w")]
2890 VSUBHN))]
2891 "TARGET_NEON"
2892 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2893 [(set_attr "type" "neon_sub_halve_narrow_q")]
2894 )
2895
2896 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2897 ;; without unsafe math optimizations.
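;;
;; As a rough illustration (not part of this file), vceqq_f32 from arm_neon.h
;; has to produce a per-lane all-ones/all-zeros mask in either case:
;;
;;   uint32x4_t m = vceqq_f32 (a, b);   /* lane i: a[i] == b[i] ? ~0u : 0 */
;;
;; With -funsafe-math-optimizations it uses the comparison RTX form, otherwise
;; the *_insn_unspec pattern below.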
2898 (define_expand "neon_vc<cmp_op><mode>"
2899 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2900 (neg:<V_cmp_result>
2901 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2902 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2903 "TARGET_NEON"
2904 {
2905 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2906 is enabled. */
2907 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2908 && !flag_unsafe_math_optimizations)
2909 {
2910 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2911 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2912 whereas this expander iterates over the integer modes as well; we
2913 never expand to UNSPECs for the integer comparisons. */
2914 switch (<MODE>mode)
2915 {
2916 case E_V2SFmode:
2917 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2918 operands[1],
2919 operands[2]));
2920 break;
2921 case E_V4SFmode:
2922 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2923 operands[1],
2924 operands[2]));
2925 break;
2926 default:
2927 gcc_unreachable ();
2928 }
2929 }
2930 else
2931 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2932 operands[1],
2933 operands[2]));
2934 DONE;
2935 }
2936 )
2937
2938 (define_insn "neon_vc<cmp_op><mode>_insn"
2939 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2940 (neg:<V_cmp_result>
2941 (COMPARISONS:<V_cmp_result>
2942 (match_operand:VDQW 1 "s_register_operand" "w,w")
2943 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2944 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2945 && !flag_unsafe_math_optimizations)"
2946 {
2947 char pattern[100];
2948 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2949 " %%<V_reg>1, %s",
2950 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2951 ? "f" : "<cmp_type>",
2952 which_alternative == 0
2953 ? "%<V_reg>2" : "#0");
2954 output_asm_insn (pattern, operands);
2955 return "";
2956 }
2957 [(set (attr "type")
2958 (if_then_else (match_operand 2 "zero_operand")
2959 (const_string "neon_compare_zero<q>")
2960 (const_string "neon_compare<q>")))]
2961 )
2962
2963 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2964 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2965 (unspec:<V_cmp_result>
2966 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2967 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2968 NEON_VCMP))]
2969 "TARGET_NEON"
2970 {
2971 char pattern[100];
2972 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2973 " %%<V_reg>1, %s",
2974 which_alternative == 0
2975 ? "%<V_reg>2" : "#0");
2976 output_asm_insn (pattern, operands);
2977 return "";
2978 }
2979 [(set_attr "type" "neon_fp_compare_s<q>")]
2980 )
2981
2982 (define_expand "neon_vc<cmp_op><mode>"
2983 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2984 (neg:<V_cmp_result>
2985 (COMPARISONS:VH
2986 (match_operand:VH 1 "s_register_operand")
2987 (match_operand:VH 2 "reg_or_zero_operand")))]
2988 "TARGET_NEON_FP16INST"
2989 {
2990 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2991 is enabled. */
2992 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2993 && !flag_unsafe_math_optimizations)
2994 emit_insn
2995 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2996 (operands[0], operands[1], operands[2]));
2997 else
2998 emit_insn
2999 (gen_neon_vc<cmp_op><mode>_fp16insn
3000 (operands[0], operands[1], operands[2]));
3001 DONE;
3002 })
3003
3004 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
3005 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3006 (neg:<V_cmp_result>
3007 (COMPARISONS:<V_cmp_result>
3008 (match_operand:VH 1 "s_register_operand" "w,w")
3009 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3010 "TARGET_NEON_FP16INST
3011 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3012 && !flag_unsafe_math_optimizations)"
3013 {
3014 char pattern[100];
3015 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3016 " %%<V_reg>1, %s",
3017 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3018 ? "f" : "<cmp_type>",
3019 which_alternative == 0
3020 ? "%<V_reg>2" : "#0");
3021 output_asm_insn (pattern, operands);
3022 return "";
3023 }
3024 [(set (attr "type")
3025 (if_then_else (match_operand 2 "zero_operand")
3026 (const_string "neon_compare_zero<q>")
3027 (const_string "neon_compare<q>")))])
3028
3029 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3030 [(set
3031 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3032 (unspec:<V_cmp_result>
3033 [(match_operand:VH 1 "s_register_operand" "w,w")
3034 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3035 NEON_VCMP))]
3036 "TARGET_NEON_FP16INST"
3037 {
3038 char pattern[100];
3039 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3040 " %%<V_reg>1, %s",
3041 which_alternative == 0
3042 ? "%<V_reg>2" : "#0");
3043 output_asm_insn (pattern, operands);
3044 return "";
3045 }
3046 [(set_attr "type" "neon_fp_compare_s<q>")])
3047
3048 (define_insn "neon_vc<cmp_op>u<mode>"
3049 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3050 (neg:<V_cmp_result>
3051 (GTUGEU:<V_cmp_result>
3052 (match_operand:VDQIW 1 "s_register_operand" "w")
3053 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3054 "TARGET_NEON"
3055 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3056 [(set_attr "type" "neon_compare<q>")]
3057 )
3058
3059 (define_expand "neon_vca<cmp_op><mode>"
3060 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3061 (neg:<V_cmp_result>
3062 (GTGE:<V_cmp_result>
3063 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3064 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3065 "TARGET_NEON"
3066 {
3067 if (flag_unsafe_math_optimizations)
3068 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3069 operands[2]));
3070 else
3071 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
3072 operands[1],
3073 operands[2]));
3074 DONE;
3075 }
3076 )
3077
3078 (define_insn "neon_vca<cmp_op><mode>_insn"
3079 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3080 (neg:<V_cmp_result>
3081 (GTGE:<V_cmp_result>
3082 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3083 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3084 "TARGET_NEON && flag_unsafe_math_optimizations"
3085 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3086 [(set_attr "type" "neon_fp_compare_s<q>")]
3087 )
3088
3089 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3090 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3091 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3092 (match_operand:VCVTF 2 "s_register_operand" "w")]
3093 NEON_VACMP))]
3094 "TARGET_NEON"
3095 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3096 [(set_attr "type" "neon_fp_compare_s<q>")]
3097 )
3098
3099 (define_expand "neon_vca<cmp_op><mode>"
3100 [(set
3101 (match_operand:<V_cmp_result> 0 "s_register_operand")
3102 (neg:<V_cmp_result>
3103 (GLTE:<V_cmp_result>
3104 (abs:VH (match_operand:VH 1 "s_register_operand"))
3105 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3106 "TARGET_NEON_FP16INST"
3107 {
3108 if (flag_unsafe_math_optimizations)
3109 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3110 (operands[0], operands[1], operands[2]));
3111 else
3112 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3113 (operands[0], operands[1], operands[2]));
3114 DONE;
3115 })
3116
3117 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3118 [(set
3119 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3120 (neg:<V_cmp_result>
3121 (GLTE:<V_cmp_result>
3122 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3123 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3124 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3125 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3126 [(set_attr "type" "neon_fp_compare_s<q>")]
3127 )
3128
3129 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3130 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3131 (unspec:<V_cmp_result>
3132 [(match_operand:VH 1 "s_register_operand" "w")
3133 (match_operand:VH 2 "s_register_operand" "w")]
3134 NEON_VAGLTE))]
3135 "TARGET_NEON"
3136 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3137 [(set_attr "type" "neon_fp_compare_s<q>")]
3138 )
3139
3140 (define_expand "neon_vc<cmp_op>z<mode>"
3141 [(set
3142 (match_operand:<V_cmp_result> 0 "s_register_operand")
3143 (COMPARISONS:<V_cmp_result>
3144 (match_operand:VH 1 "s_register_operand")
3145 (const_int 0)))]
3146 "TARGET_NEON_FP16INST"
3147 {
3148 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3149 CONST0_RTX (<MODE>mode)));
3150 DONE;
3151 })
3152
3153 (define_insn "neon_vtst<mode>"
3154 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3155 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3156 (match_operand:VDQIW 2 "s_register_operand" "w")]
3157 UNSPEC_VTST))]
3158 "TARGET_NEON"
3159 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3160 [(set_attr "type" "neon_tst<q>")]
3161 )
3162
3163 (define_insn "neon_vabd<sup><mode>"
3164 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3165 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3166 (match_operand:VDQIW 2 "s_register_operand" "w")]
3167 VABD))]
3168 "TARGET_NEON"
3169 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3170 [(set_attr "type" "neon_abd<q>")]
3171 )
3172
3173 (define_insn "neon_vabd<mode>"
3174 [(set (match_operand:VH 0 "s_register_operand" "=w")
3175 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3176 (match_operand:VH 2 "s_register_operand" "w")]
3177 UNSPEC_VABD_F))]
3178 "TARGET_NEON_FP16INST"
3179 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3180 [(set_attr "type" "neon_abd<q>")]
3181 )
3182
3183 (define_insn "neon_vabdf<mode>"
3184 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3185 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3186 (match_operand:VCVTF 2 "s_register_operand" "w")]
3187 UNSPEC_VABD_F))]
3188 "TARGET_NEON"
3189 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3190 [(set_attr "type" "neon_fp_abd_s<q>")]
3191 )
3192
3193 (define_insn "neon_vabdl<sup><mode>"
3194 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3195 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3196 (match_operand:VW 2 "s_register_operand" "w")]
3197 VABDL))]
3198 "TARGET_NEON"
3199 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3200 [(set_attr "type" "neon_abd_long")]
3201 )
3202
3203 (define_insn "neon_vaba<sup><mode>"
3204 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3205 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3206 (match_operand:VDQIW 3 "s_register_operand" "w")]
3207 VABD)
3208 (match_operand:VDQIW 1 "s_register_operand" "0")))]
3209 "TARGET_NEON"
3210 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3211 [(set_attr "type" "neon_arith_acc<q>")]
3212 )
3213
3214 (define_insn "neon_vabal<sup><mode>"
3215 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3216 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3217 (match_operand:VW 3 "s_register_operand" "w")]
3218 VABDL)
3219 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3220 "TARGET_NEON"
3221 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3222 [(set_attr "type" "neon_arith_acc<q>")]
3223 )
3224
3225 (define_insn "neon_v<maxmin><sup><mode>"
3226 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3227 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3228 (match_operand:VDQIW 2 "s_register_operand" "w")]
3229 VMAXMIN))]
3230 "TARGET_NEON"
3231 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3232 [(set_attr "type" "neon_minmax<q>")]
3233 )
3234
3235 (define_insn "neon_v<maxmin>f<mode>"
3236 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3237 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3238 (match_operand:VCVTF 2 "s_register_operand" "w")]
3239 VMAXMINF))]
3240 "TARGET_NEON"
3241 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3242 [(set_attr "type" "neon_fp_minmax_s<q>")]
3243 )
3244
3245 (define_insn "neon_v<maxmin>f<mode>"
3246 [(set (match_operand:VH 0 "s_register_operand" "=w")
3247 (unspec:VH
3248 [(match_operand:VH 1 "s_register_operand" "w")
3249 (match_operand:VH 2 "s_register_operand" "w")]
3250 VMAXMINF))]
3251 "TARGET_NEON_FP16INST"
3252 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3253 [(set_attr "type" "neon_fp_minmax_s<q>")]
3254 )
3255
3256 (define_insn "neon_vp<maxmin>fv4hf"
3257 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3258 (unspec:V4HF
3259 [(match_operand:V4HF 1 "s_register_operand" "w")
3260 (match_operand:V4HF 2 "s_register_operand" "w")]
3261 VPMAXMINF))]
3262 "TARGET_NEON_FP16INST"
3263 "vp<maxmin>.f16\t%P0, %P1, %P2"
3264 [(set_attr "type" "neon_reduc_minmax")]
3265 )
3266
3267 (define_insn "neon_<fmaxmin_op><mode>"
3268 [(set
3269 (match_operand:VH 0 "s_register_operand" "=w")
3270 (unspec:VH
3271 [(match_operand:VH 1 "s_register_operand" "w")
3272 (match_operand:VH 2 "s_register_operand" "w")]
3273 VMAXMINFNM))]
3274 "TARGET_NEON_FP16INST"
3275 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3276 [(set_attr "type" "neon_fp_minmax_s<q>")]
3277 )
3278
3279 ;; v<maxmin>nm intrinsics.
3280 (define_insn "neon_<fmaxmin_op><mode>"
3281 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3282 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3283 (match_operand:VCVTF 2 "s_register_operand" "w")]
3284 VMAXMINFNM))]
3285 "TARGET_NEON && TARGET_VFP5"
3286 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3287 [(set_attr "type" "neon_fp_minmax_s<q>")]
3288 )
3289
3290 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
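3291 ;; As an illustrative sketch (not taken from this file), a scalar loop such as
;;   for (i = 0; i < n; i++)
;;     x[i] = fminf (a[i], b[i]);
;; can be vectorized to the pattern below, since vminnm/vmaxnm follow the same
;; IEEE-754 minNum/maxNum rule as fmin()/fmax(): when exactly one operand is a
;; NaN, the numeric operand is returned.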
3291 (define_insn "<fmaxmin><mode>3"
3292 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3293 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3294 (match_operand:VCVTF 2 "s_register_operand" "w")]
3295 VMAXMINFNM))]
3296 "TARGET_NEON && TARGET_VFP5"
3297 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3298 [(set_attr "type" "neon_fp_minmax_s<q>")]
3299 )
3300
3301 (define_expand "neon_vpadd<mode>"
3302 [(match_operand:VD 0 "s_register_operand" "=w")
3303 (match_operand:VD 1 "s_register_operand" "w")
3304 (match_operand:VD 2 "s_register_operand" "w")]
3305 "TARGET_NEON"
3306 {
3307 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3308 operands[2]));
3309 DONE;
3310 })
3311
3312 (define_insn "neon_vpaddl<sup><mode>"
3313 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3314 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3315 VPADDL))]
3316 "TARGET_NEON"
3317 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3318 [(set_attr "type" "neon_reduc_add_long")]
3319 )
3320
3321 (define_insn "neon_vpadal<sup><mode>"
3322 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3323 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3324 (match_operand:VDQIW 2 "s_register_operand" "w")]
3325 VPADAL))]
3326 "TARGET_NEON"
3327 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3328 [(set_attr "type" "neon_reduc_add_acc")]
3329 )
3330
3331 (define_insn "neon_vp<maxmin><sup><mode>"
3332 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3333 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3334 (match_operand:VDI 2 "s_register_operand" "w")]
3335 VPMAXMIN))]
3336 "TARGET_NEON"
3337 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3338 [(set_attr "type" "neon_reduc_minmax<q>")]
3339 )
3340
3341 (define_insn "neon_vp<maxmin>f<mode>"
3342 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3343 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3344 (match_operand:VCVTF 2 "s_register_operand" "w")]
3345 VPMAXMINF))]
3346 "TARGET_NEON"
3347 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3348 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
3349 )
3350
3351 (define_insn "neon_vrecps<mode>"
3352 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3353 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3354 (match_operand:VCVTF 2 "s_register_operand" "w")]
3355 UNSPEC_VRECPS))]
3356 "TARGET_NEON"
3357 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3358 [(set_attr "type" "neon_fp_recps_s<q>")]
3359 )
3360
3361 (define_insn "neon_vrecps<mode>"
3362 [(set
3363 (match_operand:VH 0 "s_register_operand" "=w")
3364 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3365 (match_operand:VH 2 "s_register_operand" "w")]
3366 UNSPEC_VRECPS))]
3367 "TARGET_NEON_FP16INST"
3368 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3369 [(set_attr "type" "neon_fp_recps_s<q>")]
3370 )
3371
3372 (define_insn "neon_vrsqrts<mode>"
3373 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3374 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3375 (match_operand:VCVTF 2 "s_register_operand" "w")]
3376 UNSPEC_VRSQRTS))]
3377 "TARGET_NEON"
3378 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3379 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3380 )
3381
3382 (define_insn "neon_vrsqrts<mode>"
3383 [(set
3384 (match_operand:VH 0 "s_register_operand" "=w")
3385 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3386 (match_operand:VH 2 "s_register_operand" "w")]
3387 UNSPEC_VRSQRTS))]
3388 "TARGET_NEON_FP16INST"
3389 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3390 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3391 )
3392
3393 (define_expand "neon_vabs<mode>"
3394 [(match_operand:VDQW 0 "s_register_operand" "")
3395 (match_operand:VDQW 1 "s_register_operand" "")]
3396 "TARGET_NEON"
3397 {
3398 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3399 DONE;
3400 })
3401
3402 (define_insn "neon_vqabs<mode>"
3403 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3404 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3405 UNSPEC_VQABS))]
3406 "TARGET_NEON"
3407 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3408 [(set_attr "type" "neon_qabs<q>")]
3409 )
3410
3411 (define_insn "neon_bswap<mode>"
3412 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3413 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3414 "TARGET_NEON"
3415 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3416 [(set_attr "type" "neon_rev<q>")]
3417 )
3418
3419 (define_expand "neon_vneg<mode>"
3420 [(match_operand:VDQW 0 "s_register_operand" "")
3421 (match_operand:VDQW 1 "s_register_operand" "")]
3422 "TARGET_NEON"
3423 {
3424 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3425 DONE;
3426 })
3427
3428 ;; These instructions map to the __builtins for the Dot Product operations.
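;; In C terms the per-lane operation is (an illustrative sketch only):
;;   for (i = 0; i < lanes; i++)
;;     d[i] = acc[i] + a[4*i + 0] * b[4*i + 0] + a[4*i + 1] * b[4*i + 1]
;;                   + a[4*i + 2] * b[4*i + 2] + a[4*i + 3] * b[4*i + 3];
;; where d and acc are the 32-bit lanes of operands 0 and 1, and a and b are
;; the 8-bit elements of operands 2 and 3, widened as signed or unsigned
;; according to <sup>.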
3429 (define_insn "neon_<sup>dot<vsi2qi>"
3430 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3431 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3432 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3433 "register_operand" "w")
3434 (match_operand:<VSI2QI> 3
3435 "register_operand" "w")]
3436 DOTPROD)))]
3437 "TARGET_DOTPROD"
3438 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3439 [(set_attr "type" "neon_dot")]
3440 )
3441
3442 ;; These instructions map to the __builtins for the Dot Product
3443 ;; indexed operations.
3444 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3445 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3446 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3447 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3448 "register_operand" "w")
3449 (match_operand:V8QI 3 "register_operand" "t")
3450 (match_operand:SI 4 "immediate_operand" "i")]
3451 DOTPROD)))]
3452 "TARGET_DOTPROD"
3453 {
3454 operands[4]
3455 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3456 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3457 }
3458 [(set_attr "type" "neon_dot")]
3459 )
3460
3461 ;; These expands map to the Dot Product optab the vectorizer checks for.
3462 ;; The auto-vectorizer expects a dot product builtin that also does an
3463 ;; accumulation into the provided register.
3464 ;; Given the following pattern
3465 ;;
3466 ;; for (i=0; i<len; i++) {
3467 ;; c = a[i] * b[i];
3468 ;; r += c;
3469 ;; }
3470 ;; return r;
3471 ;;
3472 ;; This can be auto-vectorized to
3473 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3474 ;;
3475 ;; given enough iterations.  However, the vectorizer can keep unrolling the loop:
3476 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3477 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3478 ;; ...
3479 ;;
3480 ;; and so the vectorizer provides r, in which the result has to be accumulated.
3481 (define_expand "<sup>dot_prod<vsi2qi>"
3482 [(set (match_operand:VCVTI 0 "register_operand")
3483 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3484 "register_operand")
3485 (match_operand:<VSI2QI> 2
3486 "register_operand")]
3487 DOTPROD)
3488 (match_operand:VCVTI 3 "register_operand")))]
3489 "TARGET_DOTPROD"
3490 {
3491 emit_insn (
3492 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3493 operands[2]));
3494 emit_insn (gen_rtx_SET (operands[0], operands[3]));
3495 DONE;
3496 })
3497
3498 (define_expand "neon_copysignf<mode>"
3499 [(match_operand:VCVTF 0 "register_operand")
3500 (match_operand:VCVTF 1 "register_operand")
3501 (match_operand:VCVTF 2 "register_operand")]
3502 "TARGET_NEON"
3503 "{
3504 rtx v_bitmask_cast;
3505 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3506 rtx c = GEN_INT (0x80000000);
3507
3508 emit_move_insn (v_bitmask,
3509 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3510 emit_move_insn (operands[0], operands[2]);
3511 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3512 <VCVTF:V_cmp_result>mode, 0);
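  /* operands[0] currently holds a copy of operands[2], so the bit-select
     below takes just its sign bit (selected by the 0x80000000 mask) and the
     remaining bits from operands[1], i.e. a lane-wise
     copysignf (operands[1], operands[2]).  */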
3513 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3514 operands[1]));
3515
3516 DONE;
3517 }"
3518 )
3519
3520 (define_insn "neon_vqneg<mode>"
3521 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3522 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3523 UNSPEC_VQNEG))]
3524 "TARGET_NEON"
3525 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3526 [(set_attr "type" "neon_qneg<q>")]
3527 )
3528
3529 (define_insn "neon_vcls<mode>"
3530 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3531 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3532 UNSPEC_VCLS))]
3533 "TARGET_NEON"
3534 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3535 [(set_attr "type" "neon_cls<q>")]
3536 )
3537
3538 (define_insn "clz<mode>2"
3539 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3540 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3541 "TARGET_NEON"
3542 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3543 [(set_attr "type" "neon_cnt<q>")]
3544 )
3545
3546 (define_expand "neon_vclz<mode>"
3547 [(match_operand:VDQIW 0 "s_register_operand" "")
3548 (match_operand:VDQIW 1 "s_register_operand" "")]
3549 "TARGET_NEON"
3550 {
3551 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3552 DONE;
3553 })
3554
3555 (define_insn "popcount<mode>2"
3556 [(set (match_operand:VE 0 "s_register_operand" "=w")
3557 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3558 "TARGET_NEON"
3559 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3560 [(set_attr "type" "neon_cnt<q>")]
3561 )
3562
3563 (define_expand "neon_vcnt<mode>"
3564 [(match_operand:VE 0 "s_register_operand" "=w")
3565 (match_operand:VE 1 "s_register_operand" "w")]
3566 "TARGET_NEON"
3567 {
3568 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3569 DONE;
3570 })
3571
3572 (define_insn "neon_vrecpe<mode>"
3573 [(set (match_operand:VH 0 "s_register_operand" "=w")
3574 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3575 UNSPEC_VRECPE))]
3576 "TARGET_NEON_FP16INST"
3577 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3578 [(set_attr "type" "neon_fp_recpe_s<q>")]
3579 )
3580
3581 (define_insn "neon_vrecpe<mode>"
3582 [(set (match_operand:V32 0 "s_register_operand" "=w")
3583 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3584 UNSPEC_VRECPE))]
3585 "TARGET_NEON"
3586 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3587 [(set_attr "type" "neon_fp_recpe_s<q>")]
3588 )
3589
3590 (define_insn "neon_vrsqrte<mode>"
3591 [(set (match_operand:V32 0 "s_register_operand" "=w")
3592 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3593 UNSPEC_VRSQRTE))]
3594 "TARGET_NEON"
3595 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3596 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3597 )
3598
3599 (define_expand "neon_vmvn<mode>"
3600 [(match_operand:VDQIW 0 "s_register_operand" "")
3601 (match_operand:VDQIW 1 "s_register_operand" "")]
3602 "TARGET_NEON"
3603 {
3604 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3605 DONE;
3606 })
3607
3608 (define_insn "neon_vget_lane<mode>_sext_internal"
3609 [(set (match_operand:SI 0 "s_register_operand" "=r")
3610 (sign_extend:SI
3611 (vec_select:<V_elem>
3612 (match_operand:VD 1 "s_register_operand" "w")
3613 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3614 "TARGET_NEON"
3615 {
3616 if (BYTES_BIG_ENDIAN)
3617 {
3618 int elt = INTVAL (operands[2]);
3619 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3620 operands[2] = GEN_INT (elt);
3621 }
3622 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3623 }
3624 [(set_attr "type" "neon_to_gp")]
3625 )
3626
3627 (define_insn "neon_vget_lane<mode>_zext_internal"
3628 [(set (match_operand:SI 0 "s_register_operand" "=r")
3629 (zero_extend:SI
3630 (vec_select:<V_elem>
3631 (match_operand:VD 1 "s_register_operand" "w")
3632 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3633 "TARGET_NEON"
3634 {
3635 if (BYTES_BIG_ENDIAN)
3636 {
3637 int elt = INTVAL (operands[2]);
3638 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3639 operands[2] = GEN_INT (elt);
3640 }
3641 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3642 }
3643 [(set_attr "type" "neon_to_gp")]
3644 )
3645
3646 (define_insn "neon_vget_lane<mode>_sext_internal"
3647 [(set (match_operand:SI 0 "s_register_operand" "=r")
3648 (sign_extend:SI
3649 (vec_select:<V_elem>
3650 (match_operand:VQ2 1 "s_register_operand" "w")
3651 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3652 "TARGET_NEON"
3653 {
3654 rtx ops[3];
3655 int regno = REGNO (operands[1]);
3656 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3657 unsigned int elt = INTVAL (operands[2]);
3658 unsigned int elt_adj = elt % halfelts;
3659
3660 if (BYTES_BIG_ENDIAN)
3661 elt_adj = halfelts - 1 - elt_adj;
3662
3663 ops[0] = operands[0];
3664 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3665 ops[2] = GEN_INT (elt_adj);
3666 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3667
3668 return "";
3669 }
3670 [(set_attr "type" "neon_to_gp_q")]
3671 )
3672
3673 (define_insn "neon_vget_lane<mode>_zext_internal"
3674 [(set (match_operand:SI 0 "s_register_operand" "=r")
3675 (zero_extend:SI
3676 (vec_select:<V_elem>
3677 (match_operand:VQ2 1 "s_register_operand" "w")
3678 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3679 "TARGET_NEON"
3680 {
3681 rtx ops[3];
3682 int regno = REGNO (operands[1]);
3683 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3684 unsigned int elt = INTVAL (operands[2]);
3685 unsigned int elt_adj = elt % halfelts;
3686
3687 if (BYTES_BIG_ENDIAN)
3688 elt_adj = halfelts - 1 - elt_adj;
3689
3690 ops[0] = operands[0];
3691 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3692 ops[2] = GEN_INT (elt_adj);
3693 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3694
3695 return "";
3696 }
3697 [(set_attr "type" "neon_to_gp_q")]
3698 )
3699
3700 (define_expand "neon_vget_lane<mode>"
3701 [(match_operand:<V_ext> 0 "s_register_operand" "")
3702 (match_operand:VDQW 1 "s_register_operand" "")
3703 (match_operand:SI 2 "immediate_operand" "")]
3704 "TARGET_NEON"
3705 {
3706 if (BYTES_BIG_ENDIAN)
3707 {
3708 /* The intrinsics are defined in terms of a model where the
3709 element ordering in memory is vldm order, whereas the generic
3710 RTL is defined in terms of a model where the element ordering
3711 in memory is array order. Convert the lane number to conform
3712 to this model. */
3713 unsigned int elt = INTVAL (operands[2]);
3714 unsigned int reg_nelts
3715 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3716 elt ^= reg_nelts - 1;
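      /* For example, for V8QI reg_nelts is 8, so lane 0 in vldm order
         becomes lane 7 in array order, lane 1 becomes lane 6, and so on,
         within each 64-bit register.  */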
3717 operands[2] = GEN_INT (elt);
3718 }
3719
3720 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3721 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3722 operands[2]));
3723 else
3724 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3725 operands[1],
3726 operands[2]));
3727 DONE;
3728 })
3729
3730 (define_expand "neon_vget_laneu<mode>"
3731 [(match_operand:<V_ext> 0 "s_register_operand" "")
3732 (match_operand:VDQIW 1 "s_register_operand" "")
3733 (match_operand:SI 2 "immediate_operand" "")]
3734 "TARGET_NEON"
3735 {
3736 if (BYTES_BIG_ENDIAN)
3737 {
3738 /* The intrinsics are defined in terms of a model where the
3739 element ordering in memory is vldm order, whereas the generic
3740 RTL is defined in terms of a model where the element ordering
3741 in memory is array order. Convert the lane number to conform
3742 to this model. */
3743 unsigned int elt = INTVAL (operands[2]);
3744 unsigned int reg_nelts
3745 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3746 elt ^= reg_nelts - 1;
3747 operands[2] = GEN_INT (elt);
3748 }
3749
3750 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3751 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3752 operands[2]));
3753 else
3754 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3755 operands[1],
3756 operands[2]));
3757 DONE;
3758 })
3759
3760 (define_expand "neon_vget_lanedi"
3761 [(match_operand:DI 0 "s_register_operand" "=r")
3762 (match_operand:DI 1 "s_register_operand" "w")
3763 (match_operand:SI 2 "immediate_operand" "")]
3764 "TARGET_NEON"
3765 {
3766 emit_move_insn (operands[0], operands[1]);
3767 DONE;
3768 })
3769
3770 (define_expand "neon_vget_lanev2di"
3771 [(match_operand:DI 0 "s_register_operand" "")
3772 (match_operand:V2DI 1 "s_register_operand" "")
3773 (match_operand:SI 2 "immediate_operand" "")]
3774 "TARGET_NEON"
3775 {
3776 int lane;
3777
3778 if (BYTES_BIG_ENDIAN)
3779 {
3780 /* The intrinsics are defined in terms of a model where the
3781 element ordering in memory is vldm order, whereas the generic
3782 RTL is defined in terms of a model where the element ordering
3783 in memory is array order. Convert the lane number to conform
3784 to this model. */
3785 unsigned int elt = INTVAL (operands[2]);
3786 unsigned int reg_nelts = 2;
3787 elt ^= reg_nelts - 1;
3788 operands[2] = GEN_INT (elt);
3789 }
3790
3791 lane = INTVAL (operands[2]);
3792   gcc_assert ((lane == 0) || (lane == 1));
3793 emit_move_insn (operands[0], lane == 0
3794 ? gen_lowpart (DImode, operands[1])
3795 : gen_highpart (DImode, operands[1]));
3796 DONE;
3797 })
3798
3799 (define_expand "neon_vset_lane<mode>"
3800 [(match_operand:VDQ 0 "s_register_operand" "=w")
3801 (match_operand:<V_elem> 1 "s_register_operand" "r")
3802 (match_operand:VDQ 2 "s_register_operand" "0")
3803 (match_operand:SI 3 "immediate_operand" "i")]
3804 "TARGET_NEON"
3805 {
3806 unsigned int elt = INTVAL (operands[3]);
3807
3808 if (BYTES_BIG_ENDIAN)
3809 {
3810 unsigned int reg_nelts
3811 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3812 elt ^= reg_nelts - 1;
3813 }
3814
3815 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3816 GEN_INT (1 << elt), operands[2]));
3817 DONE;
3818 })
3819
3820 ; Operands 2 and 3 are ignored; DImode has only one element (see neon_vget_lanedi).
3821
3822 (define_expand "neon_vset_lanedi"
3823 [(match_operand:DI 0 "s_register_operand" "=w")
3824 (match_operand:DI 1 "s_register_operand" "r")
3825 (match_operand:DI 2 "s_register_operand" "0")
3826 (match_operand:SI 3 "immediate_operand" "i")]
3827 "TARGET_NEON"
3828 {
3829 emit_move_insn (operands[0], operands[1]);
3830 DONE;
3831 })
3832
3833 (define_expand "neon_vcreate<mode>"
3834 [(match_operand:VD_RE 0 "s_register_operand" "")
3835 (match_operand:DI 1 "general_operand" "")]
3836 "TARGET_NEON"
3837 {
3838 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3839 emit_move_insn (operands[0], src);
3840 DONE;
3841 })
3842
3843 (define_insn "neon_vdup_n<mode>"
3844 [(set (match_operand:VX 0 "s_register_operand" "=w")
3845 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3846 "TARGET_NEON"
3847 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3848 [(set_attr "type" "neon_from_gp<q>")]
3849 )
3850
3851 (define_insn "neon_vdup_nv4hf"
3852 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3853 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3854 "TARGET_NEON"
3855 "vdup.16\t%P0, %1"
3856 [(set_attr "type" "neon_from_gp")]
3857 )
3858
3859 (define_insn "neon_vdup_nv8hf"
3860 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3861 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3862 "TARGET_NEON"
3863 "vdup.16\t%q0, %1"
3864 [(set_attr "type" "neon_from_gp_q")]
3865 )
3866
3867 (define_insn "neon_vdup_n<mode>"
3868 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3869 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3870 "TARGET_NEON"
3871 "@
3872 vdup.<V_sz_elem>\t%<V_reg>0, %1
3873 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3874 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3875 )
3876
3877 (define_expand "neon_vdup_ndi"
3878 [(match_operand:DI 0 "s_register_operand" "=w")
3879 (match_operand:DI 1 "s_register_operand" "r")]
3880 "TARGET_NEON"
3881 {
3882 emit_move_insn (operands[0], operands[1]);
3883 DONE;
3884 }
3885 )
3886
3887 (define_insn "neon_vdup_nv2di"
3888 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3889 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3890 "TARGET_NEON"
3891 "@
3892 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3893 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3894 [(set_attr "length" "8")
3895 (set_attr "type" "multiple")]
3896 )
3897
3898 (define_insn "neon_vdup_lane<mode>_internal"
3899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3900 (vec_duplicate:VDQW
3901 (vec_select:<V_elem>
3902 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3903 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3904 "TARGET_NEON"
3905 {
3906 if (BYTES_BIG_ENDIAN)
3907 {
3908 int elt = INTVAL (operands[2]);
3909 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3910 operands[2] = GEN_INT (elt);
3911 }
3912 if (<Is_d_reg>)
3913 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3914 else
3915 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3916 }
3917 [(set_attr "type" "neon_dup<q>")]
3918 )
3919
3920 (define_insn "neon_vdup_lane<mode>_internal"
3921 [(set (match_operand:VH 0 "s_register_operand" "=w")
3922 (vec_duplicate:VH
3923 (vec_select:<V_elem>
3924 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3925 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3926 "TARGET_NEON && TARGET_FP16"
3927 {
3928 if (BYTES_BIG_ENDIAN)
3929 {
3930 int elt = INTVAL (operands[2]);
3931 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3932 operands[2] = GEN_INT (elt);
3933 }
3934 if (<Is_d_reg>)
3935 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3936 else
3937 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3938 }
3939 [(set_attr "type" "neon_dup<q>")]
3940 )
3941
3942 (define_expand "neon_vdup_lane<mode>"
3943 [(match_operand:VDQW 0 "s_register_operand" "=w")
3944 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3945 (match_operand:SI 2 "immediate_operand" "i")]
3946 "TARGET_NEON"
3947 {
3948 if (BYTES_BIG_ENDIAN)
3949 {
3950 unsigned int elt = INTVAL (operands[2]);
3951 unsigned int reg_nelts
3952 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3953 elt ^= reg_nelts - 1;
3954 operands[2] = GEN_INT (elt);
3955 }
3956 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3957 operands[2]));
3958 DONE;
3959 })
3960
3961 (define_expand "neon_vdup_lane<mode>"
3962 [(match_operand:VH 0 "s_register_operand")
3963 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3964 (match_operand:SI 2 "immediate_operand")]
3965 "TARGET_NEON && TARGET_FP16"
3966 {
3967 if (BYTES_BIG_ENDIAN)
3968 {
3969 unsigned int elt = INTVAL (operands[2]);
3970 unsigned int reg_nelts
3971 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3972 elt ^= reg_nelts - 1;
3973 operands[2] = GEN_INT (elt);
3974 }
3975 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3976 operands[2]));
3977 DONE;
3978 })
3979
3980 ; Scalar index is ignored, since only zero is valid here.
3981 (define_expand "neon_vdup_lanedi"
3982 [(match_operand:DI 0 "s_register_operand" "=w")
3983 (match_operand:DI 1 "s_register_operand" "w")
3984 (match_operand:SI 2 "immediate_operand" "i")]
3985 "TARGET_NEON"
3986 {
3987 emit_move_insn (operands[0], operands[1]);
3988 DONE;
3989 })
3990
3991 ; Likewise for v2di, as the DImode second operand has only a single element.
3992 (define_expand "neon_vdup_lanev2di"
3993 [(match_operand:V2DI 0 "s_register_operand" "=w")
3994 (match_operand:DI 1 "s_register_operand" "w")
3995 (match_operand:SI 2 "immediate_operand" "i")]
3996 "TARGET_NEON"
3997 {
3998 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3999 DONE;
4000 })
4001
4002 ; Disabled before reload because we don't want combine doing something silly,
4003 ; but used by the post-reload expansion of neon_vcombine.
4004 (define_insn "*neon_vswp<mode>"
4005 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4006 (match_operand:VDQX 1 "s_register_operand" "+w"))
4007 (set (match_dup 1) (match_dup 0))]
4008 "TARGET_NEON && reload_completed"
4009 "vswp\t%<V_reg>0, %<V_reg>1"
4010 [(set_attr "type" "neon_permute<q>")]
4011 )
4012
4013 ;; In this insn, operand 1 is the low half and operand 2 the high half of the
4014 ;; destination vector.
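;; As an illustrative sketch in ACLE intrinsic terms (the names are not taken
;; from this file):
;;   float32x4_t r = vcombine_f32 (lo, hi);
;; places 'lo' in the low D register of 'r' and 'hi' in the high D register,
;; corresponding to operands 1 and 2 here.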
4015 ;; FIXME: A different implementation of this builtin could make it much
4016 ;; more likely that we wouldn't actually need to output anything (we could make
4017 ;; it so that the reg allocator puts things in the right places magically
4018 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
4019
4020 (define_insn_and_split "neon_vcombine<mode>"
4021 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4022 (vec_concat:<V_DOUBLE>
4023 (match_operand:VDX 1 "s_register_operand" "w")
4024 (match_operand:VDX 2 "s_register_operand" "w")))]
4025 "TARGET_NEON"
4026 "#"
4027 "&& reload_completed"
4028 [(const_int 0)]
4029 {
4030 neon_split_vcombine (operands);
4031 DONE;
4032 }
4033 [(set_attr "type" "multiple")]
4034 )
4035
4036 (define_expand "neon_vget_high<mode>"
4037 [(match_operand:<V_HALF> 0 "s_register_operand")
4038 (match_operand:VQX 1 "s_register_operand")]
4039 "TARGET_NEON"
4040 {
4041 emit_move_insn (operands[0],
4042 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4043 GET_MODE_SIZE (<V_HALF>mode)));
4044 DONE;
4045 })
4046
4047 (define_expand "neon_vget_low<mode>"
4048 [(match_operand:<V_HALF> 0 "s_register_operand")
4049 (match_operand:VQX 1 "s_register_operand")]
4050 "TARGET_NEON"
4051 {
4052 emit_move_insn (operands[0],
4053 simplify_gen_subreg (<V_HALF>mode, operands[1],
4054 <MODE>mode, 0));
4055 DONE;
4056 })
4057
4058 (define_insn "float<mode><V_cvtto>2"
4059 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4060 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4061 "TARGET_NEON && !flag_rounding_math"
4062 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4063 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4064 )
4065
4066 (define_insn "floatuns<mode><V_cvtto>2"
4067 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4068 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4069 "TARGET_NEON && !flag_rounding_math"
4070 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4071 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4072 )
4073
4074 (define_insn "fix_trunc<mode><V_cvtto>2"
4075 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4076 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4077 "TARGET_NEON"
4078 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4079 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4080 )
4081
4082 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4083 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4084 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4085 "TARGET_NEON"
4086 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4087 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4088 )
4089
4090 (define_insn "neon_vcvt<sup><mode>"
4091 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4092 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4093 VCVT_US))]
4094 "TARGET_NEON"
4095 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4096 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4097 )
4098
4099 (define_insn "neon_vcvt<sup><mode>"
4100 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4101 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4102 VCVT_US))]
4103 "TARGET_NEON"
4104 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4105 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4106 )
4107
4108 (define_insn "neon_vcvtv4sfv4hf"
4109 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4110 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4111 UNSPEC_VCVT))]
4112 "TARGET_NEON && TARGET_FP16"
4113 "vcvt.f32.f16\t%q0, %P1"
4114 [(set_attr "type" "neon_fp_cvt_widen_h")]
4115 )
4116
4117 (define_insn "neon_vcvtv4hfv4sf"
4118 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4119 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4120 UNSPEC_VCVT))]
4121 "TARGET_NEON && TARGET_FP16"
4122 "vcvt.f16.f32\t%P0, %q1"
4123 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
4124 )
4125
4126 (define_insn "neon_vcvt<sup><mode>"
4127 [(set
4128 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4129 (unspec:<VH_CVTTO>
4130 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4131 VCVT_US))]
4132 "TARGET_NEON_FP16INST"
4133 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4134 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4135 )
4136
4137 (define_insn "neon_vcvt<sup><mode>"
4138 [(set
4139 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4140 (unspec:<VH_CVTTO>
4141 [(match_operand:VH 1 "s_register_operand" "w")]
4142 VCVT_US))]
4143 "TARGET_NEON_FP16INST"
4144 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4145 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4146 )
4147
4148 (define_insn "neon_vcvt<sup>_n<mode>"
4149 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4150 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4151 (match_operand:SI 2 "immediate_operand" "i")]
4152 VCVT_US_N))]
4153 "TARGET_NEON"
4154 {
4155 arm_const_bounds (operands[2], 1, 33);
4156 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4157 }
4158 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4159 )
4160
4161 (define_insn "neon_vcvt<sup>_n<mode>"
4162 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4163 (unspec:<VH_CVTTO>
4164 [(match_operand:VH 1 "s_register_operand" "w")
4165 (match_operand:SI 2 "immediate_operand" "i")]
4166 VCVT_US_N))]
4167 "TARGET_NEON_FP16INST"
4168 {
4169 arm_const_bounds (operands[2], 0, 17);
4170 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4171 }
4172 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4173 )
4174
4175 (define_insn "neon_vcvt<sup>_n<mode>"
4176 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4177 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4178 (match_operand:SI 2 "immediate_operand" "i")]
4179 VCVT_US_N))]
4180 "TARGET_NEON"
4181 {
4182 arm_const_bounds (operands[2], 1, 33);
4183 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4184 }
4185 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4186 )
4187
4188 (define_insn "neon_vcvt<sup>_n<mode>"
4189 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4190 (unspec:<VH_CVTTO>
4191 [(match_operand:VCVTHI 1 "s_register_operand" "w")
4192 (match_operand:SI 2 "immediate_operand" "i")]
4193 VCVT_US_N))]
4194 "TARGET_NEON_FP16INST"
4195 {
4196 arm_const_bounds (operands[2], 0, 17);
4197 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4198 }
4199 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4200 )
4201
4202 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4203 [(set
4204 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4205 (unspec:<VH_CVTTO>
4206 [(match_operand:VH 1 "s_register_operand" "w")]
4207 VCVT_HF_US))]
4208 "TARGET_NEON_FP16INST"
4209 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4210 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4211 )
4212
4213 (define_insn "neon_vmovn<mode>"
4214 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4215 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4216 UNSPEC_VMOVN))]
4217 "TARGET_NEON"
4218 "vmovn.<V_if_elem>\t%P0, %q1"
4219 [(set_attr "type" "neon_shift_imm_narrow_q")]
4220 )
4221
4222 (define_insn "neon_vqmovn<sup><mode>"
4223 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4224 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4225 VQMOVN))]
4226 "TARGET_NEON"
4227 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4228 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4229 )
4230
4231 (define_insn "neon_vqmovun<mode>"
4232 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4233 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4234 UNSPEC_VQMOVUN))]
4235 "TARGET_NEON"
4236 "vqmovun.<V_s_elem>\t%P0, %q1"
4237 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4238 )
4239
4240 (define_insn "neon_vmovl<sup><mode>"
4241 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4242 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4243 VMOVL))]
4244 "TARGET_NEON"
4245 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4246 [(set_attr "type" "neon_shift_imm_long")]
4247 )
4248
4249 (define_insn "neon_vmul_lane<mode>"
4250 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4251 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4252 (match_operand:VMD 2 "s_register_operand"
4253 "<scalar_mul_constraint>")
4254 (match_operand:SI 3 "immediate_operand" "i")]
4255 UNSPEC_VMUL_LANE))]
4256 "TARGET_NEON"
4257 {
4258 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4259 }
4260 [(set (attr "type")
4261 (if_then_else (match_test "<Is_float_mode>")
4262 (const_string "neon_fp_mul_s_scalar<q>")
4263 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4264 )
4265
4266 (define_insn "neon_vmul_lane<mode>"
4267 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4268 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4269 (match_operand:<V_HALF> 2 "s_register_operand"
4270 "<scalar_mul_constraint>")
4271 (match_operand:SI 3 "immediate_operand" "i")]
4272 UNSPEC_VMUL_LANE))]
4273 "TARGET_NEON"
4274 {
4275 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4276 }
4277 [(set (attr "type")
4278 (if_then_else (match_test "<Is_float_mode>")
4279 (const_string "neon_fp_mul_s_scalar<q>")
4280 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4281 )
4282
4283 (define_insn "neon_vmul_lane<mode>"
4284 [(set (match_operand:VH 0 "s_register_operand" "=w")
4285 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4286 (match_operand:V4HF 2 "s_register_operand"
4287 "<scalar_mul_constraint>")
4288 (match_operand:SI 3 "immediate_operand" "i")]
4289 UNSPEC_VMUL_LANE))]
4290 "TARGET_NEON_FP16INST"
4291 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4292 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
4293 )
4294
4295 (define_insn "neon_vmull<sup>_lane<mode>"
4296 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4297 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4298 (match_operand:VMDI 2 "s_register_operand"
4299 "<scalar_mul_constraint>")
4300 (match_operand:SI 3 "immediate_operand" "i")]
4301 VMULL_LANE))]
4302 "TARGET_NEON"
4303 {
4304 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4305 }
4306 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
4307 )
4308
4309 (define_insn "neon_vqdmull_lane<mode>"
4310 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4311 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4312 (match_operand:VMDI 2 "s_register_operand"
4313 "<scalar_mul_constraint>")
4314 (match_operand:SI 3 "immediate_operand" "i")]
4315 UNSPEC_VQDMULL_LANE))]
4316 "TARGET_NEON"
4317 {
4318 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4319 }
4320 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
4321 )
4322
4323 (define_insn "neon_vq<r>dmulh_lane<mode>"
4324 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4325 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4326 (match_operand:<V_HALF> 2 "s_register_operand"
4327 "<scalar_mul_constraint>")
4328 (match_operand:SI 3 "immediate_operand" "i")]
4329 VQDMULH_LANE))]
4330 "TARGET_NEON"
4331 {
4332 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4333 }
4334 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4335 )
4336
4337 (define_insn "neon_vq<r>dmulh_lane<mode>"
4338 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4339 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4340 (match_operand:VMDI 2 "s_register_operand"
4341 "<scalar_mul_constraint>")
4342 (match_operand:SI 3 "immediate_operand" "i")]
4343 VQDMULH_LANE))]
4344 "TARGET_NEON"
4345 {
4346 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4347 }
4348 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4349 )
4350
4351 ;; vqrdmlah_lane, vqrdmlsh_lane
4352 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4353 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4354 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4355 (match_operand:VMQI 2 "s_register_operand" "w")
4356 (match_operand:<V_HALF> 3 "s_register_operand"
4357 "<scalar_mul_constraint>")
4358 (match_operand:SI 4 "immediate_operand" "i")]
4359 VQRDMLH_AS))]
4360 "TARGET_NEON_RDMA"
4361 {
4362 return
4363 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4364 }
4365 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
4366 )
4367
4368 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4369 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4370 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4371 (match_operand:VMDI 2 "s_register_operand" "w")
4372 (match_operand:VMDI 3 "s_register_operand"
4373 "<scalar_mul_constraint>")
4374 (match_operand:SI 4 "immediate_operand" "i")]
4375 VQRDMLH_AS))]
4376 "TARGET_NEON_RDMA"
4377 {
4378 return
4379 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4380 }
4381 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
4382 )
4383
4384 (define_insn "neon_vmla_lane<mode>"
4385 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4386 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4387 (match_operand:VMD 2 "s_register_operand" "w")
4388 (match_operand:VMD 3 "s_register_operand"
4389 "<scalar_mul_constraint>")
4390 (match_operand:SI 4 "immediate_operand" "i")]
4391 UNSPEC_VMLA_LANE))]
4392 "TARGET_NEON"
4393 {
4394 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4395 }
4396 [(set (attr "type")
4397 (if_then_else (match_test "<Is_float_mode>")
4398 (const_string "neon_fp_mla_s_scalar<q>")
4399 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4400 )
4401
4402 (define_insn "neon_vmla_lane<mode>"
4403 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4404 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4405 (match_operand:VMQ 2 "s_register_operand" "w")
4406 (match_operand:<V_HALF> 3 "s_register_operand"
4407 "<scalar_mul_constraint>")
4408 (match_operand:SI 4 "immediate_operand" "i")]
4409 UNSPEC_VMLA_LANE))]
4410 "TARGET_NEON"
4411 {
4412 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4413 }
4414 [(set (attr "type")
4415 (if_then_else (match_test "<Is_float_mode>")
4416 (const_string "neon_fp_mla_s_scalar<q>")
4417 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4418 )
4419
4420 (define_insn "neon_vmlal<sup>_lane<mode>"
4421 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4422 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4423 (match_operand:VMDI 2 "s_register_operand" "w")
4424 (match_operand:VMDI 3 "s_register_operand"
4425 "<scalar_mul_constraint>")
4426 (match_operand:SI 4 "immediate_operand" "i")]
4427 VMLAL_LANE))]
4428 "TARGET_NEON"
4429 {
4430 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4431 }
4432 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4433 )
4434
4435 (define_insn "neon_vqdmlal_lane<mode>"
4436 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4437 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4438 (match_operand:VMDI 2 "s_register_operand" "w")
4439 (match_operand:VMDI 3 "s_register_operand"
4440 "<scalar_mul_constraint>")
4441 (match_operand:SI 4 "immediate_operand" "i")]
4442 UNSPEC_VQDMLAL_LANE))]
4443 "TARGET_NEON"
4444 {
4445 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4446 }
4447 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4448 )
4449
4450 (define_insn "neon_vmls_lane<mode>"
4451 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4452 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4453 (match_operand:VMD 2 "s_register_operand" "w")
4454 (match_operand:VMD 3 "s_register_operand"
4455 "<scalar_mul_constraint>")
4456 (match_operand:SI 4 "immediate_operand" "i")]
4457 UNSPEC_VMLS_LANE))]
4458 "TARGET_NEON"
4459 {
4460 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4461 }
4462 [(set (attr "type")
4463 (if_then_else (match_test "<Is_float_mode>")
4464 (const_string "neon_fp_mla_s_scalar<q>")
4465 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4466 )
4467
4468 (define_insn "neon_vmls_lane<mode>"
4469 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4470 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4471 (match_operand:VMQ 2 "s_register_operand" "w")
4472 (match_operand:<V_HALF> 3 "s_register_operand"
4473 "<scalar_mul_constraint>")
4474 (match_operand:SI 4 "immediate_operand" "i")]
4475 UNSPEC_VMLS_LANE))]
4476 "TARGET_NEON"
4477 {
4478 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4479 }
4480 [(set (attr "type")
4481 (if_then_else (match_test "<Is_float_mode>")
4482 (const_string "neon_fp_mla_s_scalar<q>")
4483 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4484 )
4485
4486 (define_insn "neon_vmlsl<sup>_lane<mode>"
4487 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4488 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4489 (match_operand:VMDI 2 "s_register_operand" "w")
4490 (match_operand:VMDI 3 "s_register_operand"
4491 "<scalar_mul_constraint>")
4492 (match_operand:SI 4 "immediate_operand" "i")]
4493 VMLSL_LANE))]
4494 "TARGET_NEON"
4495 {
4496 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4497 }
4498 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4499 )
4500
4501 (define_insn "neon_vqdmlsl_lane<mode>"
4502 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4503 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4504 (match_operand:VMDI 2 "s_register_operand" "w")
4505 (match_operand:VMDI 3 "s_register_operand"
4506 "<scalar_mul_constraint>")
4507 (match_operand:SI 4 "immediate_operand" "i")]
4508 UNSPEC_VQDMLSL_LANE))]
4509 "TARGET_NEON"
4510 {
4511 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4512 }
4513 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4514 )
4515
4516 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value from
4517 ; a core register into a temp register, then use a scalar taken from that.
4518 ; This isn't an optimal solution if e.g. the scalar has just been read from
4519 ; memory or extracted from another vector.  In the latter case it's currently
4520 ; better to use the "_lane" variant; the former case could probably be
4521 ; implemented using vld1_lane, but that hasn't been done yet.
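; As an illustrative sketch in ACLE intrinsic terms (names not taken from this
; file), the float32x2_t expansion of neon_vmul_n below behaves like
;   float32x2_t tmp = vset_lane_f32 (s, tmp, 0);
;   r = vmul_lane_f32 (a, tmp, 0);
; where tmp's other lane is left undefined, mirroring the fresh register the
; expander creates.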
4522
4523 (define_expand "neon_vmul_n<mode>"
4524 [(match_operand:VMD 0 "s_register_operand" "")
4525 (match_operand:VMD 1 "s_register_operand" "")
4526 (match_operand:<V_elem> 2 "s_register_operand" "")]
4527 "TARGET_NEON"
4528 {
4529 rtx tmp = gen_reg_rtx (<MODE>mode);
4530 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4531 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4532 const0_rtx));
4533 DONE;
4534 })
4535
4536 (define_expand "neon_vmul_n<mode>"
4537 [(match_operand:VMQ 0 "s_register_operand" "")
4538 (match_operand:VMQ 1 "s_register_operand" "")
4539 (match_operand:<V_elem> 2 "s_register_operand" "")]
4540 "TARGET_NEON"
4541 {
4542 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4543 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4544 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4545 const0_rtx));
4546 DONE;
4547 })
4548
4549 (define_expand "neon_vmul_n<mode>"
4550 [(match_operand:VH 0 "s_register_operand")
4551 (match_operand:VH 1 "s_register_operand")
4552 (match_operand:<V_elem> 2 "s_register_operand")]
4553 "TARGET_NEON_FP16INST"
4554 {
4555 rtx tmp = gen_reg_rtx (V4HFmode);
4556 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4557 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4558 const0_rtx));
4559 DONE;
4560 })
4561
4562 (define_expand "neon_vmulls_n<mode>"
4563 [(match_operand:<V_widen> 0 "s_register_operand" "")
4564 (match_operand:VMDI 1 "s_register_operand" "")
4565 (match_operand:<V_elem> 2 "s_register_operand" "")]
4566 "TARGET_NEON"
4567 {
4568 rtx tmp = gen_reg_rtx (<MODE>mode);
4569 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4570 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4571 const0_rtx));
4572 DONE;
4573 })
4574
4575 (define_expand "neon_vmullu_n<mode>"
4576 [(match_operand:<V_widen> 0 "s_register_operand" "")
4577 (match_operand:VMDI 1 "s_register_operand" "")
4578 (match_operand:<V_elem> 2 "s_register_operand" "")]
4579 "TARGET_NEON"
4580 {
4581 rtx tmp = gen_reg_rtx (<MODE>mode);
4582 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4583 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4584 const0_rtx));
4585 DONE;
4586 })
4587
4588 (define_expand "neon_vqdmull_n<mode>"
4589 [(match_operand:<V_widen> 0 "s_register_operand" "")
4590 (match_operand:VMDI 1 "s_register_operand" "")
4591 (match_operand:<V_elem> 2 "s_register_operand" "")]
4592 "TARGET_NEON"
4593 {
4594 rtx tmp = gen_reg_rtx (<MODE>mode);
4595 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4596 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4597 const0_rtx));
4598 DONE;
4599 })
4600
4601 (define_expand "neon_vqdmulh_n<mode>"
4602 [(match_operand:VMDI 0 "s_register_operand" "")
4603 (match_operand:VMDI 1 "s_register_operand" "")
4604 (match_operand:<V_elem> 2 "s_register_operand" "")]
4605 "TARGET_NEON"
4606 {
4607 rtx tmp = gen_reg_rtx (<MODE>mode);
4608 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4609 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4610 const0_rtx));
4611 DONE;
4612 })
4613
4614 (define_expand "neon_vqrdmulh_n<mode>"
4615 [(match_operand:VMDI 0 "s_register_operand" "")
4616 (match_operand:VMDI 1 "s_register_operand" "")
4617 (match_operand:<V_elem> 2 "s_register_operand" "")]
4618 "TARGET_NEON"
4619 {
4620 rtx tmp = gen_reg_rtx (<MODE>mode);
4621 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4622 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4623 const0_rtx));
4624 DONE;
4625 })
4626
4627 (define_expand "neon_vqdmulh_n<mode>"
4628 [(match_operand:VMQI 0 "s_register_operand" "")
4629 (match_operand:VMQI 1 "s_register_operand" "")
4630 (match_operand:<V_elem> 2 "s_register_operand" "")]
4631 "TARGET_NEON"
4632 {
4633 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4634 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4635 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4636 const0_rtx));
4637 DONE;
4638 })
4639
4640 (define_expand "neon_vqrdmulh_n<mode>"
4641 [(match_operand:VMQI 0 "s_register_operand" "")
4642 (match_operand:VMQI 1 "s_register_operand" "")
4643 (match_operand:<V_elem> 2 "s_register_operand" "")]
4644 "TARGET_NEON"
4645 {
4646 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4647 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4648 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4649 const0_rtx));
4650 DONE;
4651 })
4652
4653 (define_expand "neon_vmla_n<mode>"
4654 [(match_operand:VMD 0 "s_register_operand" "")
4655 (match_operand:VMD 1 "s_register_operand" "")
4656 (match_operand:VMD 2 "s_register_operand" "")
4657 (match_operand:<V_elem> 3 "s_register_operand" "")]
4658 "TARGET_NEON"
4659 {
4660 rtx tmp = gen_reg_rtx (<MODE>mode);
4661 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4662 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4663 tmp, const0_rtx));
4664 DONE;
4665 })
4666
4667 (define_expand "neon_vmla_n<mode>"
4668 [(match_operand:VMQ 0 "s_register_operand" "")
4669 (match_operand:VMQ 1 "s_register_operand" "")
4670 (match_operand:VMQ 2 "s_register_operand" "")
4671 (match_operand:<V_elem> 3 "s_register_operand" "")]
4672 "TARGET_NEON"
4673 {
4674 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4675 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4676 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4677 tmp, const0_rtx));
4678 DONE;
4679 })
4680
4681 (define_expand "neon_vmlals_n<mode>"
4682 [(match_operand:<V_widen> 0 "s_register_operand" "")
4683 (match_operand:<V_widen> 1 "s_register_operand" "")
4684 (match_operand:VMDI 2 "s_register_operand" "")
4685 (match_operand:<V_elem> 3 "s_register_operand" "")]
4686 "TARGET_NEON"
4687 {
4688 rtx tmp = gen_reg_rtx (<MODE>mode);
4689 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4690 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4691 tmp, const0_rtx));
4692 DONE;
4693 })
4694
4695 (define_expand "neon_vmlalu_n<mode>"
4696 [(match_operand:<V_widen> 0 "s_register_operand" "")
4697 (match_operand:<V_widen> 1 "s_register_operand" "")
4698 (match_operand:VMDI 2 "s_register_operand" "")
4699 (match_operand:<V_elem> 3 "s_register_operand" "")]
4700 "TARGET_NEON"
4701 {
4702 rtx tmp = gen_reg_rtx (<MODE>mode);
4703 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4704 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4705 tmp, const0_rtx));
4706 DONE;
4707 })
4708
4709 (define_expand "neon_vqdmlal_n<mode>"
4710 [(match_operand:<V_widen> 0 "s_register_operand" "")
4711 (match_operand:<V_widen> 1 "s_register_operand" "")
4712 (match_operand:VMDI 2 "s_register_operand" "")
4713 (match_operand:<V_elem> 3 "s_register_operand" "")]
4714 "TARGET_NEON"
4715 {
4716 rtx tmp = gen_reg_rtx (<MODE>mode);
4717 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4718 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4719 tmp, const0_rtx));
4720 DONE;
4721 })
4722
4723 (define_expand "neon_vmls_n<mode>"
4724 [(match_operand:VMD 0 "s_register_operand" "")
4725 (match_operand:VMD 1 "s_register_operand" "")
4726 (match_operand:VMD 2 "s_register_operand" "")
4727 (match_operand:<V_elem> 3 "s_register_operand" "")]
4728 "TARGET_NEON"
4729 {
4730 rtx tmp = gen_reg_rtx (<MODE>mode);
4731 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4732 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4733 tmp, const0_rtx));
4734 DONE;
4735 })
4736
4737 (define_expand "neon_vmls_n<mode>"
4738 [(match_operand:VMQ 0 "s_register_operand" "")
4739 (match_operand:VMQ 1 "s_register_operand" "")
4740 (match_operand:VMQ 2 "s_register_operand" "")
4741 (match_operand:<V_elem> 3 "s_register_operand" "")]
4742 "TARGET_NEON"
4743 {
4744 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4745 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4746 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4747 tmp, const0_rtx));
4748 DONE;
4749 })
4750
4751 (define_expand "neon_vmlsls_n<mode>"
4752 [(match_operand:<V_widen> 0 "s_register_operand" "")
4753 (match_operand:<V_widen> 1 "s_register_operand" "")
4754 (match_operand:VMDI 2 "s_register_operand" "")
4755 (match_operand:<V_elem> 3 "s_register_operand" "")]
4756 "TARGET_NEON"
4757 {
4758 rtx tmp = gen_reg_rtx (<MODE>mode);
4759 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4760 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4761 tmp, const0_rtx));
4762 DONE;
4763 })
4764
4765 (define_expand "neon_vmlslu_n<mode>"
4766 [(match_operand:<V_widen> 0 "s_register_operand" "")
4767 (match_operand:<V_widen> 1 "s_register_operand" "")
4768 (match_operand:VMDI 2 "s_register_operand" "")
4769 (match_operand:<V_elem> 3 "s_register_operand" "")]
4770 "TARGET_NEON"
4771 {
4772 rtx tmp = gen_reg_rtx (<MODE>mode);
4773 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4774 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4775 tmp, const0_rtx));
4776 DONE;
4777 })
4778
4779 (define_expand "neon_vqdmlsl_n<mode>"
4780 [(match_operand:<V_widen> 0 "s_register_operand" "")
4781 (match_operand:<V_widen> 1 "s_register_operand" "")
4782 (match_operand:VMDI 2 "s_register_operand" "")
4783 (match_operand:<V_elem> 3 "s_register_operand" "")]
4784 "TARGET_NEON"
4785 {
4786 rtx tmp = gen_reg_rtx (<MODE>mode);
4787 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4788 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4789 tmp, const0_rtx));
4790 DONE;
4791 })
4792
4793 (define_insn "neon_vext<mode>"
4794 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4795 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4796 (match_operand:VDQX 2 "s_register_operand" "w")
4797 (match_operand:SI 3 "immediate_operand" "i")]
4798 UNSPEC_VEXT))]
4799 "TARGET_NEON"
4800 {
4801 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4802 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4803 }
4804 [(set_attr "type" "neon_ext<q>")]
4805 )
4806
4807 (define_insn "neon_vrev64<mode>"
4808 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4809 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4810 UNSPEC_VREV64))]
4811 "TARGET_NEON"
4812 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4813 [(set_attr "type" "neon_rev<q>")]
4814 )
4815
4816 (define_insn "neon_vrev32<mode>"
4817 [(set (match_operand:VX 0 "s_register_operand" "=w")
4818 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4819 UNSPEC_VREV32))]
4820 "TARGET_NEON"
4821 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4822 [(set_attr "type" "neon_rev<q>")]
4823 )
4824
4825 (define_insn "neon_vrev16<mode>"
4826 [(set (match_operand:VE 0 "s_register_operand" "=w")
4827 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4828 UNSPEC_VREV16))]
4829 "TARGET_NEON"
4830 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4831 [(set_attr "type" "neon_rev<q>")]
4832 )
4833
4834 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4835 ; allocation. For an intrinsic of the form:
4836 ; rD = vbsl_* (rS, rN, rM)
4837 ; We can use any of:
4838 ; vbsl rS, rN, rM (if D = S)
4839 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4840 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
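;
; As an illustrative sketch (the register names below are placeholders, not
; the output of any particular compilation), for C source such as
;   uint8x8_t r = vbsl_u8 (s, n, m);
; the register allocator may end up emitting any one of
;   vbsl  dS, dN, dM   @ r allocated to the same register as s
;   vbit  dM, dN, dS   @ r allocated to the same register as m
;   vbif  dN, dM, dS   @ r allocated to the same register as n
; which is what the three alternatives of the pattern below express.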
4841
4842 (define_insn "neon_vbsl<mode>_internal"
4843 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4844 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4845 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4846 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4847 UNSPEC_VBSL))]
4848 "TARGET_NEON"
4849 "@
4850 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4851 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4852 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4853 [(set_attr "type" "neon_bsl<q>")]
4854 )
4855
4856 (define_expand "neon_vbsl<mode>"
4857 [(set (match_operand:VDQX 0 "s_register_operand" "")
4858 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4859 (match_operand:VDQX 2 "s_register_operand" "")
4860 (match_operand:VDQX 3 "s_register_operand" "")]
4861 UNSPEC_VBSL))]
4862 "TARGET_NEON"
4863 {
4864 /* We can't alias operands together if they have different modes. */
4865 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4866 })
4867
4868 ;; vshl, vrshl
4869 (define_insn "neon_v<shift_op><sup><mode>"
4870 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4871 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4872 (match_operand:VDQIX 2 "s_register_operand" "w")]
4873 VSHL))]
4874 "TARGET_NEON"
4875 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4876 [(set_attr "type" "neon_shift_imm<q>")]
4877 )
4878
4879 ;; vqshl, vqrshl
4880 (define_insn "neon_v<shift_op><sup><mode>"
4881 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4882 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4883 (match_operand:VDQIX 2 "s_register_operand" "w")]
4884 VQSHL))]
4885 "TARGET_NEON"
4886 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4887 [(set_attr "type" "neon_sat_shift_imm<q>")]
4888 )
4889
4890 ;; vshr_n, vrshr_n
4891 (define_insn "neon_v<shift_op><sup>_n<mode>"
4892 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4893 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4894 (match_operand:SI 2 "immediate_operand" "i")]
4895 VSHR_N))]
4896 "TARGET_NEON"
4897 {
4898 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4899 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4900 }
4901 [(set_attr "type" "neon_shift_imm<q>")]
4902 )
4903
4904 ;; vshrn_n, vrshrn_n
4905 (define_insn "neon_v<shift_op>_n<mode>"
4906 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4907 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4908 (match_operand:SI 2 "immediate_operand" "i")]
4909 VSHRN_N))]
4910 "TARGET_NEON"
4911 {
4912 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4913 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4914 }
4915 [(set_attr "type" "neon_shift_imm_narrow_q")]
4916 )
4917
4918 ;; vqshrn_n, vqrshrn_n
4919 (define_insn "neon_v<shift_op><sup>_n<mode>"
4920 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4921 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4922 (match_operand:SI 2 "immediate_operand" "i")]
4923 VQSHRN_N))]
4924 "TARGET_NEON"
4925 {
4926 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4927 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4928 }
4929 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4930 )
4931
4932 ;; vqshrun_n, vqrshrun_n
4933 (define_insn "neon_v<shift_op>_n<mode>"
4934 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4935 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4936 (match_operand:SI 2 "immediate_operand" "i")]
4937 VQSHRUN_N))]
4938 "TARGET_NEON"
4939 {
4940 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4941 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4942 }
4943 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4944 )
4945
4946 (define_insn "neon_vshl_n<mode>"
4947 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4948 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4949 (match_operand:SI 2 "immediate_operand" "i")]
4950 UNSPEC_VSHL_N))]
4951 "TARGET_NEON"
4952 {
4953 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4954 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4955 }
4956 [(set_attr "type" "neon_shift_imm<q>")]
4957 )
4958
4959 (define_insn "neon_vqshl_<sup>_n<mode>"
4960 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4961 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4962 (match_operand:SI 2 "immediate_operand" "i")]
4963 VQSHL_N))]
4964 "TARGET_NEON"
4965 {
4966 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4967 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4968 }
4969 [(set_attr "type" "neon_sat_shift_imm<q>")]
4970 )
4971
4972 (define_insn "neon_vqshlu_n<mode>"
4973 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4974 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4975 (match_operand:SI 2 "immediate_operand" "i")]
4976 UNSPEC_VQSHLU_N))]
4977 "TARGET_NEON"
4978 {
4979 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4980 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4981 }
4982 [(set_attr "type" "neon_sat_shift_imm<q>")]
4983 )
4984
4985 (define_insn "neon_vshll<sup>_n<mode>"
4986 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4987 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4988 (match_operand:SI 2 "immediate_operand" "i")]
4989 VSHLL_N))]
4990 "TARGET_NEON"
4991 {
4992 /* The boundaries are: 0 < imm <= size. */
4993 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4994 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4995 }
4996 [(set_attr "type" "neon_shift_imm_long")]
4997 )
4998
4999 ;; vsra_n, vrsra_n
5000 (define_insn "neon_v<shift_op><sup>_n<mode>"
5001 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5002 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5003 (match_operand:VDQIX 2 "s_register_operand" "w")
5004 (match_operand:SI 3 "immediate_operand" "i")]
5005 VSRA_N))]
5006 "TARGET_NEON"
5007 {
5008 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5009 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5010 }
5011 [(set_attr "type" "neon_shift_acc<q>")]
5012 )
5013
5014 (define_insn "neon_vsri_n<mode>"
5015 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5016 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5017 (match_operand:VDQIX 2 "s_register_operand" "w")
5018 (match_operand:SI 3 "immediate_operand" "i")]
5019 UNSPEC_VSRI))]
5020 "TARGET_NEON"
5021 {
5022 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5023 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5024 }
5025 [(set_attr "type" "neon_shift_reg<q>")]
5026 )
5027
5028 (define_insn "neon_vsli_n<mode>"
5029 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5030 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5031 (match_operand:VDQIX 2 "s_register_operand" "w")
5032 (match_operand:SI 3 "immediate_operand" "i")]
5033 UNSPEC_VSLI))]
5034 "TARGET_NEON"
5035 {
5036 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
5037 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5038 }
5039 [(set_attr "type" "neon_shift_reg<q>")]
5040 )
5041
5042 (define_insn "neon_vtbl1v8qi"
5043 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5044 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
5045 (match_operand:V8QI 2 "s_register_operand" "w")]
5046 UNSPEC_VTBL))]
5047 "TARGET_NEON"
5048 "vtbl.8\t%P0, {%P1}, %P2"
5049 [(set_attr "type" "neon_tbl1")]
5050 )
5051
5052 (define_insn "neon_vtbl2v8qi"
5053 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5054 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
5055 (match_operand:V8QI 2 "s_register_operand" "w")]
5056 UNSPEC_VTBL))]
5057 "TARGET_NEON"
5058 {
5059 rtx ops[4];
5060 int tabbase = REGNO (operands[1]);
5061
5062 ops[0] = operands[0];
5063 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5064 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5065 ops[3] = operands[2];
5066 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
5067
5068 return "";
5069 }
5070 [(set_attr "type" "neon_tbl2")]
5071 )
5072
5073 (define_insn "neon_vtbl3v8qi"
5074 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5075 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
5076 (match_operand:V8QI 2 "s_register_operand" "w")]
5077 UNSPEC_VTBL))]
5078 "TARGET_NEON"
5079 {
5080 rtx ops[5];
5081 int tabbase = REGNO (operands[1]);
5082
5083 ops[0] = operands[0];
5084 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5085 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5086 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5087 ops[4] = operands[2];
5088 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5089
5090 return "";
5091 }
5092 [(set_attr "type" "neon_tbl3")]
5093 )
5094
5095 (define_insn "neon_vtbl4v8qi"
5096 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5097 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5098 (match_operand:V8QI 2 "s_register_operand" "w")]
5099 UNSPEC_VTBL))]
5100 "TARGET_NEON"
5101 {
5102 rtx ops[6];
5103 int tabbase = REGNO (operands[1]);
5104
5105 ops[0] = operands[0];
5106 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5107 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5108 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5109 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5110 ops[5] = operands[2];
5111 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5112
5113 return "";
5114 }
5115 [(set_attr "type" "neon_tbl4")]
5116 )
5117
5118 ;; These three are used by the vec_perm infrastructure for V16QImode.
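;; There is no form of vtbl that writes a whole Q register, so the V16QI vtbl
;; patterns below are split after reload into a pair of D-register vtbl
;; operations, one computing each D-register half of the V16QI result.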
5119 (define_insn_and_split "neon_vtbl1v16qi"
5120 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5121 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5122 (match_operand:V16QI 2 "s_register_operand" "w")]
5123 UNSPEC_VTBL))]
5124 "TARGET_NEON"
5125 "#"
5126 "&& reload_completed"
5127 [(const_int 0)]
5128 {
5129 rtx op0, op1, op2, part0, part2;
5130 unsigned ofs;
5131
5132 op0 = operands[0];
5133 op1 = gen_lowpart (TImode, operands[1]);
5134 op2 = operands[2];
5135
5136 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5137 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5138 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5139 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5140
5141 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5142 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5143 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5144 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5145 DONE;
5146 }
5147 [(set_attr "type" "multiple")]
5148 )
5149
5150 (define_insn_and_split "neon_vtbl2v16qi"
5151 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5152 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5153 (match_operand:V16QI 2 "s_register_operand" "w")]
5154 UNSPEC_VTBL))]
5155 "TARGET_NEON"
5156 "#"
5157 "&& reload_completed"
5158 [(const_int 0)]
5159 {
5160 rtx op0, op1, op2, part0, part2;
5161 unsigned ofs;
5162
5163 op0 = operands[0];
5164 op1 = operands[1];
5165 op2 = operands[2];
5166
5167 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5168 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5169 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5170 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5171
5172 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5173 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5174 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5175 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5176 DONE;
5177 }
5178 [(set_attr "type" "multiple")]
5179 )
5180
5181 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5182 ;; handle quad-word input modes, producing octa-word output modes. But
5183 ;; that requires us to add support for octa-word vector modes in moves.
5184 ;; That seems overkill for this one use in vec_perm.
5185 (define_insn_and_split "neon_vcombinev16qi"
5186 [(set (match_operand:OI 0 "s_register_operand" "=w")
5187 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5188 (match_operand:V16QI 2 "s_register_operand" "w")]
5189 UNSPEC_VCONCAT))]
5190 "TARGET_NEON"
5191 "#"
5192 "&& reload_completed"
5193 [(const_int 0)]
5194 {
5195 neon_split_vcombine (operands);
5196 DONE;
5197 }
5198 [(set_attr "type" "multiple")]
5199 )
5200
5201 (define_insn "neon_vtbx1v8qi"
5202 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5203 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5204 (match_operand:V8QI 2 "s_register_operand" "w")
5205 (match_operand:V8QI 3 "s_register_operand" "w")]
5206 UNSPEC_VTBX))]
5207 "TARGET_NEON"
5208 "vtbx.8\t%P0, {%P2}, %P3"
5209 [(set_attr "type" "neon_tbl1")]
5210 )
5211
5212 (define_insn "neon_vtbx2v8qi"
5213 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5214 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5215 (match_operand:TI 2 "s_register_operand" "w")
5216 (match_operand:V8QI 3 "s_register_operand" "w")]
5217 UNSPEC_VTBX))]
5218 "TARGET_NEON"
5219 {
5220 rtx ops[4];
5221 int tabbase = REGNO (operands[2]);
5222
5223 ops[0] = operands[0];
5224 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5225 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5226 ops[3] = operands[3];
5227 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5228
5229 return "";
5230 }
5231 [(set_attr "type" "neon_tbl2")]
5232 )
5233
5234 (define_insn "neon_vtbx3v8qi"
5235 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5236 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5237 (match_operand:EI 2 "s_register_operand" "w")
5238 (match_operand:V8QI 3 "s_register_operand" "w")]
5239 UNSPEC_VTBX))]
5240 "TARGET_NEON"
5241 {
5242 rtx ops[5];
5243 int tabbase = REGNO (operands[2]);
5244
5245 ops[0] = operands[0];
5246 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5247 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5248 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5249 ops[4] = operands[3];
5250 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5251
5252 return "";
5253 }
5254 [(set_attr "type" "neon_tbl3")]
5255 )
5256
5257 (define_insn "neon_vtbx4v8qi"
5258 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5259 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5260 (match_operand:OI 2 "s_register_operand" "w")
5261 (match_operand:V8QI 3 "s_register_operand" "w")]
5262 UNSPEC_VTBX))]
5263 "TARGET_NEON"
5264 {
5265 rtx ops[6];
5266 int tabbase = REGNO (operands[2]);
5267
5268 ops[0] = operands[0];
5269 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5270 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5271 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5272 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5273 ops[5] = operands[3];
5274 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5275
5276 return "";
5277 }
5278 [(set_attr "type" "neon_tbl4")]
5279 )
5280
5281 (define_expand "neon_vtrn<mode>_internal"
5282 [(parallel
5283 [(set (match_operand:VDQWH 0 "s_register_operand")
5284 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5285 (match_operand:VDQWH 2 "s_register_operand")]
5286 UNSPEC_VTRN1))
5287 (set (match_operand:VDQWH 3 "s_register_operand")
5288 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5289 "TARGET_NEON"
5290 ""
5291 )
5292
5293 ;; Note: Different operand numbering to handle tied registers correctly;
;; a matching constraint may only refer to a lower-numbered operand, so the
;; second output is operand 2 here (not operand 3 as in the expander above)
;; and the input tied to it is operand 3.
5294 (define_insn "*neon_vtrn<mode>_insn"
5295 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5296 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5297 (match_operand:VDQWH 3 "s_register_operand" "2")]
5298 UNSPEC_VTRN1))
5299 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5300 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5301 UNSPEC_VTRN2))]
5302 "TARGET_NEON"
5303 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5304 [(set_attr "type" "neon_permute<q>")]
5305 )
5306
5307 (define_expand "neon_vzip<mode>_internal"
5308 [(parallel
5309 [(set (match_operand:VDQWH 0 "s_register_operand")
5310 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5311 (match_operand:VDQWH 2 "s_register_operand")]
5312 UNSPEC_VZIP1))
5313 (set (match_operand:VDQWH 3 "s_register_operand")
5314 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5315 "TARGET_NEON"
5316 ""
5317 )
5318
5319 ;; Note: Different operand numbering to handle tied registers correctly;
;; see the comment on the vtrn pattern above.
5320 (define_insn "*neon_vzip<mode>_insn"
5321 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5322 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5323 (match_operand:VDQWH 3 "s_register_operand" "2")]
5324 UNSPEC_VZIP1))
5325 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5326 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5327 UNSPEC_VZIP2))]
5328 "TARGET_NEON"
5329 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5330 [(set_attr "type" "neon_zip<q>")]
5331 )
5332
5333 (define_expand "neon_vuzp<mode>_internal"
5334 [(parallel
5335 [(set (match_operand:VDQWH 0 "s_register_operand")
5336 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5337 (match_operand:VDQWH 2 "s_register_operand")]
5338 UNSPEC_VUZP1))
5339 (set (match_operand:VDQWH 3 "s_register_operand")
5340 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5341 "TARGET_NEON"
5342 ""
5343 )
5344
5345 ;; Note: Different operand numbering to handle tied registers correctly;
;; see the comment on the vtrn pattern above.
5346 (define_insn "*neon_vuzp<mode>_insn"
5347 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5348 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5349 (match_operand:VDQWH 3 "s_register_operand" "2")]
5350 UNSPEC_VUZP1))
5351 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5352 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5353 UNSPEC_VUZP2))]
5354 "TARGET_NEON"
5355 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5356 [(set_attr "type" "neon_zip<q>")]
5357 )
5358
5359 (define_expand "vec_load_lanes<mode><mode>"
5360 [(set (match_operand:VDQX 0 "s_register_operand")
5361 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
5362 UNSPEC_VLD1))]
5363 "TARGET_NEON")
5364
5365 (define_insn "neon_vld1<mode>"
5366 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5367 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5368 UNSPEC_VLD1))]
5369 "TARGET_NEON"
5370 "vld1.<V_sz_elem>\t%h0, %A1"
5371 [(set_attr "type" "neon_load1_1reg<q>")]
5372 )
5373
5374 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5375 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
5376 ;; lane order here.
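;; For example (illustration only): on a big-endian target the GCC lane
;; numbering within a vector is the reverse of the architectural numbering,
;; so for a V4HI operand GCC lane 0 is printed as architectural lane 3; on a
;; little-endian target the lane number is used unchanged.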
5377 (define_insn "neon_vld1_lane<mode>"
5378 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5379 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5380 (match_operand:VDX 2 "s_register_operand" "0")
5381 (match_operand:SI 3 "immediate_operand" "i")]
5382 UNSPEC_VLD1_LANE))]
5383 "TARGET_NEON"
5384 {
5385 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5386 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5387 operands[3] = GEN_INT (lane);
5388 if (max == 1)
5389 return "vld1.<V_sz_elem>\t%P0, %A1";
5390 else
5391 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5392 }
5393 [(set_attr "type" "neon_load1_one_lane<q>")]
5394 )
5395
5396 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
5397 ;; here on big-endian targets.
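;; Worked example (illustrative): for V8HImode an architectural lane number
;; of 5 refers to element 1 of the second of the two D registers making up
;; the Q register, which is what the lane/regno adjustment below computes.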
5398 (define_insn "neon_vld1_lane<mode>"
5399 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5400 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5401 (match_operand:VQX 2 "s_register_operand" "0")
5402 (match_operand:SI 3 "immediate_operand" "i")]
5403 UNSPEC_VLD1_LANE))]
5404 "TARGET_NEON"
5405 {
5406 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5407 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5408 operands[3] = GEN_INT (lane);
5409 int regno = REGNO (operands[0]);
5410 if (lane >= max / 2)
5411 {
5412 lane -= max / 2;
5413 regno += 2;
5414 operands[3] = GEN_INT (lane);
5415 }
5416 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5417 if (max == 2)
5418 return "vld1.<V_sz_elem>\t%P0, %A1";
5419 else
5420 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5421 }
5422 [(set_attr "type" "neon_load1_one_lane<q>")]
5423 )
5424
5425 (define_insn "neon_vld1_dup<mode>"
5426 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5427 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5428 "TARGET_NEON"
5429 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5430 [(set_attr "type" "neon_load1_all_lanes<q>")]
5431 )
5432
5433 ;; Special case for DImode. Treat it exactly like a simple load.
5434 (define_expand "neon_vld1_dupdi"
5435 [(set (match_operand:DI 0 "s_register_operand" "")
5436 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
5437 UNSPEC_VLD1))]
5438 "TARGET_NEON"
5439 ""
5440 )
5441
5442 (define_insn "neon_vld1_dup<mode>"
5443 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5444 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5445 "TARGET_NEON"
5446 {
5447 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5448 }
5449 [(set_attr "type" "neon_load1_all_lanes<q>")]
5450 )
5451
5452 (define_insn_and_split "neon_vld1_dupv2di"
5453 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5454 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5455 "TARGET_NEON"
5456 "#"
5457 "&& reload_completed"
5458 [(const_int 0)]
5459 {
5460 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5461 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5462 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx);
5463 DONE;
5464 }
5465 [(set_attr "length" "8")
5466 (set_attr "type" "neon_load1_all_lanes_q")]
5467 )
5468
5469 (define_expand "vec_store_lanes<mode><mode>"
5470 [(set (match_operand:VDQX 0 "neon_struct_operand")
5471 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5472 UNSPEC_VST1))]
5473 "TARGET_NEON")
5474
5475 (define_insn "neon_vst1<mode>"
5476 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5477 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5478 UNSPEC_VST1))]
5479 "TARGET_NEON"
5480 "vst1.<V_sz_elem>\t%h1, %A0"
5481 [(set_attr "type" "neon_store1_1reg<q>")])
5482
5483 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
5484 ;; here on big-endian targets.
5485 (define_insn "neon_vst1_lane<mode>"
5486 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5487 (unspec:<V_elem>
5488 [(match_operand:VDX 1 "s_register_operand" "w")
5489 (match_operand:SI 2 "immediate_operand" "i")]
5490 UNSPEC_VST1_LANE))]
5491 "TARGET_NEON"
5492 {
5493 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5494 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5495 operands[2] = GEN_INT (lane);
5496 if (max == 1)
5497 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5498 else
5499 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5500 }
5501 [(set_attr "type" "neon_store1_one_lane<q>")]
5502 )
5503
5504 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
5505 ;; here on big-endian targets.
5506 (define_insn "neon_vst1_lane<mode>"
5507 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5508 (unspec:<V_elem>
5509 [(match_operand:VQX 1 "s_register_operand" "w")
5510 (match_operand:SI 2 "immediate_operand" "i")]
5511 UNSPEC_VST1_LANE))]
5512 "TARGET_NEON"
5513 {
5514 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5515 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5516 int regno = REGNO (operands[1]);
5517 if (lane >= max / 2)
5518 {
5519 lane -= max / 2;
5520 regno += 2;
5521 }
5522 operands[2] = GEN_INT (lane);
5523 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5524 if (max == 2)
5525 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5526 else
5527 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5528 }
5529 [(set_attr "type" "neon_store1_one_lane<q>")]
5530 )
5531
5532 (define_expand "vec_load_lanesti<mode>"
5533 [(set (match_operand:TI 0 "s_register_operand")
5534 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5535 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5536 UNSPEC_VLD2))]
5537 "TARGET_NEON")
5538
5539 (define_insn "neon_vld2<mode>"
5540 [(set (match_operand:TI 0 "s_register_operand" "=w")
5541 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5542 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5543 UNSPEC_VLD2))]
5544 "TARGET_NEON"
5545 {
5546 if (<V_sz_elem> == 64)
5547 return "vld1.64\t%h0, %A1";
5548 else
5549 return "vld2.<V_sz_elem>\t%h0, %A1";
5550 }
5551 [(set (attr "type")
5552 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5553 (const_string "neon_load1_2reg<q>")
5554 (const_string "neon_load2_2reg<q>")))]
5555 )
5556
5557 (define_expand "vec_load_lanesoi<mode>"
5558 [(set (match_operand:OI 0 "s_register_operand")
5559 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5560 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5561 UNSPEC_VLD2))]
5562 "TARGET_NEON")
5563
5564 (define_insn "neon_vld2<mode>"
5565 [(set (match_operand:OI 0 "s_register_operand" "=w")
5566 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5567 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5568 UNSPEC_VLD2))]
5569 "TARGET_NEON"
5570 "vld2.<V_sz_elem>\t%h0, %A1"
5571 [(set_attr "type" "neon_load2_2reg_q")])
5572
5573 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
5574 ;; here on big-endian targets.
5575 (define_insn "neon_vld2_lane<mode>"
5576 [(set (match_operand:TI 0 "s_register_operand" "=w")
5577 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5578 (match_operand:TI 2 "s_register_operand" "0")
5579 (match_operand:SI 3 "immediate_operand" "i")
5580 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5581 UNSPEC_VLD2_LANE))]
5582 "TARGET_NEON"
5583 {
5584 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5585 int regno = REGNO (operands[0]);
5586 rtx ops[4];
5587 ops[0] = gen_rtx_REG (DImode, regno);
5588 ops[1] = gen_rtx_REG (DImode, regno + 2);
5589 ops[2] = operands[1];
5590 ops[3] = GEN_INT (lane);
5591 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5592 return "";
5593 }
5594 [(set_attr "type" "neon_load2_one_lane<q>")]
5595 )
5596
5597 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
5598 ;; here on big-endian targets.
5599 (define_insn "neon_vld2_lane<mode>"
5600 [(set (match_operand:OI 0 "s_register_operand" "=w")
5601 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5602 (match_operand:OI 2 "s_register_operand" "0")
5603 (match_operand:SI 3 "immediate_operand" "i")
5604 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5605 UNSPEC_VLD2_LANE))]
5606 "TARGET_NEON"
5607 {
5608 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5609 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5610 int regno = REGNO (operands[0]);
5611 rtx ops[4];
5612 if (lane >= max / 2)
5613 {
5614 lane -= max / 2;
5615 regno += 2;
5616 }
5617 ops[0] = gen_rtx_REG (DImode, regno);
5618 ops[1] = gen_rtx_REG (DImode, regno + 4);
5619 ops[2] = operands[1];
5620 ops[3] = GEN_INT (lane);
5621 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5622 return "";
5623 }
5624 [(set_attr "type" "neon_load2_one_lane<q>")]
5625 )
5626
5627 (define_insn "neon_vld2_dup<mode>"
5628 [(set (match_operand:TI 0 "s_register_operand" "=w")
5629 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5630 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5631 UNSPEC_VLD2_DUP))]
5632 "TARGET_NEON"
5633 {
5634 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5635 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5636 else
5637 return "vld1.<V_sz_elem>\t%h0, %A1";
5638 }
5639 [(set (attr "type")
5640 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5641 (const_string "neon_load2_all_lanes<q>")
5642 (const_string "neon_load1_1reg<q>")))]
5643 )
5644
5645 (define_expand "vec_store_lanesti<mode>"
5646 [(set (match_operand:TI 0 "neon_struct_operand")
5647 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5648 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5649 UNSPEC_VST2))]
5650 "TARGET_NEON")
5651
5652 (define_insn "neon_vst2<mode>"
5653 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5654 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5655 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5656 UNSPEC_VST2))]
5657 "TARGET_NEON"
5658 {
5659 if (<V_sz_elem> == 64)
5660 return "vst1.64\t%h1, %A0";
5661 else
5662 return "vst2.<V_sz_elem>\t%h1, %A0";
5663 }
5664 [(set (attr "type")
5665 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5666 (const_string "neon_store1_2reg<q>")
5667 (const_string "neon_store2_one_lane<q>")))]
5668 )
5669
5670 (define_expand "vec_store_lanesoi<mode>"
5671 [(set (match_operand:OI 0 "neon_struct_operand")
5672 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5673 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5674 UNSPEC_VST2))]
5675 "TARGET_NEON")
5676
5677 (define_insn "neon_vst2<mode>"
5678 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5679 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5680 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5681 UNSPEC_VST2))]
5682 "TARGET_NEON"
5683 "vst2.<V_sz_elem>\t%h1, %A0"
5684 [(set_attr "type" "neon_store2_4reg<q>")]
5685 )
5686
5687 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
5688 ;; here on big-endian targets.
5689 (define_insn "neon_vst2_lane<mode>"
5690 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5691 (unspec:<V_two_elem>
5692 [(match_operand:TI 1 "s_register_operand" "w")
5693 (match_operand:SI 2 "immediate_operand" "i")
5694 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5695 UNSPEC_VST2_LANE))]
5696 "TARGET_NEON"
5697 {
5698 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5699 int regno = REGNO (operands[1]);
5700 rtx ops[4];
5701 ops[0] = operands[0];
5702 ops[1] = gen_rtx_REG (DImode, regno);
5703 ops[2] = gen_rtx_REG (DImode, regno + 2);
5704 ops[3] = GEN_INT (lane);
5705 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5706 return "";
5707 }
5708 [(set_attr "type" "neon_store2_one_lane<q>")]
5709 )
5710
5711 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
5712 ;; here on big-endian targets.
5713 (define_insn "neon_vst2_lane<mode>"
5714 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5715 (unspec:<V_two_elem>
5716 [(match_operand:OI 1 "s_register_operand" "w")
5717 (match_operand:SI 2 "immediate_operand" "i")
5718 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5719 UNSPEC_VST2_LANE))]
5720 "TARGET_NEON"
5721 {
5722 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5723 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5724 int regno = REGNO (operands[1]);
5725 rtx ops[4];
5726 if (lane >= max / 2)
5727 {
5728 lane -= max / 2;
5729 regno += 2;
5730 }
5731 ops[0] = operands[0];
5732 ops[1] = gen_rtx_REG (DImode, regno);
5733 ops[2] = gen_rtx_REG (DImode, regno + 4);
5734 ops[3] = GEN_INT (lane);
5735 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5736 return "";
5737 }
5738 [(set_attr "type" "neon_store2_one_lane<q>")]
5739 )
5740
5741 (define_expand "vec_load_lanesei<mode>"
5742 [(set (match_operand:EI 0 "s_register_operand")
5743 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5744 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5745 UNSPEC_VLD3))]
5746 "TARGET_NEON")
5747
5748 (define_insn "neon_vld3<mode>"
5749 [(set (match_operand:EI 0 "s_register_operand" "=w")
5750 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5751 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5752 UNSPEC_VLD3))]
5753 "TARGET_NEON"
5754 {
5755 if (<V_sz_elem> == 64)
5756 return "vld1.64\t%h0, %A1";
5757 else
5758 return "vld3.<V_sz_elem>\t%h0, %A1";
5759 }
5760 [(set (attr "type")
5761 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5762 (const_string "neon_load1_3reg<q>")
5763 (const_string "neon_load3_3reg<q>")))]
5764 )
5765
5766 (define_expand "vec_load_lanesci<mode>"
5767 [(match_operand:CI 0 "s_register_operand")
5768 (match_operand:CI 1 "neon_struct_operand")
5769 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5770 "TARGET_NEON"
5771 {
5772 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
5773 DONE;
5774 })
5775
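;; A vld3 of three quad vectors cannot be done in a single instruction, so the
;; expansion below uses two vld3 instructions: neon_vld3qa loads the lower D
;; register of each destination Q register from the first EImode-sized
;; (24-byte) block of memory, and neon_vld3qb loads the upper D registers from
;; the following block.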
5776 (define_expand "neon_vld3<mode>"
5777 [(match_operand:CI 0 "s_register_operand")
5778 (match_operand:CI 1 "neon_struct_operand")
5779 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5780 "TARGET_NEON"
5781 {
5782 rtx mem;
5783
5784 mem = adjust_address (operands[1], EImode, 0);
5785 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5786 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5787 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
5788 DONE;
5789 })
5790
5791 (define_insn "neon_vld3qa<mode>"
5792 [(set (match_operand:CI 0 "s_register_operand" "=w")
5793 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5794 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5795 UNSPEC_VLD3A))]
5796 "TARGET_NEON"
5797 {
5798 int regno = REGNO (operands[0]);
5799 rtx ops[4];
5800 ops[0] = gen_rtx_REG (DImode, regno);
5801 ops[1] = gen_rtx_REG (DImode, regno + 4);
5802 ops[2] = gen_rtx_REG (DImode, regno + 8);
5803 ops[3] = operands[1];
5804 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5805 return "";
5806 }
5807 [(set_attr "type" "neon_load3_3reg<q>")]
5808 )
5809
5810 (define_insn "neon_vld3qb<mode>"
5811 [(set (match_operand:CI 0 "s_register_operand" "=w")
5812 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5813 (match_operand:CI 2 "s_register_operand" "0")
5814 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5815 UNSPEC_VLD3B))]
5816 "TARGET_NEON"
5817 {
5818 int regno = REGNO (operands[0]);
5819 rtx ops[4];
5820 ops[0] = gen_rtx_REG (DImode, regno + 2);
5821 ops[1] = gen_rtx_REG (DImode, regno + 6);
5822 ops[2] = gen_rtx_REG (DImode, regno + 10);
5823 ops[3] = operands[1];
5824 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5825 return "";
5826 }
5827 [(set_attr "type" "neon_load3_3reg<q>")]
5828 )
5829
5830 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
5831 ;; here on big-endian targets.
5832 (define_insn "neon_vld3_lane<mode>"
5833 [(set (match_operand:EI 0 "s_register_operand" "=w")
5834 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5835 (match_operand:EI 2 "s_register_operand" "0")
5836 (match_operand:SI 3 "immediate_operand" "i")
5837 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5838 UNSPEC_VLD3_LANE))]
5839 "TARGET_NEON"
5840 {
5841 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5842 int regno = REGNO (operands[0]);
5843 rtx ops[5];
5844 ops[0] = gen_rtx_REG (DImode, regno);
5845 ops[1] = gen_rtx_REG (DImode, regno + 2);
5846 ops[2] = gen_rtx_REG (DImode, regno + 4);
5847 ops[3] = operands[1];
5848 ops[4] = GEN_INT (lane);
5849 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5850 ops);
5851 return "";
5852 }
5853 [(set_attr "type" "neon_load3_one_lane<q>")]
5854 )
5855
5856 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
5857 ;; here on big-endian targets.
5858 (define_insn "neon_vld3_lane<mode>"
5859 [(set (match_operand:CI 0 "s_register_operand" "=w")
5860 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5861 (match_operand:CI 2 "s_register_operand" "0")
5862 (match_operand:SI 3 "immediate_operand" "i")
5863 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5864 UNSPEC_VLD3_LANE))]
5865 "TARGET_NEON"
5866 {
5867 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5868 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5869 int regno = REGNO (operands[0]);
5870 rtx ops[5];
5871 if (lane >= max / 2)
5872 {
5873 lane -= max / 2;
5874 regno += 2;
5875 }
5876 ops[0] = gen_rtx_REG (DImode, regno);
5877 ops[1] = gen_rtx_REG (DImode, regno + 4);
5878 ops[2] = gen_rtx_REG (DImode, regno + 8);
5879 ops[3] = operands[1];
5880 ops[4] = GEN_INT (lane);
5881 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5882 ops);
5883 return "";
5884 }
5885 [(set_attr "type" "neon_load3_one_lane<q>")]
5886 )
5887
5888 (define_insn "neon_vld3_dup<mode>"
5889 [(set (match_operand:EI 0 "s_register_operand" "=w")
5890 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5891 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5892 UNSPEC_VLD3_DUP))]
5893 "TARGET_NEON"
5894 {
5895 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5896 {
5897 int regno = REGNO (operands[0]);
5898 rtx ops[4];
5899 ops[0] = gen_rtx_REG (DImode, regno);
5900 ops[1] = gen_rtx_REG (DImode, regno + 2);
5901 ops[2] = gen_rtx_REG (DImode, regno + 4);
5902 ops[3] = operands[1];
5903 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5904 return "";
5905 }
5906 else
5907 return "vld1.<V_sz_elem>\t%h0, %A1";
5908 }
5909 [(set (attr "type")
5910 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5911 (const_string "neon_load3_all_lanes<q>")
5912 (const_string "neon_load1_1reg<q>")))])
5913
5914 (define_expand "vec_store_lanesei<mode>"
5915 [(set (match_operand:EI 0 "neon_struct_operand")
5916 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5917 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5918 UNSPEC_VST3))]
5919 "TARGET_NEON")
5920
5921 (define_insn "neon_vst3<mode>"
5922 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5923 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5924 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5925 UNSPEC_VST3))]
5926 "TARGET_NEON"
5927 {
5928 if (<V_sz_elem> == 64)
5929 return "vst1.64\t%h1, %A0";
5930 else
5931 return "vst3.<V_sz_elem>\t%h1, %A0";
5932 }
5933 [(set (attr "type")
5934 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5935 (const_string "neon_store1_3reg<q>")
5936 (const_string "neon_store3_one_lane<q>")))])
5937
5938 (define_expand "vec_store_lanesci<mode>"
5939 [(match_operand:CI 0 "neon_struct_operand")
5940 (match_operand:CI 1 "s_register_operand")
5941 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5942 "TARGET_NEON"
5943 {
5944 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
5945 DONE;
5946 })
5947
5948 (define_expand "neon_vst3<mode>"
5949 [(match_operand:CI 0 "neon_struct_operand")
5950 (match_operand:CI 1 "s_register_operand")
5951 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5952 "TARGET_NEON"
5953 {
5954 rtx mem;
5955
5956 mem = adjust_address (operands[0], EImode, 0);
5957 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5958 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5959 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
5960 DONE;
5961 })
5962
5963 (define_insn "neon_vst3qa<mode>"
5964 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5965 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5966 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5967 UNSPEC_VST3A))]
5968 "TARGET_NEON"
5969 {
5970 int regno = REGNO (operands[1]);
5971 rtx ops[4];
5972 ops[0] = operands[0];
5973 ops[1] = gen_rtx_REG (DImode, regno);
5974 ops[2] = gen_rtx_REG (DImode, regno + 4);
5975 ops[3] = gen_rtx_REG (DImode, regno + 8);
5976 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5977 return "";
5978 }
5979 [(set_attr "type" "neon_store3_3reg<q>")]
5980 )
5981
5982 (define_insn "neon_vst3qb<mode>"
5983 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5984 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5985 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5986 UNSPEC_VST3B))]
5987 "TARGET_NEON"
5988 {
5989 int regno = REGNO (operands[1]);
5990 rtx ops[4];
5991 ops[0] = operands[0];
5992 ops[1] = gen_rtx_REG (DImode, regno + 2);
5993 ops[2] = gen_rtx_REG (DImode, regno + 6);
5994 ops[3] = gen_rtx_REG (DImode, regno + 10);
5995 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5996 return "";
5997 }
5998 [(set_attr "type" "neon_store3_3reg<q>")]
5999 )
6000
6001 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
6002 ;; here on big-endian targets.
6003 (define_insn "neon_vst3_lane<mode>"
6004 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6005 (unspec:<V_three_elem>
6006 [(match_operand:EI 1 "s_register_operand" "w")
6007 (match_operand:SI 2 "immediate_operand" "i")
6008 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6009 UNSPEC_VST3_LANE))]
6010 "TARGET_NEON"
6011 {
6012 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6013 int regno = REGNO (operands[1]);
6014 rtx ops[5];
6015 ops[0] = operands[0];
6016 ops[1] = gen_rtx_REG (DImode, regno);
6017 ops[2] = gen_rtx_REG (DImode, regno + 2);
6018 ops[3] = gen_rtx_REG (DImode, regno + 4);
6019 ops[4] = GEN_INT (lane);
6020 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6021 ops);
6022 return "";
6023 }
6024 [(set_attr "type" "neon_store3_one_lane<q>")]
6025 )
6026
6027 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
6028 ;; here on big-endian targets.
6029 (define_insn "neon_vst3_lane<mode>"
6030 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6031 (unspec:<V_three_elem>
6032 [(match_operand:CI 1 "s_register_operand" "w")
6033 (match_operand:SI 2 "immediate_operand" "i")
6034 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6035 UNSPEC_VST3_LANE))]
6036 "TARGET_NEON"
6037 {
6038 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6039 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6040 int regno = REGNO (operands[1]);
6041 rtx ops[5];
6042 if (lane >= max / 2)
6043 {
6044 lane -= max / 2;
6045 regno += 2;
6046 }
6047 ops[0] = operands[0];
6048 ops[1] = gen_rtx_REG (DImode, regno);
6049 ops[2] = gen_rtx_REG (DImode, regno + 4);
6050 ops[3] = gen_rtx_REG (DImode, regno + 8);
6051 ops[4] = GEN_INT (lane);
6052 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6053 ops);
6054 return "";
6055 }
6056 [(set_attr "type" "neon_store3_one_lane<q>")]
6057 )
6058
6059 (define_expand "vec_load_lanesoi<mode>"
6060 [(set (match_operand:OI 0 "s_register_operand")
6061 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6062 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6063 UNSPEC_VLD4))]
6064 "TARGET_NEON")
6065
6066 (define_insn "neon_vld4<mode>"
6067 [(set (match_operand:OI 0 "s_register_operand" "=w")
6068 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6069 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6070 UNSPEC_VLD4))]
6071 "TARGET_NEON"
6072 {
6073 if (<V_sz_elem> == 64)
6074 return "vld1.64\t%h0, %A1";
6075 else
6076 return "vld4.<V_sz_elem>\t%h0, %A1";
6077 }
6078 [(set (attr "type")
6079 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6080 (const_string "neon_load1_4reg<q>")
6081 (const_string "neon_load4_4reg<q>")))]
6082 )
6083
6084 (define_expand "vec_load_lanesxi<mode>"
6085 [(match_operand:XI 0 "s_register_operand")
6086 (match_operand:XI 1 "neon_struct_operand")
6087 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6088 "TARGET_NEON"
6089 {
6090 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
6091 DONE;
6092 })
6093
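;; Likewise for vld4 of quad vectors: neon_vld4qa loads the lower D register
;; of each destination Q register from the first OImode-sized (32-byte) block
;; of memory and neon_vld4qb loads the upper D registers from the following
;; block.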
6094 (define_expand "neon_vld4<mode>"
6095 [(match_operand:XI 0 "s_register_operand")
6096 (match_operand:XI 1 "neon_struct_operand")
6097 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6098 "TARGET_NEON"
6099 {
6100 rtx mem;
6101
6102 mem = adjust_address (operands[1], OImode, 0);
6103 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6104 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6105 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
6106 DONE;
6107 })
6108
6109 (define_insn "neon_vld4qa<mode>"
6110 [(set (match_operand:XI 0 "s_register_operand" "=w")
6111 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6112 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6113 UNSPEC_VLD4A))]
6114 "TARGET_NEON"
6115 {
6116 int regno = REGNO (operands[0]);
6117 rtx ops[5];
6118 ops[0] = gen_rtx_REG (DImode, regno);
6119 ops[1] = gen_rtx_REG (DImode, regno + 4);
6120 ops[2] = gen_rtx_REG (DImode, regno + 8);
6121 ops[3] = gen_rtx_REG (DImode, regno + 12);
6122 ops[4] = operands[1];
6123 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6124 return "";
6125 }
6126 [(set_attr "type" "neon_load4_4reg<q>")]
6127 )
6128
6129 (define_insn "neon_vld4qb<mode>"
6130 [(set (match_operand:XI 0 "s_register_operand" "=w")
6131 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6132 (match_operand:XI 2 "s_register_operand" "0")
6133 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6134 UNSPEC_VLD4B))]
6135 "TARGET_NEON"
6136 {
6137 int regno = REGNO (operands[0]);
6138 rtx ops[5];
6139 ops[0] = gen_rtx_REG (DImode, regno + 2);
6140 ops[1] = gen_rtx_REG (DImode, regno + 6);
6141 ops[2] = gen_rtx_REG (DImode, regno + 10);
6142 ops[3] = gen_rtx_REG (DImode, regno + 14);
6143 ops[4] = operands[1];
6144 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6145 return "";
6146 }
6147 [(set_attr "type" "neon_load4_4reg<q>")]
6148 )
6149
6150 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
6151 ;; here on big-endian targets.
6152 (define_insn "neon_vld4_lane<mode>"
6153 [(set (match_operand:OI 0 "s_register_operand" "=w")
6154 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6155 (match_operand:OI 2 "s_register_operand" "0")
6156 (match_operand:SI 3 "immediate_operand" "i")
6157 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6158 UNSPEC_VLD4_LANE))]
6159 "TARGET_NEON"
6160 {
6161 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6162 int regno = REGNO (operands[0]);
6163 rtx ops[6];
6164 ops[0] = gen_rtx_REG (DImode, regno);
6165 ops[1] = gen_rtx_REG (DImode, regno + 2);
6166 ops[2] = gen_rtx_REG (DImode, regno + 4);
6167 ops[3] = gen_rtx_REG (DImode, regno + 6);
6168 ops[4] = operands[1];
6169 ops[5] = GEN_INT (lane);
6170 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6171 ops);
6172 return "";
6173 }
6174 [(set_attr "type" "neon_load4_one_lane<q>")]
6175 )
6176
6177 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
6178 ;; here on big-endian targets.
6179 (define_insn "neon_vld4_lane<mode>"
6180 [(set (match_operand:XI 0 "s_register_operand" "=w")
6181 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6182 (match_operand:XI 2 "s_register_operand" "0")
6183 (match_operand:SI 3 "immediate_operand" "i")
6184 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6185 UNSPEC_VLD4_LANE))]
6186 "TARGET_NEON"
6187 {
6188 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6189 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6190 int regno = REGNO (operands[0]);
6191 rtx ops[6];
6192 if (lane >= max / 2)
6193 {
6194 lane -= max / 2;
6195 regno += 2;
6196 }
6197 ops[0] = gen_rtx_REG (DImode, regno);
6198 ops[1] = gen_rtx_REG (DImode, regno + 4);
6199 ops[2] = gen_rtx_REG (DImode, regno + 8);
6200 ops[3] = gen_rtx_REG (DImode, regno + 12);
6201 ops[4] = operands[1];
6202 ops[5] = GEN_INT (lane);
6203 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6204 ops);
6205 return "";
6206 }
6207 [(set_attr "type" "neon_load4_one_lane<q>")]
6208 )
6209
6210 (define_insn "neon_vld4_dup<mode>"
6211 [(set (match_operand:OI 0 "s_register_operand" "=w")
6212 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6213 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6214 UNSPEC_VLD4_DUP))]
6215 "TARGET_NEON"
6216 {
6217 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6218 {
6219 int regno = REGNO (operands[0]);
6220 rtx ops[5];
6221 ops[0] = gen_rtx_REG (DImode, regno);
6222 ops[1] = gen_rtx_REG (DImode, regno + 2);
6223 ops[2] = gen_rtx_REG (DImode, regno + 4);
6224 ops[3] = gen_rtx_REG (DImode, regno + 6);
6225 ops[4] = operands[1];
6226 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6227 ops);
6228 return "";
6229 }
6230 else
6231 return "vld1.<V_sz_elem>\t%h0, %A1";
6232 }
6233 [(set (attr "type")
6234 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6235 (const_string "neon_load4_all_lanes<q>")
6236 (const_string "neon_load1_1reg<q>")))]
6237 )
6238
6239 (define_expand "vec_store_lanesoi<mode>"
6240 [(set (match_operand:OI 0 "neon_struct_operand")
6241 (unspec:OI [(match_operand:OI 1 "s_register_operand")
6242 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6243 UNSPEC_VST4))]
6244 "TARGET_NEON")
6245
6246 (define_insn "neon_vst4<mode>"
6247 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6248 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6249 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6250 UNSPEC_VST4))]
6251 "TARGET_NEON"
6252 {
6253 if (<V_sz_elem> == 64)
6254 return "vst1.64\t%h1, %A0";
6255 else
6256 return "vst4.<V_sz_elem>\t%h1, %A0";
6257 }
6258 [(set (attr "type")
6259 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6260 (const_string "neon_store1_4reg<q>")
6261 (const_string "neon_store4_4reg<q>")))]
6262 )
6263
6264 (define_expand "vec_store_lanesxi<mode>"
6265 [(match_operand:XI 0 "neon_struct_operand")
6266 (match_operand:XI 1 "s_register_operand")
6267 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6268 "TARGET_NEON"
6269 {
6270 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
6271 DONE;
6272 })
6273
6274 (define_expand "neon_vst4<mode>"
6275 [(match_operand:XI 0 "neon_struct_operand")
6276 (match_operand:XI 1 "s_register_operand")
6277 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6278 "TARGET_NEON"
6279 {
6280 rtx mem;
6281
6282 mem = adjust_address (operands[0], OImode, 0);
6283 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6284 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6285 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
6286 DONE;
6287 })
6288
6289 (define_insn "neon_vst4qa<mode>"
6290 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6291 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6292 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6293 UNSPEC_VST4A))]
6294 "TARGET_NEON"
6295 {
6296 int regno = REGNO (operands[1]);
6297 rtx ops[5];
6298 ops[0] = operands[0];
6299 ops[1] = gen_rtx_REG (DImode, regno);
6300 ops[2] = gen_rtx_REG (DImode, regno + 4);
6301 ops[3] = gen_rtx_REG (DImode, regno + 8);
6302 ops[4] = gen_rtx_REG (DImode, regno + 12);
6303 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6304 return "";
6305 }
6306 [(set_attr "type" "neon_store4_4reg<q>")]
6307 )
6308
6309 (define_insn "neon_vst4qb<mode>"
6310 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6311 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6312 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6313 UNSPEC_VST4B))]
6314 "TARGET_NEON"
6315 {
6316 int regno = REGNO (operands[1]);
6317 rtx ops[5];
6318 ops[0] = operands[0];
6319 ops[1] = gen_rtx_REG (DImode, regno + 2);
6320 ops[2] = gen_rtx_REG (DImode, regno + 6);
6321 ops[3] = gen_rtx_REG (DImode, regno + 10);
6322 ops[4] = gen_rtx_REG (DImode, regno + 14);
6323 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6324 return "";
6325 }
6326 [(set_attr "type" "neon_store4_4reg<q>")]
6327 )
6328
6329 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
6330 ;; here on big-endian targets.
6331 (define_insn "neon_vst4_lane<mode>"
6332 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6333 (unspec:<V_four_elem>
6334 [(match_operand:OI 1 "s_register_operand" "w")
6335 (match_operand:SI 2 "immediate_operand" "i")
6336 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6337 UNSPEC_VST4_LANE))]
6338 "TARGET_NEON"
6339 {
6340 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6341 int regno = REGNO (operands[1]);
6342 rtx ops[6];
6343 ops[0] = operands[0];
6344 ops[1] = gen_rtx_REG (DImode, regno);
6345 ops[2] = gen_rtx_REG (DImode, regno + 2);
6346 ops[3] = gen_rtx_REG (DImode, regno + 4);
6347 ops[4] = gen_rtx_REG (DImode, regno + 6);
6348 ops[5] = GEN_INT (lane);
6349 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6350 ops);
6351 return "";
6352 }
6353 [(set_attr "type" "neon_store4_one_lane<q>")]
6354 )
6355
6356 ;; see the comment on neon_vld1_lane for why the lane numbers are reversed
6357 ;; here on big-endian targets.
6358 (define_insn "neon_vst4_lane<mode>"
6359 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6360 (unspec:<V_four_elem>
6361 [(match_operand:XI 1 "s_register_operand" "w")
6362 (match_operand:SI 2 "immediate_operand" "i")
6363 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6364 UNSPEC_VST4_LANE))]
6365 "TARGET_NEON"
6366 {
6367 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6368 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6369 int regno = REGNO (operands[1]);
6370 rtx ops[6];
6371 if (lane >= max / 2)
6372 {
6373 lane -= max / 2;
6374 regno += 2;
6375 }
6376 ops[0] = operands[0];
6377 ops[1] = gen_rtx_REG (DImode, regno);
6378 ops[2] = gen_rtx_REG (DImode, regno + 4);
6379 ops[3] = gen_rtx_REG (DImode, regno + 8);
6380 ops[4] = gen_rtx_REG (DImode, regno + 12);
6381 ops[5] = GEN_INT (lane);
6382 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6383 ops);
6384 return "";
6385 }
6386 [(set_attr "type" "neon_store4_4reg<q>")]
6387 )
6388
6389 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6390 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6391 (SE:<V_unpack> (vec_select:<V_HALF>
6392 (match_operand:VU 1 "register_operand" "w")
6393 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6394 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6395 "vmovl.<US><V_sz_elem> %q0, %e1"
6396 [(set_attr "type" "neon_shift_imm_long")]
6397 )
6398
6399 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6400 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6401 (SE:<V_unpack> (vec_select:<V_HALF>
6402 (match_operand:VU 1 "register_operand" "w")
6403 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6404 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6405 "vmovl.<US><V_sz_elem> %q0, %f1"
6406 [(set_attr "type" "neon_shift_imm_long")]
6407 )
6408
6409 (define_expand "vec_unpack<US>_hi_<mode>"
6410 [(match_operand:<V_unpack> 0 "register_operand" "")
6411 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6412 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6413 {
6414 rtvec v = rtvec_alloc (<V_mode_nunits>/2);
6415 rtx t1;
6416 int i;
6417 for (i = 0; i < (<V_mode_nunits>/2); i++)
6418 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6419
6420 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6421 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
6422 operands[1],
6423 t1));
6424 DONE;
6425 }
6426 )
6427
6428 (define_expand "vec_unpack<US>_lo_<mode>"
6429 [(match_operand:<V_unpack> 0 "register_operand" "")
6430 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
6431 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6432 {
6433 rtvec v = rtvec_alloc (<V_mode_nunits>/2);
6434 rtx t1;
6435 int i;
6436 for (i = 0; i < (<V_mode_nunits>/2); i++)
6437 RTVEC_ELT (v, i) = GEN_INT (i);
6438 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6439 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
6440 operands[1],
6441 t1));
6442 DONE;
6443 }
6444 )
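;; The two expanders above build the selector required by the insns: a
;; PARALLEL of the constants 0 ... N/2-1 for the low half and N/2 ... N-1
;; for the high half, where N is the number of elements of the input mode.
;; For an unsigned V8HImode high-part unpack, for instance, the expansion
;; ends up as something like
;;   vmovl.u16  q0, d3
;; assuming operand 1 was allocated to q1 (d2-d3).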
6445
6446 (define_insn "neon_vec_<US>mult_lo_<mode>"
6447 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6448 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6449 (match_operand:VU 1 "register_operand" "w")
6450 (match_operand:VU 2 "vect_par_constant_low" "")))
6451 (SE:<V_unpack> (vec_select:<V_HALF>
6452 (match_operand:VU 3 "register_operand" "w")
6453 (match_dup 2)))))]
6454 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6455 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6456 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6457 )
6458
6459 (define_expand "vec_widen_<US>mult_lo_<mode>"
6460 [(match_operand:<V_unpack> 0 "register_operand" "")
6461 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6462 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6463 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6464 {
6465 rtvec v = rtvec_alloc (<V_mode_nunits>/2);
6466 rtx t1;
6467 int i;
6468 for (i = 0; i < (<V_mode_nunits>/2); i++)
6469 RTVEC_ELT (v, i) = GEN_INT (i);
6470 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6471
6472 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6473 operands[1],
6474 t1,
6475 operands[2]));
6476 DONE;
6477 }
6478 )
6479
6480 (define_insn "neon_vec_<US>mult_hi_<mode>"
6481 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6482 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6483 (match_operand:VU 1 "register_operand" "w")
6484 (match_operand:VU 2 "vect_par_constant_high" "")))
6485 (SE:<V_unpack> (vec_select:<V_HALF>
6486 (match_operand:VU 3 "register_operand" "w")
6487 (match_dup 2)))))]
6488 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6489 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6490 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6491 )
6492
6493 (define_expand "vec_widen_<US>mult_hi_<mode>"
6494 [(match_operand:<V_unpack> 0 "register_operand" "")
6495 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6496 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6497 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6498 {
6499 rtvec v = rtvec_alloc (<V_mode_nunits>/2);
6500 rtx t1;
6501 int i;
6502 for (i = 0; i < (<V_mode_nunits>/2); i++)
6503 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6504 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6505
6506 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6507 operands[1],
6508 t1,
6509 operands[2]));
6510 DONE;
6511
6512 }
6513 )
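;; These widening-multiply patterns are what the vectorizer uses for loops
;; that multiply narrow elements into a wider result, for example (array
;; names purely illustrative, with unsigned short inputs and an unsigned
;; int result):
;;   for (i = 0; i < n; i++)
;;     c[i] = (unsigned int) a[i] * (unsigned int) b[i];
;; which can become one vmull.u16 of the low halves and another of the
;; high halves of each pair of input vectors.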
6514
6515 (define_insn "neon_vec_<US>shiftl_<mode>"
6516 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6517 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6518 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6519 "TARGET_NEON"
6520 {
6521 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6522 }
6523 [(set_attr "type" "neon_shift_imm_long")]
6524 )
6525
6526 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6527 [(match_operand:<V_unpack> 0 "register_operand" "")
6528 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6529 (match_operand:SI 2 "immediate_operand" "i")]
6530 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6531 {
6532 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6533 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6534 operands[2]));
6535 DONE;
6536 }
6537 )
6538
6539 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6540 [(match_operand:<V_unpack> 0 "register_operand" "")
6541 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6542 (match_operand:SI 2 "immediate_operand" "i")]
6543 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6544 {
6545 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6546 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6547 GET_MODE_SIZE (<V_HALF>mode)),
6548 operands[2]));
6549 DONE;
6550 }
6551 )
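;; Both expanders above reuse the D-register vshll pattern: the low-part
;; version takes the subreg at offset 0 of the quad input and the high-part
;; version the subreg covering its upper half (valid because these expanders
;; are restricted to little-endian).  The result is roughly
;;   vshll.u16  q0, d3, #2
;; for an unsigned high-part shift of a V8HImode value in q1; register
;; allocation and shift amount are only illustrative.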
6552
6553 ;; Vectorize for the non-neon-quad case (64-bit vectors only).
6554 (define_insn "neon_unpack<US>_<mode>"
6555 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6556 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6557 "TARGET_NEON"
6558 "vmovl.<US><V_sz_elem> %q0, %P1"
6559 [(set_attr "type" "neon_shift_imm_long")]
6560 )
6561
6562 (define_expand "vec_unpack<US>_lo_<mode>"
6563 [(match_operand:<V_double_width> 0 "register_operand" "")
6564 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
6565 "TARGET_NEON"
6566 {
6567 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6568 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6569 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6570
6571 DONE;
6572 }
6573 )
6574
6575 (define_expand "vec_unpack<US>_hi_<mode>"
6576 [(match_operand:<V_double_width> 0 "register_operand" "")
6577 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
6578 "TARGET_NEON"
6579 {
6580 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6581 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6582 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6583
6584 DONE;
6585 }
6586 )
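;; For 64-bit input vectors the whole D register is widened into a
;; quad-register temporary with vmovl and the requested half of the result
;; is then taken with vget_low/vget_high.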
6587
6588 (define_insn "neon_vec_<US>mult_<mode>"
6589 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6590 (mult:<V_widen> (SE:<V_widen>
6591 (match_operand:VDI 1 "register_operand" "w"))
6592 (SE:<V_widen>
6593 (match_operand:VDI 2 "register_operand" "w"))))]
6594 "TARGET_NEON"
6595 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6596 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6597 )
6598
6599 (define_expand "vec_widen_<US>mult_hi_<mode>"
6600 [(match_operand:<V_double_width> 0 "register_operand" "")
6601 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6602 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6603 "TARGET_NEON"
6604 {
6605 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6606 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6607 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6608
6609 DONE;
6610
6611 }
6612 )
6613
6614 (define_expand "vec_widen_<US>mult_lo_<mode>"
6615 [(match_operand:<V_double_width> 0 "register_operand" "")
6616 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6617 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6618 "TARGET_NEON"
6619 {
6620 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6621 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6622 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6623
6624 DONE;
6625
6626 }
6627 )
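;; Likewise for widening multiplies of 64-bit vectors: the full vmull result
;; is computed into a quad-register temporary and the requested half is then
;; extracted from it, even though only half of the product is ultimately
;; used.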
6628
6629 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6630 [(match_operand:<V_double_width> 0 "register_operand" "")
6631 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6632 (match_operand:SI 2 "immediate_operand" "i")]
6633 "TARGET_NEON"
6634 {
6635 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6636 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6637 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6638
6639 DONE;
6640 }
6641 )
6642
6643 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6644 [(match_operand:<V_double_width> 0 "register_operand" "")
6645 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6646 (match_operand:SI 2 "immediate_operand" "i")]
6647 "TARGET_NEON"
6648 {
6649 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6650 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6651 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6652
6653 DONE;
6654 }
6655 )
6656
6657 ;; FIXME: These instruction patterns can't be used safely in big-endian mode
6658 ;; because the ordering of vector elements in Q registers is different from what
6659 ;; the semantics of the instructions require.
6660
6661 (define_insn "vec_pack_trunc_<mode>"
6662 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6663 (vec_concat:<V_narrow_pack>
6664 (truncate:<V_narrow>
6665 (match_operand:VN 1 "register_operand" "w"))
6666 (truncate:<V_narrow>
6667 (match_operand:VN 2 "register_operand" "w"))))]
6668 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6669 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6670 [(set_attr "type" "multiple")
6671 (set_attr "length" "8")]
6672 )
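;; Two narrowing moves are emitted (hence "length" 8), writing the low and
;; then the high D half of the destination; the earlyclobber on operand 0
;; stops the first vmovn from overwriting operand 2 before the second one
;; has read it.  Assuming the operands land in q0, q1 and q2, the output is
;; something like
;;   vmovn.i32  d0, q1
;;   vmovn.i32  d1, q2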
6673
6674 ;; For the non-quad case.
6675 (define_insn "neon_vec_pack_trunc_<mode>"
6676 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6677 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6678 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6679 "vmovn.i<V_sz_elem>\t%P0, %q1"
6680 [(set_attr "type" "neon_move_narrow_q")]
6681 )
6682
6683 (define_expand "vec_pack_trunc_<mode>"
6684 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6685 (match_operand:VSHFT 1 "register_operand" "")
6686 (match_operand:VSHFT 2 "register_operand")]
6687 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6688 {
6689 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6690
6691 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6692 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6693 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6694 DONE;
6695 })
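;; For 64-bit inputs the two vectors are first assembled into the low and
;; high halves of a quad-register temporary and the whole temporary is then
;; narrowed with a single vmovn by the pattern above.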
6696
6697 (define_insn "neon_vabd<mode>_2"
6698 [(set (match_operand:VF 0 "s_register_operand" "=w")
6699 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6700 (match_operand:VF 2 "s_register_operand" "w"))))]
6701 "TARGET_NEON && flag_unsafe_math_optimizations"
6702 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6703 [(set_attr "type" "neon_fp_abd_s<q>")]
6704 )
6705
6706 (define_insn "neon_vabd<mode>_3"
6707 [(set (match_operand:VF 0 "s_register_operand" "=w")
6708 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6709 (match_operand:VF 2 "s_register_operand" "w")]
6710 UNSPEC_VSUB)))]
6711 "TARGET_NEON && flag_unsafe_math_optimizations"
6712 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6713 [(set_attr "type" "neon_fp_abd_s<q>")]
6714 )
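;; With -funsafe-math-optimizations these patterns allow a vector absolute
;; difference, e.g. code along the lines of
;;   c[i] = fabsf (a[i] - b[i]);
;; (names illustrative), to be emitted as a single vabd.f32 rather than a
;; subtract followed by an abs.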
6715
6716 ;; Copy from core registers to NEON registers, then extend there, not vice versa.
6717
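;; The splits below run after reload when the result of an extension to
;; DImode has ended up in a NEON register: the source is duplicated across
;; the destination D register and the full 64 bits are then shifted right,
;; arithmetically or logically, so that the register ends up holding the
;; sign- or zero-extended value.  With a core-register source this comes
;; out as something like
;;   vdup.32  d8, r0
;;   vshr.s64 d8, d8, #32
;; for the SImode sign-extension case (register numbers purely
;; illustrative).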
6718 (define_split
6719 [(set (match_operand:DI 0 "s_register_operand" "")
6720 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6721 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6722 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6723 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6724 {
6725 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6726 })
6727
6728 (define_split
6729 [(set (match_operand:DI 0 "s_register_operand" "")
6730 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6731 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6732 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6733 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6734 {
6735 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6736 })
6737
6738 (define_split
6739 [(set (match_operand:DI 0 "s_register_operand" "")
6740 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6741 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6742 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6743 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6744 {
6745 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6746 })
6747
6748 (define_split
6749 [(set (match_operand:DI 0 "s_register_operand" "")
6750 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6751 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6752 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6753 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6754 {
6755 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6756 })
6757
6758 (define_split
6759 [(set (match_operand:DI 0 "s_register_operand" "")
6760 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6761 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6762 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6763 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6764 {
6765 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6766 })
6767
6768 (define_split
6769 [(set (match_operand:DI 0 "s_register_operand" "")
6770 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6771 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6772 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6773 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6774 {
6775 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6776 })