[ARM] Cleanup DImode shifts
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
;; The "vqh_mnem" attribute takes one of vadd, vmin or vmax (default vadd).
;; It exists so that type attribute definitions can do string comparisons
;; against <VQH_mnem>.
(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the VDX modes.  The insn condition requires at least one
;; of the two operands to be a register.  Alternatives 2 and 3 move an
;; immediate: neon_immediate_valid_for_move validates it, may rewrite
;; operands[1], and reports the element width used to build the mnemonic.
;; Alternative 9 returns "#"; every alternative not listed explicitly is
;; handed to output_move_double.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDX 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
        (match_operand:VDX 1 "general_operand"
          " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%P0, %P1 @ <mode>";

    case 1:
    case 4:
      return output_move_neon (operands);

    case 2:
    case 3:
      {
        /* Immediate move.  The template is built in a static buffer because
           the returned string must outlive this block.  */
        static char templ[40];
        int width;
        int is_valid
          = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &width);

        gcc_assert (is_valid != 0);

        /* A reported width of zero selects the fixed .f32 form.  */
        if (width == 0)
          return "vmov.f32\t%P0, %1 @ <mode>";

        sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
        return templ;
      }

    case 5:
      return "vmov\t%Q0, %R0, %P1 @ <mode>";

    case 6:
      return "vmov\t%P0, %Q1, %R1 @ <mode>";

    case 9:
      return "#";

    default:
      return output_move_double (operands, true, NULL);
    }
}
  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                     neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
                     neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
                     multiple")
   (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
   (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
72
;; Move pattern for the VQXMOV modes.  The insn condition requires at least
;; one of the two operands to be a register.  Alternatives 2 and 3 move an
;; immediate validated by neon_immediate_valid_for_move.  Alternatives 5 and
;; 6 each emit a pair of vmov instructions.  Every alternative not listed
;; explicitly is handed to output_move_quad.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%q0, %q1 @ <mode>";

    case 1:
    case 4:
      return output_move_neon (operands);

    case 2:
    case 3:
      {
        /* Immediate move.  The template is built in a static buffer because
           the returned string must outlive this block.  */
        static char templ[40];
        int width;
        int is_valid
          = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &width);

        gcc_assert (is_valid != 0);

        /* A reported width of zero selects the fixed .f32 form.  */
        if (width == 0)
          return "vmov.f32\t%q0, %1 @ <mode>";

        sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
        return templ;
      }

    case 5:
      return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";

    case 6:
      return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";

    default:
      return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
                     neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
117
/* We define these mov expanders to match the standard mov$a optab to prevent
   the mid-end from trying to do a subreg for these modes, which is the most
   inefficient way to expand the move.  Also, big-endian subregs aren't
   allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
   Without these RTL generation patterns the mid-end would attempt to take a
   subreg and may ICE if it can't.  */
124
;; TImode move expander.  While new pseudos may still be created, a move
;; whose destination is not a REG has its source forced into a register.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand")
        (match_operand:TI 1 "general_operand"))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (TImode, operands[1]);
})
136
;; Move expander for the VSTRUCT modes.  While new pseudos may still be
;; created, a move whose destination is not a REG has its source forced into
;; a register.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
        (match_operand:VSTRUCT 1 "general_operand"))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
148
;; Move expander for the VH modes; both operands use the s_register_operand
;; predicate.  While new pseudos may still be created, a destination that is
;; not a REG has its source forced into a register.
(define_expand "mov<mode>"
  [(set (match_operand:VH 0 "s_register_operand")
        (match_operand:VH 1 "s_register_operand"))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
160
;; Move insn for the VSTRUCT modes.  Alternative 0 (register to register)
;; returns "#".  Alternatives 1 and 2 are emitted by output_move_neon.
;; The length attribute is computed by arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 0)
    return "#";

  if (which_alternative == 1 || which_alternative == 2)
    return output_move_neon (operands);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
177
;; After reload, split an EImode register-to-register move into two moves:
;; a TImode part at the base register number and a DImode part at base + 4.
;; neon_disambiguate_copy fills in operands[] from the dest/src arrays.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int dest_regno = REGNO (operands[0]);
  int src_regno = REGNO (operands[1]);
  rtx dest[2], src[2];

  /* First part: TImode at the base register.  */
  dest[0] = gen_rtx_REG (TImode, dest_regno);
  src[0] = gen_rtx_REG (TImode, src_regno);

  /* Second part: DImode, four register numbers further on.  */
  dest[1] = gen_rtx_REG (DImode, dest_regno + 4);
  src[1] = gen_rtx_REG (DImode, src_regno + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})
196
;; After reload, split an OImode register-to-register move into two TImode
;; moves, at the base register number and at base + 4.
;; neon_disambiguate_copy fills in operands[] from the dest/src arrays.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int dest_regno = REGNO (operands[0]);
  int src_regno = REGNO (operands[1]);
  rtx dest[2], src[2];

  /* Each TImode part starts four register numbers after the previous one.  */
  for (int part = 0; part < 2; part++)
    {
      dest[part] = gen_rtx_REG (TImode, dest_regno + 4 * part);
      src[part] = gen_rtx_REG (TImode, src_regno + 4 * part);
    }

  neon_disambiguate_copy (operands, dest, src, 2);
})
215
;; After reload, split a CImode register-to-register move into three TImode
;; moves, at the base register number, base + 4 and base + 8.
;; neon_disambiguate_copy fills in operands[] from the dest/src arrays.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  int dest_regno = REGNO (operands[0]);
  int src_regno = REGNO (operands[1]);
  rtx dest[3], src[3];

  /* Each TImode part starts four register numbers after the previous one.  */
  for (int part = 0; part < 3; part++)
    {
      dest[part] = gen_rtx_REG (TImode, dest_regno + 4 * part);
      src[part] = gen_rtx_REG (TImode, src_regno + 4 * part);
    }

  neon_disambiguate_copy (operands, dest, src, 3);
})
237
;; After reload, split an XImode register-to-register move into four TImode
;; moves, at the base register number, base + 4, base + 8 and base + 12.
;; neon_disambiguate_copy fills in operands[] from the dest/src arrays.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  int dest_regno = REGNO (operands[0]);
  int src_regno = REGNO (operands[1]);
  rtx dest[4], src[4];

  /* Each TImode part starts four register numbers after the previous one.  */
  for (int part = 0; part < 4; part++)
    {
      dest[part] = gen_rtx_REG (TImode, dest_regno + 4 * part);
      src[part] = gen_rtx_REG (TImode, src_regno + 4 * part);
    }

  neon_disambiguate_copy (operands, dest, src, 4);
})
262
;; Misaligned move expander for the VDQX modes.  It is only enabled for
;; little-endian targets with unaligned_access.  The move is wrapped in
;; UNSPEC_MISALIGNED_ACCESS.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
                     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!(s_register_operand (operands[0], <MODE>mode)
        || s_register_operand (operands[1], <MODE>mode)))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* Whichever operand is not the register destination is the one whose
     address may need adjusting.  */
  rtx adjust_mem = (s_register_operand (operands[0], <MODE>mode)
                    ? operands[1] : operands[0]);

  /* Legitimize address.  */
  if (!neon_vector_mem_operand (adjust_mem, 2, true))
    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
})
287
;; Misaligned store for the VDX modes: emits vst1 from register operand 1 to
;; memory operand 0.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%P1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
295
;; Misaligned load for the VDX modes: emits vld1 from memory operand 1 into
;; register operand 0.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
                       " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%P0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
304
;; Misaligned store for the VQX modes: emits vst1 from register operand 1
;; (printed with %q) to memory operand 0.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%q1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
312
;; Misaligned load for the VQX modes: emits vld1 from memory operand 1 into
;; register operand 0 (printed with %q).
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
                       " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%q0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
321
;; Insert scalar operand 1 into one lane of a VD_LANE vector.  Operand 2 is
;; a bit mask; the lane index is recovered from it with ffs and is mirrored
;; for big-endian.  Alternative 0 loads the lane from memory with vld1;
;; alternative 1 moves it from a core register with vmov.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
        (vec_merge:VD_LANE
          (vec_duplicate:VD_LANE
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the one-hot mask in operand 2 into a lane number.  */
  int lane = ffs ((int) INTVAL (operands[2])) - 1;

  if (BYTES_BIG_ENDIAN)
    lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
342
;; Insert scalar operand 1 into one lane of a VQ2 vector.  The lane index
;; recovered from the mask in operand 2 is split into a half selector
;; (register number offset 0 or 2) and a lane within that <V_HALF>mode half.
;; The lane within the half is mirrored for big-endian.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the one-hot mask in operand 2 into a lane number.  */
  HOST_WIDE_INT full_lane = ffs ((int) INTVAL (operands[2])) - 1;
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = full_lane % lanes_per_half;
  int regno_offset = (full_lane / lanes_per_half) * 2;
  int base_regno = REGNO (operands[0]);

  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  /* Retarget the destination at the half that holds the lane.  */
  operands[0] = gen_rtx_REG (<V_HALF>mode, base_regno + regno_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)
371
;; Insert DImode operand 1 into one element of a V2DI_ONLY vector.  The
;; element index recovered from the mask in operand 2 selects a DImode
;; register at the destination's register number plus twice the index.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI_ONLY
          (vec_duplicate:V2DI_ONLY
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the one-hot mask in operand 2 into an element number.  */
  HOST_WIDE_INT element = ffs ((int) INTVAL (operands[2])) - 1;
  int d_regno = REGNO (operands[0]) + 2 * element;

  operands[0] = gen_rtx_REG (DImode, d_regno);

  return (which_alternative == 0
          ? "vld1.64\t%P0, %A1"
          : "vmov\t%P0, %Q1, %R1");
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)
393
;; vec_set expander for the VDQ modes.  It turns the element index in
;; operand 2 into a one-hot mask and emits the matching
;; vec_set<mode>_internal insn, with operand 0 as both destination and the
;; vector being merged into.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand:<V_elem> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_mask = HOST_WIDE_INT_1 << INTVAL (operands[2]);
  rtx mask_rtx = GEN_INT (lane_mask);

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         mask_rtx, operands[0]));
  DONE;
})
405
;; Extract lane operand 2 of a VD_LANE vector.  The lane index is mirrored
;; for big-endian.  Alternative 0 stores the lane to memory with vst1;
;; alternative 1 moves it to a core register with vmov.
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - lane);
    }

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
427
;; Extract lane operand 2 of a VQ2 vector.  The lane index is split into a
;; half selector (register number offset 0 or 2) and a lane within that
;; <V_HALF>mode half.  The lane within the half is mirrored for big-endian.
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = INTVAL (operands[2]) % lanes_per_half;
  int regno_offset = (INTVAL (operands[2]) / lanes_per_half) * 2;
  int base_regno = REGNO (operands[1]);

  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  /* Retarget the source at the half that holds the lane.  */
  operands[1] = gen_rtx_REG (<V_HALF>mode, base_regno + regno_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
453
;; Extract element operand 2 of a V2DI vector as DImode.  The source is
;; rewritten to the DImode register at the vector's register number plus
;; twice the element index.
(define_insn "vec_extractv2didi"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int d_regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, d_regno);

  return (which_alternative == 0
          ? "vst1.64\t{%P1}, %A0 @ v2di"
          : "vmov\t%Q0, %R0, %P1 @ v2di");
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)
472
;; Vector initialisation expander for the VDQ modes; all the work is
;; delegated to neon_expand_vector_init.
(define_expand "vec_init<mode><V_elem_l>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand 1 "" "")]
  "TARGET_NEON"
{
  neon_expand_vector_init (operands[0], operands[1]);
  DONE;
})
481
482 ;; Doubleword and quadword arithmetic.
483
484 ;; NOTE: some other instructions also support 64-bit integer
485 ;; element size, which we could potentially use for "long long" operations.
486
;; Vector addition for the VDQ modes.  Float modes are only accepted under
;; flag_unsafe_math_optimizations.  The type attribute is
;; neon_fp_addsub_s<q> when <Is_float_mode> tests true, neon_add<q> otherwise.
(define_insn "*add<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_add<q>")))]
)
498
499 ;; As with SFmode, full support for HFmode vector arithmetic is only available
500 ;; when flag-unsafe-math-optimizations is enabled.
501
;; Named addition pattern for the VH modes.  It requires TARGET_NEON_FP16INST
;; and flag_unsafe_math_optimizations.
(define_insn "add<mode>3"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (match_operand:VH 1 "s_register_operand" "w")
                 (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_add<q>")))]
)
515
;; Addition for the VH modes that only requires TARGET_NEON_FP16INST; unlike
;; add<mode>3 it does not test flag_unsafe_math_optimizations.
(define_insn "add<mode>3_fp16"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (match_operand:VH 1 "s_register_operand" "w")
                 (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_add<q>")))]
)
529
;; DImode addition that may use either NEON or core registers.
;; Alternatives 0 and 3 emit vadd.i64.  All other alternatives return "#".
;; The pattern clobbers the condition-code register.
(define_insn "adddi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
                 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0:
    case 3:
      return "vadd.i64\t%P0, %P1, %P2";

    case 1:
    case 2:
    case 4:
    case 5:
    case 6:
      return "#";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
                     multiple,multiple,multiple")
   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
   (set_attr "length" "*,8,8,*,8,8,8")
   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)
555
;; Vector subtraction for the VDQ modes.  Float modes are only accepted under
;; flag_unsafe_math_optimizations.  The type attribute is
;; neon_fp_addsub_s<q> when <Is_float_mode> tests true, neon_sub<q> otherwise.
(define_insn "*sub<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                   (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_sub<q>")))]
)
567
;; Named subtraction pattern for the VH modes.  It requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
(define_insn "sub<mode>3"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (minus:VH (match_operand:VH 1 "s_register_operand" "w")
                  (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub<q>")]
)
578
;; Subtraction for the VH modes that only requires TARGET_NEON_FP16INST;
;; unlike sub<mode>3 it does not test flag_unsafe_math_optimizations.
(define_insn "sub<mode>3_fp16"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (minus:VH (match_operand:VH 1 "s_register_operand" "w")
                  (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub<q>")]
)
589
;; DImode subtraction that may use either NEON or core registers.
;; Alternatives 0 and 4 emit vsub.i64.  Alternatives 1 to 3 emit a subs/sbc
;; pair on the core registers.  The pattern clobbers the condition-code
;; register.
(define_insn "subdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
        (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
                  (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  if (which_alternative == 0 || which_alternative == 4)
    return "vsub.i64\t%P0, %P1, %P2";

  if (which_alternative >= 1 && which_alternative <= 3)
    return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";

  gcc_unreachable ();
}
  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
   (set_attr "conds" "*,clob,clob,clob,*")
   (set_attr "length" "*,8,8,8,*")
   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)
612
;; Vector multiplication for the VDQW modes.  Float modes are only accepted
;; under flag_unsafe_math_optimizations.  The type attribute is
;; neon_fp_mul_s<q> when <Is_float_mode> tests true, and
;; neon_mul_<V_elem_ch><q> otherwise.
(define_insn "*mul<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s<q>")
                      (const_string "neon_mul_<V_elem_ch><q>")))]
)
624
/* Perform division using multiply-by-reciprocal.
   Reciprocal is calculated using Newton-Raphson method.
   Enabled with -funsafe-math-optimizations -freciprocal-math
   and disabled for -Os since it increases code size.

   The expansion multiplies float vectors through gen_mul<mode>3, and the
   vector multiply insn *mul<mode>3_neon only accepts float modes under
   flag_unsafe_math_optimizations.  The condition below therefore tests that
   flag explicitly, so that -freciprocal-math on its own cannot reach this
   expander.  NOTE(review): this assumes gen_mul<mode>3 (defined outside this
   part of the file) emits a plain mult that has to match *mul<mode>3_neon;
   confirm against its definition.  */

(define_expand "div<mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand")
        (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
                   (match_operand:VCVTF 2 "s_register_operand")))]
  "TARGET_NEON && !optimize_size
   && flag_unsafe_math_optimizations && flag_reciprocal_math"
  {
    rtx rec = gen_reg_rtx (<MODE>mode);
    rtx vrecps_temp = gen_reg_rtx (<MODE>mode);

    /* Reciprocal estimate.  */
    emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));

    /* Perform 2 iterations of newton-raphson method.  */
    for (int i = 0; i < 2; i++)
      {
        emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
        emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
      }

    /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec.  */
    emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
    DONE;
  }
)
655
656
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, with operand 1 tied to the destination.  Float modes are
;; only accepted under flag_unsafe_math_optimizations.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                              (match_operand:VDQW 3 "s_register_operand" "w"))
                   (match_operand:VDQW 1 "s_register_operand" "0")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
669
;; Multiply-accumulate for the VH modes: operand 0 = operand 2 * operand 3
;; + operand 1, with operand 1 tied to the destination.  Emits vmla.f16.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
                          (match_operand:VH 3 "s_register_operand" "w"))
                 (match_operand:VH 1 "s_register_operand" "0")))]
  "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
679
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with operand 1 tied to the destination.  Float modes are only
;; accepted under flag_unsafe_math_optimizations.
(define_insn "mul<mode>3neg<mode>add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
692
693 ;; Fused multiply-accumulate
694 ;; We define each insn twice here:
695 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
696 ;; to be able to use when converting to FMA.
697 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-accumulate for the VCVTF modes, gated on
;; flag_unsafe_math_optimizations.  Operand 3 (the addend) is tied to the
;; destination.
(define_insn "fma<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
707
;; Fused multiply-accumulate for the VCVTF modes without the
;; flag_unsafe_math_optimizations test.  Operand 3 (the addend) is tied to
;; the destination.
(define_insn "fma<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
717
;; Fused multiply-accumulate for the VH modes, gated on
;; flag_unsafe_math_optimizations.  Operand 3 (the addend) is tied to the
;; destination.
(define_insn "fma<VH:mode>4"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH (match_operand:VH 1 "register_operand" "w")
                (match_operand:VH 2 "register_operand" "w")
                (match_operand:VH 3 "register_operand" "0")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
728
;; Fused multiply-accumulate for the VH modes without the
;; flag_unsafe_math_optimizations test.  Operand 3 (the addend) is tied to
;; the destination.
(define_insn "fma<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH (match_operand:VH 1 "register_operand" "w")
                (match_operand:VH 2 "register_operand" "w")
                (match_operand:VH 3 "register_operand" "0")))]
  "TARGET_NEON_FP16INST"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
739
;; Fused multiply-subtract for the VCVTF modes: an fma whose first multiplicand
;; is negated.  Gated on flag_unsafe_math_optimizations; operand 3 is tied to
;; the destination.
(define_insn "*fmsub<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
749
;; Fused multiply-subtract for the VCVTF modes without the
;; flag_unsafe_math_optimizations test: an fma whose first multiplicand is
;; negated.  Operand 3 is tied to the destination.
(define_insn "fmsub<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
760
;; Fused multiply-subtract for the VH modes: an fma whose first multiplicand
;; is negated.  Operand 3 is tied to the destination.
(define_insn "fmsub<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH (neg:VH (match_operand:VH 1 "register_operand" "w"))
                (match_operand:VH 2 "register_operand" "w")
                (match_operand:VH 3 "register_operand" "0")))]
  "TARGET_NEON_FP16INST"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
771
;; vrint<variant>.f32 for the VCVTF modes, one pattern per NEON_VRINT unspec.
;; Requires TARGET_VFP5.
(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1
                         "s_register_operand" "w")]
                      NEON_VRINT))]
  "TARGET_NEON && TARGET_VFP5"
  "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)
781
;; Float-to-integer conversion (FIXUORS) applied to a NEON_VCVT unspec of a
;; VCVTF operand; the result has mode <V_cmp_result>.  Requires TARGET_VFP5
;; and is marked not predicable.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
        (FIXUORS:<V_cmp_result>
          (unspec:VCVTF [(match_operand:VCVTF 1 "register_operand" "w")]
                        NEON_VCVT)))]
  "TARGET_NEON && TARGET_VFP5"
  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
   (set_attr "predicable" "no")]
)
792
;; Bitwise OR for the VDQ modes.  Alternative 0 is the register form.
;; Alternative 1 takes an immediate and has its template produced by
;; neon_output_logic_immediate.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vorr", &operands[2],
                                        <MODE>mode, 0,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
809
810 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
811 ;; vorr. We support the pseudo-instruction vand instead, because that
812 ;; corresponds to the canonical form the middle-end expects to use for
813 ;; immediate bitwise-ANDs.
814
;; Bitwise AND for the VDQ modes.  Alternative 0 is the register form.
;; Alternative 1 takes an immediate and has its template produced by
;; neon_output_logic_immediate, called with "vand" and a fourth argument of 1
;; (ior<mode>3 passes 0 there).
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vand", &operands[2],
                                        <MODE>mode, 1,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
831
;; OR-NOT for the VDQ modes: operand 0 = ~operand 2 | operand 1.
(define_insn "orn<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
840
;; Bit clear for the VDQ modes: operand 0 = ~operand 2 & operand 1.
(define_insn "bic<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
849
;; Bitwise exclusive OR for the VDQ modes; emits veor.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                 (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
858
;; Bitwise complement for the VDQ modes; emits vmvn.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmvn\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_move<q>")]
)
866
;; Absolute value for the VDQW modes; emits vabs.  The type attribute is
;; neon_fp_abs_s<q> when <Is_float_mode> tests true, neon_abs<q> otherwise.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_abs_s<q>")
                      (const_string "neon_abs<q>")))]
)
877
;; Negation for the VDQW modes; emits vneg.  The type attribute is
;; neon_fp_neg_s<q> when <Is_float_mode> tests true, neon_neg<q> otherwise.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_neg_s<q>")
                      (const_string "neon_neg<q>")))]
)
888
;; DImode negation.  The template is "#" for every alternative, so the insn
;; is always split.  It carries a DImode scratch (operand 2) and clobbers the
;; condition-code register.
(define_insn "negdi2_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
        (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
   (clobber (match_scratch:DI 2 "= X,&w,X, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
899
; Split negdi2_neon for vfp registers.  After reload, when the destination is
; a VFP register, the negation becomes "scratch = 0; dest = scratch - src".
; If no scratch register was allocated, the destination itself is used.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (neg:DI (match_operand:DI 1 "s_register_operand" "")))
   (clobber (match_scratch:DI 2 ""))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (const_int 0))
   (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
              (clobber (reg:CC CC_REGNUM))])]
{
  /* Operand 2 is not a REG when the matched alternative needed no scratch;
     fall back to the destination register.  */
  if (!REG_P (operands[2]))
    operands[2] = operands[0];
}
)
915
916 ; Split negdi2_neon for core registers
;; After reload, when operand 0 is a general (core) register, re-emit the
;; plain neg:DI with only the CC clobber, dropping the scratch.
;; NOTE(review): the resulting insn is presumably matched by a core-register
;; negdi pattern defined outside this chunk -- confirm.
917 (define_split
918 [(set (match_operand:DI 0 "s_register_operand" "")
919 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
920 (clobber (match_scratch:DI 2 ""))
921 (clobber (reg:CC CC_REGNUM))]
922 "TARGET_32BIT && reload_completed
923 && arm_general_register_operand (operands[0], DImode)"
924 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
925 (clobber (reg:CC CC_REGNUM))])]
926 ""
927 )
928
;; Absolute value / negation (one pattern per ABSNEG code) for the VH
;; modes.  Requires TARGET_NEON_FP16INST; the mnemonic is "v" followed by
;; <absneg_str>.
929 (define_insn "<absneg_str><mode>2"
930 [(set (match_operand:VH 0 "s_register_operand" "=w")
931 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
932 "TARGET_NEON_FP16INST"
933 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
934 [(set_attr "type" "neon_abs<q>")]
935 )
936
;; Named entry point neon_v<absneg_str><mode> for the VH modes: it simply
;; forwards both operands to the <absneg_str><mode>2 insn.
937 (define_expand "neon_v<absneg_str><mode>"
938 [(set
939 (match_operand:VH 0 "s_register_operand")
940 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
941 "TARGET_NEON_FP16INST"
942 {
943 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
944 DONE;
945 })
946
;; Rounding operations on the VH modes, one pattern per FP16_RND unspec.
;; The mnemonic comes from <fp16_rnd_insn>; requires TARGET_NEON_FP16INST.
947 (define_insn "neon_v<fp16_rnd_str><mode>"
948 [(set (match_operand:VH 0 "s_register_operand" "=w")
949 (unspec:VH
950 [(match_operand:VH 1 "s_register_operand" "w")]
951 FP16_RND))]
952 "TARGET_NEON_FP16INST"
953 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
954 [(set_attr "type" "neon_fp_round_s<q>")]
955 )
956
;; vrsqrte.f16 on the VH modes, represented as UNSPEC_VRSQRTE of operand 1.
;; Requires TARGET_NEON_FP16INST.
957 (define_insn "neon_vrsqrte<mode>"
958 [(set (match_operand:VH 0 "s_register_operand" "=w")
959 (unspec:VH
960 [(match_operand:VH 1 "s_register_operand" "w")]
961 UNSPEC_VRSQRTE))]
962 "TARGET_NEON_FP16INST"
963 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
964 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
965 )
966
;; Element-wise unsigned minimum for the VDQIW modes: vmin with the
;; unsigned element suffix.
967 (define_insn "*umin<mode>3_neon"
968 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
969 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
970 (match_operand:VDQIW 2 "s_register_operand" "w")))]
971 "TARGET_NEON"
972 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
973 [(set_attr "type" "neon_minmax<q>")]
974 )
975
;; Element-wise unsigned maximum for the VDQIW modes: vmax with the
;; unsigned element suffix.
976 (define_insn "*umax<mode>3_neon"
977 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
978 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
979 (match_operand:VDQIW 2 "s_register_operand" "w")))]
980 "TARGET_NEON"
981 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
982 [(set_attr "type" "neon_minmax<q>")]
983 )
984
;; Element-wise signed minimum for the VDQW modes: vmin with the signed
;; element suffix.  The scheduling type is neon_fp_minmax_s<q> when
;; <Is_float_mode> is true and neon_minmax<q> otherwise.
985 (define_insn "*smin<mode>3_neon"
986 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
987 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
988 (match_operand:VDQW 2 "s_register_operand" "w")))]
989 "TARGET_NEON"
990 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
991 [(set (attr "type")
992 (if_then_else (match_test "<Is_float_mode>")
993 (const_string "neon_fp_minmax_s<q>")
994 (const_string "neon_minmax<q>")))]
995 )
996
;; Element-wise signed maximum for the VDQW modes: vmax with the signed
;; element suffix.  The scheduling type is neon_fp_minmax_s<q> when
;; <Is_float_mode> is true and neon_minmax<q> otherwise.
997 (define_insn "*smax<mode>3_neon"
998 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
999 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1000 (match_operand:VDQW 2 "s_register_operand" "w")))]
1001 "TARGET_NEON"
1002 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1003 [(set (attr "type")
1004 (if_then_else (match_test "<Is_float_mode>")
1005 (const_string "neon_fp_minmax_s<q>")
1006 (const_string "neon_minmax<q>")))]
1007 )
1008
1009 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1010 ; generic vectorizer code. It ends up creating a V2DI constructor with
1011 ; SImode elements.
1012
;; Vector shift left for the VDQIW modes.
;;   Alternative 0: per-element count in a register -> vshl.<V_s_elem>.
;;   Alternative 1: immediate count (constraint Dm); the assembly text is
;;                  built by neon_output_shift_immediate ("vshl", 'i', ...).
1013 (define_insn "vashl<mode>3"
1014 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1015 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1016 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
1017 "TARGET_NEON"
1018 {
1019 switch (which_alternative)
1020 {
1021 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1022 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1023 <MODE>mode,
1024 VALID_NEON_QREG_MODE (<MODE>mode),
1025 true);
1026 default: gcc_unreachable ();
1027 }
1028 }
1029 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1030 )
1031
;; Arithmetic shift right by an immediate (predicate
;; imm_for_neon_rshift_operand).  The assembly text is built by
;; neon_output_shift_immediate with mnemonic "vshr" and sign letter 's'.
1032 (define_insn "vashr<mode>3_imm"
1033 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1034 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1035 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1036 "TARGET_NEON"
1037 {
1038 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1039 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1040 false);
1041 }
1042 [(set_attr "type" "neon_shift_imm<q>")]
1043 )
1044
;; Logical shift right by an immediate (predicate
;; imm_for_neon_rshift_operand).  The assembly text is built by
;; neon_output_shift_immediate with mnemonic "vshr" and sign letter 'u'.
1045 (define_insn "vlshr<mode>3_imm"
1046 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1047 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1048 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1049 "TARGET_NEON"
1050 {
1051 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1052 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1053 false);
1054 }
1055 [(set_attr "type" "neon_shift_imm<q>")]
1056 )
1057
1058 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1059 ; negative amount, with signed operands. This is essentially the same as
1060 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
1061 ; with negative shift amounts.
; The vashr<mode>3 expander below negates the count and then uses this pattern.
1062
1063 (define_insn "ashl<mode>3_signed"
1064 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1065 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1066 (match_operand:VDQI 2 "s_register_operand" "w")]
1067 UNSPEC_ASHIFT_SIGNED))]
1068 "TARGET_NEON"
1069 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1070 [(set_attr "type" "neon_shift_reg<q>")]
1071 )
1072
1073 ; Used for implementing logical shift-right, which is a left-shift by a negative
1074 ; amount, with unsigned operands.
; The vlshr<mode>3 expander below negates the count and then uses this
; pattern.  An unspec (UNSPEC_ASHIFT_UNSIGNED) is used instead of an ashift.
1075
1076 (define_insn "ashl<mode>3_unsigned"
1077 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1078 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1079 (match_operand:VDQI 2 "s_register_operand" "w")]
1080 UNSPEC_ASHIFT_UNSIGNED))]
1081 "TARGET_NEON"
1082 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1083 [(set_attr "type" "neon_shift_reg<q>")]
1084 )
1085
;; Arithmetic shift right expander for the VDQIW modes.  A register count
;; is negated into a fresh pseudo and passed to ashl<mode>3_signed (a vshl
;; by the negated amount); any other accepted operand goes to the
;; immediate pattern vashr<mode>3_imm.
1086 (define_expand "vashr<mode>3"
1087 [(set (match_operand:VDQIW 0 "s_register_operand")
1088 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1089 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
1090 "TARGET_NEON"
1091 {
1092 if (s_register_operand (operands[2], <MODE>mode))
1093 {
1094 rtx neg = gen_reg_rtx (<MODE>mode);
1095 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1096 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1097 }
1098 else
1099 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1100 DONE;
1101 })
1102
;; Logical shift right expander for the VDQIW modes.  A register count is
;; negated into a fresh pseudo and passed to ashl<mode>3_unsigned (a vshl
;; by the negated amount); any other accepted operand goes to the
;; immediate pattern vlshr<mode>3_imm.
1103 (define_expand "vlshr<mode>3"
1104 [(set (match_operand:VDQIW 0 "s_register_operand")
1105 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1106 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
1107 "TARGET_NEON"
1108 {
1109 if (s_register_operand (operands[2], <MODE>mode))
1110 {
1111 rtx neg = gen_reg_rtx (<MODE>mode);
1112 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1113 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1114 }
1115 else
1116 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1117 DONE;
1118 })
1119
1120 ;; 64-bit shifts
1121
1122 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1123 ;; leaving the upper half uninitialized. This is OK since the shift
1124 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1125 ;; data flow analysis however, we pretend the full register is set
1126 ;; using an unspec.
;; Alternative 0 loads the count from memory (constraint Um) with vld1.32
;; into lane 0; alternative 1 moves it from a core register with vmov.32.
1127 (define_insn "neon_load_count"
1128 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1129 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1130 UNSPEC_LOAD_COUNT))]
1131 "TARGET_NEON"
1132 "@
1133 vld1.32\t{%P0[0]}, %A1
1134 vmov.32\t%P0[0], %1"
1135 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1136 )
1137
1138 ;; Widening operations
1139
;; Widening signed sum for the VQI modes: operand 0 = operand 2 plus the
;; sign-extended elements of operand 1, in the <V_double_width> mode.
;; Operand 2 is first copied into operand 0 if they differ.  The two halves
;; of operand 1 are then accumulated one after the other through the
;; vec_sel_widen_ssum_lo/hi patterns, each tied to operand 0.
1140 (define_expand "widen_ssum<mode>3"
1141 [(set (match_operand:<V_double_width> 0 "s_register_operand")
1142 (plus:<V_double_width>
1143 (sign_extend:<V_double_width>
1144 (match_operand:VQI 1 "s_register_operand"))
1145 (match_operand:<V_double_width> 2 "s_register_operand")))]
1146 "TARGET_NEON"
1147 {
1148 machine_mode mode = GET_MODE (operands[1]);
1149 rtx p1, p2;
1150
1151 p1 = arm_simd_vect_par_cnst_half (mode, false);
1152 p2 = arm_simd_vect_par_cnst_half (mode, true);
1153
1154 if (operands[0] != operands[2])
1155 emit_move_insn (operands[0], operands[2]);
1156
1157 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1158 operands[1],
1159 p1,
1160 operands[0]));
1161 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1162 operands[1],
1163 p2,
1164 operands[0]));
1165 DONE;
1166 }
1167 )
1168
;; Accumulate the sign-extended low half of operand 1 (selected by the
;; constant PARALLEL in operand 2) into operand 3, which is tied to the
;; output.  On big-endian the low lanes are taken from %f1 rather than %e1.
;; Operand 1 carries no '%' modifier: it sits under vec_select/sign_extend
;; and the following operand is a constant lane selector, so the two are
;; not interchangeable.
1169 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1170 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1171 (plus:<V_double_width>
1172 (sign_extend:<V_double_width>
1173 (vec_select:<V_HALF>
1174 (match_operand:VQI 1 "s_register_operand" "w")
1175 (match_operand:VQI 2 "vect_par_constant_low" "")))
1176 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1177 "TARGET_NEON"
1178 {
1179 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1180 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1181 }
1182 [(set_attr "type" "neon_add_widen")])
1183
;; Accumulate the sign-extended high half of operand 1 (selected by the
;; constant PARALLEL in operand 2) into operand 3, which is tied to the
;; output.  On big-endian the high lanes are taken from %e1 rather than %f1.
;; Operand 1 carries no '%' modifier: it sits under vec_select/sign_extend
;; and the following operand is a constant lane selector, so the two are
;; not interchangeable.
1184 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1185 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1186 (plus:<V_double_width>
1187 (sign_extend:<V_double_width>
1188 (vec_select:<V_HALF>
1189 (match_operand:VQI 1 "s_register_operand" "w")
1190 (match_operand:VQI 2 "vect_par_constant_high" "")))
1191 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1192 "TARGET_NEON"
1193 {
1194 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1195 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1196 }
1197 [(set_attr "type" "neon_add_widen")])
1198
;; Widening signed sum for the VW modes: operand 0 = operand 2 plus the
;; sign-extended operand 1, emitted as a single vaddw.
;; Operand 1 carries no '%' modifier: it has mode VW and sits under
;; sign_extend while operand 2 has mode <V_widen>, so the two operands are
;; not interchangeable.
1199 (define_insn "widen_ssum<mode>3"
1200 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1201 (plus:<V_widen>
1202 (sign_extend:<V_widen>
1203 (match_operand:VW 1 "s_register_operand" "w"))
1204 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1205 "TARGET_NEON"
1206 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1207 [(set_attr "type" "neon_add_widen")]
1208 )
1209
;; Widening unsigned sum for the VQI modes: operand 0 = operand 2 plus the
;; zero-extended elements of operand 1, in the <V_double_width> mode.
;; Operand 2 is first copied into operand 0 if they differ.  The two halves
;; of operand 1 are then accumulated one after the other through the
;; vec_sel_widen_usum_lo/hi patterns, each tied to operand 0.
1210 (define_expand "widen_usum<mode>3"
1211 [(set (match_operand:<V_double_width> 0 "s_register_operand")
1212 (plus:<V_double_width>
1213 (zero_extend:<V_double_width>
1214 (match_operand:VQI 1 "s_register_operand"))
1215 (match_operand:<V_double_width> 2 "s_register_operand")))]
1216 "TARGET_NEON"
1217 {
1218 machine_mode mode = GET_MODE (operands[1]);
1219 rtx p1, p2;
1220
1221 p1 = arm_simd_vect_par_cnst_half (mode, false);
1222 p2 = arm_simd_vect_par_cnst_half (mode, true);
1223
1224 if (operands[0] != operands[2])
1225 emit_move_insn (operands[0], operands[2]);
1226
1227 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1228 operands[1],
1229 p1,
1230 operands[0]));
1231 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1232 operands[1],
1233 p2,
1234 operands[0]));
1235 DONE;
1236 }
1237 )
1238
;; Accumulate the zero-extended low half of operand 1 (selected by the
;; constant PARALLEL in operand 2) into operand 3, which is tied to the
;; output.  On big-endian the low lanes are taken from %f1 rather than %e1.
;; Operand 1 carries no '%' modifier: it sits under vec_select/zero_extend
;; and the following operand is a constant lane selector, so the two are
;; not interchangeable.
1239 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1240 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1241 (plus:<V_double_width>
1242 (zero_extend:<V_double_width>
1243 (vec_select:<V_HALF>
1244 (match_operand:VQI 1 "s_register_operand" "w")
1245 (match_operand:VQI 2 "vect_par_constant_low" "")))
1246 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1247 "TARGET_NEON"
1248 {
1249 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1250 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1251 }
1252 [(set_attr "type" "neon_add_widen")])
1253
;; Accumulate the zero-extended high half of operand 1 (selected by the
;; constant PARALLEL in operand 2) into operand 3, which is tied to the
;; output.  On big-endian the high lanes are taken from %e1 rather than %f1.
;; Operand 1 carries no '%' modifier: it sits under vec_select/zero_extend
;; and the following operand is a constant lane selector, so the two are
;; not interchangeable.
1254 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1255 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1256 (plus:<V_double_width>
1257 (zero_extend:<V_double_width>
1258 (vec_select:<V_HALF>
1259 (match_operand:VQI 1 "s_register_operand" "w")
1260 (match_operand:VQI 2 "vect_par_constant_high" "")))
1261 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1262 "TARGET_NEON"
1263 {
1264 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1265 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1266 }
1267 [(set_attr "type" "neon_add_widen")])
1268
;; Widening unsigned sum for the VW modes: operand 0 = operand 2 plus the
;; zero-extended operand 1, emitted as a single vaddw.
;; Operand 1 carries no '%' modifier: it has mode VW and sits under
;; zero_extend while operand 2 has mode <V_widen>, so the two operands are
;; not interchangeable.
1269 (define_insn "widen_usum<mode>3"
1270 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1271 (plus:<V_widen> (zero_extend:<V_widen>
1272 (match_operand:VW 1 "s_register_operand" "w"))
1273 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1274 "TARGET_NEON"
1275 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1276 [(set_attr "type" "neon_add_widen")]
1277 )
1278
1279 ;; Helpers for quad-word reduction operations
1280
1281 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1282 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1283 ; N/2-element vector.
1284
;; V4SI instance: combine lanes {0,1} of operand 1 with lanes {2,3} using
;; each VQH_OPS code, giving a V2SI result.  The two halves are printed as
;; %e1 and %f1; the mnemonic and sign letter come from <VQH_mnem> and
;; <VQH_sign>.
1285 (define_insn "quad_halves_<code>v4si"
1286 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1287 (VQH_OPS:V2SI
1288 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1289 (parallel [(const_int 0) (const_int 1)]))
1290 (vec_select:V2SI (match_dup 1)
1291 (parallel [(const_int 2) (const_int 3)]))))]
1292 "TARGET_NEON"
1293 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1294 [(set_attr "vqh_mnem" "<VQH_mnem>")
1295 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1296 )
1297
;; V4SF instance: combine lanes {0,1} of operand 1 with lanes {2,3} using
;; each VQHS_OPS code, giving a V2SF result.  Only enabled together with
;; flag_unsafe_math_optimizations.
1298 (define_insn "quad_halves_<code>v4sf"
1299 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1300 (VQHS_OPS:V2SF
1301 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1302 (parallel [(const_int 0) (const_int 1)]))
1303 (vec_select:V2SF (match_dup 1)
1304 (parallel [(const_int 2) (const_int 3)]))))]
1305 "TARGET_NEON && flag_unsafe_math_optimizations"
1306 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1307 [(set_attr "vqh_mnem" "<VQH_mnem>")
1308 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1309 )
1310
;; V8HI instance: combine lanes {0..3} of operand 1 with lanes {4..7} using
;; each VQH_OPS code, giving a V4HI result.  The two halves are printed as
;; %e1 and %f1.
;; Operand 0 is only written by this insn, so it uses the plain output
;; constraint "=w", like the V4SI and V4SF variants.
1311 (define_insn "quad_halves_<code>v8hi"
1312 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1313 (VQH_OPS:V4HI
1314 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1315 (parallel [(const_int 0) (const_int 1)
1316 (const_int 2) (const_int 3)]))
1317 (vec_select:V4HI (match_dup 1)
1318 (parallel [(const_int 4) (const_int 5)
1319 (const_int 6) (const_int 7)]))))]
1320 "TARGET_NEON"
1321 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1322 [(set_attr "vqh_mnem" "<VQH_mnem>")
1323 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1324 )
1325
;; V16QI instance: combine lanes {0..7} of operand 1 with lanes {8..15}
;; using each VQH_OPS code, giving a V8QI result.  The two halves are
;; printed as %e1 and %f1.
;; Operand 0 is only written by this insn, so it uses the plain output
;; constraint "=w", like the V4SI and V4SF variants.
1326 (define_insn "quad_halves_<code>v16qi"
1327 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1328 (VQH_OPS:V8QI
1329 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1330 (parallel [(const_int 0) (const_int 1)
1331 (const_int 2) (const_int 3)
1332 (const_int 4) (const_int 5)
1333 (const_int 6) (const_int 7)]))
1334 (vec_select:V8QI (match_dup 1)
1335 (parallel [(const_int 8) (const_int 9)
1336 (const_int 10) (const_int 11)
1337 (const_int 12) (const_int 13)
1338 (const_int 14) (const_int 15)]))))]
1339 "TARGET_NEON"
1340 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1341 [(set_attr "vqh_mnem" "<VQH_mnem>")
1342 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1343 )
1344
;; Store the half-width vector operand 1 into part of the 128-bit operand 0:
;; a plain move into the <V_HALF>mode subreg of operand 0 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode).
1345 (define_expand "move_hi_quad_<mode>"
1346 [(match_operand:ANY128 0 "s_register_operand")
1347 (match_operand:<V_HALF> 1 "s_register_operand")]
1348 "TARGET_NEON"
1349 {
1350 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1351 GET_MODE_SIZE (<V_HALF>mode)),
1352 operands[1]);
1353 DONE;
1354 })
1355
;; Store the half-width vector operand 1 into part of the 128-bit operand 0:
;; a plain move into the <V_HALF>mode subreg of operand 0 at byte offset 0.
1356 (define_expand "move_lo_quad_<mode>"
1357 [(match_operand:ANY128 0 "s_register_operand")
1358 (match_operand:<V_HALF> 1 "s_register_operand")]
1359 "TARGET_NEON"
1360 {
1361 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1362 <MODE>mode, 0),
1363 operands[1]);
1364 DONE;
1365 })
1366
1367 ;; Reduction operations
1368
;; Sum reduction of a VD vector (operand 1) to a scalar (operand 0).
;; neon_pairwise_reduce is handed the vpadd generator and fills a fresh
;; vector register; element 0 of that register is then extracted.
;; Float modes are only handled under flag_unsafe_math_optimizations.
1369 (define_expand "reduc_plus_scal_<mode>"
1370 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1371 (match_operand:VD 1 "s_register_operand")]
1372 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1373 {
1374 rtx vec = gen_reg_rtx (<MODE>mode);
1375 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1376 &gen_neon_vpadd_internal<mode>);
1377 /* The same result is actually computed into every element. */
1378 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1379 DONE;
1380 })
1381
;; Sum reduction of a VQ vector to a scalar, in two steps: add the two
;; halves with quad_halves_plus<mode> into a <V_HALF> register, then reuse
;; the half-width reduc_plus_scal expander.  Disabled on big-endian; float
;; modes are only handled under flag_unsafe_math_optimizations.
1382 (define_expand "reduc_plus_scal_<mode>"
1383 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1384 (match_operand:VQ 1 "s_register_operand")]
1385 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1386 && !BYTES_BIG_ENDIAN"
1387 {
1388 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1389
1390 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1391 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1392
1393 DONE;
1394 })
1395
;; Sum reduction of a V2DI vector to a DI scalar: run
;; arm_reduc_plus_internal_v2di into a fresh V2DI register and extract
;; element 0 from it.  Disabled on big-endian.
1396 (define_expand "reduc_plus_scal_v2di"
1397 [(match_operand:DI 0 "nonimmediate_operand")
1398 (match_operand:V2DI 1 "s_register_operand")]
1399 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1400 {
1401 rtx vec = gen_reg_rtx (V2DImode);
1402
1403 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1404 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1405
1406 DONE;
1407 })
1408
;; Helper for reduc_plus_scal_v2di: a single vadd.i64 that adds the two
;; halves of operand 1 (%e1 and %f1) and writes the sum to %e0.  Modelled
;; as UNSPEC_VPADD on the whole V2DI register; disabled on big-endian.
1409 (define_insn "arm_reduc_plus_internal_v2di"
1410 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1411 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1412 UNSPEC_VPADD))]
1413 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1414 "vadd.i64\t%e0, %e1, %f1"
1415 [(set_attr "type" "neon_add_q")]
1416 )
1417
;; Signed-minimum reduction of a VD vector (operand 1) to a scalar.
;; neon_pairwise_reduce is handed the vpsmin generator and fills a fresh
;; vector register; element 0 of that register is then extracted.
;; Float modes are only handled under flag_unsafe_math_optimizations.
1418 (define_expand "reduc_smin_scal_<mode>"
1419 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1420 (match_operand:VD 1 "s_register_operand")]
1421 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1422 {
1423 rtx vec = gen_reg_rtx (<MODE>mode);
1424
1425 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1426 &gen_neon_vpsmin<mode>);
1427 /* The result is computed into every element of the vector. */
1428 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1429 DONE;
1430 })
1431
;; Signed-minimum reduction of a VQ vector to a scalar, in two steps: take
;; the minimum of the two halves with quad_halves_smin<mode>, then reuse
;; the half-width reduc_smin_scal expander.  Disabled on big-endian; float
;; modes are only handled under flag_unsafe_math_optimizations.
1432 (define_expand "reduc_smin_scal_<mode>"
1433 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1434 (match_operand:VQ 1 "s_register_operand")]
1435 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1436 && !BYTES_BIG_ENDIAN"
1437 {
1438 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1439
1440 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1441 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1442
1443 DONE;
1444 })
1445
;; Signed-maximum reduction of a VD vector (operand 1) to a scalar.
;; neon_pairwise_reduce is handed the vpsmax generator and fills a fresh
;; vector register; element 0 of that register is then extracted.
;; Float modes are only handled under flag_unsafe_math_optimizations.
1446 (define_expand "reduc_smax_scal_<mode>"
1447 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1448 (match_operand:VD 1 "s_register_operand")]
1449 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1450 {
1451 rtx vec = gen_reg_rtx (<MODE>mode);
1452 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1453 &gen_neon_vpsmax<mode>);
1454 /* The result is computed into every element of the vector. */
1455 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1456 DONE;
1457 })
1458
;; Signed-maximum reduction of a VQ vector to a scalar, in two steps: take
;; the maximum of the two halves with quad_halves_smax<mode>, then reuse
;; the half-width reduc_smax_scal expander.  Disabled on big-endian; float
;; modes are only handled under flag_unsafe_math_optimizations.
1459 (define_expand "reduc_smax_scal_<mode>"
1460 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1461 (match_operand:VQ 1 "s_register_operand")]
1462 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1463 && !BYTES_BIG_ENDIAN"
1464 {
1465 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1466
1467 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1468 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1469
1470 DONE;
1471 })
1472
;; Unsigned-minimum reduction of a VDI vector (operand 1) to a scalar.
;; neon_pairwise_reduce is handed the vpumin generator and fills a fresh
;; vector register; element 0 of that register is then extracted.
1473 (define_expand "reduc_umin_scal_<mode>"
1474 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1475 (match_operand:VDI 1 "s_register_operand")]
1476 "TARGET_NEON"
1477 {
1478 rtx vec = gen_reg_rtx (<MODE>mode);
1479 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1480 &gen_neon_vpumin<mode>);
1481 /* The result is computed into every element of the vector. */
1482 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1483 DONE;
1484 })
1485
;; Unsigned-minimum reduction of a VQI vector to a scalar, in two steps:
;; take the minimum of the two halves with quad_halves_umin<mode>, then
;; reuse the half-width reduc_umin_scal expander.  Disabled on big-endian.
1486 (define_expand "reduc_umin_scal_<mode>"
1487 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1488 (match_operand:VQI 1 "s_register_operand")]
1489 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1490 {
1491 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1492
1493 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1494 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1495
1496 DONE;
1497 })
1498
;; Unsigned-maximum reduction of a VDI vector (operand 1) to a scalar.
;; neon_pairwise_reduce is handed the vpumax generator and fills a fresh
;; vector register; element 0 of that register is then extracted.
1499 (define_expand "reduc_umax_scal_<mode>"
1500 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1501 (match_operand:VDI 1 "s_register_operand")]
1502 "TARGET_NEON"
1503 {
1504 rtx vec = gen_reg_rtx (<MODE>mode);
1505 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1506 &gen_neon_vpumax<mode>);
1507 /* The result is computed into every element of the vector. */
1508 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1509 DONE;
1510 })
1511
;; Unsigned-maximum reduction of a VQI vector to a scalar, in two steps:
;; take the maximum of the two halves with quad_halves_umax<mode>, then
;; reuse the half-width reduc_umax_scal expander.  Disabled on big-endian.
1512 (define_expand "reduc_umax_scal_<mode>"
1513 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1514 (match_operand:VQI 1 "s_register_operand")]
1515 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1516 {
1517 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1518
1519 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1520 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1521
1522 DONE;
1523 })
1524
;; Pairwise add of two VD vectors, modelled as UNSPEC_VPADD and emitted as
;; vpadd.  The scheduling type is neon_fp_reduc_add_s<q> when
;; <Is_float_mode> is true and neon_reduc_add<q> otherwise.
1525 (define_insn "neon_vpadd_internal<mode>"
1526 [(set (match_operand:VD 0 "s_register_operand" "=w")
1527 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1528 (match_operand:VD 2 "s_register_operand" "w")]
1529 UNSPEC_VPADD))]
1530 "TARGET_NEON"
1531 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1532 ;; Assume this schedules like vadd.
1533 [(set (attr "type")
1534 (if_then_else (match_test "<Is_float_mode>")
1535 (const_string "neon_fp_reduc_add_s<q>")
1536 (const_string "neon_reduc_add<q>")))]
1537 )
1538
;; Pairwise add of two V4HF vectors (vpadd.f16), modelled as UNSPEC_VPADD.
;; Requires TARGET_NEON_FP16INST.
;; NOTE(review): the type is the integer "neon_reduc_add" although the
;; operands are floating-point -- confirm this is the intended scheduling
;; class.
1539 (define_insn "neon_vpaddv4hf"
1540 [(set
1541 (match_operand:V4HF 0 "s_register_operand" "=w")
1542 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1543 (match_operand:V4HF 2 "s_register_operand" "w")]
1544 UNSPEC_VPADD))]
1545 "TARGET_NEON_FP16INST"
1546 "vpadd.f16\t%P0, %P1, %P2"
1547 [(set_attr "type" "neon_reduc_add")]
1548 )
1549
;; Pairwise signed minimum of two VD vectors, modelled as UNSPEC_VPSMIN and
;; emitted as vpmin with the signed element suffix.  The scheduling type
;; depends on <Is_float_mode>.
1550 (define_insn "neon_vpsmin<mode>"
1551 [(set (match_operand:VD 0 "s_register_operand" "=w")
1552 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1553 (match_operand:VD 2 "s_register_operand" "w")]
1554 UNSPEC_VPSMIN))]
1555 "TARGET_NEON"
1556 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1557 [(set (attr "type")
1558 (if_then_else (match_test "<Is_float_mode>")
1559 (const_string "neon_fp_reduc_minmax_s<q>")
1560 (const_string "neon_reduc_minmax<q>")))]
1561 )
1562
;; Pairwise signed maximum of two VD vectors, modelled as UNSPEC_VPSMAX and
;; emitted as vpmax with the signed element suffix.  The scheduling type
;; depends on <Is_float_mode>.
1563 (define_insn "neon_vpsmax<mode>"
1564 [(set (match_operand:VD 0 "s_register_operand" "=w")
1565 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1566 (match_operand:VD 2 "s_register_operand" "w")]
1567 UNSPEC_VPSMAX))]
1568 "TARGET_NEON"
1569 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1570 [(set (attr "type")
1571 (if_then_else (match_test "<Is_float_mode>")
1572 (const_string "neon_fp_reduc_minmax_s<q>")
1573 (const_string "neon_reduc_minmax<q>")))]
1574 )
1575
;; Pairwise unsigned minimum of two VDI vectors, modelled as UNSPEC_VPUMIN
;; and emitted as vpmin with the unsigned element suffix.
1576 (define_insn "neon_vpumin<mode>"
1577 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1578 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1579 (match_operand:VDI 2 "s_register_operand" "w")]
1580 UNSPEC_VPUMIN))]
1581 "TARGET_NEON"
1582 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1583 [(set_attr "type" "neon_reduc_minmax<q>")]
1584 )
1585
;; Pairwise unsigned maximum of two VDI vectors, modelled as UNSPEC_VPUMAX
;; and emitted as vpmax with the unsigned element suffix.
1586 (define_insn "neon_vpumax<mode>"
1587 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1588 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1589 (match_operand:VDI 2 "s_register_operand" "w")]
1590 UNSPEC_VPUMAX))]
1591 "TARGET_NEON"
1592 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1593 [(set_attr "type" "neon_reduc_minmax<q>")]
1594 )
1595
1596 ;; Saturating arithmetic
1597
1598 ; NOTE: Neon supports many more saturating variants of instructions than the
1599 ; following, but these are all GCC currently understands.
1600 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1601 ; yet either, although these patterns may be used by intrinsics when they're
1602 ; added.
1603
;; Signed saturating add (ss_plus) for the VD modes: vqadd with the signed
;; element suffix.
1604 (define_insn "*ss_add<mode>_neon"
1605 [(set (match_operand:VD 0 "s_register_operand" "=w")
1606 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1607 (match_operand:VD 2 "s_register_operand" "w")))]
1608 "TARGET_NEON"
1609 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1610 [(set_attr "type" "neon_qadd<q>")]
1611 )
1612
;; Unsigned saturating add (us_plus) for the VD modes: vqadd with the
;; unsigned element suffix.
1613 (define_insn "*us_add<mode>_neon"
1614 [(set (match_operand:VD 0 "s_register_operand" "=w")
1615 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1616 (match_operand:VD 2 "s_register_operand" "w")))]
1617 "TARGET_NEON"
1618 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1619 [(set_attr "type" "neon_qadd<q>")]
1620 )
1621
;; Signed saturating subtract (ss_minus) for the VD modes: vqsub with the
;; signed element suffix.
1622 (define_insn "*ss_sub<mode>_neon"
1623 [(set (match_operand:VD 0 "s_register_operand" "=w")
1624 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1625 (match_operand:VD 2 "s_register_operand" "w")))]
1626 "TARGET_NEON"
1627 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1628 [(set_attr "type" "neon_qsub<q>")]
1629 )
1630
;; Unsigned saturating subtract (us_minus) for the VD modes: vqsub with the
;; unsigned element suffix.
1631 (define_insn "*us_sub<mode>_neon"
1632 [(set (match_operand:VD 0 "s_register_operand" "=w")
1633 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1634 (match_operand:VD 2 "s_register_operand" "w")))]
1635 "TARGET_NEON"
1636 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1637 [(set_attr "type" "neon_qsub<q>")]
1638 )
1639
1640 ;; Conditional instructions. These are comparisons with conditional moves for
1641 ;; vectors. They perform the assignment:
1642 ;;
1643 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1644 ;;
1645 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1646 ;; element-wise.
1647
;; Signed / floating-point vector conditional select for the VDQW modes.
;; The expander builds a lane mask from the comparison in operand 3
;; (operands 4 and 5) and then emits neon_vbsl to pick operand 1 or
;; operand 2 per lane.  It works in three stages:
;;   1. decide whether the compare-against-zero form can be used, otherwise
;;      force operand 5 into a register;
;;   2. choose a base comparison generator and its complement, and note
;;      whether the operands must be swapped (inverse);
;;   3. emit the comparison(s) and, for the codes that were computed in
;;      negated form, swap the two data operands of the vbsl.
;; Float modes are only handled under flag_unsafe_math_optimizations.
1648 (define_expand "vcond<mode><mode>"
1649 [(set (match_operand:VDQW 0 "s_register_operand")
1650 (if_then_else:VDQW
1651 (match_operator 3 "comparison_operator"
1652 [(match_operand:VDQW 4 "s_register_operand")
1653 (match_operand:VDQW 5 "nonmemory_operand")])
1654 (match_operand:VDQW 1 "s_register_operand")
1655 (match_operand:VDQW 2 "s_register_operand")))]
1656 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1657 {
1658 int inverse = 0;
1659 int use_zero_form = 0;
1660 int swap_bsl_operands = 0;
1661 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1662 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1663
1664 rtx (*base_comparison) (rtx, rtx, rtx);
1665 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1666
1667 switch (GET_CODE (operands[3]))
1668 {
1669 case GE:
1670 case GT:
1671 case LE:
1672 case LT:
1673 case EQ:
1674 if (operands[5] == CONST0_RTX (<MODE>mode))
1675 {
1676 use_zero_form = 1;
1677 break;
1678 }
1679 /* Fall through. */
1680 default:
1681 if (!REG_P (operands[5]))
1682 operands[5] = force_reg (<MODE>mode, operands[5]);
1683 }
1684
1685 switch (GET_CODE (operands[3]))
1686 {
1687 case LT:
1688 case UNLT:
1689 inverse = 1;
1690 /* Fall through. */
1691 case GE:
1692 case UNGE:
1693 case ORDERED:
1694 case UNORDERED:
1695 base_comparison = gen_neon_vcge<mode>;
1696 complimentary_comparison = gen_neon_vcgt<mode>;
1697 break;
1698 case LE:
1699 case UNLE:
1700 inverse = 1;
1701 /* Fall through. */
1702 case GT:
1703 case UNGT:
1704 base_comparison = gen_neon_vcgt<mode>;
1705 complimentary_comparison = gen_neon_vcge<mode>;
1706 break;
1707 case EQ:
1708 case NE:
1709 case UNEQ:
1710 base_comparison = gen_neon_vceq<mode>;
1711 complimentary_comparison = gen_neon_vceq<mode>;
1712 break;
1713 default:
1714 gcc_unreachable ();
1715 }
1716
1717 switch (GET_CODE (operands[3]))
1718 {
1719 case LT:
1720 case LE:
1721 case GT:
1722 case GE:
1723 case EQ:
1724 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1725 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1726 a GE b -> a GE b
1727 a GT b -> a GT b
1728 a LE b -> b GE a
1729 a LT b -> b GT a
1730 a EQ b -> a EQ b
1731 Note that there also exist direct comparison against 0 forms,
1732 so catch those as a special case. */
1733 if (use_zero_form)
1734 {
1735 inverse = 0;
1736 switch (GET_CODE (operands[3]))
1737 {
1738 case LT:
1739 base_comparison = gen_neon_vclt<mode>;
1740 break;
1741 case LE:
1742 base_comparison = gen_neon_vcle<mode>;
1743 break;
1744 default:
1745 /* Do nothing, other zero form cases already have the correct
1746 base_comparison. */
1747 break;
1748 }
1749 }
1750
1751 if (!inverse)
1752 emit_insn (base_comparison (mask, operands[4], operands[5]));
1753 else
1754 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1755 break;
1756 case UNLT:
1757 case UNLE:
1758 case UNGT:
1759 case UNGE:
1760 case NE:
1761 /* Vector compare returns false for lanes which are unordered, so if we use
1762 the inverse of the comparison we actually want to emit, then
1763 swap the operands to BSL, we will end up with the correct result.
1764 Note that a NE NaN and NaN NE b are true for all a, b.
1765
1766 Our transformations are:
1767 a UNGE b -> !(b GT a)
1768 a UNGT b -> !(b GE a)
1769 a UNLE b -> !(a GT b)
1770 a UNLT b -> !(a GE b)
1771 a NE b -> !(a EQ b) */
1772
1773 if (inverse)
1774 emit_insn (base_comparison (mask, operands[4], operands[5]));
1775 else
1776 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1777
1778 swap_bsl_operands = 1;
1779 break;
1780 case UNEQ:
1781 /* We check (a > b || b > a). Combining these comparisons gives us
1782 true iff (a != b && a ORDERED b), swapping the operands to BSL
1783 will then give us (a == b || a UNORDERED b) as intended. */
1784
1785 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1786 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1787 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1788 swap_bsl_operands = 1;
1789 break;
1790 case UNORDERED:
1791 /* Operands are ORDERED iff (a > b || b >= a).
1792 Swapping the operands to BSL will give the UNORDERED case. */
1793 swap_bsl_operands = 1;
1794 /* Fall through. */
1795 case ORDERED:
1796 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1797 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1798 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1799 break;
1800 default:
1801 gcc_unreachable ();
1802 }
1803
1804 if (swap_bsl_operands)
1805 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1806 operands[1]));
1807 else
1808 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1809 operands[2]));
1810 DONE;
1811 })
1812
;; Unsigned vector conditional select for the VDQIW modes.  A lane mask is
;; built from the comparison in operand 3 (operands 4 and 5) and neon_vbsl
;; then picks operand 1 or operand 2 per lane.  GEU/GTU/EQ map directly to
;; vcgeu/vcgtu/vceq; LEU/LTU swap the comparison operands; NE is computed
;; as EQ with the two data operands of the vbsl swapped.
1813 (define_expand "vcondu<mode><mode>"
1814 [(set (match_operand:VDQIW 0 "s_register_operand")
1815 (if_then_else:VDQIW
1816 (match_operator 3 "arm_comparison_operator"
1817 [(match_operand:VDQIW 4 "s_register_operand")
1818 (match_operand:VDQIW 5 "s_register_operand")])
1819 (match_operand:VDQIW 1 "s_register_operand")
1820 (match_operand:VDQIW 2 "s_register_operand")))]
1821 "TARGET_NEON"
1822 {
1823 rtx mask;
1824 int inverse = 0;
1825
1826 mask = gen_reg_rtx (<V_cmp_result>mode);
1827
/* Operand 5 is always compared from a register.  There is deliberately no
compare-against-zero shortcut here: gen_neon_vcle/gen_neon_vclt are the
generators the signed vcond expander uses for LE/LT against zero, so they
cannot implement the unsigned LEU/LTU comparisons. */
1830 if (!REG_P (operands[5]))
1831 operands[5] = force_reg (<MODE>mode, operands[5]);
1832
1833 switch (GET_CODE (operands[3]))
1834 {
1835 case GEU:
1836 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1837 break;
1838
1839 case GTU:
1840 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1841 break;
1842
1843 case EQ:
1844 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1845 break;
1846
1847 case LEU:
/* a LEU b is computed as b GEU a. */
1851 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1852 break;
1853
1854 case LTU:
/* a LTU b is computed as b GTU a. */
1858 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1859 break;
1860
1861 case NE:
/* Compute EQ and swap the data operands of the vbsl below. */
1862 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1863 inverse = 1;
1864 break;
1865
1866 default:
1867 gcc_unreachable ();
1868 }
1869
1870 if (inverse)
1871 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1872 operands[1]));
1873 else
1874 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1875 operands[2]));
1876
1877 DONE;
1878 })
1879
1880 ;; Patterns for builtins.
1881
; Builtin expander for plain vadd / vaddq on the VCVTF modes.
; Float modes without -funsafe-math-optimizations go through the unspec
; pattern; every other case uses the generic add<mode>3 pattern.

(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
1897
;; Builtin expander for vadd on the VH (half-precision vector) modes; it
;; always forwards to the add<mode>3_fp16 pattern.
(define_expand "neon_vadd<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  emit_insn (gen_add<mode>3_fp16 (dest, lhs, rhs));
  DONE;
})
1907
;; Builtin expander for vsub on the VH (half-precision vector) modes; it
;; always forwards to the sub<mode>3_fp16 pattern.
(define_expand "neon_vsub<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  emit_insn (gen_sub<mode>3_fp16 (dest, lhs, rhs));
  DONE;
})
1917
1918 ; Note that NEON operations don't support the full IEEE 754 standard: in
1919 ; particular, denormal values are flushed to zero. This means that GCC cannot
1920 ; use those instructions for autovectorization, etc. unless
1921 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1922 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
1923 ; header) must work in either case: if -funsafe-math-optimizations is given,
1924 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1925 ; expand to unspecs (which may potentially limit the extent to which they might
1926 ; be optimized by generic code).
1927
; vadd as an UNSPEC_VADD.  The neon_vadd<mode> expander uses this for
; floating-point modes when flag_unsafe_math_optimizations is false.

(define_insn "neon_vadd<mode>_unspec"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand" "w")
        (match_operand:VCVTF 2 "s_register_operand" "w")]
       UNSPEC_VADD))]
  "TARGET_NEON"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_addsub_s<q>")
          (const_string "neon_add<q>")))]
)
1942
;; vaddl: both inputs are VDI vectors, the result is the <V_widen> mode.
(define_insn "neon_vaddl<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:VDI 1 "s_register_operand" "w")
        (match_operand:VDI 2 "s_register_operand" "w")]
       VADDL))]
  "TARGET_NEON"
  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_add_long")]
)
1952
;; vaddw: the first input is already in the <V_widen> mode, the second is a
;; VDI vector; the result is in the <V_widen> mode.
(define_insn "neon_vaddw<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:<V_widen> 1 "s_register_operand" "w")
        (match_operand:VDI 2 "s_register_operand" "w")]
       VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)
1962
; vhadd and vrhadd.
; The pattern iterates over VDQIW, i.e. both D- and Q-register modes, so the
; scheduling type carries the <q> suffix (as neon_vhsub does with
; neon_sub_halve<q>) rather than always claiming the Q-register variant.

(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)
1974
;; vqadd on the VDQIX modes, expressed through the VQADD unspec iterator.
(define_insn "neon_vqadd<sup><mode>"
  [(set
     (match_operand:VDQIX 0 "s_register_operand" "=w")
     (unspec:VDQIX
       [(match_operand:VDQIX 1 "s_register_operand" "w")
        (match_operand:VDQIX 2 "s_register_operand" "w")]
       VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)
1984
;; vaddhn / vraddhn: two VN inputs, result in the <V_narrow> mode.
(define_insn "neon_v<r>addhn<mode>"
  [(set
     (match_operand:<V_narrow> 0 "s_register_operand" "=w")
     (unspec:<V_narrow>
       [(match_operand:VN 1 "s_register_operand" "w")
        (match_operand:VN 2 "s_register_operand" "w")]
       VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)
1994
;; Polynomial and floating-point vmul, kept as UNSPEC_VMUL.  The scheduling
;; type is picked according to whether the mode is a float mode.
(define_insn "neon_vmul<pf><mode>"
  [(set
     (match_operand:VPF 0 "s_register_operand" "=w")
     (unspec:VPF
       [(match_operand:VPF 1 "s_register_operand" "w")
        (match_operand:VPF 2 "s_register_operand" "w")]
       UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mul_s<q>")
          (const_string "neon_mul_<V_elem_ch><q>")))]
)
2008
;; Named multiply pattern for the VH modes.  Only enabled together with
;; flag_unsafe_math_optimizations.
(define_insn "mul<mode>3"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (mult:VH (match_operand:VH 1 "s_register_operand" "w")
                 (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)
2019
;; Same RTL and output as mul<mode>3 for the VH modes, but conditional on
;; TARGET_NEON_FP16INST alone (no flag_unsafe_math_optimizations test).
(define_insn "neon_vmulf<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (mult:VH (match_operand:VH 1 "s_register_operand" "w")
                 (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)
2030
;; Builtin expander for vmla on the VDQW modes.  Float modes without
;; -funsafe-math-optimizations use the unspec pattern; everything else uses
;; the mul<mode>3add<mode>_neon pattern.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:VDQW 2 "s_register_operand")
   (match_operand:VDQW 3 "s_register_operand")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                             operands[2], operands[3]));
  DONE;
})
2046
;; Builtin expander for vfma on the VCVTF modes.  Builtin operand 1 is
;; passed last to fma<mode>4_intrinsic; operands 2 and 3 come before it.
(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fma<mode>4_intrinsic (dest, operands[2], operands[3],
                                       last_arg));
  DONE;
})
2058
;; Builtin expander for vfma on the VH modes.  Builtin operand 1 is passed
;; last to fma<mode>4_intrinsic; operands 2 and 3 come before it.
(define_expand "neon_vfma<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fma<mode>4_intrinsic (dest, operands[2], operands[3],
                                       last_arg));
  DONE;
})
2070
;; Builtin expander for vfms on the VCVTF modes.  Builtin operand 1 is
;; passed last to fmsub<mode>4_intrinsic; operands 2 and 3 come before it.
(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fmsub<mode>4_intrinsic (dest, operands[2], operands[3],
                                         last_arg));
  DONE;
})
2082
;; Builtin expander for vfms on the VH modes.  Builtin operand 1 is passed
;; last to fmsub<mode>4_intrinsic; operands 2 and 3 come before it.
(define_expand "neon_vfms<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fmsub<mode>4_intrinsic (dest, operands[2], operands[3],
                                         last_arg));
  DONE;
})
2094
;; The RTL template of this expander is only a vehicle for the mode, code
;; and unspec iterators: the body always emits the matching *_intrinsic insn
;; through its gen_* function and ends with DONE, so the template itself is
;; never emitted.

(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand")
           (PLUSMINUS:<VFML>
             (match_operand:<VFML> 2 "s_register_operand")
             (match_operand:<VFML> 3 "s_register_operand"))]
          VFMLHALVES))]
  "TARGET_FP16FML"
{
  /* The same half-selecting constant is supplied for both multiplicand
     operands of the intrinsic insn.  */
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);

  emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
                                                             operands[1],
                                                             operands[2],
                                                             operands[3],
                                                             half_sel,
                                                             half_sel));
  DONE;
})
2116
;; vfmal on the low halves: operands 2 and 3 are each reduced by a
;; vec_select with a vect_par_constant_low selector, float-extended and fed
;; to an fma whose addend (operand 1) is tied to the destination.
(define_insn "vfmal_low<mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 4 "vect_par_constant_low" "")))
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 5 "vect_par_constant_low" "")))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2133
;; vfmsl on the high halves: like the vfmal form, but the selected half of
;; operand 2 is negated before the float_extend.  Both selectors must be
;; vect_par_constant_high; the addend (operand 1) is tied to the destination.
(define_insn "vfmsl_high<mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (neg:<VFMLSEL>
           (vec_select:<VFMLSEL>
             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
             (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 5 "vect_par_constant_high" "")))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2151
;; vfmal on the high halves: operands 2 and 3 are each reduced by a
;; vec_select with a vect_par_constant_high selector, float-extended and fed
;; to an fma whose addend (operand 1) is tied to the destination.
(define_insn "vfmal_high<mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 4 "vect_par_constant_high" "")))
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 5 "vect_par_constant_high" "")))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2168
;; vfmsl on the low halves: like the vfmal form, but the selected half of
;; operand 2 is negated before the float_extend.  Both selectors must be
;; vect_par_constant_low; the addend (operand 1) is tied to the destination.
(define_insn "vfmsl_low<mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (neg:<VFMLSEL>
           (vec_select:<VFMLSEL>
             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
             (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 5 "vect_par_constant_low" "")))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2186
;; Lane form of the vfmal/vfmsl builtin expander where operands 2 and 3
;; share the <VFML> mode.  Operand 4 is the constant lane number.  The body
;; builds the half selector and the NEON_ENDIAN_LANE_N-adjusted lane and
;; emits the matching *_lane_*_intrinsic insn.
(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand")
        (PLUSMINUS:<VFML>
          (match_operand:<VFML> 2 "s_register_operand")
          (match_operand:<VFML> 3 "s_register_operand"))
        (match_operand:SI 4 "const_int_operand")]
       VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
  rtx lane_rtx
    = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));

  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
              (operands[0], operands[1],
               operands[2], operands[3],
               half_sel, lane_rtx));
  DONE;
})
2205
;; vfmal by lane, low half of operand 2.  Operand 3 supplies the lane
;; (operand 5), which is duplicated across a <VFMLSEL> vector before the
;; float_extend.  The addend (operand 1) is tied to the destination.
(define_insn "vfmal_lane_low<mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 4 "vect_par_constant_low" "")))
       (float_extend:VCVTF
         (vec_duplicate:<VFMLSEL>
           (vec_select:HF
             (match_operand:<VFML> 3 "s_register_operand" "x")
             (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int half_lanes = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

  /* Indices below the half-vector width are printed against %<V_lo>3.  */
  if (idx < half_lanes)
    {
      operands[5] = GEN_INT (idx);
      return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
    }

  /* Otherwise rebase the index and print it against %<V_hi>3.  */
  operands[5] = GEN_INT (idx - half_lanes);
  return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2235
;; Lane form of the vfmal/vfmsl builtin expander where operand 3 has the
;; <VFMLSEL2> mode rather than <VFML>.  Operand 4 is the constant lane
;; number, adjusted with NEON_ENDIAN_LANE_N for the <VFMLSEL2> mode.
(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand")
        (PLUSMINUS:<VFML>
          (match_operand:<VFML> 2 "s_register_operand")
          (match_operand:<VFMLSEL2> 3 "s_register_operand"))
        (match_operand:SI 4 "const_int_operand")]
       VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
  rtx lane_rtx
    = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));

  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
              (operands[0], operands[1], operands[2], operands[3],
               half_sel, lane_rtx));
  DONE;
})
2254
;; Used to implement the intrinsics:
;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so care is needed both to get the modes
;; of the sub-expressions right and to pick the S or D subregister that
;; actually holds the requested lane.

(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 4 "vect_par_constant_low" "")))
       (float_extend:VCVTF
         (vec_duplicate:<VFMLSEL>
           (vec_select:HF
             (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
             (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

  /* Split the lane number into a register offset (quotient) and an index
     within that <VFMLSEL>-sized register (remainder).  */
  operands[5] = GEN_INT (lane % lanes_per_reg);

  /* Rebuild operands 2 and 3 as <VFMLSEL> registers so that print_operand
     emits the S or D register name that matches the halved mode.  */
  operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                             REGNO (operands[3]) + lane / lanes_per_reg);
  operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
  return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2291
;; Used to implement the intrinsics:
;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so care is needed both to get the modes
;; of the sub-expressions right and to pick the S or D subregister that
;; actually holds the requested lane.

(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 4 "vect_par_constant_high" "")))
       (float_extend:VCVTF
         (vec_duplicate:<VFMLSEL>
           (vec_select:HF
             (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
             (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

  /* Split the lane number into a register offset (quotient) and an index
     within that <VFMLSEL>-sized register (remainder).  */
  operands[5] = GEN_INT (lane % lanes_per_reg);

  /* Only operand 3 is rebuilt: it is replaced by the half-width register
     that contains the lane, so that this register is the one printed.  */
  operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                             REGNO (operands[3]) + lane / lanes_per_reg);
  return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2327
;; vfmal by lane, high half of operand 2.  Operand 3 supplies the lane
;; (operand 5), which is duplicated across a <VFMLSEL> vector before the
;; float_extend.  The addend (operand 1) is tied to the destination.
(define_insn "vfmal_lane_high<mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (vec_select:<VFMLSEL>
           (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
           (match_operand:<VFML> 4 "vect_par_constant_high" "")))
       (float_extend:VCVTF
         (vec_duplicate:<VFMLSEL>
           (vec_select:HF
             (match_operand:<VFML> 3 "s_register_operand" "x")
             (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int half_lanes = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

  /* Indices below the half-vector width are printed against %<V_lo>3.  */
  if (idx < half_lanes)
    {
      operands[5] = GEN_INT (idx);
      return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
    }

  /* Otherwise rebase the index and print it against %<V_hi>3.  */
  operands[5] = GEN_INT (idx - half_lanes);
  return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2357
;; vfmsl by lane, low half of operand 2 (negated before the float_extend).
;; Operand 3 supplies the lane (operand 5), which is duplicated across a
;; <VFMLSEL> vector.  The addend (operand 1) is tied to the destination.
(define_insn "vfmsl_lane_low<mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (neg:<VFMLSEL>
           (vec_select:<VFMLSEL>
             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
             (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
       (float_extend:VCVTF
         (vec_duplicate:<VFMLSEL>
           (vec_select:HF
             (match_operand:<VFML> 3 "s_register_operand" "x")
             (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int half_lanes = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

  /* Indices below the half-vector width are printed against %<V_lo>3.  */
  if (idx < half_lanes)
    {
      operands[5] = GEN_INT (idx);
      return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
    }

  /* Otherwise rebase the index and print it against %<V_hi>3.  */
  operands[5] = GEN_INT (idx - half_lanes);
  return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2388
;; Used to implement the intrinsics:
;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so care is needed both to get the modes
;; of the sub-expressions right and to pick the S or D subregister that
;; actually holds the requested lane.

(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (neg:<VFMLSEL>
           (vec_select:<VFMLSEL>
             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
             (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
       (float_extend:VCVTF
         (vec_duplicate:<VFMLSEL>
           (vec_select:HF
             (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
             (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

  /* Split the lane number into a register offset (quotient) and an index
     within that <VFMLSEL>-sized register (remainder).  */
  operands[5] = GEN_INT (lane % lanes_per_reg);

  /* Rebuild operands 2 and 3 as <VFMLSEL> registers so that print_operand
     emits the S or D register name that matches the halved mode.  */
  operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                             REGNO (operands[3]) + lane / lanes_per_reg);
  operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
  return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2426
;; Used to implement the intrinsics:
;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so care is needed both to get the modes
;; of the sub-expressions right and to pick the S or D subregister that
;; actually holds the requested lane.

(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (neg:<VFMLSEL>
           (vec_select:<VFMLSEL>
             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
             (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
       (float_extend:VCVTF
         (vec_duplicate:<VFMLSEL>
           (vec_select:HF
             (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
             (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

  /* Split the lane number into a register offset (quotient) and an index
     within that <VFMLSEL>-sized register (remainder).  */
  operands[5] = GEN_INT (lane % lanes_per_reg);

  /* Only operand 3 is rebuilt: it is replaced by the half-width register
     that contains the lane, so that this register is the one printed.  */
  operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                             REGNO (operands[3]) + lane / lanes_per_reg);
  return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2463
;; vfmsl by lane, high half of operand 2 (negated before the float_extend).
;; Operand 3 supplies the lane (operand 5), which is duplicated across a
;; <VFMLSEL> vector.  The addend (operand 1) is tied to the destination.
(define_insn "vfmsl_lane_high<mode>_intrinsic"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (fma:VCVTF
       (float_extend:VCVTF
         (neg:<VFMLSEL>
           (vec_select:<VFMLSEL>
             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
             (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
       (float_extend:VCVTF
         (vec_duplicate:<VFMLSEL>
           (vec_select:HF
             (match_operand:<VFML> 3 "s_register_operand" "x")
             (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
       (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int half_lanes = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

  /* Indices below the half-vector width are printed against %<V_lo>3.  */
  if (idx < half_lanes)
    {
      operands[5] = GEN_INT (idx);
      return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
    }

  /* Otherwise rebase the index and print it against %<V_hi>3.  */
  operands[5] = GEN_INT (idx - half_lanes);
  return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2494
; vmla as an UNSPEC_VMLA.  The neon_vmla<mode> expander uses this for
; floating-point modes when flag_unsafe_math_optimizations is false.
; Operand 1 (the accumulator) is tied to the destination.

(define_insn "neon_vmla<mode>_unspec"
  [(set
     (match_operand:VDQW 0 "s_register_operand" "=w")
     (unspec:VDQW
       [(match_operand:VDQW 1 "s_register_operand" "0")
        (match_operand:VDQW 2 "s_register_operand" "w")
        (match_operand:VDQW 3 "s_register_operand" "w")]
       UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s<q>")
          (const_string "neon_mla_<V_elem_ch><q>")))]
)
2510
;; vmlal: <V_widen> accumulator (operand 1, tied to the destination) plus
;; two VW multiplicands.
(define_insn "neon_vmlal<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:<V_widen> 1 "s_register_operand" "0")
        (match_operand:VW 2 "s_register_operand" "w")
        (match_operand:VW 3 "s_register_operand" "w")]
       VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2521
;; Builtin expander for vmls on the VDQW modes.  Float modes without
;; -funsafe-math-optimizations use the unspec pattern; everything else uses
;; the mul<mode>3neg<mode>add<mode>_neon pattern.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:VDQW 2 "s_register_operand")
   (match_operand:VDQW 3 "s_register_operand")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
                                                      operands[1],
                                                      operands[2],
                                                      operands[3]));
  DONE;
})
2537
; vmls as an UNSPEC_VMLS.  The neon_vmls<mode> expander uses this for
; floating-point modes when flag_unsafe_math_optimizations is false.
; Operand 1 (the accumulator) is tied to the destination.

(define_insn "neon_vmls<mode>_unspec"
  [(set
     (match_operand:VDQW 0 "s_register_operand" "=w")
     (unspec:VDQW
       [(match_operand:VDQW 1 "s_register_operand" "0")
        (match_operand:VDQW 2 "s_register_operand" "w")
        (match_operand:VDQW 3 "s_register_operand" "w")]
       UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mla_s<q>")
          (const_string "neon_mla_<V_elem_ch><q>")))]
)
2553
;; vmlsl: <V_widen> accumulator (operand 1, tied to the destination) and
;; two VW multiplicands.
(define_insn "neon_vmlsl<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:<V_widen> 1 "s_register_operand" "0")
        (match_operand:VW 2 "s_register_operand" "w")
        (match_operand:VW 3 "s_register_operand" "w")]
       VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2564
;; vqdmulh and vqrdmulh on the VMDQI modes (VQDMULH unspec iterator).
(define_insn "neon_vq<r>dmulh<mode>"
  [(set
     (match_operand:VMDQI 0 "s_register_operand" "=w")
     (unspec:VMDQI
       [(match_operand:VMDQI 1 "s_register_operand" "w")
        (match_operand:VMDQI 2 "s_register_operand" "w")]
       VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)
2575
;; vqrdmlah and vqrdmlsh on the VMDQI modes (VQRDMLH_AS unspec iterator).
;; Operand 1 (the accumulator) is tied to the destination.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set
     (match_operand:VMDQI 0 "s_register_operand" "=w")
     (unspec:VMDQI
       [(match_operand:VMDQI 1 "s_register_operand" "0")
        (match_operand:VMDQI 2 "s_register_operand" "w")
        (match_operand:VMDQI 3 "s_register_operand" "w")]
       VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2587
;; vqdmlal: <V_widen> accumulator (operand 1, tied to the destination) and
;; two VMDI multiplicands.
(define_insn "neon_vqdmlal<mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:<V_widen> 1 "s_register_operand" "0")
        (match_operand:VMDI 2 "s_register_operand" "w")
        (match_operand:VMDI 3 "s_register_operand" "w")]
       UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2598
;; vqdmlsl: <V_widen> accumulator (operand 1, tied to the destination) and
;; two VMDI multiplicands.
(define_insn "neon_vqdmlsl<mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:<V_widen> 1 "s_register_operand" "0")
        (match_operand:VMDI 2 "s_register_operand" "w")
        (match_operand:VMDI 3 "s_register_operand" "w")]
       UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2609
;; vmull: two VW inputs, result in the <V_widen> mode.
(define_insn "neon_vmull<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:VW 1 "s_register_operand" "w")
        (match_operand:VW 2 "s_register_operand" "w")]
       VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
2619
;; vqdmull: two VMDI inputs, result in the <V_widen> mode.
(define_insn "neon_vqdmull<mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:VMDI 1 "s_register_operand" "w")
        (match_operand:VMDI 2 "s_register_operand" "w")]
       UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)
2629
;; Builtin expander for vsub on the VCVTF modes.  Float modes without
;; -funsafe-math-optimizations go through the unspec pattern; every other
;; case uses the generic sub<mode>3 pattern.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
2643
; vsub as an UNSPEC_VSUB.  The neon_vsub<mode> expander uses this for
; floating-point modes when flag_unsafe_math_optimizations is false.

(define_insn "neon_vsub<mode>_unspec"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand" "w")
        (match_operand:VCVTF 2 "s_register_operand" "w")]
       UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_addsub_s<q>")
          (const_string "neon_sub<q>")))]
)
2658
;; vsubl: both inputs are VDI vectors, the result is the <V_widen> mode.
(define_insn "neon_vsubl<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:VDI 1 "s_register_operand" "w")
        (match_operand:VDI 2 "s_register_operand" "w")]
       VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)
2668
;; vsubw: the first input is already in the <V_widen> mode, the second is a
;; VDI vector; the result is in the <V_widen> mode.
(define_insn "neon_vsubw<sup><mode>"
  [(set
     (match_operand:<V_widen> 0 "s_register_operand" "=w")
     (unspec:<V_widen>
       [(match_operand:<V_widen> 1 "s_register_operand" "w")
        (match_operand:VDI 2 "s_register_operand" "w")]
       VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)
2678
;; vqsub intrinsic pattern over the VDQIX modes; iterates over the VQSUB
;; unspecs, with <sup> forming part of the data-type suffix.
2679 (define_insn "neon_vqsub<sup><mode>"
2680 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2681 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2682 (match_operand:VDQIX 2 "s_register_operand" "w")]
2683 VQSUB))]
2684 "TARGET_NEON"
2685 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2686 [(set_attr "type" "neon_qsub<q>")]
2687 )
2688
;; vhsub intrinsic pattern over the VDQIW modes; iterates over the VHSUB
;; unspecs.
2689 (define_insn "neon_vhsub<sup><mode>"
2690 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2691 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2692 (match_operand:VDQIW 2 "s_register_operand" "w")]
2693 VHSUB))]
2694 "TARGET_NEON"
2695 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2696 [(set_attr "type" "neon_sub_halve<q>")]
2697 )
2698
;; v<r>subhn intrinsic pattern: two VN operands (printed with %q) give a
;; <V_narrow> result (printed with %P).  Iterates over the VSUBHN unspecs.
2699 (define_insn "neon_v<r>subhn<mode>"
2700 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2701 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2702 (match_operand:VN 2 "s_register_operand" "w")]
2703 VSUBHN))]
2704 "TARGET_NEON"
2705 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2706 [(set_attr "type" "neon_sub_halve_narrow_q")]
2707 )
2708
2709 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2710 ;; without unsafe math optimizations.
;; Expander for the vector compare intrinsics over the VDQW modes.
;; Operand 2 may be a register or zero.  Float vector modes without
;; flag_unsafe_math_optimizations go to the V2SF/V4SF *_insn_unspec
;; patterns; every other case goes to neon_vc<cmp_op><mode>_insn.
2711 (define_expand "neon_vc<cmp_op><mode>"
2712 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2713 (neg:<V_cmp_result>
2714 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2715 (match_operand:VDQW 2 "reg_or_zero_operand")))]
2716 "TARGET_NEON"
2717 {
2718 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2719 are enabled. */
2720 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2721 && !flag_unsafe_math_optimizations)
2722 {
2723 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2724 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2725 whereas this expander iterates over the integer modes as well,
2726 but we will never expand to UNSPECs for the integer comparisons. */
2727 switch (<MODE>mode)
2728 {
2729 case E_V2SFmode:
2730 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2731 operands[1],
2732 operands[2]));
2733 break;
2734 case E_V4SFmode:
2735 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2736 operands[1],
2737 operands[2]));
2738 break;
2739 default:
2740 gcc_unreachable ();
2741 }
2742 }
2743 else
2744 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2745 operands[1],
2746 operands[2]));
2747 DONE;
2748 }
2749 )
2750
;; RTL-comparison form of the vector compare, expressed as the negation of
;; a COMPARISONS result.  Not available for float vector modes unless
;; flag_unsafe_math_optimizations is set.  Alternative 0 compares against a
;; register, alternative 1 (constraint Dz) prints the second source as "#0".
;; The data-type prefix is "f" for float vector modes, else <cmp_type>.
2751 (define_insn "neon_vc<cmp_op><mode>_insn"
2752 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2753 (neg:<V_cmp_result>
2754 (COMPARISONS:<V_cmp_result>
2755 (match_operand:VDQW 1 "s_register_operand" "w,w")
2756 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2757 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2758 && !flag_unsafe_math_optimizations)"
2759 {
2760 char pattern[100];
2761 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2762 " %%<V_reg>1, %s",
2763 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2764 ? "f" : "<cmp_type>",
2765 which_alternative == 0
2766 ? "%<V_reg>2" : "#0");
2767 output_asm_insn (pattern, operands);
2768 return "";
2769 }
2770 [(set (attr "type")
2771 (if_then_else (match_operand 2 "zero_operand")
2772 (const_string "neon_compare_zero<q>")
2773 (const_string "neon_compare<q>")))]
2774 )
2775
;; UNSPEC (NEON_VCMP) form of the VCVTF vector compare, generated by the
;; neon_vc<cmp_op><mode> expander.  Alternative 1 (constraint Dz) prints
;; the second source as "#0".
2776 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2777 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2778 (unspec:<V_cmp_result>
2779 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2780 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2781 NEON_VCMP))]
2782 "TARGET_NEON"
2783 {
2784 char pattern[100];
2785 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2786 " %%<V_reg>1, %s",
2787 which_alternative == 0
2788 ? "%<V_reg>2" : "#0");
2789 output_asm_insn (pattern, operands);
2790 return "";
2791 }
2792 [(set_attr "type" "neon_fp_compare_s<q>")]
2793 )
2794
;; Expander for the vector compare intrinsics over the VH modes; requires
;; TARGET_NEON_FP16INST.  Picks the *_fp16insn_unspec pattern for float
;; vector modes without flag_unsafe_math_optimizations, otherwise the
;; *_fp16insn pattern.
2795 (define_expand "neon_vc<cmp_op><mode>"
2796 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2797 (neg:<V_cmp_result>
2798 (COMPARISONS:VH
2799 (match_operand:VH 1 "s_register_operand")
2800 (match_operand:VH 2 "reg_or_zero_operand")))]
2801 "TARGET_NEON_FP16INST"
2802 {
2803 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2804 are enabled. */
2805 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2806 && !flag_unsafe_math_optimizations)
2807 emit_insn
2808 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2809 (operands[0], operands[1], operands[2]));
2810 else
2811 emit_insn
2812 (gen_neon_vc<cmp_op><mode>_fp16insn
2813 (operands[0], operands[1], operands[2]));
2814 DONE;
2815 })
2816
;; RTL-comparison form of the VH vector compare.  Same shape as
;; neon_vc<cmp_op><mode>_insn, but guarded by TARGET_NEON_FP16INST.
;; Alternative 1 (constraint Dz) prints the second source as "#0".
2817 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2818 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2819 (neg:<V_cmp_result>
2820 (COMPARISONS:<V_cmp_result>
2821 (match_operand:VH 1 "s_register_operand" "w,w")
2822 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2823 "TARGET_NEON_FP16INST
2824 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2825 && !flag_unsafe_math_optimizations)"
2826 {
2827 char pattern[100];
2828 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2829 " %%<V_reg>1, %s",
2830 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2831 ? "f" : "<cmp_type>",
2832 which_alternative == 0
2833 ? "%<V_reg>2" : "#0");
2834 output_asm_insn (pattern, operands);
2835 return "";
2836 }
2837 [(set (attr "type")
2838 (if_then_else (match_operand 2 "zero_operand")
2839 (const_string "neon_compare_zero<q>")
2840 (const_string "neon_compare<q>")))])
2841
;; UNSPEC (NEON_VCMP) form of the VH vector compare, generated by the VH
;; neon_vc<cmp_op><mode> expander.  Alternative 1 (constraint Dz) prints
;; the second source as "#0".
2842 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2843 [(set
2844 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2845 (unspec:<V_cmp_result>
2846 [(match_operand:VH 1 "s_register_operand" "w,w")
2847 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2848 NEON_VCMP))]
2849 "TARGET_NEON_FP16INST"
2850 {
2851 char pattern[100];
2852 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2853 " %%<V_reg>1, %s",
2854 which_alternative == 0
2855 ? "%<V_reg>2" : "#0");
2856 output_asm_insn (pattern, operands);
2857 return "";
2858 }
2859 [(set_attr "type" "neon_fp_compare_s<q>")])
2860
;; Vector compare over the VDQIW modes for the GTUGEU code iterator;
;; always printed with a ".u<size>" data type and register operands only.
2861 (define_insn "neon_vc<cmp_op>u<mode>"
2862 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2863 (neg:<V_cmp_result>
2864 (GTUGEU:<V_cmp_result>
2865 (match_operand:VDQIW 1 "s_register_operand" "w")
2866 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2867 "TARGET_NEON"
2868 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2869 [(set_attr "type" "neon_compare<q>")]
2870 )
2871
;; Expander for the absolute-value compare (vac<cmp_op>) intrinsics over
;; the VCVTF modes.  Uses the RTL form (_insn) when
;; flag_unsafe_math_optimizations is set, otherwise the UNSPEC form
;; (_insn_unspec).
2872 (define_expand "neon_vca<cmp_op><mode>"
2873 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2874 (neg:<V_cmp_result>
2875 (GTGE:<V_cmp_result>
2876 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2877 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2878 "TARGET_NEON"
2879 {
2880 if (flag_unsafe_math_optimizations)
2881 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2882 operands[2]));
2883 else
2884 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2885 operands[1],
2886 operands[2]));
2887 DONE;
2888 }
2889 )
2890
;; RTL form of vac<cmp_op>: a GTGE comparison of the abs of both VCVTF
;; operands, negated.  Only enabled with flag_unsafe_math_optimizations.
2891 (define_insn "neon_vca<cmp_op><mode>_insn"
2892 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2893 (neg:<V_cmp_result>
2894 (GTGE:<V_cmp_result>
2895 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2896 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2897 "TARGET_NEON && flag_unsafe_math_optimizations"
2898 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2899 [(set_attr "type" "neon_fp_compare_s<q>")]
2900 )
2901
;; UNSPEC (NEON_VACMP) form of vac<cmp_op> for the VCVTF modes, generated
;; by the neon_vca<cmp_op><mode> expander when
;; flag_unsafe_math_optimizations is not set.
2902 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2903 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2904 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2905 (match_operand:VCVTF 2 "s_register_operand" "w")]
2906 NEON_VACMP))]
2907 "TARGET_NEON"
2908 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2909 [(set_attr "type" "neon_fp_compare_s<q>")]
2910 )
2911
;; Expander for the absolute-value compare intrinsics over the VH modes
;; (GLTE code iterator); requires TARGET_NEON_FP16INST.  Uses the RTL
;; form (_fp16insn) when flag_unsafe_math_optimizations is set, otherwise
;; the UNSPEC form (_fp16insn_unspec).
2912 (define_expand "neon_vca<cmp_op><mode>"
2913 [(set
2914 (match_operand:<V_cmp_result> 0 "s_register_operand")
2915 (neg:<V_cmp_result>
2916 (GLTE:<V_cmp_result>
2917 (abs:VH (match_operand:VH 1 "s_register_operand"))
2918 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2919 "TARGET_NEON_FP16INST"
2920 {
2921 if (flag_unsafe_math_optimizations)
2922 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2923 (operands[0], operands[1], operands[2]));
2924 else
2925 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2926 (operands[0], operands[1], operands[2]));
2927 DONE;
2928 })
2929
;; RTL form of vac<cmp_op> for the VH modes: a GLTE comparison of the abs
;; of both operands, negated.  Only enabled with TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations.
2930 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2931 [(set
2932 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2933 (neg:<V_cmp_result>
2934 (GLTE:<V_cmp_result>
2935 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2936 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2937 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2938 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2939 [(set_attr "type" "neon_fp_compare_s<q>")]
2940 )
2941
;; UNSPEC (NEON_VAGLTE) form of vac<cmp_op> for the VH modes, generated by
;; the VH neon_vca<cmp_op><mode> expander when
;; flag_unsafe_math_optimizations is not set.  The condition is
;; TARGET_NEON_FP16INST, matching that expander and the other VH patterns;
;; plain TARGET_NEON would let this half-precision pattern be recognised on
;; targets where the expander itself is disabled.
2942 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2943 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2944 (unspec:<V_cmp_result>
2945 [(match_operand:VH 1 "s_register_operand" "w")
2946 (match_operand:VH 2 "s_register_operand" "w")]
2947 NEON_VAGLTE))]
2948 "TARGET_NEON_FP16INST"
2949 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2950 [(set_attr "type" "neon_fp_compare_s<q>")]
2951 )
2952
;; Compare-against-zero expander for the VH modes: forwards to
;; neon_vc<cmp_op><mode> with CONST0_RTX (<MODE>mode) as the second source.
2953 (define_expand "neon_vc<cmp_op>z<mode>"
2954 [(set
2955 (match_operand:<V_cmp_result> 0 "s_register_operand")
2956 (COMPARISONS:<V_cmp_result>
2957 (match_operand:VH 1 "s_register_operand")
2958 (const_int 0)))]
2959 "TARGET_NEON_FP16INST"
2960 {
2961 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2962 CONST0_RTX (<MODE>mode)));
2963 DONE;
2964 })
2965
;; vtst intrinsic pattern over the VDQIW modes, modelled as UNSPEC_VTST.
2966 (define_insn "neon_vtst<mode>"
2967 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2968 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2969 (match_operand:VDQIW 2 "s_register_operand" "w")]
2970 UNSPEC_VTST))]
2971 "TARGET_NEON"
2972 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2973 [(set_attr "type" "neon_tst<q>")]
2974 )
2975
;; vabd intrinsic pattern over the VDQIW modes; iterates over the VABD
;; unspecs.
2976 (define_insn "neon_vabd<sup><mode>"
2977 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2978 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2979 (match_operand:VDQIW 2 "s_register_operand" "w")]
2980 VABD))]
2981 "TARGET_NEON"
2982 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2983 [(set_attr "type" "neon_abd<q>")]
2984 )
2985
;; vabd intrinsic pattern over the VH modes (UNSPEC_VABD_F); requires
;; TARGET_NEON_FP16INST.  The scheduling type is the floating-point one,
;; "neon_fp_abd_s<q>", the same type neon_vabdf<mode> uses for this unspec,
;; rather than "neon_abd<q>", which the VDQIW neon_vabd<sup><mode> pattern
;; uses.
2986 (define_insn "neon_vabd<mode>"
2987 [(set (match_operand:VH 0 "s_register_operand" "=w")
2988 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2989 (match_operand:VH 2 "s_register_operand" "w")]
2990 UNSPEC_VABD_F))]
2991 "TARGET_NEON_FP16INST"
2992 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2993 [(set_attr "type" "neon_fp_abd_s<q>")]
2994 )
2995
;; vabd intrinsic pattern over the VCVTF modes, modelled as UNSPEC_VABD_F.
2996 (define_insn "neon_vabdf<mode>"
2997 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2998 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2999 (match_operand:VCVTF 2 "s_register_operand" "w")]
3000 UNSPEC_VABD_F))]
3001 "TARGET_NEON"
3002 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3003 [(set_attr "type" "neon_fp_abd_s<q>")]
3004 )
3005
;; vabdl intrinsic pattern: two VW operands (printed with %P) give a
;; <V_widen> result (printed with %q).  Iterates over the VABDL unspecs.
3006 (define_insn "neon_vabdl<sup><mode>"
3007 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3008 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3009 (match_operand:VW 2 "s_register_operand" "w")]
3010 VABDL))]
3011 "TARGET_NEON"
3012 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3013 [(set_attr "type" "neon_abd_long")]
3014 )
3015
;; vaba intrinsic pattern: the VABD unspec of operands 2 and 3 plus
;; operand 1.  Operand 1 is tied to the output by constraint "0", so only
;; operands 0, 2 and 3 appear in the assembly.
3016 (define_insn "neon_vaba<sup><mode>"
3017 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3018 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3019 (match_operand:VDQIW 3 "s_register_operand" "w")]
3020 VABD)
3021 (match_operand:VDQIW 1 "s_register_operand" "0")))]
3022 "TARGET_NEON"
3023 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3024 [(set_attr "type" "neon_arith_acc<q>")]
3025 )
3026
;; vabal intrinsic pattern: the VABDL unspec of VW operands 2 and 3 plus
;; the <V_widen> operand 1.  Operand 1 is tied to the output by constraint
;; "0", so only operands 0, 2 and 3 appear in the assembly.
3027 (define_insn "neon_vabal<sup><mode>"
3028 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3029 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3030 (match_operand:VW 3 "s_register_operand" "w")]
3031 VABDL)
3032 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3033 "TARGET_NEON"
3034 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3035 [(set_attr "type" "neon_arith_acc<q>")]
3036 )
3037
;; Expander named <sup>sadv16qi: V16QI operands 1 and 2, V4SI accumulator
;; operand 3, V4SI result operand 0.  Sequence emitted:
;;   1. vabdl of the low V8QI halves of operands 1 and 2 into a V8HI temp;
;;   2. vabal of the high V8QI halves into the same temp;
;;   3. vpadal of the temp into operand 3;
;;   4. a move of operand 3 to operand 0.
3038 (define_expand "<sup>sadv16qi"
3039 [(use (match_operand:V4SI 0 "register_operand"))
3040 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
3041 (use (match_operand:V16QI 2 "register_operand"))] VABAL)
3042 (use (match_operand:V4SI 3 "register_operand"))]
3043 "TARGET_NEON"
3044 {
3045 rtx reduc = gen_reg_rtx (V8HImode);
3046 rtx op1_highpart = gen_reg_rtx (V8QImode);
3047 rtx op2_highpart = gen_reg_rtx (V8QImode);
3048
3049 emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
3050 gen_lowpart (V8QImode, operands[1]),
3051 gen_lowpart (V8QImode, operands[2])));
3052
3053 emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
3054 emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
3055 emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
3056 op1_highpart, op2_highpart));
3057 emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
3058
3059 emit_move_insn (operands[0], operands[3]);
3060 DONE;
3061 }
3062 )
3063
;; vmax/vmin intrinsic pattern over the VDQIW modes; iterates over the
;; VMAXMIN unspecs.
3064 (define_insn "neon_v<maxmin><sup><mode>"
3065 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3066 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3067 (match_operand:VDQIW 2 "s_register_operand" "w")]
3068 VMAXMIN))]
3069 "TARGET_NEON"
3070 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3071 [(set_attr "type" "neon_minmax<q>")]
3072 )
3073
;; vmax/vmin intrinsic pattern over the VCVTF modes; iterates over the
;; VMAXMINF unspecs.
3074 (define_insn "neon_v<maxmin>f<mode>"
3075 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3076 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3077 (match_operand:VCVTF 2 "s_register_operand" "w")]
3078 VMAXMINF))]
3079 "TARGET_NEON"
3080 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3081 [(set_attr "type" "neon_fp_minmax_s<q>")]
3082 )
3083
;; vmax/vmin intrinsic pattern over the VH modes (VMAXMINF unspecs);
;; requires TARGET_NEON_FP16INST.
3084 (define_insn "neon_v<maxmin>f<mode>"
3085 [(set (match_operand:VH 0 "s_register_operand" "=w")
3086 (unspec:VH
3087 [(match_operand:VH 1 "s_register_operand" "w")
3088 (match_operand:VH 2 "s_register_operand" "w")]
3089 VMAXMINF))]
3090 "TARGET_NEON_FP16INST"
3091 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3092 [(set_attr "type" "neon_fp_minmax_s<q>")]
3093 )
3094
;; vpmax/vpmin intrinsic pattern for V4HF (VPMAXMINF unspecs); requires
;; TARGET_NEON_FP16INST.  The scheduling type is the floating-point one,
;; "neon_fp_reduc_minmax_s", the D-register form of the type used by
;; neon_vp<maxmin>f<mode>, rather than "neon_reduc_minmax".
3095 (define_insn "neon_vp<maxmin>fv4hf"
3096 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3097 (unspec:V4HF
3098 [(match_operand:V4HF 1 "s_register_operand" "w")
3099 (match_operand:V4HF 2 "s_register_operand" "w")]
3100 VPMAXMINF))]
3101 "TARGET_NEON_FP16INST"
3102 "vp<maxmin>.f16\t%P0, %P1, %P2"
3103 [(set_attr "type" "neon_fp_reduc_minmax_s")]
3104 )
3105
;; <fmaxmin_op> intrinsic pattern over the VH modes (VMAXMINFNM unspecs);
;; requires TARGET_NEON_FP16INST.
3106 (define_insn "neon_<fmaxmin_op><mode>"
3107 [(set
3108 (match_operand:VH 0 "s_register_operand" "=w")
3109 (unspec:VH
3110 [(match_operand:VH 1 "s_register_operand" "w")
3111 (match_operand:VH 2 "s_register_operand" "w")]
3112 VMAXMINFNM))]
3113 "TARGET_NEON_FP16INST"
3114 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3115 [(set_attr "type" "neon_fp_minmax_s<q>")]
3116 )
3117
3118 ;; v<maxmin>nm intrinsics.
;; VCVTF version of the <fmaxmin_op> pattern (VMAXMINFNM unspecs); requires
;; both TARGET_NEON and TARGET_VFP5.
3119 (define_insn "neon_<fmaxmin_op><mode>"
3120 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3121 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3122 (match_operand:VCVTF 2 "s_register_operand" "w")]
3123 VMAXMINFNM))]
3124 "TARGET_NEON && TARGET_VFP5"
3125 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3126 [(set_attr "type" "neon_fp_minmax_s<q>")]
3127 )
3128
3129 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL and output template as the VCVTF neon_<fmaxmin_op><mode>
;; pattern, but exposed under the name <fmaxmin><mode>3.
3130 (define_insn "<fmaxmin><mode>3"
3131 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3132 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3133 (match_operand:VCVTF 2 "s_register_operand" "w")]
3134 VMAXMINFNM))]
3135 "TARGET_NEON && TARGET_VFP5"
3136 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3137 [(set_attr "type" "neon_fp_minmax_s<q>")]
3138 )
3139
;; Expander for the vpadd intrinsic over the VD modes; forwards all three
;; operands unchanged to neon_vpadd_internal<mode>.
3140 (define_expand "neon_vpadd<mode>"
3141 [(match_operand:VD 0 "s_register_operand")
3142 (match_operand:VD 1 "s_register_operand")
3143 (match_operand:VD 2 "s_register_operand")]
3144 "TARGET_NEON"
3145 {
3146 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3147 operands[2]));
3148 DONE;
3149 })
3150
;; vpaddl intrinsic pattern: one VDQIW source, result in the
;; <V_double_width> mode.  Iterates over the VPADDL unspecs.
3151 (define_insn "neon_vpaddl<sup><mode>"
3152 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3153 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3154 VPADDL))]
3155 "TARGET_NEON"
3156 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3157 [(set_attr "type" "neon_reduc_add_long")]
3158 )
3159
;; vpadal intrinsic pattern: <V_double_width> operand 1 is tied to the
;; output (constraint "0"); operand 2 is the VDQIW source.  Only operands 0
;; and 2 appear in the assembly.  Iterates over the VPADAL unspecs.
3160 (define_insn "neon_vpadal<sup><mode>"
3161 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3162 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3163 (match_operand:VDQIW 2 "s_register_operand" "w")]
3164 VPADAL))]
3165 "TARGET_NEON"
3166 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3167 [(set_attr "type" "neon_reduc_add_acc")]
3168 )
3169
;; vpmax/vpmin intrinsic pattern over the VDI modes; iterates over the
;; VPMAXMIN unspecs.
3170 (define_insn "neon_vp<maxmin><sup><mode>"
3171 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3172 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3173 (match_operand:VDI 2 "s_register_operand" "w")]
3174 VPMAXMIN))]
3175 "TARGET_NEON"
3176 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3177 [(set_attr "type" "neon_reduc_minmax<q>")]
3178 )
3179
;; vpmax/vpmin intrinsic pattern over the VCVTF modes; iterates over the
;; VPMAXMINF unspecs.
3180 (define_insn "neon_vp<maxmin>f<mode>"
3181 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3182 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3183 (match_operand:VCVTF 2 "s_register_operand" "w")]
3184 VPMAXMINF))]
3185 "TARGET_NEON"
3186 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3187 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
3188 )
3189
;; vrecps intrinsic pattern over the VCVTF modes, modelled as UNSPEC_VRECPS.
3190 (define_insn "neon_vrecps<mode>"
3191 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3192 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3193 (match_operand:VCVTF 2 "s_register_operand" "w")]
3194 UNSPEC_VRECPS))]
3195 "TARGET_NEON"
3196 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3197 [(set_attr "type" "neon_fp_recps_s<q>")]
3198 )
3199
;; vrecps intrinsic pattern over the VH modes (UNSPEC_VRECPS); requires
;; TARGET_NEON_FP16INST.
3200 (define_insn "neon_vrecps<mode>"
3201 [(set
3202 (match_operand:VH 0 "s_register_operand" "=w")
3203 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3204 (match_operand:VH 2 "s_register_operand" "w")]
3205 UNSPEC_VRECPS))]
3206 "TARGET_NEON_FP16INST"
3207 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3208 [(set_attr "type" "neon_fp_recps_s<q>")]
3209 )
3210
;; vrsqrts intrinsic pattern over the VCVTF modes, modelled as
;; UNSPEC_VRSQRTS.
3211 (define_insn "neon_vrsqrts<mode>"
3212 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3213 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3214 (match_operand:VCVTF 2 "s_register_operand" "w")]
3215 UNSPEC_VRSQRTS))]
3216 "TARGET_NEON"
3217 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3218 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3219 )
3220
;; vrsqrts intrinsic pattern over the VH modes (UNSPEC_VRSQRTS); requires
;; TARGET_NEON_FP16INST.
3221 (define_insn "neon_vrsqrts<mode>"
3222 [(set
3223 (match_operand:VH 0 "s_register_operand" "=w")
3224 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3225 (match_operand:VH 2 "s_register_operand" "w")]
3226 UNSPEC_VRSQRTS))]
3227 "TARGET_NEON_FP16INST"
3228 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3229 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3230 )
3231
;; Expander for the vabs intrinsic over the VDQW modes; forwards to the
;; generic abs<mode>2 pattern.
3232 (define_expand "neon_vabs<mode>"
3233 [(match_operand:VDQW 0 "s_register_operand")
3234 (match_operand:VDQW 1 "s_register_operand")]
3235 "TARGET_NEON"
3236 {
3237 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3238 DONE;
3239 })
3240
;; vqabs intrinsic pattern over the VDQIW modes, modelled as UNSPEC_VQABS.
3241 (define_insn "neon_vqabs<mode>"
3242 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3243 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3244 UNSPEC_VQABS))]
3245 "TARGET_NEON"
3246 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3247 [(set_attr "type" "neon_qabs<q>")]
3248 )
3249
;; bswap over the VDQHSD modes, printed as vrev<element size>.8.
3250 (define_insn "neon_bswap<mode>"
3251 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3252 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3253 "TARGET_NEON"
3254 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3255 [(set_attr "type" "neon_rev<q>")]
3256 )
3257
;; Expander for the vneg intrinsic over the VDQW modes; forwards to the
;; generic neg<mode>2 pattern.
3258 (define_expand "neon_vneg<mode>"
3259 [(match_operand:VDQW 0 "s_register_operand")
3260 (match_operand:VDQW 1 "s_register_operand")]
3261 "TARGET_NEON"
3262 {
3263 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3264 DONE;
3265 })
3266
3267
3268 ;; The vcadd and vcmla patterns are made UNSPEC explicitly due to the
3269 ;; fact that their usage needs to guarantee that the source vectors are
3270 ;; contiguous. It would be wrong to describe the operation without being able
3271 ;; to describe the permute that is also required, but even if that is done
3272 ;; the permute would have been created as a LOAD_LANES which means the values
3273 ;; in the registers are in the wrong order.
;; vcadd pattern over the VF modes; iterates over the VCADD unspecs, with
;; <rot> printed as the trailing immediate.  Requires TARGET_COMPLEX.
3274 (define_insn "neon_vcadd<rot><mode>"
3275 [(set (match_operand:VF 0 "register_operand" "=w")
3276 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
3277 (match_operand:VF 2 "register_operand" "w")]
3278 VCADD))]
3279 "TARGET_COMPLEX"
3280 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
3281 [(set_attr "type" "neon_fcadd")]
3282 )
3283
;; vcmla pattern over the VF modes: operand 1 (tied to the output by
;; constraint "0") plus the VCMLA unspec of operands 2 and 3.  <rot> is
;; printed as the trailing immediate.  Requires TARGET_COMPLEX.
3284 (define_insn "neon_vcmla<rot><mode>"
3285 [(set (match_operand:VF 0 "register_operand" "=w")
3286 (plus:VF (match_operand:VF 1 "register_operand" "0")
3287 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
3288 (match_operand:VF 3 "register_operand" "w")]
3289 VCMLA)))]
3290 "TARGET_COMPLEX"
3291 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
3292 [(set_attr "type" "neon_fcmla")]
3293 )
3294
;; Lane form of vcmla over the VF modes: operand 3 has the same mode as
;; the other vector operands and operand 4 is the constant lane index.
;; The operands are rewritten by neon_vcmla_lane_prepare_operands before
;; the template is printed.
3295 (define_insn "neon_vcmla_lane<rot><mode>"
3296 [(set (match_operand:VF 0 "s_register_operand" "=w")
3297 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
3298 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
3299 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
3300 (match_operand:SI 4 "const_int_operand" "n")]
3301 VCMLA)))]
3302 "TARGET_COMPLEX"
3303 {
3304 operands = neon_vcmla_lane_prepare_operands (operands);
3305 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3306 }
3307 [(set_attr "type" "neon_fcmla")]
3308 )
3309
;; Lane form of vcmla over the VDF modes where the lane source (operand 3)
;; is in the <V_DOUBLE> mode.  The operands are rewritten by
;; neon_vcmla_lane_prepare_operands before the template is printed.
3310 (define_insn "neon_vcmla_laneq<rot><mode>"
3311 [(set (match_operand:VDF 0 "s_register_operand" "=w")
3312 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3313 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3314 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3315 (match_operand:SI 4 "const_int_operand" "n")]
3316 VCMLA)))]
3317 "TARGET_COMPLEX"
3318 {
3319 operands = neon_vcmla_lane_prepare_operands (operands);
3320 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3321 }
3322 [(set_attr "type" "neon_fcmla")]
3323 )
3324
;; Lane form of vcmla over the VQ_HSF modes where the lane source
;; (operand 3) is in the <V_HALF> mode.  The operands are rewritten by
;; neon_vcmla_lane_prepare_operands before the template is printed.
3325 (define_insn "neon_vcmlaq_lane<rot><mode>"
3326 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3327 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3328 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3329 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3330 (match_operand:SI 4 "const_int_operand" "n")]
3331 VCMLA)))]
3332 "TARGET_COMPLEX"
3333 {
3334 operands = neon_vcmla_lane_prepare_operands (operands);
3335 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3336 }
3337 [(set_attr "type" "neon_fcmla")]
3338 )
3339
3340
3341 ;; These instructions map to the __builtins for the Dot Product operations.
;; Accumulating dot product: VCVTI operand 1 (tied to the output by
;; constraint "0") plus the DOTPROD unspec of the <VSI2QI> operands 2 and
;; 3.  Requires TARGET_DOTPROD.
3342 (define_insn "neon_<sup>dot<vsi2qi>"
3343 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3344 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3345 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3346 "register_operand" "w")
3347 (match_operand:<VSI2QI> 3
3348 "register_operand" "w")]
3349 DOTPROD)))]
3350 "TARGET_DOTPROD"
3351 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3352 [(set_attr "type" "neon_dot<q>")]
3353 )
3354
3355 ;; These instructions map to the __builtins for the Dot Product
3356 ;; indexed operations.
;; Lane-indexed accumulating dot product: operand 3 is a V8QI register
;; (constraint "t") and operand 4 the lane index.  The index is remapped
;; through NEON_ENDIAN_LANE_N for V8QImode before being printed.
3357 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3358 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3359 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3360 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3361 "register_operand" "w")
3362 (match_operand:V8QI 3 "register_operand" "t")
3363 (match_operand:SI 4 "immediate_operand" "i")]
3364 DOTPROD)))]
3365 "TARGET_DOTPROD"
3366 {
3367 operands[4]
3368 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3369 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3370 }
3371 [(set_attr "type" "neon_dot<q>")]
3372 )
3373
3374 ;; These expands map to the Dot Product optab the vectorizer checks for.
3375 ;; The auto-vectorizer expects a dot product builtin that also does an
3376 ;; accumulation into the provided register.
3377 ;; Given the following pattern
3378 ;;
3379 ;; for (i=0; i<len; i++) {
3380 ;; c = a[i] * b[i];
3381 ;; r += c;
3382 ;; }
3383 ;; return r;
3384 ;;
3385 ;; This can be auto-vectorized to
3386 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3387 ;;
3388 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3389 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3390 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3391 ;; ...
3392 ;;
3393 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Expander named <sup>dot_prod<vsi2qi>: emits neon_<sup>dot<vsi2qi> with
;; operand 3 as both accumulator input and destination, then copies
;; operand 3 into operand 0.
3394 (define_expand "<sup>dot_prod<vsi2qi>"
3395 [(set (match_operand:VCVTI 0 "register_operand")
3396 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3397 "register_operand")
3398 (match_operand:<VSI2QI> 2
3399 "register_operand")]
3400 DOTPROD)
3401 (match_operand:VCVTI 3 "register_operand")))]
3402 "TARGET_DOTPROD"
3403 {
3404 emit_insn (
3405 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3406 operands[2]));
3407 emit_insn (gen_rtx_SET (operands[0], operands[3]));
3408 DONE;
3409 })
3410
;; Expander for copysign over the VCVTF modes.  It builds a
;; <V_cmp_result>mode vector with 0x80000000 in every element, copies
;; operand 2 into operand 0, and then emits neon_vbsl<mode> with the mask
;; (viewed in <MODE>mode through a subreg) selecting between operand 0 and
;; operand 1.
3411 (define_expand "neon_copysignf<mode>"
3412 [(match_operand:VCVTF 0 "register_operand")
3413 (match_operand:VCVTF 1 "register_operand")
3414 (match_operand:VCVTF 2 "register_operand")]
3415 "TARGET_NEON"
3416 "{
3417 rtx v_bitmask_cast;
3418 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3419 rtx c = gen_int_mode (0x80000000, SImode);
3420
3421 emit_move_insn (v_bitmask,
3422 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3423 emit_move_insn (operands[0], operands[2]);
3424 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3425 <VCVTF:V_cmp_result>mode, 0);
3426 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3427 operands[1]));
3428
3429 DONE;
3430 }"
3431 )
3432
;; vqneg intrinsic pattern over the VDQIW modes, modelled as UNSPEC_VQNEG.
3433 (define_insn "neon_vqneg<mode>"
3434 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3435 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3436 UNSPEC_VQNEG))]
3437 "TARGET_NEON"
3438 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3439 [(set_attr "type" "neon_qneg<q>")]
3440 )
3441
;; vcls intrinsic pattern over the VDQIW modes, modelled as UNSPEC_VCLS.
3442 (define_insn "neon_vcls<mode>"
3443 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3444 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3445 UNSPEC_VCLS))]
3446 "TARGET_NEON"
3447 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3448 [(set_attr "type" "neon_cls<q>")]
3449 )
3450
;; clz over the VDQIW modes, printed as vclz.  The neon_vclz<mode>
;; expander forwards to this pattern.
3451 (define_insn "clz<mode>2"
3452 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3453 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3454 "TARGET_NEON"
3455 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3456 [(set_attr "type" "neon_cnt<q>")]
3457 )
3458
;; Expander for the vclz intrinsic over the VDQIW modes; forwards to
;; clz<mode>2.
3459 (define_expand "neon_vclz<mode>"
3460 [(match_operand:VDQIW 0 "s_register_operand")
3461 (match_operand:VDQIW 1 "s_register_operand")]
3462 "TARGET_NEON"
3463 {
3464 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3465 DONE;
3466 })
3467
;; popcount over the VE modes, printed as vcnt.  The neon_vcnt<mode>
;; expander forwards to this pattern.
3468 (define_insn "popcount<mode>2"
3469 [(set (match_operand:VE 0 "s_register_operand" "=w")
3470 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3471 "TARGET_NEON"
3472 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3473 [(set_attr "type" "neon_cnt<q>")]
3474 )
3475
;; Expander for the vcnt intrinsic over the VE modes; forwards to
;; popcount<mode>2.
3476 (define_expand "neon_vcnt<mode>"
3477 [(match_operand:VE 0 "s_register_operand")
3478 (match_operand:VE 1 "s_register_operand")]
3479 "TARGET_NEON"
3480 {
3481 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3482 DONE;
3483 })
3484
;; vrecpe intrinsic pattern over the VH modes (UNSPEC_VRECPE), always
;; printed with the .f16 data type; requires TARGET_NEON_FP16INST.
3485 (define_insn "neon_vrecpe<mode>"
3486 [(set (match_operand:VH 0 "s_register_operand" "=w")
3487 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3488 UNSPEC_VRECPE))]
3489 "TARGET_NEON_FP16INST"
3490 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3491 [(set_attr "type" "neon_fp_recpe_s<q>")]
3492 )
3493
;; vrecpe intrinsic pattern over the V32 modes (UNSPEC_VRECPE); the data
;; type suffix comes from <V_u_elem>.
3494 (define_insn "neon_vrecpe<mode>"
3495 [(set (match_operand:V32 0 "s_register_operand" "=w")
3496 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3497 UNSPEC_VRECPE))]
3498 "TARGET_NEON"
3499 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3500 [(set_attr "type" "neon_fp_recpe_s<q>")]
3501 )
3502
;; vrsqrte intrinsic pattern over the V32 modes (UNSPEC_VRSQRTE); the data
;; type suffix comes from <V_u_elem>.
3503 (define_insn "neon_vrsqrte<mode>"
3504 [(set (match_operand:V32 0 "s_register_operand" "=w")
3505 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3506 UNSPEC_VRSQRTE))]
3507 "TARGET_NEON"
3508 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3509 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3510 )
3511
;; Expander for the vmvn intrinsic over the VDQIW modes; forwards to the
;; generic one_cmpl<mode>2 pattern.
3512 (define_expand "neon_vmvn<mode>"
3513 [(match_operand:VDQIW 0 "s_register_operand")
3514 (match_operand:VDQIW 1 "s_register_operand")]
3515 "TARGET_NEON"
3516 {
3517 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3518 DONE;
3519 })
3520
;; Sign-extending lane extract from a VD-mode register into an SImode
;; register (constraint "r"), printed as vmov.s<size>.  When
;; BYTES_BIG_ENDIAN, the lane index is mirrored (nunits - 1 - elt) before
;; it is printed.
3521 (define_insn "neon_vget_lane<mode>_sext_internal"
3522 [(set (match_operand:SI 0 "s_register_operand" "=r")
3523 (sign_extend:SI
3524 (vec_select:<V_elem>
3525 (match_operand:VD 1 "s_register_operand" "w")
3526 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3527 "TARGET_NEON"
3528 {
3529 if (BYTES_BIG_ENDIAN)
3530 {
3531 int elt = INTVAL (operands[2]);
3532 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3533 operands[2] = GEN_INT (elt);
3534 }
3535 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3536 }
3537 [(set_attr "type" "neon_to_gp")]
3538 )
3539
;; Zero-extending lane extract from a VD-mode register into an SImode
;; register (constraint "r"), printed as vmov.u<size>.  When
;; BYTES_BIG_ENDIAN, the lane index is mirrored (nunits - 1 - elt) before
;; it is printed.
3540 (define_insn "neon_vget_lane<mode>_zext_internal"
3541 [(set (match_operand:SI 0 "s_register_operand" "=r")
3542 (zero_extend:SI
3543 (vec_select:<V_elem>
3544 (match_operand:VD 1 "s_register_operand" "w")
3545 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3546 "TARGET_NEON"
3547 {
3548 if (BYTES_BIG_ENDIAN)
3549 {
3550 int elt = INTVAL (operands[2]);
3551 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3552 operands[2] = GEN_INT (elt);
3553 }
3554 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3555 }
3556 [(set_attr "type" "neon_to_gp")]
3557 )
3558
;; Sign-extending lane extract from a VQ2-mode register into an SImode
;; register.  The source is re-expressed as a <V_HALF>mode register:
;; elt / halfelts picks the half (register number regno + 2 * half) and
;; elt % halfelts the lane within it, mirrored when BYTES_BIG_ENDIAN.
;; The instruction is printed through output_asm_insn with these
;; rewritten operands, so the returned template is empty.
3559 (define_insn "neon_vget_lane<mode>_sext_internal"
3560 [(set (match_operand:SI 0 "s_register_operand" "=r")
3561 (sign_extend:SI
3562 (vec_select:<V_elem>
3563 (match_operand:VQ2 1 "s_register_operand" "w")
3564 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3565 "TARGET_NEON"
3566 {
3567 rtx ops[3];
3568 int regno = REGNO (operands[1]);
3569 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3570 unsigned int elt = INTVAL (operands[2]);
3571 unsigned int elt_adj = elt % halfelts;
3572
3573 if (BYTES_BIG_ENDIAN)
3574 elt_adj = halfelts - 1 - elt_adj;
3575
3576 ops[0] = operands[0];
3577 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3578 ops[2] = GEN_INT (elt_adj);
3579 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3580
3581 return "";
3582 }
3583 [(set_attr "type" "neon_to_gp_q")]
3584 )
3585
;; VQ2-mode variant of the zero-extending lane extract.  The vmov is printed
;; against a <V_HALF>mode register built from operand 1's register number,
;; with the lane index reduced modulo the number of lanes in one half.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int half_index = lane / lanes_per_half;
  unsigned int half_lane = lane % lanes_per_half;
  int base_regno = REGNO (operands[1]);
  rtx xops[3];

  /* Big-endian: mirror the lane index within the selected half.  */
  if (BYTES_BIG_ENDIAN)
    half_lane = lanes_per_half - 1 - half_lane;

  /* The register number advances by 2 for each half of the vector.  */
  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode, base_regno + 2 * half_index);
  xops[2] = GEN_INT (half_lane);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been emitted above.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3612
;; Builtin expander for vget_lane on VDQW modes.  32-bit elements go through
;; vec_extract; narrower elements use the sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics use a model in which the element ordering in memory
         is vldm order, while generic RTL uses a model in which it is array
         order.  Translate the lane number from the former to the latter by
         flipping it within each 64-bit chunk.  */
      unsigned int lane_mask = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
      unsigned int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (lane ^ lane_mask);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3642
;; Builtin expander for the unsigned vget_lane on VDQIW modes.  32-bit
;; elements go through vec_extract; narrower elements use the
;; zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics use a model in which the element ordering in memory
         is vldm order, while generic RTL uses a model in which it is array
         order.  Translate the lane number from the former to the latter by
         flipping it within each 64-bit chunk.  */
      unsigned int lane_mask = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
      unsigned int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (lane ^ lane_mask);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3672
;; vget_lane for DImode.  The lane number (operand 2) is not consulted; the
;; expansion is a plain move of operand 1 into operand 0.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
3682
;; vget_lane for V2DI: lane 0 is moved out as the low DImode part of
;; operand 1 and lane 1 as its high DImode part.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:V2DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics use a model in which the element ordering in memory
         is vldm order, while generic RTL uses a model in which it is array
         order.  Translate the lane number accordingly; with two lanes this
         swaps 0 and 1.  */
      unsigned int flipped = INTVAL (operands[2]);
      flipped ^= 2 - 1;
      operands[2] = GEN_INT (flipped);
    }

  int lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  rtx half = (lane == 0
              ? gen_lowpart (DImode, operands[1])
              : gen_highpart (DImode, operands[1]));
  emit_move_insn (operands[0], half);
  DONE;
})
3711
;; Builtin expander for vset_lane.  Operand 1 is the new element, operand 2
;; the input vector and operand 3 the lane number.  The lane is handed to
;; vec_set<mode>_internal as the single-bit mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand:<V_elem> 1 "s_register_operand")
   (match_operand:VDQ 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);

  /* Big-endian: flip the lane number within each 64-bit chunk.  */
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int lanes_per_chunk = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      lane ^= lanes_per_chunk - 1;
    }

  rtx lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         lane_mask, operands[2]));
  DONE;
})
3732
;; vset_lane for DImode.  Operands 2 (the input vector) and 3 (the lane
;; number) are ignored: the expansion just moves the new value, operand 1,
;; into operand 0.

(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:DI 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx new_value = operands[1];

  emit_move_insn (dest, new_value);
  DONE;
})
3745
;; Builtin expander for vcreate: reinterpret the DImode operand 1 in the
;; VD_RE destination mode via gen_lowpart and move it to operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand")
   (match_operand:DI 1 "general_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
3755
;; Duplicate a scalar held in a core register ("r") across every element of
;; a VX-mode vector.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX
          (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
3763
;; Duplicate an HFmode scalar held in a core register across a V4HF vector.
(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3771
;; Duplicate an HFmode scalar held in a core register across a V8HF vector.
(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3779
;; Duplicate a scalar across a V32-mode vector.  Alternative 0 takes the
;; scalar from a core register ("r"); alternative 1 takes it from a "t"
;; register and prints it with the %y modifier.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32
          (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
3789
;; vdup_n for DImode: source and destination have the same mode, so the
;; expansion is a plain move.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
}
)
3799
;; Duplicate a DImode value into both elements of a V2DI vector.  Each
;; alternative emits two vmov instructions, one writing %e0 and one writing
;; %f0; the source is either a core register pair ("r", printed as %Q1/%R1)
;; or a NEON register ("w", printed as %P1).
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
3810
;; Duplicate the lane of operand 1 selected by operand 2 across a VDQW-mode
;; destination.  The destination is printed with %P or %q depending on
;; <Is_d_reg>.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  /* Big-endian: mirror the lane index within the source vector.  */
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);
      operands[2]
        = GEN_INT (GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - lane);
    }

  return (<Is_d_reg>
          ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
          : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
3832
;; VH-mode (half-precision) counterpart of the lane duplicate; additionally
;; requires TARGET_FP16.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (vec_duplicate:VH
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON && TARGET_FP16"
{
  /* Big-endian: mirror the lane index within the source vector.  */
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);
      operands[2]
        = GEN_INT (GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - lane);
    }

  return (<Is_d_reg>
          ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
          : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
3854
;; Builtin expander for vdup_lane on VDQW modes: adjust the lane number for
;; big-endian and emit the _internal insn.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane number within each 64-bit chunk of the source.  */
      unsigned int lane_mask
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode) - 1;
      unsigned int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (lane ^ lane_mask);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3873
;; Builtin expander for vdup_lane on VH modes (requires TARGET_FP16): adjust
;; the lane number for big-endian and emit the _internal insn.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && TARGET_FP16"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane number within each 64-bit chunk of the source.  */
      unsigned int lane_mask
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode) - 1;
      unsigned int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (lane ^ lane_mask);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3892
;; vdup_lane for DImode.  Only lane zero is valid here, so the scalar index
;; (operand 2) is ignored and the expansion is a plain move.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
3903
;; vdup_lane for V2DI.  The DImode source (operand 1) has only a single
;; element, so the scalar index (operand 2) is ignored and the work is
;; delegated to neon_vdup_nv2di.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vdup_nv2di (dest, src));
  DONE;
})
3914
;; Swap the contents of two NEON registers.  The pattern is only enabled
;; once reload has completed, because we don't want combine doing something
;; silly with it; it is used by the post-reload expansion of neon_vcombine.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1) (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
3925
;; Concatenate two VDX vectors into one <V_DOUBLE> vector: operand 1 should
;; be the low part and operand 2 the high part of the destination.  The insn
;; always outputs "#" and is split after reload by neon_split_vcombine.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that we wouldn't actually need to output anything (we could
;; make it so that the reg allocator puts things in the right places
;; magically instead).  Lack of subregs for vectors makes that tricky
;; though, I think.

(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* All of the splitting work is done by the helper.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
3948
;; Builtin expander for vget_high: move out the <V_HALF>mode subreg of
;; operand 1 that starts GET_MODE_SIZE (<V_HALF>mode) bytes into it.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx high_half = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode));
  emit_move_insn (operands[0], high_half);
  DONE;
})
3959
;; Builtin expander for vget_low: move out the <V_HALF>mode subreg of
;; operand 1 at byte offset 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx low_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                      <MODE>mode, 0);
  emit_move_insn (operands[0], low_half);
  DONE;
})
3970
;; float RTL code on VCVTI modes, emitted as vcvt.f32.s32.  Not available
;; when flag_rounding_math is set.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3978
;; unsigned_float RTL code on VCVTI modes, emitted as vcvt.f32.u32.  Not
;; available when flag_rounding_math is set.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3986
;; fix RTL code on VCVTF modes, emitted as vcvt.s32.f32.
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3994
;; unsigned_fix RTL code on VCVTF modes, emitted as vcvt.u32.f32.
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
4002
;; vcvt builtin, VCVTF source: emits vcvt.<sup>32.f32, with <sup> chosen by
;; the VCVT_US iterator.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
4011
;; vcvt builtin, VCVTI source: emits vcvt.f32.<sup>32, with <sup> chosen by
;; the VCVT_US iterator.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
4020
;; Convert a V4HF vector to V4SF (vcvt.f32.f16).  Requires TARGET_FP16.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V4HF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
4029
;; Convert a V4SF vector to V4HF (vcvt.f16.f32).  Requires TARGET_FP16.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4SF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
4038
;; vcvt builtin, VCVTHI source: emits vcvt.f16.<sup>16.  Requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
4049
;; vcvt builtin, VH source: emits vcvt.<sup>16.f16.  Requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
4060
;; vcvt_n builtin, VCVTF source: vcvt.<sup>32.f32 with the immediate
;; operand 2 printed as a third assembler operand.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* NOTE(review): arm_const_bounds is defined elsewhere; presumably it
     diagnoses an immediate outside the given bounds -- confirm whether the
     upper bound is exclusive.  */
  rtx imm = operands[2];
  arm_const_bounds (imm, 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
4073
;; vcvt_n builtin, VH source: vcvt.<sup>16.f16 with the immediate operand 2
;; printed as a third assembler operand.  Requires TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* NOTE(review): arm_const_bounds is defined elsewhere; presumably it
     diagnoses an immediate outside the given bounds -- confirm whether the
     upper bound is exclusive.  */
  rtx imm = operands[2];
  arm_const_bounds (imm, 0, 17);
  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
4087
;; vcvt_n builtin, VCVTI source: vcvt.f32.<sup>32 with the immediate
;; operand 2 printed as a third assembler operand.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* NOTE(review): arm_const_bounds is defined elsewhere; presumably it
     diagnoses an immediate outside the given bounds -- confirm whether the
     upper bound is exclusive.  */
  rtx imm = operands[2];
  arm_const_bounds (imm, 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
4100
;; vcvt_n builtin, VCVTHI source: vcvt.f16.<sup>16 with the immediate
;; operand 2 printed as a third assembler operand.  Requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* NOTE(review): arm_const_bounds is defined elsewhere; presumably it
     diagnoses an immediate outside the given bounds -- confirm whether the
     upper bound is exclusive.  */
  rtx imm = operands[2];
  arm_const_bounds (imm, 0, 17);
  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
4114
;; vcvt<vcvth_op> builtins on VH modes: emits vcvt<vcvth_op>.<sup>16.f16,
;; with the variant chosen by the VCVT_HF_US iterator.  Requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<vcvth_op><sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_HF_US))]
  "TARGET_NEON_FP16INST"
  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
4125
;; vmovn: VN-mode source (printed %q1) to a <V_narrow> destination
;; (printed %P0).
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
4134
;; vqmovn.<sup><size>: VN-mode source to a <V_narrow> destination, with the
;; variant chosen by the VQMOVN iterator.
(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4143
;; vqmovun: VN-mode source to a <V_narrow> destination.
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4152
;; vmovl.<sup><size>: VW-mode source (printed %P1) to a <V_widen>
;; destination (printed %q0), with the variant chosen by the VMOVL iterator.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")]
          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
4161
;; vmul by scalar, VMD modes.  Operand 3 is printed as the lane index into
;; operand 2.  The scheduling type depends on <Is_float_mode>.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "w")
           (match_operand:VMD 2 "s_register_operand"
                                "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
4178
;; vmul by scalar, VMQ modes.  The scalar comes from a <V_HALF>-mode
;; operand 2, with operand 3 printed as the lane index.  The scheduling type
;; depends on <Is_float_mode>.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
4195
;; vmul.f16 by scalar, VH modes.  The scalar always comes from a V4HF
;; operand 2, with operand 3 printed as the lane index.  Requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
)
4207
;; vmull.<sup><size> by scalar: VMDI inputs, <V_widen> result.  Operand 3
;; is printed as the lane index into operand 2.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VMULL_LANE))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
4221
;; vqdmull by scalar: VMDI inputs, <V_widen> result.  Operand 3 is printed
;; as the lane index into operand 2.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
4235
;; vq<r>dmulh by scalar, VMQI modes.  The scalar comes from a <V_HALF>-mode
;; operand 2, with operand 3 printed as the lane index.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
4249
;; vq<r>dmulh by scalar, VMDI modes.  Operand 3 is printed as the lane
;; index into operand 2.  All vector operands are printed with %P, so the
;; scheduling type is the un-suffixed "_scalar" form rather than the
;; quad-word "_scalar_q" form used by the VMQI variant of this pattern.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)
4263
;; vqrdmlah_lane, vqrdmlsh_lane -- VMQI modes.  Operand 1 is tied to the
;; destination ("0"); the scalar comes from a <V_HALF>-mode operand 3, with
;; operand 4 printed as the lane index.  Requires TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "0")
           (match_operand:VMQI 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)
4280
;; vqrdmlah_lane, vqrdmlsh_lane -- VMDI modes.  Operand 1 is tied to the
;; destination ("0"); operand 4 is printed as the lane index into
;; operand 3.  Requires TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)
4296
;; vmla by scalar, VMD modes.  Operand 1 is tied to the destination ("0");
;; operand 4 is printed as the lane index into operand 3.  The scheduling
;; type depends on <Is_float_mode>.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                                "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4314
;; vmla by scalar, VMQ modes.  Operand 1 is tied to the destination ("0");
;; the scalar comes from a <V_HALF>-mode operand 3, with operand 4 printed
;; as the lane index.  The scheduling type depends on <Is_float_mode>.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4332
;; vmlal.<sup><size> by scalar.  The <V_widen> accumulator (operand 1) is
;; tied to the destination; operand 4 is printed as the lane index into
;; operand 3.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLAL_LANE))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4347
;; vqdmlal by scalar.  The <V_widen> accumulator (operand 1) is tied to the
;; destination; operand 4 is printed as the lane index into operand 3.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4362
;; vmls by scalar, VMD modes.  Operand 1 is tied to the destination ("0");
;; operand 4 is printed as the lane index into operand 3.  The scheduling
;; type depends on <Is_float_mode>.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                                "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4380
;; vmls by scalar, VMQ modes.  Operand 1 is tied to the destination ("0");
;; the scalar comes from a <V_HALF>-mode operand 3, with operand 4 printed
;; as the lane index.  The scheduling type depends on <Is_float_mode>.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4398
;; vmlsl.<sup><size> by scalar.  The <V_widen> accumulator (operand 1) is
;; tied to the destination; operand 4 is printed as the lane index into
;; operand 3.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLSL_LANE))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4413
;; vqdmlsl by scalar.  The <V_widen> accumulator (operand 1) is tied to the
;; destination; operand 4 is printed as the lane index into operand 3.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4428
; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that. This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector. In the latter case it's currently better
; to use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.
4435
;; vmul_n for VMD modes: the scalar (operand 2) is written to lane 0 of a
;; fresh pseudo, which then serves as the scalar operand of the _lane
;; pattern with lane index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4448
;; vmul_n for VMQ modes: the scalar (operand 2) is written to lane 0 of a
;; fresh <V_HALF>-mode pseudo, which then serves as the scalar operand of
;; the _lane pattern with lane index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4461
;; vmul_n for VH modes (requires TARGET_NEON_FP16INST): the scalar
;; (operand 2) is written to lane 0 of a fresh V4HF pseudo, which then
;; serves as the scalar operand of the _lane pattern with lane index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx scalar_vec = gen_reg_rtx (V4HFmode);

  emit_insn (gen_neon_vset_lanev4hf (scalar_vec, operands[2], scalar_vec,
                                     const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4474
;; vmulls_n: the scalar (operand 2) is written to lane 0 of a fresh pseudo,
;; which then serves as the scalar operand of neon_vmulls_lane with lane
;; index 0.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1],
                                         scalar_vec, const0_rtx));
  DONE;
})
4487
;; vmullu_n: the scalar (operand 2) is written to lane 0 of a fresh pseudo,
;; which then serves as the scalar operand of neon_vmullu_lane with lane
;; index 0.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1],
                                         scalar_vec, const0_rtx));
  DONE;
})
4500
;; vqdmull by scalar for modes in the VMDI iterator; the result (operand 0)
;; has the <V_widen> mode.  The scalar (operand 2) is staged in lane 0 of a
;; temporary vector and the neon_vqdmull_lane pattern does the work.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
4513
;; vqdmulh by scalar for modes in the VMDI iterator.  The scalar (operand 2)
;; is staged in lane 0 of a temporary vector of the same mode, then the
;; neon_vqdmulh_lane pattern is emitted against lane 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
4526
;; vqrdmulh by scalar for modes in the VMDI iterator.  The scalar (operand 2)
;; is staged in lane 0 of a temporary vector of the same mode, then the
;; neon_vqrdmulh_lane pattern is emitted against lane 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
4539
;; vqdmulh by scalar for modes in the VMQI iterator.  The scalar (operand 2)
;; is staged in lane 0 of a <V_HALF>-mode temporary, then the
;; neon_vqdmulh_lane pattern is emitted against lane 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
4552
;; vqrdmulh by scalar for modes in the VMQI iterator.  The scalar (operand 2)
;; is staged in lane 0 of a <V_HALF>-mode temporary, then the
;; neon_vqrdmulh_lane pattern is emitted against lane 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
4565
;; vmla by scalar for modes in the VMD iterator.  The scalar (operand 3) is
;; staged in lane 0 of a temporary vector; operands 0-2 are forwarded
;; unchanged to the neon_vmla_lane pattern together with that temporary.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
4579
;; vmla by scalar for modes in the VMQ iterator.  The scalar (operand 3) is
;; staged in lane 0 of a <V_HALF>-mode temporary; operands 0-2 are forwarded
;; unchanged to the neon_vmla_lane pattern together with that temporary.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
4593
;; vmlals by scalar.  Operands 0 and 1 have the <V_widen> mode, operand 2 a
;; VMDI mode.  The scalar (operand 3) is staged in lane 0 of a temporary
;; vector and neon_vmlals_lane is emitted against lane 0.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4607
;; vmlalu by scalar.  Operands 0 and 1 have the <V_widen> mode, operand 2 a
;; VMDI mode.  The scalar (operand 3) is staged in lane 0 of a temporary
;; vector and neon_vmlalu_lane is emitted against lane 0.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4621
;; vqdmlal by scalar.  Operands 0 and 1 have the <V_widen> mode, operand 2 a
;; VMDI mode.  The scalar (operand 3) is staged in lane 0 of a temporary
;; vector and neon_vqdmlal_lane is emitted against lane 0.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
4635
;; vmls by scalar for modes in the VMD iterator.  The scalar (operand 3) is
;; staged in lane 0 of a temporary vector; operands 0-2 are forwarded
;; unchanged to the neon_vmls_lane pattern together with that temporary.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
4649
;; vmls by scalar for modes in the VMQ iterator.  The scalar (operand 3) is
;; staged in lane 0 of a <V_HALF>-mode temporary; operands 0-2 are forwarded
;; unchanged to the neon_vmls_lane pattern together with that temporary.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
4663
;; vmlsls by scalar.  Operands 0 and 1 have the <V_widen> mode, operand 2 a
;; VMDI mode.  The scalar (operand 3) is staged in lane 0 of a temporary
;; vector and neon_vmlsls_lane is emitted against lane 0.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4677
;; vmlslu by scalar.  Operands 0 and 1 have the <V_widen> mode, operand 2 a
;; VMDI mode.  The scalar (operand 3) is staged in lane 0 of a temporary
;; vector and neon_vmlslu_lane is emitted against lane 0.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4691
;; vqdmlsl by scalar.  Operands 0 and 1 have the <V_widen> mode, operand 2 a
;; VMDI mode.  The scalar (operand 3) is staged in lane 0 of a temporary
;; vector and neon_vqdmlsl_lane is emitted against lane 0.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
4705
;; vext on two vector registers (operands 1 and 2) with an immediate
;; (operand 3).  The immediate is checked with arm_const_bounds against
;; 0 and the number of units in the mode before the instruction is printed.
(define_insn "@neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
                      (match_operand:VDQX 2 "s_register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VEXT))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  arm_const_bounds (operands[3], 0, nunits);
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
4719
;; vrev64 of operand 1 into operand 0, for modes in the VDQ iterator.
;; Represented as an UNSPEC_VREV64 of the single source operand.
(define_insn "@neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ
          [(match_operand:VDQ 1 "s_register_operand" "w")]
          UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4728
;; vrev32 of operand 1 into operand 0, for modes in the VX iterator.
;; Represented as an UNSPEC_VREV32 of the single source operand.
(define_insn "@neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX
          [(match_operand:VX 1 "s_register_operand" "w")]
          UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4737
;; vrev16 of operand 1 into operand 0, for modes in the VE iterator.
;; Represented as an UNSPEC_VREV16 of the single source operand.
(define_insn "@neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE
          [(match_operand:VE 1 "s_register_operand" "w")]
          UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4746
; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
; allocation.  For an intrinsic of form:
;   rD = vbsl_* (rS, rN, rM)
; We can use any of:
;   vbsl rS, rN, rM  (if D = S)
;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)

;; Three alternatives, one per input that may be tied to the output:
;; alternative 0 ties operand 1, alternative 1 ties operand 3 and
;; alternative 2 ties operand 2 (see the "0" constraints below).
(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
           (match_operand:VDQX 2 "s_register_operand" " w,w,0")
           (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)
4768
;; Expander for vbsl.  Operand 1 arrives in the <V_cmp_result> mode and is
;; re-expressed in the pattern's own mode before the UNSPEC_VBSL set is
;; generated.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:<V_cmp_result> 1 "s_register_operand")
           (match_operand:VDQX 2 "s_register_operand")
           (match_operand:VDQX 3 "s_register_operand")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands of differing modes cannot be aliased together, so view
     operand 1 in <MODE>mode.  */
  rtx selector = gen_lowpart (<MODE>mode, operands[1]);
  operands[1] = selector;
})
4780
;; vshl, vrshl
;; Shift of operand 1 by the vector register operand 2, for the unspecs in
;; the VSHL iterator.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4791
;; vqshl, vqrshl
;; Shift of operand 1 by the vector register operand 2, for the unspecs in
;; the VQSHL iterator.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4802
;; vshr_n, vrshr_n
;; Shift of operand 1 by the immediate operand 2.  The immediate is checked
;; with arm_const_bounds using 1 and (element bits + 1).
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      VSHR_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elem_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4816
;; vshrn_n, vrshrn_n
;; Shift of operand 1 (a VN mode) by the immediate operand 2, producing a
;; <V_narrow> result.  The immediate is checked with arm_const_bounds using
;; 1 and (element bits / 2 + 1).
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VSHRN_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elem_bits / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
4830
;; vqshrn_n, vqrshrn_n
;; Shift of operand 1 (a VN mode) by the immediate operand 2, producing a
;; <V_narrow> result.  The immediate is checked with arm_const_bounds using
;; 1 and (element bits / 2 + 1).
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRN_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elem_bits / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4844
;; vqshrun_n, vqrshrun_n
;; Shift of operand 1 (a VN mode) by the immediate operand 2, producing a
;; <V_narrow> result.  The immediate is checked with arm_const_bounds using
;; 1 and (element bits / 2 + 1).
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRUN_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 1, elem_bits / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4858
;; vshl of operand 1 by the immediate operand 2.  The immediate is checked
;; with arm_const_bounds using 0 and the element bit count.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elem_bits);
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4871
;; vqshl of operand 1 by the immediate operand 2, for the unspecs in the
;; VQSHL_N iterator.  The immediate is checked with arm_const_bounds using
;; 0 and the element bit count.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      VQSHL_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elem_bits);
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4884
;; vqshlu of operand 1 by the immediate operand 2.  The immediate is checked
;; with arm_const_bounds using 0 and the element bit count.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elem_bits);
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4897
;; vshll of operand 1 (a VW mode) by the immediate operand 2, producing a
;; <V_widen> result, for the unspecs in the VSHLL_N iterator.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VSHLL_N))]
  "TARGET_NEON"
{
  /* The bounds handed to arm_const_bounds are 0 and (element bits + 1).
     NOTE(review): this pattern was previously annotated with
     "0 < imm <= size", yet the lower bound passed is 0 -- confirm against
     arm_const_bounds which range is actually intended.  */
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[2], 0, elem_bits + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
4911
;; vsra_n, vrsra_n
;; Operand 1 is tied to the output (constraint "0"); operand 2 is shifted by
;; the immediate operand 3.  The immediate is checked with arm_const_bounds
;; using 1 and (element bits + 1).
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      VSRA_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[3], 1, elem_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
4926
;; vsri.  Operand 1 is tied to the output (constraint "0"); operand 2 is the
;; other vector input and operand 3 the immediate.  The immediate is checked
;; with arm_const_bounds using 1 and (element bits + 1).
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSRI))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[3], 1, elem_bits + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
4940
;; vsli.  Operand 1 is tied to the output (constraint "0"); operand 2 is the
;; other vector input and operand 3 the immediate.  The immediate is checked
;; with arm_const_bounds using 0 and the element bit count.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSLI))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);

  arm_const_bounds (operands[3], 0, elem_bits);
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
4954
;; vtbl with a single-register table: operand 1 is the V8QI table and
;; operand 2 the V8QI index vector.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
4964
;; vtbl with a two-register table held in a TImode operand (operand 1);
;; operand 2 is the V8QI index vector.  The register list is printed from
;; V8QI REGs built at the table's register number and at that number + 2.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  rtx asm_ops[4];
  int table_regno = REGNO (operands[1]);

  asm_ops[0] = operands[0];
  /* Table registers occupy asm_ops[1..2].  */
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[3] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4985
;; vtbl with a three-register table held in an EImode operand (operand 1);
;; operand 2 is the V8QI index vector.  The register list is printed from
;; V8QI REGs built at the table's register number + 0, + 2 and + 4.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  rtx asm_ops[5];
  int table_regno = REGNO (operands[1]);

  asm_ops[0] = operands[0];
  /* Table registers occupy asm_ops[1..3].  */
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[4] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
5007
;; vtbl with a four-register table held in an OImode operand (operand 1);
;; operand 2 is the V8QI index vector.  The register list is printed from
;; V8QI REGs built at the table's register number + 0, + 2, + 4 and + 6.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  rtx asm_ops[6];
  int table_regno = REGNO (operands[1]);

  asm_ops[0] = operands[0];
  /* Table registers occupy asm_ops[1..4].  */
  for (int i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[5] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
5030
;; These three are used by the vec_perm infrastructure for V16QImode.

;; V16QI table lookup with a V16QI table (operand 1).  Split after reload
;; into two neon_vtbl2v8qi instructions, one per V8QI half of the
;; destination and of the index vector (operand 2); both use the whole
;; table viewed as TImode.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx table = gen_lowpart (TImode, operands[1]);
  rtx index = operands[2];

  /* Low V8QI half of destination and index.  */
  unsigned lo_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  rtx dest_lo = simplify_subreg (V8QImode, dest, V16QImode, lo_ofs);
  rtx index_lo = simplify_subreg (V8QImode, index, V16QImode, lo_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_lo, table, index_lo));

  /* High V8QI half of destination and index.  */
  unsigned hi_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  rtx dest_hi = simplify_subreg (V8QImode, dest, V16QImode, hi_ofs);
  rtx index_hi = simplify_subreg (V8QImode, index, V16QImode, hi_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_hi, table, index_hi));
  DONE;
}
  [(set_attr "type" "multiple")]
)
5062
;; V16QI table lookup with an OImode table (operand 1).  Split after reload
;; into two V8QI lookups, one per half of the destination and of the index
;; vector (operand 2).
;;
;; The table operand is OImode, so each half must go through
;; neon_vtbl4v8qi, the V8QI pattern whose table operand is OImode.
;; neon_vtbl2v8qi takes a TImode table: passing the OImode register to it
;; mismatches the operand mode and prints only two of the table registers.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  /* Low V8QI half of destination and index, full OImode table.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));

  /* High V8QI half of destination and index, full OImode table.  */
  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  DONE;
}
  [(set_attr "type" "multiple")]
)
5093
;; ??? Logically we should extend the regular neon_vcombine pattern to
;; handle quad-word input modes, producing octa-word output modes.  But
;; that requires us to add support for octa-word vector modes in moves.
;; That seems overkill for this one use in vec_perm.
;;
;; Concatenates two V16QI registers (operands 1 and 2) into one OImode
;; register (operand 0).  Emitted as "#" and split after reload by
;; neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* The helper emits whatever replaces this insn.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
5113
;; vtbx with a single-register table.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is the V8QI table and operand 3 the V8QI
;; index vector.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
5124
;; vtbx with a two-register table held in a TImode operand (operand 2).
;; Operand 1 is tied to the output; operand 3 is the V8QI index vector.
;; The register list is printed from V8QI REGs built at the table's
;; register number and at that number + 2.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx asm_ops[4];
  int table_regno = REGNO (operands[2]);

  asm_ops[0] = operands[0];
  /* Table registers occupy asm_ops[1..2].  */
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[3] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
5146
;; vtbx with a three-register table held in an EImode operand (operand 2).
;; Operand 1 is tied to the output; operand 3 is the V8QI index vector.
;; The register list is printed from V8QI REGs built at the table's
;; register number + 0, + 2 and + 4.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx asm_ops[5];
  int table_regno = REGNO (operands[2]);

  asm_ops[0] = operands[0];
  /* Table registers occupy asm_ops[1..3].  */
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[4] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
5169
;; vtbx with a four-register table held in an OImode operand (operand 2).
;; Operand 1 is tied to the output; operand 3 is the V8QI index vector.
;; The register list is printed from V8QI REGs built at the table's
;; register number + 0, + 2, + 4 and + 6.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:OI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx asm_ops[6];
  int table_regno = REGNO (operands[2]);

  asm_ops[0] = operands[0];
  /* Table registers occupy asm_ops[1..4].  */
  for (int i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[5] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
5193
;; Expander producing both vtrn results at once: operand 0 receives the
;; UNSPEC_VTRN1 result and operand 3 the UNSPEC_VTRN2 result of the same two
;; inputs (operands 1 and 2).  There is no preparation code.
(define_expand "@neon_vtrn<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VTRN1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
5205
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2; input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2, and both outputs are
;; marked earlyclobber.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
          UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
5219
;; Expander producing both vzip results at once: operand 0 receives the
;; UNSPEC_VZIP1 result and operand 3 the UNSPEC_VZIP2 result of the same two
;; inputs (operands 1 and 2).  There is no preparation code.
(define_expand "@neon_vzip<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VZIP1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
5231
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2; input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2, and both outputs are
;; marked earlyclobber.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
          UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
5245
;; Expander producing both vuzp results at once: operand 0 receives the
;; UNSPEC_VUZP1 result and operand 3 the UNSPEC_VUZP2 result of the same two
;; inputs (operands 1 and 2).  There is no preparation code.
(define_expand "@neon_vuzp<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VUZP1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
5257
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2; input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2, and both outputs are
;; marked earlyclobber.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
          UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
5271
;; vec_load_lanes where source and destination share one VDQX mode:
;; expands directly to an UNSPEC_VLD1 of the memory operand (operand 1)
;; into the register operand 0.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON")
5277
;; vld1 of a whole VDQX vector from the "Um" memory operand 1 into the
;; register operand 0.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
5286
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  The lane numbers are restored to architectural
;; lane order here.
;;
;; VDX variant: loads one element from memory (operand 1) into lane
;; operand 3 of the vector; operand 2, the incoming vector, is tied to the
;; output.  For a single-unit mode the whole register is loaded instead.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* Replace the lane operand by its NEON_ENDIAN_LANE_N-mapped value.  */
  operands[3] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode,
                                             INTVAL (operands[3])));

  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
5308
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VQX variant: the lane is resolved to one <V_HALF>-mode register of the
;; destination.  Lanes in the upper half select the register two numbers
;; above the destination's and are rebased by half the unit count.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane_no = NEON_ENDIAN_LANE_N (<MODE>mode,
                                              INTVAL (operands[3]));
  int half_regno = REGNO (operands[0]);

  if (lane_no >= half)
    {
      /* Upper half: move to the second register and rebase the lane.  */
      lane_no -= half;
      half_regno += 2;
    }

  operands[3] = GEN_INT (lane_no);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
5337
;; vld1 to all lanes for modes in the VD_LANE iterator: a vec_duplicate of
;; the element loaded from the "Um" memory operand 1.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5345
;; Special case for DImode.  Treat it exactly like a simple load.
;; The expansion is the same UNSPEC_VLD1 set used for a plain vld1, with no
;; preparation code.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand")
        (unspec:DI
          [(match_operand:DI 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
5354
;; vld1 to all lanes for modes in the VQ2 iterator: a vec_duplicate of the
;; element loaded from the "Um" memory operand 1.  The register list names
;; the destination through both the %e0 and %f0 operand modifiers.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
{
  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5364
;; V2DI duplicate-load.  Emitted as "#" and split after reload: the DImode
;; low part of the destination is loaded with neon_vld1_dupdi and then
;; copied into the DImode high part.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);
  rtx high_half = gen_highpart (DImode, operands[0]);

  /* Load once into the low half, then replicate it.  */
  emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));
  emit_move_insn (high_half, low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
5381
;; vec_store_lanes where source and destination share one VDQX mode:
;; expands directly to an UNSPEC_VST1 of the register operand 1 into the
;; memory operand 0.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand")]
          UNSPEC_VST1))]
  "TARGET_NEON")
5387
;; vst1 of a whole VDQX vector from the register operand 1 to the "Um"
;; memory operand 0.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")]
          UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
5395
;; Store one lane of a VDX vector.  The lane number is remapped with
;; NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane for why lanes are
;; reversed on big-endian targets).  Single-element modes store the whole
;; register without a lane index.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_elem>
	  [(match_operand:VDX 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	  UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));

  /* Replace the lane operand with the endian-corrected index.  */
  operands[2] = GEN_INT (lane);

  if (nunits != 1)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  return "vst1.<V_sz_elem>\t{%P1}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5416
;; Store one lane of a VQX vector.  After the NEON_ENDIAN_LANE_N remapping
;; (see the comment on neon_vld1_lane), a lane in the upper half is turned
;; into a lane of the register two hard register numbers further on, and
;; operand 1 is rewritten as a V_HALF-mode register.  Two-element modes
;; store that whole half without a lane index.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_elem>
	  [(match_operand:VQX 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	  UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int src_regno = REGNO (operands[1]);

  /* Lanes in the upper half live in the second half-width register.  */
  if (lane >= half)
    {
      src_regno += 2;
      lane -= half;
    }

  operands[2] = GEN_INT (lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, src_regno);

  if (nunits != 2)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  return "vst1.<V_sz_elem>\t{%P1}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5444
;; Expander for a two-vector load-lanes operation on VDX modes; the TImode
;; result is produced by the UNSPEC_VLD2 pattern with no preparation code.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
	(unspec:TI
	  [(match_operand:TI 1 "neon_struct_operand")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD2))]
  "TARGET_NEON"
  ""
)
5451
;; Load a TImode register group for VDX modes.  Element sizes other than 64
;; use vld2; the 64-bit case is output as vld1.64 instead, and the type
;; attribute follows the same split.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
	(unspec:TI
	  [(match_operand:TI 1 "neon_struct_operand" "Um")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld2.<V_sz_elem>\t%h0, %A1";

  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
		    (const_string "neon_load1_2reg<q>")
		    (const_string "neon_load2_2reg<q>")))]
)
5469
;; Expander for a two-vector load-lanes operation on VQ2 modes; the OImode
;; result is produced by the UNSPEC_VLD2 pattern with no preparation code.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
	(unspec:OI
	  [(match_operand:OI 1 "neon_struct_operand")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD2))]
  "TARGET_NEON"
  ""
)
5476
;; Load an OImode register group for VQ2 modes with a single vld2.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
	(unspec:OI
	  [(match_operand:OI 1 "neon_struct_operand" "Um")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")]
)
5485
;; Load one two-element structure into a given lane of a TImode register
;; group (VD_LANE modes); operand 2 is tied to the destination.  The lane is
;; remapped with NEON_ENDIAN_LANE_N -- see the comment on neon_vld1_lane for
;; why lane numbers are reversed on big-endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
	(unspec:TI
	  [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
	   (match_operand:TI 2 "s_register_operand" "0")
	   (match_operand:SI 3 "immediate_operand" "i")
	   (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  rtx xops[4];
  int base = REGNO (operands[0]);
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));

  /* Destination registers are numbered base and base + 2.  */
  for (int i = 0; i < 2; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[2] = operands[1];
  xops[3] = GEN_INT (lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5509
;; Load one two-element structure into a given lane of an OImode register
;; group (VQ_HS modes); operand 2 is tied to the destination.  After the
;; NEON_ENDIAN_LANE_N remapping (see the comment on neon_vld1_lane), a lane
;; in the upper half selects the registers two hard register numbers higher.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
	(unspec:OI
	  [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
	   (match_operand:OI 2 "s_register_operand" "0")
	   (match_operand:SI 3 "immediate_operand" "i")
	   (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  rtx xops[4];
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);

  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  /* Destination registers are numbered base and base + 4.  */
  for (int i = 0; i < 2; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[2] = operands[1];
  xops[3] = GEN_INT (lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5539
;; Load one two-element structure and replicate it to all lanes of a TImode
;; register group (VDX modes).  Modes with a single element have nothing to
;; replicate, so the template falls back to a plain vld1 of the whole group.
;; That fallback is the same instruction neon_vld2<mode> emits for 64-bit
;; elements, so it is given the same scheduling type (neon_load1_2reg)
;; rather than the one-register type.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
	(unspec:TI
	  [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
		    (const_string "neon_load2_all_lanes<q>")
		    (const_string "neon_load1_2reg<q>")))]
)
5557
;; Expander for a two-vector store-lanes operation on VDX modes; the TImode
;; source is stored by the UNSPEC_VST2 pattern with no preparation code.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
	(unspec:TI
	  [(match_operand:TI 1 "s_register_operand")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST2))]
  "TARGET_NEON"
  ""
)
5564
;; Store a TImode register group for VDX modes.  Element sizes other than 64
;; use vst2; the 64-bit case is output as vst1.64 instead.  The vst2 form
;; stores the whole register list, so it is typed as a two-register vst2
;; (neon_store2_2reg), matching neon_load2_2reg on the corresponding load,
;; instead of the single-lane store type.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
	(unspec:TI
	  [(match_operand:TI 1 "s_register_operand" "w")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
		    (const_string "neon_store1_2reg<q>")
		    (const_string "neon_store2_2reg<q>")))]
)
5582
;; Expander for a two-vector store-lanes operation on VQ2 modes; the OImode
;; source is stored by the UNSPEC_VST2 pattern with no preparation code.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
	(unspec:OI
	  [(match_operand:OI 1 "s_register_operand")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST2))]
  "TARGET_NEON"
  ""
)
5589
;; Store an OImode register group for VQ2 modes with a single vst2.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
	(unspec:OI
	  [(match_operand:OI 1 "s_register_operand" "w")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
5599
;; Store one lane from each register of a TImode group as a two-element
;; structure (VD_LANE modes).  The lane is remapped with NEON_ENDIAN_LANE_N;
;; see the comment on neon_vld1_lane for why lane numbers are reversed on
;; big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_two_elem>
	  [(match_operand:TI 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")
	   (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  rtx xops[4];
  int base = REGNO (operands[1]);
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));

  xops[0] = operands[0];
  /* Source registers are numbered base and base + 2.  */
  for (int i = 0; i < 2; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = GEN_INT (lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5623
;; Store one lane from each vector of an OImode group as a two-element
;; structure (VQ_HS modes).  After the NEON_ENDIAN_LANE_N remapping (see the
;; comment on neon_vld1_lane), a lane in the upper half selects the
;; registers two hard register numbers higher.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_two_elem>
	  [(match_operand:OI 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")
	   (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  rtx xops[4];
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);

  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  xops[0] = operands[0];
  /* Source registers are numbered base and base + 4.  */
  for (int i = 0; i < 2; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = GEN_INT (lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5653
;; Expander for a three-vector load-lanes operation on VDX modes; the EImode
;; result is produced by the UNSPEC_VLD3 pattern with no preparation code.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
	(unspec:EI
	  [(match_operand:EI 1 "neon_struct_operand")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD3))]
  "TARGET_NEON"
  ""
)
5660
;; Load an EImode register group for VDX modes.  Element sizes other than 64
;; use vld3; the 64-bit case is output as vld1.64 instead, and the type
;; attribute follows the same split.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
	(unspec:EI
	  [(match_operand:EI 1 "neon_struct_operand" "Um")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld3.<V_sz_elem>\t%h0, %A1";

  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
		    (const_string "neon_load1_3reg<q>")
		    (const_string "neon_load3_3reg<q>")))]
)
5678
;; Expander for a three-vector load-lanes operation on VQ2 modes.  It simply
;; forwards both operands to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
}
)
5688
;; CImode vld3 for VQ2 modes, done in two steps: neon_vld3qa reads an
;; EImode block at offset 0 of the source, then neon_vld3qb reads the
;; EImode block that follows it, taking the partially filled destination as
;; an extra input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode block of the source.  */
  rtx mem0 = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], mem0));

  /* Second EImode block, immediately after the first.  */
  rtx mem1 = adjust_address (mem0, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], mem1, operands[0]));
  DONE;
}
)
5703
;; First step of the CImode vld3: a vld3 from an EImode memory block into
;; the registers numbered REGNO, REGNO + 4 and REGNO + 8 of the destination.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
	(unspec:CI
	  [(match_operand:EI 1 "neon_struct_operand" "Um")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  rtx xops[4];
  int base = REGNO (operands[0]);

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5722
;; Second step of the CImode vld3: a vld3 from an EImode memory block into
;; the registers numbered REGNO + 2, REGNO + 6 and REGNO + 10.  Operand 2 is
;; tied to the destination so the registers written by neon_vld3qa survive.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
	(unspec:CI
	  [(match_operand:EI 1 "neon_struct_operand" "Um")
	   (match_operand:CI 2 "s_register_operand" "0")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  rtx xops[4];
  int base = REGNO (operands[0]) + 2;

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5742
;; Load one three-element structure into a given lane of an EImode register
;; group (VD_LANE modes); operand 2 is tied to the destination.  The lane is
;; remapped with NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane).
;; Note that the address is printed with a plain operand code, not the A
;; modifier used by the vld2/vld4 lane patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
	(unspec:EI
	  [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
	   (match_operand:EI 2 "s_register_operand" "0")
	   (match_operand:SI 3 "immediate_operand" "i")
	   (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  rtx xops[5];
  int base = REGNO (operands[0]);
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));

  /* Destination registers are numbered base, base + 2 and base + 4.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (lane);

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
		   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5768
;; Load one three-element structure into a given lane of a CImode register
;; group (VQ_HS modes); operand 2 is tied to the destination.  After the
;; NEON_ENDIAN_LANE_N remapping (see the comment on neon_vld1_lane), a lane
;; in the upper half selects the registers two hard register numbers higher.
;; The address is printed with a plain operand code, not the A modifier.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
	(unspec:CI
	  [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
	   (match_operand:CI 2 "s_register_operand" "0")
	   (match_operand:SI 3 "immediate_operand" "i")
	   (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  rtx xops[5];
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);

  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  /* Destination registers are numbered base, base + 4 and base + 8.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (lane);

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
		   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5800
;; Load one three-element structure and replicate it to all lanes of an
;; EImode register group (VDX modes).  Modes with a single element have
;; nothing to replicate, so the template falls back to a plain vld1 of the
;; whole group.  That fallback is the same instruction neon_vld3<mode> emits
;; for 64-bit elements, so it is given the same scheduling type
;; (neon_load1_3reg) rather than the one-register type.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
	(unspec:EI
	  [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[4];
      /* Destination registers are numbered regno, regno + 2, regno + 4.  */
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = operands[1];
      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
		    (const_string "neon_load3_all_lanes<q>")
		    (const_string "neon_load1_3reg<q>")))]
)
5826
;; Expander for a three-vector store-lanes operation on VDX modes; the
;; EImode source is stored by the UNSPEC_VST3 pattern with no preparation
;; code.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
	(unspec:EI
	  [(match_operand:EI 1 "s_register_operand")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST3))]
  "TARGET_NEON"
  ""
)
5833
;; Store an EImode register group for VDX modes.  Element sizes other than
;; 64 use vst3; the 64-bit case is output as vst1.64 instead.  The vst3 form
;; stores the whole register list, so it is typed as a three-register vst3
;; (neon_store3_3reg), matching neon_load3_3reg on the corresponding load,
;; instead of the single-lane store type.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
	(unspec:EI
	  [(match_operand:EI 1 "s_register_operand" "w")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
		    (const_string "neon_store1_3reg<q>")
		    (const_string "neon_store3_3reg<q>")))]
)
5850
;; Expander for a three-vector store-lanes operation on VQ2 modes.  It
;; simply forwards both operands to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
}
)
5860
;; CImode vst3 for VQ2 modes, done in two steps: neon_vst3qa writes an
;; EImode block at offset 0 of the destination, then neon_vst3qb writes the
;; EImode block that follows it.  Both read the same CImode source.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode block of the destination.  */
  rtx mem0 = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (mem0, operands[1]));

  /* Second EImode block, immediately after the first.  */
  rtx mem1 = adjust_address (mem0, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (mem1, operands[1]));
  DONE;
}
)
5875
;; First step of the CImode vst3: a vst3 to an EImode memory block from the
;; registers numbered REGNO, REGNO + 4 and REGNO + 8 of the source.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
	(unspec:EI
	  [(match_operand:CI 1 "s_register_operand" "w")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST3A))]
  "TARGET_NEON"
{
  rtx xops[4];
  int base = REGNO (operands[1]);

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5894
;; Second step of the CImode vst3: a vst3 to an EImode memory block from the
;; registers numbered REGNO + 2, REGNO + 6 and REGNO + 10 of the source.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
	(unspec:EI
	  [(match_operand:CI 1 "s_register_operand" "w")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST3B))]
  "TARGET_NEON"
{
  rtx xops[4];
  int base = REGNO (operands[1]) + 2;

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5913
;; Store one lane from each register of an EImode group as a three-element
;; structure (VD_LANE modes).  The lane is remapped with NEON_ENDIAN_LANE_N
;; (see the comment on neon_vld1_lane).  The address is printed with a plain
;; operand code, not the A modifier used by the vst2/vst4 lane patterns.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_three_elem>
	  [(match_operand:EI 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")
	   (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  rtx xops[5];
  int base = REGNO (operands[1]);
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));

  xops[0] = operands[0];
  /* Source registers are numbered base, base + 2 and base + 4.  */
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = GEN_INT (lane);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
		   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5939
;; Store one lane from each vector of a CImode group as a three-element
;; structure (VQ_HS modes).  After the NEON_ENDIAN_LANE_N remapping (see the
;; comment on neon_vld1_lane), a lane in the upper half selects the
;; registers two hard register numbers higher.  The address is printed with
;; a plain operand code, not the A modifier.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_three_elem>
	  [(match_operand:CI 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")
	   (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  rtx xops[5];
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);

  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  xops[0] = operands[0];
  /* Source registers are numbered base, base + 4 and base + 8.  */
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = GEN_INT (lane);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
		   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5971
;; Expander for a four-vector load-lanes operation on VDX modes; the OImode
;; result is produced by the UNSPEC_VLD4 pattern with no preparation code.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
	(unspec:OI
	  [(match_operand:OI 1 "neon_struct_operand")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD4))]
  "TARGET_NEON"
  ""
)
5978
;; Load an OImode register group for VDX modes.  Element sizes other than 64
;; use vld4; the 64-bit case is output as vld1.64 instead, and the type
;; attribute follows the same split.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
	(unspec:OI
	  [(match_operand:OI 1 "neon_struct_operand" "Um")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld4.<V_sz_elem>\t%h0, %A1";

  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
		    (const_string "neon_load1_4reg<q>")
		    (const_string "neon_load4_4reg<q>")))]
)
5996
;; Expander for a four-vector load-lanes operation on VQ2 modes.  It simply
;; forwards both operands to the neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
}
)
6006
;; XImode vld4 for VQ2 modes, done in two steps: neon_vld4qa reads an
;; OImode block at offset 0 of the source, then neon_vld4qb reads the
;; OImode block that follows it, taking the partially filled destination as
;; an extra input.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode block of the source.  */
  rtx mem0 = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], mem0));

  /* Second OImode block, immediately after the first.  */
  rtx mem1 = adjust_address (mem0, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], mem1, operands[0]));
  DONE;
}
)
6021
;; First step of the XImode vld4: a vld4 from an OImode memory block into
;; the registers numbered REGNO, REGNO + 4, REGNO + 8 and REGNO + 12 of the
;; destination.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
	(unspec:XI
	  [(match_operand:OI 1 "neon_struct_operand" "Um")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  rtx xops[5];
  int base = REGNO (operands[0]);

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
6041
;; Second step of the XImode vld4: a vld4 from an OImode memory block into
;; the registers numbered REGNO + 2, REGNO + 6, REGNO + 10 and REGNO + 14.
;; Operand 2 is tied to the destination so the registers written by
;; neon_vld4qa survive.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
	(unspec:XI
	  [(match_operand:OI 1 "neon_struct_operand" "Um")
	   (match_operand:XI 2 "s_register_operand" "0")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  rtx xops[5];
  int base = REGNO (operands[0]) + 2;

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
6062
;; Load one four-element structure into a given lane of an OImode register
;; group (VD_LANE modes); operand 2 is tied to the destination.  The lane is
;; remapped with NEON_ENDIAN_LANE_N -- see the comment on neon_vld1_lane for
;; why lane numbers are reversed on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
	(unspec:OI
	  [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
	   (match_operand:OI 2 "s_register_operand" "0")
	   (match_operand:SI 3 "immediate_operand" "i")
	   (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  rtx xops[6];
  int base = REGNO (operands[0]);
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));

  /* Destination registers are numbered base, base + 2, base + 4 and
     base + 6.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
		   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
6089
;; Load one four-element structure into a given lane of an XImode register
;; group (VQ_HS modes); operand 2 is tied to the destination.  After the
;; NEON_ENDIAN_LANE_N remapping (see the comment on neon_vld1_lane), a lane
;; in the upper half selects the registers two hard register numbers higher.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
	(unspec:XI
	  [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
	   (match_operand:XI 2 "s_register_operand" "0")
	   (match_operand:SI 3 "immediate_operand" "i")
	   (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  rtx xops[6];
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);

  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  /* Destination registers are numbered base, base + 4, base + 8 and
     base + 12.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
		   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
6122
;; Load one four-element structure and replicate it to all lanes of an
;; OImode register group (VDX modes).  Modes with a single element have
;; nothing to replicate, so the template falls back to a plain vld1 of the
;; whole group.  That fallback is the same instruction neon_vld4<mode> emits
;; for 64-bit elements, so it is given the same scheduling type
;; (neon_load1_4reg) rather than the one-register type.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
	(unspec:OI
	  [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[5];
      /* Destination registers are numbered regno, regno + 2, regno + 4
	 and regno + 6.  */
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = gen_rtx_REG (DImode, regno + 6);
      ops[4] = operands[1];
      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
		       ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
		    (const_string "neon_load4_all_lanes<q>")
		    (const_string "neon_load1_4reg<q>")))]
)
6151
;; Expander for a four-vector store-lanes operation on VDX modes; the OImode
;; source is stored by the UNSPEC_VST4 pattern with no preparation code.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
	(unspec:OI
	  [(match_operand:OI 1 "s_register_operand")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4))]
  "TARGET_NEON"
  ""
)
6158
;; Store an OImode register group for VDX modes.  Element sizes other than
;; 64 use vst4; the 64-bit case is output as vst1.64 instead, and the type
;; attribute follows the same split.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
	(unspec:OI
	  [(match_operand:OI 1 "s_register_operand" "w")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vst4.<V_sz_elem>\t%h1, %A0";

  return "vst1.64\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
		    (const_string "neon_store1_4reg<q>")
		    (const_string "neon_store4_4reg<q>")))]
)
6176
;; Expander for a four-vector store-lanes operation on VQ2 modes.  It simply
;; forwards both operands to the neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest, src));
  DONE;
}
)
6186
;; XImode vst4 for VQ2 modes, done in two steps: neon_vst4qa writes an
;; OImode block at offset 0 of the destination, then neon_vst4qb writes the
;; OImode block that follows it.  Both read the same XImode source.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode block of the destination.  */
  rtx mem0 = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (mem0, operands[1]));

  /* Second OImode block, immediately after the first.  */
  rtx mem1 = adjust_address (mem0, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (mem1, operands[1]));
  DONE;
}
)
6201
;; First step of the XImode vst4: a vst4 to an OImode memory block from the
;; registers numbered REGNO, REGNO + 4, REGNO + 8 and REGNO + 12 of the
;; source.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
	(unspec:OI
	  [(match_operand:XI 1 "s_register_operand" "w")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4A))]
  "TARGET_NEON"
{
  rtx xops[5];
  int base = REGNO (operands[1]);

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6221
;; Second step of the XImode vst4: a vst4 to an OImode memory block from the
;; registers numbered REGNO + 2, REGNO + 6, REGNO + 10 and REGNO + 14 of the
;; source.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
	(unspec:OI
	  [(match_operand:XI 1 "s_register_operand" "w")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4B))]
  "TARGET_NEON"
{
  rtx xops[5];
  int base = REGNO (operands[1]) + 2;

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6241
;; Store one lane from each register of an OImode group as a four-element
;; structure (VD_LANE modes).  The lane is remapped with NEON_ENDIAN_LANE_N;
;; see the comment on neon_vld1_lane for why lane numbers are reversed on
;; big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_four_elem>
	  [(match_operand:OI 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")
	   (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  rtx xops[6];
  int base = REGNO (operands[1]);
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));

  xops[0] = operands[0];
  /* Source registers are numbered base, base + 2, base + 4 and
     base + 6.  */
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[5] = GEN_INT (lane);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
		   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
6268
;; Store one lane from each vector of an XImode group as a four-element
;; structure (VQ_HS modes).  After the NEON_ENDIAN_LANE_N remapping (see the
;; comment on neon_vld1_lane for why lane numbers are reversed on big-endian
;; targets), a lane in the upper half selects the registers two hard
;; register numbers higher.
;; This is a single-lane store, so it carries the neon_store4_one_lane
;; scheduling type like the VD_LANE variant of this pattern (and like
;; neon_load4_one_lane on the matching load), not the whole-register
;; neon_store4_4reg type.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_four_elem>
	  [(match_operand:XI 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")
	   (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  /* Lanes in the upper half come from the second register of each pair.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
		   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
6301
;; Widen the elements selected by a vect_par_constant_low parallel from a VU
;; vector (sign or zero extension, per the SE iterator) using vmovl on %e1.
;; Only enabled for little-endian.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
	(SE:<V_unpack>
	  (vec_select:<V_HALF>
	    (match_operand:VU 1 "register_operand" "w")
	    (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6311
;; Widen (sign- or zero-extend, per the SE iterator) the elements of
;; operand 1 selected by the "high" PARALLEL in operand 2, using VMOVL.
;; Little-endian only.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
	(SE:<V_unpack>
	  (vec_select:<V_HALF>
	    (match_operand:VU 1 "register_operand" "w")
	    (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6321
;; Expander for the VU modes: build the PARALLEL selecting lanes
;; N/2 .. N-1 (N = <V_mode_nunits>) and hand it to the matching
;; neon_vec_unpack<US>_hi_<mode> insn.  Little-endian only.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);

  /* Lane indices of the upper half, in ascending order.  */
  for (int k = 0; k < half; k++)
    RTVEC_ELT (lanes, k) = GEN_INT (half + k);

  rtx sel = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], operands[1],
						sel));
  DONE;
}
)
6340
;; Expander for the VU modes: build the PARALLEL selecting lanes
;; 0 .. N/2-1 (N = <V_mode_nunits>) and hand it to the matching
;; neon_vec_unpack<US>_lo_<mode> insn.  Little-endian only.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);

  /* Lane indices of the lower half, in ascending order.  */
  for (int k = 0; k < half; k++)
    RTVEC_ELT (lanes, k) = GEN_INT (k);

  rtx sel = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], operands[1],
						sel));
  DONE;
}
)
6358
;; Widening multiply (VMULL) of the "low" selections of operands 1 and 3.
;; Both inputs are selected with the same PARALLEL (operand 2, reused via
;; match_dup) and extended before the multiply.  Little-endian only.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
	(mult:<V_unpack>
	  (SE:<V_unpack>
	    (vec_select:<V_HALF>
	      (match_operand:VU 1 "register_operand" "w")
	      (match_operand:VU 2 "vect_par_constant_low" "")))
	  (SE:<V_unpack>
	    (vec_select:<V_HALF>
	      (match_operand:VU 3 "register_operand" "w")
	      (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6371
;; Expander for the VU modes: widening multiply of the low halves of
;; operands 1 and 2.  Builds the lane selector 0 .. N/2-1 and emits
;; neon_vec_<US>mult_lo_<mode>, whose operand order is
;; (dest, src1, selector, src2).  Little-endian only.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (lanes, k) = GEN_INT (k);

  rtx sel = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], operands[1],
					      sel, operands[2]));
  DONE;
}
)
6392
;; Widening multiply (VMULL) of the "high" selections of operands 1 and 3.
;; Both inputs are selected with the same PARALLEL (operand 2, reused via
;; match_dup) and extended before the multiply.  Little-endian only.
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
	(mult:<V_unpack>
	  (SE:<V_unpack>
	    (vec_select:<V_HALF>
	      (match_operand:VU 1 "register_operand" "w")
	      (match_operand:VU 2 "vect_par_constant_high" "")))
	  (SE:<V_unpack>
	    (vec_select:<V_HALF>
	      (match_operand:VU 3 "register_operand" "w")
	      (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6405
;; Expander for the VU modes: widening multiply of the high halves of
;; operands 1 and 2.  Builds the lane selector N/2 .. N-1 and emits
;; neon_vec_<US>mult_hi_<mode>, whose operand order is
;; (dest, src1, selector, src2).  Little-endian only.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (lanes, k) = GEN_INT (half + k);

  rtx sel = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], operands[1],
					      sel, operands[2]));
  DONE;
}
)
6427
;; Widening shift left by a constant (VSHLL) for the VW modes: operand 1 is
;; shifted by the constant operand 2 and the result extended to <V_widen>.
;; The output is a fixed template, so no C fragment is needed.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
	(SE:<V_widen>
	  (ashift:VW
	    (match_operand:VW 1 "register_operand" "w")
	    (match_operand:<V_innermode> 2
	      "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
6438
;; Expander for the VU modes: widening shift left of the low half of
;; operand 1 by the immediate operand 2.  The half is taken as a subreg at
;; byte offset 0 and passed to the <V_half> shift insn.  Little-endian only.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx lo_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
				     <MODE>mode, 0);
  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], lo_half,
					       operands[2]));
  DONE;
}
)
6451
;; Expander for the VU modes: widening shift left of the high half of
;; operand 1 by the immediate operand 2.  The half is taken as a subreg at
;; byte offset GET_MODE_SIZE (<V_HALF>mode) and passed to the <V_half> shift
;; insn.  Little-endian only.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx hi_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
				     <MODE>mode,
				     GET_MODE_SIZE (<V_HALF>mode));
  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], hi_half,
					       operands[2]));
  DONE;
}
)
6465
6466 ;; Vectorize for non-neon-quad case
;; Extend a whole VDI vector to <V_widen> with VMOVL.  The scheduling type
;; is neon_shift_imm_long, the same class the neon_vec_unpack<US>_lo/hi
;; patterns in this file use for VMOVL.
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
	(SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6474
;; Expander for the VDI modes: extend all of operand 1 into a <V_widen>
;; temporary, then copy that temporary's low half into operand 0.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
}
)
6487
;; Expander for the VDI modes: extend all of operand 1 into a <V_widen>
;; temporary, then copy that temporary's high half into operand 0.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
}
)
6500
;; Widening multiply (VMULL) of two whole VDI vectors: both inputs are
;; extended to <V_widen> and multiplied.
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
	(mult:<V_widen>
	  (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w"))
	  (SE:<V_widen> (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6511
;; Expander for the VDI modes: do the full widening multiply into a
;; <V_widen> temporary, then copy that temporary's high half into operand 0.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
					   operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
}
)
6526
;; Expander for the VDI modes: do the full widening multiply into a
;; <V_widen> temporary, then copy that temporary's low half into operand 0.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
					   operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
  DONE;
}
)
6541
;; Expander for the VDI modes: do the full widening shift into a <V_widen>
;; temporary, then copy that temporary's high half into operand 0.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
					     operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6555
;; Expander for the VDI modes: do the full widening shift into a <V_widen>
;; temporary, then copy that temporary's low half into operand 0.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
					     operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6569
6570 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6571 ; because the ordering of vector elements in Q registers is different from what
6572 ; the semantics of the instructions require.
6573
;; Truncate two VN vectors and concatenate the results into one
;; <V_narrow_pack> register, using two VMOVN instructions (hence length 8).
;; The destination is earlyclobber: the first VMOVN writes part of it before
;; the second reads operand 2.  Little-endian only.
(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
	(vec_concat:<V_narrow_pack>
	  (truncate:<V_narrow>
	    (match_operand:VN 1 "register_operand" "w"))
	  (truncate:<V_narrow>
	    (match_operand:VN 2 "register_operand" "w"))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
6586
6587 ;; For the non-quad case.
;; Truncate a single VN vector to <V_narrow> with one VMOVN.
;; Little-endian only.
(define_insn "neon_vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
	(truncate:<V_narrow>
	  (match_operand:VN 1 "register_operand" "w")))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_move_narrow_q")]
)
6595
;; Expander for the VSHFT modes: place operand 1 in the low half and
;; operand 2 in the high half of a <V_DOUBLE> temporary, then narrow that
;; temporary into operand 0 with a single truncate.  Little-endian only.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand")
   (match_operand:VSHFT 1 "register_operand")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx joined = gen_reg_rtx (<V_DOUBLE>mode);

  emit_insn (gen_move_lo_quad_<V_double> (joined, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (joined, operands[2]));
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], joined));
  DONE;
})
6609
;; Match abs (op1 - op2) on the VF modes as a single VABD.  Enabled only
;; under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_2"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
	(abs:VF
	  (minus:VF
	    (match_operand:VF 1 "s_register_operand" "w")
	    (match_operand:VF 2 "s_register_operand" "w"))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
6618
;; Match abs of an UNSPEC_VSUB of operands 1 and 2 on the VF modes as a
;; single VABD.  Enabled only under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_3"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
	(abs:VF
	  (unspec:VF
	    [(match_operand:VF 1 "s_register_operand" "w")
	     (match_operand:VF 2 "s_register_operand" "w")]
	    UNSPEC_VSUB)))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)