[arm] Implement usadv16qi and ssadv16qi standard names
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
35 if (which_alternative == 2 || which_alternative == 3)
36 {
37 int width, is_valid;
38 static char templ[40];
39
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 4: return output_move_neon (operands);
57 case 2: case 3: gcc_unreachable ();
58 case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 case 9: return "#";
61 default: return output_move_double (operands, true, NULL);
62 }
63 }
64 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
65 neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
66 neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
67 multiple")
68 (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
69 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
70 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
71 (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
72
73 (define_insn "*neon_mov<mode>"
74 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
75 "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
76 (match_operand:VQXMOV 1 "general_operand"
77 " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
78 "TARGET_NEON
79 && (register_operand (operands[0], <MODE>mode)
80 || register_operand (operands[1], <MODE>mode))"
81 {
82 if (which_alternative == 2 || which_alternative == 3)
83 {
84 int width, is_valid;
85 static char templ[40];
86
87 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
88 &operands[1], &width);
89
90 gcc_assert (is_valid != 0);
91
92 if (width == 0)
93 return "vmov.f32\t%q0, %1 @ <mode>";
94 else
95 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
96
97 return templ;
98 }
99
100 switch (which_alternative)
101 {
102 case 0: return "vmov\t%q0, %q1 @ <mode>";
103 case 1: case 4: return output_move_neon (operands);
104 case 2: case 3: gcc_unreachable ();
105 case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
106 case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
107 default: return output_move_quad (operands);
108 }
109 }
110 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
111 neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
112 neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
113 (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
114 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
115 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
116 (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
117
118 /* We define these mov expanders to match the standard mov$a optab to prevent
119    the mid-end from trying to do a subreg for these modes, which is the most
120    inefficient way to expand the move.  Also, big-endian subregs aren't
121    allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
122 Without these RTL generation patterns the mid-end would attempt to take a
123 sub-reg and may ICE if it can't. */
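
;; As an illustrative sketch only (not part of the original file), a plain
;; structure copy such as the one below is the kind of code that wants a
;; whole-register TImode/structure-mode move; the function name is made up
;; for the example:
;;
;;   #include <arm_neon.h>
;;
;;   void
;;   copy_pair (int32x4x2_t *dst, const int32x4x2_t *src)
;;   {
;;     *dst = *src;   /* a 32-byte (OImode-sized) aggregate copy */
;;   }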
124
125 (define_expand "movti"
126 [(set (match_operand:TI 0 "nonimmediate_operand" "")
127 (match_operand:TI 1 "general_operand" ""))]
128 "TARGET_NEON"
129 {
130 if (can_create_pseudo_p ())
131 {
132 if (!REG_P (operands[0]))
133 operands[1] = force_reg (TImode, operands[1]);
134 }
135 })
136
137 (define_expand "mov<mode>"
138 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
139 (match_operand:VSTRUCT 1 "general_operand" ""))]
140 "TARGET_NEON"
141 {
142 if (can_create_pseudo_p ())
143 {
144 if (!REG_P (operands[0]))
145 operands[1] = force_reg (<MODE>mode, operands[1]);
146 }
147 })
148
149 (define_expand "mov<mode>"
150 [(set (match_operand:VH 0 "s_register_operand")
151 (match_operand:VH 1 "s_register_operand"))]
152 "TARGET_NEON"
153 {
154 if (can_create_pseudo_p ())
155 {
156 if (!REG_P (operands[0]))
157 operands[1] = force_reg (<MODE>mode, operands[1]);
158 }
159 })
160
161 (define_insn "*neon_mov<mode>"
162 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
163 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
164 "TARGET_NEON
165 && (register_operand (operands[0], <MODE>mode)
166 || register_operand (operands[1], <MODE>mode))"
167 {
168 switch (which_alternative)
169 {
170 case 0: return "#";
171 case 1: case 2: return output_move_neon (operands);
172 default: gcc_unreachable ();
173 }
174 }
175 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
176 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
177
178 (define_split
179 [(set (match_operand:EI 0 "s_register_operand" "")
180 (match_operand:EI 1 "s_register_operand" ""))]
181 "TARGET_NEON && reload_completed"
182 [(set (match_dup 0) (match_dup 1))
183 (set (match_dup 2) (match_dup 3))]
184 {
185 int rdest = REGNO (operands[0]);
186 int rsrc = REGNO (operands[1]);
187 rtx dest[2], src[2];
188
189 dest[0] = gen_rtx_REG (TImode, rdest);
190 src[0] = gen_rtx_REG (TImode, rsrc);
191 dest[1] = gen_rtx_REG (DImode, rdest + 4);
192 src[1] = gen_rtx_REG (DImode, rsrc + 4);
193
194 neon_disambiguate_copy (operands, dest, src, 2);
195 })
196
197 (define_split
198 [(set (match_operand:OI 0 "s_register_operand" "")
199 (match_operand:OI 1 "s_register_operand" ""))]
200 "TARGET_NEON && reload_completed"
201 [(set (match_dup 0) (match_dup 1))
202 (set (match_dup 2) (match_dup 3))]
203 {
204 int rdest = REGNO (operands[0]);
205 int rsrc = REGNO (operands[1]);
206 rtx dest[2], src[2];
207
208 dest[0] = gen_rtx_REG (TImode, rdest);
209 src[0] = gen_rtx_REG (TImode, rsrc);
210 dest[1] = gen_rtx_REG (TImode, rdest + 4);
211 src[1] = gen_rtx_REG (TImode, rsrc + 4);
212
213 neon_disambiguate_copy (operands, dest, src, 2);
214 })
215
216 (define_split
217 [(set (match_operand:CI 0 "s_register_operand" "")
218 (match_operand:CI 1 "s_register_operand" ""))]
219 "TARGET_NEON && reload_completed"
220 [(set (match_dup 0) (match_dup 1))
221 (set (match_dup 2) (match_dup 3))
222 (set (match_dup 4) (match_dup 5))]
223 {
224 int rdest = REGNO (operands[0]);
225 int rsrc = REGNO (operands[1]);
226 rtx dest[3], src[3];
227
228 dest[0] = gen_rtx_REG (TImode, rdest);
229 src[0] = gen_rtx_REG (TImode, rsrc);
230 dest[1] = gen_rtx_REG (TImode, rdest + 4);
231 src[1] = gen_rtx_REG (TImode, rsrc + 4);
232 dest[2] = gen_rtx_REG (TImode, rdest + 8);
233 src[2] = gen_rtx_REG (TImode, rsrc + 8);
234
235 neon_disambiguate_copy (operands, dest, src, 3);
236 })
237
238 (define_split
239 [(set (match_operand:XI 0 "s_register_operand" "")
240 (match_operand:XI 1 "s_register_operand" ""))]
241 "TARGET_NEON && reload_completed"
242 [(set (match_dup 0) (match_dup 1))
243 (set (match_dup 2) (match_dup 3))
244 (set (match_dup 4) (match_dup 5))
245 (set (match_dup 6) (match_dup 7))]
246 {
247 int rdest = REGNO (operands[0]);
248 int rsrc = REGNO (operands[1]);
249 rtx dest[4], src[4];
250
251 dest[0] = gen_rtx_REG (TImode, rdest);
252 src[0] = gen_rtx_REG (TImode, rsrc);
253 dest[1] = gen_rtx_REG (TImode, rdest + 4);
254 src[1] = gen_rtx_REG (TImode, rsrc + 4);
255 dest[2] = gen_rtx_REG (TImode, rdest + 8);
256 src[2] = gen_rtx_REG (TImode, rsrc + 8);
257 dest[3] = gen_rtx_REG (TImode, rdest + 12);
258 src[3] = gen_rtx_REG (TImode, rsrc + 12);
259
260 neon_disambiguate_copy (operands, dest, src, 4);
261 })
262
263 (define_expand "movmisalign<mode>"
264 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
265 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
266 UNSPEC_MISALIGNED_ACCESS))]
267 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
268 {
269 rtx adjust_mem;
270 /* This pattern is not permitted to fail during expansion: if both arguments
271 are non-registers (e.g. memory := constant, which can be created by the
272 auto-vectorizer), force operand 1 into a register. */
273 if (!s_register_operand (operands[0], <MODE>mode)
274 && !s_register_operand (operands[1], <MODE>mode))
275 operands[1] = force_reg (<MODE>mode, operands[1]);
276
277 if (s_register_operand (operands[0], <MODE>mode))
278 adjust_mem = operands[1];
279 else
280 adjust_mem = operands[0];
281
282 /* Legitimize address. */
283 if (!neon_vector_mem_operand (adjust_mem, 2, true))
284 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
285
286 })
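
;; Illustrative only (not from the original sources): with NEON enabled and
;; unaligned access allowed, the auto-vectorizer can go through movmisalign,
;; and hence the vld1/vst1 patterns below, for loops over pointers with no
;; known alignment.  The function name is made up for the example:
;;
;;   void
;;   add_one (unsigned char *dst, const unsigned char *src, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       dst[i] = src[i] + 1;   /* unaligned vld1.8 ... vst1.8 accesses */
;;   }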
287
288 (define_insn "*movmisalign<mode>_neon_store"
289 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
290 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
291 UNSPEC_MISALIGNED_ACCESS))]
292 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
293 "vst1.<V_sz_elem>\t{%P1}, %A0"
294 [(set_attr "type" "neon_store1_1reg<q>")])
295
296 (define_insn "*movmisalign<mode>_neon_load"
297 [(set (match_operand:VDX 0 "s_register_operand" "=w")
298 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
299 " Um")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vld1.<V_sz_elem>\t{%P0}, %A1"
303 [(set_attr "type" "neon_load1_1reg<q>")])
304
305 (define_insn "*movmisalign<mode>_neon_store"
306 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
307 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
308 UNSPEC_MISALIGNED_ACCESS))]
309 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
310 "vst1.<V_sz_elem>\t{%q1}, %A0"
311 [(set_attr "type" "neon_store1_1reg<q>")])
312
313 (define_insn "*movmisalign<mode>_neon_load"
314 [(set (match_operand:VQX 0 "s_register_operand" "=w")
315 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
316 " Um")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vld1.<V_sz_elem>\t{%q0}, %A1"
320 [(set_attr "type" "neon_load1_1reg<q>")])
321
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
324 (vec_merge:VD_LANE
325 (vec_duplicate:VD_LANE
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
329 "TARGET_NEON"
330 {
331 int elt = ffs ((int) INTVAL (operands[2])) - 1;
332 if (BYTES_BIG_ENDIAN)
333 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
334 operands[2] = GEN_INT (elt);
335
336 if (which_alternative == 0)
337 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
338 else
339 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
340 }
341 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
342
343 (define_insn "vec_set<mode>_internal"
344 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
345 (vec_merge:VQ2
346 (vec_duplicate:VQ2
347 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
348 (match_operand:VQ2 3 "s_register_operand" "0,0")
349 (match_operand:SI 2 "immediate_operand" "i,i")))]
350 "TARGET_NEON"
351 {
352 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
353 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
354 int elt = elem % half_elts;
355 int hi = (elem / half_elts) * 2;
356 int regno = REGNO (operands[0]);
357
358 if (BYTES_BIG_ENDIAN)
359 elt = half_elts - 1 - elt;
360
361 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
362 operands[2] = GEN_INT (elt);
363
364 if (which_alternative == 0)
365 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
366 else
367 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
368 }
369 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
370 )
371
372 (define_insn "vec_setv2di_internal"
373 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
374 (vec_merge:V2DI
375 (vec_duplicate:V2DI
376 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
377 (match_operand:V2DI 3 "s_register_operand" "0,0")
378 (match_operand:SI 2 "immediate_operand" "i,i")))]
379 "TARGET_NEON"
380 {
381 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
382 int regno = REGNO (operands[0]) + 2 * elem;
383
384 operands[0] = gen_rtx_REG (DImode, regno);
385
386 if (which_alternative == 0)
387 return "vld1.64\t%P0, %A1";
388 else
389 return "vmov\t%P0, %Q1, %R1";
390 }
391 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
392 )
393
394 (define_expand "vec_set<mode>"
395 [(match_operand:VDQ 0 "s_register_operand" "")
396 (match_operand:<V_elem> 1 "s_register_operand" "")
397 (match_operand:SI 2 "immediate_operand" "")]
398 "TARGET_NEON"
399 {
400 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
401 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
402 GEN_INT (elem), operands[0]));
403 DONE;
404 })
405
406 (define_insn "vec_extract<mode><V_elem_l>"
407 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
408 (vec_select:<V_elem>
409 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
410 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
411 "TARGET_NEON"
412 {
413 if (BYTES_BIG_ENDIAN)
414 {
415 int elt = INTVAL (operands[2]);
416 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
417 operands[2] = GEN_INT (elt);
418 }
419
420 if (which_alternative == 0)
421 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
422 else
423 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
424 }
425 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
426 )
427
428 (define_insn "vec_extract<mode><V_elem_l>"
429 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
430 (vec_select:<V_elem>
431 (match_operand:VQ2 1 "s_register_operand" "w,w")
432 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
433 "TARGET_NEON"
434 {
435 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
436 int elt = INTVAL (operands[2]) % half_elts;
437 int hi = (INTVAL (operands[2]) / half_elts) * 2;
438 int regno = REGNO (operands[1]);
439
440 if (BYTES_BIG_ENDIAN)
441 elt = half_elts - 1 - elt;
442
443 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
444 operands[2] = GEN_INT (elt);
445
446 if (which_alternative == 0)
447 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
448 else
449 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
450 }
451 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
452 )
453
454 (define_insn "vec_extractv2didi"
455 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
456 (vec_select:DI
457 (match_operand:V2DI 1 "s_register_operand" "w,w")
458 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
459 "TARGET_NEON"
460 {
461 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
462
463 operands[1] = gen_rtx_REG (DImode, regno);
464
465 if (which_alternative == 0)
466 return "vst1.64\t{%P1}, %A0 @ v2di";
467 else
468 return "vmov\t%Q0, %R0, %P1 @ v2di";
469 }
470 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
471 )
472
473 (define_expand "vec_init<mode><V_elem_l>"
474 [(match_operand:VDQ 0 "s_register_operand" "")
475 (match_operand 1 "" "")]
476 "TARGET_NEON"
477 {
478 neon_expand_vector_init (operands[0], operands[1]);
479 DONE;
480 })
481
482 ;; Doubleword and quadword arithmetic.
483
484 ;; NOTE: some other instructions also support 64-bit integer
485 ;; element size, which we could potentially use for "long long" operations.
486
487 (define_insn "*add<mode>3_neon"
488 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
489 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
490 (match_operand:VDQ 2 "s_register_operand" "w")))]
491 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
492 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
493 [(set (attr "type")
494 (if_then_else (match_test "<Is_float_mode>")
495 (const_string "neon_fp_addsub_s<q>")
496 (const_string "neon_add<q>")))]
497 )
498
499 ;; As with SFmode, full support for HFmode vector arithmetic is only available
500 ;; when flag_unsafe_math_optimizations is enabled.
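
;; A hedged illustration (not in the original file), assuming a target with
;; the FP16 arithmetic extension: the intrinsic below always reaches
;; add<mode>3_fp16, whereas half-float vector arithmetic written in plain C
;; is only vectorized via add<mode>3 when -funsafe-math-optimizations is on:
;;
;;   #include <arm_neon.h>
;;
;;   float16x8_t
;;   add_halves (float16x8_t a, float16x8_t b)
;;   {
;;     return vaddq_f16 (a, b);   /* vadd.f16 on the whole q register */
;;   }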
501
502 (define_insn "add<mode>3"
503 [(set
504 (match_operand:VH 0 "s_register_operand" "=w")
505 (plus:VH
506 (match_operand:VH 1 "s_register_operand" "w")
507 (match_operand:VH 2 "s_register_operand" "w")))]
508 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
509 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
510 [(set (attr "type")
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_add<q>")))]
514 )
515
516 (define_insn "add<mode>3_fp16"
517 [(set
518 (match_operand:VH 0 "s_register_operand" "=w")
519 (plus:VH
520 (match_operand:VH 1 "s_register_operand" "w")
521 (match_operand:VH 2 "s_register_operand" "w")))]
522 "TARGET_NEON_FP16INST"
523 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
524 [(set (attr "type")
525 (if_then_else (match_test "<Is_float_mode>")
526 (const_string "neon_fp_addsub_s<q>")
527 (const_string "neon_add<q>")))]
528 )
529
530 (define_insn "adddi3_neon"
531 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
532 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
533 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
534 (clobber (reg:CC CC_REGNUM))]
535 "TARGET_NEON"
536 {
537 switch (which_alternative)
538 {
539 case 0: /* fall through */
540 case 3: return "vadd.i64\t%P0, %P1, %P2";
541 case 1: return "#";
542 case 2: return "#";
543 case 4: return "#";
544 case 5: return "#";
545 case 6: return "#";
546 default: gcc_unreachable ();
547 }
548 }
549 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
550 multiple,multiple,multiple")
551 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
552 (set_attr "length" "*,8,8,*,8,8,8")
553 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
554 )
555
556 (define_insn "*sub<mode>3_neon"
557 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
558 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
559 (match_operand:VDQ 2 "s_register_operand" "w")))]
560 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
561 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
562 [(set (attr "type")
563 (if_then_else (match_test "<Is_float_mode>")
564 (const_string "neon_fp_addsub_s<q>")
565 (const_string "neon_sub<q>")))]
566 )
567
568 (define_insn "sub<mode>3"
569 [(set
570 (match_operand:VH 0 "s_register_operand" "=w")
571 (minus:VH
572 (match_operand:VH 1 "s_register_operand" "w")
573 (match_operand:VH 2 "s_register_operand" "w")))]
574 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
575 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
576 [(set_attr "type" "neon_sub<q>")]
577 )
578
579 (define_insn "sub<mode>3_fp16"
580 [(set
581 (match_operand:VH 0 "s_register_operand" "=w")
582 (minus:VH
583 (match_operand:VH 1 "s_register_operand" "w")
584 (match_operand:VH 2 "s_register_operand" "w")))]
585 "TARGET_NEON_FP16INST"
586 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
587 [(set_attr "type" "neon_sub<q>")]
588 )
589
590 (define_insn "subdi3_neon"
591 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
592 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
593 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
594 (clobber (reg:CC CC_REGNUM))]
595 "TARGET_NEON"
596 {
597 switch (which_alternative)
598 {
599 case 0: /* fall through */
600 case 4: return "vsub.i64\t%P0, %P1, %P2";
601 case 1: /* fall through */
602 case 2: /* fall through */
603 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
604 default: gcc_unreachable ();
605 }
606 }
607 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
608 (set_attr "conds" "*,clob,clob,clob,*")
609 (set_attr "length" "*,8,8,8,*")
610 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
611 )
612
613 (define_insn "*mul<mode>3_neon"
614 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
615 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
616 (match_operand:VDQW 2 "s_register_operand" "w")))]
617 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
618 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set (attr "type")
620 (if_then_else (match_test "<Is_float_mode>")
621 (const_string "neon_fp_mul_s<q>")
622 (const_string "neon_mul_<V_elem_ch><q>")))]
623 )
624
625 /* Perform division using multiply-by-reciprocal.
626    The reciprocal is calculated using the Newton-Raphson method.
627 Enabled with -funsafe-math-optimizations -freciprocal-math
628    and disabled for -Os since it increases code size.  */
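
;; A scalar sketch of the expansion below (illustration only, not part of the
;; original comment); initial_estimate () is a hypothetical stand-in for the
;; vrecpe result, and each refinement step is the usual x = x * (2 - b * x),
;; which is what vrecps followed by a multiply computes:
;;
;;   float
;;   approx_div (float a, float b)
;;   {
;;     float x = initial_estimate (b);   /* vrecpe (hypothetical helper) */
;;     x = x * (2.0f - b * x);           /* vrecps + vmul, iteration 1   */
;;     x = x * (2.0f - b * x);           /* vrecps + vmul, iteration 2   */
;;     return a * x;                     /* final vmul: a * (1/b)        */
;;   }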
629
630 (define_expand "div<mode>3"
631 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
632 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")
633 (match_operand:VCVTF 2 "s_register_operand" "w")))]
634 "TARGET_NEON && !optimize_size
635 && flag_reciprocal_math"
636 {
637 rtx rec = gen_reg_rtx (<MODE>mode);
638 rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
639
640 /* Reciprocal estimate. */
641 emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
642
643   /* Perform 2 iterations of the Newton-Raphson method.  */
644 for (int i = 0; i < 2; i++)
645 {
646 emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
647 emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
648 }
649
650   /* We now have the reciprocal in rec; perform operands[0] = operands[1] * rec.  */
651 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
652 DONE;
653 }
654 )
655
656
657 (define_insn "mul<mode>3add<mode>_neon"
658 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
659 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
660 (match_operand:VDQW 3 "s_register_operand" "w"))
661 (match_operand:VDQW 1 "s_register_operand" "0")))]
662 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
663 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
664 [(set (attr "type")
665 (if_then_else (match_test "<Is_float_mode>")
666 (const_string "neon_fp_mla_s<q>")
667 (const_string "neon_mla_<V_elem_ch><q>")))]
668 )
669
670 (define_insn "mul<mode>3add<mode>_neon"
671 [(set (match_operand:VH 0 "s_register_operand" "=w")
672 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
673 (match_operand:VH 3 "s_register_operand" "w"))
674 (match_operand:VH 1 "s_register_operand" "0")))]
675 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
676 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
677 [(set_attr "type" "neon_fp_mla_s<q>")]
678 )
679
680 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
681 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
682 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
683 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
684 (match_operand:VDQW 3 "s_register_operand" "w"))))]
685 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
686 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
687 [(set (attr "type")
688 (if_then_else (match_test "<Is_float_mode>")
689 (const_string "neon_fp_mla_s<q>")
690 (const_string "neon_mla_<V_elem_ch><q>")))]
691 )
692
693 ;; Fused multiply-accumulate
694 ;; We define each insn twice here:
695 ;;    1: gated on flag_unsafe_math_optimizations, so that the widening-multiply
696 ;;       pass can use it when converting multiply-adds to FMAs.
697 ;;    2: without flag_unsafe_math_optimizations, for the intrinsics to use.
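;;
;; Illustration only (not in the original sources), assuming an FPU with NEON
;; FMA support (e.g. -mfpu=neon-vfpv4): a separate multiply and add can be
;; contracted into the first form under -ffast-math, while the arm_neon.h
;; intrinsic below always maps to the _intrinsic form:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   fused_mla (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmaq_f32 (acc, a, b);   /* acc + a * b with a single rounding */
;;   }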
698 (define_insn "fma<VCVTF:mode>4"
699 [(set (match_operand:VCVTF 0 "register_operand" "=w")
700 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
701 (match_operand:VCVTF 2 "register_operand" "w")
702 (match_operand:VCVTF 3 "register_operand" "0")))]
703 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
704 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
705 [(set_attr "type" "neon_fp_mla_s<q>")]
706 )
707
708 (define_insn "fma<VCVTF:mode>4_intrinsic"
709 [(set (match_operand:VCVTF 0 "register_operand" "=w")
710 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
711 (match_operand:VCVTF 2 "register_operand" "w")
712 (match_operand:VCVTF 3 "register_operand" "0")))]
713 "TARGET_NEON && TARGET_FMA"
714 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
715 [(set_attr "type" "neon_fp_mla_s<q>")]
716 )
717
718 (define_insn "fma<VH:mode>4"
719 [(set (match_operand:VH 0 "register_operand" "=w")
720 (fma:VH
721 (match_operand:VH 1 "register_operand" "w")
722 (match_operand:VH 2 "register_operand" "w")
723 (match_operand:VH 3 "register_operand" "0")))]
724 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
725 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
726 [(set_attr "type" "neon_fp_mla_s<q>")]
727 )
728
729 (define_insn "fma<VH:mode>4_intrinsic"
730 [(set (match_operand:VH 0 "register_operand" "=w")
731 (fma:VH
732 (match_operand:VH 1 "register_operand" "w")
733 (match_operand:VH 2 "register_operand" "w")
734 (match_operand:VH 3 "register_operand" "0")))]
735 "TARGET_NEON_FP16INST"
736 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_fp_mla_s<q>")]
738 )
739
740 (define_insn "*fmsub<VCVTF:mode>4"
741 [(set (match_operand:VCVTF 0 "register_operand" "=w")
742 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
743 (match_operand:VCVTF 2 "register_operand" "w")
744 (match_operand:VCVTF 3 "register_operand" "0")))]
745 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
746 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
747 [(set_attr "type" "neon_fp_mla_s<q>")]
748 )
749
750 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
751 [(set (match_operand:VCVTF 0 "register_operand" "=w")
752 (fma:VCVTF
753 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
754 (match_operand:VCVTF 2 "register_operand" "w")
755 (match_operand:VCVTF 3 "register_operand" "0")))]
756 "TARGET_NEON && TARGET_FMA"
757 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
758 [(set_attr "type" "neon_fp_mla_s<q>")]
759 )
760
761 (define_insn "fmsub<VH:mode>4_intrinsic"
762 [(set (match_operand:VH 0 "register_operand" "=w")
763 (fma:VH
764 (neg:VH (match_operand:VH 1 "register_operand" "w"))
765 (match_operand:VH 2 "register_operand" "w")
766 (match_operand:VH 3 "register_operand" "0")))]
767 "TARGET_NEON_FP16INST"
768 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
769 [(set_attr "type" "neon_fp_mla_s<q>")]
770 )
771
772 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
773 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
774 (unspec:VCVTF [(match_operand:VCVTF 1
775 "s_register_operand" "w")]
776 NEON_VRINT))]
777 "TARGET_NEON && TARGET_VFP5"
778 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
779 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
780 )
781
782 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
783 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
784 (FIXUORS:<V_cmp_result> (unspec:VCVTF
785 [(match_operand:VCVTF 1 "register_operand" "w")]
786 NEON_VCVT)))]
787 "TARGET_NEON && TARGET_VFP5"
788 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
789 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
790 (set_attr "predicable" "no")]
791 )
792
793 (define_insn "ior<mode>3"
794 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
795 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
796 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
797 "TARGET_NEON"
798 {
799 switch (which_alternative)
800 {
801 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
802 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
803 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
804 default: gcc_unreachable ();
805 }
806 }
807 [(set_attr "type" "neon_logic<q>")]
808 )
809
810 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
811 ;; vorr. We support the pseudo-instruction vand instead, because that
812 ;; corresponds to the canonical form the middle-end expects to use for
813 ;; immediate bitwise-ANDs.
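
;; A small illustration, not from the original file: per-lane masking with a
;; constant, as in the loop below, reaches this pattern as a canonical
;; AND-with-immediate; the emitted "vand ..., #imm" pseudo-instruction is then
;; assembled as vbic with the inverted immediate.  The function name is made
;; up for the example:
;;
;;   void
;;   clear_low_byte (unsigned int *x, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       x[i] &= 0xffffff00u;   /* can become vbic.i32 with #0xff per lane */
;;   }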
814
815 (define_insn "and<mode>3"
816 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
817 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
818 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
819 "TARGET_NEON"
820 {
821 switch (which_alternative)
822 {
823 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
824 case 1: return neon_output_logic_immediate ("vand", &operands[2],
825 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
826 default: gcc_unreachable ();
827 }
828 }
829 [(set_attr "type" "neon_logic<q>")]
830 )
831
832 (define_insn "orn<mode>3_neon"
833 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
834 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
835 (match_operand:VDQ 1 "s_register_operand" "w")))]
836 "TARGET_NEON"
837 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
838 [(set_attr "type" "neon_logic<q>")]
839 )
840
841 ;; TODO: investigate whether we should disable
842 ;; this and bicdi3_neon for the A8 in line with the other
843 ;; changes above.
844 (define_insn_and_split "orndi3_neon"
845 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
846 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
847 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
848 "TARGET_NEON"
849 "@
850 vorn\t%P0, %P1, %P2
851 #
852 #
853 #"
854 "reload_completed &&
855 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
856 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
857 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
858 "
859 {
860 if (TARGET_THUMB2)
861 {
862 operands[3] = gen_highpart (SImode, operands[0]);
863 operands[0] = gen_lowpart (SImode, operands[0]);
864 operands[4] = gen_highpart (SImode, operands[2]);
865 operands[2] = gen_lowpart (SImode, operands[2]);
866 operands[5] = gen_highpart (SImode, operands[1]);
867 operands[1] = gen_lowpart (SImode, operands[1]);
868 }
869 else
870 {
871 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
872 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
873 DONE;
874 }
875 }"
876 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
877 (set_attr "length" "*,16,8,8")
878 (set_attr "arch" "any,a,t2,t2")]
879 )
880
881 (define_insn "bic<mode>3_neon"
882 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
883 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
884 (match_operand:VDQ 1 "s_register_operand" "w")))]
885 "TARGET_NEON"
886 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887 [(set_attr "type" "neon_logic<q>")]
888 )
889
890 ;; Compare to *anddi_notdi_di.
891 (define_insn "bicdi3_neon"
892 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
893 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
894 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
895 "TARGET_NEON"
896 "@
897 vbic\t%P0, %P1, %P2
898 #
899 #"
900 [(set_attr "type" "neon_logic,multiple,multiple")
901 (set_attr "length" "*,8,8")]
902 )
903
904 (define_insn "xor<mode>3"
905 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
906 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
907 (match_operand:VDQ 2 "s_register_operand" "w")))]
908 "TARGET_NEON"
909 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
910 [(set_attr "type" "neon_logic<q>")]
911 )
912
913 (define_insn "one_cmpl<mode>2"
914 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
915 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
916 "TARGET_NEON"
917 "vmvn\t%<V_reg>0, %<V_reg>1"
918 [(set_attr "type" "neon_move<q>")]
919 )
920
921 (define_insn "abs<mode>2"
922 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
923 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
924 "TARGET_NEON"
925 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
926 [(set (attr "type")
927 (if_then_else (match_test "<Is_float_mode>")
928 (const_string "neon_fp_abs_s<q>")
929 (const_string "neon_abs<q>")))]
930 )
931
932 (define_insn "neg<mode>2"
933 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
934 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
935 "TARGET_NEON"
936 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
937 [(set (attr "type")
938 (if_then_else (match_test "<Is_float_mode>")
939 (const_string "neon_fp_neg_s<q>")
940 (const_string "neon_neg<q>")))]
941 )
942
943 (define_insn "negdi2_neon"
944 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
945 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
946 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
947 (clobber (reg:CC CC_REGNUM))]
948 "TARGET_NEON"
949 "#"
950 [(set_attr "length" "8")
951 (set_attr "type" "multiple")]
952 )
953
954 ; Split negdi2_neon for vfp registers
955 (define_split
956 [(set (match_operand:DI 0 "s_register_operand" "")
957 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
958 (clobber (match_scratch:DI 2 ""))
959 (clobber (reg:CC CC_REGNUM))]
960 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
961 [(set (match_dup 2) (const_int 0))
962 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
963 (clobber (reg:CC CC_REGNUM))])]
964 {
965 if (!REG_P (operands[2]))
966 operands[2] = operands[0];
967 }
968 )
969
970 ; Split negdi2_neon for core registers
971 (define_split
972 [(set (match_operand:DI 0 "s_register_operand" "")
973 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
974 (clobber (match_scratch:DI 2 ""))
975 (clobber (reg:CC CC_REGNUM))]
976 "TARGET_32BIT && reload_completed
977 && arm_general_register_operand (operands[0], DImode)"
978 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
979 (clobber (reg:CC CC_REGNUM))])]
980 ""
981 )
982
983 (define_insn "<absneg_str><mode>2"
984 [(set (match_operand:VH 0 "s_register_operand" "=w")
985 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
986 "TARGET_NEON_FP16INST"
987 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
988 [(set_attr "type" "neon_abs<q>")]
989 )
990
991 (define_expand "neon_v<absneg_str><mode>"
992 [(set
993 (match_operand:VH 0 "s_register_operand")
994 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
995 "TARGET_NEON_FP16INST"
996 {
997 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
998 DONE;
999 })
1000
1001 (define_insn "neon_v<fp16_rnd_str><mode>"
1002 [(set (match_operand:VH 0 "s_register_operand" "=w")
1003 (unspec:VH
1004 [(match_operand:VH 1 "s_register_operand" "w")]
1005 FP16_RND))]
1006 "TARGET_NEON_FP16INST"
1007 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
1008 [(set_attr "type" "neon_fp_round_s<q>")]
1009 )
1010
1011 (define_insn "neon_vrsqrte<mode>"
1012 [(set (match_operand:VH 0 "s_register_operand" "=w")
1013 (unspec:VH
1014 [(match_operand:VH 1 "s_register_operand" "w")]
1015 UNSPEC_VRSQRTE))]
1016 "TARGET_NEON_FP16INST"
1017 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
1018 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
1019 )
1020
1021 (define_insn "*umin<mode>3_neon"
1022 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1023 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1024 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1025 "TARGET_NEON"
1026 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1027 [(set_attr "type" "neon_minmax<q>")]
1028 )
1029
1030 (define_insn "*umax<mode>3_neon"
1031 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1032 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1033 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1034 "TARGET_NEON"
1035 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1036 [(set_attr "type" "neon_minmax<q>")]
1037 )
1038
1039 (define_insn "*smin<mode>3_neon"
1040 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1041 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1042 (match_operand:VDQW 2 "s_register_operand" "w")))]
1043 "TARGET_NEON"
1044 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1045 [(set (attr "type")
1046 (if_then_else (match_test "<Is_float_mode>")
1047 (const_string "neon_fp_minmax_s<q>")
1048 (const_string "neon_minmax<q>")))]
1049 )
1050
1051 (define_insn "*smax<mode>3_neon"
1052 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1053 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1054 (match_operand:VDQW 2 "s_register_operand" "w")))]
1055 "TARGET_NEON"
1056 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1057 [(set (attr "type")
1058 (if_then_else (match_test "<Is_float_mode>")
1059 (const_string "neon_fp_minmax_s<q>")
1060 (const_string "neon_minmax<q>")))]
1061 )
1062
1063 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1064 ; generic vectorizer code. It ends up creating a V2DI constructor with
1065 ; SImode elements.
1066
1067 (define_insn "vashl<mode>3"
1068 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1069 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1070 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
1071 "TARGET_NEON"
1072 {
1073 switch (which_alternative)
1074 {
1075 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1076 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1077 <MODE>mode,
1078 VALID_NEON_QREG_MODE (<MODE>mode),
1079 true);
1080 default: gcc_unreachable ();
1081 }
1082 }
1083 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1084 )
1085
1086 (define_insn "vashr<mode>3_imm"
1087 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1088 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1089 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1090 "TARGET_NEON"
1091 {
1092 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1093 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1094 false);
1095 }
1096 [(set_attr "type" "neon_shift_imm<q>")]
1097 )
1098
1099 (define_insn "vlshr<mode>3_imm"
1100 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1101 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1102 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1103 "TARGET_NEON"
1104 {
1105 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1106 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1107 false);
1108 }
1109 [(set_attr "type" "neon_shift_imm<q>")]
1110 )
1111
1112 ; Used for implementing arithmetic shift-right as a left-shift by a negative
1113 ; amount, with signed operands.  This is essentially the same as ashl<mode>3
1114 ; above, but using an unspec in case GCC tries anything tricky with negative
1115 ; shift amounts.
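
; For illustration only (not part of the original comment): NEON has no
; right-shift by a register, so a variable arithmetic shift in source code,
; as below, is expanded (see vashr<mode>3 further down) into a negation of
; the shift amount followed by this signed vshl.  The function name is made
; up for the example:
;
;   void
;   sar_inplace (int *x, int n, int amount)
;   {
;     for (int i = 0; i < n; i++)
;       x[i] >>= amount;   /* duplicate and negate the count, then vshl.s32 */
;   }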
1116
1117 (define_insn "ashl<mode>3_signed"
1118 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1119 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1120 (match_operand:VDQI 2 "s_register_operand" "w")]
1121 UNSPEC_ASHIFT_SIGNED))]
1122 "TARGET_NEON"
1123 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1124 [(set_attr "type" "neon_shift_reg<q>")]
1125 )
1126
1127 ; Used for implementing logical shift-right, which is a left-shift by a negative
1128 ; amount, with unsigned operands.
1129
1130 (define_insn "ashl<mode>3_unsigned"
1131 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1132 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1133 (match_operand:VDQI 2 "s_register_operand" "w")]
1134 UNSPEC_ASHIFT_UNSIGNED))]
1135 "TARGET_NEON"
1136 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1137 [(set_attr "type" "neon_shift_reg<q>")]
1138 )
1139
1140 (define_expand "vashr<mode>3"
1141 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1142 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1143 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1144 "TARGET_NEON"
1145 {
1146 if (s_register_operand (operands[2], <MODE>mode))
1147 {
1148 rtx neg = gen_reg_rtx (<MODE>mode);
1149 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1150 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1151 }
1152 else
1153 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1154 DONE;
1155 })
1156
1157 (define_expand "vlshr<mode>3"
1158 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1159 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1160 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1161 "TARGET_NEON"
1162 {
1163 if (s_register_operand (operands[2], <MODE>mode))
1164 {
1165 rtx neg = gen_reg_rtx (<MODE>mode);
1166 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1167 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1168 }
1169 else
1170 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1171 DONE;
1172 })
1173
1174 ;; 64-bit shifts
1175
1176 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1177 ;; leaving the upper half uninitialized.  This is OK since the shift
1178 ;; instruction only looks at the low 8 bits anyway.  To avoid confusing
1179 ;; data-flow analysis, however, we pretend the full register is set
1180 ;; using an unspec.
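;;
;; Illustrative sketch (not part of the original comment): a 64-bit shift by a
;; variable amount, as in the function below, may be carried out in a NEON
;; register, in which case the count is transferred with this pattern before
;; the vshl.u64 is issued:
;;
;;   unsigned long long
;;   shl64 (unsigned long long x, int amount)
;;   {
;;     return x << amount;   /* neon_load_count + vshl.u64 when done in NEON */
;;   }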
1181 (define_insn "neon_load_count"
1182 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1183 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1184 UNSPEC_LOAD_COUNT))]
1185 "TARGET_NEON"
1186 "@
1187 vld1.32\t{%P0[0]}, %A1
1188 vmov.32\t%P0[0], %1"
1189 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1190 )
1191
1192 (define_insn "ashldi3_neon_noclobber"
1193 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1194 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1195 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1196 "TARGET_NEON && reload_completed
1197 && (!CONST_INT_P (operands[2])
1198 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1199 "@
1200 vshl.u64\t%P0, %P1, %2
1201 vshl.u64\t%P0, %P1, %P2"
1202 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1203 )
1204
1205 (define_insn_and_split "ashldi3_neon"
1206 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
1207 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
1208 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
1209 (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
1210 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1211 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
1212 (clobber (reg:CC_C CC_REGNUM))]
1213 "TARGET_NEON"
1214 "#"
1215 "TARGET_NEON && reload_completed"
1216 [(const_int 0)]
1217 "
1218 {
1219 if (IS_VFP_REGNUM (REGNO (operands[0])))
1220 {
1221 if (CONST_INT_P (operands[2]))
1222 {
1223 if (INTVAL (operands[2]) < 1)
1224 {
1225 emit_insn (gen_movdi (operands[0], operands[1]));
1226 DONE;
1227 }
1228 else if (INTVAL (operands[2]) > 63)
1229 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1230 }
1231 else
1232 {
1233 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1234 operands[2] = operands[5];
1235 }
1236
1237 /* Ditch the unnecessary clobbers. */
1238 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1239 operands[2]));
1240 }
1241 else
1242 {
1243 /* The shift expanders support either full overlap or no overlap. */
1244 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1245 || REGNO (operands[0]) == REGNO (operands[1]));
1246
1247 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1248 operands[2], operands[3], operands[4]);
1249 }
1250 DONE;
1251 }"
1252 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1253 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1254 (set_attr "type" "multiple")]
1255 )
1256
1257 ; The shift amount needs to be negated for right-shifts
1258 (define_insn "signed_shift_di3_neon"
1259 [(set (match_operand:DI 0 "s_register_operand" "=w")
1260 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1261 (match_operand:DI 2 "s_register_operand" " w")]
1262 UNSPEC_ASHIFT_SIGNED))]
1263 "TARGET_NEON && reload_completed"
1264 "vshl.s64\t%P0, %P1, %P2"
1265 [(set_attr "type" "neon_shift_reg")]
1266 )
1267
1268 ; The shift amount needs to be negated for right-shifts
1269 (define_insn "unsigned_shift_di3_neon"
1270 [(set (match_operand:DI 0 "s_register_operand" "=w")
1271 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1272 (match_operand:DI 2 "s_register_operand" " w")]
1273 UNSPEC_ASHIFT_UNSIGNED))]
1274 "TARGET_NEON && reload_completed"
1275 "vshl.u64\t%P0, %P1, %P2"
1276 [(set_attr "type" "neon_shift_reg")]
1277 )
1278
1279 (define_insn "ashrdi3_neon_imm_noclobber"
1280 [(set (match_operand:DI 0 "s_register_operand" "=w")
1281 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1282 (match_operand:DI 2 "const_int_operand" " i")))]
1283 "TARGET_NEON && reload_completed
1284 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1285 "vshr.s64\t%P0, %P1, %2"
1286 [(set_attr "type" "neon_shift_imm")]
1287 )
1288
1289 (define_insn "lshrdi3_neon_imm_noclobber"
1290 [(set (match_operand:DI 0 "s_register_operand" "=w")
1291 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1292 (match_operand:DI 2 "const_int_operand" " i")))]
1293 "TARGET_NEON && reload_completed
1294 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1295 "vshr.u64\t%P0, %P1, %2"
1296 [(set_attr "type" "neon_shift_imm")]
1297 )
1298
1299 ;; ashrdi3_neon
1300 ;; lshrdi3_neon
1301 (define_insn_and_split "<shift>di3_neon"
1302 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
1303 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1304 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1305 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1306 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1307 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1308 (clobber (reg:CC CC_REGNUM))]
1309 "TARGET_NEON"
1310 "#"
1311 "TARGET_NEON && reload_completed"
1312 [(const_int 0)]
1313 "
1314 {
1315 if (IS_VFP_REGNUM (REGNO (operands[0])))
1316 {
1317 if (CONST_INT_P (operands[2]))
1318 {
1319 if (INTVAL (operands[2]) < 1)
1320 {
1321 emit_insn (gen_movdi (operands[0], operands[1]));
1322 DONE;
1323 }
1324 else if (INTVAL (operands[2]) > 64)
1325 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1326
1327 /* Ditch the unnecessary clobbers. */
1328 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1329 operands[1],
1330 operands[2]));
1331 }
1332 else
1333 {
1334 /* We must use a negative left-shift. */
1335 emit_insn (gen_negsi2 (operands[3], operands[2]));
1336 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1337 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1338 operands[5]));
1339 }
1340 }
1341 else
1342 {
1343 /* The shift expanders support either full overlap or no overlap. */
1344 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1345 || REGNO (operands[0]) == REGNO (operands[1]));
1346
1347 /* This clobbers CC (ASHIFTRT by register only). */
1348 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1349 operands[2], operands[3], operands[4]);
1350 }
1351
1352 DONE;
1353 }"
1354 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1355 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1356 (set_attr "type" "multiple")]
1357 )
1358
1359 ;; Widening operations
1360
1361 (define_expand "widen_ssum<mode>3"
1362 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1363 (plus:<V_double_width>
1364 (sign_extend:<V_double_width>
1365 (match_operand:VQI 1 "s_register_operand" ""))
1366 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1367 "TARGET_NEON"
1368 {
1369 machine_mode mode = GET_MODE (operands[1]);
1370 rtx p1, p2;
1371
1372 p1 = arm_simd_vect_par_cnst_half (mode, false);
1373 p2 = arm_simd_vect_par_cnst_half (mode, true);
1374
1375 if (operands[0] != operands[2])
1376 emit_move_insn (operands[0], operands[2]);
1377
1378 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1379 operands[1],
1380 p1,
1381 operands[0]));
1382 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1383 operands[1],
1384 p2,
1385 operands[0]));
1386 DONE;
1387 }
1388 )
1389
1390 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1391 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1392 (plus:<V_double_width>
1393 (sign_extend:<V_double_width>
1394 (vec_select:<V_HALF>
1395 (match_operand:VQI 1 "s_register_operand" "%w")
1396 (match_operand:VQI 2 "vect_par_constant_low" "")))
1397 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1398 "TARGET_NEON"
1399 {
1400 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1401 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1402 }
1403 [(set_attr "type" "neon_add_widen")])
1404
1405 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1406 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1407 (plus:<V_double_width>
1408 (sign_extend:<V_double_width>
1409 (vec_select:<V_HALF>
1410 (match_operand:VQI 1 "s_register_operand" "%w")
1411 (match_operand:VQI 2 "vect_par_constant_high" "")))
1412 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1413 "TARGET_NEON"
1414 {
1415 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1416 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1417 }
1418 [(set_attr "type" "neon_add_widen")])
1419
1420 (define_insn "widen_ssum<mode>3"
1421 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1422 (plus:<V_widen>
1423 (sign_extend:<V_widen>
1424 (match_operand:VW 1 "s_register_operand" "%w"))
1425 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1426 "TARGET_NEON"
1427 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1428 [(set_attr "type" "neon_add_widen")]
1429 )
1430
1431 (define_expand "widen_usum<mode>3"
1432 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1433 (plus:<V_double_width>
1434 (zero_extend:<V_double_width>
1435 (match_operand:VQI 1 "s_register_operand" ""))
1436 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1437 "TARGET_NEON"
1438 {
1439 machine_mode mode = GET_MODE (operands[1]);
1440 rtx p1, p2;
1441
1442 p1 = arm_simd_vect_par_cnst_half (mode, false);
1443 p2 = arm_simd_vect_par_cnst_half (mode, true);
1444
1445 if (operands[0] != operands[2])
1446 emit_move_insn (operands[0], operands[2]);
1447
1448 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1449 operands[1],
1450 p1,
1451 operands[0]));
1452 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1453 operands[1],
1454 p2,
1455 operands[0]));
1456 DONE;
1457 }
1458 )
1459
1460 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1461 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1462 (plus:<V_double_width>
1463 (zero_extend:<V_double_width>
1464 (vec_select:<V_HALF>
1465 (match_operand:VQI 1 "s_register_operand" "%w")
1466 (match_operand:VQI 2 "vect_par_constant_low" "")))
1467 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1468 "TARGET_NEON"
1469 {
1470 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1471 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1472 }
1473 [(set_attr "type" "neon_add_widen")])
1474
1475 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1476 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1477 (plus:<V_double_width>
1478 (zero_extend:<V_double_width>
1479 (vec_select:<V_HALF>
1480 (match_operand:VQI 1 "s_register_operand" "%w")
1481 (match_operand:VQI 2 "vect_par_constant_high" "")))
1482 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1483 "TARGET_NEON"
1484 {
1485 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1486 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1487 }
1488 [(set_attr "type" "neon_add_widen")])
1489
1490 (define_insn "widen_usum<mode>3"
1491 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1492 (plus:<V_widen> (zero_extend:<V_widen>
1493 (match_operand:VW 1 "s_register_operand" "%w"))
1494 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1495 "TARGET_NEON"
1496 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1497 [(set_attr "type" "neon_add_widen")]
1498 )
1499
1500 ;; Helpers for quad-word reduction operations
1501
1502 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1503 ; operand[1] to the high N/2 elements of the same vector.  Put the result in
1504 ; operand[0], an N/2-element vector.
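
; As an illustration only (not from the original file), a V4SI "plus"
; reduction through these helpers is, in scalar terms, roughly:
;
;   int
;   sum4 (const int v[4])
;   {
;     int a = v[0] + v[2];   /* quad_halves_plusv4si: low half + high half */
;     int b = v[1] + v[3];
;     return a + b;          /* then the V2SI pairwise (vpadd) reduction   */
;   }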
1505
1506 (define_insn "quad_halves_<code>v4si"
1507 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1508 (VQH_OPS:V2SI
1509 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1510 (parallel [(const_int 0) (const_int 1)]))
1511 (vec_select:V2SI (match_dup 1)
1512 (parallel [(const_int 2) (const_int 3)]))))]
1513 "TARGET_NEON"
1514 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1515 [(set_attr "vqh_mnem" "<VQH_mnem>")
1516 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1517 )
1518
1519 (define_insn "quad_halves_<code>v4sf"
1520 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1521 (VQHS_OPS:V2SF
1522 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1523 (parallel [(const_int 0) (const_int 1)]))
1524 (vec_select:V2SF (match_dup 1)
1525 (parallel [(const_int 2) (const_int 3)]))))]
1526 "TARGET_NEON && flag_unsafe_math_optimizations"
1527 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1528 [(set_attr "vqh_mnem" "<VQH_mnem>")
1529 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1530 )
1531
1532 (define_insn "quad_halves_<code>v8hi"
1533 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1534 (VQH_OPS:V4HI
1535 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1536 (parallel [(const_int 0) (const_int 1)
1537 (const_int 2) (const_int 3)]))
1538 (vec_select:V4HI (match_dup 1)
1539 (parallel [(const_int 4) (const_int 5)
1540 (const_int 6) (const_int 7)]))))]
1541 "TARGET_NEON"
1542 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1543 [(set_attr "vqh_mnem" "<VQH_mnem>")
1544 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1545 )
1546
1547 (define_insn "quad_halves_<code>v16qi"
1548 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1549 (VQH_OPS:V8QI
1550 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1551 (parallel [(const_int 0) (const_int 1)
1552 (const_int 2) (const_int 3)
1553 (const_int 4) (const_int 5)
1554 (const_int 6) (const_int 7)]))
1555 (vec_select:V8QI (match_dup 1)
1556 (parallel [(const_int 8) (const_int 9)
1557 (const_int 10) (const_int 11)
1558 (const_int 12) (const_int 13)
1559 (const_int 14) (const_int 15)]))))]
1560 "TARGET_NEON"
1561 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1562 [(set_attr "vqh_mnem" "<VQH_mnem>")
1563 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1564 )
1565
1566 (define_expand "move_hi_quad_<mode>"
1567 [(match_operand:ANY128 0 "s_register_operand" "")
1568 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1569 "TARGET_NEON"
1570 {
1571 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1572 GET_MODE_SIZE (<V_HALF>mode)),
1573 operands[1]);
1574 DONE;
1575 })
1576
1577 (define_expand "move_lo_quad_<mode>"
1578 [(match_operand:ANY128 0 "s_register_operand" "")
1579 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1580 "TARGET_NEON"
1581 {
1582 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1583 <MODE>mode, 0),
1584 operands[1]);
1585 DONE;
1586 })
1587
1588 ;; Reduction operations
1589
1590 (define_expand "reduc_plus_scal_<mode>"
1591 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1592 (match_operand:VD 1 "s_register_operand" "")]
1593 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1594 {
1595 rtx vec = gen_reg_rtx (<MODE>mode);
1596 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1597 &gen_neon_vpadd_internal<mode>);
1598 /* The same result is actually computed into every element. */
1599 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1600 DONE;
1601 })
1602
1603 (define_expand "reduc_plus_scal_<mode>"
1604 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1605 (match_operand:VQ 1 "s_register_operand" "")]
1606 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1607 && !BYTES_BIG_ENDIAN"
1608 {
1609 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1610
1611 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1612 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1613
1614 DONE;
1615 })
1616
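; The Q-register reductions above work by splitting the 128-bit vector into
; its two D-register halves (the quad_halves_* patterns), operating on the
; halves, and then finishing with the 64-bit pairwise reduction.  The same
; shape can be written with intrinsics; a minimal sketch, assuming the
; standard arm_neon.h spellings of these helpers:
;
;   #include <arm_neon.h>
;
;   int32_t reduce_add_s32 (int32x4_t v)
;   {
;     /* quad_halves_plusv4si: add the low and high halves.  */
;     int32x2_t half = vadd_s32 (vget_low_s32 (v), vget_high_s32 (v));
;     /* reduc_plus_scal_v2si: one pairwise add, then take lane 0.  */
;     half = vpadd_s32 (half, half);
;     return vget_lane_s32 (half, 0);
;   }
;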
1617 (define_expand "reduc_plus_scal_v2di"
1618 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1619 (match_operand:V2DI 1 "s_register_operand" "")]
1620 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1621 {
1622 rtx vec = gen_reg_rtx (V2DImode);
1623
1624 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1625 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1626
1627 DONE;
1628 })
1629
1630 (define_insn "arm_reduc_plus_internal_v2di"
1631 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1632 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1633 UNSPEC_VPADD))]
1634 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1635 "vadd.i64\t%e0, %e1, %f1"
1636 [(set_attr "type" "neon_add_q")]
1637 )
1638
1639 (define_expand "reduc_smin_scal_<mode>"
1640 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1641 (match_operand:VD 1 "s_register_operand" "")]
1642 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1643 {
1644 rtx vec = gen_reg_rtx (<MODE>mode);
1645
1646 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1647 &gen_neon_vpsmin<mode>);
1648 /* The result is computed into every element of the vector. */
1649 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1650 DONE;
1651 })
1652
1653 (define_expand "reduc_smin_scal_<mode>"
1654 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1655 (match_operand:VQ 1 "s_register_operand" "")]
1656 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1657 && !BYTES_BIG_ENDIAN"
1658 {
1659 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1660
1661 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1662 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1663
1664 DONE;
1665 })
1666
1667 (define_expand "reduc_smax_scal_<mode>"
1668 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1669 (match_operand:VD 1 "s_register_operand" "")]
1670 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1671 {
1672 rtx vec = gen_reg_rtx (<MODE>mode);
1673 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1674 &gen_neon_vpsmax<mode>);
1675 /* The result is computed into every element of the vector. */
1676 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1677 DONE;
1678 })
1679
1680 (define_expand "reduc_smax_scal_<mode>"
1681 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1682 (match_operand:VQ 1 "s_register_operand" "")]
1683 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1684 && !BYTES_BIG_ENDIAN"
1685 {
1686 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1687
1688 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1689 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1690
1691 DONE;
1692 })
1693
1694 (define_expand "reduc_umin_scal_<mode>"
1695 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1696 (match_operand:VDI 1 "s_register_operand" "")]
1697 "TARGET_NEON"
1698 {
1699 rtx vec = gen_reg_rtx (<MODE>mode);
1700 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1701 &gen_neon_vpumin<mode>);
1702 /* The result is computed into every element of the vector. */
1703 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1704 DONE;
1705 })
1706
1707 (define_expand "reduc_umin_scal_<mode>"
1708 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1709 (match_operand:VQI 1 "s_register_operand" "")]
1710 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1711 {
1712 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1713
1714 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1715 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1716
1717 DONE;
1718 })
1719
1720 (define_expand "reduc_umax_scal_<mode>"
1721 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1722 (match_operand:VDI 1 "s_register_operand" "")]
1723 "TARGET_NEON"
1724 {
1725 rtx vec = gen_reg_rtx (<MODE>mode);
1726 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1727 &gen_neon_vpumax<mode>);
1728 /* The result is computed into every element of the vector. */
1729 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1730 DONE;
1731 })
1732
1733 (define_expand "reduc_umax_scal_<mode>"
1734 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1735 (match_operand:VQI 1 "s_register_operand" "")]
1736 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1737 {
1738 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1739
1740 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1741 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1742
1743 DONE;
1744 })
1745
1746 (define_insn "neon_vpadd_internal<mode>"
1747 [(set (match_operand:VD 0 "s_register_operand" "=w")
1748 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1749 (match_operand:VD 2 "s_register_operand" "w")]
1750 UNSPEC_VPADD))]
1751 "TARGET_NEON"
1752 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1753 ;; Assume this schedules like vadd.
1754 [(set (attr "type")
1755 (if_then_else (match_test "<Is_float_mode>")
1756 (const_string "neon_fp_reduc_add_s<q>")
1757 (const_string "neon_reduc_add<q>")))]
1758 )
1759
1760 (define_insn "neon_vpaddv4hf"
1761 [(set
1762 (match_operand:V4HF 0 "s_register_operand" "=w")
1763 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1764 (match_operand:V4HF 2 "s_register_operand" "w")]
1765 UNSPEC_VPADD))]
1766 "TARGET_NEON_FP16INST"
1767 "vpadd.f16\t%P0, %P1, %P2"
1768 [(set_attr "type" "neon_reduc_add")]
1769 )
1770
1771 (define_insn "neon_vpsmin<mode>"
1772 [(set (match_operand:VD 0 "s_register_operand" "=w")
1773 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1774 (match_operand:VD 2 "s_register_operand" "w")]
1775 UNSPEC_VPSMIN))]
1776 "TARGET_NEON"
1777 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1778 [(set (attr "type")
1779 (if_then_else (match_test "<Is_float_mode>")
1780 (const_string "neon_fp_reduc_minmax_s<q>")
1781 (const_string "neon_reduc_minmax<q>")))]
1782 )
1783
1784 (define_insn "neon_vpsmax<mode>"
1785 [(set (match_operand:VD 0 "s_register_operand" "=w")
1786 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1787 (match_operand:VD 2 "s_register_operand" "w")]
1788 UNSPEC_VPSMAX))]
1789 "TARGET_NEON"
1790 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1791 [(set (attr "type")
1792 (if_then_else (match_test "<Is_float_mode>")
1793 (const_string "neon_fp_reduc_minmax_s<q>")
1794 (const_string "neon_reduc_minmax<q>")))]
1795 )
1796
1797 (define_insn "neon_vpumin<mode>"
1798 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1799 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1800 (match_operand:VDI 2 "s_register_operand" "w")]
1801 UNSPEC_VPUMIN))]
1802 "TARGET_NEON"
1803 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1804 [(set_attr "type" "neon_reduc_minmax<q>")]
1805 )
1806
1807 (define_insn "neon_vpumax<mode>"
1808 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1809 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1810 (match_operand:VDI 2 "s_register_operand" "w")]
1811 UNSPEC_VPUMAX))]
1812 "TARGET_NEON"
1813 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1814 [(set_attr "type" "neon_reduc_minmax<q>")]
1815 )
1816
1817 ;; Saturating arithmetic
1818
1819 ; NOTE: Neon supports many more saturating variants of instructions than the
1820 ; following, but these are all that GCC currently understands.
1821 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1822 ; yet either, although these patterns may be used by intrinsics when they're
1823 ; added.
1824
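; As a reminder of the semantics these RTL codes describe, ss_plus on 16-bit
; lanes behaves like the scalar sketch below, applied to each lane
; independently (plain C, not tied to any particular pattern here):
;
;   #include <stdint.h>
;
;   static int16_t ss_add16 (int16_t a, int16_t b)
;   {
;     int32_t sum = (int32_t) a + b;           /* widen so the sum cannot wrap  */
;     if (sum > INT16_MAX) return INT16_MAX;   /* saturate towards +infinity    */
;     if (sum < INT16_MIN) return INT16_MIN;   /* saturate towards -infinity    */
;     return (int16_t) sum;
;   }
;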
1825 (define_insn "*ss_add<mode>_neon"
1826 [(set (match_operand:VD 0 "s_register_operand" "=w")
1827 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1828 (match_operand:VD 2 "s_register_operand" "w")))]
1829 "TARGET_NEON"
1830 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1831 [(set_attr "type" "neon_qadd<q>")]
1832 )
1833
1834 (define_insn "*us_add<mode>_neon"
1835 [(set (match_operand:VD 0 "s_register_operand" "=w")
1836 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1837 (match_operand:VD 2 "s_register_operand" "w")))]
1838 "TARGET_NEON"
1839 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1840 [(set_attr "type" "neon_qadd<q>")]
1841 )
1842
1843 (define_insn "*ss_sub<mode>_neon"
1844 [(set (match_operand:VD 0 "s_register_operand" "=w")
1845 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1846 (match_operand:VD 2 "s_register_operand" "w")))]
1847 "TARGET_NEON"
1848 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1849 [(set_attr "type" "neon_qsub<q>")]
1850 )
1851
1852 (define_insn "*us_sub<mode>_neon"
1853 [(set (match_operand:VD 0 "s_register_operand" "=w")
1854 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1855 (match_operand:VD 2 "s_register_operand" "w")))]
1856 "TARGET_NEON"
1857 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1858 [(set_attr "type" "neon_qsub<q>")]
1859 )
1860
1861 ;; Conditional instructions. These are comparisons with conditional moves for
1862 ;; vectors. They perform the assignment:
1863 ;;
1864 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1865 ;;
1866 ;; where op3 is <, <=, ==, !=, >= or > (the expander below also handles
1867 ;; the unordered forms).  Operations are performed element-wise.
1868
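; For reference, this is the kind of scalar source the vectorizer lowers
; through these expanders (a sketch; whether it actually vectorizes depends
; on the usual -O3/-ftree-vectorize cost and alignment checks, and on
; -funsafe-math-optimizations for the float modes):
;
;   void cond_select (int *restrict d, const int *restrict a,
;                     const int *restrict b, const int *restrict x,
;                     const int *restrict y, int n)
;   {
;     for (int i = 0; i < n; i++)
;       d[i] = a[i] > b[i] ? x[i] : y[i];   /* vcgt.s32 mask, then vbsl  */
;   }
;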
1869 (define_expand "vcond<mode><mode>"
1870 [(set (match_operand:VDQW 0 "s_register_operand" "")
1871 (if_then_else:VDQW
1872 (match_operator 3 "comparison_operator"
1873 [(match_operand:VDQW 4 "s_register_operand" "")
1874 (match_operand:VDQW 5 "nonmemory_operand" "")])
1875 (match_operand:VDQW 1 "s_register_operand" "")
1876 (match_operand:VDQW 2 "s_register_operand" "")))]
1877 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1878 {
1879 int inverse = 0;
1880 int use_zero_form = 0;
1881 int swap_bsl_operands = 0;
1882 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1883 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1884
1885 rtx (*base_comparison) (rtx, rtx, rtx);
1886 rtx (*complementary_comparison) (rtx, rtx, rtx);
1887
1888 switch (GET_CODE (operands[3]))
1889 {
1890 case GE:
1891 case GT:
1892 case LE:
1893 case LT:
1894 case EQ:
1895 if (operands[5] == CONST0_RTX (<MODE>mode))
1896 {
1897 use_zero_form = 1;
1898 break;
1899 }
1900 /* Fall through. */
1901 default:
1902 if (!REG_P (operands[5]))
1903 operands[5] = force_reg (<MODE>mode, operands[5]);
1904 }
1905
1906 switch (GET_CODE (operands[3]))
1907 {
1908 case LT:
1909 case UNLT:
1910 inverse = 1;
1911 /* Fall through. */
1912 case GE:
1913 case UNGE:
1914 case ORDERED:
1915 case UNORDERED:
1916 base_comparison = gen_neon_vcge<mode>;
1917 complementary_comparison = gen_neon_vcgt<mode>;
1918 break;
1919 case LE:
1920 case UNLE:
1921 inverse = 1;
1922 /* Fall through. */
1923 case GT:
1924 case UNGT:
1925 base_comparison = gen_neon_vcgt<mode>;
1926 complementary_comparison = gen_neon_vcge<mode>;
1927 break;
1928 case EQ:
1929 case NE:
1930 case UNEQ:
1931 base_comparison = gen_neon_vceq<mode>;
1932 complementary_comparison = gen_neon_vceq<mode>;
1933 break;
1934 default:
1935 gcc_unreachable ();
1936 }
1937
1938 switch (GET_CODE (operands[3]))
1939 {
1940 case LT:
1941 case LE:
1942 case GT:
1943 case GE:
1944 case EQ:
1945 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1946 As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
1947 a GE b -> a GE b
1948 a GT b -> a GT b
1949 a LE b -> b GE a
1950 a LT b -> b GT a
1951 a EQ b -> a EQ b
1952 Note that there also exist direct comparison against 0 forms,
1953 so catch those as a special case. */
1954 if (use_zero_form)
1955 {
1956 inverse = 0;
1957 switch (GET_CODE (operands[3]))
1958 {
1959 case LT:
1960 base_comparison = gen_neon_vclt<mode>;
1961 break;
1962 case LE:
1963 base_comparison = gen_neon_vcle<mode>;
1964 break;
1965 default:
1966 /* Do nothing, other zero form cases already have the correct
1967 base_comparison. */
1968 break;
1969 }
1970 }
1971
1972 if (!inverse)
1973 emit_insn (base_comparison (mask, operands[4], operands[5]));
1974 else
1975 emit_insn (complementary_comparison (mask, operands[5], operands[4]));
1976 break;
1977 case UNLT:
1978 case UNLE:
1979 case UNGT:
1980 case UNGE:
1981 case NE:
1982 /* Vector compare returns false for lanes which are unordered, so if we use
1983 the inverse of the comparison we actually want to emit and then
1984 swap the operands to BSL, we will end up with the correct result.
1985 Note that a NE NaN and NaN NE b are true for all a, b.
1986
1987 Our transformations are:
1988 a GE b -> !(b GT a)
1989 a GT b -> !(b GE a)
1990 a LE b -> !(a GT b)
1991 a LT b -> !(a GE b)
1992 a NE b -> !(a EQ b) */
1993
1994 if (inverse)
1995 emit_insn (base_comparison (mask, operands[4], operands[5]));
1996 else
1997 emit_insn (complementary_comparison (mask, operands[5], operands[4]));
1998
1999 swap_bsl_operands = 1;
2000 break;
2001 case UNEQ:
2002 /* We check (a > b || b > a), which is true iff (a != b && a ORDERED b);
2003 swapping the operands to BSL then selects using the complement of that
2004 mask, giving us (a == b || a UNORDERED b) as intended.  */
2005
2006 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
2007 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
2008 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2009 swap_bsl_operands = 1;
2010 break;
2011 case UNORDERED:
2012 /* Operands are ORDERED iff (a > b || b >= a).
2013 Swapping the operands to BSL will give the UNORDERED case. */
2014 swap_bsl_operands = 1;
2015 /* Fall through. */
2016 case ORDERED:
2017 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
2018 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
2019 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2020 break;
2021 default:
2022 gcc_unreachable ();
2023 }
2024
2025 if (swap_bsl_operands)
2026 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2027 operands[1]));
2028 else
2029 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2030 operands[2]));
2031 DONE;
2032 })
2033
2034 (define_expand "vcondu<mode><mode>"
2035 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2036 (if_then_else:VDQIW
2037 (match_operator 3 "arm_comparison_operator"
2038 [(match_operand:VDQIW 4 "s_register_operand" "")
2039 (match_operand:VDQIW 5 "s_register_operand" "")])
2040 (match_operand:VDQIW 1 "s_register_operand" "")
2041 (match_operand:VDQIW 2 "s_register_operand" "")))]
2042 "TARGET_NEON"
2043 {
2044 rtx mask;
2045 int inverse = 0, immediate_zero = 0;
2046
2047 mask = gen_reg_rtx (<V_cmp_result>mode);
2048
2049 if (operands[5] == CONST0_RTX (<MODE>mode))
2050 immediate_zero = 1;
2051 else if (!REG_P (operands[5]))
2052 operands[5] = force_reg (<MODE>mode, operands[5]);
2053
2054 switch (GET_CODE (operands[3]))
2055 {
2056 case GEU:
2057 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2058 break;
2059
2060 case GTU:
2061 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2062 break;
2063
2064 case EQ:
2065 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2066 break;
2067
2068 case LEU:
2069 if (immediate_zero)
2070 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2071 else
2072 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2073 break;
2074
2075 case LTU:
2076 if (immediate_zero)
2077 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2078 else
2079 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2080 break;
2081
2082 case NE:
2083 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2084 inverse = 1;
2085 break;
2086
2087 default:
2088 gcc_unreachable ();
2089 }
2090
2091 if (inverse)
2092 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2093 operands[1]));
2094 else
2095 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2096 operands[2]));
2097
2098 DONE;
2099 })
2100
2101 ;; Patterns for builtins.
2102
2103 ; good for plain vadd, vaddq.
2104
2105 (define_expand "neon_vadd<mode>"
2106 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2107 (match_operand:VCVTF 1 "s_register_operand" "w")
2108 (match_operand:VCVTF 2 "s_register_operand" "w")]
2109 "TARGET_NEON"
2110 {
2111 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2112 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2113 else
2114 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2115 operands[2]));
2116 DONE;
2117 })
2118
2119 (define_expand "neon_vadd<mode>"
2120 [(match_operand:VH 0 "s_register_operand")
2121 (match_operand:VH 1 "s_register_operand")
2122 (match_operand:VH 2 "s_register_operand")]
2123 "TARGET_NEON_FP16INST"
2124 {
2125 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
2126 DONE;
2127 })
2128
2129 (define_expand "neon_vsub<mode>"
2130 [(match_operand:VH 0 "s_register_operand")
2131 (match_operand:VH 1 "s_register_operand")
2132 (match_operand:VH 2 "s_register_operand")]
2133 "TARGET_NEON_FP16INST"
2134 {
2135 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2136 DONE;
2137 })
2138
2139 ; Note that NEON operations don't support the full IEEE 754 standard: in
2140 ; particular, denormal values are flushed to zero. This means that GCC cannot
2141 ; use those instructions for autovectorization, etc. unless
2142 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2143 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2144 ; header) must work in either case: if -funsafe-math-optimizations is given,
2145 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2146 ; expand to unspecs (which may potentially limit the extent to which they might
2147 ; be optimized by generic code).
2148
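; From the intrinsic user's point of view the distinction is invisible: a
; call such as the one below assembles to a single vadd.f32 either way, and
; only the RTL it expands through differs (canonical plus vs. UNSPEC_VADD).
; A minimal sketch, assuming the usual arm_neon.h spelling and that the
; intrinsic routes through the neon_vadd expander above:
;
;   #include <arm_neon.h>
;
;   float32x4_t add4 (float32x4_t a, float32x4_t b)
;   {
;     return vaddq_f32 (a, b);
;   }
;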
2149 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2150
2151 (define_insn "neon_vadd<mode>_unspec"
2152 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2153 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2154 (match_operand:VCVTF 2 "s_register_operand" "w")]
2155 UNSPEC_VADD))]
2156 "TARGET_NEON"
2157 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2158 [(set (attr "type")
2159 (if_then_else (match_test "<Is_float_mode>")
2160 (const_string "neon_fp_addsub_s<q>")
2161 (const_string "neon_add<q>")))]
2162 )
2163
2164 (define_insn "neon_vaddl<sup><mode>"
2165 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2166 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2167 (match_operand:VDI 2 "s_register_operand" "w")]
2168 VADDL))]
2169 "TARGET_NEON"
2170 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2171 [(set_attr "type" "neon_add_long")]
2172 )
2173
2174 (define_insn "neon_vaddw<sup><mode>"
2175 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2176 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2177 (match_operand:VDI 2 "s_register_operand" "w")]
2178 VADDW))]
2179 "TARGET_NEON"
2180 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2181 [(set_attr "type" "neon_add_widen")]
2182 )
2183
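; Usage sketch for the widening add forms above (assuming the usual
; arm_neon.h spellings):
;
;   #include <arm_neon.h>
;
;   uint16x8_t widen_add (uint8x8_t a, uint8x8_t b)
;   {
;     return vaddl_u8 (a, b);   /* vaddl.u8: 8-bit + 8-bit -> 16-bit lanes  */
;   }
;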
2184 ; vhadd and vrhadd.
2185
2186 (define_insn "neon_v<r>hadd<sup><mode>"
2187 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2188 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2189 (match_operand:VDQIW 2 "s_register_operand" "w")]
2190 VHADD))]
2191 "TARGET_NEON"
2192 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2193 [(set_attr "type" "neon_add_halve_q")]
2194 )
2195
2196 (define_insn "neon_vqadd<sup><mode>"
2197 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2198 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2199 (match_operand:VDQIX 2 "s_register_operand" "w")]
2200 VQADD))]
2201 "TARGET_NEON"
2202 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2203 [(set_attr "type" "neon_qadd<q>")]
2204 )
2205
2206 (define_insn "neon_v<r>addhn<mode>"
2207 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2208 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2209 (match_operand:VN 2 "s_register_operand" "w")]
2210 VADDHN))]
2211 "TARGET_NEON"
2212 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2213 [(set_attr "type" "neon_add_halve_narrow_q")]
2214 )
2215
2216 ;; Polynomial and Float multiplication.
2217 (define_insn "neon_vmul<pf><mode>"
2218 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2219 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2220 (match_operand:VPF 2 "s_register_operand" "w")]
2221 UNSPEC_VMUL))]
2222 "TARGET_NEON"
2223 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2224 [(set (attr "type")
2225 (if_then_else (match_test "<Is_float_mode>")
2226 (const_string "neon_fp_mul_s<q>")
2227 (const_string "neon_mul_<V_elem_ch><q>")))]
2228 )
2229
2230 (define_insn "mul<mode>3"
2231 [(set
2232 (match_operand:VH 0 "s_register_operand" "=w")
2233 (mult:VH
2234 (match_operand:VH 1 "s_register_operand" "w")
2235 (match_operand:VH 2 "s_register_operand" "w")))]
2236 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2237 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2238 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2239 )
2240
2241 (define_insn "neon_vmulf<mode>"
2242 [(set
2243 (match_operand:VH 0 "s_register_operand" "=w")
2244 (mult:VH
2245 (match_operand:VH 1 "s_register_operand" "w")
2246 (match_operand:VH 2 "s_register_operand" "w")))]
2247 "TARGET_NEON_FP16INST"
2248 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2249 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2250 )
2251
2252 (define_expand "neon_vmla<mode>"
2253 [(match_operand:VDQW 0 "s_register_operand" "=w")
2254 (match_operand:VDQW 1 "s_register_operand" "0")
2255 (match_operand:VDQW 2 "s_register_operand" "w")
2256 (match_operand:VDQW 3 "s_register_operand" "w")]
2257 "TARGET_NEON"
2258 {
2259 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2260 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2261 operands[2], operands[3]));
2262 else
2263 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2264 operands[2], operands[3]));
2265 DONE;
2266 })
2267
2268 (define_expand "neon_vfma<VCVTF:mode>"
2269 [(match_operand:VCVTF 0 "s_register_operand")
2270 (match_operand:VCVTF 1 "s_register_operand")
2271 (match_operand:VCVTF 2 "s_register_operand")
2272 (match_operand:VCVTF 3 "s_register_operand")]
2273 "TARGET_NEON && TARGET_FMA"
2274 {
2275 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2276 operands[1]));
2277 DONE;
2278 })
2279
2280 (define_expand "neon_vfma<VH:mode>"
2281 [(match_operand:VH 0 "s_register_operand")
2282 (match_operand:VH 1 "s_register_operand")
2283 (match_operand:VH 2 "s_register_operand")
2284 (match_operand:VH 3 "s_register_operand")]
2285 "TARGET_NEON_FP16INST"
2286 {
2287 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2288 operands[1]));
2289 DONE;
2290 })
2291
2292 (define_expand "neon_vfms<VCVTF:mode>"
2293 [(match_operand:VCVTF 0 "s_register_operand")
2294 (match_operand:VCVTF 1 "s_register_operand")
2295 (match_operand:VCVTF 2 "s_register_operand")
2296 (match_operand:VCVTF 3 "s_register_operand")]
2297 "TARGET_NEON && TARGET_FMA"
2298 {
2299 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2300 operands[1]));
2301 DONE;
2302 })
2303
2304 (define_expand "neon_vfms<VH:mode>"
2305 [(match_operand:VH 0 "s_register_operand")
2306 (match_operand:VH 1 "s_register_operand")
2307 (match_operand:VH 2 "s_register_operand")
2308 (match_operand:VH 3 "s_register_operand")]
2309 "TARGET_NEON_FP16INST"
2310 {
2311 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2312 operands[1]));
2313 DONE;
2314 })
2315
2316 ;; The expand RTL structure here is not important.
2317 ;; We use the gen_* functions anyway.
2318 ;; We just need something to wrap the iterators around.
2319
2320 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2321 [(set (match_operand:VCVTF 0 "s_register_operand")
2322 (unspec:VCVTF
2323 [(match_operand:VCVTF 1 "s_register_operand")
2324 (PLUSMINUS:<VFML>
2325 (match_operand:<VFML> 2 "s_register_operand")
2326 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2327 "TARGET_FP16FML"
2328 {
2329 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2330 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2331 operands[1],
2332 operands[2],
2333 operands[3],
2334 half, half));
2335 DONE;
2336 })
2337
2338 (define_insn "vfmal_low<mode>_intrinsic"
2339 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2340 (fma:VCVTF
2341 (float_extend:VCVTF
2342 (vec_select:<VFMLSEL>
2343 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2344 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2345 (float_extend:VCVTF
2346 (vec_select:<VFMLSEL>
2347 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2348 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2349 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2350 "TARGET_FP16FML"
2351 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2352 [(set_attr "type" "neon_fp_mla_s<q>")]
2353 )
2354
2355 (define_insn "vfmsl_high<mode>_intrinsic"
2356 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2357 (fma:VCVTF
2358 (float_extend:VCVTF
2359 (neg:<VFMLSEL>
2360 (vec_select:<VFMLSEL>
2361 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2362 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2363 (float_extend:VCVTF
2364 (vec_select:<VFMLSEL>
2365 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2366 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2367 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2368 "TARGET_FP16FML"
2369 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2370 [(set_attr "type" "neon_fp_mla_s<q>")]
2371 )
2372
2373 (define_insn "vfmal_high<mode>_intrinsic"
2374 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2375 (fma:VCVTF
2376 (float_extend:VCVTF
2377 (vec_select:<VFMLSEL>
2378 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2379 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2380 (float_extend:VCVTF
2381 (vec_select:<VFMLSEL>
2382 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2383 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2384 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2385 "TARGET_FP16FML"
2386 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2387 [(set_attr "type" "neon_fp_mla_s<q>")]
2388 )
2389
2390 (define_insn "vfmsl_low<mode>_intrinsic"
2391 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2392 (fma:VCVTF
2393 (float_extend:VCVTF
2394 (neg:<VFMLSEL>
2395 (vec_select:<VFMLSEL>
2396 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2397 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2398 (float_extend:VCVTF
2399 (vec_select:<VFMLSEL>
2400 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2401 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2402 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2403 "TARGET_FP16FML"
2404 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2405 [(set_attr "type" "neon_fp_mla_s<q>")]
2406 )
2407
2408 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2409 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2410 (unspec:VCVTF
2411 [(match_operand:VCVTF 1 "s_register_operand")
2412 (PLUSMINUS:<VFML>
2413 (match_operand:<VFML> 2 "s_register_operand")
2414 (match_operand:<VFML> 3 "s_register_operand"))
2415 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2416 "TARGET_FP16FML"
2417 {
2418 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2419 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2420 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2421 (operands[0], operands[1],
2422 operands[2], operands[3],
2423 half, lane));
2424 DONE;
2425 })
2426
2427 (define_insn "vfmal_lane_low<mode>_intrinsic"
2428 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2429 (fma:VCVTF
2430 (float_extend:VCVTF
2431 (vec_select:<VFMLSEL>
2432 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2433 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2434 (float_extend:VCVTF
2435 (vec_duplicate:<VFMLSEL>
2436 (vec_select:HF
2437 (match_operand:<VFML> 3 "s_register_operand" "x")
2438 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2439 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2440 "TARGET_FP16FML"
2441 {
2442 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2443 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2444 {
2445 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2446 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2447 }
2448 else
2449 {
2450 operands[5] = GEN_INT (lane);
2451 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2452 }
2453 }
2454 [(set_attr "type" "neon_fp_mla_s<q>")]
2455 )
2456
2457 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2458 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2459 (unspec:VCVTF
2460 [(match_operand:VCVTF 1 "s_register_operand")
2461 (PLUSMINUS:<VFML>
2462 (match_operand:<VFML> 2 "s_register_operand")
2463 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2464 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2465 "TARGET_FP16FML"
2466 {
2467 rtx lane
2468 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2469 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2470 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2471 (operands[0], operands[1], operands[2], operands[3],
2472 half, lane));
2473 DONE;
2474 })
2475
2476 ;; Used to implement the intrinsics:
2477 ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2478 ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2479 ;; Needs a bit of care to get the modes of the different sub-expressions right,
2480 ;; since 'a' and 'b' have different sizes, and to make sure we use the right
2481 ;; S or D subregister to select the appropriate lane from.
2482
2483 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2484 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2485 (fma:VCVTF
2486 (float_extend:VCVTF
2487 (vec_select:<VFMLSEL>
2488 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2489 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2490 (float_extend:VCVTF
2491 (vec_duplicate:<VFMLSEL>
2492 (vec_select:HF
2493 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2494 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2495 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2496 "TARGET_FP16FML"
2497 {
2498 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2499 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2500 int new_lane = lane % elts_per_reg;
2501 int regdiff = lane / elts_per_reg;
2502 operands[5] = GEN_INT (new_lane);
2503 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2504 because we want the print_operand code to print the appropriate
2505 S or D register prefix. */
2506 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2507 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2508 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2509 }
2510 [(set_attr "type" "neon_fp_mla_s<q>")]
2511 )
2512
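; A usage sketch for the first prototype listed in the comment above the
; pattern (the argument types are taken from that comment; the exact
; arm_neon.h spelling is assumed rather than checked here):
;
;   #include <arm_neon.h>
;
;   float32x4_t mla_lane (float32x4_t r, float16x8_t a, float16x4_t b)
;   {
;     /* Multiply the low half of 'a' by lane 2 of 'b', widen to float32,
;        and accumulate into 'r'.  */
;     return vfmlalq_lane_low_f16 (r, a, b, 2);
;   }
;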
2513 ;; Used to implement the intrinsics:
2514 ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2515 ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2516 ;; Needs a bit of care to get the modes of the different sub-expressions right,
2517 ;; since 'a' and 'b' have different sizes, and to make sure we use the right
2518 ;; S or D subregister to select the appropriate lane from.
2519
2520 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2521 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2522 (fma:VCVTF
2523 (float_extend:VCVTF
2524 (vec_select:<VFMLSEL>
2525 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2526 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2527 (float_extend:VCVTF
2528 (vec_duplicate:<VFMLSEL>
2529 (vec_select:HF
2530 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2531 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2532 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2533 "TARGET_FP16FML"
2534 {
2535 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2536 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2537 int new_lane = lane % elts_per_reg;
2538 int regdiff = lane / elts_per_reg;
2539 operands[5] = GEN_INT (new_lane);
2540 /* We re-create operands[3] in the halved VFMLSEL mode
2541 because we've calculated the correct half-width subreg to extract
2542 the lane from and we want to print *that* subreg instead. */
2543 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2544 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2545 }
2546 [(set_attr "type" "neon_fp_mla_s<q>")]
2547 )
2548
2549 (define_insn "vfmal_lane_high<mode>_intrinsic"
2550 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2551 (fma:VCVTF
2552 (float_extend:VCVTF
2553 (vec_select:<VFMLSEL>
2554 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2555 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2556 (float_extend:VCVTF
2557 (vec_duplicate:<VFMLSEL>
2558 (vec_select:HF
2559 (match_operand:<VFML> 3 "s_register_operand" "x")
2560 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2561 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2562 "TARGET_FP16FML"
2563 {
2564 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2565 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2566 {
2567 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2568 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2569 }
2570 else
2571 {
2572 operands[5] = GEN_INT (lane);
2573 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2574 }
2575 }
2576 [(set_attr "type" "neon_fp_mla_s<q>")]
2577 )
2578
2579 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2580 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2581 (fma:VCVTF
2582 (float_extend:VCVTF
2583 (neg:<VFMLSEL>
2584 (vec_select:<VFMLSEL>
2585 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2586 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2587 (float_extend:VCVTF
2588 (vec_duplicate:<VFMLSEL>
2589 (vec_select:HF
2590 (match_operand:<VFML> 3 "s_register_operand" "x")
2591 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2592 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2593 "TARGET_FP16FML"
2594 {
2595 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2596 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2597 {
2598 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2599 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2600 }
2601 else
2602 {
2603 operands[5] = GEN_INT (lane);
2604 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2605 }
2606 }
2607 [(set_attr "type" "neon_fp_mla_s<q>")]
2608 )
2609
2610 ;; Used to implement the intrinsics:
2611 ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2612 ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2613 ;; Needs a bit of care to get the modes of the different sub-expressions right,
2614 ;; since 'a' and 'b' have different sizes, and to make sure we use the right
2615 ;; S or D subregister to select the appropriate lane from.
2616
2617 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2618 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2619 (fma:VCVTF
2620 (float_extend:VCVTF
2621 (neg:<VFMLSEL>
2622 (vec_select:<VFMLSEL>
2623 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2624 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2625 (float_extend:VCVTF
2626 (vec_duplicate:<VFMLSEL>
2627 (vec_select:HF
2628 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2629 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2630 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2631 "TARGET_FP16FML"
2632 {
2633 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2634 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2635 int new_lane = lane % elts_per_reg;
2636 int regdiff = lane / elts_per_reg;
2637 operands[5] = GEN_INT (new_lane);
2638 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2639 because we want the print_operand code to print the appropriate
2640 S or D register prefix. */
2641 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2642 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2643 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2644 }
2645 [(set_attr "type" "neon_fp_mla_s<q>")]
2646 )
2647
2648 ;; Used to implement the intrinsics:
2649 ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2650 ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2651 ;; Needs a bit of care to get the modes of the different sub-expressions right,
2652 ;; since 'a' and 'b' have different sizes, and to make sure we use the right
2653 ;; S or D subregister to select the appropriate lane from.
2654
2655 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2656 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2657 (fma:VCVTF
2658 (float_extend:VCVTF
2659 (neg:<VFMLSEL>
2660 (vec_select:<VFMLSEL>
2661 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2662 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2663 (float_extend:VCVTF
2664 (vec_duplicate:<VFMLSEL>
2665 (vec_select:HF
2666 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2667 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2668 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2669 "TARGET_FP16FML"
2670 {
2671 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2672 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2673 int new_lane = lane % elts_per_reg;
2674 int regdiff = lane / elts_per_reg;
2675 operands[5] = GEN_INT (new_lane);
2676 /* We re-create operands[3] in the halved VFMLSEL mode
2677 because we've calculated the correct half-width subreg to extract
2678 the lane from and we want to print *that* subreg instead. */
2679 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2680 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2681 }
2682 [(set_attr "type" "neon_fp_mla_s<q>")]
2683 )
2684
2685 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2686 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2687 (fma:VCVTF
2688 (float_extend:VCVTF
2689 (neg:<VFMLSEL>
2690 (vec_select:<VFMLSEL>
2691 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2692 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2693 (float_extend:VCVTF
2694 (vec_duplicate:<VFMLSEL>
2695 (vec_select:HF
2696 (match_operand:<VFML> 3 "s_register_operand" "x")
2697 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2698 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2699 "TARGET_FP16FML"
2700 {
2701 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2702 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2703 {
2704 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2705 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2706 }
2707 else
2708 {
2709 operands[5] = GEN_INT (lane);
2710 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2711 }
2712 }
2713 [(set_attr "type" "neon_fp_mla_s<q>")]
2714 )
2715
2716 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2717
2718 (define_insn "neon_vmla<mode>_unspec"
2719 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2720 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2721 (match_operand:VDQW 2 "s_register_operand" "w")
2722 (match_operand:VDQW 3 "s_register_operand" "w")]
2723 UNSPEC_VMLA))]
2724 "TARGET_NEON"
2725 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2726 [(set (attr "type")
2727 (if_then_else (match_test "<Is_float_mode>")
2728 (const_string "neon_fp_mla_s<q>")
2729 (const_string "neon_mla_<V_elem_ch><q>")))]
2730 )
2731
2732 (define_insn "neon_vmlal<sup><mode>"
2733 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2734 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2735 (match_operand:VW 2 "s_register_operand" "w")
2736 (match_operand:VW 3 "s_register_operand" "w")]
2737 VMLAL))]
2738 "TARGET_NEON"
2739 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2740 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2741 )
2742
2743 (define_expand "neon_vmls<mode>"
2744 [(match_operand:VDQW 0 "s_register_operand" "=w")
2745 (match_operand:VDQW 1 "s_register_operand" "0")
2746 (match_operand:VDQW 2 "s_register_operand" "w")
2747 (match_operand:VDQW 3 "s_register_operand" "w")]
2748 "TARGET_NEON"
2749 {
2750 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2751 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2752 operands[1], operands[2], operands[3]));
2753 else
2754 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2755 operands[2], operands[3]));
2756 DONE;
2757 })
2758
2759 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2760
2761 (define_insn "neon_vmls<mode>_unspec"
2762 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2763 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2764 (match_operand:VDQW 2 "s_register_operand" "w")
2765 (match_operand:VDQW 3 "s_register_operand" "w")]
2766 UNSPEC_VMLS))]
2767 "TARGET_NEON"
2768 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2769 [(set (attr "type")
2770 (if_then_else (match_test "<Is_float_mode>")
2771 (const_string "neon_fp_mla_s<q>")
2772 (const_string "neon_mla_<V_elem_ch><q>")))]
2773 )
2774
2775 (define_insn "neon_vmlsl<sup><mode>"
2776 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2777 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2778 (match_operand:VW 2 "s_register_operand" "w")
2779 (match_operand:VW 3 "s_register_operand" "w")]
2780 VMLSL))]
2781 "TARGET_NEON"
2782 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2783 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2784 )
2785
2786 ;; vqdmulh, vqrdmulh
2787 (define_insn "neon_vq<r>dmulh<mode>"
2788 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2789 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2790 (match_operand:VMDQI 2 "s_register_operand" "w")]
2791 VQDMULH))]
2792 "TARGET_NEON"
2793 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2794 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2795 )
2796
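; Usage sketch for the saturating doubling multiply-high patterns above
; (assuming the usual arm_neon.h spelling):
;
;   #include <arm_neon.h>
;
;   int16x8_t qdmulh (int16x8_t a, int16x8_t b)
;   {
;     /* Lanewise saturating (2 * a * b) >> 16.  */
;     return vqdmulhq_s16 (a, b);
;   }
;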
2797 ;; vqrdmlah, vqrdmlsh
2798 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2799 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2800 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2801 (match_operand:VMDQI 2 "s_register_operand" "w")
2802 (match_operand:VMDQI 3 "s_register_operand" "w")]
2803 VQRDMLH_AS))]
2804 "TARGET_NEON_RDMA"
2805 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2806 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2807 )
2808
2809 (define_insn "neon_vqdmlal<mode>"
2810 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2811 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2812 (match_operand:VMDI 2 "s_register_operand" "w")
2813 (match_operand:VMDI 3 "s_register_operand" "w")]
2814 UNSPEC_VQDMLAL))]
2815 "TARGET_NEON"
2816 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2817 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2818 )
2819
2820 (define_insn "neon_vqdmlsl<mode>"
2821 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2822 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2823 (match_operand:VMDI 2 "s_register_operand" "w")
2824 (match_operand:VMDI 3 "s_register_operand" "w")]
2825 UNSPEC_VQDMLSL))]
2826 "TARGET_NEON"
2827 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2828 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2829 )
2830
2831 (define_insn "neon_vmull<sup><mode>"
2832 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2833 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2834 (match_operand:VW 2 "s_register_operand" "w")]
2835 VMULL))]
2836 "TARGET_NEON"
2837 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2838 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2839 )
2840
2841 (define_insn "neon_vqdmull<mode>"
2842 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2843 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2844 (match_operand:VMDI 2 "s_register_operand" "w")]
2845 UNSPEC_VQDMULL))]
2846 "TARGET_NEON"
2847 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2848 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2849 )
2850
2851 (define_expand "neon_vsub<mode>"
2852 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2853 (match_operand:VCVTF 1 "s_register_operand" "w")
2854 (match_operand:VCVTF 2 "s_register_operand" "w")]
2855 "TARGET_NEON"
2856 {
2857 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2858 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2859 else
2860 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2861 operands[2]));
2862 DONE;
2863 })
2864
2865 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2866
2867 (define_insn "neon_vsub<mode>_unspec"
2868 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2869 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2870 (match_operand:VCVTF 2 "s_register_operand" "w")]
2871 UNSPEC_VSUB))]
2872 "TARGET_NEON"
2873 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2874 [(set (attr "type")
2875 (if_then_else (match_test "<Is_float_mode>")
2876 (const_string "neon_fp_addsub_s<q>")
2877 (const_string "neon_sub<q>")))]
2878 )
2879
2880 (define_insn "neon_vsubl<sup><mode>"
2881 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2882 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2883 (match_operand:VDI 2 "s_register_operand" "w")]
2884 VSUBL))]
2885 "TARGET_NEON"
2886 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2887 [(set_attr "type" "neon_sub_long")]
2888 )
2889
2890 (define_insn "neon_vsubw<sup><mode>"
2891 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2892 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2893 (match_operand:VDI 2 "s_register_operand" "w")]
2894 VSUBW))]
2895 "TARGET_NEON"
2896 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2897 [(set_attr "type" "neon_sub_widen")]
2898 )
2899
2900 (define_insn "neon_vqsub<sup><mode>"
2901 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2902 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2903 (match_operand:VDQIX 2 "s_register_operand" "w")]
2904 VQSUB))]
2905 "TARGET_NEON"
2906 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2907 [(set_attr "type" "neon_qsub<q>")]
2908 )
2909
2910 (define_insn "neon_vhsub<sup><mode>"
2911 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2912 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2913 (match_operand:VDQIW 2 "s_register_operand" "w")]
2914 VHSUB))]
2915 "TARGET_NEON"
2916 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2917 [(set_attr "type" "neon_sub_halve<q>")]
2918 )
2919
2920 (define_insn "neon_v<r>subhn<mode>"
2921 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2922 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2923 (match_operand:VN 2 "s_register_operand" "w")]
2924 VSUBHN))]
2925 "TARGET_NEON"
2926 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2927 [(set_attr "type" "neon_sub_halve_narrow_q")]
2928 )
2929
2930 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2931 ;; without unsafe math optimizations.
2932 (define_expand "neon_vc<cmp_op><mode>"
2933 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2934 (neg:<V_cmp_result>
2935 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2936 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2937 "TARGET_NEON"
2938 {
2939 /* For FP comparisons use UNSPECs unless -funsafe-math-optimizations
2940 is enabled.  */
2941 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2942 && !flag_unsafe_math_optimizations)
2943 {
2944 /* We don't just emit gen_neon_vc<cmp_op><mode>_insn_unspec here because
2945 the *_insn_unspec patterns are only defined for the float modes,
2946 whereas this expander also iterates over the integer modes, for which
2947 we never expand to UNSPECs.  */
2948 switch (<MODE>mode)
2949 {
2950 case E_V2SFmode:
2951 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2952 operands[1],
2953 operands[2]));
2954 break;
2955 case E_V4SFmode:
2956 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2957 operands[1],
2958 operands[2]));
2959 break;
2960 default:
2961 gcc_unreachable ();
2962 }
2963 }
2964 else
2965 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2966 operands[1],
2967 operands[2]));
2968 DONE;
2969 }
2970 )
2971
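; For example, a float comparison intrinsic produces the same vcge.f32
; instruction down either path; only the RTL differs (neg of a comparison
; vs. the NEON_VCMP unspec).  A minimal sketch, assuming the usual
; arm_neon.h spelling:
;
;   #include <arm_neon.h>
;
;   uint32x4_t cmp_ge (float32x4_t a, float32x4_t b)
;   {
;     return vcgeq_f32 (a, b);   /* all-ones lanes where a >= b, else zero  */
;   }
;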
2972 (define_insn "neon_vc<cmp_op><mode>_insn"
2973 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2974 (neg:<V_cmp_result>
2975 (COMPARISONS:<V_cmp_result>
2976 (match_operand:VDQW 1 "s_register_operand" "w,w")
2977 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2978 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2979 && !flag_unsafe_math_optimizations)"
2980 {
2981 char pattern[100];
2982 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2983 " %%<V_reg>1, %s",
2984 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2985 ? "f" : "<cmp_type>",
2986 which_alternative == 0
2987 ? "%<V_reg>2" : "#0");
2988 output_asm_insn (pattern, operands);
2989 return "";
2990 }
2991 [(set (attr "type")
2992 (if_then_else (match_operand 2 "zero_operand")
2993 (const_string "neon_compare_zero<q>")
2994 (const_string "neon_compare<q>")))]
2995 )
2996
2997 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2998 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2999 (unspec:<V_cmp_result>
3000 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
3001 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
3002 NEON_VCMP))]
3003 "TARGET_NEON"
3004 {
3005 char pattern[100];
3006 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3007 " %%<V_reg>1, %s",
3008 which_alternative == 0
3009 ? "%<V_reg>2" : "#0");
3010 output_asm_insn (pattern, operands);
3011 return "";
3012 }
3013 [(set_attr "type" "neon_fp_compare_s<q>")]
3014 )
3015
3016 (define_expand "neon_vc<cmp_op><mode>"
3017 [(match_operand:<V_cmp_result> 0 "s_register_operand")
3018 (neg:<V_cmp_result>
3019 (COMPARISONS:VH
3020 (match_operand:VH 1 "s_register_operand")
3021 (match_operand:VH 2 "reg_or_zero_operand")))]
3022 "TARGET_NEON_FP16INST"
3023 {
3024 /* For FP comparisons use UNSPECs unless -funsafe-math-optimizations
3025 is enabled.  */
3026 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3027 && !flag_unsafe_math_optimizations)
3028 emit_insn
3029 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
3030 (operands[0], operands[1], operands[2]));
3031 else
3032 emit_insn
3033 (gen_neon_vc<cmp_op><mode>_fp16insn
3034 (operands[0], operands[1], operands[2]));
3035 DONE;
3036 })
3037
3038 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
3039 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3040 (neg:<V_cmp_result>
3041 (COMPARISONS:<V_cmp_result>
3042 (match_operand:VH 1 "s_register_operand" "w,w")
3043 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3044 "TARGET_NEON_FP16INST
3045 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3046 && !flag_unsafe_math_optimizations)"
3047 {
3048 char pattern[100];
3049 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3050 " %%<V_reg>1, %s",
3051 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3052 ? "f" : "<cmp_type>",
3053 which_alternative == 0
3054 ? "%<V_reg>2" : "#0");
3055 output_asm_insn (pattern, operands);
3056 return "";
3057 }
3058 [(set (attr "type")
3059 (if_then_else (match_operand 2 "zero_operand")
3060 (const_string "neon_compare_zero<q>")
3061 (const_string "neon_compare<q>")))])
3062
3063 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3064 [(set
3065 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3066 (unspec:<V_cmp_result>
3067 [(match_operand:VH 1 "s_register_operand" "w,w")
3068 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3069 NEON_VCMP))]
3070 "TARGET_NEON_FP16INST"
3071 {
3072 char pattern[100];
3073 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3074 " %%<V_reg>1, %s",
3075 which_alternative == 0
3076 ? "%<V_reg>2" : "#0");
3077 output_asm_insn (pattern, operands);
3078 return "";
3079 }
3080 [(set_attr "type" "neon_fp_compare_s<q>")])
3081
3082 (define_insn "neon_vc<cmp_op>u<mode>"
3083 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3084 (neg:<V_cmp_result>
3085 (GTUGEU:<V_cmp_result>
3086 (match_operand:VDQIW 1 "s_register_operand" "w")
3087 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3088 "TARGET_NEON"
3089 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3090 [(set_attr "type" "neon_compare<q>")]
3091 )
3092
3093 (define_expand "neon_vca<cmp_op><mode>"
3094 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3095 (neg:<V_cmp_result>
3096 (GTGE:<V_cmp_result>
3097 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3098 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3099 "TARGET_NEON"
3100 {
3101 if (flag_unsafe_math_optimizations)
3102 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3103 operands[2]));
3104 else
3105 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
3106 operands[1],
3107 operands[2]));
3108 DONE;
3109 }
3110 )
3111
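; Usage sketch for the absolute comparisons (assuming the usual arm_neon.h
; spelling):
;
;   #include <arm_neon.h>
;
;   uint32x4_t abs_ge (float32x4_t a, float32x4_t b)
;   {
;     return vcageq_f32 (a, b);   /* vacge.f32: lanewise |a| >= |b|  */
;   }
;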
3112 (define_insn "neon_vca<cmp_op><mode>_insn"
3113 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3114 (neg:<V_cmp_result>
3115 (GTGE:<V_cmp_result>
3116 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3117 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3118 "TARGET_NEON && flag_unsafe_math_optimizations"
3119 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3120 [(set_attr "type" "neon_fp_compare_s<q>")]
3121 )
3122
3123 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3124 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3125 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3126 (match_operand:VCVTF 2 "s_register_operand" "w")]
3127 NEON_VACMP))]
3128 "TARGET_NEON"
3129 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3130 [(set_attr "type" "neon_fp_compare_s<q>")]
3131 )
3132
3133 (define_expand "neon_vca<cmp_op><mode>"
3134 [(set
3135 (match_operand:<V_cmp_result> 0 "s_register_operand")
3136 (neg:<V_cmp_result>
3137 (GLTE:<V_cmp_result>
3138 (abs:VH (match_operand:VH 1 "s_register_operand"))
3139 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3140 "TARGET_NEON_FP16INST"
3141 {
3142 if (flag_unsafe_math_optimizations)
3143 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3144 (operands[0], operands[1], operands[2]));
3145 else
3146 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3147 (operands[0], operands[1], operands[2]));
3148 DONE;
3149 })
3150
3151 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3152 [(set
3153 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3154 (neg:<V_cmp_result>
3155 (GLTE:<V_cmp_result>
3156 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3157 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3158 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3159 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3160 [(set_attr "type" "neon_fp_compare_s<q>")]
3161 )
3162
3163 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3164 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3165 (unspec:<V_cmp_result>
3166 [(match_operand:VH 1 "s_register_operand" "w")
3167 (match_operand:VH 2 "s_register_operand" "w")]
3168 NEON_VAGLTE))]
3169 "TARGET_NEON"
3170 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3171 [(set_attr "type" "neon_fp_compare_s<q>")]
3172 )
3173
3174 (define_expand "neon_vc<cmp_op>z<mode>"
3175 [(set
3176 (match_operand:<V_cmp_result> 0 "s_register_operand")
3177 (COMPARISONS:<V_cmp_result>
3178 (match_operand:VH 1 "s_register_operand")
3179 (const_int 0)))]
3180 "TARGET_NEON_FP16INST"
3181 {
3182 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3183 CONST0_RTX (<MODE>mode)));
3184 DONE;
3185 })
3186
3187 (define_insn "neon_vtst<mode>"
3188 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3189 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3190 (match_operand:VDQIW 2 "s_register_operand" "w")]
3191 UNSPEC_VTST))]
3192 "TARGET_NEON"
3193 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3194 [(set_attr "type" "neon_tst<q>")]
3195 )
3196
3197 (define_insn "neon_vabd<sup><mode>"
3198 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3199 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3200 (match_operand:VDQIW 2 "s_register_operand" "w")]
3201 VABD))]
3202 "TARGET_NEON"
3203 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3204 [(set_attr "type" "neon_abd<q>")]
3205 )
3206
3207 (define_insn "neon_vabd<mode>"
3208 [(set (match_operand:VH 0 "s_register_operand" "=w")
3209 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3210 (match_operand:VH 2 "s_register_operand" "w")]
3211 UNSPEC_VABD_F))]
3212 "TARGET_NEON_FP16INST"
3213 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3214 [(set_attr "type" "neon_abd<q>")]
3215 )
3216
3217 (define_insn "neon_vabdf<mode>"
3218 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3219 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3220 (match_operand:VCVTF 2 "s_register_operand" "w")]
3221 UNSPEC_VABD_F))]
3222 "TARGET_NEON"
3223 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3224 [(set_attr "type" "neon_fp_abd_s<q>")]
3225 )
3226
3227 (define_insn "neon_vabdl<sup><mode>"
3228 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3229 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3230 (match_operand:VW 2 "s_register_operand" "w")]
3231 VABDL))]
3232 "TARGET_NEON"
3233 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3234 [(set_attr "type" "neon_abd_long")]
3235 )
3236
3237 (define_insn "neon_vaba<sup><mode>"
3238 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3239 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3240 (match_operand:VDQIW 3 "s_register_operand" "w")]
3241 VABD)
3242 (match_operand:VDQIW 1 "s_register_operand" "0")))]
3243 "TARGET_NEON"
3244 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3245 [(set_attr "type" "neon_arith_acc<q>")]
3246 )
3247
3248 (define_insn "neon_vabal<sup><mode>"
3249 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3250 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3251 (match_operand:VW 3 "s_register_operand" "w")]
3252 VABDL)
3253 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3254 "TARGET_NEON"
3255 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3256 [(set_attr "type" "neon_arith_acc<q>")]
3257 )
3258
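;; The <sup>sadv16qi expander below implements the usadv16qi/ssadv16qi
;; standard names: a widening sum of absolute differences over sixteen
;; QImode elements, accumulated into a V4SI result.  As a rough sketch
;; (a, b and acc are illustrative names here, not operands), it matches
;; loops of the form
;;
;;   for (i = 0; i < 16; i++)
;;     acc += abs (a[i] - b[i]);
;;
;; The expansion takes vabdl of the low halves, accumulates the high halves
;; with vabal, and folds the resulting V8HI partial sums into the V4SI
;; accumulator with vpadal.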
3259 (define_expand "<sup>sadv16qi"
3260 [(use (match_operand:V4SI 0 "register_operand"))
3261 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
3262 (use (match_operand:V16QI 2 "register_operand"))] VABAL)
3263 (use (match_operand:V4SI 3 "register_operand"))]
3264 "TARGET_NEON"
3265 {
3266 rtx reduc = gen_reg_rtx (V8HImode);
3267 rtx op1_highpart = gen_reg_rtx (V8QImode);
3268 rtx op2_highpart = gen_reg_rtx (V8QImode);
3269
3270 emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
3271 gen_lowpart (V8QImode, operands[1]),
3272 gen_lowpart (V8QImode, operands[2])));
3273
3274 emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
3275 emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
3276 emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
3277 op1_highpart, op2_highpart));
3278 emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
3279
3280 emit_move_insn (operands[0], operands[3]);
3281 DONE;
3282 }
3283 )
3284
3285 (define_insn "neon_v<maxmin><sup><mode>"
3286 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3287 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3288 (match_operand:VDQIW 2 "s_register_operand" "w")]
3289 VMAXMIN))]
3290 "TARGET_NEON"
3291 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3292 [(set_attr "type" "neon_minmax<q>")]
3293 )
3294
3295 (define_insn "neon_v<maxmin>f<mode>"
3296 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3297 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3298 (match_operand:VCVTF 2 "s_register_operand" "w")]
3299 VMAXMINF))]
3300 "TARGET_NEON"
3301 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3302 [(set_attr "type" "neon_fp_minmax_s<q>")]
3303 )
3304
3305 (define_insn "neon_v<maxmin>f<mode>"
3306 [(set (match_operand:VH 0 "s_register_operand" "=w")
3307 (unspec:VH
3308 [(match_operand:VH 1 "s_register_operand" "w")
3309 (match_operand:VH 2 "s_register_operand" "w")]
3310 VMAXMINF))]
3311 "TARGET_NEON_FP16INST"
3312 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3313 [(set_attr "type" "neon_fp_minmax_s<q>")]
3314 )
3315
3316 (define_insn "neon_vp<maxmin>fv4hf"
3317 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3318 (unspec:V4HF
3319 [(match_operand:V4HF 1 "s_register_operand" "w")
3320 (match_operand:V4HF 2 "s_register_operand" "w")]
3321 VPMAXMINF))]
3322 "TARGET_NEON_FP16INST"
3323 "vp<maxmin>.f16\t%P0, %P1, %P2"
3324 [(set_attr "type" "neon_reduc_minmax")]
3325 )
3326
3327 (define_insn "neon_<fmaxmin_op><mode>"
3328 [(set
3329 (match_operand:VH 0 "s_register_operand" "=w")
3330 (unspec:VH
3331 [(match_operand:VH 1 "s_register_operand" "w")
3332 (match_operand:VH 2 "s_register_operand" "w")]
3333 VMAXMINFNM))]
3334 "TARGET_NEON_FP16INST"
3335 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3336 [(set_attr "type" "neon_fp_minmax_s<q>")]
3337 )
3338
3339 ;; v<maxmin>nm intrinsics.
3340 (define_insn "neon_<fmaxmin_op><mode>"
3341 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3342 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3343 (match_operand:VCVTF 2 "s_register_operand" "w")]
3344 VMAXMINFNM))]
3345 "TARGET_NEON && TARGET_VFP5"
3346 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3347 [(set_attr "type" "neon_fp_minmax_s<q>")]
3348 )
3349
3350 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
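;; (vmaxnm/vminnm follow the IEEE 754-2008 maxNum/minNum rules, so, for
;; example, fmax (5.0f, __builtin_nanf ("")) is expected to yield 5.0f
;; rather than NaN.)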
3351 (define_insn "<fmaxmin><mode>3"
3352 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3353 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3354 (match_operand:VCVTF 2 "s_register_operand" "w")]
3355 VMAXMINFNM))]
3356 "TARGET_NEON && TARGET_VFP5"
3357 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3358 [(set_attr "type" "neon_fp_minmax_s<q>")]
3359 )
3360
3361 (define_expand "neon_vpadd<mode>"
3362 [(match_operand:VD 0 "s_register_operand" "=w")
3363 (match_operand:VD 1 "s_register_operand" "w")
3364 (match_operand:VD 2 "s_register_operand" "w")]
3365 "TARGET_NEON"
3366 {
3367 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3368 operands[2]));
3369 DONE;
3370 })
3371
3372 (define_insn "neon_vpaddl<sup><mode>"
3373 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3374 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3375 VPADDL))]
3376 "TARGET_NEON"
3377 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3378 [(set_attr "type" "neon_reduc_add_long")]
3379 )
3380
3381 (define_insn "neon_vpadal<sup><mode>"
3382 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3383 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3384 (match_operand:VDQIW 2 "s_register_operand" "w")]
3385 VPADAL))]
3386 "TARGET_NEON"
3387 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3388 [(set_attr "type" "neon_reduc_add_acc")]
3389 )
3390
3391 (define_insn "neon_vp<maxmin><sup><mode>"
3392 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3393 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3394 (match_operand:VDI 2 "s_register_operand" "w")]
3395 VPMAXMIN))]
3396 "TARGET_NEON"
3397 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3398 [(set_attr "type" "neon_reduc_minmax<q>")]
3399 )
3400
3401 (define_insn "neon_vp<maxmin>f<mode>"
3402 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3403 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3404 (match_operand:VCVTF 2 "s_register_operand" "w")]
3405 VPMAXMINF))]
3406 "TARGET_NEON"
3407 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3408 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
3409 )
3410
3411 (define_insn "neon_vrecps<mode>"
3412 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3413 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3414 (match_operand:VCVTF 2 "s_register_operand" "w")]
3415 UNSPEC_VRECPS))]
3416 "TARGET_NEON"
3417 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3418 [(set_attr "type" "neon_fp_recps_s<q>")]
3419 )
3420
3421 (define_insn "neon_vrecps<mode>"
3422 [(set
3423 (match_operand:VH 0 "s_register_operand" "=w")
3424 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3425 (match_operand:VH 2 "s_register_operand" "w")]
3426 UNSPEC_VRECPS))]
3427 "TARGET_NEON_FP16INST"
3428 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3429 [(set_attr "type" "neon_fp_recps_s<q>")]
3430 )
3431
3432 (define_insn "neon_vrsqrts<mode>"
3433 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3434 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3435 (match_operand:VCVTF 2 "s_register_operand" "w")]
3436 UNSPEC_VRSQRTS))]
3437 "TARGET_NEON"
3438 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3439 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3440 )
3441
3442 (define_insn "neon_vrsqrts<mode>"
3443 [(set
3444 (match_operand:VH 0 "s_register_operand" "=w")
3445 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3446 (match_operand:VH 2 "s_register_operand" "w")]
3447 UNSPEC_VRSQRTS))]
3448 "TARGET_NEON_FP16INST"
3449 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3450 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3451 )
3452
3453 (define_expand "neon_vabs<mode>"
3454 [(match_operand:VDQW 0 "s_register_operand" "")
3455 (match_operand:VDQW 1 "s_register_operand" "")]
3456 "TARGET_NEON"
3457 {
3458 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3459 DONE;
3460 })
3461
3462 (define_insn "neon_vqabs<mode>"
3463 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3464 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3465 UNSPEC_VQABS))]
3466 "TARGET_NEON"
3467 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3468 [(set_attr "type" "neon_qabs<q>")]
3469 )
3470
3471 (define_insn "neon_bswap<mode>"
3472 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3473 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3474 "TARGET_NEON"
3475 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3476 [(set_attr "type" "neon_rev<q>")]
3477 )
3478
3479 (define_expand "neon_vneg<mode>"
3480 [(match_operand:VDQW 0 "s_register_operand" "")
3481 (match_operand:VDQW 1 "s_register_operand" "")]
3482 "TARGET_NEON"
3483 {
3484 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3485 DONE;
3486 })
3487
3488
3489 ;; The vcadd and vcmla patterns are made UNSPEC explicitly because their use
3490 ;; needs to guarantee that the source vectors are contiguous.  It would be
3491 ;; wrong to describe the operation without being able to describe the permute
3492 ;; that is also required, but even if that were done the permute would have
3493 ;; been created as a LOAD_LANES, which means the values in the registers are
3494 ;; in the wrong order.
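;; As a rough sketch of the intended use (the interleaved real/imaginary
;; lane layout is an assumption of the callers, not something these
;; patterns enforce by themselves), a vcmla with rotation #0 followed by
;; one with rotation #90 implements a complex multiply-accumulate such as
;;
;;   for (i = 0; i < n; i++)
;;     c[i] += a[i] * b[i];   /* elements of type float _Complex  */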
3495 (define_insn "neon_vcadd<rot><mode>"
3496 [(set (match_operand:VF 0 "register_operand" "=w")
3497 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
3498 (match_operand:VF 2 "register_operand" "w")]
3499 VCADD))]
3500 "TARGET_COMPLEX"
3501 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
3502 [(set_attr "type" "neon_fcadd")]
3503 )
3504
3505 (define_insn "neon_vcmla<rot><mode>"
3506 [(set (match_operand:VF 0 "register_operand" "=w")
3507 (plus:VF (match_operand:VF 1 "register_operand" "0")
3508 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
3509 (match_operand:VF 3 "register_operand" "w")]
3510 VCMLA)))]
3511 "TARGET_COMPLEX"
3512 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
3513 [(set_attr "type" "neon_fcmla")]
3514 )
3515
3516 (define_insn "neon_vcmla_lane<rot><mode>"
3517 [(set (match_operand:VF 0 "s_register_operand" "=w")
3518 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
3519 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
3520 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
3521 (match_operand:SI 4 "const_int_operand" "n")]
3522 VCMLA)))]
3523 "TARGET_COMPLEX"
3524 {
3525 operands = neon_vcmla_lane_prepare_operands (operands);
3526 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3527 }
3528 [(set_attr "type" "neon_fcmla")]
3529 )
3530
3531 (define_insn "neon_vcmla_laneq<rot><mode>"
3532 [(set (match_operand:VDF 0 "s_register_operand" "=w")
3533 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3534 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3535 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3536 (match_operand:SI 4 "const_int_operand" "n")]
3537 VCMLA)))]
3538 "TARGET_COMPLEX"
3539 {
3540 operands = neon_vcmla_lane_prepare_operands (operands);
3541 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3542 }
3543 [(set_attr "type" "neon_fcmla")]
3544 )
3545
3546 (define_insn "neon_vcmlaq_lane<rot><mode>"
3547 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3548 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3549 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3550 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3551 (match_operand:SI 4 "const_int_operand" "n")]
3552 VCMLA)))]
3553 "TARGET_COMPLEX"
3554 {
3555 operands = neon_vcmla_lane_prepare_operands (operands);
3556 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3557 }
3558 [(set_attr "type" "neon_fcmla")]
3559 )
3560
3561
3562 ;; These instructions map to the __builtins for the Dot Product operations.
3563 (define_insn "neon_<sup>dot<vsi2qi>"
3564 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3565 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3566 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3567 "register_operand" "w")
3568 (match_operand:<VSI2QI> 3
3569 "register_operand" "w")]
3570 DOTPROD)))]
3571 "TARGET_DOTPROD"
3572 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3573 [(set_attr "type" "neon_dot<q>")]
3574 )
3575
3576 ;; These instructions map to the __builtins for the Dot Product
3577 ;; indexed operations.
3578 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3579 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3580 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3581 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3582 "register_operand" "w")
3583 (match_operand:V8QI 3 "register_operand" "t")
3584 (match_operand:SI 4 "immediate_operand" "i")]
3585 DOTPROD)))]
3586 "TARGET_DOTPROD"
3587 {
3588 operands[4]
3589 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3590 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3591 }
3592 [(set_attr "type" "neon_dot<q>")]
3593 )
3594
3595 ;; These expands map to the Dot Product optab the vectorizer checks for.
3596 ;; The auto-vectorizer expects a dot product builtin that also does an
3597 ;; accumulation into the provided register.
3598 ;; Given the following pattern
3599 ;;
3600 ;; for (i=0; i<len; i++) {
3601 ;; c = a[i] * b[i];
3602 ;; r += c;
3603 ;; }
3604 ;; return r;
3605 ;;
3606 ;; This can be auto-vectorized to
3607 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3608 ;;
3609 ;; given enough iterations.  However, the vectorizer can keep unrolling the loop:
3610 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3611 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3612 ;; ...
3613 ;;
3614 ;; and so the vectorizer provides r, in which the result has to be accumulated.
3615 (define_expand "<sup>dot_prod<vsi2qi>"
3616 [(set (match_operand:VCVTI 0 "register_operand")
3617 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3618 "register_operand")
3619 (match_operand:<VSI2QI> 2
3620 "register_operand")]
3621 DOTPROD)
3622 (match_operand:VCVTI 3 "register_operand")))]
3623 "TARGET_DOTPROD"
3624 {
3625 emit_insn (
3626 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3627 operands[2]));
3628 emit_insn (gen_rtx_SET (operands[0], operands[3]));
3629 DONE;
3630 })
3631
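;; Vector copysign: the result takes its magnitude from operand 1 and its
;; sign from operand 2.  A scalar sketch of what the VBSL below computes on
;; the float bit patterns (mask, a and b are just illustrative names):
;;
;;   mask = 0x80000000;
;;   result = (b & mask) | (a & ~mask);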
3632 (define_expand "neon_copysignf<mode>"
3633 [(match_operand:VCVTF 0 "register_operand")
3634 (match_operand:VCVTF 1 "register_operand")
3635 (match_operand:VCVTF 2 "register_operand")]
3636 "TARGET_NEON"
3637 "{
3638 rtx v_bitmask_cast;
3639 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3640 rtx c = gen_int_mode (0x80000000, SImode);
3641
3642 emit_move_insn (v_bitmask,
3643 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3644 emit_move_insn (operands[0], operands[2]);
3645 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3646 <VCVTF:V_cmp_result>mode, 0);
3647 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3648 operands[1]));
3649
3650 DONE;
3651 }"
3652 )
3653
3654 (define_insn "neon_vqneg<mode>"
3655 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3656 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3657 UNSPEC_VQNEG))]
3658 "TARGET_NEON"
3659 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3660 [(set_attr "type" "neon_qneg<q>")]
3661 )
3662
3663 (define_insn "neon_vcls<mode>"
3664 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3665 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3666 UNSPEC_VCLS))]
3667 "TARGET_NEON"
3668 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3669 [(set_attr "type" "neon_cls<q>")]
3670 )
3671
3672 (define_insn "clz<mode>2"
3673 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3674 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3675 "TARGET_NEON"
3676 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3677 [(set_attr "type" "neon_cnt<q>")]
3678 )
3679
3680 (define_expand "neon_vclz<mode>"
3681 [(match_operand:VDQIW 0 "s_register_operand" "")
3682 (match_operand:VDQIW 1 "s_register_operand" "")]
3683 "TARGET_NEON"
3684 {
3685 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3686 DONE;
3687 })
3688
3689 (define_insn "popcount<mode>2"
3690 [(set (match_operand:VE 0 "s_register_operand" "=w")
3691 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3692 "TARGET_NEON"
3693 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3694 [(set_attr "type" "neon_cnt<q>")]
3695 )
3696
3697 (define_expand "neon_vcnt<mode>"
3698 [(match_operand:VE 0 "s_register_operand" "=w")
3699 (match_operand:VE 1 "s_register_operand" "w")]
3700 "TARGET_NEON"
3701 {
3702 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3703 DONE;
3704 })
3705
3706 (define_insn "neon_vrecpe<mode>"
3707 [(set (match_operand:VH 0 "s_register_operand" "=w")
3708 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3709 UNSPEC_VRECPE))]
3710 "TARGET_NEON_FP16INST"
3711 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3712 [(set_attr "type" "neon_fp_recpe_s<q>")]
3713 )
3714
3715 (define_insn "neon_vrecpe<mode>"
3716 [(set (match_operand:V32 0 "s_register_operand" "=w")
3717 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3718 UNSPEC_VRECPE))]
3719 "TARGET_NEON"
3720 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3721 [(set_attr "type" "neon_fp_recpe_s<q>")]
3722 )
3723
3724 (define_insn "neon_vrsqrte<mode>"
3725 [(set (match_operand:V32 0 "s_register_operand" "=w")
3726 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3727 UNSPEC_VRSQRTE))]
3728 "TARGET_NEON"
3729 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3730 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3731 )
3732
3733 (define_expand "neon_vmvn<mode>"
3734 [(match_operand:VDQIW 0 "s_register_operand" "")
3735 (match_operand:VDQIW 1 "s_register_operand" "")]
3736 "TARGET_NEON"
3737 {
3738 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3739 DONE;
3740 })
3741
3742 (define_insn "neon_vget_lane<mode>_sext_internal"
3743 [(set (match_operand:SI 0 "s_register_operand" "=r")
3744 (sign_extend:SI
3745 (vec_select:<V_elem>
3746 (match_operand:VD 1 "s_register_operand" "w")
3747 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3748 "TARGET_NEON"
3749 {
3750 if (BYTES_BIG_ENDIAN)
3751 {
3752 int elt = INTVAL (operands[2]);
3753 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3754 operands[2] = GEN_INT (elt);
3755 }
3756 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3757 }
3758 [(set_attr "type" "neon_to_gp")]
3759 )
3760
3761 (define_insn "neon_vget_lane<mode>_zext_internal"
3762 [(set (match_operand:SI 0 "s_register_operand" "=r")
3763 (zero_extend:SI
3764 (vec_select:<V_elem>
3765 (match_operand:VD 1 "s_register_operand" "w")
3766 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3767 "TARGET_NEON"
3768 {
3769 if (BYTES_BIG_ENDIAN)
3770 {
3771 int elt = INTVAL (operands[2]);
3772 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3773 operands[2] = GEN_INT (elt);
3774 }
3775 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3776 }
3777 [(set_attr "type" "neon_to_gp")]
3778 )
3779
3780 (define_insn "neon_vget_lane<mode>_sext_internal"
3781 [(set (match_operand:SI 0 "s_register_operand" "=r")
3782 (sign_extend:SI
3783 (vec_select:<V_elem>
3784 (match_operand:VQ2 1 "s_register_operand" "w")
3785 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3786 "TARGET_NEON"
3787 {
3788 rtx ops[3];
3789 int regno = REGNO (operands[1]);
3790 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3791 unsigned int elt = INTVAL (operands[2]);
3792 unsigned int elt_adj = elt % halfelts;
3793
3794 if (BYTES_BIG_ENDIAN)
3795 elt_adj = halfelts - 1 - elt_adj;
3796
3797 ops[0] = operands[0];
3798 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3799 ops[2] = GEN_INT (elt_adj);
3800 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3801
3802 return "";
3803 }
3804 [(set_attr "type" "neon_to_gp_q")]
3805 )
3806
3807 (define_insn "neon_vget_lane<mode>_zext_internal"
3808 [(set (match_operand:SI 0 "s_register_operand" "=r")
3809 (zero_extend:SI
3810 (vec_select:<V_elem>
3811 (match_operand:VQ2 1 "s_register_operand" "w")
3812 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3813 "TARGET_NEON"
3814 {
3815 rtx ops[3];
3816 int regno = REGNO (operands[1]);
3817 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3818 unsigned int elt = INTVAL (operands[2]);
3819 unsigned int elt_adj = elt % halfelts;
3820
3821 if (BYTES_BIG_ENDIAN)
3822 elt_adj = halfelts - 1 - elt_adj;
3823
3824 ops[0] = operands[0];
3825 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3826 ops[2] = GEN_INT (elt_adj);
3827 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3828
3829 return "";
3830 }
3831 [(set_attr "type" "neon_to_gp_q")]
3832 )
3833
3834 (define_expand "neon_vget_lane<mode>"
3835 [(match_operand:<V_ext> 0 "s_register_operand" "")
3836 (match_operand:VDQW 1 "s_register_operand" "")
3837 (match_operand:SI 2 "immediate_operand" "")]
3838 "TARGET_NEON"
3839 {
3840 if (BYTES_BIG_ENDIAN)
3841 {
3842 /* The intrinsics are defined in terms of a model where the
3843 element ordering in memory is vldm order, whereas the generic
3844 RTL is defined in terms of a model where the element ordering
3845 in memory is array order. Convert the lane number to conform
3846 to this model. */
3847 unsigned int elt = INTVAL (operands[2]);
3848 unsigned int reg_nelts
3849 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3850 elt ^= reg_nelts - 1;
3851 operands[2] = GEN_INT (elt);
3852 }
3853
3854 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3855 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3856 operands[2]));
3857 else
3858 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3859 operands[1],
3860 operands[2]));
3861 DONE;
3862 })
3863
3864 (define_expand "neon_vget_laneu<mode>"
3865 [(match_operand:<V_ext> 0 "s_register_operand" "")
3866 (match_operand:VDQIW 1 "s_register_operand" "")
3867 (match_operand:SI 2 "immediate_operand" "")]
3868 "TARGET_NEON"
3869 {
3870 if (BYTES_BIG_ENDIAN)
3871 {
3872 /* The intrinsics are defined in terms of a model where the
3873 element ordering in memory is vldm order, whereas the generic
3874 RTL is defined in terms of a model where the element ordering
3875 in memory is array order. Convert the lane number to conform
3876 to this model. */
3877 unsigned int elt = INTVAL (operands[2]);
3878 unsigned int reg_nelts
3879 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3880 elt ^= reg_nelts - 1;
3881 operands[2] = GEN_INT (elt);
3882 }
3883
3884 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3885 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3886 operands[2]));
3887 else
3888 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3889 operands[1],
3890 operands[2]));
3891 DONE;
3892 })
3893
3894 (define_expand "neon_vget_lanedi"
3895 [(match_operand:DI 0 "s_register_operand" "=r")
3896 (match_operand:DI 1 "s_register_operand" "w")
3897 (match_operand:SI 2 "immediate_operand" "")]
3898 "TARGET_NEON"
3899 {
3900 emit_move_insn (operands[0], operands[1]);
3901 DONE;
3902 })
3903
3904 (define_expand "neon_vget_lanev2di"
3905 [(match_operand:DI 0 "s_register_operand" "")
3906 (match_operand:V2DI 1 "s_register_operand" "")
3907 (match_operand:SI 2 "immediate_operand" "")]
3908 "TARGET_NEON"
3909 {
3910 int lane;
3911
3912 if (BYTES_BIG_ENDIAN)
3913 {
3914 /* The intrinsics are defined in terms of a model where the
3915 element ordering in memory is vldm order, whereas the generic
3916 RTL is defined in terms of a model where the element ordering
3917 in memory is array order. Convert the lane number to conform
3918 to this model. */
3919 unsigned int elt = INTVAL (operands[2]);
3920 unsigned int reg_nelts = 2;
3921 elt ^= reg_nelts - 1;
3922 operands[2] = GEN_INT (elt);
3923 }
3924
3925 lane = INTVAL (operands[2]);
3926   gcc_assert ((lane == 0) || (lane == 1));
3927 emit_move_insn (operands[0], lane == 0
3928 ? gen_lowpart (DImode, operands[1])
3929 : gen_highpart (DImode, operands[1]));
3930 DONE;
3931 })
3932
3933 (define_expand "neon_vset_lane<mode>"
3934 [(match_operand:VDQ 0 "s_register_operand" "=w")
3935 (match_operand:<V_elem> 1 "s_register_operand" "r")
3936 (match_operand:VDQ 2 "s_register_operand" "0")
3937 (match_operand:SI 3 "immediate_operand" "i")]
3938 "TARGET_NEON"
3939 {
3940 unsigned int elt = INTVAL (operands[3]);
3941
3942 if (BYTES_BIG_ENDIAN)
3943 {
3944 unsigned int reg_nelts
3945 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3946 elt ^= reg_nelts - 1;
3947 }
3948
3949 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3950 GEN_INT (1 << elt), operands[2]));
3951 DONE;
3952 })
3953
3954 ; As with neon_vget_lanedi above, operands 2 & 3 are ignored: DImode has only
     ; a single element, so the whole value is simply copied.
3955
3956 (define_expand "neon_vset_lanedi"
3957 [(match_operand:DI 0 "s_register_operand" "=w")
3958 (match_operand:DI 1 "s_register_operand" "r")
3959 (match_operand:DI 2 "s_register_operand" "0")
3960 (match_operand:SI 3 "immediate_operand" "i")]
3961 "TARGET_NEON"
3962 {
3963 emit_move_insn (operands[0], operands[1]);
3964 DONE;
3965 })
3966
3967 (define_expand "neon_vcreate<mode>"
3968 [(match_operand:VD_RE 0 "s_register_operand" "")
3969 (match_operand:DI 1 "general_operand" "")]
3970 "TARGET_NEON"
3971 {
3972 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3973 emit_move_insn (operands[0], src);
3974 DONE;
3975 })
3976
3977 (define_insn "neon_vdup_n<mode>"
3978 [(set (match_operand:VX 0 "s_register_operand" "=w")
3979 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3980 "TARGET_NEON"
3981 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3982 [(set_attr "type" "neon_from_gp<q>")]
3983 )
3984
3985 (define_insn "neon_vdup_nv4hf"
3986 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3987 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3988 "TARGET_NEON"
3989 "vdup.16\t%P0, %1"
3990 [(set_attr "type" "neon_from_gp")]
3991 )
3992
3993 (define_insn "neon_vdup_nv8hf"
3994 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3995 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3996 "TARGET_NEON"
3997 "vdup.16\t%q0, %1"
3998 [(set_attr "type" "neon_from_gp_q")]
3999 )
4000
4001 (define_insn "neon_vdup_n<mode>"
4002 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
4003 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
4004 "TARGET_NEON"
4005 "@
4006 vdup.<V_sz_elem>\t%<V_reg>0, %1
4007 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
4008 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
4009 )
4010
4011 (define_expand "neon_vdup_ndi"
4012 [(match_operand:DI 0 "s_register_operand" "=w")
4013 (match_operand:DI 1 "s_register_operand" "r")]
4014 "TARGET_NEON"
4015 {
4016 emit_move_insn (operands[0], operands[1]);
4017 DONE;
4018 }
4019 )
4020
4021 (define_insn "neon_vdup_nv2di"
4022 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
4023 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
4024 "TARGET_NEON"
4025 "@
4026 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
4027 vmov\t%e0, %P1\;vmov\t%f0, %P1"
4028 [(set_attr "length" "8")
4029 (set_attr "type" "multiple")]
4030 )
4031
4032 (define_insn "neon_vdup_lane<mode>_internal"
4033 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4034 (vec_duplicate:VDQW
4035 (vec_select:<V_elem>
4036 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
4037 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4038 "TARGET_NEON"
4039 {
4040 if (BYTES_BIG_ENDIAN)
4041 {
4042 int elt = INTVAL (operands[2]);
4043 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
4044 operands[2] = GEN_INT (elt);
4045 }
4046 if (<Is_d_reg>)
4047 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
4048 else
4049 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
4050 }
4051 [(set_attr "type" "neon_dup<q>")]
4052 )
4053
4054 (define_insn "neon_vdup_lane<mode>_internal"
4055 [(set (match_operand:VH 0 "s_register_operand" "=w")
4056 (vec_duplicate:VH
4057 (vec_select:<V_elem>
4058 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
4059 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4060 "TARGET_NEON && TARGET_FP16"
4061 {
4062 if (BYTES_BIG_ENDIAN)
4063 {
4064 int elt = INTVAL (operands[2]);
4065 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
4066 operands[2] = GEN_INT (elt);
4067 }
4068 if (<Is_d_reg>)
4069 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
4070 else
4071 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
4072 }
4073 [(set_attr "type" "neon_dup<q>")]
4074 )
4075
4076 (define_expand "neon_vdup_lane<mode>"
4077 [(match_operand:VDQW 0 "s_register_operand" "=w")
4078 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
4079 (match_operand:SI 2 "immediate_operand" "i")]
4080 "TARGET_NEON"
4081 {
4082 if (BYTES_BIG_ENDIAN)
4083 {
4084 unsigned int elt = INTVAL (operands[2]);
4085 unsigned int reg_nelts
4086 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
4087 elt ^= reg_nelts - 1;
4088 operands[2] = GEN_INT (elt);
4089 }
4090 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
4091 operands[2]));
4092 DONE;
4093 })
4094
4095 (define_expand "neon_vdup_lane<mode>"
4096 [(match_operand:VH 0 "s_register_operand")
4097 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
4098 (match_operand:SI 2 "immediate_operand")]
4099 "TARGET_NEON && TARGET_FP16"
4100 {
4101 if (BYTES_BIG_ENDIAN)
4102 {
4103 unsigned int elt = INTVAL (operands[2]);
4104 unsigned int reg_nelts
4105 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
4106 elt ^= reg_nelts - 1;
4107 operands[2] = GEN_INT (elt);
4108 }
4109 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
4110 operands[2]));
4111 DONE;
4112 })
4113
4114 ; Scalar index is ignored, since only zero is valid here.
4115 (define_expand "neon_vdup_lanedi"
4116 [(match_operand:DI 0 "s_register_operand" "=w")
4117 (match_operand:DI 1 "s_register_operand" "w")
4118 (match_operand:SI 2 "immediate_operand" "i")]
4119 "TARGET_NEON"
4120 {
4121 emit_move_insn (operands[0], operands[1]);
4122 DONE;
4123 })
4124
4125 ; Likewise for v2di, as the DImode second operand has only a single element.
4126 (define_expand "neon_vdup_lanev2di"
4127 [(match_operand:V2DI 0 "s_register_operand" "=w")
4128 (match_operand:DI 1 "s_register_operand" "w")
4129 (match_operand:SI 2 "immediate_operand" "i")]
4130 "TARGET_NEON"
4131 {
4132 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4133 DONE;
4134 })
4135
4136 ; Disabled before reload because we don't want combine doing something silly,
4137 ; but used by the post-reload expansion of neon_vcombine.
4138 (define_insn "*neon_vswp<mode>"
4139 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4140 (match_operand:VDQX 1 "s_register_operand" "+w"))
4141 (set (match_dup 1) (match_dup 0))]
4142 "TARGET_NEON && reload_completed"
4143 "vswp\t%<V_reg>0, %<V_reg>1"
4144 [(set_attr "type" "neon_permute<q>")]
4145 )
4146
4147 ;; In this insn, operand 1 should be the low part, and operand 2 the high
4148 ;; part, of the destination vector.
4149 ;; FIXME: A different implementation of this builtin could make it much
4150 ;; more likely that we wouldn't actually need to output anything (we could make
4151 ;; it so that the reg allocator puts things in the right places magically
4152 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
4153
4154 (define_insn_and_split "neon_vcombine<mode>"
4155 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4156 (vec_concat:<V_DOUBLE>
4157 (match_operand:VDX 1 "s_register_operand" "w")
4158 (match_operand:VDX 2 "s_register_operand" "w")))]
4159 "TARGET_NEON"
4160 "#"
4161 "&& reload_completed"
4162 [(const_int 0)]
4163 {
4164 neon_split_vcombine (operands);
4165 DONE;
4166 }
4167 [(set_attr "type" "multiple")]
4168 )
4169
4170 (define_expand "neon_vget_high<mode>"
4171 [(match_operand:<V_HALF> 0 "s_register_operand")
4172 (match_operand:VQX 1 "s_register_operand")]
4173 "TARGET_NEON"
4174 {
4175 emit_move_insn (operands[0],
4176 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4177 GET_MODE_SIZE (<V_HALF>mode)));
4178 DONE;
4179 })
4180
4181 (define_expand "neon_vget_low<mode>"
4182 [(match_operand:<V_HALF> 0 "s_register_operand")
4183 (match_operand:VQX 1 "s_register_operand")]
4184 "TARGET_NEON"
4185 {
4186 emit_move_insn (operands[0],
4187 simplify_gen_subreg (<V_HALF>mode, operands[1],
4188 <MODE>mode, 0));
4189 DONE;
4190 })
4191
4192 (define_insn "float<mode><V_cvtto>2"
4193 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4194 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4195 "TARGET_NEON && !flag_rounding_math"
4196 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4197 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4198 )
4199
4200 (define_insn "floatuns<mode><V_cvtto>2"
4201 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4202 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4203 "TARGET_NEON && !flag_rounding_math"
4204 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4205 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4206 )
4207
4208 (define_insn "fix_trunc<mode><V_cvtto>2"
4209 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4210 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4211 "TARGET_NEON"
4212 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4213 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4214 )
4215
4216 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4217 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4218 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4219 "TARGET_NEON"
4220 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4221 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4222 )
4223
4224 (define_insn "neon_vcvt<sup><mode>"
4225 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4226 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4227 VCVT_US))]
4228 "TARGET_NEON"
4229 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4230 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4231 )
4232
4233 (define_insn "neon_vcvt<sup><mode>"
4234 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4235 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4236 VCVT_US))]
4237 "TARGET_NEON"
4238 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4239 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4240 )
4241
4242 (define_insn "neon_vcvtv4sfv4hf"
4243 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4244 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4245 UNSPEC_VCVT))]
4246 "TARGET_NEON && TARGET_FP16"
4247 "vcvt.f32.f16\t%q0, %P1"
4248 [(set_attr "type" "neon_fp_cvt_widen_h")]
4249 )
4250
4251 (define_insn "neon_vcvtv4hfv4sf"
4252 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4253 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4254 UNSPEC_VCVT))]
4255 "TARGET_NEON && TARGET_FP16"
4256 "vcvt.f16.f32\t%P0, %q1"
4257 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
4258 )
4259
4260 (define_insn "neon_vcvt<sup><mode>"
4261 [(set
4262 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4263 (unspec:<VH_CVTTO>
4264 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4265 VCVT_US))]
4266 "TARGET_NEON_FP16INST"
4267 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4268 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4269 )
4270
4271 (define_insn "neon_vcvt<sup><mode>"
4272 [(set
4273 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4274 (unspec:<VH_CVTTO>
4275 [(match_operand:VH 1 "s_register_operand" "w")]
4276 VCVT_US))]
4277 "TARGET_NEON_FP16INST"
4278 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4279 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4280 )
4281
4282 (define_insn "neon_vcvt<sup>_n<mode>"
4283 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4284 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4285 (match_operand:SI 2 "immediate_operand" "i")]
4286 VCVT_US_N))]
4287 "TARGET_NEON"
4288 {
4289 arm_const_bounds (operands[2], 1, 33);
4290 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4291 }
4292 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4293 )
4294
4295 (define_insn "neon_vcvt<sup>_n<mode>"
4296 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4297 (unspec:<VH_CVTTO>
4298 [(match_operand:VH 1 "s_register_operand" "w")
4299 (match_operand:SI 2 "immediate_operand" "i")]
4300 VCVT_US_N))]
4301 "TARGET_NEON_FP16INST"
4302 {
4303 arm_const_bounds (operands[2], 0, 17);
4304 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4305 }
4306 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4307 )
4308
4309 (define_insn "neon_vcvt<sup>_n<mode>"
4310 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4311 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4312 (match_operand:SI 2 "immediate_operand" "i")]
4313 VCVT_US_N))]
4314 "TARGET_NEON"
4315 {
4316 arm_const_bounds (operands[2], 1, 33);
4317 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4318 }
4319 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4320 )
4321
4322 (define_insn "neon_vcvt<sup>_n<mode>"
4323 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4324 (unspec:<VH_CVTTO>
4325 [(match_operand:VCVTHI 1 "s_register_operand" "w")
4326 (match_operand:SI 2 "immediate_operand" "i")]
4327 VCVT_US_N))]
4328 "TARGET_NEON_FP16INST"
4329 {
4330 arm_const_bounds (operands[2], 0, 17);
4331 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4332 }
4333 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4334 )
4335
4336 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4337 [(set
4338 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4339 (unspec:<VH_CVTTO>
4340 [(match_operand:VH 1 "s_register_operand" "w")]
4341 VCVT_HF_US))]
4342 "TARGET_NEON_FP16INST"
4343 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4344 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4345 )
4346
4347 (define_insn "neon_vmovn<mode>"
4348 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4349 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4350 UNSPEC_VMOVN))]
4351 "TARGET_NEON"
4352 "vmovn.<V_if_elem>\t%P0, %q1"
4353 [(set_attr "type" "neon_shift_imm_narrow_q")]
4354 )
4355
4356 (define_insn "neon_vqmovn<sup><mode>"
4357 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4358 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4359 VQMOVN))]
4360 "TARGET_NEON"
4361 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4362 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4363 )
4364
4365 (define_insn "neon_vqmovun<mode>"
4366 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4367 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4368 UNSPEC_VQMOVUN))]
4369 "TARGET_NEON"
4370 "vqmovun.<V_s_elem>\t%P0, %q1"
4371 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4372 )
4373
4374 (define_insn "neon_vmovl<sup><mode>"
4375 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4376 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4377 VMOVL))]
4378 "TARGET_NEON"
4379 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4380 [(set_attr "type" "neon_shift_imm_long")]
4381 )
4382
4383 (define_insn "neon_vmul_lane<mode>"
4384 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4385 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4386 (match_operand:VMD 2 "s_register_operand"
4387 "<scalar_mul_constraint>")
4388 (match_operand:SI 3 "immediate_operand" "i")]
4389 UNSPEC_VMUL_LANE))]
4390 "TARGET_NEON"
4391 {
4392 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4393 }
4394 [(set (attr "type")
4395 (if_then_else (match_test "<Is_float_mode>")
4396 (const_string "neon_fp_mul_s_scalar<q>")
4397 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4398 )
4399
4400 (define_insn "neon_vmul_lane<mode>"
4401 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4402 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4403 (match_operand:<V_HALF> 2 "s_register_operand"
4404 "<scalar_mul_constraint>")
4405 (match_operand:SI 3 "immediate_operand" "i")]
4406 UNSPEC_VMUL_LANE))]
4407 "TARGET_NEON"
4408 {
4409 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4410 }
4411 [(set (attr "type")
4412 (if_then_else (match_test "<Is_float_mode>")
4413 (const_string "neon_fp_mul_s_scalar<q>")
4414 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4415 )
4416
4417 (define_insn "neon_vmul_lane<mode>"
4418 [(set (match_operand:VH 0 "s_register_operand" "=w")
4419 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4420 (match_operand:V4HF 2 "s_register_operand"
4421 "<scalar_mul_constraint>")
4422 (match_operand:SI 3 "immediate_operand" "i")]
4423 UNSPEC_VMUL_LANE))]
4424 "TARGET_NEON_FP16INST"
4425 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4426 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
4427 )
4428
4429 (define_insn "neon_vmull<sup>_lane<mode>"
4430 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4431 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4432 (match_operand:VMDI 2 "s_register_operand"
4433 "<scalar_mul_constraint>")
4434 (match_operand:SI 3 "immediate_operand" "i")]
4435 VMULL_LANE))]
4436 "TARGET_NEON"
4437 {
4438 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4439 }
4440 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
4441 )
4442
4443 (define_insn "neon_vqdmull_lane<mode>"
4444 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4445 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4446 (match_operand:VMDI 2 "s_register_operand"
4447 "<scalar_mul_constraint>")
4448 (match_operand:SI 3 "immediate_operand" "i")]
4449 UNSPEC_VQDMULL_LANE))]
4450 "TARGET_NEON"
4451 {
4452 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4453 }
4454 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
4455 )
4456
4457 (define_insn "neon_vq<r>dmulh_lane<mode>"
4458 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4459 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4460 (match_operand:<V_HALF> 2 "s_register_operand"
4461 "<scalar_mul_constraint>")
4462 (match_operand:SI 3 "immediate_operand" "i")]
4463 VQDMULH_LANE))]
4464 "TARGET_NEON"
4465 {
4466 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4467 }
4468 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4469 )
4470
4471 (define_insn "neon_vq<r>dmulh_lane<mode>"
4472 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4473 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4474 (match_operand:VMDI 2 "s_register_operand"
4475 "<scalar_mul_constraint>")
4476 (match_operand:SI 3 "immediate_operand" "i")]
4477 VQDMULH_LANE))]
4478 "TARGET_NEON"
4479 {
4480 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4481 }
4482 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4483 )
4484
4485 ;; vqrdmlah_lane, vqrdmlsh_lane
4486 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4487 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4488 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4489 (match_operand:VMQI 2 "s_register_operand" "w")
4490 (match_operand:<V_HALF> 3 "s_register_operand"
4491 "<scalar_mul_constraint>")
4492 (match_operand:SI 4 "immediate_operand" "i")]
4493 VQRDMLH_AS))]
4494 "TARGET_NEON_RDMA"
4495 {
4496 return
4497 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4498 }
4499 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
4500 )
4501
4502 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4503 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4504 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4505 (match_operand:VMDI 2 "s_register_operand" "w")
4506 (match_operand:VMDI 3 "s_register_operand"
4507 "<scalar_mul_constraint>")
4508 (match_operand:SI 4 "immediate_operand" "i")]
4509 VQRDMLH_AS))]
4510 "TARGET_NEON_RDMA"
4511 {
4512 return
4513 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4514 }
4515 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
4516 )
4517
4518 (define_insn "neon_vmla_lane<mode>"
4519 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4520 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4521 (match_operand:VMD 2 "s_register_operand" "w")
4522 (match_operand:VMD 3 "s_register_operand"
4523 "<scalar_mul_constraint>")
4524 (match_operand:SI 4 "immediate_operand" "i")]
4525 UNSPEC_VMLA_LANE))]
4526 "TARGET_NEON"
4527 {
4528 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4529 }
4530 [(set (attr "type")
4531 (if_then_else (match_test "<Is_float_mode>")
4532 (const_string "neon_fp_mla_s_scalar<q>")
4533 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4534 )
4535
4536 (define_insn "neon_vmla_lane<mode>"
4537 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4538 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4539 (match_operand:VMQ 2 "s_register_operand" "w")
4540 (match_operand:<V_HALF> 3 "s_register_operand"
4541 "<scalar_mul_constraint>")
4542 (match_operand:SI 4 "immediate_operand" "i")]
4543 UNSPEC_VMLA_LANE))]
4544 "TARGET_NEON"
4545 {
4546 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4547 }
4548 [(set (attr "type")
4549 (if_then_else (match_test "<Is_float_mode>")
4550 (const_string "neon_fp_mla_s_scalar<q>")
4551 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4552 )
4553
4554 (define_insn "neon_vmlal<sup>_lane<mode>"
4555 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4556 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4557 (match_operand:VMDI 2 "s_register_operand" "w")
4558 (match_operand:VMDI 3 "s_register_operand"
4559 "<scalar_mul_constraint>")
4560 (match_operand:SI 4 "immediate_operand" "i")]
4561 VMLAL_LANE))]
4562 "TARGET_NEON"
4563 {
4564 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4565 }
4566 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4567 )
4568
4569 (define_insn "neon_vqdmlal_lane<mode>"
4570 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4571 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4572 (match_operand:VMDI 2 "s_register_operand" "w")
4573 (match_operand:VMDI 3 "s_register_operand"
4574 "<scalar_mul_constraint>")
4575 (match_operand:SI 4 "immediate_operand" "i")]
4576 UNSPEC_VQDMLAL_LANE))]
4577 "TARGET_NEON"
4578 {
4579 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4580 }
4581 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4582 )
4583
4584 (define_insn "neon_vmls_lane<mode>"
4585 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4586 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4587 (match_operand:VMD 2 "s_register_operand" "w")
4588 (match_operand:VMD 3 "s_register_operand"
4589 "<scalar_mul_constraint>")
4590 (match_operand:SI 4 "immediate_operand" "i")]
4591 UNSPEC_VMLS_LANE))]
4592 "TARGET_NEON"
4593 {
4594 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4595 }
4596 [(set (attr "type")
4597 (if_then_else (match_test "<Is_float_mode>")
4598 (const_string "neon_fp_mla_s_scalar<q>")
4599 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4600 )
4601
4602 (define_insn "neon_vmls_lane<mode>"
4603 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4604 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4605 (match_operand:VMQ 2 "s_register_operand" "w")
4606 (match_operand:<V_HALF> 3 "s_register_operand"
4607 "<scalar_mul_constraint>")
4608 (match_operand:SI 4 "immediate_operand" "i")]
4609 UNSPEC_VMLS_LANE))]
4610 "TARGET_NEON"
4611 {
4612 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4613 }
4614 [(set (attr "type")
4615 (if_then_else (match_test "<Is_float_mode>")
4616 (const_string "neon_fp_mla_s_scalar<q>")
4617 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4618 )
4619
4620 (define_insn "neon_vmlsl<sup>_lane<mode>"
4621 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4622 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4623 (match_operand:VMDI 2 "s_register_operand" "w")
4624 (match_operand:VMDI 3 "s_register_operand"
4625 "<scalar_mul_constraint>")
4626 (match_operand:SI 4 "immediate_operand" "i")]
4627 VMLSL_LANE))]
4628 "TARGET_NEON"
4629 {
4630 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4631 }
4632 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4633 )
4634
4635 (define_insn "neon_vqdmlsl_lane<mode>"
4636 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4637 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4638 (match_operand:VMDI 2 "s_register_operand" "w")
4639 (match_operand:VMDI 3 "s_register_operand"
4640 "<scalar_mul_constraint>")
4641 (match_operand:SI 4 "immediate_operand" "i")]
4642 UNSPEC_VQDMLSL_LANE))]
4643 "TARGET_NEON"
4644 {
4645 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4646 }
4647 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4648 )
4649
4650 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4651 ; core register into a temp register, then use a scalar taken from that. This
4652 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4653 ; or extracted from another vector.  In the latter case it's currently better to
4654 ; use the "_lane" variant, and the former case can probably be implemented
4655 ; using vld1_lane, but that hasn't been done yet.
4656
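; As a sketch of the intended semantics (d, a, s and lanes are illustrative
; names), an "_n" operation such as neon_vmul_n<mode> behaves like
;
;   for (i = 0; i < lanes; i++)
;     d[i] = a[i] * s;
;
; and the expanders below realise that by placing s in lane 0 of a scratch
; vector and reusing the corresponding "_lane" pattern with lane 0.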
4657 (define_expand "neon_vmul_n<mode>"
4658 [(match_operand:VMD 0 "s_register_operand" "")
4659 (match_operand:VMD 1 "s_register_operand" "")
4660 (match_operand:<V_elem> 2 "s_register_operand" "")]
4661 "TARGET_NEON"
4662 {
4663 rtx tmp = gen_reg_rtx (<MODE>mode);
4664 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4665 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4666 const0_rtx));
4667 DONE;
4668 })
4669
4670 (define_expand "neon_vmul_n<mode>"
4671 [(match_operand:VMQ 0 "s_register_operand" "")
4672 (match_operand:VMQ 1 "s_register_operand" "")
4673 (match_operand:<V_elem> 2 "s_register_operand" "")]
4674 "TARGET_NEON"
4675 {
4676 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4677 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4678 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4679 const0_rtx));
4680 DONE;
4681 })
4682
4683 (define_expand "neon_vmul_n<mode>"
4684 [(match_operand:VH 0 "s_register_operand")
4685 (match_operand:VH 1 "s_register_operand")
4686 (match_operand:<V_elem> 2 "s_register_operand")]
4687 "TARGET_NEON_FP16INST"
4688 {
4689 rtx tmp = gen_reg_rtx (V4HFmode);
4690 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4691 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4692 const0_rtx));
4693 DONE;
4694 })
4695
4696 (define_expand "neon_vmulls_n<mode>"
4697 [(match_operand:<V_widen> 0 "s_register_operand" "")
4698 (match_operand:VMDI 1 "s_register_operand" "")
4699 (match_operand:<V_elem> 2 "s_register_operand" "")]
4700 "TARGET_NEON"
4701 {
4702 rtx tmp = gen_reg_rtx (<MODE>mode);
4703 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4704 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4705 const0_rtx));
4706 DONE;
4707 })
4708
4709 (define_expand "neon_vmullu_n<mode>"
4710 [(match_operand:<V_widen> 0 "s_register_operand" "")
4711 (match_operand:VMDI 1 "s_register_operand" "")
4712 (match_operand:<V_elem> 2 "s_register_operand" "")]
4713 "TARGET_NEON"
4714 {
4715 rtx tmp = gen_reg_rtx (<MODE>mode);
4716 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4717 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4718 const0_rtx));
4719 DONE;
4720 })
4721
4722 (define_expand "neon_vqdmull_n<mode>"
4723 [(match_operand:<V_widen> 0 "s_register_operand" "")
4724 (match_operand:VMDI 1 "s_register_operand" "")
4725 (match_operand:<V_elem> 2 "s_register_operand" "")]
4726 "TARGET_NEON"
4727 {
4728 rtx tmp = gen_reg_rtx (<MODE>mode);
4729 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4730 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4731 const0_rtx));
4732 DONE;
4733 })
4734
4735 (define_expand "neon_vqdmulh_n<mode>"
4736 [(match_operand:VMDI 0 "s_register_operand" "")
4737 (match_operand:VMDI 1 "s_register_operand" "")
4738 (match_operand:<V_elem> 2 "s_register_operand" "")]
4739 "TARGET_NEON"
4740 {
4741 rtx tmp = gen_reg_rtx (<MODE>mode);
4742 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4743 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4744 const0_rtx));
4745 DONE;
4746 })
4747
4748 (define_expand "neon_vqrdmulh_n<mode>"
4749 [(match_operand:VMDI 0 "s_register_operand" "")
4750 (match_operand:VMDI 1 "s_register_operand" "")
4751 (match_operand:<V_elem> 2 "s_register_operand" "")]
4752 "TARGET_NEON"
4753 {
4754 rtx tmp = gen_reg_rtx (<MODE>mode);
4755 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4756 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4757 const0_rtx));
4758 DONE;
4759 })
4760
4761 (define_expand "neon_vqdmulh_n<mode>"
4762 [(match_operand:VMQI 0 "s_register_operand" "")
4763 (match_operand:VMQI 1 "s_register_operand" "")
4764 (match_operand:<V_elem> 2 "s_register_operand" "")]
4765 "TARGET_NEON"
4766 {
4767 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4768 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4769 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4770 const0_rtx));
4771 DONE;
4772 })
4773
4774 (define_expand "neon_vqrdmulh_n<mode>"
4775 [(match_operand:VMQI 0 "s_register_operand" "")
4776 (match_operand:VMQI 1 "s_register_operand" "")
4777 (match_operand:<V_elem> 2 "s_register_operand" "")]
4778 "TARGET_NEON"
4779 {
4780 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4781 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4782 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4783 const0_rtx));
4784 DONE;
4785 })
4786
4787 (define_expand "neon_vmla_n<mode>"
4788 [(match_operand:VMD 0 "s_register_operand" "")
4789 (match_operand:VMD 1 "s_register_operand" "")
4790 (match_operand:VMD 2 "s_register_operand" "")
4791 (match_operand:<V_elem> 3 "s_register_operand" "")]
4792 "TARGET_NEON"
4793 {
4794 rtx tmp = gen_reg_rtx (<MODE>mode);
4795 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4796 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4797 tmp, const0_rtx));
4798 DONE;
4799 })
4800
4801 (define_expand "neon_vmla_n<mode>"
4802 [(match_operand:VMQ 0 "s_register_operand" "")
4803 (match_operand:VMQ 1 "s_register_operand" "")
4804 (match_operand:VMQ 2 "s_register_operand" "")
4805 (match_operand:<V_elem> 3 "s_register_operand" "")]
4806 "TARGET_NEON"
4807 {
4808 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4809 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4810 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4811 tmp, const0_rtx));
4812 DONE;
4813 })
4814
4815 (define_expand "neon_vmlals_n<mode>"
4816 [(match_operand:<V_widen> 0 "s_register_operand" "")
4817 (match_operand:<V_widen> 1 "s_register_operand" "")
4818 (match_operand:VMDI 2 "s_register_operand" "")
4819 (match_operand:<V_elem> 3 "s_register_operand" "")]
4820 "TARGET_NEON"
4821 {
4822 rtx tmp = gen_reg_rtx (<MODE>mode);
4823 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4824 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4825 tmp, const0_rtx));
4826 DONE;
4827 })
4828
4829 (define_expand "neon_vmlalu_n<mode>"
4830 [(match_operand:<V_widen> 0 "s_register_operand" "")
4831 (match_operand:<V_widen> 1 "s_register_operand" "")
4832 (match_operand:VMDI 2 "s_register_operand" "")
4833 (match_operand:<V_elem> 3 "s_register_operand" "")]
4834 "TARGET_NEON"
4835 {
4836 rtx tmp = gen_reg_rtx (<MODE>mode);
4837 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4838 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4839 tmp, const0_rtx));
4840 DONE;
4841 })
4842
4843 (define_expand "neon_vqdmlal_n<mode>"
4844 [(match_operand:<V_widen> 0 "s_register_operand" "")
4845 (match_operand:<V_widen> 1 "s_register_operand" "")
4846 (match_operand:VMDI 2 "s_register_operand" "")
4847 (match_operand:<V_elem> 3 "s_register_operand" "")]
4848 "TARGET_NEON"
4849 {
4850 rtx tmp = gen_reg_rtx (<MODE>mode);
4851 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4852 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4853 tmp, const0_rtx));
4854 DONE;
4855 })
4856
4857 (define_expand "neon_vmls_n<mode>"
4858 [(match_operand:VMD 0 "s_register_operand" "")
4859 (match_operand:VMD 1 "s_register_operand" "")
4860 (match_operand:VMD 2 "s_register_operand" "")
4861 (match_operand:<V_elem> 3 "s_register_operand" "")]
4862 "TARGET_NEON"
4863 {
4864 rtx tmp = gen_reg_rtx (<MODE>mode);
4865 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4866 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4867 tmp, const0_rtx));
4868 DONE;
4869 })
4870
4871 (define_expand "neon_vmls_n<mode>"
4872 [(match_operand:VMQ 0 "s_register_operand" "")
4873 (match_operand:VMQ 1 "s_register_operand" "")
4874 (match_operand:VMQ 2 "s_register_operand" "")
4875 (match_operand:<V_elem> 3 "s_register_operand" "")]
4876 "TARGET_NEON"
4877 {
4878 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4879 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4880 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4881 tmp, const0_rtx));
4882 DONE;
4883 })
4884
4885 (define_expand "neon_vmlsls_n<mode>"
4886 [(match_operand:<V_widen> 0 "s_register_operand" "")
4887 (match_operand:<V_widen> 1 "s_register_operand" "")
4888 (match_operand:VMDI 2 "s_register_operand" "")
4889 (match_operand:<V_elem> 3 "s_register_operand" "")]
4890 "TARGET_NEON"
4891 {
4892 rtx tmp = gen_reg_rtx (<MODE>mode);
4893 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4894 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4895 tmp, const0_rtx));
4896 DONE;
4897 })
4898
4899 (define_expand "neon_vmlslu_n<mode>"
4900 [(match_operand:<V_widen> 0 "s_register_operand" "")
4901 (match_operand:<V_widen> 1 "s_register_operand" "")
4902 (match_operand:VMDI 2 "s_register_operand" "")
4903 (match_operand:<V_elem> 3 "s_register_operand" "")]
4904 "TARGET_NEON"
4905 {
4906 rtx tmp = gen_reg_rtx (<MODE>mode);
4907 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4908 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4909 tmp, const0_rtx));
4910 DONE;
4911 })
4912
4913 (define_expand "neon_vqdmlsl_n<mode>"
4914 [(match_operand:<V_widen> 0 "s_register_operand" "")
4915 (match_operand:<V_widen> 1 "s_register_operand" "")
4916 (match_operand:VMDI 2 "s_register_operand" "")
4917 (match_operand:<V_elem> 3 "s_register_operand" "")]
4918 "TARGET_NEON"
4919 {
4920 rtx tmp = gen_reg_rtx (<MODE>mode);
4921 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4922 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4923 tmp, const0_rtx));
4924 DONE;
4925 })
4926
4927 (define_insn "@neon_vext<mode>"
4928 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4929 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4930 (match_operand:VDQX 2 "s_register_operand" "w")
4931 (match_operand:SI 3 "immediate_operand" "i")]
4932 UNSPEC_VEXT))]
4933 "TARGET_NEON"
4934 {
4935 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4936 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4937 }
4938 [(set_attr "type" "neon_ext<q>")]
4939 )
4940
4941 (define_insn "@neon_vrev64<mode>"
4942 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4943 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4944 UNSPEC_VREV64))]
4945 "TARGET_NEON"
4946 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4947 [(set_attr "type" "neon_rev<q>")]
4948 )
4949
4950 (define_insn "@neon_vrev32<mode>"
4951 [(set (match_operand:VX 0 "s_register_operand" "=w")
4952 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4953 UNSPEC_VREV32))]
4954 "TARGET_NEON"
4955 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4956 [(set_attr "type" "neon_rev<q>")]
4957 )
4958
4959 (define_insn "@neon_vrev16<mode>"
4960 [(set (match_operand:VE 0 "s_register_operand" "=w")
4961 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4962 UNSPEC_VREV16))]
4963 "TARGET_NEON"
4964 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4965 [(set_attr "type" "neon_rev<q>")]
4966 )
4967
4968 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4969 ; allocation. For an intrinsic of the form:
4970 ; rD = vbsl_* (rS, rN, rM)
4971 ; We can use any of:
4972 ; vbsl rS, rN, rM (if D = S)
4973 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4974 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
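; In bitwise terms (a sketch of the select operation, independent of the
; element size):
;   rD = (rS & rN) | (~rS & rM)
; i.e. a 1-bit in the selector rS picks the corresponding bit from rN and a
; 0-bit picks it from rM, matching the vbit/vbif descriptions above.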
4975
4976 (define_insn "neon_vbsl<mode>_internal"
4977 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4978 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4979 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4980 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4981 UNSPEC_VBSL))]
4982 "TARGET_NEON"
4983 "@
4984 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4985 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4986 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4987 [(set_attr "type" "neon_bsl<q>")]
4988 )
4989
4990 (define_expand "neon_vbsl<mode>"
4991 [(set (match_operand:VDQX 0 "s_register_operand" "")
4992 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4993 (match_operand:VDQX 2 "s_register_operand" "")
4994 (match_operand:VDQX 3 "s_register_operand" "")]
4995 UNSPEC_VBSL))]
4996 "TARGET_NEON"
4997 {
4998 /* We can't alias operands together if they have different modes. */
4999 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
5000 })
5001
5002 ;; vshl, vrshl
5003 (define_insn "neon_v<shift_op><sup><mode>"
5004 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5005 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5006 (match_operand:VDQIX 2 "s_register_operand" "w")]
5007 VSHL))]
5008 "TARGET_NEON"
5009 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5010 [(set_attr "type" "neon_shift_imm<q>")]
5011 )
5012
5013 ;; vqshl, vqrshl
5014 (define_insn "neon_v<shift_op><sup><mode>"
5015 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5016 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5017 (match_operand:VDQIX 2 "s_register_operand" "w")]
5018 VQSHL))]
5019 "TARGET_NEON"
5020 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
5021 [(set_attr "type" "neon_sat_shift_imm<q>")]
5022 )
5023
5024 ;; vshr_n, vrshr_n
5025 (define_insn "neon_v<shift_op><sup>_n<mode>"
5026 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5027 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5028 (match_operand:SI 2 "immediate_operand" "i")]
5029 VSHR_N))]
5030 "TARGET_NEON"
5031 {
5032 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
5033 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
5034 }
5035 [(set_attr "type" "neon_shift_imm<q>")]
5036 )
5037
5038 ;; vshrn_n, vrshrn_n
5039 (define_insn "neon_v<shift_op>_n<mode>"
5040 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
5041 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
5042 (match_operand:SI 2 "immediate_operand" "i")]
5043 VSHRN_N))]
5044 "TARGET_NEON"
5045 {
5046 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
5047 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
5048 }
5049 [(set_attr "type" "neon_shift_imm_narrow_q")]
5050 )
5051
5052 ;; vqshrn_n, vqrshrn_n
5053 (define_insn "neon_v<shift_op><sup>_n<mode>"
5054 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
5055 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
5056 (match_operand:SI 2 "immediate_operand" "i")]
5057 VQSHRN_N))]
5058 "TARGET_NEON"
5059 {
5060 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
5061 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
5062 }
5063 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5064 )
5065
5066 ;; vqshrun_n, vqrshrun_n
5067 (define_insn "neon_v<shift_op>_n<mode>"
5068 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
5069 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
5070 (match_operand:SI 2 "immediate_operand" "i")]
5071 VQSHRUN_N))]
5072 "TARGET_NEON"
5073 {
5074 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
5075 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
5076 }
5077 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5078 )
5079
5080 (define_insn "neon_vshl_n<mode>"
5081 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5082 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5083 (match_operand:SI 2 "immediate_operand" "i")]
5084 UNSPEC_VSHL_N))]
5085 "TARGET_NEON"
5086 {
5087 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
5088 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
5089 }
5090 [(set_attr "type" "neon_shift_imm<q>")]
5091 )
5092
5093 (define_insn "neon_vqshl_<sup>_n<mode>"
5094 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5095 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5096 (match_operand:SI 2 "immediate_operand" "i")]
5097 VQSHL_N))]
5098 "TARGET_NEON"
5099 {
5100 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
5101 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
5102 }
5103 [(set_attr "type" "neon_sat_shift_imm<q>")]
5104 )
5105
5106 (define_insn "neon_vqshlu_n<mode>"
5107 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5108 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5109 (match_operand:SI 2 "immediate_operand" "i")]
5110 UNSPEC_VQSHLU_N))]
5111 "TARGET_NEON"
5112 {
5113 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
5114 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
5115 }
5116 [(set_attr "type" "neon_sat_shift_imm<q>")]
5117 )
5118
5119 (define_insn "neon_vshll<sup>_n<mode>"
5120 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
5121 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
5122 (match_operand:SI 2 "immediate_operand" "i")]
5123 VSHLL_N))]
5124 "TARGET_NEON"
5125 {
5126 /* The boundaries are: 0 < imm <= size. */
5127 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
5128 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
5129 }
5130 [(set_attr "type" "neon_shift_imm_long")]
5131 )
5132
5133 ;; vsra_n, vrsra_n
5134 (define_insn "neon_v<shift_op><sup>_n<mode>"
5135 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5136 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5137 (match_operand:VDQIX 2 "s_register_operand" "w")
5138 (match_operand:SI 3 "immediate_operand" "i")]
5139 VSRA_N))]
5140 "TARGET_NEON"
5141 {
5142 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5143 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5144 }
5145 [(set_attr "type" "neon_shift_acc<q>")]
5146 )
5147
5148 (define_insn "neon_vsri_n<mode>"
5149 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5150 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5151 (match_operand:VDQIX 2 "s_register_operand" "w")
5152 (match_operand:SI 3 "immediate_operand" "i")]
5153 UNSPEC_VSRI))]
5154 "TARGET_NEON"
5155 {
5156 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5157 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5158 }
5159 [(set_attr "type" "neon_shift_reg<q>")]
5160 )
5161
5162 (define_insn "neon_vsli_n<mode>"
5163 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5164 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5165 (match_operand:VDQIX 2 "s_register_operand" "w")
5166 (match_operand:SI 3 "immediate_operand" "i")]
5167 UNSPEC_VSLI))]
5168 "TARGET_NEON"
5169 {
5170 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
5171 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5172 }
5173 [(set_attr "type" "neon_shift_reg<q>")]
5174 )
5175
5176 (define_insn "neon_vtbl1v8qi"
5177 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5178 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
5179 (match_operand:V8QI 2 "s_register_operand" "w")]
5180 UNSPEC_VTBL))]
5181 "TARGET_NEON"
5182 "vtbl.8\t%P0, {%P1}, %P2"
5183 [(set_attr "type" "neon_tbl1")]
5184 )
5185
5186 (define_insn "neon_vtbl2v8qi"
5187 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5188 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
5189 (match_operand:V8QI 2 "s_register_operand" "w")]
5190 UNSPEC_VTBL))]
5191 "TARGET_NEON"
5192 {
5193 rtx ops[4];
5194 int tabbase = REGNO (operands[1]);
5195
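/* The TImode table operand occupies two consecutive D registers starting at
   tabbase; name them explicitly to build the register-list syntax.  */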
5196 ops[0] = operands[0];
5197 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5198 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5199 ops[3] = operands[2];
5200 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
5201
5202 return "";
5203 }
5204 [(set_attr "type" "neon_tbl2")]
5205 )
5206
5207 (define_insn "neon_vtbl3v8qi"
5208 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5209 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
5210 (match_operand:V8QI 2 "s_register_operand" "w")]
5211 UNSPEC_VTBL))]
5212 "TARGET_NEON"
5213 {
5214 rtx ops[5];
5215 int tabbase = REGNO (operands[1]);
5216
5217 ops[0] = operands[0];
5218 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5219 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5220 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5221 ops[4] = operands[2];
5222 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5223
5224 return "";
5225 }
5226 [(set_attr "type" "neon_tbl3")]
5227 )
5228
5229 (define_insn "neon_vtbl4v8qi"
5230 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5231 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5232 (match_operand:V8QI 2 "s_register_operand" "w")]
5233 UNSPEC_VTBL))]
5234 "TARGET_NEON"
5235 {
5236 rtx ops[6];
5237 int tabbase = REGNO (operands[1]);
5238
5239 ops[0] = operands[0];
5240 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5241 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5242 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5243 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5244 ops[5] = operands[2];
5245 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5246
5247 return "";
5248 }
5249 [(set_attr "type" "neon_tbl4")]
5250 )
5251
5252 ;; These three are used by the vec_perm infrastructure for V16QImode.
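;; neon_vtbl1v16qi is split after reload into two vtbl2 operations on V8QI
;; halves: the 16-byte table is used whole (as a pair of D registers), while
;; the index and result vectors are handled low half first, then high half.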
5253 (define_insn_and_split "neon_vtbl1v16qi"
5254 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5255 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5256 (match_operand:V16QI 2 "s_register_operand" "w")]
5257 UNSPEC_VTBL))]
5258 "TARGET_NEON"
5259 "#"
5260 "&& reload_completed"
5261 [(const_int 0)]
5262 {
5263 rtx op0, op1, op2, part0, part2;
5264 unsigned ofs;
5265
5266 op0 = operands[0];
5267 op1 = gen_lowpart (TImode, operands[1]);
5268 op2 = operands[2];
5269
5270 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5271 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5272 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5273 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5274
5275 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5276 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5277 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5278 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5279 DONE;
5280 }
5281 [(set_attr "type" "multiple")]
5282 )
5283
5284 (define_insn_and_split "neon_vtbl2v16qi"
5285 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5286 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5287 (match_operand:V16QI 2 "s_register_operand" "w")]
5288 UNSPEC_VTBL))]
5289 "TARGET_NEON"
5290 "#"
5291 "&& reload_completed"
5292 [(const_int 0)]
5293 {
5294 rtx op0, op1, op2, part0, part2;
5295 unsigned ofs;
5296
5297 op0 = operands[0];
5298 op1 = operands[1];
5299 op2 = operands[2];
5300
5301 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5302 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5303 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5304 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5305
5306 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5307 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5308 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5309 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5310 DONE;
5311 }
5312 [(set_attr "type" "multiple")]
5313 )
5314
5315 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5316 ;; handle quad-word input modes, producing octa-word output modes. But
5317 ;; that requires us to add support for octa-word vector modes in moves.
5318 ;; That seems overkill for this one use in vec_perm.
5319 (define_insn_and_split "neon_vcombinev16qi"
5320 [(set (match_operand:OI 0 "s_register_operand" "=w")
5321 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5322 (match_operand:V16QI 2 "s_register_operand" "w")]
5323 UNSPEC_VCONCAT))]
5324 "TARGET_NEON"
5325 "#"
5326 "&& reload_completed"
5327 [(const_int 0)]
5328 {
5329 neon_split_vcombine (operands);
5330 DONE;
5331 }
5332 [(set_attr "type" "multiple")]
5333 )
5334
5335 (define_insn "neon_vtbx1v8qi"
5336 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5337 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5338 (match_operand:V8QI 2 "s_register_operand" "w")
5339 (match_operand:V8QI 3 "s_register_operand" "w")]
5340 UNSPEC_VTBX))]
5341 "TARGET_NEON"
5342 "vtbx.8\t%P0, {%P2}, %P3"
5343 [(set_attr "type" "neon_tbl1")]
5344 )
5345
5346 (define_insn "neon_vtbx2v8qi"
5347 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5348 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5349 (match_operand:TI 2 "s_register_operand" "w")
5350 (match_operand:V8QI 3 "s_register_operand" "w")]
5351 UNSPEC_VTBX))]
5352 "TARGET_NEON"
5353 {
5354 rtx ops[4];
5355 int tabbase = REGNO (operands[2]);
5356
5357 ops[0] = operands[0];
5358 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5359 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5360 ops[3] = operands[3];
5361 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5362
5363 return "";
5364 }
5365 [(set_attr "type" "neon_tbl2")]
5366 )
5367
5368 (define_insn "neon_vtbx3v8qi"
5369 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5370 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5371 (match_operand:EI 2 "s_register_operand" "w")
5372 (match_operand:V8QI 3 "s_register_operand" "w")]
5373 UNSPEC_VTBX))]
5374 "TARGET_NEON"
5375 {
5376 rtx ops[5];
5377 int tabbase = REGNO (operands[2]);
5378
5379 ops[0] = operands[0];
5380 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5381 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5382 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5383 ops[4] = operands[3];
5384 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5385
5386 return "";
5387 }
5388 [(set_attr "type" "neon_tbl3")]
5389 )
5390
5391 (define_insn "neon_vtbx4v8qi"
5392 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5393 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5394 (match_operand:OI 2 "s_register_operand" "w")
5395 (match_operand:V8QI 3 "s_register_operand" "w")]
5396 UNSPEC_VTBX))]
5397 "TARGET_NEON"
5398 {
5399 rtx ops[6];
5400 int tabbase = REGNO (operands[2]);
5401
5402 ops[0] = operands[0];
5403 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5404 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5405 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5406 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5407 ops[5] = operands[3];
5408 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5409
5410 return "";
5411 }
5412 [(set_attr "type" "neon_tbl4")]
5413 )
5414
5415 (define_expand "@neon_vtrn<mode>_internal"
5416 [(parallel
5417 [(set (match_operand:VDQWH 0 "s_register_operand")
5418 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5419 (match_operand:VDQWH 2 "s_register_operand")]
5420 UNSPEC_VTRN1))
5421 (set (match_operand:VDQWH 3 "s_register_operand")
5422 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5423 "TARGET_NEON"
5424 ""
5425 )
5426
5427 ;; Note: Different operand numbering to handle tied registers correctly.
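;; The instruction rewrites both of its registers in place, so input operand 1
;; is tied to output operand 0 and input operand 3 to output operand 2 (the
;; vzip and vuzp patterns below use the same numbering).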
5428 (define_insn "*neon_vtrn<mode>_insn"
5429 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5430 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5431 (match_operand:VDQWH 3 "s_register_operand" "2")]
5432 UNSPEC_VTRN1))
5433 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5434 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5435 UNSPEC_VTRN2))]
5436 "TARGET_NEON"
5437 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5438 [(set_attr "type" "neon_permute<q>")]
5439 )
5440
5441 (define_expand "@neon_vzip<mode>_internal"
5442 [(parallel
5443 [(set (match_operand:VDQWH 0 "s_register_operand")
5444 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5445 (match_operand:VDQWH 2 "s_register_operand")]
5446 UNSPEC_VZIP1))
5447 (set (match_operand:VDQWH 3 "s_register_operand")
5448 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5449 "TARGET_NEON"
5450 ""
5451 )
5452
5453 ;; Note: Different operand numbering to handle tied registers correctly.
5454 (define_insn "*neon_vzip<mode>_insn"
5455 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5456 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5457 (match_operand:VDQWH 3 "s_register_operand" "2")]
5458 UNSPEC_VZIP1))
5459 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5460 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5461 UNSPEC_VZIP2))]
5462 "TARGET_NEON"
5463 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5464 [(set_attr "type" "neon_zip<q>")]
5465 )
5466
5467 (define_expand "@neon_vuzp<mode>_internal"
5468 [(parallel
5469 [(set (match_operand:VDQWH 0 "s_register_operand")
5470 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5471 (match_operand:VDQWH 2 "s_register_operand")]
5472 UNSPEC_VUZP1))
5473 (set (match_operand:VDQWH 3 "s_register_operand")
5474 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5475 "TARGET_NEON"
5476 ""
5477 )
5478
5479 ;; Note: Different operand numbering to handle tied registers correctly.
5480 (define_insn "*neon_vuzp<mode>_insn"
5481 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5482 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5483 (match_operand:VDQWH 3 "s_register_operand" "2")]
5484 UNSPEC_VUZP1))
5485 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5486 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5487 UNSPEC_VUZP2))]
5488 "TARGET_NEON"
5489 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5490 [(set_attr "type" "neon_zip<q>")]
5491 )
5492
5493 (define_expand "vec_load_lanes<mode><mode>"
5494 [(set (match_operand:VDQX 0 "s_register_operand")
5495 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
5496 UNSPEC_VLD1))]
5497 "TARGET_NEON")
5498
5499 (define_insn "neon_vld1<mode>"
5500 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5501 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5502 UNSPEC_VLD1))]
5503 "TARGET_NEON"
5504 "vld1.<V_sz_elem>\t%h0, %A1"
5505 [(set_attr "type" "neon_load1_1reg<q>")]
5506 )
5507
5508 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5509 ;; in arm_expand_neon_args. They are restored to architectural lane
5510 ;; order here.
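;; (Effectively, on big-endian targets GCC lane n of an N-element vector
;; maps back to architectural lane N - 1 - n; on little-endian targets the
;; lane number is unchanged.)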
5511 (define_insn "neon_vld1_lane<mode>"
5512 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5513 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5514 (match_operand:VDX 2 "s_register_operand" "0")
5515 (match_operand:SI 3 "immediate_operand" "i")]
5516 UNSPEC_VLD1_LANE))]
5517 "TARGET_NEON"
5518 {
5519 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5520 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5521 operands[3] = GEN_INT (lane);
5522 if (max == 1)
5523 return "vld1.<V_sz_elem>\t%P0, %A1";
5524 else
5525 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5526 }
5527 [(set_attr "type" "neon_load1_one_lane<q>")]
5528 )
5529
5530 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5531 ;; here on big endian targets.
5532 (define_insn "neon_vld1_lane<mode>"
5533 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5534 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5535 (match_operand:VQX 2 "s_register_operand" "0")
5536 (match_operand:SI 3 "immediate_operand" "i")]
5537 UNSPEC_VLD1_LANE))]
5538 "TARGET_NEON"
5539 {
5540 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5541 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5542 operands[3] = GEN_INT (lane);
5543 int regno = REGNO (operands[0]);
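/* A quad-word vector occupies two consecutive D registers.  Pick the D
   register that holds the requested lane and renumber the lane within it;
   the other quad-word lane patterns below make the same adjustment.  */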
5544 if (lane >= max / 2)
5545 {
5546 lane -= max / 2;
5547 regno += 2;
5548 operands[3] = GEN_INT (lane);
5549 }
5550 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5551 if (max == 2)
5552 return "vld1.<V_sz_elem>\t%P0, %A1";
5553 else
5554 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5555 }
5556 [(set_attr "type" "neon_load1_one_lane<q>")]
5557 )
5558
5559 (define_insn "neon_vld1_dup<mode>"
5560 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5561 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5562 "TARGET_NEON"
5563 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5564 [(set_attr "type" "neon_load1_all_lanes<q>")]
5565 )
5566
5567 ;; Special case for DImode. Treat it exactly like a simple load.
5568 (define_expand "neon_vld1_dupdi"
5569 [(set (match_operand:DI 0 "s_register_operand" "")
5570 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
5571 UNSPEC_VLD1))]
5572 "TARGET_NEON"
5573 ""
5574 )
5575
5576 (define_insn "neon_vld1_dup<mode>"
5577 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5578 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5579 "TARGET_NEON"
5580 {
5581 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5582 }
5583 [(set_attr "type" "neon_load1_all_lanes<q>")]
5584 )
5585
5586 (define_insn_and_split "neon_vld1_dupv2di"
5587 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5588 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5589 "TARGET_NEON"
5590 "#"
5591 "&& reload_completed"
5592 [(const_int 0)]
5593 {
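/* Load the scalar once into one half of the Q register, then copy it into
   the other half to complete the duplicate.  */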
5594 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5595 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5596 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx);
5597 DONE;
5598 }
5599 [(set_attr "length" "8")
5600 (set_attr "type" "neon_load1_all_lanes_q")]
5601 )
5602
5603 (define_expand "vec_store_lanes<mode><mode>"
5604 [(set (match_operand:VDQX 0 "neon_struct_operand")
5605 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5606 UNSPEC_VST1))]
5607 "TARGET_NEON")
5608
5609 (define_insn "neon_vst1<mode>"
5610 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5611 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5612 UNSPEC_VST1))]
5613 "TARGET_NEON"
5614 "vst1.<V_sz_elem>\t%h1, %A0"
5615 [(set_attr "type" "neon_store1_1reg<q>")])
5616
5617 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5618 ;; here on big endian targets.
5619 (define_insn "neon_vst1_lane<mode>"
5620 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5621 (unspec:<V_elem>
5622 [(match_operand:VDX 1 "s_register_operand" "w")
5623 (match_operand:SI 2 "immediate_operand" "i")]
5624 UNSPEC_VST1_LANE))]
5625 "TARGET_NEON"
5626 {
5627 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5628 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5629 operands[2] = GEN_INT (lane);
5630 if (max == 1)
5631 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5632 else
5633 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5634 }
5635 [(set_attr "type" "neon_store1_one_lane<q>")]
5636 )
5637
5638 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5639 ;; here on big endian targets.
5640 (define_insn "neon_vst1_lane<mode>"
5641 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5642 (unspec:<V_elem>
5643 [(match_operand:VQX 1 "s_register_operand" "w")
5644 (match_operand:SI 2 "immediate_operand" "i")]
5645 UNSPEC_VST1_LANE))]
5646 "TARGET_NEON"
5647 {
5648 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5649 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5650 int regno = REGNO (operands[1]);
5651 if (lane >= max / 2)
5652 {
5653 lane -= max / 2;
5654 regno += 2;
5655 }
5656 operands[2] = GEN_INT (lane);
5657 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5658 if (max == 2)
5659 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5660 else
5661 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5662 }
5663 [(set_attr "type" "neon_store1_one_lane<q>")]
5664 )
5665
5666 (define_expand "vec_load_lanesti<mode>"
5667 [(set (match_operand:TI 0 "s_register_operand")
5668 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5669 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5670 UNSPEC_VLD2))]
5671 "TARGET_NEON")
5672
5673 (define_insn "neon_vld2<mode>"
5674 [(set (match_operand:TI 0 "s_register_operand" "=w")
5675 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5676 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5677 UNSPEC_VLD2))]
5678 "TARGET_NEON"
5679 {
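/* There is no 64-bit element form of vld2; a structure of 64-bit elements
   is loaded as two D registers with a plain vld1 instead.  The vst2, vld3,
   vst3, vld4 and vst4 patterns below handle 64-bit elements the same way.  */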
5680 if (<V_sz_elem> == 64)
5681 return "vld1.64\t%h0, %A1";
5682 else
5683 return "vld2.<V_sz_elem>\t%h0, %A1";
5684 }
5685 [(set (attr "type")
5686 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5687 (const_string "neon_load1_2reg<q>")
5688 (const_string "neon_load2_2reg<q>")))]
5689 )
5690
5691 (define_expand "vec_load_lanesoi<mode>"
5692 [(set (match_operand:OI 0 "s_register_operand")
5693 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5694 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5695 UNSPEC_VLD2))]
5696 "TARGET_NEON")
5697
5698 (define_insn "neon_vld2<mode>"
5699 [(set (match_operand:OI 0 "s_register_operand" "=w")
5700 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5701 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5702 UNSPEC_VLD2))]
5703 "TARGET_NEON"
5704 "vld2.<V_sz_elem>\t%h0, %A1"
5705 [(set_attr "type" "neon_load2_2reg_q")])
5706
5707 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5708 ;; here on big endian targets.
5709 (define_insn "neon_vld2_lane<mode>"
5710 [(set (match_operand:TI 0 "s_register_operand" "=w")
5711 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5712 (match_operand:TI 2 "s_register_operand" "0")
5713 (match_operand:SI 3 "immediate_operand" "i")
5714 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5715 UNSPEC_VLD2_LANE))]
5716 "TARGET_NEON"
5717 {
5718 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5719 int regno = REGNO (operands[0]);
5720 rtx ops[4];
5721 ops[0] = gen_rtx_REG (DImode, regno);
5722 ops[1] = gen_rtx_REG (DImode, regno + 2);
5723 ops[2] = operands[1];
5724 ops[3] = GEN_INT (lane);
5725 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5726 return "";
5727 }
5728 [(set_attr "type" "neon_load2_one_lane<q>")]
5729 )
5730
5731 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5732 ;; here on big endian targets.
5733 (define_insn "neon_vld2_lane<mode>"
5734 [(set (match_operand:OI 0 "s_register_operand" "=w")
5735 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5736 (match_operand:OI 2 "s_register_operand" "0")
5737 (match_operand:SI 3 "immediate_operand" "i")
5738 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5739 UNSPEC_VLD2_LANE))]
5740 "TARGET_NEON"
5741 {
5742 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5743 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5744 int regno = REGNO (operands[0]);
5745 rtx ops[4];
5746 if (lane >= max / 2)
5747 {
5748 lane -= max / 2;
5749 regno += 2;
5750 }
5751 ops[0] = gen_rtx_REG (DImode, regno);
5752 ops[1] = gen_rtx_REG (DImode, regno + 4);
5753 ops[2] = operands[1];
5754 ops[3] = GEN_INT (lane);
5755 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5756 return "";
5757 }
5758 [(set_attr "type" "neon_load2_one_lane<q>")]
5759 )
5760
5761 (define_insn "neon_vld2_dup<mode>"
5762 [(set (match_operand:TI 0 "s_register_operand" "=w")
5763 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5764 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5765 UNSPEC_VLD2_DUP))]
5766 "TARGET_NEON"
5767 {
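/* For the single-element DI mode there is nothing to duplicate; the two
   structure elements are simply loaded with vld1.  The vld3 and vld4 dup
   patterns below do likewise.  */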
5768 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5769 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5770 else
5771 return "vld1.<V_sz_elem>\t%h0, %A1";
5772 }
5773 [(set (attr "type")
5774 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5775 (const_string "neon_load2_all_lanes<q>")
5776 (const_string "neon_load1_1reg<q>")))]
5777 )
5778
5779 (define_expand "vec_store_lanesti<mode>"
5780 [(set (match_operand:TI 0 "neon_struct_operand")
5781 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5782 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5783 UNSPEC_VST2))]
5784 "TARGET_NEON")
5785
5786 (define_insn "neon_vst2<mode>"
5787 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5788 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5789 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5790 UNSPEC_VST2))]
5791 "TARGET_NEON"
5792 {
5793 if (<V_sz_elem> == 64)
5794 return "vst1.64\t%h1, %A0";
5795 else
5796 return "vst2.<V_sz_elem>\t%h1, %A0";
5797 }
5798 [(set (attr "type")
5799 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5800 (const_string "neon_store1_2reg<q>")
5801 (const_string "neon_store2_one_lane<q>")))]
5802 )
5803
5804 (define_expand "vec_store_lanesoi<mode>"
5805 [(set (match_operand:OI 0 "neon_struct_operand")
5806 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5807 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5808 UNSPEC_VST2))]
5809 "TARGET_NEON")
5810
5811 (define_insn "neon_vst2<mode>"
5812 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5813 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5814 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5815 UNSPEC_VST2))]
5816 "TARGET_NEON"
5817 "vst2.<V_sz_elem>\t%h1, %A0"
5818 [(set_attr "type" "neon_store2_4reg<q>")]
5819 )
5820
5821 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5822 ;; here on big endian targets.
5823 (define_insn "neon_vst2_lane<mode>"
5824 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5825 (unspec:<V_two_elem>
5826 [(match_operand:TI 1 "s_register_operand" "w")
5827 (match_operand:SI 2 "immediate_operand" "i")
5828 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5829 UNSPEC_VST2_LANE))]
5830 "TARGET_NEON"
5831 {
5832 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5833 int regno = REGNO (operands[1]);
5834 rtx ops[4];
5835 ops[0] = operands[0];
5836 ops[1] = gen_rtx_REG (DImode, regno);
5837 ops[2] = gen_rtx_REG (DImode, regno + 2);
5838 ops[3] = GEN_INT (lane);
5839 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5840 return "";
5841 }
5842 [(set_attr "type" "neon_store2_one_lane<q>")]
5843 )
5844
5845 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5846 ;; here on big endian targets.
5847 (define_insn "neon_vst2_lane<mode>"
5848 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5849 (unspec:<V_two_elem>
5850 [(match_operand:OI 1 "s_register_operand" "w")
5851 (match_operand:SI 2 "immediate_operand" "i")
5852 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5853 UNSPEC_VST2_LANE))]
5854 "TARGET_NEON"
5855 {
5856 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5857 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5858 int regno = REGNO (operands[1]);
5859 rtx ops[4];
5860 if (lane >= max / 2)
5861 {
5862 lane -= max / 2;
5863 regno += 2;
5864 }
5865 ops[0] = operands[0];
5866 ops[1] = gen_rtx_REG (DImode, regno);
5867 ops[2] = gen_rtx_REG (DImode, regno + 4);
5868 ops[3] = GEN_INT (lane);
5869 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5870 return "";
5871 }
5872 [(set_attr "type" "neon_store2_one_lane<q>")]
5873 )
5874
5875 (define_expand "vec_load_lanesei<mode>"
5876 [(set (match_operand:EI 0 "s_register_operand")
5877 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5878 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5879 UNSPEC_VLD3))]
5880 "TARGET_NEON")
5881
5882 (define_insn "neon_vld3<mode>"
5883 [(set (match_operand:EI 0 "s_register_operand" "=w")
5884 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5885 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5886 UNSPEC_VLD3))]
5887 "TARGET_NEON"
5888 {
5889 if (<V_sz_elem> == 64)
5890 return "vld1.64\t%h0, %A1";
5891 else
5892 return "vld3.<V_sz_elem>\t%h0, %A1";
5893 }
5894 [(set (attr "type")
5895 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5896 (const_string "neon_load1_3reg<q>")
5897 (const_string "neon_load3_3reg<q>")))]
5898 )
5899
5900 (define_expand "vec_load_lanesci<mode>"
5901 [(match_operand:CI 0 "s_register_operand")
5902 (match_operand:CI 1 "neon_struct_operand")
5903 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5904 "TARGET_NEON"
5905 {
5906 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
5907 DONE;
5908 })
5909
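;; A quad-word vld3 is emitted as two double-word vld3 operations: the first
;; (vld3qa) fills the low half of each of the three Q registers from the
;; first half of the memory operand, and the second (vld3qb) fills the high
;; halves from the remainder.  The quad-word neon_vst3, neon_vld4 and
;; neon_vst4 expanders below follow the same even/odd D-register scheme.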
5910 (define_expand "neon_vld3<mode>"
5911 [(match_operand:CI 0 "s_register_operand")
5912 (match_operand:CI 1 "neon_struct_operand")
5913 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5914 "TARGET_NEON"
5915 {
5916 rtx mem;
5917
5918 mem = adjust_address (operands[1], EImode, 0);
5919 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5920 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5921 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
5922 DONE;
5923 })
5924
5925 (define_insn "neon_vld3qa<mode>"
5926 [(set (match_operand:CI 0 "s_register_operand" "=w")
5927 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5928 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5929 UNSPEC_VLD3A))]
5930 "TARGET_NEON"
5931 {
5932 int regno = REGNO (operands[0]);
5933 rtx ops[4];
5934 ops[0] = gen_rtx_REG (DImode, regno);
5935 ops[1] = gen_rtx_REG (DImode, regno + 4);
5936 ops[2] = gen_rtx_REG (DImode, regno + 8);
5937 ops[3] = operands[1];
5938 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5939 return "";
5940 }
5941 [(set_attr "type" "neon_load3_3reg<q>")]
5942 )
5943
5944 (define_insn "neon_vld3qb<mode>"
5945 [(set (match_operand:CI 0 "s_register_operand" "=w")
5946 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5947 (match_operand:CI 2 "s_register_operand" "0")
5948 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5949 UNSPEC_VLD3B))]
5950 "TARGET_NEON"
5951 {
5952 int regno = REGNO (operands[0]);
5953 rtx ops[4];
5954 ops[0] = gen_rtx_REG (DImode, regno + 2);
5955 ops[1] = gen_rtx_REG (DImode, regno + 6);
5956 ops[2] = gen_rtx_REG (DImode, regno + 10);
5957 ops[3] = operands[1];
5958 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5959 return "";
5960 }
5961 [(set_attr "type" "neon_load3_3reg<q>")]
5962 )
5963
5964 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5965 ;; here on big endian targets.
5966 (define_insn "neon_vld3_lane<mode>"
5967 [(set (match_operand:EI 0 "s_register_operand" "=w")
5968 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5969 (match_operand:EI 2 "s_register_operand" "0")
5970 (match_operand:SI 3 "immediate_operand" "i")
5971 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5972 UNSPEC_VLD3_LANE))]
5973 "TARGET_NEON"
5974 {
5975 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5976 int regno = REGNO (operands[0]);
5977 rtx ops[5];
5978 ops[0] = gen_rtx_REG (DImode, regno);
5979 ops[1] = gen_rtx_REG (DImode, regno + 2);
5980 ops[2] = gen_rtx_REG (DImode, regno + 4);
5981 ops[3] = operands[1];
5982 ops[4] = GEN_INT (lane);
5983 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5984 ops);
5985 return "";
5986 }
5987 [(set_attr "type" "neon_load3_one_lane<q>")]
5988 )
5989
5990 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5991 ;; here on big endian targets.
5992 (define_insn "neon_vld3_lane<mode>"
5993 [(set (match_operand:CI 0 "s_register_operand" "=w")
5994 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5995 (match_operand:CI 2 "s_register_operand" "0")
5996 (match_operand:SI 3 "immediate_operand" "i")
5997 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5998 UNSPEC_VLD3_LANE))]
5999 "TARGET_NEON"
6000 {
6001 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6002 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6003 int regno = REGNO (operands[0]);
6004 rtx ops[5];
6005 if (lane >= max / 2)
6006 {
6007 lane -= max / 2;
6008 regno += 2;
6009 }
6010 ops[0] = gen_rtx_REG (DImode, regno);
6011 ops[1] = gen_rtx_REG (DImode, regno + 4);
6012 ops[2] = gen_rtx_REG (DImode, regno + 8);
6013 ops[3] = operands[1];
6014 ops[4] = GEN_INT (lane);
6015 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
6016 ops);
6017 return "";
6018 }
6019 [(set_attr "type" "neon_load3_one_lane<q>")]
6020 )
6021
6022 (define_insn "neon_vld3_dup<mode>"
6023 [(set (match_operand:EI 0 "s_register_operand" "=w")
6024 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
6025 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6026 UNSPEC_VLD3_DUP))]
6027 "TARGET_NEON"
6028 {
6029 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6030 {
6031 int regno = REGNO (operands[0]);
6032 rtx ops[4];
6033 ops[0] = gen_rtx_REG (DImode, regno);
6034 ops[1] = gen_rtx_REG (DImode, regno + 2);
6035 ops[2] = gen_rtx_REG (DImode, regno + 4);
6036 ops[3] = operands[1];
6037 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
6038 return "";
6039 }
6040 else
6041 return "vld1.<V_sz_elem>\t%h0, %A1";
6042 }
6043 [(set (attr "type")
6044 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6045 (const_string "neon_load3_all_lanes<q>")
6046 (const_string "neon_load1_1reg<q>")))])
6047
6048 (define_expand "vec_store_lanesei<mode>"
6049 [(set (match_operand:EI 0 "neon_struct_operand")
6050 (unspec:EI [(match_operand:EI 1 "s_register_operand")
6051 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6052 UNSPEC_VST3))]
6053 "TARGET_NEON")
6054
6055 (define_insn "neon_vst3<mode>"
6056 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6057 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
6058 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6059 UNSPEC_VST3))]
6060 "TARGET_NEON"
6061 {
6062 if (<V_sz_elem> == 64)
6063 return "vst1.64\t%h1, %A0";
6064 else
6065 return "vst3.<V_sz_elem>\t%h1, %A0";
6066 }
6067 [(set (attr "type")
6068 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6069 (const_string "neon_store1_3reg<q>")
6070 (const_string "neon_store3_one_lane<q>")))])
6071
6072 (define_expand "vec_store_lanesci<mode>"
6073 [(match_operand:CI 0 "neon_struct_operand")
6074 (match_operand:CI 1 "s_register_operand")
6075 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6076 "TARGET_NEON"
6077 {
6078 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
6079 DONE;
6080 })
6081
6082 (define_expand "neon_vst3<mode>"
6083 [(match_operand:CI 0 "neon_struct_operand")
6084 (match_operand:CI 1 "s_register_operand")
6085 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6086 "TARGET_NEON"
6087 {
6088 rtx mem;
6089
6090 mem = adjust_address (operands[0], EImode, 0);
6091 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
6092 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
6093 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
6094 DONE;
6095 })
6096
6097 (define_insn "neon_vst3qa<mode>"
6098 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6099 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
6100 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6101 UNSPEC_VST3A))]
6102 "TARGET_NEON"
6103 {
6104 int regno = REGNO (operands[1]);
6105 rtx ops[4];
6106 ops[0] = operands[0];
6107 ops[1] = gen_rtx_REG (DImode, regno);
6108 ops[2] = gen_rtx_REG (DImode, regno + 4);
6109 ops[3] = gen_rtx_REG (DImode, regno + 8);
6110 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6111 return "";
6112 }
6113 [(set_attr "type" "neon_store3_3reg<q>")]
6114 )
6115
6116 (define_insn "neon_vst3qb<mode>"
6117 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6118 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
6119 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6120 UNSPEC_VST3B))]
6121 "TARGET_NEON"
6122 {
6123 int regno = REGNO (operands[1]);
6124 rtx ops[4];
6125 ops[0] = operands[0];
6126 ops[1] = gen_rtx_REG (DImode, regno + 2);
6127 ops[2] = gen_rtx_REG (DImode, regno + 6);
6128 ops[3] = gen_rtx_REG (DImode, regno + 10);
6129 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6130 return "";
6131 }
6132 [(set_attr "type" "neon_store3_3reg<q>")]
6133 )
6134
6135 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6136 ;; here on big endian targets.
6137 (define_insn "neon_vst3_lane<mode>"
6138 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6139 (unspec:<V_three_elem>
6140 [(match_operand:EI 1 "s_register_operand" "w")
6141 (match_operand:SI 2 "immediate_operand" "i")
6142 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6143 UNSPEC_VST3_LANE))]
6144 "TARGET_NEON"
6145 {
6146 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6147 int regno = REGNO (operands[1]);
6148 rtx ops[5];
6149 ops[0] = operands[0];
6150 ops[1] = gen_rtx_REG (DImode, regno);
6151 ops[2] = gen_rtx_REG (DImode, regno + 2);
6152 ops[3] = gen_rtx_REG (DImode, regno + 4);
6153 ops[4] = GEN_INT (lane);
6154 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6155 ops);
6156 return "";
6157 }
6158 [(set_attr "type" "neon_store3_one_lane<q>")]
6159 )
6160
6161 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6162 ;; here on big endian targets.
6163 (define_insn "neon_vst3_lane<mode>"
6164 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6165 (unspec:<V_three_elem>
6166 [(match_operand:CI 1 "s_register_operand" "w")
6167 (match_operand:SI 2 "immediate_operand" "i")
6168 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6169 UNSPEC_VST3_LANE))]
6170 "TARGET_NEON"
6171 {
6172 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6173 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6174 int regno = REGNO (operands[1]);
6175 rtx ops[5];
6176 if (lane >= max / 2)
6177 {
6178 lane -= max / 2;
6179 regno += 2;
6180 }
6181 ops[0] = operands[0];
6182 ops[1] = gen_rtx_REG (DImode, regno);
6183 ops[2] = gen_rtx_REG (DImode, regno + 4);
6184 ops[3] = gen_rtx_REG (DImode, regno + 8);
6185 ops[4] = GEN_INT (lane);
6186 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6187 ops);
6188 return "";
6189 }
6190 [(set_attr "type" "neon_store3_one_lane<q>")]
6191 )
6192
6193 (define_expand "vec_load_lanesoi<mode>"
6194 [(set (match_operand:OI 0 "s_register_operand")
6195 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6196 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6197 UNSPEC_VLD4))]
6198 "TARGET_NEON")
6199
6200 (define_insn "neon_vld4<mode>"
6201 [(set (match_operand:OI 0 "s_register_operand" "=w")
6202 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6203 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6204 UNSPEC_VLD4))]
6205 "TARGET_NEON"
6206 {
6207 if (<V_sz_elem> == 64)
6208 return "vld1.64\t%h0, %A1";
6209 else
6210 return "vld4.<V_sz_elem>\t%h0, %A1";
6211 }
6212 [(set (attr "type")
6213 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6214 (const_string "neon_load1_4reg<q>")
6215 (const_string "neon_load4_4reg<q>")))]
6216 )
6217
6218 (define_expand "vec_load_lanesxi<mode>"
6219 [(match_operand:XI 0 "s_register_operand")
6220 (match_operand:XI 1 "neon_struct_operand")
6221 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6222 "TARGET_NEON"
6223 {
6224 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
6225 DONE;
6226 })
6227
6228 (define_expand "neon_vld4<mode>"
6229 [(match_operand:XI 0 "s_register_operand")
6230 (match_operand:XI 1 "neon_struct_operand")
6231 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6232 "TARGET_NEON"
6233 {
6234 rtx mem;
6235
6236 mem = adjust_address (operands[1], OImode, 0);
6237 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6238 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6239 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
6240 DONE;
6241 })
6242
6243 (define_insn "neon_vld4qa<mode>"
6244 [(set (match_operand:XI 0 "s_register_operand" "=w")
6245 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6246 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6247 UNSPEC_VLD4A))]
6248 "TARGET_NEON"
6249 {
6250 int regno = REGNO (operands[0]);
6251 rtx ops[5];
6252 ops[0] = gen_rtx_REG (DImode, regno);
6253 ops[1] = gen_rtx_REG (DImode, regno + 4);
6254 ops[2] = gen_rtx_REG (DImode, regno + 8);
6255 ops[3] = gen_rtx_REG (DImode, regno + 12);
6256 ops[4] = operands[1];
6257 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6258 return "";
6259 }
6260 [(set_attr "type" "neon_load4_4reg<q>")]
6261 )
6262
6263 (define_insn "neon_vld4qb<mode>"
6264 [(set (match_operand:XI 0 "s_register_operand" "=w")
6265 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6266 (match_operand:XI 2 "s_register_operand" "0")
6267 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6268 UNSPEC_VLD4B))]
6269 "TARGET_NEON"
6270 {
6271 int regno = REGNO (operands[0]);
6272 rtx ops[5];
6273 ops[0] = gen_rtx_REG (DImode, regno + 2);
6274 ops[1] = gen_rtx_REG (DImode, regno + 6);
6275 ops[2] = gen_rtx_REG (DImode, regno + 10);
6276 ops[3] = gen_rtx_REG (DImode, regno + 14);
6277 ops[4] = operands[1];
6278 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6279 return "";
6280 }
6281 [(set_attr "type" "neon_load4_4reg<q>")]
6282 )
6283
6284 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6285 ;; here on big endian targets.
6286 (define_insn "neon_vld4_lane<mode>"
6287 [(set (match_operand:OI 0 "s_register_operand" "=w")
6288 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6289 (match_operand:OI 2 "s_register_operand" "0")
6290 (match_operand:SI 3 "immediate_operand" "i")
6291 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6292 UNSPEC_VLD4_LANE))]
6293 "TARGET_NEON"
6294 {
6295 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6296 int regno = REGNO (operands[0]);
6297 rtx ops[6];
6298 ops[0] = gen_rtx_REG (DImode, regno);
6299 ops[1] = gen_rtx_REG (DImode, regno + 2);
6300 ops[2] = gen_rtx_REG (DImode, regno + 4);
6301 ops[3] = gen_rtx_REG (DImode, regno + 6);
6302 ops[4] = operands[1];
6303 ops[5] = GEN_INT (lane);
6304 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6305 ops);
6306 return "";
6307 }
6308 [(set_attr "type" "neon_load4_one_lane<q>")]
6309 )
6310
6311 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6312 ;; here on big endian targets.
6313 (define_insn "neon_vld4_lane<mode>"
6314 [(set (match_operand:XI 0 "s_register_operand" "=w")
6315 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6316 (match_operand:XI 2 "s_register_operand" "0")
6317 (match_operand:SI 3 "immediate_operand" "i")
6318 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6319 UNSPEC_VLD4_LANE))]
6320 "TARGET_NEON"
6321 {
6322 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6323 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6324 int regno = REGNO (operands[0]);
6325 rtx ops[6];
6326 if (lane >= max / 2)
6327 {
6328 lane -= max / 2;
6329 regno += 2;
6330 }
6331 ops[0] = gen_rtx_REG (DImode, regno);
6332 ops[1] = gen_rtx_REG (DImode, regno + 4);
6333 ops[2] = gen_rtx_REG (DImode, regno + 8);
6334 ops[3] = gen_rtx_REG (DImode, regno + 12);
6335 ops[4] = operands[1];
6336 ops[5] = GEN_INT (lane);
6337 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6338 ops);
6339 return "";
6340 }
6341 [(set_attr "type" "neon_load4_one_lane<q>")]
6342 )
6343
6344 (define_insn "neon_vld4_dup<mode>"
6345 [(set (match_operand:OI 0 "s_register_operand" "=w")
6346 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6347 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6348 UNSPEC_VLD4_DUP))]
6349 "TARGET_NEON"
6350 {
6351 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6352 {
6353 int regno = REGNO (operands[0]);
6354 rtx ops[5];
6355 ops[0] = gen_rtx_REG (DImode, regno);
6356 ops[1] = gen_rtx_REG (DImode, regno + 2);
6357 ops[2] = gen_rtx_REG (DImode, regno + 4);
6358 ops[3] = gen_rtx_REG (DImode, regno + 6);
6359 ops[4] = operands[1];
6360 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6361 ops);
6362 return "";
6363 }
6364 else
6365 return "vld1.<V_sz_elem>\t%h0, %A1";
6366 }
6367 [(set (attr "type")
6368 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6369 (const_string "neon_load4_all_lanes<q>")
6370 (const_string "neon_load1_1reg<q>")))]
6371 )
6372
6373 (define_expand "vec_store_lanesoi<mode>"
6374 [(set (match_operand:OI 0 "neon_struct_operand")
6375 (unspec:OI [(match_operand:OI 1 "s_register_operand")
6376 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6377 UNSPEC_VST4))]
6378 "TARGET_NEON")
6379
6380 (define_insn "neon_vst4<mode>"
6381 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6382 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6383 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6384 UNSPEC_VST4))]
6385 "TARGET_NEON"
6386 {
6387 if (<V_sz_elem> == 64)
6388 return "vst1.64\t%h1, %A0";
6389 else
6390 return "vst4.<V_sz_elem>\t%h1, %A0";
6391 }
6392 [(set (attr "type")
6393 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6394 (const_string "neon_store1_4reg<q>")
6395 (const_string "neon_store4_4reg<q>")))]
6396 )
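
;; Illustration (not in the original sources): there is no vst4.64, so for
;; 64-bit elements the pattern above degenerates to a plain vst1.64 of the
;; four registers.  For narrower elements it is the interleaving store used
;; by the vst4 intrinsics, e.g. (types chosen for the example):
;;
;;   #include <arm_neon.h>
;;
;;   void
;;   store4 (int16_t *p, int16x4x4_t v)
;;   {
;;     /* Expected to assemble to vst4.16 {d0, d1, d2, d3}, [p].  */
;;     vst4_s16 (p, v);
;;   }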
6397
6398 (define_expand "vec_store_lanesxi<mode>"
6399 [(match_operand:XI 0 "neon_struct_operand")
6400 (match_operand:XI 1 "s_register_operand")
6401 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6402 "TARGET_NEON"
6403 {
6404 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
6405 DONE;
6406 })
6407
6408 (define_expand "neon_vst4<mode>"
6409 [(match_operand:XI 0 "neon_struct_operand")
6410 (match_operand:XI 1 "s_register_operand")
6411 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6412 "TARGET_NEON"
6413 {
6414 rtx mem;
6415
6416 mem = adjust_address (operands[0], OImode, 0);
6417 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6418 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6419 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
6420 DONE;
6421 })
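
;; Illustration (not in the original sources): a vst4 of four Q registers is
;; split into two OImode stores because lanes 0..n/2-1 of each Q register sit
;; in its low D register and the remaining lanes in its high D register.  The
;; "qa" half therefore stores the four low D registers to the first 32 bytes
;; and the "qb" half stores the four high D registers to the next 32 bytes,
;; which is why the expander above offsets the address by GET_MODE_SIZE
;; (OImode).  At the intrinsic level (types chosen for the example):
;;
;;   #include <arm_neon.h>
;;
;;   void
;;   store4_q (int16_t *p, int16x8x4_t v)
;;   {
;;     /* Expected to expand to two vst4.16 instructions via
;;        neon_vst4qa/neon_vst4qb.  */
;;     vst4q_s16 (p, v);
;;   }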
6422
6423 (define_insn "neon_vst4qa<mode>"
6424 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6425 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6426 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6427 UNSPEC_VST4A))]
6428 "TARGET_NEON"
6429 {
6430 int regno = REGNO (operands[1]);
6431 rtx ops[5];
6432 ops[0] = operands[0];
6433 ops[1] = gen_rtx_REG (DImode, regno);
6434 ops[2] = gen_rtx_REG (DImode, regno + 4);
6435 ops[3] = gen_rtx_REG (DImode, regno + 8);
6436 ops[4] = gen_rtx_REG (DImode, regno + 12);
6437 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6438 return "";
6439 }
6440 [(set_attr "type" "neon_store4_4reg<q>")]
6441 )
6442
6443 (define_insn "neon_vst4qb<mode>"
6444 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6445 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6446 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6447 UNSPEC_VST4B))]
6448 "TARGET_NEON"
6449 {
6450 int regno = REGNO (operands[1]);
6451 rtx ops[5];
6452 ops[0] = operands[0];
6453 ops[1] = gen_rtx_REG (DImode, regno + 2);
6454 ops[2] = gen_rtx_REG (DImode, regno + 6);
6455 ops[3] = gen_rtx_REG (DImode, regno + 10);
6456 ops[4] = gen_rtx_REG (DImode, regno + 14);
6457 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6458 return "";
6459 }
6460 [(set_attr "type" "neon_store4_4reg<q>")]
6461 )
6462
6463 ;; See the comment on neon_vld1_lane for why the lane numbers are reversed
6464 ;; here on big-endian targets.
6465 (define_insn "neon_vst4_lane<mode>"
6466 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6467 (unspec:<V_four_elem>
6468 [(match_operand:OI 1 "s_register_operand" "w")
6469 (match_operand:SI 2 "immediate_operand" "i")
6470 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6471 UNSPEC_VST4_LANE))]
6472 "TARGET_NEON"
6473 {
6474 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6475 int regno = REGNO (operands[1]);
6476 rtx ops[6];
6477 ops[0] = operands[0];
6478 ops[1] = gen_rtx_REG (DImode, regno);
6479 ops[2] = gen_rtx_REG (DImode, regno + 2);
6480 ops[3] = gen_rtx_REG (DImode, regno + 4);
6481 ops[4] = gen_rtx_REG (DImode, regno + 6);
6482 ops[5] = GEN_INT (lane);
6483 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6484 ops);
6485 return "";
6486 }
6487 [(set_attr "type" "neon_store4_one_lane<q>")]
6488 )
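
;; Illustration (not in the original sources): the lane store writes one
;; element from each of the four registers to consecutive memory locations.
;; A hedged intrinsic-level example (type and lane chosen for the example):
;;
;;   #include <arm_neon.h>
;;
;;   void
;;   store_one_lane (int16_t *p, int16x4x4_t v)
;;   {
;;     /* Expected to assemble to vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [p].  */
;;     vst4_lane_s16 (p, v, 3);
;;   }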
6489
6490 ;; See the comment on neon_vld1_lane for why the lane numbers are reversed
6491 ;; here on big-endian targets.
6492 (define_insn "neon_vst4_lane<mode>"
6493 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6494 (unspec:<V_four_elem>
6495 [(match_operand:XI 1 "s_register_operand" "w")
6496 (match_operand:SI 2 "immediate_operand" "i")
6497 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6498 UNSPEC_VST4_LANE))]
6499 "TARGET_NEON"
6500 {
6501 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6502 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6503 int regno = REGNO (operands[1]);
6504 rtx ops[6];
6505 if (lane >= max / 2)
6506 {
6507 lane -= max / 2;
6508 regno += 2;
6509 }
6510 ops[0] = operands[0];
6511 ops[1] = gen_rtx_REG (DImode, regno);
6512 ops[2] = gen_rtx_REG (DImode, regno + 4);
6513 ops[3] = gen_rtx_REG (DImode, regno + 8);
6514 ops[4] = gen_rtx_REG (DImode, regno + 12);
6515 ops[5] = GEN_INT (lane);
6516 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6517 ops);
6518 return "";
6519 }
6520 [(set_attr "type" "neon_store4_one_lane<q>")]
6521 )
6522
6523 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6524 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6525 (SE:<V_unpack> (vec_select:<V_HALF>
6526 (match_operand:VU 1 "register_operand" "w")
6527 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6528 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6529 "vmovl.<US><V_sz_elem> %q0, %e1"
6530 [(set_attr "type" "neon_shift_imm_long")]
6531 )
6532
6533 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6534 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6535 (SE:<V_unpack> (vec_select:<V_HALF>
6536 (match_operand:VU 1 "register_operand" "w")
6537 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6538 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6539 "vmovl.<US><V_sz_elem> %q0, %f1"
6540 [(set_attr "type" "neon_shift_imm_long")]
6541 )
6542
6543 (define_expand "vec_unpack<US>_hi_<mode>"
6544 [(match_operand:<V_unpack> 0 "register_operand" "")
6545 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6546 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6547 {
6548 rtvec v = rtvec_alloc (<V_mode_nunits>/2);
6549 rtx t1;
6550 int i;
6551 for (i = 0; i < (<V_mode_nunits>/2); i++)
6552 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6553
6554 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6555 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
6556 operands[1],
6557 t1));
6558 DONE;
6559 }
6560 )
6561
6562 (define_expand "vec_unpack<US>_lo_<mode>"
6563 [(match_operand:<V_unpack> 0 "register_operand" "")
6564 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
6565 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6566 {
6567 rtvec v = rtvec_alloc (<V_mode_nunits>/2);
6568 rtx t1;
6569 int i;
6570 for (i = 0; i < (<V_mode_nunits>/2); i++)
6571 RTVEC_ELT (v, i) = GEN_INT (i);
6572 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6573 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
6574 operands[1],
6575 t1));
6576 DONE;
6577 }
6578 )
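
;; Illustration (not in the original sources): the two expanders above build
;; the PARALLEL selecting the low or high half of the input and hand it to the
;; vmovl insns.  A widening copy loop is the kind of source the vectorizer is
;; expected to map onto them (flags and names chosen for the example; build
;; with something like -O3 -mfpu=neon):
;;
;;   #include <stdint.h>
;;
;;   void
;;   widen_copy (int32_t *restrict out, const int16_t *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = in[i];   /* each iteration sign-extends one element */
;;   }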
6579
6580 (define_insn "neon_vec_<US>mult_lo_<mode>"
6581 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6582 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6583 (match_operand:VU 1 "register_operand" "w")
6584 (match_operand:VU 2 "vect_par_constant_low" "")))
6585 (SE:<V_unpack> (vec_select:<V_HALF>
6586 (match_operand:VU 3 "register_operand" "w")
6587 (match_dup 2)))))]
6588 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6589 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6590 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6591 )
6592
6593 (define_expand "vec_widen_<US>mult_lo_<mode>"
6594 [(match_operand:<V_unpack> 0 "register_operand" "")
6595 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6596 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6597 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6598 {
6599 rtvec v = rtvec_alloc (<V_mode_nunits>/2);
6600 rtx t1;
6601 int i;
6602 for (i = 0; i < (<V_mode_nunits>/2); i++)
6603 RTVEC_ELT (v, i) = GEN_INT (i);
6604 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6605
6606 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6607 operands[1],
6608 t1,
6609 operands[2]));
6610 DONE;
6611 }
6612 )
6613
6614 (define_insn "neon_vec_<US>mult_hi_<mode>"
6615 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6616 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6617 (match_operand:VU 1 "register_operand" "w")
6618 (match_operand:VU 2 "vect_par_constant_high" "")))
6619 (SE:<V_unpack> (vec_select:<V_HALF>
6620 (match_operand:VU 3 "register_operand" "w")
6621 (match_dup 2)))))]
6622 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6623 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6624 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6625 )
6626
6627 (define_expand "vec_widen_<US>mult_hi_<mode>"
6628 [(match_operand:<V_unpack> 0 "register_operand" "")
6629 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6630 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6631 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6632 {
6633 rtvec v = rtvec_alloc (<V_mode_nunits>/2);
6634 rtx t1;
6635 int i;
6636 for (i = 0; i < (<V_mode_nunits>/2); i++)
6637 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6638 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6639
6640 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6641 operands[1],
6642 t1,
6643 operands[2]));
6644 DONE;
6645
6646 }
6647 )
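
;; Illustration (not in the original sources): a widening multiply loop is the
;; kind of source the vec_widen_<US>mult_lo/hi expanders are expected to
;; catch, producing vmull.<type> for each half (flags and names chosen for
;; the example; build with something like -O3 -mfpu=neon):
;;
;;   #include <stdint.h>
;;
;;   void
;;   widen_mul (int32_t *restrict out, const int16_t *restrict a,
;;              const int16_t *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (int32_t) a[i] * b[i];   /* product cannot overflow */
;;   }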
6648
6649 (define_insn "neon_vec_<US>shiftl_<mode>"
6650 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6651 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6652 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6653 "TARGET_NEON"
6654 {
6655 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6656 }
6657 [(set_attr "type" "neon_shift_imm_long")]
6658 )
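
;; Illustration (not in the original sources): vshll widens each element while
;; shifting it left by an immediate.  A hedged intrinsic-level example (type
;; and shift count chosen for the example):
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   shift_widen (int16x4_t a)
;;   {
;;     /* Expected to assemble to vshll.s16 q0, d0, #3.  */
;;     return vshll_n_s16 (a, 3);
;;   }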
6659
6660 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6661 [(match_operand:<V_unpack> 0 "register_operand" "")
6662 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6663 (match_operand:SI 2 "immediate_operand" "i")]
6664 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6665 {
6666 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6667 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6668 operands[2]));
6669 DONE;
6670 }
6671 )
6672
6673 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6674 [(match_operand:<V_unpack> 0 "register_operand" "")
6675 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6676 (match_operand:SI 2 "immediate_operand" "i")]
6677 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6678 {
6679 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6680 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6681 GET_MODE_SIZE (<V_HALF>mode)),
6682 operands[2]));
6683 DONE;
6684 }
6685 )
6686
6687 ;; Widening patterns used when vectorizing for the non-quad (D-register) case.
6688 (define_insn "neon_unpack<US>_<mode>"
6689 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6690 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6691 "TARGET_NEON"
6692 "vmovl.<US><V_sz_elem> %q0, %P1"
6693 [(set_attr "type" "neon_shift_imm_long")]
6694 )
6695
6696 (define_expand "vec_unpack<US>_lo_<mode>"
6697 [(match_operand:<V_double_width> 0 "register_operand" "")
6698 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
6699 "TARGET_NEON"
6700 {
6701 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6702 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6703 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6704
6705 DONE;
6706 }
6707 )
6708
6709 (define_expand "vec_unpack<US>_hi_<mode>"
6710 [(match_operand:<V_double_width> 0 "register_operand" "")
6711 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
6712 "TARGET_NEON"
6713 {
6714 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6715 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6716 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6717
6718 DONE;
6719 }
6720 )
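
;; Illustration (not in the original sources): in the D-register case the
;; expanders above widen the whole 64-bit vector with vmovl and then take the
;; requested half of the 128-bit result.  Roughly, at the intrinsic level
;; (types chosen for the example):
;;
;;   #include <arm_neon.h>
;;
;;   int32x2_t
;;   unpack_lo (int16x4_t a)
;;   {
;;     /* vmovl.s16 then keep the low half of the Q register.  */
;;     return vget_low_s32 (vmovl_s16 (a));
;;   }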
6721
6722 (define_insn "neon_vec_<US>mult_<mode>"
6723 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6724 (mult:<V_widen> (SE:<V_widen>
6725 (match_operand:VDI 1 "register_operand" "w"))
6726 (SE:<V_widen>
6727 (match_operand:VDI 2 "register_operand" "w"))))]
6728 "TARGET_NEON"
6729 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6730 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6731 )
6732
6733 (define_expand "vec_widen_<US>mult_hi_<mode>"
6734 [(match_operand:<V_double_width> 0 "register_operand" "")
6735 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6736 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6737 "TARGET_NEON"
6738 {
6739 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6740 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6741 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6742
6743 DONE;
6744
6745 }
6746 )
6747
6748 (define_expand "vec_widen_<US>mult_lo_<mode>"
6749 [(match_operand:<V_double_width> 0 "register_operand" "")
6750 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6751 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6752 "TARGET_NEON"
6753 {
6754 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6755 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6756 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6757
6758 DONE;
6759
6760 }
6761 )
6762
6763 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6764 [(match_operand:<V_double_width> 0 "register_operand" "")
6765 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6766 (match_operand:SI 2 "immediate_operand" "i")]
6767 "TARGET_NEON"
6768 {
6769 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6770 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6771 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6772
6773 DONE;
6774 }
6775 )
6776
6777 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6778 [(match_operand:<V_double_width> 0 "register_operand" "")
6779 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6780 (match_operand:SI 2 "immediate_operand" "i")]
6781 "TARGET_NEON"
6782 {
6783 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6784 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6785 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6786
6787 DONE;
6788 }
6789 )
6790
6791 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6792 ; because the ordering of vector elements in Q registers is different from what
6793 ; the semantics of the instructions require.
6794
6795 (define_insn "vec_pack_trunc_<mode>"
6796 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6797 (vec_concat:<V_narrow_pack>
6798 (truncate:<V_narrow>
6799 (match_operand:VN 1 "register_operand" "w"))
6800 (truncate:<V_narrow>
6801 (match_operand:VN 2 "register_operand" "w"))))]
6802 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6803 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6804 [(set_attr "type" "multiple")
6805 (set_attr "length" "8")]
6806 )
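
;; Illustration (not in the original sources): a narrowing copy loop is the
;; kind of source vec_pack_trunc is expected to catch, becoming a pair of
;; vmovn instructions writing the two halves of the destination (flags and
;; names chosen for the example; build with something like -O3 -mfpu=neon on
;; a little-endian target):
;;
;;   #include <stdint.h>
;;
;;   void
;;   narrow_copy (int16_t *restrict out, const int32_t *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (int16_t) in[i];   /* truncate each element */
;;   }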
6807
6808 ;; For the non-quad case.
6809 (define_insn "neon_vec_pack_trunc_<mode>"
6810 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6811 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6812 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6813 "vmovn.i<V_sz_elem>\t%P0, %q1"
6814 [(set_attr "type" "neon_move_narrow_q")]
6815 )
6816
6817 (define_expand "vec_pack_trunc_<mode>"
6818 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6819 (match_operand:VSHFT 1 "register_operand" "")
6820 (match_operand:VSHFT 2 "register_operand")]
6821 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6822 {
6823 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6824
6825 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6826 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6827 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6828 DONE;
6829 })
6830
6831 (define_insn "neon_vabd<mode>_2"
6832 [(set (match_operand:VF 0 "s_register_operand" "=w")
6833 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6834 (match_operand:VF 2 "s_register_operand" "w"))))]
6835 "TARGET_NEON && flag_unsafe_math_optimizations"
6836 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6837 [(set_attr "type" "neon_fp_abd_s<q>")]
6838 )
6839
6840 (define_insn "neon_vabd<mode>_3"
6841 [(set (match_operand:VF 0 "s_register_operand" "=w")
6842 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6843 (match_operand:VF 2 "s_register_operand" "w")]
6844 UNSPEC_VSUB)))]
6845 "TARGET_NEON && flag_unsafe_math_optimizations"
6846 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6847 [(set_attr "type" "neon_fp_abd_s<q>")]
6848 )
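
;; Illustration (not in the original sources): both patterns above only match
;; when -funsafe-math-optimizations is in effect, so a loop such as the one
;; below is only expected to use vabd.f32 under fast-math style options
;; (flags and names chosen for the example, e.g. -O3 -mfpu=neon -ffast-math):
;;
;;   #include <math.h>
;;
;;   void
;;   absdiff (float *restrict out, const float *restrict a,
;;            const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = fabsf (a[i] - b[i]);
;;   }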
6849
6850 ;; Copy the value from core registers to NEON registers first and extend it
;; there, rather than extending in core registers and then copying.
6851
6852 (define_split
6853 [(set (match_operand:DI 0 "s_register_operand" "")
6854 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6855 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6856 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6857 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6858 {
6859 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6860 })
6861
6862 (define_split
6863 [(set (match_operand:DI 0 "s_register_operand" "")
6864 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6865 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6866 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6867 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6868 {
6869 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6870 })
6871
6872 (define_split
6873 [(set (match_operand:DI 0 "s_register_operand" "")
6874 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6875 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6876 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6877 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6878 {
6879 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6880 })
6881
6882 (define_split
6883 [(set (match_operand:DI 0 "s_register_operand" "")
6884 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6885 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6886 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6887 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6888 {
6889 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6890 })
6891
6892 (define_split
6893 [(set (match_operand:DI 0 "s_register_operand" "")
6894 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6895 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6896 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6897 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6898 {
6899 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6900 })
6901
6902 (define_split
6903 [(set (match_operand:DI 0 "s_register_operand" "")
6904 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6905 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6906 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6907 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6908 {
6909 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6910 })
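
;; Illustration (not in the original sources): each split above duplicates the
;; narrow value across a D register and then shifts the 64-bit register right,
;; arithmetically for sign extension and logically for zero extension.  For
;; the SImode sign-extension case the resulting sequence is expected to be
;; something like (the exact instruction choice depends on the DImode shift
;; patterns):
;;
;;   vdup.32  d0, r0          @ copy the 32-bit value into both halves
;;   vshr.s64 d0, d0, #32     @ arithmetic shift re-creates the sign bits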