[ARM] Implement support for ACLE Coprocessor CDP intrinsics
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
;; Attribute used to permit string comparisons against <VQH_mnem> in
;; type attribute definitions.  The permitted values are "vadd", "vmin"
;; and "vmax"; an insn that does not set the attribute gets the default,
;; "vadd".
(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the modes of the VDX iterator.  The insn condition
;; requires at least one of the two operands to be a register.  Each
;; constraint alternative is emitted by the matching case of the switch
;; below; the per-alternative "type", "length" and pool-range attributes
;; are listed in the same order as the constraints.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDX 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VDX 1 "general_operand"
          " w,w, Dn,Uni, w, r, r, Usi,r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%P0, %P1 @ <mode>";

    case 1:
    case 3:
      return output_move_neon (operands);

    case 2:
      {
        /* Immediate source.  The helper is handed &operands[1] and an
           out-parameter for the element width; a width of zero selects
           the fixed vmov.f32 form, anything else is printed into the
           vmov.i<width> mnemonic.  The buffer is static because the
           returned template must outlive this function.  */
        static char insn_text[40];
        int elt_bits;
        int imm_ok = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                                    &operands[1], &elt_bits);

        gcc_assert (imm_ok != 0);

        if (elt_bits == 0)
          return "vmov.f32\t%P0, %1 @ <mode>";

        sprintf (insn_text, "vmov.i%d\t%%P0, %%x1 @ <mode>", elt_bits);
        return insn_text;
      }

    case 4:
      return "vmov\t%Q0, %R0, %P1 @ <mode>";

    case 5:
      return "vmov\t%P0, %Q1, %R1 @ <mode>";

    default:
      return output_move_double (operands, true, NULL);
    }
}
  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                     neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
                     neon_load1_2reg, neon_store1_2reg")
   (set_attr "length" "4,4,4,4,4,4,8,8,8")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the modes of the VQXMOV iterator.  As with the VDX
;; variant, at least one operand must be a register.  Transfers to and
;; from core registers (alternatives 4 and 5) are emitted as two vmov
;; instructions, one per half (%e/%f) of the quad register.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dn,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%q0, %q1 @ <mode>";

    case 1:
    case 3:
      return output_move_neon (operands);

    case 2:
      {
        /* Immediate source: a reported width of zero selects vmov.f32,
           otherwise the width is printed into the vmov.i<width>
           mnemonic.  The static buffer keeps the returned template alive
           after this function returns.  */
        static char insn_text[40];
        int elt_bits;
        int imm_ok = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                                    &operands[1], &elt_bits);

        gcc_assert (imm_ok != 0);

        if (elt_bits == 0)
          return "vmov.f32\t%q0, %1 @ <mode>";

        sprintf (insn_text, "vmov.i%d\t%%q0, %%1 @ <mode>", elt_bits);
        return insn_text;
      }

    case 4:
      return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";

    case 5:
      return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";

    default:
      return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
                     mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; TImode move expander, available when TARGET_NEON.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (match_operand:TI 1 "general_operand" ""))]
  "TARGET_NEON"
{
  /* While pseudos can still be created, a move whose destination is not
     a REG gets its source forced into a register first.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (TImode, operands[1]);
})
127
;; Move expander for the structure modes of the VSTRUCT iterator.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
        (match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_NEON"
{
  /* While pseudos can still be created, a move whose destination is not
     a REG gets its source forced into a register first.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
139
;; V4HF move expander; requires both TARGET_NEON and TARGET_FP16.
(define_expand "movv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand")
        (match_operand:V4HF 1 "s_register_operand"))]
  "TARGET_NEON && TARGET_FP16"
{
  /* force_reg is needed here so that CANNOT_CHANGE_MODE_CLASS does not
     cause an ICE on big-endian, where subregs cannot be extracted in
     this case.  It is only applied while pseudos can still be created
     and the destination is not already a REG.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (V4HFmode, operands[1]);
})
154
;; V8HF move expander; requires both TARGET_NEON and TARGET_FP16.  Unlike
;; movv4hf, the operands carry no predicate.
(define_expand "movv8hf"
  [(set (match_operand:V8HF 0 "")
        (match_operand:V8HF 1 ""))]
  "TARGET_NEON && TARGET_FP16"
{
  /* force_reg is needed here so that CANNOT_CHANGE_MODE_CLASS does not
     cause an ICE on big-endian, where subregs cannot be extracted in
     this case.  It is only applied while pseudos can still be created
     and the destination is not already a REG.  */
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (V8HFmode, operands[1]);
})
169
;; Move pattern for the structure modes of the VSTRUCT iterator.  At least
;; one operand must be a register.  The insn length is not a constant; it
;; is computed by arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Register-to-register copies are output as "#" and are expected to be
     handled by the post-reload define_splits for these modes.  */
  if (which_alternative == 0)
    return "#";

  /* Stores (alternative 1) and loads (alternative 2).  */
  if (which_alternative == 1 || which_alternative == 2)
    return output_move_neon (operands);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
186
;; After reload, split an EImode register-to-register copy into a TImode
;; copy at the base register number and a DImode copy at base + 4.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  const int dest_base = REGNO (operands[0]);
  const int src_base = REGNO (operands[1]);
  rtx dest_parts[2];
  rtx src_parts[2];

  dest_parts[0] = gen_rtx_REG (TImode, dest_base);
  src_parts[0] = gen_rtx_REG (TImode, src_base);
  dest_parts[1] = gen_rtx_REG (DImode, dest_base + 4);
  src_parts[1] = gen_rtx_REG (DImode, src_base + 4);

  /* NOTE(review): presumably fills operands[0..3] with the pieces in an
     order that is safe when source and destination overlap -- confirm
     against the definition of neon_disambiguate_copy.  */
  neon_disambiguate_copy (operands, dest_parts, src_parts, 2);
})
205
;; After reload, split an OImode register-to-register copy into two
;; TImode copies, at register offsets 0 and 4 from the base.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  const int dest_base = REGNO (operands[0]);
  const int src_base = REGNO (operands[1]);
  rtx dest_parts[2];
  rtx src_parts[2];
  int piece;

  for (piece = 0; piece < 2; piece++)
    {
      dest_parts[piece] = gen_rtx_REG (TImode, dest_base + 4 * piece);
      src_parts[piece] = gen_rtx_REG (TImode, src_base + 4 * piece);
    }

  /* NOTE(review): presumably fills operands[0..3] with the pieces in an
     order that is safe when source and destination overlap -- confirm
     against the definition of neon_disambiguate_copy.  */
  neon_disambiguate_copy (operands, dest_parts, src_parts, 2);
})
224
;; After reload, split a CImode register-to-register copy into three
;; TImode copies, at register offsets 0, 4 and 8 from the base.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  const int dest_base = REGNO (operands[0]);
  const int src_base = REGNO (operands[1]);
  rtx dest_parts[3];
  rtx src_parts[3];
  int piece;

  for (piece = 0; piece < 3; piece++)
    {
      dest_parts[piece] = gen_rtx_REG (TImode, dest_base + 4 * piece);
      src_parts[piece] = gen_rtx_REG (TImode, src_base + 4 * piece);
    }

  /* NOTE(review): presumably fills operands[0..5] with the pieces in an
     order that is safe when source and destination overlap -- confirm
     against the definition of neon_disambiguate_copy.  */
  neon_disambiguate_copy (operands, dest_parts, src_parts, 3);
})
246
;; After reload, split an XImode register-to-register copy into four
;; TImode copies, at register offsets 0, 4, 8 and 12 from the base.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  const int dest_base = REGNO (operands[0]);
  const int src_base = REGNO (operands[1]);
  rtx dest_parts[4];
  rtx src_parts[4];
  int piece;

  for (piece = 0; piece < 4; piece++)
    {
      dest_parts[piece] = gen_rtx_REG (TImode, dest_base + 4 * piece);
      src_parts[piece] = gen_rtx_REG (TImode, src_base + 4 * piece);
    }

  /* NOTE(review): presumably fills operands[0..7] with the pieces in an
     order that is safe when source and destination overlap -- confirm
     against the definition of neon_disambiguate_copy.  */
  neon_disambiguate_copy (operands, dest_parts, src_parts, 4);
})
271
;; Misaligned vector move expander for the VDQX modes.  Only enabled for
;; little-endian NEON targets with unaligned_access.  The move is wrapped
;; in UNSPEC_MISALIGNED_ACCESS so that it matches the dedicated
;; *movmisalign<mode>_neon_load/_store insns.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
                     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  const bool dest_is_reg = s_register_operand (operands[0], <MODE>mode);

  /* Expansion of this pattern must not fail.  When neither side is a
     register (e.g. memory := constant, which the auto-vectorizer can
     create), load operand 1 into a register.  */
  if (!dest_is_reg && !s_register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* Whichever operand is not the register destination is the one whose
     address may need fixing up.  */
  rtx mem_side = dest_is_reg ? operands[1] : operands[0];

  /* Legitimize the address by forcing it into a register if needed.  */
  if (!neon_vector_mem_operand (mem_side, 2, true))
    XEXP (mem_side, 0) = force_reg (Pmode, XEXP (mem_side, 0));
})
296
;; Misaligned store for the VDX modes: writes register operand 1 to memory
;; operand 0 with a single vst1.<V_sz_elem>.  Enabled under the same
;; condition as the movmisalign<mode> expander.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%P1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
304
;; Misaligned load for the VDX modes: reads memory operand 1 into register
;; operand 0 with a single vld1.<V_sz_elem>.  Enabled under the same
;; condition as the movmisalign<mode> expander.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
                                          " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%P0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
313
;; Misaligned store for the VQX modes: same as the VDX store above, but
;; the register is printed with %q.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%q1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
321
;; Misaligned load for the VQX modes: same as the VDX load above, but the
;; register is printed with %q.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
                                          " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%q0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
330
;; Insert scalar operand 1 into one lane of a VD_LANE vector.  Operand 3
;; is the vector being modified (tied to the output) and operand 2 is a
;; bit mask selecting the lane, as required by vec_merge.  The scalar may
;; come from memory (alternative 0) or a core register (alternative 1).
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
        (vec_merge:VD_LANE
          (vec_duplicate:VD_LANE
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Turn the mask back into a lane index (position of its lowest set
     bit), and mirror the index on big-endian.  */
  int lane = ffs ((int) INTVAL (operands[2])) - 1;

  if (BYTES_BIG_ENDIAN)
    lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;

  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
351
;; Insert scalar operand 1 into one lane of a VQ2 vector.  The lane is
;; addressed through the <V_HALF>mode half of the destination that holds
;; it, so operand 0 is rewritten to that half before the template is
;; returned.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Lane index within the whole vector, from the lowest set mask bit.  */
  HOST_WIDE_INT full_lane = ffs ((int) INTVAL (operands[2])) - 1;
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  /* Lane index within the selected half.  */
  int lane = full_lane % lanes_per_half;
  /* Register-number offset of that half: 0 for the low one, 2 for the
     high one.  */
  int half_offset = (full_lane / lanes_per_half) * 2;
  int base_regno = REGNO (operands[0]);

  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[0] = gen_rtx_REG (<V_HALF>mode, base_regno + half_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)
380
;; Insert a DImode scalar into one element of a V2DI vector.  The selected
;; element is addressed as a DImode register in its own right, so no lane
;; syntax is needed in the output.
(define_insn "vec_setv2di_internal"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Element index from the lowest set bit of the merge mask; each
     element advances the register number by 2.  */
  HOST_WIDE_INT element = ffs ((int) INTVAL (operands[2])) - 1;
  int elt_regno = REGNO (operands[0]) + 2 * element;

  operands[0] = gen_rtx_REG (DImode, elt_regno);

  return (which_alternative == 0
          ? "vld1.64\t%P0, %A1"
          : "vmov\t%P0, %Q1, %R1");
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)
402
;; Standard-name expander for setting one vector element.  Operand 2 is
;; the element index; the _internal insns expect a one-bit mask instead,
;; so the index is converted to (1 << index) here.  Operand 0 is passed
;; both as the destination and as the vector being merged into.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand:<V_elem> 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_mask = HOST_WIDE_INT_1 << INTVAL (operands[2]);
  rtx mask_rtx = GEN_INT (lane_mask);

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         mask_rtx, operands[0]));
  DONE;
})
414
;; Extract lane operand 2 of VD_LANE vector operand 1 into operand 0,
;; either straight to memory (alternative 0) or into a core register
;; (alternative 1).
(define_insn "vec_extract<mode>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  /* On big-endian the lane index is mirrored before it is printed.  */
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
436
;; Extract lane operand 2 of VQ2 vector operand 1 into operand 0.  The
;; lane is addressed through the <V_HALF>mode half of the source that
;; holds it, so operand 1 is rewritten to that half before output.
(define_insn "vec_extract<mode>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  HOST_WIDE_INT full_lane = INTVAL (operands[2]);
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  /* Lane index within the selected half.  */
  int lane = full_lane % lanes_per_half;
  /* Register-number offset of that half: 0 for the low one, 2 for the
     high one.  */
  int half_offset = (full_lane / lanes_per_half) * 2;
  int base_regno = REGNO (operands[1]);

  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[1] = gen_rtx_REG (<V_HALF>mode, base_regno + half_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
462
;; Extract one DImode element of a V2DI vector.  The element is addressed
;; as a DImode register in its own right, at register number
;; base + 2 * index.
(define_insn "vec_extractv2di"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int elt_regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, elt_regno);

  return (which_alternative == 0
          ? "vst1.64\t{%P1}, %A0 @ v2di"
          : "vmov\t%Q0, %R0, %P1 @ v2di");
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)
481
;; Standard-name expander for initialising a VDQ vector (operand 0) from
;; the element description in operand 1.  All of the work is delegated to
;; neon_expand_vector_init; no RTL template is emitted here.
(define_expand "vec_init<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_NEON"
{
  neon_expand_vector_init (operands[0], operands[1]);
  DONE;
})
490
491 ;; Doubleword and quadword arithmetic.
492
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
495
;; Vector add for the VDQ modes.  For floating-point modes the pattern is
;; only enabled under flag_unsafe_math_optimizations.  The scheduling type
;; is the FP add/sub class for float modes and neon_add otherwise.
(define_insn "*add<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_add<q>")))]
)
507
;; Vector add for the VH modes, enabled by TARGET_NEON_FP16INST.  Unlike
;; *add<mode>3_neon, the condition does not depend on
;; flag_unsafe_math_optimizations.  The type attribute uses the same
;; <Is_float_mode> selection as *add<mode>3_neon.
(define_insn "add<mode>3_fp16"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (plus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_add<q>")))]
)
521
;; 64-bit integer add that can be carried out either in a NEON register
;; (alternatives 0 and 3, a single vadd.i64) or in core registers (all
;; other alternatives).  The core-register alternatives are output as "#",
;; i.e. they must be split before final output; they are 8 bytes long and
;; clobber the condition codes.  The "arch" attribute separates the two
;; NEON alternatives into neon_for_64bits and avoid_neon_for_64bits.
(define_insn "adddi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
                 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    /* NEON register alternatives.  */
    case 0:
    case 3:
      return "vadd.i64\t%P0, %P1, %P2";

    /* Core register alternatives: left for a splitter.  */
    case 1:
    case 2:
    case 4:
    case 5:
    case 6:
      return "#";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
                     multiple,multiple,multiple")
   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
   (set_attr "length" "*,8,8,*,8,8,8")
   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)
547
;; Vector subtract for the VDQ modes.  For floating-point modes the
;; pattern is only enabled under flag_unsafe_math_optimizations.  The
;; scheduling type is the FP add/sub class for float modes and neon_sub
;; otherwise.
(define_insn "*sub<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                   (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_sub<q>")))]
)
559
;; Vector subtract for the VH modes, enabled by TARGET_NEON_FP16INST.
;; This is a floating-point subtract, so it is given the FP add/sub
;; scheduling type: the same class that add<mode>3_fp16 and the float
;; case of *sub<mode>3_neon use, rather than the integer neon_sub class.
;; The type attribute only influences instruction scheduling; the emitted
;; assembly is unchanged.
(define_insn "sub<mode>3_fp16"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (minus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_addsub_s<q>")]
)
570
;; 64-bit integer subtract that can be carried out either in a NEON
;; register (alternatives 0 and 4, a single vsub.i64) or in core registers
;; (alternatives 1-3, a subs/sbc pair that is 8 bytes long and clobbers
;; the condition codes).
(define_insn "subdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
        (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
                  (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  /* NEON register alternatives.  */
  if (which_alternative == 0 || which_alternative == 4)
    return "vsub.i64\t%P0, %P1, %P2";

  /* Core register alternatives: subtract the low words, then the high
     words with borrow.  */
  if (which_alternative >= 1 && which_alternative <= 3)
    return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";

  gcc_unreachable ();
}
  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
   (set_attr "conds" "*,clob,clob,clob,*")
   (set_attr "length" "*,8,8,8,*")
   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)
593
;; Vector multiply for the VDQW modes.  For floating-point modes the
;; pattern is only enabled under flag_unsafe_math_optimizations.
(define_insn "*mul<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s<q>")
                      (const_string "neon_mul_<V_elem_ch><q>")))]
)
605
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 *
;; operand 3 + operand 1.  The accumulator (operand 1) is tied to the
;; destination by the "0" constraint, which is why only operands 2 and 3
;; appear as sources in the vmla template.  For floating-point modes the
;; pattern is only enabled under flag_unsafe_math_optimizations.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                              (match_operand:VDQW 3 "s_register_operand" "w"))
                   (match_operand:VDQW 1 "s_register_operand" "0")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
618
;; Multiply-accumulate for the VH modes, emitted as vmla.f16.  The
;; accumulator (operand 1) is tied to the destination.  Requires
;; TARGET_NEON_FP16INST in addition to the same <Is_float_mode> /
;; flag_unsafe_math_optimizations test as the VDQW variant.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
                          (match_operand:VH 3 "s_register_operand" "w"))
                 (match_operand:VH 1 "s_register_operand" "0")))]
  "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
628
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 -
;; operand 2 * operand 3.  The accumulator (operand 1) is tied to the
;; destination.  For floating-point modes the pattern is only enabled
;; under flag_unsafe_math_optimizations.
(define_insn "mul<mode>3neg<mode>add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
641
;; Fused multiply-accumulate
;; We define each insn twice here:
;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
;; to be able to use when converting to FMA.
;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.

;; Variant 1: the standard-name pattern, gated on
;; flag_unsafe_math_optimizations.  The addend (operand 3) is tied to the
;; destination, so the template only names operands 1 and 2 as sources.
(define_insn "fma<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
656
;; Variant 2 of the fused multiply-accumulate: identical RTL and output to
;; fma<VCVTF:mode>4, but not gated on flag_unsafe_math_optimizations, for
;; use by the intrinsics.
(define_insn "fma<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
666
;; There is limited support for unsafe-math optimizations using the NEON FP16
;; arithmetic instructions, so only the intrinsic is currently supported.
;; The addend (operand 3) is tied to the destination.  Unlike the VCVTF
;; variants, the template carries no %? suffix.
(define_insn "fma<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH
         (match_operand:VH 1 "register_operand" "w")
         (match_operand:VH 2 "register_operand" "w")
         (match_operand:VH 3 "register_operand" "0")))]
  "TARGET_NEON_FP16INST"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
679
;; Fused multiply-subtract for the VCVTF modes: an fma whose first
;; multiplicand is negated.  Gated on flag_unsafe_math_optimizations, like
;; fma<VCVTF:mode>4.  The addend (operand 3) is tied to the destination.
(define_insn "*fmsub<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
689
;; Intrinsic form of the fused multiply-subtract for the VCVTF modes:
;; same RTL and output as *fmsub<VCVTF:mode>4, but not gated on
;; flag_unsafe_math_optimizations.
(define_insn "fmsub<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF
         (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
         (match_operand:VCVTF 2 "register_operand" "w")
         (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
700
;; Intrinsic form of the fused multiply-subtract for the VH modes,
;; enabled by TARGET_NEON_FP16INST.  The addend (operand 3) is tied to the
;; destination.
(define_insn "fmsub<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH
         (neg:VH (match_operand:VH 1 "register_operand" "w"))
         (match_operand:VH 2 "register_operand" "w")
         (match_operand:VH 3 "register_operand" "0")))]
  "TARGET_NEON_FP16INST"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
711
;; Vector round-to-integral for the VCVTF modes, one pattern per unspec in
;; the NEON_VRINT iterator; <nvrint_variant> supplies the mnemonic suffix.
;; Requires TARGET_FPU_ARMV8.  The element size in the template is fixed
;; at .f32.
(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1
                        "s_register_operand" "w")]
                      NEON_VRINT))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)
721
;; Float-to-integer vector conversion with an explicit rounding variant.
;; The FIXUORS code iterator provides the signed and unsigned forms (<su>
;; in the template) and NEON_VCVT provides the rounding variant.  The
;; result has mode <V_cmp_result>.  Requires TARGET_FPU_ARMV8 and is
;; explicitly marked as not predicable.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
        (FIXUORS:<V_cmp_result> (unspec:VCVTF
                                 [(match_operand:VCVTF 1 "register_operand" "w")]
                                 NEON_VCVT)))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
   (set_attr "predicable" "no")]
)
732
;; Bitwise OR for the VDQ modes.  Alternative 0 ORs two registers;
;; alternative 1 ORs an immediate (constraint Dl) into operand 1, which is
;; tied to the destination.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    {
      /* The last argument tells the helper whether this is a
         quad-register mode.  */
      const int is_quad = VALID_NEON_QREG_MODE (<MODE>mode);
      return neon_output_logic_immediate ("vorr", &operands[2],
                                          <MODE>mode, 0, is_quad);
    }

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
749
;; The concrete forms of the Neon immediate-logic instructions are vbic and
;; vorr.  We support the pseudo-instruction vand instead, because that
;; corresponds to the canonical form the middle-end expects to use for
;; immediate bitwise-ANDs.

;; Bitwise AND for the VDQ modes.  Alternative 0 ANDs two registers;
;; alternative 1 ANDs an immediate (constraint DL) into operand 1, which
;; is tied to the destination.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    {
      /* Same helper as ior<mode>3, but with the fourth argument set to 1
         instead of 0.  */
      const int is_quad = VALID_NEON_QREG_MODE (<MODE>mode);
      return neon_output_logic_immediate ("vand", &operands[2],
                                          <MODE>mode, 1, is_quad);
    }

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
771
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2.  Note
;; that the complemented input is operand 2, which appears first in the
;; RTL but last in the assembly template.
(define_insn "orn<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
780
;; TODO: investigate whether we should disable
;; this and bicdi3_neon for the A8 in line with the other
;; changes above.

;; 64-bit OR-NOT: operand 0 = operand 1 | ~operand 2.  Alternative 0 is a
;; single vorn on NEON registers.  The remaining alternatives use core
;; registers and are output as "#"; they are split after reload, whenever
;; the destination is not a VFP register:
;;  - for Thumb-2, into two SImode OR-NOT operations on the low and high
;;    halves, with operands 3, 4 and 5 receiving the high parts of
;;    operands 0, 2 and 1 respectively;
;;  - otherwise, by emitting a DImode complement of operand 2 into the
;;    destination followed by a DImode OR with operand 1.
;; The "arch" attribute restricts alternative 1 to "a" and alternatives 2
;; and 3 to "t2".
(define_insn_and_split "orndi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
        (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
                (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
  "TARGET_NEON"
  "@
   vorn\t%P0, %P1, %P2
   #
   #
   #"
  "reload_completed &&
   (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
   (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
  "
  {
    if (TARGET_THUMB2)
      {
        operands[3] = gen_highpart (SImode, operands[0]);
        operands[0] = gen_lowpart (SImode, operands[0]);
        operands[4] = gen_highpart (SImode, operands[2]);
        operands[2] = gen_lowpart (SImode, operands[2]);
        operands[5] = gen_highpart (SImode, operands[1]);
        operands[1] = gen_lowpart (SImode, operands[1]);
      }
    else
      {
        emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
        emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
        DONE;
      }
  }"
  [(set_attr "type" "neon_logic,multiple,multiple,multiple")
   (set_attr "length" "*,16,8,8")
   (set_attr "arch" "any,a,t2,t2")]
)
820
;; Bit-clear for the VDQ modes: operand 0 = operand 1 & ~operand 2.  The
;; complemented input is operand 2, which appears first in the RTL but
;; last in the assembly template.
(define_insn "bic<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
829
;; Compare to *anddi_notdi_di.
;; 64-bit bit-clear: operand 0 = operand 1 & ~operand 2.  Alternative 0 is
;; a single vbic on NEON registers; the two core-register alternatives are
;; output as "#" (8 bytes) and must therefore be split before final
;; output.  No splitter is attached to this pattern itself.
(define_insn "bicdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
                (match_operand:DI 1 "s_register_operand" "w,0,r")))]
  "TARGET_NEON"
  "@
   vbic\t%P0, %P1, %P2
   #
   #"
  [(set_attr "type" "neon_logic,multiple,multiple")
   (set_attr "length" "*,8,8")]
)
843
;; Bitwise exclusive-OR for the VDQ modes (register operands only).
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                 (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
852
;; Bitwise complement for the VDQ modes, emitted as vmvn.  It is given the
;; neon_move scheduling type rather than neon_logic.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmvn\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_move<q>")]
)
860
;; Element-wise absolute value for the VDQW modes.  The scheduling type is
;; the FP abs class for float modes and neon_abs otherwise.  Unlike the
;; arithmetic patterns above, the condition does not depend on
;; flag_unsafe_math_optimizations.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_abs_s<q>")
                      (const_string "neon_abs<q>")))]
)
871
;; Element-wise negation for the VDQW modes.  The scheduling type is the
;; FP neg class for float modes and neon_neg otherwise.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_neg_s<q>")
                      (const_string "neon_neg<q>")))]
)
882
;; 64-bit integer negate.  Every alternative is output as "#", so the insn
;; is always split (see the two define_splits that follow).  Alternatives
;; 0 and 1 use NEON registers, 2 and 3 use core registers.  Only
;; alternative 1 asks for a real scratch register (operand 2, "&w"); the
;; others use "X".  The pattern also clobbers the condition codes.
(define_insn "negdi2_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
        (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
   (clobber (match_scratch:DI 2 "= X,&w,X, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
893
; Split negdi2_neon for vfp registers
; Applies after reload when the destination is a VFP register.  The
; negation becomes "temp = 0; dest = temp - src", where temp is the
; scratch register if one was allocated and the destination itself
; otherwise.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (neg:DI (match_operand:DI 1 "s_register_operand" "")))
   (clobber (match_scratch:DI 2 ""))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (const_int 0))
   (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
              (clobber (reg:CC CC_REGNUM))])]
  {
    /* No scratch register was allocated: zero the destination and
       subtract the source from it in place.  */
    if (!REG_P (operands[2]))
      operands[2] = operands[0];
  }
)
909
910 ; Split negdi2_neon for core registers
;; Applies after reload (TARGET_32BIT) when operand 0 satisfies
;; arm_general_register_operand.  The insn is re-emitted as a plain neg:DI
;; with only the CC clobber, i.e. the DI scratch is dropped.
911 (define_split
912 [(set (match_operand:DI 0 "s_register_operand" "")
913 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
914 (clobber (match_scratch:DI 2 ""))
915 (clobber (reg:CC CC_REGNUM))]
916 "TARGET_32BIT && reload_completed
917 && arm_general_register_operand (operands[0], DImode)"
918 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
919 (clobber (reg:CC CC_REGNUM))])]
920 ""
921 )
922
;; One insn per ABSNEG code and VH mode: operand 0 = ABSNEG (operand 1),
;; emitted as v<absneg_str>.<V_s_elem>.  Enabled only when
;; TARGET_NEON_FP16INST holds.
923 (define_insn "<absneg_str><mode>2"
924 [(set (match_operand:VH 0 "s_register_operand" "=w")
925 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
926 "TARGET_NEON_FP16INST"
927 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
928 [(set_attr "type" "neon_abs<q>")]
929 )
930
;; Named expander for each ABSNEG code and VH mode.  It does no work of its
;; own: it forwards both operands to the <absneg_str><mode>2 insn generator.
931 (define_expand "neon_v<absneg_str><mode>"
932 [(set
933 (match_operand:VH 0 "s_register_operand")
934 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
935 "TARGET_NEON_FP16INST"
936 {
937 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
938 DONE;
939 })
940
;; One unary insn per FP16_RND unspec and VH mode.  The mnemonic comes from
;; <fp16_rnd_insn> and the element suffix from <V_s_elem>.  Enabled only when
;; TARGET_NEON_FP16INST holds.
941 (define_insn "neon_v<fp16_rnd_str><mode>"
942 [(set (match_operand:VH 0 "s_register_operand" "=w")
943 (unspec:VH
944 [(match_operand:VH 1 "s_register_operand" "w")]
945 FP16_RND))]
946 "TARGET_NEON_FP16INST"
947 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
948 [(set_attr "type" "neon_fp_round_s<q>")]
949 )
950
;; UNSPEC_VRSQRTE applied to a VH-mode operand, emitted as vrsqrte.f16.
;; Enabled only when TARGET_NEON_FP16INST holds.
951 (define_insn "neon_vrsqrte<mode>"
952 [(set (match_operand:VH 0 "s_register_operand" "=w")
953 (unspec:VH
954 [(match_operand:VH 1 "s_register_operand" "w")]
955 UNSPEC_VRSQRTE))]
956 "TARGET_NEON_FP16INST"
957 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
958 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
959 )
960
;; Element-wise unsigned minimum (umin) for every VDQIW mode, emitted as
;; vmin with the unsigned element suffix <V_u_elem>.
961 (define_insn "*umin<mode>3_neon"
962 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
963 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
964 (match_operand:VDQIW 2 "s_register_operand" "w")))]
965 "TARGET_NEON"
966 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
967 [(set_attr "type" "neon_minmax<q>")]
968 )
969
;; Element-wise unsigned maximum (umax) for every VDQIW mode, emitted as
;; vmax with the unsigned element suffix <V_u_elem>.
970 (define_insn "*umax<mode>3_neon"
971 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
972 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
973 (match_operand:VDQIW 2 "s_register_operand" "w")))]
974 "TARGET_NEON"
975 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
976 [(set_attr "type" "neon_minmax<q>")]
977 )
978
;; Element-wise smin for every VDQW mode, emitted as vmin.<V_s_elem>.
;; The "type" attribute is neon_fp_minmax_s<q> when <Is_float_mode> is true
;; and neon_minmax<q> otherwise.
979 (define_insn "*smin<mode>3_neon"
980 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
981 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
982 (match_operand:VDQW 2 "s_register_operand" "w")))]
983 "TARGET_NEON"
984 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
985 [(set (attr "type")
986 (if_then_else (match_test "<Is_float_mode>")
987 (const_string "neon_fp_minmax_s<q>")
988 (const_string "neon_minmax<q>")))]
989 )
990
;; Element-wise smax for every VDQW mode, emitted as vmax.<V_s_elem>.
;; The "type" attribute is neon_fp_minmax_s<q> when <Is_float_mode> is true
;; and neon_minmax<q> otherwise.
991 (define_insn "*smax<mode>3_neon"
992 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
993 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
994 (match_operand:VDQW 2 "s_register_operand" "w")))]
995 "TARGET_NEON"
996 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
997 [(set (attr "type")
998 (if_then_else (match_test "<Is_float_mode>")
999 (const_string "neon_fp_minmax_s<q>")
1000 (const_string "neon_minmax<q>")))]
1001 )
1002
1003 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1004 ; generic vectorizer code. It ends up creating a V2DI constructor with
1005 ; SImode elements.
1006
;; Vector left shift for every VDQIW mode, with two alternatives:
;;   0: shift amount in a "w" register, emitted as vshl.<V_s_elem>;
;;   1: "Dn" immediate, with the text produced by
;;      neon_output_shift_immediate ("vshl", 'i', ...) whose last argument
;;      is true.
;; The "type" attribute lists one value per alternative, in the same order.
1007 (define_insn "vashl<mode>3"
1008 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1009 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1010 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1011 "TARGET_NEON"
1012 {
1013 switch (which_alternative)
1014 {
1015 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1016 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1017 <MODE>mode,
1018 VALID_NEON_QREG_MODE (<MODE>mode),
1019 true);
1020 default: gcc_unreachable ();
1021 }
1022 }
1023 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1024 )
1025
;; Arithmetic right shift (ashiftrt) of a VDQIW vector by an immediate that
;; satisfies imm_for_neon_rshift_operand.  The assembly text comes from
;; neon_output_shift_immediate ("vshr", 's', ...) whose last argument is
;; false.
1026 (define_insn "vashr<mode>3_imm"
1027 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1028 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1029 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1030 "TARGET_NEON"
1031 {
1032 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1033 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1034 false);
1035 }
1036 [(set_attr "type" "neon_shift_imm<q>")]
1037 )
1038
;; Logical right shift (lshiftrt) of a VDQIW vector by an immediate that
;; satisfies imm_for_neon_rshift_operand.  Identical to vashr<mode>3_imm
;; except that the sign character passed to neon_output_shift_immediate is
;; 'u' instead of 's'.
1039 (define_insn "vlshr<mode>3_imm"
1040 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1041 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1042 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1043 "TARGET_NEON"
1044 {
1045 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1046 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1047 false);
1048 }
1049 [(set_attr "type" "neon_shift_imm<q>")]
1050 )
1051
1052 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1053 ; negative amount, with signed operands. This is essentially the same as
1054 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
1055 ; with negative shift amounts.
1056
;; Register-controlled vshl with the signed element suffix <V_s_elem>,
;; wrapped in UNSPEC_ASHIFT_SIGNED for every VDQI mode.  The vashr<mode>3
;; expander feeds it a negated shift count.
1057 (define_insn "ashl<mode>3_signed"
1058 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1059 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1060 (match_operand:VDQI 2 "s_register_operand" "w")]
1061 UNSPEC_ASHIFT_SIGNED))]
1062 "TARGET_NEON"
1063 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1064 [(set_attr "type" "neon_shift_reg<q>")]
1065 )
1066
1067 ; Used for implementing logical shift-right, which is a left-shift by a negative
1068 ; amount, with unsigned operands.
1069
;; Register-controlled vshl with the unsigned element suffix <V_u_elem>,
;; wrapped in UNSPEC_ASHIFT_UNSIGNED for every VDQI mode.  The vlshr<mode>3
;; expander feeds it a negated shift count.
1070 (define_insn "ashl<mode>3_unsigned"
1071 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1072 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1073 (match_operand:VDQI 2 "s_register_operand" "w")]
1074 UNSPEC_ASHIFT_UNSIGNED))]
1075 "TARGET_NEON"
1076 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1077 [(set_attr "type" "neon_shift_reg<q>")]
1078 )
1079
;; Expander for arithmetic right shift of a VDQIW vector.
;;   - If operand 2 is a register: negate it into a fresh pseudo and emit
;;     ashl<mode>3_signed with the negated count.
;;   - Otherwise (immediate): emit vashr<mode>3_imm directly.
1080 (define_expand "vashr<mode>3"
1081 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1082 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1083 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1084 "TARGET_NEON"
1085 {
1086 if (s_register_operand (operands[2], <MODE>mode))
1087 {
1088 rtx neg = gen_reg_rtx (<MODE>mode);
1089 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1090 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1091 }
1092 else
1093 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1094 DONE;
1095 })
1096
;; Expander for logical right shift of a VDQIW vector.
;;   - If operand 2 is a register: negate it into a fresh pseudo and emit
;;     ashl<mode>3_unsigned with the negated count.
;;   - Otherwise (immediate): emit vlshr<mode>3_imm directly.
1097 (define_expand "vlshr<mode>3"
1098 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1099 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1100 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1101 "TARGET_NEON"
1102 {
1103 if (s_register_operand (operands[2], <MODE>mode))
1104 {
1105 rtx neg = gen_reg_rtx (<MODE>mode);
1106 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1107 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1108 }
1109 else
1110 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1111 DONE;
1112 })
1113
1114 ;; 64-bit shifts
1115
1116 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1117 ;; leaving the upper half uninitialized. This is OK since the shift
1118 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1119 ;; data flow analysis however, we pretend the full register is set
1120 ;; using an unspec.
;; Two alternatives, selected by where the SImode count lives:
;;   "Um" operand: vld1.32 into lane 0 of the destination;
;;   "r" operand:  vmov.32 into lane 0 of the destination.
;; The result is modelled as UNSPEC_LOAD_COUNT of DImode.
1121 (define_insn "neon_load_count"
1122 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1123 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1124 UNSPEC_LOAD_COUNT))]
1125 "TARGET_NEON"
1126 "@
1127 vld1.32\t{%P0[0]}, %A1
1128 vmov.32\t%P0[0], %1"
1129 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1130 )
1131
;; DImode left shift entirely in "w" registers, without any clobbers.
;; Only valid after reload.  The shift amount is either an immediate
;; (alternative 0, restricted by the condition to 0..63) or a "w" register
;; (alternative 1).  Both alternatives emit vshl.u64.
1132 (define_insn "ashldi3_neon_noclobber"
1133 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1134 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1135 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1136 "TARGET_NEON && reload_completed
1137 && (!CONST_INT_P (operands[2])
1138 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1139 "@
1140 vshl.u64\t%P0, %P1, %2
1141 vshl.u64\t%P0, %P1, %P2"
1142 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1143 )
1144
;; DImode left shift by an SImode amount, with seven alternatives covering
;; "w" and "r" destinations.  The insn prints as "#" and is split after
;; reload by the C code below:
;;
;;  * Operand 0 in a VFP register:
;;      - constant amount < 1: emit a plain DImode move and stop;
;;      - constant amount > 63: replace the amount by 63;
;;      - non-constant amount: load it into the DI scratch (operand 5) with
;;        neon_load_count and use that scratch as the amount;
;;    then emit ashldi3_neon_noclobber.
;;
;;  * Otherwise (core registers): assert that operands 0 and 1 either do not
;;    overlap or are the same register; a shift by the constant 1 uses
;;    arm_ashldi3_1bit, anything else goes through
;;    arm_emit_coreregs_64bit_shift with SI scratches 3 and 4.
;;
;; The "arch" and "opt" attributes give one value per alternative.
1145 (define_insn_and_split "ashldi3_neon"
1146 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w")
1147 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w")
1148 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i")))
1149 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X"))
1150 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X"))
1151 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X"))
1152 (clobber (reg:CC_C CC_REGNUM))]
1153 "TARGET_NEON"
1154 "#"
1155 "TARGET_NEON && reload_completed"
1156 [(const_int 0)]
1157 "
1158 {
1159 if (IS_VFP_REGNUM (REGNO (operands[0])))
1160 {
1161 if (CONST_INT_P (operands[2]))
1162 {
1163 if (INTVAL (operands[2]) < 1)
1164 {
1165 emit_insn (gen_movdi (operands[0], operands[1]));
1166 DONE;
1167 }
1168 else if (INTVAL (operands[2]) > 63)
1169 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1170 }
1171 else
1172 {
1173 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1174 operands[2] = operands[5];
1175 }
1176
1177 /* Ditch the unnecessary clobbers. */
1178 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1179 operands[2]));
1180 }
1181 else
1182 {
1183 /* The shift expanders support either full overlap or no overlap. */
1184 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1185 || REGNO (operands[0]) == REGNO (operands[1]));
1186
1187 if (operands[2] == CONST1_RTX (SImode))
1188 /* This clobbers CC. */
1189 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1190 else
1191 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1192 operands[2], operands[3], operands[4]);
1193 }
1194 DONE;
1195 }"
1196 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1197 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1198 (set_attr "type" "multiple")]
1199 )
1200
1201 ; The shift amount needs to be negated for right-shifts
;; DImode register-controlled shift emitted as vshl.s64 and modelled as
;; UNSPEC_ASHIFT_SIGNED.  Only valid after reload.  All three operands are
;; "w" registers.
1202 (define_insn "signed_shift_di3_neon"
1203 [(set (match_operand:DI 0 "s_register_operand" "=w")
1204 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1205 (match_operand:DI 2 "s_register_operand" " w")]
1206 UNSPEC_ASHIFT_SIGNED))]
1207 "TARGET_NEON && reload_completed"
1208 "vshl.s64\t%P0, %P1, %P2"
1209 [(set_attr "type" "neon_shift_reg")]
1210 )
1211
1212 ; The shift amount needs to be negated for right-shifts
;; DImode register-controlled shift emitted as vshl.u64 and modelled as
;; UNSPEC_ASHIFT_UNSIGNED.  Only valid after reload.  All three operands are
;; "w" registers.
1213 (define_insn "unsigned_shift_di3_neon"
1214 [(set (match_operand:DI 0 "s_register_operand" "=w")
1215 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1216 (match_operand:DI 2 "s_register_operand" " w")]
1217 UNSPEC_ASHIFT_UNSIGNED))]
1218 "TARGET_NEON && reload_completed"
1219 "vshl.u64\t%P0, %P1, %P2"
1220 [(set_attr "type" "neon_shift_reg")]
1221 )
1222
;; DImode arithmetic right shift by an immediate, without clobbers.
;; Only valid after reload, and only for amounts in the range 1..64
;; (inclusive) as enforced by the condition.  Emitted as vshr.s64.
1223 (define_insn "ashrdi3_neon_imm_noclobber"
1224 [(set (match_operand:DI 0 "s_register_operand" "=w")
1225 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1226 (match_operand:DI 2 "const_int_operand" " i")))]
1227 "TARGET_NEON && reload_completed
1228 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1229 "vshr.s64\t%P0, %P1, %2"
1230 [(set_attr "type" "neon_shift_imm")]
1231 )
1232
;; DImode logical right shift by an immediate, without clobbers.
;; Only valid after reload, and only for amounts in the range 1..64
;; (inclusive) as enforced by the condition.  Emitted as vshr.u64.
1233 (define_insn "lshrdi3_neon_imm_noclobber"
1234 [(set (match_operand:DI 0 "s_register_operand" "=w")
1235 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1236 (match_operand:DI 2 "const_int_operand" " i")))]
1237 "TARGET_NEON && reload_completed
1238 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1239 "vshr.u64\t%P0, %P1, %2"
1240 [(set_attr "type" "neon_shift_imm")]
1241 )
1242
1243 ;; ashrdi3_neon
1244 ;; lshrdi3_neon
;; DImode right shifts (one pattern per RSHIFTS code) by an SImode amount.
;; The insn prints as "#" and is split after reload by the C code below:
;;
;;  * Operand 0 in a VFP register:
;;      - constant amount < 1: emit a plain DImode move and stop;
;;      - constant amount > 64: replace the amount by 64;
;;      - any other constant: emit <shift>di3_neon_imm_noclobber;
;;      - register amount: negate it into SI scratch 3, load that into DI
;;        scratch 5 with neon_load_count, then emit
;;        <shifttype>_shift_di3_neon (a left shift by the negated count).
;;
;;  * Otherwise (core registers): assert that operands 0 and 1 either do not
;;    overlap or are the same register; a shift by the constant 1 uses
;;    arm_<shift>di3_1bit, anything else goes through
;;    arm_emit_coreregs_64bit_shift with SI scratches 3 and 4.
;;
;; NOTE(review): the "2r" constraint on scratch 3 presumably lets the negated
;; count reuse operand 2's register -- confirm.
1245 (define_insn_and_split "<shift>di3_neon"
1246 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w")
1247 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1248 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1249 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1250 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1251 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1252 (clobber (reg:CC CC_REGNUM))]
1253 "TARGET_NEON"
1254 "#"
1255 "TARGET_NEON && reload_completed"
1256 [(const_int 0)]
1257 "
1258 {
1259 if (IS_VFP_REGNUM (REGNO (operands[0])))
1260 {
1261 if (CONST_INT_P (operands[2]))
1262 {
1263 if (INTVAL (operands[2]) < 1)
1264 {
1265 emit_insn (gen_movdi (operands[0], operands[1]));
1266 DONE;
1267 }
1268 else if (INTVAL (operands[2]) > 64)
1269 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1270
1271 /* Ditch the unnecessary clobbers. */
1272 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1273 operands[1],
1274 operands[2]));
1275 }
1276 else
1277 {
1278 /* We must use a negative left-shift. */
1279 emit_insn (gen_negsi2 (operands[3], operands[2]));
1280 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1281 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1282 operands[5]));
1283 }
1284 }
1285 else
1286 {
1287 /* The shift expanders support either full overlap or no overlap. */
1288 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1289 || REGNO (operands[0]) == REGNO (operands[1]));
1290
1291 if (operands[2] == CONST1_RTX (SImode))
1292 /* This clobbers CC. */
1293 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1294 else
1295 /* This clobbers CC (ASHIFTRT by register only). */
1296 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1297 operands[2], operands[3], operands[4]);
1298 }
1299
1300 DONE;
1301 }"
1302 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1303 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1304 (set_attr "type" "multiple")]
1305 )
1306
1307 ;; Widening operations
1308
;; Expander for the VQI modes: operand 0 = operand 2 + sign_extend
;; (operand 1), where operands 0 and 2 have mode <V_double_width>.
;; Steps:
;;   1. build the two half-selecting parallels with
;;      arm_simd_vect_par_cnst_half (mode, false) and (mode, true);
;;   2. copy operand 2 into operand 0 unless they are the same rtx;
;;   3. accumulate into operand 0 with vec_sel_widen_ssum_lo (first parallel)
;;      and then vec_sel_widen_ssum_hi (second parallel).
1309 (define_expand "widen_ssum<mode>3"
1310 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1311 (plus:<V_double_width>
1312 (sign_extend:<V_double_width>
1313 (match_operand:VQI 1 "s_register_operand" ""))
1314 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1315 "TARGET_NEON"
1316 {
1317 machine_mode mode = GET_MODE (operands[1]);
1318 rtx p1, p2;
1319
1320 p1 = arm_simd_vect_par_cnst_half (mode, false);
1321 p2 = arm_simd_vect_par_cnst_half (mode, true);
1322
1323 if (operands[0] != operands[2])
1324 emit_move_insn (operands[0], operands[2]);
1325
1326 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1327 operands[1],
1328 p1,
1329 operands[0]));
1330 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1331 operands[1],
1332 p2,
1333 operands[0]));
1334 DONE;
1335 }
1336 )
1337
;; Signed widening accumulate of one half of operand 1: operand 0 =
;; operand 3 + sign_extend (vec_select (operand 1, operand 2)), where
;; operand 2 must satisfy vect_par_constant_low and operand 3 is tied to
;; operand 0 by the "0" constraint.  Emits vaddw.<V_s_elem> with %e1 on
;; little-endian and %f1 when BYTES_BIG_ENDIAN.
;; NOTE(review): the "%" modifier on operand 1 marks it commutative with
;; operand 2, which here is the selector parallel -- confirm this is intended.
1338 (define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
1339 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1340 (plus:<VW:V_widen>
1341 (sign_extend:<VW:V_widen>
1342 (vec_select:VW
1343 (match_operand:VQI 1 "s_register_operand" "%w")
1344 (match_operand:VQI 2 "vect_par_constant_low" "")))
1345 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1346 "TARGET_NEON"
1347 {
1348 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1349 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1350 }
1351 [(set_attr "type" "neon_add_widen")])
1352
;; Signed widening accumulate of the other half of operand 1: same shape as
;; vec_sel_widen_ssum_lo, but operand 2 must satisfy vect_par_constant_high
;; and the register halves are swapped: %f1 on little-endian, %e1 when
;; BYTES_BIG_ENDIAN.
;; NOTE(review): the "%" modifier on operand 1 marks it commutative with
;; operand 2, which here is the selector parallel -- confirm this is intended.
1353 (define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
1354 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1355 (plus:<VW:V_widen>
1356 (sign_extend:<VW:V_widen>
1357 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1358 (match_operand:VQI 2 "vect_par_constant_high" "")))
1359 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1360 "TARGET_NEON"
1361 {
1362 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1363 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1364 }
1365 [(set_attr "type" "neon_add_widen")])
1366
;; Signed widening add for the VW modes: operand 0 = operand 2 +
;; sign_extend (operand 1), with operands 0 and 2 in mode <V_widen>.
;; Emitted as a single vaddw.<V_s_elem>.
;; NOTE(review): the "%" modifier on operand 1 marks it commutative with
;; operand 2, yet the two have different modes (VW vs <V_widen>) -- confirm
;; this is intended.
1367 (define_insn "widen_ssum<mode>3"
1368 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1369 (plus:<V_widen>
1370 (sign_extend:<V_widen>
1371 (match_operand:VW 1 "s_register_operand" "%w"))
1372 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1373 "TARGET_NEON"
1374 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1375 [(set_attr "type" "neon_add_widen")]
1376 )
1377
;; Expander for the VQI modes: operand 0 = operand 2 + zero_extend
;; (operand 1), where operands 0 and 2 have mode <V_double_width>.
;; Steps:
;;   1. build the two half-selecting parallels with
;;      arm_simd_vect_par_cnst_half (mode, false) and (mode, true);
;;   2. copy operand 2 into operand 0 unless they are the same rtx;
;;   3. accumulate into operand 0 with vec_sel_widen_usum_lo (first parallel)
;;      and then vec_sel_widen_usum_hi (second parallel).
1378 (define_expand "widen_usum<mode>3"
1379 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1380 (plus:<V_double_width>
1381 (zero_extend:<V_double_width>
1382 (match_operand:VQI 1 "s_register_operand" ""))
1383 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1384 "TARGET_NEON"
1385 {
1386 machine_mode mode = GET_MODE (operands[1]);
1387 rtx p1, p2;
1388
1389 p1 = arm_simd_vect_par_cnst_half (mode, false);
1390 p2 = arm_simd_vect_par_cnst_half (mode, true);
1391
1392 if (operands[0] != operands[2])
1393 emit_move_insn (operands[0], operands[2]);
1394
1395 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1396 operands[1],
1397 p1,
1398 operands[0]));
1399 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1400 operands[1],
1401 p2,
1402 operands[0]));
1403 DONE;
1404 }
1405 )
1406
;; Unsigned widening accumulate of one half of operand 1: operand 0 =
;; operand 3 + zero_extend (vec_select (operand 1, operand 2)), where
;; operand 2 must satisfy vect_par_constant_low and operand 3 is tied to
;; operand 0 by the "0" constraint.  Emits vaddw.<V_u_elem> with %e1 on
;; little-endian and %f1 when BYTES_BIG_ENDIAN.
;; NOTE(review): the "%" modifier on operand 1 marks it commutative with
;; operand 2, which here is the selector parallel -- confirm this is intended.
1407 (define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
1408 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1409 (plus:<VW:V_widen>
1410 (zero_extend:<VW:V_widen>
1411 (vec_select:VW
1412 (match_operand:VQI 1 "s_register_operand" "%w")
1413 (match_operand:VQI 2 "vect_par_constant_low" "")))
1414 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1415 "TARGET_NEON"
1416 {
1417 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1418 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1419 }
1420 [(set_attr "type" "neon_add_widen")])
1421
;; Unsigned widening accumulate of the other half of operand 1: same shape
;; as vec_sel_widen_usum_lo, but operand 2 must satisfy
;; vect_par_constant_high and the register halves are swapped: %f1 on
;; little-endian, %e1 when BYTES_BIG_ENDIAN.
;; NOTE(review): the "%" modifier on operand 1 marks it commutative with
;; operand 2, which here is the selector parallel -- confirm this is intended.
1422 (define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
1423 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1424 (plus:<VW:V_widen>
1425 (zero_extend:<VW:V_widen>
1426 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1427 (match_operand:VQI 2 "vect_par_constant_high" "")))
1428 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1429 "TARGET_NEON"
1430 {
1431 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1432 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1433 }
1434 [(set_attr "type" "neon_add_widen")])
1435
;; Unsigned widening add for the VW modes: operand 0 = operand 2 +
;; zero_extend (operand 1), with operands 0 and 2 in mode <V_widen>.
;; Emitted as a single vaddw.<V_u_elem>.
;; NOTE(review): the "%" modifier on operand 1 marks it commutative with
;; operand 2, yet the two have different modes (VW vs <V_widen>) -- confirm
;; this is intended.
1436 (define_insn "widen_usum<mode>3"
1437 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1438 (plus:<V_widen> (zero_extend:<V_widen>
1439 (match_operand:VW 1 "s_register_operand" "%w"))
1440 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1441 "TARGET_NEON"
1442 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1443 [(set_attr "type" "neon_add_widen")]
1444 )
1445
1446 ;; Helpers for quad-word reduction operations
1447
1448 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1449 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1450 ; N/2-element vector.
1451
;; One insn per VQH_OPS code: combines elements {0,1} of the V4SI operand 1
;; with its elements {2,3}, producing a V2SI result.  Emitted as
;; <VQH_mnem>.<VQH_sign>32 on %e1 and %f1.  The "vqh_mnem" attribute records
;; the mnemonic so that "type" definitions can test it.
1452 (define_insn "quad_halves_<code>v4si"
1453 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1454 (VQH_OPS:V2SI
1455 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1456 (parallel [(const_int 0) (const_int 1)]))
1457 (vec_select:V2SI (match_dup 1)
1458 (parallel [(const_int 2) (const_int 3)]))))]
1459 "TARGET_NEON"
1460 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1461 [(set_attr "vqh_mnem" "<VQH_mnem>")
1462 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1463 )
1464
;; Floating-point counterpart of quad_halves_<code>v4si: one insn per
;; VQHS_OPS code, combining elements {0,1} of the V4SF operand 1 with its
;; elements {2,3} into a V2SF result.  Additionally requires
;; flag_unsafe_math_optimizations.  Emitted as <VQH_mnem>.f32.
1465 (define_insn "quad_halves_<code>v4sf"
1466 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1467 (VQHS_OPS:V2SF
1468 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1469 (parallel [(const_int 0) (const_int 1)]))
1470 (vec_select:V2SF (match_dup 1)
1471 (parallel [(const_int 2) (const_int 3)]))))]
1472 "TARGET_NEON && flag_unsafe_math_optimizations"
1473 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1474 [(set_attr "vqh_mnem" "<VQH_mnem>")
1475 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1476 )
1477
;; One insn per VQH_OPS code: combines elements {0..3} of the V8HI operand 1
;; with its elements {4..7}, producing a V4HI result.  Emitted as
;; <VQH_mnem>.<VQH_sign>16 on %e1 and %f1.
;; Operand 0 is only written (it is the destination of a plain set and does
;; not appear in the source expression), so it uses the write-only "=w"
;; constraint, matching the v4si and v4sf variants.
1478 (define_insn "quad_halves_<code>v8hi"
1479 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1480 (VQH_OPS:V4HI
1481 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1482 (parallel [(const_int 0) (const_int 1)
1483 (const_int 2) (const_int 3)]))
1484 (vec_select:V4HI (match_dup 1)
1485 (parallel [(const_int 4) (const_int 5)
1486 (const_int 6) (const_int 7)]))))]
1487 "TARGET_NEON"
1488 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1489 [(set_attr "vqh_mnem" "<VQH_mnem>")
1490 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1491 )
1492
;; One insn per VQH_OPS code: combines elements {0..7} of the V16QI operand 1
;; with its elements {8..15}, producing a V8QI result.  Emitted as
;; <VQH_mnem>.<VQH_sign>8 on %e1 and %f1.
;; Operand 0 is only written (it is the destination of a plain set and does
;; not appear in the source expression), so it uses the write-only "=w"
;; constraint, matching the v4si and v4sf variants.
1493 (define_insn "quad_halves_<code>v16qi"
1494 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1495 (VQH_OPS:V8QI
1496 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1497 (parallel [(const_int 0) (const_int 1)
1498 (const_int 2) (const_int 3)
1499 (const_int 4) (const_int 5)
1500 (const_int 6) (const_int 7)]))
1501 (vec_select:V8QI (match_dup 1)
1502 (parallel [(const_int 8) (const_int 9)
1503 (const_int 10) (const_int 11)
1504 (const_int 12) (const_int 13)
1505 (const_int 14) (const_int 15)]))))]
1506 "TARGET_NEON"
1507 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1508 [(set_attr "vqh_mnem" "<VQH_mnem>")
1509 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1510 )
1511
;; For every ANY128 mode: store the <V_HALF> operand 1 into the half of
;; operand 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), using
;; a subreg of operand 0 as the move destination.
1512 (define_expand "move_hi_quad_<mode>"
1513 [(match_operand:ANY128 0 "s_register_operand" "")
1514 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1515 "TARGET_NEON"
1516 {
1517 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1518 GET_MODE_SIZE (<V_HALF>mode)),
1519 operands[1]);
1520 DONE;
1521 })
1522
;; For every ANY128 mode: store the <V_HALF> operand 1 into the half of
;; operand 0 that starts at byte offset 0, using a subreg of operand 0 as
;; the move destination.
1523 (define_expand "move_lo_quad_<mode>"
1524 [(match_operand:ANY128 0 "s_register_operand" "")
1525 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1526 "TARGET_NEON"
1527 {
1528 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1529 <MODE>mode, 0),
1530 operands[1]);
1531 DONE;
1532 })
1533
1534 ;; Reduction operations
1535
;; Sum reduction of a VD vector to a scalar of mode <V_elem>.  Float modes
;; are only handled under flag_unsafe_math_optimizations.  The vector is
;; reduced by neon_pairwise_reduce using neon_vpadd_internal<mode>, and
;; element 0 of the result is extracted into operand 0.
1536 (define_expand "reduc_plus_scal_<mode>"
1537 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1538 (match_operand:VD 1 "s_register_operand" "")]
1539 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1540 {
1541 rtx vec = gen_reg_rtx (<MODE>mode);
1542 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1543 &gen_neon_vpadd_internal<mode>);
1544 /* The same result is actually computed into every element. */
1545 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1546 DONE;
1547 })
1548
;; Sum reduction of a VQ vector to a scalar of mode <V_elem>.  Disabled when
;; BYTES_BIG_ENDIAN; float modes additionally need
;; flag_unsafe_math_optimizations.  The two halves are first added into a
;; <V_HALF> temporary with quad_halves_plus<mode>, which is then reduced by
;; the <V_half> variant of reduc_plus_scal.
1549 (define_expand "reduc_plus_scal_<mode>"
1550 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1551 (match_operand:VQ 1 "s_register_operand" "")]
1552 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1553 && !BYTES_BIG_ENDIAN"
1554 {
1555 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1556
1557 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1558 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1559
1560 DONE;
1561 })
1562
;; Sum reduction of a V2DI vector to a DImode scalar.  Disabled when
;; BYTES_BIG_ENDIAN.  arm_reduc_plus_internal_v2di computes the sum into a
;; V2DI temporary, and element 0 of that temporary is extracted into
;; operand 0.
;; Operand constraints are left empty, as in the other reduction expanders.
1563 (define_expand "reduc_plus_scal_v2di"
1564 [(match_operand:DI 0 "nonimmediate_operand" "")
1565 (match_operand:V2DI 1 "s_register_operand" "")]
1566 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1567 {
1568 rtx vec = gen_reg_rtx (V2DImode);
1569
1570 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1571 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1572
1573 DONE;
1574 })
1575
;; Modelled as UNSPEC_VPADD on a V2DI operand and emitted as vadd.i64 of
;; %e1 and %f1 into %e0.  Disabled when BYTES_BIG_ENDIAN.  Only %e0 is
;; written by the template; the reduc_plus_scal_v2di expander reads
;; element 0 of the result.
1576 (define_insn "arm_reduc_plus_internal_v2di"
1577 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1578 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1579 UNSPEC_VPADD))]
1580 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1581 "vadd.i64\t%e0, %e1, %f1"
1582 [(set_attr "type" "neon_add_q")]
1583 )
1584
;; Signed-minimum reduction of a VD vector to a scalar of mode <V_elem>.
;; Float modes are only handled under flag_unsafe_math_optimizations.  The
;; vector is reduced by neon_pairwise_reduce using neon_vpsmin<mode>, and
;; element 0 of the result is extracted into operand 0.
1585 (define_expand "reduc_smin_scal_<mode>"
1586 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1587 (match_operand:VD 1 "s_register_operand" "")]
1588 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1589 {
1590 rtx vec = gen_reg_rtx (<MODE>mode);
1591
1592 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1593 &gen_neon_vpsmin<mode>);
1594 /* The result is computed into every element of the vector. */
1595 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1596 DONE;
1597 })
1598
;; Signed-minimum reduction of a VQ vector to a scalar of mode <V_elem>.
;; Disabled when BYTES_BIG_ENDIAN; float modes additionally need
;; flag_unsafe_math_optimizations.  The two halves are first combined into a
;; <V_HALF> temporary with quad_halves_smin<mode>, which is then reduced by
;; the <V_half> variant of reduc_smin_scal.
1599 (define_expand "reduc_smin_scal_<mode>"
1600 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1601 (match_operand:VQ 1 "s_register_operand" "")]
1602 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1603 && !BYTES_BIG_ENDIAN"
1604 {
1605 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1606
1607 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1608 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1609
1610 DONE;
1611 })
1612
;; Signed-maximum reduction of a VD vector to a scalar of mode <V_elem>.
;; Float modes are only handled under flag_unsafe_math_optimizations.  The
;; vector is reduced by neon_pairwise_reduce using neon_vpsmax<mode>, and
;; element 0 of the result is extracted into operand 0.
1613 (define_expand "reduc_smax_scal_<mode>"
1614 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1615 (match_operand:VD 1 "s_register_operand" "")]
1616 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1617 {
1618 rtx vec = gen_reg_rtx (<MODE>mode);
1619 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1620 &gen_neon_vpsmax<mode>);
1621 /* The result is computed into every element of the vector. */
1622 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1623 DONE;
1624 })
1625
;; Signed-maximum reduction of a VQ vector to a scalar of mode <V_elem>.
;; Disabled when BYTES_BIG_ENDIAN; float modes additionally need
;; flag_unsafe_math_optimizations.  The two halves are first combined into a
;; <V_HALF> temporary with quad_halves_smax<mode>, which is then reduced by
;; the <V_half> variant of reduc_smax_scal.
1626 (define_expand "reduc_smax_scal_<mode>"
1627 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1628 (match_operand:VQ 1 "s_register_operand" "")]
1629 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1630 && !BYTES_BIG_ENDIAN"
1631 {
1632 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1633
1634 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1635 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1636
1637 DONE;
1638 })
1639
;; Unsigned-minimum reduction of a VDI vector to a scalar of mode <V_elem>.
;; The vector is reduced by neon_pairwise_reduce using neon_vpumin<mode>,
;; and element 0 of the result is extracted into operand 0.
1640 (define_expand "reduc_umin_scal_<mode>"
1641 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1642 (match_operand:VDI 1 "s_register_operand" "")]
1643 "TARGET_NEON"
1644 {
1645 rtx vec = gen_reg_rtx (<MODE>mode);
1646 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1647 &gen_neon_vpumin<mode>);
1648 /* The result is computed into every element of the vector. */
1649 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1650 DONE;
1651 })
1652
;; Unsigned-minimum reduction of a VQI vector to a scalar of mode <V_elem>.
;; Disabled when BYTES_BIG_ENDIAN.  The two halves are first combined into a
;; <V_HALF> temporary with quad_halves_umin<mode>, which is then reduced by
;; the <V_half> variant of reduc_umin_scal.
1653 (define_expand "reduc_umin_scal_<mode>"
1654 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1655 (match_operand:VQI 1 "s_register_operand" "")]
1656 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1657 {
1658 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1659
1660 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1661 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1662
1663 DONE;
1664 })
1665
;; Unsigned-maximum reduction of a VDI vector to a scalar of mode <V_elem>.
;; The vector is reduced by neon_pairwise_reduce using neon_vpumax<mode>,
;; and element 0 of the result is extracted into operand 0.
1666 (define_expand "reduc_umax_scal_<mode>"
1667 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1668 (match_operand:VDI 1 "s_register_operand" "")]
1669 "TARGET_NEON"
1670 {
1671 rtx vec = gen_reg_rtx (<MODE>mode);
1672 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1673 &gen_neon_vpumax<mode>);
1674 /* The result is computed into every element of the vector. */
1675 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1676 DONE;
1677 })
1678
;; Unsigned-maximum reduction of a VQI vector to a scalar of mode <V_elem>.
;; Disabled when BYTES_BIG_ENDIAN.  The two halves are first combined into a
;; <V_HALF> temporary with quad_halves_umax<mode>, which is then reduced by
;; the <V_half> variant of reduc_umax_scal.
1679 (define_expand "reduc_umax_scal_<mode>"
1680 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1681 (match_operand:VQI 1 "s_register_operand" "")]
1682 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1683 {
1684 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1685
1686 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1687 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1688
1689 DONE;
1690 })
1691
;; Two-operand UNSPEC_VPADD for every VD mode, emitted as
;; vpadd.<V_if_elem>.  The "type" attribute is neon_fp_reduc_add_s<q> when
;; <Is_float_mode> is true and neon_reduc_add<q> otherwise.  Used as the
;; step function of the reduc_plus_scal_<mode> expander for VD modes.
1692 (define_insn "neon_vpadd_internal<mode>"
1693 [(set (match_operand:VD 0 "s_register_operand" "=w")
1694 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1695 (match_operand:VD 2 "s_register_operand" "w")]
1696 UNSPEC_VPADD))]
1697 "TARGET_NEON"
1698 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1699 ;; Assume this schedules like vadd.
1700 [(set (attr "type")
1701 (if_then_else (match_test "<Is_float_mode>")
1702 (const_string "neon_fp_reduc_add_s<q>")
1703 (const_string "neon_reduc_add<q>")))]
1704 )
1705
;; Two-operand UNSPEC_VPADD on V4HF, emitted as vpadd.f16.  Enabled only
;; when TARGET_NEON_FP16INST holds.
;; NOTE(review): the "type" here is the integer neon_reduc_add, whereas the
;; float case of neon_vpadd_internal<mode> uses neon_fp_reduc_add_s<q> --
;; confirm this is intended.
1706 (define_insn "neon_vpaddv4hf"
1707 [(set
1708 (match_operand:V4HF 0 "s_register_operand" "=w")
1709 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1710 (match_operand:V4HF 2 "s_register_operand" "w")]
1711 UNSPEC_VPADD))]
1712 "TARGET_NEON_FP16INST"
1713 "vpadd.f16\t%P0, %P1, %P2"
1714 [(set_attr "type" "neon_reduc_add")]
1715 )
1716
;; Two-operand UNSPEC_VPSMIN for every VD mode, emitted as
;; vpmin.<V_s_elem>.  The "type" attribute is neon_fp_reduc_minmax_s<q> when
;; <Is_float_mode> is true and neon_reduc_minmax<q> otherwise.  Used as the
;; step function of the reduc_smin_scal_<mode> expander for VD modes.
1717 (define_insn "neon_vpsmin<mode>"
1718 [(set (match_operand:VD 0 "s_register_operand" "=w")
1719 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1720 (match_operand:VD 2 "s_register_operand" "w")]
1721 UNSPEC_VPSMIN))]
1722 "TARGET_NEON"
1723 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1724 [(set (attr "type")
1725 (if_then_else (match_test "<Is_float_mode>")
1726 (const_string "neon_fp_reduc_minmax_s<q>")
1727 (const_string "neon_reduc_minmax<q>")))]
1728 )
1729
;; Two-operand UNSPEC_VPSMAX for every VD mode, emitted as
;; vpmax.<V_s_elem>.  The "type" attribute is neon_fp_reduc_minmax_s<q> when
;; <Is_float_mode> is true and neon_reduc_minmax<q> otherwise.  Used as the
;; step function of the reduc_smax_scal_<mode> expander for VD modes.
1730 (define_insn "neon_vpsmax<mode>"
1731 [(set (match_operand:VD 0 "s_register_operand" "=w")
1732 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1733 (match_operand:VD 2 "s_register_operand" "w")]
1734 UNSPEC_VPSMAX))]
1735 "TARGET_NEON"
1736 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1737 [(set (attr "type")
1738 (if_then_else (match_test "<Is_float_mode>")
1739 (const_string "neon_fp_reduc_minmax_s<q>")
1740 (const_string "neon_reduc_minmax<q>")))]
1741 )
1742
;; Two-operand UNSPEC_VPUMIN for every VDI mode, emitted as vpmin with the
;; unsigned element suffix <V_u_elem>.  Used as the step function of the
;; reduc_umin_scal_<mode> expander for VDI modes.
1743 (define_insn "neon_vpumin<mode>"
1744 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1745 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1746 (match_operand:VDI 2 "s_register_operand" "w")]
1747 UNSPEC_VPUMIN))]
1748 "TARGET_NEON"
1749 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1750 [(set_attr "type" "neon_reduc_minmax<q>")]
1751 )
1752
;; Two-operand UNSPEC_VPUMAX for every VDI mode, emitted as vpmax with the
;; unsigned element suffix <V_u_elem>.  Used as the step function of the
;; reduc_umax_scal_<mode> expander for VDI modes.
1753 (define_insn "neon_vpumax<mode>"
1754 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1755 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1756 (match_operand:VDI 2 "s_register_operand" "w")]
1757 UNSPEC_VPUMAX))]
1758 "TARGET_NEON"
1759 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1760 [(set_attr "type" "neon_reduc_minmax<q>")]
1761 )
1762
1763 ;; Saturating arithmetic
1764
1765 ; NOTE: Neon supports many more saturating variants of instructions than the
1766 ; following, but these are all GCC currently understands.
1767 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1768 ; yet either, although these patterns may be used by intrinsics when they're
1769 ; added.
1770
;; Signed saturating add (ss_plus) for every VD mode, emitted as
;; vqadd.<V_s_elem> on D registers (%P operands).
1771 (define_insn "*ss_add<mode>_neon"
1772 [(set (match_operand:VD 0 "s_register_operand" "=w")
1773 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1774 (match_operand:VD 2 "s_register_operand" "w")))]
1775 "TARGET_NEON"
1776 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1777 [(set_attr "type" "neon_qadd<q>")]
1778 )
1779
;; Unsigned saturating add (us_plus) for every VD mode, emitted as
;; vqadd.<V_u_elem> on D registers (%P operands).
1780 (define_insn "*us_add<mode>_neon"
1781 [(set (match_operand:VD 0 "s_register_operand" "=w")
1782 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1783 (match_operand:VD 2 "s_register_operand" "w")))]
1784 "TARGET_NEON"
1785 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1786 [(set_attr "type" "neon_qadd<q>")]
1787 )
1788
;; Signed saturating subtract (ss_minus) for every VD mode, emitted as
;; vqsub.<V_s_elem> on D registers (%P operands).
1789 (define_insn "*ss_sub<mode>_neon"
1790 [(set (match_operand:VD 0 "s_register_operand" "=w")
1791 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1792 (match_operand:VD 2 "s_register_operand" "w")))]
1793 "TARGET_NEON"
1794 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1795 [(set_attr "type" "neon_qsub<q>")]
1796 )
1797
;; Unsigned saturating subtract (us_minus) for every VD mode, emitted as
;; vqsub.<V_u_elem> on D registers (%P operands).
1798 (define_insn "*us_sub<mode>_neon"
1799 [(set (match_operand:VD 0 "s_register_operand" "=w")
1800 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1801 (match_operand:VD 2 "s_register_operand" "w")))]
1802 "TARGET_NEON"
1803 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1804 [(set_attr "type" "neon_qsub<q>")]
1805 )
1806
1807 ;; Conditional instructions. These are comparisons with conditional moves for
1808 ;; vectors. They perform the assignment:
1809 ;;
1810 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1811 ;;
1812 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1813 ;; element-wise.
1814
;; Vector conditional select for the VDQW modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; A lane mask is built with vcge/vcgt/vceq (or the compare-with-zero
;; vclt/vcle generators) and then fed to vbsl.  Float modes are only enabled
;; when flag_unsafe_math_optimizations is set.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VDQW 0 "s_register_operand" "")
        (if_then_else:VDQW
          (match_operator 3 "comparison_operator"
            [(match_operand:VDQW 4 "s_register_operand" "")
             (match_operand:VDQW 5 "nonmemory_operand" "")])
          (match_operand:VDQW 1 "s_register_operand" "")
          (match_operand:VDQW 2 "s_register_operand" "")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  const enum rtx_code cmp_code = GET_CODE (operands[3]);
  int reversed = 0;
  int zero_rhs = 0;
  int swap_select = 0;
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  rtx scratch = gen_reg_rtx (<V_cmp_result>mode);

  rtx (*base_gen) (rtx, rtx, rtx);
  rtx (*complement_gen) (rtx, rtx, rtx);

  /* GE, GT, LE, LT and EQ may be compared directly against zero, so a
     literal zero in operand 5 is kept for those codes.  In every other
     situation operand 5 has to live in a register.  */
  if ((cmp_code == GE || cmp_code == GT || cmp_code == LE
       || cmp_code == LT || cmp_code == EQ)
      && operands[5] == CONST0_RTX (<MODE>mode))
    zero_rhs = 1;
  else if (!REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  /* Pick the pair of comparison generators.  REVERSED is set for the
     "less than" flavours, which are expressed through the "greater than"
     generators.  */
  switch (cmp_code)
    {
    case LT:
    case UNLT:
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      reversed = (cmp_code == LT || cmp_code == UNLT);
      base_gen = gen_neon_vcge<mode>;
      complement_gen = gen_neon_vcgt<mode>;
      break;

    case LE:
    case UNLE:
    case GT:
    case UNGT:
      reversed = (cmp_code == LE || cmp_code == UNLE);
      base_gen = gen_neon_vcgt<mode>;
      complement_gen = gen_neon_vcge<mode>;
      break;

    case EQ:
    case NE:
    case UNEQ:
      base_gen = gen_neon_vceq<mode>;
      complement_gen = gen_neon_vceq<mode>;
      break;

    default:
      gcc_unreachable ();
    }

  /* Emit the comparison(s) that compute MASK.  */
  switch (cmp_code)
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case: a single vcge, vcgt or vceq.
         Since a LT b <=> b GT a and a LE b <=> b GE a, we emit:
           a GE b -> a GE b
           a GT b -> a GT b
           a LE b -> b GE a
           a LT b -> b GT a
           a EQ b -> a EQ b
         When operand 5 is a literal zero the operands are never swapped;
         LT and LE then use the vclt/vcle generators instead.  */
      if (zero_rhs)
        {
          reversed = 0;
          if (cmp_code == LT)
            base_gen = gen_neon_vclt<mode>;
          else if (cmp_code == LE)
            base_gen = gen_neon_vcle<mode>;
          /* GE, GT and EQ already have the right generator.  */
        }

      if (reversed)
        emit_insn (complement_gen (mask, operands[5], operands[4]));
      else
        emit_insn (base_gen (mask, operands[4], operands[5]));
      break;

    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* Vector compares yield false for unordered lanes.  So emit the
         inverse of the comparison we want and swap the vbsl data operands;
         unordered lanes then select as if the comparison were true.
         (a NE NaN and NaN NE b are true for all a, b.)

         The comparison emitted, before the swap, is:
           a UNGE b -> b GT a
           a UNGT b -> b GE a
           a UNLE b -> a GT b
           a UNLT b -> a GE b
           a NE b   -> a EQ b  */
      if (reversed)
        emit_insn (base_gen (mask, operands[4], operands[5]));
      else
        emit_insn (complement_gen (mask, operands[5], operands[4]));

      swap_select = 1;
      break;

    case UNEQ:
      /* (a > b || b > a) is true iff (a != b && a ORDERED b).  Swapping
         the vbsl data operands therefore selects on
         (a == b || a UNORDERED b), which is UNEQ.  */
      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
      emit_insn (gen_neon_vcgt<mode> (scratch, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, scratch));
      swap_select = 1;
      break;

    case UNORDERED:
      /* Same mask as ORDERED, with the vbsl data operands swapped.  */
      swap_select = 1;
      /* Fall through.  */
    case ORDERED:
      /* Operands are ORDERED iff (a > b || b >= a).  */
      emit_insn (gen_neon_vcgt<mode> (scratch, operands[4], operands[5]));
      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, scratch));
      break;

    default:
      gcc_unreachable ();
    }

  /* Select between operands 1 and 2 under MASK, exchanging them when the
     mask computed above is the inverse of the requested condition.  */
  {
    rtx bsl_first = swap_select ? operands[2] : operands[1];
    rtx bsl_second = swap_select ? operands[1] : operands[2];
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, bsl_first,
                                    bsl_second));
  }
  DONE;
})
1979
;; Unsigned vector conditional select for the VDQIW modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Handles GEU, GTU, LEU, LTU, EQ and NE.  LEU/LTU are emitted as GEU/GTU
;; with the comparison operands exchanged; NE is emitted as EQ with the
;; vbsl data operands exchanged.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (if_then_else:VDQIW
          (match_operator 3 "arm_comparison_operator"
            [(match_operand:VDQIW 4 "s_register_operand" "")
             (match_operand:VDQIW 5 "s_register_operand" "")])
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "s_register_operand" "")))]
  "TARGET_NEON"
{
  const enum rtx_code cmp_code = GET_CODE (operands[3]);
  int inverse = 0;
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);

  /* A literal zero may only be kept for EQ/NE, where signedness does not
     matter.  The vcle/vclt compare-with-zero generators are signed: using
     them for LEU/LTU would report lanes with the top bit set as
     "<= 0"/"< 0".  The vcgeu/vcgtu patterns accept registers only.  So for
     the unsigned orderings operand 5 is always forced into a register.  */
  if (!((cmp_code == EQ || cmp_code == NE)
        && operands[5] == CONST0_RTX (<MODE>mode))
      && !REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (cmp_code)
    {
    case GEU:
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
      break;

    case GTU:
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      /* a LEU b <=> b GEU a.  */
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
      break;

    case LTU:
      /* a LTU b <=> b GTU a.  */
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
      break;

    case NE:
      /* Compute EQ and exchange the vbsl data operands below.  */
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      inverse = 1;
      break;

    default:
      gcc_unreachable ();
    }

  if (inverse)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));

  DONE;
})
2046
2047 ;; Patterns for builtins.
2048
2049 ; good for plain vadd, vaddq.
2050
;; Builtin expander for vector add over the VCVTF modes.  Float modes without
;; flag_unsafe_math_optimizations go through the unspec pattern; everything
;; else uses the generic add<mode>3.
(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
2064
;; Builtin expander for vector add over the VH modes; always forwards to
;; add<mode>3_fp16.  Requires TARGET_NEON_FP16INST.
(define_expand "neon_vadd<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  emit_insn (gen_add<mode>3_fp16 (dest, lhs, rhs));
  DONE;
})
2074
;; Builtin expander for vector subtract over the VH modes; always forwards
;; to sub<mode>3_fp16.  Requires TARGET_NEON_FP16INST.
(define_expand "neon_vsub<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  emit_insn (gen_sub<mode>3_fp16 (dest, lhs, rhs));
  DONE;
})
2084
2085 ; Note that NEON operations don't support the full IEEE 754 standard: in
2086 ; particular, denormal values are flushed to zero. This means that GCC cannot
2087 ; use those instructions for autovectorization, etc. unless
2088 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2089 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2090 ; header) must work in either case: if -funsafe-math-optimizations is given,
2091 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2092 ; expand to unspecs (which may potentially limit the extent to which they might
2093 ; be optimized by generic code).
2094
2095 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2096
;; Vector add wrapped in UNSPEC_VADD, over the VCVTF modes.  The "type"
;; attribute depends on <Is_float_mode>.
(define_insn "neon_vadd<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VADD))]
  "TARGET_NEON"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_add<q>")))]
)
2109
;; vaddl: two VDI inputs, result in the <V_widen> mode.  The unspec comes
;; from the VADDL iterator.
(define_insn "neon_vaddl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VADDL))]
  "TARGET_NEON"
  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_add_long")]
)
2119
;; vaddw: operand 1 is already in the <V_widen> mode, operand 2 is a VDI
;; vector; the result is in the <V_widen> mode.  Unspec from the VADDW
;; iterator.
(define_insn "neon_vaddw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)
2129
2130 ; vhadd and vrhadd.
2131
;; vhadd / vrhadd over the VDQIW modes; the unspec comes from the VHADD
;; iterator and <r> selects the mnemonic variant.
;; The "type" attribute uses <q>, like neon_vhsub<sup><mode>, so that it
;; follows the mode instead of always naming the _q variant.
(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)
2141
;; vqadd builtin over the VDQIX modes; unspec from the VQADD iterator.
(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)
2151
;; vaddhn / vraddhn: two VN inputs, result in the <V_narrow> mode.  Unspec
;; from the VADDHN iterator.
(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)
2161
2162 ;; Polynomial and Float multiplication.
;; vmul wrapped in UNSPEC_VMUL over the VPF modes; <pf> supplies the leading
;; part of the data-type suffix.  The "type" attribute depends on
;; <Is_float_mode>.
(define_insn "neon_vmul<pf><mode>"
  [(set (match_operand:VPF 0 "s_register_operand" "=w")
        (unspec:VPF
          [(match_operand:VPF 1 "s_register_operand" "w")
           (match_operand:VPF 2 "s_register_operand" "w")]
          UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s<q>")
                      (const_string "neon_mul_<V_elem_ch><q>")))]
)
2175
;; Plain (mult) vector multiply over the VH modes, output as vmul.f16.
;; Requires TARGET_NEON_FP16INST.
(define_insn "neon_vmulf<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (mult:VH
          (match_operand:VH 1 "s_register_operand" "w")
          (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)
2186
;; Builtin expander for vmla over the VDQW modes.  Float modes without
;; flag_unsafe_math_optimizations use the unspec pattern; everything else
;; uses mul<mode>3add<mode>_neon.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                             operands[2], operands[3]));
  DONE;
})
2202
;; Builtin expander for vfma over the VCVTF modes.  Forwards to
;; fma<mode>4_intrinsic with the operands reordered: operands 2 and 3 come
;; first and operand 1 is passed last.
(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fma<mode>4_intrinsic (dest, operands[2], operands[3],
                                       last_arg));
  DONE;
})
2214
;; Builtin expander for vfma over the VH modes.  Forwards to
;; fma<mode>4_intrinsic with operands 2 and 3 first and operand 1 last.
;; Requires TARGET_NEON_FP16INST.
(define_expand "neon_vfma<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fma<mode>4_intrinsic (dest, operands[2], operands[3],
                                       last_arg));
  DONE;
})
2226
;; Builtin expander for vfms over the VCVTF modes.  Forwards to
;; fmsub<mode>4_intrinsic with operands 2 and 3 first and operand 1 last.
(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fmsub<mode>4_intrinsic (dest, operands[2], operands[3],
                                         last_arg));
  DONE;
})
2238
;; Builtin expander for vfms over the VH modes.  Forwards to
;; fmsub<mode>4_intrinsic with operands 2 and 3 first and operand 1 last.
;; Requires TARGET_NEON_FP16INST.
(define_expand "neon_vfms<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fmsub<mode>4_intrinsic (dest, operands[2], operands[3],
                                         last_arg));
  DONE;
})
2250
2251 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2252
;; vmla wrapped in UNSPEC_VMLA over the VDQW modes.  Operand 1 is tied to the
;; destination ("0" constraint), so only operands 0, 2 and 3 are printed.
(define_insn "neon_vmla<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
2266
;; vmlal: <V_widen> operand 1 is tied to the destination, operands 2 and 3
;; are VW vectors.  Unspec from the VMLAL iterator.
(define_insn "neon_vmlal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2277
;; Builtin expander for vmls over the VDQW modes.  Float modes without
;; flag_unsafe_math_optimizations use the unspec pattern; everything else
;; uses mul<mode>3neg<mode>add<mode>_neon.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
                 operands[1], operands[2], operands[3]));
  DONE;
})
2293
2294 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2295
;; vmls wrapped in UNSPEC_VMLS over the VDQW modes.  Operand 1 is tied to the
;; destination ("0" constraint), so only operands 0, 2 and 3 are printed.
(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
2309
;; vmlsl: <V_widen> operand 1 is tied to the destination, operands 2 and 3
;; are VW vectors.  Unspec from the VMLSL iterator.
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2320
2321 ;; vqdmulh, vqrdmulh
;; vqdmulh / vqrdmulh over the VMDQI modes; unspec from the VQDMULH iterator,
;; with <r> selecting the mnemonic variant.
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "w")
           (match_operand:VMDQI 2 "s_register_operand" "w")]
          VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)
2331
2332 ;; vqrdmlah, vqrdmlsh
;; vqrdmlah / vqrdmlsh over the VMDQI modes; the VQRDMLH_AS iterator picks
;; the unspec and, via neon_rdma_as, the mnemonic.  Operand 1 is tied to the
;; destination.  Requires TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "0")
           (match_operand:VMDQI 2 "s_register_operand" "w")
           (match_operand:VMDQI 3 "s_register_operand" "w")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2343
;; vqdmlal (UNSPEC_VQDMLAL): <V_widen> operand 1 is tied to the destination,
;; operands 2 and 3 are VMDI vectors.
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2354
;; vqdmlsl (UNSPEC_VQDMLSL): <V_widen> operand 1 is tied to the destination,
;; operands 2 and 3 are VMDI vectors.
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2365
;; vmull: two VW inputs, result in the <V_widen> mode.  Unspec from the
;; VMULL iterator.
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
2375
;; vqdmull (UNSPEC_VQDMULL): two VMDI inputs, result in the <V_widen> mode.
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand" "w")]
          UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)
2385
;; Builtin expander for vector subtract over the VCVTF modes.  Float modes
;; without flag_unsafe_math_optimizations go through the unspec pattern;
;; everything else uses the generic sub<mode>3.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
2399
2400 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2401
;; Vector subtract wrapped in UNSPEC_VSUB, over the VCVTF modes.  The "type"
;; attribute depends on <Is_float_mode>.
(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_sub<q>")))]
)
2414
;; vsubl: two VDI inputs, result in the <V_widen> mode.  Unspec from the
;; VSUBL iterator.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)
2424
;; vsubw: operand 1 is already in the <V_widen> mode, operand 2 is a VDI
;; vector; the result is in the <V_widen> mode.  Unspec from the VSUBW
;; iterator.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)
2434
;; vqsub builtin over the VDQIX modes; unspec from the VQSUB iterator.
(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)
2444
;; vhsub builtin over the VDQIW modes; unspec from the VHSUB iterator.
(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)
2454
;; vsubhn / vrsubhn: two VN inputs, result in the <V_narrow> mode.  Unspec
;; from the VSUBHN iterator.
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)
2464
2465 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2466 ;; without unsafe math optimizations.
;; Builtin expander for the vector comparisons of the COMPARISONS iterator
;; over the VDQW modes.  Float modes without flag_unsafe_math_optimizations
;; are routed to the V2SF/V4SF unspec insns; all other cases use
;; neon_vc<cmp_op><mode>_insn.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
   (neg:<V_cmp_result>
     (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
                       (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
  "TARGET_NEON"
{
  /* FP comparisons must use the UNSPEC patterns unless
     -funsafe-math-optimizations is in effect.  */
  const int need_unspec
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       && !flag_unsafe_math_optimizations);

  if (!need_unspec)
    emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0], operands[1],
                                               operands[2]));
  /* The unspec insns exist only for the float modes, while this expander
     is instantiated for the integer modes too, so the generator cannot be
     named through <mode>; the two float modes are spelled out instead.
     Integer comparisons never reach this point.  */
  else if (<MODE>mode == V2SFmode)
    emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                    operands[1],
                                                    operands[2]));
  else if (<MODE>mode == V4SFmode)
    emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                    operands[1],
                                                    operands[2]));
  else
    gcc_unreachable ();

  DONE;
}
)
2506
;; Vector comparison insn (negated COMPARISONS result) over the VDQW modes.
;; Not available for float modes unless flag_unsafe_math_optimizations is
;; set.  Alternative 0 compares against a register, alternative 1 against
;; the immediate #0.
(define_insn "neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
{
  char pattern[100];
  /* Leading part of the data-type suffix: "f" for float vectors, otherwise
     whatever <cmp_type> provides.  */
  const char *elt_kind
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       ? "f" : "<cmp_type>");
  /* Last operand: a register, or the literal zero.  */
  const char *rhs = (which_alternative == 0 ? "%<V_reg>2" : "#0");

  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           elt_kind, rhs);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))]
)
2531
;; Float vector comparison expressed as an unspec (NEON_VCMP iterator) over
;; the VCVTF modes.  Alternative 0 compares against a register, alternative
;; 1 against the immediate #0.
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
{
  char pattern[100];
  /* Last operand: a register, or the literal zero.  */
  const char *rhs = (which_alternative == 0 ? "%<V_reg>2" : "#0");

  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           rhs);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2550
;; Builtin expander for the COMPARISONS vector compares over the VH modes.
;; Uses the _fp16insn_unspec pattern unless flag_unsafe_math_optimizations
;; is set, in which case the plain _fp16insn pattern is used.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
   (neg:<V_cmp_result>
     (COMPARISONS:VH
       (match_operand:VH 1 "s_register_operand")
       (match_operand:VH 2 "reg_or_zero_operand")))]
  "TARGET_NEON_FP16INST"
{
  /* FP comparisons must use the UNSPEC patterns unless
     -funsafe-math-optimizations is in effect.  */
  const int need_unspec
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       && !flag_unsafe_math_optimizations);

  if (!need_unspec)
    emit_insn (gen_neon_vc<cmp_op><mode>_fp16insn (operands[0], operands[1],
                                                   operands[2]));
  else
    emit_insn (gen_neon_vc<cmp_op><mode>_fp16insn_unspec (operands[0],
                                                          operands[1],
                                                          operands[2]));
  DONE;
})
2572
;; Vector comparison insn (negated COMPARISONS result) over the VH modes.
;; Only available with TARGET_NEON_FP16INST, and for float modes only when
;; flag_unsafe_math_optimizations is set.  Alternative 0 compares against a
;; register, alternative 1 against the immediate #0.
(define_insn "neon_vc<cmp_op><mode>_fp16insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VH 1 "s_register_operand" "w,w")
            (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON_FP16INST
   && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
        && !flag_unsafe_math_optimizations)"
{
  char pattern[100];
  /* Leading part of the data-type suffix: "f" for float vectors, otherwise
     whatever <cmp_type> provides.  */
  const char *elt_kind
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       ? "f" : "<cmp_type>");
  /* Last operand: a register, or the literal zero.  */
  const char *rhs = (which_alternative == 0 ? "%<V_reg>2" : "#0");

  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           elt_kind, rhs);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))]
)
2597
;; Vector comparison expressed as an unspec (NEON_VCMP iterator) over the VH
;; modes.  Requires TARGET_NEON_FP16INST.  Alternative 0 compares against a
;; register, alternative 1 against the immediate #0.
(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VH 1 "s_register_operand" "w,w")
           (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON_FP16INST"
{
  char pattern[100];
  /* Last operand: a register, or the literal zero.  */
  const char *rhs = (which_alternative == 0 ? "%<V_reg>2" : "#0");

  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           rhs);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2616
;; Unsigned vector comparison (GTUGEU iterator) over the VDQIW modes; both
;; inputs must be registers.  Output with a ".u" data-type suffix.
(define_insn "neon_vc<cmp_op>u<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
            (match_operand:VDQIW 1 "s_register_operand" "w")
            (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)
2627
;; Builtin expander for comparisons of absolute values (GTGE iterator) over
;; the VCVTF modes.  Uses the unspec insn unless
;; flag_unsafe_math_optimizations is set.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
{
  if (!flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
                                                operands[2]));
  DONE;
}
)
2646
;; Comparison of absolute values (GTGE iterator) over the VCVTF modes,
;; written with explicit abs RTL.  Only enabled under
;; flag_unsafe_math_optimizations.
(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2657
;; Comparison of absolute values expressed as an unspec (NEON_VACMP
;; iterator) over the VCVTF modes.
(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          NEON_VACMP))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2667
;; Builtin expander for comparisons of absolute values (GLTE iterator) over
;; the VH modes.  Uses the _fp16insn_unspec pattern unless
;; flag_unsafe_math_optimizations is set.  Requires TARGET_NEON_FP16INST.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GLTE:<V_cmp_result>
            (abs:VH (match_operand:VH 1 "s_register_operand"))
            (abs:VH (match_operand:VH 2 "s_register_operand")))))]
  "TARGET_NEON_FP16INST"
{
  if (!flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
               (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
               (operands[0], operands[1], operands[2]));
  DONE;
})
2685
;; Comparison of absolute values (GLTE iterator) over the VH modes, written
;; with explicit abs RTL.  Requires TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations.
(define_insn "neon_vca<cmp_op><mode>_fp16insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GLTE:<V_cmp_result>
            (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
            (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2697
;; Comparison of absolute values expressed as an unspec (NEON_VAGLTE
;; iterator) over the VH modes.
;; The condition is TARGET_NEON_FP16INST, matching the other VH patterns in
;; this group (the expander that generates this insn, its non-unspec
;; counterpart and the vc..._fp16insn_unspec insn); plain TARGET_NEON would
;; declare the insn available on targets where none of those are.
(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          NEON_VAGLTE))]
  "TARGET_NEON_FP16INST"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2708
;; Builtin expander for compare-with-zero over the VH modes: forwards to
;; neon_vc<cmp_op><mode> with a zero vector as the second comparison
;; operand.  Requires TARGET_NEON_FP16INST.
(define_expand "neon_vc<cmp_op>z<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (COMPARISONS:<V_cmp_result>
          (match_operand:VH 1 "s_register_operand")
          (const_int 0)))]
  "TARGET_NEON_FP16INST"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);

  emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1], zero_vec));
  DONE;
})
2721
;; vtst builtin (UNSPEC_VTST) over the VDQIW modes.
(define_insn "neon_vtst<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          UNSPEC_VTST))]
  "TARGET_NEON"
  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_tst<q>")]
)
2731
;; vabd builtin over the VDQIW modes; unspec from the VABD iterator.
(define_insn "neon_vabd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VABD))]
  "TARGET_NEON"
  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)
2741
;; vabd on the VH modes (UNSPEC_VABD_F), available with TARGET_NEON_FP16INST.
;; NOTE(review): the "type" here is neon_abd<q>, whereas neon_vabdf<mode>
;; uses neon_fp_abd_s<q> for the same unspec -- confirm this is intended.
(define_insn "neon_vabd<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          UNSPEC_VABD_F))]
  "TARGET_NEON_FP16INST"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)
2751
;; vabd on the VCVTF modes (UNSPEC_VABD_F).
(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VABD_F))]
  "TARGET_NEON"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
2761
;; vabdl: takes two VW inputs and produces a <V_widen> result, printed as
;; %q0 from the %P1 and %P2 sources.
(define_insn "neon_vabdl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VABDL))]
  "TARGET_NEON"
  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_abd_long")]
)
2771
;; vaba: the VABD unspec of operands 2 and 3 added to operand 1.  Operand 1
;; is tied to the destination by the "0" constraint, so only operands 2 and
;; 3 appear as sources in the template.
(define_insn "neon_vaba<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (plus:VDQIW
          (unspec:VDQIW
            [(match_operand:VDQIW 2 "s_register_operand" "w")
             (match_operand:VDQIW 3 "s_register_operand" "w")]
            VABD)
          (match_operand:VDQIW 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2782
;; vabal: the VABDL unspec of the VW operands 2 and 3 added to the
;; <V_widen> operand 1, which is tied to the destination ("0").
(define_insn "neon_vabal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (unspec:<V_widen>
            [(match_operand:VW 2 "s_register_operand" "w")
             (match_operand:VW 3 "s_register_operand" "w")]
            VABDL)
          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2793
;; v<maxmin> on the VDQIW modes, iterated over VMAXMIN.
(define_insn "neon_v<maxmin><sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VMAXMIN))]
  "TARGET_NEON"
  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)
2803
;; v<maxmin> on the VCVTF modes, iterated over VMAXMINF.
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINF))]
  "TARGET_NEON"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2813
;; v<maxmin> on the VH modes, iterated over VMAXMINF; requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          VMAXMINF))]
  "TARGET_NEON_FP16INST"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2824
;; vp<maxmin>.f16 on V4HF, iterated over VPMAXMINF.  All three operands are
;; printed with %P.
(define_insn "neon_vp<maxmin>fv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4HF 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand" "w")]
          VPMAXMINF))]
  "TARGET_NEON_FP16INST"
  "vp<maxmin>.f16\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax")]
)
2835
;; <fmaxmin_op> on the VH modes, iterated over VMAXMINFNM; requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_<fmaxmin_op><mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          VMAXMINFNM))]
  "TARGET_NEON_FP16INST"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2847
;; v<maxmin>nm intrinsics on the VCVTF modes; these additionally require
;; TARGET_FPU_ARMV8.
(define_insn "neon_<fmaxmin_op><mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINFNM))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2858
;; Vector forms of the IEEE-754 fmax ()/fmin () functions, provided under
;; the <fmaxmin><mode>3 names.  Same RTL and output template as the
;; neon_<fmaxmin_op><mode> pattern for the VCVTF modes.
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINFNM))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2869
;; Expander for the VD modes that simply forwards its three operands to
;; neon_vpadd_internal<mode>.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand" "=w")
   (match_operand:VD 1 "s_register_operand" "w")
   (match_operand:VD 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  emit_insn (gen_neon_vpadd_internal<mode> (dest, lhs, rhs));
  DONE;
})
2880
;; vpaddl: single VDQIW source, <V_double_width> result, iterated over
;; VPADDL.
(define_insn "neon_vpaddl<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          VPADDL))]
  "TARGET_NEON"
  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_reduc_add_long")]
)
2889
;; vpadal: operand 1 (<V_double_width>) is tied to the destination ("0"),
;; so the template names only the destination and the VDQIW operand 2.
(define_insn "neon_vpadal<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:<V_double_width> 1 "s_register_operand" "0")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VPADAL))]
  "TARGET_NEON"
  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_reduc_add_acc")]
)
2899
;; vp<maxmin> on the VDI modes, iterated over VPMAXMIN.
(define_insn "neon_vp<maxmin><sup><mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VPMAXMIN))]
  "TARGET_NEON"
  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
2909
;; vp<maxmin> on the VCVTF modes, iterated over VPMAXMINF.
(define_insn "neon_vp<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VPMAXMINF))]
  "TARGET_NEON"
  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
)
2919
;; vrecps on the VCVTF modes (UNSPEC_VRECPS).
(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRECPS))]
  "TARGET_NEON"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)
2929
;; vrecps on the VH modes (UNSPEC_VRECPS); requires TARGET_NEON_FP16INST.
(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          UNSPEC_VRECPS))]
  "TARGET_NEON_FP16INST"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)
2940
;; vrsqrts on the VCVTF modes (UNSPEC_VRSQRTS).
(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRSQRTS))]
  "TARGET_NEON"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)
2950
;; vrsqrts on the VH modes (UNSPEC_VRSQRTS); requires TARGET_NEON_FP16INST.
(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          UNSPEC_VRSQRTS))]
  "TARGET_NEON_FP16INST"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)
2961
;; Expander for the VDQW modes that forwards to the abs<mode>2 pattern.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_abs<mode>2 (dest, src));
  DONE;
})
2970
;; vqabs on the VDQIW modes (UNSPEC_VQABS).
(define_insn "neon_vqabs<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQABS))]
  "TARGET_NEON"
  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qabs<q>")]
)
2979
;; bswap on the VDQHSD modes, emitted as vrev<V_sz_elem>.8.
(define_insn "neon_bswap<mode>"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD
          (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
2987
;; Expander for the VDQW modes that forwards to the neg<mode>2 pattern.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neg<mode>2 (dest, src));
  DONE;
})
2996
;; copysign expander for the VCVTF modes.  It builds a constant vector in
;; the <V_cmp_result> mode whose elements all hold the bit pattern
;; 0x80000000, copies operand 2 into operand 0, and then calls
;; gen_neon_vbsl<mode> with that mask, operand 0 and operand 1.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
  "{
     rtx v_bitmask_cast;
     rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
     int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
     rtvec v = rtvec_alloc (n_elt);
     /* A CONST_INT has to be sign-extended from the mode it is used in,
        so go through gen_int_mode instead of GEN_INT: the latter would
        store the positive value 0x80000000 unchanged in a wider
        HOST_WIDE_INT.  The element is the same for every lane, so it is
        built once outside the loop.  */
     rtx sign_bit
       = gen_int_mode (0x80000000,
                       GET_MODE_INNER (<VCVTF:V_cmp_result>mode));

     /* Create bitmask for vector select.  */
     for (i = 0; i < n_elt; ++i)
       RTVEC_ELT (v, i) = sign_bit;

     emit_move_insn (v_bitmask,
                     gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
     emit_move_insn (operands[0], operands[2]);
     /* Reinterpret the integer mask in the floating-point vector mode.  */
     v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                           <VCVTF:V_cmp_result>mode, 0);
     emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
                                     operands[1]));

     DONE;
  }"
)
3023
;; vqneg on the VDQIW modes (UNSPEC_VQNEG).
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)
3032
;; vcls on the VDQIW modes (UNSPEC_VCLS).
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)
3041
;; Standard-named clz pattern for the VDQIW modes, emitted as vclz.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
3049
;; Expander for the VDQIW modes that forwards to the clz<mode>2 pattern.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_clz<mode>2 (dest, src));
  DONE;
})
3058
;; Standard-named popcount pattern for the VE modes, emitted as vcnt.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE
          (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
3066
;; Expander for the VE modes that forwards to the popcount<mode>2 pattern.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand" "=w")
   (match_operand:VE 1 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_popcount<mode>2 (dest, src));
  DONE;
})
3075
;; vrecpe.f16 on the VH modes (UNSPEC_VRECPE); requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON_FP16INST"
  "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
3084
;; vrecpe on the V32 modes (UNSPEC_VRECPE); the element type suffix comes
;; from <V_u_elem>.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
3093
;; vrsqrte on the V32 modes (UNSPEC_VRSQRTE); the element type suffix comes
;; from <V_u_elem>.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
3102
;; Expander for the VDQIW modes that forwards to the one_cmpl<mode>2
;; pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_one_cmpl<mode>2 (dest, src));
  DONE;
})
3111
;; Sign-extending lane extract from a VD vector into an SImode core
;; register.  On big-endian the lane index is mirrored
;; (NUNITS - 1 - lane) before the instruction is printed.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - lane);
    }

  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
3130
;; Zero-extending lane extract from a VD vector into an SImode core
;; register.  On big-endian the lane index is mirrored
;; (NUNITS - 1 - lane) before the instruction is printed.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - lane);
    }

  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
3149
;; Sign-extending lane extract from a VQ2 vector into an SImode core
;; register.  The lane index is split into a <V_HALF> register (hard
;; register number of operand 1 plus 2 * (lane / half)) and a lane within
;; that half; on big-endian the within-half index is mirrored.  The
;; instruction is printed directly with output_asm_insn.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  unsigned int half_nunits = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % half_nunits;
  int base_regno = REGNO (operands[1]);
  rtx xops[3];

  if (BYTES_BIG_ENDIAN)
    lane_in_half = half_nunits - 1 - lane_in_half;

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode,
                         base_regno + 2 * (lane / half_nunits));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3176
;; Zero-extending lane extract from a VQ2 vector into an SImode core
;; register.  The lane index is split into a <V_HALF> register (hard
;; register number of operand 1 plus 2 * (lane / half)) and a lane within
;; that half; on big-endian the within-half index is mirrored.  The
;; instruction is printed directly with output_asm_insn.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  unsigned int half_nunits = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % half_nunits;
  int base_regno = REGNO (operands[1]);
  rtx xops[3];

  if (BYTES_BIG_ENDIAN)
    lane_in_half = half_nunits - 1 - lane_in_half;

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode,
                         base_regno + 2 * (lane / half_nunits));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3203
;; Lane-extract expander for the VDQW modes.  Modes with 32-bit elements go
;; through vec_extract<mode>; all other element sizes use the
;; sign-extending neon_vget_lane<mode>_sext_internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics assume that elements are ordered in memory as for
         vldm, while generic RTL assumes array order.  Translate the lane
         number from the former model to the latter by flipping it within
         a 64-bit register.  */
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1],
                                      operands[2]));
  DONE;
})
3232
;; Unsigned lane-extract expander for the VDQIW modes.  Modes with 32-bit
;; elements go through vec_extract<mode>; all other element sizes use the
;; zero-extending neon_vget_lane<mode>_zext_internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics assume that elements are ordered in memory as for
         vldm, while generic RTL assumes array order.  Translate the lane
         number from the former model to the latter by flipping it within
         a 64-bit register.  */
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1],
                                      operands[2]));
  DONE;
})
3261
;; Lane extract for DImode.  The lane operand (operand 2) is not looked
;; at: the expansion is just a move of operand 1 into operand 0.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=r")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
3271
;; Lane extract for V2DI: moves the low part (lane 0) or the high part
;; (lane 1) of operand 1, viewed as DImode, into operand 0.  Any other
;; lane value trips the assertion.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  int lane;
  rtx half;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics assume that elements are ordered in memory as for
         vldm, while generic RTL assumes array order.  Translate the lane
         number from the former model to the latter; with two lanes that
         means swapping lane 0 and lane 1.  */
      unsigned int lanes = 2;
      unsigned int flipped = INTVAL (operands[2]);

      flipped ^= lanes - 1;
      operands[2] = GEN_INT (flipped);
    }

  lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  if (lane == 0)
    half = gen_lowpart (DImode, operands[1]);
  else
    half = gen_highpart (DImode, operands[1]);

  emit_move_insn (operands[0], half);
  DONE;
})
3300
;; Lane insert for the VDQ modes.  Forwards to vec_set<mode>_internal,
;; passing the lane as a one-hot mask (1 << lane).  On big-endian the lane
;; number is first flipped within a 64-bit register.
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "=w")
   (match_operand:<V_elem> 1 "s_register_operand" "r")
   (match_operand:VDQ 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);
  rtx lane_mask;

  if (BYTES_BIG_ENDIAN)
    {
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);

      lane ^= lanes_per_dreg - 1;
    }

  lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         lane_mask, operands[2]));
  DONE;
})
3321
; Lane insert for DImode.  Operands 2 and 3 are not used: the expansion is
; just a move of operand 1 into operand 0.

(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")
   (match_operand:DI 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx value = operands[1];

  emit_move_insn (dest, value);
  DONE;
})
3334
;; vcreate for the VD_RE modes: move the DImode operand 1, viewed through
;; gen_lowpart in the vector mode, into operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand" "")
   (match_operand:DI 1 "general_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
3344
;; vdup from a core register ("r") into a VX vector.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX
          (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
3352
;; vdup.16 of an HFmode value held in a core register ("r") into V4HF;
;; the destination is printed with %P.
(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3360
;; vdup.16 of an HFmode value held in a core register ("r") into V8HF;
;; the destination is printed with %q.
(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3368
;; vdup into a V32 vector.  Two alternatives: the source in an "r"
;; register (printed as %1) or in a "t" register (printed as %y1).
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32
          (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
3378
;; vdup_n for DImode: expands to a plain move of operand 1 into operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
}
)
3388
;; Duplicate a DImode value into V2DI.  Both alternatives emit two vmov
;; instructions (hence length 8), one writing %e0 and one writing %f0;
;; the source is either an "r" operand (%Q1, %R1) or a "w" operand (%P1).
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
3399
;; Duplicate one lane of a <V_double_vector_mode> source across a VDQW
;; destination.  On big-endian the lane index is mirrored within the
;; source vector before printing.  The destination is printed with %P or
;; %q depending on <Is_d_reg>.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);

      operands[2]
        = GEN_INT (GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - lane);
    }

  return (<Is_d_reg>
          ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
          : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
3421
;; Duplicate one lane of a <V_double_vector_mode> source across a VH
;; destination; requires TARGET_FP16 in addition to TARGET_NEON.  On
;; big-endian the lane index is mirrored within the source vector before
;; printing.  The destination is printed with %P or %q depending on
;; <Is_d_reg>.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (vec_duplicate:VH
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON && TARGET_FP16"
{
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);

      operands[2]
        = GEN_INT (GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - lane);
    }

  return (<Is_d_reg>
          ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
          : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
3443
;; vdup_lane expander for the VDQW modes.  On big-endian the lane number is
;; flipped within a 64-bit register; the operands are then handed to
;; neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3462
;; vdup_lane expander for the VH modes; requires TARGET_FP16 in addition to
;; TARGET_NEON.  On big-endian the lane number is flipped within a 64-bit
;; register; the operands are then handed to
;; neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && TARGET_FP16"
{
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3481
; vdup_lane for DImode.  Only lane zero is valid here, so the scalar index
; (operand 2) is ignored and the expansion is a plain move.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
3492
; vdup_lane for V2DI.  The DImode second operand has only a single
; element, so the lane index (operand 2) is ignored here as well and the
; expansion forwards to neon_vdup_nv2di.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vdup_nv2di (dest, src));
  DONE;
})
3503
; Register swap.  The pattern only matches once reload has completed: we
; do not want combine doing something silly with it beforehand, but the
; post-reload expansion of neon_vcombine makes use of it.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1)
        (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
3514
;; Concatenate two VDX vectors into a <V_DOUBLE> vector: operand 1 should
;; be the low part and operand 2 the high part of the destination.  The
;; insn is emitted as "#" and split after reload by neon_split_vcombine.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that nothing needs to be output at all (the register
;; allocator could be made to put things in the right places by itself).
;; The lack of subregs for vectors makes that tricky though, I think.

(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
3537
;; vget_high for the VQX modes: move the <V_HALF> subreg of operand 1 at
;; byte offset GET_MODE_SIZE (<V_HALF>mode) into operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx high_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                           GET_MODE_SIZE (<V_HALF>mode));

  emit_move_insn (operands[0], high_half);
  DONE;
})
3548
;; vget_low for the VQX modes: move the <V_HALF> subreg of operand 1 at
;; byte offset 0 into operand 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx low_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);

  emit_move_insn (operands[0], low_half);
  DONE;
})
3559
;; Standard-named signed integer to float conversion for the VCVTI modes
;; (vcvt.f32.s32).  Disabled under flag_rounding_math.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3567
;; Standard-named unsigned integer to float conversion for the VCVTI modes
;; (vcvt.f32.u32).  Disabled under flag_rounding_math.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3575
;; Standard-named float to signed integer conversion for the VCVTF modes
;; (vcvt.s32.f32).
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3583
;; Standard-named float to unsigned integer conversion for the VCVTF modes
;; (vcvt.u32.f32).
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3591
;; vcvt from the VCVTF modes to <V_CVTTO>, iterated over VCVT_US
;; (vcvt.<sup>32.f32).
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3600
;; vcvt from the VCVTI modes to <V_CVTTO>, iterated over VCVT_US
;; (vcvt.f32.<sup>32).
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3609
;; vcvt.f32.f16: V4HF source (%P1) to V4SF destination (%q0); requires
;; TARGET_FP16.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V4HF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
3618
;; vcvt.f16.f32: V4SF source (%q1) to V4HF destination (%P0); requires
;; TARGET_FP16.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4SF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
3627
;; vcvt from the VCVTHI modes to <VH_CVTTO>, iterated over VCVT_US
;; (vcvt.f16.<sup>16); requires TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
3638
;; vcvt from the VH modes to <VH_CVTTO>, iterated over VCVT_US
;; (vcvt.<sup>16.f16); requires TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3649
;; vcvt with an immediate operand 2, from the VCVTF modes to <V_CVTTO>.
;; The immediate is checked with arm_const_bounds (1, 33) before the
;; instruction is printed.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* Validate the immediate first.  */
  arm_const_bounds (operands[2], 1, 33);

  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3662
;; vcvt with an immediate operand 2, from the VH modes to <VH_CVTTO>;
;; requires TARGET_NEON_FP16INST.  The immediate is checked with
;; arm_const_bounds before the instruction is printed.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* The lower bound is 1, matching the 32-bit variants of this pattern
     (which use 1, 33): a fixed-point conversion needs at least one
     fraction bit, so 0 must be rejected here instead of being passed on
     to the assembler.  */
  arm_const_bounds (operands[2], 1, 17);

  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3676
;; Integer-to-f32 conversion taking an extra immediate (operand 2), over the
;; VCVTI modes.  The immediate is range-checked when the insn is output.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* Check operand 2 against the bounds 1 and 33 before printing it.  */
  rtx imm = operands[2];
  arm_const_bounds (imm, 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3689
;; Integer-to-f16 conversion taking an extra immediate (operand 2), over the
;; VCVTHI modes.  The immediate is range-checked when the insn is output.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* Check operand 2 against the bounds 0 and 17 before printing it.  */
  rtx imm = operands[2];
  arm_const_bounds (imm, 0, 17);
  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
3703
;; f16-to-integer conversions selected by the VCVT_HF_US iterator; the
;; mnemonic suffix comes from <vcvth_op>.
(define_insn "neon_vcvt<vcvth_op><sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_HF_US))]
  "TARGET_NEON_FP16INST"
  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3714
;; vmovn: produce a <V_narrow> result from a VN-mode source.
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3723
;; vqmovn.<sup>: produce a <V_narrow> result from a VN-mode source, one
;; pattern per member of the VQMOVN iterator.
(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3732
;; vqmovun: produce a <V_narrow> result from a VN-mode source.
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3741
;; vmovl.<sup>: produce a <V_widen> result from a VW-mode source, one pattern
;; per member of the VMOVL iterator.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")]
          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
3750
;; vmul by lane for the VMD modes: operand 1 is multiplied by the lane of
;; operand 2 selected by the immediate operand 3.  The scheduling type
;; depends on whether the mode is a float mode.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "w")
           (match_operand:VMD 2 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3767
;; vmul by lane for the VMQ modes: the scalar comes from a <V_HALF>-mode
;; operand 2, with the lane chosen by the immediate operand 3.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3784
;; vmul.f16 by lane for the VH modes: the scalar always comes from a V4HF
;; operand 2, with the lane chosen by the immediate operand 3.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
)
3796
;; vmull.<sup> by lane: VMDI-mode operands 1 and 2 give a <V_widen> result;
;; operand 3 is the lane index into operand 2.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VMULL_LANE))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
3810
;; vqdmull by lane: VMDI-mode operands 1 and 2 give a <V_widen> result;
;; operand 3 is the lane index into operand 2.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
3824
;; vqdmulh / vqrdmulh by lane for the VMQI modes: the scalar comes from a
;; <V_HALF>-mode operand 2, with the lane chosen by the immediate operand 3.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3838
;; vqdmulh / vqrdmulh by lane for the VMDI modes: operand 1 is combined with
;; the lane of operand 2 selected by the immediate operand 3.
;;
;; The "type" attribute deliberately has no "_q" suffix here: the other
;; VMDI/VMD lane patterns in this file use the unsuffixed scheduling type,
;; while the VMQI variant of this pattern carries the "_q" one.  The change
;; affects only scheduling classification, not the emitted assembly.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)
3852
;; vqrdmlah_lane, vqrdmlsh_lane for the VMQI modes.  Operand 1 is tied to
;; the destination; the scalar comes from the <V_HALF>-mode operand 3 at the
;; lane given by operand 4.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "0")
           (match_operand:VMQI 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)
3869
;; vqrdmlah_lane, vqrdmlsh_lane for the VMDI modes.  Operand 1 is tied to
;; the destination; the scalar comes from operand 3 at the lane given by
;; operand 4.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)
3885
;; vmla by lane for the VMD modes.  Operand 1 is tied to the destination;
;; operand 2 is combined with the lane of operand 3 selected by operand 4.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3903
;; vmla by lane for the VMQ modes.  Operand 1 is tied to the destination;
;; the scalar comes from the <V_HALF>-mode operand 3 at the lane given by
;; operand 4.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3921
;; vmlal.<sup> by lane.  The <V_widen> operand 1 is tied to the destination;
;; VMDI operand 2 is combined with the lane of operand 3 selected by
;; operand 4.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLAL_LANE))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3936
;; vqdmlal by lane.  The <V_widen> operand 1 is tied to the destination;
;; VMDI operand 2 is combined with the lane of operand 3 selected by
;; operand 4.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
3951
;; vmls by lane for the VMD modes.  Operand 1 is tied to the destination;
;; operand 2 is combined with the lane of operand 3 selected by operand 4.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3969
;; vmls by lane for the VMQ modes.  Operand 1 is tied to the destination;
;; the scalar comes from the <V_HALF>-mode operand 3 at the lane given by
;; operand 4.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3987
;; vmlsl.<sup> by lane.  The <V_widen> operand 1 is tied to the destination;
;; VMDI operand 2 is combined with the lane of operand 3 selected by
;; operand 4.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLSL_LANE))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4002
;; vqdmlsl by lane.  The <V_widen> operand 1 is tied to the destination;
;; VMDI operand 2 is combined with the lane of operand 3 selected by
;; operand 4.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4017
; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that.  This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector.  In the latter case it is currently better
; to use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.
4024
;; vmul_n for the VMD modes: multiply operand 1 by the scalar operand 2.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Set lane 0 of a fresh register from the scalar, then reuse the _lane
     pattern with lane index 0.  */
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       lane0));
  DONE;
})
4037
;; vmul_n for the VMQ modes: multiply operand 1 by the scalar operand 2.
;; The scalar is staged in a <V_HALF>-mode register.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Set lane 0 of a fresh half-width register from the scalar, then reuse
     the _lane pattern with lane index 0.  */
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       lane0));
  DONE;
})
4050
;; vmul_n for the VH modes: multiply operand 1 by the scalar operand 2.
;; The scalar is always staged in a V4HF register.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  /* Set lane 0 of a fresh V4HF register from the scalar, then reuse the
     _lane pattern with lane index 0.  */
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (V4HFmode);

  emit_insn (gen_neon_vset_lanev4hf (scalar_vec, operands[2], scalar_vec,
                                     lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       lane0));
  DONE;
})
4063
;; vmulls_n: expands through neon_vmulls_lane with the scalar operand 2
;; staged in lane 0 of a temporary.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], scalar_vec,
                                         lane0));
  DONE;
})
4076
;; vmullu_n: expands through neon_vmullu_lane with the scalar operand 2
;; staged in lane 0 of a temporary.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], scalar_vec,
                                         lane0));
  DONE;
})
4089
;; vqdmull_n: expands through neon_vqdmull_lane with the scalar operand 2
;; staged in lane 0 of a temporary.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
4102
;; vqdmulh_n for the VMDI modes: expands through neon_vqdmulh_lane with the
;; scalar operand 2 staged in lane 0 of a temporary.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
4115
;; vqrdmulh_n for the VMDI modes: expands through neon_vqrdmulh_lane with
;; the scalar operand 2 staged in lane 0 of a temporary.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
4128
;; vqdmulh_n for the VMQI modes: expands through neon_vqdmulh_lane with the
;; scalar operand 2 staged in lane 0 of a <V_HALF>-mode temporary.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
4141
;; vqrdmulh_n for the VMQI modes: expands through neon_vqrdmulh_lane with
;; the scalar operand 2 staged in lane 0 of a <V_HALF>-mode temporary.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
4154
;; vmla_n for the VMD modes: expands through neon_vmla_lane with the scalar
;; operand 3 staged in lane 0 of a temporary.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
4168
;; vmla_n for the VMQ modes: expands through neon_vmla_lane with the scalar
;; operand 3 staged in lane 0 of a <V_HALF>-mode temporary.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
4182
;; vmlals_n: expands through neon_vmlals_lane with the scalar operand 3
;; staged in lane 0 of a temporary.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4196
;; vmlalu_n: expands through neon_vmlalu_lane with the scalar operand 3
;; staged in lane 0 of a temporary.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4210
;; vqdmlal_n: expands through neon_vqdmlal_lane with the scalar operand 3
;; staged in lane 0 of a temporary.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
4224
;; vmls_n for the VMD modes: expands through neon_vmls_lane with the scalar
;; operand 3 staged in lane 0 of a temporary.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
4238
;; vmls_n for the VMQ modes: expands through neon_vmls_lane with the scalar
;; operand 3 staged in lane 0 of a <V_HALF>-mode temporary.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
4252
;; vmlsls_n: expands through neon_vmlsls_lane with the scalar operand 3
;; staged in lane 0 of a temporary.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4266
;; vmlslu_n: expands through neon_vmlslu_lane with the scalar operand 3
;; staged in lane 0 of a temporary.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
4280
;; vqdmlsl_n: expands through neon_vqdmlsl_lane with the scalar operand 3
;; staged in lane 0 of a temporary.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
4294
;; vext over the VDQX modes: operands 1 and 2 are the two source vectors and
;; operand 3 is an immediate, range-checked against the number of units in
;; the mode when the insn is output.
(define_insn "neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")
           (match_operand:VDQX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VEXT))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 0 and the unit count of the mode.  */
  rtx imm = operands[3];
  arm_const_bounds (imm, 0, GET_MODE_NUNITS (<MODE>mode));
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
4308
;; vrev64 over the VDQ modes.
(define_insn "neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ
          [(match_operand:VDQ 1 "s_register_operand" "w")]
          UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4317
;; vrev32 over the VX modes.
(define_insn "neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX
          [(match_operand:VX 1 "s_register_operand" "w")]
          UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4326
;; vrev16 over the VE modes.
(define_insn "neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE
          [(match_operand:VE 1 "s_register_operand" "w")]
          UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4335
; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
; allocation.  For an intrinsic of the form:
;   rD = vbsl_* (rS, rN, rM)
; one of three instructions is chosen according to which source register
; the destination is tied to:
;   vbsl rS, rN, rM  (if D = S)
;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
; The three constraint alternatives below correspond to these cases in
; order: operand 1 tied, operand 3 tied, operand 2 tied.

(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
           (match_operand:VDQX 2 "s_register_operand" " w,w,0")
           (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)
4357
;; Expander for vbsl: operand 1 arrives in the <V_cmp_result> mode and is
;; re-expressed in the vector's own mode before the UNSPEC_VBSL pattern is
;; generated.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "")
        (unspec:VDQX
          [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
           (match_operand:VDQX 2 "s_register_operand" "")
           (match_operand:VDQX 3 "s_register_operand" "")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands of different modes cannot be aliased to one another, so view
     operand 1 in <MODE>mode.  */
  rtx selector = gen_lowpart (<MODE>mode, operands[1]);
  operands[1] = selector;
})
4369
;; vshl, vrshl: both operands are VDQIX registers; the VSHL iterator selects
;; the variant.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4380
;; vqshl, vqrshl: both operands are VDQIX registers; the VQSHL iterator
;; selects the variant.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4391
;; vshr_n, vrshr_n: shift by the immediate operand 2, which is range-checked
;; against the element width when the insn is output.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHR_N))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 1 and element bits + 1.  */
  rtx shift = operands[2];
  arm_const_bounds (shift, 1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4405
;; vshrn_n, vrshrn_n: VN-mode source, <V_narrow> result; the immediate
;; operand 2 is range-checked against half the source element width.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHRN_N))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 1 and (element bits / 2) + 1.  */
  rtx shift = operands[2];
  arm_const_bounds (shift, 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
4419
;; vqshrn_n, vqrshrn_n: VN-mode source, <V_narrow> result; the immediate
;; operand 2 is range-checked against half the source element width.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRN_N))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 1 and (element bits / 2) + 1.  */
  rtx shift = operands[2];
  arm_const_bounds (shift, 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4433
;; vqshrun_n, vqrshrun_n: VN-mode source, <V_narrow> result; the immediate
;; operand 2 is range-checked against half the source element width.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRUN_N))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 1 and (element bits / 2) + 1.  */
  rtx shift = operands[2];
  arm_const_bounds (shift, 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4447
;; vshl by the immediate operand 2, over the VDQIX modes.  The immediate is
;; range-checked against the element width when the insn is output.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 0 and element bits.  */
  rtx shift = operands[2];
  arm_const_bounds (shift, 0, neon_element_bits (<MODE>mode));
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4460
;; vqshl.<sup> by the immediate operand 2, over the VDQIX modes.  The
;; immediate is range-checked against the element width when the insn is
;; output.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHL_N))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 0 and element bits.  */
  rtx shift = operands[2];
  arm_const_bounds (shift, 0, neon_element_bits (<MODE>mode));
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4473
;; vqshlu by the immediate operand 2, over the VDQIX modes.  The immediate
;; is range-checked against the element width when the insn is output.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 0 and element bits.  */
  rtx shift = operands[2];
  arm_const_bounds (shift, 0, neon_element_bits (<MODE>mode));
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4486
;; vshll.<sup> by the immediate operand 2: VW-mode source, <V_widen> result.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHLL_N))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 0 and element bits + 1.
     NOTE(review): the previous comment here read "0 < imm <= size", yet the
     lower bound passed is 0, not 1 -- confirm which is intended.  */
  rtx shift = operands[2];
  arm_const_bounds (shift, 0, neon_element_bits (<MODE>mode) + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
4500
;; vsra_n, vrsra_n: operand 1 is tied to the destination, operand 2 is the
;; vector source, and the immediate operand 3 is range-checked against the
;; element width when the insn is output.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          VSRA_N))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 1 and element bits + 1.  */
  rtx shift = operands[3];
  arm_const_bounds (shift, 1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
4515
;; vsri: operand 1 is tied to the destination, operand 2 is the vector
;; source, and the immediate operand 3 is range-checked against the element
;; width when the insn is output.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSRI))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 1 and element bits + 1.  */
  rtx shift = operands[3];
  arm_const_bounds (shift, 1, neon_element_bits (<MODE>mode) + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
4529
;; vsli: operand 1 is tied to the destination, operand 2 is the vector
;; source, and the immediate operand 3 is range-checked against the element
;; width when the insn is output.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSLI))]
  "TARGET_NEON"
{
  /* Bounds handed to the checker: 0 and element bits.  */
  rtx shift = operands[3];
  arm_const_bounds (shift, 0, neon_element_bits (<MODE>mode));
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
4543
;; vtbl with a single V8QI table register (operand 1); operand 2 supplies
;; the indices.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
4553
;; vtbl with a two-register table held in a TImode operand (operand 1);
;; operand 2 supplies the indices.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Assembler operands: destination, the table registers, then the
     index vector.  The table registers are derived from the first hard
     register of operand 1, stepping the register number by 2.  */
  rtx asm_ops[4];
  int first_regno = REGNO (operands[1]);

  asm_ops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  asm_ops[3] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4574
;; vtbl with a three-register table held in an EImode operand (operand 1);
;; operand 2 supplies the indices.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Assembler operands: destination, the three table registers, then
     the index vector.  Table registers step the hard register number
     by 2 from the first register of operand 1.  */
  rtx asm_ops[5];
  int first_regno = REGNO (operands[1]);

  asm_ops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  asm_ops[4] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4596
;; vtbl with a four-register table held in an OImode operand (operand 1);
;; operand 2 supplies the indices.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Assembler operands: destination, the four table registers, then
     the index vector.  Table registers step the hard register number
     by 2 from the first register of operand 1.  */
  rtx asm_ops[6];
  int first_regno = REGNO (operands[1]);

  asm_ops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  asm_ops[5] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4619
;; These three are used by the vec_perm infrastructure for V16QImode.
;; This one performs a V16QI table lookup with a V16QI table: after
;; reload it is split into two neon_vtbl2v8qi insns, one per half of the
;; destination/index vectors, both reading the table viewed as TImode.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx table = gen_lowpart (TImode, operands[1]);
  rtx index = operands[2];
  unsigned half_ofs[2];

  /* Low half first, then high half.  */
  half_ofs[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_ofs[1] = subreg_highpart_offset (V8QImode, V16QImode);

  for (int i = 0; i < 2; i++)
    {
      rtx dest_half = simplify_subreg (V8QImode, dest, V16QImode,
                                       half_ofs[i]);
      rtx index_half = simplify_subreg (V8QImode, index, V16QImode,
                                        half_ofs[i]);
      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, index_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
4651
;; V16QI table lookup with an OImode table (operand 1).  After reload it
;; is split into two neon_vtbl2v8qi insns, one for each half of the
;; destination/index vectors; operand 1 is passed through unchanged.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx table = operands[1];
  rtx index = operands[2];
  unsigned half_ofs[2];

  /* Low half first, then high half.  */
  half_ofs[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_ofs[1] = subreg_highpart_offset (V8QImode, V16QImode);

  for (int i = 0; i < 2; i++)
    {
      rtx dest_half = simplify_subreg (V8QImode, dest, V16QImode,
                                       half_ofs[i]);
      rtx index_half = simplify_subreg (V8QImode, index, V16QImode,
                                        half_ofs[i]);
      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, index_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
4682
;; ??? Logically the regular neon_vcombine pattern should be extended to
;; accept quad-word input modes and produce octa-word output modes.  Doing
;; so would require move support for octa-word vector modes, which seems
;; overkill for this single use in vec_perm.
;;
;; Combines two V16QI registers into one OImode value; the work is
;; delegated to neon_split_vcombine once reload has completed.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* The splitter emits the replacement insns itself.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
4702
;; vtbx with a single V8QI table register (operand 2).  Operand 1 is tied
;; to the destination; operand 3 supplies the indices.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
4713
;; vtbx with a two-register table held in a TImode operand (operand 2).
;; Operand 1 is tied to the destination; operand 3 supplies the indices.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:TI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Assembler operands: destination, the two table registers, then the
     index vector.  Table registers step the hard register number by 2
     from the first register of operand 2.  */
  rtx asm_ops[4];
  int first_regno = REGNO (operands[2]);

  asm_ops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  asm_ops[3] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4735
;; vtbx with a three-register table held in an EImode operand (operand 2).
;; Operand 1 is tied to the destination; operand 3 supplies the indices.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:EI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Assembler operands: destination, the three table registers, then
     the index vector.  Table registers step the hard register number
     by 2 from the first register of operand 2.  */
  rtx asm_ops[5];
  int first_regno = REGNO (operands[2]);

  asm_ops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  asm_ops[4] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4758
;; vtbx with a four-register table held in an OImode operand (operand 2).
;; Operand 1 is tied to the destination; operand 3 supplies the indices.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:OI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Assembler operands: destination, the four table registers, then
     the index vector.  Table registers step the hard register number
     by 2 from the first register of operand 2.  */
  rtx asm_ops[6];
  int first_regno = REGNO (operands[2]);

  asm_ops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  asm_ops[5] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4782
;; Expander for vtrn: a single parallel that sets two results (operands 0
;; and 3) from the same pair of inputs (operands 1 and 2), tagged
;; UNSPEC_VTRN1 and UNSPEC_VTRN2 respectively.
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VTRN1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
4794
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2; input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
          UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
4808
;; Expander for vzip: a single parallel that sets two results (operands 0
;; and 3) from the same pair of inputs (operands 1 and 2), tagged
;; UNSPEC_VZIP1 and UNSPEC_VZIP2 respectively.
(define_expand "neon_vzip<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VZIP1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
4820
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2; input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
          UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4834
;; Expander for vuzp: a single parallel that sets two results (operands 0
;; and 3) from the same pair of inputs (operands 1 and 2), tagged
;; UNSPEC_VUZP1 and UNSPEC_VUZP2 respectively.
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VUZP1))
      (set (match_operand:VDQWH 3 "s_register_operand" "")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
4846
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2; input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
          UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4860
;; Standard-name expander for a one-vector "load lanes": it produces the
;; same UNSPEC_VLD1 RTL that the neon_vld1<mode> insn matches.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON")
4866
;; vld1: load one whole D or Q vector register from memory (operand 1).
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
4875
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  The lane numbers are restored to architectural
;; lane order here.
;;
;; D-register form: load one element from memory (operand 1) into lane
;; operand 3 of the vector; operand 2 is tied to the destination.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VDX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  /* Map the GCC lane number back to the architectural lane number and
     store it in operand 3 for the template.  */
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  operands[3] = GEN_INT (arch_lane);

  /* A one-element vector is loaded whole, without a lane suffix.  */
  return (GET_MODE_NUNITS (<MODE>mode) == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4897
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Q-register form: the lane is located in one of the two halves of the
;; quad register, and the instruction is printed against that half.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VQX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int half_regno = REGNO (operands[0]);

  /* Lanes in the upper half belong to the second half-width register,
     which is two hard register numbers further on.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      half_regno += 2;
    }

  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements in total each half holds a single element, so no
     lane suffix is printed.  */
  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4926
;; vld1 (all lanes), D-register form: load one element from memory and
;; duplicate it across the vector (expressed as vec_duplicate).
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4934
;; Special case for DImode.  Treat it exactly like a simple load: the
;; expander produces plain UNSPEC_VLD1 RTL rather than a vec_duplicate.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI
          [(match_operand:DI 1 "neon_struct_operand" "")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
4943
;; vld1 (all lanes), Q-register form: load one element from memory and
;; duplicate it across the vector.  The template names the two halves of
;; the destination via the %e0 and %f0 operand modifiers.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4953
;; V2DI duplicate-load.  After reload this is split into a DImode load
;; into the low half of the destination (via neon_vld1_dupdi) followed by
;; a move of that low half into the high half.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);

  /* Load once, then copy low -> high.  */
  emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));
  emit_move_insn (gen_highpart (DImode, operands[0]), low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
4970
;; Standard-name expander for a one-vector "store lanes": it produces the
;; same UNSPEC_VST1 RTL that the neon_vst1<mode> insn matches.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand")]
          UNSPEC_VST1))]
  "TARGET_NEON")
4976
;; vst1: store one whole D or Q vector register (operand 1) to memory
;; (operand 0).
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")]
          UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
4984
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; D-register form: store lane operand 2 of the vector (operand 1) to
;; memory (operand 0).
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  /* Map the GCC lane number back to the architectural lane number and
     store it in operand 2 for the template.  */
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  operands[2] = GEN_INT (arch_lane);

  /* A one-element vector is stored whole, without a lane suffix.  */
  return (GET_MODE_NUNITS (<MODE>mode) == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5005
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Q-register form: the lane is located in one of the two halves of the
;; quad register, and the instruction is printed against that half.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int half_regno = REGNO (operands[1]);

  /* Lanes in the upper half belong to the second half-width register,
     which is two hard register numbers further on.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      half_regno += 2;
    }

  operands[2] = GEN_INT (arch_lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements in total each half holds a single element, so no
     lane suffix is printed.  */
  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5033
;; Standard-name expander for a two-vector structure load into a TImode
;; register group.  The UNSPEC_VSTRUCTDUMMY operand only carries the
;; vector mode; the RTL matches the neon_vld2<mode> insn for VDX modes.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5040
;; vld2 into a pair of D registers (TImode group).  For 64-bit elements
;; the pattern emits vld1.64 instead of vld2, and the scheduling type is
;; chosen to match.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
{
  /* 64-bit elements are loaded with vld1.64.  */
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_2reg<q>")
          (const_string "neon_load2_2reg<q>")))]
)
5058
;; Standard-name expander for a two-vector structure load into an OImode
;; register group.  The UNSPEC_VSTRUCTDUMMY operand only carries the
;; vector mode; the RTL matches the neon_vld2<mode> insn for VQ2 modes.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5065
;; vld2 into an OImode register group (Q-register vector modes).
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])
5074
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; vld2 to a single lane of a pair of D registers (TImode group);
;; operand 2 is tied to the destination.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:TI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  /* Assembler operands: the two destination registers (two hard
     register numbers apart), the memory operand, and the architectural
     lane number.  */
  rtx asm_ops[4];
  int first_regno = REGNO (operands[0]);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));

  for (int i = 0; i < 2; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 2 * i);
  asm_ops[2] = operands[1];
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5098
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; vld2 to a single lane of an OImode register group; operand 2 is tied
;; to the destination.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  rtx asm_ops[4];
  HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int first_regno = REGNO (operands[0]);

  /* A lane in the upper half is addressed through the registers that
     start two hard register numbers further on.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      first_regno += 2;
    }

  /* The two registers named are four hard register numbers apart.  */
  for (int i = 0; i < 2; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[2] = operands[1];
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5128
;; vld2 (all lanes) into a pair of D registers (TImode group).  Vector
;; modes with a single element are handled with a plain vld1 instead, and
;; the scheduling type follows the same split.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  /* Only multi-element vectors use the all-lanes syntax.  */
  return (GET_MODE_NUNITS (<MODE>mode) > 1
          ? "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
          : "vld1.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load2_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))]
)
5146
;; Standard-name expander for a two-vector structure store from a TImode
;; register group.  The UNSPEC_VSTRUCTDUMMY operand only carries the
;; vector mode; the RTL matches the neon_vst2<mode> insn for VDX modes.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5153
;; vst2 from a pair of D registers (TImode group).  For 64-bit elements
;; the pattern emits vst1.64 instead of vst2.
;;
;; Scheduling type: this insn stores two whole registers, not a single
;; lane, so the non-64-bit case is classified as neon_store2_2reg<q>.
;; That mirrors neon_vld2<mode> (neon_load2_2reg<q>) and the Q-register
;; neon_vst2<mode> (neon_store2_4reg<q>); the one-lane type is reserved
;; for the neon_vst2_lane<mode> patterns.  Only the scheduler model is
;; affected; the emitted assembly is unchanged.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
{
  /* 64-bit elements are stored with vst1.64.  */
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_store1_2reg<q>")
          (const_string "neon_store2_2reg<q>")))]
)
5171
;; Standard-name expander for a two-vector structure store from an OImode
;; register group.  The UNSPEC_VSTRUCTDUMMY operand only carries the
;; vector mode; the RTL matches the neon_vst2<mode> insn for VQ2 modes.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5178
;; vst2 from an OImode register group (Q-register vector modes).
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
5188
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; vst2 of a single lane from a pair of D registers (TImode group).
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  /* Assembler operands: the memory operand, the two source registers
     (two hard register numbers apart), and the architectural lane
     number.  */
  rtx asm_ops[4];
  int first_regno = REGNO (operands[1]);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));

  asm_ops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    asm_ops[i + 1] = gen_rtx_REG (DImode, first_regno + 2 * i);
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5212
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; vst2 of a single lane from an OImode register group.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  rtx asm_ops[4];
  HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int first_regno = REGNO (operands[1]);

  /* A lane in the upper half is addressed through the registers that
     start two hard register numbers further on.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      first_regno += 2;
    }

  /* The two registers named are four hard register numbers apart.  */
  asm_ops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    asm_ops[i + 1] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5242
;; Standard-name expander for a three-vector structure load into an
;; EImode register group.  The UNSPEC_VSTRUCTDUMMY operand only carries
;; the vector mode; the RTL matches the neon_vld3<mode> insn for VDX modes.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON")
5249
;; vld3 into three D registers (EImode group).  For 64-bit elements the
;; pattern emits vld1.64 instead of vld3, and the scheduling type is
;; chosen to match.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON"
{
  /* 64-bit elements are loaded with vld1.64.  */
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_3reg<q>")
          (const_string "neon_load3_3reg<q>")))]
)
5267
;; Standard-name expander for a three-vector structure load into a CImode
;; register group; it simply forwards to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx load = gen_neon_vld3<mode> (operands[0], operands[1]);

  emit_insn (load);
  DONE;
})
5277
;; Q-register vld3.  The CImode load is expanded as two instructions:
;; neon_vld3qa on the first EImode-sized piece of memory, then neon_vld3qb
;; on the following EImode-sized piece, the latter also taking the
;; partially loaded destination as an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First piece: offset 0 of the original memory operand.  */
  rtx first_piece = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_piece));

  /* Second piece: immediately after the first.  */
  rtx second_piece = adjust_address (first_piece, EImode,
                                     GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_piece,
                                    operands[0]));
  DONE;
})
5292
;; First half of a Q-register vld3: loads into the registers at hard
;; register offsets 0, 4 and 8 from the start of the CImode destination.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  /* Assembler operands: three destination registers, then the memory
     operand.  */
  rtx asm_ops[4];
  int first_regno = REGNO (operands[0]);

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5311
;; Second half of a Q-register vld3: loads into the registers at hard
;; register offsets 2, 6 and 10 from the start of the CImode destination.
;; Operand 2 (tied to the destination) carries the value produced by the
;; first half.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  /* Assembler operands: three destination registers, then the memory
     operand.  */
  rtx asm_ops[4];
  int first_regno = REGNO (operands[0]) + 2;

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5331
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; vld3 to a single lane of three D registers (EImode group); operand 2
;; is tied to the destination.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:EI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  /* Assembler operands: the three destination registers (two hard
     register numbers apart), the memory operand, and the architectural
     lane number.  */
  rtx asm_ops[5];
  int first_regno = REGNO (operands[0]);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 2 * i);
  asm_ops[3] = operands[1];
  asm_ops[4] = GEN_INT (arch_lane);

  /* NOTE(review): the address is printed with plain %3 here, whereas the
     vld1/vld2 lane patterns use the %A modifier -- presumably
     deliberate; confirm before changing.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5357
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; vld3 to a single lane of a CImode register group; operand 2 is tied to
;; the destination.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  rtx asm_ops[5];
  HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int first_regno = REGNO (operands[0]);

  /* A lane in the upper half is addressed through the registers that
     start two hard register numbers further on.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      first_regno += 2;
    }

  /* The three registers named are four hard register numbers apart.  */
  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 4 * i);
  asm_ops[3] = operands[1];
  asm_ops[4] = GEN_INT (arch_lane);

  /* NOTE(review): the address is printed with plain %3 here, whereas the
     vld1/vld2 lane patterns use the %A modifier -- presumably
     deliberate; confirm before changing.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5389
;; vld3 (all lanes) into three D registers (EImode group).  Vector modes
;; with a single element are handled with a plain vld1 instead, and the
;; scheduling type follows the same split.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  /* Single-element vectors: ordinary multi-register vld1.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* Otherwise name the three destination registers (two hard register
     numbers apart) explicitly, followed by the memory operand.  */
  rtx asm_ops[4];
  int first_regno = REGNO (operands[0]);

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, first_regno + 2 * i);
  asm_ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", asm_ops);
  return "";
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load3_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))])
5415
;; Standard-name expander for a three-vector structure store from an
;; EImode register group.  The UNSPEC_VSTRUCTDUMMY operand only carries
;; the vector mode; the RTL matches the neon_vst3<mode> insn for VDX modes.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON")
5422
;; vst3 from three D registers (EImode group).  For 64-bit elements the
;; pattern emits vst1.64 instead of vst3.
;;
;; Scheduling type: this insn stores three whole registers, not a single
;; lane, so the non-64-bit case is classified as neon_store3_3reg<q>.
;; That mirrors neon_vld3<mode> (neon_load3_3reg<q>) and the
;; neon_vst3qa/neon_vst3qb halves (neon_store3_3reg<q>); the one-lane
;; type is reserved for the neon_vst3_lane<mode> patterns.  Only the
;; scheduler model is affected; the emitted assembly is unchanged.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON"
{
  /* 64-bit elements are stored with vst1.64.  */
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_store1_3reg<q>")
          (const_string "neon_store3_3reg<q>")))])
5439
;; Standard-name expander for a three-vector structure store from a
;; CImode register group; it simply forwards to the neon_vst3<mode>
;; expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx store = gen_neon_vst3<mode> (operands[0], operands[1]);

  emit_insn (store);
  DONE;
})
5449
;; Q-register vst3.  The CImode store is expanded as two instructions:
;; neon_vst3qa to the first EImode-sized piece of memory, then neon_vst3qb
;; to the following EImode-sized piece.  Both read the same CImode source.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First piece: offset 0 of the original memory operand.  */
  rtx first_piece = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_piece, operands[1]));

  /* Second piece: immediately after the first.  */
  rtx second_piece = adjust_address (first_piece, EImode,
                                     GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_piece, operands[1]));
  DONE;
})
5464
;; First half of a quad-register vst3 (UNSPEC_VST3A).
;; Operand 0 is an EI-sized memory destination, operand 1 the CI source.
;; The output code emits one vst3 whose register list is the DImode
;; registers at REGNO (operands[1]) + 0, + 4 and + 8.
5465 (define_insn "neon_vst3qa<mode>"
5466 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5467 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5468 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5469 UNSPEC_VST3A))]
5470 "TARGET_NEON"
5471 {
5472 int regno = REGNO (operands[1]);
5473 rtx ops[4];
5474 ops[0] = operands[0];
5475 ops[1] = gen_rtx_REG (DImode, regno);
5476 ops[2] = gen_rtx_REG (DImode, regno + 4);
5477 ops[3] = gen_rtx_REG (DImode, regno + 8);
/* The instruction is printed here, so the returned template is empty.  */
5478 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5479 return "";
5480 }
5481 [(set_attr "type" "neon_store3_3reg<q>")]
5482 )
5483
;; Second half of a quad-register vst3 (UNSPEC_VST3B).
;; Same shape as neon_vst3qa<mode>, but the register list is the DImode
;; registers at REGNO (operands[1]) + 2, + 6 and + 10.
5484 (define_insn "neon_vst3qb<mode>"
5485 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5486 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5487 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5488 UNSPEC_VST3B))]
5489 "TARGET_NEON"
5490 {
5491 int regno = REGNO (operands[1]);
5492 rtx ops[4];
5493 ops[0] = operands[0];
5494 ops[1] = gen_rtx_REG (DImode, regno + 2);
5495 ops[2] = gen_rtx_REG (DImode, regno + 6);
5496 ops[3] = gen_rtx_REG (DImode, regno + 10);
/* The instruction is printed here, so the returned template is empty.  */
5497 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5498 return "";
5499 }
5500 [(set_attr "type" "neon_store3_3reg<q>")]
5501 )
5502
5503 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5504 ;; here on big endian targets.
;; Single-lane vst3 from an EI register (VD_LANE modes).
;; Operand 0: <V_three_elem> memory destination; operand 1: EI source;
;; operand 2: immediate lane number.  The lane number is mapped through
;; NEON_ENDIAN_LANE_N and the same lane of the DImode registers at
;; regno + 0, + 2 and + 4 is stored.  Note the address is printed with
;; plain %0 here, not with the %A modifier.
5505 (define_insn "neon_vst3_lane<mode>"
5506 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5507 (unspec:<V_three_elem>
5508 [(match_operand:EI 1 "s_register_operand" "w")
5509 (match_operand:SI 2 "immediate_operand" "i")
5510 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5511 UNSPEC_VST3_LANE))]
5512 "TARGET_NEON"
5513 {
5514 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5515 int regno = REGNO (operands[1]);
5516 rtx ops[5];
5517 ops[0] = operands[0];
5518 ops[1] = gen_rtx_REG (DImode, regno);
5519 ops[2] = gen_rtx_REG (DImode, regno + 2);
5520 ops[3] = gen_rtx_REG (DImode, regno + 4);
5521 ops[4] = GEN_INT (lane);
5522 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5523 ops);
5524 return "";
5525 }
5526 [(set_attr "type" "neon_store3_one_lane<q>")]
5527 )
5528
5529 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5530 ;; here on big endian targets.
;; Single-lane vst3 from a CI register (VQ_HS modes).
;; Operand 0: <V_three_elem> memory destination; operand 1: CI source;
;; operand 2: immediate lane number.  After NEON_ENDIAN_LANE_N mapping, a
;; lane in the upper half (>= GET_MODE_NUNITS / 2) is rebased to the lower
;; half and the starting register number is advanced by 2.  The store then
;; uses the DImode registers at regno + 0, + 4 and + 8.
5531 (define_insn "neon_vst3_lane<mode>"
5532 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5533 (unspec:<V_three_elem>
5534 [(match_operand:CI 1 "s_register_operand" "w")
5535 (match_operand:SI 2 "immediate_operand" "i")
5536 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5537 UNSPEC_VST3_LANE))]
5538 "TARGET_NEON"
5539 {
5540 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5541 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5542 int regno = REGNO (operands[1]);
5543 rtx ops[5];
/* Upper-half lanes: rebase the lane index and start two regnos later.  */
5544 if (lane >= max / 2)
5545 {
5546 lane -= max / 2;
5547 regno += 2;
5548 }
5549 ops[0] = operands[0];
5550 ops[1] = gen_rtx_REG (DImode, regno);
5551 ops[2] = gen_rtx_REG (DImode, regno + 4);
5552 ops[3] = gen_rtx_REG (DImode, regno + 8);
5553 ops[4] = GEN_INT (lane);
5554 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5555 ops);
5556 return "";
5557 }
5558 [(set_attr "type" "neon_store3_one_lane<q>")]
5559 )
5560
;; Expander for the vec_load_lanesoi<mode> pattern name, iterated over VDX.
;; It has no preparation statements, so the template is emitted as written:
;; an UNSPEC_VLD4 load of an OI neon_struct_operand into an OI register,
;; i.e. the same RTL shape that the neon_vld4<mode> insn matches.
5561 (define_expand "vec_load_lanesoi<mode>"
5562 [(set (match_operand:OI 0 "s_register_operand")
5563 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5564 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5565 UNSPEC_VLD4))]
5566 "TARGET_NEON")
5567
;; Load an OI register from memory (VDX element modes).
;; Operand 0 is the OI destination register, operand 1 the memory source.
;; For 64-bit elements the load is emitted as vld1.64; otherwise as
;; vld4.<V_sz_elem>.  The "type" attribute is chosen with the same test
;; (neon_load1_4reg<q> for the 64-bit case, neon_load4_4reg<q> otherwise).
5568 (define_insn "neon_vld4<mode>"
5569 [(set (match_operand:OI 0 "s_register_operand" "=w")
5570 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5571 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5572 UNSPEC_VLD4))]
5573 "TARGET_NEON"
5574 {
5575 if (<V_sz_elem> == 64)
5576 return "vld1.64\t%h0, %A1";
5577 else
5578 return "vld4.<V_sz_elem>\t%h0, %A1";
5579 }
5580 [(set (attr "type")
5581 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5582 (const_string "neon_load1_4reg<q>")
5583 (const_string "neon_load4_4reg<q>")))]
5584 )
5585
;; Expander for the vec_load_lanesxi<mode> pattern name (VQ2 modes).
;; It only forwards its two operands (XI register, XI memory) to
;; gen_neon_vld4<mode> and then finishes with DONE.
5586 (define_expand "vec_load_lanesxi<mode>"
5587 [(match_operand:XI 0 "s_register_operand")
5588 (match_operand:XI 1 "neon_struct_operand")
5589 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5590 "TARGET_NEON"
5591 {
5592 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
5593 DONE;
5594 })
5595
;; Quad-register form of neon_vld4<mode> (VQ2 modes).
;; The XI memory operand is re-addressed as two OImode pieces:
;; neon_vld4qa<mode> loads from the piece at offset 0 and neon_vld4qb<mode>
;; from the piece GET_MODE_SIZE (OImode) bytes after it.  The second insn
;; also receives operands[0] as an input, since it updates the register that
;; the first insn has already partly filled.
5596 (define_expand "neon_vld4<mode>"
5597 [(match_operand:XI 0 "s_register_operand")
5598 (match_operand:XI 1 "neon_struct_operand")
5599 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5600 "TARGET_NEON"
5601 {
5602 rtx mem;
5603
/* First OImode piece, at the start of the source.  */
5604 mem = adjust_address (operands[1], OImode, 0);
5605 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
/* Second OImode piece, immediately after the first.  */
5606 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5607 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
5608 DONE;
5609 })
5610
;; First half of a quad-register vld4 (UNSPEC_VLD4A).
;; Operand 0 is the XI destination, operand 1 an OI-sized memory source.
;; The output code emits one vld4 whose register list is the DImode
;; registers at REGNO (operands[0]) + 0, + 4, + 8 and + 12.
5611 (define_insn "neon_vld4qa<mode>"
5612 [(set (match_operand:XI 0 "s_register_operand" "=w")
5613 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5614 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5615 UNSPEC_VLD4A))]
5616 "TARGET_NEON"
5617 {
5618 int regno = REGNO (operands[0]);
5619 rtx ops[5];
5620 ops[0] = gen_rtx_REG (DImode, regno);
5621 ops[1] = gen_rtx_REG (DImode, regno + 4);
5622 ops[2] = gen_rtx_REG (DImode, regno + 8);
5623 ops[3] = gen_rtx_REG (DImode, regno + 12);
5624 ops[4] = operands[1];
/* The instruction is printed here, so the returned template is empty.  */
5625 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5626 return "";
5627 }
5628 [(set_attr "type" "neon_load4_4reg<q>")]
5629 )
5630
;; Second half of a quad-register vld4 (UNSPEC_VLD4B).
;; Operand 2 is the previous value of the XI destination and is tied to
;; operand 0 by the "0" constraint.  The register list is the DImode
;; registers at REGNO (operands[0]) + 2, + 6, + 10 and + 14.
5631 (define_insn "neon_vld4qb<mode>"
5632 [(set (match_operand:XI 0 "s_register_operand" "=w")
5633 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5634 (match_operand:XI 2 "s_register_operand" "0")
5635 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5636 UNSPEC_VLD4B))]
5637 "TARGET_NEON"
5638 {
5639 int regno = REGNO (operands[0]);
5640 rtx ops[5];
5641 ops[0] = gen_rtx_REG (DImode, regno + 2);
5642 ops[1] = gen_rtx_REG (DImode, regno + 6);
5643 ops[2] = gen_rtx_REG (DImode, regno + 10);
5644 ops[3] = gen_rtx_REG (DImode, regno + 14);
5645 ops[4] = operands[1];
/* The instruction is printed here, so the returned template is empty.  */
5646 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5647 return "";
5648 }
5649 [(set_attr "type" "neon_load4_4reg<q>")]
5650 )
5651
5652 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5653 ;; here on big endian targets.
;; Single-lane vld4 into an OI register (VD_LANE modes).
;; Operand 1: <V_four_elem> memory source; operand 2: previous register
;; value, tied to operand 0; operand 3: immediate lane number.  The lane
;; number is mapped through NEON_ENDIAN_LANE_N and that lane of the DImode
;; registers at regno + 0, + 2, + 4 and + 6 is loaded.
5654 (define_insn "neon_vld4_lane<mode>"
5655 [(set (match_operand:OI 0 "s_register_operand" "=w")
5656 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5657 (match_operand:OI 2 "s_register_operand" "0")
5658 (match_operand:SI 3 "immediate_operand" "i")
5659 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5660 UNSPEC_VLD4_LANE))]
5661 "TARGET_NEON"
5662 {
5663 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5664 int regno = REGNO (operands[0]);
5665 rtx ops[6];
5666 ops[0] = gen_rtx_REG (DImode, regno);
5667 ops[1] = gen_rtx_REG (DImode, regno + 2);
5668 ops[2] = gen_rtx_REG (DImode, regno + 4);
5669 ops[3] = gen_rtx_REG (DImode, regno + 6);
5670 ops[4] = operands[1];
5671 ops[5] = GEN_INT (lane);
5672 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5673 ops);
5674 return "";
5675 }
5676 [(set_attr "type" "neon_load4_one_lane<q>")]
5677 )
5678
5679 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5680 ;; here on big endian targets.
;; Single-lane vld4 into an XI register (VQ_HS modes).
;; Operand 1: <V_four_elem> memory source; operand 2: previous register
;; value, tied to operand 0; operand 3: immediate lane number.  After
;; NEON_ENDIAN_LANE_N mapping, a lane in the upper half
;; (>= GET_MODE_NUNITS / 2) is rebased to the lower half and the starting
;; register number is advanced by 2.  The load then uses the DImode
;; registers at regno + 0, + 4, + 8 and + 12.
5681 (define_insn "neon_vld4_lane<mode>"
5682 [(set (match_operand:XI 0 "s_register_operand" "=w")
5683 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5684 (match_operand:XI 2 "s_register_operand" "0")
5685 (match_operand:SI 3 "immediate_operand" "i")
5686 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5687 UNSPEC_VLD4_LANE))]
5688 "TARGET_NEON"
5689 {
5690 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5691 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5692 int regno = REGNO (operands[0]);
5693 rtx ops[6];
/* Upper-half lanes: rebase the lane index and start two regnos later.  */
5694 if (lane >= max / 2)
5695 {
5696 lane -= max / 2;
5697 regno += 2;
5698 }
5699 ops[0] = gen_rtx_REG (DImode, regno);
5700 ops[1] = gen_rtx_REG (DImode, regno + 4);
5701 ops[2] = gen_rtx_REG (DImode, regno + 8);
5702 ops[3] = gen_rtx_REG (DImode, regno + 12);
5703 ops[4] = operands[1];
5704 ops[5] = GEN_INT (lane);
5705 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5706 ops);
5707 return "";
5708 }
5709 [(set_attr "type" "neon_load4_one_lane<q>")]
5710 )
5711
;; Load-and-duplicate vld4 into an OI register (VDX modes).
;; When the mode has more than one element, a vld4 with the all-lanes
;; "[]" syntax is emitted on the DImode registers at regno + 0, + 2, + 4
;; and + 6.  For single-element modes a plain vld1 of the register list is
;; emitted instead.  The "type" attribute makes the same distinction using
;; <V_mode_nunits>.
5712 (define_insn "neon_vld4_dup<mode>"
5713 [(set (match_operand:OI 0 "s_register_operand" "=w")
5714 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5715 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5716 UNSPEC_VLD4_DUP))]
5717 "TARGET_NEON"
5718 {
5719 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5720 {
5721 int regno = REGNO (operands[0]);
5722 rtx ops[5];
5723 ops[0] = gen_rtx_REG (DImode, regno);
5724 ops[1] = gen_rtx_REG (DImode, regno + 2);
5725 ops[2] = gen_rtx_REG (DImode, regno + 4);
5726 ops[3] = gen_rtx_REG (DImode, regno + 6);
5727 ops[4] = operands[1];
5728 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5729 ops);
5730 return "";
5731 }
5732 else
5733 return "vld1.<V_sz_elem>\t%h0, %A1";
5734 }
5735 [(set (attr "type")
5736 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5737 (const_string "neon_load4_all_lanes<q>")
5738 (const_string "neon_load1_1reg<q>")))]
5739 )
5740
;; Expander for the vec_store_lanesoi<mode> pattern name, iterated over VDX.
;; It has no preparation statements, so the template is emitted as written:
;; an UNSPEC_VST4 store of an OI register into a neon_struct_operand, i.e.
;; the same RTL shape that the neon_vst4<mode> insn matches.
5741 (define_expand "vec_store_lanesoi<mode>"
5742 [(set (match_operand:OI 0 "neon_struct_operand")
5743 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5744 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5745 UNSPEC_VST4))]
5746 "TARGET_NEON")
5747
;; Store an OI register to memory (VDX element modes).
;; Operand 0 is the destination memory, operand 1 the OI source register.
;; For 64-bit elements the store is emitted as vst1.64; otherwise as
;; vst4.<V_sz_elem>.  The "type" attribute is chosen with the same test
;; (neon_store1_4reg<q> for the 64-bit case, neon_store4_4reg<q> otherwise).
5748 (define_insn "neon_vst4<mode>"
5749 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5750 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5751 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5752 UNSPEC_VST4))]
5753 "TARGET_NEON"
5754 {
5755 if (<V_sz_elem> == 64)
5756 return "vst1.64\t%h1, %A0";
5757 else
5758 return "vst4.<V_sz_elem>\t%h1, %A0";
5759 }
5760 [(set (attr "type")
5761 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5762 (const_string "neon_store1_4reg<q>")
5763 (const_string "neon_store4_4reg<q>")))]
5764 )
5765
;; Expander for the vec_store_lanesxi<mode> pattern name (VQ2 modes).
;; It only forwards its two operands (XI memory, XI register) to
;; gen_neon_vst4<mode> and then finishes with DONE.
5766 (define_expand "vec_store_lanesxi<mode>"
5767 [(match_operand:XI 0 "neon_struct_operand")
5768 (match_operand:XI 1 "s_register_operand")
5769 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5770 "TARGET_NEON"
5771 {
5772 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
5773 DONE;
5774 })
5775
;; Quad-register form of neon_vst4<mode> (VQ2 modes).
;; The XI memory operand is re-addressed as two OImode pieces:
;; neon_vst4qa<mode> stores to the piece at offset 0 and neon_vst4qb<mode>
;; to the piece GET_MODE_SIZE (OImode) bytes after it.  Both insns are
;; handed the complete XI register (operand 1).
5776 (define_expand "neon_vst4<mode>"
5777 [(match_operand:XI 0 "neon_struct_operand")
5778 (match_operand:XI 1 "s_register_operand")
5779 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5780 "TARGET_NEON"
5781 {
5782 rtx mem;
5783
/* First OImode piece, at the start of the destination.  */
5784 mem = adjust_address (operands[0], OImode, 0);
5785 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
/* Second OImode piece, immediately after the first.  */
5786 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5787 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
5788 DONE;
5789 })
5790
;; First half of a quad-register vst4 (UNSPEC_VST4A).
;; Operand 0 is an OI-sized memory destination, operand 1 the XI source.
;; The output code emits one vst4 whose register list is the DImode
;; registers at REGNO (operands[1]) + 0, + 4, + 8 and + 12.
5791 (define_insn "neon_vst4qa<mode>"
5792 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5793 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5794 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5795 UNSPEC_VST4A))]
5796 "TARGET_NEON"
5797 {
5798 int regno = REGNO (operands[1]);
5799 rtx ops[5];
5800 ops[0] = operands[0];
5801 ops[1] = gen_rtx_REG (DImode, regno);
5802 ops[2] = gen_rtx_REG (DImode, regno + 4);
5803 ops[3] = gen_rtx_REG (DImode, regno + 8);
5804 ops[4] = gen_rtx_REG (DImode, regno + 12);
/* The instruction is printed here, so the returned template is empty.  */
5805 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5806 return "";
5807 }
5808 [(set_attr "type" "neon_store4_4reg<q>")]
5809 )
5810
;; Second half of a quad-register vst4 (UNSPEC_VST4B).
;; Same shape as neon_vst4qa<mode>, but the register list is the DImode
;; registers at REGNO (operands[1]) + 2, + 6, + 10 and + 14.
5811 (define_insn "neon_vst4qb<mode>"
5812 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5813 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5814 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5815 UNSPEC_VST4B))]
5816 "TARGET_NEON"
5817 {
5818 int regno = REGNO (operands[1]);
5819 rtx ops[5];
5820 ops[0] = operands[0];
5821 ops[1] = gen_rtx_REG (DImode, regno + 2);
5822 ops[2] = gen_rtx_REG (DImode, regno + 6);
5823 ops[3] = gen_rtx_REG (DImode, regno + 10);
5824 ops[4] = gen_rtx_REG (DImode, regno + 14);
/* The instruction is printed here, so the returned template is empty.  */
5825 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5826 return "";
5827 }
5828 [(set_attr "type" "neon_store4_4reg<q>")]
5829 )
5830
5831 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5832 ;; here on big endian targets.
;; Single-lane vst4 from an OI register (VD_LANE modes).
;; Operand 0: <V_four_elem> memory destination; operand 1: OI source;
;; operand 2: immediate lane number.  The lane number is mapped through
;; NEON_ENDIAN_LANE_N and the same lane of the DImode registers at
;; regno + 0, + 2, + 4 and + 6 is stored.
5833 (define_insn "neon_vst4_lane<mode>"
5834 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5835 (unspec:<V_four_elem>
5836 [(match_operand:OI 1 "s_register_operand" "w")
5837 (match_operand:SI 2 "immediate_operand" "i")
5838 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5839 UNSPEC_VST4_LANE))]
5840 "TARGET_NEON"
5841 {
5842 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5843 int regno = REGNO (operands[1]);
5844 rtx ops[6];
5845 ops[0] = operands[0];
5846 ops[1] = gen_rtx_REG (DImode, regno);
5847 ops[2] = gen_rtx_REG (DImode, regno + 2);
5848 ops[3] = gen_rtx_REG (DImode, regno + 4);
5849 ops[4] = gen_rtx_REG (DImode, regno + 6);
5850 ops[5] = GEN_INT (lane);
5851 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5852 ops);
5853 return "";
5854 }
5855 [(set_attr "type" "neon_store4_one_lane<q>")]
5856 )
5857
5858 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5859 ;; here on big endian targets.
;; Single-lane vst4 from an XI register (VQ_HS modes).
;; Operand 0: <V_four_elem> memory destination; operand 1: XI source;
;; operand 2: immediate lane number.  After NEON_ENDIAN_LANE_N mapping, a
;; lane in the upper half (>= GET_MODE_NUNITS / 2) is rebased to the lower
;; half and the starting register number is advanced by 2.  The store then
;; uses the DImode registers at regno + 0, + 4, + 8 and + 12.
;; This stores one lane per register, so the insn is typed
;; neon_store4_one_lane<q>, matching the D-register neon_vst4_lane pattern,
;; rather than as a whole-register neon_store4_4reg<q> store.
5860 (define_insn "neon_vst4_lane<mode>"
5861 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5862 (unspec:<V_four_elem>
5863 [(match_operand:XI 1 "s_register_operand" "w")
5864 (match_operand:SI 2 "immediate_operand" "i")
5865 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5866 UNSPEC_VST4_LANE))]
5867 "TARGET_NEON"
5868 {
5869 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5870 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5871 int regno = REGNO (operands[1]);
5872 rtx ops[6];
/* Upper-half lanes: rebase the lane index and start two regnos later.  */
5873 if (lane >= max / 2)
5874 {
5875 lane -= max / 2;
5876 regno += 2;
5877 }
5878 ops[0] = operands[0];
5879 ops[1] = gen_rtx_REG (DImode, regno);
5880 ops[2] = gen_rtx_REG (DImode, regno + 4);
5881 ops[3] = gen_rtx_REG (DImode, regno + 8);
5882 ops[4] = gen_rtx_REG (DImode, regno + 12);
5883 ops[5] = GEN_INT (lane);
5884 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5885 ops);
5886 return "";
5887 }
5888 [(set_attr "type" "neon_store4_one_lane<q>")]
5889 )
5890
;; Widen the low half of a VU vector: sign- or zero-extend (code iterator
;; SE) a vec_select whose selector must satisfy vect_par_constant_low.
;; Emitted as vmovl on the %e1 part of operand 1.  Only enabled when
;; !BYTES_BIG_ENDIAN.
5891 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5892 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5893 (SE:<V_unpack> (vec_select:<V_HALF>
5894 (match_operand:VU 1 "register_operand" "w")
5895 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5896 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5897 "vmovl.<US><V_sz_elem> %q0, %e1"
5898 [(set_attr "type" "neon_shift_imm_long")]
5899 )
5900
;; Widen the high half of a VU vector: sign- or zero-extend (code iterator
;; SE) a vec_select whose selector must satisfy vect_par_constant_high.
;; Emitted as vmovl on the %f1 part of operand 1.  Only enabled when
;; !BYTES_BIG_ENDIAN.
5901 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5902 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5903 (SE:<V_unpack> (vec_select:<V_HALF>
5904 (match_operand:VU 1 "register_operand" "w")
5905 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5906 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5907 "vmovl.<US><V_sz_elem> %q0, %f1"
5908 [(set_attr "type" "neon_shift_imm_long")]
5909 )
5910
;; Expander for vec_unpack<US>_hi_<mode> on VU modes.
;; Builds a PARALLEL holding the lane indices <V_mode_nunits>/2 ..
;; <V_mode_nunits>-1 and passes it as the selector operand of
;; neon_vec_unpack<US>_hi_<mode>.  Only enabled when !BYTES_BIG_ENDIAN.
5911 (define_expand "vec_unpack<US>_hi_<mode>"
5912 [(match_operand:<V_unpack> 0 "register_operand" "")
5913 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5914 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5915 {
5916 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5917 rtx t1;
5918 int i;
/* Selector entries are the upper-half lane numbers.  */
5919 for (i = 0; i < (<V_mode_nunits>/2); i++)
5920 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5921
5922 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5923 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
5924 operands[1],
5925 t1));
5926 DONE;
5927 }
5928 )
5929
;; Expander for vec_unpack<US>_lo_<mode> on VU modes.
;; Builds a PARALLEL holding the lane indices 0 .. <V_mode_nunits>/2 - 1
;; and passes it as the selector operand of neon_vec_unpack<US>_lo_<mode>.
;; Only enabled when !BYTES_BIG_ENDIAN.
5930 (define_expand "vec_unpack<US>_lo_<mode>"
5931 [(match_operand:<V_unpack> 0 "register_operand" "")
5932 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5933 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5934 {
5935 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5936 rtx t1;
5937 int i;
/* Selector entries are the lower-half lane numbers.  */
5938 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5939 RTVEC_ELT (v, i) = GEN_INT (i);
5940 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5941 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
5942 operands[1],
5943 t1));
5944 DONE;
5945 }
5946 )
5947
;; Widening multiply of the low halves of two VU vectors.
;; Both inputs are vec_select'ed with the same selector (operand 2, reused
;; through match_dup), which must satisfy vect_par_constant_low, then
;; extended by SE and multiplied.  Emitted as vmull on %e1 and %e3.
;; Only enabled when !BYTES_BIG_ENDIAN.
5948 (define_insn "neon_vec_<US>mult_lo_<mode>"
5949 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5950 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5951 (match_operand:VU 1 "register_operand" "w")
5952 (match_operand:VU 2 "vect_par_constant_low" "")))
5953 (SE:<V_unpack> (vec_select:<V_HALF>
5954 (match_operand:VU 3 "register_operand" "w")
5955 (match_dup 2)))))]
5956 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5957 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5958 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5959 )
5960
;; Expander for vec_widen_<US>mult_lo_<mode> on VU modes.
;; Builds a PARALLEL of the lane indices 0 .. <V_mode_nunits>/2 - 1 and
;; emits neon_vec_<US>mult_lo_<mode>.  Note the argument order of the
;; generated insn: (dest, first input, selector, second input).
;; Only enabled when !BYTES_BIG_ENDIAN.
5961 (define_expand "vec_widen_<US>mult_lo_<mode>"
5962 [(match_operand:<V_unpack> 0 "register_operand" "")
5963 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5964 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5965 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5966 {
5967 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5968 rtx t1;
5969 int i;
/* Selector entries are the lower-half lane numbers.  */
5970 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5971 RTVEC_ELT (v, i) = GEN_INT (i);
5972 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5973
5974 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
5975 operands[1],
5976 t1,
5977 operands[2]));
5978 DONE;
5979 }
5980 )
5981
;; Widening multiply of the high halves of two VU vectors.
;; Both inputs are vec_select'ed with the same selector (operand 2, reused
;; through match_dup), which must satisfy vect_par_constant_high, then
;; extended by SE and multiplied.  Emitted as vmull on %f1 and %f3.
;; Only enabled when !BYTES_BIG_ENDIAN.
5982 (define_insn "neon_vec_<US>mult_hi_<mode>"
5983 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5984 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5985 (match_operand:VU 1 "register_operand" "w")
5986 (match_operand:VU 2 "vect_par_constant_high" "")))
5987 (SE:<V_unpack> (vec_select:<V_HALF>
5988 (match_operand:VU 3 "register_operand" "w")
5989 (match_dup 2)))))]
5990 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5991 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5992 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5993 )
5994
;; Expander for vec_widen_<US>mult_hi_<mode> on VU modes.
;; Builds a PARALLEL of the lane indices <V_mode_nunits>/2 ..
;; <V_mode_nunits>-1 and emits neon_vec_<US>mult_hi_<mode>.  Note the
;; argument order of the generated insn: (dest, first input, selector,
;; second input).  Only enabled when !BYTES_BIG_ENDIAN.
5995 (define_expand "vec_widen_<US>mult_hi_<mode>"
5996 [(match_operand:<V_unpack> 0 "register_operand" "")
5997 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5998 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5999 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6000 {
6001 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6002 rtx t1;
6003 int i;
/* Selector entries are the upper-half lane numbers.  */
6004 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6005 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6006 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6007
6008 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6009 operands[1],
6010 t1,
6011 operands[2]));
6012 DONE;
6013
6014 }
6015 )
6016
;; Widening shift left on VW modes: the RTL is an SE extension of
;; (ashift operand 1 by operand 2), where operand 2 must satisfy
;; const_neon_scalar_shift_amount_operand.  Emitted as vshll.
6017 (define_insn "neon_vec_<US>shiftl_<mode>"
6018 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6019 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6020 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6021 "TARGET_NEON"
6022 {
6023 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6024 }
6025 [(set_attr "type" "neon_shift_imm_long")]
6026 )
6027
;; Expander for vec_widen_<US>shiftl_lo_<mode> on VU modes.
;; Takes the <V_HALF>mode subreg of operand 1 at byte offset 0 and feeds it,
;; with the shift amount, to neon_vec_<US>shiftl_<V_half>.
;; Only enabled when !BYTES_BIG_ENDIAN.
6028 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6029 [(match_operand:<V_unpack> 0 "register_operand" "")
6030 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6031 (match_operand:SI 2 "immediate_operand" "i")]
6032 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6033 {
6034 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6035 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6036 operands[2]));
6037 DONE;
6038 }
6039 )
6040
;; Expander for vec_widen_<US>shiftl_hi_<mode> on VU modes.
;; Takes the <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and feeds it, with the shift amount, to
;; neon_vec_<US>shiftl_<V_half>.  Only enabled when !BYTES_BIG_ENDIAN.
6041 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6042 [(match_operand:<V_unpack> 0 "register_operand" "")
6043 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6044 (match_operand:SI 2 "immediate_operand" "i")]
6045 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6046 {
6047 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6048 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6049 GET_MODE_SIZE (<V_HALF>mode)),
6050 operands[2]));
6051 DONE;
6052 }
6053 )
6054
6055 ;; Vectorize for non-neon-quad case
;; Extend (SE) a whole VDI vector into its <V_widen> mode with one vmovl.
;; Used by the VDI vec_unpack<US>_lo/hi expanders, which then extract one
;; half of the widened result.
6056 (define_insn "neon_unpack<US>_<mode>"
6057 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6058 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6059 "TARGET_NEON"
6060 "vmovl.<US><V_sz_elem> %q0, %P1"
6061 [(set_attr "type" "neon_move")]
6062 )
6063
;; Expander for vec_unpack<US>_lo_<mode> on VDI modes.
;; Widens all of operand 1 into a fresh <V_widen> pseudo with
;; neon_unpack<US>_<mode>, then copies the low half of that pseudo into
;; operand 0 with neon_vget_low<V_widen_l>.
6064 (define_expand "vec_unpack<US>_lo_<mode>"
6065 [(match_operand:<V_double_width> 0 "register_operand" "")
6066 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6067 "TARGET_NEON"
6068 {
6069 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6070 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6071 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6072
6073 DONE;
6074 }
6075 )
6076
;; Expander for vec_unpack<US>_hi_<mode> on VDI modes.
;; Widens all of operand 1 into a fresh <V_widen> pseudo with
;; neon_unpack<US>_<mode>, then copies the high half of that pseudo into
;; operand 0 with neon_vget_high<V_widen_l>.
6077 (define_expand "vec_unpack<US>_hi_<mode>"
6078 [(match_operand:<V_double_width> 0 "register_operand" "")
6079 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6080 "TARGET_NEON"
6081 {
6082 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6083 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6084 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6085
6086 DONE;
6087 }
6088 )
6089
;; Widening multiply of two whole VDI vectors: each input is extended by SE
;; to <V_widen> and the results are multiplied.  Emitted as a single vmull.
6090 (define_insn "neon_vec_<US>mult_<mode>"
6091 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6092 (mult:<V_widen> (SE:<V_widen>
6093 (match_operand:VDI 1 "register_operand" "w"))
6094 (SE:<V_widen>
6095 (match_operand:VDI 2 "register_operand" "w"))))]
6096 "TARGET_NEON"
6097 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6098 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6099 )
6100
;; Expander for vec_widen_<US>mult_hi_<mode> on VDI modes.
;; Computes the full widening product into a fresh <V_widen> pseudo with
;; neon_vec_<US>mult_<mode>, then copies its high half into operand 0 with
;; neon_vget_high<V_widen_l>.
6101 (define_expand "vec_widen_<US>mult_hi_<mode>"
6102 [(match_operand:<V_double_width> 0 "register_operand" "")
6103 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6104 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6105 "TARGET_NEON"
6106 {
6107 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6108 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6109 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6110
6111 DONE;
6112
6113 }
6114 )
6115
;; Expander for vec_widen_<US>mult_lo_<mode> on VDI modes.
;; Computes the full widening product into a fresh <V_widen> pseudo with
;; neon_vec_<US>mult_<mode>, then copies its low half into operand 0 with
;; neon_vget_low<V_widen_l>.
6116 (define_expand "vec_widen_<US>mult_lo_<mode>"
6117 [(match_operand:<V_double_width> 0 "register_operand" "")
6118 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6119 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6120 "TARGET_NEON"
6121 {
6122 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6123 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6124 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6125
6126 DONE;
6127
6128 }
6129 )
6130
;; Expander for vec_widen_<US>shiftl_hi_<mode> on VDI modes.
;; Performs the widening shift of the whole input into a fresh <V_widen>
;; pseudo with neon_vec_<US>shiftl_<mode>, then copies its high half into
;; operand 0 with neon_vget_high<V_widen_l>.
6131 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6132 [(match_operand:<V_double_width> 0 "register_operand" "")
6133 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6134 (match_operand:SI 2 "immediate_operand" "i")]
6135 "TARGET_NEON"
6136 {
6137 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6138 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6139 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6140
6141 DONE;
6142 }
6143 )
6144
;; Expander for vec_widen_<US>shiftl_lo_<mode> on VDI modes.
;; Performs the widening shift of the whole input into a fresh <V_widen>
;; pseudo with neon_vec_<US>shiftl_<mode>, then copies its low half into
;; operand 0 with neon_vget_low<V_widen_l>.
6145 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6146 [(match_operand:<V_double_width> 0 "register_operand" "")
6147 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6148 (match_operand:SI 2 "immediate_operand" "i")]
6149 "TARGET_NEON"
6150 {
6151 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6152 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6153 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6154
6155 DONE;
6156 }
6157 )
6158
6159 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6160 ; because the ordering of vector elements in Q registers is different from what
6161 ; the semantics of the instructions require.
6162
;; Pack two VN vectors into one <V_narrow_pack> vector by truncating each
;; and concatenating the results.  Two vmovn instructions are emitted, one
;; writing %e0 and one writing %f0, hence "length" 8 and type "multiple".
;; The output uses the earlyclobber constraint "=&w", so it cannot share a
;; register with an input that the second vmovn still has to read.
;; Only enabled when !BYTES_BIG_ENDIAN.
6163 (define_insn "vec_pack_trunc_<mode>"
6164 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6165 (vec_concat:<V_narrow_pack>
6166 (truncate:<V_narrow>
6167 (match_operand:VN 1 "register_operand" "w"))
6168 (truncate:<V_narrow>
6169 (match_operand:VN 2 "register_operand" "w"))))]
6170 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6171 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6172 [(set_attr "type" "multiple")
6173 (set_attr "length" "8")]
6174 )
6175
6176 ;; For the non-quad case.
;; Truncate one VN vector to its <V_narrow> mode with a single vmovn.
;; Only enabled when !BYTES_BIG_ENDIAN.
6177 (define_insn "neon_vec_pack_trunc_<mode>"
6178 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6179 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6180 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6181 "vmovn.i<V_sz_elem>\t%P0, %q1"
6182 [(set_attr "type" "neon_move_narrow_q")]
6183 )
6184
;; Expander for vec_pack_trunc_<mode> on VSHFT modes.
;; Assembles a <V_DOUBLE>mode pseudo from the two inputs (operand 1 via
;; move_lo_quad, operand 2 via move_hi_quad) and then narrows that pseudo
;; into operand 0 with neon_vec_pack_trunc_<V_double>.
;; Only enabled when !BYTES_BIG_ENDIAN.
6185 (define_expand "vec_pack_trunc_<mode>"
6186 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6187 (match_operand:VSHFT 1 "register_operand" "")
6188 (match_operand:VSHFT 2 "register_operand")]
6189 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6190 {
6191 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6192
6193 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6194 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6195 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6196 DONE;
6197 })
6198
;; Absolute difference written as (abs (minus op1 op2)) on VDQ modes,
;; emitted as vabd.  For floating-point modes the pattern is only enabled
;; under flag_unsafe_math_optimizations.  The "type" attribute is
;; neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
6199 (define_insn "neon_vabd<mode>_2"
6200 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6201 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
6202 (match_operand:VDQ 2 "s_register_operand" "w"))))]
6203 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6204 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6205 [(set (attr "type")
6206 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6207 (const_string "neon_fp_abd_s<q>")
6208 (const_string "neon_abd<q>")))]
6209 )
6210
;; Absolute difference where the subtraction appears as an UNSPEC_VSUB
;; instead of a minus: (abs (unspec [op1 op2] UNSPEC_VSUB)) on VDQ modes,
;; emitted as vabd.  Same enabling condition and "type" selection as
;; neon_vabd<mode>_2; the element suffix comes from <V_if_elem> here.
6211 (define_insn "neon_vabd<mode>_3"
6212 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6213 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
6214 (match_operand:VDQ 2 "s_register_operand" "w")]
6215 UNSPEC_VSUB)))]
6216 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6217 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6218 [(set (attr "type")
6219 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6220 (const_string "neon_fp_abd_s<q>")
6221 (const_string "neon_abd<q>")))]
6222 )
6223
6224 ;; Copy from core-to-neon regs, then extend, not vice-versa
6225
;; Split SI -> DI sign extension once reload has completed and the
;; destination is a VFP register (IS_VFP_REGNUM).  The source is first
;; duplicated into the destination viewed as V2SI (operand 2 is that same
;; hard register in V2SImode), then the DI value is shifted right
;; arithmetically by 32.
6226 (define_split
6227 [(set (match_operand:DI 0 "s_register_operand" "")
6228 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6229 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6230 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6231 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6232 {
6233 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6234 })
6235
;; Split HI -> DI sign extension once reload has completed and the
;; destination is a VFP register (IS_VFP_REGNUM).  The source is first
;; duplicated into the destination viewed as V4HI (operand 2 is that same
;; hard register in V4HImode), then the DI value is shifted right
;; arithmetically by 48.
6236 (define_split
6237 [(set (match_operand:DI 0 "s_register_operand" "")
6238 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6239 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6240 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6241 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6242 {
6243 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6244 })
6245
;; Split QI -> DI sign extension once reload has completed and the
;; destination is a VFP register (IS_VFP_REGNUM).  The source is first
;; duplicated into the destination viewed as V8QI (operand 2 is that same
;; hard register in V8QImode), then the DI value is shifted right
;; arithmetically by 56.
6246 (define_split
6247 [(set (match_operand:DI 0 "s_register_operand" "")
6248 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6249 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6250 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6251 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6252 {
6253 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6254 })
6255
;; Split SI -> DI zero extension once reload has completed and the
;; destination is a VFP register (IS_VFP_REGNUM).  The source is first
;; duplicated into the destination viewed as V2SI (operand 2 is that same
;; hard register in V2SImode), then the DI value is shifted right logically
;; by 32.
6256 (define_split
6257 [(set (match_operand:DI 0 "s_register_operand" "")
6258 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6259 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6260 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6261 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6262 {
6263 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
6264 })
6265
;; Split HI -> DI zero extension once reload has completed and the
;; destination is a VFP register (IS_VFP_REGNUM).  The source is first
;; duplicated into the destination viewed as V4HI (operand 2 is that same
;; hard register in V4HImode), then the DI value is shifted right logically
;; by 48.
6266 (define_split
6267 [(set (match_operand:DI 0 "s_register_operand" "")
6268 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6269 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6270 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6271 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6272 {
6273 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6274 })
6275
;; Split QI -> DI zero extension once reload has completed and the
;; destination is a VFP register (IS_VFP_REGNUM).  The source is first
;; duplicated into the destination viewed as V8QI (operand 2 is that same
;; hard register in V8QImode), then the DI value is shifted right logically
;; by 56.
6276 (define_split
6277 [(set (match_operand:DI 0 "s_register_operand" "")
6278 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6279 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6280 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6281 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6282 {
6283 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
6284 })