With -mfpu=neon, DImode shifts are expanded after reload.
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The attribute takes one of the values vadd, vmin or vmax; insns that do
;; not set it explicitly get the default "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the modes of the VDX iterator.  The insn condition
;; requires TARGET_NEON and at least one operand to be a register.
;; The nine alternatives are emitted as follows:
;;   0      vmov between two %P-formatted registers
;;   1, 3   delegated to output_move_neon
;;   2      immediate move: neon_immediate_valid_for_move is handed pointers
;;          to operand 1 and to "width"; width 0 selects vmov.f32, any other
;;          value selects vmov.i<width>.  An invalid immediate asserts.
;;   4, 5   vmov between a %P-formatted register and a %Q/%R register pair
;;   6..8   delegated to output_move_double
;; The immediate template is formatted into a function-static buffer so the
;; returned pointer remains valid after the output code returns.
;; The per-alternative attribute lists below are positional and must stay
;; in step with the nine constraint alternatives.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
38 static char templ[40];
39
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
61 }
62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the modes of the VQXMOV iterator.  The insn condition
;; requires TARGET_NEON and at least one operand to be a register.
;; The nine alternatives are emitted as follows:
;;   0      vmov between two %q-formatted registers
;;   1, 3   delegated to output_move_neon
;;   2      immediate move: neon_immediate_valid_for_move is handed pointers
;;          to operand 1 and to "width"; width 0 selects vmov.f32, any other
;;          value selects vmov.i<width>.  An invalid immediate asserts.
;;   4, 5   two vmov instructions, one per %e/%f half, each paired with
;;          two core registers (%Q/%R and %J/%K)
;;   6..8   delegated to output_move_quad
;; Unlike the VDX pattern, the integer immediate here is printed with a
;; plain %1 rather than %x1.
;; NOTE(review): confirm that this difference is intentional.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
76 "TARGET_NEON
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
79 {
80 if (which_alternative == 2)
81 {
82 int width, is_valid;
83 static char templ[40];
84
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
87
88 gcc_assert (is_valid != 0);
89
90 if (width == 0)
91 return "vmov.f32\t%q0, %1 @ <mode>";
92 else
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
94
95 return templ;
96 }
97
98 switch (which_alternative)
99 {
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
106 }
107 }
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; TImode move expander, available when TARGET_NEON.  While new pseudos may
;; still be created, the source is forced into a register whenever the
;; destination is not a REG; otherwise the operands are left untouched and
;; the plain (set ...) template is emitted.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
119 "TARGET_NEON"
120 {
121 if (can_create_pseudo_p ())
122 {
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
125 }
126 })
127
;; Move expander for the VSTRUCT modes, available when TARGET_NEON.  While
;; new pseudos may still be created, the source is forced into a register
;; whenever the destination is not a REG.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
131 "TARGET_NEON"
132 {
133 if (can_create_pseudo_p ())
134 {
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
137 }
138 })
139
;; V4HF move expander, enabled for TARGET_NEON && TARGET_FP16.  Both operands
;; carry the s_register_operand predicate.
;; NOTE(review): the body still tests !REG_P (operands[0]); confirm whether
;; the predicate is meant to be this strict.
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
144 {
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
147 this case. */
148 if (can_create_pseudo_p ())
149 {
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
152 }
153 })
154
;; V8HF move expander, enabled for TARGET_NEON && TARGET_FP16.  The operands
;; have empty predicates, so any operand is accepted; while pseudos can be
;; created the source is forced into a register when the destination is not
;; a REG.
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
159 {
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
162 this case. */
163 if (can_create_pseudo_p ())
164 {
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
167 }
168 })
169
;; Move pattern for the VSTRUCT modes; at least one operand must be a
;; register.  Alternative 0 (register to register) returns "#", so it is
;; never output directly and has to be split.  Alternatives 1 (store) and
;; 2 (load) are delegated to output_move_neon.  The length attribute is
;; computed per insn by arm_attr_length_move_neon.
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
173 "TARGET_NEON
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
176 {
177 switch (which_alternative)
178 {
179 case 0: return "#";
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
182 }
183 }
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
186
;; After reload, split an EImode register-to-register copy into two moves:
;; a TImode move starting at each operand's own register number and a DImode
;; move starting four register numbers higher.  The piece lists are handed
;; to neon_disambiguate_copy, which fills in the operands used by the
;; replacement template.
;; NOTE(review): presumably it orders the pieces so that overlapping
;; source/destination ranges copy correctly; confirm against its definition.
187 (define_split
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
193 {
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
196 rtx dest[2], src[2];
197
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
202
203 neon_disambiguate_copy (operands, dest, src, 2);
204 })
205
;; After reload, split an OImode register-to-register copy into two TImode
;; moves, the second starting four register numbers above the first.  The
;; piece lists are handed to neon_disambiguate_copy, which fills in the
;; operands used by the replacement template.
;; NOTE(review): presumably it handles overlap ordering; confirm against
;; its definition.
206 (define_split
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
212 {
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
215 rtx dest[2], src[2];
216
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
221
222 neon_disambiguate_copy (operands, dest, src, 2);
223 })
224
;; After reload, split a CImode register-to-register copy into three TImode
;; moves at register offsets 0, 4 and 8.  The piece lists are handed to
;; neon_disambiguate_copy, which fills in the operands used by the
;; replacement template.
;; NOTE(review): presumably it handles overlap ordering; confirm against
;; its definition.
225 (define_split
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
232 {
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
235 rtx dest[3], src[3];
236
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
243
244 neon_disambiguate_copy (operands, dest, src, 3);
245 })
246
;; After reload, split an XImode register-to-register copy into four TImode
;; moves at register offsets 0, 4, 8 and 12.  The piece lists are handed to
;; neon_disambiguate_copy, which fills in the operands used by the
;; replacement template.
;; NOTE(review): presumably it handles overlap ordering; confirm against
;; its definition.
247 (define_split
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
255 {
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
258 rtx dest[4], src[4];
259
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
268
269 neon_disambiguate_copy (operands, dest, src, 4);
270 })
271
;; Expander for misaligned moves of the VDQX modes.  The source is wrapped in
;; UNSPEC_MISALIGNED_ACCESS.  Only enabled for little-endian NEON targets
;; with unaligned_access set.  After ensuring one side is a register, the
;; operand that is not the register destination is treated as the memory
;; operand; if its address is not accepted by neon_vector_mem_operand, the
;; address is forced into a Pmode register.
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
277 {
278 rtx adjust_mem;
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
285
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
288 else
289 adjust_mem = operands[0];
290
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
294
295 })
296
;; Misaligned store of a VDX-mode register to memory, emitted as a single
;; vst1 with the mode's element size.  Little-endian only.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
304
;; Misaligned load of a VDX-mode register from memory, emitted as a single
;; vld1 with the mode's element size.  Little-endian only.
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
308 " Um")]
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
313
;; Misaligned store of a VQX-mode register to memory, emitted as a single
;; vst1 on the %q-formatted register.  Little-endian only.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
321
;; Misaligned load of a VQX-mode register from memory, emitted as a single
;; vld1 on the %q-formatted register.  Little-endian only.
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
325 " Um")]
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
330
;; Insert scalar operand 1 into one lane of a VD_LANE vector.  Operand 3
;; supplies the other lanes and is tied to the output register.  Operand 2
;; is the vec_merge selector mask; the lane number is the index of its
;; lowest set bit (ffs - 1), mirrored within the vector on big-endian.
;; Alternative 0 loads the lane from memory with vld1; alternative 1 moves
;; it from a core register with vmov.
;; NOTE(review): alternative 0 loads a single lane yet is typed
;; neon_load1_all_lanes<q>; confirm that this scheduling type is intended.
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
333 (vec_merge:VD_LANE
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
338 "TARGET_NEON"
339 {
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
344
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
347 else
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
349 }
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
351
;; Insert scalar operand 1 into one lane of a VQ2 vector.  The lane number
;; is the index of the lowest set bit of the selector mask (operand 2).  It
;; is split into a lane within a half-width vector (elem % half_elts) and a
;; register-number offset of 0 or 2 selecting the half; operand 0 is then
;; rewritten as the <V_HALF>mode register at that offset so the same
;; %P-based templates as the VD_LANE pattern can be used.  On big-endian the
;; lane is mirrored within the half.
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
354 (vec_merge:VQ2
355 (vec_duplicate:VQ2
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
359 "TARGET_NEON"
360 {
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
366
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
369
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
372
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
375 else
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
377 }
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
379 )
380
;; Insert a DImode scalar into one element of a V2DI vector.  The element
;; index is the index of the lowest set bit of the selector mask (operand
;; 2); the destination is rewritten as the DImode register two register
;; numbers per element above the vector's first register.  Alternative 0
;; loads the element with vld1.64; alternative 1 moves it from a %Q/%R
;; core-register pair with vmov.
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
383 (vec_merge:V2DI
384 (vec_duplicate:V2DI
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
388 "TARGET_NEON"
389 {
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
392
393 operands[0] = gen_rtx_REG (DImode, regno);
394
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
397 else
398 return "vmov\t%P0, %Q1, %R1";
399 }
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
401 )
402
;; Expander for setting one element of a VDQ vector.  Operand 2 is the
;; element index; it is converted to a one-bit mask (1 << index) and passed
;; to the matching vec_set<mode>_internal pattern, with operand 0 serving
;; both as the destination and as the vector supplying the unchanged lanes.
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
407 "TARGET_NEON"
408 {
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
412 DONE;
413 })
414
;; Extract lane operand 2 of a VD_LANE vector into a scalar.  On big-endian
;; the lane number is mirrored within the vector before output.
;; Alternative 0 stores the lane to memory with vst1; alternative 1 moves it
;; to a core register with vmov.
415 (define_insn "vec_extract<mode>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
417 (vec_select:<V_elem>
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
420 "TARGET_NEON"
421 {
422 if (BYTES_BIG_ENDIAN)
423 {
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
427 }
428
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
431 else
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
433 }
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
435 )
436
;; Extract lane operand 2 of a VQ2 vector into a scalar.  The lane index is
;; split into a lane within a half-width vector and a register-number offset
;; of 0 or 2 selecting the half; operand 1 is rewritten as the <V_HALF>mode
;; register at that offset.  On big-endian the lane is mirrored within the
;; half.  Alternative 0 stores with vst1; alternative 1 uses vmov to a core
;; register.
437 (define_insn "vec_extract<mode>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
439 (vec_select:<V_elem>
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
442 "TARGET_NEON"
443 {
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
448
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
451
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
454
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
457 else
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
459 }
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
461 )
462
;; Extract element operand 2 of a V2DI vector as a DImode value.  The source
;; is rewritten as the DImode register two register numbers per element
;; above the vector's first register.  Alternative 0 stores it with vst1.64;
;; alternative 1 moves it into a %Q/%R core-register pair with vmov.
463 (define_insn "vec_extractv2di"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
465 (vec_select:DI
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
468 "TARGET_NEON"
469 {
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
471
472 operands[1] = gen_rtx_REG (DImode, regno);
473
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
476 else
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
478 }
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
480 )
481
;; Vector initialisation expander for the VDQ modes.  All work is delegated
;; to neon_expand_vector_init (destination, initialiser); nothing from the
;; template itself is emitted (DONE).
482 (define_expand "vec_init<mode>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
485 "TARGET_NEON"
486 {
487 neon_expand_vector_init (operands[0], operands[1]);
488 DONE;
489 })
490
491 ;; Doubleword and quadword arithmetic.
492
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
495
;; Element-wise vector addition (vadd) for the VDQ modes.  Floating-point
;; modes are only matched when flag_unsafe_math_optimizations is set.  The
;; scheduling type depends on whether the mode is a float mode.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
502 [(set (attr "type")
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
506 )
507
;; Element-wise vector addition (vadd) for the VH modes, enabled by
;; TARGET_NEON_FP16INST.  No unsafe-math condition is applied here.
508 (define_insn "add<mode>3_fp16"
509 [(set
510 (match_operand:VH 0 "s_register_operand" "=w")
511 (plus:VH
512 (match_operand:VH 1 "s_register_operand" "w")
513 (match_operand:VH 2 "s_register_operand" "w")))]
514 "TARGET_NEON_FP16INST"
515 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
516 [(set (attr "type")
517 (if_then_else (match_test "<Is_float_mode>")
518 (const_string "neon_fp_addsub_s<q>")
519 (const_string "neon_add<q>")))]
520 )
521
;; 64-bit (DImode) addition when NEON is available; clobbers the condition
;; codes.  Alternatives 0 and 3 emit vadd.i64 directly; the "arch" attribute
;; ties them to neon_for_64bits and avoid_neon_for_64bits respectively.  All
;; other alternatives return "#" and therefore have to be split rather than
;; output; they are 8 bytes long and marked as clobbering the conditions.
522 (define_insn "adddi3_neon"
523 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
524 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
525 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
526 (clobber (reg:CC CC_REGNUM))]
527 "TARGET_NEON"
528 {
529 switch (which_alternative)
530 {
531 case 0: /* fall through */
532 case 3: return "vadd.i64\t%P0, %P1, %P2";
533 case 1: return "#";
534 case 2: return "#";
535 case 4: return "#";
536 case 5: return "#";
537 case 6: return "#";
538 default: gcc_unreachable ();
539 }
540 }
541 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
542 multiple,multiple,multiple")
543 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
544 (set_attr "length" "*,8,8,*,8,8,8")
545 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
546 )
547
;; Element-wise vector subtraction (vsub) for the VDQ modes.  Floating-point
;; modes are only matched when flag_unsafe_math_optimizations is set.  The
;; scheduling type depends on whether the mode is a float mode.
548 (define_insn "*sub<mode>3_neon"
549 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
550 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
551 (match_operand:VDQ 2 "s_register_operand" "w")))]
552 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
553 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
554 [(set (attr "type")
555 (if_then_else (match_test "<Is_float_mode>")
556 (const_string "neon_fp_addsub_s<q>")
557 (const_string "neon_sub<q>")))]
558 )
559
;; Element-wise vector subtraction (vsub) for the VH modes, enabled by
;; TARGET_NEON_FP16INST.  No unsafe-math condition is applied here.
;; The insn is a floating-point subtract, so it is given the floating-point
;; add/sub scheduling type, the same one used by add<mode>3_fp16 and by the
;; float arm of *sub<mode>3_neon, rather than the integer "neon_sub<q>".
560 (define_insn "sub<mode>3_fp16"
561 [(set
562 (match_operand:VH 0 "s_register_operand" "=w")
563 (minus:VH
564 (match_operand:VH 1 "s_register_operand" "w")
565 (match_operand:VH 2 "s_register_operand" "w")))]
566 "TARGET_NEON_FP16INST"
567 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
568 [(set_attr "type" "neon_fp_addsub_s<q>")]
569 )
570
;; 64-bit (DImode) subtraction when NEON is available; clobbers the
;; condition codes.  Alternatives 0 and 4 emit vsub.i64; the "arch"
;; attribute ties them to neon_for_64bits and avoid_neon_for_64bits
;; respectively.  Alternatives 1-3 emit a subs/sbc pair on the %Q and %R
;; halves of the core-register operands (8 bytes, conditions clobbered).
571 (define_insn "subdi3_neon"
572 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
573 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
574 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
575 (clobber (reg:CC CC_REGNUM))]
576 "TARGET_NEON"
577 {
578 switch (which_alternative)
579 {
580 case 0: /* fall through */
581 case 4: return "vsub.i64\t%P0, %P1, %P2";
582 case 1: /* fall through */
583 case 2: /* fall through */
584 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
585 default: gcc_unreachable ();
586 }
587 }
588 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
589 (set_attr "conds" "*,clob,clob,clob,*")
590 (set_attr "length" "*,8,8,8,*")
591 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
592 )
593
;; Element-wise vector multiplication (vmul) for the VDQW modes.
;; Floating-point modes are only matched when flag_unsafe_math_optimizations
;; is set.  The scheduling type depends on whether the mode is a float mode.
594 (define_insn "*mul<mode>3_neon"
595 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
596 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
597 (match_operand:VDQW 2 "s_register_operand" "w")))]
598 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
599 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
600 [(set (attr "type")
601 (if_then_else (match_test "<Is_float_mode>")
602 (const_string "neon_fp_mul_s<q>")
603 (const_string "neon_mul_<V_elem_ch><q>")))]
604 )
605
;; Multiply-accumulate (vmla) for the VDQW modes: operand 0 = operand 1 +
;; operand 2 * operand 3.  The accumulator (operand 1) is tied to the output
;; register, so only operands 2 and 3 appear as sources in the template.
;; Floating-point modes require flag_unsafe_math_optimizations.
606 (define_insn "mul<mode>3add<mode>_neon"
607 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
608 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
609 (match_operand:VDQW 3 "s_register_operand" "w"))
610 (match_operand:VDQW 1 "s_register_operand" "0")))]
611 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
612 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
613 [(set (attr "type")
614 (if_then_else (match_test "<Is_float_mode>")
615 (const_string "neon_fp_mla_s<q>")
616 (const_string "neon_mla_<V_elem_ch><q>")))]
617 )
618
;; Multiply-accumulate (vmla.f16) for the VH modes: operand 0 = operand 1 +
;; operand 2 * operand 3, with the accumulator tied to the output register.
;; Requires TARGET_NEON_FP16INST in addition to the float-mode /
;; unsafe-math condition.
619 (define_insn "mul<mode>3add<mode>_neon"
620 [(set (match_operand:VH 0 "s_register_operand" "=w")
621 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
622 (match_operand:VH 3 "s_register_operand" "w"))
623 (match_operand:VH 1 "s_register_operand" "0")))]
624 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
625 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
626 [(set_attr "type" "neon_fp_mla_s<q>")]
627 )
628
;; Multiply-subtract (vmls) for the VDQW modes: operand 0 = operand 1 -
;; operand 2 * operand 3, with the accumulator (operand 1) tied to the
;; output register.  Floating-point modes require
;; flag_unsafe_math_optimizations.
629 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
630 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
631 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
632 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
633 (match_operand:VDQW 3 "s_register_operand" "w"))))]
634 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
635 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
636 [(set (attr "type")
637 (if_then_else (match_test "<Is_float_mode>")
638 (const_string "neon_fp_mla_s<q>")
639 (const_string "neon_mla_<V_elem_ch><q>")))]
640 )
641
642 ;; Fused multiply-accumulate
643 ;; We define each insn twice here:
644 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
645 ;; to be able to use when converting to FMA.
646 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add (vfma) for the VCVTF modes: operand 0 = operand 1 *
;; operand 2 + operand 3, with the addend tied to the output register.
;; Only enabled with flag_unsafe_math_optimizations.
647 (define_insn "fma<VCVTF:mode>4"
648 [(set (match_operand:VCVTF 0 "register_operand" "=w")
649 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
650 (match_operand:VCVTF 2 "register_operand" "w")
651 (match_operand:VCVTF 3 "register_operand" "0")))]
652 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
653 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
654 [(set_attr "type" "neon_fp_mla_s<q>")]
655 )
656
;; Fused multiply-add (vfma) for the VCVTF modes with the same RTL and
;; output template as the unsafe-math variant, but enabled without
;; flag_unsafe_math_optimizations.
657 (define_insn "fma<VCVTF:mode>4_intrinsic"
658 [(set (match_operand:VCVTF 0 "register_operand" "=w")
659 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
660 (match_operand:VCVTF 2 "register_operand" "w")
661 (match_operand:VCVTF 3 "register_operand" "0")))]
662 "TARGET_NEON && TARGET_FMA"
663 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
664 [(set_attr "type" "neon_fp_mla_s<q>")]
665 )
666
667 ;; There is limited support for unsafe-math optimizations using the NEON FP16
668 ;; arithmetic instructions, so only the intrinsic is currently supported.
;; Fused multiply-add (vfma) for the VH modes, enabled by
;; TARGET_NEON_FP16INST.  The addend (operand 3) is tied to the output
;; register.  Unlike the VCVTF templates, no %? is emitted.
669 (define_insn "fma<VH:mode>4_intrinsic"
670 [(set (match_operand:VH 0 "register_operand" "=w")
671 (fma:VH
672 (match_operand:VH 1 "register_operand" "w")
673 (match_operand:VH 2 "register_operand" "w")
674 (match_operand:VH 3 "register_operand" "0")))]
675 "TARGET_NEON_FP16INST"
676 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
677 [(set_attr "type" "neon_fp_mla_s<q>")]
678 )
679
;; Fused multiply-subtract (vfms) for the VCVTF modes: operand 0 =
;; -(operand 1) * operand 2 + operand 3, with operand 3 tied to the output
;; register.  Only enabled with flag_unsafe_math_optimizations.
680 (define_insn "*fmsub<VCVTF:mode>4"
681 [(set (match_operand:VCVTF 0 "register_operand" "=w")
682 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
683 (match_operand:VCVTF 2 "register_operand" "w")
684 (match_operand:VCVTF 3 "register_operand" "0")))]
685 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
686 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
687 [(set_attr "type" "neon_fp_mla_s<q>")]
688 )
689
;; Fused multiply-subtract (vfms) for the VCVTF modes with the same RTL and
;; output template as the unsafe-math variant, but enabled without
;; flag_unsafe_math_optimizations.
690 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
691 [(set (match_operand:VCVTF 0 "register_operand" "=w")
692 (fma:VCVTF
693 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
694 (match_operand:VCVTF 2 "register_operand" "w")
695 (match_operand:VCVTF 3 "register_operand" "0")))]
696 "TARGET_NEON && TARGET_FMA"
697 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
698 [(set_attr "type" "neon_fp_mla_s<q>")]
699 )
700
;; Fused multiply-subtract (vfms) for the VH modes, enabled by
;; TARGET_NEON_FP16INST: operand 0 = -(operand 1) * operand 2 + operand 3,
;; with operand 3 tied to the output register.
701 (define_insn "fmsub<VH:mode>4_intrinsic"
702 [(set (match_operand:VH 0 "register_operand" "=w")
703 (fma:VH
704 (neg:VH (match_operand:VH 1 "register_operand" "w"))
705 (match_operand:VH 2 "register_operand" "w")
706 (match_operand:VH 3 "register_operand" "0")))]
707 "TARGET_NEON_FP16INST"
708 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
709 [(set_attr "type" "neon_fp_mla_s<q>")]
710 )
711
;; Vector round-to-integral (vrint<variant>.f32) for the VCVTF modes, one
;; pattern per NEON_VRINT unspec.  Requires TARGET_NEON and
;; TARGET_FPU_ARMV8.
712 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
713 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
714 (unspec:VCVTF [(match_operand:VCVTF 1
715 "s_register_operand" "w")]
716 NEON_VRINT))]
717 "TARGET_NEON && TARGET_FPU_ARMV8"
718 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
719 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
720 )
721
;; Float-to-integer vector conversion (vcvt<variant>.<su>32.f32): the
;; NEON_VCVT unspec is applied to the VCVTF source and the result is
;; converted by the FIXUORS code into the <V_cmp_result> mode.  Requires
;; TARGET_NEON and TARGET_FPU_ARMV8; explicitly marked as not predicable.
722 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
723 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
724 (FIXUORS:<V_cmp_result> (unspec:VCVTF
725 [(match_operand:VCVTF 1 "register_operand" "w")]
726 NEON_VCVT)))]
727 "TARGET_NEON && TARGET_FPU_ARMV8"
728 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
729 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
730 (set_attr "predicable" "no")]
731 )
732
;; Bitwise OR for the VDQ modes.  Alternative 0 is the register form (vorr).
;; Alternative 1 takes an immediate second operand, with operand 1 tied to
;; the output register; its text is produced by
;; neon_output_logic_immediate, called with mnemonic "vorr", inverse flag 0
;; and a flag saying whether the mode is a valid quad-register mode.
733 (define_insn "ior<mode>3"
734 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
735 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
736 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
737 "TARGET_NEON"
738 {
739 switch (which_alternative)
740 {
741 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
742 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
743 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
744 default: gcc_unreachable ();
745 }
746 }
747 [(set_attr "type" "neon_logic<q>")]
748 )
749
750 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
751 ;; vorr. We support the pseudo-instruction vand instead, because that
752 ;; corresponds to the canonical form the middle-end expects to use for
753 ;; immediate bitwise-ANDs.
754
;; Bitwise AND for the VDQ modes.  Alternative 0 is the register form (vand).
;; Alternative 1 takes an immediate second operand, with operand 1 tied to
;; the output register; its text is produced by
;; neon_output_logic_immediate, called with mnemonic "vand", inverse flag 1
;; and a flag saying whether the mode is a valid quad-register mode.
755 (define_insn "and<mode>3"
756 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
757 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
758 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
759 "TARGET_NEON"
760 {
761 switch (which_alternative)
762 {
763 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
764 case 1: return neon_output_logic_immediate ("vand", &operands[2],
765 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
766 default: gcc_unreachable ();
767 }
768 }
769 [(set_attr "type" "neon_logic<q>")]
770 )
771
;; OR-NOT (vorn) for the VDQ modes: operand 0 = operand 1 | ~operand 2.
;; Note the operand numbering: the inverted input is operand 2.
772 (define_insn "orn<mode>3_neon"
773 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
774 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
775 (match_operand:VDQ 1 "s_register_operand" "w")))]
776 "TARGET_NEON"
777 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
778 [(set_attr "type" "neon_logic<q>")]
779 )
780
781 ;; TODO: investigate whether we should disable
782 ;; this and bicdi3_neon for the A8 in line with the other
783 ;; changes above.
;; 64-bit OR-NOT: operand 0 = operand 1 | ~operand 2.  Alternative 0 emits
;; vorn directly.  The remaining alternatives return "#" and are split after
;; reload, provided the destination is not a VFP register:
;;   - on Thumb-2 the operation becomes two SImode OR-NOT sets, one on the
;;     low parts and one on the high parts of the operands;
;;   - otherwise a DImode one's complement of operand 2 is emitted into
;;     operand 0, followed by a DImode OR of operand 1 with operand 0, and
;;     the replacement template is skipped (DONE).
;; The "arch" attribute restricts alternative 1 to "a" and alternatives
;; 2 and 3 to "t2".
784 (define_insn_and_split "orndi3_neon"
785 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
786 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
787 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
788 "TARGET_NEON"
789 "@
790 vorn\t%P0, %P1, %P2
791 #
792 #
793 #"
794 "reload_completed &&
795 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
796 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
797 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
798 "
799 {
800 if (TARGET_THUMB2)
801 {
802 operands[3] = gen_highpart (SImode, operands[0]);
803 operands[0] = gen_lowpart (SImode, operands[0]);
804 operands[4] = gen_highpart (SImode, operands[2]);
805 operands[2] = gen_lowpart (SImode, operands[2]);
806 operands[5] = gen_highpart (SImode, operands[1]);
807 operands[1] = gen_lowpart (SImode, operands[1]);
808 }
809 else
810 {
811 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
812 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
813 DONE;
814 }
815 }"
816 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
817 (set_attr "length" "*,16,8,8")
818 (set_attr "arch" "any,a,t2,t2")]
819 )
820
;; Bit clear (vbic) for the VDQ modes: operand 0 = operand 1 & ~operand 2.
;; Note the operand numbering: the inverted input is operand 2.
821 (define_insn "bic<mode>3_neon"
822 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
823 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
824 (match_operand:VDQ 1 "s_register_operand" "w")))]
825 "TARGET_NEON"
826 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
827 [(set_attr "type" "neon_logic<q>")]
828 )
829
830 ;; Compare to *anddi_notdi_di.
;; 64-bit bit clear: operand 0 = operand 1 & ~operand 2.  Alternative 0
;; emits vbic directly.  Alternatives 1 and 2 return "#" and so must be
;; split rather than output; they are 8 bytes long.  No splitter is
;; attached to this pattern itself.
;; NOTE(review): presumably a separate split handles them; confirm.
831 (define_insn "bicdi3_neon"
832 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
833 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
834 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
835 "TARGET_NEON"
836 "@
837 vbic\t%P0, %P1, %P2
838 #
839 #"
840 [(set_attr "type" "neon_logic,multiple,multiple")
841 (set_attr "length" "*,8,8")]
842 )
843
;; Bitwise exclusive OR (veor) for the VDQ modes; register operands only.
844 (define_insn "xor<mode>3"
845 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
846 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
847 (match_operand:VDQ 2 "s_register_operand" "w")))]
848 "TARGET_NEON"
849 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
850 [(set_attr "type" "neon_logic<q>")]
851 )
852
;; Bitwise NOT (vmvn) for the VDQ modes.  Scheduled as a NEON move.
853 (define_insn "one_cmpl<mode>2"
854 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
855 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
856 "TARGET_NEON"
857 "vmvn\t%<V_reg>0, %<V_reg>1"
858 [(set_attr "type" "neon_move<q>")]
859 )
860
;; Element-wise absolute value (vabs) for the VDQW modes.  Unlike the
;; arithmetic patterns, float modes are not gated on unsafe-math here.  The
;; scheduling type depends on whether the mode is a float mode.
861 (define_insn "abs<mode>2"
862 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
863 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
864 "TARGET_NEON"
865 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
866 [(set (attr "type")
867 (if_then_else (match_test "<Is_float_mode>")
868 (const_string "neon_fp_abs_s<q>")
869 (const_string "neon_abs<q>")))]
870 )
871
;; Element-wise negation (vneg) for the VDQW modes.  Float modes are not
;; gated on unsafe-math here.  The scheduling type depends on whether the
;; mode is a float mode.
872 (define_insn "neg<mode>2"
873 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
874 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
875 "TARGET_NEON"
876 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
877 [(set (attr "type")
878 (if_then_else (match_test "<Is_float_mode>")
879 (const_string "neon_fp_neg_s<q>")
880 (const_string "neon_neg<q>")))]
881 )
882
;; 64-bit (DImode) negation when NEON is available.  Every alternative
;; returns "#", so the insn is never output directly and has to be split.
;; A DImode scratch is requested only by alternative 1 (the other
;; alternatives use "X"); the condition codes are clobbered.
883 (define_insn "negdi2_neon"
884 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
885 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
886 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
887 (clobber (reg:CC CC_REGNUM))]
888 "TARGET_NEON"
889 "#"
890 [(set_attr "length" "8")
891 (set_attr "type" "multiple")]
892 )
893
894 ; Split negdi2_neon for vfp registers
;; After reload, when the destination is a VFP register, negation becomes
;; "temp = 0; dest = temp - src" (the subtraction keeps the CC clobber).
;; The temporary is the scratch (operand 2) when one was allocated as a
;; register; otherwise the destination itself is used as the temporary.
895 (define_split
896 [(set (match_operand:DI 0 "s_register_operand" "")
897 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
898 (clobber (match_scratch:DI 2 ""))
899 (clobber (reg:CC CC_REGNUM))]
900 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
901 [(set (match_dup 2) (const_int 0))
902 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
903 (clobber (reg:CC CC_REGNUM))])]
904 {
905 if (!REG_P (operands[2]))
906 operands[2] = operands[0];
907 }
908 )
909
910 ; Split negdi2_neon for core registers
;; After reload, when the destination is a general (core) register, the
;; scratch clobber is dropped and the insn is re-emitted as a plain DImode
;; negation that clobbers only the condition codes.  The condition is
;; TARGET_32BIT rather than TARGET_NEON.
911 (define_split
912 [(set (match_operand:DI 0 "s_register_operand" "")
913 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
914 (clobber (match_scratch:DI 2 ""))
915 (clobber (reg:CC CC_REGNUM))]
916 "TARGET_32BIT && reload_completed
917 && arm_general_register_operand (operands[0], DImode)"
918 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
919 (clobber (reg:CC CC_REGNUM))])]
920 ""
921 )
922
;; abs/neg (ABSNEG code iterator) on VH-mode vectors; the mnemonic is
;; "v" followed by <absneg_str>.  Only available with TARGET_NEON_FP16INST.
923 (define_insn "<absneg_str><mode>2"
924 [(set (match_operand:VH 0 "s_register_operand" "=w")
925 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
926 "TARGET_NEON_FP16INST"
927 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
928 [(set_attr "type" "neon_abs<q>")]
929 )
930
;; Named expander neon_v<absneg_str><mode> for VH modes: it does nothing
;; except forward its two operands to the <absneg_str><mode>2 insn.
931 (define_expand "neon_v<absneg_str><mode>"
932 [(set
933 (match_operand:VH 0 "s_register_operand")
934 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
935 "TARGET_NEON_FP16INST"
936 {
937 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
938 DONE;
939 })
940
;; VH-mode rounding operations, one per member of the FP16_RND unspec
;; iterator; the mnemonic is supplied by <fp16_rnd_insn>.
941 (define_insn "neon_v<fp16_rnd_str><mode>"
942 [(set (match_operand:VH 0 "s_register_operand" "=w")
943 (unspec:VH
944 [(match_operand:VH 1 "s_register_operand" "w")]
945 FP16_RND))]
946 "TARGET_NEON_FP16INST"
947 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
948 [(set_attr "type" "neon_fp_round_s<q>")]
949 )
950
;; vrsqrte.f16 on VH-mode vectors, modelled as UNSPEC_VRSQRTE.
951 (define_insn "neon_vrsqrte<mode>"
952 [(set (match_operand:VH 0 "s_register_operand" "=w")
953 (unspec:VH
954 [(match_operand:VH 1 "s_register_operand" "w")]
955 UNSPEC_VRSQRTE))]
956 "TARGET_NEON_FP16INST"
957 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
958 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
959 )
960
;; Element-wise unsigned minimum for VDQIW modes: vmin with the unsigned
;; element suffix <V_u_elem>.
961 (define_insn "*umin<mode>3_neon"
962 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
963 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
964 (match_operand:VDQIW 2 "s_register_operand" "w")))]
965 "TARGET_NEON"
966 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
967 [(set_attr "type" "neon_minmax<q>")]
968 )
969
;; Element-wise unsigned maximum for VDQIW modes: vmax with the unsigned
;; element suffix <V_u_elem>.
970 (define_insn "*umax<mode>3_neon"
971 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
972 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
973 (match_operand:VDQIW 2 "s_register_operand" "w")))]
974 "TARGET_NEON"
975 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
976 [(set_attr "type" "neon_minmax<q>")]
977 )
978
;; Element-wise signed minimum for VDQW modes (vmin with <V_s_elem>).  The
;; scheduling type switches to the floating-point variant under
;; <Is_float_mode>.
979 (define_insn "*smin<mode>3_neon"
980 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
981 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
982 (match_operand:VDQW 2 "s_register_operand" "w")))]
983 "TARGET_NEON"
984 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
985 [(set (attr "type")
986 (if_then_else (match_test "<Is_float_mode>")
987 (const_string "neon_fp_minmax_s<q>")
988 (const_string "neon_minmax<q>")))]
989 )
990
;; Element-wise signed maximum for VDQW modes (vmax with <V_s_elem>).  The
;; scheduling type switches to the floating-point variant under
;; <Is_float_mode>.
991 (define_insn "*smax<mode>3_neon"
992 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
993 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
994 (match_operand:VDQW 2 "s_register_operand" "w")))]
995 "TARGET_NEON"
996 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
997 [(set (attr "type")
998 (if_then_else (match_test "<Is_float_mode>")
999 (const_string "neon_fp_minmax_s<q>")
1000 (const_string "neon_minmax<q>")))]
1001 )
1002
1003 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1004 ; generic vectorizer code. It ends up creating a V2DI constructor with
1005 ; SImode elements.
1006
;; Vector left shift for VDQIW modes.  Alternative 0 takes the shift count
;; in a register and prints vshl directly; alternative 1 takes a "Dn"
;; immediate and lets neon_output_shift_immediate build the instruction.
1007 (define_insn "vashl<mode>3"
1008 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1009 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1010 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1011 "TARGET_NEON"
1012 {
1013 switch (which_alternative)
1014 {
1015 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1016 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1017 <MODE>mode,
1018 VALID_NEON_QREG_MODE (<MODE>mode),
1019 true);
1020 default: gcc_unreachable ();
1021 }
1022 }
1023 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1024 )
1025
;; Vector arithmetic right shift by an immediate.  The instruction text is
;; produced by neon_output_shift_immediate with mnemonic "vshr" and sign
;; character 's'.
1026 (define_insn "vashr<mode>3_imm"
1027 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1028 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1029 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1030 "TARGET_NEON"
1031 {
1032 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1033 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1034 false);
1035 }
1036 [(set_attr "type" "neon_shift_imm<q>")]
1037 )
1038
;; Vector logical right shift by an immediate.  Identical to
;; vashr<mode>3_imm except that the sign character passed to
;; neon_output_shift_immediate is 'u'.
1039 (define_insn "vlshr<mode>3_imm"
1040 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1041 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1042 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1043 "TARGET_NEON"
1044 {
1045 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1046 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1047 false);
1048 }
1049 [(set_attr "type" "neon_shift_imm<q>")]
1050 )
1051
1052 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
1053 ; amount, with signed operands. This is essentially the same as vashl<mode>3
1054 ; above, but using an unspec in case GCC tries anything tricky with negative
1055 ; shift amounts.
1056
;; vshl with the signed element suffix, wrapped in UNSPEC_ASHIFT_SIGNED.
;; The vashr<mode>3 expander passes a negated count in operand 2 to obtain a
;; right shift by a register amount.
1057 (define_insn "ashl<mode>3_signed"
1058 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1059 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1060 (match_operand:VDQI 2 "s_register_operand" "w")]
1061 UNSPEC_ASHIFT_SIGNED))]
1062 "TARGET_NEON"
1063 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1064 [(set_attr "type" "neon_shift_reg<q>")]
1065 )
1066
1067 ; Used for implementing logical shift-right, which is a left-shift by a negative
1068 ; amount, with unsigned operands.
1069
;; vshl with the unsigned element suffix, wrapped in UNSPEC_ASHIFT_UNSIGNED.
;; The vlshr<mode>3 expander passes a negated count in operand 2.
1070 (define_insn "ashl<mode>3_unsigned"
1071 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1072 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1073 (match_operand:VDQI 2 "s_register_operand" "w")]
1074 UNSPEC_ASHIFT_UNSIGNED))]
1075 "TARGET_NEON"
1076 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1077 [(set_attr "type" "neon_shift_reg<q>")]
1078 )
1079
;; Vector arithmetic right shift.  With a register count the count is
;; negated into a fresh pseudo and ashl<mode>3_signed (a vshl) is used;
;; otherwise the immediate form vashr<mode>3_imm is emitted.
1080 (define_expand "vashr<mode>3"
1081 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1082 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1083 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1084 "TARGET_NEON"
1085 {
1086 if (s_register_operand (operands[2], <MODE>mode))
1087 {
1088 rtx neg = gen_reg_rtx (<MODE>mode);
1089 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1090 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1091 }
1092 else
1093 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1094 DONE;
1095 })
1096
;; Vector logical right shift.  With a register count the count is negated
;; into a fresh pseudo and ashl<mode>3_unsigned (a vshl) is used; otherwise
;; the immediate form vlshr<mode>3_imm is emitted.
1097 (define_expand "vlshr<mode>3"
1098 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1099 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1100 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1101 "TARGET_NEON"
1102 {
1103 if (s_register_operand (operands[2], <MODE>mode))
1104 {
1105 rtx neg = gen_reg_rtx (<MODE>mode);
1106 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1107 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1108 }
1109 else
1110 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1111 DONE;
1112 })
1113
1114 ;; 64-bit shifts
1115
1116 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1117 ;; leaving the upper half uninitialized. This is OK since the shift
1118 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1119 ;; data flow analysis however, we pretend the full register is set
1120 ;; using an unspec.
;; Alternative 0 reads the SImode count from memory ("Um") with vld1.32 into
;; lane 0 of operand 0; alternative 1 takes it from a core register with
;; vmov.32.
1121 (define_insn "neon_load_count"
1122 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1123 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1124 UNSPEC_LOAD_COUNT))]
1125 "TARGET_NEON"
1126 "@
1127 vld1.32\t{%P0[0]}, %A1
1128 vmov.32\t%P0[0], %1"
1129 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1130 )
1131
;; DImode left shift carried out in a NEON register with vshl.u64, with no
;; clobbers.  Only recognised after reload; a constant count must lie in
;; the range 0..63, otherwise the count is a DImode "w" register.
1132 (define_insn "ashldi3_neon_noclobber"
1133 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1134 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1135 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1136 "TARGET_NEON && reload_completed
1137 && (!CONST_INT_P (operands[2])
1138 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1139 "@
1140 vshl.u64\t%P0, %P1, %2
1141 vshl.u64\t%P0, %P1, %P2"
1142 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1143 )
1144
;; DImode left shift by an SImode count.  The insn always prints as "#" and
;; is split after reload:
;;  - destination in a VFP register: a constant count below 1 becomes a plain
;;    DImode move, a constant above 63 is replaced by 63, and a non-constant
;;    count is first loaded into scratch operand 5 via neon_load_count; the
;;    shift is then emitted through ashldi3_neon_noclobber.
;;  - otherwise: a shift by exactly 1 uses arm_ashldi3_1bit, and every other
;;    case goes through arm_emit_coreregs_64bit_shift with SImode scratches
;;    3 and 4.
;; NOTE(review): a constant count above 63 is clamped to 63 instead of
;; yielding zero -- presumably acceptable because such shifts are undefined
;; in the source language; confirm.
1145 (define_insn_and_split "ashldi3_neon"
1146 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w")
1147 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w")
1148 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i")))
1149 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X"))
1150 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X"))
1151 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X"))
1152 (clobber (reg:CC_C CC_REGNUM))]
1153 "TARGET_NEON"
1154 "#"
1155 "TARGET_NEON && reload_completed"
1156 [(const_int 0)]
1157 "
1158 {
1159 if (IS_VFP_REGNUM (REGNO (operands[0])))
1160 {
1161 if (CONST_INT_P (operands[2]))
1162 {
1163 if (INTVAL (operands[2]) < 1)
1164 {
1165 emit_insn (gen_movdi (operands[0], operands[1]));
1166 DONE;
1167 }
1168 else if (INTVAL (operands[2]) > 63)
1169 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1170 }
1171 else
1172 {
1173 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1174 operands[2] = operands[5];
1175 }
1176
1177 /* Ditch the unnecessary clobbers. */
1178 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1179 operands[2]));
1180 }
1181 else
1182 {
1183 /* The shift expanders support either full overlap or no overlap. */
1184 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1185 || REGNO (operands[0]) == REGNO (operands[1]));
1186
1187 if (operands[2] == CONST1_RTX (SImode))
1188 /* This clobbers CC. */
1189 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1190 else
1191 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1192 operands[2], operands[3], operands[4]);
1193 }
1194 DONE;
1195 }"
1196 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1197 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1198 (set_attr "type" "multiple")]
1199 )
1200
1201 ; The shift amount needs to be negated for right-shifts
; vshl.s64 with a register count, modelled as UNSPEC_ASHIFT_SIGNED and only
; recognised after reload.  It is emitted by the split of <shift>di3_neon
; through gen_<shifttype>_shift_di3_neon.
1202 (define_insn "signed_shift_di3_neon"
1203 [(set (match_operand:DI 0 "s_register_operand" "=w")
1204 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1205 (match_operand:DI 2 "s_register_operand" " w")]
1206 UNSPEC_ASHIFT_SIGNED))]
1207 "TARGET_NEON && reload_completed"
1208 "vshl.s64\t%P0, %P1, %P2"
1209 [(set_attr "type" "neon_shift_reg")]
1210 )
1211
1212 ; The shift amount needs to be negated for right-shifts
; vshl.u64 with a register count, modelled as UNSPEC_ASHIFT_UNSIGNED and
; only recognised after reload.  It is emitted by the split of
; <shift>di3_neon through gen_<shifttype>_shift_di3_neon.
1213 (define_insn "unsigned_shift_di3_neon"
1214 [(set (match_operand:DI 0 "s_register_operand" "=w")
1215 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1216 (match_operand:DI 2 "s_register_operand" " w")]
1217 UNSPEC_ASHIFT_UNSIGNED))]
1218 "TARGET_NEON && reload_completed"
1219 "vshl.u64\t%P0, %P1, %P2"
1220 [(set_attr "type" "neon_shift_reg")]
1221 )
1222
;; DImode arithmetic right shift by a constant in a NEON register
;; (vshr.s64), with no clobbers.  Only recognised after reload and only for
;; counts in the range 1..64.
1223 (define_insn "ashrdi3_neon_imm_noclobber"
1224 [(set (match_operand:DI 0 "s_register_operand" "=w")
1225 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1226 (match_operand:DI 2 "const_int_operand" " i")))]
1227 "TARGET_NEON && reload_completed
1228 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1229 "vshr.s64\t%P0, %P1, %2"
1230 [(set_attr "type" "neon_shift_imm")]
1231 )
1232
;; DImode logical right shift by a constant in a NEON register (vshr.u64),
;; with no clobbers.  Only recognised after reload and only for counts in
;; the range 1..64.
1233 (define_insn "lshrdi3_neon_imm_noclobber"
1234 [(set (match_operand:DI 0 "s_register_operand" "=w")
1235 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1236 (match_operand:DI 2 "const_int_operand" " i")))]
1237 "TARGET_NEON && reload_completed
1238 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1239 "vshr.u64\t%P0, %P1, %2"
1240 [(set_attr "type" "neon_shift_imm")]
1241 )
1242
1243 ;; ashrdi3_neon
1244 ;; lshrdi3_neon
;; DImode right shifts (RSHIFTS iterator) by an SImode count.  The insn
;; always prints as "#" and is split after reload:
;;  - destination in a VFP register, constant count: a count below 1 becomes
;;    a plain DImode move and a count above 64 is replaced by 64; the shift
;;    is then emitted through <shift>di3_neon_imm_noclobber.
;;  - destination in a VFP register, register count: the count is negated
;;    into SImode scratch 3, loaded into DImode scratch 5 with
;;    neon_load_count, and a <shifttype>_shift_di3_neon (vshl) is emitted.
;;  - otherwise: a shift by exactly 1 uses arm_<shift>di3_1bit, and every
;;    other case goes through arm_emit_coreregs_64bit_shift with scratches
;;    3 and 4.
1245 (define_insn_and_split "<shift>di3_neon"
1246 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w")
1247 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1248 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1249 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1250 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1251 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1252 (clobber (reg:CC CC_REGNUM))]
1253 "TARGET_NEON"
1254 "#"
1255 "TARGET_NEON && reload_completed"
1256 [(const_int 0)]
1257 "
1258 {
1259 if (IS_VFP_REGNUM (REGNO (operands[0])))
1260 {
1261 if (CONST_INT_P (operands[2]))
1262 {
1263 if (INTVAL (operands[2]) < 1)
1264 {
1265 emit_insn (gen_movdi (operands[0], operands[1]));
1266 DONE;
1267 }
1268 else if (INTVAL (operands[2]) > 64)
1269 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1270
1271 /* Ditch the unnecessary clobbers. */
1272 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1273 operands[1],
1274 operands[2]));
1275 }
1276 else
1277 {
1278 /* We must use a negative left-shift. */
1279 emit_insn (gen_negsi2 (operands[3], operands[2]));
1280 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1281 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1282 operands[5]));
1283 }
1284 }
1285 else
1286 {
1287 /* The shift expanders support either full overlap or no overlap. */
1288 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1289 || REGNO (operands[0]) == REGNO (operands[1]));
1290
1291 if (operands[2] == CONST1_RTX (SImode))
1292 /* This clobbers CC. */
1293 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1294 else
1295 /* This clobbers CC (ASHIFTRT by register only). */
1296 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1297 operands[2], operands[3], operands[4]);
1298 }
1299
1300 DONE;
1301 }"
1302 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1303 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1304 (set_attr "type" "multiple")]
1305 )
1306
1307 ;; Widening operations
1308
;; Signed widening sum for VQI inputs: operand 0 = operand 2 +
;; sign_extend (operand 1), with a <V_double_width> result.  Operand 2 is
;; copied into operand 0 when they differ, and the two halves of operand 1
;; are then accumulated into operand 0 one after the other by the
;; vec_sel_widen_ssum_lo/hi insns.  p1 and p2 are the lane-selection
;; parallels passed to the lo and hi insn respectively.
1309 (define_expand "widen_ssum<mode>3"
1310 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1311 (plus:<V_double_width>
1312 (sign_extend:<V_double_width>
1313 (match_operand:VQI 1 "s_register_operand" ""))
1314 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1315 "TARGET_NEON"
1316 {
1317 machine_mode mode = GET_MODE (operands[1]);
1318 rtx p1, p2;
1319
1320 p1 = arm_simd_vect_par_cnst_half (mode, false);
1321 p2 = arm_simd_vect_par_cnst_half (mode, true);
1322
1323 if (operands[0] != operands[2])
1324 emit_move_insn (operands[0], operands[2]);
1325
1326 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1327 operands[1],
1328 p1,
1329 operands[0]));
1330 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1331 operands[1],
1332 p2,
1333 operands[0]));
1334 DONE;
1335 }
1336 )
1337
;; Signed vaddw of the lanes of operand 1 selected by a
;; vect_par_constant_low parallel (operand 2) into the accumulator
;; operand 3, which is tied to operand 0.  Big-endian targets print the
;; %f1 form of operand 1, little-endian targets the %e1 form.
1338 (define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
1339 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1340 (plus:<VW:V_widen>
1341 (sign_extend:<VW:V_widen>
1342 (vec_select:VW
1343 (match_operand:VQI 1 "s_register_operand" "%w")
1344 (match_operand:VQI 2 "vect_par_constant_low" "")))
1345 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1346 "TARGET_NEON"
1347 {
1348 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1349 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1350 }
1351 [(set_attr "type" "neon_add_widen")])
1352
;; Signed vaddw of the lanes of operand 1 selected by a
;; vect_par_constant_high parallel (operand 2) into the accumulator
;; operand 3, which is tied to operand 0.  Big-endian targets print the
;; %e1 form of operand 1, little-endian targets the %f1 form.
1353 (define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
1354 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1355 (plus:<VW:V_widen>
1356 (sign_extend:<VW:V_widen>
1357 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1358 (match_operand:VQI 2 "vect_par_constant_high" "")))
1359 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1360 "TARGET_NEON"
1361 {
1362 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1363 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1364 }
1365 [(set_attr "type" "neon_add_widen")])
1366
;; Signed widening sum for VW inputs as a single vaddw:
;; operand 0 = operand 2 + sign_extend (operand 1).
1367 (define_insn "widen_ssum<mode>3"
1368 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1369 (plus:<V_widen>
1370 (sign_extend:<V_widen>
1371 (match_operand:VW 1 "s_register_operand" "%w"))
1372 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1373 "TARGET_NEON"
1374 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1375 [(set_attr "type" "neon_add_widen")]
1376 )
1377
;; Unsigned widening sum for VQI inputs: operand 0 = operand 2 +
;; zero_extend (operand 1), with a <V_double_width> result.  Operand 2 is
;; copied into operand 0 when they differ, and the two halves of operand 1
;; are then accumulated into operand 0 one after the other by the
;; vec_sel_widen_usum_lo/hi insns.  p1 and p2 are the lane-selection
;; parallels passed to the lo and hi insn respectively.
1378 (define_expand "widen_usum<mode>3"
1379 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1380 (plus:<V_double_width>
1381 (zero_extend:<V_double_width>
1382 (match_operand:VQI 1 "s_register_operand" ""))
1383 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1384 "TARGET_NEON"
1385 {
1386 machine_mode mode = GET_MODE (operands[1]);
1387 rtx p1, p2;
1388
1389 p1 = arm_simd_vect_par_cnst_half (mode, false);
1390 p2 = arm_simd_vect_par_cnst_half (mode, true);
1391
1392 if (operands[0] != operands[2])
1393 emit_move_insn (operands[0], operands[2]);
1394
1395 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1396 operands[1],
1397 p1,
1398 operands[0]));
1399 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1400 operands[1],
1401 p2,
1402 operands[0]));
1403 DONE;
1404 }
1405 )
1406
;; Unsigned vaddw of the lanes of operand 1 selected by a
;; vect_par_constant_low parallel (operand 2) into the accumulator
;; operand 3, which is tied to operand 0.  Big-endian targets print the
;; %f1 form of operand 1, little-endian targets the %e1 form.
1407 (define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
1408 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1409 (plus:<VW:V_widen>
1410 (zero_extend:<VW:V_widen>
1411 (vec_select:VW
1412 (match_operand:VQI 1 "s_register_operand" "%w")
1413 (match_operand:VQI 2 "vect_par_constant_low" "")))
1414 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1415 "TARGET_NEON"
1416 {
1417 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1418 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1419 }
1420 [(set_attr "type" "neon_add_widen")])
1421
;; Unsigned vaddw of the lanes of operand 1 selected by a
;; vect_par_constant_high parallel (operand 2) into the accumulator
;; operand 3, which is tied to operand 0.  Big-endian targets print the
;; %e1 form of operand 1, little-endian targets the %f1 form.
1422 (define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
1423 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1424 (plus:<VW:V_widen>
1425 (zero_extend:<VW:V_widen>
1426 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1427 (match_operand:VQI 2 "vect_par_constant_high" "")))
1428 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1429 "TARGET_NEON"
1430 {
1431 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1432 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1433 }
1434 [(set_attr "type" "neon_add_widen")])
1435
;; Unsigned widening sum for VW inputs as a single vaddw:
;; operand 0 = operand 2 + zero_extend (operand 1).
1436 (define_insn "widen_usum<mode>3"
1437 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1438 (plus:<V_widen> (zero_extend:<V_widen>
1439 (match_operand:VW 1 "s_register_operand" "%w"))
1440 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1441 "TARGET_NEON"
1442 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1443 [(set_attr "type" "neon_add_widen")]
1444 )
1445
1446 ;; Helpers for quad-word reduction operations
1447
1448 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1449 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1450 ; N/2-element vector.
1451
;; V4SI instance: applies the VQH_OPS operation to lanes {0,1} and lanes
;; {2,3} of operand 1, giving a V2SI result.
1452 (define_insn "quad_halves_<code>v4si"
1453 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1454 (VQH_OPS:V2SI
1455 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1456 (parallel [(const_int 0) (const_int 1)]))
1457 (vec_select:V2SI (match_dup 1)
1458 (parallel [(const_int 2) (const_int 3)]))))]
1459 "TARGET_NEON"
1460 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1461 [(set_attr "vqh_mnem" "<VQH_mnem>")
1462 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1463 )
1464
;; V4SF instance: applies the VQHS_OPS operation to lanes {0,1} and lanes
;; {2,3} of operand 1, giving a V2SF result.  Only enabled with
;; flag_unsafe_math_optimizations.
1465 (define_insn "quad_halves_<code>v4sf"
1466 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1467 (VQHS_OPS:V2SF
1468 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1469 (parallel [(const_int 0) (const_int 1)]))
1470 (vec_select:V2SF (match_dup 1)
1471 (parallel [(const_int 2) (const_int 3)]))))]
1472 "TARGET_NEON && flag_unsafe_math_optimizations"
1473 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1474 [(set_attr "vqh_mnem" "<VQH_mnem>")
1475 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1476 )
1477
;; V8HI instance: applies the VQH_OPS operation to lanes {0..3} and lanes
;; {4..7} of operand 1, giving a V4HI result.  Operand 0 is only ever
;; written by this pattern, so it uses the output-only "=w" constraint,
;; matching the V4SI and V4SF variants.
1478 (define_insn "quad_halves_<code>v8hi"
1479 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1480 (VQH_OPS:V4HI
1481 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1482 (parallel [(const_int 0) (const_int 1)
1483 (const_int 2) (const_int 3)]))
1484 (vec_select:V4HI (match_dup 1)
1485 (parallel [(const_int 4) (const_int 5)
1486 (const_int 6) (const_int 7)]))))]
1487 "TARGET_NEON"
1488 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1489 [(set_attr "vqh_mnem" "<VQH_mnem>")
1490 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1491 )
1492
;; V16QI instance: applies the VQH_OPS operation to lanes {0..7} and lanes
;; {8..15} of operand 1, giving a V8QI result.  Operand 0 is only ever
;; written by this pattern, so it uses the output-only "=w" constraint,
;; matching the V4SI and V4SF variants.
1493 (define_insn "quad_halves_<code>v16qi"
1494 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1495 (VQH_OPS:V8QI
1496 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1497 (parallel [(const_int 0) (const_int 1)
1498 (const_int 2) (const_int 3)
1499 (const_int 4) (const_int 5)
1500 (const_int 6) (const_int 7)]))
1501 (vec_select:V8QI (match_dup 1)
1502 (parallel [(const_int 8) (const_int 9)
1503 (const_int 10) (const_int 11)
1504 (const_int 12) (const_int 13)
1505 (const_int 14) (const_int 15)]))))]
1506 "TARGET_NEON"
1507 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1508 [(set_attr "vqh_mnem" "<VQH_mnem>")
1509 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1510 )
1511
;; Copies the <V_HALF>-mode operand 1 into the half of the ANY128 operand 0
;; that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), using a subreg
;; move.
1512 (define_expand "move_hi_quad_<mode>"
1513 [(match_operand:ANY128 0 "s_register_operand" "")
1514 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1515 "TARGET_NEON"
1516 {
1517 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1518 GET_MODE_SIZE (<V_HALF>mode)),
1519 operands[1]);
1520 DONE;
1521 })
1522
;; Copies the <V_HALF>-mode operand 1 into the half of the ANY128 operand 0
;; that starts at byte offset 0, using a subreg move.
1523 (define_expand "move_lo_quad_<mode>"
1524 [(match_operand:ANY128 0 "s_register_operand" "")
1525 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1526 "TARGET_NEON"
1527 {
1528 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1529 <MODE>mode, 0),
1530 operands[1]);
1531 DONE;
1532 })
1533
1534 ;; Reduction operations
1535
;; Sum reduction of a VD-mode vector to a scalar: neon_pairwise_reduce is run
;; with the vpadd generator into a temporary vector, and element 0 of that
;; vector is extracted into operand 0.  Float modes additionally require
;; flag_unsafe_math_optimizations.
1536 (define_expand "reduc_plus_scal_<mode>"
1537 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1538 (match_operand:VD 1 "s_register_operand" "")]
1539 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1540 {
1541 rtx vec = gen_reg_rtx (<MODE>mode);
1542 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1543 &gen_neon_vpadd_internal<mode>);
1544 /* The same result is actually computed into every element. */
1545 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1546 DONE;
1547 })
1548
;; Sum reduction of a VQ-mode vector to a scalar: the two halves are first
;; added together with quad_halves_plus<mode>, and the <V_half> reduction
;; then finishes the job.  Not available on big-endian targets.
1549 (define_expand "reduc_plus_scal_<mode>"
1550 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1551 (match_operand:VQ 1 "s_register_operand" "")]
1552 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1553 && !BYTES_BIG_ENDIAN"
1554 {
1555 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1556
1557 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1558 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1559
1560 DONE;
1561 })
1562
;; Sum reduction of a V2DI vector to a DImode scalar:
;; arm_reduc_plus_internal_v2di computes the sum into a temporary V2DI
;; register and element 0 of it is extracted into operand 0.  Not available
;; on big-endian targets.  The operand constraints are left empty, as in
;; the other reduction expanders.
1563 (define_expand "reduc_plus_scal_v2di"
1564 [(match_operand:DI 0 "nonimmediate_operand" "")
1565 (match_operand:V2DI 1 "s_register_operand" "")]
1566 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1567 {
1568 rtx vec = gen_reg_rtx (V2DImode);
1569
1570 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1571 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1572
1573 DONE;
1574 })
1575
;; Helper for reduc_plus_scal_v2di: a vadd.i64 of the %e1 and %f1 parts of
;; operand 1 written to the %e0 part of operand 0, modelled as UNSPEC_VPADD.
;; Not available on big-endian targets.
1576 (define_insn "arm_reduc_plus_internal_v2di"
1577 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1578 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1579 UNSPEC_VPADD))]
1580 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1581 "vadd.i64\t%e0, %e1, %f1"
1582 [(set_attr "type" "neon_add_q")]
1583 )
1584
;; Signed-minimum reduction of a VD-mode vector to a scalar:
;; neon_pairwise_reduce is run with the vpsmin generator, then element 0 is
;; extracted.  Float modes additionally require
;; flag_unsafe_math_optimizations.
1585 (define_expand "reduc_smin_scal_<mode>"
1586 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1587 (match_operand:VD 1 "s_register_operand" "")]
1588 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1589 {
1590 rtx vec = gen_reg_rtx (<MODE>mode);
1591
1592 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1593 &gen_neon_vpsmin<mode>);
1594 /* The result is computed into every element of the vector. */
1595 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1596 DONE;
1597 })
1598
;; Signed-minimum reduction of a VQ-mode vector: the two halves are combined
;; with quad_halves_smin<mode>, then the <V_half> reduction finishes the
;; job.  Not available on big-endian targets.
1599 (define_expand "reduc_smin_scal_<mode>"
1600 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1601 (match_operand:VQ 1 "s_register_operand" "")]
1602 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1603 && !BYTES_BIG_ENDIAN"
1604 {
1605 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1606
1607 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1608 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1609
1610 DONE;
1611 })
1612
;; Signed-maximum reduction of a VD-mode vector to a scalar:
;; neon_pairwise_reduce is run with the vpsmax generator, then element 0 is
;; extracted.  Float modes additionally require
;; flag_unsafe_math_optimizations.
1613 (define_expand "reduc_smax_scal_<mode>"
1614 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1615 (match_operand:VD 1 "s_register_operand" "")]
1616 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1617 {
1618 rtx vec = gen_reg_rtx (<MODE>mode);
1619 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1620 &gen_neon_vpsmax<mode>);
1621 /* The result is computed into every element of the vector. */
1622 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1623 DONE;
1624 })
1625
;; Signed-maximum reduction of a VQ-mode vector: the two halves are combined
;; with quad_halves_smax<mode>, then the <V_half> reduction finishes the
;; job.  Not available on big-endian targets.
1626 (define_expand "reduc_smax_scal_<mode>"
1627 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1628 (match_operand:VQ 1 "s_register_operand" "")]
1629 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1630 && !BYTES_BIG_ENDIAN"
1631 {
1632 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1633
1634 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1635 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1636
1637 DONE;
1638 })
1639
;; Unsigned-minimum reduction of a VDI-mode vector to a scalar:
;; neon_pairwise_reduce is run with the vpumin generator, then element 0 is
;; extracted.
1640 (define_expand "reduc_umin_scal_<mode>"
1641 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1642 (match_operand:VDI 1 "s_register_operand" "")]
1643 "TARGET_NEON"
1644 {
1645 rtx vec = gen_reg_rtx (<MODE>mode);
1646 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1647 &gen_neon_vpumin<mode>);
1648 /* The result is computed into every element of the vector. */
1649 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1650 DONE;
1651 })
1652
;; Unsigned-minimum reduction of a VQI-mode vector: the two halves are
;; combined with quad_halves_umin<mode>, then the <V_half> reduction
;; finishes the job.  Not available on big-endian targets.
1653 (define_expand "reduc_umin_scal_<mode>"
1654 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1655 (match_operand:VQI 1 "s_register_operand" "")]
1656 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1657 {
1658 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1659
1660 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1661 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1662
1663 DONE;
1664 })
1665
;; Unsigned-maximum reduction of a VDI-mode vector to a scalar:
;; neon_pairwise_reduce is run with the vpumax generator, then element 0 is
;; extracted.
1666 (define_expand "reduc_umax_scal_<mode>"
1667 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1668 (match_operand:VDI 1 "s_register_operand" "")]
1669 "TARGET_NEON"
1670 {
1671 rtx vec = gen_reg_rtx (<MODE>mode);
1672 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1673 &gen_neon_vpumax<mode>);
1674 /* The result is computed into every element of the vector. */
1675 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1676 DONE;
1677 })
1678
;; Unsigned-maximum reduction of a VQI-mode vector: the two halves are
;; combined with quad_halves_umax<mode>, then the <V_half> reduction
;; finishes the job.  Not available on big-endian targets.
1679 (define_expand "reduc_umax_scal_<mode>"
1680 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1681 (match_operand:VQI 1 "s_register_operand" "")]
1682 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1683 {
1684 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1685
1686 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1687 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1688
1689 DONE;
1690 })
1691
;; vpadd on two VD-mode operands, modelled as UNSPEC_VPADD.  It is passed as
;; the generator to neon_pairwise_reduce by the VD reduc_plus_scal expander.
1692 (define_insn "neon_vpadd_internal<mode>"
1693 [(set (match_operand:VD 0 "s_register_operand" "=w")
1694 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1695 (match_operand:VD 2 "s_register_operand" "w")]
1696 UNSPEC_VPADD))]
1697 "TARGET_NEON"
1698 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1699 ;; Assume this schedules like vadd.
1700 [(set (attr "type")
1701 (if_then_else (match_test "<Is_float_mode>")
1702 (const_string "neon_fp_reduc_add_s<q>")
1703 (const_string "neon_reduc_add<q>")))]
1704 )
1705
;; vpadd.f16 on V4HF operands, modelled as UNSPEC_VPADD; requires
;; TARGET_NEON_FP16INST.
1706 (define_insn "neon_vpaddv4hf"
1707 [(set
1708 (match_operand:V4HF 0 "s_register_operand" "=w")
1709 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1710 (match_operand:V4HF 2 "s_register_operand" "w")]
1711 UNSPEC_VPADD))]
1712 "TARGET_NEON_FP16INST"
1713 "vpadd.f16\t%P0, %P1, %P2"
1714 [(set_attr "type" "neon_reduc_add")]
1715 )
1716
;; vpmin with the signed element suffix on VD-mode operands
;; (UNSPEC_VPSMIN); the scheduling type depends on <Is_float_mode>.
1717 (define_insn "neon_vpsmin<mode>"
1718 [(set (match_operand:VD 0 "s_register_operand" "=w")
1719 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1720 (match_operand:VD 2 "s_register_operand" "w")]
1721 UNSPEC_VPSMIN))]
1722 "TARGET_NEON"
1723 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1724 [(set (attr "type")
1725 (if_then_else (match_test "<Is_float_mode>")
1726 (const_string "neon_fp_reduc_minmax_s<q>")
1727 (const_string "neon_reduc_minmax<q>")))]
1728 )
1729
;; vpmax with the signed element suffix on VD-mode operands
;; (UNSPEC_VPSMAX); the scheduling type depends on <Is_float_mode>.
1730 (define_insn "neon_vpsmax<mode>"
1731 [(set (match_operand:VD 0 "s_register_operand" "=w")
1732 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1733 (match_operand:VD 2 "s_register_operand" "w")]
1734 UNSPEC_VPSMAX))]
1735 "TARGET_NEON"
1736 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1737 [(set (attr "type")
1738 (if_then_else (match_test "<Is_float_mode>")
1739 (const_string "neon_fp_reduc_minmax_s<q>")
1740 (const_string "neon_reduc_minmax<q>")))]
1741 )
1742
;; vpmin with the unsigned element suffix on VDI-mode operands
;; (UNSPEC_VPUMIN).
1743 (define_insn "neon_vpumin<mode>"
1744 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1745 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1746 (match_operand:VDI 2 "s_register_operand" "w")]
1747 UNSPEC_VPUMIN))]
1748 "TARGET_NEON"
1749 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1750 [(set_attr "type" "neon_reduc_minmax<q>")]
1751 )
1752
;; vpmax with the unsigned element suffix on VDI-mode operands
;; (UNSPEC_VPUMAX).
1753 (define_insn "neon_vpumax<mode>"
1754 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1755 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1756 (match_operand:VDI 2 "s_register_operand" "w")]
1757 UNSPEC_VPUMAX))]
1758 "TARGET_NEON"
1759 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1760 [(set_attr "type" "neon_reduc_minmax<q>")]
1761 )
1762
1763 ;; Saturating arithmetic
1764
1765 ; NOTE: Neon supports many more saturating variants of instructions than the
1766 ; following, but these are all GCC currently understands.
1767 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1768 ; yet either, although these patterns may be used by intrinsics when they're
1769 ; added.
1770
;; Signed saturating add (ss_plus) on VD-mode operands: vqadd with the
;; signed element suffix.
1771 (define_insn "*ss_add<mode>_neon"
1772 [(set (match_operand:VD 0 "s_register_operand" "=w")
1773 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1774 (match_operand:VD 2 "s_register_operand" "w")))]
1775 "TARGET_NEON"
1776 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1777 [(set_attr "type" "neon_qadd<q>")]
1778 )
1779
;; Unsigned saturating add (us_plus) on VD-mode operands: vqadd with the
;; unsigned element suffix.
1780 (define_insn "*us_add<mode>_neon"
1781 [(set (match_operand:VD 0 "s_register_operand" "=w")
1782 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1783 (match_operand:VD 2 "s_register_operand" "w")))]
1784 "TARGET_NEON"
1785 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1786 [(set_attr "type" "neon_qadd<q>")]
1787 )
1788
;; Signed saturating subtract (ss_minus) on VD-mode operands: vqsub with the
;; signed element suffix.
1789 (define_insn "*ss_sub<mode>_neon"
1790 [(set (match_operand:VD 0 "s_register_operand" "=w")
1791 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1792 (match_operand:VD 2 "s_register_operand" "w")))]
1793 "TARGET_NEON"
1794 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1795 [(set_attr "type" "neon_qsub<q>")]
1796 )
1797
;; Unsigned saturating subtract (us_minus) on VD-mode operands: vqsub with
;; the unsigned element suffix.
1798 (define_insn "*us_sub<mode>_neon"
1799 [(set (match_operand:VD 0 "s_register_operand" "=w")
1800 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1801 (match_operand:VD 2 "s_register_operand" "w")))]
1802 "TARGET_NEON"
1803 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1804 [(set_attr "type" "neon_qsub<q>")]
1805 )
1806
1807 ;; Conditional instructions. These are comparisons with conditional moves for
1808 ;; vectors. They perform the assignment:
1809 ;;
1810 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1811 ;;
1812 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1813 ;; element-wise.
1814
1815 (define_expand "vcond<mode><mode>"
1816 [(set (match_operand:VDQW 0 "s_register_operand" "")
1817 (if_then_else:VDQW
1818 (match_operator 3 "comparison_operator"
1819 [(match_operand:VDQW 4 "s_register_operand" "")
1820 (match_operand:VDQW 5 "nonmemory_operand" "")])
1821 (match_operand:VDQW 1 "s_register_operand" "")
1822 (match_operand:VDQW 2 "s_register_operand" "")))]
1823 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1824 {
/* Overview: the expansion proceeds in three steps.
   1. Decide whether operand 5 may stay as a literal zero (use_zero_form)
      or must be forced into a register.
   2. Pick the comparison generators (base_comparison and
      complimentary_comparison) and record in INVERSE whether the
      comparison is expressed with its operands exchanged.
   3. Emit the comparison(s) into MASK, then emit a vbsl that selects
      between operands 1 and 2, exchanging them when swap_bsl_operands
      is set.  */
1825 int inverse = 0;
1826 int use_zero_form = 0;
1827 int swap_bsl_operands = 0;
1828 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1829 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1830
1831 rtx (*base_comparison) (rtx, rtx, rtx);
1832 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1833
/* Step 1: only GE, GT, LE, LT and EQ against a zero vector keep the
   constant; every other case needs operand 5 in a register.  */
1834 switch (GET_CODE (operands[3]))
1835 {
1836 case GE:
1837 case GT:
1838 case LE:
1839 case LT:
1840 case EQ:
1841 if (operands[5] == CONST0_RTX (<MODE>mode))
1842 {
1843 use_zero_form = 1;
1844 break;
1845 }
1846 /* Fall through. */
1847 default:
1848 if (!REG_P (operands[5]))
1849 operands[5] = force_reg (<MODE>mode, operands[5]);
1850 }
1851
/* Step 2: choose the generator pair.  INVERSE is set for the "less
   than" style codes, which are expressed through vcge/vcgt.  */
1852 switch (GET_CODE (operands[3]))
1853 {
1854 case LT:
1855 case UNLT:
1856 inverse = 1;
1857 /* Fall through. */
1858 case GE:
1859 case UNGE:
1860 case ORDERED:
1861 case UNORDERED:
1862 base_comparison = gen_neon_vcge<mode>;
1863 complimentary_comparison = gen_neon_vcgt<mode>;
1864 break;
1865 case LE:
1866 case UNLE:
1867 inverse = 1;
1868 /* Fall through. */
1869 case GT:
1870 case UNGT:
1871 base_comparison = gen_neon_vcgt<mode>;
1872 complimentary_comparison = gen_neon_vcge<mode>;
1873 break;
1874 case EQ:
1875 case NE:
1876 case UNEQ:
1877 base_comparison = gen_neon_vceq<mode>;
1878 complimentary_comparison = gen_neon_vceq<mode>;
1879 break;
1880 default:
1881 gcc_unreachable ();
1882 }
1883
/* Step 3: emit the comparison(s) that compute MASK.  */
1884 switch (GET_CODE (operands[3]))
1885 {
1886 case LT:
1887 case LE:
1888 case GT:
1889 case GE:
1890 case EQ:
1891 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1892 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1893 a GE b -> a GE b
1894 a GT b -> a GT b
1895 a LE b -> b GE a
1896 a LT b -> b GT a
1897 a EQ b -> a EQ b
1898 Note that there also exist direct comparison against 0 forms,
1899 so catch those as a special case. */
1900 if (use_zero_form)
1901 {
/* The zero forms compare operand 4 against 0 directly, so no operand
   exchange is wanted; LT and LE get their own generators here.  */
1902 inverse = 0;
1903 switch (GET_CODE (operands[3]))
1904 {
1905 case LT:
1906 base_comparison = gen_neon_vclt<mode>;
1907 break;
1908 case LE:
1909 base_comparison = gen_neon_vcle<mode>;
1910 break;
1911 default:
1912 /* Do nothing, other zero form cases already have the correct
1913 base_comparison. */
1914 break;
1915 }
1916 }
1917
1918 if (!inverse)
1919 emit_insn (base_comparison (mask, operands[4], operands[5]));
1920 else
1921 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1922 break;
1923 case UNLT:
1924 case UNLE:
1925 case UNGT:
1926 case UNGE:
1927 case NE:
1928 /* Vector compare returns false for lanes which are unordered, so if we use
1929 the inverse of the comparison we actually want to emit, then
1930 swap the operands to BSL, we will end up with the correct result.
1931 Note that a NE NaN and NaN NE b are true for all a, b.
1932
1933 Our transformations are:
1934 a UNGE b -> !(b GT a)
1935 a UNGT b -> !(b GE a)
1936 a UNLE b -> !(a GT b)
1937 a UNLT b -> !(a GE b)
1938 a NE b -> !(a EQ b) */
1939
1940 if (inverse)
1941 emit_insn (base_comparison (mask, operands[4], operands[5]));
1942 else
1943 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1944
1945 swap_bsl_operands = 1;
1946 break;
1947 case UNEQ:
1948 /* We check (a > b || b > a). Combining these comparisons gives us
1949 true iff (a != b && a ORDERED b); swapping the operands to BSL
1950 will then give us (a == b || a UNORDERED b) as intended. */
1951
1952 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1953 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1954 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1955 swap_bsl_operands = 1;
1956 break;
1957 case UNORDERED:
1958 /* Operands are ORDERED iff (a > b || b >= a).
1959 Swapping the operands to BSL will give the UNORDERED case. */
1960 swap_bsl_operands = 1;
1961 /* Fall through. */
1962 case ORDERED:
1963 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1964 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1965 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1966 break;
1967 default:
1968 gcc_unreachable ();
1969 }
1970
/* Select between operands 1 and 2 under MASK; the two are exchanged
   when the mask holds the negation of the requested condition.  */
1971 if (swap_bsl_operands)
1972 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1973 operands[1]));
1974 else
1975 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1976 operands[2]));
1977 DONE;
1978 })
1979
;; Unsigned integer vector conditional select:
;;   Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2
;; for op3 in GEU, GTU, EQ, LEU, LTU, NE (any other code hits
;; gcc_unreachable).  A mask is computed with vcgeu/vcgtu/vceq; LEU and LTU
;; are expressed by exchanging the comparison operands, and NE by computing
;; EQ and exchanging operands 1 and 2 in the final vbsl.
;; NOTE(review): when operand 5 is the zero vector, the LEU/LTU cases call the
;; vcle/vclt generators, which are the ones vcond uses for signed LE/LT.
;; Operand 5's predicate here is s_register_operand, so this path is
;; presumably never taken -- confirm before relying on it.
1980 (define_expand "vcondu<mode><mode>"
1981 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1982 (if_then_else:VDQIW
1983 (match_operator 3 "arm_comparison_operator"
1984 [(match_operand:VDQIW 4 "s_register_operand" "")
1985 (match_operand:VDQIW 5 "s_register_operand" "")])
1986 (match_operand:VDQIW 1 "s_register_operand" "")
1987 (match_operand:VDQIW 2 "s_register_operand" "")))]
1988 "TARGET_NEON"
1989 {
1990 rtx mask;
1991 int inverse = 0, immediate_zero = 0;
1992
1993 mask = gen_reg_rtx (<V_cmp_result>mode);
1994
1995 if (operands[5] == CONST0_RTX (<MODE>mode))
1996 immediate_zero = 1;
1997 else if (!REG_P (operands[5]))
1998 operands[5] = force_reg (<MODE>mode, operands[5]);
1999
2000 switch (GET_CODE (operands[3]))
2001 {
2002 case GEU:
2003 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2004 break;
2005
2006 case GTU:
2007 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2008 break;
2009
2010 case EQ:
2011 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2012 break;
2013
2014 case LEU:
2015 if (immediate_zero)
2016 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2017 else
2018 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2019 break;
2020
2021 case LTU:
2022 if (immediate_zero)
2023 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2024 else
2025 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2026 break;
2027
2028 case NE:
2029 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2030 inverse = 1;
2031 break;
2032
2033 default:
2034 gcc_unreachable ();
2035 }
2036
2037 if (inverse)
2038 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2039 operands[1]));
2040 else
2041 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2042 operands[2]));
2043
2044 DONE;
2045 })
2046
2047 ;; Patterns for builtins.
2048
2049 ; good for plain vadd, vaddq.
2050
;; Expander for the vadd builtin on VCVTF modes (operand 0 = operand 1 +
;; operand 2).  It emits the generic add<mode>3 pattern unless the mode is a
;; float mode and flag_unsafe_math_optimizations is off, in which case it
;; emits neon_vadd<mode>_unspec instead.
2051 (define_expand "neon_vadd<mode>"
2052 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2053 (match_operand:VCVTF 1 "s_register_operand" "w")
2054 (match_operand:VCVTF 2 "s_register_operand" "w")]
2055 "TARGET_NEON"
2056 {
2057 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2058 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2059 else
2060 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2061 operands[2]));
2062 DONE;
2063 })
2064
;; Expander for the vadd builtin on VH modes.  Enabled under
;; TARGET_NEON_FP16INST; it always forwards to add<mode>3_fp16.
2065 (define_expand "neon_vadd<mode>"
2066 [(match_operand:VH 0 "s_register_operand")
2067 (match_operand:VH 1 "s_register_operand")
2068 (match_operand:VH 2 "s_register_operand")]
2069 "TARGET_NEON_FP16INST"
2070 {
2071 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
2072 DONE;
2073 })
2074
;; Expander for the vsub builtin on VH modes.  Enabled under
;; TARGET_NEON_FP16INST; it always forwards to sub<mode>3_fp16.
2075 (define_expand "neon_vsub<mode>"
2076 [(match_operand:VH 0 "s_register_operand")
2077 (match_operand:VH 1 "s_register_operand")
2078 (match_operand:VH 2 "s_register_operand")]
2079 "TARGET_NEON_FP16INST"
2080 {
2081 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2082 DONE;
2083 })
2084
2085 ; Note that NEON operations don't support the full IEEE 754 standard: in
2086 ; particular, denormal values are flushed to zero. This means that GCC cannot
2087 ; use those instructions for autovectorization, etc. unless
2088 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2089 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2090 ; header) must work in either case: if -funsafe-math-optimizations is given,
2091 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2092 ; expand to unspecs (which may potentially limit the extent to which they might
2093 ; be optimized by generic code).
2094
2095 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2096
;; Addition wrapped in UNSPEC_VADD, the form neon_vadd<mode> emits for float
;; modes when flag_unsafe_math_optimizations is off.  Outputs
;; vadd.<V_if_elem>; the type is neon_fp_addsub_s<q> for float modes and
;; neon_add<q> otherwise.
2097 (define_insn "neon_vadd<mode>_unspec"
2098 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2099 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2100 (match_operand:VCVTF 2 "s_register_operand" "w")]
2101 UNSPEC_VADD))]
2102 "TARGET_NEON"
2103 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2104 [(set (attr "type")
2105 (if_then_else (match_test "<Is_float_mode>")
2106 (const_string "neon_fp_addsub_s<q>")
2107 (const_string "neon_add<q>")))]
2108 )
2109
;; vaddl: both sources are VDI-mode vectors and the result has the
;; <V_widen> mode.  The unspec code comes from the VADDL iterator, which also
;; supplies <sup> for the pattern name and the element-type letter.
2110 (define_insn "neon_vaddl<sup><mode>"
2111 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2112 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2113 (match_operand:VDI 2 "s_register_operand" "w")]
2114 VADDL))]
2115 "TARGET_NEON"
2116 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2117 [(set_attr "type" "neon_add_long")]
2118 )
2119
;; vaddw: operand 1 and the result have the <V_widen> mode, operand 2 is a
;; VDI-mode vector.  The unspec code comes from the VADDW iterator, which also
;; supplies <sup>.
2120 (define_insn "neon_vaddw<sup><mode>"
2121 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2122 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2123 (match_operand:VDI 2 "s_register_operand" "w")]
2124 VADDW))]
2125 "TARGET_NEON"
2126 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2127 [(set_attr "type" "neon_add_widen")]
2128 )
2129
2130 ; vhadd and vrhadd.
2131
;; vhadd / vrhadd on VDQIW modes.  The unspec code comes from the VHADD
;; iterator, which supplies <r> and <sup> for the name and the mnemonic.
;; The type uses the <q> mode attribute so that it follows the mode of the
;; pattern instance, the same way neon_vhsub uses neon_sub_halve<q>.
2132 (define_insn "neon_v<r>hadd<sup><mode>"
2133 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2134 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2135 (match_operand:VDQIW 2 "s_register_operand" "w")]
2136 VHADD))]
2137 "TARGET_NEON"
2138 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2139 [(set_attr "type" "neon_add_halve<q>")]
2140 )
2141
;; vqadd builtin on VDQIX modes, expressed as an unspec whose code comes from
;; the VQADD iterator (which also supplies <sup>).  Type is neon_qadd<q>.
2142 (define_insn "neon_vqadd<sup><mode>"
2143 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2144 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2145 (match_operand:VDQIX 2 "s_register_operand" "w")]
2146 VQADD))]
2147 "TARGET_NEON"
2148 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2149 [(set_attr "type" "neon_qadd<q>")]
2150 )
2151
;; vaddhn / vraddhn: both sources are VN-mode vectors and the result has the
;; <V_narrow> mode.  The unspec code comes from the VADDHN iterator, which
;; supplies <r>.
2152 (define_insn "neon_v<r>addhn<mode>"
2153 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2154 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2155 (match_operand:VN 2 "s_register_operand" "w")]
2156 VADDHN))]
2157 "TARGET_NEON"
2158 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2159 [(set_attr "type" "neon_add_halve_narrow_q")]
2160 )
2161
2162 ;; Polynomial and Float multiplication.
;; Multiplication on VPF modes wrapped in UNSPEC_VMUL.  <pf> supplies the
;; element-type letter of the mnemonic.  The type is neon_fp_mul_s<q> for
;; float modes and neon_mul_<V_elem_ch><q> otherwise.
2163 (define_insn "neon_vmul<pf><mode>"
2164 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2165 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2166 (match_operand:VPF 2 "s_register_operand" "w")]
2167 UNSPEC_VMUL))]
2168 "TARGET_NEON"
2169 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2170 [(set (attr "type")
2171 (if_then_else (match_test "<Is_float_mode>")
2172 (const_string "neon_fp_mul_s<q>")
2173 (const_string "neon_mul_<V_elem_ch><q>")))]
2174 )
2175
;; Multiplication on VH modes, expressed with the plain mult RTL code rather
;; than an unspec.  Enabled under TARGET_NEON_FP16INST; outputs vmul.f16.
2176 (define_insn "neon_vmulf<mode>"
2177 [(set
2178 (match_operand:VH 0 "s_register_operand" "=w")
2179 (mult:VH
2180 (match_operand:VH 1 "s_register_operand" "w")
2181 (match_operand:VH 2 "s_register_operand" "w")))]
2182 "TARGET_NEON_FP16INST"
2183 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2184 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2185 )
2186
;; Expander for the vmla builtin on VDQW modes.  It emits
;; mul<mode>3add<mode>_neon unless the mode is a float mode and
;; flag_unsafe_math_optimizations is off, in which case it emits
;; neon_vmla<mode>_unspec.  All four operands are passed through unchanged.
2187 (define_expand "neon_vmla<mode>"
2188 [(match_operand:VDQW 0 "s_register_operand" "=w")
2189 (match_operand:VDQW 1 "s_register_operand" "0")
2190 (match_operand:VDQW 2 "s_register_operand" "w")
2191 (match_operand:VDQW 3 "s_register_operand" "w")]
2192 "TARGET_NEON"
2193 {
2194 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2195 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2196 operands[2], operands[3]));
2197 else
2198 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2199 operands[2], operands[3]));
2200 DONE;
2201 })
2202
;; Expander for the vfma builtin on VCVTF modes.  Requires TARGET_NEON and
;; TARGET_FMA.  Forwards to fma<mode>4_intrinsic with the operands reordered:
;; operand 1 of the builtin is passed as the last argument.
2203 (define_expand "neon_vfma<VCVTF:mode>"
2204 [(match_operand:VCVTF 0 "s_register_operand")
2205 (match_operand:VCVTF 1 "s_register_operand")
2206 (match_operand:VCVTF 2 "s_register_operand")
2207 (match_operand:VCVTF 3 "s_register_operand")]
2208 "TARGET_NEON && TARGET_FMA"
2209 {
2210 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2211 operands[1]));
2212 DONE;
2213 })
2214
;; Expander for the vfma builtin on VH modes.  Requires
;; TARGET_NEON_FP16INST.  Forwards to fma<mode>4_intrinsic with the operands
;; reordered: operand 1 of the builtin is passed as the last argument.
2215 (define_expand "neon_vfma<VH:mode>"
2216 [(match_operand:VH 0 "s_register_operand")
2217 (match_operand:VH 1 "s_register_operand")
2218 (match_operand:VH 2 "s_register_operand")
2219 (match_operand:VH 3 "s_register_operand")]
2220 "TARGET_NEON_FP16INST"
2221 {
2222 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2223 operands[1]));
2224 DONE;
2225 })
2226
;; Expander for the vfms builtin on VCVTF modes.  Requires TARGET_NEON and
;; TARGET_FMA.  Forwards to fmsub<mode>4_intrinsic with the operands
;; reordered: operand 1 of the builtin is passed as the last argument.
2227 (define_expand "neon_vfms<VCVTF:mode>"
2228 [(match_operand:VCVTF 0 "s_register_operand")
2229 (match_operand:VCVTF 1 "s_register_operand")
2230 (match_operand:VCVTF 2 "s_register_operand")
2231 (match_operand:VCVTF 3 "s_register_operand")]
2232 "TARGET_NEON && TARGET_FMA"
2233 {
2234 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2235 operands[1]));
2236 DONE;
2237 })
2238
;; Expander for the vfms builtin on VH modes.  Requires
;; TARGET_NEON_FP16INST.  Forwards to fmsub<mode>4_intrinsic with the operands
;; reordered: operand 1 of the builtin is passed as the last argument.
2239 (define_expand "neon_vfms<VH:mode>"
2240 [(match_operand:VH 0 "s_register_operand")
2241 (match_operand:VH 1 "s_register_operand")
2242 (match_operand:VH 2 "s_register_operand")
2243 (match_operand:VH 3 "s_register_operand")]
2244 "TARGET_NEON_FP16INST"
2245 {
2246 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2247 operands[1]));
2248 DONE;
2249 })
2250
2251 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2252
;; Multiply-accumulate wrapped in UNSPEC_VMLA.  Operand 1 is tied to the
;; output register by the "0" constraint, so the template prints only
;; operands 0, 2 and 3.  The type is neon_fp_mla_s<q> for float modes and
;; neon_mla_<V_elem_ch><q> otherwise.
2253 (define_insn "neon_vmla<mode>_unspec"
2254 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2255 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2256 (match_operand:VDQW 2 "s_register_operand" "w")
2257 (match_operand:VDQW 3 "s_register_operand" "w")]
2258 UNSPEC_VMLA))]
2259 "TARGET_NEON"
2260 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2261 [(set (attr "type")
2262 (if_then_else (match_test "<Is_float_mode>")
2263 (const_string "neon_fp_mla_s<q>")
2264 (const_string "neon_mla_<V_elem_ch><q>")))]
2265 )
2266
;; vmlal: operand 1 (tied to the output by "0") and the result have the
;; <V_widen> mode; operands 2 and 3 are VW-mode vectors.  The unspec code
;; comes from the VMLAL iterator, which also supplies <sup>.
2267 (define_insn "neon_vmlal<sup><mode>"
2268 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2269 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2270 (match_operand:VW 2 "s_register_operand" "w")
2271 (match_operand:VW 3 "s_register_operand" "w")]
2272 VMLAL))]
2273 "TARGET_NEON"
2274 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2275 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2276 )
2277
;; Expander for the vmls builtin on VDQW modes.  It emits
;; mul<mode>3neg<mode>add<mode>_neon unless the mode is a float mode and
;; flag_unsafe_math_optimizations is off, in which case it emits
;; neon_vmls<mode>_unspec.  All four operands are passed through unchanged.
2278 (define_expand "neon_vmls<mode>"
2279 [(match_operand:VDQW 0 "s_register_operand" "=w")
2280 (match_operand:VDQW 1 "s_register_operand" "0")
2281 (match_operand:VDQW 2 "s_register_operand" "w")
2282 (match_operand:VDQW 3 "s_register_operand" "w")]
2283 "TARGET_NEON"
2284 {
2285 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2286 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2287 operands[1], operands[2], operands[3]));
2288 else
2289 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2290 operands[2], operands[3]));
2291 DONE;
2292 })
2293
2294 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2295
;; Multiply-subtract wrapped in UNSPEC_VMLS.  Operand 1 is tied to the output
;; register by the "0" constraint, so the template prints only operands 0, 2
;; and 3.  The type is neon_fp_mla_s<q> for float modes and
;; neon_mla_<V_elem_ch><q> otherwise.
2296 (define_insn "neon_vmls<mode>_unspec"
2297 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2298 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2299 (match_operand:VDQW 2 "s_register_operand" "w")
2300 (match_operand:VDQW 3 "s_register_operand" "w")]
2301 UNSPEC_VMLS))]
2302 "TARGET_NEON"
2303 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2304 [(set (attr "type")
2305 (if_then_else (match_test "<Is_float_mode>")
2306 (const_string "neon_fp_mla_s<q>")
2307 (const_string "neon_mla_<V_elem_ch><q>")))]
2308 )
2309
;; vmlsl: operand 1 (tied to the output by "0") and the result have the
;; <V_widen> mode; operands 2 and 3 are VW-mode vectors.  The unspec code
;; comes from the VMLSL iterator, which also supplies <sup>.
2310 (define_insn "neon_vmlsl<sup><mode>"
2311 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2312 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2313 (match_operand:VW 2 "s_register_operand" "w")
2314 (match_operand:VW 3 "s_register_operand" "w")]
2315 VMLSL))]
2316 "TARGET_NEON"
2317 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2318 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2319 )
2320
2321 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics named in the comment above: the VQDMULH
;; iterator supplies the unspec code and <r>.  Operates on VMDQI modes and
;; prints the element type as <V_s_elem>.
2322 (define_insn "neon_vq<r>dmulh<mode>"
2323 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2324 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2325 (match_operand:VMDQI 2 "s_register_operand" "w")]
2326 VQDMULH))]
2327 "TARGET_NEON"
2328 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2329 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2330 )
2331
2332 ;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics named in the comment above: the VQRDMLH_AS
;; iterator supplies the unspec code and, through neon_rdma_as, the letter
;; that completes the mnemonic.  Enabled under TARGET_NEON_RDMA.  Operand 1 is
;; tied to the output by the "0" constraint, so only operands 0, 2 and 3 are
;; printed.
2333 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2334 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2335 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2336 (match_operand:VMDQI 2 "s_register_operand" "w")
2337 (match_operand:VMDQI 3 "s_register_operand" "w")]
2338 VQRDMLH_AS))]
2339 "TARGET_NEON_RDMA"
2340 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2341 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2342 )
2343
;; vqdmlal wrapped in UNSPEC_VQDMLAL.  Operand 1 (tied to the output by "0")
;; and the result have the <V_widen> mode; operands 2 and 3 are VMDI-mode
;; vectors.
2344 (define_insn "neon_vqdmlal<mode>"
2345 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2346 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2347 (match_operand:VMDI 2 "s_register_operand" "w")
2348 (match_operand:VMDI 3 "s_register_operand" "w")]
2349 UNSPEC_VQDMLAL))]
2350 "TARGET_NEON"
2351 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2352 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2353 )
2354
;; vqdmlsl wrapped in UNSPEC_VQDMLSL.  Operand 1 (tied to the output by "0")
;; and the result have the <V_widen> mode; operands 2 and 3 are VMDI-mode
;; vectors.
2355 (define_insn "neon_vqdmlsl<mode>"
2356 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2357 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2358 (match_operand:VMDI 2 "s_register_operand" "w")
2359 (match_operand:VMDI 3 "s_register_operand" "w")]
2360 UNSPEC_VQDMLSL))]
2361 "TARGET_NEON"
2362 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2363 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2364 )
2365
;; vmull: both sources are VW-mode vectors and the result has the <V_widen>
;; mode.  The unspec code comes from the VMULL iterator, which also supplies
;; <sup>.
2366 (define_insn "neon_vmull<sup><mode>"
2367 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2368 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2369 (match_operand:VW 2 "s_register_operand" "w")]
2370 VMULL))]
2371 "TARGET_NEON"
2372 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2373 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2374 )
2375
;; vqdmull wrapped in UNSPEC_VQDMULL: both sources are VMDI-mode vectors and
;; the result has the <V_widen> mode.
2376 (define_insn "neon_vqdmull<mode>"
2377 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2378 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2379 (match_operand:VMDI 2 "s_register_operand" "w")]
2380 UNSPEC_VQDMULL))]
2381 "TARGET_NEON"
2382 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2383 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2384 )
2385
;; Expander for the vsub builtin on VCVTF modes (operand 0 = operand 1 -
;; operand 2).  It emits the generic sub<mode>3 pattern unless the mode is a
;; float mode and flag_unsafe_math_optimizations is off, in which case it
;; emits neon_vsub<mode>_unspec instead.
2386 (define_expand "neon_vsub<mode>"
2387 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2388 (match_operand:VCVTF 1 "s_register_operand" "w")
2389 (match_operand:VCVTF 2 "s_register_operand" "w")]
2390 "TARGET_NEON"
2391 {
2392 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2393 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2394 else
2395 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2396 operands[2]));
2397 DONE;
2398 })
2399
2400 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2401
;; Subtraction wrapped in UNSPEC_VSUB, the form neon_vsub<mode> emits for
;; float modes when flag_unsafe_math_optimizations is off.  Outputs
;; vsub.<V_if_elem>; the type is neon_fp_addsub_s<q> for float modes and
;; neon_sub<q> otherwise.
2402 (define_insn "neon_vsub<mode>_unspec"
2403 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2404 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2405 (match_operand:VCVTF 2 "s_register_operand" "w")]
2406 UNSPEC_VSUB))]
2407 "TARGET_NEON"
2408 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2409 [(set (attr "type")
2410 (if_then_else (match_test "<Is_float_mode>")
2411 (const_string "neon_fp_addsub_s<q>")
2412 (const_string "neon_sub<q>")))]
2413 )
2414
;; vsubl: both sources are VDI-mode vectors and the result has the
;; <V_widen> mode.  The unspec code comes from the VSUBL iterator, which also
;; supplies <sup>.
2415 (define_insn "neon_vsubl<sup><mode>"
2416 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2417 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2418 (match_operand:VDI 2 "s_register_operand" "w")]
2419 VSUBL))]
2420 "TARGET_NEON"
2421 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2422 [(set_attr "type" "neon_sub_long")]
2423 )
2424
;; vsubw: operand 1 and the result have the <V_widen> mode, operand 2 is a
;; VDI-mode vector.  The unspec code comes from the VSUBW iterator, which also
;; supplies <sup>.
2425 (define_insn "neon_vsubw<sup><mode>"
2426 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2427 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2428 (match_operand:VDI 2 "s_register_operand" "w")]
2429 VSUBW))]
2430 "TARGET_NEON"
2431 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2432 [(set_attr "type" "neon_sub_widen")]
2433 )
2434
;; vqsub builtin on VDQIX modes, expressed as an unspec whose code comes from
;; the VQSUB iterator (which also supplies <sup>).  Type is neon_qsub<q>.
2435 (define_insn "neon_vqsub<sup><mode>"
2436 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2437 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2438 (match_operand:VDQIX 2 "s_register_operand" "w")]
2439 VQSUB))]
2440 "TARGET_NEON"
2441 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2442 [(set_attr "type" "neon_qsub<q>")]
2443 )
2444
;; vhsub builtin on VDQIW modes, expressed as an unspec whose code comes from
;; the VHSUB iterator (which also supplies <sup>).  Type is
;; neon_sub_halve<q>.
2445 (define_insn "neon_vhsub<sup><mode>"
2446 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2447 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2448 (match_operand:VDQIW 2 "s_register_operand" "w")]
2449 VHSUB))]
2450 "TARGET_NEON"
2451 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2452 [(set_attr "type" "neon_sub_halve<q>")]
2453 )
2454
;; vsubhn / vrsubhn: both sources are VN-mode vectors and the result has the
;; <V_narrow> mode.  The unspec code comes from the VSUBHN iterator, which
;; supplies <r>.
2455 (define_insn "neon_v<r>subhn<mode>"
2456 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2457 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2458 (match_operand:VN 2 "s_register_operand" "w")]
2459 VSUBHN))]
2460 "TARGET_NEON"
2461 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2462 [(set_attr "type" "neon_sub_halve_narrow_q")]
2463 )
2464
2465 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2466 ;; without unsafe math optimizations.
;; Expander for the vector compare builtins on VDQW modes, one instance per
;; code in the COMPARISONS iterator.  Operand 0 receives the result in the
;; <V_cmp_result> mode; operand 2 may be a register or zero.  For a float
;; vector mode without flag_unsafe_math_optimizations the V2SF/V4SF
;; *_insn_unspec pattern is emitted (any other mode on that path hits
;; gcc_unreachable); otherwise neon_vc<cmp_op><mode>_insn is emitted.
2467 (define_expand "neon_vc<cmp_op><mode>"
2468 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2469 (neg:<V_cmp_result>
2470 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2471 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2472 "TARGET_NEON"
2473 {
2474 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2475 are enabled. */
2476 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2477 && !flag_unsafe_math_optimizations)
2478 {
2479 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2480 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2481 whereas this expander iterates over the integer modes as well,
2482 but we will never expand to UNSPECs for the integer comparisons. */
2483 switch (<MODE>mode)
2484 {
2485 case V2SFmode:
2486 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2487 operands[1],
2488 operands[2]));
2489 break;
2490 case V4SFmode:
2491 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2492 operands[1],
2493 operands[2]));
2494 break;
2495 default:
2496 gcc_unreachable ();
2497 }
2498 }
2499 else
2500 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2501 operands[1],
2502 operands[2]));
2503 DONE;
2504 }
2505 )
2506
;; Vector compare expressed in canonical RTL: the negated COMPARISONS result.
;; Not available for float vector modes unless flag_unsafe_math_optimizations
;; is set.  The output code builds the template at run time: the element-type
;; letter is "f" for float vector modes and <cmp_type> otherwise, and the last
;; operand is the register (alternative 0) or the literal "#0" (alternative
;; 1).  The text is emitted through output_asm_insn, so the returned template
;; is empty.  The type is neon_compare_zero<q> when operand 2 matches
;; zero_operand, else neon_compare<q>.
2507 (define_insn "neon_vc<cmp_op><mode>_insn"
2508 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2509 (neg:<V_cmp_result>
2510 (COMPARISONS:<V_cmp_result>
2511 (match_operand:VDQW 1 "s_register_operand" "w,w")
2512 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2513 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2514 && !flag_unsafe_math_optimizations)"
2515 {
2516 char pattern[100];
2517 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2518 " %%<V_reg>1, %s",
2519 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2520 ? "f" : "<cmp_type>",
2521 which_alternative == 0
2522 ? "%<V_reg>2" : "#0");
2523 output_asm_insn (pattern, operands);
2524 return "";
2525 }
2526 [(set (attr "type")
2527 (if_then_else (match_operand 2 "zero_operand")
2528 (const_string "neon_compare_zero<q>")
2529 (const_string "neon_compare<q>")))]
2530 )
2531
;; Unspec form of the float vector compares on VCVTF modes; the unspec code
;; comes from the NEON_VCMP iterator.  The template is built at run time with
;; a fixed "f" element-type letter; the last operand is the register
;; (alternative 0) or the literal "#0" (alternative 1).  The text is emitted
;; through output_asm_insn, so the returned template is empty.
2532 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2533 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2534 (unspec:<V_cmp_result>
2535 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2536 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2537 NEON_VCMP))]
2538 "TARGET_NEON"
2539 {
2540 char pattern[100];
2541 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2542 " %%<V_reg>1, %s",
2543 which_alternative == 0
2544 ? "%<V_reg>2" : "#0");
2545 output_asm_insn (pattern, operands);
2546 return "";
2547 }
2548 [(set_attr "type" "neon_fp_compare_s<q>")]
2549 )
2550
;; Expander for the vector compare builtins on VH modes, one instance per
;; code in the COMPARISONS iterator.  Enabled under TARGET_NEON_FP16INST.
;; Emits the *_fp16insn_unspec pattern for a float vector mode without
;; flag_unsafe_math_optimizations, and the *_fp16insn pattern otherwise.
2551 (define_expand "neon_vc<cmp_op><mode>"
2552 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2553 (neg:<V_cmp_result>
2554 (COMPARISONS:VH
2555 (match_operand:VH 1 "s_register_operand")
2556 (match_operand:VH 2 "reg_or_zero_operand")))]
2557 "TARGET_NEON_FP16INST"
2558 {
2559 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2560 are enabled. */
2561 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2562 && !flag_unsafe_math_optimizations)
2563 emit_insn
2564 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2565 (operands[0], operands[1], operands[2]));
2566 else
2567 emit_insn
2568 (gen_neon_vc<cmp_op><mode>_fp16insn
2569 (operands[0], operands[1], operands[2]));
2570 DONE;
2571 })
2572
;; VH-mode vector compare expressed in canonical RTL (negated COMPARISONS
;; result).  Requires TARGET_NEON_FP16INST and is not available for float
;; vector modes unless flag_unsafe_math_optimizations is set.  The template is
;; built at run time exactly as in neon_vc<cmp_op><mode>_insn: "f" or
;; <cmp_type> as the element-type letter, and a register or "#0" as the last
;; operand depending on the alternative.
2573 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2574 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2575 (neg:<V_cmp_result>
2576 (COMPARISONS:<V_cmp_result>
2577 (match_operand:VH 1 "s_register_operand" "w,w")
2578 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2579 "TARGET_NEON_FP16INST
2580 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2581 && !flag_unsafe_math_optimizations)"
2582 {
2583 char pattern[100];
2584 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2585 " %%<V_reg>1, %s",
2586 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2587 ? "f" : "<cmp_type>",
2588 which_alternative == 0
2589 ? "%<V_reg>2" : "#0");
2590 output_asm_insn (pattern, operands);
2591 return "";
2592 }
2593 [(set (attr "type")
2594 (if_then_else (match_operand 2 "zero_operand")
2595 (const_string "neon_compare_zero<q>")
2596 (const_string "neon_compare<q>")))])
2597
;; Unspec form of the VH-mode vector compares; the unspec code comes from the
;; NEON_VCMP iterator.  Requires TARGET_NEON_FP16INST.  The template is built
;; at run time with a fixed "f" element-type letter; the last operand is the
;; register (alternative 0) or the literal "#0" (alternative 1).
2598 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2599 [(set
2600 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2601 (unspec:<V_cmp_result>
2602 [(match_operand:VH 1 "s_register_operand" "w,w")
2603 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2604 NEON_VCMP))]
2605 "TARGET_NEON_FP16INST"
2606 {
2607 char pattern[100];
2608 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2609 " %%<V_reg>1, %s",
2610 which_alternative == 0
2611 ? "%<V_reg>2" : "#0");
2612 output_asm_insn (pattern, operands);
2613 return "";
2614 }
2615 [(set_attr "type" "neon_fp_compare_s<q>")])
2616
;; Unsigned integer vector compares on VDQIW modes, one instance per code in
;; the GTUGEU iterator.  Both inputs must be registers (there is no zero
;; alternative); the mnemonic always carries the ".u" element type.
2617 (define_insn "neon_vc<cmp_op>u<mode>"
2618 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2619 (neg:<V_cmp_result>
2620 (GTUGEU:<V_cmp_result>
2621 (match_operand:VDQIW 1 "s_register_operand" "w")
2622 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2623 "TARGET_NEON"
2624 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2625 [(set_attr "type" "neon_compare<q>")]
2626 )
2627
;; Expander for the absolute-value compares on VCVTF modes (codes from the
;; GTGE iterator applied to abs of each input).  Emits the canonical-RTL
;; *_insn pattern when flag_unsafe_math_optimizations is set and the
;; *_insn_unspec pattern otherwise.
2628 (define_expand "neon_vca<cmp_op><mode>"
2629 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2630 (neg:<V_cmp_result>
2631 (GTGE:<V_cmp_result>
2632 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2633 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2634 "TARGET_NEON"
2635 {
2636 if (flag_unsafe_math_optimizations)
2637 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2638 operands[2]));
2639 else
2640 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2641 operands[1],
2642 operands[2]));
2643 DONE;
2644 }
2645 )
2646
;; Absolute-value compare on VCVTF modes in canonical RTL: the negated GTGE
;; comparison of abs (operand 1) against abs (operand 2).  Only available
;; when flag_unsafe_math_optimizations is set.  Outputs vac<cmp_op>.
2647 (define_insn "neon_vca<cmp_op><mode>_insn"
2648 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2649 (neg:<V_cmp_result>
2650 (GTGE:<V_cmp_result>
2651 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2652 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2653 "TARGET_NEON && flag_unsafe_math_optimizations"
2654 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2655 [(set_attr "type" "neon_fp_compare_s<q>")]
2656 )
2657
;; Unspec form of the absolute-value compares on VCVTF modes; the unspec code
;; comes from the NEON_VACMP iterator.  This is what the neon_vca expander
;; emits when flag_unsafe_math_optimizations is off.
2658 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2659 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2660 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2661 (match_operand:VCVTF 2 "s_register_operand" "w")]
2662 NEON_VACMP))]
2663 "TARGET_NEON"
2664 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2665 [(set_attr "type" "neon_fp_compare_s<q>")]
2666 )
2667
;; Expander for the absolute-value compares on VH modes (codes from the GLTE
;; iterator applied to abs of each input).  Enabled under
;; TARGET_NEON_FP16INST.  Emits the *_fp16insn pattern when
;; flag_unsafe_math_optimizations is set and the *_fp16insn_unspec pattern
;; otherwise.
2668 (define_expand "neon_vca<cmp_op><mode>"
2669 [(set
2670 (match_operand:<V_cmp_result> 0 "s_register_operand")
2671 (neg:<V_cmp_result>
2672 (GLTE:<V_cmp_result>
2673 (abs:VH (match_operand:VH 1 "s_register_operand"))
2674 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2675 "TARGET_NEON_FP16INST"
2676 {
2677 if (flag_unsafe_math_optimizations)
2678 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2679 (operands[0], operands[1], operands[2]));
2680 else
2681 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2682 (operands[0], operands[1], operands[2]));
2683 DONE;
2684 })
2685
;; Absolute-value compare on VH modes in canonical RTL: the negated GLTE
;; comparison of abs (operand 1) against abs (operand 2).  Requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
2686 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2687 [(set
2688 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2689 (neg:<V_cmp_result>
2690 (GLTE:<V_cmp_result>
2691 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2692 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2693 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2694 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2695 [(set_attr "type" "neon_fp_compare_s<q>")]
2696 )
2697
;; Unspec form of the absolute-value compares on VH modes; the unspec code
;; comes from the NEON_VAGLTE iterator.  This is what the VH neon_vca expander
;; emits when flag_unsafe_math_optimizations is off.  The insn condition is
;; TARGET_NEON_FP16INST, the same gate used by that expander and by the other
;; VH-mode patterns, so the pattern cannot match where they are unavailable.
2698 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2699 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2700 (unspec:<V_cmp_result>
2701 [(match_operand:VH 1 "s_register_operand" "w")
2702 (match_operand:VH 2 "s_register_operand" "w")]
2703 NEON_VAGLTE))]
2704 "TARGET_NEON_FP16INST"
2705 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2706 [(set_attr "type" "neon_fp_compare_s<q>")]
2707 )
2708
;; Expander for the VH-mode compare-against-zero builtins.  Enabled under
;; TARGET_NEON_FP16INST.  It forwards to neon_vc<cmp_op><mode>, passing
;; CONST0_RTX (<MODE>mode) as the second comparison operand.
2709 (define_expand "neon_vc<cmp_op>z<mode>"
2710 [(set
2711 (match_operand:<V_cmp_result> 0 "s_register_operand")
2712 (COMPARISONS:<V_cmp_result>
2713 (match_operand:VH 1 "s_register_operand")
2714 (const_int 0)))]
2715 "TARGET_NEON_FP16INST"
2716 {
2717 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2718 CONST0_RTX (<MODE>mode)));
2719 DONE;
2720 })
2721
;; vtst builtin on VDQIW modes, wrapped in UNSPEC_VTST.  The mnemonic carries
;; only the element size (<V_sz_elem>), with no type letter.
2722 (define_insn "neon_vtst<mode>"
2723 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2724 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2725 (match_operand:VDQIW 2 "s_register_operand" "w")]
2726 UNSPEC_VTST))]
2727 "TARGET_NEON"
2728 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2729 [(set_attr "type" "neon_tst<q>")]
2730 )
2731
;; vabd builtin on VDQIW modes, expressed as an unspec whose code comes from
;; the VABD iterator (which also supplies <sup>).  Type is neon_abd<q>.
2732 (define_insn "neon_vabd<sup><mode>"
2733 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2734 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2735 (match_operand:VDQIW 2 "s_register_operand" "w")]
2736 VABD))]
2737 "TARGET_NEON"
2738 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2739 [(set_attr "type" "neon_abd<q>")]
2740 )
2741
;; vabd on the half-precision VH modes (UNSPEC_VABD_F); requires
;; TARGET_NEON_FP16INST.
;; NOTE(review): the scheduling type here is neon_abd<q>, whereas the
;; VCVTF variant (neon_vabdf<mode>) uses neon_fp_abd_s<q> -- confirm that
;; this difference is intended.
(define_insn "neon_vabd<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          UNSPEC_VABD_F))]
  "TARGET_NEON_FP16INST"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)
2751
;; vabd on the VCVTF modes, represented as UNSPEC_VABD_F of operands 1
;; and 2.
(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VABD_F))]
  "TARGET_NEON"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
2761
;; vabdl: the VW-mode inputs (printed as D registers) produce a result in
;; the widened mode <V_widen> (printed as a Q register).
(define_insn "neon_vabdl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VABDL))]
  "TARGET_NEON"
  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_abd_long")]
)
2771
;; vaba: adds the VABD unspec of operands 2 and 3 to the accumulator in
;; operand 1.  Operand 1 is tied to the destination by the "0" constraint,
;; which is why only operands 0, 2 and 3 appear in the output template.
(define_insn "neon_vaba<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (plus:VDQIW
          (unspec:VDQIW
            [(match_operand:VDQIW 2 "s_register_operand" "w")
             (match_operand:VDQIW 3 "s_register_operand" "w")]
            VABD)
          (match_operand:VDQIW 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2782
;; vabal: adds the widening VABDL unspec of operands 2 and 3 to the
;; <V_widen>-mode accumulator in operand 1, which is tied to the
;; destination through the "0" constraint.
(define_insn "neon_vabal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (unspec:<V_widen>
            [(match_operand:VW 2 "s_register_operand" "w")
             (match_operand:VW 3 "s_register_operand" "w")]
            VABDL)
          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2793
;; v<maxmin> on the VDQIW modes; the VMAXMIN iterator supplies both the
;; mnemonic (<maxmin>) and the signedness letter (<sup>).
(define_insn "neon_v<maxmin><sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VMAXMIN))]
  "TARGET_NEON"
  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)
2803
;; v<maxmin> on the VCVTF modes, as a VMAXMINF unspec of operands 1 and 2.
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINF))]
  "TARGET_NEON"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2813
;; v<maxmin> on the half-precision VH modes (VMAXMINF unspec); requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:VH 2 "s_register_operand" "w")]
          VMAXMINF))]
  "TARGET_NEON_FP16INST"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2824
;; vp<maxmin>.f16 on V4HF only (VPMAXMINF unspec); all three operands are
;; printed as D registers.  Requires TARGET_NEON_FP16INST.
(define_insn "neon_vp<maxmin>fv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4HF 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand" "w")]
          VPMAXMINF))]
  "TARGET_NEON_FP16INST"
  "vp<maxmin>.f16\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax")]
)
2835
;; <fmaxmin_op> on the half-precision VH modes, as a VMAXMINFNM unspec of
;; operands 1 and 2.  Requires TARGET_NEON_FP16INST.
(define_insn "neon_<fmaxmin_op><mode>"
  [(set
     (match_operand:VH 0 "s_register_operand" "=w")
     (unspec:VH
       [(match_operand:VH 1 "s_register_operand" "w")
        (match_operand:VH 2 "s_register_operand" "w")]
       VMAXMINFNM))]
  "TARGET_NEON_FP16INST"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2847
;; Vector forms of the IEEE-754 fmax()/fmin() functions.  These carry the
;; standard <fmaxmin><mode>3 names and are available only when both
;; TARGET_NEON and TARGET_FPU_ARMV8 hold.
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINFNM))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2858
;; Builtin entry point for vpadd on the VD modes; it simply forwards its
;; three operands to neon_vpadd_internal<mode>.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand" "=w")
   (match_operand:VD 1 "s_register_operand" "w")
   (match_operand:VD 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  emit_insn (gen_neon_vpadd_internal<mode> (dest, lhs, rhs));
  DONE;
})
2869
;; vpaddl: single-input VPADDL unspec whose result has the
;; <V_double_width> mode of the VDQIW input.
(define_insn "neon_vpaddl<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          VPADDL))]
  "TARGET_NEON"
  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_reduc_add_long")]
)
2878
;; vpadal: VPADAL unspec of the <V_double_width> accumulator (operand 1,
;; tied to the destination by "0") and the VDQIW input in operand 2.
(define_insn "neon_vpadal<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:<V_double_width> 1 "s_register_operand" "0")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VPADAL))]
  "TARGET_NEON"
  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_reduc_add_acc")]
)
2888
;; vp<maxmin> on the VDI modes, as a VPMAXMIN unspec of operands 1 and 2.
(define_insn "neon_vp<maxmin><sup><mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VPMAXMIN))]
  "TARGET_NEON"
  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
2898
;; vp<maxmin> on the VCVTF modes, as a VPMAXMINF unspec of operands 1
;; and 2.
(define_insn "neon_vp<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VPMAXMINF))]
  "TARGET_NEON"
  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
)
2908
;; vrecps on the VCVTF modes (UNSPEC_VRECPS of operands 1 and 2).
(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRECPS))]
  "TARGET_NEON"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)
2918
;; vrecps on the half-precision VH modes (UNSPEC_VRECPS); requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vrecps<mode>"
  [(set
     (match_operand:VH 0 "s_register_operand" "=w")
     (unspec:VH
       [(match_operand:VH 1 "s_register_operand" "w")
        (match_operand:VH 2 "s_register_operand" "w")]
       UNSPEC_VRECPS))]
  "TARGET_NEON_FP16INST"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)
2929
;; vrsqrts on the VCVTF modes (UNSPEC_VRSQRTS of operands 1 and 2).
(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRSQRTS))]
  "TARGET_NEON"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)
2939
;; vrsqrts on the half-precision VH modes (UNSPEC_VRSQRTS); requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vrsqrts<mode>"
  [(set
     (match_operand:VH 0 "s_register_operand" "=w")
     (unspec:VH
       [(match_operand:VH 1 "s_register_operand" "w")
        (match_operand:VH 2 "s_register_operand" "w")]
       UNSPEC_VRSQRTS))]
  "TARGET_NEON_FP16INST"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)
2950
;; Builtin entry point for vabs on the VDQW modes; forwards to the
;; standard abs<mode>2 pattern.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_abs<mode>2 (dest, src));
  DONE;
})
2959
;; vqabs on the VDQIW modes (UNSPEC_VQABS of operand 1).
(define_insn "neon_vqabs<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQABS))]
  "TARGET_NEON"
  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qabs<q>")]
)
2968
;; Per-element byte swap (bswap RTL code) on the VDQHSD modes, printed as
;; vrev<element size>.8.
(define_insn "neon_bswap<mode>"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD
          (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
2976
;; Builtin entry point for vneg on the VDQW modes; forwards to the
;; standard neg<mode>2 pattern.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neg<mode>2 (dest, src));
  DONE;
})
2985
;; copysign for the VCVTF modes.  A constant vector with 0x80000000 in
;; every element is loaded into a fresh <V_cmp_result>-mode register,
;; operand 2 is copied into the destination, and neon_vbsl<mode> is then
;; emitted with the mask (viewed in <MODE>mode through a subreg), the
;; destination and operand 1 as its inputs.
;;
;; Operand 0: destination.
;; Operand 1, operand 2: the two source vectors handed to the select.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
  "{
     rtx v_bitmask_cast;
     rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
     int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
     rtvec v = rtvec_alloc (n_elt);
     /* A CONST_INT has to be sign-extended from the precision of the mode
        it is used in.  GEN_INT (0x80000000) yields +2^31 when
        HOST_WIDE_INT is wider than 32 bits, which is not a canonical
        element for the integer mask vector; gen_int_mode produces the
        canonical value for the element mode.  */
     rtx sign_bit
       = gen_int_mode (0x80000000,
                       GET_MODE_INNER (<VCVTF:V_cmp_result>mode));

     /* Create bitmask for vector select.  */
     for (i = 0; i < n_elt; ++i)
       RTVEC_ELT (v, i) = sign_bit;

     emit_move_insn (v_bitmask,
                     gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
     emit_move_insn (operands[0], operands[2]);
     /* Reinterpret the integer mask register in the floating-point vector
        mode expected by neon_vbsl<mode>.  */
     v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                           <VCVTF:V_cmp_result>mode, 0);
     emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast,
                                     operands[0], operands[1]));

     DONE;
   }"
)
3012
;; vqneg on the VDQIW modes (UNSPEC_VQNEG of operand 1).
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)
3021
;; vcls on the VDQIW modes (UNSPEC_VCLS of operand 1).
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)
3030
;; Standard-named clz<mode>2 pattern for the VDQIW modes, implemented with
;; a single vclz.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
3038
;; Builtin entry point for vclz on the VDQIW modes; forwards to the
;; clz<mode>2 pattern.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_clz<mode>2 (dest, src));
  DONE;
})
3047
;; Standard-named popcount<mode>2 pattern for the VE modes, implemented
;; with a single vcnt.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE
          (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
3055
;; Builtin entry point for vcnt on the VE modes; forwards to the
;; popcount<mode>2 pattern.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand" "=w")
   (match_operand:VE 1 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_popcount<mode>2 (dest, src));
  DONE;
})
3064
;; vrecpe.f16 on the half-precision VH modes (UNSPEC_VRECPE); requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON_FP16INST"
  "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
3073
;; vrecpe on the V32 modes (UNSPEC_VRECPE); the element suffix comes from
;; <V_u_elem>.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
3082
;; vrsqrte on the V32 modes (UNSPEC_VRSQRTE); the element suffix comes
;; from <V_u_elem>.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
3091
;; Builtin entry point for vmvn on the VDQIW modes; forwards to the
;; standard one_cmpl<mode>2 pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_one_cmpl<mode>2 (dest, src));
  DONE;
})
3100
;; Sign-extending lane extract from a VD-mode vector into a core register.
;; Operand 2 is the lane index in RTL numbering; for big-endian the index
;; is mirrored within the vector before it is printed.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the index: lane i becomes lane NUNITS - 1 - i.  */
      const int rtl_lane = INTVAL (operands[2]);
      const int printed_lane = GET_MODE_NUNITS (<MODE>mode) - 1 - rtl_lane;

      operands[2] = GEN_INT (printed_lane);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
3119
;; Zero-extending lane extract from a VD-mode vector into a core register.
;; Operand 2 is the lane index in RTL numbering; for big-endian the index
;; is mirrored within the vector before it is printed.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the index: lane i becomes lane NUNITS - 1 - i.  */
      const int rtl_lane = INTVAL (operands[2]);
      const int printed_lane = GET_MODE_NUNITS (<MODE>mode) - 1 - rtl_lane;

      operands[2] = GEN_INT (printed_lane);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
3138
;; Sign-extending lane extract from a VQ2-mode (quad) vector.  The lane is
;; addressed through a <V_HALF>-mode register: the register number is that
;; of operand 1 plus 2 when the lane lies in the upper half of the lanes,
;; and the index is reduced modulo the number of lanes in one half
;; (mirrored within that half for big-endian).  The instruction is printed
;; directly with output_asm_insn, so the returned template is empty.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx xops[3];
  int base_regno = REGNO (operands[1]);
  unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % lanes_per_half;

  /* Mirror the index inside the selected half on big-endian.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode,
                         base_regno + 2 * (lane / lanes_per_half));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", xops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3165
;; Zero-extending lane extract from a VQ2-mode (quad) vector.  The lane is
;; addressed through a <V_HALF>-mode register: the register number is that
;; of operand 1 plus 2 when the lane lies in the upper half of the lanes,
;; and the index is reduced modulo the number of lanes in one half
;; (mirrored within that half for big-endian).  The instruction is printed
;; directly with output_asm_insn, so the returned template is empty.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx xops[3];
  int base_regno = REGNO (operands[1]);
  unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % lanes_per_half;

  /* Mirror the index inside the selected half on big-endian.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode,
                         base_regno + 2 * (lane / lanes_per_half));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3192
;; Builtin entry point for the (signed) vget_lane on the VDQW modes.
;; Operand 0: <V_ext>-mode destination.  Operand 1: source vector.
;; Operand 2: immediate lane number as given to the intrinsic.
;; Modes with 32-bit elements go through vec_extract<mode>; all others use
;; the sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics number lanes according to a model in which the
         element order in memory is vldm order, while generic RTL uses
         array order.  Translate the lane number into the RTL model by
         flipping it within its 64-bit register.  */
      unsigned int lane = INTVAL (operands[2]);
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);

      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1],
                                      operands[2]));
  DONE;
})
3221
;; Builtin entry point for the unsigned vget_lane on the VDQIW modes.
;; Operand 0: <V_ext>-mode destination.  Operand 1: source vector.
;; Operand 2: immediate lane number as given to the intrinsic.
;; Modes with 32-bit elements go through vec_extract<mode>; all others use
;; the zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics number lanes according to a model in which the
         element order in memory is vldm order, while generic RTL uses
         array order.  Translate the lane number into the RTL model by
         flipping it within its 64-bit register.  */
      unsigned int lane = INTVAL (operands[2]);
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);

      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1],
                                      operands[2]));
  DONE;
})
3250
;; vget_lane for a DImode "vector".  The expansion is a plain move of
;; operand 1 into operand 0; the lane operand (operand 2) is not
;; referenced.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=r")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
3260
;; vget_lane for V2DI.  Lane 0 is read with gen_lowpart and lane 1 with
;; gen_highpart of operand 1, after the big-endian lane adjustment; any
;; other lane value trips the assertion.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  int lane;
  rtx half;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics number lanes according to a model in which the
         element order in memory is vldm order, while generic RTL uses
         array order.  With two lanes the translation is a flip of the
         low bit.  */
      unsigned int flipped = INTVAL (operands[2]);
      unsigned int lanes_per_vector = 2;

      flipped ^= lanes_per_vector - 1;
      operands[2] = GEN_INT (flipped);
    }

  lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  if (lane == 0)
    half = gen_lowpart (DImode, operands[1]);
  else
    half = gen_highpart (DImode, operands[1]);

  emit_move_insn (operands[0], half);
  DONE;
})
3289
;; Builtin entry point for vset_lane on the VDQ modes.
;; Operand 0: destination vector.  Operand 1: <V_elem>-mode value to
;; insert.  Operand 2: source vector.  Operand 3: immediate lane number.
;; The lane is handed to vec_set<mode>_internal as a one-hot mask
;; (1 << lane), after flipping it within its 64-bit register on
;; big-endian.
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "=w")
   (match_operand:<V_elem> 1 "s_register_operand" "r")
   (match_operand:VDQ 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);

  if (BYTES_BIG_ENDIAN)
    {
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);

      lane ^= lanes_per_dreg - 1;
    }

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         GEN_INT (1 << lane),
                                         operands[2]));
  DONE;
})
3310
;; vset_lane for a DImode "vector".  The expansion is a plain move of
;; operand 1 into operand 0; operands 2 (the old vector) and 3 (the lane
;; number) are not referenced.
(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")
   (match_operand:DI 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx new_value = operands[1];

  emit_move_insn (dest, new_value);
  DONE;
})
3323
;; vcreate: reinterpret the DImode operand 1 in the VD_RE vector mode with
;; gen_lowpart and move it into operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand" "")
   (match_operand:DI 1 "general_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
3333
;; vdup from a core register: vec_duplicate of the <V_elem>-mode operand 1
;; into every element of a VX-mode vector.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX
          (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
3341
;; vdup.16 of an HFmode value held in a core register into a V4HF vector
;; (printed as a D register).
(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3349
;; vdup.16 of an HFmode value held in a core register into a V8HF vector
;; (printed as a Q register).
(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3357
;; vdup for the V32 modes.  Two alternatives: the scalar comes either from
;; a core register ("r", printed with %1) or from a register matching the
;; "t" constraint (printed with %y1).
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32
          (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
   vdup.<V_sz_elem>\t%<V_reg>0, %1
   vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
3367
;; vdup_n for a DImode "vector": the expansion is a plain move of
;; operand 1 into operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx scalar = operands[1];

  emit_move_insn (dest, scalar);
  DONE;
}
)
3377
;; Duplicate a DImode value into both halves of a V2DI vector.  Each
;; alternative prints two vmov instructions (hence length 8): one writing
;; %e0 and one writing %f0, sourced either from a core register pair
;; (%Q1, %R1) or from a D register (%P1).
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
   vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
   vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
3388
;; vdup from a lane: duplicate the element of operand 1 (a
;; <V_double_vector_mode> vector) selected by operand 2 into every element
;; of the VDQW destination.  On big-endian the lane index is mirrored
;; within the source vector before printing.  The destination is printed
;; as a D or Q register depending on <Is_d_reg>.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int rtl_lane = INTVAL (operands[2]);
      const int printed_lane
        = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - rtl_lane;

      operands[2] = GEN_INT (printed_lane);
    }

  if (!(<Is_d_reg>))
    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";

  return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
3410
;; vdup from a lane for the half-precision VH modes; same shape as the
;; VDQW variant but enabled only when TARGET_NEON && TARGET_FP16.  On
;; big-endian the lane index is mirrored within the source vector before
;; printing.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (vec_duplicate:VH
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON && TARGET_FP16"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int rtl_lane = INTVAL (operands[2]);
      const int printed_lane
        = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - rtl_lane;

      operands[2] = GEN_INT (printed_lane);
    }

  if (!(<Is_d_reg>))
    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";

  return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
3432
;; Builtin entry point for vdup_lane on the VDQW modes.  On big-endian the
;; lane number is flipped within its 64-bit register before being passed
;; to neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int lane = INTVAL (operands[2]);
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);

      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3451
;; Builtin entry point for vdup_lane on the half-precision VH modes
;; (TARGET_NEON && TARGET_FP16).  On big-endian the lane number is flipped
;; within its 64-bit register before being passed to
;; neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && TARGET_FP16"
{
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int lane = INTVAL (operands[2]);
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);

      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3470
;; The scalar index (operand 2) is ignored, since only zero is valid here;
;; the expansion is a plain move of operand 1 into operand 0.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
3481
;; For V2DI the scalar index (operand 2) is likewise ignored, as the
;; DImode second operand has only a single element; the expansion is
;; neon_vdup_nv2di applied to operands 0 and 1.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx scalar = operands[1];

  emit_insn (gen_neon_vdup_nv2di (dest, scalar));
  DONE;
})
3492
;; Register swap (vswp): a parallel of two sets exchanging operands 0
;; and 1, both of which are read-write ("+w").  The reload_completed
;; condition disables the pattern before reload, because we do not want
;; combine doing something silly with it; it is used by the post-reload
;; expansion of neon_vcombine.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1) (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
3503
;; vcombine: vec_concat of two VDX-mode vectors into a <V_DOUBLE>-mode
;; result.  Operand 1 should be the low part and operand 2 the high part
;; of the destination vector.  The insn itself has the template "#" and is
;; split after reload by neon_split_vcombine.
;;
;; FIXME: A different implementation of this builtin could make it much
;; more likely that nothing needs to be output at all (the register
;; allocator could be made to put things in the right places by itself).
;; The lack of subregs for vectors makes that tricky, though.

(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* All of the work is done by the helper; nothing comes from the
     replacement template above.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
3526
;; vget_high: move into operand 0 the <V_HALF>-mode subreg of operand 1
;; that starts at byte offset GET_MODE_SIZE (<V_HALF>mode).
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx upper_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                           GET_MODE_SIZE (<V_HALF>mode));

  emit_move_insn (operands[0], upper_half);
  DONE;
})
3537
;; vget_low: move into operand 0 the <V_HALF>-mode subreg of operand 1
;; that starts at byte offset 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lower_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);

  emit_move_insn (operands[0], lower_half);
  DONE;
})
3548
;; Standard-named signed integer to float conversion for the VCVTI modes
;; (vcvt.f32.s32).  Disabled under flag_rounding_math.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3556
;; Standard-named unsigned integer to float conversion for the VCVTI
;; modes (vcvt.f32.u32).  Disabled under flag_rounding_math.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3564
;; Standard-named float to signed integer conversion (fix RTL code) for
;; the VCVTF modes, printed as vcvt.s32.f32.
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3572
;; Standard-named float to unsigned integer conversion (unsigned_fix RTL
;; code) for the VCVTF modes, printed as vcvt.u32.f32.
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3580
;; Builtin float to integer conversion for the VCVTF modes, as a VCVT_US
;; unspec; <sup> selects the signedness letter of the integer result.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3589
;; Builtin integer to float conversion for the VCVTI modes, as a VCVT_US
;; unspec; <sup> selects the signedness letter of the integer source.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3598
;; Widening conversion V4HF -> V4SF (vcvt.f32.f16): D-register source,
;; Q-register destination.  Requires TARGET_NEON && TARGET_FP16.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V4HF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
3607
;; Narrowing conversion V4SF -> V4HF (vcvt.f16.f32): Q-register source,
;; D-register destination.  Requires TARGET_NEON && TARGET_FP16.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4SF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
3616
;; Builtin 16-bit integer to half-precision conversion for the VCVTHI
;; modes (VCVT_US unspec); requires TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup><mode>"
  [(set
     (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
     (unspec:<VH_CVTTO>
       [(match_operand:VCVTHI 1 "s_register_operand" "w")]
       VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
3627
;; Builtin half-precision to 16-bit integer conversion for the VH modes
;; (VCVT_US unspec); requires TARGET_NEON_FP16INST.
(define_insn "neon_vcvt<sup><mode>"
  [(set
     (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
     (unspec:<VH_CVTTO>
       [(match_operand:VH 1 "s_register_operand" "w")]
       VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3638
;; Float to integer conversion with an immediate third operand for the
;; VCVTF modes (VCVT_US_N unspec).  Operand 2 is checked with
;; neon_const_bounds against (1, 33) before the instruction is printed.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* Range-check the immediate before it reaches the assembler.  */
  neon_const_bounds (operands[2], 1, 33);

  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3651
;; Half-precision to 16-bit integer conversion with an immediate third
;; operand for the VH modes (VCVT_US_N unspec); requires
;; TARGET_NEON_FP16INST.  Operand 2 is checked with neon_const_bounds
;; against (0, 17).
;; NOTE(review): the lower bound is 0 here but 1 in the 32-bit variants of
;; this pattern -- confirm that an immediate of 0 is really encodable.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* Range-check the immediate before it reaches the assembler.  */
  neon_const_bounds (operands[2], 0, 17);

  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3665
;; Integer to float conversion with an immediate third operand for the
;; VCVTI modes (VCVT_US_N unspec).  Operand 2 is checked with
;; neon_const_bounds against (1, 33) before the instruction is printed.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* Range-check the immediate before it reaches the assembler.  */
  neon_const_bounds (operands[2], 1, 33);

  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3678
;; Conversion with an immediate second operand (VCVT_US_N unspec) from a
;; VCVTHI-mode register to <VH_CVTTO> mode, under TARGET_NEON_FP16INST.
;; Operand 2 is validated with neon_const_bounds (0, 17) before
;; "vcvt.f16.<sup>%#16" is emitted.
;; NOTE(review): the lower bound here is 0, whereas the 32-bit
;; neon_vcvt<sup>_n<mode> patterns pass 1 -- confirm this is intended.
3679 (define_insn "neon_vcvt<sup>_n<mode>"
3680 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3681 (unspec:<VH_CVTTO>
3682 [(match_operand:VCVTHI 1 "s_register_operand" "w")
3683 (match_operand:SI 2 "immediate_operand" "i")]
3684 VCVT_US_N))]
3685 "TARGET_NEON_FP16INST"
3686 {
3687 neon_const_bounds (operands[2], 0, 17);
3688 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
3689 }
3690 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
3691 )
3692
;; Conversion of a VH-mode register to <VH_CVTTO> mode through the
;; VCVT_HF_US unspec family; the <vcvth_op> attribute supplies the mnemonic
;; suffix appended to "vcvt".  Only available under TARGET_NEON_FP16INST.
3693 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
3694 [(set
3695 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3696 (unspec:<VH_CVTTO>
3697 [(match_operand:VH 1 "s_register_operand" "w")]
3698 VCVT_HF_US))]
3699 "TARGET_NEON_FP16INST"
3700 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3701 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
3702 )
3703
;; UNSPEC_VMOVN: produces a <V_narrow>-mode result from a VN-mode source
;; using "vmovn.<V_if_elem>"; the destination is printed with %P and the
;; source with %q.
3704 (define_insn "neon_vmovn<mode>"
3705 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3706 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3707 UNSPEC_VMOVN))]
3708 "TARGET_NEON"
3709 "vmovn.<V_if_elem>\t%P0, %q1"
3710 [(set_attr "type" "neon_shift_imm_narrow_q")]
3711 )
3712
;; VQMOVN unspec family: produces a <V_narrow>-mode result from a VN-mode
;; source using "vqmovn.<sup>%#<V_sz_elem>"; typed as a saturating narrow
;; shift for scheduling.
3713 (define_insn "neon_vqmovn<sup><mode>"
3714 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3715 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3716 VQMOVN))]
3717 "TARGET_NEON"
3718 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3719 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3720 )
3721
;; UNSPEC_VQMOVUN: produces a <V_narrow>-mode result from a VN-mode source
;; using "vqmovun.<V_s_elem>"; typed as a saturating narrow shift for
;; scheduling.
3722 (define_insn "neon_vqmovun<mode>"
3723 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3724 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3725 UNSPEC_VQMOVUN))]
3726 "TARGET_NEON"
3727 "vqmovun.<V_s_elem>\t%P0, %q1"
3728 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3729 )
3730
;; VMOVL unspec family: produces a <V_widen>-mode result from a VW-mode
;; source using "vmovl.<sup>%#<V_sz_elem>"; the destination is printed with
;; %q and the source with %P.
3731 (define_insn "neon_vmovl<sup><mode>"
3732 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3733 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3734 VMOVL))]
3735 "TARGET_NEON"
3736 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3737 [(set_attr "type" "neon_shift_imm_long")]
3738 )
3739
;; Multiply by a selected lane (UNSPEC_VMUL_LANE) for VMD modes.  Operand 2
;; is the register holding the scalar (restricted by
;; <scalar_mul_constraint>) and operand 3 is the immediate lane number,
;; printed as "%P2[%c3]".  The type attribute picks the floating-point or
;; integer scalar-multiply class based on <Is_float_mode>.
3740 (define_insn "neon_vmul_lane<mode>"
3741 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3742 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3743 (match_operand:VMD 2 "s_register_operand"
3744 "<scalar_mul_constraint>")
3745 (match_operand:SI 3 "immediate_operand" "i")]
3746 UNSPEC_VMUL_LANE))]
3747 "TARGET_NEON"
3748 {
3749 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3750 }
3751 [(set (attr "type")
3752 (if_then_else (match_test "<Is_float_mode>")
3753 (const_string "neon_fp_mul_s_scalar<q>")
3754 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3755 )
3756
;; Multiply by a selected lane (UNSPEC_VMUL_LANE) for VMQ modes.  The scalar
;; source (operand 2) has the <V_HALF> mode of the vector operands and is
;; printed as "%P2[%c3]", while operands 0 and 1 are printed with %q.
;; The type attribute picks the floating-point or integer scalar-multiply
;; class based on <Is_float_mode>.
3757 (define_insn "neon_vmul_lane<mode>"
3758 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3759 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3760 (match_operand:<V_HALF> 2 "s_register_operand"
3761 "<scalar_mul_constraint>")
3762 (match_operand:SI 3 "immediate_operand" "i")]
3763 UNSPEC_VMUL_LANE))]
3764 "TARGET_NEON"
3765 {
3766 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3767 }
3768 [(set (attr "type")
3769 (if_then_else (match_test "<Is_float_mode>")
3770 (const_string "neon_fp_mul_s_scalar<q>")
3771 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3772 )
3773
;; Multiply by a selected lane (UNSPEC_VMUL_LANE) for the VH modes, under
;; TARGET_NEON_FP16INST.  The scalar source (operand 2) is always V4HF and
;; is printed as "%P2[%c3]"; the mnemonic is fixed as "vmul.f16".
3774 (define_insn "neon_vmul_lane<mode>"
3775 [(set (match_operand:VH 0 "s_register_operand" "=w")
3776 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3777 (match_operand:V4HF 2 "s_register_operand"
3778 "<scalar_mul_constraint>")
3779 (match_operand:SI 3 "immediate_operand" "i")]
3780 UNSPEC_VMUL_LANE))]
3781 "TARGET_NEON_FP16INST"
3782 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
3783 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
3784 )
3785
;; Multiply by a selected lane with a <V_widen>-mode result (VMULL_LANE
;; unspec family) for VMDI modes.  Operand 2 holds the scalar and operand 3
;; is the immediate lane number, printed as "%P2[%c3]".
3786 (define_insn "neon_vmull<sup>_lane<mode>"
3787 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3788 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3789 (match_operand:VMDI 2 "s_register_operand"
3790 "<scalar_mul_constraint>")
3791 (match_operand:SI 3 "immediate_operand" "i")]
3792 VMULL_LANE))]
3793 "TARGET_NEON"
3794 {
3795 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3796 }
3797 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
3798 )
3799
;; UNSPEC_VQDMULL_LANE for VMDI modes with a <V_widen>-mode result.
;; Operand 2 holds the scalar and operand 3 is the immediate lane number,
;; printed as "%P2[%c3]"; the mnemonic is "vqdmull.<V_s_elem>".
3800 (define_insn "neon_vqdmull_lane<mode>"
3801 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3802 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3803 (match_operand:VMDI 2 "s_register_operand"
3804 "<scalar_mul_constraint>")
3805 (match_operand:SI 3 "immediate_operand" "i")]
3806 UNSPEC_VQDMULL_LANE))]
3807 "TARGET_NEON"
3808 {
3809 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3810 }
3811 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
3812 )
3813
;; VQDMULH_LANE unspec family for VMQI modes; <r> selects the mnemonic
;; variant ("vq<r>dmulh").  The scalar source (operand 2) has <V_HALF> mode
;; and is printed as "%P2[%c3]"; operands 0 and 1 are printed with %q.
3814 (define_insn "neon_vq<r>dmulh_lane<mode>"
3815 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3816 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3817 (match_operand:<V_HALF> 2 "s_register_operand"
3818 "<scalar_mul_constraint>")
3819 (match_operand:SI 3 "immediate_operand" "i")]
3820 VQDMULH_LANE))]
3821 "TARGET_NEON"
3822 {
3823 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3824 }
3825 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3826 )
3827
;; VQDMULH_LANE unspec family for VMDI modes; <r> selects the mnemonic
;; variant ("vq<r>dmulh").  Operand 2 holds the scalar and operand 3 is the
;; immediate lane number, printed as "%P2[%c3]"; all vector operands are
;; printed with %P.
;; The scheduling type deliberately omits the "_q" suffix: this is the
;; VMDI variant (operands printed with %P), so it should not share the "_q"
;; type used by the VMQI pattern of the same name.
3828 (define_insn "neon_vq<r>dmulh_lane<mode>"
3829 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3830 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3831 (match_operand:VMDI 2 "s_register_operand"
3832 "<scalar_mul_constraint>")
3833 (match_operand:SI 3 "immediate_operand" "i")]
3834 VQDMULH_LANE))]
3835 "TARGET_NEON"
3836 {
3837 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3838 }
3839 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
3840 )
3841
3842 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI variant.  Operand 1 is tied to the destination (constraint "0"),
;; operand 2 is the vector multiplicand, operand 3 the <V_HALF>-mode scalar
;; source and operand 4 the immediate lane number.  Requires
;; TARGET_NEON_RDMA.
3843 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3844 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3845 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3846 (match_operand:VMQI 2 "s_register_operand" "w")
3847 (match_operand:<V_HALF> 3 "s_register_operand"
3848 "<scalar_mul_constraint>")
3849 (match_operand:SI 4 "immediate_operand" "i")]
3850 VQRDMLH_AS))]
3851 "TARGET_NEON_RDMA"
3852 {
3853 return
3854 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3855 }
3856 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
3857 )
3858
;; VMDI variant of the vqrdmlah_lane / vqrdmlsh_lane pattern.  Operand 1 is
;; tied to the destination (constraint "0"), operand 2 is the vector
;; multiplicand, operand 3 the scalar source and operand 4 the immediate
;; lane number.  Requires TARGET_NEON_RDMA.
3859 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3860 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3861 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3862 (match_operand:VMDI 2 "s_register_operand" "w")
3863 (match_operand:VMDI 3 "s_register_operand"
3864 "<scalar_mul_constraint>")
3865 (match_operand:SI 4 "immediate_operand" "i")]
3866 VQRDMLH_AS))]
3867 "TARGET_NEON_RDMA"
3868 {
3869 return
3870 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3871 }
3872 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
3873 )
3874
;; UNSPEC_VMLA_LANE for VMD modes.  Operand 1 is tied to the destination
;; (constraint "0") and is therefore not printed; operand 2 is the vector
;; multiplicand, operand 3 the scalar source and operand 4 the immediate
;; lane number.  The type attribute depends on <Is_float_mode>.
3875 (define_insn "neon_vmla_lane<mode>"
3876 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3877 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3878 (match_operand:VMD 2 "s_register_operand" "w")
3879 (match_operand:VMD 3 "s_register_operand"
3880 "<scalar_mul_constraint>")
3881 (match_operand:SI 4 "immediate_operand" "i")]
3882 UNSPEC_VMLA_LANE))]
3883 "TARGET_NEON"
3884 {
3885 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3886 }
3887 [(set (attr "type")
3888 (if_then_else (match_test "<Is_float_mode>")
3889 (const_string "neon_fp_mla_s_scalar<q>")
3890 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3891 )
3892
;; UNSPEC_VMLA_LANE for VMQ modes.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 is the vector multiplicand, operand 3 the
;; <V_HALF>-mode scalar source and operand 4 the immediate lane number.
;; The type attribute depends on <Is_float_mode>.
3893 (define_insn "neon_vmla_lane<mode>"
3894 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3895 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3896 (match_operand:VMQ 2 "s_register_operand" "w")
3897 (match_operand:<V_HALF> 3 "s_register_operand"
3898 "<scalar_mul_constraint>")
3899 (match_operand:SI 4 "immediate_operand" "i")]
3900 UNSPEC_VMLA_LANE))]
3901 "TARGET_NEON"
3902 {
3903 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3904 }
3905 [(set (attr "type")
3906 (if_then_else (match_test "<Is_float_mode>")
3907 (const_string "neon_fp_mla_s_scalar<q>")
3908 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3909 )
3910
;; VMLAL_LANE unspec family.  The <V_widen>-mode accumulator (operand 1) is
;; tied to the destination; operand 2 is the VMDI vector multiplicand,
;; operand 3 the scalar source and operand 4 the immediate lane number.
3911 (define_insn "neon_vmlal<sup>_lane<mode>"
3912 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3913 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3914 (match_operand:VMDI 2 "s_register_operand" "w")
3915 (match_operand:VMDI 3 "s_register_operand"
3916 "<scalar_mul_constraint>")
3917 (match_operand:SI 4 "immediate_operand" "i")]
3918 VMLAL_LANE))]
3919 "TARGET_NEON"
3920 {
3921 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3922 }
3923 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3924 )
3925
;; UNSPEC_VQDMLAL_LANE.  The <V_widen>-mode accumulator (operand 1) is tied
;; to the destination; operand 2 is the VMDI vector multiplicand, operand 3
;; the scalar source and operand 4 the immediate lane number.
3926 (define_insn "neon_vqdmlal_lane<mode>"
3927 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3928 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3929 (match_operand:VMDI 2 "s_register_operand" "w")
3930 (match_operand:VMDI 3 "s_register_operand"
3931 "<scalar_mul_constraint>")
3932 (match_operand:SI 4 "immediate_operand" "i")]
3933 UNSPEC_VQDMLAL_LANE))]
3934 "TARGET_NEON"
3935 {
3936 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3937 }
3938 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3939 )
3940
;; UNSPEC_VMLS_LANE for VMD modes.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 is the vector multiplicand, operand 3 the
;; scalar source and operand 4 the immediate lane number.  The type
;; attribute depends on <Is_float_mode> and reuses the "mla" classes.
3941 (define_insn "neon_vmls_lane<mode>"
3942 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3943 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3944 (match_operand:VMD 2 "s_register_operand" "w")
3945 (match_operand:VMD 3 "s_register_operand"
3946 "<scalar_mul_constraint>")
3947 (match_operand:SI 4 "immediate_operand" "i")]
3948 UNSPEC_VMLS_LANE))]
3949 "TARGET_NEON"
3950 {
3951 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3952 }
3953 [(set (attr "type")
3954 (if_then_else (match_test "<Is_float_mode>")
3955 (const_string "neon_fp_mla_s_scalar<q>")
3956 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3957 )
3958
;; UNSPEC_VMLS_LANE for VMQ modes.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 is the vector multiplicand, operand 3 the
;; <V_HALF>-mode scalar source and operand 4 the immediate lane number.
;; The type attribute depends on <Is_float_mode> and reuses the "mla"
;; classes.
3959 (define_insn "neon_vmls_lane<mode>"
3960 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3961 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3962 (match_operand:VMQ 2 "s_register_operand" "w")
3963 (match_operand:<V_HALF> 3 "s_register_operand"
3964 "<scalar_mul_constraint>")
3965 (match_operand:SI 4 "immediate_operand" "i")]
3966 UNSPEC_VMLS_LANE))]
3967 "TARGET_NEON"
3968 {
3969 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3970 }
3971 [(set (attr "type")
3972 (if_then_else (match_test "<Is_float_mode>")
3973 (const_string "neon_fp_mla_s_scalar<q>")
3974 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3975 )
3976
;; VMLSL_LANE unspec family.  The <V_widen>-mode accumulator (operand 1) is
;; tied to the destination; operand 2 is the VMDI vector multiplicand,
;; operand 3 the scalar source and operand 4 the immediate lane number.
3977 (define_insn "neon_vmlsl<sup>_lane<mode>"
3978 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3979 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3980 (match_operand:VMDI 2 "s_register_operand" "w")
3981 (match_operand:VMDI 3 "s_register_operand"
3982 "<scalar_mul_constraint>")
3983 (match_operand:SI 4 "immediate_operand" "i")]
3984 VMLSL_LANE))]
3985 "TARGET_NEON"
3986 {
3987 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3988 }
3989 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3990 )
3991
;; UNSPEC_VQDMLSL_LANE.  The <V_widen>-mode accumulator (operand 1) is tied
;; to the destination; operand 2 is the VMDI vector multiplicand, operand 3
;; the scalar source and operand 4 the immediate lane number.
3992 (define_insn "neon_vqdmlsl_lane<mode>"
3993 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3994 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3995 (match_operand:VMDI 2 "s_register_operand" "w")
3996 (match_operand:VMDI 3 "s_register_operand"
3997 "<scalar_mul_constraint>")
3998 (match_operand:SI 4 "immediate_operand" "i")]
3999 UNSPEC_VQDMLSL_LANE))]
4000 "TARGET_NEON"
4001 {
4002 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4003 }
4004 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4005 )
4006
4007 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4008 ; core register into a temp register, then use a scalar taken from that. This
4009 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4010 ; or extracted from another vector. In the latter case it's currently better to
4011 ; use the "_lane" variant, and the former case can probably be implemented
4012 ; using vld1_lane, but that hasn't been done yet.
4013
;; Expander for multiply-by-scalar on VMD modes.  The <V_elem>-mode scalar
;; (operand 2) is inserted into a fresh <MODE>mode pseudo with
;; neon_vset_lane<mode> (lane argument const0_rtx), and that pseudo is then
;; passed to neon_vmul_lane<mode> with lane const0_rtx.
4014 (define_expand "neon_vmul_n<mode>"
4015 [(match_operand:VMD 0 "s_register_operand" "")
4016 (match_operand:VMD 1 "s_register_operand" "")
4017 (match_operand:<V_elem> 2 "s_register_operand" "")]
4018 "TARGET_NEON"
4019 {
4020 rtx tmp = gen_reg_rtx (<MODE>mode);
4021 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4022 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4023 const0_rtx));
4024 DONE;
4025 })
4026
;; Expander for multiply-by-scalar on VMQ modes.  The scalar (operand 2) is
;; inserted into a fresh <V_HALF>mode pseudo with neon_vset_lane<V_half>
;; (lane argument const0_rtx), and that pseudo is then passed to
;; neon_vmul_lane<mode> with lane const0_rtx.
4027 (define_expand "neon_vmul_n<mode>"
4028 [(match_operand:VMQ 0 "s_register_operand" "")
4029 (match_operand:VMQ 1 "s_register_operand" "")
4030 (match_operand:<V_elem> 2 "s_register_operand" "")]
4031 "TARGET_NEON"
4032 {
4033 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4034 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4035 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4036 const0_rtx));
4037 DONE;
4038 })
4039
;; Expander for multiply-by-scalar on the VH modes, under
;; TARGET_NEON_FP16INST.  The scalar (operand 2) is inserted into a fresh
;; V4HFmode pseudo with neon_vset_lanev4hf (lane argument const0_rtx), and
;; that pseudo is then passed to neon_vmul_lane<mode> with lane const0_rtx.
4040 (define_expand "neon_vmul_n<mode>"
4041 [(match_operand:VH 0 "s_register_operand")
4042 (match_operand:VH 1 "s_register_operand")
4043 (match_operand:<V_elem> 2 "s_register_operand")]
4044 "TARGET_NEON_FP16INST"
4045 {
4046 rtx tmp = gen_reg_rtx (V4HFmode);
4047 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4048 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4049 const0_rtx));
4050 DONE;
4051 })
4052
;; Expander for the scalar form of neon_vmulls_lane<mode>: the scalar
;; (operand 2) is inserted into a fresh <MODE>mode pseudo with
;; neon_vset_lane<mode> (lane argument const0_rtx), which is then used as
;; the lane source with lane const0_rtx.  The result has <V_widen> mode.
4053 (define_expand "neon_vmulls_n<mode>"
4054 [(match_operand:<V_widen> 0 "s_register_operand" "")
4055 (match_operand:VMDI 1 "s_register_operand" "")
4056 (match_operand:<V_elem> 2 "s_register_operand" "")]
4057 "TARGET_NEON"
4058 {
4059 rtx tmp = gen_reg_rtx (<MODE>mode);
4060 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4061 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4062 const0_rtx));
4063 DONE;
4064 })
4065
;; Expander for the scalar form of neon_vmullu_lane<mode>: the scalar
;; (operand 2) is inserted into a fresh <MODE>mode pseudo with
;; neon_vset_lane<mode> (lane argument const0_rtx), which is then used as
;; the lane source with lane const0_rtx.  The result has <V_widen> mode.
4066 (define_expand "neon_vmullu_n<mode>"
4067 [(match_operand:<V_widen> 0 "s_register_operand" "")
4068 (match_operand:VMDI 1 "s_register_operand" "")
4069 (match_operand:<V_elem> 2 "s_register_operand" "")]
4070 "TARGET_NEON"
4071 {
4072 rtx tmp = gen_reg_rtx (<MODE>mode);
4073 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4074 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4075 const0_rtx));
4076 DONE;
4077 })
4078
;; Expander for the scalar form of neon_vqdmull_lane<mode>: the scalar
;; (operand 2) is inserted into a fresh <MODE>mode pseudo with
;; neon_vset_lane<mode> (lane argument const0_rtx), which is then used as
;; the lane source with lane const0_rtx.  The result has <V_widen> mode.
4079 (define_expand "neon_vqdmull_n<mode>"
4080 [(match_operand:<V_widen> 0 "s_register_operand" "")
4081 (match_operand:VMDI 1 "s_register_operand" "")
4082 (match_operand:<V_elem> 2 "s_register_operand" "")]
4083 "TARGET_NEON"
4084 {
4085 rtx tmp = gen_reg_rtx (<MODE>mode);
4086 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4087 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4088 const0_rtx));
4089 DONE;
4090 })
4091
;; Expander for the scalar form of neon_vqdmulh_lane<mode> on VMDI modes:
;; the scalar (operand 2) is inserted into a fresh <MODE>mode pseudo with
;; neon_vset_lane<mode> (lane argument const0_rtx), which is then used as
;; the lane source with lane const0_rtx.
4092 (define_expand "neon_vqdmulh_n<mode>"
4093 [(match_operand:VMDI 0 "s_register_operand" "")
4094 (match_operand:VMDI 1 "s_register_operand" "")
4095 (match_operand:<V_elem> 2 "s_register_operand" "")]
4096 "TARGET_NEON"
4097 {
4098 rtx tmp = gen_reg_rtx (<MODE>mode);
4099 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4100 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4101 const0_rtx));
4102 DONE;
4103 })
4104
;; Expander for the scalar form of neon_vqrdmulh_lane<mode> on VMDI modes:
;; the scalar (operand 2) is inserted into a fresh <MODE>mode pseudo with
;; neon_vset_lane<mode> (lane argument const0_rtx), which is then used as
;; the lane source with lane const0_rtx.
4105 (define_expand "neon_vqrdmulh_n<mode>"
4106 [(match_operand:VMDI 0 "s_register_operand" "")
4107 (match_operand:VMDI 1 "s_register_operand" "")
4108 (match_operand:<V_elem> 2 "s_register_operand" "")]
4109 "TARGET_NEON"
4110 {
4111 rtx tmp = gen_reg_rtx (<MODE>mode);
4112 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4113 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4114 const0_rtx));
4115 DONE;
4116 })
4117
;; Expander for the scalar form of neon_vqdmulh_lane<mode> on VMQI modes:
;; the scalar (operand 2) is inserted into a fresh <V_HALF>mode pseudo with
;; neon_vset_lane<V_half> (lane argument const0_rtx), which is then used as
;; the lane source with lane const0_rtx.
4118 (define_expand "neon_vqdmulh_n<mode>"
4119 [(match_operand:VMQI 0 "s_register_operand" "")
4120 (match_operand:VMQI 1 "s_register_operand" "")
4121 (match_operand:<V_elem> 2 "s_register_operand" "")]
4122 "TARGET_NEON"
4123 {
4124 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4125 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4126 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4127 const0_rtx));
4128 DONE;
4129 })
4130
;; Expander for the scalar form of neon_vqrdmulh_lane<mode> on VMQI modes:
;; the scalar (operand 2) is inserted into a fresh <V_HALF>mode pseudo with
;; neon_vset_lane<V_half> (lane argument const0_rtx), which is then used as
;; the lane source with lane const0_rtx.
4131 (define_expand "neon_vqrdmulh_n<mode>"
4132 [(match_operand:VMQI 0 "s_register_operand" "")
4133 (match_operand:VMQI 1 "s_register_operand" "")
4134 (match_operand:<V_elem> 2 "s_register_operand" "")]
4135 "TARGET_NEON"
4136 {
4137 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4138 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4139 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4140 const0_rtx));
4141 DONE;
4142 })
4143
;; Expander for the scalar form of neon_vmla_lane<mode> on VMD modes.
;; Operand 1 is the accumulator, operand 2 the vector multiplicand and
;; operand 3 the scalar, which is inserted into a fresh <MODE>mode pseudo
;; with neon_vset_lane<mode> (lane argument const0_rtx) and then used as the
;; lane source with lane const0_rtx.
4144 (define_expand "neon_vmla_n<mode>"
4145 [(match_operand:VMD 0 "s_register_operand" "")
4146 (match_operand:VMD 1 "s_register_operand" "")
4147 (match_operand:VMD 2 "s_register_operand" "")
4148 (match_operand:<V_elem> 3 "s_register_operand" "")]
4149 "TARGET_NEON"
4150 {
4151 rtx tmp = gen_reg_rtx (<MODE>mode);
4152 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4153 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4154 tmp, const0_rtx));
4155 DONE;
4156 })
4157
;; Expander for the scalar form of neon_vmla_lane<mode> on VMQ modes.
;; Operand 1 is the accumulator, operand 2 the vector multiplicand and
;; operand 3 the scalar, which is inserted into a fresh <V_HALF>mode pseudo
;; with neon_vset_lane<V_half> (lane argument const0_rtx) and then used as
;; the lane source with lane const0_rtx.
4158 (define_expand "neon_vmla_n<mode>"
4159 [(match_operand:VMQ 0 "s_register_operand" "")
4160 (match_operand:VMQ 1 "s_register_operand" "")
4161 (match_operand:VMQ 2 "s_register_operand" "")
4162 (match_operand:<V_elem> 3 "s_register_operand" "")]
4163 "TARGET_NEON"
4164 {
4165 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4166 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4167 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4168 tmp, const0_rtx));
4169 DONE;
4170 })
4171
;; Expander for the scalar form of neon_vmlals_lane<mode>.  Operand 1 is
;; the <V_widen>-mode accumulator, operand 2 the VMDI vector multiplicand
;; and operand 3 the scalar, which is inserted into a fresh <MODE>mode
;; pseudo with neon_vset_lane<mode> (lane argument const0_rtx) and then
;; used as the lane source with lane const0_rtx.
4172 (define_expand "neon_vmlals_n<mode>"
4173 [(match_operand:<V_widen> 0 "s_register_operand" "")
4174 (match_operand:<V_widen> 1 "s_register_operand" "")
4175 (match_operand:VMDI 2 "s_register_operand" "")
4176 (match_operand:<V_elem> 3 "s_register_operand" "")]
4177 "TARGET_NEON"
4178 {
4179 rtx tmp = gen_reg_rtx (<MODE>mode);
4180 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4181 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4182 tmp, const0_rtx));
4183 DONE;
4184 })
4185
;; Expander for the scalar form of neon_vmlalu_lane<mode>.  Operand 1 is
;; the <V_widen>-mode accumulator, operand 2 the VMDI vector multiplicand
;; and operand 3 the scalar, which is inserted into a fresh <MODE>mode
;; pseudo with neon_vset_lane<mode> (lane argument const0_rtx) and then
;; used as the lane source with lane const0_rtx.
4186 (define_expand "neon_vmlalu_n<mode>"
4187 [(match_operand:<V_widen> 0 "s_register_operand" "")
4188 (match_operand:<V_widen> 1 "s_register_operand" "")
4189 (match_operand:VMDI 2 "s_register_operand" "")
4190 (match_operand:<V_elem> 3 "s_register_operand" "")]
4191 "TARGET_NEON"
4192 {
4193 rtx tmp = gen_reg_rtx (<MODE>mode);
4194 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4195 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4196 tmp, const0_rtx));
4197 DONE;
4198 })
4199
;; Expander for the scalar form of neon_vqdmlal_lane<mode>.  Operand 1 is
;; the <V_widen>-mode accumulator, operand 2 the VMDI vector multiplicand
;; and operand 3 the scalar, which is inserted into a fresh <MODE>mode
;; pseudo with neon_vset_lane<mode> (lane argument const0_rtx) and then
;; used as the lane source with lane const0_rtx.
4200 (define_expand "neon_vqdmlal_n<mode>"
4201 [(match_operand:<V_widen> 0 "s_register_operand" "")
4202 (match_operand:<V_widen> 1 "s_register_operand" "")
4203 (match_operand:VMDI 2 "s_register_operand" "")
4204 (match_operand:<V_elem> 3 "s_register_operand" "")]
4205 "TARGET_NEON"
4206 {
4207 rtx tmp = gen_reg_rtx (<MODE>mode);
4208 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4209 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4210 tmp, const0_rtx));
4211 DONE;
4212 })
4213
;; Expander for the scalar form of neon_vmls_lane<mode> on VMD modes.
;; Operand 1 is the accumulator, operand 2 the vector multiplicand and
;; operand 3 the scalar, which is inserted into a fresh <MODE>mode pseudo
;; with neon_vset_lane<mode> (lane argument const0_rtx) and then used as the
;; lane source with lane const0_rtx.
4214 (define_expand "neon_vmls_n<mode>"
4215 [(match_operand:VMD 0 "s_register_operand" "")
4216 (match_operand:VMD 1 "s_register_operand" "")
4217 (match_operand:VMD 2 "s_register_operand" "")
4218 (match_operand:<V_elem> 3 "s_register_operand" "")]
4219 "TARGET_NEON"
4220 {
4221 rtx tmp = gen_reg_rtx (<MODE>mode);
4222 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4223 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4224 tmp, const0_rtx));
4225 DONE;
4226 })
4227
;; Expander for the scalar form of neon_vmls_lane<mode> on VMQ modes.
;; Operand 1 is the accumulator, operand 2 the vector multiplicand and
;; operand 3 the scalar, which is inserted into a fresh <V_HALF>mode pseudo
;; with neon_vset_lane<V_half> (lane argument const0_rtx) and then used as
;; the lane source with lane const0_rtx.
4228 (define_expand "neon_vmls_n<mode>"
4229 [(match_operand:VMQ 0 "s_register_operand" "")
4230 (match_operand:VMQ 1 "s_register_operand" "")
4231 (match_operand:VMQ 2 "s_register_operand" "")
4232 (match_operand:<V_elem> 3 "s_register_operand" "")]
4233 "TARGET_NEON"
4234 {
4235 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4236 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4237 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4238 tmp, const0_rtx));
4239 DONE;
4240 })
4241
;; Expander for the scalar form of neon_vmlsls_lane<mode>.  Operand 1 is
;; the <V_widen>-mode accumulator, operand 2 the VMDI vector multiplicand
;; and operand 3 the scalar, which is inserted into a fresh <MODE>mode
;; pseudo with neon_vset_lane<mode> (lane argument const0_rtx) and then
;; used as the lane source with lane const0_rtx.
4242 (define_expand "neon_vmlsls_n<mode>"
4243 [(match_operand:<V_widen> 0 "s_register_operand" "")
4244 (match_operand:<V_widen> 1 "s_register_operand" "")
4245 (match_operand:VMDI 2 "s_register_operand" "")
4246 (match_operand:<V_elem> 3 "s_register_operand" "")]
4247 "TARGET_NEON"
4248 {
4249 rtx tmp = gen_reg_rtx (<MODE>mode);
4250 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4251 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4252 tmp, const0_rtx));
4253 DONE;
4254 })
4255
;; Expander for the scalar form of neon_vmlslu_lane<mode>.  Operand 1 is
;; the <V_widen>-mode accumulator, operand 2 the VMDI vector multiplicand
;; and operand 3 the scalar, which is inserted into a fresh <MODE>mode
;; pseudo with neon_vset_lane<mode> (lane argument const0_rtx) and then
;; used as the lane source with lane const0_rtx.
4256 (define_expand "neon_vmlslu_n<mode>"
4257 [(match_operand:<V_widen> 0 "s_register_operand" "")
4258 (match_operand:<V_widen> 1 "s_register_operand" "")
4259 (match_operand:VMDI 2 "s_register_operand" "")
4260 (match_operand:<V_elem> 3 "s_register_operand" "")]
4261 "TARGET_NEON"
4262 {
4263 rtx tmp = gen_reg_rtx (<MODE>mode);
4264 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4265 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4266 tmp, const0_rtx));
4267 DONE;
4268 })
4269
;; Expander for the scalar form of neon_vqdmlsl_lane<mode>.  Operand 1 is
;; the <V_widen>-mode accumulator, operand 2 the VMDI vector multiplicand
;; and operand 3 the scalar, which is inserted into a fresh <MODE>mode
;; pseudo with neon_vset_lane<mode> (lane argument const0_rtx) and then
;; used as the lane source with lane const0_rtx.
4270 (define_expand "neon_vqdmlsl_n<mode>"
4271 [(match_operand:<V_widen> 0 "s_register_operand" "")
4272 (match_operand:<V_widen> 1 "s_register_operand" "")
4273 (match_operand:VMDI 2 "s_register_operand" "")
4274 (match_operand:<V_elem> 3 "s_register_operand" "")]
4275 "TARGET_NEON"
4276 {
4277 rtx tmp = gen_reg_rtx (<MODE>mode);
4278 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4279 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4280 tmp, const0_rtx));
4281 DONE;
4282 })
4283
;; UNSPEC_VEXT on VDQX modes: two vector register sources plus an immediate
;; (operand 3), which is validated by neon_const_bounds against 0 and the
;; number of units in <MODE>mode before "vext.<V_sz_elem>" is emitted.
4284 (define_insn "neon_vext<mode>"
4285 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4286 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4287 (match_operand:VDQX 2 "s_register_operand" "w")
4288 (match_operand:SI 3 "immediate_operand" "i")]
4289 UNSPEC_VEXT))]
4290 "TARGET_NEON"
4291 {
4292 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4293 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4294 }
4295 [(set_attr "type" "neon_ext<q>")]
4296 )
4297
;; UNSPEC_VREV64 on VDQ modes; emits "vrev64.<V_sz_elem>" with a single
;; vector register source.
4298 (define_insn "neon_vrev64<mode>"
4299 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4300 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4301 UNSPEC_VREV64))]
4302 "TARGET_NEON"
4303 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4304 [(set_attr "type" "neon_rev<q>")]
4305 )
4306
;; UNSPEC_VREV32 on VX modes; emits "vrev32.<V_sz_elem>" with a single
;; vector register source.
4307 (define_insn "neon_vrev32<mode>"
4308 [(set (match_operand:VX 0 "s_register_operand" "=w")
4309 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4310 UNSPEC_VREV32))]
4311 "TARGET_NEON"
4312 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4313 [(set_attr "type" "neon_rev<q>")]
4314 )
4315
;; UNSPEC_VREV16 on VE modes; emits "vrev16.<V_sz_elem>" with a single
;; vector register source.
4316 (define_insn "neon_vrev16<mode>"
4317 [(set (match_operand:VE 0 "s_register_operand" "=w")
4318 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4319 UNSPEC_VREV16))]
4320 "TARGET_NEON"
4321 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4322 [(set_attr "type" "neon_rev<q>")]
4323 )
4324
4325 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4326 ; allocation. For an intrinsic of form:
4327 ; rD = vbsl_* (rS, rN, rM)
4328 ; We can use any of:
4329 ; vbsl rS, rN, rM (if D = S)
4330 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4331 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
4332
;; UNSPEC_VBSL with three constraint alternatives, one per source operand
;; that may be tied to the destination (constraint "0"):
;;   alternative 0: operand 1 tied -> vbsl
;;   alternative 1: operand 3 tied -> vbit
;;   alternative 2: operand 2 tied -> vbif
;; The tied operand is the one omitted from each output template.
4333 (define_insn "neon_vbsl<mode>_internal"
4334 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4335 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4336 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4337 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4338 UNSPEC_VBSL))]
4339 "TARGET_NEON"
4340 "@
4341 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4342 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4343 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4344 [(set_attr "type" "neon_bsl<q>")]
4345 )
4346
;; Expander for UNSPEC_VBSL.  Operand 1 arrives in <V_cmp_result> mode and
;; is replaced by its <MODE>mode lowpart before the RTL template is
;; generated, so that all three unspec inputs share the same mode.
4347 (define_expand "neon_vbsl<mode>"
4348 [(set (match_operand:VDQX 0 "s_register_operand" "")
4349 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4350 (match_operand:VDQX 2 "s_register_operand" "")
4351 (match_operand:VDQX 3 "s_register_operand" "")]
4352 UNSPEC_VBSL))]
4353 "TARGET_NEON"
4354 {
4355 /* We can't alias operands together if they have different modes. */
4356 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4357 })
4358
4359 ;; vshl, vrshl
;; VSHL unspec family on VDQIX modes: both the value (operand 1) and the
;; second input (operand 2) are vector registers.  <shift_op> supplies the
;; mnemonic and <sup> the type prefix.
4360 (define_insn "neon_v<shift_op><sup><mode>"
4361 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4362 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4363 (match_operand:VDQIX 2 "s_register_operand" "w")]
4364 VSHL))]
4365 "TARGET_NEON"
4366 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4367 [(set_attr "type" "neon_shift_imm<q>")]
4368 )
4369
4370 ;; vqshl, vqrshl
;; VQSHL unspec family on VDQIX modes: both the value (operand 1) and the
;; second input (operand 2) are vector registers.  Typed as a saturating
;; shift for scheduling.
4371 (define_insn "neon_v<shift_op><sup><mode>"
4372 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4373 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4374 (match_operand:VDQIX 2 "s_register_operand" "w")]
4375 VQSHL))]
4376 "TARGET_NEON"
4377 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4378 [(set_attr "type" "neon_sat_shift_imm<q>")]
4379 )
4380
4381 ;; vshr_n, vrshr_n
;; VSHR_N unspec family on VDQIX modes.  The immediate (operand 2) is
;; validated by neon_const_bounds against 1 and the element width plus one
;; before the instruction is emitted.
4382 (define_insn "neon_v<shift_op><sup>_n<mode>"
4383 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4384 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4385 (match_operand:SI 2 "immediate_operand" "i")]
4386 VSHR_N))]
4387 "TARGET_NEON"
4388 {
4389 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4390 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4391 }
4392 [(set_attr "type" "neon_shift_imm<q>")]
4393 )
4394
4395 ;; vshrn_n, vrshrn_n
;; VSHRN_N unspec family: VN-mode source, <V_narrow>-mode result.  The
;; immediate (operand 2) is validated by neon_const_bounds against 1 and
;; half the source element width plus one.
4396 (define_insn "neon_v<shift_op>_n<mode>"
4397 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4398 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4399 (match_operand:SI 2 "immediate_operand" "i")]
4400 VSHRN_N))]
4401 "TARGET_NEON"
4402 {
4403 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4404 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4405 }
4406 [(set_attr "type" "neon_shift_imm_narrow_q")]
4407 )
4408
4409 ;; vqshrn_n, vqrshrn_n
;; VQSHRN_N unspec family: VN-mode source, <V_narrow>-mode result.  The
;; immediate (operand 2) is validated by neon_const_bounds against 1 and
;; half the source element width plus one.
4410 (define_insn "neon_v<shift_op><sup>_n<mode>"
4411 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4412 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4413 (match_operand:SI 2 "immediate_operand" "i")]
4414 VQSHRN_N))]
4415 "TARGET_NEON"
4416 {
4417 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4418 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4419 }
4420 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4421 )
4422
4423 ;; vqshrun_n, vqrshrun_n
;; VQSHRUN_N unspec family: VN-mode source, <V_narrow>-mode result.  The
;; immediate (operand 2) is validated by neon_const_bounds against 1 and
;; half the source element width plus one.
4424 (define_insn "neon_v<shift_op>_n<mode>"
4425 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4426 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4427 (match_operand:SI 2 "immediate_operand" "i")]
4428 VQSHRUN_N))]
4429 "TARGET_NEON"
4430 {
4431 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4432 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4433 }
4434 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4435 )
4436
;; UNSPEC_VSHL_N on VDQIX modes.  The immediate (operand 2) is validated by
;; neon_const_bounds against 0 and the element width before
;; "vshl.<V_if_elem>" is emitted.
4437 (define_insn "neon_vshl_n<mode>"
4438 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4439 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4440 (match_operand:SI 2 "immediate_operand" "i")]
4441 UNSPEC_VSHL_N))]
4442 "TARGET_NEON"
4443 {
4444 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4445 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4446 }
4447 [(set_attr "type" "neon_shift_imm<q>")]
4448 )
4449
;; VQSHL_N unspec family on VDQIX modes.  The immediate (operand 2) is
;; validated by neon_const_bounds against 0 and the element width before
;; "vqshl.<sup>%#<V_sz_elem>" is emitted.
4450 (define_insn "neon_vqshl_<sup>_n<mode>"
4451 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4452 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4453 (match_operand:SI 2 "immediate_operand" "i")]
4454 VQSHL_N))]
4455 "TARGET_NEON"
4456 {
4457 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4458 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4459 }
4460 [(set_attr "type" "neon_sat_shift_imm<q>")]
4461 )
4462
;; UNSPEC_VQSHLU_N on VDQIX modes.  The immediate (operand 2) is validated
;; by neon_const_bounds against 0 and the element width before
;; "vqshlu.<V_s_elem>" is emitted.
4463 (define_insn "neon_vqshlu_n<mode>"
4464 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4465 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4466 (match_operand:SI 2 "immediate_operand" "i")]
4467 UNSPEC_VQSHLU_N))]
4468 "TARGET_NEON"
4469 {
4470 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4471 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4472 }
4473 [(set_attr "type" "neon_sat_shift_imm<q>")]
4474 )
4475
;; VSHLL_N unspec family: VW-mode source, <V_widen>-mode result, with an
;; immediate second operand that is checked by neon_const_bounds before
;; "vshll.<sup>%#<V_sz_elem>" is emitted.
4476 (define_insn "neon_vshll<sup>_n<mode>"
4477 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4478 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4479 (match_operand:SI 2 "immediate_operand" "i")]
4480 VSHLL_N))]
4481 "TARGET_NEON"
4482 {
4483 /* Stated boundaries: 0 < imm <= size.  NOTE(review): the call below passes 0 as its lower bound, which does not obviously match "0 < imm" -- confirm against neon_const_bounds. */
4484 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4485 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4486 }
4487 [(set_attr "type" "neon_shift_imm_long")]
4488 )
4489
4490 ;; vsra_n, vrsra_n
;; VSRA_N unspec family on VDQIX modes.  Operand 1 is tied to the
;; destination (constraint "0") and is not printed; operand 2 is the vector
;; source and operand 3 the immediate, validated by neon_const_bounds
;; against 1 and the element width plus one.
4491 (define_insn "neon_v<shift_op><sup>_n<mode>"
4492 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4493 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4494 (match_operand:VDQIX 2 "s_register_operand" "w")
4495 (match_operand:SI 3 "immediate_operand" "i")]
4496 VSRA_N))]
4497 "TARGET_NEON"
4498 {
4499 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4500 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4501 }
4502 [(set_attr "type" "neon_shift_acc<q>")]
4503 )
4504
;; UNSPEC_VSRI on VDQIX modes.  Operand 1 is tied to the destination
;; (constraint "0") and is not printed; operand 2 is the vector source and
;; operand 3 the immediate, validated by neon_const_bounds against 1 and
;; the element width plus one.
4505 (define_insn "neon_vsri_n<mode>"
4506 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4507 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4508 (match_operand:VDQIX 2 "s_register_operand" "w")
4509 (match_operand:SI 3 "immediate_operand" "i")]
4510 UNSPEC_VSRI))]
4511 "TARGET_NEON"
4512 {
4513 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4514 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4515 }
4516 [(set_attr "type" "neon_shift_reg<q>")]
4517 )
4518
;; UNSPEC_VSLI on VDQIX modes.  Operand 1 is tied to the destination
;; (constraint "0") and is not printed; operand 2 is the vector source and
;; operand 3 the immediate, validated by neon_const_bounds against 0 and
;; the element width.
4519 (define_insn "neon_vsli_n<mode>"
4520 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4521 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4522 (match_operand:VDQIX 2 "s_register_operand" "w")
4523 (match_operand:SI 3 "immediate_operand" "i")]
4524 UNSPEC_VSLI))]
4525 "TARGET_NEON"
4526 {
4527 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4528 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4529 }
4530 [(set_attr "type" "neon_shift_reg<q>")]
4531 )
4532
;; UNSPEC_VTBL with a one-register table: operand 1 (V8QI) is printed inside
;; the braces as the table and operand 2 (V8QI) follows it.
4533 (define_insn "neon_vtbl1v8qi"
4534 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4535 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4536 (match_operand:V8QI 2 "s_register_operand" "w")]
4537 UNSPEC_VTBL))]
4538 "TARGET_NEON"
4539 "vtbl.8\t%P0, {%P1}, %P2"
4540 [(set_attr "type" "neon_tbl1")]
4541 )
4542
;; UNSPEC_VTBL with a two-register table held in a single TImode operand.
;; The output code splits operand 1 into two V8QImode registers at register
;; numbers REGNO and REGNO + 2, prints the instruction itself through
;; output_asm_insn, and returns an empty template.
;; NOTE(review): the stride of 2 presumably reflects how many hard register
;; numbers one V8QImode register occupies -- confirm.
4543 (define_insn "neon_vtbl2v8qi"
4544 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4545 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4546 (match_operand:V8QI 2 "s_register_operand" "w")]
4547 UNSPEC_VTBL))]
4548 "TARGET_NEON"
4549 {
4550 rtx ops[4];
4551 int tabbase = REGNO (operands[1]);
4552
4553 ops[0] = operands[0];
4554 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4555 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4556 ops[3] = operands[2];
4557 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4558
4559 return "";
4560 }
4561 [(set_attr "type" "neon_tbl2")]
4562 )
4563
;; VTBL with an EImode table (operand 1) and a V8QI index vector (operand 2).
;; The register list names the V8QI registers at REGNO, REGNO + 2 and
;; REGNO + 4 of the table operand.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  rtx xops[5];
  int first = REGNO (operands[1]);

  xops[0] = operands[0];
  /* Table registers go in slots 1..3.  */
  for (int i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[4] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4585
;; VTBL with an OImode table (operand 1) and a V8QI index vector (operand 2).
;; The register list names the V8QI registers at REGNO, REGNO + 2,
;; REGNO + 4 and REGNO + 6 of the table operand.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  rtx xops[6];
  int first = REGNO (operands[1]);

  xops[0] = operands[0];
  /* Table registers go in slots 1..4.  */
  for (int i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[5] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4608
;; These three are used by the vec_perm infrastructure for V16QImode.
;;
;; V16QI table lookup with a V16QI table.  Emitted as "#" and split after
;; reload into two neon_vtbl2v8qi insns: one for the low V8QI half of the
;; destination/selector and one for the high half.  Both use the whole table
;; viewed as TImode.  The destination is earlyclobber ("=&w").
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = gen_lowpart (TImode, operands[1]);
  unsigned half_ofs[2];

  half_ofs[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_ofs[1] = subreg_highpart_offset (V8QImode, V16QImode);

  /* Low half first, then high half.  */
  for (int i = 0; i < 2; i++)
    {
      rtx dst = simplify_subreg (V8QImode, operands[0], V16QImode,
                                 half_ofs[i]);
      rtx sel = simplify_subreg (V8QImode, operands[2], V16QImode,
                                 half_ofs[i]);
      emit_insn (gen_neon_vtbl2v8qi (dst, table, sel));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
4640
;; V16QI table lookup with an OImode table (operand 1) and a V16QI selector
;; (operand 2).  Emitted as "#" and split after reload into two V8QI lookups,
;; one per half of the destination/selector, both using the full table.
;; The destination is earlyclobber ("=&w").
;;
;; The table is OImode, so the matching V8QI pattern is neon_vtbl4v8qi
;; (OI table), not neon_vtbl2v8qi (TI table).  The generators build the same
;; (set (unspec [table selector] UNSPEC_VTBL)) RTL, so using the
;; mode-correct one does not change the emitted insns; it just stops relying
;; on the generator not checking operand modes.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  /* Low V8QI half of the result.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));

  /* High V8QI half of the result.  */
  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  DONE;
}
  [(set_attr "type" "multiple")]
)
4671
;; ??? Logically we should extend the regular neon_vcombine pattern to
;; handle quad-word input modes, producing octa-word output modes.  But
;; that requires us to add support for octa-word vector modes in moves.
;; That seems overkill for this one use in vec_perm.
;;
;; Two V16QI inputs concatenated into an OImode value.  Emitted as "#" and,
;; once reload has completed, handed to neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
                    (match_operand:V16QI 2 "s_register_operand" "w")]
                   UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* The helper emits the replacement insns.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
4691
;; VTBX with a one-register table.  Operand 1 is tied to the destination,
;; operand 2 is the V8QI table and operand 3 the V8QI index vector.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:V8QI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
4702
;; VTBX with a TImode table.  Operand 1 is tied to the destination, operand 2
;; is the table and operand 3 the V8QI index vector.  The register list names
;; the V8QI registers at REGNO and REGNO + 2 of the table operand.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx xops[4];
  int first = REGNO (operands[2]);

  xops[0] = operands[0];
  /* Table registers go in slots 1..2.  */
  for (int i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[3] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4724
;; VTBX with an EImode table.  Operand 1 is tied to the destination, operand 2
;; is the table and operand 3 the V8QI index vector.  The register list names
;; the V8QI registers at REGNO, REGNO + 2 and REGNO + 4 of the table operand.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx xops[5];
  int first = REGNO (operands[2]);

  xops[0] = operands[0];
  /* Table registers go in slots 1..3.  */
  for (int i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[4] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4747
;; VTBX with an OImode table.  Operand 1 is tied to the destination, operand 2
;; is the table and operand 3 the V8QI index vector.  The register list names
;; the V8QI registers at REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 of the
;; table operand.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:OI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx xops[6];
  int first = REGNO (operands[2]);

  xops[0] = operands[0];
  /* Table registers go in slots 1..4.  */
  for (int i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[5] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4771
;; Expander producing a two-set parallel: operand 0 receives the
;; UNSPEC_VTRN1 result and operand 3 the UNSPEC_VTRN2 result of the same two
;; inputs (operands 1 and 2).  No preparation code is needed.
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
                        UNSPEC_VTRN1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
4783
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 (both earlyclobber); input 1 is
;; tied to output 0 and input 3 is tied to output 2, so the instruction is
;; printed with just the two output registers.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
                      UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
                      UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
4797
;; Expander producing a two-set parallel: operand 0 receives the
;; UNSPEC_VZIP1 result and operand 3 the UNSPEC_VZIP2 result of the same two
;; inputs (operands 1 and 2).  No preparation code is needed.
(define_expand "neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
                        UNSPEC_VZIP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
4809
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 (both earlyclobber); input 1 is
;; tied to output 0 and input 3 is tied to output 2, so the instruction is
;; printed with just the two output registers.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
                      UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
                      UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4823
;; Expander producing a two-set parallel: operand 0 receives the
;; UNSPEC_VUZP1 result and operand 3 the UNSPEC_VUZP2 result of the same two
;; inputs (operands 1 and 2).  No preparation code is needed.
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
                        UNSPEC_VUZP1))
     (set (match_operand:VDQWH 3 "s_register_operand" "")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
4835
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 (both earlyclobber); input 1 is
;; tied to output 0 and input 3 is tied to output 2, so the instruction is
;; printed with just the two output registers.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
                      UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
                      UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4849
;; Single-vector form of the vec_load_lanes standard name: builds the same
;; UNSPEC_VLD1 RTL that the neon_vld1<mode> insn matches.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
                     UNSPEC_VLD1))]
  "TARGET_NEON")
4855
;; Whole-vector VLD1: operand 1 is the memory source ("Um"), operand 0 the
;; destination vector register.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
4864
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  The lane numbers are restored to architectural
;; lane order here.
;;
;; D-register form: operand 1 is the element in memory, operand 2 (tied to
;; the destination) supplies the other lanes, operand 3 is the lane number.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  operands[3] = GEN_INT (lane);

  /* A one-element vector is loaded without a lane index.  */
  if (nunits == 1)
    return "vld1.<V_sz_elem>\t%P0, %A1";

  return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4886
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Q-register form.  The lane is mapped onto one of the two <V_HALF>mode
;; halves of the destination: lanes in the upper half select the register at
;; REGNO + 2 and are renumbered relative to that half.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int dreg = REGNO (operands[0]);

  if (lane >= nunits / 2)
    {
      /* Upper half: switch register and rebase the lane.  */
      lane -= nunits / 2;
      dreg += 2;
    }

  operands[3] = GEN_INT (lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, dreg);

  /* With two elements each half holds a single element: no lane index.  */
  if (nunits == 2)
    return "vld1.<V_sz_elem>\t%P0, %A1";

  return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4915
;; Load one element from memory (operand 1) and duplicate it into every lane
;; of a D register, expressed as a vec_duplicate.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4923
;; Special case for DImode.  Treat it exactly like a simple load: the
;; expander just builds an UNSPEC_VLD1 of the DImode memory operand.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
                   UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
4932
;; Q-register variant of the duplicating load: a single instruction whose
;; register list names both halves of the destination (%e0 and %f0).
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4942
;; V2DI duplicating load.  Emitted as "#" and split after reload into a
;; DImode load of the low half of the destination (via neon_vld1_dupdi)
;; followed by a move of that half into the high half.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);

  /* Load once into the low half...  */
  emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));
  /* ...then copy it to the high half.  */
  emit_move_insn (gen_highpart (DImode, operands[0]), low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
4959
;; Single-vector form of the vec_store_lanes standard name: builds the same
;; UNSPEC_VST1 RTL that the neon_vst1<mode> insn matches.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
                     UNSPEC_VST1))]
  "TARGET_NEON")
4965
;; Whole-vector VST1: operand 0 is the memory destination ("Um"), operand 1
;; the source vector register.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
                     UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
4973
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; D-register form: stores lane operand 2 of vector operand 1 to the
;; element-sized memory operand 0.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  operands[2] = GEN_INT (lane);

  /* A one-element vector is stored without a lane index.  */
  if (nunits == 1)
    return "vst1.<V_sz_elem>\t{%P1}, %A0";

  return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4994
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Q-register form.  The lane is mapped onto one of the two <V_HALF>mode
;; halves of the source: lanes in the upper half select the register at
;; REGNO + 2 and are renumbered relative to that half.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int dreg = REGNO (operands[1]);

  if (lane >= nunits / 2)
    {
      /* Upper half: switch register and rebase the lane.  */
      dreg += 2;
      lane -= nunits / 2;
    }

  operands[1] = gen_rtx_REG (<V_HALF>mode, dreg);
  operands[2] = GEN_INT (lane);

  /* With two elements each half holds a single element: no lane index.  */
  if (nunits == 2)
    return "vst1.<V_sz_elem>\t{%P1}, %A0";

  return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5022
;; vec_load_lanes for a TImode register group of VDX vectors: builds the
;; UNSPEC_VLD2 RTL matched by the TImode neon_vld2<mode> insn.  The inner
;; UNSPEC_VSTRUCTDUMMY carries the vector mode into the pattern.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")
5029
;; Two-vector structure load into a TImode register group.  For 64-bit
;; elements the pattern prints vld1.64 instead of vld2, and the "type"
;; attribute follows the same split.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
{
  /* Sub-64-bit elements use the vld2 form.  */
  if (<V_sz_elem> != 64)
    return "vld2.<V_sz_elem>\t%h0, %A1";

  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_2reg<q>")
                      (const_string "neon_load2_2reg<q>")))]
)
5047
;; vec_load_lanes for an OImode register group of VQ2 vectors: builds the
;; UNSPEC_VLD2 RTL matched by the OImode neon_vld2<mode> insn.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")
5054
;; Two-vector structure load into an OImode register group (VQ2 vector
;; modes); always printed as vld2.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")]
)
5063
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Two-element lane load into a TImode register group.  Operand 2 is tied to
;; the destination; the register list names the DImode registers at REGNO
;; and REGNO + 2 of the destination, both indexed by the same lane.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int first = REGNO (operands[0]);
  rtx xops[4];

  xops[0] = gen_rtx_REG (DImode, first);
  xops[1] = gen_rtx_REG (DImode, first + 2);
  xops[2] = operands[1];
  xops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5087
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Two-element lane load into an OImode register group.  Lanes in the upper
;; half of the vector move the base register by 2 and are renumbered; the two
;; listed DImode registers are then 4 register numbers apart.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int first = REGNO (operands[0]);
  rtx xops[4];

  if (lane >= nunits / 2)
    {
      /* Upper half: switch register and rebase the lane.  */
      first += 2;
      lane -= nunits / 2;
    }

  xops[0] = gen_rtx_REG (DImode, first);
  xops[1] = gen_rtx_REG (DImode, first + 4);
  xops[2] = operands[1];
  xops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5117
;; Two-element all-lanes load into a TImode register group.  Modes with a
;; single element per vector are printed as a plain vld1; the "type"
;; attribute is selected on <V_mode_nunits> in the same way.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  /* Single-element vectors: nothing to duplicate.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load2_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
5135
;; vec_store_lanes for a TImode register group of VDX vectors: builds the
;; UNSPEC_VST2 RTL matched by the TImode neon_vst2<mode> insn.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI [(match_operand:TI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")
5142
;; Two-vector structure store from a TImode register group.  For 64-bit
;; elements the pattern prints vst1.64 instead of vst2.
;; NOTE(review): the non-64-bit arm of the "type" attribute is
;; neon_store2_one_lane<q> although this is not a lane pattern, while the
;; TImode neon_vld2<mode> uses neon_load2_2reg<q>; confirm this scheduling
;; type is intended.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  /* Sub-64-bit elements use the vst2 form.  */
  if (<V_sz_elem> != 64)
    return "vst2.<V_sz_elem>\t%h1, %A0";

  return "vst1.64\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_2reg<q>")
                      (const_string "neon_store2_one_lane<q>")))]
)
5160
;; vec_store_lanes for an OImode register group of VQ2 vectors: builds the
;; UNSPEC_VST2 RTL matched by the OImode neon_vst2<mode> insn.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")
5167
;; Two-vector structure store from an OImode register group (VQ2 vector
;; modes); always printed as vst2.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
5177
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Two-element lane store from a TImode register group.  The register list
;; names the DImode registers at REGNO and REGNO + 2 of the source, both
;; indexed by the same lane.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int first = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, first);
  xops[2] = gen_rtx_REG (DImode, first + 2);
  xops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5201
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Two-element lane store from an OImode register group.  Lanes in the upper
;; half of the vector move the base register by 2 and are renumbered; the two
;; listed DImode registers are then 4 register numbers apart.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int first = REGNO (operands[1]);
  rtx xops[4];

  if (lane >= nunits / 2)
    {
      /* Upper half: switch register and rebase the lane.  */
      first += 2;
      lane -= nunits / 2;
    }

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, first);
  xops[2] = gen_rtx_REG (DImode, first + 4);
  xops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5231
;; vec_load_lanes for an EImode register group of VDX vectors: builds the
;; UNSPEC_VLD3 RTL matched by the EImode neon_vld3<mode> insn.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON")
5238
;; Three-vector structure load into an EImode register group.  For 64-bit
;; elements the pattern prints vld1.64 instead of vld3, and the "type"
;; attribute follows the same split.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  /* Sub-64-bit elements use the vld3 form.  */
  if (<V_sz_elem> != 64)
    return "vld3.<V_sz_elem>\t%h0, %A1";

  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_3reg<q>")
                      (const_string "neon_load3_3reg<q>")))]
)
5256
;; vec_load_lanes for a CImode register group of VQ2 vectors.  Simply
;; forwards both operands to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
5266
;; Three-vector structure load into a CImode register group, expanded as two
;; insns: neon_vld3qa on the first EImode-sized part of the memory operand,
;; then neon_vld3qb on the EImode part that follows it.  The second insn
;; also takes the destination as an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_part = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_part));

  /* Step past the bytes consumed by the first load.  */
  rtx second_part = adjust_address (first_part, EImode,
                                    GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_part, operands[0]));
  DONE;
})
5281
;; First half of the CImode three-vector load: a vld3 whose register list
;; names the DImode registers at REGNO, REGNO + 4 and REGNO + 8 of the
;; destination group.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  int first = REGNO (operands[0]);
  rtx xops[4];

  /* Every other D register of the group, starting at the first.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, first + 4 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5300
;; Second half of the CImode three-vector load.  Operand 2 is tied to the
;; destination.  The register list names the DImode registers at REGNO + 2,
;; REGNO + 6 and REGNO + 10 of the destination group.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  int first = REGNO (operands[0]) + 2;
  rtx xops[4];

  /* Every other D register of the group, starting at the second.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, first + 4 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5320
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Three-element lane load into an EImode register group.  Operand 2 is tied
;; to the destination.  The register list names the DImode registers at
;; REGNO, REGNO + 2 and REGNO + 4.  Note that the address is printed with
;; plain %3, not the %A modifier used by the two-element lane patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int first = REGNO (operands[0]);
  rtx xops[5];

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, first + 2 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5346
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; Three-element lane load into a CImode register group.  Lanes in the upper
;; half of the vector move the base register by 2 and are renumbered; the
;; three listed DImode registers are 4 register numbers apart.  The address
;; is printed with plain %3 (no %A modifier).
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int first = REGNO (operands[0]);
  rtx xops[5];

  if (lane >= nunits / 2)
    {
      /* Upper half: switch register and rebase the lane.  */
      first += 2;
      lane -= nunits / 2;
    }

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, first + 4 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5378
;; Three-element all-lanes load into an EImode register group.  Modes with a
;; single element per vector are printed as a plain vld1; otherwise the
;; register list names the DImode registers at REGNO, REGNO + 2 and
;; REGNO + 4, and the address is printed with plain %3 (no %A modifier).
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  /* Single-element vectors: nothing to duplicate.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  int first = REGNO (operands[0]);
  rtx xops[4];

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, first + 2 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load3_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
5404
;; vec_store_lanes for an EImode register group of VDX vectors: builds the
;; UNSPEC_VST3 RTL matched by the EImode neon_vst3<mode> insn.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")
5411
;; Three-vector structure store from an EImode register group.  For 64-bit
;; elements the pattern prints vst1.64 instead of vst3.
;; NOTE(review): the non-64-bit arm of the "type" attribute is
;; neon_store3_one_lane<q> although this is not a lane pattern, while the
;; EImode neon_vld3<mode> uses neon_load3_3reg<q>; confirm this scheduling
;; type is intended.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  /* Sub-64-bit elements use the vst3 form.  */
  if (<V_sz_elem> != 64)
    return "vst3.<V_sz_elem>\t%h1, %A0";

  return "vst1.64\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_3reg<q>")
                      (const_string "neon_store3_one_lane<q>")))]
)
5428
;; vec_store_lanes for a CImode register group of VQ2 vectors.  Simply
;; forwards both operands to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
})
5438
;; Three-vector structure store from a CImode register group, expanded as two
;; insns: neon_vst3qa to the first EImode-sized part of the memory operand,
;; then neon_vst3qb to the EImode part that follows it.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_part = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_part, operands[1]));

  /* Step past the bytes written by the first store.  */
  rtx second_part = adjust_address (first_part, EImode,
                                    GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_part, operands[1]));
  DONE;
})
5453
;; First half of the CImode three-vector store: a vst3 whose register list
;; names the DImode registers at REGNO, REGNO + 4 and REGNO + 8 of the
;; source group.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  int first = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  /* Every other D register of the group, starting at the first.  */
  for (int i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (DImode, first + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);

  /* Everything was already printed above.  */
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5472
;; Second half of the split CI-mode vst3 (UNSPEC_VST3B).  Same shape as
;; neon_vst3qa, but the register list is built from hard register numbers
;; base + 2, base + 6 and base + 10.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  int base = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  for (int k = 0; k < 3; k++)
    xops[k + 1] = gen_rtx_REG (DImode, base + 2 + 4 * k);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5491
;; Single-lane vst3 from an EI-mode register group (VD_LANE modes).
;; The lane number is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
;; The register list uses hard register numbers base, base + 2, base + 4.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:EI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  for (int k = 0; k < 3; k++)
    xops[k + 1] = gen_rtx_REG (DImode, base + 2 * k);
  xops[4] = GEN_INT (lane_no);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5517
;; Single-lane vst3 from a CI-mode register group (VQ_HS modes).
;; The lane number is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
;; A lane in the upper half of the mode's units is rebased into the lower
;; half and the starting hard register number is advanced by 2; the register
;; list then uses base, base + 4 and base + 8.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:CI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT half_units = GET_MODE_NUNITS (<MODE>mode);
  int base = REGNO (operands[1]);
  rtx xops[5];

  half_units = half_units / 2;
  if (lane_no >= half_units)
    {
      base += 2;
      lane_no -= half_units;
    }

  xops[0] = operands[0];
  for (int k = 0; k < 3; k++)
    xops[k + 1] = gen_rtx_REG (DImode, base + 4 * k);
  xops[4] = GEN_INT (lane_no);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5549
;; Named expander for an OI-mode lane load over the VDX modes.
;; There is no preparation code, so the RTL template is emitted as written:
;; a set of the OI register operand 0 from an UNSPEC_VLD4 of the structure
;; memory operand 1.  The template has the same shape as the
;; neon_vld4<mode> insn pattern.  Enabled only under TARGET_NEON.
5550 (define_expand "vec_load_lanesoi<mode>"
5551 [(set (match_operand:OI 0 "s_register_operand")
5552 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5553 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5554 UNSPEC_VLD4))]
5555 "TARGET_NEON")
5556
;; Load an OI-mode register group (operand 0) from the structure memory
;; operand 1 (UNSPEC_VLD4), for the VDX modes.  64-bit elements are emitted
;; as vld1.64; every other element size as vld4.<size>.  The "type"
;; attribute makes the same distinction.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)
5574
;; Named expander for an XI-mode lane load over the VQ2 modes.  Both
;; operands are handed unchanged to the neon_vld4<mode> generator.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_regs = operands[0];
  rtx src_mem = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest_regs, src_mem));
  DONE;
})
5584
;; Expand an XI-mode vld4 (VQ2 modes) into two OI-mode loads: neon_vld4qa
;; from offset 0 of the source, then neon_vld4qb from the following
;; GET_MODE_SIZE (OImode) bytes.  The second insn also takes operand 0 as an
;; input, tying it to the result of the first.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OI-sized piece of the source.  */
  rtx first_piece = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_piece));

  /* Second piece, addressed relative to the first.  */
  rtx second_piece = adjust_address (first_piece, OImode,
                                     GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_piece, operands[0]));
  DONE;
})
5599
;; First half of the split XI-mode vld4 (UNSPEC_VLD4A).  Prints a vld4 whose
;; register list is the four DImode registers at hard register numbers
;; base, base + 4, base + 8 and base + 12, where base is REGNO of operand 0.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  int base = REGNO (operands[0]);
  rtx xops[5];

  for (int k = 0; k < 4; k++)
    xops[k] = gen_rtx_REG (DImode, base + 4 * k);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5619
;; Second half of the split XI-mode vld4 (UNSPEC_VLD4B).  Operand 2 is tied
;; to the output via the "0" constraint.  The register list is built from
;; hard register numbers base + 2, base + 6, base + 10 and base + 14.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  int base = REGNO (operands[0]);
  rtx xops[5];

  for (int k = 0; k < 4; k++)
    xops[k] = gen_rtx_REG (DImode, base + 2 + 4 * k);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5640
;; Single-lane vld4 into an OI-mode register group (VD_LANE modes).
;; Operand 2 is tied to the output via the "0" constraint.
;; The lane number is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
;; The register list uses hard register numbers base, base + 2, base + 4
;; and base + 6.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[6];

  for (int k = 0; k < 4; k++)
    xops[k] = gen_rtx_REG (DImode, base + 2 * k);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane_no);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5667
;; Single-lane vld4 into an XI-mode register group (VQ_HS modes).
;; Operand 2 is tied to the output via the "0" constraint.
;; The lane number is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
;; A lane in the upper half of the mode's units is rebased into the lower
;; half and the starting hard register number is advanced by 2; the register
;; list then uses base, base + 4, base + 8 and base + 12.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT half_units = GET_MODE_NUNITS (<MODE>mode);
  int base = REGNO (operands[0]);
  rtx xops[6];

  half_units = half_units / 2;
  if (lane_no >= half_units)
    {
      base += 2;
      lane_no -= half_units;
    }

  for (int k = 0; k < 4; k++)
    xops[k] = gen_rtx_REG (DImode, base + 4 * k);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane_no);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5700
;; vld4 "all lanes" load into an OI-mode register group (VDX modes,
;; UNSPEC_VLD4_DUP).  Modes with a single unit are emitted as a plain vld1
;; of the register list; modes with more than one unit use the vld4 form
;; with empty lane brackets on the DImode registers at hard register numbers
;; base, base + 2, base + 4 and base + 6.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  /* Single-unit modes need no duplication.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  int base = REGNO (operands[0]);
  rtx xops[5];

  for (int k = 0; k < 4; k++)
    xops[k] = gen_rtx_REG (DImode, base + 2 * k);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   xops);
  return "";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
5729
;; Named expander for an OI-mode lane store over the VDX modes.
;; There is no preparation code, so the RTL template is emitted as written:
;; a set of the structure memory operand 0 from an UNSPEC_VST4 of the OI
;; register operand 1.  The template has the same shape as the
;; neon_vst4<mode> insn pattern.  Enabled only under TARGET_NEON.
5730 (define_expand "vec_store_lanesoi<mode>"
5731 [(set (match_operand:OI 0 "neon_struct_operand")
5732 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5733 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5734 UNSPEC_VST4))]
5735 "TARGET_NEON")
5736
;; Store the OI-mode register group in operand 1 to the structure memory
;; operand 0 (UNSPEC_VST4), for the VDX modes.  64-bit elements are emitted
;; as vst1.64; every other element size as vst4.<size>.  The "type"
;; attribute makes the same distinction.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst4.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)
5754
;; Named expander for an XI-mode lane store over the VQ2 modes.  Both
;; operands are handed unchanged to the neon_vst4<mode> generator.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_mem = operands[0];
  rtx src_regs = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest_mem, src_regs));
  DONE;
})
5764
;; Expand an XI-mode vst4 (VQ2 modes) into two OI-mode stores: neon_vst4qa
;; at offset 0 of the destination, then neon_vst4qb at the following
;; GET_MODE_SIZE (OImode) bytes.  Both read the same source operand.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OI-sized piece of the destination.  */
  rtx first_piece = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_piece, operands[1]));

  /* Second piece, addressed relative to the first.  */
  rtx second_piece = adjust_address (first_piece, OImode,
                                     GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_piece, operands[1]));
  DONE;
})
5779
;; First half of the split XI-mode vst4 (UNSPEC_VST4A).  Prints a vst4 whose
;; register list is the four DImode registers at hard register numbers
;; base, base + 4, base + 8 and base + 12, where base is REGNO of operand 1.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  for (int k = 0; k < 4; k++)
    xops[k + 1] = gen_rtx_REG (DImode, base + 4 * k);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5799
;; Second half of the split XI-mode vst4 (UNSPEC_VST4B).  Same shape as
;; neon_vst4qa, but the register list is built from hard register numbers
;; base + 2, base + 6, base + 10 and base + 14.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  for (int k = 0; k < 4; k++)
    xops[k + 1] = gen_rtx_REG (DImode, base + 2 + 4 * k);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5819
;; Single-lane vst4 from an OI-mode register group (VD_LANE modes).
;; The lane number is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
;; The register list uses hard register numbers base, base + 2, base + 4
;; and base + 6.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:OI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[6];

  xops[0] = operands[0];
  for (int k = 0; k < 4; k++)
    xops[k + 1] = gen_rtx_REG (DImode, base + 2 * k);
  xops[5] = GEN_INT (lane_no);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5846
;; Single-lane vst4 from an XI-mode register group (VQ_HS modes).
;; The lane number is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
;; A lane in the upper half of the mode's units is rebased into the lower
;; half and the starting hard register number is advanced by 2; the register
;; list then uses regno, regno + 4, regno + 8 and regno + 12.
;; This is a single-lane store, so its "type" is neon_store4_one_lane, the
;; same class used by the D-register neon_vst4_lane pattern and mirroring
;; the Q-register neon_vst3_lane/neon_vld4_lane patterns (the earlier
;; neon_store4_4reg value described a full four-register store).
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:XI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5879
;; Widen the half of the VU-mode operand 1 selected by operand 2, which must
;; satisfy vect_par_constant_low, applying the SE extension code to produce
;; a <V_unpack> result.  Emitted as vmovl on %e1.
;; Disabled on big-endian targets (condition requires !BYTES_BIG_ENDIAN).
;; NOTE(review): SE is presumably the sign/zero-extend code iterator paired
;; with <US>; its definition is not visible here -- confirm in iterators.md.
5880 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5881 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5882 (SE:<V_unpack> (vec_select:<V_HALF>
5883 (match_operand:VU 1 "register_operand" "w")
5884 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5885 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5886 "vmovl.<US><V_sz_elem> %q0, %e1"
5887 [(set_attr "type" "neon_shift_imm_long")]
5888 )
5889
;; Widen the half of the VU-mode operand 1 selected by operand 2, which must
;; satisfy vect_par_constant_high, applying the SE extension code to produce
;; a <V_unpack> result.  Emitted as vmovl on %f1.
;; Disabled on big-endian targets (condition requires !BYTES_BIG_ENDIAN).
;; NOTE(review): SE is presumably the sign/zero-extend code iterator paired
;; with <US>; its definition is not visible here -- confirm in iterators.md.
5890 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5891 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5892 (SE:<V_unpack> (vec_select:<V_HALF>
5893 (match_operand:VU 1 "register_operand" "w")
5894 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5895 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5896 "vmovl.<US><V_sz_elem> %q0, %f1"
5897 [(set_attr "type" "neon_shift_imm_long")]
5898 )
5899
;; Named expander for the high-half unpack of a VU-mode vector.  Builds a
;; PARALLEL of the indices <V_mode_nunits>/2 + k for k in
;; [0, <V_mode_nunits>/2) and passes it as the selector operand of
;; neon_vec_unpack<US>_hi_<mode>.  Little-endian only.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (sel, k) = GEN_INT (half + k);

  rtx hi_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 hi_par));
  DONE;
}
)
5918
;; Named expander for the low-half unpack of a VU-mode vector.  Builds a
;; PARALLEL of the indices k for k in [0, <V_mode_nunits>/2) and passes it
;; as the selector operand of neon_vec_unpack<US>_lo_<mode>.
;; Little-endian only.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (sel, k) = GEN_INT (k);

  rtx lo_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 lo_par));
  DONE;
}
)
5936
;; Widening multiply of matching halves of two VU-mode registers.  Operand 2
;; (vect_par_constant_low) selects the half of operand 1; the match_dup
;; forces the same selector on operand 3.  Each selected half is extended
;; with SE before the multiply.  Emitted as vmull on %e1 and %e3.
;; Disabled on big-endian targets (condition requires !BYTES_BIG_ENDIAN).
5937 (define_insn "neon_vec_<US>mult_lo_<mode>"
5938 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5939 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5940 (match_operand:VU 1 "register_operand" "w")
5941 (match_operand:VU 2 "vect_par_constant_low" "")))
5942 (SE:<V_unpack> (vec_select:<V_HALF>
5943 (match_operand:VU 3 "register_operand" "w")
5944 (match_dup 2)))))]
5945 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5946 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5947 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5948 )
5949
;; Named expander for the low-half widening multiply of two VU-mode vectors.
;; Builds a PARALLEL of the indices k for k in [0, <V_mode_nunits>/2) and
;; passes it, between the two inputs, to neon_vec_<US>mult_lo_<mode>.
;; Little-endian only.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (sel, k) = GEN_INT (k);

  rtx lo_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                               operands[1],
                                               lo_par,
                                               operands[2]));
  DONE;
}
)
5970
;; Widening multiply of matching halves of two VU-mode registers.  Operand 2
;; (vect_par_constant_high) selects the half of operand 1; the match_dup
;; forces the same selector on operand 3.  Each selected half is extended
;; with SE before the multiply.  Emitted as vmull on %f1 and %f3.
;; Disabled on big-endian targets (condition requires !BYTES_BIG_ENDIAN).
5971 (define_insn "neon_vec_<US>mult_hi_<mode>"
5972 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5973 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5974 (match_operand:VU 1 "register_operand" "w")
5975 (match_operand:VU 2 "vect_par_constant_high" "")))
5976 (SE:<V_unpack> (vec_select:<V_HALF>
5977 (match_operand:VU 3 "register_operand" "w")
5978 (match_dup 2)))))]
5979 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5980 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5981 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5982 )
5983
;; Named expander for the high-half widening multiply of two VU-mode
;; vectors.  Builds a PARALLEL of the indices <V_mode_nunits>/2 + k for k in
;; [0, <V_mode_nunits>/2) and passes it, between the two inputs, to
;; neon_vec_<US>mult_hi_<mode>.  Little-endian only.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (sel, k) = GEN_INT (half + k);

  rtx hi_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                               operands[1],
                                               hi_par,
                                               operands[2]));
  DONE;
}
)
6005
;; Widening left shift of a VW-mode register by the constant in operand 2
;; (const_neon_scalar_shift_amount_operand): the shifted value is extended
;; with SE to <V_widen>.  Emitted as vshll.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
          (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
6016
;; Named expander for the low-half widening shift of a VU-mode vector.
;; Takes the <V_HALF> subreg of operand 1 at byte offset 0 and feeds it,
;; with the shift amount, to neon_vec_<US>shiftl_<V_half>.
;; Little-endian only.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx low_part = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                      <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], low_part,
                                                operands[2]));
  DONE;
}
)
6029
;; Named expander for the high-half widening shift of a VU-mode vector.
;; Takes the <V_HALF> subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and feeds it, with the shift amount, to
;; neon_vec_<US>shiftl_<V_half>.  Little-endian only.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx high_part = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                       <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], high_part,
                                                operands[2]));
  DONE;
}
)
6043
6044 ;; Vectorize for non-neon-quad case
;; Extend (SE) a whole VDI-mode register to its <V_widen> mode.  Emitted as
;; vmovl from %P1 into %q0.  Used by the VDI vec_unpack<US>_lo/hi expanders,
;; which then extract one half of the widened result.
6045 (define_insn "neon_unpack<US>_<mode>"
6046 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6047 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6048 "TARGET_NEON"
6049 "vmovl.<US><V_sz_elem> %q0, %P1"
6050 [(set_attr "type" "neon_move")]
6051 )
6052
;; Named expander for the low-half unpack of a VDI-mode vector.  Widens the
;; whole input into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>,
;; then copies out its low half with neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
}
)
6065
;; Named expander for the high-half unpack of a VDI-mode vector.  Widens the
;; whole input into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>,
;; then copies out its high half with neon_vget_high<V_widen_l>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
}
)
6078
;; Widening multiply of two whole VDI-mode registers: each input is extended
;; with SE to <V_widen> and the products form the <V_widen> result.
;; Emitted as vmull with %P1 and %P2 as sources and %q0 as destination.
6079 (define_insn "neon_vec_<US>mult_<mode>"
6080 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6081 (mult:<V_widen> (SE:<V_widen>
6082 (match_operand:VDI 1 "register_operand" "w"))
6083 (SE:<V_widen>
6084 (match_operand:VDI 2 "register_operand" "w"))))]
6085 "TARGET_NEON"
6086 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6087 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6088 )
6089
;; Named expander for the high-half widening multiply of two VDI-mode
;; vectors.  Computes the full widened product into a fresh <V_widen> pseudo
;; with neon_vec_<US>mult_<mode>, then copies out its high half with
;; neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1], operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
}
)
6104
;; Named expander for the low-half widening multiply of two VDI-mode
;; vectors.  Computes the full widened product into a fresh <V_widen> pseudo
;; with neon_vec_<US>mult_<mode>, then copies out its low half with
;; neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1], operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
  DONE;
}
)
6119
;; Named expander for the high-half widening shift of a VDI-mode vector.
;; Computes the full widened shift into a fresh <V_widen> pseudo with
;; neon_vec_<US>shiftl_<mode>, then copies out its high half with
;; neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1], operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6133
;; Named expander for the low-half widening shift of a VDI-mode vector.
;; Computes the full widened shift into a fresh <V_widen> pseudo with
;; neon_vec_<US>shiftl_<mode>, then copies out its low half with
;; neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1], operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6147
6148 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6149 ; because the ordering of vector elements in Q registers is different from what
6150 ; the semantics of the instructions require.
6151
;; Pack two VN-mode registers into one <V_narrow_pack> register: the result
;; is the vec_concat of the truncation of operand 1 and the truncation of
;; operand 2.  Emitted as two vmovn instructions (writing %e0 then %f0),
;; hence the "multiple" type and the length of 8.
;; The output is earlyclobber ("=&w"): the first vmovn writes part of it
;; before operand 2 has been read.
;; Disabled on big-endian targets (condition requires !BYTES_BIG_ENDIAN).
6152 (define_insn "vec_pack_trunc_<mode>"
6153 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6154 (vec_concat:<V_narrow_pack>
6155 (truncate:<V_narrow>
6156 (match_operand:VN 1 "register_operand" "w"))
6157 (truncate:<V_narrow>
6158 (match_operand:VN 2 "register_operand" "w"))))]
6159 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6160 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6161 [(set_attr "type" "multiple")
6162 (set_attr "length" "8")]
6163 )
6164
6165 ;; For the non-quad case.
;; Truncate a single VN-mode register to <V_narrow>, emitted as one vmovn
;; from %q1 into %P0.  The VSHFT vec_pack_trunc_<mode> expander uses this
;; after assembling its two inputs into one double-width register.
;; Disabled on big-endian targets (condition requires !BYTES_BIG_ENDIAN).
6166 (define_insn "neon_vec_pack_trunc_<mode>"
6167 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6168 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6169 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6170 "vmovn.i<V_sz_elem>\t%P0, %q1"
6171 [(set_attr "type" "neon_move_narrow_q")]
6172 )
6173
;; Named expander for packing two VSHFT-mode vectors.  Operand 1 is moved
;; into the low half and operand 2 into the high half of a fresh
;; <V_DOUBLE> pseudo, which is then narrowed into operand 0 with
;; neon_vec_pack_trunc_<V_double>.  Little-endian only.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
   (match_operand:VSHFT 1 "register_operand" "")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx joined = gen_reg_rtx (<V_DOUBLE>mode);

  /* Assemble the two inputs side by side, low half first.  */
  emit_insn (gen_move_lo_quad_<V_double> (joined, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (joined, operands[2]));

  /* Narrow the combined register into the result.  */
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], joined));
  DONE;
})
6187
;; Absolute difference expressed as abs (minus (op1, op2)) over the VDQ
;; modes, emitted as vabd with the <V_s_elem> element suffix.
;; For floating-point modes the pattern is only enabled under
;; flag_unsafe_math_optimizations; the "type" attribute likewise selects the
;; fp or integer abd class from <Is_float_mode>.
6188 (define_insn "neon_vabd<mode>_2"
6189 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6190 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
6191 (match_operand:VDQ 2 "s_register_operand" "w"))))]
6192 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6193 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6194 [(set (attr "type")
6195 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6196 (const_string "neon_fp_abd_s<q>")
6197 (const_string "neon_abd<q>")))]
6198 )
6199
;; Absolute difference expressed as abs of an UNSPEC_VSUB of op1 and op2
;; over the VDQ modes, emitted as vabd with the <V_if_elem> element suffix.
;; For floating-point modes the pattern is only enabled under
;; flag_unsafe_math_optimizations; the "type" attribute likewise selects the
;; fp or integer abd class from <Is_float_mode>.
6200 (define_insn "neon_vabd<mode>_3"
6201 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6202 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
6203 (match_operand:VDQ 2 "s_register_operand" "w")]
6204 UNSPEC_VSUB)))]
6205 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6206 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6207 [(set (attr "type")
6208 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6209 (const_string "neon_fp_abd_s<q>")
6210 (const_string "neon_abd<q>")))]
6211 )
6212
6213 ;; Copy from core-to-neon regs, then extend, not vice-versa
6214
;; After reload, when the DI destination satisfies IS_VFP_REGNUM, split a
;; sign-extension of an SI register into a V2SI vec_duplicate of the source
;; into the destination register followed by an arithmetic right shift of
;; the DI destination by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination hard register viewed in V2SImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
6224
;; After reload, when the DI destination satisfies IS_VFP_REGNUM, split a
;; sign-extension of an HI register into a V4HI vec_duplicate of the source
;; into the destination register followed by an arithmetic right shift of
;; the DI destination by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination hard register viewed in V4HImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
6234
;; After reload, when the DI destination satisfies IS_VFP_REGNUM, split a
;; sign-extension of a QI register into a V8QI vec_duplicate of the source
;; into the destination register followed by an arithmetic right shift of
;; the DI destination by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination hard register viewed in V8QImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})
6244
;; After reload, when the DI destination satisfies IS_VFP_REGNUM, split a
;; zero-extension of an SI register into a V2SI vec_duplicate of the source
;; into the destination register followed by a logical right shift of the
;; DI destination by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination hard register viewed in V2SImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
6254
;; After reload, when the DI destination satisfies IS_VFP_REGNUM, split a
;; zero-extension of an HI register into a V4HI vec_duplicate of the source
;; into the destination register followed by a logical right shift of the
;; DI destination by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination hard register viewed in V4HImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
6264
;; After reload, when the DI destination satisfies IS_VFP_REGNUM, split a
;; zero-extension of a QI register into a V8QI vec_duplicate of the source
;; into the destination register followed by a logical right shift of the
;; DI destination by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination hard register viewed in V8QImode.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})