[PATCH 6/17][ARM] Add data processing intrinsics for float16_t.
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The attribute takes one of the values vadd, vmin or vmax and defaults
;; to vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the VDX modes; the insn condition requires at least one
;; of the two operands to be a register.
;; Output by alternative: 0 is a plain vmov; 1 and 3 are delegated to
;; output_move_neon; 2 prints a vmov immediate built from the width reported
;; by neon_immediate_valid_for_move; 4 and 5 are vmov forms that name two
;; registers on one side; the remaining alternatives go through
;; output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
/* templ is static because a pointer to it is returned below. */
38 static char templ[40];
39
/* The call may rewrite operands[1] and reports the element width. */
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
/* A reported width of 0 selects the vmov.f32 spelling. */
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
/* Alternative 2 has already returned above. */
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
61 }
62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the VQXMOV modes; the insn condition requires at least
;; one of the two operands to be a register.
;; Output by alternative: 0 is a plain vmov; 1 and 3 are delegated to
;; output_move_neon; 2 prints a vmov immediate built from the width reported
;; by neon_immediate_valid_for_move; 4 and 5 each emit two vmov
;; instructions; the remaining alternatives go through output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
76 "TARGET_NEON
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
79 {
80 if (which_alternative == 2)
81 {
82 int width, is_valid;
/* templ is static because a pointer to it is returned below. */
83 static char templ[40];
84
/* The call may rewrite operands[1] and reports the element width. */
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
87
88 gcc_assert (is_valid != 0);
89
/* A reported width of 0 selects the vmov.f32 spelling. */
90 if (width == 0)
91 return "vmov.f32\t%q0, %1 @ <mode>";
92 else
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
94
95 return templ;
96 }
97
98 switch (which_alternative)
99 {
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
/* Alternative 2 has already returned above. */
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
106 }
107 }
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; Expander for TImode moves, available when TARGET_NEON holds. While new
;; pseudos can still be created, a source headed for a non-register
;; destination is first forced into a register.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
119 "TARGET_NEON"
120 {
121 if (can_create_pseudo_p ())
122 {
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
125 }
126 })
127
;; Expander for moves in the VSTRUCT modes, available when TARGET_NEON
;; holds. While new pseudos can still be created, a source headed for a
;; non-register destination is first forced into a register.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
131 "TARGET_NEON"
132 {
133 if (can_create_pseudo_p ())
134 {
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
137 }
138 })
139
;; Expander for V4HF moves; requires both TARGET_NEON and TARGET_FP16.
;; Both operands use the s_register_operand predicate. While new pseudos
;; can still be created, the source is forced into a register whenever the
;; destination is not a REG.
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
144 {
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
147 this case. */
148 if (can_create_pseudo_p ())
149 {
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
152 }
153 })
154
;; Expander for V8HF moves; requires both TARGET_NEON and TARGET_FP16.
;; Unlike movv4hf, the operands here carry empty predicates. While new
;; pseudos can still be created, the source is forced into a register
;; whenever the destination is not a REG.
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
159 {
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
162 this case. */
163 if (can_create_pseudo_p ())
164 {
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
167 }
168 })
169
;; Move pattern for the VSTRUCT modes; the insn condition requires at least
;; one operand to be a register. Alternative 0 (register to register)
;; outputs only a hash template, while alternatives 1 and 2 are printed by
;; output_move_neon. The length attribute is computed by
;; arm_attr_length_move_neon.
;; NOTE(review): alternative 0 is presumably handled by the post-reload
;; define_splits that follow in this file -- confirm that EI/OI/CI/XI are
;; the VSTRUCT modes.
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
173 "TARGET_NEON
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
176 {
177 switch (which_alternative)
178 {
179 case 0: return "#";
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
182 }
183 }
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
186
;; After reload, split an EI register-to-register copy into two moves: a
;; TImode piece at the base register numbers and a DImode piece at register
;; number + 4. The piece arrays are handed to neon_disambiguate_copy
;; together with the operands array.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..3] in
;; an order that is safe when source and destination overlap -- confirm.
187 (define_split
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
193 {
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
196 rtx dest[2], src[2];
197
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
202
203 neon_disambiguate_copy (operands, dest, src, 2);
204 })
205
;; After reload, split an OI register-to-register copy into two TImode
;; moves, at the base register numbers and at register number + 4. The
;; piece arrays are handed to neon_disambiguate_copy together with the
;; operands array.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..3] in
;; an order that is safe when source and destination overlap -- confirm.
206 (define_split
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
212 {
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
215 rtx dest[2], src[2];
216
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
221
222 neon_disambiguate_copy (operands, dest, src, 2);
223 })
224
;; After reload, split a CI register-to-register copy into three TImode
;; moves, at register number offsets 0, 4 and 8. The piece arrays are
;; handed to neon_disambiguate_copy together with the operands array.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..5] in
;; an order that is safe when source and destination overlap -- confirm.
225 (define_split
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
232 {
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
235 rtx dest[3], src[3];
236
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
243
244 neon_disambiguate_copy (operands, dest, src, 3);
245 })
246
;; After reload, split an XI register-to-register copy into four TImode
;; moves, at register number offsets 0, 4, 8 and 12. The piece arrays are
;; handed to neon_disambiguate_copy together with the operands array.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..7] in
;; an order that is safe when source and destination overlap -- confirm.
247 (define_split
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
255 {
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
258 rtx dest[4], src[4];
259
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
268
269 neon_disambiguate_copy (operands, dest, src, 4);
270 })
271
;; Expander for misaligned moves in the VDQX modes, wrapped in
;; UNSPEC_MISALIGNED_ACCESS. Enabled only when TARGET_NEON,
;; !BYTES_BIG_ENDIAN and unaligned_access all hold. The operand picked as
;; adjust_mem (operand 1 when operand 0 is a register, otherwise operand 0)
;; has its address forced into a register whenever neon_vector_mem_operand
;; rejects it.
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
277 {
278 rtx adjust_mem;
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
285
/* Pick the operand whose address may need legitimizing. */
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
288 else
289 adjust_mem = operands[0];
290
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
294
295 })
296
;; Misaligned store of a VDX register (operand 1) to operand 0, emitted as
;; a single vst1.<V_sz_elem> instruction. Same enabling condition as the
;; movmisalign<mode> expander.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
304
;; Misaligned load of operand 1 into a VDX register (operand 0), emitted as
;; a single vld1.<V_sz_elem> instruction. Same enabling condition as the
;; movmisalign<mode> expander.
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
308 " Um")]
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
313
;; Misaligned store of a VQX register (operand 1) to operand 0, emitted as
;; a single vst1.<V_sz_elem> instruction using the %q operand form. Same
;; enabling condition as the movmisalign<mode> expander.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
321
;; Misaligned load of operand 1 into a VQX register (operand 0), emitted as
;; a single vld1.<V_sz_elem> instruction using the %q operand form. Same
;; enabling condition as the movmisalign<mode> expander.
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
325 " Um")]
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
330
;; Insert scalar operand 1 into one lane of a VD_LANE vector. Operand 3,
;; the vector being merged into, is tied to the destination. Operand 2 is
;; a bit mask: the lane index is the position of its lowest set bit
;; (ffs - 1), mirrored within the vector when BYTES_BIG_ENDIAN.
;; Alternative 0 emits a vld1 lane load and alternative 1 a vmov.
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
333 (vec_merge:VD_LANE
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
338 "TARGET_NEON"
339 {
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
/* Replace the mask with the lane number used by the templates. */
343 operands[2] = GEN_INT (elt);
344
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
347 else
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
349 }
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
351
;; Insert scalar operand 1 into one lane of a VQ2 vector. Operand 3, the
;; vector being merged into, is tied to the destination. Operand 2 is a
;; bit mask whose lowest set bit (ffs - 1) gives the element number. The
;; destination is rewritten as a <V_HALF>mode register: elem / half_elts
;; selects register number offset 0 or 2, and elem % half_elts is the lane
;; within that half (mirrored when BYTES_BIG_ENDIAN).
;; Alternative 0 emits a vld1 lane load and alternative 1 a vmov.
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
354 (vec_merge:VQ2
355 (vec_duplicate:VQ2
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
359 "TARGET_NEON"
360 {
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
366
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
369
/* From here on the templates see only the selected half and lane. */
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
372
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
375 else
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
377 }
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
379 )
380
;; Insert DImode operand 1 into one element of a V2DI vector. Operand 3,
;; the vector being merged into, is tied to the destination. Operand 2 is
;; a bit mask whose lowest set bit (ffs - 1) gives the element number. The
;; destination is rewritten as the DImode register at register number
;; REGNO + 2 * elem. Alternative 0 emits vld1.64 and alternative 1 a vmov
;; from a register pair.
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
383 (vec_merge:V2DI
384 (vec_duplicate:V2DI
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
388 "TARGET_NEON"
389 {
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
392
393 operands[0] = gen_rtx_REG (DImode, regno);
394
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
397 else
398 return "vmov\t%P0, %Q1, %R1";
399 }
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
401 )
402
;; Expander for the VDQ modes: operand 0 is the vector, operand 1 the
;; scalar value and operand 2 the lane index. The index is converted to
;; the bit mask 1 << index and passed to vec_set<mode>_internal, with
;; operand 0 used both as the destination and as the vector merged into.
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
407 "TARGET_NEON"
408 {
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
412 DONE;
413 })
414
;; Extract the lane numbered by operand 2 from a VD_LANE vector (operand 1)
;; into operand 0. The lane number is mirrored within the vector when
;; BYTES_BIG_ENDIAN. Alternative 0 emits a vst1 lane store and alternative
;; 1 a vmov.<V_uf_sclr>.
415 (define_insn "vec_extract<mode>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
417 (vec_select:<V_elem>
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
420 "TARGET_NEON"
421 {
422 if (BYTES_BIG_ENDIAN)
423 {
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
427 }
428
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
431 else
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
433 }
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
435 )
436
;; Extract the element numbered by operand 2 from a VQ2 vector (operand 1)
;; into operand 0. The source is rewritten as a <V_HALF>mode register:
;; index / half_elts selects register number offset 0 or 2, and
;; index % half_elts is the lane within that half (mirrored when
;; BYTES_BIG_ENDIAN). Alternative 0 emits a vst1 lane store and
;; alternative 1 a vmov.<V_uf_sclr>.
437 (define_insn "vec_extract<mode>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
439 (vec_select:<V_elem>
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
442 "TARGET_NEON"
443 {
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
448
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
451
/* From here on the templates see only the selected half and lane. */
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
454
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
457 else
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
459 }
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
461 )
462
;; Extract the DImode element numbered by operand 2 from a V2DI vector
;; (operand 1) into operand 0. The source is rewritten as the DImode
;; register at register number REGNO + 2 * index. Alternative 0 emits
;; vst1.64 and alternative 1 a vmov into a register pair.
463 (define_insn "vec_extractv2di"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
465 (vec_select:DI
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
468 "TARGET_NEON"
469 {
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
471
472 operands[1] = gen_rtx_REG (DImode, regno);
473
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
476 else
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
478 }
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
480 )
481
;; Expander for the VDQ modes that hands both operands straight to
;; neon_expand_vector_init and emits nothing else itself.
482 (define_expand "vec_init<mode>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
485 "TARGET_NEON"
486 {
487 neon_expand_vector_init (operands[0], operands[1]);
488 DONE;
489 })
490
491 ;; Doubleword and quadword arithmetic.
492
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
495
;; Vector add for the VDQ modes, emitted as vadd.<V_if_elem>. For modes
;; where <Is_float_mode> is true the pattern is enabled only under
;; flag_unsafe_math_optimizations; the type attribute also depends on
;; <Is_float_mode>.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
502 [(set (attr "type")
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
506 )
507
;; DImode add that clobbers the condition-code register. Alternatives 0 and
;; 3 emit vadd.i64; every other alternative outputs only a hash template
;; and is attributed as type multiple, length 8, conds clob. The arch
;; attribute ties alternative 0 to neon_for_64bits and alternative 3 to
;; avoid_neon_for_64bits.
508 (define_insn "adddi3_neon"
509 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
510 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
511 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
512 (clobber (reg:CC CC_REGNUM))]
513 "TARGET_NEON"
514 {
515 switch (which_alternative)
516 {
517 case 0: /* fall through */
518 case 3: return "vadd.i64\t%P0, %P1, %P2";
519 case 1: return "#";
520 case 2: return "#";
521 case 4: return "#";
522 case 5: return "#";
523 case 6: return "#";
524 default: gcc_unreachable ();
525 }
526 }
527 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
528 multiple,multiple,multiple")
529 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
530 (set_attr "length" "*,8,8,*,8,8,8")
531 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
532 )
533
;; Vector subtract for the VDQ modes, emitted as vsub.<V_if_elem>. For
;; modes where <Is_float_mode> is true the pattern is enabled only under
;; flag_unsafe_math_optimizations; the type attribute also depends on
;; <Is_float_mode>.
534 (define_insn "*sub<mode>3_neon"
535 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
536 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
537 (match_operand:VDQ 2 "s_register_operand" "w")))]
538 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
539 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
540 [(set (attr "type")
541 (if_then_else (match_test "<Is_float_mode>")
542 (const_string "neon_fp_addsub_s<q>")
543 (const_string "neon_sub<q>")))]
544 )
545
;; DImode subtract that clobbers the condition-code register. Alternatives
;; 0 and 4 emit vsub.i64; alternatives 1 to 3 emit a subs/sbc instruction
;; pair and are attributed as type multiple, length 8, conds clob. The
;; arch attribute ties alternative 0 to neon_for_64bits and alternative 4
;; to avoid_neon_for_64bits.
546 (define_insn "subdi3_neon"
547 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
548 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
549 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
550 (clobber (reg:CC CC_REGNUM))]
551 "TARGET_NEON"
552 {
553 switch (which_alternative)
554 {
555 case 0: /* fall through */
556 case 4: return "vsub.i64\t%P0, %P1, %P2";
557 case 1: /* fall through */
558 case 2: /* fall through */
559 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
560 default: gcc_unreachable ();
561 }
562 }
563 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
564 (set_attr "conds" "*,clob,clob,clob,*")
565 (set_attr "length" "*,8,8,8,*")
566 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
567 )
568
;; Vector multiply for the VDQW modes, emitted as vmul.<V_if_elem>. For
;; modes where <Is_float_mode> is true the pattern is enabled only under
;; flag_unsafe_math_optimizations; the type attribute also depends on
;; <Is_float_mode>.
569 (define_insn "*mul<mode>3_neon"
570 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
571 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
572 (match_operand:VDQW 2 "s_register_operand" "w")))]
573 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
574 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
575 [(set (attr "type")
576 (if_then_else (match_test "<Is_float_mode>")
577 (const_string "neon_fp_mul_s<q>")
578 (const_string "neon_mul_<V_elem_ch><q>")))]
579 )
580
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, emitted as vmla.<V_if_elem>. The addend (operand 1) is tied
;; to the destination, so only operands 2 and 3 appear as sources in the
;; template. For modes where <Is_float_mode> is true the pattern is enabled
;; only under flag_unsafe_math_optimizations.
581 (define_insn "mul<mode>3add<mode>_neon"
582 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
583 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
584 (match_operand:VDQW 3 "s_register_operand" "w"))
585 (match_operand:VDQW 1 "s_register_operand" "0")))]
586 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
587 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
588 [(set (attr "type")
589 (if_then_else (match_test "<Is_float_mode>")
590 (const_string "neon_fp_mla_s<q>")
591 (const_string "neon_mla_<V_elem_ch><q>")))]
592 )
593
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, emitted as vmls.<V_if_elem>. The minuend (operand 1) is tied
;; to the destination, so only operands 2 and 3 appear as sources in the
;; template. For modes where <Is_float_mode> is true the pattern is enabled
;; only under flag_unsafe_math_optimizations.
594 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
595 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
596 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
597 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
598 (match_operand:VDQW 3 "s_register_operand" "w"))))]
599 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
600 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
601 [(set (attr "type")
602 (if_then_else (match_test "<Is_float_mode>")
603 (const_string "neon_fp_mla_s<q>")
604 (const_string "neon_mla_<V_elem_ch><q>")))]
605 )
606
607 ;; Fused multiply-accumulate
608 ;; We define each insn twice here:
609 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
610 ;; to be able to use when converting to FMA.
611 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes, emitted as vfma. This variant
;; additionally requires flag_unsafe_math_optimizations. The addend
;; (operand 3) is tied to the destination.
612 (define_insn "fma<VCVTF:mode>4"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
618 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
620 )
621
;; Same RTL and output template as fma<VCVTF:mode>4, but enabled whenever
;; TARGET_NEON and TARGET_FMA hold, without requiring
;; flag_unsafe_math_optimizations.
622 (define_insn "fma<VCVTF:mode>4_intrinsic"
623 [(set (match_operand:VCVTF 0 "register_operand" "=w")
624 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
625 (match_operand:VCVTF 2 "register_operand" "w")
626 (match_operand:VCVTF 3 "register_operand" "0")))]
627 "TARGET_NEON && TARGET_FMA"
628 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
629 [(set_attr "type" "neon_fp_mla_s<q>")]
630 )
631
;; Fused multiply-subtract for the VCVTF modes: an fma whose first
;; multiplicand (operand 1) is negated, emitted as vfms. This variant
;; additionally requires flag_unsafe_math_optimizations. The addend
;; (operand 3) is tied to the destination.
632 (define_insn "*fmsub<VCVTF:mode>4"
633 [(set (match_operand:VCVTF 0 "register_operand" "=w")
634 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
635 (match_operand:VCVTF 2 "register_operand" "w")
636 (match_operand:VCVTF 3 "register_operand" "0")))]
637 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
638 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
639 [(set_attr "type" "neon_fp_mla_s<q>")]
640 )
641
;; Same RTL and output template as *fmsub<VCVTF:mode>4, but enabled
;; whenever TARGET_NEON and TARGET_FMA hold, without requiring
;; flag_unsafe_math_optimizations.
642 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
643 [(set (match_operand:VCVTF 0 "register_operand" "=w")
644 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
645 (match_operand:VCVTF 2 "register_operand" "w")
646 (match_operand:VCVTF 3 "register_operand" "0")))]
647 "TARGET_NEON && TARGET_FMA"
648 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
649 [(set_attr "type" "neon_fp_mla_s<q>")]
650 )
651
;; Rounding patterns for the VCVTF modes, one per NEON_VRINT unspec; the
;; mnemonic suffix comes from <nvrint_variant> and the element suffix is
;; always .f32. Requires TARGET_NEON and TARGET_FPU_ARMV8.
652 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
653 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
654 (unspec:VCVTF [(match_operand:VCVTF 1
655 "s_register_operand" "w")]
656 NEON_VRINT))]
657 "TARGET_NEON && TARGET_FPU_ARMV8"
658 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
659 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
660 )
661
;; Float-to-integer conversion of a NEON_VCVT unspec for the VCVTF modes,
;; producing a <V_cmp_result> value. The FIXUORS iterator supplies the
;; <su> part of the mnemonic and <nvrint_variant> the vcvt suffix.
;; Requires TARGET_NEON and TARGET_FPU_ARMV8; marked as not predicable.
662 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
663 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
664 (FIXUORS:<V_cmp_result> (unspec:VCVTF
665 [(match_operand:VCVTF 1 "register_operand" "w")]
666 NEON_VCVT)))]
667 "TARGET_NEON && TARGET_FPU_ARMV8"
668 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
669 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
670 (set_attr "predicable" "no")]
671 )
672
;; Bitwise OR for the VDQ modes. Alternative 0 is the three-register vorr.
;; Alternative 1 (operand 1 tied to the destination, operand 2 matching
;; the Dl constraint) is printed by neon_output_logic_immediate with the
;; mnemonic vorr and a fourth argument of 0.
;; NOTE(review): the fourth argument presumably selects the non-inverted
;; immediate form -- confirm against neon_output_logic_immediate.
673 (define_insn "ior<mode>3"
674 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
675 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
676 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
677 "TARGET_NEON"
678 {
679 switch (which_alternative)
680 {
681 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
682 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
683 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
684 default: gcc_unreachable ();
685 }
686 }
687 [(set_attr "type" "neon_logic<q>")]
688 )
689
690 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
691 ;; vorr. We support the pseudo-instruction vand instead, because that
692 ;; corresponds to the canonical form the middle-end expects to use for
693 ;; immediate bitwise-ANDs.
694
;; Bitwise AND for the VDQ modes. Alternative 0 is the three-register vand.
;; Alternative 1 (operand 1 tied to the destination, operand 2 matching
;; the DL constraint) is printed by neon_output_logic_immediate with the
;; mnemonic vand and a fourth argument of 1.
;; NOTE(review): the fourth argument presumably selects the inverted
;; immediate form -- confirm against neon_output_logic_immediate.
695 (define_insn "and<mode>3"
696 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
697 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
698 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
699 "TARGET_NEON"
700 {
701 switch (which_alternative)
702 {
703 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
704 case 1: return neon_output_logic_immediate ("vand", &operands[2],
705 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
706 default: gcc_unreachable ();
707 }
708 }
709 [(set_attr "type" "neon_logic<q>")]
710 )
711
;; OR-with-complement for the VDQ modes: operand 0 = operand 1 | ~operand 2,
;; emitted as vorn. In the RTL the inverted operand (2) comes first, while
;; the output template lists operand 1 before operand 2.
712 (define_insn "orn<mode>3_neon"
713 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
714 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
715 (match_operand:VDQ 1 "s_register_operand" "w")))]
716 "TARGET_NEON"
717 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
718 [(set_attr "type" "neon_logic<q>")]
719 )
720
721 ;; TODO: investigate whether we should disable
722 ;; this and bicdi3_neon for the A8 in line with the other
723 ;; changes above.
;; DImode OR-with-complement: operand 0 = operand 1 | ~operand 2.
;; Alternative 0 emits vorn directly; the other three alternatives output
;; a hash template and are split after reload when operand 0 is not a VFP
;; register. Under TARGET_THUMB2 the split rewrites the operands so that
;; the replacement pattern becomes two SImode operations, one on the
;; lowpart and one on the highpart. Otherwise it emits one_cmpldi2
;; followed by iordi3 and finishes with DONE, so the replacement pattern
;; is not used.
724 (define_insn_and_split "orndi3_neon"
725 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
726 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
727 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
728 "TARGET_NEON"
729 "@
730 vorn\t%P0, %P1, %P2
731 #
732 #
733 #"
734 "reload_completed &&
735 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
736 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
737 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
738 "
739 {
740 if (TARGET_THUMB2)
741 {
742 operands[3] = gen_highpart (SImode, operands[0]);
743 operands[0] = gen_lowpart (SImode, operands[0]);
744 operands[4] = gen_highpart (SImode, operands[2]);
745 operands[2] = gen_lowpart (SImode, operands[2]);
746 operands[5] = gen_highpart (SImode, operands[1]);
747 operands[1] = gen_lowpart (SImode, operands[1]);
748 }
749 else
750 {
751 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
752 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
753 DONE;
754 }
755 }"
756 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
757 (set_attr "length" "*,16,8,8")
758 (set_attr "arch" "any,a,t2,t2")]
759 )
760
;; Bit-clear for the VDQ modes: operand 0 = operand 1 & ~operand 2, emitted
;; as vbic. In the RTL the inverted operand (2) comes first, while the
;; output template lists operand 1 before operand 2.
761 (define_insn "bic<mode>3_neon"
762 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
763 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
764 (match_operand:VDQ 1 "s_register_operand" "w")))]
765 "TARGET_NEON"
766 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
767 [(set_attr "type" "neon_logic<q>")]
768 )
769
770 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear: operand 0 = operand 1 & ~operand 2. Alternative 0 emits
;; vbic directly; alternatives 1 and 2 output only a hash template and are
;; attributed as type multiple with length 8.
;; NOTE(review): no splitter for the hash alternatives is visible in this
;; pattern -- presumably one is provided elsewhere; confirm.
771 (define_insn "bicdi3_neon"
772 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
773 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
774 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
775 "TARGET_NEON"
776 "@
777 vbic\t%P0, %P1, %P2
778 #
779 #"
780 [(set_attr "type" "neon_logic,multiple,multiple")
781 (set_attr "length" "*,8,8")]
782 )
783
;; Bitwise exclusive-OR for the VDQ modes, emitted as a three-register
;; veor.
784 (define_insn "xor<mode>3"
785 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
786 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
787 (match_operand:VDQ 2 "s_register_operand" "w")))]
788 "TARGET_NEON"
789 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
790 [(set_attr "type" "neon_logic<q>")]
791 )
792
;; Bitwise complement for the VDQ modes, emitted as vmvn. Note that the
;; type attribute is neon_move<q> rather than a logic type.
793 (define_insn "one_cmpl<mode>2"
794 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
795 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
796 "TARGET_NEON"
797 "vmvn\t%<V_reg>0, %<V_reg>1"
798 [(set_attr "type" "neon_move<q>")]
799 )
800
;; Absolute value for the VDQW modes, emitted as vabs.<V_s_elem>. Unlike
;; the add/sub/mul patterns, the condition here is just TARGET_NEON; only
;; the type attribute depends on <Is_float_mode>.
801 (define_insn "abs<mode>2"
802 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
803 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
804 "TARGET_NEON"
805 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
806 [(set (attr "type")
807 (if_then_else (match_test "<Is_float_mode>")
808 (const_string "neon_fp_abs_s<q>")
809 (const_string "neon_abs<q>")))]
810 )
811
;; Negation for the VDQW modes, emitted as vneg.<V_s_elem>. The condition
;; is just TARGET_NEON; only the type attribute depends on
;; <Is_float_mode>.
812 (define_insn "neg<mode>2"
813 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
814 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
815 "TARGET_NEON"
816 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
817 [(set (attr "type")
818 (if_then_else (match_test "<Is_float_mode>")
819 (const_string "neon_fp_neg_s<q>")
820 (const_string "neon_neg<q>")))]
821 )
822
;; DImode negation with a DImode scratch and a condition-code clobber.
;; Every alternative outputs only a hash template (length 8, type
;; multiple); the two define_splits that follow match this RTL shape. Only
;; alternative 1 requests a real scratch register (constraint &w); the
;; others use X.
823 (define_insn "negdi2_neon"
824 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
825 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
826 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
827 (clobber (reg:CC CC_REGNUM))]
828 "TARGET_NEON"
829 "#"
830 [(set_attr "length" "8")
831 (set_attr "type" "multiple")]
832 )
833
834 ; Split negdi2_neon for vfp registers
;; Applies after reload when the destination is a VFP register. The
;; negation becomes: set operand 2 to zero, then operand 0 = operand 2 -
;; operand 1 (keeping the condition-code clobber). When operand 2 is not a
;; REG, the destination itself is used as the zero temporary.
835 (define_split
836 [(set (match_operand:DI 0 "s_register_operand" "")
837 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
838 (clobber (match_scratch:DI 2 ""))
839 (clobber (reg:CC CC_REGNUM))]
840 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
841 [(set (match_dup 2) (const_int 0))
842 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
843 (clobber (reg:CC CC_REGNUM))])]
844 {
845 if (!REG_P (operands[2]))
846 operands[2] = operands[0];
847 }
848 )
849
850 ; Split negdi2_neon for core registers
;; Applies after reload under TARGET_32BIT when the destination satisfies
;; arm_general_register_operand. The replacement keeps the neg:DI set and
;; the condition-code clobber but drops the scratch clobber; there is no
;; preparation code.
851 (define_split
852 [(set (match_operand:DI 0 "s_register_operand" "")
853 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
854 (clobber (match_scratch:DI 2 ""))
855 (clobber (reg:CC CC_REGNUM))]
856 "TARGET_32BIT && reload_completed
857 && arm_general_register_operand (operands[0], DImode)"
858 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
859 (clobber (reg:CC CC_REGNUM))])]
860 ""
861 )
862
;; Unsigned minimum for the VDQIW modes, emitted as vmin.<V_u_elem>.
863 (define_insn "*umin<mode>3_neon"
864 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
865 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
866 (match_operand:VDQIW 2 "s_register_operand" "w")))]
867 "TARGET_NEON"
868 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869 [(set_attr "type" "neon_minmax<q>")]
870 )
871
;; Unsigned maximum for the VDQIW modes, emitted as vmax.<V_u_elem>.
872 (define_insn "*umax<mode>3_neon"
873 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
874 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
875 (match_operand:VDQIW 2 "s_register_operand" "w")))]
876 "TARGET_NEON"
877 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
878 [(set_attr "type" "neon_minmax<q>")]
879 )
880
;; Signed minimum for the VDQW modes, emitted as vmin.<V_s_elem>. The type
;; attribute depends on <Is_float_mode>.
881 (define_insn "*smin<mode>3_neon"
882 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
883 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
884 (match_operand:VDQW 2 "s_register_operand" "w")))]
885 "TARGET_NEON"
886 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887 [(set (attr "type")
888 (if_then_else (match_test "<Is_float_mode>")
889 (const_string "neon_fp_minmax_s<q>")
890 (const_string "neon_minmax<q>")))]
891 )
892
;; Signed maximum for the VDQW modes, emitted as vmax.<V_s_elem>. The type
;; attribute depends on <Is_float_mode>.
893 (define_insn "*smax<mode>3_neon"
894 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
895 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
896 (match_operand:VDQW 2 "s_register_operand" "w")))]
897 "TARGET_NEON"
898 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
899 [(set (attr "type")
900 (if_then_else (match_test "<Is_float_mode>")
901 (const_string "neon_fp_minmax_s<q>")
902 (const_string "neon_minmax<q>")))]
903 )
904
905 ; TODO: V2DI shifts are currently disabled because there are bugs in the
906 ; generic vectorizer code. It ends up creating a V2DI constructor with
907 ; SImode elements.
908
;; Vector left shift for the VDQIW modes. Alternative 0 takes the shift
;; amount in a register and emits vshl.<V_s_elem>. Alternative 1 (operand
;; 2 matching the Dn constraint) is printed by neon_output_shift_immediate
;; with mnemonic vshl, sign character i and a final argument of true.
;; NOTE(review): the final argument presumably marks a left shift --
;; confirm against neon_output_shift_immediate.
909 (define_insn "vashl<mode>3"
910 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
911 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
912 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
913 "TARGET_NEON"
914 {
915 switch (which_alternative)
916 {
917 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
918 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
919 <MODE>mode,
920 VALID_NEON_QREG_MODE (<MODE>mode),
921 true);
922 default: gcc_unreachable ();
923 }
924 }
925 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
926 )
927
;; Vector arithmetic shift right by an immediate ("Dn") for every mode in the
;; VDQIW iterator.  The assembly text is produced by
;; neon_output_shift_immediate for "vshr" with sign character 's'.
;; Used by the vashr<mode>3 expander for non-register shift counts.
928 (define_insn "vashr<mode>3_imm"
929 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
930 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
931 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
932 "TARGET_NEON"
933 {
934 return neon_output_shift_immediate ("vshr", 's', &operands[2],
935 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
936 false);
937 }
938 [(set_attr "type" "neon_shift_imm<q>")]
939 )
940
;; Vector logical shift right by an immediate ("Dn") for every mode in the
;; VDQIW iterator.  The assembly text is produced by
;; neon_output_shift_immediate for "vshr" with sign character 'u'.
;; Used by the vlshr<mode>3 expander for non-register shift counts.
941 (define_insn "vlshr<mode>3_imm"
942 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
943 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
944 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
945 "TARGET_NEON"
946 {
947 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
948 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
949 false);
950 }
951 [(set_attr "type" "neon_shift_imm<q>")]
952 )
953
954 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
955 ; amount, with signed operands. This is essentially the same as vashl<mode>3
956 ; above, but using an unspec in case GCC tries anything tricky with negative
957 ; shift amounts.
958
;; Register-count vshl.<V_s_elem> wrapped in UNSPEC_ASHIFT_SIGNED, for every
;; mode in the VDQI iterator.  The vashr<mode>3 expander emits this with a
;; negated shift count.
959 (define_insn "ashl<mode>3_signed"
960 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
961 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
962 (match_operand:VDQI 2 "s_register_operand" "w")]
963 UNSPEC_ASHIFT_SIGNED))]
964 "TARGET_NEON"
965 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
966 [(set_attr "type" "neon_shift_reg<q>")]
967 )
968
969 ; Used for implementing logical shift-right, which is a left-shift by a negative
970 ; amount, with unsigned operands.
971
;; Register-count vshl.<V_u_elem> wrapped in UNSPEC_ASHIFT_UNSIGNED, for every
;; mode in the VDQI iterator.  The vlshr<mode>3 expander emits this with a
;; negated shift count.
972 (define_insn "ashl<mode>3_unsigned"
973 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
974 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
975 (match_operand:VDQI 2 "s_register_operand" "w")]
976 UNSPEC_ASHIFT_UNSIGNED))]
977 "TARGET_NEON"
978 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
979 [(set_attr "type" "neon_shift_reg<q>")]
980 )
981
;; Expander for vector arithmetic shift right (VDQIW modes).
;; A register shift count is negated into a fresh pseudo and the shift is
;; emitted as ashl<mode>3_signed (a left shift by the negated amount).
;; Any other accepted operand 2 is handed to vashr<mode>3_imm.
982 (define_expand "vashr<mode>3"
983 [(set (match_operand:VDQIW 0 "s_register_operand" "")
984 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
985 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
986 "TARGET_NEON"
987 {
988 if (s_register_operand (operands[2], <MODE>mode))
989 {
990 rtx neg = gen_reg_rtx (<MODE>mode);
991 emit_insn (gen_neg<mode>2 (neg, operands[2]));
992 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
993 }
994 else
995 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
996 DONE;
997 })
998
;; Expander for vector logical shift right (VDQIW modes).
;; A register shift count is negated into a fresh pseudo and the shift is
;; emitted as ashl<mode>3_unsigned (a left shift by the negated amount).
;; Any other accepted operand 2 is handed to vlshr<mode>3_imm.
999 (define_expand "vlshr<mode>3"
1000 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1001 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1002 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1003 "TARGET_NEON"
1004 {
1005 if (s_register_operand (operands[2], <MODE>mode))
1006 {
1007 rtx neg = gen_reg_rtx (<MODE>mode);
1008 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1009 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1010 }
1011 else
1012 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1013 DONE;
1014 })
1015
1016 ;; 64-bit shifts
1017
1018 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1019 ;; leaving the upper half uninitialized. This is OK since the shift
1020 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1021 ;; data flow analysis however, we pretend the full register is set
1022 ;; using an unspec.
;; Alternative 0 loads the SImode count from memory ("Um") with vld1.32 into
;; lane 0 of the destination; alternative 1 moves it from a core register
;; with vmov.32 into lane 0.  The result is modelled as a DImode unspec
;; (UNSPEC_LOAD_COUNT).
1023 (define_insn "neon_load_count"
1024 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1025 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1026 UNSPEC_LOAD_COUNT))]
1027 "TARGET_NEON"
1028 "@
1029 vld1.32\t{%P0[0]}, %A1
1030 vmov.32\t%P0[0], %1"
1031 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1032 )
1033
;; DImode left shift performed entirely in NEON registers, without any
;; clobbers.  Only valid after reload.  Alternative 0 shifts by a constant,
;; which the insn condition restricts to 0..63; alternative 1 shifts by a
;; count held in a NEON register.  Emitted by the ashldi3_neon splitter.
1034 (define_insn "ashldi3_neon_noclobber"
1035 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1036 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1037 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1038 "TARGET_NEON && reload_completed
1039 && (!CONST_INT_P (operands[2])
1040 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1041 "@
1042 vshl.u64\t%P0, %P1, %2
1043 vshl.u64\t%P0, %P1, %P2"
1044 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1045 )
1046
;; 64-bit left shift whose operands may live in NEON ("w") or core ("r")
;; registers.  The insn always outputs "#" and is split after reload:
;;  - Destination in a VFP/NEON register: a constant count below 1 becomes a
;;    plain DImode move, a constant above 63 is clamped to 63, and a
;;    non-constant count is first loaded into scratch operand 5 with
;;    neon_load_count.  The shift is then emitted via ashldi3_neon_noclobber.
;;  - Destination in core registers: a shift by exactly one uses
;;    arm_ashldi3_1bit, provided operand 0 either does not overlap operand 1
;;    or is the very same register; everything else is handled by
;;    arm_emit_coreregs_64bit_shift using scratch operands 3 and 4.
1047 (define_insn_and_split "ashldi3_neon"
1048 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1049 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1050 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1051 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1052 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1053 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1054 (clobber (reg:CC_C CC_REGNUM))]
1055 "TARGET_NEON"
1056 "#"
1057 "TARGET_NEON && reload_completed"
1058 [(const_int 0)]
1059 "
1060 {
1061 if (IS_VFP_REGNUM (REGNO (operands[0])))
1062 {
1063 if (CONST_INT_P (operands[2]))
1064 {
1065 if (INTVAL (operands[2]) < 1)
1066 {
1067 emit_insn (gen_movdi (operands[0], operands[1]));
1068 DONE;
1069 }
1070 else if (INTVAL (operands[2]) > 63)
1071 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1072 }
1073 else
1074 {
1075 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1076 operands[2] = operands[5];
1077 }
1078
1079 /* Ditch the unnecessary clobbers. */
1080 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1081 operands[2]));
1082 }
1083 else
1084 {
1085 if (operands[2] == CONST1_RTX (SImode)
1086 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1087 || REGNO (operands[0]) == REGNO (operands[1])))
1088 /* This clobbers CC. */
1089 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1090 else
1091 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1092 operands[2], operands[3], operands[4]);
1093 }
1094 DONE;
1095 }"
1096 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1097 (set_attr "opt" "*,*,speed,speed,*,*")
1098 (set_attr "type" "multiple")]
1099 )
1100
1101 ; The shift amount needs to be negated for right-shifts
;; DImode vshl.s64 by a count held in a NEON register, modelled as
;; UNSPEC_ASHIFT_SIGNED.  Only valid after reload.
1102 (define_insn "signed_shift_di3_neon"
1103 [(set (match_operand:DI 0 "s_register_operand" "=w")
1104 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1105 (match_operand:DI 2 "s_register_operand" " w")]
1106 UNSPEC_ASHIFT_SIGNED))]
1107 "TARGET_NEON && reload_completed"
1108 "vshl.s64\t%P0, %P1, %P2"
1109 [(set_attr "type" "neon_shift_reg")]
1110 )
1111
1112 ; The shift amount needs to be negated for right-shifts
;; DImode vshl.u64 by a count held in a NEON register, modelled as
;; UNSPEC_ASHIFT_UNSIGNED.  Only valid after reload.
1113 (define_insn "unsigned_shift_di3_neon"
1114 [(set (match_operand:DI 0 "s_register_operand" "=w")
1115 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1116 (match_operand:DI 2 "s_register_operand" " w")]
1117 UNSPEC_ASHIFT_UNSIGNED))]
1118 "TARGET_NEON && reload_completed"
1119 "vshl.u64\t%P0, %P1, %P2"
1120 [(set_attr "type" "neon_shift_reg")]
1121 )
1122
;; DImode arithmetic shift right by a constant in NEON registers, without
;; clobbers.  Only valid after reload and for shift amounts 1..64 (enforced
;; by the insn condition).  Emitted by the <shift>di3_neon splitter.
1123 (define_insn "ashrdi3_neon_imm_noclobber"
1124 [(set (match_operand:DI 0 "s_register_operand" "=w")
1125 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1126 (match_operand:DI 2 "const_int_operand" " i")))]
1127 "TARGET_NEON && reload_completed
1128 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1129 "vshr.s64\t%P0, %P1, %2"
1130 [(set_attr "type" "neon_shift_imm")]
1131 )
1132
;; DImode logical shift right by a constant in NEON registers, without
;; clobbers.  Only valid after reload and for shift amounts 1..64 (enforced
;; by the insn condition).  Emitted by the <shift>di3_neon splitter.
1133 (define_insn "lshrdi3_neon_imm_noclobber"
1134 [(set (match_operand:DI 0 "s_register_operand" "=w")
1135 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1136 (match_operand:DI 2 "const_int_operand" " i")))]
1137 "TARGET_NEON && reload_completed
1138 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1139 "vshr.u64\t%P0, %P1, %2"
1140 [(set_attr "type" "neon_shift_imm")]
1141 )
1142
1143 ;; ashrdi3_neon
1144 ;; lshrdi3_neon
;; 64-bit right shifts (one pattern per code in the RSHIFTS iterator) whose
;; operands may live in NEON ("w") or core ("r") registers.  The insn always
;; outputs "#" and is split after reload:
;;  - Destination in a VFP/NEON register, constant count: a count below 1
;;    becomes a plain DImode move, a count above 64 is clamped to 64, and the
;;    shift is emitted via <shift>di3_neon_imm_noclobber.
;;  - Destination in a VFP/NEON register, register count: the count is
;;    negated into scratch operand 3, loaded into scratch operand 5 with
;;    neon_load_count, and used by <shifttype>_shift_di3_neon.
;;  - Destination in core registers: a shift by exactly one uses
;;    arm_<shift>di3_1bit, provided operand 0 either does not overlap
;;    operand 1 or is the very same register; everything else is handled by
;;    arm_emit_coreregs_64bit_shift using scratch operands 3 and 4.
1145 (define_insn_and_split "<shift>di3_neon"
1146 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1147 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1148 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1149 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1150 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1151 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1152 (clobber (reg:CC CC_REGNUM))]
1153 "TARGET_NEON"
1154 "#"
1155 "TARGET_NEON && reload_completed"
1156 [(const_int 0)]
1157 "
1158 {
1159 if (IS_VFP_REGNUM (REGNO (operands[0])))
1160 {
1161 if (CONST_INT_P (operands[2]))
1162 {
1163 if (INTVAL (operands[2]) < 1)
1164 {
1165 emit_insn (gen_movdi (operands[0], operands[1]));
1166 DONE;
1167 }
1168 else if (INTVAL (operands[2]) > 64)
1169 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1170
1171 /* Ditch the unnecessary clobbers. */
1172 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1173 operands[1],
1174 operands[2]));
1175 }
1176 else
1177 {
1178 /* We must use a negative left-shift. */
1179 emit_insn (gen_negsi2 (operands[3], operands[2]));
1180 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1181 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1182 operands[5]));
1183 }
1184 }
1185 else
1186 {
1187 if (operands[2] == CONST1_RTX (SImode)
1188 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1189 || REGNO (operands[0]) == REGNO (operands[1])))
1190 /* This clobbers CC. */
1191 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1192 else
1193 /* This clobbers CC (ASHIFTRT by register only). */
1194 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1195 operands[2], operands[3], operands[4]);
1196 }
1197
1198 DONE;
1199 }"
1200 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1201 (set_attr "opt" "*,*,speed,speed,*,*")
1202 (set_attr "type" "multiple")]
1203 )
1204
1205 ;; Widening operations
1206
;; Signed widening sum for the quad-word modes in the VQI iterator:
;; operand 0 = sign_extend (operand 1) + operand 2, with operands 0 and 2 in
;; the <V_double_width> mode.  The accumulator (operand 2) is first copied to
;; operand 0 when they differ; the two halves of operand 1 are then added in
;; turn with vec_sel_widen_ssum_lo and vec_sel_widen_ssum_hi, each
;; accumulating in place into operand 0.
1207 (define_expand "widen_ssum<mode>3"
1208 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1209 (plus:<V_double_width>
1210 (sign_extend:<V_double_width>
1211 (match_operand:VQI 1 "s_register_operand" ""))
1212 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1213 "TARGET_NEON"
1214 {
1215 machine_mode mode = GET_MODE (operands[1]);
1216 rtx p1, p2;
1217
/* Lane-selection parallels handed to the _lo (p1) and _hi (p2) patterns.  */
1218 p1 = arm_simd_vect_par_cnst_half (mode, false);
1219 p2 = arm_simd_vect_par_cnst_half (mode, true);
1220
1221 if (operands[0] != operands[2])
1222 emit_move_insn (operands[0], operands[2]);
1223
1224 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1225 operands[1],
1226 p1,
1227 operands[0]));
1228 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1229 operands[1],
1230 p2,
1231 operands[0]));
1232 DONE;
1233 }
1234 )
1235
;; Sign-extend the half of operand 1 selected by the vect_par_constant_low
;; parallel (operand 2) and add it to the accumulator operand 3, which is
;; tied to the output (constraint "0").  Emits vaddw.<V_s_elem>; the D
;; register half of operand 1 is named with %e1 on little-endian and %f1 on
;; big-endian targets.
;; Operand 1 deliberately carries no '%' modifier: '%' would declare it
;; interchangeable with operand 2, which is a constant lane-selection
;; parallel, so the operands are not commutative.
1236 (define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
1237 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1238 (plus:<VW:V_widen>
1239 (sign_extend:<VW:V_widen>
1240 (vec_select:VW
1241 (match_operand:VQI 1 "s_register_operand" "w")
1242 (match_operand:VQI 2 "vect_par_constant_low" "")))
1243 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1244 "TARGET_NEON"
1245 {
1246 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1247 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1248 }
1249 [(set_attr "type" "neon_add_widen")])
1250
;; Sign-extend the half of operand 1 selected by the vect_par_constant_high
;; parallel (operand 2) and add it to the accumulator operand 3, which is
;; tied to the output (constraint "0").  Emits vaddw.<V_s_elem>; the D
;; register half of operand 1 is named with %f1 on little-endian and %e1 on
;; big-endian targets.
;; Operand 1 deliberately carries no '%' modifier: '%' would declare it
;; interchangeable with operand 2, which is a constant lane-selection
;; parallel, so the operands are not commutative.
1251 (define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
1252 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1253 (plus:<VW:V_widen>
1254 (sign_extend:<VW:V_widen>
1255 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "w")
1256 (match_operand:VQI 2 "vect_par_constant_high" "")))
1257 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1258 "TARGET_NEON"
1259 {
1260 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1261 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1262 }
1263 [(set_attr "type" "neon_add_widen")])
1264
;; Signed widening add for the modes in the VW iterator:
;; operand 0 = sign_extend (operand 1) + operand 2, emitted as a single
;; vaddw.<V_s_elem> with the wide accumulator printed via %q2 and the narrow
;; source via %P1.
;; Operand 1 deliberately carries no '%' modifier: operands 1 and 2 have
;; different modes (VW vs. <V_widen>) and are printed with different
;; register modifiers, so they must never be swapped.
1265 (define_insn "widen_ssum<mode>3"
1266 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1267 (plus:<V_widen>
1268 (sign_extend:<V_widen>
1269 (match_operand:VW 1 "s_register_operand" "w"))
1270 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1271 "TARGET_NEON"
1272 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1273 [(set_attr "type" "neon_add_widen")]
1274 )
1275
;; Unsigned widening sum for the quad-word modes in the VQI iterator:
;; operand 0 = zero_extend (operand 1) + operand 2, with operands 0 and 2 in
;; the <V_double_width> mode.  The accumulator (operand 2) is first copied to
;; operand 0 when they differ; the two halves of operand 1 are then added in
;; turn with vec_sel_widen_usum_lo and vec_sel_widen_usum_hi, each
;; accumulating in place into operand 0.
1276 (define_expand "widen_usum<mode>3"
1277 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1278 (plus:<V_double_width>
1279 (zero_extend:<V_double_width>
1280 (match_operand:VQI 1 "s_register_operand" ""))
1281 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1282 "TARGET_NEON"
1283 {
1284 machine_mode mode = GET_MODE (operands[1]);
1285 rtx p1, p2;
1286
/* Lane-selection parallels handed to the _lo (p1) and _hi (p2) patterns.  */
1287 p1 = arm_simd_vect_par_cnst_half (mode, false);
1288 p2 = arm_simd_vect_par_cnst_half (mode, true);
1289
1290 if (operands[0] != operands[2])
1291 emit_move_insn (operands[0], operands[2]);
1292
1293 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1294 operands[1],
1295 p1,
1296 operands[0]));
1297 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1298 operands[1],
1299 p2,
1300 operands[0]));
1301 DONE;
1302 }
1303 )
1304
;; Zero-extend the half of operand 1 selected by the vect_par_constant_low
;; parallel (operand 2) and add it to the accumulator operand 3, which is
;; tied to the output (constraint "0").  Emits vaddw.<V_u_elem>; the D
;; register half of operand 1 is named with %e1 on little-endian and %f1 on
;; big-endian targets.
;; Operand 1 deliberately carries no '%' modifier: '%' would declare it
;; interchangeable with operand 2, which is a constant lane-selection
;; parallel, so the operands are not commutative.
1305 (define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
1306 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1307 (plus:<VW:V_widen>
1308 (zero_extend:<VW:V_widen>
1309 (vec_select:VW
1310 (match_operand:VQI 1 "s_register_operand" "w")
1311 (match_operand:VQI 2 "vect_par_constant_low" "")))
1312 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1313 "TARGET_NEON"
1314 {
1315 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1316 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1317 }
1318 [(set_attr "type" "neon_add_widen")])
1319
;; Zero-extend the half of operand 1 selected by the vect_par_constant_high
;; parallel (operand 2) and add it to the accumulator operand 3, which is
;; tied to the output (constraint "0").  Emits vaddw.<V_u_elem>; the D
;; register half of operand 1 is named with %f1 on little-endian and %e1 on
;; big-endian targets.
;; Operand 1 deliberately carries no '%' modifier: '%' would declare it
;; interchangeable with operand 2, which is a constant lane-selection
;; parallel, so the operands are not commutative.
1320 (define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
1321 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1322 (plus:<VW:V_widen>
1323 (zero_extend:<VW:V_widen>
1324 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "w")
1325 (match_operand:VQI 2 "vect_par_constant_high" "")))
1326 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1327 "TARGET_NEON"
1328 {
1329 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1330 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1331 }
1332 [(set_attr "type" "neon_add_widen")])
1333
;; Unsigned widening add for the modes in the VW iterator:
;; operand 0 = zero_extend (operand 1) + operand 2, emitted as a single
;; vaddw.<V_u_elem> with the wide accumulator printed via %q2 and the narrow
;; source via %P1.
;; Operand 1 deliberately carries no '%' modifier: operands 1 and 2 have
;; different modes (VW vs. <V_widen>) and are printed with different
;; register modifiers, so they must never be swapped.
1334 (define_insn "widen_usum<mode>3"
1335 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1336 (plus:<V_widen> (zero_extend:<V_widen>
1337 (match_operand:VW 1 "s_register_operand" "w"))
1338 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1339 "TARGET_NEON"
1340 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1341 [(set_attr "type" "neon_add_widen")]
1342 )
1343
1344 ;; Helpers for quad-word reduction operations
1345
1346 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1347 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1348 ; N/2-element vector.
1349
;; V4SI instance of the quad-halves helper: applies each operation in the
;; VQH_OPS iterator to elements 0-1 and elements 2-3 of operand 1, giving a
;; V2SI result.  Emits <VQH_mnem>.<VQH_sign>32 on the %e1 and %f1 halves.
1350 (define_insn "quad_halves_<code>v4si"
1351 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1352 (VQH_OPS:V2SI
1353 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1354 (parallel [(const_int 0) (const_int 1)]))
1355 (vec_select:V2SI (match_dup 1)
1356 (parallel [(const_int 2) (const_int 3)]))))]
1357 "TARGET_NEON"
1358 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1359 [(set_attr "vqh_mnem" "<VQH_mnem>")
1360 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1361 )
1362
;; V4SF instance of the quad-halves helper: applies each operation in the
;; VQHS_OPS iterator to elements 0-1 and elements 2-3 of operand 1, giving a
;; V2SF result.  Emits <VQH_mnem>.f32 on the %e1 and %f1 halves.  Only
;; enabled under flag_unsafe_math_optimizations.
1363 (define_insn "quad_halves_<code>v4sf"
1364 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1365 (VQHS_OPS:V2SF
1366 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1367 (parallel [(const_int 0) (const_int 1)]))
1368 (vec_select:V2SF (match_dup 1)
1369 (parallel [(const_int 2) (const_int 3)]))))]
1370 "TARGET_NEON && flag_unsafe_math_optimizations"
1371 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1372 [(set_attr "vqh_mnem" "<VQH_mnem>")
1373 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1374 )
1375
;; V8HI instance of the quad-halves helper: applies each operation in the
;; VQH_OPS iterator to elements 0-3 and elements 4-7 of operand 1, giving a
;; V4HI result.  Emits <VQH_mnem>.<VQH_sign>16 on the %e1 and %f1 halves.
;; Operand 0 is written only, never read, so it uses the output-only "=w"
;; constraint like the V4SI and V4SF variants.
1376 (define_insn "quad_halves_<code>v8hi"
1377 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1378 (VQH_OPS:V4HI
1379 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1380 (parallel [(const_int 0) (const_int 1)
1381 (const_int 2) (const_int 3)]))
1382 (vec_select:V4HI (match_dup 1)
1383 (parallel [(const_int 4) (const_int 5)
1384 (const_int 6) (const_int 7)]))))]
1385 "TARGET_NEON"
1386 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1387 [(set_attr "vqh_mnem" "<VQH_mnem>")
1388 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1389 )
1390
;; V16QI instance of the quad-halves helper: applies each operation in the
;; VQH_OPS iterator to elements 0-7 and elements 8-15 of operand 1, giving a
;; V8QI result.  Emits <VQH_mnem>.<VQH_sign>8 on the %e1 and %f1 halves.
;; Operand 0 is written only, never read, so it uses the output-only "=w"
;; constraint like the V4SI and V4SF variants.
1391 (define_insn "quad_halves_<code>v16qi"
1392 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1393 (VQH_OPS:V8QI
1394 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1395 (parallel [(const_int 0) (const_int 1)
1396 (const_int 2) (const_int 3)
1397 (const_int 4) (const_int 5)
1398 (const_int 6) (const_int 7)]))
1399 (vec_select:V8QI (match_dup 1)
1400 (parallel [(const_int 8) (const_int 9)
1401 (const_int 10) (const_int 11)
1402 (const_int 12) (const_int 13)
1403 (const_int 14) (const_int 15)]))))]
1404 "TARGET_NEON"
1405 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1406 [(set_attr "vqh_mnem" "<VQH_mnem>")
1407 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1408 )
1409
;; Copy the half-width vector operand 1 into the part of the 128-bit
;; operand 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), using a
;; subreg of operand 0 as the move destination.
1410 (define_expand "move_hi_quad_<mode>"
1411 [(match_operand:ANY128 0 "s_register_operand" "")
1412 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1413 "TARGET_NEON"
1414 {
1415 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1416 GET_MODE_SIZE (<V_HALF>mode)),
1417 operands[1]);
1418 DONE;
1419 })
1420
;; Copy the half-width vector operand 1 into the part of the 128-bit
;; operand 0 that starts at byte offset 0, using a subreg of operand 0 as the
;; move destination.
1421 (define_expand "move_lo_quad_<mode>"
1422 [(match_operand:ANY128 0 "s_register_operand" "")
1423 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1424 "TARGET_NEON"
1425 {
1426 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1427 <MODE>mode, 0),
1428 operands[1]);
1429 DONE;
1430 })
1431
1432 ;; Reduction operations
1433
;; Sum reduction of a vector in the VD iterator to a scalar of mode <V_elem>.
;; neon_pairwise_reduce is driven with the vpadd generator, then element 0 of
;; the reduced vector is extracted into operand 0.  Float modes are only
;; handled under flag_unsafe_math_optimizations.
1434 (define_expand "reduc_plus_scal_<mode>"
1435 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1436 (match_operand:VD 1 "s_register_operand" "")]
1437 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1438 {
1439 rtx vec = gen_reg_rtx (<MODE>mode);
1440 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1441 &gen_neon_vpadd_internal<mode>);
1442 /* The same result is actually computed into every element. */
1443 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1444 DONE;
1445 })
1446
;; Sum reduction of a vector in the VQ iterator to a scalar of mode <V_elem>.
;; The two halves are first added with quad_halves_plus<mode> into a
;; <V_HALF>mode temporary, which is then reduced by the half-width
;; reduc_plus_scal_<V_half> expander.  Little-endian only; float modes are
;; only handled under flag_unsafe_math_optimizations.
1447 (define_expand "reduc_plus_scal_<mode>"
1448 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1449 (match_operand:VQ 1 "s_register_operand" "")]
1450 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1451 && !BYTES_BIG_ENDIAN"
1452 {
1453 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1454
1455 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1456 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1457
1458 DONE;
1459 })
1460
;; Sum reduction of a V2DI vector to a DImode scalar.  The two elements are
;; added with arm_reduc_plus_internal_v2di into a V2DI temporary, and
;; element 0 of that temporary is extracted into operand 0.  Little-endian
;; only.
;; Operand 0 uses an empty constraint string, matching the other reduction
;; expanders in this file.
1461 (define_expand "reduc_plus_scal_v2di"
1462 [(match_operand:DI 0 "nonimmediate_operand" "")
1463 (match_operand:V2DI 1 "s_register_operand" "")]
1464 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1465 {
1466 rtx vec = gen_reg_rtx (V2DImode);
1467
1468 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1469 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1470
1471 DONE;
1472 })
1473
;; Helper for reduc_plus_scal_v2di: a single vadd.i64 of the %e1 and %f1
;; halves of operand 1, written to the %e0 half of operand 0.  Modelled as
;; UNSPEC_VPADD; little-endian only.
1474 (define_insn "arm_reduc_plus_internal_v2di"
1475 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1476 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1477 UNSPEC_VPADD))]
1478 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1479 "vadd.i64\t%e0, %e1, %f1"
1480 [(set_attr "type" "neon_add_q")]
1481 )
1482
;; Signed-minimum reduction of a vector in the VD iterator to a scalar of
;; mode <V_elem>.  neon_pairwise_reduce is driven with the neon_vpsmin
;; generator, then element 0 of the reduced vector is extracted into
;; operand 0.  Float modes are only handled under
;; flag_unsafe_math_optimizations.
1483 (define_expand "reduc_smin_scal_<mode>"
1484 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1485 (match_operand:VD 1 "s_register_operand" "")]
1486 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1487 {
1488 rtx vec = gen_reg_rtx (<MODE>mode);
1489
1490 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1491 &gen_neon_vpsmin<mode>);
1492 /* The result is computed into every element of the vector. */
1493 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1494 DONE;
1495 })
1496
;; Signed-minimum reduction of a vector in the VQ iterator to a scalar of
;; mode <V_elem>.  The two halves are first combined with
;; quad_halves_smin<mode> into a <V_HALF>mode temporary, which is then reduced
;; by the half-width reduc_smin_scal_<V_half> expander.  Little-endian only;
;; float modes are only handled under flag_unsafe_math_optimizations.
1497 (define_expand "reduc_smin_scal_<mode>"
1498 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1499 (match_operand:VQ 1 "s_register_operand" "")]
1500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1501 && !BYTES_BIG_ENDIAN"
1502 {
1503 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1504
1505 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1506 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1507
1508 DONE;
1509 })
1510
;; Signed-maximum reduction of a vector in the VD iterator to a scalar of
;; mode <V_elem>.  neon_pairwise_reduce is driven with the neon_vpsmax
;; generator, then element 0 of the reduced vector is extracted into
;; operand 0.  Float modes are only handled under
;; flag_unsafe_math_optimizations.
1511 (define_expand "reduc_smax_scal_<mode>"
1512 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1513 (match_operand:VD 1 "s_register_operand" "")]
1514 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1515 {
1516 rtx vec = gen_reg_rtx (<MODE>mode);
1517 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1518 &gen_neon_vpsmax<mode>);
1519 /* The result is computed into every element of the vector. */
1520 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1521 DONE;
1522 })
1523
;; Signed-maximum reduction of a vector in the VQ iterator to a scalar of
;; mode <V_elem>.  The two halves are first combined with
;; quad_halves_smax<mode> into a <V_HALF>mode temporary, which is then reduced
;; by the half-width reduc_smax_scal_<V_half> expander.  Little-endian only;
;; float modes are only handled under flag_unsafe_math_optimizations.
1524 (define_expand "reduc_smax_scal_<mode>"
1525 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1526 (match_operand:VQ 1 "s_register_operand" "")]
1527 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1528 && !BYTES_BIG_ENDIAN"
1529 {
1530 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1531
1532 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1533 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1534
1535 DONE;
1536 })
1537
;; Unsigned-minimum reduction of a vector in the VDI iterator to a scalar of
;; mode <V_elem>.  neon_pairwise_reduce is driven with the neon_vpumin
;; generator, then element 0 of the reduced vector is extracted into
;; operand 0.
1538 (define_expand "reduc_umin_scal_<mode>"
1539 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1540 (match_operand:VDI 1 "s_register_operand" "")]
1541 "TARGET_NEON"
1542 {
1543 rtx vec = gen_reg_rtx (<MODE>mode);
1544 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1545 &gen_neon_vpumin<mode>);
1546 /* The result is computed into every element of the vector. */
1547 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1548 DONE;
1549 })
1550
;; Unsigned-minimum reduction of a vector in the VQI iterator to a scalar of
;; mode <V_elem>.  The two halves are first combined with
;; quad_halves_umin<mode> into a <V_HALF>mode temporary, which is then reduced
;; by the half-width reduc_umin_scal_<V_half> expander.  Little-endian only.
1551 (define_expand "reduc_umin_scal_<mode>"
1552 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1553 (match_operand:VQI 1 "s_register_operand" "")]
1554 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1555 {
1556 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1557
1558 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1559 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1560
1561 DONE;
1562 })
1563
;; Unsigned-maximum reduction of a vector in the VDI iterator to a scalar of
;; mode <V_elem>.  neon_pairwise_reduce is driven with the neon_vpumax
;; generator, then element 0 of the reduced vector is extracted into
;; operand 0.
1564 (define_expand "reduc_umax_scal_<mode>"
1565 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1566 (match_operand:VDI 1 "s_register_operand" "")]
1567 "TARGET_NEON"
1568 {
1569 rtx vec = gen_reg_rtx (<MODE>mode);
1570 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1571 &gen_neon_vpumax<mode>);
1572 /* The result is computed into every element of the vector. */
1573 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1574 DONE;
1575 })
1576
;; Unsigned-maximum reduction of a vector in the VQI iterator to a scalar of
;; mode <V_elem>.  The two halves are first combined with
;; quad_halves_umax<mode> into a <V_HALF>mode temporary, which is then reduced
;; by the half-width reduc_umax_scal_<V_half> expander.  Little-endian only.
1577 (define_expand "reduc_umax_scal_<mode>"
1578 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1579 (match_operand:VQI 1 "s_register_operand" "")]
1580 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1581 {
1582 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1583
1584 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1585 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1586
1587 DONE;
1588 })
1589
;; Pairwise add of two vectors for every mode in the VD iterator, modelled as
;; UNSPEC_VPADD and emitted as vpadd.<V_if_elem>.  Used as the step function
;; of the reduc_plus_scal_<mode> expander.  The scheduling type is
;; neon_fp_reduc_add_s<q> for float modes and neon_reduc_add<q> otherwise.
1590 (define_insn "neon_vpadd_internal<mode>"
1591 [(set (match_operand:VD 0 "s_register_operand" "=w")
1592 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1593 (match_operand:VD 2 "s_register_operand" "w")]
1594 UNSPEC_VPADD))]
1595 "TARGET_NEON"
1596 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1597 ;; Assume this schedules like vadd.
1598 [(set (attr "type")
1599 (if_then_else (match_test "<Is_float_mode>")
1600 (const_string "neon_fp_reduc_add_s<q>")
1601 (const_string "neon_reduc_add<q>")))]
1602 )
1603
;; Pairwise signed minimum of two vectors for every mode in the VD iterator,
;; modelled as UNSPEC_VPSMIN and emitted as vpmin.<V_s_elem>.  The scheduling
;; type is neon_fp_reduc_minmax_s<q> for float modes and
;; neon_reduc_minmax<q> otherwise.
1604 (define_insn "neon_vpsmin<mode>"
1605 [(set (match_operand:VD 0 "s_register_operand" "=w")
1606 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1607 (match_operand:VD 2 "s_register_operand" "w")]
1608 UNSPEC_VPSMIN))]
1609 "TARGET_NEON"
1610 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1611 [(set (attr "type")
1612 (if_then_else (match_test "<Is_float_mode>")
1613 (const_string "neon_fp_reduc_minmax_s<q>")
1614 (const_string "neon_reduc_minmax<q>")))]
1615 )
1616
;; Pairwise signed maximum of two vectors for every mode in the VD iterator,
;; modelled as UNSPEC_VPSMAX and emitted as vpmax.<V_s_elem>.  The scheduling
;; type is neon_fp_reduc_minmax_s<q> for float modes and
;; neon_reduc_minmax<q> otherwise.
1617 (define_insn "neon_vpsmax<mode>"
1618 [(set (match_operand:VD 0 "s_register_operand" "=w")
1619 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1620 (match_operand:VD 2 "s_register_operand" "w")]
1621 UNSPEC_VPSMAX))]
1622 "TARGET_NEON"
1623 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1624 [(set (attr "type")
1625 (if_then_else (match_test "<Is_float_mode>")
1626 (const_string "neon_fp_reduc_minmax_s<q>")
1627 (const_string "neon_reduc_minmax<q>")))]
1628 )
1629
;; Pairwise unsigned minimum of two vectors for every mode in the VDI
;; iterator, modelled as UNSPEC_VPUMIN and emitted as vpmin.<V_u_elem>.
1630 (define_insn "neon_vpumin<mode>"
1631 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1632 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1633 (match_operand:VDI 2 "s_register_operand" "w")]
1634 UNSPEC_VPUMIN))]
1635 "TARGET_NEON"
1636 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1637 [(set_attr "type" "neon_reduc_minmax<q>")]
1638 )
1639
;; Pairwise unsigned maximum of two vectors for every mode in the VDI
;; iterator, modelled as UNSPEC_VPUMAX and emitted as vpmax.<V_u_elem>.
1640 (define_insn "neon_vpumax<mode>"
1641 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1642 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1643 (match_operand:VDI 2 "s_register_operand" "w")]
1644 UNSPEC_VPUMAX))]
1645 "TARGET_NEON"
1646 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1647 [(set_attr "type" "neon_reduc_minmax<q>")]
1648 )
1649
1650 ;; Saturating arithmetic
1651
1652 ; NOTE: Neon supports many more saturating variants of instructions than the
1653 ; following, but these are all GCC currently understands.
1654 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1655 ; yet either, although these patterns may be used by intrinsics when they're
1656 ; added.
1657
;; Signed saturating add (ss_plus) for every mode in the VD iterator, emitted
;; as vqadd.<V_s_elem>.
1658 (define_insn "*ss_add<mode>_neon"
1659 [(set (match_operand:VD 0 "s_register_operand" "=w")
1660 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1661 (match_operand:VD 2 "s_register_operand" "w")))]
1662 "TARGET_NEON"
1663 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1664 [(set_attr "type" "neon_qadd<q>")]
1665 )
1666
;; Unsigned saturating add (us_plus) for every mode in the VD iterator,
;; emitted as vqadd.<V_u_elem>.
1667 (define_insn "*us_add<mode>_neon"
1668 [(set (match_operand:VD 0 "s_register_operand" "=w")
1669 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1670 (match_operand:VD 2 "s_register_operand" "w")))]
1671 "TARGET_NEON"
1672 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1673 [(set_attr "type" "neon_qadd<q>")]
1674 )
1675
;; Signed saturating subtract (ss_minus) for every mode in the VD iterator,
;; emitted as vqsub.<V_s_elem>.
1676 (define_insn "*ss_sub<mode>_neon"
1677 [(set (match_operand:VD 0 "s_register_operand" "=w")
1678 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1679 (match_operand:VD 2 "s_register_operand" "w")))]
1680 "TARGET_NEON"
1681 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1682 [(set_attr "type" "neon_qsub<q>")]
1683 )
1684
;; Unsigned saturating subtract (us_minus) for every mode in the VD iterator,
;; emitted as vqsub.<V_u_elem>.
1685 (define_insn "*us_sub<mode>_neon"
1686 [(set (match_operand:VD 0 "s_register_operand" "=w")
1687 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1688 (match_operand:VD 2 "s_register_operand" "w")))]
1689 "TARGET_NEON"
1690 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1691 [(set_attr "type" "neon_qsub<q>")]
1692 )
1693
1694 ;; Conditional instructions. These are comparisons with conditional moves for
1695 ;; vectors. They perform the assignment:
1696 ;;
1697 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1698 ;;
1699 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1700 ;; element-wise.
1701
;; Vector conditional select for every mode in the VDQW iterator:
;;   operand 0 = (operand 4 <operand 3> operand 5) ? operand 1 : operand 2.
;; The expander works in three steps:
;;  1. Decide whether the compare-against-zero form can be used (GE, GT, LE,
;;     LT or EQ with operand 5 equal to CONST0_RTX); otherwise operand 5 is
;;     forced into a register.
;;  2. Pick the base and complementary comparison generators (vcge, vcgt or
;;     vceq) for the comparison code and note whether the operands have to
;;     be swapped ("inverse").
;;  3. Emit the comparison(s) into a <V_cmp_result>mode mask and select
;;     between operands 1 and 2 with neon_vbsl, swapping the two data
;;     operands when the emitted mask is the logical inverse of the
;;     requested condition.
;; Float modes are only handled under flag_unsafe_math_optimizations.
1702 (define_expand "vcond<mode><mode>"
1703 [(set (match_operand:VDQW 0 "s_register_operand" "")
1704 (if_then_else:VDQW
1705 (match_operator 3 "comparison_operator"
1706 [(match_operand:VDQW 4 "s_register_operand" "")
1707 (match_operand:VDQW 5 "nonmemory_operand" "")])
1708 (match_operand:VDQW 1 "s_register_operand" "")
1709 (match_operand:VDQW 2 "s_register_operand" "")))]
1710 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1711 {
1712 int inverse = 0;
1713 int use_zero_form = 0;
1714 int swap_bsl_operands = 0;
1715 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1716 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1717
1718 rtx (*base_comparison) (rtx, rtx, rtx);
1719 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1720
1721 switch (GET_CODE (operands[3]))
1722 {
1723 case GE:
1724 case GT:
1725 case LE:
1726 case LT:
1727 case EQ:
1728 if (operands[5] == CONST0_RTX (<MODE>mode))
1729 {
1730 use_zero_form = 1;
1731 break;
1732 }
1733 /* Fall through. */
1734 default:
1735 if (!REG_P (operands[5]))
1736 operands[5] = force_reg (<MODE>mode, operands[5]);
1737 }
1738
1739 switch (GET_CODE (operands[3]))
1740 {
1741 case LT:
1742 case UNLT:
1743 inverse = 1;
1744 /* Fall through. */
1745 case GE:
1746 case UNGE:
1747 case ORDERED:
1748 case UNORDERED:
1749 base_comparison = gen_neon_vcge<mode>;
1750 complimentary_comparison = gen_neon_vcgt<mode>;
1751 break;
1752 case LE:
1753 case UNLE:
1754 inverse = 1;
1755 /* Fall through. */
1756 case GT:
1757 case UNGT:
1758 base_comparison = gen_neon_vcgt<mode>;
1759 complimentary_comparison = gen_neon_vcge<mode>;
1760 break;
1761 case EQ:
1762 case NE:
1763 case UNEQ:
1764 base_comparison = gen_neon_vceq<mode>;
1765 complimentary_comparison = gen_neon_vceq<mode>;
1766 break;
1767 default:
1768 gcc_unreachable ();
1769 }
1770
1771 switch (GET_CODE (operands[3]))
1772 {
1773 case LT:
1774 case LE:
1775 case GT:
1776 case GE:
1777 case EQ:
1778 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1779 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1780 a GE b -> a GE b
1781 a GT b -> a GT b
1782 a LE b -> b GE a
1783 a LT b -> b GT a
1784 a EQ b -> a EQ b
1785 Note that there also exist direct comparison against 0 forms,
1786 so catch those as a special case. */
1787 if (use_zero_form)
1788 {
1789 inverse = 0;
1790 switch (GET_CODE (operands[3]))
1791 {
1792 case LT:
1793 base_comparison = gen_neon_vclt<mode>;
1794 break;
1795 case LE:
1796 base_comparison = gen_neon_vcle<mode>;
1797 break;
1798 default:
1799 /* Do nothing, other zero form cases already have the correct
1800 base_comparison. */
1801 break;
1802 }
1803 }
1804
1805 if (!inverse)
1806 emit_insn (base_comparison (mask, operands[4], operands[5]));
1807 else
1808 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1809 break;
1810 case UNLT:
1811 case UNLE:
1812 case UNGT:
1813 case UNGE:
1814 case NE:
1815 /* Vector compare returns false for lanes which are unordered, so if we use
1816 the inverse of the comparison we actually want to emit, then
1817 swap the operands to BSL, we will end up with the correct result.
1818 Note that a NE NaN and NaN NE b are true for all a, b.
1819
1820 Our transformations are:
1821 a UNGE b -> !(b GT a)
1822 a UNGT b -> !(b GE a)
1823 a UNLE b -> !(a GT b)
1824 a UNLT b -> !(a GE b)
1825 a NE b -> !(a EQ b) */
1826
1827 if (inverse)
1828 emit_insn (base_comparison (mask, operands[4], operands[5]));
1829 else
1830 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1831
1832 swap_bsl_operands = 1;
1833 break;
1834 case UNEQ:
1835 /* We check (a > b || b > a). Combining these comparisons gives us
1836 true iff (a != b && a ORDERED b); swapping the operands to BSL
1837 will then give us (a == b || a UNORDERED b) as intended. */
1838
1839 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1840 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1841 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1842 swap_bsl_operands = 1;
1843 break;
1844 case UNORDERED:
1845 /* Operands are ORDERED iff (a > b || b >= a).
1846 Swapping the operands to BSL will give the UNORDERED case. */
1847 swap_bsl_operands = 1;
1848 /* Fall through. */
1849 case ORDERED:
1850 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1851 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1852 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1853 break;
1854 default:
1855 gcc_unreachable ();
1856 }
1857
1858 if (swap_bsl_operands)
1859 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1860 operands[1]));
1861 else
1862 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1863 operands[2]));
1864 DONE;
1865 })
1866
;; Unsigned integer vector conditional select:
;;   operand 0 = (operand 4 <op3> operand 5) ? operand 1 : operand 2
;; where operator 3 is one of GEU, GTU, LEU, LTU, EQ or NE.  A per-lane mask
;; is computed into a fresh <V_cmp_result> register and then used by vbsl to
;; pick between operands 1 and 2.
;;
;; LEU and LTU are always implemented by swapping the operands of the
;; unsigned GEU/GTU comparisons.  The compare-against-zero generators
;; (gen_neon_vcle / gen_neon_vclt) build signed LE/LT RTL, so they must not be
;; used for an unsigned comparison even when operand 5 is zero.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (if_then_else:VDQIW
          (match_operator 3 "arm_comparison_operator"
            [(match_operand:VDQIW 4 "s_register_operand" "")
             (match_operand:VDQIW 5 "s_register_operand" "")])
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "s_register_operand" "")))]
  "TARGET_NEON"
{
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  enum rtx_code code = GET_CODE (operands[3]);
  int inverse = 0;

  /* gen_neon_vceq accepts a register or zero as its last operand; the
     unsigned comparison patterns only accept registers.  So keep a zero
     constant only for EQ/NE and force everything else into a register.  */
  if (!REG_P (operands[5])
      && !((code == EQ || code == NE)
           && operands[5] == CONST0_RTX (<MODE>mode)))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (code)
    {
    case GEU:
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
      break;

    case GTU:
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      /* a LEU b <=> b GEU a.  */
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
      break;

    case LTU:
      /* a LTU b <=> b GTU a.  */
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
      break;

    case NE:
      /* Compute the EQ mask and swap the vbsl data operands below.  */
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      inverse = 1;
      break;

    default:
      gcc_unreachable ();
    }

  if (inverse)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));

  DONE;
})
1933
1934 ;; Patterns for builtins.
1935
;; Builtin expander for plain vadd / vaddq.  Floating-point modes go through
;; the unspec pattern unless -funsafe-math-optimizations is in effect; every
;; other case uses the generic add<mode>3 pattern.
(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  const bool need_unspec
    = <Is_float_mode> && !flag_unsafe_math_optimizations;

  if (need_unspec)
    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
1951
1952 ; Note that NEON operations don't support the full IEEE 754 standard: in
1953 ; particular, denormal values are flushed to zero. This means that GCC cannot
1954 ; use those instructions for autovectorization, etc. unless
1955 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1956 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
1957 ; header) must work in either case: if -funsafe-math-optimizations is given,
1958 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1959 ; expand to unspecs (which may potentially limit the extent to which they might
1960 ; be optimized by generic code).
1961
;; Unspec form of vadd, used for intrinsics when
;; flag_unsafe_math_optimizations is false.  The "type" attribute is chosen
;; per mode: FP add/sub for float modes, integer add otherwise.
(define_insn "neon_vadd<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VADD))]
  "TARGET_NEON"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_add<q>")))]
)
1976
;; vaddl: both inputs are VDI vectors, the result has mode <V_widen>.
;; <sup> supplies the signedness letter in the name and the mnemonic suffix.
(define_insn "neon_vaddl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VADDL))]
  "TARGET_NEON"
  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_add_long")]
)
1986
;; vaddw: operand 1 already has the <V_widen> result mode, operand 2 is a VDI
;; vector.
(define_insn "neon_vaddw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)
1996
;; vhadd and vrhadd.  <r> selects the rounding variant and <sup> the
;; signedness.  VDQIW iterates over both D- and Q-register modes, so the
;; "type" attribute takes the <q> suffix rather than being fixed to the
;; Q-register type.
(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)
2008
;; vqadd over the VDQIX modes; <sup> gives the signedness suffix.
(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)
2018
;; vaddhn and vraddhn: VN inputs, result in mode <V_narrow>.
(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)
2028
;; Polynomial and float multiplication, always expressed as UNSPEC_VMUL.
;; <pf> supplies the element-type letter; the "type" attribute depends on
;; whether the mode is a float mode.
(define_insn "neon_vmul<pf><mode>"
  [(set (match_operand:VPF 0 "s_register_operand" "=w")
        (unspec:VPF
          [(match_operand:VPF 1 "s_register_operand" "w")
           (match_operand:VPF 2 "s_register_operand" "w")]
          UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s<q>")
                      (const_string "neon_mul_<V_elem_ch><q>")))]
)
2042
;; Builtin expander for vmla.  Float modes without
;; -funsafe-math-optimizations use the unspec pattern; everything else uses
;; the mul<mode>3add<mode>_neon pattern.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  const bool need_unspec
    = <Is_float_mode> && !flag_unsafe_math_optimizations;

  if (need_unspec)
    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                             operands[2], operands[3]));
  DONE;
})
2058
;; Builtin expander for vfma.  The builtin's operands are reordered for
;; fma<mode>4_intrinsic: operands 2 and 3 are passed first and operand 1 is
;; passed last.
(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx result = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fma<mode>4_intrinsic (result, operands[2], operands[3],
                                       last_arg));
  DONE;
})
2070
;; Builtin expander for vfms.  The builtin's operands are reordered for
;; fmsub<mode>4_intrinsic: operands 2 and 3 are passed first and operand 1 is
;; passed last.
(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx result = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fmsub<mode>4_intrinsic (result, operands[2], operands[3],
                                         last_arg));
  DONE;
})
2082
;; Unspec form of vmla, used for intrinsics when
;; flag_unsafe_math_optimizations is false.  Operand 1 is tied to the output
;; register by the "0" constraint and so does not appear in the template.
(define_insn "neon_vmla<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
2098
;; vmlal: operand 1 (mode <V_widen>) is tied to the output; operands 2 and 3
;; are VW vectors.
(define_insn "neon_vmlal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2109
;; Builtin expander for vmls.  Float modes without
;; -funsafe-math-optimizations use the unspec pattern; everything else uses
;; the mul<mode>3neg<mode>add<mode>_neon pattern.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  const bool need_unspec
    = <Is_float_mode> && !flag_unsafe_math_optimizations;

  if (need_unspec)
    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
                 operands[1], operands[2], operands[3]));
  DONE;
})
2125
;; Unspec form of vmls, used for intrinsics when
;; flag_unsafe_math_optimizations is false.  Operand 1 is tied to the output
;; register by the "0" constraint and so does not appear in the template.
(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
2141
;; vmlsl: operand 1 (mode <V_widen>) is tied to the output; operands 2 and 3
;; are VW vectors.
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2152
;; vqdmulh and vqrdmulh; <r> selects the rounding variant.
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "w")
           (match_operand:VMDQI 2 "s_register_operand" "w")]
          VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)
2163
;; vqrdmlah and vqrdmlsh, selected by the VQRDMLH_AS iterator.  Only enabled
;; under TARGET_NEON_RDMA.  Operand 1 is tied to the output register.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "0")
           (match_operand:VMDQI 2 "s_register_operand" "w")
           (match_operand:VMDQI 3 "s_register_operand" "w")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2175
;; vqdmlal: operand 1 (mode <V_widen>) is tied to the output; operands 2 and
;; 3 are VMDI vectors.
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2186
;; vqdmlsl: operand 1 (mode <V_widen>) is tied to the output; operands 2 and
;; 3 are VMDI vectors.
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2197
;; vmull: VW inputs, result in mode <V_widen>.
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
2207
;; vqdmull: VMDI inputs, result in mode <V_widen>.
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand" "w")]
          UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)
2217
;; Builtin expander for vsub.  Floating-point modes go through the unspec
;; pattern unless -funsafe-math-optimizations is in effect; every other case
;; uses the generic sub<mode>3 pattern.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  const bool need_unspec
    = <Is_float_mode> && !flag_unsafe_math_optimizations;

  if (need_unspec)
    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
2231
;; Unspec form of vsub, used for intrinsics when
;; flag_unsafe_math_optimizations is false.
(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_sub<q>")))]
)
2246
;; vsubl: both inputs are VDI vectors, the result has mode <V_widen>.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)
2256
;; vsubw: operand 1 already has the <V_widen> result mode, operand 2 is a VDI
;; vector.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)
2266
;; vqsub over the VDQIX modes; <sup> gives the signedness suffix.
(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)
2276
;; vhsub over the VDQIW modes; <sup> gives the signedness suffix.
(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)
2286
;; vsubhn and vrsubhn: VN inputs, result in mode <V_narrow>.
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)
2296
;; Vector comparison builtins.  For a floating-point mode without
;; -funsafe-math-optimizations these expand to an UNSPEC pattern; otherwise
;; they expand to the RTL comparison pattern neon_vc<cmp_op><mode>_insn.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
   (neg:<V_cmp_result>
     (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
                       (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
  "TARGET_NEON"
{
  /* True when an FP comparison must be kept opaque as an UNSPEC.  */
  const bool fp_needs_unspec
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       && !flag_unsafe_math_optimizations);

  if (!fp_needs_unspec)
    emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
                                               operands[1],
                                               operands[2]));
  /* The *_insn_unspec generators exist only for float modes, while this
     expander also iterates over integer modes, so the generator is named
     explicitly per float mode instead of via <mode>.  Integer modes never
     reach this point.  */
  else if (<MODE>mode == V2SFmode)
    emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                    operands[1],
                                                    operands[2]));
  else if (<MODE>mode == V4SFmode)
    emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                    operands[1],
                                                    operands[2]));
  else
    gcc_unreachable ();
  DONE;
}
)
2338
;; RTL form of the vector comparisons.  Disabled for float modes unless
;; -funsafe-math-optimizations is given.  Alternative 0 compares against a
;; register, alternative 1 against the immediate #0.
(define_insn "neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
{
  /* Element-type letter: "f" for float vectors, else the iterator's
     comparison type.  */
  const char *elem_kind
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       ? "f" : "<cmp_type>");
  /* Last operand as written in the assembly.  */
  const char *last_operand = which_alternative == 0 ? "%<V_reg>2" : "#0";
  char pattern[100];

  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           elem_kind, last_operand);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))]
)
2363
;; UNSPEC form of the floating-point vector comparisons (NEON_VCMP).
;; Alternative 0 compares against a register, alternative 1 against #0.
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
{
  /* Last operand as written in the assembly.  */
  const char *last_operand = which_alternative == 0 ? "%<V_reg>2" : "#0";
  char pattern[100];

  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           last_operand);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2382
;; Unsigned integer comparisons (GTUGEU iterator); register operands only,
;; emitted with the ".u" element type.
(define_insn "neon_vc<cmp_op>u<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
            (match_operand:VDQIW 1 "s_register_operand" "w")
            (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)
2393
;; Absolute-value comparison builtins (GTGE iterator).  Expands to the UNSPEC
;; pattern unless -funsafe-math-optimizations is given, in which case the RTL
;; form is used.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
{
  if (!flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
                                                operands[2]));
  DONE;
}
)
2412
;; RTL form of the absolute-value comparisons; only enabled with
;; -funsafe-math-optimizations.
(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2423
;; UNSPEC form of the absolute-value comparisons (NEON_VACMP).
(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          NEON_VACMP))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2433
;; vtst over the VDQIW modes, expressed as UNSPEC_VTST.
(define_insn "neon_vtst<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          UNSPEC_VTST))]
  "TARGET_NEON"
  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_tst<q>")]
)
2443
;; Integer vabd over the VDQIW modes; <sup> gives the signedness suffix.
(define_insn "neon_vabd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VABD))]
  "TARGET_NEON"
  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)
2453
;; Floating-point vabd over the VCVTF modes, expressed as UNSPEC_VABD_F.
(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VABD_F))]
  "TARGET_NEON"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
2463
;; vabdl: VW inputs, result in mode <V_widen>.
(define_insn "neon_vabdl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VABDL))]
  "TARGET_NEON"
  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_abd_long")]
)
2473
;; vaba: the VABD unspec of operands 2 and 3 is added to operand 1, which is
;; tied to the output register.
(define_insn "neon_vaba<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (plus:VDQIW
          (unspec:VDQIW
            [(match_operand:VDQIW 2 "s_register_operand" "w")
             (match_operand:VDQIW 3 "s_register_operand" "w")]
            VABD)
          (match_operand:VDQIW 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2484
;; vabal: the VABDL unspec of VW operands 2 and 3 is added to operand 1
;; (mode <V_widen>), which is tied to the output register.
(define_insn "neon_vabal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (unspec:<V_widen>
            [(match_operand:VW 2 "s_register_operand" "w")
             (match_operand:VW 3 "s_register_operand" "w")]
            VABDL)
          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2495
;; Integer vmax / vmin, selected by the VMAXMIN iterator.
(define_insn "neon_v<maxmin><sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VMAXMIN))]
  "TARGET_NEON"
  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)
2505
;; Floating-point vmax / vmin, selected by the VMAXMINF iterator.
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINF))]
  "TARGET_NEON"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2515
;; Vector forms for the IEEE-754 fmax()/fmin() functions.  Requires
;; TARGET_FPU_ARMV8 in addition to TARGET_NEON.
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINFNM))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2526
;; Builtin expander for vpadd; forwards its three operands unchanged to
;; neon_vpadd_internal<mode>.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand" "=w")
   (match_operand:VD 1 "s_register_operand" "w")
   (match_operand:VD 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx result = operands[0];

  emit_insn (gen_neon_vpadd_internal<mode> (result, operands[1],
                                            operands[2]));
  DONE;
})
2537
;; vpaddl: VDQIW input, result in mode <V_double_width>.
(define_insn "neon_vpaddl<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          VPADDL))]
  "TARGET_NEON"
  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_reduc_add_long")]
)
2546
;; vpadal: operand 1 (mode <V_double_width>) is tied to the output; operand
;; 2 is a VDQIW vector.
(define_insn "neon_vpadal<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:<V_double_width> 1 "s_register_operand" "0")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VPADAL))]
  "TARGET_NEON"
  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_reduc_add_acc")]
)
2556
;; Integer vpmax / vpmin over the VDI modes, selected by VPMAXMIN.
(define_insn "neon_vp<maxmin><sup><mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VPMAXMIN))]
  "TARGET_NEON"
  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
2566
;; Floating-point vpmax / vpmin, selected by VPMAXMINF.
(define_insn "neon_vp<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VPMAXMINF))]
  "TARGET_NEON"
  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
)
2576
;; vrecps over the VCVTF modes, expressed as UNSPEC_VRECPS.
(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRECPS))]
  "TARGET_NEON"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)
2586
;; vrsqrts over the VCVTF modes, expressed as UNSPEC_VRSQRTS.
(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRSQRTS))]
  "TARGET_NEON"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)
2596
;; Builtin expander for vabs; forwards to the generic abs<mode>2 pattern.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx result = operands[0];
  rtx input = operands[1];

  emit_insn (gen_abs<mode>2 (result, input));
  DONE;
})
2605
;; vqabs over the VDQIW modes, expressed as UNSPEC_VQABS.
(define_insn "neon_vqabs<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQABS))]
  "TARGET_NEON"
  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qabs<q>")]
)
2614
;; Per-element byte swap (bswap RTL) over the VDQHSD modes, emitted as
;; vrev<element size>.8.
(define_insn "neon_bswap<mode>"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD
          (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
2622
;; Builtin expander for vneg; forwards to the generic neg<mode>2 pattern.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx result = operands[0];
  rtx input = operands[1];

  emit_insn (gen_neg<mode>2 (result, input));
  DONE;
})
2631
;; Vector copysign for the VCVTF modes.  A constant vector with 0x80000000
;; in every lane is built and used as the vbsl selector: operand 2 is first
;; copied into operand 0, then vbsl chooses between operand 0 and operand 1
;; under that mask.
;; NOTE(review): the mask constant is hard-coded to a 32-bit pattern, so this
;; presumably relies on every VCVTF mode having 32-bit elements -- confirm
;; against the iterator definition.
;; NOTE(review): GEN_INT (0x80000000) is not sign-extended; check whether
;; gen_int_mode would be the canonical way to build this constant.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
{
  rtx mask_reg = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
  int lane;
  int nlanes = GET_MODE_NUNITS (<MODE>mode);
  rtvec mask_elts = rtvec_alloc (nlanes);
  rtx mask_as_fp;

  /* Create bitmask for vector select.  */
  for (lane = 0; lane < nlanes; ++lane)
    RTVEC_ELT (mask_elts, lane) = GEN_INT (0x80000000);

  emit_move_insn (mask_reg,
                  gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode,
                                        mask_elts));
  emit_move_insn (operands[0], operands[2]);

  /* View the integer mask register in the floating-point vector mode so it
     can be passed to the vbsl generator for <mode>.  */
  mask_as_fp = simplify_gen_subreg (<MODE>mode, mask_reg,
                                    <VCVTF:V_cmp_result>mode, 0);
  emit_insn (gen_neon_vbsl<mode> (operands[0], mask_as_fp, operands[0],
                                  operands[1]));

  DONE;
})
2658
;; vqneg over the VDQIW modes, expressed as UNSPEC_VQNEG.
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)
2667
;; vcls over the VDQIW modes, expressed as UNSPEC_VCLS.
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)
2676
;; Standard-named clz pattern for the VDQIW modes, emitted as vclz.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
2684
;; Builtin expander for vclz; forwards to the clz<mode>2 pattern.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx result = operands[0];
  rtx input = operands[1];

  emit_insn (gen_clz<mode>2 (result, input));
  DONE;
})
2693
;; Standard-named popcount pattern for the VE modes, emitted as vcnt.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE
          (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
2701
;; Builtin expander for vcnt; forwards to the popcount<mode>2 pattern.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand" "=w")
   (match_operand:VE 1 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx result = operands[0];
  rtx input = operands[1];

  emit_insn (gen_popcount<mode>2 (result, input));
  DONE;
})
2710
;; vrecpe over the V32 modes, expressed as UNSPEC_VRECPE.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
2719
;; vrsqrte over the V32 modes, expressed as UNSPEC_VRSQRTE.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
2728
;; Builtin expander for vmvn; forwards to the generic one_cmpl<mode>2
;; pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx result = operands[0];
  rtx input = operands[1];

  emit_insn (gen_one_cmpl<mode>2 (result, input));
  DONE;
})
2737
;; VD-mode variant: select lane operand 2 of vector operand 1 and
;; sign-extend it into an SImode core register (vmov.s<size>).
;; When BYTES_BIG_ENDIAN, the lane index is mirrored
;; (nunits - 1 - lane) at output time before it is printed.
2738 (define_insn "neon_vget_lane<mode>_sext_internal"
2739 [(set (match_operand:SI 0 "s_register_operand" "=r")
2740 (sign_extend:SI
2741 (vec_select:<V_elem>
2742 (match_operand:VD 1 "s_register_operand" "w")
2743 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2744 "TARGET_NEON"
2745 {
2746 if (BYTES_BIG_ENDIAN)
2747 {
2748 int elt = INTVAL (operands[2]);
2749 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2750 operands[2] = GEN_INT (elt);
2751 }
2752 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2753 }
2754 [(set_attr "type" "neon_to_gp")]
2755 )
2756
;; VD-mode variant: select lane operand 2 of vector operand 1 and
;; zero-extend it into an SImode core register (vmov.u<size>).
;; When BYTES_BIG_ENDIAN, the lane index is mirrored
;; (nunits - 1 - lane) at output time before it is printed.
2757 (define_insn "neon_vget_lane<mode>_zext_internal"
2758 [(set (match_operand:SI 0 "s_register_operand" "=r")
2759 (zero_extend:SI
2760 (vec_select:<V_elem>
2761 (match_operand:VD 1 "s_register_operand" "w")
2762 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2763 "TARGET_NEON"
2764 {
2765 if (BYTES_BIG_ENDIAN)
2766 {
2767 int elt = INTVAL (operands[2]);
2768 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2769 operands[2] = GEN_INT (elt);
2770 }
2771 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2772 }
2773 [(set_attr "type" "neon_to_gp")]
2774 )
2775
;; VQ2-mode variant of the sign-extending lane extract.
;; The output code builds a <V_HALF>mode register at
;; REGNO (operand 1) + 2 * (lane / half-lane-count) and indexes it with
;; lane % half-lane-count (mirrored within the half when BYTES_BIG_ENDIAN).
;; The instruction is printed with output_asm_insn, so the template
;; returned to the caller is empty.
2776 (define_insn "neon_vget_lane<mode>_sext_internal"
2777 [(set (match_operand:SI 0 "s_register_operand" "=r")
2778 (sign_extend:SI
2779 (vec_select:<V_elem>
2780 (match_operand:VQ2 1 "s_register_operand" "w")
2781 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2782 "TARGET_NEON"
2783 {
2784 rtx ops[3];
2785 int regno = REGNO (operands[1]);
2786 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2787 unsigned int elt = INTVAL (operands[2]);
2788 unsigned int elt_adj = elt % halfelts;
2789
2790 if (BYTES_BIG_ENDIAN)
2791 elt_adj = halfelts - 1 - elt_adj;
2792
2793 ops[0] = operands[0];
2794 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2795 ops[2] = GEN_INT (elt_adj);
2796 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2797
2798 return "";
2799 }
2800 [(set_attr "type" "neon_to_gp_q")]
2801 )
2802
;; VQ2-mode variant of the zero-extending lane extract.
;; The output code builds a <V_HALF>mode register at
;; REGNO (operand 1) + 2 * (lane / half-lane-count) and indexes it with
;; lane % half-lane-count (mirrored within the half when BYTES_BIG_ENDIAN).
;; The instruction is printed with output_asm_insn, so the template
;; returned to the caller is empty.
2803 (define_insn "neon_vget_lane<mode>_zext_internal"
2804 [(set (match_operand:SI 0 "s_register_operand" "=r")
2805 (zero_extend:SI
2806 (vec_select:<V_elem>
2807 (match_operand:VQ2 1 "s_register_operand" "w")
2808 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2809 "TARGET_NEON"
2810 {
2811 rtx ops[3];
2812 int regno = REGNO (operands[1]);
2813 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2814 unsigned int elt = INTVAL (operands[2]);
2815 unsigned int elt_adj = elt % halfelts;
2816
2817 if (BYTES_BIG_ENDIAN)
2818 elt_adj = halfelts - 1 - elt_adj;
2819
2820 ops[0] = operands[0];
2821 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2822 ops[2] = GEN_INT (elt_adj);
2823 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2824
2825 return "";
2826 }
2827 [(set_attr "type" "neon_to_gp_q")]
2828 )
2829
;; Expander for the lane-extract builtin over the VDQW modes.
;; On big-endian the lane number is first XORed with
;; (elements per 64 bits - 1).  Modes with 32-bit elements then go through
;; vec_extract<mode>; all others use the sign-extending internal pattern.
2830 (define_expand "neon_vget_lane<mode>"
2831 [(match_operand:<V_ext> 0 "s_register_operand" "")
2832 (match_operand:VDQW 1 "s_register_operand" "")
2833 (match_operand:SI 2 "immediate_operand" "")]
2834 "TARGET_NEON"
2835 {
2836 if (BYTES_BIG_ENDIAN)
2837 {
2838 /* The intrinsics are defined in terms of a model where the
2839 element ordering in memory is vldm order, whereas the generic
2840 RTL is defined in terms of a model where the element ordering
2841 in memory is array order. Convert the lane number to conform
2842 to this model. */
2843 unsigned int elt = INTVAL (operands[2]);
2844 unsigned int reg_nelts
2845 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2846 elt ^= reg_nelts - 1;
2847 operands[2] = GEN_INT (elt);
2848 }
2849
2850 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2851 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2852 else
2853 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
2854 operands[1],
2855 operands[2]));
2856 DONE;
2857 })
2858
;; Unsigned counterpart of neon_vget_lane<mode>, over the VDQIW modes.
;; On big-endian the lane number is first XORed with
;; (elements per 64 bits - 1).  Modes with 32-bit elements then go through
;; vec_extract<mode>; all others use the zero-extending internal pattern.
2859 (define_expand "neon_vget_laneu<mode>"
2860 [(match_operand:<V_ext> 0 "s_register_operand" "")
2861 (match_operand:VDQIW 1 "s_register_operand" "")
2862 (match_operand:SI 2 "immediate_operand" "")]
2863 "TARGET_NEON"
2864 {
2865 if (BYTES_BIG_ENDIAN)
2866 {
2867 /* The intrinsics are defined in terms of a model where the
2868 element ordering in memory is vldm order, whereas the generic
2869 RTL is defined in terms of a model where the element ordering
2870 in memory is array order. Convert the lane number to conform
2871 to this model. */
2872 unsigned int elt = INTVAL (operands[2]);
2873 unsigned int reg_nelts
2874 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2875 elt ^= reg_nelts - 1;
2876 operands[2] = GEN_INT (elt);
2877 }
2878
2879 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2880 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2881 else
2882 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
2883 operands[1],
2884 operands[2]));
2885 DONE;
2886 })
2887
;; DImode lane extract: expands to a plain move of operand 1 into
;; operand 0.  The lane-number operand (2) is accepted but never read.
2888 (define_expand "neon_vget_lanedi"
2889 [(match_operand:DI 0 "s_register_operand" "=r")
2890 (match_operand:DI 1 "s_register_operand" "w")
2891 (match_operand:SI 2 "immediate_operand" "")]
2892 "TARGET_NEON"
2893 {
2894 emit_move_insn (operands[0], operands[1]);
2895 DONE;
2896 })
2897
;; V2DI lane extract.  After flipping the lane number on big-endian
;; (lane ^ 1), asserts that it is 0 or 1 and moves the low (lane 0) or
;; high (lane 1) DImode part of operand 1 into operand 0.
2898 (define_expand "neon_vget_lanev2di"
2899 [(match_operand:DI 0 "s_register_operand" "")
2900 (match_operand:V2DI 1 "s_register_operand" "")
2901 (match_operand:SI 2 "immediate_operand" "")]
2902 "TARGET_NEON"
2903 {
2904 int lane;
2905
2906 if (BYTES_BIG_ENDIAN)
2907 {
2908 /* The intrinsics are defined in terms of a model where the
2909 element ordering in memory is vldm order, whereas the generic
2910 RTL is defined in terms of a model where the element ordering
2911 in memory is array order. Convert the lane number to conform
2912 to this model. */
2913 unsigned int elt = INTVAL (operands[2]);
2914 unsigned int reg_nelts = 2;
2915 elt ^= reg_nelts - 1;
2916 operands[2] = GEN_INT (elt);
2917 }
2918
2919 lane = INTVAL (operands[2]);
2920 gcc_assert ((lane ==0) || (lane == 1));
2921 emit_move_insn (operands[0], lane == 0
2922 ? gen_lowpart (DImode, operands[1])
2923 : gen_highpart (DImode, operands[1]));
2924 DONE;
2925 })
2926
;; Lane insert for the VDQ modes.  Operand 1 is the scalar, operand 2 the
;; input vector and operand 3 the lane number.  On big-endian the lane is
;; XORed with (elements per 64 bits - 1).  The lane is then handed to
;; vec_set<mode>_internal as the one-hot constant (1 << lane).
2927 (define_expand "neon_vset_lane<mode>"
2928 [(match_operand:VDQ 0 "s_register_operand" "=w")
2929 (match_operand:<V_elem> 1 "s_register_operand" "r")
2930 (match_operand:VDQ 2 "s_register_operand" "0")
2931 (match_operand:SI 3 "immediate_operand" "i")]
2932 "TARGET_NEON"
2933 {
2934 unsigned int elt = INTVAL (operands[3]);
2935
2936 if (BYTES_BIG_ENDIAN)
2937 {
2938 unsigned int reg_nelts
2939 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2940 elt ^= reg_nelts - 1;
2941 }
2942
2943 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2944 GEN_INT (1 << elt), operands[2]));
2945 DONE;
2946 })
2947
2948 ; Operands 2 (the input vector) and 3 (the lane number) are ignored here:
; a DImode value holds a single element, so the expansion is just a move
; of operand 1 into operand 0.
2949
2950 (define_expand "neon_vset_lanedi"
2951 [(match_operand:DI 0 "s_register_operand" "=w")
2952 (match_operand:DI 1 "s_register_operand" "r")
2953 (match_operand:DI 2 "s_register_operand" "0")
2954 (match_operand:SI 3 "immediate_operand" "i")]
2955 "TARGET_NEON"
2956 {
2957 emit_move_insn (operands[0], operands[1]);
2958 DONE;
2959 })
2960
;; Builds a VD_RE-mode vector from a DImode value: takes the <MODE>mode
;; lowpart of operand 1 and moves it into operand 0.
2961 (define_expand "neon_vcreate<mode>"
2962 [(match_operand:VD_RE 0 "s_register_operand" "")
2963 (match_operand:DI 1 "general_operand" "")]
2964 "TARGET_NEON"
2965 {
2966 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2967 emit_move_insn (operands[0], src);
2968 DONE;
2969 })
2970
;; Broadcast a scalar held in a core register ("r") to every element of a
;; VX-mode vector; emitted as a single vdup.
2971 (define_insn "neon_vdup_n<mode>"
2972 [(set (match_operand:VX 0 "s_register_operand" "=w")
2973 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2974 "TARGET_NEON"
2975 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2976 [(set_attr "type" "neon_from_gp<q>")]
2977 )
2978
;; Broadcast an HFmode scalar held in a core register to all elements of
;; a V4HF vector; emitted as vdup.16 with the destination printed via %P0.
2979 (define_insn "neon_vdup_nv4hf"
2980 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2981 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
2982 "TARGET_NEON"
2983 "vdup.16\t%P0, %1"
2984 [(set_attr "type" "neon_from_gp")]
2985 )
2986
;; Broadcast an HFmode scalar held in a core register to all elements of
;; a V8HF vector; emitted as vdup.16 with the destination printed via %q0.
2987 (define_insn "neon_vdup_nv8hf"
2988 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
2989 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
2990 "TARGET_NEON"
2991 "vdup.16\t%q0, %1"
2992 [(set_attr "type" "neon_from_gp_q")]
2993 )
2994
;; Broadcast for the V32 modes, with two alternatives: the scalar comes
;; either from a core register ("r", printed as %1) or from a register
;; matching the "t" constraint (printed with the %y1 modifier).  The type
;; attribute differs per alternative.
2995 (define_insn "neon_vdup_n<mode>"
2996 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2997 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2998 "TARGET_NEON"
2999 "@
3000 vdup.<V_sz_elem>\t%<V_reg>0, %1
3001 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3002 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3003 )
3004
;; DImode "broadcast": expands to a plain move of operand 1 into
;; operand 0.
3005 (define_expand "neon_vdup_ndi"
3006 [(match_operand:DI 0 "s_register_operand" "=w")
3007 (match_operand:DI 1 "s_register_operand" "r")]
3008 "TARGET_NEON"
3009 {
3010 emit_move_insn (operands[0], operands[1]);
3011 DONE;
3012 }
3013 )
3014
;; Duplicate a DImode value into both halves of a V2DI register.
;; Emits two vmov instructions (hence length 8), writing %e0 and %f0
;; from either a core-register pair (%Q1, %R1) or a NEON register (%P1).
3015 (define_insn "neon_vdup_nv2di"
3016 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3017 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3018 "TARGET_NEON"
3019 "@
3020 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3021 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3022 [(set_attr "length" "8")
3023 (set_attr "type" "multiple")]
3024 )
3025
;; Broadcast one lane of operand 1 (in <V_double_vector_mode>) to every
;; element of a VDQW-mode result.  On big-endian the lane index is
;; mirrored against the element count of <V_double_vector_mode> at output
;; time.  <Is_d_reg> selects whether the destination is printed with %P0
;; or %q0.
3026 (define_insn "neon_vdup_lane<mode>_internal"
3027 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3028 (vec_duplicate:VDQW
3029 (vec_select:<V_elem>
3030 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3031 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3032 "TARGET_NEON"
3033 {
3034 if (BYTES_BIG_ENDIAN)
3035 {
3036 int elt = INTVAL (operands[2]);
3037 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3038 operands[2] = GEN_INT (elt);
3039 }
3040 if (<Is_d_reg>)
3041 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3042 else
3043 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3044 }
3045 [(set_attr "type" "neon_dup<q>")]
3046 )
3047
;; VH-mode counterpart of the lane broadcast above; additionally requires
;; TARGET_FP16.  The lane handling and output templates match the VDQW
;; pattern: big-endian mirrors the lane index, and <Is_d_reg> picks %P0
;; or %q0 for the destination.
3048 (define_insn "neon_vdup_lane<mode>_internal"
3049 [(set (match_operand:VH 0 "s_register_operand" "=w")
3050 (vec_duplicate:VH
3051 (vec_select:<V_elem>
3052 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3053 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3054 "TARGET_NEON && TARGET_FP16"
3055 {
3056 if (BYTES_BIG_ENDIAN)
3057 {
3058 int elt = INTVAL (operands[2]);
3059 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3060 operands[2] = GEN_INT (elt);
3061 }
3062 if (<Is_d_reg>)
3063 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3064 else
3065 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3066 }
3067 [(set_attr "type" "neon_dup<q>")]
3068 )
3069
;; Expander for the lane-broadcast builtin over the VDQW modes.
;; On big-endian the lane number is XORed with
;; (elements per 64 bits - 1), computed from <V_double_vector_mode>.
;; It then emits neon_vdup_lane<mode>_internal.
3070 (define_expand "neon_vdup_lane<mode>"
3071 [(match_operand:VDQW 0 "s_register_operand" "=w")
3072 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3073 (match_operand:SI 2 "immediate_operand" "i")]
3074 "TARGET_NEON"
3075 {
3076 if (BYTES_BIG_ENDIAN)
3077 {
3078 unsigned int elt = INTVAL (operands[2]);
3079 unsigned int reg_nelts
3080 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3081 elt ^= reg_nelts - 1;
3082 operands[2] = GEN_INT (elt);
3083 }
3084 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3085 operands[2]));
3086 DONE;
3087 })
3088
;; Expander for the lane-broadcast builtin over the VH modes; requires
;; TARGET_FP16.  On big-endian the lane number is XORed with
;; (elements per 64 bits - 1), computed from <V_double_vector_mode>.
;; It then emits neon_vdup_lane<mode>_internal.
3089 (define_expand "neon_vdup_lane<mode>"
3090 [(match_operand:VH 0 "s_register_operand")
3091 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3092 (match_operand:SI 2 "immediate_operand")]
3093 "TARGET_NEON && TARGET_FP16"
3094 {
3095 if (BYTES_BIG_ENDIAN)
3096 {
3097 unsigned int elt = INTVAL (operands[2]);
3098 unsigned int reg_nelts
3099 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3100 elt ^= reg_nelts - 1;
3101 operands[2] = GEN_INT (elt);
3102 }
3103 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3104 operands[2]));
3105 DONE;
3106 })
3107
3108 ; Scalar index is ignored, since only zero is valid here.
; The expansion is therefore a plain move of operand 1 into operand 0.
3109 (define_expand "neon_vdup_lanedi"
3110 [(match_operand:DI 0 "s_register_operand" "=w")
3111 (match_operand:DI 1 "s_register_operand" "w")
3112 (match_operand:SI 2 "immediate_operand" "i")]
3113 "TARGET_NEON"
3114 {
3115 emit_move_insn (operands[0], operands[1]);
3116 DONE;
3117 })
3118
3119 ; Likewise for v2di, as the DImode second operand has only a single element.
; The lane operand (2) is not read; the expansion emits neon_vdup_nv2di on
; operands 0 and 1.
3120 (define_expand "neon_vdup_lanev2di"
3121 [(match_operand:V2DI 0 "s_register_operand" "=w")
3122 (match_operand:DI 1 "s_register_operand" "w")
3123 (match_operand:SI 2 "immediate_operand" "i")]
3124 "TARGET_NEON"
3125 {
3126 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3127 DONE;
3128 })
3129
3130 ; Disabled before reload because we don't want combine doing something silly,
3131 ; but used by the post-reload expansion of neon_vcombine.
; The pattern is a two-set exchange of operands 0 and 1 (both "+w"),
; emitted as a single vswp; the condition requires reload_completed.
3132 (define_insn "*neon_vswp<mode>"
3133 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3134 (match_operand:VDQX 1 "s_register_operand" "+w"))
3135 (set (match_dup 1) (match_dup 0))]
3136 "TARGET_NEON && reload_completed"
3137 "vswp\t%<V_reg>0, %<V_reg>1"
3138 [(set_attr "type" "neon_permute<q>")]
3139 )
3140
3141 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3142 ;; dest vector.
3143 ;; FIXME: A different implementation of this builtin could make it much
3144 ;; more likely that we wouldn't actually need to output anything (we could make
3145 ;; it so that the reg allocator puts things in the right places magically
3146 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
3147
;; The insn itself only outputs "#".  Once reload has completed it is
;; split, and the replacement code is generated by neon_split_vcombine.
3148 (define_insn_and_split "neon_vcombine<mode>"
3149 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3150 (vec_concat:<V_DOUBLE>
3151 (match_operand:VDX 1 "s_register_operand" "w")
3152 (match_operand:VDX 2 "s_register_operand" "w")))]
3153 "TARGET_NEON"
3154 "#"
3155 "&& reload_completed"
3156 [(const_int 0)]
3157 {
3158 neon_split_vcombine (operands);
3159 DONE;
3160 }
3161 [(set_attr "type" "multiple")]
3162 )
3163
;; Extract the high half of a VQX-mode vector: moves the <V_HALF>mode
;; subreg of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode) into
;; operand 0.
3164 (define_expand "neon_vget_high<mode>"
3165 [(match_operand:<V_HALF> 0 "s_register_operand")
3166 (match_operand:VQX 1 "s_register_operand")]
3167 "TARGET_NEON"
3168 {
3169 emit_move_insn (operands[0],
3170 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3171 GET_MODE_SIZE (<V_HALF>mode)));
3172 DONE;
3173 })
3174
;; Extract the low half of a VQX-mode vector: moves the <V_HALF>mode
;; subreg of operand 1 at byte offset 0 into operand 0.
3175 (define_expand "neon_vget_low<mode>"
3176 [(match_operand:<V_HALF> 0 "s_register_operand")
3177 (match_operand:VQX 1 "s_register_operand")]
3178 "TARGET_NEON"
3179 {
3180 emit_move_insn (operands[0],
3181 simplify_gen_subreg (<V_HALF>mode, operands[1],
3182 <MODE>mode, 0));
3183 DONE;
3184 })
3185
;; Signed integer vector (VCVTI) to floating-point vector conversion,
;; emitted as vcvt.f32.s32.  Not available when flag_rounding_math is set.
3186 (define_insn "float<mode><V_cvtto>2"
3187 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3188 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3189 "TARGET_NEON && !flag_rounding_math"
3190 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3191 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3192 )
3193
;; Unsigned integer vector (VCVTI) to floating-point vector conversion,
;; emitted as vcvt.f32.u32.  Not available when flag_rounding_math is set.
3194 (define_insn "floatuns<mode><V_cvtto>2"
3195 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3196 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3197 "TARGET_NEON && !flag_rounding_math"
3198 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3199 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3200 )
3201
;; Floating-point vector (VCVTF) to signed integer vector conversion
;; using the fix RTL code; emitted as vcvt.s32.f32.
3202 (define_insn "fix_trunc<mode><V_cvtto>2"
3203 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3204 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3205 "TARGET_NEON"
3206 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3207 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3208 )
3209
;; Floating-point vector (VCVTF) to unsigned integer vector conversion
;; using the unsigned_fix RTL code; emitted as vcvt.u32.f32.
3210 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3211 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3212 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3213 "TARGET_NEON"
3214 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3215 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3216 )
3217
;; Unspec form of the float-to-integer conversion, one pattern per entry
;; of the VCVT_US iterator.  <sup> supplies the signedness letter in
;; both the pattern name and the vcvt mnemonic.
3218 (define_insn "neon_vcvt<sup><mode>"
3219 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3220 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3221 VCVT_US))]
3222 "TARGET_NEON"
3223 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3224 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3225 )
3226
;; Unspec form of the integer-to-float conversion, one pattern per entry
;; of the VCVT_US iterator.  <sup> supplies the signedness letter of the
;; source type in the vcvt mnemonic.
3227 (define_insn "neon_vcvt<sup><mode>"
3228 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3229 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3230 VCVT_US))]
3231 "TARGET_NEON"
3232 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3233 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3234 )
3235
;; Widening conversion from V4HF to V4SF (vcvt.f32.f16); requires
;; TARGET_FP16 in addition to TARGET_NEON.
3236 (define_insn "neon_vcvtv4sfv4hf"
3237 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3238 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3239 UNSPEC_VCVT))]
3240 "TARGET_NEON && TARGET_FP16"
3241 "vcvt.f32.f16\t%q0, %P1"
3242 [(set_attr "type" "neon_fp_cvt_widen_h")]
3243 )
3244
;; Narrowing conversion from V4SF to V4HF (vcvt.f16.f32); requires
;; TARGET_FP16 in addition to TARGET_NEON.
3245 (define_insn "neon_vcvtv4hfv4sf"
3246 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3247 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3248 UNSPEC_VCVT))]
3249 "TARGET_NEON && TARGET_FP16"
3250 "vcvt.f16.f32\t%P0, %q1"
3251 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
3252 )
3253
;; Float-to-integer vcvt taking an extra immediate (operand 2), which is
;; printed as the final instruction operand.  Before the template is
;; returned, neon_const_bounds is called on operand 2 with the arguments
;; 1 and 33.
;; NOTE(review): presumably a range check on the immediate -- confirm the
;; bound semantics against neon_const_bounds.
3254 (define_insn "neon_vcvt<sup>_n<mode>"
3255 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3256 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3257 (match_operand:SI 2 "immediate_operand" "i")]
3258 VCVT_US_N))]
3259 "TARGET_NEON"
3260 {
3261 neon_const_bounds (operands[2], 1, 33);
3262 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3263 }
3264 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3265 )
3266
;; Integer-to-float vcvt taking an extra immediate (operand 2), which is
;; printed as the final instruction operand.  Before the template is
;; returned, neon_const_bounds is called on operand 2 with the arguments
;; 1 and 33.
;; NOTE(review): presumably a range check on the immediate -- confirm the
;; bound semantics against neon_const_bounds.
3267 (define_insn "neon_vcvt<sup>_n<mode>"
3268 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3269 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3270 (match_operand:SI 2 "immediate_operand" "i")]
3271 VCVT_US_N))]
3272 "TARGET_NEON"
3273 {
3274 neon_const_bounds (operands[2], 1, 33);
3275 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3276 }
3277 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3278 )
3279
;; UNSPEC_VMOVN: produce a <V_narrow> result from a VN-mode source;
;; emitted as vmovn with the destination printed via %P0 and the source
;; via %q1.
3280 (define_insn "neon_vmovn<mode>"
3281 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3282 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3283 UNSPEC_VMOVN))]
3284 "TARGET_NEON"
3285 "vmovn.<V_if_elem>\t%P0, %q1"
3286 [(set_attr "type" "neon_shift_imm_narrow_q")]
3287 )
3288
;; Narrowing move emitted as vqmovn, one pattern per entry of the VQMOVN
;; iterator; <sup> supplies the signedness letter in the mnemonic.
3289 (define_insn "neon_vqmovn<sup><mode>"
3290 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3291 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3292 VQMOVN))]
3293 "TARGET_NEON"
3294 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3295 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3296 )
3297
;; UNSPEC_VQMOVUN: narrowing move emitted as vqmovun with a signed
;; element suffix (<V_s_elem>).
3298 (define_insn "neon_vqmovun<mode>"
3299 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3300 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3301 UNSPEC_VQMOVUN))]
3302 "TARGET_NEON"
3303 "vqmovun.<V_s_elem>\t%P0, %q1"
3304 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3305 )
3306
;; Widening move emitted as vmovl, one pattern per entry of the VMOVL
;; iterator: VW-mode source (printed via %P1) to <V_widen> destination
;; (printed via %q0).
3307 (define_insn "neon_vmovl<sup><mode>"
3308 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3309 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3310 VMOVL))]
3311 "TARGET_NEON"
3312 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3313 [(set_attr "type" "neon_shift_imm_long")]
3314 )
3315
;; Multiply a VMD-mode vector (operand 1) by lane operand 3 of operand 2.
;; Operand 2 uses the mode-dependent <scalar_mul_constraint>.  The type
;; attribute is chosen by <Is_float_mode>.
3316 (define_insn "neon_vmul_lane<mode>"
3317 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3318 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3319 (match_operand:VMD 2 "s_register_operand"
3320 "<scalar_mul_constraint>")
3321 (match_operand:SI 3 "immediate_operand" "i")]
3322 UNSPEC_VMUL_LANE))]
3323 "TARGET_NEON"
3324 {
3325 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3326 }
3327 [(set (attr "type")
3328 (if_then_else (match_test "<Is_float_mode>")
3329 (const_string "neon_fp_mul_s_scalar<q>")
3330 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3331 )
3332
;; VMQ-mode variant of the multiply-by-lane pattern: operands 0 and 1 are
;; printed via %q, while the scalar source (operand 2) is a <V_HALF>mode
;; register printed via %P2 and indexed by operand 3.
3333 (define_insn "neon_vmul_lane<mode>"
3334 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3335 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3336 (match_operand:<V_HALF> 2 "s_register_operand"
3337 "<scalar_mul_constraint>")
3338 (match_operand:SI 3 "immediate_operand" "i")]
3339 UNSPEC_VMUL_LANE))]
3340 "TARGET_NEON"
3341 {
3342 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3343 }
3344 [(set (attr "type")
3345 (if_then_else (match_test "<Is_float_mode>")
3346 (const_string "neon_fp_mul_s_scalar<q>")
3347 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3348 )
3349
;; Widening multiply by lane (vmull), one pattern per entry of the
;; VMULL_LANE iterator: VMDI-mode inputs, <V_widen> result printed via
;; %q0.
3350 (define_insn "neon_vmull<sup>_lane<mode>"
3351 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3352 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3353 (match_operand:VMDI 2 "s_register_operand"
3354 "<scalar_mul_constraint>")
3355 (match_operand:SI 3 "immediate_operand" "i")]
3356 VMULL_LANE))]
3357 "TARGET_NEON"
3358 {
3359 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3360 }
3361 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
3362 )
3363
;; UNSPEC_VQDMULL_LANE: vqdmull by lane, with VMDI-mode inputs and a
;; <V_widen> result printed via %q0.
3364 (define_insn "neon_vqdmull_lane<mode>"
3365 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3366 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3367 (match_operand:VMDI 2 "s_register_operand"
3368 "<scalar_mul_constraint>")
3369 (match_operand:SI 3 "immediate_operand" "i")]
3370 UNSPEC_VQDMULL_LANE))]
3371 "TARGET_NEON"
3372 {
3373 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3374 }
3375 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
3376 )
3377
;; vqdmulh / vqrdmulh by lane for the VMQI modes (<r> selects the
;; variant, driven by the VQDMULH_LANE iterator).  Operands 0 and 1 are
;; printed via %q; the scalar source is a <V_HALF>mode register printed
;; via %P2.
3378 (define_insn "neon_vq<r>dmulh_lane<mode>"
3379 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3380 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3381 (match_operand:<V_HALF> 2 "s_register_operand"
3382 "<scalar_mul_constraint>")
3383 (match_operand:SI 3 "immediate_operand" "i")]
3384 VQDMULH_LANE))]
3385 "TARGET_NEON"
3386 {
3387 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3388 }
3389 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3390 )
3391
;; vqdmulh / vqrdmulh by lane for the VMDI modes (<r> selects the
;; variant, driven by the VQDMULH_LANE iterator).  All vector operands
;; are printed via %P, so the scheduling type is the un-suffixed scalar
;; form rather than the "_q" form used by the VMQI variant of this
;; pattern.
3392 (define_insn "neon_vq<r>dmulh_lane<mode>"
3393 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3394 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3395 (match_operand:VMDI 2 "s_register_operand"
3396 "<scalar_mul_constraint>")
3397 (match_operand:SI 3 "immediate_operand" "i")]
3398 VQDMULH_LANE))]
3399 "TARGET_NEON"
3400 {
3401 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3402 }
3403 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
3404 )
3405
3406 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI-mode variant.  Operand 1 is tied to the destination ("0"),
;; operand 2 is the vector multiplicand, and operand 3 is a <V_HALF>mode
;; register indexed by operand 4.  Guarded by TARGET_NEON_RDMA.
3407 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3408 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3409 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3410 (match_operand:VMQI 2 "s_register_operand" "w")
3411 (match_operand:<V_HALF> 3 "s_register_operand"
3412 "<scalar_mul_constraint>")
3413 (match_operand:SI 4 "immediate_operand" "i")]
3414 VQRDMLH_AS))]
3415 "TARGET_NEON_RDMA"
3416 {
3417 return
3418 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3419 }
3420 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
3421 )
3422
;; VMDI-mode variant of vqrdmlah_lane / vqrdmlsh_lane.  Operand 1 is tied
;; to the destination ("0"); operands 2 and 3 are VMDI-mode registers,
;; and operand 3 is indexed by operand 4.  Guarded by TARGET_NEON_RDMA.
3423 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3424 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3425 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3426 (match_operand:VMDI 2 "s_register_operand" "w")
3427 (match_operand:VMDI 3 "s_register_operand"
3428 "<scalar_mul_constraint>")
3429 (match_operand:SI 4 "immediate_operand" "i")]
3430 VQRDMLH_AS))]
3431 "TARGET_NEON_RDMA"
3432 {
3433 return
3434 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3435 }
3436 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
3437 )
3438
;; Multiply-accumulate by lane for the VMD modes (vmla).  Operand 1 is the
;; accumulator and is tied to the destination ("0"); operand 2 is
;; multiplied by lane operand 4 of operand 3.
3439 (define_insn "neon_vmla_lane<mode>"
3440 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3441 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3442 (match_operand:VMD 2 "s_register_operand" "w")
3443 (match_operand:VMD 3 "s_register_operand"
3444 "<scalar_mul_constraint>")
3445 (match_operand:SI 4 "immediate_operand" "i")]
3446 UNSPEC_VMLA_LANE))]
3447 "TARGET_NEON"
3448 {
3449 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3450 }
3451 [(set (attr "type")
3452 (if_then_else (match_test "<Is_float_mode>")
3453 (const_string "neon_fp_mla_s_scalar<q>")
3454 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3455 )
3456
;; VMQ-mode variant of multiply-accumulate by lane (vmla).  The
;; accumulator (operand 1) is tied to the destination; the scalar source
;; (operand 3) is a <V_HALF>mode register printed via %P3.
3457 (define_insn "neon_vmla_lane<mode>"
3458 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3459 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3460 (match_operand:VMQ 2 "s_register_operand" "w")
3461 (match_operand:<V_HALF> 3 "s_register_operand"
3462 "<scalar_mul_constraint>")
3463 (match_operand:SI 4 "immediate_operand" "i")]
3464 UNSPEC_VMLA_LANE))]
3465 "TARGET_NEON"
3466 {
3467 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3468 }
3469 [(set (attr "type")
3470 (if_then_else (match_test "<Is_float_mode>")
3471 (const_string "neon_fp_mla_s_scalar<q>")
3472 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3473 )
3474
;; Widening multiply-accumulate by lane (vmlal), one pattern per entry of
;; the VMLAL_LANE iterator.  The <V_widen> accumulator (operand 1) is tied
;; to the destination; the multiplicands are VMDI-mode registers.
3475 (define_insn "neon_vmlal<sup>_lane<mode>"
3476 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3477 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3478 (match_operand:VMDI 2 "s_register_operand" "w")
3479 (match_operand:VMDI 3 "s_register_operand"
3480 "<scalar_mul_constraint>")
3481 (match_operand:SI 4 "immediate_operand" "i")]
3482 VMLAL_LANE))]
3483 "TARGET_NEON"
3484 {
3485 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3486 }
3487 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3488 )
3489
;; UNSPEC_VQDMLAL_LANE: vqdmlal by lane.  The <V_widen> accumulator
;; (operand 1) is tied to the destination; the multiplicands are
;; VMDI-mode registers.
3490 (define_insn "neon_vqdmlal_lane<mode>"
3491 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3492 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3493 (match_operand:VMDI 2 "s_register_operand" "w")
3494 (match_operand:VMDI 3 "s_register_operand"
3495 "<scalar_mul_constraint>")
3496 (match_operand:SI 4 "immediate_operand" "i")]
3497 UNSPEC_VQDMLAL_LANE))]
3498 "TARGET_NEON"
3499 {
3500 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3501 }
3502 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3503 )
3504
;; Multiply-subtract by lane for the VMD modes (vmls).  Operand 1 is tied
;; to the destination ("0"); operand 2 is multiplied by lane operand 4 of
;; operand 3.  The type attribute reuses the "mla" scheduling classes.
3505 (define_insn "neon_vmls_lane<mode>"
3506 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3507 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3508 (match_operand:VMD 2 "s_register_operand" "w")
3509 (match_operand:VMD 3 "s_register_operand"
3510 "<scalar_mul_constraint>")
3511 (match_operand:SI 4 "immediate_operand" "i")]
3512 UNSPEC_VMLS_LANE))]
3513 "TARGET_NEON"
3514 {
3515 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3516 }
3517 [(set (attr "type")
3518 (if_then_else (match_test "<Is_float_mode>")
3519 (const_string "neon_fp_mla_s_scalar<q>")
3520 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3521 )
3522
;; VMQ-mode variant of multiply-subtract by lane (vmls).  Operand 1 is
;; tied to the destination; the scalar source (operand 3) is a
;; <V_HALF>mode register printed via %P3.
3523 (define_insn "neon_vmls_lane<mode>"
3524 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3525 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3526 (match_operand:VMQ 2 "s_register_operand" "w")
3527 (match_operand:<V_HALF> 3 "s_register_operand"
3528 "<scalar_mul_constraint>")
3529 (match_operand:SI 4 "immediate_operand" "i")]
3530 UNSPEC_VMLS_LANE))]
3531 "TARGET_NEON"
3532 {
3533 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3534 }
3535 [(set (attr "type")
3536 (if_then_else (match_test "<Is_float_mode>")
3537 (const_string "neon_fp_mla_s_scalar<q>")
3538 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3539 )
3540
;; Widening multiply-subtract by lane (vmlsl), one pattern per entry of
;; the VMLSL_LANE iterator.  The <V_widen> operand 1 is tied to the
;; destination; the multiplicands are VMDI-mode registers.
3541 (define_insn "neon_vmlsl<sup>_lane<mode>"
3542 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3543 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3544 (match_operand:VMDI 2 "s_register_operand" "w")
3545 (match_operand:VMDI 3 "s_register_operand"
3546 "<scalar_mul_constraint>")
3547 (match_operand:SI 4 "immediate_operand" "i")]
3548 VMLSL_LANE))]
3549 "TARGET_NEON"
3550 {
3551 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3552 }
3553 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3554 )
3555
;; UNSPEC_VQDMLSL_LANE: vqdmlsl by lane.  The <V_widen> operand 1 is tied
;; to the destination; the multiplicands are VMDI-mode registers.
3556 (define_insn "neon_vqdmlsl_lane<mode>"
3557 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3558 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3559 (match_operand:VMDI 2 "s_register_operand" "w")
3560 (match_operand:VMDI 3 "s_register_operand"
3561 "<scalar_mul_constraint>")
3562 (match_operand:SI 4 "immediate_operand" "i")]
3563 UNSPEC_VQDMLSL_LANE))]
3564 "TARGET_NEON"
3565 {
3566 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3567 }
3568 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3569 )
3570
3571 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3572 ; core register into a temp register, then use a scalar taken from that. This
3573 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3574 ; or extracted from another vector. In the latter case it's currently better to
3575 ; use the "_lane" variant, and the former case can probably be implemented
3576 ; using vld1_lane, but that hasn't been done yet.
3577
;; Multiply a VMD-mode vector by a scalar.  Allocates a fresh <MODE>mode
;; pseudo, inserts the scalar (operand 2) into its lane 0 with
;; neon_vset_lane<mode>, then emits neon_vmul_lane<mode> using lane 0 of
;; that temporary.
3578 (define_expand "neon_vmul_n<mode>"
3579 [(match_operand:VMD 0 "s_register_operand" "")
3580 (match_operand:VMD 1 "s_register_operand" "")
3581 (match_operand:<V_elem> 2 "s_register_operand" "")]
3582 "TARGET_NEON"
3583 {
3584 rtx tmp = gen_reg_rtx (<MODE>mode);
3585 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3586 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3587 const0_rtx));
3588 DONE;
3589 })
3590
;; VMQ-mode variant of multiply-by-scalar.  The temporary is a
;; <V_HALF>mode pseudo; the scalar (operand 2) is inserted into its
;; lane 0 with neon_vset_lane<V_half>, and neon_vmul_lane<mode> then
;; multiplies by lane 0 of that temporary.
3591 (define_expand "neon_vmul_n<mode>"
3592 [(match_operand:VMQ 0 "s_register_operand" "")
3593 (match_operand:VMQ 1 "s_register_operand" "")
3594 (match_operand:<V_elem> 2 "s_register_operand" "")]
3595 "TARGET_NEON"
3596 {
3597 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3598 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3599 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3600 const0_rtx));
3601 DONE;
3602 })
3603
;; Widening multiply by scalar: inserts operand 2 into lane 0 of a fresh
;; <MODE>mode pseudo, then emits neon_vmulls_lane<mode> on lane 0.
3604 (define_expand "neon_vmulls_n<mode>"
3605 [(match_operand:<V_widen> 0 "s_register_operand" "")
3606 (match_operand:VMDI 1 "s_register_operand" "")
3607 (match_operand:<V_elem> 2 "s_register_operand" "")]
3608 "TARGET_NEON"
3609 {
3610 rtx tmp = gen_reg_rtx (<MODE>mode);
3611 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3612 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
3613 const0_rtx));
3614 DONE;
3615 })
3616
;; Widening multiply by scalar: inserts operand 2 into lane 0 of a fresh
;; <MODE>mode pseudo, then emits neon_vmullu_lane<mode> on lane 0.
3617 (define_expand "neon_vmullu_n<mode>"
3618 [(match_operand:<V_widen> 0 "s_register_operand" "")
3619 (match_operand:VMDI 1 "s_register_operand" "")
3620 (match_operand:<V_elem> 2 "s_register_operand" "")]
3621 "TARGET_NEON"
3622 {
3623 rtx tmp = gen_reg_rtx (<MODE>mode);
3624 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3625 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
3626 const0_rtx));
3627 DONE;
3628 })
3629
;; vqdmull by scalar: inserts operand 2 into lane 0 of a fresh <MODE>mode
;; pseudo, then emits neon_vqdmull_lane<mode> on lane 0.
3630 (define_expand "neon_vqdmull_n<mode>"
3631 [(match_operand:<V_widen> 0 "s_register_operand" "")
3632 (match_operand:VMDI 1 "s_register_operand" "")
3633 (match_operand:<V_elem> 2 "s_register_operand" "")]
3634 "TARGET_NEON"
3635 {
3636 rtx tmp = gen_reg_rtx (<MODE>mode);
3637 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3638 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3639 const0_rtx));
3640 DONE;
3641 })
3642
;; vqdmulh by scalar for the VMDI modes: inserts operand 2 into lane 0 of
;; a fresh <MODE>mode pseudo, then emits neon_vqdmulh_lane<mode> on
;; lane 0.
3643 (define_expand "neon_vqdmulh_n<mode>"
3644 [(match_operand:VMDI 0 "s_register_operand" "")
3645 (match_operand:VMDI 1 "s_register_operand" "")
3646 (match_operand:<V_elem> 2 "s_register_operand" "")]
3647 "TARGET_NEON"
3648 {
3649 rtx tmp = gen_reg_rtx (<MODE>mode);
3650 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3651 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3652 const0_rtx));
3653 DONE;
3654 })
3655
;; "By scalar" form of vqrdmulh for the VMDI modes.  The <V_elem> scalar in
;; operand 2 is placed in lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane, and neon_vqrdmulh_lane is emitted with lane index 0.
3656 (define_expand "neon_vqrdmulh_n<mode>"
3657 [(match_operand:VMDI 0 "s_register_operand" "")
3658 (match_operand:VMDI 1 "s_register_operand" "")
3659 (match_operand:<V_elem> 2 "s_register_operand" "")]
3660 "TARGET_NEON"
3661 {
3662 rtx tmp = gen_reg_rtx (<MODE>mode);
3663 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3664 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3665 const0_rtx));
3666 DONE;
3667 })
3668
;; "By scalar" form of vqdmulh for the VMQI modes.  Unlike the VMDI
;; variant, the temporary that carries the scalar has the half-width mode
;; <V_HALF>: operand 2 is set into its lane 0 with neon_vset_lane<V_half>,
;; and that half-width pseudo is handed to neon_vqdmulh_lane<mode> together
;; with lane index 0.
3669 (define_expand "neon_vqdmulh_n<mode>"
3670 [(match_operand:VMQI 0 "s_register_operand" "")
3671 (match_operand:VMQI 1 "s_register_operand" "")
3672 (match_operand:<V_elem> 2 "s_register_operand" "")]
3673 "TARGET_NEON"
3674 {
3675 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3676 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3677 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3678 const0_rtx));
3679 DONE;
3680 })
3681
;; "By scalar" form of vqrdmulh for the VMQI modes.  The scalar in operand 2
;; is set into lane 0 of a fresh half-width (<V_HALF>) pseudo with
;; neon_vset_lane<V_half>; neon_vqrdmulh_lane<mode> is then emitted with that
;; pseudo and lane index 0.
3682 (define_expand "neon_vqrdmulh_n<mode>"
3683 [(match_operand:VMQI 0 "s_register_operand" "")
3684 (match_operand:VMQI 1 "s_register_operand" "")
3685 (match_operand:<V_elem> 2 "s_register_operand" "")]
3686 "TARGET_NEON"
3687 {
3688 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3689 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3690 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3691 const0_rtx));
3692 DONE;
3693 })
3694
;; "By scalar" multiply-accumulate for the VMD modes.  Operands 1 and 2 are
;; forwarded unchanged; the <V_elem> scalar in operand 3 is set into lane 0
;; of a fresh <MODE> pseudo with neon_vset_lane, and neon_vmla_lane is
;; emitted with that pseudo and lane index 0.
3695 (define_expand "neon_vmla_n<mode>"
3696 [(match_operand:VMD 0 "s_register_operand" "")
3697 (match_operand:VMD 1 "s_register_operand" "")
3698 (match_operand:VMD 2 "s_register_operand" "")
3699 (match_operand:<V_elem> 3 "s_register_operand" "")]
3700 "TARGET_NEON"
3701 {
3702 rtx tmp = gen_reg_rtx (<MODE>mode);
3703 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3704 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3705 tmp, const0_rtx));
3706 DONE;
3707 })
3708
;; "By scalar" multiply-accumulate for the VMQ modes.  The scalar in
;; operand 3 is set into lane 0 of a fresh half-width (<V_HALF>) pseudo with
;; neon_vset_lane<V_half>; neon_vmla_lane<mode> is then emitted with
;; operands 0-2, that pseudo and lane index 0.
3709 (define_expand "neon_vmla_n<mode>"
3710 [(match_operand:VMQ 0 "s_register_operand" "")
3711 (match_operand:VMQ 1 "s_register_operand" "")
3712 (match_operand:VMQ 2 "s_register_operand" "")
3713 (match_operand:<V_elem> 3 "s_register_operand" "")]
3714 "TARGET_NEON"
3715 {
3716 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3717 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3718 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3719 tmp, const0_rtx));
3720 DONE;
3721 })
3722
;; "By scalar" signed widening multiply-accumulate for the VMDI modes.
;; Operands 0 and 1 are <V_widen> vectors, operand 2 a <MODE> vector and
;; operand 3 a <V_elem> scalar.  The scalar is set into lane 0 of a fresh
;; <MODE> pseudo with neon_vset_lane, then neon_vmlals_lane is emitted with
;; lane index 0.
3723 (define_expand "neon_vmlals_n<mode>"
3724 [(match_operand:<V_widen> 0 "s_register_operand" "")
3725 (match_operand:<V_widen> 1 "s_register_operand" "")
3726 (match_operand:VMDI 2 "s_register_operand" "")
3727 (match_operand:<V_elem> 3 "s_register_operand" "")]
3728 "TARGET_NEON"
3729 {
3730 rtx tmp = gen_reg_rtx (<MODE>mode);
3731 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3732 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
3733 tmp, const0_rtx));
3734 DONE;
3735 })
3736
;; "By scalar" unsigned widening multiply-accumulate for the VMDI modes.
;; The <V_elem> scalar in operand 3 is set into lane 0 of a fresh <MODE>
;; pseudo with neon_vset_lane, then neon_vmlalu_lane is emitted with
;; operands 0-2, that pseudo and lane index 0.
3737 (define_expand "neon_vmlalu_n<mode>"
3738 [(match_operand:<V_widen> 0 "s_register_operand" "")
3739 (match_operand:<V_widen> 1 "s_register_operand" "")
3740 (match_operand:VMDI 2 "s_register_operand" "")
3741 (match_operand:<V_elem> 3 "s_register_operand" "")]
3742 "TARGET_NEON"
3743 {
3744 rtx tmp = gen_reg_rtx (<MODE>mode);
3745 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3746 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
3747 tmp, const0_rtx));
3748 DONE;
3749 })
3750
;; "By scalar" form of vqdmlal for the VMDI modes.  The <V_elem> scalar in
;; operand 3 is set into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane, then neon_vqdmlal_lane is emitted with operands 0-2,
;; that pseudo and lane index 0.
3751 (define_expand "neon_vqdmlal_n<mode>"
3752 [(match_operand:<V_widen> 0 "s_register_operand" "")
3753 (match_operand:<V_widen> 1 "s_register_operand" "")
3754 (match_operand:VMDI 2 "s_register_operand" "")
3755 (match_operand:<V_elem> 3 "s_register_operand" "")]
3756 "TARGET_NEON"
3757 {
3758 rtx tmp = gen_reg_rtx (<MODE>mode);
3759 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3760 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3761 tmp, const0_rtx));
3762 DONE;
3763 })
3764
;; "By scalar" multiply-subtract for the VMD modes.  The <V_elem> scalar in
;; operand 3 is set into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane, then neon_vmls_lane is emitted with operands 0-2, that
;; pseudo and lane index 0.
3765 (define_expand "neon_vmls_n<mode>"
3766 [(match_operand:VMD 0 "s_register_operand" "")
3767 (match_operand:VMD 1 "s_register_operand" "")
3768 (match_operand:VMD 2 "s_register_operand" "")
3769 (match_operand:<V_elem> 3 "s_register_operand" "")]
3770 "TARGET_NEON"
3771 {
3772 rtx tmp = gen_reg_rtx (<MODE>mode);
3773 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3774 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3775 tmp, const0_rtx));
3776 DONE;
3777 })
3778
;; "By scalar" multiply-subtract for the VMQ modes.  The scalar in operand 3
;; is set into lane 0 of a fresh half-width (<V_HALF>) pseudo with
;; neon_vset_lane<V_half>; neon_vmls_lane<mode> is then emitted with
;; operands 0-2, that pseudo and lane index 0.
3779 (define_expand "neon_vmls_n<mode>"
3780 [(match_operand:VMQ 0 "s_register_operand" "")
3781 (match_operand:VMQ 1 "s_register_operand" "")
3782 (match_operand:VMQ 2 "s_register_operand" "")
3783 (match_operand:<V_elem> 3 "s_register_operand" "")]
3784 "TARGET_NEON"
3785 {
3786 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3787 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3788 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3789 tmp, const0_rtx));
3790 DONE;
3791 })
3792
;; "By scalar" signed widening multiply-subtract for the VMDI modes.  The
;; <V_elem> scalar in operand 3 is set into lane 0 of a fresh <MODE> pseudo
;; with neon_vset_lane, then neon_vmlsls_lane is emitted with operands 0-2,
;; that pseudo and lane index 0.
3793 (define_expand "neon_vmlsls_n<mode>"
3794 [(match_operand:<V_widen> 0 "s_register_operand" "")
3795 (match_operand:<V_widen> 1 "s_register_operand" "")
3796 (match_operand:VMDI 2 "s_register_operand" "")
3797 (match_operand:<V_elem> 3 "s_register_operand" "")]
3798 "TARGET_NEON"
3799 {
3800 rtx tmp = gen_reg_rtx (<MODE>mode);
3801 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3802 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
3803 tmp, const0_rtx));
3804 DONE;
3805 })
3806
;; "By scalar" unsigned widening multiply-subtract for the VMDI modes.  The
;; <V_elem> scalar in operand 3 is set into lane 0 of a fresh <MODE> pseudo
;; with neon_vset_lane, then neon_vmlslu_lane is emitted with operands 0-2,
;; that pseudo and lane index 0.
3807 (define_expand "neon_vmlslu_n<mode>"
3808 [(match_operand:<V_widen> 0 "s_register_operand" "")
3809 (match_operand:<V_widen> 1 "s_register_operand" "")
3810 (match_operand:VMDI 2 "s_register_operand" "")
3811 (match_operand:<V_elem> 3 "s_register_operand" "")]
3812 "TARGET_NEON"
3813 {
3814 rtx tmp = gen_reg_rtx (<MODE>mode);
3815 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3816 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
3817 tmp, const0_rtx));
3818 DONE;
3819 })
3820
;; "By scalar" form of vqdmlsl for the VMDI modes.  The <V_elem> scalar in
;; operand 3 is set into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane, then neon_vqdmlsl_lane is emitted with operands 0-2,
;; that pseudo and lane index 0.
3821 (define_expand "neon_vqdmlsl_n<mode>"
3822 [(match_operand:<V_widen> 0 "s_register_operand" "")
3823 (match_operand:<V_widen> 1 "s_register_operand" "")
3824 (match_operand:VMDI 2 "s_register_operand" "")
3825 (match_operand:<V_elem> 3 "s_register_operand" "")]
3826 "TARGET_NEON"
3827 {
3828 rtx tmp = gen_reg_rtx (<MODE>mode);
3829 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3830 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3831 tmp, const0_rtx));
3832 DONE;
3833 })
3834
;; vext on the VDQX modes, modelled as UNSPEC_VEXT of two vector inputs and
;; an immediate.  At output time operand 3 is checked with
;; neon_const_bounds against 0 and the number of units in <MODE>, and is
;; then printed as the last operand of vext.<V_sz_elem>.
3835 (define_insn "neon_vext<mode>"
3836 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3837 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3838 (match_operand:VDQX 2 "s_register_operand" "w")
3839 (match_operand:SI 3 "immediate_operand" "i")]
3840 UNSPEC_VEXT))]
3841 "TARGET_NEON"
3842 {
3843 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3844 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3845 }
3846 [(set_attr "type" "neon_ext<q>")]
3847 )
3848
;; vrev64 on the VDQ modes: a one-input UNSPEC_VREV64, printed as
;; vrev64.<V_sz_elem> with operand 0 as destination and operand 1 as source.
3849 (define_insn "neon_vrev64<mode>"
3850 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3851 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3852 UNSPEC_VREV64))]
3853 "TARGET_NEON"
3854 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3855 [(set_attr "type" "neon_rev<q>")]
3856 )
3857
;; vrev32 on the VX modes: a one-input UNSPEC_VREV32, printed as
;; vrev32.<V_sz_elem> with operand 0 as destination and operand 1 as source.
3858 (define_insn "neon_vrev32<mode>"
3859 [(set (match_operand:VX 0 "s_register_operand" "=w")
3860 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3861 UNSPEC_VREV32))]
3862 "TARGET_NEON"
3863 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3864 [(set_attr "type" "neon_rev<q>")]
3865 )
3866
;; vrev16 on the VE modes: a one-input UNSPEC_VREV16, printed as
;; vrev16.<V_sz_elem> with operand 0 as destination and operand 1 as source.
3867 (define_insn "neon_vrev16<mode>"
3868 [(set (match_operand:VE 0 "s_register_operand" "=w")
3869 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3870 UNSPEC_VREV16))]
3871 "TARGET_NEON"
3872 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3873 [(set_attr "type" "neon_rev<q>")]
3874 )
3875
3876 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3877 ; allocation. For an intrinsic of form:
3878 ; rD = vbsl_* (rS, rN, rM)
3879 ; We can use any of:
3880 ; vbsl rS, rN, rM (if D = S)
3881 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3882 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
3883
;; Bitwise-select insn with three alternatives, one per mnemonic.  Each
;; alternative ties the destination (constraint "0") to a different input:
;;   alternative 0: operand 1 tied -> vbsl %0, %2, %3
;;   alternative 1: operand 3 tied -> vbit %0, %2, %1
;;   alternative 2: operand 2 tied -> vbif %0, %3, %1
;; so the register allocator's choice of tie selects the instruction.
3884 (define_insn "neon_vbsl<mode>_internal"
3885 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3886 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3887 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3888 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3889 UNSPEC_VBSL))]
3890 "TARGET_NEON"
3891 "@
3892 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3893 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3894 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3895 [(set_attr "type" "neon_bsl<q>")]
3896 )
3897
;; Named vbsl expander.  Operand 1 arrives in <V_cmp_result> mode while the
;; other operands are in <MODE>; the preparation code replaces it with its
;; <MODE>mode low part so that the emitted UNSPEC_VBSL has all three inputs
;; in one mode.  There is no DONE, so the template above is what gets
;; emitted (presumably then matched by neon_vbsl<mode>_internal, whose
;; operands are all VDQX -- confirm).
3898 (define_expand "neon_vbsl<mode>"
3899 [(set (match_operand:VDQX 0 "s_register_operand" "")
3900 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3901 (match_operand:VDQX 2 "s_register_operand" "")
3902 (match_operand:VDQX 3 "s_register_operand" "")]
3903 UNSPEC_VBSL))]
3904 "TARGET_NEON"
3905 {
3906 /* We can't alias operands together if they have different modes. */
3907 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
3908 })
3909
3910 ;; vshl, vrshl
;; Register-operand shifts over the VSHL unspec iterator: both inputs are
;; VDQIX registers (operand 2 is not an immediate).  <shift_op> and <sup>
;; are substituted from the iterator to form the mnemonic and type suffix.
3911 (define_insn "neon_v<shift_op><sup><mode>"
3912 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3913 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3914 (match_operand:VDQIX 2 "s_register_operand" "w")]
3915 VSHL))]
3916 "TARGET_NEON"
3917 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3918 [(set_attr "type" "neon_shift_imm<q>")]
3919 )
3920
3921 ;; vqshl, vqrshl
;; Saturating register-operand shifts over the VQSHL unspec iterator.  Same
;; operand layout and output template as the VSHL pattern; only the unspec
;; iterator and the "type" attribute (neon_sat_shift_imm<q>) differ.
3922 (define_insn "neon_v<shift_op><sup><mode>"
3923 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3924 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3925 (match_operand:VDQIX 2 "s_register_operand" "w")]
3926 VQSHL))]
3927 "TARGET_NEON"
3928 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3929 [(set_attr "type" "neon_sat_shift_imm<q>")]
3930 )
3931
3932 ;; vshr_n, vrshr_n
;; Right shift by immediate over the VSHR_N unspec iterator.  Operand 2 must
;; be an immediate; at output time it is checked with neon_const_bounds
;; against 1 and (element bits + 1) before being printed as the shift count.
3933 (define_insn "neon_v<shift_op><sup>_n<mode>"
3934 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3935 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3936 (match_operand:SI 2 "immediate_operand" "i")]
3937 VSHR_N))]
3938 "TARGET_NEON"
3939 {
3940 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3941 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3942 }
3943 [(set_attr "type" "neon_shift_imm<q>")]
3944 )
3945
3946 ;; vshrn_n, vrshrn_n
;; Narrowing right shift by immediate over the VSHRN_N iterator: the VN
;; input (printed with %q1) produces a <V_narrow> result (printed with %P0).
;; The immediate is checked with neon_const_bounds against 1 and
;; (element bits / 2 + 1), where the element size is that of the input mode.
3947 (define_insn "neon_v<shift_op>_n<mode>"
3948 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3949 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3950 (match_operand:SI 2 "immediate_operand" "i")]
3951 VSHRN_N))]
3952 "TARGET_NEON"
3953 {
3954 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3955 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3956 }
3957 [(set_attr "type" "neon_shift_imm_narrow_q")]
3958 )
3959
3960 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing right shift by immediate over the VQSHRN_N
;; iterator.  Same operand layout and bounds check (1 and element bits / 2
;; + 1) as the non-saturating narrowing pattern; the type suffix printed is
;; <sup> followed by <V_sz_elem>.
3961 (define_insn "neon_v<shift_op><sup>_n<mode>"
3962 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3963 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3964 (match_operand:SI 2 "immediate_operand" "i")]
3965 VQSHRN_N))]
3966 "TARGET_NEON"
3967 {
3968 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3969 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3970 }
3971 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3972 )
3973
3974 ;; vqshrun_n, vqrshrun_n
;; Saturating narrowing right shift by immediate over the VQSHRUN_N
;; iterator.  Same operand layout and bounds check as the other narrowing
;; shifts; the type suffix printed is <V_s_elem>.
3975 (define_insn "neon_v<shift_op>_n<mode>"
3976 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3977 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3978 (match_operand:SI 2 "immediate_operand" "i")]
3979 VQSHRUN_N))]
3980 "TARGET_NEON"
3981 {
3982 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3983 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3984 }
3985 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3986 )
3987
;; Left shift by immediate (UNSPEC_VSHL_N) on the VDQIX modes.  The
;; immediate in operand 2 is checked with neon_const_bounds against 0 and
;; the element size in bits, then printed as the vshl.<V_if_elem> count.
3988 (define_insn "neon_vshl_n<mode>"
3989 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3990 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3991 (match_operand:SI 2 "immediate_operand" "i")]
3992 UNSPEC_VSHL_N))]
3993 "TARGET_NEON"
3994 {
3995 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3996 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3997 }
3998 [(set_attr "type" "neon_shift_imm<q>")]
3999 )
4000
;; Saturating left shift by immediate over the VQSHL_N iterator.  The
;; immediate in operand 2 is checked with neon_const_bounds against 0 and
;; the element size in bits; the type suffix is <sup> plus <V_sz_elem>.
4001 (define_insn "neon_vqshl_<sup>_n<mode>"
4002 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4003 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4004 (match_operand:SI 2 "immediate_operand" "i")]
4005 VQSHL_N))]
4006 "TARGET_NEON"
4007 {
4008 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4009 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4010 }
4011 [(set_attr "type" "neon_sat_shift_imm<q>")]
4012 )
4013
;; vqshlu by immediate (UNSPEC_VQSHLU_N) on the VDQIX modes.  The immediate
;; in operand 2 is checked with neon_const_bounds against 0 and the element
;; size in bits; the type suffix printed is <V_s_elem>.
4014 (define_insn "neon_vqshlu_n<mode>"
4015 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4016 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4017 (match_operand:SI 2 "immediate_operand" "i")]
4018 UNSPEC_VQSHLU_N))]
4019 "TARGET_NEON"
4020 {
4021 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4022 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4023 }
4024 [(set_attr "type" "neon_sat_shift_imm<q>")]
4025 )
4026
;; Widening left shift by immediate over the VSHLL_N iterator: a VW input
;; (printed with %P1) produces a <V_widen> result (printed with %q0).
;; NOTE(review): the in-line comment states 0 < imm, yet the lower bound
;; passed to neon_const_bounds is 0, whereas the right-shift patterns in
;; this file pass 1 as the lower bound.  Confirm whether a shift count of 0
;; is meant to be accepted here and bring comment and code into agreement.
4027 (define_insn "neon_vshll<sup>_n<mode>"
4028 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4029 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4030 (match_operand:SI 2 "immediate_operand" "i")]
4031 VSHLL_N))]
4032 "TARGET_NEON"
4033 {
4034 /* The boundaries are: 0 < imm <= size. */
4035 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4036 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4037 }
4038 [(set_attr "type" "neon_shift_imm_long")]
4039 )
4040
4041 ;; vsra_n, vrsra_n
;; Shift right by immediate and accumulate, over the VSRA_N iterator.
;; Operand 1 is tied to the destination (constraint "0") and therefore does
;; not appear in the output template; operand 2 is the value shifted and
;; operand 3 the immediate, checked against 1 and (element bits + 1).
4042 (define_insn "neon_v<shift_op><sup>_n<mode>"
4043 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4044 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4045 (match_operand:VDQIX 2 "s_register_operand" "w")
4046 (match_operand:SI 3 "immediate_operand" "i")]
4047 VSRA_N))]
4048 "TARGET_NEON"
4049 {
4050 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4051 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4052 }
4053 [(set_attr "type" "neon_shift_acc<q>")]
4054 )
4055
;; vsri by immediate (UNSPEC_VSRI).  Operand 1 is tied to the destination;
;; operand 2 is the second vector input and operand 3 the immediate, checked
;; with neon_const_bounds against 1 and (element bits + 1).
4056 (define_insn "neon_vsri_n<mode>"
4057 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4058 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4059 (match_operand:VDQIX 2 "s_register_operand" "w")
4060 (match_operand:SI 3 "immediate_operand" "i")]
4061 UNSPEC_VSRI))]
4062 "TARGET_NEON"
4063 {
4064 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4065 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4066 }
4067 [(set_attr "type" "neon_shift_reg<q>")]
4068 )
4069
;; vsli by immediate (UNSPEC_VSLI).  Operand 1 is tied to the destination;
;; operand 2 is the second vector input and operand 3 the immediate, checked
;; with neon_const_bounds against 0 and the element size in bits.
4070 (define_insn "neon_vsli_n<mode>"
4071 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4072 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4073 (match_operand:VDQIX 2 "s_register_operand" "w")
4074 (match_operand:SI 3 "immediate_operand" "i")]
4075 UNSPEC_VSLI))]
4076 "TARGET_NEON"
4077 {
4078 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4079 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4080 }
4081 [(set_attr "type" "neon_shift_reg<q>")]
4082 )
4083
;; One-register table lookup (UNSPEC_VTBL): operand 1 is the V8QI table,
;; printed as the single-register list {%P1}; operand 2 is the V8QI index
;; vector.
4084 (define_insn "neon_vtbl1v8qi"
4085 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4086 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4087 (match_operand:V8QI 2 "s_register_operand" "w")]
4088 UNSPEC_VTBL))]
4089 "TARGET_NEON"
4090 "vtbl.8\t%P0, {%P1}, %P2"
4091 [(set_attr "type" "neon_tbl1")]
4092 )
4093
;; Two-register table lookup.  Operand 1 is the table, held in a TImode
;; register.  The output code takes its hard register number and builds
;; V8QImode registers at tabbase and tabbase + 2 so they can be printed as
;; the list {%P1, %P2} (step of 2 presumably because each D register spans
;; two hard register numbers -- confirm).  The instruction text is written
;; directly with output_asm_insn, hence the empty returned template.
4094 (define_insn "neon_vtbl2v8qi"
4095 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4096 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4097 (match_operand:V8QI 2 "s_register_operand" "w")]
4098 UNSPEC_VTBL))]
4099 "TARGET_NEON"
4100 {
4101 rtx ops[4];
4102 int tabbase = REGNO (operands[1]);
4103
4104 ops[0] = operands[0];
4105 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4106 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4107 ops[3] = operands[2];
4108 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4109
4110 return "";
4111 }
4112 [(set_attr "type" "neon_tbl2")]
4113 )
4114
;; Three-register table lookup.  Operand 1 is the table, held in an EImode
;; register; the output code builds V8QImode registers at tabbase,
;; tabbase + 2 and tabbase + 4 and prints them as the list {%P1, %P2, %P3}.
;; The instruction is written with output_asm_insn and an empty template is
;; returned.
4115 (define_insn "neon_vtbl3v8qi"
4116 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4117 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4118 (match_operand:V8QI 2 "s_register_operand" "w")]
4119 UNSPEC_VTBL))]
4120 "TARGET_NEON"
4121 {
4122 rtx ops[5];
4123 int tabbase = REGNO (operands[1]);
4124
4125 ops[0] = operands[0];
4126 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4127 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4128 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4129 ops[4] = operands[2];
4130 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4131
4132 return "";
4133 }
4134 [(set_attr "type" "neon_tbl3")]
4135 )
4136
;; Four-register table lookup.  Operand 1 is the table, held in an OImode
;; register; the output code builds V8QImode registers at tabbase,
;; tabbase + 2, tabbase + 4 and tabbase + 6 and prints them as the list
;; {%P1, %P2, %P3, %P4}.  The instruction is written with output_asm_insn
;; and an empty template is returned.
4137 (define_insn "neon_vtbl4v8qi"
4138 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4139 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4140 (match_operand:V8QI 2 "s_register_operand" "w")]
4141 UNSPEC_VTBL))]
4142 "TARGET_NEON"
4143 {
4144 rtx ops[6];
4145 int tabbase = REGNO (operands[1]);
4146
4147 ops[0] = operands[0];
4148 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4149 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4150 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4151 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4152 ops[5] = operands[2];
4153 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4154
4155 return "";
4156 }
4157 [(set_attr "type" "neon_tbl4")]
4158 )
4159
4160 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI lookup in a single V16QI table.  The insn itself outputs "#" and is
;; split once reload has completed: the table (operand 1) is re-viewed as
;; TImode, and one neon_vtbl2v8qi is emitted for the low V8QI half of the
;; result/index pair (operands 0 and 2) and one for the high half.  The
;; destination is earlyclobber ("=&w").
4161 (define_insn_and_split "neon_vtbl1v16qi"
4162 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4163 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4164 (match_operand:V16QI 2 "s_register_operand" "w")]
4165 UNSPEC_VTBL))]
4166 "TARGET_NEON"
4167 "#"
4168 "&& reload_completed"
4169 [(const_int 0)]
4170 {
4171 rtx op0, op1, op2, part0, part2;
4172 unsigned ofs;
4173
4174 op0 = operands[0];
4175 op1 = gen_lowpart (TImode, operands[1]);
4176 op2 = operands[2];
4177
4178 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4179 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4180 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4181 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4182
4183 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4184 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4185 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4186 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4187 DONE;
4188 }
4189 [(set_attr "type" "multiple")]
4190 )
4191
;; V16QI lookup in a two-vector table held in an OImode register.  The insn
;; itself outputs "#" and is split once reload has completed into two V8QI
;; lookups, one for the low half of the result/index pair (operands 0 and 2)
;; and one for the high half.  The destination is earlyclobber ("=&w").
;; The table operand is OImode, which is the operand mode of
;; neon_vtbl4v8qi, so that generator is used.  The vtblNv8qi patterns all
;; share one RTL shape and differ only in the mode of operand 1, so naming
;; the TImode generator here would build identical RTL but misstate which
;; instruction is produced.
4192 (define_insn_and_split "neon_vtbl2v16qi"
4193 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4194 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4195 (match_operand:V16QI 2 "s_register_operand" "w")]
4196 UNSPEC_VTBL))]
4197 "TARGET_NEON"
4198 "#"
4199 "&& reload_completed"
4200 [(const_int 0)]
4201 {
4202 rtx op0, op1, op2, part0, part2;
4203 unsigned ofs;
4204
4205 op0 = operands[0];
4206 op1 = operands[1];
4207 op2 = operands[2];
4208
4209 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4210 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4211 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4212 emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4213
4214 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4215 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4216 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4217 emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4218 DONE;
4219 }
4220 [(set_attr "type" "multiple")]
4221 )
4222
4223 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4224 ;; handle quad-word input modes, producing octa-word output modes. But
4225 ;; that requires us to add support for octa-word vector modes in moves.
4226 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers into one OImode value (UNSPEC_VCONCAT).
;; The insn outputs "#" and is split once reload has completed; the split
;; is delegated entirely to neon_split_vcombine.
4227 (define_insn_and_split "neon_vcombinev16qi"
4228 [(set (match_operand:OI 0 "s_register_operand" "=w")
4229 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4230 (match_operand:V16QI 2 "s_register_operand" "w")]
4231 UNSPEC_VCONCAT))]
4232 "TARGET_NEON"
4233 "#"
4234 "&& reload_completed"
4235 [(const_int 0)]
4236 {
4237 neon_split_vcombine (operands);
4238 DONE;
4239 }
4240 [(set_attr "type" "multiple")]
4241 )
4242
;; One-register table extension (UNSPEC_VTBX).  Operand 1 is tied to the
;; destination (constraint "0") and so is not printed; operand 2 is the V8QI
;; table, printed as {%P2}, and operand 3 the V8QI index vector.
4243 (define_insn "neon_vtbx1v8qi"
4244 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4245 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4246 (match_operand:V8QI 2 "s_register_operand" "w")
4247 (match_operand:V8QI 3 "s_register_operand" "w")]
4248 UNSPEC_VTBX))]
4249 "TARGET_NEON"
4250 "vtbx.8\t%P0, {%P2}, %P3"
4251 [(set_attr "type" "neon_tbl1")]
4252 )
4253
;; Two-register table extension.  Operand 1 is tied to the destination;
;; operand 2 is the table in a TImode register.  The output code builds
;; V8QImode registers at tabbase and tabbase + 2 for the list {%P1, %P2},
;; writes the instruction with output_asm_insn and returns an empty
;; template.
4254 (define_insn "neon_vtbx2v8qi"
4255 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4256 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4257 (match_operand:TI 2 "s_register_operand" "w")
4258 (match_operand:V8QI 3 "s_register_operand" "w")]
4259 UNSPEC_VTBX))]
4260 "TARGET_NEON"
4261 {
4262 rtx ops[4];
4263 int tabbase = REGNO (operands[2]);
4264
4265 ops[0] = operands[0];
4266 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4267 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4268 ops[3] = operands[3];
4269 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4270
4271 return "";
4272 }
4273 [(set_attr "type" "neon_tbl2")]
4274 )
4275
;; Three-register table extension.  Operand 1 is tied to the destination;
;; operand 2 is the table in an EImode register.  The output code builds
;; V8QImode registers at tabbase, tabbase + 2 and tabbase + 4 for the list
;; {%P1, %P2, %P3}, writes the instruction with output_asm_insn and returns
;; an empty template.
4276 (define_insn "neon_vtbx3v8qi"
4277 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4278 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4279 (match_operand:EI 2 "s_register_operand" "w")
4280 (match_operand:V8QI 3 "s_register_operand" "w")]
4281 UNSPEC_VTBX))]
4282 "TARGET_NEON"
4283 {
4284 rtx ops[5];
4285 int tabbase = REGNO (operands[2]);
4286
4287 ops[0] = operands[0];
4288 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4289 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4290 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4291 ops[4] = operands[3];
4292 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4293
4294 return "";
4295 }
4296 [(set_attr "type" "neon_tbl3")]
4297 )
4298
;; Four-register table extension.  Operand 1 is tied to the destination;
;; operand 2 is the table in an OImode register.  The output code builds
;; V8QImode registers at tabbase, tabbase + 2, tabbase + 4 and tabbase + 6
;; for the list {%P1, %P2, %P3, %P4}, writes the instruction with
;; output_asm_insn and returns an empty template.
4299 (define_insn "neon_vtbx4v8qi"
4300 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4301 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4302 (match_operand:OI 2 "s_register_operand" "w")
4303 (match_operand:V8QI 3 "s_register_operand" "w")]
4304 UNSPEC_VTBX))]
4305 "TARGET_NEON"
4306 {
4307 rtx ops[6];
4308 int tabbase = REGNO (operands[2]);
4309
4310 ops[0] = operands[0];
4311 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4312 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4313 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4314 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4315 ops[5] = operands[3];
4316 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4317
4318 return "";
4319 }
4320 [(set_attr "type" "neon_tbl4")]
4321 )
4322
;; Expander for vtrn on the VDQWH modes.  It emits one parallel with two
;; sets computed from the same inputs (operands 1 and 2): operand 0 receives
;; UNSPEC_VTRN1 and operand 3 receives UNSPEC_VTRN2.  There is no
;; preparation code.
4323 (define_expand "neon_vtrn<mode>_internal"
4324 [(parallel
4325 [(set (match_operand:VDQWH 0 "s_register_operand")
4326 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4327 (match_operand:VDQWH 2 "s_register_operand")]
4328 UNSPEC_VTRN1))
4329 (set (match_operand:VDQWH 3 "s_register_operand")
4330 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4331 "TARGET_NEON"
4332 ""
4333 )
4334
4335 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 (both earlyclobber), and the
;; two inputs are operands 1 and 3, tied to them through constraints "0"
;; and "2".  Only %0 and %2 are printed, since each names both an input and
;; an output of the instruction.
4336 (define_insn "*neon_vtrn<mode>_insn"
4337 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4338 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4339 (match_operand:VDQWH 3 "s_register_operand" "2")]
4340 UNSPEC_VTRN1))
4341 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4342 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4343 UNSPEC_VTRN2))]
4344 "TARGET_NEON"
4345 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4346 [(set_attr "type" "neon_permute<q>")]
4347 )
4348
;; Expander for vzip on the VDQWH modes.  It emits one parallel with two
;; sets computed from the same inputs (operands 1 and 2): operand 0 receives
;; UNSPEC_VZIP1 and operand 3 receives UNSPEC_VZIP2.  There is no
;; preparation code.
4349 (define_expand "neon_vzip<mode>_internal"
4350 [(parallel
4351 [(set (match_operand:VDQWH 0 "s_register_operand")
4352 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4353 (match_operand:VDQWH 2 "s_register_operand")]
4354 UNSPEC_VZIP1))
4355 (set (match_operand:VDQWH 3 "s_register_operand")
4356 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4357 "TARGET_NEON"
4358 ""
4359 )
4360
4361 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 (both earlyclobber), and the
;; two inputs are operands 1 and 3, tied to them through constraints "0"
;; and "2".  Only %0 and %2 are printed in the vzip template.
4362 (define_insn "*neon_vzip<mode>_insn"
4363 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4364 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4365 (match_operand:VDQWH 3 "s_register_operand" "2")]
4366 UNSPEC_VZIP1))
4367 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4368 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4369 UNSPEC_VZIP2))]
4370 "TARGET_NEON"
4371 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4372 [(set_attr "type" "neon_zip<q>")]
4373 )
4374
;; Expander for vuzp on the VDQWH modes.  It emits one parallel with two
;; sets computed from the same inputs (operands 1 and 2): operand 0 receives
;; UNSPEC_VUZP1 and operand 3 receives UNSPEC_VUZP2.  There is no
;; preparation code.
4375 (define_expand "neon_vuzp<mode>_internal"
4376 [(parallel
4377 [(set (match_operand:VDQWH 0 "s_register_operand")
4378 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4379 (match_operand:VDQWH 2 "s_register_operand")]
4380 UNSPEC_VUZP1))
4381 (set (match_operand:VDQWH 3 "s_register_operand" "")
4382 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4383 "TARGET_NEON"
4384 ""
4385 )
4386
4387 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 (both earlyclobber), and the
;; two inputs are operands 1 and 3, tied to them through constraints "0"
;; and "2".  Only %0 and %2 are printed in the vuzp template.
4388 (define_insn "*neon_vuzp<mode>_insn"
4389 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4390 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4391 (match_operand:VDQWH 3 "s_register_operand" "2")]
4392 UNSPEC_VUZP1))
4393 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4394 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4395 UNSPEC_VUZP2))]
4396 "TARGET_NEON"
4397 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4398 [(set_attr "type" "neon_zip<q>")]
4399 )
4400
;; Named expander with no preparation code: it emits the UNSPEC_VLD1 set
;; shown, loading the neon_struct_operand memory operand 1 into register
;; operand 0.  The RTL has the same shape as neon_vld1<mode> below.
4401 (define_expand "vec_load_lanes<mode><mode>"
4402 [(set (match_operand:VDQX 0 "s_register_operand")
4403 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4404 UNSPEC_VLD1))]
4405 "TARGET_NEON")
4406
;; Whole-vector vld1 on the VDQX modes (UNSPEC_VLD1): operand 1 is a memory
;; operand accepted by neon_struct_operand with constraint "Um", printed
;; with %A1; the destination register is printed with %h0.
4407 (define_insn "neon_vld1<mode>"
4408 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4409 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4410 UNSPEC_VLD1))]
4411 "TARGET_NEON"
4412 "vld1.<V_sz_elem>\t%h0, %A1"
4413 [(set_attr "type" "neon_load1_one_lane<q>")]
4414 )
4415
4416 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4417 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
4418 ;; lane order here.
;; D-register form.  Operand 2 (the previous vector contents) is tied to the
;; destination.  The lane number in operand 3 is converted with
;; NEON_ENDIAN_LANE_N and stored back before printing.  When the mode has a
;; single unit (max == 1) a plain vld1 of the whole register is emitted
;; instead of the lane-indexed form.
4419 (define_insn "neon_vld1_lane<mode>"
4420 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4421 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4422 (match_operand:VDX 2 "s_register_operand" "0")
4423 (match_operand:SI 3 "immediate_operand" "i")]
4424 UNSPEC_VLD1_LANE))]
4425 "TARGET_NEON"
4426 {
4427 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4428 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4429 operands[3] = GEN_INT (lane);
4430 if (max == 1)
4431 return "vld1.<V_sz_elem>\t%P0, %A1";
4432 else
4433 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4434 }
4435 [(set_attr "type" "neon_load1_one_lane<q>")]
4436 )
4437
4438 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4439 ;; here on big endian targets.
;; Q-register form.  Operand 2 (the previous vector contents) is tied to the
;; destination.  The lane number is converted with NEON_ENDIAN_LANE_N; a
;; lane in the upper half of the vector is rebased by max / 2 and the
;; register number advanced by 2, so that the load is printed against a
;; <V_HALF>mode register.  When the mode has two units (max == 2) each half
;; holds one element and a plain vld1 of that half is emitted.
4440 (define_insn "neon_vld1_lane<mode>"
4441 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4442 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4443 (match_operand:VQX 2 "s_register_operand" "0")
4444 (match_operand:SI 3 "immediate_operand" "i")]
4445 UNSPEC_VLD1_LANE))]
4446 "TARGET_NEON"
4447 {
4448 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4449 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4451 int regno = REGNO (operands[0]);
4452 if (lane >= max / 2)
4453 {
4454 lane -= max / 2;
4455 regno += 2;
4457 }
/* Store the lane operand once, after any rebasing, as neon_vst1_lane does. */
operands[3] = GEN_INT (lane);
4458 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4459 if (max == 2)
4460 return "vld1.<V_sz_elem>\t%P0, %A1";
4461 else
4462 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4463 }
4464 [(set_attr "type" "neon_load1_one_lane<q>")]
4465 )
4466
;; Load one <V_elem> from memory and duplicate it across a VD_LANE vector
;; (expressed as vec_duplicate rather than an unspec).  Printed as a vld1
;; with the all-lanes register syntax {%P0[]}.
4467 (define_insn "neon_vld1_dup<mode>"
4468 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4469 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4470 "TARGET_NEON"
4471 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4472 [(set_attr "type" "neon_load1_all_lanes<q>")]
4473 )
4474
4475 ;; Special case for DImode. Treat it exactly like a simple load.
;; The expander has no preparation code and emits an UNSPEC_VLD1 set on
;; DImode operands, i.e. the same unspec used by the plain vld1 patterns.
4476 (define_expand "neon_vld1_dupdi"
4477 [(set (match_operand:DI 0 "s_register_operand" "")
4478 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
4479 UNSPEC_VLD1))]
4480 "TARGET_NEON"
4481 ""
4482 )
4483
;; Load one <V_elem> from memory and duplicate it across a VQ2 vector.  A
;; single vld1 names two all-lanes registers, %e0[] and %f0[] (presumably
;; the low and high D halves of the Q destination -- confirm against the
;; operand-printing code).
4484 (define_insn "neon_vld1_dup<mode>"
4485 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
4486 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4487 "TARGET_NEON"
4488 {
4489 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4490 }
4491 [(set_attr "type" "neon_load1_all_lanes<q>")]
4492 )
4493
;; V2DI duplicate-load.  The insn outputs "#" and is split once reload has
;; completed into two steps: neon_vld1_dupdi loads the DImode low part of
;; the destination from memory, and a move then copies that low part into
;; the DImode high part.  The "length" attribute is set to 8.
4494 (define_insn_and_split "neon_vld1_dupv2di"
4495 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4496 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4497 "TARGET_NEON"
4498 "#"
4499 "&& reload_completed"
4500 [(const_int 0)]
4501 {
4502 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4503 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4504 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4505 DONE;
4506 }
4507 [(set_attr "length" "8")
4508 (set_attr "type" "neon_load1_all_lanes_q")]
4509 )
4510
;; Named expander with no preparation code: it emits the UNSPEC_VST1 set
;; shown, storing register operand 1 to the neon_struct_operand memory
;; operand 0.  The RTL has the same shape as neon_vst1<mode> below.
4511 (define_expand "vec_store_lanes<mode><mode>"
4512 [(set (match_operand:VDQX 0 "neon_struct_operand")
4513 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
4514 UNSPEC_VST1))]
4515 "TARGET_NEON")
4516
;; Whole-vector vst1 on the VDQX modes (UNSPEC_VST1): operand 0 is the
;; memory destination (constraint "=Um", printed with %A0) and operand 1
;; the source register, printed with %h1.
4517 (define_insn "neon_vst1<mode>"
4518 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4519 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4520 UNSPEC_VST1))]
4521 "TARGET_NEON"
4522 "vst1.<V_sz_elem>\t%h1, %A0"
4523 [(set_attr "type" "neon_store1_1reg<q>")])
4524
4525 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4526 ;; here on big endian targets.
;; D-register form: store one lane of operand 1 to the <V_elem> memory
;; operand 0.  The lane number in operand 2 is converted with
;; NEON_ENDIAN_LANE_N and stored back before printing.  When the mode has a
;; single unit (max == 1) the whole register is stored with {%P1} instead
;; of the lane-indexed form.
4527 (define_insn "neon_vst1_lane<mode>"
4528 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4529 (unspec:<V_elem>
4530 [(match_operand:VDX 1 "s_register_operand" "w")
4531 (match_operand:SI 2 "immediate_operand" "i")]
4532 UNSPEC_VST1_LANE))]
4533 "TARGET_NEON"
4534 {
4535 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4536 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4537 operands[2] = GEN_INT (lane);
4538 if (max == 1)
4539 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4540 else
4541 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4542 }
4543 [(set_attr "type" "neon_store1_one_lane<q>")]
4544 )
4545
4546 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4547 ;; here on big endian targets.
;; Q-register form: store one lane of operand 1 to the <V_elem> memory
;; operand 0.  After conversion with NEON_ENDIAN_LANE_N, a lane in the upper
;; half of the vector is rebased by max / 2 and the register number
;; advanced by 2; operand 1 is then replaced by a <V_HALF>mode register and
;; operand 2 by the adjusted lane.  When the mode has two units (max == 2)
;; the whole half register is stored with {%P1}.
4548 (define_insn "neon_vst1_lane<mode>"
4549 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4550 (unspec:<V_elem>
4551 [(match_operand:VQX 1 "s_register_operand" "w")
4552 (match_operand:SI 2 "immediate_operand" "i")]
4553 UNSPEC_VST1_LANE))]
4554 "TARGET_NEON"
4555 {
4556 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4557 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4558 int regno = REGNO (operands[1]);
4559 if (lane >= max / 2)
4560 {
4561 lane -= max / 2;
4562 regno += 2;
4563 }
4564 operands[2] = GEN_INT (lane);
4565 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4566 if (max == 2)
4567 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4568 else
4569 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4570 }
4571 [(set_attr "type" "neon_store1_one_lane<q>")]
4572 )
4573
;; Expander for a two-vector structure load into a TImode register, iterated
;; over VDX.  It has no preparation code; the generated RTL has the same
;; UNSPEC_VLD2 shape as the TImode neon_vld2<mode> insn.
4574 (define_expand "vec_load_lanesti<mode>"
4575 [(set (match_operand:TI 0 "s_register_operand")
4576 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4577 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4578 UNSPEC_VLD2))]
4579 "TARGET_NEON")
4580
;; Two-vector structure load into a TImode register, iterated over VDX.
;; When the element size is 64 bits a vld1.64 is emitted instead of a vld2,
;; and the type attribute switches from neon_load2_2reg to neon_load1_2reg
;; accordingly.  The inner UNSPEC_VSTRUCTDUMMY carries only the mode.
4581 (define_insn "neon_vld2<mode>"
4582 [(set (match_operand:TI 0 "s_register_operand" "=w")
4583 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4584 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4585 UNSPEC_VLD2))]
4586 "TARGET_NEON"
4587 {
4588 if (<V_sz_elem> == 64)
4589 return "vld1.64\t%h0, %A1";
4590 else
4591 return "vld2.<V_sz_elem>\t%h0, %A1";
4592 }
4593 [(set (attr "type")
4594 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4595 (const_string "neon_load1_2reg<q>")
4596 (const_string "neon_load2_2reg<q>")))]
4597 )
4598
;; Expander for a two-vector structure load into an OImode register,
;; iterated over VQ2.  It has no preparation code; the generated RTL has the
;; same UNSPEC_VLD2 shape as the OImode neon_vld2<mode> insn.
4599 (define_expand "vec_load_lanesoi<mode>"
4600 [(set (match_operand:OI 0 "s_register_operand")
4601 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4602 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4603 UNSPEC_VLD2))]
4604 "TARGET_NEON")
4605
;; Two-vector structure load into an OImode register, iterated over VQ2.
;; Always emits a single vld2 of the whole register list.
4606 (define_insn "neon_vld2<mode>"
4607 [(set (match_operand:OI 0 "s_register_operand" "=w")
4608 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4609 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4610 UNSPEC_VLD2))]
4611 "TARGET_NEON"
4612 "vld2.<V_sz_elem>\t%h0, %A1"
4613 [(set_attr "type" "neon_load2_2reg_q")])
4614
4615 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4616 ;; here on big endian targets.
;; Load one lane of each of two D registers held in a TImode register.
;; Operand 2 is the previous value of the destination and is tied to
;; operand 0; operand 3 is the lane, remapped with NEON_ENDIAN_LANE_N.
;; The two registers printed are at register numbers regno and regno + 2.
4617 (define_insn "neon_vld2_lane<mode>"
4618 [(set (match_operand:TI 0 "s_register_operand" "=w")
4619 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4620 (match_operand:TI 2 "s_register_operand" "0")
4621 (match_operand:SI 3 "immediate_operand" "i")
4622 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4623 UNSPEC_VLD2_LANE))]
4624 "TARGET_NEON"
4625 {
4626 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4627 int regno = REGNO (operands[0]);
/* ops: 0 and 1 are the two registers, 2 the memory, 3 the lane.  */
4628 rtx ops[4];
4629 ops[0] = gen_rtx_REG (DImode, regno);
4630 ops[1] = gen_rtx_REG (DImode, regno + 2);
4631 ops[2] = operands[1];
4632 ops[3] = GEN_INT (lane);
/* The instruction is printed here, so the returned template is empty.  */
4633 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4634 return "";
4635 }
4636 [(set_attr "type" "neon_load2_one_lane<q>")]
4637 )
4638
4639 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4640 ;; here on big endian targets.
;; Load one lane of each of two Q-register vectors held in an OImode
;; register (iterated over VQ_HS).  Operand 2 is tied to operand 0; operand 3
;; is the lane, remapped with NEON_ENDIAN_LANE_N.  A lane in the upper half
;; moves the base register number up by 2 and reduces the lane by half the
;; element count.  The two registers printed are 4 register numbers apart.
4641 (define_insn "neon_vld2_lane<mode>"
4642 [(set (match_operand:OI 0 "s_register_operand" "=w")
4643 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4644 (match_operand:OI 2 "s_register_operand" "0")
4645 (match_operand:SI 3 "immediate_operand" "i")
4646 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4647 UNSPEC_VLD2_LANE))]
4648 "TARGET_NEON"
4649 {
4650 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4651 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4652 int regno = REGNO (operands[0]);
4653 rtx ops[4];
/* Upper-half lanes address the second half of each vector.  */
4654 if (lane >= max / 2)
4655 {
4656 lane -= max / 2;
4657 regno += 2;
4658 }
4659 ops[0] = gen_rtx_REG (DImode, regno);
4660 ops[1] = gen_rtx_REG (DImode, regno + 4);
4661 ops[2] = operands[1];
4662 ops[3] = GEN_INT (lane);
4663 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4664 return "";
4665 }
4666 [(set_attr "type" "neon_load2_one_lane<q>")]
4667 )
4668
;; Load a two-element structure and replicate it across all lanes of the two
;; D registers of a TImode register (iterated over VDX).  Modes with more
;; than one element emit a vld2 with empty lane brackets; a single-element
;; mode emits a plain vld1 instead, and the type attribute follows the same
;; split.
4669 (define_insn "neon_vld2_dup<mode>"
4670 [(set (match_operand:TI 0 "s_register_operand" "=w")
4671 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4672 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4673 UNSPEC_VLD2_DUP))]
4674 "TARGET_NEON"
4675 {
4676 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4677 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4678 else
4679 return "vld1.<V_sz_elem>\t%h0, %A1";
4680 }
4681 [(set (attr "type")
4682 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4683 (const_string "neon_load2_all_lanes<q>")
4684 (const_string "neon_load1_1reg<q>")))]
4685 )
4686
;; Expander for a two-vector structure store from a TImode register,
;; iterated over VDX.  It has no preparation code; the generated RTL has the
;; same UNSPEC_VST2 shape as the TImode neon_vst2<mode> insn.
4687 (define_expand "vec_store_lanesti<mode>"
4688 [(set (match_operand:TI 0 "neon_struct_operand")
4689 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4690 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4691 UNSPEC_VST2))]
4692 "TARGET_NEON")
4693
;; Two-vector structure store from a TImode register, iterated over VDX.
;; When the element size is 64 bits a vst1.64 is emitted instead of a vst2.
;; The type attribute follows the same split: neon_store1_2reg for the
;; vst1.64 form and neon_store2_2reg for the vst2 form.  The vst2 form
;; stores two whole registers, so it is classified as a two-register store
;; rather than a single-lane store, mirroring neon_load2_2reg on the
;; matching load pattern.
4694 (define_insn "neon_vst2<mode>"
4695 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4696 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4697 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4698 UNSPEC_VST2))]
4699 "TARGET_NEON"
4700 {
4701 if (<V_sz_elem> == 64)
4702 return "vst1.64\t%h1, %A0";
4703 else
4704 return "vst2.<V_sz_elem>\t%h1, %A0";
4705 }
4706 [(set (attr "type")
4707 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4708 (const_string "neon_store1_2reg<q>")
4709 (const_string "neon_store2_2reg<q>")))]
4710 )
4711
;; Expander for a two-vector structure store from an OImode register,
;; iterated over VQ2.  It has no preparation code; the generated RTL has the
;; same UNSPEC_VST2 shape as the OImode neon_vst2<mode> insn.
4712 (define_expand "vec_store_lanesoi<mode>"
4713 [(set (match_operand:OI 0 "neon_struct_operand")
4714 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4715 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4716 UNSPEC_VST2))]
4717 "TARGET_NEON")
4718
;; Two-vector structure store from an OImode register, iterated over VQ2.
;; Always emits a single vst2 of the whole register list.
4719 (define_insn "neon_vst2<mode>"
4720 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4721 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4722 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4723 UNSPEC_VST2))]
4724 "TARGET_NEON"
4725 "vst2.<V_sz_elem>\t%h1, %A0"
4726 [(set_attr "type" "neon_store2_4reg<q>")]
4727 )
4728
4729 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4730 ;; here on big endian targets.
;; Store one lane from each of two D registers held in a TImode register
;; (operand 1) to memory (operand 0).  Operand 2 is the lane, remapped with
;; NEON_ENDIAN_LANE_N.  The two registers printed are at register numbers
;; regno and regno + 2.
4731 (define_insn "neon_vst2_lane<mode>"
4732 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4733 (unspec:<V_two_elem>
4734 [(match_operand:TI 1 "s_register_operand" "w")
4735 (match_operand:SI 2 "immediate_operand" "i")
4736 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4737 UNSPEC_VST2_LANE))]
4738 "TARGET_NEON"
4739 {
4740 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4741 int regno = REGNO (operands[1]);
/* ops: 0 is the memory, 1 and 2 the two registers, 3 the lane.  */
4742 rtx ops[4];
4743 ops[0] = operands[0];
4744 ops[1] = gen_rtx_REG (DImode, regno);
4745 ops[2] = gen_rtx_REG (DImode, regno + 2);
4746 ops[3] = GEN_INT (lane);
/* The instruction is printed here, so the returned template is empty.  */
4747 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4748 return "";
4749 }
4750 [(set_attr "type" "neon_store2_one_lane<q>")]
4751 )
4752
4753 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4754 ;; here on big endian targets.
;; Store one lane from each of two Q-register vectors held in an OImode
;; register (iterated over VQ_HS).  Operand 2 is the lane, remapped with
;; NEON_ENDIAN_LANE_N.  A lane in the upper half moves the base register
;; number up by 2 and reduces the lane by half the element count.  The two
;; registers printed are 4 register numbers apart.
4755 (define_insn "neon_vst2_lane<mode>"
4756 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4757 (unspec:<V_two_elem>
4758 [(match_operand:OI 1 "s_register_operand" "w")
4759 (match_operand:SI 2 "immediate_operand" "i")
4760 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4761 UNSPEC_VST2_LANE))]
4762 "TARGET_NEON"
4763 {
4764 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4765 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4766 int regno = REGNO (operands[1]);
4767 rtx ops[4];
/* Upper-half lanes address the second half of each vector.  */
4768 if (lane >= max / 2)
4769 {
4770 lane -= max / 2;
4771 regno += 2;
4772 }
4773 ops[0] = operands[0];
4774 ops[1] = gen_rtx_REG (DImode, regno);
4775 ops[2] = gen_rtx_REG (DImode, regno + 4);
4776 ops[3] = GEN_INT (lane);
4777 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4778 return "";
4779 }
4780 [(set_attr "type" "neon_store2_one_lane<q>")]
4781 )
4782
;; Expander for a three-vector structure load into an EImode register,
;; iterated over VDX.  It has no preparation code; the generated RTL has the
;; same UNSPEC_VLD3 shape as the EImode neon_vld3<mode> insn.
4783 (define_expand "vec_load_lanesei<mode>"
4784 [(set (match_operand:EI 0 "s_register_operand")
4785 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4786 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4787 UNSPEC_VLD3))]
4788 "TARGET_NEON")
4789
;; Three-vector structure load into an EImode register, iterated over VDX.
;; When the element size is 64 bits a vld1.64 is emitted instead of a vld3,
;; and the type attribute switches from neon_load3_3reg to neon_load1_3reg.
4790 (define_insn "neon_vld3<mode>"
4791 [(set (match_operand:EI 0 "s_register_operand" "=w")
4792 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4793 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4794 UNSPEC_VLD3))]
4795 "TARGET_NEON"
4796 {
4797 if (<V_sz_elem> == 64)
4798 return "vld1.64\t%h0, %A1";
4799 else
4800 return "vld3.<V_sz_elem>\t%h0, %A1";
4801 }
4802 [(set (attr "type")
4803 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4804 (const_string "neon_load1_3reg<q>")
4805 (const_string "neon_load3_3reg<q>")))]
4806 )
4807
;; Expander for a three-vector structure load into a CImode register,
;; iterated over VQ2.  It simply forwards both operands to the
;; neon_vld3<mode> expander.
4808 (define_expand "vec_load_lanesci<mode>"
4809 [(match_operand:CI 0 "s_register_operand")
4810 (match_operand:CI 1 "neon_struct_operand")
4811 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4812 "TARGET_NEON"
4813 {
4814 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
4815 DONE;
4816 })
4817
;; Three-vector structure load into a CImode register, iterated over VQ2.
;; The CImode memory operand is split into two EImode halves: the first is
;; loaded with neon_vld3qa and the second, at an offset of
;; GET_MODE_SIZE (EImode) bytes, with neon_vld3qb.  The second insn also
;; receives operands[0] as an input so the result of the first is kept.
4818 (define_expand "neon_vld3<mode>"
4819 [(match_operand:CI 0 "s_register_operand")
4820 (match_operand:CI 1 "neon_struct_operand")
4821 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4822 "TARGET_NEON"
4823 {
4824 rtx mem;
4825
/* First half of the memory block.  */
4826 mem = adjust_address (operands[1], EImode, 0);
4827 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
/* Second half, one EImode further on.  */
4828 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4829 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
4830 DONE;
4831 })
4832
;; First half of a CImode three-vector load: a vld3 from an EImode memory
;; operand into the three DImode registers at register numbers regno,
;; regno + 4 and regno + 8 of the destination.
4833 (define_insn "neon_vld3qa<mode>"
4834 [(set (match_operand:CI 0 "s_register_operand" "=w")
4835 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4836 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4837 UNSPEC_VLD3A))]
4838 "TARGET_NEON"
4839 {
4840 int regno = REGNO (operands[0]);
/* ops: 0 to 2 are the destination registers, 3 the memory.  */
4841 rtx ops[4];
4842 ops[0] = gen_rtx_REG (DImode, regno);
4843 ops[1] = gen_rtx_REG (DImode, regno + 4);
4844 ops[2] = gen_rtx_REG (DImode, regno + 8);
4845 ops[3] = operands[1];
4846 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4847 return "";
4848 }
4849 [(set_attr "type" "neon_load3_3reg<q>")]
4850 )
4851
;; Second half of a CImode three-vector load: a vld3 from an EImode memory
;; operand into the three DImode registers at register numbers regno + 2,
;; regno + 6 and regno + 10 of the destination.  Operand 2 is the previous
;; value of the destination and is tied to operand 0.
4852 (define_insn "neon_vld3qb<mode>"
4853 [(set (match_operand:CI 0 "s_register_operand" "=w")
4854 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4855 (match_operand:CI 2 "s_register_operand" "0")
4856 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4857 UNSPEC_VLD3B))]
4858 "TARGET_NEON"
4859 {
4860 int regno = REGNO (operands[0]);
/* ops: 0 to 2 are the destination registers, 3 the memory.  */
4861 rtx ops[4];
4862 ops[0] = gen_rtx_REG (DImode, regno + 2);
4863 ops[1] = gen_rtx_REG (DImode, regno + 6);
4864 ops[2] = gen_rtx_REG (DImode, regno + 10);
4865 ops[3] = operands[1];
4866 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4867 return "";
4868 }
4869 [(set_attr "type" "neon_load3_3reg<q>")]
4870 )
4871
4872 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4873 ;; here on big endian targets.
;; Load one lane of each of three D registers held in an EImode register.
;; Operand 2 is tied to operand 0; operand 3 is the lane, remapped with
;; NEON_ENDIAN_LANE_N.  The registers printed are at register numbers regno,
;; regno + 2 and regno + 4.
;; NOTE(review): the address is printed with plain %3 here, whereas the vld2
;; and vld4 lane patterns use the %A modifier.  Presumably this is deliberate
;; (no alignment qualifier for this form) -- confirm against the ISA before
;; changing it.
4874 (define_insn "neon_vld3_lane<mode>"
4875 [(set (match_operand:EI 0 "s_register_operand" "=w")
4876 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4877 (match_operand:EI 2 "s_register_operand" "0")
4878 (match_operand:SI 3 "immediate_operand" "i")
4879 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4880 UNSPEC_VLD3_LANE))]
4881 "TARGET_NEON"
4882 {
4883 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
4884 int regno = REGNO (operands[0]);
/* ops: 0 to 2 are the registers, 3 the memory, 4 the lane.  */
4885 rtx ops[5];
4886 ops[0] = gen_rtx_REG (DImode, regno);
4887 ops[1] = gen_rtx_REG (DImode, regno + 2);
4888 ops[2] = gen_rtx_REG (DImode, regno + 4);
4889 ops[3] = operands[1];
4890 ops[4] = GEN_INT (lane);
4891 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4892 ops);
4893 return "";
4894 }
4895 [(set_attr "type" "neon_load3_one_lane<q>")]
4896 )
4897
4898 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4899 ;; here on big endian targets.
;; Load one lane of each of three Q-register vectors held in a CImode
;; register (iterated over VQ_HS).  Operand 2 is tied to operand 0; operand 3
;; is the lane, remapped with NEON_ENDIAN_LANE_N.  A lane in the upper half
;; moves the base register number up by 2 and reduces the lane by half the
;; element count.  The registers printed are 4 register numbers apart.
4900 (define_insn "neon_vld3_lane<mode>"
4901 [(set (match_operand:CI 0 "s_register_operand" "=w")
4902 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4903 (match_operand:CI 2 "s_register_operand" "0")
4904 (match_operand:SI 3 "immediate_operand" "i")
4905 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4906 UNSPEC_VLD3_LANE))]
4907 "TARGET_NEON"
4908 {
4909 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4910 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4911 int regno = REGNO (operands[0]);
4912 rtx ops[5];
/* Upper-half lanes address the second half of each vector.  */
4913 if (lane >= max / 2)
4914 {
4915 lane -= max / 2;
4916 regno += 2;
4917 }
4918 ops[0] = gen_rtx_REG (DImode, regno);
4919 ops[1] = gen_rtx_REG (DImode, regno + 4);
4920 ops[2] = gen_rtx_REG (DImode, regno + 8);
4921 ops[3] = operands[1];
4922 ops[4] = GEN_INT (lane);
4923 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4924 ops);
4925 return "";
4926 }
4927 [(set_attr "type" "neon_load3_one_lane<q>")]
4928 )
4929
;; Load a three-element structure and replicate it across all lanes of the
;; three D registers of an EImode register (iterated over VDX).  Modes with
;; more than one element emit a vld3 with empty lane brackets on the
;; registers at regno, regno + 2 and regno + 4; a single-element mode emits
;; a plain vld1 instead.  The type attribute follows the same split.
4930 (define_insn "neon_vld3_dup<mode>"
4931 [(set (match_operand:EI 0 "s_register_operand" "=w")
4932 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4933 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4934 UNSPEC_VLD3_DUP))]
4935 "TARGET_NEON"
4936 {
4937 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4938 {
4939 int regno = REGNO (operands[0]);
/* ops: 0 to 2 are the destination registers, 3 the memory.  */
4940 rtx ops[4];
4941 ops[0] = gen_rtx_REG (DImode, regno);
4942 ops[1] = gen_rtx_REG (DImode, regno + 2);
4943 ops[2] = gen_rtx_REG (DImode, regno + 4);
4944 ops[3] = operands[1];
4945 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4946 return "";
4947 }
4948 else
4949 return "vld1.<V_sz_elem>\t%h0, %A1";
4950 }
4951 [(set (attr "type")
4952 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4953 (const_string "neon_load3_all_lanes<q>")
4954 (const_string "neon_load1_1reg<q>")))])
4955
;; Expander for a three-vector structure store from an EImode register,
;; iterated over VDX.  It has no preparation code; the generated RTL has the
;; same UNSPEC_VST3 shape as the EImode neon_vst3<mode> insn.
4956 (define_expand "vec_store_lanesei<mode>"
4957 [(set (match_operand:EI 0 "neon_struct_operand")
4958 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4959 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4960 UNSPEC_VST3))]
4961 "TARGET_NEON")
4962
;; Three-vector structure store from an EImode register, iterated over VDX.
;; When the element size is 64 bits a vst1.64 is emitted instead of a vst3.
;; The type attribute follows the same split: neon_store1_3reg for the
;; vst1.64 form and neon_store3_3reg for the vst3 form.  The vst3 form
;; stores three whole registers, so it is classified as a three-register
;; store rather than a single-lane store, mirroring neon_load3_3reg on the
;; matching load pattern.
4963 (define_insn "neon_vst3<mode>"
4964 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4965 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4966 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4967 UNSPEC_VST3))]
4968 "TARGET_NEON"
4969 {
4970 if (<V_sz_elem> == 64)
4971 return "vst1.64\t%h1, %A0";
4972 else
4973 return "vst3.<V_sz_elem>\t%h1, %A0";
4974 }
4975 [(set (attr "type")
4976 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4977 (const_string "neon_store1_3reg<q>")
4978 (const_string "neon_store3_3reg<q>")))])
4979
;; Expander for a three-vector structure store from a CImode register,
;; iterated over VQ2.  It simply forwards both operands to the
;; neon_vst3<mode> expander.
4980 (define_expand "vec_store_lanesci<mode>"
4981 [(match_operand:CI 0 "neon_struct_operand")
4982 (match_operand:CI 1 "s_register_operand")
4983 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4984 "TARGET_NEON"
4985 {
4986 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
4987 DONE;
4988 })
4989
;; Three-vector structure store from a CImode register, iterated over VQ2.
;; The CImode memory operand is split into two EImode halves: the first is
;; written with neon_vst3qa and the second, at an offset of
;; GET_MODE_SIZE (EImode) bytes, with neon_vst3qb.  Both insns read the same
;; source register operand.
4990 (define_expand "neon_vst3<mode>"
4991 [(match_operand:CI 0 "neon_struct_operand")
4992 (match_operand:CI 1 "s_register_operand")
4993 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4994 "TARGET_NEON"
4995 {
4996 rtx mem;
4997
/* First half of the memory block.  */
4998 mem = adjust_address (operands[0], EImode, 0);
4999 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
/* Second half, one EImode further on.  */
5000 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5001 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
5002 DONE;
5003 })
5004
;; First half of a CImode three-vector store: a vst3 to an EImode memory
;; operand from the three DImode registers at register numbers regno,
;; regno + 4 and regno + 8 of the source.
5005 (define_insn "neon_vst3qa<mode>"
5006 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5007 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5008 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5009 UNSPEC_VST3A))]
5010 "TARGET_NEON"
5011 {
5012 int regno = REGNO (operands[1]);
/* ops: 0 is the memory, 1 to 3 the source registers.  */
5013 rtx ops[4];
5014 ops[0] = operands[0];
5015 ops[1] = gen_rtx_REG (DImode, regno);
5016 ops[2] = gen_rtx_REG (DImode, regno + 4);
5017 ops[3] = gen_rtx_REG (DImode, regno + 8);
5018 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5019 return "";
5020 }
5021 [(set_attr "type" "neon_store3_3reg<q>")]
5022 )
5023
;; Second half of a CImode three-vector store: a vst3 to an EImode memory
;; operand from the three DImode registers at register numbers regno + 2,
;; regno + 6 and regno + 10 of the source.
5024 (define_insn "neon_vst3qb<mode>"
5025 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5026 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5027 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5028 UNSPEC_VST3B))]
5029 "TARGET_NEON"
5030 {
5031 int regno = REGNO (operands[1]);
/* ops: 0 is the memory, 1 to 3 the source registers.  */
5032 rtx ops[4];
5033 ops[0] = operands[0];
5034 ops[1] = gen_rtx_REG (DImode, regno + 2);
5035 ops[2] = gen_rtx_REG (DImode, regno + 6);
5036 ops[3] = gen_rtx_REG (DImode, regno + 10);
5037 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5038 return "";
5039 }
5040 [(set_attr "type" "neon_store3_3reg<q>")]
5041 )
5042
5043 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5044 ;; here on big endian targets.
;; Store one lane from each of three D registers held in an EImode register
;; (operand 1) to memory (operand 0).  Operand 2 is the lane, remapped with
;; NEON_ENDIAN_LANE_N.  The registers printed are at register numbers regno,
;; regno + 2 and regno + 4.
;; NOTE(review): the address is printed with plain %0 here, whereas the vst2
;; and vst4 lane patterns use the %A modifier.  Presumably this is deliberate
;; (no alignment qualifier for this form) -- confirm against the ISA before
;; changing it.
5045 (define_insn "neon_vst3_lane<mode>"
5046 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5047 (unspec:<V_three_elem>
5048 [(match_operand:EI 1 "s_register_operand" "w")
5049 (match_operand:SI 2 "immediate_operand" "i")
5050 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5051 UNSPEC_VST3_LANE))]
5052 "TARGET_NEON"
5053 {
5054 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5055 int regno = REGNO (operands[1]);
/* ops: 0 is the memory, 1 to 3 the registers, 4 the lane.  */
5056 rtx ops[5];
5057 ops[0] = operands[0];
5058 ops[1] = gen_rtx_REG (DImode, regno);
5059 ops[2] = gen_rtx_REG (DImode, regno + 2);
5060 ops[3] = gen_rtx_REG (DImode, regno + 4);
5061 ops[4] = GEN_INT (lane);
5062 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5063 ops);
5064 return "";
5065 }
5066 [(set_attr "type" "neon_store3_one_lane<q>")]
5067 )
5068
5069 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5070 ;; here on big endian targets.
;; Store one lane from each of three Q-register vectors held in a CImode
;; register (iterated over VQ_HS).  Operand 2 is the lane, remapped with
;; NEON_ENDIAN_LANE_N.  A lane in the upper half moves the base register
;; number up by 2 and reduces the lane by half the element count.  The
;; registers printed are 4 register numbers apart.
5071 (define_insn "neon_vst3_lane<mode>"
5072 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5073 (unspec:<V_three_elem>
5074 [(match_operand:CI 1 "s_register_operand" "w")
5075 (match_operand:SI 2 "immediate_operand" "i")
5076 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5077 UNSPEC_VST3_LANE))]
5078 "TARGET_NEON"
5079 {
5080 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5081 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5082 int regno = REGNO (operands[1]);
5083 rtx ops[5];
/* Upper-half lanes address the second half of each vector.  */
5084 if (lane >= max / 2)
5085 {
5086 lane -= max / 2;
5087 regno += 2;
5088 }
5089 ops[0] = operands[0];
5090 ops[1] = gen_rtx_REG (DImode, regno);
5091 ops[2] = gen_rtx_REG (DImode, regno + 4);
5092 ops[3] = gen_rtx_REG (DImode, regno + 8);
5093 ops[4] = GEN_INT (lane);
5094 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5095 ops);
5096 return "";
5097 }
5098 [(set_attr "type" "neon_store3_one_lane<q>")]
5099 )
5100
;; Expander for a four-vector structure load into an OImode register,
;; iterated over VDX.  It has no preparation code; the generated RTL has the
;; same UNSPEC_VLD4 shape as the OImode neon_vld4<mode> insn.
5101 (define_expand "vec_load_lanesoi<mode>"
5102 [(set (match_operand:OI 0 "s_register_operand")
5103 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5104 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5105 UNSPEC_VLD4))]
5106 "TARGET_NEON")
5107
;; Four-vector structure load into an OImode register, iterated over VDX.
;; When the element size is 64 bits a vld1.64 is emitted instead of a vld4,
;; and the type attribute switches from neon_load4_4reg to neon_load1_4reg.
5108 (define_insn "neon_vld4<mode>"
5109 [(set (match_operand:OI 0 "s_register_operand" "=w")
5110 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5111 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5112 UNSPEC_VLD4))]
5113 "TARGET_NEON"
5114 {
5115 if (<V_sz_elem> == 64)
5116 return "vld1.64\t%h0, %A1";
5117 else
5118 return "vld4.<V_sz_elem>\t%h0, %A1";
5119 }
5120 [(set (attr "type")
5121 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5122 (const_string "neon_load1_4reg<q>")
5123 (const_string "neon_load4_4reg<q>")))]
5124 )
5125
;; Expander for a four-vector structure load into an XImode register,
;; iterated over VQ2.  It simply forwards both operands to the
;; neon_vld4<mode> expander.
5126 (define_expand "vec_load_lanesxi<mode>"
5127 [(match_operand:XI 0 "s_register_operand")
5128 (match_operand:XI 1 "neon_struct_operand")
5129 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5130 "TARGET_NEON"
5131 {
5132 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
5133 DONE;
5134 })
5135
;; Four-vector structure load into an XImode register, iterated over VQ2.
;; The XImode memory operand is split into two OImode halves: the first is
;; loaded with neon_vld4qa and the second, at an offset of
;; GET_MODE_SIZE (OImode) bytes, with neon_vld4qb.  The second insn also
;; receives operands[0] as an input so the result of the first is kept.
5136 (define_expand "neon_vld4<mode>"
5137 [(match_operand:XI 0 "s_register_operand")
5138 (match_operand:XI 1 "neon_struct_operand")
5139 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5140 "TARGET_NEON"
5141 {
5142 rtx mem;
5143
/* First half of the memory block.  */
5144 mem = adjust_address (operands[1], OImode, 0);
5145 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
/* Second half, one OImode further on.  */
5146 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5147 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
5148 DONE;
5149 })
5150
;; First half of an XImode four-vector load: a vld4 from an OImode memory
;; operand into the four DImode registers at register numbers regno,
;; regno + 4, regno + 8 and regno + 12 of the destination.
5151 (define_insn "neon_vld4qa<mode>"
5152 [(set (match_operand:XI 0 "s_register_operand" "=w")
5153 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5154 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5155 UNSPEC_VLD4A))]
5156 "TARGET_NEON"
5157 {
5158 int regno = REGNO (operands[0]);
/* ops: 0 to 3 are the destination registers, 4 the memory.  */
5159 rtx ops[5];
5160 ops[0] = gen_rtx_REG (DImode, regno);
5161 ops[1] = gen_rtx_REG (DImode, regno + 4);
5162 ops[2] = gen_rtx_REG (DImode, regno + 8);
5163 ops[3] = gen_rtx_REG (DImode, regno + 12);
5164 ops[4] = operands[1];
5165 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5166 return "";
5167 }
5168 [(set_attr "type" "neon_load4_4reg<q>")]
5169 )
5170
;; Second half of an XImode four-vector load: a vld4 from an OImode memory
;; operand into the four DImode registers at register numbers regno + 2,
;; regno + 6, regno + 10 and regno + 14 of the destination.  Operand 2 is
;; the previous value of the destination and is tied to operand 0.
5171 (define_insn "neon_vld4qb<mode>"
5172 [(set (match_operand:XI 0 "s_register_operand" "=w")
5173 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5174 (match_operand:XI 2 "s_register_operand" "0")
5175 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5176 UNSPEC_VLD4B))]
5177 "TARGET_NEON"
5178 {
5179 int regno = REGNO (operands[0]);
/* ops: 0 to 3 are the destination registers, 4 the memory.  */
5180 rtx ops[5];
5181 ops[0] = gen_rtx_REG (DImode, regno + 2);
5182 ops[1] = gen_rtx_REG (DImode, regno + 6);
5183 ops[2] = gen_rtx_REG (DImode, regno + 10);
5184 ops[3] = gen_rtx_REG (DImode, regno + 14);
5185 ops[4] = operands[1];
5186 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5187 return "";
5188 }
5189 [(set_attr "type" "neon_load4_4reg<q>")]
5190 )
5191
5192 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5193 ;; here on big endian targets.
;; Load one lane of each of four D registers held in an OImode register.
;; Operand 2 is tied to operand 0; operand 3 is the lane, remapped with
;; NEON_ENDIAN_LANE_N.  The registers printed are at register numbers regno,
;; regno + 2, regno + 4 and regno + 6.
5194 (define_insn "neon_vld4_lane<mode>"
5195 [(set (match_operand:OI 0 "s_register_operand" "=w")
5196 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5197 (match_operand:OI 2 "s_register_operand" "0")
5198 (match_operand:SI 3 "immediate_operand" "i")
5199 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5200 UNSPEC_VLD4_LANE))]
5201 "TARGET_NEON"
5202 {
5203 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5204 int regno = REGNO (operands[0]);
/* ops: 0 to 3 are the registers, 4 the memory, 5 the lane.  */
5205 rtx ops[6];
5206 ops[0] = gen_rtx_REG (DImode, regno);
5207 ops[1] = gen_rtx_REG (DImode, regno + 2);
5208 ops[2] = gen_rtx_REG (DImode, regno + 4);
5209 ops[3] = gen_rtx_REG (DImode, regno + 6);
5210 ops[4] = operands[1];
5211 ops[5] = GEN_INT (lane);
5212 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5213 ops);
5214 return "";
5215 }
5216 [(set_attr "type" "neon_load4_one_lane<q>")]
5217 )
5218
5219 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5220 ;; here on big endian targets.
;; Load one lane of each of four Q-register vectors held in an XImode
;; register (iterated over VQ_HS).  Operand 2 is tied to operand 0; operand 3
;; is the lane, remapped with NEON_ENDIAN_LANE_N.  A lane in the upper half
;; moves the base register number up by 2 and reduces the lane by half the
;; element count.  The registers printed are 4 register numbers apart.
5221 (define_insn "neon_vld4_lane<mode>"
5222 [(set (match_operand:XI 0 "s_register_operand" "=w")
5223 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5224 (match_operand:XI 2 "s_register_operand" "0")
5225 (match_operand:SI 3 "immediate_operand" "i")
5226 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5227 UNSPEC_VLD4_LANE))]
5228 "TARGET_NEON"
5229 {
5230 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5231 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5232 int regno = REGNO (operands[0]);
5233 rtx ops[6];
/* Upper-half lanes address the second half of each vector.  */
5234 if (lane >= max / 2)
5235 {
5236 lane -= max / 2;
5237 regno += 2;
5238 }
5239 ops[0] = gen_rtx_REG (DImode, regno);
5240 ops[1] = gen_rtx_REG (DImode, regno + 4);
5241 ops[2] = gen_rtx_REG (DImode, regno + 8);
5242 ops[3] = gen_rtx_REG (DImode, regno + 12);
5243 ops[4] = operands[1];
5244 ops[5] = GEN_INT (lane);
5245 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5246 ops);
5247 return "";
5248 }
5249 [(set_attr "type" "neon_load4_one_lane<q>")]
5250 )
5251
;; Load a four-element structure and replicate it across all lanes of the
;; four D registers of an OImode register (iterated over VDX).  Modes with
;; more than one element emit a vld4 with empty lane brackets on the
;; registers at regno, regno + 2, regno + 4 and regno + 6; a single-element
;; mode emits a plain vld1 instead.  The type attribute follows the same
;; split.
5252 (define_insn "neon_vld4_dup<mode>"
5253 [(set (match_operand:OI 0 "s_register_operand" "=w")
5254 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5255 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5256 UNSPEC_VLD4_DUP))]
5257 "TARGET_NEON"
5258 {
5259 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5260 {
5261 int regno = REGNO (operands[0]);
/* ops: 0 to 3 are the destination registers, 4 the memory.  */
5262 rtx ops[5];
5263 ops[0] = gen_rtx_REG (DImode, regno);
5264 ops[1] = gen_rtx_REG (DImode, regno + 2);
5265 ops[2] = gen_rtx_REG (DImode, regno + 4);
5266 ops[3] = gen_rtx_REG (DImode, regno + 6);
5267 ops[4] = operands[1];
5268 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5269 ops);
5270 return "";
5271 }
5272 else
5273 return "vld1.<V_sz_elem>\t%h0, %A1";
5274 }
5275 [(set (attr "type")
5276 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5277 (const_string "neon_load4_all_lanes<q>")
5278 (const_string "neon_load1_1reg<q>")))]
5279 )
5280
;; Expander for a four-vector structure store from an OImode register,
;; iterated over VDX.  It has no preparation code; the generated RTL has the
;; same UNSPEC_VST4 shape as the OImode neon_vst4<mode> insn.
5281 (define_expand "vec_store_lanesoi<mode>"
5282 [(set (match_operand:OI 0 "neon_struct_operand")
5283 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5284 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5285 UNSPEC_VST4))]
5286 "TARGET_NEON")
5287
;; Four-vector structure store from an OImode register, iterated over VDX.
;; When the element size is 64 bits a vst1.64 is emitted instead of a vst4,
;; and the type attribute switches from neon_store4_4reg to
;; neon_store1_4reg.
5288 (define_insn "neon_vst4<mode>"
5289 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5290 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5291 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5292 UNSPEC_VST4))]
5293 "TARGET_NEON"
5294 {
5295 if (<V_sz_elem> == 64)
5296 return "vst1.64\t%h1, %A0";
5297 else
5298 return "vst4.<V_sz_elem>\t%h1, %A0";
5299 }
5300 [(set (attr "type")
5301 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5302 (const_string "neon_store1_4reg<q>")
5303 (const_string "neon_store4_4reg<q>")))]
5304 )
5305
;; Expander for a four-vector structure store from an XImode register,
;; iterated over VQ2.  It simply forwards both operands to the
;; neon_vst4<mode> expander.
5306 (define_expand "vec_store_lanesxi<mode>"
5307 [(match_operand:XI 0 "neon_struct_operand")
5308 (match_operand:XI 1 "s_register_operand")
5309 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5310 "TARGET_NEON"
5311 {
5312 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
5313 DONE;
5314 })
5315
;; Four-vector structure store from an XImode register, iterated over VQ2.
;; The XImode memory operand is split into two OImode halves: the first is
;; written with neon_vst4qa and the second, at an offset of
;; GET_MODE_SIZE (OImode) bytes, with neon_vst4qb.  Both insns read the same
;; source register operand.
5316 (define_expand "neon_vst4<mode>"
5317 [(match_operand:XI 0 "neon_struct_operand")
5318 (match_operand:XI 1 "s_register_operand")
5319 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5320 "TARGET_NEON"
5321 {
5322 rtx mem;
5323
/* First half of the memory block.  */
5324 mem = adjust_address (operands[0], OImode, 0);
5325 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
/* Second half, one OImode further on.  */
5326 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5327 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
5328 DONE;
5329 })
5330
;; First half of an XImode four-vector store: a vst4 to an OImode memory
;; operand from the four DImode registers at register numbers regno,
;; regno + 4, regno + 8 and regno + 12 of the source.
5331 (define_insn "neon_vst4qa<mode>"
5332 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5333 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5334 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5335 UNSPEC_VST4A))]
5336 "TARGET_NEON"
5337 {
5338 int regno = REGNO (operands[1]);
/* ops: 0 is the memory, 1 to 4 the source registers.  */
5339 rtx ops[5];
5340 ops[0] = operands[0];
5341 ops[1] = gen_rtx_REG (DImode, regno);
5342 ops[2] = gen_rtx_REG (DImode, regno + 4);
5343 ops[3] = gen_rtx_REG (DImode, regno + 8);
5344 ops[4] = gen_rtx_REG (DImode, regno + 12);
5345 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5346 return "";
5347 }
5348 [(set_attr "type" "neon_store4_4reg<q>")]
5349 )
5350
;; Second half of an XImode four-vector store: a vst4 to an OImode memory
;; operand from the four DImode registers at register numbers regno + 2,
;; regno + 6, regno + 10 and regno + 14 of the source.
5351 (define_insn "neon_vst4qb<mode>"
5352 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5353 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5354 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5355 UNSPEC_VST4B))]
5356 "TARGET_NEON"
5357 {
5358 int regno = REGNO (operands[1]);
/* ops: 0 is the memory, 1 to 4 the source registers.  */
5359 rtx ops[5];
5360 ops[0] = operands[0];
5361 ops[1] = gen_rtx_REG (DImode, regno + 2);
5362 ops[2] = gen_rtx_REG (DImode, regno + 6);
5363 ops[3] = gen_rtx_REG (DImode, regno + 10);
5364 ops[4] = gen_rtx_REG (DImode, regno + 14);
5365 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5366 return "";
5367 }
5368 [(set_attr "type" "neon_store4_4reg<q>")]
5369 )
5370
5371 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5372 ;; here on big endian targets.
;; Store one lane from each of four D registers held in an OImode register
;; (operand 1) to memory (operand 0).  Operand 2 is the lane, remapped with
;; NEON_ENDIAN_LANE_N.  The registers printed are at register numbers regno,
;; regno + 2, regno + 4 and regno + 6.
5373 (define_insn "neon_vst4_lane<mode>"
5374 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5375 (unspec:<V_four_elem>
5376 [(match_operand:OI 1 "s_register_operand" "w")
5377 (match_operand:SI 2 "immediate_operand" "i")
5378 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5379 UNSPEC_VST4_LANE))]
5380 "TARGET_NEON"
5381 {
5382 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5383 int regno = REGNO (operands[1]);
/* ops: 0 is the memory, 1 to 4 the registers, 5 the lane.  */
5384 rtx ops[6];
5385 ops[0] = operands[0];
5386 ops[1] = gen_rtx_REG (DImode, regno);
5387 ops[2] = gen_rtx_REG (DImode, regno + 2);
5388 ops[3] = gen_rtx_REG (DImode, regno + 4);
5389 ops[4] = gen_rtx_REG (DImode, regno + 6);
5390 ops[5] = GEN_INT (lane);
5391 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5392 ops);
5393 return "";
5394 }
5395 [(set_attr "type" "neon_store4_one_lane<q>")]
5396 )
5397
5398 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5399 ;; here on big endian targets.
;; Store one lane from each of four Q-register vectors held in an XImode
;; register (iterated over VQ_HS).  Operand 2 is the lane, remapped with
;; NEON_ENDIAN_LANE_N.  A lane in the upper half moves the base register
;; number up by 2 and reduces the lane by half the element count.  The
;; registers printed are 4 register numbers apart.
;; The insn stores a single lane, so its type is neon_store4_one_lane,
;; consistent with the OImode neon_vst4_lane pattern and with the Q-register
;; vst2/vst3 lane stores.
5400 (define_insn "neon_vst4_lane<mode>"
5401 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5402 (unspec:<V_four_elem>
5403 [(match_operand:XI 1 "s_register_operand" "w")
5404 (match_operand:SI 2 "immediate_operand" "i")
5405 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5406 UNSPEC_VST4_LANE))]
5407 "TARGET_NEON"
5408 {
5409 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5410 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5411 int regno = REGNO (operands[1]);
5412 rtx ops[6];
/* Upper-half lanes address the second half of each vector.  */
5413 if (lane >= max / 2)
5414 {
5415 lane -= max / 2;
5416 regno += 2;
5417 }
5418 ops[0] = operands[0];
5419 ops[1] = gen_rtx_REG (DImode, regno);
5420 ops[2] = gen_rtx_REG (DImode, regno + 4);
5421 ops[3] = gen_rtx_REG (DImode, regno + 8);
5422 ops[4] = gen_rtx_REG (DImode, regno + 12);
5423 ops[5] = GEN_INT (lane);
5424 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5425 ops);
5426 return "";
5427 }
5428 [(set_attr "type" "neon_store4_one_lane<q>")]
5429 )
5430
;; Widen (sign- or zero-extend, per the SE code iterator) the half of
;; operand 1 selected by operand 2, which must satisfy
;; vect_par_constant_low.  Emitted as a vmovl; only enabled when
;; !BYTES_BIG_ENDIAN.
5431 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5432 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5433 (SE:<V_unpack> (vec_select:<V_HALF>
5434 (match_operand:VU 1 "register_operand" "w")
5435 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5436 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5437 "vmovl.<US><V_sz_elem> %q0, %e1"
5438 [(set_attr "type" "neon_shift_imm_long")]
5439 )
5440
;; Widen (sign- or zero-extend, per the SE code iterator) the half of
;; operand 1 selected by operand 2, which must satisfy
;; vect_par_constant_high.  Emitted as a vmovl; only enabled when
;; !BYTES_BIG_ENDIAN.
5441 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5442 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5443 (SE:<V_unpack> (vec_select:<V_HALF>
5444 (match_operand:VU 1 "register_operand" "w")
5445 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5446 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5447 "vmovl.<US><V_sz_elem> %q0, %f1"
5448 [(set_attr "type" "neon_shift_imm_long")]
5449 )
5450
;; Expander for unpacking the high half of a VU vector.  It builds a
;; PARALLEL holding the element indices nunits/2 .. nunits-1 and passes it as
;; the selector operand of neon_vec_unpack<US>_hi_<mode>.  Only enabled when
;; !BYTES_BIG_ENDIAN.
5451 (define_expand "vec_unpack<US>_hi_<mode>"
5452 [(match_operand:<V_unpack> 0 "register_operand" "")
5453 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5454 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5455 {
5456 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5457 rtx t1;
5458 int i;
/* Select the upper-half element indices.  */
5459 for (i = 0; i < (<V_mode_nunits>/2); i++)
5460 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5461
5462 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5463 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
5464 operands[1],
5465 t1));
5466 DONE;
5467 }
5468 )
5469
;; Expander for vec_unpack<US>_lo_<mode> on VU modes.  It builds a PARALLEL
;; holding the lane numbers 0 .. nunits/2-1 and passes it, together with the
;; destination and source, to neon_vec_unpack<US>_lo_<mode>.  Enabled only
;; for little-endian NEON targets.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    const int half_lanes = <V_mode_nunits> / 2;
    rtvec lane_vec = rtvec_alloc (half_lanes);

    /* Select the lower lanes: 0, 1, ...  */
    for (int idx = 0; idx < half_lanes; idx++)
      RTVEC_ELT (lane_vec, idx) = GEN_INT (idx);

    rtx low_sel = gen_rtx_PARALLEL (<MODE>mode, lane_vec);
    emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                  operands[1],
                                                  low_sel));
    DONE;
  }
)
5487
;; Widening multiply of matching halves of two quad vectors.  Operands 1 and
;; 3 are each narrowed to <V_HALF> by the same PARALLEL (operand 2, reused
;; through match_dup), SE-extended to <V_unpack>, and multiplied into
;; operand 0.  Operand 2 must satisfy vect_par_constant_low.  Enabled only
;; for little-endian NEON targets.
;; NOTE(review): %e1/%e3 presumably print the low D-register halves of the
;; sources; verify against the ARM operand-printing code.
5488 (define_insn "neon_vec_<US>mult_lo_<mode>"
5489 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5490 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5491 (match_operand:VU 1 "register_operand" "w")
5492 (match_operand:VU 2 "vect_par_constant_low" "")))
5493 (SE:<V_unpack> (vec_select:<V_HALF>
5494 (match_operand:VU 3 "register_operand" "w")
5495 (match_dup 2)))))]
5496 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5497 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5498 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5499 )
5500
;; Expander for vec_widen_<US>mult_lo_<mode> on VU modes.  It builds a
;; PARALLEL of lane numbers 0 .. nunits/2-1 and emits
;; neon_vec_<US>mult_lo_<mode> with the two sources placed either side of
;; that selector, matching the insn's operand order (dest, src1, selector,
;; src2).  Enabled only for little-endian NEON targets.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    const int half_lanes = <V_mode_nunits> / 2;
    rtvec lane_vec = rtvec_alloc (half_lanes);

    /* Select the lower lanes: 0, 1, ...  */
    for (int idx = 0; idx < half_lanes; idx++)
      RTVEC_ELT (lane_vec, idx) = GEN_INT (idx);

    rtx low_sel = gen_rtx_PARALLEL (<MODE>mode, lane_vec);
    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                                operands[1],
                                                low_sel,
                                                operands[2]));
    DONE;
  }
)
5521
;; Counterpart of neon_vec_<US>mult_lo_<mode>: the same widening multiply of
;; two <V_HALF> selections, but the shared PARALLEL in operand 2 must satisfy
;; vect_par_constant_high and the sources are printed with %f1/%f3.  Enabled
;; only for little-endian NEON targets.
;; NOTE(review): %f1/%f3 presumably print the high D-register halves of the
;; sources; verify against the ARM operand-printing code.
5522 (define_insn "neon_vec_<US>mult_hi_<mode>"
5523 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5524 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5525 (match_operand:VU 1 "register_operand" "w")
5526 (match_operand:VU 2 "vect_par_constant_high" "")))
5527 (SE:<V_unpack> (vec_select:<V_HALF>
5528 (match_operand:VU 3 "register_operand" "w")
5529 (match_dup 2)))))]
5530 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5531 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5532 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5533 )
5534
;; Expander for vec_widen_<US>mult_hi_<mode> on VU modes.  It builds a
;; PARALLEL of lane numbers nunits/2 .. nunits-1 and emits
;; neon_vec_<US>mult_hi_<mode> with the two sources placed either side of
;; that selector (dest, src1, selector, src2).  Enabled only for
;; little-endian NEON targets.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    const int half_lanes = <V_mode_nunits> / 2;
    rtvec lane_vec = rtvec_alloc (half_lanes);

    /* Select the upper lanes: half_lanes, half_lanes + 1, ...  */
    for (int idx = 0; idx < half_lanes; idx++)
      RTVEC_ELT (lane_vec, idx) = GEN_INT (half_lanes + idx);

    rtx high_sel = gen_rtx_PARALLEL (<MODE>mode, lane_vec);
    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                                operands[1],
                                                high_sel,
                                                operands[2]));
    DONE;
  }
)
5556
;; Widening shift left.  Operand 1 (a VW-mode vector) is shifted left by
;; operand 2, which must satisfy const_neon_scalar_shift_amount_operand, and
;; the result is SE-extended into the <V_widen> destination.  The output is
;; a single fixed vshll template, so it is given directly as a string.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (ashift:VW
            (match_operand:VW 1 "register_operand" "w")
            (match_operand:<V_innermode> 2
              "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
5567
;; Expander for vec_widen_<US>shiftl_lo_<mode> on VU modes.  It takes the
;; <V_HALF> subreg of operand 1 at byte offset 0 and feeds it, with the
;; immediate shift count in operand 2, to neon_vec_<US>shiftl_<V_half>.
;; Enabled only for little-endian NEON targets.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    /* Half-width view of the source starting at byte offset 0.  */
    rtx src_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                        <MODE>mode, 0);

    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], src_half,
                                                 operands[2]));
    DONE;
  }
)
5580
;; Expander for vec_widen_<US>shiftl_hi_<mode> on VU modes.  It takes the
;; <V_HALF> subreg of operand 1 that starts GET_MODE_SIZE (<V_HALF>mode)
;; bytes in and feeds it, with the immediate shift count in operand 2, to
;; neon_vec_<US>shiftl_<V_half>.  Enabled only for little-endian NEON
;; targets.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    /* Half-width view of the source starting one half-vector in.  */
    rtx src_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                        <MODE>mode,
                                        GET_MODE_SIZE (<V_HALF>mode));

    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], src_half,
                                                 operands[2]));
    DONE;
  }
)
5594
5595 ;; Vectorize for non-neon-quad case
;; Whole-vector widening move for VDI modes: operand 0 (mode <V_widen>) is the
;; SE-extension of every element of operand 1.  Unlike the VU patterns there
;; is no lane selection and no endianness restriction in the condition.
;; NOTE(review): %P1 presumably prints operand 1 as a D register and %q0
;; operand 0 as a Q register; verify against the ARM operand-printing code.
5596 (define_insn "neon_unpack<US>_<mode>"
5597 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5598 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5599 "TARGET_NEON"
5600 "vmovl.<US><V_sz_elem> %q0, %P1"
5601 [(set_attr "type" "neon_move")]
5602 )
5603
;; Expander for vec_unpack<US>_lo_<mode> on VDI modes.  The whole source is
;; widened into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>, and the
;; low half of that pseudo is then copied to operand 0 with
;; neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
  {
    rtx widened = gen_reg_rtx (<V_widen>mode);

    /* Widen everything first, then keep only the low half.  */
    emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
    DONE;
  }
)
5616
;; Expander for vec_unpack<US>_hi_<mode> on VDI modes.  The whole source is
;; widened into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>, and the
;; high half of that pseudo is then copied to operand 0 with
;; neon_vget_high<V_widen_l>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
  {
    rtx widened = gen_reg_rtx (<V_widen>mode);

    /* Widen everything first, then keep only the high half.  */
    emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
    DONE;
  }
)
5629
;; Whole-vector widening multiply for VDI modes: both sources are
;; SE-extended to <V_widen> and multiplied into operand 0.  No lane selection
;; and no endianness restriction in the condition.
;; NOTE(review): %P1/%P2 presumably print the sources as D registers and %q0
;; the destination as a Q register; verify against the ARM operand-printing
;; code.
5630 (define_insn "neon_vec_<US>mult_<mode>"
5631 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5632 (mult:<V_widen> (SE:<V_widen>
5633 (match_operand:VDI 1 "register_operand" "w"))
5634 (SE:<V_widen>
5635 (match_operand:VDI 2 "register_operand" "w"))))]
5636 "TARGET_NEON"
5637 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5638 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5639 )
5640
;; Expander for vec_widen_<US>mult_hi_<mode> on VDI modes.  The full widening
;; product is formed in a fresh <V_widen> pseudo with
;; neon_vec_<US>mult_<mode>, and its high half is then copied to operand 0
;; with neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    rtx product = gen_reg_rtx (<V_widen>mode);

    /* Multiply at full width, then keep only the high half.  */
    emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                             operands[2]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
    DONE;
  }
)
5655
;; Expander for vec_widen_<US>mult_lo_<mode> on VDI modes.  The full widening
;; product is formed in a fresh <V_widen> pseudo with
;; neon_vec_<US>mult_<mode>, and its low half is then copied to operand 0
;; with neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    rtx product = gen_reg_rtx (<V_widen>mode);

    /* Multiply at full width, then keep only the low half.  */
    emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                             operands[2]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
    DONE;
  }
)
5670
;; Expander for vec_widen_<US>shiftl_hi_<mode> on VDI modes.  The widening
;; shift of the whole source is formed in a fresh <V_widen> pseudo with
;; neon_vec_<US>shiftl_<mode>, and its high half is then copied to operand 0
;; with neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
  {
    rtx shifted = gen_reg_rtx (<V_widen>mode);

    /* Shift at full width, then keep only the high half.  */
    emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                               operands[2]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
    DONE;
  }
)
5684
;; Expander for vec_widen_<US>shiftl_lo_<mode> on VDI modes.  The widening
;; shift of the whole source is formed in a fresh <V_widen> pseudo with
;; neon_vec_<US>shiftl_<mode>, and its low half is then copied to operand 0
;; with neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
  {
    rtx shifted = gen_reg_rtx (<V_widen>mode);

    /* Shift at full width, then keep only the low half.  */
    emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                               operands[2]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
    DONE;
  }
)
5698
5699 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5700 ; because the ordering of vector elements in Q registers is different from what
5701 ; the semantics of the instructions require.
5702
;; Narrowing pack of two VN-mode vectors: operand 0 is the concatenation of
;; the truncation of operand 1 and the truncation of operand 2.  The output
;; is two vmovn instructions, the first writing %e0 from operand 1 and the
;; second writing %f0 from operand 2.  Enabled only for little-endian NEON
;; targets.
;; The earlyclobber ("&") on operand 0 keeps the destination from sharing a
;; register with an input; operand 2 is still read after the first vmovn has
;; already written part of operand 0.
;; The "length" of 8 presumably reflects two 4-byte instructions -- confirm.
5703 (define_insn "vec_pack_trunc_<mode>"
5704 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5705 (vec_concat:<V_narrow_pack>
5706 (truncate:<V_narrow>
5707 (match_operand:VN 1 "register_operand" "w"))
5708 (truncate:<V_narrow>
5709 (match_operand:VN 2 "register_operand" "w"))))]
5710 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5711 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5712 [(set_attr "type" "multiple")
5713 (set_attr "length" "8")]
5714 )
5715
5716 ;; For the non-quad case.
;; Single narrowing move: operand 0 (mode <V_narrow>) is the element-wise
;; truncation of the VN-mode operand 1, emitted as one vmovn.  Enabled only
;; for little-endian NEON targets.
5717 (define_insn "neon_vec_pack_trunc_<mode>"
5718 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5719 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5720 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5721 "vmovn.i<V_sz_elem>\t%P0, %q1"
5722 [(set_attr "type" "neon_move_narrow_q")]
5723 )
5724
;; Expander for vec_pack_trunc_<mode> on VSHFT modes.  The two sources are
;; placed in the low and high halves of a fresh <V_DOUBLE> pseudo using
;; move_lo_quad_<V_double> and move_hi_quad_<V_double>; that pseudo is then
;; narrowed into operand 0 with neon_vec_pack_trunc_<V_double>.  Enabled only
;; for little-endian NEON targets.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
   (match_operand:VSHFT 1 "register_operand" "")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtx combined = gen_reg_rtx (<V_DOUBLE>mode);

    /* Operand 1 fills the low half, operand 2 the high half.  */
    emit_insn (gen_move_lo_quad_<V_double> (combined, operands[1]));
    emit_insn (gen_move_hi_quad_<V_double> (combined, operands[2]));

    /* One narrowing move then produces the packed result.  */
    emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], combined));
    DONE;
  })
5738
;; Matches abs (operand 1 - operand 2) on VDQ modes and emits a single vabd.
;; For floating-point modes (<Is_float_mode> nonzero) the pattern is only
;; enabled under flag_unsafe_math_optimizations; the "type" attribute
;; likewise picks neon_fp_abd_s<q> for float modes and neon_abd<q> otherwise.
;; NOTE(review): for integer modes the RTL (abs (minus a b)) wraps when the
;; subtraction overflows the element, whereas VABD appears to compute the
;; difference without wrapping, so the two can disagree (e.g. 127 and -128 in
;; 8-bit lanes).  Confirm whether this pattern should be limited to float
;; modes; narrowing the iterator would remove generated pattern names, so it
;; is not changed here.
5739 (define_insn "neon_vabd<mode>_2"
5740 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5741 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5742 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5743 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5744 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5745 [(set (attr "type")
5746 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5747 (const_string "neon_fp_abd_s<q>")
5748 (const_string "neon_abd<q>")))]
5749 )
5750
;; Variant of neon_vabd<mode>_2 where the subtraction is expressed as an
;; UNSPEC_VSUB of operands 1 and 2 instead of a plain minus; abs of that
;; unspec is emitted as a single vabd.  The element suffix here comes from
;; <V_if_elem> rather than <V_s_elem>.  Same enabling condition and "type"
;; selection as the _2 pattern: float modes require
;; flag_unsafe_math_optimizations and use neon_fp_abd_s<q>, other modes use
;; neon_abd<q>.
5751 (define_insn "neon_vabd<mode>_3"
5752 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5753 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5754 (match_operand:VDQ 2 "s_register_operand" "w")]
5755 UNSPEC_VSUB)))]
5756 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5757 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5758 [(set (attr "type")
5759 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5760 (const_string "neon_fp_abd_s<q>")
5761 (const_string "neon_abd<q>")))]
5762 )
5763
5764 ;; Copy from core-to-neon regs, then extend, not vice-versa
5765
;; Split sign_extend SI->DI once reload has completed and the DI destination
;; is a VFP/NEON register (IS_VFP_REGNUM).  Operand 1 is first duplicated
;; into both lanes of the destination viewed as V2SI, then the DI register is
;; arithmetically shifted right by 32 so the copy from the upper lane ends up
;; in the low 32 bits with its sign bit replicated above it.
5766 (define_split
5767 [(set (match_operand:DI 0 "s_register_operand" "")
5768 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5769 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5770 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5771 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5772 {
;; Operand 2 is the same hard register as operand 0, re-typed as V2SI.
5773 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5774 })
5775
;; Split sign_extend HI->DI once reload has completed and the DI destination
;; is a VFP/NEON register (IS_VFP_REGNUM).  Operand 1 is first duplicated
;; into every lane of the destination viewed as V4HI, then the DI register is
;; arithmetically shifted right by 48 so the copy from the top lane ends up
;; in the low 16 bits with its sign bit replicated above it.
5776 (define_split
5777 [(set (match_operand:DI 0 "s_register_operand" "")
5778 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5779 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5780 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5781 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5782 {
;; Operand 2 is the same hard register as operand 0, re-typed as V4HI.
5783 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5784 })
5785
;; Split sign_extend QI->DI once reload has completed and the DI destination
;; is a VFP/NEON register (IS_VFP_REGNUM).  Operand 1 is first duplicated
;; into every lane of the destination viewed as V8QI, then the DI register is
;; arithmetically shifted right by 56 so the copy from the top lane ends up
;; in the low 8 bits with its sign bit replicated above it.
5786 (define_split
5787 [(set (match_operand:DI 0 "s_register_operand" "")
5788 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5789 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5790 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5791 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5792 {
;; Operand 2 is the same hard register as operand 0, re-typed as V8QI.
5793 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5794 })
5795
;; Split zero_extend SI->DI once reload has completed and the DI destination
;; is a VFP/NEON register (IS_VFP_REGNUM).  Operand 1 is first duplicated
;; into both lanes of the destination viewed as V2SI, then the DI register is
;; logically shifted right by 32 so the copy from the upper lane ends up in
;; the low 32 bits with zeros above it.
5796 (define_split
5797 [(set (match_operand:DI 0 "s_register_operand" "")
5798 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5799 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5800 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5801 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5802 {
;; Operand 2 is the same hard register as operand 0, re-typed as V2SI.
5803 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5804 })
5805
;; Split zero_extend HI->DI once reload has completed and the DI destination
;; is a VFP/NEON register (IS_VFP_REGNUM).  Operand 1 is first duplicated
;; into every lane of the destination viewed as V4HI, then the DI register is
;; logically shifted right by 48 so the copy from the top lane ends up in the
;; low 16 bits with zeros above it.
5806 (define_split
5807 [(set (match_operand:DI 0 "s_register_operand" "")
5808 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5809 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5810 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5811 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5812 {
;; Operand 2 is the same hard register as operand 0, re-typed as V4HI.
5813 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5814 })
5815
;; Split zero_extend QI->DI once reload has completed and the DI destination
;; is a VFP/NEON register (IS_VFP_REGNUM).  Operand 1 is first duplicated
;; into every lane of the destination viewed as V8QI, then the DI register is
;; logically shifted right by 56 so the copy from the top lane ends up in the
;; low 8 bits with zeros above it.
5816 (define_split
5817 [(set (match_operand:DI 0 "s_register_operand" "")
5818 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5819 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5820 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5821 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5822 {
;; Operand 2 is the same hard register as operand 0, re-typed as V8QI.
5823 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5824 })