[ARM][2/4] Replace casts of 1 to HOST_WIDE_INT by HOST_WIDE_INT_1 and HOST_WIDE_INT_1U
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The permitted values are vadd, vmin and vmax; the default is vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the modes of the VDX iterator.  The insn condition
;; requires TARGET_NEON and at least one of the two operands to be a
;; register.  Nine constraint alternatives (0-8) are defined and the output
;; code dispatches on which_alternative:
;;   0      vmov %P0, %P1
;;   1, 3   template produced by output_move_neon
;;   2      immediate move: neon_immediate_valid_for_move is called (and
;;          asserted to succeed); it is handed &operands[1] and &width.
;;          A width of 0 selects the vmov.f32 template, any other width is
;;          formatted into a vmov.i<width> template.
;;   4      vmov %Q0, %R0, %P1
;;   5      vmov %P0, %Q1, %R1
;;   6-8    template produced by output_move_double
;; The per-alternative "type", "length" and pool-range attributes follow in
;; the same alternative order.
;; NOTE(review): the vmov.i<width> template is written with sprintf into a
;; 40-byte function-static buffer and returned by pointer, so the returned
;; string is overwritten by the next use -- presumably the caller consumes
;; it immediately; confirm.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
38 static char templ[40];
39
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
61 }
62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the modes of the VQXMOV iterator.  Same structure as the
;; VDX move: TARGET_NEON, at least one register operand, nine alternatives.
;; Output by which_alternative:
;;   0      vmov %q0, %q1
;;   1, 3   template produced by output_move_neon
;;   2      immediate move via neon_immediate_valid_for_move (asserted to
;;          succeed); width 0 selects vmov.f32, otherwise vmov.i<width>
;;   4      two vmov instructions filling %Q0/%R0 from %e1 and %J0/%K0
;;          from %f1
;;   5      two vmov instructions filling %e0 from %Q1/%R1 and %f0 from
;;          %J1/%K1
;;   6-8    template produced by output_move_quad
;; NOTE(review): as in the VDX pattern, the immediate template lives in a
;; 40-byte function-static buffer filled by sprintf -- confirm that the
;; returned pointer is always consumed before the next call.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
76 "TARGET_NEON
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
79 {
80 if (which_alternative == 2)
81 {
82 int width, is_valid;
83 static char templ[40];
84
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
87
88 gcc_assert (is_valid != 0);
89
90 if (width == 0)
91 return "vmov.f32\t%q0, %1 @ <mode>";
92 else
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
94
95 return templ;
96 }
97
98 switch (which_alternative)
99 {
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
106 }
107 }
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; Expander for TImode moves, enabled under TARGET_NEON.  While new pseudo
;; registers may still be created, a move whose destination is not a
;; register has its source forced into a register first.  No DONE is
;; issued, so the (set ...) template above is then emitted as usual.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
119 "TARGET_NEON"
120 {
121 if (can_create_pseudo_p ())
122 {
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
125 }
126 })
127
;; Expander for moves in the modes of the VSTRUCT iterator, enabled under
;; TARGET_NEON.  While pseudos may still be created, a move to a
;; non-register destination has its source forced into a register.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
131 "TARGET_NEON"
132 {
133 if (can_create_pseudo_p ())
134 {
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
137 }
138 })
139
;; Expander for V4HF moves; requires TARGET_NEON && TARGET_FP16.  While
;; pseudos may still be created, a non-register destination causes the
;; source to be forced into a register (see the rationale in the body).
;; NOTE(review): both operands carry the s_register_operand predicate, yet
;; the body tests !REG_P (operands[0]) -- presumably expander predicates are
;; not enforced on every caller; confirm.
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
144 {
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
147 this case. */
148 if (can_create_pseudo_p ())
149 {
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
152 }
153 })
154
;; Expander for V8HF moves; requires TARGET_NEON && TARGET_FP16.  The
;; operands have empty predicates.  While pseudos may still be created, a
;; non-register destination causes the source to be forced into a register
;; (see the rationale in the body).
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
159 {
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
162 this case. */
163 if (can_create_pseudo_p ())
164 {
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
167 }
168 })
169
;; Move insn for the modes of the VSTRUCT iterator.  At least one operand
;; must be a register.  Three alternatives:
;;   0      outputs "#", i.e. the insn has no direct assembly form and must
;;          be split
;;   1, 2   template produced by output_move_neon
;; The "length" attribute is computed by arm_attr_length_move_neon (insn).
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
173 "TARGET_NEON
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
176 {
177 switch (which_alternative)
178 {
179 case 0: return "#";
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
182 }
183 }
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
186
;; After reload, split a register-to-register EImode move into two moves:
;; a TImode piece at the base register number and a DImode piece at
;; register number + 4.  neon_disambiguate_copy is given the piece lists and
;; fills in operands[0..3] used by the replacement template -- presumably
;; choosing an order that is safe when source and destination overlap;
;; confirm against its definition.
187 (define_split
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
193 {
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
196 rtx dest[2], src[2];
197
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
202
203 neon_disambiguate_copy (operands, dest, src, 2);
204 })
205
;; After reload, split a register-to-register OImode move into two TImode
;; moves, at the base register number and at register number + 4.
;; neon_disambiguate_copy fills in operands[0..3] for the replacement
;; template -- presumably ordering the copies so overlapping ranges are
;; handled; confirm against its definition.
206 (define_split
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
212 {
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
215 rtx dest[2], src[2];
216
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
221
222 neon_disambiguate_copy (operands, dest, src, 2);
223 })
224
;; After reload, split a register-to-register CImode move into three TImode
;; moves, at register offsets 0, +4 and +8.  neon_disambiguate_copy fills
;; in operands[0..5] for the replacement template -- presumably ordering
;; the copies so overlapping ranges are handled; confirm.
225 (define_split
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
232 {
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
235 rtx dest[3], src[3];
236
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
243
244 neon_disambiguate_copy (operands, dest, src, 3);
245 })
246
;; After reload, split a register-to-register XImode move into four TImode
;; moves, at register offsets 0, +4, +8 and +12.  neon_disambiguate_copy
;; fills in operands[0..7] for the replacement template -- presumably
;; ordering the copies so overlapping ranges are handled; confirm.
247 (define_split
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
255 {
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
258 rtx dest[4], src[4];
259
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
268
269 neon_disambiguate_copy (operands, dest, src, 4);
270 })
271
;; Expander for misaligned moves in the modes of the VDQX iterator, wrapped
;; in UNSPEC_MISALIGNED_ACCESS.  Enabled for TARGET_NEON on little-endian
;; targets when unaligned_access is set.  Steps:
;;   1. if neither operand is a register, force operand 1 into a register;
;;   2. pick the non-register side as the memory operand (operand 1 when
;;      operand 0 is a register, otherwise operand 0);
;;   3. if neon_vector_mem_operand rejects it, force its address into a
;;      Pmode register.
;; NOTE(review): step 3 overwrites XEXP (adjust_mem, 0) in place, which
;; mutates the rtx passed in by the caller; building a new MEM (e.g. with
;; replace_equiv_address) may be safer -- confirm.  Step 2 also assumes
;; that operand 1 is a MEM whenever operand 0 is a register; confirm that
;; callers never pass two registers.
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
277 {
278 rtx adjust_mem;
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
285
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
288 else
289 adjust_mem = operands[0];
290
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
294
295 })
296
;; Misaligned store for the modes of the VDX iterator: register operand 1
;; is written to memory operand 0 with vst1.<V_sz_elem> {%P1}, %A0.
;; Enabled under the same condition as the movmisalign<mode> expander.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
304
;; Misaligned load for the modes of the VDX iterator: memory operand 1 is
;; read into register operand 0 with vld1.<V_sz_elem> {%P0}, %A1.
;; Enabled under the same condition as the movmisalign<mode> expander.
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
308 " Um")]
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
313
;; Misaligned store for the modes of the VQX iterator: register operand 1
;; is written to memory operand 0 with vst1.<V_sz_elem> {%q1}, %A0.
;; Enabled under the same condition as the movmisalign<mode> expander.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
321
;; Misaligned load for the modes of the VQX iterator: memory operand 1 is
;; read into register operand 0 with vld1.<V_sz_elem> {%q0}, %A1.
;; Enabled under the same condition as the movmisalign<mode> expander.
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
325 " Um")]
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
330
;; Insert scalar operand 1 into one lane of a VD_LANE-mode vector.  The
;; RTL is a vec_merge of a duplicated scalar with operand 3, which is tied
;; to the destination.  Operand 2 is the vec_merge mask (the vec_set<mode>
;; expander passes 1 << lane); ffs () - 1 recovers the lane index, which is
;; mirrored on big-endian targets and then stored back into operands[2] for
;; printing.  Alternative 0 loads the lane from memory with vld1; alternative
;; 1 moves it from operand 1 with vmov.
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
333 (vec_merge:VD_LANE
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
338 "TARGET_NEON"
339 {
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
344
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
347 else
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
349 }
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
351
;; Insert scalar operand 1 into one lane of a VQ2-mode vector.  As in the
;; VD_LANE variant, operand 2 is a mask and ffs () - 1 gives the lane index.
;; The vector is addressed as two halves: `elt' is the index within a half
;; (mirrored on big-endian), and `hi' (0 or 2 times the half selected) is
;; added to the destination register number.  operands[0] is rewritten to
;; that <V_HALF>mode register and operands[2] to the in-half lane before the
;; template is returned.  Alternative 0 uses vld1 from memory, alternative 1
;; uses vmov from operand 1.
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
354 (vec_merge:VQ2
355 (vec_duplicate:VQ2
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
359 "TARGET_NEON"
360 {
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
366
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
369
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
372
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
375 else
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
377 }
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
379 )
380
;; Insert DImode operand 1 into one element of a V2DI vector.  Operand 2 is
;; a mask; ffs () - 1 gives the element index, and the destination is
;; rewritten to the DImode register at REGNO (operands[0]) + 2 * index.
;; Alternative 0 loads that register from memory with vld1.64; alternative 1
;; fills it from the %Q1/%R1 pair with vmov.
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
383 (vec_merge:V2DI
384 (vec_duplicate:V2DI
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
388 "TARGET_NEON"
389 {
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
392
393 operands[0] = gen_rtx_REG (DImode, regno);
394
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
397 else
398 return "vmov\t%P0, %Q1, %R1";
399 }
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
401 )
402
;; Expander for setting one element of a VDQ-mode vector.  Operand 2 is the
;; element index; it is converted to the single-bit mask
;; HOST_WIDE_INT_1 << index and passed to vec_set<mode>_internal, with
;; operand 0 supplied both as the destination and as the vector being
;; merged into.
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
407 "TARGET_NEON"
408 {
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
412 DONE;
413 })
414
;; Extract the element selected by operand 2 from a VD_LANE-mode vector.
;; On big-endian targets the lane index is mirrored
;; (GET_MODE_NUNITS - 1 - index) before printing.  Alternative 0 stores the
;; lane to memory with vst1; alternative 1 moves it to operand 0 with
;; vmov.<V_uf_sclr>.
415 (define_insn "vec_extract<mode>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
417 (vec_select:<V_elem>
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
420 "TARGET_NEON"
421 {
422 if (BYTES_BIG_ENDIAN)
423 {
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
427 }
428
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
431 else
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
433 }
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
435 )
436
;; Extract the element selected by operand 2 from a VQ2-mode vector.  The
;; source is addressed as two halves: `elt' is the index within a half
;; (mirrored on big-endian) and `hi' is the register-number offset of the
;; half that holds it.  operands[1] is rewritten to that <V_HALF>mode
;; register and operands[2] to the in-half lane.  Alternative 0 stores the
;; lane with vst1; alternative 1 moves it to operand 0 with vmov.
437 (define_insn "vec_extract<mode>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
439 (vec_select:<V_elem>
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
442 "TARGET_NEON"
443 {
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
448
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
451
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
454
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
457 else
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
459 }
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
461 )
462
;; Extract the DImode element selected by operand 2 from a V2DI vector.
;; The source is rewritten to the DImode register at
;; REGNO (operands[1]) + 2 * index.  Alternative 0 stores it to memory with
;; vst1.64; alternative 1 moves it into the %Q0/%R0 pair with vmov.
463 (define_insn "vec_extractv2di"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
465 (vec_select:DI
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
468 "TARGET_NEON"
469 {
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
471
472 operands[1] = gen_rtx_REG (DImode, regno);
473
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
476 else
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
478 }
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
480 )
481
;; Expander for initialising a VDQ-mode vector register (operand 0) from
;; operand 1.  All work is delegated to neon_expand_vector_init; the
;; expander then finishes with DONE.
482 (define_expand "vec_init<mode>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
485 "TARGET_NEON"
486 {
487 neon_expand_vector_init (operands[0], operands[1]);
488 DONE;
489 })
490
491 ;; Doubleword and quadword arithmetic.
492
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
495
;; Vector addition for the modes of the VDQ iterator; emits
;; vadd.<V_if_elem>.  Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.  The "type" attribute is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
502 [(set (attr "type")
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
506 )
507
;; DImode addition with a clobber of the condition-code register.  Seven
;; alternatives: 0 and 3 emit vadd.i64 on %P registers; 1, 2, 4, 5 and 6
;; output "#" and therefore must be split into other insns.  The "arch"
;; attribute restricts alternative 0 to neon_for_64bits and alternative 3
;; to avoid_neon_for_64bits; the "#" alternatives have length 8 and are
;; marked as clobbering the condition codes.
508 (define_insn "adddi3_neon"
509 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
510 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
511 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
512 (clobber (reg:CC CC_REGNUM))]
513 "TARGET_NEON"
514 {
515 switch (which_alternative)
516 {
517 case 0: /* fall through */
518 case 3: return "vadd.i64\t%P0, %P1, %P2";
519 case 1: return "#";
520 case 2: return "#";
521 case 4: return "#";
522 case 5: return "#";
523 case 6: return "#";
524 default: gcc_unreachable ();
525 }
526 }
527 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
528 multiple,multiple,multiple")
529 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
530 (set_attr "length" "*,8,8,*,8,8,8")
531 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
532 )
533
;; Vector subtraction for the modes of the VDQ iterator; emits
;; vsub.<V_if_elem>.  Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.  The "type" attribute is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
534 (define_insn "*sub<mode>3_neon"
535 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
536 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
537 (match_operand:VDQ 2 "s_register_operand" "w")))]
538 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
539 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
540 [(set (attr "type")
541 (if_then_else (match_test "<Is_float_mode>")
542 (const_string "neon_fp_addsub_s<q>")
543 (const_string "neon_sub<q>")))]
544 )
545
;; DImode subtraction with a clobber of the condition-code register.  Five
;; alternatives: 0 and 4 emit vsub.i64 on %P registers; 1, 2 and 3 emit a
;; subs/sbc pair on the %Q (first instruction) and %R (second instruction)
;; parts of the operands.  The "arch" attribute restricts alternative 0 to
;; neon_for_64bits and alternative 4 to avoid_neon_for_64bits.
546 (define_insn "subdi3_neon"
547 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
548 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
549 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
550 (clobber (reg:CC CC_REGNUM))]
551 "TARGET_NEON"
552 {
553 switch (which_alternative)
554 {
555 case 0: /* fall through */
556 case 4: return "vsub.i64\t%P0, %P1, %P2";
557 case 1: /* fall through */
558 case 2: /* fall through */
559 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
560 default: gcc_unreachable ();
561 }
562 }
563 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
564 (set_attr "conds" "*,clob,clob,clob,*")
565 (set_attr "length" "*,8,8,8,*")
566 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
567 )
568
;; Vector multiplication for the modes of the VDQW iterator; emits
;; vmul.<V_if_elem>.  Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.  The "type" attribute is
;; neon_fp_mul_s<q> for float modes and neon_mul_<V_elem_ch><q> otherwise.
569 (define_insn "*mul<mode>3_neon"
570 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
571 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
572 (match_operand:VDQW 2 "s_register_operand" "w")))]
573 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
574 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
575 [(set (attr "type")
576 (if_then_else (match_test "<Is_float_mode>")
577 (const_string "neon_fp_mul_s<q>")
578 (const_string "neon_mul_<V_elem_ch><q>")))]
579 )
580
;; Multiply-accumulate for the modes of the VDQW iterator:
;; operand 0 = operand 2 * operand 3 + operand 1, with operand 1 tied to
;; the destination.  Emits vmla.<V_if_elem>.  Floating-point modes are only
;; matched when flag_unsafe_math_optimizations is set.
581 (define_insn "mul<mode>3add<mode>_neon"
582 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
583 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
584 (match_operand:VDQW 3 "s_register_operand" "w"))
585 (match_operand:VDQW 1 "s_register_operand" "0")))]
586 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
587 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
588 [(set (attr "type")
589 (if_then_else (match_test "<Is_float_mode>")
590 (const_string "neon_fp_mla_s<q>")
591 (const_string "neon_mla_<V_elem_ch><q>")))]
592 )
593
;; Multiply-subtract for the modes of the VDQW iterator:
;; operand 0 = operand 1 - operand 2 * operand 3, with operand 1 tied to
;; the destination.  Emits vmls.<V_if_elem>.  Floating-point modes are only
;; matched when flag_unsafe_math_optimizations is set.
594 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
595 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
596 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
597 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
598 (match_operand:VDQW 3 "s_register_operand" "w"))))]
599 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
600 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
601 [(set (attr "type")
602 (if_then_else (match_test "<Is_float_mode>")
603 (const_string "neon_fp_mla_s<q>")
604 (const_string "neon_mla_<V_elem_ch><q>")))]
605 )
606
607 ;; Fused multiply-accumulate
608 ;; We define each insn twice here:
609 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
610 ;; to be able to use when converting to FMA.
611 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the modes of the VCVTF iterator:
;; operand 0 = operand 1 * operand 2 + operand 3, with operand 3 tied to
;; the destination.  This named variant additionally requires
;; flag_unsafe_math_optimizations.  Emits vfma.
612 (define_insn "fma<VCVTF:mode>4"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
618 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
620 )
621
;; Fused multiply-add for the modes of the VCVTF iterator, same RTL and
;; output as fma<mode>4 but without the flag_unsafe_math_optimizations
;; requirement: only TARGET_NEON && TARGET_FMA is needed.
622 (define_insn "fma<VCVTF:mode>4_intrinsic"
623 [(set (match_operand:VCVTF 0 "register_operand" "=w")
624 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
625 (match_operand:VCVTF 2 "register_operand" "w")
626 (match_operand:VCVTF 3 "register_operand" "0")))]
627 "TARGET_NEON && TARGET_FMA"
628 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
629 [(set_attr "type" "neon_fp_mla_s<q>")]
630 )
631
;; Fused multiply-subtract for the modes of the VCVTF iterator:
;; operand 0 = (-operand 1) * operand 2 + operand 3, with operand 3 tied to
;; the destination.  Requires flag_unsafe_math_optimizations in addition to
;; TARGET_NEON && TARGET_FMA.  Emits vfms.
632 (define_insn "*fmsub<VCVTF:mode>4"
633 [(set (match_operand:VCVTF 0 "register_operand" "=w")
634 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
635 (match_operand:VCVTF 2 "register_operand" "w")
636 (match_operand:VCVTF 3 "register_operand" "0")))]
637 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
638 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
639 [(set_attr "type" "neon_fp_mla_s<q>")]
640 )
641
;; Fused multiply-subtract for the modes of the VCVTF iterator, same RTL
;; and output as *fmsub<mode>4 but without the
;; flag_unsafe_math_optimizations requirement.
642 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
643 [(set (match_operand:VCVTF 0 "register_operand" "=w")
644 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
645 (match_operand:VCVTF 2 "register_operand" "w")
646 (match_operand:VCVTF 3 "register_operand" "0")))]
647 "TARGET_NEON && TARGET_FMA"
648 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
649 [(set_attr "type" "neon_fp_mla_s<q>")]
650 )
651
;; Rounding insns for the modes of the VCVTF iterator, one per unspec in
;; the NEON_VRINT iterator.  Emits vrint<nvrint_variant>.f32 from operand 1
;; into operand 0.  Requires TARGET_NEON && TARGET_FPU_ARMV8.
652 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
653 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
654 (unspec:VCVTF [(match_operand:VCVTF 1
655 "s_register_operand" "w")]
656 NEON_VRINT))]
657 "TARGET_NEON && TARGET_FPU_ARMV8"
658 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
659 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
660 )
661
;; Float-to-integer conversion combined with a rounding unspec: the
;; FIXUORS code iterator is applied to an unspec from the NEON_VCVT
;; iterator over a VCVTF-mode operand, producing a <V_cmp_result>-mode
;; result.  Emits vcvt<nvrint_variant>.<su>32.f32.  Requires
;; TARGET_NEON && TARGET_FPU_ARMV8; the insn is marked not predicable.
662 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
663 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
664 (FIXUORS:<V_cmp_result> (unspec:VCVTF
665 [(match_operand:VCVTF 1 "register_operand" "w")]
666 NEON_VCVT)))]
667 "TARGET_NEON && TARGET_FPU_ARMV8"
668 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
669 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
670 (set_attr "predicable" "no")]
671 )
672
;; Bitwise OR for the modes of the VDQ iterator.  Alternative 0 is the
;; register form and emits vorr.  Alternative 1 takes an immediate (operand
;; 1 tied to the destination) and obtains its template from
;; neon_output_logic_immediate, called with mnemonic "vorr", fourth argument
;; 0, and a flag saying whether the mode is a valid Q-register mode.
673 (define_insn "ior<mode>3"
674 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
675 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
676 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
677 "TARGET_NEON"
678 {
679 switch (which_alternative)
680 {
681 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
682 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
683 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
684 default: gcc_unreachable ();
685 }
686 }
687 [(set_attr "type" "neon_logic<q>")]
688 )
689
690 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
691 ;; vorr. We support the pseudo-instruction vand instead, because that
692 ;; corresponds to the canonical form the middle-end expects to use for
693 ;; immediate bitwise-ANDs.
694
;; Bitwise AND for the modes of the VDQ iterator.  Alternative 0 is the
;; register form and emits vand.  Alternative 1 takes an immediate (operand
;; 1 tied to the destination) and obtains its template from
;; neon_output_logic_immediate, called with mnemonic "vand", fourth argument
;; 1 (ior<mode>3 passes 0 there), and a flag saying whether the mode is a
;; valid Q-register mode.
695 (define_insn "and<mode>3"
696 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
697 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
698 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
699 "TARGET_NEON"
700 {
701 switch (which_alternative)
702 {
703 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
704 case 1: return neon_output_logic_immediate ("vand", &operands[2],
705 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
706 default: gcc_unreachable ();
707 }
708 }
709 [(set_attr "type" "neon_logic<q>")]
710 )
711
;; OR-NOT for the modes of the VDQ iterator:
;; operand 0 = (~operand 2) | operand 1.  Emits vorn.
712 (define_insn "orn<mode>3_neon"
713 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
714 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
715 (match_operand:VDQ 1 "s_register_operand" "w")))]
716 "TARGET_NEON"
717 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
718 [(set_attr "type" "neon_logic<q>")]
719 )
720
721 ;; TODO: investigate whether we should disable
722 ;; this and bicdi3_neon for the A8 in line with the other
723 ;; changes above.
;; DImode OR-NOT: operand 0 = (~operand 2) | operand 1.  Alternative 0
;; emits vorn on %P registers; alternatives 1-3 output "#" and are split.
;; The split runs after reload when the destination is not a VFP register:
;;   - under TARGET_THUMB2 the operands are replaced by their SImode low
;;     and high parts (operands 0/2/1 become the low parts, operands 3/4/5
;;     the high parts) and the two-insn SImode template is emitted;
;;   - otherwise a DImode one's complement of operand 2 into operand 0 is
;;     emitted, followed by a DImode OR of operand 1 with operand 0, and
;;     the template is skipped with DONE.
;; The "arch" attribute assigns alternative 1 to "a" and alternatives 2-3
;; to "t2".
724 (define_insn_and_split "orndi3_neon"
725 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
726 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
727 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
728 "TARGET_NEON"
729 "@
730 vorn\t%P0, %P1, %P2
731 #
732 #
733 #"
734 "reload_completed &&
735 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
736 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
737 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
738 "
739 {
740 if (TARGET_THUMB2)
741 {
742 operands[3] = gen_highpart (SImode, operands[0]);
743 operands[0] = gen_lowpart (SImode, operands[0]);
744 operands[4] = gen_highpart (SImode, operands[2]);
745 operands[2] = gen_lowpart (SImode, operands[2]);
746 operands[5] = gen_highpart (SImode, operands[1]);
747 operands[1] = gen_lowpart (SImode, operands[1]);
748 }
749 else
750 {
751 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
752 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
753 DONE;
754 }
755 }"
756 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
757 (set_attr "length" "*,16,8,8")
758 (set_attr "arch" "any,a,t2,t2")]
759 )
760
;; AND-NOT (bit clear) for the modes of the VDQ iterator:
;; operand 0 = (~operand 2) & operand 1.  Emits vbic.
761 (define_insn "bic<mode>3_neon"
762 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
763 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
764 (match_operand:VDQ 1 "s_register_operand" "w")))]
765 "TARGET_NEON"
766 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
767 [(set_attr "type" "neon_logic<q>")]
768 )
769
770 ;; Compare to *anddi_notdi_di.
;; DImode AND-NOT: operand 0 = (~operand 2) & operand 1.  Alternative 0
;; emits vbic on %P registers; alternatives 1 and 2 output "#" (length 8)
;; and must be split by a pattern defined elsewhere.
771 (define_insn "bicdi3_neon"
772 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
773 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
774 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
775 "TARGET_NEON"
776 "@
777 vbic\t%P0, %P1, %P2
778 #
779 #"
780 [(set_attr "type" "neon_logic,multiple,multiple")
781 (set_attr "length" "*,8,8")]
782 )
783
;; Bitwise exclusive-OR for the modes of the VDQ iterator.  Emits veor.
784 (define_insn "xor<mode>3"
785 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
786 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
787 (match_operand:VDQ 2 "s_register_operand" "w")))]
788 "TARGET_NEON"
789 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
790 [(set_attr "type" "neon_logic<q>")]
791 )
792
;; Bitwise NOT for the modes of the VDQ iterator.  Emits vmvn; the "type"
;; attribute is neon_move<q>.
793 (define_insn "one_cmpl<mode>2"
794 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
795 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
796 "TARGET_NEON"
797 "vmvn\t%<V_reg>0, %<V_reg>1"
798 [(set_attr "type" "neon_move<q>")]
799 )
800
;; Absolute value for the modes of the VDQW iterator.  Emits
;; vabs.<V_s_elem>.  The "type" attribute is neon_fp_abs_s<q> for float
;; modes and neon_abs<q> otherwise.
801 (define_insn "abs<mode>2"
802 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
803 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
804 "TARGET_NEON"
805 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
806 [(set (attr "type")
807 (if_then_else (match_test "<Is_float_mode>")
808 (const_string "neon_fp_abs_s<q>")
809 (const_string "neon_abs<q>")))]
810 )
811
;; Negation for the modes of the VDQW iterator.  Emits vneg.<V_s_elem>.
;; The "type" attribute is neon_fp_neg_s<q> for float modes and
;; neon_neg<q> otherwise.
812 (define_insn "neg<mode>2"
813 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
814 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
815 "TARGET_NEON"
816 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
817 [(set (attr "type")
818 (if_then_else (match_test "<Is_float_mode>")
819 (const_string "neon_fp_neg_s<q>")
820 (const_string "neon_neg<q>")))]
821 )
822
;; DImode negation with a DImode scratch and a clobber of the
;; condition-code register.  Every alternative outputs "#", so the insn is
;; always split (see the two define_splits that follow).  Only the second
;; alternative requests a real scratch register; the others use X.
823 (define_insn "negdi2_neon"
824 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
825 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
826 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
827 (clobber (reg:CC CC_REGNUM))]
828 "TARGET_NEON"
829 "#"
830 [(set_attr "length" "8")
831 (set_attr "type" "multiple")]
832 )
833
834 ; Split negdi2_neon for vfp registers
;; After reload, when the destination is a VFP register, negation is
;; rewritten as: scratch = 0; destination = scratch - source (the
;; subtraction keeps the condition-code clobber).  If no scratch register
;; was allocated (operand 2 is not a REG), the destination itself is used
;; as the zeroed register.
835 (define_split
836 [(set (match_operand:DI 0 "s_register_operand" "")
837 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
838 (clobber (match_scratch:DI 2 ""))
839 (clobber (reg:CC CC_REGNUM))]
840 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
841 [(set (match_dup 2) (const_int 0))
842 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
843 (clobber (reg:CC CC_REGNUM))])]
844 {
845 if (!REG_P (operands[2]))
846 operands[2] = operands[0];
847 }
848 )
849
850 ; Split negdi2_neon for core registers
;; After reload, when the destination satisfies
;; arm_general_register_operand, the scratch clobber is dropped and the
;; negation is re-emitted as a plain (neg:DI ...) with only the
;; condition-code clobber.  Note the condition is TARGET_32BIT rather than
;; TARGET_NEON.
851 (define_split
852 [(set (match_operand:DI 0 "s_register_operand" "")
853 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
854 (clobber (match_scratch:DI 2 ""))
855 (clobber (reg:CC CC_REGNUM))]
856 "TARGET_32BIT && reload_completed
857 && arm_general_register_operand (operands[0], DImode)"
858 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
859 (clobber (reg:CC CC_REGNUM))])]
860 ""
861 )
862
;; Unsigned minimum for the modes of the VDQIW iterator.  Emits
;; vmin.<V_u_elem>.
863 (define_insn "*umin<mode>3_neon"
864 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
865 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
866 (match_operand:VDQIW 2 "s_register_operand" "w")))]
867 "TARGET_NEON"
868 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869 [(set_attr "type" "neon_minmax<q>")]
870 )
871
;; Unsigned maximum for the modes of the VDQIW iterator.  Emits
;; vmax.<V_u_elem>.
872 (define_insn "*umax<mode>3_neon"
873 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
874 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
875 (match_operand:VDQIW 2 "s_register_operand" "w")))]
876 "TARGET_NEON"
877 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
878 [(set_attr "type" "neon_minmax<q>")]
879 )
880
;; Signed minimum for the modes of the VDQW iterator.  Emits
;; vmin.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> for float
;; modes and neon_minmax<q> otherwise.
881 (define_insn "*smin<mode>3_neon"
882 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
883 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
884 (match_operand:VDQW 2 "s_register_operand" "w")))]
885 "TARGET_NEON"
886 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887 [(set (attr "type")
888 (if_then_else (match_test "<Is_float_mode>")
889 (const_string "neon_fp_minmax_s<q>")
890 (const_string "neon_minmax<q>")))]
891 )
892
;; Signed maximum for the modes of the VDQW iterator.  Emits
;; vmax.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> for float
;; modes and neon_minmax<q> otherwise.
893 (define_insn "*smax<mode>3_neon"
894 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
895 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
896 (match_operand:VDQW 2 "s_register_operand" "w")))]
897 "TARGET_NEON"
898 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
899 [(set (attr "type")
900 (if_then_else (match_test "<Is_float_mode>")
901 (const_string "neon_fp_minmax_s<q>")
902 (const_string "neon_minmax<q>")))]
903 )
904
905 ; TODO: V2DI shifts are currently disabled because there are bugs in the
906 ; generic vectorizer code. It ends up creating a V2DI constructor with
907 ; SImode elements.
908
;; Vector left shift.  Alternative 0 takes the shift count from a vector
;; register and emits vshl directly; alternative 1 takes an immediate
;; count and lets neon_output_shift_immediate build the assembly text.
909 (define_insn "vashl<mode>3"
910 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
911 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
912 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
913 "TARGET_NEON"
914 {
915 switch (which_alternative)
916 {
917 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
918 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
919 <MODE>mode,
920 VALID_NEON_QREG_MODE (<MODE>mode),
921 true);
922 default: gcc_unreachable ();
923 }
924 }
925 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
926 )
927
;; Vector arithmetic right shift by an immediate.  The "vshr" assembly
;; text (sign letter 's') is produced by neon_output_shift_immediate.
928 (define_insn "vashr<mode>3_imm"
929 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
930 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
931 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
932 "TARGET_NEON"
933 {
934 return neon_output_shift_immediate ("vshr", 's', &operands[2],
935 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
936 false);
937 }
938 [(set_attr "type" "neon_shift_imm<q>")]
939 )
940
;; Vector logical right shift by an immediate.  The "vshr" assembly
;; text (sign letter 'u') is produced by neon_output_shift_immediate.
941 (define_insn "vlshr<mode>3_imm"
942 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
943 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
944 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
945 "TARGET_NEON"
946 {
947 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
948 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
949 false);
950 }
951 [(set_attr "type" "neon_shift_imm<q>")]
952 )
953
954 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
955 ; amount, with signed operands. This is essentially the same as vashl<mode>3
956 ; above, but using an unspec in case GCC tries anything tricky with negative
957 ; shift amounts.
958
;; Register-count vshl with the signed element suffix, expressed as
;; UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander emits this pattern
;; with a negated shift count.
959 (define_insn "ashl<mode>3_signed"
960 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
961 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
962 (match_operand:VDQI 2 "s_register_operand" "w")]
963 UNSPEC_ASHIFT_SIGNED))]
964 "TARGET_NEON"
965 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
966 [(set_attr "type" "neon_shift_reg<q>")]
967 )
968
969 ; Used for implementing logical shift-right, which is a left-shift by a negative
970 ; amount, with unsigned operands.
971
;; Register-count vshl with the unsigned element suffix, expressed as
;; UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander emits this pattern
;; with a negated shift count.
972 (define_insn "ashl<mode>3_unsigned"
973 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
974 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
975 (match_operand:VDQI 2 "s_register_operand" "w")]
976 UNSPEC_ASHIFT_UNSIGNED))]
977 "TARGET_NEON"
978 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
979 [(set_attr "type" "neon_shift_reg<q>")]
980 )
981
;; Expand a vector arithmetic right shift.  A register shift count is
;; negated into a fresh pseudo and fed to ashl<mode>3_signed; any other
;; count is handled by the immediate pattern vashr<mode>3_imm.
982 (define_expand "vashr<mode>3"
983 [(set (match_operand:VDQIW 0 "s_register_operand" "")
984 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
985 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
986 "TARGET_NEON"
987 {
988 if (s_register_operand (operands[2], <MODE>mode))
989 {
990 rtx neg = gen_reg_rtx (<MODE>mode);
991 emit_insn (gen_neg<mode>2 (neg, operands[2]));
992 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
993 }
994 else
995 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
996 DONE;
997 })
998
;; Expand a vector logical right shift.  A register shift count is
;; negated into a fresh pseudo and fed to ashl<mode>3_unsigned; any other
;; count is handled by the immediate pattern vlshr<mode>3_imm.
999 (define_expand "vlshr<mode>3"
1000 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1001 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1002 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1003 "TARGET_NEON"
1004 {
1005 if (s_register_operand (operands[2], <MODE>mode))
1006 {
1007 rtx neg = gen_reg_rtx (<MODE>mode);
1008 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1009 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1010 }
1011 else
1012 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1013 DONE;
1014 })
1015
1016 ;; 64-bit shifts
1017
1018 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1019 ;; leaving the upper half uninitialized. This is OK since the shift
1020 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1021 ;; data flow analysis however, we pretend the full register is set
1022 ;; using an unspec.
;; Alternative 0 loads the SImode count from memory with vld1.32;
;; alternative 1 moves it from a core register with vmov.32.  Both
;; write element [0] of the destination.
1023 (define_insn "neon_load_count"
1024 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1025 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1026 UNSPEC_LOAD_COUNT))]
1027 "TARGET_NEON"
1028 "@
1029 vld1.32\t{%P0[0]}, %A1
1030 vmov.32\t%P0[0], %1"
1031 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1032 )
1033
;; 64-bit left shift in a "w" register with no clobbers.  Only valid
;; after reload; a constant shift count must lie in the range 0..63.
;; Alternative 0 shifts by an immediate, alternative 1 by a register.
1034 (define_insn "ashldi3_neon_noclobber"
1035 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1036 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1037 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1038 "TARGET_NEON && reload_completed
1039 && (!CONST_INT_P (operands[2])
1040 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1041 "@
1042 vshl.u64\t%P0, %P1, %2
1043 vshl.u64\t%P0, %P1, %P2"
1044 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1045 )
1046
;; 64-bit left shift, always output as "#" and split after reload.
;;
;; When the destination is a VFP register (IS_VFP_REGNUM):
;;   - a constant count below 1 becomes a plain DImode move;
;;   - a constant count above 63 is replaced by 63;
;;   - a non-constant count is first loaded into scratch operand 5 with
;;     neon_load_count;
;;   and the shift itself is emitted through ashldi3_neon_noclobber.
;;
;; Otherwise a shift by exactly 1 uses arm_ashldi3_1bit, provided source
;; and destination either do not overlap or are the same register; all
;; remaining cases go through arm_emit_coreregs_64bit_shift using scratch
;; operands 3 and 4.
1047 (define_insn_and_split "ashldi3_neon"
1048 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1049 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1050 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1051 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1052 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1053 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1054 (clobber (reg:CC_C CC_REGNUM))]
1055 "TARGET_NEON"
1056 "#"
1057 "TARGET_NEON && reload_completed"
1058 [(const_int 0)]
1059 "
1060 {
1061 if (IS_VFP_REGNUM (REGNO (operands[0])))
1062 {
1063 if (CONST_INT_P (operands[2]))
1064 {
1065 if (INTVAL (operands[2]) < 1)
1066 {
1067 emit_insn (gen_movdi (operands[0], operands[1]));
1068 DONE;
1069 }
1070 else if (INTVAL (operands[2]) > 63)
1071 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1072 }
1073 else
1074 {
1075 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1076 operands[2] = operands[5];
1077 }
1078
1079 /* Ditch the unnecessary clobbers. */
1080 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1081 operands[2]));
1082 }
1083 else
1084 {
1085 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1086 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1087 || REGNO (operands[0]) == REGNO (operands[1])))
1088 /* This clobbers CC. */
1089 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1090 else
1091 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1092 operands[2], operands[3], operands[4]);
1093 }
1094 DONE;
1095 }"
1096 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1097 (set_attr "opt" "*,*,speed,speed,*,*")
1098 (set_attr "type" "multiple")]
1099 )
1100
1101 ; The shift amount needs to be negated for right-shifts
;; Post-reload 64-bit vshl.s64 by a register count, expressed as
;; UNSPEC_ASHIFT_SIGNED.  The <shift>di3_neon splitter emits it with a
;; negated count.
1102 (define_insn "signed_shift_di3_neon"
1103 [(set (match_operand:DI 0 "s_register_operand" "=w")
1104 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1105 (match_operand:DI 2 "s_register_operand" " w")]
1106 UNSPEC_ASHIFT_SIGNED))]
1107 "TARGET_NEON && reload_completed"
1108 "vshl.s64\t%P0, %P1, %P2"
1109 [(set_attr "type" "neon_shift_reg")]
1110 )
1111
1112 ; The shift amount needs to be negated for right-shifts
;; Post-reload 64-bit vshl.u64 by a register count, expressed as
;; UNSPEC_ASHIFT_UNSIGNED.  The <shift>di3_neon splitter emits it with a
;; negated count.
1113 (define_insn "unsigned_shift_di3_neon"
1114 [(set (match_operand:DI 0 "s_register_operand" "=w")
1115 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1116 (match_operand:DI 2 "s_register_operand" " w")]
1117 UNSPEC_ASHIFT_UNSIGNED))]
1118 "TARGET_NEON && reload_completed"
1119 "vshl.u64\t%P0, %P1, %P2"
1120 [(set_attr "type" "neon_shift_reg")]
1121 )
1122
;; 64-bit arithmetic right shift by a constant with no clobbers.
;; Only valid after reload, and only for counts in the range 1..64.
1123 (define_insn "ashrdi3_neon_imm_noclobber"
1124 [(set (match_operand:DI 0 "s_register_operand" "=w")
1125 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1126 (match_operand:DI 2 "const_int_operand" " i")))]
1127 "TARGET_NEON && reload_completed
1128 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1129 "vshr.s64\t%P0, %P1, %2"
1130 [(set_attr "type" "neon_shift_imm")]
1131 )
1132
;; 64-bit logical right shift by a constant with no clobbers.
;; Only valid after reload, and only for counts in the range 1..64.
1133 (define_insn "lshrdi3_neon_imm_noclobber"
1134 [(set (match_operand:DI 0 "s_register_operand" "=w")
1135 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1136 (match_operand:DI 2 "const_int_operand" " i")))]
1137 "TARGET_NEON && reload_completed
1138 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1139 "vshr.u64\t%P0, %P1, %2"
1140 [(set_attr "type" "neon_shift_imm")]
1141 )
1142
1143 ;; ashrdi3_neon
1144 ;; lshrdi3_neon
;; 64-bit right shifts (iterated over RSHIFTS), always output as "#" and
;; split after reload.
;;
;; When the destination is a VFP register (IS_VFP_REGNUM):
;;   - a constant count below 1 becomes a plain DImode move;
;;   - a constant count above 64 is replaced by 64, and the shift is
;;     emitted through <shift>di3_neon_imm_noclobber;
;;   - a register count is negated into scratch operand 3, loaded into
;;     scratch operand 5 with neon_load_count, and applied with
;;     <shifttype>_shift_di3_neon.
;;
;; Otherwise a shift by exactly 1 uses arm_<shift>di3_1bit, provided
;; source and destination either do not overlap or are the same register;
;; all remaining cases go through arm_emit_coreregs_64bit_shift using
;; scratch operands 3 and 4.
1145 (define_insn_and_split "<shift>di3_neon"
1146 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1147 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1148 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1149 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1150 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1151 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1152 (clobber (reg:CC CC_REGNUM))]
1153 "TARGET_NEON"
1154 "#"
1155 "TARGET_NEON && reload_completed"
1156 [(const_int 0)]
1157 "
1158 {
1159 if (IS_VFP_REGNUM (REGNO (operands[0])))
1160 {
1161 if (CONST_INT_P (operands[2]))
1162 {
1163 if (INTVAL (operands[2]) < 1)
1164 {
1165 emit_insn (gen_movdi (operands[0], operands[1]));
1166 DONE;
1167 }
1168 else if (INTVAL (operands[2]) > 64)
1169 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1170
1171 /* Ditch the unnecessary clobbers. */
1172 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1173 operands[1],
1174 operands[2]));
1175 }
1176 else
1177 {
1178 /* We must use a negative left-shift. */
1179 emit_insn (gen_negsi2 (operands[3], operands[2]));
1180 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1181 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1182 operands[5]));
1183 }
1184 }
1185 else
1186 {
1187 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1188 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1189 || REGNO (operands[0]) == REGNO (operands[1])))
1190 /* This clobbers CC. */
1191 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1192 else
1193 /* This clobbers CC (ASHIFTRT by register only). */
1194 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1195 operands[2], operands[3], operands[4]);
1196 }
1197
1198 DONE;
1199 }"
1200 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1201 (set_attr "opt" "*,*,speed,speed,*,*")
1202 (set_attr "type" "multiple")]
1203 )
1204
1205 ;; Widening operations
1206
;; Widening signed sum of a quad-word integer vector (VQI) onto a
;; double-width accumulator.  The accumulator (operand 2) is copied into
;; operand 0 if they differ; the two halves of operand 1, selected by the
;; parallels p1 and p2, are then accumulated into operand 0 by the
;; vec_sel_widen_ssum_lo and vec_sel_widen_ssum_hi patterns in turn.
1207 (define_expand "widen_ssum<mode>3"
1208 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1209 (plus:<V_double_width>
1210 (sign_extend:<V_double_width>
1211 (match_operand:VQI 1 "s_register_operand" ""))
1212 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1213 "TARGET_NEON"
1214 {
1215 machine_mode mode = GET_MODE (operands[1]);
1216 rtx p1, p2;
1217
1218 p1 = arm_simd_vect_par_cnst_half (mode, false);
1219 p2 = arm_simd_vect_par_cnst_half (mode, true);
1220
1221 if (operands[0] != operands[2])
1222 emit_move_insn (operands[0], operands[2]);
1223
1224 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1225 operands[1],
1226 p1,
1227 operands[0]));
1228 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1229 operands[1],
1230 p2,
1231 operands[0]));
1232 DONE;
1233 }
1234 )
1235
;; Sign-extend the half of operand 1 selected by operand 2 (which must
;; satisfy vect_par_constant_low) and add it to accumulator operand 3,
;; which is tied to the output.  The vaddw source is written as %e1 on
;; little-endian targets and as %f1 when BYTES_BIG_ENDIAN.
1236 (define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
1237 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1238 (plus:<VW:V_widen>
1239 (sign_extend:<VW:V_widen>
1240 (vec_select:VW
1241 (match_operand:VQI 1 "s_register_operand" "%w")
1242 (match_operand:VQI 2 "vect_par_constant_low" "")))
1243 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1244 "TARGET_NEON"
1245 {
1246 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1247 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1248 }
1249 [(set_attr "type" "neon_add_widen")])
1250
;; Sign-extend the half of operand 1 selected by operand 2 (which must
;; satisfy vect_par_constant_high) and add it to accumulator operand 3,
;; which is tied to the output.  The vaddw source is written as %f1 on
;; little-endian targets and as %e1 when BYTES_BIG_ENDIAN.
1251 (define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
1252 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1253 (plus:<VW:V_widen>
1254 (sign_extend:<VW:V_widen>
1255 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1256 (match_operand:VQI 2 "vect_par_constant_high" "")))
1257 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1258 "TARGET_NEON"
1259 {
1260 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1261 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1262 }
1263 [(set_attr "type" "neon_add_widen")])
1264
;; Widening signed add for VW modes: operand 1 is sign-extended to the
;; wider mode and added to accumulator operand 2 with a single vaddw.
;; NOTE(review): operand 1 carries the commutative marker "%" although
;; operands 1 and 2 have different modes - confirm this is intended.
1265 (define_insn "widen_ssum<mode>3"
1266 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1267 (plus:<V_widen>
1268 (sign_extend:<V_widen>
1269 (match_operand:VW 1 "s_register_operand" "%w"))
1270 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1271 "TARGET_NEON"
1272 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1273 [(set_attr "type" "neon_add_widen")]
1274 )
1275
;; Widening unsigned sum of a quad-word integer vector (VQI) onto a
;; double-width accumulator.  The accumulator (operand 2) is copied into
;; operand 0 if they differ; the two halves of operand 1, selected by the
;; parallels p1 and p2, are then accumulated into operand 0 by the
;; vec_sel_widen_usum_lo and vec_sel_widen_usum_hi patterns in turn.
1276 (define_expand "widen_usum<mode>3"
1277 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1278 (plus:<V_double_width>
1279 (zero_extend:<V_double_width>
1280 (match_operand:VQI 1 "s_register_operand" ""))
1281 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1282 "TARGET_NEON"
1283 {
1284 machine_mode mode = GET_MODE (operands[1]);
1285 rtx p1, p2;
1286
1287 p1 = arm_simd_vect_par_cnst_half (mode, false);
1288 p2 = arm_simd_vect_par_cnst_half (mode, true);
1289
1290 if (operands[0] != operands[2])
1291 emit_move_insn (operands[0], operands[2]);
1292
1293 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1294 operands[1],
1295 p1,
1296 operands[0]));
1297 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1298 operands[1],
1299 p2,
1300 operands[0]));
1301 DONE;
1302 }
1303 )
1304
;; Zero-extend the half of operand 1 selected by operand 2 (which must
;; satisfy vect_par_constant_low) and add it to accumulator operand 3,
;; which is tied to the output.  The vaddw source is written as %e1 on
;; little-endian targets and as %f1 when BYTES_BIG_ENDIAN.
1305 (define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
1306 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1307 (plus:<VW:V_widen>
1308 (zero_extend:<VW:V_widen>
1309 (vec_select:VW
1310 (match_operand:VQI 1 "s_register_operand" "%w")
1311 (match_operand:VQI 2 "vect_par_constant_low" "")))
1312 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1313 "TARGET_NEON"
1314 {
1315 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1316 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1317 }
1318 [(set_attr "type" "neon_add_widen")])
1319
;; Zero-extend the half of operand 1 selected by operand 2 (which must
;; satisfy vect_par_constant_high) and add it to accumulator operand 3,
;; which is tied to the output.  The vaddw source is written as %f1 on
;; little-endian targets and as %e1 when BYTES_BIG_ENDIAN.
1320 (define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
1321 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1322 (plus:<VW:V_widen>
1323 (zero_extend:<VW:V_widen>
1324 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1325 (match_operand:VQI 2 "vect_par_constant_high" "")))
1326 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1327 "TARGET_NEON"
1328 {
1329 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1330 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1331 }
1332 [(set_attr "type" "neon_add_widen")])
1333
;; Widening unsigned add for VW modes: operand 1 is zero-extended to the
;; wider mode and added to accumulator operand 2 with a single vaddw.
;; NOTE(review): operand 1 carries the commutative marker "%" although
;; operands 1 and 2 have different modes - confirm this is intended.
1334 (define_insn "widen_usum<mode>3"
1335 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1336 (plus:<V_widen> (zero_extend:<V_widen>
1337 (match_operand:VW 1 "s_register_operand" "%w"))
1338 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1339 "TARGET_NEON"
1340 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1341 [(set_attr "type" "neon_add_widen")]
1342 )
1343
1344 ;; Helpers for quad-word reduction operations
1345
1346 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1347 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1348 ; N/2-element vector.
1349
;; V4SI variant: applies the VQH_OPS operation to elements 0-1 and
;; elements 2-3 of operand 1, producing a V2SI result.
1350 (define_insn "quad_halves_<code>v4si"
1351 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1352 (VQH_OPS:V2SI
1353 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1354 (parallel [(const_int 0) (const_int 1)]))
1355 (vec_select:V2SI (match_dup 1)
1356 (parallel [(const_int 2) (const_int 3)]))))]
1357 "TARGET_NEON"
1358 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1359 [(set_attr "vqh_mnem" "<VQH_mnem>")
1360 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1361 )
1362
;; V4SF variant: applies the VQHS_OPS operation to elements 0-1 and
;; elements 2-3 of operand 1, producing a V2SF result.  Only enabled
;; under flag_unsafe_math_optimizations.
1363 (define_insn "quad_halves_<code>v4sf"
1364 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1365 (VQHS_OPS:V2SF
1366 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1367 (parallel [(const_int 0) (const_int 1)]))
1368 (vec_select:V2SF (match_dup 1)
1369 (parallel [(const_int 2) (const_int 3)]))))]
1370 "TARGET_NEON && flag_unsafe_math_optimizations"
1371 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1372 [(set_attr "vqh_mnem" "<VQH_mnem>")
1373 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1374 )
1375
;; V8HI variant: applies the VQH_OPS operation to elements 0-3 and
;; elements 4-7 of operand 1, producing a V4HI result.
;; Operand 0 is only written by this pattern, so it uses the output-only
;; constraint "=w", matching the v4si and v4sf variants.
1376 (define_insn "quad_halves_<code>v8hi"
1377 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1378 (VQH_OPS:V4HI
1379 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1380 (parallel [(const_int 0) (const_int 1)
1381 (const_int 2) (const_int 3)]))
1382 (vec_select:V4HI (match_dup 1)
1383 (parallel [(const_int 4) (const_int 5)
1384 (const_int 6) (const_int 7)]))))]
1385 "TARGET_NEON"
1386 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1387 [(set_attr "vqh_mnem" "<VQH_mnem>")
1388 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1389 )
1390
;; V16QI variant: applies the VQH_OPS operation to elements 0-7 and
;; elements 8-15 of operand 1, producing a V8QI result.
;; Operand 0 is only written by this pattern, so it uses the output-only
;; constraint "=w", matching the v4si and v4sf variants.
1391 (define_insn "quad_halves_<code>v16qi"
1392 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1393 (VQH_OPS:V8QI
1394 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1395 (parallel [(const_int 0) (const_int 1)
1396 (const_int 2) (const_int 3)
1397 (const_int 4) (const_int 5)
1398 (const_int 6) (const_int 7)]))
1399 (vec_select:V8QI (match_dup 1)
1400 (parallel [(const_int 8) (const_int 9)
1401 (const_int 10) (const_int 11)
1402 (const_int 12) (const_int 13)
1403 (const_int 14) (const_int 15)]))))]
1404 "TARGET_NEON"
1405 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1406 [(set_attr "vqh_mnem" "<VQH_mnem>")
1407 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1408 )
1409
;; Copy the half-width vector operand 1 into the half of the 128-bit
;; operand 0 that lies at byte offset GET_MODE_SIZE (<V_HALF>mode),
;; accessed as a <V_HALF>mode subreg.
1410 (define_expand "move_hi_quad_<mode>"
1411 [(match_operand:ANY128 0 "s_register_operand" "")
1412 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1413 "TARGET_NEON"
1414 {
1415 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1416 GET_MODE_SIZE (<V_HALF>mode)),
1417 operands[1]);
1418 DONE;
1419 })
1420
;; Copy the half-width vector operand 1 into the half of the 128-bit
;; operand 0 that lies at byte offset 0, accessed as a <V_HALF>mode
;; subreg.
1421 (define_expand "move_lo_quad_<mode>"
1422 [(match_operand:ANY128 0 "s_register_operand" "")
1423 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1424 "TARGET_NEON"
1425 {
1426 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1427 <MODE>mode, 0),
1428 operands[1]);
1429 DONE;
1430 })
1431
1432 ;; Reduction operations
1433
;; Sum-reduce a double-word vector (VD) to a scalar.  The reduction is
;; done by neon_pairwise_reduce using the vpadd pattern, after which
;; element 0 of the result is extracted into operand 0.  Float modes
;; are only handled under flag_unsafe_math_optimizations.
1434 (define_expand "reduc_plus_scal_<mode>"
1435 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1436 (match_operand:VD 1 "s_register_operand" "")]
1437 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1438 {
1439 rtx vec = gen_reg_rtx (<MODE>mode);
1440 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1441 &gen_neon_vpadd_internal<mode>);
1442 /* The same result is actually computed into every element. */
1443 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1444 DONE;
1445 })
1446
;; Sum-reduce a quad-word vector (VQ) to a scalar: first add the two
;; halves together with quad_halves_plus, then reduce the half-width
;; result with the double-word expander.  Little-endian only; float
;; modes are only handled under flag_unsafe_math_optimizations.
1447 (define_expand "reduc_plus_scal_<mode>"
1448 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1449 (match_operand:VQ 1 "s_register_operand" "")]
1450 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1451 && !BYTES_BIG_ENDIAN"
1452 {
1453 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1454
1455 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1456 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1457
1458 DONE;
1459 })
1460
;; Sum-reduce a V2DI vector to a DImode scalar: arm_reduc_plus_internal_v2di
;; computes the sum into a temporary vector, from which element 0 is
;; extracted.  Little-endian only.
;; NOTE(review): operand 0 carries a "=w" constraint whereas the sibling
;; expanders use an empty constraint string; presumably it has no effect
;; in a define_expand - confirm.
1461 (define_expand "reduc_plus_scal_v2di"
1462 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1463 (match_operand:V2DI 1 "s_register_operand" "")]
1464 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1465 {
1466 rtx vec = gen_reg_rtx (V2DImode);
1467
1468 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1469 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1470
1471 DONE;
1472 })
1473
;; Helper for reduc_plus_scal_v2di, expressed as UNSPEC_VPADD.  Emits a
;; single vadd.i64 that adds the %e1 and %f1 parts of operand 1 and
;; writes the sum to %e0.  Little-endian only.
1474 (define_insn "arm_reduc_plus_internal_v2di"
1475 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1476 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1477 UNSPEC_VPADD))]
1478 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1479 "vadd.i64\t%e0, %e1, %f1"
1480 [(set_attr "type" "neon_add_q")]
1481 )
1482
;; Signed-minimum reduction of a double-word vector (VD) to a scalar,
;; done by neon_pairwise_reduce using the vpsmin pattern, followed by
;; extraction of element 0.  Float modes are only handled under
;; flag_unsafe_math_optimizations.
1483 (define_expand "reduc_smin_scal_<mode>"
1484 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1485 (match_operand:VD 1 "s_register_operand" "")]
1486 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1487 {
1488 rtx vec = gen_reg_rtx (<MODE>mode);
1489
1490 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1491 &gen_neon_vpsmin<mode>);
1492 /* The result is computed into every element of the vector. */
1493 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1494 DONE;
1495 })
1496
;; Signed-minimum reduction of a quad-word vector (VQ): combine the two
;; halves with quad_halves_smin, then reduce the half-width result with
;; the double-word expander.  Little-endian only; float modes are only
;; handled under flag_unsafe_math_optimizations.
1497 (define_expand "reduc_smin_scal_<mode>"
1498 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1499 (match_operand:VQ 1 "s_register_operand" "")]
1500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1501 && !BYTES_BIG_ENDIAN"
1502 {
1503 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1504
1505 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1506 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1507
1508 DONE;
1509 })
1510
;; Signed-maximum reduction of a double-word vector (VD) to a scalar,
;; done by neon_pairwise_reduce using the vpsmax pattern, followed by
;; extraction of element 0.  Float modes are only handled under
;; flag_unsafe_math_optimizations.
1511 (define_expand "reduc_smax_scal_<mode>"
1512 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1513 (match_operand:VD 1 "s_register_operand" "")]
1514 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1515 {
1516 rtx vec = gen_reg_rtx (<MODE>mode);
1517 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1518 &gen_neon_vpsmax<mode>);
1519 /* The result is computed into every element of the vector. */
1520 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1521 DONE;
1522 })
1523
;; Signed-maximum reduction of a quad-word vector (VQ): combine the two
;; halves with quad_halves_smax, then reduce the half-width result with
;; the double-word expander.  Little-endian only; float modes are only
;; handled under flag_unsafe_math_optimizations.
1524 (define_expand "reduc_smax_scal_<mode>"
1525 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1526 (match_operand:VQ 1 "s_register_operand" "")]
1527 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1528 && !BYTES_BIG_ENDIAN"
1529 {
1530 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1531
1532 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1533 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1534
1535 DONE;
1536 })
1537
;; Unsigned-minimum reduction of a double-word integer vector (VDI) to a
;; scalar, done by neon_pairwise_reduce using the vpumin pattern,
;; followed by extraction of element 0.
1538 (define_expand "reduc_umin_scal_<mode>"
1539 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1540 (match_operand:VDI 1 "s_register_operand" "")]
1541 "TARGET_NEON"
1542 {
1543 rtx vec = gen_reg_rtx (<MODE>mode);
1544 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1545 &gen_neon_vpumin<mode>);
1546 /* The result is computed into every element of the vector. */
1547 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1548 DONE;
1549 })
1550
;; Unsigned-minimum reduction of a quad-word integer vector (VQI):
;; combine the two halves with quad_halves_umin, then reduce the
;; half-width result with the double-word expander.  Little-endian only.
1551 (define_expand "reduc_umin_scal_<mode>"
1552 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1553 (match_operand:VQI 1 "s_register_operand" "")]
1554 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1555 {
1556 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1557
1558 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1559 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1560
1561 DONE;
1562 })
1563
;; Unsigned-maximum reduction of a double-word integer vector (VDI) to a
;; scalar, done by neon_pairwise_reduce using the vpumax pattern,
;; followed by extraction of element 0.
1564 (define_expand "reduc_umax_scal_<mode>"
1565 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1566 (match_operand:VDI 1 "s_register_operand" "")]
1567 "TARGET_NEON"
1568 {
1569 rtx vec = gen_reg_rtx (<MODE>mode);
1570 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1571 &gen_neon_vpumax<mode>);
1572 /* The result is computed into every element of the vector. */
1573 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1574 DONE;
1575 })
1576
;; Unsigned-maximum reduction of a quad-word integer vector (VQI):
;; combine the two halves with quad_halves_umax, then reduce the
;; half-width result with the double-word expander.  Little-endian only.
1577 (define_expand "reduc_umax_scal_<mode>"
1578 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1579 (match_operand:VQI 1 "s_register_operand" "")]
1580 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1581 {
1582 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1583
1584 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1585 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1586
1587 DONE;
1588 })
1589
;; vpadd on two double-word vectors (VD), expressed as UNSPEC_VPADD.
;; Passed to neon_pairwise_reduce by the reduc_plus_scal expander.
1590 (define_insn "neon_vpadd_internal<mode>"
1591 [(set (match_operand:VD 0 "s_register_operand" "=w")
1592 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1593 (match_operand:VD 2 "s_register_operand" "w")]
1594 UNSPEC_VPADD))]
1595 "TARGET_NEON"
1596 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1597 ;; Assume this schedules like vadd.
1598 [(set (attr "type")
1599 (if_then_else (match_test "<Is_float_mode>")
1600 (const_string "neon_fp_reduc_add_s<q>")
1601 (const_string "neon_reduc_add<q>")))]
1602 )
1603
;; vpmin with the signed element suffix on two double-word vectors (VD),
;; expressed as UNSPEC_VPSMIN.  Passed to neon_pairwise_reduce by the
;; reduc_smin_scal expander.
1604 (define_insn "neon_vpsmin<mode>"
1605 [(set (match_operand:VD 0 "s_register_operand" "=w")
1606 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1607 (match_operand:VD 2 "s_register_operand" "w")]
1608 UNSPEC_VPSMIN))]
1609 "TARGET_NEON"
1610 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1611 [(set (attr "type")
1612 (if_then_else (match_test "<Is_float_mode>")
1613 (const_string "neon_fp_reduc_minmax_s<q>")
1614 (const_string "neon_reduc_minmax<q>")))]
1615 )
1616
;; vpmax with the signed element suffix on two double-word vectors (VD),
;; expressed as UNSPEC_VPSMAX.  Passed to neon_pairwise_reduce by the
;; reduc_smax_scal expander.
1617 (define_insn "neon_vpsmax<mode>"
1618 [(set (match_operand:VD 0 "s_register_operand" "=w")
1619 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1620 (match_operand:VD 2 "s_register_operand" "w")]
1621 UNSPEC_VPSMAX))]
1622 "TARGET_NEON"
1623 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1624 [(set (attr "type")
1625 (if_then_else (match_test "<Is_float_mode>")
1626 (const_string "neon_fp_reduc_minmax_s<q>")
1627 (const_string "neon_reduc_minmax<q>")))]
1628 )
1629
;; vpmin with the unsigned element suffix on two double-word integer
;; vectors (VDI), expressed as UNSPEC_VPUMIN.  Passed to
;; neon_pairwise_reduce by the reduc_umin_scal expander.
1630 (define_insn "neon_vpumin<mode>"
1631 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1632 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1633 (match_operand:VDI 2 "s_register_operand" "w")]
1634 UNSPEC_VPUMIN))]
1635 "TARGET_NEON"
1636 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1637 [(set_attr "type" "neon_reduc_minmax<q>")]
1638 )
1639
;; vpmax with the unsigned element suffix on two double-word integer
;; vectors (VDI), expressed as UNSPEC_VPUMAX.  Passed to
;; neon_pairwise_reduce by the reduc_umax_scal expander.
1640 (define_insn "neon_vpumax<mode>"
1641 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1642 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1643 (match_operand:VDI 2 "s_register_operand" "w")]
1644 UNSPEC_VPUMAX))]
1645 "TARGET_NEON"
1646 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1647 [(set_attr "type" "neon_reduc_minmax<q>")]
1648 )
1649
1650 ;; Saturating arithmetic
1651
1652 ; NOTE: Neon supports many more saturating variants of instructions than the
1653 ; following, but these are all GCC currently understands.
1654 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1655 ; yet either, although these patterns may be used by intrinsics when they're
1656 ; added.
1657
;; Signed saturating add of two double-word vectors (VD): vqadd with the
;; signed element suffix.
1658 (define_insn "*ss_add<mode>_neon"
1659 [(set (match_operand:VD 0 "s_register_operand" "=w")
1660 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1661 (match_operand:VD 2 "s_register_operand" "w")))]
1662 "TARGET_NEON"
1663 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1664 [(set_attr "type" "neon_qadd<q>")]
1665 )
1666
;; Unsigned saturating add of two double-word vectors (VD): vqadd with
;; the unsigned element suffix.
1667 (define_insn "*us_add<mode>_neon"
1668 [(set (match_operand:VD 0 "s_register_operand" "=w")
1669 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1670 (match_operand:VD 2 "s_register_operand" "w")))]
1671 "TARGET_NEON"
1672 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1673 [(set_attr "type" "neon_qadd<q>")]
1674 )
1675
;; Signed saturating subtract of two double-word vectors (VD): vqsub
;; with the signed element suffix.
1676 (define_insn "*ss_sub<mode>_neon"
1677 [(set (match_operand:VD 0 "s_register_operand" "=w")
1678 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1679 (match_operand:VD 2 "s_register_operand" "w")))]
1680 "TARGET_NEON"
1681 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1682 [(set_attr "type" "neon_qsub<q>")]
1683 )
1684
;; Unsigned saturating subtract of two double-word vectors (VD): vqsub
;; with the unsigned element suffix.
1685 (define_insn "*us_sub<mode>_neon"
1686 [(set (match_operand:VD 0 "s_register_operand" "=w")
1687 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1688 (match_operand:VD 2 "s_register_operand" "w")))]
1689 "TARGET_NEON"
1690 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1691 [(set_attr "type" "neon_qsub<q>")]
1692 )
1693
1694 ;; Conditional instructions. These are comparisons with conditional moves for
1695 ;; vectors. They perform the assignment:
1696 ;;
1697 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1698 ;;
1699 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1700 ;; element-wise.
1701
;; Signed / floating-point vector conditional select.  The expander
;; builds a comparison mask from operands 4 and 5 according to the code
;; of operator 3, then uses neon_vbsl to pick between operands 1 and 2.
;; Codes that cannot be emitted directly are handled by emitting the
;; opposite comparison and swapping the two vbsl data operands.
;; Float modes are only handled under flag_unsafe_math_optimizations.
1702 (define_expand "vcond<mode><mode>"
1703 [(set (match_operand:VDQW 0 "s_register_operand" "")
1704 (if_then_else:VDQW
1705 (match_operator 3 "comparison_operator"
1706 [(match_operand:VDQW 4 "s_register_operand" "")
1707 (match_operand:VDQW 5 "nonmemory_operand" "")])
1708 (match_operand:VDQW 1 "s_register_operand" "")
1709 (match_operand:VDQW 2 "s_register_operand" "")))]
1710 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1711 {
1712 int inverse = 0;
1713 int use_zero_form = 0;
1714 int swap_bsl_operands = 0;
1715 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1716 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1717
1718 rtx (*base_comparison) (rtx, rtx, rtx);
1719 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1720
1721 switch (GET_CODE (operands[3]))
1722 {
1723 case GE:
1724 case GT:
1725 case LE:
1726 case LT:
1727 case EQ:
1728 if (operands[5] == CONST0_RTX (<MODE>mode))
1729 {
1730 use_zero_form = 1;
1731 break;
1732 }
1733 /* Fall through. */
1734 default:
1735 if (!REG_P (operands[5]))
1736 operands[5] = force_reg (<MODE>mode, operands[5]);
1737 }
1738
1739 switch (GET_CODE (operands[3]))
1740 {
1741 case LT:
1742 case UNLT:
1743 inverse = 1;
1744 /* Fall through. */
1745 case GE:
1746 case UNGE:
1747 case ORDERED:
1748 case UNORDERED:
1749 base_comparison = gen_neon_vcge<mode>;
1750 complimentary_comparison = gen_neon_vcgt<mode>;
1751 break;
1752 case LE:
1753 case UNLE:
1754 inverse = 1;
1755 /* Fall through. */
1756 case GT:
1757 case UNGT:
1758 base_comparison = gen_neon_vcgt<mode>;
1759 complimentary_comparison = gen_neon_vcge<mode>;
1760 break;
1761 case EQ:
1762 case NE:
1763 case UNEQ:
1764 base_comparison = gen_neon_vceq<mode>;
1765 complimentary_comparison = gen_neon_vceq<mode>;
1766 break;
1767 default:
1768 gcc_unreachable ();
1769 }
1770
1771 switch (GET_CODE (operands[3]))
1772 {
1773 case LT:
1774 case LE:
1775 case GT:
1776 case GE:
1777 case EQ:
1778 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1779 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1780 a GE b -> a GE b
1781 a GT b -> a GT b
1782 a LE b -> b GE a
1783 a LT b -> b GT a
1784 a EQ b -> a EQ b
1785 Note that there also exist direct comparison against 0 forms,
1786 so catch those as a special case. */
1787 if (use_zero_form)
1788 {
1789 inverse = 0;
1790 switch (GET_CODE (operands[3]))
1791 {
1792 case LT:
1793 base_comparison = gen_neon_vclt<mode>;
1794 break;
1795 case LE:
1796 base_comparison = gen_neon_vcle<mode>;
1797 break;
1798 default:
1799 /* Do nothing, other zero form cases already have the correct
1800 base_comparison. */
1801 break;
1802 }
1803 }
1804
1805 if (!inverse)
1806 emit_insn (base_comparison (mask, operands[4], operands[5]));
1807 else
1808 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1809 break;
1810 case UNLT:
1811 case UNLE:
1812 case UNGT:
1813 case UNGE:
1814 case NE:
1815 /* Vector compare returns false for lanes which are unordered, so if we use
1816 the inverse of the comparison we actually want to emit, then
1817 swap the operands to BSL, we will end up with the correct result.
1818 Note that a NE NaN and NaN NE b are true for all a, b.
1819
1820 Our transformations are:
1821 a UNGE b -> !(b GT a)
1822 a UNGT b -> !(b GE a)
1823 a UNLE b -> !(a GT b)
1824 a UNLT b -> !(a GE b)
1825 a NE b -> !(a EQ b) */
1826
1827 if (inverse)
1828 emit_insn (base_comparison (mask, operands[4], operands[5]));
1829 else
1830 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1831
1832 swap_bsl_operands = 1;
1833 break;
1834 case UNEQ:
1835 /* We check (a > b || b > a). Combining these comparisons gives us
1836 true iff (a != b && a ORDERED b); swapping the operands to BSL
1837 will then give us (a == b || a UNORDERED b) as intended. */
1838
1839 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1840 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1841 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1842 swap_bsl_operands = 1;
1843 break;
1844 case UNORDERED:
1845 /* Operands are ORDERED iff (a > b || b >= a).
1846 Swapping the operands to BSL will give the UNORDERED case. */
1847 swap_bsl_operands = 1;
1848 /* Fall through. */
1849 case ORDERED:
1850 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1851 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1852 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1853 break;
1854 default:
1855 gcc_unreachable ();
1856 }
1857
1858 if (swap_bsl_operands)
1859 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1860 operands[1]));
1861 else
1862 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1863 operands[2]));
1864 DONE;
1865 })
1866
;; Unsigned integer vector conditional select.  Builds a lane mask for
;; (operands[4] <operands[3]> operands[5]) in a fresh <V_cmp_result> register
;; and uses vbsl to pick operands[1] or operands[2] into operands[0].
;; Handles GEU, GTU, LEU, LTU, EQ and NE; any other code is unreachable.
1867 (define_expand "vcondu<mode><mode>"
1868 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1869 (if_then_else:VDQIW
1870 (match_operator 3 "arm_comparison_operator"
1871 [(match_operand:VDQIW 4 "s_register_operand" "")
1872 (match_operand:VDQIW 5 "s_register_operand" "")])
1873 (match_operand:VDQIW 1 "s_register_operand" "")
1874 (match_operand:VDQIW 2 "s_register_operand" "")))]
1875 "TARGET_NEON"
1876 {
1877 rtx mask;
1878 int inverse = 0;
1879
1880 mask = gen_reg_rtx (<V_cmp_result>mode);
1881
/* A literal zero is only left in place for EQ/NE, whose generator
   (neon_vceq<mode>) accepts reg_or_zero_operand.  The unsigned generators
   (neon_vcgeu/neon_vcgtu) take registers only, and the vcle/vclt zero forms
   are signed compares, so they must not be used for LEU/LTU: force the
   operand into a register and use the unsigned compares instead.  */
1882 if (!REG_P (operands[5])
1883 && !((GET_CODE (operands[3]) == EQ || GET_CODE (operands[3]) == NE)
1884 && operands[5] == CONST0_RTX (<MODE>mode)))
1885 operands[5] = force_reg (<MODE>mode, operands[5]);
1886
1887 switch (GET_CODE (operands[3]))
1888 {
1889 case GEU:
1890 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1891 break;
1892
1893 case GTU:
1894 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1895 break;
1896
1897 case EQ:
1898 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1899 break;
1900
1901 case LEU:
/* a LEU b <=> b GEU a.  */
1905 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1906 break;
1907
1908 case LTU:
/* a LTU b <=> b GTU a.  */
1912 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1913 break;
1914
1915 case NE:
/* Compute the EQ mask and swap the vbsl data operands below.  */
1916 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1917 inverse = 1;
1918 break;
1919
1920 default:
1921 gcc_unreachable ();
1922 }
1923
1924 if (inverse)
1925 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1926 operands[1]));
1927 else
1928 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1929 operands[2]));
1930
1931 DONE;
1932 })
1933
1934 ;; Patterns for builtins.
1935
1936 ; good for plain vadd, vaddq.
1937
;; Expander for the vadd builtin.  When <Is_float_mode> is false, or
;; flag_unsafe_math_optimizations is set, it emits the generic add<mode>3
;; pattern; otherwise it emits neon_vadd<mode>_unspec.
1938 (define_expand "neon_vadd<mode>"
1939 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1940 (match_operand:VCVTF 1 "s_register_operand" "w")
1941 (match_operand:VCVTF 2 "s_register_operand" "w")]
1942 "TARGET_NEON"
1943 {
1944 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1945 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1946 else
1947 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1948 operands[2]));
1949 DONE;
1950 })
1951
1952 ; Note that NEON operations don't support the full IEEE 754 standard: in
1953 ; particular, denormal values are flushed to zero. This means that GCC cannot
1954 ; use those instructions for autovectorization, etc. unless
1955 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1956 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
1957 ; header) must work in either case: if -funsafe-math-optimizations is given,
1958 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1959 ; expand to unspecs (which may potentially limit the extent to which they might
1960 ; be optimized by generic code).
1961
1962 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1963
;; vadd expressed as UNSPEC_VADD instead of (plus ...).  The "type" attribute
;; is neon_fp_addsub_s<q> when <Is_float_mode> holds and neon_add<q> otherwise.
1964 (define_insn "neon_vadd<mode>_unspec"
1965 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1966 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1967 (match_operand:VCVTF 2 "s_register_operand" "w")]
1968 UNSPEC_VADD))]
1969 "TARGET_NEON"
1970 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1971 [(set (attr "type")
1972 (if_then_else (match_test "<Is_float_mode>")
1973 (const_string "neon_fp_addsub_s<q>")
1974 (const_string "neon_add<q>")))]
1975 )
1976
;; vaddl.<sup><size> (VADDL unspecs): both inputs are VDI vectors and the
;; result has the corresponding <V_widen> mode.
1977 (define_insn "neon_vaddl<sup><mode>"
1978 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1979 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1980 (match_operand:VDI 2 "s_register_operand" "w")]
1981 VADDL))]
1982 "TARGET_NEON"
1983 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1984 [(set_attr "type" "neon_add_long")]
1985 )
1986
;; vaddw.<sup><size> (VADDW unspecs): operand 1 is already in the <V_widen>
;; mode, operand 2 is a VDI vector; the result is in the <V_widen> mode.
1987 (define_insn "neon_vaddw<sup><mode>"
1988 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1989 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1990 (match_operand:VDI 2 "s_register_operand" "w")]
1991 VADDW))]
1992 "TARGET_NEON"
1993 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1994 [(set_attr "type" "neon_add_widen")]
1995 )
1996
1997 ; vhadd and vrhadd.
1998
;; VHADD unspecs on VDQIW vectors; <r> selects between the vhadd and vrhadd
;; mnemonics and <sup> supplies the type letter printed before the size.
1999 (define_insn "neon_v<r>hadd<sup><mode>"
2000 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2001 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2002 (match_operand:VDQIW 2 "s_register_operand" "w")]
2003 VHADD))]
2004 "TARGET_NEON"
2005 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2006 [(set_attr "type" "neon_add_halve_q")]
2007 )
2008
;; vqadd.<sup><size> (VQADD unspecs) on VDQIX vectors; all three operands
;; share the same mode.
2009 (define_insn "neon_vqadd<sup><mode>"
2010 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2011 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2012 (match_operand:VDQIX 2 "s_register_operand" "w")]
2013 VQADD))]
2014 "TARGET_NEON"
2015 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2016 [(set_attr "type" "neon_qadd<q>")]
2017 )
2018
;; v<r>addhn (VADDHN unspecs): both inputs are VN vectors and the result has
;; the corresponding <V_narrow> mode.
2019 (define_insn "neon_v<r>addhn<mode>"
2020 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2021 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2022 (match_operand:VN 2 "s_register_operand" "w")]
2023 VADDHN))]
2024 "TARGET_NEON"
2025 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2026 [(set_attr "type" "neon_add_halve_narrow_q")]
2027 )
2028
2029 ;; Polynomial and Float multiplication.
;; vmul.<pf><size> as UNSPEC_VMUL over the VPF modes.  The "type" attribute is
;; neon_fp_mul_s<q> when <Is_float_mode> holds, neon_mul_<V_elem_ch><q>
;; otherwise.
2030 (define_insn "neon_vmul<pf><mode>"
2031 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2032 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2033 (match_operand:VPF 2 "s_register_operand" "w")]
2034 UNSPEC_VMUL))]
2035 "TARGET_NEON"
2036 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2037 [(set (attr "type")
2038 (if_then_else (match_test "<Is_float_mode>")
2039 (const_string "neon_fp_mul_s<q>")
2040 (const_string "neon_mul_<V_elem_ch><q>")))]
2041 )
2042
;; Expander for the vmla builtin.  When <Is_float_mode> is false, or
;; flag_unsafe_math_optimizations is set, it emits mul<mode>3add<mode>_neon;
;; otherwise it emits neon_vmla<mode>_unspec.  The four operands are passed
;; through in the same order in both cases.
2043 (define_expand "neon_vmla<mode>"
2044 [(match_operand:VDQW 0 "s_register_operand" "=w")
2045 (match_operand:VDQW 1 "s_register_operand" "0")
2046 (match_operand:VDQW 2 "s_register_operand" "w")
2047 (match_operand:VDQW 3 "s_register_operand" "w")]
2048 "TARGET_NEON"
2049 {
2050 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2051 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2052 operands[2], operands[3]));
2053 else
2054 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2055 operands[2], operands[3]));
2056 DONE;
2057 })
2058
;; Expander for the vfma builtin (requires TARGET_NEON && TARGET_FMA).
;; It reorders the inputs: fma<mode>4_intrinsic receives operands 2 and 3
;; first and operand 1 last.
;; NOTE(review): presumably operand 1 is the addend of the fused
;; multiply-add; confirm against fma<mode>4_intrinsic.
2059 (define_expand "neon_vfma<VCVTF:mode>"
2060 [(match_operand:VCVTF 0 "s_register_operand")
2061 (match_operand:VCVTF 1 "s_register_operand")
2062 (match_operand:VCVTF 2 "s_register_operand")
2063 (match_operand:VCVTF 3 "s_register_operand")]
2064 "TARGET_NEON && TARGET_FMA"
2065 {
2066 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2067 operands[1]));
2068 DONE;
2069 })
2070
;; Expander for the vfms builtin (requires TARGET_NEON && TARGET_FMA).
;; Same operand reordering as neon_vfma: fmsub<mode>4_intrinsic receives
;; operands 2 and 3 first and operand 1 last.
2071 (define_expand "neon_vfms<VCVTF:mode>"
2072 [(match_operand:VCVTF 0 "s_register_operand")
2073 (match_operand:VCVTF 1 "s_register_operand")
2074 (match_operand:VCVTF 2 "s_register_operand")
2075 (match_operand:VCVTF 3 "s_register_operand")]
2076 "TARGET_NEON && TARGET_FMA"
2077 {
2078 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2079 operands[1]));
2080 DONE;
2081 })
2082
2083 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2084
;; vmla expressed as UNSPEC_VMLA.  Operand 1 carries the matching constraint
;; "0" (same register as the output), so only operands 0, 2 and 3 appear in
;; the assembly template.
2085 (define_insn "neon_vmla<mode>_unspec"
2086 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2087 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2088 (match_operand:VDQW 2 "s_register_operand" "w")
2089 (match_operand:VDQW 3 "s_register_operand" "w")]
2090 UNSPEC_VMLA))]
2091 "TARGET_NEON"
2092 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2093 [(set (attr "type")
2094 (if_then_else (match_test "<Is_float_mode>")
2095 (const_string "neon_fp_mla_s<q>")
2096 (const_string "neon_mla_<V_elem_ch><q>")))]
2097 )
2098
;; vmlal.<sup><size> (VMLAL unspecs): operand 1 is a <V_widen> value tied to
;; the output register; operands 2 and 3 are VW vectors.
2099 (define_insn "neon_vmlal<sup><mode>"
2100 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2101 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2102 (match_operand:VW 2 "s_register_operand" "w")
2103 (match_operand:VW 3 "s_register_operand" "w")]
2104 VMLAL))]
2105 "TARGET_NEON"
2106 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2107 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2108 )
2109
;; Expander for the vmls builtin.  When <Is_float_mode> is false, or
;; flag_unsafe_math_optimizations is set, it emits
;; mul<mode>3neg<mode>add<mode>_neon; otherwise it emits
;; neon_vmls<mode>_unspec.  Operands are passed through unchanged.
2110 (define_expand "neon_vmls<mode>"
2111 [(match_operand:VDQW 0 "s_register_operand" "=w")
2112 (match_operand:VDQW 1 "s_register_operand" "0")
2113 (match_operand:VDQW 2 "s_register_operand" "w")
2114 (match_operand:VDQW 3 "s_register_operand" "w")]
2115 "TARGET_NEON"
2116 {
2117 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2118 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2119 operands[1], operands[2], operands[3]));
2120 else
2121 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2122 operands[2], operands[3]));
2123 DONE;
2124 })
2125
2126 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2127
;; vmls expressed as UNSPEC_VMLS.  Operand 1 carries the matching constraint
;; "0" (same register as the output), so only operands 0, 2 and 3 appear in
;; the assembly template.
2128 (define_insn "neon_vmls<mode>_unspec"
2129 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2130 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2131 (match_operand:VDQW 2 "s_register_operand" "w")
2132 (match_operand:VDQW 3 "s_register_operand" "w")]
2133 UNSPEC_VMLS))]
2134 "TARGET_NEON"
2135 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2136 [(set (attr "type")
2137 (if_then_else (match_test "<Is_float_mode>")
2138 (const_string "neon_fp_mla_s<q>")
2139 (const_string "neon_mla_<V_elem_ch><q>")))]
2140 )
2141
;; vmlsl.<sup><size> (VMLSL unspecs): operand 1 is a <V_widen> value tied to
;; the output register; operands 2 and 3 are VW vectors.
2142 (define_insn "neon_vmlsl<sup><mode>"
2143 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2144 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2145 (match_operand:VW 2 "s_register_operand" "w")
2146 (match_operand:VW 3 "s_register_operand" "w")]
2147 VMLSL))]
2148 "TARGET_NEON"
2149 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2150 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2151 )
2152
2153 ;; vqdmulh, vqrdmulh
;; VQDMULH unspecs over the VMDQI modes; <r> distinguishes the vqdmulh and
;; vqrdmulh mnemonics.  All three operands share the same mode.
2154 (define_insn "neon_vq<r>dmulh<mode>"
2155 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2156 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2157 (match_operand:VMDQI 2 "s_register_operand" "w")]
2158 VQDMULH))]
2159 "TARGET_NEON"
2160 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2161 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2162 )
2163
2164 ;; vqrdmlah, vqrdmlsh
;; Only available under TARGET_NEON_RDMA.  Operand 1 is tied to the output
;; register ("0"); <VQRDMLH_AS:neon_rdma_as> supplies the letter that
;; distinguishes the vqrdmlah and vqrdmlsh mnemonics.
2165 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2166 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2167 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2168 (match_operand:VMDQI 2 "s_register_operand" "w")
2169 (match_operand:VMDQI 3 "s_register_operand" "w")]
2170 VQRDMLH_AS))]
2171 "TARGET_NEON_RDMA"
2172 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2173 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2174 )
2175
;; vqdmlal (UNSPEC_VQDMLAL): operand 1 is a <V_widen> value tied to the
;; output register; operands 2 and 3 are VMDI vectors.
2176 (define_insn "neon_vqdmlal<mode>"
2177 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2178 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2179 (match_operand:VMDI 2 "s_register_operand" "w")
2180 (match_operand:VMDI 3 "s_register_operand" "w")]
2181 UNSPEC_VQDMLAL))]
2182 "TARGET_NEON"
2183 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2184 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2185 )
2186
;; vqdmlsl (UNSPEC_VQDMLSL): operand 1 is a <V_widen> value tied to the
;; output register; operands 2 and 3 are VMDI vectors.
2187 (define_insn "neon_vqdmlsl<mode>"
2188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2189 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2190 (match_operand:VMDI 2 "s_register_operand" "w")
2191 (match_operand:VMDI 3 "s_register_operand" "w")]
2192 UNSPEC_VQDMLSL))]
2193 "TARGET_NEON"
2194 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2195 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2196 )
2197
;; vmull.<sup><size> (VMULL unspecs): both inputs are VW vectors and the
;; result has the corresponding <V_widen> mode.
2198 (define_insn "neon_vmull<sup><mode>"
2199 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2200 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2201 (match_operand:VW 2 "s_register_operand" "w")]
2202 VMULL))]
2203 "TARGET_NEON"
2204 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2205 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2206 )
2207
;; vqdmull (UNSPEC_VQDMULL): both inputs are VMDI vectors and the result has
;; the corresponding <V_widen> mode.
2208 (define_insn "neon_vqdmull<mode>"
2209 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2210 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2211 (match_operand:VMDI 2 "s_register_operand" "w")]
2212 UNSPEC_VQDMULL))]
2213 "TARGET_NEON"
2214 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2215 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2216 )
2217
;; Expander for the vsub builtin.  When <Is_float_mode> is false, or
;; flag_unsafe_math_optimizations is set, it emits the generic sub<mode>3
;; pattern; otherwise it emits neon_vsub<mode>_unspec.
2218 (define_expand "neon_vsub<mode>"
2219 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2220 (match_operand:VCVTF 1 "s_register_operand" "w")
2221 (match_operand:VCVTF 2 "s_register_operand" "w")]
2222 "TARGET_NEON"
2223 {
2224 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2225 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2226 else
2227 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2228 operands[2]));
2229 DONE;
2230 })
2231
2232 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2233
;; vsub expressed as UNSPEC_VSUB instead of (minus ...).  The "type" attribute
;; is neon_fp_addsub_s<q> when <Is_float_mode> holds and neon_sub<q> otherwise.
2234 (define_insn "neon_vsub<mode>_unspec"
2235 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2236 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2237 (match_operand:VCVTF 2 "s_register_operand" "w")]
2238 UNSPEC_VSUB))]
2239 "TARGET_NEON"
2240 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2241 [(set (attr "type")
2242 (if_then_else (match_test "<Is_float_mode>")
2243 (const_string "neon_fp_addsub_s<q>")
2244 (const_string "neon_sub<q>")))]
2245 )
2246
;; vsubl.<sup><size> (VSUBL unspecs): both inputs are VDI vectors and the
;; result has the corresponding <V_widen> mode.
2247 (define_insn "neon_vsubl<sup><mode>"
2248 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2249 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2250 (match_operand:VDI 2 "s_register_operand" "w")]
2251 VSUBL))]
2252 "TARGET_NEON"
2253 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2254 [(set_attr "type" "neon_sub_long")]
2255 )
2256
;; vsubw.<sup><size> (VSUBW unspecs): operand 1 is already in the <V_widen>
;; mode, operand 2 is a VDI vector; the result is in the <V_widen> mode.
2257 (define_insn "neon_vsubw<sup><mode>"
2258 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2259 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2260 (match_operand:VDI 2 "s_register_operand" "w")]
2261 VSUBW))]
2262 "TARGET_NEON"
2263 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2264 [(set_attr "type" "neon_sub_widen")]
2265 )
2266
;; vqsub.<sup><size> (VQSUB unspecs) on VDQIX vectors; all three operands
;; share the same mode.
2267 (define_insn "neon_vqsub<sup><mode>"
2268 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2269 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2270 (match_operand:VDQIX 2 "s_register_operand" "w")]
2271 VQSUB))]
2272 "TARGET_NEON"
2273 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2274 [(set_attr "type" "neon_qsub<q>")]
2275 )
2276
;; vhsub.<sup><size> (VHSUB unspecs) on VDQIW vectors; all three operands
;; share the same mode.
2277 (define_insn "neon_vhsub<sup><mode>"
2278 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2279 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2280 (match_operand:VDQIW 2 "s_register_operand" "w")]
2281 VHSUB))]
2282 "TARGET_NEON"
2283 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2284 [(set_attr "type" "neon_sub_halve<q>")]
2285 )
2286
;; v<r>subhn (VSUBHN unspecs): both inputs are VN vectors and the result has
;; the corresponding <V_narrow> mode.
2287 (define_insn "neon_v<r>subhn<mode>"
2288 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2289 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2290 (match_operand:VN 2 "s_register_operand" "w")]
2291 VSUBHN))]
2292 "TARGET_NEON"
2293 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2294 [(set_attr "type" "neon_sub_halve_narrow_q")]
2295 )
2296
2297 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2298 ;; without unsafe math optimizations.
;; Expander for the vc<cmp_op> compare builtins over the COMPARISONS codes
;; and VDQW modes.  Operand 2 may be a register or zero
;; (reg_or_zero_operand).  Float vector modes without
;; -funsafe-math-optimizations dispatch on the mode to the V2SF/V4SF
;; _insn_unspec generators; everything else uses neon_vc<cmp_op><mode>_insn.
2299 (define_expand "neon_vc<cmp_op><mode>"
2300 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2301 (neg:<V_cmp_result>
2302 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2303 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2304 "TARGET_NEON"
2305 {
2306 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2307 are enabled. */
2308 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2309 && !flag_unsafe_math_optimizations)
2310 {
2311 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2312 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2313 whereas this expander iterates over the integer modes as well,
2314 but we will never expand to UNSPECs for the integer comparisons. */
2315 switch (<MODE>mode)
2316 {
2317 case V2SFmode:
2318 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2319 operands[1],
2320 operands[2]));
2321 break;
2322 case V4SFmode:
2323 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2324 operands[1],
2325 operands[2]));
2326 break;
2327 default:
2328 gcc_unreachable ();
2329 }
2330 }
2331 else
2332 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2333 operands[1],
2334 operands[2]));
2335 DONE;
2336 }
2337 )
2338
;; RTL form of the vector compares: (neg (COMPARISONS op1 op2)).  Disabled for
;; float vector modes unless flag_unsafe_math_optimizations is set.
;; Alternative 0 takes a register for operand 2 ("w"); alternative 1 takes
;; zero ("Dz") and prints it as #0.  The asm template is built at output time
;; so the element-type letter can be "f" for float vector modes and
;; <cmp_type> otherwise.
2339 (define_insn "neon_vc<cmp_op><mode>_insn"
2340 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2341 (neg:<V_cmp_result>
2342 (COMPARISONS:<V_cmp_result>
2343 (match_operand:VDQW 1 "s_register_operand" "w,w")
2344 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2345 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2346 && !flag_unsafe_math_optimizations)"
2347 {
2348 char pattern[100];
2349 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2350 " %%<V_reg>1, %s",
2351 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2352 ? "f" : "<cmp_type>",
2353 which_alternative == 0
2354 ? "%<V_reg>2" : "#0");
2355 output_asm_insn (pattern, operands);
2356 return "";
2357 }
2358 [(set (attr "type")
2359 (if_then_else (match_operand 2 "zero_operand")
2360 (const_string "neon_compare_zero<q>")
2361 (const_string "neon_compare<q>")))]
2362 )
2363
;; NEON_VCMP unspec form of the float compares for the VCVTF modes.
;; Alternative 0 takes a register for operand 2; alternative 1 takes zero
;; ("Dz") and prints it as #0.  The element type is always printed as "f".
2364 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2365 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2366 (unspec:<V_cmp_result>
2367 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2368 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2369 NEON_VCMP))]
2370 "TARGET_NEON"
2371 {
2372 char pattern[100];
2373 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2374 " %%<V_reg>1, %s",
2375 which_alternative == 0
2376 ? "%<V_reg>2" : "#0");
2377 output_asm_insn (pattern, operands);
2378 return "";
2379 }
2380 [(set_attr "type" "neon_fp_compare_s<q>")]
2381 )
2382
;; Unsigned vector compares (GTUGEU codes) on VDQIW vectors, printed with the
;; "u" element type.  Both inputs must be registers; there is no
;; compare-with-zero alternative here.
2383 (define_insn "neon_vc<cmp_op>u<mode>"
2384 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2385 (neg:<V_cmp_result>
2386 (GTUGEU:<V_cmp_result>
2387 (match_operand:VDQIW 1 "s_register_operand" "w")
2388 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2389 "TARGET_NEON"
2390 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2391 [(set_attr "type" "neon_compare<q>")]
2392 )
2393
;; Expander for the absolute-value compare builtins (GTGE codes applied to
;; (abs op1) and (abs op2)) on VCVTF vectors.  With
;; flag_unsafe_math_optimizations it emits the RTL form
;; neon_vca<cmp_op><mode>_insn, otherwise the _insn_unspec form.
2394 (define_expand "neon_vca<cmp_op><mode>"
2395 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2396 (neg:<V_cmp_result>
2397 (GTGE:<V_cmp_result>
2398 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2399 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2400 "TARGET_NEON"
2401 {
2402 if (flag_unsafe_math_optimizations)
2403 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2404 operands[2]));
2405 else
2406 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2407 operands[1],
2408 operands[2]));
2409 DONE;
2410 }
2411 )
2412
;; RTL form of the absolute compares, only enabled when
;; flag_unsafe_math_optimizations is set.  Emitted as vac<cmp_op>.
2413 (define_insn "neon_vca<cmp_op><mode>_insn"
2414 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2415 (neg:<V_cmp_result>
2416 (GTGE:<V_cmp_result>
2417 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2418 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2419 "TARGET_NEON && flag_unsafe_math_optimizations"
2420 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2421 [(set_attr "type" "neon_fp_compare_s<q>")]
2422 )
2423
;; NEON_VACMP unspec form of the absolute compares for VCVTF vectors; its
;; condition is TARGET_NEON alone.  Emitted as vac<cmp_op_unsp>.
2424 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2425 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2426 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2427 (match_operand:VCVTF 2 "s_register_operand" "w")]
2428 NEON_VACMP))]
2429 "TARGET_NEON"
2430 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2431 [(set_attr "type" "neon_fp_compare_s<q>")]
2432 )
2433
;; vtst.<size> (UNSPEC_VTST) on VDQIW vectors; all three operands share the
;; same mode.
2434 (define_insn "neon_vtst<mode>"
2435 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2436 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2437 (match_operand:VDQIW 2 "s_register_operand" "w")]
2438 UNSPEC_VTST))]
2439 "TARGET_NEON"
2440 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2441 [(set_attr "type" "neon_tst<q>")]
2442 )
2443
;; vabd.<sup><size> (VABD unspecs) on VDQIW integer vectors.
2444 (define_insn "neon_vabd<sup><mode>"
2445 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2446 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2447 (match_operand:VDQIW 2 "s_register_operand" "w")]
2448 VABD))]
2449 "TARGET_NEON"
2450 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2451 [(set_attr "type" "neon_abd<q>")]
2452 )
2453
;; vabd for the VCVTF modes (UNSPEC_VABD_F), typed as neon_fp_abd_s<q>.
2454 (define_insn "neon_vabdf<mode>"
2455 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2456 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2457 (match_operand:VCVTF 2 "s_register_operand" "w")]
2458 UNSPEC_VABD_F))]
2459 "TARGET_NEON"
2460 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2461 [(set_attr "type" "neon_fp_abd_s<q>")]
2462 )
2463
;; vabdl.<sup><size> (VABDL unspecs): both inputs are VW vectors and the
;; result has the corresponding <V_widen> mode.
2464 (define_insn "neon_vabdl<sup><mode>"
2465 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2466 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2467 (match_operand:VW 2 "s_register_operand" "w")]
2468 VABDL))]
2469 "TARGET_NEON"
2470 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2471 [(set_attr "type" "neon_abd_long")]
2472 )
2473
;; vaba.<sup><size>: (plus (unspec VABD [op2 op3]) op1), i.e. the VABD result
;; of operands 2 and 3 added to operand 1.  Operand 1 is tied to the output
;; register and therefore does not appear in the assembly template.
2474 (define_insn "neon_vaba<sup><mode>"
2475 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2476 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2477 (match_operand:VDQIW 3 "s_register_operand" "w")]
2478 VABD)
2479 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2480 "TARGET_NEON"
2481 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2482 [(set_attr "type" "neon_arith_acc<q>")]
2483 )
2484
;; vabal.<sup><size>: (plus (unspec VABDL [op2 op3]) op1).  Operands 2 and 3
;; are VW vectors; operand 1 is a <V_widen> value tied to the output register.
2485 (define_insn "neon_vabal<sup><mode>"
2486 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2487 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2488 (match_operand:VW 3 "s_register_operand" "w")]
2489 VABDL)
2490 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2491 "TARGET_NEON"
2492 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2493 [(set_attr "type" "neon_arith_acc<q>")]
2494 )
2495
;; v<maxmin>.<sup><size> (VMAXMIN unspecs) on VDQIW integer vectors.
2496 (define_insn "neon_v<maxmin><sup><mode>"
2497 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2498 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2499 (match_operand:VDQIW 2 "s_register_operand" "w")]
2500 VMAXMIN))]
2501 "TARGET_NEON"
2502 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2503 [(set_attr "type" "neon_minmax<q>")]
2504 )
2505
;; v<maxmin> for the VCVTF modes (VMAXMINF unspecs), typed as
;; neon_fp_minmax_s<q>.
2506 (define_insn "neon_v<maxmin>f<mode>"
2507 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2508 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2509 (match_operand:VCVTF 2 "s_register_operand" "w")]
2510 VMAXMINF))]
2511 "TARGET_NEON"
2512 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2513 [(set_attr "type" "neon_fp_minmax_s<q>")]
2514 )
2515
2516 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; VMAXMINFNM unspecs on VCVTF vectors; requires TARGET_FPU_ARMV8 in addition
;; to TARGET_NEON.  The mnemonic comes from <fmaxmin_op>.
2517 (define_insn "<fmaxmin><mode>3"
2518 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2519 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2520 (match_operand:VCVTF 2 "s_register_operand" "w")]
2521 VMAXMINFNM))]
2522 "TARGET_NEON && TARGET_FPU_ARMV8"
2523 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2524 [(set_attr "type" "neon_fp_minmax_s<q>")]
2525 )
2526
;; Expander for the vpadd builtin on VD vectors: forwards its three operands
;; unchanged to neon_vpadd_internal<mode>.
2527 (define_expand "neon_vpadd<mode>"
2528 [(match_operand:VD 0 "s_register_operand" "=w")
2529 (match_operand:VD 1 "s_register_operand" "w")
2530 (match_operand:VD 2 "s_register_operand" "w")]
2531 "TARGET_NEON"
2532 {
2533 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2534 operands[2]));
2535 DONE;
2536 })
2537
;; vpaddl.<sup><size> (VPADDL unspecs): one VDQIW input, result in the
;; corresponding <V_double_width> mode.
2538 (define_insn "neon_vpaddl<sup><mode>"
2539 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2540 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2541 VPADDL))]
2542 "TARGET_NEON"
2543 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2544 [(set_attr "type" "neon_reduc_add_long")]
2545 )
2546
;; vpadal.<sup><size> (VPADAL unspecs): operand 1 is a <V_double_width> value
;; tied to the output register; operand 2 is the VDQIW input.
2547 (define_insn "neon_vpadal<sup><mode>"
2548 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2549 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2550 (match_operand:VDQIW 2 "s_register_operand" "w")]
2551 VPADAL))]
2552 "TARGET_NEON"
2553 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2554 [(set_attr "type" "neon_reduc_add_acc")]
2555 )
2556
;; vp<maxmin>.<sup><size> (VPMAXMIN unspecs) on VDI integer vectors.
2557 (define_insn "neon_vp<maxmin><sup><mode>"
2558 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2559 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2560 (match_operand:VDI 2 "s_register_operand" "w")]
2561 VPMAXMIN))]
2562 "TARGET_NEON"
2563 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2564 [(set_attr "type" "neon_reduc_minmax<q>")]
2565 )
2566
;; vp<maxmin> for the VCVTF modes (VPMAXMINF unspecs), typed as
;; neon_fp_reduc_minmax_s<q>.
2567 (define_insn "neon_vp<maxmin>f<mode>"
2568 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2569 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2570 (match_operand:VCVTF 2 "s_register_operand" "w")]
2571 VPMAXMINF))]
2572 "TARGET_NEON"
2573 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2574 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
2575 )
2576
;; vrecps (UNSPEC_VRECPS) on VCVTF vectors; two inputs, one output, all in
;; the same mode.
2577 (define_insn "neon_vrecps<mode>"
2578 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2579 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2580 (match_operand:VCVTF 2 "s_register_operand" "w")]
2581 UNSPEC_VRECPS))]
2582 "TARGET_NEON"
2583 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2584 [(set_attr "type" "neon_fp_recps_s<q>")]
2585 )
2586
;; vrsqrts (UNSPEC_VRSQRTS) on VCVTF vectors; two inputs, one output, all in
;; the same mode.
2587 (define_insn "neon_vrsqrts<mode>"
2588 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2589 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2590 (match_operand:VCVTF 2 "s_register_operand" "w")]
2591 UNSPEC_VRSQRTS))]
2592 "TARGET_NEON"
2593 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2594 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2595 )
2596
;; Expander for the vabs builtin on VDQW vectors: forwards to the generic
;; abs<mode>2 pattern.
2597 (define_expand "neon_vabs<mode>"
2598 [(match_operand:VDQW 0 "s_register_operand" "")
2599 (match_operand:VDQW 1 "s_register_operand" "")]
2600 "TARGET_NEON"
2601 {
2602 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2603 DONE;
2604 })
2605
;; vqabs (UNSPEC_VQABS) on VDQIW vectors; one input, one output.
2606 (define_insn "neon_vqabs<mode>"
2607 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2608 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2609 UNSPEC_VQABS))]
2610 "TARGET_NEON"
2611 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2612 [(set_attr "type" "neon_qabs<q>")]
2613 )
2614
;; (bswap ...) on VDQHSD vectors, emitted as vrev<element size>.8.  Unlike the
;; surrounding patterns this one uses the register_operand predicate.
2615 (define_insn "neon_bswap<mode>"
2616 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2617 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2618 "TARGET_NEON"
2619 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2620 [(set_attr "type" "neon_rev<q>")]
2621 )
2622
;; Expander for the vneg builtin on VDQW vectors: forwards to the generic
;; neg<mode>2 pattern.
2623 (define_expand "neon_vneg<mode>"
2624 [(match_operand:VDQW 0 "s_register_operand" "")
2625 (match_operand:VDQW 1 "s_register_operand" "")]
2626 "TARGET_NEON"
2627 {
2628 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
2629 DONE;
2630 })
2631
;; copysign for VCVTF vectors.  Builds a <V_cmp_result> constant vector whose
;; elements are all 0x80000000, copies operands[2] into operands[0], and then
;; uses vbsl with that mask on operands[0] and operands[1].
;; NOTE(review): presumably vbsl takes mask-selected bits from its third
;; argument, giving operands[1] with the sign bit of operands[2]; confirm
;; against neon_vbsl<mode>.
2632 (define_expand "neon_copysignf<mode>"
2633 [(match_operand:VCVTF 0 "register_operand")
2634 (match_operand:VCVTF 1 "register_operand")
2635 (match_operand:VCVTF 2 "register_operand")]
2636 "TARGET_NEON"
2637 "{
2638 rtx v_bitmask_cast;
2639 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2640 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2641 rtvec v = rtvec_alloc (n_elt);
2642
2643 /* Create bitmask for vector select. */
/* Use gen_int_mode rather than GEN_INT: a CONST_INT must be sign-extended
   from the precision of its mode, and a plain 0x80000000 is not in that
   canonical form for a 32-bit element when HOST_WIDE_INT is wider.  */
2644 for (i = 0; i < n_elt; ++i)
2645 RTVEC_ELT (v, i) = gen_int_mode (0x80000000, GET_MODE_INNER (<VCVTF:V_cmp_result>mode));
2646
2647 emit_move_insn (v_bitmask,
2648 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2649 emit_move_insn (operands[0], operands[2]);
/* View the integer mask in the float vector mode expected by vbsl.  */
2650 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2651 <VCVTF:V_cmp_result>mode, 0);
2652 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
2653 operands[1]));
2654
2655 DONE;
2656 }"
2657 )
2658
;; vqneg (UNSPEC_VQNEG) on VDQIW vectors; one input, one output.
2659 (define_insn "neon_vqneg<mode>"
2660 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2661 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2662 UNSPEC_VQNEG))]
2663 "TARGET_NEON"
2664 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2665 [(set_attr "type" "neon_qneg<q>")]
2666 )
2667
;; vcls (UNSPEC_VCLS) on VDQIW vectors; one input, one output.
2668 (define_insn "neon_vcls<mode>"
2669 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2670 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2671 UNSPEC_VCLS))]
2672 "TARGET_NEON"
2673 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2674 [(set_attr "type" "neon_cls<q>")]
2675 )
2676
;; (clz ...) on VDQIW vectors, emitted as vclz.  Used by the neon_vclz<mode>
;; builtin expander via gen_clz<mode>2.
2677 (define_insn "clz<mode>2"
2678 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2679 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2680 "TARGET_NEON"
2681 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2682 [(set_attr "type" "neon_cnt<q>")]
2683 )
2684
;; Expander for the vclz builtin on VDQIW vectors: forwards to clz<mode>2.
2685 (define_expand "neon_vclz<mode>"
2686 [(match_operand:VDQIW 0 "s_register_operand" "")
2687 (match_operand:VDQIW 1 "s_register_operand" "")]
2688 "TARGET_NEON"
2689 {
2690 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
2691 DONE;
2692 })
2693
;; (popcount ...) on VE vectors, emitted as vcnt.  Used by the neon_vcnt<mode>
;; builtin expander via gen_popcount<mode>2.
2694 (define_insn "popcount<mode>2"
2695 [(set (match_operand:VE 0 "s_register_operand" "=w")
2696 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2697 "TARGET_NEON"
2698 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2699 [(set_attr "type" "neon_cnt<q>")]
2700 )
2701
;; Expander for the vcnt builtin on VE vectors: forwards to popcount<mode>2.
2702 (define_expand "neon_vcnt<mode>"
2703 [(match_operand:VE 0 "s_register_operand" "=w")
2704 (match_operand:VE 1 "s_register_operand" "w")]
2705 "TARGET_NEON"
2706 {
2707 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
2708 DONE;
2709 })
2710
;; vrecpe (UNSPEC_VRECPE) over the V32 modes; the element type printed is
;; <V_u_elem>.
2711 (define_insn "neon_vrecpe<mode>"
2712 [(set (match_operand:V32 0 "s_register_operand" "=w")
2713 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2714 UNSPEC_VRECPE))]
2715 "TARGET_NEON"
2716 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2717 [(set_attr "type" "neon_fp_recpe_s<q>")]
2718 )
2719
;; vrsqrte (UNSPEC_VRSQRTE) over the V32 modes; the element type printed is
;; <V_u_elem>.
2720 (define_insn "neon_vrsqrte<mode>"
2721 [(set (match_operand:V32 0 "s_register_operand" "=w")
2722 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2723 UNSPEC_VRSQRTE))]
2724 "TARGET_NEON"
2725 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2726 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
2727 )
2728
;; Expander named neon_vmvn<mode>: forwards both operands unchanged to the
;; one_cmpl<mode>2 pattern (defined outside this part of the file).
2729 (define_expand "neon_vmvn<mode>"
2730 [(match_operand:VDQIW 0 "s_register_operand" "")
2731 (match_operand:VDQIW 1 "s_register_operand" "")]
2732 "TARGET_NEON"
2733 {
2734 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
2735 DONE;
2736 })
2737
;; Extract one element (lane index in operand 2) from a VD-mode vector and
;; sign-extend it into an SImode core register, using "vmov.s<size>".
;; On big-endian targets the lane index is mirrored (nunits - 1 - elt) just
;; before the assembly is printed.
2738 (define_insn "neon_vget_lane<mode>_sext_internal"
2739 [(set (match_operand:SI 0 "s_register_operand" "=r")
2740 (sign_extend:SI
2741 (vec_select:<V_elem>
2742 (match_operand:VD 1 "s_register_operand" "w")
2743 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2744 "TARGET_NEON"
2745 {
2746 if (BYTES_BIG_ENDIAN)
2747 {
2748 int elt = INTVAL (operands[2]);
2749 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2750 operands[2] = GEN_INT (elt);
2751 }
2752 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2753 }
2754 [(set_attr "type" "neon_to_gp")]
2755 )
2756
;; Extract one element (lane index in operand 2) from a VD-mode vector and
;; zero-extend it into an SImode core register, using "vmov.u<size>".
;; On big-endian targets the lane index is mirrored (nunits - 1 - elt) just
;; before the assembly is printed.
2757 (define_insn "neon_vget_lane<mode>_zext_internal"
2758 [(set (match_operand:SI 0 "s_register_operand" "=r")
2759 (zero_extend:SI
2760 (vec_select:<V_elem>
2761 (match_operand:VD 1 "s_register_operand" "w")
2762 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2763 "TARGET_NEON"
2764 {
2765 if (BYTES_BIG_ENDIAN)
2766 {
2767 int elt = INTVAL (operands[2]);
2768 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2769 operands[2] = GEN_INT (elt);
2770 }
2771 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2772 }
2773 [(set_attr "type" "neon_to_gp")]
2774 )
2775
;; VQ2-mode counterpart of the sign-extending lane extract.  The lane index is
;; split into a half selector (elt / halfelts) and an index within that half
;; (elt % halfelts); a <V_HALF>mode register rtx for the selected half is
;; built by hand and the "vmov.s<size>" is printed via output_asm_insn, so the
;; pattern itself returns an empty template.
;; NOTE(review): the "2 *" register step presumably reflects each D register
;; spanning two hard register numbers -- confirm against the register layout.
2776 (define_insn "neon_vget_lane<mode>_sext_internal"
2777 [(set (match_operand:SI 0 "s_register_operand" "=r")
2778 (sign_extend:SI
2779 (vec_select:<V_elem>
2780 (match_operand:VQ2 1 "s_register_operand" "w")
2781 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2782 "TARGET_NEON"
2783 {
2784 rtx ops[3];
2785 int regno = REGNO (operands[1]);
2786 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2787 unsigned int elt = INTVAL (operands[2]);
2788 unsigned int elt_adj = elt % halfelts;
2789
2790 if (BYTES_BIG_ENDIAN)
2791 elt_adj = halfelts - 1 - elt_adj;
2792
2793 ops[0] = operands[0];
2794 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2795 ops[2] = GEN_INT (elt_adj);
2796 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2797
2798 return "";
2799 }
2800 [(set_attr "type" "neon_to_gp_q")]
2801 )
2802
;; VQ2-mode counterpart of the zero-extending lane extract.  The lane index is
;; split into a half selector (elt / halfelts) and an index within that half
;; (elt % halfelts); a <V_HALF>mode register rtx for the selected half is
;; built by hand and the "vmov.u<size>" is printed via output_asm_insn, so the
;; pattern itself returns an empty template.
;; NOTE(review): the "2 *" register step presumably reflects each D register
;; spanning two hard register numbers -- confirm against the register layout.
2803 (define_insn "neon_vget_lane<mode>_zext_internal"
2804 [(set (match_operand:SI 0 "s_register_operand" "=r")
2805 (zero_extend:SI
2806 (vec_select:<V_elem>
2807 (match_operand:VQ2 1 "s_register_operand" "w")
2808 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2809 "TARGET_NEON"
2810 {
2811 rtx ops[3];
2812 int regno = REGNO (operands[1]);
2813 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2814 unsigned int elt = INTVAL (operands[2]);
2815 unsigned int elt_adj = elt % halfelts;
2816
2817 if (BYTES_BIG_ENDIAN)
2818 elt_adj = halfelts - 1 - elt_adj;
2819
2820 ops[0] = operands[0];
2821 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2822 ops[2] = GEN_INT (elt_adj);
2823 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2824
2825 return "";
2826 }
2827 [(set_attr "type" "neon_to_gp_q")]
2828 )
2829
;; Expander for the signed lane read over the VDQW modes.  Operand 0 receives
;; the element, operand 1 is the vector and operand 2 the constant lane index.
;; On big-endian targets the lane index is first XORed with
;; (elements per 64 bits - 1).  Modes with 32-bit elements are handled by
;; vec_extract<mode>; all other element sizes go through the sign-extending
;; internal pattern.
2830 (define_expand "neon_vget_lane<mode>"
2831 [(match_operand:<V_ext> 0 "s_register_operand" "")
2832 (match_operand:VDQW 1 "s_register_operand" "")
2833 (match_operand:SI 2 "immediate_operand" "")]
2834 "TARGET_NEON"
2835 {
2836 if (BYTES_BIG_ENDIAN)
2837 {
2838 /* The intrinsics are defined in terms of a model where the
2839 element ordering in memory is vldm order, whereas the generic
2840 RTL is defined in terms of a model where the element ordering
2841 in memory is array order. Convert the lane number to conform
2842 to this model. */
2843 unsigned int elt = INTVAL (operands[2]);
2844 unsigned int reg_nelts
2845 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2846 elt ^= reg_nelts - 1;
2847 operands[2] = GEN_INT (elt);
2848 }
2849
2850 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2851 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2852 else
2853 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
2854 operands[1],
2855 operands[2]));
2856 DONE;
2857 })
2858
;; Expander for the unsigned lane read over the VDQIW modes.  Operand 0
;; receives the element, operand 1 is the vector and operand 2 the constant
;; lane index.  On big-endian targets the lane index is first XORed with
;; (elements per 64 bits - 1).  Modes with 32-bit elements are handled by
;; vec_extract<mode>; all other element sizes go through the zero-extending
;; internal pattern.
2859 (define_expand "neon_vget_laneu<mode>"
2860 [(match_operand:<V_ext> 0 "s_register_operand" "")
2861 (match_operand:VDQIW 1 "s_register_operand" "")
2862 (match_operand:SI 2 "immediate_operand" "")]
2863 "TARGET_NEON"
2864 {
2865 if (BYTES_BIG_ENDIAN)
2866 {
2867 /* The intrinsics are defined in terms of a model where the
2868 element ordering in memory is vldm order, whereas the generic
2869 RTL is defined in terms of a model where the element ordering
2870 in memory is array order. Convert the lane number to conform
2871 to this model. */
2872 unsigned int elt = INTVAL (operands[2]);
2873 unsigned int reg_nelts
2874 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2875 elt ^= reg_nelts - 1;
2876 operands[2] = GEN_INT (elt);
2877 }
2878
2879 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2880 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2881 else
2882 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
2883 operands[1],
2884 operands[2]));
2885 DONE;
2886 })
2887
;; DImode "lane read": the body is a plain move of operand 1 into operand 0.
;; The lane index (operand 2) is accepted but never inspected.
2888 (define_expand "neon_vget_lanedi"
2889 [(match_operand:DI 0 "s_register_operand" "=r")
2890 (match_operand:DI 1 "s_register_operand" "w")
2891 (match_operand:SI 2 "immediate_operand" "")]
2892 "TARGET_NEON"
2893 {
2894 emit_move_insn (operands[0], operands[1]);
2895 DONE;
2896 })
2897
;; Read one DImode lane out of a V2DI vector.  After the big-endian lane
;; adjustment (XOR with 1) the lane must be 0 or 1 (asserted): lane 0 is moved
;; from gen_lowpart of the vector, lane 1 from gen_highpart.
2898 (define_expand "neon_vget_lanev2di"
2899 [(match_operand:DI 0 "s_register_operand" "")
2900 (match_operand:V2DI 1 "s_register_operand" "")
2901 (match_operand:SI 2 "immediate_operand" "")]
2902 "TARGET_NEON"
2903 {
2904 int lane;
2905
2906 if (BYTES_BIG_ENDIAN)
2907 {
2908 /* The intrinsics are defined in terms of a model where the
2909 element ordering in memory is vldm order, whereas the generic
2910 RTL is defined in terms of a model where the element ordering
2911 in memory is array order. Convert the lane number to conform
2912 to this model. */
2913 unsigned int elt = INTVAL (operands[2]);
2914 unsigned int reg_nelts = 2;
2915 elt ^= reg_nelts - 1;
2916 operands[2] = GEN_INT (elt);
2917 }
2918
2919 lane = INTVAL (operands[2]);
2920 gcc_assert ((lane ==0) || (lane == 1));
2921 emit_move_insn (operands[0], lane == 0
2922 ? gen_lowpart (DImode, operands[1])
2923 : gen_highpart (DImode, operands[1]));
2924 DONE;
2925 })
2926
;; Lane write for the VDQ modes: operand 1 is the new element, operand 2 the
;; vector being updated, operand 3 the constant lane index and operand 0 the
;; result.  On big-endian targets the lane index is XORed with
;; (elements per 64 bits - 1).  The lane is handed to vec_set<mode>_internal
;; as a one-hot mask (1 << elt) rather than as an index.
2927 (define_expand "neon_vset_lane<mode>"
2928 [(match_operand:VDQ 0 "s_register_operand" "=w")
2929 (match_operand:<V_elem> 1 "s_register_operand" "r")
2930 (match_operand:VDQ 2 "s_register_operand" "0")
2931 (match_operand:SI 3 "immediate_operand" "i")]
2932 "TARGET_NEON"
2933 {
2934 unsigned int elt = INTVAL (operands[3]);
2935
2936 if (BYTES_BIG_ENDIAN)
2937 {
2938 unsigned int reg_nelts
2939 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2940 elt ^= reg_nelts - 1;
2941 }
2942
2943 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2944 GEN_INT (1 << elt), operands[2]));
2945 DONE;
2946 })
2947
2948 ; Operands 2 & 3 are ignored: the body is a plain move of operand 1 into operand 0 (compare neon_vget_lanedi, which likewise ignores its lane operand).
2949
2950 (define_expand "neon_vset_lanedi"
2951 [(match_operand:DI 0 "s_register_operand" "=w")
2952 (match_operand:DI 1 "s_register_operand" "r")
2953 (match_operand:DI 2 "s_register_operand" "0")
2954 (match_operand:SI 3 "immediate_operand" "i")]
2955 "TARGET_NEON"
2956 {
2957 emit_move_insn (operands[0], operands[1]);
2958 DONE;
2959 })
2960
;; Build a VD_RE-mode vector from a DImode value: operand 1 is reinterpreted
;; in the vector mode with gen_lowpart and then moved into operand 0.
2961 (define_expand "neon_vcreate<mode>"
2962 [(match_operand:VD_RE 0 "s_register_operand" "")
2963 (match_operand:DI 1 "general_operand" "")]
2964 "TARGET_NEON"
2965 {
2966 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2967 emit_move_insn (operands[0], src);
2968 DONE;
2969 })
2970
;; Duplicate a scalar held in a core register (constraint "r") into every
;; element of a VX-mode vector, emitted as "vdup".
2971 (define_insn "neon_vdup_n<mode>"
2972 [(set (match_operand:VX 0 "s_register_operand" "=w")
2973 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2974 "TARGET_NEON"
2975 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2976 [(set_attr "type" "neon_from_gp<q>")]
2977 )
2978
;; Duplicate an HFmode scalar held in a core register into all four elements
;; of a V4HF vector, emitted as "vdup.16" with a %P destination.
2979 (define_insn "neon_vdup_nv4hf"
2980 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2981 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
2982 "TARGET_NEON"
2983 "vdup.16\t%P0, %1"
2984 [(set_attr "type" "neon_from_gp")]
2985 )
2986
;; Duplicate an HFmode scalar held in a core register into all eight elements
;; of a V8HF vector, emitted as "vdup.16" with a %q destination.
2987 (define_insn "neon_vdup_nv8hf"
2988 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
2989 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
2990 "TARGET_NEON"
2991 "vdup.16\t%q0, %1"
2992 [(set_attr "type" "neon_from_gp_q")]
2993 )
2994
;; vec_duplicate for the V32 modes with two alternatives: the scalar may come
;; from a core register ("r", printed as %1) or from a register matching
;; constraint "t" (printed with the %y modifier).  The "type" attribute is
;; given per alternative.
2995 (define_insn "neon_vdup_n<mode>"
2996 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2997 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2998 "TARGET_NEON"
2999 "@
3000 vdup.<V_sz_elem>\t%<V_reg>0, %1
3001 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3002 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3003 )
3004
;; DImode "duplicate": source and destination are both DImode, so the body is
;; a plain move of operand 1 into operand 0.
3005 (define_expand "neon_vdup_ndi"
3006 [(match_operand:DI 0 "s_register_operand" "=w")
3007 (match_operand:DI 1 "s_register_operand" "r")]
3008 "TARGET_NEON"
3009 {
3010 emit_move_insn (operands[0], operands[1]);
3011 DONE;
3012 }
3013 )
3014
;; Duplicate a DImode value into both elements of a V2DI vector.  Two "vmov"
;; instructions are emitted (hence length 8): one writes %e0 and one writes
;; %f0.  Alternative 0 takes the source from core registers (%Q1, %R1),
;; alternative 1 from a vector register (%P1).
3015 (define_insn "neon_vdup_nv2di"
3016 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3017 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3018 "TARGET_NEON"
3019 "@
3020 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3021 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3022 [(set_attr "length" "8")
3023 (set_attr "type" "multiple")]
3024 )
3025
;; Duplicate one lane (index in operand 2) of the <V_double_vector_mode>
;; vector in operand 1 across every element of the VDQW result.  On
;; big-endian targets the lane index is mirrored before printing.  The
;; destination is printed as %P0 when <Is_d_reg> holds and as %q0 otherwise;
;; the source is always printed as %P1.
3026 (define_insn "neon_vdup_lane<mode>_internal"
3027 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3028 (vec_duplicate:VDQW
3029 (vec_select:<V_elem>
3030 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3031 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3032 "TARGET_NEON"
3033 {
3034 if (BYTES_BIG_ENDIAN)
3035 {
3036 int elt = INTVAL (operands[2]);
3037 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3038 operands[2] = GEN_INT (elt);
3039 }
3040 if (<Is_d_reg>)
3041 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3042 else
3043 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3044 }
3045 [(set_attr "type" "neon_dup<q>")]
3046 )
3047
;; Expander for neon_vdup_lane<mode>: on big-endian targets the lane index is
;; XORed with (elements per 64 bits - 1) before all three operands are passed
;; on to neon_vdup_lane<mode>_internal.
3048 (define_expand "neon_vdup_lane<mode>"
3049 [(match_operand:VDQW 0 "s_register_operand" "=w")
3050 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3051 (match_operand:SI 2 "immediate_operand" "i")]
3052 "TARGET_NEON"
3053 {
3054 if (BYTES_BIG_ENDIAN)
3055 {
3056 unsigned int elt = INTVAL (operands[2]);
3057 unsigned int reg_nelts
3058 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3059 elt ^= reg_nelts - 1;
3060 operands[2] = GEN_INT (elt);
3061 }
3062 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3063 operands[2]));
3064 DONE;
3065 })
3066
3067 ; Scalar index is ignored, since only zero is valid here.
;; The expansion is therefore a plain move of operand 1 into operand 0.
3068 (define_expand "neon_vdup_lanedi"
3069 [(match_operand:DI 0 "s_register_operand" "=w")
3070 (match_operand:DI 1 "s_register_operand" "w")
3071 (match_operand:SI 2 "immediate_operand" "i")]
3072 "TARGET_NEON"
3073 {
3074 emit_move_insn (operands[0], operands[1]);
3075 DONE;
3076 })
3077
3078 ; Likewise for v2di, as the DImode second operand has only a single element.
;; The lane index (operand 2) is not used; the work is delegated to
;; neon_vdup_nv2di.
3079 (define_expand "neon_vdup_lanev2di"
3080 [(match_operand:V2DI 0 "s_register_operand" "=w")
3081 (match_operand:DI 1 "s_register_operand" "w")
3082 (match_operand:SI 2 "immediate_operand" "i")]
3083 "TARGET_NEON"
3084 {
3085 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3086 DONE;
3087 })
3088
3089 ; Disabled before reload because we don't want combine doing something silly,
3090 ; but used by the post-reload expansion of neon_vcombine.
;; The pattern is a pair of sets that exchange operands 0 and 1 (both are
;; read-write, "+w") and is emitted as a single "vswp".
3091 (define_insn "*neon_vswp<mode>"
3092 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3093 (match_operand:VDQX 1 "s_register_operand" "+w"))
3094 (set (match_dup 1) (match_dup 0))]
3095 "TARGET_NEON && reload_completed"
3096 "vswp\t%<V_reg>0, %<V_reg>1"
3097 [(set_attr "type" "neon_permute<q>")]
3098 )
3099
3100 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3101 ;; dest vector.
3102 ;; FIXME: A different implementation of this builtin could make it much
3103 ;; more likely that we wouldn't actually need to output anything (we could make
3104 ;; it so that the reg allocator puts things in the right places magically
3105 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
3106
;; The insn has no assembly of its own (template "#"): once reload has
;; completed it is always split, and the replacement code is produced by
;; neon_split_vcombine.
3107 (define_insn_and_split "neon_vcombine<mode>"
3108 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3109 (vec_concat:<V_DOUBLE>
3110 (match_operand:VDX 1 "s_register_operand" "w")
3111 (match_operand:VDX 2 "s_register_operand" "w")))]
3112 "TARGET_NEON"
3113 "#"
3114 "&& reload_completed"
3115 [(const_int 0)]
3116 {
3117 neon_split_vcombine (operands);
3118 DONE;
3119 }
3120 [(set_attr "type" "multiple")]
3121 )
3122
;; Move one <V_HALF>mode half of a VQX vector into operand 0.  The half is
;; addressed as a subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), i.e. the half that does not start at byte 0.
3123 (define_expand "neon_vget_high<mode>"
3124 [(match_operand:<V_HALF> 0 "s_register_operand")
3125 (match_operand:VQX 1 "s_register_operand")]
3126 "TARGET_NEON"
3127 {
3128 emit_move_insn (operands[0],
3129 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3130 GET_MODE_SIZE (<V_HALF>mode)));
3131 DONE;
3132 })
3133
;; Move one <V_HALF>mode half of a VQX vector into operand 0.  The half is
;; addressed as a subreg of operand 1 at byte offset 0.
3134 (define_expand "neon_vget_low<mode>"
3135 [(match_operand:<V_HALF> 0 "s_register_operand")
3136 (match_operand:VQX 1 "s_register_operand")]
3137 "TARGET_NEON"
3138 {
3139 emit_move_insn (operands[0],
3140 simplify_gen_subreg (<V_HALF>mode, operands[1],
3141 <MODE>mode, 0));
3142 DONE;
3143 })
3144
;; Standard-named signed integer-to-float vector conversion (RTL "float"),
;; emitted as "vcvt.f32.s32".  Not available when flag_rounding_math is set.
3145 (define_insn "float<mode><V_cvtto>2"
3146 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3147 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3148 "TARGET_NEON && !flag_rounding_math"
3149 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3150 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3151 )
3152
;; Standard-named unsigned integer-to-float vector conversion (RTL
;; "unsigned_float"), emitted as "vcvt.f32.u32".  Not available when
;; flag_rounding_math is set.
3153 (define_insn "floatuns<mode><V_cvtto>2"
3154 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3155 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3156 "TARGET_NEON && !flag_rounding_math"
3157 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3158 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3159 )
3160
;; Standard-named float-to-signed-integer vector conversion (RTL "fix"),
;; emitted as "vcvt.s32.f32".
3161 (define_insn "fix_trunc<mode><V_cvtto>2"
3162 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3163 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3164 "TARGET_NEON"
3165 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3166 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3167 )
3168
;; Standard-named float-to-unsigned-integer vector conversion (RTL
;; "unsigned_fix"), emitted as "vcvt.u32.f32".
3169 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3170 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3171 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3172 "TARGET_NEON"
3173 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3174 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3175 )
3176
;; Float-to-integer "vcvt" for the VCVTF modes, kept as an unspec from the
;; VCVT_US iterator; <sup> supplies the signedness letter in the mnemonic.
3177 (define_insn "neon_vcvt<sup><mode>"
3178 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3179 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3180 VCVT_US))]
3181 "TARGET_NEON"
3182 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3183 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3184 )
3185
;; Integer-to-float "vcvt" for the VCVTI modes, kept as an unspec from the
;; VCVT_US iterator; <sup> supplies the signedness letter in the mnemonic.
3186 (define_insn "neon_vcvt<sup><mode>"
3187 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3188 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3189 VCVT_US))]
3190 "TARGET_NEON"
3191 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3192 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3193 )
3194
;; Convert a V4HF vector (operand 1, printed as %P1) to V4SF (operand 0,
;; printed as %q0) with "vcvt.f32.f16".  Requires TARGET_FP16 in addition to
;; TARGET_NEON.
3195 (define_insn "neon_vcvtv4sfv4hf"
3196 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3197 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3198 UNSPEC_VCVT))]
3199 "TARGET_NEON && TARGET_FP16"
3200 "vcvt.f32.f16\t%q0, %P1"
3201 [(set_attr "type" "neon_fp_cvt_widen_h")]
3202 )
3203
;; Convert a V4SF vector (operand 1, printed as %q1) to V4HF (operand 0,
;; printed as %P0) with "vcvt.f16.f32".  Requires TARGET_FP16 in addition to
;; TARGET_NEON.
3204 (define_insn "neon_vcvtv4hfv4sf"
3205 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3206 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3207 UNSPEC_VCVT))]
3208 "TARGET_NEON && TARGET_FP16"
3209 "vcvt.f16.f32\t%P0, %q1"
3210 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
3211 )
3212
;; Float-to-integer "vcvt" with an extra immediate (operand 2) that is printed
;; as the third assembly operand.  The immediate is validated with
;; neon_const_bounds (operands[2], 1, 33) before the template is returned.
;; NOTE(review): neon_const_bounds is defined elsewhere; presumably the upper
;; bound is exclusive, giving a valid range of 1..32 -- confirm.
3213 (define_insn "neon_vcvt<sup>_n<mode>"
3214 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3215 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3216 (match_operand:SI 2 "immediate_operand" "i")]
3217 VCVT_US_N))]
3218 "TARGET_NEON"
3219 {
3220 neon_const_bounds (operands[2], 1, 33);
3221 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3222 }
3223 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3224 )
3225
;; Integer-to-float "vcvt" with an extra immediate (operand 2) that is printed
;; as the third assembly operand.  The immediate is validated with
;; neon_const_bounds (operands[2], 1, 33) before the template is returned.
;; NOTE(review): neon_const_bounds is defined elsewhere; presumably the upper
;; bound is exclusive, giving a valid range of 1..32 -- confirm.
3226 (define_insn "neon_vcvt<sup>_n<mode>"
3227 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3228 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3229 (match_operand:SI 2 "immediate_operand" "i")]
3230 VCVT_US_N))]
3231 "TARGET_NEON"
3232 {
3233 neon_const_bounds (operands[2], 1, 33);
3234 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3235 }
3236 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3237 )
3238
;; Emit "vmovn" for the VN modes: the source (operand 1) is printed as %q1 and
;; the <V_narrow>-mode result (operand 0) as %P0.  Kept as UNSPEC_VMOVN.
3239 (define_insn "neon_vmovn<mode>"
3240 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3241 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3242 UNSPEC_VMOVN))]
3243 "TARGET_NEON"
3244 "vmovn.<V_if_elem>\t%P0, %q1"
3245 [(set_attr "type" "neon_shift_imm_narrow_q")]
3246 )
3247
;; Emit "vqmovn" for the VN modes, with the signedness letter taken from
;; <sup>.  The source is printed as %q1 and the <V_narrow>-mode result as %P0.
3248 (define_insn "neon_vqmovn<sup><mode>"
3249 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3250 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3251 VQMOVN))]
3252 "TARGET_NEON"
3253 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3254 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3255 )
3256
;; Emit "vqmovun" for the VN modes (UNSPEC_VQMOVUN).  The source is printed as
;; %q1 and the <V_narrow>-mode result as %P0.
3257 (define_insn "neon_vqmovun<mode>"
3258 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3259 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3260 UNSPEC_VQMOVUN))]
3261 "TARGET_NEON"
3262 "vqmovun.<V_s_elem>\t%P0, %q1"
3263 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3264 )
3265
;; Emit "vmovl" for the VW modes, with the signedness letter taken from <sup>.
;; The source is printed as %P1 and the <V_widen>-mode result as %q0.
3266 (define_insn "neon_vmovl<sup><mode>"
3267 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3268 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3269 VMOVL))]
3270 "TARGET_NEON"
3271 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3272 [(set_attr "type" "neon_shift_imm_long")]
3273 )
3274
;; "vmul" by scalar for the VMD modes: operand 1 is multiplied by the lane of
;; operand 2 selected by the immediate operand 3 (printed as %P2[%c3]).
;; The scheduling type depends on <Is_float_mode>.
3275 (define_insn "neon_vmul_lane<mode>"
3276 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3277 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3278 (match_operand:VMD 2 "s_register_operand"
3279 "<scalar_mul_constraint>")
3280 (match_operand:SI 3 "immediate_operand" "i")]
3281 UNSPEC_VMUL_LANE))]
3282 "TARGET_NEON"
3283 {
3284 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3285 }
3286 [(set (attr "type")
3287 (if_then_else (match_test "<Is_float_mode>")
3288 (const_string "neon_fp_mul_s_scalar<q>")
3289 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3290 )
3291
;; "vmul" by scalar for the VMQ modes: operand 1 (%q1) is multiplied by the
;; lane of the <V_HALF>-mode operand 2 selected by the immediate operand 3
;; (printed as %P2[%c3]).  The scheduling type depends on <Is_float_mode>.
3292 (define_insn "neon_vmul_lane<mode>"
3293 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3294 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3295 (match_operand:<V_HALF> 2 "s_register_operand"
3296 "<scalar_mul_constraint>")
3297 (match_operand:SI 3 "immediate_operand" "i")]
3298 UNSPEC_VMUL_LANE))]
3299 "TARGET_NEON"
3300 {
3301 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3302 }
3303 [(set (attr "type")
3304 (if_then_else (match_test "<Is_float_mode>")
3305 (const_string "neon_fp_mul_s_scalar<q>")
3306 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3307 )
3308
;; "vmull" by scalar for the VMDI modes: operand 1 (%P1) times the lane of
;; operand 2 selected by operand 3, producing a <V_widen>-mode result (%q0).
;; <sup> supplies the signedness letter.
3309 (define_insn "neon_vmull<sup>_lane<mode>"
3310 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3311 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3312 (match_operand:VMDI 2 "s_register_operand"
3313 "<scalar_mul_constraint>")
3314 (match_operand:SI 3 "immediate_operand" "i")]
3315 VMULL_LANE))]
3316 "TARGET_NEON"
3317 {
3318 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3319 }
3320 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
3321 )
3322
;; "vqdmull" by scalar for the VMDI modes: operand 1 (%P1) and the lane of
;; operand 2 selected by operand 3, producing a <V_widen>-mode result (%q0).
3323 (define_insn "neon_vqdmull_lane<mode>"
3324 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3325 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3326 (match_operand:VMDI 2 "s_register_operand"
3327 "<scalar_mul_constraint>")
3328 (match_operand:SI 3 "immediate_operand" "i")]
3329 UNSPEC_VQDMULL_LANE))]
3330 "TARGET_NEON"
3331 {
3332 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3333 }
3334 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
3335 )
3336
;; "vqdmulh"/"vqrdmulh" by scalar (the optional "r" comes from <r>) for the
;; VMQI modes: operand 1 (%q1) and the lane of the <V_HALF>-mode operand 2
;; selected by operand 3, with the result in %q0.
3337 (define_insn "neon_vq<r>dmulh_lane<mode>"
3338 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3339 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3340 (match_operand:<V_HALF> 2 "s_register_operand"
3341 "<scalar_mul_constraint>")
3342 (match_operand:SI 3 "immediate_operand" "i")]
3343 VQDMULH_LANE))]
3344 "TARGET_NEON"
3345 {
3346 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3347 }
3348 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3349 )
3350
;; "vqdmulh"/"vqrdmulh" by scalar (the optional "r" comes from <r>) for the
;; VMDI modes: operand 1 and the lane of operand 2 selected by operand 3, with
;; the result in operand 0.  Every vector operand is printed with %P, so the
;; scheduling type carries no "_q" suffix; the suffixed type belongs to the
;; VMQI form of this pattern, which prints %q operands.
3351 (define_insn "neon_vq<r>dmulh_lane<mode>"
3352 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3353 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3354 (match_operand:VMDI 2 "s_register_operand"
3355 "<scalar_mul_constraint>")
3356 (match_operand:SI 3 "immediate_operand" "i")]
3357 VQDMULH_LANE))]
3358 "TARGET_NEON"
3359 {
3360 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3361 }
3362 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
3363 )
3364
3365 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI form.  Operand 1 is tied to the destination (constraint "0") and so is
;; not printed; operand 2 (%q2) is combined with the lane of the <V_HALF>-mode
;; operand 3 selected by operand 4.  Requires TARGET_NEON_RDMA.
3366 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3367 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3368 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3369 (match_operand:VMQI 2 "s_register_operand" "w")
3370 (match_operand:<V_HALF> 3 "s_register_operand"
3371 "<scalar_mul_constraint>")
3372 (match_operand:SI 4 "immediate_operand" "i")]
3373 VQRDMLH_AS))]
3374 "TARGET_NEON_RDMA"
3375 {
3376 return
3377 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3378 }
3379 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
3380 )
3381
;; VMDI form of vqrdmlah/vqrdmlsh by scalar.  Operand 1 is tied to the
;; destination (constraint "0") and so is not printed; operand 2 (%P2) is
;; combined with the lane of operand 3 selected by operand 4.  Requires
;; TARGET_NEON_RDMA.
3382 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3383 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3384 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3385 (match_operand:VMDI 2 "s_register_operand" "w")
3386 (match_operand:VMDI 3 "s_register_operand"
3387 "<scalar_mul_constraint>")
3388 (match_operand:SI 4 "immediate_operand" "i")]
3389 VQRDMLH_AS))]
3390 "TARGET_NEON_RDMA"
3391 {
3392 return
3393 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3394 }
3395 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
3396 )
3397
;; "vmla" by scalar for the VMD modes.  Operand 1 is the accumulator, tied to
;; the destination (constraint "0"); operand 2 is multiplied by the lane of
;; operand 3 selected by operand 4.  The scheduling type depends on
;; <Is_float_mode>.
3398 (define_insn "neon_vmla_lane<mode>"
3399 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3400 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3401 (match_operand:VMD 2 "s_register_operand" "w")
3402 (match_operand:VMD 3 "s_register_operand"
3403 "<scalar_mul_constraint>")
3404 (match_operand:SI 4 "immediate_operand" "i")]
3405 UNSPEC_VMLA_LANE))]
3406 "TARGET_NEON"
3407 {
3408 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3409 }
3410 [(set (attr "type")
3411 (if_then_else (match_test "<Is_float_mode>")
3412 (const_string "neon_fp_mla_s_scalar<q>")
3413 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3414 )
3415
;; "vmla" by scalar for the VMQ modes.  Operand 1 is the accumulator, tied to
;; the destination (constraint "0"); operand 2 (%q2) is multiplied by the lane
;; of the <V_HALF>-mode operand 3 selected by operand 4.  The scheduling type
;; depends on <Is_float_mode>.
3416 (define_insn "neon_vmla_lane<mode>"
3417 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3418 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3419 (match_operand:VMQ 2 "s_register_operand" "w")
3420 (match_operand:<V_HALF> 3 "s_register_operand"
3421 "<scalar_mul_constraint>")
3422 (match_operand:SI 4 "immediate_operand" "i")]
3423 UNSPEC_VMLA_LANE))]
3424 "TARGET_NEON"
3425 {
3426 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3427 }
3428 [(set (attr "type")
3429 (if_then_else (match_test "<Is_float_mode>")
3430 (const_string "neon_fp_mla_s_scalar<q>")
3431 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3432 )
3433
;; "vmlal" by scalar for the VMDI modes.  The <V_widen>-mode accumulator
;; (operand 1) is tied to the destination; operand 2 (%P2) is multiplied by
;; the lane of operand 3 selected by operand 4.  <sup> supplies the signedness
;; letter.
3434 (define_insn "neon_vmlal<sup>_lane<mode>"
3435 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3436 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3437 (match_operand:VMDI 2 "s_register_operand" "w")
3438 (match_operand:VMDI 3 "s_register_operand"
3439 "<scalar_mul_constraint>")
3440 (match_operand:SI 4 "immediate_operand" "i")]
3441 VMLAL_LANE))]
3442 "TARGET_NEON"
3443 {
3444 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3445 }
3446 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3447 )
3448
;; "vqdmlal" by scalar for the VMDI modes.  The <V_widen>-mode accumulator
;; (operand 1) is tied to the destination; operand 2 (%P2) is combined with
;; the lane of operand 3 selected by operand 4.
3449 (define_insn "neon_vqdmlal_lane<mode>"
3450 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3451 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3452 (match_operand:VMDI 2 "s_register_operand" "w")
3453 (match_operand:VMDI 3 "s_register_operand"
3454 "<scalar_mul_constraint>")
3455 (match_operand:SI 4 "immediate_operand" "i")]
3456 UNSPEC_VQDMLAL_LANE))]
3457 "TARGET_NEON"
3458 {
3459 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3460 }
3461 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3462 )
3463
;; "vmls" by scalar for the VMD modes.  Operand 1 is the accumulator, tied to
;; the destination (constraint "0"); operand 2 is multiplied by the lane of
;; operand 3 selected by operand 4.  The scheduling type depends on
;; <Is_float_mode>.
3464 (define_insn "neon_vmls_lane<mode>"
3465 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3466 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3467 (match_operand:VMD 2 "s_register_operand" "w")
3468 (match_operand:VMD 3 "s_register_operand"
3469 "<scalar_mul_constraint>")
3470 (match_operand:SI 4 "immediate_operand" "i")]
3471 UNSPEC_VMLS_LANE))]
3472 "TARGET_NEON"
3473 {
3474 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3475 }
3476 [(set (attr "type")
3477 (if_then_else (match_test "<Is_float_mode>")
3478 (const_string "neon_fp_mla_s_scalar<q>")
3479 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3480 )
3481
;; "vmls" by scalar for the VMQ modes.  Operand 1 is the accumulator, tied to
;; the destination (constraint "0"); operand 2 (%q2) is multiplied by the lane
;; of the <V_HALF>-mode operand 3 selected by operand 4.  The scheduling type
;; depends on <Is_float_mode>.
3482 (define_insn "neon_vmls_lane<mode>"
3483 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3484 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3485 (match_operand:VMQ 2 "s_register_operand" "w")
3486 (match_operand:<V_HALF> 3 "s_register_operand"
3487 "<scalar_mul_constraint>")
3488 (match_operand:SI 4 "immediate_operand" "i")]
3489 UNSPEC_VMLS_LANE))]
3490 "TARGET_NEON"
3491 {
3492 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3493 }
3494 [(set (attr "type")
3495 (if_then_else (match_test "<Is_float_mode>")
3496 (const_string "neon_fp_mla_s_scalar<q>")
3497 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3498 )
3499
;; "vmlsl" by scalar for the VMDI modes.  The <V_widen>-mode accumulator
;; (operand 1) is tied to the destination; operand 2 (%P2) is multiplied by
;; the lane of operand 3 selected by operand 4.  <sup> supplies the signedness
;; letter.
3500 (define_insn "neon_vmlsl<sup>_lane<mode>"
3501 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3502 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3503 (match_operand:VMDI 2 "s_register_operand" "w")
3504 (match_operand:VMDI 3 "s_register_operand"
3505 "<scalar_mul_constraint>")
3506 (match_operand:SI 4 "immediate_operand" "i")]
3507 VMLSL_LANE))]
3508 "TARGET_NEON"
3509 {
3510 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3511 }
3512 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3513 )
3514
;; "vqdmlsl" by scalar for the VMDI modes.  The <V_widen>-mode accumulator
;; (operand 1) is tied to the destination; operand 2 (%P2) is combined with
;; the lane of operand 3 selected by operand 4.
3515 (define_insn "neon_vqdmlsl_lane<mode>"
3516 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3517 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3518 (match_operand:VMDI 2 "s_register_operand" "w")
3519 (match_operand:VMDI 3 "s_register_operand"
3520 "<scalar_mul_constraint>")
3521 (match_operand:SI 4 "immediate_operand" "i")]
3522 UNSPEC_VQDMLSL_LANE))]
3523 "TARGET_NEON"
3524 {
3525 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3526 }
3527 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3528 )
3529
3530 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3531 ; core register into a temp register, then use a scalar taken from that. This
3532 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3533 ; or extracted from another vector. The latter case it's currently better to
3534 ; use the "_lane" variant, and the former case can probably be implemented
3535 ; using vld1_lane, but that hasn't been done yet.
3536
;; Multiply a VMD vector (operand 1) by a scalar (operand 2).  The scalar is
;; written into lane 0 of a fresh temporary vector with neon_vset_lane<mode>,
;; and neon_vmul_lane<mode> is then used with lane index 0.
3537 (define_expand "neon_vmul_n<mode>"
3538 [(match_operand:VMD 0 "s_register_operand" "")
3539 (match_operand:VMD 1 "s_register_operand" "")
3540 (match_operand:<V_elem> 2 "s_register_operand" "")]
3541 "TARGET_NEON"
3542 {
3543 rtx tmp = gen_reg_rtx (<MODE>mode);
3544 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3545 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3546 const0_rtx));
3547 DONE;
3548 })
3549
;; Multiply a VMQ vector (operand 1) by a scalar (operand 2).  The scalar is
;; written into lane 0 of a fresh <V_HALF>-mode temporary with
;; neon_vset_lane<V_half>, and neon_vmul_lane<mode> is then used with lane
;; index 0.
3550 (define_expand "neon_vmul_n<mode>"
3551 [(match_operand:VMQ 0 "s_register_operand" "")
3552 (match_operand:VMQ 1 "s_register_operand" "")
3553 (match_operand:<V_elem> 2 "s_register_operand" "")]
3554 "TARGET_NEON"
3555 {
3556 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3557 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3558 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3559 const0_rtx));
3560 DONE;
3561 })
3562
;; Widening multiply of a VMDI vector (operand 1) by a scalar (operand 2),
;; signed variant.  The scalar is written into lane 0 of a fresh temporary
;; vector and neon_vmulls_lane<mode> is then used with lane index 0.
3563 (define_expand "neon_vmulls_n<mode>"
3564 [(match_operand:<V_widen> 0 "s_register_operand" "")
3565 (match_operand:VMDI 1 "s_register_operand" "")
3566 (match_operand:<V_elem> 2 "s_register_operand" "")]
3567 "TARGET_NEON"
3568 {
3569 rtx tmp = gen_reg_rtx (<MODE>mode);
3570 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3571 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
3572 const0_rtx));
3573 DONE;
3574 })
3575
;; Widening multiply of a VMDI vector (operand 1) by a scalar (operand 2),
;; unsigned variant.  The scalar is written into lane 0 of a fresh temporary
;; vector and neon_vmullu_lane<mode> is then used with lane index 0.
3576 (define_expand "neon_vmullu_n<mode>"
3577 [(match_operand:<V_widen> 0 "s_register_operand" "")
3578 (match_operand:VMDI 1 "s_register_operand" "")
3579 (match_operand:<V_elem> 2 "s_register_operand" "")]
3580 "TARGET_NEON"
3581 {
3582 rtx tmp = gen_reg_rtx (<MODE>mode);
3583 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3584 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
3585 const0_rtx));
3586 DONE;
3587 })
3588
;; vqdmull of a VMDI vector (operand 1) by a scalar (operand 2).  The scalar
;; is written into lane 0 of a fresh temporary vector and
;; neon_vqdmull_lane<mode> is then used with lane index 0.
3589 (define_expand "neon_vqdmull_n<mode>"
3590 [(match_operand:<V_widen> 0 "s_register_operand" "")
3591 (match_operand:VMDI 1 "s_register_operand" "")
3592 (match_operand:<V_elem> 2 "s_register_operand" "")]
3593 "TARGET_NEON"
3594 {
3595 rtx tmp = gen_reg_rtx (<MODE>mode);
3596 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3597 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3598 const0_rtx));
3599 DONE;
3600 })
3601
;; vqdmulh of a VMDI vector (operand 1) by a scalar (operand 2).  The scalar
;; is written into lane 0 of a fresh temporary vector and
;; neon_vqdmulh_lane<mode> is then used with lane index 0.
3602 (define_expand "neon_vqdmulh_n<mode>"
3603 [(match_operand:VMDI 0 "s_register_operand" "")
3604 (match_operand:VMDI 1 "s_register_operand" "")
3605 (match_operand:<V_elem> 2 "s_register_operand" "")]
3606 "TARGET_NEON"
3607 {
3608 rtx tmp = gen_reg_rtx (<MODE>mode);
3609 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3610 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3611 const0_rtx));
3612 DONE;
3613 })
3614
;; vqrdmulh of a VMDI vector (operand 1) by a scalar (operand 2).  The scalar
;; is written into lane 0 of a fresh temporary vector and
;; neon_vqrdmulh_lane<mode> is then used with lane index 0.
3615 (define_expand "neon_vqrdmulh_n<mode>"
3616 [(match_operand:VMDI 0 "s_register_operand" "")
3617 (match_operand:VMDI 1 "s_register_operand" "")
3618 (match_operand:<V_elem> 2 "s_register_operand" "")]
3619 "TARGET_NEON"
3620 {
3621 rtx tmp = gen_reg_rtx (<MODE>mode);
3622 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3623 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3624 const0_rtx));
3625 DONE;
3626 })
3627
;; vqdmulh of a VMQI vector (operand 1) by a scalar (operand 2).  The scalar
;; is written into lane 0 of a fresh <V_HALF>-mode temporary and
;; neon_vqdmulh_lane<mode> is then used with lane index 0.
3628 (define_expand "neon_vqdmulh_n<mode>"
3629 [(match_operand:VMQI 0 "s_register_operand" "")
3630 (match_operand:VMQI 1 "s_register_operand" "")
3631 (match_operand:<V_elem> 2 "s_register_operand" "")]
3632 "TARGET_NEON"
3633 {
3634 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3635 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3636 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3637 const0_rtx));
3638 DONE;
3639 })
3640
;; vqrdmulh of a VMQI vector (operand 1) by a scalar (operand 2).  The scalar
;; is written into lane 0 of a fresh <V_HALF>-mode temporary and
;; neon_vqrdmulh_lane<mode> is then used with lane index 0.
3641 (define_expand "neon_vqrdmulh_n<mode>"
3642 [(match_operand:VMQI 0 "s_register_operand" "")
3643 (match_operand:VMQI 1 "s_register_operand" "")
3644 (match_operand:<V_elem> 2 "s_register_operand" "")]
3645 "TARGET_NEON"
3646 {
3647 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3648 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3649 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3650 const0_rtx));
3651 DONE;
3652 })
3653
;; Scalar (_n) form of vmla for the VMD modes.  Operand 3 is the scalar; it is
;; placed in a fresh pseudo via neon_vset_lane<mode> and the vmla_lane pattern
;; is emitted with operands 0-2 unchanged and const0_rtx as lane index.
3654 (define_expand "neon_vmla_n<mode>"
3655 [(match_operand:VMD 0 "s_register_operand" "")
3656 (match_operand:VMD 1 "s_register_operand" "")
3657 (match_operand:VMD 2 "s_register_operand" "")
3658 (match_operand:<V_elem> 3 "s_register_operand" "")]
3659 "TARGET_NEON"
3660 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3661 rtx tmp = gen_reg_rtx (<MODE>mode);
3662 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3663 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3664 tmp, const0_rtx));
3665 DONE;
3666 })
3667
;; Scalar (_n) form of vmla for the VMQ modes.  The scalar operand 3 is placed
;; in a <V_HALF>mode pseudo via neon_vset_lane<V_half>; that pseudo is the
;; lane source of vmla_lane, with const0_rtx as lane index.
3668 (define_expand "neon_vmla_n<mode>"
3669 [(match_operand:VMQ 0 "s_register_operand" "")
3670 (match_operand:VMQ 1 "s_register_operand" "")
3671 (match_operand:VMQ 2 "s_register_operand" "")
3672 (match_operand:<V_elem> 3 "s_register_operand" "")]
3673 "TARGET_NEON"
3674 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3675 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3676 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3677 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3678 tmp, const0_rtx));
3679 DONE;
3680 })
3681
;; Scalar (_n) form of vmlals.  Operands 0 and 1 are in <V_widen> mode,
;; operand 2 is a VMDI vector and operand 3 the scalar, which is placed in a
;; fresh pseudo via neon_vset_lane<mode> before emitting vmlals_lane with
;; const0_rtx as lane index.
3682 (define_expand "neon_vmlals_n<mode>"
3683 [(match_operand:<V_widen> 0 "s_register_operand" "")
3684 (match_operand:<V_widen> 1 "s_register_operand" "")
3685 (match_operand:VMDI 2 "s_register_operand" "")
3686 (match_operand:<V_elem> 3 "s_register_operand" "")]
3687 "TARGET_NEON"
3688 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3689 rtx tmp = gen_reg_rtx (<MODE>mode);
3690 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3691 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
3692 tmp, const0_rtx));
3693 DONE;
3694 })
3695
;; Scalar (_n) form of vmlalu.  Operands 0 and 1 are in <V_widen> mode,
;; operand 2 is a VMDI vector and operand 3 the scalar, which is placed in a
;; fresh pseudo via neon_vset_lane<mode> before emitting vmlalu_lane with
;; const0_rtx as lane index.
3696 (define_expand "neon_vmlalu_n<mode>"
3697 [(match_operand:<V_widen> 0 "s_register_operand" "")
3698 (match_operand:<V_widen> 1 "s_register_operand" "")
3699 (match_operand:VMDI 2 "s_register_operand" "")
3700 (match_operand:<V_elem> 3 "s_register_operand" "")]
3701 "TARGET_NEON"
3702 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3703 rtx tmp = gen_reg_rtx (<MODE>mode);
3704 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3705 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
3706 tmp, const0_rtx));
3707 DONE;
3708 })
3709
;; Scalar (_n) form of vqdmlal.  Operands 0 and 1 are in <V_widen> mode,
;; operand 2 is a VMDI vector and operand 3 the scalar, which is placed in a
;; fresh pseudo via neon_vset_lane<mode> before emitting vqdmlal_lane with
;; const0_rtx as lane index.
3710 (define_expand "neon_vqdmlal_n<mode>"
3711 [(match_operand:<V_widen> 0 "s_register_operand" "")
3712 (match_operand:<V_widen> 1 "s_register_operand" "")
3713 (match_operand:VMDI 2 "s_register_operand" "")
3714 (match_operand:<V_elem> 3 "s_register_operand" "")]
3715 "TARGET_NEON"
3716 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3717 rtx tmp = gen_reg_rtx (<MODE>mode);
3718 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3719 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3720 tmp, const0_rtx));
3721 DONE;
3722 })
3723
;; Scalar (_n) form of vmls for the VMD modes.  Operand 3 is the scalar; it is
;; placed in a fresh pseudo via neon_vset_lane<mode> and the vmls_lane pattern
;; is emitted with operands 0-2 unchanged and const0_rtx as lane index.
3724 (define_expand "neon_vmls_n<mode>"
3725 [(match_operand:VMD 0 "s_register_operand" "")
3726 (match_operand:VMD 1 "s_register_operand" "")
3727 (match_operand:VMD 2 "s_register_operand" "")
3728 (match_operand:<V_elem> 3 "s_register_operand" "")]
3729 "TARGET_NEON"
3730 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3731 rtx tmp = gen_reg_rtx (<MODE>mode);
3732 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3733 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3734 tmp, const0_rtx));
3735 DONE;
3736 })
3737
;; Scalar (_n) form of vmls for the VMQ modes.  The scalar operand 3 is placed
;; in a <V_HALF>mode pseudo via neon_vset_lane<V_half>; that pseudo is the
;; lane source of vmls_lane, with const0_rtx as lane index.
3738 (define_expand "neon_vmls_n<mode>"
3739 [(match_operand:VMQ 0 "s_register_operand" "")
3740 (match_operand:VMQ 1 "s_register_operand" "")
3741 (match_operand:VMQ 2 "s_register_operand" "")
3742 (match_operand:<V_elem> 3 "s_register_operand" "")]
3743 "TARGET_NEON"
3744 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3745 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3746 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3747 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3748 tmp, const0_rtx));
3749 DONE;
3750 })
3751
;; Scalar (_n) form of vmlsls.  Operands 0 and 1 are in <V_widen> mode,
;; operand 2 is a VMDI vector and operand 3 the scalar, which is placed in a
;; fresh pseudo via neon_vset_lane<mode> before emitting vmlsls_lane with
;; const0_rtx as lane index.
3752 (define_expand "neon_vmlsls_n<mode>"
3753 [(match_operand:<V_widen> 0 "s_register_operand" "")
3754 (match_operand:<V_widen> 1 "s_register_operand" "")
3755 (match_operand:VMDI 2 "s_register_operand" "")
3756 (match_operand:<V_elem> 3 "s_register_operand" "")]
3757 "TARGET_NEON"
3758 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3759 rtx tmp = gen_reg_rtx (<MODE>mode);
3760 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3761 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
3762 tmp, const0_rtx));
3763 DONE;
3764 })
3765
;; Scalar (_n) form of vmlslu.  Operands 0 and 1 are in <V_widen> mode,
;; operand 2 is a VMDI vector and operand 3 the scalar, which is placed in a
;; fresh pseudo via neon_vset_lane<mode> before emitting vmlslu_lane with
;; const0_rtx as lane index.
3766 (define_expand "neon_vmlslu_n<mode>"
3767 [(match_operand:<V_widen> 0 "s_register_operand" "")
3768 (match_operand:<V_widen> 1 "s_register_operand" "")
3769 (match_operand:VMDI 2 "s_register_operand" "")
3770 (match_operand:<V_elem> 3 "s_register_operand" "")]
3771 "TARGET_NEON"
3772 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3773 rtx tmp = gen_reg_rtx (<MODE>mode);
3774 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3775 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
3776 tmp, const0_rtx));
3777 DONE;
3778 })
3779
;; Scalar (_n) form of vqdmlsl.  Operands 0 and 1 are in <V_widen> mode,
;; operand 2 is a VMDI vector and operand 3 the scalar, which is placed in a
;; fresh pseudo via neon_vset_lane<mode> before emitting vqdmlsl_lane with
;; const0_rtx as lane index.
3780 (define_expand "neon_vqdmlsl_n<mode>"
3781 [(match_operand:<V_widen> 0 "s_register_operand" "")
3782 (match_operand:<V_widen> 1 "s_register_operand" "")
3783 (match_operand:VMDI 2 "s_register_operand" "")
3784 (match_operand:<V_elem> 3 "s_register_operand" "")]
3785 "TARGET_NEON"
3786 {
/* tmp is passed as both the destination and the vector input of vset_lane.  */
3787 rtx tmp = gen_reg_rtx (<MODE>mode);
3788 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3789 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3790 tmp, const0_rtx));
3791 DONE;
3792 })
3793
;; vext: UNSPEC_VEXT of two VDQX register operands and an SImode immediate
;; (operand 3), printed as the last operand of the vext instruction.
3794 (define_insn "neon_vext<mode>"
3795 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3796 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3797 (match_operand:VDQX 2 "s_register_operand" "w")
3798 (match_operand:SI 3 "immediate_operand" "i")]
3799 UNSPEC_VEXT))]
3800 "TARGET_NEON"
3801 {
/* Check the immediate against 0 and the number of units in the mode.  */
3802 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3803 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3804 }
3805 [(set_attr "type" "neon_ext<q>")]
3806 )
3807
;; vrev64 over the VDQ modes: single register input, UNSPEC_VREV64, emitted
;; as vrev64.<element size>.
3808 (define_insn "neon_vrev64<mode>"
3809 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3810 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3811 UNSPEC_VREV64))]
3812 "TARGET_NEON"
3813 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3814 [(set_attr "type" "neon_rev<q>")]
3815 )
3816
;; vrev32 over the VX modes: single register input, UNSPEC_VREV32, emitted
;; as vrev32.<element size>.
3817 (define_insn "neon_vrev32<mode>"
3818 [(set (match_operand:VX 0 "s_register_operand" "=w")
3819 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3820 UNSPEC_VREV32))]
3821 "TARGET_NEON"
3822 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3823 [(set_attr "type" "neon_rev<q>")]
3824 )
3825
;; vrev16 over the VE modes: single register input, UNSPEC_VREV16, emitted
;; as vrev16.<element size>.
3826 (define_insn "neon_vrev16<mode>"
3827 [(set (match_operand:VE 0 "s_register_operand" "=w")
3828 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3829 UNSPEC_VREV16))]
3830 "TARGET_NEON"
3831 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3832 [(set_attr "type" "neon_rev<q>")]
3833 )
3834
3835 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3836 ; allocation. For an intrinsic of form:
3837 ; rD = vbsl_* (rS, rN, rM)
3838 ; We can use any of:
3839 ; vbsl rS, rN, rM (if D = S)
3840 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3841 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
3842
;; Three constraint alternatives, one per input operand tied to the output:
;; operand 1 tied ("0") emits vbsl, operand 3 tied emits vbit, operand 2 tied
;; emits vbif.  The output templates below are listed in the same order.
3843 (define_insn "neon_vbsl<mode>_internal"
3844 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3845 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3846 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3847 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3848 UNSPEC_VBSL))]
3849 "TARGET_NEON"
3850 "@
3851 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3852 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3853 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3854 [(set_attr "type" "neon_bsl<q>")]
3855 )
3856
;; Expander for vbsl.  Operand 1 arrives in <V_cmp_result> mode while the
;; other operands are VDQX; the preparation code rewrites operand 1 as a
;; <MODE>mode lowpart so that the generated RTL has one mode throughout.
3857 (define_expand "neon_vbsl<mode>"
3858 [(set (match_operand:VDQX 0 "s_register_operand" "")
3859 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3860 (match_operand:VDQX 2 "s_register_operand" "")
3861 (match_operand:VDQX 3 "s_register_operand" "")]
3862 UNSPEC_VBSL))]
3863 "TARGET_NEON"
3864 {
3865 /* We can't alias operands together if they have different modes. */
3866 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
3867 })
3868
3869 ;; vshl, vrshl
;; Shift with the per-element shift amounts taken from register operand 2,
;; iterated over the VSHL unspecs.
;; NOTE(review): operand 2 is a register, yet the type attribute is
;; neon_shift_imm<q> -- confirm this is the intended scheduling class.
3870 (define_insn "neon_v<shift_op><sup><mode>"
3871 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3872 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3873 (match_operand:VDQIX 2 "s_register_operand" "w")]
3874 VSHL))]
3875 "TARGET_NEON"
3876 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3877 [(set_attr "type" "neon_shift_imm<q>")]
3878 )
3879
3880 ;; vqshl, vqrshl
;; Same operand layout as the VSHL pattern (shift amounts in register
;; operand 2), iterated over the VQSHL unspecs and typed as a saturating
;; shift.
3881 (define_insn "neon_v<shift_op><sup><mode>"
3882 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3883 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3884 (match_operand:VDQIX 2 "s_register_operand" "w")]
3885 VQSHL))]
3886 "TARGET_NEON"
3887 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3888 [(set_attr "type" "neon_sat_shift_imm<q>")]
3889 )
3890
3891 ;; vshr_n, vrshr_n
;; Right shift by the SImode immediate in operand 2, iterated over the
;; VSHR_N unspecs.
3892 (define_insn "neon_v<shift_op><sup>_n<mode>"
3893 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3894 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3895 (match_operand:SI 2 "immediate_operand" "i")]
3896 VSHR_N))]
3897 "TARGET_NEON"
3898 {
/* Bounds passed: 1 and element bits + 1.  */
3899 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3900 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3901 }
3902 [(set_attr "type" "neon_shift_imm<q>")]
3903 )
3904
3905 ;; vshrn_n, vrshrn_n
;; Narrowing right shift by immediate: the VN-mode source (printed with %q)
;; produces a <V_narrow> result (printed with %P).
3906 (define_insn "neon_v<shift_op>_n<mode>"
3907 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3908 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3909 (match_operand:SI 2 "immediate_operand" "i")]
3910 VSHRN_N))]
3911 "TARGET_NEON"
3912 {
/* Bounds passed: 1 and half the source element bits + 1.  */
3913 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3914 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3915 }
3916 [(set_attr "type" "neon_shift_imm_narrow_q")]
3917 )
3918
3919 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing right shift by immediate: VN-mode source (%q),
;; <V_narrow> result (%P), iterated over the VQSHRN_N unspecs.
3920 (define_insn "neon_v<shift_op><sup>_n<mode>"
3921 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3922 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3923 (match_operand:SI 2 "immediate_operand" "i")]
3924 VQSHRN_N))]
3925 "TARGET_NEON"
3926 {
/* Bounds passed: 1 and half the source element bits + 1.  */
3927 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3928 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3929 }
3930 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3931 )
3932
3933 ;; vqshrun_n, vqrshrun_n
;; Narrowing right shift by immediate over the VQSHRUN_N unspecs: VN-mode
;; source (%q), <V_narrow> result (%P); the mnemonic suffix is <V_s_elem>.
3934 (define_insn "neon_v<shift_op>_n<mode>"
3935 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3936 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3937 (match_operand:SI 2 "immediate_operand" "i")]
3938 VQSHRUN_N))]
3939 "TARGET_NEON"
3940 {
/* Bounds passed: 1 and half the source element bits + 1.  */
3941 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3942 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3943 }
3944 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3945 )
3946
;; vshl by the SImode immediate in operand 2 (UNSPEC_VSHL_N).
3947 (define_insn "neon_vshl_n<mode>"
3948 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3949 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3950 (match_operand:SI 2 "immediate_operand" "i")]
3951 UNSPEC_VSHL_N))]
3952 "TARGET_NEON"
3953 {
/* Bounds passed: 0 and the element bits.  */
3954 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3955 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3956 }
3957 [(set_attr "type" "neon_shift_imm<q>")]
3958 )
3959
;; vqshl by the SImode immediate in operand 2, iterated over the VQSHL_N
;; unspecs.
3960 (define_insn "neon_vqshl_<sup>_n<mode>"
3961 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3962 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3963 (match_operand:SI 2 "immediate_operand" "i")]
3964 VQSHL_N))]
3965 "TARGET_NEON"
3966 {
/* Bounds passed: 0 and the element bits.  */
3967 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3968 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3969 }
3970 [(set_attr "type" "neon_sat_shift_imm<q>")]
3971 )
3972
;; vqshlu by the SImode immediate in operand 2 (UNSPEC_VQSHLU_N); the
;; mnemonic suffix is <V_s_elem>.
3973 (define_insn "neon_vqshlu_n<mode>"
3974 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3975 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3976 (match_operand:SI 2 "immediate_operand" "i")]
3977 UNSPEC_VQSHLU_N))]
3978 "TARGET_NEON"
3979 {
/* Bounds passed: 0 and the element bits.  */
3980 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3981 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
3982 }
3983 [(set_attr "type" "neon_sat_shift_imm<q>")]
3984 )
3985
;; Widening left shift by immediate: VW-mode source (%P), <V_widen> result
;; (%q), iterated over the VSHLL_N unspecs.
3986 (define_insn "neon_vshll<sup>_n<mode>"
3987 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3988 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3989 (match_operand:SI 2 "immediate_operand" "i")]
3990 VSHLL_N))]
3991 "TARGET_NEON"
3992 {
3993 /* Bounds passed are 0 and size + 1, i.e. 0 <= imm <= size if neon_const_bounds treats them as [low, high) like its other uses here suggest.  NOTE(review): this comment used to read "0 < imm <= size", which does not match the lower bound of 0 -- confirm which is intended. */
3994 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3995 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
3996 }
3997 [(set_attr "type" "neon_shift_imm_long")]
3998 )
3999
4000 ;; vsra_n, vrsra_n
;; Shift-and-accumulate by immediate.  Operand 1 is tied to the output
;; (constraint "0"), so only operands 0, 2 and the immediate 3 are printed.
4001 (define_insn "neon_v<shift_op><sup>_n<mode>"
4002 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4003 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4004 (match_operand:VDQIX 2 "s_register_operand" "w")
4005 (match_operand:SI 3 "immediate_operand" "i")]
4006 VSRA_N))]
4007 "TARGET_NEON"
4008 {
/* Bounds passed: 1 and element bits + 1.  */
4009 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4010 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4011 }
4012 [(set_attr "type" "neon_shift_acc<q>")]
4013 )
4014
;; vsri by immediate.  Operand 1 is tied to the output (constraint "0"), so
;; only operands 0, 2 and the immediate 3 are printed.
;; NOTE(review): the shift amount is an immediate, yet the type attribute is
;; neon_shift_reg<q> -- confirm this is the intended scheduling class.
4015 (define_insn "neon_vsri_n<mode>"
4016 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4017 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4018 (match_operand:VDQIX 2 "s_register_operand" "w")
4019 (match_operand:SI 3 "immediate_operand" "i")]
4020 UNSPEC_VSRI))]
4021 "TARGET_NEON"
4022 {
/* Bounds passed: 1 and element bits + 1.  */
4023 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4024 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4025 }
4026 [(set_attr "type" "neon_shift_reg<q>")]
4027 )
4028
;; vsli by immediate.  Operand 1 is tied to the output (constraint "0"), so
;; only operands 0, 2 and the immediate 3 are printed.
;; NOTE(review): the shift amount is an immediate, yet the type attribute is
;; neon_shift_reg<q> -- confirm this is the intended scheduling class.
4029 (define_insn "neon_vsli_n<mode>"
4030 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4031 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4032 (match_operand:VDQIX 2 "s_register_operand" "w")
4033 (match_operand:SI 3 "immediate_operand" "i")]
4034 UNSPEC_VSLI))]
4035 "TARGET_NEON"
4036 {
/* Bounds passed: 0 and the element bits.  */
4037 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4038 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4039 }
4040 [(set_attr "type" "neon_shift_reg<q>")]
4041 )
4042
;; vtbl with a one-register table: operand 1 is the V8QI table (printed
;; inside braces), operand 2 the V8QI index vector.
4043 (define_insn "neon_vtbl1v8qi"
4044 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4045 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4046 (match_operand:V8QI 2 "s_register_operand" "w")]
4047 UNSPEC_VTBL))]
4048 "TARGET_NEON"
4049 "vtbl.8\t%P0, {%P1}, %P2"
4050 [(set_attr "type" "neon_tbl1")]
4051 )
4052
;; vtbl with a two-register table held in the TImode operand 1.  The output
;; code builds V8QImode register rtxes for the table list and prints the
;; instruction itself through output_asm_insn, returning an empty template.
4053 (define_insn "neon_vtbl2v8qi"
4054 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4055 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4056 (match_operand:V8QI 2 "s_register_operand" "w")]
4057 UNSPEC_VTBL))]
4058 "TARGET_NEON"
4059 {
4060 rtx ops[4];
4061 int tabbase = REGNO (operands[1]);
4062
4063 ops[0] = operands[0];
/* Table registers sit at register numbers tabbase and tabbase + 2.  */
4064 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4065 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4066 ops[3] = operands[2];
4067 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4068
4069 return "";
4070 }
4071 [(set_attr "type" "neon_tbl2")]
4072 )
4073
;; vtbl with a three-register table held in the EImode operand 1.  The output
;; code builds V8QImode register rtxes for the table list and prints the
;; instruction itself through output_asm_insn, returning an empty template.
4074 (define_insn "neon_vtbl3v8qi"
4075 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4076 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4077 (match_operand:V8QI 2 "s_register_operand" "w")]
4078 UNSPEC_VTBL))]
4079 "TARGET_NEON"
4080 {
4081 rtx ops[5];
4082 int tabbase = REGNO (operands[1]);
4083
4084 ops[0] = operands[0];
/* Table registers sit at register numbers tabbase, +2 and +4.  */
4085 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4086 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4087 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4088 ops[4] = operands[2];
4089 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4090
4091 return "";
4092 }
4093 [(set_attr "type" "neon_tbl3")]
4094 )
4095
;; vtbl with a four-register table held in the OImode operand 1.  The output
;; code builds V8QImode register rtxes for the table list and prints the
;; instruction itself through output_asm_insn, returning an empty template.
4096 (define_insn "neon_vtbl4v8qi"
4097 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4098 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4099 (match_operand:V8QI 2 "s_register_operand" "w")]
4100 UNSPEC_VTBL))]
4101 "TARGET_NEON"
4102 {
4103 rtx ops[6];
4104 int tabbase = REGNO (operands[1]);
4105
4106 ops[0] = operands[0];
/* Table registers sit at register numbers tabbase, +2, +4 and +6.  */
4107 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4108 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4109 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4110 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4111 ops[5] = operands[2];
4112 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4113
4114 return "";
4115 }
4116 [(set_attr "type" "neon_tbl4")]
4117 )
4118
4119 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table.  Emitted as "#" and split after
;; reload into two neon_vtbl2v8qi insns, one per V8QI half of the result and
;; of the index vector; both use the whole table viewed as TImode.  The
;; output is earlyclobber ("=&w").
4120 (define_insn_and_split "neon_vtbl1v16qi"
4121 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4122 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4123 (match_operand:V16QI 2 "s_register_operand" "w")]
4124 UNSPEC_VTBL))]
4125 "TARGET_NEON"
4126 "#"
4127 "&& reload_completed"
4128 [(const_int 0)]
4129 {
4130 rtx op0, op1, op2, part0, part2;
4131 unsigned ofs;
4132
4133 op0 = operands[0];
/* View the V16QI table as TImode, the table mode neon_vtbl2v8qi takes.  */
4134 op1 = gen_lowpart (TImode, operands[1]);
4135 op2 = operands[2];
4136
/* Low halves of the destination and the index vector.  */
4137 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4138 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4139 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4140 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4141
/* High halves of the destination and the index vector.  */
4142 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4143 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4144 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4145 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4146 DONE;
4147 }
4148 [(set_attr "type" "multiple")]
4149 )
4150
;; V16QI table lookup with an OImode table.  Emitted as "#" and split after
;; reload into two lookups, one per V8QI half of the result and of the index
;; vector.  The output is earlyclobber ("=&w").
;; NOTE(review): the split passes the OImode operand 1 to gen_neon_vtbl2v8qi,
;; whose pattern in this file declares a TImode table operand -- confirm the
;; intended generator.
4151 (define_insn_and_split "neon_vtbl2v16qi"
4152 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4153 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4154 (match_operand:V16QI 2 "s_register_operand" "w")]
4155 UNSPEC_VTBL))]
4156 "TARGET_NEON"
4157 "#"
4158 "&& reload_completed"
4159 [(const_int 0)]
4160 {
4161 rtx op0, op1, op2, part0, part2;
4162 unsigned ofs;
4163
4164 op0 = operands[0];
4165 op1 = operands[1];
4166 op2 = operands[2];
4167
/* Low halves of the destination and the index vector.  */
4168 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4169 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4170 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4171 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4172
/* High halves of the destination and the index vector.  */
4173 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4174 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4175 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4176 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4177 DONE;
4178 }
4179 [(set_attr "type" "multiple")]
4180 )
4181
4182 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4183 ;; handle quad-word input modes, producing octa-word output modes. But
4184 ;; that requires us to add support for octa-word vector modes in moves.
4185 ;; That seems overkill for this one use in vec_perm.
;; Concatenates two V16QI registers into an OImode value (UNSPEC_VCONCAT).
;; Emitted as "#" and split after reload by neon_split_vcombine.
4186 (define_insn_and_split "neon_vcombinev16qi"
4187 [(set (match_operand:OI 0 "s_register_operand" "=w")
4188 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4189 (match_operand:V16QI 2 "s_register_operand" "w")]
4190 UNSPEC_VCONCAT))]
4191 "TARGET_NEON"
4192 "#"
4193 "&& reload_completed"
4194 [(const_int 0)]
4195 {
4196 neon_split_vcombine (operands);
4197 DONE;
4198 }
4199 [(set_attr "type" "multiple")]
4200 )
4201
;; vtbx with a one-register table.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 is the table, operand 3 the index vector.
4202 (define_insn "neon_vtbx1v8qi"
4203 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4204 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4205 (match_operand:V8QI 2 "s_register_operand" "w")
4206 (match_operand:V8QI 3 "s_register_operand" "w")]
4207 UNSPEC_VTBX))]
4208 "TARGET_NEON"
4209 "vtbx.8\t%P0, {%P2}, %P3"
4210 [(set_attr "type" "neon_tbl1")]
4211 )
4212
;; vtbx with a two-register table held in the TImode operand 2.  Operand 1 is
;; tied to the output.  The instruction is printed through output_asm_insn
;; using V8QImode register rtxes built for the table list.
4213 (define_insn "neon_vtbx2v8qi"
4214 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4215 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4216 (match_operand:TI 2 "s_register_operand" "w")
4217 (match_operand:V8QI 3 "s_register_operand" "w")]
4218 UNSPEC_VTBX))]
4219 "TARGET_NEON"
4220 {
4221 rtx ops[4];
4222 int tabbase = REGNO (operands[2]);
4223
4224 ops[0] = operands[0];
/* Table registers sit at register numbers tabbase and tabbase + 2.  */
4225 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4226 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4227 ops[3] = operands[3];
4228 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4229
4230 return "";
4231 }
4232 [(set_attr "type" "neon_tbl2")]
4233 )
4234
;; vtbx with a three-register table held in the EImode operand 2.  Operand 1
;; is tied to the output.  The instruction is printed through output_asm_insn
;; using V8QImode register rtxes built for the table list.
4235 (define_insn "neon_vtbx3v8qi"
4236 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4237 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4238 (match_operand:EI 2 "s_register_operand" "w")
4239 (match_operand:V8QI 3 "s_register_operand" "w")]
4240 UNSPEC_VTBX))]
4241 "TARGET_NEON"
4242 {
4243 rtx ops[5];
4244 int tabbase = REGNO (operands[2]);
4245
4246 ops[0] = operands[0];
/* Table registers sit at register numbers tabbase, +2 and +4.  */
4247 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4248 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4249 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4250 ops[4] = operands[3];
4251 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4252
4253 return "";
4254 }
4255 [(set_attr "type" "neon_tbl3")]
4256 )
4257
;; vtbx with a four-register table held in the OImode operand 2.  Operand 1
;; is tied to the output.  The instruction is printed through output_asm_insn
;; using V8QImode register rtxes built for the table list.
4258 (define_insn "neon_vtbx4v8qi"
4259 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4260 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4261 (match_operand:OI 2 "s_register_operand" "w")
4262 (match_operand:V8QI 3 "s_register_operand" "w")]
4263 UNSPEC_VTBX))]
4264 "TARGET_NEON"
4265 {
4266 rtx ops[6];
4267 int tabbase = REGNO (operands[2]);
4268
4269 ops[0] = operands[0];
/* Table registers sit at register numbers tabbase, +2, +4 and +6.  */
4270 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4271 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4272 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4273 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4274 ops[5] = operands[3];
4275 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4276
4277 return "";
4278 }
4279 [(set_attr "type" "neon_tbl4")]
4280 )
4281
;; Expander producing a parallel of two sets from the same pair of inputs:
;; operand 0 gets UNSPEC_VTRN1 and operand 3 gets UNSPEC_VTRN2 of operands
;; 1 and 2.  There is no preparation code.
4282 (define_expand "neon_vtrn<mode>_internal"
4283 [(parallel
4284 [(set (match_operand:VDQW 0 "s_register_operand" "")
4285 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4286 (match_operand:VDQW 2 "s_register_operand" "")]
4287 UNSPEC_VTRN1))
4288 (set (match_operand:VDQW 3 "s_register_operand" "")
4289 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4290 "TARGET_NEON"
4291 ""
4292 )
4293
4294 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber) and the inputs
;; are operands 1 and 3, tied to them through the constraints "0" and "2";
;; only the two output registers are printed.
4295 (define_insn "*neon_vtrn<mode>_insn"
4296 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4297 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4298 (match_operand:VDQW 3 "s_register_operand" "2")]
4299 UNSPEC_VTRN1))
4300 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4301 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4302 UNSPEC_VTRN2))]
4303 "TARGET_NEON"
4304 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4305 [(set_attr "type" "neon_permute<q>")]
4306 )
4307
;; Expander producing a parallel of two sets from the same pair of inputs:
;; operand 0 gets UNSPEC_VZIP1 and operand 3 gets UNSPEC_VZIP2 of operands
;; 1 and 2.  There is no preparation code.
4308 (define_expand "neon_vzip<mode>_internal"
4309 [(parallel
4310 [(set (match_operand:VDQW 0 "s_register_operand" "")
4311 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4312 (match_operand:VDQW 2 "s_register_operand" "")]
4313 UNSPEC_VZIP1))
4314 (set (match_operand:VDQW 3 "s_register_operand" "")
4315 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4316 "TARGET_NEON"
4317 ""
4318 )
4319
4320 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber) and the inputs
;; are operands 1 and 3, tied to them through the constraints "0" and "2";
;; only the two output registers are printed.
4321 (define_insn "*neon_vzip<mode>_insn"
4322 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4323 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4324 (match_operand:VDQW 3 "s_register_operand" "2")]
4325 UNSPEC_VZIP1))
4326 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4327 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4328 UNSPEC_VZIP2))]
4329 "TARGET_NEON"
4330 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4331 [(set_attr "type" "neon_zip<q>")]
4332 )
4333
;; Expander producing a parallel of two sets from the same pair of inputs:
;; operand 0 gets UNSPEC_VUZP1 and operand 3 gets UNSPEC_VUZP2 of operands
;; 1 and 2.  There is no preparation code.
4334 (define_expand "neon_vuzp<mode>_internal"
4335 [(parallel
4336 [(set (match_operand:VDQW 0 "s_register_operand" "")
4337 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4338 (match_operand:VDQW 2 "s_register_operand" "")]
4339 UNSPEC_VUZP1))
4340 (set (match_operand:VDQW 3 "s_register_operand" "")
4341 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4342 "TARGET_NEON"
4343 ""
4344 )
4345
4346 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber) and the inputs
;; are operands 1 and 3, tied to them through the constraints "0" and "2";
;; only the two output registers are printed.
4347 (define_insn "*neon_vuzp<mode>_insn"
4348 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4349 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4350 (match_operand:VDQW 3 "s_register_operand" "2")]
4351 UNSPEC_VUZP1))
4352 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4353 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4354 UNSPEC_VUZP2))]
4355 "TARGET_NEON"
4356 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4357 [(set_attr "type" "neon_zip<q>")]
4358 )
4359
;; Expander with no preparation code: generates an UNSPEC_VLD1 load of a
;; VDQX value from a neon_struct_operand, the same RTL shape as the
;; neon_vld1<mode> insn.
4360 (define_expand "vec_load_lanes<mode><mode>"
4361 [(set (match_operand:VDQX 0 "s_register_operand")
4362 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4363 UNSPEC_VLD1))]
4364 "TARGET_NEON")
4365
;; vld1 of a whole VDQX register from the memory operand 1 (constraint
;; "Um"), emitted as vld1.<element size>.
4366 (define_insn "neon_vld1<mode>"
4367 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4368 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4369 UNSPEC_VLD1))]
4370 "TARGET_NEON"
4371 "vld1.<V_sz_elem>\t%h0, %A1"
4372 [(set_attr "type" "neon_load1_1reg<q>")]
4373 )
4374
4375 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4376 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
4377 ;; lane order here.
;; D-register form: loads one element from memory operand 1 into the lane
;; given by operand 3; operand 2 supplies the other lanes and is tied to the
;; output.
4378 (define_insn "neon_vld1_lane<mode>"
4379 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4380 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4381 (match_operand:VDX 2 "s_register_operand" "0")
4382 (match_operand:SI 3 "immediate_operand" "i")]
4383 UNSPEC_VLD1_LANE))]
4384 "TARGET_NEON"
4385 {
4386 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4387 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4388 operands[3] = GEN_INT (lane);
/* A single-unit mode uses the form without a lane index.  */
4389 if (max == 1)
4390 return "vld1.<V_sz_elem>\t%P0, %A1";
4391 else
4392 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4393 }
4394 [(set_attr "type" "neon_load1_one_lane<q>")]
4395 )
4396
4397 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4398 ;; here on big endian targets.
;; Q-register form: loads one element from memory operand 1 into the lane
;; given by operand 3; operand 2 supplies the other lanes and is tied to the
;; output.  The instruction is printed on one <V_HALF>mode half of the
;; destination: lanes in the upper half are rebased to that half and the
;; register number is advanced by 2.
4399 (define_insn "neon_vld1_lane<mode>"
4400 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4401 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4402 (match_operand:VQX 2 "s_register_operand" "0")
4403 (match_operand:SI 3 "immediate_operand" "i")]
4404 UNSPEC_VLD1_LANE))]
4405 "TARGET_NEON"
4406 {
4407 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4408 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4409 int regno = REGNO (operands[0]);
4410 if (lane >= max / 2)
4411 {
4412 lane -= max / 2;
4413 regno += 2;
4414 }
/* Store the lane once, after the half-register adjustment, as the
   neon_vst1_lane Q-register pattern does.  */
4415 operands[3] = GEN_INT (lane);
4417 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
/* With two units each half holds one element: no lane index is printed.  */
4418 if (max == 2)
4419 return "vld1.<V_sz_elem>\t%P0, %A1";
4420 else
4421 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4422 }
4423 [(set_attr "type" "neon_load1_one_lane<q>")]
4424 )
4425
;; D-register vld1 "all lanes" load: a vec_duplicate of the element read from
;; memory operand 1, printed with the empty-index form {%P0[]}.
4426 (define_insn "neon_vld1_dup<mode>"
4427 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4428 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4429 "TARGET_NEON"
4430 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4431 [(set_attr "type" "neon_load1_all_lanes<q>")]
4432 )
4433
4434 ;; Special case for DImode. Treat it exactly like a simple load.
;; The expander has no preparation code and generates an UNSPEC_VLD1 set in
;; DImode.
4435 (define_expand "neon_vld1_dupdi"
4436 [(set (match_operand:DI 0 "s_register_operand" "")
4437 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
4438 UNSPEC_VLD1))]
4439 "TARGET_NEON"
4440 ""
4441 )
4442
;; Q-register vld1 "all lanes" load for the VQ2 modes: a single vld1 whose
;; register list names both halves of operand 0 (%e0 and %f0) with empty
;; lane indices.
4443 (define_insn "neon_vld1_dup<mode>"
4444 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
4445 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4446 "TARGET_NEON"
4447 {
4448 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4449 }
4450 [(set_attr "type" "neon_load1_all_lanes<q>")]
4451 )
4452
;; V2DI duplicate-load.  Emitted as "#" and split after reload into two
;; insns (length attribute 8): a neon_vld1_dupdi load into the DImode low
;; part of operand 0, then a move of that low part into the high part.
4453 (define_insn_and_split "neon_vld1_dupv2di"
4454 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4455 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4456 "TARGET_NEON"
4457 "#"
4458 "&& reload_completed"
4459 [(const_int 0)]
4460 {
4461 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4462 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4463 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4464 DONE;
4465 }
4466 [(set_attr "length" "8")
4467 (set_attr "type" "neon_load1_all_lanes_q")]
4468 )
4469
;; Expander with no preparation code: generates an UNSPEC_VST1 store of a
;; VDQX register to a neon_struct_operand, the same RTL shape as the
;; neon_vst1<mode> insn.
4470 (define_expand "vec_store_lanes<mode><mode>"
4471 [(set (match_operand:VDQX 0 "neon_struct_operand")
4472 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
4473 UNSPEC_VST1))]
4474 "TARGET_NEON")
4475
;; vst1 of a whole VDQX register (operand 1) to the memory operand 0
;; (constraint "=Um"), emitted as vst1.<element size>.
4476 (define_insn "neon_vst1<mode>"
4477 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4478 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4479 UNSPEC_VST1))]
4480 "TARGET_NEON"
4481 "vst1.<V_sz_elem>\t%h1, %A0"
4482 [(set_attr "type" "neon_store1_1reg<q>")])
4483
4484 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4485 ;; here on big endian targets.
;; D-register form: stores the lane of operand 1 selected by operand 2 to
;; the element-sized memory operand 0.
4486 (define_insn "neon_vst1_lane<mode>"
4487 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4488 (unspec:<V_elem>
4489 [(match_operand:VDX 1 "s_register_operand" "w")
4490 (match_operand:SI 2 "immediate_operand" "i")]
4491 UNSPEC_VST1_LANE))]
4492 "TARGET_NEON"
4493 {
4494 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4495 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4496 operands[2] = GEN_INT (lane);
/* A single-unit mode uses the form without a lane index.  */
4497 if (max == 1)
4498 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4499 else
4500 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4501 }
4502 [(set_attr "type" "neon_store1_one_lane<q>")]
4503 )
4504
4505 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4506 ;; here on big endian targets.
;; Q-register form: stores the lane of operand 1 selected by operand 2 to
;; the element-sized memory operand 0.  The instruction is printed on one
;; <V_HALF>mode half of the source register.
4507 (define_insn "neon_vst1_lane<mode>"
4508 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4509 (unspec:<V_elem>
4510 [(match_operand:VQX 1 "s_register_operand" "w")
4511 (match_operand:SI 2 "immediate_operand" "i")]
4512 UNSPEC_VST1_LANE))]
4513 "TARGET_NEON"
4514 {
4515 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4516 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4517 int regno = REGNO (operands[1]);
/* Lanes in the upper half are rebased and the register number advanced by 2.  */
4518 if (lane >= max / 2)
4519 {
4520 lane -= max / 2;
4521 regno += 2;
4522 }
4523 operands[2] = GEN_INT (lane);
4524 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
/* With two units each half holds one element: no lane index is printed.  */
4525 if (max == 2)
4526 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4527 else
4528 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4529 }
4530 [(set_attr "type" "neon_store1_one_lane<q>")]
4531 )
4532
;; Expander with no preparation code: generates an UNSPEC_VLD2 load into a
;; TImode register, the same RTL shape as the TImode neon_vld2<mode> insn.
;; The inner UNSPEC_VSTRUCTDUMMY unspec carries the VDX vector mode.
4533 (define_expand "vec_load_lanesti<mode>"
4534 [(set (match_operand:TI 0 "s_register_operand")
4535 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4536 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4537 UNSPEC_VLD2))]
4538 "TARGET_NEON")
4539
;; Two-register structure load into a TImode register for the VDX modes.
;; 64-bit elements are emitted as vld1.64 rather than vld2, and the type
;; attribute switches between neon_load1_2reg and neon_load2_2reg on the
;; same condition.
4540 (define_insn "neon_vld2<mode>"
4541 [(set (match_operand:TI 0 "s_register_operand" "=w")
4542 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4543 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4544 UNSPEC_VLD2))]
4545 "TARGET_NEON"
4546 {
4547 if (<V_sz_elem> == 64)
4548 return "vld1.64\t%h0, %A1";
4549 else
4550 return "vld2.<V_sz_elem>\t%h0, %A1";
4551 }
4552 [(set (attr "type")
4553 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4554 (const_string "neon_load1_2reg<q>")
4555 (const_string "neon_load2_2reg<q>")))]
4556 )
4557
;; Expander for vec_load_lanes on an OImode pair of quad-register (VQ2)
;; vectors.  It has no preparation code; the RTL it produces has the same
;; shape as the neon_vld2<mode> insn pattern for VQ2.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
)
4564
;; Load an OImode pair of quad-register (VQ2) vectors with a single VLD2 of
;; the register list of operand 0.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")]
)
4573
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Load one two-element structure into a single lane of a TImode register
;; pair (VD_LANE modes).  Operand 2 is tied to the destination, and the two
;; DImode registers printed are REGNO and REGNO + 2 of operand 0.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:TI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx ops[4];
  int i;

  for (i = 0; i < 2; i++)
    ops[i] = gen_rtx_REG (DImode, base + 2 * i);
  ops[2] = operands[1];
  /* Endian-adjusted lane index.  */
  ops[3] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4597
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Load one two-element structure into a single lane of an OImode register
;; group (VQ_HS modes).  The two DImode registers printed are four register
;; numbers apart; a lane in the upper half shifts both by two.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx ops[4];
  int i;

  /* Fold an upper-half lane onto the other DImode half of each vector.  */
  if (lane >= half)
    {
      lane -= half;
      base += 2;
    }

  for (i = 0; i < 2; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4627
;; Load one two-element structure and replicate it across all lanes of a
;; TImode register pair (VDX modes).  Single-element modes are handled with a
;; VLD1 of the register list instead of the all-lanes VLD2 form.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load2_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))]
)
4645
;; Expander for vec_store_lanes on a TImode pair of 64-bit (VDX) vectors.
;; It has no preparation code; the RTL it produces has the same shape as the
;; neon_vst2<mode> insn pattern for VDX.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
)
4652
;; Store a TImode pair of 64-bit (VDX) vectors to memory.  VST2 is emitted
;; for every element size except 64 bits, where VST1.64 is emitted for the
;; same register list instead.
;; The "type" attribute mirrors neon_vld2<mode>: the VST1 fallback is a
;; two-register VST1, and the VST2 case is a two-register whole-structure
;; store (not a single-lane store, which is what the *_lane patterns emit).
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_store1_2reg<q>")
          (const_string "neon_store2_2reg<q>")))]
)
4670
;; Expander for vec_store_lanes on an OImode pair of quad-register (VQ2)
;; vectors.  It has no preparation code; the RTL it produces has the same
;; shape as the neon_vst2<mode> insn pattern for VQ2.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
)
4677
;; Store an OImode pair of quad-register (VQ2) vectors with a single VST2 of
;; the register list of operand 1.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
4687
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Store one lane from each vector of a TImode register pair (VD_LANE modes)
;; as a two-element structure.  The two DImode registers printed are REGNO
;; and REGNO + 2 of operand 1.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx ops[4];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 2; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  /* Endian-adjusted lane index.  */
  ops[3] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4711
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Store one lane from each vector of an OImode register group (VQ_HS modes)
;; as a two-element structure.  The two DImode registers printed are four
;; register numbers apart; a lane in the upper half shifts both by two.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx ops[4];
  int i;

  /* Fold an upper-half lane onto the other DImode half of each vector.  */
  if (lane >= half)
    {
      lane -= half;
      base += 2;
    }

  ops[0] = operands[0];
  for (i = 0; i < 2; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  ops[3] = GEN_INT (lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4741
;; Expander for vec_load_lanes on an EImode triple of 64-bit (VDX) vectors.
;; It has no preparation code; the RTL it produces has the same shape as the
;; neon_vld3<mode> insn pattern for VDX.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON"
)
4748
;; Load an EImode triple of 64-bit (VDX) vectors from memory.  VLD3 is
;; emitted for every element size except 64 bits, where VLD1.64 is emitted
;; for the same register list instead; the "type" attribute follows suit.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_3reg<q>")
          (const_string "neon_load3_3reg<q>")))]
)
4766
;; Expander for vec_load_lanes on a CImode triple of quad-register (VQ2)
;; vectors.  All the work is delegated to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
4776
;; Load a CImode triple of quad-register (VQ2) vectors as two EImode-sized
;; halves: neon_vld3qa reads the first EImode chunk of the source, then
;; neon_vld3qb reads the chunk GET_MODE_SIZE (EImode) bytes later, with the
;; partially loaded destination passed back in as its tied input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_half = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_half));

  rtx second_half
    = adjust_address (first_half, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_half, operands[0]));
  DONE;
})
4791
;; First half of the quad-register VLD3: loads the DImode registers at
;; REGNO, REGNO + 4 and REGNO + 8 of the CImode destination from an EImode
;; memory chunk.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx ops[4];
  int i;

  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4810
;; Second half of the quad-register VLD3: loads the DImode registers at
;; REGNO + 2, REGNO + 6 and REGNO + 10 of the CImode destination from an
;; EImode memory chunk.  Operand 2 is tied to the destination so the result
;; of neon_vld3qa is carried through.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx ops[4];
  int i;

  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4830
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Load one three-element structure into a single lane of an EImode register
;; triple (VD_LANE modes).  The DImode registers printed are REGNO,
;; REGNO + 2 and REGNO + 4 of operand 0.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:EI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx ops[5];
  int i;

  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, base + 2 * i);
  ops[3] = operands[1];
  /* Endian-adjusted lane index.  */
  ops[4] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));

  /* The address is printed with plain %3 here, not the %A modifier used by
     the VLD2/VLD4 lane patterns.
     NOTE(review): presumably because this form takes no alignment
     qualifier -- confirm.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4856
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Load one three-element structure into a single lane of a CImode register
;; group (VQ_HS modes).  The DImode registers printed are four register
;; numbers apart; a lane in the upper half shifts all of them by two.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx ops[5];
  int i;

  /* Fold an upper-half lane onto the other DImode half of each vector.  */
  if (lane >= half)
    {
      lane -= half;
      base += 2;
    }

  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);

  /* The address is printed with plain %3 here, not the %A modifier used by
     the VLD2/VLD4 lane patterns.
     NOTE(review): presumably because this form takes no alignment
     qualifier -- confirm.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4888
;; Load one three-element structure and replicate it across all lanes of an
;; EImode register triple (VDX modes).  Single-element modes are handled
;; with a VLD1 of the register list instead of the all-lanes VLD3 form.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  int base;
  rtx ops[4];
  int i;

  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* All-lanes form: name the three DImode registers explicitly.  */
  base = REGNO (operands[0]);
  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, base + 2 * i);
  ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
  return "";
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load3_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))]
)
4914
;; Expander for vec_store_lanes on an EImode triple of 64-bit (VDX) vectors.
;; It has no preparation code; the RTL it produces has the same shape as the
;; neon_vst3<mode> insn pattern for VDX.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON"
)
4921
;; Store an EImode triple of 64-bit (VDX) vectors to memory.  VST3 is
;; emitted for every element size except 64 bits, where VST1.64 is emitted
;; for the same register list instead.
;; The "type" attribute mirrors neon_vld3<mode>: the VST1 fallback is a
;; three-register VST1, and the VST3 case is a three-register
;; whole-structure store (the type also used by neon_vst3qa/neon_vst3qb),
;; not a single-lane store.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_store1_3reg<q>")
          (const_string "neon_store3_3reg<q>")))]
)
4938
;; Expander for vec_store_lanes on a CImode triple of quad-register (VQ2)
;; vectors.  All the work is delegated to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
})
4948
;; Store a CImode triple of quad-register (VQ2) vectors as two EImode-sized
;; halves: neon_vst3qa writes the first EImode chunk of the destination,
;; then neon_vst3qb writes the chunk GET_MODE_SIZE (EImode) bytes later.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_half = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_half, operands[1]));

  rtx second_half
    = adjust_address (first_half, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_half, operands[1]));
  DONE;
})
4963
;; First half of the quad-register VST3: stores the DImode registers at
;; REGNO, REGNO + 4 and REGNO + 8 of the CImode source to an EImode memory
;; chunk.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:CI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx ops[4];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4982
;; Second half of the quad-register VST3: stores the DImode registers at
;; REGNO + 2, REGNO + 6 and REGNO + 10 of the CImode source to an EImode
;; memory chunk.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:CI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx ops[4];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5001
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Store one lane from each vector of an EImode register triple (VD_LANE
;; modes) as a three-element structure.  The DImode registers printed are
;; REGNO, REGNO + 2 and REGNO + 4 of operand 1.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx ops[5];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  /* Endian-adjusted lane index.  */
  ops[4] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));

  /* The address is printed with plain %0 here, not the %A modifier used by
     the VST2/VST4 lane patterns.
     NOTE(review): presumably because this form takes no alignment
     qualifier -- confirm.  */
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5027
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Store one lane from each vector of a CImode register group (VQ_HS modes)
;; as a three-element structure.  The DImode registers printed are four
;; register numbers apart; a lane in the upper half shifts all of them by two.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx ops[5];
  int i;

  /* Fold an upper-half lane onto the other DImode half of each vector.  */
  if (lane >= half)
    {
      lane -= half;
      base += 2;
    }

  ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  ops[4] = GEN_INT (lane);

  /* The address is printed with plain %0 here, not the %A modifier used by
     the VST2/VST4 lane patterns.
     NOTE(review): presumably because this form takes no alignment
     qualifier -- confirm.  */
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5059
;; Expander for vec_load_lanes on an OImode quadruple of 64-bit (VDX)
;; vectors.  It has no preparation code; the RTL it produces has the same
;; shape as the neon_vld4<mode> insn pattern for VDX.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4))]
  "TARGET_NEON"
)
5066
;; Load an OImode quadruple of 64-bit (VDX) vectors from memory.  VLD4 is
;; emitted for every element size except 64 bits, where VLD1.64 is emitted
;; for the same register list instead; the "type" attribute follows suit.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_4reg<q>")
          (const_string "neon_load4_4reg<q>")))]
)
5084
;; Expander for vec_load_lanes on an XImode quadruple of quad-register (VQ2)
;; vectors.  All the work is delegated to the neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})
5094
;; Load an XImode quadruple of quad-register (VQ2) vectors as two
;; OImode-sized halves: neon_vld4qa reads the first OImode chunk of the
;; source, then neon_vld4qb reads the chunk GET_MODE_SIZE (OImode) bytes
;; later, with the partially loaded destination passed back in as its tied
;; input.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_half = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_half));

  rtx second_half
    = adjust_address (first_half, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_half, operands[0]));
  DONE;
})
5109
;; First half of the quad-register VLD4: loads the DImode registers at
;; REGNO, REGNO + 4, REGNO + 8 and REGNO + 12 of the XImode destination from
;; an OImode memory chunk.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx ops[5];
  int i;

  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5129
;; Second half of the quad-register VLD4: loads the DImode registers at
;; REGNO + 2, REGNO + 6, REGNO + 10 and REGNO + 14 of the XImode destination
;; from an OImode memory chunk.  Operand 2 is tied to the destination so the
;; result of neon_vld4qa is carried through.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (match_operand:XI 2 "s_register_operand" "0")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx ops[5];
  int i;

  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5150
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Load one four-element structure into a single lane of an OImode register
;; quadruple (VD_LANE modes).  The DImode registers printed are REGNO,
;; REGNO + 2, REGNO + 4 and REGNO + 6 of operand 0.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx ops[6];
  int i;

  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, base + 2 * i);
  ops[4] = operands[1];
  /* Endian-adjusted lane index.  */
  ops[5] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5177
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Load one four-element structure into a single lane of an XImode register
;; group (VQ_HS modes).  The DImode registers printed are four register
;; numbers apart; a lane in the upper half shifts all of them by two.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (match_operand:XI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx ops[6];
  int i;

  /* Fold an upper-half lane onto the other DImode half of each vector.  */
  if (lane >= half)
    {
      lane -= half;
      base += 2;
    }

  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5210
;; Load one four-element structure and replicate it across all lanes of an
;; OImode register quadruple (VDX modes).  Single-element modes are handled
;; with a VLD1 of the register list instead of the all-lanes VLD4 form.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  int base;
  rtx ops[5];
  int i;

  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* All-lanes form: name the four DImode registers explicitly.  */
  base = REGNO (operands[0]);
  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, base + 2 * i);
  ops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   ops);
  return "";
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load4_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))]
)
5239
;; Expander for vec_store_lanes on an OImode quadruple of 64-bit (VDX)
;; vectors.  It has no preparation code; the RTL it produces has the same
;; shape as the neon_vst4<mode> insn pattern for VDX.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4))]
  "TARGET_NEON"
)
5246
;; Store an OImode quadruple of 64-bit (VDX) vectors to memory.  VST4 is
;; emitted for every element size except 64 bits, where VST1.64 is emitted
;; for the same register list instead; the "type" attribute follows suit.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst4.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_store1_4reg<q>")
          (const_string "neon_store4_4reg<q>")))]
)
5264
;; Expander for vec_store_lanes on an XImode quadruple of quad-register
;; (VQ2) vectors.  All the work is delegated to the neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest, src));
  DONE;
})
5274
;; Store an XImode quadruple of quad-register (VQ2) vectors as two
;; OImode-sized halves: neon_vst4qa writes the first OImode chunk of the
;; destination, then neon_vst4qb writes the chunk GET_MODE_SIZE (OImode)
;; bytes later.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_half = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_half, operands[1]));

  rtx second_half
    = adjust_address (first_half, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_half, operands[1]));
  DONE;
})
5289
;; First half of the quad-register VST4: stores the DImode registers at
;; REGNO, REGNO + 4, REGNO + 8 and REGNO + 12 of the XImode source to an
;; OImode memory chunk.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:XI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx ops[5];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5309
;; Second half of the quad-register VST4: stores the DImode registers at
;; REGNO + 2, REGNO + 6, REGNO + 10 and REGNO + 14 of the XImode source to
;; an OImode memory chunk.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:XI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx ops[5];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5329
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Store one lane from each vector of an OImode register quadruple (VD_LANE
;; modes) as a four-element structure.  The DImode registers printed are
;; REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 of operand 1.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx ops[6];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  /* Endian-adjusted lane index.  */
  ops[5] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5356
;; Lane numbers are reversed here on big-endian targets; the comment on
;; neon_vld1_lane explains why.
;; Store one lane from each vector of an XImode register group (VQ_HS modes)
;; as a four-element structure.  The DImode registers printed are four
;; register numbers apart; a lane in the upper half shifts all of them by two.
;; This is a single-lane store, so it carries the same "one_lane" scheduling
;; type as the VD_LANE variant of this pattern.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];

  /* Fold an upper-half lane onto the other DImode half of each vector.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5389
;; Extend (sign or zero, per the SE/US iterators) the half of operand 1
;; selected by a vect_par_constant_low PARALLEL into the wider <V_unpack>
;; mode with VMOVL.  Only available when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
5399
;; Extend (sign or zero, per the SE/US iterators) the half of operand 1
;; selected by a vect_par_constant_high PARALLEL into the wider <V_unpack>
;; mode with VMOVL.  Only available when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
5409
;; Expander for vec_unpack<US>_hi: builds a PARALLEL selecting element
;; indices nunits/2 .. nunits-1 and hands it to the
;; neon_vec_unpack<US>_hi_<mode> insn as its selector operand.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  rtx par;
  int lane;

  /* Indices of the second half of the input vector.  */
  for (lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (half + lane);

  par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], operands[1],
                                                par));
  DONE;
}
)
5428
;; Expander for vec_unpack<US>_lo: builds a PARALLEL selecting element
;; indices 0 .. nunits/2-1 and hands it to the
;; neon_vec_unpack<US>_lo_<mode> insn as its selector operand.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  rtx par;
  int lane;

  /* Indices of the first half of the input vector.  */
  for (lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (lane);

  par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], operands[1],
                                                par));
  DONE;
}
)
5446
;; Widening multiply (VMULL) of the halves of operands 1 and 3 selected by
;; the same vect_par_constant_low PARALLEL (operand 2, reused via match_dup).
;; Both inputs are extended to <V_unpack> before the multiply.  Only
;; available when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_low" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5459
;; Expander for a widening multiply of the lower halves of two VU-mode
;; vectors (operands 1 and 2) into operand 0 (mode <V_unpack>).  Builds a
;; PARALLEL of the indices 0 .. <V_mode_nunits>/2-1 and emits
;; neon_vec_<US>mult_lo_<mode>.  Note the argument order in the call: the
;; selector is that insn's operand 2, so this expander's operand 2 is
;; passed last, as the insn's operand 3.
5460 (define_expand "vec_widen_<US>mult_lo_<mode>"
5461 [(match_operand:<V_unpack> 0 "register_operand" "")
5462 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5463 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5464 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5465 {
5466 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5467 rtx t1;
5468 int i;
5469 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5470 RTVEC_ELT (v, i) = GEN_INT (i);
5471 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5472
5473 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
5474 operands[1],
5475 t1,
5476 operands[2]));
5477 DONE;
5478 }
5479 )
5480
;; Widening multiply of matching halves of two VU-mode vectors, using a
;; selector (operand 2) that must satisfy vect_par_constant_high.  The
;; selector picks the <V_HALF> elements of operand 1 and, via match_dup,
;; of operand 3; both are SE-extended to <V_unpack> and multiplied into
;; operand 0.  Emitted as one vmull with the sources printed as %f1 and
;; %f3.  Enabled only when TARGET_NEON && !BYTES_BIG_ENDIAN.
5481 (define_insn "neon_vec_<US>mult_hi_<mode>"
5482 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5483 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5484 (match_operand:VU 1 "register_operand" "w")
5485 (match_operand:VU 2 "vect_par_constant_high" "")))
5486 (SE:<V_unpack> (vec_select:<V_HALF>
5487 (match_operand:VU 3 "register_operand" "w")
5488 (match_dup 2)))))]
5489 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5490 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5491 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5492 )
5493
;; Expander for a widening multiply of the upper halves of two VU-mode
;; vectors (operands 1 and 2) into operand 0 (mode <V_unpack>).  Builds a
;; PARALLEL of the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and
;; emits neon_vec_<US>mult_hi_<mode>.  As in the call below, the selector
;; is that insn's operand 2, so this expander's operand 2 is passed last,
;; as the insn's operand 3.
5494 (define_expand "vec_widen_<US>mult_hi_<mode>"
5495 [(match_operand:<V_unpack> 0 "register_operand" "")
5496 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5497 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5498 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5499 {
5500 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5501 rtx t1;
5502 int i;
5503 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5504 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5505 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5506
5507 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
5508 operands[1],
5509 t1,
5510 operands[2]));
5511 DONE;
5512
5513 }
5514 )
5515
;; Widening shift left.  Operand 1 (mode VW) is shifted left by operand 2,
;; which must satisfy const_neon_scalar_shift_amount_operand, and the
;; result is SE-extended into operand 0 (mode <V_widen>).  Emitted as a
;; single vshll.  Unlike the half-selecting patterns nearby, the condition
;; here is just TARGET_NEON, with no endianness restriction.
5516 (define_insn "neon_vec_<US>shiftl_<mode>"
5517 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5518 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5519 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5520 "TARGET_NEON"
5521 {
5522 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5523 }
5524 [(set_attr "type" "neon_shift_imm_long")]
5525 )
5526
;; Expander for a widening shift left of one half of a VU-mode vector.
;; Takes the <V_HALF>-mode subreg of operand 1 at byte offset 0 and feeds
;; it, together with the immediate shift count in operand 2, to
;; neon_vec_<US>shiftl_<V_half>, which writes operand 0.  Enabled only for
;; little-endian NEON.
5527 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5528 [(match_operand:<V_unpack> 0 "register_operand" "")
5529 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5530 (match_operand:SI 2 "immediate_operand" "i")]
5531 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5532 {
5533 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5534 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
5535 operands[2]));
5536 DONE;
5537 }
5538 )
5539
;; Expander for a widening shift left of the other half of a VU-mode
;; vector.  Takes the <V_HALF>-mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) -- i.e. the half that follows the one used
;; by the _lo variant -- and feeds it, with the immediate shift count in
;; operand 2, to neon_vec_<US>shiftl_<V_half>, which writes operand 0.
;; Enabled only for little-endian NEON.
5540 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5541 [(match_operand:<V_unpack> 0 "register_operand" "")
5542 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5543 (match_operand:SI 2 "immediate_operand" "i")]
5544 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5545 {
5546 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5547 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5548 GET_MODE_SIZE (<V_HALF>mode)),
5549 operands[2]));
5550 DONE;
5551 }
5552 )
5553
5554 ;; Vectorize for non-neon-quad case
;; SE-extends every element of a VDI-mode operand 1 into operand 0 (mode
;; <V_widen>) with a single vmovl.  No half selection is involved, and the
;; condition is just TARGET_NEON.
5555 (define_insn "neon_unpack<US>_<mode>"
5556 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5557 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5558 "TARGET_NEON"
5559 "vmovl.<US><V_sz_elem> %q0, %P1"
5560 [(set_attr "type" "neon_move")]
5561 )
5562
;; VDI-mode variant of the unpack-low expander.  The whole of operand 1 is
;; first widened into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>;
;; neon_vget_low<V_widen_l> then copies the low part of that pseudo into
;; operand 0 (mode <V_double_width>).
5563 (define_expand "vec_unpack<US>_lo_<mode>"
5564 [(match_operand:<V_double_width> 0 "register_operand" "")
5565 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5566 "TARGET_NEON"
5567 {
5568 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5569 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5570 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5571
5572 DONE;
5573 }
5574 )
5575
;; VDI-mode variant of the unpack-high expander.  The whole of operand 1 is
;; first widened into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>;
;; neon_vget_high<V_widen_l> then copies the high part of that pseudo into
;; operand 0 (mode <V_double_width>).
5576 (define_expand "vec_unpack<US>_hi_<mode>"
5577 [(match_operand:<V_double_width> 0 "register_operand" "")
5578 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5579 "TARGET_NEON"
5580 {
5581 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5582 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5583 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5584
5585 DONE;
5586 }
5587 )
5588
;; Widening multiply of two complete VDI-mode vectors.  Operands 1 and 2
;; are each SE-extended to <V_widen> and multiplied into operand 0.
;; Emitted as a single vmull; the condition is just TARGET_NEON.
5589 (define_insn "neon_vec_<US>mult_<mode>"
5590 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5591 (mult:<V_widen> (SE:<V_widen>
5592 (match_operand:VDI 1 "register_operand" "w"))
5593 (SE:<V_widen>
5594 (match_operand:VDI 2 "register_operand" "w"))))]
5595 "TARGET_NEON"
5596 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5597 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5598 )
5599
;; VDI-mode variant of the widening-multiply-high expander.  Operands 1 and
;; 2 are multiplied in full into a fresh <V_widen> pseudo with
;; neon_vec_<US>mult_<mode>; neon_vget_high<V_widen_l> then copies the high
;; part of the product into operand 0 (mode <V_double_width>).
5600 (define_expand "vec_widen_<US>mult_hi_<mode>"
5601 [(match_operand:<V_double_width> 0 "register_operand" "")
5602 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5603 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5604 "TARGET_NEON"
5605 {
5606 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5607 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5608 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5609
5610 DONE;
5611
5612 }
5613 )
5614
;; VDI-mode variant of the widening-multiply-low expander.  Operands 1 and
;; 2 are multiplied in full into a fresh <V_widen> pseudo with
;; neon_vec_<US>mult_<mode>; neon_vget_low<V_widen_l> then copies the low
;; part of the product into operand 0 (mode <V_double_width>).
5615 (define_expand "vec_widen_<US>mult_lo_<mode>"
5616 [(match_operand:<V_double_width> 0 "register_operand" "")
5617 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5618 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5619 "TARGET_NEON"
5620 {
5621 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5622 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5623 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5624
5625 DONE;
5626
5627 }
5628 )
5629
;; VDI-mode variant of the widening-shift-left-high expander.  Operand 1 is
;; shifted by the immediate in operand 2 and widened into a fresh <V_widen>
;; pseudo with neon_vec_<US>shiftl_<mode>; neon_vget_high<V_widen_l> then
;; copies the high part of that result into operand 0 (mode
;; <V_double_width>).
5630 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5631 [(match_operand:<V_double_width> 0 "register_operand" "")
5632 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5633 (match_operand:SI 2 "immediate_operand" "i")]
5634 "TARGET_NEON"
5635 {
5636 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5637 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5638 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5639
5640 DONE;
5641 }
5642 )
5643
;; VDI-mode variant of the widening-shift-left-low expander.  Operand 1 is
;; shifted by the immediate in operand 2 and widened into a fresh <V_widen>
;; pseudo with neon_vec_<US>shiftl_<mode>; neon_vget_low<V_widen_l> then
;; copies the low part of that result into operand 0 (mode
;; <V_double_width>).
5644 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5645 [(match_operand:<V_double_width> 0 "register_operand" "")
5646 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5647 (match_operand:SI 2 "immediate_operand" "i")]
5648 "TARGET_NEON"
5649 {
5650 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5651 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5652 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5653
5654 DONE;
5655 }
5656 )
5657
5658 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5659 ; because the ordering of vector elements in Q registers is different from what
5660 ; the semantics of the instructions require.
5661
;; Narrowing pack of two VN-mode vectors.  Operands 1 and 2 are each
;; truncated to <V_narrow> and concatenated into operand 0 (mode
;; <V_narrow_pack>).  The template is two vmovn instructions (hence type
;; "multiple" and length 8): the first writes %e0 from operand 1, the
;; second writes %f0 from operand 2.  The output constraint is "=&w"
;; because the first vmovn writes part of operand 0 before the second one
;; has read operand 2.  Enabled only when TARGET_NEON && !BYTES_BIG_ENDIAN.
5662 (define_insn "vec_pack_trunc_<mode>"
5663 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5664 (vec_concat:<V_narrow_pack>
5665 (truncate:<V_narrow>
5666 (match_operand:VN 1 "register_operand" "w"))
5667 (truncate:<V_narrow>
5668 (match_operand:VN 2 "register_operand" "w"))))]
5669 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5670 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5671 [(set_attr "type" "multiple")
5672 (set_attr "length" "8")]
5673 )
5674
5675 ;; For the non-quad case.
;; Truncates each element of a VN-mode operand 1 to produce operand 0
;; (mode <V_narrow>), using a single vmovn.  Enabled only when
;; TARGET_NEON && !BYTES_BIG_ENDIAN.
5676 (define_insn "neon_vec_pack_trunc_<mode>"
5677 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5678 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5679 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5680 "vmovn.i<V_sz_elem>\t%P0, %q1"
5681 [(set_attr "type" "neon_move_narrow_q")]
5682 )
5683
;; VSHFT-mode variant of the pack-truncate expander.  Operands 1 and 2 are
;; first combined into one fresh <V_DOUBLE> pseudo -- operand 1 through
;; move_lo_quad_<V_double>, operand 2 through move_hi_quad_<V_double> --
;; and that pseudo is then narrowed into operand 0 with
;; neon_vec_pack_trunc_<V_double>.  Enabled only for little-endian NEON.
5684 (define_expand "vec_pack_trunc_<mode>"
5685 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5686 (match_operand:VSHFT 1 "register_operand" "")
5687 (match_operand:VSHFT 2 "register_operand")]
5688 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5689 {
5690 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5691
5692 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5693 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5694 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
5695 DONE;
5696 })
5697
;; Absolute difference written as (abs (minus op1 op2)) on VDQ modes,
;; emitted as a single vabd.  For modes where <Is_float_mode> is non-zero
;; the pattern is only available with flag_unsafe_math_optimizations.  The
;; "type" attribute is neon_fp_abd_s<q> when <Is_float_mode> is non-zero
;; and neon_abd<q> otherwise.
5698 (define_insn "neon_vabd<mode>_2"
5699 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5700 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5701 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5702 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5703 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5704 [(set (attr "type")
5705 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5706 (const_string "neon_fp_abd_s<q>")
5707 (const_string "neon_abd<q>")))]
5708 )
5709
;; Same absolute-difference instruction as neon_vabd<mode>_2, but matching
;; the form where the subtraction appears as an UNSPEC_VSUB unspec of
;; operands 1 and 2 rather than as a minus.  The mnemonic suffix here comes
;; from <V_if_elem> (the _2 pattern uses <V_s_elem>).  The enabling
;; condition and the "type" attribute selection are identical to _2.
5710 (define_insn "neon_vabd<mode>_3"
5711 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5712 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5713 (match_operand:VDQ 2 "s_register_operand" "w")]
5714 UNSPEC_VSUB)))]
5715 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5716 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5717 [(set (attr "type")
5718 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5719 (const_string "neon_fp_abd_s<q>")
5720 (const_string "neon_abd<q>")))]
5721 )
5722
5723 ;; Copy from core-to-neon regs, then extend, not vice-versa
5724
;; After reload, when the DImode destination is a VFP register number,
;; split sign_extend SI->DI into two steps: duplicate operand 1 across the
;; destination viewed as V2SI (operand 2 is the same hard register in
;; V2SImode), then arithmetic-shift the DImode destination right by 32.
;; The duplicate leaves a copy of the value in the top 32 bits, so the
;; shift brings it down with its sign bits filled in.
5725 (define_split
5726 [(set (match_operand:DI 0 "s_register_operand" "")
5727 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5728 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5729 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5730 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5731 {
5732 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5733 })
5734
;; After reload, when the DImode destination is a VFP register number,
;; split sign_extend HI->DI into two steps: duplicate operand 1 across the
;; destination viewed as V4HI (operand 2 is the same hard register in
;; V4HImode), then arithmetic-shift the DImode destination right by 48.
;; The duplicate leaves a copy of the value in the top 16 bits, so the
;; shift brings it down with its sign bits filled in.
5735 (define_split
5736 [(set (match_operand:DI 0 "s_register_operand" "")
5737 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5738 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5739 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5740 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5741 {
5742 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5743 })
5744
;; After reload, when the DImode destination is a VFP register number,
;; split sign_extend QI->DI into two steps: duplicate operand 1 across the
;; destination viewed as V8QI (operand 2 is the same hard register in
;; V8QImode), then arithmetic-shift the DImode destination right by 56.
;; The duplicate leaves a copy of the value in the top 8 bits, so the
;; shift brings it down with its sign bits filled in.
5745 (define_split
5746 [(set (match_operand:DI 0 "s_register_operand" "")
5747 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5748 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5749 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5750 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5751 {
5752 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5753 })
5754
;; After reload, when the DImode destination is a VFP register number,
;; split zero_extend SI->DI into two steps: duplicate operand 1 across the
;; destination viewed as V2SI (operand 2 is the same hard register in
;; V2SImode), then logical-shift the DImode destination right by 32.  The
;; duplicate leaves a copy of the value in the top 32 bits, so the shift
;; brings it down with zeros above it.
5755 (define_split
5756 [(set (match_operand:DI 0 "s_register_operand" "")
5757 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5758 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5759 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5760 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5761 {
5762 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5763 })
5764
;; After reload, when the DImode destination is a VFP register number,
;; split zero_extend HI->DI into two steps: duplicate operand 1 across the
;; destination viewed as V4HI (operand 2 is the same hard register in
;; V4HImode), then logical-shift the DImode destination right by 48.  The
;; duplicate leaves a copy of the value in the top 16 bits, so the shift
;; brings it down with zeros above it.
5765 (define_split
5766 [(set (match_operand:DI 0 "s_register_operand" "")
5767 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5768 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5769 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5770 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5771 {
5772 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5773 })
5774
;; After reload, when the DImode destination is a VFP register number,
;; split zero_extend QI->DI into two steps: duplicate operand 1 across the
;; destination viewed as V8QI (operand 2 is the same hard register in
;; V8QImode), then logical-shift the DImode destination right by 56.  The
;; duplicate leaves a copy of the value in the top 8 bits, so the shift
;; brings it down with zeros above it.
5775 (define_split
5776 [(set (match_operand:DI 0 "s_register_operand" "")
5777 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5778 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5779 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5780 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5781 {
5782 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5783 })