[ARM] Legitimize addresses for movmisalign<mode> for Neon.
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Its possible values are vadd, vmin and vmax; the default is vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the VDX modes.  It is only available under TARGET_NEON
;; and requires at least one of the two operands to be a register.
;; Alternatives 0, 4 and 5 emit a vmov directly, alternative 2 emits an
;; immediate vmov, alternatives 1 and 3 are delegated to output_move_neon
;; and the remaining ones to output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
/* Alternative 2 has the Dn (constant) source constraint.  */
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
38 static char templ[40];
39
/* The call may rewrite operands[1] and reports the element width.  */
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
/* A width of 0 selects the vmov.f32 form; any other width is printed
into the vmov.i<width> mnemonic.  */
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
/* Alternative 2 already returned above.  */
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
61 }
62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the VQXMOV modes.  It is only available under
;; TARGET_NEON and requires at least one of the two operands to be a
;; register.  Alternative 0 emits a single vmov, alternative 2 an immediate
;; vmov, alternatives 4 and 5 a pair of vmov instructions, alternatives 1
;; and 3 are delegated to output_move_neon and the rest to output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
76 "TARGET_NEON
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
79 {
/* Alternative 2 has the Dn (constant) source constraint.  */
80 if (which_alternative == 2)
81 {
82 int width, is_valid;
83 static char templ[40];
84
/* The call may rewrite operands[1] and reports the element width.  */
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
87
88 gcc_assert (is_valid != 0);
89
/* A width of 0 selects the vmov.f32 form; any other width is printed
into the vmov.i<width> mnemonic.  */
90 if (width == 0)
91 return "vmov.f32\t%q0, %1 @ <mode>";
92 else
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
94
95 return templ;
96 }
97
98 switch (which_alternative)
99 {
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
/* Alternative 2 already returned above.  */
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
106 }
107 }
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; Expander for TImode moves under TARGET_NEON.  While new pseudos may still
;; be created, a source that is headed for a non-register destination is
;; first forced into a register.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
119 "TARGET_NEON"
120 {
121 if (can_create_pseudo_p ())
122 {
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
125 }
126 })
127
;; Expander for moves in the VSTRUCT modes under TARGET_NEON.  While new
;; pseudos may still be created, a source that is headed for a non-register
;; destination is first forced into a register.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
131 "TARGET_NEON"
132 {
133 if (can_create_pseudo_p ())
134 {
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
137 }
138 })
139
;; Move pattern for the VSTRUCT modes; at least one operand must be a
;; register.  The register-to-register alternative is output as "#", the
;; store and load alternatives are delegated to output_move_neon.  The
;; length attribute is computed by arm_attr_length_move_neon.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
143 "TARGET_NEON
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
146 {
147 switch (which_alternative)
148 {
149 case 0: return "#";
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
152 }
153 }
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
156
;; After reload, split an EImode register-to-register copy into two sets:
;; a TImode piece starting at the first register and a DImode piece starting
;; four register numbers later.  neon_disambiguate_copy receives both pieces
;; and fills in the operands used by the replacement sets.
;; NOTE(review): presumably it orders the pieces so that overlapping source
;; and destination registers are copied safely -- confirm in its definition.
157 (define_split
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
163 {
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
166 rtx dest[2], src[2];
167
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
172
173 neon_disambiguate_copy (operands, dest, src, 2);
174 })
175
;; After reload, split an OImode register-to-register copy into two TImode
;; sets, the second starting four register numbers after the first.
;; neon_disambiguate_copy receives both pieces and fills in the operands
;; used by the replacement sets.
;; NOTE(review): presumably it orders the pieces so that overlapping source
;; and destination registers are copied safely -- confirm in its definition.
176 (define_split
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
182 {
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
185 rtx dest[2], src[2];
186
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
191
192 neon_disambiguate_copy (operands, dest, src, 2);
193 })
194
;; After reload, split a CImode register-to-register copy into three TImode
;; sets at register offsets 0, 4 and 8.  neon_disambiguate_copy receives the
;; pieces and fills in the operands used by the replacement sets.
;; NOTE(review): presumably it orders the pieces so that overlapping source
;; and destination registers are copied safely -- confirm in its definition.
195 (define_split
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
202 {
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
205 rtx dest[3], src[3];
206
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
213
214 neon_disambiguate_copy (operands, dest, src, 3);
215 })
216
;; After reload, split an XImode register-to-register copy into four TImode
;; sets at register offsets 0, 4, 8 and 12.  neon_disambiguate_copy receives
;; the pieces and fills in the operands used by the replacement sets.
;; NOTE(review): presumably it orders the pieces so that overlapping source
;; and destination registers are copied safely -- confirm in its definition.
217 (define_split
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
225 {
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
228 rtx dest[4], src[4];
229
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
238
239 neon_disambiguate_copy (operands, dest, src, 4);
240 })
241
;; Expander for misaligned moves in the VDQX modes, available on
;; little-endian NEON targets when unaligned_access is set.  It guarantees
;; that one operand is a register and that the address of the other operand
;; is acceptable to neon_vector_mem_operand.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
247 {
248 rtx adjust_mem;
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
255
/* Pick the operand that is not the register destination; its address is
the one checked below.  */
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
258 else
259 adjust_mem = operands[0];
260
261 /* Legitimize address. */
/* If the address is rejected, it is replaced by a register holding it.  */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
264
265 })
266
;; Misaligned store of a VDX register, emitted as vst1.  Available on
;; little-endian NEON targets when unaligned_access is set.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
274
;; Misaligned load into a VDX register, emitted as vld1.  Available on
;; little-endian NEON targets when unaligned_access is set.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
278 " Um")]
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
283
;; Misaligned store of a VQX register, emitted as vst1.  Available on
;; little-endian NEON targets when unaligned_access is set.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
291
;; Misaligned load into a VQX register, emitted as vld1.  Available on
;; little-endian NEON targets when unaligned_access is set.  The insn is a
;; load, so it carries the load "type" (the matching VDX load pattern uses
;; the same attribute value).
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
295 " Um")]
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
300
;; Insert scalar operand 1 into one lane of a VD vector; the other lanes
;; come from operand 3, which is tied to the destination.  The lane index
;; is the position of the lowest set bit of operand 2, mirrored on
;; big-endian.  Alternative 0 loads the lane from memory with vld1,
;; alternative 1 moves it from a core register with vmov.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
303 (vec_merge:VD
304 (vec_duplicate:VD
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
308 "TARGET_NEON"
309 {
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
/* Operand 2 is replaced by the lane number printed in the template.  */
313 operands[2] = GEN_INT (elt);
314
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
317 else
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
319 }
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
321
;; Insert scalar operand 1 into one lane of a VQ vector; the other lanes
;; come from operand 3, which is tied to the destination.  The lane index
;; is the position of the lowest set bit of operand 2.  The instruction is
;; emitted on a <V_HALF>mode register: the index is reduced modulo half the
;; element count and the register number is advanced by 2 for the upper
;; half.  Alternative 0 uses vld1, alternative 1 uses vmov.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
324 (vec_merge:VQ
325 (vec_duplicate:VQ
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
329 "TARGET_NEON"
330 {
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
336
/* On big-endian the lane number within the half is mirrored.  */
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
339
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
342
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
345 else
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
347 }
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
349 )
350
;; Insert DImode operand 1 into one element of a V2DI vector; the other
;; element comes from operand 3, which is tied to the destination.  The
;; element index is the position of the lowest set bit of operand 2, and
;; the write targets the DImode register at REGNO (operand 0) + 2 * index.
;; Alternative 0 uses vld1.64, alternative 1 uses vmov from a core register
;; pair.
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
353 (vec_merge:V2DI
354 (vec_duplicate:V2DI
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
358 "TARGET_NEON"
359 {
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
362
363 operands[0] = gen_rtx_REG (DImode, regno);
364
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
367 else
368 return "vmov\t%P0, %Q1, %R1";
369 }
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
371 )
372
;; Expander for vec_set on the VDQ modes.  Operand 2 is the element index;
;; it is converted to the mask 1 << index and passed to the matching
;; vec_set<mode>_internal pattern, with operand 0 supplied both as the
;; destination and as the vector providing the other elements.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
377 "TARGET_NEON"
378 {
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
382 DONE;
383 })
384
;; Extract lane operand 2 of a VD vector into operand 0.  On big-endian the
;; lane number is mirrored before printing.  Alternative 0 stores the lane
;; to memory with vst1, alternative 1 moves it to a core register with vmov.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
387 (vec_select:<V_elem>
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
390 "TARGET_NEON"
391 {
392 if (BYTES_BIG_ENDIAN)
393 {
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
397 }
398
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
401 else
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
403 }
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
405 )
406
;; Extract lane operand 2 of a VQ vector into operand 0.  The instruction
;; is emitted on a <V_HALF>mode register: the lane index is reduced modulo
;; half the element count and the register number is advanced by 2 for the
;; upper half.  Alternative 0 uses vst1, alternative 1 uses vmov.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
409 (vec_select:<V_elem>
410 (match_operand:VQ 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
412 "TARGET_NEON"
413 {
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
418
/* On big-endian the lane number within the half is mirrored.  */
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
421
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
424
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
427 else
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
429 }
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
431 )
432
;; Extract element operand 2 of a V2DI vector into DImode operand 0.  The
;; source is rewritten to the DImode register at REGNO (operand 1) +
;; 2 * index.  Alternative 0 stores it with vst1.64, alternative 1 moves it
;; to a core register pair with vmov.
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
435 (vec_select:DI
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
438 "TARGET_NEON"
439 {
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
441
442 operands[1] = gen_rtx_REG (DImode, regno);
443
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
446 else
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
448 }
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
450 )
451
;; Expander for vec_init on the VDQ modes; all of the work is done by
;; neon_expand_vector_init, which is given the destination and operand 1.
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
455 "TARGET_NEON"
456 {
457 neon_expand_vector_init (operands[0], operands[1]);
458 DONE;
459 })
460
461 ;; Doubleword and quadword arithmetic.
462
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
465
;; Vector addition on the VDQ modes, emitted as vadd.  For float modes the
;; pattern is only enabled under flag_unsafe_math_optimizations.  The "type"
;; attribute is neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
472 [(set (attr "type")
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
476 )
477
;; DImode addition with a clobber of the condition-code register.
;; Alternatives 0 and 3 (all operands in "w" registers) emit vadd.i64; the
;; "arch" attribute ties them to neon_for_64bits and avoid_neon_for_64bits
;; respectively.  Every other alternative is output as "#".
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
483 "TARGET_NEON"
484 {
485 switch (which_alternative)
486 {
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
489 case 1: return "#";
490 case 2: return "#";
491 case 4: return "#";
492 case 5: return "#";
493 case 6: return "#";
494 default: gcc_unreachable ();
495 }
496 }
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
502 )
503
;; Vector subtraction on the VDQ modes, emitted as vsub.  For float modes
;; the pattern is only enabled under flag_unsafe_math_optimizations.  The
;; "type" attribute is neon_fp_addsub_s<q> for float modes and neon_sub<q>
;; otherwise.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
510 [(set (attr "type")
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
514 )
515
;; DImode subtraction with a clobber of the condition-code register.
;; Alternatives 0 and 4 (all operands in "w" registers) emit vsub.i64; the
;; "arch" attribute ties them to neon_for_64bits and avoid_neon_for_64bits
;; respectively.  Alternatives 1 to 3 use core registers and return a
;; subs/sbc pair on the low (%Q) and high (%R) words.
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
521 "TARGET_NEON"
522 {
523 switch (which_alternative)
524 {
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
531 }
532 }
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
537 )
538
;; Vector multiplication on the VDQW modes, emitted as vmul.  For float
;; modes the pattern is only enabled under flag_unsafe_math_optimizations.
;; The "type" attribute is neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
545 [(set (attr "type")
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
549 )
550
;; Multiply-accumulate on the VDQW modes: operand 2 * operand 3 + operand 1,
;; emitted as vmla.  Operand 1 is tied to the destination.  For float modes
;; the pattern is only enabled under flag_unsafe_math_optimizations.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
558 [(set (attr "type")
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
562 )
563
;; Multiply-subtract on the VDQW modes: operand 1 - operand 2 * operand 3,
;; emitted as vmls.  Operand 1 is tied to the destination.  For float modes
;; the pattern is only enabled under flag_unsafe_math_optimizations.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
571 [(set (attr "type")
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
575 )
576
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
580 ;; to be able to use when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add on the VCVTF modes, emitted as vfma, with the addend
;; (operand 3) tied to the destination.  This variant additionally requires
;; flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
590 )
591
;; Fused multiply-add on the VCVTF modes, emitted as vfma, with the addend
;; (operand 3) tied to the destination.  This variant does not depend on
;; flag_unsafe_math_optimizations.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
600 )
601
;; Fused multiply-subtract on the VCVTF modes: fma with operand 1 negated,
;; emitted as vfms, with the addend (operand 3) tied to the destination.
;; This variant additionally requires flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
610 )
611
;; Fused multiply-subtract on the VCVTF modes: fma with operand 1 negated,
;; emitted as vfms, with the addend (operand 3) tied to the destination.
;; This variant does not depend on flag_unsafe_math_optimizations.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
620 )
621
;; Vector round-to-integral on the VCVTF modes, one pattern per NEON_VRINT
;; unspec; the variant letter comes from <nvrint_variant> and the
;; instruction is always emitted with the .f32 suffix.  Requires both
;; TARGET_NEON and TARGET_FPU_ARMV8.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
626 NEON_VRINT))]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
630 )
631
;; Bitwise OR on the VDQ modes.  Alternative 0 is the register form (vorr);
;; alternative 1 takes an immediate (constraint Dl) with operand 1 tied to
;; the destination and is printed by neon_output_logic_immediate.
632 (define_insn "ior<mode>3"
633 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
634 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
635 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
636 "TARGET_NEON"
637 {
638 switch (which_alternative)
639 {
640 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
641 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
642 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
643 default: gcc_unreachable ();
644 }
645 }
646 [(set_attr "type" "neon_logic<q>")]
647 )
648
649 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
650 ;; vorr. We support the pseudo-instruction vand instead, because that
651 ;; corresponds to the canonical form the middle-end expects to use for
652 ;; immediate bitwise-ANDs.
653
;; Bitwise AND on the VDQ modes.  Alternative 0 is the register form (vand);
;; alternative 1 takes an immediate (constraint DL) with operand 1 tied to
;; the destination and is printed by neon_output_logic_immediate, which is
;; called with 1 as its fourth argument here (the OR pattern passes 0).
654 (define_insn "and<mode>3"
655 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
656 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
657 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
658 "TARGET_NEON"
659 {
660 switch (which_alternative)
661 {
662 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
663 case 1: return neon_output_logic_immediate ("vand", &operands[2],
664 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
665 default: gcc_unreachable ();
666 }
667 }
668 [(set_attr "type" "neon_logic<q>")]
669 )
670
;; OR-NOT on the VDQ modes: operand 1 | ~operand 2, emitted as vorn.
671 (define_insn "orn<mode>3_neon"
672 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
673 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
674 (match_operand:VDQ 1 "s_register_operand" "w")))]
675 "TARGET_NEON"
676 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
677 [(set_attr "type" "neon_logic<q>")]
678 )
679
680 ;; TODO: investigate whether we should disable
681 ;; this and bicdi3_neon for the A8 in line with the other
682 ;; changes above.
;; DImode OR-NOT: operand 1 | ~operand 2.  Alternative 0 emits vorn; the
;; core-register alternatives are output as "#" and split after reload when
;; the destination is not a VFP register.  Under TARGET_THUMB2 the operands
;; are broken into SImode low and high parts for the two SImode sets of the
;; split template; otherwise the split emits one_cmpldi2 followed by iordi3.
683 (define_insn_and_split "orndi3_neon"
684 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
685 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
686 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
687 "TARGET_NEON"
688 "@
689 vorn\t%P0, %P1, %P2
690 #
691 #
692 #"
693 "reload_completed &&
694 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
695 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
696 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
697 "
698 {
699 if (TARGET_THUMB2)
700 {
701 operands[3] = gen_highpart (SImode, operands[0]);
702 operands[0] = gen_lowpart (SImode, operands[0]);
703 operands[4] = gen_highpart (SImode, operands[2]);
704 operands[2] = gen_lowpart (SImode, operands[2]);
705 operands[5] = gen_highpart (SImode, operands[1]);
706 operands[1] = gen_lowpart (SImode, operands[1]);
707 }
708 else
709 {
710 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
711 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
712 DONE;
713 }
714 }"
715 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
716 (set_attr "length" "*,16,8,8")
717 (set_attr "arch" "any,a,t2,t2")]
718 )
719
;; Bit clear on the VDQ modes: operand 1 & ~operand 2, emitted as vbic.
720 (define_insn "bic<mode>3_neon"
721 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
722 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
723 (match_operand:VDQ 1 "s_register_operand" "w")))]
724 "TARGET_NEON"
725 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
726 [(set_attr "type" "neon_logic<q>")]
727 )
728
729 ;; Compare to *anddi_notdi_di.
;; DImode bit clear: operand 1 & ~operand 2.  Alternative 0 emits vbic; the
;; two core-register alternatives are output as "#".  The '=' modifier is
;; written once, at the start of operand 0's constraint string, and applies
;; to every alternative.
730 (define_insn "bicdi3_neon"
731 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
732 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
733 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
734 "TARGET_NEON"
735 "@
736 vbic\t%P0, %P1, %P2
737 #
738 #"
739 [(set_attr "type" "neon_logic,multiple,multiple")
740 (set_attr "length" "*,8,8")]
741 )
742
;; Bitwise exclusive OR on the VDQ modes, emitted as veor.
743 (define_insn "xor<mode>3"
744 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
745 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
746 (match_operand:VDQ 2 "s_register_operand" "w")))]
747 "TARGET_NEON"
748 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
749 [(set_attr "type" "neon_logic<q>")]
750 )
751
;; Bitwise complement on the VDQ modes, emitted as vmvn.
752 (define_insn "one_cmpl<mode>2"
753 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
754 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
755 "TARGET_NEON"
756 "vmvn\t%<V_reg>0, %<V_reg>1"
757 [(set_attr "type" "neon_move<q>")]
758 )
759
;; Absolute value on the VDQW modes, emitted as vabs.  The "type" attribute
;; is neon_fp_abs_s<q> for float modes and neon_abs<q> otherwise.
760 (define_insn "abs<mode>2"
761 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
762 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
763 "TARGET_NEON"
764 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
765 [(set (attr "type")
766 (if_then_else (match_test "<Is_float_mode>")
767 (const_string "neon_fp_abs_s<q>")
768 (const_string "neon_abs<q>")))]
769 )
770
;; Negation on the VDQW modes, emitted as vneg.  The "type" attribute is
;; neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.
771 (define_insn "neg<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
774 "TARGET_NEON"
775 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
776 [(set (attr "type")
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_neg_s<q>")
779 (const_string "neon_neg<q>")))]
780 )
781
;; DImode negation with a DImode scratch and a clobber of the
;; condition-code register.  Every alternative is output as "#", with a
;; length of 8.  Only the second alternative asks for a real "w" scratch
;; register; the others use "X" for the scratch.
782 (define_insn "negdi2_neon"
783 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
784 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
785 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
786 (clobber (reg:CC CC_REGNUM))]
787 "TARGET_NEON"
788 "#"
789 [(set_attr "length" "8")
790 (set_attr "type" "multiple")]
791 )
792
793 ; Split negdi2_neon for vfp registers
;; After reload, when the destination is a VFP register, DImode negation
;; becomes "scratch = 0; dest = scratch - src".  If operand 2 is not a
;; register, the destination itself is used in place of the scratch.
794 (define_split
795 [(set (match_operand:DI 0 "s_register_operand" "")
796 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
797 (clobber (match_scratch:DI 2 ""))
798 (clobber (reg:CC CC_REGNUM))]
799 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
800 [(set (match_dup 2) (const_int 0))
801 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
802 (clobber (reg:CC CC_REGNUM))])]
803 {
804 if (!REG_P (operands[2]))
805 operands[2] = operands[0];
806 }
807 )
808
809 ; Split negdi2_neon for core registers
;; After reload, when the destination is a general (core) register, the
;; pattern is rewritten as a plain DImode negation that keeps only the
;; condition-code clobber; the scratch operand is dropped.
810 (define_split
811 [(set (match_operand:DI 0 "s_register_operand" "")
812 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
813 (clobber (match_scratch:DI 2 ""))
814 (clobber (reg:CC CC_REGNUM))]
815 "TARGET_32BIT && reload_completed
816 && arm_general_register_operand (operands[0], DImode)"
817 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
818 (clobber (reg:CC CC_REGNUM))])]
819 ""
820 )
821
;; Unsigned minimum on the VDQIW modes, emitted as vmin with the unsigned
;; element suffix <V_u_elem>.
822 (define_insn "*umin<mode>3_neon"
823 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
824 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
825 (match_operand:VDQIW 2 "s_register_operand" "w")))]
826 "TARGET_NEON"
827 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
828 [(set_attr "type" "neon_minmax<q>")]
829 )
830
;; Unsigned maximum on the VDQIW modes, emitted as vmax with the unsigned
;; element suffix <V_u_elem>.
831 (define_insn "*umax<mode>3_neon"
832 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
833 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
834 (match_operand:VDQIW 2 "s_register_operand" "w")))]
835 "TARGET_NEON"
836 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
837 [(set_attr "type" "neon_minmax<q>")]
838 )
839
;; Signed minimum on the VDQW modes, emitted as vmin with the element
;; suffix <V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> for float
;; modes and neon_minmax<q> otherwise.
840 (define_insn "*smin<mode>3_neon"
841 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
842 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
843 (match_operand:VDQW 2 "s_register_operand" "w")))]
844 "TARGET_NEON"
845 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
846 [(set (attr "type")
847 (if_then_else (match_test "<Is_float_mode>")
848 (const_string "neon_fp_minmax_s<q>")
849 (const_string "neon_minmax<q>")))]
850 )
851
;; Signed maximum on the VDQW modes, emitted as vmax with the element
;; suffix <V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> for float
;; modes and neon_minmax<q> otherwise.
852 (define_insn "*smax<mode>3_neon"
853 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
854 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
855 (match_operand:VDQW 2 "s_register_operand" "w")))]
856 "TARGET_NEON"
857 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
858 [(set (attr "type")
859 (if_then_else (match_test "<Is_float_mode>")
860 (const_string "neon_fp_minmax_s<q>")
861 (const_string "neon_minmax<q>")))]
862 )
863
864 ; TODO: V2DI shifts are currently disabled because there are bugs in the
865 ; generic vectorizer code. It ends up creating a V2DI constructor with
866 ; SImode elements.
867
;; Vector shift left on the VDQIW modes.  Alternative 0 shifts by a vector
;; register and emits vshl directly; alternative 1 shifts by an immediate
;; (constraint Dn) and is printed by neon_output_shift_immediate.
868 (define_insn "vashl<mode>3"
869 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
870 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
871 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
872 "TARGET_NEON"
873 {
874 switch (which_alternative)
875 {
876 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
877 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
878 <MODE>mode,
879 VALID_NEON_QREG_MODE (<MODE>mode),
880 true);
881 default: gcc_unreachable ();
882 }
883 }
884 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
885 )
886
;; Arithmetic shift right by an immediate on the VDQIW modes.  The
;; instruction text is produced by neon_output_shift_immediate, called with
;; the "vshr" mnemonic and the 's' sign character.
887 (define_insn "vashr<mode>3_imm"
888 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
889 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
890 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
891 "TARGET_NEON"
892 {
893 return neon_output_shift_immediate ("vshr", 's', &operands[2],
894 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
895 false);
896 }
897 [(set_attr "type" "neon_shift_imm<q>")]
898 )
899
;; Logical shift right by an immediate on the VDQIW modes.  The instruction
;; text is produced by neon_output_shift_immediate, called with the "vshr"
;; mnemonic and the 'u' sign character.
900 (define_insn "vlshr<mode>3_imm"
901 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
902 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
903 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
904 "TARGET_NEON"
905 {
906 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
907 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
908 false);
909 }
910 [(set_attr "type" "neon_shift_imm<q>")]
911 )
912
; Used for implementing arithmetic shift-right, which is a left-shift by a
; negative amount, with signed operands.  This is essentially the same as
; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
; with negative shift amounts.
;
; The vashr<mode>3 expander emits this insn with a negated copy of the
; register shift count as operand 2.

(define_insn "ashl<mode>3_signed"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON"
  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)
927
; Used for implementing logical shift-right, which is a left-shift by a negative
; amount, with unsigned operands.
;
; The vlshr<mode>3 expander emits this insn with a negated copy of the
; register shift count as operand 2.  The only difference from
; ashl<mode>3_signed is the unspec code and the <V_u_elem> element suffix.

(define_insn "ashl<mode>3_unsigned"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON"
  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)
940
;; Expander for element-wise arithmetic right shift.
;;   operand 0: destination vector register
;;   operand 1: vector to shift
;;   operand 2: shift count, either a vector register or an immediate
;; A register count is negated into a fresh pseudo and fed to the signed
;; unspec left-shift insn; anything else goes to the immediate pattern.
(define_expand "vashr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
                        (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
{
  rtx shift_pat;

  if (s_register_operand (operands[2], <MODE>mode))
    {
      /* Right shift by register == left shift by the negated count.  */
      rtx negated_count = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_neg<mode>2 (negated_count, operands[2]));
      shift_pat = gen_ashl<mode>3_signed (operands[0], operands[1],
                                          negated_count);
    }
  else
    shift_pat = gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]);

  emit_insn (shift_pat);
  DONE;
})
957
;; Expander for element-wise logical right shift.
;;   operand 0: destination vector register
;;   operand 1: vector to shift
;;   operand 2: shift count, either a vector register or an immediate
;; A register count is negated into a fresh pseudo and fed to the unsigned
;; unspec left-shift insn; anything else goes to the immediate pattern.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
                        (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
{
  rtx shift_pat;

  if (s_register_operand (operands[2], <MODE>mode))
    {
      /* Right shift by register == left shift by the negated count.  */
      rtx negated_count = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_neg<mode>2 (negated_count, operands[2]));
      shift_pat = gen_ashl<mode>3_unsigned (operands[0], operands[1],
                                            negated_count);
    }
  else
    shift_pat = gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]);

  emit_insn (shift_pat);
  DONE;
})
974
975 ;; 64-bit shifts
976
;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
;; leaving the upper half uninitialized.  This is OK since the shift
;; instruction only looks at the low 8 bits anyway.  To avoid confusing
;; data flow analysis however, we pretend the full register is set
;; using an unspec.
;;
;; Alternative 0 loads the count from memory (constraint "Um") with vld1.32;
;; alternative 1 copies it from a core register (constraint "r") with
;; vmov.32.  Both write lane 0 of operand 0.
(define_insn "neon_load_count"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
                   UNSPEC_LOAD_COUNT))]
  "TARGET_NEON"
  "@
   vld1.32\t{%P0[0]}, %A1
   vmov.32\t%P0[0], %1"
  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
)
992
;; 64-bit left shift in a NEON register, without the scratch/CC clobbers
;; carried by ashldi3_neon.  Only valid after reload, and a constant count
;; must lie in the range 0..63.
;; Alternative 0: immediate count; alternative 1: count in a NEON register.
(define_insn "ashldi3_neon_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
                   (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
  "TARGET_NEON && reload_completed
   && (!CONST_INT_P (operands[2])
       || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
  "@
   vshl.u64\t%P0, %P1, %2
   vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
)
1005
;; 64-bit left shift that may be allocated either to NEON ("w") or to core
;; ("r") registers.  It is emitted as "#" and always split after reload:
;;   * destination in a VFP/NEON register: a constant count below 1 becomes
;;     a plain DImode move, a constant above 63 is replaced by 63, and a
;;     non-constant count is first loaded into scratch operand 5 with
;;     neon_load_count; the shift itself is ashldi3_neon_noclobber.
;;   * destination in core registers: a constant count of 1 uses
;;     arm_ashldi3_1bit, everything else arm_emit_coreregs_64bit_shift with
;;     scratch operands 3 and 4.
;; Operands 3-5 are scratches; the pattern also clobbers the condition-code
;; register.
(define_insn_and_split "ashldi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
        (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
                   (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
   (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
   (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
   (clobber (reg:CC_C CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
  "
  {
    if (IS_VFP_REGNUM (REGNO (operands[0])))
      {
        if (CONST_INT_P (operands[2]))
          {
            if (INTVAL (operands[2]) < 1)
              {
                /* Nothing to shift: just copy the source.  */
                emit_insn (gen_movdi (operands[0], operands[1]));
                DONE;
              }
            else if (INTVAL (operands[2]) > 63)
              /* Keep the count inside the 0..63 range that
                 ashldi3_neon_noclobber accepts.  */
              operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
          }
        else
          {
            /* Move the variable count into the NEON scratch register.  */
            emit_insn (gen_neon_load_count (operands[5], operands[2]));
            operands[2] = operands[5];
          }

        /* Ditch the unnecessary clobbers.  */
        emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
                                               operands[2]));
      }
    else
      {
        if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
          /* This clobbers CC.  */
          emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
        else
          arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
                                         operands[2], operands[3], operands[4]);
      }
    DONE;
  }"
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,*,*")
   (set_attr "type" "multiple")]
)
1057
; The shift amount needs to be negated for right-shifts
;
; 64-bit signed register shift (vshl.s64) on NEON registers, valid only
; after reload.  The <shift>di3_neon splitter emits it with an already
; negated count in operand 2.
(define_insn "signed_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
                    (match_operand:DI 2 "s_register_operand" " w")]
                   UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.s64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)
1068
; The shift amount needs to be negated for right-shifts
;
; 64-bit unsigned register shift (vshl.u64) on NEON registers, valid only
; after reload.  Same shape as signed_shift_di3_neon apart from the unspec
; code and the .u64 suffix.
(define_insn "unsigned_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
                    (match_operand:DI 2 "s_register_operand" " w")]
                   UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)
1079
;; 64-bit arithmetic right shift by an immediate in a NEON register, without
;; the clobbers of the combined <shift>di3_neon pattern.  Valid only after
;; reload and only for counts in the range 1..64.
(define_insn "ashrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
                     (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.s64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)
1089
;; 64-bit logical right shift by an immediate in a NEON register, without
;; the clobbers of the combined <shift>di3_neon pattern.  Valid only after
;; reload and only for counts in the range 1..64.
(define_insn "lshrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
                     (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.u64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)
1099
;; ashrdi3_neon
;; lshrdi3_neon
;;
;; 64-bit right shifts (one pattern per code in the rshifts iterator) that
;; may live in NEON ("w") or core ("r") registers.  Emitted as "#" and
;; always split after reload:
;;   * destination in a VFP/NEON register, constant count: a count below 1
;;     becomes a DImode move, a count above 64 is replaced by 64, and the
;;     shift is emitted through the matching *_imm_noclobber insn.
;;   * destination in a VFP/NEON register, register count: the count is
;;     negated into scratch operand 3, loaded into NEON scratch operand 5,
;;     and used as a (negative) left-shift amount.
;;   * destination in core registers: a constant count of 1 uses the 1-bit
;;     ARM pattern, everything else arm_emit_coreregs_64bit_shift.
;; Operands 3-5 are scratches; the condition-code register is clobbered.
(define_insn_and_split "<shift>di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
        (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
                    (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
   (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
   (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
  "
  {
    if (IS_VFP_REGNUM (REGNO (operands[0])))
      {
        if (CONST_INT_P (operands[2]))
          {
            if (INTVAL (operands[2]) < 1)
              {
                /* Nothing to shift: just copy the source.  */
                emit_insn (gen_movdi (operands[0], operands[1]));
                DONE;
              }
            else if (INTVAL (operands[2]) > 64)
              /* Keep the count inside the 1..64 range that the
                 *_imm_noclobber insns accept.  */
              operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);

            /* Ditch the unnecessary clobbers.  */
            emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
                                                          operands[1],
                                                          operands[2]));
          }
        else
          {
            /* We must use a negative left-shift.  */
            emit_insn (gen_negsi2 (operands[3], operands[2]));
            emit_insn (gen_neon_load_count (operands[5], operands[3]));
            emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
                                                       operands[5]));
          }
      }
    else
      {
        if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
          /* This clobbers CC.  */
          emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
        else
          /* This clobbers CC (ASHIFTRT by register only).  */
          arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
                                         operands[2], operands[3], operands[4]);
      }

    DONE;
  }"
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,*,*")
   (set_attr "type" "multiple")]
)
1159
1160 ;; Widening operations
1161
;; Widening signed accumulate: operand 0 = sign_extend (operand 1) + operand 2,
;; where operand 1 has the narrow mode VW and operands 0 and 2 have the
;; widened mode <V_widen>.  Emitted as vaddw with the wide addend first.
;; NOTE(review): operand 1 carries the "%" commutativity marker although
;; operands 1 and 2 have different modes -- confirm that this is intended.
(define_insn "widen_ssum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen> (sign_extend:<V_widen>
                          (match_operand:VW 1 "s_register_operand" "%w"))
                        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)
1171
;; Widening unsigned accumulate: operand 0 = zero_extend (operand 1) + operand 2,
;; where operand 1 has the narrow mode VW and operands 0 and 2 have the
;; widened mode <V_widen>.  Emitted as vaddw with the wide addend first.
;; NOTE(review): operand 1 carries the "%" commutativity marker although
;; operands 1 and 2 have different modes -- confirm that this is intended.
(define_insn "widen_usum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen> (zero_extend:<V_widen>
                          (match_operand:VW 1 "s_register_operand" "%w"))
                        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)
1181
1182 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
1183 ;; shift-count granularity. That's good enough for the middle-end's current
1184 ;; needs.
1185
1186 ;; Note that it's not safe to perform such an operation in big-endian mode,
1187 ;; due to element-ordering issues.
1188
;; Whole-vector right shift (towards element 0) by a constant multiple of
;; 8 bits, little-endian only.
;;   operand 0: destination vector
;;   operand 1: vector to shift
;;   operand 2: shift amount in bits
;; The shift is synthesized with a byte-wise VEXT of operand 1 followed by
;; a zero vector, using a byte offset of num_bits / BITS_PER_UNIT.
(define_expand "vec_shr_<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand:VDQ 1 "s_register_operand" "")
   (match_operand:SI 2 "const_multiple_of_8_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx zero_reg;
  HOST_WIDE_INT num_bits = INTVAL (operands[2]);
  const int width = GET_MODE_BITSIZE (<MODE>mode);
  const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
  rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
    (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;

  if (num_bits == width)
    {
      /* Every bit of the input is shifted out, so the result is all
         zeros.  Handle this up front instead of passing a byte offset
         equal to the full vector size to the extract generator.  */
      emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
      DONE;
    }

  /* Work on the byte-vector view of both operands.  */
  zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
  operands[0] = gen_lowpart (bvecmode, operands[0]);
  operands[1] = gen_lowpart (bvecmode, operands[1]);

  emit_insn (gen_ext (operands[0], operands[1], zero_reg,
                      GEN_INT (num_bits / BITS_PER_UNIT)));
  DONE;
})
1216
;; Whole-vector left shift (away from element 0) by a constant multiple of
;; 8 bits, little-endian only.
;;   operand 0: destination vector
;;   operand 1: vector to shift
;;   operand 2: shift amount in bits
;; The shift is synthesized with a byte-wise VEXT of a zero vector followed
;; by operand 1, using a byte offset of (width - num_bits) / BITS_PER_UNIT.
(define_expand "vec_shl_<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand:VDQ 1 "s_register_operand" "")
   (match_operand:SI 2 "const_multiple_of_8_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx zero_reg;
  HOST_WIDE_INT num_bits = INTVAL (operands[2]);
  const int width = GET_MODE_BITSIZE (<MODE>mode);
  const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
  rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
    (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;

  if (num_bits == 0)
    {
      /* A shift by zero bits leaves the input unchanged.  Handle this up
         front: the offset computed below would otherwise equal the full
         vector size.  */
      emit_move_insn (operands[0], operands[1]);
      DONE;
    }

  /* Convert the left-shift amount into the extract offset.  */
  num_bits = width - num_bits;

  /* Work on the byte-vector view of both operands.  */
  zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
  operands[0] = gen_lowpart (bvecmode, operands[0]);
  operands[1] = gen_lowpart (bvecmode, operands[1]);

  emit_insn (gen_ext (operands[0], zero_reg, operands[1],
                      GEN_INT (num_bits / BITS_PER_UNIT)));
  DONE;
})
1246
1247 ;; Helpers for quad-word reduction operations
1248
1249 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1250 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1251 ; N/2-element vector.
1252
;; Combine elements 0-1 of the V4SI operand 1 with elements 2-3 of the same
;; register using each operation in the vqh_ops iterator, giving a V2SI
;; result in operand 0.
(define_insn "quad_halves_<code>v4si"
  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
        (vqh_ops:V2SI
          (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SI (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1265
;; Floating-point counterpart of quad_halves_<code>v4si: combine elements
;; 0-1 of the V4SF operand 1 with elements 2-3 using each operation in the
;; vqhs_ops iterator.  Only enabled with flag_unsafe_math_optimizations.
(define_insn "quad_halves_<code>v4sf"
  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
        (vqhs_ops:V2SF
          (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SF (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "<VQH_mnem>.f32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
)
1278
;; Combine elements 0-3 of the V8HI operand 1 with elements 4-7 of the same
;; register using each operation in the vqh_ops iterator, giving a V4HI
;; result in operand 0.
;; Operand 0 is only written by this pattern, so it uses the "=" modifier,
;; matching the V4SI and V4SF variants.
(define_insn "quad_halves_<code>v8hi"
  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
        (vqh_ops:V4HI
          (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)]))
          (vec_select:V4HI (match_dup 1)
                           (parallel [(const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1293
;; Combine elements 0-7 of the V16QI operand 1 with elements 8-15 of the
;; same register using each operation in the vqh_ops iterator, giving a
;; V8QI result in operand 0.
;; Operand 0 is only written by this pattern, so it uses the "=" modifier,
;; matching the V4SI and V4SF variants.
(define_insn "quad_halves_<code>v16qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (vqh_ops:V8QI
          (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)
                                      (const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))
          (vec_select:V8QI (match_dup 1)
                           (parallel [(const_int 8) (const_int 9)
                                      (const_int 10) (const_int 11)
                                      (const_int 12) (const_int 13)
                                      (const_int 14) (const_int 15)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1312
;; Store the half-width vector operand 1 into the part of the 128-bit
;; operand 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), using
;; a subreg move.  The rest of operand 0 is not touched by this expander.
(define_expand "move_hi_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand" "")
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode)),
                  operands[1]);
  DONE;
})
1323
;; Store the half-width vector operand 1 into the part of the 128-bit
;; operand 0 that starts at byte offset 0, using a subreg move.  The rest of
;; operand 0 is not touched by this expander.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand" "")
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
                                       <MODE>mode, 0),
                  operands[1]);
  DONE;
})
1334
1335 ;; Reduction operations
1336
;; Sum reduction for the 64-bit (VD) vector modes.  The work is delegated to
;; neon_pairwise_reduce, which is handed the vpadd generator for this mode.
;; Float modes are only handled under flag_unsafe_math_optimizations.
(define_expand "reduc_splus_<mode>"
  [(match_operand:VD 0 "s_register_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
                        &gen_neon_vpadd_internal<mode>);
  DONE;
})
1346
;; Sum reduction for the 128-bit (VQ) vector modes, little-endian only.
;; Three steps: add the two halves of operand 1 into a half-width temporary,
;; run the half-width reduction on it, then store that result into the low
;; half of operand 0 with move_lo_quad.
(define_expand "reduc_splus_<mode>"
  [(match_operand:VQ 0 "s_register_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);
  rtx res_d = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
  emit_insn (gen_reduc_splus_<V_half> (res_d, step1));
  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));

  DONE;
})
1362
;; Sum reduction for V2DI, little-endian only: a single vadd.i64 of the
;; %e1 and %f1 parts of operand 1, written to the %e0 part of operand 0.
;; NOTE(review): %e / %f presumably name the low / high D register of a
;; quad register -- confirm against the operand-printing code.
(define_insn "reduc_splus_v2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
                     UNSPEC_VPADD))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vadd.i64\t%e0, %e1, %f1"
  [(set_attr "type" "neon_add_q")]
)
1371
;; NEON does not distinguish between signed and unsigned addition except on
;; widening operations.
;; The unsigned sum reduction therefore just forwards to the signed
;; reduc_splus_<mode> expander.  Modes that are not D-register modes are
;; only handled on little-endian targets.
(define_expand "reduc_uplus_<mode>"
  [(match_operand:VDQI 0 "s_register_operand" "")
   (match_operand:VDQI 1 "s_register_operand" "")]
  "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)"
{
  emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
  DONE;
})
1382
;; Signed-minimum reduction for the 64-bit (VD) vector modes.  The work is
;; delegated to neon_pairwise_reduce, which is handed the vpsmin generator
;; for this mode.  Float modes need flag_unsafe_math_optimizations.
(define_expand "reduc_smin_<mode>"
  [(match_operand:VD 0 "s_register_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
                        &gen_neon_vpsmin<mode>);
  DONE;
})
1392
;; Signed-minimum reduction for the 128-bit (VQ) vector modes, little-endian
;; only.  Three steps: take the minimum of the two halves of operand 1 into
;; a half-width temporary, run the half-width reduction on it, then store
;; that result into the low half of operand 0 with move_lo_quad.
(define_expand "reduc_smin_<mode>"
  [(match_operand:VQ 0 "s_register_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);
  rtx res_d = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smin_<V_half> (res_d, step1));
  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));

  DONE;
})
1408
;; Signed-maximum reduction for the 64-bit (VD) vector modes.  The work is
;; delegated to neon_pairwise_reduce, which is handed the vpsmax generator
;; for this mode.  Float modes need flag_unsafe_math_optimizations.
(define_expand "reduc_smax_<mode>"
  [(match_operand:VD 0 "s_register_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
                        &gen_neon_vpsmax<mode>);
  DONE;
})
1418
;; Signed-maximum reduction for the 128-bit (VQ) vector modes, little-endian
;; only.  Three steps: take the maximum of the two halves of operand 1 into
;; a half-width temporary, run the half-width reduction on it, then store
;; that result into the low half of operand 0 with move_lo_quad.
(define_expand "reduc_smax_<mode>"
  [(match_operand:VQ 0 "s_register_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);
  rtx res_d = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smax_<V_half> (res_d, step1));
  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));

  DONE;
})
1434
;; Unsigned-minimum reduction for the 64-bit integer (VDI) vector modes.
;; The work is delegated to neon_pairwise_reduce, which is handed the
;; vpumin generator for this mode.
(define_expand "reduc_umin_<mode>"
  [(match_operand:VDI 0 "s_register_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
                        &gen_neon_vpumin<mode>);
  DONE;
})
1444
;; Unsigned-minimum reduction for the 128-bit integer (VQI) vector modes,
;; little-endian only.  Three steps: take the minimum of the two halves of
;; operand 1 into a half-width temporary, run the half-width reduction on
;; it, then store that result into the low half of operand 0.
(define_expand "reduc_umin_<mode>"
  [(match_operand:VQI 0 "s_register_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);
  rtx res_d = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umin_<V_half> (res_d, step1));
  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));

  DONE;
})
1459
;; Unsigned-maximum reduction for the 64-bit integer (VDI) vector modes.
;; The work is delegated to neon_pairwise_reduce, which is handed the
;; vpumax generator for this mode.
(define_expand "reduc_umax_<mode>"
  [(match_operand:VDI 0 "s_register_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
                        &gen_neon_vpumax<mode>);
  DONE;
})
1469
;; Unsigned-maximum reduction for the 128-bit integer (VQI) vector modes,
;; little-endian only.  Three steps: take the maximum of the two halves of
;; operand 1 into a half-width temporary, run the half-width reduction on
;; it, then store that result into the low half of operand 0.
(define_expand "reduc_umax_<mode>"
  [(match_operand:VQI 0 "s_register_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);
  rtx res_d = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umax_<V_half> (res_d, step1));
  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));

  DONE;
})
1484
;; Pairwise add (vpadd) of two 64-bit vectors, modelled as an unspec.  Its
;; generator is passed to neon_pairwise_reduce by the VD sum reduction.
;; The "type" attribute depends on whether the mode is a float mode.
(define_insn "neon_vpadd_internal<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPADD))]
  "TARGET_NEON"
  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
  ;; Assume this schedules like vadd.
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_add_s<q>")
                      (const_string "neon_reduc_add<q>")))]
)
1498
;; Pairwise signed minimum (vpmin) of two 64-bit vectors, modelled as an
;; unspec.  Its generator is passed to neon_pairwise_reduce by the VD
;; reduc_smin expander.
(define_insn "neon_vpsmin<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMIN))]
  "TARGET_NEON"
  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)
1511
;; Pairwise signed maximum (vpmax) of two 64-bit vectors, modelled as an
;; unspec.  Its generator is passed to neon_pairwise_reduce by the VD
;; reduc_smax expander.
(define_insn "neon_vpsmax<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMAX))]
  "TARGET_NEON"
  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)
1524
;; Pairwise unsigned minimum (vpmin with an unsigned element suffix) of two
;; 64-bit integer vectors, modelled as an unspec.  Its generator is passed
;; to neon_pairwise_reduce by the VDI reduc_umin expander.
(define_insn "neon_vpumin<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                    UNSPEC_VPUMIN))]
  "TARGET_NEON"
  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
1534
;; Pairwise unsigned maximum (vpmax with an unsigned element suffix) of two
;; 64-bit integer vectors, modelled as an unspec.  Its generator is passed
;; to neon_pairwise_reduce by the VDI reduc_umax expander.
(define_insn "neon_vpumax<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                    UNSPEC_VPUMAX))]
  "TARGET_NEON"
  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
1544
1545 ;; Saturating arithmetic
1546
1547 ; NOTE: Neon supports many more saturating variants of instructions than the
1548 ; following, but these are all GCC currently understands.
1549 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1550 ; yet either, although these patterns may be used by intrinsics when they're
1551 ; added.
1552
;; Signed saturating add (ss_plus) of two 64-bit vectors, emitted as vqadd
;; with the signed element suffix.
(define_insn "*ss_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)
1561
;; Unsigned saturating add (us_plus) of two 64-bit vectors, emitted as vqadd
;; with the unsigned element suffix.
(define_insn "*us_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)
1570
;; Signed saturating subtract (ss_minus) of two 64-bit vectors, emitted as
;; vqsub with the signed element suffix.
(define_insn "*ss_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                     (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)
1579
;; Unsigned saturating subtract (us_minus) of two 64-bit vectors, emitted as
;; vqsub with the unsigned element suffix.
(define_insn "*us_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                     (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)
1588
1589 ;; Conditional instructions. These are comparisons with conditional moves for
1590 ;; vectors. They perform the assignment:
1591 ;;
1592 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1593 ;;
1594 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1595 ;; element-wise.
1596
;; Vector conditional select with signed/floating-point comparisons.
;;   operand 0: destination
;;   operands 1, 2: values selected when the comparison is true / false
;;   operand 3: the comparison operator
;;   operands 4, 5: the values compared (operand 5 may be a non-register)
;; The expander builds a per-lane mask with vcge/vcgt/vceq (or the
;; compare-against-zero forms vclt/vcle) and then emits a vbsl on that mask.
;; Comparisons that cannot be expressed directly are computed in inverted
;; form and compensated for by swapping the two vbsl data operands.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VDQW 0 "s_register_operand" "")
        (if_then_else:VDQW
          (match_operator 3 "comparison_operator"
            [(match_operand:VDQW 4 "s_register_operand" "")
             (match_operand:VDQW 5 "nonmemory_operand" "")])
          (match_operand:VDQW 1 "s_register_operand" "")
          (match_operand:VDQW 2 "s_register_operand" "")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  /* Flag word passed as the last operand of every comparison generator:
     3 for the float modes, 1 otherwise.
     NOTE(review): presumably the builtin type-flag bits -- confirm.  */
  HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
                             ? 3 : 1;
  rtx magic_rtx = GEN_INT (magic_word);
  int inverse = 0;
  int use_zero_form = 0;
  int swap_bsl_operands = 0;
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);

  rtx (*base_comparison) (rtx, rtx, rtx, rtx);
  rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);

  /* Step 1: decide whether the compare-against-zero forms can be used;
     otherwise make sure operand 5 is in a register.  */
  switch (GET_CODE (operands[3]))
    {
    case GE:
    case GT:
    case LE:
    case LT:
    case EQ:
      if (operands[5] == CONST0_RTX (<MODE>mode))
        {
          use_zero_form = 1;
          break;
        }
      /* Fall through.  */
    default:
      if (!REG_P (operands[5]))
        operands[5] = force_reg (<MODE>mode, operands[5]);
    }

  /* Step 2: pick the comparison generators, and note whether the operands
     have to be exchanged (inverse).  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case UNLT:
      inverse = 1;
      /* Fall through.  */
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      base_comparison = gen_neon_vcge<mode>;
      complimentary_comparison = gen_neon_vcgt<mode>;
      break;
    case LE:
    case UNLE:
      inverse = 1;
      /* Fall through.  */
    case GT:
    case UNGT:
      base_comparison = gen_neon_vcgt<mode>;
      complimentary_comparison = gen_neon_vcge<mode>;
      break;
    case EQ:
    case NE:
    case UNEQ:
      base_comparison = gen_neon_vceq<mode>;
      complimentary_comparison = gen_neon_vceq<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 3: emit the comparison(s) that compute the mask.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of vcge, vcgt or vceq.
         As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
         a GE b -> a GE b
         a GT b -> a GT b
         a LE b -> b GE a
         a LT b -> b GT a
         a EQ b -> a EQ b
         Note that there also exist direct comparison against 0 forms,
         so catch those as a special case.  */
      if (use_zero_form)
        {
          inverse = 0;
          switch (GET_CODE (operands[3]))
            {
            case LT:
              base_comparison = gen_neon_vclt<mode>;
              break;
            case LE:
              base_comparison = gen_neon_vcle<mode>;
              break;
            default:
              /* Do nothing, other zero form cases already have the correct
                 base_comparison.  */
              break;
            }
        }

      if (!inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
      else
        emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
      break;
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* Vector compare returns false for lanes which are unordered, so if we use
         the inverse of the comparison we actually want to emit, then
         swap the operands to BSL, we will end up with the correct result.
         Note that a NE NaN and NaN NE b are true for all a, b.

         Our transformations are:
         a GE b -> !(b GT a)
         a GT b -> !(b GE a)
         a LE b -> !(a GT b)
         a LT b -> !(a GE b)
         a NE b -> !(a EQ b)  */

      if (inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
      else
        emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));

      swap_bsl_operands = 1;
      break;
    case UNEQ:
      /* We check (a > b || b > a).  Combining these comparisons gives us
         true iff (a != b && a ORDERED b); swapping the operands to BSL
         will then give us (a == b || a UNORDERED b) as intended.  */

      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx));
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      swap_bsl_operands = 1;
      break;
    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a).
         Swapping the operands to BSL will give the UNORDERED case.  */
      swap_bsl_operands = 1;
      /* Fall through.  */
    case ORDERED:
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx));
      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 4: select between operands 1 and 2 under the mask, exchanging
     them when the mask holds the inverted condition.  */
  if (swap_bsl_operands)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));
  DONE;
})
1764
;; Vector conditional select with unsigned comparisons (integer modes only).
;;   operand 0: destination
;;   operands 1, 2: values selected when the comparison is true / false
;;   operand 3: the comparison operator (GEU, GTU, EQ, LEU, LTU or NE)
;;   operands 4, 5: the values compared
;; A mask is computed with vcge/vcgt/vceq (passing const0_rtx as the flag
;; operand) and fed to vbsl.  LEU/LTU are obtained by exchanging the
;; compared operands; NE is vceq with the vbsl data operands exchanged.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (if_then_else:VDQIW
          (match_operator 3 "arm_comparison_operator"
            [(match_operand:VDQIW 4 "s_register_operand" "")
             (match_operand:VDQIW 5 "s_register_operand" "")])
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "s_register_operand" "")))]
  "TARGET_NEON"
{
  rtx mask;
  int inverse = 0, immediate_zero = 0;

  mask = gen_reg_rtx (<V_cmp_result>mode);

  /* NOTE(review): operand 5 is declared with s_register_operand, so it is
     unclear whether it can ever be CONST0_RTX here -- confirm whether the
     immediate_zero paths below are reachable.  */
  if (operands[5] == CONST0_RTX (<MODE>mode))
    immediate_zero = 1;
  else if (!REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (GET_CODE (operands[3]))
    {
    case GEU:
      emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
                                      const0_rtx));
      break;

    case GTU:
      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
                                      const0_rtx));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
                                      const0_rtx));
      break;

    case LEU:
      if (immediate_zero)
        emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
                                        const0_rtx));
      else
        /* a LEU b is computed as b GEU a.  */
        emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
                                        const0_rtx));
      break;

    case LTU:
      if (immediate_zero)
        emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
                                        const0_rtx));
      else
        /* a LTU b is computed as b GTU a.  */
        emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
                                        const0_rtx));
      break;

    case NE:
      /* Compute equality and select the opposite way round below.  */
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
                                      const0_rtx));
      inverse = 1;
      break;

    default:
      gcc_unreachable ();
    }

  if (inverse)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));

  DONE;
})
1839
1840 ;; Patterns for builtins.
1841
1842 ; good for plain vadd, vaddq.
1843
;; Builtin expander for vadd / vaddq.
;;   operand 0: destination
;;   operands 1, 2: addends
;;   operand 3: immediate flag word (not examined here)
;; Integer modes, and float modes under flag_unsafe_math_optimizations,
;; expand to the canonical add<mode>3; other float cases expand to the
;; unspec form neon_vadd<mode>_unspec.
(define_expand "neon_vadd<mode>"
  [(match_operand:VDQX 0 "s_register_operand" "=w")
   (match_operand:VDQX 1 "s_register_operand" "w")
   (match_operand:VDQX 2 "s_register_operand" "w")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx add_pat;

  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    /* Strict floating point: keep the operation opaque.  */
    add_pat = gen_neon_vadd<mode>_unspec (operands[0], operands[1],
                                          operands[2]);
  else
    add_pat = gen_add<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (add_pat);
  DONE;
})
1858
1859 ; Note that NEON operations don't support the full IEEE 754 standard: in
1860 ; particular, denormal values are flushed to zero. This means that GCC cannot
1861 ; use those instructions for autovectorization, etc. unless
1862 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1863 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1864 ; header) must work in either case: if -funsafe-math-optimizations is given,
1865 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1866 ; expand to unspecs (which may potentially limit the extent to which they might
1867 ; be optimized by generic code).
1868
1869 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1870
;; UNSPEC_VADD form of vector add over the VDQX modes: operand 0 is set to
;; an unspec of operands 1 and 2 and printed as vadd.<V_if_elem>.
;; The "type" attribute is neon_fp_addsub_s<q> for float modes and
;; neon_add<q> otherwise.
1871 (define_insn "neon_vadd<mode>_unspec"
1872 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
1873 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
1874 (match_operand:VDQX 2 "s_register_operand" "w")]
1875 UNSPEC_VADD))]
1876 "TARGET_NEON"
1877 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1878 [(set (attr "type")
1879 (if_then_else (match_test "<Is_float_mode>")
1880 (const_string "neon_fp_addsub_s<q>")
1881 (const_string "neon_add<q>")))]
1882 )
1883
1884 ; operand 3 represents in bits:
1885 ; bit 0: signed (vs unsigned).
1886 ; bit 1: rounding (vs none).
1887
;; vaddl: UNSPEC_VADDL of two VDI operands (1 and 2) producing a result in
;; the <V_widen> mode.  Operand 3 is an SI immediate that is part of the
;; unspec and is consumed by the %T3 modifier when printing the mnemonic.
;; NOTE(review): %T3 presumably prints the signedness prefix selected by
;; bit 0 of operand 3 -- confirm against the ARM operand printer.
1888 (define_insn "neon_vaddl<mode>"
1889 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1890 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1891 (match_operand:VDI 2 "s_register_operand" "w")
1892 (match_operand:SI 3 "immediate_operand" "i")]
1893 UNSPEC_VADDL))]
1894 "TARGET_NEON"
1895 "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
1896 [(set_attr "type" "neon_add_long")]
1897 )
1898
;; vaddw: UNSPEC_VADDW where operand 1 is already in the <V_widen> mode and
;; operand 2 is in the narrower VDI mode; the result is in <V_widen>.
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
1899 (define_insn "neon_vaddw<mode>"
1900 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1901 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1902 (match_operand:VDI 2 "s_register_operand" "w")
1903 (match_operand:SI 3 "immediate_operand" "i")]
1904 UNSPEC_VADDW))]
1905 "TARGET_NEON"
1906 "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
1907 [(set_attr "type" "neon_add_widen")]
1908 )
1909
1910 ; vhadd and vrhadd.
1911
;; vhadd / vrhadd: UNSPEC_VHADD of operands 1 and 2 over the VDQIW modes.
;; Operand 3 (SI immediate) drives both the %O3 and %T3 modifiers in the
;; mnemonic.
;; NOTE(review): %O3 presumably inserts the "r" of the rounding form and %T3
;; the signedness prefix -- confirm against the ARM operand printer.
;; The "type" attribute uses <q> so that D-register modes are classified as
;; neon_add_halve and only Q-register modes as neon_add_halve_q, in the same
;; way neon_vhsub<mode> uses neon_sub_halve<q>.
1912 (define_insn "neon_vhadd<mode>"
1913 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1914 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1915 (match_operand:VDQIW 2 "s_register_operand" "w")
1916 (match_operand:SI 3 "immediate_operand" "i")]
1917 UNSPEC_VHADD))]
1918 "TARGET_NEON"
1919 "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1920 [(set_attr "type" "neon_add_halve<q>")]
1921 )
1922
;; vqadd: UNSPEC_VQADD of operands 1 and 2 over the VDQIX modes.
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_qadd<q>.
1923 (define_insn "neon_vqadd<mode>"
1924 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1925 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1926 (match_operand:VDQIX 2 "s_register_operand" "w")
1927 (match_operand:SI 3 "immediate_operand" "i")]
1928 UNSPEC_VQADD))]
1929 "TARGET_NEON"
1930 "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1931 [(set_attr "type" "neon_qadd<q>")]
1932 )
1933
;; vaddhn / v<O3>addhn: UNSPEC_VADDHN of two VN operands producing a result
;; in the <V_narrow> mode.  Operand 3 (SI immediate) is consumed by the %O3
;; modifier in the mnemonic.
;; NOTE(review): %O3 presumably selects the rounding ("r") variant -- confirm.
1934 (define_insn "neon_vaddhn<mode>"
1935 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1936 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1937 (match_operand:VN 2 "s_register_operand" "w")
1938 (match_operand:SI 3 "immediate_operand" "i")]
1939 UNSPEC_VADDHN))]
1940 "TARGET_NEON"
1941 "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
1942 [(set_attr "type" "neon_add_halve_narrow_q")]
1943 )
1944
1945 ;; We cannot replace this unspec with mul<mode>3 because of the odd
1946 ;; polynomial multiplication case that can be specified by operand 3.
;; vmul: UNSPEC_VMUL of operands 1 and 2 over the VDQW modes.  Operand 3
;; (SI immediate) is kept inside the unspec and is consumed by the %F3
;; modifier in the mnemonic.  The "type" attribute is neon_fp_mul_s<q> for
;; float modes and neon_mul_<V_elem_ch><q> otherwise.
1947 (define_insn "neon_vmul<mode>"
1948 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1949 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
1950 (match_operand:VDQW 2 "s_register_operand" "w")
1951 (match_operand:SI 3 "immediate_operand" "i")]
1952 UNSPEC_VMUL))]
1953 "TARGET_NEON"
1954 "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1955 [(set (attr "type")
1956 (if_then_else (match_test "<Is_float_mode>")
1957 (const_string "neon_fp_mul_s<q>")
1958 (const_string "neon_mul_<V_elem_ch><q>")))]
1959 )
1960
;; Expander for the neon_vmla<mode> builtin over the VDQW modes.
;; For non-float modes, or when flag_unsafe_math_optimizations is set, it
;; emits mul<mode>3add<mode>_neon; otherwise it emits
;; neon_vmla<mode>_unspec.  Both receive operands 0..3; operand 4 (an SI
;; immediate) is not passed on.
1961 (define_expand "neon_vmla<mode>"
1962 [(match_operand:VDQW 0 "s_register_operand" "=w")
1963 (match_operand:VDQW 1 "s_register_operand" "0")
1964 (match_operand:VDQW 2 "s_register_operand" "w")
1965 (match_operand:VDQW 3 "s_register_operand" "w")
1966 (match_operand:SI 4 "immediate_operand" "i")]
1967 "TARGET_NEON"
1968 {
1969 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1970 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1971 operands[2], operands[3]));
1972 else
1973 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1974 operands[2], operands[3]));
1975 DONE;
1976 })
1977
;; Expander for the neon_vfma builtin over the VCVTF modes; enabled only
;; when both TARGET_NEON and TARGET_FMA hold.  It forwards to
;; fma<mode>4_intrinsic with the operands reordered as (0, 2, 3, 1), i.e.
;; builtin operand 1 is passed last.  Operand 4 (SI immediate) is not
;; passed on.
1978 (define_expand "neon_vfma<VCVTF:mode>"
1979 [(match_operand:VCVTF 0 "s_register_operand")
1980 (match_operand:VCVTF 1 "s_register_operand")
1981 (match_operand:VCVTF 2 "s_register_operand")
1982 (match_operand:VCVTF 3 "s_register_operand")
1983 (match_operand:SI 4 "immediate_operand")]
1984 "TARGET_NEON && TARGET_FMA"
1985 {
1986 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1987 operands[1]));
1988 DONE;
1989 })
1990
;; Expander for the neon_vfms builtin over the VCVTF modes; enabled only
;; when both TARGET_NEON and TARGET_FMA hold.  It forwards to
;; fmsub<mode>4_intrinsic with the operands reordered as (0, 2, 3, 1), i.e.
;; builtin operand 1 is passed last.  Operand 4 (SI immediate) is not
;; passed on.
1991 (define_expand "neon_vfms<VCVTF:mode>"
1992 [(match_operand:VCVTF 0 "s_register_operand")
1993 (match_operand:VCVTF 1 "s_register_operand")
1994 (match_operand:VCVTF 2 "s_register_operand")
1995 (match_operand:VCVTF 3 "s_register_operand")
1996 (match_operand:SI 4 "immediate_operand")]
1997 "TARGET_NEON && TARGET_FMA"
1998 {
1999 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2000 operands[1]));
2001 DONE;
2002 })
2003
2004 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2005
;; UNSPEC_VMLA form of multiply-accumulate over the VDQW modes.  Operand 1
;; is tied to the output register (constraint "0"), so only operands 2 and 3
;; appear as sources in the printed vmla.<V_if_elem> instruction.
;; The "type" attribute is neon_fp_mla_s<q> for float modes and
;; neon_mla_<V_elem_ch><q> otherwise.
2006 (define_insn "neon_vmla<mode>_unspec"
2007 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2008 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2009 (match_operand:VDQW 2 "s_register_operand" "w")
2010 (match_operand:VDQW 3 "s_register_operand" "w")]
2011 UNSPEC_VMLA))]
2012 "TARGET_NEON"
2013 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2014 [(set (attr "type")
2015 (if_then_else (match_test "<Is_float_mode>")
2016 (const_string "neon_fp_mla_s<q>")
2017 (const_string "neon_mla_<V_elem_ch><q>")))]
2018 )
2019
;; vmlal: UNSPEC_VMLAL with a <V_widen> accumulator (operand 1, tied to the
;; output via constraint "0") and two VW sources (operands 2 and 3).
;; Operand 4 (SI immediate) is consumed by the %T4 modifier in the mnemonic.
2020 (define_insn "neon_vmlal<mode>"
2021 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2022 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2023 (match_operand:VW 2 "s_register_operand" "w")
2024 (match_operand:VW 3 "s_register_operand" "w")
2025 (match_operand:SI 4 "immediate_operand" "i")]
2026 UNSPEC_VMLAL))]
2027 "TARGET_NEON"
2028 "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2029 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2030 )
2031
;; Expander for the neon_vmls<mode> builtin over the VDQW modes.
;; For non-float modes, or when flag_unsafe_math_optimizations is set, it
;; emits mul<mode>3neg<mode>add<mode>_neon; otherwise it emits
;; neon_vmls<mode>_unspec.  Both receive operands 0..3; operand 4 (an SI
;; immediate) is not passed on.
2032 (define_expand "neon_vmls<mode>"
2033 [(match_operand:VDQW 0 "s_register_operand" "=w")
2034 (match_operand:VDQW 1 "s_register_operand" "0")
2035 (match_operand:VDQW 2 "s_register_operand" "w")
2036 (match_operand:VDQW 3 "s_register_operand" "w")
2037 (match_operand:SI 4 "immediate_operand" "i")]
2038 "TARGET_NEON"
2039 {
2040 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2041 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2042 operands[1], operands[2], operands[3]));
2043 else
2044 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2045 operands[2], operands[3]));
2046 DONE;
2047 })
2048
2049 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2050
;; UNSPEC_VMLS form of multiply-subtract over the VDQW modes.  Operand 1 is
;; tied to the output register (constraint "0"), so only operands 2 and 3
;; appear as sources in the printed vmls.<V_if_elem> instruction.
;; The "type" attribute is neon_fp_mla_s<q> for float modes and
;; neon_mla_<V_elem_ch><q> otherwise.
2051 (define_insn "neon_vmls<mode>_unspec"
2052 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2053 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2054 (match_operand:VDQW 2 "s_register_operand" "w")
2055 (match_operand:VDQW 3 "s_register_operand" "w")]
2056 UNSPEC_VMLS))]
2057 "TARGET_NEON"
2058 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2059 [(set (attr "type")
2060 (if_then_else (match_test "<Is_float_mode>")
2061 (const_string "neon_fp_mla_s<q>")
2062 (const_string "neon_mla_<V_elem_ch><q>")))]
2063 )
2064
;; vmlsl: UNSPEC_VMLSL with a <V_widen> accumulator (operand 1, tied to the
;; output via constraint "0") and two VW sources (operands 2 and 3).
;; Operand 4 (SI immediate) is consumed by the %T4 modifier in the mnemonic.
2065 (define_insn "neon_vmlsl<mode>"
2066 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2067 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2068 (match_operand:VW 2 "s_register_operand" "w")
2069 (match_operand:VW 3 "s_register_operand" "w")
2070 (match_operand:SI 4 "immediate_operand" "i")]
2071 UNSPEC_VMLSL))]
2072 "TARGET_NEON"
2073 "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2074 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2075 )
2076
;; vqdmulh / vq<O3>dmulh: UNSPEC_VQDMULH of operands 1 and 2 over the VMDQI
;; modes.  Operand 3 (SI immediate) is consumed by the %O3 modifier in the
;; mnemonic; the element suffix comes from <V_s_elem>.
;; NOTE(review): %O3 presumably selects the rounding ("r") variant -- confirm.
2077 (define_insn "neon_vqdmulh<mode>"
2078 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2079 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2080 (match_operand:VMDQI 2 "s_register_operand" "w")
2081 (match_operand:SI 3 "immediate_operand" "i")]
2082 UNSPEC_VQDMULH))]
2083 "TARGET_NEON"
2084 "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2085 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2086 )
2087
;; vqdmlal: UNSPEC_VQDMLAL with a <V_widen> accumulator (operand 1, tied to
;; the output via constraint "0") and two VMDI sources (operands 2 and 3).
;; Operand 4 (SI immediate) is part of the unspec but does not appear in the
;; output template.
2088 (define_insn "neon_vqdmlal<mode>"
2089 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2090 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2091 (match_operand:VMDI 2 "s_register_operand" "w")
2092 (match_operand:VMDI 3 "s_register_operand" "w")
2093 (match_operand:SI 4 "immediate_operand" "i")]
2094 UNSPEC_VQDMLAL))]
2095 "TARGET_NEON"
2096 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2097 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2098 )
2099
;; vqdmlsl: UNSPEC_VQDMLSL with a <V_widen> accumulator (operand 1, tied to
;; the output via constraint "0") and two VMDI sources (operands 2 and 3).
;; Operand 4 (SI immediate) is part of the unspec but does not appear in the
;; output template.
2100 (define_insn "neon_vqdmlsl<mode>"
2101 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2102 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2103 (match_operand:VMDI 2 "s_register_operand" "w")
2104 (match_operand:VMDI 3 "s_register_operand" "w")
2105 (match_operand:SI 4 "immediate_operand" "i")]
2106 UNSPEC_VQDMLSL))]
2107 "TARGET_NEON"
2108 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2109 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2110 )
2111
;; vmull: UNSPEC_VMULL of two VW operands (1 and 2) producing a result in
;; the <V_widen> mode.  Operand 3 (SI immediate) is consumed by the %T3
;; modifier in the mnemonic.
2112 (define_insn "neon_vmull<mode>"
2113 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2114 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2115 (match_operand:VW 2 "s_register_operand" "w")
2116 (match_operand:SI 3 "immediate_operand" "i")]
2117 UNSPEC_VMULL))]
2118 "TARGET_NEON"
2119 "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2120 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2121 )
2122
;; vqdmull: UNSPEC_VQDMULL of two VMDI operands (1 and 2) producing a result
;; in the <V_widen> mode.  Operand 3 (SI immediate) is part of the unspec
;; but does not appear in the output template.
2123 (define_insn "neon_vqdmull<mode>"
2124 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2125 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2126 (match_operand:VMDI 2 "s_register_operand" "w")
2127 (match_operand:SI 3 "immediate_operand" "i")]
2128 UNSPEC_VQDMULL))]
2129 "TARGET_NEON"
2130 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2131 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2132 )
2133
;; Expander for the neon_vsub<mode> builtin over the VDQX modes.
;; For non-float modes, or when flag_unsafe_math_optimizations is set, it
;; emits the generic sub<mode>3 pattern; otherwise it emits
;; neon_vsub<mode>_unspec.  Operand 3 (an SI immediate) is accepted but not
;; passed on to either generated insn.
2134 (define_expand "neon_vsub<mode>"
2135 [(match_operand:VDQX 0 "s_register_operand" "=w")
2136 (match_operand:VDQX 1 "s_register_operand" "w")
2137 (match_operand:VDQX 2 "s_register_operand" "w")
2138 (match_operand:SI 3 "immediate_operand" "i")]
2139 "TARGET_NEON"
2140 {
2141 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2142 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2143 else
2144 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2145 operands[2]));
2146 DONE;
2147 })
2148
2149 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2150
;; UNSPEC_VSUB form of vector subtract over the VDQX modes: operand 0 is set
;; to an unspec of operands 1 and 2 and printed as vsub.<V_if_elem>.
;; The "type" attribute is neon_fp_addsub_s<q> for float modes and
;; neon_sub<q> otherwise.
2151 (define_insn "neon_vsub<mode>_unspec"
2152 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
2153 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
2154 (match_operand:VDQX 2 "s_register_operand" "w")]
2155 UNSPEC_VSUB))]
2156 "TARGET_NEON"
2157 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2158 [(set (attr "type")
2159 (if_then_else (match_test "<Is_float_mode>")
2160 (const_string "neon_fp_addsub_s<q>")
2161 (const_string "neon_sub<q>")))]
2162 )
2163
;; vsubl: UNSPEC_VSUBL of two VDI operands (1 and 2) producing a result in
;; the <V_widen> mode.  Operand 3 (SI immediate) is consumed by the %T3
;; modifier in the mnemonic.
2164 (define_insn "neon_vsubl<mode>"
2165 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2166 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2167 (match_operand:VDI 2 "s_register_operand" "w")
2168 (match_operand:SI 3 "immediate_operand" "i")]
2169 UNSPEC_VSUBL))]
2170 "TARGET_NEON"
2171 "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2172 [(set_attr "type" "neon_sub_long")]
2173 )
2174
;; vsubw: UNSPEC_VSUBW where operand 1 is already in the <V_widen> mode and
;; operand 2 is in the narrower VDI mode; the result is in <V_widen>.
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
2175 (define_insn "neon_vsubw<mode>"
2176 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2177 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2178 (match_operand:VDI 2 "s_register_operand" "w")
2179 (match_operand:SI 3 "immediate_operand" "i")]
2180 UNSPEC_VSUBW))]
2181 "TARGET_NEON"
2182 "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
2183 [(set_attr "type" "neon_sub_widen")]
2184 )
2185
;; vqsub: UNSPEC_VQSUB of operands 1 and 2 over the VDQIX modes.
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_qsub<q>.
2186 (define_insn "neon_vqsub<mode>"
2187 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2188 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2189 (match_operand:VDQIX 2 "s_register_operand" "w")
2190 (match_operand:SI 3 "immediate_operand" "i")]
2191 UNSPEC_VQSUB))]
2192 "TARGET_NEON"
2193 "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2194 [(set_attr "type" "neon_qsub<q>")]
2195 )
2196
;; vhsub: UNSPEC_VHSUB of operands 1 and 2 over the VDQIW modes.
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_sub_halve<q>.
2197 (define_insn "neon_vhsub<mode>"
2198 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2199 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2200 (match_operand:VDQIW 2 "s_register_operand" "w")
2201 (match_operand:SI 3 "immediate_operand" "i")]
2202 UNSPEC_VHSUB))]
2203 "TARGET_NEON"
2204 "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2205 [(set_attr "type" "neon_sub_halve<q>")]
2206 )
2207
;; vsubhn / v<O3>subhn: UNSPEC_VSUBHN of two VN operands producing a result
;; in the <V_narrow> mode.  Operand 3 (SI immediate) is consumed by the %O3
;; modifier in the mnemonic.
;; NOTE(review): %O3 presumably selects the rounding ("r") variant -- confirm.
2208 (define_insn "neon_vsubhn<mode>"
2209 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2210 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2211 (match_operand:VN 2 "s_register_operand" "w")
2212 (match_operand:SI 3 "immediate_operand" "i")]
2213 UNSPEC_VSUBHN))]
2214 "TARGET_NEON"
2215 "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
2216 [(set_attr "type" "neon_sub_halve_narrow_q")]
2217 )
2218
;; vceq: UNSPEC_VCEQ comparison over the VDQW modes with a <V_cmp_result>
;; result.  Two alternatives: operand 2 in a register ("w"), or operand 2
;; matching the "Dz" constraint, in which case a literal #0 is printed.
;; The "type" attribute is neon_fp_compare_s<q> for float modes; otherwise
;; neon_compare_zero<q> when operand 2 satisfies zero_operand, else
;; neon_compare<q>.  Operand 3 (SI immediate) is not used in the template.
2219 (define_insn "neon_vceq<mode>"
2220 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2221 (unspec:<V_cmp_result>
2222 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2223 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2224 (match_operand:SI 3 "immediate_operand" "i,i")]
2225 UNSPEC_VCEQ))]
2226 "TARGET_NEON"
2227 "@
2228 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2229 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
2230 [(set (attr "type")
2231 (if_then_else (match_test "<Is_float_mode>")
2232 (const_string "neon_fp_compare_s<q>")
2233 (if_then_else (match_operand 2 "zero_operand")
2234 (const_string "neon_compare_zero<q>")
2235 (const_string "neon_compare<q>"))))]
2236 )
2237
;; vcge: UNSPEC_VCGE comparison over the VDQW modes with a <V_cmp_result>
;; result.  Two alternatives: operand 2 in a register ("w"), or operand 2
;; matching the "Dz" constraint, in which case a literal #0 is printed.
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_fp_compare_s<q> for float modes; otherwise
;; neon_compare_zero<q> when operand 2 satisfies zero_operand, else
;; neon_compare<q>.
2238 (define_insn "neon_vcge<mode>"
2239 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2240 (unspec:<V_cmp_result>
2241 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2242 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2243 (match_operand:SI 3 "immediate_operand" "i,i")]
2244 UNSPEC_VCGE))]
2245 "TARGET_NEON"
2246 "@
2247 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2248 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2249 [(set (attr "type")
2250 (if_then_else (match_test "<Is_float_mode>")
2251 (const_string "neon_fp_compare_s<q>")
2252 (if_then_else (match_operand 2 "zero_operand")
2253 (const_string "neon_compare_zero<q>")
2254 (const_string "neon_compare<q>"))))]
2255 )
2256
;; vcge wrapped in UNSPEC_VCGEU: integer modes only (VDQIW) and register
;; operands only (no #0 alternative).  Operand 3 (SI immediate) is consumed
;; by the %T3 modifier in the mnemonic.
2257 (define_insn "neon_vcgeu<mode>"
2258 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2259 (unspec:<V_cmp_result>
2260 [(match_operand:VDQIW 1 "s_register_operand" "w")
2261 (match_operand:VDQIW 2 "s_register_operand" "w")
2262 (match_operand:SI 3 "immediate_operand" "i")]
2263 UNSPEC_VCGEU))]
2264 "TARGET_NEON"
2265 "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2266 [(set_attr "type" "neon_compare<q>")]
2267 )
2268
;; vcgt: UNSPEC_VCGT comparison over the VDQW modes with a <V_cmp_result>
;; result.  Two alternatives: operand 2 in a register ("w"), or operand 2
;; matching the "Dz" constraint, in which case a literal #0 is printed.
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_fp_compare_s<q> for float modes; otherwise
;; neon_compare_zero<q> when operand 2 satisfies zero_operand, else
;; neon_compare<q>.
2269 (define_insn "neon_vcgt<mode>"
2270 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2271 (unspec:<V_cmp_result>
2272 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2273 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2274 (match_operand:SI 3 "immediate_operand" "i,i")]
2275 UNSPEC_VCGT))]
2276 "TARGET_NEON"
2277 "@
2278 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2279 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2280 [(set (attr "type")
2281 (if_then_else (match_test "<Is_float_mode>")
2282 (const_string "neon_fp_compare_s<q>")
2283 (if_then_else (match_operand 2 "zero_operand")
2284 (const_string "neon_compare_zero<q>")
2285 (const_string "neon_compare<q>"))))]
2286 )
2287
;; vcgt wrapped in UNSPEC_VCGTU: integer modes only (VDQIW) and register
;; operands only (no #0 alternative).  Operand 3 (SI immediate) is consumed
;; by the %T3 modifier in the mnemonic.
2288 (define_insn "neon_vcgtu<mode>"
2289 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2290 (unspec:<V_cmp_result>
2291 [(match_operand:VDQIW 1 "s_register_operand" "w")
2292 (match_operand:VDQIW 2 "s_register_operand" "w")
2293 (match_operand:SI 3 "immediate_operand" "i")]
2294 UNSPEC_VCGTU))]
2295 "TARGET_NEON"
2296 "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2297 [(set_attr "type" "neon_compare<q>")]
2298 )
2299
2300 ;; VCLE and VCLT only support comparisons with immediate zero (register
2301 ;; variants are VCGE and VCGT with operands reversed).
2302
;; vcle against zero: UNSPEC_VCLE over the VDQW modes.  Operand 2 must
;; satisfy zero_operand (constraint "Dz") and is always printed as #0.
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; Because operand 2 is always a zero_operand, the inner if_then_else of the
;; "type" attribute always takes its neon_compare_zero<q> arm for non-float
;; modes.
2303 (define_insn "neon_vcle<mode>"
2304 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2305 (unspec:<V_cmp_result>
2306 [(match_operand:VDQW 1 "s_register_operand" "w")
2307 (match_operand:VDQW 2 "zero_operand" "Dz")
2308 (match_operand:SI 3 "immediate_operand" "i")]
2309 UNSPEC_VCLE))]
2310 "TARGET_NEON"
2311 "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2312 [(set (attr "type")
2313 (if_then_else (match_test "<Is_float_mode>")
2314 (const_string "neon_fp_compare_s<q>")
2315 (if_then_else (match_operand 2 "zero_operand")
2316 (const_string "neon_compare_zero<q>")
2317 (const_string "neon_compare<q>"))))]
2318 )
2319
;; vclt against zero: UNSPEC_VCLT over the VDQW modes.  Operand 2 must
;; satisfy zero_operand (constraint "Dz") and is always printed as #0.
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; Because operand 2 is always a zero_operand, the inner if_then_else of the
;; "type" attribute always takes its neon_compare_zero<q> arm for non-float
;; modes.
2320 (define_insn "neon_vclt<mode>"
2321 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2322 (unspec:<V_cmp_result>
2323 [(match_operand:VDQW 1 "s_register_operand" "w")
2324 (match_operand:VDQW 2 "zero_operand" "Dz")
2325 (match_operand:SI 3 "immediate_operand" "i")]
2326 UNSPEC_VCLT))]
2327 "TARGET_NEON"
2328 "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2329 [(set (attr "type")
2330 (if_then_else (match_test "<Is_float_mode>")
2331 (const_string "neon_fp_compare_s<q>")
2332 (if_then_else (match_operand 2 "zero_operand")
2333 (const_string "neon_compare_zero<q>")
2334 (const_string "neon_compare<q>"))))]
2335 )
2336
;; UNSPEC_VCAGE over the VCVTF modes with a <V_cmp_result> result; printed
;; as vacge.<V_if_elem> on operands 1 and 2.  Operand 3 (SI immediate) is
;; part of the unspec but does not appear in the output template.
2337 (define_insn "neon_vcage<mode>"
2338 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2339 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2340 (match_operand:VCVTF 2 "s_register_operand" "w")
2341 (match_operand:SI 3 "immediate_operand" "i")]
2342 UNSPEC_VCAGE))]
2343 "TARGET_NEON"
2344 "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2345 [(set_attr "type" "neon_fp_compare_s<q>")]
2346 )
2347
;; UNSPEC_VCAGT over the VCVTF modes with a <V_cmp_result> result; printed
;; as vacgt.<V_if_elem> on operands 1 and 2.  Operand 3 (SI immediate) is
;; part of the unspec but does not appear in the output template.
2348 (define_insn "neon_vcagt<mode>"
2349 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2350 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2351 (match_operand:VCVTF 2 "s_register_operand" "w")
2352 (match_operand:SI 3 "immediate_operand" "i")]
2353 UNSPEC_VCAGT))]
2354 "TARGET_NEON"
2355 "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2356 [(set_attr "type" "neon_fp_compare_s<q>")]
2357 )
2358
;; vtst: UNSPEC_VTST of operands 1 and 2 over the VDQIW modes; the element
;; suffix is the bare size <V_sz_elem>.  Operand 3 (SI immediate) is part of
;; the unspec but does not appear in the output template.
2359 (define_insn "neon_vtst<mode>"
2360 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2361 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2362 (match_operand:VDQIW 2 "s_register_operand" "w")
2363 (match_operand:SI 3 "immediate_operand" "i")]
2364 UNSPEC_VTST))]
2365 "TARGET_NEON"
2366 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2367 [(set_attr "type" "neon_tst<q>")]
2368 )
2369
;; vabd: UNSPEC_VABD of operands 1 and 2 over the VDQW modes.  Operand 3
;; (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_fp_abd_s<q> for float modes and neon_abd<q>
;; otherwise.
2370 (define_insn "neon_vabd<mode>"
2371 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2372 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2373 (match_operand:VDQW 2 "s_register_operand" "w")
2374 (match_operand:SI 3 "immediate_operand" "i")]
2375 UNSPEC_VABD))]
2376 "TARGET_NEON"
2377 "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2378 [(set (attr "type")
2379 (if_then_else (match_test "<Is_float_mode>")
2380 (const_string "neon_fp_abd_s<q>")
2381 (const_string "neon_abd<q>")))]
2382 )
2383
;; vabdl: UNSPEC_VABDL of two VW operands (1 and 2) producing a result in
;; the <V_widen> mode.  Operand 3 (SI immediate) is consumed by the %T3
;; modifier in the mnemonic.
2384 (define_insn "neon_vabdl<mode>"
2385 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2386 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2387 (match_operand:VW 2 "s_register_operand" "w")
2388 (match_operand:SI 3 "immediate_operand" "i")]
2389 UNSPEC_VABDL))]
2390 "TARGET_NEON"
2391 "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2392 [(set_attr "type" "neon_abd_long")]
2393 )
2394
;; vaba: modelled as (plus (unspec [op2 op3 op4] UNSPEC_VABD) op1) over the
;; VDQIW modes.  Operand 1 is the accumulator and is tied to the output
;; register (constraint "0"), so only operands 2 and 3 are printed as
;; sources.  Operand 4 (SI immediate) is consumed by the %T4 modifier.
2395 (define_insn "neon_vaba<mode>"
2396 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2397 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2398 (match_operand:VDQIW 3 "s_register_operand" "w")
2399 (match_operand:SI 4 "immediate_operand" "i")]
2400 UNSPEC_VABD)
2401 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2402 "TARGET_NEON"
2403 "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2404 [(set_attr "type" "neon_arith_acc<q>")]
2405 )
2406
;; vabal: modelled as (plus (unspec [op2 op3 op4] UNSPEC_VABDL) op1) with a
;; <V_widen> result.  Operand 1 is the <V_widen> accumulator tied to the
;; output register (constraint "0"); operands 2 and 3 are VW sources.
;; Operand 4 (SI immediate) is consumed by the %T4 modifier.
2407 (define_insn "neon_vabal<mode>"
2408 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2409 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2410 (match_operand:VW 3 "s_register_operand" "w")
2411 (match_operand:SI 4 "immediate_operand" "i")]
2412 UNSPEC_VABDL)
2413 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2414 "TARGET_NEON"
2415 "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2416 [(set_attr "type" "neon_arith_acc<q>")]
2417 )
2418
;; vmax: UNSPEC_VMAX of operands 1 and 2 over the VDQW modes.  Operand 3
;; (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_fp_minmax_s<q> for float modes and
;; neon_minmax<q> otherwise.
2419 (define_insn "neon_vmax<mode>"
2420 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2421 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2422 (match_operand:VDQW 2 "s_register_operand" "w")
2423 (match_operand:SI 3 "immediate_operand" "i")]
2424 UNSPEC_VMAX))]
2425 "TARGET_NEON"
2426 "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2427 [(set (attr "type")
2428 (if_then_else (match_test "<Is_float_mode>")
2429 (const_string "neon_fp_minmax_s<q>")
2430 (const_string "neon_minmax<q>")))]
2431 )
2432
;; vmin: UNSPEC_VMIN of operands 1 and 2 over the VDQW modes.  Operand 3
;; (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_fp_minmax_s<q> for float modes and
;; neon_minmax<q> otherwise.
2433 (define_insn "neon_vmin<mode>"
2434 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2435 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2436 (match_operand:VDQW 2 "s_register_operand" "w")
2437 (match_operand:SI 3 "immediate_operand" "i")]
2438 UNSPEC_VMIN))]
2439 "TARGET_NEON"
2440 "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2441 [(set (attr "type")
2442 (if_then_else (match_test "<Is_float_mode>")
2443 (const_string "neon_fp_minmax_s<q>")
2444 (const_string "neon_minmax<q>")))]
2445 )
2446
;; Expander for the neon_vpadd<mode> builtin over the VD modes.  It simply
;; forwards operands 0..2 to neon_vpadd_internal<mode>; operand 3 (an SI
;; immediate) is accepted but not passed on.
2447 (define_expand "neon_vpadd<mode>"
2448 [(match_operand:VD 0 "s_register_operand" "=w")
2449 (match_operand:VD 1 "s_register_operand" "w")
2450 (match_operand:VD 2 "s_register_operand" "w")
2451 (match_operand:SI 3 "immediate_operand" "i")]
2452 "TARGET_NEON"
2453 {
2454 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2455 operands[2]));
2456 DONE;
2457 })
2458
;; vpaddl: UNSPEC_VPADDL of a single VDQIW source (operand 1) producing a
;; result in the <V_double_width> mode.  Operand 2 (SI immediate) is
;; consumed by the %T2 modifier in the mnemonic.
2459 (define_insn "neon_vpaddl<mode>"
2460 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2461 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")
2462 (match_operand:SI 2 "immediate_operand" "i")]
2463 UNSPEC_VPADDL))]
2464 "TARGET_NEON"
2465 "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2466 [(set_attr "type" "neon_reduc_add_long")]
2467 )
2468
;; vpadal: UNSPEC_VPADAL with a <V_double_width> accumulator (operand 1,
;; tied to the output via constraint "0") and one VDQIW source (operand 2).
;; Operand 3 (SI immediate) is consumed by the %T3 modifier in the mnemonic.
2469 (define_insn "neon_vpadal<mode>"
2470 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2471 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2472 (match_operand:VDQIW 2 "s_register_operand" "w")
2473 (match_operand:SI 3 "immediate_operand" "i")]
2474 UNSPEC_VPADAL))]
2475 "TARGET_NEON"
2476 "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2477 [(set_attr "type" "neon_reduc_add_acc")]
2478 )
2479
;; vpmax: UNSPEC_VPMAX of operands 1 and 2 over the VD modes.  Operand 3
;; (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_fp_reduc_minmax_s<q> for float modes and
;; neon_reduc_minmax<q> otherwise.
2480 (define_insn "neon_vpmax<mode>"
2481 [(set (match_operand:VD 0 "s_register_operand" "=w")
2482 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2483 (match_operand:VD 2 "s_register_operand" "w")
2484 (match_operand:SI 3 "immediate_operand" "i")]
2485 UNSPEC_VPMAX))]
2486 "TARGET_NEON"
2487 "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2488 [(set (attr "type")
2489 (if_then_else (match_test "<Is_float_mode>")
2490 (const_string "neon_fp_reduc_minmax_s<q>")
2491 (const_string "neon_reduc_minmax<q>")))]
2492 )
2493
;; vpmin: UNSPEC_VPMIN of operands 1 and 2 over the VD modes.  Operand 3
;; (SI immediate) is consumed by the %T3 modifier in the mnemonic.
;; The "type" attribute is neon_fp_reduc_minmax_s<q> for float modes and
;; neon_reduc_minmax<q> otherwise.
2494 (define_insn "neon_vpmin<mode>"
2495 [(set (match_operand:VD 0 "s_register_operand" "=w")
2496 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2497 (match_operand:VD 2 "s_register_operand" "w")
2498 (match_operand:SI 3 "immediate_operand" "i")]
2499 UNSPEC_VPMIN))]
2500 "TARGET_NEON"
2501 "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2502 [(set (attr "type")
2503 (if_then_else (match_test "<Is_float_mode>")
2504 (const_string "neon_fp_reduc_minmax_s<q>")
2505 (const_string "neon_reduc_minmax<q>")))]
2506 )
2507
;; vrecps: UNSPEC_VRECPS of operands 1 and 2 over the VCVTF modes, printed
;; as vrecps.<V_if_elem>.  Operand 3 (SI immediate) is part of the unspec
;; but does not appear in the output template.
2508 (define_insn "neon_vrecps<mode>"
2509 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2510 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2511 (match_operand:VCVTF 2 "s_register_operand" "w")
2512 (match_operand:SI 3 "immediate_operand" "i")]
2513 UNSPEC_VRECPS))]
2514 "TARGET_NEON"
2515 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2516 [(set_attr "type" "neon_fp_recps_s<q>")]
2517 )
2518
;; vrsqrts: UNSPEC_VRSQRTS of operands 1 and 2 over the VCVTF modes, printed
;; as vrsqrts.<V_if_elem>.  Operand 3 (SI immediate) is part of the unspec
;; but does not appear in the output template.
2519 (define_insn "neon_vrsqrts<mode>"
2520 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2521 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2522 (match_operand:VCVTF 2 "s_register_operand" "w")
2523 (match_operand:SI 3 "immediate_operand" "i")]
2524 UNSPEC_VRSQRTS))]
2525 "TARGET_NEON"
2526 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2527 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2528 )
2529
;; Expander for the neon_vabs<mode> builtin over the VDQW modes.  It
;; forwards operands 0 and 1 to the generic abs<mode>2 pattern; operand 2
;; (an SI immediate) is accepted but not passed on.
2530 (define_expand "neon_vabs<mode>"
2531 [(match_operand:VDQW 0 "s_register_operand" "")
2532 (match_operand:VDQW 1 "s_register_operand" "")
2533 (match_operand:SI 2 "immediate_operand" "")]
2534 "TARGET_NEON"
2535 {
2536 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2537 DONE;
2538 })
2539
;; vqabs: UNSPEC_VQABS of operand 1 over the VDQIW modes, printed with the
;; <V_s_elem> element suffix.  Operand 2 (SI immediate) is part of the
;; unspec but does not appear in the output template.
2540 (define_insn "neon_vqabs<mode>"
2541 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2542 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2543 (match_operand:SI 2 "immediate_operand" "i")]
2544 UNSPEC_VQABS))]
2545 "TARGET_NEON"
2546 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2547 [(set_attr "type" "neon_qabs<q>")]
2548 )
2549
;; Expander for the neon_vneg<mode> builtin over the VDQW modes.  It
;; forwards operands 0 and 1 to the generic neg<mode>2 pattern; operand 2
;; (an SI immediate) is accepted but not passed on.
2550 (define_expand "neon_vneg<mode>"
2551 [(match_operand:VDQW 0 "s_register_operand" "")
2552 (match_operand:VDQW 1 "s_register_operand" "")
2553 (match_operand:SI 2 "immediate_operand" "")]
2554 "TARGET_NEON"
2555 {
2556 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
2557 DONE;
2558 })
2559
;; vqneg: UNSPEC_VQNEG of operand 1 over the VDQIW modes, printed with the
;; <V_s_elem> element suffix.  Operand 2 (SI immediate) is part of the
;; unspec but does not appear in the output template.
2560 (define_insn "neon_vqneg<mode>"
2561 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2562 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2563 (match_operand:SI 2 "immediate_operand" "i")]
2564 UNSPEC_VQNEG))]
2565 "TARGET_NEON"
2566 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2567 [(set_attr "type" "neon_qneg<q>")]
2568 )
2569
;; vcls: UNSPEC_VCLS of operand 1 over the VDQIW modes, printed with the
;; <V_s_elem> element suffix.  Operand 2 (SI immediate) is part of the
;; unspec but does not appear in the output template.
2570 (define_insn "neon_vcls<mode>"
2571 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2572 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2573 (match_operand:SI 2 "immediate_operand" "i")]
2574 UNSPEC_VCLS))]
2575 "TARGET_NEON"
2576 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2577 [(set_attr "type" "neon_cls<q>")]
2578 )
2579
;; Standard-named clz<mode>2 pattern for the VDQIW modes: expressed with the
;; generic clz RTL code (not an unspec) and printed as vclz.<V_if_elem>.
;; neon_vclz<mode> expands to this pattern.
2580 (define_insn "clz<mode>2"
2581 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2582 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2583 "TARGET_NEON"
2584 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2585 [(set_attr "type" "neon_cnt<q>")]
2586 )
2587
;; Expander for the neon_vclz<mode> builtin over the VDQIW modes.  It
;; forwards operands 0 and 1 to clz<mode>2; operand 2 (an SI immediate) is
;; accepted but not passed on.
2588 (define_expand "neon_vclz<mode>"
2589 [(match_operand:VDQIW 0 "s_register_operand" "")
2590 (match_operand:VDQIW 1 "s_register_operand" "")
2591 (match_operand:SI 2 "immediate_operand" "")]
2592 "TARGET_NEON"
2593 {
2594 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
2595 DONE;
2596 })
2597
;; Standard-named popcount<mode>2 pattern for the VE modes: expressed with
;; the generic popcount RTL code (not an unspec) and printed as
;; vcnt.<V_sz_elem>.  neon_vcnt<mode> expands to this pattern.
2598 (define_insn "popcount<mode>2"
2599 [(set (match_operand:VE 0 "s_register_operand" "=w")
2600 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2601 "TARGET_NEON"
2602 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2603 [(set_attr "type" "neon_cnt<q>")]
2604 )
2605
;; Expander for the neon_vcnt<mode> builtin over the VE modes.  It forwards
;; operands 0 and 1 to popcount<mode>2; operand 2 (an SI immediate) is
;; accepted but not passed on.
2606 (define_expand "neon_vcnt<mode>"
2607 [(match_operand:VE 0 "s_register_operand" "=w")
2608 (match_operand:VE 1 "s_register_operand" "w")
2609 (match_operand:SI 2 "immediate_operand" "i")]
2610 "TARGET_NEON"
2611 {
2612 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
2613 DONE;
2614 })
2615
;; vrecpe: UNSPEC_VRECPE of operand 1 over the V32 modes, printed with the
;; <V_u_elem> element suffix.  Operand 2 (SI immediate) is part of the
;; unspec but does not appear in the output template.
2616 (define_insn "neon_vrecpe<mode>"
2617 [(set (match_operand:V32 0 "s_register_operand" "=w")
2618 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2619 (match_operand:SI 2 "immediate_operand" "i")]
2620 UNSPEC_VRECPE))]
2621 "TARGET_NEON"
2622 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2623 [(set_attr "type" "neon_fp_recpe_s<q>")]
2624 )
2625
;; vrsqrte: UNSPEC_VRSQRTE of operand 1 over the V32 modes, printed with the
;; <V_u_elem> element suffix.  Operand 2 (SI immediate) is part of the
;; unspec but does not appear in the output template.
2626 (define_insn "neon_vrsqrte<mode>"
2627 [(set (match_operand:V32 0 "s_register_operand" "=w")
2628 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2629 (match_operand:SI 2 "immediate_operand" "i")]
2630 UNSPEC_VRSQRTE))]
2631 "TARGET_NEON"
2632 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2633 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
2634 )
2635
;; Expander for the neon_vmvn<mode> builtin over the VDQIW modes.  It
;; forwards operands 0 and 1 to the generic one_cmpl<mode>2 pattern;
;; operand 2 (an SI immediate) is accepted but not passed on.
2636 (define_expand "neon_vmvn<mode>"
2637 [(match_operand:VDQIW 0 "s_register_operand" "")
2638 (match_operand:VDQIW 1 "s_register_operand" "")
2639 (match_operand:SI 2 "immediate_operand" "")]
2640 "TARGET_NEON"
2641 {
2642 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
2643 DONE;
2644 })
2645
;; Sign-extending lane extract from a VD-mode vector into an SI core
;; register: (sign_extend:SI (vec_select op1 [op2])).
;; When BYTES_BIG_ENDIAN is set the output code rewrites operands[2] in
;; place to GET_MODE_NUNITS - 1 - lane before printing, so the lane index
;; in the emitted vmov.s<V_sz_elem> is mirrored relative to the RTL lane.
2646 (define_insn "neon_vget_lane<mode>_sext_internal"
2647 [(set (match_operand:SI 0 "s_register_operand" "=r")
2648 (sign_extend:SI
2649 (vec_select:<V_elem>
2650 (match_operand:VD 1 "s_register_operand" "w")
2651 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2652 "TARGET_NEON"
2653 {
2654 if (BYTES_BIG_ENDIAN)
2655 {
2656 int elt = INTVAL (operands[2]);
2657 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2658 operands[2] = GEN_INT (elt);
2659 }
2660 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2661 }
2662 [(set_attr "type" "neon_to_gp")]
2663 )
2664
;; Zero-extending lane extract from a VD-mode vector into an SI core
;; register: (zero_extend:SI (vec_select op1 [op2])).
;; When BYTES_BIG_ENDIAN is set the output code rewrites operands[2] in
;; place to GET_MODE_NUNITS - 1 - lane before printing, so the lane index
;; in the emitted vmov.u<V_sz_elem> is mirrored relative to the RTL lane.
2665 (define_insn "neon_vget_lane<mode>_zext_internal"
2666 [(set (match_operand:SI 0 "s_register_operand" "=r")
2667 (zero_extend:SI
2668 (vec_select:<V_elem>
2669 (match_operand:VD 1 "s_register_operand" "w")
2670 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2671 "TARGET_NEON"
2672 {
2673 if (BYTES_BIG_ENDIAN)
2674 {
2675 int elt = INTVAL (operands[2]);
2676 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2677 operands[2] = GEN_INT (elt);
2678 }
2679 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2680 }
2681 [(set_attr "type" "neon_to_gp")]
2682 )
2683
;; Sign-extending lane extract from a VQ-mode vector into an SI core
;; register: (sign_extend:SI (vec_select op1 [op2])).
;; The output code splits the lane number into a half-vector selector
;; (elt / halfelts) and an index inside that half (elt % halfelts, mirrored
;; to halfelts - 1 - index when BYTES_BIG_ENDIAN).  It builds a <V_HALF>mode
;; register at REGNO (operands[1]) + 2 * (elt / halfelts), prints
;; vmov.s<V_sz_elem> on the local ops[] array via output_asm_insn, and
;; returns an empty template.  operands[] itself is left unmodified.
2684 (define_insn "neon_vget_lane<mode>_sext_internal"
2685 [(set (match_operand:SI 0 "s_register_operand" "=r")
2686 (sign_extend:SI
2687 (vec_select:<V_elem>
2688 (match_operand:VQ 1 "s_register_operand" "w")
2689 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2690 "TARGET_NEON"
2691 {
2692 rtx ops[3];
2693 int regno = REGNO (operands[1]);
2694 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2695 unsigned int elt = INTVAL (operands[2]);
2696 unsigned int elt_adj = elt % halfelts;
2697
2698 if (BYTES_BIG_ENDIAN)
2699 elt_adj = halfelts - 1 - elt_adj;
2700
2701 ops[0] = operands[0];
2702 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2703 ops[2] = GEN_INT (elt_adj);
2704 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2705
2706 return "";
2707 }
2708 [(set_attr "type" "neon_to_gp_q")]
2709 )
2710
;; Zero-extending lane extract from a VQ-mode vector into an SI core
;; register: (zero_extend:SI (vec_select op1 [op2])).
;; The output code splits the lane number into a half-vector selector
;; (elt / halfelts) and an index inside that half (elt % halfelts, mirrored
;; to halfelts - 1 - index when BYTES_BIG_ENDIAN).  It builds a <V_HALF>mode
;; register at REGNO (operands[1]) + 2 * (elt / halfelts), prints
;; vmov.u<V_sz_elem> on the local ops[] array via output_asm_insn, and
;; returns an empty template.  operands[] itself is left unmodified.
2711 (define_insn "neon_vget_lane<mode>_zext_internal"
2712 [(set (match_operand:SI 0 "s_register_operand" "=r")
2713 (zero_extend:SI
2714 (vec_select:<V_elem>
2715 (match_operand:VQ 1 "s_register_operand" "w")
2716 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2717 "TARGET_NEON"
2718 {
2719 rtx ops[3];
2720 int regno = REGNO (operands[1]);
2721 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2722 unsigned int elt = INTVAL (operands[2]);
2723 unsigned int elt_adj = elt % halfelts;
2724
2725 if (BYTES_BIG_ENDIAN)
2726 elt_adj = halfelts - 1 - elt_adj;
2727
2728 ops[0] = operands[0];
2729 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2730 ops[2] = GEN_INT (elt_adj);
2731 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2732
2733 return "";
2734 }
2735 [(set_attr "type" "neon_to_gp_q")]
2736 )
2737
;; Expander for the lane-read builtin.  Operand 2 is the lane number and
;; operand 3 is an info word: when its low two bits are both set, or when
;; the element is 32 bits wide, a plain vec_extract is used; otherwise bit 0
;; chooses between the sign- and zero-extending internal patterns.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  const HOST_WIDE_INT info = INTVAL (operands[3]);
  const unsigned int elt_bits
    = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
  rtx pat;

  neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int lane = INTVAL (operands[2]);
      const unsigned int lanes_per_dreg = 64 / elt_bits;
      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  if ((info & 3) == 3 || elt_bits == 32)
    pat = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
  else if ((info & 1) != 0)
    pat = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
                                                  operands[2]);
  else
    pat = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
                                                  operands[2]);

  emit_insn (pat);
  DONE;
})
2778
2779 ; Operand 3 (info word) is ignored because it does nothing useful with 64-bit
2780 ; elements.
2781
;; Lane read for a DImode operand: the lane index (operand 2) is passed to
;; neon_lane_bounds with limits 0 and 1, then the value is copied with an
;; ordinary move.
2782 (define_expand "neon_vget_lanedi"
2783 [(match_operand:DI 0 "s_register_operand" "=r")
2784 (match_operand:DI 1 "s_register_operand" "w")
2785 (match_operand:SI 2 "immediate_operand" "i")
2786 (match_operand:SI 3 "immediate_operand" "i")]
2787 "TARGET_NEON"
2788 {
/* Validate the lane number; operand 3 is not referenced.  */
2789 neon_lane_bounds (operands[2], 0, 1);
2790 emit_move_insn (operands[0], operands[1]);
2791 DONE;
2792 })
2793
;; Lane read from a V2DI vector.  Lane 0 is the low DImode part of operand 1
;; and lane 1 the high part; any other index is reported through
;; neon_lane_bounds and the expansion fails.  Operand 3 is not referenced.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane = INTVAL (operands[2]);

  if (lane == 0)
    emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
  else if (lane == 1)
    emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
  else
    {
      /* Out of range: let neon_lane_bounds handle the bad index, then
         give up on this expansion.  */
      neon_lane_bounds (operands[2], 0, 1);
      FAIL;
    }

  DONE;
})
2815
;; Expander for the lane-write builtin: operand 1 is the new element,
;; operand 2 the original vector and operand 3 the lane number.  The lane is
;; handed to vec_set<mode>_internal as a one-hot mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "=w")
   (match_operand:<V_elem> 1 "s_register_operand" "r")
   (match_operand:VDQ 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);
  rtx lane_mask;

  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane index within each 64-bit group of elements.  */
      const unsigned int lanes_per_dreg
        = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
      lane ^= lanes_per_dreg - 1;
    }

  lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         lane_mask, operands[2]));
  DONE;
})
2837
2838 ; See neon_vget_lanedi comment: operand 2 is ignored, and operand 3 is only bounds-checked.
2839
;; Lane write for a DImode operand: the lane index (operand 3) is validated
;; and the new value (operand 1) is moved into operand 0.  Operand 2 is not
;; referenced by the expansion code.
2840 (define_expand "neon_vset_lanedi"
2841 [(match_operand:DI 0 "s_register_operand" "=w")
2842 (match_operand:DI 1 "s_register_operand" "r")
2843 (match_operand:DI 2 "s_register_operand" "0")
2844 (match_operand:SI 3 "immediate_operand" "i")]
2845 "TARGET_NEON"
2846 {
2847 neon_lane_bounds (operands[3], 0, 1);
/* Replacing the only element is the same as copying the new value.  */
2848 emit_move_insn (operands[0], operands[1]);
2849 DONE;
2850 })
2851
;; Create a VDX-mode vector from a DImode value: operand 1 is reinterpreted
;; in the vector mode with gen_lowpart and copied with a normal move.
2852 (define_expand "neon_vcreate<mode>"
2853 [(match_operand:VDX 0 "s_register_operand" "")
2854 (match_operand:DI 1 "general_operand" "")]
2855 "TARGET_NEON"
2856 {
2857 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2858 emit_move_insn (operands[0], src);
2859 DONE;
2860 })
2861
;; Duplicate a scalar held in a core register (constraint "r") into every
;; element of a VX-mode vector with a single VDUP.
2862 (define_insn "neon_vdup_n<mode>"
2863 [(set (match_operand:VX 0 "s_register_operand" "=w")
2864 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2865 "TARGET_NEON"
2866 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2867 [(set_attr "type" "neon_from_gp<q>")]
2868 )
2869
;; Duplicate a scalar into every element of a V32-mode vector.  Two
;; alternatives: the scalar in a core register ("r"), or in a register
;; accepted by constraint "t", which is printed with the %y modifier.
;; NOTE(review): "t"/%y presumably denote a VFP single register printed as a
;; D-register lane -- confirm against the constraint and print_operand code.
2870 (define_insn "neon_vdup_n<mode>"
2871 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2872 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2873 "TARGET_NEON"
2874 "@
2875 vdup.<V_sz_elem>\t%<V_reg>0, %1
2876 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2877 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
2878 )
2879
;; DImode form of vdup_n: both operands are DImode, so the expansion is
;; just a move of operand 1 into operand 0.
2880 (define_expand "neon_vdup_ndi"
2881 [(match_operand:DI 0 "s_register_operand" "=w")
2882 (match_operand:DI 1 "s_register_operand" "r")]
2883 "TARGET_NEON"
2884 {
2885 emit_move_insn (operands[0], operands[1]);
2886 DONE;
2887 }
2888 )
2889
;; Duplicate a DImode value into both elements of a V2DI vector.  Each
;; alternative emits two VMOVs (hence length 8), one writing %e0 and one
;; writing %f0; the source is either a core-register pair (%Q1, %R1) or a
;; NEON register (%P1).
;; NOTE(review): %e0/%f0 presumably name the low and high D halves of the
;; destination Q register -- confirm.
2890 (define_insn "neon_vdup_nv2di"
2891 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2892 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2893 "TARGET_NEON"
2894 "@
2895 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2896 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2897 [(set_attr "length" "8")
2898 (set_attr "type" "multiple")]
2899 )
2900
;; Duplicate one lane of operand 1 (a <V_double_vector_mode> vector) into
;; every element of the destination.  The destination is printed as a D or
;; a Q register depending on <Is_d_reg>; on big-endian targets the lane
;; index is mirrored within the source vector before printing.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int src_lanes = GET_MODE_NUNITS (<V_double_vector_mode>mode);
      const int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (src_lanes - 1 - lane);
    }

  return (<Is_d_reg>
          ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
          : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
2922
;; Expander for the lane-duplicate builtin: validates the lane number
;; against the source vector's element count, converts it for big-endian
;; targets, and emits the internal pattern.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[2], 0,
                    GET_MODE_NUNITS (<V_double_vector_mode>mode));

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane index within each 64-bit group of elements.  */
      const unsigned int lanes_per_dreg
        = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
      unsigned int lane = INTVAL (operands[2]);
      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
2942
2943 ; Scalar index is ignored, since only zero is valid here.
;; DImode lane duplicate: after the lane index is validated, the result is
;; a plain copy of operand 1.
2944 (define_expand "neon_vdup_lanedi"
2945 [(match_operand:DI 0 "s_register_operand" "=w")
2946 (match_operand:DI 1 "s_register_operand" "w")
2947 (match_operand:SI 2 "immediate_operand" "i")]
2948 "TARGET_NEON"
2949 {
2950 neon_lane_bounds (operands[2], 0, 1);
2951 emit_move_insn (operands[0], operands[1]);
2952 DONE;
2953 })
2954
2955 ; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI lane duplicate from a DImode source: validates the lane index and
;; then reuses neon_vdup_nv2di to replicate operand 1.
2956 (define_expand "neon_vdup_lanev2di"
2957 [(match_operand:V2DI 0 "s_register_operand" "=w")
2958 (match_operand:DI 1 "s_register_operand" "w")
2959 (match_operand:SI 2 "immediate_operand" "i")]
2960 "TARGET_NEON"
2961 {
2962 neon_lane_bounds (operands[2], 0, 1);
2963 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2964 DONE;
2965 })
2966
2967 ; Disabled before reload because we don't want combine doing something silly,
2968 ; but used by the post-reload expansion of neon_vcombine.
;; Swap two vector registers: the pattern is a pair of sets exchanging
;; operands 0 and 1 (both "+w"), emitted as one VSWP.  Only matches once
;; reload_completed is true.
2969 (define_insn "*neon_vswp<mode>"
2970 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2971 (match_operand:VDQX 1 "s_register_operand" "+w"))
2972 (set (match_dup 1) (match_dup 0))]
2973 "TARGET_NEON && reload_completed"
2974 "vswp\t%<V_reg>0, %<V_reg>1"
2975 [(set_attr "type" "neon_permute<q>")]
2976 )
2977
2978 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2979 ;; dest vector.
2980 ;; FIXME: A different implementation of this builtin could make it much
2981 ;; more likely that we wouldn't actually need to output anything (we could make
2982 ;; it so that the reg allocator puts things in the right places magically
2983 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
2984
;; Concatenate two VDX vectors into one <V_DOUBLE> vector.  The insn has no
;; direct assembly output ("#"); after reload it is split by
;; neon_split_vcombine, which emits whatever code is needed.
2985 (define_insn_and_split "neon_vcombine<mode>"
2986 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2987 (vec_concat:<V_DOUBLE>
2988 (match_operand:VDX 1 "s_register_operand" "w")
2989 (match_operand:VDX 2 "s_register_operand" "w")))]
2990 "TARGET_NEON"
2991 "#"
2992 "&& reload_completed"
2993 [(const_int 0)]
2994 {
2995 neon_split_vcombine (operands);
2996 DONE;
2997 }
2998 [(set_attr "type" "multiple")]
2999 )
3000
;; Extract a <V_HALF>-mode half of a VQX vector: a move from the subreg of
;; operand 1 located at byte offset GET_MODE_SIZE (<V_HALF>mode).
3001 (define_expand "neon_vget_high<mode>"
3002 [(match_operand:<V_HALF> 0 "s_register_operand")
3003 (match_operand:VQX 1 "s_register_operand")]
3004 "TARGET_NEON"
3005 {
3006 emit_move_insn (operands[0],
3007 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3008 GET_MODE_SIZE (<V_HALF>mode)));
3009 DONE;
3010 })
3011
;; Extract a <V_HALF>-mode half of a VQX vector: a move from the subreg of
;; operand 1 located at byte offset 0.
3012 (define_expand "neon_vget_low<mode>"
3013 [(match_operand:<V_HALF> 0 "s_register_operand")
3014 (match_operand:VQX 1 "s_register_operand")]
3015 "TARGET_NEON"
3016 {
3017 emit_move_insn (operands[0],
3018 simplify_gen_subreg (<V_HALF>mode, operands[1],
3019 <MODE>mode, 0));
3020 DONE;
3021 })
3022
;; Signed integer vector to float vector conversion (vcvt.f32.s32).
;; Not available when flag_rounding_math is set.
3023 (define_insn "float<mode><V_cvtto>2"
3024 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3025 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3026 "TARGET_NEON && !flag_rounding_math"
3027 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3028 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3029 )
3030
;; Unsigned integer vector to float vector conversion (vcvt.f32.u32).
;; Not available when flag_rounding_math is set.
3031 (define_insn "floatuns<mode><V_cvtto>2"
3032 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3033 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3034 "TARGET_NEON && !flag_rounding_math"
3035 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3036 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3037 )
3038
;; Float vector to signed integer vector conversion (vcvt.s32.f32).
3039 (define_insn "fix_trunc<mode><V_cvtto>2"
3040 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3041 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3042 "TARGET_NEON"
3043 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3044 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3045 )
3046
;; Float vector to unsigned integer vector conversion (vcvt.u32.f32).
3047 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3048 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3049 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3050 "TARGET_NEON"
3051 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3052 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3053 )
3054
;; Builtin float-to-integer vector conversion.  Operand 2 is an immediate
;; consumed by the %T2 modifier to form the destination type suffix.
;; NOTE(review): %T presumably prints "s" or "u" from the info word -- confirm.
3055 (define_insn "neon_vcvt<mode>"
3056 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3057 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3058 (match_operand:SI 2 "immediate_operand" "i")]
3059 UNSPEC_VCVT))]
3060 "TARGET_NEON"
3061 "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
3062 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3063 )
3064
;; Builtin integer-to-float vector conversion.  Operand 2 is an immediate
;; consumed by the %T2 modifier to form the source type suffix.
;; NOTE(review): %T presumably prints "s" or "u" from the info word -- confirm.
3065 (define_insn "neon_vcvt<mode>"
3066 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3067 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3068 (match_operand:SI 2 "immediate_operand" "i")]
3069 UNSPEC_VCVT))]
3070 "TARGET_NEON"
3071 "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
3072 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3073 )
3074
;; Widen a V4HF vector to V4SF (vcvt.f32.f16).  Requires TARGET_FP16 in
;; addition to TARGET_NEON.
3075 (define_insn "neon_vcvtv4sfv4hf"
3076 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3077 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3078 UNSPEC_VCVT))]
3079 "TARGET_NEON && TARGET_FP16"
3080 "vcvt.f32.f16\t%q0, %P1"
3081 [(set_attr "type" "neon_fp_cvt_widen_h")]
3082 )
3083
;; Narrow a V4SF vector to V4HF (vcvt.f16.f32).  Requires TARGET_FP16 in
;; addition to TARGET_NEON.
3084 (define_insn "neon_vcvtv4hfv4sf"
3085 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3086 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3087 UNSPEC_VCVT))]
3088 "TARGET_NEON && TARGET_FP16"
3089 "vcvt.f16.f32\t%P0, %q1"
3090 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
3091 )
3092
;; Float-to-integer vector conversion with an extra immediate (operand 2)
;; printed as the instruction's last operand.  Operand 3 is the info word
;; consumed by %T3.
3093 (define_insn "neon_vcvt_n<mode>"
3094 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3095 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3096 (match_operand:SI 2 "immediate_operand" "i")
3097 (match_operand:SI 3 "immediate_operand" "i")]
3098 UNSPEC_VCVT_N))]
3099 "TARGET_NEON"
3100 {
/* Validate the immediate against the limits 1 and 33 before printing.  */
3101 neon_const_bounds (operands[2], 1, 33);
3102 return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3103 }
3104 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3105 )
3106
;; Integer-to-float vector conversion with an extra immediate (operand 2)
;; printed as the instruction's last operand.  Operand 3 is the info word
;; consumed by %T3.
3107 (define_insn "neon_vcvt_n<mode>"
3108 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3109 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3110 (match_operand:SI 2 "immediate_operand" "i")
3111 (match_operand:SI 3 "immediate_operand" "i")]
3112 UNSPEC_VCVT_N))]
3113 "TARGET_NEON"
3114 {
/* Validate the immediate against the limits 1 and 33 before printing.  */
3115 neon_const_bounds (operands[2], 1, 33);
3116 return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2";
3117 }
3118 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3119 )
3120
;; Narrowing move (VMOVN) from a VN-mode Q-register vector to its
;; <V_narrow> counterpart.  Operand 2 is part of the unspec but does not
;; appear in the output template.
3121 (define_insn "neon_vmovn<mode>"
3122 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3123 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3124 (match_operand:SI 2 "immediate_operand" "i")]
3125 UNSPEC_VMOVN))]
3126 "TARGET_NEON"
3127 "vmovn.<V_if_elem>\t%P0, %q1"
3128 [(set_attr "type" "neon_shift_imm_narrow_q")]
3129 )
3130
;; Saturating narrowing move (VQMOVN).  Operand 2 is the info word used by
;; %T2 to build the type suffix.
3131 (define_insn "neon_vqmovn<mode>"
3132 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3133 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3134 (match_operand:SI 2 "immediate_operand" "i")]
3135 UNSPEC_VQMOVN))]
3136 "TARGET_NEON"
3137 "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
3138 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3139 )
3140
;; Saturating narrowing move emitted as VQMOVUN.  Operand 2 is part of the
;; unspec but does not appear in the output template.
3141 (define_insn "neon_vqmovun<mode>"
3142 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3143 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3144 (match_operand:SI 2 "immediate_operand" "i")]
3145 UNSPEC_VQMOVUN))]
3146 "TARGET_NEON"
3147 "vqmovun.<V_s_elem>\t%P0, %q1"
3148 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3149 )
3150
;; Lengthening move (VMOVL) from a VW-mode D-register vector to its
;; <V_widen> counterpart.  Operand 2 is the info word used by %T2 to build
;; the type suffix.
3151 (define_insn "neon_vmovl<mode>"
3152 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3153 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3154 (match_operand:SI 2 "immediate_operand" "i")]
3155 UNSPEC_VMOVL))]
3156 "TARGET_NEON"
3157 "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
3158 [(set_attr "type" "neon_shift_imm_long")]
3159 )
3160
;; VMUL by scalar, D-register form: operand 1 times lane (operand 3) of
;; operand 2.  Operand 4 is not used by the template.  The type attribute
;; depends on whether the mode is a float mode.
3161 (define_insn "neon_vmul_lane<mode>"
3162 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3163 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3164 (match_operand:VMD 2 "s_register_operand"
3165 "<scalar_mul_constraint>")
3166 (match_operand:SI 3 "immediate_operand" "i")
3167 (match_operand:SI 4 "immediate_operand" "i")]
3168 UNSPEC_VMUL_LANE))]
3169 "TARGET_NEON"
3170 {
/* The lane indexes operand 2, which has <MODE>mode here.  */
3171 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3172 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3173 }
3174 [(set (attr "type")
3175 (if_then_else (match_test "<Is_float_mode>")
3176 (const_string "neon_fp_mul_s_scalar<q>")
3177 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3178 )
3179
;; VMUL by scalar, Q-register form: operand 1 (VMQ) times lane (operand 3)
;; of operand 2, which is a <V_HALF>-mode vector.  Operand 4 is not used by
;; the template.
3180 (define_insn "neon_vmul_lane<mode>"
3181 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3182 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3183 (match_operand:<V_HALF> 2 "s_register_operand"
3184 "<scalar_mul_constraint>")
3185 (match_operand:SI 3 "immediate_operand" "i")
3186 (match_operand:SI 4 "immediate_operand" "i")]
3187 UNSPEC_VMUL_LANE))]
3188 "TARGET_NEON"
3189 {
/* The lane indexes the half-width operand 2, so its element count is
   the limit.  */
3190 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
3191 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3192 }
3193 [(set (attr "type")
3194 (if_then_else (match_test "<Is_float_mode>")
3195 (const_string "neon_fp_mul_s_scalar<q>")
3196 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3197 )
3198
;; VMULL by scalar: D-register operand 1 times lane (operand 3) of operand
;; 2, producing a <V_widen> result.  Operand 4 is the info word used by %T4.
3199 (define_insn "neon_vmull_lane<mode>"
3200 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3201 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3202 (match_operand:VMDI 2 "s_register_operand"
3203 "<scalar_mul_constraint>")
3204 (match_operand:SI 3 "immediate_operand" "i")
3205 (match_operand:SI 4 "immediate_operand" "i")]
3206 UNSPEC_VMULL_LANE))]
3207 "TARGET_NEON"
3208 {
3209 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3210 return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3211 }
3212 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
3213 )
3214
;; VQDMULL by scalar: D-register operand 1 and lane (operand 3) of operand
;; 2, producing a <V_widen> result.  Operand 4 is not used by the template.
3215 (define_insn "neon_vqdmull_lane<mode>"
3216 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3217 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3218 (match_operand:VMDI 2 "s_register_operand"
3219 "<scalar_mul_constraint>")
3220 (match_operand:SI 3 "immediate_operand" "i")
3221 (match_operand:SI 4 "immediate_operand" "i")]
3222 UNSPEC_VQDMULL_LANE))]
3223 "TARGET_NEON"
3224 {
3225 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3226 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3227 }
3228 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
3229 )
3230
;; VQDMULH by scalar, Q-register form: operand 1 (VMQI) and lane
;; (operand 3) of operand 2, which is a <V_HALF>-mode vector.  Operand 4 is
;; the info word consumed by the %O4 and %T4 modifiers.
;; NOTE(review): %O4 presumably inserts "r" for the rounding variant --
;; confirm against the operand-printing code.
(define_insn "neon_vqdmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
                      (match_operand:<V_HALF> 2 "s_register_operand"
                                              "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VQDMULH_LANE))]
  "TARGET_NEON"
{
  /* The lane selects an element of operand 2, whose mode is <V_HALF>, so
     the limit is that mode's element count (as the VMQ form of
     neon_vmul_lane already does).  Checking against <MODE>mode would
     accept lane numbers that do not exist in operand 2.  */
  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
  return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3246
;; VQDMULH by scalar, D-register form: operand 1 and lane (operand 3) of
;; operand 2, all VMDI.  Operand 4 is the info word consumed by %O4 and %T4.
;; NOTE(review): the type attribute ends in "_q", the same string the VMQI
;; variant uses, although every register here is printed with %P -- confirm
;; this is intended.
3247 (define_insn "neon_vqdmulh_lane<mode>"
3248 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3249 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3250 (match_operand:VMDI 2 "s_register_operand"
3251 "<scalar_mul_constraint>")
3252 (match_operand:SI 3 "immediate_operand" "i")
3253 (match_operand:SI 4 "immediate_operand" "i")]
3254 UNSPEC_VQDMULH_LANE))]
3255 "TARGET_NEON"
3256 {
3257 neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3258 return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]";
3259 }
3260 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3261 )
3262
;; VMLA by scalar, D-register form: accumulator operand 1 is tied to the
;; destination ("0"); operand 2 is multiplied by lane (operand 4) of
;; operand 3.  Operand 5 is not used by the template.
3263 (define_insn "neon_vmla_lane<mode>"
3264 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3265 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3266 (match_operand:VMD 2 "s_register_operand" "w")
3267 (match_operand:VMD 3 "s_register_operand"
3268 "<scalar_mul_constraint>")
3269 (match_operand:SI 4 "immediate_operand" "i")
3270 (match_operand:SI 5 "immediate_operand" "i")]
3271 UNSPEC_VMLA_LANE))]
3272 "TARGET_NEON"
3273 {
3274 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3275 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3276 }
3277 [(set (attr "type")
3278 (if_then_else (match_test "<Is_float_mode>")
3279 (const_string "neon_fp_mla_s_scalar<q>")
3280 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3281 )
3282
;; VMLA by scalar, Q-register form: accumulator operand 1 is tied to the
;; destination ("0"); operand 2 (VMQ) is multiplied by lane (operand 4) of
;; operand 3, which is a <V_HALF>-mode vector.  Operand 5 is not used by
;; the template.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")
                     (match_operand:SI 5 "immediate_operand" "i")]
                    UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  /* The lane selects an element of operand 3, whose mode is <V_HALF>, so
     the limit is that mode's element count (as the VMQ form of
     neon_vmul_lane already does).  Checking against <MODE>mode would
     accept lane numbers that do not exist in operand 3.  */
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
  return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3302
;; VMLAL by scalar: <V_widen> accumulator operand 1 is tied to the
;; destination; D-register operand 2 is multiplied by lane (operand 4) of
;; operand 3.  Operand 5 is the info word used by %T5.
3303 (define_insn "neon_vmlal_lane<mode>"
3304 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3305 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3306 (match_operand:VMDI 2 "s_register_operand" "w")
3307 (match_operand:VMDI 3 "s_register_operand"
3308 "<scalar_mul_constraint>")
3309 (match_operand:SI 4 "immediate_operand" "i")
3310 (match_operand:SI 5 "immediate_operand" "i")]
3311 UNSPEC_VMLAL_LANE))]
3312 "TARGET_NEON"
3313 {
3314 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3315 return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3316 }
3317 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3318 )
3319
;; VQDMLAL by scalar: <V_widen> accumulator operand 1 is tied to the
;; destination; D-register operand 2 and lane (operand 4) of operand 3 are
;; the multiplicands.  Operand 5 is not used by the template.
3320 (define_insn "neon_vqdmlal_lane<mode>"
3321 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3322 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3323 (match_operand:VMDI 2 "s_register_operand" "w")
3324 (match_operand:VMDI 3 "s_register_operand"
3325 "<scalar_mul_constraint>")
3326 (match_operand:SI 4 "immediate_operand" "i")
3327 (match_operand:SI 5 "immediate_operand" "i")]
3328 UNSPEC_VQDMLAL_LANE))]
3329 "TARGET_NEON"
3330 {
3331 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3332 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3333 }
3334 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3335 )
3336
;; VMLS by scalar, D-register form: accumulator operand 1 is tied to the
;; destination ("0"); operand 2 is multiplied by lane (operand 4) of
;; operand 3.  Operand 5 is not used by the template.
3337 (define_insn "neon_vmls_lane<mode>"
3338 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3339 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3340 (match_operand:VMD 2 "s_register_operand" "w")
3341 (match_operand:VMD 3 "s_register_operand"
3342 "<scalar_mul_constraint>")
3343 (match_operand:SI 4 "immediate_operand" "i")
3344 (match_operand:SI 5 "immediate_operand" "i")]
3345 UNSPEC_VMLS_LANE))]
3346 "TARGET_NEON"
3347 {
3348 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3349 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3350 }
3351 [(set (attr "type")
3352 (if_then_else (match_test "<Is_float_mode>")
3353 (const_string "neon_fp_mla_s_scalar<q>")
3354 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3355 )
3356
;; VMLS by scalar, Q-register form: accumulator operand 1 is tied to the
;; destination ("0"); operand 2 (VMQ) is multiplied by lane (operand 4) of
;; operand 3, which is a <V_HALF>-mode vector.  Operand 5 is not used by
;; the template.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")
                     (match_operand:SI 5 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  /* The lane selects an element of operand 3, whose mode is <V_HALF>, so
     the limit is that mode's element count (as the VMQ form of
     neon_vmul_lane already does).  Checking against <MODE>mode would
     accept lane numbers that do not exist in operand 3.  */
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
  return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3376
;; VMLSL by scalar: <V_widen> accumulator operand 1 is tied to the
;; destination; D-register operand 2 is multiplied by lane (operand 4) of
;; operand 3.  Operand 5 is the info word used by %T5.
3377 (define_insn "neon_vmlsl_lane<mode>"
3378 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3379 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3380 (match_operand:VMDI 2 "s_register_operand" "w")
3381 (match_operand:VMDI 3 "s_register_operand"
3382 "<scalar_mul_constraint>")
3383 (match_operand:SI 4 "immediate_operand" "i")
3384 (match_operand:SI 5 "immediate_operand" "i")]
3385 UNSPEC_VMLSL_LANE))]
3386 "TARGET_NEON"
3387 {
3388 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3389 return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3390 }
3391 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3392 )
3393
;; VQDMLSL by scalar: <V_widen> accumulator operand 1 is tied to the
;; destination; D-register operand 2 and lane (operand 4) of operand 3 are
;; the multiplicands.  Operand 5 is not used by the template.
3394 (define_insn "neon_vqdmlsl_lane<mode>"
3395 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3396 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3397 (match_operand:VMDI 2 "s_register_operand" "w")
3398 (match_operand:VMDI 3 "s_register_operand"
3399 "<scalar_mul_constraint>")
3400 (match_operand:SI 4 "immediate_operand" "i")
3401 (match_operand:SI 5 "immediate_operand" "i")]
3402 UNSPEC_VQDMLSL_LANE))]
3403 "TARGET_NEON"
3404 {
3405 neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
3406 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3407 }
3408 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3409 )
3410
3411 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3412 ; core register into a temp register, then use a scalar taken from that. This
3413 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3414 ; or extracted from another vector. The latter case it's currently better to
3415 ; use the "_lane" variant, and the former case can probably be implemented
3416 ; using vld1_lane, but that hasn't been done yet.
3417
;; vmul_n, D-register form: the scalar (operand 2) is inserted into lane 0
;; of a fresh <MODE> temporary, and the multiply is then done with the
;; "_lane" pattern selecting lane 0.  Operand 3 is not forwarded.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2],
                                       scalar_vec, lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1],
                                       scalar_vec, lane0, const0_rtx));
  DONE;
})
3431
;; vmul_n, Q-register form: the scalar (operand 2) is inserted into lane 0
;; of a fresh <V_HALF> temporary, and the multiply is then done with the
;; "_lane" pattern selecting lane 0.  Operand 3 is not forwarded.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2],
                                         scalar_vec, lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1],
                                       scalar_vec, lane0, const0_rtx));
  DONE;
})
3445
;; vmull_n: the scalar (operand 2) is inserted into lane 0 of a fresh
;; <MODE> temporary, and the widening multiply is done with the "_lane"
;; pattern selecting lane 0.  The info word (operand 3) is passed through.
(define_expand "neon_vmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2],
                                       scalar_vec, lane0));
  emit_insn (gen_neon_vmull_lane<mode> (operands[0], operands[1],
                                        scalar_vec, lane0, operands[3]));
  DONE;
})
3459
;; vqdmull_n: the scalar (operand 2) is inserted into lane 0 of a fresh
;; <MODE> temporary, and the operation is done with the "_lane" pattern
;; selecting lane 0.  Operand 3 is not forwarded.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2],
                                       scalar_vec, lane0));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0, const0_rtx));
  DONE;
})
3473
;; vqdmulh_n, D-register form: the scalar (operand 2) is inserted into lane
;; 0 of a fresh <MODE> temporary, and the operation is done with the
;; "_lane" pattern selecting lane 0.  The info word (operand 3) is passed
;; through.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2],
                                       scalar_vec, lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0, operands[3]));
  DONE;
})
3487
;; vqdmulh_n, Q-register form: the scalar (operand 2) is inserted into lane
;; 0 of a fresh <V_HALF> temporary, and the operation is done with the
;; "_lane" pattern selecting lane 0.  The info word (operand 3) is passed
;; through.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2],
                                         scalar_vec, lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0, operands[3]));
  DONE;
})
3501
;; vmla_n, D-register form: the scalar (operand 3) is inserted into lane 0
;; of a fresh <MODE> temporary, and the multiply-accumulate is done with
;; the "_lane" pattern selecting lane 0.  Operand 4 is passed through.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       lane0, operands[4]));
  DONE;
})
3516
;; vmla_n, Q-register form: the scalar (operand 3) is inserted into lane 0
;; of a fresh <V_HALF> temporary, and the multiply-accumulate is done with
;; the "_lane" pattern selecting lane 0.  Operand 4 is passed through.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3],
                                         scalar_vec, lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       lane0, operands[4]));
  DONE;
})
3531
;; vmlal_n: the scalar (operand 3) is inserted into lane 0 of a fresh
;; <MODE> temporary, and the widening multiply-accumulate is done with the
;; "_lane" pattern selecting lane 0.  Operand 4 is passed through.
(define_expand "neon_vmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, lane0));
  emit_insn (gen_neon_vmlal_lane<mode> (operands[0], operands[1],
                                        operands[2], scalar_vec,
                                        lane0, operands[4]));
  DONE;
})
3546
;; vqdmlal_n: the scalar (operand 3) is inserted into lane 0 of a fresh
;; <MODE> temporary, and the operation is done with the "_lane" pattern
;; selecting lane 0.  Operand 4 is passed through.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, lane0));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          lane0, operands[4]));
  DONE;
})
3561
;; vmls_n, D-register form: the scalar (operand 3) is inserted into lane 0
;; of a fresh <MODE> temporary, and the multiply-subtract is done with the
;; "_lane" pattern selecting lane 0.  Operand 4 is passed through.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       lane0, operands[4]));
  DONE;
})
3576
;; vmls_n, Q-register form: the scalar (operand 3) is inserted into lane 0
;; of a fresh <V_HALF> temporary, and the multiply-subtract is done with
;; the "_lane" pattern selecting lane 0.  Operand 4 is passed through.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3],
                                         scalar_vec, lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       lane0, operands[4]));
  DONE;
})
3591
;; Expander for the "_n" (scalar operand) form of VMLSL.  The scalar in
;; operand 3 is written to lane 0 of a fresh <MODE>mode register and the
;; work is handed to neon_vmlsl_lane<mode> with lane index 0.
(define_expand "neon_vmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  /* The temporary is both the destination and the source vector of the
     vset_lane, so only lane 0 is written here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx set_lane = gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                           scalar_vec, const0_rtx);
  emit_insn (set_lane);

  rtx by_lane = gen_neon_vmlsl_lane<mode> (operands[0], operands[1],
                                           operands[2], scalar_vec,
                                           const0_rtx, operands[4]);
  emit_insn (by_lane);
  DONE;
})
3606
;; Expander for the "_n" (scalar operand) form of VQDMLSL.  The scalar in
;; operand 3 is written to lane 0 of a fresh <MODE>mode register and the
;; work is handed to neon_vqdmlsl_lane<mode> with lane index 0.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  /* The temporary is both the destination and the source vector of the
     vset_lane, so only lane 0 is written here.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx set_lane = gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                           scalar_vec, const0_rtx);
  emit_insn (set_lane);

  rtx by_lane = gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                             operands[2], scalar_vec,
                                             const0_rtx, operands[4]);
  emit_insn (by_lane);
  DONE;
})
3621
;; VEXT on two VDQX-mode registers (operands 1 and 2).  Operand 3 is an
;; immediate that is passed to neon_const_bounds with the bounds 0 and the
;; number of units in <MODE> before being printed as the last operand.
3622 (define_insn "neon_vext<mode>"
3623 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3624 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3625 (match_operand:VDQX 2 "s_register_operand" "w")
3626 (match_operand:SI 3 "immediate_operand" "i")]
3627 UNSPEC_VEXT))]
3628 "TARGET_NEON"
3629 {
3630 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3631 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3632 }
3633 [(set_attr "type" "neon_ext<q>")]
3634 )
3635
;; VREV64 on a VDQ-mode register.  Operand 2 is an immediate that is part
;; of the pattern but is not referenced by the output template.
3636 (define_insn "neon_vrev64<mode>"
3637 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3638 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
3639 (match_operand:SI 2 "immediate_operand" "i")]
3640 UNSPEC_VREV64))]
3641 "TARGET_NEON"
3642 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3643 [(set_attr "type" "neon_rev<q>")]
3644 )
3645
;; VREV32 on a VX-mode register.  Operand 2 is an immediate that is part
;; of the pattern but is not referenced by the output template.
3646 (define_insn "neon_vrev32<mode>"
3647 [(set (match_operand:VX 0 "s_register_operand" "=w")
3648 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")
3649 (match_operand:SI 2 "immediate_operand" "i")]
3650 UNSPEC_VREV32))]
3651 "TARGET_NEON"
3652 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3653 [(set_attr "type" "neon_rev<q>")]
3654 )
3655
;; VREV16 on a VE-mode register.  Operand 2 is an immediate that is part
;; of the pattern but is not referenced by the output template.
3656 (define_insn "neon_vrev16<mode>"
3657 [(set (match_operand:VE 0 "s_register_operand" "=w")
3658 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
3659 (match_operand:SI 2 "immediate_operand" "i")]
3660 UNSPEC_VREV16))]
3661 "TARGET_NEON"
3662 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3663 [(set_attr "type" "neon_rev<q>")]
3664 )
3665
3666 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3667 ; allocation. For an intrinsic of form:
3668 ; rD = vbsl_* (rS, rN, rM)
3669 ; We can use any of:
3670 ; vbsl rS, rN, rM (if D = S)
3671 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3672 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
3673
;; Bitwise select with three alternatives, one per input that can be tied
;; to the destination: operand 1 tied gives vbsl, operand 3 tied gives
;; vbit, operand 2 tied gives vbif.  The two untied inputs are printed in
;; the order each mnemonic's template lists them.
3674 (define_insn "neon_vbsl<mode>_internal"
3675 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3676 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3677 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3678 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3679 UNSPEC_VBSL))]
3680 "TARGET_NEON"
3681 "@
3682 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3683 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3684 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3685 [(set_attr "type" "neon_bsl<q>")]
3686 )
3687
;; Expander for vbsl.  Operand 1 arrives in <V_cmp_result> mode and is
;; converted to <MODE>mode with gen_lowpart, so that all three inputs of
;; the UNSPEC_VBSL pattern share the destination's mode.
3688 (define_expand "neon_vbsl<mode>"
3689 [(set (match_operand:VDQX 0 "s_register_operand" "")
3690 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3691 (match_operand:VDQX 2 "s_register_operand" "")
3692 (match_operand:VDQX 3 "s_register_operand" "")]
3693 UNSPEC_VBSL))]
3694 "TARGET_NEON"
3695 {
3696 /* We can't alias operands together if they have different modes. */
3697 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
3698 })
3699
;; Shift left where the shift amounts come from a vector register
;; (operand 2).  Operand 3 is an immediate used only through the %O3 and
;; %T3 output modifiers, which build the mnemonic prefix and type suffix.
;; NOTE(review): presumably %O selects the rounding variant and %T the
;; signedness; confirm against the operand-printing code.
3700 (define_insn "neon_vshl<mode>"
3701 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3702 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3703 (match_operand:VDQIX 2 "s_register_operand" "w")
3704 (match_operand:SI 3 "immediate_operand" "i")]
3705 UNSPEC_VSHL))]
3706 "TARGET_NEON"
3707 "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3708 [(set_attr "type" "neon_shift_imm<q>")]
3709 )
3710
;; Saturating counterpart of neon_vshl<mode>: same operands, "vq" mnemonic
;; prefix.  Operand 3 is used only through the %O3 and %T3 modifiers.
3711 (define_insn "neon_vqshl<mode>"
3712 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3713 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3714 (match_operand:VDQIX 2 "s_register_operand" "w")
3715 (match_operand:SI 3 "immediate_operand" "i")]
3716 UNSPEC_VQSHL))]
3717 "TARGET_NEON"
3718 "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3719 [(set_attr "type" "neon_sat_shift_imm<q>")]
3720 )
3721
;; Shift right by immediate.  Operand 2 is the shift count, passed to
;; neon_const_bounds with the bounds 1 and element-bits + 1.  Operand 3
;; is used only through the %O3 and %T3 output modifiers.
3722 (define_insn "neon_vshr_n<mode>"
3723 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3724 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3725 (match_operand:SI 2 "immediate_operand" "i")
3726 (match_operand:SI 3 "immediate_operand" "i")]
3727 UNSPEC_VSHR_N))]
3728 "TARGET_NEON"
3729 {
3730 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3731 return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3732 }
3733 [(set_attr "type" "neon_shift_imm<q>")]
3734 )
3735
;; Narrowing shift right by immediate: the VN-mode source (operand 1)
;; yields a <V_narrow> result.  The count (operand 2) is passed to
;; neon_const_bounds with the bounds 1 and element-bits / 2 + 1.  Operand 3
;; is used only through the %O3 output modifier.
3736 (define_insn "neon_vshrn_n<mode>"
3737 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3738 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3739 (match_operand:SI 2 "immediate_operand" "i")
3740 (match_operand:SI 3 "immediate_operand" "i")]
3741 UNSPEC_VSHRN_N))]
3742 "TARGET_NEON"
3743 {
3744 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3745 return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2";
3746 }
3747 [(set_attr "type" "neon_shift_imm_narrow_q")]
3748 )
3749
;; Saturating narrowing shift right by immediate.  The count (operand 2)
;; is passed to neon_const_bounds with the bounds 1 and
;; element-bits / 2 + 1.  Operand 3 is used only through the %O3 and %T3
;; output modifiers.
3750 (define_insn "neon_vqshrn_n<mode>"
3751 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3752 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3753 (match_operand:SI 2 "immediate_operand" "i")
3754 (match_operand:SI 3 "immediate_operand" "i")]
3755 UNSPEC_VQSHRN_N))]
3756 "TARGET_NEON"
3757 {
3758 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3759 return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3760 }
3761 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3762 )
3763
;; Same shape as neon_vqshrn_n<mode>, but emits the "shrun" mnemonic and
;; uses UNSPEC_VQSHRUN_N.  The count (operand 2) is passed to
;; neon_const_bounds with the bounds 1 and element-bits / 2 + 1.
3764 (define_insn "neon_vqshrun_n<mode>"
3765 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3766 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3767 (match_operand:SI 2 "immediate_operand" "i")
3768 (match_operand:SI 3 "immediate_operand" "i")]
3769 UNSPEC_VQSHRUN_N))]
3770 "TARGET_NEON"
3771 {
3772 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3773 return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3774 }
3775 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3776 )
3777
;; Shift left by immediate.  The count (operand 2) is passed to
;; neon_const_bounds with the bounds 0 and element-bits.  Operand 3 is
;; part of the pattern but is not referenced by the output template.
3778 (define_insn "neon_vshl_n<mode>"
3779 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3780 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3781 (match_operand:SI 2 "immediate_operand" "i")
3782 (match_operand:SI 3 "immediate_operand" "i")]
3783 UNSPEC_VSHL_N))]
3784 "TARGET_NEON"
3785 {
3786 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3787 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3788 }
3789 [(set_attr "type" "neon_shift_imm<q>")]
3790 )
3791
;; Saturating shift left by immediate.  The count (operand 2) is passed
;; to neon_const_bounds with the bounds 0 and element-bits.  Operand 3 is
;; used only through the %T3 output modifier.
3792 (define_insn "neon_vqshl_n<mode>"
3793 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3794 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3795 (match_operand:SI 2 "immediate_operand" "i")
3796 (match_operand:SI 3 "immediate_operand" "i")]
3797 UNSPEC_VQSHL_N))]
3798 "TARGET_NEON"
3799 {
3800 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3801 return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3802 }
3803 [(set_attr "type" "neon_sat_shift_imm<q>")]
3804 )
3805
;; Same shape as neon_vqshl_n<mode>, but emits the "vqshlu" mnemonic and
;; uses UNSPEC_VQSHLU_N.  The count (operand 2) is passed to
;; neon_const_bounds with the bounds 0 and element-bits.
3806 (define_insn "neon_vqshlu_n<mode>"
3807 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3808 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3809 (match_operand:SI 2 "immediate_operand" "i")
3810 (match_operand:SI 3 "immediate_operand" "i")]
3811 UNSPEC_VQSHLU_N))]
3812 "TARGET_NEON"
3813 {
3814 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3815 return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3816 }
3817 [(set_attr "type" "neon_sat_shift_imm<q>")]
3818 )
3819
;; Widening shift left by immediate: the VW-mode source (operand 1) yields
;; a <V_widen> result.  The count (operand 2) is passed to
;; neon_const_bounds with the bounds 0 and element-bits + 1.  Operand 3 is
;; used only through the %T3 output modifier.
;; NOTE(review): the comment in the body states "0 < imm", yet the lower
;; bound passed is 0, the same value neon_vshl_n<mode> passes; confirm
;; whether a count of 0 is meant to be accepted here.
3820 (define_insn "neon_vshll_n<mode>"
3821 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3822 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3823 (match_operand:SI 2 "immediate_operand" "i")
3824 (match_operand:SI 3 "immediate_operand" "i")]
3825 UNSPEC_VSHLL_N))]
3826 "TARGET_NEON"
3827 {
3828 /* The boundaries are: 0 < imm <= size. */
3829 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3830 return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
3831 }
3832 [(set_attr "type" "neon_shift_imm_long")]
3833 )
3834
;; Shift right by immediate with operand 1 tied to the destination
;; (constraint "0"); only operand 2 and the count appear in the template.
;; The count (operand 3) is passed to neon_const_bounds with the bounds 1
;; and element-bits + 1.  Operand 4 is used only through %O4 and %T4.
3835 (define_insn "neon_vsra_n<mode>"
3836 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3837 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3838 (match_operand:VDQIX 2 "s_register_operand" "w")
3839 (match_operand:SI 3 "immediate_operand" "i")
3840 (match_operand:SI 4 "immediate_operand" "i")]
3841 UNSPEC_VSRA_N))]
3842 "TARGET_NEON"
3843 {
3844 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3845 return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3846 }
3847 [(set_attr "type" "neon_shift_acc<q>")]
3848 )
3849
;; VSRI with operand 1 tied to the destination (constraint "0").  The
;; count (operand 3) is passed to neon_const_bounds with the bounds 1 and
;; element-bits + 1.
3850 (define_insn "neon_vsri_n<mode>"
3851 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3852 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3853 (match_operand:VDQIX 2 "s_register_operand" "w")
3854 (match_operand:SI 3 "immediate_operand" "i")]
3855 UNSPEC_VSRI))]
3856 "TARGET_NEON"
3857 {
3858 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3859 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3860 }
3861 [(set_attr "type" "neon_shift_reg<q>")]
3862 )
3863
;; VSLI with operand 1 tied to the destination (constraint "0").  The
;; count (operand 3) is passed to neon_const_bounds with the bounds 0 and
;; element-bits.
3864 (define_insn "neon_vsli_n<mode>"
3865 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3866 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3867 (match_operand:VDQIX 2 "s_register_operand" "w")
3868 (match_operand:SI 3 "immediate_operand" "i")]
3869 UNSPEC_VSLI))]
3870 "TARGET_NEON"
3871 {
3872 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3873 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3874 }
3875 [(set_attr "type" "neon_shift_reg<q>")]
3876 )
3877
;; VTBL with a one-register table: operand 1 is the V8QI table, printed
;; inside the braces, and operand 2 is the V8QI index vector.
3878 (define_insn "neon_vtbl1v8qi"
3879 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3880 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3881 (match_operand:V8QI 2 "s_register_operand" "w")]
3882 UNSPEC_VTBL))]
3883 "TARGET_NEON"
3884 "vtbl.8\t%P0, {%P1}, %P2"
3885 [(set_attr "type" "neon_tbl1")]
3886 )
3887
;; VTBL with a two-register table held in a TImode operand.  The output
;; code names the table registers individually: V8QImode registers at the
;; operand's register number and at that number plus 2.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int first_reg = REGNO (operands[1]);
  rtx asm_ops[4];

  /* Destination, the two table registers, then the index vector.  */
  asm_ops[0] = operands[0];
  asm_ops[1] = gen_rtx_REG (V8QImode, first_reg);
  asm_ops[2] = gen_rtx_REG (V8QImode, first_reg + 2);
  asm_ops[3] = operands[2];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
3908
;; VTBL with a three-register table held in an EImode operand.  The output
;; code names the table registers individually: V8QImode registers at the
;; operand's register number plus 0, 2 and 4.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int first_reg = REGNO (operands[1]);
  rtx asm_ops[5];

  /* Destination, the three table registers, then the index vector.  */
  asm_ops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[4] = operands[2];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
3930
;; VTBL with a four-register table held in an OImode operand.  The output
;; code names the table registers individually: V8QImode registers at the
;; operand's register number plus 0, 2, 4 and 6.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int first_reg = REGNO (operands[1]);
  rtx asm_ops[6];

  /* Destination, the four table registers, then the index vector.  */
  asm_ops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[5] = operands[2];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
3953
3954 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QImode table lookup with a 16-byte table (operand 1).  It is emitted
;; as "#" and split once reload has completed into two neon_vtbl2v8qi
;; insns.  Each one produces one V8QImode half of operand 0 from the
;; matching half of the index vector (operand 2), reading the whole table
;; viewed as TImode.  Operand 0 is marked earlyclobber ("=&w").
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx table = gen_lowpart (TImode, operands[1]);
  rtx index = operands[2];
  unsigned half_ofs[2];

  /* Low half first, then high half.  */
  half_ofs[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_ofs[1] = subreg_highpart_offset (V8QImode, V16QImode);

  for (int i = 0; i < 2; i++)
    {
      rtx dest_half = simplify_subreg (V8QImode, dest, V16QImode,
                                       half_ofs[i]);
      rtx index_half = simplify_subreg (V8QImode, index, V16QImode,
                                        half_ofs[i]);
      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, index_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
3985
;; V16QImode table lookup with a 32-byte table held in an OImode operand
;; (operand 1).  It is emitted as "#" and split once reload has completed
;; into two V8QImode lookups.  Each one produces one half of operand 0
;; from the matching half of the index vector (operand 2), reading the
;; whole table.  Operand 0 is marked earlyclobber ("=&w").
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  /* The table is OImode, so use the vtbl4 pattern, whose table operand is
     declared OImode.  neon_vtbl2v8qi declares a TImode table and does
     not describe this operand.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));

  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  DONE;
}
  [(set_attr "type" "multiple")]
)
4016
4017 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4018 ;; handle quad-word input modes, producing octa-word output modes. But
4019 ;; that requires us to add support for octa-word vector modes in moves.
4020 ;; That seems overkill for this one use in vec_perm.
;; Combine two V16QImode registers into one OImode value.  The insn is
;; emitted as "#" and split once reload has completed; the split itself
;; is done by neon_split_vcombine.
4021 (define_insn_and_split "neon_vcombinev16qi"
4022 [(set (match_operand:OI 0 "s_register_operand" "=w")
4023 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4024 (match_operand:V16QI 2 "s_register_operand" "w")]
4025 UNSPEC_VCONCAT))]
4026 "TARGET_NEON"
4027 "#"
4028 "&& reload_completed"
4029 [(const_int 0)]
4030 {
4031 neon_split_vcombine (operands);
4032 DONE;
4033 }
4034 [(set_attr "type" "multiple")]
4035 )
4036
;; VTBX with a one-register table.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 is the V8QI table printed inside the
;; braces, and operand 3 is the V8QI index vector.
4037 (define_insn "neon_vtbx1v8qi"
4038 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4039 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4040 (match_operand:V8QI 2 "s_register_operand" "w")
4041 (match_operand:V8QI 3 "s_register_operand" "w")]
4042 UNSPEC_VTBX))]
4043 "TARGET_NEON"
4044 "vtbx.8\t%P0, {%P2}, %P3"
4045 [(set_attr "type" "neon_tbl1")]
4046 )
4047
;; VTBX with a two-register table held in a TImode operand (operand 2).
;; Operand 1 is tied to the destination.  The output code names the table
;; registers individually: V8QImode registers at the table operand's
;; register number and at that number plus 2.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int first_reg = REGNO (operands[2]);
  rtx asm_ops[4];

  /* Destination, the two table registers, then the index vector.  */
  asm_ops[0] = operands[0];
  asm_ops[1] = gen_rtx_REG (V8QImode, first_reg);
  asm_ops[2] = gen_rtx_REG (V8QImode, first_reg + 2);
  asm_ops[3] = operands[3];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4069
;; VTBX with a three-register table held in an EImode operand (operand 2).
;; Operand 1 is tied to the destination.  The output code names the table
;; registers individually: V8QImode registers at the table operand's
;; register number plus 0, 2 and 4.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int first_reg = REGNO (operands[2]);
  rtx asm_ops[5];

  /* Destination, the three table registers, then the index vector.  */
  asm_ops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[4] = operands[3];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4092
;; VTBX with a four-register table held in an OImode operand (operand 2).
;; Operand 1 is tied to the destination.  The output code names the table
;; registers individually: V8QImode registers at the table operand's
;; register number plus 0, 2, 4 and 6.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:OI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int first_reg = REGNO (operands[2]);
  rtx asm_ops[6];

  /* Destination, the four table registers, then the index vector.  */
  asm_ops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[5] = operands[3];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4116
;; Builds the two-result VTRN parallel: operand 0 receives UNSPEC_VTRN1
;; and operand 3 receives UNSPEC_VTRN2 of the same inputs (operands 1 and
;; 2).  There is no preparation code.
4117 (define_expand "neon_vtrn<mode>_internal"
4118 [(parallel
4119 [(set (match_operand:VDQW 0 "s_register_operand" "")
4120 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4121 (match_operand:VDQW 2 "s_register_operand" "")]
4122 UNSPEC_VTRN1))
4123 (set (match_operand:VDQW 3 "s_register_operand" "")
4124 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4125 "TARGET_NEON"
4126 ""
4127 )
4128
4129 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matches the parallel built by neon_vtrn<mode>_internal.  Input operand
;; 1 is tied to output 0 and input operand 3 is tied to output 2, so the
;; template prints only the two output registers.
4130 (define_insn "*neon_vtrn<mode>_insn"
4131 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4132 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4133 (match_operand:VDQW 3 "s_register_operand" "2")]
4134 UNSPEC_VTRN1))
4135 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4136 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4137 UNSPEC_VTRN2))]
4138 "TARGET_NEON"
4139 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4140 [(set_attr "type" "neon_permute<q>")]
4141 )
4142
;; Expander for vtrn.  It passes the SImode operand 0, the two vector
;; inputs and gen_neon_vtrn<mode>_internal to neon_emit_pair_result_insn,
;; which does all the work.
4143 (define_expand "neon_vtrn<mode>"
4144 [(match_operand:SI 0 "s_register_operand" "r")
4145 (match_operand:VDQW 1 "s_register_operand" "w")
4146 (match_operand:VDQW 2 "s_register_operand" "w")]
4147 "TARGET_NEON"
4148 {
4149 neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal,
4150 operands[0], operands[1], operands[2]);
4151 DONE;
4152 })
4153
;; Builds the two-result VZIP parallel: operand 0 receives UNSPEC_VZIP1
;; and operand 3 receives UNSPEC_VZIP2 of the same inputs (operands 1 and
;; 2).  There is no preparation code.
4154 (define_expand "neon_vzip<mode>_internal"
4155 [(parallel
4156 [(set (match_operand:VDQW 0 "s_register_operand" "")
4157 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4158 (match_operand:VDQW 2 "s_register_operand" "")]
4159 UNSPEC_VZIP1))
4160 (set (match_operand:VDQW 3 "s_register_operand" "")
4161 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4162 "TARGET_NEON"
4163 ""
4164 )
4165
4166 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matches the parallel built by neon_vzip<mode>_internal.  Input operand
;; 1 is tied to output 0 and input operand 3 is tied to output 2, so the
;; template prints only the two output registers.
4167 (define_insn "*neon_vzip<mode>_insn"
4168 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4169 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4170 (match_operand:VDQW 3 "s_register_operand" "2")]
4171 UNSPEC_VZIP1))
4172 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4173 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4174 UNSPEC_VZIP2))]
4175 "TARGET_NEON"
4176 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4177 [(set_attr "type" "neon_zip<q>")]
4178 )
4179
;; Expander for vzip.  It passes the SImode operand 0, the two vector
;; inputs and gen_neon_vzip<mode>_internal to neon_emit_pair_result_insn,
;; which does all the work.
4180 (define_expand "neon_vzip<mode>"
4181 [(match_operand:SI 0 "s_register_operand" "r")
4182 (match_operand:VDQW 1 "s_register_operand" "w")
4183 (match_operand:VDQW 2 "s_register_operand" "w")]
4184 "TARGET_NEON"
4185 {
4186 neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal,
4187 operands[0], operands[1], operands[2]);
4188 DONE;
4189 })
4190
;; Builds the two-result VUZP parallel: operand 0 receives UNSPEC_VUZP1
;; and operand 3 receives UNSPEC_VUZP2 of the same inputs (operands 1 and
;; 2).  There is no preparation code.
4191 (define_expand "neon_vuzp<mode>_internal"
4192 [(parallel
4193 [(set (match_operand:VDQW 0 "s_register_operand" "")
4194 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4195 (match_operand:VDQW 2 "s_register_operand" "")]
4196 UNSPEC_VUZP1))
4197 (set (match_operand:VDQW 3 "s_register_operand" "")
4198 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4199 "TARGET_NEON"
4200 ""
4201 )
4202
4203 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matches the parallel built by neon_vuzp<mode>_internal.  Input operand
;; 1 is tied to output 0 and input operand 3 is tied to output 2, so the
;; template prints only the two output registers.
4204 (define_insn "*neon_vuzp<mode>_insn"
4205 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4206 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4207 (match_operand:VDQW 3 "s_register_operand" "2")]
4208 UNSPEC_VUZP1))
4209 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4210 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4211 UNSPEC_VUZP2))]
4212 "TARGET_NEON"
4213 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4214 [(set_attr "type" "neon_zip<q>")]
4215 )
4216
;; Expander for vuzp.  It passes the SImode operand 0, the two vector
;; inputs and gen_neon_vuzp<mode>_internal to neon_emit_pair_result_insn,
;; which does all the work.
4217 (define_expand "neon_vuzp<mode>"
4218 [(match_operand:SI 0 "s_register_operand" "r")
4219 (match_operand:VDQW 1 "s_register_operand" "w")
4220 (match_operand:VDQW 2 "s_register_operand" "w")]
4221 "TARGET_NEON"
4222 {
4223 neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal,
4224 operands[0], operands[1], operands[2]);
4225 DONE;
4226 })
4227
;; Reinterpret a VDX-mode register (operand 1) as V8QI (operand 0).  All
;; work is delegated to neon_reinterpret.
4228 (define_expand "neon_vreinterpretv8qi<mode>"
4229 [(match_operand:V8QI 0 "s_register_operand" "")
4230 (match_operand:VDX 1 "s_register_operand" "")]
4231 "TARGET_NEON"
4232 {
4233 neon_reinterpret (operands[0], operands[1]);
4234 DONE;
4235 })
4236
;; Reinterpret a VDX-mode register (operand 1) as V4HI (operand 0).  All
;; work is delegated to neon_reinterpret.
4237 (define_expand "neon_vreinterpretv4hi<mode>"
4238 [(match_operand:V4HI 0 "s_register_operand" "")
4239 (match_operand:VDX 1 "s_register_operand" "")]
4240 "TARGET_NEON"
4241 {
4242 neon_reinterpret (operands[0], operands[1]);
4243 DONE;
4244 })
4245
;; Reinterpret a VDX-mode register (operand 1) as V2SI (operand 0).  All
;; work is delegated to neon_reinterpret.
4246 (define_expand "neon_vreinterpretv2si<mode>"
4247 [(match_operand:V2SI 0 "s_register_operand" "")
4248 (match_operand:VDX 1 "s_register_operand" "")]
4249 "TARGET_NEON"
4250 {
4251 neon_reinterpret (operands[0], operands[1]);
4252 DONE;
4253 })
4254
;; Reinterpret a VDX-mode register (operand 1) as V2SF (operand 0).  All
;; work is delegated to neon_reinterpret.
4255 (define_expand "neon_vreinterpretv2sf<mode>"
4256 [(match_operand:V2SF 0 "s_register_operand" "")
4257 (match_operand:VDX 1 "s_register_operand" "")]
4258 "TARGET_NEON"
4259 {
4260 neon_reinterpret (operands[0], operands[1]);
4261 DONE;
4262 })
4263
;; Reinterpret a VDX-mode register (operand 1) as DI (operand 0).  All
;; work is delegated to neon_reinterpret.
4264 (define_expand "neon_vreinterpretdi<mode>"
4265 [(match_operand:DI 0 "s_register_operand" "")
4266 (match_operand:VDX 1 "s_register_operand" "")]
4267 "TARGET_NEON"
4268 {
4269 neon_reinterpret (operands[0], operands[1]);
4270 DONE;
4271 })
4272
;; Reinterpret a VQXMOV-mode register (operand 1) as TI (operand 0).  All
;; work is delegated to neon_reinterpret.
4273 (define_expand "neon_vreinterpretti<mode>"
4274 [(match_operand:TI 0 "s_register_operand" "")
4275 (match_operand:VQXMOV 1 "s_register_operand" "")]
4276 "TARGET_NEON"
4277 {
4278 neon_reinterpret (operands[0], operands[1]);
4279 DONE;
4280 })
4281
4282
;; Reinterpret a VQXMOV-mode register (operand 1) as V16QI (operand 0).
;; All work is delegated to neon_reinterpret.
4283 (define_expand "neon_vreinterpretv16qi<mode>"
4284 [(match_operand:V16QI 0 "s_register_operand" "")
4285 (match_operand:VQXMOV 1 "s_register_operand" "")]
4286 "TARGET_NEON"
4287 {
4288 neon_reinterpret (operands[0], operands[1]);
4289 DONE;
4290 })
4291
;; Reinterpret a VQXMOV-mode register (operand 1) as V8HI (operand 0).
;; All work is delegated to neon_reinterpret.
4292 (define_expand "neon_vreinterpretv8hi<mode>"
4293 [(match_operand:V8HI 0 "s_register_operand" "")
4294 (match_operand:VQXMOV 1 "s_register_operand" "")]
4295 "TARGET_NEON"
4296 {
4297 neon_reinterpret (operands[0], operands[1]);
4298 DONE;
4299 })
4300
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SI (operand 0).
;; All work is delegated to neon_reinterpret.
4301 (define_expand "neon_vreinterpretv4si<mode>"
4302 [(match_operand:V4SI 0 "s_register_operand" "")
4303 (match_operand:VQXMOV 1 "s_register_operand" "")]
4304 "TARGET_NEON"
4305 {
4306 neon_reinterpret (operands[0], operands[1]);
4307 DONE;
4308 })
4309
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SF (operand 0).
;; All work is delegated to neon_reinterpret.
4310 (define_expand "neon_vreinterpretv4sf<mode>"
4311 [(match_operand:V4SF 0 "s_register_operand" "")
4312 (match_operand:VQXMOV 1 "s_register_operand" "")]
4313 "TARGET_NEON"
4314 {
4315 neon_reinterpret (operands[0], operands[1]);
4316 DONE;
4317 })
4318
;; Reinterpret a VQXMOV-mode register (operand 1) as V2DI (operand 0).
;; All work is delegated to neon_reinterpret.
4319 (define_expand "neon_vreinterpretv2di<mode>"
4320 [(match_operand:V2DI 0 "s_register_operand" "")
4321 (match_operand:VQXMOV 1 "s_register_operand" "")]
4322 "TARGET_NEON"
4323 {
4324 neon_reinterpret (operands[0], operands[1]);
4325 DONE;
4326 })
4327
;; Expander with no preparation code.  It produces the same UNSPEC_VLD1
;; RTL shape that the neon_vld1<mode> insn matches.
4328 (define_expand "vec_load_lanes<mode><mode>"
4329 [(set (match_operand:VDQX 0 "s_register_operand")
4330 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4331 UNSPEC_VLD1))]
4332 "TARGET_NEON")
4333
;; VLD1 of a whole VDQX-mode vector from a neon_struct_operand memory
;; reference (constraint "Um").  The destination is printed with %h and
;; the address with %A.
4334 (define_insn "neon_vld1<mode>"
4335 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4336 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4337 UNSPEC_VLD1))]
4338 "TARGET_NEON"
4339 "vld1.<V_sz_elem>\t%h0, %A1"
4340 [(set_attr "type" "neon_load1_1reg<q>")]
4341 )
4342
;; Load one element from memory (operand 1) into lane operand 3 of a
;; VDX-mode register.  Operand 2, the incoming vector, is tied to the
;; destination.  A lane index outside [0, number of units) is reported
;; with error ().  Single-element modes use the plain register form of
;; vld1 with no lane suffix.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT lane_idx = INTVAL (operands[3]);

  if (!(lane_idx >= 0 && lane_idx < nunits))
    error ("lane out of range");

  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4362
;; Load one element from memory (operand 1) into lane operand 3 of a
;; VQX-mode register.  Operand 2, the incoming vector, is tied to the
;; destination.  The output code rewrites operand 0 as a <V_HALF>mode
;; register.  For a lane in the upper half it advances the register
;; number by 2 and rebases the lane index to that half.  A lane index
;; outside [0, number of units) is reported with error ().
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane_idx = INTVAL (operands[3]);
  int half_regno = REGNO (operands[0]);

  if (!(lane_idx >= 0 && lane_idx < nunits))
    error ("lane out of range");
  else if (lane_idx >= half)
    {
      /* Upper half: address the second half-width register.  */
      half_regno += 2;
      lane_idx -= half;
      operands[3] = GEN_INT (lane_idx);
    }

  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4390
;; Load one element from memory and duplicate it into every lane of a
;; VD-mode register (vec_duplicate), using the all-lanes "[]" syntax.
4391 (define_insn "neon_vld1_dup<mode>"
4392 [(set (match_operand:VD 0 "s_register_operand" "=w")
4393 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4394 "TARGET_NEON"
4395 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4396 [(set_attr "type" "neon_load1_all_lanes<q>")]
4397 )
4398
4399 ;; Special case for DImode. Treat it exactly like a simple load.
;; Expander with no preparation code.  It emits a DImode UNSPEC_VLD1
;; load, the same unspec that neon_vld1<mode> uses.
4400 (define_expand "neon_vld1_dupdi"
4401 [(set (match_operand:DI 0 "s_register_operand" "")
4402 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
4403 UNSPEC_VLD1))]
4404 "TARGET_NEON"
4405 ""
4406 )
4407
;; Load one element from memory and duplicate it into every lane of a
;; VQ-mode register.  Both halves of the destination (%e0 and %f0) are
;; listed with the all-lanes "[]" syntax in a single vld1.
4408 (define_insn "neon_vld1_dup<mode>"
4409 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4410 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4411 "TARGET_NEON"
4412 {
4413 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4414 }
4415 [(set_attr "type" "neon_load1_all_lanes<q>")]
4416 )
4417
;; Duplicate a DImode memory value into both halves of a V2DI register.
;; It is emitted as "#" and split once reload has completed: the low
;; DImode half is loaded with neon_vld1_dupdi and then copied into the
;; high half with an ordinary move.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);
  rtx load_low = gen_neon_vld1_dupdi (low_half, operands[1]);
  emit_insn (load_low);

  rtx high_half = gen_highpart (DImode, operands[0]);
  emit_move_insn (high_half, low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
4434
;; Expander with no preparation code.  It produces the same UNSPEC_VST1
;; RTL shape that the neon_vst1<mode> insn matches.
4435 (define_expand "vec_store_lanes<mode><mode>"
4436 [(set (match_operand:VDQX 0 "neon_struct_operand")
4437 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
4438 UNSPEC_VST1))]
4439 "TARGET_NEON")
4440
;; VST1 of a whole VDQX-mode vector to a neon_struct_operand memory
;; reference (constraint "Um").  The source register is printed with %h
;; and the address with %A.
4441 (define_insn "neon_vst1<mode>"
4442 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4443 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4444 UNSPEC_VST1))]
4445 "TARGET_NEON"
4446 "vst1.<V_sz_elem>\t%h1, %A0"
4447 [(set_attr "type" "neon_store1_1reg<q>")])
4448
;; Store lane operand 2 of a VDX-mode register (operand 1) to memory
;; (operand 0).  A lane index outside [0, number of units) is reported
;; with error ().  Single-element modes store the whole register with no
;; lane suffix.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT lane_idx = INTVAL (operands[2]);

  if (!(lane_idx >= 0 && lane_idx < nunits))
    error ("lane out of range");

  return (nunits == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4468
;; Store lane operand 2 of a VQX-mode register (operand 1) to memory
;; (operand 0).  The output code rewrites operand 1 as a <V_HALF>mode
;; register.  For a lane in the upper half it advances the register
;; number by 2 and rebases the lane index to that half.  A lane index
;; outside [0, number of units) is reported with error ().
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane_idx = INTVAL (operands[2]);
  int half_regno = REGNO (operands[1]);

  if (!(lane_idx >= 0 && lane_idx < nunits))
    error ("lane out of range");
  else if (lane_idx >= half)
    {
      /* Upper half: address the second half-width register.  */
      half_regno += 2;
      lane_idx -= half;
      operands[2] = GEN_INT (lane_idx);
    }

  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4496
;; Expander with no preparation code.  It produces the same UNSPEC_VLD2
;; RTL shape that the TImode neon_vld2<mode> insn matches; the VDX-mode
;; UNSPEC_VSTRUCTDUMMY carries the vector mode.
4497 (define_expand "vec_load_lanesti<mode>"
4498 [(set (match_operand:TI 0 "s_register_operand")
4499 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4500 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4501 UNSPEC_VLD2))]
4502 "TARGET_NEON")
4503
;; Two-vector structure load into a TImode register for VDX element
;; modes.  When the element size is 64 the output code emits vld1.64
;; instead of vld2, and the "type" attribute switches from
;; neon_load2_2reg<q> to neon_load1_2reg<q> to match.
4504 (define_insn "neon_vld2<mode>"
4505 [(set (match_operand:TI 0 "s_register_operand" "=w")
4506 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4507 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4508 UNSPEC_VLD2))]
4509 "TARGET_NEON"
4510 {
4511 if (<V_sz_elem> == 64)
4512 return "vld1.64\t%h0, %A1";
4513 else
4514 return "vld2.<V_sz_elem>\t%h0, %A1";
4515 }
4516 [(set (attr "type")
4517 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4518 (const_string "neon_load1_2reg<q>")
4519 (const_string "neon_load2_2reg<q>")))]
4520 )
4521
;; Expander vec_load_lanesoi<mode> for the VQ modes: sets OImode register
;; operand 0 to an UNSPEC_VLD2 of OImode memory operand 1.  No preparation
;; code; the VQ UNSPEC_VSTRUCTDUMMY wraps only (const_int 0).
4522 (define_expand "vec_load_lanesoi<mode>"
4523 [(set (match_operand:OI 0 "s_register_operand")
4524 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4525 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4526 UNSPEC_VLD2))]
4527 "TARGET_NEON")
4528
;; UNSPEC_VLD2 load of OImode memory (operand 1) into an OImode register
;; (operand 0) for the VQ modes.  Always emitted as a single vld2 with a
;; fixed type of neon_load2_2reg_q.
4529 (define_insn "neon_vld2<mode>"
4530 [(set (match_operand:OI 0 "s_register_operand" "=w")
4531 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4532 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4533 UNSPEC_VLD2))]
4534 "TARGET_NEON"
4535 "vld2.<V_sz_elem>\t%h0, %A1"
4536 [(set_attr "type" "neon_load2_2reg_q")])
4537
;; Load one lane into each of two registers (VD modes).  Operand 2 is tied
;; to the TImode destination, operand 1 is the <V_two_elem> memory source
;; and operand 3 the lane number.  The two registers printed are the DImode
;; registers at REGNO (operand 0) and REGNO (operand 0) + 2.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT idx = INTVAL (operands[3]);
  const int base = REGNO (operands[0]);
  rtx xops[4];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");

  for (i = 0; i < 2; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[2] = operands[1];
  xops[3] = operands[3];
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4562
;; Load one lane into each of two registers (VMQ modes, OImode destination).
;; A lane in the upper half of the vector is redirected to the register two
;; numbers higher, with the lane index reduced by half the lane count.  The
;; two registers printed are four register numbers apart.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT idx = INTVAL (operands[3]);
  int base = REGNO (operands[0]);
  rtx xops[4];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");
  else if (idx >= half)
    {
      base += 2;
      idx -= half;
    }

  for (i = 0; i < 2; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[2] = operands[1];
  xops[3] = GEN_INT (idx);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4592
;; UNSPEC_VLD2_DUP load from <V_two_elem> memory (operand 1) into a TImode
;; register (operand 0).  Single-element modes are emitted as a plain vld1;
;; multi-element modes use the all-lanes "[]" form of vld2.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load2_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
4610
;; Expander vec_store_lanesti<mode> (one per VDX mode): sets TImode memory
;; operand 0 to an UNSPEC_VST2 of TImode register operand 1.  No preparation
;; code; the VDX UNSPEC_VSTRUCTDUMMY wraps only (const_int 0).
4611 (define_expand "vec_store_lanesti<mode>"
4612 [(set (match_operand:TI 0 "neon_struct_operand")
4613 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4614 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4615 UNSPEC_VST2))]
4616 "TARGET_NEON")
4617
;; UNSPEC_VST2 store of a TImode register (operand 1) to TImode memory
;; (operand 0) for the VDX modes.  64-bit elements are emitted as vst1.64
;; and typed as a two-register vst1; all other sizes are emitted as vst2.
;;
;; The non-64-bit case stores whole registers, so it is typed
;; neon_store2_2reg<q>, mirroring neon_load2_2reg<q> on the matching vld2
;; pattern.  It was previously typed as a one-lane store, which only
;; affects scheduling.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_2reg<q>")))]
)
4635
;; Expander vec_store_lanesoi<mode> for the VQ modes: sets OImode memory
;; operand 0 to an UNSPEC_VST2 of OImode register operand 1.  No preparation
;; code; the VQ UNSPEC_VSTRUCTDUMMY wraps only (const_int 0).
4636 (define_expand "vec_store_lanesoi<mode>"
4637 [(set (match_operand:OI 0 "neon_struct_operand")
4638 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4639 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4640 UNSPEC_VST2))]
4641 "TARGET_NEON")
4642
;; UNSPEC_VST2 store of an OImode register (operand 1) to OImode memory
;; (operand 0) for the VQ modes.  Always emitted as a single vst2, typed
;; neon_store2_4reg<q>.
4643 (define_insn "neon_vst2<mode>"
4644 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4645 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4646 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4647 UNSPEC_VST2))]
4648 "TARGET_NEON"
4649 "vst2.<V_sz_elem>\t%h1, %A0"
4650 [(set_attr "type" "neon_store2_4reg<q>")]
4651 )
4652
;; Store one lane from each of two registers (VD modes).  Operand 0 is the
;; <V_two_elem> memory destination, operand 1 the TImode source and
;; operand 2 the lane number.  The registers printed are the DImode
;; registers at REGNO (operand 1) and REGNO (operand 1) + 2.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT idx = INTVAL (operands[2]);
  const int base = REGNO (operands[1]);
  rtx xops[4];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");

  xops[0] = operands[0];
  for (i = 0; i < 2; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[2];
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4677
;; Store one lane from each of two registers (VMQ modes, OImode source).
;; A lane in the upper half of the vector is redirected to the register two
;; numbers higher, with the lane index reduced by half the lane count.  The
;; two registers printed are four register numbers apart.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT idx = INTVAL (operands[2]);
  int base = REGNO (operands[1]);
  rtx xops[4];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");
  else if (idx >= half)
    {
      base += 2;
      idx -= half;
    }

  xops[0] = operands[0];
  for (i = 0; i < 2; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = GEN_INT (idx);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4707
;; Expander vec_load_lanesei<mode> (one per VDX mode): sets EImode register
;; operand 0 to an UNSPEC_VLD3 of EImode memory operand 1.  No preparation
;; code; the VDX UNSPEC_VSTRUCTDUMMY wraps only (const_int 0).
4708 (define_expand "vec_load_lanesei<mode>"
4709 [(set (match_operand:EI 0 "s_register_operand")
4710 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4711 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4712 UNSPEC_VLD3))]
4713 "TARGET_NEON")
4714
;; UNSPEC_VLD3 load of EImode memory (operand 1) into an EImode register
;; (operand 0) for the VDX modes.  64-bit elements are emitted as vld1.64
;; and get the load1 type; all other sizes are emitted as vld3.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> != 64
          ? "vld3.<V_sz_elem>\t%h0, %A1"
          : "vld1.64\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)
4732
;; Expander vec_load_lanesci<mode> for the VQ modes: forwards its CImode
;; register destination (operand 0) and CImode memory source (operand 1)
;; to gen_neon_vld3<mode> and emits nothing else.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx pat = gen_neon_vld3<mode> (operands[0], operands[1]);

  emit_insn (pat);
  DONE;
})
4742
;; Expander for the CImode (VQ) form of vld3.  The CImode memory operand is
;; viewed as two consecutive EImode pieces: neon_vld3qa<mode> loads from the
;; first and neon_vld3qb<mode> from the second, with the qb insn also
;; taking the destination as an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx lo_mem, hi_mem;

  /* First EImode piece, at offset 0.  */
  lo_mem = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], lo_mem));

  /* Second EImode piece, immediately after the first.  */
  hi_mem = adjust_address (lo_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], hi_mem, operands[0]));
  DONE;
})
4757
;; First half of a CImode vld3 (UNSPEC_VLD3A): loads from EImode memory
;; (operand 1) into the DImode registers at REGNO (operand 0) + 0, + 4
;; and + 8.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4];
  int i;

  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4776
;; Second half of a CImode vld3 (UNSPEC_VLD3B): loads from EImode memory
;; (operand 1) into the DImode registers at REGNO (operand 0) + 2, + 6
;; and + 10.  Operand 2 is tied to the destination.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx xops[4];
  int i;

  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4796
;; Load one lane into each of three registers (VD modes, EImode
;; destination).  The registers printed are the DImode registers at
;; REGNO (operand 0) + 0, + 2 and + 4.  The memory operand is printed with
;; plain "%3", unlike the "%A" used by the vld2/vld4 lane patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT idx = INTVAL (operands[3]);
  const int base = REGNO (operands[0]);
  rtx xops[5];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");

  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];
  xops[4] = operands[3];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4823
;; Load one lane into each of three registers (VMQ modes, CImode
;; destination).  A lane in the upper half of the vector is redirected to
;; the register two numbers higher, with the lane index reduced by half the
;; lane count.  The three registers printed are four register numbers apart.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT idx = INTVAL (operands[3]);
  int base = REGNO (operands[0]);
  rtx xops[5];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");
  else if (idx >= half)
    {
      base += 2;
      idx -= half;
    }

  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4855
;; UNSPEC_VLD3_DUP load from <V_three_elem> memory (operand 1) into an
;; EImode register (operand 0).  Single-element modes are emitted as a
;; plain vld1; multi-element modes use the all-lanes "[]" form of vld3 on
;; the DImode registers at REGNO (operand 0) + 0, + 2 and + 4.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";
  else
    {
      const int base = REGNO (operands[0]);
      rtx xops[4];
      int i;

      for (i = 0; i < 3; i++)
        xops[i] = gen_rtx_REG (DImode, base + 2 * i);
      xops[3] = operands[1];
      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", xops);
      return "";
    }
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])
4881
;; Expander vec_store_lanesei<mode> (one per VDX mode): sets EImode memory
;; operand 0 to an UNSPEC_VST3 of EImode register operand 1.  No preparation
;; code; the VDX UNSPEC_VSTRUCTDUMMY wraps only (const_int 0).
4882 (define_expand "vec_store_lanesei<mode>"
4883 [(set (match_operand:EI 0 "neon_struct_operand")
4884 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4885 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4886 UNSPEC_VST3))]
4887 "TARGET_NEON")
4888
;; UNSPEC_VST3 store of an EImode register (operand 1) to EImode memory
;; (operand 0) for the VDX modes.  64-bit elements are emitted as vst1.64
;; and typed as a three-register vst1; all other sizes are emitted as vst3.
;;
;; The non-64-bit case stores whole registers, so it is typed
;; neon_store3_3reg<q>, matching neon_vst3qa/qb and mirroring
;; neon_load3_3reg<q> on the matching vld3 pattern.  It was previously
;; typed as a one-lane store, which only affects scheduling.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])
4905
;; Expander vec_store_lanesci<mode> for the VQ modes: forwards its CImode
;; memory destination (operand 0) and CImode register source (operand 1)
;; to gen_neon_vst3<mode> and emits nothing else.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx pat = gen_neon_vst3<mode> (operands[0], operands[1]);

  emit_insn (pat);
  DONE;
})
4915
;; Expander for the CImode (VQ) form of vst3.  The CImode memory operand is
;; viewed as two consecutive EImode pieces: neon_vst3qa<mode> stores to the
;; first and neon_vst3qb<mode> to the second, both reading operand 1.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx lo_mem, hi_mem;

  /* First EImode piece, at offset 0.  */
  lo_mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (lo_mem, operands[1]));

  /* Second EImode piece, immediately after the first.  */
  hi_mem = adjust_address (lo_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (hi_mem, operands[1]));
  DONE;
})
4930
;; First half of a CImode vst3 (UNSPEC_VST3A): stores the DImode registers
;; at REGNO (operand 1) + 0, + 4 and + 8 to EImode memory (operand 0).
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[4];
  int i;

  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4949
;; Second half of a CImode vst3 (UNSPEC_VST3B): stores the DImode registers
;; at REGNO (operand 1) + 2, + 6 and + 10 to EImode memory (operand 0).
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[4];
  int i;

  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4968
;; Store one lane from each of three registers (VD modes, EImode source).
;; The registers printed are the DImode registers at REGNO (operand 1) + 0,
;; + 2 and + 4.  The memory operand is printed with plain "%0", unlike the
;; "%A" used by the vst2/vst4 lane patterns.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT idx = INTVAL (operands[2]);
  const int base = REGNO (operands[1]);
  rtx xops[5];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");

  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[2];
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
4995
;; Store one lane from each of three registers (VMQ modes, CImode source).
;; A lane in the upper half of the vector is redirected to the register two
;; numbers higher, with the lane index reduced by half the lane count.  The
;; three registers printed are four register numbers apart.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT idx = INTVAL (operands[2]);
  int base = REGNO (operands[1]);
  rtx xops[5];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");
  else if (idx >= half)
    {
      base += 2;
      idx -= half;
    }

  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5027
;; Expander vec_load_lanesoi<mode> for the VDX modes: sets OImode register
;; operand 0 to an UNSPEC_VLD4 of OImode memory operand 1.  No preparation
;; code; the VDX UNSPEC_VSTRUCTDUMMY wraps only (const_int 0).
5028 (define_expand "vec_load_lanesoi<mode>"
5029 [(set (match_operand:OI 0 "s_register_operand")
5030 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5031 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5032 UNSPEC_VLD4))]
5033 "TARGET_NEON")
5034
;; UNSPEC_VLD4 load of OImode memory (operand 1) into an OImode register
;; (operand 0) for the VDX modes.  64-bit elements are emitted as vld1.64
;; and get the load1 type; all other sizes are emitted as vld4.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> != 64
          ? "vld4.<V_sz_elem>\t%h0, %A1"
          : "vld1.64\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)
5052
;; Expander vec_load_lanesxi<mode> for the VQ modes: forwards its XImode
;; register destination (operand 0) and XImode memory source (operand 1)
;; to gen_neon_vld4<mode> and emits nothing else.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx pat = gen_neon_vld4<mode> (operands[0], operands[1]);

  emit_insn (pat);
  DONE;
})
5062
;; Expander for the XImode (VQ) form of vld4.  The XImode memory operand is
;; viewed as two consecutive OImode pieces: neon_vld4qa<mode> loads from the
;; first and neon_vld4qb<mode> from the second, with the qb insn also
;; taking the destination as an input.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx lo_mem, hi_mem;

  /* First OImode piece, at offset 0.  */
  lo_mem = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], lo_mem));

  /* Second OImode piece, immediately after the first.  */
  hi_mem = adjust_address (lo_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], hi_mem, operands[0]));
  DONE;
})
5077
;; First half of an XImode vld4 (UNSPEC_VLD4A): loads from OImode memory
;; (operand 1) into the DImode registers at REGNO (operand 0) + 0, + 4,
;; + 8 and + 12.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];
  int i;

  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5097
;; Second half of an XImode vld4 (UNSPEC_VLD4B): loads from OImode memory
;; (operand 1) into the DImode registers at REGNO (operand 0) + 2, + 6,
;; + 10 and + 14.  Operand 2 is tied to the destination.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx xops[5];
  int i;

  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5118
;; Load one lane into each of four registers (VD modes, OImode
;; destination).  The registers printed are the DImode registers at
;; REGNO (operand 0) + 0, + 2, + 4 and + 6; operand 3 is the lane number.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT idx = INTVAL (operands[3]);
  const int base = REGNO (operands[0]);
  rtx xops[6];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");

  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  xops[5] = operands[3];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5146
;; Load one lane into each of four registers (VMQ modes, XImode
;; destination).  A lane in the upper half of the vector is redirected to
;; the register two numbers higher, with the lane index reduced by half the
;; lane count.  The four registers printed are four register numbers apart.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT idx = INTVAL (operands[3]);
  int base = REGNO (operands[0]);
  rtx xops[6];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");
  else if (idx >= half)
    {
      base += 2;
      idx -= half;
    }

  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5179
;; UNSPEC_VLD4_DUP load from <V_four_elem> memory (operand 1) into an
;; OImode register (operand 0).  Single-element modes are emitted as a
;; plain vld1; multi-element modes use the all-lanes "[]" form of vld4 on
;; the DImode registers at REGNO (operand 0) + 0, + 2, + 4 and + 6.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";
  else
    {
      const int base = REGNO (operands[0]);
      rtx xops[5];
      int i;

      for (i = 0; i < 4; i++)
        xops[i] = gen_rtx_REG (DImode, base + 2 * i);
      xops[4] = operands[1];
      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                       xops);
      return "";
    }
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
5208
;; Expander vec_store_lanesoi<mode> for the VDX modes: sets OImode memory
;; operand 0 to an UNSPEC_VST4 of OImode register operand 1.  No preparation
;; code; the VDX UNSPEC_VSTRUCTDUMMY wraps only (const_int 0).
5209 (define_expand "vec_store_lanesoi<mode>"
5210 [(set (match_operand:OI 0 "neon_struct_operand")
5211 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5212 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5213 UNSPEC_VST4))]
5214 "TARGET_NEON")
5215
;; UNSPEC_VST4 store of an OImode register (operand 1) to OImode memory
;; (operand 0) for the VDX modes.  64-bit elements are emitted as vst1.64
;; and get the store1 type; all other sizes are emitted as vst4.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> != 64
          ? "vst4.<V_sz_elem>\t%h1, %A0"
          : "vst1.64\t%h1, %A0");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)
5233
;; Expander vec_store_lanesxi<mode> for the VQ modes: forwards its XImode
;; memory destination (operand 0) and XImode register source (operand 1)
;; to gen_neon_vst4<mode> and emits nothing else.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx pat = gen_neon_vst4<mode> (operands[0], operands[1]);

  emit_insn (pat);
  DONE;
})
5243
;; Expander for the XImode (VQ) form of vst4.  The XImode memory operand is
;; viewed as two consecutive OImode pieces: neon_vst4qa<mode> stores to the
;; first and neon_vst4qb<mode> to the second, both reading operand 1.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx lo_mem, hi_mem;

  /* First OImode piece, at offset 0.  */
  lo_mem = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (lo_mem, operands[1]));

  /* Second OImode piece, immediately after the first.  */
  hi_mem = adjust_address (lo_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (hi_mem, operands[1]));
  DONE;
})
5258
;; First half of an XImode vst4 (UNSPEC_VST4A): stores the DImode registers
;; at REGNO (operand 1) + 0, + 4, + 8 and + 12 to OImode memory (operand 0).
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];
  int i;

  xops[0] = operands[0];
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5278
;; Second half of an XImode vst4 (UNSPEC_VST4B): stores the DImode
;; registers at REGNO (operand 1) + 2, + 6, + 10 and + 14 to OImode memory
;; (operand 0).
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[5];
  int i;

  xops[0] = operands[0];
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5298
;; Store one lane from each of four registers (VD modes, OImode source).
;; The registers printed are the DImode registers at REGNO (operand 1) + 0,
;; + 2, + 4 and + 6; operand 2 is the lane number.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT idx = INTVAL (operands[2]);
  const int base = REGNO (operands[1]);
  rtx xops[6];
  int i;

  if (idx < 0 || idx >= nunits)
    error ("lane out of range");

  xops[0] = operands[0];
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[5] = operands[2];
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5326
;; Store one lane from each of four registers (VMQ modes, XImode source).
;; Operand 0 is the <V_four_elem> memory destination and operand 2 the lane
;; number.  A lane in the upper half of the vector is redirected to the
;; register two numbers higher, with the lane index reduced by half the
;; lane count.  The four registers printed are four register numbers apart.
;;
;; This is a single-lane store, so it is typed neon_store4_one_lane<q>
;; like every other *_lane pattern, including the D-register
;; neon_vst4_lane<mode>.  It was previously typed neon_store4_4reg<q>,
;; which only affects scheduling.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = INTVAL (operands[2]);
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  if (lane < 0 || lane >= max)
    error ("lane out of range");
  else if (lane >= max / 2)
    {
      /* Upper-half lane: switch register and rebase the lane index.  */
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5359
;; Expander neon_vand<mode>: emits gen_and<mode>3 on operands 0-2.
;; Operand 3 (an SImode immediate) is accepted but not used here.
(define_expand "neon_vand<mode>"
  [(match_operand:VDQX 0 "s_register_operand" "")
   (match_operand:VDQX 1 "s_register_operand" "")
   (match_operand:VDQX 2 "neon_inv_logic_op2" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx pat = gen_and<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
5370
;; Expander neon_vorr<mode>: emits gen_ior<mode>3 on operands 0-2.
;; Operand 3 (an SImode immediate) is accepted but not used here.
(define_expand "neon_vorr<mode>"
  [(match_operand:VDQX 0 "s_register_operand" "")
   (match_operand:VDQX 1 "s_register_operand" "")
   (match_operand:VDQX 2 "neon_logic_op2" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx pat = gen_ior<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
5381
;; Expander neon_veor<mode>: emits gen_xor<mode>3 on operands 0-2.
;; Operand 3 (an SImode immediate) is accepted but not used here.
(define_expand "neon_veor<mode>"
  [(match_operand:VDQX 0 "s_register_operand" "")
   (match_operand:VDQX 1 "s_register_operand" "")
   (match_operand:VDQX 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx pat = gen_xor<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
5392
;; Expander neon_vbic<mode>: emits gen_bic<mode>3_neon on operands 0-2.
;; Operand 3 (an SImode immediate) is accepted but not used here.
(define_expand "neon_vbic<mode>"
  [(match_operand:VDQX 0 "s_register_operand" "")
   (match_operand:VDQX 1 "s_register_operand" "")
   (match_operand:VDQX 2 "neon_logic_op2" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx pat = gen_bic<mode>3_neon (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
5403
;; Expander neon_vorn<mode>: emits gen_orn<mode>3_neon on operands 0-2.
;; Operand 3 (an SImode immediate) is accepted but not used here.
(define_expand "neon_vorn<mode>"
  [(match_operand:VDQX 0 "s_register_operand" "")
   (match_operand:VDQX 1 "s_register_operand" "")
   (match_operand:VDQX 2 "neon_inv_logic_op2" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx pat = gen_orn<mode>3_neon (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
5414
;; Extend (SE) the <V_HALF> part of VU register operand 1 selected by
;; operand 2 into <V_unpack> register operand 0, emitted as vmovl on %e1.
;; Operand 2 must satisfy vect_par_constant_low.  Only available when
;; !BYTES_BIG_ENDIAN.
5415 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5416 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5417 (SE:<V_unpack> (vec_select:<V_HALF>
5418 (match_operand:VU 1 "register_operand" "w")
5419 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5420 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5421 "vmovl.<US><V_sz_elem> %q0, %e1"
5422 [(set_attr "type" "neon_shift_imm_long")]
5423 )
5424
;; Extend (SE) the <V_HALF> part of VU register operand 1 selected by
;; operand 2 into <V_unpack> register operand 0, emitted as vmovl on %f1.
;; Operand 2 must satisfy vect_par_constant_high.  Only available when
;; !BYTES_BIG_ENDIAN.
5425 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5426 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5427 (SE:<V_unpack> (vec_select:<V_HALF>
5428 (match_operand:VU 1 "register_operand" "w")
5429 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5430 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5431 "vmovl.<US><V_sz_elem> %q0, %f1"
5432 [(set_attr "type" "neon_shift_imm_long")]
5433 )
5434
;; Expander vec_unpack<US>_hi_<mode>: builds a PARALLEL holding the lane
;; indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and passes it, with
;; operands 0 and 1, to neon_vec_unpack<US>_hi_<mode>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits>/2;
  rtvec sel = rtvec_alloc (half);
  rtx par;
  int k;

  /* Select the upper-half lane indices.  */
  for (k = 0; k != half; ++k)
    RTVEC_ELT (sel, k) = GEN_INT (half + k);

  par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 par));
  DONE;
}
)
5453
;; Expander vec_unpack<US>_lo_<mode>: builds a PARALLEL holding the lane
;; indices 0 .. <V_mode_nunits>/2 - 1 and passes it, with operands 0 and 1,
;; to neon_vec_unpack<US>_lo_<mode>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits>/2;
  rtvec sel = rtvec_alloc (half);
  rtx par;
  int k;

  /* Select the lower-half lane indices.  */
  for (k = 0; k != half; ++k)
    RTVEC_ELT (sel, k) = GEN_INT (k);

  par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 par));
  DONE;
}
)
5471
;; Widening multiply of matching half-vectors.  Both inputs (operands 1
;; and 3) are sliced by the same PARALLEL (operand 2, which must satisfy
;; vect_par_constant_low), extended via the SE iterator and multiplied in
;; the <V_unpack> mode.  Only enabled for little-endian.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_low" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5484
;; Expander for a widening multiply of the lower-numbered lanes of two VU
;; vectors.  It builds a PARALLEL naming lanes 0 .. N/2-1 and emits
;; neon_vec_<US>mult_lo_<mode>, whose operand order is
;; (dest, first input, selector, second input).
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half_lanes = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half_lanes);
  int lane;

  for (lane = 0; lane < half_lanes; lane++)
    RTVEC_ELT (lanes, lane) = GEN_INT (lane);

  rtx selector = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                               operands[1],
                                               selector,
                                               operands[2]));
  DONE;
}
)
5505
;; Widening multiply of matching half-vectors.  Both inputs (operands 1
;; and 3) are sliced by the same PARALLEL (operand 2, which must satisfy
;; vect_par_constant_high), extended via the SE iterator and multiplied in
;; the <V_unpack> mode.  Only enabled for little-endian.
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_high" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5518
;; Expander for a widening multiply of the upper-numbered lanes of two VU
;; vectors.  It builds a PARALLEL naming lanes N/2 .. N-1 and emits
;; neon_vec_<US>mult_hi_<mode>, whose operand order is
;; (dest, first input, selector, second input).
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half_lanes = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half_lanes);
  int lane;

  for (lane = 0; lane < half_lanes; lane++)
    RTVEC_ELT (lanes, lane) = GEN_INT (half_lanes + lane);

  rtx selector = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                               operands[1],
                                               selector,
                                               operands[2]));
  DONE;
}
)
5540
;; Widening shift left: operand 1 (a VW vector) is shifted left by the
;; constant in operand 2 and the result is extended to <V_widen> via the
;; SE iterator.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (ashift:VW
            (match_operand:VW 1 "register_operand" "w")
            (match_operand:<V_innermode> 2
              "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
5551
;; Expander for a widening shift of one half of a VU vector.  The
;; <V_HALF>mode subreg of operand 1 at byte offset 0 is fed to
;; neon_vec_<US>shiftl_<V_half> together with the shift amount.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx half = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], half,
                                                operands[2]));
  DONE;
}
)
5564
;; Expander for a widening shift of the other half of a VU vector.  The
;; <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) is fed to neon_vec_<US>shiftl_<V_half>
;; together with the shift amount.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx half = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                  GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], half,
                                                operands[2]));
  DONE;
}
)
5578
;; Vectorization support for the non-quad (VDI) case: extend a whole VDI
;; vector to its <V_widen> mode in a single instruction.
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)
5587
;; VDI variant: extend the whole input into a fresh <V_widen> register
;; with neon_unpack<US>_<mode>, then copy the low half of that register
;; into operand 0 using neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
}
)
5600
;; VDI variant: extend the whole input into a fresh <V_widen> register
;; with neon_unpack<US>_<mode>, then copy the high half of that register
;; into operand 0 using neon_vget_high<V_widen_l>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
}
)
5613
;; Widening multiply of two whole VDI vectors: each input is extended to
;; <V_widen> via the SE iterator and the products are formed in that mode.
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (mult:<V_widen>
          (SE:<V_widen>
            (match_operand:VDI 1 "register_operand" "w"))
          (SE:<V_widen>
            (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5624
;; VDI variant: do the full widening multiply into a fresh <V_widen>
;; register with neon_vec_<US>mult_<mode>, then copy the high half of the
;; product into operand 0 using neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                            operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
}
)
5639
;; VDI variant: do the full widening multiply into a fresh <V_widen>
;; register with neon_vec_<US>mult_<mode>, then copy the low half of the
;; product into operand 0 using neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                            operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
  DONE;
}
)
5654
;; VDI variant: do the full widening shift into a fresh <V_widen> register
;; with neon_vec_<US>shiftl_<mode>, then copy the high half of the result
;; into operand 0 using neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                              operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
  DONE;
}
)
5668
;; VDI variant: do the full widening shift into a fresh <V_widen> register
;; with neon_vec_<US>shiftl_<mode>, then copy the low half of the result
;; into operand 0 using neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                              operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
  DONE;
}
)
5682
5683 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5684 ; because the ordering of vector elements in Q registers is different from what
5685 ; the semantics of the instructions require.
5686
;; Truncate two VN vectors and concatenate the results into a single
;; <V_narrow_pack> register.  Two instructions are emitted (hence length
;; 8); the output is earlyclobber ("=&w") because the first instruction
;; writes part of it before the second reads operand 2.  Only enabled
;; for little-endian.
(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
        (vec_concat:<V_narrow_pack>
          (truncate:<V_narrow>
            (match_operand:VN 1 "register_operand" "w"))
          (truncate:<V_narrow>
            (match_operand:VN 2 "register_operand" "w"))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
5699
;; Non-quad case: truncate one VN vector to its <V_narrow> mode with a
;; single instruction.  Only enabled for little-endian.
(define_insn "neon_vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
        (truncate:<V_narrow>
          (match_operand:VN 1 "register_operand" "w")))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_move_narrow_q")]
)
5708
;; Expander for packing two VSHFT vectors.  The inputs are first glued
;; together into one <V_DOUBLE> register (operand 1 through
;; move_lo_quad, operand 2 through move_hi_quad), and that register is
;; then narrowed into operand 0 by neon_vec_pack_trunc_<V_double>.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
   (match_operand:VSHFT 1 "register_operand" "")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx combined = gen_reg_rtx (<V_DOUBLE>mode);

  emit_insn (gen_move_lo_quad_<V_double> (combined, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (combined, operands[2]));
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], combined));
  DONE;
})
5722
;; Recognize abs (minus (a, b)) as a single VABD.
;;
;; This is only valid for floating-point modes, and then only under
;; -funsafe-math-optimizations.  For integer vector modes the RTL minus
;; wraps, so abs (minus (a, b)) differs from what VABD computes: with
;; 8-bit lanes a = 127, b = -128 the RTL gives abs (-1) = 1, whereas
;; vabd.s8 yields the true absolute difference 255 (0xff).  Matching
;; integer modes here therefore lets combine produce wrong code, so the
;; condition now requires <Is_float_mode>.  The iterator is left as VDQ so
;; that the set of generated pattern names does not change; the integer
;; instances simply have a constant-false condition.
(define_insn "neon_vabd<mode>_2"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (abs:VDQ
          (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                     (match_operand:VDQ 2 "s_register_operand" "w"))))]
  "TARGET_NEON && <Is_float_mode> && flag_unsafe_math_optimizations"
  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
                      (const_string "neon_fp_abd_s<q>")
                      (const_string "neon_abd<q>")))]
)
5734
;; Recognize abs applied to an UNSPEC_VSUB of two VDQ registers as a
;; single VABD.  Float modes are only accepted under
;; -funsafe-math-optimizations; integer modes are always accepted.
;; NOTE(review): if UNSPEC_VSUB on integer modes denotes a wrapping
;; subtract, abs of it is not the same value as VABD -- confirm whether
;; integer modes should be accepted here.
(define_insn "neon_vabd<mode>_3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (abs:VDQ
          (unspec:VDQ
            [(match_operand:VDQ 1 "s_register_operand" "w")
             (match_operand:VDQ 2 "s_register_operand" "w")]
            UNSPEC_VSUB)))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
                      (const_string "neon_fp_abd_s<q>")
                      (const_string "neon_abd<q>")))]
)
5747
5748 ;; Copy from core-to-neon regs, then extend, not vice-versa
5749
;; After reload, when the DImode destination is a VFP register, split
;; sign_extend:DI of an SImode register into two steps: duplicate the
;; source across the destination viewed as V2SI, then arithmetic-shift the
;; DImode destination right by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination hard register re-typed as V2SI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
5759
;; After reload, when the DImode destination is a VFP register, split
;; sign_extend:DI of an HImode register into two steps: duplicate the
;; source across the destination viewed as V4HI, then arithmetic-shift the
;; DImode destination right by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination hard register re-typed as V4HI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
5769
;; After reload, when the DImode destination is a VFP register, split
;; sign_extend:DI of a QImode register into two steps: duplicate the
;; source across the destination viewed as V8QI, then arithmetic-shift the
;; DImode destination right by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination hard register re-typed as V8QI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})
5779
;; After reload, when the DImode destination is a VFP register, split
;; zero_extend:DI of an SImode register into two steps: duplicate the
;; source across the destination viewed as V2SI, then logical-shift the
;; DImode destination right by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination hard register re-typed as V2SI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
5789
;; After reload, when the DImode destination is a VFP register, split
;; zero_extend:DI of an HImode register into two steps: duplicate the
;; source across the destination viewed as V4HI, then logical-shift the
;; DImode destination right by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination hard register re-typed as V4HI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
5799
;; After reload, when the DImode destination is a VFP register, split
;; zero_extend:DI of a QImode register into two steps: duplicate the
;; source across the destination viewed as V8QI, then logical-shift the
;; DImode destination right by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination hard register re-typed as V8QI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})