Update copyright years.
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The attribute takes one of the values vadd, vmin or vmax; vadd is the
;; default given here.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the VDX vector modes.  The insn condition requires at
;; least one of the two operands to be a register.  The output code picks
;; the assembly by which_alternative:
;;   0     register-to-register vmov
;;   1, 3  delegated to output_move_neon
;;   2     immediate move (vmov.f32 or vmov.i<width>)
;;   4, 5  single vmov naming two %Q/%R parts on one side and %P on the other
;;   6-8   delegated to output_move_double
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
/* Alternative 2 is handled first because its template depends on the
   element width reported for the immediate.  */
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
/* Static storage: the formatted template is returned to the caller.  */
38 static char templ[40];
39
/* The call is given the addresses of operands[1] and width, so it can
   update both.  */
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
/* A width of 0 selects the vmov.f32 form; otherwise the width becomes
   the integer element size in the mnemonic.  */
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
/* Alternative 2 has already returned above.  */
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
61 }
62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the VQXMOV vector modes.  It mirrors the VDX move
;; pattern but prints operands with the %q modifier.  Alternatives 4 and 5
;; emit two vmov instructions (the %e and %f parts of the vector operand),
;; and the remaining alternatives are delegated to output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
76 "TARGET_NEON
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
79 {
/* Alternative 2 (immediate source) is handled first because its template
   depends on the element width reported for the immediate.  */
80 if (which_alternative == 2)
81 {
82 int width, is_valid;
/* Static storage: the formatted template is returned to the caller.  */
83 static char templ[40];
84
/* The call is given the addresses of operands[1] and width, so it can
   update both.  */
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
87
88 gcc_assert (is_valid != 0);
89
/* A width of 0 selects the vmov.f32 form.  */
90 if (width == 0)
91 return "vmov.f32\t%q0, %1 @ <mode>";
92 else
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
94
95 return templ;
96 }
97
98 switch (which_alternative)
99 {
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
/* Alternative 2 has already returned above.  */
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
106 }
107 }
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; TImode move expander.  While new pseudos may still be created, a move
;; whose destination is not a register gets its source forced into a
;; register first.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
119 "TARGET_NEON"
120 {
121 if (can_create_pseudo_p ())
122 {
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
125 }
126 })
127
;; Move expander for the VSTRUCT modes.  While new pseudos may still be
;; created, a move whose destination is not a register gets its source
;; forced into a register first.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
131 "TARGET_NEON"
132 {
133 if (can_create_pseudo_p ())
134 {
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
137 }
138 })
139
;; Move pattern for the VSTRUCT modes; at least one operand must be a
;; register.  Alternative 0 outputs "#", so it has no assembly of its own
;; and must be split; alternatives 1 and 2 are emitted by output_move_neon.
;; The length attribute is computed by arm_attr_length_move_neon.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
143 "TARGET_NEON
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
146 {
147 switch (which_alternative)
148 {
149 case 0: return "#";
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
152 }
153 }
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
156
;; After reload, split an EImode register-to-register copy into two moves:
;; a TImode piece at the base register number and a DImode piece four
;; register numbers higher.
157 (define_split
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
163 {
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
166 rtx dest[2], src[2];
167
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
172
/* NOTE(review): presumably fills operands[0..3] with the pieces in an
   order that is safe when source and destination overlap -- confirm
   against the definition of neon_disambiguate_copy.  */
173 neon_disambiguate_copy (operands, dest, src, 2);
174 })
175
;; After reload, split an OImode register-to-register copy into two TImode
;; moves, the second starting four register numbers above the first.
176 (define_split
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
182 {
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
185 rtx dest[2], src[2];
186
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
191
/* NOTE(review): presumably fills operands[0..3] with the pieces in an
   order that is safe when source and destination overlap -- confirm
   against the definition of neon_disambiguate_copy.  */
192 neon_disambiguate_copy (operands, dest, src, 2);
193 })
194
;; After reload, split a CImode register-to-register copy into three TImode
;; moves at register offsets 0, 4 and 8 from the base register number.
195 (define_split
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
202 {
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
205 rtx dest[3], src[3];
206
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
213
/* NOTE(review): presumably fills operands[0..5] with the pieces in an
   order that is safe when source and destination overlap -- confirm
   against the definition of neon_disambiguate_copy.  */
214 neon_disambiguate_copy (operands, dest, src, 3);
215 })
216
;; After reload, split an XImode register-to-register copy into four TImode
;; moves at register offsets 0, 4, 8 and 12 from the base register number.
217 (define_split
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
225 {
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
228 rtx dest[4], src[4];
229
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
238
/* NOTE(review): presumably fills operands[0..7] with the pieces in an
   order that is safe when source and destination overlap -- confirm
   against the definition of neon_disambiguate_copy.  */
239 neon_disambiguate_copy (operands, dest, src, 4);
240 })
241
;; Misaligned move expander for the VDQX modes.  Only enabled for
;; little-endian targets with unaligned_access.  It makes sure at least one
;; operand is a register and that the other operand's address is acceptable
;; to neon_vector_mem_operand.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
247 {
248 rtx adjust_mem;
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
255
/* Whichever operand is not the register destination is treated as the
   memory reference whose address may need fixing up.  */
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
258 else
259 adjust_mem = operands[0];
260
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
264
265 })
266
;; Misaligned store of a VDX-mode register, emitted as a single vst1 with
;; the register printed via %P.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
274
;; Misaligned load into a VDX-mode register, emitted as a single vld1 with
;; the register printed via %P.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
278 " Um")]
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
283
;; Misaligned store of a VQX-mode register, emitted as a single vst1 with
;; the register printed via %q.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
291
;; Misaligned load into a VQX-mode register, emitted as a single vld1 with
;; the register printed via %q.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
295 " Um")]
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
300
;; Insert a scalar (operand 1) into one lane of a VD-mode vector.  Operand 3
;; is tied to the output by the "0" constraint.  Operand 2 is a bit mask;
;; the output code turns the position of its lowest set bit into the lane
;; number.  Alternative 0 loads the lane with vld1, alternative 1 uses vmov.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
303 (vec_merge:VD
304 (vec_duplicate:VD
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
308 "TARGET_NEON"
309 {
/* ffs returns a 1-based bit position, hence the "- 1".  */
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
/* On big-endian the lane number is mirrored within the vector.  */
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
314
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
317 else
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
319 }
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
321
;; Insert a scalar (operand 1) into one lane of a VQ2-mode vector.  The
;; output code rewrites operand 0 as the <V_HALF>-mode register that holds
;; the selected lane and operand 2 as the lane number within that half.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
324 (vec_merge:VQ2
325 (vec_duplicate:VQ2
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ2 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
329 "TARGET_NEON"
330 {
/* Operand 2 is a bit mask; ffs gives the 1-based position of its lowest
   set bit, hence the "- 1".  */
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
/* elt is the lane within a half; hi is 0 for the first half and 2 for the
   second, and is added to the register number below.  */
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
336
/* On big-endian the lane number is mirrored within the half.  */
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
339
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
342
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
345 else
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
347 }
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
349 )
350
;; Insert a DImode scalar into one element of a V2DI vector.  The output
;; code rewrites operand 0 as the DImode register holding the selected
;; element, then loads it with vld1.64 or moves it in with vmov.
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
353 (vec_merge:V2DI
354 (vec_duplicate:V2DI
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
358 "TARGET_NEON"
359 {
/* Operand 2 is a bit mask; ffs gives the 1-based position of its lowest
   set bit, hence the "- 1".  */
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
/* Each element advances the register number by two.  */
361 int regno = REGNO (operands[0]) + 2 * elem;
362
363 operands[0] = gen_rtx_REG (DImode, regno);
364
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
367 else
368 return "vmov\t%P0, %Q1, %R1";
369 }
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
371 )
372
;; vec_set expander for the VDQ modes.  Operand 2 is shifted into a
;; single-bit mask (1 << operand 2) and passed to vec_set<mode>_internal,
;; with operand 0 supplied both as the destination and as the vector to
;; merge into.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
377 "TARGET_NEON"
378 {
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
382 DONE;
383 })
384
;; Extract one lane of a VD-mode vector.  Alternative 0 stores the lane with
;; vst1, alternative 1 moves it out with vmov.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
387 (vec_select:<V_elem>
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
390 "TARGET_NEON"
391 {
/* On big-endian the lane number is mirrored within the vector.  */
392 if (BYTES_BIG_ENDIAN)
393 {
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
397 }
398
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
401 else
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
403 }
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
405 )
406
;; Extract one lane of a VQ2-mode vector.  The output code rewrites operand
;; 1 as the <V_HALF>-mode register that holds the selected lane and operand
;; 2 as the lane number within that half.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
409 (vec_select:<V_elem>
410 (match_operand:VQ2 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
412 "TARGET_NEON"
413 {
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
/* elt is the lane within a half; hi is 0 for the first half and 2 for the
   second, and is added to the register number below.  */
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
418
/* On big-endian the lane number is mirrored within the half.  */
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
421
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
424
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
427 else
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
429 }
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
431 )
432
;; Extract one DImode element of a V2DI vector.  The output code rewrites
;; operand 1 as the DImode register holding the selected element, then
;; stores it with vst1.64 or moves it out with vmov.
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
435 (vec_select:DI
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
438 "TARGET_NEON"
439 {
/* Each element advances the register number by two.  */
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
441
442 operands[1] = gen_rtx_REG (DImode, regno);
443
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
446 else
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
448 }
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
450 )
451
;; vec_init expander for the VDQ modes; all of the work is done by
;; neon_expand_vector_init.
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
455 "TARGET_NEON"
456 {
457 neon_expand_vector_init (operands[0], operands[1]);
458 DONE;
459 })
460
461 ;; Doubleword and quadword arithmetic.
462
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
465
;; Vector addition (vadd) for the VDQ modes.  Floating-point modes are only
;; matched when flag_unsafe_math_optimizations is set; the type attribute
;; likewise depends on whether the mode is a float mode.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
472 [(set (attr "type")
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
476 )
477
;; DImode addition with a condition-code clobber.  Alternatives 0 and 3 emit
;; vadd.i64; every other alternative outputs "#" and so has to be split.
;; The arch attribute restricts alternative 0 to neon_for_64bits and
;; alternative 3 to avoid_neon_for_64bits.
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
483 "TARGET_NEON"
484 {
485 switch (which_alternative)
486 {
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
489 case 1: return "#";
490 case 2: return "#";
491 case 4: return "#";
492 case 5: return "#";
493 case 6: return "#";
494 default: gcc_unreachable ();
495 }
496 }
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
502 )
503
;; Vector subtraction (vsub) for the VDQ modes.  Floating-point modes are
;; only matched when flag_unsafe_math_optimizations is set.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
510 [(set (attr "type")
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
514 )
515
;; DImode subtraction with a condition-code clobber.  Alternatives 0 and 4
;; emit vsub.i64; alternatives 1-3 emit a two-instruction subs/sbc sequence
;; on the %Q and %R parts of the operands.  The arch attribute restricts
;; alternative 0 to neon_for_64bits and alternative 4 to
;; avoid_neon_for_64bits.
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
521 "TARGET_NEON"
522 {
523 switch (which_alternative)
524 {
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
531 }
532 }
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
537 )
538
;; Vector multiplication (vmul) for the VDQW modes.  Floating-point modes
;; are only matched when flag_unsafe_math_optimizations is set.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
545 [(set (attr "type")
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
549 )
550
;; Multiply-accumulate (vmla): operand 0 = operand 2 * operand 3 + operand 1.
;; Operand 1 is tied to the output register by the "0" constraint, so only
;; operands 2 and 3 appear as sources in the template.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
558 [(set (attr "type")
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
562 )
563
;; Multiply-subtract (vmls): operand 0 = operand 1 - operand 2 * operand 3.
;; Operand 1 is tied to the output register by the "0" constraint, so only
;; operands 2 and 3 appear as sources in the template.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
571 [(set (attr "type")
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
575 )
576
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
580 ;; to be able to use when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add (vfma), variant that additionally requires
;; flag_unsafe_math_optimizations.  The addend (operand 3) is tied to the
;; output register by the "0" constraint.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
590 )
591
;; Fused multiply-add (vfma), variant whose condition does not test
;; flag_unsafe_math_optimizations.  The addend (operand 3) is tied to the
;; output register by the "0" constraint.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
600 )
601
;; Fused multiply-subtract (vfms): an fma whose first multiplicand is
;; negated.  This variant additionally requires
;; flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
610 )
611
;; Fused multiply-subtract (vfms): an fma whose first multiplicand is
;; negated.  This variant's condition does not test
;; flag_unsafe_math_optimizations.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
620 )
621
;; vrint<variant>.f32 on a VCVTF-mode register, one pattern per NEON_VRINT
;; unspec.  Requires TARGET_FPU_ARMV8 in addition to TARGET_NEON.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
626 NEON_VRINT))]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
630 )
631
;; vcvt<variant>.<su>32.f32: a FIXUORS conversion applied to a NEON_VCVT
;; unspec of a VCVTF-mode register, producing a <V_cmp_result>-mode value.
;; Requires TARGET_FPU_ARMV8; the insn is marked as not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
636 NEON_VCVT)))]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
641 )
642
;; Bitwise OR for the VDQ modes.  Alternative 0 is a three-register vorr;
;; alternative 1 (operand 1 tied to the output) is formatted by
;; neon_output_logic_immediate with the "vorr" mnemonic.
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
647 "TARGET_NEON"
648 {
649 switch (which_alternative)
650 {
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
655 }
656 }
657 [(set_attr "type" "neon_logic<q>")]
658 )
659
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
664
;; Bitwise AND for the VDQ modes.  Alternative 0 is a three-register vand;
;; alternative 1 (operand 1 tied to the output) is formatted by
;; neon_output_logic_immediate with the "vand" mnemonic and a fourth
;; argument of 1, where ior<mode>3 passes 0.
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
669 "TARGET_NEON"
670 {
671 switch (which_alternative)
672 {
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
677 }
678 }
679 [(set_attr "type" "neon_logic<q>")]
680 )
681
;; OR-NOT (vorn) for the VDQ modes: operand 0 = operand 1 | ~operand 2.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
686 "TARGET_NEON"
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
689 )
690
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
693 ;; changes above.
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.  Alternative 0 emits
;; vorn directly; the other alternatives output "#" and are split after
;; reload when the destination is not a VFP register.  The arch attribute
;; marks alternative 1 as "a" and alternatives 2 and 3 as "t2".
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
698 "TARGET_NEON"
699 "@
700 vorn\t%P0, %P1, %P2
701 #
702 #
703 #"
704 "reload_completed &&
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
708 "
709 {
/* Thumb-2: rewrite the operands as their SImode low parts and add
   operands 3-5 for the high parts, so the two-set template above applies
   one SImode OR-NOT to each half.  */
710 if (TARGET_THUMB2)
711 {
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
718 }
719 else
720 {
/* Otherwise emit a DImode complement of operand 2 into operand 0 followed
   by a DImode OR with operand 1; DONE skips the template above.  */
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
723 DONE;
724 }
725 }"
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
729 )
730
;; Bit clear (vbic) for the VDQ modes: operand 0 = operand 1 & ~operand 2.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
735 "TARGET_NEON"
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
738 )
739
740 ;; Compare to *anddi_notdi_di.
;; DImode bit clear: operand 0 = operand 1 & ~operand 2.  Alternative 0
;; emits vbic; alternatives 1 and 2 output "#" and so have to be split.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
745 "TARGET_NEON"
746 "@
747 vbic\t%P0, %P1, %P2
748 #
749 #"
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
752 )
753
;; Bitwise exclusive OR (veor) for the VDQ modes.
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
758 "TARGET_NEON"
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
761 )
762
;; Bitwise complement (vmvn) for the VDQ modes.
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
766 "TARGET_NEON"
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
769 )
770
;; Absolute value (vabs) for the VDQW modes; the type attribute depends on
;; whether the mode is a float mode.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
774 "TARGET_NEON"
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
776 [(set (attr "type")
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
780 )
781
;; Negation (vneg) for the VDQW modes; the type attribute depends on
;; whether the mode is a float mode.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
785 "TARGET_NEON"
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
787 [(set (attr "type")
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
791 )
792
;; DImode negation with a DImode scratch and a condition-code clobber.
;; Every alternative outputs "#", so the insn is always split; the two
;; define_splits that follow handle VFP and core register destinations.
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
798 "TARGET_NEON"
799 "#"
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
802 )
803
804 ; Split negdi2_neon for vfp registers
;; After reload, when the destination is a VFP register: set operand 2 to
;; zero, then compute operand 0 = operand 2 - operand 1.
805 (define_split
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
814 {
/* If the scratch did not end up as a register, use the destination
   itself as the zeroed temporary.  */
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
817 }
818 )
819
820 ; Split negdi2_neon for core registers
;; After reload, when the destination satisfies
;; arm_general_register_operand: re-emit the negation as a plain neg:DI
;; with only the condition-code clobber, dropping the scratch.
821 (define_split
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
830 ""
831 )
832
;; Unsigned minimum (vmin with <V_u_elem>) for the VDQIW modes.
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
837 "TARGET_NEON"
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
840 )
841
;; Unsigned maximum (vmax with <V_u_elem>) for the VDQIW modes.
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
846 "TARGET_NEON"
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
849 )
850
;; Signed minimum (vmin with <V_s_elem>) for the VDQW modes; the type
;; attribute depends on whether the mode is a float mode.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
855 "TARGET_NEON"
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
857 [(set (attr "type")
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
861 )
862
;; Signed maximum (vmax with <V_s_elem>) for the VDQW modes; the type
;; attribute depends on whether the mode is a float mode.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
867 "TARGET_NEON"
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869 [(set (attr "type")
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
873 )
874
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
877 ; SImode elements.
878
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a
;; register with vshl; alternative 1 is formatted by
;; neon_output_shift_immediate with the "vshl" mnemonic and sign character
;; 'i'.
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
883 "TARGET_NEON"
884 {
885 switch (which_alternative)
886 {
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
889 <MODE>mode,
890 VALID_NEON_QREG_MODE (<MODE>mode),
891 true);
892 default: gcc_unreachable ();
893 }
894 }
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
896 )
897
;; Arithmetic right shift by an immediate for the VDQIW modes, formatted by
;; neon_output_shift_immediate with the "vshr" mnemonic and sign character
;; 's'.
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
902 "TARGET_NEON"
903 {
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
906 false);
907 }
908 [(set_attr "type" "neon_shift_imm<q>")]
909 )
910
;; Vector logical right shift by an immediate.  Same as vashr<mode>3_imm
;; except that the sign letter passed to neon_output_shift_immediate
;; is 'u'.
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
915 "TARGET_NEON"
916 {
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
919 false);
920 }
921 [(set_attr "type" "neon_shift_imm<q>")]
922 )
923
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a
925 ; negative amount, with signed operands. This is essentially the same as
926 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
927 ; with negative shift amounts.
928
;; vshl with the signed element suffix, wrapped in UNSPEC_ASHIFT_SIGNED.
;; The vashr<mode>3 expander passes a negated shift count as operand 2.
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
934 "TARGET_NEON"
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
937 )
938
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
941
;; vshl with the unsigned element suffix, wrapped in
;; UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander passes a negated
;; shift count as operand 2.
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
947 "TARGET_NEON"
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
950 )
951
;; Expander for vector arithmetic right shift.  A register shift count
;; is negated into a fresh pseudo and fed to ashl<mode>3_signed; any
;; other operand 2 goes to the immediate pattern vashr<mode>3_imm.
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
956 "TARGET_NEON"
957 {
958 if (s_register_operand (operands[2], <MODE>mode))
959 {
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
963 }
964 else
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
966 DONE;
967 })
968
;; Expander for vector logical right shift.  A register shift count is
;; negated into a fresh pseudo and fed to ashl<mode>3_unsigned; any
;; other operand 2 goes to the immediate pattern vlshr<mode>3_imm.
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
973 "TARGET_NEON"
974 {
975 if (s_register_operand (operands[2], <MODE>mode))
976 {
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
980 }
981 else
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
983 DONE;
984 })
985
986 ;; 64-bit shifts
987
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
992 ;; using an unspec.
;; Alternative 0 loads the SImode count from memory with vld1.32;
;; alternative 1 moves it from a core register with vmov.32.  Both
;; write lane 0 of the destination.
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
996 UNSPEC_LOAD_COUNT))]
997 "TARGET_NEON"
998 "@
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1002 )
1003
;; 64-bit left shift in NEON registers without any clobbers.
;; Alternative 0 shifts by an immediate, alternative 1 by a register.
;; Only matches after reload, and a constant count must lie in 0..63.
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1011 "@
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1015 )
1016
;; 64-bit left shift whose operands may live in NEON ("w") or core ("r")
;; registers.  It is always output as "#" and split after reload:
;;  - Destination in a VFP/NEON register: a constant count below 1
;;    becomes a plain DImode move and a count above 63 is replaced
;;    by 63; a non-constant count is first loaded into scratch operand 5
;;    with neon_load_count.  The shift itself is then emitted through
;;    ashldi3_neon_noclobber.
;;  - Otherwise: a shift by exactly 1 uses arm_ashldi3_1bit when the
;;    destination either does not overlap the source or is the same
;;    register; everything else goes to arm_emit_coreregs_64bit_shift
;;    with scratch operands 3 and 4.
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1025 "TARGET_NEON"
1026 "#"
1027 "TARGET_NEON && reload_completed"
1028 [(const_int 0)]
1029 "
1030 {
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1032 {
1033 if (CONST_INT_P (operands[2]))
1034 {
1035 if (INTVAL (operands[2]) < 1)
1036 {
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1038 DONE;
1039 }
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1042 }
1043 else
1044 {
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1047 }
1048
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1051 operands[2]));
1052 }
1053 else
1054 {
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1060 else
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1063 }
1064 DONE;
1065 }"
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1069 )
1070
1071 ; The shift amount needs to be negated for right-shifts
;; vshl.s64 by a register count, wrapped in UNSPEC_ASHIFT_SIGNED.
;; Only matches after reload.
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1080 )
1081
1082 ; The shift amount needs to be negated for right-shifts
;; vshl.u64 by a register count, wrapped in UNSPEC_ASHIFT_UNSIGNED.
;; Only matches after reload.
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
1091 )
1092
;; 64-bit arithmetic right shift by an immediate in NEON registers,
;; without clobbers.  Only matches after reload, for counts in 1..64.
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
1101 )
1102
;; 64-bit logical right shift by an immediate in NEON registers,
;; without clobbers.  Only matches after reload, for counts in 1..64.
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
1111 )
1112
1113 ;; ashrdi3_neon
1114 ;; lshrdi3_neon
;; 64-bit right shifts (RSHIFTS iterator) whose operands may live in
;; NEON ("w") or core ("r") registers.  Always output as "#" and split
;; after reload:
;;  - Destination in a VFP/NEON register: a constant count below 1
;;    becomes a plain DImode move and a count above 64 is replaced
;;    by 64, then the <shift>di3_neon_imm_noclobber pattern is emitted.
;;    A register count is negated into scratch operand 3, loaded into
;;    scratch operand 5 with neon_load_count, and used by the
;;    <shifttype>_shift_di3_neon pattern.
;;  - Otherwise: a shift by exactly 1 uses arm_<shift>di3_1bit when the
;;    destination either does not overlap the source or is the same
;;    register; everything else goes to arm_emit_coreregs_64bit_shift
;;    with scratch operands 3 and 4.
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1123 "TARGET_NEON"
1124 "#"
1125 "TARGET_NEON && reload_completed"
1126 [(const_int 0)]
1127 "
1128 {
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1130 {
1131 if (CONST_INT_P (operands[2]))
1132 {
1133 if (INTVAL (operands[2]) < 1)
1134 {
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1136 DONE;
1137 }
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1140
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1143 operands[1],
1144 operands[2]));
1145 }
1146 else
1147 {
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1152 operands[5]));
1153 }
1154 }
1155 else
1156 {
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1162 else
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1166 }
1167
1168 DONE;
1169 }"
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1173 )
1174
1175 ;; Widening operations
1176
;; Widening signed sum: sign-extend the narrow vector operand 1 and add
;; it to the wide accumulator operand 2, emitted as vaddw.
;; Operand 1 carries no '%' modifier: it has the narrow mode VW and sits
;; inside the sign_extend, so it can never be interchanged with the
;; <V_widen> operand 2.
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1182 "TARGET_NEON"
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
1185 )
1186
;; Widening unsigned sum: zero-extend the narrow vector operand 1 and
;; add it to the wide accumulator operand 2, emitted as vaddw.
;; Operand 1 carries no '%' modifier: it has the narrow mode VW and sits
;; inside the zero_extend, so it can never be interchanged with the
;; <V_widen> operand 2.
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1192 "TARGET_NEON"
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1195 )
1196
1197 ;; Helpers for quad-word reduction operations
1198
1199 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1200 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1201 ; N/2-element vector.
1202
;; V4SI variant: applies a VQH_OPS operation to elements 0-1 and
;; elements 2-3 of operand 1, giving a V2SI result.
1203 (define_insn "quad_halves_<code>v4si"
1204 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1205 (VQH_OPS:V2SI
1206 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1207 (parallel [(const_int 0) (const_int 1)]))
1208 (vec_select:V2SI (match_dup 1)
1209 (parallel [(const_int 2) (const_int 3)]))))]
1210 "TARGET_NEON"
1211 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1212 [(set_attr "vqh_mnem" "<VQH_mnem>")
1213 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1214 )
1215
;; V4SF variant: applies a VQHS_OPS operation to elements 0-1 and
;; elements 2-3 of operand 1, giving a V2SF result.  Only enabled under
;; flag_unsafe_math_optimizations.
1216 (define_insn "quad_halves_<code>v4sf"
1217 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1218 (VQHS_OPS:V2SF
1219 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1220 (parallel [(const_int 0) (const_int 1)]))
1221 (vec_select:V2SF (match_dup 1)
1222 (parallel [(const_int 2) (const_int 3)]))))]
1223 "TARGET_NEON && flag_unsafe_math_optimizations"
1224 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1225 [(set_attr "vqh_mnem" "<VQH_mnem>")
1226 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1227 )
1228
;; V8HI variant: applies a VQH_OPS operation to elements 0-3 and
;; elements 4-7 of operand 1, giving a V4HI result.
;; Operand 0 is write-only ("=w"), matching the V4SI and V4SF variants;
;; the destination is never read by this pattern.
1229 (define_insn "quad_halves_<code>v8hi"
1230 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1231 (VQH_OPS:V4HI
1232 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1233 (parallel [(const_int 0) (const_int 1)
1234 (const_int 2) (const_int 3)]))
1235 (vec_select:V4HI (match_dup 1)
1236 (parallel [(const_int 4) (const_int 5)
1237 (const_int 6) (const_int 7)]))))]
1238 "TARGET_NEON"
1239 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1240 [(set_attr "vqh_mnem" "<VQH_mnem>")
1241 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1242 )
1243
;; V16QI variant: applies a VQH_OPS operation to elements 0-7 and
;; elements 8-15 of operand 1, giving a V8QI result.
;; Operand 0 is write-only ("=w"), matching the V4SI and V4SF variants;
;; the destination is never read by this pattern.
1244 (define_insn "quad_halves_<code>v16qi"
1245 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1246 (VQH_OPS:V8QI
1247 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1248 (parallel [(const_int 0) (const_int 1)
1249 (const_int 2) (const_int 3)
1250 (const_int 4) (const_int 5)
1251 (const_int 6) (const_int 7)]))
1252 (vec_select:V8QI (match_dup 1)
1253 (parallel [(const_int 8) (const_int 9)
1254 (const_int 10) (const_int 11)
1255 (const_int 12) (const_int 13)
1256 (const_int 14) (const_int 15)]))))]
1257 "TARGET_NEON"
1258 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1259 [(set_attr "vqh_mnem" "<VQH_mnem>")
1260 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1261 )
1262
;; Copy the half-width vector operand 1 into the half of the 128-bit
;; operand 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode),
;; using a subreg move.
1263 (define_expand "move_hi_quad_<mode>"
1264 [(match_operand:ANY128 0 "s_register_operand" "")
1265 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1266 "TARGET_NEON"
1267 {
1268 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1269 GET_MODE_SIZE (<V_HALF>mode)),
1270 operands[1]);
1271 DONE;
1272 })
1273
;; Copy the half-width vector operand 1 into the half of the 128-bit
;; operand 0 that starts at byte offset 0, using a subreg move.
1274 (define_expand "move_lo_quad_<mode>"
1275 [(match_operand:ANY128 0 "s_register_operand" "")
1276 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1277 "TARGET_NEON"
1278 {
1279 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1280 <MODE>mode, 0),
1281 operands[1]);
1282 DONE;
1283 })
1284
1285 ;; Reduction operations
1286
;; Sum reduction of a VD-mode vector to a scalar: neon_pairwise_reduce
;; is driven with the vpadd pattern, then element 0 is extracted.
;; Float modes are only handled under flag_unsafe_math_optimizations.
1287 (define_expand "reduc_plus_scal_<mode>"
1288 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1289 (match_operand:VD 1 "s_register_operand" "")]
1290 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1291 {
1292 rtx vec = gen_reg_rtx (<MODE>mode);
1293 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1294 &gen_neon_vpadd_internal<mode>);
1295 /* The same result is actually computed into every element. */
1296 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1297 DONE;
1298 })
1299
;; Sum reduction of a VQ-mode vector to a scalar: the two halves are
;; first added with quad_halves_plus<mode>, then the half-width
;; reduc_plus_scal expander finishes the job.  Little-endian only.
1300 (define_expand "reduc_plus_scal_<mode>"
1301 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1302 (match_operand:VQ 1 "s_register_operand" "")]
1303 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1304 && !BYTES_BIG_ENDIAN"
1305 {
1306 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1307
1308 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1309 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1310
1311 DONE;
1312 })
1313
;; Sum reduction of a V2DI vector to a DImode scalar: the internal
;; vadd.i64 pattern computes the sum, then element 0 is extracted.
;; Little-endian only.
1314 (define_expand "reduc_plus_scal_v2di"
1315 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1316 (match_operand:V2DI 1 "s_register_operand" "")]
1317 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1318 {
1319 rtx vec = gen_reg_rtx (V2DImode);
1320
1321 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1322 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1323
1324 DONE;
1325 })
1326
;; Helper for reduc_plus_scal_v2di: a single vadd.i64 of the %e and %f
;; halves of operand 1, written to the %e half of operand 0.  Modelled
;; as UNSPEC_VPADD.  Little-endian only.
1327 (define_insn "arm_reduc_plus_internal_v2di"
1328 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1329 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1330 UNSPEC_VPADD))]
1331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1332 "vadd.i64\t%e0, %e1, %f1"
1333 [(set_attr "type" "neon_add_q")]
1334 )
1335
;; Signed-minimum reduction of a VD-mode vector to a scalar:
;; neon_pairwise_reduce is driven with the vpsmin pattern, then
;; element 0 is extracted.  Float modes are only handled under
;; flag_unsafe_math_optimizations.
1336 (define_expand "reduc_smin_scal_<mode>"
1337 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1338 (match_operand:VD 1 "s_register_operand" "")]
1339 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1340 {
1341 rtx vec = gen_reg_rtx (<MODE>mode);
1342
1343 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1344 &gen_neon_vpsmin<mode>);
1345 /* The result is computed into every element of the vector. */
1346 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1347 DONE;
1348 })
1349
;; Signed-minimum reduction of a VQ-mode vector to a scalar: the two
;; halves are combined with quad_halves_smin<mode>, then the half-width
;; reduc_smin_scal expander finishes the job.  Little-endian only.
1350 (define_expand "reduc_smin_scal_<mode>"
1351 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1352 (match_operand:VQ 1 "s_register_operand" "")]
1353 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1354 && !BYTES_BIG_ENDIAN"
1355 {
1356 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1357
1358 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1359 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1360
1361 DONE;
1362 })
1363
;; Signed-maximum reduction of a VD-mode vector to a scalar:
;; neon_pairwise_reduce is driven with the vpsmax pattern, then
;; element 0 is extracted.  Float modes are only handled under
;; flag_unsafe_math_optimizations.
1364 (define_expand "reduc_smax_scal_<mode>"
1365 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1366 (match_operand:VD 1 "s_register_operand" "")]
1367 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1368 {
1369 rtx vec = gen_reg_rtx (<MODE>mode);
1370 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1371 &gen_neon_vpsmax<mode>);
1372 /* The result is computed into every element of the vector. */
1373 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1374 DONE;
1375 })
1376
;; Signed-maximum reduction of a VQ-mode vector to a scalar: the two
;; halves are combined with quad_halves_smax<mode>, then the half-width
;; reduc_smax_scal expander finishes the job.  Little-endian only.
1377 (define_expand "reduc_smax_scal_<mode>"
1378 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1379 (match_operand:VQ 1 "s_register_operand" "")]
1380 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1381 && !BYTES_BIG_ENDIAN"
1382 {
1383 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1384
1385 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1386 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1387
1388 DONE;
1389 })
1390
;; Unsigned-minimum reduction of a VDI-mode vector to a scalar:
;; neon_pairwise_reduce is driven with the vpumin pattern, then
;; element 0 is extracted.
1391 (define_expand "reduc_umin_scal_<mode>"
1392 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1393 (match_operand:VDI 1 "s_register_operand" "")]
1394 "TARGET_NEON"
1395 {
1396 rtx vec = gen_reg_rtx (<MODE>mode);
1397 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1398 &gen_neon_vpumin<mode>);
1399 /* The result is computed into every element of the vector. */
1400 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1401 DONE;
1402 })
1403
;; Unsigned-minimum reduction of a VQI-mode vector to a scalar: the two
;; halves are combined with quad_halves_umin<mode>, then the half-width
;; reduc_umin_scal expander finishes the job.  Little-endian only.
1404 (define_expand "reduc_umin_scal_<mode>"
1405 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1406 (match_operand:VQI 1 "s_register_operand" "")]
1407 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1408 {
1409 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1410
1411 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1412 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1413
1414 DONE;
1415 })
1416
;; Unsigned-maximum reduction of a VDI-mode vector to a scalar:
;; neon_pairwise_reduce is driven with the vpumax pattern, then
;; element 0 is extracted.
1417 (define_expand "reduc_umax_scal_<mode>"
1418 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1419 (match_operand:VDI 1 "s_register_operand" "")]
1420 "TARGET_NEON"
1421 {
1422 rtx vec = gen_reg_rtx (<MODE>mode);
1423 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1424 &gen_neon_vpumax<mode>);
1425 /* The result is computed into every element of the vector. */
1426 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1427 DONE;
1428 })
1429
;; Unsigned-maximum reduction of a VQI-mode vector to a scalar: the two
;; halves are combined with quad_halves_umax<mode>, then the half-width
;; reduc_umax_scal expander finishes the job.  Little-endian only.
1430 (define_expand "reduc_umax_scal_<mode>"
1431 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1432 (match_operand:VQI 1 "s_register_operand" "")]
1433 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1434 {
1435 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1436
1437 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1438 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1439
1440 DONE;
1441 })
1442
;; Pairwise add (vpadd) of two D-register vectors, modelled as
;; UNSPEC_VPADD.  Passed to neon_pairwise_reduce by the reduc_plus_scal
;; expander for VD modes.
1443 (define_insn "neon_vpadd_internal<mode>"
1444 [(set (match_operand:VD 0 "s_register_operand" "=w")
1445 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1446 (match_operand:VD 2 "s_register_operand" "w")]
1447 UNSPEC_VPADD))]
1448 "TARGET_NEON"
1449 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1450 ;; Assume this schedules like vadd.
1451 [(set (attr "type")
1452 (if_then_else (match_test "<Is_float_mode>")
1453 (const_string "neon_fp_reduc_add_s<q>")
1454 (const_string "neon_reduc_add<q>")))]
1455 )
1456
;; Pairwise minimum (vpmin) with the signed/float element suffix,
;; modelled as UNSPEC_VPSMIN.
1457 (define_insn "neon_vpsmin<mode>"
1458 [(set (match_operand:VD 0 "s_register_operand" "=w")
1459 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1460 (match_operand:VD 2 "s_register_operand" "w")]
1461 UNSPEC_VPSMIN))]
1462 "TARGET_NEON"
1463 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1464 [(set (attr "type")
1465 (if_then_else (match_test "<Is_float_mode>")
1466 (const_string "neon_fp_reduc_minmax_s<q>")
1467 (const_string "neon_reduc_minmax<q>")))]
1468 )
1469
;; Pairwise maximum (vpmax) with the signed/float element suffix,
;; modelled as UNSPEC_VPSMAX.
1470 (define_insn "neon_vpsmax<mode>"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")]
1474 UNSPEC_VPSMAX))]
1475 "TARGET_NEON"
1476 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1477 [(set (attr "type")
1478 (if_then_else (match_test "<Is_float_mode>")
1479 (const_string "neon_fp_reduc_minmax_s<q>")
1480 (const_string "neon_reduc_minmax<q>")))]
1481 )
1482
;; Pairwise minimum (vpmin) with the unsigned element suffix, integer
;; D-register modes only, modelled as UNSPEC_VPUMIN.
1483 (define_insn "neon_vpumin<mode>"
1484 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1485 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1486 (match_operand:VDI 2 "s_register_operand" "w")]
1487 UNSPEC_VPUMIN))]
1488 "TARGET_NEON"
1489 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1490 [(set_attr "type" "neon_reduc_minmax<q>")]
1491 )
1492
;; Pairwise maximum (vpmax) with the unsigned element suffix, integer
;; D-register modes only, modelled as UNSPEC_VPUMAX.
1493 (define_insn "neon_vpumax<mode>"
1494 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1495 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1496 (match_operand:VDI 2 "s_register_operand" "w")]
1497 UNSPEC_VPUMAX))]
1498 "TARGET_NEON"
1499 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1500 [(set_attr "type" "neon_reduc_minmax<q>")]
1501 )
1502
1503 ;; Saturating arithmetic
1504
1505 ; NOTE: Neon supports many more saturating variants of instructions than the
1506 ; following, but these are all GCC currently understands.
1507 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1508 ; yet either, although these patterns may be used by intrinsics when they're
1509 ; added.
1510
;; Signed saturating add (ss_plus) on D-register vectors, emitted as
;; vqadd with the signed element suffix.
1511 (define_insn "*ss_add<mode>_neon"
1512 [(set (match_operand:VD 0 "s_register_operand" "=w")
1513 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1514 (match_operand:VD 2 "s_register_operand" "w")))]
1515 "TARGET_NEON"
1516 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1517 [(set_attr "type" "neon_qadd<q>")]
1518 )
1519
;; Unsigned saturating add (us_plus) on D-register vectors, emitted as
;; vqadd with the unsigned element suffix.
1520 (define_insn "*us_add<mode>_neon"
1521 [(set (match_operand:VD 0 "s_register_operand" "=w")
1522 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1523 (match_operand:VD 2 "s_register_operand" "w")))]
1524 "TARGET_NEON"
1525 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1526 [(set_attr "type" "neon_qadd<q>")]
1527 )
1528
;; Signed saturating subtract (ss_minus) on D-register vectors, emitted
;; as vqsub with the signed element suffix.
1529 (define_insn "*ss_sub<mode>_neon"
1530 [(set (match_operand:VD 0 "s_register_operand" "=w")
1531 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1532 (match_operand:VD 2 "s_register_operand" "w")))]
1533 "TARGET_NEON"
1534 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1535 [(set_attr "type" "neon_qsub<q>")]
1536 )
1537
;; Unsigned saturating subtract (us_minus) on D-register vectors,
;; emitted as vqsub with the unsigned element suffix.
1538 (define_insn "*us_sub<mode>_neon"
1539 [(set (match_operand:VD 0 "s_register_operand" "=w")
1540 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1541 (match_operand:VD 2 "s_register_operand" "w")))]
1542 "TARGET_NEON"
1543 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1544 [(set_attr "type" "neon_qsub<q>")]
1545 )
1546
1547 ;; Conditional instructions. These are comparisons with conditional moves for
1548 ;; vectors. They perform the assignment:
1549 ;;
1550 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1551 ;;
1552 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1553 ;; element-wise.
1554
1555 (define_expand "vcond<mode><mode>"
1556 [(set (match_operand:VDQW 0 "s_register_operand" "")
1557 (if_then_else:VDQW
1558 (match_operator 3 "comparison_operator"
1559 [(match_operand:VDQW 4 "s_register_operand" "")
1560 (match_operand:VDQW 5 "nonmemory_operand" "")])
1561 (match_operand:VDQW 1 "s_register_operand" "")
1562 (match_operand:VDQW 2 "s_register_operand" "")))]
1563 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1564 {
1565 int inverse = 0;
1566 int use_zero_form = 0;
1567 int swap_bsl_operands = 0;
1568 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1569 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1570
1571 rtx (*base_comparison) (rtx, rtx, rtx);
1572 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1573
1574 switch (GET_CODE (operands[3]))
1575 {
1576 case GE:
1577 case GT:
1578 case LE:
1579 case LT:
1580 case EQ:
1581 if (operands[5] == CONST0_RTX (<MODE>mode))
1582 {
1583 use_zero_form = 1;
1584 break;
1585 }
1586 /* Fall through. */
1587 default:
1588 if (!REG_P (operands[5]))
1589 operands[5] = force_reg (<MODE>mode, operands[5]);
1590 }
1591
1592 switch (GET_CODE (operands[3]))
1593 {
1594 case LT:
1595 case UNLT:
1596 inverse = 1;
1597 /* Fall through. */
1598 case GE:
1599 case UNGE:
1600 case ORDERED:
1601 case UNORDERED:
1602 base_comparison = gen_neon_vcge<mode>;
1603 complimentary_comparison = gen_neon_vcgt<mode>;
1604 break;
1605 case LE:
1606 case UNLE:
1607 inverse = 1;
1608 /* Fall through. */
1609 case GT:
1610 case UNGT:
1611 base_comparison = gen_neon_vcgt<mode>;
1612 complimentary_comparison = gen_neon_vcge<mode>;
1613 break;
1614 case EQ:
1615 case NE:
1616 case UNEQ:
1617 base_comparison = gen_neon_vceq<mode>;
1618 complimentary_comparison = gen_neon_vceq<mode>;
1619 break;
1620 default:
1621 gcc_unreachable ();
1622 }
1623
1624 switch (GET_CODE (operands[3]))
1625 {
1626 case LT:
1627 case LE:
1628 case GT:
1629 case GE:
1630 case EQ:
1631 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1632 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1633 a GE b -> a GE b
1634 a GT b -> a GT b
1635 a LE b -> b GE a
1636 a LT b -> b GT a
1637 a EQ b -> a EQ b
1638 Note that there also exist direct comparison against 0 forms,
1639 so catch those as a special case. */
1640 if (use_zero_form)
1641 {
1642 inverse = 0;
1643 switch (GET_CODE (operands[3]))
1644 {
1645 case LT:
1646 base_comparison = gen_neon_vclt<mode>;
1647 break;
1648 case LE:
1649 base_comparison = gen_neon_vcle<mode>;
1650 break;
1651 default:
1652 /* Do nothing, other zero form cases already have the correct
1653 base_comparison. */
1654 break;
1655 }
1656 }
1657
1658 if (!inverse)
1659 emit_insn (base_comparison (mask, operands[4], operands[5]));
1660 else
1661 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1662 break;
1663 case UNLT:
1664 case UNLE:
1665 case UNGT:
1666 case UNGE:
1667 case NE:
1668 /* Vector compare returns false for lanes which are unordered, so if we use
1669 the inverse of the comparison we actually want to emit, then
1670 swap the operands to BSL, we will end up with the correct result.
1671 Note that a NE NaN and NaN NE b are true for all a, b.
1672
1673 Our transformations are:
1674 a UNGE b -> !(b GT a)
1675 a UNGT b -> !(b GE a)
1676 a UNLE b -> !(a GT b)
1677 a UNLT b -> !(a GE b)
1678 a NE b -> !(a EQ b) */
1679
1680 if (inverse)
1681 emit_insn (base_comparison (mask, operands[4], operands[5]));
1682 else
1683 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1684
1685 swap_bsl_operands = 1;
1686 break;
1687 case UNEQ:
1688 /* We check (a > b || b > a). Combining these comparisons gives us
1689 true iff (a != b && a ORDERED b); swapping the operands to BSL
1690 will then give us (a == b || a UNORDERED b) as intended. */
1691
1692 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1693 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1694 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1695 swap_bsl_operands = 1;
1696 break;
1697 case UNORDERED:
1698 /* Operands are ORDERED iff (a > b || b >= a).
1699 Swapping the operands to BSL will give the UNORDERED case. */
1700 swap_bsl_operands = 1;
1701 /* Fall through. */
1702 case ORDERED:
1703 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1704 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1705 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1706 break;
1707 default:
1708 gcc_unreachable ();
1709 }
1710
1711 if (swap_bsl_operands)
1712 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1713 operands[1]));
1714 else
1715 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1716 operands[2]));
1717 DONE;
1718 })
1719
;; Unsigned vector conditional select:
;;   Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2
;; for integer vector modes.  A comparison mask is built with vcgeu,
;; vcgtu or vceq (operands swapped for LEU/LTU) and fed to vbsl; for NE
;; the vceq mask is used with the two vbsl data operands exchanged.
1720 (define_expand "vcondu<mode><mode>"
1721 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1722 (if_then_else:VDQIW
1723 (match_operator 3 "arm_comparison_operator"
1724 [(match_operand:VDQIW 4 "s_register_operand" "")
1725 (match_operand:VDQIW 5 "s_register_operand" "")])
1726 (match_operand:VDQIW 1 "s_register_operand" "")
1727 (match_operand:VDQIW 2 "s_register_operand" "")))]
1728 "TARGET_NEON"
1729 {
1730 rtx mask;
1731 int inverse = 0;
1732
1733 mask = gen_reg_rtx (<V_cmp_result>mode);
1734
1735 /* Always compare against a register. The compare-with-zero forms
1736 (vcle/vclt) are signed and must not be used for unsigned codes. */
1737 if (!REG_P (operands[5]))
1738 operands[5] = force_reg (<MODE>mode, operands[5]);
1739
1740 switch (GET_CODE (operands[3]))
1741 {
1742 case GEU:
1743 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1744 break;
1745
1746 case GTU:
1747 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1748 break;
1749
1750 case EQ:
1751 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1752 break;
1753
1754 case LEU:
1755 /* a LEU b <=> b GEU a. A signed vcle against zero would also
1756 accept elements with the top bit set, which are large unsigned
1757 values, so the swapped unsigned compare is always used. */
1758 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1759 break;
1760
1761 case LTU:
1762 /* a LTU b <=> b GTU a. A signed vclt against zero would accept
1763 elements with the top bit set, although nothing is LTU zero,
1764 so the swapped unsigned compare is always used. */
1765 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1766 break;
1767
1768 case NE:
1769 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1770 inverse = 1;
1771 break;
1772
1773 default:
1774 gcc_unreachable ();
1775 }
1776
1777 if (inverse)
1778 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1779 operands[1]));
1780 else
1781 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1782 operands[2]));
1783
1784 DONE;
1785 })
1786
1787 ;; Patterns for builtins.
1788
1789 ; good for plain vadd, vaddq.
1790
;; Expander behind the vadd builtins.  Non-float modes, and float modes
;; under flag_unsafe_math_optimizations, expand to the generic
;; add<mode>3; otherwise the unspec form neon_vadd<mode>_unspec is used.
1791 (define_expand "neon_vadd<mode>"
1792 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1793 (match_operand:VCVTF 1 "s_register_operand" "w")
1794 (match_operand:VCVTF 2 "s_register_operand" "w")]
1795 "TARGET_NEON"
1796 {
1797 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1798 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1799 else
1800 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1801 operands[2]));
1802 DONE;
1803 })
1804
1805 ; Note that NEON operations don't support the full IEEE 754 standard: in
1806 ; particular, denormal values are flushed to zero. This means that GCC cannot
1807 ; use those instructions for autovectorization, etc. unless
1808 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1809 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1810 ; header) must work in either case: if -funsafe-math-optimizations is given,
1811 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1812 ; expand to unspecs (which may potentially limit the extent to which they might
1813 ; be optimized by generic code).
1814
1815 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1816
;; vadd modelled as UNSPEC_VADD so that generic code cannot treat it as
;; an ordinary plus.  Emitted by the neon_vadd<mode> expander.
1817 (define_insn "neon_vadd<mode>_unspec"
1818 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1819 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1820 (match_operand:VCVTF 2 "s_register_operand" "w")]
1821 UNSPEC_VADD))]
1822 "TARGET_NEON"
1823 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1824 [(set (attr "type")
1825 (if_then_else (match_test "<Is_float_mode>")
1826 (const_string "neon_fp_addsub_s<q>")
1827 (const_string "neon_add<q>")))]
1828 )
1829
;; Long add (vaddl): two D-register integer vectors in, one widened
;; Q-register vector out.  The VADDL iterator selects the unspec and the
;; <sup> prefix used in the element suffix.
1830 (define_insn "neon_vaddl<sup><mode>"
1831 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1832 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1833 (match_operand:VDI 2 "s_register_operand" "w")]
1834 VADDL))]
1835 "TARGET_NEON"
1836 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1837 [(set_attr "type" "neon_add_long")]
1838 )
1839
;; vaddw: operand 1 already has the <V_widen> mode, operand 2 is a VDI
;; vector; the result has the <V_widen> mode.
(define_insn "neon_vaddw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)
1849
;; vhadd and vrhadd over the VDQIW modes (VHADD unspecs).
;; The "type" attribute carries the <q> mode suffix so that the scheduling
;; type follows the vector mode, in the same way as the "neon_sub_halve<q>"
;; type used by neon_vhsub<sup><mode>.

(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)
1861
;; vqadd over the VDQIX modes (VQADD unspecs).
(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)
1871
;; vaddhn and vraddhn: two VN inputs, result in the <V_narrow> mode
;; (VADDHN unspecs).
(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)
1881
;; Polynomial and float multiplication over the VPF modes (UNSPEC_VMUL).
;; The "type" attribute is chosen by <Is_float_mode>.
(define_insn "neon_vmul<pf><mode>"
  [(set (match_operand:VPF 0 "s_register_operand" "=w")
        (unspec:VPF
          [(match_operand:VPF 1 "s_register_operand" "w")
           (match_operand:VPF 2 "s_register_operand" "w")]
          UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s<q>")
                      (const_string "neon_mul_<V_elem_ch><q>")))]
)
1895
;; Expander for vmla over the VDQW modes.  Float modes without
;; -funsafe-math-optimizations go through neon_vmla<mode>_unspec; every
;; other case uses mul<mode>3add<mode>_neon.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx pat;

  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    pat = gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                      operands[2], operands[3]);
  else
    pat = gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                        operands[2], operands[3]);

  emit_insn (pat);
  DONE;
})
1911
;; Expander for vfma over the VCVTF modes.  Operand 1 is passed as the
;; last argument of fma<mode>4_intrinsic, after operands 2 and 3.
(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fma<mode>4_intrinsic (dest, operands[2], operands[3],
                                       last_arg));
  DONE;
})
1923
;; Expander for vfms over the VCVTF modes.  Operand 1 is passed as the
;; last argument of fmsub<mode>4_intrinsic, after operands 2 and 3.
(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx last_arg = operands[1];

  emit_insn (gen_fmsub<mode>4_intrinsic (dest, operands[2], operands[3],
                                         last_arg));
  DONE;
})
1935
;; Unspec (UNSPEC_VMLA) form of vmla over the VDQW modes.
;; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Operand 1 is tied to the output through the "0" constraint.

(define_insn "neon_vmla<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
1951
;; vmlal: <V_widen> operand 1 is tied to the output; operands 2 and 3 are
;; VW vectors (VMLAL unspecs).
(define_insn "neon_vmlal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
1962
;; Expander for vmls over the VDQW modes.  Float modes without
;; -funsafe-math-optimizations go through neon_vmls<mode>_unspec; every
;; other case uses mul<mode>3neg<mode>add<mode>_neon.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx pat;

  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    pat = gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                      operands[2], operands[3]);
  else
    pat = gen_mul<mode>3neg<mode>add<mode>_neon (operands[0], operands[1],
                                                 operands[2], operands[3]);

  emit_insn (pat);
  DONE;
})
1978
;; Unspec (UNSPEC_VMLS) form of vmls over the VDQW modes.
;; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Operand 1 is tied to the output through the "0" constraint.

(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
1994
;; vmlsl: <V_widen> operand 1 is tied to the output; operands 2 and 3 are
;; VW vectors (VMLSL unspecs).
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2005
;; vqdmulh and vqrdmulh over the VMDQI modes (VQDMULH unspecs).
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "w")
           (match_operand:VMDQI 2 "s_register_operand" "w")]
          VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)
2016
;; vqrdmlah and vqrdmlsh over the VMDQI modes (VQRDMLH_AS unspecs).
;; Enabled by TARGET_NEON_RDMA; operand 1 is tied to the output.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "0")
           (match_operand:VMDQI 2 "s_register_operand" "w")
           (match_operand:VMDQI 3 "s_register_operand" "w")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2028
;; vqdmlal: <V_widen> operand 1 is tied to the output; operands 2 and 3
;; are VMDI vectors (UNSPEC_VQDMLAL).
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2039
;; vqdmlsl: <V_widen> operand 1 is tied to the output; operands 2 and 3
;; are VMDI vectors (UNSPEC_VQDMLSL).
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2050
;; vmull: two VW inputs, result in the <V_widen> mode (VMULL unspecs).
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
2060
;; vqdmull: two VMDI inputs, result in the <V_widen> mode
;; (UNSPEC_VQDMULL).
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand" "w")]
          UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)
2070
;; Expander for vsub over the VCVTF modes.  Float modes without
;; -funsafe-math-optimizations go through neon_vsub<mode>_unspec; every
;; other case uses sub<mode>3.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx pat;

  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    pat = gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                      operands[2]);
  else
    pat = gen_sub<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
2084
;; Unspec (UNSPEC_VSUB) form of the vector subtract over the VCVTF modes.
;; Used for intrinsics when flag_unsafe_math_optimizations is false.

(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_sub<q>")))]
)
2099
;; vsubl: both inputs are VDI vectors, the result has the <V_widen> mode.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)
2109
;; vsubw: operand 1 already has the <V_widen> mode, operand 2 is a VDI
;; vector; the result has the <V_widen> mode.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)
2119
;; vqsub over the VDQIX modes (VQSUB unspecs).
(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)
2129
;; vhsub over the VDQIW modes (VHSUB unspecs).
(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)
2139
;; vsubhn and vrsubhn: two VN inputs, result in the <V_narrow> mode
;; (VSUBHN unspecs).
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)
2149
;; Vector comparison expanders over the VDQW modes.  A float vector mode
;; without -funsafe-math-optimizations expands to an UNSPEC pattern;
;; every other case expands to neon_vc<cmp_op><mode>_insn.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
   (neg:<V_cmp_result>
     (COMPARISONS:VDQW
       (match_operand:VDQW 1 "s_register_operand" "w,w")
       (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
  "TARGET_NEON"
{
  /* FP comparisons need the UNSPEC form unless
     -funsafe-math-optimizations is enabled.  */
  bool use_unspec = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                     && !flag_unsafe_math_optimizations);

  /* The UNSPEC generators are named explicitly for V2SF and V4SF
     rather than through <mode>: gen_neon_vceq<mode>_insn_unspec is
     defined only for float modes, whereas this expander also iterates
     over the integer modes, which never expand to UNSPECs.  */
  if (!use_unspec)
    emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
                                               operands[1],
                                               operands[2]));
  else if (<MODE>mode == V2SFmode)
    emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                    operands[1],
                                                    operands[2]));
  else if (<MODE>mode == V4SFmode)
    emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                    operands[1],
                                                    operands[2]));
  else
    gcc_unreachable ();

  DONE;
}
)
2191
;; Canonical-RTL vector comparison.  The condition rejects float vector
;; modes unless -funsafe-math-optimizations is enabled.  Alternative 0
;; compares against a register, alternative 1 against "#0".
(define_insn "neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
{
  char templ[100];
  /* Type letter that precedes the element size in the mnemonic.  */
  const char *type_letter
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       ? "f" : "<cmp_type>");
  /* Final operand: a register, or the literal zero.  */
  const char *last_operand
    = (which_alternative == 0 ? "%<V_reg>2" : "#0");

  sprintf (templ, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                  " %%<V_reg>1, %s",
           type_letter, last_operand);
  output_asm_insn (templ, operands);
  return "";
}
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))]
)
2216
;; UNSPEC (NEON_VCMP) vector comparison over the VCVTF modes.
;; Alternative 0 compares against a register, alternative 1 against "#0".
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
{
  char templ[100];
  /* Final operand: a register, or the literal zero.  */
  const char *last_operand
    = (which_alternative == 0 ? "%<V_reg>2" : "#0");

  sprintf (templ, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                  " %%<V_reg>1, %s",
           last_operand);
  output_asm_insn (templ, operands);
  return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2235
;; Vector comparisons using the GTUGEU codes over the VDQIW modes; the
;; mnemonic uses the ".u" type prefix.
(define_insn "neon_vc<cmp_op>u<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
            (match_operand:VDQIW 1 "s_register_operand" "w")
            (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)
2246
;; Expander for comparisons of absolute values (GTGE codes) over the
;; VCVTF modes.  Without -funsafe-math-optimizations it expands to the
;; UNSPEC pattern, otherwise to neon_vca<cmp_op><mode>_insn.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
{
  rtx pat;

  if (!flag_unsafe_math_optimizations)
    pat = gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
                                                  operands[1],
                                                  operands[2]);
  else
    pat = gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
                                           operands[2]);

  emit_insn (pat);
  DONE;
}
)
2265
;; Canonical-RTL comparison of absolute values; only available with
;; -funsafe-math-optimizations.
(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2276
;; UNSPEC (NEON_VACMP) comparison of absolute values over the VCVTF modes.
(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          NEON_VACMP))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2286
;; vtst over the VDQIW modes (UNSPEC_VTST).
(define_insn "neon_vtst<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          UNSPEC_VTST))]
  "TARGET_NEON"
  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_tst<q>")]
)
2296
;; vabd over the VDQIW modes (VABD unspecs).
(define_insn "neon_vabd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VABD))]
  "TARGET_NEON"
  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)
2306
;; vabd over the VCVTF modes (UNSPEC_VABD_F).
(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VABD_F))]
  "TARGET_NEON"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
2316
;; vabdl: two VW inputs, result in the <V_widen> mode (VABDL unspecs).
(define_insn "neon_vabdl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VABDL))]
  "TARGET_NEON"
  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_abd_long")]
)
2326
;; vaba: the VABD unspec of operands 2 and 3 is added (plus) to
;; operand 1, which is tied to the output.
(define_insn "neon_vaba<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (plus:VDQIW
          (unspec:VDQIW
            [(match_operand:VDQIW 2 "s_register_operand" "w")
             (match_operand:VDQIW 3 "s_register_operand" "w")]
            VABD)
          (match_operand:VDQIW 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2337
;; vabal: the VABDL unspec of VW operands 2 and 3 is added (plus) to the
;; <V_widen> operand 1, which is tied to the output.
(define_insn "neon_vabal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (unspec:<V_widen>
            [(match_operand:VW 2 "s_register_operand" "w")
             (match_operand:VW 3 "s_register_operand" "w")]
            VABDL)
          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2348
;; v<maxmin> over the VDQIW modes (VMAXMIN unspecs).
(define_insn "neon_v<maxmin><sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VMAXMIN))]
  "TARGET_NEON"
  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)
2358
;; v<maxmin> over the VCVTF modes (VMAXMINF unspecs).
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINF))]
  "TARGET_NEON"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2368
;; Vector forms for the IEEE-754 fmax()/fmin() functions, over the VCVTF
;; modes (VMAXMINFNM unspecs).  Requires TARGET_FPU_ARMV8 as well as
;; TARGET_NEON.
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINFNM))]
  "TARGET_NEON && TARGET_FPU_ARMV8"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2379
;; Expander for vpadd over the VD modes; forwards its three operands to
;; neon_vpadd_internal<mode>.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand" "=w")
   (match_operand:VD 1 "s_register_operand" "w")
   (match_operand:VD 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx pat = gen_neon_vpadd_internal<mode> (operands[0], operands[1],
                                           operands[2]);

  emit_insn (pat);
  DONE;
})
2390
;; vpaddl: one VDQIW input, result in the <V_double_width> mode
;; (VPADDL unspecs).
(define_insn "neon_vpaddl<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          VPADDL))]
  "TARGET_NEON"
  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_reduc_add_long")]
)
2399
;; vpadal: <V_double_width> operand 1 is tied to the output; operand 2 is
;; a VDQIW vector (VPADAL unspecs).
(define_insn "neon_vpadal<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:<V_double_width> 1 "s_register_operand" "0")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VPADAL))]
  "TARGET_NEON"
  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_reduc_add_acc")]
)
2409
;; vp<maxmin> over the VDI modes (VPMAXMIN unspecs).
(define_insn "neon_vp<maxmin><sup><mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VPMAXMIN))]
  "TARGET_NEON"
  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
2419
;; vp<maxmin> over the VCVTF modes (VPMAXMINF unspecs).
(define_insn "neon_vp<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VPMAXMINF))]
  "TARGET_NEON"
  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
)
2429
;; vrecps over the VCVTF modes (UNSPEC_VRECPS).
(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRECPS))]
  "TARGET_NEON"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)
2439
;; vrsqrts over the VCVTF modes (UNSPEC_VRSQRTS).
(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRSQRTS))]
  "TARGET_NEON"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)
2449
;; Expander for vabs over the VDQW modes; forwards to abs<mode>2.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_abs<mode>2 (dest, src));
  DONE;
})
2458
;; vqabs over the VDQIW modes (UNSPEC_VQABS).
(define_insn "neon_vqabs<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQABS))]
  "TARGET_NEON"
  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qabs<q>")]
)
2467
;; bswap over the VDQHSD modes, emitted as vrev<V_sz_elem>.8.
(define_insn "neon_bswap<mode>"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD
          (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
2475
;; Expander for vneg over the VDQW modes; forwards to neg<mode>2.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neg<mode>2 (dest, src));
  DONE;
})
2484
;; Expander for copysign over the VCVTF modes.  It builds a vector whose
;; elements are all 0x80000000, copies operand 2 into operand 0, and then
;; emits neon_vbsl<mode> with that mask, operand 0 and operand 1.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
{
  rtx v_bitmask_cast;
  rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
  int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
  rtvec v = rtvec_alloc (n_elt);
  /* A CONST_INT must be sign-extended from the mode it is used in.
     GEN_INT (0x80000000) would yield a positive HOST_WIDE_INT, which is
     not a canonical 32-bit constant, so let gen_int_mode canonicalize
     the value for SImode.  */
  rtx sign_bit = gen_int_mode (0x80000000, SImode);

  /* Create bitmask for vector select.  */
  for (i = 0; i < n_elt; ++i)
    RTVEC_ELT (v, i) = sign_bit;

  emit_move_insn (v_bitmask,
                  gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
  emit_move_insn (operands[0], operands[2]);
  /* View the mask in the float vector mode expected by the select.  */
  v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                        <VCVTF:V_cmp_result>mode, 0);
  emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
                                  operands[1]));

  DONE;
}
)
2511
;; vqneg over the VDQIW modes (UNSPEC_VQNEG).
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)
2520
;; vcls over the VDQIW modes (UNSPEC_VCLS).
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)
2529
;; clz over the VDQIW modes, emitted as vclz.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
2537
;; Expander for vclz over the VDQIW modes; forwards to clz<mode>2.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_clz<mode>2 (dest, src));
  DONE;
})
2546
;; popcount over the VE modes, emitted as vcnt.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE
          (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
2554
;; Expander for vcnt over the VE modes; forwards to popcount<mode>2.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand" "=w")
   (match_operand:VE 1 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_popcount<mode>2 (dest, src));
  DONE;
})
2563
;; vrecpe over the V32 modes (UNSPEC_VRECPE).
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
2572
;; vrsqrte over the V32 modes (UNSPEC_VRSQRTE).
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
2581
;; Expander for vmvn over the VDQIW modes; forwards to one_cmpl<mode>2.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_one_cmpl<mode>2 (dest, src));
  DONE;
})
2590
;; Sign-extending lane extract from a VD vector into an SImode core
;; register.  On big-endian targets the lane index is mirrored within
;; the vector before the instruction is printed.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;

      operands[2] = GEN_INT (mirrored);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2609
;; Zero-extending lane extract from a VD vector into an SImode core
;; register.  On big-endian targets the lane index is mirrored within
;; the vector before the instruction is printed.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;

      operands[2] = GEN_INT (mirrored);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2628
;; Sign-extending lane extract from a VQ2 vector into an SImode core
;; register.  The lane is addressed through a <V_HALF>mode register:
;; the half is selected by lane / (lanes per half) and the index within
;; that half is mirrored on big-endian targets.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[1]);
  unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % lanes_per_half;
  /* The register number advances by 2 for each half.  */
  unsigned int half_regno = base_regno + 2 * (lane / lanes_per_half);
  rtx xops[3];

  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode, half_regno);
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", xops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2655
;; Zero-extending lane extract from a VQ2 vector into an SImode core
;; register.  The lane is addressed through a <V_HALF>mode register:
;; the half is selected by lane / (lanes per half) and the index within
;; that half is mirrored on big-endian targets.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[1]);
  unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % lanes_per_half;
  /* The register number advances by 2 for each half.  */
  unsigned int half_regno = base_regno + 2 * (lane / lanes_per_half);
  rtx xops[3];

  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode, half_regno);
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2682
;; Expander for vget_lane over the VDQW modes.  32-bit elements go
;; through vec_extract<mode>; other element sizes use the sign-extending
;; internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx pat;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics use a model in which the element ordering in
         memory is vldm order, while generic RTL uses array order.
         Convert the lane number from the former to the latter.  */
      unsigned int lane = INTVAL (operands[2]);
      unsigned int lanes_per_64_bits
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);

      lane ^= lanes_per_64_bits - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    pat = gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                  operands[1],
                                                  operands[2]);
  else
    pat = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
2711
;; Expander for vget_laneu over the VDQIW modes.  32-bit elements go
;; through vec_extract<mode>; other element sizes use the zero-extending
;; internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx pat;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics use a model in which the element ordering in
         memory is vldm order, while generic RTL uses array order.
         Convert the lane number from the former to the latter.  */
      unsigned int lane = INTVAL (operands[2]);
      unsigned int lanes_per_64_bits
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);

      lane ^= lanes_per_64_bits - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    pat = gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                  operands[1],
                                                  operands[2]);
  else
    pat = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
2740
;; DImode form of vget_lane.  Operand 2 (the lane index) is not examined;
;; the expansion is a plain move of operand 1 into operand 0.
2741 (define_expand "neon_vget_lanedi"
2742 [(match_operand:DI 0 "s_register_operand" "=r")
2743 (match_operand:DI 1 "s_register_operand" "w")
2744 (match_operand:SI 2 "immediate_operand" "")]
2745 "TARGET_NEON"
2746 {
2747 emit_move_insn (operands[0], operands[1]);
2748 DONE;
2749 })
2750
;; V2DI form of vget_lane.  After the big-endian adjustment (index ^ 1),
;; lane 0 is fetched with gen_lowpart and lane 1 with gen_highpart; any
;; other index trips the gcc_assert.
2751 (define_expand "neon_vget_lanev2di"
2752 [(match_operand:DI 0 "s_register_operand" "")
2753 (match_operand:V2DI 1 "s_register_operand" "")
2754 (match_operand:SI 2 "immediate_operand" "")]
2755 "TARGET_NEON"
2756 {
2757 int lane;
2758
2759 if (BYTES_BIG_ENDIAN)
2760 {
2761 /* The intrinsics are defined in terms of a model where the
2762 element ordering in memory is vldm order, whereas the generic
2763 RTL is defined in terms of a model where the element ordering
2764 in memory is array order. Convert the lane number to conform
2765 to this model. */
2766 unsigned int elt = INTVAL (operands[2]);
2767 unsigned int reg_nelts = 2;
2768 elt ^= reg_nelts - 1;
2769 operands[2] = GEN_INT (elt);
2770 }
2771
2772 lane = INTVAL (operands[2]);
2773 gcc_assert ((lane ==0) || (lane == 1));
2774 emit_move_insn (operands[0], lane == 0
2775 ? gen_lowpart (DImode, operands[1])
2776 : gen_highpart (DImode, operands[1]));
2777 DONE;
2778 })
2779
;; Expander for vset_lane.  The lane index (operand 3) is flipped on
;; big-endian targets (XOR with lanes-per-64-bit - 1) and then handed to
;; vec_set<mode>_internal as the bit mask 1 << elt, together with the
;; scalar (operand 1) and the incoming vector (operand 2).
2780 (define_expand "neon_vset_lane<mode>"
2781 [(match_operand:VDQ 0 "s_register_operand" "=w")
2782 (match_operand:<V_elem> 1 "s_register_operand" "r")
2783 (match_operand:VDQ 2 "s_register_operand" "0")
2784 (match_operand:SI 3 "immediate_operand" "i")]
2785 "TARGET_NEON"
2786 {
2787 unsigned int elt = INTVAL (operands[3]);
2788
2789 if (BYTES_BIG_ENDIAN)
2790 {
2791 unsigned int reg_nelts
2792 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2793 elt ^= reg_nelts - 1;
2794 }
2795
2796 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2797 GEN_INT (1 << elt), operands[2]));
2798 DONE;
2799 })
2800
2801 ; Operands 2 & 3 are ignored: the DImode value is a single element, so operand 1 is simply moved into operand 0.
2802
;; DImode form of vset_lane: the expansion only moves operand 1 into
;; operand 0; the old vector (operand 2) and the lane index (operand 3)
;; are never read.
2803 (define_expand "neon_vset_lanedi"
2804 [(match_operand:DI 0 "s_register_operand" "=w")
2805 (match_operand:DI 1 "s_register_operand" "r")
2806 (match_operand:DI 2 "s_register_operand" "0")
2807 (match_operand:SI 3 "immediate_operand" "i")]
2808 "TARGET_NEON"
2809 {
2810 emit_move_insn (operands[0], operands[1]);
2811 DONE;
2812 })
2813
;; vcreate: views the DImode operand 1 in <MODE>mode via gen_lowpart and
;; moves the result into operand 0.
2814 (define_expand "neon_vcreate<mode>"
2815 [(match_operand:VD_RE 0 "s_register_operand" "")
2816 (match_operand:DI 1 "general_operand" "")]
2817 "TARGET_NEON"
2818 {
2819 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2820 emit_move_insn (operands[0], src);
2821 DONE;
2822 })
2823
;; vdup_n for the VX modes: vec_duplicate of a scalar held in a core
;; register (constraint r), emitted as a single vdup.<V_sz_elem>.
2824 (define_insn "neon_vdup_n<mode>"
2825 [(set (match_operand:VX 0 "s_register_operand" "=w")
2826 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2827 "TARGET_NEON"
2828 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2829 [(set_attr "type" "neon_from_gp<q>")]
2830 )
2831
;; vdup_n for the V32 modes.  Alternative 0 takes the scalar from a core
;; register (r, type neon_from_gp<q>); alternative 1 takes it from a
;; register matching constraint t and prints it with the %y modifier
;; (type neon_dup<q>).
2832 (define_insn "neon_vdup_n<mode>"
2833 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2834 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2835 "TARGET_NEON"
2836 "@
2837 vdup.<V_sz_elem>\t%<V_reg>0, %1
2838 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2839 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
2840 )
2841
;; DImode form of vdup_n: expands to a plain move of operand 1 into
;; operand 0.
2842 (define_expand "neon_vdup_ndi"
2843 [(match_operand:DI 0 "s_register_operand" "=w")
2844 (match_operand:DI 1 "s_register_operand" "r")]
2845 "TARGET_NEON"
2846 {
2847 emit_move_insn (operands[0], operands[1]);
2848 DONE;
2849 }
2850 )
2851
;; V2DI form of vdup_n.  Emits two vmov instructions (hence length 8), one
;; writing %e0 and one writing %f0.  Alternative 0 sources a core register
;; pair (%Q1, %R1); alternative 1 sources a w-class register (%P1).
2852 (define_insn "neon_vdup_nv2di"
2853 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2854 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2855 "TARGET_NEON"
2856 "@
2857 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2858 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2859 [(set_attr "length" "8")
2860 (set_attr "type" "multiple")]
2861 )
2862
;; vdup from one lane of operand 1 (vec_duplicate of a vec_select).  On
;; big-endian targets the index is mirrored (nunits - 1 - elt) before it is
;; printed.  <Is_d_reg> selects whether the destination is printed with %P
;; or with %q; the source is always printed with %P.
2863 (define_insn "neon_vdup_lane<mode>_internal"
2864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2865 (vec_duplicate:VDQW
2866 (vec_select:<V_elem>
2867 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2868 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2869 "TARGET_NEON"
2870 {
2871 if (BYTES_BIG_ENDIAN)
2872 {
2873 int elt = INTVAL (operands[2]);
2874 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2875 operands[2] = GEN_INT (elt);
2876 }
2877 if (<Is_d_reg>)
2878 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2879 else
2880 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2881 }
2882 [(set_attr "type" "neon_dup<q>")]
2883 )
2884
;; Expander for vdup_lane.  On big-endian targets the lane index is XORed
;; with (lanes per 64-bit register - 1) before being passed on to
;; neon_vdup_lane<mode>_internal, which emits the instruction.
2885 (define_expand "neon_vdup_lane<mode>"
2886 [(match_operand:VDQW 0 "s_register_operand" "=w")
2887 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2888 (match_operand:SI 2 "immediate_operand" "i")]
2889 "TARGET_NEON"
2890 {
2891 if (BYTES_BIG_ENDIAN)
2892 {
2893 unsigned int elt = INTVAL (operands[2]);
2894 unsigned int reg_nelts
2895 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
2896 elt ^= reg_nelts - 1;
2897 operands[2] = GEN_INT (elt);
2898 }
2899 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2900 operands[2]));
2901 DONE;
2902 })
2903
2904 ; Scalar index is ignored, since only zero is valid here.
; The expansion is therefore a plain move of operand 1 into operand 0.
2905 (define_expand "neon_vdup_lanedi"
2906 [(match_operand:DI 0 "s_register_operand" "=w")
2907 (match_operand:DI 1 "s_register_operand" "w")
2908 (match_operand:SI 2 "immediate_operand" "i")]
2909 "TARGET_NEON"
2910 {
2911 emit_move_insn (operands[0], operands[1]);
2912 DONE;
2913 })
2914
2915 ; Likewise for v2di, as the DImode second operand has only a single element.
; The work is delegated to neon_vdup_nv2di; operand 2 is not passed on.
2916 (define_expand "neon_vdup_lanev2di"
2917 [(match_operand:V2DI 0 "s_register_operand" "=w")
2918 (match_operand:DI 1 "s_register_operand" "w")
2919 (match_operand:SI 2 "immediate_operand" "i")]
2920 "TARGET_NEON"
2921 {
2922 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2923 DONE;
2924 })
2925
2926 ; Disabled before reload because we don't want combine doing something silly,
2927 ; but used by the post-reload expansion of neon_vcombine.
; Both operands are read-write (+w): the pattern is a pair of sets that
; exchange the two registers, emitted as a single vswp.
2928 (define_insn "*neon_vswp<mode>"
2929 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2930 (match_operand:VDQX 1 "s_register_operand" "+w"))
2931 (set (match_dup 1) (match_dup 0))]
2932 "TARGET_NEON && reload_completed"
2933 "vswp\t%<V_reg>0, %<V_reg>1"
2934 [(set_attr "type" "neon_permute<q>")]
2935 )
2936
2937 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2938 ;; dest vector.
2939 ;; FIXME: A different implementation of this builtin could make it much
2940 ;; more likely that we wouldn't actually need to output anything (we could make
2941 ;; it so that the reg allocator puts things in the right places magically
2942 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; The insn itself has no assembler output ("#"); once reload has completed
;; it is split by calling neon_split_vcombine on the operands.
2943
2944 (define_insn_and_split "neon_vcombine<mode>"
2945 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2946 (vec_concat:<V_DOUBLE>
2947 (match_operand:VDX 1 "s_register_operand" "w")
2948 (match_operand:VDX 2 "s_register_operand" "w")))]
2949 "TARGET_NEON"
2950 "#"
2951 "&& reload_completed"
2952 [(const_int 0)]
2953 {
2954 neon_split_vcombine (operands);
2955 DONE;
2956 }
2957 [(set_attr "type" "multiple")]
2958 )
2959
;; vget_high: moves into operand 0 the <V_HALF>mode subreg of operand 1
;; located at byte offset GET_MODE_SIZE (<V_HALF>mode).
2960 (define_expand "neon_vget_high<mode>"
2961 [(match_operand:<V_HALF> 0 "s_register_operand")
2962 (match_operand:VQX 1 "s_register_operand")]
2963 "TARGET_NEON"
2964 {
2965 emit_move_insn (operands[0],
2966 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
2967 GET_MODE_SIZE (<V_HALF>mode)));
2968 DONE;
2969 })
2970
;; vget_low: moves into operand 0 the <V_HALF>mode subreg of operand 1
;; located at byte offset 0.
2971 (define_expand "neon_vget_low<mode>"
2972 [(match_operand:<V_HALF> 0 "s_register_operand")
2973 (match_operand:VQX 1 "s_register_operand")]
2974 "TARGET_NEON"
2975 {
2976 emit_move_insn (operands[0],
2977 simplify_gen_subreg (<V_HALF>mode, operands[1],
2978 <MODE>mode, 0));
2979 DONE;
2980 })
2981
;; Signed integer vector to float vector conversion (vcvt.f32.s32).
;; Disabled when flag_rounding_math is set.
2982 (define_insn "float<mode><V_cvtto>2"
2983 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2984 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2985 "TARGET_NEON && !flag_rounding_math"
2986 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
2987 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
2988 )
2989
;; Unsigned integer vector to float vector conversion (vcvt.f32.u32).
;; Disabled when flag_rounding_math is set.
2990 (define_insn "floatuns<mode><V_cvtto>2"
2991 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2992 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2993 "TARGET_NEON && !flag_rounding_math"
2994 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
2995 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
2996 )
2997
;; Float vector to signed integer vector conversion (RTL fix), emitted as
;; vcvt.s32.f32.
2998 (define_insn "fix_trunc<mode><V_cvtto>2"
2999 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3000 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3001 "TARGET_NEON"
3002 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3003 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3004 )
3005
;; Float vector to unsigned integer vector conversion (RTL unsigned_fix),
;; emitted as vcvt.u32.f32.
3006 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3007 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3008 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3009 "TARGET_NEON"
3010 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3011 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3012 )
3013
;; vcvt builtin, float source (VCVTF modes): an unspec drawn from VCVT_US,
;; with <sup> supplying the integer signedness in the vcvt mnemonic.
3014 (define_insn "neon_vcvt<sup><mode>"
3015 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3016 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3017 VCVT_US))]
3018 "TARGET_NEON"
3019 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3020 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3021 )
3022
;; vcvt builtin, integer source (VCVTI modes): an unspec drawn from VCVT_US,
;; with <sup> supplying the integer signedness in the vcvt mnemonic.
3023 (define_insn "neon_vcvt<sup><mode>"
3024 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3025 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3026 VCVT_US))]
3027 "TARGET_NEON"
3028 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3029 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3030 )
3031
;; V4HF to V4SF conversion (vcvt.f32.f16).  Needs TARGET_FP16 as well as
;; TARGET_NEON.
3032 (define_insn "neon_vcvtv4sfv4hf"
3033 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3034 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3035 UNSPEC_VCVT))]
3036 "TARGET_NEON && TARGET_FP16"
3037 "vcvt.f32.f16\t%q0, %P1"
3038 [(set_attr "type" "neon_fp_cvt_widen_h")]
3039 )
3040
;; V4SF to V4HF conversion (vcvt.f16.f32).  Needs TARGET_FP16 as well as
;; TARGET_NEON.
3041 (define_insn "neon_vcvtv4hfv4sf"
3042 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3043 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3044 UNSPEC_VCVT))]
3045 "TARGET_NEON && TARGET_FP16"
3046 "vcvt.f16.f32\t%P0, %q1"
3047 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
3048 )
3049
;; vcvt_n builtin, float source: like the plain vcvt pattern but with an
;; extra immediate (operand 2) printed as the last assembler operand.
;; neon_const_bounds is called on it with limits 1 and 33 -- presumably an
;; exclusive upper bound, i.e. 1..32; confirm against neon_const_bounds.
3050 (define_insn "neon_vcvt<sup>_n<mode>"
3051 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3052 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3053 (match_operand:SI 2 "immediate_operand" "i")]
3054 VCVT_US_N))]
3055 "TARGET_NEON"
3056 {
3057 neon_const_bounds (operands[2], 1, 33);
3058 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3059 }
3060 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3061 )
3062
;; vcvt_n builtin, integer source: like the plain vcvt pattern but with an
;; extra immediate (operand 2) printed as the last assembler operand.
;; neon_const_bounds is called on it with limits 1 and 33 -- presumably an
;; exclusive upper bound, i.e. 1..32; confirm against neon_const_bounds.
3063 (define_insn "neon_vcvt<sup>_n<mode>"
3064 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3065 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3066 (match_operand:SI 2 "immediate_operand" "i")]
3067 VCVT_US_N))]
3068 "TARGET_NEON"
3069 {
3070 neon_const_bounds (operands[2], 1, 33);
3071 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3072 }
3073 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3074 )
3075
;; vmovn: UNSPEC_VMOVN from a VN-mode source (printed with %q) to a
;; <V_narrow> destination (printed with %P).
3076 (define_insn "neon_vmovn<mode>"
3077 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3078 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3079 UNSPEC_VMOVN))]
3080 "TARGET_NEON"
3081 "vmovn.<V_if_elem>\t%P0, %q1"
3082 [(set_attr "type" "neon_shift_imm_narrow_q")]
3083 )
3084
;; vqmovn: unspec from the VQMOVN iterator, VN-mode source to <V_narrow>
;; destination; <sup> supplies the signedness in the mnemonic.
3085 (define_insn "neon_vqmovn<sup><mode>"
3086 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3087 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3088 VQMOVN))]
3089 "TARGET_NEON"
3090 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3091 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3092 )
3093
;; vqmovun: UNSPEC_VQMOVUN from a VN-mode source to a <V_narrow>
;; destination, using the <V_s_elem> element suffix.
3094 (define_insn "neon_vqmovun<mode>"
3095 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3096 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3097 UNSPEC_VQMOVUN))]
3098 "TARGET_NEON"
3099 "vqmovun.<V_s_elem>\t%P0, %q1"
3100 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3101 )
3102
;; vmovl: unspec from the VMOVL iterator, VW-mode source (printed with %P)
;; to a <V_widen> destination (printed with %q).
3103 (define_insn "neon_vmovl<sup><mode>"
3104 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3105 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3106 VMOVL))]
3107 "TARGET_NEON"
3108 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3109 [(set_attr "type" "neon_shift_imm_long")]
3110 )
3111
;; vmul_lane for the VMD modes: multiplies operand 1 by lane operand 3 of
;; operand 2 (UNSPEC_VMUL_LANE); all registers are printed with %P.  The
;; scheduling type depends on <Is_float_mode>.
3112 (define_insn "neon_vmul_lane<mode>"
3113 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3114 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3115 (match_operand:VMD 2 "s_register_operand"
3116 "<scalar_mul_constraint>")
3117 (match_operand:SI 3 "immediate_operand" "i")]
3118 UNSPEC_VMUL_LANE))]
3119 "TARGET_NEON"
3120 {
3121 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3122 }
3123 [(set (attr "type")
3124 (if_then_else (match_test "<Is_float_mode>")
3125 (const_string "neon_fp_mul_s_scalar<q>")
3126 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3127 )
3128
;; vmul_lane for the VMQ modes: operands 0 and 1 are printed with %q, while
;; the scalar comes from lane operand 3 of the <V_HALF> operand 2 (printed
;; with %P).  The scheduling type depends on <Is_float_mode>.
3129 (define_insn "neon_vmul_lane<mode>"
3130 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3131 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3132 (match_operand:<V_HALF> 2 "s_register_operand"
3133 "<scalar_mul_constraint>")
3134 (match_operand:SI 3 "immediate_operand" "i")]
3135 UNSPEC_VMUL_LANE))]
3136 "TARGET_NEON"
3137 {
3138 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3139 }
3140 [(set (attr "type")
3141 (if_then_else (match_test "<Is_float_mode>")
3142 (const_string "neon_fp_mul_s_scalar<q>")
3143 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3144 )
3145
;; vmull_lane: VMDI-mode operand 1 times lane operand 3 of operand 2, with
;; a <V_widen> result (printed with %q).  <sup> supplies the signedness.
3146 (define_insn "neon_vmull<sup>_lane<mode>"
3147 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3148 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3149 (match_operand:VMDI 2 "s_register_operand"
3150 "<scalar_mul_constraint>")
3151 (match_operand:SI 3 "immediate_operand" "i")]
3152 VMULL_LANE))]
3153 "TARGET_NEON"
3154 {
3155 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3156 }
3157 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
3158 )
3159
;; vqdmull_lane: UNSPEC_VQDMULL_LANE of VMDI-mode operand 1 and lane
;; operand 3 of operand 2, with a <V_widen> result (printed with %q).
3160 (define_insn "neon_vqdmull_lane<mode>"
3161 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3162 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3163 (match_operand:VMDI 2 "s_register_operand"
3164 "<scalar_mul_constraint>")
3165 (match_operand:SI 3 "immediate_operand" "i")]
3166 UNSPEC_VQDMULL_LANE))]
3167 "TARGET_NEON"
3168 {
3169 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3170 }
3171 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
3172 )
3173
;; vqdmulh_lane / vqrdmulh_lane (selected by <r>) for the VMQI modes:
;; operands 0 and 1 are printed with %q, the scalar is lane operand 3 of
;; the <V_HALF> operand 2 (printed with %P).
3174 (define_insn "neon_vq<r>dmulh_lane<mode>"
3175 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3176 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3177 (match_operand:<V_HALF> 2 "s_register_operand"
3178 "<scalar_mul_constraint>")
3179 (match_operand:SI 3 "immediate_operand" "i")]
3180 VQDMULH_LANE))]
3181 "TARGET_NEON"
3182 {
3183 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3184 }
3185 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3186 )
3187
;; vqdmulh_lane / vqrdmulh_lane (selected by <r>) for the VMDI modes: every
;; register operand is printed with %P, and the scalar is lane operand 3 of
;; operand 2.  All operands are D registers, so the scheduling type carries
;; no _q suffix (the VMQI variant of this pattern is the _q one).
3188 (define_insn "neon_vq<r>dmulh_lane<mode>"
3189 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3190 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3191 (match_operand:VMDI 2 "s_register_operand"
3192 "<scalar_mul_constraint>")
3193 (match_operand:SI 3 "immediate_operand" "i")]
3194 VQDMULH_LANE))]
3195 "TARGET_NEON"
3196 {
3197 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3198 }
3199 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
3200 )
3201
3202 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI variant: operand 1 is tied to the destination (constraint 0),
;; operand 2 is printed with %q and the scalar is lane operand 4 of the
;; <V_HALF> operand 3.  Only enabled under TARGET_NEON_RDMA.
3203 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3204 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3205 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3206 (match_operand:VMQI 2 "s_register_operand" "w")
3207 (match_operand:<V_HALF> 3 "s_register_operand"
3208 "<scalar_mul_constraint>")
3209 (match_operand:SI 4 "immediate_operand" "i")]
3210 VQRDMLH_AS))]
3211 "TARGET_NEON_RDMA"
3212 {
3213 return
3214 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3215 }
3216 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
3217 )
3218
;; vqrdmlah_lane / vqrdmlsh_lane, VMDI variant: operand 1 is tied to the
;; destination (constraint 0); all registers are printed with %P and the
;; scalar is lane operand 4 of operand 3.  Only enabled under
;; TARGET_NEON_RDMA.
3219 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3220 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3221 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3222 (match_operand:VMDI 2 "s_register_operand" "w")
3223 (match_operand:VMDI 3 "s_register_operand"
3224 "<scalar_mul_constraint>")
3225 (match_operand:SI 4 "immediate_operand" "i")]
3226 VQRDMLH_AS))]
3227 "TARGET_NEON_RDMA"
3228 {
3229 return
3230 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3231 }
3232 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
3233 )
3234
;; vmla_lane for the VMD modes: the accumulator (operand 1) is tied to the
;; destination; operand 2 is multiplied by lane operand 4 of operand 3.
;; The scheduling type depends on <Is_float_mode>.
3235 (define_insn "neon_vmla_lane<mode>"
3236 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3237 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3238 (match_operand:VMD 2 "s_register_operand" "w")
3239 (match_operand:VMD 3 "s_register_operand"
3240 "<scalar_mul_constraint>")
3241 (match_operand:SI 4 "immediate_operand" "i")]
3242 UNSPEC_VMLA_LANE))]
3243 "TARGET_NEON"
3244 {
3245 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3246 }
3247 [(set (attr "type")
3248 (if_then_else (match_test "<Is_float_mode>")
3249 (const_string "neon_fp_mla_s_scalar<q>")
3250 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3251 )
3252
;; vmla_lane for the VMQ modes: the accumulator (operand 1) is tied to the
;; destination; operand 2 (printed with %q) is multiplied by lane operand 4
;; of the <V_HALF> operand 3.  The scheduling type depends on
;; <Is_float_mode>.
3253 (define_insn "neon_vmla_lane<mode>"
3254 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3255 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3256 (match_operand:VMQ 2 "s_register_operand" "w")
3257 (match_operand:<V_HALF> 3 "s_register_operand"
3258 "<scalar_mul_constraint>")
3259 (match_operand:SI 4 "immediate_operand" "i")]
3260 UNSPEC_VMLA_LANE))]
3261 "TARGET_NEON"
3262 {
3263 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3264 }
3265 [(set (attr "type")
3266 (if_then_else (match_test "<Is_float_mode>")
3267 (const_string "neon_fp_mla_s_scalar<q>")
3268 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3269 )
3270
;; vmlal_lane: <V_widen> accumulator (operand 1, tied to the destination)
;; plus VMDI-mode operand 2 times lane operand 4 of operand 3.  <sup>
;; supplies the signedness.
3271 (define_insn "neon_vmlal<sup>_lane<mode>"
3272 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3273 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3274 (match_operand:VMDI 2 "s_register_operand" "w")
3275 (match_operand:VMDI 3 "s_register_operand"
3276 "<scalar_mul_constraint>")
3277 (match_operand:SI 4 "immediate_operand" "i")]
3278 VMLAL_LANE))]
3279 "TARGET_NEON"
3280 {
3281 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3282 }
3283 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3284 )
3285
;; vqdmlal_lane: UNSPEC_VQDMLAL_LANE with a <V_widen> accumulator
;; (operand 1, tied to the destination), VMDI-mode operand 2 and lane
;; operand 4 of operand 3.
3286 (define_insn "neon_vqdmlal_lane<mode>"
3287 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3288 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3289 (match_operand:VMDI 2 "s_register_operand" "w")
3290 (match_operand:VMDI 3 "s_register_operand"
3291 "<scalar_mul_constraint>")
3292 (match_operand:SI 4 "immediate_operand" "i")]
3293 UNSPEC_VQDMLAL_LANE))]
3294 "TARGET_NEON"
3295 {
3296 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3297 }
3298 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3299 )
3300
;; vmls_lane for the VMD modes: the accumulator (operand 1) is tied to the
;; destination; operand 2 is multiplied by lane operand 4 of operand 3.
;; The scheduling type depends on <Is_float_mode>.
3301 (define_insn "neon_vmls_lane<mode>"
3302 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3303 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3304 (match_operand:VMD 2 "s_register_operand" "w")
3305 (match_operand:VMD 3 "s_register_operand"
3306 "<scalar_mul_constraint>")
3307 (match_operand:SI 4 "immediate_operand" "i")]
3308 UNSPEC_VMLS_LANE))]
3309 "TARGET_NEON"
3310 {
3311 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3312 }
3313 [(set (attr "type")
3314 (if_then_else (match_test "<Is_float_mode>")
3315 (const_string "neon_fp_mla_s_scalar<q>")
3316 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3317 )
3318
;; vmls_lane for the VMQ modes: the accumulator (operand 1) is tied to the
;; destination; operand 2 (printed with %q) is multiplied by lane operand 4
;; of the <V_HALF> operand 3.  The scheduling type depends on
;; <Is_float_mode>.
3319 (define_insn "neon_vmls_lane<mode>"
3320 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3321 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3322 (match_operand:VMQ 2 "s_register_operand" "w")
3323 (match_operand:<V_HALF> 3 "s_register_operand"
3324 "<scalar_mul_constraint>")
3325 (match_operand:SI 4 "immediate_operand" "i")]
3326 UNSPEC_VMLS_LANE))]
3327 "TARGET_NEON"
3328 {
3329 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3330 }
3331 [(set (attr "type")
3332 (if_then_else (match_test "<Is_float_mode>")
3333 (const_string "neon_fp_mla_s_scalar<q>")
3334 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3335 )
3336
;; vmlsl_lane: <V_widen> accumulator (operand 1, tied to the destination),
;; VMDI-mode operand 2 and lane operand 4 of operand 3.  <sup> supplies the
;; signedness.
3337 (define_insn "neon_vmlsl<sup>_lane<mode>"
3338 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3339 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3340 (match_operand:VMDI 2 "s_register_operand" "w")
3341 (match_operand:VMDI 3 "s_register_operand"
3342 "<scalar_mul_constraint>")
3343 (match_operand:SI 4 "immediate_operand" "i")]
3344 VMLSL_LANE))]
3345 "TARGET_NEON"
3346 {
3347 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3348 }
3349 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3350 )
3351
;; vqdmlsl_lane: UNSPEC_VQDMLSL_LANE with a <V_widen> accumulator
;; (operand 1, tied to the destination), VMDI-mode operand 2 and lane
;; operand 4 of operand 3.
3352 (define_insn "neon_vqdmlsl_lane<mode>"
3353 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3354 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3355 (match_operand:VMDI 2 "s_register_operand" "w")
3356 (match_operand:VMDI 3 "s_register_operand"
3357 "<scalar_mul_constraint>")
3358 (match_operand:SI 4 "immediate_operand" "i")]
3359 UNSPEC_VQDMLSL_LANE))]
3360 "TARGET_NEON"
3361 {
3362 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3363 }
3364 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3365 )
3366
3367 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3368 ; core register into a temp register, then use a scalar taken from that. This
3369 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3370 ; or extracted from another vector. In the latter case it's currently better to
3371 ; use the "_lane" variant, and the former case can probably be implemented
3372 ; using vld1_lane, but that hasn't been done yet.
3373
;; vmul_n for the VMD modes: the scalar (operand 2) is placed in lane 0 of a
;; fresh <MODE>mode temporary with neon_vset_lane<mode>, then operand 1 is
;; multiplied by that lane via neon_vmul_lane<mode>.  The temporary is passed
;; as its own source vector, so its remaining lanes are never initialised.
3374 (define_expand "neon_vmul_n<mode>"
3375 [(match_operand:VMD 0 "s_register_operand" "")
3376 (match_operand:VMD 1 "s_register_operand" "")
3377 (match_operand:<V_elem> 2 "s_register_operand" "")]
3378 "TARGET_NEON"
3379 {
3380 rtx tmp = gen_reg_rtx (<MODE>mode);
3381 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3382 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3383 const0_rtx));
3384 DONE;
3385 })
3386
;; vmul_n for the VMQ modes: the scalar (operand 2) is placed in lane 0 of a
;; fresh <V_HALF>mode temporary with neon_vset_lane<V_half>, then operand 1
;; is multiplied by that lane via neon_vmul_lane<mode>.  The temporary's
;; remaining lanes are never initialised.
3387 (define_expand "neon_vmul_n<mode>"
3388 [(match_operand:VMQ 0 "s_register_operand" "")
3389 (match_operand:VMQ 1 "s_register_operand" "")
3390 (match_operand:<V_elem> 2 "s_register_operand" "")]
3391 "TARGET_NEON"
3392 {
3393 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3394 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3395 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3396 const0_rtx));
3397 DONE;
3398 })
3399
;; vmulls_n: puts the scalar (operand 2) in lane 0 of a fresh temporary and
;; expands to neon_vmulls_lane<mode> on lane 0.  The temporary's remaining
;; lanes are never initialised.
3400 (define_expand "neon_vmulls_n<mode>"
3401 [(match_operand:<V_widen> 0 "s_register_operand" "")
3402 (match_operand:VMDI 1 "s_register_operand" "")
3403 (match_operand:<V_elem> 2 "s_register_operand" "")]
3404 "TARGET_NEON"
3405 {
3406 rtx tmp = gen_reg_rtx (<MODE>mode);
3407 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3408 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
3409 const0_rtx));
3410 DONE;
3411 })
3412
;; vmullu_n: puts the scalar (operand 2) in lane 0 of a fresh temporary and
;; expands to neon_vmullu_lane<mode> on lane 0.  The temporary's remaining
;; lanes are never initialised.
3413 (define_expand "neon_vmullu_n<mode>"
3414 [(match_operand:<V_widen> 0 "s_register_operand" "")
3415 (match_operand:VMDI 1 "s_register_operand" "")
3416 (match_operand:<V_elem> 2 "s_register_operand" "")]
3417 "TARGET_NEON"
3418 {
3419 rtx tmp = gen_reg_rtx (<MODE>mode);
3420 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3421 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
3422 const0_rtx));
3423 DONE;
3424 })
3425
;; vqdmull_n: puts the scalar (operand 2) in lane 0 of a fresh temporary and
;; expands to neon_vqdmull_lane<mode> on lane 0.  The temporary's remaining
;; lanes are never initialised.
3426 (define_expand "neon_vqdmull_n<mode>"
3427 [(match_operand:<V_widen> 0 "s_register_operand" "")
3428 (match_operand:VMDI 1 "s_register_operand" "")
3429 (match_operand:<V_elem> 2 "s_register_operand" "")]
3430 "TARGET_NEON"
3431 {
3432 rtx tmp = gen_reg_rtx (<MODE>mode);
3433 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3434 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3435 const0_rtx));
3436 DONE;
3437 })
3438
;; vqdmulh_n for the VMDI modes: puts the scalar (operand 2) in lane 0 of a
;; fresh <MODE>mode temporary and expands to neon_vqdmulh_lane<mode> on
;; lane 0.  The temporary's remaining lanes are never initialised.
3439 (define_expand "neon_vqdmulh_n<mode>"
3440 [(match_operand:VMDI 0 "s_register_operand" "")
3441 (match_operand:VMDI 1 "s_register_operand" "")
3442 (match_operand:<V_elem> 2 "s_register_operand" "")]
3443 "TARGET_NEON"
3444 {
3445 rtx tmp = gen_reg_rtx (<MODE>mode);
3446 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3447 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3448 const0_rtx));
3449 DONE;
3450 })
3451
;; vqrdmulh_n for the VMDI modes: puts the scalar (operand 2) in lane 0 of a
;; fresh <MODE>mode temporary and expands to neon_vqrdmulh_lane<mode> on
;; lane 0.  The temporary's remaining lanes are never initialised.
3452 (define_expand "neon_vqrdmulh_n<mode>"
3453 [(match_operand:VMDI 0 "s_register_operand" "")
3454 (match_operand:VMDI 1 "s_register_operand" "")
3455 (match_operand:<V_elem> 2 "s_register_operand" "")]
3456 "TARGET_NEON"
3457 {
3458 rtx tmp = gen_reg_rtx (<MODE>mode);
3459 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3460 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3461 const0_rtx));
3462 DONE;
3463 })
3464
;; vqdmulh_n for the VMQI modes: puts the scalar (operand 2) in lane 0 of a
;; fresh <V_HALF>mode temporary and expands to neon_vqdmulh_lane<mode> on
;; lane 0.  The temporary's remaining lanes are never initialised.
3465 (define_expand "neon_vqdmulh_n<mode>"
3466 [(match_operand:VMQI 0 "s_register_operand" "")
3467 (match_operand:VMQI 1 "s_register_operand" "")
3468 (match_operand:<V_elem> 2 "s_register_operand" "")]
3469 "TARGET_NEON"
3470 {
3471 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3472 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3473 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3474 const0_rtx));
3475 DONE;
3476 })
3477
;; vqrdmulh_n for the VMQI modes: puts the scalar (operand 2) in lane 0 of a
;; fresh <V_HALF>mode temporary and expands to neon_vqrdmulh_lane<mode> on
;; lane 0.  The temporary's remaining lanes are never initialised.
3478 (define_expand "neon_vqrdmulh_n<mode>"
3479 [(match_operand:VMQI 0 "s_register_operand" "")
3480 (match_operand:VMQI 1 "s_register_operand" "")
3481 (match_operand:<V_elem> 2 "s_register_operand" "")]
3482 "TARGET_NEON"
3483 {
3484 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3485 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3486 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3487 const0_rtx));
3488 DONE;
3489 })
3490
;; vmla_n for the VMD modes: puts the scalar (operand 3) in lane 0 of a
;; fresh <MODE>mode temporary and expands to neon_vmla_lane<mode> with
;; operands 1 and 2 passed through.  The temporary's remaining lanes are
;; never initialised.
3491 (define_expand "neon_vmla_n<mode>"
3492 [(match_operand:VMD 0 "s_register_operand" "")
3493 (match_operand:VMD 1 "s_register_operand" "")
3494 (match_operand:VMD 2 "s_register_operand" "")
3495 (match_operand:<V_elem> 3 "s_register_operand" "")]
3496 "TARGET_NEON"
3497 {
3498 rtx tmp = gen_reg_rtx (<MODE>mode);
3499 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3500 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3501 tmp, const0_rtx));
3502 DONE;
3503 })
3504
;; vmla_n for the VMQ modes: puts the scalar (operand 3) in lane 0 of a
;; fresh <V_HALF>mode temporary and expands to neon_vmla_lane<mode> with
;; operands 1 and 2 passed through.  The temporary's remaining lanes are
;; never initialised.
3505 (define_expand "neon_vmla_n<mode>"
3506 [(match_operand:VMQ 0 "s_register_operand" "")
3507 (match_operand:VMQ 1 "s_register_operand" "")
3508 (match_operand:VMQ 2 "s_register_operand" "")
3509 (match_operand:<V_elem> 3 "s_register_operand" "")]
3510 "TARGET_NEON"
3511 {
3512 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3513 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3514 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3515 tmp, const0_rtx));
3516 DONE;
3517 })
3518
;; vmlals_n: puts the scalar (operand 3) in lane 0 of a fresh temporary and
;; expands to neon_vmlals_lane<mode> with operands 1 and 2 passed through.
;; The temporary's remaining lanes are never initialised.
3519 (define_expand "neon_vmlals_n<mode>"
3520 [(match_operand:<V_widen> 0 "s_register_operand" "")
3521 (match_operand:<V_widen> 1 "s_register_operand" "")
3522 (match_operand:VMDI 2 "s_register_operand" "")
3523 (match_operand:<V_elem> 3 "s_register_operand" "")]
3524 "TARGET_NEON"
3525 {
3526 rtx tmp = gen_reg_rtx (<MODE>mode);
3527 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3528 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
3529 tmp, const0_rtx));
3530 DONE;
3531 })
3532
;; vmlalu_n: puts the scalar (operand 3) in lane 0 of a fresh temporary and
;; expands to neon_vmlalu_lane<mode> with operands 1 and 2 passed through.
;; The temporary's remaining lanes are never initialised.
3533 (define_expand "neon_vmlalu_n<mode>"
3534 [(match_operand:<V_widen> 0 "s_register_operand" "")
3535 (match_operand:<V_widen> 1 "s_register_operand" "")
3536 (match_operand:VMDI 2 "s_register_operand" "")
3537 (match_operand:<V_elem> 3 "s_register_operand" "")]
3538 "TARGET_NEON"
3539 {
3540 rtx tmp = gen_reg_rtx (<MODE>mode);
3541 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3542 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
3543 tmp, const0_rtx));
3544 DONE;
3545 })
3546
;; vqdmlal_n: puts the scalar (operand 3) in lane 0 of a fresh temporary and
;; expands to neon_vqdmlal_lane<mode> with operands 1 and 2 passed through.
;; The temporary's remaining lanes are never initialised.
3547 (define_expand "neon_vqdmlal_n<mode>"
3548 [(match_operand:<V_widen> 0 "s_register_operand" "")
3549 (match_operand:<V_widen> 1 "s_register_operand" "")
3550 (match_operand:VMDI 2 "s_register_operand" "")
3551 (match_operand:<V_elem> 3 "s_register_operand" "")]
3552 "TARGET_NEON"
3553 {
3554 rtx tmp = gen_reg_rtx (<MODE>mode);
3555 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3556 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3557 tmp, const0_rtx));
3558 DONE;
3559 })
3560
;; vmls_n for the VMD modes: puts the scalar (operand 3) in lane 0 of a
;; fresh <MODE>mode temporary and expands to neon_vmls_lane<mode> with
;; operands 1 and 2 passed through.  The temporary's remaining lanes are
;; never initialised.
3561 (define_expand "neon_vmls_n<mode>"
3562 [(match_operand:VMD 0 "s_register_operand" "")
3563 (match_operand:VMD 1 "s_register_operand" "")
3564 (match_operand:VMD 2 "s_register_operand" "")
3565 (match_operand:<V_elem> 3 "s_register_operand" "")]
3566 "TARGET_NEON"
3567 {
3568 rtx tmp = gen_reg_rtx (<MODE>mode);
3569 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3570 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3571 tmp, const0_rtx));
3572 DONE;
3573 })
3574
;; vmls_n for the VMQ modes: puts the scalar (operand 3) in lane 0 of a
;; fresh <V_HALF>mode temporary and expands to neon_vmls_lane<mode> with
;; operands 1 and 2 passed through.  The temporary's remaining lanes are
;; never initialised.
3575 (define_expand "neon_vmls_n<mode>"
3576 [(match_operand:VMQ 0 "s_register_operand" "")
3577 (match_operand:VMQ 1 "s_register_operand" "")
3578 (match_operand:VMQ 2 "s_register_operand" "")
3579 (match_operand:<V_elem> 3 "s_register_operand" "")]
3580 "TARGET_NEON"
3581 {
3582 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3583 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3584 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3585 tmp, const0_rtx));
3586 DONE;
3587 })
3588
;; vmlsls_n: puts the scalar (operand 3) in lane 0 of a fresh temporary and
;; expands to neon_vmlsls_lane<mode> with operands 1 and 2 passed through.
;; The temporary's remaining lanes are never initialised.
3589 (define_expand "neon_vmlsls_n<mode>"
3590 [(match_operand:<V_widen> 0 "s_register_operand" "")
3591 (match_operand:<V_widen> 1 "s_register_operand" "")
3592 (match_operand:VMDI 2 "s_register_operand" "")
3593 (match_operand:<V_elem> 3 "s_register_operand" "")]
3594 "TARGET_NEON"
3595 {
3596 rtx tmp = gen_reg_rtx (<MODE>mode);
3597 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3598 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
3599 tmp, const0_rtx));
3600 DONE;
3601 })
3602
;; vmlslu_n: puts the scalar (operand 3) in lane 0 of a fresh temporary and
;; expands to neon_vmlslu_lane<mode> with operands 1 and 2 passed through.
;; The temporary's remaining lanes are never initialised.
3603 (define_expand "neon_vmlslu_n<mode>"
3604 [(match_operand:<V_widen> 0 "s_register_operand" "")
3605 (match_operand:<V_widen> 1 "s_register_operand" "")
3606 (match_operand:VMDI 2 "s_register_operand" "")
3607 (match_operand:<V_elem> 3 "s_register_operand" "")]
3608 "TARGET_NEON"
3609 {
3610 rtx tmp = gen_reg_rtx (<MODE>mode);
3611 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3612 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
3613 tmp, const0_rtx));
3614 DONE;
3615 })
3616
;; vqdmlsl_n: puts the scalar (operand 3) in lane 0 of a fresh temporary and
;; expands to neon_vqdmlsl_lane<mode> with operands 1 and 2 passed through.
;; The temporary's remaining lanes are never initialised.
3617 (define_expand "neon_vqdmlsl_n<mode>"
3618 [(match_operand:<V_widen> 0 "s_register_operand" "")
3619 (match_operand:<V_widen> 1 "s_register_operand" "")
3620 (match_operand:VMDI 2 "s_register_operand" "")
3621 (match_operand:<V_elem> 3 "s_register_operand" "")]
3622 "TARGET_NEON"
3623 {
3624 rtx tmp = gen_reg_rtx (<MODE>mode);
3625 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3626 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3627 tmp, const0_rtx));
3628 DONE;
3629 })
3630
;; vext: two vector inputs plus an immediate (operand 3).  The immediate is
;; range-checked at output time against the number of units in the mode.
(define_insn "neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
                      (match_operand:VDQX 2 "s_register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VEXT))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* Bounds passed: low 0, high = unit count.  */
  neon_const_bounds (operands[3], 0, nunits);
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
3644
;; vrev64: one vector input, one vector output, represented as
;; UNSPEC_VREV64 over the VDQ modes.
(define_insn "neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
                    UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3653
;; vrev32: one vector input, one vector output, represented as
;; UNSPEC_VREV32 over the VX modes.
(define_insn "neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
                   UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3662
;; vrev16: one vector input, one vector output, represented as
;; UNSPEC_VREV16 over the VE modes.
(define_insn "neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
                   UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3671
;; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on
;; register allocation.  For an intrinsic of the form:
;;   rD = vbsl_* (rS, rN, rM)
;; any one of the following can be used:
;;   vbsl rS, rN, rM  (if D = S)
;;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
;;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
;;
;; The three constraint alternatives tie the destination to operand 1,
;; operand 3 and operand 2 respectively, and the three template lines
;; follow in the same order.

(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand"               "=w,w,w")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
                      (match_operand:VDQX 2 "s_register_operand" " w,w,0")
                      (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
                     UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
   vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
   vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
   vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)
3693
;; Expander for vbsl.  The selector (operand 1) arrives in
;; <V_cmp_result> mode and is re-expressed in <MODE>mode before the
;; pattern below is generated.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "")
        (unspec:VDQX
          [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
           (match_operand:VDQX 2 "s_register_operand" "")
           (match_operand:VDQX 3 "s_register_operand" "")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands of different modes cannot be tied to one another, so give
     the selector the same mode as the data operands.  */
  rtx selector = gen_lowpart (<MODE>mode, operands[1]);
  operands[1] = selector;
})
3705
;; vshl, vrshl: shift with the shift amounts supplied in a vector register
;; (operand 2).  Iterates over the VSHL unspecs.
;; NOTE(review): the "type" attribute is neon_shift_imm<q> although the
;; shift operand is a register -- confirm this is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
3716
;; vqshl, vqrshl: as the pattern for vshl/vrshl but iterating over the
;; VQSHL unspecs; the shift amounts are in a vector register (operand 2).
;; NOTE(review): the "type" attribute is neon_sat_shift_imm<q> although the
;; shift operand is a register -- confirm this is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3727
;; vshr_n, vrshr_n: shift right by an immediate (operand 2), which is
;; range-checked when the instruction is output.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      VSHR_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  /* Bounds passed: low 1, high = element width + 1.  */
  neon_const_bounds (operands[2], 1, elt_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
3741
;; vshrn_n, vrshrn_n: shift right by an immediate with a <V_narrow>
;; result.  The immediate is range-checked at output time.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VSHRN_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half_bits = neon_element_bits (<MODE>mode) / 2;

  /* Bounds passed: low 1, high = half the source element width + 1.  */
  neon_const_bounds (operands[2], 1, half_bits + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3755
;; vqshrn_n, vqrshrn_n: as the vshrn_n pattern but over the VQSHRN_N
;; unspecs, with the <sup> prefix on the element size in the mnemonic.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRN_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half_bits = neon_element_bits (<MODE>mode) / 2;

  /* Bounds passed: low 1, high = half the source element width + 1.  */
  neon_const_bounds (operands[2], 1, half_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3769
;; vqshrun_n, vqrshrun_n: narrowing shift right by an immediate over the
;; VQSHRUN_N unspecs; the mnemonic uses the <V_s_elem> element suffix.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRUN_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half_bits = neon_element_bits (<MODE>mode) / 2;

  /* Bounds passed: low 1, high = half the source element width + 1.  */
  neon_const_bounds (operands[2], 1, half_bits + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3783
;; vshl_n: shift left by an immediate (operand 2), range-checked when the
;; instruction is output.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  /* Bounds passed: low 0, high = element width.  */
  neon_const_bounds (operands[2], 0, elt_bits);
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
3796
;; vqshl_n: vqshl by an immediate (operand 2) over the VQSHL_N unspecs.
;; The immediate is range-checked at output time.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      VQSHL_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  /* Bounds passed: low 0, high = element width.  */
  neon_const_bounds (operands[2], 0, elt_bits);
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3809
;; vqshlu_n: vqshlu by an immediate (operand 2).  The immediate is
;; range-checked at output time.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  /* Bounds passed: low 0, high = element width.  */
  neon_const_bounds (operands[2], 0, elt_bits);
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3822
;; vshll_n: shift left by an immediate with a <V_widen> result, over the
;; VSHLL_N unspecs.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VSHLL_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  /* Bounds passed: low 0, high = element width + 1.
     NOTE(review): an earlier comment here read "0 < imm <= size", which
     does not agree with a low bound of 0 -- confirm which is intended.  */
  neon_const_bounds (operands[2], 0, elt_bits + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
3836
;; vsra_n, vrsra_n: operand 1 is tied to the destination; operand 2 is the
;; vector named in the assembly and operand 3 the immediate, which is
;; range-checked at output time.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      VSRA_N))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  /* Bounds passed: low 1, high = element width + 1.  */
  neon_const_bounds (operands[3], 1, elt_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
3851
;; vsri_n: operand 1 is tied to the destination; operand 2 is the vector
;; named in the assembly and operand 3 the immediate, which is
;; range-checked at output time.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSRI))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  /* Bounds passed: low 1, high = element width + 1.  */
  neon_const_bounds (operands[3], 1, elt_bits + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
3865
;; vsli_n: operand 1 is tied to the destination; operand 2 is the vector
;; named in the assembly and operand 3 the immediate, which is
;; range-checked at output time.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSLI))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  /* Bounds passed: low 0, high = element width.  */
  neon_const_bounds (operands[3], 0, elt_bits);
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
3879
;; vtbl with a one-register table: operand 1 is the table, operand 2 the
;; index vector.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
3889
;; vtbl with a two-register table held in a TImode operand.  The output
;; code names the two table registers individually, at register numbers
;; REGNO (table) and REGNO (table) + 2.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[1]);
  rtx xop[4];

  xop[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xop[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  xop[3] = operands[2];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", xop);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
3910
;; vtbl with a three-register table held in an EImode operand.  The output
;; code names the table registers individually, at register numbers
;; REGNO (table), + 2 and + 4.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[1]);
  rtx xop[5];

  xop[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xop[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  xop[4] = operands[2];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", xop);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
3932
;; vtbl with a four-register table held in an OImode operand.  The output
;; code names the table registers individually, at register numbers
;; REGNO (table), + 2, + 4 and + 6.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[1]);
  rtx xop[6];

  xop[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xop[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  xop[5] = operands[2];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xop);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
3955
3956 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table.  Emitted as "#" and split after
;; reload into two neon_vtbl2v8qi insns, one for the low and one for the
;; high V8QI half of the destination and of the index vector; both use the
;; whole table viewed as TImode.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = gen_lowpart (TImode, operands[1]);
  unsigned half_ofs[2];

  half_ofs[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_ofs[1] = subreg_highpart_offset (V8QImode, V16QImode);

  /* Low half first, then high half.  */
  for (int i = 0; i < 2; i++)
    {
      rtx dest_half = simplify_subreg (V8QImode, operands[0], V16QImode,
                                       half_ofs[i]);
      rtx index_half = simplify_subreg (V8QImode, operands[2], V16QImode,
                                        half_ofs[i]);
      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, index_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
3987
;; V16QI table lookup with an OImode table.  Emitted as "#" and split after
;; reload into two V8QI lookups, one for the low and one for the high half
;; of the destination and of the index vector; both use the whole table.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  /* The table is OImode, which is the mode neon_vtbl4v8qi declares for
     its table operand (neon_vtbl2v8qi declares TImode), so build the
     half-width lookups with the vtbl4 generator.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));

  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  DONE;
}
  [(set_attr "type" "multiple")]
)
4018
;; ??? Logically the regular neon_vcombine pattern should be extended to
;; handle quad-word input modes, producing octa-word output modes.  That
;; would require support for octa-word vector modes in moves, which seems
;; overkill for this single use in vec_perm.
;;
;; Emitted as "#"; after reload the work is handed to neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
                    (match_operand:V16QI 2 "s_register_operand" "w")]
                   UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* The helper emits the replacement insns for the whole pattern.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
4038
;; vtbx with a one-register table.  Operand 1 is tied to the destination;
;; operand 2 is the table and operand 3 the index vector.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:V8QI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
4049
;; vtbx with a two-register table held in a TImode operand (operand 2).
;; Operand 1 is tied to the destination.  The output code names the table
;; registers individually, at register numbers REGNO (table) and + 2.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[2]);
  rtx xop[4];

  xop[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xop[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  xop[3] = operands[3];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", xop);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4071
;; vtbx with a three-register table held in an EImode operand (operand 2).
;; Operand 1 is tied to the destination.  The output code names the table
;; registers individually, at register numbers REGNO (table), + 2 and + 4.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[2]);
  rtx xop[5];

  xop[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xop[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  xop[4] = operands[3];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", xop);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4094
;; vtbx with a four-register table held in an OImode operand (operand 2).
;; Operand 1 is tied to the destination.  The output code names the table
;; registers individually, at register numbers REGNO (table), + 2, + 4
;; and + 6.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:OI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[2]);
  rtx xop[6];

  xop[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xop[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  xop[5] = operands[3];

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xop);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4118
;; Expander for vtrn: a parallel of two sets, both computed from operands
;; 1 and 2 (UNSPEC_VTRN1 into operand 0, UNSPEC_VTRN2 into operand 3).
;; No preparation code is needed.
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
          (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
                        (match_operand:VDQW 2 "s_register_operand" "")]
                       UNSPEC_VTRN1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
          (unspec:VDQW [(match_dup 1) (match_dup 2)]
                       UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
4130
;; Note: the operands are numbered differently from the expander so that
;; the tied registers are handled correctly: input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VTRN1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
4144
;; Expander for vzip: a parallel of two sets, both computed from operands
;; 1 and 2 (UNSPEC_VZIP1 into operand 0, UNSPEC_VZIP2 into operand 3).
;; No preparation code is needed.
(define_expand "neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
          (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
                        (match_operand:VDQW 2 "s_register_operand" "")]
                       UNSPEC_VZIP1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
          (unspec:VDQW [(match_dup 1) (match_dup 2)]
                       UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
4156
;; Note: the operands are numbered differently from the expander so that
;; the tied registers are handled correctly: input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VZIP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4170
;; Expander for vuzp: a parallel of two sets, both computed from operands
;; 1 and 2 (UNSPEC_VUZP1 into operand 0, UNSPEC_VUZP2 into operand 3).
;; No preparation code is needed.
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
          (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
                        (match_operand:VDQW 2 "s_register_operand" "")]
                       UNSPEC_VUZP1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
          (unspec:VDQW [(match_dup 1) (match_dup 2)]
                       UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
4182
;; Note: the operands are numbered differently from the expander so that
;; the tied registers are handled correctly: input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VUZP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4196
;; Reinterpret any VD_RE-mode value as V8QI; all of the work is delegated
;; to neon_reinterpret.
(define_expand "neon_vreinterpretv8qi<mode>"
  [(match_operand:V8QI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4205
;; Reinterpret any VD_RE-mode value as V4HI; all of the work is delegated
;; to neon_reinterpret.
(define_expand "neon_vreinterpretv4hi<mode>"
  [(match_operand:V4HI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4214
;; Reinterpret any VD_RE-mode value as V2SI; all of the work is delegated
;; to neon_reinterpret.
(define_expand "neon_vreinterpretv2si<mode>"
  [(match_operand:V2SI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4223
;; Reinterpret any VD_RE-mode value as V2SF; all of the work is delegated
;; to neon_reinterpret.
(define_expand "neon_vreinterpretv2sf<mode>"
  [(match_operand:V2SF 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4232
;; Reinterpret any VD_RE-mode value as DI; all of the work is delegated
;; to neon_reinterpret.
(define_expand "neon_vreinterpretdi<mode>"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4241
;; Reinterpret any VQXMOV-mode value as TI; all of the work is delegated
;; to neon_reinterpret.
(define_expand "neon_vreinterpretti<mode>"
  [(match_operand:TI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4250
4251
;; Reinterpret any VQXMOV-mode value as V16QI; all of the work is
;; delegated to neon_reinterpret.
(define_expand "neon_vreinterpretv16qi<mode>"
  [(match_operand:V16QI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4260
;; Reinterpret any VQXMOV-mode value as V8HI; all of the work is
;; delegated to neon_reinterpret.
(define_expand "neon_vreinterpretv8hi<mode>"
  [(match_operand:V8HI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4269
;; Reinterpret any VQXMOV-mode value as V4SI; all of the work is
;; delegated to neon_reinterpret.
(define_expand "neon_vreinterpretv4si<mode>"
  [(match_operand:V4SI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4278
;; Reinterpret any VQXMOV-mode value as V4SF; all of the work is
;; delegated to neon_reinterpret.
(define_expand "neon_vreinterpretv4sf<mode>"
  [(match_operand:V4SF 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4287
;; Reinterpret any VQXMOV-mode value as V2DI; all of the work is
;; delegated to neon_reinterpret.
(define_expand "neon_vreinterpretv2di<mode>"
  [(match_operand:V2DI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
})
4296
;; vec_load_lanes with identical source and destination modes: expands
;; directly to the UNSPEC_VLD1 form, with no preparation code.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
                     UNSPEC_VLD1))]
  "TARGET_NEON")
4302
;; vld1: load a whole VDQX vector from a "Um" memory operand.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
4311
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  The lane numbers are restored to architectural
;; lane order here.
;;
;; D-register form: operand 2 is tied to the destination, operand 3 is the
;; lane.  Single-element modes are printed without a lane index.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* Replace the lane operand with its endian-adjusted value.  */
  operands[3]
    = GEN_INT (NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])));

  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4333
;; Q-register form of vld1_lane.  As in the D-register form, the lane
;; number held in the RTL is converted with NEON_ENDIAN_LANE_N before
;; printing.  A lane in the upper half of the vector is addressed as a
;; lane of the register at REGNO + 2; the destination is then printed as a
;; <V_HALF>mode register.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int dreg = REGNO (operands[0]);

  if (lane_no >= half)
    {
      /* Upper half: rebase the lane and step to the second register.  */
      lane_no -= half;
      dreg += 2;
    }

  operands[3] = GEN_INT (lane_no);
  operands[0] = gen_rtx_REG (<V_HALF>mode, dreg);

  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4362
;; vld1_dup, D-register form: a vec_duplicate of a single <V_elem> loaded
;; from memory, printed with the all-lanes "[]" syntax.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (vec_duplicate:VD
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4370
;; Special case for DImode: treat vld1_dup exactly like a simple load, so
;; the expansion is just the UNSPEC_VLD1 form with no preparation code.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
                   UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
4379
;; vld1_dup, Q-register form: a vec_duplicate of a single <V_elem> loaded
;; from memory.  Both halves of the destination (%e0 and %f0) are named
;; with the all-lanes "[]" syntax in one instruction.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ 0 "s_register_operand" "=w")
        (vec_duplicate:VQ
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4389
;; vld1_dup for V2DI.  Emitted as "#" and split after reload into a DImode
;; load of the low half of the destination followed by a move of that low
;; half into the high half.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);

  emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));

  /* Copy the freshly loaded low half into the high half.  */
  rtx high_half = gen_highpart (DImode, operands[0]);
  emit_move_insn (high_half, low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
4406
;; vec_store_lanes with identical source and destination modes: expands
;; directly to the UNSPEC_VST1 form, with no preparation code.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
                     UNSPEC_VST1))]
  "TARGET_NEON")
4412
;; vst1: store a whole VDQX vector to a "Um" memory operand.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
                     UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
4420
;; vst1_lane, D-register form.  The lane number held in the RTL is
;; converted with NEON_ENDIAN_LANE_N before printing (see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets).
;; Single-element modes are printed without a lane index.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* Replace the lane operand with its endian-adjusted value.  */
  operands[2]
    = GEN_INT (NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])));

  return (nunits == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4441
;; vst1_lane, Q-register form.  The lane number held in the RTL is
;; converted with NEON_ENDIAN_LANE_N before printing (see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets).
;; A lane in the upper half of the vector is addressed as a lane of the
;; register at REGNO + 2; the source is then printed as a <V_HALF>mode
;; register.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int dreg = REGNO (operands[1]);

  if (lane_no >= half)
    {
      /* Upper half: rebase the lane and step to the second register.  */
      lane_no -= half;
      dreg += 2;
    }

  operands[2] = GEN_INT (lane_no);
  operands[1] = gen_rtx_REG (<V_HALF>mode, dreg);

  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4469
;; vec_load_lanes producing a TImode structure: expands directly to the
;; UNSPEC_VLD2 form.  The inner UNSPEC_VSTRUCTDUMMY carries the VDX mode
;; (presumably only so the pattern can be instantiated per vector mode).
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")
4476
;; vld2 into a TImode structure.  With 64-bit elements the instruction
;; printed is vld1.64 rather than vld2, and the "type" attribute switches
;; on the same condition.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_2reg<q>")
                      (const_string "neon_load2_2reg<q>")))]
)
4494
;; vec_load_lanes producing an OImode structure: expands directly to the
;; UNSPEC_VLD2 form.  The inner UNSPEC_VSTRUCTDUMMY carries the VQ2 mode
;; (presumably only so the pattern can be instantiated per vector mode).
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")
4501
;; vld2 into an OImode structure, for the VQ2 modes.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")]
)
4510
;; vld2_lane into a TImode structure.  The lane number held in the RTL is
;; converted with NEON_ENDIAN_LANE_N before printing (see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets).
;; The two registers named in the assembly are REGNO and REGNO + 2 of the
;; destination.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  const int base_regno = REGNO (operands[0]);
  rtx xop[4];

  xop[0] = gen_rtx_REG (DImode, base_regno);
  xop[1] = gen_rtx_REG (DImode, base_regno + 2);
  xop[2] = operands[1];
  xop[3] = GEN_INT (lane_no);

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xop);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4534
;; vld2_lane into an OImode structure.  The lane number held in the RTL is
;; converted with NEON_ENDIAN_LANE_N before printing (see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets).
;; A lane in the upper half of the vector selects the registers starting at
;; REGNO + 2; the two registers named in the assembly are 4 register
;; numbers apart.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int base_regno = REGNO (operands[0]);
  rtx xop[4];

  if (lane_no >= half)
    {
      /* Upper half: rebase the lane and start one register later.  */
      lane_no -= half;
      base_regno += 2;
    }

  xop[0] = gen_rtx_REG (DImode, base_regno);
  xop[1] = gen_rtx_REG (DImode, base_regno + 4);
  xop[2] = operands[1];
  xop[3] = GEN_INT (lane_no);

  /* The instruction is printed here, so the returned template is empty.  */
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xop);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4564
;; vld2 to all lanes, VDX modes, TImode register group.
;; Modes with more than one element are emitted as vld2 with the "[]"
;; syntax on %e0 and %f0; single-element modes are emitted as vld1 with
;; operand 0 printed through %h.  The "type" attribute makes the same
;; distinction by testing <V_mode_nunits>.
4565 (define_insn "neon_vld2_dup<mode>"
4566 [(set (match_operand:TI 0 "s_register_operand" "=w")
4567 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4568 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4569 UNSPEC_VLD2_DUP))]
4570 "TARGET_NEON"
4571 {
4572 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4573 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4574 else
4575 return "vld1.<V_sz_elem>\t%h0, %A1";
4576 }
4577 [(set (attr "type")
4578 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4579 (const_string "neon_load2_all_lanes<q>")
4580 (const_string "neon_load1_1reg<q>")))]
4581 )
4582
;; Expander vec_store_lanesti<mode> for the VDX modes.  It has no
;; preparation statements; its RTL is a set of the TImode struct memory
;; operand 0 from an UNSPEC_VST2 of the register operand 1, the same shape
;; as the VDX neon_vst2<mode> insn pattern.
4583 (define_expand "vec_store_lanesti<mode>"
4584 [(set (match_operand:TI 0 "neon_struct_operand")
4585 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4586 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4587 UNSPEC_VST2))]
4588 "TARGET_NEON")
4589
;; vst2 for the VDX modes: store the TImode register group in operand 1 to
;; the struct memory operand 0.  Modes with 64-bit elements are emitted as
;; vst1.64, all others as vst2.<element size>.  Both forms store whole
;; registers, so the non-64-bit case uses the two-register vst2 scheduling
;; type (neon_store2_2reg<q>) rather than a single-lane store type.
4590 (define_insn "neon_vst2<mode>"
4591 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4592 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4593 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4594 UNSPEC_VST2))]
4595 "TARGET_NEON"
4596 {
4597 if (<V_sz_elem> == 64)
4598 return "vst1.64\t%h1, %A0";
4599 else
4600 return "vst2.<V_sz_elem>\t%h1, %A0";
4601 }
4602 [(set (attr "type")
4603 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4604 (const_string "neon_store1_2reg<q>")
4605 (const_string "neon_store2_2reg<q>")))]
4606 )
4607
;; Expander vec_store_lanesoi<mode> for the VQ2 modes.  It has no
;; preparation statements; its RTL is a set of the OImode struct memory
;; operand 0 from an UNSPEC_VST2 of the register operand 1, the same shape
;; as the VQ2 neon_vst2<mode> insn pattern.
4608 (define_expand "vec_store_lanesoi<mode>"
4609 [(set (match_operand:OI 0 "neon_struct_operand")
4610 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4611 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4612 UNSPEC_VST2))]
4613 "TARGET_NEON")
4614
;; vst2 for the VQ2 modes: store the OImode register group in operand 1 to
;; the struct memory operand 0 with a single vst2.<element size>
;; instruction.  Operand 1 is printed with %h and the address with %A.
4615 (define_insn "neon_vst2<mode>"
4616 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4617 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4618 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4619 UNSPEC_VST2))]
4620 "TARGET_NEON"
4621 "vst2.<V_sz_elem>\t%h1, %A0"
4622 [(set_attr "type" "neon_store2_4reg<q>")]
4623 )
4624
4625 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4626 ;; here on big endian targets.
;; vst2 from a single lane, VD_LANE modes, TImode register group.
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane number.  The lane is mapped through
;; NEON_ENDIAN_LANE_N and the two registers named in the instruction are
;; the DImode registers at REGNO and REGNO + 2 of operand 1.
4627 (define_insn "neon_vst2_lane<mode>"
4628 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4629 (unspec:<V_two_elem>
4630 [(match_operand:TI 1 "s_register_operand" "w")
4631 (match_operand:SI 2 "immediate_operand" "i")
4632 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4633 UNSPEC_VST2_LANE))]
4634 "TARGET_NEON"
4635 {
4636 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4637 int regno = REGNO (operands[1]);
4638 rtx ops[4];
/* ops[0] is the memory operand, ops[1..2] the two source registers and
   ops[3] the adjusted lane number.  */
4639 ops[0] = operands[0];
4640 ops[1] = gen_rtx_REG (DImode, regno);
4641 ops[2] = gen_rtx_REG (DImode, regno + 2);
4642 ops[3] = GEN_INT (lane);
4643 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4644 return "";
4645 }
4646 [(set_attr "type" "neon_store2_one_lane<q>")]
4647 )
4648
4649 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4650 ;; here on big endian targets.
;; vst2 from a single lane, VQ_HS modes, OImode register group.
;; Operands are as for the VD_LANE variant.  A lane index in the upper
;; half of the vector (>= GET_MODE_NUNITS / 2) is rebased by that half and
;; the base register number advanced by 2; the two registers named in the
;; instruction are then the DImode registers at REGNO and REGNO + 4.
4651 (define_insn "neon_vst2_lane<mode>"
4652 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4653 (unspec:<V_two_elem>
4654 [(match_operand:OI 1 "s_register_operand" "w")
4655 (match_operand:SI 2 "immediate_operand" "i")
4656 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4657 UNSPEC_VST2_LANE))]
4658 "TARGET_NEON"
4659 {
4660 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4661 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4662 int regno = REGNO (operands[1]);
4663 rtx ops[4];
/* Upper-half lane: rebase the index and start from REGNO + 2.  */
4664 if (lane >= max / 2)
4665 {
4666 lane -= max / 2;
4667 regno += 2;
4668 }
4669 ops[0] = operands[0];
4670 ops[1] = gen_rtx_REG (DImode, regno);
4671 ops[2] = gen_rtx_REG (DImode, regno + 4);
4672 ops[3] = GEN_INT (lane);
4673 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4674 return "";
4675 }
4676 [(set_attr "type" "neon_store2_one_lane<q>")]
4677 )
4678
;; Expander vec_load_lanesei<mode> for the VDX modes.  It has no
;; preparation statements; its RTL is a set of the EImode register
;; operand 0 from an UNSPEC_VLD3 of the struct memory operand 1, the same
;; shape as the VDX neon_vld3<mode> insn pattern.
4679 (define_expand "vec_load_lanesei<mode>"
4680 [(set (match_operand:EI 0 "s_register_operand")
4681 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4682 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4683 UNSPEC_VLD3))]
4684 "TARGET_NEON")
4685
;; vld3 for the VDX modes: load the EImode register group operand 0 from
;; the struct memory operand 1.  Modes with 64-bit elements are emitted as
;; vld1.64, all others as vld3.<element size>; the "type" attribute is
;; selected by the same element-size test.
4686 (define_insn "neon_vld3<mode>"
4687 [(set (match_operand:EI 0 "s_register_operand" "=w")
4688 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4689 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4690 UNSPEC_VLD3))]
4691 "TARGET_NEON"
4692 {
4693 if (<V_sz_elem> == 64)
4694 return "vld1.64\t%h0, %A1";
4695 else
4696 return "vld3.<V_sz_elem>\t%h0, %A1";
4697 }
4698 [(set (attr "type")
4699 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4700 (const_string "neon_load1_3reg<q>")
4701 (const_string "neon_load3_3reg<q>")))]
4702 )
4703
;; Expander vec_load_lanesci<mode> for the VQ2 modes: forwards its two
;; CImode operands (register destination, struct memory source) unchanged
;; to the neon_vld3<mode> expander and finishes with DONE.
4704 (define_expand "vec_load_lanesci<mode>"
4705 [(match_operand:CI 0 "s_register_operand")
4706 (match_operand:CI 1 "neon_struct_operand")
4707 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4708 "TARGET_NEON"
4709 {
4710 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
4711 DONE;
4712 })
4713
;; vld3 for the VQ2 modes, expanded as two instructions.  The CImode memory
;; operand 1 is viewed as two consecutive EImode pieces: the first is
;; loaded by neon_vld3qa<mode>, the second (GET_MODE_SIZE (EImode) bytes
;; further on) by neon_vld3qb<mode>, which also receives operand 0 as its
;; tied input so the result of the first load is kept.
4714 (define_expand "neon_vld3<mode>"
4715 [(match_operand:CI 0 "s_register_operand")
4716 (match_operand:CI 1 "neon_struct_operand")
4717 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4718 "TARGET_NEON"
4719 {
4720 rtx mem;
4721
4722 mem = adjust_address (operands[1], EImode, 0);
4723 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4724 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4725 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
4726 DONE;
4727 })
4728
;; First half of the VQ2 vld3 sequence: a vld3 from the EImode memory
;; operand 1 into the DImode registers at REGNO, REGNO + 4 and REGNO + 8
;; of the CImode operand 0.
4729 (define_insn "neon_vld3qa<mode>"
4730 [(set (match_operand:CI 0 "s_register_operand" "=w")
4731 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4732 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4733 UNSPEC_VLD3A))]
4734 "TARGET_NEON"
4735 {
4736 int regno = REGNO (operands[0]);
4737 rtx ops[4];
4738 ops[0] = gen_rtx_REG (DImode, regno);
4739 ops[1] = gen_rtx_REG (DImode, regno + 4);
4740 ops[2] = gen_rtx_REG (DImode, regno + 8);
4741 ops[3] = operands[1];
4742 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4743 return "";
4744 }
4745 [(set_attr "type" "neon_load3_3reg<q>")]
4746 )
4747
;; Second half of the VQ2 vld3 sequence: a vld3 from the EImode memory
;; operand 1 into the DImode registers at REGNO + 2, REGNO + 6 and
;; REGNO + 10 of the CImode operand 0.  Operand 2 is the previous value of
;; the register group, tied to operand 0 by the "0" constraint.
4748 (define_insn "neon_vld3qb<mode>"
4749 [(set (match_operand:CI 0 "s_register_operand" "=w")
4750 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4751 (match_operand:CI 2 "s_register_operand" "0")
4752 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4753 UNSPEC_VLD3B))]
4754 "TARGET_NEON"
4755 {
4756 int regno = REGNO (operands[0]);
4757 rtx ops[4];
4758 ops[0] = gen_rtx_REG (DImode, regno + 2);
4759 ops[1] = gen_rtx_REG (DImode, regno + 6);
4760 ops[2] = gen_rtx_REG (DImode, regno + 10);
4761 ops[3] = operands[1];
4762 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4763 return "";
4764 }
4765 [(set_attr "type" "neon_load3_3reg<q>")]
4766 )
4767
4768 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4769 ;; here on big endian targets.
;; vld3 to a single lane, VD_LANE modes, EImode register group.
;; Operand 1 is the memory source, operand 2 the incoming value of the
;; register group (tied to operand 0) and operand 3 the lane number.  The
;; lane is mapped through NEON_ENDIAN_LANE_N and the three registers named
;; are the DImode registers at REGNO, REGNO + 2 and REGNO + 4.  The memory
;; operand is printed with plain %3 here, not %A3.
4770 (define_insn "neon_vld3_lane<mode>"
4771 [(set (match_operand:EI 0 "s_register_operand" "=w")
4772 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4773 (match_operand:EI 2 "s_register_operand" "0")
4774 (match_operand:SI 3 "immediate_operand" "i")
4775 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4776 UNSPEC_VLD3_LANE))]
4777 "TARGET_NEON"
4778 {
4779 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
4780 int regno = REGNO (operands[0]);
4781 rtx ops[5];
4782 ops[0] = gen_rtx_REG (DImode, regno);
4783 ops[1] = gen_rtx_REG (DImode, regno + 2);
4784 ops[2] = gen_rtx_REG (DImode, regno + 4);
4785 ops[3] = operands[1];
4786 ops[4] = GEN_INT (lane);
4787 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4788 ops);
4789 return "";
4790 }
4791 [(set_attr "type" "neon_load3_one_lane<q>")]
4792 )
4793
4794 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4795 ;; here on big endian targets.
;; vld3 to a single lane, VQ_HS modes, CImode register group.
;; Operands are as for the VD_LANE variant.  A lane index in the upper
;; half of the vector (>= GET_MODE_NUNITS / 2) is rebased by that half and
;; the base register number advanced by 2; the three registers named are
;; then the DImode registers at REGNO, REGNO + 4 and REGNO + 8.  The
;; memory operand is printed with plain %3 here, not %A3.
4796 (define_insn "neon_vld3_lane<mode>"
4797 [(set (match_operand:CI 0 "s_register_operand" "=w")
4798 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4799 (match_operand:CI 2 "s_register_operand" "0")
4800 (match_operand:SI 3 "immediate_operand" "i")
4801 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4802 UNSPEC_VLD3_LANE))]
4803 "TARGET_NEON"
4804 {
4805 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4806 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4807 int regno = REGNO (operands[0]);
4808 rtx ops[5];
/* Upper-half lane: rebase the index and start from REGNO + 2.  */
4809 if (lane >= max / 2)
4810 {
4811 lane -= max / 2;
4812 regno += 2;
4813 }
4814 ops[0] = gen_rtx_REG (DImode, regno);
4815 ops[1] = gen_rtx_REG (DImode, regno + 4);
4816 ops[2] = gen_rtx_REG (DImode, regno + 8);
4817 ops[3] = operands[1];
4818 ops[4] = GEN_INT (lane);
4819 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4820 ops);
4821 return "";
4822 }
4823 [(set_attr "type" "neon_load3_one_lane<q>")]
4824 )
4825
;; vld3 to all lanes, VDX modes, EImode register group.
;; Modes with more than one element are emitted as vld3 with the "[]"
;; syntax on the DImode registers at REGNO, REGNO + 2 and REGNO + 4 (memory
;; operand printed with plain %3); single-element modes are emitted as
;; vld1 with operand 0 printed through %h.  The "type" attribute makes the
;; same distinction by testing <V_mode_nunits>.
4826 (define_insn "neon_vld3_dup<mode>"
4827 [(set (match_operand:EI 0 "s_register_operand" "=w")
4828 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4829 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4830 UNSPEC_VLD3_DUP))]
4831 "TARGET_NEON"
4832 {
4833 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4834 {
4835 int regno = REGNO (operands[0]);
4836 rtx ops[4];
4837 ops[0] = gen_rtx_REG (DImode, regno);
4838 ops[1] = gen_rtx_REG (DImode, regno + 2);
4839 ops[2] = gen_rtx_REG (DImode, regno + 4);
4840 ops[3] = operands[1];
4841 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4842 return "";
4843 }
4844 else
4845 return "vld1.<V_sz_elem>\t%h0, %A1";
4846 }
4847 [(set (attr "type")
4848 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4849 (const_string "neon_load3_all_lanes<q>")
4850 (const_string "neon_load1_1reg<q>")))])
4851
;; Expander vec_store_lanesei<mode> for the VDX modes.  It has no
;; preparation statements; its RTL is a set of the EImode struct memory
;; operand 0 from an UNSPEC_VST3 of the register operand 1, the same shape
;; as the VDX neon_vst3<mode> insn pattern.
4852 (define_expand "vec_store_lanesei<mode>"
4853 [(set (match_operand:EI 0 "neon_struct_operand")
4854 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4855 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4856 UNSPEC_VST3))]
4857 "TARGET_NEON")
4858
;; vst3 for the VDX modes: store the EImode register group in operand 1 to
;; the struct memory operand 0.  Modes with 64-bit elements are emitted as
;; vst1.64, all others as vst3.<element size>.  Both forms store three
;; whole registers, so the non-64-bit case uses the three-register vst3
;; scheduling type (neon_store3_3reg<q>), mirroring the neon_load3_3reg<q>
;; type used by the corresponding vld3 pattern.
4859 (define_insn "neon_vst3<mode>"
4860 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4861 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4862 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4863 UNSPEC_VST3))]
4864 "TARGET_NEON"
4865 {
4866 if (<V_sz_elem> == 64)
4867 return "vst1.64\t%h1, %A0";
4868 else
4869 return "vst3.<V_sz_elem>\t%h1, %A0";
4870 }
4871 [(set (attr "type")
4872 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4873 (const_string "neon_store1_3reg<q>")
4874 (const_string "neon_store3_3reg<q>")))])
4875
;; Expander vec_store_lanesci<mode> for the VQ2 modes: forwards its two
;; CImode operands (struct memory destination, register source) unchanged
;; to the neon_vst3<mode> expander and finishes with DONE.
4876 (define_expand "vec_store_lanesci<mode>"
4877 [(match_operand:CI 0 "neon_struct_operand")
4878 (match_operand:CI 1 "s_register_operand")
4879 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4880 "TARGET_NEON"
4881 {
4882 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
4883 DONE;
4884 })
4885
;; vst3 for the VQ2 modes, expanded as two instructions.  The CImode memory
;; operand 0 is viewed as two consecutive EImode pieces: the first is
;; written by neon_vst3qa<mode>, the second (GET_MODE_SIZE (EImode) bytes
;; further on) by neon_vst3qb<mode>.  Both receive the whole CImode
;; register operand 1.
4886 (define_expand "neon_vst3<mode>"
4887 [(match_operand:CI 0 "neon_struct_operand")
4888 (match_operand:CI 1 "s_register_operand")
4889 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4890 "TARGET_NEON"
4891 {
4892 rtx mem;
4893
4894 mem = adjust_address (operands[0], EImode, 0);
4895 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4896 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4897 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
4898 DONE;
4899 })
4900
;; First half of the VQ2 vst3 sequence: a vst3 to the EImode memory
;; operand 0 from the DImode registers at REGNO, REGNO + 4 and REGNO + 8
;; of the CImode operand 1.
4901 (define_insn "neon_vst3qa<mode>"
4902 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4903 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4904 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4905 UNSPEC_VST3A))]
4906 "TARGET_NEON"
4907 {
4908 int regno = REGNO (operands[1]);
4909 rtx ops[4];
4910 ops[0] = operands[0];
4911 ops[1] = gen_rtx_REG (DImode, regno);
4912 ops[2] = gen_rtx_REG (DImode, regno + 4);
4913 ops[3] = gen_rtx_REG (DImode, regno + 8);
4914 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4915 return "";
4916 }
4917 [(set_attr "type" "neon_store3_3reg<q>")]
4918 )
4919
;; Second half of the VQ2 vst3 sequence: a vst3 to the EImode memory
;; operand 0 from the DImode registers at REGNO + 2, REGNO + 6 and
;; REGNO + 10 of the CImode operand 1.
4920 (define_insn "neon_vst3qb<mode>"
4921 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4922 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4923 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4924 UNSPEC_VST3B))]
4925 "TARGET_NEON"
4926 {
4927 int regno = REGNO (operands[1]);
4928 rtx ops[4];
4929 ops[0] = operands[0];
4930 ops[1] = gen_rtx_REG (DImode, regno + 2);
4931 ops[2] = gen_rtx_REG (DImode, regno + 6);
4932 ops[3] = gen_rtx_REG (DImode, regno + 10);
4933 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4934 return "";
4935 }
4936 [(set_attr "type" "neon_store3_3reg<q>")]
4937 )
4938
4939 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4940 ;; here on big endian targets.
;; vst3 from a single lane, VD_LANE modes, EImode register group.
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane number.  The lane is mapped through
;; NEON_ENDIAN_LANE_N and the three registers named are the DImode
;; registers at REGNO, REGNO + 2 and REGNO + 4.  The memory operand is
;; printed with plain %0 here, not %A0.
4941 (define_insn "neon_vst3_lane<mode>"
4942 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4943 (unspec:<V_three_elem>
4944 [(match_operand:EI 1 "s_register_operand" "w")
4945 (match_operand:SI 2 "immediate_operand" "i")
4946 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4947 UNSPEC_VST3_LANE))]
4948 "TARGET_NEON"
4949 {
4950 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4951 int regno = REGNO (operands[1]);
4952 rtx ops[5];
4953 ops[0] = operands[0];
4954 ops[1] = gen_rtx_REG (DImode, regno);
4955 ops[2] = gen_rtx_REG (DImode, regno + 2);
4956 ops[3] = gen_rtx_REG (DImode, regno + 4);
4957 ops[4] = GEN_INT (lane);
4958 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4959 ops);
4960 return "";
4961 }
4962 [(set_attr "type" "neon_store3_one_lane<q>")]
4963 )
4964
4965 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4966 ;; here on big endian targets.
;; vst3 from a single lane, VQ_HS modes, CImode register group.
;; Operands are as for the VD_LANE variant.  A lane index in the upper
;; half of the vector (>= GET_MODE_NUNITS / 2) is rebased by that half and
;; the base register number advanced by 2; the three registers named are
;; then the DImode registers at REGNO, REGNO + 4 and REGNO + 8.  The
;; memory operand is printed with plain %0 here, not %A0.
4967 (define_insn "neon_vst3_lane<mode>"
4968 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
4969 (unspec:<V_three_elem>
4970 [(match_operand:CI 1 "s_register_operand" "w")
4971 (match_operand:SI 2 "immediate_operand" "i")
4972 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4973 UNSPEC_VST3_LANE))]
4974 "TARGET_NEON"
4975 {
4976 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4977 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4978 int regno = REGNO (operands[1]);
4979 rtx ops[5];
/* Upper-half lane: rebase the index and start from REGNO + 2.  */
4980 if (lane >= max / 2)
4981 {
4982 lane -= max / 2;
4983 regno += 2;
4984 }
4985 ops[0] = operands[0];
4986 ops[1] = gen_rtx_REG (DImode, regno);
4987 ops[2] = gen_rtx_REG (DImode, regno + 4);
4988 ops[3] = gen_rtx_REG (DImode, regno + 8);
4989 ops[4] = GEN_INT (lane);
4990 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
4991 ops);
4992 return "";
4993 }
4994 [(set_attr "type" "neon_store3_one_lane<q>")]
4995 )
4996
;; Expander vec_load_lanesoi<mode> for the VDX modes.  It has no
;; preparation statements; its RTL is a set of the OImode register
;; operand 0 from an UNSPEC_VLD4 of the struct memory operand 1, the same
;; shape as the VDX neon_vld4<mode> insn pattern.
4997 (define_expand "vec_load_lanesoi<mode>"
4998 [(set (match_operand:OI 0 "s_register_operand")
4999 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5000 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5001 UNSPEC_VLD4))]
5002 "TARGET_NEON")
5003
;; vld4 for the VDX modes: load the OImode register group operand 0 from
;; the struct memory operand 1.  Modes with 64-bit elements are emitted as
;; vld1.64, all others as vld4.<element size>; the "type" attribute is
;; selected by the same element-size test.
5004 (define_insn "neon_vld4<mode>"
5005 [(set (match_operand:OI 0 "s_register_operand" "=w")
5006 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5007 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5008 UNSPEC_VLD4))]
5009 "TARGET_NEON"
5010 {
5011 if (<V_sz_elem> == 64)
5012 return "vld1.64\t%h0, %A1";
5013 else
5014 return "vld4.<V_sz_elem>\t%h0, %A1";
5015 }
5016 [(set (attr "type")
5017 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5018 (const_string "neon_load1_4reg<q>")
5019 (const_string "neon_load4_4reg<q>")))]
5020 )
5021
;; Expander vec_load_lanesxi<mode> for the VQ2 modes: forwards its two
;; XImode operands (register destination, struct memory source) unchanged
;; to the neon_vld4<mode> expander and finishes with DONE.
5022 (define_expand "vec_load_lanesxi<mode>"
5023 [(match_operand:XI 0 "s_register_operand")
5024 (match_operand:XI 1 "neon_struct_operand")
5025 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5026 "TARGET_NEON"
5027 {
5028 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
5029 DONE;
5030 })
5031
;; vld4 for the VQ2 modes, expanded as two instructions.  The XImode memory
;; operand 1 is viewed as two consecutive OImode pieces: the first is
;; loaded by neon_vld4qa<mode>, the second (GET_MODE_SIZE (OImode) bytes
;; further on) by neon_vld4qb<mode>, which also receives operand 0 as its
;; tied input so the result of the first load is kept.
5032 (define_expand "neon_vld4<mode>"
5033 [(match_operand:XI 0 "s_register_operand")
5034 (match_operand:XI 1 "neon_struct_operand")
5035 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5036 "TARGET_NEON"
5037 {
5038 rtx mem;
5039
5040 mem = adjust_address (operands[1], OImode, 0);
5041 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5042 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5043 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
5044 DONE;
5045 })
5046
;; First half of the VQ2 vld4 sequence: a vld4 from the OImode memory
;; operand 1 into the DImode registers at REGNO, REGNO + 4, REGNO + 8 and
;; REGNO + 12 of the XImode operand 0.
5047 (define_insn "neon_vld4qa<mode>"
5048 [(set (match_operand:XI 0 "s_register_operand" "=w")
5049 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5050 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5051 UNSPEC_VLD4A))]
5052 "TARGET_NEON"
5053 {
5054 int regno = REGNO (operands[0]);
5055 rtx ops[5];
5056 ops[0] = gen_rtx_REG (DImode, regno);
5057 ops[1] = gen_rtx_REG (DImode, regno + 4);
5058 ops[2] = gen_rtx_REG (DImode, regno + 8);
5059 ops[3] = gen_rtx_REG (DImode, regno + 12);
5060 ops[4] = operands[1];
5061 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5062 return "";
5063 }
5064 [(set_attr "type" "neon_load4_4reg<q>")]
5065 )
5066
;; Second half of the VQ2 vld4 sequence: a vld4 from the OImode memory
;; operand 1 into the DImode registers at REGNO + 2, REGNO + 6, REGNO + 10
;; and REGNO + 14 of the XImode operand 0.  Operand 2 is the previous value
;; of the register group, tied to operand 0 by the "0" constraint.
5067 (define_insn "neon_vld4qb<mode>"
5068 [(set (match_operand:XI 0 "s_register_operand" "=w")
5069 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5070 (match_operand:XI 2 "s_register_operand" "0")
5071 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5072 UNSPEC_VLD4B))]
5073 "TARGET_NEON"
5074 {
5075 int regno = REGNO (operands[0]);
5076 rtx ops[5];
5077 ops[0] = gen_rtx_REG (DImode, regno + 2);
5078 ops[1] = gen_rtx_REG (DImode, regno + 6);
5079 ops[2] = gen_rtx_REG (DImode, regno + 10);
5080 ops[3] = gen_rtx_REG (DImode, regno + 14);
5081 ops[4] = operands[1];
5082 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5083 return "";
5084 }
5085 [(set_attr "type" "neon_load4_4reg<q>")]
5086 )
5087
5088 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5089 ;; here on big endian targets.
;; vld4 to a single lane, VD_LANE modes, OImode register group.
;; Operand 1 is the memory source, operand 2 the incoming value of the
;; register group (tied to operand 0) and operand 3 the lane number.  The
;; lane is mapped through NEON_ENDIAN_LANE_N and the four registers named
;; are the DImode registers at REGNO, REGNO + 2, REGNO + 4 and REGNO + 6.
5090 (define_insn "neon_vld4_lane<mode>"
5091 [(set (match_operand:OI 0 "s_register_operand" "=w")
5092 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5093 (match_operand:OI 2 "s_register_operand" "0")
5094 (match_operand:SI 3 "immediate_operand" "i")
5095 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5096 UNSPEC_VLD4_LANE))]
5097 "TARGET_NEON"
5098 {
5099 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5100 int regno = REGNO (operands[0]);
5101 rtx ops[6];
5102 ops[0] = gen_rtx_REG (DImode, regno);
5103 ops[1] = gen_rtx_REG (DImode, regno + 2);
5104 ops[2] = gen_rtx_REG (DImode, regno + 4);
5105 ops[3] = gen_rtx_REG (DImode, regno + 6);
5106 ops[4] = operands[1];
5107 ops[5] = GEN_INT (lane);
5108 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5109 ops);
5110 return "";
5111 }
5112 [(set_attr "type" "neon_load4_one_lane<q>")]
5113 )
5114
5115 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5116 ;; here on big endian targets.
;; vld4 to a single lane, VQ_HS modes, XImode register group.
;; Operands are as for the VD_LANE variant.  A lane index in the upper
;; half of the vector (>= GET_MODE_NUNITS / 2) is rebased by that half and
;; the base register number advanced by 2; the four registers named are
;; then the DImode registers at REGNO, REGNO + 4, REGNO + 8 and REGNO + 12.
5117 (define_insn "neon_vld4_lane<mode>"
5118 [(set (match_operand:XI 0 "s_register_operand" "=w")
5119 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5120 (match_operand:XI 2 "s_register_operand" "0")
5121 (match_operand:SI 3 "immediate_operand" "i")
5122 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5123 UNSPEC_VLD4_LANE))]
5124 "TARGET_NEON"
5125 {
5126 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5127 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5128 int regno = REGNO (operands[0]);
5129 rtx ops[6];
/* Upper-half lane: rebase the index and start from REGNO + 2.  */
5130 if (lane >= max / 2)
5131 {
5132 lane -= max / 2;
5133 regno += 2;
5134 }
5135 ops[0] = gen_rtx_REG (DImode, regno);
5136 ops[1] = gen_rtx_REG (DImode, regno + 4);
5137 ops[2] = gen_rtx_REG (DImode, regno + 8);
5138 ops[3] = gen_rtx_REG (DImode, regno + 12);
5139 ops[4] = operands[1];
5140 ops[5] = GEN_INT (lane);
5141 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5142 ops);
5143 return "";
5144 }
5145 [(set_attr "type" "neon_load4_one_lane<q>")]
5146 )
5147
;; vld4 to all lanes, VDX modes, OImode register group.
;; Modes with more than one element are emitted as vld4 with the "[]"
;; syntax on the DImode registers at REGNO, REGNO + 2, REGNO + 4 and
;; REGNO + 6; single-element modes are emitted as vld1 with operand 0
;; printed through %h.  The "type" attribute makes the same distinction by
;; testing <V_mode_nunits>.
5148 (define_insn "neon_vld4_dup<mode>"
5149 [(set (match_operand:OI 0 "s_register_operand" "=w")
5150 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5151 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5152 UNSPEC_VLD4_DUP))]
5153 "TARGET_NEON"
5154 {
5155 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5156 {
5157 int regno = REGNO (operands[0]);
5158 rtx ops[5];
5159 ops[0] = gen_rtx_REG (DImode, regno);
5160 ops[1] = gen_rtx_REG (DImode, regno + 2);
5161 ops[2] = gen_rtx_REG (DImode, regno + 4);
5162 ops[3] = gen_rtx_REG (DImode, regno + 6);
5163 ops[4] = operands[1];
5164 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5165 ops);
5166 return "";
5167 }
5168 else
5169 return "vld1.<V_sz_elem>\t%h0, %A1";
5170 }
5171 [(set (attr "type")
5172 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5173 (const_string "neon_load4_all_lanes<q>")
5174 (const_string "neon_load1_1reg<q>")))]
5175 )
5176
;; Expander vec_store_lanesoi<mode> for the VDX modes.  It has no
;; preparation statements; its RTL is a set of the OImode struct memory
;; operand 0 from an UNSPEC_VST4 of the register operand 1, the same shape
;; as the VDX neon_vst4<mode> insn pattern.
5177 (define_expand "vec_store_lanesoi<mode>"
5178 [(set (match_operand:OI 0 "neon_struct_operand")
5179 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5180 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5181 UNSPEC_VST4))]
5182 "TARGET_NEON")
5183
;; vst4 for the VDX modes: store the OImode register group in operand 1 to
;; the struct memory operand 0.  Modes with 64-bit elements are emitted as
;; vst1.64, all others as vst4.<element size>; the "type" attribute is
;; selected by the same element-size test.
5184 (define_insn "neon_vst4<mode>"
5185 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5186 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5187 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5188 UNSPEC_VST4))]
5189 "TARGET_NEON"
5190 {
5191 if (<V_sz_elem> == 64)
5192 return "vst1.64\t%h1, %A0";
5193 else
5194 return "vst4.<V_sz_elem>\t%h1, %A0";
5195 }
5196 [(set (attr "type")
5197 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5198 (const_string "neon_store1_4reg<q>")
5199 (const_string "neon_store4_4reg<q>")))]
5200 )
5201
;; Expander vec_store_lanesxi<mode> for the VQ2 modes: forwards its two
;; XImode operands (struct memory destination, register source) unchanged
;; to the neon_vst4<mode> expander and finishes with DONE.
5202 (define_expand "vec_store_lanesxi<mode>"
5203 [(match_operand:XI 0 "neon_struct_operand")
5204 (match_operand:XI 1 "s_register_operand")
5205 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5206 "TARGET_NEON"
5207 {
5208 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
5209 DONE;
5210 })
5211
;; vst4 for the VQ2 modes, expanded as two instructions.  The XImode memory
;; operand 0 is viewed as two consecutive OImode pieces: the first is
;; written by neon_vst4qa<mode>, the second (GET_MODE_SIZE (OImode) bytes
;; further on) by neon_vst4qb<mode>.  Both receive the whole XImode
;; register operand 1.
5212 (define_expand "neon_vst4<mode>"
5213 [(match_operand:XI 0 "neon_struct_operand")
5214 (match_operand:XI 1 "s_register_operand")
5215 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5216 "TARGET_NEON"
5217 {
5218 rtx mem;
5219
5220 mem = adjust_address (operands[0], OImode, 0);
5221 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5222 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5223 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
5224 DONE;
5225 })
5226
;; First half of the VQ2 vst4 sequence: a vst4 to the OImode memory
;; operand 0 from the DImode registers at REGNO, REGNO + 4, REGNO + 8 and
;; REGNO + 12 of the XImode operand 1.
5227 (define_insn "neon_vst4qa<mode>"
5228 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5229 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5230 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5231 UNSPEC_VST4A))]
5232 "TARGET_NEON"
5233 {
5234 int regno = REGNO (operands[1]);
5235 rtx ops[5];
5236 ops[0] = operands[0];
5237 ops[1] = gen_rtx_REG (DImode, regno);
5238 ops[2] = gen_rtx_REG (DImode, regno + 4);
5239 ops[3] = gen_rtx_REG (DImode, regno + 8);
5240 ops[4] = gen_rtx_REG (DImode, regno + 12);
5241 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5242 return "";
5243 }
5244 [(set_attr "type" "neon_store4_4reg<q>")]
5245 )
5246
;; Second half of the VQ2 vst4 sequence: a vst4 to the OImode memory
;; operand 0 from the DImode registers at REGNO + 2, REGNO + 6, REGNO + 10
;; and REGNO + 14 of the XImode operand 1.
5247 (define_insn "neon_vst4qb<mode>"
5248 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5249 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5250 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5251 UNSPEC_VST4B))]
5252 "TARGET_NEON"
5253 {
5254 int regno = REGNO (operands[1]);
5255 rtx ops[5];
5256 ops[0] = operands[0];
5257 ops[1] = gen_rtx_REG (DImode, regno + 2);
5258 ops[2] = gen_rtx_REG (DImode, regno + 6);
5259 ops[3] = gen_rtx_REG (DImode, regno + 10);
5260 ops[4] = gen_rtx_REG (DImode, regno + 14);
5261 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5262 return "";
5263 }
5264 [(set_attr "type" "neon_store4_4reg<q>")]
5265 )
5266
5267 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5268 ;; here on big endian targets.
;; vst4 from a single lane, VD_LANE modes, OImode register group.
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane number.  The lane is mapped through
;; NEON_ENDIAN_LANE_N and the four registers named are the DImode
;; registers at REGNO, REGNO + 2, REGNO + 4 and REGNO + 6.
5269 (define_insn "neon_vst4_lane<mode>"
5270 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5271 (unspec:<V_four_elem>
5272 [(match_operand:OI 1 "s_register_operand" "w")
5273 (match_operand:SI 2 "immediate_operand" "i")
5274 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5275 UNSPEC_VST4_LANE))]
5276 "TARGET_NEON"
5277 {
5278 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5279 int regno = REGNO (operands[1]);
5280 rtx ops[6];
5281 ops[0] = operands[0];
5282 ops[1] = gen_rtx_REG (DImode, regno);
5283 ops[2] = gen_rtx_REG (DImode, regno + 2);
5284 ops[3] = gen_rtx_REG (DImode, regno + 4);
5285 ops[4] = gen_rtx_REG (DImode, regno + 6);
5286 ops[5] = GEN_INT (lane);
5287 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5288 ops);
5289 return "";
5290 }
5291 [(set_attr "type" "neon_store4_one_lane<q>")]
5292 )
5293
5294 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5295 ;; here on big endian targets.
;; vst4 from a single lane, VQ_HS modes, XImode register group.
;; Operand 0 is the memory destination, operand 1 the register group and
;; operand 2 the lane number.  The lane is mapped through
;; NEON_ENDIAN_LANE_N; a lane index in the upper half of the vector
;; (>= GET_MODE_NUNITS / 2) is rebased by that half and the base register
;; number advanced by 2.  The four registers named are then the DImode
;; registers at REGNO, REGNO + 4, REGNO + 8 and REGNO + 12.  This is a
;; single-lane store, so its scheduling type is neon_store4_one_lane<q>,
;; the same as the VD_LANE variant of this pattern.
5296 (define_insn "neon_vst4_lane<mode>"
5297 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5298 (unspec:<V_four_elem>
5299 [(match_operand:XI 1 "s_register_operand" "w")
5300 (match_operand:SI 2 "immediate_operand" "i")
5301 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5302 UNSPEC_VST4_LANE))]
5303 "TARGET_NEON"
5304 {
5305 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5306 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5307 int regno = REGNO (operands[1]);
5308 rtx ops[6];
/* Upper-half lane: rebase the index and start from REGNO + 2.  */
5309 if (lane >= max / 2)
5310 {
5311 lane -= max / 2;
5312 regno += 2;
5313 }
5314 ops[0] = operands[0];
5315 ops[1] = gen_rtx_REG (DImode, regno);
5316 ops[2] = gen_rtx_REG (DImode, regno + 4);
5317 ops[3] = gen_rtx_REG (DImode, regno + 8);
5318 ops[4] = gen_rtx_REG (DImode, regno + 12);
5319 ops[5] = GEN_INT (lane);
5320 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5321 ops);
5322 return "";
5323 }
5324 [(set_attr "type" "neon_store4_one_lane<q>")]
5325 )
5326
;; Extend (SE code iterator) the half of VU-mode operand 1 selected by
;; operand 2, which must satisfy vect_par_constant_low, into the
;; <V_unpack>-mode operand 0.  Emitted as vmovl with the %e1 half of the
;; source.  Only enabled when !BYTES_BIG_ENDIAN.
5327 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5328 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5329 (SE:<V_unpack> (vec_select:<V_HALF>
5330 (match_operand:VU 1 "register_operand" "w")
5331 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5332 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5333 "vmovl.<US><V_sz_elem> %q0, %e1"
5334 [(set_attr "type" "neon_shift_imm_long")]
5335 )
5336
;; Extend (SE code iterator) the half of VU-mode operand 1 selected by
;; operand 2, which must satisfy vect_par_constant_high, into the
;; <V_unpack>-mode operand 0.  Emitted as vmovl with the %f1 half of the
;; source.  Only enabled when !BYTES_BIG_ENDIAN.
5337 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5338 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5339 (SE:<V_unpack> (vec_select:<V_HALF>
5340 (match_operand:VU 1 "register_operand" "w")
5341 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5342 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5343 "vmovl.<US><V_sz_elem> %q0, %f1"
5344 [(set_attr "type" "neon_shift_imm_long")]
5345 )
5346
;; Expander vec_unpack<US>_hi_<mode> for the VU modes.  Builds a PARALLEL
;; holding the element indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and
;; passes it as the selector operand of neon_vec_unpack<US>_hi_<mode>.
;; Only enabled when !BYTES_BIG_ENDIAN.
5347 (define_expand "vec_unpack<US>_hi_<mode>"
5348 [(match_operand:<V_unpack> 0 "register_operand" "")
5349 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5350 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5351 {
5352 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5353 rtx t1;
5354 int i;
/* Selector: the upper-half element indices, in increasing order.  */
5355 for (i = 0; i < (<V_mode_nunits>/2); i++)
5356 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5357
5358 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5359 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
5360 operands[1],
5361 t1));
5362 DONE;
5363 }
5364 )
5365
;; Expander vec_unpack<US>_lo_<mode> for the VU modes.  Builds a PARALLEL
;; holding the element indices 0 .. <V_mode_nunits>/2-1 and passes it as
;; the selector operand of neon_vec_unpack<US>_lo_<mode>.  Only enabled
;; when !BYTES_BIG_ENDIAN.
5366 (define_expand "vec_unpack<US>_lo_<mode>"
5367 [(match_operand:<V_unpack> 0 "register_operand" "")
5368 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5369 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5370 {
5371 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5372 rtx t1;
5373 int i;
/* Selector: the lower-half element indices, in increasing order.  */
5374 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5375 RTVEC_ELT (v, i) = GEN_INT (i);
5376 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5377 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
5378 operands[1],
5379 t1));
5380 DONE;
5381 }
5382 )
5383
;; Widening multiply of one half of two VU-mode vectors: operands 1 and 3
;; are both vec_select'ed with the same selector (operand 2, which must
;; satisfy vect_par_constant_low; the second use is a match_dup), extended
;; and multiplied into the <V_unpack>-mode operand 0.  Emitted as vmull on
;; %e1 and %e3.  Only enabled when !BYTES_BIG_ENDIAN.
5384 (define_insn "neon_vec_<US>mult_lo_<mode>"
5385 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5386 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5387 (match_operand:VU 1 "register_operand" "w")
5388 (match_operand:VU 2 "vect_par_constant_low" "")))
5389 (SE:<V_unpack> (vec_select:<V_HALF>
5390 (match_operand:VU 3 "register_operand" "w")
5391 (match_dup 2)))))]
5392 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5393 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5394 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5395 )
5396
;; Expander vec_widen_<US>mult_lo_<mode> for the VU modes.  Builds a
;; PARALLEL holding the element indices 0 .. <V_mode_nunits>/2-1 and emits
;; neon_vec_<US>mult_lo_<mode> with it as the selector; note the argument
;; order (dest, first source, selector, second source).  Only enabled when
;; !BYTES_BIG_ENDIAN.
5397 (define_expand "vec_widen_<US>mult_lo_<mode>"
5398 [(match_operand:<V_unpack> 0 "register_operand" "")
5399 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5400 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5401 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5402 {
5403 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5404 rtx t1;
5405 int i;
5406 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5407 RTVEC_ELT (v, i) = GEN_INT (i);
5408 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5409
5410 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
5411 operands[1],
5412 t1,
5413 operands[2]));
5414 DONE;
5415 }
5416 )
5417
;; Widening multiply of one half of two VU-mode vectors: operands 1 and 3
;; are both vec_select'ed with the same selector (operand 2, which must
;; satisfy vect_par_constant_high; the second use is a match_dup), extended
;; and multiplied into the <V_unpack>-mode operand 0.  Emitted as vmull on
;; %f1 and %f3.  Only enabled when !BYTES_BIG_ENDIAN.
5418 (define_insn "neon_vec_<US>mult_hi_<mode>"
5419 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5420 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5421 (match_operand:VU 1 "register_operand" "w")
5422 (match_operand:VU 2 "vect_par_constant_high" "")))
5423 (SE:<V_unpack> (vec_select:<V_HALF>
5424 (match_operand:VU 3 "register_operand" "w")
5425 (match_dup 2)))))]
5426 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5427 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5428 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5429 )
5430
;; Expander vec_widen_<US>mult_hi_<mode> for the VU modes.  Builds a
;; PARALLEL holding the element indices <V_mode_nunits>/2 ..
;; <V_mode_nunits>-1 and emits neon_vec_<US>mult_hi_<mode> with it as the
;; selector; note the argument order (dest, first source, selector, second
;; source).  Only enabled when !BYTES_BIG_ENDIAN.
5431 (define_expand "vec_widen_<US>mult_hi_<mode>"
5432 [(match_operand:<V_unpack> 0 "register_operand" "")
5433 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5434 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5435 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5436 {
5437 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5438 rtx t1;
5439 int i;
5440 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5441 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5442 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5443
5444 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
5445 operands[1],
5446 t1,
5447 operands[2]));
5448 DONE;
5449
5450 }
5451 )
5452
;; Widening shift left for the VW modes: operand 1 is shifted left by the
;; constant operand 2 (which must satisfy
;; const_neon_scalar_shift_amount_operand) and extended into the
;; <V_widen>-mode operand 0.  Emitted as vshll with a %P1 source.
5453 (define_insn "neon_vec_<US>shiftl_<mode>"
5454 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5455 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5456 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5457 "TARGET_NEON"
5458 {
5459 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5460 }
5461 [(set_attr "type" "neon_shift_imm_long")]
5462 )
5463
;; Expander vec_widen_<US>shiftl_lo_<mode> for the VU modes.  Takes the
;; <V_HALF>mode subreg of operand 1 at byte offset 0 and emits
;; neon_vec_<US>shiftl_<V_half> on it with the shift amount operand 2.
;; Only enabled when !BYTES_BIG_ENDIAN.
5464 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5465 [(match_operand:<V_unpack> 0 "register_operand" "")
5466 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5467 (match_operand:SI 2 "immediate_operand" "i")]
5468 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5469 {
5470 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5471 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
5472 operands[2]));
5473 DONE;
5474 }
5475 )
5476
;; Expander for the vec_widen_<US>shiftl_hi_<mode> named pattern on VU modes.
;; Same shape as the _lo_ expander, except that the <V_HALF>mode subreg of
;; operand 1 is taken at byte offset GET_MODE_SIZE (<V_HALF>mode), i.e. the
;; second half of the register's bytes, before being passed to
;; neon_vec_<US>shiftl_<V_half>.
;; Only enabled for little-endian NEON (!BYTES_BIG_ENDIAN).
5477 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5478 [(match_operand:<V_unpack> 0 "register_operand" "")
5479 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5480 (match_operand:SI 2 "immediate_operand" "i")]
5481 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5482 {
5483 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5484 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5485 GET_MODE_SIZE (<V_HALF>mode)),
5486 operands[2]));
5487 DONE;
5488 }
5489 )
5490
5491 ;; Vectorize for non-neon-quad case
;; Extend every element of a VDI-mode vector (operand 1) with SE, producing
;; a <V_widen>-mode result in operand 0.  Emits a single vmovl.
5492 (define_insn "neon_unpack<US>_<mode>"
5493 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5494 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5495 "TARGET_NEON"
5496 "vmovl.<US><V_sz_elem> %q0, %P1"
5497 [(set_attr "type" "neon_move")]
5498 )
5499
;; Expander for the vec_unpack<US>_lo_<mode> named pattern on VDI modes.
;; Two steps: (1) extend the whole input into a fresh <V_widen>mode
;; temporary with neon_unpack<US>_<mode>; (2) copy the low half of that
;; temporary into operand 0 with neon_vget_low<V_widen_l>.
5500 (define_expand "vec_unpack<US>_lo_<mode>"
5501 [(match_operand:<V_double_width> 0 "register_operand" "")
5502 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5503 "TARGET_NEON"
5504 {
5505 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5506 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5507 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5508
5509 DONE;
5510 }
5511 )
5512
;; Expander for the vec_unpack<US>_hi_<mode> named pattern on VDI modes.
;; Two steps: (1) extend the whole input into a fresh <V_widen>mode
;; temporary with neon_unpack<US>_<mode>; (2) copy the high half of that
;; temporary into operand 0 with neon_vget_high<V_widen_l>.
5513 (define_expand "vec_unpack<US>_hi_<mode>"
5514 [(match_operand:<V_double_width> 0 "register_operand" "")
5515 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5516 "TARGET_NEON"
5517 {
5518 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5519 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5520 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5521
5522 DONE;
5523 }
5524 )
5525
;; Widening multiply of two complete VDI-mode vectors: both inputs are
;; extended with SE to <V_widen> and multiplied.  Emits a single vmull.
5526 (define_insn "neon_vec_<US>mult_<mode>"
5527 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5528 (mult:<V_widen> (SE:<V_widen>
5529 (match_operand:VDI 1 "register_operand" "w"))
5530 (SE:<V_widen>
5531 (match_operand:VDI 2 "register_operand" "w"))))]
5532 "TARGET_NEON"
5533 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5534 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5535 )
5536
;; Expander for the vec_widen_<US>mult_hi_<mode> named pattern on VDI modes.
;; Computes the full widening product of operands 1 and 2 into a fresh
;; <V_widen>mode temporary (neon_vec_<US>mult_<mode>), then copies the high
;; half of that temporary into operand 0 with neon_vget_high<V_widen_l>.
5537 (define_expand "vec_widen_<US>mult_hi_<mode>"
5538 [(match_operand:<V_double_width> 0 "register_operand" "")
5539 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5540 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5541 "TARGET_NEON"
5542 {
5543 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5544 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5545 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5546
5547 DONE;
5548
5549 }
5550 )
5551
;; Expander for the vec_widen_<US>mult_lo_<mode> named pattern on VDI modes.
;; Computes the full widening product of operands 1 and 2 into a fresh
;; <V_widen>mode temporary (neon_vec_<US>mult_<mode>), then copies the low
;; half of that temporary into operand 0 with neon_vget_low<V_widen_l>.
5552 (define_expand "vec_widen_<US>mult_lo_<mode>"
5553 [(match_operand:<V_double_width> 0 "register_operand" "")
5554 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5555 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5556 "TARGET_NEON"
5557 {
5558 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5559 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5560 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5561
5562 DONE;
5563
5564 }
5565 )
5566
;; Expander for the vec_widen_<US>shiftl_hi_<mode> named pattern on VDI
;; modes.  Performs the widening shift of operand 1 by the immediate in
;; operand 2 into a fresh <V_widen>mode temporary
;; (neon_vec_<US>shiftl_<mode>), then copies the high half of that
;; temporary into operand 0 with neon_vget_high<V_widen_l>.
5567 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5568 [(match_operand:<V_double_width> 0 "register_operand" "")
5569 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5570 (match_operand:SI 2 "immediate_operand" "i")]
5571 "TARGET_NEON"
5572 {
5573 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5574 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5575 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5576
5577 DONE;
5578 }
5579 )
5580
;; Expander for the vec_widen_<US>shiftl_lo_<mode> named pattern on VDI
;; modes.  Performs the widening shift of operand 1 by the immediate in
;; operand 2 into a fresh <V_widen>mode temporary
;; (neon_vec_<US>shiftl_<mode>), then copies the low half of that
;; temporary into operand 0 with neon_vget_low<V_widen_l>.
5581 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5582 [(match_operand:<V_double_width> 0 "register_operand" "")
5583 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5584 (match_operand:SI 2 "immediate_operand" "i")]
5585 "TARGET_NEON"
5586 {
5587 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5588 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5589 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5590
5591 DONE;
5592 }
5593 )
5594
5595 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5596 ; because the ordering of vector elements in Q registers is different from what
5597 ; the semantics of the instructions require.
5598
;; Pack two VN-mode vectors into one <V_narrow_pack> vector by truncating
;; each element: the result is the vec_concat of truncate(operand 1) and
;; truncate(operand 2).
;; Implemented as two vmovn instructions, one writing %e0 and one writing
;; %f0, hence type "multiple" and length 8.
;; The destination is earlyclobber ("=&w"); presumably this is because the
;; first vmovn writes part of operand 0 before operand 2 has been read.
;; Only enabled for little-endian NEON (!BYTES_BIG_ENDIAN).
5599 (define_insn "vec_pack_trunc_<mode>"
5600 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5601 (vec_concat:<V_narrow_pack>
5602 (truncate:<V_narrow>
5603 (match_operand:VN 1 "register_operand" "w"))
5604 (truncate:<V_narrow>
5605 (match_operand:VN 2 "register_operand" "w"))))]
5606 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5607 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5608 [(set_attr "type" "multiple")
5609 (set_attr "length" "8")]
5610 )
5611
5612 ;; For the non-quad case.
;; Truncate every element of a single VN-mode vector (operand 1) to
;; <V_narrow> mode in operand 0.  Emits a single vmovn.
;; Only enabled for little-endian NEON (!BYTES_BIG_ENDIAN).
5613 (define_insn "neon_vec_pack_trunc_<mode>"
5614 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5615 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5616 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5617 "vmovn.i<V_sz_elem>\t%P0, %q1"
5618 [(set_attr "type" "neon_move_narrow_q")]
5619 )
5620
;; Expander for the vec_pack_trunc_<mode> named pattern on VSHFT modes.
;; Three steps: (1) allocate a <V_DOUBLE>mode temporary; (2) place
;; operand 1 in its low half (move_lo_quad_<V_double>) and operand 2 in its
;; high half (move_hi_quad_<V_double>); (3) truncate the combined vector
;; into operand 0 with neon_vec_pack_trunc_<V_double>.
;; Only enabled for little-endian NEON (!BYTES_BIG_ENDIAN).
5621 (define_expand "vec_pack_trunc_<mode>"
5622 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5623 (match_operand:VSHFT 1 "register_operand" "")
5624 (match_operand:VSHFT 2 "register_operand")]
5625 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5626 {
5627 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5628
5629 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5630 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5631 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
5632 DONE;
5633 })
5634
;; Absolute difference written as abs (minus (operand 1, operand 2)) on
;; VDQ modes.  Emits a single vabd with the <V_s_elem> element suffix.
;; For floating-point modes (<Is_float_mode> nonzero) the pattern is only
;; available under flag_unsafe_math_optimizations.
;; The "type" attribute is chosen at attribute-evaluation time: the
;; floating-point abd type for float modes, the integer abd type otherwise.
5635 (define_insn "neon_vabd<mode>_2"
5636 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5637 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5638 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5639 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5640 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5641 [(set (attr "type")
5642 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5643 (const_string "neon_fp_abd_s<q>")
5644 (const_string "neon_abd<q>")))]
5645 )
5646
;; Variant of the absolute-difference pattern in which the subtraction is
;; represented as an UNSPEC_VSUB unspec of operands 1 and 2 rather than a
;; plain (minus ...).  Emits a single vabd with the <V_if_elem> element
;; suffix.
;; For floating-point modes (<Is_float_mode> nonzero) the pattern is only
;; available under flag_unsafe_math_optimizations.
;; The "type" attribute is selected on <Is_float_mode> exactly as in
;; neon_vabd<mode>_2.
5647 (define_insn "neon_vabd<mode>_3"
5648 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5649 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5650 (match_operand:VDQ 2 "s_register_operand" "w")]
5651 UNSPEC_VSUB)))]
5652 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5653 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5654 [(set (attr "type")
5655 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5656 (const_string "neon_fp_abd_s<q>")
5657 (const_string "neon_abd<q>")))]
5658 )
5659
5660 ;; Copy from core-to-neon regs, then extend, not vice-versa
5661
;; Split a sign_extend from SI to DI after reload when the destination
;; (operand 0) is a VFP register.  The replacement sequence is:
;;   1. duplicate the SI source into every lane of the V2SI view of the
;;      destination register (operand 2, created in the preparation code
;;      from REGNO (operands[0]));
;;   2. arithmetic-shift the DI destination right by 32, leaving the
;;      sign-extended value.
5662 (define_split
5663 [(set (match_operand:DI 0 "s_register_operand" "")
5664 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5665 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5666 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5667 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5668 {
5669 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5670 })
5671
;; Split a sign_extend from HI to DI after reload when the destination
;; (operand 0) is a VFP register.  The replacement sequence is:
;;   1. duplicate the HI source into every lane of the V4HI view of the
;;      destination register (operand 2, created in the preparation code
;;      from REGNO (operands[0]));
;;   2. arithmetic-shift the DI destination right by 48, leaving the
;;      sign-extended value.
5672 (define_split
5673 [(set (match_operand:DI 0 "s_register_operand" "")
5674 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5675 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5676 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5677 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5678 {
5679 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5680 })
5681
;; Split a sign_extend from QI to DI after reload when the destination
;; (operand 0) is a VFP register.  The replacement sequence is:
;;   1. duplicate the QI source into every lane of the V8QI view of the
;;      destination register (operand 2, created in the preparation code
;;      from REGNO (operands[0]));
;;   2. arithmetic-shift the DI destination right by 56, leaving the
;;      sign-extended value.
5682 (define_split
5683 [(set (match_operand:DI 0 "s_register_operand" "")
5684 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5685 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5686 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5687 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5688 {
5689 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5690 })
5691
;; Split a zero_extend from SI to DI after reload when the destination
;; (operand 0) is a VFP register.  The replacement sequence is:
;;   1. duplicate the SI source into every lane of the V2SI view of the
;;      destination register (operand 2, created in the preparation code
;;      from REGNO (operands[0]));
;;   2. logical-shift the DI destination right by 32, leaving the
;;      zero-extended value.
5692 (define_split
5693 [(set (match_operand:DI 0 "s_register_operand" "")
5694 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5695 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5696 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5697 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5698 {
5699 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5700 })
5701
;; Split a zero_extend from HI to DI after reload when the destination
;; (operand 0) is a VFP register.  The replacement sequence is:
;;   1. duplicate the HI source into every lane of the V4HI view of the
;;      destination register (operand 2, created in the preparation code
;;      from REGNO (operands[0]));
;;   2. logical-shift the DI destination right by 48, leaving the
;;      zero-extended value.
5702 (define_split
5703 [(set (match_operand:DI 0 "s_register_operand" "")
5704 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5705 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5706 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5707 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5708 {
5709 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5710 })
5711
;; Split a zero_extend from QI to DI after reload when the destination
;; (operand 0) is a VFP register.  The replacement sequence is:
;;   1. duplicate the QI source into every lane of the V8QI view of the
;;      destination register (operand 2, created in the preparation code
;;      from REGNO (operands[0]));
;;   2. logical-shift the DI destination right by 56, leaving the
;;      zero-extended value.
5712 (define_split
5713 [(set (match_operand:DI 0 "s_register_operand" "")
5714 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5715 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5716 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5717 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5718 {
5719 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5720 })