[ARM] Add patterns for new instructions
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The permitted values are "vadd", "vmin" and "vmax"; the default is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the VDX modes.  At least one of the two operands must
;; be a register.  The nine alternatives are, in order:
;;   0  w  <- w     vmov between NEON registers
;;   1  Un <- w     handled by output_move_neon
;;   2  w  <- Dn    NEON immediate, emitted as vmov.f32 or vmov.i<width>
;;   3  w  <- Uni   handled by output_move_neon
;;   4  r  <- w     vmov to a core register pair
;;   5  w  <- r     vmov from a core register pair
;;   6-8            core register/memory moves via output_move_double
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDX 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VDX 1 "general_operand"
          " w,w, Dn,Uni, w, r, r, Usi,r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%P0, %P1 @ <mode>";

    case 1:
    case 3:
      return output_move_neon (operands);

    case 2:
      {
        /* The returned template must outlive this function, hence the
           static buffer.  */
        static char templ[40];
        int width;
        int is_valid
          = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &width);

        gcc_assert (is_valid != 0);

        /* A reported width of zero selects the vmov.f32 form.  */
        if (width == 0)
          return "vmov.f32\t%P0, %1 @ <mode>";

        sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
        return templ;
      }

    case 4:
      return "vmov\t%Q0, %R0, %P1 @ <mode>";

    case 5:
      return "vmov\t%P0, %Q1, %R1 @ <mode>";

    default:
      return output_move_double (operands, true, NULL);
    }
}
  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
 neon_load1_2reg, neon_store1_2reg")
   (set_attr "length" "4,4,4,4,4,4,8,8,8")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the VQXMOV modes.  At least one operand must be a
;; register.  The alternatives follow the same order as the VDX pattern:
;; NEON reg-reg, store, NEON immediate, load, NEON -> core, core -> NEON,
;; and finally the core-register forms handled by output_move_quad.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dn,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%q0, %q1 @ <mode>";

    case 1:
    case 3:
      return output_move_neon (operands);

    case 2:
      {
        /* The returned template must outlive this function, hence the
           static buffer.  */
        static char templ[40];
        int width;
        int is_valid
          = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &width);

        gcc_assert (is_valid != 0);

        /* A reported width of zero selects the vmov.f32 form.  */
        if (width == 0)
          return "vmov.f32\t%q0, %1 @ <mode>";

        sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
        return templ;
      }

    /* Transfers to and from core registers take two vmov instructions,
       one per half of the quad register.  */
    case 4:
      return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";

    case 5:
      return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";

    default:
      return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
 mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; TImode move expander.  While new pseudos may still be created, a move
;; whose destination is not a register has its source forced into one.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (match_operand:TI 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (TImode, operands[1]);
})
127
;; Move expander for the VSTRUCT modes.  While new pseudos may still be
;; created, a move whose destination is not a register has its source
;; forced into one.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
        (match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
139
;; Move insn for the VSTRUCT modes; at least one operand must be a
;; register.  The register-to-register alternative is output as "#" (the
;; define_splits for EI/OI/CI/XI handle such copies after reload); the
;; store and load alternatives go through output_move_neon.  The length
;; attribute is computed by arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 0)
    return "#";

  if (which_alternative == 1 || which_alternative == 2)
    return output_move_neon (operands);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
156
;; After reload, split an EImode register-to-register copy into a TImode
;; move of the low part and a DImode move of the part four register
;; numbers higher.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  rtx dest[2], src[2];
  const int dst_base = REGNO (operands[0]);
  const int src_base = REGNO (operands[1]);

  /* Low piece: TImode at the base register.  */
  dest[0] = gen_rtx_REG (TImode, dst_base);
  src[0] = gen_rtx_REG (TImode, src_base);

  /* High piece: DImode, four register numbers further on.  */
  dest[1] = gen_rtx_REG (DImode, dst_base + 4);
  src[1] = gen_rtx_REG (DImode, src_base + 4);

  /* NOTE(review): presumably fills operands[] with the piecewise moves in
     an order that is safe when source and destination overlap; confirm
     against neon_disambiguate_copy in arm.c.  */
  neon_disambiguate_copy (operands, dest, src, 2);
})
175
;; After reload, split an OImode register-to-register copy into two
;; TImode moves, the second four register numbers above the first.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  rtx dest[2], src[2];
  const int dst_base = REGNO (operands[0]);
  const int src_base = REGNO (operands[1]);
  int i;

  /* Each TImode piece starts four register numbers after the previous.  */
  for (i = 0; i < 2; i++)
    {
      dest[i] = gen_rtx_REG (TImode, dst_base + 4 * i);
      src[i] = gen_rtx_REG (TImode, src_base + 4 * i);
    }

  neon_disambiguate_copy (operands, dest, src, 2);
})
194
;; After reload, split a CImode register-to-register copy into three
;; TImode moves at register offsets 0, 4 and 8.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  rtx dest[3], src[3];
  const int dst_base = REGNO (operands[0]);
  const int src_base = REGNO (operands[1]);
  int i;

  /* Each TImode piece starts four register numbers after the previous.  */
  for (i = 0; i < 3; i++)
    {
      dest[i] = gen_rtx_REG (TImode, dst_base + 4 * i);
      src[i] = gen_rtx_REG (TImode, src_base + 4 * i);
    }

  neon_disambiguate_copy (operands, dest, src, 3);
})
216
;; After reload, split an XImode register-to-register copy into four
;; TImode moves at register offsets 0, 4, 8 and 12.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  rtx dest[4], src[4];
  const int dst_base = REGNO (operands[0]);
  const int src_base = REGNO (operands[1]);
  int i;

  /* Each TImode piece starts four register numbers after the previous.  */
  for (i = 0; i < 4; i++)
    {
      dest[i] = gen_rtx_REG (TImode, dst_base + 4 * i);
      src[i] = gen_rtx_REG (TImode, src_base + 4 * i);
    }

  neon_disambiguate_copy (operands, dest, src, 4);
})
241
;; Misaligned vector move expander for the VDQX modes.  Only enabled for
;; little-endian targets with unaligned_access.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
                     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  rtx adjust_mem;

  /* Expansion of this pattern must not fail.  When neither operand is a
     register (for instance memory := constant, as the auto-vectorizer can
     produce), load operand 1 into a register first.  */
  if (!s_register_operand (operands[0], <MODE>mode)
      && !s_register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* Whichever operand is not the destination register is the one whose
     address may need fixing up.  */
  adjust_mem = (s_register_operand (operands[0], <MODE>mode)
                ? operands[1]
                : operands[0]);

  /* Legitimize the address by forcing it into a register if needed.  */
  if (!neon_vector_mem_operand (adjust_mem, 2, true))
    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
})
266
;; Store form of the misaligned move for VDX modes: NEON register operand 1
;; is written to operand 0 with vst1, using the mode's element size.
;; Enabled only for little-endian targets with unaligned_access.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
274
;; Load form of the misaligned move for VDX modes: operand 1 is read into
;; NEON register operand 0 with vld1, using the mode's element size.
;; Enabled only for little-endian targets with unaligned_access.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
278 " Um")]
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
283
;; Store form of the misaligned move for VQX modes: NEON register operand 1
;; (printed with %q) is written to operand 0 with vst1.
;; Enabled only for little-endian targets with unaligned_access.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
291
;; Load form of the misaligned move for VQX modes: operand 1 is read into
;; NEON register operand 0 (printed with %q) with vld1.
;; Enabled only for little-endian targets with unaligned_access.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
295 " Um")]
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
300
;; Insert scalar operand 1 into one lane of a VD-mode vector.  Operand 3
;; (tied to the destination) supplies the other lanes.  Operand 2 is a
;; bit mask whose lowest set bit selects the lane.  The scalar may come
;; from memory (vld1 to a lane) or from a core register (vmov to a lane).
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VD 0 "s_register_operand" "=w,w")
        (vec_merge:VD
          (vec_duplicate:VD
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the lane mask into a lane index.  */
  int lane = ffs ((int) INTVAL (operands[2])) - 1;

  /* Lane numbering is mirrored on big-endian targets.  */
  if (BYTES_BIG_ENDIAN)
    lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;

  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
321
;; Insert scalar operand 1 into one lane of a VQ2-mode vector.  Operand 2
;; is a bit mask whose lowest set bit selects the lane.  The lane is
;; addressed through the <V_HALF>-mode register that contains it, so the
;; destination operand is rewritten before the template is output.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the lane mask into a lane index over the whole vector.  */
  HOST_WIDE_INT lane_in_vector = ffs ((int) INTVAL (operands[2])) - 1;
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = lane_in_vector % lanes_per_half;
  /* Register-number offset of the half holding the lane: 0 or 2.  */
  int half_offset = (lane_in_vector / lanes_per_half) * 2;
  int base_regno = REGNO (operands[0]);

  /* Lane numbering within a half is mirrored on big-endian targets.  */
  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[0] = gen_rtx_REG (<V_HALF>mode, base_regno + half_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)
350
;; Insert DImode operand 1 into one element of a V2DI vector.  Operand 2
;; is a bit mask whose lowest set bit selects the element.  The element is
;; addressed as a DImode register two register numbers per element above
;; the base, so the destination operand is rewritten before output.
(define_insn "vec_setv2di_internal"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the element mask into an element index.  */
  HOST_WIDE_INT index = ffs ((int) INTVAL (operands[2])) - 1;
  int target_regno = REGNO (operands[0]) + 2 * index;

  operands[0] = gen_rtx_REG (DImode, target_regno);

  return (which_alternative == 0
          ? "vld1.64\t%P0, %A1"
          : "vmov\t%P0, %Q1, %R1");
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)
372
;; Vector element insert expander.  Operand 2 is the element index; it is
;; turned into a one-bit mask (1 << index), which is the form the
;; vec_set<mode>_internal patterns take.  The destination vector is also
;; passed as the source of the unchanged lanes.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand:<V_elem> 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_mask = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
  rtx mask_rtx = GEN_INT (lane_mask);

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         mask_rtx, operands[0]));
  DONE;
})
384
;; Extract lane operand 2 of a VD-mode vector into operand 0, which is
;; either memory (vst1 from a lane) or a core register (vmov from a lane).
(define_insn "vec_extract<mode>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  /* Lane numbering is mirrored on big-endian targets.  */
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
406
;; Extract lane operand 2 of a VQ2-mode vector into operand 0 (memory or a
;; core register).  The lane is addressed through the <V_HALF>-mode
;; register that contains it, so the source operand is rewritten before
;; the template is output.
(define_insn "vec_extract<mode>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = INTVAL (operands[2]) % lanes_per_half;
  /* Register-number offset of the half holding the lane: 0 or 2.  */
  int half_offset = (INTVAL (operands[2]) / lanes_per_half) * 2;
  int base_regno = REGNO (operands[1]);

  /* Lane numbering within a half is mirrored on big-endian targets.  */
  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[1] = gen_rtx_REG (<V_HALF>mode, base_regno + half_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
432
;; Extract element operand 2 of a V2DI vector into DImode operand 0
;; (memory or a core register pair).  The element is addressed as a
;; DImode register two register numbers per element above the base.
(define_insn "vec_extractv2di"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int source_regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, source_regno);

  return (which_alternative == 0
          ? "vst1.64\t{%P1}, %A0 @ v2di"
          : "vmov\t%Q0, %R0, %P1 @ v2di");
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)
451
;; Vector initialisation expander for the VDQ modes.  All of the work is
;; delegated to neon_expand_vector_init, called with the destination
;; register (operand 0) and the initialiser (operand 1).
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
455 "TARGET_NEON"
456 {
457 neon_expand_vector_init (operands[0], operands[1]);
458 DONE;
459 })
460
461 ;; Doubleword and quadword arithmetic.
462
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
465
;; Element-wise vector addition for the VDQ modes, output as vadd.
;; Floating-point modes are only accepted under
;; flag_unsafe_math_optimizations.  The scheduling type is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
472 [(set (attr "type")
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
476 )
477
;; DImode addition with both NEON and core-register alternatives; the
;; condition-code register is clobbered.  Alternatives 0 and 3 operate on
;; NEON registers and output vadd.i64; they differ only in their "arch"
;; attribute (neon_for_64bits versus avoid_neon_for_64bits).  The
;; remaining alternatives use core registers and are output as "#".
(define_insn "adddi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
                 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  /* NEON register alternatives.  */
  if (which_alternative == 0 || which_alternative == 3)
    return "vadd.i64\t%P0, %P1, %P2";

  /* Core register alternatives (1, 2, 4, 5 and 6).  */
  if (which_alternative >= 1 && which_alternative <= 6)
    return "#";

  gcc_unreachable ();
}
  [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
 multiple,multiple,multiple")
   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
   (set_attr "length" "*,8,8,*,8,8,8")
   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)
503
;; Element-wise vector subtraction (operand 1 minus operand 2) for the VDQ
;; modes, output as vsub.  Floating-point modes are only accepted under
;; flag_unsafe_math_optimizations.  The scheduling type is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
510 [(set (attr "type")
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
514 )
515
;; DImode subtraction with both NEON and core-register alternatives; the
;; condition-code register is clobbered.  Alternatives 0 and 4 operate on
;; NEON registers and output vsub.i64; they differ only in their "arch"
;; attribute.  Alternatives 1 to 3 use core registers and output a
;; subs/sbc pair on the low and high words.
(define_insn "subdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
        (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
                  (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  /* NEON register alternatives.  */
  if (which_alternative == 0 || which_alternative == 4)
    return "vsub.i64\t%P0, %P1, %P2";

  /* Core register alternatives.  */
  if (which_alternative >= 1 && which_alternative <= 3)
    return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";

  gcc_unreachable ();
}
  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
   (set_attr "conds" "*,clob,clob,clob,*")
   (set_attr "length" "*,8,8,8,*")
   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)
538
;; Element-wise vector multiplication for the VDQW modes, output as vmul.
;; Floating-point modes are only accepted under
;; flag_unsafe_math_optimizations.  The scheduling type is
;; neon_fp_mul_s<q> for float modes and neon_mul_<V_elem_ch><q> otherwise.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
545 [(set (attr "type")
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
549 )
550
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 *
;; operand 3 + operand 1, output as vmla.  The accumulator (operand 1) is
;; tied to the destination, so it does not appear in the template.
;; Floating-point modes are only accepted under
;; flag_unsafe_math_optimizations.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
558 [(set (attr "type")
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
562 )
563
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 -
;; operand 2 * operand 3, output as vmls.  The accumulator (operand 1) is
;; tied to the destination, so it does not appear in the template.
;; Floating-point modes are only accepted under
;; flag_unsafe_math_optimizations.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
571 [(set (attr "type")
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
575 )
576
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
580 ;; to be able to use when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes: operand 0 = operand 1 *
;; operand 2 + operand 3, with the addend tied to the destination.
;; This named variant additionally requires
;; flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
590 )
591
;; Fused multiply-add for the VCVTF modes with the same RTL and output
;; template as fma<VCVTF:mode>4, but without the
;; flag_unsafe_math_optimizations requirement.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
600 )
601
;; Fused multiply-subtract for the VCVTF modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, output as vfms, with the addend tied to the
;; destination.  Requires flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
610 )
611
;; Fused multiply-subtract for the VCVTF modes with the same RTL and
;; output template as *fmsub<VCVTF:mode>4, but without the
;; flag_unsafe_math_optimizations requirement.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
620 )
621
;; Vector rounding for the VCVTF modes, one pattern per NEON_VRINT unspec;
;; the rounding variant comes from <nvrint_variant>.  Requires
;; TARGET_FPU_ARMV8.  The element type in the mnemonic is fixed at .f32.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
626 NEON_VRINT))]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
630 )
631
;; Float-to-integer vector conversion for the VCVTF modes, one pattern per
;; NEON_VCVT unspec and per FIXUORS code.  The result has mode
;; <V_cmp_result>; <su> selects the signed or unsigned mnemonic.
;; Requires TARGET_FPU_ARMV8 and is explicitly marked as not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
636 NEON_VCVT)))]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
641 )
642
;; Bitwise OR for the VDQ modes.  Alternative 0 ORs two registers with
;; vorr; alternative 1 ORs an immediate (constraint Dl) into operand 1,
;; which is tied to the destination, via neon_output_logic_immediate.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vorr", &operands[2],
                                        <MODE>mode, 0,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
659
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
664
;; Bitwise AND for the VDQ modes.  Alternative 0 ANDs two registers with
;; vand; alternative 1 ANDs an immediate (constraint DL) into operand 1,
;; which is tied to the destination, via neon_output_logic_immediate.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vand", &operands[2],
                                        <MODE>mode, 1,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
681
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2, output
;; as vorn.  Note that operand 2 (the inverted input) comes first in the
;; RTL but last in the assembly template.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
686 "TARGET_NEON"
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
689 )
690
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
693 ;; changes above.
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.
;; Alternative 0 uses NEON registers and outputs vorn directly.  The three
;; core-register alternatives are output as "#" and are split once reload
;; has completed and the destination is not a VFP register:
;;   - for TARGET_THUMB2 the operation is split into two SImode OR-NOT
;;     sets, one on the low words and one on the high words;
;;   - otherwise a DImode one's complement of operand 2 into operand 0 is
;;     emitted, followed by a DImode OR of operand 1 with operand 0.
;; The "arch" attribute restricts alternative 1 to "a" and alternatives 2
;; and 3 to "t2".
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
698 "TARGET_NEON"
699 "@
700 vorn\t%P0, %P1, %P2
701 #
702 #
703 #"
704 "reload_completed &&
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
708 "
709 {
710 if (TARGET_THUMB2)
711 {
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
718 }
719 else
720 {
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
723 DONE;
724 }
725 }"
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
729 )
730
;; Bit clear for the VDQ modes: operand 0 = operand 1 & ~operand 2, output
;; as vbic.  Note that operand 2 (the inverted input) comes first in the
;; RTL but last in the assembly template.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
735 "TARGET_NEON"
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
738 )
739
740 ;; Compare to *anddi_notdi_di.
;; DImode bit clear: operand 0 = operand 1 & ~operand 2.  Alternative 0
;; uses NEON registers and outputs vbic; the two core-register
;; alternatives are output as "#" with a length of 8.  No splitter is
;; attached to this pattern itself.
;; NOTE(review): presumably the "#" alternatives are split by a pattern
;; elsewhere (see the reference above) -- confirm.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
745 "TARGET_NEON"
746 "@
747 vbic\t%P0, %P1, %P2
748 #
749 #"
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
752 )
753
;; Bitwise exclusive OR of two registers for the VDQ modes, output as veor.
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
758 "TARGET_NEON"
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
761 )
762
;; Bitwise complement for the VDQ modes, output as vmvn.  The scheduling
;; type used is neon_move<q>.
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
766 "TARGET_NEON"
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
769 )
770
;; Element-wise absolute value for the VDQW modes, output as vabs with the
;; <V_s_elem> element type.  Unlike the add/sub/mul patterns, the
;; condition here does not depend on flag_unsafe_math_optimizations.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
774 "TARGET_NEON"
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
776 [(set (attr "type")
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
780 )
781
;; Element-wise negation for the VDQW modes, output as vneg with the
;; <V_s_elem> element type.  Unlike the add/sub/mul patterns, the
;; condition here does not depend on flag_unsafe_math_optimizations.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
785 "TARGET_NEON"
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
787 [(set (attr "type")
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
791 )
792
;; DImode negation.  Every alternative is output as "#", so the insn is
;; always split (see the two define_splits that follow).  A DImode scratch
;; and the condition-code register are clobbered.  Alternatives:
;;   0  NEON destination, earlyclobber, no scratch register (X)
;;   1  NEON destination with an earlyclobber NEON scratch register
;;   2  core destination tied to the source
;;   3  core destination, earlyclobber, distinct from the source
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
798 "TARGET_NEON"
799 "#"
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
802 )
803
804 ; Split negdi2_neon for vfp registers
; Applies after reload when the destination is a VFP register.  The
; negation becomes "temporary = 0; destination = temporary - source",
; with the subtraction keeping the condition-code clobber.
805 (define_split
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
814 {
/* When no scratch register was allocated (operand 2 is not a REG), the
   destination itself serves as the zeroed temporary.  */
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
817 }
818 )
819
820 ; Split negdi2_neon for core registers
; Applies after reload when the destination is a general register.  The
; insn is re-emitted as a plain DImode negation that keeps only the
; condition-code clobber; the scratch operand is dropped.
821 (define_split
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
830 ""
831 )
832
;; Element-wise unsigned minimum for the VDQIW modes, output as vmin with
;; the <V_u_elem> element type.
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
837 "TARGET_NEON"
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
840 )
841
;; Element-wise unsigned maximum for the VDQIW modes, output as vmax with
;; the <V_u_elem> element type.
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
846 "TARGET_NEON"
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
849 )
850
;; Element-wise signed minimum for the VDQW modes, output as vmin with the
;; <V_s_elem> element type.  The scheduling type is neon_fp_minmax_s<q>
;; for float modes and neon_minmax<q> otherwise.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
855 "TARGET_NEON"
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
857 [(set (attr "type")
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
861 )
862
;; Element-wise signed maximum for the VDQW modes, output as vmax with the
;; <V_s_elem> element type.  The scheduling type is neon_fp_minmax_s<q>
;; for float modes and neon_minmax<q> otherwise.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
867 "TARGET_NEON"
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869 [(set (attr "type")
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
873 )
874
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
877 ; SImode elements.
878
;; Vector shift left for the VDQIW modes.  Alternative 0 shifts by a
;; per-element amount held in a register (vshl with the <V_s_elem> type);
;; alternative 1 shifts by an immediate (constraint Dn), output through
;; neon_output_shift_immediate.
(define_insn "vashl<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
        (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
                      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_shift_immediate ("vshl", 'i', &operands[2],
                                        <MODE>mode,
                                        VALID_NEON_QREG_MODE (<MODE>mode),
                                        true);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
)
897
;; Arithmetic shift right by an immediate for the VDQIW modes.  The
;; assembly is produced by neon_output_shift_immediate with mnemonic
;; "vshr" and sign character 's'.
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
902 "TARGET_NEON"
903 {
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
906 false);
907 }
908 [(set_attr "type" "neon_shift_imm<q>")]
909 )
910
;; Logical right shift of a VDQIW vector by an immediate count.
;; Identical in shape to the arithmetic immediate pattern, except that
;; neon_output_shift_immediate is given the 'u' sign letter.
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
915 "TARGET_NEON"
916 {
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
919 false);
920 }
921 [(set_attr "type" "neon_shift_imm<q>")]
922 )
923
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a
925 ; negative amount, with signed operands. This is essentially the same as
926 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
927 ; with negative shift amounts.
928
;; Register-count vshl with the signed element suffix, wrapped in
;; UNSPEC_ASHIFT_SIGNED so the operation stays opaque to generic RTL
;; simplification.  The vashr<mode>3 expander emits this with a negated
;; shift count.
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
934 "TARGET_NEON"
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
937 )
938
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
941
;; Register-count vshl with the unsigned element suffix, wrapped in
;; UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander emits this with a
;; negated shift count.
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
947 "TARGET_NEON"
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
950 )
951
;; Expander for vector arithmetic right shift.
;; A register shift count is negated into a fresh pseudo and fed to the
;; signed unspec left-shift pattern; any other accepted operand goes to
;; the immediate pattern vashr<mode>3_imm.
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
956 "TARGET_NEON"
957 {
958 if (s_register_operand (operands[2], <MODE>mode))
959 {
/* Right shift by N is emitted as left shift by -N. */
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
963 }
964 else
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
966 DONE;
967 })
968
;; Expander for vector logical right shift.
;; A register shift count is negated into a fresh pseudo and fed to the
;; unsigned unspec left-shift pattern; any other accepted operand goes to
;; the immediate pattern vlshr<mode>3_imm.
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
973 "TARGET_NEON"
974 {
975 if (s_register_operand (operands[2], <MODE>mode))
976 {
/* Right shift by N is emitted as left shift by -N. */
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
980 }
981 else
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
983 DONE;
984 })
985
986 ;; 64-bit shifts
987
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
992 ;; using an unspec.
;; Place an SImode shift count into lane 0 of a DImode NEON register.
;; Alternative 0 loads it with vld1.32 from the address of operand 1;
;; alternative 1 moves it from a core register with vmov.32.
;; The result is modelled as UNSPEC_LOAD_COUNT of the SImode source.
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
996 UNSPEC_LOAD_COUNT))]
997 "TARGET_NEON"
998 "@
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1002 )
1003
;; DImode left shift carried out in a NEON D register, with no clobbers.
;; Only matches after reload.  A constant count must lie in [0, 63];
;; alternative 0 takes the count as an immediate, alternative 1 takes it
;; from a NEON register.
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1011 "@
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1015 )
1016
;; DImode left shift that may be allocated to either NEON ("w") or core
;; ("r") registers.  The insn always outputs "#" and is split after reload:
;;  - destination in a VFP/NEON register: use ashldi3_neon_noclobber, first
;;    loading a non-constant count into scratch operand 5;
;;  - destination in core registers: use arm_ashldi3_1bit for a shift by
;;    one, otherwise arm_emit_coreregs_64bit_shift with scratch operands
;;    3 and 4.
;; Operands 3-5 are scratch registers and the pattern clobbers the
;; condition-code register.
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1025 "TARGET_NEON"
1026 "#"
1027 "TARGET_NEON && reload_completed"
1028 [(const_int 0)]
1029 "
1030 {
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1032 {
1033 if (CONST_INT_P (operands[2]))
1034 {
/* A constant count below 1 degenerates to a plain DImode move. */
1035 if (INTVAL (operands[2]) < 1)
1036 {
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1038 DONE;
1039 }
/* Clamp oversized counts to 63, the largest count the noclobber
   pattern accepts. */
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1042 }
1043 else
1044 {
/* Variable count: move it into the NEON scratch register first. */
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1047 }
1048
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1051 operands[2]));
1052 }
1053 else
1054 {
/* Core-register path.  The 1-bit form is used only when source and
   destination are either disjoint or the very same register. */
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1060 else
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1063 }
1064 DONE;
1065 }"
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1069 )
1070
1071 ; The shift amount needs to be negated for right-shifts
;; Post-reload DImode vshl.s64 by a register count, modelled as
;; UNSPEC_ASHIFT_SIGNED.  The <shift>di3_neon splitter emits it with a
;; negated count to implement a right shift.
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1080 )
1081
1082 ; The shift amount needs to be negated for right-shifts
;; Post-reload DImode vshl.u64 by a register count, modelled as
;; UNSPEC_ASHIFT_UNSIGNED.  The <shift>di3_neon splitter emits it with a
;; negated count to implement a right shift.
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
1091 )
1092
;; Post-reload DImode arithmetic right shift by an immediate in a NEON
;; D register (vshr.s64).  The count must lie in [1, 64].
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
1101 )
1102
;; Post-reload DImode logical right shift by an immediate in a NEON
;; D register (vshr.u64).  The count must lie in [1, 64].
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
1111 )
1112
1113 ;; ashrdi3_neon
1114 ;; lshrdi3_neon
;; DImode right shifts (iterated over RSHIFTS) that may be allocated to
;; either NEON or core registers.  Always outputs "#" and is split after
;; reload:
;;  - NEON destination, constant count: the matching *_imm_noclobber insn;
;;  - NEON destination, register count: negate the count into scratch 3,
;;    load it into NEON scratch 5, then emit the unspec left shift;
;;  - core destination: arm_<shift>di3_1bit for a shift by one, otherwise
;;    arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1123 "TARGET_NEON"
1124 "#"
1125 "TARGET_NEON && reload_completed"
1126 [(const_int 0)]
1127 "
1128 {
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1130 {
1131 if (CONST_INT_P (operands[2]))
1132 {
/* A constant count below 1 degenerates to a plain DImode move. */
1133 if (INTVAL (operands[2]) < 1)
1134 {
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1136 DONE;
1137 }
/* Clamp oversized counts to 64, the largest count the immediate
   patterns accept. */
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1140
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1143 operands[1],
1144 operands[2]));
1145 }
1146 else
1147 {
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1152 operands[5]));
1153 }
1154 }
1155 else
1156 {
/* Core-register path.  The 1-bit form is used only when source and
   destination are either disjoint or the very same register. */
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1162 else
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1166 }
1167
1168 DONE;
1169 }"
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1173 )
1174
1175 ;; Widening operations
1176
;; Widening signed accumulate: sign-extend the narrow vector operand 1 to
;; <V_widen> and add it to the wide accumulator operand 2 (vaddw).
;; Operand 1 carries no "%" commutativity marker: it is a narrow VW vector
;; under sign_extend while operand 2 is a <V_widen> accumulator, so the
;; two operands cannot be interchanged.
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1182 "TARGET_NEON"
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
1185 )
1186
;; Widening unsigned accumulate: zero-extend the narrow vector operand 1
;; to <V_widen> and add it to the wide accumulator operand 2 (vaddw).
;; Operand 1 carries no "%" commutativity marker: it is a narrow VW vector
;; under zero_extend while operand 2 is a <V_widen> accumulator, so the
;; two operands cannot be interchanged.
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1192 "TARGET_NEON"
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1195 )
1196
1197 ;; Helpers for quad-word reduction operations
1198
1199 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1200 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1201 ; N/2-element vector.
1202
;; Combine the low half (elements 0-1) and the high half (elements 2-3)
;; of a V4SI register with one of the VQH_OPS operations, producing a
;; V2SI result.  %e1 and %f1 print the two halves of operand 1.
1203 (define_insn "quad_halves_<code>v4si"
1204 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1205 (VQH_OPS:V2SI
1206 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1207 (parallel [(const_int 0) (const_int 1)]))
1208 (vec_select:V2SI (match_dup 1)
1209 (parallel [(const_int 2) (const_int 3)]))))]
1210 "TARGET_NEON"
1211 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1212 [(set_attr "vqh_mnem" "<VQH_mnem>")
1213 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1214 )
1215
;; Floating-point counterpart of the quad-halves helper: combine elements
;; 0-1 with elements 2-3 of a V4SF register using one of the VQHS_OPS
;; operations.  Only enabled under flag_unsafe_math_optimizations.
1216 (define_insn "quad_halves_<code>v4sf"
1217 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1218 (VQHS_OPS:V2SF
1219 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1220 (parallel [(const_int 0) (const_int 1)]))
1221 (vec_select:V2SF (match_dup 1)
1222 (parallel [(const_int 2) (const_int 3)]))))]
1223 "TARGET_NEON && flag_unsafe_math_optimizations"
1224 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1225 [(set_attr "vqh_mnem" "<VQH_mnem>")
1226 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1227 )
1228
;; Combine the low half (elements 0-3) and the high half (elements 4-7)
;; of a V8HI register with one of the VQH_OPS operations, producing a
;; V4HI result.  Operand 0 is written only, so it uses the "=" modifier,
;; matching the V4SI and V4SF variants of this helper.
1229 (define_insn "quad_halves_<code>v8hi"
1230 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1231 (VQH_OPS:V4HI
1232 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1233 (parallel [(const_int 0) (const_int 1)
1234 (const_int 2) (const_int 3)]))
1235 (vec_select:V4HI (match_dup 1)
1236 (parallel [(const_int 4) (const_int 5)
1237 (const_int 6) (const_int 7)]))))]
1238 "TARGET_NEON"
1239 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1240 [(set_attr "vqh_mnem" "<VQH_mnem>")
1241 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1242 )
1243
;; Combine the low half (elements 0-7) and the high half (elements 8-15)
;; of a V16QI register with one of the VQH_OPS operations, producing a
;; V8QI result.  Operand 0 is written only, so it uses the "=" modifier,
;; matching the V4SI and V4SF variants of this helper.
1244 (define_insn "quad_halves_<code>v16qi"
1245 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1246 (VQH_OPS:V8QI
1247 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1248 (parallel [(const_int 0) (const_int 1)
1249 (const_int 2) (const_int 3)
1250 (const_int 4) (const_int 5)
1251 (const_int 6) (const_int 7)]))
1252 (vec_select:V8QI (match_dup 1)
1253 (parallel [(const_int 8) (const_int 9)
1254 (const_int 10) (const_int 11)
1255 (const_int 12) (const_int 13)
1256 (const_int 14) (const_int 15)]))))]
1257 "TARGET_NEON"
1258 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1259 [(set_attr "vqh_mnem" "<VQH_mnem>")
1260 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1261 )
1262
;; Store the half-width vector operand 1 into the part of the 128-bit
;; operand 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode),
;; using a subreg of operand 0 as the move destination.
1263 (define_expand "move_hi_quad_<mode>"
1264 [(match_operand:ANY128 0 "s_register_operand" "")
1265 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1266 "TARGET_NEON"
1267 {
1268 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1269 GET_MODE_SIZE (<V_HALF>mode)),
1270 operands[1]);
1271 DONE;
1272 })
1273
;; Store the half-width vector operand 1 into the part of the 128-bit
;; operand 0 that starts at byte offset 0, using a subreg of operand 0 as
;; the move destination.
1274 (define_expand "move_lo_quad_<mode>"
1275 [(match_operand:ANY128 0 "s_register_operand" "")
1276 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1277 "TARGET_NEON"
1278 {
1279 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1280 <MODE>mode, 0),
1281 operands[1]);
1282 DONE;
1283 })
1284
1285 ;; Reduction operations
1286
;; Sum-reduce a double-word (VD) vector to a scalar of mode <V_elem>.
;; neon_pairwise_reduce is handed the vpadd generator and fills a
;; temporary vector; element 0 of that vector is then extracted into
;; operand 0.  Float modes require flag_unsafe_math_optimizations.
1287 (define_expand "reduc_plus_scal_<mode>"
1288 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1289 (match_operand:VD 1 "s_register_operand" "")]
1290 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1291 {
1292 rtx vec = gen_reg_rtx (<MODE>mode);
1293 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1294 &gen_neon_vpadd_internal<mode>);
1295 /* The same result is actually computed into every element. */
1296 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1297 DONE;
1298 })
1299
;; Sum-reduce a quad-word (VQ) vector to a scalar of mode <V_elem>.
;; The two halves are first added into a half-width register, which is
;; then reduced by the half-width reduc_plus_scal expander.
;; Not available for big-endian; float modes require
;; flag_unsafe_math_optimizations.
1300 (define_expand "reduc_plus_scal_<mode>"
1301 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1302 (match_operand:VQ 1 "s_register_operand" "")]
1303 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1304 && !BYTES_BIG_ENDIAN"
1305 {
1306 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1307
1308 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1309 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1310
1311 DONE;
1312 })
1313
;; Sum-reduce a V2DI vector to a DImode scalar.
;; arm_reduc_plus_internal_v2di computes the sum into a temporary V2DI
;; register and element 0 of it is extracted into operand 0.
;; Not available for big-endian.
1314 (define_expand "reduc_plus_scal_v2di"
1315 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1316 (match_operand:V2DI 1 "s_register_operand" "")]
1317 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1318 {
1319 rtx vec = gen_reg_rtx (V2DImode);
1320
1321 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1322 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1323
1324 DONE;
1325 })
1326
;; Helper for the V2DI sum reduction: a single vadd.i64 that adds the
;; %e and %f halves of operand 1 into the %e half of operand 0.
;; Modelled as UNSPEC_VPADD; not available for big-endian.
1327 (define_insn "arm_reduc_plus_internal_v2di"
1328 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1329 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1330 UNSPEC_VPADD))]
1331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1332 "vadd.i64\t%e0, %e1, %f1"
1333 [(set_attr "type" "neon_add_q")]
1334 )
1335
;; Signed-minimum reduction of a double-word (VD) vector to a scalar.
;; neon_pairwise_reduce is handed the vpsmin generator; element 0 of the
;; resulting vector is extracted into operand 0.  Float modes require
;; flag_unsafe_math_optimizations.
1336 (define_expand "reduc_smin_scal_<mode>"
1337 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1338 (match_operand:VD 1 "s_register_operand" "")]
1339 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1340 {
1341 rtx vec = gen_reg_rtx (<MODE>mode);
1342
1343 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1344 &gen_neon_vpsmin<mode>);
1345 /* The result is computed into every element of the vector. */
1346 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1347 DONE;
1348 })
1349
;; Signed-minimum reduction of a quad-word (VQ) vector to a scalar.
;; The two halves are first combined with smin into a half-width
;; register, which is then reduced by the half-width expander.
;; Not available for big-endian; float modes require
;; flag_unsafe_math_optimizations.
1350 (define_expand "reduc_smin_scal_<mode>"
1351 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1352 (match_operand:VQ 1 "s_register_operand" "")]
1353 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1354 && !BYTES_BIG_ENDIAN"
1355 {
1356 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1357
1358 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1359 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1360
1361 DONE;
1362 })
1363
;; Signed-maximum reduction of a double-word (VD) vector to a scalar.
;; neon_pairwise_reduce is handed the vpsmax generator; element 0 of the
;; resulting vector is extracted into operand 0.  Float modes require
;; flag_unsafe_math_optimizations.
1364 (define_expand "reduc_smax_scal_<mode>"
1365 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1366 (match_operand:VD 1 "s_register_operand" "")]
1367 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1368 {
1369 rtx vec = gen_reg_rtx (<MODE>mode);
1370 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1371 &gen_neon_vpsmax<mode>);
1372 /* The result is computed into every element of the vector. */
1373 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1374 DONE;
1375 })
1376
;; Signed-maximum reduction of a quad-word (VQ) vector to a scalar.
;; The two halves are first combined with smax into a half-width
;; register, which is then reduced by the half-width expander.
;; Not available for big-endian; float modes require
;; flag_unsafe_math_optimizations.
1377 (define_expand "reduc_smax_scal_<mode>"
1378 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1379 (match_operand:VQ 1 "s_register_operand" "")]
1380 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1381 && !BYTES_BIG_ENDIAN"
1382 {
1383 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1384
1385 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1386 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1387
1388 DONE;
1389 })
1390
;; Unsigned-minimum reduction of a double-word (VDI) vector to a scalar.
;; neon_pairwise_reduce is handed the vpumin generator; element 0 of the
;; resulting vector is extracted into operand 0.
1391 (define_expand "reduc_umin_scal_<mode>"
1392 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1393 (match_operand:VDI 1 "s_register_operand" "")]
1394 "TARGET_NEON"
1395 {
1396 rtx vec = gen_reg_rtx (<MODE>mode);
1397 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1398 &gen_neon_vpumin<mode>);
1399 /* The result is computed into every element of the vector. */
1400 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1401 DONE;
1402 })
1403
;; Unsigned-minimum reduction of a quad-word (VQI) vector to a scalar.
;; The two halves are first combined with umin into a half-width
;; register, which is then reduced by the half-width expander.
;; Not available for big-endian.
1404 (define_expand "reduc_umin_scal_<mode>"
1405 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1406 (match_operand:VQI 1 "s_register_operand" "")]
1407 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1408 {
1409 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1410
1411 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1412 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1413
1414 DONE;
1415 })
1416
;; Unsigned-maximum reduction of a double-word (VDI) vector to a scalar.
;; neon_pairwise_reduce is handed the vpumax generator; element 0 of the
;; resulting vector is extracted into operand 0.
1417 (define_expand "reduc_umax_scal_<mode>"
1418 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1419 (match_operand:VDI 1 "s_register_operand" "")]
1420 "TARGET_NEON"
1421 {
1422 rtx vec = gen_reg_rtx (<MODE>mode);
1423 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1424 &gen_neon_vpumax<mode>);
1425 /* The result is computed into every element of the vector. */
1426 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1427 DONE;
1428 })
1429
;; Unsigned-maximum reduction of a quad-word (VQI) vector to a scalar.
;; The two halves are first combined with umax into a half-width
;; register, which is then reduced by the half-width expander.
;; Not available for big-endian.
1430 (define_expand "reduc_umax_scal_<mode>"
1431 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1432 (match_operand:VQI 1 "s_register_operand" "")]
1433 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1434 {
1435 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1436
1437 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1438 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1439
1440 DONE;
1441 })
1442
;; Pairwise add (vpadd) of two double-word vectors, modelled as
;; UNSPEC_VPADD.  Used as the step generator for the VD sum reduction.
;; The "type" attribute selects the FP or the integer reduction class
;; from <Is_float_mode>.
1443 (define_insn "neon_vpadd_internal<mode>"
1444 [(set (match_operand:VD 0 "s_register_operand" "=w")
1445 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1446 (match_operand:VD 2 "s_register_operand" "w")]
1447 UNSPEC_VPADD))]
1448 "TARGET_NEON"
1449 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1450 ;; Assume this schedules like vadd.
1451 [(set (attr "type")
1452 (if_then_else (match_test "<Is_float_mode>")
1453 (const_string "neon_fp_reduc_add_s<q>")
1454 (const_string "neon_reduc_add<q>")))]
1455 )
1456
;; Pairwise signed minimum (vpmin with the signed element suffix) of two
;; double-word vectors, modelled as UNSPEC_VPSMIN.  The "type" attribute
;; selects the FP or the integer reduction class from <Is_float_mode>.
1457 (define_insn "neon_vpsmin<mode>"
1458 [(set (match_operand:VD 0 "s_register_operand" "=w")
1459 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1460 (match_operand:VD 2 "s_register_operand" "w")]
1461 UNSPEC_VPSMIN))]
1462 "TARGET_NEON"
1463 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1464 [(set (attr "type")
1465 (if_then_else (match_test "<Is_float_mode>")
1466 (const_string "neon_fp_reduc_minmax_s<q>")
1467 (const_string "neon_reduc_minmax<q>")))]
1468 )
1469
;; Pairwise signed maximum (vpmax with the signed element suffix) of two
;; double-word vectors, modelled as UNSPEC_VPSMAX.  The "type" attribute
;; selects the FP or the integer reduction class from <Is_float_mode>.
1470 (define_insn "neon_vpsmax<mode>"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")]
1474 UNSPEC_VPSMAX))]
1475 "TARGET_NEON"
1476 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1477 [(set (attr "type")
1478 (if_then_else (match_test "<Is_float_mode>")
1479 (const_string "neon_fp_reduc_minmax_s<q>")
1480 (const_string "neon_reduc_minmax<q>")))]
1481 )
1482
;; Pairwise unsigned minimum (vpmin with the unsigned element suffix) of
;; two double-word VDI vectors, modelled as UNSPEC_VPUMIN.
1483 (define_insn "neon_vpumin<mode>"
1484 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1485 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1486 (match_operand:VDI 2 "s_register_operand" "w")]
1487 UNSPEC_VPUMIN))]
1488 "TARGET_NEON"
1489 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1490 [(set_attr "type" "neon_reduc_minmax<q>")]
1491 )
1492
;; Pairwise unsigned maximum (vpmax with the unsigned element suffix) of
;; two double-word VDI vectors, modelled as UNSPEC_VPUMAX.
1493 (define_insn "neon_vpumax<mode>"
1494 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1495 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1496 (match_operand:VDI 2 "s_register_operand" "w")]
1497 UNSPEC_VPUMAX))]
1498 "TARGET_NEON"
1499 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1500 [(set_attr "type" "neon_reduc_minmax<q>")]
1501 )
1502
1503 ;; Saturating arithmetic
1504
1505 ; NOTE: Neon supports many more saturating variants of instructions than the
1506 ; following, but these are all GCC currently understands.
1507 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1508 ; yet either, although these patterns may be used by intrinsics when they're
1509 ; added.
1510
;; Signed saturating add (RTL ss_plus) of two double-word vectors,
;; emitted as vqadd with the signed element suffix.
1511 (define_insn "*ss_add<mode>_neon"
1512 [(set (match_operand:VD 0 "s_register_operand" "=w")
1513 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1514 (match_operand:VD 2 "s_register_operand" "w")))]
1515 "TARGET_NEON"
1516 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1517 [(set_attr "type" "neon_qadd<q>")]
1518 )
1519
;; Unsigned saturating add (RTL us_plus) of two double-word vectors,
;; emitted as vqadd with the unsigned element suffix.
1520 (define_insn "*us_add<mode>_neon"
1521 [(set (match_operand:VD 0 "s_register_operand" "=w")
1522 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1523 (match_operand:VD 2 "s_register_operand" "w")))]
1524 "TARGET_NEON"
1525 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1526 [(set_attr "type" "neon_qadd<q>")]
1527 )
1528
;; Signed saturating subtract (RTL ss_minus) of two double-word vectors,
;; emitted as vqsub with the signed element suffix.
1529 (define_insn "*ss_sub<mode>_neon"
1530 [(set (match_operand:VD 0 "s_register_operand" "=w")
1531 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1532 (match_operand:VD 2 "s_register_operand" "w")))]
1533 "TARGET_NEON"
1534 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1535 [(set_attr "type" "neon_qsub<q>")]
1536 )
1537
;; Unsigned saturating subtract (RTL us_minus) of two double-word
;; vectors, emitted as vqsub with the unsigned element suffix.
1538 (define_insn "*us_sub<mode>_neon"
1539 [(set (match_operand:VD 0 "s_register_operand" "=w")
1540 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1541 (match_operand:VD 2 "s_register_operand" "w")))]
1542 "TARGET_NEON"
1543 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1544 [(set_attr "type" "neon_qsub<q>")]
1545 )
1546
1547 ;; Conditional instructions. These are comparisons with conditional moves for
1548 ;; vectors. They perform the assignment:
1549 ;;
1550 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1551 ;;
1552 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1553 ;; element-wise.
1554
;; Vector conditional select: op0 = (op4 <op3> op5) ? op1 : op2.
;; The expander works in four steps:
;;   1. decide whether the compare-against-zero form can be used, else
;;      force operand 5 into a register;
;;   2. pick a base comparison generator and its complement, noting when
;;      the comparison must be expressed with swapped operands;
;;   3. emit the comparison(s) that build the lane mask, noting when the
;;      mask is the logical inverse of the requested condition;
;;   4. emit neon_vbsl, swapping the two data operands when the mask is
;;      inverted.
;; Float modes require flag_unsafe_math_optimizations.
1555 (define_expand "vcond<mode><mode>"
1556 [(set (match_operand:VDQW 0 "s_register_operand" "")
1557 (if_then_else:VDQW
1558 (match_operator 3 "comparison_operator"
1559 [(match_operand:VDQW 4 "s_register_operand" "")
1560 (match_operand:VDQW 5 "nonmemory_operand" "")])
1561 (match_operand:VDQW 1 "s_register_operand" "")
1562 (match_operand:VDQW 2 "s_register_operand" "")))]
1563 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1564 {
1565 int inverse = 0;
1566 int use_zero_form = 0;
1567 int swap_bsl_operands = 0;
1568 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1569 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1570
1571 rtx (*base_comparison) (rtx, rtx, rtx);
1572 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1573
/* Step 1: only the ordered comparisons and EQ may keep a constant zero
   as operand 5; everything else needs it in a register. */
1574 switch (GET_CODE (operands[3]))
1575 {
1576 case GE:
1577 case GT:
1578 case LE:
1579 case LT:
1580 case EQ:
1581 if (operands[5] == CONST0_RTX (<MODE>mode))
1582 {
1583 use_zero_form = 1;
1584 break;
1585 }
1586 /* Fall through. */
1587 default:
1588 if (!REG_P (operands[5]))
1589 operands[5] = force_reg (<MODE>mode, operands[5]);
1590 }
1591
/* Step 2: choose the generator pair.  inverse is set for the
   less-than style codes, which are emitted with swapped operands. */
1592 switch (GET_CODE (operands[3]))
1593 {
1594 case LT:
1595 case UNLT:
1596 inverse = 1;
1597 /* Fall through. */
1598 case GE:
1599 case UNGE:
1600 case ORDERED:
1601 case UNORDERED:
1602 base_comparison = gen_neon_vcge<mode>;
1603 complimentary_comparison = gen_neon_vcgt<mode>;
1604 break;
1605 case LE:
1606 case UNLE:
1607 inverse = 1;
1608 /* Fall through. */
1609 case GT:
1610 case UNGT:
1611 base_comparison = gen_neon_vcgt<mode>;
1612 complimentary_comparison = gen_neon_vcge<mode>;
1613 break;
1614 case EQ:
1615 case NE:
1616 case UNEQ:
1617 base_comparison = gen_neon_vceq<mode>;
1618 complimentary_comparison = gen_neon_vceq<mode>;
1619 break;
1620 default:
1621 gcc_unreachable ();
1622 }
1623
/* Step 3: emit the comparison(s) that compute the mask. */
1624 switch (GET_CODE (operands[3]))
1625 {
1626 case LT:
1627 case LE:
1628 case GT:
1629 case GE:
1630 case EQ:
1631 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1632 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1633 a GE b -> a GE b
1634 a GT b -> a GT b
1635 a LE b -> b GE a
1636 a LT b -> b GT a
1637 a EQ b -> a EQ b
1638 Note that there also exist direct comparison against 0 forms,
1639 so catch those as a special case. */
1640 if (use_zero_form)
1641 {
1642 inverse = 0;
1643 switch (GET_CODE (operands[3]))
1644 {
1645 case LT:
1646 base_comparison = gen_neon_vclt<mode>;
1647 break;
1648 case LE:
1649 base_comparison = gen_neon_vcle<mode>;
1650 break;
1651 default:
1652 /* Do nothing, other zero form cases already have the correct
1653 base_comparison. */
1654 break;
1655 }
1656 }
1657
1658 if (!inverse)
1659 emit_insn (base_comparison (mask, operands[4], operands[5]));
1660 else
1661 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1662 break;
1663 case UNLT:
1664 case UNLE:
1665 case UNGT:
1666 case UNGE:
1667 case NE:
1668 /* Vector compare returns false for lanes which are unordered, so if we use
1669 the inverse of the comparison we actually want to emit, then
1670 swap the operands to BSL, we will end up with the correct result.
1671 Note that a NE NaN and NaN NE b are true for all a, b.
1672
1673 Our transformations are:
1674 a GE b -> !(b GT a)
1675 a GT b -> !(b GE a)
1676 a LE b -> !(a GT b)
1677 a LT b -> !(a GE b)
1678 a NE b -> !(a EQ b) */
1679
1680 if (inverse)
1681 emit_insn (base_comparison (mask, operands[4], operands[5]));
1682 else
1683 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1684
1685 swap_bsl_operands = 1;
1686 break;
1687 case UNEQ:
1688 /* We check (a > b || b > a). Combining these comparisons gives a mask
1689 that is true iff (a != b && a ORDERED b); swapping the operands to BSL
1690 will then give us (a == b || a UNORDERED b) as intended. */
1691
1692 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1693 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1694 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1695 swap_bsl_operands = 1;
1696 break;
1697 case UNORDERED:
1698 /* Operands are ORDERED iff (a > b || b >= a).
1699 Swapping the operands to BSL will give the UNORDERED case. */
1700 swap_bsl_operands = 1;
1701 /* Fall through. */
1702 case ORDERED:
1703 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1704 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1705 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1706 break;
1707 default:
1708 gcc_unreachable ();
1709 }
1710
/* Step 4: select between operands 1 and 2 under the mask, with the data
   operands exchanged when the mask holds the inverted condition. */
1711 if (swap_bsl_operands)
1712 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1713 operands[1]));
1714 else
1715 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1716 operands[2]));
1717 DONE;
1718 })
1719
;; Vector conditional select for the unsigned comparisons plus EQ/NE over
;; the VDQIW modes: op0 = (op4 <op3> op5) ? op1 : op2.
;; A lane mask is built with one comparison insn and then consumed by
;; neon_vbsl.  LEU and LTU are emitted as GEU and GTU with the comparison
;; operands swapped; NE is emitted as EQ with the two data operands of
;; neon_vbsl swapped.
1720 (define_expand "vcondu<mode><mode>"
1721 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1722 (if_then_else:VDQIW
1723 (match_operator 3 "arm_comparison_operator"
1724 [(match_operand:VDQIW 4 "s_register_operand" "")
1725 (match_operand:VDQIW 5 "s_register_operand" "")])
1726 (match_operand:VDQIW 1 "s_register_operand" "")
1727 (match_operand:VDQIW 2 "s_register_operand" "")))]
1728 "TARGET_NEON"
1729 {
1730 rtx mask;
1731 int inverse = 0, immediate_zero = 0;
1732
1733 mask = gen_reg_rtx (<V_cmp_result>mode);
1734
/* NOTE(review): operand 5 is matched with s_register_operand, so this
   CONST0_RTX test presumably never succeeds -- confirm against callers. */
1735 if (operands[5] == CONST0_RTX (<MODE>mode))
1736 immediate_zero = 1;
1737 else if (!REG_P (operands[5]))
1738 operands[5] = force_reg (<MODE>mode, operands[5]);
1739
1740 switch (GET_CODE (operands[3]))
1741 {
1742 case GEU:
1743 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1744 break;
1745
1746 case GTU:
1747 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1748 break;
1749
1750 case EQ:
1751 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1752 break;
1753
1754 case LEU:
1755 if (immediate_zero)
1756 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1757 else
1758 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1759 break;
1760
1761 case LTU:
1762 if (immediate_zero)
1763 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1764 else
1765 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1766 break;
1767
1768 case NE:
/* Computed as EQ; the select below swaps its data operands. */
1769 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1770 inverse = 1;
1771 break;
1772
1773 default:
1774 gcc_unreachable ();
1775 }
1776
1777 if (inverse)
1778 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1779 operands[1]));
1780 else
1781 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1782 operands[2]));
1783
1784 DONE;
1785 })
1786
1787 ;; Patterns for builtins.
1788
1789 ; good for plain vadd, vaddq.
1790
;; Builtin expander for vector add over the VCVTF modes.
;; When the mode is not a float mode, or flag_unsafe_math_optimizations is
;; set, the canonical add<mode>3 pattern is used; otherwise the add is
;; emitted through the unspec pattern neon_vadd<mode>_unspec.
1791 (define_expand "neon_vadd<mode>"
1792 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1793 (match_operand:VCVTF 1 "s_register_operand" "w")
1794 (match_operand:VCVTF 2 "s_register_operand" "w")]
1795 "TARGET_NEON"
1796 {
1797 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1798 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1799 else
1800 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1801 operands[2]));
1802 DONE;
1803 })
1804
1805 ; Note that NEON operations don't support the full IEEE 754 standard: in
1806 ; particular, denormal values are flushed to zero. This means that GCC cannot
1807 ; use those instructions for autovectorization, etc. unless
1808 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1809 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1810 ; header) must work in either case: if -funsafe-math-optimizations is given,
1811 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1812 ; expand to unspecs (which may potentially limit the extent to which they might
1813 ; be optimized by generic code).
1814
1815 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1816
;; Vector add expressed as UNSPEC_VADD rather than canonical plus RTL.
;; Emitted by the neon_vadd<mode> expander when the canonical form is not
;; permitted.  The "type" attribute selects the FP or the integer add
;; class from <Is_float_mode>.
1817 (define_insn "neon_vadd<mode>_unspec"
1818 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1819 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1820 (match_operand:VCVTF 2 "s_register_operand" "w")]
1821 UNSPEC_VADD))]
1822 "TARGET_NEON"
1823 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1824 [(set (attr "type")
1825 (if_then_else (match_test "<Is_float_mode>")
1826 (const_string "neon_fp_addsub_s<q>")
1827 (const_string "neon_add<q>")))]
1828 )
1829
;; Long add (vaddl): two double-word VDI vectors in, one <V_widen>
;; quad-word vector out.  Iterated over the VADDL unspecs; <sup> supplies
;; the signedness letter of the mnemonic suffix.
1830 (define_insn "neon_vaddl<sup><mode>"
1831 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1832 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1833 (match_operand:VDI 2 "s_register_operand" "w")]
1834 VADDL))]
1835 "TARGET_NEON"
1836 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1837 [(set_attr "type" "neon_add_long")]
1838 )
1839
;; vaddw: operand 1 already has the <V_widen> mode while operand 2 is a VDI
;; vector; the result has the <V_widen> mode.
1840 (define_insn "neon_vaddw<sup><mode>"
1841 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1842 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1843 (match_operand:VDI 2 "s_register_operand" "w")]
1844 VADDW))]
1845 "TARGET_NEON"
1846 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1847 [(set_attr "type" "neon_add_widen")]
1848 )
1849
1850 ; vhadd and vrhadd.
1851
;; vhadd / vrhadd (<r> selects the mnemonic) over the VDQIW modes, as a VHADD
;; unspec of operands 1 and 2.  The "type" attribute uses the <q> suffix so
;; that the scheduling type follows the mode, matching the way
;; neon_vhsub<sup><mode> uses "neon_sub_halve<q>"; it was previously fixed to
;; the _q variant for every mode.
1852 (define_insn "neon_v<r>hadd<sup><mode>"
1853 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1854 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1855 (match_operand:VDQIW 2 "s_register_operand" "w")]
1856 VHADD))]
1857 "TARGET_NEON"
1858 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1859 [(set_attr "type" "neon_add_halve<q>")]
1860 )
1861
;; vqadd over the VDQIX modes, as a VQADD unspec of operands 1 and 2.
1862 (define_insn "neon_vqadd<sup><mode>"
1863 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1864 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1865 (match_operand:VDQIX 2 "s_register_operand" "w")]
1866 VQADD))]
1867 "TARGET_NEON"
1868 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1869 [(set_attr "type" "neon_qadd<q>")]
1870 )
1871
;; vaddhn / vraddhn (<r> selects the mnemonic): the inputs are VN vectors and
;; the result has the <V_narrow> mode.
1872 (define_insn "neon_v<r>addhn<mode>"
1873 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1874 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1875 (match_operand:VN 2 "s_register_operand" "w")]
1876 VADDHN))]
1877 "TARGET_NEON"
1878 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1879 [(set_attr "type" "neon_add_halve_narrow_q")]
1880 )
1881
1882 ;; Polynomial and Float multiplication.
;; vmul over the VPF modes as an UNSPEC_VMUL; <pf> supplies the type letter of
;; the mnemonic suffix.  The "type" attribute is neon_fp_mul_s<q> for float
;; modes and neon_mul_<V_elem_ch><q> otherwise.
1883 (define_insn "neon_vmul<pf><mode>"
1884 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1885 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1886 (match_operand:VPF 2 "s_register_operand" "w")]
1887 UNSPEC_VMUL))]
1888 "TARGET_NEON"
1889 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1890 [(set (attr "type")
1891 (if_then_else (match_test "<Is_float_mode>")
1892 (const_string "neon_fp_mul_s<q>")
1893 (const_string "neon_mul_<V_elem_ch><q>")))]
1894 )
1895
;; Expander for vmla over the VDQW modes.  Integer modes, and float modes
;; under -funsafe-math-optimizations, go through mul<mode>3add<mode>_neon
;; (defined outside this section); float modes otherwise use the
;; neon_vmla<mode>_unspec pattern.  All four operands are passed through
;; unchanged.
1896 (define_expand "neon_vmla<mode>"
1897 [(match_operand:VDQW 0 "s_register_operand" "=w")
1898 (match_operand:VDQW 1 "s_register_operand" "0")
1899 (match_operand:VDQW 2 "s_register_operand" "w")
1900 (match_operand:VDQW 3 "s_register_operand" "w")]
1901 "TARGET_NEON"
1902 {
1903 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1904 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1905 operands[2], operands[3]));
1906 else
1907 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1908 operands[2], operands[3]));
1909 DONE;
1910 })
1911
;; Expander for vfma over the VCVTF modes; requires TARGET_FMA.  It forwards
;; to fma<mode>4_intrinsic with the inputs reordered: operands 2 and 3 come
;; first and operand 1 is passed last (presumably the addend -- confirm
;; against fma<mode>4_intrinsic, which is defined outside this section).
1912 (define_expand "neon_vfma<VCVTF:mode>"
1913 [(match_operand:VCVTF 0 "s_register_operand")
1914 (match_operand:VCVTF 1 "s_register_operand")
1915 (match_operand:VCVTF 2 "s_register_operand")
1916 (match_operand:VCVTF 3 "s_register_operand")]
1917 "TARGET_NEON && TARGET_FMA"
1918 {
1919 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1920 operands[1]));
1921 DONE;
1922 })
1923
;; Expander for vfms over the VCVTF modes; requires TARGET_FMA.  It forwards
;; to fmsub<mode>4_intrinsic with the inputs reordered: operands 2 and 3 come
;; first and operand 1 is passed last (presumably the accumulator -- confirm
;; against fmsub<mode>4_intrinsic, which is defined outside this section).
1924 (define_expand "neon_vfms<VCVTF:mode>"
1925 [(match_operand:VCVTF 0 "s_register_operand")
1926 (match_operand:VCVTF 1 "s_register_operand")
1927 (match_operand:VCVTF 2 "s_register_operand")
1928 (match_operand:VCVTF 3 "s_register_operand")]
1929 "TARGET_NEON && TARGET_FMA"
1930 {
1931 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1932 operands[1]));
1933 DONE;
1934 })
1935
1936 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1937
;; vmla over the VDQW modes as an UNSPEC_VMLA.  Operand 1 is tied to the
;; output through the "0" constraint, so the template prints only operands
;; 0, 2 and 3.
1938 (define_insn "neon_vmla<mode>_unspec"
1939 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1940 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1941 (match_operand:VDQW 2 "s_register_operand" "w")
1942 (match_operand:VDQW 3 "s_register_operand" "w")]
1943 UNSPEC_VMLA))]
1944 "TARGET_NEON"
1945 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1946 [(set (attr "type")
1947 (if_then_else (match_test "<Is_float_mode>")
1948 (const_string "neon_fp_mla_s<q>")
1949 (const_string "neon_mla_<V_elem_ch><q>")))]
1950 )
1951
;; vmlal: operand 1 has the <V_widen> mode and is tied to the output;
;; operands 2 and 3 are VW vectors.
1952 (define_insn "neon_vmlal<sup><mode>"
1953 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1954 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1955 (match_operand:VW 2 "s_register_operand" "w")
1956 (match_operand:VW 3 "s_register_operand" "w")]
1957 VMLAL))]
1958 "TARGET_NEON"
1959 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
1960 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
1961 )
1962
;; Expander for vmls over the VDQW modes.  Integer modes, and float modes
;; under -funsafe-math-optimizations, go through
;; mul<mode>3neg<mode>add<mode>_neon (defined outside this section); float
;; modes otherwise use the neon_vmls<mode>_unspec pattern.
1963 (define_expand "neon_vmls<mode>"
1964 [(match_operand:VDQW 0 "s_register_operand" "=w")
1965 (match_operand:VDQW 1 "s_register_operand" "0")
1966 (match_operand:VDQW 2 "s_register_operand" "w")
1967 (match_operand:VDQW 3 "s_register_operand" "w")]
1968 "TARGET_NEON"
1969 {
1970 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1971 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
1972 operands[1], operands[2], operands[3]));
1973 else
1974 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
1975 operands[2], operands[3]));
1976 DONE;
1977 })
1978
1979 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1980
;; vmls over the VDQW modes as an UNSPEC_VMLS.  Operand 1 is tied to the
;; output through the "0" constraint, so the template prints only operands
;; 0, 2 and 3.
1981 (define_insn "neon_vmls<mode>_unspec"
1982 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1983 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1984 (match_operand:VDQW 2 "s_register_operand" "w")
1985 (match_operand:VDQW 3 "s_register_operand" "w")]
1986 UNSPEC_VMLS))]
1987 "TARGET_NEON"
1988 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1989 [(set (attr "type")
1990 (if_then_else (match_test "<Is_float_mode>")
1991 (const_string "neon_fp_mla_s<q>")
1992 (const_string "neon_mla_<V_elem_ch><q>")))]
1993 )
1994
;; vmlsl: operand 1 has the <V_widen> mode and is tied to the output;
;; operands 2 and 3 are VW vectors.
1995 (define_insn "neon_vmlsl<sup><mode>"
1996 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1997 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1998 (match_operand:VW 2 "s_register_operand" "w")
1999 (match_operand:VW 3 "s_register_operand" "w")]
2000 VMLSL))]
2001 "TARGET_NEON"
2002 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2003 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2004 )
2005
2006 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics; <r> (from the VQDMULH iterator) inserts
;; the extra letter.  Operates on the VMDQI modes.
2007 (define_insn "neon_vq<r>dmulh<mode>"
2008 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2009 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2010 (match_operand:VMDQI 2 "s_register_operand" "w")]
2011 VQDMULH))]
2012 "TARGET_NEON"
2013 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2014 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2015 )
2016
2017 ;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics; <VQRDMLH_AS:neon_rdma_as> supplies the
;; letter that distinguishes them.  Only enabled under TARGET_NEON_RDMA.
;; Operand 1 is tied to the output, so the template prints operands 0, 2
;; and 3.
2018 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2019 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2020 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2021 (match_operand:VMDQI 2 "s_register_operand" "w")
2022 (match_operand:VMDQI 3 "s_register_operand" "w")]
2023 VQRDMLH_AS))]
2024 "TARGET_NEON_RDMA"
2025 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2026 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2027 )
2028
;; vqdmlal: operand 1 has the <V_widen> mode and is tied to the output;
;; operands 2 and 3 are VMDI vectors.
2029 (define_insn "neon_vqdmlal<mode>"
2030 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2031 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2032 (match_operand:VMDI 2 "s_register_operand" "w")
2033 (match_operand:VMDI 3 "s_register_operand" "w")]
2034 UNSPEC_VQDMLAL))]
2035 "TARGET_NEON"
2036 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2037 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2038 )
2039
;; vqdmlsl: operand 1 has the <V_widen> mode and is tied to the output;
;; operands 2 and 3 are VMDI vectors.
2040 (define_insn "neon_vqdmlsl<mode>"
2041 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2042 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2043 (match_operand:VMDI 2 "s_register_operand" "w")
2044 (match_operand:VMDI 3 "s_register_operand" "w")]
2045 UNSPEC_VQDMLSL))]
2046 "TARGET_NEON"
2047 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2048 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2049 )
2050
;; vmull: both inputs are VW vectors and the result has the <V_widen> mode.
2051 (define_insn "neon_vmull<sup><mode>"
2052 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2053 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2054 (match_operand:VW 2 "s_register_operand" "w")]
2055 VMULL))]
2056 "TARGET_NEON"
2057 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2058 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2059 )
2060
;; vqdmull: both inputs are VMDI vectors and the result has the <V_widen>
;; mode.
2061 (define_insn "neon_vqdmull<mode>"
2062 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2063 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2064 (match_operand:VMDI 2 "s_register_operand" "w")]
2065 UNSPEC_VQDMULL))]
2066 "TARGET_NEON"
2067 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2068 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2069 )
2070
;; Expander for vsub over the VCVTF modes.  Non-float modes, and float modes
;; under -funsafe-math-optimizations, use the generic sub<mode>3 pattern;
;; float modes otherwise use neon_vsub<mode>_unspec.
2071 (define_expand "neon_vsub<mode>"
2072 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2073 (match_operand:VCVTF 1 "s_register_operand" "w")
2074 (match_operand:VCVTF 2 "s_register_operand" "w")]
2075 "TARGET_NEON"
2076 {
2077 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2078 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2079 else
2080 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2081 operands[2]));
2082 DONE;
2083 })
2084
2085 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2086
;; vsub over the VCVTF modes, written as an UNSPEC_VSUB of operands 1 and 2
;; rather than as a canonical minus.  The "type" attribute is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
2087 (define_insn "neon_vsub<mode>_unspec"
2088 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2089 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2090 (match_operand:VCVTF 2 "s_register_operand" "w")]
2091 UNSPEC_VSUB))]
2092 "TARGET_NEON"
2093 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2094 [(set (attr "type")
2095 (if_then_else (match_test "<Is_float_mode>")
2096 (const_string "neon_fp_addsub_s<q>")
2097 (const_string "neon_sub<q>")))]
2098 )
2099
;; vsubl: both inputs are VDI vectors and the result has the <V_widen> mode.
2100 (define_insn "neon_vsubl<sup><mode>"
2101 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2102 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2103 (match_operand:VDI 2 "s_register_operand" "w")]
2104 VSUBL))]
2105 "TARGET_NEON"
2106 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2107 [(set_attr "type" "neon_sub_long")]
2108 )
2109
;; vsubw: operand 1 already has the <V_widen> mode while operand 2 is a VDI
;; vector; the result has the <V_widen> mode.
2110 (define_insn "neon_vsubw<sup><mode>"
2111 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2112 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2113 (match_operand:VDI 2 "s_register_operand" "w")]
2114 VSUBW))]
2115 "TARGET_NEON"
2116 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2117 [(set_attr "type" "neon_sub_widen")]
2118 )
2119
;; vqsub over the VDQIX modes, as a VQSUB unspec of operands 1 and 2.
2120 (define_insn "neon_vqsub<sup><mode>"
2121 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2122 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2123 (match_operand:VDQIX 2 "s_register_operand" "w")]
2124 VQSUB))]
2125 "TARGET_NEON"
2126 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2127 [(set_attr "type" "neon_qsub<q>")]
2128 )
2129
;; vhsub over the VDQIW modes, as a VHSUB unspec of operands 1 and 2.
2130 (define_insn "neon_vhsub<sup><mode>"
2131 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2132 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2133 (match_operand:VDQIW 2 "s_register_operand" "w")]
2134 VHSUB))]
2135 "TARGET_NEON"
2136 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2137 [(set_attr "type" "neon_sub_halve<q>")]
2138 )
2139
;; vsubhn / vrsubhn (<r> selects the mnemonic): the inputs are VN vectors and
;; the result has the <V_narrow> mode.
2140 (define_insn "neon_v<r>subhn<mode>"
2141 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2142 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2143 (match_operand:VN 2 "s_register_operand" "w")]
2144 VSUBHN))]
2145 "TARGET_NEON"
2146 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2147 [(set_attr "type" "neon_sub_halve_narrow_q")]
2148 )
2149
2150 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2151 ;; without unsafe math optimizations.
;; Expander for the vc<cmp_op> comparisons over the VDQW modes; the result
;; has the <V_cmp_result> mode and operand 2 may be a register or zero.
;; Float vector modes without -funsafe-math-optimizations are dispatched by
;; hand to the V2SF/V4SF *_insn_unspec patterns; every other case uses
;; neon_vc<cmp_op><mode>_insn.
2152 (define_expand "neon_vc<cmp_op><mode>"
2153 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2154 (neg:<V_cmp_result>
2155 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2156 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2157 "TARGET_NEON"
2158 {
2159 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2160 are enabled. */
2161 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2162 && !flag_unsafe_math_optimizations)
2163 {
2164 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2165 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2166 whereas this expander iterates over the integer modes as well,
2167 but we will never expand to UNSPECs for the integer comparisons. */
2168 switch (<MODE>mode)
2169 {
2170 case V2SFmode:
2171 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2172 operands[1],
2173 operands[2]));
2174 break;
2175 case V4SFmode:
2176 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2177 operands[1],
2178 operands[2]));
2179 break;
2180 default:
2181 gcc_unreachable ();
2182 }
2183 }
2184 else
2185 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2186 operands[1],
2187 operands[2]));
2188 DONE;
2189 }
2190 )
2191
;; Canonical-RTL form of the vc<cmp_op> comparisons: the negated comparison
;; result.  The insn condition rejects float vector modes unless
;; -funsafe-math-optimizations is on.  The assembler template is built at
;; output time: the type letter is "f" for float vector modes and
;; <cmp_type> otherwise, and the last operand is the register (alternative 0)
;; or the literal #0 (alternative 1).
2192 (define_insn "neon_vc<cmp_op><mode>_insn"
2193 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2194 (neg:<V_cmp_result>
2195 (COMPARISONS:<V_cmp_result>
2196 (match_operand:VDQW 1 "s_register_operand" "w,w")
2197 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2198 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2199 && !flag_unsafe_math_optimizations)"
2200 {
2201 char pattern[100];
2202 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2203 " %%<V_reg>1, %s",
2204 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2205 ? "f" : "<cmp_type>",
2206 which_alternative == 0
2207 ? "%<V_reg>2" : "#0");
2208 output_asm_insn (pattern, operands);
2209 return "";
2210 }
2211 [(set (attr "type")
2212 (if_then_else (match_operand 2 "zero_operand")
2213 (const_string "neon_compare_zero<q>")
2214 (const_string "neon_compare<q>")))]
2215 )
2216
;; Unspec form of the float comparisons over the VCVTF modes (NEON_VCMP
;; iterator).  The template always uses the "f" type letter; the last
;; operand is the register (alternative 0) or the literal #0 (alternative 1).
2217 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2218 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2219 (unspec:<V_cmp_result>
2220 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2221 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2222 NEON_VCMP))]
2223 "TARGET_NEON"
2224 {
2225 char pattern[100];
2226 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2227 " %%<V_reg>1, %s",
2228 which_alternative == 0
2229 ? "%<V_reg>2" : "#0");
2230 output_asm_insn (pattern, operands);
2231 return "";
2232 }
2233 [(set_attr "type" "neon_fp_compare_s<q>")]
2234 )
2235
;; Comparisons from the GTUGEU iterator over the VDQIW modes, printed with
;; the .u type prefix.  Both inputs must be registers.
2236 (define_insn "neon_vc<cmp_op>u<mode>"
2237 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2238 (neg:<V_cmp_result>
2239 (GTUGEU:<V_cmp_result>
2240 (match_operand:VDQIW 1 "s_register_operand" "w")
2241 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2242 "TARGET_NEON"
2243 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2244 [(set_attr "type" "neon_compare<q>")]
2245 )
2246
;; Expander for the absolute-value comparisons (GTGE iterator) over the VCVTF
;; modes.  With -funsafe-math-optimizations it emits the canonical
;; neon_vca<cmp_op><mode>_insn; otherwise it emits the *_insn_unspec form.
2247 (define_expand "neon_vca<cmp_op><mode>"
2248 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2249 (neg:<V_cmp_result>
2250 (GTGE:<V_cmp_result>
2251 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2252 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2253 "TARGET_NEON"
2254 {
2255 if (flag_unsafe_math_optimizations)
2256 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2257 operands[2]));
2258 else
2259 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2260 operands[1],
2261 operands[2]));
2262 DONE;
2263 }
2264 )
2265
;; Canonical-RTL form of the absolute-value comparisons: the negated
;; comparison of abs (operand 1) with abs (operand 2).  Only available under
;; -funsafe-math-optimizations.
2266 (define_insn "neon_vca<cmp_op><mode>_insn"
2267 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2268 (neg:<V_cmp_result>
2269 (GTGE:<V_cmp_result>
2270 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2271 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2272 "TARGET_NEON && flag_unsafe_math_optimizations"
2273 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2274 [(set_attr "type" "neon_fp_compare_s<q>")]
2275 )
2276
;; Unspec form of the absolute-value comparisons (NEON_VACMP iterator) over
;; the VCVTF modes; it has no dependence on flag_unsafe_math_optimizations.
2277 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2278 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2279 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2280 (match_operand:VCVTF 2 "s_register_operand" "w")]
2281 NEON_VACMP))]
2282 "TARGET_NEON"
2283 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2284 [(set_attr "type" "neon_fp_compare_s<q>")]
2285 )
2286
;; vtst over the VDQIW modes, as an UNSPEC_VTST of operands 1 and 2.
2287 (define_insn "neon_vtst<mode>"
2288 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2289 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2290 (match_operand:VDQIW 2 "s_register_operand" "w")]
2291 UNSPEC_VTST))]
2292 "TARGET_NEON"
2293 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2294 [(set_attr "type" "neon_tst<q>")]
2295 )
2296
;; vabd over the VDQIW modes, as a VABD unspec of operands 1 and 2.
2297 (define_insn "neon_vabd<sup><mode>"
2298 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2299 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2300 (match_operand:VDQIW 2 "s_register_operand" "w")]
2301 VABD))]
2302 "TARGET_NEON"
2303 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2304 [(set_attr "type" "neon_abd<q>")]
2305 )
2306
;; vabd over the VCVTF modes, as an UNSPEC_VABD_F of operands 1 and 2.
2307 (define_insn "neon_vabdf<mode>"
2308 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2309 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2310 (match_operand:VCVTF 2 "s_register_operand" "w")]
2311 UNSPEC_VABD_F))]
2312 "TARGET_NEON"
2313 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2314 [(set_attr "type" "neon_fp_abd_s<q>")]
2315 )
2316
;; vabdl: both inputs are VW vectors and the result has the <V_widen> mode.
2317 (define_insn "neon_vabdl<sup><mode>"
2318 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2319 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2320 (match_operand:VW 2 "s_register_operand" "w")]
2321 VABDL))]
2322 "TARGET_NEON"
2323 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2324 [(set_attr "type" "neon_abd_long")]
2325 )
2326
;; vaba: a plus of the VABD unspec of operands 2 and 3 with operand 1, where
;; operand 1 is tied to the output.  The template prints operands 0, 2 and 3.
2327 (define_insn "neon_vaba<sup><mode>"
2328 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2329 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2330 (match_operand:VDQIW 3 "s_register_operand" "w")]
2331 VABD)
2332 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2333 "TARGET_NEON"
2334 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2335 [(set_attr "type" "neon_arith_acc<q>")]
2336 )
2337
;; vabal: a plus of the VABDL unspec of the VW operands 2 and 3 with the
;; <V_widen> operand 1, where operand 1 is tied to the output.
2338 (define_insn "neon_vabal<sup><mode>"
2339 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2340 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2341 (match_operand:VW 3 "s_register_operand" "w")]
2342 VABDL)
2343 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2344 "TARGET_NEON"
2345 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2346 [(set_attr "type" "neon_arith_acc<q>")]
2347 )
2348
;; v<maxmin> over the VDQIW modes, as a VMAXMIN unspec of operands 1 and 2;
;; <maxmin> supplies the mnemonic.
2349 (define_insn "neon_v<maxmin><sup><mode>"
2350 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2351 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2352 (match_operand:VDQIW 2 "s_register_operand" "w")]
2353 VMAXMIN))]
2354 "TARGET_NEON"
2355 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2356 [(set_attr "type" "neon_minmax<q>")]
2357 )
2358
;; v<maxmin> over the VCVTF modes, as a VMAXMINF unspec of operands 1 and 2.
2359 (define_insn "neon_v<maxmin>f<mode>"
2360 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2361 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2362 (match_operand:VCVTF 2 "s_register_operand" "w")]
2363 VMAXMINF))]
2364 "TARGET_NEON"
2365 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2366 [(set_attr "type" "neon_fp_minmax_s<q>")]
2367 )
2368
;; Expander for vpadd over the VD modes: a plain forwarder to
;; neon_vpadd_internal<mode> (defined outside this section) with the same
;; three operands.
2369 (define_expand "neon_vpadd<mode>"
2370 [(match_operand:VD 0 "s_register_operand" "=w")
2371 (match_operand:VD 1 "s_register_operand" "w")
2372 (match_operand:VD 2 "s_register_operand" "w")]
2373 "TARGET_NEON"
2374 {
2375 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2376 operands[2]));
2377 DONE;
2378 })
2379
;; vpaddl: one VDQIW input; the result has the <V_double_width> mode.
2380 (define_insn "neon_vpaddl<sup><mode>"
2381 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2382 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2383 VPADDL))]
2384 "TARGET_NEON"
2385 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2386 [(set_attr "type" "neon_reduc_add_long")]
2387 )
2388
;; vpadal: operand 1 has the <V_double_width> mode and is tied to the output;
;; operand 2 is a VDQIW vector.  The template prints operands 0 and 2.
2389 (define_insn "neon_vpadal<sup><mode>"
2390 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2391 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2392 (match_operand:VDQIW 2 "s_register_operand" "w")]
2393 VPADAL))]
2394 "TARGET_NEON"
2395 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2396 [(set_attr "type" "neon_reduc_add_acc")]
2397 )
2398
;; vp<maxmin> over the VDI modes, as a VPMAXMIN unspec of operands 1 and 2.
2399 (define_insn "neon_vp<maxmin><sup><mode>"
2400 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2401 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2402 (match_operand:VDI 2 "s_register_operand" "w")]
2403 VPMAXMIN))]
2404 "TARGET_NEON"
2405 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2406 [(set_attr "type" "neon_reduc_minmax<q>")]
2407 )
2408
;; vp<maxmin> over the VCVTF modes, as a VPMAXMINF unspec of operands 1 and 2.
2409 (define_insn "neon_vp<maxmin>f<mode>"
2410 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2411 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2412 (match_operand:VCVTF 2 "s_register_operand" "w")]
2413 VPMAXMINF))]
2414 "TARGET_NEON"
2415 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2416 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
2417 )
2418
;; vrecps over the VCVTF modes, as an UNSPEC_VRECPS of operands 1 and 2.
2419 (define_insn "neon_vrecps<mode>"
2420 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2421 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2422 (match_operand:VCVTF 2 "s_register_operand" "w")]
2423 UNSPEC_VRECPS))]
2424 "TARGET_NEON"
2425 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2426 [(set_attr "type" "neon_fp_recps_s<q>")]
2427 )
2428
;; vrsqrts over the VCVTF modes, as an UNSPEC_VRSQRTS of operands 1 and 2.
2429 (define_insn "neon_vrsqrts<mode>"
2430 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2431 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2432 (match_operand:VCVTF 2 "s_register_operand" "w")]
2433 UNSPEC_VRSQRTS))]
2434 "TARGET_NEON"
2435 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2436 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2437 )
2438
;; Expander for vabs over the VDQW modes: a plain forwarder to the generic
;; abs<mode>2 pattern.
2439 (define_expand "neon_vabs<mode>"
2440 [(match_operand:VDQW 0 "s_register_operand" "")
2441 (match_operand:VDQW 1 "s_register_operand" "")]
2442 "TARGET_NEON"
2443 {
2444 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2445 DONE;
2446 })
2447
;; vqabs over the VDQIW modes, as an UNSPEC_VQABS of operand 1.
2448 (define_insn "neon_vqabs<mode>"
2449 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2450 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2451 UNSPEC_VQABS))]
2452 "TARGET_NEON"
2453 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2454 [(set_attr "type" "neon_qabs<q>")]
2455 )
2456
;; Canonical bswap over the VDQHSD modes, emitted as vrev<V_sz_elem>.8, i.e.
;; a reversal of 8-bit units within each element-sized group.
2457 (define_insn "neon_bswap<mode>"
2458 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2459 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2460 "TARGET_NEON"
2461 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2462 [(set_attr "type" "neon_rev<q>")]
2463 )
2464
;; Expander for vneg over the VDQW modes: a plain forwarder to the generic
;; neg<mode>2 pattern.
2465 (define_expand "neon_vneg<mode>"
2466 [(match_operand:VDQW 0 "s_register_operand" "")
2467 (match_operand:VDQW 1 "s_register_operand" "")]
2468 "TARGET_NEON"
2469 {
2470 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
2471 DONE;
2472 })
2473
;; copysign over the VCVTF modes.  A vector constant with 0x80000000 in
;; every element is loaded into a fresh <V_cmp_result> register, operand 2 is
;; copied into operand 0, and neon_vbsl<mode> then combines operand 0 and
;; operand 1 under that mask.  Presumably vbsl keeps the masked (sign) bit of
;; operand 0 and takes the remaining bits from operand 1 -- confirm against
;; neon_vbsl<mode>, which is defined outside this section.
2474 (define_expand "neon_copysignf<mode>"
2475 [(match_operand:VCVTF 0 "register_operand")
2476 (match_operand:VCVTF 1 "register_operand")
2477 (match_operand:VCVTF 2 "register_operand")]
2478 "TARGET_NEON"
2479 "{
2480 rtx v_bitmask_cast;
2481 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2482 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2483 rtvec v = rtvec_alloc (n_elt);
2484
2485 /* Create bitmask for vector select. */
/* Use gen_int_mode so that each element is a CONST_INT sign-extended for
SImode; GEN_INT (0x80000000) yields a non-canonical positive value when
HOST_WIDE_INT is wider than 32 bits.  */
2486 for (i = 0; i < n_elt; ++i)
2487 RTVEC_ELT (v, i) = gen_int_mode (0x80000000, SImode);
2488
2489 emit_move_insn (v_bitmask,
2490 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2491 emit_move_insn (operands[0], operands[2]);
/* View the integer mask register in the float vector mode expected by
the vbsl pattern.  */
2492 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2493 <VCVTF:V_cmp_result>mode, 0);
2494 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
2495 operands[1]));
2496
2497 DONE;
2498 }"
2499 )
2500
;; vqneg over the VDQIW modes, as an UNSPEC_VQNEG of operand 1.
2501 (define_insn "neon_vqneg<mode>"
2502 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2503 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2504 UNSPEC_VQNEG))]
2505 "TARGET_NEON"
2506 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2507 [(set_attr "type" "neon_qneg<q>")]
2508 )
2509
;; vcls over the VDQIW modes, as an UNSPEC_VCLS of operand 1.
2510 (define_insn "neon_vcls<mode>"
2511 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2512 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2513 UNSPEC_VCLS))]
2514 "TARGET_NEON"
2515 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2516 [(set_attr "type" "neon_cls<q>")]
2517 )
2518
;; Standard-named clz pattern over the VDQIW modes, emitted as vclz.  It is
;; also the target of the neon_vclz<mode> expander.
2519 (define_insn "clz<mode>2"
2520 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2521 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2522 "TARGET_NEON"
2523 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2524 [(set_attr "type" "neon_cnt<q>")]
2525 )
2526
;; Expander for vclz over the VDQIW modes: a plain forwarder to clz<mode>2.
2527 (define_expand "neon_vclz<mode>"
2528 [(match_operand:VDQIW 0 "s_register_operand" "")
2529 (match_operand:VDQIW 1 "s_register_operand" "")]
2530 "TARGET_NEON"
2531 {
2532 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
2533 DONE;
2534 })
2535
;; Standard-named popcount pattern over the VE modes, emitted as vcnt.  It is
;; also the target of the neon_vcnt<mode> expander.
2536 (define_insn "popcount<mode>2"
2537 [(set (match_operand:VE 0 "s_register_operand" "=w")
2538 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2539 "TARGET_NEON"
2540 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2541 [(set_attr "type" "neon_cnt<q>")]
2542 )
2543
;; Expander for vcnt over the VE modes: a plain forwarder to popcount<mode>2.
2544 (define_expand "neon_vcnt<mode>"
2545 [(match_operand:VE 0 "s_register_operand" "=w")
2546 (match_operand:VE 1 "s_register_operand" "w")]
2547 "TARGET_NEON"
2548 {
2549 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
2550 DONE;
2551 })
2552
;; vrecpe over the V32 modes, as an UNSPEC_VRECPE of operand 1.
2553 (define_insn "neon_vrecpe<mode>"
2554 [(set (match_operand:V32 0 "s_register_operand" "=w")
2555 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2556 UNSPEC_VRECPE))]
2557 "TARGET_NEON"
2558 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2559 [(set_attr "type" "neon_fp_recpe_s<q>")]
2560 )
2561
;; vrsqrte over the V32 modes, as an UNSPEC_VRSQRTE of operand 1.
2562 (define_insn "neon_vrsqrte<mode>"
2563 [(set (match_operand:V32 0 "s_register_operand" "=w")
2564 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2565 UNSPEC_VRSQRTE))]
2566 "TARGET_NEON"
2567 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2568 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
2569 )
2570
;; Expander for vmvn over the VDQIW modes: a plain forwarder to the generic
;; one_cmpl<mode>2 pattern.
2571 (define_expand "neon_vmvn<mode>"
2572 [(match_operand:VDQIW 0 "s_register_operand" "")
2573 (match_operand:VDQIW 1 "s_register_operand" "")]
2574 "TARGET_NEON"
2575 {
2576 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
2577 DONE;
2578 })
2579
;; Extract the element selected by operand 2 from a VD vector into an SI core
;; register, sign-extended (vmov.s).  On big-endian targets the lane index is
;; mirrored to (number of units - 1 - lane) before it is printed.
2580 (define_insn "neon_vget_lane<mode>_sext_internal"
2581 [(set (match_operand:SI 0 "s_register_operand" "=r")
2582 (sign_extend:SI
2583 (vec_select:<V_elem>
2584 (match_operand:VD 1 "s_register_operand" "w")
2585 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2586 "TARGET_NEON"
2587 {
2588 if (BYTES_BIG_ENDIAN)
2589 {
2590 int elt = INTVAL (operands[2]);
2591 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2592 operands[2] = GEN_INT (elt);
2593 }
2594 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2595 }
2596 [(set_attr "type" "neon_to_gp")]
2597 )
2598
;; Extract the element selected by operand 2 from a VD vector into an SI core
;; register, zero-extended (vmov.u).  On big-endian targets the lane index is
;; mirrored to (number of units - 1 - lane) before it is printed.
2599 (define_insn "neon_vget_lane<mode>_zext_internal"
2600 [(set (match_operand:SI 0 "s_register_operand" "=r")
2601 (zero_extend:SI
2602 (vec_select:<V_elem>
2603 (match_operand:VD 1 "s_register_operand" "w")
2604 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2605 "TARGET_NEON"
2606 {
2607 if (BYTES_BIG_ENDIAN)
2608 {
2609 int elt = INTVAL (operands[2]);
2610 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2611 operands[2] = GEN_INT (elt);
2612 }
2613 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2614 }
2615 [(set_attr "type" "neon_to_gp")]
2616 )
2617
;; VQ2 counterpart of the sign-extending lane extract.  The lane is split
;; into a half index (elt / halfelts) and an index within that half
;; (elt % halfelts, mirrored on big-endian).  The half is addressed as a
;; <V_HALF> register at regno + 2 * half -- presumably because each
;; <V_HALF> register spans two hard register numbers (TODO confirm) -- and
;; the vmov.s is printed from that temporary operand array.
2618 (define_insn "neon_vget_lane<mode>_sext_internal"
2619 [(set (match_operand:SI 0 "s_register_operand" "=r")
2620 (sign_extend:SI
2621 (vec_select:<V_elem>
2622 (match_operand:VQ2 1 "s_register_operand" "w")
2623 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2624 "TARGET_NEON"
2625 {
2626 rtx ops[3];
2627 int regno = REGNO (operands[1]);
2628 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2629 unsigned int elt = INTVAL (operands[2]);
2630 unsigned int elt_adj = elt % halfelts;
2631
2632 if (BYTES_BIG_ENDIAN)
2633 elt_adj = halfelts - 1 - elt_adj;
2634
2635 ops[0] = operands[0];
2636 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2637 ops[2] = GEN_INT (elt_adj);
2638 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2639
2640 return "";
2641 }
2642 [(set_attr "type" "neon_to_gp_q")]
2643 )
2644
;; VQ2 counterpart of the zero-extending lane extract.  The lane is split
;; into a half index (elt / halfelts) and an index within that half
;; (elt % halfelts, mirrored on big-endian).  The half is addressed as a
;; <V_HALF> register at regno + 2 * half -- presumably because each
;; <V_HALF> register spans two hard register numbers (TODO confirm) -- and
;; the vmov.u is printed from that temporary operand array.
2645 (define_insn "neon_vget_lane<mode>_zext_internal"
2646 [(set (match_operand:SI 0 "s_register_operand" "=r")
2647 (zero_extend:SI
2648 (vec_select:<V_elem>
2649 (match_operand:VQ2 1 "s_register_operand" "w")
2650 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2651 "TARGET_NEON"
2652 {
2653 rtx ops[3];
2654 int regno = REGNO (operands[1]);
2655 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2656 unsigned int elt = INTVAL (operands[2]);
2657 unsigned int elt_adj = elt % halfelts;
2658
2659 if (BYTES_BIG_ENDIAN)
2660 elt_adj = halfelts - 1 - elt_adj;
2661
2662 ops[0] = operands[0];
2663 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2664 ops[2] = GEN_INT (elt_adj);
2665 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2666
2667 return "";
2668 }
2669 [(set_attr "type" "neon_to_gp_q")]
2670 )
2671
;; Expander for the vget_lane intrinsic over the VDQW modes.  On big-endian
;; targets the lane number is first XORed with (elements per 64 bits - 1).
;; Modes with 32-bit elements go through vec_extract<mode>; all other element
;; sizes use the sign-extending neon_vget_lane<mode>_sext_internal pattern.
2672 (define_expand "neon_vget_lane<mode>"
2673 [(match_operand:<V_ext> 0 "s_register_operand" "")
2674 (match_operand:VDQW 1 "s_register_operand" "")
2675 (match_operand:SI 2 "immediate_operand" "")]
2676 "TARGET_NEON"
2677 {
2678 if (BYTES_BIG_ENDIAN)
2679 {
2680 /* The intrinsics are defined in terms of a model where the
2681 element ordering in memory is vldm order, whereas the generic
2682 RTL is defined in terms of a model where the element ordering
2683 in memory is array order. Convert the lane number to conform
2684 to this model. */
2685 unsigned int elt = INTVAL (operands[2]);
2686 unsigned int reg_nelts
2687 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2688 elt ^= reg_nelts - 1;
2689 operands[2] = GEN_INT (elt);
2690 }
2691
2692 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2693 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2694 else
2695 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
2696 operands[1],
2697 operands[2]));
2698 DONE;
2699 })
2700
;; Expander for the unsigned vget_lane intrinsic over the VDQIW modes.  On
;; big-endian targets the lane number is first XORed with (elements per 64
;; bits - 1).  Modes with 32-bit elements go through vec_extract<mode>; all
;; other element sizes use the zero-extending
;; neon_vget_lane<mode>_zext_internal pattern.
2701 (define_expand "neon_vget_laneu<mode>"
2702 [(match_operand:<V_ext> 0 "s_register_operand" "")
2703 (match_operand:VDQIW 1 "s_register_operand" "")
2704 (match_operand:SI 2 "immediate_operand" "")]
2705 "TARGET_NEON"
2706 {
2707 if (BYTES_BIG_ENDIAN)
2708 {
2709 /* The intrinsics are defined in terms of a model where the
2710 element ordering in memory is vldm order, whereas the generic
2711 RTL is defined in terms of a model where the element ordering
2712 in memory is array order. Convert the lane number to conform
2713 to this model. */
2714 unsigned int elt = INTVAL (operands[2]);
2715 unsigned int reg_nelts
2716 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2717 elt ^= reg_nelts - 1;
2718 operands[2] = GEN_INT (elt);
2719 }
2720
2721 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2722 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2723 else
2724 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
2725 operands[1],
2726 operands[2]));
2727 DONE;
2728 })
2729
;; DImode lane extract.  Operand 2 (the lane number) is not used: the
;; expander simply moves operand 1 into operand 0.
2730 (define_expand "neon_vget_lanedi"
2731 [(match_operand:DI 0 "s_register_operand" "=r")
2732 (match_operand:DI 1 "s_register_operand" "w")
2733 (match_operand:SI 2 "immediate_operand" "")]
2734 "TARGET_NEON"
2735 {
2736 emit_move_insn (operands[0], operands[1]);
2737 DONE;
2738 })
2739
;; V2DI form of vget_lane: move the low or the high DImode part of operand 1
;; into operand 0, depending on lane operand 2.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  int lane = INTVAL (operands[2]);
  rtx half;

  /* Intrinsic lane numbers assume vldm element order in memory, while
     generic RTL assumes array order.  With two lanes the conversion is
     simply a swap of lane 0 and lane 1.  */
  if (BYTES_BIG_ENDIAN)
    lane ^= 1;

  gcc_assert (lane == 0 || lane == 1);

  if (lane != 0)
    half = gen_highpart (DImode, operands[1]);
  else
    half = gen_lowpart (DImode, operands[1]);

  emit_move_insn (operands[0], half);
  DONE;
})
2768
;; Expander for vset_lane: operand 0 becomes vector operand 2 with the lane
;; chosen by operand 3 replaced by scalar operand 1.  The lane is handed to
;; vec_set_internal as a one-hot mask.
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "=w")
   (match_operand:<V_elem> 1 "s_register_operand" "r")
   (match_operand:VDQ 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);
  rtx lane_mask;

  /* On big-endian targets flip the lane index within its 64-bit group.  */
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      lane ^= lanes_per_dreg - 1;
    }

  lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         lane_mask, operands[2]));
  DONE;
})
2789
2790 ; Operands 2 and 3 are ignored: a DImode vector has a single lane, so operand 1 replaces all of it.
2791
;; DImode form of vset_lane.  Only operands 0 and 1 take part: the new
;; element is moved straight into the destination.
(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")
   (match_operand:DI 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx new_value = operands[1];

  emit_move_insn (dest, new_value);
  DONE;
})
2802
;; Expander for vcreate: reinterpret DImode operand 1 in the vector mode via
;; gen_lowpart and move the result into operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand" "")
   (match_operand:DI 1 "general_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
2812
;; vdup_n for the VX modes: operand 0 is a vec_duplicate of scalar operand 1,
;; which must be in a register matching constraint "r".
2813 (define_insn "neon_vdup_n<mode>"
2814 [(set (match_operand:VX 0 "s_register_operand" "=w")
2815 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2816 "TARGET_NEON"
2817 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2818 [(set_attr "type" "neon_from_gp<q>")]
2819 )
2820
;; vdup_n for the V32 modes.  Two alternatives: the scalar in a register
;; matching "r" (type neon_from_gp), or in one matching "t", printed with
;; the %y modifier (type neon_dup).
2821 (define_insn "neon_vdup_n<mode>"
2822 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2823 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2824 "TARGET_NEON"
2825 "@
2826 vdup.<V_sz_elem>\t%<V_reg>0, %1
2827 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2828 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
2829 )
2830
;; DImode form of vdup_n: expands to a plain move of operand 1 into
;; operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx scalar = operands[1];

  emit_move_insn (dest, scalar);
  DONE;
}
)
2840
;; V2DI form of vdup_n.  Each alternative emits two vmov instructions
;; (total length 8), one writing %e0 and one writing %f0 of the destination,
;; taking the DImode scalar from an "r" or a "w" register respectively.
2841 (define_insn "neon_vdup_nv2di"
2842 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2843 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2844 "TARGET_NEON"
2845 "@
2846 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2847 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2848 [(set_attr "length" "8")
2849 (set_attr "type" "multiple")]
2850 )
2851
;; Duplicate the lane of source vector operand 1 selected by operand 2 into
;; every element of operand 0.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Count the lane from the opposite end of the source vector before
         it is printed.  */
      int last_lane = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1;
      int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (last_lane - lane);
    }

  /* Only the destination register modifier differs between the D and Q
     forms.  */
  return <Is_d_reg>
         ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
         : "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
2873
;; Expander for vdup_lane: adjusts the lane number for big-endian targets
;; and then emits the _internal insn.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx lane_rtx = operands[2];

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane index within the 64-bit source vector.  */
      unsigned int lane = INTVAL (lane_rtx);
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      lane ^= lanes_per_dreg - 1;
      lane_rtx = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                lane_rtx));
  DONE;
})
2892
2893 ; Scalar index is ignored, since only zero is valid here.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  /* Operand 2 is not consulted; the duplicate is a plain move.  */
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
2903
2904 ; Likewise for v2di, as the DImode second operand has only a single element.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  /* Operand 2 is not consulted; reuse the vdup_n pattern to broadcast the
     DImode source.  */
  rtx dup = gen_neon_vdup_nv2di (operands[0], operands[1]);

  emit_insn (dup);
  DONE;
})
2914
2915 ; Disabled before reload because we don't want combine doing something silly,
2916 ; but used by the post-reload expansion of neon_vcombine.
;; The pattern is a pair of sets that exchange operands 0 and 1; both are
;; read-write ("+w") and the insn only matches once reload_completed is set.
2917 (define_insn "*neon_vswp<mode>"
2918 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2919 (match_operand:VDQX 1 "s_register_operand" "+w"))
2920 (set (match_dup 1) (match_dup 0))]
2921 "TARGET_NEON && reload_completed"
2922 "vswp\t%<V_reg>0, %<V_reg>1"
2923 [(set_attr "type" "neon_permute<q>")]
2924 )
2925
2926 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2927 ;; dest vector.
2928 ;; FIXME: A different implementation of this builtin could make it much
2929 ;; more likely that we wouldn't actually need to output anything (we could make
2930 ;; it so that the reg allocator puts things in the right places magically
2931 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
2932
;; The insn itself is output as "#"; once reload has completed it is split,
;; with neon_split_vcombine generating the replacement code.
2933 (define_insn_and_split "neon_vcombine<mode>"
2934 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2935 (vec_concat:<V_DOUBLE>
2936 (match_operand:VDX 1 "s_register_operand" "w")
2937 (match_operand:VDX 2 "s_register_operand" "w")))]
2938 "TARGET_NEON"
2939 "#"
2940 "&& reload_completed"
2941 [(const_int 0)]
2942 {
2943 neon_split_vcombine (operands);
2944 DONE;
2945 }
2946 [(set_attr "type" "multiple")]
2947 )
2948
;; Expander for vget_high: move the half-width subreg of operand 1 that
;; starts at byte offset GET_MODE_SIZE of the half mode into operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx upper = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                   GET_MODE_SIZE (<V_HALF>mode));

  emit_move_insn (operands[0], upper);
  DONE;
})
2959
;; Expander for vget_low: move the half-width subreg of operand 1 at byte
;; offset 0 into operand 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lower = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);

  emit_move_insn (operands[0], lower);
  DONE;
})
2970
;; Vector signed-integer to floating-point conversion (float rtx), emitted
;; as vcvt.f32.s32.  Disabled when flag_rounding_math is set.
2971 (define_insn "float<mode><V_cvtto>2"
2972 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2973 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2974 "TARGET_NEON && !flag_rounding_math"
2975 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
2976 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
2977 )
2978
;; Vector unsigned-integer to floating-point conversion (unsigned_float
;; rtx), emitted as vcvt.f32.u32.  Disabled when flag_rounding_math is set.
2979 (define_insn "floatuns<mode><V_cvtto>2"
2980 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2981 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2982 "TARGET_NEON && !flag_rounding_math"
2983 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
2984 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
2985 )
2986
;; Vector floating-point to signed-integer conversion (fix rtx), emitted as
;; vcvt.s32.f32.
2987 (define_insn "fix_trunc<mode><V_cvtto>2"
2988 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2989 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2990 "TARGET_NEON"
2991 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
2992 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
2993 )
2994
;; Vector floating-point to unsigned-integer conversion (unsigned_fix rtx),
;; emitted as vcvt.u32.f32.
2995 (define_insn "fixuns_trunc<mode><V_cvtto>2"
2996 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2997 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2998 "TARGET_NEON"
2999 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3000 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3001 )
3002
;; vcvt builtin with a floating-point (VCVTF) source, represented as an
;; unspec from the VCVT_US iterator; the sup attribute supplies the
;; signedness letter in the mnemonic.
3003 (define_insn "neon_vcvt<sup><mode>"
3004 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3005 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3006 VCVT_US))]
3007 "TARGET_NEON"
3008 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3009 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3010 )
3011
;; vcvt builtin with an integer (VCVTI) source, represented as an unspec
;; from the VCVT_US iterator; the sup attribute supplies the signedness
;; letter of the source type in the mnemonic.
3012 (define_insn "neon_vcvt<sup><mode>"
3013 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3014 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3015 VCVT_US))]
3016 "TARGET_NEON"
3017 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3018 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3019 )
3020
;; Widening conversion from V4HF (operand 1, printed as a D register) to
;; V4SF (operand 0, printed as a Q register).  Requires TARGET_FP16.
3021 (define_insn "neon_vcvtv4sfv4hf"
3022 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3023 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3024 UNSPEC_VCVT))]
3025 "TARGET_NEON && TARGET_FP16"
3026 "vcvt.f32.f16\t%q0, %P1"
3027 [(set_attr "type" "neon_fp_cvt_widen_h")]
3028 )
3029
;; Narrowing conversion from V4SF (operand 1, printed as a Q register) to
;; V4HF (operand 0, printed as a D register).  Requires TARGET_FP16.
3030 (define_insn "neon_vcvtv4hfv4sf"
3031 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3032 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3033 UNSPEC_VCVT))]
3034 "TARGET_NEON && TARGET_FP16"
3035 "vcvt.f16.f32\t%P0, %q1"
3036 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
3037 )
3038
;; vcvt_n builtin with a floating-point (VCVTF) source.  Immediate operand 2
;; is passed to neon_const_bounds with bounds 1 and 33 before being printed
;; as the trailing operand of the instruction.
3039 (define_insn "neon_vcvt<sup>_n<mode>"
3040 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3041 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3042 (match_operand:SI 2 "immediate_operand" "i")]
3043 VCVT_US_N))]
3044 "TARGET_NEON"
3045 {
3046 neon_const_bounds (operands[2], 1, 33);
3047 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3048 }
3049 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3050 )
3051
;; vcvt_n builtin with an integer (VCVTI) source.  Immediate operand 2 is
;; passed to neon_const_bounds with bounds 1 and 33 before being printed as
;; the trailing operand of the instruction.
3052 (define_insn "neon_vcvt<sup>_n<mode>"
3053 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3054 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3055 (match_operand:SI 2 "immediate_operand" "i")]
3056 VCVT_US_N))]
3057 "TARGET_NEON"
3058 {
3059 neon_const_bounds (operands[2], 1, 33);
3060 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3061 }
3062 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3063 )
3064
;; vmovn: operand 0 has the V_narrow mode of source operand 1; the source is
;; printed as a Q register and the destination as a D register.
3065 (define_insn "neon_vmovn<mode>"
3066 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3067 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3068 UNSPEC_VMOVN))]
3069 "TARGET_NEON"
3070 "vmovn.<V_if_elem>\t%P0, %q1"
3071 [(set_attr "type" "neon_shift_imm_narrow_q")]
3072 )
3073
;; vqmovn: same operand shape as vmovn, with the unspec taken from the
;; VQMOVN iterator and the sup attribute selecting the signedness letter.
3074 (define_insn "neon_vqmovn<sup><mode>"
3075 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3076 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3077 VQMOVN))]
3078 "TARGET_NEON"
3079 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3080 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3081 )
3082
;; vqmovun: same operand shape as vmovn, using UNSPEC_VQMOVUN.
3083 (define_insn "neon_vqmovun<mode>"
3084 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3085 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3086 UNSPEC_VQMOVUN))]
3087 "TARGET_NEON"
3088 "vqmovun.<V_s_elem>\t%P0, %q1"
3089 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3090 )
3091
;; vmovl: operand 0 has the V_widen mode of source operand 1; the source is
;; printed as a D register and the destination as a Q register.
3092 (define_insn "neon_vmovl<sup><mode>"
3093 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3094 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3095 VMOVL))]
3096 "TARGET_NEON"
3097 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3098 [(set_attr "type" "neon_shift_imm_long")]
3099 )
3100
;; vmul by scalar, D-register form: operand 1 multiplied by the lane of
;; operand 2 selected by immediate operand 3.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
                     (match_operand:VMD 2 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3117
;; vmul by scalar, Q-register form: the scalar comes from a lane of the
;; half-width vector in operand 2.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
                     (match_operand:<V_HALF> 2 "s_register_operand"
                                               "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3134
;; vmull by scalar: D-register inputs, widened (V_widen) Q-register result.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          VMULL_LANE))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
3148
;; vqdmull by scalar: D-register inputs, widened (V_widen) Q-register
;; result.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
3162
;; vqdmulh / vqrdmulh by scalar, Q-register form: the scalar comes from a
;; lane of the half-width vector in operand 2.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
                      (match_operand:<V_HALF> 2 "s_register_operand"
                                                "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3176
;; vqdmulh / vqrdmulh by scalar, D-register form: operand 1 combined with
;; the lane of operand 2 selected by immediate operand 3.  All operands are
;; printed as D registers, so the scheduling type is the non-"_q" variant
;; (the Q-register form of this pattern carries the "_q" type).
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
                      (match_operand:VMDI 2 "s_register_operand"
                                            "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)
3190
3191 ;; vqrdmlah_lane, vqrdmlsh_lane
;; Q-register form.  Operand 1 is tied to the output (constraint "0");
;; operand 2 is combined with the lane of half-width operand 3 selected by
;; operand 4.  Only available under TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
                      (match_operand:VMQI 2 "s_register_operand" "w")
                      (match_operand:<V_HALF> 3 "s_register_operand"
                                                "<scalar_mul_constraint>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)
3207
;; D-register form.  Operand 1 is tied to the output (constraint "0");
;; operand 2 is combined with the lane of operand 3 selected by operand 4.
;; Only available under TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
                      (match_operand:VMDI 2 "s_register_operand" "w")
                      (match_operand:VMDI 3 "s_register_operand"
                                            "<scalar_mul_constraint>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)
3223
;; vmla by scalar, D-register form.  Operand 1 is the accumulator and is
;; tied to the output; operand 4 selects the lane of operand 3.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3241
;; vmla by scalar, Q-register form.  Operand 1 is the accumulator and is
;; tied to the output; the scalar is a lane of half-width operand 3.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3259
;; vmlal by scalar: widened accumulator operand 1 is tied to the output;
;; operands 2 and 3 are D-register inputs and operand 4 selects the lane.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLAL_LANE))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3274
;; vqdmlal by scalar: widened accumulator operand 1 is tied to the output;
;; operands 2 and 3 are D-register inputs and operand 4 selects the lane.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
3289
;; vmls by scalar, D-register form.  Operand 1 is the accumulator and is
;; tied to the output; operand 4 selects the lane of operand 3.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3307
;; vmls by scalar, Q-register form.  Operand 1 is the accumulator and is
;; tied to the output; the scalar is a lane of half-width operand 3.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3325
;; vmlsl by scalar: widened accumulator operand 1 is tied to the output;
;; operands 2 and 3 are D-register inputs and operand 4 selects the lane.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLSL_LANE))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3340
;; vqdmlsl by scalar: widened accumulator operand 1 is tied to the output;
;; operands 2 and 3 are D-register inputs and operand 4 selects the lane.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
3355
3356 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3357 ; core register into a temp register, then use a scalar taken from that. This
3358 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3359 ; or extracted from another vector. In the latter case it's currently better
3360 ; to use the "_lane" variant, and the former case can probably be implemented
3361 ; using vld1_lane, but that hasn't been done yet.
3362
;; vmul_n, D-register form: insert scalar operand 2 into lane 0 of a fresh
;; vector register, then multiply operand 1 by that lane.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       lane0));
  DONE;
})
3375
;; vmul_n, Q-register form: the scalar is inserted into lane 0 of a fresh
;; half-width vector register, which then feeds the _lane pattern.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       lane0));
  DONE;
})
3388
;; vmulls_n: insert scalar operand 2 into lane 0 of a fresh vector register
;; and use the vmulls_lane pattern on that lane.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], scalar_vec,
                                         lane0));
  DONE;
})
3401
;; vmullu_n: insert scalar operand 2 into lane 0 of a fresh vector register
;; and use the vmullu_lane pattern on that lane.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], scalar_vec,
                                         lane0));
  DONE;
})
3414
;; vqdmull_n: insert scalar operand 2 into lane 0 of a fresh vector register
;; and use the vqdmull_lane pattern on that lane.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
3427
;; vqdmulh_n, D-register form: insert scalar operand 2 into lane 0 of a
;; fresh vector register and use the vqdmulh_lane pattern on that lane.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
3440
;; vqrdmulh_n, D-register form: insert scalar operand 2 into lane 0 of a
;; fresh vector register and use the vqrdmulh_lane pattern on that lane.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
3453
;; vqdmulh_n, Q-register form: the scalar goes into lane 0 of a fresh
;; half-width vector register, which then feeds the vqdmulh_lane pattern.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
3466
;; vqrdmulh_n, Q-register form: the scalar goes into lane 0 of a fresh
;; half-width vector register, which then feeds the vqrdmulh_lane pattern.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
3479
;; vmla_n, D-register form: insert scalar operand 3 into lane 0 of a fresh
;; vector register and use the vmla_lane pattern on that lane.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
3493
;; vmla_n, Q-register form: the scalar goes into lane 0 of a fresh
;; half-width vector register, which then feeds the vmla_lane pattern.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
3507
;; vmlals_n: insert scalar operand 3 into lane 0 of a fresh vector register
;; and use the vmlals_lane pattern on that lane.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
3521
;; vmlalu_n: insert scalar operand 3 into lane 0 of a fresh vector register
;; and use the vmlalu_lane pattern on that lane.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
3535
;; vqdmlal_n: insert scalar operand 3 into lane 0 of a fresh vector register
;; and use the vqdmlal_lane pattern on that lane.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
3549
;; vmls_n, D-register form: insert scalar operand 3 into lane 0 of a fresh
;; vector register and use the vmls_lane pattern on that lane.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
3563
;; vmls_n, Q-register form: the scalar goes into lane 0 of a fresh
;; half-width vector register, which then feeds the vmls_lane pattern.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
3577
;; vmlsls_n: insert scalar operand 3 into lane 0 of a fresh vector register
;; and use the vmlsls_lane pattern on that lane.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
3591
;; vmlslu_n: insert scalar operand 3 into lane 0 of a fresh vector register
;; and use the vmlslu_lane pattern on that lane.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
3605
;; vqdmlsl_n: insert scalar operand 3 into lane 0 of a fresh vector register
;; and use the vqdmlsl_lane pattern on that lane.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lane0 = const0_rtx;
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
3619
;; vext: an UNSPEC_VEXT of vectors 1 and 2 with immediate operand 3.  Before
;; output, operand 3 is passed to neon_const_bounds with bounds 0 and the
;; number of elements in the vector mode.
3620 (define_insn "neon_vext<mode>"
3621 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3622 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3623 (match_operand:VDQX 2 "s_register_operand" "w")
3624 (match_operand:SI 3 "immediate_operand" "i")]
3625 UNSPEC_VEXT))]
3626 "TARGET_NEON"
3627 {
3628 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3629 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3630 }
3631 [(set_attr "type" "neon_ext<q>")]
3632 )
3633
;; vrev64: single-instruction pattern, represented as UNSPEC_VREV64 applied
;; to operand 1 and printed as "vrev64.<size> dest, src".
3634 (define_insn "neon_vrev64<mode>"
3635 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3636 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3637 UNSPEC_VREV64))]
3638 "TARGET_NEON"
3639 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3640 [(set_attr "type" "neon_rev<q>")]
3641 )
3642
;; vrev32: single-instruction pattern over the VX modes, represented as
;; UNSPEC_VREV32 applied to operand 1 and printed as "vrev32.<size> dest, src".
3643 (define_insn "neon_vrev32<mode>"
3644 [(set (match_operand:VX 0 "s_register_operand" "=w")
3645 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3646 UNSPEC_VREV32))]
3647 "TARGET_NEON"
3648 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3649 [(set_attr "type" "neon_rev<q>")]
3650 )
3651
;; vrev16: single-instruction pattern over the VE modes, represented as
;; UNSPEC_VREV16 applied to operand 1 and printed as "vrev16.<size> dest, src".
3652 (define_insn "neon_vrev16<mode>"
3653 [(set (match_operand:VE 0 "s_register_operand" "=w")
3654 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3655 UNSPEC_VREV16))]
3656 "TARGET_NEON"
3657 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3658 [(set_attr "type" "neon_rev<q>")]
3659 )
3660
3661 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3662 ; allocation. For an intrinsic of form:
3663 ; rD = vbsl_* (rS, rN, rM)
3664 ; We can use any of:
3665 ; vbsl rS, rN, rM (if D = S)
3666 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3667 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
3668
;; In the pattern below rS, rN and rM are operands 1, 2 and 3.  The three
;; constraint alternatives tie the output (constraint "0") to operand 1,
;; operand 3 and operand 2 respectively, and the "@" template lists the
;; matching mnemonic for each alternative in the same order: vbsl, vbit, vbif.
3669 (define_insn "neon_vbsl<mode>_internal"
3670 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3671 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3672 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3673 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3674 UNSPEC_VBSL))]
3675 "TARGET_NEON"
3676 "@
3677 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3678 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3679 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3680 [(set_attr "type" "neon_bsl<q>")]
3681 )
3682
;; Public vbsl expander.  The selector (operand 1) arrives in
;; <V_cmp_result> mode; the preparation code replaces it with its
;; gen_lowpart in <MODE>mode so that all three unspec inputs share one mode.
;; The resulting RTL is presumably matched by neon_vbsl<mode>_internal,
;; whose operands are all VDQX.
3683 (define_expand "neon_vbsl<mode>"
3684 [(set (match_operand:VDQX 0 "s_register_operand" "")
3685 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3686 (match_operand:VDQX 2 "s_register_operand" "")
3687 (match_operand:VDQX 3 "s_register_operand" "")]
3688 UNSPEC_VBSL))]
3689 "TARGET_NEON"
3690 {
3691 /* We can't alias operands together if they have different modes. */
3692 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
3693 })
3694
3695 ;; vshl, vrshl
;; Register form: operand 2 is a register operand of the same mode as
;; operand 1.  The mnemonic and the type-suffix prefix in the template come
;; from the <shift_op> and <sup> attributes of the VSHL iterator.
3696 (define_insn "neon_v<shift_op><sup><mode>"
3697 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3698 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3699 (match_operand:VDQIX 2 "s_register_operand" "w")]
3700 VSHL))]
3701 "TARGET_NEON"
3702 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3703 [(set_attr "type" "neon_shift_imm<q>")]
3704 )
3705
3706 ;; vqshl, vqrshl
;; Register form over the VQSHL iterator: same operand layout and template as
;; the vshl/vrshl pattern, but with the "neon_sat_shift_imm" type attribute.
3707 (define_insn "neon_v<shift_op><sup><mode>"
3708 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3709 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3710 (match_operand:VDQIX 2 "s_register_operand" "w")]
3711 VQSHL))]
3712 "TARGET_NEON"
3713 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3714 [(set_attr "type" "neon_sat_shift_imm<q>")]
3715 )
3716
3717 ;; vshr_n, vrshr_n
;; Immediate form: operand 2 is a constant.  At output time it is checked by
;; neon_const_bounds with bound arguments 1 and (element bits + 1), then
;; printed as the last operand of the instruction.
3718 (define_insn "neon_v<shift_op><sup>_n<mode>"
3719 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3720 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3721 (match_operand:SI 2 "immediate_operand" "i")]
3722 VSHR_N))]
3723 "TARGET_NEON"
3724 {
3725 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3726 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3727 }
3728 [(set_attr "type" "neon_shift_imm<q>")]
3729 )
3730
3731 ;; vshrn_n, vrshrn_n
;; Narrowing immediate shift: the source (operand 1) has a VN mode and the
;; result has the corresponding <V_narrow> mode.  The immediate is checked by
;; neon_const_bounds with bound arguments 1 and (source element bits / 2 + 1).
3732 (define_insn "neon_v<shift_op>_n<mode>"
3733 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3734 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3735 (match_operand:SI 2 "immediate_operand" "i")]
3736 VSHRN_N))]
3737 "TARGET_NEON"
3738 {
3739 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3740 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3741 }
3742 [(set_attr "type" "neon_shift_imm_narrow_q")]
3743 )
3744
3745 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing immediate shift over the VQSHRN_N iterator.  Same
;; operand layout and bounds check as vshrn_n; the type suffix is built from
;; <sup> and <V_sz_elem>.
3746 (define_insn "neon_v<shift_op><sup>_n<mode>"
3747 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3748 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3749 (match_operand:SI 2 "immediate_operand" "i")]
3750 VQSHRN_N))]
3751 "TARGET_NEON"
3752 {
3753 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3754 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3755 }
3756 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3757 )
3758
3759 ;; vqshrun_n, vqrshrun_n
;; Saturating narrowing immediate shift over the VQSHRUN_N iterator.  Same
;; operand layout and bounds check as vshrn_n; the type suffix printed is
;; <V_s_elem>.
3760 (define_insn "neon_v<shift_op>_n<mode>"
3761 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3762 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3763 (match_operand:SI 2 "immediate_operand" "i")]
3764 VQSHRUN_N))]
3765 "TARGET_NEON"
3766 {
3767 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3768 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3769 }
3770 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3771 )
3772
;; vshl_n: left shift by the immediate operand 2.  The immediate is checked
;; by neon_const_bounds with bound arguments 0 and the element bit width.
3773 (define_insn "neon_vshl_n<mode>"
3774 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3775 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3776 (match_operand:SI 2 "immediate_operand" "i")]
3777 UNSPEC_VSHL_N))]
3778 "TARGET_NEON"
3779 {
3780 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3781 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3782 }
3783 [(set_attr "type" "neon_shift_imm<q>")]
3784 )
3785
;; vqshl_n: saturating left shift by the immediate operand 2, instantiated
;; over the VQSHL_N iterator (<sup> selects the type-suffix prefix).  The
;; immediate is checked by neon_const_bounds with bound arguments 0 and the
;; element bit width.
3786 (define_insn "neon_vqshl_<sup>_n<mode>"
3787 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3788 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3789 (match_operand:SI 2 "immediate_operand" "i")]
3790 VQSHL_N))]
3791 "TARGET_NEON"
3792 {
3793 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3794 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3795 }
3796 [(set_attr "type" "neon_sat_shift_imm<q>")]
3797 )
3798
;; vqshlu_n: emits "vqshlu.<V_s_elem>" with the immediate operand 2.  The
;; immediate is checked by neon_const_bounds with bound arguments 0 and the
;; element bit width.
3799 (define_insn "neon_vqshlu_n<mode>"
3800 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3801 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3802 (match_operand:SI 2 "immediate_operand" "i")]
3803 UNSPEC_VQSHLU_N))]
3804 "TARGET_NEON"
3805 {
3806 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3807 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
3808 }
3809 [(set_attr "type" "neon_sat_shift_imm<q>")]
3810 )
3811
;; vshll_n: widening left shift by the immediate operand 2.  The source
;; (operand 1) has a VW mode and the result has the <V_widen> mode.
3812 (define_insn "neon_vshll<sup>_n<mode>"
3813 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3814 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3815 (match_operand:SI 2 "immediate_operand" "i")]
3816 VSHLL_N))]
3817 "TARGET_NEON"
3818 {
3819 /* The boundaries are: 0 < imm <= size. */
/* NOTE(review): the lower-bound argument passed below is 0, whereas the
   other patterns in this file that must exclude 0 pass 1.  If
   neon_const_bounds treats its range as [low, high), this accepts
   0 <= imm <= size, which disagrees with the "0 < imm" above -- confirm
   which of the two is intended.  */
3820 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3821 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
3822 }
3823 [(set_attr "type" "neon_shift_imm_long")]
3824 )
3825
3826 ;; vsra_n, vrsra_n
;; Operand 1 is tied to the output (constraint "0"), so the template prints
;; only operands 0, 2 and 3.  The immediate (operand 3) is checked by
;; neon_const_bounds with bound arguments 1 and (element bits + 1).
3827 (define_insn "neon_v<shift_op><sup>_n<mode>"
3828 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3829 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3830 (match_operand:VDQIX 2 "s_register_operand" "w")
3831 (match_operand:SI 3 "immediate_operand" "i")]
3832 VSRA_N))]
3833 "TARGET_NEON"
3834 {
3835 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3836 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3837 }
3838 [(set_attr "type" "neon_shift_acc<q>")]
3839 )
3840
;; vsri_n: operand 1 is tied to the output (constraint "0"), so the template
;; prints only operands 0, 2 and 3.  The immediate (operand 3) is checked by
;; neon_const_bounds with bound arguments 1 and (element bits + 1).
3841 (define_insn "neon_vsri_n<mode>"
3842 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3843 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3844 (match_operand:VDQIX 2 "s_register_operand" "w")
3845 (match_operand:SI 3 "immediate_operand" "i")]
3846 UNSPEC_VSRI))]
3847 "TARGET_NEON"
3848 {
3849 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3850 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3851 }
3852 [(set_attr "type" "neon_shift_reg<q>")]
3853 )
3854
;; vsli_n: operand 1 is tied to the output (constraint "0"), so the template
;; prints only operands 0, 2 and 3.  The immediate (operand 3) is checked by
;; neon_const_bounds with bound arguments 0 and the element bit width.
3855 (define_insn "neon_vsli_n<mode>"
3856 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3857 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3858 (match_operand:VDQIX 2 "s_register_operand" "w")
3859 (match_operand:SI 3 "immediate_operand" "i")]
3860 UNSPEC_VSLI))]
3861 "TARGET_NEON"
3862 {
3863 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3864 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3865 }
3866 [(set_attr "type" "neon_shift_reg<q>")]
3867 )
3868
;; vtbl with a one-register list: operand 1 is printed inside the braces of
;; the register list and operand 2 follows it.
3869 (define_insn "neon_vtbl1v8qi"
3870 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3871 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3872 (match_operand:V8QI 2 "s_register_operand" "w")]
3873 UNSPEC_VTBL))]
3874 "TARGET_NEON"
3875 "vtbl.8\t%P0, {%P1}, %P2"
3876 [(set_attr "type" "neon_tbl1")]
3877 )
3878
;; vtbl with a two-register list.  Operand 1 is a single TImode register; the
;; output code rebuilds it as two V8QImode hard registers (REGNO and
;; REGNO + 2) so that each can be printed in the register list.  The
;; instruction is written directly with output_asm_insn and an empty template
;; is returned.
3879 (define_insn "neon_vtbl2v8qi"
3880 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3881 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3882 (match_operand:V8QI 2 "s_register_operand" "w")]
3883 UNSPEC_VTBL))]
3884 "TARGET_NEON"
3885 {
3886 rtx ops[4];
3887 int tabbase = REGNO (operands[1]);
3888
3889 ops[0] = operands[0];
3890 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3891 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3892 ops[3] = operands[2];
3893 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3894
3895 return "";
3896 }
3897 [(set_attr "type" "neon_tbl2")]
3898 )
3899
;; vtbl with a three-register list.  Operand 1 is a single EImode register;
;; the output code rebuilds it as three V8QImode hard registers (REGNO,
;; REGNO + 2, REGNO + 4) for the register list and prints the instruction
;; with output_asm_insn.
3900 (define_insn "neon_vtbl3v8qi"
3901 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3902 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3903 (match_operand:V8QI 2 "s_register_operand" "w")]
3904 UNSPEC_VTBL))]
3905 "TARGET_NEON"
3906 {
3907 rtx ops[5];
3908 int tabbase = REGNO (operands[1]);
3909
3910 ops[0] = operands[0];
3911 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3912 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3913 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3914 ops[4] = operands[2];
3915 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3916
3917 return "";
3918 }
3919 [(set_attr "type" "neon_tbl3")]
3920 )
3921
;; vtbl with a four-register list.  Operand 1 is a single OImode register;
;; the output code rebuilds it as four V8QImode hard registers (REGNO,
;; REGNO + 2, REGNO + 4, REGNO + 6) for the register list and prints the
;; instruction with output_asm_insn.
3922 (define_insn "neon_vtbl4v8qi"
3923 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3924 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3925 (match_operand:V8QI 2 "s_register_operand" "w")]
3926 UNSPEC_VTBL))]
3927 "TARGET_NEON"
3928 {
3929 rtx ops[6];
3930 int tabbase = REGNO (operands[1]);
3931
3932 ops[0] = operands[0];
3933 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3934 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3935 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3936 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
3937 ops[5] = operands[2];
3938 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
3939
3940 return "";
3941 }
3942 [(set_attr "type" "neon_tbl4")]
3943 )
3944
3945 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; neon_vtbl1v16qi has no assembly of its own ("#"); after reload it is split
;; into two neon_vtbl2v8qi insns.  Operand 1 is viewed as TImode and used as
;; the table of both; the first insn handles the low V8QI halves of operands 0
;; and 2, the second the high halves.  The output is earlyclobber ("=&w"),
;; presumably because the first half-insn writes part of operand 0 while
;; operands 1 and 2 are still needed by the second.
3946 (define_insn_and_split "neon_vtbl1v16qi"
3947 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3948 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
3949 (match_operand:V16QI 2 "s_register_operand" "w")]
3950 UNSPEC_VTBL))]
3951 "TARGET_NEON"
3952 "#"
3953 "&& reload_completed"
3954 [(const_int 0)]
3955 {
3956 rtx op0, op1, op2, part0, part2;
3957 unsigned ofs;
3958
3959 op0 = operands[0];
3960 op1 = gen_lowpart (TImode, operands[1]);
3961 op2 = operands[2];
3962
3963 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3964 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3965 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3966 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3967
3968 ofs = subreg_highpart_offset (V8QImode, V16QImode);
3969 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3970 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3971 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3972 DONE;
3973 }
3974 [(set_attr "type" "multiple")]
3975 )
3976
;; V16QI table lookup with an OImode table (operand 1).  It has no assembly
;; of its own ("#"); after reload it is split into two insns, one for the low
;; and one for the high V8QI halves of operands 0 and 2, both using operand 1
;; unchanged as the table.
;; NOTE(review): both halves are emitted with gen_neon_vtbl2v8qi, whose
;; pattern declares its table operand as TImode, while op1 here is OImode;
;; confirm that this generator (rather than a four-register one) is intended.
3977 (define_insn_and_split "neon_vtbl2v16qi"
3978 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
3979 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
3980 (match_operand:V16QI 2 "s_register_operand" "w")]
3981 UNSPEC_VTBL))]
3982 "TARGET_NEON"
3983 "#"
3984 "&& reload_completed"
3985 [(const_int 0)]
3986 {
3987 rtx op0, op1, op2, part0, part2;
3988 unsigned ofs;
3989
3990 op0 = operands[0];
3991 op1 = operands[1];
3992 op2 = operands[2];
3993
3994 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
3995 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
3996 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
3997 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
3998
3999 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4000 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4001 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4002 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4003 DONE;
4004 }
4005 [(set_attr "type" "multiple")]
4006 )
4007
4008 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4009 ;; handle quad-word input modes, producing octa-word output modes. But
4010 ;; that requires us to add support for octa-word vector modes in moves.
4011 ;; That seems overkill for this one use in vec_perm.
;; The pattern combines two V16QI registers into one OImode register under
;; UNSPEC_VCONCAT.  It has no assembly of its own ("#"); after reload the
;; split hands the operands to neon_split_vcombine.
4012 (define_insn_and_split "neon_vcombinev16qi"
4013 [(set (match_operand:OI 0 "s_register_operand" "=w")
4014 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4015 (match_operand:V16QI 2 "s_register_operand" "w")]
4016 UNSPEC_VCONCAT))]
4017 "TARGET_NEON"
4018 "#"
4019 "&& reload_completed"
4020 [(const_int 0)]
4021 {
4022 neon_split_vcombine (operands);
4023 DONE;
4024 }
4025 [(set_attr "type" "multiple")]
4026 )
4027
;; vtbx with a one-register list.  Operand 1 is tied to the output
;; (constraint "0"), so the template prints only operand 0, the list
;; {operand 2} and operand 3.
4028 (define_insn "neon_vtbx1v8qi"
4029 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4030 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4031 (match_operand:V8QI 2 "s_register_operand" "w")
4032 (match_operand:V8QI 3 "s_register_operand" "w")]
4033 UNSPEC_VTBX))]
4034 "TARGET_NEON"
4035 "vtbx.8\t%P0, {%P2}, %P3"
4036 [(set_attr "type" "neon_tbl1")]
4037 )
4038
;; vtbx with a two-register list.  Operand 1 is tied to the output; the
;; TImode table (operand 2) is rebuilt as two V8QImode hard registers (REGNO
;; and REGNO + 2) for the register list, and the instruction is printed with
;; output_asm_insn.
4039 (define_insn "neon_vtbx2v8qi"
4040 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4041 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4042 (match_operand:TI 2 "s_register_operand" "w")
4043 (match_operand:V8QI 3 "s_register_operand" "w")]
4044 UNSPEC_VTBX))]
4045 "TARGET_NEON"
4046 {
4047 rtx ops[4];
4048 int tabbase = REGNO (operands[2]);
4049
4050 ops[0] = operands[0];
4051 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4052 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4053 ops[3] = operands[3];
4054 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4055
4056 return "";
4057 }
4058 [(set_attr "type" "neon_tbl2")]
4059 )
4060
;; vtbx with a three-register list.  Operand 1 is tied to the output; the
;; EImode table (operand 2) is rebuilt as three V8QImode hard registers
;; (REGNO, REGNO + 2, REGNO + 4) for the register list.
4061 (define_insn "neon_vtbx3v8qi"
4062 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4063 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4064 (match_operand:EI 2 "s_register_operand" "w")
4065 (match_operand:V8QI 3 "s_register_operand" "w")]
4066 UNSPEC_VTBX))]
4067 "TARGET_NEON"
4068 {
4069 rtx ops[5];
4070 int tabbase = REGNO (operands[2]);
4071
4072 ops[0] = operands[0];
4073 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4074 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4075 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4076 ops[4] = operands[3];
4077 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4078
4079 return "";
4080 }
4081 [(set_attr "type" "neon_tbl3")]
4082 )
4083
;; vtbx with a four-register list.  Operand 1 is tied to the output; the
;; OImode table (operand 2) is rebuilt as four V8QImode hard registers
;; (REGNO, REGNO + 2, REGNO + 4, REGNO + 6) for the register list.
4084 (define_insn "neon_vtbx4v8qi"
4085 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4086 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4087 (match_operand:OI 2 "s_register_operand" "w")
4088 (match_operand:V8QI 3 "s_register_operand" "w")]
4089 UNSPEC_VTBX))]
4090 "TARGET_NEON"
4091 {
4092 rtx ops[6];
4093 int tabbase = REGNO (operands[2]);
4094
4095 ops[0] = operands[0];
4096 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4097 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4098 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4099 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4100 ops[5] = operands[3];
4101 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4102
4103 return "";
4104 }
4105 [(set_attr "type" "neon_tbl4")]
4106 )
4107
;; Expander producing the two-result vtrn RTL: a parallel of two sets, with
;; operand 0 receiving UNSPEC_VTRN1 and operand 3 receiving UNSPEC_VTRN2 of
;; the same inputs (operands 1 and 2).  There is no preparation code.
4108 (define_expand "neon_vtrn<mode>_internal"
4109 [(parallel
4110 [(set (match_operand:VDQW 0 "s_register_operand" "")
4111 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4112 (match_operand:VDQW 2 "s_register_operand" "")]
4113 UNSPEC_VTRN1))
4114 (set (match_operand:VDQW 3 "s_register_operand" "")
4115 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4116 "TARGET_NEON"
4117 ""
4118 )
4119
4120 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2 and the two inputs are operands
;; 1 and 3; input 1 is tied to output 0 (constraint "0") and input 3 to
;; output 2 (constraint "2").  Only the two output registers appear in the
;; assembly template.
4121 (define_insn "*neon_vtrn<mode>_insn"
4122 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4123 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4124 (match_operand:VDQW 3 "s_register_operand" "2")]
4125 UNSPEC_VTRN1))
4126 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4127 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4128 UNSPEC_VTRN2))]
4129 "TARGET_NEON"
4130 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4131 [(set_attr "type" "neon_permute<q>")]
4132 )
4133
;; Expander producing the two-result vzip RTL: a parallel of two sets, with
;; operand 0 receiving UNSPEC_VZIP1 and operand 3 receiving UNSPEC_VZIP2 of
;; the same inputs (operands 1 and 2).  There is no preparation code.
4134 (define_expand "neon_vzip<mode>_internal"
4135 [(parallel
4136 [(set (match_operand:VDQW 0 "s_register_operand" "")
4137 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4138 (match_operand:VDQW 2 "s_register_operand" "")]
4139 UNSPEC_VZIP1))
4140 (set (match_operand:VDQW 3 "s_register_operand" "")
4141 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4142 "TARGET_NEON"
4143 ""
4144 )
4145
4146 ;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2; input 1 is tied to output 0 and input 3 to
;; output 2, so only the two output registers appear in the template.
4147 (define_insn "*neon_vzip<mode>_insn"
4148 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4149 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4150 (match_operand:VDQW 3 "s_register_operand" "2")]
4151 UNSPEC_VZIP1))
4152 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4153 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4154 UNSPEC_VZIP2))]
4155 "TARGET_NEON"
4156 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4157 [(set_attr "type" "neon_zip<q>")]
4158 )
4159
;; Expander producing the two-result vuzp RTL: a parallel of two sets, with
;; operand 0 receiving UNSPEC_VUZP1 and operand 3 receiving UNSPEC_VUZP2 of
;; the same inputs (operands 1 and 2).  There is no preparation code.
4160 (define_expand "neon_vuzp<mode>_internal"
4161 [(parallel
4162 [(set (match_operand:VDQW 0 "s_register_operand" "")
4163 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4164 (match_operand:VDQW 2 "s_register_operand" "")]
4165 UNSPEC_VUZP1))
4166 (set (match_operand:VDQW 3 "s_register_operand" "")
4167 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4168 "TARGET_NEON"
4169 ""
4170 )
4171
4172 ;; Note: Different operand numbering to handle tied registers correctly.
;; Outputs are operands 0 and 2; input 1 is tied to output 0 and input 3 to
;; output 2, so only the two output registers appear in the template.
4173 (define_insn "*neon_vuzp<mode>_insn"
4174 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4175 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4176 (match_operand:VDQW 3 "s_register_operand" "2")]
4177 UNSPEC_VUZP1))
4178 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4179 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4180 UNSPEC_VUZP2))]
4181 "TARGET_NEON"
4182 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4183 [(set_attr "type" "neon_zip<q>")]
4184 )
4185
;; Reinterpret any VD_RE-mode register (operand 1) as V8QI (operand 0).  All
;; of the work is done by neon_reinterpret; the expander emits nothing else.
4186 (define_expand "neon_vreinterpretv8qi<mode>"
4187 [(match_operand:V8QI 0 "s_register_operand" "")
4188 (match_operand:VD_RE 1 "s_register_operand" "")]
4189 "TARGET_NEON"
4190 {
4191 neon_reinterpret (operands[0], operands[1]);
4192 DONE;
4193 })
4194
;; Reinterpret any VD_RE-mode register (operand 1) as V4HI (operand 0).  All
;; of the work is done by neon_reinterpret; the expander emits nothing else.
4195 (define_expand "neon_vreinterpretv4hi<mode>"
4196 [(match_operand:V4HI 0 "s_register_operand" "")
4197 (match_operand:VD_RE 1 "s_register_operand" "")]
4198 "TARGET_NEON"
4199 {
4200 neon_reinterpret (operands[0], operands[1]);
4201 DONE;
4202 })
4203
;; Reinterpret any VD_RE-mode register (operand 1) as V2SI (operand 0).  All
;; of the work is done by neon_reinterpret; the expander emits nothing else.
4204 (define_expand "neon_vreinterpretv2si<mode>"
4205 [(match_operand:V2SI 0 "s_register_operand" "")
4206 (match_operand:VD_RE 1 "s_register_operand" "")]
4207 "TARGET_NEON"
4208 {
4209 neon_reinterpret (operands[0], operands[1]);
4210 DONE;
4211 })
4212
;; Reinterpret any VD_RE-mode register (operand 1) as V2SF (operand 0).  All
;; of the work is done by neon_reinterpret; the expander emits nothing else.
4213 (define_expand "neon_vreinterpretv2sf<mode>"
4214 [(match_operand:V2SF 0 "s_register_operand" "")
4215 (match_operand:VD_RE 1 "s_register_operand" "")]
4216 "TARGET_NEON"
4217 {
4218 neon_reinterpret (operands[0], operands[1]);
4219 DONE;
4220 })
4221
;; Reinterpret any VD_RE-mode register (operand 1) as DI (operand 0).  All
;; of the work is done by neon_reinterpret; the expander emits nothing else.
4222 (define_expand "neon_vreinterpretdi<mode>"
4223 [(match_operand:DI 0 "s_register_operand" "")
4224 (match_operand:VD_RE 1 "s_register_operand" "")]
4225 "TARGET_NEON"
4226 {
4227 neon_reinterpret (operands[0], operands[1]);
4228 DONE;
4229 })
4230
;; Reinterpret any VQXMOV-mode register (operand 1) as TI (operand 0).  All
;; of the work is done by neon_reinterpret; the expander emits nothing else.
4231 (define_expand "neon_vreinterpretti<mode>"
4232 [(match_operand:TI 0 "s_register_operand" "")
4233 (match_operand:VQXMOV 1 "s_register_operand" "")]
4234 "TARGET_NEON"
4235 {
4236 neon_reinterpret (operands[0], operands[1]);
4237 DONE;
4238 })
4239
4240
;; Reinterpret any VQXMOV-mode register (operand 1) as V16QI (operand 0).
;; All of the work is done by neon_reinterpret; the expander emits nothing
;; else.
4241 (define_expand "neon_vreinterpretv16qi<mode>"
4242 [(match_operand:V16QI 0 "s_register_operand" "")
4243 (match_operand:VQXMOV 1 "s_register_operand" "")]
4244 "TARGET_NEON"
4245 {
4246 neon_reinterpret (operands[0], operands[1]);
4247 DONE;
4248 })
4249
;; Reinterpret any VQXMOV-mode register (operand 1) as V8HI (operand 0).
;; All of the work is done by neon_reinterpret; the expander emits nothing
;; else.
4250 (define_expand "neon_vreinterpretv8hi<mode>"
4251 [(match_operand:V8HI 0 "s_register_operand" "")
4252 (match_operand:VQXMOV 1 "s_register_operand" "")]
4253 "TARGET_NEON"
4254 {
4255 neon_reinterpret (operands[0], operands[1]);
4256 DONE;
4257 })
4258
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SI (operand 0).
;; All of the work is done by neon_reinterpret; the expander emits nothing
;; else.
4259 (define_expand "neon_vreinterpretv4si<mode>"
4260 [(match_operand:V4SI 0 "s_register_operand" "")
4261 (match_operand:VQXMOV 1 "s_register_operand" "")]
4262 "TARGET_NEON"
4263 {
4264 neon_reinterpret (operands[0], operands[1]);
4265 DONE;
4266 })
4267
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SF (operand 0).
;; All of the work is done by neon_reinterpret; the expander emits nothing
;; else.
4268 (define_expand "neon_vreinterpretv4sf<mode>"
4269 [(match_operand:V4SF 0 "s_register_operand" "")
4270 (match_operand:VQXMOV 1 "s_register_operand" "")]
4271 "TARGET_NEON"
4272 {
4273 neon_reinterpret (operands[0], operands[1]);
4274 DONE;
4275 })
4276
;; Reinterpret any VQXMOV-mode register (operand 1) as V2DI (operand 0).
;; All of the work is done by neon_reinterpret; the expander emits nothing
;; else.
4277 (define_expand "neon_vreinterpretv2di<mode>"
4278 [(match_operand:V2DI 0 "s_register_operand" "")
4279 (match_operand:VQXMOV 1 "s_register_operand" "")]
4280 "TARGET_NEON"
4281 {
4282 neon_reinterpret (operands[0], operands[1]);
4283 DONE;
4284 })
4285
;; vec_load_lanes expander with identical source and destination modes.  It
;; has no preparation code and expands to the UNSPEC_VLD1 form, which has the
;; same shape as the neon_vld1<mode> insn.
4286 (define_expand "vec_load_lanes<mode><mode>"
4287 [(set (match_operand:VDQX 0 "s_register_operand")
4288 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4289 UNSPEC_VLD1))]
4290 "TARGET_NEON")
4291
;; Whole-vector vld1: loads operand 0 from the structure memory operand 1
;; (constraint "Um"), printing the destination with %h and the address
;; with %A.
4292 (define_insn "neon_vld1<mode>"
4293 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4294 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4295 UNSPEC_VLD1))]
4296 "TARGET_NEON"
4297 "vld1.<V_sz_elem>\t%h0, %A1"
4298 [(set_attr "type" "neon_load1_1reg<q>")]
4299 )
4300
4301 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4302 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
4303 ;; lane order here.
;; VDX variant.  Operand 2 (the previous vector value) is tied to the
;; output.  The lane index is converted with NEON_ENDIAN_LANE_N and written
;; back into operands[3] before printing.  When the mode has a single unit
;; (max == 1) the lane syntax is dropped and a plain vld1 of the register is
;; emitted.
4304 (define_insn "neon_vld1_lane<mode>"
4305 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4306 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4307 (match_operand:VDX 2 "s_register_operand" "0")
4308 (match_operand:SI 3 "immediate_operand" "i")]
4309 UNSPEC_VLD1_LANE))]
4310 "TARGET_NEON"
4311 {
4312 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4313 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4314 operands[3] = GEN_INT (lane);
4315 if (max == 1)
4316 return "vld1.<V_sz_elem>\t%P0, %A1";
4317 else
4318 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4319 }
4320 [(set_attr "type" "neon_load1_one_lane<q>")]
4321 )
4322
4323 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4324 ;; here on big endian targets.
;; VQX variant.  The instruction is printed on one <V_HALF>mode register, so
;; the output code reduces the lane to an index within one half: a lane in
;; the upper half (lane >= max / 2) selects the register at REGNO + 2 and has
;; max / 2 subtracted.  operands[0] is replaced by that half register.  When
;; the mode has two units (max == 2) the lane syntax is dropped.
4325 (define_insn "neon_vld1_lane<mode>"
4326 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4327 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4328 (match_operand:VQX 2 "s_register_operand" "0")
4329 (match_operand:SI 3 "immediate_operand" "i")]
4330 UNSPEC_VLD1_LANE))]
4331 "TARGET_NEON"
4332 {
4333 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4334 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
/* operands[3] is set here and again inside the branch below once the lane
   has been adjusted; the second assignment wins for upper-half lanes.  */
4335 operands[3] = GEN_INT (lane);
4336 int regno = REGNO (operands[0]);
4337 if (lane >= max / 2)
4338 {
4339 lane -= max / 2;
4340 regno += 2;
4341 operands[3] = GEN_INT (lane);
4342 }
4343 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4344 if (max == 2)
4345 return "vld1.<V_sz_elem>\t%P0, %A1";
4346 else
4347 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4348 }
4349 [(set_attr "type" "neon_load1_one_lane<q>")]
4350 )
4351
;; VD variant of vld1_dup: a vec_duplicate of the <V_elem> memory operand 1,
;; printed as a vld1 with the all-lanes "[]" syntax on the destination
;; register.
4352 (define_insn "neon_vld1_dup<mode>"
4353 [(set (match_operand:VD 0 "s_register_operand" "=w")
4354 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4355 "TARGET_NEON"
4356 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4357 [(set_attr "type" "neon_load1_all_lanes<q>")]
4358 )
4359
4360 ;; Special case for DImode. Treat it exactly like a simple load.
;; The expander has no preparation code and produces the same UNSPEC_VLD1
;; form that the plain vld1 pattern uses.
4361 (define_expand "neon_vld1_dupdi"
4362 [(set (match_operand:DI 0 "s_register_operand" "")
4363 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
4364 UNSPEC_VLD1))]
4365 "TARGET_NEON"
4366 ""
4367 )
4368
;; VQ variant of vld1_dup: a vec_duplicate of the <V_elem> memory operand 1,
;; printed as one vld1 whose register list names operand 0 twice, once with
;; the %e modifier and once with %f, each with the all-lanes "[]" syntax.
4369 (define_insn "neon_vld1_dup<mode>"
4370 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4371 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4372 "TARGET_NEON"
4373 {
4374 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4375 }
4376 [(set_attr "type" "neon_load1_all_lanes<q>")]
4377 )
4378
;; V2DI variant of vld1_dup.  It has no assembly of its own ("#"); after
;; reload it is split into a neon_vld1_dupdi load into the low DImode part of
;; operand 0 followed by a move of that part into the high DImode part.  The
;; "length" attribute is set to 8.
4379 (define_insn_and_split "neon_vld1_dupv2di"
4380 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4381 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4382 "TARGET_NEON"
4383 "#"
4384 "&& reload_completed"
4385 [(const_int 0)]
4386 {
4387 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4388 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4389 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4390 DONE;
4391 }
4392 [(set_attr "length" "8")
4393 (set_attr "type" "neon_load1_all_lanes_q")]
4394 )
4395
;; vec_store_lanes expander with identical source and destination modes.  It
;; has no preparation code and expands to the UNSPEC_VST1 form, which has the
;; same shape as the neon_vst1<mode> insn.
4396 (define_expand "vec_store_lanes<mode><mode>"
4397 [(set (match_operand:VDQX 0 "neon_struct_operand")
4398 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
4399 UNSPEC_VST1))]
4400 "TARGET_NEON")
4401
;; Whole-vector vst1: stores register operand 1 to the structure memory
;; operand 0 (constraint "Um"), printing the source with %h and the address
;; with %A.
4402 (define_insn "neon_vst1<mode>"
4403 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4404 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4405 UNSPEC_VST1))]
4406 "TARGET_NEON"
4407 "vst1.<V_sz_elem>\t%h1, %A0"
4408 [(set_attr "type" "neon_store1_1reg<q>")])
4409
4410 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4411 ;; here on big endian targets.
;; VDX variant: stores one lane of operand 1 to the <V_elem> memory operand
;; 0.  The lane index is converted with NEON_ENDIAN_LANE_N and written back
;; into operands[2]; when the mode has a single unit (max == 1) the whole
;; register is stored without lane syntax.
4412 (define_insn "neon_vst1_lane<mode>"
4413 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4414 (unspec:<V_elem>
4415 [(match_operand:VDX 1 "s_register_operand" "w")
4416 (match_operand:SI 2 "immediate_operand" "i")]
4417 UNSPEC_VST1_LANE))]
4418 "TARGET_NEON"
4419 {
4420 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4421 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4422 operands[2] = GEN_INT (lane);
4423 if (max == 1)
4424 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4425 else
4426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4427 }
4428 [(set_attr "type" "neon_store1_one_lane<q>")]
4429 )
4430
4431 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4432 ;; here on big endian targets.
;; VQX variant.  The store is printed on one <V_HALF>mode register: a lane in
;; the upper half (lane >= max / 2) selects the register at REGNO + 2 and has
;; max / 2 subtracted.  operands[1] and operands[2] are then replaced by the
;; half register and the adjusted lane.  When the mode has two units
;; (max == 2) the whole half register is stored without lane syntax.
4433 (define_insn "neon_vst1_lane<mode>"
4434 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4435 (unspec:<V_elem>
4436 [(match_operand:VQX 1 "s_register_operand" "w")
4437 (match_operand:SI 2 "immediate_operand" "i")]
4438 UNSPEC_VST1_LANE))]
4439 "TARGET_NEON"
4440 {
4441 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4442 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4443 int regno = REGNO (operands[1]);
4444 if (lane >= max / 2)
4445 {
4446 lane -= max / 2;
4447 regno += 2;
4448 }
4449 operands[2] = GEN_INT (lane);
4450 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4451 if (max == 2)
4452 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4453 else
4454 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4455 }
4456 [(set_attr "type" "neon_store1_one_lane<q>")]
4457 )
4458
;; vec_load_lanes expander for a TImode result.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec carries only the VDX element mode, so that one
;; pattern exists per mode.  The RTL has the same shape as the TImode
;; neon_vld2<mode> insn.
4459 (define_expand "vec_load_lanesti<mode>"
4460 [(set (match_operand:TI 0 "s_register_operand")
4461 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4462 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4463 UNSPEC_VLD2))]
4464 "TARGET_NEON")
4465
;; vld2 into a TImode register, one pattern per VDX mode.  For 64-bit
;; elements the output is "vld1.64" rather than vld2; the "type" attribute
;; makes the same 64-bit distinction (load1_2reg versus load2_2reg).
4466 (define_insn "neon_vld2<mode>"
4467 [(set (match_operand:TI 0 "s_register_operand" "=w")
4468 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4469 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4470 UNSPEC_VLD2))]
4471 "TARGET_NEON"
4472 {
4473 if (<V_sz_elem> == 64)
4474 return "vld1.64\t%h0, %A1";
4475 else
4476 return "vld2.<V_sz_elem>\t%h0, %A1";
4477 }
4478 [(set (attr "type")
4479 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4480 (const_string "neon_load1_2reg<q>")
4481 (const_string "neon_load2_2reg<q>")))]
4482 )
4483
;; vec_load_lanes expander for an OImode result.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec carries only the VQ2 element mode.  The RTL has
;; the same shape as the OImode neon_vld2<mode> insn.
4484 (define_expand "vec_load_lanesoi<mode>"
4485 [(set (match_operand:OI 0 "s_register_operand")
4486 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4487 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4488 UNSPEC_VLD2))]
4489 "TARGET_NEON")
4490
;; vld2 into an OImode register, one pattern per VQ2 mode; always printed as
;; a single vld2 with the destination under the %h modifier.
4491 (define_insn "neon_vld2<mode>"
4492 [(set (match_operand:OI 0 "s_register_operand" "=w")
4493 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4494 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4495 UNSPEC_VLD2))]
4496 "TARGET_NEON"
4497 "vld2.<V_sz_elem>\t%h0, %A1"
4498 [(set_attr "type" "neon_load2_2reg_q")])
4499
4500 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4501 ;; here on big endian targets.
;; TImode (VD_LANE) variant.  Operand 2, the previous contents, is tied to
;; the output.  The output code names the two DImode registers at REGNO and
;; REGNO + 2 of operand 0 and prints a vld2 that addresses the same
;; (endian-adjusted) lane in both.
4502 (define_insn "neon_vld2_lane<mode>"
4503 [(set (match_operand:TI 0 "s_register_operand" "=w")
4504 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4505 (match_operand:TI 2 "s_register_operand" "0")
4506 (match_operand:SI 3 "immediate_operand" "i")
4507 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4508 UNSPEC_VLD2_LANE))]
4509 "TARGET_NEON"
4510 {
4511 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4512 int regno = REGNO (operands[0]);
4513 rtx ops[4];
4514 ops[0] = gen_rtx_REG (DImode, regno);
4515 ops[1] = gen_rtx_REG (DImode, regno + 2);
4516 ops[2] = operands[1];
4517 ops[3] = GEN_INT (lane);
4518 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4519 return "";
4520 }
4521 [(set_attr "type" "neon_load2_one_lane<q>")]
4522 )
4523
4524 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4525 ;; here on big endian targets.
;; OImode (VQ_HS) variant.  A lane in the upper half (lane >= max / 2) has
;; max / 2 subtracted and moves the base register number up by 2.  The two
;; DImode registers printed are then REGNO and REGNO + 4, both addressing the
;; same adjusted lane.
4526 (define_insn "neon_vld2_lane<mode>"
4527 [(set (match_operand:OI 0 "s_register_operand" "=w")
4528 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4529 (match_operand:OI 2 "s_register_operand" "0")
4530 (match_operand:SI 3 "immediate_operand" "i")
4531 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4532 UNSPEC_VLD2_LANE))]
4533 "TARGET_NEON"
4534 {
4535 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4536 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4537 int regno = REGNO (operands[0]);
4538 rtx ops[4];
4539 if (lane >= max / 2)
4540 {
4541 lane -= max / 2;
4542 regno += 2;
4543 }
4544 ops[0] = gen_rtx_REG (DImode, regno);
4545 ops[1] = gen_rtx_REG (DImode, regno + 4);
4546 ops[2] = operands[1];
4547 ops[3] = GEN_INT (lane);
4548 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4549 return "";
4550 }
4551 [(set_attr "type" "neon_load2_one_lane<q>")]
4552 )
4553
;; vld2_dup into a TImode register, one pattern per VDX mode.  When the mode
;; has more than one unit the all-lanes vld2 form is printed; otherwise a
;; plain vld1 of operand 0 (under the %h modifier) is emitted.  The "type"
;; attribute makes the same distinction using <V_mode_nunits>.
4554 (define_insn "neon_vld2_dup<mode>"
4555 [(set (match_operand:TI 0 "s_register_operand" "=w")
4556 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4557 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4558 UNSPEC_VLD2_DUP))]
4559 "TARGET_NEON"
4560 {
4561 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4562 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4563 else
4564 return "vld1.<V_sz_elem>\t%h0, %A1";
4565 }
4566 [(set (attr "type")
4567 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4568 (const_string "neon_load2_all_lanes<q>")
4569 (const_string "neon_load1_1reg<q>")))]
4570 )
4571
;; Expander for the standard vec_store_lanes pattern name with a TI-mode
;; structure and a VDX element vector.  There are no preparation
;; statements: the RTL template below (a store of an UNSPEC_VST2 of the
;; TI register operand) is emitted as written.  The inner
;; UNSPEC_VSTRUCTDUMMY only carries the iterator mode.
4572 (define_expand "vec_store_lanesti<mode>"
4573 [(set (match_operand:TI 0 "neon_struct_operand")
4574 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4575 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4576 UNSPEC_VST2))]
4577 "TARGET_NEON")
4578
;; Two-element structure store of a TI-mode register value to memory.
;; 64-bit elements are emitted as a vst1.64 of the same register list;
;; every other element size uses vst2.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  ;; This pattern stores the whole register list, not a single lane, so the
  ;; vst2 case is classified as a two-register structure store rather than
  ;; as a one-lane store.
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_2reg<q>")
                      (const_string "neon_store2_2reg<q>")))]
)
4596
;; Expander for the standard vec_store_lanes pattern name with an OI-mode
;; structure and a VQ2 element vector.  There are no preparation
;; statements: the UNSPEC_VST2 store template below is emitted as written.
4597 (define_expand "vec_store_lanesoi<mode>"
4598 [(set (match_operand:OI 0 "neon_struct_operand")
4599 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4600 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4601 UNSPEC_VST2))]
4602 "TARGET_NEON")
4603
;; Two-element structure store of an OI-mode register value (VQ2 modes).
;; Always emitted as a single vst2 whose register list is printed with the
;; %h operand modifier and whose address is printed with %A.
4604 (define_insn "neon_vst2<mode>"
4605 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4606 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4607 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4608 UNSPEC_VST2))]
4609 "TARGET_NEON"
4610 "vst2.<V_sz_elem>\t%h1, %A0"
4611 [(set_attr "type" "neon_store2_4reg<q>")]
4612 )
4613
;; Single-lane two-element structure store from a TI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  rtx xops[4];
  int i;

  xops[0] = operands[0];
  /* The two source registers are two register numbers apart.  */
  for (i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = GEN_INT (idx);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4637
;; Single-lane two-element structure store from an OI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[4];
  int i;

  /* A lane index in the upper half of the vector is rebased to the lower
     half and the register number is advanced by two to compensate.  */
  if (idx >= half)
    {
      base += 2;
      idx -= half;
    }

  xops[0] = operands[0];
  /* The two source registers are four register numbers apart.  */
  for (i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = GEN_INT (idx);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4667
;; Expander for the standard vec_load_lanes pattern name with an EI-mode
;; structure and a VDX element vector.  There are no preparation
;; statements: the UNSPEC_VLD3 load template below is emitted as written.
4668 (define_expand "vec_load_lanesei<mode>"
4669 [(set (match_operand:EI 0 "s_register_operand")
4670 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4671 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4672 UNSPEC_VLD3))]
4673 "TARGET_NEON")
4674
;; Three-element structure load into an EI-mode register value (VDX modes).
;; 64-bit elements are emitted as vld1.64; other element sizes use vld3.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  ;; The scheduling type follows the same split as the output code above.
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_3reg<q>")
                      (const_string "neon_load3_3reg<q>")))]
)
4692
;; Expander for the standard vec_load_lanes pattern name with a CI-mode
;; structure and a VQ2 element vector.  It forwards both operands unchanged
;; to the neon_vld3 expander for the same mode.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
4702
;; Three-element structure load into a CI-mode register value (VQ2 modes).
;; It is expanded into two instructions, neon_vld3qa then neon_vld3qb, each
;; reading one EI-sized piece of the CI-mode memory operand.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EI-sized piece, at offset zero.  */
  rtx first = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first));

  /* Second piece, immediately after the first.  The destination is also
     passed as an input operand of the second instruction.  */
  rtx second = adjust_address (first, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second, operands[0]));
  DONE;
})
4717
;; First half of the CI-mode vld3 expansion: loads an EI-sized memory
;; operand into the registers at offsets 0, 4 and 8 from the first register
;; of operand 0.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4];
  int i;

  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4736
;; Second half of the CI-mode vld3 expansion: loads an EI-sized memory
;; operand into the registers at offsets 2, 6 and 10 from the first register
;; of operand 0.  Operand 2 is tied to the destination (constraint "0").
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx xops[4];
  int i;

  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4756
;; Single-lane three-element structure load into an EI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  rtx xops[5];
  int i;

  /* The three destination registers are two register numbers apart.  */
  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (idx);
  /* The address is printed with plain %3 here, not with the %A modifier.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4782
;; Single-lane three-element structure load into a CI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[5];
  int i;

  /* A lane index in the upper half of the vector is rebased to the lower
     half and the register number is advanced by two to compensate.  */
  if (idx >= half)
    {
      base += 2;
      idx -= half;
    }

  /* The three destination registers are four register numbers apart.  */
  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (idx);
  /* The address is printed with plain %3 here, not with the %A modifier.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4814
;; UNSPEC_VLD3_DUP load into an EI-mode register value.  Modes with more
;; than one element use the all-lanes form of vld3; single-element modes
;; are emitted as a vld1 instead.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  int base, i;
  rtx xops[4];

  /* Handle the single-element case first.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* The three destination registers are two register numbers apart.  */
  base = REGNO (operands[0]);
  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", xops);
  return "";
}
  ;; The scheduling type follows the same split as the output code above.
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load3_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))])
4840
;; Expander for the standard vec_store_lanes pattern name with an EI-mode
;; structure and a VDX element vector.  There are no preparation
;; statements: the UNSPEC_VST3 store template below is emitted as written.
4841 (define_expand "vec_store_lanesei<mode>"
4842 [(set (match_operand:EI 0 "neon_struct_operand")
4843 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4844 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4845 UNSPEC_VST3))]
4846 "TARGET_NEON")
4847
;; Three-element structure store of an EI-mode register value to memory.
;; 64-bit elements are emitted as a vst1.64 of the same register list;
;; every other element size uses vst3.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  ;; This pattern stores the whole register list, not a single lane, so the
  ;; vst3 case is classified as a three-register structure store.  That
  ;; matches neon_vld3<mode> (neon_load3_3reg<q>) and neon_vst4<mode>
  ;; (neon_store4_4reg<q>).
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_3reg<q>")
                      (const_string "neon_store3_3reg<q>")))])
4864
;; Expander for the standard vec_store_lanes pattern name with a CI-mode
;; structure and a VQ2 element vector.  It forwards both operands unchanged
;; to the neon_vst3 expander for the same mode.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
})
4874
;; Three-element structure store of a CI-mode register value (VQ2 modes).
;; It is expanded into two instructions, neon_vst3qa then neon_vst3qb, each
;; writing one EI-sized piece of the CI-mode memory operand.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EI-sized piece, at offset zero.  */
  rtx first = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first, operands[1]));

  /* Second piece, immediately after the first.  */
  rtx second = adjust_address (first, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second, operands[1]));
  DONE;
})
4889
;; First half of the CI-mode vst3 expansion: stores the registers at
;; offsets 0, 4 and 8 from the first register of operand 1 to an EI-sized
;; memory operand.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[4];
  int i;

  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4908
;; Second half of the CI-mode vst3 expansion: stores the registers at
;; offsets 2, 6 and 10 from the first register of operand 1 to an EI-sized
;; memory operand.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[4];
  int i;

  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4927
;; Single-lane three-element structure store from an EI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  rtx xops[5];
  int i;

  xops[0] = operands[0];
  /* The three source registers are two register numbers apart.  */
  for (i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = GEN_INT (idx);
  /* The address is printed with plain %0 here, not with the %A modifier.  */
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
4953
;; Single-lane three-element structure store from a CI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[5];
  int i;

  /* A lane index in the upper half of the vector is rebased to the lower
     half and the register number is advanced by two to compensate.  */
  if (idx >= half)
    {
      base += 2;
      idx -= half;
    }

  xops[0] = operands[0];
  /* The three source registers are four register numbers apart.  */
  for (i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = GEN_INT (idx);
  /* The address is printed with plain %0 here, not with the %A modifier.  */
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
4985
;; Expander for the standard vec_load_lanes pattern name with an OI-mode
;; structure and a VDX element vector.  There are no preparation
;; statements: the UNSPEC_VLD4 load template below is emitted as written.
4986 (define_expand "vec_load_lanesoi<mode>"
4987 [(set (match_operand:OI 0 "s_register_operand")
4988 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4989 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4990 UNSPEC_VLD4))]
4991 "TARGET_NEON")
4992
;; Four-element structure load into an OI-mode register value (VDX modes).
;; 64-bit elements are emitted as vld1.64; other element sizes use vld4.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  ;; The scheduling type follows the same split as the output code above.
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_4reg<q>")
                      (const_string "neon_load4_4reg<q>")))]
)
5010
;; Expander for the standard vec_load_lanes pattern name with an XI-mode
;; structure and a VQ2 element vector.  It forwards both operands unchanged
;; to the neon_vld4 expander for the same mode.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})
5020
;; Four-element structure load into an XI-mode register value (VQ2 modes).
;; It is expanded into two instructions, neon_vld4qa then neon_vld4qb, each
;; reading one OI-sized piece of the XI-mode memory operand.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OI-sized piece, at offset zero.  */
  rtx first = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first));

  /* Second piece, immediately after the first.  The destination is also
     passed as an input operand of the second instruction.  */
  rtx second = adjust_address (first, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second, operands[0]));
  DONE;
})
5035
;; First half of the XI-mode vld4 expansion: loads an OI-sized memory
;; operand into the registers at offsets 0, 4, 8 and 12 from the first
;; register of operand 0.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];
  int i;

  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5055
;; Second half of the XI-mode vld4 expansion: loads an OI-sized memory
;; operand into the registers at offsets 2, 6, 10 and 14 from the first
;; register of operand 0.  Operand 2 is tied to the destination
;; (constraint "0").
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx xops[5];
  int i;

  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5076
;; Single-lane four-element structure load into an OI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  rtx xops[6];
  int i;

  /* The four destination registers are two register numbers apart.  */
  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5103
;; Single-lane four-element structure load into an XI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[6];
  int i;

  /* A lane index in the upper half of the vector is rebased to the lower
     half and the register number is advanced by two to compensate.  */
  if (idx >= half)
    {
      base += 2;
      idx -= half;
    }

  /* The four destination registers are four register numbers apart.  */
  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5136
;; UNSPEC_VLD4_DUP load into an OI-mode register value.  Modes with more
;; than one element use the all-lanes form of vld4; single-element modes
;; are emitted as a vld1 instead.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  int base, i;
  rtx xops[5];

  /* Handle the single-element case first.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* The four destination registers are two register numbers apart.  */
  base = REGNO (operands[0]);
  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   xops);
  return "";
}
  ;; The scheduling type follows the same split as the output code above.
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load4_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
5165
;; Expander for the standard vec_store_lanes pattern name with an OI-mode
;; structure and a VDX element vector.  There are no preparation
;; statements: the UNSPEC_VST4 store template below is emitted as written.
5166 (define_expand "vec_store_lanesoi<mode>"
5167 [(set (match_operand:OI 0 "neon_struct_operand")
5168 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5169 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5170 UNSPEC_VST4))]
5171 "TARGET_NEON")
5172
;; Four-element structure store of an OI-mode register value (VDX modes).
;; 64-bit elements are emitted as vst1.64; other element sizes use vst4.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst4.<V_sz_elem>\t%h1, %A0");
}
  ;; The scheduling type follows the same split as the output code above.
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_4reg<q>")
                      (const_string "neon_store4_4reg<q>")))]
)
5190
;; Expander for the standard vec_store_lanes pattern name with an XI-mode
;; structure and a VQ2 element vector.  It forwards both operands unchanged
;; to the neon_vst4 expander for the same mode.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest, src));
  DONE;
})
5200
;; Four-element structure store of an XI-mode register value (VQ2 modes).
;; It is expanded into two instructions, neon_vst4qa then neon_vst4qb, each
;; writing one OI-sized piece of the XI-mode memory operand.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OI-sized piece, at offset zero.  */
  rtx first = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first, operands[1]));

  /* Second piece, immediately after the first.  */
  rtx second = adjust_address (first, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second, operands[1]));
  DONE;
})
5215
;; First half of the XI-mode vst4 expansion: stores the registers at
;; offsets 0, 4, 8 and 12 from the first register of operand 1 to an
;; OI-sized memory operand.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];
  int i;

  xops[0] = operands[0];
  for (i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5235
;; Second half of the XI-mode vst4 expansion: stores the registers at
;; offsets 2, 6, 10 and 14 from the first register of operand 1 to an
;; OI-sized memory operand.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[5];
  int i;

  xops[0] = operands[0];
  for (i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5255
;; Single-lane four-element structure store from an OI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  rtx xops[6];
  int i;

  xops[0] = operands[0];
  /* The four source registers are two register numbers apart.  */
  for (i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5282
;; Single-lane four-element structure store from an XI-mode register value.
;; The comment on neon_vld1_lane explains why the lane number is remapped
;; on big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  /* A lane index in the upper half of the vector is rebased to the lower
     half and the register number is advanced by two to compensate.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  /* The four source registers are four register numbers apart.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  ;; This is a single-lane store, so it is classified as a one-lane store.
  ;; That matches the VD_LANE variant of this pattern and the corresponding
  ;; neon_vld4_lane<mode> load (neon_load4_one_lane<q>).
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5315
;; Widens the elements of operand 1 selected by operand 2, which must
;; satisfy the vect_par_constant_low predicate.  Emitted as a vmovl reading
;; operand 1 through the %e modifier.  The extension code comes from the SE
;; iterator (presumably sign/zero extension, paired with <US>; it is
;; defined elsewhere).  Little-endian only.
5316 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5317 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5318 (SE:<V_unpack> (vec_select:<V_HALF>
5319 (match_operand:VU 1 "register_operand" "w")
5320 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5321 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5322 "vmovl.<US><V_sz_elem> %q0, %e1"
5323 [(set_attr "type" "neon_shift_imm_long")]
5324 )
5325
;; Widens the elements of operand 1 selected by operand 2, which must
;; satisfy the vect_par_constant_high predicate.  Emitted as a vmovl
;; reading operand 1 through the %f modifier.  The extension code comes
;; from the SE iterator (presumably sign/zero extension, paired with <US>;
;; it is defined elsewhere).  Little-endian only.
5326 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5327 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5328 (SE:<V_unpack> (vec_select:<V_HALF>
5329 (match_operand:VU 1 "register_operand" "w")
5330 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5332 "vmovl.<US><V_sz_elem> %q0, %f1"
5333 [(set_attr "type" "neon_shift_imm_long")]
5334 )
5335
;; Expander for vec_unpack<US>_hi on VU modes.  It builds a PARALLEL that
;; lists the lane indices of the upper half of the input vector and hands
;; it to neon_vec_unpack<US>_hi_<mode> as the selector operand.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);
  rtx selector;
  int n;

  /* Indices half .. 2 * half - 1, in ascending order.  */
  for (n = 0; n < half; n++)
    RTVEC_ELT (lanes, n) = GEN_INT (half + n);

  selector = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 selector));
  DONE;
}
)
5354
;; Expander for vec_unpack<US>_lo on VU modes.  It builds a PARALLEL that
;; lists the lane indices of the lower half of the input vector and hands
;; it to neon_vec_unpack<US>_lo_<mode> as the selector operand.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);
  rtx selector;
  int n;

  /* Indices 0 .. half - 1, in ascending order.  */
  for (n = 0; n < half; n++)
    RTVEC_ELT (lanes, n) = GEN_INT (n);

  selector = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 selector));
  DONE;
}
)
5372
;; Widening multiply of operands 1 and 3.  The same selector (operand 2,
;; which must satisfy vect_par_constant_low and is reused via match_dup)
;; picks the elements of both inputs; each selection is extended with the
;; SE code before the multiply.  Emitted as a vmull reading both inputs
;; through the %e modifier.  Little-endian only.
5373 (define_insn "neon_vec_<US>mult_lo_<mode>"
5374 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5375 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5376 (match_operand:VU 1 "register_operand" "w")
5377 (match_operand:VU 2 "vect_par_constant_low" "")))
5378 (SE:<V_unpack> (vec_select:<V_HALF>
5379 (match_operand:VU 3 "register_operand" "w")
5380 (match_dup 2)))))]
5381 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5382 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5383 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5384 )
5385
;; Expander for vec_widen_<US>mult_lo on VU modes.  It builds a PARALLEL
;; that lists the lane indices of the lower half of the input vectors and
;; passes it, between the two multiplicands, to
;; neon_vec_<US>mult_lo_<mode>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);
  rtx selector;
  int n;

  /* Indices 0 .. half - 1, in ascending order.  */
  for (n = 0; n < half; n++)
    RTVEC_ELT (lanes, n) = GEN_INT (n);

  selector = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                               operands[1],
                                               selector,
                                               operands[2]));
  DONE;
}
)
5406
;; Widening multiply of operands 1 and 3.  The same selector (operand 2,
;; which must satisfy vect_par_constant_high and is reused via match_dup)
;; picks the elements of both inputs; each selection is extended with the
;; SE code before the multiply.  Emitted as a vmull reading both inputs
;; through the %f modifier.  Little-endian only.
5407 (define_insn "neon_vec_<US>mult_hi_<mode>"
5408 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5409 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5410 (match_operand:VU 1 "register_operand" "w")
5411 (match_operand:VU 2 "vect_par_constant_high" "")))
5412 (SE:<V_unpack> (vec_select:<V_HALF>
5413 (match_operand:VU 3 "register_operand" "w")
5414 (match_dup 2)))))]
5415 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5416 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5417 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5418 )
5419
;; Expander for vec_widen_<US>mult_hi on VU modes.  It builds a PARALLEL
;; that lists the lane indices of the upper half of the input vectors and
;; passes it, between the two multiplicands, to
;; neon_vec_<US>mult_hi_<mode>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);
  rtx selector;
  int n;

  /* Indices half .. 2 * half - 1, in ascending order.  */
  for (n = 0; n < half; n++)
    RTVEC_ELT (lanes, n) = GEN_INT (half + n);

  selector = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                               operands[1],
                                               selector,
                                               operands[2]));
  DONE;
}
)
5441
;; Extension (SE code) of a left shift of operand 1 by the constant
;; operand 2, which must satisfy const_neon_scalar_shift_amount_operand.
;; Emitted as a single vshll.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (ashift:VW
            (match_operand:VW 1 "register_operand" "w")
            (match_operand:<V_innermode> 2
              "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
5452
;; Expander for vec_widen_<US>shiftl_lo on VU modes.  It takes the
;; <V_HALF>-mode subreg at byte offset 0 of the input and shifts it with
;; neon_vec_<US>shiftl_<V_half>.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx half_vec = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                      <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], half_vec,
                                                operands[2]));
  DONE;
}
)
5465
;; Expander for vec_widen_<US>shiftl_hi on VU modes.  It takes the
;; <V_HALF>-mode subreg that starts one half-vector's worth of bytes into
;; the input and shifts it with neon_vec_<US>shiftl_<V_half>.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx half_vec = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                      <MODE>mode,
                                      GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], half_vec,
                                                operands[2]));
  DONE;
}
)
5479
;; Vectorize for non-neon-quad case.
;; Extend (SE) every element of a VDI-mode register into the <V_widen>
;; destination with a single VMOVL.
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)
5488
;; Unpack the low part of a VDI-mode vector.
;; The whole input is first extended into a fresh <V_widen> pseudo with
;; neon_unpack<US>_<mode>; the low half of that pseudo is then copied to
;; operand 0 with neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
  {
    rtx widened = gen_reg_rtx (<V_widen>mode);

    /* Step 1: extend all elements.  Step 2: keep the low half.  */
    emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
    DONE;
  }
)
5501
;; Unpack the high part of a VDI-mode vector.
;; The whole input is first extended into a fresh <V_widen> pseudo with
;; neon_unpack<US>_<mode>; the high half of that pseudo is then copied to
;; operand 0 with neon_vget_high<V_widen_l>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
  {
    rtx widened = gen_reg_rtx (<V_widen>mode);

    /* Step 1: extend all elements.  Step 2: keep the high half.  */
    emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
    DONE;
  }
)
5514
;; Widening multiply of two VDI-mode registers: both inputs are extended
;; (SE) to <V_widen> and multiplied, emitted as one VMULL.
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (mult:<V_widen>
          (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w"))
          (SE:<V_widen> (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5525
;; Widening multiply, high part, for VDI-mode inputs.
;; The full <V_widen> product is computed into a fresh pseudo with
;; neon_vec_<US>mult_<mode>, and its high half is then copied to
;; operand 0 with neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    rtx product = gen_reg_rtx (<V_widen>mode);

    emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                             operands[2]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
    DONE;
  }
)
5540
;; Widening multiply, low part, for VDI-mode inputs.
;; The full <V_widen> product is computed into a fresh pseudo with
;; neon_vec_<US>mult_<mode>, and its low half is then copied to
;; operand 0 with neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    rtx product = gen_reg_rtx (<V_widen>mode);

    emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                             operands[2]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
    DONE;
  }
)
5555
;; Widening shift left, high part, for VDI-mode inputs.
;; The full <V_widen> result is produced into a fresh pseudo with
;; neon_vec_<US>shiftl_<mode>, and its high half is then copied to
;; operand 0 with neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
  {
    rtx shifted = gen_reg_rtx (<V_widen>mode);

    emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                               operands[2]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
    DONE;
  }
)
5569
;; Widening shift left, low part, for VDI-mode inputs.
;; The full <V_widen> result is produced into a fresh pseudo with
;; neon_vec_<US>shiftl_<mode>, and its low half is then copied to
;; operand 0 with neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
  {
    rtx shifted = gen_reg_rtx (<V_widen>mode);

    emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                               operands[2]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
    DONE;
  }
)
5583
5584 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5585 ; because the ordering of vector elements in Q registers is different from what
5586 ; the semantics of the instructions require.
5587
;; Pack two VN-mode vectors into one <V_narrow_pack> vector by truncating
;; each and concatenating the results.  Emitted as two VMOVN instructions
;; (hence length 8), one writing %e0 and one writing %f0.  The output is
;; earlyclobber ("=&w") because the first VMOVN writes operand 0 before
;; operand 2 has been read.  Little-endian only.
(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
        (vec_concat:<V_narrow_pack>
          (truncate:<V_narrow>
            (match_operand:VN 1 "register_operand" "w"))
          (truncate:<V_narrow>
            (match_operand:VN 2 "register_operand" "w"))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
5600
;; For the non-quad case.
;; Truncate a single VN-mode register to <V_narrow> with one VMOVN.
;; Little-endian only.
(define_insn "neon_vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
        (truncate:<V_narrow>
          (match_operand:VN 1 "register_operand" "w")))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_move_narrow_q")]
)
5609
;; Pack two VSHFT-mode vectors into one <V_narrow_pack> vector.
;; Operand 1 is moved into the low half and operand 2 into the high half
;; of a fresh <V_DOUBLE> pseudo, which is then narrowed into operand 0
;; by neon_vec_pack_trunc_<V_double>.  Little-endian only.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
   (match_operand:VSHFT 1 "register_operand" "")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtx joined = gen_reg_rtx (<V_DOUBLE>mode);

    /* Assemble the double-width value, low half first.  */
    emit_insn (gen_move_lo_quad_<V_double> (joined, operands[1]));
    emit_insn (gen_move_hi_quad_<V_double> (joined, operands[2]));

    /* Narrow it into the destination.  */
    emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], joined));
    DONE;
  }
)
5623
;; Absolute difference recognised from abs (minus (op1, op2)), emitted
;; as VABD.
;;
;; Only floating-point modes are accepted, and only under
;; -funsafe-math-optimizations.  Integer modes must not match: the RTL
;; subtraction wraps in the element mode before abs is applied, whereas
;; VABD forms the difference without intermediate wrap-around.  For
;; 8-bit elements 127 and -128 the RTL yields abs (-1) = 1 but VABD.S8
;; yields 0xff.
;;
;; The VDQ iterator is kept so that the set of generated pattern names
;; is unchanged; the integer instances are simply never enabled.
(define_insn "neon_vabd<mode>_2"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (abs:VDQ
          (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                     (match_operand:VDQ 2 "s_register_operand" "w"))))]
  "TARGET_NEON && <Is_float_mode> && flag_unsafe_math_optimizations"
  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
5635
;; Absolute difference recognised from abs of an UNSPEC_VSUB of op1 and
;; op2, emitted as VABD.
;;
;; Only floating-point modes are accepted, and only under
;; -funsafe-math-optimizations.  For integer modes the template would
;; print a "vabd.i<size>" mnemonic via <V_if_elem>, which is not one of
;; the data types VABD accepts, so those instances are disabled.
;;
;; The VDQ iterator is kept so that the set of generated pattern names
;; is unchanged; the integer instances are simply never enabled.
(define_insn "neon_vabd<mode>_3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (abs:VDQ
          (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
                       (match_operand:VDQ 2 "s_register_operand" "w")]
                      UNSPEC_VSUB)))]
  "TARGET_NEON && <Is_float_mode> && flag_unsafe_math_optimizations"
  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
5648
5649 ;; Copy from core-to-neon regs, then extend, not vice-versa
5650
;; Sign-extend SI to DI when, after reload, the DI destination is a VFP
;; register.  The source is duplicated across a V2SI view of the
;; destination register, then the DI value is shifted right
;; arithmetically by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
  {
    /* Operand 2 is the destination hard register viewed as V2SI.  */
    unsigned int dest_regno = REGNO (operands[0]);
    operands[2] = gen_rtx_REG (V2SImode, dest_regno);
  }
)
5660
;; Sign-extend HI to DI when, after reload, the DI destination is a VFP
;; register.  The source is duplicated across a V4HI view of the
;; destination register, then the DI value is shifted right
;; arithmetically by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
  {
    /* Operand 2 is the destination hard register viewed as V4HI.  */
    unsigned int dest_regno = REGNO (operands[0]);
    operands[2] = gen_rtx_REG (V4HImode, dest_regno);
  }
)
5670
;; Sign-extend QI to DI when, after reload, the DI destination is a VFP
;; register.  The source is duplicated across a V8QI view of the
;; destination register, then the DI value is shifted right
;; arithmetically by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
  {
    /* Operand 2 is the destination hard register viewed as V8QI.  */
    unsigned int dest_regno = REGNO (operands[0]);
    operands[2] = gen_rtx_REG (V8QImode, dest_regno);
  }
)
5680
;; Zero-extend SI to DI when, after reload, the DI destination is a VFP
;; register.  The source is duplicated across a V2SI view of the
;; destination register, then the DI value is shifted right logically
;; by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
  {
    /* Operand 2 is the destination hard register viewed as V2SI.  */
    unsigned int dest_regno = REGNO (operands[0]);
    operands[2] = gen_rtx_REG (V2SImode, dest_regno);
  }
)
5690
;; Zero-extend HI to DI when, after reload, the DI destination is a VFP
;; register.  The source is duplicated across a V4HI view of the
;; destination register, then the DI value is shifted right logically
;; by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
  {
    /* Operand 2 is the destination hard register viewed as V4HI.  */
    unsigned int dest_regno = REGNO (operands[0]);
    operands[2] = gen_rtx_REG (V4HImode, dest_regno);
  }
)
5700
;; Zero-extend QI to DI when, after reload, the DI destination is a VFP
;; register.  The source is duplicated across a V8QI view of the
;; destination register, then the DI value is shifted right logically
;; by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
        (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
  {
    /* Operand 2 is the destination hard register viewed as V8QI.  */
    unsigned int dest_regno = REGNO (operands[0]);
    operands[2] = gen_rtx_REG (V8QImode, dest_regno);
  }
)