[ARM] PR63870 Remove error for invalid lane numbers
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Declares the insn attribute "vqh_mnem".  Its permitted values are vadd,
;; vmin and vmax, and the default value expression is the constant "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the VDX modes.  It is enabled with TARGET_NEON and only
;; matches when at least one of the two operands is a register.
;; Output, by alternative:
;;   0       - a single "vmov" from operand 1 to operand 0.
;;   1, 3    - delegated to output_move_neon.
;;   2       - operand 1 is checked with neon_immediate_valid_for_move, which
;;             is handed &operands[1] and &width as out-arguments; the insn
;;             asserts that the check succeeds.  A width of 0 selects
;;             "vmov.f32", any other width selects "vmov.i<width>", whose
;;             template is formatted into a static buffer.
;;   4, 5    - a single "vmov" that names the %Q/%R parts of one operand.
;;   others  - delegated to output_move_double.
;; The attribute lists carry one entry per alternative (nine in total).
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
38 static char templ[40];
39
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
61 }
62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the VQXMOV modes.  It is enabled with TARGET_NEON and only
;; matches when at least one of the two operands is a register.
;; Output, by alternative:
;;   0       - a single "vmov" from operand 1 to operand 0 (printed with %q).
;;   1, 3    - delegated to output_move_neon.
;;   2       - operand 1 is checked with neon_immediate_valid_for_move, which
;;             is handed &operands[1] and &width as out-arguments; the insn
;;             asserts that the check succeeds.  A width of 0 selects
;;             "vmov.f32", any other width selects "vmov.i<width>", whose
;;             template is formatted into a static buffer.
;;   4, 5    - two "vmov" instructions, one using the %e part and one using
;;             the %f part of the vector operand.
;;   others  - delegated to output_move_quad.
;; The attribute lists carry one entry per alternative (nine in total).
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
76 "TARGET_NEON
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
79 {
80 if (which_alternative == 2)
81 {
82 int width, is_valid;
83 static char templ[40];
84
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
87
88 gcc_assert (is_valid != 0);
89
90 if (width == 0)
91 return "vmov.f32\t%q0, %1 @ <mode>";
92 else
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
94
95 return templ;
96 }
97
98 switch (which_alternative)
99 {
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
106 }
107 }
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; Expander for TImode moves, available with TARGET_NEON.  While new pseudo
;; registers may still be created, a move whose destination is not a REG has
;; its source forced into a register first.  The expander then falls through
;; to emit the plain (set ...) pattern.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
119 "TARGET_NEON"
120 {
121 if (can_create_pseudo_p ())
122 {
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
125 }
126 })
127
;; Expander for moves in the VSTRUCT modes, available with TARGET_NEON.  While
;; new pseudo registers may still be created, a move whose destination is not
;; a REG has its source forced into a register first.  The expander then falls
;; through to emit the plain (set ...) pattern.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
131 "TARGET_NEON"
132 {
133 if (can_create_pseudo_p ())
134 {
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
137 }
138 })
139
;; Move pattern for the VSTRUCT modes.  It is enabled with TARGET_NEON and only
;; matches when at least one of the two operands is a register.
;; Alternative 0 outputs "#", i.e. the insn has to be split before final
;; output.  Alternatives 1 and 2 are delegated to output_move_neon.
;; The "length" attribute is not a constant list here; it is computed per insn
;; by arm_attr_length_move_neon.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
143 "TARGET_NEON
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
146 {
147 switch (which_alternative)
148 {
149 case 0: return "#";
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
152 }
153 }
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
156
;; After reload, split a register-to-register EImode copy into two moves:
;; a TImode piece at the base register numbers and a DImode piece four
;; register numbers higher.  The piece lists are handed to
;; neon_disambiguate_copy together with operands[]; the replacement pattern
;; then uses operands 0-3.
;; NOTE(review): presumably neon_disambiguate_copy fills operands[] in an
;; order that is safe when source and destination overlap -- confirm against
;; its definition.
157 (define_split
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
163 {
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
166 rtx dest[2], src[2];
167
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
172
173 neon_disambiguate_copy (operands, dest, src, 2);
174 })
175
;; After reload, split a register-to-register OImode copy into two TImode
;; moves, the second one four register numbers above the first.  The piece
;; lists are handed to neon_disambiguate_copy together with operands[]; the
;; replacement pattern then uses operands 0-3.
;; NOTE(review): presumably neon_disambiguate_copy fills operands[] in an
;; order that is safe when source and destination overlap -- confirm against
;; its definition.
176 (define_split
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
182 {
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
185 rtx dest[2], src[2];
186
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
191
192 neon_disambiguate_copy (operands, dest, src, 2);
193 })
194
;; After reload, split a register-to-register CImode copy into three TImode
;; moves at register offsets 0, 4 and 8 from the base register numbers.  The
;; piece lists are handed to neon_disambiguate_copy together with operands[];
;; the replacement pattern then uses operands 0-5.
;; NOTE(review): presumably neon_disambiguate_copy fills operands[] in an
;; order that is safe when source and destination overlap -- confirm against
;; its definition.
195 (define_split
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
202 {
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
205 rtx dest[3], src[3];
206
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
213
214 neon_disambiguate_copy (operands, dest, src, 3);
215 })
216
;; After reload, split a register-to-register XImode copy into four TImode
;; moves at register offsets 0, 4, 8 and 12 from the base register numbers.
;; The piece lists are handed to neon_disambiguate_copy together with
;; operands[]; the replacement pattern then uses operands 0-7.
;; NOTE(review): presumably neon_disambiguate_copy fills operands[] in an
;; order that is safe when source and destination overlap -- confirm against
;; its definition.
217 (define_split
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
225 {
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
228 rtx dest[4], src[4];
229
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
238
239 neon_disambiguate_copy (operands, dest, src, 4);
240 })
241
;; Expander for misaligned moves in the VDQX modes.  The source is wrapped in
;; UNSPEC_MISALIGNED_ACCESS.  Enabled only with TARGET_NEON, on little-endian
;; (!BYTES_BIG_ENDIAN) and when unaligned_access is set.
;; Steps performed before the pattern is emitted:
;;   1. If neither operand is a register, operand 1 is forced into one.
;;   2. The operand that is not the register side is selected: operand 1 when
;;      operand 0 is a register, otherwise operand 0.
;;   3. If that operand is rejected by neon_vector_mem_operand (..., 2, true),
;;      its address is forced into a Pmode register.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
247 {
248 rtx adjust_mem;
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
255
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
258 else
259 adjust_mem = operands[0];
260
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
264
265 })
266
;; Misaligned store for the VDX modes: register operand 1, wrapped in
;; UNSPEC_MISALIGNED_ACCESS, is written to memory operand 0 with a single
;; "vst1.<V_sz_elem>".  Same enabling condition as the movmisalign expander.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
274
;; Misaligned load for the VDX modes: memory operand 1, wrapped in
;; UNSPEC_MISALIGNED_ACCESS, is read into register operand 0 with a single
;; "vld1.<V_sz_elem>".  Same enabling condition as the movmisalign expander.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
278 " Um")]
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
283
;; Misaligned store for the VQX modes: register operand 1, wrapped in
;; UNSPEC_MISALIGNED_ACCESS, is written to memory operand 0 with a single
;; "vst1.<V_sz_elem>" (register printed with %q).  Same enabling condition as
;; the movmisalign expander.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
291
;; Misaligned load for the VQX modes: memory operand 1, wrapped in
;; UNSPEC_MISALIGNED_ACCESS, is read into register operand 0 with a single
;; "vld1.<V_sz_elem>" (register printed with %q).  Same enabling condition as
;; the movmisalign expander.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
295 " Um")]
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
300
;; Insert scalar operand 1 into one lane of a VD-mode vector.  Operand 3 is
;; the vector being modified and is tied to the output (constraint "0").
;; Operand 2 is the vec_merge selector; the lane number is recovered from it
;; as ffs (mask) - 1, i.e. the index of its lowest set bit.  On big-endian the
;; lane number is mirrored (nunits - 1 - lane).  Operand 2 is then replaced by
;; the lane number for printing.
;; Alternative 0 emits a lane "vld1" from memory; alternative 1 emits a lane
;; "vmov" from operand 1.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
303 (vec_merge:VD
304 (vec_duplicate:VD
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
308 "TARGET_NEON"
309 {
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
314
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
317 else
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
319 }
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
321
;; Insert scalar operand 1 into one lane of a VQ2-mode vector.  Operand 3 is
;; the vector being modified and is tied to the output (constraint "0").
;; The lane number is ffs (mask) - 1 of selector operand 2.  It is split into
;; a lane within one half of the vector (elem % half_elts) and a register
;; offset for that half ((elem / half_elts) * 2).  On big-endian the
;; within-half lane is mirrored.  Operand 0 is rewritten to the <V_HALF>mode
;; register at regno + offset and operand 2 to the within-half lane, so the
;; templates address a single half.
;; Alternative 0 emits a lane "vld1" from memory; alternative 1 emits a lane
;; "vmov" from operand 1.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
324 (vec_merge:VQ2
325 (vec_duplicate:VQ2
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ2 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
329 "TARGET_NEON"
330 {
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
336
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
339
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
342
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
345 else
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
347 }
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
349 )
350
;; Insert DImode operand 1 into one element of a V2DI vector.  Operand 3 is
;; the vector being modified and is tied to the output (constraint "0").
;; The element number is ffs (mask) - 1 of selector operand 2.  Operand 0 is
;; rewritten to the DImode register at regno + 2 * element, so the templates
;; write that register whole: "vld1.64" from memory (alternative 0) or "vmov"
;; from the %Q1/%R1 pair (alternative 1).  No endianness adjustment is made
;; here.
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
353 (vec_merge:V2DI
354 (vec_duplicate:V2DI
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
358 "TARGET_NEON"
359 {
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
362
363 operands[0] = gen_rtx_REG (DImode, regno);
364
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
367 else
368 return "vmov\t%P0, %Q1, %R1";
369 }
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
371 )
372
;; Expander for setting one element of a VDQ-mode vector.  Operand 2 is an
;; element index; it is converted into the single-bit mask 1 << index and
;; passed to vec_set<mode>_internal, with operand 0 supplied both as the
;; destination and as the vector being modified.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
377 "TARGET_NEON"
378 {
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
382 DONE;
383 })
384
;; Extract the lane selected by immediate operand 2 from VD-mode vector
;; operand 1 into operand 0.  On big-endian the lane number is mirrored
;; (nunits - 1 - lane) before printing.
;; Alternative 0 emits a lane "vst1" to memory; alternative 1 emits a lane
;; "vmov.<V_uf_sclr>" into operand 0.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
387 (vec_select:<V_elem>
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
390 "TARGET_NEON"
391 {
392 if (BYTES_BIG_ENDIAN)
393 {
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
397 }
398
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
401 else
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
403 }
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
405 )
406
;; Extract the lane selected by immediate operand 2 from VQ2-mode vector
;; operand 1 into operand 0.  The lane number is split into a lane within one
;; half of the vector (lane % half_elts) and a register offset for that half
;; ((lane / half_elts) * 2).  On big-endian the within-half lane is mirrored.
;; Operand 1 is rewritten to the <V_HALF>mode register at regno + offset and
;; operand 2 to the within-half lane.
;; Alternative 0 emits a lane "vst1" to memory; alternative 1 emits a lane
;; "vmov.<V_uf_sclr>" into operand 0.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
409 (vec_select:<V_elem>
410 (match_operand:VQ2 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
412 "TARGET_NEON"
413 {
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
418
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
421
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
424
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
427 else
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
429 }
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
431 )
432
;; Extract the DImode element selected by immediate operand 2 from V2DI
;; vector operand 1.  Operand 1 is rewritten to the DImode register at
;; regno + 2 * element, which is then stored with "vst1.64" (alternative 0)
;; or moved to the %Q0/%R0 pair with "vmov" (alternative 1).  No endianness
;; adjustment is made here.
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
435 (vec_select:DI
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
438 "TARGET_NEON"
439 {
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
441
442 operands[1] = gen_rtx_REG (DImode, regno);
443
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
446 else
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
448 }
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
450 )
451
;; Expander for initialising a VDQ-mode vector (operand 0) from operand 1.
;; All of the work is done by neon_expand_vector_init; nothing else is
;; emitted.
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
455 "TARGET_NEON"
456 {
457 neon_expand_vector_init (operands[0], operands[1]);
458 DONE;
459 })
460
461 ;; Doubleword and quadword arithmetic.
462
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
465
;; Vector addition for the VDQ modes, emitted as "vadd.<V_if_elem>".
;; For floating-point modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The "type" attribute is chosen by
;; <Is_float_mode>: neon_fp_addsub_s<q> for float, neon_add<q> otherwise.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
472 [(set (attr "type")
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
476 )
477
;; DImode addition with seven alternatives; the pattern clobbers the CC
;; register.  Alternatives 0 and 3 emit "vadd.i64".  Alternatives 1, 2, 4, 5
;; and 6 output "#", i.e. they have to be split before final output.  The
;; "arch" attribute ties alternative 0 to neon_for_64bits and alternative 3
;; to avoid_neon_for_64bits.
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
483 "TARGET_NEON"
484 {
485 switch (which_alternative)
486 {
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
489 case 1: return "#";
490 case 2: return "#";
491 case 4: return "#";
492 case 5: return "#";
493 case 6: return "#";
494 default: gcc_unreachable ();
495 }
496 }
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
502 )
503
;; Vector subtraction (operand 1 minus operand 2) for the VDQ modes, emitted
;; as "vsub.<V_if_elem>".  For floating-point modes the pattern is only
;; enabled under flag_unsafe_math_optimizations.  The "type" attribute is
;; chosen by <Is_float_mode>: neon_fp_addsub_s<q> for float, neon_sub<q>
;; otherwise.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
510 [(set (attr "type")
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
514 )
515
;; DImode subtraction with five alternatives; the pattern clobbers the CC
;; register.  Alternatives 0 and 4 emit "vsub.i64".  Alternatives 1, 2 and 3
;; emit the two-instruction sequence "subs" on the %Q parts followed by "sbc"
;; on the %R parts.  The "arch" attribute ties alternative 0 to
;; neon_for_64bits and alternative 4 to avoid_neon_for_64bits.
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
521 "TARGET_NEON"
522 {
523 switch (which_alternative)
524 {
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
531 }
532 }
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
537 )
538
;; Vector multiplication for the VDQW modes, emitted as "vmul.<V_if_elem>".
;; For floating-point modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.  The "type" attribute is chosen by
;; <Is_float_mode>: neon_fp_mul_s<q> for float, neon_mul_<V_elem_ch><q>
;; otherwise.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
545 [(set (attr "type")
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
549 )
550
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, with the accumulator (operand 1) tied to the output
;; (constraint "0").  Emitted as "vmla.<V_if_elem>".  For floating-point modes
;; the pattern is only enabled under flag_unsafe_math_optimizations.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
558 [(set (attr "type")
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
562 )
563
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with the accumulator (operand 1) tied to the output
;; (constraint "0").  Emitted as "vmls.<V_if_elem>".  For floating-point modes
;; the pattern is only enabled under flag_unsafe_math_optimizations.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
571 [(set (attr "type")
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
575 )
576
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
580 ;; to be able to use when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes: operand 0 = operand 1 * operand 2
;; + operand 3, with operand 3 tied to the output (constraint "0").  Emitted
;; as "vfma".  This variant additionally requires
;; flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
590 )
591
;; Fused multiply-add for the VCVTF modes with the same RTL and "vfma" output
;; as fma<VCVTF:mode>4, but enabled by TARGET_NEON && TARGET_FMA alone, with
;; no dependence on flag_unsafe_math_optimizations.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
600 )
601
;; Fused multiply-subtract for the VCVTF modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, with operand 3 tied to the output (constraint "0").
;; Emitted as "vfms".  This variant additionally requires
;; flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
610 )
611
;; Fused multiply-subtract for the VCVTF modes with the same RTL and "vfms"
;; output as *fmsub<VCVTF:mode>4, but enabled by TARGET_NEON && TARGET_FMA
;; alone, with no dependence on flag_unsafe_math_optimizations.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
620 )
621
;; Rounding patterns for the VCVTF modes, one per NEON_VRINT unspec.  Each
;; emits "vrint<nvrint_variant>" with a fixed ".f32" suffix.  Requires
;; TARGET_NEON && TARGET_FPU_ARMV8.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
626 NEON_VRINT))]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
630 )
631
;; Float-to-integer conversion patterns for the VCVTF modes: a FIXUORS code
;; applied to a NEON_VCVT unspec of operand 1, producing a <V_cmp_result>
;; value.  Emitted as "vcvt<nvrint_variant>.<su>32.f32".  Requires
;; TARGET_NEON && TARGET_FPU_ARMV8; the insn is marked as not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
636 NEON_VCVT)))]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
641 )
642
;; Bitwise OR for the VDQ modes.  Alternative 0 emits a three-register "vorr".
;; Alternative 1 has operand 1 tied to the output (constraint "0") and
;; delegates to neon_output_logic_immediate with mnemonic "vorr", fourth
;; argument 0 and the result of VALID_NEON_QREG_MODE for the mode.
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
647 "TARGET_NEON"
648 {
649 switch (which_alternative)
650 {
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
655 }
656 }
657 [(set_attr "type" "neon_logic<q>")]
658 )
659
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
664
;; Bitwise AND for the VDQ modes.  Alternative 0 emits a three-register "vand".
;; Alternative 1 has operand 1 tied to the output (constraint "0") and
;; delegates to neon_output_logic_immediate with mnemonic "vand", fourth
;; argument 1 and the result of VALID_NEON_QREG_MODE for the mode.
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
669 "TARGET_NEON"
670 {
671 switch (which_alternative)
672 {
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
677 }
678 }
679 [(set_attr "type" "neon_logic<q>")]
680 )
681
;; OR-NOT for the VDQ modes: operand 0 = (~operand 2) | operand 1, emitted as
;; "vorn" with operands 0, 1, 2 in that order.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
686 "TARGET_NEON"
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
689 )
690
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
693 ;; changes above.
;; DImode OR-NOT: operand 0 = (~operand 2) | operand 1.  Alternative 0 emits
;; "vorn"; alternatives 1-3 output "#" and rely on the split below.
;; The split runs after reload when operand 0 is not a VFP register:
;;   - with TARGET_THUMB2, the operands are replaced by their SImode low and
;;     high parts, giving two SImode OR-NOT sets (low part first);
;;   - otherwise one_cmpldi2 (operand 0 = ~operand 2) and then iordi3
;;     (operand 0 = operand 1 | operand 0) are emitted and the split is DONE.
;; The "arch" attribute restricts alternative 1 to "a" and alternatives 2 and
;; 3 to "t2".
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
698 "TARGET_NEON"
699 "@
700 vorn\t%P0, %P1, %P2
701 #
702 #
703 #"
704 "reload_completed &&
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
708 "
709 {
710 if (TARGET_THUMB2)
711 {
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
718 }
719 else
720 {
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
723 DONE;
724 }
725 }"
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
729 )
730
;; Bit-clear for the VDQ modes: operand 0 = (~operand 2) & operand 1, emitted
;; as "vbic" with operands 0, 1, 2 in that order.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
735 "TARGET_NEON"
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
738 )
739
740 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear: operand 0 = (~operand 2) & operand 1.  Alternative 0
;; emits "vbic"; alternatives 1 and 2 output "#", i.e. they have to be split
;; before final output (no split is attached to this pattern itself).
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
745 "TARGET_NEON"
746 "@
747 vbic\t%P0, %P1, %P2
748 #
749 #"
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
752 )
753
;; Bitwise exclusive-OR for the VDQ modes, emitted as "veor".
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
758 "TARGET_NEON"
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
761 )
762
;; Bitwise complement for the VDQ modes, emitted as "vmvn".  Its "type"
;; attribute is neon_move<q>.
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
766 "TARGET_NEON"
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
769 )
770
;; Absolute value for the VDQW modes, emitted as "vabs.<V_s_elem>".  Unlike
;; the add/sub/mul patterns, the condition is TARGET_NEON alone.  The "type"
;; attribute is chosen by <Is_float_mode>.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
774 "TARGET_NEON"
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
776 [(set (attr "type")
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
780 )
781
;; Negation for the VDQW modes, emitted as "vneg.<V_s_elem>".  The condition
;; is TARGET_NEON alone.  The "type" attribute is chosen by <Is_float_mode>.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
785 "TARGET_NEON"
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
787 [(set (attr "type")
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
791 )
792
;; DImode negation with four alternatives.  The pattern clobbers a DImode
;; scratch (operand 2, which is a real register only in alternative 1) and the
;; CC register.  Every alternative outputs "#", so the insn is always split
;; before final output; its length is given as 8.
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
798 "TARGET_NEON"
799 "#"
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
802 )
803
804 ; Split negdi2_neon for vfp registers
;; Applies after reload when operand 0 is a VFP register.  The negation is
;; rewritten as: operand 2 = 0, then operand 0 = operand 2 - operand 1 (with
;; a CC clobber).  If operand 2 is not a REG, operand 0 itself is used as the
;; zero temporary.
805 (define_split
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
814 {
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
817 }
818 )
819
820 ; Split negdi2_neon for core registers
;; Applies after reload, under TARGET_32BIT, when operand 0 satisfies
;; arm_general_register_operand.  The replacement is the same DImode negation
;; with only the CC clobber kept; the scratch clobber is dropped.
821 (define_split
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
830 ""
831 )
832
;; Unsigned minimum for the VDQIW modes, emitted as "vmin.<V_u_elem>".
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
837 "TARGET_NEON"
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
840 )
841
;; Unsigned maximum for the VDQIW modes, emitted as "vmax.<V_u_elem>".
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
846 "TARGET_NEON"
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
849 )
850
;; Signed minimum for the VDQW modes, emitted as "vmin.<V_s_elem>".  The
;; "type" attribute is chosen by <Is_float_mode>: neon_fp_minmax_s<q> for
;; float, neon_minmax<q> otherwise.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
855 "TARGET_NEON"
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
857 [(set (attr "type")
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
861 )
862
;; Signed maximum for the VDQW modes, emitted as "vmax.<V_s_elem>".  The
;; "type" attribute is chosen by <Is_float_mode>: neon_fp_minmax_s<q> for
;; float, neon_minmax<q> otherwise.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
867 "TARGET_NEON"
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869 [(set (attr "type")
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
873 )
874
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
877 ; SImode elements.
878
;; Vector left shift for the VDQIW modes.  Alternative 0 emits a register
;; form "vshl.<V_s_elem>".  Alternative 1 delegates to
;; neon_output_shift_immediate with mnemonic "vshl", sign character 'i' and a
;; final argument of true.
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
883 "TARGET_NEON"
884 {
885 switch (which_alternative)
886 {
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
889 <MODE>mode,
890 VALID_NEON_QREG_MODE (<MODE>mode),
891 true);
892 default: gcc_unreachable ();
893 }
894 }
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
896 )
897
;; Arithmetic right shift by an immediate for the VDQIW modes.  Output is
;; delegated to neon_output_shift_immediate with mnemonic "vshr", sign
;; character 's' and a final argument of false.
897 (define_insn "vashr<mode>3_imm"
898 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
899 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
900 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
901 "TARGET_NEON"
902 {
903 return neon_output_shift_immediate ("vshr", 's', &operands[2],
904 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
905 false);
906 }
907 [(set_attr "type" "neon_shift_imm<q>")]
908 )
910
;; Vector logical shift right by an immediate count (unsigned vshr).
(define_insn "vlshr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (lshiftrt:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")
          (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
  "TARGET_NEON"
{
  /* The helper formats the "vshr.u<size>" template for this mode.  */
  return neon_output_shift_immediate ("vshr", 'u', &operands[2], <MODE>mode,
                                      VALID_NEON_QREG_MODE (<MODE>mode),
                                      false);
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
923
; Used for implementing arithmetic shift-right, which is a left-shift by a
; negative amount, with signed operands.  This is essentially the same as
; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
; with negative shift amounts.

(define_insn "ashl<mode>3_signed"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI
          [(match_operand:VDQI 1 "s_register_operand" "w")
           (match_operand:VDQI 2 "s_register_operand" "w")]
          UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON"
  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)
938
; Used for implementing logical shift-right, which is a left-shift by a
; negative amount, with unsigned operands.

(define_insn "ashl<mode>3_unsigned"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI
          [(match_operand:VDQI 1 "s_register_operand" "w")
           (match_operand:VDQI 2 "s_register_operand" "w")]
          UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON"
  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)
951
;; Vector arithmetic shift right.  An immediate count goes straight to
;; vashr<mode>3_imm; a register count is negated and fed to the signed
;; left-shift pattern.
(define_expand "vashr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (ashiftrt:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
{
  if (!s_register_operand (operands[2], <MODE>mode))
    emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
  else
    {
      rtx neg_count = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_neg<mode>2 (neg_count, operands[2]));
      emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1],
                                         neg_count));
    }
  DONE;
}
)
968
;; Vector logical shift right.  An immediate count goes straight to
;; vlshr<mode>3_imm; a register count is negated and fed to the unsigned
;; left-shift pattern.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (lshiftrt:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
  "TARGET_NEON"
{
  if (!s_register_operand (operands[2], <MODE>mode))
    emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
  else
    {
      rtx neg_count = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_neg<mode>2 (neg_count, operands[2]));
      emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1],
                                           neg_count));
    }
  DONE;
}
)
985
986 ;; 64-bit shifts
987
;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
;; leaving the upper half uninitialized.  This is OK since the shift
;; instruction only looks at the low 8 bits anyway.  To avoid confusing
;; data flow analysis however, we pretend the full register is set
;; using an unspec.
;; Alternative 0 loads the count from memory, alternative 1 moves it from
;; a core register.
(define_insn "neon_load_count"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (unspec:DI
          [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
          UNSPEC_LOAD_COUNT))]
  "TARGET_NEON"
  "@
   vld1.32\t{%P0[0]}, %A1
   vmov.32\t%P0[0], %1"
  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
)
1003
;; 64-bit left shift in a NEON register, without the scratch clobbers that
;; ashldi3_neon carries.  Only available after reload; an immediate count
;; must lie in [0, 63].
(define_insn "ashldi3_neon_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (ashift:DI
          (match_operand:DI 1 "s_register_operand" " w,w")
          (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
  "TARGET_NEON && reload_completed
   && (!CONST_INT_P (operands[2])
       || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
  "@
   vshl.u64\t%P0, %P1, %2
   vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
)
1016
;; 64-bit left shift that may be allocated to either NEON or core registers.
;; It is always split after reload:
;;  - NEON destination: a constant count below 1 becomes a plain move and a
;;    count above 63 is clamped to 63; a non-constant count is first loaded
;;    into the DI scratch (operand 5).  The shift itself is emitted through
;;    ashldi3_neon_noclobber.
;;  - core-register destination: a shift by exactly 1 uses
;;    arm_ashldi3_1bit when the registers permit, otherwise the generic
;;    arm_emit_coreregs_64bit_shift helper is used with scratches 3 and 4.
(define_insn_and_split "ashldi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
        (ashift:DI
          (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
          (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
   (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
   (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
   (clobber (reg:CC_C CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
{
  if (!IS_VFP_REGNUM (REGNO (operands[0])))
    {
      /* Destination lives in core registers.  */
      if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
          && (!reg_overlap_mentioned_p (operands[0], operands[1])
              || REGNO (operands[0]) == REGNO (operands[1])))
        /* This clobbers CC.  */
        emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
      else
        arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
                                       operands[2], operands[3],
                                       operands[4]);
    }
  else
    {
      /* Destination lives in a NEON register.  */
      if (CONST_INT_P (operands[2]))
        {
          if (INTVAL (operands[2]) < 1)
            {
              emit_insn (gen_movdi (operands[0], operands[1]));
              DONE;
            }

          if (INTVAL (operands[2]) > 63)
            operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
        }
      else
        {
          /* Move the count into the NEON scratch and shift by that.  */
          emit_insn (gen_neon_load_count (operands[5], operands[2]));
          operands[2] = operands[5];
        }

      /* Ditch the unnecessary clobbers.  */
      emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
                                             operands[2]));
    }
  DONE;
}
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,*,*")
   (set_attr "type" "multiple")]
)
1070
; Signed 64-bit shift by a register count (vshl.s64).
; The shift amount needs to be negated for right-shifts.
(define_insn "signed_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI
          [(match_operand:DI 1 "s_register_operand" " w")
           (match_operand:DI 2 "s_register_operand" " w")]
          UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.s64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)
1081
; Unsigned 64-bit shift by a register count (vshl.u64).
; The shift amount needs to be negated for right-shifts.
(define_insn "unsigned_shift_di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (unspec:DI
          [(match_operand:DI 1 "s_register_operand" " w")
           (match_operand:DI 2 "s_register_operand" " w")]
          UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON && reload_completed"
  "vshl.u64\t%P0, %P1, %P2"
  [(set_attr "type" "neon_shift_reg")]
)
1092
;; 64-bit arithmetic shift right by an immediate in a NEON register.
;; Only available after reload; the count must lie in [1, 64].
(define_insn "ashrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (ashiftrt:DI
          (match_operand:DI 1 "s_register_operand" " w")
          (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.s64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)
1102
;; 64-bit logical shift right by an immediate in a NEON register.
;; Only available after reload; the count must lie in [1, 64].
(define_insn "lshrdi3_neon_imm_noclobber"
  [(set (match_operand:DI 0 "s_register_operand" "=w")
        (lshiftrt:DI
          (match_operand:DI 1 "s_register_operand" " w")
          (match_operand:DI 2 "const_int_operand" " i")))]
  "TARGET_NEON && reload_completed
   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
  "vshr.u64\t%P0, %P1, %2"
  [(set_attr "type" "neon_shift_imm")]
)
1112
;; ashrdi3_neon
;; lshrdi3_neon
;;
;; 64-bit right shifts that may be allocated to either NEON or core
;; registers.  Always split after reload:
;;  - NEON destination, constant count: a count below 1 becomes a plain
;;    move, a count above 64 is clamped to 64, and the immediate pattern
;;    is emitted.
;;  - NEON destination, register count: the count is negated into scratch 3,
;;    loaded into the DI scratch (operand 5) and used as a left-shift amount.
;;  - core-register destination: a shift by exactly 1 uses the 1-bit
;;    pattern when the registers permit, otherwise the generic
;;    arm_emit_coreregs_64bit_shift helper is used.
(define_insn_and_split "<shift>di3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
        (RSHIFTS:DI
          (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
          (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
   (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
   (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
   (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
  "#"
  "TARGET_NEON && reload_completed"
  [(const_int 0)]
{
  if (!IS_VFP_REGNUM (REGNO (operands[0])))
    {
      /* Destination lives in core registers.  */
      if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
          && (!reg_overlap_mentioned_p (operands[0], operands[1])
              || REGNO (operands[0]) == REGNO (operands[1])))
        /* This clobbers CC.  */
        emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
      else
        /* This clobbers CC (ASHIFTRT by register only).  */
        arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
                                       operands[2], operands[3],
                                       operands[4]);
    }
  else if (!CONST_INT_P (operands[2]))
    {
      /* We must use a negative left-shift.  */
      emit_insn (gen_negsi2 (operands[3], operands[2]));
      emit_insn (gen_neon_load_count (operands[5], operands[3]));
      emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
                                                 operands[5]));
    }
  else if (INTVAL (operands[2]) < 1)
    emit_insn (gen_movdi (operands[0], operands[1]));
  else
    {
      if (INTVAL (operands[2]) > 64)
        operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);

      /* Ditch the unnecessary clobbers.  */
      emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
                                                    operands[1],
                                                    operands[2]));
    }

  DONE;
}
  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
   (set_attr "opt" "*,*,speed,speed,*,*")
   (set_attr "type" "multiple")]
)
1174
1175 ;; Widening operations
1176
;; Widening signed sum: sign-extend the narrow vector operand 1 and add it
;; to the wide accumulator operand 2 (vaddw).
;; Operand 1 carries no '%' commutative marker: it is a narrow-mode value
;; wrapped in sign_extend, whereas operand 2 is a wide-mode value, so the
;; two can never be interchanged.
(define_insn "widen_ssum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (sign_extend:<V_widen>
            (match_operand:VW 1 "s_register_operand" "w"))
          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)
1186
;; Widening unsigned sum: zero-extend the narrow vector operand 1 and add it
;; to the wide accumulator operand 2 (vaddw).
;; Operand 1 carries no '%' commutative marker: it is a narrow-mode value
;; wrapped in zero_extend, whereas operand 2 is a wide-mode value, so the
;; two can never be interchanged.
(define_insn "widen_usum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (zero_extend:<V_widen>
            (match_operand:VW 1 "s_register_operand" "w"))
          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)
1196
1197 ;; Helpers for quad-word reduction operations
1198
; Add (or smin, smax...) the low N/2 elements of the N-element vector
; operand[1] to the high N/2 elements of same.  Put the result in
; operand[0], an N/2-element vector.

(define_insn "quad_halves_<code>v4si"
  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
        (VQH_OPS:V2SI
          (vec_select:V2SI
            (match_operand:V4SI 1 "s_register_operand" "w")
            (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SI
            (match_dup 1)
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1215
;; Combine the low and high halves of a V4SF vector into a V2SF result.
;; Only enabled with -funsafe-math-optimizations.
(define_insn "quad_halves_<code>v4sf"
  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
        (VQHS_OPS:V2SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "s_register_operand" "w")
            (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SF
            (match_dup 1)
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "<VQH_mnem>.f32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
)
1228
;; Combine the low and high halves of a V8HI vector into a V4HI result.
;; Operand 0 is a pure output ("=w"), matching the V4SI/V4SF variants:
;; the pattern only sets it and never reads its previous value.
(define_insn "quad_halves_<code>v8hi"
  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
        (VQH_OPS:V4HI
          (vec_select:V4HI
            (match_operand:V8HI 1 "s_register_operand" "w")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (vec_select:V4HI
            (match_dup 1)
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1243
;; Combine the low and high halves of a V16QI vector into a V8QI result.
;; Operand 0 is a pure output ("=w"), matching the V4SI/V4SF variants:
;; the pattern only sets it and never reads its previous value.
(define_insn "quad_halves_<code>v16qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (VQH_OPS:V8QI
          (vec_select:V8QI
            (match_operand:V16QI 1 "s_register_operand" "w")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (vec_select:V8QI
            (match_dup 1)
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1262
;; Store the half-width vector operand 1 into the subreg of operand 0 that
;; starts at byte offset GET_MODE_SIZE (<V_HALF>mode).
(define_expand "move_hi_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand" "")
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx hi_half = simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
                                     GET_MODE_SIZE (<V_HALF>mode));

  emit_move_insn (hi_half, operands[1]);
  DONE;
}
)
1273
;; Store the half-width vector operand 1 into the subreg of operand 0 that
;; starts at byte offset 0.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand" "")
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx lo_half = simplify_gen_subreg (<V_HALF>mode, operands[0],
                                     <MODE>mode, 0);

  emit_move_insn (lo_half, operands[1]);
  DONE;
}
)
1284
1285 ;; Reduction operations
1286
;; Sum all elements of a double-word vector into a scalar, using pairwise
;; adds.  Floating-point modes need -funsafe-math-optimizations.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpadd_internal<mode>);
  /* The same result is actually computed into every element.  */
  emit_insn (gen_vec_extract<mode> (operands[0], reduced, const0_rtx));
  DONE;
}
)
1299
;; Sum all elements of a quad-word vector into a scalar: first add the two
;; halves together, then reduce the resulting double-word vector.
;; Little-endian only; FP modes need -funsafe-math-optimizations.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx halves_sum = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_plus<mode> (halves_sum, operands[1]));
  emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], halves_sum));

  DONE;
}
)
1313
;; Sum the two DI elements of a V2DI vector into a scalar.  Little-endian
;; only.  The sum is computed by arm_reduc_plus_internal_v2di and element 0
;; of its result is extracted.
(define_expand "reduc_plus_scal_v2di"
  [(match_operand:DI 0 "nonimmediate_operand" "=w")
   (match_operand:V2DI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx summed = gen_reg_rtx (V2DImode);

  emit_insn (gen_arm_reduc_plus_internal_v2di (summed, operands[1]));
  emit_insn (gen_vec_extractv2di (operands[0], summed, const0_rtx));

  DONE;
}
)
1326
;; Add the two 64-bit halves of operand 1 (%e1 and %f1) and write the sum
;; to %e0 of operand 0.  Little-endian only.
(define_insn "arm_reduc_plus_internal_v2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (unspec:V2DI
          [(match_operand:V2DI 1 "s_register_operand" "w")]
          UNSPEC_VPADD))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vadd.i64\t%e0, %e1, %f1"
  [(set_attr "type" "neon_add_q")]
)
1335
;; Signed/FP minimum of all elements of a double-word vector, using
;; pairwise vpmin.  FP modes need -funsafe-math-optimizations.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpsmin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode> (operands[0], reduced, const0_rtx));
  DONE;
}
)
1349
;; Signed/FP minimum of all elements of a quad-word vector: take the
;; minimum of the two halves, then reduce the double-word result.
;; Little-endian only; FP modes need -funsafe-math-optimizations.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx halves_min = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smin<mode> (halves_min, operands[1]));
  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], halves_min));

  DONE;
}
)
1363
;; Signed/FP maximum of all elements of a double-word vector, using
;; pairwise vpmax.  FP modes need -funsafe-math-optimizations.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VD 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpsmax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode> (operands[0], reduced, const0_rtx));
  DONE;
}
)
1376
;; Signed/FP maximum of all elements of a quad-word vector: take the
;; maximum of the two halves, then reduce the double-word result.
;; Little-endian only; FP modes need -funsafe-math-optimizations.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQ 1 "s_register_operand" "")]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
   && !BYTES_BIG_ENDIAN"
{
  rtx halves_max = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smax<mode> (halves_max, operands[1]));
  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], halves_max));

  DONE;
}
)
1390
;; Unsigned minimum of all elements of a double-word integer vector, using
;; pairwise vpmin.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpumin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode> (operands[0], reduced, const0_rtx));
  DONE;
}
)
1403
;; Unsigned minimum of all elements of a quad-word integer vector: take the
;; minimum of the two halves, then reduce the double-word result.
;; Little-endian only.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx halves_min = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umin<mode> (halves_min, operands[1]));
  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], halves_min));

  DONE;
}
)
1416
;; Unsigned maximum of all elements of a double-word integer vector, using
;; pairwise vpmax.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VDI 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpumax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode> (operands[0], reduced, const0_rtx));
  DONE;
}
)
1429
;; Unsigned maximum of all elements of a quad-word integer vector: take the
;; maximum of the two halves, then reduce the double-word result.
;; Little-endian only.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
   (match_operand:VQI 1 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx halves_max = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umax<mode> (halves_max, operands[1]));
  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], halves_max));

  DONE;
}
)
1442
;; Pairwise add of two double-word vectors (vpadd), used by the plus
;; reduction expanders.
(define_insn "neon_vpadd_internal<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD
          [(match_operand:VD 1 "s_register_operand" "w")
           (match_operand:VD 2 "s_register_operand" "w")]
          UNSPEC_VPADD))]
  "TARGET_NEON"
  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
  ;; Assume this schedules like vadd.
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_add_s<q>")
                      (const_string "neon_reduc_add<q>")))]
)
1456
;; Pairwise signed/FP minimum of two double-word vectors (vpmin).
(define_insn "neon_vpsmin<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD
          [(match_operand:VD 1 "s_register_operand" "w")
           (match_operand:VD 2 "s_register_operand" "w")]
          UNSPEC_VPSMIN))]
  "TARGET_NEON"
  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)
1469
;; Pairwise signed/FP maximum of two double-word vectors (vpmax).
(define_insn "neon_vpsmax<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD
          [(match_operand:VD 1 "s_register_operand" "w")
           (match_operand:VD 2 "s_register_operand" "w")]
          UNSPEC_VPSMAX))]
  "TARGET_NEON"
  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_reduc_minmax_s<q>")
                      (const_string "neon_reduc_minmax<q>")))]
)
1482
;; Pairwise unsigned minimum of two double-word integer vectors (vpmin).
(define_insn "neon_vpumin<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          UNSPEC_VPUMIN))]
  "TARGET_NEON"
  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
1492
;; Pairwise unsigned maximum of two double-word integer vectors (vpmax).
(define_insn "neon_vpumax<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          UNSPEC_VPUMAX))]
  "TARGET_NEON"
  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
1502
1503 ;; Saturating arithmetic
1504
1505 ; NOTE: Neon supports many more saturating variants of instructions than the
1506 ; following, but these are all GCC currently understands.
1507 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1508 ; yet either, although these patterns may be used by intrinsics when they're
1509 ; added.
1510
;; Signed saturating add of two double-word vectors (vqadd).
(define_insn "*ss_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (ss_plus:VD
          (match_operand:VD 1 "s_register_operand" "w")
          (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)
1519
;; Unsigned saturating add of two double-word vectors (vqadd).
(define_insn "*us_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_plus:VD
          (match_operand:VD 1 "s_register_operand" "w")
          (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)
1528
;; Signed saturating subtract of two double-word vectors (vqsub).
(define_insn "*ss_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (ss_minus:VD
          (match_operand:VD 1 "s_register_operand" "w")
          (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)
1537
;; Unsigned saturating subtract of two double-word vectors (vqsub).
(define_insn "*us_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (us_minus:VD
          (match_operand:VD 1 "s_register_operand" "w")
          (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)
1546
;; Conditional instructions.  These are comparisons with conditional moves
;; for vectors.  They perform the assignment:
;;
;;   Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
;;
;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
;; element-wise.
;;
;; The expander builds a comparison mask with vcge/vcgt/vceq (or their
;; compare-with-zero forms) and then selects between operands 1 and 2 with
;; vbsl.  Comparisons that cannot be expressed directly are computed as the
;; inverse comparison with the two vbsl data operands swapped.

(define_expand "vcond<mode><mode>"
  [(set (match_operand:VDQW 0 "s_register_operand" "")
        (if_then_else:VDQW
          (match_operator 3 "comparison_operator"
            [(match_operand:VDQW 4 "s_register_operand" "")
             (match_operand:VDQW 5 "nonmemory_operand" "")])
          (match_operand:VDQW 1 "s_register_operand" "")
          (match_operand:VDQW 2 "s_register_operand" "")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  bool inverse = false;
  bool use_zero_form = false;
  bool swap_bsl_operands = false;
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);

  rtx (*base_comparison) (rtx, rtx, rtx);
  rtx (*complementary_comparison) (rtx, rtx, rtx);

  const enum rtx_code code = GET_CODE (operands[3]);

  /* The ordered comparisons and EQ have a direct compare-with-zero form;
     every other case needs operand 5 in a register.  */
  if ((code == GE || code == GT || code == LE || code == LT || code == EQ)
      && operands[5] == CONST0_RTX (<MODE>mode))
    use_zero_form = true;
  else if (!REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  /* Pick the comparison generator for this code and the one to use when
     the operands have to be swapped.  */
  switch (code)
    {
    case LT:
    case UNLT:
      inverse = true;
      /* Fall through.  */
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      base_comparison = gen_neon_vcge<mode>;
      complementary_comparison = gen_neon_vcgt<mode>;
      break;
    case LE:
    case UNLE:
      inverse = true;
      /* Fall through.  */
    case GT:
    case UNGT:
      base_comparison = gen_neon_vcgt<mode>;
      complementary_comparison = gen_neon_vcge<mode>;
      break;
    case EQ:
    case NE:
    case UNEQ:
      base_comparison = gen_neon_vceq<mode>;
      complementary_comparison = gen_neon_vceq<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of vcge, vcgt or vceq.
         As a LT b <=> b GT a && a LE b <=> b GE a, our transformations are:
           a GE b -> a GE b
           a GT b -> a GT b
           a LE b -> b GE a
           a LT b -> b GT a
           a EQ b -> a EQ b
         Note that there also exist direct comparison against 0 forms,
         so catch those as a special case.  */
      if (use_zero_form)
        {
          inverse = false;
          /* GE, GT and EQ already have the correct base_comparison.  */
          if (code == LT)
            base_comparison = gen_neon_vclt<mode>;
          else if (code == LE)
            base_comparison = gen_neon_vcle<mode>;
        }

      if (inverse)
        emit_insn (complementary_comparison (mask, operands[5],
                                             operands[4]));
      else
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      break;

    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* Vector compare returns false for lanes which are unordered, so if
         we use the inverse of the comparison we actually want to emit,
         then swap the operands to BSL, we will end up with the correct
         result.  Note that a NE NaN and NaN NE b are true for all a, b.

         Our transformations are:
           a UNGE b -> !(b GT a)
           a UNGT b -> !(b GE a)
           a UNLE b -> !(a GT b)
           a UNLT b -> !(a GE b)
           a NE b   -> !(a EQ b)  */
      if (inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complementary_comparison (mask, operands[5],
                                             operands[4]));

      swap_bsl_operands = true;
      break;

    case UNEQ:
      /* We check (a > b || b > a), which is true iff
         (a != b && a ORDERED b).  Swapping the operands to BSL then
         selects on the negation, (a == b || a UNORDERED b), as
         intended.  */
      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      swap_bsl_operands = true;
      break;

    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a).
         Swapping the operands to BSL will give the UNORDERED case.  */
      swap_bsl_operands = true;
      /* Fall through.  */
    case ORDERED:
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      break;

    default:
      gcc_unreachable ();
    }

  /* Select per element under the mask, exchanging the two data operands
     when the mask holds the inverse of the requested comparison.  */
  if (swap_bsl_operands)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));
  DONE;
}
)
1719
;; Unsigned integer vector conditional select:
;;
;;   Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
;;
;; where op3 is one of GEU, GTU, LEU, LTU, EQ or NE; any other comparison
;; code hits gcc_unreachable.  A comparison mask is built with vcgeu,
;; vcgtu or vceq and consumed by vbsl.  NE is computed as EQ with the two
;; vbsl data operands swapped.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
        (if_then_else:VDQIW
          (match_operator 3 "arm_comparison_operator"
            [(match_operand:VDQIW 4 "s_register_operand" "")
             (match_operand:VDQIW 5 "s_register_operand" "")])
          (match_operand:VDQIW 1 "s_register_operand" "")
          (match_operand:VDQIW 2 "s_register_operand" "")))]
  "TARGET_NEON"
{
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  bool swap_bsl_operands = false;

  /* Always compare against a register.  The signed compare-with-zero
     patterns (vcle/vclt) must not be used for unsigned comparisons:
     a LEU 0 means a == 0 and a LTU 0 is never true, which is not what
     the signed tests compute.  */
  if (!REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (GET_CODE (operands[3]))
    {
    case GEU:
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
      break;

    case GTU:
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      /* a LEU b <=> b GEU a.  */
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
      break;

    case LTU:
      /* a LTU b <=> b GTU a.  */
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
      break;

    case NE:
      /* a NE b <=> !(a EQ b): build the EQ mask and swap the selection.  */
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      swap_bsl_operands = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (swap_bsl_operands)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
                                    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                    operands[2]));

  DONE;
}
)
1786
1787 ;; Patterns for builtins.
1788
; good for plain vadd, vaddq.
; Integer modes, and FP modes under -funsafe-math-optimizations, expand to
; the generic add<mode>3; otherwise the unspec form is used.

(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
}
)
1804
; Note that NEON operations don't support the full IEEE 754 standard: in
; particular, denormal values are flushed to zero.  This means that GCC cannot
; use those instructions for autovectorization, etc. unless
; -funsafe-math-optimizations is in effect (in which case flush-to-zero
; behaviour is permissible).  Intrinsic operations (provided by the arm_neon.h
; header) must work in either case: if -funsafe-math-optimizations is given,
; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
; expand to unspecs (which may potentially limit the extent to which they might
; be optimized by generic code).

; Used for intrinsics when flag_unsafe_math_optimizations is false.

(define_insn "neon_vadd<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VADD))]
  "TARGET_NEON"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_add<q>")))]
)
1829
;; Long add (vaddl): two double-word integer vectors in, one widened
;; quad-word vector out.
(define_insn "neon_vaddl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VADDL))]
  "TARGET_NEON"
  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_add_long")]
)
1839
;; vaddw: operand 1 already has the <V_widen> mode of the result; only
;; operand 2 is a VDI vector.  <sup> is supplied by the VADDW iterator.
1840 (define_insn "neon_vaddw<sup><mode>"
1841 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1842 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1843 (match_operand:VDI 2 "s_register_operand" "w")]
1844 VADDW))]
1845 "TARGET_NEON"
1846 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1847 [(set_attr "type" "neon_add_widen")]
1848 )
1849
1850 ; vhadd and vrhadd.
1851
;; vhadd / vrhadd on the VDQIW modes, modelled as a VHADD unspec.  <r> and
;; <sup> are supplied by the unspec iterator.
;; The scheduling type carries the <q> suffix so that D-register modes are
;; not classified as Q-register operations, matching how neon_vhsub uses
;; "neon_sub_halve<q>".
(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)
1861
;; vqadd over the VDQIX modes, modelled as a VQADD unspec; <sup> is supplied
;; by the unspec iterator and appears in both the name and the mnemonic.
1862 (define_insn "neon_vqadd<sup><mode>"
1863 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1864 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1865 (match_operand:VDQIX 2 "s_register_operand" "w")]
1866 VQADD))]
1867 "TARGET_NEON"
1868 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1869 [(set_attr "type" "neon_qadd<q>")]
1870 )
1871
;; vaddhn / vraddhn (spelling chosen by <r> from the VADDHN iterator): two
;; VN inputs, result in the <V_narrow> mode.
1872 (define_insn "neon_v<r>addhn<mode>"
1873 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1874 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1875 (match_operand:VN 2 "s_register_operand" "w")]
1876 VADDHN))]
1877 "TARGET_NEON"
1878 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1879 [(set_attr "type" "neon_add_halve_narrow_q")]
1880 )
1881
1882 ;; Polynomial and Float multiplication.
;; Implemented as UNSPEC_VMUL over the VPF modes; <pf> supplies the element
;; type letter of the mnemonic.  The "type" attribute separates float modes
;; (neon_fp_mul_s) from the remaining modes (neon_mul_<V_elem_ch>).
1883 (define_insn "neon_vmul<pf><mode>"
1884 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1885 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1886 (match_operand:VPF 2 "s_register_operand" "w")]
1887 UNSPEC_VMUL))]
1888 "TARGET_NEON"
1889 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1890 [(set (attr "type")
1891 (if_then_else (match_test "<Is_float_mode>")
1892 (const_string "neon_fp_mul_s<q>")
1893 (const_string "neon_mul_<V_elem_ch><q>")))]
1894 )
1895
;; Intrinsic expander for vmla.  Float modes compiled without
;; -funsafe-math-optimizations go through the opaque
;; neon_vmla<mode>_unspec pattern; every other case uses the canonical
;; mul<mode>3add<mode>_neon pattern.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx acc = operands[1];
  rtx mul_a = operands[2];
  rtx mul_b = operands[3];

  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmla<mode>_unspec (dest, acc, mul_a, mul_b));
  else
    emit_insn (gen_mul<mode>3add<mode>_neon (dest, acc, mul_a, mul_b));
  DONE;
})
1911
;; Intrinsic expander for vfma, available only with TARGET_FMA.  Note the
;; operand shuffle: operands 2 and 3 are passed first and operand 1 is
;; passed last to fma<mode>4_intrinsic (presumably operand 1 is the
;; accumulator -- confirm against fma<mode>4_intrinsic).
1912 (define_expand "neon_vfma<VCVTF:mode>"
1913 [(match_operand:VCVTF 0 "s_register_operand")
1914 (match_operand:VCVTF 1 "s_register_operand")
1915 (match_operand:VCVTF 2 "s_register_operand")
1916 (match_operand:VCVTF 3 "s_register_operand")]
1917 "TARGET_NEON && TARGET_FMA"
1918 {
1919 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1920 operands[1]));
1921 DONE;
1922 })
1923
;; Intrinsic expander for vfms, available only with TARGET_FMA.  As with
;; neon_vfma, operands 2 and 3 are passed first and operand 1 last, here to
;; fmsub<mode>4_intrinsic.
1924 (define_expand "neon_vfms<VCVTF:mode>"
1925 [(match_operand:VCVTF 0 "s_register_operand")
1926 (match_operand:VCVTF 1 "s_register_operand")
1927 (match_operand:VCVTF 2 "s_register_operand")
1928 (match_operand:VCVTF 3 "s_register_operand")]
1929 "TARGET_NEON && TARGET_FMA"
1930 {
1931 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1932 operands[1]));
1933 DONE;
1934 })
1935
1936 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1937
;; Multiply-accumulate kept as UNSPEC_VMLA.  Operand 1 is tied to the output
;; register (constraint "0"), so the template prints only operands 0, 2
;; and 3.  The "type" attribute depends on <Is_float_mode>.
1938 (define_insn "neon_vmla<mode>_unspec"
1939 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1940 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1941 (match_operand:VDQW 2 "s_register_operand" "w")
1942 (match_operand:VDQW 3 "s_register_operand" "w")]
1943 UNSPEC_VMLA))]
1944 "TARGET_NEON"
1945 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1946 [(set (attr "type")
1947 (if_then_else (match_test "<Is_float_mode>")
1948 (const_string "neon_fp_mla_s<q>")
1949 (const_string "neon_mla_<V_elem_ch><q>")))]
1950 )
1951
;; vmlal: operand 1 (tied to the output) and the result have the <V_widen>
;; mode; operands 2 and 3 are VW vectors.  <sup> comes from the VMLAL
;; iterator.
1952 (define_insn "neon_vmlal<sup><mode>"
1953 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1954 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1955 (match_operand:VW 2 "s_register_operand" "w")
1956 (match_operand:VW 3 "s_register_operand" "w")]
1957 VMLAL))]
1958 "TARGET_NEON"
1959 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
1960 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
1961 )
1962
;; Intrinsic expander for vmls.  Float modes compiled without
;; -funsafe-math-optimizations go through the opaque
;; neon_vmls<mode>_unspec pattern; every other case uses the canonical
;; mul<mode>3neg<mode>add<mode>_neon pattern.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx acc = operands[1];
  rtx mul_a = operands[2];
  rtx mul_b = operands[3];

  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmls<mode>_unspec (dest, acc, mul_a, mul_b));
  else
    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (dest, acc,
                                                      mul_a, mul_b));
  DONE;
})
1978
1979 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1980
;; Multiply-subtract kept as UNSPEC_VMLS.  Operand 1 is tied to the output
;; register (constraint "0"), so the template prints only operands 0, 2
;; and 3.  The "type" attribute depends on <Is_float_mode>.
1981 (define_insn "neon_vmls<mode>_unspec"
1982 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1983 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1984 (match_operand:VDQW 2 "s_register_operand" "w")
1985 (match_operand:VDQW 3 "s_register_operand" "w")]
1986 UNSPEC_VMLS))]
1987 "TARGET_NEON"
1988 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1989 [(set (attr "type")
1990 (if_then_else (match_test "<Is_float_mode>")
1991 (const_string "neon_fp_mla_s<q>")
1992 (const_string "neon_mla_<V_elem_ch><q>")))]
1993 )
1994
;; vmlsl: operand 1 (tied to the output) and the result have the <V_widen>
;; mode; operands 2 and 3 are VW vectors.  <sup> comes from the VMLSL
;; iterator.
1995 (define_insn "neon_vmlsl<sup><mode>"
1996 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1997 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1998 (match_operand:VW 2 "s_register_operand" "w")
1999 (match_operand:VW 3 "s_register_operand" "w")]
2000 VMLSL))]
2001 "TARGET_NEON"
2002 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2003 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2004 )
2005
2006 ;; vqdmulh, vqrdmulh
;; One pattern per member of the VQDMULH iterator over the VMDQI modes; <r>
;; chooses between the vqdmulh and vqrdmulh spellings.
2007 (define_insn "neon_vq<r>dmulh<mode>"
2008 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2009 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2010 (match_operand:VMDQI 2 "s_register_operand" "w")]
2011 VQDMULH))]
2012 "TARGET_NEON"
2013 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2014 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2015 )
2016
;; vqdmlal as UNSPEC_VQDMLAL: operand 1 (tied to the output) and the result
;; have the <V_widen> mode; operands 2 and 3 are VMDI vectors.
2017 (define_insn "neon_vqdmlal<mode>"
2018 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2019 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2020 (match_operand:VMDI 2 "s_register_operand" "w")
2021 (match_operand:VMDI 3 "s_register_operand" "w")]
2022 UNSPEC_VQDMLAL))]
2023 "TARGET_NEON"
2024 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2025 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2026 )
2027
;; vqdmlsl as UNSPEC_VQDMLSL: operand 1 (tied to the output) and the result
;; have the <V_widen> mode; operands 2 and 3 are VMDI vectors.
2028 (define_insn "neon_vqdmlsl<mode>"
2029 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2030 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2031 (match_operand:VMDI 2 "s_register_operand" "w")
2032 (match_operand:VMDI 3 "s_register_operand" "w")]
2033 UNSPEC_VQDMLSL))]
2034 "TARGET_NEON"
2035 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2036 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2037 )
2038
;; vmull: two VW inputs, result in the <V_widen> mode.  <sup> comes from the
;; VMULL iterator.
2039 (define_insn "neon_vmull<sup><mode>"
2040 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2041 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2042 (match_operand:VW 2 "s_register_operand" "w")]
2043 VMULL))]
2044 "TARGET_NEON"
2045 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2046 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2047 )
2048
;; vqdmull as UNSPEC_VQDMULL: two VMDI inputs, result in the <V_widen> mode.
2049 (define_insn "neon_vqdmull<mode>"
2050 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2051 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2052 (match_operand:VMDI 2 "s_register_operand" "w")]
2053 UNSPEC_VQDMULL))]
2054 "TARGET_NEON"
2055 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2056 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2057 )
2058
;; Intrinsic expander for vsub.  Float modes compiled without
;; -funsafe-math-optimizations go through the opaque
;; neon_vsub<mode>_unspec pattern; every other case uses the canonical
;; sub<mode>3 pattern.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx minuend = operands[1];
  rtx subtrahend = operands[2];

  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vsub<mode>_unspec (dest, minuend, subtrahend));
  else
    emit_insn (gen_sub<mode>3 (dest, minuend, subtrahend));
  DONE;
})
2072
2073 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2074
;; Subtract of operand 2 from operand 1 expressed as UNSPEC_VSUB instead of
;; a plain (minus ...), emitted as "vsub".  The scheduling "type" is the FP
;; add/sub class when <Is_float_mode> holds and the integer sub class
;; otherwise.
2075 (define_insn "neon_vsub<mode>_unspec"
2076 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2077 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2078 (match_operand:VCVTF 2 "s_register_operand" "w")]
2079 UNSPEC_VSUB))]
2080 "TARGET_NEON"
2081 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2082 [(set (attr "type")
2083 (if_then_else (match_test "<Is_float_mode>")
2084 (const_string "neon_fp_addsub_s<q>")
2085 (const_string "neon_sub<q>")))]
2086 )
2087
;; vsubl: both inputs are VDI vectors and the result has the <V_widen> mode.
;; <sup> is supplied by the VSUBL iterator.
2088 (define_insn "neon_vsubl<sup><mode>"
2089 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2090 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2091 (match_operand:VDI 2 "s_register_operand" "w")]
2092 VSUBL))]
2093 "TARGET_NEON"
2094 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2095 [(set_attr "type" "neon_sub_long")]
2096 )
2097
;; vsubw: operand 1 already has the <V_widen> mode of the result; only
;; operand 2 is a VDI vector.  <sup> is supplied by the VSUBW iterator.
2098 (define_insn "neon_vsubw<sup><mode>"
2099 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2100 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2101 (match_operand:VDI 2 "s_register_operand" "w")]
2102 VSUBW))]
2103 "TARGET_NEON"
2104 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2105 [(set_attr "type" "neon_sub_widen")]
2106 )
2107
;; vqsub over the VDQIX modes, modelled as a VQSUB unspec; <sup> is supplied
;; by the unspec iterator and appears in both the name and the mnemonic.
2108 (define_insn "neon_vqsub<sup><mode>"
2109 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2110 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2111 (match_operand:VDQIX 2 "s_register_operand" "w")]
2112 VQSUB))]
2113 "TARGET_NEON"
2114 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2115 [(set_attr "type" "neon_qsub<q>")]
2116 )
2117
;; vhsub over the VDQIW modes, modelled as a VHSUB unspec; <sup> is supplied
;; by the unspec iterator.  The scheduling type follows the register width
;; through the <q> suffix.
2118 (define_insn "neon_vhsub<sup><mode>"
2119 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2120 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2121 (match_operand:VDQIW 2 "s_register_operand" "w")]
2122 VHSUB))]
2123 "TARGET_NEON"
2124 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2125 [(set_attr "type" "neon_sub_halve<q>")]
2126 )
2127
;; vsubhn / vrsubhn (spelling chosen by <r> from the VSUBHN iterator): two
;; VN inputs, result in the <V_narrow> mode.
2128 (define_insn "neon_v<r>subhn<mode>"
2129 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2130 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2131 (match_operand:VN 2 "s_register_operand" "w")]
2132 VSUBHN))]
2133 "TARGET_NEON"
2134 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2135 [(set_attr "type" "neon_sub_halve_narrow_q")]
2136 )
2137
2138 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2139 ;; without unsafe math optimizations.
;; Intrinsic expander for the vc<cmp_op> comparisons.  Float vector modes
;; compiled without -funsafe-math-optimizations are routed to the
;; *_insn_unspec patterns; everything else uses neon_vc<cmp_op><mode>_insn.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
     (neg:<V_cmp_result>
       (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
                         (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
  "TARGET_NEON"
  {
    /* Canonical RTL is fine for integer modes, and for float modes once
       -funsafe-math-optimizations is in effect.  */
    if (GET_MODE_CLASS (<MODE>mode) != MODE_VECTOR_FLOAT
        || flag_unsafe_math_optimizations)
      {
        emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
                                                   operands[1],
                                                   operands[2]));
        DONE;
      }

    /* The unspec generators exist only for the float modes, while this
       expander is also instantiated for integer modes, so the generator
       cannot be named through <mode>.  Dispatch on the mode explicitly;
       integer modes never get here.  */
    switch (<MODE>mode)
      {
      case V2SFmode:
        emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                        operands[1],
                                                        operands[2]));
        break;
      case V4SFmode:
        emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                        operands[1],
                                                        operands[2]));
        break;
      default:
        gcc_unreachable ();
      }
    DONE;
  }
)
2179
;; Canonical-RTL form of the vc<cmp_op> comparisons.  Not available for
;; float vector modes unless -funsafe-math-optimizations is on.
;; Alternative 0 compares against a register, alternative 1 against the
;; immediate #0.
(define_insn "neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
  {
    char pattern[100];
    /* Element type letter: "f" for float vectors, else <cmp_type>.  */
    const char *elem_kind
      = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
         ? "f" : "<cmp_type>");
    /* Second source: a register, or the literal zero.  */
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";

    sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                      " %%<V_reg>1, %s",
             elem_kind, rhs);
    output_asm_insn (pattern, operands);
    return "";
  }
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))]
)
2204
;; Unspec (NEON_VCMP) form of the float comparisons.  Alternative 0
;; compares against a register, alternative 1 against the immediate #0.
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
  {
    char pattern[100];
    /* Second source: a register, or the literal zero.  */
    const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";

    sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                      " %%<V_reg>1, %s",
             rhs);
    output_asm_insn (pattern, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2223
;; Comparisons from the GTUGEU code iterator on VDQIW modes.  The mnemonic
;; always uses the ".u" element type and both sources must be registers.
2224 (define_insn "neon_vc<cmp_op>u<mode>"
2225 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2226 (neg:<V_cmp_result>
2227 (GTUGEU:<V_cmp_result>
2228 (match_operand:VDQIW 1 "s_register_operand" "w")
2229 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2230 "TARGET_NEON"
2231 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2232 [(set_attr "type" "neon_compare<q>")]
2233 )
2234
;; Intrinsic expander for the absolute-value comparisons (vac<cmp_op>).
;; Without -funsafe-math-optimizations the opaque *_insn_unspec pattern is
;; used; otherwise the canonical neon_vca<cmp_op><mode>_insn pattern.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
  {
    rtx dest = operands[0];
    rtx lhs = operands[1];
    rtx rhs = operands[2];

    if (!flag_unsafe_math_optimizations)
      emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (dest, lhs, rhs));
    else
      emit_insn (gen_neon_vca<cmp_op><mode>_insn (dest, lhs, rhs));
    DONE;
  }
)
2253
;; Canonical-RTL form of vac<cmp_op>: a GTGE comparison of the absolute
;; values of operands 1 and 2.  Only enabled when
;; flag_unsafe_math_optimizations is set.
2254 (define_insn "neon_vca<cmp_op><mode>_insn"
2255 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2256 (neg:<V_cmp_result>
2257 (GTGE:<V_cmp_result>
2258 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2259 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2260 "TARGET_NEON && flag_unsafe_math_optimizations"
2261 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2262 [(set_attr "type" "neon_fp_compare_s<q>")]
2263 )
2264
;; Unspec (NEON_VACMP) form of vac<cmp_op_unsp>; unlike the canonical
;; pattern it does not depend on flag_unsafe_math_optimizations.
2265 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2266 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2267 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2268 (match_operand:VCVTF 2 "s_register_operand" "w")]
2269 NEON_VACMP))]
2270 "TARGET_NEON"
2271 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2272 [(set_attr "type" "neon_fp_compare_s<q>")]
2273 )
2274
;; vtst on the VDQIW modes, modelled as UNSPEC_VTST.
2275 (define_insn "neon_vtst<mode>"
2276 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2277 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2278 (match_operand:VDQIW 2 "s_register_operand" "w")]
2279 UNSPEC_VTST))]
2280 "TARGET_NEON"
2281 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2282 [(set_attr "type" "neon_tst<q>")]
2283 )
2284
;; Integer vabd on the VDQIW modes; one pattern per member of the VABD
;; iterator, with <sup> giving the letter used in the mnemonic.
2285 (define_insn "neon_vabd<sup><mode>"
2286 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2287 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2288 (match_operand:VDQIW 2 "s_register_operand" "w")]
2289 VABD))]
2290 "TARGET_NEON"
2291 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2292 [(set_attr "type" "neon_abd<q>")]
2293 )
2294
;; vabd on the VCVTF modes, modelled as UNSPEC_VABD_F and scheduled as
;; neon_fp_abd_s<q>.
2295 (define_insn "neon_vabdf<mode>"
2296 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2297 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2298 (match_operand:VCVTF 2 "s_register_operand" "w")]
2299 UNSPEC_VABD_F))]
2300 "TARGET_NEON"
2301 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2302 [(set_attr "type" "neon_fp_abd_s<q>")]
2303 )
2304
;; vabdl: two VW inputs, result in the <V_widen> mode.  <sup> comes from the
;; VABDL iterator.
2305 (define_insn "neon_vabdl<sup><mode>"
2306 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2307 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2308 (match_operand:VW 2 "s_register_operand" "w")]
2309 VABDL))]
2310 "TARGET_NEON"
2311 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2312 [(set_attr "type" "neon_abd_long")]
2313 )
2314
;; vaba: modelled as a (plus ...) of the VABD unspec of operands 2 and 3 and
;; the accumulator operand 1, which is tied to the output (constraint "0").
2315 (define_insn "neon_vaba<sup><mode>"
2316 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2317 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2318 (match_operand:VDQIW 3 "s_register_operand" "w")]
2319 VABD)
2320 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2321 "TARGET_NEON"
2322 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2323 [(set_attr "type" "neon_arith_acc<q>")]
2324 )
2325
;; vabal: modelled as a (plus ...) of the VABDL unspec of the VW operands 2
;; and 3 and the <V_widen> accumulator operand 1, which is tied to the
;; output (constraint "0").
2326 (define_insn "neon_vabal<sup><mode>"
2327 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2328 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2329 (match_operand:VW 3 "s_register_operand" "w")]
2330 VABDL)
2331 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2332 "TARGET_NEON"
2333 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2334 [(set_attr "type" "neon_arith_acc<q>")]
2335 )
2336
;; Integer vmax / vmin on the VDQIW modes.  <maxmin> and <sup> are both
;; supplied by the VMAXMIN iterator.
2337 (define_insn "neon_v<maxmin><sup><mode>"
2338 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2339 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2340 (match_operand:VDQIW 2 "s_register_operand" "w")]
2341 VMAXMIN))]
2342 "TARGET_NEON"
2343 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2344 [(set_attr "type" "neon_minmax<q>")]
2345 )
2346
;; vmax / vmin on the VCVTF modes; <maxmin> is supplied by the VMAXMINF
;; iterator.
2347 (define_insn "neon_v<maxmin>f<mode>"
2348 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2349 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2350 (match_operand:VCVTF 2 "s_register_operand" "w")]
2351 VMAXMINF))]
2352 "TARGET_NEON"
2353 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2354 [(set_attr "type" "neon_fp_minmax_s<q>")]
2355 )
2356
;; Intrinsic expander for vpadd on the VD modes: a thin wrapper that
;; forwards all three operands unchanged to neon_vpadd_internal<mode>.
2357 (define_expand "neon_vpadd<mode>"
2358 [(match_operand:VD 0 "s_register_operand" "=w")
2359 (match_operand:VD 1 "s_register_operand" "w")
2360 (match_operand:VD 2 "s_register_operand" "w")]
2361 "TARGET_NEON"
2362 {
2363 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2364 operands[2]));
2365 DONE;
2366 })
2367
;; vpaddl: single VDQIW input, result in the <V_double_width> mode.  <sup>
;; comes from the VPADDL iterator.
2368 (define_insn "neon_vpaddl<sup><mode>"
2369 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2370 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2371 VPADDL))]
2372 "TARGET_NEON"
2373 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2374 [(set_attr "type" "neon_reduc_add_long")]
2375 )
2376
;; vpadal: the <V_double_width> accumulator (operand 1) is tied to the
;; output, so the template prints only operand 0 and the VDQIW operand 2.
2377 (define_insn "neon_vpadal<sup><mode>"
2378 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2379 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2380 (match_operand:VDQIW 2 "s_register_operand" "w")]
2381 VPADAL))]
2382 "TARGET_NEON"
2383 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2384 [(set_attr "type" "neon_reduc_add_acc")]
2385 )
2386
;; Integer vpmax / vpmin on the VDI modes.  <maxmin> and <sup> are both
;; supplied by the VPMAXMIN iterator.
2387 (define_insn "neon_vp<maxmin><sup><mode>"
2388 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2389 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2390 (match_operand:VDI 2 "s_register_operand" "w")]
2391 VPMAXMIN))]
2392 "TARGET_NEON"
2393 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2394 [(set_attr "type" "neon_reduc_minmax<q>")]
2395 )
2396
;; vpmax / vpmin on the VCVTF modes; <maxmin> is supplied by the VPMAXMINF
;; iterator.
2397 (define_insn "neon_vp<maxmin>f<mode>"
2398 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2399 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2400 (match_operand:VCVTF 2 "s_register_operand" "w")]
2401 VPMAXMINF))]
2402 "TARGET_NEON"
2403 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2404 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
2405 )
2406
;; vrecps on the VCVTF modes, modelled as UNSPEC_VRECPS.
2407 (define_insn "neon_vrecps<mode>"
2408 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2409 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2410 (match_operand:VCVTF 2 "s_register_operand" "w")]
2411 UNSPEC_VRECPS))]
2412 "TARGET_NEON"
2413 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2414 [(set_attr "type" "neon_fp_recps_s<q>")]
2415 )
2416
;; vrsqrts on the VCVTF modes, modelled as UNSPEC_VRSQRTS.
2417 (define_insn "neon_vrsqrts<mode>"
2418 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2419 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2420 (match_operand:VCVTF 2 "s_register_operand" "w")]
2421 UNSPEC_VRSQRTS))]
2422 "TARGET_NEON"
2423 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2424 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2425 )
2426
;; Intrinsic expander for vabs: forwards to the standard abs<mode>2 pattern.
2427 (define_expand "neon_vabs<mode>"
2428 [(match_operand:VDQW 0 "s_register_operand" "")
2429 (match_operand:VDQW 1 "s_register_operand" "")]
2430 "TARGET_NEON"
2431 {
2432 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2433 DONE;
2434 })
2435
;; vqabs on the VDQIW modes, modelled as UNSPEC_VQABS.
2436 (define_insn "neon_vqabs<mode>"
2437 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2438 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2439 UNSPEC_VQABS))]
2440 "TARGET_NEON"
2441 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2442 [(set_attr "type" "neon_qabs<q>")]
2443 )
2444
;; Canonical (bswap ...) on the VDQHSD modes, emitted as
;; vrev<V_sz_elem>.8, i.e. with the element size in the mnemonic and a
;; fixed ".8" data size.
2445 (define_insn "neon_bswap<mode>"
2446 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2447 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2448 "TARGET_NEON"
2449 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2450 [(set_attr "type" "neon_rev<q>")]
2451 )
2452
;; Intrinsic expander for vneg: forwards to the standard neg<mode>2 pattern.
2453 (define_expand "neon_vneg<mode>"
2454 [(match_operand:VDQW 0 "s_register_operand" "")
2455 (match_operand:VDQW 1 "s_register_operand" "")]
2456 "TARGET_NEON"
2457 {
2458 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
2459 DONE;
2460 })
2461
;; Vector copysign for the VCVTF modes: operand 0 receives operand 1 with
;; the bits selected by a per-element 0x80000000 mask replaced by those of
;; operand 2.  The mask is built in the integer <V_cmp_result> mode and
;; the selection is done with neon_vbsl<mode>.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
{
  rtx v_bitmask_cast;
  rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
  int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
  rtvec v = rtvec_alloc (n_elt);

  /* A CONST_INT must be sign-extended for the mode it is used in.
     GEN_INT (0x80000000) would yield the non-canonical value +2^31 for a
     32-bit element, so let gen_int_mode canonicalize it.  The 0x80000000
     constant assumes 32-bit elements (presumably all VCVTF modes are
     single-precision float vectors -- confirm in iterators.md).  */
  rtx sign_bit
    = gen_int_mode (0x80000000,
                    GET_MODE_INNER (<VCVTF:V_cmp_result>mode));

  /* Create bitmask for vector select.  */
  for (i = 0; i < n_elt; ++i)
    RTVEC_ELT (v, i) = sign_bit;

  emit_move_insn (v_bitmask,
                  gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));

  /* Start from operand 2, then let vbsl keep its masked bits and take
     every other bit from operand 1.  */
  emit_move_insn (operands[0], operands[2]);
  v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                        <VCVTF:V_cmp_result>mode, 0);
  emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
                                  operands[1]));

  DONE;
})
2488
;; vqneg on the VDQIW modes, modelled as UNSPEC_VQNEG.
2489 (define_insn "neon_vqneg<mode>"
2490 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2491 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2492 UNSPEC_VQNEG))]
2493 "TARGET_NEON"
2494 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2495 [(set_attr "type" "neon_qneg<q>")]
2496 )
2497
;; vcls on the VDQIW modes, modelled as UNSPEC_VCLS.
2498 (define_insn "neon_vcls<mode>"
2499 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2500 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2501 UNSPEC_VCLS))]
2502 "TARGET_NEON"
2503 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2504 [(set_attr "type" "neon_cls<q>")]
2505 )
2506
;; Standard-named clz pattern for the VDQIW modes, emitted as vclz.  It is
;; also what the neon_vclz<mode> intrinsic expander forwards to.
2507 (define_insn "clz<mode>2"
2508 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2509 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2510 "TARGET_NEON"
2511 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2512 [(set_attr "type" "neon_cnt<q>")]
2513 )
2514
;; Intrinsic expander for vclz: forwards to the clz<mode>2 pattern.
2515 (define_expand "neon_vclz<mode>"
2516 [(match_operand:VDQIW 0 "s_register_operand" "")
2517 (match_operand:VDQIW 1 "s_register_operand" "")]
2518 "TARGET_NEON"
2519 {
2520 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
2521 DONE;
2522 })
2523
;; Standard-named popcount pattern for the VE modes, emitted as vcnt.  It is
;; also what the neon_vcnt<mode> intrinsic expander forwards to.
2524 (define_insn "popcount<mode>2"
2525 [(set (match_operand:VE 0 "s_register_operand" "=w")
2526 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2527 "TARGET_NEON"
2528 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2529 [(set_attr "type" "neon_cnt<q>")]
2530 )
2531
;; Intrinsic expander for vcnt: forwards to the popcount<mode>2 pattern.
2532 (define_expand "neon_vcnt<mode>"
2533 [(match_operand:VE 0 "s_register_operand" "=w")
2534 (match_operand:VE 1 "s_register_operand" "w")]
2535 "TARGET_NEON"
2536 {
2537 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
2538 DONE;
2539 })
2540
;; vrecpe on the V32 modes, modelled as UNSPEC_VRECPE.  The same
;; neon_fp_recpe_s<q> scheduling type is used for every mode in V32.
2541 (define_insn "neon_vrecpe<mode>"
2542 [(set (match_operand:V32 0 "s_register_operand" "=w")
2543 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2544 UNSPEC_VRECPE))]
2545 "TARGET_NEON"
2546 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2547 [(set_attr "type" "neon_fp_recpe_s<q>")]
2548 )
2549
;; vrsqrte on the V32 modes, modelled as UNSPEC_VRSQRTE.  The same
;; neon_fp_rsqrte_s<q> scheduling type is used for every mode in V32.
2550 (define_insn "neon_vrsqrte<mode>"
2551 [(set (match_operand:V32 0 "s_register_operand" "=w")
2552 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2553 UNSPEC_VRSQRTE))]
2554 "TARGET_NEON"
2555 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2556 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
2557 )
2558
;; Intrinsic expander for vmvn: forwards to the standard one_cmpl<mode>2
;; pattern.
2559 (define_expand "neon_vmvn<mode>"
2560 [(match_operand:VDQIW 0 "s_register_operand" "")
2561 (match_operand:VDQIW 1 "s_register_operand" "")]
2562 "TARGET_NEON"
2563 {
2564 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
2565 DONE;
2566 })
2567
;; Sign-extending extraction of one element of a VD vector into a core
;; register.  On big-endian targets the lane index is mirrored within the
;; vector before the instruction is printed.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int last_lane = GET_MODE_NUNITS (<MODE>mode) - 1;
      const int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2586
;; Zero-extending extraction of one element of a VD vector into a core
;; register.  On big-endian targets the lane index is mirrored within the
;; vector before the instruction is printed.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int last_lane = GET_MODE_NUNITS (<MODE>mode) - 1;
      const int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2605
;; Sign-extending extraction of one element of a VQ2 vector into a core
;; register.  The lane index is split into a half-vector register
;; (regno + 2 per half) and a lane within that half; on big-endian targets
;; the lane within the half is mirrored.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[1]);
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  const unsigned int half_index = lane / lanes_per_half;
  unsigned int half_lane = lane % lanes_per_half;
  rtx ops[3];

  if (BYTES_BIG_ENDIAN)
    half_lane = lanes_per_half - 1 - half_lane;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, base_regno + 2 * half_index);
  ops[2] = GEN_INT (half_lane);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2632
;; Zero-extending extraction of one element of a VQ2 vector into a core
;; register.  The lane index is split into a half-vector register
;; (regno + 2 per half) and a lane within that half; on big-endian targets
;; the lane within the half is mirrored.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[1]);
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  const unsigned int half_index = lane / lanes_per_half;
  unsigned int half_lane = lane % lanes_per_half;
  rtx ops[3];

  if (BYTES_BIG_ENDIAN)
    half_lane = lanes_per_half - 1 - half_lane;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, base_regno + 2 * half_index);
  ops[2] = GEN_INT (half_lane);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2659
;; Intrinsic expander for vget_lane.  32-bit elements are extracted with
;; vec_extract<mode>; narrower elements go through the sign-extending
;; internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  const unsigned int unit_bits = GET_MODE_UNIT_BITSIZE (<MODE>mode);

  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers follow vldm element order, whereas generic
         RTL numbers elements in array (memory) order.  Translate the
         lane by flipping it within its 64-bit register.  */
      const unsigned int lanes_per_dreg = 64 / unit_bits;
      const unsigned int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (lane ^ (lanes_per_dreg - 1));
    }

  if (unit_bits != 32)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1],
                                      operands[2]));
  DONE;
})
2688
;; Intrinsic expander for the unsigned vget_lane.  32-bit elements are
;; extracted with vec_extract<mode>; narrower elements go through the
;; zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  const unsigned int unit_bits = GET_MODE_UNIT_BITSIZE (<MODE>mode);

  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers follow vldm element order, whereas generic
         RTL numbers elements in array (memory) order.  Translate the
         lane by flipping it within its 64-bit register.  */
      const unsigned int lanes_per_dreg = 64 / unit_bits;
      const unsigned int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (lane ^ (lanes_per_dreg - 1));
    }

  if (unit_bits != 32)
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1],
                                      operands[2]));
  DONE;
})
2717
;; DImode variant of vget_lane.  The lane operand (2) is accepted but never
;; read: the expansion is a plain move of operand 1 into operand 0.
2718 (define_expand "neon_vget_lanedi"
2719 [(match_operand:DI 0 "s_register_operand" "=r")
2720 (match_operand:DI 1 "s_register_operand" "w")
2721 (match_operand:SI 2 "immediate_operand" "")]
2722 "TARGET_NEON"
2723 {
2724 emit_move_insn (operands[0], operands[1]);
2725 DONE;
2726 })
2727
;; V2DI variant of vget_lane: lane 0 is the low DImode part of operand 1
;; and lane 1 the high part, so the extraction is a plain move.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers follow vldm element order, whereas generic
         RTL numbers elements in array (memory) order.  With two lanes the
         translation is a flip of the low bit.  */
      const unsigned int lane_count = 2;
      const unsigned int given = INTVAL (operands[2]);

      operands[2] = GEN_INT (given ^ (lane_count - 1));
    }

  const int lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  rtx half;
  if (lane == 0)
    half = gen_lowpart (DImode, operands[1]);
  else
    half = gen_highpart (DImode, operands[1]);

  emit_move_insn (operands[0], half);
  DONE;
})
2756
;; Expand neon_vset_lane<mode>: insert scalar operand 1 into lane operand 3
;; of vector operand 2, giving operand 0.  The lane is handed to
;; vec_set<mode>_internal as a one-hot mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "=w")
   (match_operand:<V_elem> 1 "s_register_operand" "r")
   (match_operand:VDQ 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);
  rtx lane_mask;

  /* On big-endian, flip the index within its 64-bit chunk.  */
  if (BYTES_BIG_ENDIAN)
    lane ^= (64 / GET_MODE_UNIT_BITSIZE (<MODE>mode)) - 1;

  lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         lane_mask, operands[2]));
  DONE;
})
2777
; Operands 2 (the old vector) and 3 (the lane number) are accepted but
; ignored: the expansion is a plain DImode move of operand 1 into operand 0.

(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")
   (match_operand:DI 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx new_value = operands[1];

  emit_move_insn (dest, new_value);
  DONE;
})
2790
;; Expand neon_vcreate<mode>: reinterpret DImode operand 1 as a <MODE>mode
;; value via gen_lowpart and move it into operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand" "")
   (match_operand:DI 1 "general_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
2800
;; vec_duplicate of scalar operand 1 (constraint "r") into every element
;; of VX-mode operand 0, emitted as vdup.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX
          (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
2808
;; vec_duplicate of scalar operand 1 into V32-mode operand 0.  Two
;; alternatives: the scalar under constraint "r" (printed as %1) or under
;; constraint "t" (printed as %y1); each has its own "type" value.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32
          (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
2818
;; Expand neon_vdup_ndi as a plain DImode move of operand 1 into operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx scalar = operands[1];

  emit_move_insn (dest, scalar);
  DONE;
}
)
2828
;; vec_duplicate of DImode operand 1 into both elements of V2DI operand 0.
;; Always two vmov instructions (length 8), one per destination half
;; (%e0 and %f0); the source is printed as %Q1, %R1 for constraint "r"
;; and as %P1 for constraint "w".
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
2839
;; Duplicate element operand 2 of vector operand 1 into every element of
;; operand 0.  The output code mirrors the lane number on big-endian
;; before printing it, and picks the %P or %q destination modifier from
;; <Is_d_reg>.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the index: lane i becomes lane (nunits - 1 - i).  */
      int last_lane = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1;
      int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }

  return (<Is_d_reg>
          ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
          : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
2861
;; Expand neon_vdup_lane<mode>: adjust the lane number for big-endian and
;; hand over to neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx lane = operands[2];

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the index within its 64-bit chunk.  */
      unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int idx = INTVAL (lane);

      lane = GEN_INT (idx ^ (lanes_per_dreg - 1));
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                lane));
  DONE;
})
2880
; The scalar index (operand 2) is ignored, since only zero is valid here;
; the expansion is a plain DImode move.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
2891
; The index (operand 2) is ignored for v2di as well: the DImode source
; operand has only a single element, so this is just neon_vdup_nv2di.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx dup = gen_neon_vdup_nv2di (operands[0], operands[1]);

  emit_insn (dup);
  DONE;
})
2902
; Swap two vector registers with vswp.  Only available once reload has
; completed, so that combine cannot do something silly with it; it is used
; by the post-reload expansion of neon_vcombine.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1)
        (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
2913
;; In this insn, operand 1 is the low part and operand 2 the high part of
;; the destination vector.  The insn emits "#" and is split after reload
;; by neon_split_vcombine.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that nothing needs to be output at all (the register
;; allocator could be made to put things in the right places by itself).
;; Lack of subregs for vectors makes that tricky though, I think.

(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* All of the work is done by the helper; the split template above is
     only a placeholder.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
2936
;; Expand neon_vget_high<mode>: move the <V_HALF>mode subreg of operand 1
;; at byte offset GET_MODE_SIZE (<V_HALF>mode) into operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx upper_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                           GET_MODE_SIZE (<V_HALF>mode));

  emit_move_insn (operands[0], upper_half);
  DONE;
})
2947
;; Expand neon_vget_low<mode>: move the <V_HALF>mode subreg of operand 1
;; at byte offset 0 into operand 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lower_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);

  emit_move_insn (operands[0], lower_half);
  DONE;
})
2958
;; Signed integer vector to float vector (RTL "float"), emitted as
;; vcvt.f32.s32.  Disabled under flag_rounding_math.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
2966
;; Unsigned integer vector to float vector (RTL "unsigned_float"), emitted
;; as vcvt.f32.u32.  Disabled under flag_rounding_math.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
2974
;; Float vector to signed integer vector (RTL "fix"), emitted as
;; vcvt.s32.f32.
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
2982
;; Float vector to unsigned integer vector (RTL "unsigned_fix"), emitted
;; as vcvt.u32.f32.
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
2990
;; Float-to-integer vcvt expressed as an unspec over the VCVT_US iterator;
;; <sup> supplies the signedness letter of the result type.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
2999
;; Integer-to-float vcvt expressed as an unspec over the VCVT_US iterator;
;; <sup> supplies the signedness letter of the source type.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3008
;; Widen V4HF operand 1 to V4SF operand 0 with vcvt.f32.f16.  Requires
;; TARGET_FP16 in addition to TARGET_NEON.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V4HF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
3017
;; Narrow V4SF operand 1 to V4HF operand 0 with vcvt.f16.f32.  Requires
;; TARGET_FP16 in addition to TARGET_NEON.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4SF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
3026
;; Float-to-integer vcvt with an extra immediate (operand 2) printed as the
;; last assembler operand.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* Range-check the immediate before printing it.  NOTE(review): the
     upper bound 33 is presumably exclusive (i.e. 1..32) -- confirm
     against neon_const_bounds.  */
  neon_const_bounds (operands[2], 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3039
;; Integer-to-float vcvt with an extra immediate (operand 2) printed as the
;; last assembler operand.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* Range-check the immediate before printing it.  NOTE(review): the
     upper bound 33 is presumably exclusive (i.e. 1..32) -- confirm
     against neon_const_bounds.  */
  neon_const_bounds (operands[2], 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3052
;; vmovn: VN-mode operand 1 (printed %q1) to <V_narrow> operand 0
;; (printed %P0).
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3061
;; vqmovn: VN-mode operand 1 to <V_narrow> operand 0; the VQMOVN iterator
;; provides the <sup> letter used in the data type.
(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3070
;; vqmovun: VN-mode operand 1 to <V_narrow> operand 0.
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3079
;; vmovl: VW-mode operand 1 (printed %P1) to <V_widen> operand 0
;; (printed %q0); the VMOVL iterator provides the <sup> letter.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")]
          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
3088
;; vmul by scalar, VMD modes: operand 2 supplies the scalar vector and
;; operand 3 the lane, printed as %P2[%c3].  The "type" attribute depends
;; on <Is_float_mode>.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "w")
           (match_operand:VMD 2 "s_register_operand"
                                "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3105
;; vmul by scalar, VMQ modes: the scalar comes from lane operand 3 of the
;; <V_HALF>-mode operand 2, printed as %P2[%c3].  The "type" attribute
;; depends on <Is_float_mode>.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3122
;; vmull by scalar: VMDI operands 1 and 2, lane operand 3, <V_widen>
;; result.  The VMULL_LANE iterator provides the <sup> letter.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VMULL_LANE))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
3136
;; vqdmull by scalar: VMDI operands 1 and 2, lane operand 3, <V_widen>
;; result.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
3150
;; vqdmulh / vqrdmulh by scalar, VMQI modes (<r> comes from the
;; VQDMULH_LANE iterator).  The scalar is lane operand 3 of the
;; <V_HALF>-mode operand 2.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3164
;; vqdmulh / vqrdmulh by scalar, VMDI modes (<r> comes from the
;; VQDMULH_LANE iterator).  Operand 2 supplies the scalar vector and
;; operand 3 the lane, printed as %P2[%c3].
;;
;; All vector operands are printed with %P, so the scheduling type is the
;; plain "_scalar" one; the "_scalar_q" suffix belongs to the VMQI variant
;; of this pattern, which prints its vector operands with %q.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)
3178
;; vmla by scalar, VMD modes.  Operand 1 is tied to the destination
;; (constraint "0"); the scalar is lane operand 4 of operand 3.  The
;; "type" attribute depends on <Is_float_mode>.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                                "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3196
;; vmla by scalar, VMQ modes.  Operand 1 is tied to the destination
;; (constraint "0"); the scalar is lane operand 4 of the <V_HALF>-mode
;; operand 3.  The "type" attribute depends on <Is_float_mode>.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3214
;; vmlal by scalar.  The <V_widen> operand 1 is tied to the destination
;; (constraint "0"); the scalar is lane operand 4 of operand 3.  The
;; VMLAL_LANE iterator provides the <sup> letter.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLAL_LANE))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3229
;; vqdmlal by scalar.  The <V_widen> operand 1 is tied to the destination
;; (constraint "0"); the scalar is lane operand 4 of operand 3.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
3244
;; vmls by scalar, VMD modes.  Operand 1 is tied to the destination
;; (constraint "0"); the scalar is lane operand 4 of operand 3.  The
;; "type" attribute depends on <Is_float_mode>.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                                "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3262
;; vmls by scalar, VMQ modes.  Operand 1 is tied to the destination
;; (constraint "0"); the scalar is lane operand 4 of the <V_HALF>-mode
;; operand 3.  The "type" attribute depends on <Is_float_mode>.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3280
;; vmlsl by scalar.  The <V_widen> operand 1 is tied to the destination
;; (constraint "0"); the scalar is lane operand 4 of operand 3.  The
;; VMLSL_LANE iterator provides the <sup> letter.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLSL_LANE))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3295
;; vqdmlsl by scalar.  The <V_widen> operand 1 is tied to the destination
;; (constraint "0"); the scalar is lane operand 4 of operand 3.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
3310
; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in
; a core register into a temp register, then use a scalar taken from that.
; This isn't an optimal solution if e.g. the scalar has just been read from
; memory or extracted from another vector.  In the latter case it is
; currently better to use the "_lane" variant; the former case can probably
; be implemented using vld1_lane, but that hasn't been done yet.

(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Put the scalar in lane 0 of a fresh vector register, then multiply
     by that lane.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       lane0));
  DONE;
})
3330
;; Expand neon_vmul_n<mode> for VMQ modes: place scalar operand 2 in
;; lane 0 of a <V_HALF>-mode temporary and multiply by that lane.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       lane0));
  DONE;
})
3343
;; Expand neon_vmulls_n<mode>: place scalar operand 2 in lane 0 of a
;; temporary vector and use neon_vmulls_lane<mode> on that lane.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1],
                                         scalar_vec, lane0));
  DONE;
})
3356
;; Expand neon_vmullu_n<mode>: place scalar operand 2 in lane 0 of a
;; temporary vector and use neon_vmullu_lane<mode> on that lane.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1],
                                         scalar_vec, lane0));
  DONE;
})
3369
;; Expand neon_vqdmull_n<mode>: place scalar operand 2 in lane 0 of a
;; temporary vector and use neon_vqdmull_lane<mode> on that lane.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
3382
;; Expand neon_vqdmulh_n<mode> for VMDI modes: place scalar operand 2 in
;; lane 0 of a temporary vector and use neon_vqdmulh_lane<mode>.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
3395
;; Expand neon_vqrdmulh_n<mode> for VMDI modes: place scalar operand 2 in
;; lane 0 of a temporary vector and use neon_vqrdmulh_lane<mode>.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
3408
;; Expand neon_vqdmulh_n<mode> for VMQI modes: place scalar operand 2 in
;; lane 0 of a <V_HALF>-mode temporary and use neon_vqdmulh_lane<mode>.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, lane0));
  DONE;
})
3421
;; Expand neon_vqrdmulh_n<mode> for VMQI modes: place scalar operand 2 in
;; lane 0 of a <V_HALF>-mode temporary and use neon_vqrdmulh_lane<mode>.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, lane0));
  DONE;
})
3434
;; Expand neon_vmla_n<mode> for VMD modes: place scalar operand 3 in
;; lane 0 of a temporary vector and use neon_vmla_lane<mode> on that lane.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
3448
;; Expand neon_vmla_n<mode> for VMQ modes: place scalar operand 3 in
;; lane 0 of a <V_HALF>-mode temporary and use neon_vmla_lane<mode>.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
3462
;; Expand neon_vmlals_n<mode>: place scalar operand 3 in lane 0 of a
;; temporary vector and use neon_vmlals_lane<mode> on that lane.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
3476
;; Expand neon_vmlalu_n<mode>: place scalar operand 3 in lane 0 of a
;; temporary vector and use neon_vmlalu_lane<mode> on that lane.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
3490
;; Expand neon_vqdmlal_n<mode>: place scalar operand 3 in lane 0 of a
;; temporary vector and use neon_vqdmlal_lane<mode> on that lane.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
3504
;; Expand neon_vmls_n<mode> for VMD modes: place scalar operand 3 in
;; lane 0 of a temporary vector and use neon_vmls_lane<mode> on that lane.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
3518
;; Expand neon_vmls_n<mode> for VMQ modes: place scalar operand 3 in
;; lane 0 of a <V_HALF>-mode temporary and use neon_vmls_lane<mode>.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, lane0));
  DONE;
})
3532
;; Expand neon_vmlsls_n<mode>: place scalar operand 3 in lane 0 of a
;; temporary vector and use neon_vmlsls_lane<mode> on that lane.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
3546
;; Expand neon_vmlslu_n<mode>: place scalar operand 3 in lane 0 of a
;; temporary vector and use neon_vmlslu_lane<mode> on that lane.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec, lane0));
  DONE;
})
3560
;; Expand neon_vqdmlsl_n<mode>: place scalar operand 3 in lane 0 of a
;; temporary vector and use neon_vqdmlsl_lane<mode> on that lane.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       lane0));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec, lane0));
  DONE;
})
3574
;; vext on vector operands 1 and 2 with immediate operand 3, which is
;; range-checked against the element count of <MODE>mode before printing.
(define_insn "neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")
           (match_operand:VDQX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VEXT))]
  "TARGET_NEON"
{
  /* NOTE(review): the upper bound is presumably exclusive, i.e. valid
     values are 0 .. nunits - 1 -- confirm against neon_const_bounds.  */
  neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
3588
;; vrev64 of operand 1 into operand 0, for VDQ modes.
(define_insn "neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ
          [(match_operand:VDQ 1 "s_register_operand" "w")]
          UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3597
;; vrev32 of operand 1 into operand 0, for VX modes.
(define_insn "neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX
          [(match_operand:VX 1 "s_register_operand" "w")]
          UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3606
;; vrev16 of operand 1 into operand 0, for VE modes.
(define_insn "neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE
          [(match_operand:VE 1 "s_register_operand" "w")]
          UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3615
; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on
; register allocation.  For an intrinsic of the form:
;   rD = vbsl_* (rS, rN, rM)
; any of the following can be used:
;   vbsl rS, rN, rM  (if D = S)
;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
; The three alternatives below tie the destination to operand 1, 3 and 2
; respectively.

(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
           (match_operand:VDQX 2 "s_register_operand" " w,w,0")
           (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)
3637
;; Expander behind the vbsl intrinsics.  The selector (operand 1) arrives
;; in the V_cmp_result mode and is re-viewed in the data mode before the
;; UNSPEC_VBSL set in the template is generated.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "")
        (unspec:VDQX
          [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
           (match_operand:VDQX 2 "s_register_operand" "")
           (match_operand:VDQX 3 "s_register_operand" "")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands of different modes cannot be tied to one another, so give
     the selector the same mode as the data operands.  */
  rtx selector = gen_lowpart (<MODE>mode, operands[1]);
  operands[1] = selector;
})
3649
;; vshl, vrshl: shift each element of operand 1 by the per-element count
;; held in register operand 2.
;; NOTE(review): the "type" below is the immediate-shift class although the
;; count is a register operand -- confirm this is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
3660
;; vqshl, vqrshl: as the vshl/vrshl pattern, but iterating over the VQSHL
;; unspecs and classified as a saturating shift.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3671
;; vshr_n, vrshr_n: right shift of operand 1 by the immediate in operand 2.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHR_N))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 1, upper limit one more
     than the element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);
  neon_const_bounds (operands[2], 1, elt_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
3685
;; vshrn_n, vrshrn_n: right shift by immediate producing a result in the
;; V_narrow mode of the VN input.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHRN_N))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 1, upper limit one more
     than half the input element width.  */
  const HOST_WIDE_INT half_bits = neon_element_bits (<MODE>mode) / 2;
  neon_const_bounds (operands[2], 1, half_bits + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3699
;; vqshrn_n, vqrshrn_n: saturating right shift by immediate producing a
;; result in the V_narrow mode of the VN input.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRN_N))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 1, upper limit one more
     than half the input element width.  */
  const HOST_WIDE_INT half_bits = neon_element_bits (<MODE>mode) / 2;
  neon_const_bounds (operands[2], 1, half_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3713
;; vqshrun_n, vqrshrun_n: saturating right shift by immediate producing a
;; result in the V_narrow mode of the VN input; the mnemonic is suffixed
;; with V_s_elem.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRUN_N))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 1, upper limit one more
     than half the input element width.  */
  const HOST_WIDE_INT half_bits = neon_element_bits (<MODE>mode) / 2;
  neon_const_bounds (operands[2], 1, half_bits + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3727
;; vshl_n: left shift of operand 1 by the immediate in operand 2.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 0, upper limit the element
     width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);
  neon_const_bounds (operands[2], 0, elt_bits);
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
3740
;; vqshl_n: saturating left shift of operand 1 by the immediate in
;; operand 2, for each unspec in VQSHL_N.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHL_N))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 0, upper limit the element
     width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);
  neon_const_bounds (operands[2], 0, elt_bits);
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3753
;; vqshlu_n: saturating left shift of operand 1 by the immediate in
;; operand 2 (UNSPEC_VQSHLU_N).
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 0, upper limit the element
     width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);
  neon_const_bounds (operands[2], 0, elt_bits);
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3766
;; vshll_n: left shift by immediate of a VW input, producing a result in
;; its V_widen mode.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHLL_N))]
  "TARGET_NEON"
{
  /* The count is checked with lower limit 0 and upper limit one more
     than the element width.
     NOTE(review): this pattern used to be described as 0 < imm <= size,
     but the lower limit actually passed is 0, the same value the vshl_n
     pattern passes to accept a zero shift -- confirm which is meant.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);
  neon_const_bounds (operands[2], 0, elt_bits + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
3780
;; vsra_n, vrsra_n: operand 1 is tied to the destination, operand 2 is
;; the vector being shifted and operand 3 the immediate count.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          VSRA_N))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 1, upper limit one more
     than the element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);
  neon_const_bounds (operands[3], 1, elt_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
3795
;; vsri_n: operand 1 is tied to the destination, operand 2 is the vector
;; being shifted and operand 3 the immediate count.
;; NOTE(review): the "type" below is the register-shift class although the
;; count is an immediate -- confirm this is intended.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSRI))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 1, upper limit one more
     than the element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);
  neon_const_bounds (operands[3], 1, elt_bits + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
3809
;; vsli_n: operand 1 is tied to the destination, operand 2 is the vector
;; being shifted and operand 3 the immediate count.
;; NOTE(review): the "type" below is the register-shift class although the
;; count is an immediate -- confirm this is intended.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSLI))]
  "TARGET_NEON"
{
  /* Range-check the shift count: lower limit 0, upper limit the element
     width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);
  neon_const_bounds (operands[3], 0, elt_bits);
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
3823
;; vtbl with a one-register table: operand 1 is the table, operand 2 the
;; index vector.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
3833
;; vtbl with a two-register table held in a TImode operand.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Name the table registers individually (V8QImode registers at the
     table's register number and at that number plus 2) so that they can
     be printed as a register list.  */
  const int first = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xops[i + 1] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[3] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
3854
;; vtbl with a three-register table held in an EImode operand.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Name the table registers individually (V8QImode registers at the
     table's register number plus 0, 2 and 4) so that they can be printed
     as a register list.  */
  const int first = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[4] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
3876
;; vtbl with a four-register table held in an OImode operand.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Name the table registers individually (V8QImode registers at the
     table's register number plus 0, 2, 4 and 6) so that they can be
     printed as a register list.  */
  const int first = REGNO (operands[1]);
  rtx xops[6];

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[5] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
3899
;; These three are used by the vec_perm infrastructure for V16QImode.

;; Table lookup with a V16QI table and a V16QI index vector.  Emitted as
;; "#" and split after reload into two neon_vtbl2v8qi lookups, one for the
;; low and one for the high half of the result; both use the whole table
;; viewed as TImode.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = gen_lowpart (TImode, operands[1]);
  unsigned half_ofs[2];

  /* Low half first, then high half.  */
  half_ofs[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_ofs[1] = subreg_highpart_offset (V8QImode, V16QImode);

  for (int i = 0; i < 2; i++)
    {
      rtx dest = simplify_subreg (V8QImode, operands[0], V16QImode,
                                  half_ofs[i]);
      rtx index = simplify_subreg (V8QImode, operands[2], V16QImode,
                                   half_ofs[i]);
      emit_insn (gen_neon_vtbl2v8qi (dest, table, index));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
3931
;; Table lookup with an OImode table and a V16QI index vector.  Emitted as
;; "#" and split after reload into two V8QI lookups, one for the low and
;; one for the high half of the result; both use the whole OImode table.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  /* The table is OImode, so the lookup pattern whose table operand is
     OImode is neon_vtbl4v8qi.  This used to call gen_neon_vtbl2v8qi,
     whose table operand is TImode; both generators wrap their arguments
     in the same V8QI UNSPEC_VTBL set, so the RTL emitted is unchanged,
     but naming the generator that matches the operand mode avoids
     implying that only two table registers are read.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));

  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  DONE;
}
  [(set_attr "type" "multiple")]
)
3962
;; ??? Logically the regular neon_vcombine pattern should be extended to
;; take quad-word inputs and produce octa-word outputs.  That would mean
;; adding octa-word vector modes to the move patterns, which seems like
;; overkill for this single use in vec_perm.
;; The insn is emitted as "#" and split after reload by
;; neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* The helper emits the replacement sequence; nothing else to add.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
3982
;; vtbx with a one-register table: operand 1 is tied to the destination,
;; operand 2 is the table and operand 3 the index vector.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
3993
;; vtbx with a two-register table held in a TImode operand; operand 1 is
;; tied to the destination.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:TI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Name the table registers individually (V8QImode registers at the
     table's register number and at that number plus 2) so that they can
     be printed as a register list.  */
  const int first = REGNO (operands[2]);
  rtx xops[4];

  xops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xops[i + 1] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[3] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4015
;; vtbx with a three-register table held in an EImode operand; operand 1
;; is tied to the destination.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:EI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Name the table registers individually (V8QImode registers at the
     table's register number plus 0, 2 and 4) so that they can be printed
     as a register list.  */
  const int first = REGNO (operands[2]);
  rtx xops[5];

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[4] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4038
;; vtbx with a four-register table held in an OImode operand; operand 1
;; is tied to the destination.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:OI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Name the table registers individually (V8QImode registers at the
     table's register number plus 0, 2, 4 and 6) so that they can be
     printed as a register list.  */
  const int first = REGNO (operands[2]);
  rtx xops[6];

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[5] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4062
;; Expander for vtrn: a parallel of two sets over the same pair of inputs,
;; UNSPEC_VTRN1 into operand 0 and UNSPEC_VTRN2 into operand 3.  There is
;; no preparation code; the template is emitted as written.
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
          (unspec:VDQW
            [(match_operand:VDQW 1 "s_register_operand" "")
             (match_operand:VDQW 2 "s_register_operand" "")]
            UNSPEC_VTRN1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
          (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
4074
;; Note: the operands are numbered differently from the expander so that
;; the tied registers are handled correctly: input 1 is tied to output 0
;; and input 3 to output 2.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 3 "s_register_operand" "2")]
          UNSPEC_VTRN1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
4088
;; Expander for vzip: a parallel of two sets over the same pair of inputs,
;; UNSPEC_VZIP1 into operand 0 and UNSPEC_VZIP2 into operand 3.  There is
;; no preparation code; the template is emitted as written.
(define_expand "neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
          (unspec:VDQW
            [(match_operand:VDQW 1 "s_register_operand" "")
             (match_operand:VDQW 2 "s_register_operand" "")]
            UNSPEC_VZIP1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
          (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
4100
;; Note: the operands are numbered differently from the expander so that
;; the tied registers are handled correctly: input 1 is tied to output 0
;; and input 3 to output 2.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 3 "s_register_operand" "2")]
          UNSPEC_VZIP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4114
;; Expander for vuzp: a parallel of two sets over the same pair of inputs,
;; UNSPEC_VUZP1 into operand 0 and UNSPEC_VUZP2 into operand 3.  There is
;; no preparation code; the template is emitted as written.
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
          (unspec:VDQW
            [(match_operand:VDQW 1 "s_register_operand" "")
             (match_operand:VDQW 2 "s_register_operand" "")]
            UNSPEC_VUZP1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
          (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
4126
;; Note: the operands are numbered differently from the expander so that
;; the tied registers are handled correctly: input 1 is tied to output 0
;; and input 3 to output 2.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 3 "s_register_operand" "2")]
          UNSPEC_VUZP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4140
;; Reinterpret a VD_RE-mode register as V8QI.  All of the work is done by
;; neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretv8qi<mode>"
  [(match_operand:V8QI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4149
;; Reinterpret a VD_RE-mode register as V4HI.  All of the work is done by
;; neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretv4hi<mode>"
  [(match_operand:V4HI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4158
;; Reinterpret a VD_RE-mode register as V2SI.  All of the work is done by
;; neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretv2si<mode>"
  [(match_operand:V2SI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4167
;; Reinterpret a VD_RE-mode register as V2SF.  All of the work is done by
;; neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretv2sf<mode>"
  [(match_operand:V2SF 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4176
;; Reinterpret a VD_RE-mode register as DI.  All of the work is done by
;; neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretdi<mode>"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4185
;; Reinterpret a VQXMOV-mode register as TI.  All of the work is done by
;; neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretti<mode>"
  [(match_operand:TI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4194
4195
;; Reinterpret a VQXMOV-mode register as V16QI.  All of the work is done
;; by neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretv16qi<mode>"
  [(match_operand:V16QI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4204
;; Reinterpret a VQXMOV-mode register as V8HI.  All of the work is done
;; by neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretv8hi<mode>"
  [(match_operand:V8HI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4213
;; Reinterpret a VQXMOV-mode register as V4SI.  All of the work is done
;; by neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretv4si<mode>"
  [(match_operand:V4SI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4222
;; Reinterpret a VQXMOV-mode register as V4SF.  All of the work is done
;; by neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretv4sf<mode>"
  [(match_operand:V4SF 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4231
;; Reinterpret a VQXMOV-mode register as V2DI.  All of the work is done
;; by neon_reinterpret; the template itself is never emitted (DONE).
(define_expand "neon_vreinterpretv2di<mode>"
  [(match_operand:V2DI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  neon_reinterpret (dest, src);
  DONE;
})
4240
;; vec_load_lanes for a single vector: generates the same UNSPEC_VLD1 set
;; that the neon_vld1 insn matches.  No preparation code.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON")
4246
;; vld1 of a whole vector from the structure memory operand 1.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
4255
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  The lane numbers are restored to architectural
;; lane order here.
;; Load one element (operand 1) into lane operand 3 of a VDX vector;
;; operand 2 is the previous vector value and is tied to the destination.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VDX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  operands[3] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode,
                                             INTVAL (operands[3])));

  /* A single-element vector is loaded whole, without a lane index.  */
  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4277
;; The lane number in the RTL is in GCC lane order and is converted back
;; to architectural lane order here (NEON_ENDIAN_LANE_N), which is why
;; lanes appear reversed on big-endian targets.
;; Load one element (operand 1) into lane operand 3 of a VQX vector;
;; operand 2 is the previous vector value and is tied to the destination.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VQX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);

  /* A lane in the upper half of the vector lives in the second
     half-width register, two register numbers further on; rebase the
     lane index to that register.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }

  /* Store the final lane index once, after any adjustment, in the same
     way as the matching neon_vst1_lane pattern.  */
  operands[3] = GEN_INT (lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, regno);

  /* With two elements each half holds a single element, so the half
     register is loaded whole, without a lane index.  */
  if (max == 2)
    return "vld1.<V_sz_elem>\t%P0, %A1";
  else
    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4306
;; Load one element and duplicate it into every lane of a VD vector;
;; expressed directly as vec_duplicate rather than as an unspec.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (vec_duplicate:VD
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4314
;; DImode is a special case: a "dup" load is exactly a plain load, so
;; this expands to the UNSPEC_VLD1 set with no preparation code.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI
          [(match_operand:DI 1 "neon_struct_operand" "")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
4323
;; Load one element and duplicate it into every lane of a VQ vector.  The
;; destination is printed through the %e and %f operand modifiers, each
;; with an all-lanes suffix, in a single vld1.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ 0 "s_register_operand" "=w")
        (vec_duplicate:VQ
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4333
;; Load one DI element and duplicate it into both halves of a V2DI
;; vector.  Emitted as "#" and split after reload into a load of the low
;; half followed by a copy of the low half into the high half.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);

  emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));

  rtx high_half = gen_highpart (DImode, operands[0]);
  emit_move_insn (high_half, low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
4350
;; vec_store_lanes for a single vector: generates the same UNSPEC_VST1
;; set that the neon_vst1 insn matches.  No preparation code.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand")]
          UNSPEC_VST1))]
  "TARGET_NEON")
4356
;; vst1 of a whole vector (operand 1) to the structure memory operand 0.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")]
          UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
4364
;; The lane number in the RTL is in GCC lane order and is converted back
;; to architectural lane order here (NEON_ENDIAN_LANE_N), which is why
;; lanes appear reversed on big-endian targets.
;; Store lane operand 2 of the VDX vector operand 1 to memory operand 0.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  operands[2] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode,
                                             INTVAL (operands[2])));

  /* A single-element vector is stored whole, without a lane index.  */
  return (nunits == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4385
;; The lane number in the RTL is in GCC lane order and is converted back
;; to architectural lane order here (NEON_ENDIAN_LANE_N), which is why
;; lanes appear reversed on big-endian targets.
;; Store lane operand 2 of the VQX vector operand 1 to memory operand 0.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);

  /* A lane in the upper half of the vector lives in the second
     half-width register, two register numbers further on; rebase the
     lane index to that register.  */
  const bool in_upper_half = lane >= half;
  if (in_upper_half)
    {
      regno += 2;
      lane -= half;
    }

  operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
  operands[2] = GEN_INT (lane);

  /* With two elements each half holds a single element, so the half
     register is stored whole, without a lane index.  */
  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4413
;; vec_load_lanes for a two-vector structure in TImode.  The inner dummy
;; unspec carries only the VDX element mode, so that one pattern exists
;; per mode.  No preparation code.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
4420
;; Structure load into a TImode register group.  The inner dummy unspec
;; carries only the VDX element mode.  For 64-bit elements the load is
;; printed as vld1.64 instead of vld2, and the "type" attribute follows
;; the same split.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
{
  const bool elt_is_64_bits = <V_sz_elem> == 64;

  return (elt_is_64_bits
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_2reg<q>")
                      (const_string "neon_load2_2reg<q>")))]
)
4438
;; vec_load_lanes for a two-vector structure in OImode.  The inner dummy
;; unspec carries only the VQ2 element mode.  No preparation code.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
4445
;; vld2 into an OImode register group.  The inner dummy unspec carries
;; only the VQ2 element mode.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])
4454
;; The lane number in the RTL is in GCC lane order and is converted back
;; to architectural lane order here (NEON_ENDIAN_LANE_N), which is why
;; lanes appear reversed on big-endian targets.
;; Load a two-element structure (operand 1) into lane operand 3 of both
;; vectors of a TImode register group; operand 2 is the previous group
;; value and is tied to the destination.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:TI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  /* No half-register adjustment is made for this variant, so the element
     count of the mode is not needed.  */
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[4];

  /* The two vectors of the group are the DImode registers at the group's
     register number and two further on.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4479
;; The lane number in the RTL is in GCC lane order and is converted back
;; to architectural lane order here (NEON_ENDIAN_LANE_N), which is why
;; lanes appear reversed on big-endian targets.
;; Load a two-element structure (operand 1) into lane operand 3 of both
;; vectors of an OImode register group; operand 2 is the previous group
;; value and is tied to the destination.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int first = REGNO (operands[0]);
  rtx xops[4];

  /* A lane in the upper half of each vector lives two register numbers
     further on; rebase the lane index accordingly.  */
  if (lane >= half)
    {
      first += 2;
      lane -= half;
    }

  /* The corresponding halves of the two vectors are four register
     numbers apart.  */
  xops[0] = gen_rtx_REG (DImode, first);
  xops[1] = gen_rtx_REG (DImode, first + 4);
  xops[2] = operands[1];
  xops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4509
;; Load a two-element structure and duplicate it across all lanes of a
;; TImode register group.  For single-element modes this is printed as a
;; plain vld1 of the whole group, and the "type" attribute follows the
;; same split.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  const bool has_lanes = GET_MODE_NUNITS (<MODE>mode) > 1;

  return (has_lanes
          ? "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
          : "vld1.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load2_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
4527
;; vec_store_lanes for a two-vector structure in TImode.  The inner dummy
;; unspec carries only the VDX element mode.  No preparation code.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
4534
;; Structure store of a TImode register group.  The inner dummy unspec
;; carries only the VDX element mode.  For 64-bit elements the store is
;; printed as vst1.64 instead of vst2, and the "type" attribute follows
;; the same split.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  ;; This stores whole registers, not a single lane, so it is classified
  ;; as a two-register vst2, mirroring neon_load2_2reg<q> on the matching
  ;; load.  It was previously typed neon_store2_one_lane<q>.
  ;; NOTE(review): scheduling-only change; confirm against the pipeline
  ;; descriptions that consume this attribute.
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_2reg<q>")
                      (const_string "neon_store2_2reg<q>")))]
)
4552
;; vec_store_lanes for a two-vector structure in OImode.  The inner dummy
;; unspec carries only the VQ2 element mode.  No preparation code.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
4559
;; vst2 of an OImode register group.  The inner dummy unspec carries only
;; the VQ2 element mode.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
4569
;; Store one lane of a two-vector structure (VD_LANE modes).
;;   operand 0: <V_two_elem> memory destination
;;   operand 1: TImode register pair holding the two vectors
;;   operand 2: immediate lane number
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  /* Lane number after the endianness adjustment.  */
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[4];

  /* The two source vectors are addressed as the DImode registers at REGNO
     and REGNO + 2.  */
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4594
;; Store one lane of a two-vector structure (VQ_HS modes).
;;   operand 0: <V_two_elem> memory destination
;;   operand 1: OImode register group holding the two vectors
;;   operand 2: immediate lane number
;; The lane numbers are reversed on big-endian targets; the comment on
;; neon_vld1_lane explains why.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[4];

  /* A lane in the upper half of the vector is rebased to the lower half
     and addressed through the registers two numbers further on.  */
  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4624
;; Expander for a three-vector load-lanes operation on the VDX modes.  The
;; template is the UNSPEC_VLD3 load of the memory structure operand 1 into
;; the EImode register operand 0; no preparation code is required.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON")
4631
;; VLD3 from memory (operand 1) into an EImode register group (operand 0)
;; for the VDX modes.  64-bit elements are loaded with VLD1.64 over the
;; register list; every other element size uses VLD3.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld3.<V_sz_elem>\t%h0, %A1";

  /* 64-bit elements: VLD1.64 over the same register list.  */
  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)
4649
;; Expander for a three-vector load-lanes operation on the VQ2 modes; it
;; simply forwards both operands to the neon_vld3<mode> generator.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
4659
;; VLD3 into a CImode register group for the VQ2 modes.  The CImode memory
;; operand is split into two consecutive EImode pieces, loaded by
;; neon_vld3qa and neon_vld3qb respectively.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode piece, at offset 0 of the source.  */
  rtx first = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first));

  /* Second EImode piece, immediately after the first.  The destination is
     also passed as an input to the second load.  */
  rtx second = adjust_address (first, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second, operands[0]));
  DONE;
})
4674
;; First half of a VQ2 VLD3 (UNSPEC_VLD3A): loads the EImode memory operand 1
;; into the DImode registers at REGNO, REGNO + 4 and REGNO + 8 of the CImode
;; destination operand 0.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4];
  int n;

  /* Destination registers are four register numbers apart.  */
  for (n = 0; n < 3; n++)
    xops[n] = gen_rtx_REG (DImode, base + 4 * n);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4693
;; Second half of a VQ2 VLD3 (UNSPEC_VLD3B): loads the EImode memory
;; operand 1 into the DImode registers at REGNO + 2, REGNO + 6 and
;; REGNO + 10 of the CImode destination.  Operand 2 is tied to the
;; destination by its "0" constraint.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  /* Start two register numbers past the base used by neon_vld3qa.  */
  const int base = REGNO (operands[0]) + 2;
  rtx xops[4];
  int n;

  for (n = 0; n < 3; n++)
    xops[n] = gen_rtx_REG (DImode, base + 4 * n);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4713
;; Load one lane of a three-vector structure (VD_LANE modes).
;;   operand 0: EImode destination register group
;;   operand 1: <V_three_elem> memory source
;;   operand 2: previous value of the destination (tied by constraint "0")
;;   operand 3: immediate lane number
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  /* Lane number after the endianness adjustment.  */
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[5];

  /* The three destination vectors are the DImode registers at REGNO,
     REGNO + 2 and REGNO + 4.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4740
;; Load one lane of a three-vector structure (VQ_HS modes).
;;   operand 0: CImode destination register group
;;   operand 1: <V_three_elem> memory source
;;   operand 2: previous value of the destination (tied by constraint "0")
;;   operand 3: immediate lane number
;; The lane numbers are reversed on big-endian targets; the comment on
;; neon_vld1_lane explains why.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[5];

  /* A lane in the upper half of the vector is rebased to the lower half
     and addressed through the registers two numbers further on.  */
  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 4);
  xops[2] = gen_rtx_REG (DImode, base + 8);
  xops[3] = operands[1];
  xops[4] = GEN_INT (lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4772
;; VLD3 "all lanes" load (UNSPEC_VLD3_DUP): operand 1 is a <V_three_elem>
;; memory structure, operand 0 the EImode register group that receives it.
;; Single-element modes emit a plain VLD1 of the register list instead, and
;; the "type" attribute follows the same split.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  /* Single-element vectors take the VLD1 form.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  const int base = REGNO (operands[0]);
  rtx xops[4];

  /* Destination vectors are the DImode registers at BASE, BASE + 2 and
     BASE + 4.  */
  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 2);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", xops);
  return "";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])
4798
;; Expander for a three-vector store-lanes operation on the VDX modes.  The
;; template is the UNSPEC_VST3 store of the EImode register operand 1 to the
;; memory structure operand 0; no preparation code is required.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")
4805
;; VST3 of an EImode register group (operand 1) to memory (operand 0) for
;; the VDX modes.  64-bit elements are emitted as VST1.64 of the register
;; list; all other element sizes use VST3 on the whole register list.
;;
;; The "type" attribute classifies the VST3 form as a whole-register
;; three-register store (neon_store3_3reg<q>), mirroring the
;; neon_load3_3reg<q> type used by the corresponding VLD3 pattern: the
;; instruction emitted here takes the full register list (%h1), not a
;; single lane.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  /* 64-bit elements are stored with VST1.64 rather than VST3.  */
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])
4822
;; Expander for a three-vector store-lanes operation on the VQ2 modes; it
;; simply forwards both operands to the neon_vst3<mode> generator.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
})
4832
;; VST3 of a CImode register group for the VQ2 modes.  The CImode memory
;; destination is split into two consecutive EImode pieces, written by
;; neon_vst3qa and neon_vst3qb respectively.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode piece, at offset 0 of the destination.  */
  rtx first = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first, operands[1]));

  /* Second EImode piece, immediately after the first.  */
  rtx second = adjust_address (first, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second, operands[1]));
  DONE;
})
4847
;; First half of a VQ2 VST3 (UNSPEC_VST3A): stores the DImode registers at
;; REGNO, REGNO + 4 and REGNO + 8 of the CImode source operand 1 to the
;; EImode memory operand 0.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[4];
  int n;

  xops[0] = operands[0];
  /* Source registers are four register numbers apart.  */
  for (n = 0; n < 3; n++)
    xops[n + 1] = gen_rtx_REG (DImode, base + 4 * n);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4866
;; Second half of a VQ2 VST3 (UNSPEC_VST3B): stores the DImode registers at
;; REGNO + 2, REGNO + 6 and REGNO + 10 of the CImode source operand 1 to the
;; EImode memory operand 0.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  /* Start two register numbers past the base used by neon_vst3qa.  */
  const int base = REGNO (operands[1]) + 2;
  rtx xops[4];
  int n;

  xops[0] = operands[0];
  for (n = 0; n < 3; n++)
    xops[n + 1] = gen_rtx_REG (DImode, base + 4 * n);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4885
;; Store one lane of a three-vector structure (VD_LANE modes).
;;   operand 0: <V_three_elem> memory destination
;;   operand 1: EImode register group holding the three vectors
;;   operand 2: immediate lane number
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  /* Lane number after the endianness adjustment.  */
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[5];

  /* The three source vectors are the DImode registers at REGNO,
     REGNO + 2 and REGNO + 4.  */
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = gen_rtx_REG (DImode, regno + 4);
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
4912
;; Store one lane of a three-vector structure (VQ_HS modes).
;;   operand 0: <V_three_elem> memory destination
;;   operand 1: CImode register group holding the three vectors
;;   operand 2: immediate lane number
;; The lane numbers are reversed on big-endian targets; the comment on
;; neon_vld1_lane explains why.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[5];

  /* A lane in the upper half of the vector is rebased to the lower half
     and addressed through the registers two numbers further on.  */
  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = gen_rtx_REG (DImode, base + 8);
  xops[4] = GEN_INT (lane);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
4944
;; Expander for a four-vector load-lanes operation on the VDX modes.  The
;; template is the UNSPEC_VLD4 load of the memory structure operand 1 into
;; the OImode register operand 0; no preparation code is required.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON")
4951
;; VLD4 from memory (operand 1) into an OImode register group (operand 0)
;; for the VDX modes.  64-bit elements are loaded with VLD1.64 over the
;; register list; every other element size uses VLD4.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld4.<V_sz_elem>\t%h0, %A1";

  /* 64-bit elements: VLD1.64 over the same register list.  */
  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)
4969
;; Expander for a four-vector load-lanes operation on the VQ2 modes; it
;; simply forwards both operands to the neon_vld4<mode> generator.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})
4979
;; VLD4 into an XImode register group for the VQ2 modes.  The XImode memory
;; operand is split into two consecutive OImode pieces, loaded by
;; neon_vld4qa and neon_vld4qb respectively.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode piece, at offset 0 of the source.  */
  rtx first = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first));

  /* Second OImode piece, immediately after the first.  The destination is
     also passed as an input to the second load.  */
  rtx second = adjust_address (first, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second, operands[0]));
  DONE;
})
4994
;; First half of a VQ2 VLD4 (UNSPEC_VLD4A): loads the OImode memory operand 1
;; into the DImode registers at REGNO, REGNO + 4, REGNO + 8 and REGNO + 12
;; of the XImode destination operand 0.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];
  int n;

  /* Destination registers are four register numbers apart.  */
  for (n = 0; n < 4; n++)
    xops[n] = gen_rtx_REG (DImode, base + 4 * n);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5014
;; Second half of a VQ2 VLD4 (UNSPEC_VLD4B): loads the OImode memory
;; operand 1 into the DImode registers at REGNO + 2, REGNO + 6, REGNO + 10
;; and REGNO + 14 of the XImode destination.  Operand 2 is tied to the
;; destination by its "0" constraint.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  /* Start two register numbers past the base used by neon_vld4qa.  */
  const int base = REGNO (operands[0]) + 2;
  rtx xops[5];
  int n;

  for (n = 0; n < 4; n++)
    xops[n] = gen_rtx_REG (DImode, base + 4 * n);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5035
;; Load one lane of a four-vector structure (VD_LANE modes).
;;   operand 0: OImode destination register group
;;   operand 1: <V_four_elem> memory source
;;   operand 2: previous value of the destination (tied by constraint "0")
;;   operand 3: immediate lane number
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  /* Lane number after the endianness adjustment.  */
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[6];

  /* The four destination vectors are the DImode registers at REGNO,
     REGNO + 2, REGNO + 4 and REGNO + 6.  */
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 6);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5063
;; Load one lane of a four-vector structure (VQ_HS modes).
;;   operand 0: XImode destination register group
;;   operand 1: <V_four_elem> memory source
;;   operand 2: previous value of the destination (tied by constraint "0")
;;   operand 3: immediate lane number
;; The lane numbers are reversed on big-endian targets; the comment on
;; neon_vld1_lane explains why.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[6];

  /* A lane in the upper half of the vector is rebased to the lower half
     and addressed through the registers two numbers further on.  */
  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 4);
  xops[2] = gen_rtx_REG (DImode, base + 8);
  xops[3] = gen_rtx_REG (DImode, base + 12);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5096
;; VLD4 "all lanes" load (UNSPEC_VLD4_DUP): operand 1 is a <V_four_elem>
;; memory structure, operand 0 the OImode register group that receives it.
;; Single-element modes emit a plain VLD1 of the register list instead, and
;; the "type" attribute follows the same split.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  /* Single-element vectors take the VLD1 form.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  const int base = REGNO (operands[0]);
  rtx xops[5];

  /* Destination vectors are the DImode registers at BASE, BASE + 2,
     BASE + 4 and BASE + 6.  */
  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 2);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = gen_rtx_REG (DImode, base + 6);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   xops);
  return "";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
5125
;; Expander for a four-vector store-lanes operation on the VDX modes.  The
;; template is the UNSPEC_VST4 store of the OImode register operand 1 to the
;; memory structure operand 0; no preparation code is required.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON")
5132
;; VST4 of an OImode register group (operand 1) to memory (operand 0) for
;; the VDX modes.  64-bit elements are emitted as VST1.64 of the register
;; list; every other element size uses VST4.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vst4.<V_sz_elem>\t%h1, %A0";

  /* 64-bit elements: VST1.64 over the same register list.  */
  return "vst1.64\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)
5150
;; Expander for a four-vector store-lanes operation on the VQ2 modes; it
;; simply forwards both operands to the neon_vst4<mode> generator.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest, src));
  DONE;
})
5160
;; VST4 of an XImode register group for the VQ2 modes.  The XImode memory
;; destination is split into two consecutive OImode pieces, written by
;; neon_vst4qa and neon_vst4qb respectively.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode piece, at offset 0 of the destination.  */
  rtx first = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first, operands[1]));

  /* Second OImode piece, immediately after the first.  */
  rtx second = adjust_address (first, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second, operands[1]));
  DONE;
})
5175
;; First half of a VQ2 VST4 (UNSPEC_VST4A): stores the DImode registers at
;; REGNO, REGNO + 4, REGNO + 8 and REGNO + 12 of the XImode source operand 1
;; to the OImode memory operand 0.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];
  int n;

  xops[0] = operands[0];
  /* Source registers are four register numbers apart.  */
  for (n = 0; n < 4; n++)
    xops[n + 1] = gen_rtx_REG (DImode, base + 4 * n);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5195
;; Second half of a VQ2 VST4 (UNSPEC_VST4B): stores the DImode registers at
;; REGNO + 2, REGNO + 6, REGNO + 10 and REGNO + 14 of the XImode source
;; operand 1 to the OImode memory operand 0.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  /* Start two register numbers past the base used by neon_vst4qa.  */
  const int base = REGNO (operands[1]) + 2;
  rtx xops[5];
  int n;

  xops[0] = operands[0];
  for (n = 0; n < 4; n++)
    xops[n + 1] = gen_rtx_REG (DImode, base + 4 * n);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5215
;; Store one lane of a four-vector structure (VD_LANE modes).
;;   operand 0: <V_four_elem> memory destination
;;   operand 1: OImode register group holding the four vectors
;;   operand 2: immediate lane number
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  /* Lane number after the endianness adjustment.  */
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[6];

  /* The four source vectors are the DImode registers at REGNO,
     REGNO + 2, REGNO + 4 and REGNO + 6.  */
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = gen_rtx_REG (DImode, regno + 4);
  ops[4] = gen_rtx_REG (DImode, regno + 6);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5243
;; Store one lane of a four-vector structure (VQ_HS modes).
;;   operand 0: <V_four_elem> memory destination
;;   operand 1: XImode register group holding the four vectors
;;   operand 2: immediate lane number
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big-endian targets.
;;
;; The instruction emitted is a single-lane VST4, so it is typed
;; neon_store4_one_lane<q>, in line with the other *_lane store patterns.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  /* Lane number after the endianness adjustment.  */
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];

  /* A lane in the upper half of the vector is rebased to the lower half
     and addressed through the registers two numbers further on.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5276
;; Widen (sign- or zero-extend, per the SE iterator) the elements of
;; operand 1 selected by the "low" PARALLEL in operand 2, using VMOVL on the
;; %e half of the source register.  Little-endian only.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
5286
;; Widen (sign- or zero-extend, per the SE iterator) the elements of
;; operand 1 selected by the "high" PARALLEL in operand 2, using VMOVL on
;; the %f half of the source register.  Little-endian only.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
5296
;; Unpack-high expander for the VU modes: builds the PARALLEL of lane
;; indices nunits/2 .. nunits-1 and hands it to
;; neon_vec_unpack<US>_hi_<mode>.  Little-endian only.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  int lane;

  /* Select the upper half of the lanes.  */
  for (lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (half + lane);

  rtx par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], operands[1],
                                                par));
  DONE;
}
)
5315
;; Unpack-low expander for the VU modes: builds the PARALLEL of lane
;; indices 0 .. nunits/2-1 and hands it to neon_vec_unpack<US>_lo_<mode>.
;; Little-endian only.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  int lane;

  /* Select the lower half of the lanes.  */
  for (lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (lane);

  rtx par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], operands[1],
                                                par));
  DONE;
}
)
5333
;; Widening multiply of the "low" lanes of operands 1 and 3.  Both inputs
;; use the same lane selection (operand 2, repeated via match_dup) and are
;; extended per the SE iterator before the multiply; emitted as VMULL on the
;; %e halves.  Little-endian only.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_low" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5346
;; Widening-multiply-low expander for the VU modes: builds the PARALLEL of
;; lane indices 0 .. nunits/2-1 and passes it, with the two inputs, to
;; neon_vec_<US>mult_lo_<mode>.  Little-endian only.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  int lane;

  /* Select the lower half of the lanes.  */
  for (lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (lane);

  rtx par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], operands[1],
                                              par, operands[2]));
  DONE;
}
)
5367
;; Widening multiply of the "high" lanes of operands 1 and 3.  Both inputs
;; use the same lane selection (operand 2, repeated via match_dup) and are
;; extended per the SE iterator before the multiply; emitted as VMULL on the
;; %f halves.  Little-endian only.
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_high" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5380
;; Widening-multiply-high expander for the VU modes: builds the PARALLEL of
;; lane indices nunits/2 .. nunits-1 and passes it, with the two inputs, to
;; neon_vec_<US>mult_hi_<mode>.  Little-endian only.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  int lane;

  /* Select the upper half of the lanes.  */
  for (lane = 0; lane < half; lane++)
    RTVEC_ELT (sel, lane) = GEN_INT (half + lane);

  rtx par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], operands[1],
                                              par, operands[2]));
  DONE;
}
)
5402
;; Widening shift left: operand 1 (VW mode) is shifted left by the constant
;; operand 2 and extended per the SE iterator into the <V_widen> result;
;; emitted as VSHLL.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (ashift:VW
            (match_operand:VW 1 "register_operand" "w")
            (match_operand:<V_innermode> 2
              "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
5413
;; Widening-shift-left-low expander for the VU modes: takes the <V_HALF>
;; subreg of operand 1 at byte offset 0 and feeds it to
;; neon_vec_<US>shiftl_<V_half>.  Little-endian only.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx lo_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                     <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], lo_half,
                                               operands[2]));
  DONE;
}
)
5426
;; Widening-shift-left-high expander for the VU modes: takes the <V_HALF>
;; subreg of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode) and feeds
;; it to neon_vec_<US>shiftl_<V_half>.  Little-endian only.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx hi_half = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                     GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], hi_half,
                                               operands[2]));
  DONE;
}
)
5440
;; Vectorize for the non-neon-quad case.
;; Extend (per the SE iterator) every element of the VDI operand 1 into the
;; <V_widen> result with a single VMOVL.
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)
5449
;; Unpack-low expander for the VDI modes: widens the whole input into a
;; fresh <V_widen> pseudo with neon_unpack<US>_<mode>, then extracts its low
;; half into operand 0 with neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
}
)
5462
;; Unpack-high expander for the VDI modes: widens the whole input into a
;; fresh <V_widen> pseudo with neon_unpack<US>_<mode>, then extracts its
;; high half into operand 0 with neon_vget_high<V_widen_l>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
}
)
5475
;; Widening multiply of two VDI vectors: both inputs are extended per the SE
;; iterator to <V_widen> and multiplied; emitted as a single VMULL.
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (mult:<V_widen>
          (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w"))
          (SE:<V_widen> (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5486
;; Widening multiply, "hi" part, for VDI-mode inputs.  The full product of
;; operands 1 and 2 is formed in a <V_widen>-mode temporary by
;; neon_vec_<US>mult_<mode>, and neon_vget_high<V_widen_l> then copies the
;; requested part of it into operand 0.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  rtx lhs = operands[1];
  rtx rhs = operands[2];
  rtx product = gen_reg_rtx (<V_widen>mode);

  /* Multiply into the wide temporary, then extract the high part.  */
  emit_insn (gen_neon_vec_<US>mult_<mode> (product, lhs, rhs));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
})
5501
;; Widening multiply, "lo" part, for VDI-mode inputs.  The full product of
;; operands 1 and 2 is formed in a <V_widen>-mode temporary by
;; neon_vec_<US>mult_<mode>, and neon_vget_low<V_widen_l> then copies the
;; requested part of it into operand 0.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  rtx lhs = operands[1];
  rtx rhs = operands[2];
  rtx product = gen_reg_rtx (<V_widen>mode);

  /* Multiply into the wide temporary, then extract the low part.  */
  emit_insn (gen_neon_vec_<US>mult_<mode> (product, lhs, rhs));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
  DONE;
})
5516
;; Widening shift-left, "hi" part, for VDI-mode inputs.  Operand 1 is
;; shifted by the immediate in operand 2 into a <V_widen>-mode temporary by
;; neon_vec_<US>shiftl_<mode>, and neon_vget_high<V_widen_l> then copies the
;; requested part of it into operand 0.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shift_count = operands[2];
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  /* Shift into the wide temporary, then extract the high part.  */
  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
					     shift_count));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
  DONE;
})
5530
;; Widening shift-left, "lo" part, for VDI-mode inputs.  Operand 1 is
;; shifted by the immediate in operand 2 into a <V_widen>-mode temporary by
;; neon_vec_<US>shiftl_<mode>, and neon_vget_low<V_widen_l> then copies the
;; requested part of it into operand 0.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shift_count = operands[2];
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  /* Shift into the wide temporary, then extract the low part.  */
  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
					     shift_count));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
  DONE;
})
5544
5545 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5546 ; because the ordering of vector elements in Q registers is different from what
5547 ; the semantics of the instructions require.
5548
;; Pack-and-truncate for VN-mode inputs.  The elements of operands 1 and 2
;; are each truncated to <V_narrow> and the two results are concatenated
;; (operand 1 first) into the <V_narrow_pack> destination.
;; The output template is two VMOVN instructions separated by "\;": the
;; first writes %e0 from operand 1, the second writes %f0 from operand 2.
;; Because part of operand 0 is written before operand 2 is read, the
;; destination is marked earlyclobber ("=&w").  The pattern is typed
;; "multiple" with length 8 to account for the two-instruction sequence.
;; Disabled when BYTES_BIG_ENDIAN.
;; NOTE(review): %e0/%f0 presumably name the two D-register halves of the
;; destination -- confirm against the ARM operand-printing code.
5549 (define_insn "vec_pack_trunc_<mode>"
5550 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5551 (vec_concat:<V_narrow_pack>
5552 (truncate:<V_narrow>
5553 (match_operand:VN 1 "register_operand" "w"))
5554 (truncate:<V_narrow>
5555 (match_operand:VN 2 "register_operand" "w"))))]
5556 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5557 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5558 [(set_attr "type" "multiple")
5559 (set_attr "length" "8")]
5560 )
5561
5562 ;; For the non-quad case.
;; Truncate every element of a single VN-mode register (operand 1) to
;; <V_narrow> and store the result in operand 0.  Emitted as one VMOVN, with
;; the destination printed via %P and the source via %q.  Disabled when
;; BYTES_BIG_ENDIAN.  Scheduling type is neon_move_narrow_q.
5563 (define_insn "neon_vec_pack_trunc_<mode>"
5564 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5565 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5566 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5567 "vmovn.i<V_sz_elem>\t%P0, %q1"
5568 [(set_attr "type" "neon_move_narrow_q")]
5569 )
5570
;; Pack-and-truncate for two VSHFT-mode inputs.  Operands 1 and 2 are placed
;; into a <V_DOUBLE>-mode temporary with move_lo_quad_<V_double> and
;; move_hi_quad_<V_double> respectively, and the combined value is then
;; narrowed into operand 0 by neon_vec_pack_trunc_<V_double>.
;; Only enabled for little-endian NEON.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
   (match_operand:VSHFT 1 "register_operand" "")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx lo_src = operands[1];
  rtx hi_src = operands[2];
  rtx joined = gen_reg_rtx (<V_DOUBLE>mode);

  /* Assemble the double-width value one part at a time.  */
  emit_insn (gen_move_lo_quad_<V_double> (joined, lo_src));
  emit_insn (gen_move_hi_quad_<V_double> (joined, hi_src));

  /* Narrow the assembled value into the destination.  */
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], joined));
  DONE;
})
5584
;; Absolute difference written with ordinary RTL: abs (operand 1 - operand 2)
;; over VDQ modes, emitted as a single VABD with element suffix <V_s_elem>.
;; For floating-point modes (<Is_float_mode> non-zero) the pattern is only
;; enabled under flag_unsafe_math_optimizations.
;; The "type" attribute is chosen by <Is_float_mode>: neon_fp_abd_s<q> when it
;; is non-zero, neon_abd<q> otherwise.
5585 (define_insn "neon_vabd<mode>_2"
5586 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5587 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5588 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5589 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5590 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5591 [(set (attr "type")
5592 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5593 (const_string "neon_fp_abd_s<q>")
5594 (const_string "neon_abd<q>")))]
5595 )
5596
;; Absolute difference where the subtraction is an UNSPEC_VSUB unspec rather
;; than a plain minus: abs (unspec [operand 1, operand 2] UNSPEC_VSUB) over
;; VDQ modes, emitted as a single VABD with element suffix <V_if_elem>.
;; For floating-point modes (<Is_float_mode> non-zero) the pattern is only
;; enabled under flag_unsafe_math_optimizations.
;; The "type" attribute is chosen by <Is_float_mode>: neon_fp_abd_s<q> when it
;; is non-zero, neon_abd<q> otherwise.
5597 (define_insn "neon_vabd<mode>_3"
5598 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5599 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5600 (match_operand:VDQ 2 "s_register_operand" "w")]
5601 UNSPEC_VSUB)))]
5602 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5603 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5604 [(set (attr "type")
5605 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5606 (const_string "neon_fp_abd_s<q>")
5607 (const_string "neon_abd<q>")))]
5608 )
5609
5610 ;; Copy from core-to-neon regs, then extend, not vice-versa
5611
;; After reload, split an SImode -> DImode sign extension whose destination
;; register number satisfies IS_VFP_REGNUM.  The source is first duplicated
;; across a V2SImode view of the destination register, and the destination
;; is then arithmetically shifted right by 32 as a DImode value.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination register re-expressed in V2SImode.  */
  unsigned int dest_regno = REGNO (operands[0]);

  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
5621
;; After reload, split an HImode -> DImode sign extension whose destination
;; register number satisfies IS_VFP_REGNUM.  The source is first duplicated
;; across a V4HImode view of the destination register, and the destination
;; is then arithmetically shifted right by 48 as a DImode value.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination register re-expressed in V4HImode.  */
  unsigned int dest_regno = REGNO (operands[0]);

  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
5631
;; After reload, split a QImode -> DImode sign extension whose destination
;; register number satisfies IS_VFP_REGNUM.  The source is first duplicated
;; across a V8QImode view of the destination register, and the destination
;; is then arithmetically shifted right by 56 as a DImode value.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination register re-expressed in V8QImode.  */
  unsigned int dest_regno = REGNO (operands[0]);

  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})
5641
;; After reload, split an SImode -> DImode zero extension whose destination
;; register number satisfies IS_VFP_REGNUM.  The source is first duplicated
;; across a V2SImode view of the destination register, and the destination
;; is then logically shifted right by 32 as a DImode value.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination register re-expressed in V2SImode.  */
  unsigned int dest_regno = REGNO (operands[0]);

  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
5651
;; After reload, split an HImode -> DImode zero extension whose destination
;; register number satisfies IS_VFP_REGNUM.  The source is first duplicated
;; across a V4HImode view of the destination register, and the destination
;; is then logically shifted right by 48 as a DImode value.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination register re-expressed in V4HImode.  */
  unsigned int dest_regno = REGNO (operands[0]);

  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
5661
;; After reload, split a QImode -> DImode zero extension whose destination
;; register number satisfies IS_VFP_REGNUM.  The source is first duplicated
;; across a V8QImode view of the destination register, and the destination
;; is then logically shifted right by 56 as a DImode value.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination register re-expressed in V8QImode.  */
  unsigned int dest_regno = REGNO (operands[0]);

  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})