[ARM] Migrate to new reduc_[us](min|max)_scal_optab
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The attribute takes one of the values vadd, vmin or vmax; vadd is the
;; default when a pattern does not set it.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the VDX modes.  The insn condition requires TARGET_NEON
;; and that at least one of the two operands is a register.
;; Output, by alternative:
;;   0     - vmov between two %P registers.
;;   1, 3  - delegated to output_move_neon.
;;   2     - immediate source: neon_immediate_valid_for_move is given
;;           &operands[1] and &width and must succeed (asserted).  A width
;;           of 0 selects the vmov.f32 form; any other width is formatted
;;           into a vmov.i<width> template held in a static buffer.
;;   4, 5  - vmov using %Q/%R of one operand and %P of the other.
;;   6-8   - delegated to output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
31 "TARGET_NEON
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
34 {
35 if (which_alternative == 2)
36 {
37 int width, is_valid;
38 static char templ[40];
39
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
42
43 gcc_assert (is_valid != 0);
44
45 if (width == 0)
46 return "vmov.f32\t%P0, %1 @ <mode>";
47 else
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
49
50 return templ;
51 }
52
53 switch (which_alternative)
54 {
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
61 }
62 }
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the VQXMOV modes.  The insn condition requires
;; TARGET_NEON and that at least one of the two operands is a register.
;; Output, by alternative:
;;   0     - vmov between two %q registers.
;;   1, 3  - delegated to output_move_neon.
;;   2     - immediate source: neon_immediate_valid_for_move is given
;;           &operands[1] and &width and must succeed (asserted).  A width
;;           of 0 selects the vmov.f32 form; any other width is formatted
;;           into a vmov.i<width> template held in a static buffer.
;;   4, 5  - two vmov instructions, one per half (%e/%f of the vector
;;           operand paired with %Q/%R and %J/%K of the other operand).
;;   6-8   - delegated to output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
76 "TARGET_NEON
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
79 {
80 if (which_alternative == 2)
81 {
82 int width, is_valid;
83 static char templ[40];
84
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
87
88 gcc_assert (is_valid != 0);
89
90 if (width == 0)
91 return "vmov.f32\t%q0, %1 @ <mode>";
92 else
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
94
95 return templ;
96 }
97
98 switch (which_alternative)
99 {
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
106 }
107 }
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; Expander for TImode moves, enabled under TARGET_NEON.  While new pseudos
;; may still be created, a source that is headed for a non-register
;; destination is first forced into a register.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
119 "TARGET_NEON"
120 {
121 if (can_create_pseudo_p ())
122 {
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
125 }
126 })
127
;; Expander for moves in the VSTRUCT modes, enabled under TARGET_NEON.
;; While new pseudos may still be created, a source that is headed for a
;; non-register destination is first forced into a register.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
131 "TARGET_NEON"
132 {
133 if (can_create_pseudo_p ())
134 {
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
137 }
138 })
139
;; Move pattern for the VSTRUCT modes; at least one operand must be a
;; register.  The register-to-register alternative (0) returns "#", the
;; output template GCC uses for an insn that has to be split; the store (1)
;; and load (2) alternatives are delegated to output_move_neon.  The length
;; attribute is computed by arm_attr_length_move_neon.
140 (define_insn "*neon_mov<mode>"
141 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
142 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
143 "TARGET_NEON
144 && (register_operand (operands[0], <MODE>mode)
145 || register_operand (operands[1], <MODE>mode))"
146 {
147 switch (which_alternative)
148 {
149 case 0: return "#";
150 case 1: case 2: return output_move_neon (operands);
151 default: gcc_unreachable ();
152 }
153 }
154 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
155 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
156
;; After reload, split an EImode register-to-register copy into two moves:
;; a TImode piece at the base register number and a DImode piece at base
;; register number + 4.  neon_disambiguate_copy receives the piece arrays
;; and the operands array used by the replacement pattern.
;; NOTE(review): presumably it orders the pieces so that overlapping
;; source/destination registers are not clobbered -- confirm in arm.c.
157 (define_split
158 [(set (match_operand:EI 0 "s_register_operand" "")
159 (match_operand:EI 1 "s_register_operand" ""))]
160 "TARGET_NEON && reload_completed"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
163 {
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
166 rtx dest[2], src[2];
167
168 dest[0] = gen_rtx_REG (TImode, rdest);
169 src[0] = gen_rtx_REG (TImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 4);
171 src[1] = gen_rtx_REG (DImode, rsrc + 4);
172
173 neon_disambiguate_copy (operands, dest, src, 2);
174 })
175
;; After reload, split an OImode register-to-register copy into two TImode
;; moves, at the base register number and at base register number + 4.
;; neon_disambiguate_copy receives the piece arrays and the operands array
;; used by the replacement pattern.
;; NOTE(review): presumably it orders the pieces so that overlapping
;; source/destination registers are not clobbered -- confirm in arm.c.
176 (define_split
177 [(set (match_operand:OI 0 "s_register_operand" "")
178 (match_operand:OI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
182 {
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
185 rtx dest[2], src[2];
186
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (TImode, rdest + 4);
190 src[1] = gen_rtx_REG (TImode, rsrc + 4);
191
192 neon_disambiguate_copy (operands, dest, src, 2);
193 })
194
;; After reload, split a CImode register-to-register copy into three TImode
;; moves, at base register number + 0, + 4 and + 8.  neon_disambiguate_copy
;; receives the piece arrays and the operands array used by the replacement
;; pattern.
;; NOTE(review): presumably it orders the pieces so that overlapping
;; source/destination registers are not clobbered -- confirm in arm.c.
195 (define_split
196 [(set (match_operand:CI 0 "s_register_operand" "")
197 (match_operand:CI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))
201 (set (match_dup 4) (match_dup 5))]
202 {
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
205 rtx dest[3], src[3];
206
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 dest[2] = gen_rtx_REG (TImode, rdest + 8);
212 src[2] = gen_rtx_REG (TImode, rsrc + 8);
213
214 neon_disambiguate_copy (operands, dest, src, 3);
215 })
216
;; After reload, split an XImode register-to-register copy into four TImode
;; moves, at base register number + 0, + 4, + 8 and + 12.
;; neon_disambiguate_copy receives the piece arrays and the operands array
;; used by the replacement pattern.
;; NOTE(review): presumably it orders the pieces so that overlapping
;; source/destination registers are not clobbered -- confirm in arm.c.
217 (define_split
218 [(set (match_operand:XI 0 "s_register_operand" "")
219 (match_operand:XI 1 "s_register_operand" ""))]
220 "TARGET_NEON && reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))
223 (set (match_dup 4) (match_dup 5))
224 (set (match_dup 6) (match_dup 7))]
225 {
226 int rdest = REGNO (operands[0]);
227 int rsrc = REGNO (operands[1]);
228 rtx dest[4], src[4];
229
230 dest[0] = gen_rtx_REG (TImode, rdest);
231 src[0] = gen_rtx_REG (TImode, rsrc);
232 dest[1] = gen_rtx_REG (TImode, rdest + 4);
233 src[1] = gen_rtx_REG (TImode, rsrc + 4);
234 dest[2] = gen_rtx_REG (TImode, rdest + 8);
235 src[2] = gen_rtx_REG (TImode, rsrc + 8);
236 dest[3] = gen_rtx_REG (TImode, rdest + 12);
237 src[3] = gen_rtx_REG (TImode, rsrc + 12);
238
239 neon_disambiguate_copy (operands, dest, src, 4);
240 })
241
;; Expander for misaligned vector moves in the VDQX modes.  Enabled only for
;; little-endian NEON targets with unaligned_access.
;; Steps: (1) if neither operand is a register, force the source into one;
;; (2) pick whichever operand is not the register destination as the memory
;; side; (3) if that operand fails neon_vector_mem_operand, replace its
;; address with a Pmode register holding the same address.
242 (define_expand "movmisalign<mode>"
243 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
244 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
245 UNSPEC_MISALIGNED_ACCESS))]
246 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
247 {
248 rtx adjust_mem;
249 /* This pattern is not permitted to fail during expansion: if both arguments
250 are non-registers (e.g. memory := constant, which can be created by the
251 auto-vectorizer), force operand 1 into a register. */
252 if (!s_register_operand (operands[0], <MODE>mode)
253 && !s_register_operand (operands[1], <MODE>mode))
254 operands[1] = force_reg (<MODE>mode, operands[1]);
255
256 if (s_register_operand (operands[0], <MODE>mode))
257 adjust_mem = operands[1];
258 else
259 adjust_mem = operands[0];
260
261 /* Legitimize address. */
262 if (!neon_vector_mem_operand (adjust_mem, 2, true))
263 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
264
265 })
266
;; Misaligned store of a VDX-mode register: a single vst1.<V_sz_elem> of %P1
;; to the address of operand 0.  Little-endian NEON targets with
;; unaligned_access only.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
274
;; Misaligned load into a VDX-mode register: a single vld1.<V_sz_elem> into
;; %P0 from the address of operand 1.  Little-endian NEON targets with
;; unaligned_access only.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
278 " Um")]
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
283
;; Misaligned store of a VQX-mode register: a single vst1.<V_sz_elem> of %q1
;; to the address of operand 0.  Little-endian NEON targets with
;; unaligned_access only.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
291
;; Misaligned load into a VQX-mode register: a single vld1.<V_sz_elem> into
;; %q0 from the address of operand 1.  Little-endian NEON targets with
;; unaligned_access only.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
295 " Um")]
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
300
;; Insert scalar operand 1 into one lane of a VD-mode vector; operand 3 (the
;; other lanes) is tied to the destination.  Operand 2 is the vec_merge
;; selector: ffs (...) - 1 turns its lowest set bit into a lane index, which
;; is mirrored (nunits - 1 - lane) on big-endian before being written back
;; into operands[2] for the template.  Alternative 0 takes the element from
;; memory with a single-lane vld1; alternative 1 takes it from a core
;; register with vmov.
301 (define_insn "vec_set<mode>_internal"
302 [(set (match_operand:VD 0 "s_register_operand" "=w,w")
303 (vec_merge:VD
304 (vec_duplicate:VD
305 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
306 (match_operand:VD 3 "s_register_operand" "0,0")
307 (match_operand:SI 2 "immediate_operand" "i,i")))]
308 "TARGET_NEON"
309 {
310 int elt = ffs ((int) INTVAL (operands[2])) - 1;
311 if (BYTES_BIG_ENDIAN)
312 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
313 operands[2] = GEN_INT (elt);
314
315 if (which_alternative == 0)
316 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
317 else
318 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
319 }
320 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
321
;; Insert scalar operand 1 into one lane of a VQ-mode vector; operand 3 (the
;; other lanes) is tied to the destination.  Operand 2 is the vec_merge
;; selector: ffs (...) - 1 turns its lowest set bit into a lane index.  That
;; index is split into a lane within a half (index % half_elts, mirrored on
;; big-endian) and a register offset ((index / half_elts) * 2).  operands[0]
;; is rewritten to the <V_HALF>mode register at regno + offset so the
;; template can address the lane through %P0.  Alternative 0 loads from
;; memory with vld1; alternative 1 moves from a core register with vmov.
322 (define_insn "vec_set<mode>_internal"
323 [(set (match_operand:VQ 0 "s_register_operand" "=w,w")
324 (vec_merge:VQ
325 (vec_duplicate:VQ
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VQ 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
329 "TARGET_NEON"
330 {
331 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
332 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
333 int elt = elem % half_elts;
334 int hi = (elem / half_elts) * 2;
335 int regno = REGNO (operands[0]);
336
337 if (BYTES_BIG_ENDIAN)
338 elt = half_elts - 1 - elt;
339
340 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
341 operands[2] = GEN_INT (elt);
342
343 if (which_alternative == 0)
344 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
345 else
346 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
347 }
348 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
349 )
350
;; Insert a DImode scalar into one element of a V2DI vector; operand 3 (the
;; other element) is tied to the destination.  ffs (...) - 1 turns the
;; lowest set bit of selector operand 2 into an element index, and
;; operands[0] is rewritten to the DImode register at regno + 2 * index.
;; Alternative 0 loads the element with vld1.64; alternative 1 moves it
;; from a core-register pair (%Q1, %R1) with vmov.
351 (define_insn "vec_setv2di_internal"
352 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
353 (vec_merge:V2DI
354 (vec_duplicate:V2DI
355 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
356 (match_operand:V2DI 3 "s_register_operand" "0,0")
357 (match_operand:SI 2 "immediate_operand" "i,i")))]
358 "TARGET_NEON"
359 {
360 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
361 int regno = REGNO (operands[0]) + 2 * elem;
362
363 operands[0] = gen_rtx_REG (DImode, regno);
364
365 if (which_alternative == 0)
366 return "vld1.64\t%P0, %A1";
367 else
368 return "vmov\t%P0, %Q1, %R1";
369 }
370 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
371 )
372
;; Expander for vec_set in the VDQ modes.  The lane index in operand 2 is
;; converted to the mask 1 << index, and vec_set<mode>_internal is emitted
;; with operand 0 passed both as the destination and as the vector supplying
;; the unchanged lanes.
373 (define_expand "vec_set<mode>"
374 [(match_operand:VDQ 0 "s_register_operand" "")
375 (match_operand:<V_elem> 1 "s_register_operand" "")
376 (match_operand:SI 2 "immediate_operand" "")]
377 "TARGET_NEON"
378 {
379 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
380 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
381 GEN_INT (elem), operands[0]));
382 DONE;
383 })
384
;; Extract lane operand 2 of a VD-mode vector into a scalar.  On big-endian
;; the lane index is mirrored (nunits - 1 - lane) before it is used in the
;; template.  Alternative 0 stores the lane to memory with a single-lane
;; vst1; alternative 1 moves it to a core register with vmov.<V_uf_sclr>.
385 (define_insn "vec_extract<mode>"
386 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
387 (vec_select:<V_elem>
388 (match_operand:VD 1 "s_register_operand" "w,w")
389 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
390 "TARGET_NEON"
391 {
392 if (BYTES_BIG_ENDIAN)
393 {
394 int elt = INTVAL (operands[2]);
395 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
396 operands[2] = GEN_INT (elt);
397 }
398
399 if (which_alternative == 0)
400 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
401 else
402 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
403 }
404 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
405 )
406
;; Extract lane operand 2 of a VQ-mode vector into a scalar.  The lane index
;; is split into a lane within a half (index % half_elts, mirrored on
;; big-endian) and a register offset ((index / half_elts) * 2).  operands[1]
;; is rewritten to the <V_HALF>mode register at regno + offset so the
;; template can address the lane through %P1.  Alternative 0 stores the
;; lane with vst1; alternative 1 moves it to a core register with vmov.
407 (define_insn "vec_extract<mode>"
408 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
409 (vec_select:<V_elem>
410 (match_operand:VQ 1 "s_register_operand" "w,w")
411 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
412 "TARGET_NEON"
413 {
414 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
415 int elt = INTVAL (operands[2]) % half_elts;
416 int hi = (INTVAL (operands[2]) / half_elts) * 2;
417 int regno = REGNO (operands[1]);
418
419 if (BYTES_BIG_ENDIAN)
420 elt = half_elts - 1 - elt;
421
422 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
423 operands[2] = GEN_INT (elt);
424
425 if (which_alternative == 0)
426 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
427 else
428 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
429 }
430 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
431 )
432
;; Extract element operand 2 of a V2DI vector into a DImode scalar.
;; operands[1] is rewritten to the DImode register at regno + 2 * index.
;; Alternative 0 stores it with vst1.64; alternative 1 moves it to a
;; core-register pair (%Q0, %R0) with vmov.
433 (define_insn "vec_extractv2di"
434 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
435 (vec_select:DI
436 (match_operand:V2DI 1 "s_register_operand" "w,w")
437 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
438 "TARGET_NEON"
439 {
440 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
441
442 operands[1] = gen_rtx_REG (DImode, regno);
443
444 if (which_alternative == 0)
445 return "vst1.64\t{%P1}, %A0 @ v2di";
446 else
447 return "vmov\t%Q0, %R0, %P1 @ v2di";
448 }
449 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
450 )
451
;; Expander for vector initialisation in the VDQ modes; all of the work is
;; delegated to neon_expand_vector_init (destination, initialiser).
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
455 "TARGET_NEON"
456 {
457 neon_expand_vector_init (operands[0], operands[1]);
458 DONE;
459 })
460
461 ;; Doubleword and quadword arithmetic.
462
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
465
;; Vector addition for the VDQ modes (vadd).  Floating-point modes are only
;; matched when flag_unsafe_math_optimizations is set.  The type attribute
;; distinguishes the float and integer cases.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
472 [(set (attr "type")
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
476 )
477
;; DImode addition with a CC clobber.  Alternatives 0 and 3 use NEON
;; registers and emit vadd.i64; they differ only in the "arch" attribute
;; (neon_for_64bits vs. avoid_neon_for_64bits) and in the '?' penalty on
;; alternative 3.  The remaining, core-register alternatives return "#"
;; (to be split), are 8 bytes long and clobber the condition codes.
478 (define_insn "adddi3_neon"
479 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
480 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
481 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
482 (clobber (reg:CC CC_REGNUM))]
483 "TARGET_NEON"
484 {
485 switch (which_alternative)
486 {
487 case 0: /* fall through */
488 case 3: return "vadd.i64\t%P0, %P1, %P2";
489 case 1: return "#";
490 case 2: return "#";
491 case 4: return "#";
492 case 5: return "#";
493 case 6: return "#";
494 default: gcc_unreachable ();
495 }
496 }
497 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
498 multiple,multiple,multiple")
499 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
500 (set_attr "length" "*,8,8,*,8,8,8")
501 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
502 )
503
;; Vector subtraction for the VDQ modes (vsub).  Floating-point modes are
;; only matched when flag_unsafe_math_optimizations is set.  The type
;; attribute distinguishes the float and integer cases.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
510 [(set (attr "type")
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
514 )
515
;; DImode subtraction with a CC clobber.  Alternatives 0 and 4 use NEON
;; registers and emit vsub.i64; they differ in the "arch" attribute
;; (neon_for_64bits vs. avoid_neon_for_64bits).  Alternatives 1-3 use core
;; registers and emit a subs/sbc pair on the low (%Q) and high (%R) words,
;; which is 8 bytes long and clobbers the condition codes.
516 (define_insn "subdi3_neon"
517 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
518 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
519 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
520 (clobber (reg:CC CC_REGNUM))]
521 "TARGET_NEON"
522 {
523 switch (which_alternative)
524 {
525 case 0: /* fall through */
526 case 4: return "vsub.i64\t%P0, %P1, %P2";
527 case 1: /* fall through */
528 case 2: /* fall through */
529 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
530 default: gcc_unreachable ();
531 }
532 }
533 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
534 (set_attr "conds" "*,clob,clob,clob,*")
535 (set_attr "length" "*,8,8,8,*")
536 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
537 )
538
;; Vector multiplication for the VDQW modes (vmul).  Floating-point modes
;; are only matched when flag_unsafe_math_optimizations is set.  The type
;; attribute distinguishes the float and integer cases.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
545 [(set (attr "type")
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
549 )
550
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, with the accumulator (operand 1) tied to the destination.
;; Emits vmla.  Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
558 [(set (attr "type")
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
562 )
563
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with the accumulator (operand 1) tied to the destination.
;; Emits vmls.  Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
571 [(set (attr "type")
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
575 )
576
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply
580 ;; phase can use it when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations, for the intrinsics to use.
;; Named fma pattern for the VCVTF modes: operand 0 = operand 1 * operand 2
;; + operand 3, with operand 3 tied to the destination.  Emits vfma.
;; Requires TARGET_FMA and flag_unsafe_math_optimizations.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
590 )
591
;; Same RTL and output as the named fma pattern for the VCVTF modes, but
;; without the flag_unsafe_math_optimizations requirement; only TARGET_NEON
;; and TARGET_FMA are needed.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
600 )
601
;; Fused multiply-subtract for the VCVTF modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, with operand 3 tied to the destination.  Emits
;; vfms.  Requires TARGET_FMA and flag_unsafe_math_optimizations.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
610 )
611
;; Same RTL and output as the anonymous fmsub pattern for the VCVTF modes,
;; but without the flag_unsafe_math_optimizations requirement; only
;; TARGET_NEON and TARGET_FMA are needed.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
620 )
621
;; Vector rounding for the VCVTF modes, one pattern per NEON_VRINT unspec.
;; Emits vrint<variant>.f32 and requires TARGET_NEON and TARGET_FPU_ARMV8.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
626 NEON_VRINT))]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
630 )
631
;; Float-to-integer conversion of a NEON_VCVT unspec applied to a VCVTF
;; vector; the result has mode <V_cmp_result> and FIXUORS covers the
;; signed and unsigned forms.  Emits vcvt<variant>.<su>32.f32, requires
;; TARGET_NEON and TARGET_FPU_ARMV8, and is marked not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
636 NEON_VCVT)))]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
641 )
642
;; Bitwise OR for the VDQ modes.  Alternative 0 is the three-register vorr.
;; Alternative 1 has an immediate second operand (constraint Dl) with
;; operand 1 tied to the destination; its text comes from
;; neon_output_logic_immediate called with mnemonic "vorr" and 0 as the
;; fourth argument.
643 (define_insn "ior<mode>3"
644 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
645 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
646 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
647 "TARGET_NEON"
648 {
649 switch (which_alternative)
650 {
651 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
652 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
653 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
654 default: gcc_unreachable ();
655 }
656 }
657 [(set_attr "type" "neon_logic<q>")]
658 )
659
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
664
;; Bitwise AND for the VDQ modes.  Alternative 0 is the three-register vand.
;; Alternative 1 has an immediate second operand (constraint DL) with
;; operand 1 tied to the destination; its text comes from
;; neon_output_logic_immediate called with mnemonic "vand" and 1 as the
;; fourth argument.
;; NOTE(review): the 1 presumably selects the inverted-immediate form --
;; confirm against neon_output_logic_immediate.
665 (define_insn "and<mode>3"
666 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
667 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
668 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
669 "TARGET_NEON"
670 {
671 switch (which_alternative)
672 {
673 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
674 case 1: return neon_output_logic_immediate ("vand", &operands[2],
675 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
676 default: gcc_unreachable ();
677 }
678 }
679 [(set_attr "type" "neon_logic<q>")]
680 )
681
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2.  Emits vorn.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
686 "TARGET_NEON"
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
689 )
690
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
693 ;; changes above.
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.  Alternative 0 is a
;; NEON vorn; the other alternatives emit "#" and are split after reload
;; when operand 0 is not a VFP register.  On Thumb-2 the split rewrites the
;; operands into SImode low/high halves, so the replacement pattern becomes
;; two SImode or-not sets.  Otherwise it emits one_cmpldi2 into operand 0
;; followed by iordi3 and finishes with DONE.  Per the "arch" attribute,
;; alternative 1 is for ARM state ("a", length 16) and alternatives 2 and 3
;; are for Thumb-2 ("t2", length 8).
694 (define_insn_and_split "orndi3_neon"
695 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
696 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
697 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
698 "TARGET_NEON"
699 "@
700 vorn\t%P0, %P1, %P2
701 #
702 #
703 #"
704 "reload_completed &&
705 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
706 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
707 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
708 "
709 {
710 if (TARGET_THUMB2)
711 {
712 operands[3] = gen_highpart (SImode, operands[0]);
713 operands[0] = gen_lowpart (SImode, operands[0]);
714 operands[4] = gen_highpart (SImode, operands[2]);
715 operands[2] = gen_lowpart (SImode, operands[2]);
716 operands[5] = gen_highpart (SImode, operands[1]);
717 operands[1] = gen_lowpart (SImode, operands[1]);
718 }
719 else
720 {
721 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
722 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
723 DONE;
724 }
725 }"
726 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
727 (set_attr "length" "*,16,8,8")
728 (set_attr "arch" "any,a,t2,t2")]
729 )
730
;; Bit-clear for the VDQ modes: operand 0 = operand 1 & ~operand 2.  Emits
;; vbic.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
735 "TARGET_NEON"
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
738 )
739
740 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear: operand 0 = operand 1 & ~operand 2.  Alternative 0 is a
;; NEON vbic; the two core-register alternatives emit "#" (to be split by a
;; pattern not defined in this insn) and are 8 bytes long.
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
745 "TARGET_NEON"
746 "@
747 vbic\t%P0, %P1, %P2
748 #
749 #"
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
752 )
753
;; Bitwise exclusive-OR for the VDQ modes.  Emits veor.
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
758 "TARGET_NEON"
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
761 )
762
;; Bitwise complement for the VDQ modes.  Emits vmvn.
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
766 "TARGET_NEON"
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
769 )
770
;; Element-wise absolute value for the VDQW modes.  Emits vabs.<V_s_elem>;
;; the type attribute distinguishes the float and integer cases.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
774 "TARGET_NEON"
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
776 [(set (attr "type")
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
780 )
781
;; Element-wise negation for the VDQW modes.  Emits vneg.<V_s_elem>; the
;; type attribute distinguishes the float and integer cases.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
785 "TARGET_NEON"
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
787 [(set (attr "type")
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
791 )
792
;; DImode negation with a DImode scratch and a CC clobber.  Every
;; alternative emits "#", so the insn is always split.  Alternatives 0 and
;; 1 use NEON registers (only alternative 1 requests a NEON scratch);
;; alternatives 2 and 3 use core registers and need no scratch (X).
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
798 "TARGET_NEON"
799 "#"
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
802 )
803
804 ; Split negdi2_neon for vfp registers
;; Applies after reload when operand 0 is a VFP register.  The negation
;; becomes: set operand 2 to zero, then operand 0 = operand 2 - operand 1
;; (with a CC clobber).  If the scratch did not get a register, operand 0
;; itself is used as the zero temporary.
805 (define_split
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
814 {
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
817 }
818 )
819
820 ; Split negdi2_neon for core registers
;; Applies after reload on TARGET_32BIT when operand 0 is a general
;; register.  The scratch clobber is dropped, leaving a plain neg:DI with
;; only the CC clobber.
821 (define_split
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
830 ""
831 )
832
;; Element-wise unsigned minimum for the VDQIW modes.  Emits
;; vmin.<V_u_elem>.
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
837 "TARGET_NEON"
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
840 )
841
;; Element-wise unsigned maximum for the VDQIW modes.  Emits
;; vmax.<V_u_elem>.
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
846 "TARGET_NEON"
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
849 )
850
;; Element-wise signed minimum for the VDQW modes.  Emits vmin.<V_s_elem>;
;; the type attribute distinguishes the float and integer cases.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
855 "TARGET_NEON"
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
857 [(set (attr "type")
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
861 )
862
;; Element-wise signed maximum for the VDQW modes.  Emits vmax.<V_s_elem>;
;; the type attribute distinguishes the float and integer cases.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
867 "TARGET_NEON"
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869 [(set (attr "type")
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
873 )
874
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
877 ; SImode elements.
878
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a vector
;; register and emits vshl.<V_s_elem>.  Alternative 1 shifts by an
;; immediate (constraint Dn); its text comes from
;; neon_output_shift_immediate called with "vshl", 'i' and true as the
;; final argument.
879 (define_insn "vashl<mode>3"
880 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
881 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
882 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
883 "TARGET_NEON"
884 {
885 switch (which_alternative)
886 {
887 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
888 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
889 <MODE>mode,
890 VALID_NEON_QREG_MODE (<MODE>mode),
891 true);
892 default: gcc_unreachable ();
893 }
894 }
895 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
896 )
897
;; Arithmetic right shift by an immediate for the VDQIW modes.  The text
;; comes from neon_output_shift_immediate called with "vshr", sign
;; character 's' and false as the final argument.
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
902 "TARGET_NEON"
903 {
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
906 false);
907 }
908 [(set_attr "type" "neon_shift_imm<q>")]
909 )
910
;; Vector logical right shift by an immediate.  The assembly text is
;; produced by neon_output_shift_immediate using the "vshr" mnemonic with
;; the unsigned ('u') type letter.
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
915 "TARGET_NEON"
916 {
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
919 false);
920 }
921 [(set_attr "type" "neon_shift_imm<q>")]
922 )
923
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a
925 ; negative amount, with signed operands. This is essentially the same as
926 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
927 ; with negative shift amounts. The vashr<mode>3 expander uses it.
928
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
934 "TARGET_NEON"
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
937 )
938
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands. The vlshr<mode>3 expander uses it, passing
; the negated shift count as operand 2.
941
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
947 "TARGET_NEON"
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
950 )
951
;; Expander for vector arithmetic right shift.
;; A register shift count is negated into a fresh pseudo and passed to
;; ashl<mode>3_signed (a right shift is done as a left shift by a negative
;; amount); any other operand 2 is passed to vashr<mode>3_imm.
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
956 "TARGET_NEON"
957 {
958 if (s_register_operand (operands[2], <MODE>mode))
959 {
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
963 }
964 else
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
966 DONE;
967 })
968
;; Expander for vector logical right shift.
;; A register shift count is negated into a fresh pseudo and passed to
;; ashl<mode>3_unsigned (a right shift is done as a left shift by a negative
;; amount); any other operand 2 is passed to vlshr<mode>3_imm.
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
973 "TARGET_NEON"
974 {
975 if (s_register_operand (operands[2], <MODE>mode))
976 {
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
980 }
981 else
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
983 DONE;
984 })
985
986 ;; 64-bit shifts
987
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
992 ;; using an unspec.
;; Alternative 0 takes the count from memory (vld1.32 into lane 0);
;; alternative 1 takes it from a core register (vmov.32 into lane 0).
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
996 UNSPEC_LOAD_COUNT))]
997 "TARGET_NEON"
998 "@
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1002 )
1003
;; DImode left shift carried out in a NEON register, without the scratch and
;; CC clobbers of ashldi3_neon (whose splitter emits this pattern).
;; Alternative 0 shifts by an immediate, alternative 1 by a count held in a
;; NEON register.  Only matches after reload, and a constant count must be
;; in the range [0, 63].
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1011 "@
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1015 )
1016
;; DImode left shift whose operands may end up in NEON ("w") or core ("r")
;; registers.  It is output as "#" and always split after reload:
;;  - Destination in a VFP/NEON register: a constant count below 1 becomes a
;;    plain DImode move and a constant above 63 is clamped to 63; a
;;    non-constant count is first loaded into scratch operand 5 with
;;    neon_load_count.  The shift itself is ashldi3_neon_noclobber.
;;  - Destination in core registers: a shift by the constant 1 uses
;;    arm_ashldi3_1bit when operands 0 and 1 either do not overlap or are the
;;    same register; everything else goes through
;;    arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1025 "TARGET_NEON"
1026 "#"
1027 "TARGET_NEON && reload_completed"
1028 [(const_int 0)]
1029 "
1030 {
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1032 {
1033 if (CONST_INT_P (operands[2]))
1034 {
1035 if (INTVAL (operands[2]) < 1)
1036 {
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1038 DONE;
1039 }
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1042 }
1043 else
1044 {
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1047 }
1048
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1051 operands[2]));
1052 }
1053 else
1054 {
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1060 else
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1063 }
1064 DONE;
1065 }"
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1069 )
1070
1071 ; The shift amount needs to be negated for right-shifts
; DImode vshl.s64 by a count held in a NEON register, wrapped in an unspec.
; Only matches after reload.
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1080 )
1081
1082 ; The shift amount needs to be negated for right-shifts
; DImode vshl.u64 by a count held in a NEON register, wrapped in an unspec.
; Only matches after reload.
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
1091 )
1092
;; DImode arithmetic right shift by a constant, done in a NEON register with
;; vshr.s64.  Only matches after reload and for counts in the range [1, 64].
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
1101 )
1102
;; DImode logical right shift by a constant, done in a NEON register with
;; vshr.u64.  Only matches after reload and for counts in the range [1, 64].
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
1111 )
1112
1113 ;; ashrdi3_neon
1114 ;; lshrdi3_neon
;; DImode right shifts (iterated over rshifts) whose operands may end up in
;; NEON ("w") or core ("r") registers.  Output as "#" and always split after
;; reload:
;;  - Destination in a VFP/NEON register: a constant count below 1 becomes a
;;    plain DImode move, a constant above 64 is clamped to 64, and the shift
;;    is emitted with <shift>di3_neon_imm_noclobber.  A register count is
;;    negated into scratch operand 3, loaded into scratch operand 5 with
;;    neon_load_count, and used by <shifttype>_shift_di3_neon.
;;  - Destination in core registers: a shift by the constant 1 uses
;;    arm_<shift>di3_1bit when operands 0 and 1 either do not overlap or are
;;    the same register; everything else goes through
;;    arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1123 "TARGET_NEON"
1124 "#"
1125 "TARGET_NEON && reload_completed"
1126 [(const_int 0)]
1127 "
1128 {
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1130 {
1131 if (CONST_INT_P (operands[2]))
1132 {
1133 if (INTVAL (operands[2]) < 1)
1134 {
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1136 DONE;
1137 }
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1140
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1143 operands[1],
1144 operands[2]));
1145 }
1146 else
1147 {
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1152 operands[5]));
1153 }
1154 }
1155 else
1156 {
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1162 else
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1166 }
1167
1168 DONE;
1169 }"
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1173 )
1174
1175 ;; Widening operations
1176
;; Widening signed accumulate: sign-extend each element of operand 1 (mode
;; VW) to <V_widen> and add it to the wide accumulator operand 2 (vaddw).
;; Operand 1 carries no '%' commutativity modifier: it sits inside the
;; sign_extend and has a different mode from operand 2, so the two operands
;; must never be interchanged.
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1182 "TARGET_NEON"
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
1185 )
1186
;; Widening unsigned accumulate: zero-extend each element of operand 1 (mode
;; VW) to <V_widen> and add it to the wide accumulator operand 2 (vaddw).
;; Operand 1 carries no '%' commutativity modifier: it sits inside the
;; zero_extend and has a different mode from operand 2, so the two operands
;; must never be interchanged.
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1192 "TARGET_NEON"
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1195 )
1196
1197 ;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
1198 ;; shift-count granularity. That's good enough for the middle-end's current
1199 ;; needs.
1200
1201 ;; Note that it's not safe to perform such an operation in big-endian mode,
1202 ;; due to element-ordering issues.
1203
;; Whole-vector right shift of operand 1 by operand 2 bits (a constant
;; multiple of 8), result in operand 0.  Little-endian only.
;; Both vectors are viewed as byte vectors (V16QI for 128-bit modes, V8QI
;; otherwise) and the shift is one VEXT of operand 1 with a zero vector,
;; using a byte index of num_bits / BITS_PER_UNIT.
;; A shift by the full vector width moves every bit out, so the result is
;; zero; that case is handled directly instead of going through VEXT.
1204 (define_expand "vec_shr_<mode>"
1205 [(match_operand:VDQ 0 "s_register_operand" "")
1206 (match_operand:VDQ 1 "s_register_operand" "")
1207 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1208 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1209 {
1210 rtx zero_reg;
1211 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1212 const int width = GET_MODE_BITSIZE (<MODE>mode);
1213 const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1214 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1215 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1216
1217 if (num_bits == width)
1218 {
/* Everything is shifted out: the result is all zeros, not a copy. */
1219 emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
1220 DONE;
1221 }
1222
1223 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1224 operands[0] = gen_lowpart (bvecmode, operands[0]);
1225 operands[1] = gen_lowpart (bvecmode, operands[1]);
1226
1227 emit_insn (gen_ext (operands[0], operands[1], zero_reg,
1228 GEN_INT (num_bits / BITS_PER_UNIT)));
1229 DONE;
1230 })
1231
;; Whole-vector left shift of operand 1 by operand 2 bits (a constant
;; multiple of 8), result in operand 0.  Little-endian only.
;; Both vectors are viewed as byte vectors (V16QI for 128-bit modes, V8QI
;; otherwise) and the shift is one VEXT of a zero vector with operand 1,
;; using a byte index of (width - num_bits) / BITS_PER_UNIT.
;; A shift by 0 leaves the value unchanged, so that case is a plain copy
;; instead of going through VEXT with a full-width index.
1232 (define_expand "vec_shl_<mode>"
1233 [(match_operand:VDQ 0 "s_register_operand" "")
1234 (match_operand:VDQ 1 "s_register_operand" "")
1235 (match_operand:SI 2 "const_multiple_of_8_operand" "")]
1236 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1237 {
1238 rtx zero_reg;
1239 HOST_WIDE_INT num_bits = INTVAL (operands[2]);
1240 const int width = GET_MODE_BITSIZE (<MODE>mode);
1241 const machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
1242 rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
1243 (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
1244
1245 if (num_bits == 0)
1246 {
/* Nothing is shifted: the result is the input itself, not zero. */
1247 emit_move_insn (operands[0], operands[1]);
1248 DONE;
1249 }
1250
1251 num_bits = width - num_bits;
1252
1253 zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
1254 operands[0] = gen_lowpart (bvecmode, operands[0]);
1255 operands[1] = gen_lowpart (bvecmode, operands[1]);
1256
1257 emit_insn (gen_ext (operands[0], zero_reg, operands[1],
1258 GEN_INT (num_bits / BITS_PER_UNIT)));
1259 DONE;
1260 })
1261
1262 ;; Helpers for quad-word reduction operations
1263
1264 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1265 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1266 ; N/2-element vector.
1267
;; V4SI instance: combines elements {0,1} of operand 1 with elements {2,3}
;; using the vqh_ops operation, giving a V2SI result.  The mnemonic and the
;; signedness letter come from <VQH_mnem> and <VQH_sign>.
1268 (define_insn "quad_halves_<code>v4si"
1269 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1270 (vqh_ops:V2SI
1271 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1272 (parallel [(const_int 0) (const_int 1)]))
1273 (vec_select:V2SI (match_dup 1)
1274 (parallel [(const_int 2) (const_int 3)]))))]
1275 "TARGET_NEON"
1276 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1277 [(set_attr "vqh_mnem" "<VQH_mnem>")
1278 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1279 )
1280
;; V4SF instance: combines elements {0,1} of operand 1 with elements {2,3}
;; using the vqhs_ops operation, giving a V2SF result.  Only enabled under
;; flag_unsafe_math_optimizations.
1281 (define_insn "quad_halves_<code>v4sf"
1282 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1283 (vqhs_ops:V2SF
1284 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1285 (parallel [(const_int 0) (const_int 1)]))
1286 (vec_select:V2SF (match_dup 1)
1287 (parallel [(const_int 2) (const_int 3)]))))]
1288 "TARGET_NEON && flag_unsafe_math_optimizations"
1289 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1290 [(set_attr "vqh_mnem" "<VQH_mnem>")
1291 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1292 )
1293
;; V8HI instance: combines elements {0..3} of operand 1 with elements {4..7}
;; using the vqh_ops operation, giving a V4HI result.
;; Operand 0 is only written (the template reads just %e1 and %f1), so it
;; uses the "=w" output constraint like the V4SI and V4SF variants.
1294 (define_insn "quad_halves_<code>v8hi"
1295 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1296 (vqh_ops:V4HI
1297 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1298 (parallel [(const_int 0) (const_int 1)
1299 (const_int 2) (const_int 3)]))
1300 (vec_select:V4HI (match_dup 1)
1301 (parallel [(const_int 4) (const_int 5)
1302 (const_int 6) (const_int 7)]))))]
1303 "TARGET_NEON"
1304 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1305 [(set_attr "vqh_mnem" "<VQH_mnem>")
1306 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1307 )
1308
;; V16QI instance: combines elements {0..7} of operand 1 with elements
;; {8..15} using the vqh_ops operation, giving a V8QI result.
;; Operand 0 is only written (the template reads just %e1 and %f1), so it
;; uses the "=w" output constraint like the V4SI and V4SF variants.
1309 (define_insn "quad_halves_<code>v16qi"
1310 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1311 (vqh_ops:V8QI
1312 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1313 (parallel [(const_int 0) (const_int 1)
1314 (const_int 2) (const_int 3)
1315 (const_int 4) (const_int 5)
1316 (const_int 6) (const_int 7)]))
1317 (vec_select:V8QI (match_dup 1)
1318 (parallel [(const_int 8) (const_int 9)
1319 (const_int 10) (const_int 11)
1320 (const_int 12) (const_int 13)
1321 (const_int 14) (const_int 15)]))))]
1322 "TARGET_NEON"
1323 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1324 [(set_attr "vqh_mnem" "<VQH_mnem>")
1325 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1326 )
1327
;; Copy the half-width vector operand 1 into the part of the 128-bit operand
;; 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), by moving into
;; a <V_HALF> subreg of operand 0.
1328 (define_expand "move_hi_quad_<mode>"
1329 [(match_operand:ANY128 0 "s_register_operand" "")
1330 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1331 "TARGET_NEON"
1332 {
1333 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1334 GET_MODE_SIZE (<V_HALF>mode)),
1335 operands[1]);
1336 DONE;
1337 })
1338
;; Copy the half-width vector operand 1 into the part of the 128-bit operand
;; 0 that starts at byte offset 0, by moving into a <V_HALF> subreg of
;; operand 0.
1339 (define_expand "move_lo_quad_<mode>"
1340 [(match_operand:ANY128 0 "s_register_operand" "")
1341 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1342 "TARGET_NEON"
1343 {
1344 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1345 <MODE>mode, 0),
1346 operands[1]);
1347 DONE;
1348 })
1349
1350 ;; Reduction operations
1351
;; Sum reduction of a VD vector (operand 1) into a scalar (operand 0).
;; neon_pairwise_reduce is given the vpadd generator and a fresh vector
;; pseudo; the scalar is then extracted from element 0 of that vector.
;; Float modes require flag_unsafe_math_optimizations.
1352 (define_expand "reduc_plus_scal_<mode>"
1353 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1354 (match_operand:VD 1 "s_register_operand" "")]
1355 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1356 {
1357 rtx vec = gen_reg_rtx (<MODE>mode);
1358 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1359 &gen_neon_vpadd_internal<mode>);
1360 /* The same result is actually computed into every element. */
1361 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1362 DONE;
1363 })
1364
;; Sum reduction of a VQ vector (operand 1) into a scalar (operand 0).
;; The two halves are first added together with quad_halves_plus<mode>, then
;; the half-width reduc_plus_scal expander finishes the job.
;; Float modes require flag_unsafe_math_optimizations; little-endian only.
1365 (define_expand "reduc_plus_scal_<mode>"
1366 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1367 (match_operand:VQ 1 "s_register_operand" "")]
1368 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1369 && !BYTES_BIG_ENDIAN"
1370 {
1371 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1372
1373 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1374 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1375
1376 DONE;
1377 })
1378
;; Sum reduction of a V2DI vector (operand 1) into a DImode scalar.
;; arm_reduc_plus_internal_v2di computes the sum into a fresh V2DI pseudo
;; and element 0 of that pseudo is extracted into operand 0.
;; Little-endian only.
1379 (define_expand "reduc_plus_scal_v2di"
1380 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1381 (match_operand:V2DI 1 "s_register_operand" "")]
1382 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1383 {
1384 rtx vec = gen_reg_rtx (V2DImode);
1385
1386 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1387 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1388
1389 DONE;
1390 })
1391
;; Helper for reduc_plus_scal_v2di: a single vadd.i64 of the %e1 and %f1
;; parts of operand 1 into the %e0 part of operand 0, modelled as an
;; UNSPEC_VPADD on the whole V2DI register.
;; NOTE(review): %e/%f presumably select the low/high D register of a Q
;; register -- confirm against the operand-printing code.
1392 (define_insn "arm_reduc_plus_internal_v2di"
1393 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1394 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1395 UNSPEC_VPADD))]
1396 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1397 "vadd.i64\t%e0, %e1, %f1"
1398 [(set_attr "type" "neon_add_q")]
1399 )
1400
;; Signed-minimum reduction of a VD vector (operand 1) into a scalar.
;; neon_pairwise_reduce is given the vpsmin generator and a fresh vector
;; pseudo; the scalar is then extracted from element 0 of that vector.
;; Float modes require flag_unsafe_math_optimizations.
1401 (define_expand "reduc_smin_scal_<mode>"
1402 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1403 (match_operand:VD 1 "s_register_operand" "")]
1404 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1405 {
1406 rtx vec = gen_reg_rtx (<MODE>mode);
1407
1408 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1409 &gen_neon_vpsmin<mode>);
1410 /* The result is computed into every element of the vector. */
1411 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1412 DONE;
1413 })
1414
;; Signed-minimum reduction of a VQ vector (operand 1) into a scalar.
;; The two halves are first combined with quad_halves_smin<mode>, then the
;; half-width reduc_smin_scal expander finishes the job.
;; Float modes require flag_unsafe_math_optimizations; little-endian only.
1415 (define_expand "reduc_smin_scal_<mode>"
1416 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1417 (match_operand:VQ 1 "s_register_operand" "")]
1418 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1419 && !BYTES_BIG_ENDIAN"
1420 {
1421 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1422
1423 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1424 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1425
1426 DONE;
1427 })
1428
;; Signed-maximum reduction of a VD vector (operand 1) into a scalar.
;; neon_pairwise_reduce is given the vpsmax generator and a fresh vector
;; pseudo; the scalar is then extracted from element 0 of that vector.
;; Float modes require flag_unsafe_math_optimizations.
1429 (define_expand "reduc_smax_scal_<mode>"
1430 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1431 (match_operand:VD 1 "s_register_operand" "")]
1432 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1433 {
1434 rtx vec = gen_reg_rtx (<MODE>mode);
1435 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1436 &gen_neon_vpsmax<mode>);
1437 /* The result is computed into every element of the vector. */
1438 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1439 DONE;
1440 })
1441
;; Signed-maximum reduction of a VQ vector (operand 1) into a scalar.
;; The two halves are first combined with quad_halves_smax<mode>, then the
;; half-width reduc_smax_scal expander finishes the job.
;; Float modes require flag_unsafe_math_optimizations; little-endian only.
1442 (define_expand "reduc_smax_scal_<mode>"
1443 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1444 (match_operand:VQ 1 "s_register_operand" "")]
1445 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1446 && !BYTES_BIG_ENDIAN"
1447 {
1448 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1449
1450 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1451 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1452
1453 DONE;
1454 })
1455
;; Unsigned-minimum reduction of a VDI vector (operand 1) into a scalar.
;; neon_pairwise_reduce is given the vpumin generator and a fresh vector
;; pseudo; the scalar is then extracted from element 0 of that vector.
1456 (define_expand "reduc_umin_scal_<mode>"
1457 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1458 (match_operand:VDI 1 "s_register_operand" "")]
1459 "TARGET_NEON"
1460 {
1461 rtx vec = gen_reg_rtx (<MODE>mode);
1462 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1463 &gen_neon_vpumin<mode>);
1464 /* The result is computed into every element of the vector. */
1465 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1466 DONE;
1467 })
1468
;; Unsigned-minimum reduction of a VQI vector (operand 1) into a scalar.
;; The two halves are first combined with quad_halves_umin<mode>, then the
;; half-width reduc_umin_scal expander finishes the job.  Little-endian only.
1469 (define_expand "reduc_umin_scal_<mode>"
1470 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1471 (match_operand:VQI 1 "s_register_operand" "")]
1472 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1473 {
1474 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1475
1476 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1477 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1478
1479 DONE;
1480 })
1481
;; Unsigned-maximum reduction of a VDI vector (operand 1) into a scalar.
;; neon_pairwise_reduce is given the vpumax generator and a fresh vector
;; pseudo; the scalar is then extracted from element 0 of that vector.
1482 (define_expand "reduc_umax_scal_<mode>"
1483 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1484 (match_operand:VDI 1 "s_register_operand" "")]
1485 "TARGET_NEON"
1486 {
1487 rtx vec = gen_reg_rtx (<MODE>mode);
1488 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1489 &gen_neon_vpumax<mode>);
1490 /* The result is computed into every element of the vector. */
1491 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1492 DONE;
1493 })
1494
;; Unsigned-maximum reduction of a VQI vector (operand 1) into a scalar.
;; The two halves are first combined with quad_halves_umax<mode>, then the
;; half-width reduc_umax_scal expander finishes the job.  Little-endian only.
1495 (define_expand "reduc_umax_scal_<mode>"
1496 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1497 (match_operand:VQI 1 "s_register_operand" "")]
1498 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1499 {
1500 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1501
1502 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1503 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1504
1505 DONE;
1506 })
1507
;; Pairwise add (vpadd) of two VD vectors, modelled as UNSPEC_VPADD.
;; Its generator is what the reduc_plus_scal expanders hand to
;; neon_pairwise_reduce.
1508 (define_insn "neon_vpadd_internal<mode>"
1509 [(set (match_operand:VD 0 "s_register_operand" "=w")
1510 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1511 (match_operand:VD 2 "s_register_operand" "w")]
1512 UNSPEC_VPADD))]
1513 "TARGET_NEON"
1514 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1515 ;; Assume this schedules like vadd.
1516 [(set (attr "type")
1517 (if_then_else (match_test "<Is_float_mode>")
1518 (const_string "neon_fp_reduc_add_s<q>")
1519 (const_string "neon_reduc_add<q>")))]
1520 )
1521
;; Pairwise minimum (vpmin, <V_s_elem> element type) of two VD vectors,
;; modelled as UNSPEC_VPSMIN.  Used by the reduc_smin_scal expanders.
1522 (define_insn "neon_vpsmin<mode>"
1523 [(set (match_operand:VD 0 "s_register_operand" "=w")
1524 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1525 (match_operand:VD 2 "s_register_operand" "w")]
1526 UNSPEC_VPSMIN))]
1527 "TARGET_NEON"
1528 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1529 [(set (attr "type")
1530 (if_then_else (match_test "<Is_float_mode>")
1531 (const_string "neon_fp_reduc_minmax_s<q>")
1532 (const_string "neon_reduc_minmax<q>")))]
1533 )
1534
;; Pairwise maximum (vpmax, <V_s_elem> element type) of two VD vectors,
;; modelled as UNSPEC_VPSMAX.  Used by the reduc_smax_scal expanders.
1535 (define_insn "neon_vpsmax<mode>"
1536 [(set (match_operand:VD 0 "s_register_operand" "=w")
1537 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1538 (match_operand:VD 2 "s_register_operand" "w")]
1539 UNSPEC_VPSMAX))]
1540 "TARGET_NEON"
1541 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1542 [(set (attr "type")
1543 (if_then_else (match_test "<Is_float_mode>")
1544 (const_string "neon_fp_reduc_minmax_s<q>")
1545 (const_string "neon_reduc_minmax<q>")))]
1546 )
1547
;; Pairwise minimum (vpmin, <V_u_elem> element type) of two VDI vectors,
;; modelled as UNSPEC_VPUMIN.  Used by the reduc_umin_scal expanders.
1548 (define_insn "neon_vpumin<mode>"
1549 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1550 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1551 (match_operand:VDI 2 "s_register_operand" "w")]
1552 UNSPEC_VPUMIN))]
1553 "TARGET_NEON"
1554 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1555 [(set_attr "type" "neon_reduc_minmax<q>")]
1556 )
1557
;; Pairwise maximum (vpmax, <V_u_elem> element type) of two VDI vectors,
;; modelled as UNSPEC_VPUMAX.  Used by the reduc_umax_scal expanders.
1558 (define_insn "neon_vpumax<mode>"
1559 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1560 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1561 (match_operand:VDI 2 "s_register_operand" "w")]
1562 UNSPEC_VPUMAX))]
1563 "TARGET_NEON"
1564 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1565 [(set_attr "type" "neon_reduc_minmax<q>")]
1566 )
1567
1568 ;; Saturating arithmetic
1569
1570 ; NOTE: Neon supports many more saturating variants of instructions than the
1571 ; following, but these are all GCC currently understands.
1572 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1573 ; yet either, although these patterns may be used by intrinsics when they're
1574 ; added.
1575
;; Signed saturating add (ss_plus) of two VD vectors, emitted as vqadd.
1576 (define_insn "*ss_add<mode>_neon"
1577 [(set (match_operand:VD 0 "s_register_operand" "=w")
1578 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1579 (match_operand:VD 2 "s_register_operand" "w")))]
1580 "TARGET_NEON"
1581 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1582 [(set_attr "type" "neon_qadd<q>")]
1583 )
1584
;; Unsigned saturating add (us_plus) of two VD vectors, emitted as vqadd.
1585 (define_insn "*us_add<mode>_neon"
1586 [(set (match_operand:VD 0 "s_register_operand" "=w")
1587 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1588 (match_operand:VD 2 "s_register_operand" "w")))]
1589 "TARGET_NEON"
1590 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1591 [(set_attr "type" "neon_qadd<q>")]
1592 )
1593
;; Signed saturating subtract (ss_minus) of two VD vectors, emitted as vqsub.
1594 (define_insn "*ss_sub<mode>_neon"
1595 [(set (match_operand:VD 0 "s_register_operand" "=w")
1596 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1597 (match_operand:VD 2 "s_register_operand" "w")))]
1598 "TARGET_NEON"
1599 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1600 [(set_attr "type" "neon_qsub<q>")]
1601 )
1602
;; Unsigned saturating subtract (us_minus) of two VD vectors, emitted as
;; vqsub.
1603 (define_insn "*us_sub<mode>_neon"
1604 [(set (match_operand:VD 0 "s_register_operand" "=w")
1605 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1606 (match_operand:VD 2 "s_register_operand" "w")))]
1607 "TARGET_NEON"
1608 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1609 [(set_attr "type" "neon_qsub<q>")]
1610 )
1611
1612 ;; Conditional instructions. These are comparisons with conditional moves for
1613 ;; vectors. They perform the assignment:
1614 ;;
1615 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1616 ;;
1617 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1618 ;; element-wise.
1619
;; Implementation outline: a lane mask is built in a <V_cmp_result> pseudo
;; with the NEON compare patterns (vcge/vcgt/vceq, or vclt/vcle when
;; comparing against the zero vector), and neon_vbsl then selects between
;; operands 1 and 2.  Comparisons that have to be true on unordered lanes are
;; emitted as the inverse compare with the two vbsl data operands swapped.
;; Float modes require flag_unsafe_math_optimizations.
1620 (define_expand "vcond<mode><mode>"
1621 [(set (match_operand:VDQW 0 "s_register_operand" "")
1622 (if_then_else:VDQW
1623 (match_operator 3 "comparison_operator"
1624 [(match_operand:VDQW 4 "s_register_operand" "")
1625 (match_operand:VDQW 5 "nonmemory_operand" "")])
1626 (match_operand:VDQW 1 "s_register_operand" "")
1627 (match_operand:VDQW 2 "s_register_operand" "")))]
1628 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1629 {
/* NOTE(review): magic_word is passed as the last argument of every compare
generator below; presumably it is the type-flags word those patterns
expect (3 for float modes, 1 otherwise) -- confirm. */
1630 HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
1631 ? 3 : 1;
1632 rtx magic_rtx = GEN_INT (magic_word);
1633 int inverse = 0;
1634 int use_zero_form = 0;
1635 int swap_bsl_operands = 0;
1636 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1637 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1638
1639 rtx (*base_comparison) (rtx, rtx, rtx, rtx);
1640 rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);
1641
/* Step 1: keep a zero operand 5 as-is for the ordered comparisons that
have a compare-with-zero form; otherwise make sure it is in a register. */
1642 switch (GET_CODE (operands[3]))
1643 {
1644 case GE:
1645 case GT:
1646 case LE:
1647 case LT:
1648 case EQ:
1649 if (operands[5] == CONST0_RTX (<MODE>mode))
1650 {
1651 use_zero_form = 1;
1652 break;
1653 }
1654 /* Fall through. */
1655 default:
1656 if (!REG_P (operands[5]))
1657 operands[5] = force_reg (<MODE>mode, operands[5]);
1658 }
1659
/* Step 2: pick the compare generator for the condition and the one to use
when the operands are swapped; INVERSE marks the less-than style codes. */
1660 switch (GET_CODE (operands[3]))
1661 {
1662 case LT:
1663 case UNLT:
1664 inverse = 1;
1665 /* Fall through. */
1666 case GE:
1667 case UNGE:
1668 case ORDERED:
1669 case UNORDERED:
1670 base_comparison = gen_neon_vcge<mode>;
1671 complimentary_comparison = gen_neon_vcgt<mode>;
1672 break;
1673 case LE:
1674 case UNLE:
1675 inverse = 1;
1676 /* Fall through. */
1677 case GT:
1678 case UNGT:
1679 base_comparison = gen_neon_vcgt<mode>;
1680 complimentary_comparison = gen_neon_vcge<mode>;
1681 break;
1682 case EQ:
1683 case NE:
1684 case UNEQ:
1685 base_comparison = gen_neon_vceq<mode>;
1686 complimentary_comparison = gen_neon_vceq<mode>;
1687 break;
1688 default:
1689 gcc_unreachable ();
1690 }
1691
/* Step 3: emit the compare(s) that compute MASK. */
1692 switch (GET_CODE (operands[3]))
1693 {
1694 case LT:
1695 case LE:
1696 case GT:
1697 case GE:
1698 case EQ:
1699 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1700 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1701 a GE b -> a GE b
1702 a GT b -> a GT b
1703 a LE b -> b GE a
1704 a LT b -> b GT a
1705 a EQ b -> a EQ b
1706 Note that there also exist direct comparison against 0 forms,
1707 so catch those as a special case. */
1708 if (use_zero_form)
1709 {
1710 inverse = 0;
1711 switch (GET_CODE (operands[3]))
1712 {
1713 case LT:
1714 base_comparison = gen_neon_vclt<mode>;
1715 break;
1716 case LE:
1717 base_comparison = gen_neon_vcle<mode>;
1718 break;
1719 default:
1720 /* Do nothing, other zero form cases already have the correct
1721 base_comparison. */
1722 break;
1723 }
1724 }
1725
1726 if (!inverse)
1727 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1728 else
1729 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1730 break;
1731 case UNLT:
1732 case UNLE:
1733 case UNGT:
1734 case UNGE:
1735 case NE:
1736 /* Vector compare returns false for lanes which are unordered, so if we use
1737 the inverse of the comparison we actually want to emit, then
1738 swap the operands to BSL, we will end up with the correct result.
1739 Note that a NE NaN and NaN NE b are true for all a, b.
1740
1741 Our transformations are:
1742 a UNGE b -> !(b GT a)
1743 a UNGT b -> !(b GE a)
1744 a UNLE b -> !(a GT b)
1745 a UNLT b -> !(a GE b)
1746 a NE b -> !(a EQ b) */
1747
1748 if (inverse)
1749 emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
1750 else
1751 emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
1752
1753 swap_bsl_operands = 1;
1754 break;
1755 case UNEQ:
1756 /* We check (a > b || b > a). Combining these comparisons gives us
1757 true iff (a != b && a ORDERED b); swapping the operands to BSL
1758 will then give us (a == b || a UNORDERED b) as intended. */
1759
1760 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx));
1761 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx));
1762 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1763 swap_bsl_operands = 1;
1764 break;
1765 case UNORDERED:
1766 /* Operands are ORDERED iff (a > b || b >= a).
1767 Swapping the operands to BSL will give the UNORDERED case. */
1768 swap_bsl_operands = 1;
1769 /* Fall through. */
1770 case ORDERED:
1771 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx));
1772 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx));
1773 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1774 break;
1775 default:
1776 gcc_unreachable ();
1777 }
1778
/* Step 4: select operand 1 where MASK is set and operand 2 elsewhere, or
the other way round when the inverse comparison was emitted. */
1779 if (swap_bsl_operands)
1780 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1781 operands[1]));
1782 else
1783 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1784 operands[2]));
1785 DONE;
1786 })
1787
;; Unsigned-comparison vector conditional move over the integer VDQIW modes:
;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2, element-wise.
;; A lane mask is built with vcge/vcgt/vceq (or vcle/vclt against zero) and
;; neon_vbsl selects between operands 1 and 2.  LEU and LTU against a
;; register are done by swapping the compare operands; NE is done as EQ with
;; the two vbsl data operands swapped.
;; NOTE(review): operand 5's predicate is s_register_operand, so the
;; CONST0_RTX test below looks unreachable through this predicate -- confirm.
1788 (define_expand "vcondu<mode><mode>"
1789 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1790 (if_then_else:VDQIW
1791 (match_operator 3 "arm_comparison_operator"
1792 [(match_operand:VDQIW 4 "s_register_operand" "")
1793 (match_operand:VDQIW 5 "s_register_operand" "")])
1794 (match_operand:VDQIW 1 "s_register_operand" "")
1795 (match_operand:VDQIW 2 "s_register_operand" "")))]
1796 "TARGET_NEON"
1797 {
1798 rtx mask;
1799 int inverse = 0, immediate_zero = 0;
1800
1801 mask = gen_reg_rtx (<V_cmp_result>mode);
1802
1803 if (operands[5] == CONST0_RTX (<MODE>mode))
1804 immediate_zero = 1;
1805 else if (!REG_P (operands[5]))
1806 operands[5] = force_reg (<MODE>mode, operands[5]);
1807
1808 switch (GET_CODE (operands[3]))
1809 {
1810 case GEU:
1811 emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
1812 const0_rtx));
1813 break;
1814
1815 case GTU:
1816 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
1817 const0_rtx));
1818 break;
1819
1820 case EQ:
1821 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1822 const0_rtx));
1823 break;
1824
1825 case LEU:
1826 if (immediate_zero)
1827 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
1828 const0_rtx));
1829 else
1830 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
1831 const0_rtx));
1832 break;
1833
1834 case LTU:
1835 if (immediate_zero)
1836 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
1837 const0_rtx));
1838 else
1839 emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
1840 const0_rtx));
1841 break;
1842
1843 case NE:
1844 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
1845 const0_rtx));
1846 inverse = 1;
1847 break;
1848
1849 default:
1850 gcc_unreachable ();
1851 }
1852
1853 if (inverse)
1854 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1855 operands[1]));
1856 else
1857 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1858 operands[2]));
1859
1860 DONE;
1861 })
1862
1863 ;; Patterns for builtins.
1864
1865 ; good for plain vadd, vaddq.
1866
;; Expander for the vadd builtin.  Integer modes, and float modes when
;; flag_unsafe_math_optimizations is set, go through the generic add<mode>3
;; pattern; otherwise the unspec insn is emitted.  Operand 3 is accepted
;; but not used by the expansion.
1867 (define_expand "neon_vadd<mode>"
1868 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1869 (match_operand:VCVTF 1 "s_register_operand" "w")
1870 (match_operand:VCVTF 2 "s_register_operand" "w")
1871 (match_operand:SI 3 "immediate_operand" "i")]
1872 "TARGET_NEON"
1873 {
1874 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1875 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1876 else
1877 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1878 operands[2]));
1879 DONE;
1880 })
1881
1882 ; Note that NEON operations don't support the full IEEE 754 standard: in
1883 ; particular, denormal values are flushed to zero. This means that GCC cannot
1884 ; use those instructions for autovectorization, etc. unless
1885 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1886 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1887 ; header) must work in either case: if -funsafe-math-optimizations is given,
1888 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1889 ; expand to unspecs (which may potentially limit the extent to which they might
1890 ; be optimized by generic code).
1891
1892 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1893
;; Opaque (UNSPEC_VADD) form of vector add, emitted as a single vadd.
;; The "type" attribute is chosen per mode: FP add/sub for float modes,
;; integer add otherwise.
1894 (define_insn "neon_vadd<mode>_unspec"
1895 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1896 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1897 (match_operand:VCVTF 2 "s_register_operand" "w")]
1898 UNSPEC_VADD))]
1899 "TARGET_NEON"
1900 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1901 [(set (attr "type")
1902 (if_then_else (match_test "<Is_float_mode>")
1903 (const_string "neon_fp_addsub_s<q>")
1904 (const_string "neon_add<q>")))]
1905 )
1906
1907 ; operand 3 represents in bits:
1908 ; bit 0: signed (vs unsigned).
1909 ; bit 1: rounding (vs none).
1910
;; vaddl: add two VDI-mode inputs into a <V_widen>-mode result.  Operand 3
;; (the flags immediate) is consumed by the %T3 modifier in the template.
1911 (define_insn "neon_vaddl<mode>"
1912 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1913 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1914 (match_operand:VDI 2 "s_register_operand" "w")
1915 (match_operand:SI 3 "immediate_operand" "i")]
1916 UNSPEC_VADDL))]
1917 "TARGET_NEON"
1918 "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
1919 [(set_attr "type" "neon_add_long")]
1920 )
1921
;; vaddw: add a VDI-mode operand 2 to an already-wide (<V_widen>) operand 1.
;; Operand 3 is consumed by the %T3 modifier in the template.
1922 (define_insn "neon_vaddw<mode>"
1923 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1924 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1925 (match_operand:VDI 2 "s_register_operand" "w")
1926 (match_operand:SI 3 "immediate_operand" "i")]
1927 UNSPEC_VADDW))]
1928 "TARGET_NEON"
1929 "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
1930 [(set_attr "type" "neon_add_widen")]
1931 )
1932
1933 ; vhadd and vrhadd.
1934
;; Halving add.  The mnemonic is assembled as "v" + %O3 + "hadd", so one
;; pattern covers both vhadd and vrhadd depending on operand 3; %T3
;; supplies the type letter in front of the element size.
1935 (define_insn "neon_vhadd<mode>"
1936 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1937 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1938 (match_operand:VDQIW 2 "s_register_operand" "w")
1939 (match_operand:SI 3 "immediate_operand" "i")]
1940 UNSPEC_VHADD))]
1941 "TARGET_NEON"
1942 "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1943 [(set_attr "type" "neon_add_halve_q")]
1944 )
1945
;; vqadd over the VDQIX modes.  Operand 3 is consumed by the %T3 modifier
;; in the template.
1946 (define_insn "neon_vqadd<mode>"
1947 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1948 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1949 (match_operand:VDQIX 2 "s_register_operand" "w")
1950 (match_operand:SI 3 "immediate_operand" "i")]
1951 UNSPEC_VQADD))]
1952 "TARGET_NEON"
1953 "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1954 [(set_attr "type" "neon_qadd<q>")]
1955 )
1956
;; Add with a <V_narrow>-mode result.  The mnemonic is "v" + %O3 + "addhn",
;; so operand 3 selects between the plain and the %O3-prefixed variant.
1957 (define_insn "neon_vaddhn<mode>"
1958 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1959 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1960 (match_operand:VN 2 "s_register_operand" "w")
1961 (match_operand:SI 3 "immediate_operand" "i")]
1962 UNSPEC_VADDHN))]
1963 "TARGET_NEON"
1964 "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
1965 [(set_attr "type" "neon_add_halve_narrow_q")]
1966 )
1967
1968 ;; We cannot replace this unspec with mul<mode>3 because of the odd
1969 ;; polynomial multiplication case that can be specified by operand 3.
;; Operand 3 reaches the output through the %F3 modifier; the "type"
;; attribute distinguishes float from integer multiplies.
1970 (define_insn "neon_vmul<mode>"
1971 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1972 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
1973 (match_operand:VDQW 2 "s_register_operand" "w")
1974 (match_operand:SI 3 "immediate_operand" "i")]
1975 UNSPEC_VMUL))]
1976 "TARGET_NEON"
1977 "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1978 [(set (attr "type")
1979 (if_then_else (match_test "<Is_float_mode>")
1980 (const_string "neon_fp_mul_s<q>")
1981 (const_string "neon_mul_<V_elem_ch><q>")))]
1982 )
1983
;; Expander for the vmla builtin.  Integer modes, and float modes when
;; flag_unsafe_math_optimizations is set, use mul<mode>3add<mode>_neon;
;; otherwise the unspec insn is emitted.  Operand 4 is accepted but not
;; used by the expansion.
1984 (define_expand "neon_vmla<mode>"
1985 [(match_operand:VDQW 0 "s_register_operand" "=w")
1986 (match_operand:VDQW 1 "s_register_operand" "0")
1987 (match_operand:VDQW 2 "s_register_operand" "w")
1988 (match_operand:VDQW 3 "s_register_operand" "w")
1989 (match_operand:SI 4 "immediate_operand" "i")]
1990 "TARGET_NEON"
1991 {
1992 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1993 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1994 operands[2], operands[3]));
1995 else
1996 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1997 operands[2], operands[3]));
1998 DONE;
1999 })
2000
;; Expander for the vfma builtin; requires TARGET_FMA in addition to NEON.
;; Forwards to fma<mode>4_intrinsic with operand 1 moved to the last
;; argument position (presumably the addend -- confirm against that
;; pattern).  Operand 4 is accepted but unused.
2001 (define_expand "neon_vfma<VCVTF:mode>"
2002 [(match_operand:VCVTF 0 "s_register_operand")
2003 (match_operand:VCVTF 1 "s_register_operand")
2004 (match_operand:VCVTF 2 "s_register_operand")
2005 (match_operand:VCVTF 3 "s_register_operand")
2006 (match_operand:SI 4 "immediate_operand")]
2007 "TARGET_NEON && TARGET_FMA"
2008 {
2009 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2010 operands[1]));
2011 DONE;
2012 })
2013
;; Expander for the vfms builtin; requires TARGET_FMA in addition to NEON.
;; Forwards to fmsub<mode>4_intrinsic with operand 1 moved to the last
;; argument position, mirroring neon_vfma.  Operand 4 is accepted but
;; unused.
2014 (define_expand "neon_vfms<VCVTF:mode>"
2015 [(match_operand:VCVTF 0 "s_register_operand")
2016 (match_operand:VCVTF 1 "s_register_operand")
2017 (match_operand:VCVTF 2 "s_register_operand")
2018 (match_operand:VCVTF 3 "s_register_operand")
2019 (match_operand:SI 4 "immediate_operand")]
2020 "TARGET_NEON && TARGET_FMA"
2021 {
2022 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2023 operands[1]));
2024 DONE;
2025 })
2026
2027 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2028
;; Opaque (UNSPEC_VMLA) multiply-accumulate.  Operand 1 is the accumulator
;; and is tied to the output register by the "0" constraint, so only
;; operands 0, 2 and 3 appear in the assembly template.
2029 (define_insn "neon_vmla<mode>_unspec"
2030 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2031 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2032 (match_operand:VDQW 2 "s_register_operand" "w")
2033 (match_operand:VDQW 3 "s_register_operand" "w")]
2034 UNSPEC_VMLA))]
2035 "TARGET_NEON"
2036 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2037 [(set (attr "type")
2038 (if_then_else (match_test "<Is_float_mode>")
2039 (const_string "neon_fp_mla_s<q>")
2040 (const_string "neon_mla_<V_elem_ch><q>")))]
2041 )
2042
;; vmlal: multiply VW-mode operands 2 and 3 and accumulate into the
;; <V_widen>-mode operand 1, which is tied to the output.  Operand 4 is
;; consumed by the %T4 modifier.
2043 (define_insn "neon_vmlal<mode>"
2044 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2045 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2046 (match_operand:VW 2 "s_register_operand" "w")
2047 (match_operand:VW 3 "s_register_operand" "w")
2048 (match_operand:SI 4 "immediate_operand" "i")]
2049 UNSPEC_VMLAL))]
2050 "TARGET_NEON"
2051 "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2052 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2053 )
2054
;; Expander for the vmls builtin.  Integer modes, and float modes when
;; flag_unsafe_math_optimizations is set, use
;; mul<mode>3neg<mode>add<mode>_neon; otherwise the unspec insn is emitted.
;; Operand 4 is accepted but not used by the expansion.
2055 (define_expand "neon_vmls<mode>"
2056 [(match_operand:VDQW 0 "s_register_operand" "=w")
2057 (match_operand:VDQW 1 "s_register_operand" "0")
2058 (match_operand:VDQW 2 "s_register_operand" "w")
2059 (match_operand:VDQW 3 "s_register_operand" "w")
2060 (match_operand:SI 4 "immediate_operand" "i")]
2061 "TARGET_NEON"
2062 {
2063 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2064 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2065 operands[1], operands[2], operands[3]));
2066 else
2067 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2068 operands[2], operands[3]));
2069 DONE;
2070 })
2071
2072 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2073
;; Opaque (UNSPEC_VMLS) multiply-subtract.  Operand 1 is tied to the
;; output register by the "0" constraint, so only operands 0, 2 and 3
;; appear in the assembly template.
2074 (define_insn "neon_vmls<mode>_unspec"
2075 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2076 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2077 (match_operand:VDQW 2 "s_register_operand" "w")
2078 (match_operand:VDQW 3 "s_register_operand" "w")]
2079 UNSPEC_VMLS))]
2080 "TARGET_NEON"
2081 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2082 [(set (attr "type")
2083 (if_then_else (match_test "<Is_float_mode>")
2084 (const_string "neon_fp_mla_s<q>")
2085 (const_string "neon_mla_<V_elem_ch><q>")))]
2086 )
2087
;; vmlsl: multiply VW-mode operands 2 and 3 and subtract from the
;; <V_widen>-mode operand 1, which is tied to the output.  Operand 4 is
;; consumed by the %T4 modifier.
2088 (define_insn "neon_vmlsl<mode>"
2089 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2090 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2091 (match_operand:VW 2 "s_register_operand" "w")
2092 (match_operand:VW 3 "s_register_operand" "w")
2093 (match_operand:SI 4 "immediate_operand" "i")]
2094 UNSPEC_VMLSL))]
2095 "TARGET_NEON"
2096 "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2097 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2098 )
2099
;; Mnemonic is "vq" + %O3 + "dmulh", so operand 3 selects between vqdmulh
;; and its %O3-prefixed variant.  The element suffix is <V_s_elem>.
2100 (define_insn "neon_vqdmulh<mode>"
2101 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2102 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2103 (match_operand:VMDQI 2 "s_register_operand" "w")
2104 (match_operand:SI 3 "immediate_operand" "i")]
2105 UNSPEC_VQDMULH))]
2106 "TARGET_NEON"
2107 "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2108 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2109 )
2110
;; vqdmlal with the <V_widen>-mode accumulator (operand 1) tied to the
;; output.  Operand 4 is part of the unspec but is not referenced by the
;; output template.
2111 (define_insn "neon_vqdmlal<mode>"
2112 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2113 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2114 (match_operand:VMDI 2 "s_register_operand" "w")
2115 (match_operand:VMDI 3 "s_register_operand" "w")
2116 (match_operand:SI 4 "immediate_operand" "i")]
2117 UNSPEC_VQDMLAL))]
2118 "TARGET_NEON"
2119 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2120 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2121 )
2122
;; vqdmlsl with the <V_widen>-mode accumulator (operand 1) tied to the
;; output.  Operand 4 is part of the unspec but is not referenced by the
;; output template.
2123 (define_insn "neon_vqdmlsl<mode>"
2124 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2125 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2126 (match_operand:VMDI 2 "s_register_operand" "w")
2127 (match_operand:VMDI 3 "s_register_operand" "w")
2128 (match_operand:SI 4 "immediate_operand" "i")]
2129 UNSPEC_VQDMLSL))]
2130 "TARGET_NEON"
2131 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2132 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2133 )
2134
;; vmull: multiply two VW-mode inputs into a <V_widen>-mode result.
;; Operand 3 is consumed by the %T3 modifier.
2135 (define_insn "neon_vmull<mode>"
2136 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2137 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2138 (match_operand:VW 2 "s_register_operand" "w")
2139 (match_operand:SI 3 "immediate_operand" "i")]
2140 UNSPEC_VMULL))]
2141 "TARGET_NEON"
2142 "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2143 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2144 )
2145
;; vqdmull: two VMDI-mode inputs, <V_widen>-mode result.  Operand 3 is part
;; of the unspec but is not referenced by the output template.
2146 (define_insn "neon_vqdmull<mode>"
2147 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2148 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2149 (match_operand:VMDI 2 "s_register_operand" "w")
2150 (match_operand:SI 3 "immediate_operand" "i")]
2151 UNSPEC_VQDMULL))]
2152 "TARGET_NEON"
2153 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2154 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2155 )
2156
;; Expander for the vsub builtin.  Integer modes, and float modes when
;; flag_unsafe_math_optimizations is set, go through the generic sub<mode>3
;; pattern; otherwise the unspec insn is emitted.  Operand 3 is accepted
;; but not used by the expansion.
2157 (define_expand "neon_vsub<mode>"
2158 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2159 (match_operand:VCVTF 1 "s_register_operand" "w")
2160 (match_operand:VCVTF 2 "s_register_operand" "w")
2161 (match_operand:SI 3 "immediate_operand" "i")]
2162 "TARGET_NEON"
2163 {
2164 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2165 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2166 else
2167 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2168 operands[2]));
2169 DONE;
2170 })
2171
2172 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2173
;; Opaque (UNSPEC_VSUB) form of vector subtract, emitted as a single vsub.
;; The "type" attribute is chosen per mode: FP add/sub for float modes,
;; integer sub otherwise.
2174 (define_insn "neon_vsub<mode>_unspec"
2175 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2176 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2177 (match_operand:VCVTF 2 "s_register_operand" "w")]
2178 UNSPEC_VSUB))]
2179 "TARGET_NEON"
2180 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2181 [(set (attr "type")
2182 (if_then_else (match_test "<Is_float_mode>")
2183 (const_string "neon_fp_addsub_s<q>")
2184 (const_string "neon_sub<q>")))]
2185 )
2186
;; vsubl: subtract two VDI-mode inputs into a <V_widen>-mode result.
;; Operand 3 is consumed by the %T3 modifier.
2187 (define_insn "neon_vsubl<mode>"
2188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2189 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2190 (match_operand:VDI 2 "s_register_operand" "w")
2191 (match_operand:SI 3 "immediate_operand" "i")]
2192 UNSPEC_VSUBL))]
2193 "TARGET_NEON"
2194 "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2195 [(set_attr "type" "neon_sub_long")]
2196 )
2197
;; vsubw: subtract a VDI-mode operand 2 from an already-wide (<V_widen>)
;; operand 1.  Operand 3 is consumed by the %T3 modifier.
2198 (define_insn "neon_vsubw<mode>"
2199 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2200 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2201 (match_operand:VDI 2 "s_register_operand" "w")
2202 (match_operand:SI 3 "immediate_operand" "i")]
2203 UNSPEC_VSUBW))]
2204 "TARGET_NEON"
2205 "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
2206 [(set_attr "type" "neon_sub_widen")]
2207 )
2208
;; vqsub over the VDQIX modes.  Operand 3 is consumed by the %T3 modifier
;; in the template.
2209 (define_insn "neon_vqsub<mode>"
2210 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2211 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2212 (match_operand:VDQIX 2 "s_register_operand" "w")
2213 (match_operand:SI 3 "immediate_operand" "i")]
2214 UNSPEC_VQSUB))]
2215 "TARGET_NEON"
2216 "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2217 [(set_attr "type" "neon_qsub<q>")]
2218 )
2219
;; vhsub over the VDQIW modes.  Unlike neon_vhadd, the mnemonic has no
;; %O3 component; operand 3 only feeds the %T3 modifier.
2220 (define_insn "neon_vhsub<mode>"
2221 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2222 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2223 (match_operand:VDQIW 2 "s_register_operand" "w")
2224 (match_operand:SI 3 "immediate_operand" "i")]
2225 UNSPEC_VHSUB))]
2226 "TARGET_NEON"
2227 "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2228 [(set_attr "type" "neon_sub_halve<q>")]
2229 )
2230
;; Subtract with a <V_narrow>-mode result.  The mnemonic is "v" + %O3 +
;; "subhn", so operand 3 selects between the plain and the %O3-prefixed
;; variant.
2231 (define_insn "neon_vsubhn<mode>"
2232 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2233 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2234 (match_operand:VN 2 "s_register_operand" "w")
2235 (match_operand:SI 3 "immediate_operand" "i")]
2236 UNSPEC_VSUBHN))]
2237 "TARGET_NEON"
2238 "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
2239 [(set_attr "type" "neon_sub_halve_narrow_q")]
2240 )
2241
;; vceq with two alternatives: register-register, and register against an
;; immediate zero (constraint Dz, printed as #0).  The "type" attribute
;; separates float compares, compares against zero and plain compares.
2242 (define_insn "neon_vceq<mode>"
2243 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2244 (unspec:<V_cmp_result>
2245 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2246 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2247 (match_operand:SI 3 "immediate_operand" "i,i")]
2248 UNSPEC_VCEQ))]
2249 "TARGET_NEON"
2250 "@
2251 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2252 vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
2253 [(set (attr "type")
2254 (if_then_else (match_test "<Is_float_mode>")
2255 (const_string "neon_fp_compare_s<q>")
2256 (if_then_else (match_operand 2 "zero_operand")
2257 (const_string "neon_compare_zero<q>")
2258 (const_string "neon_compare<q>"))))]
2259 )
2260
;; vcge with two alternatives: register-register, and register against an
;; immediate zero (constraint Dz, printed as #0).  Operand 3 supplies the
;; type letter through %T3.
2261 (define_insn "neon_vcge<mode>"
2262 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2263 (unspec:<V_cmp_result>
2264 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2265 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2266 (match_operand:SI 3 "immediate_operand" "i,i")]
2267 UNSPEC_VCGE))]
2268 "TARGET_NEON"
2269 "@
2270 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2271 vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2272 [(set (attr "type")
2273 (if_then_else (match_test "<Is_float_mode>")
2274 (const_string "neon_fp_compare_s<q>")
2275 (if_then_else (match_operand 2 "zero_operand")
2276 (const_string "neon_compare_zero<q>")
2277 (const_string "neon_compare<q>"))))]
2278 )
2279
;; Register-only vcge for the integer VDQIW modes under its own unspec
;; (UNSPEC_VCGEU).  There is no compare-with-zero alternative here.
2280 (define_insn "neon_vcgeu<mode>"
2281 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2282 (unspec:<V_cmp_result>
2283 [(match_operand:VDQIW 1 "s_register_operand" "w")
2284 (match_operand:VDQIW 2 "s_register_operand" "w")
2285 (match_operand:SI 3 "immediate_operand" "i")]
2286 UNSPEC_VCGEU))]
2287 "TARGET_NEON"
2288 "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2289 [(set_attr "type" "neon_compare<q>")]
2290 )
2291
;; vcgt with two alternatives: register-register, and register against an
;; immediate zero (constraint Dz, printed as #0).  Operand 3 supplies the
;; type letter through %T3.
2292 (define_insn "neon_vcgt<mode>"
2293 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2294 (unspec:<V_cmp_result>
2295 [(match_operand:VDQW 1 "s_register_operand" "w,w")
2296 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")
2297 (match_operand:SI 3 "immediate_operand" "i,i")]
2298 UNSPEC_VCGT))]
2299 "TARGET_NEON"
2300 "@
2301 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
2302 vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2303 [(set (attr "type")
2304 (if_then_else (match_test "<Is_float_mode>")
2305 (const_string "neon_fp_compare_s<q>")
2306 (if_then_else (match_operand 2 "zero_operand")
2307 (const_string "neon_compare_zero<q>")
2308 (const_string "neon_compare<q>"))))]
2309 )
2310
;; Register-only vcgt for the integer VDQIW modes under its own unspec
;; (UNSPEC_VCGTU).  There is no compare-with-zero alternative here.
2311 (define_insn "neon_vcgtu<mode>"
2312 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2313 (unspec:<V_cmp_result>
2314 [(match_operand:VDQIW 1 "s_register_operand" "w")
2315 (match_operand:VDQIW 2 "s_register_operand" "w")
2316 (match_operand:SI 3 "immediate_operand" "i")]
2317 UNSPEC_VCGTU))]
2318 "TARGET_NEON"
2319 "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2320 [(set_attr "type" "neon_compare<q>")]
2321 )
2322
2323 ;; VCLE and VCLT only support comparisons with immediate zero (register
2324 ;; variants are VCGE and VCGT with operands reversed).
2325
;; vcle against immediate zero only: operand 2 must satisfy zero_operand
;; and the template always prints #0.
;; NOTE(review): because the predicate is already zero_operand, the inner
;; match_operand test in the "type" attribute looks always-true, leaving the
;; neon_compare<q> arm unused -- confirm before simplifying.
2326 (define_insn "neon_vcle<mode>"
2327 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2328 (unspec:<V_cmp_result>
2329 [(match_operand:VDQW 1 "s_register_operand" "w")
2330 (match_operand:VDQW 2 "zero_operand" "Dz")
2331 (match_operand:SI 3 "immediate_operand" "i")]
2332 UNSPEC_VCLE))]
2333 "TARGET_NEON"
2334 "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2335 [(set (attr "type")
2336 (if_then_else (match_test "<Is_float_mode>")
2337 (const_string "neon_fp_compare_s<q>")
2338 (if_then_else (match_operand 2 "zero_operand")
2339 (const_string "neon_compare_zero<q>")
2340 (const_string "neon_compare<q>"))))]
2341 )
2342
;; vclt against immediate zero only: operand 2 must satisfy zero_operand
;; and the template always prints #0.
;; NOTE(review): because the predicate is already zero_operand, the inner
;; match_operand test in the "type" attribute looks always-true, leaving the
;; neon_compare<q> arm unused -- confirm before simplifying.
2343 (define_insn "neon_vclt<mode>"
2344 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2345 (unspec:<V_cmp_result>
2346 [(match_operand:VDQW 1 "s_register_operand" "w")
2347 (match_operand:VDQW 2 "zero_operand" "Dz")
2348 (match_operand:SI 3 "immediate_operand" "i")]
2349 UNSPEC_VCLT))]
2350 "TARGET_NEON"
2351 "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
2352 [(set (attr "type")
2353 (if_then_else (match_test "<Is_float_mode>")
2354 (const_string "neon_fp_compare_s<q>")
2355 (if_then_else (match_operand 2 "zero_operand")
2356 (const_string "neon_compare_zero<q>")
2357 (const_string "neon_compare<q>"))))]
2358 )
2359
;; Float-only (VCVTF) compare emitted with the vacge mnemonic.  Operand 3
;; is part of the unspec but is not referenced by the output template.
2360 (define_insn "neon_vcage<mode>"
2361 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2362 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2363 (match_operand:VCVTF 2 "s_register_operand" "w")
2364 (match_operand:SI 3 "immediate_operand" "i")]
2365 UNSPEC_VCAGE))]
2366 "TARGET_NEON"
2367 "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2368 [(set_attr "type" "neon_fp_compare_s<q>")]
2369 )
2370
;; Float-only (VCVTF) compare emitted with the vacgt mnemonic.  Operand 3
;; is part of the unspec but is not referenced by the output template.
2371 (define_insn "neon_vcagt<mode>"
2372 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2373 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2374 (match_operand:VCVTF 2 "s_register_operand" "w")
2375 (match_operand:SI 3 "immediate_operand" "i")]
2376 UNSPEC_VCAGT))]
2377 "TARGET_NEON"
2378 "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2379 [(set_attr "type" "neon_fp_compare_s<q>")]
2380 )
2381
;; vtst over the VDQIW modes; the suffix is the bare element size.
;; Operand 3 is part of the unspec but is not referenced by the output
;; template.
2382 (define_insn "neon_vtst<mode>"
2383 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2384 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2385 (match_operand:VDQIW 2 "s_register_operand" "w")
2386 (match_operand:SI 3 "immediate_operand" "i")]
2387 UNSPEC_VTST))]
2388 "TARGET_NEON"
2389 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2390 [(set_attr "type" "neon_tst<q>")]
2391 )
2392
;; vabd over the VDQW modes.  Operand 3 supplies the type letter through
;; %T3; the "type" attribute distinguishes float from integer modes.
2393 (define_insn "neon_vabd<mode>"
2394 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2395 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2396 (match_operand:VDQW 2 "s_register_operand" "w")
2397 (match_operand:SI 3 "immediate_operand" "i")]
2398 UNSPEC_VABD))]
2399 "TARGET_NEON"
2400 "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2401 [(set (attr "type")
2402 (if_then_else (match_test "<Is_float_mode>")
2403 (const_string "neon_fp_abd_s<q>")
2404 (const_string "neon_abd<q>")))]
2405 )
2406
;; vabdl: two VW-mode inputs, <V_widen>-mode result.  Operand 3 is consumed
;; by the %T3 modifier.
2407 (define_insn "neon_vabdl<mode>"
2408 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2409 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2410 (match_operand:VW 2 "s_register_operand" "w")
2411 (match_operand:SI 3 "immediate_operand" "i")]
2412 UNSPEC_VABDL))]
2413 "TARGET_NEON"
2414 "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
2415 [(set_attr "type" "neon_abd_long")]
2416 )
2417
;; vaba, modelled as (plus (unspec VABD of operands 2 and 3) operand 1).
;; Operand 1 is tied to the output by the "0" constraint, so the template
;; names only operands 0, 2 and 3.  Operand 4 feeds %T4.
2418 (define_insn "neon_vaba<mode>"
2419 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2420 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2421 (match_operand:VDQIW 3 "s_register_operand" "w")
2422 (match_operand:SI 4 "immediate_operand" "i")]
2423 UNSPEC_VABD)
2424 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2425 "TARGET_NEON"
2426 "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2427 [(set_attr "type" "neon_arith_acc<q>")]
2428 )
2429
;; vabal, modelled as (plus (unspec VABDL of operands 2 and 3) operand 1)
;; in the <V_widen> mode.  Operand 1 is tied to the output by the "0"
;; constraint.  Operand 4 feeds %T4.
2430 (define_insn "neon_vabal<mode>"
2431 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2432 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2433 (match_operand:VW 3 "s_register_operand" "w")
2434 (match_operand:SI 4 "immediate_operand" "i")]
2435 UNSPEC_VABDL)
2436 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2437 "TARGET_NEON"
2438 "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
2439 [(set_attr "type" "neon_arith_acc<q>")]
2440 )
2441
;; vmax over the VDQW modes, kept as an unspec.  Operand 3 supplies the
;; type letter through %T3.
2442 (define_insn "neon_vmax<mode>"
2443 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2444 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2445 (match_operand:VDQW 2 "s_register_operand" "w")
2446 (match_operand:SI 3 "immediate_operand" "i")]
2447 UNSPEC_VMAX))]
2448 "TARGET_NEON"
2449 "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2450 [(set (attr "type")
2451 (if_then_else (match_test "<Is_float_mode>")
2452 (const_string "neon_fp_minmax_s<q>")
2453 (const_string "neon_minmax<q>")))]
2454 )
2455
;; vmin over the VDQW modes, kept as an unspec.  Operand 3 supplies the
;; type letter through %T3.
2456 (define_insn "neon_vmin<mode>"
2457 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2458 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
2459 (match_operand:VDQW 2 "s_register_operand" "w")
2460 (match_operand:SI 3 "immediate_operand" "i")]
2461 UNSPEC_VMIN))]
2462 "TARGET_NEON"
2463 "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2464 [(set (attr "type")
2465 (if_then_else (match_test "<Is_float_mode>")
2466 (const_string "neon_fp_minmax_s<q>")
2467 (const_string "neon_minmax<q>")))]
2468 )
2469
;; Expander for the vpadd builtin: drops the flags immediate (operand 3)
;; and forwards the three vector operands to neon_vpadd_internal<mode>.
2470 (define_expand "neon_vpadd<mode>"
2471 [(match_operand:VD 0 "s_register_operand" "=w")
2472 (match_operand:VD 1 "s_register_operand" "w")
2473 (match_operand:VD 2 "s_register_operand" "w")
2474 (match_operand:SI 3 "immediate_operand" "i")]
2475 "TARGET_NEON"
2476 {
2477 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2478 operands[2]));
2479 DONE;
2480 })
2481
;; vpaddl: one VDQIW-mode input, <V_double_width>-mode result.  Operand 2
;; (the flags immediate) is consumed by the %T2 modifier.
2482 (define_insn "neon_vpaddl<mode>"
2483 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2484 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")
2485 (match_operand:SI 2 "immediate_operand" "i")]
2486 UNSPEC_VPADDL))]
2487 "TARGET_NEON"
2488 "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2489 [(set_attr "type" "neon_reduc_add_long")]
2490 )
2491
;; vpadal: accumulating form of vpaddl.  The <V_double_width>-mode
;; operand 1 is tied to the output by the "0" constraint; operand 3 feeds
;; the %T3 modifier.
2492 (define_insn "neon_vpadal<mode>"
2493 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2494 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2495 (match_operand:VDQIW 2 "s_register_operand" "w")
2496 (match_operand:SI 3 "immediate_operand" "i")]
2497 UNSPEC_VPADAL))]
2498 "TARGET_NEON"
2499 "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2500 [(set_attr "type" "neon_reduc_add_acc")]
2501 )
2502
;; vpmax over the VD modes.  Operand 3 supplies the type letter through
;; %T3; the "type" attribute distinguishes float from integer modes.
2503 (define_insn "neon_vpmax<mode>"
2504 [(set (match_operand:VD 0 "s_register_operand" "=w")
2505 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2506 (match_operand:VD 2 "s_register_operand" "w")
2507 (match_operand:SI 3 "immediate_operand" "i")]
2508 UNSPEC_VPMAX))]
2509 "TARGET_NEON"
2510 "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2511 [(set (attr "type")
2512 (if_then_else (match_test "<Is_float_mode>")
2513 (const_string "neon_fp_reduc_minmax_s<q>")
2514 (const_string "neon_reduc_minmax<q>")))]
2515 )
2516
;; vpmin over the VD modes.  Operand 3 supplies the type letter through
;; %T3; the "type" attribute distinguishes float from integer modes.
2517 (define_insn "neon_vpmin<mode>"
2518 [(set (match_operand:VD 0 "s_register_operand" "=w")
2519 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
2520 (match_operand:VD 2 "s_register_operand" "w")
2521 (match_operand:SI 3 "immediate_operand" "i")]
2522 UNSPEC_VPMIN))]
2523 "TARGET_NEON"
2524 "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2525 [(set (attr "type")
2526 (if_then_else (match_test "<Is_float_mode>")
2527 (const_string "neon_fp_reduc_minmax_s<q>")
2528 (const_string "neon_reduc_minmax<q>")))]
2529 )
2530
;; vrecps over the float VCVTF modes.  Operand 3 is part of the unspec but
;; is not referenced by the output template.
2531 (define_insn "neon_vrecps<mode>"
2532 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2533 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2534 (match_operand:VCVTF 2 "s_register_operand" "w")
2535 (match_operand:SI 3 "immediate_operand" "i")]
2536 UNSPEC_VRECPS))]
2537 "TARGET_NEON"
2538 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2539 [(set_attr "type" "neon_fp_recps_s<q>")]
2540 )
2541
;; vrsqrts over the float VCVTF modes.  Operand 3 is part of the unspec but
;; is not referenced by the output template.
2542 (define_insn "neon_vrsqrts<mode>"
2543 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2544 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2545 (match_operand:VCVTF 2 "s_register_operand" "w")
2546 (match_operand:SI 3 "immediate_operand" "i")]
2547 UNSPEC_VRSQRTS))]
2548 "TARGET_NEON"
2549 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2550 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2551 )
2552
;; Expander for the vabs builtin: ignores the flags immediate (operand 2)
;; and forwards to the generic abs<mode>2 pattern.
2553 (define_expand "neon_vabs<mode>"
2554 [(match_operand:VDQW 0 "s_register_operand" "")
2555 (match_operand:VDQW 1 "s_register_operand" "")
2556 (match_operand:SI 2 "immediate_operand" "")]
2557 "TARGET_NEON"
2558 {
2559 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2560 DONE;
2561 })
2562
;; vqabs over the VDQIW modes.  Operand 2 is part of the unspec but is not
;; referenced by the output template.
2563 (define_insn "neon_vqabs<mode>"
2564 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2565 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2566 (match_operand:SI 2 "immediate_operand" "i")]
2567 UNSPEC_VQABS))]
2568 "TARGET_NEON"
2569 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2570 [(set_attr "type" "neon_qabs<q>")]
2571 )
2572
;; Per-element byte swap (bswap RTL) over the VDQHSD modes, emitted as
;; vrev<element size>.8.
2573 (define_insn "neon_bswap<mode>"
2574 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2575 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2576 "TARGET_NEON"
2577 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2578 [(set_attr "type" "neon_rev<q>")]
2579 )
2580
;; Expander for the vneg builtin: ignores the flags immediate (operand 2)
;; and forwards to the generic neg<mode>2 pattern.
2581 (define_expand "neon_vneg<mode>"
2582 [(match_operand:VDQW 0 "s_register_operand" "")
2583 (match_operand:VDQW 1 "s_register_operand" "")
2584 (match_operand:SI 2 "immediate_operand" "")]
2585 "TARGET_NEON"
2586 {
2587 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
2588 DONE;
2589 })
2590
;; Vector copysign for the float VCVTF modes.  A constant vector with
;; 0x80000000 in every element is loaded into an integer (<V_cmp_result>)
;; register, operand 2 is copied into operand 0, and vbsl then combines
;; operand 0 and operand 1 under that mask.
2591 (define_expand "neon_copysignf<mode>"
2592 [(match_operand:VCVTF 0 "register_operand")
2593 (match_operand:VCVTF 1 "register_operand")
2594 (match_operand:VCVTF 2 "register_operand")]
2595 "TARGET_NEON"
2596 "{
2597 rtx v_bitmask_cast;
2598 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2599 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2600 rtvec v = rtvec_alloc (n_elt);
/* Build the element through gen_int_mode so the CONST_INT is
   sign-extended to the element mode; a raw GEN_INT (0x80000000) is not
   canonical for a 32-bit mode.  */
rtx sign_bit = gen_int_mode (0x80000000,
GET_MODE_INNER (<VCVTF:V_cmp_result>mode));
2601
2602 /* Create bitmask for vector select. */
2603 for (i = 0; i < n_elt; ++i)
2604 RTVEC_ELT (v, i) = sign_bit;
2605
2606 emit_move_insn (v_bitmask,
2607 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2608 emit_move_insn (operands[0], operands[2]);
/* View the integer mask in the float vector mode expected by vbsl.  */
2609 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2610 <VCVTF:V_cmp_result>mode, 0);
2611 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
2612 operands[1]));
2613
2614 DONE;
2615 }"
2616 )
2617
;; vqneg over the VDQIW modes.  Operand 2 is part of the unspec but is not
;; referenced by the output template.
2618 (define_insn "neon_vqneg<mode>"
2619 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2620 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2621 (match_operand:SI 2 "immediate_operand" "i")]
2622 UNSPEC_VQNEG))]
2623 "TARGET_NEON"
2624 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2625 [(set_attr "type" "neon_qneg<q>")]
2626 )
2627
;; vcls over the VDQIW modes.  Operand 2 is part of the unspec but is not
;; referenced by the output template.
2628 (define_insn "neon_vcls<mode>"
2629 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2630 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2631 (match_operand:SI 2 "immediate_operand" "i")]
2632 UNSPEC_VCLS))]
2633 "TARGET_NEON"
2634 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2635 [(set_attr "type" "neon_cls<q>")]
2636 )
2637
;; Per-element count-leading-zeros (clz RTL) over the VDQIW modes, emitted
;; as vclz.
2638 (define_insn "clz<mode>2"
2639 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2640 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2641 "TARGET_NEON"
2642 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2643 [(set_attr "type" "neon_cnt<q>")]
2644 )
2645
;; Expander for the vclz builtin: ignores the flags immediate (operand 2)
;; and forwards to the clz<mode>2 pattern.
2646 (define_expand "neon_vclz<mode>"
2647 [(match_operand:VDQIW 0 "s_register_operand" "")
2648 (match_operand:VDQIW 1 "s_register_operand" "")
2649 (match_operand:SI 2 "immediate_operand" "")]
2650 "TARGET_NEON"
2651 {
2652 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
2653 DONE;
2654 })
2655
;; Per-element population count (popcount RTL) over the VE modes, emitted
;; as vcnt.
2656 (define_insn "popcount<mode>2"
2657 [(set (match_operand:VE 0 "s_register_operand" "=w")
2658 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2659 "TARGET_NEON"
2660 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2661 [(set_attr "type" "neon_cnt<q>")]
2662 )
2663
;; Expander for the vcnt builtin: ignores the flags immediate (operand 2)
;; and forwards to the popcount<mode>2 pattern.
2664 (define_expand "neon_vcnt<mode>"
2665 [(match_operand:VE 0 "s_register_operand" "=w")
2666 (match_operand:VE 1 "s_register_operand" "w")
2667 (match_operand:SI 2 "immediate_operand" "i")]
2668 "TARGET_NEON"
2669 {
2670 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
2671 DONE;
2672 })
2673
;; vrecpe over the V32 modes; the suffix comes from <V_u_elem>.  Operand 2
;; is part of the unspec but is not referenced by the output template.
2674 (define_insn "neon_vrecpe<mode>"
2675 [(set (match_operand:V32 0 "s_register_operand" "=w")
2676 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2677 (match_operand:SI 2 "immediate_operand" "i")]
2678 UNSPEC_VRECPE))]
2679 "TARGET_NEON"
2680 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2681 [(set_attr "type" "neon_fp_recpe_s<q>")]
2682 )
2683
;; vrsqrte over the V32 modes; the suffix comes from <V_u_elem>.  Operand 2
;; is part of the unspec but is not referenced by the output template.
2684 (define_insn "neon_vrsqrte<mode>"
2685 [(set (match_operand:V32 0 "s_register_operand" "=w")
2686 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
2687 (match_operand:SI 2 "immediate_operand" "i")]
2688 UNSPEC_VRSQRTE))]
2689 "TARGET_NEON"
2690 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2691 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
2692 )
2693
;; Three-operand wrapper that expands to one_cmpl<mode>2 (bitwise NOT).
;; Operand 2 must be an immediate but is not consulted by the expansion.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
  DONE;
})
2703
;; Move one element of a VD-mode vector into a core register, sign-extending
;; it to SImode.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the lane index within the vector before it is printed.  */
      const int last_lane = GET_MODE_NUNITS (<MODE>mode) - 1;
      const int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (last_lane - lane);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2722
;; Move one element of a VD-mode vector into a core register, zero-extending
;; it to SImode.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the lane index within the vector before it is printed.  */
      const int last_lane = GET_MODE_NUNITS (<MODE>mode) - 1;
      const int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (last_lane - lane);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2741
;; Move one element of a VQ-mode vector into a core register, sign-extending
;; it to SImode.  The lane is re-expressed as a <V_HALF> register plus a lane
;; index within that register before the instruction is printed.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  const unsigned int which_half = lane / lanes_per_half;
  unsigned int lane_in_half = lane % lanes_per_half;
  rtx xops[3];

  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  /* Each half advances the register number by 2.  */
  xops[1] = gen_rtx_REG (<V_HALF>mode, REGNO (operands[1]) + 2 * which_half);
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been output above.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2768
;; Move one element of a VQ-mode vector into a core register, zero-extending
;; it to SImode.  The lane is re-expressed as a <V_HALF> register plus a lane
;; index within that register before the instruction is printed.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  const unsigned int which_half = lane / lanes_per_half;
  unsigned int lane_in_half = lane % lanes_per_half;
  rtx xops[3];

  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  /* Each half advances the register number by 2.  */
  xops[1] = gen_rtx_REG (<V_HALF>mode, REGNO (operands[1]) + 2 * which_half);
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been output above.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2795
;; Extract lane operand 2 of vector operand 1 into operand 0.  Operand 3 is
;; an info word: when its low two bits are both set, or when the elements are
;; 32 bits wide, a plain vec_extract is used; otherwise bit 0 chooses between
;; the sign-extending and zero-extending extraction patterns.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  const HOST_WIDE_INT info = INTVAL (operands[3]);
  const unsigned int elem_bits
    = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));

  neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      const unsigned int lanes_per_dreg = 64 / elem_bits;
      unsigned int lane = INTVAL (operands[2]);
      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  if ((info & 3) == 3 || elem_bits == 32)
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1],
                                      operands[2]));
  else if ((info & 1) != 0)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  DONE;
})
2836
; Operand 3 (info word) is ignored because it does nothing useful with 64-bit
; elements.

;; A DImode value has a single lane, so after bounds-checking the lane
;; number the "extraction" is a plain move.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=r")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[2], 0, 1);
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
2851
;; Extract one DImode lane of a V2DI vector: lane 0 is the low part and
;; lane 1 the high part.  Any other lane number is passed to
;; neon_lane_bounds and the expansion FAILs.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane = INTVAL (operands[2]);

  if (lane == 0)
    emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
  else if (lane == 1)
    emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
  else
    {
      neon_lane_bounds (operands[2], 0, 1);
      FAIL;
    }
  DONE;
})
2873
;; Insert scalar operand 1 into lane operand 3 of vector operand 2, giving
;; operand 0.  vec_set<mode>_internal is handed the lane as a one-hot mask
;; (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "=w")
   (match_operand:<V_elem> 1 "s_register_operand" "r")
   (match_operand:VDQ 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);

  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane index within its 64-bit register.  */
      const unsigned int lanes_per_dreg
        = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
      lane ^= lanes_per_dreg - 1;
    }

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         GEN_INT (1 << lane), operands[2]));
  DONE;
})
2895
; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.

;; The lane number (operand 3) is bounds-checked, then operand 1 is simply
;; moved into operand 0; operand 2 is not used.
(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")
   (match_operand:DI 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[3], 0, 1);
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
2909
;; Reinterpret a DImode value as a VDX-mode vector: take the <MODE>mode
;; lowpart of operand 1 and move it into operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VDX 0 "s_register_operand" "")
   (match_operand:DI 1 "general_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
2919
;; Broadcast a scalar held in a core register to every element of a VX-mode
;; vector.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX
          (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
2927
;; Broadcast a scalar to every element of a V32-mode vector.  Two
;; alternatives: the scalar comes from an "r" register or from a "t"
;; register (printed with the %y modifier).
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32
          (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
2937
;; Duplicating a DImode scalar into a DImode destination is just a move.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
}
)
2947
;; Duplicate a DImode value into both halves of a V2DI vector using two
;; VMOVs (hence length 8), from either an "r" or a "w" source.
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
2958
;; Broadcast one lane of operand 1 (a <V_double_vector_mode> vector) to all
;; elements of operand 0.  The destination is printed with %P or %q
;; depending on <Is_d_reg>.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the lane index within the source vector.  */
      const int last_lane
        = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1;
      const int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (last_lane - lane);
    }

  return (<Is_d_reg>
          ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
          : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
2980
;; Bounds-check the lane number against the source vector, adjust it for
;; big-endian, and emit neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[2], 0,
                    GET_MODE_NUNITS (<V_double_vector_mode>mode));

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane index within its 64-bit register.  */
      const unsigned int lanes_per_dreg
        = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
      unsigned int lane = INTVAL (operands[2]);
      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3000
; Scalar index is ignored, since only zero is valid here.
;; The index is still bounds-checked; the operation itself is a plain move.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[2], 0, 1);
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
3012
; Likewise for v2di, as the DImode second operand has only a single element.
;; After the bounds check this defers to neon_vdup_nv2di.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[2], 0, 1);
  emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
  DONE;
})
3024
; Disabled before reload because we don't want combine doing something silly,
; but used by the post-reload expansion of neon_vcombine.
;; Both operands are read and written ("+w"): the two sets exchange them.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1) (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
3035
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that we wouldn't actually need to output anything (we could make
;; it so that the reg allocator puts things in the right places magically
;; instead). Lack of subregs for vectors makes that tricky though, I think.

;; The insn always outputs "#", so it must be split; the split is enabled
;; once reload has completed and is carried out by neon_split_vcombine.
(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
3058
;; Copy the <V_HALF>-mode subreg of operand 1 that starts at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) into operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx high_half = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode));
  emit_move_insn (operands[0], high_half);
  DONE;
})
3069
;; Copy the <V_HALF>-mode subreg of operand 1 at byte offset 0 into
;; operand 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx low_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                      <MODE>mode, 0);
  emit_move_insn (operands[0], low_half);
  DONE;
})
3080
;; Signed integer vector to floating-point vector conversion.  Only
;; available when flag_rounding_math is not set.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3088
;; Unsigned integer vector to floating-point vector conversion.  Only
;; available when flag_rounding_math is not set.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3096
;; Floating-point vector to signed integer vector conversion (RTL "fix").
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3104
;; Floating-point vector to unsigned integer vector conversion (RTL
;; "unsigned_fix").
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3112
;; VCVT from a VCVTF-mode source to <V_CVTTO>, as an unspec.  The destination
;; type letter in the mnemonic is produced from operand 2 via %T2.
(define_insn "neon_vcvt<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          UNSPEC_VCVT))]
  "TARGET_NEON"
  "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3122
;; VCVT from a VCVTI-mode source to <V_CVTTO>, as an unspec.  The source
;; type letter in the mnemonic is produced from operand 2 via %T2.
(define_insn "neon_vcvt<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          UNSPEC_VCVT))]
  "TARGET_NEON"
  "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3132
;; Widening conversion V4HF -> V4SF; additionally requires TARGET_FP16.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
                     UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
3141
;; Narrowing conversion V4SF -> V4HF; additionally requires TARGET_FP16.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
                     UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
3150
;; VCVT with an extra immediate (operand 2) from a VCVTF-mode source.
;; Operand 2 is checked with neon_const_bounds (1, 33) and printed as the
;; final assembler operand; operand 3 feeds the %T3 type letter.
(define_insn "neon_vcvt_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VCVT_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[2], 1, 33);
  return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3164
;; VCVT with an extra immediate (operand 2) from a VCVTI-mode source.
;; Operand 2 is checked with neon_const_bounds (1, 33) and printed as the
;; final assembler operand; operand 3 feeds the %T3 type letter.
(define_insn "neon_vcvt_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VCVT_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[2], 1, 33);
  return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3178
;; VMOVN: produce a <V_narrow> vector from a VN-mode source.  Operand 2 is
;; part of the unspec but is not used in the output template.
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3188
;; VQMOVN: produce a <V_narrow> vector from a VN-mode source.  The type
;; letter in the mnemonic comes from operand 2 via %T2.
(define_insn "neon_vqmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           UNSPEC_VQMOVN))]
  "TARGET_NEON"
  "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3198
;; VQMOVUN: produce a <V_narrow> vector from a VN-mode source.  Operand 2 is
;; part of the unspec but is not used in the output template.
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3208
;; VMOVL: produce a <V_widen> vector from a VW-mode source.  The type letter
;; in the mnemonic comes from operand 2 via %T2.
(define_insn "neon_vmovl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          UNSPEC_VMOVL))]
  "TARGET_NEON"
  "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
3218
;; Multiply a VMD-mode vector (operand 1) by lane operand 3 of operand 2.
;; The lane number is bounds-checked when the instruction is output.
;; Operand 4 is part of the unspec but is not used in the template.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
                     (match_operand:VMD 2 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3237
;; Multiply a VMQ-mode vector (operand 1) by lane operand 3 of operand 2.
;; The scalar vector is <V_HALF>-mode, so the lane number is bounds-checked
;; against the element count of <V_HALF>mode.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
                     (match_operand:<V_HALF> 2 "s_register_operand"
                                               "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
  return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3256
;; VMULL by scalar: VMDI-mode inputs, <V_widen> result.  Operand 3 is the
;; lane of operand 2; operand 4 supplies the %T4 type letter.
(define_insn "neon_vmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VMULL_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
3272
;; VQDMULL by scalar: VMDI-mode inputs, <V_widen> result.  Operand 3 is the
;; lane of operand 2; operand 4 is not used in the template.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
3288
;; VQ[R]DMULH by scalar, VMQI (Q-register) form.  Operand 3 is the lane of
;; operand 2; operand 4 supplies the %O4 and %T4 parts of the mnemonic.
;; Operand 2 is a <V_HALF>-mode vector, so the lane number must be
;; bounds-checked against the element count of <V_HALF>mode (as the VMQ form
;; of neon_vmul_lane<mode> does), not against the wider <MODE>mode.
(define_insn "neon_vqdmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
                      (match_operand:<V_HALF> 2 "s_register_operand"
                                                "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VQDMULH_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
  return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3304
;; VQ[R]DMULH by scalar, VMDI (D-register) form.  Operand 3 is the lane of
;; operand 2; operand 4 supplies the %O4 and %T4 parts of the mnemonic.
;; NOTE(review): the "type" attribute ends in "_scalar_q" even though this
;; is the D-register form -- confirm this is intended.
(define_insn "neon_vqdmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
                      (match_operand:VMDI 2 "s_register_operand"
                                            "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VQDMULH_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3320
;; VMLA by scalar, VMD form.  Operand 1 is the accumulator and is tied to
;; the destination ("0"); operand 4 is the lane of operand 3.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")
                     (match_operand:SI 5 "immediate_operand" "i")]
                    UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3340
;; VMLA by scalar, VMQ (Q-register) form.  Operand 1 is the accumulator and
;; is tied to the destination ("0"); operand 4 is the lane of operand 3.
;; Operand 3 is a <V_HALF>-mode vector, so the lane number must be
;; bounds-checked against the element count of <V_HALF>mode (as the VMQ form
;; of neon_vmul_lane<mode> does), not against the wider <MODE>mode.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")
                     (match_operand:SI 5 "immediate_operand" "i")]
                    UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
  return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3360
;; VMLAL by scalar: <V_widen> accumulator (operand 1, tied to the
;; destination), VMDI-mode multiplicands.  Operand 4 is the lane of
;; operand 3; operand 5 supplies the %T5 type letter.
(define_insn "neon_vmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")
                           (match_operand:SI 5 "immediate_operand" "i")]
                          UNSPEC_VMLAL_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3377
;; VQDMLAL by scalar: <V_widen> accumulator (operand 1, tied to the
;; destination), VMDI-mode multiplicands.  Operand 4 is the lane of
;; operand 3; operand 5 is not used in the template.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")
                           (match_operand:SI 5 "immediate_operand" "i")]
                          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
3394
;; VMLS by scalar, VMD form.  Operand 1 is the accumulator and is tied to
;; the destination ("0"); operand 4 is the lane of operand 3.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")
                     (match_operand:SI 5 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3414
;; VMLS by scalar, VMQ (Q-register) form.  Operand 1 is the accumulator and
;; is tied to the destination ("0"); operand 4 is the lane of operand 3.
;; Operand 3 is a <V_HALF>-mode vector, so the lane number must be
;; bounds-checked against the element count of <V_HALF>mode (as the VMQ form
;; of neon_vmul_lane<mode> does), not against the wider <MODE>mode.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")
                     (match_operand:SI 5 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<V_HALF>mode));
  return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3434
;; VMLSL by scalar: <V_widen> accumulator (operand 1, tied to the
;; destination), VMDI-mode multiplicands.  Operand 4 is the lane of
;; operand 3; operand 5 supplies the %T5 type letter.
(define_insn "neon_vmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")
                           (match_operand:SI 5 "immediate_operand" "i")]
                          UNSPEC_VMLSL_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3451
;; VQDMLSL by scalar: <V_widen> accumulator (operand 1, tied to the
;; destination), VMDI-mode multiplicands.  Operand 4 is the lane of
;; operand 3; operand 5 is not used in the template.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")
                           (match_operand:SI 5 "immediate_operand" "i")]
                          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
{
  neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
3468
; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that. This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector. In the latter case it's currently better to
; use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.
3475
;; Multiply a VMD-mode vector by a scalar: the scalar (operand 2) is placed
;; in lane 0 of a fresh vector register, then neon_vmul_lane<mode> is used
;; with lane 0.  const0_rtx, not operand 3, is passed as the info word.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx, const0_rtx));
  DONE;
})
3489
;; Multiply a VMQ-mode vector by a scalar: the scalar (operand 2) is placed
;; in lane 0 of a fresh <V_HALF>-mode register, then neon_vmul_lane<mode> is
;; used with lane 0.  const0_rtx, not operand 3, is passed as the info word.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx, const0_rtx));
  DONE;
})
3503
;; VMULL by a scalar: the scalar (operand 2) is placed in lane 0 of a fresh
;; vector register, then neon_vmull_lane<mode> is used with lane 0 and the
;; caller's info word (operand 3).
(define_expand "neon_vmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmull_lane<mode> (operands[0], operands[1], scalar_vec,
                                        const0_rtx, operands[3]));
  DONE;
})
3517
;; VQDMULL by a scalar: the scalar (operand 2) is placed in lane 0 of a
;; fresh vector register, then neon_vqdmull_lane<mode> is used with lane 0.
;; const0_rtx, not operand 3, is passed as the info word.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx,
                                          const0_rtx));
  DONE;
})
3531
;; VQDMULH by a scalar, VMDI form: the scalar (operand 2) is placed in lane 0
;; of a fresh vector register, then neon_vqdmulh_lane<mode> is used with
;; lane 0 and the caller's info word (operand 3).
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx,
                                          operands[3]));
  DONE;
})
3545
;; VQDMULH by a scalar, VMQI form: the scalar (operand 2) is placed in lane 0
;; of a fresh <V_HALF>-mode register, then neon_vqdmulh_lane<mode> is used
;; with lane 0 and the caller's info word (operand 3).
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")
   (match_operand:SI 3 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx,
                                          operands[3]));
  DONE;
})
3559
;; VMLA by a scalar, VMD form: the scalar (operand 3) is placed in lane 0 of
;; a fresh vector register, then neon_vmla_lane<mode> is used with lane 0
;; and the caller's info word (operand 4).
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx, operands[4]));
  DONE;
})
3574
;; VMLA by a scalar, VMQ form: the scalar (operand 3) is placed in lane 0 of
;; a fresh <V_HALF>-mode register, then neon_vmla_lane<mode> is used with
;; lane 0 and the caller's info word (operand 4).
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")
   (match_operand:SI 4 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx, operands[4]));
  DONE;
})
3589
;; Expander for the by-scalar (_n) form of vmlal.  Operands 0 and 1 have
;; the widened mode <V_widen>; operand 2 is a VMDI vector and operand 3 the
;; scalar.  The scalar is placed in lane 0 of a fresh <MODE> register and
;; neon_vmlal_lane<mode> is emitted with lane index 0.  Operand 4 is
;; forwarded unchanged.
3590 (define_expand "neon_vmlal_n<mode>"
3591 [(match_operand:<V_widen> 0 "s_register_operand" "")
3592 (match_operand:<V_widen> 1 "s_register_operand" "")
3593 (match_operand:VMDI 2 "s_register_operand" "")
3594 (match_operand:<V_elem> 3 "s_register_operand" "")
3595 (match_operand:SI 4 "immediate_operand" "")]
3596 "TARGET_NEON"
3597 {
3598 rtx tmp = gen_reg_rtx (<MODE>mode);
3599 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3600 emit_insn (gen_neon_vmlal_lane<mode> (operands[0], operands[1], operands[2],
3601 tmp, const0_rtx, operands[4]));
3602 DONE;
3603 })
3604
;; Expander for the by-scalar (_n) form of vqdmlal.  Operands 0 and 1 have
;; the widened mode <V_widen>; operand 2 is a VMDI vector and operand 3 the
;; scalar.  The scalar is placed in lane 0 of a fresh <MODE> register and
;; neon_vqdmlal_lane<mode> is emitted with lane index 0.  Operand 4 is
;; forwarded unchanged.
3605 (define_expand "neon_vqdmlal_n<mode>"
3606 [(match_operand:<V_widen> 0 "s_register_operand" "")
3607 (match_operand:<V_widen> 1 "s_register_operand" "")
3608 (match_operand:VMDI 2 "s_register_operand" "")
3609 (match_operand:<V_elem> 3 "s_register_operand" "")
3610 (match_operand:SI 4 "immediate_operand" "")]
3611 "TARGET_NEON"
3612 {
3613 rtx tmp = gen_reg_rtx (<MODE>mode);
3614 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3615 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3616 tmp, const0_rtx, operands[4]));
3617 DONE;
3618 })
3619
;; Expander for the by-scalar (_n) form of vmls on VMD modes.
;; The scalar in operand 3 is placed in lane 0 of a fresh <MODE> register
;; with neon_vset_lane<mode>, then neon_vmls_lane<mode> is emitted with
;; lane index 0.  Operands 0, 1, 2 and 4 are forwarded unchanged.
3620 (define_expand "neon_vmls_n<mode>"
3621 [(match_operand:VMD 0 "s_register_operand" "")
3622 (match_operand:VMD 1 "s_register_operand" "")
3623 (match_operand:VMD 2 "s_register_operand" "")
3624 (match_operand:<V_elem> 3 "s_register_operand" "")
3625 (match_operand:SI 4 "immediate_operand" "")]
3626 "TARGET_NEON"
3627 {
3628 rtx tmp = gen_reg_rtx (<MODE>mode);
3629 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3630 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3631 tmp, const0_rtx, operands[4]));
3632 DONE;
3633 })
3634
;; Expander for the by-scalar (_n) form of vmls on VMQ modes.
;; The scalar in operand 3 is placed in lane 0 of a fresh <V_HALF>
;; register with neon_vset_lane<V_half>; that register is the lane source
;; for neon_vmls_lane<mode>, emitted with lane index 0.  Operands 0, 1, 2
;; and 4 are forwarded unchanged.
3635 (define_expand "neon_vmls_n<mode>"
3636 [(match_operand:VMQ 0 "s_register_operand" "")
3637 (match_operand:VMQ 1 "s_register_operand" "")
3638 (match_operand:VMQ 2 "s_register_operand" "")
3639 (match_operand:<V_elem> 3 "s_register_operand" "")
3640 (match_operand:SI 4 "immediate_operand" "")]
3641 "TARGET_NEON"
3642 {
3643 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3644 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3645 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3646 tmp, const0_rtx, operands[4]));
3647 DONE;
3648 })
3649
;; Expander for the by-scalar (_n) form of vmlsl.  Operands 0 and 1 have
;; the widened mode <V_widen>; operand 2 is a VMDI vector and operand 3 the
;; scalar.  The scalar is placed in lane 0 of a fresh <MODE> register and
;; neon_vmlsl_lane<mode> is emitted with lane index 0.  Operand 4 is
;; forwarded unchanged.
3650 (define_expand "neon_vmlsl_n<mode>"
3651 [(match_operand:<V_widen> 0 "s_register_operand" "")
3652 (match_operand:<V_widen> 1 "s_register_operand" "")
3653 (match_operand:VMDI 2 "s_register_operand" "")
3654 (match_operand:<V_elem> 3 "s_register_operand" "")
3655 (match_operand:SI 4 "immediate_operand" "")]
3656 "TARGET_NEON"
3657 {
3658 rtx tmp = gen_reg_rtx (<MODE>mode);
3659 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3660 emit_insn (gen_neon_vmlsl_lane<mode> (operands[0], operands[1], operands[2],
3661 tmp, const0_rtx, operands[4]));
3662 DONE;
3663 })
3664
;; Expander for the by-scalar (_n) form of vqdmlsl.  Operands 0 and 1 have
;; the widened mode <V_widen>; operand 2 is a VMDI vector and operand 3 the
;; scalar.  The scalar is placed in lane 0 of a fresh <MODE> register and
;; neon_vqdmlsl_lane<mode> is emitted with lane index 0.  Operand 4 is
;; forwarded unchanged.
3665 (define_expand "neon_vqdmlsl_n<mode>"
3666 [(match_operand:<V_widen> 0 "s_register_operand" "")
3667 (match_operand:<V_widen> 1 "s_register_operand" "")
3668 (match_operand:VMDI 2 "s_register_operand" "")
3669 (match_operand:<V_elem> 3 "s_register_operand" "")
3670 (match_operand:SI 4 "immediate_operand" "")]
3671 "TARGET_NEON"
3672 {
3673 rtx tmp = gen_reg_rtx (<MODE>mode);
3674 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3675 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3676 tmp, const0_rtx, operands[4]));
3677 DONE;
3678 })
3679
;; vext on VDQX modes, modelled as UNSPEC_VEXT of two vector registers
;; (operands 1 and 2) and an immediate (operand 3).  At output time the
;; immediate is checked with neon_const_bounds against 0 and the number of
;; units in <MODE> before the instruction text is returned.
3680 (define_insn "neon_vext<mode>"
3681 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3682 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3683 (match_operand:VDQX 2 "s_register_operand" "w")
3684 (match_operand:SI 3 "immediate_operand" "i")]
3685 UNSPEC_VEXT))]
3686 "TARGET_NEON"
3687 {
3688 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3689 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3690 }
3691 [(set_attr "type" "neon_ext<q>")]
3692 )
3693
;; vrev64 on VDQ modes, modelled as UNSPEC_VREV64.  Operand 2 is an
;; immediate that is part of the pattern but is not referenced by the
;; output template.
3694 (define_insn "neon_vrev64<mode>"
3695 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3696 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
3697 (match_operand:SI 2 "immediate_operand" "i")]
3698 UNSPEC_VREV64))]
3699 "TARGET_NEON"
3700 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3701 [(set_attr "type" "neon_rev<q>")]
3702 )
3703
;; vrev32 on VX modes, modelled as UNSPEC_VREV32.  Operand 2 is an
;; immediate that is part of the pattern but is not referenced by the
;; output template.
3704 (define_insn "neon_vrev32<mode>"
3705 [(set (match_operand:VX 0 "s_register_operand" "=w")
3706 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")
3707 (match_operand:SI 2 "immediate_operand" "i")]
3708 UNSPEC_VREV32))]
3709 "TARGET_NEON"
3710 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3711 [(set_attr "type" "neon_rev<q>")]
3712 )
3713
;; vrev16 on VE modes, modelled as UNSPEC_VREV16.  Operand 2 is an
;; immediate that is part of the pattern but is not referenced by the
;; output template.
3714 (define_insn "neon_vrev16<mode>"
3715 [(set (match_operand:VE 0 "s_register_operand" "=w")
3716 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
3717 (match_operand:SI 2 "immediate_operand" "i")]
3718 UNSPEC_VREV16))]
3719 "TARGET_NEON"
3720 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3721 [(set_attr "type" "neon_rev<q>")]
3722 )
3723
3724 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3725 ; allocation. For an intrinsic of form:
3726 ; rD = vbsl_* (rS, rN, rM)
3727 ; We can use any of:
3728 ; vbsl rS, rN, rM (if D = S)
3729 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3730 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
3731
;; Bitwise-select insn with three constraint alternatives, one per
;; instruction spelling:
;;   alternative 0 ties operand 1 to the output and emits vbsl %0, %2, %3;
;;   alternative 1 ties operand 3 to the output and emits vbit %0, %2, %1;
;;   alternative 2 ties operand 2 to the output and emits vbif %0, %3, %1.
;; All four operands share the same VDQX mode.
3732 (define_insn "neon_vbsl<mode>_internal"
3733 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3734 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3735 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3736 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3737 UNSPEC_VBSL))]
3738 "TARGET_NEON"
3739 "@
3740 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3741 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3742 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3743 [(set_attr "type" "neon_bsl<q>")]
3744 )
3745
;; Expander for vbsl.  Operand 1 arrives in mode <V_cmp_result>; the
;; preparation code replaces it with its <MODE> low part so that all
;; UNSPEC_VBSL operands have the same mode.  There is no DONE, so the RTL
;; template above the preparation code is what gets emitted.
3746 (define_expand "neon_vbsl<mode>"
3747 [(set (match_operand:VDQX 0 "s_register_operand" "")
3748 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3749 (match_operand:VDQX 2 "s_register_operand" "")
3750 (match_operand:VDQX 3 "s_register_operand" "")]
3751 UNSPEC_VBSL))]
3752 "TARGET_NEON"
3753 {
3754 /* We can't alias operands together if they have different modes. */
3755 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
3756 })
3757
;; Shift-left by a vector register (operand 2) on VDQIX modes, modelled as
;; UNSPEC_VSHL.  Operand 3 is an immediate that is never printed as a
;; value; it only feeds the %O3 and %T3 output modifiers, which are
;; defined outside this chunk.
3758 (define_insn "neon_vshl<mode>"
3759 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3760 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3761 (match_operand:VDQIX 2 "s_register_operand" "w")
3762 (match_operand:SI 3 "immediate_operand" "i")]
3763 UNSPEC_VSHL))]
3764 "TARGET_NEON"
3765 "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3766 [(set_attr "type" "neon_shift_imm<q>")]
3767 )
3768
;; Saturating shift-left by a vector register (operand 2) on VDQIX modes,
;; modelled as UNSPEC_VQSHL.  Operand 3 is an immediate that is never
;; printed as a value; it only feeds the %O3 and %T3 output modifiers,
;; which are defined outside this chunk.
3769 (define_insn "neon_vqshl<mode>"
3770 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3771 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3772 (match_operand:VDQIX 2 "s_register_operand" "w")
3773 (match_operand:SI 3 "immediate_operand" "i")]
3774 UNSPEC_VQSHL))]
3775 "TARGET_NEON"
3776 "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3777 [(set_attr "type" "neon_sat_shift_imm<q>")]
3778 )
3779
;; Shift-right by immediate on VDQIX modes, modelled as UNSPEC_VSHR_N.
;; Operand 2 is the shift count; it is checked with neon_const_bounds
;; using bounds 1 and element-bits + 1 before being printed.  Operand 3
;; only feeds the %O3 and %T3 output modifiers.
3780 (define_insn "neon_vshr_n<mode>"
3781 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3782 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3783 (match_operand:SI 2 "immediate_operand" "i")
3784 (match_operand:SI 3 "immediate_operand" "i")]
3785 UNSPEC_VSHR_N))]
3786 "TARGET_NEON"
3787 {
3788 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3789 return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3790 }
3791 [(set_attr "type" "neon_shift_imm<q>")]
3792 )
3793
;; Narrowing shift-right by immediate: the VN-mode input (operand 1,
;; printed with %q) produces a <V_narrow> result (operand 0, printed with
;; %P).  Operand 2 is the shift count, checked with neon_const_bounds
;; using bounds 1 and element-bits / 2 + 1.  Operand 3 only feeds the %O3
;; output modifier.
3794 (define_insn "neon_vshrn_n<mode>"
3795 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3796 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3797 (match_operand:SI 2 "immediate_operand" "i")
3798 (match_operand:SI 3 "immediate_operand" "i")]
3799 UNSPEC_VSHRN_N))]
3800 "TARGET_NEON"
3801 {
3802 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3803 return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2";
3804 }
3805 [(set_attr "type" "neon_shift_imm_narrow_q")]
3806 )
3807
;; Saturating narrowing shift-right by immediate: VN-mode input (operand
;; 1), <V_narrow> result (operand 0).  Operand 2 is the shift count,
;; checked with neon_const_bounds using bounds 1 and element-bits / 2 + 1.
;; Operand 3 only feeds the %O3 and %T3 output modifiers.
3808 (define_insn "neon_vqshrn_n<mode>"
3809 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3810 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3811 (match_operand:SI 2 "immediate_operand" "i")
3812 (match_operand:SI 3 "immediate_operand" "i")]
3813 UNSPEC_VQSHRN_N))]
3814 "TARGET_NEON"
3815 {
3816 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3817 return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3818 }
3819 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3820 )
3821
;; vqshrun/vqrshrun by immediate: VN-mode input (operand 1), <V_narrow>
;; result (operand 0).  Operand 2 is the shift count, checked with
;; neon_const_bounds using bounds 1 and element-bits / 2 + 1.  Operand 3
;; only feeds the %O3 and %T3 output modifiers.
3822 (define_insn "neon_vqshrun_n<mode>"
3823 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3824 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3825 (match_operand:SI 2 "immediate_operand" "i")
3826 (match_operand:SI 3 "immediate_operand" "i")]
3827 UNSPEC_VQSHRUN_N))]
3828 "TARGET_NEON"
3829 {
3830 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3831 return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2";
3832 }
3833 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3834 )
3835
;; Shift-left by immediate on VDQIX modes, modelled as UNSPEC_VSHL_N.
;; Operand 2 is the shift count, checked with neon_const_bounds using
;; bounds 0 and element-bits.  Operand 3 is part of the pattern but is not
;; referenced by the output template.
3836 (define_insn "neon_vshl_n<mode>"
3837 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3838 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3839 (match_operand:SI 2 "immediate_operand" "i")
3840 (match_operand:SI 3 "immediate_operand" "i")]
3841 UNSPEC_VSHL_N))]
3842 "TARGET_NEON"
3843 {
3844 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3845 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3846 }
3847 [(set_attr "type" "neon_shift_imm<q>")]
3848 )
3849
;; Saturating shift-left by immediate on VDQIX modes, modelled as
;; UNSPEC_VQSHL_N.  Operand 2 is the shift count, checked with
;; neon_const_bounds using bounds 0 and element-bits.  Operand 3 only
;; feeds the %T3 output modifier.
3850 (define_insn "neon_vqshl_n<mode>"
3851 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3852 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3853 (match_operand:SI 2 "immediate_operand" "i")
3854 (match_operand:SI 3 "immediate_operand" "i")]
3855 UNSPEC_VQSHL_N))]
3856 "TARGET_NEON"
3857 {
3858 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3859 return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3860 }
3861 [(set_attr "type" "neon_sat_shift_imm<q>")]
3862 )
3863
;; vqshlu by immediate on VDQIX modes, modelled as UNSPEC_VQSHLU_N.
;; Operand 2 is the shift count, checked with neon_const_bounds using
;; bounds 0 and element-bits.  Operand 3 only feeds the %T3 output
;; modifier.
3864 (define_insn "neon_vqshlu_n<mode>"
3865 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3866 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3867 (match_operand:SI 2 "immediate_operand" "i")
3868 (match_operand:SI 3 "immediate_operand" "i")]
3869 UNSPEC_VQSHLU_N))]
3870 "TARGET_NEON"
3871 {
3872 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3873 return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3874 }
3875 [(set_attr "type" "neon_sat_shift_imm<q>")]
3876 )
3877
;; Widening shift-left by immediate: VW-mode input (operand 1, printed
;; with %P), <V_widen> result (operand 0, printed with %q).  Operand 2 is
;; the shift count; operand 3 only feeds the %T3 output modifier.
3878 (define_insn "neon_vshll_n<mode>"
3879 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3880 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3881 (match_operand:SI 2 "immediate_operand" "i")
3882 (match_operand:SI 3 "immediate_operand" "i")]
3883 UNSPEC_VSHLL_N))]
3884 "TARGET_NEON"
3885 {
3886 /* The bounds passed below are 0 and size + 1.  NOTE(review): this comment previously read 0 < imm <= size, yet the lower bound passed is 0 (other shift patterns here that exclude 0 pass 1); presumably neon_const_bounds accepts low <= imm < high, so 0 is accepted -- confirm against its definition. */
3887 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3888 return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
3889 }
3890 [(set_attr "type" "neon_shift_imm_long")]
3891 )
3892
;; Shift-right-and-accumulate by immediate on VDQIX modes, modelled as
;; UNSPEC_VSRA_N.  Operand 1 is tied to the output register (constraint
;; 0); operand 2 is the vector being shifted; operand 3 is the shift
;; count, checked with neon_const_bounds using bounds 1 and
;; element-bits + 1.  Operand 4 only feeds the %O4 and %T4 output
;; modifiers.
3893 (define_insn "neon_vsra_n<mode>"
3894 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3895 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3896 (match_operand:VDQIX 2 "s_register_operand" "w")
3897 (match_operand:SI 3 "immediate_operand" "i")
3898 (match_operand:SI 4 "immediate_operand" "i")]
3899 UNSPEC_VSRA_N))]
3900 "TARGET_NEON"
3901 {
3902 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3903 return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3904 }
3905 [(set_attr "type" "neon_shift_acc<q>")]
3906 )
3907
;; vsri (shift right and insert) by immediate on VDQIX modes, modelled as
;; UNSPEC_VSRI.  Operand 1 is tied to the output register (constraint 0);
;; operand 2 is the source vector; operand 3 is the shift count, checked
;; with neon_const_bounds using bounds 1 and element-bits + 1.
3908 (define_insn "neon_vsri_n<mode>"
3909 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3910 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3911 (match_operand:VDQIX 2 "s_register_operand" "w")
3912 (match_operand:SI 3 "immediate_operand" "i")]
3913 UNSPEC_VSRI))]
3914 "TARGET_NEON"
3915 {
3916 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
3917 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3918 }
3919 [(set_attr "type" "neon_shift_reg<q>")]
3920 )
3921
;; vsli (shift left and insert) by immediate on VDQIX modes, modelled as
;; UNSPEC_VSLI.  Operand 1 is tied to the output register (constraint 0);
;; operand 2 is the source vector; operand 3 is the shift count, checked
;; with neon_const_bounds using bounds 0 and element-bits.
3922 (define_insn "neon_vsli_n<mode>"
3923 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3924 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
3925 (match_operand:VDQIX 2 "s_register_operand" "w")
3926 (match_operand:SI 3 "immediate_operand" "i")]
3927 UNSPEC_VSLI))]
3928 "TARGET_NEON"
3929 {
3930 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
3931 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
3932 }
3933 [(set_attr "type" "neon_shift_reg<q>")]
3934 )
3935
;; vtbl with a one-register table: operand 1 is the V8QI table (printed
;; inside the braces) and operand 2 the V8QI index vector.
3936 (define_insn "neon_vtbl1v8qi"
3937 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3938 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
3939 (match_operand:V8QI 2 "s_register_operand" "w")]
3940 UNSPEC_VTBL))]
3941 "TARGET_NEON"
3942 "vtbl.8\t%P0, {%P1}, %P2"
3943 [(set_attr "type" "neon_tbl1")]
3944 )
3945
;; vtbl with a two-register table held in a single TImode operand
;; (operand 1); operand 2 is the V8QI index vector.  The output code
;; rebuilds the table as two V8QI hard registers starting at the register
;; number of operand 1 and prints the instruction itself with
;; output_asm_insn, returning an empty template.
3946 (define_insn "neon_vtbl2v8qi"
3947 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3948 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
3949 (match_operand:V8QI 2 "s_register_operand" "w")]
3950 UNSPEC_VTBL))]
3951 "TARGET_NEON"
3952 {
3953 rtx ops[4];
3954 int tabbase = REGNO (operands[1]);
3955
3956 ops[0] = operands[0];
/* Table registers are taken at a stride of 2 hard register numbers; presumably each D register spans two register numbers -- confirm. */
3957 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3958 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3959 ops[3] = operands[2];
3960 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
3961
3962 return "";
3963 }
3964 [(set_attr "type" "neon_tbl2")]
3965 )
3966
;; vtbl with a three-register table held in a single EImode operand
;; (operand 1); operand 2 is the V8QI index vector.  The output code
;; rebuilds the table as three V8QI hard registers at register numbers
;; tabbase, tabbase + 2 and tabbase + 4, prints the instruction with
;; output_asm_insn and returns an empty template.
3967 (define_insn "neon_vtbl3v8qi"
3968 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3969 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
3970 (match_operand:V8QI 2 "s_register_operand" "w")]
3971 UNSPEC_VTBL))]
3972 "TARGET_NEON"
3973 {
3974 rtx ops[5];
3975 int tabbase = REGNO (operands[1]);
3976
3977 ops[0] = operands[0];
3978 ops[1] = gen_rtx_REG (V8QImode, tabbase);
3979 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
3980 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
3981 ops[4] = operands[2];
3982 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
3983
3984 return "";
3985 }
3986 [(set_attr "type" "neon_tbl3")]
3987 )
3988
;; vtbl with a four-register table held in a single OImode operand
;; (operand 1); operand 2 is the V8QI index vector.  The output code
;; rebuilds the table as four V8QI hard registers at register numbers
;; tabbase, tabbase + 2, tabbase + 4 and tabbase + 6, prints the
;; instruction with output_asm_insn and returns an empty template.
3989 (define_insn "neon_vtbl4v8qi"
3990 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
3991 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
3992 (match_operand:V8QI 2 "s_register_operand" "w")]
3993 UNSPEC_VTBL))]
3994 "TARGET_NEON"
3995 {
3996 rtx ops[6];
3997 int tabbase = REGNO (operands[1]);
3998
3999 ops[0] = operands[0];
4000 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4001 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4002 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4003 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4004 ops[5] = operands[2];
4005 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4006
4007 return "";
4008 }
4009 [(set_attr "type" "neon_tbl4")]
4010 )
4011
4012 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table (operand 1) and V16QI selector
;; (operand 2).  The output is marked earlyclobber.  The insn has no
;; direct assembly (template is #); after reload it is split into two
;; neon_vtbl2v8qi insns, one for the low and one for the high V8QI half of
;; the result and selector, both using operand 1 viewed as a TImode table.
4013 (define_insn_and_split "neon_vtbl1v16qi"
4014 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4015 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4016 (match_operand:V16QI 2 "s_register_operand" "w")]
4017 UNSPEC_VTBL))]
4018 "TARGET_NEON"
4019 "#"
4020 "&& reload_completed"
4021 [(const_int 0)]
4022 {
4023 rtx op0, op1, op2, part0, part2;
4024 unsigned ofs;
4025
4026 op0 = operands[0];
4027 op1 = gen_lowpart (TImode, operands[1]);
4028 op2 = operands[2];
4029
/* Low halves of destination and selector. */
4030 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4031 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4032 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4033 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4034
/* High halves of destination and selector. */
4035 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4036 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4037 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4038 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4039 DONE;
4040 }
4041 [(set_attr "type" "multiple")]
4042 )
4043
;; V16QI table lookup with an OImode table (operand 1) and V16QI selector
;; (operand 2).  The output is marked earlyclobber.  The insn has no
;; direct assembly (template is #); after reload it is split into two
;; insns, one for the low and one for the high V8QI half of the result and
;; selector.
;; NOTE(review): both halves are emitted with gen_neon_vtbl2v8qi, whose
;; table operand is declared TImode in this file, while op1 here is the
;; OImode operand passed unchanged -- confirm this is the intended
;; generator.
4044 (define_insn_and_split "neon_vtbl2v16qi"
4045 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4046 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4047 (match_operand:V16QI 2 "s_register_operand" "w")]
4048 UNSPEC_VTBL))]
4049 "TARGET_NEON"
4050 "#"
4051 "&& reload_completed"
4052 [(const_int 0)]
4053 {
4054 rtx op0, op1, op2, part0, part2;
4055 unsigned ofs;
4056
4057 op0 = operands[0];
4058 op1 = operands[1];
4059 op2 = operands[2];
4060
4061 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4062 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4063 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4064 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4065
4066 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4067 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4068 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4069 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4070 DONE;
4071 }
4072 [(set_attr "type" "multiple")]
4073 )
4074
4075 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4076 ;; handle quad-word input modes, producing octa-word output modes. But
4077 ;; that requires us to add support for octa-word vector modes in moves.
4078 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers (operands 1 and 2) into one OImode
;; register via UNSPEC_VCONCAT.  There is no direct assembly (template is
;; #); after reload the insn is split by calling neon_split_vcombine on
;; the operand array.
4079 (define_insn_and_split "neon_vcombinev16qi"
4080 [(set (match_operand:OI 0 "s_register_operand" "=w")
4081 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4082 (match_operand:V16QI 2 "s_register_operand" "w")]
4083 UNSPEC_VCONCAT))]
4084 "TARGET_NEON"
4085 "#"
4086 "&& reload_completed"
4087 [(const_int 0)]
4088 {
4089 neon_split_vcombine (operands);
4090 DONE;
4091 }
4092 [(set_attr "type" "multiple")]
4093 )
4094
;; vtbx with a one-register table.  Operand 1 is tied to the output
;; register (constraint 0); operand 2 is the V8QI table and operand 3 the
;; V8QI index vector.
4095 (define_insn "neon_vtbx1v8qi"
4096 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4097 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4098 (match_operand:V8QI 2 "s_register_operand" "w")
4099 (match_operand:V8QI 3 "s_register_operand" "w")]
4100 UNSPEC_VTBX))]
4101 "TARGET_NEON"
4102 "vtbx.8\t%P0, {%P2}, %P3"
4103 [(set_attr "type" "neon_tbl1")]
4104 )
4105
;; vtbx with a two-register table held in a single TImode operand
;; (operand 2).  Operand 1 is tied to the output register (constraint 0);
;; operand 3 is the V8QI index vector.  The output code rebuilds the table
;; as V8QI hard registers at tabbase and tabbase + 2, prints the
;; instruction with output_asm_insn and returns an empty template.
4106 (define_insn "neon_vtbx2v8qi"
4107 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4108 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4109 (match_operand:TI 2 "s_register_operand" "w")
4110 (match_operand:V8QI 3 "s_register_operand" "w")]
4111 UNSPEC_VTBX))]
4112 "TARGET_NEON"
4113 {
4114 rtx ops[4];
4115 int tabbase = REGNO (operands[2]);
4116
4117 ops[0] = operands[0];
4118 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4119 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4120 ops[3] = operands[3];
4121 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4122
4123 return "";
4124 }
4125 [(set_attr "type" "neon_tbl2")]
4126 )
4127
;; vtbx with a three-register table held in a single EImode operand
;; (operand 2).  Operand 1 is tied to the output register (constraint 0);
;; operand 3 is the V8QI index vector.  The output code rebuilds the table
;; as V8QI hard registers at tabbase, tabbase + 2 and tabbase + 4, prints
;; the instruction with output_asm_insn and returns an empty template.
4128 (define_insn "neon_vtbx3v8qi"
4129 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4130 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4131 (match_operand:EI 2 "s_register_operand" "w")
4132 (match_operand:V8QI 3 "s_register_operand" "w")]
4133 UNSPEC_VTBX))]
4134 "TARGET_NEON"
4135 {
4136 rtx ops[5];
4137 int tabbase = REGNO (operands[2]);
4138
4139 ops[0] = operands[0];
4140 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4141 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4142 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4143 ops[4] = operands[3];
4144 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4145
4146 return "";
4147 }
4148 [(set_attr "type" "neon_tbl3")]
4149 )
4150
;; vtbx with a four-register table held in a single OImode operand
;; (operand 2).  Operand 1 is tied to the output register (constraint 0);
;; operand 3 is the V8QI index vector.  The output code rebuilds the table
;; as V8QI hard registers at tabbase, tabbase + 2, tabbase + 4 and
;; tabbase + 6, prints the instruction with output_asm_insn and returns an
;; empty template.
4151 (define_insn "neon_vtbx4v8qi"
4152 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4153 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4154 (match_operand:OI 2 "s_register_operand" "w")
4155 (match_operand:V8QI 3 "s_register_operand" "w")]
4156 UNSPEC_VTBX))]
4157 "TARGET_NEON"
4158 {
4159 rtx ops[6];
4160 int tabbase = REGNO (operands[2]);
4161
4162 ops[0] = operands[0];
4163 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4164 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4165 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4166 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4167 ops[5] = operands[3];
4168 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4169
4170 return "";
4171 }
4172 [(set_attr "type" "neon_tbl4")]
4173 )
4174
;; Expander producing a parallel of two sets from the same two inputs
;; (operands 1 and 2): operand 0 receives UNSPEC_VTRN1 and operand 3
;; receives UNSPEC_VTRN2.  There is no preparation code; the template is
;; emitted as written.
4175 (define_expand "neon_vtrn<mode>_internal"
4176 [(parallel
4177 [(set (match_operand:VDQW 0 "s_register_operand" "")
4178 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4179 (match_operand:VDQW 2 "s_register_operand" "")]
4180 UNSPEC_VTRN1))
4181 (set (match_operand:VDQW 3 "s_register_operand" "")
4182 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4183 "TARGET_NEON"
4184 ""
4185 )
4186
4187 ;; Note: Different operand numbering to handle tied registers correctly.
;; Two-output vtrn insn.  Operands 0 and 2 are the outputs; input operand
;; 1 is tied to output 0 and input operand 3 is tied to output 2, so the
;; instruction is printed with just the two output registers.
4188 (define_insn "*neon_vtrn<mode>_insn"
4189 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4190 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4191 (match_operand:VDQW 3 "s_register_operand" "2")]
4192 UNSPEC_VTRN1))
4193 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4194 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4195 UNSPEC_VTRN2))]
4196 "TARGET_NEON"
4197 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4198 [(set_attr "type" "neon_permute<q>")]
4199 )
4200
;; Expander producing a parallel of two sets from the same two inputs
;; (operands 1 and 2): operand 0 receives UNSPEC_VZIP1 and operand 3
;; receives UNSPEC_VZIP2.  There is no preparation code; the template is
;; emitted as written.
4201 (define_expand "neon_vzip<mode>_internal"
4202 [(parallel
4203 [(set (match_operand:VDQW 0 "s_register_operand" "")
4204 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4205 (match_operand:VDQW 2 "s_register_operand" "")]
4206 UNSPEC_VZIP1))
4207 (set (match_operand:VDQW 3 "s_register_operand" "")
4208 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4209 "TARGET_NEON"
4210 ""
4211 )
4212
4213 ;; Note: Different operand numbering to handle tied registers correctly.
;; Two-output vzip insn.  Operands 0 and 2 are the outputs; input operand
;; 1 is tied to output 0 and input operand 3 is tied to output 2, so the
;; instruction is printed with just the two output registers.
4214 (define_insn "*neon_vzip<mode>_insn"
4215 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4216 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4217 (match_operand:VDQW 3 "s_register_operand" "2")]
4218 UNSPEC_VZIP1))
4219 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4220 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4221 UNSPEC_VZIP2))]
4222 "TARGET_NEON"
4223 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4224 [(set_attr "type" "neon_zip<q>")]
4225 )
4226
;; Expander producing a parallel of two sets from the same two inputs
;; (operands 1 and 2): operand 0 receives UNSPEC_VUZP1 and operand 3
;; receives UNSPEC_VUZP2.  There is no preparation code; the template is
;; emitted as written.
4227 (define_expand "neon_vuzp<mode>_internal"
4228 [(parallel
4229 [(set (match_operand:VDQW 0 "s_register_operand" "")
4230 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4231 (match_operand:VDQW 2 "s_register_operand" "")]
4232 UNSPEC_VUZP1))
4233 (set (match_operand:VDQW 3 "s_register_operand" "")
4234 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4235 "TARGET_NEON"
4236 ""
4237 )
4238
4239 ;; Note: Different operand numbering to handle tied registers correctly.
;; Two-output vuzp insn.  Operands 0 and 2 are the outputs; input operand
;; 1 is tied to output 0 and input operand 3 is tied to output 2, so the
;; instruction is printed with just the two output registers.
4240 (define_insn "*neon_vuzp<mode>_insn"
4241 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4242 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4243 (match_operand:VDQW 3 "s_register_operand" "2")]
4244 UNSPEC_VUZP1))
4245 (set (match_operand:VDQW 2 "s_register_operand" "=w")
4246 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4247 UNSPEC_VUZP2))]
4248 "TARGET_NEON"
4249 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4250 [(set_attr "type" "neon_zip<q>")]
4251 )
4252
;; Reinterpret any VDX-mode register (operand 1) as V8QI (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4253 (define_expand "neon_vreinterpretv8qi<mode>"
4254 [(match_operand:V8QI 0 "s_register_operand" "")
4255 (match_operand:VDX 1 "s_register_operand" "")]
4256 "TARGET_NEON"
4257 {
4258 neon_reinterpret (operands[0], operands[1]);
4259 DONE;
4260 })
4261
;; Reinterpret any VDX-mode register (operand 1) as V4HI (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4262 (define_expand "neon_vreinterpretv4hi<mode>"
4263 [(match_operand:V4HI 0 "s_register_operand" "")
4264 (match_operand:VDX 1 "s_register_operand" "")]
4265 "TARGET_NEON"
4266 {
4267 neon_reinterpret (operands[0], operands[1]);
4268 DONE;
4269 })
4270
;; Reinterpret any VDX-mode register (operand 1) as V2SI (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4271 (define_expand "neon_vreinterpretv2si<mode>"
4272 [(match_operand:V2SI 0 "s_register_operand" "")
4273 (match_operand:VDX 1 "s_register_operand" "")]
4274 "TARGET_NEON"
4275 {
4276 neon_reinterpret (operands[0], operands[1]);
4277 DONE;
4278 })
4279
;; Reinterpret any VDX-mode register (operand 1) as V2SF (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4280 (define_expand "neon_vreinterpretv2sf<mode>"
4281 [(match_operand:V2SF 0 "s_register_operand" "")
4282 (match_operand:VDX 1 "s_register_operand" "")]
4283 "TARGET_NEON"
4284 {
4285 neon_reinterpret (operands[0], operands[1]);
4286 DONE;
4287 })
4288
;; Reinterpret any VDX-mode register (operand 1) as DI (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4289 (define_expand "neon_vreinterpretdi<mode>"
4290 [(match_operand:DI 0 "s_register_operand" "")
4291 (match_operand:VDX 1 "s_register_operand" "")]
4292 "TARGET_NEON"
4293 {
4294 neon_reinterpret (operands[0], operands[1]);
4295 DONE;
4296 })
4297
;; Reinterpret any VQXMOV-mode register (operand 1) as TI (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4298 (define_expand "neon_vreinterpretti<mode>"
4299 [(match_operand:TI 0 "s_register_operand" "")
4300 (match_operand:VQXMOV 1 "s_register_operand" "")]
4301 "TARGET_NEON"
4302 {
4303 neon_reinterpret (operands[0], operands[1]);
4304 DONE;
4305 })
4306
4307
;; Reinterpret any VQXMOV-mode register (operand 1) as V16QI (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4308 (define_expand "neon_vreinterpretv16qi<mode>"
4309 [(match_operand:V16QI 0 "s_register_operand" "")
4310 (match_operand:VQXMOV 1 "s_register_operand" "")]
4311 "TARGET_NEON"
4312 {
4313 neon_reinterpret (operands[0], operands[1]);
4314 DONE;
4315 })
4316
;; Reinterpret any VQXMOV-mode register (operand 1) as V8HI (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4317 (define_expand "neon_vreinterpretv8hi<mode>"
4318 [(match_operand:V8HI 0 "s_register_operand" "")
4319 (match_operand:VQXMOV 1 "s_register_operand" "")]
4320 "TARGET_NEON"
4321 {
4322 neon_reinterpret (operands[0], operands[1]);
4323 DONE;
4324 })
4325
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SI (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4326 (define_expand "neon_vreinterpretv4si<mode>"
4327 [(match_operand:V4SI 0 "s_register_operand" "")
4328 (match_operand:VQXMOV 1 "s_register_operand" "")]
4329 "TARGET_NEON"
4330 {
4331 neon_reinterpret (operands[0], operands[1]);
4332 DONE;
4333 })
4334
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SF (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4335 (define_expand "neon_vreinterpretv4sf<mode>"
4336 [(match_operand:V4SF 0 "s_register_operand" "")
4337 (match_operand:VQXMOV 1 "s_register_operand" "")]
4338 "TARGET_NEON"
4339 {
4340 neon_reinterpret (operands[0], operands[1]);
4341 DONE;
4342 })
4343
;; Reinterpret any VQXMOV-mode register (operand 1) as V2DI (operand 0).
;; All work is delegated to neon_reinterpret; no RTL template is emitted.
4344 (define_expand "neon_vreinterpretv2di<mode>"
4345 [(match_operand:V2DI 0 "s_register_operand" "")
4346 (match_operand:VQXMOV 1 "s_register_operand" "")]
4347 "TARGET_NEON"
4348 {
4349 neon_reinterpret (operands[0], operands[1]);
4350 DONE;
4351 })
4352
;; Named expander vec_load_lanes for VDQX modes with identical source and
;; destination modes.  It emits a plain UNSPEC_VLD1 set, the same RTL
;; shape as the neon_vld1<mode> insn that follows it in this file.
4353 (define_expand "vec_load_lanes<mode><mode>"
4354 [(set (match_operand:VDQX 0 "s_register_operand")
4355 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4356 UNSPEC_VLD1))]
4357 "TARGET_NEON")
4358
;; vld1 of a whole VDQX register (operand 0) from a structure memory
;; operand (operand 1, constraint Um), modelled as UNSPEC_VLD1.
4359 (define_insn "neon_vld1<mode>"
4360 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4361 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4362 UNSPEC_VLD1))]
4363 "TARGET_NEON"
4364 "vld1.<V_sz_elem>\t%h0, %A1"
4365 [(set_attr "type" "neon_load1_1reg<q>")]
4366 )
4367
;; vld1 into a single lane of a VDX register.  Operand 1 is the element in
;; memory, operand 2 the existing vector (tied to the output) and operand
;; 3 the lane number.  The output code reports an error when the lane is
;; outside 0 .. nunits - 1.  Single-element modes (max == 1) are printed
;; as a whole-register load; otherwise the indexed-lane form is used.
4368 (define_insn "neon_vld1_lane<mode>"
4369 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4370 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4371 (match_operand:VDX 2 "s_register_operand" "0")
4372 (match_operand:SI 3 "immediate_operand" "i")]
4373 UNSPEC_VLD1_LANE))]
4374 "TARGET_NEON"
4375 {
4376 HOST_WIDE_INT lane = INTVAL (operands[3]);
4377 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4378 if (lane < 0 || lane >= max)
4379 error ("lane out of range");
4380 if (max == 1)
4381 return "vld1.<V_sz_elem>\t%P0, %A1";
4382 else
4383 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4384 }
4385 [(set_attr "type" "neon_load1_one_lane<q>")]
4386 )
4387
;; vld1 into a single lane of a VQX register.  Operand 1 is the element in
;; memory, operand 2 the existing vector (tied to the output) and operand
;; 3 the lane number.  The output code reports an error when the lane is
;; outside 0 .. nunits - 1.  The instruction is printed against one
;; <V_HALF> half of the destination: lanes in the upper half are rebased
;; by nunits / 2 and use the register numbered REGNO + 2.  Two-element
;; modes (max == 2) are printed as a whole-half load.
4388 (define_insn "neon_vld1_lane<mode>"
4389 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4390 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4391 (match_operand:VQX 2 "s_register_operand" "0")
4392 (match_operand:SI 3 "immediate_operand" "i")]
4393 UNSPEC_VLD1_LANE))]
4394 "TARGET_NEON"
4395 {
4396 HOST_WIDE_INT lane = INTVAL (operands[3]);
4397 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4398 int regno = REGNO (operands[0]);
4399 if (lane < 0 || lane >= max)
4400 error ("lane out of range");
4401 else if (lane >= max / 2)
4402 {
/* Upper half: rebase the lane and select the register two numbers up. */
4403 lane -= max / 2;
4404 regno += 2;
4405 operands[3] = GEN_INT (lane);
4406 }
4407 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4408 if (max == 2)
4409 return "vld1.<V_sz_elem>\t%P0, %A1";
4410 else
4411 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4412 }
4413 [(set_attr "type" "neon_load1_one_lane<q>")]
4414 )
4415
;; Load one element from memory (operand 1) and duplicate it into every
;; lane of a VD register, expressed as vec_duplicate and printed with the
;; all-lanes form of vld1.
4416 (define_insn "neon_vld1_dup<mode>"
4417 [(set (match_operand:VD 0 "s_register_operand" "=w")
4418 (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4419 "TARGET_NEON"
4420 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4421 [(set_attr "type" "neon_load1_all_lanes<q>")]
4422 )
4423
4424 ;; Special case for DImode. Treat it exactly like a simple load.
;; DImode expander: emits a plain UNSPEC_VLD1 set of operand 0 from the
;; memory operand 1, with no preparation code.
4425 (define_expand "neon_vld1_dupdi"
4426 [(set (match_operand:DI 0 "s_register_operand" "")
4427 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
4428 UNSPEC_VLD1))]
4429 "TARGET_NEON"
4430 ""
4431 )
4432
;; Load one element from memory (operand 1) and duplicate it into every
;; lane of a VQ register, expressed as vec_duplicate.  The register list
;; names operand 0 twice, through the %e0 and %f0 output modifiers
;; (presumably its two D-register halves -- confirm against the operand
;; printer).
4433 (define_insn "neon_vld1_dup<mode>"
4434 [(set (match_operand:VQ 0 "s_register_operand" "=w")
4435 (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4436 "TARGET_NEON"
4437 {
4438 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4439 }
4440 [(set_attr "type" "neon_load1_all_lanes<q>")]
4441 )
4442
;; Duplicate a DImode memory operand into both halves of a V2DI register.
;; There is no direct assembly (template is #); after reload the insn is
;; split into a neon_vld1_dupdi load of the low DImode part followed by a
;; move of that part into the high DImode part.  The length attribute is
;; set to 8.
4443 (define_insn_and_split "neon_vld1_dupv2di"
4444 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4445 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4446 "TARGET_NEON"
4447 "#"
4448 "&& reload_completed"
4449 [(const_int 0)]
4450 {
4451 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4452 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4453 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4454 DONE;
4455 }
4456 [(set_attr "length" "8")
4457 (set_attr "type" "neon_load1_all_lanes_q")]
4458 )
4459
;; Named expander vec_store_lanes for VDQX modes with identical source and
;; destination modes.  It emits a plain UNSPEC_VST1 set, the same RTL
;; shape as the neon_vst1<mode> insn that follows it in this file.
4460 (define_expand "vec_store_lanes<mode><mode>"
4461 [(set (match_operand:VDQX 0 "neon_struct_operand")
4462 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
4463 UNSPEC_VST1))]
4464 "TARGET_NEON")
4465
;; vst1 of a whole VDQX register (operand 1) to a structure memory operand
;; (operand 0, constraint Um), modelled as UNSPEC_VST1.
4466 (define_insn "neon_vst1<mode>"
4467 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4468 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4469 UNSPEC_VST1))]
4470 "TARGET_NEON"
4471 "vst1.<V_sz_elem>\t%h1, %A0"
4472 [(set_attr "type" "neon_store1_1reg<q>")])
4473
;; vst1 of a single lane of a VDX register.  Operand 0 is the element in
;; memory, operand 1 the source vector and operand 2 the lane number.  The
;; output code reports an error when the lane is outside 0 .. nunits - 1.
;; Single-element modes (max == 1) are printed as a whole-register store;
;; otherwise the indexed-lane form is used.
4474 (define_insn "neon_vst1_lane<mode>"
4475 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4476 (unspec:<V_elem>
4477 [(match_operand:VDX 1 "s_register_operand" "w")
4478 (match_operand:SI 2 "immediate_operand" "i")]
4479 UNSPEC_VST1_LANE))]
4480 "TARGET_NEON"
4481 {
4482 HOST_WIDE_INT lane = INTVAL (operands[2]);
4483 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4484 if (lane < 0 || lane >= max)
4485 error ("lane out of range");
4486 if (max == 1)
4487 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4488 else
4489 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4490 }
4491 [(set_attr "type" "neon_store1_one_lane<q>")]
4492 )
4493
;; vst1 of a single lane of a VQX register.  Operand 0 is the element in
;; memory, operand 1 the source vector and operand 2 the lane number.  The
;; output code reports an error when the lane is outside 0 .. nunits - 1.
;; The instruction is printed against one <V_HALF> half of the source:
;; lanes in the upper half are rebased by nunits / 2 and use the register
;; numbered REGNO + 2.  Two-element modes (max == 2) are printed as a
;; whole-half store.
4494 (define_insn "neon_vst1_lane<mode>"
4495 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4496 (unspec:<V_elem>
4497 [(match_operand:VQX 1 "s_register_operand" "w")
4498 (match_operand:SI 2 "immediate_operand" "i")]
4499 UNSPEC_VST1_LANE))]
4500 "TARGET_NEON"
4501 {
4502 HOST_WIDE_INT lane = INTVAL (operands[2]);
4503 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4504 int regno = REGNO (operands[1]);
4505 if (lane < 0 || lane >= max)
4506 error ("lane out of range");
4507 else if (lane >= max / 2)
4508 {
/* Upper half: rebase the lane and select the register two numbers up. */
4509 lane -= max / 2;
4510 regno += 2;
4511 operands[2] = GEN_INT (lane);
4512 }
4513 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4514 if (max == 2)
4515 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4516 else
4517 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4518 }
4519 [(set_attr "type" "neon_store1_one_lane<q>")]
4520 )
4521
;; Expander for a two-vector structure load into a TImode register for
;; VDX element modes.  It generates the UNSPEC_VLD2 set directly; the
;; UNSPEC_VSTRUCTDUMMY operand only carries the vector mode.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
4528
;; Two-vector structure load into a TImode register for VDX modes.
;; For 64-bit elements a vld1.64 is emitted instead of vld2, and the
;; "type" attribute is selected to match.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld2.<V_sz_elem>\t%h0, %A1";

  /* 64-bit elements: plain multi-register vld1.  */
  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_2reg<q>")
                      (const_string "neon_load2_2reg<q>")))]
)
4546
;; Expander for a two-vector structure load into an OImode register for
;; VQ element modes.  It generates the UNSPEC_VLD2 set directly.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
4553
;; Two-vector structure load into an OImode register for VQ modes,
;; emitted as a single vld2 of the mode's element size.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")]
)
4562
;; Load one lane (operand 3) of a two-vector structure held in a TImode
;; register, for VD modes.  Operand 2 is the previous register contents,
;; tied to the output.  The two DImode registers at regno and regno + 2
;; are named explicitly in the emitted vld2.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:TI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT lane = INTVAL (operands[3]);
  int first = REGNO (operands[0]);
  rtx ops[4];
  int i;

  if (lane >= nunits || lane < 0)
    error ("lane out of range");

  /* Destination registers are two register numbers apart.  */
  for (i = 0; i < 2; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[2] = operands[1];
  ops[3] = operands[3];

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4587
;; Load one lane (operand 3) of a two-vector structure held in an OImode
;; register, for VMQ modes.  A lane in the upper half of the vector is
;; rebased and the base register number advanced by 2; the two DImode
;; registers used are then four register numbers apart.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = INTVAL (operands[3]);
  int first = REGNO (operands[0]);
  rtx ops[4];
  int i;

  if (lane < 0 || lane >= nunits)
    error ("lane out of range");
  else if (lane >= half)
    {
      /* Upper-half lane: use the other D register of each pair.  */
      first += 2;
      lane -= half;
    }

  for (i = 0; i < 2; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4617
;; Load a two-element structure and replicate it to all lanes of a TImode
;; register pair, for VDX modes.  Single-element modes fall back to a
;; plain vld1 of the whole register list.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  /* Nothing to duplicate when the vector has a single element.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load2_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
4635
;; Expander for a two-vector structure store from a TImode register for
;; VDX element modes.  It generates the UNSPEC_VST2 set directly.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
4642
;; Two-vector structure store from a TImode register for VDX modes.
;; For 64-bit elements a vst1.64 is emitted instead of vst2.
;;
;; The "type" attribute follows the instruction actually emitted: a
;; two-register vst1 for 64-bit elements, otherwise a full two-register
;; vst2.  The vst2 case was previously classified as a single-lane store
;; ("neon_store2_one_lane<q>"), which did not match the emitted
;; instruction; it now mirrors "neon_load2_2reg<q>" used by the
;; corresponding load pattern.  Only scheduling classification changes;
;; the assembly output is untouched.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_2reg<q>")
                      (const_string "neon_store2_2reg<q>")))]
)
4660
;; Expander for a two-vector structure store from an OImode register for
;; VQ element modes.  It generates the UNSPEC_VST2 set directly.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
4667
;; Two-vector structure store from an OImode register for VQ modes,
;; emitted as a single vst2 of the mode's element size.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
4677
;; Store one lane (operand 2) of a two-vector structure held in a TImode
;; register, for VD modes.  The two DImode registers at regno and
;; regno + 2 are named explicitly in the emitted vst2.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT lane = INTVAL (operands[2]);
  int first = REGNO (operands[1]);
  rtx ops[4];
  int i;

  if (lane >= nunits || lane < 0)
    error ("lane out of range");

  ops[0] = operands[0];
  /* Source registers are two register numbers apart.  */
  for (i = 0; i < 2; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 2 * i);
  ops[3] = operands[2];

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4702
;; Store one lane (operand 2) of a two-vector structure held in an OImode
;; register, for VMQ modes.  A lane in the upper half of the vector is
;; rebased and the base register number advanced by 2; the two DImode
;; registers used are then four register numbers apart.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = INTVAL (operands[2]);
  int first = REGNO (operands[1]);
  rtx ops[4];
  int i;

  if (lane < 0 || lane >= nunits)
    error ("lane out of range");
  else if (lane >= half)
    {
      /* Upper-half lane: use the other D register of each pair.  */
      first += 2;
      lane -= half;
    }

  ops[0] = operands[0];
  for (i = 0; i < 2; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 4 * i);
  ops[3] = GEN_INT (lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4732
;; Expander for a three-vector structure load into an EImode register for
;; VDX element modes.  It generates the UNSPEC_VLD3 set directly.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON")
4739
;; Three-vector structure load into an EImode register for VDX modes.
;; For 64-bit elements a vld1.64 is emitted instead of vld3, and the
;; "type" attribute is selected to match.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld3.<V_sz_elem>\t%h0, %A1";

  /* 64-bit elements: plain multi-register vld1.  */
  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_3reg<q>")
                      (const_string "neon_load3_3reg<q>")))]
)
4757
;; Expander for a three-vector structure load into a CImode register for
;; VQ element modes.  All the work is delegated to gen_neon_vld3<mode>.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
4767
;; Three-vector structure load into a CImode register for VQ modes.
;; The CImode memory operand is accessed as two consecutive EImode
;; pieces: neon_vld3qa loads from the first, neon_vld3qb from the second
;; while taking the partially filled destination as an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_mem, second_mem;

  first_mem = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_mem));

  /* Second piece starts one EImode further on.  */
  second_mem = adjust_address (first_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_mem, operands[0]));
  DONE;
})
4782
;; First half of a quad-register three-vector load: a vld3 from an EImode
;; memory piece into the DImode registers at regno, regno + 4 and
;; regno + 8 of the CImode destination.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  int first = REGNO (operands[0]);
  rtx ops[4];
  int i;

  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4801
;; Second half of a quad-register three-vector load: a vld3 from an
;; EImode memory piece into the DImode registers at regno + 2, regno + 6
;; and regno + 10 of the CImode destination.  Operand 2 is the previous
;; destination contents, tied to the output.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  /* Start at the second D register of the destination.  */
  int first = REGNO (operands[0]) + 2;
  rtx ops[4];
  int i;

  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4821
;; Load one lane (operand 3) of a three-vector structure held in an
;; EImode register, for VD modes.  The three DImode registers at regno,
;; regno + 2 and regno + 4 are named explicitly.  The memory operand is
;; printed with plain %3 here, unlike the %A used by the vld2/vld4 lane
;; patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:EI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT lane = INTVAL (operands[3]);
  int first = REGNO (operands[0]);
  rtx ops[5];
  int i;

  if (lane >= nunits || lane < 0)
    error ("lane out of range");

  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[3] = operands[1];
  ops[4] = operands[3];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4848
;; Load one lane (operand 3) of a three-vector structure held in a CImode
;; register, for VMQ modes.  A lane in the upper half of the vector is
;; rebased and the base register number advanced by 2; the three DImode
;; registers used are four register numbers apart.  The memory operand is
;; printed with plain %3.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = INTVAL (operands[3]);
  int first = REGNO (operands[0]);
  rtx ops[5];
  int i;

  if (lane < 0 || lane >= nunits)
    error ("lane out of range");
  else if (lane >= half)
    {
      /* Upper-half lane: use the other D register of each pair.  */
      first += 2;
      lane -= half;
    }

  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4880
;; Load a three-element structure and replicate it to all lanes of an
;; EImode register triple, for VDX modes.  Single-element modes fall back
;; to a plain vld1 of the whole register list.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  int first;
  rtx ops[4];
  int i;

  /* Nothing to duplicate when the vector has a single element.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  first = REGNO (operands[0]);
  for (i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
  return "";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load3_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
4906
;; Expander for a three-vector structure store from an EImode register
;; for VDX element modes.  It generates the UNSPEC_VST3 set directly.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON")
4913
;; Three-vector structure store from an EImode register for VDX modes.
;; For 64-bit elements a vst1.64 is emitted instead of vst3.
;;
;; The "type" attribute follows the instruction actually emitted: a
;; three-register vst1 for 64-bit elements, otherwise a full
;; three-register vst3.  The vst3 case was previously classified as a
;; single-lane store ("neon_store3_one_lane<q>"), which did not match the
;; emitted instruction; it now uses "neon_store3_3reg<q>", the same class
;; the quad-register vst3 halves use.  Only scheduling classification
;; changes; the assembly output is untouched.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_3reg<q>")
                      (const_string "neon_store3_3reg<q>")))]
)
4930
;; Expander for a three-vector structure store from a CImode register for
;; VQ element modes.  All the work is delegated to gen_neon_vst3<mode>.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
})
4940
;; Three-vector structure store from a CImode register for VQ modes.
;; The CImode memory operand is accessed as two consecutive EImode
;; pieces: neon_vst3qa stores to the first, neon_vst3qb to the second.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_mem, second_mem;

  first_mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_mem, operands[1]));

  /* Second piece starts one EImode further on.  */
  second_mem = adjust_address (first_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_mem, operands[1]));
  DONE;
})
4955
;; First half of a quad-register three-vector store: a vst3 to an EImode
;; memory piece from the DImode registers at regno, regno + 4 and
;; regno + 8 of the CImode source.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:CI 1 "s_register_operand" "w")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3A))]
  "TARGET_NEON"
{
  int first = REGNO (operands[1]);
  rtx ops[4];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4974
;; Second half of a quad-register three-vector store: a vst3 to an EImode
;; memory piece from the DImode registers at regno + 2, regno + 6 and
;; regno + 10 of the CImode source.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:CI 1 "s_register_operand" "w")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3B))]
  "TARGET_NEON"
{
  /* Start at the second D register of the source.  */
  int first = REGNO (operands[1]) + 2;
  rtx ops[4];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4993
;; Store one lane (operand 2) of a three-vector structure held in an
;; EImode register, for VD modes.  The three DImode registers at regno,
;; regno + 2 and regno + 4 are named explicitly.  The memory operand is
;; printed with plain %0 here, unlike the %A used by the vst2/vst4 lane
;; patterns.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT lane = INTVAL (operands[2]);
  int first = REGNO (operands[1]);
  rtx ops[5];
  int i;

  if (lane >= nunits || lane < 0)
    error ("lane out of range");

  ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 2 * i);
  ops[4] = operands[2];

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5020
;; Store one lane (operand 2) of a three-vector structure held in a
;; CImode register, for VMQ modes.  A lane in the upper half of the vector
;; is rebased and the base register number advanced by 2; the three
;; DImode registers used are four register numbers apart.  The memory
;; operand is printed with plain %0.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = INTVAL (operands[2]);
  int first = REGNO (operands[1]);
  rtx ops[5];
  int i;

  if (lane < 0 || lane >= nunits)
    error ("lane out of range");
  else if (lane >= half)
    {
      /* Upper-half lane: use the other D register of each pair.  */
      first += 2;
      lane -= half;
    }

  ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 4 * i);
  ops[4] = GEN_INT (lane);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5052
;; Expander for a four-vector structure load into an OImode register for
;; VDX element modes.  It generates the UNSPEC_VLD4 set directly.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4))]
  "TARGET_NEON")
5059
;; Four-vector structure load into an OImode register for VDX modes.
;; For 64-bit elements a vld1.64 is emitted instead of vld4, and the
;; "type" attribute is selected to match.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld4.<V_sz_elem>\t%h0, %A1";

  /* 64-bit elements: plain multi-register vld1.  */
  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_4reg<q>")
                      (const_string "neon_load4_4reg<q>")))]
)
5077
;; Expander for a four-vector structure load into an XImode register for
;; VQ element modes.  All the work is delegated to gen_neon_vld4<mode>.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})
5087
;; Four-vector structure load into an XImode register for VQ modes.
;; The XImode memory operand is accessed as two consecutive OImode
;; pieces: neon_vld4qa loads from the first, neon_vld4qb from the second
;; while taking the partially filled destination as an input.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_mem, second_mem;

  first_mem = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_mem));

  /* Second piece starts one OImode further on.  */
  second_mem = adjust_address (first_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_mem, operands[0]));
  DONE;
})
5102
;; First half of a quad-register four-vector load: a vld4 from an OImode
;; memory piece into the DImode registers at regno, regno + 4, regno + 8
;; and regno + 12 of the XImode destination.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  int first = REGNO (operands[0]);
  rtx ops[5];
  int i;

  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5122
;; Second half of a quad-register four-vector load: a vld4 from an OImode
;; memory piece into the DImode registers at regno + 2, regno + 6,
;; regno + 10 and regno + 14 of the XImode destination.  Operand 2 is the
;; previous destination contents, tied to the output.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (match_operand:XI 2 "s_register_operand" "0")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  /* Start at the second D register of the destination.  */
  int first = REGNO (operands[0]) + 2;
  rtx ops[5];
  int i;

  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5143
;; Load one lane (operand 3) of a four-vector structure held in an OImode
;; register, for VD modes.  The four DImode registers at regno,
;; regno + 2, regno + 4 and regno + 6 are named explicitly.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT lane = INTVAL (operands[3]);
  int first = REGNO (operands[0]);
  rtx ops[6];
  int i;

  if (lane >= nunits || lane < 0)
    error ("lane out of range");

  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[4] = operands[1];
  ops[5] = operands[3];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5171
;; Load one lane (operand 3) of a four-vector structure held in an XImode
;; register, for VMQ modes.  A lane in the upper half of the vector is
;; rebased and the base register number advanced by 2; the four DImode
;; registers used are four register numbers apart.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (match_operand:XI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane = INTVAL (operands[3]);
  int first = REGNO (operands[0]);
  rtx ops[6];
  int i;

  if (lane < 0 || lane >= nunits)
    error ("lane out of range");
  else if (lane >= half)
    {
      /* Upper-half lane: use the other D register of each pair.  */
      first += 2;
      lane -= half;
    }

  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5204
;; Load a four-element structure and replicate it to all lanes of an
;; OImode register quadruple, for VDX modes.  Single-element modes fall
;; back to a plain vld1 of the whole register list.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  int first;
  rtx ops[5];
  int i;

  /* Nothing to duplicate when the vector has a single element.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  first = REGNO (operands[0]);
  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   ops);
  return "";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load4_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
5233
;; Expander for a four-vector structure store from an OImode register for
;; VDX element modes.  It generates the UNSPEC_VST4 set directly.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4))]
  "TARGET_NEON")
5240
;; Four-vector structure store from an OImode register for VDX modes.
;; For 64-bit elements a vst1.64 is emitted instead of vst4, and the
;; "type" attribute is selected to match.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vst4.<V_sz_elem>\t%h1, %A0";

  /* 64-bit elements: plain multi-register vst1.  */
  return "vst1.64\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_4reg<q>")
                      (const_string "neon_store4_4reg<q>")))]
)
5258
;; Expander for a four-vector structure store from an XImode register for
;; VQ element modes.  All the work is delegated to gen_neon_vst4<mode>.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest, src));
  DONE;
})
5268
;; Four-vector structure store from an XImode register for VQ modes.
;; The XImode memory operand is accessed as two consecutive OImode
;; pieces: neon_vst4qa stores to the first, neon_vst4qb to the second.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_mem, second_mem;

  first_mem = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_mem, operands[1]));

  /* Second piece starts one OImode further on.  */
  second_mem = adjust_address (first_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_mem, operands[1]));
  DONE;
})
5283
;; First half of a quad-register four-vector store: a vst4 to an OImode
;; memory piece from the DImode registers at regno, regno + 4, regno + 8
;; and regno + 12 of the XImode source.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:XI 1 "s_register_operand" "w")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4A))]
  "TARGET_NEON"
{
  int first = REGNO (operands[1]);
  rtx ops[5];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5303
;; Second half of a quad-register four-vector store: a vst4 to an OImode
;; memory piece from the DImode registers at regno + 2, regno + 6,
;; regno + 10 and regno + 14 of the XImode source.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:XI 1 "s_register_operand" "w")
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4B))]
  "TARGET_NEON"
{
  /* Start at the second D register of the source.  */
  int first = REGNO (operands[1]) + 2;
  rtx ops[5];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5323
;; Store one lane (operand 2) of a four-vector structure held in an
;; OImode register, for VD modes.  The four DImode registers at regno,
;; regno + 2, regno + 4 and regno + 6 are named explicitly.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT lane = INTVAL (operands[2]);
  int first = REGNO (operands[1]);
  rtx ops[6];
  int i;

  if (lane >= nunits || lane < 0)
    error ("lane out of range");

  ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 2 * i);
  ops[5] = operands[2];

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5351
;; Store one lane (operand 2) of a four-vector structure held in an
;; XImode register, for VMQ modes.  A lane in the upper half of the vector
;; is rebased and the base register number advanced by 2; the four DImode
;; registers used are four register numbers apart.
;;
;; The "type" attribute is "neon_store4_one_lane<q>": this pattern emits
;; a single-lane vst4, exactly like the D-register variant of
;; neon_vst4_lane, so it must not be classified as a full four-register
;; store ("neon_store4_4reg<q>") as it previously was.  Only scheduling
;; classification changes; the assembly output is untouched.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = INTVAL (operands[2]);
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  if (lane < 0 || lane >= max)
    error ("lane out of range");
  else if (lane >= max / 2)
    {
      /* Upper-half lane: use the other D register of each pair.  */
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5384
;; Widen (sign- or zero-extend, per the SE iterator) the elements of
;; operand 1 selected by the "low" lane parallel in operand 2, using
;; vmovl on the %e half of the source.  Little-endian only.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
5394
;; Widen (sign- or zero-extend, per the SE iterator) the elements of
;; operand 1 selected by the "high" lane parallel in operand 2, using
;; vmovl on the %f half of the source.  Little-endian only.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
5404
;; Expander that widens the upper half of operand 1.  It builds a
;; PARALLEL of the lane numbers nunits/2 .. nunits-1 and hands it to
;; neon_vec_unpack<US>_hi_<mode> as the lane selector.  Little-endian
;; only.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);
  rtx selector;
  int n;

  /* Select the upper half: lanes half, half + 1, ...  */
  for (n = 0; n < half; n++)
    RTVEC_ELT (lanes, n) = GEN_INT (half + n);

  selector = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 selector));
  DONE;
}
)
5423
;; Expander that widens the lower half of operand 1.  It builds a
;; PARALLEL of the lane numbers 0 .. nunits/2-1 and hands it to
;; neon_vec_unpack<US>_lo_<mode> as the lane selector.  Little-endian
;; only.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec lanes = rtvec_alloc (half);
  rtx selector;
  int n;

  /* Select the lower half: lanes 0, 1, ...  */
  for (n = 0; n < half; n++)
    RTVEC_ELT (lanes, n) = GEN_INT (n);

  selector = gen_rtx_PARALLEL (<MODE>mode, lanes);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 selector));
  DONE;
}
)
5441
;; Widening multiply of the "low" halves of operands 1 and 3.  Both
;; inputs are selected with the same lane parallel (operand 2) and
;; extended per the SE iterator before the multiply; emitted as vmull on
;; the %e halves.  Little-endian only.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_low" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5454
;; Expander for a widening multiply of the lower halves of VU operands 1
;; and 2, with the <V_unpack> result in operand 0.  It builds the low-half
;; lane selector (indices 0 .. nunits/2-1) and emits
;; neon_vec_<US>mult_lo_<mode>, which takes the selector as its third
;; argument.  Little-endian NEON only.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    const int half_lanes = <V_mode_nunits> / 2;
    rtvec sel = rtvec_alloc (half_lanes);
    rtx lo_par;
    int lane = 0;

    /* Selector entries are the indices of the lower half of each source.  */
    while (lane < half_lanes)
      {
        RTVEC_ELT (sel, lane) = GEN_INT (lane);
        lane++;
      }

    lo_par = gen_rtx_PARALLEL (<MODE>mode, sel);
    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], operands[1],
                                                lo_par, operands[2]));
    DONE;
  }
)
5475
;; Widening multiply of matching halves of two VU-mode vectors.  Operand 2
;; (vect_par_constant_high) selects a <V_HALF> of operand 1, and the same
;; selector is reused via match_dup 2 for operand 3.  Both halves are extended
;; with SE to <V_unpack> and multiplied into operand 0.
;; Emitted as a single vmull with both sources printed using %f.
;; Enabled only for TARGET_NEON with !BYTES_BIG_ENDIAN.
5476 (define_insn "neon_vec_<US>mult_hi_<mode>"
5477 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5478 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5479 (match_operand:VU 1 "register_operand" "w")
5480 (match_operand:VU 2 "vect_par_constant_high" "")))
5481 (SE:<V_unpack> (vec_select:<V_HALF>
5482 (match_operand:VU 3 "register_operand" "w")
5483 (match_dup 2)))))]
5484 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5485 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5486 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5487 )
5488
;; Expander for a widening multiply of the upper halves of VU operands 1
;; and 2, with the <V_unpack> result in operand 0.  It builds the high-half
;; lane selector (indices nunits/2 .. nunits-1) and emits
;; neon_vec_<US>mult_hi_<mode>, which takes the selector as its third
;; argument.  Little-endian NEON only.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    const int half_lanes = <V_mode_nunits> / 2;
    rtvec sel = rtvec_alloc (half_lanes);
    rtx hi_par;
    int lane = 0;

    /* Selector entries are the indices of the upper half of each source.  */
    while (lane < half_lanes)
      {
        RTVEC_ELT (sel, lane) = GEN_INT (half_lanes + lane);
        lane++;
      }

    hi_par = gen_rtx_PARALLEL (<MODE>mode, sel);
    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], operands[1],
                                                hi_par, operands[2]));
    DONE;
  }
)
5510
;; Widening left shift of a VW-mode vector.  Operand 1 is shifted left by
;; operand 2 (which must satisfy const_neon_scalar_shift_amount_operand) and
;; the result is extended with SE to <V_widen> in operand 0.
;; Emitted as a single vshll; the condition is TARGET_NEON only, with no
;; endianness restriction.
;; NOTE(review): as written, the RTL performs the ashift in the narrow VW mode
;; and only then extends, whereas a widening-shift instruction presumably
;; extends first -- confirm that nothing relies on the RTL semantics for
;; shift amounts that overflow the narrow element.
5511 (define_insn "neon_vec_<US>shiftl_<mode>"
5512 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5513 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5514 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5515 "TARGET_NEON"
5516 {
5517 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5518 }
5519 [(set_attr "type" "neon_shift_imm_long")]
5520 )
5521
;; Expander for a widening left shift of one half of VU operand 1 by the
;; immediate in operand 2, with the <V_unpack> result in operand 0.  The half
;; is taken as the <V_HALF> subreg at byte offset 0 of operand 1 and passed to
;; neon_vec_<US>shiftl_<V_half>.  Little-endian NEON only.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    /* Half of the source vector that starts at byte offset 0.  */
    rtx src_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                        <MODE>mode, 0);

    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], src_half,
                                                 operands[2]));
    DONE;
  }
)
5534
;; Expander for a widening left shift of one half of VU operand 1 by the
;; immediate in operand 2, with the <V_unpack> result in operand 0.  The half
;; is taken as the <V_HALF> subreg at byte offset GET_MODE_SIZE (<V_HALF>mode)
;; of operand 1 and passed to neon_vec_<US>shiftl_<V_half>.
;; Little-endian NEON only.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    /* Half of the source vector that starts one <V_HALF> into it.  */
    rtx src_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                        <MODE>mode,
                                        GET_MODE_SIZE (<V_HALF>mode));

    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], src_half,
                                                 operands[2]));
    DONE;
  }
)
5548
5549 ;; Vectorize for non-neon-quad case
;; Extend a whole VDI-mode vector (operand 1) with SE to <V_widen> in
;; operand 0.  Emitted as a single vmovl with the source printed using %P.
;; The condition is TARGET_NEON only, with no endianness restriction.
5550 (define_insn "neon_unpack<US>_<mode>"
5551 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5552 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5553 "TARGET_NEON"
5554 "vmovl.<US><V_sz_elem> %q0, %P1"
5555 [(set_attr "type" "neon_move")]
5556 )
5557
;; VDI variant of the low-half unpack.  The whole of operand 1 is widened
;; into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>, and operand 0
;; (<V_double_width>) is then filled from it by neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
  {
    rtx widened = gen_reg_rtx (<V_widen>mode);

    /* Step 1: widen every element of the source.  */
    emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
    /* Step 2: extract the requested half of the widened vector.  */
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
    DONE;
  }
)
5570
;; VDI variant of the high-half unpack.  The whole of operand 1 is widened
;; into a fresh <V_widen> pseudo with neon_unpack<US>_<mode>, and operand 0
;; (<V_double_width>) is then filled from it by neon_vget_high<V_widen_l>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
  {
    rtx widened = gen_reg_rtx (<V_widen>mode);

    /* Step 1: widen every element of the source.  */
    emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
    /* Step 2: extract the requested half of the widened vector.  */
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
    DONE;
  }
)
5583
;; Widening multiply of two whole VDI-mode vectors.  Operands 1 and 2 are
;; each extended with SE to <V_widen> and multiplied into operand 0.
;; Emitted as a single vmull with both sources printed using %P.
;; The condition is TARGET_NEON only, with no endianness restriction.
5584 (define_insn "neon_vec_<US>mult_<mode>"
5585 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5586 (mult:<V_widen> (SE:<V_widen>
5587 (match_operand:VDI 1 "register_operand" "w"))
5588 (SE:<V_widen>
5589 (match_operand:VDI 2 "register_operand" "w"))))]
5590 "TARGET_NEON"
5591 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5592 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5593 )
5594
;; VDI variant of the high-half widening multiply.  The full widening product
;; of operands 1 and 2 is computed into a fresh <V_widen> pseudo with
;; neon_vec_<US>mult_<mode>, and operand 0 (<V_double_width>) is then filled
;; from it by neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    rtx product = gen_reg_rtx (<V_widen>mode);

    /* Step 1: full-width widening multiply.  */
    emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                             operands[2]));
    /* Step 2: extract the requested half of the product.  */
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
    DONE;
  }
)
5609
;; VDI variant of the low-half widening multiply.  The full widening product
;; of operands 1 and 2 is computed into a fresh <V_widen> pseudo with
;; neon_vec_<US>mult_<mode>, and operand 0 (<V_double_width>) is then filled
;; from it by neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    rtx product = gen_reg_rtx (<V_widen>mode);

    /* Step 1: full-width widening multiply.  */
    emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                             operands[2]));
    /* Step 2: extract the requested half of the product.  */
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
    DONE;
  }
)
5624
;; VDI variant of the high-half widening left shift.  The whole of operand 1
;; is shifted by the immediate in operand 2 into a fresh <V_widen> pseudo with
;; neon_vec_<US>shiftl_<mode>, and operand 0 (<V_double_width>) is then filled
;; from it by neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
  {
    rtx shifted = gen_reg_rtx (<V_widen>mode);

    /* Step 1: widening shift of the whole source vector.  */
    emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                               operands[2]));
    /* Step 2: extract the requested half of the shifted vector.  */
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
    DONE;
  }
)
5638
;; VDI variant of the low-half widening left shift.  The whole of operand 1
;; is shifted by the immediate in operand 2 into a fresh <V_widen> pseudo with
;; neon_vec_<US>shiftl_<mode>, and operand 0 (<V_double_width>) is then filled
;; from it by neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
  {
    rtx shifted = gen_reg_rtx (<V_widen>mode);

    /* Step 1: widening shift of the whole source vector.  */
    emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                               operands[2]));
    /* Step 2: extract the requested half of the shifted vector.  */
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
    DONE;
  }
)
5652
5653 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5654 ; because the ordering of vector elements in Q registers is different from what
5655 ; the semantics of the instructions require.
5656
;; Pack two VN-mode vectors into one narrower-element vector.  Operands 1 and
;; 2 are each truncated to <V_narrow> and concatenated (operand 1 first) into
;; the <V_narrow_pack> operand 0.
;; Emitted as two vmovn instructions, writing %e0 from operand 1 and then %f0
;; from operand 2; hence type "multiple" and length 8.
;; Operand 0 is earlyclobber ("=&w") because the first vmovn writes part of it
;; before operand 2 has been read.
;; Enabled only for TARGET_NEON with !BYTES_BIG_ENDIAN.
5657 (define_insn "vec_pack_trunc_<mode>"
5658 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5659 (vec_concat:<V_narrow_pack>
5660 (truncate:<V_narrow>
5661 (match_operand:VN 1 "register_operand" "w"))
5662 (truncate:<V_narrow>
5663 (match_operand:VN 2 "register_operand" "w"))))]
5664 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5665 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5666 [(set_attr "type" "multiple")
5667 (set_attr "length" "8")]
5668 )
5669
5670 ;; For the non-quad case.
;; Truncate a single VN-mode vector (operand 1) to <V_narrow> in operand 0.
;; Emitted as one vmovn, with the destination printed using %P and the source
;; using %q.  Enabled only for TARGET_NEON with !BYTES_BIG_ENDIAN.
5671 (define_insn "neon_vec_pack_trunc_<mode>"
5672 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5673 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5674 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5675 "vmovn.i<V_sz_elem>\t%P0, %q1"
5676 [(set_attr "type" "neon_move_narrow_q")]
5677 )
5678
;; VSHFT variant of the truncating pack.  Operands 1 and 2 are placed into a
;; fresh <V_DOUBLE> pseudo with move_lo_quad_<V_double> and
;; move_hi_quad_<V_double> respectively, and that pseudo is then narrowed into
;; operand 0 by neon_vec_pack_trunc_<V_double>.  Little-endian NEON only.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
   (match_operand:VSHFT 1 "register_operand" "")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtx joined = gen_reg_rtx (<V_DOUBLE>mode);

    /* Assemble both inputs into one double-width register.  */
    emit_insn (gen_move_lo_quad_<V_double> (joined, operands[1]));
    emit_insn (gen_move_hi_quad_<V_double> (joined, operands[2]));

    /* Narrow the combined vector in a single step.  */
    emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], joined));
    DONE;
  })
5692
;; Absolute difference expressed as abs (minus (op1, op2)) over VDQ modes.
;; Emitted as a single vabd with the element suffix taken from <V_s_elem>.
;; For float modes (<Is_float_mode> non-zero) the pattern is only enabled
;; under flag_unsafe_math_optimizations.
;; The "type" attribute is chosen by the same <Is_float_mode> test:
;; neon_fp_abd_s<q> for float modes, neon_abd<q> otherwise.
5693 (define_insn "neon_vabd<mode>_2"
5694 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5695 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5696 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5697 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5698 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5699 [(set (attr "type")
5700 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5701 (const_string "neon_fp_abd_s<q>")
5702 (const_string "neon_abd<q>")))]
5703 )
5704
;; Absolute difference where the subtraction appears as an UNSPEC_VSUB of
;; operands 1 and 2 (rather than a plain minus) wrapped in abs, over VDQ modes.
;; Emitted as a single vabd with the element suffix taken from <V_if_elem>.
;; For float modes (<Is_float_mode> non-zero) the pattern is only enabled
;; under flag_unsafe_math_optimizations.
;; The "type" attribute is chosen by the same <Is_float_mode> test:
;; neon_fp_abd_s<q> for float modes, neon_abd<q> otherwise.
5705 (define_insn "neon_vabd<mode>_3"
5706 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5707 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5708 (match_operand:VDQ 2 "s_register_operand" "w")]
5709 UNSPEC_VSUB)))]
5710 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5711 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5712 [(set (attr "type")
5713 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5714 (const_string "neon_fp_abd_s<q>")
5715 (const_string "neon_abd<q>")))]
5716 )
5717
5718 ;; Copy from core-to-neon regs, then extend, not vice-versa
5719
;; Post-reload split of DI = sign_extend (SI) when the destination hard
;; register satisfies IS_VFP_REGNUM.  The replacement sequence is:
;;   1. duplicate operand 1 across the V2SI view of the destination register
;;      (operands[2] is that same hard register re-typed as V2SImode);
;;   2. arithmetic-shift the DI destination right by 32, so the copy in the
;;      upper 32 bits ends up sign-extended across the whole 64 bits.
5720 (define_split
5721 [(set (match_operand:DI 0 "s_register_operand" "")
5722 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5723 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5724 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5725 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5726 {
5727 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5728 })
5729
;; Post-reload split of DI = sign_extend (HI) when the destination hard
;; register satisfies IS_VFP_REGNUM.  The replacement sequence is:
;;   1. duplicate operand 1 across the V4HI view of the destination register
;;      (operands[2] is that same hard register re-typed as V4HImode);
;;   2. arithmetic-shift the DI destination right by 48, so the copy in the
;;      upper 16 bits ends up sign-extended across the whole 64 bits.
5730 (define_split
5731 [(set (match_operand:DI 0 "s_register_operand" "")
5732 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5733 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5734 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5735 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5736 {
5737 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5738 })
5739
;; Post-reload split of DI = sign_extend (QI) when the destination hard
;; register satisfies IS_VFP_REGNUM.  The replacement sequence is:
;;   1. duplicate operand 1 across the V8QI view of the destination register
;;      (operands[2] is that same hard register re-typed as V8QImode);
;;   2. arithmetic-shift the DI destination right by 56, so the copy in the
;;      upper 8 bits ends up sign-extended across the whole 64 bits.
5740 (define_split
5741 [(set (match_operand:DI 0 "s_register_operand" "")
5742 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5743 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5744 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5745 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5746 {
5747 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5748 })
5749
;; Post-reload split of DI = zero_extend (SI) when the destination hard
;; register satisfies IS_VFP_REGNUM.  The replacement sequence is:
;;   1. duplicate operand 1 across the V2SI view of the destination register
;;      (operands[2] is that same hard register re-typed as V2SImode);
;;   2. logical-shift the DI destination right by 32, so the copy in the
;;      upper 32 bits ends up zero-extended across the whole 64 bits.
5750 (define_split
5751 [(set (match_operand:DI 0 "s_register_operand" "")
5752 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5753 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5754 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5755 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5756 {
5757 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5758 })
5759
;; Post-reload split of DI = zero_extend (HI) when the destination hard
;; register satisfies IS_VFP_REGNUM.  The replacement sequence is:
;;   1. duplicate operand 1 across the V4HI view of the destination register
;;      (operands[2] is that same hard register re-typed as V4HImode);
;;   2. logical-shift the DI destination right by 48, so the copy in the
;;      upper 16 bits ends up zero-extended across the whole 64 bits.
5760 (define_split
5761 [(set (match_operand:DI 0 "s_register_operand" "")
5762 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5763 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5764 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5765 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5766 {
5767 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5768 })
5769
;; Post-reload split of DI = zero_extend (QI) when the destination hard
;; register satisfies IS_VFP_REGNUM.  The replacement sequence is:
;;   1. duplicate operand 1 across the V8QI view of the destination register
;;      (operands[2] is that same hard register re-typed as V8QImode);
;;   2. logical-shift the DI destination right by 56, so the copy in the
;;      upper 8 bits ends up zero-extended across the whole 64 bits.
5770 (define_split
5771 [(set (match_operand:DI 0 "s_register_operand" "")
5772 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5773 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5774 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5775 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5776 {
5777 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5778 })