[ARM] remove unused variable
[gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The permitted values are "vadd", "vmin" and "vmax"; the default value
;; is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move insn for the modes in the VDX iterator.  The insn condition requires
;; at least one of the two operands to be a register.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDX 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VDX 1 "general_operand"
          " w,w, Dn,Uni, w, r, r, Usi,r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Buffer for the immediate-move template built in case 2.  It is static
     because a pointer to it is returned to the caller.  */
  static char imm_template[40];
  int elt_width;
  int imm_ok;

  switch (which_alternative)
    {
    case 0:
      return "vmov\t%P0, %P1 @ <mode>";

    case 1:
    case 3:
      return output_move_neon (operands);

    case 2:
      /* Immediate source (constraint Dn).  The helper is handed
         &operands[1] and &elt_width as output arguments.  */
      imm_ok = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                              &operands[1], &elt_width);
      gcc_assert (imm_ok != 0);

      /* A reported width of zero selects the .f32 form.  */
      if (elt_width == 0)
        return "vmov.f32\t%P0, %1 @ <mode>";

      sprintf (imm_template, "vmov.i%d\t%%P0, %%x1 @ <mode>", elt_width);
      return imm_template;

    case 4:
      return "vmov\t%Q0, %R0, %P1 @ <mode>";

    case 5:
      return "vmov\t%P0, %Q1, %R1 @ <mode>";

    default:
      return output_move_double (operands, true, NULL);
    }
}
  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,neon_load1_2reg, neon_store1_2reg")
   (set_attr "length" "4,4,4,4,4,4,8,8,8")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move insn for the modes in the VQXMOV iterator.  The insn condition
;; requires at least one of the two operands to be a register.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dn,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Buffer for the immediate-move template built in case 2.  It is static
     because a pointer to it is returned to the caller.  */
  static char imm_template[40];
  int elt_width;
  int imm_ok;

  switch (which_alternative)
    {
    case 0:
      return "vmov\t%q0, %q1 @ <mode>";

    case 1:
    case 3:
      return output_move_neon (operands);

    case 2:
      /* Immediate source (constraint Dn).  The helper is handed
         &operands[1] and &elt_width as output arguments.  */
      imm_ok = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                              &operands[1], &elt_width);
      gcc_assert (imm_ok != 0);

      /* A reported width of zero selects the .f32 form.  */
      if (elt_width == 0)
        return "vmov.f32\t%q0, %1 @ <mode>";

      sprintf (imm_template, "vmov.i%d\t%%q0, %%1 @ <mode>", elt_width);
      return imm_template;

    case 4:
      /* Two vmov instructions, one per half of the source.  */
      return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";

    case 5:
      /* Two vmov instructions, one per half of the destination.  */
      return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";

    default:
      return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; TImode move expander.  While new pseudos may still be created, a move
;; whose destination is not a register gets its source forced into one.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (match_operand:TI 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (TImode, operands[1]);
})
127
;; Move expander for the VSTRUCT modes.  While new pseudos may still be
;; created, a move whose destination is not a register gets its source
;; forced into one.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
        (match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
139
;; Move insn for the VSTRUCT modes.  At least one operand must be a
;; register.  The register-to-register alternative outputs "#", so it is
;; emitted only after being split; the memory alternatives go through
;; output_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 0)
    return "#";

  if (which_alternative == 1 || which_alternative == 2)
    return output_move_neon (operands);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
156
;; After reload, split an EImode register-to-register move into a TImode
;; piece and a DImode piece.  The pieces are handed to
;; neon_disambiguate_copy, which is expected to fill in operands 0-3 used
;; by the replacement pattern.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  rtx dest[2];
  rtx src[2];
  int dst_base = REGNO (operands[0]);
  int src_base = REGNO (operands[1]);

  /* First piece: TImode, starting at the base register number.  */
  dest[0] = gen_rtx_REG (TImode, dst_base);
  src[0] = gen_rtx_REG (TImode, src_base);

  /* Second piece: DImode, four register numbers further on.  */
  dest[1] = gen_rtx_REG (DImode, dst_base + 4);
  src[1] = gen_rtx_REG (DImode, src_base + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})
175
;; After reload, split an OImode register-to-register move into two TImode
;; pieces.  The pieces are handed to neon_disambiguate_copy, which is
;; expected to fill in operands 0-3 used by the replacement pattern.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  rtx dest[2];
  rtx src[2];
  int dst_base = REGNO (operands[0]);
  int src_base = REGNO (operands[1]);
  int piece;

  /* Each TImode piece starts four register numbers after the previous.  */
  for (piece = 0; piece < 2; piece++)
    {
      dest[piece] = gen_rtx_REG (TImode, dst_base + 4 * piece);
      src[piece] = gen_rtx_REG (TImode, src_base + 4 * piece);
    }

  neon_disambiguate_copy (operands, dest, src, 2);
})
194
;; After reload, split a CImode register-to-register move into three TImode
;; pieces.  The pieces are handed to neon_disambiguate_copy, which is
;; expected to fill in operands 0-5 used by the replacement pattern.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  rtx dest[3];
  rtx src[3];
  int dst_base = REGNO (operands[0]);
  int src_base = REGNO (operands[1]);
  int piece;

  /* Each TImode piece starts four register numbers after the previous.  */
  for (piece = 0; piece < 3; piece++)
    {
      dest[piece] = gen_rtx_REG (TImode, dst_base + 4 * piece);
      src[piece] = gen_rtx_REG (TImode, src_base + 4 * piece);
    }

  neon_disambiguate_copy (operands, dest, src, 3);
})
216
;; After reload, split an XImode register-to-register move into four TImode
;; pieces.  The pieces are handed to neon_disambiguate_copy, which is
;; expected to fill in operands 0-7 used by the replacement pattern.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  rtx dest[4];
  rtx src[4];
  int dst_base = REGNO (operands[0]);
  int src_base = REGNO (operands[1]);
  int piece;

  /* Each TImode piece starts four register numbers after the previous.  */
  for (piece = 0; piece < 4; piece++)
    {
      dest[piece] = gen_rtx_REG (TImode, dst_base + 4 * piece);
      src[piece] = gen_rtx_REG (TImode, src_base + 4 * piece);
    }

  neon_disambiguate_copy (operands, dest, src, 4);
})
241
;; Misaligned move expander for the VDQX modes.  Enabled only for
;; little-endian NEON targets with unaligned_access set.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
                     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!s_register_operand (operands[0], <MODE>mode)
      && !s_register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* The operand whose address may need fixing is the one opposite the
     register destination; otherwise it is the destination itself.
     NOTE(review): this assumes the chosen operand is a MEM -- confirm that
     callers never pass two registers.  */
  rtx adjust_mem = (s_register_operand (operands[0], <MODE>mode)
                    ? operands[1]
                    : operands[0]);

  /* Legitimize address.  */
  if (!neon_vector_mem_operand (adjust_mem, 2, true))
    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
})
266
;; Store a VDX-mode register to memory through the misaligned-access unspec
;; using vst1.  Enabled only for little-endian NEON targets with
;; unaligned_access set.
267 (define_insn "*movmisalign<mode>_neon_store"
268 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
269 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
270 UNSPEC_MISALIGNED_ACCESS))]
271 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
272 "vst1.<V_sz_elem>\t{%P1}, %A0"
273 [(set_attr "type" "neon_store1_1reg<q>")])
274
;; Load a VDX-mode register from memory through the misaligned-access unspec
;; using vld1.  Enabled only for little-endian NEON targets with
;; unaligned_access set.
275 (define_insn "*movmisalign<mode>_neon_load"
276 [(set (match_operand:VDX 0 "s_register_operand" "=w")
277 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
278 " Um")]
279 UNSPEC_MISALIGNED_ACCESS))]
280 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
281 "vld1.<V_sz_elem>\t{%P0}, %A1"
282 [(set_attr "type" "neon_load1_1reg<q>")])
283
;; Store a VQX-mode register to memory through the misaligned-access unspec
;; using vst1 (the register is printed with %q).  Enabled only for
;; little-endian NEON targets with unaligned_access set.
284 (define_insn "*movmisalign<mode>_neon_store"
285 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
286 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
287 UNSPEC_MISALIGNED_ACCESS))]
288 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
289 "vst1.<V_sz_elem>\t{%q1}, %A0"
290 [(set_attr "type" "neon_store1_1reg<q>")])
291
;; Load a VQX-mode register from memory through the misaligned-access unspec
;; using vld1 (the register is printed with %q).  Enabled only for
;; little-endian NEON targets with unaligned_access set.
292 (define_insn "*movmisalign<mode>_neon_load"
293 [(set (match_operand:VQX 0 "s_register_operand" "=w")
294 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
295 " Um")]
296 UNSPEC_MISALIGNED_ACCESS))]
297 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
298 "vld1.<V_sz_elem>\t{%q0}, %A1"
299 [(set_attr "type" "neon_load1_1reg<q>")])
300
;; Insert scalar operand 1 into one lane of a VD-mode vector.  Operand 2 is
;; a bit mask whose lowest set bit identifies the lane; operand 3 is tied
;; to the destination.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VD 0 "s_register_operand" "=w,w")
        (vec_merge:VD
          (vec_duplicate:VD
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the one-bit mask into a lane index.  */
  int lane = ffs ((int) INTVAL (operands[2])) - 1;

  /* Big-endian numbers the lanes from the other end.  */
  if (BYTES_BIG_ENDIAN)
    lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;

  operands[2] = GEN_INT (lane);

  /* Alternative 0 takes the scalar from memory, alternative 1 from a core
     register.  */
  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
321
;; Insert scalar operand 1 into one lane of a VQ2-mode vector.  Operand 2 is
;; a bit mask whose lowest set bit identifies the lane; operand 3 is tied
;; to the destination.  The insn is emitted on the <V_HALF>-mode register
;; that contains the selected lane.
(define_insn "vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the one-bit mask into a lane index within the full vector.  */
  HOST_WIDE_INT full_lane = ffs ((int) INTVAL (operands[2])) - 1;
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  /* Lane index within the selected half.  */
  int lane = full_lane % lanes_per_half;
  /* Register-number offset of the selected half: 0 or 2.  */
  int half_offset = (full_lane / lanes_per_half) * 2;
  int base_regno = REGNO (operands[0]);

  /* Big-endian numbers the lanes of a half from the other end.  */
  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[0] = gen_rtx_REG (<V_HALF>mode, base_regno + half_offset);
  operands[2] = GEN_INT (lane);

  /* Alternative 0 takes the scalar from memory, alternative 1 from a core
     register.  */
  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)
350
;; Insert DImode operand 1 into one element of a V2DI vector.  Operand 2 is
;; a bit mask whose lowest set bit identifies the element; operand 3 is tied
;; to the destination.  The insn is emitted on the DImode register holding
;; the selected element.
(define_insn "vec_setv2di_internal"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the one-bit mask into an element index.  */
  HOST_WIDE_INT element = ffs ((int) INTVAL (operands[2])) - 1;
  /* Each element is two register numbers wide.  */
  int element_regno = REGNO (operands[0]) + 2 * element;

  operands[0] = gen_rtx_REG (DImode, element_regno);

  /* Alternative 0 takes the value from memory, alternative 1 from a pair
     of core registers.  */
  return (which_alternative == 0
          ? "vld1.64\t%P0, %A1"
          : "vmov\t%P0, %Q1, %R1");
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)
372
;; Standard-name expander for setting one vector element.  Operand 2 is the
;; element index; it is turned into a one-bit mask before being passed to
;; the vec_set<mode>_internal pattern, with operand 0 serving both as the
;; destination and as the vector being merged into.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand:<V_elem> 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_mask = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
  rtx mask_rtx = GEN_INT (lane_mask);

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         mask_rtx, operands[0]));
  DONE;
})
384
;; Extract lane operand 2 of a VD-mode vector into memory (alternative 0)
;; or a core register (alternative 1).
(define_insn "vec_extract<mode>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  /* Big-endian numbers the lanes from the other end.  */
  if (BYTES_BIG_ENDIAN)
    {
      int lane = INTVAL (operands[2]);

      lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (lane);
    }

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
406
;; Extract lane operand 2 of a VQ2-mode vector into memory (alternative 0)
;; or a core register (alternative 1).  The insn is emitted on the
;; <V_HALF>-mode register that contains the selected lane.
(define_insn "vec_extract<mode>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  /* Lane index within the selected half.  */
  int lane = INTVAL (operands[2]) % lanes_per_half;
  /* Register-number offset of the selected half: 0 or 2.  */
  int half_offset = (INTVAL (operands[2]) / lanes_per_half) * 2;
  int base_regno = REGNO (operands[1]);

  /* Big-endian numbers the lanes of a half from the other end.  */
  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  operands[1] = gen_rtx_REG (<V_HALF>mode, base_regno + half_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
432
;; Extract element operand 2 of a V2DI vector into memory (alternative 0)
;; or a pair of core registers (alternative 1).  The insn is emitted on the
;; DImode register holding the selected element.
(define_insn "vec_extractv2di"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  /* Each element is two register numbers wide.  */
  int element_regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, element_regno);

  return (which_alternative == 0
          ? "vst1.64\t{%P1}, %A0 @ v2di"
          : "vmov\t%Q0, %R0, %P1 @ v2di");
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)
451
;; Vector initialisation expander for the VDQ modes.  All of the work is
;; delegated to neon_expand_vector_init; the pattern itself emits nothing
;; further (DONE).
452 (define_expand "vec_init<mode>"
453 [(match_operand:VDQ 0 "s_register_operand" "")
454 (match_operand 1 "" "")]
455 "TARGET_NEON"
456 {
457 neon_expand_vector_init (operands[0], operands[1]);
458 DONE;
459 })
460
461 ;; Doubleword and quadword arithmetic.
462
463 ;; NOTE: some other instructions also support 64-bit integer
464 ;; element size, which we could potentially use for "long long" operations.
465
;; Vector addition for the VDQ modes (vadd).  For floating-point modes the
;; pattern is enabled only under flag_unsafe_math_optimizations, and the
;; "type" attribute switches between the FP and integer add types.
466 (define_insn "*add<mode>3_neon"
467 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469 (match_operand:VDQ 2 "s_register_operand" "w")))]
470 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
471 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
472 [(set (attr "type")
473 (if_then_else (match_test "<Is_float_mode>")
474 (const_string "neon_fp_addsub_s<q>")
475 (const_string "neon_add<q>")))]
476 )
477
;; DImode addition.  Alternatives 0 and 3 emit a single vadd.i64; all other
;; alternatives output "#" and therefore have to be split before final
;; output.  The pattern clobbers the condition-code register.
(define_insn "adddi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
                 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0:
    case 3:
      /* Alternatives whose operands all use the "w" constraint.  */
      return "vadd.i64\t%P0, %P1, %P2";

    case 1:
    case 2:
    case 4:
    case 5:
    case 6:
      /* Left for a splitter.  */
      return "#";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_add,multiple,multiple,neon_add,multiple,multiple,multiple")
   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
   (set_attr "length" "*,8,8,*,8,8,8")
   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)
503
;; Vector subtraction for the VDQ modes (vsub).  For floating-point modes
;; the pattern is enabled only under flag_unsafe_math_optimizations, and the
;; "type" attribute switches between the FP and integer subtract types.
504 (define_insn "*sub<mode>3_neon"
505 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
506 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
507 (match_operand:VDQ 2 "s_register_operand" "w")))]
508 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
509 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
510 [(set (attr "type")
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_sub<q>")))]
514 )
515
;; DImode subtraction.  Alternatives 0 and 4 emit a single vsub.i64; the
;; remaining alternatives emit a subs/sbc pair on the low and high words.
;; The pattern clobbers the condition-code register.
(define_insn "subdi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
        (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
                  (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0:
    case 4:
      /* Alternatives whose operands all use the "w" constraint.  */
      return "vsub.i64\t%P0, %P1, %P2";

    case 1:
    case 2:
    case 3:
      /* Subtract the low words (%Q), then the high words (%R) with sbc.  */
      return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
   (set_attr "conds" "*,clob,clob,clob,*")
   (set_attr "length" "*,8,8,8,*")
   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)
538
;; Vector multiplication for the VDQW modes (vmul).  For floating-point
;; modes the pattern is enabled only under flag_unsafe_math_optimizations.
539 (define_insn "*mul<mode>3_neon"
540 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
541 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
542 (match_operand:VDQW 2 "s_register_operand" "w")))]
543 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
544 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
545 [(set (attr "type")
546 (if_then_else (match_test "<Is_float_mode>")
547 (const_string "neon_fp_mul_s<q>")
548 (const_string "neon_mul_<V_elem_ch><q>")))]
549 )
550
;; Multiply-accumulate for the VDQW modes (vmla): operand 1 + operand 2 *
;; operand 3.  The accumulator (operand 1) is tied to the destination by
;; the "0" constraint.  For floating-point modes the pattern is enabled only
;; under flag_unsafe_math_optimizations.
551 (define_insn "mul<mode>3add<mode>_neon"
552 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
553 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
554 (match_operand:VDQW 3 "s_register_operand" "w"))
555 (match_operand:VDQW 1 "s_register_operand" "0")))]
556 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
557 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
558 [(set (attr "type")
559 (if_then_else (match_test "<Is_float_mode>")
560 (const_string "neon_fp_mla_s<q>")
561 (const_string "neon_mla_<V_elem_ch><q>")))]
562 )
563
;; Multiply-subtract for the VDQW modes (vmls): operand 1 - operand 2 *
;; operand 3.  The accumulator (operand 1) is tied to the destination by
;; the "0" constraint.  For floating-point modes the pattern is enabled only
;; under flag_unsafe_math_optimizations.
564 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
565 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
566 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
567 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
568 (match_operand:VDQW 3 "s_register_operand" "w"))))]
569 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
570 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
571 [(set (attr "type")
572 (if_then_else (match_test "<Is_float_mode>")
573 (const_string "neon_fp_mla_s<q>")
574 (const_string "neon_mla_<V_elem_ch><q>")))]
575 )
576
577 ;; Fused multiply-accumulate
578 ;; We define each insn twice here:
579 ;; 1: conditional on flag_unsafe_math_optimizations, so that the widening
580 ;; multiply phase is able to use it when converting to FMA.
581 ;; 2: without flag_unsafe_math_optimizations, for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes (vfma), standard-name variant.
;; Requires TARGET_FMA and flag_unsafe_math_optimizations.  The addend
;; (operand 3) is tied to the destination by the "0" constraint.
582 (define_insn "fma<VCVTF:mode>4"
583 [(set (match_operand:VCVTF 0 "register_operand" "=w")
584 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
585 (match_operand:VCVTF 2 "register_operand" "w")
586 (match_operand:VCVTF 3 "register_operand" "0")))]
587 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
588 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
589 [(set_attr "type" "neon_fp_mla_s<q>")]
590 )
591
;; Fused multiply-add for the VCVTF modes (vfma), intrinsic variant: the
;; same RTL and output as fma<VCVTF:mode>4 but without the
;; flag_unsafe_math_optimizations requirement.
592 (define_insn "fma<VCVTF:mode>4_intrinsic"
593 [(set (match_operand:VCVTF 0 "register_operand" "=w")
594 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
595 (match_operand:VCVTF 2 "register_operand" "w")
596 (match_operand:VCVTF 3 "register_operand" "0")))]
597 "TARGET_NEON && TARGET_FMA"
598 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
599 [(set_attr "type" "neon_fp_mla_s<q>")]
600 )
601
;; Fused multiply-subtract for the VCVTF modes (vfms): operand 1 is negated
;; inside the fma.  Requires TARGET_FMA and flag_unsafe_math_optimizations.
;; The addend (operand 3) is tied to the destination by the "0" constraint.
602 (define_insn "*fmsub<VCVTF:mode>4"
603 [(set (match_operand:VCVTF 0 "register_operand" "=w")
604 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
605 (match_operand:VCVTF 2 "register_operand" "w")
606 (match_operand:VCVTF 3 "register_operand" "0")))]
607 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
608 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
609 [(set_attr "type" "neon_fp_mla_s<q>")]
610 )
611
;; Fused multiply-subtract for the VCVTF modes (vfms), intrinsic variant:
;; the same RTL and output as *fmsub<VCVTF:mode>4 but without the
;; flag_unsafe_math_optimizations requirement.
612 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA"
618 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
620 )
621
;; vrint<variant> for the VCVTF modes, one pattern per NEON_VRINT unspec.
;; Requires TARGET_FPU_ARMV8.  The element-size suffix in the output
;; template is hard-coded as .f32.
622 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
624 (unspec:VCVTF [(match_operand:VCVTF 1
625 "s_register_operand" "w")]
626 NEON_VRINT))]
627 "TARGET_NEON && TARGET_FPU_ARMV8"
628 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
629 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
630 )
631
;; vcvt<variant> from a VCVTF-mode vector to the <V_cmp_result> mode, signed
;; or unsigned according to the FIXUORS code iterator.  Requires
;; TARGET_FPU_ARMV8.  The pattern is marked as not predicable.
632 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
634 (FIXUORS:<V_cmp_result> (unspec:VCVTF
635 [(match_operand:VCVTF 1 "register_operand" "w")]
636 NEON_VCVT)))]
637 "TARGET_NEON && TARGET_FPU_ARMV8"
638 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
639 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
640 (set_attr "predicable" "no")]
641 )
642
;; Bitwise inclusive OR for the VDQ modes.  Alternative 0 is the
;; register-register form; alternative 1 takes an immediate (constraint Dl)
;; with operand 1 tied to the destination, and its text is produced by
;; neon_output_logic_immediate.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vorr", &operands[2], <MODE>mode, 0,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
659
660 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
661 ;; vorr. We support the pseudo-instruction vand instead, because that
662 ;; corresponds to the canonical form the middle-end expects to use for
663 ;; immediate bitwise-ANDs.
664
;; Bitwise AND for the VDQ modes.  Alternative 0 is the register-register
;; form; alternative 1 takes an immediate (constraint DL) with operand 1
;; tied to the destination, and its text is produced by
;; neon_output_logic_immediate.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vand", &operands[2], <MODE>mode, 1,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
681
;; OR-NOT for the VDQ modes (vorn): operand 1 | ~operand 2.  Note that the
;; inverted input is operand 2, which appears first in the RTL.
682 (define_insn "orn<mode>3_neon"
683 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
684 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
685 (match_operand:VDQ 1 "s_register_operand" "w")))]
686 "TARGET_NEON"
687 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
688 [(set_attr "type" "neon_logic<q>")]
689 )
690
691 ;; TODO: investigate whether we should disable
692 ;; this and bicdi3_neon for the A8 in line with the other
693 ;; changes above.
;; DImode OR-NOT: operand 1 | ~operand 2.  Alternative 0 emits a single
;; vorn.  The other alternatives output "#" and are split after reload when
;; the destination is not a VFP register.
(define_insn_and_split "orndi3_neon"
  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
        (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
                (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
  "TARGET_NEON"
  "@
   vorn\t%P0, %P1, %P2
   #
   #
   #"
  "reload_completed &&
   (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
   (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
  {
    /* Outside Thumb-2 the replacement pattern above is not used: emit a
       one's complement followed by an inclusive OR and finish.  */
    if (!TARGET_THUMB2)
      {
        emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
        emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
        DONE;
      }

    /* Thumb-2: split each DImode operand into SImode halves.  The high
       part of each operand is taken before the operand is overwritten
       with its low part.  */
    operands[3] = gen_highpart (SImode, operands[0]);
    operands[0] = gen_lowpart (SImode, operands[0]);
    operands[4] = gen_highpart (SImode, operands[2]);
    operands[2] = gen_lowpart (SImode, operands[2]);
    operands[5] = gen_highpart (SImode, operands[1]);
    operands[1] = gen_lowpart (SImode, operands[1]);
  }
  [(set_attr "type" "neon_logic,multiple,multiple,multiple")
   (set_attr "length" "*,16,8,8")
   (set_attr "arch" "any,a,t2,t2")]
)
730
;; Bit clear for the VDQ modes (vbic): operand 1 & ~operand 2.  Note that
;; the inverted input is operand 2, which appears first in the RTL.
731 (define_insn "bic<mode>3_neon"
732 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
733 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
734 (match_operand:VDQ 1 "s_register_operand" "w")))]
735 "TARGET_NEON"
736 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_logic<q>")]
738 )
739
740 ;; Compare to *anddi_notdi_di.
;; DImode bit clear: operand 1 & ~operand 2.  Alternative 0 emits a single
;; vbic; alternatives 1 and 2 output "#" and so must be split before final
;; output (no splitter is attached to this pattern itself).
741 (define_insn "bicdi3_neon"
742 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
743 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
744 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
745 "TARGET_NEON"
746 "@
747 vbic\t%P0, %P1, %P2
748 #
749 #"
750 [(set_attr "type" "neon_logic,multiple,multiple")
751 (set_attr "length" "*,8,8")]
752 )
753
;; Bitwise exclusive OR for the VDQ modes (veor), register operands only.
754 (define_insn "xor<mode>3"
755 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
756 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
757 (match_operand:VDQ 2 "s_register_operand" "w")))]
758 "TARGET_NEON"
759 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
760 [(set_attr "type" "neon_logic<q>")]
761 )
762
;; Bitwise complement for the VDQ modes (vmvn).
763 (define_insn "one_cmpl<mode>2"
764 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
765 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
766 "TARGET_NEON"
767 "vmvn\t%<V_reg>0, %<V_reg>1"
768 [(set_attr "type" "neon_move<q>")]
769 )
770
;; Absolute value for the VDQW modes (vabs).  The "type" attribute switches
;; between the FP and integer types according to <Is_float_mode>.
771 (define_insn "abs<mode>2"
772 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
773 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
774 "TARGET_NEON"
775 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
776 [(set (attr "type")
777 (if_then_else (match_test "<Is_float_mode>")
778 (const_string "neon_fp_abs_s<q>")
779 (const_string "neon_abs<q>")))]
780 )
781
;; Negation for the VDQW modes (vneg).  The "type" attribute switches
;; between the FP and integer types according to <Is_float_mode>.
782 (define_insn "neg<mode>2"
783 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
784 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
785 "TARGET_NEON"
786 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
787 [(set (attr "type")
788 (if_then_else (match_test "<Is_float_mode>")
789 (const_string "neon_fp_neg_s<q>")
790 (const_string "neon_neg<q>")))]
791 )
792
;; DImode negation.  Every alternative outputs "#", so the insn is always
;; split (see the two define_splits that follow).  It clobbers a DImode
;; scratch (only alternative 1 asks for a real "w" register; the others use
;; X) and the condition-code register.
793 (define_insn "negdi2_neon"
794 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
795 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
796 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
797 (clobber (reg:CC CC_REGNUM))]
798 "TARGET_NEON"
799 "#"
800 [(set_attr "length" "8")
801 (set_attr "type" "multiple")]
802 )
803
804 ; Split negdi2_neon for vfp registers
; After reload, when the destination is a VFP register, the negation is
; rewritten as: set operand 2 to zero, then compute operand 2 - operand 1
; into the destination (with a CC clobber).
805 (define_split
806 [(set (match_operand:DI 0 "s_register_operand" "")
807 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
808 (clobber (match_scratch:DI 2 ""))
809 (clobber (reg:CC CC_REGNUM))]
810 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
811 [(set (match_dup 2) (const_int 0))
812 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
813 (clobber (reg:CC CC_REGNUM))])]
814 {
/* If the scratch did not end up as a register, use the destination itself
   as the zeroed temporary.  */
815 if (!REG_P (operands[2]))
816 operands[2] = operands[0];
817 }
818 )
819
820 ; Split negdi2_neon for core registers
; After reload, when the destination satisfies arm_general_register_operand,
; the scratch clobber is dropped and the insn becomes a plain DImode neg
; with a CC clobber.  Note the condition tests TARGET_32BIT rather than
; TARGET_NEON.
821 (define_split
822 [(set (match_operand:DI 0 "s_register_operand" "")
823 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
824 (clobber (match_scratch:DI 2 ""))
825 (clobber (reg:CC CC_REGNUM))]
826 "TARGET_32BIT && reload_completed
827 && arm_general_register_operand (operands[0], DImode)"
828 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
829 (clobber (reg:CC CC_REGNUM))])]
830 ""
831 )
832
;; Unsigned minimum for the VDQIW modes (vmin with the <V_u_elem> suffix).
833 (define_insn "*umin<mode>3_neon"
834 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
835 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
836 (match_operand:VDQIW 2 "s_register_operand" "w")))]
837 "TARGET_NEON"
838 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
839 [(set_attr "type" "neon_minmax<q>")]
840 )
841
;; Unsigned maximum for the VDQIW modes (vmax with the <V_u_elem> suffix).
842 (define_insn "*umax<mode>3_neon"
843 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
844 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
845 (match_operand:VDQIW 2 "s_register_operand" "w")))]
846 "TARGET_NEON"
847 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
848 [(set_attr "type" "neon_minmax<q>")]
849 )
850
;; Signed minimum for the VDQW modes (vmin with the <V_s_elem> suffix).  The
;; "type" attribute switches between the FP and integer types according to
;; <Is_float_mode>.
851 (define_insn "*smin<mode>3_neon"
852 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
853 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
854 (match_operand:VDQW 2 "s_register_operand" "w")))]
855 "TARGET_NEON"
856 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
857 [(set (attr "type")
858 (if_then_else (match_test "<Is_float_mode>")
859 (const_string "neon_fp_minmax_s<q>")
860 (const_string "neon_minmax<q>")))]
861 )
862
;; Signed maximum for the VDQW modes (vmax with the <V_s_elem> suffix).  The
;; "type" attribute switches between the FP and integer types according to
;; <Is_float_mode>.
863 (define_insn "*smax<mode>3_neon"
864 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
865 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
866 (match_operand:VDQW 2 "s_register_operand" "w")))]
867 "TARGET_NEON"
868 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869 [(set (attr "type")
870 (if_then_else (match_test "<Is_float_mode>")
871 (const_string "neon_fp_minmax_s<q>")
872 (const_string "neon_minmax<q>")))]
873 )
874
875 ; TODO: V2DI shifts are currently disabled because there are bugs in the
876 ; generic vectorizer code. It ends up creating a V2DI constructor with
877 ; SImode elements.
878
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a
;; register; alternative 1 shifts by an immediate (constraint Dn), whose
;; text is produced by neon_output_shift_immediate.
(define_insn "vashl<mode>3"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
        (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
                      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_shift_immediate ("vshl", 'i', &operands[2],
                                        <MODE>mode,
                                        VALID_NEON_QREG_MODE (<MODE>mode),
                                        true);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
)
897
;; Arithmetic right shift by an immediate for the VDQIW modes.  The output
;; text is produced by neon_output_shift_immediate with mnemonic "vshr" and
;; sign character 's'.
898 (define_insn "vashr<mode>3_imm"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
902 "TARGET_NEON"
903 {
904 return neon_output_shift_immediate ("vshr", 's', &operands[2],
905 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
906 false);
907 }
908 [(set_attr "type" "neon_shift_imm<q>")]
909 )
910
;; Vector logical right shift (lshiftrt) by a constant for the VDQIW modes.
;; The assembly text comes from neon_output_shift_immediate, called with the
;; "vshr" mnemonic and the 'u' sign character.
911 (define_insn "vlshr<mode>3_imm"
912 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
913 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
914 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
915 "TARGET_NEON"
916 {
917 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
918 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
919 false);
920 }
921 [(set_attr "type" "neon_shift_imm<q>")]
922 )
923
924 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
925 ; amount, with signed operands. This is essentially the same as vashl<mode>3
926 ; above, but using an unspec in case GCC tries anything tricky with negative
927 ; shift amounts.
; The vashr<mode>3 expander emits this pattern with a negated shift count.
928
929 (define_insn "ashl<mode>3_signed"
930 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
931 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
932 (match_operand:VDQI 2 "s_register_operand" "w")]
933 UNSPEC_ASHIFT_SIGNED))]
934 "TARGET_NEON"
935 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
936 [(set_attr "type" "neon_shift_reg<q>")]
937 )
938
939 ; Used for implementing logical shift-right, which is a left-shift by a negative
940 ; amount, with unsigned operands.
; The vlshr<mode>3 expander emits this pattern with a negated shift count.
; As with the signed variant, the operation is hidden behind an unspec
; (UNSPEC_ASHIFT_UNSIGNED) and printed as vshl.<V_u_elem>.
941
942 (define_insn "ashl<mode>3_unsigned"
943 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
944 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
945 (match_operand:VDQI 2 "s_register_operand" "w")]
946 UNSPEC_ASHIFT_UNSIGNED))]
947 "TARGET_NEON"
948 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
949 [(set_attr "type" "neon_shift_reg<q>")]
950 )
951
;; Expander for vector arithmetic right shift in the VDQIW modes.
;; If operand 2 is a register, its negation is computed into a fresh pseudo
;; and ashl<mode>3_signed is emitted with that negated count.  Otherwise
;; operand 2 is passed unchanged to vashr<mode>3_imm.
952 (define_expand "vashr<mode>3"
953 [(set (match_operand:VDQIW 0 "s_register_operand" "")
954 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
955 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
956 "TARGET_NEON"
957 {
958 if (s_register_operand (operands[2], <MODE>mode))
959 {
960 rtx neg = gen_reg_rtx (<MODE>mode);
961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
962 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
963 }
964 else
965 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
966 DONE;
967 })
968
;; Expander for vector logical right shift in the VDQIW modes.
;; If operand 2 is a register, its negation is computed into a fresh pseudo
;; and ashl<mode>3_unsigned is emitted with that negated count.  Otherwise
;; operand 2 is passed unchanged to vlshr<mode>3_imm.
969 (define_expand "vlshr<mode>3"
970 [(set (match_operand:VDQIW 0 "s_register_operand" "")
971 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
972 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
973 "TARGET_NEON"
974 {
975 if (s_register_operand (operands[2], <MODE>mode))
976 {
977 rtx neg = gen_reg_rtx (<MODE>mode);
978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
979 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
980 }
981 else
982 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
983 DONE;
984 })
985
986 ;; 64-bit shifts
987
988 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
989 ;; leaving the upper half uninitialized. This is OK since the shift
990 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
991 ;; data flow analysis however, we pretend the full register is set
992 ;; using an unspec.
;; Alternative 0 takes the count from memory (constraint Um) with vld1.32;
;; alternative 1 takes it from a core register with vmov.32.
993 (define_insn "neon_load_count"
994 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
995 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
996 UNSPEC_LOAD_COUNT))]
997 "TARGET_NEON"
998 "@
999 vld1.32\t{%P0[0]}, %A1
1000 vmov.32\t%P0[0], %1"
1001 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1002 )
1003
;; DImode left shift in a NEON register with no clobbers, emitted by the
;; ashldi3_neon splitter.  Only matches after reload.  A constant shift count
;; must lie in the range [0, 63]; a register count is used as-is.
1004 (define_insn "ashldi3_neon_noclobber"
1005 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1006 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1007 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1008 "TARGET_NEON && reload_completed
1009 && (!CONST_INT_P (operands[2])
1010 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1011 "@
1012 vshl.u64\t%P0, %P1, %2
1013 vshl.u64\t%P0, %P1, %P2"
1014 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1015 )
1016
;; DImode left shift with alternatives for NEON (w) and core (r) registers.
;; The insn is always output as "#" and split after reload:
;;  - Destination satisfies IS_VFP_REGNUM:
;;      * constant count < 1: emit a plain DImode move and finish;
;;      * constant count > 63: the count is replaced by 63;
;;      * non-constant count: load it into scratch operand 5 with
;;        neon_load_count and shift by that register;
;;    then emit ashldi3_neon_noclobber.
;;  - Otherwise (core registers): a shift by constant 1 uses
;;    arm_ashldi3_1bit when source and destination either do not overlap or
;;    start at the same register; every other case is handed to
;;    arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1017 (define_insn_and_split "ashldi3_neon"
1018 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1019 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1020 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1021 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1022 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1023 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1024 (clobber (reg:CC_C CC_REGNUM))]
1025 "TARGET_NEON"
1026 "#"
1027 "TARGET_NEON && reload_completed"
1028 [(const_int 0)]
1029 "
1030 {
1031 if (IS_VFP_REGNUM (REGNO (operands[0])))
1032 {
1033 if (CONST_INT_P (operands[2]))
1034 {
1035 if (INTVAL (operands[2]) < 1)
1036 {
1037 emit_insn (gen_movdi (operands[0], operands[1]));
1038 DONE;
1039 }
1040 else if (INTVAL (operands[2]) > 63)
1041 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1042 }
1043 else
1044 {
1045 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1046 operands[2] = operands[5];
1047 }
1048
1049 /* Ditch the unnecessary clobbers. */
1050 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1051 operands[2]));
1052 }
1053 else
1054 {
1055 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1056 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1057 || REGNO (operands[0]) == REGNO (operands[1])))
1058 /* This clobbers CC. */
1059 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1060 else
1061 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1062 operands[2], operands[3], operands[4]);
1063 }
1064 DONE;
1065 }"
1066 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1067 (set_attr "opt" "*,*,speed,speed,*,*")
1068 (set_attr "type" "multiple")]
1069 )
1070
1071 ; The shift amount needs to be negated for right-shifts
; DImode vshl.s64 by a register count, expressed as UNSPEC_ASHIFT_SIGNED.
; Only matches after reload; the <shift>di3_neon splitter emits it with an
; already-negated count.
1072 (define_insn "signed_shift_di3_neon"
1073 [(set (match_operand:DI 0 "s_register_operand" "=w")
1074 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1075 (match_operand:DI 2 "s_register_operand" " w")]
1076 UNSPEC_ASHIFT_SIGNED))]
1077 "TARGET_NEON && reload_completed"
1078 "vshl.s64\t%P0, %P1, %P2"
1079 [(set_attr "type" "neon_shift_reg")]
1080 )
1081
1082 ; The shift amount needs to be negated for right-shifts
; DImode vshl.u64 by a register count, expressed as UNSPEC_ASHIFT_UNSIGNED.
; Only matches after reload; the <shift>di3_neon splitter emits it with an
; already-negated count.
1083 (define_insn "unsigned_shift_di3_neon"
1084 [(set (match_operand:DI 0 "s_register_operand" "=w")
1085 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1086 (match_operand:DI 2 "s_register_operand" " w")]
1087 UNSPEC_ASHIFT_UNSIGNED))]
1088 "TARGET_NEON && reload_completed"
1089 "vshl.u64\t%P0, %P1, %P2"
1090 [(set_attr "type" "neon_shift_reg")]
1091 )
1092
;; DImode arithmetic right shift by a constant in a NEON register, with no
;; clobbers.  Only matches after reload and only for counts in [1, 64].
1093 (define_insn "ashrdi3_neon_imm_noclobber"
1094 [(set (match_operand:DI 0 "s_register_operand" "=w")
1095 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1096 (match_operand:DI 2 "const_int_operand" " i")))]
1097 "TARGET_NEON && reload_completed
1098 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1099 "vshr.s64\t%P0, %P1, %2"
1100 [(set_attr "type" "neon_shift_imm")]
1101 )
1102
;; DImode logical right shift by a constant in a NEON register, with no
;; clobbers.  Only matches after reload and only for counts in [1, 64].
1103 (define_insn "lshrdi3_neon_imm_noclobber"
1104 [(set (match_operand:DI 0 "s_register_operand" "=w")
1105 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1106 (match_operand:DI 2 "const_int_operand" " i")))]
1107 "TARGET_NEON && reload_completed
1108 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1109 "vshr.u64\t%P0, %P1, %2"
1110 [(set_attr "type" "neon_shift_imm")]
1111 )
1112
1113 ;; ashrdi3_neon
1114 ;; lshrdi3_neon
;; DImode right shifts (iterated over RSHIFTS) with alternatives for NEON (w)
;; and core (r) registers.  Always output as "#" and split after reload:
;;  - Destination satisfies IS_VFP_REGNUM:
;;      * constant count < 1: emit a plain DImode move and finish;
;;      * constant count > 64: the count is replaced by 64;
;;      * other constant counts: emit <shift>di3_neon_imm_noclobber;
;;      * register count: negate it into scratch operand 3, load that into
;;        scratch operand 5 with neon_load_count, and emit the
;;        <shifttype>_shift_di3_neon left shift by the negated count.
;;  - Otherwise (core registers): a shift by constant 1 uses
;;    arm_<shift>di3_1bit when source and destination either do not overlap
;;    or start at the same register; every other case is handed to
;;    arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1115 (define_insn_and_split "<shift>di3_neon"
1116 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1117 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1118 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1119 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1120 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1121 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1122 (clobber (reg:CC CC_REGNUM))]
1123 "TARGET_NEON"
1124 "#"
1125 "TARGET_NEON && reload_completed"
1126 [(const_int 0)]
1127 "
1128 {
1129 if (IS_VFP_REGNUM (REGNO (operands[0])))
1130 {
1131 if (CONST_INT_P (operands[2]))
1132 {
1133 if (INTVAL (operands[2]) < 1)
1134 {
1135 emit_insn (gen_movdi (operands[0], operands[1]));
1136 DONE;
1137 }
1138 else if (INTVAL (operands[2]) > 64)
1139 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1140
1141 /* Ditch the unnecessary clobbers. */
1142 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1143 operands[1],
1144 operands[2]));
1145 }
1146 else
1147 {
1148 /* We must use a negative left-shift. */
1149 emit_insn (gen_negsi2 (operands[3], operands[2]));
1150 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1151 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1152 operands[5]));
1153 }
1154 }
1155 else
1156 {
1157 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1158 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1159 || REGNO (operands[0]) == REGNO (operands[1])))
1160 /* This clobbers CC. */
1161 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1162 else
1163 /* This clobbers CC (ASHIFTRT by register only). */
1164 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1165 operands[2], operands[3], operands[4]);
1166 }
1167
1168 DONE;
1169 }"
1170 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1171 (set_attr "opt" "*,*,speed,speed,*,*")
1172 (set_attr "type" "multiple")]
1173 )
1174
1175 ;; Widening operations
1176
;; Signed widening sum: operand 1 (a VW-mode vector) is sign-extended to
;; <V_widen> and added to the already-wide operand 2.  Note the operand order
;; in the vaddw template: the wide addend (operand 2) comes first.
1177 (define_insn "widen_ssum<mode>3"
1178 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1179 (plus:<V_widen> (sign_extend:<V_widen>
1180 (match_operand:VW 1 "s_register_operand" "%w"))
1181 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1182 "TARGET_NEON"
1183 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1184 [(set_attr "type" "neon_add_widen")]
1185 )
1186
;; Unsigned widening sum: operand 1 (a VW-mode vector) is zero-extended to
;; <V_widen> and added to the already-wide operand 2.  Note the operand order
;; in the vaddw template: the wide addend (operand 2) comes first.
1187 (define_insn "widen_usum<mode>3"
1188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1189 (plus:<V_widen> (zero_extend:<V_widen>
1190 (match_operand:VW 1 "s_register_operand" "%w"))
1191 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1192 "TARGET_NEON"
1193 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1194 [(set_attr "type" "neon_add_widen")]
1195 )
1196
1197 ;; Helpers for quad-word reduction operations
1198
1199 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1200 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1201 ; N/2-element vector.
1202
;; V4SI variant: applies the VQH_OPS operation to elements {0,1} and {2,3} of
;; operand 1 and writes the V2SI result to operand 0.
1203 (define_insn "quad_halves_<code>v4si"
1204 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1205 (VQH_OPS:V2SI
1206 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1207 (parallel [(const_int 0) (const_int 1)]))
1208 (vec_select:V2SI (match_dup 1)
1209 (parallel [(const_int 2) (const_int 3)]))))]
1210 "TARGET_NEON"
1211 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1212 [(set_attr "vqh_mnem" "<VQH_mnem>")
1213 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1214 )
1215
;; V4SF variant: applies the VQHS_OPS operation to elements {0,1} and {2,3}
;; of operand 1 and writes the V2SF result to operand 0.  Only enabled with
;; flag_unsafe_math_optimizations.
1216 (define_insn "quad_halves_<code>v4sf"
1217 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1218 (VQHS_OPS:V2SF
1219 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1220 (parallel [(const_int 0) (const_int 1)]))
1221 (vec_select:V2SF (match_dup 1)
1222 (parallel [(const_int 2) (const_int 3)]))))]
1223 "TARGET_NEON && flag_unsafe_math_optimizations"
1224 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1225 [(set_attr "vqh_mnem" "<VQH_mnem>")
1226 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1227 )
1228
;; V8HI variant: applies the VQH_OPS operation to elements {0..3} and {4..7}
;; of operand 1 and writes the V4HI result to operand 0.
;; Operand 0 is the plain destination of the SET and is never read, so it
;; uses the write-only "=w" constraint, like the v4si and v4sf variants.
1229 (define_insn "quad_halves_<code>v8hi"
1230 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1231 (VQH_OPS:V4HI
1232 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1233 (parallel [(const_int 0) (const_int 1)
1234 (const_int 2) (const_int 3)]))
1235 (vec_select:V4HI (match_dup 1)
1236 (parallel [(const_int 4) (const_int 5)
1237 (const_int 6) (const_int 7)]))))]
1238 "TARGET_NEON"
1239 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1240 [(set_attr "vqh_mnem" "<VQH_mnem>")
1241 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1242 )
1243
;; V16QI variant: applies the VQH_OPS operation to elements {0..7} and
;; {8..15} of operand 1 and writes the V8QI result to operand 0.
;; Operand 0 is the plain destination of the SET and is never read, so it
;; uses the write-only "=w" constraint, like the v4si and v4sf variants.
1244 (define_insn "quad_halves_<code>v16qi"
1245 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1246 (VQH_OPS:V8QI
1247 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1248 (parallel [(const_int 0) (const_int 1)
1249 (const_int 2) (const_int 3)
1250 (const_int 4) (const_int 5)
1251 (const_int 6) (const_int 7)]))
1252 (vec_select:V8QI (match_dup 1)
1253 (parallel [(const_int 8) (const_int 9)
1254 (const_int 10) (const_int 11)
1255 (const_int 12) (const_int 13)
1256 (const_int 14) (const_int 15)]))))]
1257 "TARGET_NEON"
1258 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1259 [(set_attr "vqh_mnem" "<VQH_mnem>")
1260 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1261 )
1262
;; Copies the half-width operand 1 into the <V_HALF>mode subreg of the
;; 128-bit operand 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode).
1263 (define_expand "move_hi_quad_<mode>"
1264 [(match_operand:ANY128 0 "s_register_operand" "")
1265 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1266 "TARGET_NEON"
1267 {
1268 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1269 GET_MODE_SIZE (<V_HALF>mode)),
1270 operands[1]);
1271 DONE;
1272 })
1273
;; Copies the half-width operand 1 into the <V_HALF>mode subreg of the
;; 128-bit operand 0 that starts at byte offset 0.
1274 (define_expand "move_lo_quad_<mode>"
1275 [(match_operand:ANY128 0 "s_register_operand" "")
1276 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1277 "TARGET_NEON"
1278 {
1279 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1280 <MODE>mode, 0),
1281 operands[1]);
1282 DONE;
1283 })
1284
1285 ;; Reduction operations
1286
;; Sum reduction of a VD-mode vector (operand 1) into the scalar operand 0.
;; neon_pairwise_reduce is driven with the vpadd generator, then element 0 of
;; the resulting vector is extracted.  Float modes require
;; flag_unsafe_math_optimizations.
1287 (define_expand "reduc_plus_scal_<mode>"
1288 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1289 (match_operand:VD 1 "s_register_operand" "")]
1290 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1291 {
1292 rtx vec = gen_reg_rtx (<MODE>mode);
1293 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1294 &gen_neon_vpadd_internal<mode>);
1295 /* The same result is actually computed into every element. */
1296 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1297 DONE;
1298 })
1299
;; Sum reduction of a VQ-mode vector (operand 1) into the scalar operand 0.
;; The two halves are first combined with quad_halves_plus<mode> into a
;; <V_HALF>mode temporary, which is then reduced by the half-width
;; reduc_plus_scal expander.  Not available for big-endian; float modes
;; require flag_unsafe_math_optimizations.
1300 (define_expand "reduc_plus_scal_<mode>"
1301 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1302 (match_operand:VQ 1 "s_register_operand" "")]
1303 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1304 && !BYTES_BIG_ENDIAN"
1305 {
1306 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1307
1308 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1309 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1310
1311 DONE;
1312 })
1313
;; Sum reduction of a V2DI vector (operand 1) into the DImode operand 0.
;; arm_reduc_plus_internal_v2di computes the sum into a V2DI temporary, from
;; which element 0 is extracted.  Not available for big-endian.
1314 (define_expand "reduc_plus_scal_v2di"
1315 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1316 (match_operand:V2DI 1 "s_register_operand" "")]
1317 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1318 {
1319 rtx vec = gen_reg_rtx (V2DImode);
1320
1321 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1322 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1323
1324 DONE;
1325 })
1326
;; Helper for reduc_plus_scal_v2di, modelled as UNSPEC_VPADD.  The template
;; is a single vadd.i64 whose sources are %e1 and %f1 and whose destination
;; is %e0.  Not available for big-endian.
1327 (define_insn "arm_reduc_plus_internal_v2di"
1328 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1329 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1330 UNSPEC_VPADD))]
1331 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1332 "vadd.i64\t%e0, %e1, %f1"
1333 [(set_attr "type" "neon_add_q")]
1334 )
1335
;; Signed-minimum reduction of a VD-mode vector (operand 1) into the scalar
;; operand 0.  neon_pairwise_reduce is driven with the vpsmin generator, then
;; element 0 of the resulting vector is extracted.  Float modes require
;; flag_unsafe_math_optimizations.
1336 (define_expand "reduc_smin_scal_<mode>"
1337 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1338 (match_operand:VD 1 "s_register_operand" "")]
1339 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1340 {
1341 rtx vec = gen_reg_rtx (<MODE>mode);
1342
1343 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1344 &gen_neon_vpsmin<mode>);
1345 /* The result is computed into every element of the vector. */
1346 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1347 DONE;
1348 })
1349
;; Signed-minimum reduction of a VQ-mode vector (operand 1) into the scalar
;; operand 0.  The two halves are combined with quad_halves_smin<mode> into a
;; <V_HALF>mode temporary, which is then reduced by the half-width
;; reduc_smin_scal expander.  Not available for big-endian; float modes
;; require flag_unsafe_math_optimizations.
1350 (define_expand "reduc_smin_scal_<mode>"
1351 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1352 (match_operand:VQ 1 "s_register_operand" "")]
1353 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1354 && !BYTES_BIG_ENDIAN"
1355 {
1356 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1357
1358 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1359 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1360
1361 DONE;
1362 })
1363
;; Signed-maximum reduction of a VD-mode vector (operand 1) into the scalar
;; operand 0.  neon_pairwise_reduce is driven with the vpsmax generator, then
;; element 0 of the resulting vector is extracted.  Float modes require
;; flag_unsafe_math_optimizations.
1364 (define_expand "reduc_smax_scal_<mode>"
1365 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1366 (match_operand:VD 1 "s_register_operand" "")]
1367 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1368 {
1369 rtx vec = gen_reg_rtx (<MODE>mode);
1370 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1371 &gen_neon_vpsmax<mode>);
1372 /* The result is computed into every element of the vector. */
1373 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1374 DONE;
1375 })
1376
;; Signed-maximum reduction of a VQ-mode vector (operand 1) into the scalar
;; operand 0.  The two halves are combined with quad_halves_smax<mode> into a
;; <V_HALF>mode temporary, which is then reduced by the half-width
;; reduc_smax_scal expander.  Not available for big-endian; float modes
;; require flag_unsafe_math_optimizations.
1377 (define_expand "reduc_smax_scal_<mode>"
1378 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1379 (match_operand:VQ 1 "s_register_operand" "")]
1380 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1381 && !BYTES_BIG_ENDIAN"
1382 {
1383 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1384
1385 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1386 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1387
1388 DONE;
1389 })
1390
;; Unsigned-minimum reduction of a VDI-mode vector (operand 1) into the
;; scalar operand 0.  neon_pairwise_reduce is driven with the vpumin
;; generator, then element 0 of the resulting vector is extracted.
1391 (define_expand "reduc_umin_scal_<mode>"
1392 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1393 (match_operand:VDI 1 "s_register_operand" "")]
1394 "TARGET_NEON"
1395 {
1396 rtx vec = gen_reg_rtx (<MODE>mode);
1397 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1398 &gen_neon_vpumin<mode>);
1399 /* The result is computed into every element of the vector. */
1400 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1401 DONE;
1402 })
1403
;; Unsigned-minimum reduction of a VQI-mode vector (operand 1) into the
;; scalar operand 0.  The two halves are combined with
;; quad_halves_umin<mode> into a <V_HALF>mode temporary, which is then
;; reduced by the half-width reduc_umin_scal expander.  Not available for
;; big-endian.
1404 (define_expand "reduc_umin_scal_<mode>"
1405 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1406 (match_operand:VQI 1 "s_register_operand" "")]
1407 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1408 {
1409 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1410
1411 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1412 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1413
1414 DONE;
1415 })
1416
;; Unsigned-maximum reduction of a VDI-mode vector (operand 1) into the
;; scalar operand 0.  neon_pairwise_reduce is driven with the vpumax
;; generator, then element 0 of the resulting vector is extracted.
1417 (define_expand "reduc_umax_scal_<mode>"
1418 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1419 (match_operand:VDI 1 "s_register_operand" "")]
1420 "TARGET_NEON"
1421 {
1422 rtx vec = gen_reg_rtx (<MODE>mode);
1423 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1424 &gen_neon_vpumax<mode>);
1425 /* The result is computed into every element of the vector. */
1426 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1427 DONE;
1428 })
1429
;; Unsigned-maximum reduction of a VQI-mode vector (operand 1) into the
;; scalar operand 0.  The two halves are combined with
;; quad_halves_umax<mode> into a <V_HALF>mode temporary, which is then
;; reduced by the half-width reduc_umax_scal expander.  Not available for
;; big-endian.
1430 (define_expand "reduc_umax_scal_<mode>"
1431 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1432 (match_operand:VQI 1 "s_register_operand" "")]
1433 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1434 {
1435 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1436
1437 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1438 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1439
1440 DONE;
1441 })
1442
;; Pairwise add of two VD-mode vectors, modelled as UNSPEC_VPADD and printed
;; as vpadd.  Passed as the generator to neon_pairwise_reduce by the VD
;; reduc_plus_scal expander.
1443 (define_insn "neon_vpadd_internal<mode>"
1444 [(set (match_operand:VD 0 "s_register_operand" "=w")
1445 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1446 (match_operand:VD 2 "s_register_operand" "w")]
1447 UNSPEC_VPADD))]
1448 "TARGET_NEON"
1449 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1450 ;; Assume this schedules like vadd.
1451 [(set (attr "type")
1452 (if_then_else (match_test "<Is_float_mode>")
1453 (const_string "neon_fp_reduc_add_s<q>")
1454 (const_string "neon_reduc_add<q>")))]
1455 )
1456
;; Pairwise signed minimum of two VD-mode vectors, modelled as UNSPEC_VPSMIN
;; and printed as vpmin.<V_s_elem>.  Passed as the generator to
;; neon_pairwise_reduce by the VD reduc_smin_scal expander.
1457 (define_insn "neon_vpsmin<mode>"
1458 [(set (match_operand:VD 0 "s_register_operand" "=w")
1459 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1460 (match_operand:VD 2 "s_register_operand" "w")]
1461 UNSPEC_VPSMIN))]
1462 "TARGET_NEON"
1463 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1464 [(set (attr "type")
1465 (if_then_else (match_test "<Is_float_mode>")
1466 (const_string "neon_fp_reduc_minmax_s<q>")
1467 (const_string "neon_reduc_minmax<q>")))]
1468 )
1469
;; Pairwise signed maximum of two VD-mode vectors, modelled as UNSPEC_VPSMAX
;; and printed as vpmax.<V_s_elem>.  Passed as the generator to
;; neon_pairwise_reduce by the VD reduc_smax_scal expander.
1470 (define_insn "neon_vpsmax<mode>"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")]
1474 UNSPEC_VPSMAX))]
1475 "TARGET_NEON"
1476 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1477 [(set (attr "type")
1478 (if_then_else (match_test "<Is_float_mode>")
1479 (const_string "neon_fp_reduc_minmax_s<q>")
1480 (const_string "neon_reduc_minmax<q>")))]
1481 )
1482
;; Pairwise unsigned minimum of two VDI-mode vectors, modelled as
;; UNSPEC_VPUMIN and printed as vpmin.<V_u_elem>.  Passed as the generator to
;; neon_pairwise_reduce by the VDI reduc_umin_scal expander.
1483 (define_insn "neon_vpumin<mode>"
1484 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1485 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1486 (match_operand:VDI 2 "s_register_operand" "w")]
1487 UNSPEC_VPUMIN))]
1488 "TARGET_NEON"
1489 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1490 [(set_attr "type" "neon_reduc_minmax<q>")]
1491 )
1492
;; Pairwise unsigned maximum of two VDI-mode vectors, modelled as
;; UNSPEC_VPUMAX and printed as vpmax.<V_u_elem>.  Passed as the generator to
;; neon_pairwise_reduce by the VDI reduc_umax_scal expander.
1493 (define_insn "neon_vpumax<mode>"
1494 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1495 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1496 (match_operand:VDI 2 "s_register_operand" "w")]
1497 UNSPEC_VPUMAX))]
1498 "TARGET_NEON"
1499 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1500 [(set_attr "type" "neon_reduc_minmax<q>")]
1501 )
1502
1503 ;; Saturating arithmetic
1504
1505 ; NOTE: Neon supports many more saturating variants of instructions than the
1506 ; following, but these are all GCC currently understands.
1507 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1508 ; yet either, although these patterns may be used by intrinsics when they're
1509 ; added.
1510
;; Signed saturating addition (ss_plus) of two VD-mode vectors, printed as
;; vqadd.<V_s_elem>.
1511 (define_insn "*ss_add<mode>_neon"
1512 [(set (match_operand:VD 0 "s_register_operand" "=w")
1513 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1514 (match_operand:VD 2 "s_register_operand" "w")))]
1515 "TARGET_NEON"
1516 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1517 [(set_attr "type" "neon_qadd<q>")]
1518 )
1519
;; Unsigned saturating addition (us_plus) of two VD-mode vectors, printed as
;; vqadd.<V_u_elem>.
1520 (define_insn "*us_add<mode>_neon"
1521 [(set (match_operand:VD 0 "s_register_operand" "=w")
1522 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1523 (match_operand:VD 2 "s_register_operand" "w")))]
1524 "TARGET_NEON"
1525 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1526 [(set_attr "type" "neon_qadd<q>")]
1527 )
1528
;; Signed saturating subtraction (ss_minus) of two VD-mode vectors, printed
;; as vqsub.<V_s_elem>.
1529 (define_insn "*ss_sub<mode>_neon"
1530 [(set (match_operand:VD 0 "s_register_operand" "=w")
1531 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1532 (match_operand:VD 2 "s_register_operand" "w")))]
1533 "TARGET_NEON"
1534 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1535 [(set_attr "type" "neon_qsub<q>")]
1536 )
1537
;; Unsigned saturating subtraction (us_minus) of two VD-mode vectors, printed
;; as vqsub.<V_u_elem>.
1538 (define_insn "*us_sub<mode>_neon"
1539 [(set (match_operand:VD 0 "s_register_operand" "=w")
1540 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1541 (match_operand:VD 2 "s_register_operand" "w")))]
1542 "TARGET_NEON"
1543 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1544 [(set_attr "type" "neon_qsub<q>")]
1545 )
1546
1547 ;; Conditional instructions. These are comparisons with conditional moves for
1548 ;; vectors. They perform the assignment:
1549 ;;
1550 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1551 ;;
1552 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1553 ;; element-wise.
1554
;; Vector conditional select for the VDQW modes.  A lane mask is built in a
;; <V_cmp_result>mode pseudo from comparison operand 3 applied to operands 4
;; and 5, and neon_vbsl then selects between operand 1 and operand 2.
;; Float modes require flag_unsafe_math_optimizations.
;; The body works in three steps:
;;  1. For GE/GT/LE/LT/EQ against CONST0_RTX the constant is kept so the
;;     compare-with-zero forms can be used; otherwise operand 5 is forced
;;     into a register.
;;  2. A base comparison generator and its complementary generator are
;;     chosen, together with an "inverse" flag.
;;  3. The mask is emitted; for the unordered-aware codes and NE the inverse
;;     comparison is emitted and the two vbsl data operands are swapped.
1555 (define_expand "vcond<mode><mode>"
1556 [(set (match_operand:VDQW 0 "s_register_operand" "")
1557 (if_then_else:VDQW
1558 (match_operator 3 "comparison_operator"
1559 [(match_operand:VDQW 4 "s_register_operand" "")
1560 (match_operand:VDQW 5 "nonmemory_operand" "")])
1561 (match_operand:VDQW 1 "s_register_operand" "")
1562 (match_operand:VDQW 2 "s_register_operand" "")))]
1563 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1564 {
1565 int inverse = 0;
1566 int use_zero_form = 0;
1567 int swap_bsl_operands = 0;
1568 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1569 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1570
1571 rtx (*base_comparison) (rtx, rtx, rtx);
1572 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1573
1574 switch (GET_CODE (operands[3]))
1575 {
1576 case GE:
1577 case GT:
1578 case LE:
1579 case LT:
1580 case EQ:
1581 if (operands[5] == CONST0_RTX (<MODE>mode))
1582 {
1583 use_zero_form = 1;
1584 break;
1585 }
1586 /* Fall through. */
1587 default:
1588 if (!REG_P (operands[5]))
1589 operands[5] = force_reg (<MODE>mode, operands[5]);
1590 }
1591
1592 switch (GET_CODE (operands[3]))
1593 {
1594 case LT:
1595 case UNLT:
1596 inverse = 1;
1597 /* Fall through. */
1598 case GE:
1599 case UNGE:
1600 case ORDERED:
1601 case UNORDERED:
1602 base_comparison = gen_neon_vcge<mode>;
1603 complimentary_comparison = gen_neon_vcgt<mode>;
1604 break;
1605 case LE:
1606 case UNLE:
1607 inverse = 1;
1608 /* Fall through. */
1609 case GT:
1610 case UNGT:
1611 base_comparison = gen_neon_vcgt<mode>;
1612 complimentary_comparison = gen_neon_vcge<mode>;
1613 break;
1614 case EQ:
1615 case NE:
1616 case UNEQ:
1617 base_comparison = gen_neon_vceq<mode>;
1618 complimentary_comparison = gen_neon_vceq<mode>;
1619 break;
1620 default:
1621 gcc_unreachable ();
1622 }
1623
1624 switch (GET_CODE (operands[3]))
1625 {
1626 case LT:
1627 case LE:
1628 case GT:
1629 case GE:
1630 case EQ:
1631 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1632 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1633 a GE b -> a GE b
1634 a GT b -> a GT b
1635 a LE b -> b GE a
1636 a LT b -> b GT a
1637 a EQ b -> a EQ b
1638 Note that there also exist direct comparison against 0 forms,
1639 so catch those as a special case. */
1640 if (use_zero_form)
1641 {
1642 inverse = 0;
1643 switch (GET_CODE (operands[3]))
1644 {
1645 case LT:
1646 base_comparison = gen_neon_vclt<mode>;
1647 break;
1648 case LE:
1649 base_comparison = gen_neon_vcle<mode>;
1650 break;
1651 default:
1652 /* Do nothing, other zero form cases already have the correct
1653 base_comparison. */
1654 break;
1655 }
1656 }
1657
1658 if (!inverse)
1659 emit_insn (base_comparison (mask, operands[4], operands[5]));
1660 else
1661 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1662 break;
1663 case UNLT:
1664 case UNLE:
1665 case UNGT:
1666 case UNGE:
1667 case NE:
1668 /* Vector compare returns false for lanes which are unordered, so if we use
1669 the inverse of the comparison we actually want to emit, then
1670 swap the operands to BSL, we will end up with the correct result.
1671 Note that a NE NaN and NaN NE b are true for all a, b.
1672
1673 Our transformations are:
1674 a GE b -> !(b GT a)
1675 a GT b -> !(b GE a)
1676 a LE b -> !(a GT b)
1677 a LT b -> !(a GE b)
1678 a NE b -> !(a EQ b) */
1679
1680 if (inverse)
1681 emit_insn (base_comparison (mask, operands[4], operands[5]));
1682 else
1683 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1684
1685 swap_bsl_operands = 1;
1686 break;
1687 case UNEQ:
1688 /* We check (a > b || b > a). combining these comparisons give us
1689 true iff !(a != b && a ORDERED b), swapping the operands to BSL
1690 will then give us (a == b || a UNORDERED b) as intended. */
1691
1692 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1693 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1694 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1695 swap_bsl_operands = 1;
1696 break;
1697 case UNORDERED:
1698 /* Operands are ORDERED iff (a > b || b >= a).
1699 Swapping the operands to BSL will give the UNORDERED case. */
1700 swap_bsl_operands = 1;
1701 /* Fall through. */
1702 case ORDERED:
1703 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1704 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1705 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1706 break;
1707 default:
1708 gcc_unreachable ();
1709 }
1710
1711 if (swap_bsl_operands)
1712 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1713 operands[1]));
1714 else
1715 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1716 operands[2]));
1717 DONE;
1718 })
1719
;; Unsigned vector conditional select for the VDQIW modes.  A lane mask is
;; built in a <V_cmp_result>mode pseudo from comparison operand 3 applied to
;; operands 4 and 5, and neon_vbsl then selects between operand 1 and
;; operand 2.  Handled codes: GEU, GTU, EQ, LEU, LTU and NE.  LEU/LTU are
;; implemented as GEU/GTU with the comparison operands swapped; NE is
;; implemented as EQ with the two vbsl data operands swapped.
;; Operand 5 is always brought into a register: the compare-with-zero
;; generators (vcle/vclt) are not used here because they are presumably the
;; signed forms, which would be wrong for LEU/LTU -- confirm against the
;; neon_vcle<mode>/neon_vclt<mode> definitions.
1720 (define_expand "vcondu<mode><mode>"
1721 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1722 (if_then_else:VDQIW
1723 (match_operator 3 "arm_comparison_operator"
1724 [(match_operand:VDQIW 4 "s_register_operand" "")
1725 (match_operand:VDQIW 5 "s_register_operand" "")])
1726 (match_operand:VDQIW 1 "s_register_operand" "")
1727 (match_operand:VDQIW 2 "s_register_operand" "")))]
1728 "TARGET_NEON"
1729 {
1730 rtx mask;
1731 int inverse = 0;
1732
1733 mask = gen_reg_rtx (<V_cmp_result>mode);
1734
1737 if (!REG_P (operands[5]))
1738 operands[5] = force_reg (<MODE>mode, operands[5]);
1739
1740 switch (GET_CODE (operands[3]))
1741 {
1742 case GEU:
1743 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1744 break;
1745
1746 case GTU:
1747 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1748 break;
1749
1750 case EQ:
1751 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1752 break;
1753
1754 case LEU:
1758 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1759 break;
1760
1761 case LTU:
1765 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1766 break;
1767
1768 case NE:
1769 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1770 inverse = 1;
1771 break;
1772
1773 default:
1774 gcc_unreachable ();
1775 }
1776
1777 if (inverse)
1778 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1779 operands[1]));
1780 else
1781 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1782 operands[2]));
1783
1784 DONE;
1785 })
1786
1787 ;; Patterns for builtins.
1788
1789 ; good for plain vadd, vaddq.
1790
;; Builtin expander for vector add in the VCVTF modes.  For non-float modes,
;; or when flag_unsafe_math_optimizations is set, it emits the generic
;; add<mode>3 pattern; otherwise it emits neon_vadd<mode>_unspec.
1791 (define_expand "neon_vadd<mode>"
1792 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1793 (match_operand:VCVTF 1 "s_register_operand" "w")
1794 (match_operand:VCVTF 2 "s_register_operand" "w")]
1795 "TARGET_NEON"
1796 {
1797 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1798 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1799 else
1800 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1801 operands[2]));
1802 DONE;
1803 })
1804
1805 ; Note that NEON operations don't support the full IEEE 754 standard: in
1806 ; particular, denormal values are flushed to zero. This means that GCC cannot
1807 ; use those instructions for autovectorization, etc. unless
1808 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1809 ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
1810 ; header) must work in either case: if -funsafe-math-optimizations is given,
1811 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1812 ; expand to unspecs (which may potentially limit the extent to which they might
1813 ; be optimized by generic code).
1814
1815 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1816
;; Vector add for the VCVTF modes expressed as UNSPEC_VADD rather than as a
;; plain (plus ...), printed as vadd.<V_if_elem>.  Emitted by the
;; neon_vadd<mode> expander when it does not use add<mode>3.
1817 (define_insn "neon_vadd<mode>_unspec"
1818 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1819 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1820 (match_operand:VCVTF 2 "s_register_operand" "w")]
1821 UNSPEC_VADD))]
1822 "TARGET_NEON"
1823 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1824 [(set (attr "type")
1825 (if_then_else (match_test "<Is_float_mode>")
1826 (const_string "neon_fp_addsub_s<q>")
1827 (const_string "neon_add<q>")))]
1828 )
1829
;; Builtin pattern for vaddl: takes two VDI-mode operands and produces a
;; <V_widen> result.  Iterated over the VADDL unspecs; <sup> supplies the
;; signedness prefix of the element type in the mnemonic.
1830 (define_insn "neon_vaddl<sup><mode>"
1831 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1832 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1833 (match_operand:VDI 2 "s_register_operand" "w")]
1834 VADDL))]
1835 "TARGET_NEON"
1836 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1837 [(set_attr "type" "neon_add_long")]
1838 )
1839
;; vaddw: operand 1 is already in the <V_widen> mode, operand 2 has the
;; narrower VDI mode.
(define_insn "neon_vaddw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)
1849
1850 ; vhadd and vrhadd.
1851
;; vhadd / vrhadd over the VHADD unspec family.
;; The pattern iterates over both D- and Q-register modes (VDQIW), so the
;; scheduling type carries the <q> suffix instead of being hard-wired to the
;; Q-register variant; this matches how neon_vhsub<sup><mode> is classified.
(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)
1861
;; vqadd over the VQADD unspec family; all operands share one VDQIX mode.
(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)
1871
;; vaddhn / vraddhn: sources have a VN mode, the destination has the
;; corresponding <V_narrow> mode.
(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)
1881
1882 ;; Polynomial and Float multiplication.
;; vmul over the VPF modes, kept as an UNSPEC_VMUL unspec.  The scheduling
;; type is picked from <Is_float_mode>.
(define_insn "neon_vmul<pf><mode>"
  [(set (match_operand:VPF 0 "s_register_operand" "=w")
        (unspec:VPF
          [(match_operand:VPF 1 "s_register_operand" "w")
           (match_operand:VPF 2 "s_register_operand" "w")]
          UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s<q>")
                      (const_string "neon_mul_<V_elem_ch><q>")))]
)
1895
;; Expander for vmla.  Chooses between the canonical multiply-add pattern
;; and the unspec pattern; always finishes with DONE.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  /* Only a float mode without -funsafe-math-optimizations needs the
     unspec form; every other case uses the canonical pattern.  */
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                             operands[2], operands[3]));
  DONE;
})
1911
;; Expander for vfma.  Forwards to fma<mode>4_intrinsic with operand 1
;; moved to the last argument position.
(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx acc = operands[1];

  emit_insn (gen_fma<mode>4_intrinsic (dest, operands[2], operands[3], acc));
  DONE;
})
1923
;; Expander for vfms.  Forwards to fmsub<mode>4_intrinsic with operand 1
;; moved to the last argument position.
(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  rtx dest = operands[0];
  rtx acc = operands[1];

  emit_insn (gen_fmsub<mode>4_intrinsic (dest, operands[2], operands[3],
                                         acc));
  DONE;
})
1935
1936 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1937
;; vmla as an UNSPEC_VMLA unspec.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 2 and 3 appear as sources in the
;; assembly template.
(define_insn "neon_vmla<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
1951
;; vmlal: <V_widen> accumulator (operand 1, tied to the destination) plus
;; two VW sources.
(define_insn "neon_vmlal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
1962
;; Expander for vmls.  Chooses between the canonical multiply-subtract
;; pattern and the unspec pattern; always finishes with DONE.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  /* Only a float mode without -funsafe-math-optimizations needs the
     unspec form; every other case uses the canonical pattern.  */
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
                                                      operands[1],
                                                      operands[2],
                                                      operands[3]));
  DONE;
})
1978
1979 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1980
;; vmls as an UNSPEC_VMLS unspec.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 2 and 3 appear as sources in the
;; assembly template.
(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 2 "s_register_operand" "w")
           (match_operand:VDQW 3 "s_register_operand" "w")]
          UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
1994
;; vmlsl: <V_widen> accumulator (operand 1, tied to the destination) and
;; two VW sources.
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2005
2006 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics; <r> selects the "r" variant.
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI
          [(match_operand:VMDQI 1 "s_register_operand" "w")
           (match_operand:VMDQI 2 "s_register_operand" "w")]
          VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)
2016
;; vqdmlal: <V_widen> accumulator (operand 1, tied to the destination) and
;; two VMDI sources.
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2027
;; vqdmlsl: <V_widen> accumulator (operand 1, tied to the destination) and
;; two VMDI sources.
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2038
;; vmull: two VW sources, destination in the <V_widen> mode.
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
2048
;; vqdmull: two VMDI sources, destination in the <V_widen> mode.
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand" "w")]
          UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)
2058
;; Expander for vsub.  Chooses between the generic sub<mode>3 pattern and
;; the unspec pattern; always finishes with DONE.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  /* Only a float mode without -funsafe-math-optimizations needs the
     unspec form; every other case uses the canonical pattern.  */
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  else
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
2072
2073 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2074
;; Vector subtract represented as an UNSPEC_VSUB unspec instead of a
;; canonical (minus ...) rtx.  The scheduling type is chosen from
;; <Is_float_mode>.
(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_sub<q>")))]
)
2087
;; vsubl: both sources have a VDI mode, the destination has the
;; corresponding <V_widen> mode.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)
2097
;; vsubw: operand 1 is already in the <V_widen> mode, operand 2 has the
;; narrower VDI mode.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)
2107
;; vqsub over the VQSUB unspec family; all operands share one VDQIX mode.
(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)
2117
;; vhsub over the VHSUB unspec family; all operands share one VDQIW mode.
(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)
2127
;; vsubhn / vrsubhn: sources have a VN mode, the destination has the
;; corresponding <V_narrow> mode.
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:VN 2 "s_register_operand" "w")]
          VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)
2137
2138 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2139 ;; without unsafe math optimizations.
;; Expander for the vector compare intrinsics.  Emits either the canonical
;; compare insn or, for float vectors without
;; -funsafe-math-optimizations, the unspec compare insn.
(define_expand "neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
   (neg:<V_cmp_result>
     (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
                       (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
  "TARGET_NEON"
{
  /* FP comparisons must use UNSPECs unless -funsafe-math-optimizations
     is enabled.  */
  const bool needs_unspec
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       && !flag_unsafe_math_optimizations);

  if (!needs_unspec)
    emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
                                               operands[1],
                                               operands[2]));
  /* The unspec generators are named explicitly per float mode instead of
     via gen_neon_vc<cmp_op><mode>_insn_unspec: those insns exist only for
     float modes, whereas this expander is also instantiated for the
     integer modes (which never reach the branches below).  */
  else if (<MODE>mode == V2SFmode)
    emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                    operands[1],
                                                    operands[2]));
  else if (<MODE>mode == V4SFmode)
    emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                    operands[1],
                                                    operands[2]));
  else
    gcc_unreachable ();

  DONE;
}
)
2179
;; Canonical vector compare.  Alternative 0 compares against a register,
;; alternative 1 against the immediate zero.  The insn condition rejects
;; float modes unless -funsafe-math-optimizations is set.
(define_insn "neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
{
  char pattern[100];
  /* Element-type letter placed after the dot in the mnemonic.  */
  const char *type_letter
    = GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT ? "f" : "<cmp_type>";
  /* Third assembly operand: a register, or literal zero for the second
     alternative.  */
  const char *third_operand = which_alternative == 0 ? "%<V_reg>2" : "#0";

  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           type_letter, third_operand);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))]
)
2204
;; Float vector compare kept as an unspec (NEON_VCMP family).  Alternative
;; 0 compares against a register, alternative 1 against the immediate
;; zero.
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
{
  char pattern[100];
  /* Third assembly operand: a register, or literal zero for the second
     alternative.  */
  const char *third_operand = which_alternative == 0 ? "%<V_reg>2" : "#0";

  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                    " %%<V_reg>1, %s",
           third_operand);
  output_asm_insn (pattern, operands);
  return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2223
;; Unsigned integer vector compare (GTUGEU codes); register operands only.
(define_insn "neon_vc<cmp_op>u<mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
            (match_operand:VDQIW 1 "s_register_operand" "w")
            (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)
2234
;; Expander for the absolute-value compares.  Picks the canonical insn
;; when -funsafe-math-optimizations is set and the unspec insn otherwise.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
{
  if (!flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
                                                operands[2]));
  DONE;
}
)
2253
;; Canonical form of the absolute-value compare: a comparison of two
;; (abs ...) operands.  Only enabled with -funsafe-math-optimizations.
(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2264
;; Unspec form of the absolute-value compare (NEON_VACMP family).
(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          NEON_VACMP))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2274
;; vtst as an UNSPEC_VTST unspec; all operands share one VDQIW mode.
(define_insn "neon_vtst<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          UNSPEC_VTST))]
  "TARGET_NEON"
  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_tst<q>")]
)
2284
;; Integer vabd over the VABD unspec family.
(define_insn "neon_vabd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VABD))]
  "TARGET_NEON"
  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)
2294
;; Float vabd as an UNSPEC_VABD_F unspec.
(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VABD_F))]
  "TARGET_NEON"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
2304
;; vabdl: two VW sources, destination in the <V_widen> mode.
(define_insn "neon_vabdl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:VW 2 "s_register_operand" "w")]
          VABDL))]
  "TARGET_NEON"
  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_abd_long")]
)
2314
;; vaba: the VABD unspec of operands 2 and 3 added to operand 1, which is
;; tied to the destination register.
(define_insn "neon_vaba<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (plus:VDQIW
          (unspec:VDQIW
            [(match_operand:VDQIW 2 "s_register_operand" "w")
             (match_operand:VDQIW 3 "s_register_operand" "w")]
            VABD)
          (match_operand:VDQIW 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2325
;; vabal: the VABDL unspec of the VW operands 2 and 3 added to the
;; <V_widen> operand 1, which is tied to the destination register.
(define_insn "neon_vabal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
          (unspec:<V_widen>
            [(match_operand:VW 2 "s_register_operand" "w")
             (match_operand:VW 3 "s_register_operand" "w")]
            VABDL)
          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_arith_acc<q>")]
)
2336
;; Integer vmax / vmin over the VMAXMIN unspec family.
(define_insn "neon_v<maxmin><sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VMAXMIN))]
  "TARGET_NEON"
  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)
2346
;; Float vmax / vmin over the VMAXMINF unspec family.
(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VMAXMINF))]
  "TARGET_NEON"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)
2356
;; Expander for vpadd: a thin wrapper that forwards all three operands to
;; neon_vpadd_internal<mode>.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand" "=w")
   (match_operand:VD 1 "s_register_operand" "w")
   (match_operand:VD 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];

  emit_insn (gen_neon_vpadd_internal<mode> (dest, operands[1], operands[2]));
  DONE;
})
2367
;; vpaddl: one VDQIW source, destination in the <V_double_width> mode.
(define_insn "neon_vpaddl<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          VPADDL))]
  "TARGET_NEON"
  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_reduc_add_long")]
)
2376
;; vpadal: <V_double_width> accumulator (operand 1, tied to the
;; destination) and one VDQIW source.
(define_insn "neon_vpadal<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (unspec:<V_double_width>
          [(match_operand:<V_double_width> 1 "s_register_operand" "0")
           (match_operand:VDQIW 2 "s_register_operand" "w")]
          VPADAL))]
  "TARGET_NEON"
  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_reduc_add_acc")]
)
2386
;; Integer vpmax / vpmin over the VPMAXMIN unspec family (VDI modes).
(define_insn "neon_vp<maxmin><sup><mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI
          [(match_operand:VDI 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VPMAXMIN))]
  "TARGET_NEON"
  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
2396
;; Float vpmax / vpmin over the VPMAXMINF unspec family.
(define_insn "neon_vp<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          VPMAXMINF))]
  "TARGET_NEON"
  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
)
2406
;; vrecps as an UNSPEC_VRECPS unspec over the VCVTF modes.
(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRECPS))]
  "TARGET_NEON"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)
2416
;; vrsqrts as an UNSPEC_VRSQRTS unspec over the VCVTF modes.
(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          UNSPEC_VRSQRTS))]
  "TARGET_NEON"
  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
)
2426
;; Expander for vabs: forwards directly to the generic abs<mode>2 pattern.
(define_expand "neon_vabs<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_abs<mode>2 (dest, src));
  DONE;
})
2435
;; vqabs as an UNSPEC_VQABS unspec over the VDQIW modes.
(define_insn "neon_vqabs<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQABS))]
  "TARGET_NEON"
  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qabs<q>")]
)
2444
;; Per-element byte swap (bswap rtx), emitted as vrev<size>.8.
(define_insn "neon_bswap<mode>"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD
          (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
2452
;; Expander for vneg: forwards directly to the generic neg<mode>2 pattern.
(define_expand "neon_vneg<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neg<mode>2 (dest, src));
  DONE;
})
2461
;; Expander for a float vector copysign.  Builds a vector whose every
;; element is the bit pattern 0x80000000, copies operand 2 into the
;; destination and then emits neon_vbsl<mode> with that mask, the
;; destination and operand 1.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
{
  rtx v_bitmask_cast;
  rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
  int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
  rtvec v = rtvec_alloc (n_elt);
  /* A CONST_INT must be sign-extended for the mode it is used in, so the
     mask element is built with gen_int_mode rather than GEN_INT, which
     would leave 0x80000000 zero-extended in a 64-bit HOST_WIDE_INT.  */
  rtx sign_bit = gen_int_mode (0x80000000, SImode);

  /* Create bitmask for vector select.  */
  for (i = 0; i < n_elt; ++i)
    RTVEC_ELT (v, i) = sign_bit;

  emit_move_insn (v_bitmask,
                  gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
  emit_move_insn (operands[0], operands[2]);
  /* View the integer mask register in the float vector mode.  */
  v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                        <VCVTF:V_cmp_result>mode, 0);
  emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
                                  operands[1]));

  DONE;
}
)
2488
;; vqneg as an UNSPEC_VQNEG unspec over the VDQIW modes.
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)
2497
;; vcls as an UNSPEC_VCLS unspec over the VDQIW modes.
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)
2506
;; Standard-named clz pattern for the VDQIW modes, emitted as vclz.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
2514
;; Expander for vclz: forwards directly to the clz<mode>2 pattern.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_clz<mode>2 (dest, src));
  DONE;
})
2523
;; Standard-named popcount pattern for the VE modes, emitted as vcnt.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE
          (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
2531
;; Expander for vcnt: forwards directly to the popcount<mode>2 pattern.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand" "=w")
   (match_operand:VE 1 "s_register_operand" "w")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_popcount<mode>2 (dest, src));
  DONE;
})
2540
;; vrecpe as an UNSPEC_VRECPE unspec over the V32 modes.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
2549
;; vrsqrte as an UNSPEC_VRSQRTE unspec over the V32 modes.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
2558
;; Expander for vmvn: forwards directly to the generic one_cmpl<mode>2
;; pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_one_cmpl<mode>2 (dest, src));
  DONE;
})
2567
;; Extract one lane of a VD vector into a core register, sign-extended to
;; SImode.  On big-endian targets the lane index is mirrored within the
;; vector before the instruction is printed.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Rewrite operand 2 in place as (number of lanes - 1 - lane).  */
      const int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - lane);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2586
;; Extract one lane of a VD vector into a core register, zero-extended to
;; SImode.  On big-endian targets the lane index is mirrored within the
;; vector before the instruction is printed.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Rewrite operand 2 in place as (number of lanes - 1 - lane).  */
      const int lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - lane);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2605
;; Extract one lane of a VQ2 vector into a core register, sign-extended
;; to SImode.  The lane is addressed through the <V_HALF> register that
;; holds it: the register number advances by 2 per half and the lane
;; index is reduced modulo the number of lanes in one half.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx ops[3];
  int regno = REGNO (operands[1]);
  unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int which_half = lane / lanes_per_half;
  unsigned int lane_in_half = lane % lanes_per_half;

  /* Big-endian: mirror the lane index within its half.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * which_half);
  ops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2632
;; Extract one lane of a VQ2 vector into a core register, zero-extended
;; to SImode.  The lane is addressed through the <V_HALF> register that
;; holds it: the register number advances by 2 per half and the lane
;; index is reduced modulo the number of lanes in one half.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx ops[3];
  int regno = REGNO (operands[1]);
  unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int which_half = lane / lanes_per_half;
  unsigned int lane_in_half = lane % lanes_per_half;

  /* Big-endian: mirror the lane index within its half.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * which_half);
  ops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);

  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2659
;; Expander for vget_lane (sign-extending flavour).  32-bit elements go
;; through vec_extract<mode>; narrower elements go through the
;; sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  const unsigned int unit_bits = GET_MODE_UNIT_BITSIZE (<MODE>mode);

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      const unsigned int reg_nelts = 64 / unit_bits;
      unsigned int elt = INTVAL (operands[2]);

      operands[2] = GEN_INT (elt ^ (reg_nelts - 1));
    }

  if (unit_bits != 32)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1],
                                      operands[2]));
  DONE;
})
2688
;; Expander for vget_lane (zero-extending flavour).  32-bit elements go
;; through vec_extract<mode>; narrower elements go through the
;; zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  const unsigned int unit_bits = GET_MODE_UNIT_BITSIZE (<MODE>mode);

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      const unsigned int reg_nelts = 64 / unit_bits;
      unsigned int elt = INTVAL (operands[2]);

      operands[2] = GEN_INT (elt ^ (reg_nelts - 1));
    }

  if (unit_bits != 32)
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1],
                                      operands[2]));
  DONE;
})
2717
;; Expander for vget_lane on a DI value.  Operand 2 (the lane number) is
;; not used: the expansion is a plain move of operand 1 into operand 0.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=r")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
2727
;; Expander for vget_lane on V2DI.  Lane 0 is moved out via gen_lowpart,
;; lane 1 via gen_highpart; any other lane number trips an assertion.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  int lane;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      const unsigned int reg_nelts = 2;
      unsigned int elt = INTVAL (operands[2]);

      operands[2] = GEN_INT (elt ^ (reg_nelts - 1));
    }

  lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  if (lane == 0)
    emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
  else
    emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
  DONE;
})
2756
;; Set-lane expander.  Operand 1 is the new element value, operand 2 the
;; input vector and operand 3 the immediate lane index.  On big-endian
;; targets the index is XORed with (elements per 64 bits - 1).  The lane is
;; passed to vec_set<mode>_internal as a one-hot mask (1 << elt), not as a
;; plain index.
2757 (define_expand "neon_vset_lane<mode>"
2758 [(match_operand:VDQ 0 "s_register_operand" "=w")
2759 (match_operand:<V_elem> 1 "s_register_operand" "r")
2760 (match_operand:VDQ 2 "s_register_operand" "0")
2761 (match_operand:SI 3 "immediate_operand" "i")]
2762 "TARGET_NEON"
2763 {
2764 unsigned int elt = INTVAL (operands[3]);
2765
2766 if (BYTES_BIG_ENDIAN)
2767 {
2768 unsigned int reg_nelts
2769 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2770 elt ^= reg_nelts - 1;
2771 }
2772
2773 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2774 GEN_INT (1 << elt), operands[2]));
2775 DONE;
2776 })
2777
2778 ; A DImode value has only a single lane, so operands 2 & 3 are ignored.
2779
;; Set-lane expander for DImode: expands to a plain move of the new value
;; (operand 1) into the destination (operand 0).  The input vector
;; (operand 2) and lane index (operand 3) are not read by the expansion.
2780 (define_expand "neon_vset_lanedi"
2781 [(match_operand:DI 0 "s_register_operand" "=w")
2782 (match_operand:DI 1 "s_register_operand" "r")
2783 (match_operand:DI 2 "s_register_operand" "0")
2784 (match_operand:SI 3 "immediate_operand" "i")]
2785 "TARGET_NEON"
2786 {
2787 emit_move_insn (operands[0], operands[1]);
2788 DONE;
2789 })
2790
;; vcreate expander: reinterprets the DImode operand 1 as <MODE>mode via
;; gen_lowpart and moves the result into operand 0.  No dedicated
;; instruction pattern is emitted here, only a move.
2791 (define_expand "neon_vcreate<mode>"
2792 [(match_operand:VD_RE 0 "s_register_operand" "")
2793 (match_operand:DI 1 "general_operand" "")]
2794 "TARGET_NEON"
2795 {
2796 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2797 emit_move_insn (operands[0], src);
2798 DONE;
2799 })
2800
;; Duplicate a scalar into every element of a vector (VX modes), expressed
;; in RTL as vec_duplicate.  The scalar uses constraint "r" and the output
;; is a single vdup.<size> instruction.
2801 (define_insn "neon_vdup_n<mode>"
2802 [(set (match_operand:VX 0 "s_register_operand" "=w")
2803 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2804 "TARGET_NEON"
2805 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2806 [(set_attr "type" "neon_from_gp<q>")]
2807 )
2808
;; Duplicate a scalar into every element of a vector (V32 modes).  Two
;; alternatives: the scalar under constraint "r" is printed as %1 and typed
;; neon_from_gp<q>; under constraint "t" it is printed with the %y modifier
;; and typed neon_dup<q>.
2809 (define_insn "neon_vdup_n<mode>"
2810 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2811 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
2812 "TARGET_NEON"
2813 "@
2814 vdup.<V_sz_elem>\t%<V_reg>0, %1
2815 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
2816 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
2817 )
2818
;; vdup_n expander for DImode: the destination has the same mode as the
;; scalar, so the expansion is just a move of operand 1 into operand 0.
2819 (define_expand "neon_vdup_ndi"
2820 [(match_operand:DI 0 "s_register_operand" "=w")
2821 (match_operand:DI 1 "s_register_operand" "r")]
2822 "TARGET_NEON"
2823 {
2824 emit_move_insn (operands[0], operands[1]);
2825 DONE;
2826 }
2827 )
2828
;; Duplicate a DImode value into both elements of a V2DI vector.  Each
;; alternative emits two vmov instructions (hence length 8), one writing
;; %e0 and one writing %f0.  Alternative 0 ("r") sources the %Q1/%R1 pair;
;; alternative 1 ("w") sources %P1.
2829 (define_insn "neon_vdup_nv2di"
2830 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
2831 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
2832 "TARGET_NEON"
2833 "@
2834 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
2835 vmov\t%e0, %P1\;vmov\t%f0, %P1"
2836 [(set_attr "length" "8")
2837 (set_attr "type" "multiple")]
2838 )
2839
;; Duplicate one lane of vector operand 1 (selected by immediate operand 2)
;; across the whole destination, expressed as vec_duplicate of a vec_select.
;; At output time on big-endian targets the lane index is mirrored
;; (nunits - 1 - elt) before printing.  The destination is printed as %P0
;; when <Is_d_reg> is true and as %q0 otherwise.
2840 (define_insn "neon_vdup_lane<mode>_internal"
2841 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2842 (vec_duplicate:VDQW
2843 (vec_select:<V_elem>
2844 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2845 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2846 "TARGET_NEON"
2847 {
2848 if (BYTES_BIG_ENDIAN)
2849 {
2850 int elt = INTVAL (operands[2]);
2851 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
2852 operands[2] = GEN_INT (elt);
2853 }
2854 if (<Is_d_reg>)
2855 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
2856 else
2857 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
2858 }
2859 [(set_attr "type" "neon_dup<q>")]
2860 )
2861
;; Expander for vdup_lane.  On big-endian targets the lane index is XORed
;; with (elements per 64 bits - 1), computed from <V_double_vector_mode>,
;; and then the _internal insn above is emitted with the adjusted index.
2862 (define_expand "neon_vdup_lane<mode>"
2863 [(match_operand:VDQW 0 "s_register_operand" "=w")
2864 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
2865 (match_operand:SI 2 "immediate_operand" "i")]
2866 "TARGET_NEON"
2867 {
2868 if (BYTES_BIG_ENDIAN)
2869 {
2870 unsigned int elt = INTVAL (operands[2]);
2871 unsigned int reg_nelts
2872 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
2873 elt ^= reg_nelts - 1;
2874 operands[2] = GEN_INT (elt);
2875 }
2876 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
2877 operands[2]));
2878 DONE;
2879 })
2880
2881 ; Scalar index is ignored, since only zero is valid here.
;; vdup_lane expander for DImode: expands to a plain move of operand 1 into
;; operand 0; operand 2 is not read.
2882 (define_expand "neon_vdup_lanedi"
2883 [(match_operand:DI 0 "s_register_operand" "=w")
2884 (match_operand:DI 1 "s_register_operand" "w")
2885 (match_operand:SI 2 "immediate_operand" "i")]
2886 "TARGET_NEON"
2887 {
2888 emit_move_insn (operands[0], operands[1]);
2889 DONE;
2890 })
2891
2892 ; Likewise for v2di, as the DImode second operand has only a single element.
;; vdup_lane expander producing V2DI from a DImode source: forwards to
;; neon_vdup_nv2di with operands 0 and 1; operand 2 is not read.
2893 (define_expand "neon_vdup_lanev2di"
2894 [(match_operand:V2DI 0 "s_register_operand" "=w")
2895 (match_operand:DI 1 "s_register_operand" "w")
2896 (match_operand:SI 2 "immediate_operand" "i")]
2897 "TARGET_NEON"
2898 {
2899 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
2900 DONE;
2901 })
2902
2903 ; Disabled before reload because we don't want combine doing something silly,
2904 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap: two sets exchange operands 0 and 1, both marked
;; read-write ("+w").  The insn condition includes reload_completed, so the
;; pattern only matches after reload.  Output is a single vswp.
2905 (define_insn "*neon_vswp<mode>"
2906 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
2907 (match_operand:VDQX 1 "s_register_operand" "+w"))
2908 (set (match_dup 1) (match_dup 0))]
2909 "TARGET_NEON && reload_completed"
2910 "vswp\t%<V_reg>0, %<V_reg>1"
2911 [(set_attr "type" "neon_permute<q>")]
2912 )
2913
2914 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2915 ;; dest vector.
2916 ;; FIXME: A different implementation of this builtin could make it much
2917 ;; more likely that we wouldn't actually need to output anything (we could make
2918 ;; it so that the reg allocator puts things in the right places magically
2919 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
2920
;; Combine two VDX vectors into one <V_DOUBLE> vector (vec_concat of
;; operands 1 and 2).  The insn itself outputs "#", i.e. it has no assembly
;; of its own and must be split; the split fires once reload has completed
;; and delegates all code generation to neon_split_vcombine.
2921 (define_insn_and_split "neon_vcombine<mode>"
2922 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
2923 (vec_concat:<V_DOUBLE>
2924 (match_operand:VDX 1 "s_register_operand" "w")
2925 (match_operand:VDX 2 "s_register_operand" "w")))]
2926 "TARGET_NEON"
2927 "#"
2928 "&& reload_completed"
2929 [(const_int 0)]
2930 {
2931 neon_split_vcombine (operands);
2932 DONE;
2933 }
2934 [(set_attr "type" "multiple")]
2935 )
2936
;; vget_high expander: moves into operand 0 the <V_HALF>mode subreg of
;; operand 1 located at byte offset GET_MODE_SIZE (<V_HALF>mode), i.e. the
;; subreg starting one half-vector into the source.
2937 (define_expand "neon_vget_high<mode>"
2938 [(match_operand:<V_HALF> 0 "s_register_operand")
2939 (match_operand:VQX 1 "s_register_operand")]
2940 "TARGET_NEON"
2941 {
2942 emit_move_insn (operands[0],
2943 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
2944 GET_MODE_SIZE (<V_HALF>mode)));
2945 DONE;
2946 })
2947
;; vget_low expander: moves into operand 0 the <V_HALF>mode subreg of
;; operand 1 located at byte offset 0.
2948 (define_expand "neon_vget_low<mode>"
2949 [(match_operand:<V_HALF> 0 "s_register_operand")
2950 (match_operand:VQX 1 "s_register_operand")]
2951 "TARGET_NEON"
2952 {
2953 emit_move_insn (operands[0],
2954 simplify_gen_subreg (<V_HALF>mode, operands[1],
2955 <MODE>mode, 0));
2956 DONE;
2957 })
2958
;; Signed integer vector to floating-point vector conversion (RTL "float").
;; Only enabled when flag_rounding_math is off.  Emits vcvt.f32.s32.
2959 (define_insn "float<mode><V_cvtto>2"
2960 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2961 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2962 "TARGET_NEON && !flag_rounding_math"
2963 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
2964 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
2965 )
2966
;; Unsigned integer vector to floating-point vector conversion (RTL
;; "unsigned_float").  Only enabled when flag_rounding_math is off.
;; Emits vcvt.f32.u32.
2967 (define_insn "floatuns<mode><V_cvtto>2"
2968 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2969 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
2970 "TARGET_NEON && !flag_rounding_math"
2971 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
2972 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
2973 )
2974
;; Floating-point vector to signed integer vector conversion (RTL "fix").
;; Emits vcvt.s32.f32.
2975 (define_insn "fix_trunc<mode><V_cvtto>2"
2976 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2977 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2978 "TARGET_NEON"
2979 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
2980 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
2981 )
2982
;; Floating-point vector to unsigned integer vector conversion (RTL
;; "unsigned_fix").  Emits vcvt.u32.f32.
2983 (define_insn "fixuns_trunc<mode><V_cvtto>2"
2984 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2985 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
2986 "TARGET_NEON"
2987 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
2988 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
2989 )
2990
;; vcvt pattern for a floating-point source (VCVTF), modelled as an unspec
;; from the VCVT_US iterator rather than as fix/unsigned_fix.  <sup>
;; supplies the signedness letter of the integer result type in the
;; mnemonic vcvt.<sup>32.f32.
2991 (define_insn "neon_vcvt<sup><mode>"
2992 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
2993 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
2994 VCVT_US))]
2995 "TARGET_NEON"
2996 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
2997 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
2998 )
2999
;; vcvt pattern for an integer source (VCVTI), modelled as an unspec from
;; the VCVT_US iterator.  <sup> supplies the signedness letter of the
;; integer source type in the mnemonic vcvt.f32.<sup>32.
3000 (define_insn "neon_vcvt<sup><mode>"
3001 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3002 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3003 VCVT_US))]
3004 "TARGET_NEON"
3005 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3006 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3007 )
3008
;; Widening conversion V4HF -> V4SF (UNSPEC_VCVT).  Requires TARGET_FP16 in
;; addition to TARGET_NEON.  Emits vcvt.f32.f16 with the destination
;; printed as %q0 and the source as %P1.
3009 (define_insn "neon_vcvtv4sfv4hf"
3010 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3011 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3012 UNSPEC_VCVT))]
3013 "TARGET_NEON && TARGET_FP16"
3014 "vcvt.f32.f16\t%q0, %P1"
3015 [(set_attr "type" "neon_fp_cvt_widen_h")]
3016 )
3017
;; Narrowing conversion V4SF -> V4HF (UNSPEC_VCVT).  Requires TARGET_FP16 in
;; addition to TARGET_NEON.  Emits vcvt.f16.f32 with the destination
;; printed as %P0 and the source as %q1.
3018 (define_insn "neon_vcvtv4hfv4sf"
3019 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3020 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3021 UNSPEC_VCVT))]
3022 "TARGET_NEON && TARGET_FP16"
3023 "vcvt.f16.f32\t%P0, %q1"
3024 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
3025 )
3026
;; vcvt with an extra immediate (operand 2), floating-point source (VCVTF).
;; The immediate is checked at output time with
;; neon_const_bounds (operands[2], 1, 33) and then printed as the third
;; assembler operand.
;; NOTE(review): the upper bound is presumably exclusive (valid range
;; 1..32) -- confirm against the definition of neon_const_bounds.
3027 (define_insn "neon_vcvt<sup>_n<mode>"
3028 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3029 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3030 (match_operand:SI 2 "immediate_operand" "i")]
3031 VCVT_US_N))]
3032 "TARGET_NEON"
3033 {
3034 neon_const_bounds (operands[2], 1, 33);
3035 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3036 }
3037 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3038 )
3039
;; vcvt with an extra immediate (operand 2), integer source (VCVTI).
;; The immediate is checked at output time with
;; neon_const_bounds (operands[2], 1, 33) and then printed as the third
;; assembler operand.
;; NOTE(review): the upper bound is presumably exclusive (valid range
;; 1..32) -- confirm against the definition of neon_const_bounds.
3040 (define_insn "neon_vcvt<sup>_n<mode>"
3041 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3042 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3043 (match_operand:SI 2 "immediate_operand" "i")]
3044 VCVT_US_N))]
3045 "TARGET_NEON"
3046 {
3047 neon_const_bounds (operands[2], 1, 33);
3048 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3049 }
3050 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3051 )
3052
;; Narrowing move (UNSPEC_VMOVN): VN-mode source, <V_narrow>-mode result.
;; Emits vmovn with the destination printed as %P0 and the source as %q1.
3053 (define_insn "neon_vmovn<mode>"
3054 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3055 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3056 UNSPEC_VMOVN))]
3057 "TARGET_NEON"
3058 "vmovn.<V_if_elem>\t%P0, %q1"
3059 [(set_attr "type" "neon_shift_imm_narrow_q")]
3060 )
3061
;; Saturating narrowing move, one pattern per member of the VQMOVN
;; iterator; <sup> provides the signedness letter in the mnemonic.
;; Emits vqmovn with the destination printed as %P0 and the source as %q1.
3062 (define_insn "neon_vqmovn<sup><mode>"
3063 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3064 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3065 VQMOVN))]
3066 "TARGET_NEON"
3067 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3068 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3069 )
3070
;; vqmovun pattern (UNSPEC_VQMOVUN): VN-mode source, <V_narrow>-mode
;; result.  Emits vqmovun.<V_s_elem> with the destination printed as %P0
;; and the source as %q1.
3071 (define_insn "neon_vqmovun<mode>"
3072 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3073 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3074 UNSPEC_VQMOVUN))]
3075 "TARGET_NEON"
3076 "vqmovun.<V_s_elem>\t%P0, %q1"
3077 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3078 )
3079
;; Widening move, one pattern per member of the VMOVL iterator: VW-mode
;; source, <V_widen>-mode result.  Emits vmovl with the destination printed
;; as %q0 and the source as %P1.
3080 (define_insn "neon_vmovl<sup><mode>"
3081 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3082 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3083 VMOVL))]
3084 "TARGET_NEON"
3085 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3086 [(set_attr "type" "neon_shift_imm_long")]
3087 )
3088
;; Multiply by scalar lane, VMD modes.  Operand 2 holds the scalar vector
;; (register class restricted by <scalar_mul_constraint>) and operand 3 the
;; immediate lane, printed as %P2[%c3].  The "type" attribute is chosen per
;; mode: the fp form when <Is_float_mode>, the integer form otherwise.
3089 (define_insn "neon_vmul_lane<mode>"
3090 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3091 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3092 (match_operand:VMD 2 "s_register_operand"
3093 "<scalar_mul_constraint>")
3094 (match_operand:SI 3 "immediate_operand" "i")]
3095 UNSPEC_VMUL_LANE))]
3096 "TARGET_NEON"
3097 {
3098 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3099 }
3100 [(set (attr "type")
3101 (if_then_else (match_test "<Is_float_mode>")
3102 (const_string "neon_fp_mul_s_scalar<q>")
3103 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3104 )
3105
;; Multiply by scalar lane, VMQ modes.  The scalar vector (operand 2) has
;; mode <V_HALF> and is printed as %P2[%c3], while operands 0 and 1 are
;; printed with %q.  The "type" attribute is chosen per mode via
;; <Is_float_mode>.
3106 (define_insn "neon_vmul_lane<mode>"
3107 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3108 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3109 (match_operand:<V_HALF> 2 "s_register_operand"
3110 "<scalar_mul_constraint>")
3111 (match_operand:SI 3 "immediate_operand" "i")]
3112 UNSPEC_VMUL_LANE))]
3113 "TARGET_NEON"
3114 {
3115 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3116 }
3117 [(set (attr "type")
3118 (if_then_else (match_test "<Is_float_mode>")
3119 (const_string "neon_fp_mul_s_scalar<q>")
3120 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3121 )
3122
;; Widening multiply by scalar lane, one pattern per member of the
;; VMULL_LANE iterator.  VMDI sources, <V_widen> result (printed as %q0);
;; operand 3 is the immediate lane of operand 2.
3123 (define_insn "neon_vmull<sup>_lane<mode>"
3124 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3125 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3126 (match_operand:VMDI 2 "s_register_operand"
3127 "<scalar_mul_constraint>")
3128 (match_operand:SI 3 "immediate_operand" "i")]
3129 VMULL_LANE))]
3130 "TARGET_NEON"
3131 {
3132 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3133 }
3134 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
3135 )
3136
;; vqdmull by scalar lane (UNSPEC_VQDMULL_LANE).  VMDI sources, <V_widen>
;; result (printed as %q0); operand 3 is the immediate lane of operand 2.
3137 (define_insn "neon_vqdmull_lane<mode>"
3138 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3139 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3140 (match_operand:VMDI 2 "s_register_operand"
3141 "<scalar_mul_constraint>")
3142 (match_operand:SI 3 "immediate_operand" "i")]
3143 UNSPEC_VQDMULL_LANE))]
3144 "TARGET_NEON"
3145 {
3146 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3147 }
3148 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
3149 )
3150
;; vqdmulh / vqrdmulh by scalar lane, VMQI modes; <r> from the VQDMULH_LANE
;; iterator selects between the two mnemonics.  The scalar vector
;; (operand 2) has mode <V_HALF> and is printed as %P2[%c3]; operands 0 and
;; 1 are printed with %q.
3151 (define_insn "neon_vq<r>dmulh_lane<mode>"
3152 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3153 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3154 (match_operand:<V_HALF> 2 "s_register_operand"
3155 "<scalar_mul_constraint>")
3156 (match_operand:SI 3 "immediate_operand" "i")]
3157 VQDMULH_LANE))]
3158 "TARGET_NEON"
3159 {
3160 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3161 }
3162 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3163 )
3164
;; vqdmulh / vqrdmulh by scalar lane, VMDI modes; <r> from the VQDMULH_LANE
;; iterator selects between the two mnemonics.  All vector operands are
;; printed with %P.
;; NOTE(review): the "type" attribute ends in "_scalar_q" even though every
;; operand of this variant is printed with %P; it is identical to the VMQI
;; variant's type.  Confirm whether the non-"_q" type was intended.
3165 (define_insn "neon_vq<r>dmulh_lane<mode>"
3166 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3167 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3168 (match_operand:VMDI 2 "s_register_operand"
3169 "<scalar_mul_constraint>")
3170 (match_operand:SI 3 "immediate_operand" "i")]
3171 VQDMULH_LANE))]
3172 "TARGET_NEON"
3173 {
3174 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3175 }
3176 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3177 )
3178
;; Multiply-accumulate by scalar lane, VMD modes.  Operand 1 is the
;; accumulator and is tied to the destination (constraint "0"), so it does
;; not appear in the assembler template; operand 3 lane %c4 is the scalar.
;; The "type" attribute is chosen per mode via <Is_float_mode>.
3179 (define_insn "neon_vmla_lane<mode>"
3180 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3181 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3182 (match_operand:VMD 2 "s_register_operand" "w")
3183 (match_operand:VMD 3 "s_register_operand"
3184 "<scalar_mul_constraint>")
3185 (match_operand:SI 4 "immediate_operand" "i")]
3186 UNSPEC_VMLA_LANE))]
3187 "TARGET_NEON"
3188 {
3189 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3190 }
3191 [(set (attr "type")
3192 (if_then_else (match_test "<Is_float_mode>")
3193 (const_string "neon_fp_mla_s_scalar<q>")
3194 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3195 )
3196
;; Multiply-accumulate by scalar lane, VMQ modes.  Operand 1 is the
;; accumulator tied to the destination (constraint "0"); the scalar vector
;; (operand 3) has mode <V_HALF> and is printed as %P3[%c4].  The "type"
;; attribute is chosen per mode via <Is_float_mode>.
3197 (define_insn "neon_vmla_lane<mode>"
3198 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3199 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3200 (match_operand:VMQ 2 "s_register_operand" "w")
3201 (match_operand:<V_HALF> 3 "s_register_operand"
3202 "<scalar_mul_constraint>")
3203 (match_operand:SI 4 "immediate_operand" "i")]
3204 UNSPEC_VMLA_LANE))]
3205 "TARGET_NEON"
3206 {
3207 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3208 }
3209 [(set (attr "type")
3210 (if_then_else (match_test "<Is_float_mode>")
3211 (const_string "neon_fp_mla_s_scalar<q>")
3212 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3213 )
3214
;; Widening multiply-accumulate by scalar lane, one pattern per member of
;; the VMLAL_LANE iterator.  Operand 1 is the <V_widen> accumulator tied to
;; the destination (constraint "0"); operands 2 and 3 are VMDI, with
;; operand 4 selecting the lane of operand 3.
3215 (define_insn "neon_vmlal<sup>_lane<mode>"
3216 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3217 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3218 (match_operand:VMDI 2 "s_register_operand" "w")
3219 (match_operand:VMDI 3 "s_register_operand"
3220 "<scalar_mul_constraint>")
3221 (match_operand:SI 4 "immediate_operand" "i")]
3222 VMLAL_LANE))]
3223 "TARGET_NEON"
3224 {
3225 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3226 }
3227 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3228 )
3229
;; vqdmlal by scalar lane (UNSPEC_VQDMLAL_LANE).  Operand 1 is the
;; <V_widen> accumulator tied to the destination (constraint "0");
;; operands 2 and 3 are VMDI, with operand 4 selecting the lane of
;; operand 3.
3230 (define_insn "neon_vqdmlal_lane<mode>"
3231 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3232 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3233 (match_operand:VMDI 2 "s_register_operand" "w")
3234 (match_operand:VMDI 3 "s_register_operand"
3235 "<scalar_mul_constraint>")
3236 (match_operand:SI 4 "immediate_operand" "i")]
3237 UNSPEC_VQDMLAL_LANE))]
3238 "TARGET_NEON"
3239 {
3240 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3241 }
3242 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3243 )
3244
;; Multiply-subtract by scalar lane, VMD modes.  Operand 1 is the
;; accumulator tied to the destination (constraint "0"); operand 3 lane
;; %c4 is the scalar.  Uses the same "type" strings as the vmla_lane
;; patterns, selected via <Is_float_mode>.
3245 (define_insn "neon_vmls_lane<mode>"
3246 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3247 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3248 (match_operand:VMD 2 "s_register_operand" "w")
3249 (match_operand:VMD 3 "s_register_operand"
3250 "<scalar_mul_constraint>")
3251 (match_operand:SI 4 "immediate_operand" "i")]
3252 UNSPEC_VMLS_LANE))]
3253 "TARGET_NEON"
3254 {
3255 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3256 }
3257 [(set (attr "type")
3258 (if_then_else (match_test "<Is_float_mode>")
3259 (const_string "neon_fp_mla_s_scalar<q>")
3260 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3261 )
3262
;; Multiply-subtract by scalar lane, VMQ modes.  Operand 1 is the
;; accumulator tied to the destination (constraint "0"); the scalar vector
;; (operand 3) has mode <V_HALF> and is printed as %P3[%c4].  Uses the same
;; "type" strings as the vmla_lane patterns, selected via <Is_float_mode>.
3263 (define_insn "neon_vmls_lane<mode>"
3264 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3265 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3266 (match_operand:VMQ 2 "s_register_operand" "w")
3267 (match_operand:<V_HALF> 3 "s_register_operand"
3268 "<scalar_mul_constraint>")
3269 (match_operand:SI 4 "immediate_operand" "i")]
3270 UNSPEC_VMLS_LANE))]
3271 "TARGET_NEON"
3272 {
3273 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3274 }
3275 [(set (attr "type")
3276 (if_then_else (match_test "<Is_float_mode>")
3277 (const_string "neon_fp_mla_s_scalar<q>")
3278 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
3279 )
3280
;; Widening multiply-subtract by scalar lane, one pattern per member of the
;; VMLSL_LANE iterator.  Operand 1 is the <V_widen> accumulator tied to the
;; destination (constraint "0"); operands 2 and 3 are VMDI, with operand 4
;; selecting the lane of operand 3.
3281 (define_insn "neon_vmlsl<sup>_lane<mode>"
3282 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3283 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3284 (match_operand:VMDI 2 "s_register_operand" "w")
3285 (match_operand:VMDI 3 "s_register_operand"
3286 "<scalar_mul_constraint>")
3287 (match_operand:SI 4 "immediate_operand" "i")]
3288 VMLSL_LANE))]
3289 "TARGET_NEON"
3290 {
3291 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3292 }
3293 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
3294 )
3295
;; vqdmlsl by scalar lane (UNSPEC_VQDMLSL_LANE).  Operand 1 is the
;; <V_widen> accumulator tied to the destination (constraint "0");
;; operands 2 and 3 are VMDI, with operand 4 selecting the lane of
;; operand 3.
3296 (define_insn "neon_vqdmlsl_lane<mode>"
3297 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3298 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3299 (match_operand:VMDI 2 "s_register_operand" "w")
3300 (match_operand:VMDI 3 "s_register_operand"
3301 "<scalar_mul_constraint>")
3302 (match_operand:SI 4 "immediate_operand" "i")]
3303 UNSPEC_VQDMLSL_LANE))]
3304 "TARGET_NEON"
3305 {
3306 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3307 }
3308 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3309 )
3310
3311 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3312 ; core register into a temp register, then use a scalar taken from that. This
3313 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3314 ; or extracted from another vector. In the latter case it's currently better to
3315 ; use the "_lane" variant, and the former case can probably be implemented
3316 ; using vld1_lane, but that hasn't been done yet.
3317
;; vmul_n expander, VMD modes: allocates a fresh <MODE>mode pseudo, inserts
;; the scalar (operand 2) into its lane 0 with neon_vset_lane<mode>, then
;; emits neon_vmul_lane<mode> selecting lane 0 of that temporary.
3318 (define_expand "neon_vmul_n<mode>"
3319 [(match_operand:VMD 0 "s_register_operand" "")
3320 (match_operand:VMD 1 "s_register_operand" "")
3321 (match_operand:<V_elem> 2 "s_register_operand" "")]
3322 "TARGET_NEON"
3323 {
3324 rtx tmp = gen_reg_rtx (<MODE>mode);
3325 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3326 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3327 const0_rtx));
3328 DONE;
3329 })
3330
;; vmul_n expander, VMQ modes: the temporary holding the scalar is a
;; <V_HALF>mode pseudo (filled via neon_vset_lane<V_half>, lane 0), which
;; is then passed to neon_vmul_lane<mode> with lane 0.
3331 (define_expand "neon_vmul_n<mode>"
3332 [(match_operand:VMQ 0 "s_register_operand" "")
3333 (match_operand:VMQ 1 "s_register_operand" "")
3334 (match_operand:<V_elem> 2 "s_register_operand" "")]
3335 "TARGET_NEON"
3336 {
3337 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3338 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3339 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
3340 const0_rtx));
3341 DONE;
3342 })
3343
;; vmulls_n expander: places the scalar (operand 2) in lane 0 of a fresh
;; <MODE>mode pseudo and emits neon_vmulls_lane<mode> on lane 0.
3344 (define_expand "neon_vmulls_n<mode>"
3345 [(match_operand:<V_widen> 0 "s_register_operand" "")
3346 (match_operand:VMDI 1 "s_register_operand" "")
3347 (match_operand:<V_elem> 2 "s_register_operand" "")]
3348 "TARGET_NEON"
3349 {
3350 rtx tmp = gen_reg_rtx (<MODE>mode);
3351 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3352 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
3353 const0_rtx));
3354 DONE;
3355 })
3356
;; vmullu_n expander: places the scalar (operand 2) in lane 0 of a fresh
;; <MODE>mode pseudo and emits neon_vmullu_lane<mode> on lane 0.
3357 (define_expand "neon_vmullu_n<mode>"
3358 [(match_operand:<V_widen> 0 "s_register_operand" "")
3359 (match_operand:VMDI 1 "s_register_operand" "")
3360 (match_operand:<V_elem> 2 "s_register_operand" "")]
3361 "TARGET_NEON"
3362 {
3363 rtx tmp = gen_reg_rtx (<MODE>mode);
3364 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3365 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
3366 const0_rtx));
3367 DONE;
3368 })
3369
;; vqdmull_n expander: places the scalar (operand 2) in lane 0 of a fresh
;; <MODE>mode pseudo and emits neon_vqdmull_lane<mode> on lane 0.
3370 (define_expand "neon_vqdmull_n<mode>"
3371 [(match_operand:<V_widen> 0 "s_register_operand" "")
3372 (match_operand:VMDI 1 "s_register_operand" "")
3373 (match_operand:<V_elem> 2 "s_register_operand" "")]
3374 "TARGET_NEON"
3375 {
3376 rtx tmp = gen_reg_rtx (<MODE>mode);
3377 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3378 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
3379 const0_rtx));
3380 DONE;
3381 })
3382
;; vqdmulh_n expander, VMDI modes: places the scalar (operand 2) in lane 0
;; of a fresh <MODE>mode pseudo and emits neon_vqdmulh_lane<mode> on lane 0.
3383 (define_expand "neon_vqdmulh_n<mode>"
3384 [(match_operand:VMDI 0 "s_register_operand" "")
3385 (match_operand:VMDI 1 "s_register_operand" "")
3386 (match_operand:<V_elem> 2 "s_register_operand" "")]
3387 "TARGET_NEON"
3388 {
3389 rtx tmp = gen_reg_rtx (<MODE>mode);
3390 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3391 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3392 const0_rtx));
3393 DONE;
3394 })
3395
;; vqrdmulh_n expander, VMDI modes: places the scalar (operand 2) in lane 0
;; of a fresh <MODE>mode pseudo and emits neon_vqrdmulh_lane<mode> on
;; lane 0.
3396 (define_expand "neon_vqrdmulh_n<mode>"
3397 [(match_operand:VMDI 0 "s_register_operand" "")
3398 (match_operand:VMDI 1 "s_register_operand" "")
3399 (match_operand:<V_elem> 2 "s_register_operand" "")]
3400 "TARGET_NEON"
3401 {
3402 rtx tmp = gen_reg_rtx (<MODE>mode);
3403 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3404 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3405 const0_rtx));
3406 DONE;
3407 })
3408
;; vqdmulh_n expander, VMQI modes: the scalar (operand 2) goes into lane 0
;; of a fresh <V_HALF>mode pseudo via neon_vset_lane<V_half>, which is then
;; passed to neon_vqdmulh_lane<mode> with lane 0.
3409 (define_expand "neon_vqdmulh_n<mode>"
3410 [(match_operand:VMQI 0 "s_register_operand" "")
3411 (match_operand:VMQI 1 "s_register_operand" "")
3412 (match_operand:<V_elem> 2 "s_register_operand" "")]
3413 "TARGET_NEON"
3414 {
3415 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3416 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3417 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3418 const0_rtx));
3419 DONE;
3420 })
3421
;; vqrdmulh_n expander, VMQI modes: the scalar (operand 2) goes into lane 0
;; of a fresh <V_HALF>mode pseudo via neon_vset_lane<V_half>, which is then
;; passed to neon_vqrdmulh_lane<mode> with lane 0.
3422 (define_expand "neon_vqrdmulh_n<mode>"
3423 [(match_operand:VMQI 0 "s_register_operand" "")
3424 (match_operand:VMQI 1 "s_register_operand" "")
3425 (match_operand:<V_elem> 2 "s_register_operand" "")]
3426 "TARGET_NEON"
3427 {
3428 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3429 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3430 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
3431 const0_rtx));
3432 DONE;
3433 })
3434
;; vmla_n expander, VMD modes: places the scalar (operand 3) in lane 0 of a
;; fresh <MODE>mode pseudo and emits neon_vmla_lane<mode> with operands 1
;; and 2 passed through and lane 0 selected.
3435 (define_expand "neon_vmla_n<mode>"
3436 [(match_operand:VMD 0 "s_register_operand" "")
3437 (match_operand:VMD 1 "s_register_operand" "")
3438 (match_operand:VMD 2 "s_register_operand" "")
3439 (match_operand:<V_elem> 3 "s_register_operand" "")]
3440 "TARGET_NEON"
3441 {
3442 rtx tmp = gen_reg_rtx (<MODE>mode);
3443 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3444 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3445 tmp, const0_rtx));
3446 DONE;
3447 })
3448
;; vmla_n expander, VMQ modes: the scalar (operand 3) goes into lane 0 of a
;; fresh <V_HALF>mode pseudo via neon_vset_lane<V_half>, which is then
;; passed to neon_vmla_lane<mode> with lane 0.
3449 (define_expand "neon_vmla_n<mode>"
3450 [(match_operand:VMQ 0 "s_register_operand" "")
3451 (match_operand:VMQ 1 "s_register_operand" "")
3452 (match_operand:VMQ 2 "s_register_operand" "")
3453 (match_operand:<V_elem> 3 "s_register_operand" "")]
3454 "TARGET_NEON"
3455 {
3456 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3457 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3458 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
3459 tmp, const0_rtx));
3460 DONE;
3461 })
3462
;; vmlals_n expander: places the scalar (operand 3) in lane 0 of a fresh
;; <MODE>mode pseudo and emits neon_vmlals_lane<mode> on lane 0.
3463 (define_expand "neon_vmlals_n<mode>"
3464 [(match_operand:<V_widen> 0 "s_register_operand" "")
3465 (match_operand:<V_widen> 1 "s_register_operand" "")
3466 (match_operand:VMDI 2 "s_register_operand" "")
3467 (match_operand:<V_elem> 3 "s_register_operand" "")]
3468 "TARGET_NEON"
3469 {
3470 rtx tmp = gen_reg_rtx (<MODE>mode);
3471 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3472 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
3473 tmp, const0_rtx));
3474 DONE;
3475 })
3476
;; vmlalu_n expander: places the scalar (operand 3) in lane 0 of a fresh
;; <MODE>mode pseudo and emits neon_vmlalu_lane<mode> on lane 0.
3477 (define_expand "neon_vmlalu_n<mode>"
3478 [(match_operand:<V_widen> 0 "s_register_operand" "")
3479 (match_operand:<V_widen> 1 "s_register_operand" "")
3480 (match_operand:VMDI 2 "s_register_operand" "")
3481 (match_operand:<V_elem> 3 "s_register_operand" "")]
3482 "TARGET_NEON"
3483 {
3484 rtx tmp = gen_reg_rtx (<MODE>mode);
3485 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3486 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
3487 tmp, const0_rtx));
3488 DONE;
3489 })
3490
;; vqdmlal_n expander: places the scalar (operand 3) in lane 0 of a fresh
;; <MODE>mode pseudo and emits neon_vqdmlal_lane<mode> on lane 0.
3491 (define_expand "neon_vqdmlal_n<mode>"
3492 [(match_operand:<V_widen> 0 "s_register_operand" "")
3493 (match_operand:<V_widen> 1 "s_register_operand" "")
3494 (match_operand:VMDI 2 "s_register_operand" "")
3495 (match_operand:<V_elem> 3 "s_register_operand" "")]
3496 "TARGET_NEON"
3497 {
3498 rtx tmp = gen_reg_rtx (<MODE>mode);
3499 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3500 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
3501 tmp, const0_rtx));
3502 DONE;
3503 })
3504
;; vmls_n expander, VMD modes: places the scalar (operand 3) in lane 0 of a
;; fresh <MODE>mode pseudo and emits neon_vmls_lane<mode> on lane 0.
3505 (define_expand "neon_vmls_n<mode>"
3506 [(match_operand:VMD 0 "s_register_operand" "")
3507 (match_operand:VMD 1 "s_register_operand" "")
3508 (match_operand:VMD 2 "s_register_operand" "")
3509 (match_operand:<V_elem> 3 "s_register_operand" "")]
3510 "TARGET_NEON"
3511 {
3512 rtx tmp = gen_reg_rtx (<MODE>mode);
3513 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3514 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3515 tmp, const0_rtx));
3516 DONE;
3517 })
3518
;; vmls_n expander, VMQ modes: the scalar (operand 3) goes into lane 0 of a
;; fresh <V_HALF>mode pseudo via neon_vset_lane<V_half>, which is then
;; passed to neon_vmls_lane<mode> with lane 0.
3519 (define_expand "neon_vmls_n<mode>"
3520 [(match_operand:VMQ 0 "s_register_operand" "")
3521 (match_operand:VMQ 1 "s_register_operand" "")
3522 (match_operand:VMQ 2 "s_register_operand" "")
3523 (match_operand:<V_elem> 3 "s_register_operand" "")]
3524 "TARGET_NEON"
3525 {
3526 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3527 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3528 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
3529 tmp, const0_rtx));
3530 DONE;
3531 })
3532
;; vmlsls_n expander: places the scalar (operand 3) in lane 0 of a fresh
;; <MODE>mode pseudo and emits neon_vmlsls_lane<mode> on lane 0.
3533 (define_expand "neon_vmlsls_n<mode>"
3534 [(match_operand:<V_widen> 0 "s_register_operand" "")
3535 (match_operand:<V_widen> 1 "s_register_operand" "")
3536 (match_operand:VMDI 2 "s_register_operand" "")
3537 (match_operand:<V_elem> 3 "s_register_operand" "")]
3538 "TARGET_NEON"
3539 {
3540 rtx tmp = gen_reg_rtx (<MODE>mode);
3541 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3542 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
3543 tmp, const0_rtx));
3544 DONE;
3545 })
3546
;; vmlslu_n expander: places the scalar (operand 3) in lane 0 of a fresh
;; <MODE>mode pseudo and emits neon_vmlslu_lane<mode> on lane 0.
3547 (define_expand "neon_vmlslu_n<mode>"
3548 [(match_operand:<V_widen> 0 "s_register_operand" "")
3549 (match_operand:<V_widen> 1 "s_register_operand" "")
3550 (match_operand:VMDI 2 "s_register_operand" "")
3551 (match_operand:<V_elem> 3 "s_register_operand" "")]
3552 "TARGET_NEON"
3553 {
3554 rtx tmp = gen_reg_rtx (<MODE>mode);
3555 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3556 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
3557 tmp, const0_rtx));
3558 DONE;
3559 })
3560
;; vqdmlsl_n expander: places the scalar (operand 3) in lane 0 of a fresh
;; <MODE>mode pseudo and emits neon_vqdmlsl_lane<mode> on lane 0.
3561 (define_expand "neon_vqdmlsl_n<mode>"
3562 [(match_operand:<V_widen> 0 "s_register_operand" "")
3563 (match_operand:<V_widen> 1 "s_register_operand" "")
3564 (match_operand:VMDI 2 "s_register_operand" "")
3565 (match_operand:<V_elem> 3 "s_register_operand" "")]
3566 "TARGET_NEON"
3567 {
3568 rtx tmp = gen_reg_rtx (<MODE>mode);
3569 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3570 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
3571 tmp, const0_rtx));
3572 DONE;
3573 })
3574
;; vext pattern (UNSPEC_VEXT): two vector sources plus an immediate
;; (operand 3) that is printed as the final assembler operand.  The
;; immediate is checked at output time with neon_const_bounds against
;; 0 and GET_MODE_NUNITS (<MODE>mode).
;; NOTE(review): the upper bound is presumably exclusive -- confirm against
;; the definition of neon_const_bounds.
3575 (define_insn "neon_vext<mode>"
3576 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3577 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3578 (match_operand:VDQX 2 "s_register_operand" "w")
3579 (match_operand:SI 3 "immediate_operand" "i")]
3580 UNSPEC_VEXT))]
3581 "TARGET_NEON"
3582 {
3583 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3584 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3585 }
3586 [(set_attr "type" "neon_ext<q>")]
3587 )
3588
;; vrev64 pattern (UNSPEC_VREV64) over the VDQ modes; one source, one
;; destination, single vrev64.<size> instruction.
3589 (define_insn "neon_vrev64<mode>"
3590 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3591 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3592 UNSPEC_VREV64))]
3593 "TARGET_NEON"
3594 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3595 [(set_attr "type" "neon_rev<q>")]
3596 )
3597
;; vrev32 pattern (UNSPEC_VREV32) over the VX modes; one source, one
;; destination, single vrev32.<size> instruction.
3598 (define_insn "neon_vrev32<mode>"
3599 [(set (match_operand:VX 0 "s_register_operand" "=w")
3600 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3601 UNSPEC_VREV32))]
3602 "TARGET_NEON"
3603 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3604 [(set_attr "type" "neon_rev<q>")]
3605 )
3606
;; vrev16 pattern (UNSPEC_VREV16) over the VE modes; one source, one
;; destination, single vrev16.<size> instruction.
3607 (define_insn "neon_vrev16<mode>"
3608 [(set (match_operand:VE 0 "s_register_operand" "=w")
3609 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3610 UNSPEC_VREV16))]
3611 "TARGET_NEON"
3612 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3613 [(set_attr "type" "neon_rev<q>")]
3614 )
3615
3616 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3617 ; allocation. For an intrinsic of form:
3618 ; rD = vbsl_* (rS, rN, rM)
3619 ; We can use any of:
3620 ; vbsl rS, rN, rM (if D = S)
3621 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3622 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
3623
;; Bitwise select with three alternatives, each tying a different input to
;; the destination:
;;   alt 0: operand 1 tied ("0") -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied ("0") -> vbit %0, %2, %1
;;   alt 2: operand 2 tied ("0") -> vbif %0, %3, %1
;; The tied operand is the one overwritten and therefore omitted from the
;; source list of the emitted instruction.
3624 (define_insn "neon_vbsl<mode>_internal"
3625 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3626 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3627 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3628 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3629 UNSPEC_VBSL))]
3630 "TARGET_NEON"
3631 "@
3632 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3633 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3634 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3635 [(set_attr "type" "neon_bsl<q>")]
3636 )
3637
;; Expander for bitwise select.  Operand 1 arrives in <V_cmp_result> mode
;; while operands 0, 2 and 3 are in <MODE>mode; it is re-expressed in
;; <MODE>mode before the UNSPEC_VBSL template is emitted.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "")
        (unspec:VDQX
          [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
           (match_operand:VDQX 2 "s_register_operand" "")
           (match_operand:VDQX 3 "s_register_operand" "")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands of different modes cannot be aliased to one another, so
     give the selector the same mode as the remaining operands.  */
  rtx selector = gen_lowpart (<MODE>mode, operands[1]);
  operands[1] = selector;
}
)
3649
;; vshl, vrshl (iterated through VSHL).  The shift amount is taken from
;; vector register operand 2.
;; NOTE(review): the "type" is neon_shift_imm<q> even though the shift
;; amount is a register operand -- confirm this classification is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
3660
;; vqshl, vqrshl (iterated through VQSHL).  The shift amount is taken from
;; vector register operand 2.
;; NOTE(review): the "type" is neon_sat_shift_imm<q> even though the shift
;; amount is a register operand -- confirm this classification is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3671
;; vshr_n, vrshr_n (iterated through VSHR_N): shift operand 1 by the
;; immediate in operand 2.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHR_N))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 1 and high = element width + 1
     (neon_const_bounds is defined elsewhere; presumably the upper bound is
     exclusive -- confirm).  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[2], 1, elt_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
3685
;; vshrn_n, vrshrn_n (iterated through VSHRN_N): shift the VN-mode source
;; by the immediate in operand 2 and produce a <V_narrow> result.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHRN_N))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 1 and
     high = (source element width / 2) + 1.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[2], 1, elt_bits / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3699
;; vqshrn_n, vqrshrn_n (iterated through VQSHRN_N): shift the VN-mode source
;; by the immediate in operand 2 and produce a <V_narrow> result.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRN_N))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 1 and
     high = (source element width / 2) + 1.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[2], 1, elt_bits / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3713
;; vqshrun_n, vqrshrun_n (iterated through VQSHRUN_N): shift the VN-mode
;; source by the immediate in operand 2 and produce a <V_narrow> result.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRUN_N))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 1 and
     high = (source element width / 2) + 1.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[2], 1, elt_bits / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3727
;; vshl_n: shift operand 1 left by the immediate in operand 2.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 0 and high = element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[2], 0, elt_bits);
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
3740
;; vqshl_n (iterated through VQSHL_N): shift operand 1 by the immediate in
;; operand 2, emitting "vqshl" with the <sup> type prefix.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHL_N))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 0 and high = element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[2], 0, elt_bits);
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3753
;; vqshlu_n: shift operand 1 by the immediate in operand 2, emitting
;; "vqshlu" with the <V_s_elem> type suffix.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 0 and high = element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[2], 0, elt_bits);
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3766
;; vshll_n (iterated through VSHLL_N): shift the VW-mode source by the
;; immediate in operand 2 and produce a <V_widen> result.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHLL_N))]
  "TARGET_NEON"
{
  /* Bounds passed: low = 0, high = element width + 1, so a count equal to
     the element width is accepted.
     NOTE(review): the previous comment here read "0 < imm <= size", yet
     the lower bound handed to neon_const_bounds is 0 -- confirm whether a
     zero shift is meant to be allowed.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[2], 0, elt_bits + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
3780
;; vsra_n, vrsra_n (iterated through VSRA_N).  Operand 1 is tied to the
;; destination; operand 2 is the value shifted by the immediate in
;; operand 3.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          VSRA_N))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 1 and
     high = element width + 1.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[3], 1, elt_bits + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
3795
;; vsri_n.  Operand 1 is tied to the destination; operand 2 is the value
;; shifted by the immediate in operand 3.
;; NOTE(review): the "type" is neon_shift_reg<q> although the shift amount
;; is an immediate -- confirm this classification is intended.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSRI))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 1 and
     high = element width + 1.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[3], 1, elt_bits + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
3809
;; vsli_n.  Operand 1 is tied to the destination; operand 2 is the value
;; shifted by the immediate in operand 3.
;; NOTE(review): the "type" is neon_shift_reg<q> although the shift amount
;; is an immediate -- confirm this classification is intended.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSLI))]
  "TARGET_NEON"
{
  /* Validate the shift count against low = 0 and high = element width.  */
  const HOST_WIDE_INT elt_bits = neon_element_bits (<MODE>mode);

  neon_const_bounds (operands[3], 0, elt_bits);
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
3823
;; vtbl with a single-register table: operand 1 is printed as the
;; bracketed register list and operand 2 follows it.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
3833
;; vtbl with a two-register table.  The table is the TImode operand 1; it
;; is printed as two V8QI registers, the second one two register numbers
;; above the first.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int first_regno = REGNO (operands[1]);
  rtx xops[4];

  /* xops[1..2] name the individual table registers.  */
  xops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[3] = operands[2];

  /* The instruction is printed here, so nothing is left to return.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", xops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
3854
;; vtbl with a three-register table.  The table is the EImode operand 1; it
;; is printed as three V8QI registers spaced two register numbers apart.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int first_regno = REGNO (operands[1]);
  rtx xops[5];

  /* xops[1..3] name the individual table registers.  */
  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[4] = operands[2];

  /* The instruction is printed here, so nothing is left to return.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", xops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
3876
;; vtbl with a four-register table.  The table is the OImode operand 1; it
;; is printed as four V8QI registers spaced two register numbers apart.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  const int first_regno = REGNO (operands[1]);
  rtx xops[6];

  /* xops[1..4] name the individual table registers.  */
  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[5] = operands[2];

  /* The instruction is printed here, so nothing is left to return.  */
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
3899
;; This pattern and the two that follow it are used by the vec_perm
;; infrastructure for V16QImode.
;;
;; V16QI table lookup with a V16QI table.  Emitted as "#" and split after
;; reload into two neon_vtbl2v8qi insns, one per 64-bit half of the result;
;; both use the whole table (viewed as TImode) and the matching half of
;; operand 2.  Operand 0 is earlyclobber.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = gen_lowpart (TImode, operands[1]);
  unsigned half_ofs[2];

  /* Low half first, then high half -- the same order the insns are
     emitted in.  */
  half_ofs[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_ofs[1] = subreg_highpart_offset (V8QImode, V16QImode);

  for (int i = 0; i < 2; i++)
    {
      rtx dest_half
        = simplify_subreg (V8QImode, operands[0], V16QImode, half_ofs[i]);
      rtx sel_half
        = simplify_subreg (V8QImode, operands[2], V16QImode, half_ofs[i]);

      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
3931
;; V16QI table lookup with an OImode table.  Emitted as "#" and split after
;; reload into two neon_vtbl2v8qi insns, one per 64-bit half of the result;
;; both receive operand 1 unchanged as the table and the matching half of
;; operand 2.  Operand 0 is earlyclobber.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = operands[1];
  unsigned half_ofs[2];

  /* Low half first, then high half -- the same order the insns are
     emitted in.  */
  half_ofs[0] = subreg_lowpart_offset (V8QImode, V16QImode);
  half_ofs[1] = subreg_highpart_offset (V8QImode, V16QImode);

  for (int i = 0; i < 2; i++)
    {
      rtx dest_half
        = simplify_subreg (V8QImode, operands[0], V16QImode, half_ofs[i]);
      rtx sel_half
        = simplify_subreg (V8QImode, operands[2], V16QImode, half_ofs[i]);

      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
3962
;; ??? The logical design would be to extend the regular neon_vcombine
;; pattern so that it accepts quad-word input modes and produces octa-word
;; output modes.  That, however, would require support for octa-word vector
;; modes in moves, which seems overkill for this single use in vec_perm.
;; Instead this dedicated pattern joins two V16QI values into an OI value;
;; after reload the work is handed to neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* neon_split_vcombine emits the replacement insns itself.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
3982
;; vtbx with a single-register table.  Operand 1 is tied to the
;; destination; operand 2 is printed as the bracketed register list and
;; operand 3 follows it.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
3993
;; vtbx with a two-register table.  Operand 1 is tied to the destination.
;; The table is the TImode operand 2; it is printed as two V8QI registers,
;; the second one two register numbers above the first.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:TI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int first_regno = REGNO (operands[2]);
  rtx xops[4];

  /* xops[1..2] name the individual table registers.  */
  xops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[3] = operands[3];

  /* The instruction is printed here, so nothing is left to return.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", xops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4015
;; vtbx with a three-register table.  Operand 1 is tied to the destination.
;; The table is the EImode operand 2; it is printed as three V8QI registers
;; spaced two register numbers apart.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:EI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int first_regno = REGNO (operands[2]);
  rtx xops[5];

  /* xops[1..3] name the individual table registers.  */
  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[4] = operands[3];

  /* The instruction is printed here, so nothing is left to return.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", xops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4038
;; vtbx with a four-register table.  Operand 1 is tied to the destination.
;; The table is the OImode operand 2; it is printed as four V8QI registers
;; spaced two register numbers apart.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:OI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int first_regno = REGNO (operands[2]);
  rtx xops[6];

  /* xops[1..4] name the individual table registers.  */
  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first_regno + 2 * i);
  xops[5] = operands[3];

  /* The instruction is printed here, so nothing is left to return.  */
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4062
;; Expander for vtrn.  One parallel carries both results: operand 0 gets
;; UNSPEC_VTRN1 and operand 3 gets UNSPEC_VTRN2 of the same two inputs
;; (operands 1 and 2).  There is no preparation code.
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
     [(set (match_operand:VDQW 0 "s_register_operand" "")
           (unspec:VDQW
             [(match_operand:VDQW 1 "s_register_operand" "")
              (match_operand:VDQW 2 "s_register_operand" "")]
             UNSPEC_VTRN1))
      (set (match_operand:VDQW 3 "s_register_operand" "")
           (unspec:VDQW
             [(match_dup 1) (match_dup 2)]
             UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
4074
;; Note: the operands are numbered differently from the expander so that
;; the tied registers are handled correctly.  Here the two outputs are
;; operands 0 and 2 (both earlyclobber); input operand 1 is tied to
;; operand 0 and input operand 3 is tied to operand 2.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 3 "s_register_operand" "2")]
          UNSPEC_VTRN1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW
          [(match_dup 1) (match_dup 3)]
          UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
4088
;; Expander for vzip.  One parallel carries both results: operand 0 gets
;; UNSPEC_VZIP1 and operand 3 gets UNSPEC_VZIP2 of the same two inputs
;; (operands 1 and 2).  There is no preparation code.
(define_expand "neon_vzip<mode>_internal"
  [(parallel
     [(set (match_operand:VDQW 0 "s_register_operand" "")
           (unspec:VDQW
             [(match_operand:VDQW 1 "s_register_operand" "")
              (match_operand:VDQW 2 "s_register_operand" "")]
             UNSPEC_VZIP1))
      (set (match_operand:VDQW 3 "s_register_operand" "")
           (unspec:VDQW
             [(match_dup 1) (match_dup 2)]
             UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
4100
;; Note: the operands are numbered differently from the expander so that
;; the tied registers are handled correctly.  Here the two outputs are
;; operands 0 and 2 (both earlyclobber); input operand 1 is tied to
;; operand 0 and input operand 3 is tied to operand 2.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 3 "s_register_operand" "2")]
          UNSPEC_VZIP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW
          [(match_dup 1) (match_dup 3)]
          UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4114
;; Expander for vuzp.  One parallel carries both results: operand 0 gets
;; UNSPEC_VUZP1 and operand 3 gets UNSPEC_VUZP2 of the same two inputs
;; (operands 1 and 2).  There is no preparation code.
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
     [(set (match_operand:VDQW 0 "s_register_operand" "")
           (unspec:VDQW
             [(match_operand:VDQW 1 "s_register_operand" "")
              (match_operand:VDQW 2 "s_register_operand" "")]
             UNSPEC_VUZP1))
      (set (match_operand:VDQW 3 "s_register_operand" "")
           (unspec:VDQW
             [(match_dup 1) (match_dup 2)]
             UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
4126
;; Note: the operands are numbered differently from the expander so that
;; the tied registers are handled correctly.  Here the two outputs are
;; operands 0 and 2 (both earlyclobber); input operand 1 is tied to
;; operand 0 and input operand 3 is tied to operand 2.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW
          [(match_operand:VDQW 1 "s_register_operand" "0")
           (match_operand:VDQW 3 "s_register_operand" "2")]
          UNSPEC_VUZP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW
          [(match_dup 1) (match_dup 3)]
          UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4140
;; Reinterpret a VD_RE-mode register (operand 1) as V8QI (operand 0).  All
;; of the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretv8qi<mode>"
  [(match_operand:V8QI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4149
;; Reinterpret a VD_RE-mode register (operand 1) as V4HI (operand 0).  All
;; of the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretv4hi<mode>"
  [(match_operand:V4HI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4158
;; Reinterpret a VD_RE-mode register (operand 1) as V2SI (operand 0).  All
;; of the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretv2si<mode>"
  [(match_operand:V2SI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4167
;; Reinterpret a VD_RE-mode register (operand 1) as V2SF (operand 0).  All
;; of the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretv2sf<mode>"
  [(match_operand:V2SF 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4176
;; Reinterpret a VD_RE-mode register (operand 1) as DI (operand 0).  All of
;; the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretdi<mode>"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:VD_RE 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4185
;; Reinterpret a VQXMOV-mode register (operand 1) as TI (operand 0).  All
;; of the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretti<mode>"
  [(match_operand:TI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4194
4195
;; Reinterpret a VQXMOV-mode register (operand 1) as V16QI (operand 0).
;; All of the work is delegated to neon_reinterpret; the expander emits
;; nothing else.
(define_expand "neon_vreinterpretv16qi<mode>"
  [(match_operand:V16QI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4204
;; Reinterpret a VQXMOV-mode register (operand 1) as V8HI (operand 0).  All
;; of the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretv8hi<mode>"
  [(match_operand:V8HI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4213
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SI (operand 0).  All
;; of the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretv4si<mode>"
  [(match_operand:V4SI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4222
;; Reinterpret a VQXMOV-mode register (operand 1) as V4SF (operand 0).  All
;; of the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretv4sf<mode>"
  [(match_operand:V4SF 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4231
;; Reinterpret a VQXMOV-mode register (operand 1) as V2DI (operand 0).  All
;; of the work is delegated to neon_reinterpret; the expander emits nothing
;; else.
(define_expand "neon_vreinterpretv2di<mode>"
  [(match_operand:V2DI 0 "s_register_operand" "")
   (match_operand:VQXMOV 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  neon_reinterpret (dest, src);
  DONE;
}
)
4240
;; vec_load_lanes for a single vector: expands to the same UNSPEC_VLD1 RTL
;; shape that neon_vld1<mode> uses.  No preparation code.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
)
4246
;; vld1: load register operand 0 from the memory operand 1 with a single
;; "vld1.<size>" instruction.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
4255
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  They are converted back to architectural lane
;; order here, via NEON_ENDIAN_LANE_N, before being printed.
;;
;; D-register form: load one <V_elem> from memory (operand 1) into lane
;; operand 3 of operand 0; operand 2, tied to the destination, supplies the
;; other lanes.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VDX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* Replace the lane operand by its architectural lane number.  */
  operands[3]
    = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));

  /* A single-element vector is loaded without a lane specifier.  */
  if (nunits == 1)
    return "vld1.<V_sz_elem>\t%P0, %A1";

  return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4277
;; Q-register form of neon_vld1_lane.  As explained in the comment on the
;; D-register neon_vld1_lane pattern, the lane number in the RTL is in GCC
;; lane order and is converted with NEON_ENDIAN_LANE_N, which is why the
;; lane numbers are reversed here on big-endian targets.
;;
;; The instruction is printed against one <V_HALF>-mode register: lanes in
;; the upper half of the vector select the register two numbers above the
;; first, with the lane index rebased.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VQX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int half_regno = REGNO (operands[0]);

  /* Lanes in the upper half belong to the second half register.  */
  if (lane_no >= nunits / 2)
    {
      lane_no -= nunits / 2;
      half_regno += 2;
    }

  operands[3] = GEN_INT (lane_no);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds exactly one, so no lane specifier
     is printed.  */
  if (nunits == 2)
    return "vld1.<V_sz_elem>\t%P0, %A1";

  return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4306
;; vld1_dup, D-register form: operand 0 is a vec_duplicate of the <V_elem>
;; memory operand 1, printed with the "[]" all-lanes syntax.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (vec_duplicate:VD
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4314
;; Special case for DImode: a duplicating load of a single DI value is
;; treated exactly like a simple load, so this expands to plain UNSPEC_VLD1
;; RTL with no preparation code.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI
          [(match_operand:DI 1 "neon_struct_operand" "")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
4323
;; vld1_dup, Q-register form: operand 0 is a vec_duplicate of the <V_elem>
;; memory operand 1.  Both halves of the destination (%e0 and %f0) are
;; listed with the "[]" all-lanes syntax in a single vld1.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ 0 "s_register_operand" "=w")
        (vec_duplicate:VQ
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4333
;; vld1_dup for V2DI.  Emitted as "#" and split after reload into two
;; insns: a neon_vld1_dupdi load into the low DI half of operand 0,
;; followed by a move of that half into the high DI half.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);

  /* Load once, then replicate register-to-register.  */
  emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));

  rtx high_half = gen_highpart (DImode, operands[0]);
  emit_move_insn (high_half, low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
4350
;; vec_store_lanes for a single vector: expands to the same UNSPEC_VST1 RTL
;; shape that neon_vst1<mode> uses.  No preparation code.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand")]
          UNSPEC_VST1))]
  "TARGET_NEON"
)
4356
;; vst1: store register operand 1 to the memory operand 0 with a single
;; "vst1.<size>" instruction.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")]
          UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
4364
;; D-register single-lane store.  As explained in the comment on
;; neon_vld1_lane, the lane number in the RTL is in GCC lane order and is
;; converted with NEON_ENDIAN_LANE_N, which is why the lane numbers are
;; reversed here on big-endian targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* Replace the lane operand by its architectural lane number.  */
  operands[2]
    = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));

  /* A single-element vector is stored without a lane specifier.  */
  if (nunits == 1)
    return "vst1.<V_sz_elem>\t{%P1}, %A0";

  return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4385
;; Q-register single-lane store.  As explained in the comment on
;; neon_vld1_lane, the lane number in the RTL is in GCC lane order and is
;; converted with NEON_ENDIAN_LANE_N, which is why the lane numbers are
;; reversed here on big-endian targets.
;;
;; The instruction is printed against one <V_HALF>-mode register: lanes in
;; the upper half of the vector select the register two numbers above the
;; first, with the lane index rebased.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int half_regno = REGNO (operands[1]);

  /* Lanes in the upper half belong to the second half register.  */
  if (lane_no >= nunits / 2)
    {
      lane_no -= nunits / 2;
      half_regno += 2;
    }

  operands[2] = GEN_INT (lane_no);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds exactly one, so no lane specifier
     is printed.  */
  if (nunits == 2)
    return "vst1.<V_sz_elem>\t{%P1}, %A0";

  return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4413
;; vec_load_lanes producing a TImode register group: expands to the same
;; UNSPEC_VLD2 RTL shape as the TImode neon_vld2<mode> insn.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the VDX mode iterator.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
)
4420
;; vld2 into a TImode register group.  When the element size is 64 bits
;; the instruction printed is "vld1.64" rather than "vld2"; the "type"
;; attribute makes the same distinction.  The inner UNSPEC_VSTRUCTDUMMY
;; unspec only carries the VDX mode iterator.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_2reg<q>")
                      (const_string "neon_load2_2reg<q>")))]
)
4438
;; vec_load_lanes producing an OImode register group: expands to the same
;; UNSPEC_VLD2 RTL shape as the OImode neon_vld2<mode> insn.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the VQ2 mode iterator.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
)
4445
;; vld2 into an OImode register group, printed as one "vld2.<size>".  The
;; inner UNSPEC_VSTRUCTDUMMY unspec only carries the VQ2 mode iterator.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")]
)
4454
;; Single-lane vld2 into a TImode register group.  As explained in the
;; comment on neon_vld1_lane, the lane number in the RTL is in GCC lane
;; order and is converted with NEON_ENDIAN_LANE_N, which is why the lane
;; numbers are reversed here on big-endian targets.
;;
;; Operand 2, tied to the destination, supplies the lanes that are not
;; loaded.  The two registers printed are the group's first register and
;; the one two register numbers above it.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:TI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const int first_regno = REGNO (operands[0]);
  rtx xops[4];

  xops[0] = gen_rtx_REG (DImode, first_regno);
  xops[1] = gen_rtx_REG (DImode, first_regno + 2);
  xops[2] = operands[1];
  xops[3] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));

  /* The instruction is printed here, so nothing is left to return.  */
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4478
;; Single-lane vld2 into an OImode register group.  As explained in the
;; comment on neon_vld1_lane, the lane number in the RTL is in GCC lane
;; order and is converted with NEON_ENDIAN_LANE_N, which is why the lane
;; numbers are reversed here on big-endian targets.
;;
;; Operand 2, tied to the destination, supplies the lanes that are not
;; loaded.  Lanes in the upper half of the vector start from the register
;; two numbers above the group's first one, with the lane index rebased;
;; the second register printed is always four numbers above the first.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const HOST_WIDE_INT half_units = GET_MODE_NUNITS (<MODE>mode) / 2;
  int first_regno = REGNO (operands[0]);
  rtx xops[4];

  if (lane_no >= half_units)
    {
      lane_no -= half_units;
      first_regno += 2;
    }

  xops[0] = gen_rtx_REG (DImode, first_regno);
  xops[1] = gen_rtx_REG (DImode, first_regno + 4);
  xops[2] = operands[1];
  xops[3] = GEN_INT (lane_no);

  /* The instruction is printed here, so nothing is left to return.  */
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4508
;; vld2_dup into a TImode register group.  Modes with more than one
;; element print "vld2" with the "[]" all-lanes syntax on both halves;
;; single-element modes print a plain "vld1" instead, and the "type"
;; attribute makes the same distinction.  The inner UNSPEC_VSTRUCTDUMMY
;; unspec only carries the VDX mode iterator.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  return (GET_MODE_NUNITS (<MODE>mode) <= 1
          ? "vld1.<V_sz_elem>\t%h0, %A1"
          : "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1");
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load2_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
4526
;; vec_store_lanes from a TImode register group: expands to the same
;; UNSPEC_VST2 RTL shape as the TImode neon_vst2<mode> insn.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the VDX mode iterator.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
)
4533
;; vst2 from a TImode register group.  When the element size is 64 bits
;; the instruction printed is "vst1.64" rather than "vst2"; the "type"
;; attribute makes the same distinction.  The inner UNSPEC_VSTRUCTDUMMY
;; unspec only carries the VDX mode iterator.
;;
;; The non-64-bit case is typed neon_store2_2reg<q>, mirroring the
;; neon_load2_2reg<q> used by the matching neon_vld2<mode> pattern: this
;; insn stores the whole register group, not a single lane, so the former
;; neon_store2_one_lane<q> classification was inconsistent.  Only
;; scheduling is affected; the emitted assembly is unchanged.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_2reg<q>")
                      (const_string "neon_store2_2reg<q>")))]
)
4551
;; vec_store_lanes from an OImode register group: expands to the same
;; UNSPEC_VST2 RTL shape as the OImode neon_vst2<mode> insn.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the VQ2 mode iterator.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
)
4558
;; vst2 from an OImode register group, printed as one "vst2.<size>".  The
;; inner UNSPEC_VSTRUCTDUMMY unspec only carries the VQ2 mode iterator.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
4568
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VST2 from a TI-mode register group: operand 0 is the
;; destination memory, operand 1 the source registers and operand 2 the
;; lane index.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[4];

  /* xops: 0 = memory, 1..2 = the two source registers, 3 = lane index
     after the endianness adjustment.  */
  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 2);
  xops[3] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4592
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VST2 from an OI-mode register group (VQ_HS element modes):
;; operand 0 is the destination memory, operand 1 the source registers and
;; operand 2 the lane index.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[4];

  /* A lane in the upper half of the vector is reached through the
     registers two numbers further on, with a half-relative index.  */
  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4622
;; Expander for the vec_load_lanes pattern on EI-mode register groups.
;; No preparation statements: the UNSPEC_VLD3 load of memory operand 1
;; into register operand 0 is emitted unchanged.
4623 (define_expand "vec_load_lanesei<mode>"
4624 [(set (match_operand:EI 0 "s_register_operand")
4625 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4626 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4627 UNSPEC_VLD3))]
4628 "TARGET_NEON")
4629
;; Whole-structure VLD3 into an EI-mode register group.  With 64-bit
;; elements the pattern emits VLD1.64 instead of VLD3.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  ;; The scheduling type mirrors the instruction choice made above.
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)
4647
;; Expander for the vec_load_lanes pattern on CI-mode register groups;
;; it simply forwards both operands to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
4657
;; CI-mode VLD3 is expanded into two instructions: neon_vld3qa loads from
;; the first EImode-sized piece of the memory operand and neon_vld3qb from
;; the piece immediately after it.  The second insn also takes operand 0 as
;; an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_mem, second_mem;

  /* First EImode piece, at offset 0 of the structure.  */
  first_mem = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_mem));

  /* Second EImode piece, directly following the first.  */
  second_mem = adjust_address (first_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_mem, operands[0]));
  DONE;
})
4672
;; First half of the CI-mode VLD3 expansion: loads from EI-mode memory
;; (operand 1) into the DImode registers at REGNO (operand 0) + 0, + 4
;; and + 8.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4];
  int i;

  /* Destination registers are four register numbers apart.  */
  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4691
;; Second half of the CI-mode VLD3 expansion: loads from EI-mode memory
;; (operand 1) into the DImode registers at REGNO (operand 0) + 2, + 6
;; and + 10.  Operand 2 is tied to operand 0 by the "0" constraint.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx xops[4];
  int i;

  /* Same stride of four as neon_vld3qa, starting two numbers later.  */
  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4711
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VLD3 into an EI-mode register group.  Operand 1 is the
;; source memory, operand 2 the previous register contents (tied to
;; operand 0) and operand 3 the lane index.
;; NOTE(review): the address is printed with plain %3 rather than the %A
;; modifier used by the VLD2/VLD4 lane patterns -- presumably deliberate;
;; confirm before changing.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];
  int i;

  /* xops: 0..2 = destination registers, 3 = memory, 4 = lane index.  */
  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4737
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VLD3 into a CI-mode register group (VQ_HS element modes).
;; Operand 1 is the source memory, operand 2 the previous register
;; contents (tied to operand 0) and operand 3 the lane index.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[5];
  int i;

  /* Upper-half lanes use the registers two numbers further on and a
     half-relative lane index.  */
  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (lane);

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4769
;; All-lanes form of VLD3 into an EI-mode register group.  Modes with a
;; single element use a plain VLD1 instead.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  int base, i;
  rtx xops[4];

  /* Single-element modes: fall back to vld1.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* xops: 0..2 = destination registers, 3 = memory.  */
  base = REGNO (operands[0]);
  for (i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", xops);
  return "";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])
4795
;; Expander for the vec_store_lanes pattern on EI-mode register groups.
;; No preparation statements: the UNSPEC_VST3 store of register operand 1
;; into memory operand 0 is emitted unchanged.
4796 (define_expand "vec_store_lanesei<mode>"
4797 [(set (match_operand:EI 0 "neon_struct_operand")
4798 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4799 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4800 UNSPEC_VST3))]
4801 "TARGET_NEON")
4802
;; Whole-structure VST3 of an EI-mode register group to memory.  When the
;; element size is 64 bits the pattern emits VST1.64 instead of VST3.
;; Operand 0 is the destination memory, operand 1 the source registers.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  ;; This stores the complete three-register group, not a single lane, so
  ;; the non-64-bit case uses the three-register VST3 scheduling type, in
  ;; line with neon_load3_3reg on the matching neon_vld3 pattern (it was
  ;; previously tagged with the one-lane store type).
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])
4819
;; Expander for the vec_store_lanes pattern on CI-mode register groups;
;; it simply forwards both operands to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_mem = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest_mem, src));
  DONE;
})
4829
;; CI-mode VST3 is expanded into two instructions: neon_vst3qa stores to
;; the first EImode-sized piece of the memory operand and neon_vst3qb to
;; the piece immediately after it.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_mem, second_mem;

  /* First EImode piece, at offset 0 of the structure.  */
  first_mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_mem, operands[1]));

  /* Second EImode piece, directly following the first.  */
  second_mem = adjust_address (first_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_mem, operands[1]));
  DONE;
})
4844
;; First half of the CI-mode VST3 expansion: stores the DImode registers
;; at REGNO (operand 1) + 0, + 4 and + 8 to EI-mode memory (operand 0).
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[4];
  int i;

  /* xops: 0 = memory, 1..3 = source registers four numbers apart.  */
  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4863
;; Second half of the CI-mode VST3 expansion: stores the DImode registers
;; at REGNO (operand 1) + 2, + 6 and + 10 to EI-mode memory (operand 0).
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[4];
  int i;

  /* Same stride of four as neon_vst3qa, starting two numbers later.  */
  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4882
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VST3 from an EI-mode register group: operand 0 is the
;; destination memory, operand 1 the source registers and operand 2 the
;; lane index.
;; NOTE(review): the address is printed with plain %0 rather than the %A
;; modifier used by the VST2/VST4 lane patterns -- presumably deliberate;
;; confirm before changing.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];
  int i;

  /* xops: 0 = memory, 1..3 = source registers, 4 = lane index.  */
  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
4908
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VST3 from a CI-mode register group (VQ_HS element modes):
;; operand 0 is the destination memory, operand 1 the source registers and
;; operand 2 the lane index.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx xops[5];
  int i;

  /* Upper-half lanes use the registers two numbers further on and a
     half-relative lane index.  */
  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  xops[0] = operands[0];
  for (i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = GEN_INT (lane);

  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
4940
;; Expander for the vec_load_lanes pattern on OI-mode register groups
;; (VDX element modes).  No preparation statements: the UNSPEC_VLD4 load
;; of memory operand 1 into register operand 0 is emitted unchanged.
4941 (define_expand "vec_load_lanesoi<mode>"
4942 [(set (match_operand:OI 0 "s_register_operand")
4943 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4944 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4945 UNSPEC_VLD4))]
4946 "TARGET_NEON")
4947
;; Whole-structure VLD4 into an OI-mode register group.  With 64-bit
;; elements the pattern emits VLD1.64 instead of VLD4.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  ;; The scheduling type mirrors the instruction choice made above.
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)
4965
;; Expander for the vec_load_lanes pattern on XI-mode register groups;
;; it simply forwards both operands to the neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})
4975
;; XI-mode VLD4 is expanded into two instructions: neon_vld4qa loads from
;; the first OImode-sized piece of the memory operand and neon_vld4qb from
;; the piece immediately after it.  The second insn also takes operand 0 as
;; an input.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_mem, second_mem;

  /* First OImode piece, at offset 0 of the structure.  */
  first_mem = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_mem));

  /* Second OImode piece, directly following the first.  */
  second_mem = adjust_address (first_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_mem, operands[0]));
  DONE;
})
4990
;; First half of the XI-mode VLD4 expansion: loads from OI-mode memory
;; (operand 1) into the DImode registers at REGNO (operand 0) + 0, + 4,
;; + 8 and + 12.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];
  int i;

  /* Destination registers are four register numbers apart.  */
  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5010
;; Second half of the XI-mode VLD4 expansion: loads from OI-mode memory
;; (operand 1) into the DImode registers at REGNO (operand 0) + 2, + 6,
;; + 10 and + 14.  Operand 2 is tied to operand 0 by the "0" constraint.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx xops[5];
  int i;

  /* Same stride of four as neon_vld4qa, starting two numbers later.  */
  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5031
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VLD4 into an OI-mode register group.  Operand 1 is the
;; source memory, operand 2 the previous register contents (tied to
;; operand 0) and operand 3 the lane index.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[6];
  int i;

  /* xops: 0..3 = destination registers, 4 = memory, 5 = lane index.  */
  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5058
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VLD4 into an XI-mode register group (VQ_HS element modes).
;; Operand 1 is the source memory, operand 2 the previous register
;; contents (tied to operand 0) and operand 3 the lane index.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base = REGNO (operands[0]);
  rtx xops[6];
  int i;

  /* Upper-half lanes use the registers two numbers further on and a
     half-relative lane index.  */
  if (lane >= half)
    {
      base += 2;
      lane -= half;
    }

  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (lane);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5091
;; All-lanes form of VLD4 into an OI-mode register group.  Modes with a
;; single element use a plain VLD1 instead.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  int base, i;
  rtx xops[5];

  /* Single-element modes: fall back to vld1.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* xops: 0..3 = destination registers, 4 = memory.  */
  base = REGNO (operands[0]);
  for (i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   xops);
  return "";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
5120
;; Expander for the vec_store_lanes pattern on OI-mode register groups
;; with VDX element modes.  No preparation statements: the UNSPEC_VST4
;; store of register operand 1 into memory operand 0 is emitted unchanged.
5121 (define_expand "vec_store_lanesoi<mode>"
5122 [(set (match_operand:OI 0 "neon_struct_operand")
5123 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5124 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5125 UNSPEC_VST4))]
5126 "TARGET_NEON")
5127
;; Whole-structure VST4 of an OI-mode register group to memory.  With
;; 64-bit elements the pattern emits VST1.64 instead of VST4.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst4.<V_sz_elem>\t%h1, %A0");
}
  ;; The scheduling type mirrors the instruction choice made above.
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)
5145
;; Expander for the vec_store_lanes pattern on XI-mode register groups;
;; it simply forwards both operands to the neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_mem = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest_mem, src));
  DONE;
})
5155
;; XI-mode VST4 is expanded into two instructions: neon_vst4qa stores to
;; the first OImode-sized piece of the memory operand and neon_vst4qb to
;; the piece immediately after it.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_mem, second_mem;

  /* First OImode piece, at offset 0 of the structure.  */
  first_mem = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_mem, operands[1]));

  /* Second OImode piece, directly following the first.  */
  second_mem = adjust_address (first_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_mem, operands[1]));
  DONE;
})
5170
;; First half of the XI-mode VST4 expansion: stores the DImode registers
;; at REGNO (operand 1) + 0, + 4, + 8 and + 12 to OI-mode memory
;; (operand 0).
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];
  int i;

  /* xops: 0 = memory, 1..4 = source registers four numbers apart.  */
  xops[0] = operands[0];
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5190
;; Second half of the XI-mode VST4 expansion: stores the DImode registers
;; at REGNO (operand 1) + 2, + 6, + 10 and + 14 to OI-mode memory
;; (operand 0).
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[5];
  int i;

  /* Same stride of four as neon_vst4qa, starting two numbers later.  */
  xops[0] = operands[0];
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5210
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VST4 from an OI-mode register group: operand 0 is the
;; destination memory, operand 1 the source registers and operand 2 the
;; lane index.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[6];
  int i;

  /* xops: 0 = memory, 1..4 = source registers, 5 = lane index.  */
  xops[0] = operands[0];
  for (i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[5] = GEN_INT (NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5237
;; See the comment on neon_vld1_lane for why the lane numbers are reversed
;; here on big-endian targets.
;; Single-lane VST4 from an XI-mode register group (VQ_HS element modes):
;; operand 0 is the destination memory, operand 1 the source registers and
;; operand 2 the lane index.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];

  /* Upper-half lanes use the registers two numbers further on and a
     half-relative lane index.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }

  /* ops: 0 = memory, 1..4 = source registers, 5 = lane index.  */
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  ;; A single-lane store: use the one-lane scheduling type, as the OI-mode
  ;; neon_vst4_lane pattern and the other lane stores do (this pattern was
  ;; previously tagged as a full four-register VST4).
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5270
;; Widen the half of operand 1 selected by the parallel in operand 2
;; (which must satisfy vect_par_constant_low) into operand 0 using VMOVL
;; on the %e half of the source.  Only enabled when !BYTES_BIG_ENDIAN.
;; SE/<US> presumably iterate over sign/zero extension -- they are defined
;; outside this part of the file.
5271 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5272 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5273 (SE:<V_unpack> (vec_select:<V_HALF>
5274 (match_operand:VU 1 "register_operand" "w")
5275 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5276 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5277 "vmovl.<US><V_sz_elem> %q0, %e1"
5278 [(set_attr "type" "neon_shift_imm_long")]
5279 )
5280
;; Widen the half of operand 1 selected by the parallel in operand 2
;; (which must satisfy vect_par_constant_high) into operand 0 using VMOVL
;; on the %f half of the source.  Only enabled when !BYTES_BIG_ENDIAN.
5281 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5282 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5283 (SE:<V_unpack> (vec_select:<V_HALF>
5284 (match_operand:VU 1 "register_operand" "w")
5285 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5286 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5287 "vmovl.<US><V_sz_elem> %q0, %f1"
5288 [(set_attr "type" "neon_shift_imm_long")]
5289 )
5290
;; Expander for vec_unpack<US>_hi on VU modes: builds a PARALLEL selecting
;; element indices nunits/2 .. nunits-1 and hands it to
;; neon_vec_unpack<US>_hi_<mode> as its selector operand.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  int lane = 0;

  /* Selector entries are the upper-half element indices.  */
  while (lane < half)
    {
      RTVEC_ELT (sel, lane) = GEN_INT (half + lane);
      lane++;
    }

  emit_insn (gen_neon_vec_unpack<US>_hi_<mode>
               (operands[0], operands[1],
                gen_rtx_PARALLEL (<MODE>mode, sel)));
  DONE;
}
)
5309
;; Expander for vec_unpack<US>_lo on VU modes: builds a PARALLEL selecting
;; element indices 0 .. nunits/2-1 and hands it to
;; neon_vec_unpack<US>_lo_<mode> as its selector operand.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  int lane = 0;

  /* Selector entries are the lower-half element indices.  */
  while (lane < half)
    {
      RTVEC_ELT (sel, lane) = GEN_INT (lane);
      lane++;
    }

  emit_insn (gen_neon_vec_unpack<US>_lo_<mode>
               (operands[0], operands[1],
                gen_rtx_PARALLEL (<MODE>mode, sel)));
  DONE;
}
)
5327
;; Widening multiply of matching halves of operands 1 and 3: both halves
;; are picked by the same selector (operand 2, vect_par_constant_low; the
;; second use is a match_dup), extended and multiplied with VMULL on the
;; %e halves.  Only enabled when !BYTES_BIG_ENDIAN.
5328 (define_insn "neon_vec_<US>mult_lo_<mode>"
5329 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5330 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5331 (match_operand:VU 1 "register_operand" "w")
5332 (match_operand:VU 2 "vect_par_constant_low" "")))
5333 (SE:<V_unpack> (vec_select:<V_HALF>
5334 (match_operand:VU 3 "register_operand" "w")
5335 (match_dup 2)))))]
5336 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5337 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5338 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5339 )
5340
;; Expander for vec_widen_<US>mult_lo on VU modes: builds a PARALLEL
;; selecting element indices 0 .. nunits/2-1 and passes it, together with
;; both multiplicands, to neon_vec_<US>mult_lo_<mode>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  rtx low_lanes;
  int lane = 0;

  /* Selector entries are the lower-half element indices.  */
  while (lane < half)
    {
      RTVEC_ELT (sel, lane) = GEN_INT (lane);
      lane++;
    }
  low_lanes = gen_rtx_PARALLEL (<MODE>mode, sel);

  /* The selector sits between the two multiplicands in the insn.  */
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], operands[1],
                                              low_lanes, operands[2]));
  DONE;
}
)
5361
;; Widening multiply of matching halves of operands 1 and 3: both halves
;; are picked by the same selector (operand 2, vect_par_constant_high; the
;; second use is a match_dup), extended and multiplied with VMULL on the
;; %f halves.  Only enabled when !BYTES_BIG_ENDIAN.
5362 (define_insn "neon_vec_<US>mult_hi_<mode>"
5363 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5364 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5365 (match_operand:VU 1 "register_operand" "w")
5366 (match_operand:VU 2 "vect_par_constant_high" "")))
5367 (SE:<V_unpack> (vec_select:<V_HALF>
5368 (match_operand:VU 3 "register_operand" "w")
5369 (match_dup 2)))))]
5370 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5371 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5372 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5373 )
5374
;; Expander for vec_widen_<US>mult_hi on VU modes: builds a PARALLEL
;; selecting element indices nunits/2 .. nunits-1 and passes it, together
;; with both multiplicands, to neon_vec_<US>mult_hi_<mode>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);
  rtx high_lanes;
  int lane = 0;

  /* Selector entries are the upper-half element indices.  */
  while (lane < half)
    {
      RTVEC_ELT (sel, lane) = GEN_INT (half + lane);
      lane++;
    }
  high_lanes = gen_rtx_PARALLEL (<MODE>mode, sel);

  /* The selector sits between the two multiplicands in the insn.  */
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], operands[1],
                                              high_lanes, operands[2]));
  DONE;
}
)
5396
;; Widening shift left: operand 1 is shifted by the constant in operand 2
;; and extended into operand 0 with VSHLL.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
          (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
5407
;; Expander for vec_widen_<US>shiftl_lo on VU modes: takes the <V_HALF>
;; subreg of operand 1 at byte offset 0 and applies the half-width
;; widening shift insn to it.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  /* Half of the input that starts at byte offset 0.  */
  rtx half_src = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                      <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], half_src,
                                               operands[2]));
  DONE;
}
)
5420
;; Expander for vec_widen_<US>shiftl_hi on VU modes: takes the <V_HALF>
;; subreg of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode) and
;; applies the half-width widening shift insn to it.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  /* Half of the input that starts one half-vector into the register.  */
  rtx half_src = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                      <MODE>mode,
                                      GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], half_src,
                                               operands[2]));
  DONE;
}
)
5434
5435 ;; Vectorize for non-neon-quad case
;; Extends a whole VDI-mode operand 1 into the <V_widen>-mode operand 0
;; with a single VMOVL.
;; NOTE(review): this is typed "neon_move", while the VU-mode VMOVL
;; patterns (neon_vec_unpack<US>_lo/hi_<mode>) use "neon_shift_imm_long"
;; -- confirm which scheduling type is intended.
5436 (define_insn "neon_unpack<US>_<mode>"
5437 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5438 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5439 "TARGET_NEON"
5440 "vmovl.<US><V_sz_elem> %q0, %P1"
5441 [(set_attr "type" "neon_move")]
5442 )
5443
;; Expander for vec_unpack<US>_lo on VDI modes: widens all of operand 1
;; into a fresh <V_widen> pseudo, then extracts the result with
;; neon_vget_low.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  /* Step 1: extend the whole input vector.  */
  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  /* Step 2: keep the low part of the widened value.  */
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
}
)
5456
;; Expander for vec_unpack<US>_hi on VDI modes: widens all of operand 1
;; into a fresh <V_widen> pseudo, then extracts the result with
;; neon_vget_high.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  /* Step 1: extend the whole input vector.  */
  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  /* Step 2: keep the high part of the widened value.  */
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
}
)
5469
;; Widening multiply of two whole VDI-mode operands: both inputs are
;; extended to <V_widen> and multiplied with a single VMULL into
;; operand 0.
5470 (define_insn "neon_vec_<US>mult_<mode>"
5471 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5472 (mult:<V_widen> (SE:<V_widen>
5473 (match_operand:VDI 1 "register_operand" "w"))
5474 (SE:<V_widen>
5475 (match_operand:VDI 2 "register_operand" "w"))))]
5476 "TARGET_NEON"
5477 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5478 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5479 )
5480
;; Expander for vec_widen_<US>mult_hi on VDI modes: performs the full
;; widening multiply into a fresh <V_widen> pseudo, then extracts the
;; result with neon_vget_high.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  /* Step 1: widening multiply of the complete inputs.  */
  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                           operands[2]));
  /* Step 2: keep the high part of the product.  */
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
}
)
5495
;; Expander for the VDI modes: compute the complete widened product of
;; operands 1 and 2 into a fresh <V_widen>-mode pseudo, then copy the low
;; half of that product into operand 0 (mode <V_double_width>).
5496 (define_expand "vec_widen_<US>mult_lo_<mode>"
5497 [(match_operand:<V_double_width> 0 "register_operand" "")
5498 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5499 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5500 "TARGET_NEON"
5501 {
/* Temporary that receives the full-width product.  */
5502 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5503 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
/* Keep only the low half of the product.  */
5504 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5505
5506 DONE;
5507
5508 }
5509 )
5510
;; Expander for the VDI modes: apply the widening left shift
;; neon_vec_<US>shiftl_<mode> (defined outside this chunk) to operand 1 with
;; the SImode immediate shift count in operand 2, placing the result in a
;; fresh <V_widen>-mode pseudo, then copy the high half of that result into
;; operand 0 (mode <V_double_width>).
5511 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5512 [(match_operand:<V_double_width> 0 "register_operand" "")
5513 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5514 (match_operand:SI 2 "immediate_operand" "i")]
5515 "TARGET_NEON"
5516 {
/* Temporary that receives the full-width shifted vector.  */
5517 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5518 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
/* Keep only the high half of the shifted result.  */
5519 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5520
5521 DONE;
5522 }
5523 )
5524
;; Expander for the VDI modes: apply the widening left shift
;; neon_vec_<US>shiftl_<mode> (defined outside this chunk) to operand 1 with
;; the SImode immediate shift count in operand 2, placing the result in a
;; fresh <V_widen>-mode pseudo, then copy the low half of that result into
;; operand 0 (mode <V_double_width>).
5525 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5526 [(match_operand:<V_double_width> 0 "register_operand" "")
5527 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5528 (match_operand:SI 2 "immediate_operand" "i")]
5529 "TARGET_NEON"
5530 {
/* Temporary that receives the full-width shifted vector.  */
5531 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5532 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
/* Keep only the low half of the shifted result.  */
5533 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5534
5535 DONE;
5536 }
5537 )
5538
5539 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5540 ; because the ordering of vector elements in Q registers is different from what
5541 ; the semantics of the instructions require.
5542
;; Pack two VN-mode vectors into one <V_narrow_pack>-mode vector: operands 1
;; and 2 are each truncated to <V_narrow> and concatenated into operand 0.
;; The output is two vmovn instructions (hence type "multiple" and length
;; 8), writing %e0 and %f0 respectively.  Operand 0 is early-clobber ("=&w")
;; because the first vmovn writes it before the second vmovn reads
;; operand 2.  Disabled when BYTES_BIG_ENDIAN.
5543 (define_insn "vec_pack_trunc_<mode>"
5544 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5545 (vec_concat:<V_narrow_pack>
5546 (truncate:<V_narrow>
5547 (match_operand:VN 1 "register_operand" "w"))
5548 (truncate:<V_narrow>
5549 (match_operand:VN 2 "register_operand" "w"))))]
5550 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5551 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5552 [(set_attr "type" "multiple")
5553 (set_attr "length" "8")]
5554 )
5555
5556 ;; For the non-quad case.
;; Truncate the VN-mode register operand 1 to the <V_narrow>-mode register
;; operand 0 with a single vmovn.  Disabled when BYTES_BIG_ENDIAN.
5557 (define_insn "neon_vec_pack_trunc_<mode>"
5558 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5559 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5560 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5561 "vmovn.i<V_sz_elem>\t%P0, %q1"
5562 [(set_attr "type" "neon_move_narrow_q")]
5563 )
5564
;; Expander for the VSHFT modes: assemble operands 1 and 2 into one
;; <V_DOUBLE>-mode pseudo using the move_lo_quad / move_hi_quad patterns
;; (defined outside this chunk), then narrow that pseudo into operand 0 with
;; neon_vec_pack_trunc_<V_double>.  Disabled when BYTES_BIG_ENDIAN.
5565 (define_expand "vec_pack_trunc_<mode>"
5566 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5567 (match_operand:VSHFT 1 "register_operand" "")
5568 (match_operand:VSHFT 2 "register_operand")]
5569 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5570 {
/* Double-width temporary that will hold both inputs.  */
5571 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5572
/* Operand 1 goes through move_lo_quad, operand 2 through move_hi_quad.  */
5573 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5574 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
/* Narrow the combined vector into the result.  */
5575 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
5576 DONE;
5577 })
5578
;; Match abs (op1 - op2) on VDQ modes and emit one vabd.<V_s_elem>.
;; For floating-point modes (<Is_float_mode> nonzero) the pattern is only
;; enabled under flag_unsafe_math_optimizations; the "type" attribute is
;; neon_fp_abd_s<q> for those modes and neon_abd<q> otherwise.
;; NOTE(review): the sibling pattern neon_vabd<mode>_3 uses <V_if_elem> in
;; its template where this one uses <V_s_elem> -- confirm the difference is
;; intended.
5579 (define_insn "neon_vabd<mode>_2"
5580 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5581 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5582 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5583 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5584 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5585 [(set (attr "type")
5586 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5587 (const_string "neon_fp_abd_s<q>")
5588 (const_string "neon_abd<q>")))]
5589 )
5590
;; Match abs applied to an UNSPEC_VSUB of operands 1 and 2 on VDQ modes and
;; emit one vabd.<V_if_elem>.  For floating-point modes (<Is_float_mode>
;; nonzero) the pattern is only enabled under
;; flag_unsafe_math_optimizations; the "type" attribute is neon_fp_abd_s<q>
;; for those modes and neon_abd<q> otherwise.
5591 (define_insn "neon_vabd<mode>_3"
5592 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5593 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5594 (match_operand:VDQ 2 "s_register_operand" "w")]
5595 UNSPEC_VSUB)))]
5596 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5597 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5598 [(set (attr "type")
5599 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5600 (const_string "neon_fp_abd_s<q>")
5601 (const_string "neon_abd<q>")))]
5602 )
5603
5604 ;; Copy from core-to-neon regs, then extend, not vice-versa
5605
;; After reload, when the DImode destination is a VFP register, split
;; sign_extend:DI of an SImode register into two sets: duplicate the SImode
;; value across the V2SI view of the destination register, then shift the
;; DImode destination right arithmetically by 32 (64 - 32) bits.
5606 (define_split
5607 [(set (match_operand:DI 0 "s_register_operand" "")
5608 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5609 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5610 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5611 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5612 {
/* Operand 2 is the same hard register as operand 0, viewed as V2SI.  */
5613 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5614 })
5615
;; After reload, when the DImode destination is a VFP register, split
;; sign_extend:DI of an HImode register into two sets: duplicate the HImode
;; value across the V4HI view of the destination register, then shift the
;; DImode destination right arithmetically by 48 (64 - 16) bits.
5616 (define_split
5617 [(set (match_operand:DI 0 "s_register_operand" "")
5618 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5619 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5620 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5621 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5622 {
/* Operand 2 is the same hard register as operand 0, viewed as V4HI.  */
5623 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5624 })
5625
;; After reload, when the DImode destination is a VFP register, split
;; sign_extend:DI of a QImode register into two sets: duplicate the QImode
;; value across the V8QI view of the destination register, then shift the
;; DImode destination right arithmetically by 56 (64 - 8) bits.
5626 (define_split
5627 [(set (match_operand:DI 0 "s_register_operand" "")
5628 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5629 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5630 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5631 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5632 {
/* Operand 2 is the same hard register as operand 0, viewed as V8QI.  */
5633 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5634 })
5635
;; After reload, when the DImode destination is a VFP register, split
;; zero_extend:DI of an SImode register into two sets: duplicate the SImode
;; value across the V2SI view of the destination register, then shift the
;; DImode destination right logically by 32 (64 - 32) bits.
5636 (define_split
5637 [(set (match_operand:DI 0 "s_register_operand" "")
5638 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5639 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5640 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5641 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5642 {
/* Operand 2 is the same hard register as operand 0, viewed as V2SI.  */
5643 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5644 })
5645
;; After reload, when the DImode destination is a VFP register, split
;; zero_extend:DI of an HImode register into two sets: duplicate the HImode
;; value across the V4HI view of the destination register, then shift the
;; DImode destination right logically by 48 (64 - 16) bits.
5646 (define_split
5647 [(set (match_operand:DI 0 "s_register_operand" "")
5648 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5649 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5650 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5651 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5652 {
/* Operand 2 is the same hard register as operand 0, viewed as V4HI.  */
5653 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5654 })
5655
;; After reload, when the DImode destination is a VFP register, split
;; zero_extend:DI of a QImode register into two sets: duplicate the QImode
;; value across the V8QI view of the destination register, then shift the
;; DImode destination right logically by 56 (64 - 8) bits.
5656 (define_split
5657 [(set (match_operand:DI 0 "s_register_operand" "")
5658 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5659 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5660 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5661 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5662 {
/* Operand 2 is the same hard register as operand 0, viewed as V8QI.  */
5663 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
5664 })