;; VSX patterns.
;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])
;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR [(V8HI  "h")
                            (V4SI  "w")
                            (V4SF  "w")
                            (V2DF  "d")
                            (V2DI  "d")
                            (V1TI  "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
                       (V8HI  "vw4")
                       (V4SI  "vw4")
                       (V4SF  "vw4")
                       (V2DF  "vd2")
                       (V2DI  "vd2")
                       (DF    "d")
                       (TF    "vd2")
                       (KF    "vd2")
                       (V1TI  "vd2")
                       (TI    "vd2")])

;; Map into the appropriate suffix based on the type
(define_mode_attr VSs [(V16QI "sp")
                       (V8HI  "sp")
                       (V4SI  "sp")
                       (V4SF  "sp")
                       (V2DF  "dp")
                       (V2DI  "dp")
                       (DF    "dp")
                       (SF    "sp")
                       (TF    "dp")
                       (KF    "dp")
                       (V1TI  "dp")
                       (TI    "dp")])

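;; As an illustrative example (not a pattern in this file): with the
;; attributes above, an instruction template written as
;; "xvadd<VSs> %x0,%x1,%x2" prints as "xvaddsp" for V4SFmode and "xvadddp"
;; for V2DFmode, and a name built as "lx<VSm>x" would select "lxvw4x" for
;; the 32-bit element vector modes and "lxvd2x" for the 64-bit ones.
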
;; Map the register class used
(define_mode_attr VSr [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "wf")
                       (V2DI  "wd")
                       (V2DF  "wd")
                       (DI    "wi")
                       (DF    "ws")
                       (SF    "ww")
                       (TF    "wp")
                       (KF    "wq")
                       (V1TI  "v")
                       (TI    "wt")])

;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
;; hold the data
(define_mode_attr VSr2 [(V2DF "wd")
                        (V4SF "wf")
                        (DF   "ws")
                        (SF   "ww")
                        (DI   "wi")
                        (KF   "wq")
                        (TF   "wp")])

(define_mode_attr VSr3 [(V2DF "wa")
                        (V4SF "wa")
                        (DF   "ws")
                        (SF   "ww")
                        (DI   "wi")
                        (KF   "wq")
                        (TF   "wp")])

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4 [(SF   "ws")
                        (DF   "f")
                        (V2DF "wd")
                        (V4SF "v")])

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5 [(SF   "ws")
                        (DF   "f")
                        (V2DF "v")
                        (V4SF "wd")])

;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
;; allocated first).
(define_mode_attr VSa [(V16QI "wa")
                       (V8HI  "wa")
                       (V4SI  "wa")
                       (V4SF  "wa")
                       (V2DI  "wa")
                       (V2DF  "wa")
                       (DI    "wi")
                       (DF    "ws")
                       (SF    "ww")
                       (V1TI  "wa")
                       (TI    "wt")
                       (TF    "wp")
                       (KF    "wq")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")
                       (V8HI  "??r")
                       (V4SI  "??r")
                       (V4SF  "??r")
                       (V2DI  "??r")
                       (V2DF  "??r")
                       (V1TI  "??r")
                       (KF    "??r")
                       (TF    "??r")
                       (TI    "r")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")
                       (V2DF "v2di")
                       (DF   "di")])

(define_mode_attr VSI [(V4SF "V4SI")
                       (V2DF "V2DI")
                       (DF   "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "v")
                       (V2DI  "v")
                       (V2DF  "v")
                       (V1TI  "v")
                       (DF    "s")
                       (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")
                              (V4SF "vecfloat")
                              (DF   "dmul")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")
                              (V4SF "vecfdiv")
                              (DF   "ddiv")])

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
;; the scalar sqrt
(define_mode_attr VStype_sqrt [(V2DF "dsqrt")
                               (V4SF "ssqrt")
                               (DF   "dsqrt")])

;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this rather than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])

(define_mode_attr VS_spdp_res [(DF   "V4SF")
                               (V4SF "V2DF")
                               (V2DF "V4SF")])

(define_mode_attr VS_spdp_insn [(DF   "xscvdpsp")
                                (V4SF "xvcvspdp")
                                (V2DF "xvcvdpsp")])

(define_mode_attr VS_spdp_type [(DF   "fp")
                                (V4SF "vecdouble")
                                (V2DF "vecdouble")])

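;; For example (illustrative), a DF source selects "xscvdpsp" and produces a
;; V4SF result, while a V4SF source selects "xvcvspdp" and produces V2DF;
;; the insn name, result mode, and scheduling type all come from the three
;; attributes above.
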
;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI  "TI")
                             (V2DF  "DF")
                             (V2DI  "DI")
                             (V4SF  "SF")
                             (V4SI  "SI")
                             (V8HI  "HI")
                             (V16QI "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")
                             (V4SF "V8SF")
                             (V2DI "V4DI")
                             (V2DF "V4DF")
                             (V1TI "V2TI")])

;; Map register class for 64-bit element in 128-bit vector for direct moves
;; to/from gprs
(define_mode_attr VS_64dm [(V2DF "wk")
                           (V2DI "wj")])

;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves
(define_mode_attr VS_64reg [(V2DF "ws")
                            (V2DI "wi")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI  "h")
                                     (V4SI  "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI  "const_0_to_7_operand")
                                         (V4SI  "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])

;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

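;; Illustrative example (not a pattern in this file): once an integer value
;; has been moved into the 64-bit slot of a vector register, a template like
;; "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>" prints as
;; "vsplth %0,%1,3" for V8HI and "vspltb %0,%1,7" for V16QI, replicating the
;; element that holds the low bits of that doubleword.
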
;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_CVSPSXDS
   UNSPEC_VSX_CVSPUXDS
   UNSPEC_VSX_CVSXWSP
   UNSPEC_VSX_CVUXWSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_XVCVSXDDP
   UNSPEC_VSX_XVCVUXDDP
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCDPSP
   UNSPEC_VSX_XVCVDPUXDS
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVSPSXWS
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VEC_INIT
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
  ])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

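;; Illustrative sketch (register numbers invented) of what the split above
;; produces for an unaligned little-endian V2DF load:
;;
;;	lxvd2x 0,0,3		# load with doublewords in BE order
;;	xxpermdi 0,0,0,2	# swap doublewords into LE element order
;;
;; When the address is known to be 128-bit aligned and the destination is an
;; Altivec register, a single lvx through a masked address is used instead.
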
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

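;; Illustrative sketch (register numbers invented) of the post-reload V2DF
;; sequence: the source is swapped in place, stored, then swapped back so
;; its value is unchanged if it is still live:
;;
;;	xxpermdi 0,0,0,2	# swap doublewords of the source
;;	stxvd2x 0,0,3		# store in BE doubleword order
;;	xxpermdi 0,0,0,2	# restore the original value
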
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "4,4,4,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

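;; A rotate of a 128-bit value by 64 bits is self-inverse, so the double
;; rotate above is simply a copy: it becomes an xxlor register move, or is
;; deleted outright when source and destination end up in the same register.
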
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

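;; Illustrative example (register number invented): a V16QI duplicate of the
;; constant 5 prints as "xxspltib 34,5"; negative constants are masked to
;; their low 8 bits first, so -1 prints as 255.
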
(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])


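;; Illustrative example (register numbers invented, mnemonic assumed): a
;; V4SI splat of 5 that needs the two-insn form is emitted as a byte splat
;; followed by a sign extension, e.g. "xxspltib 34,5" then "vextsb2w 2,2";
;; V8HI uses vupkhsb for the extension step instead.
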
;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's
;; or all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a
;; slow instruction).  But generate XXLXOR/XXLORC if it will avoid a register
;; move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
(define_insn "*vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,    r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wo,        v,
                ?<VSa>,    *r,        v,        ??r,       wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,    we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      jwM,       W,        W,         v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,     *,        vecsimple, vecsimple,
                vecsimple, *,         *,         *,        vecstore,  vecload")

   (set_attr "length"
               "4,         4,         4,         8,        4,         8,
                8,         8,         8,         8,        4,         4,
                4,         8,         20,        20,       4,         4")])

;;              VSX store  VSX load   VSX move   GPR load  GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1  VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,     ??r,      ??Y,       <??r>,
                wo,        v,         ?<VSa>,    *r,       v,         ??r,
                wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,     Y,        r,         r,
                wE,        jwM,       ?jwM,      jwM,      W,         W,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,     store,     *,
                vecsimple, vecsimple, vecsimple, *,        *,         *,
                vecstore,  vecload")

   (set_attr "length"
               "4,         4,         4,         16,       16,        16,
                4,         4,         4,         16,       20,        32,
                4,         4")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

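;; Illustrative note: on a little-endian Power8, vsx_store_v2df goes through
;; rs6000_emit_le_vsx_move and becomes the swapping store shown earlier
;; (an xxpermdi doubleword swap followed by stxvd2x); with Power9 vector
;; support the expander falls through and an ordinary move is generated.
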
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

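;; Pre-ISA-3.0 strategy (sketch): there is no lxvh8x before Power9, so the
;; expander above loads the data word-reversed with lxvw4x and then uses a
;; vperm with the constant selector built from the reorder table to finish
;; reordering the halfwords; the same trick, with a byte-level selector, is
;; used for V16QI below.
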
(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
                                                 subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
        (vec_select:V1TI
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
        (vec_select:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
        (vec_select:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
        (vec_select:V4SI
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
                                                operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvh8x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
                                                 operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])


;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insns for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "mul")])

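;; In C terms the split above computes the following (illustrative sketch;
;; no 64-bit element vector multiply instruction is available here):
;;
;;	result[0] = a[0] * b[0];	/* extract both doublewords,     */
;;	result[1] = a[1] * b[1];	/* multiply, then re-concatenate */
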
(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])

; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

1713 (define_insn_and_split "vsx_udiv_v2di"
1714 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1715 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1716 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1717 UNSPEC_VSX_DIVUD))]
1718 "VECTOR_MEM_VSX_P (V2DImode)"
1719 "#"
1720 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1721 [(const_int 0)]
1722 {
1723 rtx op0 = operands[0];
1724 rtx op1 = operands[1];
1725 rtx op2 = operands[2];
1726 rtx op3 = gen_reg_rtx (DImode);
1727 rtx op4 = gen_reg_rtx (DImode);
1728 rtx op5 = gen_reg_rtx (DImode);
1729 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1730 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1731 if (TARGET_POWERPC64)
1732 emit_insn (gen_udivdi3 (op5, op3, op4));
1733 else
1734 {
1735 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1736 rtx target = emit_library_call_value (libfunc,
1737 op5, LCT_NORMAL, DImode,
1738 op3, DImode,
1739 op4, DImode);
1740 emit_move_insn (op5, target);
1741 }
1742 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1743 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1744 if (TARGET_POWERPC64)
1745 emit_insn (gen_udivdi3 (op3, op3, op4));
1746 else
1747 {
1748 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1749 rtx target = emit_library_call_value (libfunc,
1750 op3, LCT_NORMAL, DImode,
1751 op3, DImode,
1752 op4, DImode);
1753 emit_move_insn (op3, target);
1754 }
1755 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1756 DONE;
1757 }
1758 [(set_attr "type" "div")])
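
;; A minimal sketch of the user-level operation the two division splits
;; above implement, assuming the altivec.h vec_div built-in is the entry
;; point (as the comment before vsx_div_v2di suggests):
;;
;;   #include <altivec.h>
;;   vector signed long long
;;   vdiv (vector signed long long a, vector signed long long b)
;;   {
;;     return vec_div (a, b);   /* per element: a[i] / b[i] */
;;   }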
1759
1760 ;; *tdiv* instruction returning the FG flag
1761 (define_expand "vsx_tdiv<mode>3_fg"
1762 [(set (match_dup 3)
1763 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1764 (match_operand:VSX_B 2 "vsx_register_operand")]
1765 UNSPEC_VSX_TDIV))
1766 (set (match_operand:SI 0 "gpc_reg_operand")
1767 (gt:SI (match_dup 3)
1768 (const_int 0)))]
1769 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1770 {
1771 operands[3] = gen_reg_rtx (CCFPmode);
1772 })
1773
1774 ;; *tdiv* instruction returning the FE flag
1775 (define_expand "vsx_tdiv<mode>3_fe"
1776 [(set (match_dup 3)
1777 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1778 (match_operand:VSX_B 2 "vsx_register_operand")]
1779 UNSPEC_VSX_TDIV))
1780 (set (match_operand:SI 0 "gpc_reg_operand")
1781 (eq:SI (match_dup 3)
1782 (const_int 0)))]
1783 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1784 {
1785 operands[3] = gen_reg_rtx (CCFPmode);
1786 })
1787
1788 (define_insn "*vsx_tdiv<mode>3_internal"
1789 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1790 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1791 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1792 UNSPEC_VSX_TDIV))]
1793 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1794 "x<VSv>tdiv<VSs> %0,%x1,%x2"
1795 [(set_attr "type" "<VStype_simple>")])
1796
1797 (define_insn "vsx_fre<mode>2"
1798 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1799 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1800 UNSPEC_FRES))]
1801 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1802 "xvre<VSs> %x0,%x1"
1803 [(set_attr "type" "<VStype_simple>")])
1804
1805 (define_insn "*vsx_neg<mode>2"
1806 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1807 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1808 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1809 "xvneg<VSs> %x0,%x1"
1810 [(set_attr "type" "<VStype_simple>")])
1811
1812 (define_insn "*vsx_abs<mode>2"
1813 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1814 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1815 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1816 "xvabs<VSs> %x0,%x1"
1817 [(set_attr "type" "<VStype_simple>")])
1818
1819 (define_insn "vsx_nabs<mode>2"
1820 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1821 (neg:VSX_F
1822 (abs:VSX_F
1823 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1824 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1825 "xvnabs<VSs> %x0,%x1"
1826 [(set_attr "type" "<VStype_simple>")])
1827
1828 (define_insn "vsx_smax<mode>3"
1829 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1830 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1831 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1832 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1833 "xvmax<VSs> %x0,%x1,%x2"
1834 [(set_attr "type" "<VStype_simple>")])
1835
1836 (define_insn "*vsx_smin<mode>3"
1837 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1838 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1839 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1840 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1841 "xvmin<VSs> %x0,%x1,%x2"
1842 [(set_attr "type" "<VStype_simple>")])
1843
1844 (define_insn "*vsx_sqrt<mode>2"
1845 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1846 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1847 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1848 "xvsqrt<VSs> %x0,%x1"
1849 [(set_attr "type" "<VStype_sqrt>")])
1850
1851 (define_insn "*vsx_rsqrte<mode>2"
1852 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1853 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1854 UNSPEC_RSQRT))]
1855 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1856 "xvrsqrte<VSs> %x0,%x1"
1857 [(set_attr "type" "<VStype_simple>")])
1858
1859 ;; *tsqrt* returning the FG flag
1860 (define_expand "vsx_tsqrt<mode>2_fg"
1861 [(set (match_dup 2)
1862 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1863 UNSPEC_VSX_TSQRT))
1864 (set (match_operand:SI 0 "gpc_reg_operand")
1865 (gt:SI (match_dup 2)
1866 (const_int 0)))]
1867 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1868 {
1869 operands[2] = gen_reg_rtx (CCFPmode);
1870 })
1871
1872 ;; *tsqrt* returning the FE flag
1873 (define_expand "vsx_tsqrt<mode>2_fe"
1874 [(set (match_dup 2)
1875 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1876 UNSPEC_VSX_TSQRT))
1877 (set (match_operand:SI 0 "gpc_reg_operand")
1878 (eq:SI (match_dup 2)
1879 (const_int 0)))]
1880 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1881 {
1882 operands[2] = gen_reg_rtx (CCFPmode);
1883 })
1884
1885 (define_insn "*vsx_tsqrt<mode>2_internal"
1886 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1887 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1888 UNSPEC_VSX_TSQRT))]
1889 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1890 "x<VSv>tsqrt<VSs> %0,%x1"
1891 [(set_attr "type" "<VStype_simple>")])
1892
1893 ;; Fused vector multiply/add instructions. Support the classical Altivec
1894 ;; versions of fma, which allow the target to be a separate register from the
1895 ;; 3 inputs. Under VSX, the target must be either the addend or the first
1896 ;; multiply.
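;;
;; A minimal sketch of the operation itself, assuming the classical
;; AltiVec vec_madd built-in as the V4SF entry point (illustration only):
;;
;;   #include <altivec.h>
;;   vector float
;;   fma4 (vector float a, vector float b, vector float c)
;;   {
;;     return vec_madd (a, b, c);   /* per element: a[i] * b[i] + c[i] */
;;   }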
1897
1898 (define_insn "*vsx_fmav4sf4"
1899 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1900 (fma:V4SF
1901 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1902 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1903 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1904 "VECTOR_UNIT_VSX_P (V4SFmode)"
1905 "@
1906 xvmaddasp %x0,%x1,%x2
1907 xvmaddmsp %x0,%x1,%x3
1908 xvmaddasp %x0,%x1,%x2
1909 xvmaddmsp %x0,%x1,%x3
1910 vmaddfp %0,%1,%2,%3"
1911 [(set_attr "type" "vecfloat")])
1912
1913 (define_insn "*vsx_fmav2df4"
1914 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1915 (fma:V2DF
1916 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1917 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1918 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1919 "VECTOR_UNIT_VSX_P (V2DFmode)"
1920 "@
1921 xvmaddadp %x0,%x1,%x2
1922 xvmaddmdp %x0,%x1,%x3
1923 xvmaddadp %x0,%x1,%x2
1924 xvmaddmdp %x0,%x1,%x3"
1925 [(set_attr "type" "vecdouble")])
1926
1927 (define_insn "*vsx_fms<mode>4"
1928 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1929 (fma:VSX_F
1930 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1931 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1932 (neg:VSX_F
1933 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1934 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1935 "@
1936 xvmsuba<VSs> %x0,%x1,%x2
1937 xvmsubm<VSs> %x0,%x1,%x3
1938 xvmsuba<VSs> %x0,%x1,%x2
1939 xvmsubm<VSs> %x0,%x1,%x3"
1940 [(set_attr "type" "<VStype_mul>")])
1941
1942 (define_insn "*vsx_nfma<mode>4"
1943 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1944 (neg:VSX_F
1945 (fma:VSX_F
1946 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1947 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1948 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1949 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1950 "@
1951 xvnmadda<VSs> %x0,%x1,%x2
1952 xvnmaddm<VSs> %x0,%x1,%x3
1953 xvnmadda<VSs> %x0,%x1,%x2
1954 xvnmaddm<VSs> %x0,%x1,%x3"
1955 [(set_attr "type" "<VStype_mul>")])
1956
1957 (define_insn "*vsx_nfmsv4sf4"
1958 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1959 (neg:V4SF
1960 (fma:V4SF
1961 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1962 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1963 (neg:V4SF
1964 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1965 "VECTOR_UNIT_VSX_P (V4SFmode)"
1966 "@
1967 xvnmsubasp %x0,%x1,%x2
1968 xvnmsubmsp %x0,%x1,%x3
1969 xvnmsubasp %x0,%x1,%x2
1970 xvnmsubmsp %x0,%x1,%x3
1971 vnmsubfp %0,%1,%2,%3"
1972 [(set_attr "type" "vecfloat")])
1973
1974 (define_insn "*vsx_nfmsv2df4"
1975 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1976 (neg:V2DF
1977 (fma:V2DF
1978 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1979 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1980 (neg:V2DF
1981 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1982 "VECTOR_UNIT_VSX_P (V2DFmode)"
1983 "@
1984 xvnmsubadp %x0,%x1,%x2
1985 xvnmsubmdp %x0,%x1,%x3
1986 xvnmsubadp %x0,%x1,%x2
1987 xvnmsubmdp %x0,%x1,%x3"
1988 [(set_attr "type" "vecdouble")])
1989
1990 ;; Vector conditional expressions (no scalar version for these instructions)
1991 (define_insn "vsx_eq<mode>"
1992 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1993 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1994 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1995 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1996 "xvcmpeq<VSs> %x0,%x1,%x2"
1997 [(set_attr "type" "<VStype_simple>")])
1998
1999 (define_insn "vsx_gt<mode>"
2000 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2001 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2002 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2003 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2004 "xvcmpgt<VSs> %x0,%x1,%x2"
2005 [(set_attr "type" "<VStype_simple>")])
2006
2007 (define_insn "*vsx_ge<mode>"
2008 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2009 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2010 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2011 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2012 "xvcmpge<VSs> %x0,%x1,%x2"
2013 [(set_attr "type" "<VStype_simple>")])
2014
2015 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2016 ;; indicate a combined status
2017 (define_insn "*vsx_eq_<mode>_p"
2018 [(set (reg:CC CR6_REGNO)
2019 (unspec:CC
2020 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2021 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2022 UNSPEC_PREDICATE))
2023 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2024 (eq:VSX_F (match_dup 1)
2025 (match_dup 2)))]
2026 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2027 "xvcmpeq<VSs>. %x0,%x1,%x2"
2028 [(set_attr "type" "<VStype_simple>")])
2029
2030 (define_insn "*vsx_gt_<mode>_p"
2031 [(set (reg:CC CR6_REGNO)
2032 (unspec:CC
2033 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2034 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2035 UNSPEC_PREDICATE))
2036 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2037 (gt:VSX_F (match_dup 1)
2038 (match_dup 2)))]
2039 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2040 "xvcmpgt<VSs>. %x0,%x1,%x2"
2041 [(set_attr "type" "<VStype_simple>")])
2042
2043 (define_insn "*vsx_ge_<mode>_p"
2044 [(set (reg:CC CR6_REGNO)
2045 (unspec:CC
2046 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2047 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2048 UNSPEC_PREDICATE))
2049 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2050 (ge:VSX_F (match_dup 1)
2051 (match_dup 2)))]
2052 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2053 "xvcmpge<VSs>. %x0,%x1,%x2"
2054 [(set_attr "type" "<VStype_simple>")])
2055
2056 ;; Vector select
2057 (define_insn "*vsx_xxsel<mode>"
2058 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2059 (if_then_else:VSX_L
2060 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2061 (match_operand:VSX_L 4 "zero_constant" ""))
2062 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2063 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2064 "VECTOR_MEM_VSX_P (<MODE>mode)"
2065 "xxsel %x0,%x3,%x2,%x1"
2066 [(set_attr "type" "vecmove")])
2067
2068 (define_insn "*vsx_xxsel<mode>_uns"
2069 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2070 (if_then_else:VSX_L
2071 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2072 (match_operand:VSX_L 4 "zero_constant" ""))
2073 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2074 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2075 "VECTOR_MEM_VSX_P (<MODE>mode)"
2076 "xxsel %x0,%x3,%x2,%x1"
2077 [(set_attr "type" "vecmove")])
2078
2079 ;; Copy sign
2080 (define_insn "vsx_copysign<mode>3"
2081 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2082 (unspec:VSX_F
2083 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2084 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
2085 UNSPEC_COPYSIGN))]
2086 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2087 "xvcpsgn<VSs> %x0,%x2,%x1"
2088 [(set_attr "type" "<VStype_simple>")])
2089
2090 ;; For the conversions, limit the register class for the integer value to
2091 ;; the FPRs, because we don't want to add the altivec registers to movdi/movsi.
2092 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2093 ;; in rs6000.md, so don't test VECTOR_UNIT_VSX_P; just test against VSX.
2094 ;; Don't use vsx_register_operand here; use gpc_reg_operand to match rs6000.md
2095 ;; in allowing virtual registers.
2096 (define_insn "vsx_float<VSi><mode>2"
2097 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2098 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2099 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2100 "xvcvsx<VSc><VSs> %x0,%x1"
2101 [(set_attr "type" "<VStype_simple>")])
2102
2103 (define_insn "vsx_floatuns<VSi><mode>2"
2104 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2105 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2106 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2107 "xvcvux<VSc><VSs> %x0,%x1"
2108 [(set_attr "type" "<VStype_simple>")])
2109
2110 (define_insn "vsx_fix_trunc<mode><VSi>2"
2111 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2112 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2113 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2114 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
2115 [(set_attr "type" "<VStype_simple>")])
2116
2117 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2118 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2119 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2120 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2121 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
2122 [(set_attr "type" "<VStype_simple>")])
2123
2124 ;; Math rounding functions
2125 (define_insn "vsx_x<VSv>r<VSs>i"
2126 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2127 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2128 UNSPEC_VSX_ROUND_I))]
2129 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2130 "x<VSv>r<VSs>i %x0,%x1"
2131 [(set_attr "type" "<VStype_simple>")])
2132
2133 (define_insn "vsx_x<VSv>r<VSs>ic"
2134 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2135 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2136 UNSPEC_VSX_ROUND_IC))]
2137 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2138 "x<VSv>r<VSs>ic %x0,%x1"
2139 [(set_attr "type" "<VStype_simple>")])
2140
2141 (define_insn "vsx_btrunc<mode>2"
2142 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2143 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
2144 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2145 "xvr<VSs>iz %x0,%x1"
2146 [(set_attr "type" "<VStype_simple>")])
2147
2148 (define_insn "*vsx_b2trunc<mode>2"
2149 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2150 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2151 UNSPEC_FRIZ))]
2152 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2153 "x<VSv>r<VSs>iz %x0,%x1"
2154 [(set_attr "type" "<VStype_simple>")])
2155
2156 (define_insn "vsx_floor<mode>2"
2157 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2158 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2159 UNSPEC_FRIM))]
2160 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2161 "xvr<VSs>im %x0,%x1"
2162 [(set_attr "type" "<VStype_simple>")])
2163
2164 (define_insn "vsx_ceil<mode>2"
2165 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2166 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2167 UNSPEC_FRIP))]
2168 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2169 "xvr<VSs>ip %x0,%x1"
2170 [(set_attr "type" "<VStype_simple>")])
2171
2172 \f
2173 ;; VSX convert to/from double vector
2174
2175 ;; Convert between single and double precision
2176 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2177 ;; scalar single precision instructions internally use the double format.
2178 ;; Prefer the altivec registers, since we likely will need to do a vperm
2179 (define_insn "vsx_<VS_spdp_insn>"
2180 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
2181 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
2182 UNSPEC_VSX_CVSPDP))]
2183 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2184 "<VS_spdp_insn> %x0,%x1"
2185 [(set_attr "type" "<VS_spdp_type>")])
2186
2187 ;; xscvspdp, represent the scalar SF type as V4SF
2188 (define_insn "vsx_xscvspdp"
2189 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2190 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2191 UNSPEC_VSX_CVSPDP))]
2192 "VECTOR_UNIT_VSX_P (V4SFmode)"
2193 "xscvspdp %x0,%x1"
2194 [(set_attr "type" "fp")])
2195
2196 ;; Same as vsx_xscvspdp, but use SF as the type
2197 (define_insn "vsx_xscvspdp_scalar2"
2198 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2199 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2200 UNSPEC_VSX_CVSPDP))]
2201 "VECTOR_UNIT_VSX_P (V4SFmode)"
2202 "xscvspdp %x0,%x1"
2203 [(set_attr "type" "fp")])
2204
2205 ;; Generate xvcvhpsp instruction
2206 (define_insn "vsx_xvcvhpsp"
2207 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2208 (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2209 UNSPEC_VSX_CVHPSP))]
2210 "TARGET_P9_VECTOR"
2211 "xvcvhpsp %x0,%x1"
2212 [(set_attr "type" "vecfloat")])
2213
2214 ;; xscvdpsp used for splatting a scalar to V4SF, knowing that the internal SF
2215 ;; format of scalars is actually DF.
2216 (define_insn "vsx_xscvdpsp_scalar"
2217 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2218 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2219 UNSPEC_VSX_CVSPDP))]
2220 "VECTOR_UNIT_VSX_P (V4SFmode)"
2221 "xscvdpsp %x0,%x1"
2222 [(set_attr "type" "fp")])
2223
2224 ;; ISA 2.07 xscvdpspn/xscvspdpn, which do not raise an error on signalling NaNs
2225 (define_insn "vsx_xscvdpspn"
2226 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
2227 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
2228 UNSPEC_VSX_CVDPSPN))]
2229 "TARGET_XSCVDPSPN"
2230 "xscvdpspn %x0,%x1"
2231 [(set_attr "type" "fp")])
2232
2233 (define_insn "vsx_xscvspdpn"
2234 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2235 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2236 UNSPEC_VSX_CVSPDPN))]
2237 "TARGET_XSCVSPDPN"
2238 "xscvspdpn %x0,%x1"
2239 [(set_attr "type" "fp")])
2240
2241 (define_insn "vsx_xscvdpspn_scalar"
2242 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2243 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2244 UNSPEC_VSX_CVDPSPN))]
2245 "TARGET_XSCVDPSPN"
2246 "xscvdpspn %x0,%x1"
2247 [(set_attr "type" "fp")])
2248
2249 ;; Used by direct move to move a SFmode value from GPR to VSX register
2250 (define_insn "vsx_xscvspdpn_directmove"
2251 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2252 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2253 UNSPEC_VSX_CVSPDPN))]
2254 "TARGET_XSCVSPDPN"
2255 "xscvspdpn %x0,%x1"
2256 [(set_attr "type" "fp")])
2257
2258 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2259
2260 (define_expand "vsx_xvcvsxddp_scale"
2261 [(match_operand:V2DF 0 "vsx_register_operand")
2262 (match_operand:V2DI 1 "vsx_register_operand")
2263 (match_operand:QI 2 "immediate_operand")]
2264 "VECTOR_UNIT_VSX_P (V2DFmode)"
2265 {
2266 rtx op0 = operands[0];
2267 rtx op1 = operands[1];
2268 int scale = INTVAL (operands[2]);
2269 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2270 if (scale != 0)
2271 rs6000_scale_v2df (op0, op0, -scale);
2272 DONE;
2273 })
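
;; A rough sketch of the convert-and-scale semantics above (illustration
;; only): the conversion is followed by a scale by 2**(-scale), so for
;; vec_ctf each element ends up as
;;
;;   res[i] = (double) a[i] / (double) (1ULL << scale);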
2274
2275 (define_insn "vsx_xvcvsxddp"
2276 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2277 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2278 UNSPEC_VSX_XVCVSXDDP))]
2279 "VECTOR_UNIT_VSX_P (V2DFmode)"
2280 "xvcvsxddp %x0,%x1"
2281 [(set_attr "type" "vecdouble")])
2282
2283 (define_expand "vsx_xvcvuxddp_scale"
2284 [(match_operand:V2DF 0 "vsx_register_operand")
2285 (match_operand:V2DI 1 "vsx_register_operand")
2286 (match_operand:QI 2 "immediate_operand")]
2287 "VECTOR_UNIT_VSX_P (V2DFmode)"
2288 {
2289 rtx op0 = operands[0];
2290 rtx op1 = operands[1];
2291 int scale = INTVAL (operands[2]);
2292 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2293 if (scale != 0)
2294 rs6000_scale_v2df (op0, op0, -scale);
2295 DONE;
2296 })
2297
2298 (define_insn "vsx_xvcvuxddp"
2299 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2300 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2301 UNSPEC_VSX_XVCVUXDDP))]
2302 "VECTOR_UNIT_VSX_P (V2DFmode)"
2303 "xvcvuxddp %x0,%x1"
2304 [(set_attr "type" "vecdouble")])
2305
2306 (define_expand "vsx_xvcvdpsxds_scale"
2307 [(match_operand:V2DI 0 "vsx_register_operand")
2308 (match_operand:V2DF 1 "vsx_register_operand")
2309 (match_operand:QI 2 "immediate_operand")]
2310 "VECTOR_UNIT_VSX_P (V2DFmode)"
2311 {
2312 rtx op0 = operands[0];
2313 rtx op1 = operands[1];
2314 rtx tmp;
2315 int scale = INTVAL (operands[2]);
2316 if (scale == 0)
2317 tmp = op1;
2318 else
2319 {
2320 tmp = gen_reg_rtx (V2DFmode);
2321 rs6000_scale_v2df (tmp, op1, scale);
2322 }
2323 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2324 DONE;
2325 })
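
;; The truncating direction is the mirror image (illustration only):
;; the input is scaled by 2**scale before the conversion, so for vec_cts
;; each element ends up as
;;
;;   res[i] = (long long) (a[i] * (double) (1ULL << scale));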
2326
2327 ;; convert vector of 64-bit floating point numbers to vector of
2328 ;; 64-bit signed integer
2329 (define_insn "vsx_xvcvdpsxds"
2330 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2331 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2332 UNSPEC_VSX_XVCVDPSXDS))]
2333 "VECTOR_UNIT_VSX_P (V2DFmode)"
2334 "xvcvdpsxds %x0,%x1"
2335 [(set_attr "type" "vecdouble")])
2336
2337 ;; convert vector of 32-bit floating point numbers to vector of
2338 ;; 32-bit signed integer
2339 (define_insn "vsx_xvcvspsxws"
2340 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2341 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2342 UNSPEC_VSX_XVCVSPSXWS))]
2343 "VECTOR_UNIT_VSX_P (V4SFmode)"
2344 "xvcvspsxws %x0,%x1"
2345 [(set_attr "type" "vecfloat")])
2346
2347 ;; convert vector of 64-bit floating point numbers to vector of
2348 ;; 64-bit unsigned integer
2349 (define_expand "vsx_xvcvdpuxds_scale"
2350 [(match_operand:V2DI 0 "vsx_register_operand")
2351 (match_operand:V2DF 1 "vsx_register_operand")
2352 (match_operand:QI 2 "immediate_operand")]
2353 "VECTOR_UNIT_VSX_P (V2DFmode)"
2354 {
2355 rtx op0 = operands[0];
2356 rtx op1 = operands[1];
2357 rtx tmp;
2358 int scale = INTVAL (operands[2]);
2359 if (scale == 0)
2360 tmp = op1;
2361 else
2362 {
2363 tmp = gen_reg_rtx (V2DFmode);
2364 rs6000_scale_v2df (tmp, op1, scale);
2365 }
2366 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2367 DONE;
2368 })
2369
2370 ;; convert vector of 32-bit floating point numbers to vector of
2371 ;; 32-bit unsigned integer
2372 (define_insn "vsx_xvcvspuxws"
2373 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2374 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2375 UNSPEC_VSX_XVCVSPUXWS))]
2376 "VECTOR_UNIT_VSX_P (V4SFmode)"
2377 "xvcvspuxws %x0,%x1"
2378 [(set_attr "type" "vecfloat")])
2379
2380 (define_insn "vsx_xvcvdpuxds"
2381 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2382 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2383 UNSPEC_VSX_XVCVDPUXDS))]
2384 "VECTOR_UNIT_VSX_P (V2DFmode)"
2385 "xvcvdpuxds %x0,%x1"
2386 [(set_attr "type" "vecdouble")])
2387
2388 ;; Convert from 64-bit to 32-bit types
2389 ;; Note, favor the Altivec registers since the usual use of these instructions
2390 ;; is in vector converts and we need to use the Altivec vperm instruction.
2391
2392 (define_insn "vsx_xvcvdpsxws"
2393 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2394 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2395 UNSPEC_VSX_CVDPSXWS))]
2396 "VECTOR_UNIT_VSX_P (V2DFmode)"
2397 "xvcvdpsxws %x0,%x1"
2398 [(set_attr "type" "vecdouble")])
2399
2400 (define_insn "vsx_xvcvdpuxws"
2401 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2402 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2403 UNSPEC_VSX_CVDPUXWS))]
2404 "VECTOR_UNIT_VSX_P (V2DFmode)"
2405 "xvcvdpuxws %x0,%x1"
2406 [(set_attr "type" "vecdouble")])
2407
2408 (define_insn "vsx_xvcvsxdsp"
2409 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2410 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2411 UNSPEC_VSX_CVSXDSP))]
2412 "VECTOR_UNIT_VSX_P (V2DFmode)"
2413 "xvcvsxdsp %x0,%x1"
2414 [(set_attr "type" "vecfloat")])
2415
2416 (define_insn "vsx_xvcvuxdsp"
2417 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2418 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2419 UNSPEC_VSX_CVUXDSP))]
2420 "VECTOR_UNIT_VSX_P (V2DFmode)"
2421 "xvcvuxdsp %x0,%x1"
2422 [(set_attr "type" "vecdouble")])
2423
2424 (define_insn "vsx_xvcdpsp"
2425 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2426 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
2427 UNSPEC_VSX_XVCDPSP))]
2428 "VECTOR_UNIT_VSX_P (V2DFmode)"
2429 "xvcvdpsp %x0,%x1"
2430 [(set_attr "type" "vecdouble")])
2431
2432 ;; Convert from 32-bit to 64-bit types
2433 ;; Provide both vector and scalar targets
2434 (define_insn "vsx_xvcvsxwdp"
2435 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2436 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2437 UNSPEC_VSX_CVSXWDP))]
2438 "VECTOR_UNIT_VSX_P (V2DFmode)"
2439 "xvcvsxwdp %x0,%x1"
2440 [(set_attr "type" "vecdouble")])
2441
2442 (define_insn "vsx_xvcvsxwdp_df"
2443 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2444 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2445 UNSPEC_VSX_CVSXWDP))]
2446 "TARGET_VSX"
2447 "xvcvsxwdp %x0,%x1"
2448 [(set_attr "type" "vecdouble")])
2449
2450 (define_insn "vsx_xvcvuxwdp"
2451 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2452 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2453 UNSPEC_VSX_CVUXWDP))]
2454 "VECTOR_UNIT_VSX_P (V2DFmode)"
2455 "xvcvuxwdp %x0,%x1"
2456 [(set_attr "type" "vecdouble")])
2457
2458 (define_insn "vsx_xvcvuxwdp_df"
2459 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2460 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2461 UNSPEC_VSX_CVUXWDP))]
2462 "TARGET_VSX"
2463 "xvcvuxwdp %x0,%x1"
2464 [(set_attr "type" "vecdouble")])
2465
2466 (define_insn "vsx_xvcvspsxds"
2467 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2468 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2469 UNSPEC_VSX_CVSPSXDS))]
2470 "VECTOR_UNIT_VSX_P (V2DFmode)"
2471 "xvcvspsxds %x0,%x1"
2472 [(set_attr "type" "vecdouble")])
2473
2474 (define_insn "vsx_xvcvspuxds"
2475 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2476 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2477 UNSPEC_VSX_CVSPUXDS))]
2478 "VECTOR_UNIT_VSX_P (V2DFmode)"
2479 "xvcvspuxds %x0,%x1"
2480 [(set_attr "type" "vecdouble")])
2481
2482 (define_insn "vsx_xvcvsxwsp"
2483 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2484 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2485 UNSPEC_VSX_CVSXWSP))]
2486 "VECTOR_UNIT_VSX_P (V4SFmode)"
2487 "xvcvsxwsp %x0,%x1"
2488 [(set_attr "type" "vecfloat")])
2489
2490 (define_insn "vsx_xvcvuxwsp"
2491 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2492 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2493 UNSPEC_VSX_CVUXWSP))]
2494 "VECTOR_UNIT_VSX_P (V4SFmode)"
2495 "xvcvuxwsp %x0,%x1"
2496 [(set_attr "type" "vecfloat")])
2497
2498 ;; Generate float2 double
2499 ;; convert two vectors of double to a vector of float
2500 (define_expand "float2_v2df"
2501 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2502 (use (match_operand:V2DF 1 "register_operand" "wa"))
2503 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2504 "VECTOR_UNIT_VSX_P (V4SFmode)"
2505 {
2506 rtx rtx_src1, rtx_src2, rtx_dst;
2507
2508 rtx_dst = operands[0];
2509 rtx_src1 = operands[1];
2510 rtx_src2 = operands[2];
2511
2512 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2513 DONE;
2514 })
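
;; A rough sketch of the resulting lane layout (illustration only,
;; assuming the vec_float2 built-in is the user-level entry point):
;;
;;   res[0] = (float) a[0];   res[1] = (float) a[1];
;;   res[2] = (float) b[0];   res[3] = (float) b[1];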
2515
2516 ;; Generate float2
2517 ;; convert two long long signed ints to float
2518 (define_expand "float2_v2di"
2519 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2520 (use (match_operand:V2DI 1 "register_operand" "wa"))
2521 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2522 "VECTOR_UNIT_VSX_P (V4SFmode)"
2523 {
2524 rtx rtx_src1, rtx_src2, rtx_dst;
2525
2526 rtx_dst = operands[0];
2527 rtx_src1 = operands[1];
2528 rtx_src2 = operands[2];
2529
2530 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2531 DONE;
2532 })
2533
2534 ;; Generate uns_float2
2535 ;; convert two long long unsigned ints to float
2536 (define_expand "uns_float2_v2di"
2537 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2538 (use (match_operand:V2DI 1 "register_operand" "wa"))
2539 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2540 "VECTOR_UNIT_VSX_P (V4SFmode)"
2541 {
2542 rtx rtx_src1, rtx_src2, rtx_dst;
2543
2544 rtx_dst = operands[0];
2545 rtx_src1 = operands[1];
2546 rtx_src2 = operands[2];
2547
2548 rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2549 DONE;
2550 })
2551
2552 ;; Generate floate
2553 ;; convert double or long long signed to float
2554 ;; (Only even words are valid, BE numbering)
2555 (define_expand "floate<mode>"
2556 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2557 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2558 "VECTOR_UNIT_VSX_P (V4SFmode)"
2559 {
2560 if (BYTES_BIG_ENDIAN)
2561 {
2562 /* Shift left one word to put the even words in the correct location.  */
2563 rtx rtx_tmp;
2564 rtx rtx_val = GEN_INT (4);
2565
2566 rtx_tmp = gen_reg_rtx (V4SFmode);
2567 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2568 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2569 rtx_tmp, rtx_tmp, rtx_val));
2570 }
2571 else
2572 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2573
2574 DONE;
2575 })
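
;; A rough sketch of the net effect (illustration only): after this
;; expand the even words (BE numbering) hold the converted values,
;;
;;   res[0] = (float) a[0];   res[2] = (float) a[1];
;;
;; and the odd words are undefined.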
2576
2577 ;; Generate uns_floate
2578 ;; convert long long unsigned to float
2579 ;; (Only even words are valid, BE numbering)
2580 (define_expand "unsfloatev2di"
2581 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2582 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2583 "VECTOR_UNIT_VSX_P (V4SFmode)"
2584 {
2585 if (BYTES_BIG_ENDIAN)
2586 {
2587 /* Shift left one word to put the even words in the correct location.  */
2588 rtx rtx_tmp;
2589 rtx rtx_val = GEN_INT (4);
2590
2591 rtx_tmp = gen_reg_rtx (V4SFmode);
2592 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2593 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2594 rtx_tmp, rtx_tmp, rtx_val));
2595 }
2596 else
2597 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2598
2599 DONE;
2600 })
2601
2602 ;; Generate floato
2603 ;; convert double or long long signed to float
2604 ;; (Only odd words are valid, BE numbering)
2605 (define_expand "floato<mode>"
2606 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2607 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2608 "VECTOR_UNIT_VSX_P (V4SFmode)"
2609 {
2610 if (BYTES_BIG_ENDIAN)
2611 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2612 else
2613 {
2614 /* Shift left one word to put the odd words in the correct location.  */
2615 rtx rtx_tmp;
2616 rtx rtx_val = GEN_INT (4);
2617
2618 rtx_tmp = gen_reg_rtx (V4SFmode);
2619 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2620 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2621 rtx_tmp, rtx_tmp, rtx_val));
2622 }
2623 DONE;
2624 })
2625
2626 ;; Generate uns_floato
2627 ;; convert long long unsigned to float
2628 ;; (Only odd words are valid, BE numbering)
2629 (define_expand "unsfloatov2di"
2630 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2631 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2632 "VECTOR_UNIT_VSX_P (V4SFmode)"
2633 {
2634 if (BYTES_BIG_ENDIAN)
2635 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2636 else
2637 {
2638 /* Shift left one word to put the odd words in the correct location.  */
2639 rtx rtx_tmp;
2640 rtx rtx_val = GEN_INT (4);
2641
2642 rtx_tmp = gen_reg_rtx (V4SFmode);
2643 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2644 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2645 rtx_tmp, rtx_tmp, rtx_val));
2646 }
2647 DONE;
2648 })
2649
2650 ;; Generate vsigned2
2651 ;; convert two double float vectors to a vector of single precision ints
2652 (define_expand "vsigned2_v2df"
2653 [(match_operand:V4SI 0 "register_operand" "=wa")
2654 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2655 (match_operand:V2DF 2 "register_operand" "wa")]
2656 UNSPEC_VSX_VSIGNED2)]
2657 "TARGET_VSX"
2658 {
2659 rtx rtx_src1, rtx_src2, rtx_dst;
2660 bool signed_convert = true;
2661
2662 rtx_dst = operands[0];
2663 rtx_src1 = operands[1];
2664 rtx_src2 = operands[2];
2665
2666 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2667 DONE;
2668 })
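
;; A rough sketch of the packing (illustration only, assuming the
;; vec_signed2 built-in is the user-level entry point):
;;
;;   res[0] = (int) a[0];   res[1] = (int) a[1];
;;   res[2] = (int) b[0];   res[3] = (int) b[1];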
2669
2670 ;; Generate vsignedo_v2df
2671 ;; signed double float to int convert odd word
2672 (define_expand "vsignedo_v2df"
2673 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2674 (match_operand:V2DF 1 "register_operand" "wa"))]
2675 "TARGET_VSX"
2676 {
2677 if (BYTES_BIG_ENDIAN)
2678 {
2679 rtx rtx_tmp;
2680 rtx rtx_val = GEN_INT (12);
2681 rtx_tmp = gen_reg_rtx (V4SImode);
2682
2683 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2684
2685 /* Big endian word numbering for words in operand is 0 1 2 3.
2686 Take (operand[1] operand[1]) and shift left one word:
2687 0 1 2 3 0 1 2 3 => 1 2 3 0
2688 Words 1 and 3 are now where they need to be for the result. */
2689
2690 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2691 rtx_tmp, rtx_val));
2692 }
2693 else
2694 /* Little endian word numbering for operand is 3 2 1 0.
2695 Result words 3 and 1 are where they need to be. */
2696 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2697
2698 DONE;
2699 }
2700 [(set_attr "type" "veccomplex")])
2701
2702 ;; Generate vsignede_v2df
2703 ;; signed double float to int even word
2704 (define_expand "vsignede_v2df"
2705 [(set (match_operand:V4SI 0 "register_operand" "=v")
2706 (match_operand:V2DF 1 "register_operand" "v"))]
2707 "TARGET_VSX"
2708 {
2709 if (BYTES_BIG_ENDIAN)
2710 /* Big endian word numbering for words in operand is 0 1 2 3.
2711 Result words 0 and 2 are already where they need to be. */
2712 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2713
2714 else
2715 {
2716 rtx rtx_tmp;
2717 rtx rtx_val = GEN_INT (12);
2718 rtx_tmp = gen_reg_rtx (V4SImode);
2719
2720 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2721
2722 /* Little endian word numbering for operand is 3 2 1 0.
2723 take (operand[1] operand[1]) and shift left three words
2724 0 1 2 3 0 1 2 3 => 3 0 1 2
2725 Words 0 and 2 are now where they need to be for the result. */
2726 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2727 rtx_tmp, rtx_val));
2728 }
2729 DONE;
2730 }
2731 [(set_attr "type" "veccomplex")])
2732
2733 ;; Generate vunsigned2
2734 ;; convert two double float vectors to a vector of single precision
2735 ;; unsigned ints
2736 (define_expand "vunsigned2_v2df"
2737 [(match_operand:V4SI 0 "register_operand" "=v")
2738 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2739 (match_operand:V2DF 2 "register_operand" "v")]
2740 UNSPEC_VSX_VSIGNED2)]
2741 "TARGET_VSX"
2742 {
2743 rtx rtx_src1, rtx_src2, rtx_dst;
2744 bool signed_convert = false;
2745
2746 rtx_dst = operands[0];
2747 rtx_src1 = operands[1];
2748 rtx_src2 = operands[2];
2749
2750 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2751 DONE;
2752 })
2753
2754 ;; Generate vunsignedo_v2df
2755 ;; unsigned double float to int convert odd word
2756 (define_expand "vunsignedo_v2df"
2757 [(set (match_operand:V4SI 0 "register_operand" "=v")
2758 (match_operand:V2DF 1 "register_operand" "v"))]
2759 "TARGET_VSX"
2760 {
2761 if (BYTES_BIG_ENDIAN)
2762 {
2763 rtx rtx_tmp;
2764 rtx rtx_val = GEN_INT (12);
2765 rtx_tmp = gen_reg_rtx (V4SImode);
2766
2767 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2768
2769 /* Big endian word numbering for words in operand is 0 1 2 3.
2770 Take (operand[1] operand[1]) and shift left one word:
2771 0 1 2 3 0 1 2 3 => 1 2 3 0
2772 Words 1 and 3 are now where they need to be for the result. */
2773
2774 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2775 rtx_tmp, rtx_val));
2776 }
2777 else
2778 /* Little endian word numbering for operand is 3 2 1 0.
2779 Result words 3 and 1 are where they need to be. */
2780 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2781
2782 DONE;
2783 }
2784 [(set_attr "type" "veccomplex")])
2785
2786 ;; Generate vunsignede_v2df
2787 ;; unsigned double float to int even word
2788 (define_expand "vunsignede_v2df"
2789 [(set (match_operand:V4SI 0 "register_operand" "=v")
2790 (match_operand:V2DF 1 "register_operand" "v"))]
2791 "TARGET_VSX"
2792 {
2793 if (BYTES_BIG_ENDIAN)
2794 /* Big endian word numbering for words in operand is 0 1 2 3.
2795 Result words 0 and 2 are already where they need to be. */
2796 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2797
2798 else
2799 {
2800 rtx rtx_tmp;
2801 rtx rtx_val = GEN_INT (12);
2802 rtx_tmp = gen_reg_rtx (V4SImode);
2803
2804 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2805
2806 /* Little endian word numbering for operand is 3 2 1 0.
2807 take (operand[1] operand[1]) and shift left three words
2808 0 1 2 3 0 1 2 3 => 3 0 1 2
2809 Words 0 and 2 are now where they need to be for the result. */
2810 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2811 rtx_tmp, rtx_val));
2812 }
2813 DONE;
2814 }
2815 [(set_attr "type" "veccomplex")])
2816
2817 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2818 ;; the xvrdpiz instruction does not truncate the value if the floating
2819 ;; point value is < LONG_MIN or > LONG_MAX.
2820 (define_insn "*vsx_float_fix_v2df2"
2821 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2822 (float:V2DF
2823 (fix:V2DI
2824 (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2825 "TARGET_HARD_FLOAT
2826 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2827 && !flag_trapping_math && TARGET_FRIZ"
2828 "xvrdpiz %x0,%x1"
2829 [(set_attr "type" "vecdouble")])
2830
2831 \f
2832 ;; Permute operations
2833
2834 ;; Build a V2DF/V2DI vector from two scalars
2835 (define_insn "vsx_concat_<mode>"
2836 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2837 (vec_concat:VSX_D
2838 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2839 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2840 "VECTOR_MEM_VSX_P (<MODE>mode)"
2841 {
2842 if (which_alternative == 0)
2843 return (BYTES_BIG_ENDIAN
2844 ? "xxpermdi %x0,%x1,%x2,0"
2845 : "xxpermdi %x0,%x2,%x1,0");
2846
2847 else if (which_alternative == 1)
2848 return (BYTES_BIG_ENDIAN
2849 ? "mtvsrdd %x0,%1,%2"
2850 : "mtvsrdd %x0,%2,%1");
2851
2852 else
2853 gcc_unreachable ();
2854 }
2855 [(set_attr "type" "vecperm")])
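
;; A minimal sketch of the operation this pattern implements
;; (illustration only):
;;
;;   vector double
;;   concat (double a, double b)
;;   {
;;     return (vector double) { a, b };   /* element 0 = a, element 1 = b */
;;   }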
2856
2857 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2858 ;; word element in a vector register.
2859 (define_insn "*vsx_concat_<mode>_1"
2860 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2861 (vec_concat:VSX_D
2862 (vec_select:<VS_scalar>
2863 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2864 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2865 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2866 "VECTOR_MEM_VSX_P (<MODE>mode)"
2867 {
2868 HOST_WIDE_INT dword = INTVAL (operands[2]);
2869 if (BYTES_BIG_ENDIAN)
2870 {
2871 operands[4] = GEN_INT (2 * dword);
2872 return "xxpermdi %x0,%x1,%x3,%4";
2873 }
2874 else
2875 {
2876 operands[4] = GEN_INT (!dword);
2877 return "xxpermdi %x0,%x3,%x1,%4";
2878 }
2879 }
2880 [(set_attr "type" "vecperm")])
2881
2882 (define_insn "*vsx_concat_<mode>_2"
2883 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2884 (vec_concat:VSX_D
2885 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2886 (vec_select:<VS_scalar>
2887 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2888 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2889 "VECTOR_MEM_VSX_P (<MODE>mode)"
2890 {
2891 HOST_WIDE_INT dword = INTVAL (operands[3]);
2892 if (BYTES_BIG_ENDIAN)
2893 {
2894 operands[4] = GEN_INT (dword);
2895 return "xxpermdi %x0,%x1,%x2,%4";
2896 }
2897 else
2898 {
2899 operands[4] = GEN_INT (2 * !dword);
2900 return "xxpermdi %x0,%x2,%x1,%4";
2901 }
2902 }
2903 [(set_attr "type" "vecperm")])
2904
2905 (define_insn "*vsx_concat_<mode>_3"
2906 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2907 (vec_concat:VSX_D
2908 (vec_select:<VS_scalar>
2909 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2910 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2911 (vec_select:<VS_scalar>
2912 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2913 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2914 "VECTOR_MEM_VSX_P (<MODE>mode)"
2915 {
2916 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2917 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2918 if (BYTES_BIG_ENDIAN)
2919 {
2920 operands[5] = GEN_INT ((2 * dword1) + dword2);
2921 return "xxpermdi %x0,%x1,%x3,%5";
2922 }
2923 else
2924 {
2925 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2926 return "xxpermdi %x0,%x3,%x1,%5";
2927 }
2928 }
2929 [(set_attr "type" "vecperm")])
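
;; Worked example for the immediate above (illustration only): on big
;; endian, selecting doubleword 1 of operand 1 and doubleword 0 of
;; operand 3 gives 2*1 + 0 = 2, i.e. "xxpermdi %x0,%x1,%x3,2", which
;; places the second doubleword of the first source next to the first
;; doubleword of the second.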
2930
2931 ;; Special purpose concat using xxpermdi to glue two single precision values
2932 ;; together, relying on the fact that internally scalar floats are represented
2933 ;; as doubles. This is used to initialize a V4SF vector with 4 floats.
2934 (define_insn "vsx_concat_v2sf"
2935 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2936 (unspec:V2DF
2937 [(match_operand:SF 1 "vsx_register_operand" "ww")
2938 (match_operand:SF 2 "vsx_register_operand" "ww")]
2939 UNSPEC_VSX_CONCAT))]
2940 "VECTOR_MEM_VSX_P (V2DFmode)"
2941 {
2942 if (BYTES_BIG_ENDIAN)
2943 return "xxpermdi %x0,%x1,%x2,0";
2944 else
2945 return "xxpermdi %x0,%x2,%x1,0";
2946 }
2947 [(set_attr "type" "vecperm")])
2948
2949 ;; V4SImode initialization splitter
2950 (define_insn_and_split "vsx_init_v4si"
2951 [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2952 (unspec:V4SI
2953 [(match_operand:SI 1 "reg_or_cint_operand" "rn")
2954 (match_operand:SI 2 "reg_or_cint_operand" "rn")
2955 (match_operand:SI 3 "reg_or_cint_operand" "rn")
2956 (match_operand:SI 4 "reg_or_cint_operand" "rn")]
2957 UNSPEC_VSX_VEC_INIT))
2958 (clobber (match_scratch:DI 5 "=&r"))
2959 (clobber (match_scratch:DI 6 "=&r"))]
2960 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2961 "#"
2962 "&& reload_completed"
2963 [(const_int 0)]
2964 {
2965 rs6000_split_v4si_init (operands);
2966 DONE;
2967 })
2968
2969 ;; xxpermdi for little endian loads and stores. We need several of
2970 ;; these since the form of the PARALLEL differs by mode.
2971 (define_insn "*vsx_xxpermdi2_le_<mode>"
2972 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2973 (vec_select:VSX_D
2974 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2975 (parallel [(const_int 1) (const_int 0)])))]
2976 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2977 "xxpermdi %x0,%x1,%x1,2"
2978 [(set_attr "type" "vecperm")])
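
;; Worked example (illustration only): with both inputs the same
;; register, immediate 2 (binary 10) selects doubleword 1 of the first
;; source and doubleword 0 of the second, so "xxpermdi %x0,%x1,%x1,2"
;; swaps the two 64-bit halves, which is exactly the reordering the
;; little endian vec_select above asks for.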
2979
2980 (define_insn "*vsx_xxpermdi4_le_<mode>"
2981 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2982 (vec_select:VSX_W
2983 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2984 (parallel [(const_int 2) (const_int 3)
2985 (const_int 0) (const_int 1)])))]
2986 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2987 "xxpermdi %x0,%x1,%x1,2"
2988 [(set_attr "type" "vecperm")])
2989
2990 (define_insn "*vsx_xxpermdi8_le_V8HI"
2991 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2992 (vec_select:V8HI
2993 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2994 (parallel [(const_int 4) (const_int 5)
2995 (const_int 6) (const_int 7)
2996 (const_int 0) (const_int 1)
2997 (const_int 2) (const_int 3)])))]
2998 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
2999 "xxpermdi %x0,%x1,%x1,2"
3000 [(set_attr "type" "vecperm")])
3001
3002 (define_insn "*vsx_xxpermdi16_le_V16QI"
3003 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3004 (vec_select:V16QI
3005 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3006 (parallel [(const_int 8) (const_int 9)
3007 (const_int 10) (const_int 11)
3008 (const_int 12) (const_int 13)
3009 (const_int 14) (const_int 15)
3010 (const_int 0) (const_int 1)
3011 (const_int 2) (const_int 3)
3012 (const_int 4) (const_int 5)
3013 (const_int 6) (const_int 7)])))]
3014 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
3015 "xxpermdi %x0,%x1,%x1,2"
3016 [(set_attr "type" "vecperm")])
3017
3018 ;; lxvd2x for little endian loads. We need several of
3019 ;; these since the form of the PARALLEL differs by mode.
3020 (define_insn "*vsx_lxvd2x2_le_<mode>"
3021 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3022 (vec_select:VSX_D
3023 (match_operand:VSX_D 1 "memory_operand" "Z")
3024 (parallel [(const_int 1) (const_int 0)])))]
3025 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3026 "lxvd2x %x0,%y1"
3027 [(set_attr "type" "vecload")])
3028
3029 (define_insn "*vsx_lxvd2x4_le_<mode>"
3030 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3031 (vec_select:VSX_W
3032 (match_operand:VSX_W 1 "memory_operand" "Z")
3033 (parallel [(const_int 2) (const_int 3)
3034 (const_int 0) (const_int 1)])))]
3035 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3036 "lxvd2x %x0,%y1"
3037 [(set_attr "type" "vecload")])
3038
3039 (define_insn "*vsx_lxvd2x8_le_V8HI"
3040 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3041 (vec_select:V8HI
3042 (match_operand:V8HI 1 "memory_operand" "Z")
3043 (parallel [(const_int 4) (const_int 5)
3044 (const_int 6) (const_int 7)
3045 (const_int 0) (const_int 1)
3046 (const_int 2) (const_int 3)])))]
3047 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3048 "lxvd2x %x0,%y1"
3049 [(set_attr "type" "vecload")])
3050
3051 (define_insn "*vsx_lxvd2x16_le_V16QI"
3052 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3053 (vec_select:V16QI
3054 (match_operand:V16QI 1 "memory_operand" "Z")
3055 (parallel [(const_int 8) (const_int 9)
3056 (const_int 10) (const_int 11)
3057 (const_int 12) (const_int 13)
3058 (const_int 14) (const_int 15)
3059 (const_int 0) (const_int 1)
3060 (const_int 2) (const_int 3)
3061 (const_int 4) (const_int 5)
3062 (const_int 6) (const_int 7)])))]
3063 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3064 "lxvd2x %x0,%y1"
3065 [(set_attr "type" "vecload")])
3066
3067 ;; stxvd2x for little endian stores. We need several of
3068 ;; these since the form of the PARALLEL differs by mode.
3069 (define_insn "*vsx_stxvd2x2_le_<mode>"
3070 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3071 (vec_select:VSX_D
3072 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3073 (parallel [(const_int 1) (const_int 0)])))]
3074 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3075 "stxvd2x %x1,%y0"
3076 [(set_attr "type" "vecstore")])
3077
3078 (define_insn "*vsx_stxvd2x4_le_<mode>"
3079 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3080 (vec_select:VSX_W
3081 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3082 (parallel [(const_int 2) (const_int 3)
3083 (const_int 0) (const_int 1)])))]
3084 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3085 "stxvd2x %x1,%y0"
3086 [(set_attr "type" "vecstore")])
3087
3088 (define_insn "*vsx_stxvd2x8_le_V8HI"
3089 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3090 (vec_select:V8HI
3091 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3092 (parallel [(const_int 4) (const_int 5)
3093 (const_int 6) (const_int 7)
3094 (const_int 0) (const_int 1)
3095 (const_int 2) (const_int 3)])))]
3096 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3097 "stxvd2x %x1,%y0"
3098 [(set_attr "type" "vecstore")])
3099
3100 (define_insn "*vsx_stxvd2x16_le_V16QI"
3101 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3102 (vec_select:V16QI
3103 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3104 (parallel [(const_int 8) (const_int 9)
3105 (const_int 10) (const_int 11)
3106 (const_int 12) (const_int 13)
3107 (const_int 14) (const_int 15)
3108 (const_int 0) (const_int 1)
3109 (const_int 2) (const_int 3)
3110 (const_int 4) (const_int 5)
3111 (const_int 6) (const_int 7)])))]
3112 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3113 "stxvd2x %x1,%y0"
3114 [(set_attr "type" "vecstore")])
3115
3116 ;; Convert a TImode value into V1TImode
3117 (define_expand "vsx_set_v1ti"
3118 [(match_operand:V1TI 0 "nonimmediate_operand")
3119 (match_operand:V1TI 1 "nonimmediate_operand")
3120 (match_operand:TI 2 "input_operand")
3121 (match_operand:QI 3 "u5bit_cint_operand")]
3122 "VECTOR_MEM_VSX_P (V1TImode)"
3123 {
3124 if (operands[3] != const0_rtx)
3125 gcc_unreachable ();
3126
3127 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3128 DONE;
3129 })
3130
3131 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3132 (define_expand "vsx_set_<mode>"
3133 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3134 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3135 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3136 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3137 "VECTOR_MEM_VSX_P (<MODE>mode)"
3138 {
3139 rtx dest = operands[0];
3140 rtx vec_reg = operands[1];
3141 rtx value = operands[2];
3142 rtx ele = operands[3];
3143 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3144
3145 if (ele == const0_rtx)
3146 {
3147 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3148 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3149 DONE;
3150 }
3151 else if (ele == const1_rtx)
3152 {
3153 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3154 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3155 DONE;
3156 }
3157 else
3158 gcc_unreachable ();
3159 })
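
;; A rough sketch of the expansion above for element 0 (illustration
;; only): keep the untouched element and rebuild by concatenation,
;;
;;   tmp = vec[1];              /* vsx_extract */
;;   res = { value, tmp };      /* vsx_concat  */
;;
;; and symmetrically for element 1.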
3160
3161 ;; Extract a DF/DI element from V2DF/V2DI
3162 ;; Optimize cases where we can do a simple or direct move,
3163 ;; or see if we can avoid doing the move at all.
3164
3165 ;; There are some unresolved problems with reload that show up if an Altivec
3166 ;; register was picked. Limit the scalar value to FPRs for now.
3167
3168 (define_insn "vsx_extract_<mode>"
3169 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3171 (vec_select:<VS_scalar>
3172 (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
3174 (parallel
3175 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3176 "VECTOR_MEM_VSX_P (<MODE>mode)"
3177 {
3178 int element = INTVAL (operands[2]);
3179 int op0_regno = REGNO (operands[0]);
3180 int op1_regno = REGNO (operands[1]);
3181 int fldDM;
3182
3183 gcc_assert (IN_RANGE (element, 0, 1));
3184 gcc_assert (VSX_REGNO_P (op1_regno));
3185
3186 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3187 {
3188 if (op0_regno == op1_regno)
3189 return ASM_COMMENT_START " vec_extract to same register";
3190
3191 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3192 && TARGET_POWERPC64)
3193 return "mfvsrd %0,%x1";
3194
3195 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3196 return "fmr %0,%1";
3197
3198 else if (VSX_REGNO_P (op0_regno))
3199 return "xxlor %x0,%x1,%x1";
3200
3201 else
3202 gcc_unreachable ();
3203 }
3204
3205 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3206 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3207 return "mfvsrld %0,%x1";
3208
3209 else if (VSX_REGNO_P (op0_regno))
3210 {
3211 fldDM = element << 1;
3212 if (!BYTES_BIG_ENDIAN)
3213 fldDM = 3 - fldDM;
3214 operands[3] = GEN_INT (fldDM);
3215 return "xxpermdi %x0,%x1,%x1,%3";
3216 }
3217
3218 else
3219 gcc_unreachable ();
3220 }
3221 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
3222
3223 ;; Optimize extracting a single scalar element from memory.
3224 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3225 [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
3226 (vec_select:<VSX_D:VS_scalar>
3227 (match_operand:VSX_D 1 "memory_operand" "m,m")
3228 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3229 (clobber (match_scratch:P 3 "=&b,&b"))]
3230 "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3231 "#"
3232 "&& reload_completed"
3233 [(set (match_dup 0) (match_dup 4))]
3234 {
3235 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3236 operands[3], <VSX_D:VS_scalar>mode);
3237 }
3238 [(set_attr "type" "fpload,load")
3239 (set_attr "length" "8")])
3240
3241 ;; Optimize storing to memory a single scalar element that is already in
3242 ;; the right location.
3243 (define_insn "*vsx_extract_<mode>_store"
3244 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3245 (vec_select:<VS_scalar>
3246 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
3247 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3248 "VECTOR_MEM_VSX_P (<MODE>mode)"
3249 "@
3250 stfd%U0%X0 %1,%0
3251 stxsd%U0x %x1,%y0
3252 stxsd %1,%0"
3253 [(set_attr "type" "fpstore")
3254 (set_attr "length" "4")])
3255
3256 ;; Variable V2DI/V2DF extract shift
3257 (define_insn "vsx_vslo_<mode>"
3258 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3259 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3260 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3261 UNSPEC_VSX_VSLO))]
3262 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3263 "vslo %0,%1,%2"
3264 [(set_attr "type" "vecperm")])
3265
3266 ;; Variable V2DI/V2DF extract
3267 (define_insn_and_split "vsx_extract_<mode>_var"
3268 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
3269 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3270 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3271 UNSPEC_VSX_EXTRACT))
3272 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3273 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3274 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3275 "#"
3276 "&& reload_completed"
3277 [(const_int 0)]
3278 {
3279 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3280 operands[3], operands[4]);
3281 DONE;
3282 })
3283
3284 ;; Extract a SF element from V4SF
3285 (define_insn_and_split "vsx_extract_v4sf"
3286 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
3287 (vec_select:SF
3288 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3289 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3290 (clobber (match_scratch:V4SF 3 "=0"))]
3291 "VECTOR_UNIT_VSX_P (V4SFmode)"
3292 "#"
3293 "&& 1"
3294 [(const_int 0)]
3295 {
3296 rtx op0 = operands[0];
3297 rtx op1 = operands[1];
3298 rtx op2 = operands[2];
3299 rtx op3 = operands[3];
3300 rtx tmp;
3301 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3302
3303 if (ele == 0)
3304 tmp = op1;
3305 else
3306 {
3307 if (GET_CODE (op3) == SCRATCH)
3308 op3 = gen_reg_rtx (V4SFmode);
3309 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3310 tmp = op3;
3311 }
3312 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3313 DONE;
3314 }
3315 [(set_attr "length" "8")
3316 (set_attr "type" "fp")])
3317
3318 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3319 [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
3320 (vec_select:SF
3321 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3322 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3323 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3324 "VECTOR_MEM_VSX_P (V4SFmode)"
3325 "#"
3326 "&& reload_completed"
3327 [(set (match_dup 0) (match_dup 4))]
3328 {
3329 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3330 operands[3], SFmode);
3331 }
3332 [(set_attr "type" "fpload,fpload,fpload,load")
3333 (set_attr "length" "8")])
3334
3335 ;; Variable V4SF extract
3336 (define_insn_and_split "vsx_extract_v4sf_var"
3337 [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
3338 (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3339 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3340 UNSPEC_VSX_EXTRACT))
3341 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3342 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3343 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3344 "#"
3345 "&& reload_completed"
3346 [(const_int 0)]
3347 {
3348 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3349 operands[3], operands[4]);
3350 DONE;
3351 })
3352
3353 ;; Expand the builtin form of xxpermdi to canonical rtl.
3354 (define_expand "vsx_xxpermdi_<mode>"
3355 [(match_operand:VSX_L 0 "vsx_register_operand")
3356 (match_operand:VSX_L 1 "vsx_register_operand")
3357 (match_operand:VSX_L 2 "vsx_register_operand")
3358 (match_operand:QI 3 "u5bit_cint_operand")]
3359 "VECTOR_MEM_VSX_P (<MODE>mode)"
3360 {
3361 rtx target = operands[0];
3362 rtx op0 = operands[1];
3363 rtx op1 = operands[2];
3364 int mask = INTVAL (operands[3]);
3365 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3366 rtx perm1 = GEN_INT ((mask & 1) + 2);
3367 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3368
3369 if (<MODE>mode == V2DFmode)
3370 gen = gen_vsx_xxpermdi2_v2df_1;
3371 else
3372 {
3373 gen = gen_vsx_xxpermdi2_v2di_1;
3374 if (<MODE>mode != V2DImode)
3375 {
3376 target = gen_lowpart (V2DImode, target);
3377 op0 = gen_lowpart (V2DImode, op0);
3378 op1 = gen_lowpart (V2DImode, op1);
3379 }
3380 }
3381 emit_insn (gen (target, op0, op1, perm0, perm1));
3382 DONE;
3383 })
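;; Worked example of the mask decomposition above: for mask 1, perm0 is
;; (1 >> 1) & 1 = 0 and perm1 is (1 & 1) + 2 = 3, so the vec_select picks
;; elements {0, 3} of the concatenation {a0, a1, b0, b1}, i.e. the result
;; is { a[0], b[1] }.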
3384
3385 ;; Special version of xxpermdi that retains big-endian semantics.
3386 (define_expand "vsx_xxpermdi_<mode>_be"
3387 [(match_operand:VSX_L 0 "vsx_register_operand")
3388 (match_operand:VSX_L 1 "vsx_register_operand")
3389 (match_operand:VSX_L 2 "vsx_register_operand")
3390 (match_operand:QI 3 "u5bit_cint_operand")]
3391 "VECTOR_MEM_VSX_P (<MODE>mode)"
3392 {
3393 rtx target = operands[0];
3394 rtx op0 = operands[1];
3395 rtx op1 = operands[2];
3396 int mask = INTVAL (operands[3]);
3397 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3398 rtx perm1 = GEN_INT ((mask & 1) + 2);
3399 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3400
3401 if (<MODE>mode == V2DFmode)
3402 gen = gen_vsx_xxpermdi2_v2df_1;
3403 else
3404 {
3405 gen = gen_vsx_xxpermdi2_v2di_1;
3406 if (<MODE>mode != V2DImode)
3407 {
3408 target = gen_lowpart (V2DImode, target);
3409 op0 = gen_lowpart (V2DImode, op0);
3410 op1 = gen_lowpart (V2DImode, op1);
3411 }
3412 }
3413 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3414 transformation we don't want; it is necessary for
3415 rs6000_expand_vec_perm_const_1 but not for this use. So we
3416 prepare for that by reversing the transformation here. */
3417 if (BYTES_BIG_ENDIAN)
3418 emit_insn (gen (target, op0, op1, perm0, perm1));
3419 else
3420 {
3421 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3422 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3423 emit_insn (gen (target, op1, op0, p0, p1));
3424 }
3425 DONE;
3426 })
3427
3428 (define_insn "vsx_xxpermdi2_<mode>_1"
3429 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3430 (vec_select:VSX_D
3431 (vec_concat:<VS_double>
3432 (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3433 (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3434 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3435 (match_operand 4 "const_2_to_3_operand" "")])))]
3436 "VECTOR_MEM_VSX_P (<MODE>mode)"
3437 {
3438 int op3, op4, mask;
3439
3440 /* For little endian, swap operands and invert/swap selectors
3441 to get the correct xxpermdi. The operand swap sets up the
3442 inputs as a little endian array. The selectors are swapped
3443 because they are defined to use big endian ordering. The
3444 selectors are inverted to get the correct doublewords for
3445 little endian ordering. */
3446 if (BYTES_BIG_ENDIAN)
3447 {
3448 op3 = INTVAL (operands[3]);
3449 op4 = INTVAL (operands[4]);
3450 }
3451 else
3452 {
3453 op3 = 3 - INTVAL (operands[4]);
3454 op4 = 3 - INTVAL (operands[3]);
3455 }
3456
3457 mask = (op3 << 1) | (op4 - 2);
3458 operands[3] = GEN_INT (mask);
3459
3460 if (BYTES_BIG_ENDIAN)
3461 return "xxpermdi %x0,%x1,%x2,%3";
3462 else
3463 return "xxpermdi %x0,%x2,%x1,%3";
3464 }
3465 [(set_attr "type" "vecperm")])
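;; Worked example of the little endian adjustment above: for selectors 0
;; and 2 (both halves taken from the first input), big endian emits
;; "xxpermdi %x0,%x1,%x2,0", while little endian computes op3 = 3 - 2 = 1
;; and op4 = 3 - 0 = 3, giving mask (1 << 1) | (3 - 2) = 3 and the
;; swapped form "xxpermdi %x0,%x2,%x1,3".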
3466
3467 ;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
3468 ;; none of the small types were allowed in a vector register, so we had to
3469 ;; extract the value to DImode and either do a direct move or a store.
3470 (define_expand "vsx_extract_<mode>"
3471 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3472 (vec_select:<VS_scalar>
3473 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3474 (parallel [(match_operand:QI 2 "const_int_operand")])))
3475 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3476 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3477 {
3478 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
3479 if (TARGET_P9_VECTOR)
3480 {
3481 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3482 operands[2]));
3483 DONE;
3484 }
3485 })
3486
3487 (define_insn "vsx_extract_<mode>_p9"
3488 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3489 (vec_select:<VS_scalar>
3490 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3491 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3492 (clobber (match_scratch:SI 3 "=r,X"))]
3493 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3494 {
3495 if (which_alternative == 0)
3496 return "#";
3497
3498 else
3499 {
3500 HOST_WIDE_INT elt = INTVAL (operands[2]);
3501 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3502 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3503 : elt);
3504
3505 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3506 HOST_WIDE_INT offset = unit_size * elt_adj;
3507
3508 operands[2] = GEN_INT (offset);
3509 if (unit_size == 4)
3510 return "xxextractuw %x0,%x1,%2";
3511 else
3512 return "vextractu<wd> %0,%1,%2";
3513 }
3514 }
3515 [(set_attr "type" "vecsimple")])
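;; Worked example of the offset computation above: extracting element 2 of
;; a V8HI on little endian gives elt_adj = 8 - 1 - 2 = 5 and a byte offset
;; of 2 * 5 = 10, so the insn prints as "vextractuh %0,%1,10".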
3516
3517 (define_split
3518 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3519 (vec_select:<VS_scalar>
3520 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3521 (parallel [(match_operand:QI 2 "const_int_operand")])))
3522 (clobber (match_operand:SI 3 "int_reg_operand"))]
3523 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3524 [(const_int 0)]
3525 {
3526 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3527 rtx op1 = operands[1];
3528 rtx op2 = operands[2];
3529 rtx op3 = operands[3];
3530 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3531
3532 emit_move_insn (op3, GEN_INT (offset));
3533 if (BYTES_BIG_ENDIAN)
3534 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3535 else
3536 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3537 DONE;
3538 })
3539
3540 ;; Optimize zero extracts to eliminate the AND after the extract.
3541 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3542 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3543 (zero_extend:DI
3544 (vec_select:<VS_scalar>
3545 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3546 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3547 (clobber (match_scratch:SI 3 "=r,X"))]
3548 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3549 "#"
3550 "&& reload_completed"
3551 [(parallel [(set (match_dup 4)
3552 (vec_select:<VS_scalar>
3553 (match_dup 1)
3554 (parallel [(match_dup 2)])))
3555 (clobber (match_dup 3))])]
3556 {
3557 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3558 })
3559
3560 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3561 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3562 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3563 (vec_select:<VS_scalar>
3564 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3565 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3566 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3567 (clobber (match_scratch:SI 4 "=X,&r"))]
3568 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3569 "#"
3570 "&& reload_completed"
3571 [(parallel [(set (match_dup 3)
3572 (vec_select:<VS_scalar>
3573 (match_dup 1)
3574 (parallel [(match_dup 2)])))
3575 (clobber (match_dup 4))])
3576 (set (match_dup 0)
3577 (match_dup 3))])
3578
3579 (define_insn_and_split "*vsx_extract_si"
3580 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3581 (vec_select:SI
3582 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3583 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3584 (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3585 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3586 "#"
3587 "&& reload_completed"
3588 [(const_int 0)]
3589 {
3590 rtx dest = operands[0];
3591 rtx src = operands[1];
3592 rtx element = operands[2];
3593 rtx vec_tmp = operands[3];
3594 int value;
3595
3596 if (!BYTES_BIG_ENDIAN)
3597 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3598
3599 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3600 instruction. */
3601 value = INTVAL (element);
3602 if (value != 1)
3603 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3604 else
3605 vec_tmp = src;
3606
3607 if (MEM_P (operands[0]))
3608 {
3609 if (can_create_pseudo_p ())
3610 dest = rs6000_address_for_fpconvert (dest);
3611
3612 if (TARGET_P8_VECTOR)
3613 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3614 else
3615 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3616 }
3617
3618 else if (TARGET_P8_VECTOR)
3619 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3620 else
3621 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3622 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3623
3624 DONE;
3625 }
3626 [(set_attr "type" "mftgpr,vecperm,fpstore")
3627 (set_attr "length" "8")])
3628
3629 (define_insn_and_split "*vsx_extract_<mode>_p8"
3630 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3631 (vec_select:<VS_scalar>
3632 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3633 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3634 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3635 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3636 && !TARGET_P9_VECTOR"
3637 "#"
3638 "&& reload_completed"
3639 [(const_int 0)]
3640 {
3641 rtx dest = operands[0];
3642 rtx src = operands[1];
3643 rtx element = operands[2];
3644 rtx vec_tmp = operands[3];
3645 int value;
3646
3647 if (!BYTES_BIG_ENDIAN)
3648 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3649
3650 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3651 instruction. */
3652 value = INTVAL (element);
3653 if (<MODE>mode == V16QImode)
3654 {
3655 if (value != 7)
3656 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3657 else
3658 vec_tmp = src;
3659 }
3660 else if (<MODE>mode == V8HImode)
3661 {
3662 if (value != 3)
3663 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3664 else
3665 vec_tmp = src;
3666 }
3667 else
3668 gcc_unreachable ();
3669
3670 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3671 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3672 DONE;
3673 }
3674 [(set_attr "type" "mftgpr")])
3675
3676 ;; Optimize extracting a single scalar element from memory.
3677 (define_insn_and_split "*vsx_extract_<mode>_load"
3678 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3679 (vec_select:<VS_scalar>
3680 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3681 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3682 (clobber (match_scratch:DI 3 "=&b"))]
3683 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3684 "#"
3685 "&& reload_completed"
3686 [(set (match_dup 0) (match_dup 4))]
3687 {
3688 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3689 operands[3], <VS_scalar>mode);
3690 }
3691 [(set_attr "type" "load")
3692 (set_attr "length" "8")])
3693
3694 ;; Variable V16QI/V8HI/V4SI extract
3695 (define_insn_and_split "vsx_extract_<mode>_var"
3696 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3697 (unspec:<VS_scalar>
3698 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3699 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3700 UNSPEC_VSX_EXTRACT))
3701 (clobber (match_scratch:DI 3 "=r,r,&b"))
3702 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3703 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3704 "#"
3705 "&& reload_completed"
3706 [(const_int 0)]
3707 {
3708 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3709 operands[3], operands[4]);
3710 DONE;
3711 })
3712
3713 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
3714 [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
3715 (zero_extend:SDI
3716 (unspec:<VSX_EXTRACT_I:VS_scalar>
3717 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3718 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3719 UNSPEC_VSX_EXTRACT)))
3720 (clobber (match_scratch:DI 3 "=r,r,&b"))
3721 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3722 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3723 "#"
3724 "&& reload_completed"
3725 [(const_int 0)]
3726 {
3727 machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
3728 rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3729 operands[1], operands[2],
3730 operands[3], operands[4]);
3731 DONE;
3732 })
3733
3734 ;; VSX_EXTRACT optimizations
3735 ;; Optimize double d = (double) vec_extract (vi, <n>)
3736 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
3737 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3738 [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3739 (any_float:DF
3740 (vec_select:SI
3741 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3742 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3743 (clobber (match_scratch:V4SI 3 "=v"))]
3744 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3745 "#"
3746 "&& 1"
3747 [(const_int 0)]
3748 {
3749 rtx dest = operands[0];
3750 rtx src = operands[1];
3751 rtx element = operands[2];
3752 rtx v4si_tmp = operands[3];
3753 int value;
3754
3755 if (!BYTES_BIG_ENDIAN)
3756 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3757
3758 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3759 instruction. */
3760 value = INTVAL (element);
3761 if (value != 0)
3762 {
3763 if (GET_CODE (v4si_tmp) == SCRATCH)
3764 v4si_tmp = gen_reg_rtx (V4SImode);
3765 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3766 }
3767 else
3768 v4si_tmp = src;
3769
3770 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3771 DONE;
3772 })
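;; Worked example of the split above: on little endian, element 3 of the
;; V4SI is remapped to 3 - 3 = 0, which is already the position the
;; conversion expects, so the vspltw is skipped and only the xvcvsxwdp
;; (or xvcvuxwdp) is emitted to produce the DFmode result.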
3773
3774 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3775 ;; where <type> is a floating point type supported by the hardware that is
3776 ;; not double. First convert the value to double, and then to the desired
3777 ;; type.
3778 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3779 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3780 (any_float:VSX_EXTRACT_FL
3781 (vec_select:SI
3782 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3783 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3784 (clobber (match_scratch:V4SI 3 "=v"))
3785 (clobber (match_scratch:DF 4 "=ws"))]
3786 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3787 "#"
3788 "&& 1"
3789 [(const_int 0)]
3790 {
3791 rtx dest = operands[0];
3792 rtx src = operands[1];
3793 rtx element = operands[2];
3794 rtx v4si_tmp = operands[3];
3795 rtx df_tmp = operands[4];
3796 int value;
3797
3798 if (!BYTES_BIG_ENDIAN)
3799 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3800
3801 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3802 instruction. */
3803 value = INTVAL (element);
3804 if (value != 0)
3805 {
3806 if (GET_CODE (v4si_tmp) == SCRATCH)
3807 v4si_tmp = gen_reg_rtx (V4SImode);
3808 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3809 }
3810 else
3811 v4si_tmp = src;
3812
3813 if (GET_CODE (df_tmp) == SCRATCH)
3814 df_tmp = gen_reg_rtx (DFmode);
3815
3816 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3817
3818 if (<MODE>mode == SFmode)
3819 emit_insn (gen_truncdfsf2 (dest, df_tmp));
3820 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3821 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3822 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3823 && TARGET_FLOAT128_HW)
3824 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3825 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3826 emit_insn (gen_extenddfif2 (dest, df_tmp));
3827 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3828 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3829 else
3830 gcc_unreachable ();
3831
3832 DONE;
3833 })
3834
3835 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
3836 ;; where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3837 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3838 ;; vector short or vector unsigned short.
3839 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3840 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3841 (float:FL_CONV
3842 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3843 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3844 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3845 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3846 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3847 && TARGET_P9_VECTOR"
3848 "#"
3849 "&& reload_completed"
3850 [(parallel [(set (match_dup 3)
3851 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3852 (match_dup 1)
3853 (parallel [(match_dup 2)])))
3854 (clobber (scratch:SI))])
3855 (set (match_dup 4)
3856 (sign_extend:DI (match_dup 3)))
3857 (set (match_dup 0)
3858 (float:<FL_CONV:MODE> (match_dup 4)))]
3859 {
3860 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3861 })
3862
3863 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3864 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3865 (unsigned_float:FL_CONV
3866 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3867 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3868 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3869 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3870 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3871 && TARGET_P9_VECTOR"
3872 "#"
3873 "&& reload_completed"
3874 [(parallel [(set (match_dup 3)
3875 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3876 (match_dup 1)
3877 (parallel [(match_dup 2)])))
3878 (clobber (scratch:SI))])
3879 (set (match_dup 0)
3880 (float:<FL_CONV:MODE> (match_dup 4)))]
3881 {
3882 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3883 })
3884
3885 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
3886 (define_insn "vsx_set_<mode>_p9"
3887 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3888 (unspec:VSX_EXTRACT_I
3889 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3890 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3891 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3892 UNSPEC_VSX_SET))]
3893 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3894 {
3895 int ele = INTVAL (operands[3]);
3896 int nunits = GET_MODE_NUNITS (<MODE>mode);
3897
3898 if (!BYTES_BIG_ENDIAN)
3899 ele = nunits - 1 - ele;
3900
3901 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3902 if (<MODE>mode == V4SImode)
3903 return "xxinsertw %x0,%x2,%3";
3904 else
3905 return "vinsert<wd> %0,%2,%3";
3906 }
3907 [(set_attr "type" "vecperm")])
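;; Worked example of the insert offset above: setting element 5 of a V16QI
;; on little endian remaps the element to 16 - 1 - 5 = 10, and with a
;; 1-byte scalar the insn prints as "vinsertb %0,%2,10".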
3908
3909 (define_insn_and_split "vsx_set_v4sf_p9"
3910 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3911 (unspec:V4SF
3912 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3913 (match_operand:SF 2 "gpc_reg_operand" "ww")
3914 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3915 UNSPEC_VSX_SET))
3916 (clobber (match_scratch:SI 4 "=&wJwK"))]
3917 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3918 "#"
3919 "&& reload_completed"
3920 [(set (match_dup 5)
3921 (unspec:V4SF [(match_dup 2)]
3922 UNSPEC_VSX_CVDPSPN))
3923 (parallel [(set (match_dup 4)
3924 (vec_select:SI (match_dup 6)
3925 (parallel [(match_dup 7)])))
3926 (clobber (scratch:SI))])
3927 (set (match_dup 8)
3928 (unspec:V4SI [(match_dup 8)
3929 (match_dup 4)
3930 (match_dup 3)]
3931 UNSPEC_VSX_SET))]
3932 {
3933 unsigned int tmp_regno = reg_or_subregno (operands[4]);
3934
3935 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3936 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3937 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 2);
3938 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3939 }
3940 [(set_attr "type" "vecperm")
3941 (set_attr "length" "12")])
3942
3943 ;; Special case setting 0.0f to a V4SF element
3944 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3945 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3946 (unspec:V4SF
3947 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3948 (match_operand:SF 2 "zero_fp_constant" "j")
3949 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3950 UNSPEC_VSX_SET))
3951 (clobber (match_scratch:SI 4 "=&wJwK"))]
3952 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3953 "#"
3954 "&& reload_completed"
3955 [(set (match_dup 4)
3956 (const_int 0))
3957 (set (match_dup 5)
3958 (unspec:V4SI [(match_dup 5)
3959 (match_dup 4)
3960 (match_dup 3)]
3961 UNSPEC_VSX_SET))]
3962 {
3963 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3964 }
3965 [(set_attr "type" "vecperm")
3966 (set_attr "length" "8")])
3967
3968 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
3969 ;; that is in the default scalar position (1 for big endian, 2 for little
3970 ;; endian). We just need to do an xxinsertw since the element is in the
3971 ;; correct location.
3972
3973 (define_insn "*vsx_insert_extract_v4sf_p9"
3974 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3975 (unspec:V4SF
3976 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3977 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3978 (parallel
3979 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3980 (match_operand:QI 4 "const_0_to_3_operand" "n")]
3981 UNSPEC_VSX_SET))]
3982 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
3983 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
3984 {
3985 int ele = INTVAL (operands[4]);
3986
3987 if (!BYTES_BIG_ENDIAN)
3988 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
3989
3990 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
3991 return "xxinsertw %x0,%x2,%4";
3992 }
3993 [(set_attr "type" "vecperm")])
3994
3995 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
3996 ;; that is in the default scalar position (1 for big endian, 2 for little
3997 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
3998
3999 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4000 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4001 (unspec:V4SF
4002 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4003 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4004 (parallel
4005 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4006 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4007 UNSPEC_VSX_SET))
4008 (clobber (match_scratch:SI 5 "=&wJwK"))]
4009 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4010 && TARGET_P9_VECTOR && TARGET_POWERPC64
4011 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4012 "#"
4013 "&& 1"
4014 [(parallel [(set (match_dup 5)
4015 (vec_select:SI (match_dup 6)
4016 (parallel [(match_dup 3)])))
4017 (clobber (scratch:SI))])
4018 (set (match_dup 7)
4019 (unspec:V4SI [(match_dup 8)
4020 (match_dup 5)
4021 (match_dup 4)]
4022 UNSPEC_VSX_SET))]
4023 {
4024 if (GET_CODE (operands[5]) == SCRATCH)
4025 operands[5] = gen_reg_rtx (SImode);
4026
4027 operands[6] = gen_lowpart (V4SImode, operands[2]);
4028 operands[7] = gen_lowpart (V4SImode, operands[0]);
4029 operands[8] = gen_lowpart (V4SImode, operands[1]);
4030 }
4031 [(set_attr "type" "vecperm")])
4032
4033 ;; Expanders for builtins
4034 (define_expand "vsx_mergel_<mode>"
4035 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4036 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4037 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4038 "VECTOR_MEM_VSX_P (<MODE>mode)"
4039 {
4040 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4041 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4042 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4043 emit_insn (gen_rtx_SET (operands[0], x));
4044 DONE;
4045 })
4046
4047 (define_expand "vsx_mergeh_<mode>"
4048 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4049 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4050 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4051 "VECTOR_MEM_VSX_P (<MODE>mode)"
4052 {
4053 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4054 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4055 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4056 emit_insn (gen_rtx_SET (operands[0], x));
4057 DONE;
4058 })
4059
4060 ;; V2DF/V2DI splat
4061 ;; We separate the register splat insn from the memory splat insn to force the
4062 ;; register allocator to generate the indexed form of the SPLAT when it is
4063 ;; given an offsettable memory reference. Otherwise, if the register and
4064 ;; memory insns were combined into a single insn, the register allocator would
4065 ;; load the value into a register and then do a double word permute.
4066 (define_expand "vsx_splat_<mode>"
4067 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4068 (vec_duplicate:VSX_D
4069 (match_operand:<VS_scalar> 1 "input_operand")))]
4070 "VECTOR_MEM_VSX_P (<MODE>mode)"
4071 {
4072 rtx op1 = operands[1];
4073 if (MEM_P (op1))
4074 operands[1] = rs6000_address_for_fpconvert (op1);
4075 else if (!REG_P (op1))
4076 op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4077 })
4078
4079 (define_insn "vsx_splat_<mode>_reg"
4080 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
4081 (vec_duplicate:VSX_D
4082 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
4083 "VECTOR_MEM_VSX_P (<MODE>mode)"
4084 "@
4085 xxpermdi %x0,%x1,%x1,0
4086 mtvsrdd %x0,%1,%1"
4087 [(set_attr "type" "vecperm")])
4088
4089 (define_insn "vsx_splat_<VSX_D:mode>_mem"
4090 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
4091 (vec_duplicate:VSX_D
4092 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4093 "VECTOR_MEM_VSX_P (<MODE>mode)"
4094 "lxvdsx %x0,%y1"
4095 [(set_attr "type" "vecload")])
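;; Illustrative C sketch (not from the original sources; assumes the
;; <altivec.h> vec_splats overload).  A register input is expected to use
;; the xxpermdi/mtvsrdd form above, while a splat straight from memory can
;; use the single lxvdsx:
;;
;;   #include <altivec.h>
;;   vector double splat_reg (double x)        { return vec_splats (x); }
;;   vector double splat_mem (const double *p) { return vec_splats (*p); }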
4096
4097 ;; V4SI splat support
4098 (define_insn "vsx_splat_v4si"
4099 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4100 (vec_duplicate:V4SI
4101 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4102 "TARGET_P9_VECTOR"
4103 "@
4104 mtvsrws %x0,%1
4105 lxvwsx %x0,%y1"
4106 [(set_attr "type" "vecperm,vecload")])
4107
4108 ;; SImode is not currently allowed in vector registers. This pattern
4109 ;; allows us to use direct move to get the value in a vector register
4110 ;; so that we can use XXSPLTW
4111 (define_insn "vsx_splat_v4si_di"
4112 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4113 (vec_duplicate:V4SI
4114 (truncate:SI
4115 (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
4116 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4117 "@
4118 xxspltw %x0,%x1,1
4119 mtvsrws %x0,%1"
4120 [(set_attr "type" "vecperm")])
4121
4122 ;; V4SF splat (ISA 3.0)
4123 (define_insn_and_split "vsx_splat_v4sf"
4124 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4125 (vec_duplicate:V4SF
4126 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
4127 "TARGET_P9_VECTOR"
4128 "@
4129 lxvwsx %x0,%y1
4130 #
4131 mtvsrws %x0,%1"
4132 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4133 [(set (match_dup 0)
4134 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4135 (set (match_dup 0)
4136 (unspec:V4SF [(match_dup 0)
4137 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4138 ""
4139 [(set_attr "type" "vecload,vecperm,mftgpr")
4140 (set_attr "length" "4,8,4")])
4141
4142 ;; V4SF/V4SI splat from a vector element
4143 (define_insn "vsx_xxspltw_<mode>"
4144 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4145 (vec_duplicate:VSX_W
4146 (vec_select:<VS_scalar>
4147 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4148 (parallel
4149 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4150 "VECTOR_MEM_VSX_P (<MODE>mode)"
4151 {
4152 if (!BYTES_BIG_ENDIAN)
4153 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4154
4155 return "xxspltw %x0,%x1,%2";
4156 }
4157 [(set_attr "type" "vecperm")])
4158
4159 (define_insn "vsx_xxspltw_<mode>_direct"
4160 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4161 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4162 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4163 UNSPEC_VSX_XXSPLTW))]
4164 "VECTOR_MEM_VSX_P (<MODE>mode)"
4165 "xxspltw %x0,%x1,%2"
4166 [(set_attr "type" "vecperm")])
4167
4168 ;; V16QI/V8HI splat support on ISA 2.07
4169 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4170 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4171 (vec_duplicate:VSX_SPLAT_I
4172 (truncate:<VS_scalar>
4173 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4174 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4175 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4176 [(set_attr "type" "vecperm")])
4177
4178 ;; V2DF/V2DI splat for use by vec_splat builtin
4179 (define_insn "vsx_xxspltd_<mode>"
4180 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4181 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4182 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4183 UNSPEC_VSX_XXSPLTD))]
4184 "VECTOR_MEM_VSX_P (<MODE>mode)"
4185 {
4186 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4187 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4188 return "xxpermdi %x0,%x1,%x1,0";
4189 else
4190 return "xxpermdi %x0,%x1,%x1,3";
4191 }
4192 [(set_attr "type" "vecperm")])
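;; Worked example: element 0 on big endian (or element 1 on little endian)
;; names doubleword 0 of the register, so the splat prints as
;; "xxpermdi %x0,%x1,%x1,0" ({dw0, dw0}); the other element uses mask 3
;; ({dw1, dw1}).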
4193
4194 ;; V4SF/V4SI interleave
4195 (define_insn "vsx_xxmrghw_<mode>"
4196 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4197 (vec_select:VSX_W
4198 (vec_concat:<VS_double>
4199 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4200 (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
4201 (parallel [(const_int 0) (const_int 4)
4202 (const_int 1) (const_int 5)])))]
4203 "VECTOR_MEM_VSX_P (<MODE>mode)"
4204 {
4205 if (BYTES_BIG_ENDIAN)
4206 return "xxmrghw %x0,%x1,%x2";
4207 else
4208 return "xxmrglw %x0,%x2,%x1";
4209 }
4210 [(set_attr "type" "vecperm")])
4211
4212 (define_insn "vsx_xxmrglw_<mode>"
4213 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4214 (vec_select:VSX_W
4215 (vec_concat:<VS_double>
4216 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4217 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
4218 (parallel [(const_int 2) (const_int 6)
4219 (const_int 3) (const_int 7)])))]
4220 "VECTOR_MEM_VSX_P (<MODE>mode)"
4221 {
4222 if (BYTES_BIG_ENDIAN)
4223 return "xxmrglw %x0,%x1,%x2";
4224 else
4225 return "xxmrghw %x0,%x2,%x1";
4226 }
4227 [(set_attr "type" "vecperm")])
4228
4229 ;; Shift left double by word immediate
4230 (define_insn "vsx_xxsldwi_<mode>"
4231 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
4232 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
4233 (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
4234 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4235 UNSPEC_VSX_SLDWI))]
4236 "VECTOR_MEM_VSX_P (<MODE>mode)"
4237 "xxsldwi %x0,%x1,%x2,%3"
4238 [(set_attr "type" "vecperm")])
4239
4240 \f
4241 ;; Vector reduction insns and splitters
4242
4243 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4244 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
4245 (VEC_reduc:V2DF
4246 (vec_concat:V2DF
4247 (vec_select:DF
4248 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4249 (parallel [(const_int 1)]))
4250 (vec_select:DF
4251 (match_dup 1)
4252 (parallel [(const_int 0)])))
4253 (match_dup 1)))
4254 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
4255 "VECTOR_UNIT_VSX_P (V2DFmode)"
4256 "#"
4257 ""
4258 [(const_int 0)]
4259 {
4260 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4261 ? gen_reg_rtx (V2DFmode)
4262 : operands[2];
4263 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4264 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4265 DONE;
4266 }
4267 [(set_attr "length" "8")
4268 (set_attr "type" "veccomplex")])
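;; Worked example of the split above for an add reduction (illustrative
;; sequence only): the xxsldwi with a count of 2 swaps the two doublewords
;; and the following xvadddp adds each element to the other, leaving the
;; sum in both positions:
;;
;;	xxsldwi tmp,v,v,2
;;	xvadddp dst,tmp,v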
4269
4270 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4271 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
4272 (VEC_reduc:V4SF
4273 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4274 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
4275 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4276 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
4277 "VECTOR_UNIT_VSX_P (V4SFmode)"
4278 "#"
4279 ""
4280 [(const_int 0)]
4281 {
4282 rtx op0 = operands[0];
4283 rtx op1 = operands[1];
4284 rtx tmp2, tmp3, tmp4;
4285
4286 if (can_create_pseudo_p ())
4287 {
4288 tmp2 = gen_reg_rtx (V4SFmode);
4289 tmp3 = gen_reg_rtx (V4SFmode);
4290 tmp4 = gen_reg_rtx (V4SFmode);
4291 }
4292 else
4293 {
4294 tmp2 = operands[2];
4295 tmp3 = operands[3];
4296 tmp4 = tmp2;
4297 }
4298
4299 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4300 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4301 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4302 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4303 DONE;
4304 }
4305 [(set_attr "length" "16")
4306 (set_attr "type" "veccomplex")])
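;; Worked example of the V4SF split above for an add reduction
;; (illustrative, register word order shown big endian):
;;
;;	xxsldwi t2,v,v,2	; {v2, v3, v0, v1}
;;	xvaddsp t3,t2,v		; pairwise sums
;;	xxsldwi t4,t3,t3,3	; rotate one word
;;	xvaddsp dst,t4,t3	; full sum in every element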
4307
4308 ;; Combiner patterns with the vector reduction patterns that know we can get
4309 ;; to the top element of the V2DF array without doing an extract.
4310
4311 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4312 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
4313 (vec_select:DF
4314 (VEC_reduc:V2DF
4315 (vec_concat:V2DF
4316 (vec_select:DF
4317 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4318 (parallel [(const_int 1)]))
4319 (vec_select:DF
4320 (match_dup 1)
4321 (parallel [(const_int 0)])))
4322 (match_dup 1))
4323 (parallel [(const_int 1)])))
4324 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
4325 "VECTOR_UNIT_VSX_P (V2DFmode)"
4326 "#"
4327 ""
4328 [(const_int 0)]
4329 {
4330 rtx hi = gen_highpart (DFmode, operands[1]);
4331 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4332 ? gen_reg_rtx (DFmode)
4333 : operands[2];
4334
4335 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4336 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4337 DONE;
4338 }
4339 [(set_attr "length" "8")
4340 (set_attr "type" "veccomplex")])
4341
4342 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4343 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
4344 (vec_select:SF
4345 (VEC_reduc:V4SF
4346 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4347 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
4348 (parallel [(const_int 3)])))
4349 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4350 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
4351 (clobber (match_scratch:V4SF 4 "=0,0"))]
4352 "VECTOR_UNIT_VSX_P (V4SFmode)"
4353 "#"
4354 ""
4355 [(const_int 0)]
4356 {
4357 rtx op0 = operands[0];
4358 rtx op1 = operands[1];
4359 rtx tmp2, tmp3, tmp4, tmp5;
4360
4361 if (can_create_pseudo_p ())
4362 {
4363 tmp2 = gen_reg_rtx (V4SFmode);
4364 tmp3 = gen_reg_rtx (V4SFmode);
4365 tmp4 = gen_reg_rtx (V4SFmode);
4366 tmp5 = gen_reg_rtx (V4SFmode);
4367 }
4368 else
4369 {
4370 tmp2 = operands[2];
4371 tmp3 = operands[3];
4372 tmp4 = tmp2;
4373 tmp5 = operands[4];
4374 }
4375
4376 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4377 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4378 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4379 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4380 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4381 DONE;
4382 }
4383 [(set_attr "length" "20")
4384 (set_attr "type" "veccomplex")])
4385
4386 \f
4387 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4388 (define_peephole
4389 [(set (match_operand:P 0 "base_reg_operand")
4390 (match_operand:P 1 "short_cint_operand"))
4391 (set (match_operand:VSX_M 2 "vsx_register_operand")
4392 (mem:VSX_M (plus:P (match_dup 0)
4393 (match_operand:P 3 "int_reg_operand"))))]
4394 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4395 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4396 [(set_attr "length" "8")
4397 (set_attr "type" "vecload")])
4398
4399 (define_peephole
4400 [(set (match_operand:P 0 "base_reg_operand")
4401 (match_operand:P 1 "short_cint_operand"))
4402 (set (match_operand:VSX_M 2 "vsx_register_operand")
4403 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4404 (match_dup 0))))]
4405 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4406 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4407 [(set_attr "length" "8")
4408 (set_attr "type" "vecload")])
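;; Illustrative example of the fused sequence for a V2DF load, where
;; <VSX_M:VSm> expands to "vd2" (register numbers assumed, for
;; illustration only):
;;
;;	li 9,16
;;	lxvd2x 0,9,3			# vector load fusion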
4409
4410 \f
4411 ;; ISA 3.0 vector extend sign support
4412
4413 (define_insn "vsx_sign_extend_qi_<mode>"
4414 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4415 (unspec:VSINT_84
4416 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4417 UNSPEC_VSX_SIGN_EXTEND))]
4418 "TARGET_P9_VECTOR"
4419 "vextsb2<wd> %0,%1"
4420 [(set_attr "type" "vecexts")])
4421
4422 (define_insn "vsx_sign_extend_hi_<mode>"
4423 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4424 (unspec:VSINT_84
4425 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4426 UNSPEC_VSX_SIGN_EXTEND))]
4427 "TARGET_P9_VECTOR"
4428 "vextsh2<wd> %0,%1"
4429 [(set_attr "type" "vecexts")])
4430
4431 (define_insn "*vsx_sign_extend_si_v2di"
4432 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4433 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4434 UNSPEC_VSX_SIGN_EXTEND))]
4435 "TARGET_P9_VECTOR"
4436 "vextsw2d %0,%1"
4437 [(set_attr "type" "vecexts")])
4438
4439 \f
4440 ;; ISA 3.0 Binary Floating-Point Support
4441
4442 ;; VSX Scalar Extract Exponent Quad-Precision
4443 (define_insn "xsxexpqp_<mode>"
4444 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4445 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4446 UNSPEC_VSX_SXEXPDP))]
4447 "TARGET_P9_VECTOR"
4448 "xsxexpqp %0,%1"
4449 [(set_attr "type" "vecmove")])
4450
4451 ;; VSX Scalar Extract Exponent Double-Precision
4452 (define_insn "xsxexpdp"
4453 [(set (match_operand:DI 0 "register_operand" "=r")
4454 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4455 UNSPEC_VSX_SXEXPDP))]
4456 "TARGET_P9_VECTOR && TARGET_64BIT"
4457 "xsxexpdp %0,%x1"
4458 [(set_attr "type" "integer")])
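;; Worked example: xsxexpdp returns the biased exponent field of the
;; double-precision input, so for 1.0 (0x3FF0000000000000) the GPR result
;; is 0x3FF (1023).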
4459
4460 ;; VSX Scalar Extract Significand Quad-Precision
4461 (define_insn "xsxsigqp_<mode>"
4462 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4463 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4464 UNSPEC_VSX_SXSIG))]
4465 "TARGET_P9_VECTOR"
4466 "xsxsigqp %0,%1"
4467 [(set_attr "type" "vecmove")])
4468
4469 ;; VSX Scalar Extract Significand Double-Precision
4470 (define_insn "xsxsigdp"
4471 [(set (match_operand:DI 0 "register_operand" "=r")
4472 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4473 UNSPEC_VSX_SXSIG))]
4474 "TARGET_P9_VECTOR && TARGET_64BIT"
4475 "xsxsigdp %0,%x1"
4476 [(set_attr "type" "integer")])
4477
4478 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4479 (define_insn "xsiexpqpf_<mode>"
4480 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4481 (unspec:IEEE128
4482 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4483 (match_operand:DI 2 "altivec_register_operand" "v")]
4484 UNSPEC_VSX_SIEXPQP))]
4485 "TARGET_P9_VECTOR"
4486 "xsiexpqp %0,%1,%2"
4487 [(set_attr "type" "vecmove")])
4488
4489 ;; VSX Scalar Insert Exponent Quad-Precision
4490 (define_insn "xsiexpqp_<mode>"
4491 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4492 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4493 (match_operand:DI 2 "altivec_register_operand" "v")]
4494 UNSPEC_VSX_SIEXPQP))]
4495 "TARGET_P9_VECTOR"
4496 "xsiexpqp %0,%1,%2"
4497 [(set_attr "type" "vecmove")])
4498
4499 ;; VSX Scalar Insert Exponent Double-Precision
4500 (define_insn "xsiexpdp"
4501 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4502 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4503 (match_operand:DI 2 "register_operand" "r")]
4504 UNSPEC_VSX_SIEXPDP))]
4505 "TARGET_P9_VECTOR && TARGET_64BIT"
4506 "xsiexpdp %x0,%1,%2"
4507 [(set_attr "type" "fpsimple")])
4508
4509 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4510 (define_insn "xsiexpdpf"
4511 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4512 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4513 (match_operand:DI 2 "register_operand" "r")]
4514 UNSPEC_VSX_SIEXPDP))]
4515 "TARGET_P9_VECTOR && TARGET_64BIT"
4516 "xsiexpdp %x0,%1,%2"
4517 [(set_attr "type" "fpsimple")])
4518
4519 ;; VSX Scalar Compare Exponents Double-Precision
4520 (define_expand "xscmpexpdp_<code>"
4521 [(set (match_dup 3)
4522 (compare:CCFP
4523 (unspec:DF
4524 [(match_operand:DF 1 "vsx_register_operand" "wa")
4525 (match_operand:DF 2 "vsx_register_operand" "wa")]
4526 UNSPEC_VSX_SCMPEXPDP)
4527 (const_int 0)))
4528 (set (match_operand:SI 0 "register_operand" "=r")
4529 (CMP_TEST:SI (match_dup 3)
4530 (const_int 0)))]
4531 "TARGET_P9_VECTOR"
4532 {
4533 operands[3] = gen_reg_rtx (CCFPmode);
4534 })
4535
4536 (define_insn "*xscmpexpdp"
4537 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4538 (compare:CCFP
4539 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4540 (match_operand:DF 2 "vsx_register_operand" "wa")]
4541 UNSPEC_VSX_SCMPEXPDP)
4542 (match_operand:SI 3 "zero_constant" "j")))]
4543 "TARGET_P9_VECTOR"
4544 "xscmpexpdp %0,%x1,%x2"
4545 [(set_attr "type" "fpcompare")])
4546
4547 ;; VSX Scalar Test Data Class Quad-Precision
4548 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4549 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4550 ;; setting the eq bit if any of the conditions tested by operand 2
4551 ;; are satisfied, and clearing the gt and unordered bits to zero.)
4552 (define_expand "xststdcqp_<mode>"
4553 [(set (match_dup 3)
4554 (compare:CCFP
4555 (unspec:IEEE128
4556 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4557 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4558 UNSPEC_VSX_STSTDC)
4559 (const_int 0)))
4560 (set (match_operand:SI 0 "register_operand" "=r")
4561 (eq:SI (match_dup 3)
4562 (const_int 0)))]
4563 "TARGET_P9_VECTOR"
4564 {
4565 operands[3] = gen_reg_rtx (CCFPmode);
4566 })
4567
4568 ;; VSX Scalar Test Data Class Double- and Single-Precision
4569 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4570 ;; if any of the conditions tested by operand 2 are satisfied.
4571 ;; The gt and unordered bits are cleared to zero.)
4572 (define_expand "xststdc<Fvsx>"
4573 [(set (match_dup 3)
4574 (compare:CCFP
4575 (unspec:SFDF
4576 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4577 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4578 UNSPEC_VSX_STSTDC)
4579 (match_dup 4)))
4580 (set (match_operand:SI 0 "register_operand" "=r")
4581 (eq:SI (match_dup 3)
4582 (const_int 0)))]
4583 "TARGET_P9_VECTOR"
4584 {
4585 operands[3] = gen_reg_rtx (CCFPmode);
4586 operands[4] = CONST0_RTX (SImode);
4587 })
4588
4589 ;; The VSX Scalar Test Negative Quad-Precision
4590 (define_expand "xststdcnegqp_<mode>"
4591 [(set (match_dup 2)
4592 (compare:CCFP
4593 (unspec:IEEE128
4594 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4595 (const_int 0)]
4596 UNSPEC_VSX_STSTDC)
4597 (const_int 0)))
4598 (set (match_operand:SI 0 "register_operand" "=r")
4599 (lt:SI (match_dup 2)
4600 (const_int 0)))]
4601 "TARGET_P9_VECTOR"
4602 {
4603 operands[2] = gen_reg_rtx (CCFPmode);
4604 })
4605
4606 ;; The VSX Scalar Test Negative Double- and Single-Precision
4607 (define_expand "xststdcneg<Fvsx>"
4608 [(set (match_dup 2)
4609 (compare:CCFP
4610 (unspec:SFDF
4611 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4612 (const_int 0)]
4613 UNSPEC_VSX_STSTDC)
4614 (match_dup 3)))
4615 (set (match_operand:SI 0 "register_operand" "=r")
4616 (lt:SI (match_dup 2)
4617 (const_int 0)))]
4618 "TARGET_P9_VECTOR"
4619 {
4620 operands[2] = gen_reg_rtx (CCFPmode);
4621 operands[3] = CONST0_RTX (SImode);
4622 })
4623
4624 (define_insn "*xststdcqp_<mode>"
4625 [(set (match_operand:CCFP 0 "" "=y")
4626 (compare:CCFP
4627 (unspec:IEEE128
4628 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4629 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4630 UNSPEC_VSX_STSTDC)
4631 (const_int 0)))]
4632 "TARGET_P9_VECTOR"
4633 "xststdcqp %0,%1,%2"
4634 [(set_attr "type" "fpcompare")])
4635
4636 (define_insn "*xststdc<Fvsx>"
4637 [(set (match_operand:CCFP 0 "" "=y")
4638 (compare:CCFP
4639 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4640 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4641 UNSPEC_VSX_STSTDC)
4642 (match_operand:SI 3 "zero_constant" "j")))]
4643 "TARGET_P9_VECTOR"
4644 "xststdc<Fvsx> %0,%x1,%2"
4645 [(set_attr "type" "fpcompare")])
4646
4647 ;; VSX Vector Extract Exponent Double and Single Precision
4648 (define_insn "xvxexp<VSs>"
4649 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4650 (unspec:VSX_F
4651 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4652 UNSPEC_VSX_VXEXP))]
4653 "TARGET_P9_VECTOR"
4654 "xvxexp<VSs> %x0,%x1"
4655 [(set_attr "type" "vecsimple")])
4656
4657 ;; VSX Vector Extract Significand Double and Single Precision
4658 (define_insn "xvxsig<VSs>"
4659 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4660 (unspec:VSX_F
4661 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4662 UNSPEC_VSX_VXSIG))]
4663 "TARGET_P9_VECTOR"
4664 "xvxsig<VSs> %x0,%x1"
4665 [(set_attr "type" "vecsimple")])
4666
4667 ;; VSX Vector Insert Exponent Double and Single Precision
4668 (define_insn "xviexp<VSs>"
4669 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4670 (unspec:VSX_F
4671 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4672 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4673 UNSPEC_VSX_VIEXP))]
4674 "TARGET_P9_VECTOR"
4675 "xviexp<VSs> %x0,%x1,%x2"
4676 [(set_attr "type" "vecsimple")])
4677
4678 ;; VSX Vector Test Data Class Double and Single Precision
4679 ;; The corresponding elements of the result vector are all ones
4680 ;; if any of the conditions tested by operand 3 are satisfied.
4681 (define_insn "xvtstdc<VSs>"
4682 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4683 (unspec:<VSI>
4684 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4685 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4686 UNSPEC_VSX_VTSTDC))]
4687 "TARGET_P9_VECTOR"
4688 "xvtstdc<VSs> %x0,%x1,%2"
4689 [(set_attr "type" "vecsimple")])
4690
4691 ;; ISA 3.0 String Operations Support
4692
4693 ;; Compare vectors producing a vector result and a predicate, setting CR6
4694 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
4695 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes; there is no
4696 ;; need to, because comparisons in those modes are expanded to use Power8
4697 ;; instructions.
4698 (define_insn "*vsx_ne_<mode>_p"
4699 [(set (reg:CC CR6_REGNO)
4700 (unspec:CC
4701 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4702 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4703 UNSPEC_PREDICATE))
4704 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4705 (ne:VSX_EXTRACT_I (match_dup 1)
4706 (match_dup 2)))]
4707 "TARGET_P9_VECTOR"
4708 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4709 [(set_attr "type" "vecsimple")])
4710
4711 (define_insn "*vector_nez_<mode>_p"
4712 [(set (reg:CC CR6_REGNO)
4713 (unspec:CC [(unspec:VI
4714 [(match_operand:VI 1 "gpc_reg_operand" "v")
4715 (match_operand:VI 2 "gpc_reg_operand" "v")]
4716 UNSPEC_NEZ_P)]
4717 UNSPEC_PREDICATE))
4718 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4719 (unspec:VI [(match_dup 1)
4720 (match_dup 2)]
4721 UNSPEC_NEZ_P))]
4722 "TARGET_P9_VECTOR"
4723 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4724 [(set_attr "type" "vecsimple")])
4725
4726 ;; Return first position of match between vectors using natural order
4727 ;; for both LE and BE execution modes.
4728 (define_expand "first_match_index_<mode>"
4729 [(match_operand:SI 0 "register_operand")
4730 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4731 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4732 UNSPEC_VSX_FIRST_MATCH_INDEX)]
4733 "TARGET_P9_VECTOR"
4734 {
4735 int sh;
4736
4737 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4738 rtx not_result = gen_reg_rtx (<MODE>mode);
4739
4740 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4741 operands[2]));
4742 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4743
4744 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4745
4746 if (<MODE>mode == V16QImode)
4747 {
4748 if (!BYTES_BIG_ENDIAN)
4749 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4750 else
4751 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4752 }
4753 else
4754 {
4755 rtx tmp = gen_reg_rtx (SImode);
4756 if (!BYTES_BIG_ENDIAN)
4757 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4758 else
4759 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4760 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4761 }
4762 DONE;
4763 })
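;; Worked example: vclzlsbb (vctzlsbb on little endian) counts the bytes
;; that precede the first byte whose least significant bit is set, giving
;; the byte index of the first match.  For V8HI, sh = 2 / 2 = 1, so a
;; first match in halfword element 3 yields byte index 6 and 6 >> 1 = 3.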
4764
4765 ;; Return first position of match between vectors or end of string (EOS) using
4766 ;; natural element order for both LE and BE execution modes.
4767 (define_expand "first_match_or_eos_index_<mode>"
4768 [(match_operand:SI 0 "register_operand")
4769 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4770 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4771 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4772 "TARGET_P9_VECTOR"
4773 {
4774 int sh;
4775 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4776 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4777 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4778 rtx and_result = gen_reg_rtx (<MODE>mode);
4779 rtx result = gen_reg_rtx (<MODE>mode);
4780 rtx vzero = gen_reg_rtx (<MODE>mode);
4781
4782 /* Vector with zeros in elements that correspond to zeros in operands. */
4783 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4784 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4785 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4786 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4787
4788 /* Vector with ones in elements that do not match. */
4789 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4790 operands[2]));
4791
4792 /* Create vector with ones in elements where there was a zero in one of
4793 the source elements or the elements that match. */
4794 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4795 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4796
4797 if (<MODE>mode == V16QImode)
4798 {
4799 if (!BYTES_BIG_ENDIAN)
4800 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4801 else
4802 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4803 }
4804 else
4805 {
4806 rtx tmp = gen_reg_rtx (SImode);
4807 if (!BYTES_BIG_ENDIAN)
4808 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4809 else
4810 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4811 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4812 }
4813 DONE;
4814 })
4815
4816 ;; Return first position of mismatch between vectors using natural
4817 ;; element order for both LE and BE execution modes.
4818 (define_expand "first_mismatch_index_<mode>"
4819 [(match_operand:SI 0 "register_operand")
4820 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4821 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4822 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4823 "TARGET_P9_VECTOR"
4824 {
4825 int sh;
4826 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4827
4828 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4829 operands[2]));
4830 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4831
4832 if (<MODE>mode == V16QImode)
4833 {
4834 if (!BYTES_BIG_ENDIAN)
4835 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4836 else
4837 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4838 }
4839 else
4840 {
4841 rtx tmp = gen_reg_rtx (SImode);
4842 if (!BYTES_BIG_ENDIAN)
4843 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4844 else
4845 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4846 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4847 }
4848 DONE;
4849 })
4850
4851 ;; Return first position of mismatch between vectors or end of string (EOS)
4852 ;; using natural element order for both LE and BE execution modes.
4853 (define_expand "first_mismatch_or_eos_index_<mode>"
4854 [(match_operand:SI 0 "register_operand")
4855 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4856 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4857 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4858 "TARGET_P9_VECTOR"
4859 {
4860 int sh;
4861 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4862 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4863 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4864 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4865 rtx and_result = gen_reg_rtx (<MODE>mode);
4866 rtx result = gen_reg_rtx (<MODE>mode);
4867 rtx vzero = gen_reg_rtx (<MODE>mode);
4868
4869 /* Vector with zeros in elements that correspond to zeros in operands. */
4870 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4871
4872 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4873 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4874 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4875
4876 /* Vector with ones in elements that match. */
4877 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4878 operands[2]));
4879 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4880
4881 /* Create vector with ones in elements where there was a zero in one of
4882 the source elements or the elements did not match. */
4883 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4884 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4885
4886 if (<MODE>mode == V16QImode)
4887 {
4888 if (!BYTES_BIG_ENDIAN)
4889 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4890 else
4891 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4892 }
4893 else
4894 {
4895 rtx tmp = gen_reg_rtx (SImode);
4896 if (!BYTES_BIG_ENDIAN)
4897 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4898 else
4899 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4900 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4901 }
4902 DONE;
4903 })
4904
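;; Usage sketch (illustration only): assuming -mcpu=power9, this expand
;; backs the vec_first_mismatch_or_eos_index built-in:
;;
;; #include <altivec.h>
;;
;; /* Index of the first differing element or of the first zero
;;    (end-of-string) element, whichever comes first.  */
;; unsigned int
;; mismatch_or_eos (vector unsigned char a, vector unsigned char b)
;; {
;;   return vec_first_mismatch_or_eos_index (a, b);
;; }
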
;; Load VSX Vector with Length
(define_expand "lxvl"
  [(set (match_dup 3)
        (ashift:DI (match_operand:DI 2 "register_operand")
                   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
        (unspec:V16QI
         [(match_operand:DI 1 "gpc_reg_operand")
          (mem:V16QI (match_dup 1))
          (match_dup 3)]
         UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI
         [(match_operand:DI 1 "gpc_reg_operand" "b")
          (mem:V16QI (match_dup 1))
          (match_operand:DI 2 "register_operand" "r")]
         UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
                       (mem:V16QI (match_dup 1))
                       (match_operand:DI 2 "register_operand" "r")]
         UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])

;; Expand for builtin xl_len_r
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
                                          shift_mask));
  DONE;
})

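;; Usage sketch (illustration only): on a 64-bit Power9 target, the
;; patterns above implement the vec_xl_len and vec_xl_len_r built-ins,
;; which load just the first N bytes of a buffer:
;;
;; #include <stddef.h>
;; #include <altivec.h>
;;
;; /* Load the first n bytes (n <= 16) starting at p; the remaining
;;    bytes of the result are zero.  */
;; vector unsigned char
;; load_partial (unsigned char *p, size_t n)
;; {
;;   return vec_xl_len (p, n);    /* expands through lxvl */
;; }
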
(define_insn "stxvll"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
        (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
                       (mem:V16QI (match_dup 1))
                       (match_operand:DI 2 "register_operand" "r")]
         UNSPEC_STXVLL))]
  "TARGET_P9_VECTOR"
  "stxvll %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Store VSX Vector with Length
(define_expand "stxvl"
  [(set (match_dup 3)
        (ashift:DI (match_operand:DI 2 "register_operand")
                   (const_int 56)))
   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
        (unspec:V16QI
         [(match_operand:V16QI 0 "vsx_register_operand")
          (mem:V16QI (match_dup 1))
          (match_dup 3)]
         UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
        (unspec:V16QI
         [(match_operand:V16QI 0 "vsx_register_operand" "wa")
          (mem:V16QI (match_dup 1))
          (match_operand:DI 2 "register_operand" "r")]
         UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Expand for builtin xst_len_r.  Note the condition here used to be the
;; unspec name UNSPEC_XST_LEN_R, which is a nonzero enum value rather than
;; a target test; the pattern needs the same target test as stxvl.
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
                                          shift_mask));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})

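;; Usage sketch (illustration only): the store counterpart is the
;; vec_xst_len built-in, which writes just the first N bytes of a vector:
;;
;; #include <stddef.h>
;; #include <altivec.h>
;;
;; /* Store the first n bytes (n <= 16) of v at p.  */
;; void
;; store_partial (vector unsigned char v, unsigned char *p, size_t n)
;; {
;;   vec_xst_len (v, p, n);       /* expands through stxvl */
;; }
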
;; Vector Compare Not Equal Byte (specified as not+eq: rather than an unspec)
(define_insn "vcmpneb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
        (not:V16QI
         (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
                   (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Byte
(define_insn "vcmpnezb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
        (unspec:V16QI
         [(match_operand:V16QI 1 "altivec_register_operand" "v")
          (match_operand:V16QI 2 "altivec_register_operand" "v")]
         UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Half Word (specified as not+eq: rather than
;; an unspec)
(define_insn "vcmpneh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
        (not:V8HI
         (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
                  (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Half Word
(define_insn "vcmpnezh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
        (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
                      (match_operand:V8HI 2 "altivec_register_operand" "v")]
         UNSPEC_VCMPNEZH))]
  "TARGET_P9_VECTOR"
  "vcmpnezh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Word (specified as not+eq: rather than an unspec)
(define_insn "vcmpnew"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
        (not:V4SI
         (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
                  (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpnew %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Word
(define_insn "vcmpnezw"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
        (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
                      (match_operand:V4SI 2 "altivec_register_operand" "v")]
         UNSPEC_VCMPNEZW))]
  "TARGET_P9_VECTOR"
  "vcmpnezw %0,%1,%2"
  [(set_attr "type" "vecsimple")])

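;; Usage sketch (illustration only): these insns back the vec_cmpne and
;; vec_cmpnez built-ins on Power9:
;;
;; #include <altivec.h>
;;
;; /* All-ones in each element where A and B differ, zeros elsewhere.  */
;; vector bool int
;; not_equal (vector unsigned int a, vector unsigned int b)
;; {
;;   return vec_cmpne (a, b);     /* vcmpnew */
;; }
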
;; Vector Count Leading Zero Least-Significant Bits Byte
(define_insn "vclzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
         UNSPEC_VCLZLSBB))]
  "TARGET_P9_VECTOR"
  "vclzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Count Trailing Zero Least-Significant Bits Byte
(define_insn "vctzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
         UNSPEC_VCTZLSBB))]
  "TARGET_P9_VECTOR"
  "vctzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

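;; Usage sketch (illustration only): for byte vectors these insns back the
;; vec_cntlz_lsbb and vec_cnttz_lsbb built-ins, which count the leading or
;; trailing bytes whose least-significant bit is zero:
;;
;; #include <altivec.h>
;;
;; int
;; leading_lsb_zeros (vector unsigned char v)
;; {
;;   return vec_cntlz_lsbb (v);   /* vclzlsbb */
;; }
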
;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V16QI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V16QI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V8HI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V8HI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V4SI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V4SI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

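;; Usage sketch (illustration only): these left/right-indexed extracts let
;; vec_extract accept a run-time element index on Power9 without going
;; through memory:
;;
;; #include <altivec.h>
;;
;; unsigned char
;; nth_byte (vector unsigned char v, int i)
;; {
;;   return vec_extract (v, i);   /* can use vextubrx (LE) / vextublx (BE) */
;; }
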
;; Vector insert/extract word at arbitrary byte values.  Note that the
;; little-endian version needs to adjust the byte number and the V4SI
;; element used by insert4b.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
                      (match_operand:QI 2 "const_0_to_12_operand" "n")]
         UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
        (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
                       (match_operand:V16QI 2 "vsx_register_operand")
                       (match_operand:QI 3 "const_0_to_12_operand")]
         UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    {
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
                       (match_operand:V16QI 2 "vsx_register_operand" "0")
                       (match_operand:QI 3 "const_0_to_12_operand" "n")]
         UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])

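;; Usage sketch (an assumption about the built-in interface, not taken
;; from this file): these patterns back the vec_extract4b and vec_insert4b
;; built-ins, which move an unaligned word at a byte offset of 0..12:
;;
;; #include <altivec.h>
;;
;; vector unsigned long long
;; word_at_5 (vector unsigned char v)
;; {
;;   return vec_extract4b (v, 5);   /* xxextractuw */
;; }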

;; Generate vector extract four 32-bit float values from the left four
;; elements of an eight-element vector of 16-bit float values.
(define_expand "vextract_fp_from_shorth"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
        (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
         UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
                                          operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Generate vector extract four 32-bit float values from the right four
;; elements of an eight-element vector of 16-bit float values.
(define_expand "vextract_fp_from_shortl"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
        (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
         UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
  "TARGET_P9_VECTOR"
{
  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};

  int i;
  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
                                          operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

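;; Usage sketch (illustration only): these expands back the
;; vec_extract_fp32_from_shorth and vec_extract_fp32_from_shortl
;; built-ins, which widen four IEEE half-precision values to float:
;;
;; #include <altivec.h>
;;
;; vector float
;; widen_left_half (vector unsigned short halves)
;; {
;;   return vec_extract_fp32_from_shorth (halves);   /* xvcvhpsp */
;; }
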
;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* We want the elements in reverse order relative to the endian mode
         in use, i.e. in LE mode, put the elements in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
                                           operands[1], sel));
    }

  DONE;
})

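;; Usage sketch (illustration only): revb_<mode> backs the vec_revb
;; built-in, so a byte reverse is a single xxbr* instruction on Power9
;; and a vperm with a precomputed selector otherwise:
;;
;; #include <altivec.h>
;;
;; vector unsigned int
;; byte_swap_words (vector unsigned int v)
;; {
;;   return vec_revb (v);         /* xxbrw on Power9 */
;; }
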
;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
        (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])
\f

;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR        0)   ;; GPR temporary
   (SFBOOL_TMP_VSX        1)   ;; vector temporary
   (SFBOOL_MFVSR_D        2)   ;; move to GPR dest
   (SFBOOL_MFVSR_A        3)   ;; move to GPR src
   (SFBOOL_BOOL_D         4)   ;; and/ior/xor dest
   (SFBOOL_BOOL_A1        5)   ;; and/ior/xor arg1
   (SFBOOL_BOOL_A2        6)   ;; and/ior/xor arg2
   (SFBOOL_SHL_D          7)   ;; shift left dest
   (SFBOOL_SHL_A          8)   ;; shift left arg
   (SFBOOL_MTVSR_D        9)   ;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF  10)   ;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI     11)   ;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI    12)   ;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF  13)]) ;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations that use logical
;; operations to pick apart SFmode values.  For example, e_powf.c contains
;; code that after macro expansion looks like:
;;
;; typedef union {
;;   float value;
;;   uint32_t word;
;; } ieee_float_shape_type;
;;
;; float t1;
;; int32_t is;
;;
;; do {
;;   ieee_float_shape_type gf_u;
;;   gf_u.value = (t1);
;;   (is) = gf_u.word;
;; } while (0);
;;
;; do {
;;   ieee_float_shape_type sf_u;
;;   sf_u.word = (is & 0xfffff000);
;;   (t1) = sf_u.value;
;; } while (0);
;;
;; This would normally require two direct moves (convert to memory format,
;; direct move to GPR, do the AND operation, direct move back to VSX,
;; convert to scalar format).  With this peephole we eliminate the direct
;; move to the GPR, and instead move the integer mask value to the vector
;; register after a shift and do the VSX logical operation there.

;; The insns for dealing with SFmode in GPR registers look like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))

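;; After the peephole fires, the sequence is replaced by (schematically):
;;
;; (set (reg:DI tmp_gpr) (ashift:DI (reg:DI bool_a) (const_int 32)))
;;
;; (set (reg:DI tmp_vsx) (reg:DI tmp_gpr))                   ;; mtvsrd
;;
;; (set (reg:V4SF dest) (and:V4SF (reg:V4SF src) (reg:V4SF tmp_vsx)))
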
(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
        (zero_extend:DI
         (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
        (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
                        (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
        (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
                   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
        (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers even when their modes differ.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
           && REGNO (operands[SFBOOL_MFVSR_D])
                == REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
        (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
                   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
        (match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
        (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
                          (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
                          ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})