;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR [(V8HI "h")

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")

;; Map into the appropriate suffix based on the type
(define_mode_attr VSs [(V16QI "sp")

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")

;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
(define_mode_attr VSr2 [(V2DF "wd")

(define_mode_attr VSr3 [(V2DF "wa")

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4 [(SF "ws")

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5 [(SF "ws")

;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
(define_mode_attr VSa [(V16QI "wa")

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")

(define_mode_attr VSI [(V4SF "V4SI")

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")

;; Map into either s or v, depending on whether this is a scalar or vector
(define_mode_attr VSv [(V16QI "v")

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
(define_mode_attr VStype_sqrt [(V2DF "dsqrt")

;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this rather than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])

(define_mode_attr VS_spdp_res [(DF "V4SF")

(define_mode_attr VS_spdp_insn [(DF "xscvdpsp")

(define_mode_attr VS_spdp_type [(DF "fp")

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI "TI")

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")

;; Map register class for 64-bit element in 128-bit vector for direct moves
(define_mode_attr VS_64dm [(V2DF "wk")

;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves
(define_mode_attr VS_64reg [(V2DF "ws")

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
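
;; Added note (hedged, not from the original sources): V4SI is left out of
;; VSX_EXTRACT_I2 because a word extract can already be done on ISA 2.07 with
;; a direct move; only the char/short cases need the ISA 3.0 vextu*[lr]x
;; forms.  Illustrative C (function names are hypothetical):
;;
;;   int   get_w (vector int v)   { return vec_extract (v, 1); }
;;   short get_h (vector short v) { return vec_extract (v, 3); }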

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI "h")
                                     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI "const_0_to_7_operand")
                                         (V4SI "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")

;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Constants for creating unspecs
(define_c_enum "unspec"
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCVDPUXDS
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVSPSXWS
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 2)
                           (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
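
;; Hedged illustration (added commentary, not from the original file): on
;; little-endian Power8 (-mcpu=power8, no ISA 3.0), a plain vector load such
;; as the hypothetical C below
;;
;;   vector double ld (const vector double *p) { return *p; }
;;
;; is expected to come out of this pattern as an lxvd2x followed by a
;; doubleword swap, roughly "lxvd2x vs0,0,r3; xxpermdi vs34,vs0,vs0,2"
;; (register numbers illustrative), unless the swap-elimination pass or the
;; lvx rewrite above removes the permute.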

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 2)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI (match_dup 2)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI (match_dup 2)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])
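
;; Hedged illustration (added commentary): the matching store, e.g. the
;; hypothetical C
;;
;;   void st (vector double *p, vector double v) { *p = v; }
;;
;; is expected to split into a doubleword swap into a scratch register
;; followed by stxvd2x, roughly "xxpermdi scratch,vs,vs,2; stxvd2x
;; scratch,0,r3"; the post-reload split below instead re-permutes the source
;; register in place, since no scratch is available after register
;; allocation.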

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 2)
                           (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 2)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI (match_dup 2)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI (match_dup 2)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "4,4,4,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])
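
;; Hedged example (added commentary): when an __int128 only moves through
;; GPRs, e.g. the hypothetical C
;;
;;   __int128 copy (__int128 *p) { return *p; }
;;
;; the access is first expressed as a pair of rotate-by-64 insns matching
;; *vsx_le_permute_<mode> above; once the intermediate register is dead, the
;; peepholes collapse the two rotates back into a plain move, since swapping
;; the two doublewords twice is a no-op.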

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])
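
;; Hedged usage note (added): with ISA 3.0 (-mcpu=power9), a splatted byte
;; constant such as the hypothetical C
;;
;;   vector signed char splat5 (void) { return vec_splats ((signed char) 5); }
;;
;; is expected to be generated as a single "xxspltib %x0,5" by this pattern.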

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
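
;; Hedged illustration (added): for the wider modes the split above emits an
;; xxspltib of the byte value followed by a sign extension, so a V4SI splat
;; of a small constant, e.g. vec_splats ((int) -5), is expected to become
;; roughly "xxspltib vs,251; vextsb2w vs,vs", two insns, which matches the
;; "length" "8" attribute.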

;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
(define_insn "*vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,    r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wo,        v,
                ?<VSa>,    *r,        v,        ??r,       wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,    we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      jwM,       W,        W,         v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,     *,        vecsimple, vecsimple,
                vecsimple, *,         *,         *,        vecstore,  vecload")
   (set_attr "length"
                4,         8,         20,        20,       4,         4")])

;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       <??r>,
                wo,        v,         ?<VSa>,    *r,        v,         ??r,

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
                wE,        jwM,       ?jwM,      jwM,       W,         W,

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,      store,     *,
                vecsimple, vecsimple, vecsimple, *,         *,         *,
   (set_attr "length"
               "4, 4, 4, 16, 16, 16,
                4, 4, 4, 16, 20, 32,

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})
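
;; Hedged usage note (added): these expanders appear to back the
;; vec_vsx_ld/vec_vsx_st style built-in functions, e.g. the hypothetical C
;;
;;   vector float f (const float *p) { return vec_vsx_ld (0, p); }
;;
;; making the endian swap explicit early enough for the later swap
;; optimization pass to clean it up.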

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])
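
;; Hedged usage note (added): these element-reversing patterns provide the
;; big-endian element order that the vec_xl_be style built-ins want, e.g. the
;; hypothetical C
;;
;;   vector int ld_be (const int *p) { return vec_xl_be (0, p); }
;;
;; which on little endian must load elements in big-endian element order;
;; lxvd2x/lxvw4x give exactly that behavior without an extra permute.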

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])
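
;; Added commentary (hedged): the !TARGET_P9_VECTOR fallback in the expander
;; composes two steps: vsx_ld_elemrev_v4si reverses the four 32-bit words,
;; and the vperm control built from reorder[] then fixes up the halfwords
;; within each word, so the combined effect equals the full halfword
;; reversal that lxvh8x performs directly on ISA 3.0.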

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
                                                 pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
        (vec_select:V1TI
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
        (vec_select:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
        (vec_select:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
        (vec_select:V4SI
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
                                                pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvh8x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
                                                 pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point operations.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "mul")])
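
;; Hedged usage note (added): there is no vector doubleword multiply insn
;; here, so a vector long long multiply, e.g. the hypothetical C
;;
;;   vector long long mul2 (vector long long a, vector long long b)
;;   { return a * b; }
;;
;; splits into two element extracts per input, two scalar multiplies (mulld
;; on 64-bit targets), and a vsx_concat_v2di to rebuild the vector, as the
;; code above shows.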

(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])

; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
                      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
                     UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<VSs> %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F
         (abs:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_sqrt>")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
  [(set (match_dup 2)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 2)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 2)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 2)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<VSs> %0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allows the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiply operand.
(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
        (fma:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
          (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
          (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
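
;; Hedged example (added): a fused multiply-add written as the hypothetical C
;;
;;   vector float f (vector float a, vector float b, vector float c)
;;   { return vec_madd (a, b, c); }
;;
;; can use either the xvmaddasp form (target overlaps the addend) or the
;; xvmaddmsp form (target overlaps a multiplicand); the alternatives above
;; let register allocation pick whichever avoids a copy, with vmaddfp as the
;; Altivec fallback that allows four distinct registers.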

(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
        (fma:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
          (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
          (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
        (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
          (neg:VSX_F
            (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
        (neg:VSX_F
         (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
          (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
        (neg:V4SF
         (fma:V4SF
           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
           (neg:V4SF
             (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
        (neg:V2DF
         (fma:V2DF
           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
           (neg:V2DF
             (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
2015 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2016 ;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (eq:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (gt:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (ge:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
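
;; For illustration only (this comment block is an addition; the built-in
;; names come from altivec.h rather than this file): the predicate forms
;; above are what back the vec_all_*/vec_any_* comparisons, which read CR6
;; in addition to producing the mask, e.g.:
;;
;;   #include <altivec.h>
;;   int all_equal (vector double a, vector double b)
;;   {
;;     return vec_all_eq (a, b);   /* xvcmpeqdp. plus a CR6 test.  */
;;   }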

(define_insn "*vsx_xxsel<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (if_then_else:VSX_L
         (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
                (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")])

(define_insn "*vsx_xxsel<mode>_uns"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (if_then_else:VSX_L
         (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")])
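
;; Usage sketch (an illustrative addition; assumes the usual altivec.h
;; overloads): vec_sel picks bits from its first two arguments under the
;; mask, which these patterns implement with a single xxsel:
;;
;;   vector double select (vector double a, vector double b,
;;                         vector bool long long mask)
;;   {
;;     return vec_sel (a, b, mask);   /* expected to emit xxsel.  */
;;   }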

(define_insn "vsx_copysign<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F
         [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
         UNSPEC_COPYSIGN))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcpsgn<VSs> %x0,%x2,%x1"
  [(set_attr "type" "<VStype_simple>")])
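
;; Rough example (an added note, not from the original file): vec_cpsgn is
;; the usual route to this pattern; note the operand swap in the template
;; above, which matches the RTL copysign operand convention:
;;
;;   vector double r = vec_cpsgn (a, b);   /* one xvcpsgndp.  */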

;; For the conversions, limit the register class for the integer value to be
;; the fprs because we don't want to add the altivec registers to movdi/movsi.
;; For the unsigned tests, there isn't a generic double -> unsigned conversion
;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
;; in allowing virtual registers.
(define_insn "vsx_float<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
        (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvsx<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floatuns<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
        (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvux<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fix_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
        (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fixuns_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
        (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
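
;; Illustrative only (added comment; vec_cts/vec_ctf are altivec.h built-ins,
;; not defined here): elementwise float<->int casts funnel into the four
;; patterns above, e.g.:
;;
;;   vector signed int i = vec_cts (vf, 0);   /* xvcvspsxws  */
;;   vector float f = vec_ctf (vi, 0);        /* xvcvsxwsp   */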

;; Math rounding functions
(define_insn "vsx_x<VSv>r<VSs>i"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_VSX_ROUND_I))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>i %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_x<VSv>r<VSs>ic"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_VSX_ROUND_IC))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>ic %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_btrunc<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_b2trunc<mode>2"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIZ))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floor<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIM))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>im %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_ceil<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>ip %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
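
;; Example usage (a sketch added for clarity; built-in names assumed from
;; altivec.h): the truncate/floor/ceiling patterns above map onto
;;
;;   vector double t = vec_trunc (v);   /* xvrdpiz  */
;;   vector double f = vec_floor (v);   /* xvrdpim  */
;;   vector double c = vec_ceil (v);    /* xvrdpip  */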

;; VSX convert to/from double vector

;; Convert between single and double precision
;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
;; scalar single precision instructions internally use the double format.
;; Prefer the altivec registers, since we likely will need to do a vperm
(define_insn "vsx_<VS_spdp_insn>"
  [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
        (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
                              UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "<VS_spdp_insn> %x0,%x1"
  [(set_attr "type" "<VS_spdp_type>")])

;; xscvspdp, represent the scalar SF type as V4SF
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
        (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Generate xvcvhpsp instruction
(define_insn "vsx_xvcvhpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVHPSP))]
  "TARGET_P9_VECTOR"
  "xvcvhpsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
        (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)

(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})
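
;; Worked example (added; assumes the documented vec_ctf scaling semantics):
;; converting V2DI to V2DF with a scale factor of 3 goes through the expander
;; above, emitting xvcvsxddp and then a multiply by 2**-3:
;;
;;   vector double d = vec_ctf (vll, 3);   /* d[i] = (double) vll[i] / 8.0  */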

(define_insn "vsx_xvcvsxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_insn "vsx_xvcvuxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVUXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})

;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit signed integer
(define_insn "vsx_xvcvdpsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit signed integer
(define_insn "vsx_xvcvspsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSPSXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspsxws %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integer
(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})

;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit unsigned integer
(define_insn "vsx_xvcvspuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSPUXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspuxws %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvdpuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.

(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_XVCDPSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 32-bit to 64-bit types
;; Provide both vector and scalar targets
(define_insn "vsx_xvcvsxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVSXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSXWDP))]
  "TARGET_VSX"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVUXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVUXWDP))]
  "TARGET_VSX"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVSPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVSPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVSXWSP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvsxwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVUXWSP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvuxwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; Generate float2 double
;; convert two double to float
(define_expand "float2_v2df"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DF 1 "register_operand" "wa"))
   (use (match_operand:V2DF 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate float2
;; convert two long long signed ints to float
(define_expand "float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate uns_float2
;; convert two long long unsigned ints to float
(define_expand "uns_float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
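
;; Usage sketch (added; built-in names assumed from the PowerPC vector API):
;; vec_float2 packs two 2-element conversions into one V4SF result, with the
;; overload on unsigned inputs routed through the uns_float2 expander:
;;
;;   vector float r = vec_float2 (da, db);   /* two V2DF -> one V4SF  */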

;; Generate floate
;; convert double or long long signed to float
;; (Only even words are valid, BE numbering)
(define_expand "floate<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                 rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  DONE;
})

;; Generate uns_floate
;; convert long long unsigned to float
;; (Only even words are valid, BE numbering)
(define_expand "unsfloatev2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                 rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  DONE;
})

;; Generate floato
;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
(define_expand "floato<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                 rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate uns_floato
;; convert long long unsigned to float
;; (Only odd words are valid, BE numbering)
(define_expand "unsfloatov2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                 rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate vsigned2
;; convert two double float vectors to a vector of single precision ints
(define_expand "vsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=wa")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
                 (match_operand:V2DF 2 "register_operand" "wa")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vsignedo_v2df
;; signed double float to int convert odd word
(define_expand "vsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
        (match_operand:V2DF 1 "register_operand" "wa"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word
         0 1 2 3  0 1 2 3  =>  1 2 3 0
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    {
      /* Little endian word numbering for operand is 3 2 1 0.
         Result words 3 and 1 are where they need to be.  */
      emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vsignede_v2df
;; signed double float to int even word
(define_expand "vsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words
         0 1 2 3  0 1 2 3  =>  3 0 1 2
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
(define_expand "vunsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=v")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
                 (match_operand:V2DF 2 "register_operand" "v")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vunsignedo_v2df
;; unsigned double float to int convert odd word
(define_expand "vunsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word
         0 1 2 3  0 1 2 3  =>  1 2 3 0
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    {
      /* Little endian word numbering for operand is 3 2 1 0.
         Result words 3 and 1 are where they need to be.  */
      emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsignede_v2df
;; unsigned double float to int even word
(define_expand "vunsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words
         0 1 2 3  0 1 2 3  =>  3 0 1 2
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])
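
;; Usage sketch (added; assumed built-in names): vec_signed2/vec_unsigned2
;; combine two V2DF conversions into one V4SI, while the "e"/"o" variants
;; convert a single V2DF into the even or odd words:
;;
;;   vector signed int s = vec_signed2 (da, db);
;;   vector unsigned int u = vec_unsignede (da);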

;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating point
;; value is < LONG_MIN or > LONG_MAX.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (float:V2DF
         (fix:V2DI
          (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Permute operations

;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
        (vec_concat:VSX_D
         (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
         (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (which_alternative == 0)
    return (BYTES_BIG_ENDIAN
            ? "xxpermdi %x0,%x1,%x2,0"
            : "xxpermdi %x0,%x2,%x1,0");

  else if (which_alternative == 1)
    return (BYTES_BIG_ENDIAN
            ? "mtvsrdd %x0,%1,%2"
            : "mtvsrdd %x0,%2,%1");

  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])
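
;; For illustration (an added note): a vector constructor such as
;;
;;   vector double make (double a, double b)
;;   {
;;     return (vector double) { a, b };
;;   }
;;
;; is expected to match this pattern, giving a single xxpermdi, or mtvsrdd
;; when both inputs are in GPRs on ISA 3.0 (the second alternative).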

;; Combiner patterns to allow creating XXPERMDI's to access either double
;; word element in a vector register.
(define_insn "*vsx_concat_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[2]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (2 * dword);
      return "xxpermdi %x0,%x1,%x3,%4";
    }
  else
    {
      operands[4] = GEN_INT (!dword);
      return "xxpermdi %x0,%x3,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_2"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[3]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (dword);
      return "xxpermdi %x0,%x1,%x2,%4";
    }
  else
    {
      operands[4] = GEN_INT (2 * !dword);
      return "xxpermdi %x0,%x2,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_3"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[5] = GEN_INT ((2 * dword1) + dword2);
      return "xxpermdi %x0,%x1,%x3,%5";
    }
  else
    {
      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
      return "xxpermdi %x0,%x3,%x1,%5";
    }
}
  [(set_attr "type" "vecperm")])

;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF
         [(match_operand:SF 1 "vsx_register_operand" "ww")
          (match_operand:SF 2 "vsx_register_operand" "ww")]
         UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])

;; V4SImode initialization splitter
(define_insn_and_split "vsx_init_v4si"
  [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
        (unspec:V4SI
         [(match_operand:SI 1 "reg_or_cint_operand" "rn")
          (match_operand:SI 2 "reg_or_cint_operand" "rn")
          (match_operand:SI 3 "reg_or_cint_operand" "rn")
          (match_operand:SI 4 "reg_or_cint_operand" "rn")]
         UNSPEC_VSX_VEC_INIT))
   (clobber (match_scratch:DI 5 "=&r"))
   (clobber (match_scratch:DI 6 "=&r"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_v4si_init (operands);
  DONE;
})

;; xxpermdi for little endian loads and stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_xxpermdi2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
         (match_operand:V8HI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
         (match_operand:V16QI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

;; lxvd2x for little endian loads.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_lxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "memory_operand" "Z")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "memory_operand" "Z")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
         (match_operand:V8HI 1 "memory_operand" "Z")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
         (match_operand:V16QI 1 "memory_operand" "Z")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

;; stxvd2x for little endian stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_stxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
         (match_operand:V8HI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
         (match_operand:V16QI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Convert a TImode value into V1TImode
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand")
   (match_operand:V1TI 1 "nonimmediate_operand")
   (match_operand:TI 2 "input_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
  DONE;
})

;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
(define_expand "vsx_set_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
   (use (match_operand:QI 3 "const_0_to_1_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx dest = operands[0];
  rtx vec_reg = operands[1];
  rtx value = operands[2];
  rtx ele = operands[3];
  rtx tmp = gen_reg_rtx (<VS_scalar>mode);

  if (ele == const0_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
      DONE;
    }
  else if (ele == const1_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
      DONE;
    }
  else
    gcc_unreachable ();
})

;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases where we can do a simple or direct move.
;; Or see if we can avoid doing the move at all

;; There are some unresolved problems with reload that show up if an Altivec
;; register was picked.  Limit the scalar value to FPRs for now.

(define_insn "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
         (parallel
          [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int element = INTVAL (operands[2]);
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);
  int fldDM;

  gcc_assert (IN_RANGE (element, 0, 1));
  gcc_assert (VSX_REGNO_P (op1_regno));

  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      if (op0_regno == op1_regno)
        return ASM_COMMENT_START " vec_extract to same register";

      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
               && TARGET_POWERPC64)
        return "mfvsrd %0,%x1";

      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
        return "fmr %0,%1";

      else if (VSX_REGNO_P (op0_regno))
        return "xxlor %x0,%x1,%x1";

      else
        gcc_unreachable ();
    }

  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
           && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
    return "mfvsrld %0,%x1";

  else if (VSX_REGNO_P (op0_regno))
    {
      fldDM = element << 1;
      if (!BYTES_BIG_ENDIAN)
        fldDM = 3 - fldDM;
      operands[3] = GEN_INT (fldDM);
      return "xxpermdi %x0,%x1,%x1,%3";
    }

  else
    gcc_unreachable ();
}
  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
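
;; Example (added; vec_extract is the generic built-in): an extraction like
;;
;;   double first (vector double v)
;;   {
;;     return vec_extract (v, 0);
;;   }
;;
;; matches this pattern; depending on where the operands were allocated it
;; becomes a no-op comment, mfvsrd, fmr, xxlor, or xxpermdi.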

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
        (vec_select:<VSX_D:VS_scalar>
         (match_operand:VSX_D 1 "memory_operand" "m,m")
         (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], <VSX_D:VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing a single scalar element that is the right location to
;; store
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
         (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsdx %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")
   (set_attr "length" "4")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
        (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
                             (match_operand:V2DI 2 "gpc_reg_operand" "v")]
                            UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])

;; Variable V2DI/V2DF extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
        (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
                             (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
                            UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

;; Extract a SF element from V4SF
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
        (vec_select:SF
         (match_operand:V4SF 1 "vsx_register_operand" "wa")
         (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
   (clobber (match_scratch:V4SF 3 "=0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
        op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "fp")])

(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
  [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
        (vec_select:SF
         (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], SFmode);
}
  [(set_attr "type" "fpload,fpload,fpload,load")
   (set_attr "length" "8")])

;; Variable V4SF extract
(define_insn_and_split "vsx_extract_v4sf_var"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
        (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
                    (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
                   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

;; Expand the builtin form of xxpermdi to canonical rtl.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
        {
          target = gen_lowpart (V2DImode, target);
          op0 = gen_lowpart (V2DImode, op0);
          op1 = gen_lowpart (V2DImode, op1);
        }
    }
  emit_insn (gen (target, op0, op1, perm0, perm1));
  DONE;
})

;; Special version of xxpermdi that retains big-endian semantics.
(define_expand "vsx_xxpermdi_<mode>_be"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
        {
          target = gen_lowpart (V2DImode, target);
          op0 = gen_lowpart (V2DImode, op0);
          op1 = gen_lowpart (V2DImode, op1);
        }
    }

  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})

(define_insn "vsx_xxpermdi2_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
        (vec_select:VSX_D
          (vec_concat:<VS_double>
            (match_operand:VSX_D 1 "vsx_register_operand" "wd")
            (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int op3, op4, mask;

  /* For little endian, swap operands and invert/swap selectors
     to get the correct xxpermdi.  The operand swap sets up the
     inputs as a little endian array.  The selectors are swapped
     because they are defined to use big endian ordering.  The
     selectors are inverted to get the correct doublewords for
     little endian ordering.  */
  if (BYTES_BIG_ENDIAN)
    {
      op3 = INTVAL (operands[3]);
      op4 = INTVAL (operands[4]);
    }
  else
    {
      op3 = 3 - INTVAL (operands[4]);
      op4 = 3 - INTVAL (operands[3]);
    }

  mask = (op3 << 1) | (op4 - 2);
  operands[3] = GEN_INT (mask);

  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,%3";
  else
    return "xxpermdi %x0,%x2,%x1,%3";
}
  [(set_attr "type" "vecperm")])

;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
;; none of the small types were allowed in a vector register, so we had to
;; extract to a DImode and either do a direct move or store.
(define_expand "vsx_extract_<mode>"
  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
                   (vec_select:<VS_scalar>
                    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
                    (parallel [(match_operand:QI 2 "const_int_operand")])))
              (clobber (match_scratch:VSX_EXTRACT_I 3))])]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
  /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
  if (TARGET_P9_VECTOR)
    {
      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
                                            operands[2]));
      DONE;
    }
})

(define_insn "vsx_extract_<mode>_p9"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
         (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
{
  if (which_alternative == 0)
    return "#";

  else
    {
      HOST_WIDE_INT elt = INTVAL (operands[2]);
      HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
                               ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
                               : elt);

      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
      HOST_WIDE_INT offset = unit_size * elt_adj;

      operands[2] = GEN_INT (offset);
      if (unit_size == 4)
        return "xxextractuw %x0,%x1,%2";
      else
        return "vextractu<wd> %0,%1,%2";
    }
}
  [(set_attr "type" "vecsimple")])
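
;; Example (added note): on ISA 3.0 an element extraction such as
;;
;;   int third (vector signed char v)
;;   {
;;     return vec_extract (v, 3);
;;   }
;;
;; can use the vextub{l,r}x/vextractub family directly instead of the older
;; splat-and-direct-move sequence handled by the pre-ISA 3.0 patterns below.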

(define_split
  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
         (parallel [(match_operand:QI 2 "const_int_operand")])))
   (clobber (match_operand:SI 3 "int_reg_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
  [(const_int 0)]
{
  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);

  emit_move_insn (op3, GEN_INT (offset));
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
  else
    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
  DONE;
})

;; Optimize zero extracts to eliminate the AND after the extract.
(define_insn_and_split "*vsx_extract_<mode>_di_p9"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
        (zero_extend:DI
         (vec_select:<VS_scalar>
          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
          (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4)
                   (vec_select:<VS_scalar>
                    (match_dup 1)
                    (parallel [(match_dup 2)])))
              (clobber (match_dup 3))])]
{
  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
})

;; Optimize stores to use the ISA 3.0 scalar store instructions
(define_insn_and_split "*vsx_extract_<mode>_store_p9"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
         (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
   (clobber (match_scratch:SI 4 "=X,&r"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
                   (vec_select:<VS_scalar>
                    (match_dup 1)
                    (parallel [(match_dup 2)])))
              (clobber (match_dup 4))])
   (set (match_dup 0)
        (match_dup 3))])

(define_insn_and_split "*vsx_extract_si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
        (vec_select:SI
         (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 1)
    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
  else
    vec_tmp = src;

  if (MEM_P (operands[0]))
    {
      if (can_create_pseudo_p ())
        dest = rs6000_address_for_fpconvert (dest);

      if (TARGET_P8_VECTOR)
        emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
      else
        emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
    }

  else if (TARGET_P8_VECTOR)
    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
  else
    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
                    gen_rtx_REG (DImode, REGNO (vec_tmp)));

  DONE;
}
  [(set_attr "type" "mftgpr,vecperm,fpstore")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_extract_<mode>_p8"
  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
         (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (<MODE>mode == V16QImode)
    {
      if (value != 7)
        emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
      else
        vec_tmp = src;
    }
  else if (<MODE>mode == V8HImode)
    {
      if (value != 3)
        emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
      else
        vec_tmp = src;
    }
  else
    gcc_unreachable ();

  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
                  gen_rtx_REG (DImode, REGNO (vec_tmp)));
  DONE;
}
  [(set_attr "type" "mftgpr")])

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
         (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:DI 3 "=&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], <VS_scalar>mode);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])

;; Variable V16QI/V8HI/V4SI extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
        (unspec:<VS_scalar>
         [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
          (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
         UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
  [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
        (zero_extend:SDI
         (unspec:<VSX_EXTRACT_I:VS_scalar>
          [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
           (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
          UNSPEC_VSX_EXTRACT)))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
  rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
                                operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

;; VSX_EXTRACT optimizations
;; Optimize double d = (double) vec_extract (vi, <n>)
;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
(define_insn_and_split "*vsx_extract_si_<uns>float_df"
  [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
        (any_float:DF
         (vec_select:SI
          (match_operand:V4SI 1 "gpc_reg_operand" "v")
          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
        v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
  DONE;
})
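
;; Worked example (added): the combination handled above,
;;
;;   double d (vector signed int v)
;;   {
;;     return (double) vec_extract (v, 0);
;;   }
;;
;; splits into a vspltw (when the element is not already in place) followed
;; by xvcvsxwdp, avoiding a round trip through the GPRs.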

;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type supported by the hardware that is
;; not double.  First convert the value to double, and then to the desired
;; type.
(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
        (any_float:VSX_EXTRACT_FL
         (vec_select:SI
          (match_operand:V4SI 1 "gpc_reg_operand" "v")
          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))
   (clobber (match_scratch:DF 4 "=ws"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  rtx df_tmp = operands[4];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
        v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  if (GET_CODE (df_tmp) == SCRATCH)
    df_tmp = gen_reg_rtx (DFmode);

  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));

  if (<MODE>mode == SFmode)
    emit_insn (gen_truncdfsf2 (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
           && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
    emit_insn (gen_extenddfif2 (dest, df_tmp));
  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
  else
    gcc_unreachable ();

  DONE;
})

;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
;; vector short or vector unsigned short.
(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
	(float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 4)
	(sign_extend:DI (match_dup 3)))
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
})

(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
	(unsigned_float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
})

;; V4SI/V8HI/V16QI set operation on ISA 3.0
(define_insn "vsx_set_<mode>_p9"
  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
	(unspec:VSX_EXTRACT_I
	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
{
  int ele = INTVAL (operands[3]);
  int nunits = GET_MODE_NUNITS (<MODE>mode);

  if (!BYTES_BIG_ENDIAN)
    ele = nunits - 1 - ele;

  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
  if (<MODE>mode == V4SImode)
    return "xxinsertw %x0,%x2,%3";
  else
    return "vinsert<wd> %0,%2,%3";
}
  [(set_attr "type" "vecperm")])
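
;; Usage note: a C sketch of the vec_insert form that maps onto
;; vsx_set_<mode>_p9 (illustrative only; assumes -mcpu=power9 -m64):
;;
;;	#include <altivec.h>
;;
;;	vector int
;;	set_elem (vector int v, int x)
;;	{
;;	  return vec_insert (x, v, 3);	/* xxinsertw for V4SI */
;;	}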

(define_insn_and_split "vsx_set_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "gpc_reg_operand" "ww")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 5)
	(unspec:V4SF [(match_dup 2)]
		     UNSPEC_VSX_CVDPSPN))
   (parallel [(set (match_dup 4)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 7)])))
	      (clobber (scratch:SI))])
   (set (match_dup 8)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  unsigned int tmp_regno = reg_or_subregno (operands[4]);

  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
  operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 2);
  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "12")])

;; Special case setting 0.0f to a V4SF element
(define_insn_and_split "*vsx_set_v4sf_p9_zero"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "zero_fp_constant" "j")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 4)
	(const_int 0))
   (set (match_dup 5)
	(unspec:V4SI [(match_dup 5)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  We just need to do an xxinsertw since the element is in the
;; correct location.

(define_insn "*vsx_insert_extract_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
{
  int ele = INTVAL (operands[4]);

  if (!BYTES_BIG_ENDIAN)
    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;

  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
  return "xxinsertw %x0,%x2,%4";
}
  [(set_attr "type" "vecperm")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  Convert the insert/extract to int and avoid doing the conversion.

(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 5 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
   && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 5)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 3)])))
	      (clobber (scratch:SI))])
   (set (match_dup 7)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 5)
		      (match_dup 4)]
		     UNSPEC_VSX_SET))]
{
  if (GET_CODE (operands[5]) == SCRATCH)
    operands[5] = gen_reg_rtx (SImode);

  operands[6] = gen_lowpart (V4SImode, operands[2]);
  operands[7] = gen_lowpart (V4SImode, operands[0]);
  operands[8] = gen_lowpart (V4SImode, operands[1]);
}
  [(set_attr "type" "vecperm")])

;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})

(define_expand "vsx_mergeh_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})
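
;; Usage note: a C sketch of the merge builtins these expanders implement
;; (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector double
;;	merge_high (vector double a, vector double b)
;;	{
;;	  return vec_mergeh (a, b);	/* element 0 of a, element 0 of b */
;;	}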

;; V2DF/V2DI splat
;; We separate the register splat insn from the memory splat insn to force the
;; register allocator to generate the indexed form of the SPLAT when it is
;; given an offsettable memory reference.  Otherwise, if the register and
;; memory insns were combined into a single insn, the register allocator will
;; load the value into a register, and then do a double word permute.
(define_expand "vsx_splat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "input_operand")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx op1 = operands[1];
  if (MEM_P (op1))
    operands[1] = rs6000_address_for_fpconvert (op1);
  else if (!REG_P (op1))
    op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
})

(define_insn "vsx_splat_<mode>_reg"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   xxpermdi %x0,%x1,%x1,0
   mtvsrdd %x0,%1,%1"
  [(set_attr "type" "vecperm")])

(define_insn "vsx_splat_<VSX_D:mode>_mem"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
	(vec_duplicate:VSX_D
	 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "lxvdsx %x0,%y1"
  [(set_attr "type" "vecload")])
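
;; Usage note: a C sketch of a V2DF splat from memory, which is expected
;; to use the LXVDSX form above (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector double
;;	splat_mem (double *p)
;;	{
;;	  return vec_splats (*p);
;;	}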

;; V4SI splat support
(define_insn "vsx_splat_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
	(vec_duplicate:V4SI
	 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
  "TARGET_P9_VECTOR"
  "@
   mtvsrws %x0,%1
   lxvwsx %x0,%y1"
  [(set_attr "type" "vecperm,vecload")])

;; SImode is not currently allowed in vector registers.  This pattern
;; allows us to use direct move to get the value in a vector register
;; so that we can use XXSPLTW.
(define_insn "vsx_splat_v4si_di"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
	(vec_duplicate:V4SI
	 (truncate:SI
	  (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "@
   xxspltw %x0,%x1,1
   mtvsrws %x0,%1"
  [(set_attr "type" "vecperm")])

;; V4SF splat (ISA 3.0)
(define_insn_and_split "vsx_splat_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
	(vec_duplicate:V4SF
	 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
  "TARGET_P9_VECTOR"
  "@
   lxvwsx %x0,%y1
   #
   mtvsrws %x0,%1"
  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
  [(set (match_dup 0)
	(unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
   (set (match_dup 0)
	(unspec:V4SF [(match_dup 0)
		      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
  ""
  [(set_attr "type" "vecload,vecperm,mftgpr")
   (set_attr "length" "4,8,4")])

;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(vec_duplicate:VSX_W
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
	  (parallel
	   [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (3 - INTVAL (operands[2]));

  return "xxspltw %x0,%x1,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxspltw_<mode>_direct"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTW))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxspltw %x0,%x1,%2"
  [(set_attr "type" "vecperm")])

;; V16QI/V8HI splat support on ISA 2.07
(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
	(vec_duplicate:VSX_SPLAT_I
	 (truncate:<VS_scalar>
	  (match_operand:DI 1 "altivec_register_operand" "v"))))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
  [(set_attr "type" "vecperm")])

;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTD))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
    return "xxpermdi %x0,%x1,%x1,0";
  else
    return "xxpermdi %x0,%x1,%x1,3";
}
  [(set_attr "type" "vecperm")])
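
;; Usage note: a C sketch of vec_splat on a 64-bit element vector, which
;; maps onto the XXPERMDI forms above (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector long long
;;	splat_elem1 (vector long long v)
;;	{
;;	  return vec_splat (v, 1);
;;	}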

;; V4SF/V4SI interleave
(define_insn "vsx_xxmrghw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
	(vec_select:VSX_W
	 (vec_concat:<VS_double>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
	  (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
	 (parallel [(const_int 0) (const_int 4)
		    (const_int 1) (const_int 5)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrghw %x0,%x1,%x2";
  else
    return "xxmrglw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxmrglw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
	(vec_select:VSX_W
	 (vec_concat:<VS_double>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
	  (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
	 (parallel [(const_int 2) (const_int 6)
		    (const_int 3) (const_int 7)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrglw %x0,%x1,%x2";
  else
    return "xxmrghw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

;; Shift left double by word immediate
(define_insn "vsx_xxsldwi_<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
		       (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_SLDWI))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsldwi %x0,%x1,%x2,%3"
  [(set_attr "type" "vecperm")])
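
;; Usage note: GCC also exposes this instruction at the source level; a C
;; sketch using the documented vec_xxsldwi built-in (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector float
;;	shift_left_2_words (vector float a, vector float b)
;;	{
;;	  return vec_xxsldwi (a, b, 2);
;;	}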

;; Vector reduction insns and splitters

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
	(VEC_reduc:V2DF
	 (vec_concat:V2DF
	  (vec_select:DF
	   (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
	   (parallel [(const_int 1)]))
	  (vec_select:DF
	   (match_dup 1)
	   (parallel [(const_int 0)])))
	 (match_dup 1)))
   (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
	     ? gen_reg_rtx (V2DFmode)
	     : operands[2];
  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
  [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
	(VEC_reduc:V4SF
	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
  DONE;
}
  [(set_attr "length" "16")
   (set_attr "type" "veccomplex")])
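
;; For reference, a scalar C model of what the two xxsldwi/op steps above
;; compute for the plus reduction, modulo element ordering (illustrative
;; only):
;;
;;	float
;;	reduc_plus (const float v[4])
;;	{
;;	  return (v[0] + v[2]) + (v[1] + v[3]);
;;	}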

;; Combiner patterns with the vector reduction patterns that know we can get
;; to the top element of the V2DF array without doing an extract.

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
  [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
	(vec_select:DF
	 (VEC_reduc:V2DF
	  (vec_concat:V2DF
	   (vec_select:DF
	    (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
	    (parallel [(const_int 1)]))
	   (vec_select:DF
	    (match_dup 1)
	    (parallel [(const_int 0)])))
	  (match_dup 1))
	 (parallel [(const_int 1)])))
   (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx hi = gen_highpart (DFmode, operands[1]);
  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
	    ? gen_reg_rtx (DFmode)
	    : operands[2];

  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
  [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
	(vec_select:SF
	 (VEC_reduc:V4SF
	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	  (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
	 (parallel [(const_int 3)])))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 4 "=0,0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4, tmp5;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
      tmp5 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
      tmp5 = operands[4];
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
  DONE;
}
  [(set_attr "length" "20")
   (set_attr "type" "veccomplex")])

;; Power8 Vector fusion.  The fused ops must be physically adjacent.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_dup 0)
			   (match_operand:P 3 "int_reg_operand"))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
			   (match_dup 0))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

;; ISA 3.0 vector extend sign support

(define_insn "vsx_sign_extend_qi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
	(unspec:VSINT_84
	 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
	 UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsb2<wd> %0,%1"
  [(set_attr "type" "vecexts")])

(define_insn "vsx_sign_extend_hi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
	(unspec:VSINT_84
	 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
	 UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsh2<wd> %0,%1"
  [(set_attr "type" "vecexts")])

(define_insn "*vsx_sign_extend_si_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
	(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
		     UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsw2d %0,%1"
  [(set_attr "type" "vecexts")])

;; ISA 3.0 Binary Floating-Point Support

;; VSX Scalar Extract Exponent Quad-Precision
(define_insn "xsxexpqp_<mode>"
  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
		   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR"
  "xsxexpqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Exponent Double-Precision
(define_insn "xsxexpdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxexpdp %0,%x1"
  [(set_attr "type" "integer")])

;; VSX Scalar Extract Significand Quad-Precision
(define_insn "xsxsigqp_<mode>"
  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
	(unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
		   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR"
  "xsxsigqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Significand Double-Precision
(define_insn "xsxsigdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxsigdp %0,%x1"
  [(set_attr "type" "integer")])
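
;; Usage note: a C sketch of the extraction built-ins these patterns
;; implement (illustrative only; requires -mcpu=power9 -m64):
;;
;;	#include <altivec.h>
;;
;;	unsigned int
;;	get_exp (double d)
;;	{
;;	  return scalar_extract_exp (d);	/* xsxexpdp */
;;	}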

;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
(define_insn "xsiexpqpf_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
	(unspec:IEEE128
	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	  (match_operand:DI 2 "altivec_register_operand" "v")]
	 UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Quad-Precision
(define_insn "xsiexpqp_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
	(unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
			 (match_operand:DI 2 "altivec_register_operand" "v")]
			UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Double-Precision
(define_insn "xsiexpdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DI 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
		   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])

;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
(define_insn "xsiexpdpf"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DF 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
		   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])

;; VSX Scalar Compare Exponents Double-Precision
(define_expand "xscmpexpdp_<code>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:DF
	  [(match_operand:DF 1 "vsx_register_operand" "wa")
	   (match_operand:DF 2 "vsx_register_operand" "wa")]
	  UNSPEC_VSX_SCMPEXPDP)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(CMP_TEST:SI (match_dup 3)
		     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpdp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
		     (match_operand:DF 2 "vsx_register_operand" "wa")]
		    UNSPEC_VSX_SCMPEXPDP)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpdp %0,%x1,%x2"
  [(set_attr "type" "fpcompare")])

;; VSX Scalar Test Data Class Quad-Precision
;;  (Expansion for scalar_test_data_class (__ieee128, int))
;;  (Has side effect of setting the lt bit if operand 1 is negative,
;;   setting the eq bit if any of the conditions tested by operand 2
;;   are satisfied, and clearing the gt and unordered bits to zero.)
(define_expand "xststdcqp_<mode>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; VSX Scalar Test Data Class Double- and Single-Precision
;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
;;   if any of the conditions tested by operand 2 are satisfied.
;;   The gt and unordered bits are cleared to zero.)
(define_expand "xststdc<Fvsx>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 4)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
  operands[4] = CONST0_RTX (SImode);
})
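
;; Usage note: a C sketch of the test-data-class built-in (illustrative
;; only; the 0x70 condition mask selects the NaN and +/-Inf bits and is
;; just an example value):
;;
;;	#include <altivec.h>
;;
;;	_Bool
;;	is_nan_or_inf (double d)
;;	{
;;	  return scalar_test_data_class (d, 0x70);
;;	}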

;; The VSX Scalar Test Negative Quad-Precision
(define_expand "xststdcnegqp_<mode>"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; The VSX Scalar Test Negative Double- and Single-Precision
(define_expand "xststdcneg<Fvsx>"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 3)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
  operands[3] = CONST0_RTX (SImode);
})

(define_insn "*xststdcqp_<mode>"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))]
  "TARGET_P9_VECTOR"
  "xststdcqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])

(define_insn "*xststdc<Fvsx>"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
		      UNSPEC_VSX_STSTDC)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xststdc<Fvsx> %0,%x1,%2"
  [(set_attr "type" "fpcompare")])

;; VSX Vector Extract Exponent Double and Single Precision
(define_insn "xvxexp<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXEXP))]
  "TARGET_P9_VECTOR"
  "xvxexp<VSs> %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Extract Significand Double and Single Precision
(define_insn "xvxsig<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXSIG))]
  "TARGET_P9_VECTOR"
  "xvxsig<VSs> %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Insert Exponent Double and Single Precision
(define_insn "xviexp<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VIEXP))]
  "TARGET_P9_VECTOR"
  "xviexp<VSs> %x0,%x1,%x2"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Test Data Class Double and Single Precision
;; The corresponding elements of the result vector are all ones
;; if any of the conditions tested by operand 2 are satisfied.
(define_insn "xvtstdc<VSs>"
  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
	(unspec:<VSI>
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:SI 2 "u7bit_cint_operand" "n")]
	 UNSPEC_VSX_VTSTDC))]
  "TARGET_P9_VECTOR"
  "xvtstdc<VSs> %x0,%x1,%2"
  [(set_attr "type" "vecsimple")])

;; ISA 3.0 String Operations Support

;; Compare vectors producing a vector result and a predicate, setting CR6
;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
;; v4si modes.  It does not match v2df, v4sf, or v2di modes.  There's no
;; need to match v4sf, v2df, or v2di modes because those are expanded
;; to use Power8 instructions.
(define_insn "*vsx_ne_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
		 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
	(ne:VSX_EXTRACT_I (match_dup 1)
			  (match_dup 2)))]
  "TARGET_P9_VECTOR"
  "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])

(define_insn "*vector_nez_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC [(unspec:VI
		     [(match_operand:VI 1 "gpc_reg_operand" "v")
		      (match_operand:VI 2 "gpc_reg_operand" "v")]
		     UNSPEC_NEZ_P)]
		   UNSPEC_PREDICATE))
   (set (match_operand:VI 0 "gpc_reg_operand" "=v")
	(unspec:VI [(match_dup 1)
		    (match_dup 2)]
		   UNSPEC_NEZ_P))]
  "TARGET_P9_VECTOR"
  "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])
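
;; Usage note: a C sketch of a predicate built-in that can use the
;; CR6-setting compare above (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	int
;;	all_different (vector int a, vector int b)
;;	{
;;	  return vec_all_ne (a, b);
;;	}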

;; Return first position of match between vectors using natural order
;; for both LE and BE execution modes.
(define_expand "first_match_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;

  rtx cmp_result = gen_reg_rtx (<MODE>mode);
  rtx not_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));

  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})
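
;; Usage note: a C sketch of the corresponding built-in (illustrative
;; only; requires -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	unsigned int
;;	first_match (vector unsigned char a, vector unsigned char b)
;;	{
;;	  return vec_first_match_index (a, b);
;;	}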

;; Return first position of match between vectors or end of string (EOS) using
;; natural element order for both LE and BE execution modes.
(define_expand "first_match_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that do not match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements that match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors using natural
;; element order for both LE and BE execution modes.
(define_expand "first_mismatch_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmp_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					    operands[2]));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors or end of string (EOS)
;; using natural element order for both LE and BE execution modes.
(define_expand "first_mismatch_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements did not match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Load VSX Vector with Length
(define_expand "lxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand" "b")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
		      UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])
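
;; Usage note: a C sketch of the length-controlled load built-in that
;; expands through lxvl (illustrative only; requires -mcpu=power9 -m64):
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;
;;	vector unsigned char
;;	load_n (unsigned char *p, size_t n)	/* loads only n bytes */
;;	{
;;	  return vec_xl_len (p, n);
;;	}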

;; Expand for builtin xl_len_r
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
					  shift_mask));
  DONE;
})

(define_insn "stxvll"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
		      UNSPEC_STXVLL))]
  "TARGET_P9_VECTOR"
  "stxvll %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Store VSX Vector with Length
(define_expand "stxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Expand for builtin xst_len_r
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
   (match_operand:DI 1 "register_operand" "b")
   (match_operand:DI 2 "register_operand" "r")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
					  shift_mask));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})

;; Vector Compare Not Equal Byte (specified/not+eq:)
(define_insn "vcmpneb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(not:V16QI
	 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
		   (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Byte
(define_insn "vcmpnezb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI
	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Half Word (specified/not+eq:)
(define_insn "vcmpneh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(not:V8HI
	 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
		  (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Half Word
(define_insn "vcmpnezh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
		     UNSPEC_VCMPNEZH))]
  "TARGET_P9_VECTOR"
  "vcmpnezh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Word (specified/not+eq:)
(define_insn "vcmpnew"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(not:V4SI
	 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
		  (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpnew %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Word
(define_insn "vcmpnezw"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
		     UNSPEC_VCMPNEZW))]
  "TARGET_P9_VECTOR"
  "vcmpnezw %0,%1,%2"
  [(set_attr "type" "vecsimple")])
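
;; Usage note: a C sketch of the element-wise compare-not-equal built-in
;; (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector bool int
;;	cmpne (vector int a, vector int b)
;;	{
;;	  return vec_cmpne (a, b);	/* vcmpnew */
;;	}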

;; Vector Count Leading Zero Least-Significant Bits Byte
(define_insn "vclzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCLZLSBB))]
  "TARGET_P9_VECTOR"
  "vclzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Count Trailing Zero Least-Significant Bits Byte
(define_insn "vctzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCTZLSBB))]
  "TARGET_P9_VECTOR"
  "vctzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])
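
;; Usage note: a C sketch of the count built-ins these insns implement
;; (illustrative only; requires -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	int
;;	count_lead_lsb_zeros (vector unsigned char v)
;;	{
;;	  return vec_cntlz_lsbb (v);	/* vclzlsbb */
;;	}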

;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector insert/extract word at arbitrary byte values.  Note, the little
;; endian version needs to adjust the byte number, and the V4SI element in
;; vinsert4b.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand")
	(unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
		      (match_operand:QI 2 "const_0_to_12_operand" "n")]
		     UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
		       (match_operand:V16QI 2 "vsx_register_operand")
		       (match_operand:QI 3 "const_0_to_12_operand")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    {
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
		       (match_operand:V16QI 2 "vsx_register_operand" "0")
		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])

;; Generate vector extract four float 32 values from left four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shorth"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Generate vector extract four float 32 values from right four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shortl"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* Want to have the elements in reverse order relative
	 to the endian mode in use, i.e. in LE mode, put elements
	 in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
					   operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])
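
;; Usage note: a C sketch of the byte-reverse built-in these patterns
;; implement (illustrative only; -mcpu=power9 enables the XXBR* forms):
;;
;;	#include <altivec.h>
;;
;;	vector unsigned int
;;	byte_swap_each_word (vector unsigned int v)
;;	{
;;	  return vec_revb (v);	/* xxbrw */
;;	}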

;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR	 0)		;; GPR temporary
   (SFBOOL_TMP_VSX	 1)		;; vector temporary
   (SFBOOL_MFVSR_D	 2)		;; move to gpr dest
   (SFBOOL_MFVSR_A	 3)		;; move to gpr src
   (SFBOOL_BOOL_D	 4)		;; and/ior/xor dest
   (SFBOOL_BOOL_A1	 5)		;; and/ior/xor arg1
   (SFBOOL_BOOL_A2	 6)		;; and/ior/xor arg2
   (SFBOOL_SHL_D	 7)		;; shift left dest
   (SFBOOL_SHL_A	 8)		;; shift left arg
   (SFBOOL_MTVSR_D	 9)		;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF	 10)		;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI	 11)		;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI	 12)		;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF	 13)])		;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations using logical operations to
;; pick apart SFmode operations.  For example, there is code from e_powf.c
;; after macro expansion that looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after a
;; shift and do the VSX logical operation.

;; The insns for dealing with SFmode in GPR registers look like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))

(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
	(zero_extend:DI
	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
		   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers, when the mode is different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
	   && REGNO (operands[SFBOOL_MFVSR_D])
		== REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
		   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
	(match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
			  (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
			  ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})