1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
11 // is mostly EmitInstrWithCustomInserter().
13 //===----------------------------------------------------------------------===//
15 #include "R600ISelLowering.h"
16 #include "R600Defines.h"
17 #include "R600InstrInfo.h"
18 #include "R600MachineFunctionInfo.h"
19 #include "llvm/Argument.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
// Constructor. Caches the target's instruction info as an R600InstrInfo and
// declares, per (operation, value type) pair, how this target wants the
// operation legalized.
26 R600TargetLowering::R600TargetLowering(TargetMachine
&TM
) :
27 AMDGPUTargetLowering(TM
),
28 TII(static_cast<const R600InstrInfo
*>(TM
.getInstrInfo()))
30 setOperationAction(ISD::MUL
, MVT::i64
, Expand
);
// Value types with a register class: the four-element vector types use
// R600_Reg128RegClass, the 32-bit scalar types use R600_Reg32RegClass.
31 addRegisterClass(MVT::v4f32
, &AMDGPU::R600_Reg128RegClass
);
32 addRegisterClass(MVT::f32
, &AMDGPU::R600_Reg32RegClass
);
33 addRegisterClass(MVT::v4i32
, &AMDGPU::R600_Reg128RegClass
);
34 addRegisterClass(MVT::i32
, &AMDGPU::R600_Reg32RegClass
);
35 computeRegisterProperties();
// Vector float add/mul are requested as Expand rather than handled directly.
37 setOperationAction(ISD::FADD
, MVT::v4f32
, Expand
);
38 setOperationAction(ISD::FMUL
, MVT::v4f32
, Expand
);
// BR_CC, ROTL, SELECT_CC, SETCC and the intrinsic opcodes marked Custom below
// have matching cases in LowerOperation().
40 setOperationAction(ISD::BR_CC
, MVT::i32
, Custom
);
41 setOperationAction(ISD::BR_CC
, MVT::f32
, Custom
);
43 setOperationAction(ISD::FSUB
, MVT::f32
, Expand
);
45 setOperationAction(ISD::INTRINSIC_VOID
, MVT::Other
, Custom
);
46 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
// The i1-typed INTRINSIC_WO_CHAIN and FP_TO_UINT entries have matching cases
// in ReplaceNodeResults().
47 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::i1
, Custom
);
49 setOperationAction(ISD::ROTL
, MVT::i32
, Custom
);
51 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Custom
);
52 setOperationAction(ISD::SELECT_CC
, MVT::i32
, Custom
);
54 setOperationAction(ISD::SETCC
, MVT::i32
, Custom
);
55 setOperationAction(ISD::SETCC
, MVT::f32
, Custom
);
56 setOperationAction(ISD::FP_TO_UINT
, MVT::i1
, Custom
);
// Ask for FP_ROUND nodes to be offered to PerformDAGCombine().
58 setTargetDAGCombine(ISD::FP_ROUND
);
60 setSchedulingPreference(Sched::VLIW
);
// Expands pseudo instructions that need custom insertion. Replacement
// instructions are built in BB immediately before MI (iterator I); opcodes
// with no case here are forwarded to AMDGPUTargetLowering. After the switch
// the pseudo MI is erased from its parent, except for MASK_WRITE, which is
// documented below as returning early.
//
// MI: the pseudo instruction to expand.
// BB: the basic block that contains MI.
63 MachineBasicBlock
* R600TargetLowering::EmitInstrWithCustomInserter(
64 MachineInstr
* MI
, MachineBasicBlock
* BB
) const
66 MachineFunction
* MF
= BB
->getParent();
67 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
68 MachineBasicBlock::iterator I
= *MI
;
70 switch (MI
->getOpcode()) {
71 default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI
, BB
);
72 case AMDGPU::SHADER_TYPE
: break;
// CLAMP / FABS / FNEG are each emitted as a MOV of operand 1 into operand 0,
// with the corresponding MO_FLAG_* attached afterwards through addFlag().
73 case AMDGPU::CLAMP_R600
:
76 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::MOV
))
77 .addOperand(MI
->getOperand(0))
78 .addOperand(MI
->getOperand(1))
80 .addReg(AMDGPU::PRED_SEL_OFF
);
81 TII
->addFlag(NewMI
, 0, MO_FLAG_CLAMP
);
84 case AMDGPU::FABS_R600
:
87 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::MOV
))
88 .addOperand(MI
->getOperand(0))
89 .addOperand(MI
->getOperand(1))
91 .addReg(AMDGPU::PRED_SEL_OFF
);
92 TII
->addFlag(NewMI
, 1, MO_FLAG_ABS
);
96 case AMDGPU::FNEG_R600
:
99 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::MOV
))
100 .addOperand(MI
->getOperand(0))
101 .addOperand(MI
->getOperand(1))
103 .addReg(AMDGPU::PRED_SEL_OFF
);
104 TII
->addFlag(NewMI
, 1, MO_FLAG_NEG
);
// R600_LOAD_CONST: the immediate in operand 1 selects a register of
// R600_CReg32RegClass, which is copied into the destination operand.
108 case AMDGPU::R600_LOAD_CONST
:
110 int64_t RegIndex
= MI
->getOperand(1).getImm();
111 unsigned ConstantReg
= AMDGPU::R600_CReg32RegClass
.getRegister(RegIndex
);
112 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::COPY
))
113 .addOperand(MI
->getOperand(0))
114 .addReg(ConstantReg
);
// MASK_WRITE: tag the instruction that defines the (virtual) register in
// operand 0 with MO_FLAG_MASK.
118 case AMDGPU::MASK_WRITE
:
120 unsigned maskedRegister
= MI
->getOperand(0).getReg();
121 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister
));
122 MachineInstr
* defInstr
= MRI
.getVRegDef(maskedRegister
);
123 TII
->addFlag(defInstr
, 0, MO_FLAG_MASK
);
124 // Return early so the instruction is not erased
128 case AMDGPU::RAT_WRITE_CACHELESS_32_eg
:
129 case AMDGPU::RAT_WRITE_CACHELESS_128_eg
:
131 // Convert to DWORD address
132 unsigned NewAddr
= MRI
.createVirtualRegister(
133 &AMDGPU::R600_TReg32_XRegClass
);
134 unsigned ShiftValue
= MRI
.createVirtualRegister(
135 &AMDGPU::R600_TReg32RegClass
);
// EOP is 1 when the instruction following this one is a RETURN.
136 unsigned EOP
= (llvm::next(I
)->getOpcode() == AMDGPU::RETURN
) ? 1 : 0;
138 // XXX In theory, we should be able to pass ShiftValue directly to
139 // the LSHR_eg instruction as an inline literal, but I tried doing it
140 // this way and it didn't produce the correct results.
141 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::MOV_IMM_I32
),
143 .addReg(AMDGPU::ALU_LITERAL_X
)
144 .addReg(AMDGPU::PRED_SEL_OFF
)
146 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::LSHR_eg
), NewAddr
)
147 .addOperand(MI
->getOperand(1))
149 .addReg(AMDGPU::PRED_SEL_OFF
);
// Re-emit the write itself with the same opcode as the pseudo.
150 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(MI
->getOpcode()))
151 .addOperand(MI
->getOperand(0))
153 .addImm(EOP
); // Set End of program bit
// RESERVE_REG: record the R600_TReg32RegClass register selected by the
// immediate in operand 0 in the function info's ReservedRegs list.
157 case AMDGPU::RESERVE_REG
:
159 R600MachineFunctionInfo
* MFI
= MF
->getInfo
<R600MachineFunctionInfo
>();
160 int64_t ReservedIndex
= MI
->getOperand(0).getImm();
161 unsigned ReservedReg
=
162 AMDGPU::R600_TReg32RegClass
.getRegister(ReservedIndex
);
163 MFI
->ReservedRegs
.push_back(ReservedReg
);
// Gradient sample: emit TEX_SET_GRADIENTS_H (from operand 3) and
// TEX_SET_GRADIENTS_V (from operand 2) into two fresh 128-bit-class virtual
// registers, then TEX_SAMPLE_G with both results added as implicit uses.
169 unsigned t0
= MRI
.createVirtualRegister(&AMDGPU::R600_Reg128RegClass
);
170 unsigned t1
= MRI
.createVirtualRegister(&AMDGPU::R600_Reg128RegClass
);
172 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SET_GRADIENTS_H
), t0
)
173 .addOperand(MI
->getOperand(3))
174 .addOperand(MI
->getOperand(4))
175 .addOperand(MI
->getOperand(5));
176 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SET_GRADIENTS_V
), t1
)
177 .addOperand(MI
->getOperand(2))
178 .addOperand(MI
->getOperand(4))
179 .addOperand(MI
->getOperand(5));
180 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SAMPLE_G
))
181 .addOperand(MI
->getOperand(0))
182 .addOperand(MI
->getOperand(1))
183 .addOperand(MI
->getOperand(4))
184 .addOperand(MI
->getOperand(5))
185 .addReg(t0
, RegState::Implicit
)
186 .addReg(t1
, RegState::Implicit
);
// TXD_SHADOW: same sequence as above, ending in TEX_SAMPLE_C_G.
189 case AMDGPU::TXD_SHADOW
:
// The register class is passed by address (&AMDGPU::R600_Reg128RegClass),
// the same form used by the gradient-sample expansion above and by every
// other createVirtualRegister() call in this function.
191 unsigned t0
= MRI
.createVirtualRegister(&AMDGPU::R600_Reg128RegClass
);
192 unsigned t1
= MRI
.createVirtualRegister(&AMDGPU::R600_Reg128RegClass
);
194 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SET_GRADIENTS_H
), t0
)
195 .addOperand(MI
->getOperand(3))
196 .addOperand(MI
->getOperand(4))
197 .addOperand(MI
->getOperand(5));
198 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SET_GRADIENTS_V
), t1
)
199 .addOperand(MI
->getOperand(2))
200 .addOperand(MI
->getOperand(4))
201 .addOperand(MI
->getOperand(5));
202 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SAMPLE_C_G
))
203 .addOperand(MI
->getOperand(0))
204 .addOperand(MI
->getOperand(1))
205 .addOperand(MI
->getOperand(4))
206 .addOperand(MI
->getOperand(5))
207 .addReg(t0
, RegState::Implicit
)
208 .addReg(t1
, RegState::Implicit
);
// Branch to the target held in operand 0.
212 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::JUMP
))
213 .addOperand(MI
->getOperand(0))
// Conditional branches: a PRED_X instruction writes PREDICATE_BIT from the
// condition in operand 1 (float variant: OPCODE_IS_NOT_ZERO, integer variant:
// OPCODE_IS_NOT_ZERO_INT), is tagged with MO_FLAG_PUSH, and the following
// JUMP to operand 0 uses and kills PREDICATE_BIT.
216 case AMDGPU::BRANCH_COND_f32
:
218 MachineInstr
*NewMI
=
219 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::PRED_X
))
220 .addReg(AMDGPU::PREDICATE_BIT
)
221 .addOperand(MI
->getOperand(1))
222 .addImm(OPCODE_IS_NOT_ZERO
)
224 TII
->addFlag(NewMI
, 1, MO_FLAG_PUSH
);
225 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::JUMP
))
226 .addOperand(MI
->getOperand(0))
227 .addReg(AMDGPU::PREDICATE_BIT
, RegState::Kill
);
230 case AMDGPU::BRANCH_COND_i32
:
232 MachineInstr
*NewMI
=
233 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::PRED_X
))
234 .addReg(AMDGPU::PREDICATE_BIT
)
235 .addOperand(MI
->getOperand(1))
236 .addImm(OPCODE_IS_NOT_ZERO_INT
)
238 TII
->addFlag(NewMI
, 1, MO_FLAG_PUSH
);
239 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::JUMP
))
240 .addOperand(MI
->getOperand(0))
241 .addReg(AMDGPU::PREDICATE_BIT
, RegState::Kill
);
// input_perspective: reserve the first four R600_TReg32RegClass registers,
// then record on the function info which interpolation mode the immediate in
// operand 1 selects.
244 case AMDGPU::input_perspective
:
246 R600MachineFunctionInfo
*MFI
= MF
->getInfo
<R600MachineFunctionInfo
>();
248 // XXX Be more fine-grained about register reservation
249 for (unsigned i
= 0; i
< 4; i
++) {
250 unsigned ReservedReg
= AMDGPU::R600_TReg32RegClass
.getRegister(i
);
251 MFI
->ReservedRegs
.push_back(ReservedReg
);
254 switch (MI
->getOperand(1).getImm()) {
255 case 0:// Perspective
256 MFI
->HasPerspectiveInterpolation
= true;
259 MFI
->HasLinearInterpolation
= true;
262 assert(0 && "Unknow ij index");
// The pseudo instruction has been replaced; remove it from the block.
269 MI
->eraseFromParent();
273 //===----------------------------------------------------------------------===//
274 // Custom DAG Lowering Operations
275 //===----------------------------------------------------------------------===//
// Bring the intrinsic ID enumerators into scope so the case labels in
// LowerOperation() can name them without qualification (for example
// r600_read_ngroups_x).
277 using namespace llvm::Intrinsic
;
278 using namespace llvm::AMDGPUIntrinsic
;
// Custom lowering entry point. Dispatches on the opcode of Op to the
// matching Lower* helper, lowers the R600/AMDGPU intrinsics inline, and
// forwards every opcode or intrinsic without a case here to
// AMDGPUTargetLowering::LowerOperation().
//
// Op:  the node to lower.
// DAG: the SelectionDAG that owns Op and receives any new nodes.
280 SDValue
R600TargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const
282 switch (Op
.getOpcode()) {
283 default: return AMDGPUTargetLowering::LowerOperation(Op
, DAG
);
284 case ISD::BR_CC
: return LowerBR_CC(Op
, DAG
);
285 case ISD::ROTL
: return LowerROTL(Op
, DAG
);
286 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
);
287 case ISD::SETCC
: return LowerSETCC(Op
, DAG
);
// Chained intrinsics: operand 0 is the chain, operand 1 the intrinsic ID.
288 case ISD::INTRINSIC_VOID
: {
289 SDValue Chain
= Op
.getOperand(0);
290 unsigned IntrinsicID
=
291 cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue();
292 switch (IntrinsicID
) {
// store_output: operand 3 selects a register of R600_TReg32RegClass and
// operand 2 is the value copied into it.
293 case AMDGPUIntrinsic::AMDGPU_store_output
: {
294 MachineFunction
&MF
= DAG
.getMachineFunction();
295 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
296 int64_t RegIndex
= cast
<ConstantSDNode
>(Op
.getOperand(3))->getZExtValue();
297 unsigned Reg
= AMDGPU::R600_TReg32RegClass
.getRegister(RegIndex
);
298 if (!MRI
.isLiveOut(Reg
)) {
301 return DAG
.getCopyToReg(Chain
, Op
.getDebugLoc(), Reg
, Op
.getOperand(2));
303 // default for switch(IntrinsicID)
306 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
// Chainless intrinsics: operand 0 is the intrinsic ID.
309 case ISD::INTRINSIC_WO_CHAIN
: {
310 unsigned IntrinsicID
=
311 cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
312 EVT VT
= Op
.getValueType();
313 DebugLoc DL
= Op
.getDebugLoc();
314 switch(IntrinsicID
) {
315 default: return AMDGPUTargetLowering::LowerOperation(Op
, DAG
);
// load_input: operand 1 selects a register of R600_TReg32RegClass, which is
// returned as a live-in register value.
316 case AMDGPUIntrinsic::R600_load_input
: {
317 int64_t RegIndex
= cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue();
318 unsigned Reg
= AMDGPU::R600_TReg32RegClass
.getRegister(RegIndex
);
319 return CreateLiveInRegister(DAG
, &AMDGPU::R600_TReg32RegClass
, Reg
, VT
);
// The three interpolated-input intrinsics below share one shape: operand 1
// is a scalar slot number, a vector node is built for slot / 4, and element
// slot % 4 is extracted from it.
321 case AMDGPUIntrinsic::R600_load_input_perspective
: {
322 unsigned slot
= cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue();
323 SDValue FullVector
= DAG
.getNode(
326 DAG
.getConstant(0, MVT::i32
), DAG
.getConstant(slot
/ 4 , MVT::i32
));
327 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
328 DL
, VT
, FullVector
, DAG
.getConstant(slot
% 4, MVT::i32
));
// Linear variant: same as perspective but the leading constant is 1.
330 case AMDGPUIntrinsic::R600_load_input_linear
: {
331 unsigned slot
= cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue();
332 SDValue FullVector
= DAG
.getNode(
335 DAG
.getConstant(1, MVT::i32
), DAG
.getConstant(slot
/ 4 , MVT::i32
));
336 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
337 DL
, VT
, FullVector
, DAG
.getConstant(slot
% 4, MVT::i32
));
// Constant variant: the vector comes from an AMDGPUISD::INTERP_P0 node.
339 case AMDGPUIntrinsic::R600_load_input_constant
: {
340 unsigned slot
= cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue();
341 SDValue FullVector
= DAG
.getNode(
342 AMDGPUISD::INTERP_P0
,
344 DAG
.getConstant(slot
/ 4 , MVT::i32
));
345 return DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
,
346 DL
, VT
, FullVector
, DAG
.getConstant(slot
% 4, MVT::i32
));
// load_input_position: reads the live-in register for the slot; the fourth
// component of each group (slot % 4 == 3) is returned through an FDIV whose
// visible operand is the constant 1.0f.
348 case AMDGPUIntrinsic::R600_load_input_position
: {
349 unsigned slot
= cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue();
350 unsigned RegIndex
= AMDGPU::R600_TReg32RegClass
.getRegister(slot
);
351 SDValue Reg
= CreateLiveInRegister(DAG
, &AMDGPU::R600_TReg32RegClass
,
353 if ((slot
% 4) == 3) {
354 return DAG
.getNode(ISD::FDIV
,
356 DAG
.getConstantFP(1.0f
, MVT::f32
),
// Grid/work-group size queries are read as implicit parameters; the last
// argument is the parameter's dword offset (ngroups 0-2, global size 3-5,
// local size 6-8, each in x, y, z order).
363 case r600_read_ngroups_x
:
364 return LowerImplicitParameter(DAG
, VT
, DL
, 0);
365 case r600_read_ngroups_y
:
366 return LowerImplicitParameter(DAG
, VT
, DL
, 1);
367 case r600_read_ngroups_z
:
368 return LowerImplicitParameter(DAG
, VT
, DL
, 2);
369 case r600_read_global_size_x
:
370 return LowerImplicitParameter(DAG
, VT
, DL
, 3);
371 case r600_read_global_size_y
:
372 return LowerImplicitParameter(DAG
, VT
, DL
, 4);
373 case r600_read_global_size_z
:
374 return LowerImplicitParameter(DAG
, VT
, DL
, 5);
375 case r600_read_local_size_x
:
376 return LowerImplicitParameter(DAG
, VT
, DL
, 6);
377 case r600_read_local_size_y
:
378 return LowerImplicitParameter(DAG
, VT
, DL
, 7);
379 case r600_read_local_size_z
:
380 return LowerImplicitParameter(DAG
, VT
, DL
, 8);
// Group-id (tgid) and thread-id (tidig) queries are returned as live-in
// registers of R600_TReg32RegClass.
382 case r600_read_tgid_x
:
383 return CreateLiveInRegister(DAG
, &AMDGPU::R600_TReg32RegClass
,
385 case r600_read_tgid_y
:
386 return CreateLiveInRegister(DAG
, &AMDGPU::R600_TReg32RegClass
,
388 case r600_read_tgid_z
:
389 return CreateLiveInRegister(DAG
, &AMDGPU::R600_TReg32RegClass
,
391 case r600_read_tidig_x
:
392 return CreateLiveInRegister(DAG
, &AMDGPU::R600_TReg32RegClass
,
394 case r600_read_tidig_y
:
395 return CreateLiveInRegister(DAG
, &AMDGPU::R600_TReg32RegClass
,
397 case r600_read_tidig_z
:
398 return CreateLiveInRegister(DAG
, &AMDGPU::R600_TReg32RegClass
,
401 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
404 } // end switch(Op.getOpcode())
// Produces replacement values for nodes whose result type needs custom
// handling and appends them to Results.
//
// N:       the node being replaced.
// Results: output list; a value is pushed only for the cases handled here.
// DAG:     the SelectionDAG that owns N.
408 void R600TargetLowering::ReplaceNodeResults(SDNode
*N
,
409 SmallVectorImpl
<SDValue
> &Results
,
410 SelectionDAG
&DAG
) const
412 switch (N
->getOpcode()) {
414 case ISD::FP_TO_UINT
: Results
.push_back(LowerFPTOUINT(N
->getOperand(0), DAG
));
// Stop here. Without this return, control fell through into the
// INTRINSIC_WO_CHAIN case below, which casts operand 0 to a ConstantSDNode;
// for an FP_TO_UINT node operand 0 is the floating-point input, not an
// intrinsic ID.
return;
415 case ISD::INTRINSIC_WO_CHAIN
:
417 unsigned IntrinsicID
=
418 cast
<ConstantSDNode
>(N
->getOperand(0))->getZExtValue();
// Only the load_input_face intrinsic gets a replacement value.
419 if (IntrinsicID
== AMDGPUIntrinsic::R600_load_input_face
) {
420 Results
.push_back(LowerInputFace(N
, DAG
));
// Lowers the load_input_face intrinsic node Op. Operand 1 is the input slot;
// the matching R600_TReg32RegClass register is read as a live-in and the
// result is an i1 SETCC testing that register against 0.0f with SETUGT.
428 SDValue
R600TargetLowering::LowerInputFace(SDNode
* Op
, SelectionDAG
&DAG
) const
430 unsigned slot
= cast
<ConstantSDNode
>(Op
->getOperand(1))->getZExtValue();
431 unsigned RegIndex
= AMDGPU::R600_TReg32RegClass
.getRegister(slot
);
432 SDValue Reg
= CreateLiveInRegister(DAG
, &AMDGPU::R600_TReg32RegClass
,
434 return DAG
.getNode(ISD::SETCC
, Op
->getDebugLoc(), MVT::i1
,
435 Reg
, DAG
.getConstantFP(0.0f
, MVT::f32
),
436 DAG
.getCondCode(ISD::SETUGT
));
// Lowers an FP_TO_UINT whose input is Op (called from ReplaceNodeResults()).
// The visible operands build a comparison of Op against 0.0f using SETNE.
439 SDValue
R600TargetLowering::LowerFPTOUINT(SDValue Op
, SelectionDAG
&DAG
) const
445 Op
, DAG
.getConstantFP(0.0f
, MVT::f32
),
446 DAG
.getCondCode(ISD::SETNE
)
// Lowers a BR_CC node. The comparison is first materialized as a value
// (CmpValue) whose true/false constants depend on the compared type, and the
// branch itself becomes an AMDGPUISD::BRANCH_COND node.
450 SDValue
R600TargetLowering::LowerBR_CC(SDValue Op
, SelectionDAG
&DAG
) const
// BR_CC operands: chain, condition code, LHS, RHS, jump target.
452 SDValue Chain
= Op
.getOperand(0);
453 SDValue CC
= Op
.getOperand(1);
454 SDValue LHS
= Op
.getOperand(2);
455 SDValue RHS
= Op
.getOperand(3);
456 SDValue JumpT
= Op
.getOperand(4);
// Integer compares select between -1 and 0.
460 if (LHS
.getValueType() == MVT::i32
) {
461 CmpValue
= DAG
.getNode(
466 DAG
.getConstant(-1, MVT::i32
),
467 DAG
.getConstant(0, MVT::i32
),
// Float compares select between 1.0f and 0.0f.
469 } else if (LHS
.getValueType() == MVT::f32
) {
470 CmpValue
= DAG
.getNode(
475 DAG
.getConstantFP(1.0f
, MVT::f32
),
476 DAG
.getConstantFP(0.0f
, MVT::f32
),
// Any other compared type is rejected.
479 assert(0 && "Not valid type for br_cc");
481 Result
= DAG
.getNode(
482 AMDGPUISD::BRANCH_COND
,
483 CmpValue
.getDebugLoc(),
// Builds a load of an implicit parameter.
//
// DAG:         the SelectionDAG that receives the load.
// VT:          value type of the parameter being read.
// DwordOffset: position of the parameter, counted in 4-byte units.
//
// Returns a load of type VT, chained to the entry node, whose address is the
// constant byte offset and whose pointer info is a null pointer in the
// PARAM_I_ADDRESS address space.
489 SDValue
R600TargetLowering::LowerImplicitParameter(SelectionDAG
&DAG
, EVT VT
,
491 unsigned DwordOffset
) const
493 unsigned ByteOffset
= DwordOffset
* 4;
494 PointerType
* PtrType
= PointerType::get(VT
.getTypeForEVT(*DAG
.getContext()),
495 AMDGPUAS::PARAM_I_ADDRESS
);
497 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
498 assert(isInt
<16>(ByteOffset
));
500 return DAG
.getLoad(VT
, DL
, DAG
.getEntryNode(),
501 DAG
.getConstant(ByteOffset
, MVT::i32
), // PTR
502 MachinePointerInfo(ConstantPointerNull::get(PtrType
)),
503 false, false, false, 0);
// Lowers a ROTL node to an AMDGPUISD::BITALIGN node of the same value type.
// One of the BITALIGN operands is a SUB whose visible operand is the
// constant 32.
506 SDValue
R600TargetLowering::LowerROTL(SDValue Op
, SelectionDAG
&DAG
) const
508 DebugLoc DL
= Op
.getDebugLoc();
509 EVT VT
= Op
.getValueType();
511 return DAG
.getNode(AMDGPUISD::BITALIGN
, DL
, VT
,
514 DAG
.getNode(ISD::SUB
, DL
, VT
,
515 DAG
.getConstant(32, MVT::i32
),
// Tests whether Op is a constant zero: an integer constant node reports
// isNullValue(), a floating-point constant node reports isZero().
519 bool R600TargetLowering::isZero(SDValue Op
) const
521 if(ConstantSDNode
*Cst
= dyn_cast
<ConstantSDNode
>(Op
)) {
522 return Cst
->isNullValue();
523 } else if(ConstantFPSDNode
*CstFP
= dyn_cast
<ConstantFPSDNode
>(Op
)){
524 return CstFP
->isZero();
// Lowers a SELECT_CC node. Three strategies are tried in order:
//   1. True/False are already the hardware true/false values: emit a
//      SELECT_CC directly, adding an int<->float conversion when the compared
//      type differs from the result type.
//   2. One side of the comparison is zero: emit a SELECT_CC in the compared
//      type and bitcast the result back to the result type.
//   3. Otherwise: split into two SELECT_CC nodes, the first producing a
//      hardware boolean and the second selecting on it with SETNE.
530 SDValue
R600TargetLowering::LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
) const
532 DebugLoc DL
= Op
.getDebugLoc();
533 EVT VT
= Op
.getValueType();
// SELECT_CC operands: LHS, RHS, value if true, value if false, condition.
535 SDValue LHS
= Op
.getOperand(0);
536 SDValue RHS
= Op
.getOperand(1);
537 SDValue True
= Op
.getOperand(2);
538 SDValue False
= Op
.getOperand(3);
539 SDValue CC
= Op
.getOperand(4);
542 // LHS and RHS are guaranteed to be the same value type
543 EVT CompareVT
= LHS
.getValueType();
545 // We need all the operands of SELECT_CC to have the same value type, so if
546 // necessary we need to convert LHS and RHS to be the same type as True and
547 // False. True and False are guaranteed to have the same type as this
550 if (isHWTrueValue(True
) && isHWFalseValue(False
)) {
551 if (CompareVT
!= VT
) {
// Integer compare, float result: select -1/0 as i32, then convert to float.
552 if (VT
== MVT::f32
&& CompareVT
== MVT::i32
) {
553 SDValue Boolean
= DAG
.getNode(ISD::SELECT_CC
, DL
, CompareVT
,
555 DAG
.getConstant(-1, MVT::i32
),
556 DAG
.getConstant(0, MVT::i32
),
558 return DAG
.getNode(ISD::UINT_TO_FP
, DL
, VT
, Boolean
);
// Float compare, integer result: select 1.0f/0.0f, then convert to integer.
559 } else if (VT
== MVT::i32
&& CompareVT
== MVT::f32
) {
560 SDValue BoolAsFlt
= DAG
.getNode(ISD::SELECT_CC
, DL
, CompareVT
,
562 DAG
.getConstantFP(1.0f
, MVT::f32
),
563 DAG
.getConstantFP(0.0f
, MVT::f32
),
565 return DAG
.getNode(ISD::FP_TO_UINT
, DL
, VT
, BoolAsFlt
);
567 // I don't think there will be any other type pairings.
568 assert(!"Unhandled operand type parings in SELECT_CC");
// Types already agree: the node can be emitted unchanged.
571 return DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, True
, False
, CC
);
576 // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
577 // we can handle this with a native instruction, but we need to swap true
578 // and false and change the conditional.
579 if (isHWTrueValue(False
) && isHWFalseValue(True
)) {
582 // Check if we can lower this to a native operation.
583 // CND* instructions requires all operands to have the same type,
584 // and RHS to be zero.
586 if (isZero(LHS
) || isZero(RHS
)) {
// Cond is whichever side is not the zero; Zero is the zero side.
587 SDValue Cond
= (isZero(LHS
) ? RHS
: LHS
);
588 SDValue Zero
= (isZero(LHS
) ? LHS
: RHS
);
589 ISD::CondCode CCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
// Reinterpret the selected values in the compared type so every operand of
// the new SELECT_CC has the same type.
590 if (CompareVT
!= VT
) {
591 True
= DAG
.getNode(ISD::BITCAST
, DL
, CompareVT
, True
);
592 False
= DAG
.getNode(ISD::BITCAST
, DL
, CompareVT
, False
);
595 CCOpcode
= ISD::getSetCCSwappedOperands(CCOpcode
);
608 CCOpcode
= ISD::getSetCCInverse(CCOpcode
, CompareVT
== MVT::i32
);
616 SDValue SelectNode
= DAG
.getNode(ISD::SELECT_CC
, DL
, CompareVT
,
619 DAG
.getCondCode(CCOpcode
));
620 return DAG
.getNode(ISD::BITCAST
, DL
, VT
, SelectNode
);
624 // If we make it this far it means we have no native instructions to handle
625 // this SELECT_CC, so we must lower it.
626 SDValue HWTrue
, HWFalse
;
// Hardware booleans: 1.0f/0.0f for float compares, -1/0 for integer compares.
628 if (CompareVT
== MVT::f32
) {
629 HWTrue
= DAG
.getConstantFP(1.0f
, CompareVT
);
630 HWFalse
= DAG
.getConstantFP(0.0f
, CompareVT
);
631 } else if (CompareVT
== MVT::i32
) {
632 HWTrue
= DAG
.getConstant(-1, CompareVT
);
633 HWFalse
= DAG
.getConstant(0, CompareVT
);
636 assert(!"Unhandled value type in LowerSELECT_CC");
639 // Lower this unsupported SELECT_CC into a combination of two supported
640 // SELECT_CC operations.
641 SDValue Cond
= DAG
.getNode(ISD::SELECT_CC
, DL
, CompareVT
, LHS
, RHS
, HWTrue
, HWFalse
, CC
);
643 return DAG
.getNode(ISD::SELECT_CC
, DL
, VT
,
646 DAG
.getCondCode(ISD::SETNE
));
// Lowers a SETCC node whose result type is i32 (asserted below). The
// comparison is built with type-dependent true/false constants: -1/0 when
// the compared values are i32, 1.0f/0.0f when they are f32.
649 SDValue
R600TargetLowering::LowerSETCC(SDValue Op
, SelectionDAG
&DAG
) const
// SETCC operands: LHS, RHS, condition code.
652 SDValue LHS
= Op
.getOperand(0);
653 SDValue RHS
= Op
.getOperand(1);
654 SDValue CC
= Op
.getOperand(2);
655 DebugLoc DL
= Op
.getDebugLoc();
656 assert(Op
.getValueType() == MVT::i32
);
657 if (LHS
.getValueType() == MVT::i32
) {
663 DAG
.getConstant(-1, MVT::i32
),
664 DAG
.getConstant(0, MVT::i32
),
666 } else if (LHS
.getValueType() == MVT::f32
) {
672 DAG
.getConstantFP(1.0f
, MVT::f32
),
673 DAG
.getConstantFP(0.0f
, MVT::f32
),
// Any other compared type is rejected.
681 assert(0 && "Not valid type for set_cc");
687 DAG
.getConstant(1, MVT::i32
),
692 // XXX Only kernel functions are supported, so we can assume for now that
693 // every function is a kernel function, but in the future we should use
694 // separate calling conventions for kernel and non-kernel functions.
// Lowers the incoming formal arguments. Each entry of Ins becomes a load
// from the PARAM_I_ADDRESS address space at a running byte offset, and the
// loaded value is appended to InVals.
//
// Ins:    descriptions of the incoming arguments, in order.
// DL:     debug location attached to the generated loads.
// DAG:    the SelectionDAG that receives the loads.
// InVals: output list; receives one value per entry of Ins.
696 SDValue
R600TargetLowering::LowerFormalArguments(
698 CallingConv::ID CallConv
,
700 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
701 DebugLoc DL
, SelectionDAG
&DAG
,
702 SmallVectorImpl
<SDValue
> &InVals
) const
// The first argument is read at byte offset 36.
// NOTE(review): presumably this skips the nine implicit dword parameters
// read by LowerImplicitParameter() (offsets 0-8) -- confirm.
704 unsigned ParamOffsetBytes
= 36;
705 for (unsigned i
= 0, e
= Ins
.size(); i
< e
; ++i
) {
707 PointerType
*PtrTy
= PointerType::get(VT
.getTypeForEVT(*DAG
.getContext()),
708 AMDGPUAS::PARAM_I_ADDRESS
);
// NOTE(review): the Argument allocated with `new` below is handed to
// MachinePointerInfo and no matching delete is visible here -- confirm who
// owns it, or whether it leaks once per argument.
709 SDValue Arg
= DAG
.getLoad(VT
, DL
, DAG
.getRoot(),
710 DAG
.getConstant(ParamOffsetBytes
, MVT::i32
),
711 MachinePointerInfo(new Argument(PtrTy
)),
712 false, false, false, 4);
713 InVals
.push_back(Arg
);
// Advance past this argument by its store size.
714 ParamOffsetBytes
+= (VT
.getStoreSize());
719 //===----------------------------------------------------------------------===//
720 // Custom DAG Optimizations
721 //===----------------------------------------------------------------------===//
723 SDValue
R600TargetLowering::PerformDAGCombine(SDNode
*N
,
724 DAGCombinerInfo
&DCI
) const
726 SelectionDAG
&DAG
= DCI
.DAG
;
728 switch (N
->getOpcode()) {
729 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
730 case ISD::FP_ROUND
: {
731 SDValue Arg
= N
->getOperand(0);
732 if (Arg
.getOpcode() == ISD::UINT_TO_FP
&& Arg
.getValueType() == MVT::f64
) {
733 return DAG
.getNode(ISD::UINT_TO_FP
, N
->getDebugLoc(), N
->getValueType(0),