//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}
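
// Operations marked Custom above are handled by LowerOperation() and the
// helpers further down; Expand lets the generic legalizer rewrite the node in
// terms of simpler operations (e.g. FSUB f32 becomes FNEG + FADD).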

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }
  case AMDGPU::FABS_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }
  case AMDGPU::FNEG_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_NEG);
      break;
    }
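
  // CLAMP_R600, FABS_R600 and FNEG_R600 all lower to a plain MOV; the real
  // work is done by the instruction flags added above: MO_FLAG_CLAMP on
  // operand 0 (the destination, clamping the result to [0.0, 1.0]) and
  // MO_FLAG_ABS / MO_FLAG_NEG on operand 1 (the source modifier).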
  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
          .addOperand(MI->getOperand(0))
          .addReg(ConstantReg);
      break;
    }
  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->AddFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }
  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                           AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
          .addReg(AMDGPU::ALU_LITERAL_X)
          .addReg(AMDGPU::PRED_SEL_OFF)
          .addImm(2); // byte address -> dword address is a right shift by 2
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
          .addOperand(MI->getOperand(1))
          .addReg(ShiftValue)
          .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
          .addOperand(MI->getOperand(0))
          .addReg(NewAddr);
      break;
    }
  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(0);
      break;
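
  // Conditional branches expand to a PRED_X that compares the condition value
  // against zero and writes PREDICATE_BIT (MO_FLAG_PUSH marks the predicate
  // for the control-flow stack), followed by a JUMP predicated on that bit.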
  case AMDGPU::BRANCH_COND_f32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_ZERO)
              .addImm(0); // Flags
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::BRANCH_COND_i32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_ZERO_INT)
              .addImm(0); // Flags
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
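
    // The implicit kernel parameters form a packed block of dwords; the
    // DwordOffset passed to LowerImplicitParameter selects one of them:
    //   0-2: ngroups.{x,y,z}  3-5: global_size.{x,y,z}  6-8: local_size.{x,y,z}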
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // The group id is pre-loaded into T1.{X,Y,Z} and the per-thread local id
    // into T0.{X,Y,Z}, so these intrinsics just read the corresponding
    // live-in registers.
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
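
// BR_CC is lowered in two steps: the comparison becomes a SELECT_CC that
// produces an all-ones (-1) or all-zeros (0) i32 mask, and the branch becomes
// an AMDGPUISD::BRANCH_COND node that takes the jump target and that mask as
// its condition.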

SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}
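
// Implicit parameters are loaded from the PARAM_I_ADDRESS constant address
// space at ByteOffset = DwordOffset * 4.  For example, r600_read_global_size_x
// above passes DwordOffset 3, which turns into an i32 load from byte offset 12.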

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}
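
// AMDGPUISD::BITALIGN selects 32 bits out of the 64-bit concatenation
// (src0:src1) shifted right by src2, so with both sources equal it is a
// rotate-right.  A left rotate by N is therefore a right rotate by (32 - N),
// e.g. rotl(x, 8) == bitalign(x, x, 24).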

SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}
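
// When True/False are not hardware boolean constants, LowerSELECT_CC below
// falls back to building a hardware boolean first (1.0f/0.0f for f32, -1/0
// for i32) with one SELECT_CC, then feeding it as the condition of a plain
// SELECT; for f32 the FNEG + FP_TO_SINT pair converts 1.0f to the integer -1.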

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type as True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value we
  // can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation. (The code below does this, but we don't have the
  // instruction selection patterns to do it yet.)
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    // ...
    // We can lower to select
    return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    // ...
    // Supported by a native operation (CNDGE, CNDGT)
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
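
// Worked example: (setcc i32 %a, %b, seteq) is lowered to
//   (and (select_cc %a, %b, -1, 0, seteq), 1)
// so the all-ones/all-zeros comparison mask is normalized to the 0/1 value
// expected from SETCC.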

SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}