//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
              .addOperand(MI->getOperand(0))
              .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
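      // (operand 1 carries the address in bytes; the right shift by two
      // below converts it into a 32-bit-word index)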
      unsigned NewAddr = MRI.createVirtualRegister(
                                            AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }
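
  // The TXD-style samples below are expanded into two instructions that load
  // the user-supplied derivatives into the gradient registers (H and V),
  // followed by the gradient sample itself; t0 and t1 are attached to the
  // sample as implicit uses so they stay live until the sample issues.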
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(0);
      break;
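
  // The conditional branches below are expanded into a PRED_X that evaluates
  // the condition value against zero into PREDICATE_BIT, followed by a JUMP
  // predicated on (and killing) that bit.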
  case AMDGPU::BRANCH_COND_f32:
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  case AMDGPU::BRANCH_COND_i32:
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO_INT);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);
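
    // The group IDs and the per-thread IDs within a group are pre-loaded by
    // the hardware into the T1 and T0 registers respectively, so these
    // intrinsics are lowered to reads of the corresponding live-in register.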
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
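
  // BR_CC is lowered as an integer SELECT_CC that materializes the comparison
  // result as -1/0, which is then consumed by a target BRANCH_COND node.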
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
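  // Implicit parameters live in consecutive dwords of the PARAM_I constant
  // space; e.g. DwordOffset 3 (byte offset 12) is global_size.x, following
  // the dispatch in LowerOperation above.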
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
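
  // BITALIGN selects a 32-bit window from the 64-bit concatenation of its
  // first two operands, shifted right by the third, so a rotate left by n
  // can be expressed as a bitalign of the value with itself by (32 - n).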
  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type as True and
  // False. True and False are guaranteed to have the same type as this
  // SELECT_CC node.
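  // (e.g. a select_cc that compares two f32 values but produces an i32
  // result converts LHS and RHS with FP_TO_SINT so all operands share VT)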
  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value we
  // can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value
  // we could still use a native instruction, but we would need to swap true
  // and false and invert the conditional.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation. (The code below does this, but we don't have the
  // instruction selection patterns to do it yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);

    // We can lower to select
    return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
  }

  // Supported by a native operation (CNDGE, CNDGT)
  return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
#endif

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
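
  // (for f32 the node above yields 1.0f when the condition holds and 0.0f
  // otherwise; the FNEG + FP_TO_SINT below remap these to the integer -1 / 0)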
  // Convert floating point condition to i1
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC  = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
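
  // The comparison below produces the hardware TRUE value -1 (or 0 for
  // FALSE); the AND with 1 then maps that onto the 0/1 result SETCC is
  // defined to return.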
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}