1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
11 // is mostly EmitInstrWithCustomInserter().
13 //===----------------------------------------------------------------------===//
15 #include "R600ISelLowering.h"
16 #include "AMDGPUUtil.h"
17 #include "R600InstrInfo.h"
18 #include "R600MachineFunctionInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
// Constructor. Forwards TM to the AMDGPUTargetLowering base, caches the
// target machine's instruction info (cast to R600InstrInfo) in TII, and then
// registers the register classes and per-opcode legalization actions below.
// NOTE(review): the numbers at the start of lines are residue of the listing
// this text was recovered from. They skip 26 and 44, which presumably held
// the constructor's braces -- confirm against the pristine file.
23 R600TargetLowering::R600TargetLowering(TargetMachine
&TM
) :
24 AMDGPUTargetLowering(TM
),
25 TII(static_cast<const R600InstrInfo
*>(TM
.getInstrInfo()))
// 64-bit integer multiply is marked Expand.
27 setOperationAction(ISD::MUL
, MVT::i64
, Expand
);
// Both 4-element vector types (v4f32, v4i32) use R600_Reg128; both scalar
// types (f32, i32) use R600_Reg32.
28 addRegisterClass(MVT::v4f32
, &AMDGPU::R600_Reg128RegClass
);
29 addRegisterClass(MVT::f32
, &AMDGPU::R600_Reg32RegClass
);
30 addRegisterClass(MVT::v4i32
, &AMDGPU::R600_Reg128RegClass
);
31 addRegisterClass(MVT::i32
, &AMDGPU::R600_Reg32RegClass
);
// Runs after every addRegisterClass() call above.
32 computeRegisterProperties();
// f32 subtraction is marked Expand.
34 setOperationAction(ISD::FSUB
, MVT::f32
, Expand
);
// The operations below are marked Custom; LowerOperation() in this file
// dispatches ROTL, SELECT_CC and SETCC to dedicated Lower* helpers.
36 setOperationAction(ISD::ROTL
, MVT::i32
, Custom
);
38 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Custom
);
39 setOperationAction(ISD::SELECT_CC
, MVT::i32
, Custom
);
41 setOperationAction(ISD::SETCC
, MVT::i32
, Custom
);
// Select the VLIW scheduling preference.
43 setSchedulingPreference(Sched::VLIW
);
// Expands instructions that need custom insertion. Dispatches on MI's
// opcode; opcodes without a case here are forwarded to
// AMDGPUTargetLowering::EmitInstrWithCustomInserter(). Replacement
// instructions are built in BB at iterator I, which is initialised from MI.
// The last visible statement erases MI from its parent; the MASK_WRITE case
// carries a comment saying it returns early to avoid that.
// NOTE(review): the numbers at the start of lines are residue of the listing
// this text was recovered from, and they skip values throughout this function
// (e.g. 55, 57, 58 around the first addLiveIn calls; 164, 167, 170 inside
// RAT_WRITE_CACHELESS_eg; 247-248 after eraseFromParent). Case labels,
// break/return statements, braces and some operand lines are therefore not
// visible here. Confirm control flow against the pristine file.
46 MachineBasicBlock
* R600TargetLowering::EmitInstrWithCustomInserter(
47 MachineInstr
* MI
, MachineBasicBlock
* BB
) const
49 MachineFunction
* MF
= BB
->getParent();
50 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
51 MachineBasicBlock::iterator I
= *MI
;
53 switch (MI
->getOpcode()) {
54 default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI
, BB
);
// Six addLiveIn() calls follow, one each for T1_X, T1_Y, T1_Z, T0_X, T0_Y
// and T0_Z. NOTE(review): the case labels selecting them are not visible in
// this listing.
56 addLiveIn(MI
, MF
, MRI
, TII
, AMDGPU::T1_X
);
59 addLiveIn(MI
, MF
, MRI
, TII
, AMDGPU::T1_Y
);
62 addLiveIn(MI
, MF
, MRI
, TII
, AMDGPU::T1_Z
);
65 addLiveIn(MI
, MF
, MRI
, TII
, AMDGPU::T0_X
);
68 addLiveIn(MI
, MF
, MRI
, TII
, AMDGPU::T0_Y
);
71 addLiveIn(MI
, MF
, MRI
, TII
, AMDGPU::T0_Z
);
// NGROUPS_{X,Y,Z}, GLOBAL_SIZE_{X,Y,Z} and LOCAL_SIZE_{X,Y,Z} are passed to
// lowerImplicitParameter() with dword offsets 0-2, 3-5 and 6-8 respectively.
73 case AMDGPU::NGROUPS_X
:
74 lowerImplicitParameter(MI
, *BB
, MRI
, 0);
76 case AMDGPU::NGROUPS_Y
:
77 lowerImplicitParameter(MI
, *BB
, MRI
, 1);
79 case AMDGPU::NGROUPS_Z
:
80 lowerImplicitParameter(MI
, *BB
, MRI
, 2);
82 case AMDGPU::GLOBAL_SIZE_X
:
83 lowerImplicitParameter(MI
, *BB
, MRI
, 3);
85 case AMDGPU::GLOBAL_SIZE_Y
:
86 lowerImplicitParameter(MI
, *BB
, MRI
, 4);
88 case AMDGPU::GLOBAL_SIZE_Z
:
89 lowerImplicitParameter(MI
, *BB
, MRI
, 5);
91 case AMDGPU::LOCAL_SIZE_X
:
92 lowerImplicitParameter(MI
, *BB
, MRI
, 6);
94 case AMDGPU::LOCAL_SIZE_Y
:
95 lowerImplicitParameter(MI
, *BB
, MRI
, 7);
97 case AMDGPU::LOCAL_SIZE_Z
:
98 lowerImplicitParameter(MI
, *BB
, MRI
, 8);
// CLAMP_R600: add MO_FLAG_CLAMP to operand 0, then build a MOV carrying
// operands 0 and 1 of MI.
101 case AMDGPU::CLAMP_R600
:
102 MI
->getOperand(0).addTargetFlag(MO_FLAG_CLAMP
);
103 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::MOV
))
104 .addOperand(MI
->getOperand(0))
105 .addOperand(MI
->getOperand(1));
// FABS_R600: add MO_FLAG_ABS to operand 1 (note: operand 1, unlike CLAMP
// which flags operand 0), then build a MOV carrying operands 0 and 1.
108 case AMDGPU::FABS_R600
:
109 MI
->getOperand(1).addTargetFlag(MO_FLAG_ABS
);
110 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::MOV
))
111 .addOperand(MI
->getOperand(0))
112 .addOperand(MI
->getOperand(1));
// FNEG_R600: add MO_FLAG_NEG to operand 1, then build a MOV carrying
// operands 0 and 1.
115 case AMDGPU::FNEG_R600
:
116 MI
->getOperand(1).addTargetFlag(MO_FLAG_NEG
);
117 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::MOV
))
118 .addOperand(MI
->getOperand(0))
119 .addOperand(MI
->getOperand(1));
// R600_LOAD_CONST: operand 1 is an immediate used as an index into
// R600_CReg32; build a COPY with operand 0 and that register.
122 case AMDGPU::R600_LOAD_CONST
:
124 int64_t RegIndex
= MI
->getOperand(1).getImm();
125 unsigned ConstantReg
= AMDGPU::R600_CReg32RegClass
.getRegister(RegIndex
);
126 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::COPY
))
127 .addOperand(MI
->getOperand(0))
128 .addReg(ConstantReg
);
// LOAD_INPUT: operand 1 is an immediate used as an index into R600_TReg32;
// the selected register is handed to addLiveIn().
132 case AMDGPU::LOAD_INPUT
:
134 int64_t RegIndex
= MI
->getOperand(1).getImm();
135 addLiveIn(MI
, MF
, MRI
, TII
,
136 AMDGPU::R600_TReg32RegClass
.getRegister(RegIndex
));
// MASK_WRITE: look up the instruction that defines the virtual register in
// operand 0 and add MO_FLAG_MASK to its defining operand.
// NOTE(review): defInstr and def are dereferenced without a null check;
// presumably a definition always exists at this point -- confirm.
140 case AMDGPU::MASK_WRITE
:
142 unsigned maskedRegister
= MI
->getOperand(0).getReg();
143 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister
));
144 MachineInstr
* defInstr
= MRI
.getVRegDef(maskedRegister
);
145 MachineOperand
* def
= defInstr
->findRegisterDefOperand(maskedRegister
);
146 def
->addTargetFlag(MO_FLAG_MASK
);
147 // Return early so the instruction is not erased
// RAT_WRITE_CACHELESS_eg: creates two virtual registers (NewAddr,
// ShiftValue) and builds three instructions: a MOV into ShiftValue, an
// LSHR_eg into NewAddr, and a new instruction with MI's own opcode.
// NOTE(review): the trailing operands of each BuildMI chain (embedded lines
// 164, 167, 170) are not visible in this listing.
// NOTE(review): the register classes are spelled ...RegisterClass here,
// whereas lowerImplicitParameter() passes &AMDGPU::R600_TReg32_XRegClass --
// confirm both spellings are intended.
151 case AMDGPU::RAT_WRITE_CACHELESS_eg
:
153 // Convert to DWORD address
154 unsigned NewAddr
= MRI
.createVirtualRegister(
155 AMDGPU::R600_TReg32_XRegisterClass
);
156 unsigned ShiftValue
= MRI
.createVirtualRegister(
157 AMDGPU::R600_TReg32RegisterClass
);
159 // XXX In theory, we should be able to pass ShiftValue directly to
160 // the LSHR_eg instruction as an inline literal, but I tried doing it
161 // this way and it didn't produce the correct results.
162 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::MOV
), ShiftValue
)
163 .addReg(AMDGPU::ALU_LITERAL_X
)
165 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::LSHR_eg
), NewAddr
)
166 .addOperand(MI
->getOperand(1))
168 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(MI
->getOpcode()))
169 .addOperand(MI
->getOperand(0))
// STORE_OUTPUT: operand 1 is an immediate used as an index into R600_TReg32;
// COPY operand 0 into that register and add it to the live-out set if it is
// not already there.
174 case AMDGPU::STORE_OUTPUT
:
176 int64_t OutputIndex
= MI
->getOperand(1).getImm();
177 unsigned OutputReg
= AMDGPU::R600_TReg32RegClass
.getRegister(OutputIndex
);
179 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::COPY
), OutputReg
)
180 .addOperand(MI
->getOperand(0));
182 if (!MRI
.isLiveOut(OutputReg
)) {
183 MRI
.addLiveOut(OutputReg
);
// RESERVE_REG: operand 0 is an immediate used as an index into R600_TReg32;
// the selected register is appended to the function info's ReservedRegs.
188 case AMDGPU::RESERVE_REG
:
190 R600MachineFunctionInfo
* MFI
= MF
->getInfo
<R600MachineFunctionInfo
>();
191 int64_t ReservedIndex
= MI
->getOperand(0).getImm();
192 unsigned ReservedReg
=
193 AMDGPU::R600_TReg32RegClass
.getRegister(ReservedIndex
);
194 MFI
->ReservedRegs
.push_back(ReservedReg
);
// NOTE(review): the case label for the block below (embedded line 199) is
// not visible in this listing. Its body parallels TXD_SHADOW further down
// but ends in TEX_SAMPLE_G instead of TEX_SAMPLE_C_G.
// Sequence: TEX_SET_GRADIENTS_H (operands 3, 4, 5) defines t0;
// TEX_SET_GRADIENTS_V (operands 2, 4, 5) defines t1; the sample instruction
// takes operands 0, 1, 4, 5 plus t0 and t1 as implicit register operands.
200 unsigned t0
= MRI
.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass
);
201 unsigned t1
= MRI
.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass
);
203 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SET_GRADIENTS_H
), t0
)
204 .addOperand(MI
->getOperand(3))
205 .addOperand(MI
->getOperand(4))
206 .addOperand(MI
->getOperand(5));
207 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SET_GRADIENTS_V
), t1
)
208 .addOperand(MI
->getOperand(2))
209 .addOperand(MI
->getOperand(4))
210 .addOperand(MI
->getOperand(5));
211 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SAMPLE_G
))
212 .addOperand(MI
->getOperand(0))
213 .addOperand(MI
->getOperand(1))
214 .addOperand(MI
->getOperand(4))
215 .addOperand(MI
->getOperand(5))
216 .addReg(t0
, RegState::Implicit
)
217 .addReg(t1
, RegState::Implicit
);
// TXD_SHADOW: same three-instruction sequence as the block above, with
// TEX_SAMPLE_C_G as the final sample instruction.
220 case AMDGPU::TXD_SHADOW
:
222 unsigned t0
= MRI
.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass
);
223 unsigned t1
= MRI
.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass
);
225 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SET_GRADIENTS_H
), t0
)
226 .addOperand(MI
->getOperand(3))
227 .addOperand(MI
->getOperand(4))
228 .addOperand(MI
->getOperand(5));
229 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SET_GRADIENTS_V
), t1
)
230 .addOperand(MI
->getOperand(2))
231 .addOperand(MI
->getOperand(4))
232 .addOperand(MI
->getOperand(5));
233 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::TEX_SAMPLE_C_G
))
234 .addOperand(MI
->getOperand(0))
235 .addOperand(MI
->getOperand(1))
236 .addOperand(MI
->getOperand(4))
237 .addOperand(MI
->getOperand(5))
238 .addReg(t0
, RegState::Implicit
)
239 .addReg(t1
, RegState::Implicit
);
// Remove the original instruction now that its replacement has been built.
246 MI
->eraseFromParent();
// Builds the instructions that replace an implicit-parameter pseudo MI.
//   MI           - instruction being lowered; its operand 0 is reused as an
//                  operand of the VTX_READ_PARAM_i32_eg built below.
//   BB           - block the new instructions are inserted into, at MI's
//                  position.
//   MRI          - used to create PtrReg and to set the register class of
//                  MI's operand 0.
//   dword_offset - parameter index; multiplied by 4 before being used as the
//                  MOV immediate (presumably a dword-to-byte conversion --
//                  confirm).
// NOTE(review): the numbers at the start of lines are residue of the listing
// this text was recovered from. Embedded lines 263-264 are not visible, so
// the remaining operands of the VTX_READ_PARAM_i32_eg chain cannot be seen
// here.
250 void R600TargetLowering::lowerImplicitParameter(MachineInstr
*MI
, MachineBasicBlock
&BB
,
251 MachineRegisterInfo
& MRI
, unsigned dword_offset
) const
253 MachineBasicBlock::iterator I
= *MI
;
// PtrReg and MI's operand 0 register both use the R600_TReg32_X class.
254 unsigned PtrReg
= MRI
.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass
);
255 MRI
.setRegClass(MI
->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass
);
// MOV PtrReg, ALU_LITERAL_X with immediate dword_offset * 4.
257 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::MOV
), PtrReg
)
258 .addReg(AMDGPU::ALU_LITERAL_X
)
259 .addImm(dword_offset
* 4);
// Parameter read; the first operand added is MI's operand 0.
261 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::VTX_READ_PARAM_i32_eg
))
262 .addOperand(MI
->getOperand(0))
267 //===----------------------------------------------------------------------===//
268 // Custom DAG Lowering Operations
269 //===----------------------------------------------------------------------===//
// Entry point for custom DAG lowering. ROTL, SELECT_CC and SETCC are routed
// to the matching Lower* helper in this file; every other opcode is forwarded
// to AMDGPUTargetLowering::LowerOperation().
// NOTE(review): the numbers at the start of lines are residue of the listing
// this text was recovered from; the function's braces (embedded lines 273,
// 279, 280) are not visible here.
272 SDValue
R600TargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const
274 switch (Op
.getOpcode()) {
275 default: return AMDGPUTargetLowering::LowerOperation(Op
, DAG
);
276 case ISD::ROTL
: return LowerROTL(Op
, DAG
);
277 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
);
278 case ISD::SETCC
: return LowerSETCC(Op
, DAG
);
// Lowers ISD::ROTL to an AMDGPUISD::BITALIGN node of the same value type.
// One of the BITALIGN operands is an ISD::SUB node whose first operand is the
// i32 constant 32.
// NOTE(review): the numbers at the start of lines are residue of the listing
// this text was recovered from. Embedded lines 288, 289 and 292 are not
// visible, so the remaining BITALIGN operands and the second SUB operand
// cannot be seen here.
282 SDValue
R600TargetLowering::LowerROTL(SDValue Op
, SelectionDAG
&DAG
) const
284 DebugLoc DL
= Op
.getDebugLoc();
285 EVT VT
= Op
.getValueType();
287 return DAG
.getNode(AMDGPUISD::BITALIGN
, DL
, VT
,
290 DAG
.getNode(ISD::SUB
, DL
, VT
,
291 DAG
.getConstant(32, MVT::i32
),
// Custom lowering for ISD::SELECT_CC.
// Operand layout read below: 0 = LHS, 1 = RHS, 2 = True, 3 = False, 4 = CC.
// Visible steps, in order:
//   1. If the compare type differs from the result type VT, convert LHS and
//      RHS to VT.
//   2. If True/False are already the hardware true/false values, re-emit the
//      SELECT_CC unchanged.
//   3. If either compare operand is zero, emit a SELECT or a SELECT_CC (the
//      conditions choosing between them are not visible in this listing).
//   4. Otherwise emit a SELECT_CC producing the hardware true/false constant
//      and feed it to a SELECT between True and False.
// NOTE(review): the numbers at the start of lines are residue of the listing
// this text was recovered from, and they skip values (e.g. 321, 323, 326,
// 346-347, 354-363, 365-370), so else-branches, closing braces and some
// statements are not visible here. Confirm against the pristine file.
295 SDValue
R600TargetLowering::LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
) const
297 DebugLoc DL
= Op
.getDebugLoc();
298 EVT VT
= Op
.getValueType();
300 SDValue LHS
= Op
.getOperand(0);
301 SDValue RHS
= Op
.getOperand(1);
302 SDValue True
= Op
.getOperand(2);
303 SDValue False
= Op
.getOperand(3);
304 SDValue CC
= Op
.getOperand(4);
305 ISD::CondCode CCOpcode
= cast
<CondCodeSDNode
>(CC
)->get();
308 // LHS and RHS are guaranteed to be the same value type
309 EVT CompareVT
= LHS
.getValueType();
311 // We need all the operands of SELECT_CC to have the same value type, so if
312 // necessary we need to convert LHS and RHS to be the same type as True and
313 // False. True and False are guaranteed to have the same type as this
316 if (CompareVT
!= VT
) {
317 ISD::NodeType ConversionOp
= ISD::DELETED_NODE
;
// i32 compare feeding an f32 result: pick the unsigned or signed int-to-fp
// conversion according to the condition code.
318 if (VT
== MVT::f32
&& CompareVT
== MVT::i32
) {
319 if (isUnsignedIntSetCC(CCOpcode
)) {
320 ConversionOp
= ISD::UINT_TO_FP
;
322 ConversionOp
= ISD::SINT_TO_FP
;
// f32 compare feeding an i32 result: operands are converted with FP_TO_SINT.
// NOTE(review): converting the f32 compare operands to integers before the
// comparison could change its outcome for non-integral values -- confirm
// this is intended.
324 } else if (VT
== MVT::i32
&& CompareVT
== MVT::f32
) {
325 ConversionOp
= ISD::FP_TO_SINT
;
327 // I don't think there will be any other type pairings.
328 assert(!"Unhandled operand type parings in SELECT_CC");
330 // XXX Check the value of LHS and RHS and avoid creating sequences like
332 LHS
= DAG
.getNode(ConversionOp
, DL
, VT
, LHS
);
333 RHS
= DAG
.getNode(ConversionOp
, DL
, VT
, RHS
);
336 // If True is a hardware TRUE value and False is a hardware FALSE value or
337 // vice-versa we can handle this with a native instruction (SET* instructions).
338 if ((isHWTrueValue(True
) && isHWFalseValue(False
))) {
339 return DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, True
, False
, CC
);
342 // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
343 // we can handle this with a native instruction, but we need to swap true
344 // and false and change the conditional.
345 if (isHWTrueValue(False
) && isHWFalseValue(True
)) {
348 // XXX Check if we can lower this to a SELECT or if it is supported by a native
349 // operation. (The code below does this but we don't have the Instruction
350 // selection patterns to do this yet.)
352 if (isZero(LHS
) || isZero(RHS
)) {
// Cond is the operand that was not found to be zero first: RHS when LHS is
// zero, otherwise LHS.
353 SDValue Cond
= (isZero(LHS
) ? RHS
: LHS
);
364 // We can lower to select
371 return DAG
.getNode(ISD::SELECT
, DL
, VT
, Cond
, True
, False
);
373 // Supported by a native operation (CNDGE, CNDGT)
374 return DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, True
, False
, CC
);
379 // If we make it this far it means we have no native instructions to handle
380 // this SELECT_CC, so we must lower it.
381 SDValue HWTrue
, HWFalse
;
// Hardware boolean constants: 1.0f / 0.0f for f32, -1 / 0 for i32.
383 if (VT
== MVT::f32
) {
384 HWTrue
= DAG
.getConstantFP(1.0f
, VT
);
385 HWFalse
= DAG
.getConstantFP(0.0f
, VT
);
386 } else if (VT
== MVT::i32
) {
387 HWTrue
= DAG
.getConstant(-1, VT
);
388 HWFalse
= DAG
.getConstant(0, VT
);
391 assert(!"Unhandled value type in LowerSELECT_CC");
394 // Lower this unsupported SELECT_CC into a combination of two supported
395 // SELECT_CC operations.
396 SDValue Cond
= DAG
.getNode(ISD::SELECT_CC
, DL
, VT
, LHS
, RHS
, HWTrue
, HWFalse
, CC
);
// The final node built here is an ISD::SELECT on Cond.
398 return DAG
.getNode(ISD::SELECT
, DL
, VT
, Cond
, True
, False
);
401 SDValue
R600TargetLowering::LowerSETCC(SDValue Op
, SelectionDAG
&DAG
) const
404 SDValue LHS
= Op
.getOperand(0);
405 SDValue RHS
= Op
.getOperand(1);
406 SDValue CC
= Op
.getOperand(2);
407 DebugLoc DL
= Op
.getDebugLoc();
408 assert(Op
.getValueType() == MVT::i32
);
414 DAG
.getConstant(-1, MVT::i32
),
415 DAG
.getConstant(0, MVT::i32
),
421 DAG
.getConstant(1, MVT::i32
),