//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file is
// mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//
#include "SIISelLowering.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) {
  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
  addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);
  addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
  addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);

  addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
  addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);

  computeRegisterProperties();

  setOperationAction(ISD::AND, MVT::i1, Custom);

  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::ADD, MVT::i32, Legal);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We need to custom lower loads from the USER_SGPR address space, so we can
  // add the SGPRs as livein registers.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::i64, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  setTargetDAGCombine(ISD::SELECT_CC);

  setTargetDAGCombine(ISD::SETCC);
}
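
// Pseudo instructions that need custom insertion are expanded here.  In
// addition, any instruction whose TableGen description sets the NEED_WAIT
// flag gets an S_WAITCNT appended after it.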
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
  MachineBasicBlock::iterator I = MI;

  if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
    AppendS_WAITCNT(MI, *BB, llvm::next(I));
    return BB;
  }

  switch (MI->getOpcode()) {
  default:
    return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_SI:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           // VSRC1-2 are unused, but we still need to fill all the
           // operand slots, so we just reuse the VSRC0 operand
           .addOperand(MI->getOperand(1))
           .addOperand(MI->getOperand(1))
           .addImm(0)  // ABS
           .addImm(1)  // CLAMP
           .addImm(0)  // OMOD
           .addImm(0); // NEG
    MI->eraseFromParent();
    break;

  case AMDGPU::FABS_SI:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           // VSRC1-2 are unused, but we still need to fill all the
           // operand slots, so we just reuse the VSRC0 operand
           .addOperand(MI->getOperand(1))
           .addOperand(MI->getOperand(1))
           .addImm(1)  // ABS
           .addImm(0)  // CLAMP
           .addImm(0)  // OMOD
           .addImm(0); // NEG
    MI->eraseFromParent();
    break;

  case AMDGPU::FNEG_SI:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           // VSRC1-2 are unused, but we still need to fill all the
           // operand slots, so we just reuse the VSRC0 operand
           .addOperand(MI->getOperand(1))
           .addOperand(MI->getOperand(1))
           .addImm(0)  // ABS
           .addImm(0)  // CLAMP
           .addImm(0)  // OMOD
           .addImm(1); // NEG
    MI->eraseFromParent();
    break;

  case AMDGPU::SI_INTERP:
    LowerSI_INTERP(MI, *BB, I, MRI);
    break;
  case AMDGPU::SI_INTERP_CONST:
    LowerSI_INTERP_CONST(MI, *BB, I);
    break;
  case AMDGPU::SI_KIL:
    LowerSI_KIL(MI, *BB, I, MRI);
    break;
  case AMDGPU::SI_V_CNDLT:
    LowerSI_V_CNDLT(MI, *BB, I, MRI);
    break;
  }
  return BB;
}
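
// Insert an S_WAITCNT after MI (the iterator I points past MI) so that
// outstanding memory and export operations complete before later instructions
// consume their results.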
void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I) const {
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
          .addImm(0);
}
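
// Expand SI_INTERP: move the interpolation parameters into M0, then perform
// two-phase barycentric interpolation with V_INTERP_P1_F32 (i coordinate) and
// V_INTERP_P2_F32 (j coordinate).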
void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
  unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
  MachineOperand dst = MI->getOperand(0);
  MachineOperand iReg = MI->getOperand(1);
  MachineOperand jReg = MI->getOperand(2);
  MachineOperand attr_chan = MI->getOperand(3);
  MachineOperand attr = MI->getOperand(4);
  MachineOperand params = MI->getOperand(5);

  // The interpolation parameters must be in M0 before the V_INTERP instructions.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32))
          .addReg(AMDGPU::M0)
          .addOperand(params);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
          .addOperand(iReg)
          .addOperand(attr_chan)
          .addOperand(attr);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
          .addOperand(dst)
          .addReg(tmp)
          .addOperand(jReg)
          .addOperand(attr_chan)
          .addOperand(attr);

  MI->eraseFromParent();
}
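
// Expand SI_INTERP_CONST: the attribute uses constant (flat) interpolation, so
// a single V_INTERP_MOV_F32 is enough once the parameters are in M0.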
void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
    MachineBasicBlock &BB, MachineBasicBlock::iterator I) const {
  MachineOperand dst = MI->getOperand(0);
  MachineOperand attr_chan = MI->getOperand(1);
  MachineOperand attr = MI->getOperand(2);
  MachineOperand params = MI->getOperand(3);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32))
          .addReg(AMDGPU::M0)
          .addOperand(params);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
          .addOperand(dst)
          .addOperand(attr_chan)
          .addOperand(attr);

  MI->eraseFromParent();
}
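
// Expand SI_KIL: clear this pixel's lane from the exec mask when the operand
// is negative; once the whole exec mask is zero, export to the NULL target and
// terminate the wavefront.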
void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
  // Clear this pixel from the exec mask if the operand is negative
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
          AMDGPU::VCC)
          .addReg(AMDGPU::SREG_LIT_0)
          .addOperand(MI->getOperand(0));

  // If the exec mask is non-zero, skip the next two instructions
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
          .addImm(3)
          .addReg(AMDGPU::EXEC);

  // Exec mask is zero: Export to NULL target...
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP))
          .addImm(0)
          .addImm(0x09) // V_008DFC_SQ_EXP_NULL
          .addImm(0)
          .addImm(1)
          .addImm(1)
          .addReg(AMDGPU::SREG_LIT_0)
          .addReg(AMDGPU::SREG_LIT_0)
          .addReg(AMDGPU::SREG_LIT_0)
          .addReg(AMDGPU::SREG_LIT_0);

  // ... and terminate wavefront
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));

  MI->eraseFromParent();
}
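
// Expand SI_V_CNDLT: set VCC by comparing the condition operand with 0.0, then
// select between the two value operands with V_CNDMASK_B32.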
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_LT_F32_e32),
          AMDGPU::VCC)
          .addOperand(MI->getOperand(1))
          .addReg(AMDGPU::SREG_LIT_0);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(3));

  MI->eraseFromParent();
}
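
// Comparisons on SI produce a single condition bit (kept in VCC/SCC), so the
// result type of a setcc is always i1.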
EVT SITargetLowering::getSetCCResultType(EVT VT) const {
  return MVT::i1;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::SI_vs_load_buffer_index:
      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
                                  AMDGPU::VGPR0, VT);
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    }
    break;
  }
  }
  return SDValue();
}

/// Loweri1ContextSwitch - Lower i1 operations on the VCC register.  In the
/// VALU context, VCC is a one-bit register, but in the SALU context VCC is a
/// 64-bit register (one bit per thread).  Since only the SALU can perform
/// operations on the VCC register, we need to promote the operand types from
/// i1 to i64 so that tablegen can match this operation to the correct SALU
/// instruction.  We do this promotion by wrapping the operands in
/// SIISD::VCC_BITCAST nodes.
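///
/// For example, (i1 (and a, b)) is lowered to roughly:
///   (i1 vcc_bitcast (i64 and (vcc_bitcast a), (vcc_bitcast b)))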
SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
                                               SelectionDAG &DAG,
                                               unsigned VCCNode) const {
  DebugLoc DL = Op.getDebugLoc();

  SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
                               DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
                                           Op.getOperand(0)),
                               DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
                                           Op.getOperand(1)));

  return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
}

SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue CC    = Op.getOperand(1);
  SDValue LHS   = Op.getOperand(2);
  SDValue RHS   = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);

  SDValue CmpValue = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1,
                                 LHS, RHS, CC);

  SDValue Result = DAG.getNode(AMDGPUISD::BRANCH_COND, CmpValue.getDebugLoc(),
                               MVT::Other, Chain, JumpT, CmpValue);

  return Result;
}

SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
  assert(Ptr);

  unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();

  // We only need to lower USER_SGPR address space loads
  if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
    return SDValue();
  }

  // Loads from the USER_SGPR address space can only have constant value
  // pointers.
  ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
  assert(BasePtr);

  unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
  const TargetRegisterClass * dstClass;
  switch (TypeDwordWidth) {
    default:
      assert(!"USER_SGPR value size not implemented");
      return SDValue();
    case 1:
      dstClass = &AMDGPU::SReg_32RegClass;
      break;
    case 2:
      dstClass = &AMDGPU::SReg_64RegClass;
      break;
  }

  uint64_t Index = BasePtr->getZExtValue();
  assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
  unsigned SGPRIndex = Index / TypeDwordWidth;
  unsigned Reg = dstClass->getRegister(SGPRIndex);

  DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
                                                         VT));
  return SDValue();
}

SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();

  SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//

SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  DebugLoc DL = N->getDebugLoc();
  EVT VT = N->getValueType(0);

  switch (N->getOpcode()) {
    default: break;
    case ISD::SELECT_CC: {
      ConstantSDNode *True, *False;
      // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
      if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
          && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
          && True->isAllOnesValue()
          && False->isNullValue()
          && VT == MVT::i1) {
        return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
                           N->getOperand(1), N->getOperand(4));
      }
      break;
    }
    case ISD::SETCC: {
      SDValue Arg0 = N->getOperand(0);
      SDValue Arg1 = N->getOperand(1);
      SDValue CC = N->getOperand(2);
      ConstantSDNode * C = NULL;
      ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();

      // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
      if (VT == MVT::i1
          && Arg0.getOpcode() == ISD::SIGN_EXTEND
          && Arg0.getOperand(0).getValueType() == MVT::i1
          && (C = dyn_cast<ConstantSDNode>(Arg1))
          && C->isNullValue()
          && CCOp == ISD::SETNE) {
        return SimplifySetCC(VT, Arg0.getOperand(0),
                             DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
      }
      break;
    }
  }
  return SDValue();
}

#define NODE_NAME_CASE(node) case SIISD::node: return #node;

const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
  NODE_NAME_CASE(VCC_AND)
  NODE_NAME_CASE(VCC_BITCAST)
  }
}