092c2fa67e29f2fd6086153524e122a92222c4a3
1 //===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
11 // mostly EmitInstrWithCustomInserter().
13 //===----------------------------------------------------------------------===//
15 #include "SIISelLowering.h"
17 #include "AMDILIntrinsicInfo.h"
18 #include "SIInstrInfo.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
// Constructor. Forwards TM to the AMDGPUTargetLowering base and caches
// TM.getInstrInfo(), cast to SIInstrInfo, in the TII member. It then registers
// a register class for each supported value type, computes the register
// properties, and declares how individual ISD operations are to be handled
// (Legal, Custom or Expand) plus which nodes get a target DAG combine.
26 SITargetLowering::SITargetLowering(TargetMachine
&TM
) :
27 AMDGPUTargetLowering(TM
),
28 TII(static_cast<const SIInstrInfo
*>(TM
.getInstrInfo()))
// Value type -> register class registrations. MVT::i1 is registered twice,
// once with SCCRegRegClass and once with VCCRegRegClass.
30 addRegisterClass(MVT::v4f32
, &AMDGPU::VReg_128RegClass
);
31 addRegisterClass(MVT::f32
, &AMDGPU::VReg_32RegClass
);
32 addRegisterClass(MVT::i32
, &AMDGPU::VReg_32RegClass
);
33 addRegisterClass(MVT::i64
, &AMDGPU::VReg_64RegClass
);
34 addRegisterClass(MVT::i1
, &AMDGPU::SCCRegRegClass
);
35 addRegisterClass(MVT::i1
, &AMDGPU::VCCRegRegClass
);
// The integer vector types use the SReg_128 / SReg_256 classes.
37 addRegisterClass(MVT::v4i32
, &AMDGPU::SReg_128RegClass
);
38 addRegisterClass(MVT::v8i32
, &AMDGPU::SReg_256RegClass
);
// Called after all addRegisterClass() registrations above.
40 computeRegisterProperties();
// i1 AND is custom lowered (LowerOperation routes ISD::AND to
// Loweri1ContextSwitch).
42 setOperationAction(ISD::AND
, MVT::i1
, Custom
);
44 setOperationAction(ISD::ADD
, MVT::i64
, Legal
);
45 setOperationAction(ISD::ADD
, MVT::i32
, Legal
);
47 setOperationAction(ISD::BR_CC
, MVT::i32
, Custom
);
49 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
51 // We need to custom lower loads from the USER_SGPR address space, so we can
52 // add the SGPRs as livein registers.
53 setOperationAction(ISD::LOAD
, MVT::i32
, Custom
);
54 setOperationAction(ISD::LOAD
, MVT::i64
, Custom
);
// SELECT_CC is custom lowered for f32 and i32 and expanded for MVT::Other.
56 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Custom
);
57 setOperationAction(ISD::SELECT_CC
, MVT::i32
, Custom
);
59 setOperationAction(ISD::SELECT_CC
, MVT::Other
, Expand
);
// Request PerformDAGCombine() callbacks for SELECT_CC and SETCC nodes.
60 setTargetDAGCombine(ISD::SELECT_CC
);
62 setTargetDAGCombine(ISD::SETCC
);
// Expands instructions that need a custom inserter.
//
// MI is the instruction being expanded and BB is the block that contains it.
// Instructions whose TSFlags carry SIInstrFlags::NEED_WAIT get an S_WAITCNT
// appended after them. Otherwise the opcode selects one of the expansions
// below; opcodes handled by none of the visible cases are forwarded to
// AMDGPUTargetLowering::EmitInstrWithCustomInserter.
//
// NOTE(review): this listing appears to be missing lines (braces, some case
// labels, break/return statements); confirm control flow against the full
// file before relying on the comments below.
65 MachineBasicBlock
* SITargetLowering::EmitInstrWithCustomInserter(
66 MachineInstr
* MI
, MachineBasicBlock
* BB
) const
// This local TII (a generic TargetInstrInfo) is fetched from the target
// machine; the constructor also initialises a member named TII.
68 const TargetInstrInfo
* TII
= getTargetMachine().getInstrInfo();
69 MachineRegisterInfo
& MRI
= BB
->getParent()->getRegInfo();
// Iterator positioned at MI; the BuildMI() calls below use it as the
// insertion point.
70 MachineBasicBlock::iterator I
= MI
;
// NEED_WAIT instructions: the wait is placed at llvm::next(I), i.e. at the
// position following MI.
72 if (TII
->get(MI
->getOpcode()).TSFlags
& SIInstrFlags::NEED_WAIT
) {
73 AppendS_WAITCNT(MI
, *BB
, llvm::next(I
));
77 switch (MI
->getOpcode()) {
// Fallback to the base class implementation.
79 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI
, BB
);
// CLAMP_SI: replaced by a V_MOV_B32_e64 built from MI's operands 0 and 1.
// NOTE(review): any modifier operands that follow are not visible here.
81 case AMDGPU::CLAMP_SI
:
82 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::V_MOV_B32_e64
))
83 .addOperand(MI
->getOperand(0))
84 .addOperand(MI
->getOperand(1))
85 // VSRC1-2 are unused, but we still need to fill all the
86 // operand slots, so we just reuse the VSRC0 operand
87 .addOperand(MI
->getOperand(1))
88 .addOperand(MI
->getOperand(1))
// The pseudo instruction is removed once its replacement has been built.
93 MI
->eraseFromParent();
// NOTE(review): a second, identical-looking V_MOV_B32_e64 sequence follows
// with no visible case label; it presumably belongs to another pseudo opcode.
97 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::V_MOV_B32_e64
))
98 .addOperand(MI
->getOperand(0))
99 .addOperand(MI
->getOperand(1))
100 // VSRC1-2 are unused, but we still need to fill all the
101 // operand slots, so we just reuse the VSRC0 operand
102 .addOperand(MI
->getOperand(1))
103 .addOperand(MI
->getOperand(1))
108 MI
->eraseFromParent();
// FNEG_SI: same V_MOV_B32_e64 shape as above, built from operands 0 and 1.
111 case AMDGPU::FNEG_SI
:
112 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::V_MOV_B32_e64
))
113 .addOperand(MI
->getOperand(0))
114 .addOperand(MI
->getOperand(1))
115 // VSRC1-2 are unused, but we still need to fill all the
116 // operand slots, so we just reuse the VSRC0 operand
117 .addOperand(MI
->getOperand(1))
118 .addOperand(MI
->getOperand(1))
123 MI
->eraseFromParent();
// The remaining pseudo instructions are expanded by dedicated helpers.
126 case AMDGPU::SI_INTERP
:
127 LowerSI_INTERP(MI
, *BB
, I
, MRI
);
129 case AMDGPU::SI_INTERP_CONST
:
130 LowerSI_INTERP_CONST(MI
, *BB
, I
);
132 case AMDGPU::SI_V_CNDLT
:
133 LowerSI_V_CNDLT(MI
, *BB
, I
, MRI
);
// Builds an S_WAITCNT instruction in BB at position I, using the debug
// location found at I. The opcode descriptor comes from the TII member.
// NOTE(review): the operand(s) added to the S_WAITCNT are not visible in this
// listing, and MI is not referenced by the visible code.
139 void SITargetLowering::AppendS_WAITCNT(MachineInstr
*MI
, MachineBasicBlock
&BB
,
140 MachineBasicBlock::iterator I
) const
142 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::S_WAITCNT
))
// Expands the SI_INTERP pseudo instruction MI at position I in BB.
//
// The visible code emits three instructions in order -- S_MOV_B32,
// V_INTERP_P1_F32 (defining a fresh VReg_32 virtual register) and
// V_INTERP_P2_F32 -- and then erases MI.
// NOTE(review): most operands of the three instructions are not visible in
// this listing; only the attr_chan operand of the two V_INTERP instructions is.
146 void SITargetLowering::LowerSI_INTERP(MachineInstr
*MI
, MachineBasicBlock
&BB
,
147 MachineBasicBlock::iterator I
, MachineRegisterInfo
& MRI
) const
// Temporary virtual register defined by V_INTERP_P1_F32 below.
149 unsigned tmp
= MRI
.createVirtualRegister(&AMDGPU::VReg_32RegClass
);
// Copies of MI's operands 0..5, taken before MI is erased.
150 MachineOperand dst
= MI
->getOperand(0);
151 MachineOperand iReg
= MI
->getOperand(1);
152 MachineOperand jReg
= MI
->getOperand(2);
153 MachineOperand attr_chan
= MI
->getOperand(3);
154 MachineOperand attr
= MI
->getOperand(4);
155 MachineOperand params
= MI
->getOperand(5);
157 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::S_MOV_B32
))
// First interpolation step; its result is written to tmp.
161 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_INTERP_P1_F32
), tmp
)
163 .addOperand(attr_chan
)
// Second interpolation step.
166 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_INTERP_P2_F32
))
170 .addOperand(attr_chan
)
// The pseudo instruction is removed after its expansion has been emitted.
173 MI
->eraseFromParent();
// Expands the SI_INTERP_CONST pseudo instruction MI at position I in BB.
//
// The visible code emits an S_MOV_B32 followed by a V_INTERP_MOV_F32 and then
// erases MI.
// NOTE(review): apart from attr_chan on V_INTERP_MOV_F32, the operands of the
// emitted instructions are not visible in this listing.
176 void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr
*MI
,
177 MachineBasicBlock
&BB
, MachineBasicBlock::iterator I
) const
// Copies of MI's operands 0..3, taken before MI is erased.
179 MachineOperand dst
= MI
->getOperand(0);
180 MachineOperand attr_chan
= MI
->getOperand(1);
181 MachineOperand attr
= MI
->getOperand(2);
182 MachineOperand params
= MI
->getOperand(3);
184 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::S_MOV_B32
))
188 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_INTERP_MOV_F32
))
190 .addOperand(attr_chan
)
// The pseudo instruction is removed after its expansion has been emitted.
193 MI
->eraseFromParent();
// Expands the SI_V_CNDLT pseudo instruction MI at position I in BB into a
// compare followed by a conditional move, then erases MI.
//
// The compare is V_CMP_LT_F32_e32 on MI's operand 1 and the SREG_LIT_0
// register; the conditional move is V_CNDMASK_B32 using MI's operands 0, 2
// and 3.
// NOTE(review): one argument of each BuildMI() chain is not visible in this
// listing. MRI is not referenced by the visible code.
196 void SITargetLowering::LowerSI_V_CNDLT(MachineInstr
*MI
, MachineBasicBlock
&BB
,
197 MachineBasicBlock::iterator I
, MachineRegisterInfo
& MRI
) const
199 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_CMP_LT_F32_e32
),
201 .addOperand(MI
->getOperand(1))
// NOTE(review): SREG_LIT_0 is presumably the literal-zero source register,
// i.e. the compare tests "operand 1 < 0" -- confirm.
202 .addReg(AMDGPU::SREG_LIT_0
);
204 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_CNDMASK_B32
))
205 .addOperand(MI
->getOperand(0))
207 .addOperand(MI
->getOperand(2))
208 .addOperand(MI
->getOperand(3));
// The pseudo instruction is removed after its expansion has been emitted.
210 MI
->eraseFromParent();
// NOTE(review): only the signature is visible in this listing. Presumably this
// reports the result type the target uses for SETCC nodes operating on VT --
// confirm against the full file.
213 EVT
SITargetLowering::getSetCCResultType(EVT VT
) const
218 //===----------------------------------------------------------------------===//
219 // Custom DAG Lowering Operations
220 //===----------------------------------------------------------------------===//
// Entry point for custom lowering of DAG node Op.
//
// Dispatches on Op.getOpcode(): BR_CC, LOAD and SELECT_CC go to their
// dedicated Lower* helpers, AND goes to Loweri1ContextSwitch, and
// INTRINSIC_WO_CHAIN is dispatched again on the intrinsic ID. Everything else
// is forwarded to AMDGPUTargetLowering::LowerOperation.
222 SDValue
SITargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const
224 switch (Op
.getOpcode()) {
225 default: return AMDGPUTargetLowering::LowerOperation(Op
, DAG
);
226 case ISD::BR_CC
: return LowerBR_CC(Op
, DAG
);
227 case ISD::LOAD
: return LowerLOAD(Op
, DAG
);
228 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
);
229 case ISD::AND
: return Loweri1ContextSwitch(Op
, DAG
, ISD::AND
);
230 case ISD::INTRINSIC_WO_CHAIN
: {
// The intrinsic ID is read from operand 0, which is cast to a ConstantSDNode.
231 unsigned IntrinsicID
=
232 cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
233 EVT VT
= Op
.getValueType();
234 switch (IntrinsicID
) {
// SI_vs_load_buffer_index is lowered to a live-in register of class VReg_32.
// NOTE(review): the remaining CreateLiveInRegister arguments are not visible
// in this listing.
235 case AMDGPUIntrinsic::SI_vs_load_buffer_index
:
236 return CreateLiveInRegister(DAG
, &AMDGPU::VReg_32RegClass
,
// Intrinsics not handled here are left to the base class.
238 default: return AMDGPUTargetLowering::LowerOperation(Op
, DAG
);
246 /// Loweri1ContextSwitch - The function is for lowering i1 operations on the
247 /// VCC register. In the VALU context, VCC is a one bit register, but in the
248 /// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
249 /// the SALU can perform operations on the VCC register, we need to promote
250 /// the operand types from i1 to i64 in order for tablegen to be able to match
251 /// this operation to the correct SALU instruction. We do this promotion by
252 /// wrapping the operands in a CopyToReg node.
///
/// NOTE(review): the visible code wraps values in SIISD::VCC_BITCAST nodes
/// rather than CopyToReg; the sentence above may be out of date.
///
/// VCCNode is the opcode used for the i64 operation (LowerOperation passes
/// ISD::AND). The result is converted back to i1 with another VCC_BITCAST.
254 SDValue
SITargetLowering::Loweri1ContextSwitch(SDValue Op
,
256 unsigned VCCNode
) const
258 DebugLoc DL
= Op
.getDebugLoc();
// Build the i64 operation; both of its inputs are VCC_BITCAST nodes of type
// i64. NOTE(review): the values being bitcast are not visible in this listing.
260 SDValue OpNode
= DAG
.getNode(VCCNode
, DL
, MVT::i64
,
261 DAG
.getNode(SIISD::VCC_BITCAST
, DL
, MVT::i64
,
263 DAG
.getNode(SIISD::VCC_BITCAST
, DL
, MVT::i64
,
// Convert the i64 result back to i1 for the users of Op.
266 return DAG
.getNode(SIISD::VCC_BITCAST
, DL
, MVT::i1
, OpNode
);
// Custom lowering for ISD::BR_CC.
//
// Operands of Op are read as: 0 = chain, 1 = condition code, 2 = LHS,
// 3 = RHS, 4 = jump target. The visible code builds a compare node (CmpValue)
// and then an AMDGPUISD::BRANCH_COND node (Result) that uses CmpValue's debug
// location.
// NOTE(review): the declarations of CmpValue/Result, the node operands and the
// return statement are not visible in this listing.
269 SDValue
SITargetLowering::LowerBR_CC(SDValue Op
, SelectionDAG
&DAG
) const
271 SDValue Chain
= Op
.getOperand(0);
272 SDValue CC
= Op
.getOperand(1);
273 SDValue LHS
= Op
.getOperand(2);
274 SDValue RHS
= Op
.getOperand(3);
275 SDValue JumpT
= Op
.getOperand(4);
278 CmpValue
= DAG
.getNode(
285 Result
= DAG
.getNode(
286 AMDGPUISD::BRANCH_COND
,
287 CmpValue
.getDebugLoc(),
// Custom lowering for ISD::LOAD.
//
// Only loads from AMDGPUAS::USER_SGPR_ADDRESS are rewritten. For those, the
// constant base pointer is turned into an SGPR index and every use of the
// load is replaced with a live-in register of the matching SReg class.
// NOTE(review): the early-exit paths, the case labels of the width switch and
// the return statement are not visible in this listing.
293 SDValue
SITargetLowering::LowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const
295 EVT VT
= Op
.getValueType();
// NOTE(review): dyn_cast may yield null; no null check on Ptr is visible
// before it is dereferenced below.
296 LoadSDNode
*Ptr
= dyn_cast
<LoadSDNode
>(Op
);
300 unsigned AddrSpace
= Ptr
->getPointerInfo().getAddrSpace();
302 // We only need to lower USER_SGPR address space loads
303 if (AddrSpace
!= AMDGPUAS::USER_SGPR_ADDRESS
) {
307 // Loads from the USER_SGPR address space can only have constant value
309 ConstantSDNode
*BasePtr
= dyn_cast
<ConstantSDNode
>(Ptr
->getBasePtr());
// Width of the loaded value in 32-bit dwords; selects the register class.
312 unsigned TypeDwordWidth
= VT
.getSizeInBits() / 32;
313 const TargetRegisterClass
* dstClass
;
314 switch (TypeDwordWidth
) {
316 assert(!"USER_SGPR value size not implemented");
319 dstClass
= &AMDGPU::SReg_32RegClass
;
322 dstClass
= &AMDGPU::SReg_64RegClass
;
// The constant base pointer is divided by the dword width to obtain the
// register index within dstClass; it must be a multiple of that width.
325 uint64_t Index
= BasePtr
->getZExtValue();
326 assert(Index
% TypeDwordWidth
== 0 && "USER_SGPR not properly aligned");
327 unsigned SGPRIndex
= Index
/ TypeDwordWidth
;
328 unsigned Reg
= dstClass
->getRegister(SGPRIndex
);
// Replace every use of the load with the live-in register value.
330 DAG
.ReplaceAllUsesOfValueWith(Op
, CreateLiveInRegister(DAG
, dstClass
, Reg
,
// Custom lowering for ISD::SELECT_CC.
//
// Splits the node into an i1 SETCC followed by a SELECT of Op's value type.
// Operands of Op are read as: 0 = LHS, 1 = RHS, 2 = value if true,
// 3 = value if false, 4 = condition code.
335 SDValue
SITargetLowering::LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
) const
337 SDValue LHS
= Op
.getOperand(0);
338 SDValue RHS
= Op
.getOperand(1);
339 SDValue True
= Op
.getOperand(2);
340 SDValue False
= Op
.getOperand(3);
341 SDValue CC
= Op
.getOperand(4);
342 EVT VT
= Op
.getValueType();
343 DebugLoc DL
= Op
.getDebugLoc();
// Cond = setcc(LHS, RHS, CC), produced as an i1 value.
345 SDValue Cond
= DAG
.getNode(ISD::SETCC
, DL
, MVT::i1
, LHS
, RHS
, CC
);
// Result = select(Cond, True, False) in the original value type.
346 return DAG
.getNode(ISD::SELECT
, DL
, VT
, Cond
, True
, False
);
349 //===----------------------------------------------------------------------===//
350 // Custom DAG optimizations
351 //===----------------------------------------------------------------------===//
// Target-specific DAG combines for node N.
//
// Two folds are visible:
//   * SELECT_CC whose true/false operands are the constants all-ones and
//     zero is replaced by a SETCC on the same comparison.
//   * SETCC with condition SETNE whose first argument is a SIGN_EXTEND of an
//     i1 value and whose second argument is a constant is re-expressed on the
//     i1 value via SimplifySetCC.
// NOTE(review): parts of both conditions, the SETCC case label and the
// fall-through return are not visible in this listing.
353 SDValue
SITargetLowering::PerformDAGCombine(SDNode
*N
,
354 DAGCombinerInfo
&DCI
) const {
355 SelectionDAG
&DAG
= DCI
.DAG
;
356 DebugLoc DL
= N
->getDebugLoc();
357 EVT VT
= N
->getValueType(0);
359 switch (N
->getOpcode()) {
361 case ISD::SELECT_CC
: {
363 ConstantSDNode
*True
, *False
;
364 // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
// True/False are assigned inside the condition; the later checks only run
// when both dyn_casts succeeded.
365 if ((True
= dyn_cast
<ConstantSDNode
>(N
->getOperand(2)))
366 && (False
= dyn_cast
<ConstantSDNode
>(N
->getOperand(3)))
367 && True
->isAllOnesValue()
368 && False
->isNullValue()
// Operands 0 and 1 are the compared values, operand 4 the condition code.
370 return DAG
.getNode(ISD::SETCC
, DL
, VT
, N
->getOperand(0),
371 N
->getOperand(1), N
->getOperand(4));
377 SDValue Arg0
= N
->getOperand(0);
378 SDValue Arg1
= N
->getOperand(1);
379 SDValue CC
= N
->getOperand(2);
380 ConstantSDNode
* C
= NULL
;
// NOTE(review): the dyn_cast result is dereferenced without a null check; if
// CC is always a CondCodeSDNode here, cast<> would state that intent.
381 ISD::CondCode CCOp
= dyn_cast
<CondCodeSDNode
>(CC
)->get();
383 // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
385 && Arg0
.getOpcode() == ISD::SIGN_EXTEND
386 && Arg0
.getOperand(0).getValueType() == MVT::i1
387 && (C
= dyn_cast
<ConstantSDNode
>(Arg1
))
389 && CCOp
== ISD::SETNE
) {
// Compare the un-extended i1 value against an i1 constant 0 instead.
390 return SimplifySetCC(VT
, Arg0
.getOperand(0),
391 DAG
.getConstant(0, MVT::i1
), CCOp
, true, DCI
, DL
);
// NODE_NAME_CASE(node) expands to a case label for SIISD::node that returns
// the stringified node name (#node).
399 #define NODE_NAME_CASE(node) case SIISD::node: return #node;
// Returns a printable name for the target-specific DAG opcode Opcode.
// VCC_AND and VCC_BITCAST are named here; any other opcode is forwarded to
// AMDGPUTargetLowering::getTargetNodeName.
// NOTE(review): the switch header is not visible in this listing.
401 const char* SITargetLowering::getTargetNodeName(unsigned Opcode
) const
404 default: return AMDGPUTargetLowering::getTargetNodeName(Opcode
);
405 NODE_NAME_CASE(VCC_AND
)
406 NODE_NAME_CASE(VCC_BITCAST
)