//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
// mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//
#include "SIISelLowering.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;
25 SITargetLowering::SITargetLowering(TargetMachine
&TM
) :
26 AMDGPUTargetLowering(TM
),
27 TII(static_cast<const SIInstrInfo
*>(TM
.getInstrInfo()))
29 addRegisterClass(MVT::v4f32
, &AMDGPU::VReg_128RegClass
);
30 addRegisterClass(MVT::f32
, &AMDGPU::VReg_32RegClass
);
31 addRegisterClass(MVT::i32
, &AMDGPU::VReg_32RegClass
);
32 addRegisterClass(MVT::i64
, &AMDGPU::VReg_64RegClass
);
33 addRegisterClass(MVT::i1
, &AMDGPU::SCCRegRegClass
);
34 addRegisterClass(MVT::i1
, &AMDGPU::VCCRegRegClass
);
36 addRegisterClass(MVT::v4i32
, &AMDGPU::SReg_128RegClass
);
37 addRegisterClass(MVT::v8i32
, &AMDGPU::SReg_256RegClass
);
39 computeRegisterProperties();
41 setOperationAction(ISD::AND
, MVT::i1
, Custom
);
43 setOperationAction(ISD::ADD
, MVT::i64
, Legal
);
44 setOperationAction(ISD::ADD
, MVT::i32
, Legal
);
46 setOperationAction(ISD::BR_CC
, MVT::i32
, Custom
);
48 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
50 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Custom
);
51 setOperationAction(ISD::SELECT_CC
, MVT::i32
, Custom
);
53 setOperationAction(ISD::SELECT_CC
, MVT::Other
, Expand
);
54 setTargetDAGCombine(ISD::SELECT_CC
);
56 setTargetDAGCombine(ISD::SETCC
);
59 MachineBasicBlock
* SITargetLowering::EmitInstrWithCustomInserter(
60 MachineInstr
* MI
, MachineBasicBlock
* BB
) const
62 const TargetInstrInfo
* TII
= getTargetMachine().getInstrInfo();
63 MachineRegisterInfo
& MRI
= BB
->getParent()->getRegInfo();
64 MachineBasicBlock::iterator I
= MI
;
66 if (TII
->get(MI
->getOpcode()).TSFlags
& SIInstrFlags::NEED_WAIT
) {
67 AppendS_WAITCNT(MI
, *BB
, llvm::next(I
));
71 switch (MI
->getOpcode()) {
73 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI
, BB
);
75 case AMDGPU::CLAMP_SI
:
76 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::V_MOV_B32_e64
))
77 .addOperand(MI
->getOperand(0))
78 .addOperand(MI
->getOperand(1))
79 // VSRC1-2 are unused, but we still need to fill all the
80 // operand slots, so we just reuse the VSRC0 operand
81 .addOperand(MI
->getOperand(1))
82 .addOperand(MI
->getOperand(1))
87 MI
->eraseFromParent();
91 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::V_MOV_B32_e64
))
92 .addOperand(MI
->getOperand(0))
93 .addOperand(MI
->getOperand(1))
94 // VSRC1-2 are unused, but we still need to fill all the
95 // operand slots, so we just reuse the VSRC0 operand
96 .addOperand(MI
->getOperand(1))
97 .addOperand(MI
->getOperand(1))
102 MI
->eraseFromParent();
105 case AMDGPU::FNEG_SI
:
106 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::V_MOV_B32_e64
))
107 .addOperand(MI
->getOperand(0))
108 .addOperand(MI
->getOperand(1))
109 // VSRC1-2 are unused, but we still need to fill all the
110 // operand slots, so we just reuse the VSRC0 operand
111 .addOperand(MI
->getOperand(1))
112 .addOperand(MI
->getOperand(1))
117 MI
->eraseFromParent();
120 case AMDGPU::SI_INTERP
:
121 LowerSI_INTERP(MI
, *BB
, I
, MRI
);
123 case AMDGPU::SI_INTERP_CONST
:
124 LowerSI_INTERP_CONST(MI
, *BB
, I
);
126 case AMDGPU::SI_V_CNDLT
:
127 LowerSI_V_CNDLT(MI
, *BB
, I
, MRI
);
129 case AMDGPU::USE_SGPR_32
:
130 case AMDGPU::USE_SGPR_64
:
131 lowerUSE_SGPR(MI
, BB
->getParent(), MRI
);
132 MI
->eraseFromParent();
138 void SITargetLowering::AppendS_WAITCNT(MachineInstr
*MI
, MachineBasicBlock
&BB
,
139 MachineBasicBlock::iterator I
) const
141 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::S_WAITCNT
))
145 void SITargetLowering::LowerSI_INTERP(MachineInstr
*MI
, MachineBasicBlock
&BB
,
146 MachineBasicBlock::iterator I
, MachineRegisterInfo
& MRI
) const
148 unsigned tmp
= MRI
.createVirtualRegister(&AMDGPU::VReg_32RegClass
);
149 MachineOperand dst
= MI
->getOperand(0);
150 MachineOperand iReg
= MI
->getOperand(1);
151 MachineOperand jReg
= MI
->getOperand(2);
152 MachineOperand attr_chan
= MI
->getOperand(3);
153 MachineOperand attr
= MI
->getOperand(4);
154 MachineOperand params
= MI
->getOperand(5);
156 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::S_MOV_B32
))
160 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_INTERP_P1_F32
), tmp
)
162 .addOperand(attr_chan
)
165 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_INTERP_P2_F32
))
169 .addOperand(attr_chan
)
172 MI
->eraseFromParent();
175 void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr
*MI
,
176 MachineBasicBlock
&BB
, MachineBasicBlock::iterator I
) const
178 MachineOperand dst
= MI
->getOperand(0);
179 MachineOperand attr_chan
= MI
->getOperand(1);
180 MachineOperand attr
= MI
->getOperand(2);
181 MachineOperand params
= MI
->getOperand(3);
183 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::S_MOV_B32
))
187 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_INTERP_MOV_F32
))
189 .addOperand(attr_chan
)
192 MI
->eraseFromParent();
195 void SITargetLowering::LowerSI_V_CNDLT(MachineInstr
*MI
, MachineBasicBlock
&BB
,
196 MachineBasicBlock::iterator I
, MachineRegisterInfo
& MRI
) const
198 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_CMP_LT_F32_e32
),
200 .addOperand(MI
->getOperand(1))
201 .addReg(AMDGPU::SREG_LIT_0
);
203 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_CNDMASK_B32
))
204 .addOperand(MI
->getOperand(0))
206 .addOperand(MI
->getOperand(2))
207 .addOperand(MI
->getOperand(3));
209 MI
->eraseFromParent();
212 void SITargetLowering::lowerUSE_SGPR(MachineInstr
*MI
,
213 MachineFunction
* MF
, MachineRegisterInfo
& MRI
) const
215 const TargetInstrInfo
* TII
= getTargetMachine().getInstrInfo();
216 unsigned dstReg
= MI
->getOperand(0).getReg();
217 int64_t newIndex
= MI
->getOperand(1).getImm();
218 const TargetRegisterClass
* dstClass
= MRI
.getRegClass(dstReg
);
219 unsigned DwordWidth
= dstClass
->getSize() / 4;
220 assert(newIndex
% DwordWidth
== 0 && "USER_SGPR not properly aligned");
221 newIndex
= newIndex
/ DwordWidth
;
223 unsigned newReg
= dstClass
->getRegister(newIndex
);
224 addLiveIn(MI
, MF
, MRI
, TII
, newReg
);
227 EVT
SITargetLowering::getSetCCResultType(EVT VT
) const
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
236 SDValue
SITargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const
238 switch (Op
.getOpcode()) {
239 default: return AMDGPUTargetLowering::LowerOperation(Op
, DAG
);
240 case ISD::BR_CC
: return LowerBR_CC(Op
, DAG
);
241 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
);
242 case ISD::AND
: return Loweri1ContextSwitch(Op
, DAG
, ISD::AND
);
243 case ISD::INTRINSIC_WO_CHAIN
: {
244 unsigned IntrinsicID
=
245 cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
246 EVT VT
= Op
.getValueType();
247 switch (IntrinsicID
) {
248 case AMDGPUIntrinsic::SI_vs_load_buffer_index
:
249 return CreateLiveInRegister(DAG
, &AMDGPU::VReg_32RegClass
,
251 default: return AMDGPUTargetLowering::LowerOperation(Op
, DAG
);
259 /// Loweri1ContextSwitch - The function is for lowering i1 operations on the
260 /// VCC register. In the VALU context, VCC is a one bit register, but in the
261 /// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
262 /// the SALU can perform operations on the VCC register, we need to promote
263 /// the operand types from i1 to i64 in order for tablegen to be able to match
264 /// this operation to the correct SALU instruction. We do this promotion by
265 /// wrapping the operands in a CopyToReg node.
267 SDValue
SITargetLowering::Loweri1ContextSwitch(SDValue Op
,
269 unsigned VCCNode
) const
271 DebugLoc DL
= Op
.getDebugLoc();
273 SDValue OpNode
= DAG
.getNode(VCCNode
, DL
, MVT::i64
,
274 DAG
.getNode(SIISD::VCC_BITCAST
, DL
, MVT::i64
,
276 DAG
.getNode(SIISD::VCC_BITCAST
, DL
, MVT::i64
,
279 return DAG
.getNode(SIISD::VCC_BITCAST
, DL
, MVT::i1
, OpNode
);
282 SDValue
SITargetLowering::LowerBR_CC(SDValue Op
, SelectionDAG
&DAG
) const
284 SDValue Chain
= Op
.getOperand(0);
285 SDValue CC
= Op
.getOperand(1);
286 SDValue LHS
= Op
.getOperand(2);
287 SDValue RHS
= Op
.getOperand(3);
288 SDValue JumpT
= Op
.getOperand(4);
291 CmpValue
= DAG
.getNode(
298 Result
= DAG
.getNode(
299 AMDGPUISD::BRANCH_COND
,
300 CmpValue
.getDebugLoc(),
306 SDValue
SITargetLowering::LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
) const
308 SDValue LHS
= Op
.getOperand(0);
309 SDValue RHS
= Op
.getOperand(1);
310 SDValue True
= Op
.getOperand(2);
311 SDValue False
= Op
.getOperand(3);
312 SDValue CC
= Op
.getOperand(4);
313 EVT VT
= Op
.getValueType();
314 DebugLoc DL
= Op
.getDebugLoc();
316 SDValue Cond
= DAG
.getNode(ISD::SETCC
, DL
, MVT::i1
, LHS
, RHS
, CC
);
317 return DAG
.getNode(ISD::SELECT
, DL
, VT
, Cond
, True
, False
);
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
324 SDValue
SITargetLowering::PerformDAGCombine(SDNode
*N
,
325 DAGCombinerInfo
&DCI
) const {
326 SelectionDAG
&DAG
= DCI
.DAG
;
327 DebugLoc DL
= N
->getDebugLoc();
328 EVT VT
= N
->getValueType(0);
330 switch (N
->getOpcode()) {
332 case ISD::SELECT_CC
: {
334 ConstantSDNode
*True
, *False
;
335 // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
336 if ((True
= dyn_cast
<ConstantSDNode
>(N
->getOperand(2)))
337 && (False
= dyn_cast
<ConstantSDNode
>(N
->getOperand(3)))
338 && True
->isAllOnesValue()
339 && False
->isNullValue()
341 return DAG
.getNode(ISD::SETCC
, DL
, VT
, N
->getOperand(0),
342 N
->getOperand(1), N
->getOperand(4));
348 SDValue Arg0
= N
->getOperand(0);
349 SDValue Arg1
= N
->getOperand(1);
350 SDValue CC
= N
->getOperand(2);
351 ConstantSDNode
* C
= NULL
;
352 ISD::CondCode CCOp
= dyn_cast
<CondCodeSDNode
>(CC
)->get();
354 // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
356 && Arg0
.getOpcode() == ISD::SIGN_EXTEND
357 && Arg0
.getOperand(0).getValueType() == MVT::i1
358 && (C
= dyn_cast
<ConstantSDNode
>(Arg1
))
360 && CCOp
== ISD::SETNE
) {
361 return SimplifySetCC(VT
, Arg0
.getOperand(0),
362 DAG
.getConstant(0, MVT::i1
), CCOp
, true, DCI
, DL
);
370 #define NODE_NAME_CASE(node) case SIISD::node: return #node;
372 const char* SITargetLowering::getTargetNodeName(unsigned Opcode
) const
375 default: return AMDGPUTargetLowering::getTargetNodeName(Opcode
);
376 NODE_NAME_CASE(VCC_AND
)
377 NODE_NAME_CASE(VCC_BITCAST
)