1 //===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
11 // mostly EmitInstrWithCustomInserter().
13 //===----------------------------------------------------------------------===//
15 #include "SIISelLowering.h"
16 #include "SIInstrInfo.h"
17 #include "SIRegisterInfo.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 SITargetLowering::SITargetLowering(TargetMachine
&TM
) :
23 AMDGPUTargetLowering(TM
),
24 TII(static_cast<const SIInstrInfo
*>(TM
.getInstrInfo()))
26 addRegisterClass(MVT::v4f32
, &AMDGPU::VReg_128RegClass
);
27 addRegisterClass(MVT::f32
, &AMDGPU::VReg_32RegClass
);
28 addRegisterClass(MVT::i32
, &AMDGPU::VReg_32RegClass
);
29 addRegisterClass(MVT::i64
, &AMDGPU::VReg_64RegClass
);
31 addRegisterClass(MVT::v4i32
, &AMDGPU::SReg_128RegClass
);
32 addRegisterClass(MVT::v8i32
, &AMDGPU::SReg_256RegClass
);
34 computeRegisterProperties();
36 setOperationAction(ISD::ADD
, MVT::i64
, Legal
);
37 setOperationAction(ISD::ADD
, MVT::i32
, Legal
);
// EmitInstrWithCustomInserter - Expand SI pseudo instructions into real
// machine instructions after instruction selection.  Opcodes not handled by
// the switch below are forwarded to the AMDGPUTargetLowering base class.
//
// NOTE(review): the embedded original line numbers below are discontinuous
// (e.g. 64 -> 69, 79 -> 84, 103 -> 109).  The dropped lines presumably held
// the `.addImm(...)` modifier-operand chains of the two BuildMI expansions,
// the `break;` statements, a `default:` label and one `case` label, closing
// braces, and the final `return BB;`.  Confirm against the complete file
// before editing this function.
41 MachineBasicBlock
* SITargetLowering::EmitInstrWithCustomInserter(
42 MachineInstr
* MI
, MachineBasicBlock
* BB
) const
// Note: this local TII (generic TargetInstrInfo) shadows the SIInstrInfo
// member of the same name initialized in the constructor.
44 const TargetInstrInfo
* TII
= getTargetMachine().getInstrInfo();
45 MachineRegisterInfo
& MRI
= BB
->getParent()->getRegInfo();
46 MachineBasicBlock::iterator I
= MI
;
// Instructions whose TSFlags carry NEED_WAIT get an S_WAITCNT appended
// immediately after them instead of being expanded.
48 if (TII
->get(MI
->getOpcode()).TSFlags
& SIInstrFlags::NEED_WAIT
) {
49 AppendS_WAITCNT(MI
, *BB
, llvm::next(I
));
// Dispatch on the pseudo opcode being expanded.
53 switch (MI
->getOpcode()) {
// Fallback for opcodes not expanded here (its `default:` label appears to be
// among the dropped lines).
55 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI
, BB
);
// CLAMP_SI: expand to a V_MOV_B32_e64 of operand 1 into operand 0; the
// modifier immediates (ABS/CLAMP/OMOD/NEG) that would follow appear to be
// among the dropped lines (original 65-68).
57 case AMDGPU::CLAMP_SI
:
58 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::V_MOV_B32_e64
))
59 .addOperand(MI
->getOperand(0))
60 .addOperand(MI
->getOperand(1))
61 // VSRC1-2 are unused, but we still need to fill all the
62 // operand slots, so we just reuse the VSRC0 operand
63 .addOperand(MI
->getOperand(1))
64 .addOperand(MI
->getOperand(1))
69 MI
->eraseFromParent();
// Second V_MOV_B32_e64 expansion.  Its `case` label (original lines 71-72)
// is missing from this extraction -- presumably another modifier pseudo such
// as FABS_SI; its trailing modifier immediates (original 80-83) are also
// dropped.  Verify against the full file.
73 BuildMI(*BB
, I
, BB
->findDebugLoc(I
), TII
->get(AMDGPU::V_MOV_B32_e64
))
74 .addOperand(MI
->getOperand(0))
75 .addOperand(MI
->getOperand(1))
76 // VSRC1-2 are unused, but we still need to fill all the
77 // operand slots, so we just reuse the VSRC0 operand
78 .addOperand(MI
->getOperand(1))
79 .addOperand(MI
->getOperand(1))
84 MI
->eraseFromParent();
// SI_INTERP: per-pixel attribute interpolation -- delegated to a helper.
87 case AMDGPU::SI_INTERP
:
88 LowerSI_INTERP(MI
, *BB
, I
, MRI
);
// SI_INTERP_CONST: flat (constant) attribute read -- delegated to a helper.
90 case AMDGPU::SI_INTERP_CONST
:
91 LowerSI_INTERP_CONST(MI
, *BB
, I
);
// SI_V_CNDLT: compare-with-zero conditional select -- delegated to a helper.
93 case AMDGPU::SI_V_CNDLT
:
94 LowerSI_V_CNDLT(MI
, *BB
, I
, MRI
);
// USE_SGPR_32/64: rewrite the pseudo's destination as a live-in SGPR, then
// drop the pseudo.
96 case AMDGPU::USE_SGPR_32
:
97 case AMDGPU::USE_SGPR_64
:
98 lowerUSE_SGPR(MI
, BB
->getParent(), MRI
);
99 MI
->eraseFromParent();
// VS_LOAD_BUFFER_INDEX: the vertex index arrives in VGPR0; mark it live-in
// and drop the pseudo.
101 case AMDGPU::VS_LOAD_BUFFER_INDEX
:
102 addLiveIn(MI
, BB
->getParent(), MRI
, TII
, AMDGPU::VGPR0
);
103 MI
->eraseFromParent();
// AppendS_WAITCNT - Insert an S_WAITCNT instruction at iterator I of BB.
// Used by EmitInstrWithCustomInserter for instructions whose TSFlags carry
// SIInstrFlags::NEED_WAIT.
//
// NOTE(review): original lines 111/113/114 are missing from this extraction;
// the BuildMI below is left without its operand chain and terminating
// semicolon (the dropped line presumably supplied the wait-count immediate,
// e.g. `.addImm(0);`).  Confirm against the full file.
109 void SITargetLowering::AppendS_WAITCNT(MachineInstr
*MI
, MachineBasicBlock
&BB
,
110 MachineBasicBlock::iterator I
) const
112 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::S_WAITCNT
))
// LowerSI_INTERP - Expand the SI_INTERP pseudo: emit an S_MOV_B32 of the
// interpolation parameters, then a V_INTERP_P1_F32 into a fresh virtual
// register followed by a V_INTERP_P2_F32, and erase the pseudo.
//
// NOTE(review): most of the BuildMI operand chains are missing from this
// extraction (embedded original numbering jumps 127 -> 131 -> 133 -> 136 ->
// 140 -> 143).  The dropped lines presumably added the S_MOV_B32 destination
// (likely M0) plus `params`, and the i/j coordinate, `attr` and destination
// operands of the two V_INTERP instructions.  Confirm against the full file.
116 void SITargetLowering::LowerSI_INTERP(MachineInstr
*MI
, MachineBasicBlock
&BB
,
117 MachineBasicBlock::iterator I
, MachineRegisterInfo
& MRI
) const
// Scratch VGPR holding the intermediate P1 interpolation result.
119 unsigned tmp
= MRI
.createVirtualRegister(&AMDGPU::VReg_32RegClass
);
// Unpack the pseudo's six operands by role.
120 MachineOperand dst
= MI
->getOperand(0);
121 MachineOperand iReg
= MI
->getOperand(1);
122 MachineOperand jReg
= MI
->getOperand(2);
123 MachineOperand attr_chan
= MI
->getOperand(3);
124 MachineOperand attr
= MI
->getOperand(4);
125 MachineOperand params
= MI
->getOperand(5);
// Load the interpolation parameters (operand chain dropped -- see NOTE).
127 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::S_MOV_B32
))
// First interpolation stage, result into `tmp` (some operands dropped).
131 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_INTERP_P1_F32
), tmp
)
133 .addOperand(attr_chan
)
// Second interpolation stage, consuming `tmp` (some operands dropped).
136 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_INTERP_P2_F32
))
140 .addOperand(attr_chan
)
// The pseudo has been fully expanded; remove it.
143 MI
->eraseFromParent();
// LowerSI_INTERP_CONST - Expand the SI_INTERP_CONST pseudo: emit an
// S_MOV_B32 of the interpolation parameters followed by a single
// V_INTERP_MOV_F32 (constant/flat attribute read), then erase the pseudo.
//
// NOTE(review): parts of the BuildMI operand chains are missing from this
// extraction (embedded original numbering jumps 154 -> 158 and 160 -> 163).
// The dropped lines presumably added the S_MOV_B32 destination (likely M0)
// plus `params`, and the `dst`/`attr` operands of the V_INTERP_MOV_F32.
// Confirm against the full file.
146 void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr
*MI
,
147 MachineBasicBlock
&BB
, MachineBasicBlock::iterator I
) const
// Unpack the pseudo's four operands by role.
149 MachineOperand dst
= MI
->getOperand(0);
150 MachineOperand attr_chan
= MI
->getOperand(1);
151 MachineOperand attr
= MI
->getOperand(2);
152 MachineOperand params
= MI
->getOperand(3);
// Load the interpolation parameters (operand chain dropped -- see NOTE).
154 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::S_MOV_B32
))
// Constant attribute read (some operands dropped -- see NOTE).
158 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_INTERP_MOV_F32
))
160 .addOperand(attr_chan
)
// The pseudo has been fully expanded; remove it.
163 MI
->eraseFromParent();
166 void SITargetLowering::LowerSI_V_CNDLT(MachineInstr
*MI
, MachineBasicBlock
&BB
,
167 MachineBasicBlock::iterator I
, MachineRegisterInfo
& MRI
) const
169 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_CMP_LT_F32_e32
))
170 .addOperand(MI
->getOperand(1))
171 .addReg(AMDGPU::SREG_LIT_0
);
173 BuildMI(BB
, I
, BB
.findDebugLoc(I
), TII
->get(AMDGPU::V_CNDMASK_B32
))
174 .addOperand(MI
->getOperand(0))
175 .addOperand(MI
->getOperand(2))
176 .addOperand(MI
->getOperand(3));
178 MI
->eraseFromParent();
// lowerUSE_SGPR - Expand a USE_SGPR_32/64 pseudo: translate its immediate
// dword index into a concrete register of the destination's register class
// and register that physical register as a live-in via addLiveIn.
//
// NOTE(review): the function's closing brace (original line 194) lies beyond
// this extraction; all visible statements are documented in place.
181 void SITargetLowering::lowerUSE_SGPR(MachineInstr
*MI
,
182 MachineFunction
* MF
, MachineRegisterInfo
& MRI
) const
// Note: this local TII (generic TargetInstrInfo) shadows the SIInstrInfo
// member of the same name initialized in the constructor.
184 const TargetInstrInfo
* TII
= getTargetMachine().getInstrInfo();
// Destination virtual register of the pseudo and its immediate SGPR index.
185 unsigned dstReg
= MI
->getOperand(0).getReg();
186 int64_t newIndex
= MI
->getOperand(1).getImm();
187 const TargetRegisterClass
* dstClass
= MRI
.getRegClass(dstReg
);
// Width of one register of the destination class in dwords
// (getSize() is in bytes).
188 unsigned DwordWidth
= dstClass
->getSize() / 4;
// The immediate indexes dwords; it must fall on a register boundary.
189 assert(newIndex
% DwordWidth
== 0 && "USER_SGPR not properly aligned");
// Convert the dword index into an index into the register class.
190 newIndex
= newIndex
/ DwordWidth
;
// Resolve the concrete physical register and mark it live-in (addLiveIn
// presumably also rewires dstReg's uses -- confirm in its definition).
192 unsigned newReg
= dstClass
->getRegister(newIndex
);
193 addLiveIn(MI
, MF
, MRI
, TII
, newReg
);