radeon/llvm: Use multiclasses for floating point loads
[mesa.git] / src/gallium/drivers/radeon/R600ISelLowering.cpp
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp; this file
// mostly implements EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUUtil.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
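  // i64 multiply has no hardware support; mark it for expansion.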
  setOperationAction(ISD::MUL, MVT::i64, Expand);
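  // Scalar f32/i32 values live in the 32-bit register file; vec4 values use
  // the 128-bit register file.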
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

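  // There is no FSUB instruction; expansion turns it into an FADD of a
  // negated operand.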
  setOperationAction(ISD::FSUB, MVT::f32, Expand);

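  // Rotate-left is custom-lowered to BITALIGN; see LowerROTL() below.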
  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

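// Expand pseudo instructions that need MachineInstr-level custom lowering.
// Unless a case returns early, the pseudo itself is erased once its
// replacement has been emitted.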
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
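  // The group ID (TGID) and the thread ID within the group (TIDIG) are
  // preloaded by the hardware into T1 and T0; mark the channel as live-in.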
  case AMDGPU::TGID_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
    break;
  case AMDGPU::TGID_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
    break;
  case AMDGPU::TGID_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
    break;
  case AMDGPU::TIDIG_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
    break;
  case AMDGPU::TIDIG_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
    break;
  case AMDGPU::TIDIG_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
    break;
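  // Grid and workgroup dimensions are implicit kernel parameters; fetch each
  // one from its dword slot in the parameter buffer.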
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;

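  // Clamp, absolute value, and negate are operand modifiers on R600, so each
  // of these pseudos becomes a plain MOV with the matching flag set.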
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1));
    break;

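  // Constant and input reads resolve an immediate register index to the
  // corresponding physical register.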
  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
          .addOperand(MI->getOperand(0))
          .addReg(ConstantReg);
      break;
    }

  case AMDGPU::LOAD_INPUT:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      addLiveIn(MI, MF, MRI, TII,
                AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
      break;
    }

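  // MASK_WRITE emits nothing itself; it masks the write by flagging the
  // instruction that defines its operand.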
  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

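  // Global stores address memory in dwords, so the byte address has to be
  // divided by four before the real RAT write is emitted.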
  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
          AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
          AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
          .addReg(AMDGPU::ALU_LITERAL_X)
          .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
          .addOperand(MI->getOperand(1))
          .addReg(ShiftValue);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
          .addOperand(MI->getOperand(0))
          .addReg(NewAddr);
      break;
    }

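  // STORE_OUTPUT copies the value into its fixed output register and marks
  // that register as live-out of the function.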
  case AMDGPU::STORE_OUTPUT:
    {
      int64_t OutputIndex = MI->getOperand(1).getImm();
      unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg)
          .addOperand(MI->getOperand(0));

      if (!MRI.isLiveOut(OutputReg)) {
        MRI.addLiveOut(OutputReg);
      }
      break;
    }

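  // RESERVE_REG only records the register in the machine function info so it
  // is kept away from the register allocator; no code is emitted.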
  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

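  // TXD and TXD_SHADOW sample with user-supplied derivatives: the horizontal
  // and vertical gradients are loaded first, and the gradient-sample
  // instruction then consumes them as implicit operands.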
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
  }

  MI->eraseFromParent();
  return BB;
}

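// Implicit parameters are fetched with a vertex read: load the parameter's
// byte offset into a pointer register, then read one dword from that address.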
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
    MachineRegisterInfo & MRI, unsigned dword_offset) const
{
  MachineBasicBlock::iterator I = *MI;
  unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::MOV), PtrReg)
      .addReg(AMDGPU::ALU_LITERAL_X)
      .addImm(dword_offset * 4);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
      .addOperand(MI->getOperand(0))
      .addReg(PtrReg)
      .addImm(0);
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  }
}

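// BITALIGN selects 32 bits from the 64-bit concatenation of its first two
// operands, shifted right by the third, so rotl(x, n) == bitalign(x, x, 32 - n).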
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}