radeon/llvm: Move lowering of SETCC node to R600ISelLowering
[mesa.git] src/gallium/drivers/radeon/R600ISelLowering.cpp
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// contains the R600-specific lowering, chiefly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUUtil.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

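  // SETCC is custom-lowered (see LowerSETCC below): the hardware boolean is
  // -1/0, not the 0/1 that ISD::SETCC is defined to produce.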
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

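// Expand the R600 pseudo instructions that are easier to handle as
// MachineInstrs than as SelectionDAG nodes: intrinsic reads of the
// work-item/work-group registers, implicit kernel parameters, the
// operand-flag pseudos (clamp/abs/neg/mask), and the texture pseudos.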
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::TGID_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
    break;
  case AMDGPU::TGID_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
    break;
  case AMDGPU::TGID_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
    break;
  case AMDGPU::TIDIG_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
    break;
  case AMDGPU::TIDIG_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
    break;
  case AMDGPU::TIDIG_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
    break;
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;

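  // CLAMP, FABS and FNEG are not distinct hardware instructions on R600;
  // they are expressed as flags on an instruction's operands, so each
  // pseudo becomes a plain MOV with the matching MO_FLAG_* set.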
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::R600_LOAD_CONST:
  {
    int64_t RegIndex = MI->getOperand(1).getImm();
    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
        .addOperand(MI->getOperand(0))
        .addReg(ConstantReg);
    break;
  }

  case AMDGPU::LOAD_INPUT:
  {
    int64_t RegIndex = MI->getOperand(1).getImm();
    addLiveIn(MI, MF, MRI, TII,
              AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
    break;
  }

  case AMDGPU::MASK_WRITE:
  {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
    def->addTargetFlag(MO_FLAG_MASK);
    // Return early so the instruction is not erased
    return BB;
  }

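  // RAT (Random Access Target) stores take a dword address, so the byte
  // address in operand 1 is shifted right by two before the store is
  // re-emitted with the converted address.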
  case AMDGPU::RAT_WRITE_CACHELESS_eg:
  {
    // Convert to DWORD address
    unsigned NewAddr = MRI.createVirtualRegister(
        AMDGPU::R600_TReg32_XRegisterClass);
    unsigned ShiftValue = MRI.createVirtualRegister(
        AMDGPU::R600_TReg32RegisterClass);

    // XXX In theory, we should be able to pass ShiftValue directly to
    // the LSHR_eg instruction as an inline literal, but I tried doing it
    // this way and it didn't produce the correct results.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
        .addReg(AMDGPU::ALU_LITERAL_X)
        .addImm(2);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
        .addOperand(MI->getOperand(1))
        .addReg(ShiftValue);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addReg(NewAddr);
    break;
  }

  case AMDGPU::STORE_OUTPUT:
  {
    int64_t OutputIndex = MI->getOperand(1).getImm();
    unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg)
        .addOperand(MI->getOperand(0));

    if (!MRI.isLiveOut(OutputReg)) {
      MRI.addLiveOut(OutputReg);
    }
    break;
  }

  case AMDGPU::RESERVE_REG:
  {
    R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
    int64_t ReservedIndex = MI->getOperand(0).getImm();
    unsigned ReservedReg =
        AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
    MFI->ReservedRegs.push_back(ReservedReg);
    break;
  }

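  // TXD/TXD_SHADOW sample a texture with user-supplied derivatives: the
  // horizontal and vertical gradients are loaded first with
  // TEX_SET_GRADIENTS_H/V, and the sample instruction carries implicit uses
  // of both results so the three instructions stay ordered together.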
  case AMDGPU::TXD:
  {
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addReg(t0, RegState::Implicit)
        .addReg(t1, RegState::Implicit);
    break;
  }
  case AMDGPU::TXD_SHADOW:
  {
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addReg(t0, RegState::Implicit)
        .addReg(t1, RegState::Implicit);
    break;
  }
  }

  MI->eraseFromParent();
  return BB;
}

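// Implicit kernel parameters (group counts, global and local sizes) are
// fetched from the parameter buffer: materialize the dword offset as a
// literal, then issue a VTX_READ_PARAM load through that pointer.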
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
    MachineRegisterInfo & MRI, unsigned dword_offset) const
{
  MachineBasicBlock::iterator I = *MI;
  unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::MOV), PtrReg)
      .addReg(AMDGPU::ALU_LITERAL_X)
      .addImm(dword_offset * 4);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
      .addOperand(MI->getOperand(0))
      .addReg(PtrReg)
      .addImm(0);
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  }
}

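// There is no native rotate, but BITALIGN selects 32 bits out of the
// concatenation of its first two operands at a given right-shift amount, so
//   (rotl x, y)  ==>  (bitalign x, x, (sub 32, y))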
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

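// SELECT_CC is only directly selectable when the comparison operands have
// the same type as the result and the true/false values are the hardware
// boolean constants; the cases below massage everything else into that form.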
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we convert LHS and RHS to the same type as True and False.
  // True and False are guaranteed to have the same type as this SELECT_CC
  // node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairing in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value, we
  // can handle this directly with a native instruction (the SET*
  // instructions).
  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we could also handle this with a native instruction, but we would need to
  // swap True and False and invert the condition code.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation. (The code below does this, but we don't have the
  // instruction selection patterns to do this yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a supported SELECT_CC that computes
  // the hardware boolean, followed by a SELECT on that boolean.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

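// SETCC is lowered in two steps: a SELECT_CC produces the hardware -1/0
// boolean, and an AND with 1 converts it to the 0/1 value that ISD::SETCC
// is defined to produce. For example:
//   (setcc i32 a, b, setlt)  ==>  (and 1, (select_cc a, b, -1, 0, setlt))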
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(ISD::SELECT_CC, DL, MVT::i32,
                     LHS, RHS,
                     DAG.getConstant(-1, MVT::i32),
                     DAG.getConstant(0, MVT::i32),
                     CC);
  Cond = DAG.getNode(ISD::AND, DL, MVT::i32,
                     DAG.getConstant(1, MVT::i32),
                     Cond);
  return Cond;
}