//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
// mostly implements EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

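  // R600 is a VLIW machine, so prefer the VLIW scheduler; it lets
  // independent ALU operations be packed into the same instruction group.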
  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
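    // Clamping to [0.0, 1.0] is a free output modifier on R600 ALU
    // instructions, so fold the clamp into a MOV by flagging its
    // destination operand.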
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FABS_R600:
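    // Absolute value is a free source modifier, so fold it into a MOV with
    // the ABS flag set on the source operand.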
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FNEG_R600:
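    // Negation is likewise a free source modifier.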
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::R600_LOAD_CONST:
  {
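    // Replace the pseudo instruction with a COPY from the constant register
    // file entry selected by the immediate index.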
    int64_t RegIndex = MI->getOperand(1).getImm();
    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
        .addOperand(MI->getOperand(0))
        .addReg(ConstantReg);
    break;
  }

  case AMDGPU::MASK_WRITE:
  {
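    // No code is emitted here; instead, flag the instruction that defines
    // the masked register so its result write is suppressed.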
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
    def->addTargetFlag(MO_FLAG_MASK);
    // Return early so the instruction is not erased
    return BB;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
  {
    // Convert to DWORD address
    unsigned NewAddr = MRI.createVirtualRegister(
        AMDGPU::R600_TReg32_XRegisterClass);
    unsigned ShiftValue = MRI.createVirtualRegister(
        AMDGPU::R600_TReg32RegisterClass);

    // XXX In theory we should be able to pass ShiftValue directly to the
    // LSHR_eg instruction as an inline literal, but doing so did not
    // produce the correct results, so load it into a register first.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
        .addReg(AMDGPU::ALU_LITERAL_X)
        .addReg(AMDGPU::PRED_SEL_OFF)
        .addImm(2);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
        .addOperand(MI->getOperand(1))
        .addReg(ShiftValue)
        .addReg(AMDGPU::PRED_SEL_OFF);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addReg(NewAddr);
    break;
  }

  case AMDGPU::RESERVE_REG:
  {
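    // Record the register in the machine function info so it can later be
    // reported as reserved, keeping it away from the register allocator.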
    R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
    int64_t ReservedIndex = MI->getOperand(0).getImm();
    unsigned ReservedReg =
        AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
    MFI->ReservedRegs.push_back(ReservedReg);
    break;
  }

  case AMDGPU::TXD:
  {
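    // TXD samples with explicit derivatives: load the horizontal and
    // vertical gradients into temporaries, then sample.  The implicit uses
    // on TEX_SAMPLE_G keep the gradient writes ordered before the sample.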
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addReg(t0, RegState::Implicit)
        .addReg(t1, RegState::Implicit);
    break;
  }
  case AMDGPU::TXD_SHADOW:
  {
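    // Same expansion as TXD, but sampling with the shadow-compare variant.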
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addReg(t0, RegState::Implicit)
        .addReg(t1, RegState::Implicit);
    break;
  }
  case AMDGPU::BRANCH:
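    // An unconditional branch becomes a JUMP with no predicate register.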
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(0);
    break;
  case AMDGPU::BRANCH_COND_f32:
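    // A conditional branch is expanded into a PRED_X that tests the
    // condition and sets the predicate bit, followed by a JUMP predicated
    // on that bit.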
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
        .addReg(AMDGPU::PREDICATE_BIT)
        .addOperand(MI->getOperand(1))
        .addImm(OPCODE_IS_ZERO);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  case AMDGPU::BRANCH_COND_i32:
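    // Same expansion as the f32 case, but with the integer zero test.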
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
        .addReg(AMDGPU::PREDICATE_BIT)
        .addOperand(MI->getOperand(1))
        .addImm(OPCODE_IS_ZERO_INT);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

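    // Implicit kernel parameters (grid and block dimensions) live at fixed
    // dword offsets in the implicit parameter space; LowerImplicitParameter
    // builds the load for a given offset.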
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

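    // The hardware preloads the group IDs into T1 and the per-thread IDs
    // into T0 before the kernel starts, so read them as live-in registers.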
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
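  // There is no native BR_CC, so materialize the comparison as an
  // all-ones/zero value with SELECT_CC and branch on it with BRANCH_COND.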
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
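  // Implicit parameters are stored as consecutive dwords in the implicit
  // parameter address space, so the load address is simply the dword offset
  // scaled to bytes.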
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
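  // BITALIGN extracts a 32-bit window from the 64-bit concatenation of its
  // first two operands, shifted right by the third.  With both inputs equal
  // to x, shifting right by (32 - amt) yields a left rotate by amt.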
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // All the operands of SELECT_CC need to have the same value type, so if
  // necessary convert LHS and RHS to the type of True and False.  True and
  // False are guaranteed to have the same type as this SELECT_CC node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE
  // value, we could also handle this with a native instruction, but we
  // would need to swap True and False and invert the condition code.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation.  (The code below does this, but we don't have the
  // instruction selection patterns for it yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to
  // handle this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1
  if (VT == MVT::f32) {
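    // Cond is 1.0f or 0.0f here; negating it first makes FP_TO_SINT
    // produce the all-ones (-1) or zero integer condition SELECT expects.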
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
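  // Emit a SELECT_CC that produces -1 or 0, then AND with 1 so SETCC
  // returns the 0/1 result callers expect.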
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}