radeon/llvm: Add flag operand to some instructions
[mesa.git] / src / gallium / drivers / radeon / R600ISelLowering.cpp
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

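// Most opcodes handled below are pseudo instructions: each case rewrites the
// pseudo into real machine instructions and the pseudo itself is erased at
// the bottom of the function, so any case that must keep the original
// instruction alive has to return early (see MASK_WRITE).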
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
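  // CLAMP, FABS and FNEG have no instruction of their own; each becomes a
  // MOV whose trailing immediate flag operand (the .addImm(0) below) is
  // rewritten by TII->AddFlag(): MO_FLAG_CLAMP goes on operand 0 (the
  // destination), MO_FLAG_ABS and MO_FLAG_NEG on operand 1 (the source).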
  case AMDGPU::CLAMP_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }
  case AMDGPU::FABS_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }

  case AMDGPU::FNEG_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_NEG);
      break;
    }

  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
          .addOperand(MI->getOperand(0))
          .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->AddFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

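  // The RAT (Random Access Target) write expects a dword-granular address,
  // so the byte address in operand 1 is shifted right by two before the
  // write is re-emitted with the new address register.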
  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
          AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
          AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but doing it that
      // way did not produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
          .addReg(AMDGPU::ALU_LITERAL_X)
          .addReg(AMDGPU::PRED_SEL_OFF)
          .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
          .addOperand(MI->getOperand(1))
          .addReg(ShiftValue)
          .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
          .addOperand(MI->getOperand(0))
          .addReg(NewAddr);
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

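  // TXD is a texture sample with user-supplied derivatives.  The gradients
  // are loaded with TEX_SET_GRADIENTS_H/V first; the implicit uses of t0 and
  // t1 on the sample instruction keep those writes from being moved past or
  // dropped before the sample.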
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
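  // Unconditional BRANCH becomes a JUMP with no predicate register, while
  // the conditional forms first emit a PRED_X that compares the condition
  // against zero into PREDICATE_BIT, then a JUMP predicated on (and killing)
  // that bit.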
  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(0);
    break;
  case AMDGPU::BRANCH_COND_f32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_ZERO)
              .addImm(0); // Flags
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::BRANCH_COND_i32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_ZERO_INT)
              .addImm(0); // Flags
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

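// BR_CC is expanded into a SELECT_CC that materializes the comparison result
// as an all-ones/zero i32 mask, followed by an AMDGPUISD::BRANCH_COND on
// that value.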
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

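// Implicit kernel parameters (ngroups, global and local sizes) live at fixed
// dword offsets in the PARAM_I constant address space, so reading one is
// just a load from a null-based pointer at ByteOffset.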
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

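// rotl(x, amt) is implemented on BITALIGN; treating BITALIGN as a funnel
// shift right (it selects 32 bits from the concatenation of its first two
// operands shifted right by the third), shifting the doubled value x:x right
// by (32 - amt) is the same as rotating x left by amt.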
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we convert LHS and RHS to the same type as True and False.
  // True and False are guaranteed to have the same type as this SELECT_CC
  // node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value we
  // can handle this with a native instruction (the SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If False is a hardware TRUE value and True is a hardware FALSE value,
  // we can also handle this with a native instruction, but we need to swap
  // True and False and invert the condition code.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation.  (The code below does this, but we don't have the
  // instruction selection patterns to use it yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue,
                             HWFalse, CC);

  // Convert floating point condition to i1
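  // Cond here is 1.0f or 0.0f; FNEG turns that into -1.0f or -0.0f, and
  // FP_TO_SINT then yields -1 or 0, the integer true/false values used
  // elsewhere in this lowering.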
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

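// SETCC must produce 0 or 1, but the hardware compare produces an all-ones
// mask, so the SELECT_CC result (-1 or 0) is masked down with AND 1.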
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      DL,
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}