radeonsi: Fix sampler views for depth textures.
[mesa.git] / src / gallium / drivers / radeon / R600ISelLowering.cpp
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
// mostly implements EmitInstrWithCustomInserter() and the R600-specific
// custom lowering of SDNode operations.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "R600ISelLowering.h"
16 #include "R600Defines.h"
17 #include "R600InstrInfo.h"
18 #include "R600MachineFunctionInfo.h"
19 #include "llvm/Argument.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
23
24 using namespace llvm;
25
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);

  setSchedulingPreference(Sched::VLIW);
}

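// Lower pseudo-instructions that are easier to expand after instruction
// selection, e.g. operations that map to R600 instruction flag bits or to
// short multi-instruction sequences with no SelectionDAG representation.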
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::SHADER_TYPE: break;
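  // CLAMP, FABS and FNEG all expand to a plain MOV; the actual operation is
  // encoded as an instruction flag on the destination or source operand.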
  case AMDGPU::CLAMP_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }
  case AMDGPU::FABS_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }

  case AMDGPU::FNEG_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_NEG);
      break;
    }

  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
          .addOperand(MI->getOperand(0))
          .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->addFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased.
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
    {
      // Convert to DWORD address.
      unsigned NewAddr = MRI.createVirtualRegister(
          &AMDGPU::R600_TReg32_XRegClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
          &AMDGPU::R600_TReg32RegClass);
      unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
              ShiftValue)
          .addReg(AMDGPU::ALU_LITERAL_X)
          .addReg(AMDGPU::PRED_SEL_OFF)
          .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
          .addOperand(MI->getOperand(1))
          .addReg(ShiftValue)
          .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
          .addOperand(MI->getOperand(0))
          .addReg(NewAddr)
          .addImm(EOP); // Set End of program bit
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

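  // TXD/TXD_SHADOW: texture sampling with user-supplied gradients. The
  // horizontal and vertical gradients are loaded first with dedicated
  // pseudo-instructions; listing their results as implicit uses of the
  // sample instruction keeps the gradient writes live and ordered before it.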
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(0);
      break;
  case AMDGPU::BRANCH_COND_f32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::BRANCH_COND_i32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::input_perspective:
    {
      R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();

      // XXX Be more fine-grained about register reservation.
      for (unsigned i = 0; i < 4; i++) {
        unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
        MFI->ReservedRegs.push_back(ReservedReg);
      }

      switch (MI->getOperand(1).getImm()) {
      case 0: // Perspective
        MFI->HasPerspectiveInterpolation = true;
        break;
      case 1: // Linear
        MFI->HasLinearInterpolation = true;
        break;
      default:
        assert(0 && "Unknown ij index");
      }

      return BB;
    }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    case AMDGPUIntrinsic::R600_load_input_perspective: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_linear: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_constant: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP_P0,
          DL, MVT::v4f32,
          DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_position: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(slot);
      SDValue RegValue = CreateLiveInRegister(DAG,
                                              &AMDGPU::R600_TReg32RegClass,
                                              Reg, MVT::f32);
      if ((slot % 4) == 3) {
        return DAG.getNode(ISD::FDIV,
                           DL, VT,
                           DAG.getConstantFP(1.0f, MVT::f32),
                           RegValue);
      } else {
        return RegValue;
      }
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const
{
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT:
    Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    // Return here; falling through to the intrinsic case would misread
    // operand 0 as an intrinsic ID.
    return;
  case ISD::INTRINSIC_WO_CHAIN:
    {
      unsigned IntrinsicID =
          cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
      if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) {
        Results.push_back(LowerInputFace(N, DAG));
      } else {
        return;
      }
    }
  }
}

SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
{
  unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(slot);
  SDValue RegValue = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                          Reg, MVT::f32);
  return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
                     RegValue, DAG.getConstantFP(0.0f, MVT::f32),
                     DAG.getCondCode(ISD::SETUGT));
}

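// An i1 fp_to_uint only has to distinguish zero from non-zero, so it can be
// lowered to a single floating-point comparison against 0.0.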
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
{
  return DAG.getNode(
      ISD::SETCC,
      Op.getDebugLoc(),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE));
}

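// BR_CC is lowered by first materializing the comparison result as a
// hardware boolean (all-ones/0 for i32, 1.0f/0.0f for f32) via SELECT_CC,
// then branching on that value with BRANCH_COND.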
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;

  if (LHS.getValueType() == MVT::i32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
  } else {
    assert(0 && "Not valid type for br_cc");
  }
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

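// Implicit parameters (the ngroups/global-size/local-size values above) are
// laid out as consecutive dwords at the start of the PARAM_I_ADDRESS space,
// so each one can be fetched with a plain constant-offset load.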
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

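  // BITALIGN selects a 32-bit window from the 64-bit concatenation of its
  // first two operands, shifted right by the third, so
  // rotl(x, n) == bitalign(x, x, 32 - n).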
  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

bool R600TargetLowering::isZero(SDValue Op) const
{
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

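// The native CND* instructions can implement SELECT_CC directly, but only
// when all operands share one type and the comparison is against zero;
// everything else has to be massaged into that form first.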
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type as True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                      LHS, RHS,
                                      DAG.getConstant(-1, MVT::i32),
                                      DAG.getConstant(0, MVT::i32),
                                      CC);
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                        LHS, RHS,
                                        DAG.getConstantFP(1.0f, MVT::f32),
                                        DAG.getConstantFP(0.0f, MVT::f32),
                                        CC);
        return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type pairings in SELECT_CC");
      }
    } else {
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we could handle this with a native instruction by swapping True and False
  // and inverting the condition.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // Check if we can lower this to a native operation.
  // CND* instructions require all operands to have the same type,
  // and RHS to be zero.

  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS,
                             HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

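// SETCC is lowered as a SELECT_CC that produces the hardware boolean
// encoding (-1/0 for i32, 1.0f/0.0f for f32); the result is then masked
// with 1 so the final value is the 0/1 boolean SETCC is defined to return.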
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  if (LHS.getValueType() == MVT::i32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
    Cond = DAG.getNode(
        ISD::FP_TO_SINT,
        DL,
        MVT::i32,
        Cond);
  } else {
    assert(0 && "Not valid type for set_cc");
  }
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}

// XXX Only kernel functions are supported, so we can assume for now that
// every function is a kernel function, but in the future we should use
// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
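  // The first 36 bytes (9 dwords) of the parameter space hold the implicit
  // parameters fetched by LowerImplicitParameter above, so explicit kernel
  // arguments start at byte offset 36.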
  unsigned ParamOffsetBytes = 36;
  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    EVT VT = Ins[i].VT;
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getLoad(VT, DL, DAG.getRoot(),
                              DAG.getConstant(ParamOffsetBytes, MVT::i32),
                              MachinePointerInfo(new Argument(PtrTy)),
                              false, false, false, 4);
    InVals.push_back(Arg);
    ParamOffsetBytes += VT.getStoreSize();
  }
  return Chain;
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const
{
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }
  }
  return SDValue();
}