radeon/llvm: Handle floating point loads on R600
[mesa.git] / src / gallium / drivers / radeon / AMDGPUISelLowering.cpp
1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This is the parent TargetLowering class for hardware code gen targets.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUISelLowering.h"
15 #include "AMDILIntrinsicInfo.h"
16 #include "AMDGPUUtil.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18
19 using namespace llvm;
20
21 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
22 AMDILTargetLowering(TM)
23 {
24 // We need to custom lower some of the intrinsics
25 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
26
27 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
28 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
29
30 // Library functions. These default to Expand, but we have instructions
31 // for them.
32 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
33 setOperationAction(ISD::FEXP2, MVT::f32, Legal);
34 setOperationAction(ISD::FRINT, MVT::f32, Legal);
35
36 setOperationAction(ISD::LOAD, MVT::f32, Custom);
37
38 setOperationAction(ISD::UDIV, MVT::i32, Expand);
39 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
40 setOperationAction(ISD::UREM, MVT::i32, Expand);
41 }
42
43 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
44 const
45 {
46 switch (Op.getOpcode()) {
47 default: return AMDILTargetLowering::LowerOperation(Op, DAG);
48 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
49 case ISD::LOAD: return BitcastLOAD(Op, DAG);
50 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
51 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
52 }
53 }
54
55 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
56 SelectionDAG &DAG) const
57 {
58 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
59 DebugLoc DL = Op.getDebugLoc();
60 EVT VT = Op.getValueType();
61
62 switch (IntrinsicID) {
63 default: return Op;
64 case AMDGPUIntrinsic::AMDIL_abs:
65 return LowerIntrinsicIABS(Op, DAG);
66 case AMDGPUIntrinsic::AMDIL_exp:
67 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
68 case AMDGPUIntrinsic::AMDIL_fabs:
69 return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
70 case AMDGPUIntrinsic::AMDGPU_lrp:
71 return LowerIntrinsicLRP(Op, DAG);
72 case AMDGPUIntrinsic::AMDIL_fraction:
73 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
74 case AMDGPUIntrinsic::AMDIL_mad:
75 return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
76 Op.getOperand(2), Op.getOperand(3));
77 case AMDGPUIntrinsic::AMDIL_max:
78 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
79 Op.getOperand(2));
80 case AMDGPUIntrinsic::AMDGPU_imax:
81 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
82 Op.getOperand(2));
83 case AMDGPUIntrinsic::AMDGPU_umax:
84 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
85 Op.getOperand(2));
86 case AMDGPUIntrinsic::AMDIL_min:
87 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
88 Op.getOperand(2));
89 case AMDGPUIntrinsic::AMDGPU_imin:
90 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
91 Op.getOperand(2));
92 case AMDGPUIntrinsic::AMDGPU_umin:
93 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
94 Op.getOperand(2));
95 case AMDGPUIntrinsic::AMDIL_round_nearest:
96 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
97 case AMDGPUIntrinsic::AMDIL_round_posinf:
98 return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
99 }
100 }
101
102 ///IABS(a) = SMAX(sub(0, a), a)
103 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
104 SelectionDAG &DAG) const
105 {
106
107 DebugLoc DL = Op.getDebugLoc();
108 EVT VT = Op.getValueType();
109 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
110 Op.getOperand(1));
111
112 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
113 }
114
115 /// Linear Interpolation
116 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
117 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
118 SelectionDAG &DAG) const
119 {
120 DebugLoc DL = Op.getDebugLoc();
121 EVT VT = Op.getValueType();
122 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
123 DAG.getConstantFP(1.0f, MVT::f32),
124 Op.getOperand(1));
125 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
126 Op.getOperand(3));
127 return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
128 Op.getOperand(2),
129 OneSubAC);
130 }
131
132 /// BitcastLoad - Convert floating point loads to integer loads of the same
133 /// type width and the bitcast the result back to a floating point type.
134 SDValue AMDGPUTargetLowering::BitcastLOAD(SDValue Op, SelectionDAG &DAG) const
135 {
136 DebugLoc DL = Op.getDebugLoc();
137 EVT VT = Op.getValueType();
138 EVT IntVT;
139
140 if (VT == MVT::f32) {
141 IntVT = MVT::i32;
142 } else {
143 return Op;
144 }
145 LoadSDNode * LD = dyn_cast<LoadSDNode>(Op);
146 assert(LD);
147
148 SDValue NewLoad = DAG.getLoad (LD->getAddressingMode(),
149 LD->getExtensionType(), IntVT, DL,
150 LD->getChain(), LD->getBasePtr(),
151 LD->getOffset(), IntVT,
152 LD->getMemOperand());
153
154 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, NewLoad);
155 DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Bitcast);
156
157 return Op;
158 }
159
160 SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op,
161 SelectionDAG &DAG) const
162 {
163 DebugLoc DL = Op.getDebugLoc();
164 EVT VT = Op.getValueType();
165
166 SDValue LHS = Op.getOperand(0);
167 SDValue RHS = Op.getOperand(1);
168 SDValue True = Op.getOperand(2);
169 SDValue False = Op.getOperand(3);
170 SDValue CC = Op.getOperand(4);
171 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
172 SDValue Temp;
173
174 // LHS and RHS are guaranteed to be the same value type
175 EVT CompareVT = LHS.getValueType();
176
177 // We need all the operands of SELECT_CC to have the same value type, so if
178 // necessary we need to convert LHS and RHS to be the same type True and
179 // False. True and False are guaranteed to have the same type as this
180 // SELECT_CC node.
181
182 if (CompareVT != VT) {
183 ISD::NodeType ConversionOp = ISD::DELETED_NODE;
184 if (VT == MVT::f32 && CompareVT == MVT::i32) {
185 if (isUnsignedIntSetCC(CCOpcode)) {
186 ConversionOp = ISD::UINT_TO_FP;
187 } else {
188 ConversionOp = ISD::SINT_TO_FP;
189 }
190 } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
191 ConversionOp = ISD::FP_TO_SINT;
192 } else {
193 // I don't think there will be any other type pairings.
194 assert(!"Unhandled operand type parings in SELECT_CC");
195 }
196 // XXX Check the value of LHS and RHS and avoid creating sequences like
197 // (FTOI (ITOF))
198 LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
199 RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
200 }
201
202 // If True is a hardware TRUE value and False is a hardware FALSE value or
203 // vice-versa we can handle this with a native instruction (SET* instructions).
204 if ((isHWTrueValue(True) && isHWFalseValue(False))) {
205 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
206 }
207
208 // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
209 // we can handle this with a native instruction, but we need to swap true
210 // and false and change the conditional.
211 if (isHWTrueValue(False) && isHWFalseValue(True)) {
212 }
213
214 // XXX Check if we can lower this to a SELECT or if it is supported by a native
215 // operation. (The code below does this but we don't have the Instruction
216 // selection patterns to do this yet.
217 #if 0
218 if (isZero(LHS) || isZero(RHS)) {
219 SDValue Cond = (isZero(LHS) ? RHS : LHS);
220 bool SwapTF = false;
221 switch (CCOpcode) {
222 case ISD::SETOEQ:
223 case ISD::SETUEQ:
224 case ISD::SETEQ:
225 SwapTF = true;
226 // Fall through
227 case ISD::SETONE:
228 case ISD::SETUNE:
229 case ISD::SETNE:
230 // We can lower to select
231 if (SwapTF) {
232 Temp = True;
233 True = False;
234 False = Temp;
235 }
236 // CNDE
237 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
238 default:
239 // Supported by a native operation (CNDGE, CNDGT)
240 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
241 }
242 }
243 #endif
244
245 // If we make it this for it means we have no native instructions to handle
246 // this SELECT_CC, so we must lower it.
247 SDValue HWTrue, HWFalse;
248
249 if (VT == MVT::f32) {
250 HWTrue = DAG.getConstantFP(1.0f, VT);
251 HWFalse = DAG.getConstantFP(0.0f, VT);
252 } else if (VT == MVT::i32) {
253 HWTrue = DAG.getConstant(-1, VT);
254 HWFalse = DAG.getConstant(0, VT);
255 }
256 else {
257 assert(!"Unhandled value type in LowerSELECT_CC");
258 }
259
260 // Lower this unsupported SELECT_CC into a combination of two supported
261 // SELECT_CC operations.
262 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
263
264 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
265 }
266
267
268 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
269 SelectionDAG &DAG) const
270 {
271 DebugLoc DL = Op.getDebugLoc();
272 EVT VT = Op.getValueType();
273
274 SDValue Num = Op.getOperand(0);
275 SDValue Den = Op.getOperand(1);
276
277 SmallVector<SDValue, 8> Results;
278
279 // RCP = URECIP(Den) = 2^32 / Den + e
280 // e is rounding error.
281 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
282
283 // RCP_LO = umulo(RCP, Den) */
284 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
285
286 // RCP_HI = mulhu (RCP, Den) */
287 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
288
289 // NEG_RCP_LO = -RCP_LO
290 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
291 RCP_LO);
292
293 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
294 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
295 NEG_RCP_LO, RCP_LO,
296 ISD::SETEQ);
297 // Calculate the rounding error from the URECIP instruction
298 // E = mulhu(ABS_RCP_LO, RCP)
299 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
300
301 // RCP_A_E = RCP + E
302 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
303
304 // RCP_S_E = RCP - E
305 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
306
307 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
308 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
309 RCP_A_E, RCP_S_E,
310 ISD::SETEQ);
311 // Quotient = mulhu(Tmp0, Num)
312 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
313
314 // Num_S_Remainder = Quotient * Den
315 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
316
317 // Remainder = Num - Num_S_Remainder
318 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
319
320 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
321 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
322 DAG.getConstant(-1, VT),
323 DAG.getConstant(0, VT),
324 ISD::SETGE);
325 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
326 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
327 DAG.getConstant(0, VT),
328 DAG.getConstant(-1, VT),
329 DAG.getConstant(0, VT),
330 ISD::SETGE);
331 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
332 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
333 Remainder_GE_Zero);
334
335 // Calculate Division result:
336
337 // Quotient_A_One = Quotient + 1
338 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
339 DAG.getConstant(1, VT));
340
341 // Quotient_S_One = Quotient - 1
342 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
343 DAG.getConstant(1, VT));
344
345 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
346 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
347 Quotient, Quotient_A_One, ISD::SETEQ);
348
349 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
350 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
351 Quotient_S_One, Div, ISD::SETEQ);
352
353 // Calculate Rem result:
354
355 // Remainder_S_Den = Remainder - Den
356 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
357
358 // Remainder_A_Den = Remainder + Den
359 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
360
361 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
362 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
363 Remainder, Remainder_S_Den, ISD::SETEQ);
364
365 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
366 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
367 Remainder_A_Den, Rem, ISD::SETEQ);
368
369 DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
370 DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
371
372 return Op;
373 }
374
375 //===----------------------------------------------------------------------===//
376 // Helper functions
377 //===----------------------------------------------------------------------===//
378
379 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
380 {
381 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
382 return CFP->isExactlyValue(1.0);
383 }
384 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
385 return C->isAllOnesValue();
386 }
387 return false;
388 }
389
390 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
391 {
392 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
393 return CFP->getValueAPF().isZero();
394 }
395 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
396 return C->isNullValue();
397 }
398 return false;
399 }
400
401 void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
402 MachineFunction * MF, MachineRegisterInfo & MRI,
403 const TargetInstrInfo * TII, unsigned reg) const
404 {
405 AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg());
406 }
407
408 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
409
410 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
411 {
412 switch (Opcode) {
413 default: return AMDILTargetLowering::getTargetNodeName(Opcode);
414
415 NODE_NAME_CASE(FRACT)
416 NODE_NAME_CASE(FMAX)
417 NODE_NAME_CASE(SMAX)
418 NODE_NAME_CASE(UMAX)
419 NODE_NAME_CASE(FMIN)
420 NODE_NAME_CASE(SMIN)
421 NODE_NAME_CASE(UMIN)
422 NODE_NAME_CASE(URECIP)
423 }
424 }