644d492832790ed688076e1a96a61b9ed3f41553
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_lowering_gv100.cpp
1 /*
2 * Copyright 2020 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #include "codegen/nv50_ir.h"
23 #include "codegen/nv50_ir_build_util.h"
24
25 #include "codegen/nv50_ir_target_nvc0.h"
26 #include "codegen/nv50_ir_lowering_gv100.h"
27
28 #include <limits>
29
30 namespace nv50_ir {
31
32 bool
33 GV100LegalizeSSA::handleCMP(Instruction *i)
34 {
35 Value *pred = bld.getSSA(1, FILE_PREDICATE);
36
37 bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred,
38 i->sType, bld.mkImm(0), i->getSrc(2))->ftz = i->ftz;
39 bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
40 return true;
41 }
42
43 // NIR deals with most of these for us, but codegen generates more in pointer
44 // calculations from other lowering passes.
45 bool
46 GV100LegalizeSSA::handleIADD64(Instruction *i)
47 {
48 Value *carry = bld.getSSA(1, FILE_PREDICATE);
49 Value *def[2] = { bld.getSSA(), bld.getSSA() };
50 Value *src[2][2];
51
52 for (int s = 0; s < 2; s++) {
53 if (i->getSrc(s)->reg.size == 8) {
54 bld.mkSplit(src[s], 4, i->getSrc(s));
55 } else {
56 src[s][0] = i->getSrc(s);
57 src[s][1] = bld.mkImm(0);
58 }
59 }
60
61 bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])->
62 setFlagsDef(1, carry);
63 bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])->
64 setFlagsSrc(2, carry);
65 bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]);
66 return true;
67 }
68
69 bool
70 GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i)
71 {
72 Value *def = bld.getSSA(8), *defs[2];
73 Value *src2;
74
75 if (i->srcExists(2) &&
76 (!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) {
77 Value *src2s[2] = { bld.getSSA(), bld.getSSA() };
78 bld.mkMov(src2s[0], bld.mkImm(0));
79 bld.mkMov(src2s[1], i->getSrc(2));
80 src2 = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), src2s[0], src2s[1])->getDef(0);
81 } else {
82 src2 = bld.mkImm(0);
83 }
84
85 bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def,
86 i->getSrc(0), i->getSrc(1), src2);
87
88 bld.mkSplit(defs, 4, def);
89 i->def(0).replace(defs[1], false);
90 return true;
91 }
92
93 // XXX: We should be able to do this in GV100LoweringPass, but codegen messes
94 // up somehow and swaps the condcode without swapping the sources.
95 // - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test
96 bool
97 GV100LegalizeSSA::handleIMNMX(Instruction *i)
98 {
99 Value *pred = bld.getSSA(1, FILE_PREDICATE);
100
101 bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, i->dType, pred,
102 i->sType, i->getSrc(0), i->getSrc(1));
103 bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
104 return true;
105 }
106
107 bool
108 GV100LegalizeSSA::handleIMUL(Instruction *i)
109 {
110 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
111 return handleIMAD_HIGH(i);
112
113 bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1),
114 bld.mkImm(0));
115 return true;
116 }
117
118 bool
119 GV100LegalizeSSA::handleLOP2(Instruction *i)
120 {
121 uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0;
122 uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1;
123 uint8_t subOp;
124
125 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
126 src0 = ~src0;
127 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
128 src1 = ~src1;
129
130 switch (i->op) {
131 case OP_AND: subOp = src0 & src1; break;
132 case OP_OR : subOp = src0 | src1; break;
133 case OP_XOR: subOp = src0 ^ src1; break;
134 default:
135 assert(!"invalid LOP2 opcode");
136 break;
137 }
138
139 bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1),
140 bld.mkImm(0))->subOp = subOp;
141 return true;
142 }
143
144 bool
145 GV100LegalizeSSA::handleNOT(Instruction *i)
146 {
147 bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0),
148 bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1;
149 return true;
150 }
151
152 bool
153 GV100LegalizeSSA::handlePREEX2(Instruction *i)
154 {
155 i->def(0).replace(i->src(0), false);
156 return true;
157 }
158
159 bool
160 GV100LegalizeSSA::handleQUADON(Instruction *i)
161 {
162 handleSHFL(i); // Inserts OP_WARPSYNC
163 return true;
164 }
165
166 bool
167 GV100LegalizeSSA::handleQUADPOP(Instruction *i)
168 {
169 return true;
170 }
171
172 bool
173 GV100LegalizeSSA::handleSET(Instruction *i)
174 {
175 Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
176 Value *pred = bld.getSSA(1, FILE_PREDICATE), *met;
177 Instruction *xsetp;
178
179 if (isFloatType(i->dType)) {
180 if (i->sType == TYPE_F32)
181 return false; // HW has FSET.BF
182 met = bld.mkImm(0x3f800000);
183 } else {
184 met = bld.mkImm(0xffffffff);
185 }
186
187 xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType,
188 i->getSrc(0), i->getSrc(1));
189 xsetp->src(0).mod = i->src(0).mod;
190 xsetp->src(1).mod = i->src(1).mod;
191 xsetp->setSrc(2, src2);
192 xsetp->ftz = i->ftz;
193
194 i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred);
195 i->src(2).mod = Modifier(NV50_IR_MOD_NOT);
196 return true;
197 }
198
199 bool
200 GV100LegalizeSSA::handleSHFL(Instruction *i)
201 {
202 Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE);
203 sync->fixed = 1;
204 sync->setSrc(0, bld.mkImm(0xffffffff));
205 i->bb->insertBefore(i, sync);
206 return false;
207 }
208
209 bool
210 GV100LegalizeSSA::handleShift(Instruction *i)
211 {
212 Value *zero = bld.mkImm(0);
213 Value *src1 = i->getSrc(1);
214 Value *src0, *src2;
215 uint8_t subOp = i->op == OP_SHL ? NV50_IR_SUBOP_SHF_L : NV50_IR_SUBOP_SHF_R;
216
217 if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) {
218 src0 = i->getSrc(0);
219 src2 = zero;
220 } else {
221 src0 = zero;
222 src2 = i->getSrc(0);
223 subOp |= NV50_IR_SUBOP_SHF_HI;
224 }
225 if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)
226 subOp |= NV50_IR_SUBOP_SHF_W;
227
228 bld.mkOp3(OP_SHF, i->dType, i->getDef(0), src0, src1, src2)->subOp = subOp;
229 return true;
230 }
231
232 bool
233 GV100LegalizeSSA::handleSUB(Instruction *i)
234 {
235 Instruction *xadd =
236 bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1));
237 xadd->src(0).mod = i->src(0).mod;
238 xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
239 xadd->ftz = i->ftz;
240 return true;
241 }
242
243 bool
244 GV100LegalizeSSA::visit(Instruction *i)
245 {
246 bool lowered = false;
247
248 bld.setPosition(i, false);
249 if (i->sType == TYPE_F32 && i->dType != TYPE_F16 &&
250 prog->getType() != Program::TYPE_COMPUTE)
251 handleFTZ(i);
252
253 switch (i->op) {
254 case OP_AND:
255 case OP_OR:
256 case OP_XOR:
257 if (i->def(0).getFile() != FILE_PREDICATE)
258 lowered = handleLOP2(i);
259 break;
260 case OP_NOT:
261 lowered = handleNOT(i);
262 break;
263 case OP_SHL:
264 case OP_SHR:
265 lowered = handleShift(i);
266 break;
267 case OP_SET:
268 case OP_SET_AND:
269 case OP_SET_OR:
270 case OP_SET_XOR:
271 if (i->def(0).getFile() != FILE_PREDICATE)
272 lowered = handleSET(i);
273 break;
274 case OP_SLCT:
275 lowered = handleCMP(i);
276 break;
277 case OP_PREEX2:
278 lowered = handlePREEX2(i);
279 break;
280 case OP_MUL:
281 if (!isFloatType(i->dType))
282 lowered = handleIMUL(i);
283 break;
284 case OP_MAD:
285 if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH)
286 lowered = handleIMAD_HIGH(i);
287 break;
288 case OP_SHFL:
289 lowered = handleSHFL(i);
290 break;
291 case OP_QUADON:
292 lowered = handleQUADON(i);
293 break;
294 case OP_QUADPOP:
295 lowered = handleQUADPOP(i);
296 break;
297 case OP_SUB:
298 lowered = handleSUB(i);
299 break;
300 case OP_MAX:
301 case OP_MIN:
302 if (!isFloatType(i->dType))
303 lowered = handleIMNMX(i);
304 break;
305 case OP_ADD:
306 if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8)
307 lowered = handleIADD64(i);
308 break;
309 case OP_PFETCH:
310 handlePFETCH(i);
311 break;
312 case OP_LOAD:
313 handleLOAD(i);
314 break;
315 default:
316 break;
317 }
318
319 if (lowered)
320 delete_Instruction(prog, i);
321
322 return true;
323 }
324
325 bool
326 GV100LoweringPass::handleDMNMX(Instruction *i)
327 {
328 Value *pred = bld.getSSA(1, FILE_PREDICATE);
329 Value *src0[2], *src1[2], *dest[2];
330
331 bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, TYPE_U32, pred,
332 i->sType, i->getSrc(0), i->getSrc(1));
333 bld.mkSplit(src0, 4, i->getSrc(0));
334 bld.mkSplit(src1, 4, i->getSrc(1));
335 bld.mkSplit(dest, 4, i->getDef(0));
336 bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred);
337 bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred);
338 bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]);
339 return true;
340 }
341
342 bool
343 GV100LoweringPass::handleEXTBF(Instruction *i)
344 {
345 Value *bit = bld.getScratch();
346 Value *cnt = bld.getScratch();
347 Value *mask = bld.getScratch();
348 Value *zero = bld.mkImm(0);
349
350 bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
351 bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
352 bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt);
353 bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask);
354 bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit);
355 if (isSignedType(i->dType))
356 bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt);
357
358 return true;
359 }
360
361 bool
362 GV100LoweringPass::handleFLOW(Instruction *i)
363 {
364 i->op = OP_BRA;
365 return false;
366 }
367
368 bool
369 GV100LoweringPass::handleI2I(Instruction *i)
370 {
371 bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))->
372 subOp = i->subOp;
373 bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0));
374 return true;
375 }
376
377 bool
378 GV100LoweringPass::handleINSBF(Instruction *i)
379 {
380 Value *bit = bld.getScratch();
381 Value *cnt = bld.getScratch();
382 Value *mask = bld.getScratch();
383 Value *src0 = bld.getScratch();
384 Value *zero = bld.mkImm(0);
385
386 bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
387 bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
388 bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt);
389
390 bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask);
391 bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit);
392
393 bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit);
394 bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, i->getSrc(2), mask)->
395 subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c));
396
397 return true;
398 }
399
400 bool
401 GV100LoweringPass::handlePINTERP(Instruction *i)
402 {
403 Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
404 Instruction *ipa, *mul;
405
406 ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2);
407 ipa->ipa = i->ipa;
408 mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1));
409
410 if (i->getInterpMode() == NV50_IR_INTERP_SC) {
411 ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE));
412 mul->setPredicate(CC_NOT_P, ipa->getDef(1));
413 }
414
415 return true;
416 }
417
418 bool
419 GV100LoweringPass::handlePREFLOW(Instruction *i)
420 {
421 return true;
422 }
423
424 bool
425 GV100LoweringPass::handlePRESIN(Instruction *i)
426 {
427 const float f = 1.0 / (2.0 * 3.14159265);
428 bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f));
429 return true;
430 }
431
432 bool
433 GV100LoweringPass::visit(Instruction *i)
434 {
435 bool lowered = false;
436
437 bld.setPosition(i, false);
438
439 switch (i->op) {
440 case OP_BREAK:
441 case OP_CONT:
442 lowered = handleFLOW(i);
443 break;
444 case OP_PREBREAK:
445 case OP_PRECONT:
446 lowered = handlePREFLOW(i);
447 break;
448 case OP_CVT:
449 if (i->src(0).getFile() != FILE_PREDICATE &&
450 i->def(0).getFile() != FILE_PREDICATE &&
451 !isFloatType(i->dType) && !isFloatType(i->sType))
452 lowered = handleI2I(i);
453 break;
454 case OP_EXTBF:
455 lowered = handleEXTBF(i);
456 break;
457 case OP_INSBF:
458 lowered = handleINSBF(i);
459 break;
460 case OP_MAX:
461 case OP_MIN:
462 if (i->dType == TYPE_F64)
463 lowered = handleDMNMX(i);
464 break;
465 case OP_PINTERP:
466 lowered = handlePINTERP(i);
467 break;
468 case OP_PRESIN:
469 lowered = handlePRESIN(i);
470 break;
471 default:
472 break;
473 }
474
475 if (lowered)
476 delete_Instruction(prog, i);
477
478 return true;
479 }
480
481 } // namespace nv50_ir