2 * Copyright 2020 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 #include "codegen/nv50_ir.h"
23 #include "codegen/nv50_ir_build_util.h"
25 #include "codegen/nv50_ir_target_nvc0.h"
26 #include "codegen/nv50_ir_lowering_gv100.h"
// Replaces the instruction with a predicate compare followed by a select.
//  - OP_SET compares an immediate 0 against source 2 (typed by i->sType),
//    using the reversed condition code of the original compare, writes the
//    result into a fresh FILE_PREDICATE value and copies the original's ftz
//    flag onto the new compare.
//  - OP_SELP then writes the original destination from sources 0 and 1,
//    with that predicate as its third operand.
// NOTE(review): the condition code is presumably reversed because the
// operands are emitted as (0, src2) rather than (src2, 0) -- confirm.
33 GV100LegalizeSSA::handleCMP(Instruction
*i
)
35 Value
*pred
= bld
.getSSA(1, FILE_PREDICATE
);
37 bld
.mkCmp(OP_SET
, reverseCondCode(i
->asCmp()->setCond
), TYPE_U8
, pred
,
38 i
->sType
, bld
.mkImm(0), i
->getSrc(2))->ftz
= i
->ftz
;
39 bld
.mkOp3(OP_SELP
, TYPE_U32
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1), pred
);
43 // NIR deals with most of these for us, but codegen generates more in pointer
44 // calculations from other lowering passes.
// Builds a 64-bit integer add out of two 32-bit adds linked by a carry.
//  - Each of the two sources whose reg.size is 8 is split into two 4-byte
//    halves, stored in src[s][0] and src[s][1].
//  - The other visible path uses the source itself as src[s][0] and an
//    immediate 0 as src[s][1].
//    NOTE(review): presumably this is the else-branch for narrower sources --
//    confirm.
//  - The first OP_ADD combines the [0] halves and defines the carry
//    predicate as its flags output; the second OP_ADD combines the [1]
//    halves and consumes that carry as its flags input.
//  - OP_MERGE joins def[0] and def[1] into the original destination using
//    i->dType.
46 GV100LegalizeSSA::handleIADD64(Instruction
*i
)
48 Value
*carry
= bld
.getSSA(1, FILE_PREDICATE
);
49 Value
*def
[2] = { bld
.getSSA(), bld
.getSSA() };
52 for (int s
= 0; s
< 2; s
++) {
53 if (i
->getSrc(s
)->reg
.size
== 8) {
54 bld
.mkSplit(src
[s
], 4, i
->getSrc(s
));
56 src
[s
][0] = i
->getSrc(s
);
57 src
[s
][1] = bld
.mkImm(0);
61 bld
.mkOp2(OP_ADD
, TYPE_U32
, def
[0], src
[0][0], src
[1][0])->
62 setFlagsDef(1, carry
);
63 bld
.mkOp2(OP_ADD
, TYPE_U32
, def
[1], src
[0][1], src
[1][1])->
64 setFlagsSrc(2, carry
);
65 bld
.mkOp2(OP_MERGE
, i
->dType
, i
->getDef(0), def
[0], def
[1]);
// Produces the high half of a multiply(-add) via a 64-bit OP_MAD.
//  - If source 2 exists and is either not an immediate or an immediate whose
//    u32 payload is non-zero, a 64-bit addend is built with OP_MERGE from
//    (a mov of immediate 0, a mov of source 2), in that order.
//    NOTE(review): this presumably places the addend in the upper word so it
//    lands in the half that is kept -- confirm against OP_MERGE operand
//    order.
//  - OP_MAD is emitted as TYPE_S64 when i->sType is signed, TYPE_U64
//    otherwise, into an 8-byte SSA value.
//  - That value is split into two 4-byte halves and the original
//    destination is replaced by defs[1].
// NOTE(review): the value of src2 on the path where the condition is false
// is assumed to be set elsewhere in this function -- confirm.
70 GV100LegalizeSSA::handleIMAD_HIGH(Instruction
*i
)
72 Value
*def
= bld
.getSSA(8), *defs
[2];
75 if (i
->srcExists(2) &&
76 (!i
->getSrc(2)->asImm() || i
->getSrc(2)->asImm()->reg
.data
.u32
)) {
77 Value
*src2s
[2] = { bld
.getSSA(), bld
.getSSA() };
78 bld
.mkMov(src2s
[0], bld
.mkImm(0));
79 bld
.mkMov(src2s
[1], i
->getSrc(2));
80 src2
= bld
.mkOp2(OP_MERGE
, TYPE_U64
, bld
.getSSA(8), src2s
[0], src2s
[1])->getDef(0);
85 bld
.mkOp3(OP_MAD
, isSignedType(i
->sType
) ? TYPE_S64
: TYPE_U64
, def
,
86 i
->getSrc(0), i
->getSrc(1), src2
);
88 bld
.mkSplit(defs
, 4, def
);
89 i
->def(0).replace(defs
[1], false);
93 // XXX: We should be able to do this in GV100LoweringPass, but codegen messes
94 // up somehow and swaps the condcode without swapping the sources.
95 // - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test
// Expands a min/max into a compare plus a select.
//  - OP_SET compares source 0 against source 1 (typed by i->sType) with
//    CC_LT when i->op is OP_MIN and CC_GT otherwise, writing a fresh
//    FILE_PREDICATE value.
//  - OP_SELP then writes the original destination from sources 0 and 1 with
//    that predicate as its third operand, using i->dType.
97 GV100LegalizeSSA::handleIMNMX(Instruction
*i
)
99 Value
*pred
= bld
.getSSA(1, FILE_PREDICATE
);
101 bld
.mkCmp(OP_SET
, (i
->op
== OP_MIN
) ? CC_LT
: CC_GT
, i
->dType
, pred
,
102 i
->sType
, i
->getSrc(0), i
->getSrc(1));
103 bld
.mkOp3(OP_SELP
, i
->dType
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1), pred
);
// Rewrites an integer multiply as OP_MAD.
//  - When i->subOp is NV50_IR_SUBOP_MUL_HIGH the work is delegated to
//    handleIMAD_HIGH() and its result is returned.
//  - Otherwise an OP_MAD of sources 0 and 1 is emitted into the original
//    destination with i->dType.
// NOTE(review): the third MAD operand is presumably a zero addend -- confirm.
108 GV100LegalizeSSA::handleIMUL(Instruction
*i
)
110 if (i
->subOp
== NV50_IR_SUBOP_MUL_HIGH
)
111 return handleIMAD_HIGH(i
);
113 bld
.mkOp3(OP_MAD
, i
->dType
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1),
// Converts a two-input logic op into OP_LOP3_LUT.
//  - src0 and src1 start as the NV50_IR_SUBOP_LOP3_LUT_SRC0 / _SRC1
//    constants.
//  - The NV50_IR_MOD_NOT modifier is tested on each source.
//    NOTE(review): presumably the matching constant is complemented when the
//    modifier is set -- confirm.
//  - The lookup-table value is src0 & src1 for OP_AND, src0 | src1 for OP_OR
//    and src0 ^ src1 for OP_XOR; the assert guards any other opcode.
//  - OP_LOP3_LUT is emitted on sources 0 and 1 with an immediate 0 as the
//    third input, and the computed table is stored in its subOp.
119 GV100LegalizeSSA::handleLOP2(Instruction
*i
)
121 uint8_t src0
= NV50_IR_SUBOP_LOP3_LUT_SRC0
;
122 uint8_t src1
= NV50_IR_SUBOP_LOP3_LUT_SRC1
;
125 if (i
->src(0).mod
& Modifier(NV50_IR_MOD_NOT
))
127 if (i
->src(1).mod
& Modifier(NV50_IR_MOD_NOT
))
131 case OP_AND
: subOp
= src0
& src1
; break;
132 case OP_OR
: subOp
= src0
| src1
; break;
133 case OP_XOR
: subOp
= src0
^ src1
; break;
135 assert(!"invalid LOP2 opcode");
139 bld
.mkOp3(OP_LOP3_LUT
, TYPE_U32
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1),
140 bld
.mkImm(0))->subOp
= subOp
;
// Expresses a bitwise NOT as OP_LOP3_LUT.
// The original source is passed as the second LOP3 input, with immediate 0
// for the first and third, and the subOp is the 8-bit complement of
// NV50_IR_SUBOP_LOP3_LUT_SRC1.
145 GV100LegalizeSSA::handleNOT(Instruction
*i
)
147 bld
.mkOp3(OP_LOP3_LUT
, TYPE_U32
, i
->getDef(0), bld
.mkImm(0), i
->getSrc(0),
148 bld
.mkImm(0))->subOp
= (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1
;
// Emits no new instruction: the instruction's destination 0 is replaced by
// its source 0.
// NOTE(review): presumably the pre-EX2 step needs no separate operation on
// this target -- confirm.
153 GV100LegalizeSSA::handlePREEX2(Instruction
*i
)
155 i
->def(0).replace(i
->src(0), false);
// Reuses handleSHFL() on the instruction for its side effect of placing an
// OP_WARPSYNC in front of it.
160 GV100LegalizeSSA::handleQUADON(Instruction
*i
)
162 handleSHFL(i
); // Inserts OP_WARPSYNC
// Handler called from GV100LegalizeSSA::visit(), which stores its result in
// the `lowered` flag.
// NOTE(review): presumably the counterpart of handleQUADON() -- confirm.
167 GV100LegalizeSSA::handleQUADPOP(Instruction
*i
)
// Lowers a value-producing set instruction to a predicate compare plus
// OP_SELP.
//  - For a float dType: returns false (no lowering) when sType is TYPE_F32,
//    since the hardware has FSET.BF; otherwise the "condition met" value is
//    the immediate 0x3f800000.
//    NOTE(review): that constant is the IEEE-754 single-precision bit
//    pattern of 1.0f.
//  - The other visible assignment sets the "met" value to 0xffffffff.
//    NOTE(review): presumably the else-branch for non-float dType -- confirm.
//  - A compare with the original opcode and condition code writes a fresh
//    FILE_PREDICATE value; the source modifiers of sources 0 and 1 are
//    copied across, and the optional source 2 (NULL if absent) is attached
//    as the compare's source 2.
//  - OP_SELP writes the original destination from (immediate 0, met,
//    predicate), and NV50_IR_MOD_NOT is set on the predicate operand
//    (source 2 of the select).
173 GV100LegalizeSSA::handleSET(Instruction
*i
)
175 Value
*src2
= i
->srcExists(2) ? i
->getSrc(2) : NULL
;
176 Value
*pred
= bld
.getSSA(1, FILE_PREDICATE
), *met
;
179 if (isFloatType(i
->dType
)) {
180 if (i
->sType
== TYPE_F32
)
181 return false; // HW has FSET.BF
182 met
= bld
.mkImm(0x3f800000);
184 met
= bld
.mkImm(0xffffffff);
187 xsetp
= bld
.mkCmp(i
->op
, i
->asCmp()->setCond
, TYPE_U8
, pred
, i
->sType
,
188 i
->getSrc(0), i
->getSrc(1));
189 xsetp
->src(0).mod
= i
->src(0).mod
;
190 xsetp
->src(1).mod
= i
->src(1).mod
;
191 xsetp
->setSrc(2, src2
);
194 i
= bld
.mkOp3(OP_SELP
, TYPE_U32
, i
->getDef(0), bld
.mkImm(0), met
, pred
);
195 i
->src(2).mod
= Modifier(NV50_IR_MOD_NOT
);
// Creates an OP_WARPSYNC instruction (TYPE_NONE) whose source 0 is the
// immediate 0xffffffff and inserts it into i's basic block immediately
// before i.
// NOTE(review): 0xffffffff is presumably an all-lanes mask -- confirm.
200 GV100LegalizeSSA::handleSHFL(Instruction
*i
)
202 Instruction
*sync
= new_Instruction(func
, OP_WARPSYNC
, TYPE_NONE
);
204 sync
->setSrc(0, bld
.mkImm(0xffffffff));
205 i
->bb
->insertBefore(i
, sync
);
// Rewrites a shift as OP_SHF.
//  - The base subOp is NV50_IR_SUBOP_SHF_L for OP_SHL and
//    NV50_IR_SUBOP_SHF_R otherwise.
//  - A left shift whose source 0 lives in FILE_GPR is tested separately.
//    NOTE(review): how src0 and src2 are chosen on each path, and the exact
//    condition under which NV50_IR_SUBOP_SHF_HI is OR-ed in, must be checked
//    against the complete function.
//  - NV50_IR_SUBOP_SHF_W is added when the original carries
//    NV50_IR_SUBOP_SHIFT_WRAP.
//  - OP_SHF is emitted with i->dType into the original destination from
//    (src0, src1, src2), and the assembled subOp is stored on it.
210 GV100LegalizeSSA::handleShift(Instruction
*i
)
212 Value
*zero
= bld
.mkImm(0);
213 Value
*src1
= i
->getSrc(1);
215 uint8_t subOp
= i
->op
== OP_SHL
? NV50_IR_SUBOP_SHF_L
: NV50_IR_SUBOP_SHF_R
;
217 if (i
->op
== OP_SHL
&& i
->src(0).getFile() == FILE_GPR
) {
223 subOp
|= NV50_IR_SUBOP_SHF_HI
;
225 if (i
->subOp
& NV50_IR_SUBOP_SHIFT_WRAP
)
226 subOp
|= NV50_IR_SUBOP_SHF_W
;
228 bld
.mkOp3(OP_SHF
, i
->dType
, i
->getDef(0), src0
, src1
, src2
)->subOp
= subOp
;
// Turns a subtract into an add with a negated second operand.
// An OP_ADD of sources 0 and 1 is emitted into the original destination
// with i->dType. Source 0 keeps its original modifier; source 1 gets its
// original modifier XOR-ed with NV50_IR_MOD_NEG, so an existing negation
// cancels rather than stacking.
// NOTE(review): xadd is assumed to refer to the OP_ADD built here -- confirm.
233 GV100LegalizeSSA::handleSUB(Instruction
*i
)
236 bld
.mkOp2(OP_ADD
, i
->dType
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1));
237 xadd
->src(0).mod
= i
->src(0).mod
;
238 xadd
->src(1).mod
= i
->src(1).mod
^ Modifier(NV50_IR_MOD_NEG
);
// Per-instruction entry point of the SSA legalisation pass.
//  - Positions the builder at i (setPosition(i, false)) so that replacement
//    instructions are emitted relative to it.
//  - Tests whether sType is TYPE_F32, dType is not TYPE_F16 and the program
//    is not a compute program.
//    NOTE(review): the action taken when that test passes must be checked
//    against the complete function.
//  - Dispatches to the handle*() helper for the instruction and records its
//    result in `lowered`. Visible guards:
//      * handleLOP2 and handleSET only when destination 0 is not in
//        FILE_PREDICATE;
//      * handleIMUL and handleIMNMX only for non-float dType;
//      * handleIMAD_HIGH only for non-float dType with
//        NV50_IR_SUBOP_MUL_HIGH;
//      * handleIADD64 only for non-float dType of size 8.
//  - Ends by calling delete_Instruction(prog, i).
//    NOTE(review): presumably only when `lowered` is true -- confirm.
244 GV100LegalizeSSA::visit(Instruction
*i
)
246 bool lowered
= false;
248 bld
.setPosition(i
, false);
249 if (i
->sType
== TYPE_F32
&& i
->dType
!= TYPE_F16
&&
250 prog
->getType() != Program::TYPE_COMPUTE
)
257 if (i
->def(0).getFile() != FILE_PREDICATE
)
258 lowered
= handleLOP2(i
);
261 lowered
= handleNOT(i
);
265 lowered
= handleShift(i
);
271 if (i
->def(0).getFile() != FILE_PREDICATE
)
272 lowered
= handleSET(i
);
275 lowered
= handleCMP(i
);
278 lowered
= handlePREEX2(i
);
281 if (!isFloatType(i
->dType
))
282 lowered
= handleIMUL(i
);
285 if (!isFloatType(i
->dType
) && i
->subOp
== NV50_IR_SUBOP_MUL_HIGH
)
286 lowered
= handleIMAD_HIGH(i
);
289 lowered
= handleSHFL(i
);
292 lowered
= handleQUADON(i
);
295 lowered
= handleQUADPOP(i
);
298 lowered
= handleSUB(i
);
302 if (!isFloatType(i
->dType
))
303 lowered
= handleIMNMX(i
);
306 if (!isFloatType(i
->dType
) && typeSizeof(i
->dType
) == 8)
307 lowered
= handleIADD64(i
);
320 delete_Instruction(prog
, i
);
// Expands a min/max into one compare and two 32-bit selects (visit() calls
// this only when dType is TYPE_F64).
//  - OP_SET compares source 0 against source 1 (typed by i->sType) with
//    CC_LT for OP_MIN and CC_GT otherwise, writing a fresh FILE_PREDICATE
//    value.
//  - Both sources and the destination are split into 4-byte halves.
//  - One OP_SELP per half picks between the matching source halves under
//    the shared predicate.
//  - OP_MERGE (TYPE_U64) joins the two halves into the original destination.
326 GV100LoweringPass::handleDMNMX(Instruction
*i
)
328 Value
*pred
= bld
.getSSA(1, FILE_PREDICATE
);
329 Value
*src0
[2], *src1
[2], *dest
[2];
331 bld
.mkCmp(OP_SET
, (i
->op
== OP_MIN
) ? CC_LT
: CC_GT
, TYPE_U32
, pred
,
332 i
->sType
, i
->getSrc(0), i
->getSrc(1));
333 bld
.mkSplit(src0
, 4, i
->getSrc(0));
334 bld
.mkSplit(src1
, 4, i
->getSrc(1));
335 bld
.mkSplit(dest
, 4, i
->getDef(0));
336 bld
.mkOp3(OP_SELP
, TYPE_U32
, dest
[0], src0
[0], src1
[0], pred
);
337 bld
.mkOp3(OP_SELP
, TYPE_U32
, dest
[1], src0
[1], src1
[1], pred
);
338 bld
.mkOp2(OP_MERGE
, TYPE_U64
, i
->getDef(0), dest
[0], dest
[1]);
// Expands a bitfield extract into a permute / mask / shift sequence.
//  - Two OP_PERMT ops derive `bit` and `cnt` from source 1 using selectors
//    0x4440 and 0x4441.
//    NOTE(review): presumably these isolate byte 0 (field offset) and byte 1
//    (field width) of the packed operand -- confirm.
//  - OP_BMSK builds a mask from (bit, cnt), source 0 is AND-ed with it, and
//    the result is shifted right by `bit` into the original destination.
//  - For a signed dType, OP_SGXT (TYPE_S32) is then applied to the
//    destination with `cnt` as its second operand.
343 GV100LoweringPass::handleEXTBF(Instruction
*i
)
345 Value
*bit
= bld
.getScratch();
346 Value
*cnt
= bld
.getScratch();
347 Value
*mask
= bld
.getScratch();
348 Value
*zero
= bld
.mkImm(0);
350 bld
.mkOp3(OP_PERMT
, TYPE_U32
, bit
, i
->getSrc(1), bld
.mkImm(0x4440), zero
);
351 bld
.mkOp3(OP_PERMT
, TYPE_U32
, cnt
, i
->getSrc(1), bld
.mkImm(0x4441), zero
);
352 bld
.mkOp2(OP_BMSK
, TYPE_U32
, mask
, bit
, cnt
);
353 bld
.mkOp2(OP_AND
, TYPE_U32
, mask
, i
->getSrc(0), mask
);
354 bld
.mkOp2(OP_SHR
, TYPE_U32
, i
->getDef(0), mask
, bit
);
355 if (isSignedType(i
->dType
))
356 bld
.mkOp2(OP_SGXT
, TYPE_S32
, i
->getDef(0), i
->getDef(0), cnt
);
// Handler called from GV100LoweringPass::visit(), which stores its result
// in the `lowered` flag.
362 GV100LoweringPass::handleFLOW(Instruction
*i
)
// Performs an integer-to-integer conversion as two OP_CVT steps through
// TYPE_F32 (visit() calls this only when neither type is float and neither
// source 0 nor destination 0 is in FILE_PREDICATE).
//  - The first OP_CVT converts source 0 from i->sType to TYPE_F32 into the
//    destination.
//    NOTE(review): the member set through the trailing `->` on that call
//    must be checked against the complete function.
//  - The second OP_CVT converts the destination from TYPE_F32 to i->dType
//    in place.
369 GV100LoweringPass::handleI2I(Instruction
*i
)
371 bld
.mkCvt(OP_CVT
, TYPE_F32
, i
->getDef(0), i
->sType
, i
->getSrc(0))->
373 bld
.mkCvt(OP_CVT
, i
->dType
, i
->getDef(0), TYPE_F32
, i
->getDef(0));
// Expands a bitfield insert into a permute / mask / shift / LOP3 sequence.
//  - Two OP_PERMT ops derive `bit` and `cnt` from source 1 using selectors
//    0x4440 and 0x4441.
//    NOTE(review): presumably these isolate byte 0 (field offset) and byte 1
//    (field width) of the packed operand -- confirm.
//  - OP_BMSK builds a mask from (0, cnt); source 0 is AND-ed with it and
//    shifted left by `bit`, and the mask is shifted left by `bit` as well.
//  - OP_LOP3_LUT combines (a = shifted field, b = source 2, c = shifted
//    mask) with the table a | (b & ~c), so bits of source 2 under the mask
//    are cleared and the field is OR-ed in.
378 GV100LoweringPass::handleINSBF(Instruction
*i
)
380 Value
*bit
= bld
.getScratch();
381 Value
*cnt
= bld
.getScratch();
382 Value
*mask
= bld
.getScratch();
383 Value
*src0
= bld
.getScratch();
384 Value
*zero
= bld
.mkImm(0);
386 bld
.mkOp3(OP_PERMT
, TYPE_U32
, bit
, i
->getSrc(1), bld
.mkImm(0x4440), zero
);
387 bld
.mkOp3(OP_PERMT
, TYPE_U32
, cnt
, i
->getSrc(1), bld
.mkImm(0x4441), zero
);
388 bld
.mkOp2(OP_BMSK
, TYPE_U32
, mask
, zero
, cnt
);
390 bld
.mkOp2(OP_AND
, TYPE_U32
, src0
, i
->getSrc(0), mask
);
391 bld
.mkOp2(OP_SHL
, TYPE_U32
, src0
, src0
, bit
);
393 bld
.mkOp2(OP_SHL
, TYPE_U32
, mask
, mask
, bit
);
394 bld
.mkOp3(OP_LOP3_LUT
, TYPE_U32
, i
->getDef(0), src0
, i
->getSrc(2), mask
)->
395 subOp
= NV50_IR_SUBOP_LOP3_LUT(a
| (b
& ~c
));
// Splits the interpolation into OP_LINTERP followed by OP_MUL.
//  - OP_LINTERP (TYPE_F32) writes the original destination from source 0
//    and the optional source 2 (NULL if absent).
//  - OP_MUL (TYPE_F32) multiplies the destination in place by source 1.
//  - When the interpolation mode is NV50_IR_INTERP_SC, the LINTERP gets a
//    second, FILE_PREDICATE destination and the multiply is predicated on
//    it with CC_NOT_P.
401 GV100LoweringPass::handlePINTERP(Instruction
*i
)
403 Value
*src2
= i
->srcExists(2) ? i
->getSrc(2) : NULL
;
404 Instruction
*ipa
, *mul
;
406 ipa
= bld
.mkOp2(OP_LINTERP
, TYPE_F32
, i
->getDef(0), i
->getSrc(0), src2
);
408 mul
= bld
.mkOp2(OP_MUL
, TYPE_F32
, i
->getDef(0), i
->getDef(0), i
->getSrc(1));
410 if (i
->getInterpMode() == NV50_IR_INTERP_SC
) {
411 ipa
->setDef(1, bld
.getSSA(1, FILE_PREDICATE
));
412 mul
->setPredicate(CC_NOT_P
, ipa
->getDef(1));
// Handler called from GV100LoweringPass::visit(), which stores its result
// in the `lowered` flag.
419 GV100LoweringPass::handlePREFLOW(Instruction
*i
)
// Replaces the instruction with a multiply of source 0 by the constant
// 1 / (2 * pi), written to the original destination with i->dType.
// The constant is computed in double precision from the literal 3.14159265
// and then narrowed to float.
// NOTE(review): presumably this rescales radians into the units expected by
// the hardware sin/cos input -- confirm.
425 GV100LoweringPass::handlePRESIN(Instruction
*i
)
427 const float f
= 1.0 / (2.0 * 3.14159265);
428 bld
.mkOp2(OP_MUL
, i
->dType
, i
->getDef(0), i
->getSrc(0), bld
.mkImm(f
));
// Per-instruction entry point of the lowering pass.
//  - Positions the builder at i (setPosition(i, false)) so that replacement
//    instructions are emitted relative to it.
//  - Dispatches to the handle*() helper for the instruction and records its
//    result in `lowered`. Visible guards:
//      * handleI2I only when neither source 0 nor destination 0 is in
//        FILE_PREDICATE and neither dType nor sType is a float type;
//      * handleDMNMX only when dType is TYPE_F64.
//  - Ends by calling delete_Instruction(prog, i).
//    NOTE(review): presumably only when `lowered` is true -- confirm.
433 GV100LoweringPass::visit(Instruction
*i
)
435 bool lowered
= false;
437 bld
.setPosition(i
, false);
442 lowered
= handleFLOW(i
);
446 lowered
= handlePREFLOW(i
);
449 if (i
->src(0).getFile() != FILE_PREDICATE
&&
450 i
->def(0).getFile() != FILE_PREDICATE
&&
451 !isFloatType(i
->dType
) && !isFloatType(i
->sType
))
452 lowered
= handleI2I(i
);
455 lowered
= handleEXTBF(i
);
458 lowered
= handleINSBF(i
);
462 if (i
->dType
== TYPE_F64
)
463 lowered
= handleDMNMX(i
);
466 lowered
= handlePINTERP(i
);
469 lowered
= handlePRESIN(i
);
476 delete_Instruction(prog
, i
);
481 } // namespace nv50_ir