2 * Copyright 2020 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 #include "codegen/nv50_ir.h"
23 #include "codegen/nv50_ir_build_util.h"
25 #include "codegen/nv50_ir_target_nvc0.h"
26 #include "codegen/nv50_ir_lowering_gv100.h"
// Rewrites the instruction as a predicate compare followed by a select:
//  - an OP_SET compares immediate 0 against source 2, using the reversed
//    condition code of the original compare, and writes a fresh predicate;
//  - an OP_SELP then writes the original destination from sources 0 and 1,
//    taking that predicate as its third operand.
// Called from GV100LegalizeSSA::visit(), which stores the result in `lowered`.
33 GV100LegalizeSSA::handleCMP(Instruction
*i
)
35 Value
*pred
= bld
.getSSA(1, FILE_PREDICATE
);
37 bld
.mkCmp(OP_SET
, reverseCondCode(i
->asCmp()->setCond
), TYPE_U8
, pred
,
38 i
->sType
, bld
.mkImm(0), i
->getSrc(2));
39 bld
.mkOp3(OP_SELP
, TYPE_U32
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1), pred
);
43 // NIR deals with most of these for us, but codegen generates more in pointer
44 // calculations from other lowering passes.
// Lowers an 8-byte integer add (visit() checks !isFloatType(dType) and
// typeSizeof(dType) == 8) into two 32-bit adds linked by a carry predicate:
//  - each source whose register size is 8 is split into two 4-byte parts;
//    a source of another size is used as part [0] with immediate 0 as part [1]
//    (NOTE(review): presumably the else-branch of the size check - confirm);
//  - parts [0] are added with the carry written via setFlagsDef(1, carry);
//  - parts [1] are added with the carry consumed via setFlagsSrc(2, carry);
//  - the two 32-bit results are merged into the original destination.
46 GV100LegalizeSSA::handleIADD64(Instruction
*i
)
48 Value
*carry
= bld
.getSSA(1, FILE_PREDICATE
);
49 Value
*def
[2] = { bld
.getSSA(), bld
.getSSA() };
52 for (int s
= 0; s
< 2; s
++) {
53 if (i
->getSrc(s
)->reg
.size
== 8) {
54 bld
.mkSplit(src
[s
], 4, i
->getSrc(s
));
56 src
[s
][0] = i
->getSrc(s
);
57 src
[s
][1] = bld
.mkImm(0);
// First add produces the carry, second add consumes it.
61 bld
.mkOp2(OP_ADD
, TYPE_U32
, def
[0], src
[0][0], src
[1][0])->
62 setFlagsDef(1, carry
);
63 bld
.mkOp2(OP_ADD
, TYPE_U32
, def
[1], src
[0][1], src
[1][1])->
64 setFlagsSrc(2, carry
);
65 bld
.mkOp2(OP_MERGE
, i
->dType
, i
->getDef(0), def
[0], def
[1]);
// Lowers a "high" multiply(-add) by performing the operation at 64 bits and
// keeping only one 4-byte part of the result:
//  - if source 2 exists and is either not an immediate or a non-zero
//    immediate, a 64-bit addend is built by merging a register holding 0
//    (src2s[0]) with a register holding source 2 (src2s[1]);
//    NOTE(review): the addend used when that condition is false is not
//    visible here - presumably an immediate 0; confirm;
//  - an OP_MAD typed TYPE_S64 (signed sType) or TYPE_U64 writes an 8-byte
//    temporary from sources 0 and 1 plus the addend;
//  - the temporary is split into two 4-byte parts and the original
//    destination is replaced by the second part (defs[1]).
// Called from visit() and from handleIMUL().
70 GV100LegalizeSSA::handleIMAD_HIGH(Instruction
*i
)
72 Value
*def
= bld
.getSSA(8), *defs
[2];
75 if (i
->srcExists(2) &&
76 (!i
->getSrc(2)->asImm() || i
->getSrc(2)->asImm()->reg
.data
.u32
)) {
77 Value
*src2s
[2] = { bld
.getSSA(), bld
.getSSA() };
78 bld
.mkMov(src2s
[0], bld
.mkImm(0));
79 bld
.mkMov(src2s
[1], i
->getSrc(2));
80 src2
= bld
.mkOp2(OP_MERGE
, TYPE_U64
, bld
.getSSA(8), src2s
[0], src2s
[1])->getDef(0);
85 bld
.mkOp3(OP_MAD
, isSignedType(i
->sType
) ? TYPE_S64
: TYPE_U64
, def
,
86 i
->getSrc(0), i
->getSrc(1), src2
);
// Only the second 4-byte part of the 64-bit result is kept.
88 bld
.mkSplit(defs
, 4, def
);
89 i
->def(0).replace(defs
[1], false);
93 // XXX: We should be able to do this in GV100LoweringPass, but codegen messes
94 // up somehow and swaps the condcode without swapping the sources.
95 // - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test
// Lowers a non-float min/max (visit() only calls this when
// !isFloatType(dType)) into compare + select:
//  - an OP_SET compares source 0 with source 1 using CC_LT when the opcode is
//    OP_MIN and CC_GT otherwise, writing a fresh predicate;
//  - an OP_SELP writes the original destination from sources 0 and 1, taking
//    that predicate as its third operand.
97 GV100LegalizeSSA::handleIMNMX(Instruction
*i
)
99 Value
*pred
= bld
.getSSA(1, FILE_PREDICATE
);
101 bld
.mkCmp(OP_SET
, (i
->op
== OP_MIN
) ? CC_LT
: CC_GT
, i
->dType
, pred
,
102 i
->sType
, i
->getSrc(0), i
->getSrc(1));
103 bld
.mkOp3(OP_SELP
, i
->dType
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1), pred
);
// Lowers a non-float multiply (visit() only calls this when
// !isFloatType(dType)):
//  - a multiply with subOp NV50_IR_SUBOP_MUL_HIGH is delegated to
//    handleIMAD_HIGH() and its result is returned;
//  - any other multiply is emitted as an OP_MAD of sources 0 and 1 into the
//    original destination.
//    NOTE(review): the third MAD operand is on a following line not shown
//    here - presumably an immediate 0; confirm.
108 GV100LegalizeSSA::handleIMUL(Instruction
*i
)
110 if (i
->subOp
== NV50_IR_SUBOP_MUL_HIGH
)
111 return handleIMAD_HIGH(i
);
113 bld
.mkOp3(OP_MAD
, i
->dType
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1),
// Converts a two-input logic op (OP_AND / OP_OR / OP_XOR) into an OP_LOP3_LUT
// with an immediate 0 as third source. visit() only calls this when the
// destination is not in FILE_PREDICATE.
// The lookup table stored in subOp is obtained by applying the same logic
// operation to the per-source LUT constants NV50_IR_SUBOP_LOP3_LUT_SRC0 and
// NV50_IR_SUBOP_LOP3_LUT_SRC1.
// The NV50_IR_MOD_NOT modifier is tested on both sources.
// NOTE(review): presumably the matching LUT constant is inverted when the
// modifier is set - the statements guarded by the two checks are not visible
// here; confirm.
// Any other opcode trips the "invalid LOP2 opcode" assertion.
119 GV100LegalizeSSA::handleLOP2(Instruction
*i
)
121 uint8_t src0
= NV50_IR_SUBOP_LOP3_LUT_SRC0
;
122 uint8_t src1
= NV50_IR_SUBOP_LOP3_LUT_SRC1
;
125 if (i
->src(0).mod
& Modifier(NV50_IR_MOD_NOT
))
127 if (i
->src(1).mod
& Modifier(NV50_IR_MOD_NOT
))
131 case OP_AND
: subOp
= src0
& src1
; break;
132 case OP_OR
: subOp
= src0
| src1
; break;
133 case OP_XOR
: subOp
= src0
^ src1
; break;
135 assert(!"invalid LOP2 opcode");
139 bld
.mkOp3(OP_LOP3_LUT
, TYPE_U32
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1),
140 bld
.mkImm(0))->subOp
= subOp
;
// Lowers a bitwise NOT to an OP_LOP3_LUT: the value being inverted (source 0)
// is passed as the second LOP3 operand, with immediate 0 as the first and
// third operands. The lookup table is the 8-bit complement of
// NV50_IR_SUBOP_LOP3_LUT_SRC1, i.e. the negation of the second operand.
145 GV100LegalizeSSA::handleNOT(Instruction
*i
)
147 bld
.mkOp3(OP_LOP3_LUT
, TYPE_U32
, i
->getDef(0), bld
.mkImm(0), i
->getSrc(0),
148 bld
.mkImm(0))->subOp
= (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1
;
// Emits no new instruction: the destination of the instruction is replaced
// by its own source 0 via def(0).replace(src(0), false).
// Called from visit(), which stores the result in `lowered`.
153 GV100LegalizeSSA::handlePREEX2(Instruction
*i
)
155 i
->def(0).replace(i
->src(0), false);
// Reuses handleSHFL() purely for its side effect of inserting an OP_WARPSYNC
// before the instruction; the value returned by handleSHFL() is ignored.
160 GV100LegalizeSSA::handleQUADON(Instruction
*i
)
162 handleSHFL(i
); // Inserts OP_WARPSYNC
// Handler invoked from GV100LegalizeSSA::visit(); its return value is stored
// in visit()'s `lowered` flag.
// NOTE(review): presumably dispatched for OP_QUADPOP - confirm against the
// case label in visit().
167 GV100LegalizeSSA::handleQUADPOP(Instruction
*i
)
// Lowers a set/compare whose destination is not a predicate (checked by
// visit()) into a predicate compare followed by a select:
//  - returns false without emitting anything when dType is a float type and
//    sType is TYPE_F32 (the hardware handles that case, see FSET.BF below);
//  - `met` is the constant produced by the select: 0x3f800000 (the IEEE-754
//    single-precision bit pattern of 1.0) for float dType, 0xffffffff
//    otherwise (NOTE(review): presumably the else-branch - confirm);
//  - the compare reuses the original opcode, condition code and sType, copies
//    the modifiers of sources 0 and 1, and receives the optional source 2
//    (NULL when the original has none);
//  - an OP_SELP writes the original destination from immediate 0 and `met`,
//    with the predicate as third operand carrying an NV50_IR_MOD_NOT modifier.
173 GV100LegalizeSSA::handleSET(Instruction
*i
)
175 Value
*src2
= i
->srcExists(2) ? i
->getSrc(2) : NULL
;
176 Value
*pred
= bld
.getSSA(1, FILE_PREDICATE
), *met
;
179 if (isFloatType(i
->dType
)) {
180 if (i
->sType
== TYPE_F32
)
181 return false; // HW has FSET.BF
182 met
= bld
.mkImm(0x3f800000);
184 met
= bld
.mkImm(0xffffffff);
187 xsetp
= bld
.mkCmp(i
->op
, i
->asCmp()->setCond
, TYPE_U8
, pred
, i
->sType
,
188 i
->getSrc(0), i
->getSrc(1));
189 xsetp
->src(0).mod
= i
->src(0).mod
;
190 xsetp
->src(1).mod
= i
->src(1).mod
;
191 xsetp
->setSrc(2, src2
);
// From here on `i` refers to the newly built select, not the original.
193 i
= bld
.mkOp3(OP_SELP
, TYPE_U32
, i
->getDef(0), bld
.mkImm(0), met
, pred
);
194 i
->src(2).mod
= Modifier(NV50_IR_MOD_NOT
);
// Creates an OP_WARPSYNC instruction (TYPE_NONE) whose source 0 is the
// immediate 0xffffffff and inserts it into the instruction's basic block
// immediately before the instruction. Also used by handleQUADON().
199 GV100LegalizeSSA::handleSHFL(Instruction
*i
)
201 Instruction
*sync
= new_Instruction(func
, OP_WARPSYNC
, TYPE_NONE
);
203 sync
->setSrc(0, bld
.mkImm(0xffffffff));
204 i
->bb
->insertBefore(i
, sync
);
// Lowers a shift to a three-source OP_SHF with the shift amount (source 1) as
// the middle operand. The SHF subOp is assembled as follows:
//  - NV50_IR_SUBOP_SHF_L when the opcode is OP_SHL, NV50_IR_SUBOP_SHF_R
//    otherwise;
//  - NV50_IR_SUBOP_SHF_HI is OR-ed in on one path of the
//    "OP_SHL with source 0 in FILE_GPR" check
//    (NOTE(review): presumably the else path, where the value is placed in
//    the third operand and `zero` in the first - the assignments to
//    src0/src2 are not visible here; confirm);
//  - NV50_IR_SUBOP_SHF_W is OR-ed in when the original subOp has
//    NV50_IR_SUBOP_SHIFT_WRAP set.
209 GV100LegalizeSSA::handleShift(Instruction
*i
)
211 Value
*zero
= bld
.mkImm(0);
212 Value
*src1
= i
->getSrc(1);
214 uint8_t subOp
= i
->op
== OP_SHL
? NV50_IR_SUBOP_SHF_L
: NV50_IR_SUBOP_SHF_R
;
216 if (i
->op
== OP_SHL
&& i
->src(0).getFile() == FILE_GPR
) {
222 subOp
|= NV50_IR_SUBOP_SHF_HI
;
224 if (i
->subOp
& NV50_IR_SUBOP_SHIFT_WRAP
)
225 subOp
|= NV50_IR_SUBOP_SHF_W
;
227 bld
.mkOp3(OP_SHF
, i
->dType
, i
->getDef(0), src0
, src1
, src2
)->subOp
= subOp
;
// Lowers a subtraction to an addition with a negated second operand
// (a - b == a + (-b)): an OP_ADD with the same dType, destination and
// sources is emitted, source 0 keeps its modifier and source 1 gets its
// modifier XOR-ed with NV50_IR_MOD_NEG.
// NOTE(review): `xadd` is presumably the instruction returned by the mkOp2
// call below - its declaration is not visible here; confirm.
232 GV100LegalizeSSA::handleSUB(Instruction
*i
)
235 bld
.mkOp2(OP_ADD
, i
->dType
, i
->getDef(0), i
->getSrc(0), i
->getSrc(1));
236 xadd
->src(0).mod
= i
->src(0).mod
;
237 xadd
->src(1).mod
= i
->src(1).mod
^ Modifier(NV50_IR_MOD_NEG
);
// Per-instruction entry point of the SSA legalization pass.
// Positions the builder at the instruction (setPosition(i, false)), then
// dispatches to one of the handle*() helpers and records the helper's result
// in `lowered`. Guards visible on the dispatch:
//  - handleLOP2 / handleSET: only when def(0) is not in FILE_PREDICATE;
//  - handleIMUL / handleIMNMX: only when dType is not a float type;
//  - handleIMAD_HIGH: non-float dType and subOp NV50_IR_SUBOP_MUL_HIGH;
//  - handleIADD64: non-float dType whose size is 8 bytes.
// Finally the original instruction is removed with delete_Instruction().
// NOTE(review): presumably only when `lowered` is true - the guarding
// condition and the opcode case labels are not visible here; confirm.
242 GV100LegalizeSSA::visit(Instruction
*i
)
244 bool lowered
= false;
246 bld
.setPosition(i
, false);
252 if (i
->def(0).getFile() != FILE_PREDICATE
)
253 lowered
= handleLOP2(i
);
256 lowered
= handleNOT(i
);
260 lowered
= handleShift(i
);
266 if (i
->def(0).getFile() != FILE_PREDICATE
)
267 lowered
= handleSET(i
);
270 lowered
= handleCMP(i
);
273 lowered
= handlePREEX2(i
);
276 if (!isFloatType(i
->dType
))
277 lowered
= handleIMUL(i
);
280 if (!isFloatType(i
->dType
) && i
->subOp
== NV50_IR_SUBOP_MUL_HIGH
)
281 lowered
= handleIMAD_HIGH(i
);
284 lowered
= handleSHFL(i
);
287 lowered
= handleQUADON(i
);
290 lowered
= handleQUADPOP(i
);
293 lowered
= handleSUB(i
);
297 if (!isFloatType(i
->dType
))
298 lowered
= handleIMNMX(i
);
301 if (!isFloatType(i
->dType
) && typeSizeof(i
->dType
) == 8)
302 lowered
= handleIADD64(i
);
315 delete_Instruction(prog
, i
);
// Lowers a TYPE_F64 min/max (visit() checks dType == TYPE_F64) into one
// compare and two 32-bit selects:
//  - an OP_SET compares source 0 with source 1 using CC_LT when the opcode is
//    OP_MIN and CC_GT otherwise, writing a fresh predicate;
//  - both sources and the destination are split into two 4-byte parts;
//  - each destination part is written by an OP_SELP from the matching source
//    parts, with the shared predicate as third operand;
//  - the two parts are merged back into the destination as TYPE_U64.
// NOTE(review): the destination is split with mkSplit() before it is written
// by the final OP_MERGE - confirm this ordering is intended.
321 GV100LoweringPass::handleDMNMX(Instruction
*i
)
323 Value
*pred
= bld
.getSSA(1, FILE_PREDICATE
);
324 Value
*src0
[2], *src1
[2], *dest
[2];
326 bld
.mkCmp(OP_SET
, (i
->op
== OP_MIN
) ? CC_LT
: CC_GT
, TYPE_U32
, pred
,
327 i
->sType
, i
->getSrc(0), i
->getSrc(1));
328 bld
.mkSplit(src0
, 4, i
->getSrc(0));
329 bld
.mkSplit(src1
, 4, i
->getSrc(1));
330 bld
.mkSplit(dest
, 4, i
->getDef(0));
331 bld
.mkOp3(OP_SELP
, TYPE_U32
, dest
[0], src0
[0], src1
[0], pred
);
332 bld
.mkOp3(OP_SELP
, TYPE_U32
, dest
[1], src0
[1], src1
[1], pred
);
333 bld
.mkOp2(OP_MERGE
, TYPE_U64
, i
->getDef(0), dest
[0], dest
[1]);
// Lowers a bitfield extract into a sequence of simpler ops:
//  - two OP_PERMT ops on source 1 (selectors 0x4440 and 0x4441, third operand
//    zero) produce `bit` and `cnt`
//    (NOTE(review): presumably bytes 0 and 1 of source 1, i.e. the field
//    offset and width - confirm against the PERMT selector encoding);
//  - OP_BMSK builds a mask from `bit` and `cnt`;
//  - source 0 is AND-ed with the mask and shifted right by `bit` into the
//    destination;
//  - for signed dType, OP_SGXT (TYPE_S32) is applied to the destination in
//    place with `cnt` as second operand.
338 GV100LoweringPass::handleEXTBF(Instruction
*i
)
340 Value
*bit
= bld
.getScratch();
341 Value
*cnt
= bld
.getScratch();
342 Value
*mask
= bld
.getScratch();
343 Value
*zero
= bld
.mkImm(0);
345 bld
.mkOp3(OP_PERMT
, TYPE_U32
, bit
, i
->getSrc(1), bld
.mkImm(0x4440), zero
);
346 bld
.mkOp3(OP_PERMT
, TYPE_U32
, cnt
, i
->getSrc(1), bld
.mkImm(0x4441), zero
);
347 bld
.mkOp2(OP_BMSK
, TYPE_U32
, mask
, bit
, cnt
);
348 bld
.mkOp2(OP_AND
, TYPE_U32
, mask
, i
->getSrc(0), mask
);
349 bld
.mkOp2(OP_SHR
, TYPE_U32
, i
->getDef(0), mask
, bit
);
350 if (isSignedType(i
->dType
))
351 bld
.mkOp2(OP_SGXT
, TYPE_S32
, i
->getDef(0), i
->getDef(0), cnt
);
// Handler invoked from GV100LoweringPass::visit(); its return value is stored
// in visit()'s `lowered` flag.
// NOTE(review): presumably dispatched for flow-control opcodes - confirm
// against the case labels in visit().
357 GV100LoweringPass::handleFLOW(Instruction
*i
)
// Lowers an integer-to-integer conversion (visit() requires non-float sType
// and dType and neither src(0) nor def(0) in FILE_PREDICATE) into two
// conversions through TYPE_F32:
//  - OP_CVT from sType to TYPE_F32, source 0 -> destination;
//  - OP_CVT from TYPE_F32 to dType, destination -> destination.
// NOTE(review): the first mkCvt() result is dereferenced with `->` and the
// continuation is on a line not shown here - presumably a field of the new
// instruction is set; confirm.
364 GV100LoweringPass::handleI2I(Instruction
*i
)
366 bld
.mkCvt(OP_CVT
, TYPE_F32
, i
->getDef(0), i
->sType
, i
->getSrc(0))->
368 bld
.mkCvt(OP_CVT
, i
->dType
, i
->getDef(0), TYPE_F32
, i
->getDef(0));
// Lowers a bitfield insert into a sequence of simpler ops:
//  - two OP_PERMT ops on source 1 (selectors 0x4440 and 0x4441, third operand
//    zero) produce `bit` and `cnt`
//    (NOTE(review): presumably bytes 0 and 1 of source 1, i.e. the field
//    offset and width - confirm against the PERMT selector encoding);
//  - OP_BMSK builds a mask from `zero` and `cnt`;
//  - source 0 is AND-ed with that mask and shifted left by `bit`;
//  - the mask itself is shifted left by `bit`;
//  - an OP_LOP3_LUT with table a | (b & ~c) writes the destination from the
//    shifted field, source 2 and the shifted mask
//    (NOTE(review): presumably a, b, c map to the three operands in that
//    order, i.e. field | (source 2 & ~mask) - confirm).
373 GV100LoweringPass::handleINSBF(Instruction
*i
)
375 Value
*bit
= bld
.getScratch();
376 Value
*cnt
= bld
.getScratch();
377 Value
*mask
= bld
.getScratch();
378 Value
*src0
= bld
.getScratch();
379 Value
*zero
= bld
.mkImm(0);
381 bld
.mkOp3(OP_PERMT
, TYPE_U32
, bit
, i
->getSrc(1), bld
.mkImm(0x4440), zero
);
382 bld
.mkOp3(OP_PERMT
, TYPE_U32
, cnt
, i
->getSrc(1), bld
.mkImm(0x4441), zero
);
383 bld
.mkOp2(OP_BMSK
, TYPE_U32
, mask
, zero
, cnt
);
385 bld
.mkOp2(OP_AND
, TYPE_U32
, src0
, i
->getSrc(0), mask
);
386 bld
.mkOp2(OP_SHL
, TYPE_U32
, src0
, src0
, bit
);
388 bld
.mkOp2(OP_SHL
, TYPE_U32
, mask
, mask
, bit
);
389 bld
.mkOp3(OP_LOP3_LUT
, TYPE_U32
, i
->getDef(0), src0
, i
->getSrc(2), mask
)->
390 subOp
= NV50_IR_SUBOP_LOP3_LUT(a
| (b
& ~c
));
// Lowers the instruction into an interpolation followed by a multiply:
//  - an OP_LINTERP (TYPE_F32) writes the destination from source 0 and the
//    optional source 2 (NULL when the original has none);
//  - an OP_MUL (TYPE_F32) multiplies the destination by source 1 in place;
//  - when the interpolation mode is NV50_IR_INTERP_SC, the OP_LINTERP gets a
//    second, predicate definition and the multiply is predicated on it with
//    CC_NOT_P.
396 GV100LoweringPass::handlePINTERP(Instruction
*i
)
398 Value
*src2
= i
->srcExists(2) ? i
->getSrc(2) : NULL
;
399 Instruction
*ipa
, *mul
;
401 ipa
= bld
.mkOp2(OP_LINTERP
, TYPE_F32
, i
->getDef(0), i
->getSrc(0), src2
);
403 mul
= bld
.mkOp2(OP_MUL
, TYPE_F32
, i
->getDef(0), i
->getDef(0), i
->getSrc(1));
405 if (i
->getInterpMode() == NV50_IR_INTERP_SC
) {
406 ipa
->setDef(1, bld
.getSSA(1, FILE_PREDICATE
));
407 mul
->setPredicate(CC_NOT_P
, ipa
->getDef(1));
// Handler invoked from GV100LoweringPass::visit(); its return value is stored
// in visit()'s `lowered` flag.
// NOTE(review): presumably dispatched for the "pre" flow-control opcodes -
// confirm against the case labels in visit().
414 GV100LoweringPass::handlePREFLOW(Instruction
*i
)
// Replaces the instruction by a multiply of source 0 with the constant
// 1.0 / (2.0 * 3.14159265), i.e. approximately 1 / (2 * pi), written to the
// original destination with the original dType.
// The constant is computed in double precision and stored as a float.
420 GV100LoweringPass::handlePRESIN(Instruction
*i
)
422 const float f
= 1.0 / (2.0 * 3.14159265);
423 bld
.mkOp2(OP_MUL
, i
->dType
, i
->getDef(0), i
->getSrc(0), bld
.mkImm(f
));
// Per-instruction entry point of the lowering pass.
// Positions the builder at the instruction (setPosition(i, false)), then
// dispatches to one of the handle*() helpers and records the helper's result
// in `lowered`. Guards visible on the dispatch:
//  - handleI2I: neither src(0) nor def(0) is in FILE_PREDICATE, and neither
//    dType nor sType is a float type;
//  - handleDMNMX: dType is TYPE_F64.
// Finally the original instruction is removed with delete_Instruction().
// NOTE(review): presumably only when `lowered` is true - the guarding
// condition and the opcode case labels are not visible here; confirm.
428 GV100LoweringPass::visit(Instruction
*i
)
430 bool lowered
= false;
432 bld
.setPosition(i
, false);
437 lowered
= handleFLOW(i
);
441 lowered
= handlePREFLOW(i
);
444 if (i
->src(0).getFile() != FILE_PREDICATE
&&
445 i
->def(0).getFile() != FILE_PREDICATE
&&
446 !isFloatType(i
->dType
) && !isFloatType(i
->sType
))
447 lowered
= handleI2I(i
);
450 lowered
= handleEXTBF(i
);
453 lowered
= handleINSBF(i
);
457 if (i
->dType
== TYPE_F64
)
458 lowered
= handleDMNMX(i
);
461 lowered
= handlePINTERP(i
);
464 lowered
= handlePRESIN(i
);
471 delete_Instruction(prog
, i
);
476 } // namespace nv50_ir