2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "nv50/codegen/nv50_ir.h"
24 #include "nv50/codegen/nv50_ir_build_util.h"
26 #include "nv50_ir_target_nvc0.h"
// Packs four quad-operation selectors into a single integer constant.
// Each argument is token-pasted onto the QOP_ prefix, so callers pass the
// bare suffix (e.g. ADD, SUBR, MOV2). q is placed at bit offset 0, r at 2,
// s at 4 and t at 6.
// NOTE(review): the QOP_* definitions are not visible in this listing.
35 #define QUADOP(q, r, s, t) \
36 ((QOP_##q << 0) | (QOP_##r << 2) | \
37 (QOP_##s << 4) | (QOP_##t << 6))
// Pass subclass that declares per-BasicBlock and per-Function visitors plus
// two helpers: handleDIV (integer division / modulus) and handleRCPRSQ
// (double precision reciprocal / reciprocal square root).
// NOTE(review): access specifiers and data members are not visible in this
// listing.
39 class NVC0LegalizeSSA
: public Pass
42 virtual bool visit(BasicBlock
*);
43 virtual bool visit(Function
*);
45 // we want to insert calls to the builtin library only after optimization
46 void handleDIV(Instruction
*); // integer division, modulus
47 void handleRCPRSQ(Instruction
*); // double precision float recip/rsqrt
// Replaces instruction i (integer division or modulus, per the class
// declaration) with a call to a builtin routine.
//
// Visible steps: both sources are moved into registers 0 and 1, a builtin id
// is chosen for TYPE_U32 or TYPE_S32, an OP_CALL flow instruction is emitted,
// the result is copied to i's destination, clobbers are recorded, and i is
// deleted.
// NOTE(review): the declarations of `builtin` and `def`, and the switch that
// encloses the two case labels, are not visible in this listing.
54 NVC0LegalizeSSA::handleDIV(Instruction
*i
)
56 FlowInstruction
*call
;
// Emit the replacement sequence at i's position.
60 bld
.setPosition(i
, false);
// Pass the two operands in registers 0 and 1.
61 def
[0] = bld
.mkMovToReg(0, i
->getSrc(0))->getDef(0);
62 def
[1] = bld
.mkMovToReg(1, i
->getSrc(1))->getDef(0);
64 case TYPE_U32
: builtin
= NVC0_BUILTIN_DIV_U32
; break;
65 case TYPE_S32
: builtin
= NVC0_BUILTIN_DIV_S32
; break;
69 call
= bld
.mkFlow(OP_CALL
, NULL
, CC_ALWAYS
, NULL
);
// The result is taken from def[0] for OP_DIV and from def[1] otherwise.
70 bld
.mkMov(i
->getDef(0), def
[(i
->op
== OP_DIV
) ? 0 : 1]);
// Record clobbers; the mask depends on the op (GPR) and on signedness
// (predicate).
71 bld
.mkClobber(FILE_GPR
, (i
->op
== OP_DIV
) ? 0xe : 0xd, 2);
72 bld
.mkClobber(FILE_PREDICATE
, (i
->dType
== TYPE_S32
) ? 0xf : 0x3, 0);
// Mark the call as an absolute builtin call and record which builtin.
75 call
->absolute
= call
->builtin
= 1;
76 call
->target
.builtin
= builtin
;
// The original instruction is no longer needed.
77 delete_Instruction(prog
, i
);
// Handler for double precision float recip/rsqrt, per the comment on its
// declaration in the class.
// NOTE(review): the body is not visible in this listing.
81 NVC0LegalizeSSA::handleRCPRSQ(Instruction
*i
)
// Per-function visitor: binds the builder to the program that owns fn.
// NOTE(review): the return statement is not visible in this listing.
87 NVC0LegalizeSSA::visit(Function
*fn
)
89 bld
.setProgram(fn
->getProgram());
// Per-block visitor: walks the instructions of bb starting at getEntry().
// The loop advances through a separately maintained `next` pointer rather
// than through i itself.
// NOTE(review): the declaration and assignment of `next`, and what is done
// for the TYPE_F32 and TYPE_F64 cases, are not visible in this listing.
94 NVC0LegalizeSSA::visit(BasicBlock
*bb
)
97 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
99 if (i
->dType
== TYPE_F32
)
108 if (i
->dType
== TYPE_F64
)
// Pass subclass that declares per-Function and per-BasicBlock visitors and
// four helpers: replaceZero, split64BitOp, tryReplaceContWithBra and
// propagateJoin.
// NOTE(review): access specifiers and data members (such as the r63 value
// used by the method definitions) are not visible in this listing.
118 class NVC0LegalizePostRA
: public Pass
121 virtual bool visit(Function
*);
122 virtual bool visit(BasicBlock
*);
124 void replaceZero(Instruction
*);
125 void split64BitOp(Instruction
*);
126 bool tryReplaceContWithBra(BasicBlock
*);
127 void propagateJoin(BasicBlock
*);
// Per-function visitor: creates r63 as a new FILE_GPR value and assigns it
// register id 63.
// NOTE(review): the return statement is not visible in this listing.
133 NVC0LegalizePostRA::visit(Function
*fn
)
135 r63
= new_LValue(fn
, FILE_GPR
);
136 r63
->reg
.data
.id
= 63;
// Scans every existing source of i and tests whether it is an immediate
// whose 64-bit payload is zero.
// NOTE(review): the statement run for a matching source is not visible in
// this listing; presumably the source is replaced with r63 — confirm.
141 NVC0LegalizePostRA::replaceZero(Instruction
*i
)
143 for (int s
= 0; i
->srcExists(s
); ++s
) {
// asImm() result is null-checked below before the payload is read.
144 ImmediateValue
*imm
= i
->getSrc(s
)->asImm();
145 if (imm
&& imm
->reg
.data
.u64
== 0)
// Handles a 64-bit instruction i. The visible statements retype i (both
// dType and sType) to TYPE_U32 and insert a deep clone of i right after it
// in its basic block.
// NOTE(review): what the TYPE_F64 branch and the opcode test (ADD, MUL, FMA,
// CVT, MIN, MAX, ...) guard is not visible in this listing; the opcode
// condition also continues past the visible lines.
151 NVC0LegalizePostRA::split64BitOp(Instruction
*i
)
153 if (i
->dType
== TYPE_F64
) {
156 if (i
->op
== OP_ADD
|| i
->op
== OP_MUL
|| i
->op
== OP_FMA
||
157 i
->op
== OP_CVT
|| i
->op
== OP_MIN
|| i
->op
== OP_MAX
||
// Narrow the instruction to 32 bits ...
160 i
->dType
= i
->sType
= TYPE_U32
;
// ... and duplicate it directly after itself.
162 i
->bb
->insertAfter(i
, i
->clone(true)); // deep cloning
166 // replace CONT with BRA for single unconditional continue
//
// Preconditions checked on bb: exactly two incident edges and an entry
// instruction of type OP_PRECONT. The block at the source of the selected
// incident edge (contBB) must end in an unpredicated OP_CONT. That exit is
// then turned into OP_BRA and bb's PRECONT entry is removed.
// NOTE(review): the statements guarded by the early tests (returns and the
// edge-iterator advance) are not visible in this listing.
168 NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock
*bb
)
170 if (bb
->cfg
.incidentCount() != 2 || bb
->getEntry()->op
!= OP_PRECONT
)
172 Graph::EdgeIterator ei
= bb
->cfg
.incident();
// The edge type is tested twice; presumably the first test advances the
// iterator and the second gives up — confirm.
173 if (ei
.getType() != Graph::Edge::BACK
)
175 if (ei
.getType() != Graph::Edge::BACK
)
177 BasicBlock
*contBB
= BasicBlock::get(ei
.getNode());
// Only an existing, unpredicated OP_CONT exit qualifies.
179 if (!contBB
->getExit() || contBB
->getExit()->op
!= OP_CONT
||
180 contBB
->getExit()->getPredicate())
182 contBB
->getExit()->op
= OP_BRA
;
183 bb
->remove(bb
->getEntry()); // delete PRECONT
// Sanity check: the iterator is exhausted or the current edge is not a back
// edge.
186 assert(ei
.end() || ei
.getType() != Graph::Edge::BACK
);
190 // replace branches to join blocks with join ops
//
// Applies to a block bb whose entry is an OP_JOIN flow instruction with
// `limit` unset. Each incident edge's source block (`in`) is examined
// through its exit instruction, and finally bb's own entry is removed.
// NOTE(review): several guarding conditions and branch bodies (e.g. the test
// before insertTail, and what is done to an OP_BRA exit) are not visible in
// this listing.
192 NVC0LegalizePostRA::propagateJoin(BasicBlock
*bb
)
194 if (bb
->getEntry()->op
!= OP_JOIN
|| bb
->getEntry()->asFlow()->limit
)
196 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
197 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
198 Instruction
*exit
= in
->getExit();
// Fallback: append a JOIN targeting bb and warn about the missing
// terminator.
200 in
->insertTail(new FlowInstruction(func
, OP_JOIN
, bb
));
201 // there should always be a terminator instruction
202 WARN("inserted missing terminator in BB:%i\n", in
->getId());
204 if (exit
->op
== OP_BRA
) {
206 exit
->asFlow()->limit
= 1; // must-not-propagate marker
// Drop the entry instruction of bb.
209 bb
->remove(bb
->getEntry());
// Per-block visitor: iterates over the instructions of bb from getFirst(),
// advancing through a saved `next` pointer, then tries
// tryReplaceContWithBra(bb).
// NOTE(review): the assignment of `next`, the bodies of most tests, and the
// statement that follows a failed tryReplaceContWithBra are not visible in
// this listing.
213 NVC0LegalizePostRA::visit(BasicBlock
*bb
)
215 Instruction
*i
, *next
;
217 // remove pseudo operations and non-fixed no-ops, split 64 bit operations
218 for (i
= bb
->getFirst(); i
; i
= next
) {
// EMIT / RESTART: the reference count of def 0 is tested, and an immediate
// source 0 is replaced by r63.
220 if (i
->op
== OP_EMIT
|| i
->op
== OP_RESTART
) {
221 if (!i
->getDef(0)->refCount())
223 if (i
->src
[0].getFile() == FILE_IMMEDIATE
)
224 i
->setSrc(0, r63
); // initial value must be 0
229 if (i
->op
!= OP_MOV
&& i
->op
!= OP_PFETCH
)
// Tests for a data type that is 8 bytes wide.
231 if (typeSizeof(i
->dType
) == 8)
238 if (!tryReplaceContWithBra(bb
))
// Pass subclass constructed from a Program. It declares visitors for
// Function, BasicBlock and Instruction, one handler per lowered operation
// (RDSV, WRSV, EXPORT, OUT, DIV, MOD, SQRT, POW, TEX, TXD, TXQ, manual TXD),
// and the helpers checkPredicate and readTessCoord.
// NOTE(review): access specifiers and any further data members are not
// visible in this listing.
244 class NVC0LoweringPass
: public Pass
247 NVC0LoweringPass(Program
*);
250 virtual bool visit(Function
*);
251 virtual bool visit(BasicBlock
*);
252 virtual bool visit(Instruction
*);
254 bool handleRDSV(Instruction
*);
255 bool handleWRSV(Instruction
*);
256 bool handleEXPORT(Instruction
*);
257 bool handleOUT(Instruction
*);
258 bool handleDIV(Instruction
*);
259 bool handleMOD(Instruction
*);
260 bool handleSQRT(Instruction
*);
261 bool handlePOW(Instruction
*);
262 bool handleTEX(TexInstruction
*);
263 bool handleTXD(TexInstruction
*);
264 bool handleTXQ(TexInstruction
*);
265 bool handleManualTXD(TexInstruction
*);
267 void checkPredicate(Instruction
*);
269 void readTessCoord(LValue
*dst
, int c
);
// Target of the program; initialized in the constructor from
// prog->getTarget().
272 const Target
*const targ
;
// Value assigned in visit(Function) for geometry programs and used by
// handleEXPORT and handleOUT.
276 LValue
*gpEmitAddress
;
// Constructor: caches the program's target in targ and binds the builder to
// prog.
279 NVC0LoweringPass::NVC0LoweringPass(Program
*prog
) : targ(prog
->getTarget())
281 bld
.setProgram(prog
);
// Per-function visitor. For geometry programs it asserts that the function
// name starts with "MAIN", loads immediate 0 at the CFG root block into
// gpEmitAddress, and moves gpEmitAddress into register 0 at the exit
// instruction of the function's exit block.
// NOTE(review): the return statement is not visible in this listing.
285 NVC0LoweringPass::visit(Function
*fn
)
287 if (prog
->getType() == Program::TYPE_GEOMETRY
) {
288 assert(!strncmp(fn
->getName(), "MAIN", 4));
289 // TODO: when we generate actual functions pass this value along somehow
// Initialize the emit address to 0 in the root block.
290 bld
.setPosition(BasicBlock::get(fn
->cfg
.getRoot()), false);
291 gpEmitAddress
= bld
.loadImm(NULL
, 0)->asLValue();
// Move the emit address into register 0 at the function's exit instruction.
293 bld
.setPosition(BasicBlock::get(fn
->cfgExit
)->getExit(), false);
294 bld
.mkMovToReg(0, gpEmitAddress
);
// Per-block visitor of the lowering pass.
// NOTE(review): the body is not visible in this listing.
301 NVC0LoweringPass::visit(BasicBlock
*bb
)
306 // move array source to first slot, convert to u16, add indirections
//
// dim is the target's dimension count plus isCube(); arg is the target's
// argument count. When they differ, or when either indirect source index is
// non-negative, a new GPR value `src` is built and placed in source slot 0.
// When useOffsets is set, the texel offsets are packed into one immediate
// and attached as a further source.
// NOTE(review): the declarations of n, c and value, and the conditions that
// guard the conversion and the two INSBF insertions, are not visible in
// this listing.
308 NVC0LoweringPass::handleTEX(TexInstruction
*i
)
310 const int dim
= i
->tex
.target
.getDim() + i
->tex
.target
.isCube();
311 const int arg
= i
->tex
.target
.getArgCount();
313 // generate and move the tsc/tic/array source to the front
314 if (dim
!= arg
|| i
->tex
.rIndirectSrc
>= 0 || i
->tex
.sIndirectSrc
>= 0) {
315 LValue
*src
= new_LValue(func
, FILE_GPR
); // 0xttxsaaaa
// For array targets the array index is source arg - 1, otherwise NULL.
317 Value
*arrayIndex
= i
->tex
.target
.isArray() ? i
->getSrc(arg
- 1) : NULL
;
// Shift sources 0..dim-1 up by one slot, freeing slot 0.
318 for (int s
= dim
; s
>= 1; --s
)
319 i
->setSrc(s
, i
->getSrc(s
- 1));
320 i
->setSrc(0, arrayIndex
);
322 Value
*ticRel
= i
->getIndirectR();
323 Value
*tscRel
= i
->getIndirectS();
// OP_TXF converts from U32 with saturation; other ops convert from F32
// without.
326 int sat
= (i
->op
== OP_TXF
) ? 1 : 0;
327 DataType sTy
= (i
->op
== OP_TXF
) ? TYPE_U32
: TYPE_F32
;
328 bld
.mkCvt(OP_CVT
, TYPE_U16
, src
, sTy
, arrayIndex
)->saturate
= sat
;
// Clear the indirect-R source slot and insert ticRel into src with OP_INSBF.
334 i
->setSrc(i
->tex
.rIndirectSrc
, NULL
);
335 bld
.mkOp3(OP_INSBF
, TYPE_U32
, src
, ticRel
, bld
.mkImm(0x0917), src
);
// Same for the indirect-S source and tscRel.
338 i
->setSrc(i
->tex
.sIndirectSrc
, NULL
);
339 bld
.mkOp3(OP_INSBF
, TYPE_U32
, src
, tscRel
, bld
.mkImm(0x0710), src
);
345 // offset is last source (lod 1st, dc 2nd)
346 if (i
->tex
.useOffsets
) {
// s is the source slot that will receive the packed offsets.
349 int s
= i
->srcCount(0xff);
// Pack the low 4 bits of each of the 3 components: 12 bits per offset
// entry n, 4 bits per component c.
350 for (n
= 0; n
< i
->tex
.useOffsets
; ++n
)
351 for (c
= 0; c
< 3; ++c
)
352 value
|= (i
->tex
.offset
[n
][c
] & 0xf) << (n
* 12 + c
* 4);
353 i
->setSrc(s
, bld
.loadImm(NULL
, value
));
// Lowers a texture-with-derivatives instruction manually. The instruction is
// turned into OP_TEX. Between OP_QUADON and OP_QUADPOP, for each of the 4
// quad lanes l, the coordinates of lane l are broadcast, adjusted with
// dPdx/dPdy through quad operations, and sampled with a deep clone of i
// whose results are moved into def[c][l]. Afterwards one OP_UNION per
// destination of i merges the four per-lane results.
// NOTE(review): the declarations of crd, def, tex, mov, c and l are not
// visible in this listing, nor is any use of `mov` after it is assigned.
360 NVC0LoweringPass::handleManualTXD(TexInstruction
*i
)
// Quad-op selector table, indexed [lane][0 = dPdx step, 1 = dPdy step].
362 static const uint8_t qOps
[4][2] =
364 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l0
365 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l1
366 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l2
367 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l3
372 Value
*zero
= bld
.loadImm(bld
.getSSA(), 0);
374 const int dim
= i
->tex
.target
.getDim();
376 i
->op
= OP_TEX
; // no need to clone dPdx/dPdy later
// One scratch value per coordinate component.
378 for (c
= 0; c
< dim
; ++c
)
379 crd
[c
] = bld
.getScratch();
381 bld
.mkOp(OP_QUADON
, TYPE_NONE
, NULL
);
382 for (l
= 0; l
< 4; ++l
) {
383 // mov coordinates from lane l to all lanes
384 for (c
= 0; c
< dim
; ++c
)
385 bld
.mkQuadop(0x00, crd
[c
], l
, i
->getSrc(c
), zero
);
386 // add dPdx from lane l to lanes dx
387 for (c
= 0; c
< dim
; ++c
)
388 bld
.mkQuadop(qOps
[l
][0], crd
[c
], l
, i
->dPdx
[c
].get(), crd
[c
]);
389 // add dPdy from lane l to lanes dy
390 for (c
= 0; c
< dim
; ++c
)
391 bld
.mkQuadop(qOps
[l
][1], crd
[c
], l
, i
->dPdy
[c
].get(), crd
[c
]);
// Sample with a deep clone of i that reads the adjusted coordinates.
393 bld
.insert(tex
= i
->clone(true));
394 for (c
= 0; c
< dim
; ++c
)
395 tex
->setSrc(c
, crd
[c
]);
// Save this lane's results in fresh SSA values.
397 for (c
= 0; i
->defExists(c
); ++c
) {
399 def
[c
][l
] = bld
.getSSA();
400 mov
= bld
.mkMov(def
[c
][l
], tex
->getDef(c
));
405 bld
.mkOp(OP_QUADPOP
, TYPE_NONE
, NULL
);
// Merge the four per-lane results into each original destination.
407 for (c
= 0; i
->defExists(c
); ++c
) {
408 Instruction
*u
= bld
.mkOp(OP_UNION
, TYPE_U32
, i
->getDef(c
));
409 for (l
= 0; l
< 4; ++l
)
410 u
->setSrc(l
, def
[c
][l
]);
// Lowers a TXD instruction. dim is the target's dimension count; arg starts
// as dim plus isArray(). derivAll is set unconditionally. Targets with more
// than two dimensions, or shadow targets, are delegated to handleManualTXD.
// Otherwise the derivatives are stored as interleaved sources starting at
// index arg: dPdx[c] at arg + 2c, dPdy[c] at arg + 2c + 1.
// NOTE(review): the body of the while loop (presumably advancing arg past
// the existing sources — confirm) and the final return are not visible in
// this listing.
418 NVC0LoweringPass::handleTXD(TexInstruction
*txd
)
420 int dim
= txd
->tex
.target
.getDim();
421 int arg
= txd
->tex
.target
.getDim() + txd
->tex
.target
.isArray();
424 while (txd
->src
[arg
].exists())
427 txd
->tex
.derivAll
= true;
428 if (dim
> 2 || txd
->tex
.target
.isShadow())
429 return handleManualTXD(txd
);
431 assert(arg
<= 4); // at most s/t/array, x, y, offset
433 for (int c
= 0; c
< dim
; ++c
) {
434 txd
->src
[arg
+ c
* 2 + 0].set(txd
->dPdx
[c
]);
435 txd
->src
[arg
+ c
* 2 + 1].set(txd
->dPdy
[c
]);
// Handler for TXQ instructions; visit(Instruction) calls it with i->asTex().
// NOTE(review): only the TODO below is visible from the body in this
// listing.
443 NVC0LoweringPass::handleTXQ(TexInstruction
*txq
)
445 // TODO: indirect resource/sampler index
// Lowers a system-value write. The target is asked for the system value's
// address in FILE_SHADER_OUTPUT, a symbol is created at that address, an
// OP_EXPORT store is emitted in place of the write, i's perPatch flag is
// copied to the store, and i is removed from the builder's current block.
// NOTE(review): the declarations of addr, sym and st, the remaining mkStore
// arguments, and the return statement are not visible in this listing.
450 NVC0LoweringPass::handleWRSV(Instruction
*i
)
456 // must replace, $sreg are not writeable
457 addr
= targ
->getSVAddress(FILE_SHADER_OUTPUT
, i
->getSrc(0)->asSym());
460 sym
= bld
.mkSymbol(FILE_SHADER_OUTPUT
, 0, i
->sType
, addr
);
462 st
= bld
.mkStore(OP_EXPORT
, i
->dType
, sym
, i
->getIndirect(0, 0),
464 st
->perPatch
= i
->perPatch
;
466 bld
.getBB()->remove(i
);
// Emits code that reads a tessellation coordinate into dst. The lane id is
// read with OP_RDSV of SV_LANEID and passed as the last argument of two F32
// fetches from FILE_SHADER_OUTPUT, at 0x2f0 (x) and 0x2f4 (y). The visible
// arithmetic adds x and y into dst, then emits OP_SUB with operands
// (1.0f, dst).
// NOTE(review): the declarations of x and y and the selection on component
// c that chooses between these statements are not visible in this listing.
471 NVC0LoweringPass::readTessCoord(LValue
*dst
, int c
)
473 Value
*laneid
= bld
.getSSA();
476 bld
.mkOp1(OP_RDSV
, TYPE_U32
, laneid
, bld
.mkSysVal(SV_LANEID
, 0));
491 bld
.mkFetch(x
, TYPE_F32
, FILE_SHADER_OUTPUT
, 0x2f0, NULL
, laneid
);
493 bld
.mkFetch(y
, TYPE_F32
, FILE_SHADER_OUTPUT
, 0x2f4, NULL
, laneid
);
496 bld
.mkOp2(OP_ADD
, TYPE_F32
, dst
, x
, y
);
497 bld
.mkOp2(OP_SUB
, TYPE_F32
, dst
, bld
.loadImm(NULL
, 1.0f
), dst
);
// Lowers a system-value read. The target supplies the value's address in
// FILE_SHADER_INPUT; addresses >= 0x400 are tagged "mov $sreg". Otherwise
// the code switches on the system-value kind. The visible cases emit a
// linear interpolation (fragment programs only), a flat interpolation for
// the face value with an optional float fix-up, a readTessCoord call
// (tessellation evaluation programs only), or a fetch from
// FILE_SHADER_INPUT. Finally i is removed from the builder's current block.
// NOTE(review): the case labels, the declarations of vtx and ld, and the
// return statements are not visible in this listing.
502 NVC0LoweringPass::handleRDSV(Instruction
*i
)
504 Symbol
*sym
= i
->getSrc(0)->asSym();
507 uint32_t addr
= targ
->getSVAddress(FILE_SHADER_INPUT
, sym
);
509 if (addr
>= 0x400) // mov $sreg
512 switch (i
->getSrc(0)->reg
.data
.sv
.sv
) {
514 assert(prog
->getType() == Program::TYPE_FRAGMENT
);
515 bld
.mkInterp(NV50_IR_INTERP_LINEAR
, i
->getDef(0), addr
, NULL
);
519 Value
*face
= i
->getDef(0);
520 bld
.mkInterp(NV50_IR_INTERP_FLAT
, face
, addr
, NULL
);
// For F32 results: keep only the sign bit, then XOR with 0xbf800000 (the
// bit pattern of -1.0f), which leaves the bit pattern of +1.0f or -1.0f.
521 if (i
->dType
== TYPE_F32
) {
522 bld
.mkOp2(OP_AND
, TYPE_U32
, face
, face
, bld
.mkImm(0x80000000));
523 bld
.mkOp2(OP_XOR
, TYPE_U32
, face
, face
, bld
.mkImm(0xbf800000));
528 assert(prog
->getType() == Program::TYPE_TESSELLATION_EVAL
);
529 readTessCoord(i
->getDef(0)->asLValue(), i
->getSrc(0)->reg
.data
.sv
.index
);
// Tessellation evaluation programs get an OP_PFETCH result as vtx, which is
// passed as the last argument of the fetch below.
532 if (prog
->getType() == Program::TYPE_TESSELLATION_EVAL
)
533 vtx
= bld
.mkOp1v(OP_PFETCH
, TYPE_U32
, bld
.getSSA(), bld
.mkImm(0));
534 ld
= bld
.mkFetch(i
->getDef(0), i
->dType
,
535 FILE_SHADER_INPUT
, addr
, i
->getIndirect(0, 0), vtx
);
536 ld
->perPatch
= i
->perPatch
;
539 bld
.getBB()->remove(i
);
// Lowers a division: an OP_RCP of source 1 is emitted before i and its
// result replaces source 1 of i.
// NOTE(review): the statement guarded by the !isFloatType test, any change
// to i's opcode, and the return statements are not visible in this listing.
544 NVC0LoweringPass::handleDIV(Instruction
*i
)
546 if (!isFloatType(i
->dType
))
548 bld
.setPosition(i
, false);
549 Instruction
*rcp
= bld
.mkOp1(OP_RCP
, i
->dType
, bld
.getSSA(), i
->getSrc(1));
551 i
->setSrc(1, rcp
->getDef(0));
// Lowers an F32 modulus. With a = src0 and b = src1, the visible sequence
// builds in one scratch value: rcp(b), then a * rcp(b), then the truncation
// of that, then b times the truncated quotient.
// NOTE(review): the statement guarded by the dType test and the final step
// that combines this value with i are not visible in this listing.
556 NVC0LoweringPass::handleMOD(Instruction
*i
)
558 if (i
->dType
!= TYPE_F32
)
560 LValue
*value
= bld
.getScratch();
561 bld
.mkOp1(OP_RCP
, TYPE_F32
, value
, i
->getSrc(1));
562 bld
.mkOp2(OP_MUL
, TYPE_F32
, value
, i
->getSrc(0), value
);
563 bld
.mkOp1(OP_TRUNC
, TYPE_F32
, value
, value
);
564 bld
.mkOp2(OP_MUL
, TYPE_F32
, value
, i
->getSrc(1), value
);
// Lowers a square root: an F32 OP_RSQ of source 0 is emitted and its result
// becomes source 1 of i.
// NOTE(review): any change to i's opcode and the return statement are not
// visible in this listing.
571 NVC0LoweringPass::handleSQRT(Instruction
*i
)
573 Instruction
*rsq
= bld
.mkOp1(OP_RSQ
, TYPE_F32
,
574 bld
.getSSA(), i
->getSrc(0));
576 i
->setSrc(1, rsq
->getDef(0));
// Lowers a power operation. In one scratch value it builds lg2(src0), then
// src1 times that (with the multiply's dnz flag set), then OP_PREEX2 of the
// product.
// NOTE(review): how the result is attached to i and the return statement
// are not visible in this listing.
582 NVC0LoweringPass::handlePOW(Instruction
*i
)
584 LValue
*val
= bld
.getScratch();
586 bld
.mkOp1(OP_LG2
, TYPE_F32
, val
, i
->getSrc(0));
587 bld
.mkOp2(OP_MUL
, TYPE_F32
, val
, i
->getSrc(1), val
)->dnz
= 1;
588 bld
.mkOp1(OP_PREEX2
, TYPE_F32
, val
, val
);
// Lowers an export. In fragment programs the register id is the export
// symbol's offset divided by 4; source 1 is copied into source 0, a new GPR
// destination with that id is attached, and prog->maxGPR is raised to at
// least id. In geometry programs gpEmitAddress is set as an indirect of
// source 0.
// NOTE(review): the statement guarded by the isIndirect test, any change to
// i's opcode, and the return statement are not visible in this listing.
598 NVC0LoweringPass::handleEXPORT(Instruction
*i
)
600 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
601 int id
= i
->getSrc(0)->reg
.data
.offset
/ 4;
603 if (i
->src
[0].isIndirect(0)) // TODO, ugly
606 i
->src
[0].set(i
->src
[1]);
608 i
->setDef(0, new_LValue(func
, FILE_GPR
));
609 i
->getDef(0)->reg
.data
.id
= id
;
611 prog
->maxGPR
= MAX2(prog
->maxGPR
, id
);
613 if (prog
->getType() == Program::TYPE_GEOMETRY
) {
614 i
->setIndirect(0, 1, gpEmitAddress
);
// Lowers an output instruction. An OP_RESTART directly preceded by an
// OP_EMIT is folded into that EMIT by setting its subOp to
// NV50_IR_SUBOP_EMIT_RESTART, and i is deleted. The remaining visible
// statements require gpEmitAddress to be set, make it destination 0 of i,
// move the old source 0 to slot 1, and put gpEmitAddress in source 0.
// NOTE(review): the else structure joining the two parts and the return
// statement are not visible in this listing.
620 NVC0LoweringPass::handleOUT(Instruction
*i
)
622 if (i
->op
== OP_RESTART
&& i
->prev
&& i
->prev
->op
== OP_EMIT
) {
623 i
->prev
->subOp
= NV50_IR_SUBOP_EMIT_RESTART
;
624 delete_Instruction(prog
, i
);
626 assert(gpEmitAddress
);
627 i
->setDef(0, gpEmitAddress
);
629 i
->setSrc(1, i
->getSrc(0));
630 i
->setSrc(0, gpEmitAddress
);
635 // Generate a binary predicate if an instruction is predicated by
636 // e.g. an f32 value.
//
// Nothing needs generating when insn has no predicate or the predicate is
// already in FILE_PREDICATE. Otherwise a new predicate value pdst is defined
// by an OP_SET / CC_NEU comparison of the old predicate against immediate 0,
// and insn is re-predicated on pdst with its condition code unchanged.
// NOTE(review): the declaration of pdst and the early return are not
// visible in this listing.
638 NVC0LoweringPass::checkPredicate(Instruction
*insn
)
640 Value
*pred
= insn
->getPredicate();
643 if (!pred
|| pred
->reg
.file
== FILE_PREDICATE
)
645 pdst
= new_LValue(func
, FILE_PREDICATE
);
647 // CAUTION: don't use pdst->getInsn, the definition might not be unique,
648 // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass
650 bld
.mkCmp(OP_SET
, CC_NEU
, TYPE_U32
, pdst
, bld
.mkImm(0), pred
);
652 insn
->setPredicate(insn
->cc
, pdst
);
656 // - add quadop dance for texturing
657 // - put FP outputs in GPRs
658 // - convert instruction sequences
//
// Per-instruction visitor: positions the builder at i, tests whether i has
// a condition code other than CC_ALWAYS, and dispatches to the matching
// handler.
// NOTE(review): the switch header, all case labels, the statement guarded
// by the cc test, and the calls to the remaining handlers are not visible
// in this listing.
661 NVC0LoweringPass::visit(Instruction
*i
)
663 bld
.setPosition(i
, false);
665 if (i
->cc
!= CC_ALWAYS
)
674 return handleTEX(i
->asTex());
676 return handleTXD(i
->asTex());
678 return handleTXQ(i
->asTex());
// Insert an OP_PREEX2 that writes i's destination, then feed that value
// back in as i's source 0.
680 bld
.mkOp1(OP_PREEX2
, TYPE_F32
, i
->getDef(0), i
->getSrc(0));
681 i
->setSrc(0, i
->getDef(0));
690 return handleSQRT(i
);
692 return handleEXPORT(i
);
697 return handleRDSV(i
);
699 return handleWRSV(i
);
// A FILE_SHADER_INPUT source 0 is asserted not to occur in fragment
// programs.
701 if (i
->src
[0].getFile() == FILE_SHADER_INPUT
) {
703 assert(prog
->getType() != Program::TYPE_FRAGMENT
);
// Runs the legalization pass that matches the code generation stage:
//   CG_STAGE_PRE_SSA -> NVC0LoweringPass (constructed with prog)
//   CG_STAGE_POST_RA -> NVC0LegalizePostRA
//   CG_STAGE_SSA     -> NVC0LegalizeSSA
// Each pass is invoked as run(prog, false, true) and its result is returned.
// NOTE(review): the return value for any other stage is not visible in this
// listing.
713 TargetNVC0::runLegalizePass(Program
*prog
, CGStage stage
) const
715 if (stage
== CG_STAGE_PRE_SSA
) {
716 NVC0LoweringPass
pass(prog
);
717 return pass
.run(prog
, false, true);
719 if (stage
== CG_STAGE_POST_RA
) {
720 NVC0LegalizePostRA pass
;
721 return pass
.run(prog
, false, true);
723 if (stage
== CG_STAGE_SSA
) {
724 NVC0LegalizeSSA pass
;
725 return pass
.run(prog
, false, true);
730 } // namespace nv50_ir