/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
23 #include "codegen/nv50_ir_target_nvc0.h"
27 // Argh, all these assertions ...
29 class CodeEmitterNVC0
: public CodeEmitter
32 CodeEmitterNVC0(const TargetNVC0
*);
34 virtual bool emitInstruction(Instruction
*);
35 virtual uint32_t getMinEncodingSize(const Instruction
*) const;
36 virtual void prepareEmission(Function
*);
38 inline void setProgramType(Program::Type pType
) { progType
= pType
; }
41 const TargetNVC0
*targNVC0
;
43 Program::Type progType
;
45 const bool writeIssueDelays
;
48 void emitForm_A(const Instruction
*, uint64_t);
49 void emitForm_B(const Instruction
*, uint64_t);
50 void emitForm_S(const Instruction
*, uint32_t, bool pred
);
52 void emitPredicate(const Instruction
*);
54 void setAddress16(const ValueRef
&);
55 void setAddress24(const ValueRef
&);
56 void setAddressByFile(const ValueRef
&);
57 void setImmediate(const Instruction
*, const int s
); // needs op already set
58 void setImmediateS8(const ValueRef
&);
59 void setSUConst16(const Instruction
*, const int s
);
60 void setSUPred(const Instruction
*, const int s
);
62 void emitCondCode(CondCode cc
, int pos
);
63 void emitInterpMode(const Instruction
*);
64 void emitLoadStoreType(DataType ty
);
65 void emitSUGType(DataType
);
66 void emitSUAddr(const TexInstruction
*);
67 void emitSUDim(const TexInstruction
*);
68 void emitCachingMode(CacheMode c
);
70 void emitShortSrc2(const ValueRef
&);
72 inline uint8_t getSRegEncoding(const ValueRef
&);
74 void roundMode_A(const Instruction
*);
75 void roundMode_C(const Instruction
*);
76 void roundMode_CS(const Instruction
*);
78 void emitNegAbs12(const Instruction
*);
80 void emitNOP(const Instruction
*);
82 void emitLOAD(const Instruction
*);
83 void emitSTORE(const Instruction
*);
84 void emitMOV(const Instruction
*);
85 void emitATOM(const Instruction
*);
86 void emitMEMBAR(const Instruction
*);
87 void emitCCTL(const Instruction
*);
89 void emitINTERP(const Instruction
*);
90 void emitAFETCH(const Instruction
*);
91 void emitPFETCH(const Instruction
*);
92 void emitVFETCH(const Instruction
*);
93 void emitEXPORT(const Instruction
*);
94 void emitOUT(const Instruction
*);
96 void emitUADD(const Instruction
*);
97 void emitFADD(const Instruction
*);
98 void emitDADD(const Instruction
*);
99 void emitUMUL(const Instruction
*);
100 void emitFMUL(const Instruction
*);
101 void emitDMUL(const Instruction
*);
102 void emitIMAD(const Instruction
*);
103 void emitISAD(const Instruction
*);
104 void emitSHLADD(const Instruction
*a
);
105 void emitFMAD(const Instruction
*);
106 void emitDMAD(const Instruction
*);
107 void emitMADSP(const Instruction
*);
109 void emitNOT(Instruction
*);
110 void emitLogicOp(const Instruction
*, uint8_t subOp
);
111 void emitPOPC(const Instruction
*);
112 void emitINSBF(const Instruction
*);
113 void emitEXTBF(const Instruction
*);
114 void emitBFIND(const Instruction
*);
115 void emitPERMT(const Instruction
*);
116 void emitShift(const Instruction
*);
118 void emitSFnOp(const Instruction
*, uint8_t subOp
);
120 void emitCVT(Instruction
*);
121 void emitMINMAX(const Instruction
*);
122 void emitPreOp(const Instruction
*);
124 void emitSET(const CmpInstruction
*);
125 void emitSLCT(const CmpInstruction
*);
126 void emitSELP(const Instruction
*);
128 void emitTEXBAR(const Instruction
*);
129 void emitTEX(const TexInstruction
*);
130 void emitTEXCSAA(const TexInstruction
*);
131 void emitTXQ(const TexInstruction
*);
133 void emitQUADOP(const Instruction
*, uint8_t qOp
, uint8_t laneMask
);
135 void emitFlow(const Instruction
*);
136 void emitBAR(const Instruction
*);
138 void emitSUCLAMPMode(uint16_t);
139 void emitSUCalc(Instruction
*);
140 void emitSULDGB(const TexInstruction
*);
141 void emitSUSTGx(const TexInstruction
*);
143 void emitSULDB(const TexInstruction
*);
144 void emitSUSTx(const TexInstruction
*);
145 void emitSULEA(const TexInstruction
*);
147 void emitVSHL(const Instruction
*);
148 void emitVectorSubOp(const Instruction
*);
150 void emitPIXLD(const Instruction
*);
152 void emitVOTE(const Instruction
*);
154 inline void defId(const ValueDef
&, const int pos
);
155 inline void defId(const Instruction
*, int d
, const int pos
);
156 inline void srcId(const ValueRef
&, const int pos
);
157 inline void srcId(const ValueRef
*, const int pos
);
158 inline void srcId(const Instruction
*, int s
, const int pos
);
159 inline void srcAddr32(const ValueRef
&, int pos
, int shr
);
161 inline bool isLIMM(const ValueRef
&, DataType ty
);
// For better visibility: HEX64 pastes two hex digit groups into a single
// unsigned 64-bit literal, so the high and low instruction words can be
// written side by side at the call site.
#define HEX64(hi, lo) 0x##hi##lo##ULL

// Shorthands for the reg.data member reached through rep() of a source
// reference (SDATA) or a definition (DDATA); both expand identically.
#define SDATA(ref) ((ref).rep()->reg.data)
#define DDATA(ref) ((ref).rep()->reg.data)
170 void CodeEmitterNVC0::srcId(const ValueRef
& src
, const int pos
)
172 code
[pos
/ 32] |= (src
.get() ? SDATA(src
).id
: 63) << (pos
% 32);
175 void CodeEmitterNVC0::srcId(const ValueRef
*src
, const int pos
)
177 code
[pos
/ 32] |= (src
? SDATA(*src
).id
: 63) << (pos
% 32);
180 void CodeEmitterNVC0::srcId(const Instruction
*insn
, int s
, int pos
)
182 int r
= insn
->srcExists(s
) ? SDATA(insn
->src(s
)).id
: 63;
183 code
[pos
/ 32] |= r
<< (pos
% 32);
187 CodeEmitterNVC0::srcAddr32(const ValueRef
& src
, int pos
, int shr
)
189 const uint32_t offset
= SDATA(src
).offset
>> shr
;
191 code
[pos
/ 32] |= offset
<< (pos
% 32);
192 if (pos
&& (pos
< 32))
193 code
[1] |= offset
>> (32 - pos
);
196 void CodeEmitterNVC0::defId(const ValueDef
& def
, const int pos
)
198 code
[pos
/ 32] |= (def
.get() ? DDATA(def
).id
: 63) << (pos
% 32);
201 void CodeEmitterNVC0::defId(const Instruction
*insn
, int d
, int pos
)
203 int r
= insn
->defExists(d
) ? DDATA(insn
->def(d
)).id
: 63;
204 code
[pos
/ 32] |= r
<< (pos
% 32);
207 bool CodeEmitterNVC0::isLIMM(const ValueRef
& ref
, DataType ty
)
209 const ImmediateValue
*imm
= ref
.get()->asImm();
211 return imm
&& (imm
->reg
.data
.u32
& ((ty
== TYPE_F32
) ? 0xfff : 0xfff00000));
215 CodeEmitterNVC0::roundMode_A(const Instruction
*insn
)
218 case ROUND_M
: code
[1] |= 1 << 23; break;
219 case ROUND_P
: code
[1] |= 2 << 23; break;
220 case ROUND_Z
: code
[1] |= 3 << 23; break;
222 assert(insn
->rnd
== ROUND_N
);
228 CodeEmitterNVC0::emitNegAbs12(const Instruction
*i
)
230 if (i
->src(1).mod
.abs()) code
[0] |= 1 << 6;
231 if (i
->src(0).mod
.abs()) code
[0] |= 1 << 7;
232 if (i
->src(1).mod
.neg()) code
[0] |= 1 << 8;
233 if (i
->src(0).mod
.neg()) code
[0] |= 1 << 9;
236 void CodeEmitterNVC0::emitCondCode(CondCode cc
, int pos
)
241 case CC_LT
: val
= 0x1; break;
242 case CC_LTU
: val
= 0x9; break;
243 case CC_EQ
: val
= 0x2; break;
244 case CC_EQU
: val
= 0xa; break;
245 case CC_LE
: val
= 0x3; break;
246 case CC_LEU
: val
= 0xb; break;
247 case CC_GT
: val
= 0x4; break;
248 case CC_GTU
: val
= 0xc; break;
249 case CC_NE
: val
= 0x5; break;
250 case CC_NEU
: val
= 0xd; break;
251 case CC_GE
: val
= 0x6; break;
252 case CC_GEU
: val
= 0xe; break;
253 case CC_TR
: val
= 0xf; break;
254 case CC_FL
: val
= 0x0; break;
256 case CC_A
: val
= 0x14; break;
257 case CC_NA
: val
= 0x13; break;
258 case CC_S
: val
= 0x15; break;
259 case CC_NS
: val
= 0x12; break;
260 case CC_C
: val
= 0x16; break;
261 case CC_NC
: val
= 0x11; break;
262 case CC_O
: val
= 0x17; break;
263 case CC_NO
: val
= 0x10; break;
267 assert(!"invalid condition code");
270 code
[pos
/ 32] |= val
<< (pos
% 32);
274 CodeEmitterNVC0::emitPredicate(const Instruction
*i
)
276 if (i
->predSrc
>= 0) {
277 assert(i
->getPredicate()->reg
.file
== FILE_PREDICATE
);
278 srcId(i
->src(i
->predSrc
), 10);
279 if (i
->cc
== CC_NOT_P
)
280 code
[0] |= 0x2000; // negate
287 CodeEmitterNVC0::setAddressByFile(const ValueRef
& src
)
289 switch (src
.getFile()) {
290 case FILE_MEMORY_GLOBAL
:
291 srcAddr32(src
, 26, 0);
293 case FILE_MEMORY_LOCAL
:
294 case FILE_MEMORY_SHARED
:
298 assert(src
.getFile() == FILE_MEMORY_CONST
);
305 CodeEmitterNVC0::setAddress16(const ValueRef
& src
)
307 Symbol
*sym
= src
.get()->asSym();
311 code
[0] |= (sym
->reg
.data
.offset
& 0x003f) << 26;
312 code
[1] |= (sym
->reg
.data
.offset
& 0xffc0) >> 6;
316 CodeEmitterNVC0::setAddress24(const ValueRef
& src
)
318 Symbol
*sym
= src
.get()->asSym();
322 code
[0] |= (sym
->reg
.data
.offset
& 0x00003f) << 26;
323 code
[1] |= (sym
->reg
.data
.offset
& 0xffffc0) >> 6;
327 CodeEmitterNVC0::setImmediate(const Instruction
*i
, const int s
)
329 const ImmediateValue
*imm
= i
->src(s
).get()->asImm();
333 u32
= imm
->reg
.data
.u32
;
335 if ((code
[0] & 0xf) == 0x1) {
337 uint64_t u64
= imm
->reg
.data
.u64
;
338 assert(!(u64
& 0x00000fffffffffffULL
));
339 assert(!(code
[1] & 0xc000));
340 code
[0] |= ((u64
>> 44) & 0x3f) << 26;
341 code
[1] |= 0xc000 | (u64
>> 50);
343 if ((code
[0] & 0xf) == 0x2) {
345 code
[0] |= (u32
& 0x3f) << 26;
348 if ((code
[0] & 0xf) == 0x3 || (code
[0] & 0xf) == 4) {
350 assert((u32
& 0xfff00000) == 0 || (u32
& 0xfff00000) == 0xfff00000);
351 assert(!(code
[1] & 0xc000));
353 code
[0] |= (u32
& 0x3f) << 26;
354 code
[1] |= 0xc000 | (u32
>> 6);
357 assert(!(u32
& 0x00000fff));
358 assert(!(code
[1] & 0xc000));
359 code
[0] |= ((u32
>> 12) & 0x3f) << 26;
360 code
[1] |= 0xc000 | (u32
>> 18);
364 void CodeEmitterNVC0::setImmediateS8(const ValueRef
&ref
)
366 const ImmediateValue
*imm
= ref
.get()->asImm();
368 int8_t s8
= static_cast<int8_t>(imm
->reg
.data
.s32
);
370 assert(s8
== imm
->reg
.data
.s32
);
372 code
[0] |= (s8
& 0x3f) << 26;
373 code
[0] |= (s8
>> 6) << 8;
377 CodeEmitterNVC0::emitForm_A(const Instruction
*i
, uint64_t opc
)
384 defId(i
->def(0), 14);
387 if (i
->srcExists(2) && i
->getSrc(2)->reg
.file
== FILE_MEMORY_CONST
)
390 for (int s
= 0; s
< 3 && i
->srcExists(s
); ++s
) {
391 switch (i
->getSrc(s
)->reg
.file
) {
392 case FILE_MEMORY_CONST
:
393 assert(!(code
[1] & 0xc000));
394 code
[1] |= (s
== 2) ? 0x8000 : 0x4000;
395 code
[1] |= i
->getSrc(s
)->reg
.fileIndex
<< 10;
396 setAddress16(i
->src(s
));
400 i
->op
== OP_MOV
|| i
->op
== OP_PRESIN
|| i
->op
== OP_PREEX2
);
401 assert(!(code
[1] & 0xc000));
405 if ((s
== 2) && ((code
[0] & 0x7) == 2)) // LIMM: 3rd src == dst
407 srcId(i
->src(s
), s
? ((s
== 2) ? 49 : s1
) : 20);
410 if (i
->op
== OP_SELP
) {
411 // OP_SELP is used to implement shared+atomics on Fermi.
412 assert(s
== 2 && i
->src(s
).getFile() == FILE_PREDICATE
);
413 srcId(i
->src(s
), 49);
415 // ignore here, can be predicate or flags, but must not be address
422 CodeEmitterNVC0::emitForm_B(const Instruction
*i
, uint64_t opc
)
429 defId(i
->def(0), 14);
431 switch (i
->src(0).getFile()) {
432 case FILE_MEMORY_CONST
:
433 assert(!(code
[1] & 0xc000));
434 code
[1] |= 0x4000 | (i
->src(0).get()->reg
.fileIndex
<< 10);
435 setAddress16(i
->src(0));
438 assert(!(code
[1] & 0xc000));
442 srcId(i
->src(0), 26);
445 // ignore here, can be predicate or flags, but must not be address
451 CodeEmitterNVC0::emitForm_S(const Instruction
*i
, uint32_t opc
, bool pred
)
456 if (opc
== 0x0d || opc
== 0x0e)
459 defId(i
->def(0), 14);
460 srcId(i
->src(0), 20);
462 assert(pred
|| (i
->predSrc
< 0));
466 for (int s
= 1; s
< 3 && i
->srcExists(s
); ++s
) {
467 if (i
->src(s
).get()->reg
.file
== FILE_MEMORY_CONST
) {
468 assert(!(code
[0] & (0x300 >> ss2a
)));
469 switch (i
->src(s
).get()->reg
.fileIndex
) {
470 case 0: code
[0] |= 0x100 >> ss2a
; break;
471 case 1: code
[0] |= 0x200 >> ss2a
; break;
472 case 16: code
[0] |= 0x300 >> ss2a
; break;
474 ERROR("invalid c[] space for short form\n");
478 code
[0] |= i
->getSrc(s
)->reg
.data
.offset
<< 24;
480 code
[0] |= i
->getSrc(s
)->reg
.data
.offset
<< 6;
482 if (i
->src(s
).getFile() == FILE_IMMEDIATE
) {
484 setImmediateS8(i
->src(s
));
486 if (i
->src(s
).getFile() == FILE_GPR
) {
487 srcId(i
->src(s
), (s
== 1) ? 26 : 8);
493 CodeEmitterNVC0::emitShortSrc2(const ValueRef
&src
)
495 if (src
.getFile() == FILE_MEMORY_CONST
) {
496 switch (src
.get()->reg
.fileIndex
) {
497 case 0: code
[0] |= 0x100; break;
498 case 1: code
[0] |= 0x200; break;
499 case 16: code
[0] |= 0x300; break;
501 assert(!"unsupported file index for short op");
504 srcAddr32(src
, 20, 2);
507 assert(src
.getFile() == FILE_GPR
);
512 CodeEmitterNVC0::emitNOP(const Instruction
*i
)
514 code
[0] = 0x000001e4;
515 code
[1] = 0x40000000;
520 CodeEmitterNVC0::emitFMAD(const Instruction
*i
)
522 bool neg1
= (i
->src(0).mod
^ i
->src(1).mod
).neg();
524 if (i
->encSize
== 8) {
525 if (isLIMM(i
->src(1), TYPE_F32
)) {
526 emitForm_A(i
, HEX64(20000000, 00000002));
528 emitForm_A(i
, HEX64(30000000, 00000000));
530 if (i
->src(2).mod
.neg())
547 assert(!i
->saturate
&& !i
->src(2).mod
.neg());
548 emitForm_S(i
, (i
->src(2).getFile() == FILE_MEMORY_CONST
) ? 0x2e : 0x0e,
556 CodeEmitterNVC0::emitDMAD(const Instruction
*i
)
558 bool neg1
= (i
->src(0).mod
^ i
->src(1).mod
).neg();
560 emitForm_A(i
, HEX64(20000000, 00000001));
562 if (i
->src(2).mod
.neg())
570 assert(!i
->saturate
);
575 CodeEmitterNVC0::emitFMUL(const Instruction
*i
)
577 bool neg
= (i
->src(0).mod
^ i
->src(1).mod
).neg();
579 assert(i
->postFactor
>= -3 && i
->postFactor
<= 3);
581 if (i
->encSize
== 8) {
582 if (isLIMM(i
->src(1), TYPE_F32
)) {
583 assert(i
->postFactor
== 0); // constant folded, hopefully
584 emitForm_A(i
, HEX64(30000000, 00000002));
586 emitForm_A(i
, HEX64(58000000, 00000000));
588 code
[1] |= ((i
->postFactor
> 0) ?
589 (7 - i
->postFactor
) : (0 - i
->postFactor
)) << 17;
592 code
[1] ^= 1 << 25; // aliases with LIMM sign bit
603 assert(!neg
&& !i
->saturate
&& !i
->ftz
&& !i
->postFactor
);
604 emitForm_S(i
, 0xa8, true);
609 CodeEmitterNVC0::emitDMUL(const Instruction
*i
)
611 bool neg
= (i
->src(0).mod
^ i
->src(1).mod
).neg();
613 emitForm_A(i
, HEX64(50000000, 00000001));
619 assert(!i
->saturate
);
622 assert(!i
->postFactor
);
626 CodeEmitterNVC0::emitUMUL(const Instruction
*i
)
628 if (i
->encSize
== 8) {
629 if (i
->src(1).getFile() == FILE_IMMEDIATE
) {
630 emitForm_A(i
, HEX64(10000000, 00000002));
632 emitForm_A(i
, HEX64(50000000, 00000003));
634 if (i
->subOp
== NV50_IR_SUBOP_MUL_HIGH
)
636 if (i
->sType
== TYPE_S32
)
638 if (i
->dType
== TYPE_S32
)
641 emitForm_S(i
, i
->src(1).getFile() == FILE_IMMEDIATE
? 0xaa : 0x2a, true);
643 if (i
->sType
== TYPE_S32
)
649 CodeEmitterNVC0::emitFADD(const Instruction
*i
)
651 if (i
->encSize
== 8) {
652 if (isLIMM(i
->src(1), TYPE_F32
)) {
653 assert(!i
->saturate
);
654 emitForm_A(i
, HEX64(28000000, 00000002));
656 code
[0] |= i
->src(0).mod
.abs() << 7;
657 code
[0] |= i
->src(0).mod
.neg() << 9;
659 if (i
->src(1).mod
.abs())
660 code
[1] &= 0xfdffffff;
661 if ((i
->op
== OP_SUB
) != static_cast<bool>(i
->src(1).mod
.neg()))
662 code
[1] ^= 0x02000000;
664 emitForm_A(i
, HEX64(50000000, 00000000));
671 if (i
->op
== OP_SUB
) code
[0] ^= 1 << 8;
676 assert(!i
->saturate
&& i
->op
!= OP_SUB
&&
677 !i
->src(0).mod
.abs() &&
678 !i
->src(1).mod
.neg() && !i
->src(1).mod
.abs());
680 emitForm_S(i
, 0x49, true);
682 if (i
->src(0).mod
.neg())
688 CodeEmitterNVC0::emitDADD(const Instruction
*i
)
690 assert(i
->encSize
== 8);
691 emitForm_A(i
, HEX64(48000000, 00000001));
693 assert(!i
->saturate
);
701 CodeEmitterNVC0::emitUADD(const Instruction
*i
)
705 assert(!i
->src(0).mod
.abs() && !i
->src(1).mod
.abs());
707 if (i
->src(0).mod
.neg())
709 if (i
->src(1).mod
.neg())
714 assert(addOp
!= 0x300); // would be add-plus-one
716 if (i
->encSize
== 8) {
717 if (isLIMM(i
->src(1), TYPE_U32
)) {
718 emitForm_A(i
, HEX64(08000000, 00000002));
720 code
[1] |= 1 << 26; // write carry
722 emitForm_A(i
, HEX64(48000000, 00000003));
724 code
[1] |= 1 << 16; // write carry
730 if (i
->flagsSrc
>= 0) // add carry
733 assert(!(addOp
& 0x100));
734 emitForm_S(i
, (addOp
>> 3) |
735 ((i
->src(1).getFile() == FILE_IMMEDIATE
) ? 0xac : 0x2c), true);
740 CodeEmitterNVC0::emitIMAD(const Instruction
*i
)
743 i
->src(2).mod
.neg() | ((i
->src(0).mod
.neg() ^ i
->src(1).mod
.neg()) << 1);
745 assert(i
->encSize
== 8);
746 emitForm_A(i
, HEX64(20000000, 00000003));
749 code
[0] |= addOp
<< 8;
751 if (isSignedType(i
->dType
))
753 if (isSignedType(i
->sType
))
756 code
[1] |= i
->saturate
<< 24;
758 if (i
->flagsDef
>= 0) code
[1] |= 1 << 16;
759 if (i
->flagsSrc
>= 0) code
[1] |= 1 << 23;
761 if (i
->subOp
== NV50_IR_SUBOP_MUL_HIGH
)
766 CodeEmitterNVC0::emitSHLADD(const Instruction
*i
)
768 uint8_t addOp
= (i
->src(0).mod
.neg() << 1) | i
->src(2).mod
.neg();
769 const ImmediateValue
*imm
= i
->src(1).get()->asImm();
772 code
[0] = 0x00000003;
773 code
[1] = 0x40000000 | addOp
<< 23;
777 defId(i
->def(0), 14);
778 srcId(i
->src(0), 20);
780 if (i
->flagsDef
>= 0)
783 assert(!(imm
->reg
.data
.u32
& 0xffffffe0));
784 code
[0] |= imm
->reg
.data
.u32
<< 5;
786 switch (i
->src(2).getFile()) {
788 srcId(i
->src(2), 26);
790 case FILE_MEMORY_CONST
:
792 code
[1] |= i
->getSrc(2)->reg
.fileIndex
<< 10;
793 setAddress16(i
->src(2));
799 assert(!"bad src2 file");
805 CodeEmitterNVC0::emitMADSP(const Instruction
*i
)
807 assert(targ
->getChipset() >= NVISA_GK104_CHIPSET
);
809 emitForm_A(i
, HEX64(00000000, 00000003));
811 if (i
->subOp
== NV50_IR_SUBOP_MADSP_SD
) {
812 code
[1] |= 0x01800000;
814 code
[0] |= (i
->subOp
& 0x00f) << 7;
815 code
[0] |= (i
->subOp
& 0x0f0) << 1;
816 code
[0] |= (i
->subOp
& 0x100) >> 3;
817 code
[0] |= (i
->subOp
& 0x200) >> 2;
818 code
[1] |= (i
->subOp
& 0xc00) << 13;
821 if (i
->flagsDef
>= 0)
826 CodeEmitterNVC0::emitISAD(const Instruction
*i
)
828 assert(i
->dType
== TYPE_S32
|| i
->dType
== TYPE_U32
);
829 assert(i
->encSize
== 8);
831 emitForm_A(i
, HEX64(38000000, 00000003));
833 if (i
->dType
== TYPE_S32
)
838 CodeEmitterNVC0::emitNOT(Instruction
*i
)
840 assert(i
->encSize
== 8);
841 i
->setSrc(1, i
->src(0));
842 emitForm_A(i
, HEX64(68000000, 000001c3
));
846 CodeEmitterNVC0::emitLogicOp(const Instruction
*i
, uint8_t subOp
)
848 if (i
->def(0).getFile() == FILE_PREDICATE
) {
849 code
[0] = 0x00000004 | (subOp
<< 30);
850 code
[1] = 0x0c000000;
854 defId(i
->def(0), 17);
855 srcId(i
->src(0), 20);
856 if (i
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
)) code
[0] |= 1 << 23;
857 srcId(i
->src(1), 26);
858 if (i
->src(1).mod
== Modifier(NV50_IR_MOD_NOT
)) code
[0] |= 1 << 29;
860 if (i
->defExists(1)) {
861 defId(i
->def(1), 14);
866 if (i
->predSrc
!= 2 && i
->srcExists(2)) {
867 code
[1] |= subOp
<< 21;
868 srcId(i
->src(2), 49);
869 if (i
->src(2).mod
== Modifier(NV50_IR_MOD_NOT
)) code
[1] |= 1 << 20;
871 code
[1] |= 0x000e0000;
874 if (i
->encSize
== 8) {
875 if (isLIMM(i
->src(1), TYPE_U32
)) {
876 emitForm_A(i
, HEX64(38000000, 00000002));
878 if (i
->flagsDef
>= 0)
881 emitForm_A(i
, HEX64(68000000, 00000003));
883 if (i
->flagsDef
>= 0)
886 code
[0] |= subOp
<< 6;
888 if (i
->flagsSrc
>= 0) // carry
891 if (i
->src(0).mod
& Modifier(NV50_IR_MOD_NOT
)) code
[0] |= 1 << 9;
892 if (i
->src(1).mod
& Modifier(NV50_IR_MOD_NOT
)) code
[0] |= 1 << 8;
894 emitForm_S(i
, (subOp
<< 5) |
895 ((i
->src(1).getFile() == FILE_IMMEDIATE
) ? 0x1d : 0x8d), true);
900 CodeEmitterNVC0::emitPOPC(const Instruction
*i
)
902 emitForm_A(i
, HEX64(54000000, 00000004));
904 if (i
->src(0).mod
& Modifier(NV50_IR_MOD_NOT
)) code
[0] |= 1 << 9;
905 if (i
->src(1).mod
& Modifier(NV50_IR_MOD_NOT
)) code
[0] |= 1 << 8;
909 CodeEmitterNVC0::emitINSBF(const Instruction
*i
)
911 emitForm_A(i
, HEX64(28000000, 00000003));
915 CodeEmitterNVC0::emitEXTBF(const Instruction
*i
)
917 emitForm_A(i
, HEX64(70000000, 00000003));
919 if (i
->dType
== TYPE_S32
)
921 if (i
->subOp
== NV50_IR_SUBOP_EXTBF_REV
)
926 CodeEmitterNVC0::emitBFIND(const Instruction
*i
)
928 emitForm_B(i
, HEX64(78000000, 00000003));
930 if (i
->dType
== TYPE_S32
)
932 if (i
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
))
934 if (i
->subOp
== NV50_IR_SUBOP_BFIND_SAMT
)
939 CodeEmitterNVC0::emitPERMT(const Instruction
*i
)
941 emitForm_A(i
, HEX64(24000000, 00000004));
943 code
[0] |= i
->subOp
<< 5;
947 CodeEmitterNVC0::emitShift(const Instruction
*i
)
949 if (i
->op
== OP_SHR
) {
950 emitForm_A(i
, HEX64(58000000, 00000003)
951 | (isSignedType(i
->dType
) ? 0x20 : 0x00));
953 emitForm_A(i
, HEX64(60000000, 00000003));
956 if (i
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
)
961 CodeEmitterNVC0::emitPreOp(const Instruction
*i
)
963 if (i
->encSize
== 8) {
964 emitForm_B(i
, HEX64(60000000, 00000000));
966 if (i
->op
== OP_PREEX2
)
969 if (i
->src(0).mod
.abs()) code
[0] |= 1 << 6;
970 if (i
->src(0).mod
.neg()) code
[0] |= 1 << 8;
972 emitForm_S(i
, i
->op
== OP_PREEX2
? 0x74000008 : 0x70000008, true);
977 CodeEmitterNVC0::emitSFnOp(const Instruction
*i
, uint8_t subOp
)
979 if (i
->encSize
== 8) {
980 code
[0] = 0x00000000 | (subOp
<< 26);
981 code
[1] = 0xc8000000;
985 defId(i
->def(0), 14);
986 srcId(i
->src(0), 20);
988 assert(i
->src(0).getFile() == FILE_GPR
);
990 if (i
->saturate
) code
[0] |= 1 << 5;
992 if (i
->src(0).mod
.abs()) code
[0] |= 1 << 7;
993 if (i
->src(0).mod
.neg()) code
[0] |= 1 << 9;
995 emitForm_S(i
, 0x80000008 | (subOp
<< 26), true);
997 assert(!i
->src(0).mod
.neg());
998 if (i
->src(0).mod
.abs()) code
[0] |= 1 << 30;
1003 CodeEmitterNVC0::emitMINMAX(const Instruction
*i
)
1007 assert(i
->encSize
== 8);
1009 op
= (i
->op
== OP_MIN
) ? 0x080e000000000000ULL
: 0x081e000000000000ULL
;
1014 if (!isFloatType(i
->dType
)) {
1015 op
|= isSignedType(i
->dType
) ? 0x23 : 0x03;
1016 op
|= i
->subOp
<< 6;
1018 if (i
->dType
== TYPE_F64
)
1024 if (i
->flagsDef
>= 0)
1029 CodeEmitterNVC0::roundMode_C(const Instruction
*i
)
1032 case ROUND_M
: code
[1] |= 1 << 17; break;
1033 case ROUND_P
: code
[1] |= 2 << 17; break;
1034 case ROUND_Z
: code
[1] |= 3 << 17; break;
1035 case ROUND_NI
: code
[0] |= 1 << 7; break;
1036 case ROUND_MI
: code
[0] |= 1 << 7; code
[1] |= 1 << 17; break;
1037 case ROUND_PI
: code
[0] |= 1 << 7; code
[1] |= 2 << 17; break;
1038 case ROUND_ZI
: code
[0] |= 1 << 7; code
[1] |= 3 << 17; break;
1039 case ROUND_N
: break;
1041 assert(!"invalid round mode");
1047 CodeEmitterNVC0::roundMode_CS(const Instruction
*i
)
1051 case ROUND_MI
: code
[0] |= 1 << 16; break;
1053 case ROUND_PI
: code
[0] |= 2 << 16; break;
1055 case ROUND_ZI
: code
[0] |= 3 << 16; break;
1062 CodeEmitterNVC0::emitCVT(Instruction
*i
)
1064 const bool f2f
= isFloatType(i
->dType
) && isFloatType(i
->sType
);
1068 case OP_CEIL
: i
->rnd
= f2f
? ROUND_PI
: ROUND_P
; break;
1069 case OP_FLOOR
: i
->rnd
= f2f
? ROUND_MI
: ROUND_M
; break;
1070 case OP_TRUNC
: i
->rnd
= f2f
? ROUND_ZI
: ROUND_Z
; break;
1075 const bool sat
= (i
->op
== OP_SAT
) || i
->saturate
;
1076 const bool abs
= (i
->op
== OP_ABS
) || i
->src(0).mod
.abs();
1077 const bool neg
= (i
->op
== OP_NEG
) || i
->src(0).mod
.neg();
1079 if (i
->op
== OP_NEG
&& i
->dType
== TYPE_U32
)
1084 if (i
->encSize
== 8) {
1085 emitForm_B(i
, HEX64(10000000, 00000004));
1089 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
1090 code
[0] |= util_logbase2(typeSizeof(dType
)) << 20;
1091 code
[0] |= util_logbase2(typeSizeof(i
->sType
)) << 23;
1093 // for 8/16 source types, the byte/word is in subOp. word 1 is
1094 // represented as 2.
1095 if (!isFloatType(i
->sType
))
1096 code
[1] |= i
->subOp
<< 0x17;
1098 code
[1] |= i
->subOp
<< 0x18;
1104 if (neg
&& i
->op
!= OP_ABS
)
1110 if (isSignedIntType(dType
))
1112 if (isSignedIntType(i
->sType
))
1115 if (isFloatType(dType
)) {
1116 if (!isFloatType(i
->sType
))
1117 code
[1] |= 0x08000000;
1119 if (isFloatType(i
->sType
))
1120 code
[1] |= 0x04000000;
1122 code
[1] |= 0x0c000000;
1125 if (i
->op
== OP_CEIL
|| i
->op
== OP_FLOOR
|| i
->op
== OP_TRUNC
) {
1128 if (isFloatType(dType
)) {
1129 if (isFloatType(i
->sType
))
1132 code
[0] = 0x088 | (isSignedType(i
->sType
) ? (1 << 8) : 0);
1134 assert(isFloatType(i
->sType
));
1136 code
[0] = 0x288 | (isSignedType(i
->sType
) ? (1 << 8) : 0);
1139 if (neg
) code
[0] |= 1 << 16;
1140 if (sat
) code
[0] |= 1 << 18;
1141 if (abs
) code
[0] |= 1 << 19;
1148 CodeEmitterNVC0::emitSET(const CmpInstruction
*i
)
1153 if (i
->sType
== TYPE_F64
)
1156 if (!isFloatType(i
->sType
))
1159 if (isSignedIntType(i
->sType
))
1161 if (isFloatType(i
->dType
)) {
1162 if (isFloatType(i
->sType
))
1169 case OP_SET_AND
: hi
= 0x10000000; break;
1170 case OP_SET_OR
: hi
= 0x10200000; break;
1171 case OP_SET_XOR
: hi
= 0x10400000; break;
1176 emitForm_A(i
, (static_cast<uint64_t>(hi
) << 32) | lo
);
1178 if (i
->op
!= OP_SET
)
1179 srcId(i
->src(2), 32 + 17);
1181 if (i
->def(0).getFile() == FILE_PREDICATE
) {
1182 if (i
->sType
== TYPE_F32
)
1183 code
[1] += 0x10000000;
1185 code
[1] += 0x08000000;
1187 code
[0] &= ~0xfc000;
1188 defId(i
->def(0), 17);
1189 if (i
->defExists(1))
1190 defId(i
->def(1), 14);
1198 emitCondCode(i
->setCond
, 32 + 23);
1203 CodeEmitterNVC0::emitSLCT(const CmpInstruction
*i
)
1209 op
= HEX64(30000000, 00000023);
1212 op
= HEX64(30000000, 00000003);
1215 op
= HEX64(38000000, 00000000);
1218 assert(!"invalid type for SLCT");
1224 CondCode cc
= i
->setCond
;
1226 if (i
->src(2).mod
.neg())
1227 cc
= reverseCondCode(cc
);
1229 emitCondCode(cc
, 32 + 23);
1236 selpFlip(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
1238 int loc
= entry
->loc
;
1239 if (data
.force_persample_interp
)
1240 code
[loc
+ 1] |= 1 << 20;
1242 code
[loc
+ 1] &= ~(1 << 20);
1245 void CodeEmitterNVC0::emitSELP(const Instruction
*i
)
1247 emitForm_A(i
, HEX64(20000000, 00000004));
1249 if (i
->src(2).mod
& Modifier(NV50_IR_MOD_NOT
))
1252 if (i
->subOp
== 1) {
1253 addInterp(0, 0, selpFlip
);
1257 void CodeEmitterNVC0::emitTEXBAR(const Instruction
*i
)
1259 code
[0] = 0x00000006 | (i
->subOp
<< 26);
1260 code
[1] = 0xf0000000;
1262 emitCondCode(i
->flagsSrc
>= 0 ? i
->cc
: CC_ALWAYS
, 5);
1265 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction
*i
)
1267 code
[0] = 0x00000086;
1268 code
[1] = 0xd0000000;
1270 code
[1] |= i
->tex
.r
;
1271 code
[1] |= i
->tex
.s
<< 8;
1273 if (i
->tex
.liveOnly
)
1276 defId(i
->def(0), 14);
1277 srcId(i
->src(0), 20);
1281 isNextIndependentTex(const TexInstruction
*i
)
1283 if (!i
->next
|| !isTextureOp(i
->next
->op
))
1285 if (i
->getDef(0)->interfers(i
->next
->getSrc(0)))
1287 return !i
->next
->srcExists(1) || !i
->getDef(0)->interfers(i
->next
->getSrc(1));
1291 CodeEmitterNVC0::emitTEX(const TexInstruction
*i
)
1293 code
[0] = 0x00000006;
1295 if (isNextIndependentTex(i
))
1296 code
[0] |= 0x080; // t mode
1298 code
[0] |= 0x100; // p mode
1300 if (i
->tex
.liveOnly
)
1304 case OP_TEX
: code
[1] = 0x80000000; break;
1305 case OP_TXB
: code
[1] = 0x84000000; break;
1306 case OP_TXL
: code
[1] = 0x86000000; break;
1307 case OP_TXF
: code
[1] = 0x90000000; break;
1308 case OP_TXG
: code
[1] = 0xa0000000; break;
1309 case OP_TXLQ
: code
[1] = 0xb0000000; break;
1310 case OP_TXD
: code
[1] = 0xe0000000; break;
1312 assert(!"invalid texture op");
1315 if (i
->op
== OP_TXF
) {
1316 if (!i
->tex
.levelZero
)
1317 code
[1] |= 0x02000000;
1319 if (i
->tex
.levelZero
) {
1320 code
[1] |= 0x02000000;
1323 if (i
->op
!= OP_TXD
&& i
->tex
.derivAll
)
1326 defId(i
->def(0), 14);
1327 srcId(i
->src(0), 20);
1331 if (i
->op
== OP_TXG
) code
[0] |= i
->tex
.gatherComp
<< 5;
1333 code
[1] |= i
->tex
.mask
<< 14;
1335 code
[1] |= i
->tex
.r
;
1336 code
[1] |= i
->tex
.s
<< 8;
1337 if (i
->tex
.rIndirectSrc
>= 0 || i
->tex
.sIndirectSrc
>= 0)
1338 code
[1] |= 1 << 18; // in 1st source (with array index)
1341 code
[1] |= (i
->tex
.target
.getDim() - 1) << 20;
1342 if (i
->tex
.target
.isCube())
1344 if (i
->tex
.target
.isArray())
1346 if (i
->tex
.target
.isShadow())
1349 const int src1
= (i
->predSrc
== 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1351 if (i
->srcExists(src1
) && i
->src(src1
).getFile() == FILE_IMMEDIATE
) {
1353 if (i
->op
== OP_TXL
)
1354 code
[1] &= ~(1 << 26);
1356 if (i
->op
== OP_TXF
)
1357 code
[1] &= ~(1 << 25);
1359 if (i
->tex
.target
== TEX_TARGET_2D_MS
||
1360 i
->tex
.target
== TEX_TARGET_2D_MS_ARRAY
)
1363 if (i
->tex
.useOffsets
== 1)
1365 if (i
->tex
.useOffsets
== 4)
1372 CodeEmitterNVC0::emitTXQ(const TexInstruction
*i
)
1374 code
[0] = 0x00000086;
1375 code
[1] = 0xc0000000;
1377 switch (i
->tex
.query
) {
1378 case TXQ_DIMS
: code
[1] |= 0 << 22; break;
1379 case TXQ_TYPE
: code
[1] |= 1 << 22; break;
1380 case TXQ_SAMPLE_POSITION
: code
[1] |= 2 << 22; break;
1381 case TXQ_FILTER
: code
[1] |= 3 << 22; break;
1382 case TXQ_LOD
: code
[1] |= 4 << 22; break;
1383 case TXQ_BORDER_COLOUR
: code
[1] |= 5 << 22; break;
1385 assert(!"invalid texture query");
1389 code
[1] |= i
->tex
.mask
<< 14;
1391 code
[1] |= i
->tex
.r
;
1392 code
[1] |= i
->tex
.s
<< 8;
1393 if (i
->tex
.sIndirectSrc
>= 0 || i
->tex
.rIndirectSrc
>= 0)
1396 const int src1
= (i
->predSrc
== 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1398 defId(i
->def(0), 14);
1399 srcId(i
->src(0), 20);
1406 CodeEmitterNVC0::emitQUADOP(const Instruction
*i
, uint8_t qOp
, uint8_t laneMask
)
1408 code
[0] = 0x00000200 | (laneMask
<< 6); // dall
1409 code
[1] = 0x48000000 | qOp
;
1411 defId(i
->def(0), 14);
1412 srcId(i
->src(0), 20);
1413 srcId((i
->srcExists(1) && i
->predSrc
!= 1) ? i
->src(1) : i
->src(0), 26);
1419 CodeEmitterNVC0::emitFlow(const Instruction
*i
)
1421 const FlowInstruction
*f
= i
->asFlow();
1423 unsigned mask
; // bit 0: predicate, bit 1: target
1425 code
[0] = 0x00000007;
1429 code
[1] = f
->absolute
? 0x00000000 : 0x40000000;
1430 if (i
->srcExists(0) && i
->src(0).getFile() == FILE_MEMORY_CONST
)
1435 code
[1] = f
->absolute
? 0x10000000 : 0x50000000;
1437 code
[0] |= 0x4000; // indirect calls always use c[] source
1441 case OP_EXIT
: code
[1] = 0x80000000; mask
= 1; break;
1442 case OP_RET
: code
[1] = 0x90000000; mask
= 1; break;
1443 case OP_DISCARD
: code
[1] = 0x98000000; mask
= 1; break;
1444 case OP_BREAK
: code
[1] = 0xa8000000; mask
= 1; break;
1445 case OP_CONT
: code
[1] = 0xb0000000; mask
= 1; break;
1447 case OP_JOINAT
: code
[1] = 0x60000000; mask
= 2; break;
1448 case OP_PREBREAK
: code
[1] = 0x68000000; mask
= 2; break;
1449 case OP_PRECONT
: code
[1] = 0x70000000; mask
= 2; break;
1450 case OP_PRERET
: code
[1] = 0x78000000; mask
= 2; break;
1452 case OP_QUADON
: code
[1] = 0xc0000000; mask
= 0; break;
1453 case OP_QUADPOP
: code
[1] = 0xc8000000; mask
= 0; break;
1454 case OP_BRKPT
: code
[1] = 0xd0000000; mask
= 0; break;
1456 assert(!"invalid flow operation");
1462 if (i
->flagsSrc
< 0)
1475 if (code
[0] & 0x4000) {
1476 assert(i
->srcExists(0) && i
->src(0).getFile() == FILE_MEMORY_CONST
);
1477 setAddress16(i
->src(0));
1478 code
[1] |= i
->getSrc(0)->reg
.fileIndex
<< 10;
1479 if (f
->op
== OP_BRA
)
1480 srcId(f
->src(0).getIndirect(0), 20);
1486 if (f
->op
== OP_CALL
) {
1491 assert(f
->absolute
);
1492 uint32_t pcAbs
= targNVC0
->getBuiltinOffset(f
->target
.builtin
);
1493 addReloc(RelocEntry::TYPE_BUILTIN
, 0, pcAbs
, 0xfc000000, 26);
1494 addReloc(RelocEntry::TYPE_BUILTIN
, 1, pcAbs
, 0x03ffffff, -6);
1496 assert(!f
->absolute
);
1497 int32_t pcRel
= f
->target
.fn
->binPos
- (codeSize
+ 8);
1498 code
[0] |= (pcRel
& 0x3f) << 26;
1499 code
[1] |= (pcRel
>> 6) & 0x3ffff;
1503 int32_t pcRel
= f
->target
.bb
->binPos
- (codeSize
+ 8);
1504 if (writeIssueDelays
&& !(f
->target
.bb
->binPos
& 0x3f))
1506 // currently we don't want absolute branches
1507 assert(!f
->absolute
);
1508 code
[0] |= (pcRel
& 0x3f) << 26;
1509 code
[1] |= (pcRel
>> 6) & 0x3ffff;
1514 CodeEmitterNVC0::emitBAR(const Instruction
*i
)
1516 Value
*rDef
= NULL
, *pDef
= NULL
;
1519 case NV50_IR_SUBOP_BAR_ARRIVE
: code
[0] = 0x84; break;
1520 case NV50_IR_SUBOP_BAR_RED_AND
: code
[0] = 0x24; break;
1521 case NV50_IR_SUBOP_BAR_RED_OR
: code
[0] = 0x44; break;
1522 case NV50_IR_SUBOP_BAR_RED_POPC
: code
[0] = 0x04; break;
1525 assert(i
->subOp
== NV50_IR_SUBOP_BAR_SYNC
);
1528 code
[1] = 0x50000000;
1530 code
[0] |= 63 << 14;
1536 if (i
->src(0).getFile() == FILE_GPR
) {
1537 srcId(i
->src(0), 20);
1539 ImmediateValue
*imm
= i
->getSrc(0)->asImm();
1541 code
[0] |= imm
->reg
.data
.u32
<< 20;
1546 if (i
->src(1).getFile() == FILE_GPR
) {
1547 srcId(i
->src(1), 26);
1549 ImmediateValue
*imm
= i
->getSrc(1)->asImm();
1551 assert(imm
->reg
.data
.u32
<= 0xfff);
1552 code
[0] |= imm
->reg
.data
.u32
<< 26;
1553 code
[1] |= imm
->reg
.data
.u32
>> 6;
1557 if (i
->srcExists(2) && (i
->predSrc
!= 2)) {
1558 srcId(i
->src(2), 32 + 17);
1559 if (i
->src(2).mod
== Modifier(NV50_IR_MOD_NOT
))
1565 if (i
->defExists(0)) {
1566 if (i
->def(0).getFile() == FILE_GPR
)
1567 rDef
= i
->getDef(0);
1569 pDef
= i
->getDef(0);
1571 if (i
->defExists(1)) {
1572 if (i
->def(1).getFile() == FILE_GPR
)
1573 rDef
= i
->getDef(1);
1575 pDef
= i
->getDef(1);
1579 code
[0] &= ~(63 << 14);
1583 code
[1] &= ~(7 << 21);
1584 defId(pDef
, 32 + 21);
1589 CodeEmitterNVC0::emitAFETCH(const Instruction
*i
)
1591 code
[0] = 0x00000006;
1592 code
[1] = 0x0c000000 | (i
->src(0).get()->reg
.data
.offset
& 0x7ff);
1594 if (i
->getSrc(0)->reg
.file
== FILE_SHADER_OUTPUT
)
1599 defId(i
->def(0), 14);
1600 srcId(i
->src(0).getIndirect(0), 20);
1604 CodeEmitterNVC0::emitPFETCH(const Instruction
*i
)
1606 uint32_t prim
= i
->src(0).get()->reg
.data
.u32
;
1608 code
[0] = 0x00000006 | ((prim
& 0x3f) << 26);
1609 code
[1] = 0x00000000 | (prim
>> 6);
1613 const int src1
= (i
->predSrc
== 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1615 defId(i
->def(0), 14);
1620 CodeEmitterNVC0::emitVFETCH(const Instruction
*i
)
1622 code
[0] = 0x00000006;
1623 code
[1] = 0x06000000 | i
->src(0).get()->reg
.data
.offset
;
1627 if (i
->getSrc(0)->reg
.file
== FILE_SHADER_OUTPUT
)
1628 code
[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1632 code
[0] |= ((i
->getDef(0)->reg
.size
/ 4) - 1) << 5;
1634 defId(i
->def(0), 14);
1635 srcId(i
->src(0).getIndirect(0), 20);
1636 srcId(i
->src(0).getIndirect(1), 26); // vertex address
1640 CodeEmitterNVC0::emitEXPORT(const Instruction
*i
)
1642 unsigned int size
= typeSizeof(i
->dType
);
1644 code
[0] = 0x00000006 | ((size
/ 4 - 1) << 5);
1645 code
[1] = 0x0a000000 | i
->src(0).get()->reg
.data
.offset
;
1647 assert(!(code
[1] & ((size
== 12) ? 15 : (size
- 1))));
1654 assert(i
->src(1).getFile() == FILE_GPR
);
1656 srcId(i
->src(0).getIndirect(0), 20);
1657 srcId(i
->src(0).getIndirect(1), 32 + 17); // vertex base address
1658 srcId(i
->src(1), 26);
1662 CodeEmitterNVC0::emitOUT(const Instruction
*i
)
1664 code
[0] = 0x00000006;
1665 code
[1] = 0x1c000000;
1669 defId(i
->def(0), 14); // new secret address
1670 srcId(i
->src(0), 20); // old secret address, should be 0 initially
1672 assert(i
->src(0).getFile() == FILE_GPR
);
1674 if (i
->op
== OP_EMIT
)
1676 if (i
->op
== OP_RESTART
|| i
->subOp
== NV50_IR_SUBOP_EMIT_RESTART
)
1680 if (i
->src(1).getFile() == FILE_IMMEDIATE
) {
1681 unsigned int stream
= SDATA(i
->src(1)).u32
;
1685 code
[0] |= stream
<< 26;
1690 srcId(i
->src(1), 26);
1695 CodeEmitterNVC0::emitInterpMode(const Instruction
*i
)
1697 if (i
->encSize
== 8) {
1698 code
[0] |= i
->ipa
<< 6; // TODO: INTERP_SAMPLEID
1700 if (i
->getInterpMode() == NV50_IR_INTERP_SC
)
1702 assert(i
->op
== OP_PINTERP
&& i
->getSampleMode() == 0);
1707 interpApply(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
1709 int ipa
= entry
->ipa
;
1710 int reg
= entry
->reg
;
1711 int loc
= entry
->loc
;
1713 if (data
.flatshade
&&
1714 (ipa
& NV50_IR_INTERP_MODE_MASK
) == NV50_IR_INTERP_SC
) {
1715 ipa
= NV50_IR_INTERP_FLAT
;
1717 } else if (data
.force_persample_interp
&&
1718 (ipa
& NV50_IR_INTERP_SAMPLE_MASK
) == NV50_IR_INTERP_DEFAULT
&&
1719 (ipa
& NV50_IR_INTERP_MODE_MASK
) != NV50_IR_INTERP_FLAT
) {
1720 ipa
|= NV50_IR_INTERP_CENTROID
;
1722 code
[loc
+ 0] &= ~(0xf << 6);
1723 code
[loc
+ 0] |= ipa
<< 6;
1724 code
[loc
+ 0] &= ~(0x3f << 26);
1725 code
[loc
+ 0] |= reg
<< 26;
1729 CodeEmitterNVC0::emitINTERP(const Instruction
*i
)
1731 const uint32_t base
= i
->getSrc(0)->reg
.data
.offset
;
1733 if (i
->encSize
== 8) {
1734 code
[0] = 0x00000000;
1735 code
[1] = 0xc0000000 | (base
& 0xffff);
1740 if (i
->op
== OP_PINTERP
) {
1741 srcId(i
->src(1), 26);
1742 addInterp(i
->ipa
, SDATA(i
->src(1)).id
, interpApply
);
1744 code
[0] |= 0x3f << 26;
1745 addInterp(i
->ipa
, 0x3f, interpApply
);
1748 srcId(i
->src(0).getIndirect(0), 20);
1750 assert(i
->op
== OP_PINTERP
);
1751 code
[0] = 0x00000009 | ((base
& 0xc) << 6) | ((base
>> 4) << 26);
1752 srcId(i
->src(1), 20);
1757 defId(i
->def(0), 14);
1759 if (i
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
1760 srcId(i
->src(i
->op
== OP_PINTERP
? 2 : 1), 32 + 17);
1762 code
[1] |= 0x3f << 17;
1766 CodeEmitterNVC0::emitLoadStoreType(DataType ty
)
1799 assert(!"invalid type");
1806 CodeEmitterNVC0::emitCachingMode(CacheMode c
)
1827 assert(!"invalid caching mode");
1834 uses64bitAddress(const Instruction
*ldst
)
1836 return ldst
->src(0).getFile() == FILE_MEMORY_GLOBAL
&&
1837 ldst
->src(0).isIndirect(0) &&
1838 ldst
->getIndirect(0, 0)->reg
.size
== 8;
1842 CodeEmitterNVC0::emitSTORE(const Instruction
*i
)
1846 switch (i
->src(0).getFile()) {
1847 case FILE_MEMORY_GLOBAL
: opc
= 0x90000000; break;
1848 case FILE_MEMORY_LOCAL
: opc
= 0xc8000000; break;
1849 case FILE_MEMORY_SHARED
:
1850 if (i
->subOp
== NV50_IR_SUBOP_STORE_UNLOCKED
) {
1851 if (targ
->getChipset() >= NVISA_GK104_CHIPSET
)
1860 assert(!"invalid memory file");
1864 code
[0] = 0x00000005;
1867 if (targ
->getChipset() >= NVISA_GK104_CHIPSET
) {
1868 // Unlocked store on shared memory can fail.
1869 if (i
->src(0).getFile() == FILE_MEMORY_SHARED
&&
1870 i
->subOp
== NV50_IR_SUBOP_STORE_UNLOCKED
) {
1871 assert(i
->defExists(0));
1872 defId(i
->def(0), 8);
1876 setAddressByFile(i
->src(0));
1877 srcId(i
->src(1), 14);
1878 srcId(i
->src(0).getIndirect(0), 20);
1879 if (uses64bitAddress(i
))
1884 emitLoadStoreType(i
->dType
);
1885 emitCachingMode(i
->cache
);
1889 CodeEmitterNVC0::emitLOAD(const Instruction
*i
)
1893 code
[0] = 0x00000005;
1895 switch (i
->src(0).getFile()) {
1896 case FILE_MEMORY_GLOBAL
: opc
= 0x80000000; break;
1897 case FILE_MEMORY_LOCAL
: opc
= 0xc0000000; break;
1898 case FILE_MEMORY_SHARED
:
1899 if (i
->subOp
== NV50_IR_SUBOP_LOAD_LOCKED
) {
1900 if (targ
->getChipset() >= NVISA_GK104_CHIPSET
)
1908 case FILE_MEMORY_CONST
:
1909 if (!i
->src(0).isIndirect(0) && typeSizeof(i
->dType
) == 4) {
1910 emitMOV(i
); // not sure if this is any better
1913 opc
= 0x14000000 | (i
->src(0).get()->reg
.fileIndex
<< 10);
1914 code
[0] = 0x00000006 | (i
->subOp
<< 8);
1917 assert(!"invalid memory file");
1924 if (i
->src(0).getFile() == FILE_MEMORY_SHARED
) {
1925 if (i
->subOp
== NV50_IR_SUBOP_LOAD_LOCKED
) {
1926 if (i
->def(0).getFile() == FILE_PREDICATE
) { // p, #
1929 } else if (i
->defExists(1)) { // r, p
1932 assert(!"Expected predicate dest for load locked");
1938 defId(i
->def(r
), 14);
1940 code
[0] |= 63 << 14;
1943 if (targ
->getChipset() >= NVISA_GK104_CHIPSET
)
1944 defId(i
->def(p
), 8);
1946 defId(i
->def(p
), 32 + 18);
1949 setAddressByFile(i
->src(0));
1950 srcId(i
->src(0).getIndirect(0), 20);
1951 if (uses64bitAddress(i
))
1956 emitLoadStoreType(i
->dType
);
1957 emitCachingMode(i
->cache
);
1961 CodeEmitterNVC0::getSRegEncoding(const ValueRef
& ref
)
1963 switch (SDATA(ref
).sv
.sv
) {
1964 case SV_LANEID
: return 0x00;
1965 case SV_PHYSID
: return 0x03;
1966 case SV_VERTEX_COUNT
: return 0x10;
1967 case SV_INVOCATION_ID
: return 0x11;
1968 case SV_YDIR
: return 0x12;
1969 case SV_THREAD_KILL
: return 0x13;
1970 case SV_TID
: return 0x21 + SDATA(ref
).sv
.index
;
1971 case SV_CTAID
: return 0x25 + SDATA(ref
).sv
.index
;
1972 case SV_NTID
: return 0x29 + SDATA(ref
).sv
.index
;
1973 case SV_GRIDID
: return 0x2c;
1974 case SV_NCTAID
: return 0x2d + SDATA(ref
).sv
.index
;
1975 case SV_LBASE
: return 0x34;
1976 case SV_SBASE
: return 0x30;
1977 case SV_CLOCK
: return 0x50 + SDATA(ref
).sv
.index
;
1979 assert(!"no sreg for system value");
1985 CodeEmitterNVC0::emitMOV(const Instruction
*i
)
1987 if (i
->def(0).getFile() == FILE_PREDICATE
) {
1988 if (i
->src(0).getFile() == FILE_GPR
) {
1989 code
[0] = 0xfc01c003;
1990 code
[1] = 0x1a8e0000;
1991 srcId(i
->src(0), 20);
1993 code
[0] = 0x0001c004;
1994 code
[1] = 0x0c0e0000;
1995 if (i
->src(0).getFile() == FILE_IMMEDIATE
) {
1997 if (!i
->getSrc(0)->reg
.data
.u32
)
2000 srcId(i
->src(0), 20);
2003 defId(i
->def(0), 17);
2006 if (i
->src(0).getFile() == FILE_SYSTEM_VALUE
) {
2007 uint8_t sr
= getSRegEncoding(i
->src(0));
2009 if (i
->encSize
== 8) {
2010 code
[0] = 0x00000004 | (sr
<< 26);
2011 code
[1] = 0x2c000000;
2013 code
[0] = 0x40000008 | (sr
<< 20);
2015 defId(i
->def(0), 14);
2019 if (i
->encSize
== 8) {
2022 if (i
->src(0).getFile() == FILE_IMMEDIATE
)
2023 opc
= HEX64(18000000, 000001e2
);
2025 if (i
->src(0).getFile() == FILE_PREDICATE
)
2026 opc
= HEX64(080e0000
, 1c000004
);
2028 opc
= HEX64(28000000, 00000004);
2030 if (i
->src(0).getFile() != FILE_PREDICATE
)
2031 opc
|= i
->lanes
<< 5;
2035 // Explicitly emit the predicate source as emitForm_B skips it.
2036 if (i
->src(0).getFile() == FILE_PREDICATE
)
2037 srcId(i
->src(0), 20);
2041 if (i
->src(0).getFile() == FILE_IMMEDIATE
) {
2042 imm
= SDATA(i
->src(0)).u32
;
2043 if (imm
& 0xfff00000) {
2044 assert(!(imm
& 0x000fffff));
2045 code
[0] = 0x00000318 | imm
;
2047 assert(imm
< 0x800 || ((int32_t)imm
>= -0x800));
2048 code
[0] = 0x00000118 | (imm
<< 20);
2052 emitShortSrc2(i
->src(0));
2054 defId(i
->def(0), 14);
2061 CodeEmitterNVC0::emitATOM(const Instruction
*i
)
2063 const bool hasDst
= i
->defExists(0);
2064 const bool casOrExch
=
2065 i
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
||
2066 i
->subOp
== NV50_IR_SUBOP_ATOM_CAS
;
2068 if (i
->dType
== TYPE_U64
) {
2070 case NV50_IR_SUBOP_ATOM_ADD
:
2073 code
[1] = 0x507e0000;
2075 code
[1] = 0x10000000;
2077 case NV50_IR_SUBOP_ATOM_EXCH
:
2079 code
[1] = 0x507e0000;
2081 case NV50_IR_SUBOP_ATOM_CAS
:
2083 code
[1] = 0x50000000;
2086 assert(!"invalid u64 red op");
2090 if (i
->dType
== TYPE_U32
) {
2092 case NV50_IR_SUBOP_ATOM_EXCH
:
2094 code
[1] = 0x507e0000;
2096 case NV50_IR_SUBOP_ATOM_CAS
:
2098 code
[1] = 0x50000000;
2101 code
[0] = 0x5 | (i
->subOp
<< 5);
2103 code
[1] = 0x507e0000;
2105 code
[1] = 0x10000000;
2109 if (i
->dType
== TYPE_S32
) {
2110 assert(i
->subOp
<= 2);
2111 code
[0] = 0x205 | (i
->subOp
<< 5);
2113 code
[1] = 0x587e0000;
2115 code
[1] = 0x18000000;
2117 if (i
->dType
== TYPE_F32
) {
2118 assert(i
->subOp
== NV50_IR_SUBOP_ATOM_ADD
);
2121 code
[1] = 0x687e0000;
2123 code
[1] = 0x28000000;
2128 srcId(i
->src(1), 14);
2131 defId(i
->def(0), 32 + 11);
2134 code
[1] |= 63 << 11;
2136 if (hasDst
|| casOrExch
) {
2137 const int32_t offset
= SDATA(i
->src(0)).offset
;
2138 assert(offset
< 0x80000 && offset
>= -0x80000);
2139 code
[0] |= offset
<< 26;
2140 code
[1] |= (offset
& 0x1ffc0) >> 6;
2141 code
[1] |= (offset
& 0xe0000) << 6;
2143 srcAddr32(i
->src(0), 26, 0);
2145 if (i
->getIndirect(0, 0)) {
2146 srcId(i
->getIndirect(0, 0), 20);
2147 if (i
->getIndirect(0, 0)->reg
.size
== 8)
2150 code
[0] |= 63 << 20;
2153 if (i
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2154 assert(i
->src(1).getSize() == 2 * typeSizeof(i
->sType
));
2155 code
[1] |= (SDATA(i
->src(1)).id
+ 1) << 17;
2160 CodeEmitterNVC0::emitMEMBAR(const Instruction
*i
)
2162 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i
->subOp
)) {
2163 case NV50_IR_SUBOP_MEMBAR_CTA
: code
[0] = 0x05; break;
2164 case NV50_IR_SUBOP_MEMBAR_GL
: code
[0] = 0x25; break;
2167 assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i
->subOp
) == NV50_IR_SUBOP_MEMBAR_SYS
);
2170 code
[1] = 0xe0000000;
2176 CodeEmitterNVC0::emitCCTL(const Instruction
*i
)
2178 code
[0] = 0x00000005 | (i
->subOp
<< 5);
2180 if (i
->src(0).getFile() == FILE_MEMORY_GLOBAL
) {
2181 code
[1] = 0x98000000;
2182 srcAddr32(i
->src(0), 28, 2);
2184 code
[1] = 0xd0000000;
2185 setAddress24(i
->src(0));
2187 if (uses64bitAddress(i
))
2189 srcId(i
->src(0).getIndirect(0), 20);
2197 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp
)
2200 switch (subOp
& ~NV50_IR_SUBOP_SUCLAMP_2D
) {
2201 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m
= 0; break;
2202 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m
= 1; break;
2203 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m
= 2; break;
2204 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m
= 3; break;
2205 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m
= 4; break;
2206 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m
= 5; break;
2207 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m
= 6; break;
2208 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m
= 7; break;
2209 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m
= 8; break;
2210 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m
= 9; break;
2211 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m
= 10; break;
2212 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m
= 11; break;
2213 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m
= 12; break;
2214 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m
= 13; break;
2215 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m
= 14; break;
2220 if (subOp
& NV50_IR_SUBOP_SUCLAMP_2D
)
2225 CodeEmitterNVC0::emitSUCalc(Instruction
*i
)
2227 ImmediateValue
*imm
= NULL
;
2230 if (i
->srcExists(2)) {
2231 imm
= i
->getSrc(2)->asImm();
2233 i
->setSrc(2, NULL
); // special case, make emitForm_A not assert
2237 case OP_SUCLAMP
: opc
= HEX64(58000000, 00000004); break;
2238 case OP_SUBFM
: opc
= HEX64(5c000000
, 00000004); break;
2239 case OP_SUEAU
: opc
= HEX64(60000000, 00000004); break;
2246 if (i
->op
== OP_SUCLAMP
) {
2247 if (i
->dType
== TYPE_S32
)
2249 emitSUCLAMPMode(i
->subOp
);
2252 if (i
->op
== OP_SUBFM
&& i
->subOp
== NV50_IR_SUBOP_SUBFM_3D
)
2255 if (i
->op
!= OP_SUEAU
) {
2256 if (i
->def(0).getFile() == FILE_PREDICATE
) { // p, #
2257 code
[0] |= 63 << 14;
2258 code
[1] |= i
->getDef(0)->reg
.data
.id
<< 23;
2260 if (i
->defExists(1)) { // r, p
2261 assert(i
->def(1).getFile() == FILE_PREDICATE
);
2262 code
[1] |= i
->getDef(1)->reg
.data
.id
<< 23;
2268 assert(i
->op
== OP_SUCLAMP
);
2270 code
[1] |= (imm
->reg
.data
.u32
& 0x3f) << 17; // sint6
2275 CodeEmitterNVC0::emitSUGType(DataType ty
)
2278 case TYPE_S32
: code
[1] |= 1 << 13; break;
2279 case TYPE_U8
: code
[1] |= 2 << 13; break;
2280 case TYPE_S8
: code
[1] |= 3 << 13; break;
2282 assert(ty
== TYPE_U32
);
2288 CodeEmitterNVC0::setSUConst16(const Instruction
*i
, const int s
)
2290 const uint32_t offset
= i
->getSrc(s
)->reg
.data
.offset
;
2292 assert(i
->src(s
).getFile() == FILE_MEMORY_CONST
);
2293 assert(offset
== (offset
& 0xfffc));
2296 code
[0] |= offset
<< 24;
2297 code
[1] |= offset
>> 8;
2298 code
[1] |= i
->getSrc(s
)->reg
.fileIndex
<< 8;
2302 CodeEmitterNVC0::setSUPred(const Instruction
*i
, const int s
)
2304 if (!i
->srcExists(s
) || (i
->predSrc
== s
)) {
2305 code
[1] |= 0x7 << 17;
2307 if (i
->src(s
).mod
== Modifier(NV50_IR_MOD_NOT
))
2309 srcId(i
->src(s
), 32 + 17);
2314 CodeEmitterNVC0::emitSULDGB(const TexInstruction
*i
)
2317 code
[1] = 0xd4000000 | (i
->subOp
<< 15);
2319 emitLoadStoreType(i
->dType
);
2320 emitSUGType(i
->sType
);
2321 emitCachingMode(i
->cache
);
2324 defId(i
->def(0), 14); // destination
2325 srcId(i
->src(0), 20); // address
2327 if (i
->src(1).getFile() == FILE_GPR
)
2328 srcId(i
->src(1), 26);
2335 CodeEmitterNVC0::emitSUSTGx(const TexInstruction
*i
)
2338 code
[1] = 0xdc000000 | (i
->subOp
<< 15);
2340 if (i
->op
== OP_SUSTP
)
2341 code
[1] |= i
->tex
.mask
<< 22;
2343 emitLoadStoreType(i
->dType
);
2344 emitSUGType(i
->sType
);
2345 emitCachingMode(i
->cache
);
2348 srcId(i
->src(0), 20); // address
2350 if (i
->src(1).getFile() == FILE_GPR
)
2351 srcId(i
->src(1), 26);
2354 srcId(i
->src(3), 14); // values
2359 CodeEmitterNVC0::emitSUAddr(const TexInstruction
*i
)
2361 assert(targ
->getChipset() < NVISA_GK104_CHIPSET
);
2363 if (i
->tex
.rIndirectSrc
< 0) {
2364 code
[1] |= 0x00004000;
2365 code
[0] |= i
->tex
.r
<< 26;
2367 srcId(i
, i
->tex
.rIndirectSrc
, 26);
2372 CodeEmitterNVC0::emitSUDim(const TexInstruction
*i
)
2374 assert(targ
->getChipset() < NVISA_GK104_CHIPSET
);
2376 code
[1] |= (i
->tex
.target
.getDim() - 1) << 12;
2377 if (i
->tex
.target
.isArray() || i
->tex
.target
.isCube() ||
2378 i
->tex
.target
.getDim() == 3) {
2379 // use e2d mode for 3-dim images, arrays and cubes.
2383 srcId(i
->src(0), 20);
2387 CodeEmitterNVC0::emitSULEA(const TexInstruction
*i
)
2389 assert(targ
->getChipset() < NVISA_GK104_CHIPSET
);
2392 code
[1] = 0xf0000000;
2395 emitLoadStoreType(i
->sType
);
2397 defId(i
->def(0), 14);
2399 if (i
->defExists(1)) {
2400 defId(i
->def(1), 32 + 22);
2410 CodeEmitterNVC0::emitSULDB(const TexInstruction
*i
)
2412 assert(targ
->getChipset() < NVISA_GK104_CHIPSET
);
2415 code
[1] = 0xd4000000 | (i
->subOp
<< 15);
2418 emitLoadStoreType(i
->dType
);
2420 defId(i
->def(0), 14);
2422 emitCachingMode(i
->cache
);
2428 CodeEmitterNVC0::emitSUSTx(const TexInstruction
*i
)
2430 assert(targ
->getChipset() < NVISA_GK104_CHIPSET
);
2433 code
[1] = 0xdc000000 | (i
->subOp
<< 15);
2435 if (i
->op
== OP_SUSTP
)
2436 code
[1] |= i
->tex
.mask
<< 17;
2438 emitLoadStoreType(i
->dType
);
2442 srcId(i
->src(1), 14);
2444 emitCachingMode(i
->cache
);
2450 CodeEmitterNVC0::emitVectorSubOp(const Instruction
*i
)
2452 switch (NV50_IR_SUBOP_Vn(i
->subOp
)) {
2454 code
[1] |= (i
->subOp
& 0x000f) << 12; // vsrc1
2455 code
[1] |= (i
->subOp
& 0x00e0) >> 5; // vsrc2
2456 code
[1] |= (i
->subOp
& 0x0100) << 7; // vsrc2
2457 code
[1] |= (i
->subOp
& 0x3c00) << 13; // vdst
2460 code
[1] |= (i
->subOp
& 0x000f) << 8; // v2src1
2461 code
[1] |= (i
->subOp
& 0x0010) << 11; // v2src1
2462 code
[1] |= (i
->subOp
& 0x01e0) >> 1; // v2src2
2463 code
[1] |= (i
->subOp
& 0x0200) << 6; // v2src2
2464 code
[1] |= (i
->subOp
& 0x3c00) << 2; // v4dst
2465 code
[1] |= (i
->mask
& 0x3) << 2;
2468 code
[1] |= (i
->subOp
& 0x000f) << 8; // v4src1
2469 code
[1] |= (i
->subOp
& 0x01e0) >> 1; // v4src2
2470 code
[1] |= (i
->subOp
& 0x3c00) << 2; // v4dst
2471 code
[1] |= (i
->mask
& 0x3) << 2;
2472 code
[1] |= (i
->mask
& 0xc) << 21;
2481 CodeEmitterNVC0::emitVSHL(const Instruction
*i
)
2485 switch (NV50_IR_SUBOP_Vn(i
->subOp
)) {
2486 case 0: opc
|= 0xe8ULL
<< 56; break;
2487 case 1: opc
|= 0xb4ULL
<< 56; break;
2488 case 2: opc
|= 0x94ULL
<< 56; break;
2493 if (NV50_IR_SUBOP_Vn(i
->subOp
) == 1) {
2494 if (isSignedType(i
->dType
)) opc
|= 1ULL << 0x2a;
2495 if (isSignedType(i
->sType
)) opc
|= (1 << 6) | (1 << 5);
2497 if (isSignedType(i
->dType
)) opc
|= 1ULL << 0x39;
2498 if (isSignedType(i
->sType
)) opc
|= 1 << 6;
2505 if (i
->flagsDef
>= 0)
2510 CodeEmitterNVC0::emitPIXLD(const Instruction
*i
)
2512 assert(i
->encSize
== 8);
2513 emitForm_A(i
, HEX64(10000000, 00000006));
2514 code
[0] |= i
->subOp
<< 5;
2515 code
[1] |= 0x00e00000;
2519 CodeEmitterNVC0::emitVOTE(const Instruction
*i
)
2521 assert(i
->src(0).getFile() == FILE_PREDICATE
);
2523 code
[0] = 0x00000004 | (i
->subOp
<< 5);
2524 code
[1] = 0x48000000;
2529 for (int d
= 0; i
->defExists(d
); d
++) {
2530 if (i
->def(d
).getFile() == FILE_PREDICATE
) {
2533 defId(i
->def(d
), 32 + 22);
2534 } else if (i
->def(d
).getFile() == FILE_GPR
) {
2537 defId(i
->def(d
), 14);
2539 assert(!"Unhandled def");
2543 code
[0] |= 63 << 14;
2546 if (i
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
))
2548 srcId(i
->src(0), 20);
2552 CodeEmitterNVC0::emitInstruction(Instruction
*insn
)
2554 unsigned int size
= insn
->encSize
;
2556 if (writeIssueDelays
&& !(codeSize
& 0x3f))
2559 if (!insn
->encSize
) {
2560 ERROR("skipping unencodable instruction: "); insn
->print();
2563 if (codeSize
+ size
> codeSizeLimit
) {
2564 ERROR("code emitter output buffer too small\n");
2568 if (writeIssueDelays
) {
2569 if (!(codeSize
& 0x3f)) {
2570 code
[0] = 0x00000007; // cf issue delay "instruction"
2571 code
[1] = 0x20000000;
2575 const unsigned int id
= (codeSize
& 0x3f) / 8 - 1;
2576 uint32_t *data
= code
- (id
* 2 + 2);
2578 data
[0] |= insn
->sched
<< (id
* 8 + 4);
2581 data
[0] |= insn
->sched
<< 28;
2582 data
[1] |= insn
->sched
>> 4;
2584 data
[1] |= insn
->sched
<< ((id
- 4) * 8 + 4);
2588 // assert that instructions with multiple defs don't corrupt registers
2589 for (int d
= 0; insn
->defExists(d
); ++d
)
2590 assert(insn
->asTex() || insn
->def(d
).rep()->reg
.data
.id
>= 0);
2627 if (insn
->dType
== TYPE_F64
)
2629 else if (isFloatType(insn
->dType
))
2635 if (insn
->dType
== TYPE_F64
)
2637 else if (isFloatType(insn
->dType
))
2644 if (insn
->dType
== TYPE_F64
)
2646 else if (isFloatType(insn
->dType
))
2661 emitLogicOp(insn
, 0);
2664 emitLogicOp(insn
, 1);
2667 emitLogicOp(insn
, 2);
2677 emitSET(insn
->asCmp());
2683 emitSLCT(insn
->asCmp());
2698 if (insn
->def(0).getFile() == FILE_PREDICATE
||
2699 insn
->src(0).getFile() == FILE_PREDICATE
)
2705 emitSFnOp(insn
, 5 + 2 * insn
->subOp
);
2708 emitSFnOp(insn
, 4 + 2 * insn
->subOp
);
2733 emitTEX(insn
->asTex());
2736 emitTXQ(insn
->asTex());
2750 if (targ
->getChipset() >= NVISA_GK104_CHIPSET
)
2751 emitSULDGB(insn
->asTex());
2753 emitSULDB(insn
->asTex());
2757 if (targ
->getChipset() >= NVISA_GK104_CHIPSET
)
2758 emitSUSTGx(insn
->asTex());
2760 emitSUSTx(insn
->asTex());
2763 emitSULEA(insn
->asTex());
2785 emitQUADOP(insn
, insn
->subOp
, insn
->lanes
);
2788 emitQUADOP(insn
, insn
->src(0).mod
.neg() ? 0x66 : 0x99, 0x4);
2791 emitQUADOP(insn
, insn
->src(0).mod
.neg() ? 0x5a : 0xa5, 0x5);
2833 ERROR("operation should have been eliminated");
2839 ERROR("operation should have been lowered\n");
2842 ERROR("unknown op: %u\n", insn
->op
);
2848 assert(insn
->encSize
== 8);
2851 code
+= insn
->encSize
/ 4;
2852 codeSize
+= insn
->encSize
;
2857 CodeEmitterNVC0::getMinEncodingSize(const Instruction
*i
) const
2859 const Target::OpInfo
&info
= targ
->getOpInfo(i
);
2861 if (writeIssueDelays
|| info
.minEncSize
== 8 || 1)
2864 if (i
->ftz
|| i
->saturate
|| i
->join
)
2866 if (i
->rnd
!= ROUND_N
)
2868 if (i
->predSrc
>= 0 && i
->op
== OP_MAD
)
2871 if (i
->op
== OP_PINTERP
) {
2872 if (i
->getSampleMode() || 1) // XXX: grr, short op doesn't work
2875 if (i
->op
== OP_MOV
&& i
->lanes
!= 0xf) {
2879 for (int s
= 0; i
->srcExists(s
); ++s
) {
2880 if (i
->src(s
).isIndirect(0))
2883 if (i
->src(s
).getFile() == FILE_MEMORY_CONST
) {
2884 if (SDATA(i
->src(s
)).offset
>= 0x100)
2886 if (i
->getSrc(s
)->reg
.fileIndex
> 1 &&
2887 i
->getSrc(s
)->reg
.fileIndex
!= 16)
2890 if (i
->src(s
).getFile() == FILE_IMMEDIATE
) {
2891 if (i
->dType
== TYPE_F32
) {
2892 if (SDATA(i
->src(s
)).u32
>= 0x100)
2895 if (SDATA(i
->src(s
)).u32
> 0xff)
2900 if (i
->op
== OP_CVT
)
2902 if (i
->src(s
).mod
!= Modifier(0)) {
2903 if (i
->src(s
).mod
== Modifier(NV50_IR_MOD_ABS
))
2904 if (i
->op
!= OP_RSQ
)
2906 if (i
->src(s
).mod
== Modifier(NV50_IR_MOD_NEG
))
2907 if (i
->op
!= OP_ADD
|| s
!= 0)
2915 // Simplified, erring on safe side.
2916 class SchedDataCalculator
: public Pass
2919 SchedDataCalculator(const Target
*targ
) : targ(targ
) { }
2925 int st
[DATA_FILE_COUNT
]; // LD to LD delay 3
2926 int ld
[DATA_FILE_COUNT
]; // ST to ST delay 3
2927 int tex
; // TEX to non-TEX delay 17 (0x11)
2928 int sfu
; // SFU to SFU delay 3 (except PRE-ops)
2929 int imul
; // integer MUL to MUL delay 3
2939 void rebase(const int base
)
2941 const int delta
= this->base
- base
;
2946 for (int i
= 0; i
< regs
; ++i
) {
2950 for (int i
= 0; i
< 8; ++i
) {
2957 for (unsigned int f
= 0; f
< DATA_FILE_COUNT
; ++f
) {
2967 memset(&rd
, 0, sizeof(rd
));
2968 memset(&wr
, 0, sizeof(wr
));
2969 memset(&res
, 0, sizeof(res
));
2972 int getLatest(const ScoreData
& d
) const
2975 for (int i
= 0; i
< regs
; ++i
)
2978 for (int i
= 0; i
< 8; ++i
)
2985 inline int getLatestRd() const
2987 return getLatest(rd
);
2989 inline int getLatestWr() const
2991 return getLatest(wr
);
2993 inline int getLatest() const
2995 const int a
= getLatestRd();
2996 const int b
= getLatestWr();
2998 int max
= MAX2(a
, b
);
2999 for (unsigned int f
= 0; f
< DATA_FILE_COUNT
; ++f
) {
3000 max
= MAX2(res
.ld
[f
], max
);
3001 max
= MAX2(res
.st
[f
], max
);
3003 max
= MAX2(res
.sfu
, max
);
3004 max
= MAX2(res
.imul
, max
);
3005 max
= MAX2(res
.tex
, max
);
3008 void setMax(const RegScores
*that
)
3010 for (int i
= 0; i
< regs
; ++i
) {
3011 rd
.r
[i
] = MAX2(rd
.r
[i
], that
->rd
.r
[i
]);
3012 wr
.r
[i
] = MAX2(wr
.r
[i
], that
->wr
.r
[i
]);
3014 for (int i
= 0; i
< 8; ++i
) {
3015 rd
.p
[i
] = MAX2(rd
.p
[i
], that
->rd
.p
[i
]);
3016 wr
.p
[i
] = MAX2(wr
.p
[i
], that
->wr
.p
[i
]);
3018 rd
.c
= MAX2(rd
.c
, that
->rd
.c
);
3019 wr
.c
= MAX2(wr
.c
, that
->wr
.c
);
3021 for (unsigned int f
= 0; f
< DATA_FILE_COUNT
; ++f
) {
3022 res
.ld
[f
] = MAX2(res
.ld
[f
], that
->res
.ld
[f
]);
3023 res
.st
[f
] = MAX2(res
.st
[f
], that
->res
.st
[f
]);
3025 res
.sfu
= MAX2(res
.sfu
, that
->res
.sfu
);
3026 res
.imul
= MAX2(res
.imul
, that
->res
.imul
);
3027 res
.tex
= MAX2(res
.tex
, that
->res
.tex
);
3029 void print(int cycle
)
3031 for (int i
= 0; i
< regs
; ++i
) {
3032 if (rd
.r
[i
] > cycle
)
3033 INFO("rd $r%i @ %i\n", i
, rd
.r
[i
]);
3034 if (wr
.r
[i
] > cycle
)
3035 INFO("wr $r%i @ %i\n", i
, wr
.r
[i
]);
3037 for (int i
= 0; i
< 8; ++i
) {
3038 if (rd
.p
[i
] > cycle
)
3039 INFO("rd $p%i @ %i\n", i
, rd
.p
[i
]);
3040 if (wr
.p
[i
] > cycle
)
3041 INFO("wr $p%i @ %i\n", i
, wr
.p
[i
]);
3044 INFO("rd $c @ %i\n", rd
.c
);
3046 INFO("wr $c @ %i\n", wr
.c
);
3047 if (res
.sfu
> cycle
)
3048 INFO("sfu @ %i\n", res
.sfu
);
3049 if (res
.imul
> cycle
)
3050 INFO("imul @ %i\n", res
.imul
);
3051 if (res
.tex
> cycle
)
3052 INFO("tex @ %i\n", res
.tex
);
3056 RegScores
*score
; // for current BB
3057 std::vector
<RegScores
> scoreBoards
;
3063 bool visit(Function
*);
3064 bool visit(BasicBlock
*);
3066 void commitInsn(const Instruction
*, int cycle
);
3067 int calcDelay(const Instruction
*, int cycle
) const;
3068 void setDelay(Instruction
*, int delay
, Instruction
*next
);
3070 void recordRd(const Value
*, const int ready
);
3071 void recordWr(const Value
*, const int ready
);
3072 void checkRd(const Value
*, int cycle
, int& delay
) const;
3073 void checkWr(const Value
*, int cycle
, int& delay
) const;
3075 int getCycles(const Instruction
*, int origDelay
) const;
3079 SchedDataCalculator::setDelay(Instruction
*insn
, int delay
, Instruction
*next
)
3081 if (insn
->op
== OP_EXIT
|| insn
->op
== OP_RET
)
3082 delay
= MAX2(delay
, 14);
3084 if (insn
->op
== OP_TEXBAR
) {
3085 // TODO: except if results not used before EXIT
3088 if (insn
->op
== OP_JOIN
|| insn
->join
) {
3091 if (delay
>= 0 || prevData
== 0x04 ||
3092 !next
|| !targ
->canDualIssue(insn
, next
)) {
3093 insn
->sched
= static_cast<uint8_t>(MAX2(delay
, 0));
3094 if (prevOp
== OP_EXPORT
)
3095 insn
->sched
|= 0x40;
3097 insn
->sched
|= 0x20;
3099 insn
->sched
= 0x04; // dual-issue
3102 if (prevData
!= 0x04 || prevOp
!= OP_EXPORT
)
3103 if (insn
->sched
!= 0x04 || insn
->op
== OP_EXPORT
)
3106 prevData
= insn
->sched
;
3110 SchedDataCalculator::getCycles(const Instruction
*insn
, int origDelay
) const
3112 if (insn
->sched
& 0x80) {
3113 int c
= (insn
->sched
& 0x0f) * 2 + 1;
3114 if (insn
->op
== OP_TEXBAR
&& origDelay
> 0)
3118 if (insn
->sched
& 0x60)
3119 return (insn
->sched
& 0x1f) + 1;
3120 return (insn
->sched
== 0x04) ? 0 : 32;
3124 SchedDataCalculator::visit(Function
*func
)
3126 int regs
= targ
->getFileSize(FILE_GPR
) + 1;
3127 scoreBoards
.resize(func
->cfg
.getSize());
3128 for (size_t i
= 0; i
< scoreBoards
.size(); ++i
)
3129 scoreBoards
[i
].wipe(regs
);
3134 SchedDataCalculator::visit(BasicBlock
*bb
)
3137 Instruction
*next
= NULL
;
3143 score
= &scoreBoards
.at(bb
->getId());
3145 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
3146 // back branches will wait until all target dependencies are satisfied
3147 if (ei
.getType() == Graph::Edge::BACK
) // sched would be uninitialized
3149 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
3150 if (in
->getExit()) {
3151 if (prevData
!= 0x04)
3152 prevData
= in
->getExit()->sched
;
3153 prevOp
= in
->getExit()->op
;
3155 score
->setMax(&scoreBoards
.at(in
->getId()));
3157 if (bb
->cfg
.incidentCount() > 1)
3160 #ifdef NVC0_DEBUG_SCHED_DATA
3161 INFO("=== BB:%i initial scores\n", bb
->getId());
3162 score
->print(cycle
);
3165 for (insn
= bb
->getEntry(); insn
&& insn
->next
; insn
= insn
->next
) {
3168 commitInsn(insn
, cycle
);
3169 int delay
= calcDelay(next
, cycle
);
3170 setDelay(insn
, delay
, next
);
3171 cycle
+= getCycles(insn
, delay
);
3173 #ifdef NVC0_DEBUG_SCHED_DATA
3174 INFO("cycle %i, sched %02x\n", cycle
, insn
->sched
);
3181 commitInsn(insn
, cycle
);
3185 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
3186 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
3188 if (ei
.getType() != Graph::Edge::BACK
) {
3189 // only test the first instruction of the outgoing block
3190 next
= out
->getEntry();
3192 bbDelay
= MAX2(bbDelay
, calcDelay(next
, cycle
));
3194 // wait until all dependencies are satisfied
3195 const int regsFree
= score
->getLatest();
3196 next
= out
->getFirst();
3197 for (int c
= cycle
; next
&& c
< regsFree
; next
= next
->next
) {
3198 bbDelay
= MAX2(bbDelay
, calcDelay(next
, c
));
3199 c
+= getCycles(next
, bbDelay
);
3204 if (bb
->cfg
.outgoingCount() != 1)
3206 setDelay(insn
, bbDelay
, next
);
3207 cycle
+= getCycles(insn
, bbDelay
);
3209 score
->rebase(cycle
); // common base for initializing out blocks' scores
3213 #define NVE4_MAX_ISSUE_DELAY 0x1f
3215 SchedDataCalculator::calcDelay(const Instruction
*insn
, int cycle
) const
3217 int delay
= 0, ready
= cycle
;
3219 for (int s
= 0; insn
->srcExists(s
); ++s
)
3220 checkRd(insn
->getSrc(s
), cycle
, delay
);
3221 // WAR & WAW don't seem to matter
3222 // for (int s = 0; insn->srcExists(s); ++s)
3223 // recordRd(insn->getSrc(s), cycle);
3225 switch (Target::getOpClass(insn
->op
)) {
3227 ready
= score
->res
.sfu
;
3230 if (insn
->op
== OP_MUL
&& !isFloatType(insn
->dType
))
3231 ready
= score
->res
.imul
;
3233 case OPCLASS_TEXTURE
:
3234 ready
= score
->res
.tex
;
3237 ready
= score
->res
.ld
[insn
->src(0).getFile()];
3240 ready
= score
->res
.st
[insn
->src(0).getFile()];
3245 if (Target::getOpClass(insn
->op
) != OPCLASS_TEXTURE
)
3246 ready
= MAX2(ready
, score
->res
.tex
);
3248 delay
= MAX2(delay
, ready
- cycle
);
3250 // if can issue next cycle, delay is 0, not 1
3251 return MIN2(delay
- 1, NVE4_MAX_ISSUE_DELAY
);
3255 SchedDataCalculator::commitInsn(const Instruction
*insn
, int cycle
)
3257 const int ready
= cycle
+ targ
->getLatency(insn
);
3259 for (int d
= 0; insn
->defExists(d
); ++d
)
3260 recordWr(insn
->getDef(d
), ready
);
3261 // WAR & WAW don't seem to matter
3262 // for (int s = 0; insn->srcExists(s); ++s)
3263 // recordRd(insn->getSrc(s), cycle);
3265 switch (Target::getOpClass(insn
->op
)) {
3267 score
->res
.sfu
= cycle
+ 4;
3270 if (insn
->op
== OP_MUL
&& !isFloatType(insn
->dType
))
3271 score
->res
.imul
= cycle
+ 4;
3273 case OPCLASS_TEXTURE
:
3274 score
->res
.tex
= cycle
+ 18;
3277 if (insn
->src(0).getFile() == FILE_MEMORY_CONST
)
3279 score
->res
.ld
[insn
->src(0).getFile()] = cycle
+ 4;
3280 score
->res
.st
[insn
->src(0).getFile()] = ready
;
3283 score
->res
.st
[insn
->src(0).getFile()] = cycle
+ 4;
3284 score
->res
.ld
[insn
->src(0).getFile()] = ready
;
3287 if (insn
->op
== OP_TEXBAR
)
3288 score
->res
.tex
= cycle
;
3294 #ifdef NVC0_DEBUG_SCHED_DATA
3295 score
->print(cycle
);
// Raises `delay` (passed by reference) so that it covers the wait until
// value v can be read at `cycle`.
// ready is folded with MAX2 over the score->rd entry or entries selected by
// v->reg.file, and delay becomes MAX2(delay, ready - cycle).
// NOTE(review): the declarations/initialisation of ready and a, and some case
// labels and break statements, are not visible in this listing - confirm
// against the complete file.
3300 SchedDataCalculator::checkRd(const Value
*v
, int cycle
, int& delay
) const
3305 switch (v
->reg
.file
) {
// Scans rd.r[a] .. rd.r[b - 1], with b = a + v->reg.size / 4.
3308 b
= a
+ v
->reg
.size
/ 4;
3309 for (int r
= a
; r
< b
; ++r
)
3310 ready
= MAX2(ready
, score
->rd
.r
[r
]);
// Predicates use the single rd.p[] entry indexed by v->reg.data.id.
3312 case FILE_PREDICATE
:
3313 ready
= MAX2(ready
, score
->rd
.p
[v
->reg
.data
.id
]);
3316 ready
= MAX2(ready
, score
->rd
.c
);
// For the files listed below the visible code consults no rd entry.
3318 case FILE_SHADER_INPUT
:
3319 case FILE_SHADER_OUTPUT
: // yes, TCPs can read outputs
3320 case FILE_MEMORY_LOCAL
:
3321 case FILE_MEMORY_CONST
:
3322 case FILE_MEMORY_SHARED
:
3323 case FILE_MEMORY_GLOBAL
:
3324 case FILE_SYSTEM_VALUE
:
3325 // TODO: any restrictions here ?
3327 case FILE_IMMEDIATE
:
// Publish the result through the reference parameter.
3334 delay
= MAX2(delay
, ready
- cycle
);
// Counterpart of checkRd that consults score->wr instead of score->rd:
// raises `delay` (passed by reference) to MAX2(delay, ready - cycle), where
// ready is folded with MAX2 over the wr entry or entries selected by
// v->reg.file.
// NOTE(review): the declarations/initialisation of ready and a, and some case
// labels and break statements, are not visible in this listing - confirm
// against the complete file.
3338 SchedDataCalculator::checkWr(const Value
*v
, int cycle
, int& delay
) const
3343 switch (v
->reg
.file
) {
// Scans wr.r[a] .. wr.r[b - 1], with b = a + v->reg.size / 4.
3346 b
= a
+ v
->reg
.size
/ 4;
3347 for (int r
= a
; r
< b
; ++r
)
3348 ready
= MAX2(ready
, score
->wr
.r
[r
]);
// Predicates use the single wr.p[] entry indexed by v->reg.data.id.
3350 case FILE_PREDICATE
:
3351 ready
= MAX2(ready
, score
->wr
.p
[v
->reg
.data
.id
]);
// This branch asserts that the file is FILE_FLAGS before using wr.c.
3354 assert(v
->reg
.file
== FILE_FLAGS
);
3355 ready
= MAX2(ready
, score
->wr
.c
);
// Publish the result through the reference parameter.
3359 delay
= MAX2(delay
, ready
- cycle
);
// Records `ready` in the score->rd entries that checkRd compares against.
//   FILE_GPR:             rd.r[r] = ready for r in [a, a + v->reg.size / 4),
//                         where a = v->reg.data.id
//   FILE_PREDICATE:       rd.p[a] = ready + 4
//   FILE_FLAGS (asserted): rd.c   = ready + 4
// NOTE(review): the else keywords and closing braces linking these branches
// are not visible in this listing - confirm against the complete file.
3363 SchedDataCalculator::recordWr(const Value
*v
, const int ready
)
3365 int a
= v
->reg
.data
.id
;
3367 if (v
->reg
.file
== FILE_GPR
) {
3368 int b
= a
+ v
->reg
.size
/ 4;
3369 for (int r
= a
; r
< b
; ++r
)
3370 score
->rd
.r
[r
] = ready
;
3372 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
// NOTE(review): the comment above says "shorter", yet both branches below add
// 4 to ready - confirm the intended direction of the adjustment.
3373 if (v
->reg
.file
== FILE_PREDICATE
) {
3374 score
->rd
.p
[a
] = ready
+ 4;
3376 assert(v
->reg
.file
== FILE_FLAGS
);
3377 score
->rd
.c
= ready
+ 4;
// Records `ready` in the score->wr entries that checkWr compares against.
//   FILE_GPR:       wr.r[r] = ready for r in [a, a + v->reg.size / 4),
//                   where a = v->reg.data.id
//   FILE_PREDICATE: wr.p[a] = ready
//   FILE_FLAGS:     wr.c    = ready
// No other register file is handled in the visible code.
// The only calls to this function visible in this chunk are commented out
// (in calcDelay and commitInsn).
3382 SchedDataCalculator::recordRd(const Value
*v
, const int ready
)
3384 int a
= v
->reg
.data
.id
;
3386 if (v
->reg
.file
== FILE_GPR
) {
3387 int b
= a
+ v
->reg
.size
/ 4;
3388 for (int r
= a
; r
< b
; ++r
)
3389 score
->wr
.r
[r
] = ready
;
3391 if (v
->reg
.file
== FILE_PREDICATE
) {
3392 score
->wr
.p
[a
] = ready
;
3394 if (v
->reg
.file
== FILE_FLAGS
) {
3395 score
->wr
.c
= ready
;
// Builds a SchedDataCalculator for targ and returns the result of running it
// on func.
// NOTE(review): the meaning of the two `true` arguments is defined by run(),
// which is not shown in this chunk - confirm before changing them.
3400 calculateSchedDataNVC0(const Target
*targ
, Function
*func
)
3402 SchedDataCalculator
sched(targ
);
3403 return sched
.run(func
, true, true);
// Runs the base-class CodeEmitter::prepareEmission on func, then computes
// scheduling data via calculateSchedDataNVC0 when targ->hasSWSched is set.
// The return value of calculateSchedDataNVC0 is not used here.
3407 CodeEmitterNVC0::prepareEmission(Function
*func
)
3409 CodeEmitter::prepareEmission(func
);
3411 if (targ
->hasSWSched
)
3412 calculateSchedDataNVC0(targ
, func
);
// Constructs the emitter for target: forwards it to the CodeEmitter base,
// initialises writeIssueDelays from target->hasSWSched, and zeroes codeSize
// and codeSizeLimit.
// NOTE(review): further member initialisers and body statements may be
// missing from this listing - confirm against the complete file.
3415 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0
*target
)
3416 : CodeEmitter(target
),
3418 writeIssueDelays(target
->hasSWSched
)
3421 codeSize
= codeSizeLimit
= 0;
// Allocates a CodeEmitterNVC0 bound to this target with new and sets its
// program type to `type`.
// NOTE(review): the return statement is not visible in this listing, and who
// deletes the allocation is not shown here - presumably the caller owns the
// returned emitter; confirm.
3426 TargetNVC0::createCodeEmitterNVC0(Program::Type type
)
3428 CodeEmitterNVC0
*emit
= new CodeEmitterNVC0(this);
3429 emit
->setProgramType(type
);
// Chooses the code emitter by chipset: chipsets at or above
// NVISA_GK20A_CHIPSET get the emitter from createCodeEmitterGK110, all others
// the one from createCodeEmitterNVC0. `type` is forwarded unchanged.
3434 TargetNVC0::getCodeEmitter(Program::Type type
)
3436 if (chipset
>= NVISA_GK20A_CHIPSET
)
3437 return createCodeEmitterGK110(type
);
3438 return createCodeEmitterNVC0(type
);
3441 } // namespace nv50_ir