2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target_nv50.h"
// Encoding-form selectors passed as the `enc` argument of
// CodeEmitterNV50::setSrcFileBits() (by emitForm_MAD/ADD/MUL/IMM and emitARL).
28 #define NV50_OP_ENC_LONG 0
29 #define NV50_OP_ENC_SHORT 1
30 #define NV50_OP_ENC_IMM 2
31 #define NV50_OP_ENC_LONG_ALT 3
// Code emitter for NV50: declares the per-operation emit*() routines and the
// operand/field helpers that are defined further down in this file.
// NOTE(review): braces and access specifiers are not visible in this listing.
33 class CodeEmitterNV50
: public CodeEmitter
36 CodeEmitterNV50(const TargetNV50
*);
38 virtual bool emitInstruction(Instruction
*);
40 virtual uint32_t getMinEncodingSize(const Instruction
*) const;
// Stores the given program type in progType.
42 inline void setProgramType(Program::Type pType
) { progType
= pType
; }
44 virtual void prepareEmission(Function
*);
47 Program::Type progType
;
49 const TargetNV50
*targNV50
;
// Helpers that OR a register id or an address offset into code[] at bit `pos`.
52 inline void defId(const ValueDef
&, const int pos
);
53 inline void srcId(const ValueRef
&, const int pos
);
54 inline void srcId(const ValueRef
*, const int pos
);
56 inline void srcAddr16(const ValueRef
&, bool adj
, const int pos
);
57 inline void srcAddr8(const ValueRef
&, const int pos
);
// Flags-register and condition-code fields.
59 void emitFlagsRd(const Instruction
*);
60 void emitFlagsWr(const Instruction
*);
62 void emitCondCode(CondCode cc
, DataType ty
, int pos
);
// Address-register, immediate, destination and source operand fields.
64 inline void setARegBits(unsigned int);
66 void setAReg16(const Instruction
*, int s
);
67 void setImmediate(const Instruction
*, int s
);
69 void setDst(const Value
*);
70 void setDst(const Instruction
*, int d
);
71 void setSrcFileBits(const Instruction
*, int enc
);
72 void setSrc(const Instruction
*, unsigned int s
, int slot
);
// Common operand layouts shared by several operations.
74 void emitForm_MAD(const Instruction
*);
75 void emitForm_ADD(const Instruction
*);
76 void emitForm_MUL(const Instruction
*);
77 void emitForm_IMM(const Instruction
*);
// Load/store size fields and rounding-mode fields.
79 void emitLoadStoreSizeLG(DataType ty
, int pos
);
80 void emitLoadStoreSizeCS(DataType ty
);
82 void roundMode_MAD(const Instruction
*);
83 void roundMode_CVT(RoundMode
);
85 void emitMNeg12(const Instruction
*);
// Per-operation encoders.
87 void emitLOAD(const Instruction
*);
88 void emitSTORE(const Instruction
*);
89 void emitMOV(const Instruction
*);
90 void emitRDSV(const Instruction
*);
92 void emitINTERP(const Instruction
*);
93 void emitPFETCH(const Instruction
*);
94 void emitOUT(const Instruction
*);
96 void emitUADD(const Instruction
*);
97 void emitAADD(const Instruction
*);
98 void emitFADD(const Instruction
*);
99 void emitIMUL(const Instruction
*);
100 void emitFMUL(const Instruction
*);
101 void emitFMAD(const Instruction
*);
102 void emitIMAD(const Instruction
*);
103 void emitISAD(const Instruction
*);
105 void emitMINMAX(const Instruction
*);
107 void emitPreOp(const Instruction
*);
108 void emitSFnOp(const Instruction
*, uint8_t subOp
);
110 void emitShift(const Instruction
*);
111 void emitARL(const Instruction
*, unsigned int shl
);
112 void emitLogicOp(const Instruction
*);
113 void emitNOT(const Instruction
*);
115 void emitCVT(const Instruction
*);
116 void emitSET(const Instruction
*);
118 void emitTEX(const TexInstruction
*);
119 void emitTXQ(const TexInstruction
*);
120 void emitTEXPREP(const TexInstruction
*);
122 void emitQUADOP(const Instruction
*, uint8_t lane
, uint8_t quOp
);
124 void emitFlow(const Instruction
*, uint8_t flowOp
);
125 void emitPRERETEmu(const FlowInstruction
*);
126 void emitBAR(const Instruction
*);
128 void emitATOM(const Instruction
*);
// Both macros expand to the `reg.data` member of `(a).rep()`.
// In this file SDATA is applied to sources and DDATA to definitions.
131 #define SDATA(a) ((a).rep()->reg.data)
132 #define DDATA(a) ((a).rep()->reg.data)
// ORs the id of source `src` (read through SDATA) into the code word that
// contains bit `pos`, shifted to bit position `pos % 32`.
// NOTE(review): this listing omits some source lines; only the visible
// statement is described.
134 void CodeEmitterNV50::srcId(const ValueRef
& src
, const int pos
)
137 code
[pos
/ 32] |= SDATA(src
).id
<< (pos
% 32);
// Pointer overload: dereferences `src` and ORs its id (read through SDATA)
// into the code word that contains bit `pos`, shifted to `pos % 32`.
// NOTE(review): this listing omits some source lines; only the visible
// statement is described.
140 void CodeEmitterNV50::srcId(const ValueRef
*src
, const int pos
)
143 code
[pos
/ 32] |= SDATA(*src
).id
<< (pos
% 32);
// Encodes the offset of `src` as a field of up to 16 bits at bit `pos`.
// `adj` is only permitted for register sizes <= 4 (first assert). The offset
// must fit a signed 16-bit range and the field must start at bit <= 16 of its
// word (second assert). With `adj` set the mask is 0xffff >> (size >> 1),
// otherwise 0xffff.
// NOTE(review): a source line is missing between the first assert and the
// division by the register size, so the division may be conditional —
// confirm against the full source.
146 void CodeEmitterNV50::srcAddr16(const ValueRef
& src
, bool adj
, const int pos
)
150 int32_t offset
= SDATA(src
).offset
;
152 assert(!adj
|| src
.get()->reg
.size
<= 4);
154 offset
/= src
.get()->reg
.size
;
156 assert(offset
<= 0x7fff && offset
>= (int32_t)-0x8000 && (pos
% 32) <= 16);
159 offset
&= adj
? (0xffff >> (src
.get()->reg
.size
>> 1)) : 0xffff;
161 code
[pos
/ 32] |= offset
<< (pos
% 32);
// Encodes the offset of `src`, divided by 4, at bit `pos`.
// The assert requires the offset to be a multiple of 4 and either <= 0x1fc
// or exactly 0x3fc.
164 void CodeEmitterNV50::srcAddr8(const ValueRef
& src
, const int pos
)
168 uint32_t offset
= SDATA(src
).offset
;
170 assert((offset
<= 0x1fc || offset
== 0x3fc) && !(offset
& 0x3));
172 code
[pos
/ 32] |= (offset
>> 2) << (pos
% 32);
// ORs the id of definition `def` (read through DDATA) into the code word that
// contains bit `pos`, shifted to `pos % 32`.
// Asserts that the definition has a value and is not in FILE_SHADER_OUTPUT.
175 void CodeEmitterNV50::defId(const ValueDef
& def
, const int pos
)
177 assert(def
.get() && def
.getFile() != FILE_SHADER_OUTPUT
);
179 code
[pos
/ 32] |= DDATA(def
).id
<< (pos
% 32);
// Writes a rounding-mode value at bit 22 of code[1]:
// ROUND_M -> 1, ROUND_P -> 2, ROUND_Z -> 3. The remaining visible path only
// asserts that the mode is ROUND_N and sets no bits.
// NOTE(review): the switch header is not visible in this listing; presumably
// it switches on insn->rnd — confirm against the full source.
183 CodeEmitterNV50::roundMode_MAD(const Instruction
*insn
)
186 case ROUND_M
: code
[1] |= 1 << 22; break;
187 case ROUND_P
: code
[1] |= 2 << 22; break;
188 case ROUND_Z
: code
[1] |= 3 << 22; break;
190 assert(insn
->rnd
== ROUND_N
);
// Copies the negate modifiers of sources 0 and 1 into bits 26 and 27 of
// code[1] respectively.
196 CodeEmitterNV50::emitMNeg12(const Instruction
*i
)
198 code
[1] |= i
->src(0).mod
.neg() << 26;
199 code
[1] |= i
->src(1).mod
.neg() << 27;
// Maps condition code `cc` to its numeric encoding `enc` and ORs it into
// code[] at bit `pos`.
// The assert requires pos >= 32 or pos <= 27. The *U variants equal the plain
// variant with bit 3 (0x8) set; for a non-float `ty` other than TYPE_NONE
// that bit is cleared again before the value is written.
// NOTE(review): the declaration of `enc`, the switch header and the default
// label are not visible in this listing.
202 void CodeEmitterNV50::emitCondCode(CondCode cc
, DataType ty
, int pos
)
206 assert(pos
>= 32 || pos
<= 27);
209 case CC_LT
: enc
= 0x1; break;
210 case CC_LTU
: enc
= 0x9; break;
211 case CC_EQ
: enc
= 0x2; break;
212 case CC_EQU
: enc
= 0xa; break;
213 case CC_LE
: enc
= 0x3; break;
214 case CC_LEU
: enc
= 0xb; break;
215 case CC_GT
: enc
= 0x4; break;
216 case CC_GTU
: enc
= 0xc; break;
217 case CC_NE
: enc
= 0x5; break;
218 case CC_NEU
: enc
= 0xd; break;
219 case CC_GE
: enc
= 0x6; break;
220 case CC_GEU
: enc
= 0xe; break;
221 case CC_TR
: enc
= 0xf; break;
222 case CC_FL
: enc
= 0x0; break;
224 case CC_O
: enc
= 0x10; break;
225 case CC_C
: enc
= 0x11; break;
226 case CC_A
: enc
= 0x12; break;
227 case CC_S
: enc
= 0x13; break;
228 case CC_NS
: enc
= 0x1c; break;
229 case CC_NA
: enc
= 0x1d; break;
230 case CC_NC
: enc
= 0x1e; break;
231 case CC_NO
: enc
= 0x1f; break;
235 assert(!"invalid condition code");
238 if (ty
!= TYPE_NONE
&& !isFloatType(ty
))
239 enc
&= ~0x8; // unordered only exists for float types
241 code
[pos
/ 32] |= enc
<< (pos
% 32);
// Encodes the condition read by instruction `i`.
// `s` is i->flagsSrc when that is >= 0, otherwise i->predSrc. Bits 7..13 of
// code[1] must still be clear (first assert). The visible path asserts that
// source `s` is in FILE_FLAGS, emits i->cc at bit 32 + 7 and the id of that
// source at bit 32 + 12.
// NOTE(review): the condition guarding that path and any alternative branch
// are not visible in this listing.
245 CodeEmitterNV50::emitFlagsRd(const Instruction
*i
)
247 int s
= (i
->flagsSrc
>= 0) ? i
->flagsSrc
: i
->predSrc
;
249 assert(!(code
[1] & 0x00003f80));
252 assert(i
->getSrc(s
)->reg
.file
== FILE_FLAGS
);
253 emitCondCode(i
->cc
, TYPE_NONE
, 32 + 7);
254 srcId(i
->src(s
), 32 + 12);
// Encodes the flags register written by instruction `i`: the id of the flags
// definition goes to bit 4 of code[1] and 0x40 is set alongside it.
// Bits 4..6 of code[1] must still be clear (assert). Starts from
// i->flagsDef and also scans the definitions for one in FILE_FLAGS.
// The first WARN is disabled by the `&& 0` in its condition; the second fires
// when the flags def is def 0 while a def 1 exists.
// NOTE(review): the conditions around the scan and the final write, and the
// body of the inner `if`, are not visible in this listing.
261 CodeEmitterNV50::emitFlagsWr(const Instruction
*i
)
263 assert(!(code
[1] & 0x70));
265 int flagsDef
= i
->flagsDef
;
267 // find flags definition and check that it is the last def
269 for (int d
= 0; i
->defExists(d
); ++d
)
270 if (i
->def(d
).getFile() == FILE_FLAGS
)
272 if (flagsDef
>= 0 && 0) // TODO: enforce use of flagsDef at some point
273 WARN("Instruction::flagsDef was not set properly\n");
275 if (flagsDef
== 0 && i
->defExists(1))
276 WARN("flags def should not be the primary definition\n");
279 code
[1] |= (DDATA(i
->def(flagsDef
)).id
<< 4) | 0x40;
// ORs the low two bits of `u` into bits 26..27 of code[0].
// NOTE(review): this listing omits some source lines; any handling of the
// higher bits of `u` is not visible here.
284 CodeEmitterNV50::setARegBits(unsigned int u
)
286 code
[0] |= (u
& 3) << 26;
// If source `s` of `i` exists, `s` is replaced by that source's indirect[0]
// index, and (id of the source at the new index) + 1 is passed to
// setARegBits().
// NOTE(review): any check between the two visible statements is not shown
// in this listing.
291 CodeEmitterNV50::setAReg16(const Instruction
*i
, int s
)
293 if (i
->srcExists(s
)) {
294 s
= i
->src(s
).indirect
[0];
296 setARegBits(SDATA(i
->src(s
)).id
+ 1);
// Encodes the 32-bit immediate held by source `s` of `i`: the low 6 bits go
// to bit 16 of code[0], the remaining bits (u >> 6) to bit 2 of code[1].
// NOTE(review): the statement controlled by the NV50_IR_MOD_NOT test is not
// visible in this listing.
301 CodeEmitterNV50::setImmediate(const Instruction
*i
, int s
)
303 const ImmediateValue
*imm
= i
->src(s
).get()->asImm();
306 uint32_t u
= imm
->reg
.data
.u32
;
308 if (i
->src(s
).mod
& Modifier(NV50_IR_MOD_NOT
))
312 code
[0] |= (u
& 0x3f) << 16;
313 code
[1] |= (u
>> 6) << 2;
// Encodes destination value `dst`, using the storage of dst->join.
// Asserts the file is not FILE_ADDRESS. For a negative id or a FILE_FLAGS
// destination, (127 << 2) | 1 is ORed into code[0]. For FILE_SHADER_OUTPUT
// the id is taken as offset / 4.
// NOTE(review): the declaration of `id`, the other branches and the final
// write of the id are not visible in this listing.
317 CodeEmitterNV50::setDst(const Value
*dst
)
319 const Storage
*reg
= &dst
->join
->reg
;
321 assert(reg
->file
!= FILE_ADDRESS
);
323 if (reg
->data
.id
< 0 || reg
->file
== FILE_FLAGS
) {
324 code
[0] |= (127 << 2) | 1;
328 if (reg
->file
== FILE_SHADER_OUTPUT
) {
330 id
= reg
->data
.offset
/ 4;
// Encodes definition `d` of `i` through setDst(const Value *) when it exists.
// The other visible statement ORs 0x01fc (the "bit bucket") into code[0].
// NOTE(review): the else keyword separating the two paths is not visible in
// this listing.
339 CodeEmitterNV50::setDst(const Instruction
*i
, int d
)
341 if (i
->defExists(d
)) {
342 setDst(i
->getDef(d
));
345 code
[0] |= 0x01fc; // bit bucket
// Sets the source register-file selection bits for `i` in encoding form `enc`.
// First part: builds `mode`, two bits per source `s` (shifted by s * 2):
// 1 for FILE_MEMORY_SHARED / FILE_SHADER_INPUT, 2 for FILE_MEMORY_CONST and
// 3 for a case whose label is not visible here; an unhandled file is reported
// through ERROR().
// Second part: dispatches on `mode` (only the 0x01 "arr/grr" and 0x09
// "acr/gcr" labels are visible) and ORs the matching bits into code[0] and
// code[1], including the source's fileIndex at bit 22 of code[1].
// Last part: outside compute programs, when source 0 has mode 1, the
// register type/size of source 0 is examined.
// NOTE(review): many lines (switch headers, case labels, breaks, else
// branches) are missing from this listing — confirm details against the
// full source.
356 CodeEmitterNV50::setSrcFileBits(const Instruction
*i
, int enc
)
360 for (unsigned int s
= 0; s
< Target::operationSrcNr
[i
->op
]; ++s
) {
361 switch (i
->src(s
).getFile()) {
364 case FILE_MEMORY_SHARED
:
365 case FILE_SHADER_INPUT
:
366 mode
|= 1 << (s
* 2);
368 case FILE_MEMORY_CONST
:
369 mode
|= 2 << (s
* 2);
372 mode
|= 3 << (s
* 2);
375 ERROR("invalid file on source %i: %u\n", s
, i
->src(s
).getFile());
383 case 0x01: // arr/grr
384 if (progType
== Program::TYPE_GEOMETRY
&& i
->src(0).isIndirect(0)) {
385 code
[0] |= 0x01800000;
386 if (enc
== NV50_OP_ENC_LONG
|| enc
== NV50_OP_ENC_LONG_ALT
)
387 code
[1] |= 0x00200000;
389 if (enc
== NV50_OP_ENC_SHORT
)
390 code
[0] |= 0x01000000;
392 code
[1] |= 0x00200000;
396 assert(i
->op
== OP_MOV
);
401 assert(progType
== Program::TYPE_GEOMETRY
||
402 progType
== Program::TYPE_COMPUTE
);
403 code
[0] |= 0x01000000;
404 if (progType
== Program::TYPE_GEOMETRY
&& i
->src(0).isIndirect(0)) {
405 int reg
= i
->src(0).getIndirect(0)->rep()->reg
.data
.id
;
407 code
[0] |= (reg
+ 1) << 26;
411 code
[0] |= (enc
== NV50_OP_ENC_LONG_ALT
) ? 0x01000000 : 0x00800000;
412 code
[1] |= (i
->getSrc(1)->reg
.fileIndex
<< 22);
414 case 0x09: // acr/gcr
415 if (progType
== Program::TYPE_GEOMETRY
&& i
->src(0).isIndirect(0)) {
416 code
[0] |= 0x01800000;
418 code
[0] |= (enc
== NV50_OP_ENC_LONG_ALT
) ? 0x01000000 : 0x00800000;
419 code
[1] |= 0x00200000;
421 code
[1] |= (i
->getSrc(1)->reg
.fileIndex
<< 22);
424 code
[0] |= 0x01000000;
425 code
[1] |= (i
->getSrc(2)->reg
.fileIndex
<< 22);
428 code
[0] |= 0x01000000;
429 code
[1] |= 0x00200000 | (i
->getSrc(2)->reg
.fileIndex
<< 22);
430 assert(progType
!= Program::TYPE_GEOMETRY
);
433 ERROR("not encodable: %x\n", mode
);
437 if (progType
!= Program::TYPE_COMPUTE
)
440 if ((mode
& 3) == 1) {
441 const int pos
= i
->src(1).getFile() == FILE_IMMEDIATE
? 13 : 14;
443 switch (i
->getSrc(0)->reg
.type
) {
454 assert(i
->getSrc(0)->reg
.size
== 4);
// Encodes source `s` of `i` into operand slot `slot`:
// slot 0 -> bit 9 of code[0], slot 1 -> bit 16 of code[0],
// slot 2 -> bit 14 of code[1].
// The first test compares `s` against the source count of the operation
// (Target::operationSrcNr[i->op]). For non-GPR files the visible expression
// for `id` is offset >> (size >> 1).
// NOTE(review): the statement under the first test, the GPR branch of the
// conditional and the switch header are not visible in this listing.
461 CodeEmitterNV50::setSrc(const Instruction
*i
, unsigned int s
, int slot
)
463 if (Target::operationSrcNr
[i
->op
] <= s
)
465 const Storage
*reg
= &i
->src(s
).rep()->reg
;
467 unsigned int id
= (reg
->file
== FILE_GPR
) ?
469 reg
->data
.offset
>> (reg
->size
>> 1); // no > 4 byte sources here
472 case 0: code
[0] |= id
<< 9; break;
473 case 1: code
[0] |= id
<< 16; break;
474 case 2: code
[1] |= id
<< 14; break;
482 // - long instruction
483 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
// Asserts an 8-byte encoding and selects the source files with
// NV50_OP_ENC_LONG. When source 0 is indirect, asserts that source 1 is not.
// NOTE(review): the calls that place the destination, sources and address
// register are not visible in this listing.
486 CodeEmitterNV50::emitForm_MAD(const Instruction
*i
)
488 assert(i
->encSize
== 8);
496 setSrcFileBits(i
, NV50_OP_ENC_LONG
);
501 if (i
->getIndirect(0, 0)) {
502 assert(!i
->getIndirect(1, 0));
509 // like default form, but 2nd source in slot 2, and no 3rd source
// Asserts an 8-byte encoding and selects the source files with
// NV50_OP_ENC_LONG_ALT. When source 0 is indirect, asserts that source 1
// is not.
// NOTE(review): the calls that place the destination, sources and address
// register are not visible in this listing.
511 CodeEmitterNV50::emitForm_ADD(const Instruction
*i
)
513 assert(i
->encSize
== 8);
521 setSrcFileBits(i
, NV50_OP_ENC_LONG_ALT
);
525 if (i
->getIndirect(0, 0)) {
526 assert(!i
->getIndirect(1, 0));
533 // default short form (rr, ar, rc, gr)
// Asserts a 4-byte encoding with bit 0 of code[0] clear, that def 0 exists
// and that the instruction has no predicate, then selects the source files
// with NV50_OP_ENC_SHORT.
// NOTE(review): the calls that place the destination and sources are not
// visible in this listing.
535 CodeEmitterNV50::emitForm_MUL(const Instruction
*i
)
537 assert(i
->encSize
== 4 && !(code
[0] & 1));
538 assert(i
->defExists(0));
539 assert(!i
->getPredicate());
543 setSrcFileBits(i
, NV50_OP_ENC_SHORT
);
548 // usual immediate form
549 // - 1 to 3 sources where last is immediate (rir, gir)
550 // - no address or predicate possible
// Asserts an 8-byte encoding and that def 0 and source 0 exist, then selects
// the source files with NV50_OP_ENC_IMM. Operations with more than one
// source take a separate path.
// NOTE(review): the bodies of both paths are not visible in this listing.
552 CodeEmitterNV50::emitForm_IMM(const Instruction
*i
)
554 assert(i
->encSize
== 8);
557 assert(i
->defExists(0) && i
->srcExists(0));
561 setSrcFileBits(i
, NV50_OP_ENC_IMM
);
562 if (Target::operationSrcNr
[i
->op
] > 1) {
// ORs the size code for data type `ty` into code[] at bit `pos`:
// F32/S32/U32 -> 6, B128 -> 5, F64/S64/U64 -> 4, S16 -> 3, U16 -> 2,
// S8 -> 1, U8 -> 0. Any other type reaches the assert.
// Called from emitLOAD/emitSTORE for local and global memory.
// NOTE(review): the declaration of `enc`, the switch header and the default
// label are not visible in this listing.
572 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty
, int pos
)
577 case TYPE_F32
: // fall through
578 case TYPE_S32
: // fall through
579 case TYPE_U32
: enc
= 0x6; break;
580 case TYPE_B128
: enc
= 0x5; break;
581 case TYPE_F64
: // fall through
582 case TYPE_S64
: // fall through
583 case TYPE_U64
: enc
= 0x4; break;
584 case TYPE_S16
: enc
= 0x3; break;
585 case TYPE_U16
: enc
= 0x2; break;
586 case TYPE_S8
: enc
= 0x1; break;
587 case TYPE_U8
: enc
= 0x0; break;
590 assert(!"invalid load/store type");
593 code
[pos
/ 32] |= enc
<< (pos
% 32);
// Sets bits 14..15 of code[1] from data type `ty`:
// U16 -> 0x4000, S16 -> 0x8000, U32 -> 0xc000.
// Called from emitLOAD for shared and constant memory.
// NOTE(review): the switch header and any further case labels are not
// visible in this listing.
597 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty
)
601 case TYPE_U16
: code
[1] |= 0x4000; break;
602 case TYPE_S16
: code
[1] |= 0x8000; break;
605 case TYPE_U32
: code
[1] |= 0xc000; break;
// Encodes a load from source 0 into code[0] / code[1].
// The opcode words depend on the source file `sf`: shader input, shared
// memory (with a separate variant for chipsets >= 0x84), constant memory,
// local memory and global memory are visible; another file reaches the
// assert. Several paths set 0x04000000 in code[1] when the destination type
// is 4 bytes wide.
// Local and global loads get their size field from emitLoadStoreSizeLG() at
// bit 21 + 32. Global loads encode the indirect register of source 0 at
// bit 9; the other visible path encodes a 16-bit address at bit 9, with
// `adj` set unless the file is FILE_MEMORY_LOCAL.
// NOTE(review): the switch header, breaks, else branches and the
// destination/flags calls are not visible in this listing.
613 CodeEmitterNV50::emitLOAD(const Instruction
*i
)
615 DataFile sf
= i
->src(0).getFile();
616 int32_t offset
= i
->getSrc(0)->reg
.data
.offset
;
619 case FILE_SHADER_INPUT
:
620 if (progType
== Program::TYPE_GEOMETRY
&& i
->src(0).isIndirect(0))
621 code
[0] = 0x11800001;
623 // use 'mov' where we can
624 code
[0] = i
->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
625 code
[1] = 0x00200000 | (i
->lanes
<< 14);
626 if (typeSizeof(i
->dType
) == 4)
627 code
[1] |= 0x04000000;
629 case FILE_MEMORY_SHARED
:
630 if (targ
->getChipset() >= 0x84) {
631 assert(offset
<= (int32_t)(0x3fff * typeSizeof(i
->sType
)));
632 code
[0] = 0x10000001;
633 code
[1] = 0x40000000;
635 if (typeSizeof(i
->dType
) == 4)
636 code
[1] |= 0x04000000;
638 emitLoadStoreSizeCS(i
->sType
);
640 assert(offset
<= (int32_t)(0x1f * typeSizeof(i
->sType
)));
641 code
[0] = 0x10000001;
642 code
[1] = 0x00200000 | (i
->lanes
<< 14);
643 emitLoadStoreSizeCS(i
->sType
);
646 case FILE_MEMORY_CONST
:
647 code
[0] = 0x10000001;
648 code
[1] = 0x20000000 | (i
->getSrc(0)->reg
.fileIndex
<< 22);
649 if (typeSizeof(i
->dType
) == 4)
650 code
[1] |= 0x04000000;
651 emitLoadStoreSizeCS(i
->sType
);
653 case FILE_MEMORY_LOCAL
:
654 code
[0] = 0xd0000001;
655 code
[1] = 0x40000000;
657 case FILE_MEMORY_GLOBAL
:
658 code
[0] = 0xd0000001 | (i
->getSrc(0)->reg
.fileIndex
<< 16);
659 code
[1] = 0x80000000;
662 assert(!"invalid load source file");
665 if (sf
== FILE_MEMORY_LOCAL
||
666 sf
== FILE_MEMORY_GLOBAL
)
667 emitLoadStoreSizeLG(i
->sType
, 21 + 32);
674 if (i
->src(0).getFile() == FILE_MEMORY_GLOBAL
) {
675 srcId(*i
->src(0).getIndirect(0), 9);
678 srcAddr16(i
->src(0), i
->src(0).getFile() != FILE_MEMORY_LOCAL
, 9);
// Encodes a store; `f` is the register file of source 0 and `offset` its
// byte offset.
// Visible targets: shader output (offset / 4 at bit 9, value register at
// bit 32 + 14), global and local memory (size field from
// emitLoadStoreSizeLG() at bit 21 + 32 using dType) and shared memory, where
// the offset is scaled according to typeSizeof(dType). Another file reaches
// the assert.
// Afterwards global stores encode the indirect register of source 0 at
// bit 9 and local stores a 16-bit unadjusted address at bit 9.
// NOTE(review): the switch headers, case labels of the inner switch, breaks
// and the remaining operand calls are not visible in this listing.
683 CodeEmitterNV50::emitSTORE(const Instruction
*i
)
685 DataFile f
= i
->getSrc(0)->reg
.file
;
686 int32_t offset
= i
->getSrc(0)->reg
.data
.offset
;
689 case FILE_SHADER_OUTPUT
:
690 code
[0] = 0x00000001 | ((offset
>> 2) << 9);
691 code
[1] = 0x80c00000;
692 srcId(i
->src(1), 32 + 14);
694 case FILE_MEMORY_GLOBAL
:
695 code
[0] = 0xd0000001 | (i
->getSrc(0)->reg
.fileIndex
<< 16);
696 code
[1] = 0xa0000000;
697 emitLoadStoreSizeLG(i
->dType
, 21 + 32);
700 case FILE_MEMORY_LOCAL
:
701 code
[0] = 0xd0000001;
702 code
[1] = 0x60000000;
703 emitLoadStoreSizeLG(i
->dType
, 21 + 32);
706 case FILE_MEMORY_SHARED
:
707 code
[0] = 0x00000001;
708 code
[1] = 0xe0000000;
709 switch (typeSizeof(i
->dType
)) {
711 code
[0] |= offset
<< 9;
712 code
[1] |= 0x00400000;
715 code
[0] |= (offset
>> 1) << 9;
718 code
[0] |= (offset
>> 2) << 9;
719 code
[1] |= 0x04200000;
725 srcId(i
->src(1), 32 + 14);
728 assert(!"invalid store destination file");
732 if (f
== FILE_MEMORY_GLOBAL
)
733 srcId(*i
->src(0).getIndirect(0), 9);
737 if (f
== FILE_MEMORY_LOCAL
)
738 srcAddr16(i
->src(0), false, 9);
// Encodes a move; `sf` / `df` are the register files of source 0 and def 0.
// Asserts that at least one of them is FILE_GPR.
// Separate opcode words are used for a flags source (source id at bit 12),
// an address-register source (id + 1 through setARegBits), a flags
// destination and an immediate source. Otherwise the 4-byte form uses
// code[0] = 0x10008000, and the long form sets code[1] from the destination
// type size (0 for 2 bytes, 0x04000000 otherwise) plus i->lanes at bit 14.
// A FILE_SHADER_OUTPUT destination requires the 8-byte form.
// NOTE(review): else branches and the destination/source/flags calls are
// not visible in this listing.
744 CodeEmitterNV50::emitMOV(const Instruction
*i
)
746 DataFile sf
= i
->getSrc(0)->reg
.file
;
747 DataFile df
= i
->getDef(0)->reg
.file
;
749 assert(sf
== FILE_GPR
|| df
== FILE_GPR
);
751 if (sf
== FILE_FLAGS
) {
752 code
[0] = 0x00000001;
753 code
[1] = 0x20000000;
755 srcId(i
->src(0), 12);
758 if (sf
== FILE_ADDRESS
) {
759 code
[0] = 0x00000001;
760 code
[1] = 0x40000000;
762 setARegBits(SDATA(i
->src(0)).id
+ 1);
765 if (df
== FILE_FLAGS
) {
766 code
[0] = 0x00000001;
767 code
[1] = 0xa0000000;
772 if (sf
== FILE_IMMEDIATE
) {
773 code
[0] = 0x10008001;
774 code
[1] = 0x00000003;
777 if (i
->encSize
== 4) {
778 code
[0] = 0x10008000;
780 code
[0] = 0x10000001;
781 code
[1] = (typeSizeof(i
->dType
) == 2) ? 0 : 0x04000000;
782 code
[1] |= (i
->lanes
<< 14);
788 if (df
== FILE_SHADER_OUTPUT
) {
789 assert(i
->encSize
== 8);
// Maps the system value referenced by `ref` to a special-register index:
// SV_PHYSID -> 0, SV_CLOCK -> 1, SV_VERTEX_STRIDE -> 3, SV_SAMPLE_INDEX -> 8.
// The SV_PM_COUNTER case is commented out. Other values reach the assert.
// NOTE(review): the default label and the value returned after the assert
// are not visible in this listing.
794 static inline uint8_t getSRegEncoding(const ValueRef
&ref
)
796 switch (SDATA(ref
).sv
.sv
) {
797 case SV_PHYSID
: return 0;
798 case SV_CLOCK
: return 1;
799 case SV_VERTEX_STRIDE
: return 3;
800 // case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index;
801 case SV_SAMPLE_INDEX
: return 8;
803 assert(!"no sreg for system value");
// Encodes a system-value read: fixed opcode words, with the special-register
// index from getSRegEncoding(source 0) placed at bit 14 of code[1].
// NOTE(review): the destination/flags calls are not visible in this listing.
809 CodeEmitterNV50::emitRDSV(const Instruction
*i
)
811 code
[0] = 0x00000001;
812 code
[1] = 0x60000000 | (getSRegEncoding(i
->src(0)) << 14);
// Writes the fixed two-word NOP encoding into code[0] and code[1].
818 CodeEmitterNV50::emitNOP()
820 code
[0] = 0xf0000001;
821 code
[1] = 0xe0000000;
// Encodes a quad operation: `lane` goes to bit 16 of code[0]; the low two
// bits of `quOp` go to bit 20 of code[0] and its upper six bits
// (quOp & 0xfc) are shifted left by 20 into code[1].
// When source 1 does not exist, the id of source 0 is also written at
// bit 32 + 14.
// NOTE(review): the operand-form call between these statements is not
// visible in this listing.
825 CodeEmitterNV50::emitQUADOP(const Instruction
*i
, uint8_t lane
, uint8_t quOp
)
827 code
[0] = 0xc0000000 | (lane
<< 16);
828 code
[1] = 0x80000000;
830 code
[0] |= (quOp
& 0x03) << 20;
831 code
[1] |= (quOp
& 0xfc) << 20;
835 if (!i
->srcExists(1))
836 srcId(i
->src(0), 32 + 14);
/* NOTE: This returns the base address of a vertex inside the primitive.
 * src0 is an immediate: the index (not offset) of the vertex
 * inside the primitive. XXX: signed or unsigned?
 * src1 (may be NULL) should use whatever units the hardware requires
 * (on nv50 this is bytes, so relative index * 4; signed 16-bit value).
 */
// `prim` is the u32 payload of source 0; every visible path ORs it into
// code[0] at bit 9.
// Three encodings are visible:
//  - def 0 in FILE_ADDRESS: (def id + 1) at bit 2 of code[0]; source 1 must
//    not exist;
//  - source 1 exists: (id of source 1) + 1 is passed to setARegBits();
//  - otherwise: code[0] starts from 0x10000001.
// NOTE(review): else keywords and the destination/flags calls are not
// visible in this listing.
846 CodeEmitterNV50::emitPFETCH(const Instruction
*i
)
848 const uint32_t prim
= i
->src(0).get()->reg
.data
.u32
;
851 if (i
->def(0).getFile() == FILE_ADDRESS
) {
853 code
[0] = 0x00000001 | ((DDATA(i
->def(0)).id
+ 1) << 2);
854 code
[1] = 0xc0200000;
855 code
[0] |= prim
<< 9;
856 assert(!i
->srcExists(1));
858 if (i
->srcExists(1)) {
859 // ld b32 $rX a[$aX+base]
860 code
[0] = 0x00000001;
861 code
[1] = 0x04200000 | (0xf << 14);
863 code
[0] |= prim
<< 9;
864 setARegBits(SDATA(i
->src(1)).id
+ 1);
867 code
[0] = 0x10000001;
868 code
[1] = 0x04200000 | (0xf << 14);
870 code
[0] |= prim
<< 9;
// Encodes an interpolation: code[0] starts at 0x80000000 and the offset of
// source 0 is encoded through srcAddr8() at bit 16.
// The interpolation mode (flat), the operation (OP_PINTERP) and the sample
// mode (centroid) are tested. For the 8-byte form, bits 24..25 and bit 8 of
// code[0] are shifted to bits 16..17 and bit 18 of an expression whose
// target is not visible, and then cleared from code[0].
// NOTE(review): the bodies of the mode tests and the left-hand side of the
// relocation expression are not visible in this listing.
876 CodeEmitterNV50::emitINTERP(const Instruction
*i
)
878 code
[0] = 0x80000000;
881 srcAddr8(i
->src(0), 16);
883 if (i
->getInterpMode() == NV50_IR_INTERP_FLAT
) {
886 if (i
->op
== OP_PINTERP
) {
890 if (i
->getSampleMode() == NV50_IR_INTERP_CENTROID
)
894 if (i
->encSize
== 8) {
896 (code
[0] & (3 << 24)) >> (24 - 16) |
897 (code
[0] & (1 << 8)) << (18 - 8);
898 code
[0] &= ~0x03000100;
// Encodes min/max.
// F64: code[0] = 0xe0000000 and code[1] is 0xa0000000 for OP_MIN, otherwise
// 0xc0000000. Other types: code[0] = 0x30000000, code[1] = 0x80000000, then
// type-specific bits (F32 in code[0]; S32/U32/S16 in code[1]; U16 adds none).
// The abs modifiers of sources 0 and 1 go to bits 20 and 19 of code[1].
// NOTE(review): the condition on `code[1] |= 0x20000000`, the switch header
// and the operand-form call are not visible in this listing.
905 CodeEmitterNV50::emitMINMAX(const Instruction
*i
)
907 if (i
->dType
== TYPE_F64
) {
908 code
[0] = 0xe0000000;
909 code
[1] = (i
->op
== OP_MIN
) ? 0xa0000000 : 0xc0000000;
911 code
[0] = 0x30000000;
912 code
[1] = 0x80000000;
914 code
[1] |= 0x20000000;
917 case TYPE_F32
: code
[0] |= 0x80000000; break;
918 case TYPE_S32
: code
[1] |= 0x8c000000; break;
919 case TYPE_U32
: code
[1] |= 0x84000000; break;
920 case TYPE_S16
: code
[1] |= 0x80000000; break;
921 case TYPE_U16
: break;
926 code
[1] |= i
->src(0).mod
.abs() << 20;
927 code
[1] |= i
->src(1).mod
.abs() << 19;
// Encodes a float multiply-add.
// `neg_mul` is the XOR of the negate modifiers of sources 0 and 1 (sign of
// the product); `neg_add` is the negate modifier of source 2.
// The 4-byte form asserts that neither is set. The long form writes neg_mul
// to bit 26 and neg_add to bit 27 of code[1].
// NOTE(review): the else keyword, operand-form calls and any rounding or
// saturate handling are not visible in this listing.
933 CodeEmitterNV50::emitFMAD(const Instruction
*i
)
935 const int neg_mul
= i
->src(0).mod
.neg() ^ i
->src(1).mod
.neg();
936 const int neg_add
= i
->src(2).mod
.neg();
938 code
[0] = 0xe0000000;
940 if (i
->encSize
== 4) {
942 assert(!neg_mul
&& !neg_add
);
944 code
[1] = neg_mul
<< 26;
945 code
[1] |= neg_add
<< 27;
// Encodes a float add/subtract.
// `neg0` is the negate modifier of source 0; `neg1` is that of source 1,
// inverted when the operation is OP_SUB. Asserts that neither source has an
// abs modifier.
// Immediate form and the last visible form: neg0 at bit 15, neg1 at bit 22
// of code[0]. 8-byte form: neg0 at bit 26, neg1 at bit 27 of code[1].
// NOTE(review): else keywords, operand-form calls and any rounding or
// saturate handling are not visible in this listing.
953 CodeEmitterNV50::emitFADD(const Instruction
*i
)
955 const int neg0
= i
->src(0).mod
.neg();
956 const int neg1
= i
->src(1).mod
.neg() ^ ((i
->op
== OP_SUB
) ? 1 : 0);
958 code
[0] = 0xb0000000;
960 assert(!(i
->src(0).mod
| i
->src(1).mod
).abs());
962 if (i
->src(1).getFile() == FILE_IMMEDIATE
) {
965 code
[0] |= neg0
<< 15;
966 code
[0] |= neg1
<< 22;
970 if (i
->encSize
== 8) {
973 code
[1] |= neg0
<< 26;
974 code
[1] |= neg1
<< 27;
979 code
[0] |= neg0
<< 15;
980 code
[0] |= neg1
<< 22;
// Encodes an integer add/subtract.
// `neg0` / `neg1` are the negate modifiers of sources 0 and 1, with neg1
// inverted for OP_SUB; they must not both be set (assert). neg0 goes to
// bit 28 and neg1 to bit 22 of code[0].
// The 8-byte form restarts code[0] at 0x20000000 and sets code[1] to 0 for
// 2-byte destination types, 0x04000000 otherwise.
// With a flags source (flagsSrc >= 0) both negate bits are set together to
// select add-with-carry; this requires that neither was set and that the
// instruction has no predicate. The flags register id goes to bit 32 + 12.
// NOTE(review): the body of the immediate test, else keywords and
// operand-form calls are not visible in this listing.
987 CodeEmitterNV50::emitUADD(const Instruction
*i
)
989 const int neg0
= i
->src(0).mod
.neg();
990 const int neg1
= i
->src(1).mod
.neg() ^ ((i
->op
== OP_SUB
) ? 1 : 0);
992 code
[0] = 0x20008000;
994 if (i
->src(1).getFile() == FILE_IMMEDIATE
) {
998 if (i
->encSize
== 8) {
999 code
[0] = 0x20000000;
1000 code
[1] = (typeSizeof(i
->dType
) == 2) ? 0 : 0x04000000;
1005 assert(!(neg0
&& neg1
));
1006 code
[0] |= neg0
<< 28;
1007 code
[0] |= neg1
<< 22;
1009 if (i
->flagsSrc
>= 0) {
1010 // addc == sub | subr
1011 assert(!(code
[0] & 0x10400000) && !i
->getPredicate());
1012 code
[0] |= 0x10400000;
1013 srcId(i
->src(i
->flagsSrc
), 32 + 12);
// Encodes an operation writing def 0 as (id + 1) at bit 2 of code[0].
// `s` selects the source that supplies the 16-bit value: source 0 for
// OP_MOV, source 1 otherwise. That u16 value goes to bit 9 of code[0].
// When `s` is 1 and source 0 exists, (id of source 0) + 1 is passed to
// setARegBits().
// NOTE(review): the flags call between these statements is not visible in
// this listing.
1018 CodeEmitterNV50::emitAADD(const Instruction
*i
)
1020 const int s
= (i
->op
== OP_MOV
) ? 0 : 1;
1022 code
[0] = 0xd0000001 | (i
->getSrc(s
)->reg
.data
.u16
<< 9);
1023 code
[1] = 0x20000000;
1025 code
[0] |= (DDATA(i
->def(0)).id
+ 1) << 2;
1029 if (s
&& i
->srcExists(0))
1030 setARegBits(SDATA(i
->src(0)).id
+ 1);
// Encodes an integer multiply: code[0] starts at 0x40000000.
// In the 8-byte form code[1] is 0x8000 | 0x4000 for TYPE_S16 sources and 0
// otherwise. The short form also tests for TYPE_S16.
// NOTE(review): the statement under the second S16 test and the
// operand-form calls are not visible in this listing.
1034 CodeEmitterNV50::emitIMUL(const Instruction
*i
)
1036 code
[0] = 0x40000000;
1038 if (i
->encSize
== 8) {
1039 code
[1] = (i
->sType
== TYPE_S16
) ? (0x8000 | 0x4000) : 0x0000;
1042 if (i
->sType
== TYPE_S16
)
// Encodes a float multiply: code[0] starts at 0xc0000000.
// `neg` is the negate bit of the XOR of both source modifiers.
// In the 8-byte form code[1] is 0x0000c000 when rounding is ROUND_Z and 0
// otherwise; 0x08000000 is ORed into code[1] on a path whose condition is
// not visible.
// NOTE(review): the immediate-form body, the uses of `neg` and the
// operand-form calls are not visible in this listing.
1049 CodeEmitterNV50::emitFMUL(const Instruction
*i
)
1051 const int neg
= (i
->src(0).mod
^ i
->src(1).mod
).neg();
1053 code
[0] = 0xc0000000;
1055 if (i
->src(1).getFile() == FILE_IMMEDIATE
) {
1061 if (i
->encSize
== 8) {
1062 code
[1] = i
->rnd
== ROUND_Z
? 0x0000c000 : 0;
1064 code
[1] |= 0x08000000;
// Encodes an integer multiply-add: code[0] = 0x60000000.
// For signed source types code[1] is 0x40000000 when saturating and
// 0x20000000 otherwise; the other visible assignment sets it to 0.
// `neg1` (XOR of the negate modifiers of sources 0 and 1) goes to bit 27 and
// `neg2` (negate of source 2) to bit 26 of code[1]; they must not both be
// set (assert).
// With a flags source, bits 26..27 must be clear and no predicate present;
// 0xc << 24 is then set and the flags register id written at bit 32 + 12.
// NOTE(review): the else keyword and operand-form call are not visible in
// this listing.
1074 CodeEmitterNV50::emitIMAD(const Instruction
*i
)
1076 code
[0] = 0x60000000;
1077 if (isSignedType(i
->sType
))
1078 code
[1] = i
->saturate
? 0x40000000 : 0x20000000;
1080 code
[1] = 0x00000000;
1082 int neg1
= i
->src(0).mod
.neg() ^ i
->src(1).mod
.neg();
1083 int neg2
= i
->src(2).mod
.neg();
1085 assert(!(neg1
& neg2
));
1086 code
[1] |= neg1
<< 27;
1087 code
[1] |= neg2
<< 26;
1091 if (i
->flagsSrc
>= 0) {
1092 // add with carry from $cX
1093 assert(!(code
[1] & 0x0c000000) && !i
->getPredicate());
1094 code
[1] |= 0xc << 24;
1095 srcId(i
->src(i
->flagsSrc
), 32 + 12);
// Encodes an ISAD operation with type-dependent opcode bits.
// 8-byte form: code[0] = 0x50000000 and code[1] is chosen per type
// (U32, S32, U16, S16). Other form: code[0] is chosen per type.
// NOTE(review): the switch headers (and therefore which type field they
// test), default labels and operand-form calls are not visible in this
// listing.
1100 CodeEmitterNV50::emitISAD(const Instruction
*i
)
1102 if (i
->encSize
== 8) {
1103 code
[0] = 0x50000000;
1105 case TYPE_U32
: code
[1] = 0x04000000; break;
1106 case TYPE_S32
: code
[1] = 0x0c000000; break;
1107 case TYPE_U16
: code
[1] = 0x00000000; break;
1108 case TYPE_S16
: code
[1] = 0x08000000; break;
1116 case TYPE_U32
: code
[0] = 0x50008000; break;
1117 case TYPE_S32
: code
[0] = 0x50008100; break;
1118 case TYPE_U16
: code
[0] = 0x50000000; break;
1119 case TYPE_S16
: code
[0] = 0x50000100; break;
// Encodes a comparison: fixed opcode words, then the compare condition
// (i->asCmp()->setCond, interpreted with i->sType) at bit 32 + 14.
// Type-specific bits follow (F32 in code[0]; S32/U32/S16 in code[1]; U16
// adds none). Finally the neg/abs modifiers of sources 0 and 1 set
// 0x04000000 / 0x08000000 and 0x00100000 / 0x00080000 in code[1].
// NOTE(review): the switch header, any condition around the modifier tests
// and the operand-form call are not visible in this listing. The neg masks
// coincide with the integer type bits, so they are presumably applied only
// for float types — confirm against the full source.
1129 CodeEmitterNV50::emitSET(const Instruction
*i
)
1131 code
[0] = 0x30000000;
1132 code
[1] = 0x60000000;
1134 emitCondCode(i
->asCmp()->setCond
, i
->sType
, 32 + 14);
1137 case TYPE_F32
: code
[0] |= 0x80000000; break;
1138 case TYPE_S32
: code
[1] |= 0x0c000000; break;
1139 case TYPE_U32
: code
[1] |= 0x04000000; break;
1140 case TYPE_S16
: code
[1] |= 0x08000000; break;
1141 case TYPE_U16
: break;
1146 if (i
->src(0).mod
.neg()) code
[1] |= 0x04000000;
1147 if (i
->src(1).mod
.neg()) code
[1] |= 0x08000000;
1148 if (i
->src(0).mod
.abs()) code
[1] |= 0x00100000;
1149 if (i
->src(1).mod
.abs()) code
[1] |= 0x00080000;
// Sets the rounding-mode bits of code[1] for a conversion.
// M/P/Z use 0x00020000 / 0x00040000 / 0x00060000; the *I variants (NI, MI,
// PI, ZI) additionally set 0x08000000. The remaining visible path asserts
// that the mode is ROUND_N and sets no bits.
// NOTE(review): the switch header and default label are not visible in this
// listing.
1155 CodeEmitterNV50::roundMode_CVT(RoundMode rnd
)
1158 case ROUND_NI
: code
[1] |= 0x08000000; break;
1159 case ROUND_M
: code
[1] |= 0x00020000; break;
1160 case ROUND_MI
: code
[1] |= 0x08020000; break;
1161 case ROUND_P
: code
[1] |= 0x00040000; break;
1162 case ROUND_PI
: code
[1] |= 0x08040000; break;
1163 case ROUND_Z
: code
[1] |= 0x00060000; break;
1164 case ROUND_ZI
: code
[1] |= 0x08060000; break;
1166 assert(rnd
== ROUND_N
);
// Encodes conversions and the unary operations handled here.
// `f2f` is true when both destination and source types are float.
// OP_CEIL / OP_FLOOR / OP_TRUNC pick a rounding mode: the integer-rounding
// variants (ROUND_PI / MI / ZI) for float-to-float, plain ROUND_P / M / Z
// otherwise. code[0] is 0xa0000000 and code[1] is taken from several tables
// keyed by data type.
// A 1-byte source type held in a 4-byte register adds 0x00004000.
// OP_ABS / OP_SAT / OP_NEG set bits 20 / 19 / 29 of code[1]; the source
// negate modifier toggles bit 29 (XOR) and the abs modifier sets bit 20.
// The final assert rejects OP_ABS combined with a negated source.
// NOTE(review): the switch headers that select each table (and the type
// field each one tests), the default labels and the statement under the
// OP_NEG / TYPE_U32 test are not visible in this listing — confirm against
// the full source.
1172 CodeEmitterNV50::emitCVT(const Instruction
*i
)
1174 const bool f2f
= isFloatType(i
->dType
) && isFloatType(i
->sType
);
1179 case OP_CEIL
: rnd
= f2f
? ROUND_PI
: ROUND_P
; break;
1180 case OP_FLOOR
: rnd
= f2f
? ROUND_MI
: ROUND_M
; break;
1181 case OP_TRUNC
: rnd
= f2f
? ROUND_ZI
: ROUND_Z
; break;
1187 if (i
->op
== OP_NEG
&& i
->dType
== TYPE_U32
)
1192 code
[0] = 0xa0000000;
1197 case TYPE_F64
: code
[1] = 0xc4404000; break;
1198 case TYPE_S64
: code
[1] = 0x44414000; break;
1199 case TYPE_U64
: code
[1] = 0x44404000; break;
1200 case TYPE_F32
: code
[1] = 0xc4400000; break;
1201 case TYPE_S32
: code
[1] = 0x44410000; break;
1202 case TYPE_U32
: code
[1] = 0x44400000; break;
1210 case TYPE_F64
: code
[1] = 0x8c404000; break;
1211 case TYPE_F32
: code
[1] = 0x8c400000; break;
1219 case TYPE_F64
: code
[1] = 0x84404000; break;
1220 case TYPE_F32
: code
[1] = 0x84400000; break;
1228 case TYPE_F64
: code
[1] = 0xc0404000; break;
1229 case TYPE_S64
: code
[1] = 0x40414000; break;
1230 case TYPE_U64
: code
[1] = 0x40404000; break;
1231 case TYPE_F32
: code
[1] = 0xc4004000; break;
1232 case TYPE_S32
: code
[1] = 0x44014000; break;
1233 case TYPE_U32
: code
[1] = 0x44004000; break;
1234 case TYPE_F16
: code
[1] = 0xc4000000; break;
1235 case TYPE_U16
: code
[1] = 0x44000000; break;
1243 case TYPE_F64
: code
[1] = 0x88404000; break;
1244 case TYPE_F32
: code
[1] = 0x8c004000; break;
1245 case TYPE_S32
: code
[1] = 0x0c014000; break;
1246 case TYPE_U32
: code
[1] = 0x0c004000; break;
1247 case TYPE_F16
: code
[1] = 0x8c000000; break;
1248 case TYPE_S16
: code
[1] = 0x0c010000; break;
1249 case TYPE_U16
: code
[1] = 0x0c000000; break;
1250 case TYPE_S8
: code
[1] = 0x0c018000; break;
1251 case TYPE_U8
: code
[1] = 0x0c008000; break;
1259 case TYPE_F64
: code
[1] = 0x80404000; break;
1260 case TYPE_F32
: code
[1] = 0x84004000; break;
1261 case TYPE_S32
: code
[1] = 0x04014000; break;
1262 case TYPE_U32
: code
[1] = 0x04004000; break;
1263 case TYPE_F16
: code
[1] = 0x84000000; break;
1264 case TYPE_S16
: code
[1] = 0x04010000; break;
1265 case TYPE_U16
: code
[1] = 0x04000000; break;
1266 case TYPE_S8
: code
[1] = 0x04018000; break;
1267 case TYPE_U8
: code
[1] = 0x04008000; break;
1281 if (typeSizeof(i
->sType
) == 1 && i
->getSrc(0)->reg
.size
== 4)
1282 code
[1] |= 0x00004000;
1287 case OP_ABS
: code
[1] |= 1 << 20; break;
1288 case OP_SAT
: code
[1] |= 1 << 19; break;
1289 case OP_NEG
: code
[1] |= 1 << 29; break;
1293 code
[1] ^= i
->src(0).mod
.neg() << 29;
1294 code
[1] |= i
->src(0).mod
.abs() << 20;
1298 assert(i
->op
!= OP_ABS
|| !i
->src(0).mod
.neg());
// Encodes a pre-operation: code[0] = 0xb0000000; code[1] is 0xc0004000 for
// OP_PREEX2 and 0xc0000000 for any other operation routed here.
// The abs and neg modifiers of source 0 go to bits 20 and 26 of code[1].
// NOTE(review): the operand-form call is not visible in this listing.
1304 CodeEmitterNV50::emitPreOp(const Instruction
*i
)
1306 code
[0] = 0xb0000000;
1307 code
[1] = (i
->op
== OP_PREEX2
) ? 0xc0004000 : 0xc0000000;
1309 code
[1] |= i
->src(0).mod
.abs() << 20;
1310 code
[1] |= i
->src(0).mod
.neg() << 26;
// Encodes a special-function operation selected by `subOp`;
// code[0] starts at 0x90000000.
// 4-byte form: only OP_RCP is accepted (assert); the abs / neg modifiers of
// source 0 go to bits 15 / 22 of code[0].
// Long form: `subOp` goes to bit 29 of code[1] and the abs / neg modifiers
// to bits 20 / 26 of code[1].
// NOTE(review): the else keyword and operand-form calls are not visible in
// this listing.
1316 CodeEmitterNV50::emitSFnOp(const Instruction
*i
, uint8_t subOp
)
1318 code
[0] = 0x90000000;
1320 if (i
->encSize
== 4) {
1321 assert(i
->op
== OP_RCP
);
1322 code
[0] |= i
->src(0).mod
.abs() << 15;
1323 code
[0] |= i
->src(0).mod
.neg() << 22;
1326 code
[1] = subOp
<< 29;
1327 code
[1] |= i
->src(0).mod
.abs() << 20;
1328 code
[1] |= i
->src(0).mod
.neg() << 26;
// Encodes a bitwise NOT: code[0] = 0xd0000000, code[1] = 0x0002c000, and
// 0x04000000 is ORed into code[1] on a path whose condition is not visible.
// NOTE(review): the switch/condition around the last statement and the
// operand calls are not visible in this listing.
1334 CodeEmitterNV50::emitNOT(const Instruction
*i
)
1336 code
[0] = 0xd0000000;
1337 code
[1] = 0x0002c000;
1342 code
[1] |= 0x04000000;
// Encodes AND / OR / XOR: code[0] starts at 0xd0000000.
// Immediate form (source 1 in FILE_IMMEDIATE): OP_OR sets 0x0100 and OP_XOR
// sets 0x8000 in code[0]; the remaining path asserts OP_AND. The NOT
// modifier of source 0 is tested.
// Register form: code[1] is 0x04000000 / 0x04004000 / 0x04008000 for
// AND / OR / XOR, and the NOT modifiers of sources 0 and 1 are tested.
// NOTE(review): the switch headers, the statements under the NOT tests and
// the operand-form calls are not visible in this listing.
1352 CodeEmitterNV50::emitLogicOp(const Instruction
*i
)
1354 code
[0] = 0xd0000000;
1357 if (i
->src(1).getFile() == FILE_IMMEDIATE
) {
1359 case OP_OR
: code
[0] |= 0x0100; break;
1360 case OP_XOR
: code
[0] |= 0x8000; break;
1362 assert(i
->op
== OP_AND
);
1365 if (i
->src(0).mod
& Modifier(NV50_IR_MOD_NOT
))
1371 case OP_AND
: code
[1] = 0x04000000; break;
1372 case OP_OR
: code
[1] = 0x04004000; break;
1373 case OP_XOR
: code
[1] = 0x04008000; break;
1378 if (i
->src(0).mod
& Modifier(NV50_IR_MOD_NOT
))
1380 if (i
->src(1).mod
& Modifier(NV50_IR_MOD_NOT
))
// Encodes an operation with shift amount `shl` at bit 16 of code[0] and
// (def 0 id + 1) at bit 2 of code[0]; source files are selected with
// NV50_OP_ENC_IMM. Called from emitShift() when def 0 is in FILE_ADDRESS.
// NOTE(review): the source/flags calls are not visible in this listing.
1388 CodeEmitterNV50::emitARL(const Instruction
*i
, unsigned int shl
)
1390 code
[0] = 0x00000001 | (shl
<< 16);
1391 code
[1] = 0xc0000000;
1393 code
[0] |= (DDATA(i
->def(0)).id
+ 1) << 2;
1395 setSrcFileBits(i
, NV50_OP_ENC_IMM
);
// Encodes a shift.
// If def 0 is in FILE_ADDRESS, source 1 must be an immediate (assert) and
// the work is delegated to emitARL() with the immediate masked to 6 bits.
// Otherwise code[0] = 0x30000001 and code[1] is 0xe4000000 for OP_SHR,
// 0xc4000000 for other operations. With an immediate shift amount, its low
// 7 bits go to bit 16 of code[0], def 0's id to bit 2 and source 0's id to
// bit 9.
// NOTE(review): the else keywords, the statement under the signed-SHR test
// and the non-immediate path are not visible in this listing.
1401 CodeEmitterNV50::emitShift(const Instruction
*i
)
1403 if (i
->def(0).getFile() == FILE_ADDRESS
) {
1404 assert(i
->srcExists(1) && i
->src(1).getFile() == FILE_IMMEDIATE
);
1405 emitARL(i
, i
->getSrc(1)->reg
.data
.u32
& 0x3f);
1407 code
[0] = 0x30000001;
1408 code
[1] = (i
->op
== OP_SHR
) ? 0xe4000000 : 0xc4000000;
1409 if (i
->op
== OP_SHR
&& isSignedType(i
->sType
))
1412 if (i
->src(1).getFile() == FILE_IMMEDIATE
) {
1414 code
[0] |= (i
->getSrc(1)->reg
.data
.u32
& 0x7f) << 16;
1415 defId(i
->def(0), 2);
1416 srcId(i
->src(0), 9);
// Encodes an output operation: code[0] is 0xf0000201 for OP_EMIT and
// 0xf0000401 for any other operation routed here; code[1] = 0xc0000000.
// NOTE(review): the flags call is not visible in this listing.
1425 CodeEmitterNV50::emitOUT(const Instruction
*i
)
1427 code
[0] = (i
->op
== OP_EMIT
) ? 0xf0000201 : 0xf0000401;
1428 code
[1] = 0xc0000000;
// Encodes a texture instruction.
// code[0] starts at 0xf0000001 and code[1] at 0; operation-specific values
// are then applied (the remaining path asserts OP_TEX).
// tex.r goes to bit 9 and tex.s to bit 17 of code[0]. `argc` starts from the
// target's argument count and (argc - 1) is written at bit 22 of code[0].
// Cube targets set 0x08000000 in code[0]. With tex.useOffsets, the low
// 4 bits of offset[0..2] go to bits 24 / 20 / 16 of code[1]. The low two
// bits of tex.mask go to bit 25 of code[0] and the next two to bits 14..15
// of code[1]. def 0's id goes to bit 2.
// NOTE(review): the switch header and case labels for the per-operation
// values, the adjustments to `argc`, and the liveOnly statement are not
// visible in this listing.
1434 CodeEmitterNV50::emitTEX(const TexInstruction
*i
)
1436 code
[0] = 0xf0000001;
1437 code
[1] = 0x00000000;
1441 code
[1] = 0x20000000;
1444 code
[1] = 0x40000000;
1447 code
[0] |= 0x01000000;
1450 code
[0] |= 0x01000000;
1451 code
[1] = 0x80000000;
1454 code
[1] = 0x60020000;
1457 assert(i
->op
== OP_TEX
);
1461 code
[0] |= i
->tex
.r
<< 9;
1462 code
[0] |= i
->tex
.s
<< 17;
1464 int argc
= i
->tex
.target
.getArgCount();
1466 if (i
->op
== OP_TXB
|| i
->op
== OP_TXL
|| i
->op
== OP_TXF
)
1468 if (i
->tex
.target
.isShadow())
1472 code
[0] |= (argc
- 1) << 22;
1474 if (i
->tex
.target
.isCube()) {
1475 code
[0] |= 0x08000000;
1477 if (i
->tex
.useOffsets
) {
1478 code
[1] |= (i
->tex
.offset
[0] & 0xf) << 24;
1479 code
[1] |= (i
->tex
.offset
[1] & 0xf) << 20;
1480 code
[1] |= (i
->tex
.offset
[2] & 0xf) << 16;
1483 code
[0] |= (i
->tex
.mask
& 0x3) << 25;
1484 code
[1] |= (i
->tex
.mask
& 0xc) << 12;
1486 if (i
->tex
.liveOnly
)
1489 defId(i
->def(0), 2);
// Encodes a texture query; only TXQ_DIMS is accepted (assert).
// Fixed opcode words, then tex.r at bit 9 and tex.s at bit 17 of code[0].
// The low two bits of tex.mask go to bit 25 of code[0] and the next two to
// bits 14..15 of code[1]. def 0's id goes to bit 2.
// NOTE(review): the source/flags calls are not visible in this listing.
1495 CodeEmitterNV50::emitTXQ(const TexInstruction
*i
)
1497 assert(i
->tex
.query
== TXQ_DIMS
);
1499 code
[0] = 0xf0000001;
1500 code
[1] = 0x60000000;
1502 code
[0] |= i
->tex
.r
<< 9;
1503 code
[0] |= i
->tex
.s
<< 17;
1505 code
[0] |= (i
->tex
.mask
& 0x3) << 25;
1506 code
[1] |= (i
->tex
.mask
& 0xc) << 12;
1508 defId(i
->def(0), 2);
// Encodes a TEXPREP operation: code[0] combines 0xf8000001, the constant
// 3 at bit 22, tex.s at bit 17 and tex.r at bit 9; code[1] = 0x60010000.
// The low two bits of tex.mask go to bit 25 of code[0] and the next two to
// bits 14..15 of code[1]. def 0's id goes to bit 2.
// NOTE(review): the flags call is not visible in this listing.
1514 CodeEmitterNV50::emitTEXPREP(const TexInstruction
*i
)
1516 code
[0] = 0xf8000001 | (3 << 22) | (i
->tex
.s
<< 17) | (i
->tex
.r
<< 9);
1517 code
[1] = 0x60010000;
1519 code
[0] |= (i
->tex
.mask
& 0x3) << 25;
1520 code
[1] |= (i
->tex
.mask
& 0xc) << 12;
1521 defId(i
->def(0), 2);
// Emits one instruction of the PRERET emulation sequence, selected by
// i->subOp relative to NV50_IR_SUBOP_EMU_PRERET.
// `pos` starts at the target block's binPos + 8. The default words are an
// unconditional branch; the path that asserts subOp == EMU_PRERET + 2 emits
// a call without predicate instead. Two code relocations for `pos` are
// registered, one per code word.
// NOTE(review): the switch header, the per-case adjustments of `pos` and the
// writes of `pos` into the code words are not visible in this listing.
1527 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction
*i
)
1529 uint32_t pos
= i
->target
.bb
->binPos
+ 8; // +8 to skip an op */
1531 code
[0] = 0x10000003; // bra
1532 code
[1] = 0x00000780; // always
1535 case NV50_IR_SUBOP_EMU_PRERET
+ 0: // bra to the call
1537 case NV50_IR_SUBOP_EMU_PRERET
+ 1: // bra to skip the call
1541 assert(i
->subOp
== (NV50_IR_SUBOP_EMU_PRERET
+ 2));
1542 code
[0] = 0x20000003; // call
1543 code
[1] = 0x00000000; // no predicate
1546 addReloc(RelocEntry::TYPE_CODE
, 0, pos
, 0x07fff800, 9);
1547 addReloc(RelocEntry::TYPE_CODE
, 1, pos
, 0x000fc000, -4);
// Encodes a control-flow instruction: code[0] = 0x00000003 with `flowOp` at
// bit 28, code[1] = 0.
// subOp values >= NV50_IR_SUBOP_EMU_PRERET take a separate path.
// The target position `pos` comes from the builtin offset
// (targNV50->getBuiltinOffset), the target function's binPos, or the target
// basic block's binPos. (pos >> 2) & 0xffff goes to bit 11 of code[0] and
// (pos >> 18) & 0x3f to bit 14 of code[1]. Two relocations are registered,
// of type TYPE_BUILTIN when f->builtin is set and TYPE_CODE otherwise.
// NOTE(review): the switch that sets hasPred / hasTarg, the declaration of
// `pos`, the else keywords and the predicate handling are not visible in
// this listing.
1551 CodeEmitterNV50::emitFlow(const Instruction
*i
, uint8_t flowOp
)
1553 const FlowInstruction
*f
= i
->asFlow();
1554 bool hasPred
= false;
1555 bool hasTarg
= false;
1557 code
[0] = 0x00000003 | (flowOp
<< 28);
1558 code
[1] = 0x00000000;
1578 if (i
->subOp
>= NV50_IR_SUBOP_EMU_PRERET
) {
1593 if (f
->op
== OP_CALL
) {
1595 pos
= targNV50
->getBuiltinOffset(f
->target
.builtin
);
1597 pos
= f
->target
.fn
->binPos
;
1600 pos
= f
->target
.bb
->binPos
;
1603 code
[0] |= ((pos
>> 2) & 0xffff) << 11;
1604 code
[1] |= ((pos
>> 18) & 0x003f) << 14;
1606 RelocEntry::Type relocTy
;
1608 relocTy
= f
->builtin
? RelocEntry::TYPE_BUILTIN
: RelocEntry::TYPE_CODE
;
1610 addReloc(relocTy
, 0, pos
, 0x07fff800, 9);
1611 addReloc(relocTy
, 1, pos
, 0x000fc000, -4);
// Encodes a barrier: source 0 is read as an immediate (`barId`) and its u32
// value is placed at bit 21 of code[0]; code[1] = 0x00004000.
// The sub-operation NV50_IR_SUBOP_BAR_SYNC is tested afterwards.
// NOTE(review): any null check on `barId` and the statement under the
// BAR_SYNC test are not visible in this listing.
1616 CodeEmitterNV50::emitBAR(const Instruction
*i
)
1618 ImmediateValue
*barId
= i
->getSrc(0)->asImm();
1621 code
[0] = 0x82000003 | (barId
->reg
.data
.u32
<< 21);
1622 code
[1] = 0x00004000;
1624 if (i
->subOp
== NV50_IR_SUBOP_BAR_SYNC
)
1629 CodeEmitterNV50::emitATOM(const Instruction
*i
)
1633 case NV50_IR_SUBOP_ATOM_ADD
: subOp
= 0x0; break;
1634 case NV50_IR_SUBOP_ATOM_MIN
: subOp
= 0x7; break;
1635 case NV50_IR_SUBOP_ATOM_MAX
: subOp
= 0x6; break;
1636 case NV50_IR_SUBOP_ATOM_INC
: subOp
= 0x4; break;
1637 case NV50_IR_SUBOP_ATOM_DEC
: subOp
= 0x5; break;
1638 case NV50_IR_SUBOP_ATOM_AND
: subOp
= 0xa; break;
1639 case NV50_IR_SUBOP_ATOM_OR
: subOp
= 0xb; break;
1640 case NV50_IR_SUBOP_ATOM_XOR
: subOp
= 0xc; break;
1641 case NV50_IR_SUBOP_ATOM_CAS
: subOp
= 0x2; break;
1642 case NV50_IR_SUBOP_ATOM_EXCH
: subOp
= 0x1; break;
1644 assert(!"invalid subop");
1647 code
[0] = 0xd0000001;
1648 code
[1] = 0xe0c00000 | (subOp
<< 2);
1649 if (isSignedType(i
->dType
))
1656 if (i
->subOp
== NV50_IR_SUBOP_ATOM_CAS
)
1660 code
[0] |= i
->getSrc(0)->reg
.fileIndex
<< 23;
1661 srcId(i
->getIndirect(0, 0), 9);
1665 CodeEmitterNV50::emitInstruction(Instruction
*insn
)
1667 if (!insn
->encSize
) {
1668 ERROR("skipping unencodable instruction: "); insn
->print();
1671 if (codeSize
+ insn
->encSize
> codeSizeLimit
) {
1672 ERROR("code emitter output buffer too small\n");
1676 if (insn
->bb
->getProgram()->dbgFlags
& NV50_IR_DEBUG_BASIC
) {
1677 INFO("EMIT: "); insn
->print();
1709 if (isFloatType(insn
->dType
))
1711 else if (insn
->getDef(0)->reg
.file
== FILE_ADDRESS
)
1717 if (isFloatType(insn
->dType
))
1724 if (isFloatType(insn
->dType
))
1760 if (insn
->def(0).getFile() == FILE_ADDRESS
)
1763 if (insn
->def(0).getFile() == FILE_FLAGS
||
1764 insn
->src(0).getFile() == FILE_FLAGS
||
1765 insn
->src(0).getFile() == FILE_ADDRESS
)
1798 emitTEX(insn
->asTex());
1801 emitTXQ(insn
->asTex());
1804 emitTEXPREP(insn
->asTex());
1811 emitFlow(insn
, 0x0);
1814 emitFlow(insn
, 0x1);
1817 emitFlow(insn
, 0x2);
1820 emitFlow(insn
, 0x3);
1823 emitFlow(insn
, 0x4);
1826 emitFlow(insn
, 0x5);
1829 emitFlow(insn
, 0x6);
1832 emitFlow(insn
, 0x7);
1835 emitFlow(insn
, 0xa);
1838 emitFlow(insn
, 0xd);
1841 emitQUADOP(insn
, insn
->lanes
, insn
->subOp
);
1844 emitQUADOP(insn
, 4, insn
->src(0).mod
.neg() ? 0x66 : 0x99);
1847 emitQUADOP(insn
, 5, insn
->src(0).mod
.neg() ? 0x5a : 0xa5);
1858 ERROR("operation should have been eliminated\n");
1872 ERROR("operation should have been lowered\n");
1875 ERROR("unknown op: %u\n", insn
->op
);
1878 if (insn
->join
|| insn
->op
== OP_JOIN
)
1881 if (insn
->exit
|| insn
->op
== OP_EXIT
)
1884 assert((insn
->encSize
== 8) == (code
[0] & 1));
1886 code
+= insn
->encSize
/ 4;
1887 codeSize
+= insn
->encSize
;
1892 CodeEmitterNV50::getMinEncodingSize(const Instruction
*i
) const
1894 const Target::OpInfo
&info
= targ
->getOpInfo(i
);
1896 if (info
.minEncSize
> 4)
1899 // check constraints on dst and src operands
1900 for (int d
= 0; i
->defExists(d
); ++d
) {
1901 if (i
->def(d
).rep()->reg
.data
.id
> 63 ||
1902 i
->def(d
).rep()->reg
.file
!= FILE_GPR
)
1906 for (int s
= 0; i
->srcExists(s
); ++s
) {
1907 DataFile sf
= i
->src(s
).getFile();
1909 if (sf
!= FILE_SHADER_INPUT
|| progType
!= Program::TYPE_FRAGMENT
)
1911 if (i
->src(s
).rep()->reg
.data
.id
> 63)
1915 // check modifiers & rounding
1916 if (i
->join
|| i
->lanes
!= 0xf || i
->exit
)
1918 if (i
->op
== OP_MUL
&& i
->rnd
!= ROUND_N
)
1922 return 8; // TODO: short tex encoding
1924 // check constraints on short MAD
1925 if (info
.srcNr
>= 2 && i
->srcExists(2)) {
1926 if (i
->saturate
|| i
->src(2).mod
)
1928 if ((i
->src(0).mod
^ i
->src(1).mod
) ||
1929 (i
->src(0).mod
| i
->src(1).mod
).abs())
1931 if (!i
->defExists(0) ||
1932 i
->def(0).rep()->reg
.data
.id
!= i
->src(2).rep()->reg
.data
.id
)
1936 return info
.minEncSize
;
1939 // Change the encoding size of an instruction after BBs have been scheduled.
1941 makeInstructionLong(Instruction
*insn
)
1943 if (insn
->encSize
== 8)
1945 Function
*fn
= insn
->bb
->getFunction();
1949 for (Instruction
*i
= insn
->next
; i
&& i
->encSize
== 4; ++n
, i
= i
->next
);
1953 insn
->next
->encSize
= 8;
1955 if (insn
->prev
&& insn
->prev
->encSize
== 4) {
1957 insn
->prev
->encSize
= 8;
1961 for (int i
= fn
->bbCount
- 1; i
>= 0 && fn
->bbArray
[i
] != insn
->bb
; --i
) {
1962 fn
->bbArray
[i
]->binPos
+= 4;
1965 insn
->bb
->binSize
+= adj
;
1969 trySetExitModifier(Instruction
*insn
)
1971 if (insn
->op
== OP_DISCARD
||
1972 insn
->op
== OP_QUADON
||
1973 insn
->op
== OP_QUADPOP
)
1975 for (int s
= 0; insn
->srcExists(s
); ++s
)
1976 if (insn
->src(s
).getFile() == FILE_IMMEDIATE
)
1978 if (insn
->asFlow()) {
1979 if (insn
->op
== OP_CALL
) // side effects !
1981 if (insn
->getPredicate()) // cannot do conditional exit (or can we ?)
1986 makeInstructionLong(insn
);
1991 replaceExitWithModifier(Function
*func
)
1993 BasicBlock
*epilogue
= BasicBlock::get(func
->cfgExit
);
1995 if (!epilogue
->getExit() ||
1996 epilogue
->getExit()->op
!= OP_EXIT
) // only main will use OP_EXIT
1999 if (epilogue
->getEntry()->op
!= OP_EXIT
) {
2000 Instruction
*insn
= epilogue
->getExit()->prev
;
2001 if (!insn
|| !trySetExitModifier(insn
))
2005 for (Graph::EdgeIterator ei
= func
->cfgExit
->incident();
2006 !ei
.end(); ei
.next()) {
2007 BasicBlock
*bb
= BasicBlock::get(ei
.getNode());
2008 Instruction
*i
= bb
->getExit();
2010 if (!i
|| !trySetExitModifier(i
))
2014 epilogue
->binSize
-= 8;
2016 delete_Instruction(func
->getProgram(), epilogue
->getExit());
2020 CodeEmitterNV50::prepareEmission(Function
*func
)
2022 CodeEmitter::prepareEmission(func
);
2024 replaceExitWithModifier(func
);
2027 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50
*target
) :
2028 CodeEmitter(target
), targNV50(target
)
2030 targ
= target
; // specialized
2032 codeSize
= codeSizeLimit
= 0;
2037 TargetNV50::getCodeEmitter(Program::Type type
)
2039 CodeEmitterNV50
*emit
= new CodeEmitterNV50(this);
2040 emit
->setProgramType(type
);
2044 } // namespace nv50_ir