/*
 * Copyright 2014 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs <bskeggs@redhat.com>
 */
25 #include "codegen/nv50_ir_target_gm107.h"
27 //#define GM107_DEBUG_SCHED_DATA
31 class CodeEmitterGM107
: public CodeEmitter
34 CodeEmitterGM107(const TargetGM107
*);
36 virtual bool emitInstruction(Instruction
*);
37 virtual uint32_t getMinEncodingSize(const Instruction
*) const;
39 virtual void prepareEmission(Program
*);
40 virtual void prepareEmission(Function
*);
42 inline void setProgramType(Program::Type pType
) { progType
= pType
; }
45 const TargetGM107
*targGM107
;
47 Program::Type progType
;
49 const Instruction
*insn
;
50 const bool writeIssueDelays
;
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b
, int s
, uint32_t v
) { emitField(code
, b
, s
, v
); }
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o
) { emitInsn(o
, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value
*);
61 inline void emitGPR(int pos
) {
62 emitGPR(pos
, (const Value
*)NULL
);
64 inline void emitGPR(int pos
, const ValueRef
&ref
) {
65 emitGPR(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
67 inline void emitGPR(int pos
, const ValueRef
*ref
) {
68 emitGPR(pos
, ref
? ref
->rep() : (const Value
*)NULL
);
70 inline void emitGPR(int pos
, const ValueDef
&def
) {
71 emitGPR(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
73 inline void emitSYS(int, const Value
*);
74 inline void emitSYS(int pos
, const ValueRef
&ref
) {
75 emitSYS(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
77 inline void emitPRED(int, const Value
*);
78 inline void emitPRED(int pos
) {
79 emitPRED(pos
, (const Value
*)NULL
);
81 inline void emitPRED(int pos
, const ValueRef
&ref
) {
82 emitPRED(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
84 inline void emitPRED(int pos
, const ValueDef
&def
) {
85 emitPRED(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
87 inline void emitADDR(int, int, int, int, const ValueRef
&);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef
&);
89 inline bool longIMMD(const ValueRef
&);
90 inline void emitIMMD(int, int, const ValueRef
&);
92 void emitCond3(int, CondCode
);
93 void emitCond4(int, CondCode
);
94 void emitCond5(int pos
, CondCode cc
) { emitCond4(pos
, cc
); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef
&);
101 inline void emitNEG(int, const ValueRef
&);
102 inline void emitNEG2(int, const ValueRef
&, const ValueRef
&);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode
, int);
105 inline void emitRND(int pos
) {
106 emitRND(pos
, insn
->rnd
, -1);
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef
&);
170 void emitLDSTs(int, DataType
);
210 void emitSUHandle(const int s
);
/*******************************************************************************
 * general instruction layout/fields
 ******************************************************************************/
221 CodeEmitterGM107::emitField(uint32_t *data
, int b
, int s
, uint32_t v
)
224 uint32_t m
= ((1ULL << s
) - 1);
225 uint64_t d
= (uint64_t)(v
& m
) << b
;
226 assert(!(v
& ~m
) || (v
& ~m
) == ~m
);
233 CodeEmitterGM107::emitPred()
235 if (insn
->predSrc
>= 0) {
236 emitField(16, 3, insn
->getSrc(insn
->predSrc
)->rep()->reg
.data
.id
);
237 emitField(19, 1, insn
->cc
== CC_NOT_P
);
244 CodeEmitterGM107::emitInsn(uint32_t hi
, bool pred
)
246 code
[0] = 0x00000000;
253 CodeEmitterGM107::emitGPR(int pos
, const Value
*val
)
255 emitField(pos
, 8, val
&& !val
->inFile(FILE_FLAGS
) ?
256 val
->reg
.data
.id
: 255);
260 CodeEmitterGM107::emitSYS(int pos
, const Value
*val
)
262 int id
= val
? val
->reg
.data
.id
: -1;
265 case SV_LANEID
: id
= 0x00; break;
266 case SV_VERTEX_COUNT
: id
= 0x10; break;
267 case SV_INVOCATION_ID
: id
= 0x11; break;
268 case SV_THREAD_KILL
: id
= 0x13; break;
269 case SV_INVOCATION_INFO
: id
= 0x1d; break;
270 case SV_TID
: id
= 0x21 + val
->reg
.data
.sv
.index
; break;
271 case SV_CTAID
: id
= 0x25 + val
->reg
.data
.sv
.index
; break;
273 assert(!"invalid system value");
278 emitField(pos
, 8, id
);
282 CodeEmitterGM107::emitPRED(int pos
, const Value
*val
)
284 emitField(pos
, 3, val
? val
->reg
.data
.id
: 7);
288 CodeEmitterGM107::emitADDR(int gpr
, int off
, int len
, int shr
,
291 const Value
*v
= ref
.get();
292 assert(!(v
->reg
.data
.offset
& ((1 << shr
) - 1)));
294 emitGPR(gpr
, ref
.getIndirect(0));
295 emitField(off
, len
, v
->reg
.data
.offset
>> shr
);
299 CodeEmitterGM107::emitCBUF(int buf
, int gpr
, int off
, int len
, int shr
,
302 const Value
*v
= ref
.get();
303 const Symbol
*s
= v
->asSym();
305 assert(!(s
->reg
.data
.offset
& ((1 << shr
) - 1)));
307 emitField(buf
, 5, v
->reg
.fileIndex
);
309 emitGPR(gpr
, ref
.getIndirect(0));
310 emitField(off
, 16, s
->reg
.data
.offset
>> shr
);
314 CodeEmitterGM107::longIMMD(const ValueRef
&ref
)
316 if (ref
.getFile() == FILE_IMMEDIATE
) {
317 const ImmediateValue
*imm
= ref
.get()->asImm();
318 if (isFloatType(insn
->sType
)) {
319 if ((imm
->reg
.data
.u32
& 0x00000fff) != 0x00000000)
322 if ((imm
->reg
.data
.u32
& 0xfff00000) != 0x00000000 &&
323 (imm
->reg
.data
.u32
& 0xfff00000) != 0xfff00000)
331 CodeEmitterGM107::emitIMMD(int pos
, int len
, const ValueRef
&ref
)
333 const ImmediateValue
*imm
= ref
.get()->asImm();
334 uint32_t val
= imm
->reg
.data
.u32
;
337 if (insn
->sType
== TYPE_F32
|| insn
->sType
== TYPE_F16
) {
338 assert(!(val
& 0x00000fff));
340 } else if (insn
->sType
== TYPE_F64
) {
341 assert(!(imm
->reg
.data
.u64
& 0x00000fffffffffffULL
));
342 val
= imm
->reg
.data
.u64
>> 44;
344 assert(!(val
& 0xfff00000) || (val
& 0xfff00000) == 0xfff00000);
345 emitField( 56, 1, (val
& 0x80000) >> 19);
346 emitField(pos
, len
, (val
& 0x7ffff));
348 emitField(pos
, len
, val
);
352 /*******************************************************************************
354 ******************************************************************************/
357 CodeEmitterGM107::emitCond3(int pos
, CondCode code
)
362 case CC_FL
: data
= 0x00; break;
364 case CC_LT
: data
= 0x01; break;
366 case CC_EQ
: data
= 0x02; break;
368 case CC_LE
: data
= 0x03; break;
370 case CC_GT
: data
= 0x04; break;
372 case CC_NE
: data
= 0x05; break;
374 case CC_GE
: data
= 0x06; break;
375 case CC_TR
: data
= 0x07; break;
377 assert(!"invalid cond3");
381 emitField(pos
, 3, data
);
385 CodeEmitterGM107::emitCond4(int pos
, CondCode code
)
390 case CC_FL
: data
= 0x00; break;
391 case CC_LT
: data
= 0x01; break;
392 case CC_EQ
: data
= 0x02; break;
393 case CC_LE
: data
= 0x03; break;
394 case CC_GT
: data
= 0x04; break;
395 case CC_NE
: data
= 0x05; break;
396 case CC_GE
: data
= 0x06; break;
397 // case CC_NUM: data = 0x07; break;
398 // case CC_NAN: data = 0x08; break;
399 case CC_LTU
: data
= 0x09; break;
400 case CC_EQU
: data
= 0x0a; break;
401 case CC_LEU
: data
= 0x0b; break;
402 case CC_GTU
: data
= 0x0c; break;
403 case CC_NEU
: data
= 0x0d; break;
404 case CC_GEU
: data
= 0x0e; break;
405 case CC_TR
: data
= 0x0f; break;
407 assert(!"invalid cond4");
411 emitField(pos
, 4, data
);
415 CodeEmitterGM107::emitO(int pos
)
417 emitField(pos
, 1, insn
->getSrc(0)->reg
.file
== FILE_SHADER_OUTPUT
);
421 CodeEmitterGM107::emitP(int pos
)
423 emitField(pos
, 1, insn
->perPatch
);
427 CodeEmitterGM107::emitSAT(int pos
)
429 emitField(pos
, 1, insn
->saturate
);
433 CodeEmitterGM107::emitCC(int pos
)
435 emitField(pos
, 1, insn
->flagsDef
>= 0);
439 CodeEmitterGM107::emitX(int pos
)
441 emitField(pos
, 1, insn
->flagsSrc
>= 0);
445 CodeEmitterGM107::emitABS(int pos
, const ValueRef
&ref
)
447 emitField(pos
, 1, ref
.mod
.abs());
451 CodeEmitterGM107::emitNEG(int pos
, const ValueRef
&ref
)
453 emitField(pos
, 1, ref
.mod
.neg());
457 CodeEmitterGM107::emitNEG2(int pos
, const ValueRef
&a
, const ValueRef
&b
)
459 emitField(pos
, 1, a
.mod
.neg() ^ b
.mod
.neg());
463 CodeEmitterGM107::emitFMZ(int pos
, int len
)
465 emitField(pos
, len
, insn
->dnz
<< 1 | insn
->ftz
);
469 CodeEmitterGM107::emitRND(int rmp
, RoundMode rnd
, int rip
)
473 case ROUND_NI
: ri
= 1;
474 case ROUND_N
: rm
= 0; break;
475 case ROUND_MI
: ri
= 1;
476 case ROUND_M
: rm
= 1; break;
477 case ROUND_PI
: ri
= 1;
478 case ROUND_P
: rm
= 2; break;
479 case ROUND_ZI
: ri
= 1;
480 case ROUND_Z
: rm
= 3; break;
482 assert(!"invalid round mode");
485 emitField(rip
, 1, ri
);
486 emitField(rmp
, 2, rm
);
490 CodeEmitterGM107::emitPDIV(int pos
)
492 assert(insn
->postFactor
>= -3 && insn
->postFactor
<= 3);
493 if (insn
->postFactor
> 0)
494 emitField(pos
, 3, 7 - insn
->postFactor
);
496 emitField(pos
, 3, 0 - insn
->postFactor
);
500 CodeEmitterGM107::emitINV(int pos
, const ValueRef
&ref
)
502 emitField(pos
, 1, !!(ref
.mod
& Modifier(NV50_IR_MOD_NOT
)));
505 /*******************************************************************************
507 ******************************************************************************/
510 CodeEmitterGM107::emitEXIT()
512 emitInsn (0xe3000000);
513 emitCond5(0x00, CC_TR
);
517 CodeEmitterGM107::emitBRA()
519 const FlowInstruction
*insn
= this->insn
->asFlow();
522 if (insn
->indirect
) {
524 emitInsn(0xe2000000); // JMX
526 emitInsn(0xe2500000); // BRX
530 emitInsn(0xe2100000); // JMP
532 emitInsn(0xe2400000); // BRA
533 emitField(0x07, 1, insn
->allWarp
);
536 emitField(0x06, 1, insn
->limit
);
537 emitCond5(0x00, CC_TR
);
539 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
540 int32_t pos
= insn
->target
.bb
->binPos
;
541 if (writeIssueDelays
&& !(pos
& 0x1f))
544 emitField(0x14, 24, pos
- (codeSize
+ 8));
546 emitField(0x14, 32, pos
);
548 emitCBUF (0x24, gpr
, 20, 16, 0, insn
->src(0));
549 emitField(0x05, 1, 1);
554 CodeEmitterGM107::emitCAL()
556 const FlowInstruction
*insn
= this->insn
->asFlow();
558 if (insn
->absolute
) {
559 emitInsn(0xe2200000, 0); // JCAL
561 emitInsn(0xe2600000, 0); // CAL
564 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
566 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
569 int pcAbs
= targGM107
->getBuiltinOffset(insn
->target
.builtin
);
570 addReloc(RelocEntry::TYPE_BUILTIN
, 0, pcAbs
, 0xfff00000, 20);
571 addReloc(RelocEntry::TYPE_BUILTIN
, 1, pcAbs
, 0x000fffff, -12);
573 emitField(0x14, 32, insn
->target
.bb
->binPos
);
577 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
578 emitField(0x05, 1, 1);
583 CodeEmitterGM107::emitPCNT()
585 const FlowInstruction
*insn
= this->insn
->asFlow();
587 emitInsn(0xe2b00000, 0);
589 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
590 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
592 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
593 emitField(0x05, 1, 1);
598 CodeEmitterGM107::emitCONT()
600 emitInsn (0xe3500000);
601 emitCond5(0x00, CC_TR
);
605 CodeEmitterGM107::emitPBK()
607 const FlowInstruction
*insn
= this->insn
->asFlow();
609 emitInsn(0xe2a00000, 0);
611 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
612 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
614 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
615 emitField(0x05, 1, 1);
620 CodeEmitterGM107::emitBRK()
622 emitInsn (0xe3400000);
623 emitCond5(0x00, CC_TR
);
627 CodeEmitterGM107::emitPRET()
629 const FlowInstruction
*insn
= this->insn
->asFlow();
631 emitInsn(0xe2700000, 0);
633 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
634 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
636 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
637 emitField(0x05, 1, 1);
642 CodeEmitterGM107::emitRET()
644 emitInsn (0xe3200000);
645 emitCond5(0x00, CC_TR
);
649 CodeEmitterGM107::emitSSY()
651 const FlowInstruction
*insn
= this->insn
->asFlow();
653 emitInsn(0xe2900000, 0);
655 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
656 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
658 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
659 emitField(0x05, 1, 1);
664 CodeEmitterGM107::emitSYNC()
666 emitInsn (0xf0f80000);
667 emitCond5(0x00, CC_TR
);
671 CodeEmitterGM107::emitSAM()
673 emitInsn(0xe3700000, 0);
677 CodeEmitterGM107::emitRAM()
679 emitInsn(0xe3800000, 0);
682 /*******************************************************************************
684 ******************************************************************************/
/*******************************************************************************
 * movement / conversion
 ******************************************************************************/
691 CodeEmitterGM107::emitMOV()
693 if (insn
->src(0).getFile() != FILE_IMMEDIATE
) {
694 switch (insn
->src(0).getFile()) {
696 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
697 emitInsn(0x5b6a0000);
700 emitInsn(0x5c980000);
702 emitGPR (0x14, insn
->src(0));
704 case FILE_MEMORY_CONST
:
705 emitInsn(0x4c980000);
706 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
709 emitInsn(0x38980000);
710 emitIMMD(0x14, 19, insn
->src(0));
713 emitInsn(0x50880000);
714 emitPRED(0x0c, insn
->src(0));
719 assert(!"bad src file");
722 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
723 insn
->src(0).getFile() != FILE_PREDICATE
)
724 emitField(0x27, 4, insn
->lanes
);
726 emitInsn (0x01000000);
727 emitIMMD (0x14, 32, insn
->src(0));
728 emitField(0x0c, 4, insn
->lanes
);
731 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
733 emitPRED(0x03, insn
->def(0));
736 emitGPR(0x00, insn
->def(0));
741 CodeEmitterGM107::emitS2R()
743 emitInsn(0xf0c80000);
744 emitSYS (0x14, insn
->src(0));
745 emitGPR (0x00, insn
->def(0));
749 CodeEmitterGM107::emitF2F()
751 RoundMode rnd
= insn
->rnd
;
754 case OP_FLOOR
: rnd
= ROUND_MI
; break;
755 case OP_CEIL
: rnd
= ROUND_PI
; break;
756 case OP_TRUNC
: rnd
= ROUND_ZI
; break;
761 switch (insn
->src(0).getFile()) {
763 emitInsn(0x5ca80000);
764 emitGPR (0x14, insn
->src(0));
766 case FILE_MEMORY_CONST
:
767 emitInsn(0x4ca80000);
768 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
771 emitInsn(0x38a80000);
772 emitIMMD(0x14, 19, insn
->src(0));
775 assert(!"bad src0 file");
779 emitField(0x32, 1, (insn
->op
== OP_SAT
) || insn
->saturate
);
780 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
782 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
784 emitField(0x29, 1, insn
->subOp
);
785 emitRND (0x27, rnd
, 0x2a);
786 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
787 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
788 emitGPR (0x00, insn
->def(0));
792 CodeEmitterGM107::emitF2I()
794 RoundMode rnd
= insn
->rnd
;
797 case OP_FLOOR
: rnd
= ROUND_M
; break;
798 case OP_CEIL
: rnd
= ROUND_P
; break;
799 case OP_TRUNC
: rnd
= ROUND_Z
; break;
804 switch (insn
->src(0).getFile()) {
806 emitInsn(0x5cb00000);
807 emitGPR (0x14, insn
->src(0));
809 case FILE_MEMORY_CONST
:
810 emitInsn(0x4cb00000);
811 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
814 emitInsn(0x38b00000);
815 emitIMMD(0x14, 19, insn
->src(0));
818 assert(!"bad src0 file");
822 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
824 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
826 emitRND (0x27, rnd
, 0x2a);
827 emitField(0x0c, 1, isSignedType(insn
->dType
));
828 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
829 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
830 emitGPR (0x00, insn
->def(0));
834 CodeEmitterGM107::emitI2F()
836 RoundMode rnd
= insn
->rnd
;
839 case OP_FLOOR
: rnd
= ROUND_M
; break;
840 case OP_CEIL
: rnd
= ROUND_P
; break;
841 case OP_TRUNC
: rnd
= ROUND_Z
; break;
846 switch (insn
->src(0).getFile()) {
848 emitInsn(0x5cb80000);
849 emitGPR (0x14, insn
->src(0));
851 case FILE_MEMORY_CONST
:
852 emitInsn(0x4cb80000);
853 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
856 emitInsn(0x38b80000);
857 emitIMMD(0x14, 19, insn
->src(0));
860 assert(!"bad src0 file");
864 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
866 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
867 emitField(0x29, 2, insn
->subOp
);
868 emitRND (0x27, rnd
, -1);
869 emitField(0x0d, 1, isSignedType(insn
->sType
));
870 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
871 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
872 emitGPR (0x00, insn
->def(0));
876 CodeEmitterGM107::emitI2I()
878 switch (insn
->src(0).getFile()) {
880 emitInsn(0x5ce00000);
881 emitGPR (0x14, insn
->src(0));
883 case FILE_MEMORY_CONST
:
884 emitInsn(0x4ce00000);
885 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
888 emitInsn(0x38e00000);
889 emitIMMD(0x14, 19, insn
->src(0));
892 assert(!"bad src0 file");
897 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
899 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
900 emitField(0x29, 2, insn
->subOp
);
901 emitField(0x0d, 1, isSignedType(insn
->sType
));
902 emitField(0x0c, 1, isSignedType(insn
->dType
));
903 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
904 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
905 emitGPR (0x00, insn
->def(0));
909 selpFlip(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
911 int loc
= entry
->loc
;
912 if (data
.force_persample_interp
)
913 code
[loc
+ 1] |= 1 << 10;
915 code
[loc
+ 1] &= ~(1 << 10);
919 CodeEmitterGM107::emitSEL()
921 switch (insn
->src(1).getFile()) {
923 emitInsn(0x5ca00000);
924 emitGPR (0x14, insn
->src(1));
926 case FILE_MEMORY_CONST
:
927 emitInsn(0x4ca00000);
928 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
931 emitInsn(0x38a00000);
932 emitIMMD(0x14, 19, insn
->src(1));
935 assert(!"bad src1 file");
939 emitINV (0x2a, insn
->src(2));
940 emitPRED(0x27, insn
->src(2));
941 emitGPR (0x08, insn
->src(0));
942 emitGPR (0x00, insn
->def(0));
944 if (insn
->subOp
== 1) {
945 addInterp(0, 0, selpFlip
);
950 CodeEmitterGM107::emitSHFL()
954 emitInsn (0xef100000);
956 switch (insn
->src(1).getFile()) {
958 emitGPR(0x14, insn
->src(1));
961 emitIMMD(0x14, 5, insn
->src(1));
965 assert(!"invalid src1 file");
969 /*XXX: what is this arg? hardcode immediate for now */
970 emitField(0x22, 13, 0x1c03);
974 emitField(0x1e, 2, insn
->subOp
);
975 emitField(0x1c, 2, type
);
976 emitGPR (0x08, insn
->src(0));
977 emitGPR (0x00, insn
->def(0));
980 /*******************************************************************************
982 ******************************************************************************/
985 CodeEmitterGM107::emitDADD()
987 switch (insn
->src(1).getFile()) {
989 emitInsn(0x5c700000);
990 emitGPR (0x14, insn
->src(1));
992 case FILE_MEMORY_CONST
:
993 emitInsn(0x4c700000);
994 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
997 emitInsn(0x38700000);
998 emitIMMD(0x14, 19, insn
->src(1));
1001 assert(!"bad src1 file");
1004 emitABS(0x31, insn
->src(1));
1005 emitNEG(0x30, insn
->src(0));
1007 emitABS(0x2e, insn
->src(0));
1008 emitNEG(0x2d, insn
->src(1));
1010 if (insn
->op
== OP_SUB
)
1011 code
[1] ^= 0x00002000;
1013 emitGPR(0x08, insn
->src(0));
1014 emitGPR(0x00, insn
->def(0));
1018 CodeEmitterGM107::emitDMUL()
1020 switch (insn
->src(1).getFile()) {
1022 emitInsn(0x5c800000);
1023 emitGPR (0x14, insn
->src(1));
1025 case FILE_MEMORY_CONST
:
1026 emitInsn(0x4c800000);
1027 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1029 case FILE_IMMEDIATE
:
1030 emitInsn(0x38800000);
1031 emitIMMD(0x14, 19, insn
->src(1));
1034 assert(!"bad src1 file");
1038 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1041 emitGPR (0x08, insn
->src(0));
1042 emitGPR (0x00, insn
->def(0));
1046 CodeEmitterGM107::emitDFMA()
1048 switch(insn
->src(2).getFile()) {
1050 switch (insn
->src(1).getFile()) {
1052 emitInsn(0x5b700000);
1053 emitGPR (0x14, insn
->src(1));
1055 case FILE_MEMORY_CONST
:
1056 emitInsn(0x4b700000);
1057 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1059 case FILE_IMMEDIATE
:
1060 emitInsn(0x36700000);
1061 emitIMMD(0x14, 19, insn
->src(1));
1064 assert(!"bad src1 file");
1067 emitGPR (0x27, insn
->src(2));
1069 case FILE_MEMORY_CONST
:
1070 emitInsn(0x53700000);
1071 emitGPR (0x27, insn
->src(1));
1072 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1075 assert(!"bad src2 file");
1080 emitNEG (0x31, insn
->src(2));
1081 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1083 emitGPR (0x08, insn
->src(0));
1084 emitGPR (0x00, insn
->def(0));
1088 CodeEmitterGM107::emitDMNMX()
1090 switch (insn
->src(1).getFile()) {
1092 emitInsn(0x5c500000);
1093 emitGPR (0x14, insn
->src(1));
1095 case FILE_MEMORY_CONST
:
1096 emitInsn(0x4c500000);
1097 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1099 case FILE_IMMEDIATE
:
1100 emitInsn(0x38500000);
1101 emitIMMD(0x14, 19, insn
->src(1));
1104 assert(!"bad src1 file");
1108 emitABS (0x31, insn
->src(1));
1109 emitNEG (0x30, insn
->src(0));
1111 emitABS (0x2e, insn
->src(0));
1112 emitNEG (0x2d, insn
->src(1));
1113 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1115 emitGPR (0x08, insn
->src(0));
1116 emitGPR (0x00, insn
->def(0));
1120 CodeEmitterGM107::emitDSET()
1122 const CmpInstruction
*insn
= this->insn
->asCmp();
1124 switch (insn
->src(1).getFile()) {
1126 emitInsn(0x59000000);
1127 emitGPR (0x14, insn
->src(1));
1129 case FILE_MEMORY_CONST
:
1130 emitInsn(0x49000000);
1131 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1133 case FILE_IMMEDIATE
:
1134 emitInsn(0x32000000);
1135 emitIMMD(0x14, 19, insn
->src(1));
1138 assert(!"bad src1 file");
1142 if (insn
->op
!= OP_SET
) {
1144 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1145 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1146 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1148 assert(!"invalid set op");
1151 emitPRED(0x27, insn
->src(2));
1156 emitABS (0x36, insn
->src(0));
1157 emitNEG (0x35, insn
->src(1));
1158 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1159 emitCond4(0x30, insn
->setCond
);
1161 emitABS (0x2c, insn
->src(1));
1162 emitNEG (0x2b, insn
->src(0));
1163 emitGPR (0x08, insn
->src(0));
1164 emitGPR (0x00, insn
->def(0));
1168 CodeEmitterGM107::emitDSETP()
1170 const CmpInstruction
*insn
= this->insn
->asCmp();
1172 switch (insn
->src(1).getFile()) {
1174 emitInsn(0x5b800000);
1175 emitGPR (0x14, insn
->src(1));
1177 case FILE_MEMORY_CONST
:
1178 emitInsn(0x4b800000);
1179 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1181 case FILE_IMMEDIATE
:
1182 emitInsn(0x36800000);
1183 emitIMMD(0x14, 19, insn
->src(1));
1186 assert(!"bad src1 file");
1190 if (insn
->op
!= OP_SET
) {
1192 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1193 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1194 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1196 assert(!"invalid set op");
1199 emitPRED(0x27, insn
->src(2));
1204 emitCond4(0x30, insn
->setCond
);
1205 emitABS (0x2c, insn
->src(1));
1206 emitNEG (0x2b, insn
->src(0));
1207 emitGPR (0x08, insn
->src(0));
1208 emitABS (0x07, insn
->src(0));
1209 emitNEG (0x06, insn
->src(1));
1210 emitPRED (0x03, insn
->def(0));
1211 if (insn
->defExists(1))
1212 emitPRED(0x00, insn
->def(1));
1217 /*******************************************************************************
1219 ******************************************************************************/
1222 CodeEmitterGM107::emitFADD()
1224 if (!longIMMD(insn
->src(1))) {
1225 switch (insn
->src(1).getFile()) {
1227 emitInsn(0x5c580000);
1228 emitGPR (0x14, insn
->src(1));
1230 case FILE_MEMORY_CONST
:
1231 emitInsn(0x4c580000);
1232 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1234 case FILE_IMMEDIATE
:
1235 emitInsn(0x38580000);
1236 emitIMMD(0x14, 19, insn
->src(1));
1239 assert(!"bad src1 file");
1243 emitABS(0x31, insn
->src(1));
1244 emitNEG(0x30, insn
->src(0));
1246 emitABS(0x2e, insn
->src(0));
1247 emitNEG(0x2d, insn
->src(1));
1250 if (insn
->op
== OP_SUB
)
1251 code
[1] ^= 0x00002000;
1253 emitInsn(0x08000000);
1254 emitABS(0x39, insn
->src(1));
1255 emitNEG(0x38, insn
->src(0));
1257 emitABS(0x36, insn
->src(0));
1258 emitNEG(0x35, insn
->src(1));
1260 emitIMMD(0x14, 32, insn
->src(1));
1262 if (insn
->op
== OP_SUB
)
1263 code
[1] ^= 0x00080000;
1266 emitGPR(0x08, insn
->src(0));
1267 emitGPR(0x00, insn
->def(0));
1271 CodeEmitterGM107::emitFMUL()
1273 if (!longIMMD(insn
->src(1))) {
1274 switch (insn
->src(1).getFile()) {
1276 emitInsn(0x5c680000);
1277 emitGPR (0x14, insn
->src(1));
1279 case FILE_MEMORY_CONST
:
1280 emitInsn(0x4c680000);
1281 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1283 case FILE_IMMEDIATE
:
1284 emitInsn(0x38680000);
1285 emitIMMD(0x14, 19, insn
->src(1));
1288 assert(!"bad src1 file");
1292 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1298 emitInsn(0x1e000000);
1302 emitIMMD(0x14, 32, insn
->src(1));
1303 if (insn
->src(0).mod
.neg() ^ insn
->src(1).mod
.neg())
1304 code
[1] ^= 0x00080000; /* flip immd sign bit */
1307 emitGPR(0x08, insn
->src(0));
1308 emitGPR(0x00, insn
->def(0));
1312 CodeEmitterGM107::emitFFMA()
1314 bool isLongIMMD
= false;
1315 switch(insn
->src(2).getFile()) {
1317 switch (insn
->src(1).getFile()) {
1319 emitInsn(0x59800000);
1320 emitGPR (0x14, insn
->src(1));
1322 case FILE_MEMORY_CONST
:
1323 emitInsn(0x49800000);
1324 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1326 case FILE_IMMEDIATE
:
1327 if (longIMMD(insn
->getSrc(1))) {
1328 assert(insn
->getDef(0)->reg
.data
.id
== insn
->getSrc(2)->reg
.data
.id
);
1330 emitInsn(0x0c000000);
1331 emitIMMD(0x14, 32, insn
->src(1));
1333 emitInsn(0x32800000);
1334 emitIMMD(0x14, 19, insn
->src(1));
1338 assert(!"bad src1 file");
1342 emitGPR (0x27, insn
->src(2));
1344 case FILE_MEMORY_CONST
:
1345 emitInsn(0x51800000);
1346 emitGPR (0x27, insn
->src(1));
1347 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1350 assert(!"bad src2 file");
1355 emitNEG (0x39, insn
->src(2));
1356 emitNEG2(0x38, insn
->src(0), insn
->src(1));
1362 emitNEG (0x31, insn
->src(2));
1363 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1368 emitGPR(0x08, insn
->src(0));
1369 emitGPR(0x00, insn
->def(0));
1373 CodeEmitterGM107::emitMUFU()
1378 case OP_COS
: mufu
= 0; break;
1379 case OP_SIN
: mufu
= 1; break;
1380 case OP_EX2
: mufu
= 2; break;
1381 case OP_LG2
: mufu
= 3; break;
1382 case OP_RCP
: mufu
= 4 + 2 * insn
->subOp
; break;
1383 case OP_RSQ
: mufu
= 5 + 2 * insn
->subOp
; break;
1385 assert(!"invalid mufu");
1389 emitInsn (0x50800000);
1391 emitNEG (0x30, insn
->src(0));
1392 emitABS (0x2e, insn
->src(0));
1393 emitField(0x14, 3, mufu
);
1394 emitGPR (0x08, insn
->src(0));
1395 emitGPR (0x00, insn
->def(0));
1399 CodeEmitterGM107::emitFMNMX()
1401 switch (insn
->src(1).getFile()) {
1403 emitInsn(0x5c600000);
1404 emitGPR (0x14, insn
->src(1));
1406 case FILE_MEMORY_CONST
:
1407 emitInsn(0x4c600000);
1408 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1410 case FILE_IMMEDIATE
:
1411 emitInsn(0x38600000);
1412 emitIMMD(0x14, 19, insn
->src(1));
1415 assert(!"bad src1 file");
1419 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1422 emitABS(0x31, insn
->src(1));
1423 emitNEG(0x30, insn
->src(0));
1425 emitABS(0x2e, insn
->src(0));
1426 emitNEG(0x2d, insn
->src(1));
1428 emitGPR(0x08, insn
->src(0));
1429 emitGPR(0x00, insn
->def(0));
1433 CodeEmitterGM107::emitRRO()
1435 switch (insn
->src(0).getFile()) {
1437 emitInsn(0x5c900000);
1438 emitGPR (0x14, insn
->src(0));
1440 case FILE_MEMORY_CONST
:
1441 emitInsn(0x4c900000);
1442 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1444 case FILE_IMMEDIATE
:
1445 emitInsn(0x38900000);
1446 emitIMMD(0x14, 19, insn
->src(0));
1449 assert(!"bad src file");
1453 emitABS (0x31, insn
->src(0));
1454 emitNEG (0x2d, insn
->src(0));
1455 emitField(0x27, 1, insn
->op
== OP_PREEX2
);
1456 emitGPR (0x00, insn
->def(0));
1460 CodeEmitterGM107::emitFCMP()
1462 const CmpInstruction
*insn
= this->insn
->asCmp();
1463 CondCode cc
= insn
->setCond
;
1465 if (insn
->src(2).mod
.neg())
1466 cc
= reverseCondCode(cc
);
1468 switch(insn
->src(2).getFile()) {
1470 switch (insn
->src(1).getFile()) {
1472 emitInsn(0x5ba00000);
1473 emitGPR (0x14, insn
->src(1));
1475 case FILE_MEMORY_CONST
:
1476 emitInsn(0x4ba00000);
1477 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1479 case FILE_IMMEDIATE
:
1480 emitInsn(0x36a00000);
1481 emitIMMD(0x14, 19, insn
->src(1));
1484 assert(!"bad src1 file");
1487 emitGPR (0x27, insn
->src(2));
1489 case FILE_MEMORY_CONST
:
1490 emitInsn(0x53a00000);
1491 emitGPR (0x27, insn
->src(1));
1492 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1495 assert(!"bad src2 file");
1499 emitCond4(0x30, cc
);
1501 emitGPR (0x08, insn
->src(0));
1502 emitGPR (0x00, insn
->def(0));
1506 CodeEmitterGM107::emitFSET()
1508 const CmpInstruction
*insn
= this->insn
->asCmp();
1510 switch (insn
->src(1).getFile()) {
1512 emitInsn(0x58000000);
1513 emitGPR (0x14, insn
->src(1));
1515 case FILE_MEMORY_CONST
:
1516 emitInsn(0x48000000);
1517 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1519 case FILE_IMMEDIATE
:
1520 emitInsn(0x30000000);
1521 emitIMMD(0x14, 19, insn
->src(1));
1524 assert(!"bad src1 file");
1528 if (insn
->op
!= OP_SET
) {
1530 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1531 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1532 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1534 assert(!"invalid set op");
1537 emitPRED(0x27, insn
->src(2));
1543 emitABS (0x36, insn
->src(0));
1544 emitNEG (0x35, insn
->src(1));
1545 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1546 emitCond4(0x30, insn
->setCond
);
1548 emitABS (0x2c, insn
->src(1));
1549 emitNEG (0x2b, insn
->src(0));
1550 emitGPR (0x08, insn
->src(0));
1551 emitGPR (0x00, insn
->def(0));
1555 CodeEmitterGM107::emitFSETP()
1557 const CmpInstruction
*insn
= this->insn
->asCmp();
1559 switch (insn
->src(1).getFile()) {
1561 emitInsn(0x5bb00000);
1562 emitGPR (0x14, insn
->src(1));
1564 case FILE_MEMORY_CONST
:
1565 emitInsn(0x4bb00000);
1566 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1568 case FILE_IMMEDIATE
:
1569 emitInsn(0x36b00000);
1570 emitIMMD(0x14, 19, insn
->src(1));
1573 assert(!"bad src1 file");
1577 if (insn
->op
!= OP_SET
) {
1579 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1580 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1581 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1583 assert(!"invalid set op");
1586 emitPRED(0x27, insn
->src(2));
1591 emitCond4(0x30, insn
->setCond
);
1593 emitABS (0x2c, insn
->src(1));
1594 emitNEG (0x2b, insn
->src(0));
1595 emitGPR (0x08, insn
->src(0));
1596 emitABS (0x07, insn
->src(0));
1597 emitNEG (0x06, insn
->src(1));
1598 emitPRED (0x03, insn
->def(0));
1599 if (insn
->defExists(1))
1600 emitPRED(0x00, insn
->def(1));
/*
 * Encode the instruction with opcode word 0x50f80000.
 *
 * insn->lanes is stored as a single bit at 0x26 and insn->subOp as eight
 * bits at 0x1c. src(1) is emitted as the GPR at 0x14 only when predSrc is
 * not 1; src(0) goes at 0x08 and def(0) at 0x00.
 */
1606 CodeEmitterGM107::emitFSWZADD()
1608 emitInsn (0x50f80000);
1612 emitField(0x26, 1, insn
->lanes
); /* abused for .ndv */
1613 emitField(0x1c, 8, insn
->subOp
);
1614 if (insn
->predSrc
!= 1)
1615 emitGPR (0x14, insn
->src(1));
1618 emitGPR (0x08, insn
->src(0));
1619 emitGPR (0x00, insn
->def(0));
1622 /*******************************************************************************
1624 ******************************************************************************/
/*
 * Encode a two-source logic op. OP_AND/OP_OR/OP_XOR map to lop = 0/1/2;
 * any other op asserts.
 *
 * When src(1) is not an immediate the opcode word follows the file of
 * src(1) (0x5c400000 with emitGPR, 0x4c400000 for a constant buffer), lop is
 * written at 0x29 and the invert modifiers of src(1)/src(0) at 0x28/0x27.
 * Otherwise opcode 0x04000000 is used with the invert modifiers at
 * 0x38/0x37, lop at 0x35 and a 32-bit immediate at 0x14.
 * src(0) is the GPR at 0x08 and def(0) the GPR at 0x00.
 *
 * NOTE(review): the FILE_IMMEDIATE case inside the switch cannot be reached,
 * because the enclosing test already excludes immediates.
 */
1627 CodeEmitterGM107::emitLOP()
1632 case OP_AND
: lop
= 0; break;
1633 case OP_OR
: lop
= 1; break;
1634 case OP_XOR
: lop
= 2; break;
1636 assert(!"invalid lop");
1640 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1641 switch (insn
->src(1).getFile()) {
1643 emitInsn(0x5c400000);
1644 emitGPR (0x14, insn
->src(1));
1646 case FILE_MEMORY_CONST
:
1647 emitInsn(0x4c400000);
1648 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1650 case FILE_IMMEDIATE
:
1651 emitInsn(0x38400000);
1652 emitIMMD(0x14, 19, insn
->src(1));
1655 assert(!"bad src1 file");
1661 emitField(0x29, 2, lop
);
1662 emitINV (0x28, insn
->src(1));
1663 emitINV (0x27, insn
->src(0));
1665 emitInsn (0x04000000);
1667 emitINV (0x38, insn
->src(1));
1668 emitINV (0x37, insn
->src(0));
1669 emitField(0x35, 2, lop
);
1671 emitIMMD (0x14, 32, insn
->src(1));
1674 emitGPR (0x08, insn
->src(0));
1675 emitGPR (0x00, insn
->def(0));
1678 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
/*
 * Encode a bitwise NOT of src(0) into def(0).
 *
 * When src(0) is not a long immediate (per longIMMD), the opcode word follows
 * the file of src(0): 0x5c400700 with emitGPR, 0x4c400700 for a constant
 * buffer, 0x38400700 for a 19-bit immediate; any other file asserts.
 * Otherwise opcode 0x05600000 is used with a 32-bit immediate at 0x14.
 * The operand always lands in the 0x14 slot, and def(0) is the GPR at 0x00.
 */
1680 CodeEmitterGM107::emitNOT()
1682 if (!longIMMD(insn
->src(0))) {
1683 switch (insn
->src(0).getFile()) {
1685 emitInsn(0x5c400700);
1686 emitGPR (0x14, insn
->src(0));
1688 case FILE_MEMORY_CONST
:
1689 emitInsn(0x4c400700);
1690 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1692 case FILE_IMMEDIATE
:
1693 emitInsn(0x38400700);
1694 emitIMMD(0x14, 19, insn
->src(0));
1697 assert(!"bad src1 file");
1702 emitInsn (0x05600000);
/* The only operand of this op is src(0); it is the value tested by
 * longIMMD() above, so the 32-bit immediate must come from it too. */
1703 emitIMMD (0x14, 32, insn
->src(0));
1707 emitGPR(0x00, insn
->def(0));
/*
 * Encode an integer add/subtract.
 *
 * When src(1) is not an immediate the opcode word follows the file of src(1)
 * (0x5c100000 with emitGPR, 0x4c100000 for a constant buffer) and the negate
 * modifiers of src(0)/src(1) are written at 0x31/0x30. Otherwise opcode
 * 0x1c000000 is used with the src(0) negate at 0x38 and a 32-bit immediate
 * at 0x14.
 * For OP_SUB, bit 0x00010000 of code[1] is toggled.
 * src(0) is the GPR at 0x08 and def(0) the GPR at 0x00.
 *
 * NOTE(review): the FILE_IMMEDIATE case inside the switch cannot be reached,
 * because the enclosing test already excludes immediates.
 */
1711 CodeEmitterGM107::emitIADD()
1713 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1714 switch (insn
->src(1).getFile()) {
1716 emitInsn(0x5c100000);
1717 emitGPR (0x14, insn
->src(1));
1719 case FILE_MEMORY_CONST
:
1720 emitInsn(0x4c100000);
1721 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1723 case FILE_IMMEDIATE
:
1724 emitInsn(0x38100000);
1725 emitIMMD(0x14, 19, insn
->src(1));
1728 assert(!"bad src1 file");
1732 emitNEG(0x31, insn
->src(0));
1733 emitNEG(0x30, insn
->src(1));
1737 emitInsn(0x1c000000);
1738 emitNEG (0x38, insn
->src(0));
1742 emitIMMD(0x14, 32, insn
->src(1));
1745 if (insn
->op
== OP_SUB
)
1746 code
[1] ^= 0x00010000;
1748 emitGPR(0x08, insn
->src(0));
1749 emitGPR(0x00, insn
->def(0));
/*
 * Encode an integer multiply.
 *
 * When src(1) is not an immediate the opcode word follows the file of src(1)
 * (0x5c380000 with emitGPR, 0x4c380000 for a constant buffer); signedness of
 * sType and dType is written at 0x29 and 0x28, and the MUL_HIGH sub-op flag
 * at 0x27. Otherwise opcode 0x1f000000 is used with the same three flags at
 * 0x37, 0x36 and 0x35 and a 32-bit immediate at 0x14.
 * src(0) is the GPR at 0x08 and def(0) the GPR at 0x00.
 *
 * NOTE(review): the FILE_IMMEDIATE case inside the switch cannot be reached,
 * because the enclosing test already excludes immediates.
 */
1753 CodeEmitterGM107::emitIMUL()
1755 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1756 switch (insn
->src(1).getFile()) {
1758 emitInsn(0x5c380000);
1759 emitGPR (0x14, insn
->src(1));
1761 case FILE_MEMORY_CONST
:
1762 emitInsn(0x4c380000);
1763 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1765 case FILE_IMMEDIATE
:
1766 emitInsn(0x38380000);
1767 emitIMMD(0x14, 19, insn
->src(1));
1770 assert(!"bad src1 file");
1774 emitField(0x29, 1, isSignedType(insn
->sType
));
1775 emitField(0x28, 1, isSignedType(insn
->dType
));
1776 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1778 emitInsn (0x1f000000);
1779 emitField(0x37, 1, isSignedType(insn
->sType
));
1780 emitField(0x36, 1, isSignedType(insn
->dType
));
1781 emitField(0x35, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1783 emitIMMD (0x14, 32, insn
->src(1));
1786 emitGPR(0x08, insn
->src(0));
1787 emitGPR(0x00, insn
->def(0));
/*
 * Encode an integer multiply-add; the form is selected by the file of src(2).
 *
 * In the first form the opcode follows the file of src(1) (0x5a000000 with
 * emitGPR, 0x4a000000 for a constant buffer, 0x34000000 for a 19-bit
 * immediate) and src(2) is the GPR at 0x27. When src(2) is in a constant
 * buffer, opcode 0x52000000 is used, src(1) moves to the GPR slot at 0x27
 * and src(2) is emitted with emitCBUF. Other files assert.
 * Common fields: MUL_HIGH flag at 0x36, sType signedness at 0x35, src(2)
 * negate at 0x34, combined src(0)/src(1) negate at 0x33, dType signedness at
 * 0x30, src(0) at 0x08 and def(0) at 0x00.
 */
1791 CodeEmitterGM107::emitIMAD()
1793 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1794 switch(insn
->src(2).getFile()) {
1796 switch (insn
->src(1).getFile()) {
1798 emitInsn(0x5a000000);
1799 emitGPR (0x14, insn
->src(1));
1801 case FILE_MEMORY_CONST
:
1802 emitInsn(0x4a000000);
1803 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1805 case FILE_IMMEDIATE
:
1806 emitInsn(0x34000000);
1807 emitIMMD(0x14, 19, insn
->src(1));
1810 assert(!"bad src1 file");
1813 emitGPR (0x27, insn
->src(2));
1815 case FILE_MEMORY_CONST
:
1816 emitInsn(0x52000000);
1817 emitGPR (0x27, insn
->src(1));
1818 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1821 assert(!"bad src2 file");
1825 emitField(0x36, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1826 emitField(0x35, 1, isSignedType(insn
->sType
));
1827 emitNEG (0x34, insn
->src(2));
1828 emitNEG2 (0x33, insn
->src(0), insn
->src(1));
1831 emitField(0x30, 1, isSignedType(insn
->dType
));
1833 emitGPR (0x08, insn
->src(0));
1834 emitGPR (0x00, insn
->def(0));
/*
 * Encode the three-source op whose opcode follows the file of src(2):
 * 0x5c180000 with emitGPR, 0x4c180000 for a constant buffer, 0x38180000 for
 * a 19-bit immediate; other files assert (the message says "src1" although
 * the operand tested is src(2)).
 *
 * The negate modifiers of src(0)/src(2) are written at 0x31/0x30, src(1) is
 * emitted as a 5-bit immediate at 0x27, src(0) is the GPR at 0x08 and def(0)
 * the GPR at 0x00.
 */
1838 CodeEmitterGM107::emitISCADD()
1840 switch (insn
->src(2).getFile()) {
1842 emitInsn(0x5c180000);
1843 emitGPR (0x14, insn
->src(2));
1845 case FILE_MEMORY_CONST
:
1846 emitInsn(0x4c180000);
1847 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1849 case FILE_IMMEDIATE
:
1850 emitInsn(0x38180000);
1851 emitIMMD(0x14, 19, insn
->src(2));
1854 assert(!"bad src1 file");
1857 emitNEG (0x31, insn
->src(0));
1858 emitNEG (0x30, insn
->src(2));
1860 emitIMMD(0x27, 5, insn
->src(1));
1861 emitGPR (0x08, insn
->src(0));
1862 emitGPR (0x00, insn
->def(0));
/*
 * Encode an integer min/max. The opcode follows the file of src(1):
 * 0x5c200000 with emitGPR, 0x4c200000 for a constant buffer, 0x38200000 for
 * a 19-bit immediate; other files assert.
 *
 * dType signedness is written at 0x30, insn->subOp (two bits) at 0x2b and a
 * flag that is set for OP_MAX at 0x2a. src(0) is the GPR at 0x08 and def(0)
 * the GPR at 0x00.
 */
1866 CodeEmitterGM107::emitIMNMX()
1868 switch (insn
->src(1).getFile()) {
1870 emitInsn(0x5c200000);
1871 emitGPR (0x14, insn
->src(1));
1873 case FILE_MEMORY_CONST
:
1874 emitInsn(0x4c200000);
1875 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1877 case FILE_IMMEDIATE
:
1878 emitInsn(0x38200000);
1879 emitIMMD(0x14, 19, insn
->src(1));
1882 assert(!"bad src1 file");
1886 emitField(0x30, 1, isSignedType(insn
->dType
));
1888 emitField(0x2b, 2, insn
->subOp
);
1889 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1891 emitGPR (0x08, insn
->src(0));
1892 emitGPR (0x00, insn
->def(0));
/*
 * Encode an integer compare-select; the instruction is accessed through its
 * CmpInstruction view.
 *
 * The condition code starts as insn->setCond and is passed through
 * reverseCondCode() when src(2) carries a negate modifier.
 * The form is selected by the file of src(2). In the first form the opcode
 * follows the file of src(1) (0x5b400000 with emitGPR, 0x4b400000 for a
 * constant buffer, 0x36400000 for a 19-bit immediate) and src(2) is the GPR
 * at 0x27. When src(2) is in a constant buffer, opcode 0x53400000 is used,
 * src(1) moves to the GPR slot at 0x27 and src(2) is emitted with emitCBUF.
 * The condition goes at 0x31, sType signedness at 0x30, src(0) at 0x08 and
 * def(0) at 0x00.
 */
1896 CodeEmitterGM107::emitICMP()
1898 const CmpInstruction
*insn
= this->insn
->asCmp();
1899 CondCode cc
= insn
->setCond
;
1901 if (insn
->src(2).mod
.neg())
1902 cc
= reverseCondCode(cc
);
1904 switch(insn
->src(2).getFile()) {
1906 switch (insn
->src(1).getFile()) {
1908 emitInsn(0x5b400000);
1909 emitGPR (0x14, insn
->src(1));
1911 case FILE_MEMORY_CONST
:
1912 emitInsn(0x4b400000);
1913 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1915 case FILE_IMMEDIATE
:
1916 emitInsn(0x36400000);
1917 emitIMMD(0x14, 19, insn
->src(1));
1920 assert(!"bad src1 file");
1923 emitGPR (0x27, insn
->src(2));
1925 case FILE_MEMORY_CONST
:
1926 emitInsn(0x53400000);
1927 emitGPR (0x27, insn
->src(1));
1928 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1931 assert(!"bad src2 file");
1935 emitCond3(0x31, cc
);
1936 emitField(0x30, 1, isSignedType(insn
->sType
));
1937 emitGPR (0x08, insn
->src(0));
1938 emitGPR (0x00, insn
->def(0));
/*
 * Encode an integer compare that writes a GPR; the instruction is accessed
 * through its CmpInstruction view.
 *
 * The opcode follows the file of src(1): 0x5b500000 with emitGPR, 0x4b500000
 * for a constant buffer, 0x36500000 for a 19-bit immediate; others assert.
 * For ops other than OP_SET the combine mode (AND=0, OR=1, XOR=2) is written
 * at 0x2d and src(2) is emitted as the predicate at 0x27.
 * The condition goes at 0x31, sType signedness at 0x30, and a flag set when
 * dType is TYPE_F32 at 0x2c. src(0) is at 0x08 and def(0) at 0x00.
 */
1942 CodeEmitterGM107::emitISET()
1944 const CmpInstruction
*insn
= this->insn
->asCmp();
1946 switch (insn
->src(1).getFile()) {
1948 emitInsn(0x5b500000);
1949 emitGPR (0x14, insn
->src(1));
1951 case FILE_MEMORY_CONST
:
1952 emitInsn(0x4b500000);
1953 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1955 case FILE_IMMEDIATE
:
1956 emitInsn(0x36500000);
1957 emitIMMD(0x14, 19, insn
->src(1));
1960 assert(!"bad src1 file");
1964 if (insn
->op
!= OP_SET
) {
1966 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1967 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1968 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1970 assert(!"invalid set op");
1973 emitPRED(0x27, insn
->src(2));
1978 emitCond3(0x31, insn
->setCond
);
1979 emitField(0x30, 1, isSignedType(insn
->sType
));
1981 emitField(0x2c, 1, insn
->dType
== TYPE_F32
);
1983 emitGPR (0x08, insn
->src(0));
1984 emitGPR (0x00, insn
->def(0));
/*
 * Encode an integer compare whose results are predicates; the instruction is
 * accessed through its CmpInstruction view.
 *
 * The opcode follows the file of src(1): 0x5b600000 with emitGPR, 0x4b600000
 * for a constant buffer, 0x36600000 for a 19-bit immediate; others assert.
 * For ops other than OP_SET the combine mode (AND=0, OR=1, XOR=2) is written
 * at 0x2d and src(2) is emitted as the predicate at 0x27.
 * The condition goes at 0x31 and sType signedness at 0x30. src(0) is the GPR
 * at 0x08, def(0) the predicate at 0x03 and def(1), when it exists, the
 * predicate at 0x00.
 */
1988 CodeEmitterGM107::emitISETP()
1990 const CmpInstruction
*insn
= this->insn
->asCmp();
1992 switch (insn
->src(1).getFile()) {
1994 emitInsn(0x5b600000);
1995 emitGPR (0x14, insn
->src(1));
1997 case FILE_MEMORY_CONST
:
1998 emitInsn(0x4b600000);
1999 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2001 case FILE_IMMEDIATE
:
2002 emitInsn(0x36600000);
2003 emitIMMD(0x14, 19, insn
->src(1));
2006 assert(!"bad src1 file");
2010 if (insn
->op
!= OP_SET
) {
2012 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
2013 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
2014 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
2016 assert(!"invalid set op");
2019 emitPRED(0x27, insn
->src(2));
2024 emitCond3(0x31, insn
->setCond
);
2025 emitField(0x30, 1, isSignedType(insn
->sType
));
2027 emitGPR (0x08, insn
->src(0));
2028 emitPRED (0x03, insn
->def(0));
2029 if (insn
->defExists(1))
2030 emitPRED(0x00, insn
->def(1));
/*
 * Encode a left shift. The opcode follows the file of src(1): 0x5c480000
 * with emitGPR, 0x4c480000 for a constant buffer, 0x38480000 for a 19-bit
 * immediate; other files assert.
 *
 * A flag set when subOp equals NV50_IR_SUBOP_SHIFT_WRAP is written at 0x27;
 * src(0) is the GPR at 0x08 and def(0) the GPR at 0x00.
 */
2036 CodeEmitterGM107::emitSHL()
2038 switch (insn
->src(1).getFile()) {
2040 emitInsn(0x5c480000);
2041 emitGPR (0x14, insn
->src(1));
2043 case FILE_MEMORY_CONST
:
2044 emitInsn(0x4c480000);
2045 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2047 case FILE_IMMEDIATE
:
2048 emitInsn(0x38480000);
2049 emitIMMD(0x14, 19, insn
->src(1));
2052 assert(!"bad src1 file");
2058 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2059 emitGPR (0x08, insn
->src(0));
2060 emitGPR (0x00, insn
->def(0));
/*
 * Encode a right shift. The opcode follows the file of src(1): 0x5c280000
 * with emitGPR, 0x4c280000 for a constant buffer, 0x38280000 for a 19-bit
 * immediate; other files assert.
 *
 * dType signedness is written at 0x30 and a flag set when subOp equals
 * NV50_IR_SUBOP_SHIFT_WRAP at 0x27; src(0) is the GPR at 0x08 and def(0) the
 * GPR at 0x00.
 */
2064 CodeEmitterGM107::emitSHR()
2066 switch (insn
->src(1).getFile()) {
2068 emitInsn(0x5c280000);
2069 emitGPR (0x14, insn
->src(1));
2071 case FILE_MEMORY_CONST
:
2072 emitInsn(0x4c280000);
2073 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2075 case FILE_IMMEDIATE
:
2076 emitInsn(0x38280000);
2077 emitIMMD(0x14, 19, insn
->src(1));
2080 assert(!"bad src1 file");
2084 emitField(0x30, 1, isSignedType(insn
->dType
));
2087 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2088 emitGPR (0x08, insn
->src(0));
2089 emitGPR (0x00, insn
->def(0));
/*
 * Encode the three-source shift form. The opcode depends on both the file of
 * src(1) and the direction: with emitGPR it is 0x5bf80000 for OP_SHL and
 * 0x5cf80000 otherwise; with a 19-bit immediate it is 0x36f80000 for OP_SHL
 * and 0x38f80000 otherwise. Other files assert.
 *
 * A type code chosen from insn->sType is written (two bits) at 0x25. The
 * SHIFT_WRAP and SHIFT_HIGH bits of subOp are written at 0x32 and 0x30.
 * src(2) is the GPR at 0x27, src(0) the GPR at 0x08 and def(0) the GPR at
 * 0x00.
 */
2093 CodeEmitterGM107::emitSHF()
2097 switch (insn
->src(1).getFile()) {
2099 emitInsn(insn
->op
== OP_SHL
? 0x5bf80000 : 0x5cf80000);
2100 emitGPR(0x14, insn
->src(1));
2102 case FILE_IMMEDIATE
:
2103 emitInsn(insn
->op
== OP_SHL
? 0x36f80000 : 0x38f80000);
2104 emitIMMD(0x14, 19, insn
->src(1));
2107 assert(!"bad src1 file");
2111 switch (insn
->sType
) {
2123 emitField(0x32, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_WRAP
));
2125 emitField(0x30, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_HIGH
));
2127 emitGPR (0x27, insn
->src(2));
2128 emitField(0x25, 2, type
);
2129 emitGPR (0x08, insn
->src(0));
2130 emitGPR (0x00, insn
->def(0));
/*
 * Encode the single-source op whose opcode follows the file of src(0):
 * 0x5c080000 with emitGPR, 0x4c080000 for a constant buffer, 0x38080000 for
 * a 19-bit immediate; other files assert (the message says "src1" although
 * the operand tested is src(0)).
 *
 * The operand is placed in the 0x14 slot, its invert modifier at 0x28 and
 * def(0) is the GPR at 0x00.
 */
2134 CodeEmitterGM107::emitPOPC()
2136 switch (insn
->src(0).getFile()) {
2138 emitInsn(0x5c080000);
2139 emitGPR (0x14, insn
->src(0));
2141 case FILE_MEMORY_CONST
:
2142 emitInsn(0x4c080000);
2143 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2145 case FILE_IMMEDIATE
:
2146 emitInsn(0x38080000);
2147 emitIMMD(0x14, 19, insn
->src(0));
2150 assert(!"bad src1 file");
2154 emitINV(0x28, insn
->src(0));
2155 emitGPR(0x00, insn
->def(0));
/*
 * Encode the three-source bitfield op; the form is selected by the file of
 * src(2).
 *
 * In the first form the opcode follows the file of src(1) (0x5bf00000 with
 * emitGPR, 0x4bf00000 for a constant buffer, 0x36f00000 for a 19-bit
 * immediate) and src(2) is the GPR at 0x27. When src(2) is in a constant
 * buffer, opcode 0x53f00000 is used, src(1) moves to the GPR slot at 0x27
 * and src(2) is emitted with emitCBUF. Other files assert.
 * src(0) is the GPR at 0x08 and def(0) the GPR at 0x00.
 */
2159 CodeEmitterGM107::emitBFI()
2161 switch(insn
->src(2).getFile()) {
2163 switch (insn
->src(1).getFile()) {
2165 emitInsn(0x5bf00000);
2166 emitGPR (0x14, insn
->src(1));
2168 case FILE_MEMORY_CONST
:
2169 emitInsn(0x4bf00000);
2170 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2172 case FILE_IMMEDIATE
:
2173 emitInsn(0x36f00000);
2174 emitIMMD(0x14, 19, insn
->src(1));
2177 assert(!"bad src1 file");
2180 emitGPR (0x27, insn
->src(2));
2182 case FILE_MEMORY_CONST
:
2183 emitInsn(0x53f00000);
2184 emitGPR (0x27, insn
->src(1));
2185 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
2188 assert(!"bad src2 file");
2193 emitGPR (0x08, insn
->src(0));
2194 emitGPR (0x00, insn
->def(0));
/*
 * Encode the two-source bitfield op whose opcode follows the file of src(1):
 * 0x5c000000 with emitGPR, 0x4c000000 for a constant buffer, 0x38000000 for
 * a 19-bit immediate; other files assert.
 *
 * dType signedness is written at 0x30 and a flag set when subOp equals
 * NV50_IR_SUBOP_EXTBF_REV at 0x28; src(0) is the GPR at 0x08 and def(0) the
 * GPR at 0x00.
 */
2198 CodeEmitterGM107::emitBFE()
2200 switch (insn
->src(1).getFile()) {
2202 emitInsn(0x5c000000);
2203 emitGPR (0x14, insn
->src(1));
2205 case FILE_MEMORY_CONST
:
2206 emitInsn(0x4c000000);
2207 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2209 case FILE_IMMEDIATE
:
2210 emitInsn(0x38000000);
2211 emitIMMD(0x14, 19, insn
->src(1));
2214 assert(!"bad src1 file");
2218 emitField(0x30, 1, isSignedType(insn
->dType
));
2220 emitField(0x28, 1, insn
->subOp
== NV50_IR_SUBOP_EXTBF_REV
);
2221 emitGPR (0x08, insn
->src(0));
2222 emitGPR (0x00, insn
->def(0));
/*
 * Encode the single-source op whose opcode follows the file of src(0):
 * 0x5c300000 with emitGPR, 0x4c300000 for a constant buffer, 0x38300000 for
 * a 19-bit immediate; other files assert (the message says "src1" although
 * the operand tested is src(0)).
 *
 * dType signedness is written at 0x30, a flag set when subOp equals
 * NV50_IR_SUBOP_BFIND_SAMT at 0x29, the invert modifier of src(0) at 0x28
 * and def(0) is the GPR at 0x00.
 */
2226 CodeEmitterGM107::emitFLO()
2228 switch (insn
->src(0).getFile()) {
2230 emitInsn(0x5c300000);
2231 emitGPR (0x14, insn
->src(0));
2233 case FILE_MEMORY_CONST
:
2234 emitInsn(0x4c300000);
2235 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2237 case FILE_IMMEDIATE
:
2238 emitInsn(0x38300000);
2239 emitIMMD(0x14, 19, insn
->src(0));
2242 assert(!"bad src1 file");
2246 emitField(0x30, 1, isSignedType(insn
->dType
));
2248 emitField(0x29, 1, insn
->subOp
== NV50_IR_SUBOP_BFIND_SAMT
);
2249 emitINV (0x28, insn
->src(0));
2250 emitGPR (0x00, insn
->def(0));
2253 /*******************************************************************************
2255 ******************************************************************************/
/*
 * Write the 3-bit data-size code for a load/store at position pos.
 *
 * The code is derived from typeSizeof(type): 1 byte -> 0 (unsigned) or 1
 * (signed), 2 bytes -> 2 (unsigned) or 3 (signed), 4 bytes -> 4,
 * 8 bytes -> 5, 16 bytes -> 6. Any other size asserts.
 */
2258 CodeEmitterGM107::emitLDSTs(int pos
, DataType type
)
2262 switch (typeSizeof(type
)) {
2263 case 1: data
= isSignedType(type
) ? 1 : 0; break;
2264 case 2: data
= isSignedType(type
) ? 3 : 2; break;
2265 case 4: data
= 4; break;
2266 case 8: data
= 5; break;
2267 case 16: data
= 6; break;
2269 assert(!"bad type");
2273 emitField(pos
, 3, data
);
/*
 * Write the 2-bit cache-mode code for a load/store at position pos.
 *
 * insn->cache maps as CACHE_CA -> 0, CACHE_CG -> 1, CACHE_CS -> 2,
 * CACHE_CV -> 3; any other value asserts.
 */
2277 CodeEmitterGM107::emitLDSTc(int pos
)
2281 switch (insn
->cache
) {
2282 case CACHE_CA
: mode
= 0; break;
2283 case CACHE_CG
: mode
= 1; break;
2284 case CACHE_CS
: mode
= 2; break;
2285 case CACHE_CV
: mode
= 3; break;
2287 assert(!"invalid caching mode");
2291 emitField(pos
, 2, mode
);
/*
 * Encode a load whose source is a constant buffer (opcode word 0xef900000;
 * emitInstruction() selects it for FILE_MEMORY_CONST).
 *
 * The dType size code goes at 0x30, insn->subOp (two bits) at 0x2c, src(0)
 * is emitted with emitCBUF and def(0) is the GPR at 0x00.
 */
2295 CodeEmitterGM107::emitLDC()
2297 emitInsn (0xef900000);
2298 emitLDSTs(0x30, insn
->dType
);
2299 emitField(0x2c, 2, insn
->subOp
);
2300 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn
->src(0));
2301 emitGPR (0x00, insn
->def(0));
/*
 * Encode a load from local memory (opcode word 0xef400000; emitInstruction()
 * selects it for FILE_MEMORY_LOCAL).
 *
 * The dType size code goes at 0x30, the address comes from src(0) via
 * emitADDR and def(0) is the GPR at 0x00.
 */
2305 CodeEmitterGM107::emitLDL()
2307 emitInsn (0xef400000);
2308 emitLDSTs(0x30, insn
->dType
);
2310 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2311 emitGPR (0x00, insn
->def(0));
/*
 * Encode a load from shared memory (opcode word 0xef480000;
 * emitInstruction() selects it for FILE_MEMORY_SHARED).
 *
 * The dType size code goes at 0x30, the address comes from src(0) via
 * emitADDR and def(0) is the GPR at 0x00.
 */
2315 CodeEmitterGM107::emitLDS()
2317 emitInsn (0xef480000);
2318 emitLDSTs(0x30, insn
->dType
);
2319 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2320 emitGPR (0x00, insn
->def(0));
/*
 * Encode a load from global memory (opcode word 0x80000000;
 * emitInstruction() selects it for FILE_MEMORY_GLOBAL).
 *
 * The dType size code goes at 0x35. The flag at 0x34 is set when the
 * indirect address value of src(0) is 8 bytes wide; note that
 * getIndirect(0) is dereferenced unconditionally. The address comes from
 * src(0) via emitADDR and def(0) is the GPR at 0x00.
 */
2324 CodeEmitterGM107::emitLD()
2326 emitInsn (0x80000000);
2329 emitLDSTs(0x35, insn
->dType
);
2330 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2331 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2332 emitGPR (0x00, insn
->def(0));
/*
 * Encode a store to local memory (opcode word 0xef500000; emitInstruction()
 * selects it for FILE_MEMORY_LOCAL).
 *
 * The dType size code goes at 0x30, the address comes from src(0) via
 * emitADDR and the value to store, src(1), is the GPR at 0x00.
 */
2336 CodeEmitterGM107::emitSTL()
2338 emitInsn (0xef500000);
2339 emitLDSTs(0x30, insn
->dType
);
2341 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2342 emitGPR (0x00, insn
->src(1));
/*
 * Encode a store to shared memory (opcode word 0xef580000;
 * emitInstruction() selects it for FILE_MEMORY_SHARED).
 *
 * The dType size code goes at 0x30, the address comes from src(0) via
 * emitADDR and the value to store, src(1), is the GPR at 0x00.
 */
2346 CodeEmitterGM107::emitSTS()
2348 emitInsn (0xef580000);
2349 emitLDSTs(0x30, insn
->dType
);
2350 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2351 emitGPR (0x00, insn
->src(1));
/*
 * Encode a store to global memory (opcode word 0xa0000000;
 * emitInstruction() selects it for FILE_MEMORY_GLOBAL).
 *
 * The dType size code goes at 0x35. The flag at 0x34 is set when the
 * indirect address value of src(0) is 8 bytes wide; note that
 * getIndirect(0) is dereferenced unconditionally. The address comes from
 * src(0) via emitADDR and the value to store, src(1), is the GPR at 0x00.
 */
2355 CodeEmitterGM107::emitST()
2357 emitInsn (0xa0000000);
2360 emitLDSTs(0x35, insn
->dType
);
2361 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2362 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2363 emitGPR (0x00, insn
->src(1));
/*
 * Encode the instruction with opcode word 0xefd80000.
 *
 * The field at 0x2f holds (size of def(0) / 4) - 1, i.e. the number of
 * 32-bit words minus one. The second indirect of src(0) is the GPR at 0x27,
 * the address comes from src(0) via emitADDR and def(0) is the GPR at 0x00.
 */
2367 CodeEmitterGM107::emitALD()
2369 emitInsn (0xefd80000);
2370 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2371 emitGPR (0x27, insn
->src(0).getIndirect(1));
2374 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2375 emitGPR (0x00, insn
->def(0));
/*
 * Encode the instruction with opcode word 0xeff00000.
 *
 * The field at 0x2f holds (typeSizeof(dType) / 4) - 1, i.e. the number of
 * 32-bit words minus one. The second indirect of src(0) is the GPR at 0x27,
 * the address comes from src(0) via emitADDR and the value written, src(1),
 * is the GPR at 0x00.
 */
2379 CodeEmitterGM107::emitAST()
2381 emitInsn (0xeff00000);
2382 emitField(0x2f, 2, (typeSizeof(insn
->dType
) / 4) - 1);
2383 emitGPR (0x27, insn
->src(0).getIndirect(1));
2385 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2386 emitGPR (0x00, insn
->src(1));
/*
 * Encode the instruction with opcode word 0xefd00000: src(0) is the GPR at
 * 0x08 and def(0) the GPR at 0x00.
 */
2390 CodeEmitterGM107::emitISBERD()
2392 emitInsn(0xefd00000);
2393 emitGPR (0x08, insn
->src(0));
2394 emitGPR (0x00, insn
->def(0));
/*
 * Encode the instruction with opcode word 0xefa00000.
 *
 * The field at 0x2f holds (size of def(0) / 4) - 1. The offset stored in
 * src(0)'s value is written as an 11-bit field at 0x14, the first indirect
 * of src(0) is the GPR at 0x08 and def(0) is the GPR at 0x00.
 */
2398 CodeEmitterGM107::emitAL2P()
2400 emitInsn (0xefa00000);
2401 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2404 emitField(0x14, 11, insn
->src(0).get()->reg
.data
.offset
);
2405 emitGPR (0x08, insn
->src(0).getIndirect(0));
2406 emitGPR (0x00, insn
->def(0));
/*
 * Fixup callback that patches the interpolation bits of an already-encoded
 * instruction (emitIPA() registers it through addInterp()).
 *
 * entry supplies the recorded interpolation mode (ipa), register (reg) and
 * the word index of the instruction in code (loc). data supplies the state:
 *  - with data.flatshade and a recorded mode of NV50_IR_INTERP_SC, the mode
 *    becomes NV50_IR_INTERP_FLAT;
 *  - otherwise, with data.force_persample_interp, a default sample mode and
 *    a non-flat mode, NV50_IR_INTERP_CENTROID is OR-ed in.
 * In code[loc + 1] the four bits starting at 0x14 are cleared, then ipa bits
 * 0-1 are placed at 0x16 and ipa bits 2-3 at 0x14. In code[loc + 0] the
 * eight bits starting at 0x14 are replaced with reg.
 */
2410 interpApply(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
2412 int ipa
= entry
->ipa
;
2413 int reg
= entry
->reg
;
2414 int loc
= entry
->loc
;
2416 if (data
.flatshade
&&
2417 (ipa
& NV50_IR_INTERP_MODE_MASK
) == NV50_IR_INTERP_SC
) {
2418 ipa
= NV50_IR_INTERP_FLAT
;
2420 } else if (data
.force_persample_interp
&&
2421 (ipa
& NV50_IR_INTERP_SAMPLE_MASK
) == NV50_IR_INTERP_DEFAULT
&&
2422 (ipa
& NV50_IR_INTERP_MODE_MASK
) != NV50_IR_INTERP_FLAT
) {
2423 ipa
|= NV50_IR_INTERP_CENTROID
;
2425 code
[loc
+ 1] &= ~(0xf << 0x14);
2426 code
[loc
+ 1] |= (ipa
& 0x3) << 0x16;
2427 code
[loc
+ 1] |= (ipa
& 0xc) << (0x14 - 2);
2428 code
[loc
+ 0] &= ~(0xff << 0x14);
2429 code
[loc
+ 0] |= reg
<< 0x14;
/*
 * Encode an interpolation instruction (opcode word 0xe0000000).
 *
 * The interpolation mode maps LINEAR/PERSPECTIVE/FLAT/SC to 0/1/2/3 and is
 * written at 0x36; the sample mode maps DEFAULT/CENTROID/OFFSET to 0/1/2 and
 * is written at 0x34. Unknown modes assert. The value 7 is written to the
 * 3-bit field at 0x2f and the address comes from src(0) via emitADDR.
 * If bits 8-15 of code[0] are not all ones after emitADDR, bit 0x40 of
 * code[1] (.idx) is set. def(0) is the GPR at 0x00.
 *
 * For OP_PINTERP, src(1) is the GPR at 0x14 (and src(2) the GPR at 0x27 in
 * OFFSET sample mode), and the fixup is registered with src(1)'s register
 * id. Otherwise src(1) is the GPR at 0x27 in OFFSET sample mode and the
 * fixup is registered with register 0xff. In both cases interpApply is the
 * fixup callback.
 */
2433 CodeEmitterGM107::emitIPA()
2435 int ipam
= 0, ipas
= 0;
2437 switch (insn
->getInterpMode()) {
2438 case NV50_IR_INTERP_LINEAR
: ipam
= 0; break;
2439 case NV50_IR_INTERP_PERSPECTIVE
: ipam
= 1; break;
2440 case NV50_IR_INTERP_FLAT
: ipam
= 2; break;
2441 case NV50_IR_INTERP_SC
: ipam
= 3; break;
2443 assert(!"invalid ipa mode");
2447 switch (insn
->getSampleMode()) {
2448 case NV50_IR_INTERP_DEFAULT
: ipas
= 0; break;
2449 case NV50_IR_INTERP_CENTROID
: ipas
= 1; break;
2450 case NV50_IR_INTERP_OFFSET
: ipas
= 2; break;
2452 assert(!"invalid ipa sample mode");
2456 emitInsn (0xe0000000);
2457 emitField(0x36, 2, ipam
);
2458 emitField(0x34, 2, ipas
);
2460 emitField(0x2f, 3, 7);
2461 emitADDR (0x08, 0x1c, 10, 0, insn
->src(0));
2462 if ((code
[0] & 0x0000ff00) != 0x0000ff00)
2463 code
[1] |= 0x00000040; /* .idx */
2464 emitGPR(0x00, insn
->def(0));
2466 if (insn
->op
== OP_PINTERP
) {
2467 emitGPR(0x14, insn
->src(1));
2468 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2469 emitGPR(0x27, insn
->src(2));
2470 addInterp(insn
->ipa
, insn
->getSrc(1)->reg
.data
.id
, interpApply
);
2472 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2473 emitGPR(0x27, insn
->src(1));
2475 addInterp(insn
->ipa
, 0xff, interpApply
);
2478 if (insn
->getSampleMode() != NV50_IR_INTERP_OFFSET
)
/*
 * Encode an atomic operation on an address given by src(0).
 *
 * For NV50_IR_SUBOP_ATOM_CAS the type code is 0 for TYPE_U32 and 1 for
 * TYPE_U64 and opcode word 0xee000000 is used. For every other sub-op the
 * type code is U32=0, S32=1, U64=2, F32=3, B128=4, S64=5 and opcode word
 * 0xed000000 is used; the sub-op code is insn->subOp except for
 * NV50_IR_SUBOP_ATOM_EXCH, which is special-cased. Unexpected types assert
 * and fall back to code 0.
 * The sub-op code goes at 0x34 (four bits), the type code at 0x31 (three
 * bits), and the flag at 0x30 is set when the indirect address value is
 * 8 bytes wide (getIndirect(0) is dereferenced unconditionally). src(1) is
 * the GPR at 0x14, the address is emitted with emitADDR and def(0) is the
 * GPR at 0x00.
 */
2483 CodeEmitterGM107::emitATOM()
2485 unsigned dType
, subOp
;
2487 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2488 switch (insn
->dType
) {
2489 case TYPE_U32
: dType
= 0; break;
2490 case TYPE_U64
: dType
= 1; break;
2491 default: assert(!"unexpected dType"); dType
= 0; break;
2495 emitInsn (0xee000000);
2497 switch (insn
->dType
) {
2498 case TYPE_U32
: dType
= 0; break;
2499 case TYPE_S32
: dType
= 1; break;
2500 case TYPE_U64
: dType
= 2; break;
2501 case TYPE_F32
: dType
= 3; break;
2502 case TYPE_B128
: dType
= 4; break;
2503 case TYPE_S64
: dType
= 5; break;
2504 default: assert(!"unexpected dType"); dType
= 0; break;
2506 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2509 subOp
= insn
->subOp
;
2511 emitInsn (0xed000000);
2514 emitField(0x34, 4, subOp
);
2515 emitField(0x31, 3, dType
);
2516 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2517 emitGPR (0x14, insn
->src(1));
2518 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2519 emitGPR (0x00, insn
->def(0));
/*
 * Encode an atomic operation for the shared-memory path (emitInstruction()
 * tests for FILE_MEMORY_SHARED on the atomic dispatch).
 *
 * For NV50_IR_SUBOP_ATOM_CAS the type code is 0 for TYPE_U32 and 1 for
 * TYPE_U64, opcode word 0xee000000 is used and the type code is a single bit
 * at 0x34. For every other sub-op the type code is U32=0, S32=1, U64=2,
 * S64=3, opcode word 0xec000000 is used and the type code is three bits at
 * 0x1c; the sub-op code is insn->subOp except for NV50_IR_SUBOP_ATOM_EXCH,
 * which is special-cased. Unexpected types assert and fall back to code 0.
 * The sub-op code goes at 0x34 (four bits), src(1) is the GPR at 0x14, the
 * address comes from src(0) via emitADDR and def(0) is the GPR at 0x00.
 */
2523 CodeEmitterGM107::emitATOMS()
2525 unsigned dType
, subOp
;
2527 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2528 switch (insn
->dType
) {
2529 case TYPE_U32
: dType
= 0; break;
2530 case TYPE_U64
: dType
= 1; break;
2531 default: assert(!"unexpected dType"); dType
= 0; break;
2535 emitInsn (0xee000000);
2536 emitField(0x34, 1, dType
);
2538 switch (insn
->dType
) {
2539 case TYPE_U32
: dType
= 0; break;
2540 case TYPE_S32
: dType
= 1; break;
2541 case TYPE_U64
: dType
= 2; break;
2542 case TYPE_S64
: dType
= 3; break;
2543 default: assert(!"unexpected dType"); dType
= 0; break;
2546 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2549 subOp
= insn
->subOp
;
2551 emitInsn (0xec000000);
2552 emitField(0x1c, 3, dType
);
2555 emitField(0x34, 4, subOp
);
2556 emitGPR (0x14, insn
->src(1));
2557 emitADDR (0x08, 0x1e, 22, 2, insn
->src(0));
2558 emitGPR (0x00, insn
->def(0));
/*
 * Encode the instruction with opcode word 0xebf80000, which takes an
 * address (src(0)) and a value (src(1)) and has no destination operand.
 *
 * The type code is U32=0, S32=1, U64=2, F32=3, B128=4, S64=5; unexpected
 * types assert and fall back to 0. The flag at 0x30 is set when the indirect
 * address value is 8 bytes wide (getIndirect(0) is dereferenced
 * unconditionally), insn->subOp is written (three bits) at 0x17 and the type
 * code (three bits) at 0x14. The address is emitted with emitADDR and src(1)
 * is the GPR at 0x00.
 */
2562 CodeEmitterGM107::emitRED()
2566 switch (insn
->dType
) {
2567 case TYPE_U32
: dType
= 0; break;
2568 case TYPE_S32
: dType
= 1; break;
2569 case TYPE_U64
: dType
= 2; break;
2570 case TYPE_F32
: dType
= 3; break;
2571 case TYPE_B128
: dType
= 4; break;
2572 case TYPE_S64
: dType
= 5; break;
2573 default: assert(!"unexpected dType"); dType
= 0; break;
2576 emitInsn (0xebf80000);
2577 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2578 emitField(0x17, 3, insn
->subOp
);
2579 emitField(0x14, 3, dType
);
2580 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2581 emitGPR (0x00, insn
->src(1));
/*
 * Encode the instruction whose opcode word is 0xef600000 when src(0) is in
 * FILE_MEMORY_GLOBAL and 0xef800000 otherwise.
 *
 * The flag at 0x34 is set when the indirect address value is 8 bytes wide
 * (getIndirect(0) is dereferenced unconditionally). The address comes from
 * src(0) via emitADDR using the locally chosen width, and insn->subOp is
 * written as a 4-bit field at 0x00.
 */
2585 CodeEmitterGM107::emitCCTL()
2588 if (insn
->src(0).getFile() == FILE_MEMORY_GLOBAL
) {
2589 emitInsn(0xef600000);
2592 emitInsn(0xef800000);
2595 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2596 emitADDR (0x08, 0x16, width
, 2, insn
->src(0));
2597 emitField(0x00, 4, insn
->subOp
);
2600 /*******************************************************************************
2602 ******************************************************************************/
/*
 * Encode the instruction with opcode word 0xefe80000: insn->subOp is a
 * 3-bit field at 0x1f, src(0) is the GPR at 0x08 and def(0) the GPR at 0x00.
 */
2605 CodeEmitterGM107::emitPIXLD()
2607 emitInsn (0xefe80000);
2609 emitField(0x1f, 3, insn
->subOp
);
2610 emitGPR (0x08, insn
->src(0));
2611 emitGPR (0x00, insn
->def(0));
2614 /*******************************************************************************
2616 ******************************************************************************/
/*
 * Emit the second texture source register at position pos.
 *
 * The source index is 1, or 2 when predSrc is 1 (so the predicate source is
 * stepped over). The register is emitted when that source exists.
 */
2619 CodeEmitterGM107::emitTEXs(int pos
)
2621 int src1
= insn
->predSrc
== 1 ? 2 : 1;
2622 if (insn
->srcExists(src1
))
2623 emitGPR(pos
, insn
->src(src1
));
/*
 * Encode a texture sample; the instruction is accessed through its
 * TexInstruction view.
 *
 * When tex.levelZero is not set the LOD mode is 0 for OP_TEX, 2 for OP_TXB
 * and 3 for OP_TXL; other ops assert.
 * With an indirect resource (tex.rIndirectSrc >= 0) opcode word 0xdeb80000
 * is used, with the LOD mode at 0x25 and the useOffsets == 1 flag at 0x24.
 * Otherwise opcode word 0xc0380000 is used, with the LOD mode at 0x37, the
 * flag at 0x36 and tex.r as a 13-bit field at 0x24.
 * Common fields: shadow flag at 0x32, liveOnly at 0x31, derivAll at 0x23,
 * component mask (four bits) at 0x1f, dimension code at 0x1d (3 for cube
 * targets, otherwise getDim() - 1) and array flag at 0x1c. src(0) is the GPR
 * at 0x08 and def(0) the GPR at 0x00.
 */
2629 CodeEmitterGM107::emitTEX()
2631 const TexInstruction
*insn
= this->insn
->asTex();
2634 if (!insn
->tex
.levelZero
) {
2636 case OP_TEX
: lodm
= 0; break;
2637 case OP_TXB
: lodm
= 2; break;
2638 case OP_TXL
: lodm
= 3; break;
2640 assert(!"invalid tex op");
2647 if (insn
->tex
.rIndirectSrc
>= 0) {
2648 emitInsn (0xdeb80000);
2649 emitField(0x25, 2, lodm
);
2650 emitField(0x24, 1, insn
->tex
.useOffsets
== 1);
2652 emitInsn (0xc0380000);
2653 emitField(0x37, 2, lodm
);
2654 emitField(0x36, 1, insn
->tex
.useOffsets
== 1);
2655 emitField(0x24, 13, insn
->tex
.r
);
2658 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2659 emitField(0x31, 1, insn
->tex
.liveOnly
);
2660 emitField(0x23, 1, insn
->tex
.derivAll
);
2661 emitField(0x1f, 4, insn
->tex
.mask
);
2662 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2663 insn
->tex
.target
.getDim() - 1);
2664 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2666 emitGPR (0x08, insn
->src(0));
2667 emitGPR (0x00, insn
->def(0));
/*
 * Encode the texture op with opcode word 0xdd380000 (indirect resource,
 * tex.rIndirectSrc >= 0) or 0xdc380000 (tex.r written as a 13-bit field at
 * 0x24); the instruction is accessed through its TexInstruction view.
 *
 * Fields: flag at 0x37 set when tex.levelZero is 0, multisample flag at
 * 0x32, liveOnly at 0x31, useOffsets == 1 flag at 0x23, component mask at
 * 0x1f, dimension code at 0x1d (3 for cube targets, otherwise getDim() - 1)
 * and array flag at 0x1c. src(0) is the GPR at 0x08 and def(0) the GPR at
 * 0x00.
 */
2671 CodeEmitterGM107::emitTLD()
2673 const TexInstruction
*insn
= this->insn
->asTex();
2675 if (insn
->tex
.rIndirectSrc
>= 0) {
2676 emitInsn (0xdd380000);
2678 emitInsn (0xdc380000);
2679 emitField(0x24, 13, insn
->tex
.r
);
2682 emitField(0x37, 1, insn
->tex
.levelZero
== 0);
2683 emitField(0x32, 1, insn
->tex
.target
.isMS());
2684 emitField(0x31, 1, insn
->tex
.liveOnly
);
2685 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2686 emitField(0x1f, 4, insn
->tex
.mask
);
2687 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2688 insn
->tex
.target
.getDim() - 1);
2689 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2691 emitGPR (0x08, insn
->src(0));
2692 emitGPR (0x00, insn
->def(0));
/*
 * Encode a texture gather; the instruction is accessed through its
 * TexInstruction view.
 *
 * With an indirect resource (tex.rIndirectSrc >= 0) opcode word 0xdef80000
 * is used, with tex.gatherComp at 0x26 and the useOffsets == 4 /
 * useOffsets == 1 flags at 0x25 / 0x24. Otherwise opcode word 0xc8380000 is
 * used, with the same three values at 0x38, 0x37 and 0x36 and tex.r as a
 * 13-bit field at 0x24.
 * NOTE(review): the two flag fields are declared two bits wide at adjacent
 * positions; only 0 or 1 is ever stored in them.
 * Common fields: shadow flag at 0x32, liveOnly at 0x31, derivAll at 0x23,
 * component mask at 0x1f, dimension code at 0x1d (3 for cube targets,
 * otherwise getDim() - 1) and array flag at 0x1c. src(0) is the GPR at 0x08
 * and def(0) the GPR at 0x00.
 */
2696 CodeEmitterGM107::emitTLD4()
2698 const TexInstruction
*insn
= this->insn
->asTex();
2700 if (insn
->tex
.rIndirectSrc
>= 0) {
2701 emitInsn (0xdef80000);
2702 emitField(0x26, 2, insn
->tex
.gatherComp
);
2703 emitField(0x25, 2, insn
->tex
.useOffsets
== 4);
2704 emitField(0x24, 2, insn
->tex
.useOffsets
== 1);
2706 emitInsn (0xc8380000);
2707 emitField(0x38, 2, insn
->tex
.gatherComp
);
2708 emitField(0x37, 2, insn
->tex
.useOffsets
== 4);
2709 emitField(0x36, 2, insn
->tex
.useOffsets
== 1);
2710 emitField(0x24, 13, insn
->tex
.r
);
2713 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2714 emitField(0x31, 1, insn
->tex
.liveOnly
);
2715 emitField(0x23, 1, insn
->tex
.derivAll
);
2716 emitField(0x1f, 4, insn
->tex
.mask
);
2717 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2718 insn
->tex
.target
.getDim() - 1);
2719 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2721 emitGPR (0x08, insn
->src(0));
2722 emitGPR (0x00, insn
->def(0));
/*
 * Encode the texture op with opcode word 0xde780000 (indirect resource,
 * tex.rIndirectSrc >= 0) or 0xde380000 (tex.r written as a 13-bit field at
 * 0x24); the instruction is accessed through its TexInstruction view.
 *
 * Fields: liveOnly at 0x31, useOffsets == 1 flag at 0x23, component mask at
 * 0x1f, dimension code at 0x1d (3 for cube targets, otherwise getDim() - 1)
 * and array flag at 0x1c. src(0) is the GPR at 0x08 and def(0) the GPR at
 * 0x00.
 */
2726 CodeEmitterGM107::emitTXD()
2728 const TexInstruction
*insn
= this->insn
->asTex();
2730 if (insn
->tex
.rIndirectSrc
>= 0) {
2731 emitInsn (0xde780000);
2733 emitInsn (0xde380000);
2734 emitField(0x24, 13, insn
->tex
.r
);
2737 emitField(0x31, 1, insn
->tex
.liveOnly
);
2738 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2739 emitField(0x1f, 4, insn
->tex
.mask
);
2740 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2741 insn
->tex
.target
.getDim() - 1);
2742 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2744 emitGPR (0x08, insn
->src(0));
2745 emitGPR (0x00, insn
->def(0));
/*
 * Encode the texture op with opcode word 0xdf600000 (indirect resource,
 * tex.rIndirectSrc >= 0) or 0xdf580000 (tex.r written as a 13-bit field at
 * 0x24); the instruction is accessed through its TexInstruction view.
 *
 * Fields: liveOnly at 0x31, derivAll at 0x23, component mask at 0x1f,
 * dimension code at 0x1d (3 for cube targets, otherwise getDim() - 1) and
 * array flag at 0x1c. src(0) is the GPR at 0x08 and def(0) the GPR at 0x00.
 */
2749 CodeEmitterGM107::emitTMML()
2751 const TexInstruction
*insn
= this->insn
->asTex();
2753 if (insn
->tex
.rIndirectSrc
>= 0) {
2754 emitInsn (0xdf600000);
2756 emitInsn (0xdf580000);
2757 emitField(0x24, 13, insn
->tex
.r
);
2760 emitField(0x31, 1, insn
->tex
.liveOnly
);
2761 emitField(0x23, 1, insn
->tex
.derivAll
);
2762 emitField(0x1f, 4, insn
->tex
.mask
);
2763 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2764 insn
->tex
.target
.getDim() - 1);
2765 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2767 emitGPR (0x08, insn
->src(0));
2768 emitGPR (0x00, insn
->def(0));
/*
 * Encode a texture query; the instruction is accessed through its
 * TexInstruction view.
 *
 * tex.query selects the 6-bit query code written at 0x16: DIMS=0x01,
 * TYPE=0x02, SAMPLE_POSITION=0x05, FILTER=0x10, LOD=0x12, WRAP=0x14,
 * BORDER_COLOUR=0x16; other queries assert.
 * Opcode word 0xdf500000 is used with an indirect resource
 * (tex.rIndirectSrc >= 0), otherwise 0xdf480000 with tex.r as a 13-bit field
 * at 0x24. liveOnly goes at 0x31 and the component mask at 0x1f. src(0) is
 * the GPR at 0x08 and def(0) the GPR at 0x00.
 */
2772 CodeEmitterGM107::emitTXQ()
2774 const TexInstruction
*insn
= this->insn
->asTex();
2777 switch (insn
->tex
.query
) {
2778 case TXQ_DIMS
: type
= 0x01; break;
2779 case TXQ_TYPE
: type
= 0x02; break;
2780 case TXQ_SAMPLE_POSITION
: type
= 0x05; break;
2781 case TXQ_FILTER
: type
= 0x10; break;
2782 case TXQ_LOD
: type
= 0x12; break;
2783 case TXQ_WRAP
: type
= 0x14; break;
2784 case TXQ_BORDER_COLOUR
: type
= 0x16; break;
2786 assert(!"invalid txq query");
2790 if (insn
->tex
.rIndirectSrc
>= 0) {
2791 emitInsn (0xdf500000);
2793 emitInsn (0xdf480000);
2794 emitField(0x24, 13, insn
->tex
.r
);
2797 emitField(0x31, 1, insn
->tex
.liveOnly
);
2798 emitField(0x1f, 4, insn
->tex
.mask
);
2799 emitField(0x16, 6, type
);
2800 emitGPR (0x08, insn
->src(0));
2801 emitGPR (0x00, insn
->def(0));
/*
 * Encode the instruction with opcode word 0xf0f00000.
 *
 * Bit 0x1d is set (marked "le" in the source), the 3-bit field at 0x1a is
 * set to 5, and insn->subOp is written as a 6-bit value to both 0x14 and
 * 0x00.
 */
2805 CodeEmitterGM107::emitDEPBAR()
2807 emitInsn (0xf0f00000);
2808 emitField(0x1d, 1, 1); /* le */
2809 emitField(0x1a, 3, 5);
2810 emitField(0x14, 6, insn
->subOp
);
2811 emitField(0x00, 6, insn
->subOp
);
2814 /*******************************************************************************
2816 ******************************************************************************/
/* Encode the operand-less instruction with opcode word 0x50b00000. */
2819 CodeEmitterGM107::emitNOP()
2821 emitInsn(0x50b00000);
/*
 * Encode the instruction with opcode word 0xe3300000, with the fixed
 * condition CC_TR emitted at 0x00.
 */
2825 CodeEmitterGM107::emitKIL()
2827 emitInsn (0xe3300000);
2828 emitCond5(0x00, CC_TR
);
/*
 * Encode the instruction used for OP_EMIT / OP_RESTART.
 *
 * cut is set for OP_RESTART or when subOp is non-zero; emit is set for
 * OP_EMIT. The opcode follows the file of src(1): 0xfbe00000 with emitGPR,
 * 0xf6e00000 for a 19-bit immediate, 0xebe00000 for a constant buffer;
 * other files assert. The 2-bit field at 0x27 holds (cut << 1) | emit.
 * src(0) is the GPR at 0x08 and def(0) the GPR at 0x00.
 */
2832 CodeEmitterGM107::emitOUT()
2834 const int cut
= insn
->op
== OP_RESTART
|| insn
->subOp
;
2835 const int emit
= insn
->op
== OP_EMIT
;
2837 switch (insn
->src(1).getFile()) {
2839 emitInsn(0xfbe00000);
2840 emitGPR (0x14, insn
->src(1));
2842 case FILE_IMMEDIATE
:
2843 emitInsn(0xf6e00000);
2844 emitIMMD(0x14, 19, insn
->src(1));
2846 case FILE_MEMORY_CONST
:
2847 emitInsn(0xebe00000);
2848 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2851 assert(!"bad src1 file");
2855 emitField(0x27, 2, (cut
<< 1) | emit
);
2856 emitGPR (0x08, insn
->src(0));
2857 emitGPR (0x00, insn
->def(0));
/*
 * Encode a barrier instruction (opcode word 0xf0a80000).
 *
 * insn->subOp selects the 8-bit code written at 0x20: RED_POPC=0x02,
 * RED_AND=0x0a, RED_OR=0x12, ARRIVE=0x81; the remaining path asserts that
 * the sub-op is NV50_IR_SUBOP_BAR_SYNC.
 * src(0) is emitted as a GPR at 0x08 or, when it is not a GPR, as an 8-bit
 * immediate at 0x08 with bit 0x2b set. src(1) is emitted as a GPR at 0x14
 * or, when it is not a GPR, as a 12-bit immediate at 0x14 with bit 0x2c set.
 * When src(2) exists and is not the instruction's own predicate
 * (predSrc != 2) it is emitted as the predicate at 0x27 with its NOT
 * modifier at 0x2a; otherwise the 3-bit field at 0x27 is set to 7.
 */
2861 CodeEmitterGM107::emitBAR()
2865 emitInsn (0xf0a80000);
2867 switch (insn
->subOp
) {
2868 case NV50_IR_SUBOP_BAR_RED_POPC
: subop
= 0x02; break;
2869 case NV50_IR_SUBOP_BAR_RED_AND
: subop
= 0x0a; break;
2870 case NV50_IR_SUBOP_BAR_RED_OR
: subop
= 0x12; break;
2871 case NV50_IR_SUBOP_BAR_ARRIVE
: subop
= 0x81; break;
2874 assert(insn
->subOp
== NV50_IR_SUBOP_BAR_SYNC
);
2878 emitField(0x20, 8, subop
);
2881 if (insn
->src(0).getFile() == FILE_GPR
) {
2882 emitGPR(0x08, insn
->src(0));
2884 ImmediateValue
*imm
= insn
->getSrc(0)->asImm();
2886 emitField(0x08, 8, imm
->reg
.data
.u32
);
2887 emitField(0x2b, 1, 1);
2891 if (insn
->src(1).getFile() == FILE_GPR
) {
2892 emitGPR(0x14, insn
->src(1));
/* This branch encodes src(1), so the immediate must be read from source 1;
 * reading source 0 would encode the src(0) value a second time. */
2894 ImmediateValue
*imm
= insn
->getSrc(1)->asImm();
2896 emitField(0x14, 12, imm
->reg
.data
.u32
);
2897 emitField(0x2c, 1, 1);
2900 if (insn
->srcExists(2) && (insn
->predSrc
!= 2)) {
2901 emitPRED (0x27, insn
->src(2));
2902 emitField(0x2a, 1, insn
->src(2).mod
== Modifier(NV50_IR_MOD_NOT
));
2904 emitField(0x27, 3, 7);
/*
 * Encode the instruction with opcode word 0xef980000; insn->subOp shifted
 * right by two is written as a 2-bit field at 0x08.
 */
2909 CodeEmitterGM107::emitMEMBAR()
2911 emitInsn (0xef980000);
2912 emitField(0x08, 2, insn
->subOp
>> 2);
/*
 * Encode a vote instruction (opcode word 0x50d80000). src(0) must be a
 * predicate (asserted).
 *
 * The existing defs are scanned and classified by file (GPR or predicate);
 * the indices r and p refer to the defs found that way. insn->subOp is
 * written (two bits) at 0x30, def(r) is the GPR at 0x00 and def(p) the
 * predicate at 0x2d. The NOT modifier of src(0) goes at 0x2a and src(0)
 * itself is the predicate at 0x27.
 */
2916 CodeEmitterGM107::emitVOTE()
2918 assert(insn
->src(0).getFile() == FILE_PREDICATE
);
2921 for (int i
= 0; insn
->defExists(i
); i
++) {
2922 if (insn
->def(i
).getFile() == FILE_GPR
)
2924 else if (insn
->def(i
).getFile() == FILE_PREDICATE
)
2928 emitInsn (0x50d80000);
2929 emitField(0x30, 2, insn
->subOp
);
2931 emitGPR (0x00, insn
->def(r
));
2935 emitPRED (0x2d, insn
->def(p
));
2938 emitField(0x2a, 1, insn
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
));
2939 emitPRED (0x27, insn
->src(0));
/*
 * Write the 4-bit surface target code at 0x20 for a surface instruction
 * (op asserted to lie in OP_SULDB..OP_SUREDP).
 *
 * The code is chosen from tex.target, with these groups sharing a branch:
 * BUFFER; 1D_ARRAY; 2D and RECT; 2D_ARRAY, CUBE and CUBE_ARRAY; 3D. The
 * remaining path asserts that the target is TEX_TARGET_1D.
 */
2943 CodeEmitterGM107::emitSUTarget()
2945 const TexInstruction
*insn
= this->insn
->asTex();
2948 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
2950 if (insn
->tex
.target
== TEX_TARGET_BUFFER
) {
2952 } else if (insn
->tex
.target
== TEX_TARGET_1D_ARRAY
) {
2954 } else if (insn
->tex
.target
== TEX_TARGET_2D
||
2955 insn
->tex
.target
== TEX_TARGET_RECT
) {
2957 } else if (insn
->tex
.target
== TEX_TARGET_2D_ARRAY
||
2958 insn
->tex
.target
== TEX_TARGET_CUBE
||
2959 insn
->tex
.target
== TEX_TARGET_CUBE_ARRAY
) {
2961 } else if (insn
->tex
.target
== TEX_TARGET_3D
) {
2964 assert(insn
->tex
.target
== TEX_TARGET_1D
);
2966 emitField(0x20, 4, target
);
/*
 * Emit the surface handle taken from source index s of a surface
 * instruction (op asserted to lie in OP_SULDB..OP_SUREDP).
 *
 * A GPR source is emitted at 0x27. Otherwise the source is treated as an
 * immediate: bit 0x33 is set and its u32 value is written as a 13-bit field
 * at 0x24.
 */
2970 CodeEmitterGM107::emitSUHandle(const int s
)
2972 const TexInstruction
*insn
= this->insn
->asTex();
2974 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
2976 if (insn
->src(s
).getFile() == FILE_GPR
) {
2977 emitGPR(0x27, insn
->src(s
));
2979 ImmediateValue
*imm
= insn
->getSrc(s
)->asImm();
2981 emitField(0x33, 1, 1);
2982 emitField(0x24, 13, imm
->reg
.data
.u32
);
/*
 * Encode a surface store (opcode word 0xeb200000); the instruction is
 * accessed through its TexInstruction view.
 *
 * Bit 0x34 is set for OP_SUSTB. The 4-bit field at 0x14 is set to 0xf (all
 * of rgba), src(0) is the GPR at 0x08 and src(1) the GPR at 0x00.
 */
2987 CodeEmitterGM107::emitSUSTx()
2989 const TexInstruction
*insn
= this->insn
->asTex();
2991 emitInsn(0xeb200000);
2992 if (insn
->op
== OP_SUSTB
)
2993 emitField(0x34, 1, 1);
2997 emitField(0x14, 4, 0xf); // rgba
2998 emitGPR (0x08, insn
->src(0));
2999 emitGPR (0x00, insn
->src(1));
/*
 * Encode a surface load (opcode word 0xeb000000); the instruction is
 * accessed through its TexInstruction view.
 *
 * Bit 0x34 is set for OP_SULDB. dType selects the 3-bit code written at
 * 0x14: S8=1, U16=2, S16=3, U32=4, U64=5, B128=6; the remaining path asserts
 * that dType is TYPE_U8. def(0) is the GPR at 0x00 and src(0) the GPR at
 * 0x08.
 */
3005 CodeEmitterGM107::emitSULDx()
3007 const TexInstruction
*insn
= this->insn
->asTex();
3010 emitInsn(0xeb000000);
3011 if (insn
->op
== OP_SULDB
)
3012 emitField(0x34, 1, 1);
3015 switch (insn
->dType
) {
3016 case TYPE_S8
: type
= 1; break;
3017 case TYPE_U16
: type
= 2; break;
3018 case TYPE_S16
: type
= 3; break;
3019 case TYPE_U32
: type
= 4; break;
3020 case TYPE_U64
: type
= 5; break;
3021 case TYPE_B128
: type
= 6; break;
3023 assert(insn
->dType
== TYPE_U8
);
3027 emitField(0x14, 3, type
);
3028 emitGPR (0x00, insn
->def(0));
3029 emitGPR (0x08, insn
->src(0));
/*
 * Encode a surface atomic/reduction; the instruction is accessed through its
 * TexInstruction view.
 *
 * Opcode word 0xeac00000 is used for NV50_IR_SUBOP_ATOM_CAS and 0xea600000
 * otherwise; bit 0x34 is set for OP_SUREDB. dType selects the 3-bit code
 * written at 0x24: S32=1, U64=2, F32=3, S64=5, with type starting at 0 and
 * the remaining path asserting TYPE_U32. The sub-op code written (four
 * bits) at 0x1d is insn->subOp except for ATOM_CAS and ATOM_EXCH, which are
 * special-cased. src(1) is the GPR at 0x14, src(0) the GPR at 0x08 and
 * def(0) the GPR at 0x00.
 */
3035 CodeEmitterGM107::emitSUREDx()
3037 const TexInstruction
*insn
= this->insn
->asTex();
3038 uint8_t type
= 0, subOp
;
3040 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
)
3041 emitInsn(0xeac00000);
3043 emitInsn(0xea600000);
3045 if (insn
->op
== OP_SUREDB
)
3046 emitField(0x34, 1, 1);
3050 switch (insn
->dType
) {
3051 case TYPE_S32
: type
= 1; break;
3052 case TYPE_U64
: type
= 2; break;
3053 case TYPE_F32
: type
= 3; break;
3054 case TYPE_S64
: type
= 5; break;
3056 assert(insn
->dType
== TYPE_U32
);
3061 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
3063 } else if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
) {
3066 subOp
= insn
->subOp
;
3069 emitField(0x24, 3, type
);
3070 emitField(0x1d, 4, subOp
);
3071 emitGPR (0x14, insn
->src(1));
3072 emitGPR (0x08, insn
->src(0));
3073 emitGPR (0x00, insn
->def(0));
3078 /*******************************************************************************
3079 * assembler front-end
3080 ******************************************************************************/
/*
 * Encode one IR instruction into the output buffer by dispatching to the
 * per-opcode emit*() helpers.
 *
 * The space needed is 16 bytes when issue delays are written and codeSize is
 * a multiple of 32 (so a scheduling word is emitted as well), otherwise
 * 8 bytes. Instructions whose encSize is not 8 are reported with ERROR and
 * printed; running past codeSizeLimit is reported with ERROR too.
 * With issue delays enabled, insn->sched is stored as a 21-bit field at
 * n * 21 in the scheduling words, where n = ((codeSize & 0x1f) / 8) - 1;
 * the two data words are zeroed first.
 * The dispatch refines the choice of helper using dType/sType (float vs
 * integer, 64-bit vs 32-bit), whether a def or source is a predicate, and
 * for loads, stores and atomics the memory file of src(0). Unknown opcodes
 * assert.
 */
3083 CodeEmitterGM107::emitInstruction(Instruction
*i
)
3085 const unsigned int size
= (writeIssueDelays
&& !(codeSize
& 0x1f)) ? 16 : 8;
3090 if (insn
->encSize
!= 8) {
3091 ERROR("skipping undecodable instruction: "); insn
->print();
3094 if (codeSize
+ size
> codeSizeLimit
) {
3095 ERROR("code emitter output buffer too small\n");
3099 if (writeIssueDelays
) {
3100 int n
= ((codeSize
& 0x1f) / 8) - 1;
3103 data
[0] = 0x00000000;
3104 data
[1] = 0x00000000;
3110 emitField(data
, n
* 21, 21, insn
->sched
);
3166 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
3167 insn
->src(0).getFile() == FILE_PREDICATE
)) {
3169 } else if (isFloatType(insn
->dType
)) {
3170 if (isFloatType(insn
->sType
))
3175 if (isFloatType(insn
->sType
))
3186 if (isFloatType(insn
->dType
)) {
3187 if (insn
->dType
== TYPE_F64
)
3196 if (isFloatType(insn
->dType
)) {
3197 if (insn
->dType
== TYPE_F64
)
3207 if (isFloatType(insn
->dType
)) {
3208 if (insn
->dType
== TYPE_F64
)
3221 if (isFloatType(insn
->dType
)) {
3222 if (insn
->dType
== TYPE_F64
)
3231 if (typeSizeof(insn
->sType
) == 8)
3237 if (typeSizeof(insn
->sType
) == 8)
3255 if (isFloatType(insn
->dType
))
3264 if (insn
->def(0).getFile() != FILE_PREDICATE
) {
3265 if (isFloatType(insn
->sType
))
3266 if (insn
->sType
== TYPE_F64
)
3273 if (isFloatType(insn
->sType
))
3274 if (insn
->sType
== TYPE_F64
)
3306 switch (insn
->src(0).getFile()) {
3307 case FILE_MEMORY_CONST
: emitLDC(); break;
3308 case FILE_MEMORY_LOCAL
: emitLDL(); break;
3309 case FILE_MEMORY_SHARED
: emitLDS(); break;
3310 case FILE_MEMORY_GLOBAL
: emitLD(); break;
3312 assert(!"invalid load");
3318 switch (insn
->src(0).getFile()) {
3319 case FILE_MEMORY_LOCAL
: emitSTL(); break;
3320 case FILE_MEMORY_SHARED
: emitSTS(); break;
3321 case FILE_MEMORY_GLOBAL
: emitST(); break;
3323 assert(!"invalid store");
3329 if (insn
->src(0).getFile() == FILE_MEMORY_SHARED
)
3332 if (!insn
->defExists(0) && insn
->subOp
< NV50_IR_SUBOP_ATOM_CAS
)
3417 assert(!"invalid opcode");
// Implementation of the virtual getMinEncodingSize() declared in
// CodeEmitterGM107. NOTE(review): presumably always 8, since emitInstruction()
// rejects any encSize other than 8 -- confirm.
3433 CodeEmitterGM107::getMinEncodingSize(const Instruction
*i
) const
3438 /*******************************************************************************
3439 * sched data calculator
3440 ******************************************************************************/
// Pass that fills in Instruction::sched for GM107: stall counts, yield flag,
// read/write dependency barriers, barrier wait mask and operand reuse flags.
// It tracks, per basic block, the cycle at which each register becomes
// available (RegScores).
3442 class SchedDataCalculatorGM107
: public Pass
3445 SchedDataCalculatorGM107(const TargetGM107
*targ
) : targ(targ
) {}
// Change the reference cycle of the scoreboard: delta is the old base minus
// the new one (presumably added to every entry in the loops below -- confirm).
3457 void rebase(const int base
)
3459 const int delta
= this->base
- base
;
// 256 general-purpose register entries ...
3464 for (int i
= 0; i
< 256; ++i
) {
// ... and 8 predicate register entries.
3468 for (int i
= 0; i
< 8; ++i
) {
// Reset both the read and the write score data to zero.
3477 memset(&rd
, 0, sizeof(rd
));
3478 memset(&wr
, 0, sizeof(wr
));
// Scan one ScoreData over all GPR and predicate entries (presumably returning
// the largest recorded cycle -- confirm).
3480 int getLatest(const ScoreData
& d
) const
3483 for (int i
= 0; i
< 256; ++i
)
3486 for (int i
= 0; i
< 8; ++i
)
3493 inline int getLatestRd() const
3495 return getLatest(rd
);
3497 inline int getLatestWr() const
3499 return getLatest(wr
);
// Latest cycle over both the read and the write scores.
3501 inline int getLatest() const
3503 return MAX2(getLatestRd(), getLatestWr());
// Merge another scoreboard into this one by taking the element-wise maximum
// of every GPR (r), predicate (p) and condition-code (c) entry.
3505 void setMax(const RegScores
*that
)
3507 for (int i
= 0; i
< 256; ++i
) {
3508 rd
.r
[i
] = MAX2(rd
.r
[i
], that
->rd
.r
[i
]);
3509 wr
.r
[i
] = MAX2(wr
.r
[i
], that
->wr
.r
[i
]);
3511 for (int i
= 0; i
< 8; ++i
) {
3512 rd
.p
[i
] = MAX2(rd
.p
[i
], that
->rd
.p
[i
]);
3513 wr
.p
[i
] = MAX2(wr
.p
[i
], that
->wr
.p
[i
]);
3515 rd
.c
= MAX2(rd
.c
, that
->rd
.c
);
3516 wr
.c
= MAX2(wr
.c
, that
->wr
.c
);
// Debug dump: log every register entry whose score lies after 'cycle'.
3518 void print(int cycle
)
3520 for (int i
= 0; i
< 256; ++i
) {
3521 if (rd
.r
[i
] > cycle
)
3522 INFO("rd $r%i @ %i\n", i
, rd
.r
[i
]);
3523 if (wr
.r
[i
] > cycle
)
3524 INFO("wr $r%i @ %i\n", i
, wr
.r
[i
]);
3526 for (int i
= 0; i
< 8; ++i
) {
3527 if (rd
.p
[i
] > cycle
)
3528 INFO("rd $p%i @ %i\n", i
, rd
.p
[i
]);
3529 if (wr
.p
[i
] > cycle
)
3530 INFO("wr $p%i @ %i\n", i
, wr
.p
[i
]);
3533 INFO("rd $c @ %i\n", rd
.c
);
3535 INFO("wr $c @ %i\n", wr
.c
);
3539 RegScores
*score
; // for current BB
// One scoreboard per basic block, indexed by the block's id.
3540 std::vector
<RegScores
> scoreBoards
;
3542 const TargetGM107
*targ
;
3543 bool visit(Function
*);
3544 bool visit(BasicBlock
*);
// Stall-count computation.
3546 void commitInsn(const Instruction
*, int);
3547 int calcDelay(const Instruction
*, int) const;
3548 void setDelay(Instruction
*, int, const Instruction
*);
3549 void recordWr(const Value
*, int, int);
3550 void checkRd(const Value
*, int, int&) const;
// Writers for the individual fields of Instruction::sched.
3552 inline void emitYield(Instruction
*);
3553 inline void emitStall(Instruction
*, uint8_t);
3554 inline void emitReuse(Instruction
*, uint8_t);
3555 inline void emitWrDepBar(Instruction
*, uint8_t);
3556 inline void emitRdDepBar(Instruction
*, uint8_t);
3557 inline void emitWtDepBar(Instruction
*, uint8_t);
// Readers for the individual fields of Instruction::sched.
3559 inline int getStall(const Instruction
*) const;
3560 inline int getWrDepBar(const Instruction
*) const;
3561 inline int getRdDepBar(const Instruction
*) const;
3562 inline int getWtDepBar(const Instruction
*) const;
3564 void setReuseFlag(Instruction
*);
3566 inline void printSchedInfo(int, const Instruction
*) const;
// Pairs a barrier-producing instruction with the first instruction that uses
// its result.
3569 LiveBarUse(Instruction
*insn
, Instruction
*usei
)
3570 : insn(insn
), usei(usei
) { }
// Pairs a barrier-producing instruction with the first instruction that
// overwrites one of its sources.
3576 LiveBarDef(Instruction
*insn
, Instruction
*defi
)
3577 : insn(insn
), defi(defi
) { }
// Dependency barrier insertion and its helpers.
3582 bool insertBarriers(BasicBlock
*);
3584 Instruction
*findFirstUse(const Instruction
*) const;
3585 Instruction
*findFirstDef(const Instruction
*) const;
3587 bool needRdDepBar(const Instruction
*) const;
3588 bool needWrDepBar(const Instruction
*) const;
// Record the stall count 'cnt' for 'insn'.
// NOTE(review): presumably stored in sched bits 0..3, the field that
// getStall() reads back -- confirm.
3592 SchedDataCalculatorGM107::emitStall(Instruction
*insn
, uint8_t cnt
)
// Set the yield flag: bit 4 of insn->sched (the field printSchedInfo() labels
// "yl").
3599 SchedDataCalculatorGM107::emitYield(Instruction
*insn
)
3601 insn
->sched
|= 1 << 4;
// Store barrier 'id' in the write dependency barrier field, bits 5..7 of
// insn->sched.
3605 SchedDataCalculatorGM107::emitWrDepBar(Instruction
*insn
, uint8_t id
)
// The field starts out as all ones (visit(BasicBlock *) initialises sched to
// 0x7e0); clear it first so the OR below stores 'id' exactly.
3608 if ((insn
->sched
& 0xe0) == 0xe0)
3609 insn
->sched
^= 0xe0;
3610 insn
->sched
|= id
<< 5;
// Store barrier 'id' in the read dependency barrier field, bits 8..10 of
// insn->sched.
3614 SchedDataCalculatorGM107::emitRdDepBar(Instruction
*insn
, uint8_t id
)
// The field starts out as all ones (visit(BasicBlock *) initialises sched to
// 0x7e0); clear it first so the OR below stores 'id' exactly.
3617 if ((insn
->sched
& 0x700) == 0x700)
3618 insn
->sched
^= 0x700;
3619 insn
->sched
|= id
<< 8;
// Make 'insn' wait on barrier 'id' by setting bit (11 + id) of insn->sched.
// The wait mask is a bitmask: getWtDepBar() reads it back as bits 11..16.
3623 SchedDataCalculatorGM107::emitWtDepBar(Instruction
*insn
, uint8_t id
)
3626 insn
->sched
|= 1 << (11 + id
);
// Set the reuse flag for operand slot 'id': bit (17 + id) of insn->sched
// (printSchedInfo() decodes bits 17..20 as "ru").
3630 SchedDataCalculatorGM107::emitReuse(Instruction
*insn
, uint8_t id
)
3633 insn
->sched
|= 1 << (17 + id
);
// Debug helper: split insn->sched into its fields and log them with the
// current cycle.
3637 SchedDataCalculatorGM107::printSchedInfo(int cycle
,
3638 const Instruction
*insn
) const
3640 uint8_t st
, yl
, wr
, rd
, wt
, ru
;
// Layout of the scheduling word, as decoded here:
//   bits  0..3   st  stall count
//   bit   4      yl  yield flag
//   bits  5..7   wr  write dependency barrier
//   bits  8..10  rd  read dependency barrier
//   bits 11..16  wt  barrier wait mask
//   bits 17..20  ru  reuse flags
3642 st
= (insn
->sched
& 0x00000f) >> 0;
3643 yl
= (insn
->sched
& 0x000010) >> 4;
3644 wr
= (insn
->sched
& 0x0000e0) >> 5;
3645 rd
= (insn
->sched
& 0x000700) >> 8;
3646 wt
= (insn
->sched
& 0x01f800) >> 11;
3647 ru
= (insn
->sched
& 0x1e0000) >> 17;
3649 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3650 cycle
, st
, yl
, wr
, rd
, wt
, ru
);
// Return the stall count: bits 0..3 of insn->sched.
3654 SchedDataCalculatorGM107::getStall(const Instruction
*insn
) const
3656 return insn
->sched
& 0xf;
// Return the write dependency barrier field: bits 5..7 of insn->sched.
3660 SchedDataCalculatorGM107::getWrDepBar(const Instruction
*insn
) const
3662 return (insn
->sched
& 0x0000e0) >> 5;
// Return the read dependency barrier field: bits 8..10 of insn->sched.
3666 SchedDataCalculatorGM107::getRdDepBar(const Instruction
*insn
) const
3668 return (insn
->sched
& 0x000700) >> 8;
// Return the barrier wait mask: bits 11..16 of insn->sched, one bit per
// barrier.
3672 SchedDataCalculatorGM107::getWtDepBar(const Instruction
*insn
) const
3674 return (insn
->sched
& 0x01f800) >> 11;
3677 // Emit the reuse flag, which makes use of the operand reuse cache, a new
3678 // level of the memory hierarchy introduced with Maxwell.
3680 // It reduces bank conflicts by caching operands. Each time an instruction
3681 // is issued, the flag can tell the hw which operands are going to be
3682 // re-used by the next instruction. Note that the next instruction has to use
3683 // the same GPR id in the same operand slot.
3685 SchedDataCalculatorGM107::setReuseFlag(Instruction
*insn
)
3687 Instruction
*next
= insn
->next
;
3688 BitSet
defs(255, 1);
// Nothing to do when the target does not support reuse for this instruction.
3690 if (!targ
->isReuseSupported(insn
))
// Collect the GPR ids written by this instruction. Only 4-byte destinations
// with an id other than 255 are recorded.
3693 for (int d
= 0; insn
->defExists(d
); ++d
) {
3694 const Value
*def
= insn
->def(d
).rep();
3695 if (insn
->def(d
).getFile() != FILE_GPR
)
3697 if (typeSizeof(insn
->dType
) != 4 || def
->reg
.data
.id
== 255)
3699 defs
.set(def
->reg
.data
.id
);
// Examine each source slot. The tests below single out sources that are not
// GPRs, are not 4 bytes wide or have id 255, are overwritten by this very
// instruction, or do not match the GPR in the same slot of the next
// instruction (presumably each such source is skipped -- confirm).
3702 for (int s
= 0; insn
->srcExists(s
); s
++) {
3703 const Value
*src
= insn
->src(s
).rep();
3704 if (insn
->src(s
).getFile() != FILE_GPR
)
3706 if (typeSizeof(insn
->sType
) != 4 || src
->reg
.data
.id
== 255)
3708 if (defs
.test(src
->reg
.data
.id
))
3710 if (!next
->srcExists(s
) || next
->src(s
).getFile() != FILE_GPR
)
3712 if (src
->reg
.data
.id
!= next
->getSrc(s
)->reg
.data
.id
)
// Record in the current scoreboard when the value 'v', written at 'cycle',
// becomes available to readers ('ready').
3720 SchedDataCalculatorGM107::recordWr(const Value
*v
, int cycle
, int ready
)
3722 int a
= v
->reg
.data
.id
, b
;
3724 switch (v
->reg
.file
) {
// Register range [a, b): one entry per 4 bytes of the value's size.
3726 b
= a
+ v
->reg
.size
/ 4;
3727 for (int r
= a
; r
< b
; ++r
)
3728 score
->rd
.r
[r
] = ready
;
3730 case FILE_PREDICATE
:
3731 // To immediately use a predicate set by any instructions, the minimum
3732 // number of stall counts is 13.
3733 score
->rd
.p
[a
] = cycle
+ 13;
// Remaining visible case: the single condition-code entry.
3736 score
->rd
.c
= ready
;
// Raise 'delay' so that reading 'v' at 'cycle' does not happen before the
// value is ready according to the current scoreboard.
3744 SchedDataCalculatorGM107::checkRd(const Value
*v
, int cycle
, int &delay
) const
3746 int a
= v
->reg
.data
.id
, b
;
3749 switch (v
->reg
.file
) {
// Register range [a, b): take the latest ready cycle over every 4-byte part.
3751 b
= a
+ v
->reg
.size
/ 4;
3752 for (int r
= a
; r
< b
; ++r
)
3753 ready
= MAX2(ready
, score
->rd
.r
[r
]);
3755 case FILE_PREDICATE
:
3756 ready
= MAX2(ready
, score
->rd
.p
[a
]);
3759 ready
= MAX2(ready
, score
->rd
.c
);
// delay is only ever increased, so the caller can accumulate it over all
// sources.
3765 delay
= MAX2(delay
, ready
- cycle
);
// Account for 'insn' being issued at 'cycle': every destination is recorded
// as ready at cycle + the target-reported latency of the instruction.
3769 SchedDataCalculatorGM107::commitInsn(const Instruction
*insn
, int cycle
)
3771 const int ready
= cycle
+ targ
->getLatency(insn
);
3773 for (int d
= 0; insn
->defExists(d
); ++d
)
3774 recordWr(insn
->getDef(d
), cycle
, ready
);
3776 #ifdef GM107_DEBUG_SCHED_DATA
3777 score
->print(cycle
);
// Lower and upper bound of the stall count; setDelay() clamps to this range.
// The upper bound 0xf matches the 4-bit field read by getStall().
3781 #define GM107_MIN_ISSUE_DELAY 0x1
3782 #define GM107_MAX_ISSUE_DELAY 0xf
// Return how many cycles after 'cycle' the instruction has to wait until all
// of its sources are ready, according to the current scoreboard.
3785 SchedDataCalculatorGM107::calcDelay(const Instruction
*insn
, int cycle
) const
3787 int delay
= 0, ready
= cycle
;
3789 for (int s
= 0; insn
->srcExists(s
); ++s
)
3790 checkRd(insn
->getSrc(s
), cycle
, delay
);
3792 // TODO: make use of getReadLatency()!
// 'ready' is not modified in the visible code, so ready - cycle is 0 and the
// result is 'delay' (which is never negative).
3794 return MAX2(delay
, ready
- cycle
);
// Decide the final stall count for 'insn' from the computed 'delay' and the
// instruction that follows it ('next', may be NULL), then store it with
// emitStall().
3798 SchedDataCalculatorGM107::setDelay(Instruction
*insn
, int delay
,
3799 const Instruction
*next
)
3801 const OpClass cl
= targ
->getOpClass(insn
->op
);
// EXIT, BAR and MEMBAR always get the maximum stall count.
3804 if (insn
->op
== OP_EXIT
||
3805 insn
->op
== OP_BAR
||
3806 insn
->op
== OP_MEMBAR
) {
3807 delay
= GM107_MAX_ISSUE_DELAY
;
// QUADON/QUADPOP and flow-control or join instructions are special-cased.
3809 if (insn
->op
== OP_QUADON
||
3810 insn
->op
== OP_QUADPOP
) {
3813 if (cl
== OPCLASS_FLOW
|| insn
->join
) {
// Without dual issue the delay is clamped to the encodable range; the
// dual-issue case uses a stall count of 0.
3817 if (!next
|| !targ
->canDualIssue(insn
, next
)) {
3818 delay
= CLAMP(delay
, GM107_MIN_ISSUE_DELAY
, GM107_MAX_ISSUE_DELAY
);
3820 delay
= 0x0; // dual-issue
3823 wr
= getWrDepBar(insn
);
3824 rd
= getRdDepBar(insn
);
// (wr & rd) != 7 means at least one of the two barrier fields is not the
// all-ones default, i.e. this instruction sets a barrier.
3826 if (delay
== GM107_MIN_ISSUE_DELAY
&& (wr
& rd
) != 7) {
3827 // Barriers take one additional clock cycle to become active on top of
3828 // the clock consumed by the instruction producing it.
3829 if (!next
|| insn
->bb
!= next
->bb
) {
// Otherwise check whether the next instruction waits on a barrier that this
// instruction sets.
3832 int wt
= getWtDepBar(next
);
3833 if ((wt
& (1 << wr
)) | (wt
& (1 << rd
)))
3838 emitStall(insn
, delay
);
3842 // Return true when the given instruction needs to emit a read dependency
3843 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3844 // setting the maximum number of stall counts is not enough.
3846 SchedDataCalculatorGM107::needRdDepBar(const Instruction
*insn
) const
// Bit sets of the GPR ids read (srcs) and written (defs) by the instruction.
3848 BitSet
srcs(255, 1), defs(255, 1);
3851 if (!targ
->isBarrierRequired(insn
))
3854 // Do not emit a read dependency barrier when the instruction doesn't use
3855 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3856 for (int s
= 0; insn
->srcExists(s
); ++s
) {
3857 const Value
*src
= insn
->src(s
).rep();
3858 if (insn
->src(s
).getFile() != FILE_GPR
)
3860 if (src
->reg
.data
.id
== 255)
// Register range [a, b) covered by this source, one id per 4 bytes.
3863 a
= src
->reg
.data
.id
;
3864 b
= a
+ src
->reg
.size
/ 4;
3865 for (int r
= a
; r
< b
; ++r
)
// No GPR source was collected.
3869 if (!srcs
.popCount())
3872 // Do not emit a read dependency barrier when the output GPRs are equal to
3873 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3874 // be produced and WaR hazards are prevented.
3875 for (int d
= 0; insn
->defExists(d
); ++d
) {
3876 const Value
*def
= insn
->def(d
).rep();
3877 if (insn
->def(d
).getFile() != FILE_GPR
)
3879 if (def
->reg
.data
.id
== 255)
// Register range [a, b) covered by this destination.
3882 a
= def
->reg
.data
.id
;
3883 b
= a
+ def
->reg
.size
/ 4;
3884 for (int r
= a
; r
< b
; ++r
)
// NOTE(review): srcs is tested again here, so the destination registers were
// presumably removed from it above -- confirm.
3889 if (!srcs
.popCount())
3895 // Return true when the given instruction needs to emit a write dependency
3896 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3897 // setting the maximum number of stall counts is not enough. This is only legal
3898 // if the instruction outputs something.
3900 SchedDataCalculatorGM107::needWrDepBar(const Instruction
*insn
) const
3902 if (!targ
->isBarrierRequired(insn
))
// Look for a destination that is a GPR or a predicate register.
3905 for (int d
= 0; insn
->defExists(d
); ++d
) {
3906 if (insn
->def(d
).getFile() == FILE_GPR
||
3907 insn
->def(d
).getFile() == FILE_PREDICATE
)
3913 // Find the next instruction inside the same basic block which uses the output
3914 // of the given instruction in order to avoid RaW hazards.
3916 SchedDataCalculatorGM107::findFirstUse(const Instruction
*bari
) const
3918 Instruction
*insn
, *next
;
// Without a destination there is nothing that could be used later.
3921 if (!bari
->defExists(0))
// Inclusive range [minGPR, maxGPR] of register ids covered by the first
// destination (one id per 4 bytes). Only def(0) is considered.
3924 minGPR
= bari
->def(0).rep()->reg
.data
.id
;
3925 maxGPR
= minGPR
+ bari
->def(0).rep()->reg
.size
/ 4 - 1;
3927 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
3930 for (int s
= 0; insn
->srcExists(s
); ++s
) {
3931 const Value
*src
= insn
->src(s
).rep();
// GPR destination: this test holds when the source is not a GPR or its
// register range does not overlap [minGPR, maxGPR].
3932 if (bari
->def(0).getFile() == FILE_GPR
) {
3933 if (insn
->src(s
).getFile() != FILE_GPR
||
3934 src
->reg
.data
.id
+ src
->reg
.size
/ 4 - 1 < minGPR
||
3935 src
->reg
.data
.id
> maxGPR
)
// Predicate destination: this test holds when the source is not the same
// predicate register.
3939 if (bari
->def(0).getFile() == FILE_PREDICATE
) {
3940 if (insn
->src(s
).getFile() != FILE_PREDICATE
||
3941 src
->reg
.data
.id
!= minGPR
)
3950 // Find the next instruction inside the same basic block which overwrites, at
3951 // least, one source of the given instruction in order to avoid WaR hazards.
3953 SchedDataCalculatorGM107::findFirstDef(const Instruction
*bari
) const
3955 Instruction
*insn
, *next
;
3958 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
3961 for (int d
= 0; insn
->defExists(d
); ++d
) {
3962 const Value
*def
= insn
->def(d
).rep();
// Only GPR destinations are compared against the sources of 'bari'.
3963 if (insn
->def(d
).getFile() != FILE_GPR
)
// Inclusive range [minGPR, maxGPR] of register ids written by this
// destination.
3966 minGPR
= def
->reg
.data
.id
;
3967 maxGPR
= minGPR
+ def
->reg
.size
/ 4 - 1;
3969 for (int s
= 0; bari
->srcExists(s
); ++s
) {
3970 const Value
*src
= bari
->src(s
).rep();
// This test holds when the source is not a GPR or its register range does not
// overlap [minGPR, maxGPR].
3971 if (bari
->src(s
).getFile() != FILE_GPR
||
3972 src
->reg
.data
.id
+ src
->reg
.size
/ 4 - 1 < minGPR
||
3973 src
->reg
.data
.id
> maxGPR
)
3982 // Dependency barriers:
3983 // This pass is a bit ugly and could probably be improved by performing a
3984 // better allocation.
3986 // The main idea is to avoid WaR and RaW hazards by emitting read/write
3987 // dependency barriers using the control codes.
3989 SchedDataCalculatorGM107::insertBarriers(BasicBlock
*bb
)
// Barriers that are currently set, each paired with the instruction at which
// they have to be waited on.
3991 std::list
<LiveBarUse
> live_uses
;
3992 std::list
<LiveBarDef
> live_defs
;
3993 Instruction
*insn
, *next
;
// First pass: allocate barriers and place the waits.
3997 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
3998 Instruction
*usei
= NULL
, *defi
= NULL
;
3999 bool need_wr_bar
, need_rd_bar
;
4003 // Expire old barrier uses.
4004 for (std::list
<LiveBarUse
>::iterator it
= live_uses
.begin();
4005 it
!= live_uses
.end();) {
// The first user has been reached: wait on the producer's write barrier here
// and release the barrier id.
4006 if (insn
->serial
>= it
->usei
->serial
) {
4007 int wr
= getWrDepBar(it
->insn
);
4008 emitWtDepBar(insn
, wr
);
4009 bars
.clr(wr
); // free barrier
4010 it
= live_uses
.erase(it
);
4016 // Expire old barrier defs.
4017 for (std::list
<LiveBarDef
>::iterator it
= live_defs
.begin();
4018 it
!= live_defs
.end();) {
// The first overwriting instruction has been reached: wait on the read
// barrier here and release the barrier id.
4019 if (insn
->serial
>= it
->defi
->serial
) {
4020 int rd
= getRdDepBar(it
->insn
);
4021 emitWtDepBar(insn
, rd
);
4022 bars
.clr(rd
); // free barrier
4023 it
= live_defs
.erase(it
);
4029 need_wr_bar
= needWrDepBar(insn
);
4030 need_rd_bar
= needRdDepBar(insn
);
4033 // When the instruction requires to emit a write dependency barrier
4034 // (all which write something at a variable latency), find the next
4035 // instruction which reads the outputs.
4036 usei
= findFirstUse(insn
);
4038 // Allocate and emit a new barrier.
4039 bar_id
= bars
.findFreeRange(1);
4043 emitWrDepBar(insn
, bar_id
);
4045 live_uses
.push_back(LiveBarUse(insn
, usei
));
4049 // When the instruction requires to emit a read dependency barrier
4050 // (all which read something at a variable latency), find the next
4051 // instruction which will write the inputs.
4052 defi
= findFirstDef(insn
);
// The first use does not come after the first overwrite, so the write barrier
// wait already covers this case (presumably the read barrier is skipped --
// confirm).
4054 if (usei
&& defi
&& usei
->serial
<= defi
->serial
)
4057 // Allocate and emit a new barrier.
4058 bar_id
= bars
.findFreeRange(1);
4062 emitRdDepBar(insn
, bar_id
);
4064 live_defs
.push_back(LiveBarDef(insn
, defi
));
4068 // Remove unnecessary barrier waits.
// Second pass over the block; alive_bars tracks the six barrier ids.
4069 BitSet
alive_bars(6, 1);
4070 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4075 wr
= getWrDepBar(insn
);
4076 rd
= getRdDepBar(insn
);
4077 wt
= getWtDepBar(insn
);
4079 for (int idx
= 0; idx
< 6; ++idx
) {
// Only barriers this instruction waits on are of interest.
4080 if (!(wt
& (1 << idx
)))
// Waiting on a barrier that is not alive is pointless: drop the wait bit.
4082 if (!alive_bars
.test(idx
)) {
4083 insn
->sched
&= ~(1 << (11 + idx
));
4085 alive_bars
.clr(idx
);
// Per-function setup: order the function's instructions and create one
// cleared scoreboard per CFG node.
4099 SchedDataCalculatorGM107::visit(Function
*func
)
4103 func
->orderInstructions(insns
);
4105 scoreBoards
.resize(func
->cfg
.getSize());
4106 for (size_t i
= 0; i
< scoreBoards
.size(); ++i
)
4107 scoreBoards
[i
].wipe();
// Compute the scheduling word of every instruction in 'bb'.
4112 SchedDataCalculatorGM107::visit(BasicBlock
*bb
)
4114 Instruction
*insn
, *next
= NULL
;
// Default scheduling word 0x7e0: both dependency barrier fields (bits 5..7 and
// 8..10) are all ones, every other field is zero.
4117 for (Instruction
*insn
= bb
->getEntry(); insn
; insn
= insn
->next
) {
4119 insn
->sched
= 0x7e0;
// Scheduling can be switched off with the NV50_PROG_SCHED debug option.
4122 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4125 // Insert read/write dependency barriers for instructions which don't
4126 // operate at a fixed latency.
4129 score
= &scoreBoards
.at(bb
->getId());
// Seed this block's scoreboard with the scores of its predecessors.
4131 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
4132 // back branches will wait until all target dependencies are satisfied
4133 if (ei
.getType() == Graph::Edge::BACK
) // sched would be uninitialized
4135 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
4136 score
->setMax(&scoreBoards
.at(in
->getId()));
4139 #ifdef GM107_DEBUG_SCHED_DATA
4140 INFO("=== BB:%i initial scores\n", bb
->getId());
4141 score
->print(cycle
);
4144 // Because barriers are allocated locally (intra-BB), we have to make sure
4145 // that all produced barriers have been consumed before entering inside a
4146 // new basic block. The best way is to do a global allocation pre RA but
4147 // it's really more difficult, especially because of the phi nodes. Anyways,
4148 // it seems like that waiting on a barrier which has already been consumed
4149 // doesn't add any additional cost, it's just not elegant!
4150 Instruction
*start
= bb
->getEntry();
4151 if (start
&& bb
->cfg
.incidentCount() > 0) {
4152 for (int b
= 0; b
< 6; b
++)
4153 emitWtDepBar(start
, b
);
// All instructions except the last: the stall count is derived from how long
// the following instruction has to wait for its sources.
4156 for (insn
= bb
->getEntry(); insn
&& insn
->next
; insn
= insn
->next
) {
4159 commitInsn(insn
, cycle
);
4160 int delay
= calcDelay(next
, cycle
);
4161 setDelay(insn
, delay
, next
);
4162 cycle
+= getStall(insn
);
4166 // XXX: The yield flag seems to destroy a bunch of things when it is
4167 // set on every instruction, need investigation.
4170 #ifdef GM107_DEBUG_SCHED_DATA
4171 printSchedInfo(cycle
, insn
);
// Last instruction of the block.
4179 commitInsn(insn
, cycle
);
4183 #ifdef GM107_DEBUG_SCHED_DATA
4184 fprintf(stderr
, "last instruction is : ");
4186 fprintf(stderr
, "cycle=%d\n", cycle
);
// Its delay depends on the successor blocks; bbDelay is the maximum over all
// outgoing edges.
4189 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
4190 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
4192 if (ei
.getType() != Graph::Edge::BACK
) {
4193 // Only test the first instruction of the outgoing block.
4194 next
= out
->getEntry();
4196 bbDelay
= MAX2(bbDelay
, calcDelay(next
, cycle
));
4198 // When the outgoing BB is empty, make sure to set the number of
4199 // stall counts needed by the instruction because we don't know the
4200 // next instruction.
4201 bbDelay
= MAX2(bbDelay
, targ
->getLatency(insn
));
4204 // Wait until all dependencies are satisfied.
4205 const int regsFree
= score
->getLatest();
4206 next
= out
->getFirst();
// Walk the target block, advancing by each instruction's stall count, until
// cycle 'regsFree' is reached.
4207 for (int c
= cycle
; next
&& c
< regsFree
; next
= next
->next
) {
4208 bbDelay
= MAX2(bbDelay
, calcDelay(next
, c
));
4209 c
+= getStall(next
);
4214 if (bb
->cfg
.outgoingCount() != 1)
4216 setDelay(insn
, bbDelay
, next
);
4217 cycle
+= getStall(insn
);
4219 score
->rebase(cycle
); // common base for initializing out blocks' scores
4223 /*******************************************************************************
4225 ******************************************************************************/
// Run the generic per-function preparation, then the GM107 scheduling data
// calculator over the function.
4228 CodeEmitterGM107::prepareEmission(Function
*func
)
4230 SchedDataCalculatorGM107
sched(targGM107
);
4231 CodeEmitter::prepareEmission(func
);
4232 sched
.run(func
, true, true);
// Number of 24-byte groups needed to hold 'size' bytes, rounded up.
// NOTE(review): 24 bytes presumably corresponds to three 8-byte instructions
// sharing one 8-byte scheduling word -- confirm.
4235 static inline uint32_t sizeToBundlesGM107(uint32_t size
)
4237 return (size
+ 23) / 24;
// Prepare every function of the program for emission and compute the final
// binary position and size of each function and basic block, including the
// space taken by scheduling words.
4241 CodeEmitterGM107::prepareEmission(Program
*prog
)
4243 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
4244 !fi
.end(); fi
.next()) {
4245 Function
*func
= reinterpret_cast<Function
*>(fi
.get());
// Functions are laid out back to back in the program binary.
4246 func
->binPos
= prog
->binSize
;
4247 prepareEmission(func
);
4249 // adjust sizes & positions for scheduling info:
4250 if (prog
->getTarget()->hasSWSched
) {
4251 uint32_t adjPos
= func
->binPos
;
4252 BasicBlock
*bb
= NULL
;
4253 for (int i
= 0; i
< func
->bbCount
; ++i
) {
4254 bb
= func
->bbArray
[i
];
4255 int32_t adjSize
= bb
->binSize
;
// Discount the bytes up to the next 32-byte boundary (the condition guarding
// this line is not visible here).
4257 adjSize
-= 32 - adjPos
% 32;
// Add 8 bytes of scheduling data per 24-byte group of instructions.
4261 adjSize
= bb
->binSize
+ sizeToBundlesGM107(adjSize
) * 8;
4262 bb
->binPos
= adjPos
;
4263 bb
->binSize
= adjSize
;
4267 func
->binSize
= adjPos
- func
->binPos
;
4270 prog
->binSize
+= func
->binSize
;
// Construct an emitter for the given target. Issue delays are written only
// when the target reports software scheduling (hasSWSched). The output buffer
// starts out empty (size and limit both 0).
4274 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107
*target
)
4275 : CodeEmitter(target
),
4277 writeIssueDelays(target
->hasSWSched
)
4280 codeSize
= codeSizeLimit
= 0;
// Factory: allocate a GM107 code emitter for this target and set its program
// type. The emitter is created with 'new' (presumably owned by the caller --
// confirm).
4285 TargetGM107::createCodeEmitterGM107(Program::Type type
)
4287 CodeEmitterGM107
*emit
= new CodeEmitterGM107(this);
4288 emit
->setProgramType(type
);
4292 } // namespace nv50_ir