2 * Copyright 2014 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
25 #include "codegen/nv50_ir_target_gm107.h"
27 //#define GM107_DEBUG_SCHED_DATA
// Declaration of the GM107 code emitter, derived from CodeEmitter.
// NOTE(review): this excerpt is missing lines of the original declaration (braces,
// access specifiers and a number of emit* members), so the comments below describe
// only the members that are visible here.
31 class CodeEmitterGM107
: public CodeEmitter
34 CodeEmitterGM107(const TargetGM107
*);
36 virtual bool emitInstruction(Instruction
*);
37 virtual uint32_t getMinEncodingSize(const Instruction
*) const;
39 virtual void prepareEmission(Program
*);
40 virtual void prepareEmission(Function
*);
// Stores the program type in the progType member.
42 inline void setProgramType(Program::Type pType
) { progType
= pType
; }
45 const TargetGM107
*targGM107
;
47 Program::Type progType
;
// Instruction being encoded; read by the emit* helpers.
49 const Instruction
*insn
;
50 const bool writeIssueDelays
;
// Bit-field helpers: the 3-argument overload forwards to the 4-argument one using
// the `code` buffer as the destination.
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b
, int s
, uint32_t v
) { emitField(code
, b
, s
, v
); }
// emitInsn(o) is shorthand for emitInsn(o, true).
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o
) { emitInsn(o
, true); }
59 inline void emitPred();
// emitGPR overloads: each one resolves its argument to a const Value * (NULL when
// no value is attached) and forwards to emitGPR(int, const Value *).
60 inline void emitGPR(int, const Value
*);
61 inline void emitGPR(int pos
) {
62 emitGPR(pos
, (const Value
*)NULL
);
64 inline void emitGPR(int pos
, const ValueRef
&ref
) {
65 emitGPR(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
67 inline void emitGPR(int pos
, const ValueRef
*ref
) {
68 emitGPR(pos
, ref
? ref
->rep() : (const Value
*)NULL
);
70 inline void emitGPR(int pos
, const ValueDef
&def
) {
71 emitGPR(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
// emitSYS / emitPRED follow the same forwarding pattern as emitGPR.
73 inline void emitSYS(int, const Value
*);
74 inline void emitSYS(int pos
, const ValueRef
&ref
) {
75 emitSYS(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
77 inline void emitPRED(int, const Value
*);
78 inline void emitPRED(int pos
) {
79 emitPRED(pos
, (const Value
*)NULL
);
81 inline void emitPRED(int pos
, const ValueRef
&ref
) {
82 emitPRED(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
84 inline void emitPRED(int pos
, const ValueDef
&def
) {
85 emitPRED(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
87 inline void emitADDR(int, int, int, int, const ValueRef
&);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef
&);
89 inline bool longIMMD(const ValueRef
&);
90 inline void emitIMMD(int, int, const ValueRef
&);
// Condition-code and modifier helpers; emitCond5 simply forwards to emitCond4.
92 void emitCond3(int, CondCode
);
93 void emitCond4(int, CondCode
);
94 void emitCond5(int pos
, CondCode cc
) { emitCond4(pos
, cc
); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef
&);
101 inline void emitNEG(int, const ValueRef
&);
102 inline void emitNEG2(int, const ValueRef
&, const ValueRef
&);
103 inline void emitFMZ(int, int);
// The single-argument emitRND uses the instruction's own rounding mode and passes
// -1 as the third argument.
104 inline void emitRND(int, RoundMode
, int);
105 inline void emitRND(int pos
) {
106 emitRND(pos
, insn
->rnd
, -1);
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef
&);
170 void emitLDSTs(int, DataType
);
210 void emitSUHandle(const int s
);
216 /*******************************************************************************
217 * general instruction layout/fields
218 ******************************************************************************/
// Bit-field writer: masks `v` to its low `s` bits and shifts the result left by `b`
// into the 64-bit value `d`. The assert requires the bits of `v` above the field to
// be either all clear or all set.
// NOTE(review): the statements that merge `d` into `data` are not visible in this
// excerpt.
221 CodeEmitterGM107::emitField(uint32_t *data
, int b
, int s
, uint32_t v
)
224 uint32_t m
= ((1ULL << s
) - 1);
225 uint64_t d
= (uint64_t)(v
& m
) << b
;
226 assert(!(v
& ~m
) || (v
& ~m
) == ~m
);
// Predicate field: when the instruction has a predicate source (predSrc >= 0), writes
// that source's register id as a 3-bit field at bit 16 and a 1-bit flag at bit 19
// that is set when insn->cc == CC_NOT_P.
// NOTE(review): the path taken when predSrc < 0 is not visible in this excerpt.
233 CodeEmitterGM107::emitPred()
235 if (insn
->predSrc
>= 0) {
236 emitField(16, 3, insn
->getSrc(insn
->predSrc
)->rep()->reg
.data
.id
);
237 emitField(19, 1, insn
->cc
== CC_NOT_P
);
// Starts an instruction encoding; the one visible statement clears code[0].
// NOTE(review): how `hi` and `pred` are consumed is not visible in this excerpt.
244 CodeEmitterGM107::emitInsn(uint32_t hi
, bool pred
)
246 code
[0] = 0x00000000;
// Writes an 8-bit register field at `pos`: val's register id when `val` is non-null
// and not in FILE_FLAGS, otherwise 255.
253 CodeEmitterGM107::emitGPR(int pos
, const Value
*val
)
255 emitField(pos
, 8, val
&& !val
->inFile(FILE_FLAGS
) ?
256 val
->reg
.data
.id
: 255);
// Writes an 8-bit system-value selector at `pos`. `id` is initialised from val's
// register id (or -1 when `val` is null); the case labels below assign the id that
// is written for each SV_* value, with SV_TID, SV_CTAID and SV_CLOCK adding
// reg.data.sv.index to a base id.
// NOTE(review): the switch header and the label owning the assert are not visible
// in this excerpt.
260 CodeEmitterGM107::emitSYS(int pos
, const Value
*val
)
262 int id
= val
? val
->reg
.data
.id
: -1;
265 case SV_LANEID
: id
= 0x00; break;
266 case SV_VERTEX_COUNT
: id
= 0x10; break;
267 case SV_INVOCATION_ID
: id
= 0x11; break;
268 case SV_THREAD_KILL
: id
= 0x13; break;
269 case SV_INVOCATION_INFO
: id
= 0x1d; break;
270 case SV_TID
: id
= 0x21 + val
->reg
.data
.sv
.index
; break;
271 case SV_CTAID
: id
= 0x25 + val
->reg
.data
.sv
.index
; break;
272 case SV_LANEMASK_EQ
: id
= 0x38; break;
273 case SV_LANEMASK_LT
: id
= 0x39; break;
274 case SV_LANEMASK_LE
: id
= 0x3a; break;
275 case SV_LANEMASK_GT
: id
= 0x3b; break;
276 case SV_LANEMASK_GE
: id
= 0x3c; break;
277 case SV_CLOCK
: id
= 0x50 + val
->reg
.data
.sv
.index
; break;
279 assert(!"invalid system value");
284 emitField(pos
, 8, id
);
// Writes a 3-bit predicate register field at `pos`: val's register id, or 7 when
// `val` is null.
288 CodeEmitterGM107::emitPRED(int pos
, const Value
*val
)
290 emitField(pos
, 3, val
? val
->reg
.data
.id
: 7);
// Emits an address operand: the indirect register of `ref` (getIndirect(0)) is passed
// to emitGPR at `gpr`, and the value's offset shifted right by `shr` is written as a
// `len`-bit field at `off`. The assert requires the offset to have no bits set below
// bit `shr`.
// NOTE(review): the end of the parameter list (where `ref` is declared) and any guard
// around the emitGPR call are not visible in this excerpt.
294 CodeEmitterGM107::emitADDR(int gpr
, int off
, int len
, int shr
,
297 const Value
*v
= ref
.get();
298 assert(!(v
->reg
.data
.offset
& ((1 << shr
) - 1)));
300 emitGPR(gpr
, ref
.getIndirect(0));
301 emitField(off
, len
, v
->reg
.data
.offset
>> shr
);
// Emits a constant-buffer operand: the 5-bit fileIndex at `buf`, the indirect register
// of `ref` through emitGPR at `gpr`, and the symbol offset shifted right by `shr` as a
// 16-bit field at `off`. The assert requires the offset to have no bits set below
// bit `shr`.
// NOTE(review): the offset width is hard-coded to 16 and `len` is not referenced in
// the lines shown here; the end of the parameter list and any guard around the
// emitGPR call are not visible in this excerpt.
305 CodeEmitterGM107::emitCBUF(int buf
, int gpr
, int off
, int len
, int shr
,
308 const Value
*v
= ref
.get();
309 const Symbol
*s
= v
->asSym();
311 assert(!(s
->reg
.data
.offset
& ((1 << shr
) - 1)));
313 emitField(buf
, 5, v
->reg
.fileIndex
);
315 emitGPR(gpr
, ref
.getIndirect(0));
316 emitField(off
, 16, s
->reg
.data
.offset
>> shr
);
// Classifies an immediate source. Only FILE_IMMEDIATE operands are examined:
//  - for float source types, the test is whether any of the low 12 bits is set;
//  - a second test checks whether bits 20..31 are neither all clear nor all set.
// NOTE(review): the return statements and the branch enclosing the second test are
// not visible in this excerpt; presumably a true result selects the 32-bit immediate
// encoding - confirm against the callers.
320 CodeEmitterGM107::longIMMD(const ValueRef
&ref
)
322 if (ref
.getFile() == FILE_IMMEDIATE
) {
323 const ImmediateValue
*imm
= ref
.get()->asImm();
324 if (isFloatType(insn
->sType
)) {
325 if ((imm
->reg
.data
.u32
& 0x00000fff) != 0x00000000)
328 if ((imm
->reg
.data
.u32
& 0xfff00000) != 0x00000000 &&
329 (imm
->reg
.data
.u32
& 0xfff00000) != 0xfff00000)
// Emits an immediate operand taken from `ref`.
// Visible handling: for F32/F16 source types the low 12 bits must be zero; for F64 the
// low 44 bits of the 64-bit value must be zero and `val` becomes the value shifted
// right by 44. One path then requires bits 20..31 of `val` to be all clear or all set,
// writes bit 19 of `val` as a 1-bit field at bit 56 and the low 19 bits at `pos`;
// another path writes `val` unchanged as a `len`-bit field at `pos`.
// NOTE(review): the conditions selecting between these paths, and any adjustment of
// `val` after the F32/F16 assert, are not visible in this excerpt.
337 CodeEmitterGM107::emitIMMD(int pos
, int len
, const ValueRef
&ref
)
339 const ImmediateValue
*imm
= ref
.get()->asImm();
340 uint32_t val
= imm
->reg
.data
.u32
;
343 if (insn
->sType
== TYPE_F32
|| insn
->sType
== TYPE_F16
) {
344 assert(!(val
& 0x00000fff));
346 } else if (insn
->sType
== TYPE_F64
) {
347 assert(!(imm
->reg
.data
.u64
& 0x00000fffffffffffULL
));
348 val
= imm
->reg
.data
.u64
>> 44;
350 assert(!(val
& 0xfff00000) || (val
& 0xfff00000) == 0xfff00000);
351 emitField( 56, 1, (val
& 0x80000) >> 19);
352 emitField(pos
, len
, (val
& 0x7ffff));
354 emitField(pos
, len
, val
);
358 /*******************************************************************************
360 ******************************************************************************/
// Writes a 3-bit condition-code field at `pos`. The visible labels map
// CC_FL..CC_TR to 0x00..0x07.
// NOTE(review): the declaration of `data`, the switch header and the label owning the
// assert are not visible in this excerpt; gaps in the original numbering suggest
// further case labels were dropped.
363 CodeEmitterGM107::emitCond3(int pos
, CondCode code
)
368 case CC_FL
: data
= 0x00; break;
370 case CC_LT
: data
= 0x01; break;
372 case CC_EQ
: data
= 0x02; break;
374 case CC_LE
: data
= 0x03; break;
376 case CC_GT
: data
= 0x04; break;
378 case CC_NE
: data
= 0x05; break;
380 case CC_GE
: data
= 0x06; break;
381 case CC_TR
: data
= 0x07; break;
383 assert(!"invalid cond3");
387 emitField(pos
, 3, data
);
// Writes a 4-bit condition-code field at `pos`. Ordered comparisons use 0x00..0x06,
// the *U variants use 0x09..0x0e and CC_TR uses 0x0f; encodings 0x07 and 0x08 are
// only present as commented-out labels.
// NOTE(review): the declaration of `data`, the switch header and the label owning the
// assert are not visible in this excerpt.
391 CodeEmitterGM107::emitCond4(int pos
, CondCode code
)
396 case CC_FL
: data
= 0x00; break;
397 case CC_LT
: data
= 0x01; break;
398 case CC_EQ
: data
= 0x02; break;
399 case CC_LE
: data
= 0x03; break;
400 case CC_GT
: data
= 0x04; break;
401 case CC_NE
: data
= 0x05; break;
402 case CC_GE
: data
= 0x06; break;
403 // case CC_NUM: data = 0x07; break;
404 // case CC_NAN: data = 0x08; break;
405 case CC_LTU
: data
= 0x09; break;
406 case CC_EQU
: data
= 0x0a; break;
407 case CC_LEU
: data
= 0x0b; break;
408 case CC_GTU
: data
= 0x0c; break;
409 case CC_NEU
: data
= 0x0d; break;
410 case CC_GEU
: data
= 0x0e; break;
411 case CC_TR
: data
= 0x0f; break;
413 assert(!"invalid cond4");
417 emitField(pos
, 4, data
);
// Writes a 1-bit flag at `pos` that is set when source 0 lives in FILE_SHADER_OUTPUT.
421 CodeEmitterGM107::emitO(int pos
)
423 emitField(pos
, 1, insn
->getSrc(0)->reg
.file
== FILE_SHADER_OUTPUT
);
// Writes the instruction's perPatch flag as a 1-bit field at `pos`.
427 CodeEmitterGM107::emitP(int pos
)
429 emitField(pos
, 1, insn
->perPatch
);
// Writes the instruction's saturate flag as a 1-bit field at `pos`.
433 CodeEmitterGM107::emitSAT(int pos
)
435 emitField(pos
, 1, insn
->saturate
);
// Writes a 1-bit flag at `pos` that is set when the instruction has a flags
// definition (flagsDef >= 0).
439 CodeEmitterGM107::emitCC(int pos
)
441 emitField(pos
, 1, insn
->flagsDef
>= 0);
// Writes a 1-bit flag at `pos` that is set when the instruction has a flags source
// (flagsSrc >= 0).
445 CodeEmitterGM107::emitX(int pos
)
447 emitField(pos
, 1, insn
->flagsSrc
>= 0);
// Writes the abs() state of ref's modifier as a 1-bit field at `pos`.
451 CodeEmitterGM107::emitABS(int pos
, const ValueRef
&ref
)
453 emitField(pos
, 1, ref
.mod
.abs());
// Writes the neg() state of ref's modifier as a 1-bit field at `pos`.
457 CodeEmitterGM107::emitNEG(int pos
, const ValueRef
&ref
)
459 emitField(pos
, 1, ref
.mod
.neg());
// Writes a single combined negate bit at `pos`: the XOR of the neg() modifiers of
// `a` and `b`, so two negations cancel.
463 CodeEmitterGM107::emitNEG2(int pos
, const ValueRef
&a
, const ValueRef
&b
)
465 emitField(pos
, 1, a
.mod
.neg() ^ b
.mod
.neg());
// Writes a `len`-bit field at `pos` whose bit 1 is insn->dnz and bit 0 is insn->ftz.
469 CodeEmitterGM107::emitFMZ(int pos
, int len
)
471 emitField(pos
, len
, insn
->dnz
<< 1 | insn
->ftz
);
// Encodes a rounding mode: `rm` (2 bits, written at `rmp`) is 0/1/2/3 for the
// N/M/P/Z families, and `ri` (1 bit, written at `rip`) is set by the *I variants,
// each of which has no break and falls through to the matching base label.
// NOTE(review): the declarations of `rm`/`ri`, the switch header, the label owning
// the assert and any guard on the `rip` write (callers pass -1 for `rip`) are not
// visible in this excerpt.
475 CodeEmitterGM107::emitRND(int rmp
, RoundMode rnd
, int rip
)
479 case ROUND_NI
: ri
= 1;
480 case ROUND_N
: rm
= 0; break;
481 case ROUND_MI
: ri
= 1;
482 case ROUND_M
: rm
= 1; break;
483 case ROUND_PI
: ri
= 1;
484 case ROUND_P
: rm
= 2; break;
485 case ROUND_ZI
: ri
= 1;
486 case ROUND_Z
: rm
= 3; break;
488 assert(!"invalid round mode");
491 emitField(rip
, 1, ri
);
492 emitField(rmp
, 2, rm
);
// Writes the instruction's postFactor as a 3-bit field at `pos`. The assert limits
// postFactor to [-3, 3]; positive values are encoded as 7 - postFactor, and a second
// statement encodes 0 - postFactor.
// NOTE(review): the `else` introducing the second emitField is not visible in this
// excerpt.
496 CodeEmitterGM107::emitPDIV(int pos
)
498 assert(insn
->postFactor
>= -3 && insn
->postFactor
<= 3);
499 if (insn
->postFactor
> 0)
500 emitField(pos
, 3, 7 - insn
->postFactor
);
502 emitField(pos
, 3, 0 - insn
->postFactor
);
// Writes a 1-bit flag at `pos` that is set when ref's modifier includes
// NV50_IR_MOD_NOT.
506 CodeEmitterGM107::emitINV(int pos
, const ValueRef
&ref
)
508 emitField(pos
, 1, !!(ref
.mod
& Modifier(NV50_IR_MOD_NOT
)));
511 /*******************************************************************************
513 ******************************************************************************/
// Emits opcode 0xe3000000 and writes CC_TR through emitCond5 at bit 0.
516 CodeEmitterGM107::emitEXIT()
518 emitInsn (0xe3000000);
519 emitCond5(0x00, CC_TR
);
// Emits a branch. A local `insn` shadows the member and views the instruction as a
// FlowInstruction. Four opcodes are visible (JMX, BRX, JMP, BRA per the trailing
// comments); allWarp goes to bit 7, limit to bit 6 and CC_TR to the condition field.
// When source 0 is absent or is not in FILE_MEMORY_CONST the target block's binPos is
// encoded, either as a 24-bit offset relative to codeSize + 8 or as a 32-bit value;
// otherwise the target is emitted via emitCBUF and bit 5 is set.
// NOTE(review): the conditions choosing between the opcodes and between the
// relative/absolute forms, the declaration of `gpr`, and the body of the
// writeIssueDelays check are not visible in this excerpt.
523 CodeEmitterGM107::emitBRA()
525 const FlowInstruction
*insn
= this->insn
->asFlow();
528 if (insn
->indirect
) {
530 emitInsn(0xe2000000); // JMX
532 emitInsn(0xe2500000); // BRX
536 emitInsn(0xe2100000); // JMP
538 emitInsn(0xe2400000); // BRA
539 emitField(0x07, 1, insn
->allWarp
);
542 emitField(0x06, 1, insn
->limit
);
543 emitCond5(0x00, CC_TR
);
545 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
546 int32_t pos
= insn
->target
.bb
->binPos
;
547 if (writeIssueDelays
&& !(pos
& 0x1f))
550 emitField(0x14, 24, pos
- (codeSize
+ 8));
552 emitField(0x14, 32, pos
);
554 emitCBUF (0x24, gpr
, 20, 16, 0, insn
->src(0));
555 emitField(0x05, 1, 1);
// Emits a call. Opcode 0xe2200000 (JCAL) is used when insn->absolute is set; opcode
// 0xe2600000 (CAL) is the other visible form. Both pass 0 as emitInsn's second
// argument. For non-constant-buffer targets the visible encodings are a 24-bit offset
// relative to codeSize + 8, a pair of TYPE_BUILTIN relocations against the builtin's
// offset, or the 32-bit binPos; constant-buffer targets go through emitCBUF and set
// bit 5.
// NOTE(review): the conditions selecting between these encodings are not visible in
// this excerpt.
560 CodeEmitterGM107::emitCAL()
562 const FlowInstruction
*insn
= this->insn
->asFlow();
564 if (insn
->absolute
) {
565 emitInsn(0xe2200000, 0); // JCAL
567 emitInsn(0xe2600000, 0); // CAL
570 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
572 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
575 int pcAbs
= targGM107
->getBuiltinOffset(insn
->target
.builtin
);
576 addReloc(RelocEntry::TYPE_BUILTIN
, 0, pcAbs
, 0xfff00000, 20);
577 addReloc(RelocEntry::TYPE_BUILTIN
, 1, pcAbs
, 0x000fffff, -12);
579 emitField(0x14, 32, insn
->target
.bb
->binPos
);
583 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
584 emitField(0x05, 1, 1);
// Emits opcode 0xe2b00000 (second emitInsn argument 0). The target is a 24-bit offset
// relative to codeSize + 8 when source 0 is absent or not in FILE_MEMORY_CONST;
// the other visible path emits it via emitCBUF and sets bit 5.
// NOTE(review): the `else` between the two paths is not visible in this excerpt.
589 CodeEmitterGM107::emitPCNT()
591 const FlowInstruction
*insn
= this->insn
->asFlow();
593 emitInsn(0xe2b00000, 0);
595 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
596 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
598 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
599 emitField(0x05, 1, 1);
// Emits opcode 0xe3500000 and writes CC_TR through emitCond5 at bit 0.
604 CodeEmitterGM107::emitCONT()
606 emitInsn (0xe3500000);
607 emitCond5(0x00, CC_TR
);
// Emits opcode 0xe2a00000 (second emitInsn argument 0). The target is a 24-bit offset
// relative to codeSize + 8 when source 0 is absent or not in FILE_MEMORY_CONST;
// the other visible path emits it via emitCBUF and sets bit 5.
// NOTE(review): the `else` between the two paths is not visible in this excerpt.
611 CodeEmitterGM107::emitPBK()
613 const FlowInstruction
*insn
= this->insn
->asFlow();
615 emitInsn(0xe2a00000, 0);
617 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
618 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
620 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
621 emitField(0x05, 1, 1);
// Emits opcode 0xe3400000 and writes CC_TR through emitCond5 at bit 0.
626 CodeEmitterGM107::emitBRK()
628 emitInsn (0xe3400000);
629 emitCond5(0x00, CC_TR
);
// Emits opcode 0xe2700000 (second emitInsn argument 0). The target is a 24-bit offset
// relative to codeSize + 8 when source 0 is absent or not in FILE_MEMORY_CONST;
// the other visible path emits it via emitCBUF and sets bit 5.
// NOTE(review): the `else` between the two paths is not visible in this excerpt.
633 CodeEmitterGM107::emitPRET()
635 const FlowInstruction
*insn
= this->insn
->asFlow();
637 emitInsn(0xe2700000, 0);
639 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
640 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
642 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
643 emitField(0x05, 1, 1);
// Emits opcode 0xe3200000 and writes CC_TR through emitCond5 at bit 0.
648 CodeEmitterGM107::emitRET()
650 emitInsn (0xe3200000);
651 emitCond5(0x00, CC_TR
);
// Emits opcode 0xe2900000 (second emitInsn argument 0). The target is a 24-bit offset
// relative to codeSize + 8 when source 0 is absent or not in FILE_MEMORY_CONST;
// the other visible path emits it via emitCBUF and sets bit 5.
// NOTE(review): the `else` between the two paths is not visible in this excerpt.
655 CodeEmitterGM107::emitSSY()
657 const FlowInstruction
*insn
= this->insn
->asFlow();
659 emitInsn(0xe2900000, 0);
661 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
662 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
664 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
665 emitField(0x05, 1, 1);
// Emits opcode 0xf0f80000 and writes CC_TR through emitCond5 at bit 0.
670 CodeEmitterGM107::emitSYNC()
672 emitInsn (0xf0f80000);
673 emitCond5(0x00, CC_TR
);
// Emits opcode 0xe3700000, passing 0 as emitInsn's second argument.
677 CodeEmitterGM107::emitSAM()
679 emitInsn(0xe3700000, 0);
// Emits opcode 0xe3800000, passing 0 as emitInsn's second argument.
683 CodeEmitterGM107::emitRAM()
685 emitInsn(0xe3800000, 0);
688 /*******************************************************************************
690 ******************************************************************************/
692 /*******************************************************************************
693 * movement / conversion
694 ******************************************************************************/
// Emits a move. When source 0 is not an immediate, the opcode is chosen by the source
// file (and, for one form, by whether the destination is a predicate), and the
// 4-bit `lanes` mask is written at 0x27 unless either side is a predicate. Another
// path emits opcode 0x01000000 with a 32-bit immediate at 0x14 and `lanes` at 0x0c.
// The destination is written as a predicate at 0x03 or as a GPR at 0x00.
// NOTE(review): several case labels, the `else` branches and some emitted fields
// are not visible in this excerpt.
697 CodeEmitterGM107::emitMOV()
699 if (insn
->src(0).getFile() != FILE_IMMEDIATE
) {
700 switch (insn
->src(0).getFile()) {
702 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
703 emitInsn(0x5b6a0000);
706 emitInsn(0x5c980000);
708 emitGPR (0x14, insn
->src(0));
710 case FILE_MEMORY_CONST
:
711 emitInsn(0x4c980000);
712 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
715 emitInsn(0x38980000);
716 emitIMMD(0x14, 19, insn
->src(0));
719 emitInsn(0x50880000);
720 emitPRED(0x0c, insn
->src(0));
725 assert(!"bad src file");
728 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
729 insn
->src(0).getFile() != FILE_PREDICATE
)
730 emitField(0x27, 4, insn
->lanes
);
732 emitInsn (0x01000000);
733 emitIMMD (0x14, 32, insn
->src(0));
734 emitField(0x0c, 4, insn
->lanes
);
737 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
739 emitPRED(0x03, insn
->def(0));
742 emitGPR(0x00, insn
->def(0));
// Emits opcode 0xf0c80000 with the system-value selector of source 0 at 0x14 and the
// destination GPR at 0x00.
747 CodeEmitterGM107::emitS2R()
749 emitInsn(0xf0c80000);
750 emitSYS (0x14, insn
->src(0));
751 emitGPR (0x00, insn
->def(0));
// Emits a float-to-float conversion. `rnd` starts as insn->rnd and is overridden to
// ROUND_MI / ROUND_PI / ROUND_ZI for OP_FLOOR / OP_CEIL / OP_TRUNC. The opcode depends
// on the file of source 0. Fields written: saturate (0x32), abs (0x31), neg (0x2d),
// subOp (1 bit at 0x29), rounding via emitRND(0x27, rnd, 0x2a), log2 of the source
// and destination type sizes (0x0a, 0x08) and the destination GPR (0x00).
// NOTE(review): the switch headers and some case labels are not visible in this
// excerpt.
755 CodeEmitterGM107::emitF2F()
757 RoundMode rnd
= insn
->rnd
;
760 case OP_FLOOR
: rnd
= ROUND_MI
; break;
761 case OP_CEIL
: rnd
= ROUND_PI
; break;
762 case OP_TRUNC
: rnd
= ROUND_ZI
; break;
767 switch (insn
->src(0).getFile()) {
769 emitInsn(0x5ca80000);
770 emitGPR (0x14, insn
->src(0));
772 case FILE_MEMORY_CONST
:
773 emitInsn(0x4ca80000);
774 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
777 emitInsn(0x38a80000);
778 emitIMMD(0x14, 19, insn
->src(0));
781 assert(!"bad src0 file");
785 emitField(0x32, 1, (insn
->op
== OP_SAT
) || insn
->saturate
);
786 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
788 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
790 emitField(0x29, 1, insn
->subOp
);
791 emitRND (0x27, rnd
, 0x2a);
792 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
793 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
794 emitGPR (0x00, insn
->def(0));
// Emits a float-to-integer conversion. `rnd` starts as insn->rnd and is overridden to
// ROUND_M / ROUND_P / ROUND_Z for OP_FLOOR / OP_CEIL / OP_TRUNC. The opcode depends on
// the file of source 0. Fields written: abs (0x31), neg (0x2d), rounding via
// emitRND(0x27, rnd, 0x2a), destination signedness (0x0c), log2 of the source and
// destination type sizes (0x0a, 0x08) and the destination GPR (0x00).
// NOTE(review): the switch headers and some case labels are not visible in this
// excerpt.
798 CodeEmitterGM107::emitF2I()
800 RoundMode rnd
= insn
->rnd
;
803 case OP_FLOOR
: rnd
= ROUND_M
; break;
804 case OP_CEIL
: rnd
= ROUND_P
; break;
805 case OP_TRUNC
: rnd
= ROUND_Z
; break;
810 switch (insn
->src(0).getFile()) {
812 emitInsn(0x5cb00000);
813 emitGPR (0x14, insn
->src(0));
815 case FILE_MEMORY_CONST
:
816 emitInsn(0x4cb00000);
817 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
820 emitInsn(0x38b00000);
821 emitIMMD(0x14, 19, insn
->src(0));
824 assert(!"bad src0 file");
828 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
830 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
832 emitRND (0x27, rnd
, 0x2a);
833 emitField(0x0c, 1, isSignedType(insn
->dType
));
834 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
835 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
836 emitGPR (0x00, insn
->def(0));
// Emits an integer-to-float conversion. `rnd` starts as insn->rnd and is overridden to
// ROUND_M / ROUND_P / ROUND_Z for OP_FLOOR / OP_CEIL / OP_TRUNC. The opcode depends on
// the file of source 0. Fields written: abs (0x31), neg (0x2d), subOp (2 bits at
// 0x29), rounding via emitRND(0x27, rnd, -1), source signedness (0x0d), log2 of the
// source and destination type sizes (0x0a, 0x08) and the destination GPR (0x00).
// NOTE(review): the switch headers and some case labels are not visible in this
// excerpt.
840 CodeEmitterGM107::emitI2F()
842 RoundMode rnd
= insn
->rnd
;
845 case OP_FLOOR
: rnd
= ROUND_M
; break;
846 case OP_CEIL
: rnd
= ROUND_P
; break;
847 case OP_TRUNC
: rnd
= ROUND_Z
; break;
852 switch (insn
->src(0).getFile()) {
854 emitInsn(0x5cb80000);
855 emitGPR (0x14, insn
->src(0));
857 case FILE_MEMORY_CONST
:
858 emitInsn(0x4cb80000);
859 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
862 emitInsn(0x38b80000);
863 emitIMMD(0x14, 19, insn
->src(0));
866 assert(!"bad src0 file");
870 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
872 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
873 emitField(0x29, 2, insn
->subOp
);
874 emitRND (0x27, rnd
, -1);
875 emitField(0x0d, 1, isSignedType(insn
->sType
));
876 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
877 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
878 emitGPR (0x00, insn
->def(0));
// Emits an integer-to-integer conversion. The opcode depends on the file of source 0.
// Fields written: abs (0x31), neg (0x2d), subOp (2 bits at 0x29), source and
// destination signedness (0x0d, 0x0c), log2 of the source and destination type sizes
// (0x0a, 0x08) and the destination GPR (0x00).
// NOTE(review): some case labels are not visible in this excerpt.
882 CodeEmitterGM107::emitI2I()
884 switch (insn
->src(0).getFile()) {
886 emitInsn(0x5ce00000);
887 emitGPR (0x14, insn
->src(0));
889 case FILE_MEMORY_CONST
:
890 emitInsn(0x4ce00000);
891 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
894 emitInsn(0x38e00000);
895 emitIMMD(0x14, 19, insn
->src(0));
898 assert(!"bad src0 file");
903 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
905 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
906 emitField(0x29, 2, insn
->subOp
);
907 emitField(0x0d, 1, isSignedType(insn
->sType
));
908 emitField(0x0c, 1, isSignedType(insn
->dType
));
909 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
910 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
911 emitGPR (0x00, insn
->def(0));
// Fixup callback: sets bit 10 of code[loc + 1] when data.force_persample_interp is
// true; the second statement clears that bit.
// NOTE(review): the `else` introducing the clearing statement is not visible in this
// excerpt.
915 selpFlip(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
917 int loc
= entry
->loc
;
918 if (data
.force_persample_interp
)
919 code
[loc
+ 1] |= 1 << 10;
921 code
[loc
+ 1] &= ~(1 << 10);
// Emits a select. The opcode depends on the file of source 1; source 2 supplies the
// predicate (0x27) and its NOT modifier (0x2a); source 0 goes to 0x08 and the
// destination to 0x00. When subOp == 1 a fixup using selpFlip is registered through
// addInterp(0, 0, selpFlip).
// NOTE(review): some case labels are not visible in this excerpt.
925 CodeEmitterGM107::emitSEL()
927 switch (insn
->src(1).getFile()) {
929 emitInsn(0x5ca00000);
930 emitGPR (0x14, insn
->src(1));
932 case FILE_MEMORY_CONST
:
933 emitInsn(0x4ca00000);
934 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
937 emitInsn(0x38a00000);
938 emitIMMD(0x14, 19, insn
->src(1));
941 assert(!"bad src1 file");
945 emitINV (0x2a, insn
->src(2));
946 emitPRED(0x27, insn
->src(2));
947 emitGPR (0x08, insn
->src(0));
948 emitGPR (0x00, insn
->def(0));
950 if (insn
->subOp
== 1) {
951 addInterp(0, 0, selpFlip
);
// Emits opcode 0xef100000. Source 1 is written either as a GPR at 0x14 or as a 5-bit
// immediate at 0x14; source 2 either as a GPR at 0x27 or as a 13-bit immediate at
// 0x22. A second definition, when present, must be a predicate and is written at
// 0x30. subOp goes to 0x1e, the local `type` to 0x1c, source 0 to 0x08 and the
// destination to 0x00.
// NOTE(review): the declaration and updates of `type`, the case labels and the
// statement taken when def(1) does not exist are not visible in this excerpt.
956 CodeEmitterGM107::emitSHFL()
960 emitInsn (0xef100000);
962 switch (insn
->src(1).getFile()) {
964 emitGPR(0x14, insn
->src(1));
967 emitIMMD(0x14, 5, insn
->src(1));
971 assert(!"invalid src1 file");
975 switch (insn
->src(2).getFile()) {
977 emitGPR(0x27, insn
->src(2));
980 emitIMMD(0x22, 13, insn
->src(2));
984 assert(!"invalid src2 file");
988 if (!insn
->defExists(1))
991 assert(insn
->def(1).getFile() == FILE_PREDICATE
);
992 emitPRED(0x30, insn
->def(1));
995 emitField(0x1e, 2, insn
->subOp
);
996 emitField(0x1c, 2, type
);
997 emitGPR (0x08, insn
->src(0));
998 emitGPR (0x00, insn
->def(0));
1001 /*******************************************************************************
1003 ******************************************************************************/
// Emits a double-precision add. The opcode depends on the file of source 1. Modifier
// bits: abs/neg of source 1 at 0x31/0x2d, neg/abs of source 0 at 0x30/0x2e. For OP_SUB
// bit 0x00002000 of code[1] is toggled. Source 0 goes to 0x08, the destination to 0x00.
// NOTE(review): the first case label is not visible in this excerpt.
1006 CodeEmitterGM107::emitDADD()
1008 switch (insn
->src(1).getFile()) {
1010 emitInsn(0x5c700000);
1011 emitGPR (0x14, insn
->src(1));
1013 case FILE_MEMORY_CONST
:
1014 emitInsn(0x4c700000);
1015 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1017 case FILE_IMMEDIATE
:
1018 emitInsn(0x38700000);
1019 emitIMMD(0x14, 19, insn
->src(1));
1022 assert(!"bad src1 file");
1025 emitABS(0x31, insn
->src(1));
1026 emitNEG(0x30, insn
->src(0));
1028 emitABS(0x2e, insn
->src(0));
1029 emitNEG(0x2d, insn
->src(1));
1031 if (insn
->op
== OP_SUB
)
1032 code
[1] ^= 0x00002000;
1034 emitGPR(0x08, insn
->src(0));
1035 emitGPR(0x00, insn
->def(0));
// Emits a double-precision multiply. The opcode depends on the file of source 1; the
// combined negate bit of sources 0 and 1 goes to 0x30, source 0 to 0x08 and the
// destination to 0x00.
// NOTE(review): the first case label is not visible in this excerpt.
1039 CodeEmitterGM107::emitDMUL()
1041 switch (insn
->src(1).getFile()) {
1043 emitInsn(0x5c800000);
1044 emitGPR (0x14, insn
->src(1));
1046 case FILE_MEMORY_CONST
:
1047 emitInsn(0x4c800000);
1048 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1050 case FILE_IMMEDIATE
:
1051 emitInsn(0x38800000);
1052 emitIMMD(0x14, 19, insn
->src(1));
1055 assert(!"bad src1 file");
1059 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1062 emitGPR (0x08, insn
->src(0));
1063 emitGPR (0x00, insn
->def(0));
// Emits a double-precision fused multiply-add. The outer switch is on the file of
// source 2. In one form the opcode is chosen by the file of source 1 and source 2 is
// written as a GPR at 0x27; in the FILE_MEMORY_CONST form source 1 is the GPR at 0x27
// and source 2 is the constant-buffer operand. Negate of source 2 goes to 0x31, the
// combined negate of sources 0 and 1 to 0x30, source 0 to 0x08, the destination to
// 0x00.
// NOTE(review): some case labels are not visible in this excerpt.
1067 CodeEmitterGM107::emitDFMA()
1069 switch(insn
->src(2).getFile()) {
1071 switch (insn
->src(1).getFile()) {
1073 emitInsn(0x5b700000);
1074 emitGPR (0x14, insn
->src(1));
1076 case FILE_MEMORY_CONST
:
1077 emitInsn(0x4b700000);
1078 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1080 case FILE_IMMEDIATE
:
1081 emitInsn(0x36700000);
1082 emitIMMD(0x14, 19, insn
->src(1));
1085 assert(!"bad src1 file");
1088 emitGPR (0x27, insn
->src(2));
1090 case FILE_MEMORY_CONST
:
1091 emitInsn(0x53700000);
1092 emitGPR (0x27, insn
->src(1));
1093 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1096 assert(!"bad src2 file");
1101 emitNEG (0x31, insn
->src(2));
1102 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1104 emitGPR (0x08, insn
->src(0));
1105 emitGPR (0x00, insn
->def(0));
// Emits a double-precision min/max. The opcode depends on the file of source 1.
// Modifier bits: abs/neg of source 1 at 0x31/0x2d, neg/abs of source 0 at 0x30/0x2e;
// bit 0x2a is set for OP_MAX. Source 0 goes to 0x08, the destination to 0x00.
// NOTE(review): the first case label is not visible in this excerpt.
1109 CodeEmitterGM107::emitDMNMX()
1111 switch (insn
->src(1).getFile()) {
1113 emitInsn(0x5c500000);
1114 emitGPR (0x14, insn
->src(1));
1116 case FILE_MEMORY_CONST
:
1117 emitInsn(0x4c500000);
1118 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1120 case FILE_IMMEDIATE
:
1121 emitInsn(0x38500000);
1122 emitIMMD(0x14, 19, insn
->src(1));
1125 assert(!"bad src1 file");
1129 emitABS (0x31, insn
->src(1));
1130 emitNEG (0x30, insn
->src(0));
1132 emitABS (0x2e, insn
->src(0));
1133 emitNEG (0x2d, insn
->src(1));
1134 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1136 emitGPR (0x08, insn
->src(0));
1137 emitGPR (0x00, insn
->def(0));
// Emits a double-precision compare writing a GPR. A local `insn` views the
// instruction as a CmpInstruction. The opcode depends on the file of source 1. When
// op is not OP_SET, a 2-bit combine code (AND=0, OR=1, XOR=2) is written at 0x2d and
// source 2 is written as a predicate at 0x27. Other fields: abs/neg modifiers at
// 0x36/0x35/0x2c/0x2b, a flag at 0x34 set when dType is TYPE_F32, the condition at
// 0x30, source 0 at 0x08 and the destination at 0x00.
// NOTE(review): the inner switch header, the first case label and the branch taken
// when op == OP_SET are not visible in this excerpt.
1141 CodeEmitterGM107::emitDSET()
1143 const CmpInstruction
*insn
= this->insn
->asCmp();
1145 switch (insn
->src(1).getFile()) {
1147 emitInsn(0x59000000);
1148 emitGPR (0x14, insn
->src(1));
1150 case FILE_MEMORY_CONST
:
1151 emitInsn(0x49000000);
1152 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1154 case FILE_IMMEDIATE
:
1155 emitInsn(0x32000000);
1156 emitIMMD(0x14, 19, insn
->src(1));
1159 assert(!"bad src1 file");
1163 if (insn
->op
!= OP_SET
) {
1165 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1166 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1167 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1169 assert(!"invalid set op");
1172 emitPRED(0x27, insn
->src(2));
1177 emitABS (0x36, insn
->src(0));
1178 emitNEG (0x35, insn
->src(1));
1179 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1180 emitCond4(0x30, insn
->setCond
);
1182 emitABS (0x2c, insn
->src(1));
1183 emitNEG (0x2b, insn
->src(0));
1184 emitGPR (0x08, insn
->src(0));
1185 emitGPR (0x00, insn
->def(0));
// Emits a double-precision compare writing predicates. A local `insn` views the
// instruction as a CmpInstruction. The opcode depends on the file of source 1. When
// op is not OP_SET, a 2-bit combine code (AND=0, OR=1, XOR=2) is written at 0x2d and
// source 2 is written as a predicate at 0x27. Other fields: the condition at 0x30,
// abs/neg modifiers at 0x2c/0x2b/0x07/0x06, source 0 at 0x08, def(0) as a predicate
// at 0x03 and, when it exists, def(1) as a predicate at 0x00.
// NOTE(review): the inner switch header, the first case label, the branch taken when
// op == OP_SET and any fallback for a missing def(1) are not visible in this excerpt.
1189 CodeEmitterGM107::emitDSETP()
1191 const CmpInstruction
*insn
= this->insn
->asCmp();
1193 switch (insn
->src(1).getFile()) {
1195 emitInsn(0x5b800000);
1196 emitGPR (0x14, insn
->src(1));
1198 case FILE_MEMORY_CONST
:
1199 emitInsn(0x4b800000);
1200 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1202 case FILE_IMMEDIATE
:
1203 emitInsn(0x36800000);
1204 emitIMMD(0x14, 19, insn
->src(1));
1207 assert(!"bad src1 file");
1211 if (insn
->op
!= OP_SET
) {
1213 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1214 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1215 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1217 assert(!"invalid set op");
1220 emitPRED(0x27, insn
->src(2));
1225 emitCond4(0x30, insn
->setCond
);
1226 emitABS (0x2c, insn
->src(1));
1227 emitNEG (0x2b, insn
->src(0));
1228 emitGPR (0x08, insn
->src(0));
1229 emitABS (0x07, insn
->src(0));
1230 emitNEG (0x06, insn
->src(1));
1231 emitPRED (0x03, insn
->def(0));
1232 if (insn
->defExists(1))
1233 emitPRED(0x00, insn
->def(1));
1238 /*******************************************************************************
1240 ******************************************************************************/
// Emits a single-precision add. When longIMMD(src1) is false the opcode depends on the
// file of source 1, the modifier bits go to 0x31/0x30/0x2e/0x2d and OP_SUB toggles
// 0x00002000 in code[1]. The other form uses opcode 0x08000000 with a 32-bit immediate
// at 0x14, modifier bits at 0x39/0x38/0x36/0x35 and OP_SUB toggling 0x00080000 in
// code[1]. Source 0 goes to 0x08, the destination to 0x00.
// NOTE(review): the first case label, the `else` between the two forms and some
// emitted fields are not visible in this excerpt.
1243 CodeEmitterGM107::emitFADD()
1245 if (!longIMMD(insn
->src(1))) {
1246 switch (insn
->src(1).getFile()) {
1248 emitInsn(0x5c580000);
1249 emitGPR (0x14, insn
->src(1));
1251 case FILE_MEMORY_CONST
:
1252 emitInsn(0x4c580000);
1253 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1255 case FILE_IMMEDIATE
:
1256 emitInsn(0x38580000);
1257 emitIMMD(0x14, 19, insn
->src(1));
1260 assert(!"bad src1 file");
1264 emitABS(0x31, insn
->src(1));
1265 emitNEG(0x30, insn
->src(0));
1267 emitABS(0x2e, insn
->src(0));
1268 emitNEG(0x2d, insn
->src(1));
1271 if (insn
->op
== OP_SUB
)
1272 code
[1] ^= 0x00002000;
1274 emitInsn(0x08000000);
1275 emitABS(0x39, insn
->src(1));
1276 emitNEG(0x38, insn
->src(0));
1278 emitABS(0x36, insn
->src(0));
1279 emitNEG(0x35, insn
->src(1));
1281 emitIMMD(0x14, 32, insn
->src(1));
1283 if (insn
->op
== OP_SUB
)
1284 code
[1] ^= 0x00080000;
1287 emitGPR(0x08, insn
->src(0));
1288 emitGPR(0x00, insn
->def(0));
// Emits a single-precision multiply. When longIMMD(src1) is false the opcode depends
// on the file of source 1 and the combined negate bit goes to 0x30. The other form
// uses opcode 0x1e000000 with a 32-bit immediate at 0x14; if exactly one of the two
// sources is negated, bit 0x00080000 of code[1] is toggled (the immediate's sign bit,
// per the inline comment). Source 0 goes to 0x08, the destination to 0x00.
// NOTE(review): the first case label, the `else` between the two forms and some
// emitted fields are not visible in this excerpt.
1292 CodeEmitterGM107::emitFMUL()
1294 if (!longIMMD(insn
->src(1))) {
1295 switch (insn
->src(1).getFile()) {
1297 emitInsn(0x5c680000);
1298 emitGPR (0x14, insn
->src(1));
1300 case FILE_MEMORY_CONST
:
1301 emitInsn(0x4c680000);
1302 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1304 case FILE_IMMEDIATE
:
1305 emitInsn(0x38680000);
1306 emitIMMD(0x14, 19, insn
->src(1));
1309 assert(!"bad src1 file");
1313 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1319 emitInsn(0x1e000000);
1323 emitIMMD(0x14, 32, insn
->src(1));
1324 if (insn
->src(0).mod
.neg() ^ insn
->src(1).mod
.neg())
1325 code
[1] ^= 0x00080000; /* flip immd sign bit */
1328 emitGPR(0x08, insn
->src(0));
1329 emitGPR(0x00, insn
->def(0));
// Emits a single-precision fused multiply-add. The outer switch is on the file of
// source 2. For an immediate source 1 that longIMMD() accepts, opcode 0x0c000000 with
// a 32-bit immediate is used and the assert requires the destination and source 2 to
// share a register id; otherwise the 19-bit immediate opcode 0x32800000 is visible.
// In the FILE_MEMORY_CONST form of source 2, source 1 is the GPR at 0x27. Two sets of
// negate fields are visible (0x39/0x38 and 0x31/0x30). Source 0 goes to 0x08, the
// destination to 0x00.
// NOTE(review): the condition choosing between the two negate-field sets, any
// assignment to `isLongIMMD` and several case labels are not visible in this excerpt.
1333 CodeEmitterGM107::emitFFMA()
1335 bool isLongIMMD
= false;
1336 switch(insn
->src(2).getFile()) {
1338 switch (insn
->src(1).getFile()) {
1340 emitInsn(0x59800000);
1341 emitGPR (0x14, insn
->src(1));
1343 case FILE_MEMORY_CONST
:
1344 emitInsn(0x49800000);
1345 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1347 case FILE_IMMEDIATE
:
1348 if (longIMMD(insn
->getSrc(1))) {
1349 assert(insn
->getDef(0)->reg
.data
.id
== insn
->getSrc(2)->reg
.data
.id
);
1351 emitInsn(0x0c000000);
1352 emitIMMD(0x14, 32, insn
->src(1));
1354 emitInsn(0x32800000);
1355 emitIMMD(0x14, 19, insn
->src(1));
1359 assert(!"bad src1 file");
1363 emitGPR (0x27, insn
->src(2));
1365 case FILE_MEMORY_CONST
:
1366 emitInsn(0x51800000);
1367 emitGPR (0x27, insn
->src(1));
1368 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1371 assert(!"bad src2 file");
1376 emitNEG (0x39, insn
->src(2));
1377 emitNEG2(0x38, insn
->src(0), insn
->src(1));
1383 emitNEG (0x31, insn
->src(2));
1384 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1389 emitGPR(0x08, insn
->src(0));
1390 emitGPR(0x00, insn
->def(0));
// Emits opcode 0x50800000 with a 3-bit function selector `mufu` at 0x14:
// COS=0, SIN=1, EX2=2, LG2=3, RCP=4 + 2*subOp, RSQ=5 + 2*subOp. The neg/abs modifiers
// of source 0 go to 0x30/0x2e, source 0 to 0x08 and the destination to 0x00.
// NOTE(review): the declaration of `mufu`, the switch header and the label owning the
// assert are not visible in this excerpt.
1394 CodeEmitterGM107::emitMUFU()
1399 case OP_COS
: mufu
= 0; break;
1400 case OP_SIN
: mufu
= 1; break;
1401 case OP_EX2
: mufu
= 2; break;
1402 case OP_LG2
: mufu
= 3; break;
1403 case OP_RCP
: mufu
= 4 + 2 * insn
->subOp
; break;
1404 case OP_RSQ
: mufu
= 5 + 2 * insn
->subOp
; break;
1406 assert(!"invalid mufu");
1410 emitInsn (0x50800000);
1412 emitNEG (0x30, insn
->src(0));
1413 emitABS (0x2e, insn
->src(0));
1414 emitField(0x14, 3, mufu
);
1415 emitGPR (0x08, insn
->src(0));
1416 emitGPR (0x00, insn
->def(0));
// Emits a single-precision min/max. The opcode depends on the file of source 1; bit
// 0x2a is set for OP_MAX. Modifier bits: abs/neg of source 1 at 0x31/0x2d, neg/abs of
// source 0 at 0x30/0x2e. Source 0 goes to 0x08, the destination to 0x00.
// NOTE(review): the first case label and some emitted fields are not visible in this
// excerpt.
1420 CodeEmitterGM107::emitFMNMX()
1422 switch (insn
->src(1).getFile()) {
1424 emitInsn(0x5c600000);
1425 emitGPR (0x14, insn
->src(1));
1427 case FILE_MEMORY_CONST
:
1428 emitInsn(0x4c600000);
1429 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1431 case FILE_IMMEDIATE
:
1432 emitInsn(0x38600000);
1433 emitIMMD(0x14, 19, insn
->src(1));
1436 assert(!"bad src1 file");
1440 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1443 emitABS(0x31, insn
->src(1));
1444 emitNEG(0x30, insn
->src(0));
1446 emitABS(0x2e, insn
->src(0));
1447 emitNEG(0x2d, insn
->src(1));
1449 emitGPR(0x08, insn
->src(0));
1450 emitGPR(0x00, insn
->def(0));
// Emits a single-source operation whose opcode depends on the file of source 0.
// The abs/neg modifiers of source 0 go to 0x31/0x2d, bit 0x27 is set for OP_PREEX2
// and the destination goes to 0x00.
// NOTE(review): the first case label is not visible in this excerpt.
1454 CodeEmitterGM107::emitRRO()
1456 switch (insn
->src(0).getFile()) {
1458 emitInsn(0x5c900000);
1459 emitGPR (0x14, insn
->src(0));
1461 case FILE_MEMORY_CONST
:
1462 emitInsn(0x4c900000);
1463 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1465 case FILE_IMMEDIATE
:
1466 emitInsn(0x38900000);
1467 emitIMMD(0x14, 19, insn
->src(0));
1470 assert(!"bad src file");
1474 emitABS (0x31, insn
->src(0));
1475 emitNEG (0x2d, insn
->src(0));
1476 emitField(0x27, 1, insn
->op
== OP_PREEX2
);
1477 emitGPR (0x00, insn
->def(0));
// Emits a float compare-select. `cc` starts as insn->setCond and is passed through
// reverseCondCode() when source 2 carries a negate modifier. The outer switch is on
// the file of source 2: in one form the opcode is chosen by the file of source 1 and
// source 2 is the GPR at 0x27; in the FILE_MEMORY_CONST form source 1 is the GPR at
// 0x27. The condition goes to 0x30, source 0 to 0x08 and the destination to 0x00.
// NOTE(review): some case labels and emitted fields are not visible in this excerpt.
1481 CodeEmitterGM107::emitFCMP()
1483 const CmpInstruction
*insn
= this->insn
->asCmp();
1484 CondCode cc
= insn
->setCond
;
1486 if (insn
->src(2).mod
.neg())
1487 cc
= reverseCondCode(cc
);
1489 switch(insn
->src(2).getFile()) {
1491 switch (insn
->src(1).getFile()) {
1493 emitInsn(0x5ba00000);
1494 emitGPR (0x14, insn
->src(1));
1496 case FILE_MEMORY_CONST
:
1497 emitInsn(0x4ba00000);
1498 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1500 case FILE_IMMEDIATE
:
1501 emitInsn(0x36a00000);
1502 emitIMMD(0x14, 19, insn
->src(1));
1505 assert(!"bad src1 file");
1508 emitGPR (0x27, insn
->src(2));
1510 case FILE_MEMORY_CONST
:
1511 emitInsn(0x53a00000);
1512 emitGPR (0x27, insn
->src(1));
1513 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1516 assert(!"bad src2 file");
1520 emitCond4(0x30, cc
);
1522 emitGPR (0x08, insn
->src(0));
1523 emitGPR (0x00, insn
->def(0));
// Emits a single-precision compare writing a GPR. A local `insn` views the
// instruction as a CmpInstruction. The opcode depends on the file of source 1. When
// op is not OP_SET, a 2-bit combine code (AND=0, OR=1, XOR=2) is written at 0x2d and
// source 2 is written as a predicate at 0x27. Other fields: abs/neg modifiers at
// 0x36/0x35/0x2c/0x2b, a flag at 0x34 set when dType is TYPE_F32, the condition at
// 0x30, source 0 at 0x08 and the destination at 0x00.
// NOTE(review): the inner switch header, the first case label and the branch taken
// when op == OP_SET are not visible in this excerpt.
1527 CodeEmitterGM107::emitFSET()
1529 const CmpInstruction
*insn
= this->insn
->asCmp();
1531 switch (insn
->src(1).getFile()) {
1533 emitInsn(0x58000000);
1534 emitGPR (0x14, insn
->src(1));
1536 case FILE_MEMORY_CONST
:
1537 emitInsn(0x48000000);
1538 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1540 case FILE_IMMEDIATE
:
1541 emitInsn(0x30000000);
1542 emitIMMD(0x14, 19, insn
->src(1));
1545 assert(!"bad src1 file");
1549 if (insn
->op
!= OP_SET
) {
1551 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1552 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1553 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1555 assert(!"invalid set op");
1558 emitPRED(0x27, insn
->src(2));
1564 emitABS (0x36, insn
->src(0));
1565 emitNEG (0x35, insn
->src(1));
1566 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1567 emitCond4(0x30, insn
->setCond
);
1569 emitABS (0x2c, insn
->src(1));
1570 emitNEG (0x2b, insn
->src(0));
1571 emitGPR (0x08, insn
->src(0));
1572 emitGPR (0x00, insn
->def(0));
// Emits a single-precision compare writing predicates. A local `insn` views the
// instruction as a CmpInstruction. The opcode depends on the file of source 1. When
// op is not OP_SET, a 2-bit combine code (AND=0, OR=1, XOR=2) is written at 0x2d and
// source 2 is written as a predicate at 0x27. Other fields: the condition at 0x30,
// abs/neg modifiers at 0x2c/0x2b/0x07/0x06, source 0 at 0x08, def(0) as a predicate
// at 0x03 and, when it exists, def(1) as a predicate at 0x00.
// NOTE(review): the inner switch header, the first case label, the branch taken when
// op == OP_SET and any fallback for a missing def(1) are not visible in this excerpt.
1576 CodeEmitterGM107::emitFSETP()
1578 const CmpInstruction
*insn
= this->insn
->asCmp();
1580 switch (insn
->src(1).getFile()) {
1582 emitInsn(0x5bb00000);
1583 emitGPR (0x14, insn
->src(1));
1585 case FILE_MEMORY_CONST
:
1586 emitInsn(0x4bb00000);
1587 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1589 case FILE_IMMEDIATE
:
1590 emitInsn(0x36b00000);
1591 emitIMMD(0x14, 19, insn
->src(1));
1594 assert(!"bad src1 file");
1598 if (insn
->op
!= OP_SET
) {
1600 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1601 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1602 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1604 assert(!"invalid set op");
1607 emitPRED(0x27, insn
->src(2));
1612 emitCond4(0x30, insn
->setCond
);
1614 emitABS (0x2c, insn
->src(1));
1615 emitNEG (0x2b, insn
->src(0));
1616 emitGPR (0x08, insn
->src(0));
1617 emitABS (0x07, insn
->src(0));
1618 emitNEG (0x06, insn
->src(1));
1619 emitPRED (0x03, insn
->def(0));
1620 if (insn
->defExists(1))
1621 emitPRED(0x00, insn
->def(1));
/* Encodes opcode 0x50f80000. insn->lanes is written with
 * emitField(0x26, 1, ...) (the existing comment says the member is abused
 * for .ndv) and insn->subOp with emitField(0x1c, 8, ...). Source 1 is
 * emitted as the GPR at 0x14 only when predSrc != 1. */
1627 CodeEmitterGM107::emitFSWZADD()
1629 emitInsn (0x50f80000);
1633 emitField(0x26, 1, insn
->lanes
); /* abused for .ndv */
1634 emitField(0x1c, 8, insn
->subOp
);
1635 if (insn
->predSrc
!= 1)
1636 emitGPR (0x14, insn
->src(1));
1639 emitGPR (0x08, insn
->src(0));
1640 emitGPR (0x00, insn
->def(0));
1643 /*******************************************************************************
1645 ******************************************************************************/
/* Encodes a two-source logic op. `lop` is 0/1/2 for OP_AND/OP_OR/OP_XOR.
 * When source 1 is not FILE_IMMEDIATE, the opcode is picked from its file
 * and `lop` is written with emitField(0x29, 2, lop), with invert modifiers
 * at 0x28/0x27. The statements from emitInsn(0x04000000) onward are the
 * 32-bit immediate form, with `lop` at 0x35 and inverts at 0x38/0x37.
 * NOTE(review): the FILE_IMMEDIATE case inside the switch cannot be reached
 * under the outer `!= FILE_IMMEDIATE` guard, so the 19-bit immediate opcode
 * 0x38400000 is never selected here; confirm whether a longIMMD()-style
 * check was intended. */
1648 CodeEmitterGM107::emitLOP()
1653 case OP_AND
: lop
= 0; break;
1654 case OP_OR
: lop
= 1; break;
1655 case OP_XOR
: lop
= 2; break;
1657 assert(!"invalid lop");
1661 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1662 switch (insn
->src(1).getFile()) {
1664 emitInsn(0x5c400000);
1665 emitGPR (0x14, insn
->src(1));
1667 case FILE_MEMORY_CONST
:
1668 emitInsn(0x4c400000);
1669 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1671 case FILE_IMMEDIATE
:
1672 emitInsn(0x38400000);
1673 emitIMMD(0x14, 19, insn
->src(1));
1676 assert(!"bad src1 file");
1682 emitField(0x29, 2, lop
);
1683 emitINV (0x28, insn
->src(1));
1684 emitINV (0x27, insn
->src(0));
1686 emitInsn (0x04000000);
1688 emitINV (0x38, insn
->src(1));
1689 emitINV (0x37, insn
->src(0));
1690 emitField(0x35, 2, lop
);
1692 emitIMMD (0x14, 32, insn
->src(1));
1695 emitGPR (0x08, insn
->src(0));
1696 emitGPR (0x00, insn
->def(0));
1699 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
/* Encodes a bitwise NOT of source 0 as a special case of the logic-op
 * encoding. When longIMMD(src(0)) is false, the opcode is picked from the
 * file of source 0 (0x4c400700 for FILE_MEMORY_CONST, 0x38400700 for
 * FILE_IMMEDIATE, 0x5c400700 under a case label not visible in this
 * listing). Otherwise the 32-bit immediate form 0x05600000 is used.
 * Fix: the 32-bit immediate form now encodes source 0. It previously passed
 * insn->src(1), but every other access in this function, including the
 * longIMMD() guard, uses source 0, the only operand of a NOT.
 * NOTE(review): the assert text says "src1" although the switch inspects
 * source 0. */
1701 CodeEmitterGM107::emitNOT()
1703 if (!longIMMD(insn
->src(0))) {
1704 switch (insn
->src(0).getFile()) {
1706 emitInsn(0x5c400700);
1707 emitGPR (0x14, insn
->src(0));
1709 case FILE_MEMORY_CONST
:
1710 emitInsn(0x4c400700);
1711 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1713 case FILE_IMMEDIATE
:
1714 emitInsn(0x38400700);
1715 emitIMMD(0x14, 19, insn
->src(0));
1718 assert(!"bad src1 file");
1723 emitInsn (0x05600000);
1724 emitIMMD (0x14, 32, insn
->src(0));
1728 emitGPR(0x00, insn
->def(0));
/* Encodes an integer add (also used for OP_SUB). When source 1 is not
 * FILE_IMMEDIATE, the opcode is picked from its file and negate modifiers
 * go to 0x31 (source 0) and 0x30 (source 1). The statements from
 * emitInsn(0x1c000000) onward are the 32-bit immediate form. For OP_SUB,
 * bit 0x00010000 of code[1] is toggled afterwards.
 * NOTE(review): the FILE_IMMEDIATE case inside the switch cannot be reached
 * under the outer `!= FILE_IMMEDIATE` guard. */
1732 CodeEmitterGM107::emitIADD()
1734 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1735 switch (insn
->src(1).getFile()) {
1737 emitInsn(0x5c100000);
1738 emitGPR (0x14, insn
->src(1));
1740 case FILE_MEMORY_CONST
:
1741 emitInsn(0x4c100000);
1742 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1744 case FILE_IMMEDIATE
:
1745 emitInsn(0x38100000);
1746 emitIMMD(0x14, 19, insn
->src(1));
1749 assert(!"bad src1 file");
1753 emitNEG(0x31, insn
->src(0));
1754 emitNEG(0x30, insn
->src(1));
1758 emitInsn(0x1c000000);
1759 emitNEG (0x38, insn
->src(0));
1763 emitIMMD(0x14, 32, insn
->src(1));
1766 if (insn
->op
== OP_SUB
)
1767 code
[1] ^= 0x00010000;
1769 emitGPR(0x08, insn
->src(0));
1770 emitGPR(0x00, insn
->def(0));
/* Encodes an integer multiply. When source 1 is not FILE_IMMEDIATE, the
 * opcode is picked from its file; signedness of sType/dType and the
 * NV50_IR_SUBOP_MUL_HIGH flag are written as single bits at 0x29/0x28/0x27.
 * The statements from emitInsn(0x1f000000) onward are the 32-bit immediate
 * form, with the same three flags at 0x37/0x36/0x35.
 * NOTE(review): the FILE_IMMEDIATE case inside the switch cannot be reached
 * under the outer `!= FILE_IMMEDIATE` guard. */
1774 CodeEmitterGM107::emitIMUL()
1776 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1777 switch (insn
->src(1).getFile()) {
1779 emitInsn(0x5c380000);
1780 emitGPR (0x14, insn
->src(1));
1782 case FILE_MEMORY_CONST
:
1783 emitInsn(0x4c380000);
1784 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1786 case FILE_IMMEDIATE
:
1787 emitInsn(0x38380000);
1788 emitIMMD(0x14, 19, insn
->src(1));
1791 assert(!"bad src1 file");
1795 emitField(0x29, 1, isSignedType(insn
->sType
));
1796 emitField(0x28, 1, isSignedType(insn
->dType
));
1797 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1799 emitInsn (0x1f000000);
1800 emitField(0x37, 1, isSignedType(insn
->sType
));
1801 emitField(0x36, 1, isSignedType(insn
->dType
));
1802 emitField(0x35, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1804 emitIMMD (0x14, 32, insn
->src(1));
1807 emitGPR(0x08, insn
->src(0));
1808 emitGPR(0x00, insn
->def(0));
/* Encodes an integer multiply-add. The outer switch is on the file of
 * source 2. In the first visible arm the opcode is picked from the file of
 * source 1 and source 2 is emitted as the GPR at 0x27. In the
 * FILE_MEMORY_CONST arm the opcode is 0x52000000, source 1 goes to the GPR
 * at 0x27 and source 2 is the constant-buffer operand. The MUL_HIGH flag,
 * sType/dType signedness and the negate modifiers follow as single-bit
 * fields. */
1812 CodeEmitterGM107::emitIMAD()
1814 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1815 switch(insn
->src(2).getFile()) {
1817 switch (insn
->src(1).getFile()) {
1819 emitInsn(0x5a000000);
1820 emitGPR (0x14, insn
->src(1));
1822 case FILE_MEMORY_CONST
:
1823 emitInsn(0x4a000000);
1824 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1826 case FILE_IMMEDIATE
:
1827 emitInsn(0x34000000);
1828 emitIMMD(0x14, 19, insn
->src(1));
1831 assert(!"bad src1 file");
1834 emitGPR (0x27, insn
->src(2));
1836 case FILE_MEMORY_CONST
:
1837 emitInsn(0x52000000);
1838 emitGPR (0x27, insn
->src(1));
1839 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1842 assert(!"bad src2 file");
1846 emitField(0x36, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1847 emitField(0x35, 1, isSignedType(insn
->sType
));
1848 emitNEG (0x34, insn
->src(2));
1849 emitNEG2 (0x33, insn
->src(0), insn
->src(1));
1852 emitField(0x30, 1, isSignedType(insn
->dType
));
1854 emitGPR (0x08, insn
->src(0));
1855 emitGPR (0x00, insn
->def(0));
/* Encodes a scaled add. The opcode is picked from the file of source 2,
 * which is emitted as the GPR, constant-buffer or 19-bit immediate operand
 * at 0x14. Source 1 is emitted with emitIMMD(0x27, 5, ...), presumably the
 * shift amount. Negate modifiers for sources 0 and 2 go to 0x31 and 0x30.
 * NOTE(review): the assert text says "src1" although the switch inspects
 * source 2. */
1859 CodeEmitterGM107::emitISCADD()
1861 switch (insn
->src(2).getFile()) {
1863 emitInsn(0x5c180000);
1864 emitGPR (0x14, insn
->src(2));
1866 case FILE_MEMORY_CONST
:
1867 emitInsn(0x4c180000);
1868 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1870 case FILE_IMMEDIATE
:
1871 emitInsn(0x38180000);
1872 emitIMMD(0x14, 19, insn
->src(2));
1875 assert(!"bad src1 file");
1878 emitNEG (0x31, insn
->src(0));
1879 emitNEG (0x30, insn
->src(2));
1881 emitIMMD(0x27, 5, insn
->src(1));
1882 emitGPR (0x08, insn
->src(0));
1883 emitGPR (0x00, insn
->def(0));
/* Encodes an integer min/max. The opcode is picked from the file of
 * source 1. The bit at 0x30 carries dType signedness, insn->subOp is
 * written with emitField(0x2b, 2, ...), and the bit at 0x2a is set when
 * the op is OP_MAX. */
1887 CodeEmitterGM107::emitIMNMX()
1889 switch (insn
->src(1).getFile()) {
1891 emitInsn(0x5c200000);
1892 emitGPR (0x14, insn
->src(1));
1894 case FILE_MEMORY_CONST
:
1895 emitInsn(0x4c200000);
1896 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1898 case FILE_IMMEDIATE
:
1899 emitInsn(0x38200000);
1900 emitIMMD(0x14, 19, insn
->src(1));
1903 assert(!"bad src1 file");
1907 emitField(0x30, 1, isSignedType(insn
->dType
));
1909 emitField(0x2b, 2, insn
->subOp
);
1910 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1912 emitGPR (0x08, insn
->src(0));
1913 emitGPR (0x00, insn
->def(0));
/* Encodes an integer compare-select. The condition code starts as
 * insn->setCond and is passed through reverseCondCode() when source 2
 * carries a negate modifier. The outer switch is on the file of source 2
 * and the inner one on the file of source 1, mirroring emitIMAD. The
 * condition is written with emitCond3(0x31, cc) and sType signedness goes
 * to the bit at 0x30. */
1917 CodeEmitterGM107::emitICMP()
1919 const CmpInstruction
*insn
= this->insn
->asCmp();
1920 CondCode cc
= insn
->setCond
;
1922 if (insn
->src(2).mod
.neg())
1923 cc
= reverseCondCode(cc
);
1925 switch(insn
->src(2).getFile()) {
1927 switch (insn
->src(1).getFile()) {
1929 emitInsn(0x5b400000);
1930 emitGPR (0x14, insn
->src(1));
1932 case FILE_MEMORY_CONST
:
1933 emitInsn(0x4b400000);
1934 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1936 case FILE_IMMEDIATE
:
1937 emitInsn(0x36400000);
1938 emitIMMD(0x14, 19, insn
->src(1));
1941 assert(!"bad src1 file");
1944 emitGPR (0x27, insn
->src(2));
1946 case FILE_MEMORY_CONST
:
1947 emitInsn(0x53400000);
1948 emitGPR (0x27, insn
->src(1));
1949 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1952 assert(!"bad src2 file");
1956 emitCond3(0x31, cc
);
1957 emitField(0x30, 1, isSignedType(insn
->sType
));
1958 emitGPR (0x08, insn
->src(0));
1959 emitGPR (0x00, insn
->def(0));
/* Encodes an integer set-compare with a GPR result. The opcode is picked
 * from the file of source 1. Ops other than OP_SET write the combine mode
 * with emitField(0x2d, 2, n) and emit source 2 as the predicate at 0x27.
 * The condition is written with emitCond3(0x31, ...), sType signedness
 * goes to 0x30, and the bit at 0x2c is set when dType is TYPE_F32. */
1963 CodeEmitterGM107::emitISET()
1965 const CmpInstruction
*insn
= this->insn
->asCmp();
1967 switch (insn
->src(1).getFile()) {
1969 emitInsn(0x5b500000);
1970 emitGPR (0x14, insn
->src(1));
1972 case FILE_MEMORY_CONST
:
1973 emitInsn(0x4b500000);
1974 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1976 case FILE_IMMEDIATE
:
1977 emitInsn(0x36500000);
1978 emitIMMD(0x14, 19, insn
->src(1));
1981 assert(!"bad src1 file");
1985 if (insn
->op
!= OP_SET
) {
1987 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1988 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1989 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1991 assert(!"invalid set op");
1994 emitPRED(0x27, insn
->src(2));
1999 emitCond3(0x31, insn
->setCond
);
2000 emitField(0x30, 1, isSignedType(insn
->sType
));
2002 emitField(0x2c, 1, insn
->dType
== TYPE_F32
);
2004 emitGPR (0x08, insn
->src(0));
2005 emitGPR (0x00, insn
->def(0));
/* Encodes an integer compare whose results are predicates. Opcode
 * selection and the combine-mode handling match emitISET. def(0) is
 * emitted as a predicate at 0x03; def(1) goes to 0x00 only when it
 * exists. */
2009 CodeEmitterGM107::emitISETP()
2011 const CmpInstruction
*insn
= this->insn
->asCmp();
2013 switch (insn
->src(1).getFile()) {
2015 emitInsn(0x5b600000);
2016 emitGPR (0x14, insn
->src(1));
2018 case FILE_MEMORY_CONST
:
2019 emitInsn(0x4b600000);
2020 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2022 case FILE_IMMEDIATE
:
2023 emitInsn(0x36600000);
2024 emitIMMD(0x14, 19, insn
->src(1));
2027 assert(!"bad src1 file");
2031 if (insn
->op
!= OP_SET
) {
2033 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
2034 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
2035 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
2037 assert(!"invalid set op");
2040 emitPRED(0x27, insn
->src(2));
2045 emitCond3(0x31, insn
->setCond
);
2046 emitField(0x30, 1, isSignedType(insn
->sType
));
2048 emitGPR (0x08, insn
->src(0));
2049 emitPRED (0x03, insn
->def(0));
2050 if (insn
->defExists(1))
2051 emitPRED(0x00, insn
->def(1));
/* Encodes a left shift. The opcode is picked from the file of source 1,
 * the shift amount. The bit at 0x27 is set when subOp equals
 * NV50_IR_SUBOP_SHIFT_WRAP. */
2057 CodeEmitterGM107::emitSHL()
2059 switch (insn
->src(1).getFile()) {
2061 emitInsn(0x5c480000);
2062 emitGPR (0x14, insn
->src(1));
2064 case FILE_MEMORY_CONST
:
2065 emitInsn(0x4c480000);
2066 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2068 case FILE_IMMEDIATE
:
2069 emitInsn(0x38480000);
2070 emitIMMD(0x14, 19, insn
->src(1));
2073 assert(!"bad src1 file");
2079 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2080 emitGPR (0x08, insn
->src(0));
2081 emitGPR (0x00, insn
->def(0));
/* Encodes a right shift. The opcode is picked from the file of source 1.
 * The bit at 0x30 carries dType signedness and the bit at 0x27 is set when
 * subOp equals NV50_IR_SUBOP_SHIFT_WRAP. */
2085 CodeEmitterGM107::emitSHR()
2087 switch (insn
->src(1).getFile()) {
2089 emitInsn(0x5c280000);
2090 emitGPR (0x14, insn
->src(1));
2092 case FILE_MEMORY_CONST
:
2093 emitInsn(0x4c280000);
2094 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2096 case FILE_IMMEDIATE
:
2097 emitInsn(0x38280000);
2098 emitIMMD(0x14, 19, insn
->src(1));
2101 assert(!"bad src1 file");
2105 emitField(0x30, 1, isSignedType(insn
->dType
));
2108 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2109 emitGPR (0x08, insn
->src(0));
2110 emitGPR (0x00, insn
->def(0));
/* Encodes a three-source shift. The opcode depends on the file of source 1
 * and on whether the op is OP_SHL (0x5bf80000 / 0x36f80000) or not
 * (0x5cf80000 / 0x38f80000). `type` is derived from sType in a switch
 * whose cases are not visible in this listing and is written with
 * emitField(0x25, 2, type). The WRAP and HIGH subOp bits are tested with
 * `&`, so they are treated as independent flags. Source 2 is the GPR at
 * 0x27. */
2114 CodeEmitterGM107::emitSHF()
2118 switch (insn
->src(1).getFile()) {
2120 emitInsn(insn
->op
== OP_SHL
? 0x5bf80000 : 0x5cf80000);
2121 emitGPR(0x14, insn
->src(1));
2123 case FILE_IMMEDIATE
:
2124 emitInsn(insn
->op
== OP_SHL
? 0x36f80000 : 0x38f80000);
2125 emitIMMD(0x14, 19, insn
->src(1));
2128 assert(!"bad src1 file");
2132 switch (insn
->sType
) {
2144 emitField(0x32, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_WRAP
));
2146 emitField(0x30, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_HIGH
));
2148 emitGPR (0x27, insn
->src(2));
2149 emitField(0x25, 2, type
);
2150 emitGPR (0x08, insn
->src(0));
2151 emitGPR (0x00, insn
->def(0));
/* Encodes a population count of source 0. The opcode is picked from the
 * file of source 0, which is emitted at 0x14; its invert modifier goes to
 * 0x28.
 * NOTE(review): the assert text says "src1" although the switch inspects
 * source 0. */
2155 CodeEmitterGM107::emitPOPC()
2157 switch (insn
->src(0).getFile()) {
2159 emitInsn(0x5c080000);
2160 emitGPR (0x14, insn
->src(0));
2162 case FILE_MEMORY_CONST
:
2163 emitInsn(0x4c080000);
2164 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2166 case FILE_IMMEDIATE
:
2167 emitInsn(0x38080000);
2168 emitIMMD(0x14, 19, insn
->src(0));
2171 assert(!"bad src1 file");
2175 emitINV(0x28, insn
->src(0));
2176 emitGPR(0x00, insn
->def(0));
/* Encodes a bitfield insert. The outer switch is on the file of source 2
 * and the inner one on the file of source 1, as in emitIMAD. In the
 * FILE_MEMORY_CONST arm the opcode is 0x53f00000, source 1 goes to the GPR
 * at 0x27 and source 2 is the constant-buffer operand. */
2180 CodeEmitterGM107::emitBFI()
2182 switch(insn
->src(2).getFile()) {
2184 switch (insn
->src(1).getFile()) {
2186 emitInsn(0x5bf00000);
2187 emitGPR (0x14, insn
->src(1));
2189 case FILE_MEMORY_CONST
:
2190 emitInsn(0x4bf00000);
2191 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2193 case FILE_IMMEDIATE
:
2194 emitInsn(0x36f00000);
2195 emitIMMD(0x14, 19, insn
->src(1));
2198 assert(!"bad src1 file");
2201 emitGPR (0x27, insn
->src(2));
2203 case FILE_MEMORY_CONST
:
2204 emitInsn(0x53f00000);
2205 emitGPR (0x27, insn
->src(1));
2206 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
2209 assert(!"bad src2 file");
2214 emitGPR (0x08, insn
->src(0));
2215 emitGPR (0x00, insn
->def(0));
/* Encodes a bitfield extract. The opcode is picked from the file of
 * source 1. The bit at 0x30 carries dType signedness and the bit at 0x28
 * is set when subOp equals NV50_IR_SUBOP_EXTBF_REV. */
2219 CodeEmitterGM107::emitBFE()
2221 switch (insn
->src(1).getFile()) {
2223 emitInsn(0x5c000000);
2224 emitGPR (0x14, insn
->src(1));
2226 case FILE_MEMORY_CONST
:
2227 emitInsn(0x4c000000);
2228 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2230 case FILE_IMMEDIATE
:
2231 emitInsn(0x38000000);
2232 emitIMMD(0x14, 19, insn
->src(1));
2235 assert(!"bad src1 file");
2239 emitField(0x30, 1, isSignedType(insn
->dType
));
2241 emitField(0x28, 1, insn
->subOp
== NV50_IR_SUBOP_EXTBF_REV
);
2242 emitGPR (0x08, insn
->src(0));
2243 emitGPR (0x00, insn
->def(0));
/* Encodes a find-leading-one style op on source 0. The opcode is picked
 * from the file of source 0. The bit at 0x30 carries dType signedness, the
 * bit at 0x29 is set when subOp equals NV50_IR_SUBOP_BFIND_SAMT, and the
 * source's invert modifier goes to 0x28.
 * NOTE(review): the assert text says "src1" although the switch inspects
 * source 0. */
2247 CodeEmitterGM107::emitFLO()
2249 switch (insn
->src(0).getFile()) {
2251 emitInsn(0x5c300000);
2252 emitGPR (0x14, insn
->src(0));
2254 case FILE_MEMORY_CONST
:
2255 emitInsn(0x4c300000);
2256 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2258 case FILE_IMMEDIATE
:
2259 emitInsn(0x38300000);
2260 emitIMMD(0x14, 19, insn
->src(0));
2263 assert(!"bad src1 file");
2267 emitField(0x30, 1, isSignedType(insn
->dType
));
2269 emitField(0x29, 1, insn
->subOp
== NV50_IR_SUBOP_BFIND_SAMT
);
2270 emitINV (0x28, insn
->src(0));
2271 emitGPR (0x00, insn
->def(0));
2274 /*******************************************************************************
2276 ******************************************************************************/
/* Writes the load/store data-size code with emitField(pos, 3, data).
 * The code is derived from typeSizeof(type): 1 byte -> 0 (unsigned) or
 * 1 (signed), 2 bytes -> 2 or 3, 4 bytes -> 4, 8 bytes -> 5,
 * 16 bytes -> 6. Other sizes hit the "bad type" assert. */
2279 CodeEmitterGM107::emitLDSTs(int pos
, DataType type
)
2283 switch (typeSizeof(type
)) {
2284 case 1: data
= isSignedType(type
) ? 1 : 0; break;
2285 case 2: data
= isSignedType(type
) ? 3 : 2; break;
2286 case 4: data
= 4; break;
2287 case 8: data
= 5; break;
2288 case 16: data
= 6; break;
2290 assert(!"bad type");
2294 emitField(pos
, 3, data
);
/* Writes the cache mode with emitField(pos, 2, mode), mapping
 * insn->cache as CACHE_CA -> 0, CACHE_CG -> 1, CACHE_CS -> 2,
 * CACHE_CV -> 3. Other values hit the "invalid caching mode" assert. */
2298 CodeEmitterGM107::emitLDSTc(int pos
)
2302 switch (insn
->cache
) {
2303 case CACHE_CA
: mode
= 0; break;
2304 case CACHE_CG
: mode
= 1; break;
2305 case CACHE_CS
: mode
= 2; break;
2306 case CACHE_CV
: mode
= 3; break;
2308 assert(!"invalid caching mode");
2312 emitField(pos
, 2, mode
);
/* Encodes a constant-buffer load (opcode 0xef900000; dispatched for
 * FILE_MEMORY_CONST sources). The data size derived from dType goes to
 * 0x30, subOp is written with emitField(0x2c, 2, ...), source 0 is the
 * constant-buffer operand and def(0) the destination GPR. */
2316 CodeEmitterGM107::emitLDC()
2318 emitInsn (0xef900000);
2319 emitLDSTs(0x30, insn
->dType
);
2320 emitField(0x2c, 2, insn
->subOp
);
2321 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn
->src(0));
2322 emitGPR (0x00, insn
->def(0));
/* Encodes a local-memory load (opcode 0xef400000; dispatched for
 * FILE_MEMORY_LOCAL sources). The data size goes to 0x30, the address
 * comes from source 0 and the destination is def(0). */
2326 CodeEmitterGM107::emitLDL()
2328 emitInsn (0xef400000);
2329 emitLDSTs(0x30, insn
->dType
);
2331 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2332 emitGPR (0x00, insn
->def(0));
/* Encodes a shared-memory load (opcode 0xef480000; dispatched for
 * FILE_MEMORY_SHARED sources). The data size goes to 0x30, the address
 * comes from source 0 and the destination is def(0). */
2336 CodeEmitterGM107::emitLDS()
2338 emitInsn (0xef480000);
2339 emitLDSTs(0x30, insn
->dType
);
2340 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2341 emitGPR (0x00, insn
->def(0));
/* Encodes a global-memory load (opcode 0x80000000; dispatched for
 * FILE_MEMORY_GLOBAL sources). The data size goes to 0x35. The bit at
 * 0x34 is set when the indirect address value of source 0 has size 8,
 * presumably a 64-bit address.
 * NOTE(review): getIndirect(0) is dereferenced without a null check, so
 * this assumes source 0 always has an indirect address; confirm. */
2345 CodeEmitterGM107::emitLD()
2347 emitInsn (0x80000000);
2350 emitLDSTs(0x35, insn
->dType
);
2351 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2352 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2353 emitGPR (0x00, insn
->def(0));
/* Encodes a local-memory store (opcode 0xef500000; dispatched for
 * FILE_MEMORY_LOCAL). The address comes from source 0 and the value being
 * stored is source 1, emitted in the GPR slot at 0x00. */
2357 CodeEmitterGM107::emitSTL()
2359 emitInsn (0xef500000);
2360 emitLDSTs(0x30, insn
->dType
);
2362 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2363 emitGPR (0x00, insn
->src(1));
/* Encodes a store with opcode 0xef580000, presumably to shared memory
 * (parallel to emitLDS). The address comes from source 0 and the value
 * being stored is source 1, emitted in the GPR slot at 0x00. */
2367 CodeEmitterGM107::emitSTS()
2369 emitInsn (0xef580000);
2370 emitLDSTs(0x30, insn
->dType
);
2371 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2372 emitGPR (0x00, insn
->src(1));
/* Encodes a store with opcode 0xa0000000, presumably to global memory
 * (parallel to emitLD). The data size goes to 0x35, and the bit at 0x34 is
 * set when the indirect address value of source 0 has size 8. Source 1 is
 * the value being stored.
 * NOTE(review): getIndirect(0) is dereferenced without a null check. */
2376 CodeEmitterGM107::emitST()
2378 emitInsn (0xa0000000);
2381 emitLDSTs(0x35, insn
->dType
);
2382 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2383 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2384 emitGPR (0x00, insn
->src(1));
/* Encodes opcode 0xefd80000, presumably an attribute load. The field at
 * 0x2f holds (size of def(0) in bytes / 4) - 1. The second indirect of
 * source 0 is emitted as the GPR at 0x27, the address comes from source 0
 * and the destination is def(0). */
2388 CodeEmitterGM107::emitALD()
2390 emitInsn (0xefd80000);
2391 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2392 emitGPR (0x27, insn
->src(0).getIndirect(1));
2395 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2396 emitGPR (0x00, insn
->def(0));
/* Encodes opcode 0xeff00000, presumably an attribute store. The field at
 * 0x2f holds (typeSizeof(dType) / 4) - 1. The second indirect of source 0
 * is emitted as the GPR at 0x27, the address comes from source 0 and the
 * value being stored is source 1. */
2400 CodeEmitterGM107::emitAST()
2402 emitInsn (0xeff00000);
2403 emitField(0x2f, 2, (typeSizeof(insn
->dType
) / 4) - 1);
2404 emitGPR (0x27, insn
->src(0).getIndirect(1));
2406 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2407 emitGPR (0x00, insn
->src(1));
/* Encodes opcode 0xefd00000 with source 0 in the GPR slot at 0x08 and
 * def(0) in the GPR slot at 0x00. */
2411 CodeEmitterGM107::emitISBERD()
2413 emitInsn(0xefd00000);
2414 emitGPR (0x08, insn
->src(0));
2415 emitGPR (0x00, insn
->def(0));
/* Encodes opcode 0xefa00000. The field at 0x2f holds (size of def(0) in
 * bytes / 4) - 1, the offset of source 0 is written with
 * emitField(0x14, 11, ...), and the first indirect of source 0 is the GPR
 * at 0x08. */
2419 CodeEmitterGM107::emitAL2P()
2421 emitInsn (0xefa00000);
2422 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2425 emitField(0x14, 11, insn
->src(0).get()->reg
.data
.offset
);
2426 emitGPR (0x08, insn
->src(0).getIndirect(0));
2427 emitGPR (0x00, insn
->def(0));
/* Fixup callback registered through addInterp(). It patches the two
 * 32-bit words of an already-emitted instruction at code[loc] and
 * code[loc + 1].
 *   entry - supplies the recorded interpolation mode (ipa), register (reg)
 *           and word index (loc).
 *   code  - the instruction words to patch.
 *   data  - supplies the flatshade and force_persample_interp flags.
 * If flatshade is set and the mode is NV50_IR_INTERP_SC, the mode becomes
 * NV50_IR_INTERP_FLAT. Otherwise, if per-sample interpolation is forced,
 * the sample mode is the default and the mode is not flat,
 * NV50_IR_INTERP_CENTROID is OR-ed in. The statements on the original
 * lines following each assignment are not visible in this listing.
 * Word loc + 1 then has 4 bits at 0x14 cleared and refilled from ipa:
 * its low two bits are shifted to 0x16 and its next two bits land at 0x14.
 * Word loc has 8 bits at 0x14 replaced by reg. */
2431 interpApply(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
2433 int ipa
= entry
->ipa
;
2434 int reg
= entry
->reg
;
2435 int loc
= entry
->loc
;
2437 if (data
.flatshade
&&
2438 (ipa
& NV50_IR_INTERP_MODE_MASK
) == NV50_IR_INTERP_SC
) {
2439 ipa
= NV50_IR_INTERP_FLAT
;
2441 } else if (data
.force_persample_interp
&&
2442 (ipa
& NV50_IR_INTERP_SAMPLE_MASK
) == NV50_IR_INTERP_DEFAULT
&&
2443 (ipa
& NV50_IR_INTERP_MODE_MASK
) != NV50_IR_INTERP_FLAT
) {
2444 ipa
|= NV50_IR_INTERP_CENTROID
;
2446 code
[loc
+ 1] &= ~(0xf << 0x14);
2447 code
[loc
+ 1] |= (ipa
& 0x3) << 0x16;
2448 code
[loc
+ 1] |= (ipa
& 0xc) << (0x14 - 2);
2449 code
[loc
+ 0] &= ~(0xff << 0x14);
2450 code
[loc
+ 0] |= reg
<< 0x14;
/* Encodes an interpolation instruction (opcode 0xe0000000). `ipam` is
 * 0/1/2/3 for LINEAR/PERSPECTIVE/FLAT/SC and `ipas` is 0/1/2 for
 * DEFAULT/CENTROID/OFFSET; they are written with emitField(0x36, 2, ...)
 * and emitField(0x34, 2, ...). After the address is emitted, bit
 * 0x00000040 of code[1] (.idx) is set unless bits 8..15 of code[0] are all
 * ones. For OP_PINTERP, source 1 goes to the GPR at 0x14 and its register
 * id is recorded with addInterp(). The other visible path records 0xff.
 * With the OFFSET sample mode, an extra source (2 for OP_PINTERP, else 1)
 * goes to the GPR at 0x27. The body of the final `if` is not visible in
 * this listing. */
2454 CodeEmitterGM107::emitIPA()
2456 int ipam
= 0, ipas
= 0;
2458 switch (insn
->getInterpMode()) {
2459 case NV50_IR_INTERP_LINEAR
: ipam
= 0; break;
2460 case NV50_IR_INTERP_PERSPECTIVE
: ipam
= 1; break;
2461 case NV50_IR_INTERP_FLAT
: ipam
= 2; break;
2462 case NV50_IR_INTERP_SC
: ipam
= 3; break;
2464 assert(!"invalid ipa mode");
2468 switch (insn
->getSampleMode()) {
2469 case NV50_IR_INTERP_DEFAULT
: ipas
= 0; break;
2470 case NV50_IR_INTERP_CENTROID
: ipas
= 1; break;
2471 case NV50_IR_INTERP_OFFSET
: ipas
= 2; break;
2473 assert(!"invalid ipa sample mode");
2477 emitInsn (0xe0000000);
2478 emitField(0x36, 2, ipam
);
2479 emitField(0x34, 2, ipas
);
2481 emitField(0x2f, 3, 7);
2482 emitADDR (0x08, 0x1c, 10, 0, insn
->src(0));
2483 if ((code
[0] & 0x0000ff00) != 0x0000ff00)
2484 code
[1] |= 0x00000040; /* .idx */
2485 emitGPR(0x00, insn
->def(0));
2487 if (insn
->op
== OP_PINTERP
) {
2488 emitGPR(0x14, insn
->src(1));
2489 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2490 emitGPR(0x27, insn
->src(2));
2491 addInterp(insn
->ipa
, insn
->getSrc(1)->reg
.data
.id
, interpApply
);
2493 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2494 emitGPR(0x27, insn
->src(1));
2496 addInterp(insn
->ipa
, 0xff, interpApply
);
2499 if (insn
->getSampleMode() != NV50_IR_INTERP_OFFSET
)
/* Encodes an atomic operation. For NV50_IR_SUBOP_ATOM_CAS, dType is coded
 * as U32 -> 0 / U64 -> 1 and the opcode is 0xee000000. In the other
 * visible path it is coded as U32 0, S32 1, U64 2, F32 3, B128 4, S64 5,
 * and the opcode is 0xed000000. There subOp is taken from insn->subOp,
 * except for ATOM_EXCH, whose replacement value is on a line not visible
 * in this listing. subOp and dType are written with
 * emitField(0x34, 4, ...) and emitField(0x31, 3, ...). The bit at 0x30 is
 * set when the indirect address of source 0 has size 8.
 * NOTE(review): getIndirect(0) is dereferenced without a null check. */
2504 CodeEmitterGM107::emitATOM()
2506 unsigned dType
, subOp
;
2508 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2509 switch (insn
->dType
) {
2510 case TYPE_U32
: dType
= 0; break;
2511 case TYPE_U64
: dType
= 1; break;
2512 default: assert(!"unexpected dType"); dType
= 0; break;
2516 emitInsn (0xee000000);
2518 switch (insn
->dType
) {
2519 case TYPE_U32
: dType
= 0; break;
2520 case TYPE_S32
: dType
= 1; break;
2521 case TYPE_U64
: dType
= 2; break;
2522 case TYPE_F32
: dType
= 3; break;
2523 case TYPE_B128
: dType
= 4; break;
2524 case TYPE_S64
: dType
= 5; break;
2525 default: assert(!"unexpected dType"); dType
= 0; break;
2527 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2530 subOp
= insn
->subOp
;
2532 emitInsn (0xed000000);
2535 emitField(0x34, 4, subOp
);
2536 emitField(0x31, 3, dType
);
2537 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2538 emitGPR (0x14, insn
->src(1));
2539 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2540 emitGPR (0x00, insn
->def(0));
/* Encodes an atomic with opcodes 0xee000000 (CAS) or 0xec000000 (other
 * sub-ops), presumably the shared-memory variant. For CAS, dType is coded
 * U32 -> 0 / U64 -> 1 and written with emitField(0x34, 1, ...). Otherwise
 * it is coded U32 0, S32 1, U64 2, S64 3 and written with
 * emitField(0x1c, 3, ...). subOp is written with emitField(0x34, 4, ...);
 * which branch that statement belongs to cannot be determined from this
 * listing. */
2544 CodeEmitterGM107::emitATOMS()
2546 unsigned dType
, subOp
;
2548 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2549 switch (insn
->dType
) {
2550 case TYPE_U32
: dType
= 0; break;
2551 case TYPE_U64
: dType
= 1; break;
2552 default: assert(!"unexpected dType"); dType
= 0; break;
2556 emitInsn (0xee000000);
2557 emitField(0x34, 1, dType
);
2559 switch (insn
->dType
) {
2560 case TYPE_U32
: dType
= 0; break;
2561 case TYPE_S32
: dType
= 1; break;
2562 case TYPE_U64
: dType
= 2; break;
2563 case TYPE_S64
: dType
= 3; break;
2564 default: assert(!"unexpected dType"); dType
= 0; break;
2567 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2570 subOp
= insn
->subOp
;
2572 emitInsn (0xec000000);
2573 emitField(0x1c, 3, dType
);
2576 emitField(0x34, 4, subOp
);
2577 emitGPR (0x14, insn
->src(1));
2578 emitADDR (0x08, 0x1e, 22, 2, insn
->src(0));
2579 emitGPR (0x00, insn
->def(0));
/* Encodes a reduction (opcode 0xebf80000), which has no destination.
 * dType is coded U32 0, S32 1, U64 2, F32 3, B128 4, S64 5 and written
 * with emitField(0x14, 3, ...); subOp is written with
 * emitField(0x17, 3, ...). The bit at 0x30 is set when the indirect
 * address of source 0 has size 8. Source 1 is emitted in the GPR slot at
 * 0x00.
 * NOTE(review): getIndirect(0) is dereferenced without a null check. */
2583 CodeEmitterGM107::emitRED()
2587 switch (insn
->dType
) {
2588 case TYPE_U32
: dType
= 0; break;
2589 case TYPE_S32
: dType
= 1; break;
2590 case TYPE_U64
: dType
= 2; break;
2591 case TYPE_F32
: dType
= 3; break;
2592 case TYPE_B128
: dType
= 4; break;
2593 case TYPE_S64
: dType
= 5; break;
2594 default: assert(!"unexpected dType"); dType
= 0; break;
2597 emitInsn (0xebf80000);
2598 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2599 emitField(0x17, 3, insn
->subOp
);
2600 emitField(0x14, 3, dType
);
2601 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2602 emitGPR (0x00, insn
->src(1));
/* Encodes a cache-control instruction. Opcode 0xef600000 is used when
 * source 0 is in FILE_MEMORY_GLOBAL. 0xef800000 is the other visible
 * opcode, and `width` is assigned on lines not visible in this listing.
 * The bit at 0x34 is set when the indirect address of source 0 has size 8,
 * and subOp is written with emitField(0x00, 4, ...).
 * NOTE(review): getIndirect(0) is dereferenced without a null check. */
2606 CodeEmitterGM107::emitCCTL()
2609 if (insn
->src(0).getFile() == FILE_MEMORY_GLOBAL
) {
2610 emitInsn(0xef600000);
2613 emitInsn(0xef800000);
2616 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2617 emitADDR (0x08, 0x16, width
, 2, insn
->src(0));
2618 emitField(0x00, 4, insn
->subOp
);
2621 /*******************************************************************************
2623 ******************************************************************************/
/* Encodes opcode 0xefe80000. subOp is written with
 * emitField(0x1f, 3, ...), source 0 goes to the GPR at 0x08 and def(0) to
 * the GPR at 0x00. */
2626 CodeEmitterGM107::emitPIXLD()
2628 emitInsn (0xefe80000);
2630 emitField(0x1f, 3, insn
->subOp
);
2631 emitGPR (0x08, insn
->src(0));
2632 emitGPR (0x00, insn
->def(0));
2635 /*******************************************************************************
2637 ******************************************************************************/
/* Emits the second texture source as a GPR at `pos`, if it exists. The
 * source index is 1, or 2 when source 1 is the instruction's predicate
 * (predSrc == 1). Any handling of the missing-source case is on lines not
 * visible in this listing. */
2640 CodeEmitterGM107::emitTEXs(int pos
)
2642 int src1
= insn
->predSrc
== 1 ? 2 : 1;
2643 if (insn
->srcExists(src1
))
2644 emitGPR(pos
, insn
->src(src1
));
/* Encodes a texture fetch. When tex.levelZero is not set, `lodm` is
 * 0/2/3 for OP_TEX/OP_TXB/OP_TXL. Its value in the levelZero case is set
 * on lines not visible in this listing. With an indirect resource
 * (tex.rIndirectSrc >= 0) the opcode is 0xdeb80000 with lodm at 0x25 and
 * the offsets flag at 0x24. Otherwise it is 0xc0380000 with lodm at 0x37,
 * the offsets flag at 0x36 and tex.r written with
 * emitField(0x24, 13, ...). Common fields follow: shadow, liveOnly,
 * derivAll, mask, dimension code (3 for cube, else getDim() - 1) and the
 * array flag. */
2650 CodeEmitterGM107::emitTEX()
2652 const TexInstruction
*insn
= this->insn
->asTex();
2655 if (!insn
->tex
.levelZero
) {
2657 case OP_TEX
: lodm
= 0; break;
2658 case OP_TXB
: lodm
= 2; break;
2659 case OP_TXL
: lodm
= 3; break;
2661 assert(!"invalid tex op");
2668 if (insn
->tex
.rIndirectSrc
>= 0) {
2669 emitInsn (0xdeb80000);
2670 emitField(0x25, 2, lodm
);
2671 emitField(0x24, 1, insn
->tex
.useOffsets
== 1);
2673 emitInsn (0xc0380000);
2674 emitField(0x37, 2, lodm
);
2675 emitField(0x36, 1, insn
->tex
.useOffsets
== 1);
2676 emitField(0x24, 13, insn
->tex
.r
);
2679 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2680 emitField(0x31, 1, insn
->tex
.liveOnly
);
2681 emitField(0x23, 1, insn
->tex
.derivAll
);
2682 emitField(0x1f, 4, insn
->tex
.mask
);
2683 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2684 insn
->tex
.target
.getDim() - 1);
2685 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2687 emitGPR (0x08, insn
->src(0));
2688 emitGPR (0x00, insn
->def(0));
/* Encodes a texel load. The opcode is 0xdd380000 with an indirect
 * resource. Otherwise it is 0xdc380000 and tex.r is written with
 * emitField(0x24, 13, ...). The bit at 0x37 is set when tex.levelZero is
 * 0, the bit at 0x32 when the target is multisampled, and the bit at 0x23
 * when useOffsets == 1. Mask, dimension code and array flag follow as in
 * emitTEX. */
2692 CodeEmitterGM107::emitTLD()
2694 const TexInstruction
*insn
= this->insn
->asTex();
2696 if (insn
->tex
.rIndirectSrc
>= 0) {
2697 emitInsn (0xdd380000);
2699 emitInsn (0xdc380000);
2700 emitField(0x24, 13, insn
->tex
.r
);
2703 emitField(0x37, 1, insn
->tex
.levelZero
== 0);
2704 emitField(0x32, 1, insn
->tex
.target
.isMS());
2705 emitField(0x31, 1, insn
->tex
.liveOnly
);
2706 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2707 emitField(0x1f, 4, insn
->tex
.mask
);
2708 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2709 insn
->tex
.target
.getDim() - 1);
2710 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2712 emitGPR (0x08, insn
->src(0));
2713 emitGPR (0x00, insn
->def(0));
/* Encodes a texture gather. With an indirect resource the opcode is
 * 0xdef80000 with gatherComp at 0x26 and the useOffsets == 4 and
 * useOffsets == 1 flags at 0x25 and 0x24. Otherwise it is 0xc8380000 with
 * the same three values at 0x38, 0x37 and 0x36, plus tex.r written with
 * emitField(0x24, 13, ...).
 * NOTE(review): the two offset flags are boolean values written with a
 * width argument of 2 at adjacent positions; since each value is 0 or 1
 * the extra bit is always zero. */
2717 CodeEmitterGM107::emitTLD4()
2719 const TexInstruction
*insn
= this->insn
->asTex();
2721 if (insn
->tex
.rIndirectSrc
>= 0) {
2722 emitInsn (0xdef80000);
2723 emitField(0x26, 2, insn
->tex
.gatherComp
);
2724 emitField(0x25, 2, insn
->tex
.useOffsets
== 4);
2725 emitField(0x24, 2, insn
->tex
.useOffsets
== 1);
2727 emitInsn (0xc8380000);
2728 emitField(0x38, 2, insn
->tex
.gatherComp
);
2729 emitField(0x37, 2, insn
->tex
.useOffsets
== 4);
2730 emitField(0x36, 2, insn
->tex
.useOffsets
== 1);
2731 emitField(0x24, 13, insn
->tex
.r
);
2734 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2735 emitField(0x31, 1, insn
->tex
.liveOnly
);
2736 emitField(0x23, 1, insn
->tex
.derivAll
);
2737 emitField(0x1f, 4, insn
->tex
.mask
);
2738 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2739 insn
->tex
.target
.getDim() - 1);
2740 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2742 emitGPR (0x08, insn
->src(0));
2743 emitGPR (0x00, insn
->def(0));
/* Encodes a texture fetch with opcodes 0xde780000 (indirect resource) or
 * 0xde380000 (tex.r written with emitField(0x24, 13, ...)), presumably
 * the explicit-derivative form. liveOnly, the useOffsets == 1 flag, mask,
 * dimension code and array flag follow. */
2747 CodeEmitterGM107::emitTXD()
2749 const TexInstruction
*insn
= this->insn
->asTex();
2751 if (insn
->tex
.rIndirectSrc
>= 0) {
2752 emitInsn (0xde780000);
2754 emitInsn (0xde380000);
2755 emitField(0x24, 13, insn
->tex
.r
);
2758 emitField(0x31, 1, insn
->tex
.liveOnly
);
2759 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2760 emitField(0x1f, 4, insn
->tex
.mask
);
2761 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2762 insn
->tex
.target
.getDim() - 1);
2763 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2765 emitGPR (0x08, insn
->src(0));
2766 emitGPR (0x00, insn
->def(0));
/* Encodes a texture op with opcodes 0xdf600000 (indirect resource) or
 * 0xdf580000 (tex.r written with emitField(0x24, 13, ...)). liveOnly,
 * derivAll, mask, dimension code and array flag follow. */
2770 CodeEmitterGM107::emitTMML()
2772 const TexInstruction
*insn
= this->insn
->asTex();
2774 if (insn
->tex
.rIndirectSrc
>= 0) {
2775 emitInsn (0xdf600000);
2777 emitInsn (0xdf580000);
2778 emitField(0x24, 13, insn
->tex
.r
);
2781 emitField(0x31, 1, insn
->tex
.liveOnly
);
2782 emitField(0x23, 1, insn
->tex
.derivAll
);
2783 emitField(0x1f, 4, insn
->tex
.mask
);
2784 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2785 insn
->tex
.target
.getDim() - 1);
2786 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2788 emitGPR (0x08, insn
->src(0));
2789 emitGPR (0x00, insn
->def(0));
/* Encodes a texture query. tex.query is mapped to a `type` code:
 * DIMS 0x01, TYPE 0x02, SAMPLE_POSITION 0x05, FILTER 0x10, LOD 0x12,
 * WRAP 0x14, BORDER_COLOUR 0x16. Other values hit the "invalid txq query"
 * assert. The code is written with emitField(0x16, 6, type). The opcode
 * is 0xdf500000 with an indirect resource, otherwise 0xdf480000 with
 * tex.r written with emitField(0x24, 13, ...). */
2793 CodeEmitterGM107::emitTXQ()
2795 const TexInstruction
*insn
= this->insn
->asTex();
2798 switch (insn
->tex
.query
) {
2799 case TXQ_DIMS
: type
= 0x01; break;
2800 case TXQ_TYPE
: type
= 0x02; break;
2801 case TXQ_SAMPLE_POSITION
: type
= 0x05; break;
2802 case TXQ_FILTER
: type
= 0x10; break;
2803 case TXQ_LOD
: type
= 0x12; break;
2804 case TXQ_WRAP
: type
= 0x14; break;
2805 case TXQ_BORDER_COLOUR
: type
= 0x16; break;
2807 assert(!"invalid txq query");
2811 if (insn
->tex
.rIndirectSrc
>= 0) {
2812 emitInsn (0xdf500000);
2814 emitInsn (0xdf480000);
2815 emitField(0x24, 13, insn
->tex
.r
);
2818 emitField(0x31, 1, insn
->tex
.liveOnly
);
2819 emitField(0x1f, 4, insn
->tex
.mask
);
2820 emitField(0x16, 6, type
);
2821 emitGPR (0x08, insn
->src(0));
2822 emitGPR (0x00, insn
->def(0));
/* Encodes opcode 0xf0f00000 with the "le" bit at 0x1d set and the
 * constant 5 written with emitField(0x1a, 3, 5). insn->subOp is written
 * twice, with emitField(0x14, 6, ...) and emitField(0x00, 6, ...). */
2826 CodeEmitterGM107::emitDEPBAR()
2828 emitInsn (0xf0f00000);
2829 emitField(0x1d, 1, 1); /* le */
2830 emitField(0x1a, 3, 5);
2831 emitField(0x14, 6, insn
->subOp
);
2832 emitField(0x00, 6, insn
->subOp
);
2835 /*******************************************************************************
2837 ******************************************************************************/
/* Encodes a no-op: only opcode 0x50b00000 is emitted. */
2840 CodeEmitterGM107::emitNOP()
2842 emitInsn(0x50b00000);
/* Encodes opcode 0xe3300000 with the CC_TR condition written by
 * emitCond5(0x00, ...). */
2846 CodeEmitterGM107::emitKIL()
2848 emitInsn (0xe3300000);
2849 emitCond5(0x00, CC_TR
);
/* Encodes an emit/restart instruction. `cut` is true for OP_RESTART or
 * whenever subOp is non-zero; `emit` is true for OP_EMIT. The opcode is
 * picked from the file of source 1 (0xf6e00000 for FILE_IMMEDIATE,
 * 0xebe00000 for FILE_MEMORY_CONST, 0xfbe00000 under a case label not
 * visible in this listing). The two flags are combined as
 * (cut << 1) | emit and written with emitField(0x27, 2, ...). */
2853 CodeEmitterGM107::emitOUT()
2855 const int cut
= insn
->op
== OP_RESTART
|| insn
->subOp
;
2856 const int emit
= insn
->op
== OP_EMIT
;
2858 switch (insn
->src(1).getFile()) {
2860 emitInsn(0xfbe00000);
2861 emitGPR (0x14, insn
->src(1));
2863 case FILE_IMMEDIATE
:
2864 emitInsn(0xf6e00000);
2865 emitIMMD(0x14, 19, insn
->src(1));
2867 case FILE_MEMORY_CONST
:
2868 emitInsn(0xebe00000);
2869 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2872 assert(!"bad src1 file");
2876 emitField(0x27, 2, (cut
<< 1) | emit
);
2877 emitGPR (0x08, insn
->src(0));
2878 emitGPR (0x00, insn
->def(0));
/* Encodes a barrier instruction (opcode 0xf0a80000). insn->subOp selects
 * the `subop` byte written with emitField(0x20, 8, ...): RED_POPC 0x02,
 * RED_AND 0x0a, RED_OR 0x12, ARRIVE 0x81. The remaining path asserts
 * NV50_IR_SUBOP_BAR_SYNC, and its value is set on a line not visible in
 * this listing.
 * Source 0 is emitted either as the GPR at 0x08 or as an immediate with
 * emitField(0x08, 8, ...) plus the flag bit at 0x2b. Source 1 is emitted
 * either as the GPR at 0x14 or as an immediate with
 * emitField(0x14, 12, ...) plus the flag bit at 0x2c. If source 2 exists
 * and is not the instruction predicate, it is emitted as the predicate at
 * 0x27 with the bit at 0x2a set for a NOT modifier. The statement
 * emitField(0x27, 3, 7) is the other visible path.
 * Fix: the immediate for the 0x14 field is now read from source 1. It
 * previously re-read source 0 (a copy of the block above), which encodes
 * the wrong value whenever the two immediates differ and dereferences a
 * null asImm() result when source 0 is a register. */
2882 CodeEmitterGM107::emitBAR()
2886 emitInsn (0xf0a80000);
2888 switch (insn
->subOp
) {
2889 case NV50_IR_SUBOP_BAR_RED_POPC
: subop
= 0x02; break;
2890 case NV50_IR_SUBOP_BAR_RED_AND
: subop
= 0x0a; break;
2891 case NV50_IR_SUBOP_BAR_RED_OR
: subop
= 0x12; break;
2892 case NV50_IR_SUBOP_BAR_ARRIVE
: subop
= 0x81; break;
2895 assert(insn
->subOp
== NV50_IR_SUBOP_BAR_SYNC
);
2899 emitField(0x20, 8, subop
);
2902 if (insn
->src(0).getFile() == FILE_GPR
) {
2903 emitGPR(0x08, insn
->src(0));
2905 ImmediateValue
*imm
= insn
->getSrc(0)->asImm();
2907 emitField(0x08, 8, imm
->reg
.data
.u32
);
2908 emitField(0x2b, 1, 1);
2912 if (insn
->src(1).getFile() == FILE_GPR
) {
2913 emitGPR(0x14, insn
->src(1));
2915 ImmediateValue
*imm
= insn
->getSrc(1)->asImm();
2917 emitField(0x14, 12, imm
->reg
.data
.u32
);
2918 emitField(0x2c, 1, 1);
2921 if (insn
->srcExists(2) && (insn
->predSrc
!= 2)) {
2922 emitPRED (0x27, insn
->src(2));
2923 emitField(0x2a, 1, insn
->src(2).mod
== Modifier(NV50_IR_MOD_NOT
));
2925 emitField(0x27, 3, 7);
/* Encodes opcode 0xef980000 with insn->subOp >> 2 written by
 * emitField(0x08, 2, ...). */
2930 CodeEmitterGM107::emitMEMBAR()
2932 emitInsn (0xef980000);
2933 emitField(0x08, 2, insn
->subOp
>> 2);
/* Encodes a vote instruction (opcode 0x50d80000). The loop scans the
 * definitions for a GPR and a predicate destination. The assignments to
 * the indices `r` and `p` are on lines not visible in this listing.
 * subOp is written with emitField(0x30, 2, ...), def(r) goes to the GPR at
 * 0x00 and def(p) to the predicate at 0x2d. A FILE_PREDICATE source is
 * emitted at 0x27 with the bit at 0x2a set for a NOT modifier. A
 * FILE_IMMEDIATE source must be 0 or 1, and the bit at 0x2a is set when
 * it is 0. */
2937 CodeEmitterGM107::emitVOTE()
2939 const ImmediateValue
*imm
;
2943 for (int i
= 0; insn
->defExists(i
); i
++) {
2944 if (insn
->def(i
).getFile() == FILE_GPR
)
2946 else if (insn
->def(i
).getFile() == FILE_PREDICATE
)
2950 emitInsn (0x50d80000);
2951 emitField(0x30, 2, insn
->subOp
);
2953 emitGPR (0x00, insn
->def(r
));
2957 emitPRED (0x2d, insn
->def(p
));
2961 switch (insn
->src(0).getFile()) {
2962 case FILE_PREDICATE
:
2963 emitField(0x2a, 1, insn
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
));
2964 emitPRED (0x27, insn
->src(0));
2966 case FILE_IMMEDIATE
:
2967 imm
= insn
->getSrc(0)->asImm();
2969 u32
= imm
->reg
.data
.u32
;
2970 assert(u32
== 0 || u32
== 1);
2972 emitField(0x2a, 1, u32
== 0);
2975 assert(!"Unhandled src");
/* Writes the surface target code with emitField(0x20, 4, target). The op
 * must lie between OP_SULDB and OP_SUREDP. The chain distinguishes
 * BUFFER, 1D_ARRAY, 2D/RECT, 2D_ARRAY/CUBE/CUBE_ARRAY and 3D; anything
 * else is asserted to be TEX_TARGET_1D. The value assigned to `target` in
 * each arm is on lines not visible in this listing. */
2981 CodeEmitterGM107::emitSUTarget()
2983 const TexInstruction
*insn
= this->insn
->asTex();
2986 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
2988 if (insn
->tex
.target
== TEX_TARGET_BUFFER
) {
2990 } else if (insn
->tex
.target
== TEX_TARGET_1D_ARRAY
) {
2992 } else if (insn
->tex
.target
== TEX_TARGET_2D
||
2993 insn
->tex
.target
== TEX_TARGET_RECT
) {
2995 } else if (insn
->tex
.target
== TEX_TARGET_2D_ARRAY
||
2996 insn
->tex
.target
== TEX_TARGET_CUBE
||
2997 insn
->tex
.target
== TEX_TARGET_CUBE_ARRAY
) {
2999 } else if (insn
->tex
.target
== TEX_TARGET_3D
) {
3002 assert(insn
->tex
.target
== TEX_TARGET_1D
);
3004 emitField(0x20, 4, target
);
/* Emits the surface handle taken from source `s`. The op must lie between
 * OP_SULDB and OP_SUREDP. A FILE_GPR source goes to the GPR at 0x27.
 * Otherwise the source is read as an immediate: the bit at 0x33 is set
 * and its u32 value is written with emitField(0x24, 13, ...). */
3008 CodeEmitterGM107::emitSUHandle(const int s
)
3010 const TexInstruction
*insn
= this->insn
->asTex();
3012 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
3014 if (insn
->src(s
).getFile() == FILE_GPR
) {
3015 emitGPR(0x27, insn
->src(s
));
3017 ImmediateValue
*imm
= insn
->getSrc(s
)->asImm();
3019 emitField(0x33, 1, 1);
3020 emitField(0x24, 13, imm
->reg
.data
.u32
);
/* Encodes a surface store (opcode 0xeb200000). The bit at 0x34 is set for
 * OP_SUSTB. The component mask is fixed to 0xf (rgba), source 0 goes to
 * the GPR at 0x08 and source 1 to the GPR at 0x00. */
3025 CodeEmitterGM107::emitSUSTx()
3027 const TexInstruction
*insn
= this->insn
->asTex();
3029 emitInsn(0xeb200000);
3030 if (insn
->op
== OP_SUSTB
)
3031 emitField(0x34, 1, 1);
3035 emitField(0x14, 4, 0xf); // rgba
3036 emitGPR (0x08, insn
->src(0));
3037 emitGPR (0x00, insn
->src(1));
/* Encodes a surface load (opcode 0xeb000000). The bit at 0x34 is set for
 * OP_SULDB. dType is mapped to a `type` code: S8 1, U16 2, S16 3, U32 4,
 * U64 5, B128 6. Any other dType is asserted to be TYPE_U8, and the
 * initial value of `type` is declared on a line not visible in this
 * listing. The code is written with emitField(0x14, 3, type). */
3043 CodeEmitterGM107::emitSULDx()
3045 const TexInstruction
*insn
= this->insn
->asTex();
3048 emitInsn(0xeb000000);
3049 if (insn
->op
== OP_SULDB
)
3050 emitField(0x34, 1, 1);
3053 switch (insn
->dType
) {
3054 case TYPE_S8
: type
= 1; break;
3055 case TYPE_U16
: type
= 2; break;
3056 case TYPE_S16
: type
= 3; break;
3057 case TYPE_U32
: type
= 4; break;
3058 case TYPE_U64
: type
= 5; break;
3059 case TYPE_B128
: type
= 6; break;
3061 assert(insn
->dType
== TYPE_U8
);
3065 emitField(0x14, 3, type
);
3066 emitGPR (0x00, insn
->def(0));
3067 emitGPR (0x08, insn
->src(0));
/* Encodes a surface atomic/reduction. The opcode is 0xeac00000 for
 * NV50_IR_SUBOP_ATOM_CAS and 0xea600000 in the other visible path; the
 * bit at 0x34 is set for OP_SUREDB. `type` starts at 0 and becomes 1/2/3/5
 * for S32/U64/F32/S64; any other dType is asserted to be TYPE_U32. subOp
 * is taken from insn->subOp, except for ATOM_CAS and ATOM_EXCH, whose
 * replacement values are on lines not visible in this listing. type and
 * subOp are written with emitField(0x24, 3, ...) and
 * emitField(0x1d, 4, ...). */
3073 CodeEmitterGM107::emitSUREDx()
3075 const TexInstruction
*insn
= this->insn
->asTex();
3076 uint8_t type
= 0, subOp
;
3078 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
)
3079 emitInsn(0xeac00000);
3081 emitInsn(0xea600000);
3083 if (insn
->op
== OP_SUREDB
)
3084 emitField(0x34, 1, 1);
3088 switch (insn
->dType
) {
3089 case TYPE_S32
: type
= 1; break;
3090 case TYPE_U64
: type
= 2; break;
3091 case TYPE_F32
: type
= 3; break;
3092 case TYPE_S64
: type
= 5; break;
3094 assert(insn
->dType
== TYPE_U32
);
3099 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
3101 } else if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
) {
3104 subOp
= insn
->subOp
;
3107 emitField(0x24, 3, type
);
3108 emitField(0x1d, 4, subOp
);
3109 emitGPR (0x14, insn
->src(1));
3110 emitGPR (0x08, insn
->src(0));
3111 emitGPR (0x00, insn
->def(0));
3116 /*******************************************************************************
3117 * assembler front-end
3118 ******************************************************************************/
3121 CodeEmitterGM107::emitInstruction(Instruction
*i
)
3123 const unsigned int size
= (writeIssueDelays
&& !(codeSize
& 0x1f)) ? 16 : 8;
3128 if (insn
->encSize
!= 8) {
3129 ERROR("skipping undecodable instruction: "); insn
->print();
3132 if (codeSize
+ size
> codeSizeLimit
) {
3133 ERROR("code emitter output buffer too small\n");
3137 if (writeIssueDelays
) {
3138 int n
= ((codeSize
& 0x1f) / 8) - 1;
3141 data
[0] = 0x00000000;
3142 data
[1] = 0x00000000;
3148 emitField(data
, n
* 21, 21, insn
->sched
);
3204 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
3205 insn
->src(0).getFile() == FILE_PREDICATE
)) {
3207 } else if (isFloatType(insn
->dType
)) {
3208 if (isFloatType(insn
->sType
))
3213 if (isFloatType(insn
->sType
))
3224 if (isFloatType(insn
->dType
)) {
3225 if (insn
->dType
== TYPE_F64
)
3234 if (isFloatType(insn
->dType
)) {
3235 if (insn
->dType
== TYPE_F64
)
3245 if (isFloatType(insn
->dType
)) {
3246 if (insn
->dType
== TYPE_F64
)
3259 if (isFloatType(insn
->dType
)) {
3260 if (insn
->dType
== TYPE_F64
)
3269 if (typeSizeof(insn
->sType
) == 8)
3275 if (typeSizeof(insn
->sType
) == 8)
3293 if (isFloatType(insn
->dType
))
3302 if (insn
->def(0).getFile() != FILE_PREDICATE
) {
3303 if (isFloatType(insn
->sType
))
3304 if (insn
->sType
== TYPE_F64
)
3311 if (isFloatType(insn
->sType
))
3312 if (insn
->sType
== TYPE_F64
)
3344 switch (insn
->src(0).getFile()) {
3345 case FILE_MEMORY_CONST
: emitLDC(); break;
3346 case FILE_MEMORY_LOCAL
: emitLDL(); break;
3347 case FILE_MEMORY_SHARED
: emitLDS(); break;
3348 case FILE_MEMORY_GLOBAL
: emitLD(); break;
3350 assert(!"invalid load");
3356 switch (insn
->src(0).getFile()) {
3357 case FILE_MEMORY_LOCAL
: emitSTL(); break;
3358 case FILE_MEMORY_SHARED
: emitSTS(); break;
3359 case FILE_MEMORY_GLOBAL
: emitST(); break;
3361 assert(!"invalid store");
3367 if (insn
->src(0).getFile() == FILE_MEMORY_SHARED
)
3370 if (!insn
->defExists(0) && insn
->subOp
< NV50_IR_SUBOP_ATOM_CAS
)
3455 assert(!"invalid opcode");
3471 CodeEmitterGM107::getMinEncodingSize(const Instruction
*i
) const
3476 /*******************************************************************************
3477 * sched data calculator
3478 ******************************************************************************/
3480 class SchedDataCalculatorGM107
: public Pass
3483 SchedDataCalculatorGM107(const TargetGM107
*targ
) : targ(targ
) {}
3495 void rebase(const int base
)
3497 const int delta
= this->base
- base
;
3502 for (int i
= 0; i
< 256; ++i
) {
3506 for (int i
= 0; i
< 8; ++i
) {
3515 memset(&rd
, 0, sizeof(rd
));
3516 memset(&wr
, 0, sizeof(wr
));
3518 int getLatest(const ScoreData
& d
) const
3521 for (int i
= 0; i
< 256; ++i
)
3524 for (int i
= 0; i
< 8; ++i
)
3531 inline int getLatestRd() const
3533 return getLatest(rd
);
3535 inline int getLatestWr() const
3537 return getLatest(wr
);
3539 inline int getLatest() const
3541 return MAX2(getLatestRd(), getLatestWr());
3543 void setMax(const RegScores
*that
)
3545 for (int i
= 0; i
< 256; ++i
) {
3546 rd
.r
[i
] = MAX2(rd
.r
[i
], that
->rd
.r
[i
]);
3547 wr
.r
[i
] = MAX2(wr
.r
[i
], that
->wr
.r
[i
]);
3549 for (int i
= 0; i
< 8; ++i
) {
3550 rd
.p
[i
] = MAX2(rd
.p
[i
], that
->rd
.p
[i
]);
3551 wr
.p
[i
] = MAX2(wr
.p
[i
], that
->wr
.p
[i
]);
3553 rd
.c
= MAX2(rd
.c
, that
->rd
.c
);
3554 wr
.c
= MAX2(wr
.c
, that
->wr
.c
);
3556 void print(int cycle
)
3558 for (int i
= 0; i
< 256; ++i
) {
3559 if (rd
.r
[i
] > cycle
)
3560 INFO("rd $r%i @ %i\n", i
, rd
.r
[i
]);
3561 if (wr
.r
[i
] > cycle
)
3562 INFO("wr $r%i @ %i\n", i
, wr
.r
[i
]);
3564 for (int i
= 0; i
< 8; ++i
) {
3565 if (rd
.p
[i
] > cycle
)
3566 INFO("rd $p%i @ %i\n", i
, rd
.p
[i
]);
3567 if (wr
.p
[i
] > cycle
)
3568 INFO("wr $p%i @ %i\n", i
, wr
.p
[i
]);
3571 INFO("rd $c @ %i\n", rd
.c
);
3573 INFO("wr $c @ %i\n", wr
.c
);
3577 RegScores
*score
; // for current BB
3578 std::vector
<RegScores
> scoreBoards
;
3580 const TargetGM107
*targ
;
3581 bool visit(Function
*);
3582 bool visit(BasicBlock
*);
3584 void commitInsn(const Instruction
*, int);
3585 int calcDelay(const Instruction
*, int) const;
3586 void setDelay(Instruction
*, int, const Instruction
*);
3587 void recordWr(const Value
*, int, int);
3588 void checkRd(const Value
*, int, int&) const;
3590 inline void emitYield(Instruction
*);
3591 inline void emitStall(Instruction
*, uint8_t);
3592 inline void emitReuse(Instruction
*, uint8_t);
3593 inline void emitWrDepBar(Instruction
*, uint8_t);
3594 inline void emitRdDepBar(Instruction
*, uint8_t);
3595 inline void emitWtDepBar(Instruction
*, uint8_t);
3597 inline int getStall(const Instruction
*) const;
3598 inline int getWrDepBar(const Instruction
*) const;
3599 inline int getRdDepBar(const Instruction
*) const;
3600 inline int getWtDepBar(const Instruction
*) const;
3602 void setReuseFlag(Instruction
*);
3604 inline void printSchedInfo(int, const Instruction
*) const;
3607 LiveBarUse(Instruction
*insn
, Instruction
*usei
)
3608 : insn(insn
), usei(usei
) { }
3614 LiveBarDef(Instruction
*insn
, Instruction
*defi
)
3615 : insn(insn
), defi(defi
) { }
3620 bool insertBarriers(BasicBlock
*);
3622 Instruction
*findFirstUse(const Instruction
*) const;
3623 Instruction
*findFirstDef(const Instruction
*) const;
3625 bool needRdDepBar(const Instruction
*) const;
3626 bool needWrDepBar(const Instruction
*) const;
3630 SchedDataCalculatorGM107::emitStall(Instruction
*insn
, uint8_t cnt
)
3637 SchedDataCalculatorGM107::emitYield(Instruction
*insn
)
3639 insn
->sched
|= 1 << 4;
3643 SchedDataCalculatorGM107::emitWrDepBar(Instruction
*insn
, uint8_t id
)
3646 if ((insn
->sched
& 0xe0) == 0xe0)
3647 insn
->sched
^= 0xe0;
3648 insn
->sched
|= id
<< 5;
3652 SchedDataCalculatorGM107::emitRdDepBar(Instruction
*insn
, uint8_t id
)
3655 if ((insn
->sched
& 0x700) == 0x700)
3656 insn
->sched
^= 0x700;
3657 insn
->sched
|= id
<< 8;
3661 SchedDataCalculatorGM107::emitWtDepBar(Instruction
*insn
, uint8_t id
)
3664 insn
->sched
|= 1 << (11 + id
);
3668 SchedDataCalculatorGM107::emitReuse(Instruction
*insn
, uint8_t id
)
3671 insn
->sched
|= 1 << (17 + id
);
3675 SchedDataCalculatorGM107::printSchedInfo(int cycle
,
3676 const Instruction
*insn
) const
3678 uint8_t st
, yl
, wr
, rd
, wt
, ru
;
3680 st
= (insn
->sched
& 0x00000f) >> 0;
3681 yl
= (insn
->sched
& 0x000010) >> 4;
3682 wr
= (insn
->sched
& 0x0000e0) >> 5;
3683 rd
= (insn
->sched
& 0x000700) >> 8;
3684 wt
= (insn
->sched
& 0x01f800) >> 11;
3685 ru
= (insn
->sched
& 0x1e0000) >> 17;
3687 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3688 cycle
, st
, yl
, wr
, rd
, wt
, ru
);
3692 SchedDataCalculatorGM107::getStall(const Instruction
*insn
) const
3694 return insn
->sched
& 0xf;
3698 SchedDataCalculatorGM107::getWrDepBar(const Instruction
*insn
) const
3700 return (insn
->sched
& 0x0000e0) >> 5;
3704 SchedDataCalculatorGM107::getRdDepBar(const Instruction
*insn
) const
3706 return (insn
->sched
& 0x000700) >> 8;
3710 SchedDataCalculatorGM107::getWtDepBar(const Instruction
*insn
) const
3712 return (insn
->sched
& 0x01f800) >> 11;
3715 // Emit the reuse flag, which makes use of the operand reuse cache, a new
3716 // level of the memory hierarchy introduced with Maxwell.
3718 // It reduces bank conflicts by caching operands. Each time you issue
3719 // an instruction, that flag can tell the hw which operands are going to be
3720 // re-used by the next instruction. Note that the next instruction has to use
3721 // the same GPR id in the same operand slot.
3723 SchedDataCalculatorGM107::setReuseFlag(Instruction
*insn
)
3725 Instruction
*next
= insn
->next
;
3726 BitSet
defs(255, 1);
3728 if (!targ
->isReuseSupported(insn
))
3731 for (int d
= 0; insn
->defExists(d
); ++d
) {
3732 const Value
*def
= insn
->def(d
).rep();
3733 if (insn
->def(d
).getFile() != FILE_GPR
)
3735 if (typeSizeof(insn
->dType
) != 4 || def
->reg
.data
.id
== 255)
3737 defs
.set(def
->reg
.data
.id
);
3740 for (int s
= 0; insn
->srcExists(s
); s
++) {
3741 const Value
*src
= insn
->src(s
).rep();
3742 if (insn
->src(s
).getFile() != FILE_GPR
)
3744 if (typeSizeof(insn
->sType
) != 4 || src
->reg
.data
.id
== 255)
3746 if (defs
.test(src
->reg
.data
.id
))
3748 if (!next
->srcExists(s
) || next
->src(s
).getFile() != FILE_GPR
)
3750 if (src
->reg
.data
.id
!= next
->getSrc(s
)->reg
.data
.id
)
3758 SchedDataCalculatorGM107::recordWr(const Value
*v
, int cycle
, int ready
)
3760 int a
= v
->reg
.data
.id
, b
;
3762 switch (v
->reg
.file
) {
3764 b
= a
+ v
->reg
.size
/ 4;
3765 for (int r
= a
; r
< b
; ++r
)
3766 score
->rd
.r
[r
] = ready
;
3768 case FILE_PREDICATE
:
3769 // To immediately use a predicate set by any instruction, the minimum
3770 // number of stall counts is 13.
3771 score
->rd
.p
[a
] = cycle
+ 13;
3774 score
->rd
.c
= ready
;
3782 SchedDataCalculatorGM107::checkRd(const Value
*v
, int cycle
, int &delay
) const
3784 int a
= v
->reg
.data
.id
, b
;
3787 switch (v
->reg
.file
) {
3789 b
= a
+ v
->reg
.size
/ 4;
3790 for (int r
= a
; r
< b
; ++r
)
3791 ready
= MAX2(ready
, score
->rd
.r
[r
]);
3793 case FILE_PREDICATE
:
3794 ready
= MAX2(ready
, score
->rd
.p
[a
]);
3797 ready
= MAX2(ready
, score
->rd
.c
);
3803 delay
= MAX2(delay
, ready
- cycle
);
3807 SchedDataCalculatorGM107::commitInsn(const Instruction
*insn
, int cycle
)
3809 const int ready
= cycle
+ targ
->getLatency(insn
);
3811 for (int d
= 0; insn
->defExists(d
); ++d
)
3812 recordWr(insn
->getDef(d
), cycle
, ready
);
3814 #ifdef GM107_DEBUG_SCHED_DATA
3815 score
->print(cycle
);
3819 #define GM107_MIN_ISSUE_DELAY 0x1
3820 #define GM107_MAX_ISSUE_DELAY 0xf
3823 SchedDataCalculatorGM107::calcDelay(const Instruction
*insn
, int cycle
) const
3825 int delay
= 0, ready
= cycle
;
3827 for (int s
= 0; insn
->srcExists(s
); ++s
)
3828 checkRd(insn
->getSrc(s
), cycle
, delay
);
3830 // TODO: make use of getReadLatency()!
3832 return MAX2(delay
, ready
- cycle
);
3836 SchedDataCalculatorGM107::setDelay(Instruction
*insn
, int delay
,
3837 const Instruction
*next
)
3839 const OpClass cl
= targ
->getOpClass(insn
->op
);
3842 if (insn
->op
== OP_EXIT
||
3843 insn
->op
== OP_BAR
||
3844 insn
->op
== OP_MEMBAR
) {
3845 delay
= GM107_MAX_ISSUE_DELAY
;
3847 if (insn
->op
== OP_QUADON
||
3848 insn
->op
== OP_QUADPOP
) {
3851 if (cl
== OPCLASS_FLOW
|| insn
->join
) {
3855 if (!next
|| !targ
->canDualIssue(insn
, next
)) {
3856 delay
= CLAMP(delay
, GM107_MIN_ISSUE_DELAY
, GM107_MAX_ISSUE_DELAY
);
3858 delay
= 0x0; // dual-issue
3861 wr
= getWrDepBar(insn
);
3862 rd
= getRdDepBar(insn
);
3864 if (delay
== GM107_MIN_ISSUE_DELAY
&& (wr
& rd
) != 7) {
3865 // Barriers take one additional clock cycle to become active on top of
3866 // the clock consumed by the instruction producing it.
3867 if (!next
|| insn
->bb
!= next
->bb
) {
3870 int wt
= getWtDepBar(next
);
3871 if ((wt
& (1 << wr
)) | (wt
& (1 << rd
)))
3876 emitStall(insn
, delay
);
3880 // Return true when the given instruction needs to emit a read dependency
3881 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3882 // setting the maximum number of stall counts is not enough.
3884 SchedDataCalculatorGM107::needRdDepBar(const Instruction
*insn
) const
3886 BitSet
srcs(255, 1), defs(255, 1);
3889 if (!targ
->isBarrierRequired(insn
))
3892 // Do not emit a read dependency barrier when the instruction doesn't use
3893 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3894 for (int s
= 0; insn
->srcExists(s
); ++s
) {
3895 const Value
*src
= insn
->src(s
).rep();
3896 if (insn
->src(s
).getFile() != FILE_GPR
)
3898 if (src
->reg
.data
.id
== 255)
3901 a
= src
->reg
.data
.id
;
3902 b
= a
+ src
->reg
.size
/ 4;
3903 for (int r
= a
; r
< b
; ++r
)
3907 if (!srcs
.popCount())
3910 // Do not emit a read dependency barrier when the output GPRs are equal to
3911 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3912 // be produced and WaR hazards are prevented.
3913 for (int d
= 0; insn
->defExists(d
); ++d
) {
3914 const Value
*def
= insn
->def(d
).rep();
3915 if (insn
->def(d
).getFile() != FILE_GPR
)
3917 if (def
->reg
.data
.id
== 255)
3920 a
= def
->reg
.data
.id
;
3921 b
= a
+ def
->reg
.size
/ 4;
3922 for (int r
= a
; r
< b
; ++r
)
3927 if (!srcs
.popCount())
3933 // Return true when the given instruction needs to emit a write dependency
3934 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3935 // setting the maximum number of stall counts is not enough. This is only legal
3936 // if the instruction outputs something.
3938 SchedDataCalculatorGM107::needWrDepBar(const Instruction
*insn
) const
3940 if (!targ
->isBarrierRequired(insn
))
3943 for (int d
= 0; insn
->defExists(d
); ++d
) {
3944 if (insn
->def(d
).getFile() == FILE_GPR
||
3945 insn
->def(d
).getFile() == FILE_PREDICATE
)
3951 // Find the next instruction inside the same basic block which uses the output
3952 // of the given instruction in order to avoid RaW hazards.
3954 SchedDataCalculatorGM107::findFirstUse(const Instruction
*bari
) const
3956 Instruction
*insn
, *next
;
3959 if (!bari
->defExists(0))
3962 minGPR
= bari
->def(0).rep()->reg
.data
.id
;
3963 maxGPR
= minGPR
+ bari
->def(0).rep()->reg
.size
/ 4 - 1;
3965 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
3968 for (int s
= 0; insn
->srcExists(s
); ++s
) {
3969 const Value
*src
= insn
->src(s
).rep();
3970 if (bari
->def(0).getFile() == FILE_GPR
) {
3971 if (insn
->src(s
).getFile() != FILE_GPR
||
3972 src
->reg
.data
.id
+ src
->reg
.size
/ 4 - 1 < minGPR
||
3973 src
->reg
.data
.id
> maxGPR
)
3977 if (bari
->def(0).getFile() == FILE_PREDICATE
) {
3978 if (insn
->src(s
).getFile() != FILE_PREDICATE
||
3979 src
->reg
.data
.id
!= minGPR
)
3988 // Find the next instruction inside the same basic block which overwrites at
3989 // least one source of the given instruction in order to avoid WaR hazards.
3991 SchedDataCalculatorGM107::findFirstDef(const Instruction
*bari
) const
3993 Instruction
*insn
, *next
;
3996 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
3999 for (int d
= 0; insn
->defExists(d
); ++d
) {
4000 const Value
*def
= insn
->def(d
).rep();
4001 if (insn
->def(d
).getFile() != FILE_GPR
)
4004 minGPR
= def
->reg
.data
.id
;
4005 maxGPR
= minGPR
+ def
->reg
.size
/ 4 - 1;
4007 for (int s
= 0; bari
->srcExists(s
); ++s
) {
4008 const Value
*src
= bari
->src(s
).rep();
4009 if (bari
->src(s
).getFile() != FILE_GPR
||
4010 src
->reg
.data
.id
+ src
->reg
.size
/ 4 - 1 < minGPR
||
4011 src
->reg
.data
.id
> maxGPR
)
4020 // Dependency barriers:
4021 // This pass is a bit ugly and could probably be improved by performing a
4022 // better allocation.
4024 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4025 // dependency barriers using the control codes.
4027 SchedDataCalculatorGM107::insertBarriers(BasicBlock
*bb
)
4029 std::list
<LiveBarUse
> live_uses
;
4030 std::list
<LiveBarDef
> live_defs
;
4031 Instruction
*insn
, *next
;
4035 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4036 Instruction
*usei
= NULL
, *defi
= NULL
;
4037 bool need_wr_bar
, need_rd_bar
;
4041 // Expire old barrier uses.
4042 for (std::list
<LiveBarUse
>::iterator it
= live_uses
.begin();
4043 it
!= live_uses
.end();) {
4044 if (insn
->serial
>= it
->usei
->serial
) {
4045 int wr
= getWrDepBar(it
->insn
);
4046 emitWtDepBar(insn
, wr
);
4047 bars
.clr(wr
); // free barrier
4048 it
= live_uses
.erase(it
);
4054 // Expire old barrier defs.
4055 for (std::list
<LiveBarDef
>::iterator it
= live_defs
.begin();
4056 it
!= live_defs
.end();) {
4057 if (insn
->serial
>= it
->defi
->serial
) {
4058 int rd
= getRdDepBar(it
->insn
);
4059 emitWtDepBar(insn
, rd
);
4060 bars
.clr(rd
); // free barrier
4061 it
= live_defs
.erase(it
);
4067 need_wr_bar
= needWrDepBar(insn
);
4068 need_rd_bar
= needRdDepBar(insn
);
4071 // When the instruction needs to emit a write dependency barrier (as
4072 // all instructions which write something at a variable latency do),
4073 // find the next instruction which reads the outputs.
4074 usei
= findFirstUse(insn
);
4076 // Allocate and emit a new barrier.
4077 bar_id
= bars
.findFreeRange(1);
4081 emitWrDepBar(insn
, bar_id
);
4083 live_uses
.push_back(LiveBarUse(insn
, usei
));
4087 // When the instruction needs to emit a read dependency barrier (as
4088 // all instructions which read something at a variable latency do),
4089 // find the next instruction which will write the inputs.
4090 defi
= findFirstDef(insn
);
4092 if (usei
&& defi
&& usei
->serial
<= defi
->serial
)
4095 // Allocate and emit a new barrier.
4096 bar_id
= bars
.findFreeRange(1);
4100 emitRdDepBar(insn
, bar_id
);
4102 live_defs
.push_back(LiveBarDef(insn
, defi
));
4106 // Remove unnecessary barrier waits.
4107 BitSet
alive_bars(6, 1);
4108 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4113 wr
= getWrDepBar(insn
);
4114 rd
= getRdDepBar(insn
);
4115 wt
= getWtDepBar(insn
);
4117 for (int idx
= 0; idx
< 6; ++idx
) {
4118 if (!(wt
& (1 << idx
)))
4120 if (!alive_bars
.test(idx
)) {
4121 insn
->sched
&= ~(1 << (11 + idx
));
4123 alive_bars
.clr(idx
);
4137 SchedDataCalculatorGM107::visit(Function
*func
)
4141 func
->orderInstructions(insns
);
4143 scoreBoards
.resize(func
->cfg
.getSize());
4144 for (size_t i
= 0; i
< scoreBoards
.size(); ++i
)
4145 scoreBoards
[i
].wipe();
4150 SchedDataCalculatorGM107::visit(BasicBlock
*bb
)
4152 Instruction
*insn
, *next
= NULL
;
4155 for (Instruction
*insn
= bb
->getEntry(); insn
; insn
= insn
->next
) {
4157 insn
->sched
= 0x7e0;
4160 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4163 // Insert read/write dependency barriers for instructions which don't
4164 // operate at a fixed latency.
4167 score
= &scoreBoards
.at(bb
->getId());
4169 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
4170 // back branches will wait until all target dependencies are satisfied
4171 if (ei
.getType() == Graph::Edge::BACK
) // sched would be uninitialized
4173 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
4174 score
->setMax(&scoreBoards
.at(in
->getId()));
4177 #ifdef GM107_DEBUG_SCHED_DATA
4178 INFO("=== BB:%i initial scores\n", bb
->getId());
4179 score
->print(cycle
);
4182 // Because barriers are allocated locally (intra-BB), we have to make sure
4183 // that all produced barriers have been consumed before entering a new
4184 // basic block. The best way would be to do a global allocation pre-RA, but
4185 // that is much more difficult, especially because of the phi nodes. Anyway,
4186 // it seems that waiting on a barrier which has already been consumed
4187 // doesn't add any additional cost; it's just not elegant!
4188 Instruction
*start
= bb
->getEntry();
4189 if (start
&& bb
->cfg
.incidentCount() > 0) {
4190 for (int b
= 0; b
< 6; b
++)
4191 emitWtDepBar(start
, b
);
4194 for (insn
= bb
->getEntry(); insn
&& insn
->next
; insn
= insn
->next
) {
4197 commitInsn(insn
, cycle
);
4198 int delay
= calcDelay(next
, cycle
);
4199 setDelay(insn
, delay
, next
);
4200 cycle
+= getStall(insn
);
4204 // XXX: The yield flag seems to destroy a bunch of things when it is
4205 // set on every instruction; this needs investigation.
4208 #ifdef GM107_DEBUG_SCHED_DATA
4209 printSchedInfo(cycle
, insn
);
4217 commitInsn(insn
, cycle
);
4221 #ifdef GM107_DEBUG_SCHED_DATA
4222 fprintf(stderr
, "last instruction is : ");
4224 fprintf(stderr
, "cycle=%d\n", cycle
);
4227 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
4228 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
4230 if (ei
.getType() != Graph::Edge::BACK
) {
4231 // Only test the first instruction of the outgoing block.
4232 next
= out
->getEntry();
4234 bbDelay
= MAX2(bbDelay
, calcDelay(next
, cycle
));
4236 // When the outgoing BB is empty, make sure to set the number of
4237 // stall counts needed by the instruction because we don't know the
4238 // next instruction.
4239 bbDelay
= MAX2(bbDelay
, targ
->getLatency(insn
));
4242 // Wait until all dependencies are satisfied.
4243 const int regsFree
= score
->getLatest();
4244 next
= out
->getFirst();
4245 for (int c
= cycle
; next
&& c
< regsFree
; next
= next
->next
) {
4246 bbDelay
= MAX2(bbDelay
, calcDelay(next
, c
));
4247 c
+= getStall(next
);
4252 if (bb
->cfg
.outgoingCount() != 1)
4254 setDelay(insn
, bbDelay
, next
);
4255 cycle
+= getStall(insn
);
4257 score
->rebase(cycle
); // common base for initializing out blocks' scores
4261 /*******************************************************************************
4263 ******************************************************************************/
4266 CodeEmitterGM107::prepareEmission(Function
*func
)
4268 SchedDataCalculatorGM107
sched(targGM107
);
4269 CodeEmitter::prepareEmission(func
);
4270 sched
.run(func
, true, true);
// Return the number of 24-byte groups needed to hold `size` bytes, rounded
// up (0 for size == 0). prepareEmission(Program *) adds 8 bytes per group
// returned here to a basic block's binary size.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   // Divide first and round up on a remainder: unlike (size + 23) / 24 this
   // cannot wrap around for sizes within 23 of UINT32_MAX.
   return size / 24 + (size % 24 != 0 ? 1 : 0);
}
4279 CodeEmitterGM107::prepareEmission(Program
*prog
)
4281 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
4282 !fi
.end(); fi
.next()) {
4283 Function
*func
= reinterpret_cast<Function
*>(fi
.get());
4284 func
->binPos
= prog
->binSize
;
4285 prepareEmission(func
);
4287 // adjust sizes & positions for scheduling info:
4288 if (prog
->getTarget()->hasSWSched
) {
4289 uint32_t adjPos
= func
->binPos
;
4290 BasicBlock
*bb
= NULL
;
4291 for (int i
= 0; i
< func
->bbCount
; ++i
) {
4292 bb
= func
->bbArray
[i
];
4293 int32_t adjSize
= bb
->binSize
;
4295 adjSize
-= 32 - adjPos
% 32;
4299 adjSize
= bb
->binSize
+ sizeToBundlesGM107(adjSize
) * 8;
4300 bb
->binPos
= adjPos
;
4301 bb
->binSize
= adjSize
;
4305 func
->binSize
= adjPos
- func
->binPos
;
4308 prog
->binSize
+= func
->binSize
;
4312 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107
*target
)
4313 : CodeEmitter(target
),
4315 writeIssueDelays(target
->hasSWSched
)
4318 codeSize
= codeSizeLimit
= 0;
4323 TargetGM107::createCodeEmitterGM107(Program::Type type
)
4325 CodeEmitterGM107
*emit
= new CodeEmitterGM107(this);
4326 emit
->setProgramType(type
);
4330 } // namespace nv50_ir