2 * Copyright 2014 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
25 #include "codegen/nv50_ir_target_gm107.h"
27 //#define GM107_DEBUG_SCHED_DATA
// GM107 code emitter, derived from CodeEmitter. It keeps the target, the
// program type, the instruction currently being encoded (insn) and the
// writeIssueDelays flag. The inline overloads visible here only forward to
// the primary helpers: emitField(b, s, v) writes into `code`, emitInsn(o)
// passes pred = true, emitRND(pos) uses insn->rnd with rip = -1, emitCond5
// forwards to emitCond4, and the emitGPR/emitSYS/emitPRED overloads pass the
// operand's rep() or a null Value when the operand is absent.
31 class CodeEmitterGM107
: public CodeEmitter
34 CodeEmitterGM107(const TargetGM107
*);
36 virtual bool emitInstruction(Instruction
*);
37 virtual uint32_t getMinEncodingSize(const Instruction
*) const;
39 virtual void prepareEmission(Program
*);
40 virtual void prepareEmission(Function
*);
42 inline void setProgramType(Program::Type pType
) { progType
= pType
; }
45 const TargetGM107
*targGM107
;
47 Program::Type progType
;
49 const Instruction
*insn
;
50 const bool writeIssueDelays
;
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b
, int s
, uint32_t v
) { emitField(code
, b
, s
, v
); }
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o
) { emitInsn(o
, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value
*);
61 inline void emitGPR(int pos
) {
62 emitGPR(pos
, (const Value
*)NULL
);
64 inline void emitGPR(int pos
, const ValueRef
&ref
) {
65 emitGPR(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
67 inline void emitGPR(int pos
, const ValueRef
*ref
) {
68 emitGPR(pos
, ref
? ref
->rep() : (const Value
*)NULL
);
70 inline void emitGPR(int pos
, const ValueDef
&def
) {
71 emitGPR(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
73 inline void emitSYS(int, const Value
*);
74 inline void emitSYS(int pos
, const ValueRef
&ref
) {
75 emitSYS(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
77 inline void emitPRED(int, const Value
*);
78 inline void emitPRED(int pos
) {
79 emitPRED(pos
, (const Value
*)NULL
);
81 inline void emitPRED(int pos
, const ValueRef
&ref
) {
82 emitPRED(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
84 inline void emitPRED(int pos
, const ValueDef
&def
) {
85 emitPRED(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
87 inline void emitADDR(int, int, int, int, const ValueRef
&);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef
&);
89 inline bool longIMMD(const ValueRef
&);
90 inline void emitIMMD(int, int, const ValueRef
&);
92 void emitCond3(int, CondCode
);
93 void emitCond4(int, CondCode
);
94 void emitCond5(int pos
, CondCode cc
) { emitCond4(pos
, cc
); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef
&);
101 inline void emitNEG(int, const ValueRef
&);
102 inline void emitNEG2(int, const ValueRef
&, const ValueRef
&);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode
, int);
105 inline void emitRND(int pos
) {
106 emitRND(pos
, insn
->rnd
, -1);
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef
&);
170 void emitLDSTs(int, DataType
);
210 void emitSUHandle(const int s
);
216 /*******************************************************************************
217 * general instruction layout/fields
218 ******************************************************************************/
// Packs the low `s` bits of `v` at bit offset `b`, computed as a 64-bit value.
// The assert only accepts values whose bits above the field are all zero or
// all one, so masking never drops significant bits.
// NOTE(review): the statements that store `d` into `data` are not visible here.
221 CodeEmitterGM107::emitField(uint32_t *data
, int b
, int s
, uint32_t v
)
224 uint32_t m
= ((1ULL << s
) - 1);
225 uint64_t d
= (uint64_t)(v
& m
) << b
;
226 assert(!(v
& ~m
) || (v
& ~m
) == ~m
);
// Encodes the instruction's predicate source when it has one (predSrc >= 0):
// the predicate register id goes into the 3-bit field at bit 16, and bit 19
// is set when insn->cc == CC_NOT_P.
// NOTE(review): the handling of predSrc < 0 is not visible here.
233 CodeEmitterGM107::emitPred()
235 if (insn
->predSrc
>= 0) {
236 emitField(16, 3, insn
->getSrc(insn
->predSrc
)->rep()->reg
.data
.id
);
237 emitField(19, 1, insn
->cc
== CC_NOT_P
);
// Starts a new instruction encoding by clearing code[0].
// NOTE(review): the statements consuming `hi` and `pred` are not visible here;
// presumably `hi` seeds code[1] and `pred` triggers emitPred() — confirm.
244 CodeEmitterGM107::emitInsn(uint32_t hi
, bool pred
)
246 code
[0] = 0x00000000;
253 CodeEmitterGM107::emitGPR(int pos
, const Value
*val
)
255 emitField(pos
, 8, val
? val
->reg
.data
.id
: 255);
// Emits the 8-bit system-value selector for `val` at bit `pos`. `id` starts
// as the value's register id (-1 when val is null) and is replaced by the
// selector listed below for each SV_* kind; SV_TID and SV_CTAID add the
// component index. Kinds without a mapping hit the assert.
// NOTE(review): the switch header (what is switched on) is not visible here.
259 CodeEmitterGM107::emitSYS(int pos
, const Value
*val
)
261 int id
= val
? val
->reg
.data
.id
: -1;
264 case SV_LANEID
: id
= 0x00; break;
265 case SV_VERTEX_COUNT
: id
= 0x10; break;
266 case SV_INVOCATION_ID
: id
= 0x11; break;
267 case SV_THREAD_KILL
: id
= 0x13; break;
268 case SV_INVOCATION_INFO
: id
= 0x1d; break;
269 case SV_TID
: id
= 0x21 + val
->reg
.data
.sv
.index
; break;
270 case SV_CTAID
: id
= 0x25 + val
->reg
.data
.sv
.index
; break;
272 assert(!"invalid system value");
277 emitField(pos
, 8, id
);
281 CodeEmitterGM107::emitPRED(int pos
, const Value
*val
)
283 emitField(pos
, 3, val
? val
->reg
.data
.id
: 7);
// Emits an address operand taken from `ref`: the indirect register at bit
// `gpr`, and the value's offset shifted right by `shr` into the `len`-bit
// field at bit `off`. The offset must be a multiple of (1 << shr).
// NOTE(review): the tail of the signature and any guard around the emitGPR
// call are not visible here.
287 CodeEmitterGM107::emitADDR(int gpr
, int off
, int len
, int shr
,
290 const Value
*v
= ref
.get();
291 assert(!(v
->reg
.data
.offset
& ((1 << shr
) - 1)));
293 emitGPR(gpr
, ref
.getIndirect(0));
294 emitField(off
, len
, v
->reg
.data
.offset
>> shr
);
// Emits a constant-buffer operand taken from `ref`: the file index into the
// 5-bit field at `buf`, the indirect register at `gpr`, and the symbol offset
// shifted right by `shr` into a 16-bit field at `off`. The offset must be a
// multiple of (1 << shr).
// NOTE(review): the offset field width is hard-coded to 16; `len` is not used
// by any statement visible here. Callers in this file pass gpr = -1 for "no
// register", so a guard around emitGPR presumably exists — confirm.
298 CodeEmitterGM107::emitCBUF(int buf
, int gpr
, int off
, int len
, int shr
,
301 const Value
*v
= ref
.get();
302 const Symbol
*s
= v
->asSym();
304 assert(!(s
->reg
.data
.offset
& ((1 << shr
) - 1)));
306 emitField(buf
, 5, v
->reg
.fileIndex
);
308 emitGPR(gpr
, ref
.getIndirect(0));
309 emitField(off
, 16, s
->reg
.data
.offset
>> shr
);
// Inspects an immediate operand. For float source types it checks whether any
// of the low 12 bits is set; otherwise it checks whether the top 12 bits are
// neither all zero nor all one.
// NOTE(review): the return statements are not visible here; presumably the
// function returns true when the applicable check fires (value needs the long
// immediate form) and false otherwise — confirm.
313 CodeEmitterGM107::longIMMD(const ValueRef
&ref
)
315 if (ref
.getFile() == FILE_IMMEDIATE
) {
316 const ImmediateValue
*imm
= ref
.get()->asImm();
317 if (isFloatType(insn
->sType
)) {
318 if ((imm
->reg
.data
.u32
& 0x00000fff) != 0x00000000)
321 if ((imm
->reg
.data
.u32
& 0xfff00000) != 0x00000000 &&
322 (imm
->reg
.data
.u32
& 0xfff00000) != 0xfff00000)
// Emits the immediate held by `ref`. F32/F16 values must have their low 12
// bits clear; F64 values must have their low 44 bits clear and are reduced to
// u64 >> 44. One path then requires the top 12 bits of `val` to be all zero
// or all one and splits it into bit 19 (written at bit 56) plus the low 19
// bits (written at `pos`); the other path writes `val` as-is.
// NOTE(review): the condition choosing between the two paths, and what
// happens to `val` after the F32/F16 assert, are not visible here.
330 CodeEmitterGM107::emitIMMD(int pos
, int len
, const ValueRef
&ref
)
332 const ImmediateValue
*imm
= ref
.get()->asImm();
333 uint32_t val
= imm
->reg
.data
.u32
;
336 if (insn
->sType
== TYPE_F32
|| insn
->sType
== TYPE_F16
) {
337 assert(!(val
& 0x00000fff));
339 } else if (insn
->sType
== TYPE_F64
) {
340 assert(!(imm
->reg
.data
.u64
& 0x00000fffffffffffULL
));
341 val
= imm
->reg
.data
.u64
>> 44;
343 assert(!(val
& 0xfff00000) || (val
& 0xfff00000) == 0xfff00000);
344 emitField( 56, 1, (val
& 0x80000) >> 19);
345 emitField(pos
, len
, (val
& 0x7ffff));
347 emitField(pos
, len
, val
);
351 /*******************************************************************************
353 ******************************************************************************/
// Emits a 3-bit condition code at bit `pos`. Mappings visible here:
// FL=0, LT=1, EQ=2, LE=3, GT=4, NE=5, GE=6, TR=7; unhandled codes assert.
// NOTE(review): further case labels may exist on lines not visible here.
356 CodeEmitterGM107::emitCond3(int pos
, CondCode code
)
361 case CC_FL
: data
= 0x00; break;
363 case CC_LT
: data
= 0x01; break;
365 case CC_EQ
: data
= 0x02; break;
367 case CC_LE
: data
= 0x03; break;
369 case CC_GT
: data
= 0x04; break;
371 case CC_NE
: data
= 0x05; break;
373 case CC_GE
: data
= 0x06; break;
374 case CC_TR
: data
= 0x07; break;
376 assert(!"invalid cond3");
380 emitField(pos
, 3, data
);
// Emits a 4-bit condition code at bit `pos`: FL..GE map to 0x0..0x6, the
// LTU..GEU variants to 0x9..0xe and TR to 0xf. The NUM (0x7) and NAN (0x8)
// encodings are commented out; unhandled codes assert.
384 CodeEmitterGM107::emitCond4(int pos
, CondCode code
)
389 case CC_FL
: data
= 0x00; break;
390 case CC_LT
: data
= 0x01; break;
391 case CC_EQ
: data
= 0x02; break;
392 case CC_LE
: data
= 0x03; break;
393 case CC_GT
: data
= 0x04; break;
394 case CC_NE
: data
= 0x05; break;
395 case CC_GE
: data
= 0x06; break;
396 // case CC_NUM: data = 0x07; break;
397 // case CC_NAN: data = 0x08; break;
398 case CC_LTU
: data
= 0x09; break;
399 case CC_EQU
: data
= 0x0a; break;
400 case CC_LEU
: data
= 0x0b; break;
401 case CC_GTU
: data
= 0x0c; break;
402 case CC_NEU
: data
= 0x0d; break;
403 case CC_GEU
: data
= 0x0e; break;
404 case CC_TR
: data
= 0x0f; break;
406 assert(!"invalid cond4");
410 emitField(pos
, 4, data
);
414 CodeEmitterGM107::emitO(int pos
)
416 emitField(pos
, 1, insn
->getSrc(0)->reg
.file
== FILE_SHADER_OUTPUT
);
420 CodeEmitterGM107::emitP(int pos
)
422 emitField(pos
, 1, insn
->perPatch
);
426 CodeEmitterGM107::emitSAT(int pos
)
428 emitField(pos
, 1, insn
->saturate
);
432 CodeEmitterGM107::emitCC(int pos
)
434 emitField(pos
, 1, insn
->flagsDef
>= 0);
438 CodeEmitterGM107::emitX(int pos
)
440 emitField(pos
, 1, insn
->flagsSrc
>= 0);
444 CodeEmitterGM107::emitABS(int pos
, const ValueRef
&ref
)
446 emitField(pos
, 1, ref
.mod
.abs());
450 CodeEmitterGM107::emitNEG(int pos
, const ValueRef
&ref
)
452 emitField(pos
, 1, ref
.mod
.neg());
456 CodeEmitterGM107::emitNEG2(int pos
, const ValueRef
&a
, const ValueRef
&b
)
458 emitField(pos
, 1, a
.mod
.neg() ^ b
.mod
.neg());
462 CodeEmitterGM107::emitFMZ(int pos
, int len
)
464 emitField(pos
, len
, insn
->dnz
<< 1 | insn
->ftz
);
// Emits the rounding mode: a 2-bit mode at bit `rmp` (N=0, M=1, P=2, Z=3) and
// a 1-bit flag at bit `rip`. The *I variants set ri = 1 and then fall through
// to the matching base variant to share its mode value. Unknown modes assert.
// NOTE(review): the declarations/initial values of rm and ri are not visible.
468 CodeEmitterGM107::emitRND(int rmp
, RoundMode rnd
, int rip
)
472 case ROUND_NI
: ri
= 1;
473 case ROUND_N
: rm
= 0; break;
474 case ROUND_MI
: ri
= 1;
475 case ROUND_M
: rm
= 1; break;
476 case ROUND_PI
: ri
= 1;
477 case ROUND_P
: rm
= 2; break;
478 case ROUND_ZI
: ri
= 1;
479 case ROUND_Z
: rm
= 3; break;
481 assert(!"invalid round mode");
484 emitField(rip
, 1, ri
);
485 emitField(rmp
, 2, rm
);
489 CodeEmitterGM107::emitPDIV(int pos
)
491 assert(insn
->postFactor
>= -3 && insn
->postFactor
<= 3);
492 if (insn
->postFactor
> 0)
493 emitField(pos
, 3, 7 - insn
->postFactor
);
495 emitField(pos
, 3, 0 - insn
->postFactor
);
499 CodeEmitterGM107::emitINV(int pos
, const ValueRef
&ref
)
501 emitField(pos
, 1, !!(ref
.mod
& Modifier(NV50_IR_MOD_NOT
)));
504 /*******************************************************************************
506 ******************************************************************************/
509 CodeEmitterGM107::emitEXIT()
511 emitInsn (0xe3000000);
512 emitCond5(0x00, CC_TR
);
// Encodes a branch. The opcode is one of JMX/BRX (indirect) or JMP/BRA; the
// BRA form also writes insn->allWarp at bit 7. Bit 6 carries insn->limit and
// the condition field at bit 0 is CC_TR. When src(0) is absent or not a
// constant-buffer value, the target is either a 24-bit offset relative to
// codeSize + 8 or the 32-bit binPos; otherwise the target is emitted as a
// c[] operand and bit 5 is set.
// NOTE(review): the conditions choosing JMX vs BRX, JMP vs BRA and relative
// vs absolute, as well as the `gpr` local and the statement guarded by the
// writeIssueDelays check, are on lines not visible here.
516 CodeEmitterGM107::emitBRA()
518 const FlowInstruction
*insn
= this->insn
->asFlow();
521 if (insn
->indirect
) {
523 emitInsn(0xe2000000); // JMX
525 emitInsn(0xe2500000); // BRX
529 emitInsn(0xe2100000); // JMP
531 emitInsn(0xe2400000); // BRA
532 emitField(0x07, 1, insn
->allWarp
);
535 emitField(0x06, 1, insn
->limit
);
536 emitCond5(0x00, CC_TR
);
538 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
539 int32_t pos
= insn
->target
.bb
->binPos
;
540 if (writeIssueDelays
&& !(pos
& 0x1f))
543 emitField(0x14, 24, pos
- (codeSize
+ 8));
545 emitField(0x14, 32, pos
);
547 emitCBUF (0x24, gpr
, 20, 16, 0, insn
->src(0));
548 emitField(0x05, 1, 1);
// Encodes a call without predicate emission: JCAL when insn->absolute, CAL
// otherwise. When src(0) is absent or not a constant-buffer value the target
// is a 24-bit offset relative to codeSize + 8, a builtin (two TYPE_BUILTIN
// relocations against the builtin's offset), or the 32-bit binPos; otherwise
// the target is emitted as a c[] operand and bit 5 is set.
// NOTE(review): the conditions separating the relative, builtin and absolute
// cases are on lines not visible here.
553 CodeEmitterGM107::emitCAL()
555 const FlowInstruction
*insn
= this->insn
->asFlow();
557 if (insn
->absolute
) {
558 emitInsn(0xe2200000, 0); // JCAL
560 emitInsn(0xe2600000, 0); // CAL
563 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
565 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
568 int pcAbs
= targGM107
->getBuiltinOffset(insn
->target
.builtin
);
569 addReloc(RelocEntry::TYPE_BUILTIN
, 0, pcAbs
, 0xfff00000, 20);
570 addReloc(RelocEntry::TYPE_BUILTIN
, 1, pcAbs
, 0x000fffff, -12);
572 emitField(0x14, 32, insn
->target
.bb
->binPos
);
576 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
577 emitField(0x05, 1, 1);
// Encodes opcode word 0xe2b00000 without predicate emission. The target is a
// 24-bit offset relative to codeSize + 8, unless src(0) is a constant-buffer
// value, in which case it is emitted as a c[] operand and bit 5 is set.
582 CodeEmitterGM107::emitPCNT()
584 const FlowInstruction
*insn
= this->insn
->asFlow();
586 emitInsn(0xe2b00000, 0);
588 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
589 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
591 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
592 emitField(0x05, 1, 1);
597 CodeEmitterGM107::emitCONT()
599 emitInsn (0xe3500000);
600 emitCond5(0x00, CC_TR
);
// Encodes opcode word 0xe2a00000 without predicate emission. The target is a
// 24-bit offset relative to codeSize + 8, unless src(0) is a constant-buffer
// value, in which case it is emitted as a c[] operand and bit 5 is set.
604 CodeEmitterGM107::emitPBK()
606 const FlowInstruction
*insn
= this->insn
->asFlow();
608 emitInsn(0xe2a00000, 0);
610 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
611 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
613 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
614 emitField(0x05, 1, 1);
619 CodeEmitterGM107::emitBRK()
621 emitInsn (0xe3400000);
622 emitCond5(0x00, CC_TR
);
// Encodes opcode word 0xe2700000 without predicate emission. The target is a
// 24-bit offset relative to codeSize + 8, unless src(0) is a constant-buffer
// value, in which case it is emitted as a c[] operand and bit 5 is set.
626 CodeEmitterGM107::emitPRET()
628 const FlowInstruction
*insn
= this->insn
->asFlow();
630 emitInsn(0xe2700000, 0);
632 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
633 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
635 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
636 emitField(0x05, 1, 1);
641 CodeEmitterGM107::emitRET()
643 emitInsn (0xe3200000);
644 emitCond5(0x00, CC_TR
);
// Encodes opcode word 0xe2900000 without predicate emission. The target is a
// 24-bit offset relative to codeSize + 8, unless src(0) is a constant-buffer
// value, in which case it is emitted as a c[] operand and bit 5 is set.
648 CodeEmitterGM107::emitSSY()
650 const FlowInstruction
*insn
= this->insn
->asFlow();
652 emitInsn(0xe2900000, 0);
654 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
655 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
657 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
658 emitField(0x05, 1, 1);
663 CodeEmitterGM107::emitSYNC()
665 emitInsn (0xf0f80000);
666 emitCond5(0x00, CC_TR
);
670 CodeEmitterGM107::emitSAM()
672 emitInsn(0xe3700000, 0);
676 CodeEmitterGM107::emitRAM()
678 emitInsn(0xe3800000, 0);
681 /*******************************************************************************
683 ******************************************************************************/
685 /*******************************************************************************
686 * movement / conversion
687 ******************************************************************************/
// Encodes a move. When src(0) is not an immediate the opcode is chosen by the
// source file (and, for one case, by whether the destination is a predicate),
// and insn->lanes is written at bit 0x27 unless either side is a predicate.
// Immediate sources use opcode 0x01000000 with a 32-bit immediate and
// insn->lanes at bit 0x0c. The destination is emitted as a predicate at bit 3
// or as a GPR at bit 0.
// NOTE(review): the case labels of the inner switch are mostly not visible;
// the 19-bit immediate form (0x38980000) sits inside the non-immediate branch
// and therefore looks unreachable — confirm.
690 CodeEmitterGM107::emitMOV()
692 if (insn
->src(0).getFile() != FILE_IMMEDIATE
) {
693 switch (insn
->src(0).getFile()) {
695 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
696 emitInsn(0x5b6a0000);
699 emitInsn(0x5c980000);
701 emitGPR (0x14, insn
->src(0));
703 case FILE_MEMORY_CONST
:
704 emitInsn(0x4c980000);
705 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
708 emitInsn(0x38980000);
709 emitIMMD(0x14, 19, insn
->src(0));
712 emitInsn(0x50880000);
713 emitPRED(0x0c, insn
->src(0));
718 assert(!"bad src file");
721 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
722 insn
->src(0).getFile() != FILE_PREDICATE
)
723 emitField(0x27, 4, insn
->lanes
);
725 emitInsn (0x01000000);
726 emitIMMD (0x14, 32, insn
->src(0));
727 emitField(0x0c, 4, insn
->lanes
);
730 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
732 emitPRED(0x03, insn
->def(0));
735 emitGPR(0x00, insn
->def(0));
740 CodeEmitterGM107::emitS2R()
742 emitInsn(0xf0c80000);
743 emitSYS (0x14, insn
->src(0));
744 emitGPR (0x00, insn
->def(0));
// Encodes a float-to-float conversion. OP_FLOOR/OP_CEIL/OP_TRUNC override the
// rounding mode with ROUND_MI/ROUND_PI/ROUND_ZI. The opcode form follows the
// file of src(0). OP_SAT, OP_ABS and OP_NEG set the saturate/abs/neg bits in
// addition to the instruction's own flags and modifiers. Source and
// destination sizes are encoded as util_logbase2(typeSizeof(type)).
// NOTE(review): the switch header over the opcode and some case labels are on
// lines not visible here.
748 CodeEmitterGM107::emitF2F()
750 RoundMode rnd
= insn
->rnd
;
753 case OP_FLOOR
: rnd
= ROUND_MI
; break;
754 case OP_CEIL
: rnd
= ROUND_PI
; break;
755 case OP_TRUNC
: rnd
= ROUND_ZI
; break;
760 switch (insn
->src(0).getFile()) {
762 emitInsn(0x5ca80000);
763 emitGPR (0x14, insn
->src(0));
765 case FILE_MEMORY_CONST
:
766 emitInsn(0x4ca80000);
767 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
770 emitInsn(0x38a80000);
771 emitIMMD(0x14, 19, insn
->src(0));
774 assert(!"bad src0 file");
778 emitField(0x32, 1, (insn
->op
== OP_SAT
) || insn
->saturate
);
779 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
781 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
783 emitField(0x29, 1, insn
->subOp
);
784 emitRND (0x27, rnd
, 0x2a);
785 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
786 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
787 emitGPR (0x00, insn
->def(0));
// Encodes a float-to-integer conversion. OP_FLOOR/OP_CEIL/OP_TRUNC override
// the rounding mode with ROUND_M/ROUND_P/ROUND_Z. The opcode form follows the
// file of src(0); OP_ABS/OP_NEG set the abs/neg bits in addition to the
// source modifiers. Bit 0x0c carries the signedness of the destination type
// and the sizes are encoded as util_logbase2(typeSizeof(type)).
// NOTE(review): the switch header over the opcode and some case labels are on
// lines not visible here.
791 CodeEmitterGM107::emitF2I()
793 RoundMode rnd
= insn
->rnd
;
796 case OP_FLOOR
: rnd
= ROUND_M
; break;
797 case OP_CEIL
: rnd
= ROUND_P
; break;
798 case OP_TRUNC
: rnd
= ROUND_Z
; break;
803 switch (insn
->src(0).getFile()) {
805 emitInsn(0x5cb00000);
806 emitGPR (0x14, insn
->src(0));
808 case FILE_MEMORY_CONST
:
809 emitInsn(0x4cb00000);
810 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
813 emitInsn(0x38b00000);
814 emitIMMD(0x14, 19, insn
->src(0));
817 assert(!"bad src0 file");
821 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
823 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
825 emitRND (0x27, rnd
, 0x2a);
826 emitField(0x0c, 1, isSignedType(insn
->dType
));
827 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
828 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
829 emitGPR (0x00, insn
->def(0));
// Encodes an integer-to-float conversion. OP_FLOOR/OP_CEIL/OP_TRUNC override
// the rounding mode with ROUND_M/ROUND_P/ROUND_Z. The opcode form follows the
// file of src(0); OP_ABS/OP_NEG set the abs/neg bits in addition to the
// source modifiers. subOp goes to the 2-bit field at 0x29, bit 0x0d carries
// the signedness of the source type and the sizes are encoded as
// util_logbase2(typeSizeof(type)). emitRND is called with rip = -1.
// NOTE(review): the switch header over the opcode and some case labels are on
// lines not visible here.
833 CodeEmitterGM107::emitI2F()
835 RoundMode rnd
= insn
->rnd
;
838 case OP_FLOOR
: rnd
= ROUND_M
; break;
839 case OP_CEIL
: rnd
= ROUND_P
; break;
840 case OP_TRUNC
: rnd
= ROUND_Z
; break;
845 switch (insn
->src(0).getFile()) {
847 emitInsn(0x5cb80000);
848 emitGPR (0x14, insn
->src(0));
850 case FILE_MEMORY_CONST
:
851 emitInsn(0x4cb80000);
852 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
855 emitInsn(0x38b80000);
856 emitIMMD(0x14, 19, insn
->src(0));
859 assert(!"bad src0 file");
863 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
865 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
866 emitField(0x29, 2, insn
->subOp
);
867 emitRND (0x27, rnd
, -1);
868 emitField(0x0d, 1, isSignedType(insn
->sType
));
869 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
870 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
871 emitGPR (0x00, insn
->def(0));
// Encodes an integer-to-integer conversion. The opcode form follows the file
// of src(0); OP_ABS/OP_NEG set the abs/neg bits in addition to the source
// modifiers. subOp goes to the 2-bit field at 0x29, bits 0x0d/0x0c carry the
// signedness of the source/destination types and the sizes are encoded as
// util_logbase2(typeSizeof(type)).
// NOTE(review): some case labels are on lines not visible here.
875 CodeEmitterGM107::emitI2I()
877 switch (insn
->src(0).getFile()) {
879 emitInsn(0x5ce00000);
880 emitGPR (0x14, insn
->src(0));
882 case FILE_MEMORY_CONST
:
883 emitInsn(0x4ce00000);
884 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
887 emitInsn(0x38e00000);
888 emitIMMD(0x14, 19, insn
->src(0));
891 assert(!"bad src0 file");
896 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
898 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
899 emitField(0x29, 2, insn
->subOp
);
900 emitField(0x0d, 1, isSignedType(insn
->sType
));
901 emitField(0x0c, 1, isSignedType(insn
->dType
));
902 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
903 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
904 emitGPR (0x00, insn
->def(0));
908 selpFlip(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
910 int loc
= entry
->loc
;
911 if (data
.force_persample_interp
)
912 code
[loc
+ 1] |= 1 << 10;
914 code
[loc
+ 1] &= ~(1 << 10);
// Encodes a select. The opcode form follows the file of src(1); src(2) is the
// selecting predicate (bit 0x27) with its NOT modifier at bit 0x2a, src(0)
// goes to bit 8 and the destination to bit 0. When subOp == 1 an interp fixup
// using selpFlip is registered.
// NOTE(review): some case labels are on lines not visible here.
918 CodeEmitterGM107::emitSEL()
920 switch (insn
->src(1).getFile()) {
922 emitInsn(0x5ca00000);
923 emitGPR (0x14, insn
->src(1));
925 case FILE_MEMORY_CONST
:
926 emitInsn(0x4ca00000);
927 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
930 emitInsn(0x38a00000);
931 emitIMMD(0x14, 19, insn
->src(1));
934 assert(!"bad src1 file");
938 emitINV (0x2a, insn
->src(2));
939 emitPRED(0x27, insn
->src(2));
940 emitGPR (0x08, insn
->src(0));
941 emitGPR (0x00, insn
->def(0));
943 if (insn
->subOp
== 1) {
944 addInterp(0, 0, selpFlip
);
// Encodes opcode word 0xef100000. src(1) is emitted either as a GPR or as a
// 5-bit immediate at bit 0x14; the argument at bit 0x22 is hard-coded to the
// 13-bit value 0x1c03 (see the XXX note). subOp goes to the 2-bit field at
// 0x1e and `type` to the 2-bit field at 0x1c.
// NOTE(review): the computation of `type` and the switch case labels are on
// lines not visible here.
949 CodeEmitterGM107::emitSHFL()
953 emitInsn (0xef100000);
955 switch (insn
->src(1).getFile()) {
957 emitGPR(0x14, insn
->src(1));
960 emitIMMD(0x14, 5, insn
->src(1));
964 assert(!"invalid src1 file");
968 /*XXX: what is this arg? hardcode immediate for now */
969 emitField(0x22, 13, 0x1c03);
973 emitField(0x1e, 2, insn
->subOp
);
974 emitField(0x1c, 2, type
);
975 emitGPR (0x08, insn
->src(0));
976 emitGPR (0x00, insn
->def(0));
979 /*******************************************************************************
981 ******************************************************************************/
// Encodes a double-precision add/sub. The opcode form follows the file of
// src(1); abs/neg modifiers of both sources are written to bits 0x2d..0x31.
// For OP_SUB, bit 13 of code[1] is toggled afterwards.
// NOTE(review): some case labels and possibly further field writes are on
// lines not visible here.
984 CodeEmitterGM107::emitDADD()
986 switch (insn
->src(1).getFile()) {
988 emitInsn(0x5c700000);
989 emitGPR (0x14, insn
->src(1));
991 case FILE_MEMORY_CONST
:
992 emitInsn(0x4c700000);
993 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
996 emitInsn(0x38700000);
997 emitIMMD(0x14, 19, insn
->src(1));
1000 assert(!"bad src1 file");
1003 emitABS(0x31, insn
->src(1));
1004 emitNEG(0x30, insn
->src(0));
1006 emitABS(0x2e, insn
->src(0));
1007 emitNEG(0x2d, insn
->src(1));
1009 if (insn
->op
== OP_SUB
)
1010 code
[1] ^= 0x00002000;
1012 emitGPR(0x08, insn
->src(0));
1013 emitGPR(0x00, insn
->def(0));
// Encodes a double-precision multiply. The opcode form follows the file of
// src(1); the combined negation of both sources goes to bit 0x30.
// NOTE(review): some case labels and possibly further field writes are on
// lines not visible here.
1017 CodeEmitterGM107::emitDMUL()
1019 switch (insn
->src(1).getFile()) {
1021 emitInsn(0x5c800000);
1022 emitGPR (0x14, insn
->src(1));
1024 case FILE_MEMORY_CONST
:
1025 emitInsn(0x4c800000);
1026 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1028 case FILE_IMMEDIATE
:
1029 emitInsn(0x38800000);
1030 emitIMMD(0x14, 19, insn
->src(1));
1033 assert(!"bad src1 file");
1037 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1040 emitGPR (0x08, insn
->src(0));
1041 emitGPR (0x00, insn
->def(0));
// Encodes a double-precision fused multiply-add. The outer switch is on the
// file of src(2): in the first case the opcode form follows the file of
// src(1) and src(2) is a GPR at bit 0x27; when src(2) is a constant-buffer
// value, src(1) takes the GPR slot at 0x27 and src(2) is emitted as c[].
// src(2)'s negation goes to bit 0x31 and the combined negation of src(0) and
// src(1) to bit 0x30.
// NOTE(review): some case labels and possibly further field writes are on
// lines not visible here.
1045 CodeEmitterGM107::emitDFMA()
1047 switch(insn
->src(2).getFile()) {
1049 switch (insn
->src(1).getFile()) {
1051 emitInsn(0x5b700000);
1052 emitGPR (0x14, insn
->src(1));
1054 case FILE_MEMORY_CONST
:
1055 emitInsn(0x4b700000);
1056 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1058 case FILE_IMMEDIATE
:
1059 emitInsn(0x36700000);
1060 emitIMMD(0x14, 19, insn
->src(1));
1063 assert(!"bad src1 file");
1066 emitGPR (0x27, insn
->src(2));
1068 case FILE_MEMORY_CONST
:
1069 emitInsn(0x53700000);
1070 emitGPR (0x27, insn
->src(1));
1071 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1074 assert(!"bad src2 file");
1079 emitNEG (0x31, insn
->src(2));
1080 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1082 emitGPR (0x08, insn
->src(0));
1083 emitGPR (0x00, insn
->def(0));
// Encodes a double-precision min/max. The opcode form follows the file of
// src(1); abs/neg modifiers of both sources go to bits 0x2d..0x31 and bit
// 0x2a is set for OP_MAX.
// NOTE(review): some case labels and possibly further field writes are on
// lines not visible here.
1087 CodeEmitterGM107::emitDMNMX()
1089 switch (insn
->src(1).getFile()) {
1091 emitInsn(0x5c500000);
1092 emitGPR (0x14, insn
->src(1));
1094 case FILE_MEMORY_CONST
:
1095 emitInsn(0x4c500000);
1096 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1098 case FILE_IMMEDIATE
:
1099 emitInsn(0x38500000);
1100 emitIMMD(0x14, 19, insn
->src(1));
1103 assert(!"bad src1 file");
1107 emitABS (0x31, insn
->src(1));
1108 emitNEG (0x30, insn
->src(0));
1110 emitABS (0x2e, insn
->src(0));
1111 emitNEG (0x2d, insn
->src(1));
1112 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1114 emitGPR (0x08, insn
->src(0));
1115 emitGPR (0x00, insn
->def(0));
// Encodes a double-precision compare writing a GPR. The opcode form follows
// the file of src(1). For ops other than OP_SET the combining operation
// (AND=0, OR=1, XOR=2) goes to the 2-bit field at 0x2d and src(2) is emitted
// as the predicate at 0x27. Bit 0x34 is set when dType is TYPE_F32, the
// 4-bit condition goes to 0x30 and the source abs/neg modifiers to
// 0x2b/0x2c/0x35/0x36.
// NOTE(review): some case labels, the OP_SET path and possibly further field
// writes are on lines not visible here.
1119 CodeEmitterGM107::emitDSET()
1121 const CmpInstruction
*insn
= this->insn
->asCmp();
1123 switch (insn
->src(1).getFile()) {
1125 emitInsn(0x59000000);
1126 emitGPR (0x14, insn
->src(1));
1128 case FILE_MEMORY_CONST
:
1129 emitInsn(0x49000000);
1130 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1132 case FILE_IMMEDIATE
:
1133 emitInsn(0x32000000);
1134 emitIMMD(0x14, 19, insn
->src(1));
1137 assert(!"bad src1 file");
1141 if (insn
->op
!= OP_SET
) {
1143 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1144 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1145 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1147 assert(!"invalid set op");
1150 emitPRED(0x27, insn
->src(2));
1155 emitABS (0x36, insn
->src(0));
1156 emitNEG (0x35, insn
->src(1));
1157 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1158 emitCond4(0x30, insn
->setCond
);
1160 emitABS (0x2c, insn
->src(1));
1161 emitNEG (0x2b, insn
->src(0));
1162 emitGPR (0x08, insn
->src(0));
1163 emitGPR (0x00, insn
->def(0));
// Encodes a double-precision compare writing predicates. The opcode form
// follows the file of src(1). For ops other than OP_SET the combining
// operation (AND=0, OR=1, XOR=2) goes to the 2-bit field at 0x2d and src(2)
// is emitted as the predicate at 0x27. The 4-bit condition goes to 0x30, the
// source abs/neg modifiers to 0x06/0x07/0x2b/0x2c, def(0) to the predicate
// field at bit 3 and, if present, def(1) to the predicate field at bit 0.
// NOTE(review): some case labels, the OP_SET path and the handling of a
// missing def(1) are on lines not visible here.
1167 CodeEmitterGM107::emitDSETP()
1169 const CmpInstruction
*insn
= this->insn
->asCmp();
1171 switch (insn
->src(1).getFile()) {
1173 emitInsn(0x5b800000);
1174 emitGPR (0x14, insn
->src(1));
1176 case FILE_MEMORY_CONST
:
1177 emitInsn(0x4b800000);
1178 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1180 case FILE_IMMEDIATE
:
1181 emitInsn(0x36800000);
1182 emitIMMD(0x14, 19, insn
->src(1));
1185 assert(!"bad src1 file");
1189 if (insn
->op
!= OP_SET
) {
1191 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1192 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1193 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1195 assert(!"invalid set op");
1198 emitPRED(0x27, insn
->src(2));
1203 emitCond4(0x30, insn
->setCond
);
1204 emitABS (0x2c, insn
->src(1));
1205 emitNEG (0x2b, insn
->src(0));
1206 emitGPR (0x08, insn
->src(0));
1207 emitABS (0x07, insn
->src(0));
1208 emitNEG (0x06, insn
->src(1));
1209 emitPRED (0x03, insn
->def(0));
1210 if (insn
->defExists(1))
1211 emitPRED(0x00, insn
->def(1));
1216 /*******************************************************************************
1218 ******************************************************************************/
// Encodes a single-precision add/sub. When src(1) is not a long immediate the
// opcode form follows the file of src(1), the abs/neg modifiers go to bits
// 0x2d..0x31 and OP_SUB toggles bit 13 of code[1]. Otherwise opcode word
// 0x08000000 is used with a 32-bit immediate, the abs/neg modifiers go to
// bits 0x35..0x39 and OP_SUB toggles bit 19 of code[1].
// NOTE(review): some case labels and possibly further field writes are on
// lines not visible here.
1221 CodeEmitterGM107::emitFADD()
1223 if (!longIMMD(insn
->src(1))) {
1224 switch (insn
->src(1).getFile()) {
1226 emitInsn(0x5c580000);
1227 emitGPR (0x14, insn
->src(1));
1229 case FILE_MEMORY_CONST
:
1230 emitInsn(0x4c580000);
1231 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1233 case FILE_IMMEDIATE
:
1234 emitInsn(0x38580000);
1235 emitIMMD(0x14, 19, insn
->src(1));
1238 assert(!"bad src1 file");
1242 emitABS(0x31, insn
->src(1));
1243 emitNEG(0x30, insn
->src(0));
1245 emitABS(0x2e, insn
->src(0));
1246 emitNEG(0x2d, insn
->src(1));
1249 if (insn
->op
== OP_SUB
)
1250 code
[1] ^= 0x00002000;
1252 emitInsn(0x08000000);
1253 emitABS(0x39, insn
->src(1));
1254 emitNEG(0x38, insn
->src(0));
1256 emitABS(0x36, insn
->src(0));
1257 emitNEG(0x35, insn
->src(1));
1259 emitIMMD(0x14, 32, insn
->src(1));
1261 if (insn
->op
== OP_SUB
)
1262 code
[1] ^= 0x00080000;
1265 emitGPR(0x08, insn
->src(0));
1266 emitGPR(0x00, insn
->def(0));
// Encodes a single-precision multiply. When src(1) is not a long immediate
// the opcode form follows the file of src(1) and the combined negation of
// both sources goes to bit 0x30. Otherwise opcode word 0x1e000000 is used
// with a 32-bit immediate, and when exactly one source is negated the
// immediate's sign bit is flipped by toggling bit 19 of code[1].
// NOTE(review): some case labels and possibly further field writes are on
// lines not visible here.
1270 CodeEmitterGM107::emitFMUL()
1272 if (!longIMMD(insn
->src(1))) {
1273 switch (insn
->src(1).getFile()) {
1275 emitInsn(0x5c680000);
1276 emitGPR (0x14, insn
->src(1));
1278 case FILE_MEMORY_CONST
:
1279 emitInsn(0x4c680000);
1280 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1282 case FILE_IMMEDIATE
:
1283 emitInsn(0x38680000);
1284 emitIMMD(0x14, 19, insn
->src(1));
1287 assert(!"bad src1 file");
1291 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1297 emitInsn(0x1e000000);
1301 emitIMMD(0x14, 32, insn
->src(1));
1302 if (insn
->src(0).mod
.neg() ^ insn
->src(1).mod
.neg())
1303 code
[1] ^= 0x00080000; /* flip immd sign bit */
1306 emitGPR(0x08, insn
->src(0));
1307 emitGPR(0x00, insn
->def(0));
// Encodes a single-precision fused multiply-add. The outer switch is on the
// file of src(2): in the first case the opcode form follows the file of
// src(1) and src(2) is a GPR at bit 0x27; when src(2) is a constant-buffer
// value, src(1) takes the GPR slot at 0x27 and src(2) is emitted as c[].
// src(2)'s negation goes to bit 0x31 and the combined negation of src(0) and
// src(1) to bit 0x30.
// NOTE(review): some case labels and possibly further field writes are on
// lines not visible here.
1311 CodeEmitterGM107::emitFFMA()
1313 /*XXX: ffma32i exists, but not using it as third src overlaps dst */
1314 switch(insn
->src(2).getFile()) {
1316 switch (insn
->src(1).getFile()) {
1318 emitInsn(0x59800000);
1319 emitGPR (0x14, insn
->src(1));
1321 case FILE_MEMORY_CONST
:
1322 emitInsn(0x49800000);
1323 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1325 case FILE_IMMEDIATE
:
1326 emitInsn(0x32800000);
1327 emitIMMD(0x14, 19, insn
->src(1));
1330 assert(!"bad src1 file");
1333 emitGPR (0x27, insn
->src(2));
1335 case FILE_MEMORY_CONST
:
1336 emitInsn(0x51800000);
1337 emitGPR (0x27, insn
->src(1));
1338 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1341 assert(!"bad src2 file");
1346 emitNEG (0x31, insn
->src(2));
1347 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1351 emitGPR(0x08, insn
->src(0));
1352 emitGPR(0x00, insn
->def(0));
// Encodes opcode word 0x50800000 with a function selector at bit 0x14
// (written as a 3-bit field): COS=0, SIN=1, EX2=2, LG2=3, RCP=4 + 2*subOp,
// RSQ=5 + 2*subOp. Source neg/abs modifiers go to bits 0x30/0x2e. Other ops
// assert.
// NOTE(review): with subOp == 2 or more the RCP/RSQ selector would not fit in
// 3 bits; the valid subOp range is not visible here — confirm.
1356 CodeEmitterGM107::emitMUFU()
1361 case OP_COS
: mufu
= 0; break;
1362 case OP_SIN
: mufu
= 1; break;
1363 case OP_EX2
: mufu
= 2; break;
1364 case OP_LG2
: mufu
= 3; break;
1365 case OP_RCP
: mufu
= 4 + 2 * insn
->subOp
; break;
1366 case OP_RSQ
: mufu
= 5 + 2 * insn
->subOp
; break;
1368 assert(!"invalid mufu");
1372 emitInsn (0x50800000);
1374 emitNEG (0x30, insn
->src(0));
1375 emitABS (0x2e, insn
->src(0));
1376 emitField(0x14, 3, mufu
);
1377 emitGPR (0x08, insn
->src(0));
1378 emitGPR (0x00, insn
->def(0));
// Encodes a single-precision min/max. The opcode form follows the file of
// src(1); bit 0x2a is set for OP_MAX and the abs/neg modifiers of both
// sources go to bits 0x2d..0x31.
// NOTE(review): some case labels and possibly further field writes are on
// lines not visible here.
1382 CodeEmitterGM107::emitFMNMX()
1384 switch (insn
->src(1).getFile()) {
1386 emitInsn(0x5c600000);
1387 emitGPR (0x14, insn
->src(1));
1389 case FILE_MEMORY_CONST
:
1390 emitInsn(0x4c600000);
1391 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1393 case FILE_IMMEDIATE
:
1394 emitInsn(0x38600000);
1395 emitIMMD(0x14, 19, insn
->src(1));
1398 assert(!"bad src1 file");
1402 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1405 emitABS(0x31, insn
->src(1));
1406 emitNEG(0x30, insn
->src(0));
1408 emitABS(0x2e, insn
->src(0));
1409 emitNEG(0x2d, insn
->src(1));
1411 emitGPR(0x08, insn
->src(0));
1412 emitGPR(0x00, insn
->def(0));
// Encodes a single-source op whose opcode form follows the file of src(0).
// The source abs/neg modifiers go to bits 0x31/0x2d and bit 0x27 is set when
// the op is OP_PREEX2.
// NOTE(review): some case labels are on lines not visible here.
1416 CodeEmitterGM107::emitRRO()
1418 switch (insn
->src(0).getFile()) {
1420 emitInsn(0x5c900000);
1421 emitGPR (0x14, insn
->src(0));
1423 case FILE_MEMORY_CONST
:
1424 emitInsn(0x4c900000);
1425 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1427 case FILE_IMMEDIATE
:
1428 emitInsn(0x38900000);
1429 emitIMMD(0x14, 19, insn
->src(0));
1432 assert(!"bad src file");
1436 emitABS (0x31, insn
->src(0));
1437 emitNEG (0x2d, insn
->src(0));
1438 emitField(0x27, 1, insn
->op
== OP_PREEX2
);
1439 emitGPR (0x00, insn
->def(0));
// Encodes a float compare-select. The condition is insn->setCond, passed
// through reverseCondCode() when src(2) is negated, and is written as a 4-bit
// code at bit 0x30. The outer switch is on the file of src(2): in the first
// case the opcode form follows the file of src(1) and src(2) is a GPR at
// 0x27; when src(2) is a constant-buffer value, src(1) takes the GPR slot at
// 0x27 and src(2) is emitted as c[].
// NOTE(review): some case labels and possibly further field writes are on
// lines not visible here.
1443 CodeEmitterGM107::emitFCMP()
1445 const CmpInstruction
*insn
= this->insn
->asCmp();
1446 CondCode cc
= insn
->setCond
;
1448 if (insn
->src(2).mod
.neg())
1449 cc
= reverseCondCode(cc
);
1451 switch(insn
->src(2).getFile()) {
1453 switch (insn
->src(1).getFile()) {
1455 emitInsn(0x5ba00000);
1456 emitGPR (0x14, insn
->src(1));
1458 case FILE_MEMORY_CONST
:
1459 emitInsn(0x4ba00000);
1460 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1462 case FILE_IMMEDIATE
:
1463 emitInsn(0x36a00000);
1464 emitIMMD(0x14, 19, insn
->src(1));
1467 assert(!"bad src1 file");
1470 emitGPR (0x27, insn
->src(2));
1472 case FILE_MEMORY_CONST
:
1473 emitInsn(0x53a00000);
1474 emitGPR (0x27, insn
->src(1));
1475 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1478 assert(!"bad src2 file");
1482 emitCond4(0x30, cc
);
1484 emitGPR (0x08, insn
->src(0));
1485 emitGPR (0x00, insn
->def(0));
// Encodes a single-precision compare writing a GPR. The opcode form follows
// the file of src(1). For ops other than OP_SET the combining operation
// (AND=0, OR=1, XOR=2) goes to the 2-bit field at 0x2d and src(2) is emitted
// as the predicate at 0x27. Bit 0x34 is set when dType is TYPE_F32, the
// 4-bit condition goes to 0x30 and the source abs/neg modifiers to
// 0x2b/0x2c/0x35/0x36.
// NOTE(review): some case labels, the OP_SET path and possibly further field
// writes are on lines not visible here.
1489 CodeEmitterGM107::emitFSET()
1491 const CmpInstruction
*insn
= this->insn
->asCmp();
1493 switch (insn
->src(1).getFile()) {
1495 emitInsn(0x58000000);
1496 emitGPR (0x14, insn
->src(1));
1498 case FILE_MEMORY_CONST
:
1499 emitInsn(0x48000000);
1500 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1502 case FILE_IMMEDIATE
:
1503 emitInsn(0x30000000);
1504 emitIMMD(0x14, 19, insn
->src(1));
1507 assert(!"bad src1 file");
1511 if (insn
->op
!= OP_SET
) {
1513 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1514 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1515 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1517 assert(!"invalid set op");
1520 emitPRED(0x27, insn
->src(2));
1526 emitABS (0x36, insn
->src(0));
1527 emitNEG (0x35, insn
->src(1));
1528 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1529 emitCond4(0x30, insn
->setCond
);
1531 emitABS (0x2c, insn
->src(1));
1532 emitNEG (0x2b, insn
->src(0));
1533 emitGPR (0x08, insn
->src(0));
1534 emitGPR (0x00, insn
->def(0));
// Encodes a single-precision compare writing predicates. The opcode form
// follows the file of src(1). For ops other than OP_SET the combining
// operation (AND=0, OR=1, XOR=2) goes to the 2-bit field at 0x2d and src(2)
// is emitted as the predicate at 0x27. The 4-bit condition goes to 0x30, the
// source abs/neg modifiers to 0x06/0x07/0x2b/0x2c, def(0) to the predicate
// field at bit 3 and, if present, def(1) to the predicate field at bit 0.
// NOTE(review): some case labels, the OP_SET path and the handling of a
// missing def(1) are on lines not visible here.
1538 CodeEmitterGM107::emitFSETP()
1540 const CmpInstruction
*insn
= this->insn
->asCmp();
1542 switch (insn
->src(1).getFile()) {
1544 emitInsn(0x5bb00000);
1545 emitGPR (0x14, insn
->src(1));
1547 case FILE_MEMORY_CONST
:
1548 emitInsn(0x4bb00000);
1549 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1551 case FILE_IMMEDIATE
:
1552 emitInsn(0x36b00000);
1553 emitIMMD(0x14, 19, insn
->src(1));
1556 assert(!"bad src1 file");
1560 if (insn
->op
!= OP_SET
) {
1562 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1563 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1564 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1566 assert(!"invalid set op");
1569 emitPRED(0x27, insn
->src(2));
1574 emitCond4(0x30, insn
->setCond
);
1576 emitABS (0x2c, insn
->src(1));
1577 emitNEG (0x2b, insn
->src(0));
1578 emitGPR (0x08, insn
->src(0));
1579 emitABS (0x07, insn
->src(0));
1580 emitNEG (0x06, insn
->src(1));
1581 emitPRED (0x03, insn
->def(0));
1582 if (insn
->defExists(1))
1583 emitPRED(0x00, insn
->def(1));
// Encodes opcode word 0x50f80000. insn->lanes is written to bit 0x26 (the
// field is reused for .ndv, per the inline comment) and subOp to the 8-bit
// field at 0x1c. src(1) is emitted as a GPR at 0x14 only when it is not the
// predicate source (predSrc != 1).
// NOTE(review): the alternative taken when predSrc == 1 and any further field
// writes are on lines not visible here.
1589 CodeEmitterGM107::emitFSWZADD()
1591 emitInsn (0x50f80000);
1595 emitField(0x26, 1, insn
->lanes
); /* abused for .ndv */
1596 emitField(0x1c, 8, insn
->subOp
);
1597 if (insn
->predSrc
!= 1)
1598 emitGPR (0x14, insn
->src(1));
1601 emitGPR (0x08, insn
->src(0));
1602 emitGPR (0x00, insn
->def(0));
1605 /*******************************************************************************
1607 ******************************************************************************/
/* Encode a logic op. `lop` is 0/1/2 for OP_AND/OP_OR/OP_XOR. When src(1) is
 * not an immediate, the opcode depends on the file of src(1), `lop` goes to
 * the field at 0x29 and the source invert bits to 0x28/0x27. When src(1) is
 * an immediate, opcode 0x04000000 is used with a 32-bit immediate, `lop` at
 * 0x35 and the invert bits at 0x38/0x37.
 * NOTE(review): the FILE_IMMEDIATE case of the inner switch looks
 * unreachable, because the enclosing test already excludes FILE_IMMEDIATE -
 * confirm. */
1610 CodeEmitterGM107::emitLOP()
1615 case OP_AND
: lop
= 0; break;
1616 case OP_OR
: lop
= 1; break;
1617 case OP_XOR
: lop
= 2; break;
1619 assert(!"invalid lop");
1623 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1624 switch (insn
->src(1).getFile()) {
1626 emitInsn(0x5c400000);
1627 emitGPR (0x14, insn
->src(1));
1629 case FILE_MEMORY_CONST
:
1630 emitInsn(0x4c400000);
1631 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1633 case FILE_IMMEDIATE
:
1634 emitInsn(0x38400000);
1635 emitIMMD(0x14, 19, insn
->src(1));
1638 assert(!"bad src1 file");
1644 emitField(0x29, 2, lop
);
1645 emitINV (0x28, insn
->src(1));
1646 emitINV (0x27, insn
->src(0));
1648 emitInsn (0x04000000);
1650 emitINV (0x38, insn
->src(1));
1651 emitINV (0x37, insn
->src(0));
1652 emitField(0x35, 2, lop
);
1654 emitIMMD (0x14, 32, insn
->src(1));
1657 emitGPR (0x08, insn
->src(0));
1658 emitGPR (0x00, insn
->def(0));
1661 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
/* The only operand read here is src(0): the short forms pick their opcode
 * from its file, and the long-immediate form (opcode 0x05600000) must encode
 * that same operand. The long form previously passed src(1) to emitIMMD,
 * although the guard tests longIMMD(src(0)) and no other path touches
 * src(1). */
1663 CodeEmitterGM107::emitNOT()
1665 if (!longIMMD(insn
->src(0))) {
1666 switch (insn
->src(0).getFile()) {
1668 emitInsn(0x5c400700);
1669 emitGPR (0x14, insn
->src(0));
1671 case FILE_MEMORY_CONST
:
1672 emitInsn(0x4c400700);
1673 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1675 case FILE_IMMEDIATE
:
1676 emitInsn(0x38400700);
1677 emitIMMD(0x14, 19, insn
->src(0));
1680 assert(!"bad src1 file");
1685 emitInsn (0x05600000);
1686 emitIMMD (0x14, 32, insn
->src(0));
1690 emitGPR(0x00, insn
->def(0));
/* Encode an integer add. A non-immediate src(1) selects the opcode by its
 * file and emits negate bits at 0x31/0x30; an immediate src(1) uses opcode
 * 0x1c000000 with a 32-bit immediate and a src(0) negate bit at 0x38.
 * For OP_SUB, bit 0x00010000 of code[1] is toggled afterwards.
 * NOTE(review): the FILE_IMMEDIATE case of the inner switch looks
 * unreachable given the enclosing test - confirm. */
1694 CodeEmitterGM107::emitIADD()
1696 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1697 switch (insn
->src(1).getFile()) {
1699 emitInsn(0x5c100000);
1700 emitGPR (0x14, insn
->src(1));
1702 case FILE_MEMORY_CONST
:
1703 emitInsn(0x4c100000);
1704 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1706 case FILE_IMMEDIATE
:
1707 emitInsn(0x38100000);
1708 emitIMMD(0x14, 19, insn
->src(1));
1711 assert(!"bad src1 file");
1715 emitNEG(0x31, insn
->src(0));
1716 emitNEG(0x30, insn
->src(1));
1720 emitInsn(0x1c000000);
1721 emitNEG (0x38, insn
->src(0));
1725 emitIMMD(0x14, 32, insn
->src(1));
1728 if (insn
->op
== OP_SUB
)
1729 code
[1] ^= 0x00010000;
1731 emitGPR(0x08, insn
->src(0));
1732 emitGPR(0x00, insn
->def(0));
/* Encode an integer multiply. Both forms record the signedness of sType and
 * dType and whether subOp is NV50_IR_SUBOP_MUL_HIGH: at 0x29/0x28/0x27 for a
 * non-immediate src(1), at 0x37/0x36/0x35 for the 32-bit immediate form
 * (opcode 0x1f000000).
 * NOTE(review): the FILE_IMMEDIATE case of the inner switch looks
 * unreachable given the enclosing test - confirm. */
1736 CodeEmitterGM107::emitIMUL()
1738 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1739 switch (insn
->src(1).getFile()) {
1741 emitInsn(0x5c380000);
1742 emitGPR (0x14, insn
->src(1));
1744 case FILE_MEMORY_CONST
:
1745 emitInsn(0x4c380000);
1746 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1748 case FILE_IMMEDIATE
:
1749 emitInsn(0x38380000);
1750 emitIMMD(0x14, 19, insn
->src(1));
1753 assert(!"bad src1 file");
1757 emitField(0x29, 1, isSignedType(insn
->sType
));
1758 emitField(0x28, 1, isSignedType(insn
->dType
));
1759 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1761 emitInsn (0x1f000000);
1762 emitField(0x37, 1, isSignedType(insn
->sType
));
1763 emitField(0x36, 1, isSignedType(insn
->dType
));
1764 emitField(0x35, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1766 emitIMMD (0x14, 32, insn
->src(1));
1769 emitGPR(0x08, insn
->src(0));
1770 emitGPR(0x00, insn
->def(0));
/* Encode an integer multiply-add. The outer switch is on the file of src(2):
 * in one arm the opcode follows the file of src(1) and src(2) is a GPR at
 * 0x27; in the FILE_MEMORY_CONST arm src(2) is the constant-buffer operand
 * and src(1) is the GPR at 0x27. MUL_HIGH, signedness and negate modifiers
 * are then written to the fields at 0x36..0x30. */
1774 CodeEmitterGM107::emitIMAD()
1776 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1777 switch(insn
->src(2).getFile()) {
1779 switch (insn
->src(1).getFile()) {
1781 emitInsn(0x5a000000);
1782 emitGPR (0x14, insn
->src(1));
1784 case FILE_MEMORY_CONST
:
1785 emitInsn(0x4a000000);
1786 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1788 case FILE_IMMEDIATE
:
1789 emitInsn(0x34000000);
1790 emitIMMD(0x14, 19, insn
->src(1));
1793 assert(!"bad src1 file");
1796 emitGPR (0x27, insn
->src(2));
1798 case FILE_MEMORY_CONST
:
1799 emitInsn(0x52000000);
1800 emitGPR (0x27, insn
->src(1));
1801 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1804 assert(!"bad src2 file");
1808 emitField(0x36, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1809 emitField(0x35, 1, isSignedType(insn
->sType
));
1810 emitNEG (0x34, insn
->src(2));
1811 emitNEG2 (0x33, insn
->src(0), insn
->src(1));
1814 emitField(0x30, 1, isSignedType(insn
->dType
));
1816 emitGPR (0x08, insn
->src(0));
1817 emitGPR (0x00, insn
->def(0));
/* Encode ISCADD. The opcode follows the file of src(2), which is the operand
 * placed at 0x14; src(1) is emitted as a 5-bit immediate at 0x27 and src(0)
 * as the GPR at 0x08. Negate bits for src(0)/src(2) go to 0x31/0x30.
 * NOTE(review): the assert text says "src1" although the switch inspects
 * src(2). */
1821 CodeEmitterGM107::emitISCADD()
1823 switch (insn
->src(2).getFile()) {
1825 emitInsn(0x5c180000);
1826 emitGPR (0x14, insn
->src(2));
1828 case FILE_MEMORY_CONST
:
1829 emitInsn(0x4c180000);
1830 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1832 case FILE_IMMEDIATE
:
1833 emitInsn(0x38180000);
1834 emitIMMD(0x14, 19, insn
->src(2));
1837 assert(!"bad src1 file");
1840 emitNEG (0x31, insn
->src(0));
1841 emitNEG (0x30, insn
->src(2));
1843 emitIMMD(0x27, 5, insn
->src(1));
1844 emitGPR (0x08, insn
->src(0));
1845 emitGPR (0x00, insn
->def(0));
/* Encode an integer min/max. The opcode follows the file of src(1). The bit
 * at 0x30 is the signedness of dType, the 2-bit field at 0x2b carries subOp
 * and the bit at 0x2a is set for OP_MAX. */
1849 CodeEmitterGM107::emitIMNMX()
1851 switch (insn
->src(1).getFile()) {
1853 emitInsn(0x5c200000);
1854 emitGPR (0x14, insn
->src(1));
1856 case FILE_MEMORY_CONST
:
1857 emitInsn(0x4c200000);
1858 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1860 case FILE_IMMEDIATE
:
1861 emitInsn(0x38200000);
1862 emitIMMD(0x14, 19, insn
->src(1));
1865 assert(!"bad src1 file");
1869 emitField(0x30, 1, isSignedType(insn
->dType
));
1871 emitField(0x2b, 2, insn
->subOp
);
1872 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1874 emitGPR (0x08, insn
->src(0));
1875 emitGPR (0x00, insn
->def(0));
/* Encode ICMP. The condition code starts as insn->setCond and is passed
 * through reverseCondCode() when src(2) carries a negate modifier. Operand
 * placement mirrors emitIMAD: the outer switch is on the file of src(2), the
 * inner one on the file of src(1). The condition is emitted with emitCond3 at
 * 0x31 and the signedness of sType at 0x30. */
1879 CodeEmitterGM107::emitICMP()
1881 const CmpInstruction
*insn
= this->insn
->asCmp();
1882 CondCode cc
= insn
->setCond
;
1884 if (insn
->src(2).mod
.neg())
1885 cc
= reverseCondCode(cc
);
1887 switch(insn
->src(2).getFile()) {
1889 switch (insn
->src(1).getFile()) {
1891 emitInsn(0x5b400000);
1892 emitGPR (0x14, insn
->src(1));
1894 case FILE_MEMORY_CONST
:
1895 emitInsn(0x4b400000);
1896 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1898 case FILE_IMMEDIATE
:
1899 emitInsn(0x36400000);
1900 emitIMMD(0x14, 19, insn
->src(1));
1903 assert(!"bad src1 file");
1906 emitGPR (0x27, insn
->src(2));
1908 case FILE_MEMORY_CONST
:
1909 emitInsn(0x53400000);
1910 emitGPR (0x27, insn
->src(1));
1911 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1914 assert(!"bad src2 file");
1918 emitCond3(0x31, cc
);
1919 emitField(0x30, 1, isSignedType(insn
->sType
));
1920 emitGPR (0x08, insn
->src(0));
1921 emitGPR (0x00, insn
->def(0));
/* Encode ISET. The opcode follows the file of src(1). For ops other than
 * OP_SET the field at 0x2d selects AND/OR/XOR (0/1/2) and predicate src(2) is
 * emitted at 0x27. The condition uses emitCond3 at 0x31, the signedness of
 * sType goes to 0x30, and the bit at 0x2c is set when dType is TYPE_F32.
 * The result is written to the GPR def(0). */
1925 CodeEmitterGM107::emitISET()
1927 const CmpInstruction
*insn
= this->insn
->asCmp();
1929 switch (insn
->src(1).getFile()) {
1931 emitInsn(0x5b500000);
1932 emitGPR (0x14, insn
->src(1));
1934 case FILE_MEMORY_CONST
:
1935 emitInsn(0x4b500000);
1936 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1938 case FILE_IMMEDIATE
:
1939 emitInsn(0x36500000);
1940 emitIMMD(0x14, 19, insn
->src(1));
1943 assert(!"bad src1 file");
1947 if (insn
->op
!= OP_SET
) {
1949 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1950 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1951 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1953 assert(!"invalid set op");
1956 emitPRED(0x27, insn
->src(2));
1961 emitCond3(0x31, insn
->setCond
);
1962 emitField(0x30, 1, isSignedType(insn
->sType
));
1964 emitField(0x2c, 1, insn
->dType
== TYPE_F32
);
1966 emitGPR (0x08, insn
->src(0));
1967 emitGPR (0x00, insn
->def(0));
/* Encode ISETP. Same operand and set-op handling as emitISET, but the result
 * is written to predicate def(0) at 0x03 and, when it exists, predicate
 * def(1) at 0x00. */
1971 CodeEmitterGM107::emitISETP()
1973 const CmpInstruction
*insn
= this->insn
->asCmp();
1975 switch (insn
->src(1).getFile()) {
1977 emitInsn(0x5b600000);
1978 emitGPR (0x14, insn
->src(1));
1980 case FILE_MEMORY_CONST
:
1981 emitInsn(0x4b600000);
1982 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1984 case FILE_IMMEDIATE
:
1985 emitInsn(0x36600000);
1986 emitIMMD(0x14, 19, insn
->src(1));
1989 assert(!"bad src1 file");
1993 if (insn
->op
!= OP_SET
) {
1995 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1996 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1997 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1999 assert(!"invalid set op");
2002 emitPRED(0x27, insn
->src(2));
2007 emitCond3(0x31, insn
->setCond
);
2008 emitField(0x30, 1, isSignedType(insn
->sType
));
2010 emitGPR (0x08, insn
->src(0));
2011 emitPRED (0x03, insn
->def(0));
2012 if (insn
->defExists(1))
2013 emitPRED(0x00, insn
->def(1));
/* Encode SHL. The opcode follows the file of src(1); the bit at 0x27 is set
 * when subOp equals NV50_IR_SUBOP_SHIFT_WRAP. */
2019 CodeEmitterGM107::emitSHL()
2021 switch (insn
->src(1).getFile()) {
2023 emitInsn(0x5c480000);
2024 emitGPR (0x14, insn
->src(1));
2026 case FILE_MEMORY_CONST
:
2027 emitInsn(0x4c480000);
2028 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2030 case FILE_IMMEDIATE
:
2031 emitInsn(0x38480000);
2032 emitIMMD(0x14, 19, insn
->src(1));
2035 assert(!"bad src1 file");
2041 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2042 emitGPR (0x08, insn
->src(0));
2043 emitGPR (0x00, insn
->def(0));
/* Encode SHR. The opcode follows the file of src(1); the bit at 0x30 is the
 * signedness of dType and the bit at 0x27 is set when subOp equals
 * NV50_IR_SUBOP_SHIFT_WRAP. */
2047 CodeEmitterGM107::emitSHR()
2049 switch (insn
->src(1).getFile()) {
2051 emitInsn(0x5c280000);
2052 emitGPR (0x14, insn
->src(1));
2054 case FILE_MEMORY_CONST
:
2055 emitInsn(0x4c280000);
2056 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2058 case FILE_IMMEDIATE
:
2059 emitInsn(0x38280000);
2060 emitIMMD(0x14, 19, insn
->src(1));
2063 assert(!"bad src1 file");
2067 emitField(0x30, 1, isSignedType(insn
->dType
));
2070 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2071 emitGPR (0x08, insn
->src(0));
2072 emitGPR (0x00, insn
->def(0));
/* Encode SHF. The opcode depends on both the file of src(1) and on whether
 * the op is OP_SHL. A `type` value chosen from sType is written to the 2-bit
 * field at 0x25; the SHIFT_WRAP and SHIFT_HIGH flags are tested as bits of
 * subOp and stored at 0x32 and 0x30. src(2) is the GPR at 0x27. */
2076 CodeEmitterGM107::emitSHF()
2080 switch (insn
->src(1).getFile()) {
2082 emitInsn(insn
->op
== OP_SHL
? 0x5bf80000 : 0x5cf80000);
2083 emitGPR(0x14, insn
->src(1));
2085 case FILE_IMMEDIATE
:
2086 emitInsn(insn
->op
== OP_SHL
? 0x36f80000 : 0x38f80000);
2087 emitIMMD(0x14, 19, insn
->src(1));
2090 assert(!"bad src1 file");
2094 switch (insn
->sType
) {
2106 emitField(0x32, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_WRAP
));
2108 emitField(0x30, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_HIGH
));
2110 emitGPR (0x27, insn
->src(2));
2111 emitField(0x25, 2, type
);
2112 emitGPR (0x08, insn
->src(0));
2113 emitGPR (0x00, insn
->def(0));
/* Encode POPC. The single operand src(0) is placed at 0x14 with the opcode
 * chosen by its file; its invert modifier goes to the bit at 0x28.
 * NOTE(review): the assert text says "src1" although src(0) is inspected. */
2117 CodeEmitterGM107::emitPOPC()
2119 switch (insn
->src(0).getFile()) {
2121 emitInsn(0x5c080000);
2122 emitGPR (0x14, insn
->src(0));
2124 case FILE_MEMORY_CONST
:
2125 emitInsn(0x4c080000);
2126 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2128 case FILE_IMMEDIATE
:
2129 emitInsn(0x38080000);
2130 emitIMMD(0x14, 19, insn
->src(0));
2133 assert(!"bad src1 file");
2137 emitINV(0x28, insn
->src(0));
2138 emitGPR(0x00, insn
->def(0));
/* Encode BFI. Operand placement mirrors emitIMAD: the outer switch is on the
 * file of src(2), the inner one on the file of src(1); in the
 * FILE_MEMORY_CONST arm src(2) is the constant-buffer operand and src(1) the
 * GPR at 0x27. */
2142 CodeEmitterGM107::emitBFI()
2144 switch(insn
->src(2).getFile()) {
2146 switch (insn
->src(1).getFile()) {
2148 emitInsn(0x5bf00000);
2149 emitGPR (0x14, insn
->src(1));
2151 case FILE_MEMORY_CONST
:
2152 emitInsn(0x4bf00000);
2153 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2155 case FILE_IMMEDIATE
:
2156 emitInsn(0x36f00000);
2157 emitIMMD(0x14, 19, insn
->src(1));
2160 assert(!"bad src1 file");
2163 emitGPR (0x27, insn
->src(2));
2165 case FILE_MEMORY_CONST
:
2166 emitInsn(0x53f00000);
2167 emitGPR (0x27, insn
->src(1));
2168 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
2171 assert(!"bad src2 file");
2176 emitGPR (0x08, insn
->src(0));
2177 emitGPR (0x00, insn
->def(0));
/* Encode BFE. The opcode follows the file of src(1); the bit at 0x30 is the
 * signedness of dType and the bit at 0x28 is set when subOp equals
 * NV50_IR_SUBOP_EXTBF_REV. */
2181 CodeEmitterGM107::emitBFE()
2183 switch (insn
->src(1).getFile()) {
2185 emitInsn(0x5c000000);
2186 emitGPR (0x14, insn
->src(1));
2188 case FILE_MEMORY_CONST
:
2189 emitInsn(0x4c000000);
2190 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2192 case FILE_IMMEDIATE
:
2193 emitInsn(0x38000000);
2194 emitIMMD(0x14, 19, insn
->src(1));
2197 assert(!"bad src1 file");
2201 emitField(0x30, 1, isSignedType(insn
->dType
));
2203 emitField(0x28, 1, insn
->subOp
== NV50_IR_SUBOP_EXTBF_REV
);
2204 emitGPR (0x08, insn
->src(0));
2205 emitGPR (0x00, insn
->def(0));
/* Encode FLO. The single operand src(0) is placed at 0x14 with the opcode
 * chosen by its file. The bit at 0x30 is the signedness of dType, the bit at
 * 0x29 is set when subOp equals NV50_IR_SUBOP_BFIND_SAMT, and the invert
 * modifier of src(0) goes to 0x28.
 * NOTE(review): the assert text says "src1" although src(0) is inspected. */
2209 CodeEmitterGM107::emitFLO()
2211 switch (insn
->src(0).getFile()) {
2213 emitInsn(0x5c300000);
2214 emitGPR (0x14, insn
->src(0));
2216 case FILE_MEMORY_CONST
:
2217 emitInsn(0x4c300000);
2218 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2220 case FILE_IMMEDIATE
:
2221 emitInsn(0x38300000);
2222 emitIMMD(0x14, 19, insn
->src(0));
2225 assert(!"bad src1 file");
2229 emitField(0x30, 1, isSignedType(insn
->dType
));
2231 emitField(0x29, 1, insn
->subOp
== NV50_IR_SUBOP_BFIND_SAMT
);
2232 emitINV (0x28, insn
->src(0));
2233 emitGPR (0x00, insn
->def(0));
2236 /*******************************************************************************
2238 ******************************************************************************/
/* Emit the 3-bit load/store data-size code at bit position `pos`.
 * The code is derived from typeSizeof(type): 1 byte -> 0 (unsigned) or 1
 * (signed), 2 bytes -> 2 or 3, 4 bytes -> 4, 8 bytes -> 5, 16 bytes -> 6.
 * Other sizes hit the "bad type" assert. */
2241 CodeEmitterGM107::emitLDSTs(int pos
, DataType type
)
2245 switch (typeSizeof(type
)) {
2246 case 1: data
= isSignedType(type
) ? 1 : 0; break;
2247 case 2: data
= isSignedType(type
) ? 3 : 2; break;
2248 case 4: data
= 4; break;
2249 case 8: data
= 5; break;
2250 case 16: data
= 6; break;
2252 assert(!"bad type");
2256 emitField(pos
, 3, data
);
/* Emit the 2-bit caching mode at bit position `pos`, derived from
 * insn->cache: CACHE_CA -> 0, CACHE_CG -> 1, CACHE_CS -> 2, CACHE_CV -> 3.
 * Other values hit the "invalid caching mode" assert. */
2260 CodeEmitterGM107::emitLDSTc(int pos
)
2264 switch (insn
->cache
) {
2265 case CACHE_CA
: mode
= 0; break;
2266 case CACHE_CG
: mode
= 1; break;
2267 case CACHE_CS
: mode
= 2; break;
2268 case CACHE_CV
: mode
= 3; break;
2270 assert(!"invalid caching mode");
2274 emitField(pos
, 2, mode
);
/* Encode LDC (opcode 0xef900000): data size of dType at 0x30, subOp in the
 * 2-bit field at 0x2c, constant-buffer operand src(0), result in def(0). */
2278 CodeEmitterGM107::emitLDC()
2280 emitInsn (0xef900000);
2281 emitLDSTs(0x30, insn
->dType
);
2282 emitField(0x2c, 2, insn
->subOp
);
2283 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn
->src(0));
2284 emitGPR (0x00, insn
->def(0));
/* Encode LDL (opcode 0xef400000): data size of dType at 0x30, address from
 * src(0) via emitADDR, result in def(0). */
2288 CodeEmitterGM107::emitLDL()
2290 emitInsn (0xef400000);
2291 emitLDSTs(0x30, insn
->dType
);
2293 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2294 emitGPR (0x00, insn
->def(0));
/* Encode LDS (opcode 0xef480000): data size of dType at 0x30, address from
 * src(0) via emitADDR, result in def(0). */
2298 CodeEmitterGM107::emitLDS()
2300 emitInsn (0xef480000);
2301 emitLDSTs(0x30, insn
->dType
);
2302 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2303 emitGPR (0x00, insn
->def(0));
/* Encode LD (opcode 0x80000000): data size of dType at 0x35; the bit at 0x34
 * is set when the indirect address value of src(0) is 8 bytes wide. The
 * address comes from src(0) via emitADDR and the result goes to def(0). */
2307 CodeEmitterGM107::emitLD()
2309 emitInsn (0x80000000);
2312 emitLDSTs(0x35, insn
->dType
);
2313 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2314 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2315 emitGPR (0x00, insn
->def(0));
/* Encode STL (opcode 0xef500000): data size of dType at 0x30, address from
 * src(0) via emitADDR, stored value src(1) in the GPR field at 0x00. */
2319 CodeEmitterGM107::emitSTL()
2321 emitInsn (0xef500000);
2322 emitLDSTs(0x30, insn
->dType
);
2324 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2325 emitGPR (0x00, insn
->src(1));
/* Encode STS (opcode 0xef580000): data size of dType at 0x30, address from
 * src(0) via emitADDR, stored value src(1) in the GPR field at 0x00. */
2329 CodeEmitterGM107::emitSTS()
2331 emitInsn (0xef580000);
2332 emitLDSTs(0x30, insn
->dType
);
2333 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2334 emitGPR (0x00, insn
->src(1));
/* Encode ST (opcode 0xa0000000): data size of dType at 0x35; the bit at 0x34
 * is set when the indirect address value of src(0) is 8 bytes wide. The
 * address comes from src(0) and the stored value is src(1). */
2338 CodeEmitterGM107::emitST()
2340 emitInsn (0xa0000000);
2343 emitLDSTs(0x35, insn
->dType
);
2344 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2345 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2346 emitGPR (0x00, insn
->src(1));
/* Encode ALD (opcode 0xefd80000). The 2-bit field at 0x2f holds
 * (def(0) register size / 4) - 1; the second indirect of src(0) is the GPR at
 * 0x27 and the address itself is emitted from src(0) via emitADDR. */
2350 CodeEmitterGM107::emitALD()
2352 emitInsn (0xefd80000);
2353 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2354 emitGPR (0x27, insn
->src(0).getIndirect(1));
2357 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2358 emitGPR (0x00, insn
->def(0));
/* Encode AST (opcode 0xeff00000). The 2-bit field at 0x2f holds
 * (typeSizeof(dType) / 4) - 1; the second indirect of src(0) is the GPR at
 * 0x27, the address comes from src(0) and the stored value is src(1). */
2362 CodeEmitterGM107::emitAST()
2364 emitInsn (0xeff00000);
2365 emitField(0x2f, 2, (typeSizeof(insn
->dType
) / 4) - 1);
2366 emitGPR (0x27, insn
->src(0).getIndirect(1));
2368 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2369 emitGPR (0x00, insn
->src(1));
/* Encode ISBERD (opcode 0xefd00000) with GPR source src(0) at 0x08 and GPR
 * destination def(0) at 0x00. */
2373 CodeEmitterGM107::emitISBERD()
2375 emitInsn(0xefd00000);
2376 emitGPR (0x08, insn
->src(0));
2377 emitGPR (0x00, insn
->def(0));
/* Encode AL2P (opcode 0xefa00000). The 2-bit field at 0x2f holds
 * (def(0) register size / 4) - 1, the 11-bit field at 0x14 holds the data
 * offset of src(0), and the first indirect of src(0) is the GPR at 0x08. */
2381 CodeEmitterGM107::emitAL2P()
2383 emitInsn (0xefa00000);
2384 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2387 emitField(0x14, 11, insn
->src(0).get()->reg
.data
.offset
);
2388 emitGPR (0x08, insn
->src(0).getIndirect(0));
2389 emitGPR (0x00, insn
->def(0));
/* Fixup callback passed to addInterp() by emitIPA(): rewrites the
 * interpolation bits of the two code words at code[loc] / code[loc + 1].
 *
 * entry: supplies the recorded interpolation mode (ipa), register (reg) and
 *        word index (loc).
 * data:  when data.flatshade is set and the mode is NV50_IR_INTERP_SC, the
 *        mode becomes NV50_IR_INTERP_FLAT; otherwise, when
 *        data.force_persample_interp is set, the sample mode is the default
 *        and the mode is not flat, NV50_IR_INTERP_CENTROID is OR'd in.
 *
 * The low two bits of ipa are stored at bit 0x16 of code[loc + 1] and bits
 * 2..3 at bit 0x14; the 8-bit field at bit 0x14 of code[loc] is replaced by
 * reg. */
2393 interpApply(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
2395 int ipa
= entry
->ipa
;
2396 int reg
= entry
->reg
;
2397 int loc
= entry
->loc
;
2399 if (data
.flatshade
&&
2400 (ipa
& NV50_IR_INTERP_MODE_MASK
) == NV50_IR_INTERP_SC
) {
2401 ipa
= NV50_IR_INTERP_FLAT
;
2403 } else if (data
.force_persample_interp
&&
2404 (ipa
& NV50_IR_INTERP_SAMPLE_MASK
) == NV50_IR_INTERP_DEFAULT
&&
2405 (ipa
& NV50_IR_INTERP_MODE_MASK
) != NV50_IR_INTERP_FLAT
) {
2406 ipa
|= NV50_IR_INTERP_CENTROID
;
2408 code
[loc
+ 1] &= ~(0xf << 0x14);
2409 code
[loc
+ 1] |= (ipa
& 0x3) << 0x16;
2410 code
[loc
+ 1] |= (ipa
& 0xc) << (0x14 - 2);
2411 code
[loc
+ 0] &= ~(0xff << 0x14);
2412 code
[loc
+ 0] |= reg
<< 0x14;
/* Encode IPA (opcode 0xe0000000). The interpolation mode maps to `ipam`
 * (LINEAR 0, PERSPECTIVE 1, FLAT 2, SC 3) at 0x36 and the sample mode to
 * `ipas` (DEFAULT 0, CENTROID 1, OFFSET 2) at 0x34. After emitADDR, bit
 * 0x40 of code[1] (".idx") is set unless bits 8..15 of code[0] are all ones.
 * For OP_PINTERP src(1) is the GPR at 0x14 and its register id is recorded
 * with addInterp(); otherwise 0xff is recorded. In OFFSET sample mode the
 * extra source (src(2) for OP_PINTERP, src(1) otherwise) is the GPR at
 * 0x27. */
2416 CodeEmitterGM107::emitIPA()
2418 int ipam
= 0, ipas
= 0;
2420 switch (insn
->getInterpMode()) {
2421 case NV50_IR_INTERP_LINEAR
: ipam
= 0; break;
2422 case NV50_IR_INTERP_PERSPECTIVE
: ipam
= 1; break;
2423 case NV50_IR_INTERP_FLAT
: ipam
= 2; break;
2424 case NV50_IR_INTERP_SC
: ipam
= 3; break;
2426 assert(!"invalid ipa mode");
2430 switch (insn
->getSampleMode()) {
2431 case NV50_IR_INTERP_DEFAULT
: ipas
= 0; break;
2432 case NV50_IR_INTERP_CENTROID
: ipas
= 1; break;
2433 case NV50_IR_INTERP_OFFSET
: ipas
= 2; break;
2435 assert(!"invalid ipa sample mode");
2439 emitInsn (0xe0000000);
2440 emitField(0x36, 2, ipam
);
2441 emitField(0x34, 2, ipas
);
2443 emitField(0x2f, 3, 7);
2444 emitADDR (0x08, 0x1c, 10, 0, insn
->src(0));
2445 if ((code
[0] & 0x0000ff00) != 0x0000ff00)
2446 code
[1] |= 0x00000040; /* .idx */
2447 emitGPR(0x00, insn
->def(0));
2449 if (insn
->op
== OP_PINTERP
) {
2450 emitGPR(0x14, insn
->src(1));
2451 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2452 emitGPR(0x27, insn
->src(2));
2453 addInterp(insn
->ipa
, insn
->getSrc(1)->reg
.data
.id
, interpApply
);
2455 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2456 emitGPR(0x27, insn
->src(1));
2458 addInterp(insn
->ipa
, 0xff, interpApply
);
2461 if (insn
->getSampleMode() != NV50_IR_INTERP_OFFSET
)
/* Encode ATOM. For NV50_IR_SUBOP_ATOM_CAS the opcode is 0xee000000 and dType
 * maps U32 -> 0, U64 -> 1. Otherwise the opcode is 0xed000000 and dType maps
 * U32/S32/U64/F32/B128/S64 -> 0..5. The operation goes to the 4-bit field at
 * 0x34, the type to the 3-bit field at 0x31, and the bit at 0x30 is set when
 * the indirect address value of src(0) is 8 bytes wide. */
2466 CodeEmitterGM107::emitATOM()
2468 unsigned dType
, subOp
;
2470 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2471 switch (insn
->dType
) {
2472 case TYPE_U32
: dType
= 0; break;
2473 case TYPE_U64
: dType
= 1; break;
2474 default: assert(!"unexpected dType"); dType
= 0; break;
2478 emitInsn (0xee000000);
2480 switch (insn
->dType
) {
2481 case TYPE_U32
: dType
= 0; break;
2482 case TYPE_S32
: dType
= 1; break;
2483 case TYPE_U64
: dType
= 2; break;
2484 case TYPE_F32
: dType
= 3; break;
2485 case TYPE_B128
: dType
= 4; break;
2486 case TYPE_S64
: dType
= 5; break;
2487 default: assert(!"unexpected dType"); dType
= 0; break;
2489 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2492 subOp
= insn
->subOp
;
2494 emitInsn (0xed000000);
2497 emitField(0x34, 4, subOp
);
2498 emitField(0x31, 3, dType
);
2499 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2500 emitGPR (0x14, insn
->src(1));
2501 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2502 emitGPR (0x00, insn
->def(0));
/* Encode ATOMS. For NV50_IR_SUBOP_ATOM_CAS the opcode is 0xee000000 and the
 * type (U32 -> 0, U64 -> 1) is a single bit at 0x34. Otherwise the opcode is
 * 0xec000000 and the type (U32/S32/U64/S64 -> 0..3) is the 3-bit field at
 * 0x1c. The operation is then written to the 4-bit field at 0x34. */
2506 CodeEmitterGM107::emitATOMS()
2508 unsigned dType
, subOp
;
2510 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2511 switch (insn
->dType
) {
2512 case TYPE_U32
: dType
= 0; break;
2513 case TYPE_U64
: dType
= 1; break;
2514 default: assert(!"unexpected dType"); dType
= 0; break;
2518 emitInsn (0xee000000);
2519 emitField(0x34, 1, dType
);
2521 switch (insn
->dType
) {
2522 case TYPE_U32
: dType
= 0; break;
2523 case TYPE_S32
: dType
= 1; break;
2524 case TYPE_U64
: dType
= 2; break;
2525 case TYPE_S64
: dType
= 3; break;
2526 default: assert(!"unexpected dType"); dType
= 0; break;
2529 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2532 subOp
= insn
->subOp
;
2534 emitInsn (0xec000000);
2535 emitField(0x1c, 3, dType
);
2538 emitField(0x34, 4, subOp
);
2539 emitGPR (0x14, insn
->src(1));
2540 emitADDR (0x08, 0x12, 22, 0, insn
->src(0));
2541 emitGPR (0x00, insn
->def(0));
/* Encode RED (opcode 0xebf80000). dType maps U32/S32/U64/F32/B128/S64 ->
 * 0..5 and is stored in the 3-bit field at 0x14; subOp goes to the 3-bit
 * field at 0x17, and the bit at 0x30 is set when the indirect address value
 * of src(0) is 8 bytes wide. src(1) is emitted in the GPR field at 0x00. */
2545 CodeEmitterGM107::emitRED()
2549 switch (insn
->dType
) {
2550 case TYPE_U32
: dType
= 0; break;
2551 case TYPE_S32
: dType
= 1; break;
2552 case TYPE_U64
: dType
= 2; break;
2553 case TYPE_F32
: dType
= 3; break;
2554 case TYPE_B128
: dType
= 4; break;
2555 case TYPE_S64
: dType
= 5; break;
2556 default: assert(!"unexpected dType"); dType
= 0; break;
2559 emitInsn (0xebf80000);
2560 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2561 emitField(0x17, 3, insn
->subOp
);
2562 emitField(0x14, 3, dType
);
2563 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2564 emitGPR (0x00, insn
->src(1));
/* Encode CCTL. Opcode 0xef600000 is used when src(0) is in
 * FILE_MEMORY_GLOBAL and 0xef800000 otherwise; the address is emitted with a
 * `width` chosen alongside the opcode, and subOp goes to the 4-bit field at
 * 0x00. */
2568 CodeEmitterGM107::emitCCTL()
2571 if (insn
->src(0).getFile() == FILE_MEMORY_GLOBAL
) {
2572 emitInsn(0xef600000);
2575 emitInsn(0xef800000);
2578 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2579 emitADDR (0x08, 0x16, width
, 2, insn
->src(0));
2580 emitField(0x00, 4, insn
->subOp
);
2583 /*******************************************************************************
2585 ******************************************************************************/
/* Encode PIXLD (opcode 0xefe80000): subOp in the 3-bit field at 0x1f, GPR
 * source src(0) at 0x08 and GPR destination def(0) at 0x00. */
2588 CodeEmitterGM107::emitPIXLD()
2590 emitInsn (0xefe80000);
2592 emitField(0x1f, 3, insn
->subOp
);
2593 emitGPR (0x08, insn
->src(0));
2594 emitGPR (0x00, insn
->def(0));
2597 /*******************************************************************************
2599 ******************************************************************************/
/* Emit the second texture source register at bit position `pos`. The source
 * index is 2 when source 1 is the predicate source, otherwise 1; the GPR is
 * emitted from that source when it exists. */
2602 CodeEmitterGM107::emitTEXs(int pos
)
2604 int src1
= insn
->predSrc
== 1 ? 2 : 1;
2605 if (insn
->srcExists(src1
))
2606 emitGPR(pos
, insn
->src(src1
));
/* Encode TEX/TXB/TXL. When tex.levelZero is not set, `lodm` is 0/2/3 for
 * OP_TEX/OP_TXB/OP_TXL. With an indirect resource (tex.rIndirectSrc >= 0)
 * opcode 0xdeb80000 is used; otherwise opcode 0xc0380000 with tex.r in the
 * 13-bit field at 0x24. Shadow, liveOnly, derivAll, mask, dimensionality
 * (3 for cube targets, else getDim() - 1) and array flags follow. */
2612 CodeEmitterGM107::emitTEX()
2614 const TexInstruction
*insn
= this->insn
->asTex();
2617 if (!insn
->tex
.levelZero
) {
2619 case OP_TEX
: lodm
= 0; break;
2620 case OP_TXB
: lodm
= 2; break;
2621 case OP_TXL
: lodm
= 3; break;
2623 assert(!"invalid tex op");
2630 if (insn
->tex
.rIndirectSrc
>= 0) {
2631 emitInsn (0xdeb80000);
2632 emitField(0x25, 2, lodm
);
2633 emitField(0x24, 1, insn
->tex
.useOffsets
== 1);
2635 emitInsn (0xc0380000);
2636 emitField(0x37, 2, lodm
);
2637 emitField(0x36, 1, insn
->tex
.useOffsets
== 1);
2638 emitField(0x24, 13, insn
->tex
.r
);
2641 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2642 emitField(0x31, 1, insn
->tex
.liveOnly
);
2643 emitField(0x23, 1, insn
->tex
.derivAll
);
2644 emitField(0x1f, 4, insn
->tex
.mask
);
2645 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2646 insn
->tex
.target
.getDim() - 1);
2647 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2649 emitGPR (0x08, insn
->src(0));
2650 emitGPR (0x00, insn
->def(0));
/* Encode TLD. Opcode 0xdd380000 for an indirect resource, otherwise
 * 0xdc380000 with tex.r in the 13-bit field at 0x24. The bit at 0x37 is set
 * when tex.levelZero is 0 and the bit at 0x32 when the target is
 * multisampled; liveOnly, useOffsets, mask, dimensionality and array flags
 * follow. */
2654 CodeEmitterGM107::emitTLD()
2656 const TexInstruction
*insn
= this->insn
->asTex();
2658 if (insn
->tex
.rIndirectSrc
>= 0) {
2659 emitInsn (0xdd380000);
2661 emitInsn (0xdc380000);
2662 emitField(0x24, 13, insn
->tex
.r
);
2665 emitField(0x37, 1, insn
->tex
.levelZero
== 0);
2666 emitField(0x32, 1, insn
->tex
.target
.isMS());
2667 emitField(0x31, 1, insn
->tex
.liveOnly
);
2668 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2669 emitField(0x1f, 4, insn
->tex
.mask
);
2670 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2671 insn
->tex
.target
.getDim() - 1);
2672 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2674 emitGPR (0x08, insn
->src(0));
2675 emitGPR (0x00, insn
->def(0));
/* Encode TLD4. Opcode 0xdef80000 for an indirect resource, otherwise
 * 0xc8380000 with tex.r in the 13-bit field at 0x24. tex.gatherComp and the
 * useOffsets == 4 / useOffsets == 1 flags are emitted in both forms, at
 * different bit positions.
 * NOTE(review): the two useOffsets flags are emitted as 2-bit fields at
 * adjacent bit positions (0x25/0x24 and 0x37/0x36), so the declared widths
 * overlap; the values are only 0 or 1 - confirm this is intended. */
2679 CodeEmitterGM107::emitTLD4()
2681 const TexInstruction
*insn
= this->insn
->asTex();
2683 if (insn
->tex
.rIndirectSrc
>= 0) {
2684 emitInsn (0xdef80000);
2685 emitField(0x26, 2, insn
->tex
.gatherComp
);
2686 emitField(0x25, 2, insn
->tex
.useOffsets
== 4);
2687 emitField(0x24, 2, insn
->tex
.useOffsets
== 1);
2689 emitInsn (0xc8380000);
2690 emitField(0x38, 2, insn
->tex
.gatherComp
);
2691 emitField(0x37, 2, insn
->tex
.useOffsets
== 4);
2692 emitField(0x36, 2, insn
->tex
.useOffsets
== 1);
2693 emitField(0x24, 13, insn
->tex
.r
);
2696 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2697 emitField(0x31, 1, insn
->tex
.liveOnly
);
2698 emitField(0x23, 1, insn
->tex
.derivAll
);
2699 emitField(0x1f, 4, insn
->tex
.mask
);
2700 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2701 insn
->tex
.target
.getDim() - 1);
2702 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2704 emitGPR (0x08, insn
->src(0));
2705 emitGPR (0x00, insn
->def(0));
/* Encode TXD. Opcode 0xde780000 for an indirect resource, otherwise
 * 0xde380000 with tex.r in the 13-bit field at 0x24; liveOnly, useOffsets,
 * mask, dimensionality and array flags follow. */
2709 CodeEmitterGM107::emitTXD()
2711 const TexInstruction
*insn
= this->insn
->asTex();
2713 if (insn
->tex
.rIndirectSrc
>= 0) {
2714 emitInsn (0xde780000);
2716 emitInsn (0xde380000);
2717 emitField(0x24, 13, insn
->tex
.r
);
2720 emitField(0x31, 1, insn
->tex
.liveOnly
);
2721 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2722 emitField(0x1f, 4, insn
->tex
.mask
);
2723 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2724 insn
->tex
.target
.getDim() - 1);
2725 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2727 emitGPR (0x08, insn
->src(0));
2728 emitGPR (0x00, insn
->def(0));
/* Encode TMML. Opcode 0xdf600000 for an indirect resource, otherwise
 * 0xdf580000 with tex.r in the 13-bit field at 0x24; liveOnly, derivAll,
 * mask, dimensionality and array flags follow. */
2732 CodeEmitterGM107::emitTMML()
2734 const TexInstruction
*insn
= this->insn
->asTex();
2736 if (insn
->tex
.rIndirectSrc
>= 0) {
2737 emitInsn (0xdf600000);
2739 emitInsn (0xdf580000);
2740 emitField(0x24, 13, insn
->tex
.r
);
2743 emitField(0x31, 1, insn
->tex
.liveOnly
);
2744 emitField(0x23, 1, insn
->tex
.derivAll
);
2745 emitField(0x1f, 4, insn
->tex
.mask
);
2746 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2747 insn
->tex
.target
.getDim() - 1);
2748 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2750 emitGPR (0x08, insn
->src(0));
2751 emitGPR (0x00, insn
->def(0));
/* Encode TXQ. tex.query maps to `type`: DIMS 0x01, TYPE 0x02,
 * SAMPLE_POSITION 0x05, FILTER 0x10, LOD 0x12, WRAP 0x14, BORDER_COLOUR
 * 0x16; the value is stored in the 6-bit field at 0x16. Opcode 0xdf500000
 * is used for an indirect resource, otherwise 0xdf480000 with tex.r in the
 * 13-bit field at 0x24. */
2755 CodeEmitterGM107::emitTXQ()
2757 const TexInstruction
*insn
= this->insn
->asTex();
2760 switch (insn
->tex
.query
) {
2761 case TXQ_DIMS
: type
= 0x01; break;
2762 case TXQ_TYPE
: type
= 0x02; break;
2763 case TXQ_SAMPLE_POSITION
: type
= 0x05; break;
2764 case TXQ_FILTER
: type
= 0x10; break;
2765 case TXQ_LOD
: type
= 0x12; break;
2766 case TXQ_WRAP
: type
= 0x14; break;
2767 case TXQ_BORDER_COLOUR
: type
= 0x16; break;
2769 assert(!"invalid txq query");
2773 if (insn
->tex
.rIndirectSrc
>= 0) {
2774 emitInsn (0xdf500000);
2776 emitInsn (0xdf480000);
2777 emitField(0x24, 13, insn
->tex
.r
);
2780 emitField(0x31, 1, insn
->tex
.liveOnly
);
2781 emitField(0x1f, 4, insn
->tex
.mask
);
2782 emitField(0x16, 6, type
);
2783 emitGPR (0x08, insn
->src(0));
2784 emitGPR (0x00, insn
->def(0));
/* Encode DEPBAR (opcode 0xf0f00000). Sets the "le" bit at 0x1d, writes the
 * constant 5 to the 3-bit field at 0x1a, and stores subOp in both 6-bit
 * fields at 0x14 and 0x00. */
2788 CodeEmitterGM107::emitDEPBAR()
2790 emitInsn (0xf0f00000);
2791 emitField(0x1d, 1, 1); /* le */
2792 emitField(0x1a, 3, 5);
2793 emitField(0x14, 6, insn
->subOp
);
2794 emitField(0x00, 6, insn
->subOp
);
2797 /*******************************************************************************
2799 ******************************************************************************/
/* Encode NOP (opcode 0x50b00000). */
2802 CodeEmitterGM107::emitNOP()
2804 emitInsn(0x50b00000);
/* Encode KIL (opcode 0xe3300000) with condition code CC_TR at 0x00. */
2808 CodeEmitterGM107::emitKIL()
2810 emitInsn (0xe3300000);
2811 emitCond5(0x00, CC_TR
);
/* Encode OUT. `cut` is set for OP_RESTART or a non-zero subOp, `emit` for
 * OP_EMIT; the two are combined as (cut << 1) | emit in the 2-bit field at
 * 0x27. The opcode follows the file of src(1). */
2815 CodeEmitterGM107::emitOUT()
2817 const int cut
= insn
->op
== OP_RESTART
|| insn
->subOp
;
2818 const int emit
= insn
->op
== OP_EMIT
;
2820 switch (insn
->src(1).getFile()) {
2822 emitInsn(0xfbe00000);
2823 emitGPR (0x14, insn
->src(1));
2825 case FILE_IMMEDIATE
:
2826 emitInsn(0xf6e00000);
2827 emitIMMD(0x14, 19, insn
->src(1));
2829 case FILE_MEMORY_CONST
:
2830 emitInsn(0xebe00000);
2831 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2834 assert(!"bad src1 file");
2838 emitField(0x27, 2, (cut
<< 1) | emit
);
2839 emitGPR (0x08, insn
->src(0));
2840 emitGPR (0x00, insn
->def(0));
/* Encode BAR (opcode 0xf0a80000). subOp maps to the 8-bit field at 0x20
 * (RED_POPC 0x02, RED_AND 0x0a, RED_OR 0x12, ARRIVE 0x81; anything else is
 * asserted to be BAR_SYNC).
 * src(0) is emitted either as a GPR at 0x08 or as an 8-bit immediate with
 * the flag bit 0x2b set. src(1) is emitted either as a GPR at 0x14 or as a
 * 12-bit immediate with the flag bit 0x2c set. The immediate for the second
 * operand is read from src(1), the operand whose file was just tested; it
 * was previously read from src(0), duplicating the first operand's value.
 * An optional predicate src(2) (when it is not the instruction's own
 * predicate) goes to 0x27 with its NOT modifier at 0x2a; otherwise the field
 * at 0x27 is set to 7. */
2844 CodeEmitterGM107::emitBAR()
2848 emitInsn (0xf0a80000);
2850 switch (insn
->subOp
) {
2851 case NV50_IR_SUBOP_BAR_RED_POPC
: subop
= 0x02; break;
2852 case NV50_IR_SUBOP_BAR_RED_AND
: subop
= 0x0a; break;
2853 case NV50_IR_SUBOP_BAR_RED_OR
: subop
= 0x12; break;
2854 case NV50_IR_SUBOP_BAR_ARRIVE
: subop
= 0x81; break;
2857 assert(insn
->subOp
== NV50_IR_SUBOP_BAR_SYNC
);
2861 emitField(0x20, 8, subop
);
2864 if (insn
->src(0).getFile() == FILE_GPR
) {
2865 emitGPR(0x08, insn
->src(0));
2867 ImmediateValue
*imm
= insn
->getSrc(0)->asImm();
2869 emitField(0x08, 8, imm
->reg
.data
.u32
);
2870 emitField(0x2b, 1, 1);
2874 if (insn
->src(1).getFile() == FILE_GPR
) {
2875 emitGPR(0x14, insn
->src(1));
2877 ImmediateValue
*imm
= insn
->getSrc(1)->asImm();
2879 emitField(0x14, 12, imm
->reg
.data
.u32
);
2880 emitField(0x2c, 1, 1);
2883 if (insn
->srcExists(2) && (insn
->predSrc
!= 2)) {
2884 emitPRED (0x27, insn
->src(2));
2885 emitField(0x2a, 1, insn
->src(2).mod
== Modifier(NV50_IR_MOD_NOT
));
2887 emitField(0x27, 3, 7);
/* Encode MEMBAR (opcode 0xef980000); subOp shifted right by two is stored
 * in the 2-bit field at 0x08. */
2892 CodeEmitterGM107::emitMEMBAR()
2894 emitInsn (0xef980000);
2895 emitField(0x08, 2, insn
->subOp
>> 2);
/* Encode VOTE (opcode 0x50d80000). src(0) must be a predicate; it is
 * emitted at 0x27 with its NOT modifier at 0x2a. The defs are scanned for a
 * GPR destination (index r, emitted at 0x00) and a predicate destination
 * (index p, emitted at 0x2d). subOp goes to the 2-bit field at 0x30. */
2899 CodeEmitterGM107::emitVOTE()
2901 assert(insn
->src(0).getFile() == FILE_PREDICATE
);
2904 for (int i
= 0; insn
->defExists(i
); i
++) {
2905 if (insn
->def(i
).getFile() == FILE_GPR
)
2907 else if (insn
->def(i
).getFile() == FILE_PREDICATE
)
2911 emitInsn (0x50d80000);
2912 emitField(0x30, 2, insn
->subOp
);
2914 emitGPR (0x00, insn
->def(r
));
2918 emitPRED (0x2d, insn
->def(p
));
2921 emitField(0x2a, 1, insn
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
));
2922 emitPRED (0x27, insn
->src(0));
/* Emit the surface target code in the 4-bit field at 0x20. The instruction
 * must be a surface op (OP_SULDB..OP_SUREDP). The code is chosen per texture
 * target, with these groups sharing a branch: BUFFER; 1D_ARRAY; 2D and RECT;
 * 2D_ARRAY, CUBE and CUBE_ARRAY; 3D. Any remaining target is asserted to be
 * TEX_TARGET_1D. */
2926 CodeEmitterGM107::emitSUTarget()
2928 const TexInstruction
*insn
= this->insn
->asTex();
2931 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
2933 if (insn
->tex
.target
== TEX_TARGET_BUFFER
) {
2935 } else if (insn
->tex
.target
== TEX_TARGET_1D_ARRAY
) {
2937 } else if (insn
->tex
.target
== TEX_TARGET_2D
||
2938 insn
->tex
.target
== TEX_TARGET_RECT
) {
2940 } else if (insn
->tex
.target
== TEX_TARGET_2D_ARRAY
||
2941 insn
->tex
.target
== TEX_TARGET_CUBE
||
2942 insn
->tex
.target
== TEX_TARGET_CUBE_ARRAY
) {
2944 } else if (insn
->tex
.target
== TEX_TARGET_3D
) {
2947 assert(insn
->tex
.target
== TEX_TARGET_1D
);
2949 emitField(0x20, 4, target
);
/* Emit the surface handle taken from source `s` of a surface op
 * (OP_SULDB..OP_SUREDP): a GPR source goes to 0x27; otherwise the source is
 * read as an immediate, the bit at 0x33 is set and the value is stored in
 * the 13-bit field at 0x24. */
2953 CodeEmitterGM107::emitSUHandle(const int s
)
2955 const TexInstruction
*insn
= this->insn
->asTex();
2957 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
2959 if (insn
->src(s
).getFile() == FILE_GPR
) {
2960 emitGPR(0x27, insn
->src(s
));
2962 ImmediateValue
*imm
= insn
->getSrc(s
)->asImm();
2964 emitField(0x33, 1, 1);
2965 emitField(0x24, 13, imm
->reg
.data
.u32
);
/* Encode a surface store (opcode 0xeb200000). The bit at 0x34 is set for
 * OP_SUSTB; the 4-bit field at 0x14 is set to 0xf (rgba). src(0) is the GPR
 * at 0x08 and src(1) the GPR at 0x00. */
2970 CodeEmitterGM107::emitSUSTx()
2972 const TexInstruction
*insn
= this->insn
->asTex();
2974 emitInsn(0xeb200000);
2975 if (insn
->op
== OP_SUSTB
)
2976 emitField(0x34, 1, 1);
2980 emitField(0x14, 4, 0xf); // rgba
2981 emitGPR (0x08, insn
->src(0));
2982 emitGPR (0x00, insn
->src(1));
/* Encode a surface load (opcode 0xeb000000). The bit at 0x34 is set for
 * OP_SULDB. dType maps S8/U16/S16/U32/U64/B128 -> 1..6 (any other type is
 * asserted to be TYPE_U8) and is stored in the 3-bit field at 0x14. */
2988 CodeEmitterGM107::emitSULDx()
2990 const TexInstruction
*insn
= this->insn
->asTex();
2993 emitInsn(0xeb000000);
2994 if (insn
->op
== OP_SULDB
)
2995 emitField(0x34, 1, 1);
2998 switch (insn
->dType
) {
2999 case TYPE_S8
: type
= 1; break;
3000 case TYPE_U16
: type
= 2; break;
3001 case TYPE_S16
: type
= 3; break;
3002 case TYPE_U32
: type
= 4; break;
3003 case TYPE_U64
: type
= 5; break;
3004 case TYPE_B128
: type
= 6; break;
3006 assert(insn
->dType
== TYPE_U8
);
3010 emitField(0x14, 3, type
);
3011 emitGPR (0x00, insn
->def(0));
3012 emitGPR (0x08, insn
->src(0));
/* Encode a surface reduction/atomic. Opcode 0xeac00000 is used for
 * NV50_IR_SUBOP_ATOM_CAS and 0xea600000 otherwise; the bit at 0x34 is set
 * for OP_SUREDB. dType maps S32 -> 1, U64 -> 2, F32 -> 3, S64 -> 5 (any
 * other type is asserted to be TYPE_U32, leaving type 0) and is stored in
 * the 3-bit field at 0x24; the operation goes to the 4-bit field at 0x1d. */
3018 CodeEmitterGM107::emitSUREDx()
3020 const TexInstruction
*insn
= this->insn
->asTex();
3021 uint8_t type
= 0, subOp
;
3023 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
)
3024 emitInsn(0xeac00000);
3026 emitInsn(0xea600000);
3028 if (insn
->op
== OP_SUREDB
)
3029 emitField(0x34, 1, 1);
3033 switch (insn
->dType
) {
3034 case TYPE_S32
: type
= 1; break;
3035 case TYPE_U64
: type
= 2; break;
3036 case TYPE_F32
: type
= 3; break;
3037 case TYPE_S64
: type
= 5; break;
3039 assert(insn
->dType
== TYPE_U32
);
3044 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
3046 } else if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
) {
3049 subOp
= insn
->subOp
;
3052 emitField(0x24, 3, type
);
3053 emitField(0x1d, 4, subOp
);
3054 emitGPR (0x14, insn
->src(1));
3055 emitGPR (0x08, insn
->src(0));
3056 emitGPR (0x00, insn
->def(0));
3061 /*******************************************************************************
3062 * assembler front-end
3063 ******************************************************************************/
/* Assembler front-end: encodes one instruction into the output buffer.
 *
 * The space needed is 16 bytes when writeIssueDelays is set and codeSize is
 * a multiple of 32, otherwise 8. Instructions whose encSize is not 8 are
 * reported with ERROR("skipping undecodable instruction: "), and a full
 * buffer with ERROR("code emitter output buffer too small\n"). With
 * writeIssueDelays, the instruction's 21-bit sched value is written into
 * slot n of a separate data word pair, n being derived from the position of
 * the instruction inside its 32-byte group. The remaining visible tests pick
 * the per-opcode emitter from dType/sType, operand files and subOp; loads
 * and stores dispatch on the file of src(0). */
3066 CodeEmitterGM107::emitInstruction(Instruction
*i
)
3068 const unsigned int size
= (writeIssueDelays
&& !(codeSize
& 0x1f)) ? 16 : 8;
3073 if (insn
->encSize
!= 8) {
3074 ERROR("skipping undecodable instruction: "); insn
->print();
3077 if (codeSize
+ size
> codeSizeLimit
) {
3078 ERROR("code emitter output buffer too small\n");
3082 if (writeIssueDelays
) {
3083 int n
= ((codeSize
& 0x1f) / 8) - 1;
3086 data
[0] = 0x00000000;
3087 data
[1] = 0x00000000;
3093 emitField(data
, n
* 21, 21, insn
->sched
);
3149 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
3150 insn
->src(0).getFile() == FILE_PREDICATE
)) {
3152 } else if (isFloatType(insn
->dType
)) {
3153 if (isFloatType(insn
->sType
))
3158 if (isFloatType(insn
->sType
))
3169 if (isFloatType(insn
->dType
)) {
3170 if (insn
->dType
== TYPE_F64
)
3179 if (isFloatType(insn
->dType
)) {
3180 if (insn
->dType
== TYPE_F64
)
3190 if (isFloatType(insn
->dType
)) {
3191 if (insn
->dType
== TYPE_F64
)
3204 if (isFloatType(insn
->dType
)) {
3205 if (insn
->dType
== TYPE_F64
)
3214 if (typeSizeof(insn
->sType
) == 8)
3220 if (typeSizeof(insn
->sType
) == 8)
3238 if (isFloatType(insn
->dType
))
3247 if (insn
->def(0).getFile() != FILE_PREDICATE
) {
3248 if (isFloatType(insn
->sType
))
3249 if (insn
->sType
== TYPE_F64
)
3256 if (isFloatType(insn
->sType
))
3257 if (insn
->sType
== TYPE_F64
)
3289 switch (insn
->src(0).getFile()) {
3290 case FILE_MEMORY_CONST
: emitLDC(); break;
3291 case FILE_MEMORY_LOCAL
: emitLDL(); break;
3292 case FILE_MEMORY_SHARED
: emitLDS(); break;
3293 case FILE_MEMORY_GLOBAL
: emitLD(); break;
3295 assert(!"invalid load");
3301 switch (insn
->src(0).getFile()) {
3302 case FILE_MEMORY_LOCAL
: emitSTL(); break;
3303 case FILE_MEMORY_SHARED
: emitSTS(); break;
3304 case FILE_MEMORY_GLOBAL
: emitST(); break;
3306 assert(!"invalid store");
3312 if (insn
->src(0).getFile() == FILE_MEMORY_SHARED
)
3315 if (!insn
->defExists(0) && insn
->subOp
< NV50_IR_SUBOP_ATOM_CAS
)
3400 assert(!"invalid opcode");
3416 CodeEmitterGM107::getMinEncodingSize(const Instruction
*i
) const
3421 /*******************************************************************************
3422 * sched data calculator
3423 ******************************************************************************/
3425 class SchedDataCalculatorGM107
: public Pass
3428 SchedDataCalculatorGM107(const TargetGM107
*targ
) : targ(targ
) {}
3440 void rebase(const int base
)
3442 const int delta
= this->base
- base
;
3447 for (int i
= 0; i
< 256; ++i
) {
3451 for (int i
= 0; i
< 8; ++i
) {
3460 memset(&rd
, 0, sizeof(rd
));
3461 memset(&wr
, 0, sizeof(wr
));
3463 int getLatest(const ScoreData
& d
) const
3466 for (int i
= 0; i
< 256; ++i
)
3469 for (int i
= 0; i
< 8; ++i
)
3476 inline int getLatestRd() const
3478 return getLatest(rd
);
3480 inline int getLatestWr() const
3482 return getLatest(wr
);
3484 inline int getLatest() const
3486 return MAX2(getLatestRd(), getLatestWr());
3488 void setMax(const RegScores
*that
)
3490 for (int i
= 0; i
< 256; ++i
) {
3491 rd
.r
[i
] = MAX2(rd
.r
[i
], that
->rd
.r
[i
]);
3492 wr
.r
[i
] = MAX2(wr
.r
[i
], that
->wr
.r
[i
]);
3494 for (int i
= 0; i
< 8; ++i
) {
3495 rd
.p
[i
] = MAX2(rd
.p
[i
], that
->rd
.p
[i
]);
3496 wr
.p
[i
] = MAX2(wr
.p
[i
], that
->wr
.p
[i
]);
3498 rd
.c
= MAX2(rd
.c
, that
->rd
.c
);
3499 wr
.c
= MAX2(wr
.c
, that
->wr
.c
);
3501 void print(int cycle
)
3503 for (int i
= 0; i
< 256; ++i
) {
3504 if (rd
.r
[i
] > cycle
)
3505 INFO("rd $r%i @ %i\n", i
, rd
.r
[i
]);
3506 if (wr
.r
[i
] > cycle
)
3507 INFO("wr $r%i @ %i\n", i
, wr
.r
[i
]);
3509 for (int i
= 0; i
< 8; ++i
) {
3510 if (rd
.p
[i
] > cycle
)
3511 INFO("rd $p%i @ %i\n", i
, rd
.p
[i
]);
3512 if (wr
.p
[i
] > cycle
)
3513 INFO("wr $p%i @ %i\n", i
, wr
.p
[i
]);
3516 INFO("rd $c @ %i\n", rd
.c
);
3518 INFO("wr $c @ %i\n", wr
.c
);
3522 RegScores
*score
; // for current BB
3523 std::vector
<RegScores
> scoreBoards
;
3525 const TargetGM107
*targ
;
3526 bool visit(Function
*);
3527 bool visit(BasicBlock
*);
3529 void commitInsn(const Instruction
*, int);
3530 int calcDelay(const Instruction
*, int) const;
3531 void setDelay(Instruction
*, int, const Instruction
*);
3532 void recordWr(const Value
*, int, int);
3533 void checkRd(const Value
*, int, int&) const;
3535 inline void emitYield(Instruction
*);
3536 inline void emitStall(Instruction
*, uint8_t);
3537 inline void emitReuse(Instruction
*, uint8_t);
3538 inline void emitWrDepBar(Instruction
*, uint8_t);
3539 inline void emitRdDepBar(Instruction
*, uint8_t);
3540 inline void emitWtDepBar(Instruction
*, uint8_t);
3542 inline int getStall(const Instruction
*) const;
3543 inline int getWrDepBar(const Instruction
*) const;
3544 inline int getRdDepBar(const Instruction
*) const;
3545 inline int getWtDepBar(const Instruction
*) const;
3547 void setReuseFlag(Instruction
*);
3549 inline void printSchedInfo(int, const Instruction
*) const;
3552 LiveBarUse(Instruction
*insn
, Instruction
*usei
)
3553 : insn(insn
), usei(usei
) { }
3559 LiveBarDef(Instruction
*insn
, Instruction
*defi
)
3560 : insn(insn
), defi(defi
) { }
3565 bool insertBarriers(BasicBlock
*);
3567 Instruction
*findFirstUse(const Instruction
*) const;
3568 Instruction
*findFirstDef(const Instruction
*) const;
3570 bool needRdDepBar(const Instruction
*) const;
3571 bool needWrDepBar(const Instruction
*) const;
3575 SchedDataCalculatorGM107::emitStall(Instruction
*insn
, uint8_t cnt
)
3582 SchedDataCalculatorGM107::emitYield(Instruction
*insn
)
3584 insn
->sched
|= 1 << 4;
3588 SchedDataCalculatorGM107::emitWrDepBar(Instruction
*insn
, uint8_t id
)
3591 if ((insn
->sched
& 0xe0) == 0xe0)
3592 insn
->sched
^= 0xe0;
3593 insn
->sched
|= id
<< 5;
3597 SchedDataCalculatorGM107::emitRdDepBar(Instruction
*insn
, uint8_t id
)
3600 if ((insn
->sched
& 0x700) == 0x700)
3601 insn
->sched
^= 0x700;
3602 insn
->sched
|= id
<< 8;
3606 SchedDataCalculatorGM107::emitWtDepBar(Instruction
*insn
, uint8_t id
)
3609 insn
->sched
|= 1 << (11 + id
);
3613 SchedDataCalculatorGM107::emitReuse(Instruction
*insn
, uint8_t id
)
3616 insn
->sched
|= 1 << (17 + id
);
3620 SchedDataCalculatorGM107::printSchedInfo(int cycle
,
3621 const Instruction
*insn
) const
3623 uint8_t st
, yl
, wr
, rd
, wt
, ru
;
3625 st
= (insn
->sched
& 0x00000f) >> 0;
3626 yl
= (insn
->sched
& 0x000010) >> 4;
3627 wr
= (insn
->sched
& 0x0000e0) >> 5;
3628 rd
= (insn
->sched
& 0x000700) >> 8;
3629 wt
= (insn
->sched
& 0x01f800) >> 11;
3630 ru
= (insn
->sched
& 0x1e0000) >> 17;
3632 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3633 cycle
, st
, yl
, wr
, rd
, wt
, ru
);
3637 SchedDataCalculatorGM107::getStall(const Instruction
*insn
) const
3639 return insn
->sched
& 0xf;
3643 SchedDataCalculatorGM107::getWrDepBar(const Instruction
*insn
) const
3645 return (insn
->sched
& 0x0000e0) >> 5;
3649 SchedDataCalculatorGM107::getRdDepBar(const Instruction
*insn
) const
3651 return (insn
->sched
& 0x000700) >> 8;
3655 SchedDataCalculatorGM107::getWtDepBar(const Instruction
*insn
) const
3657 return (insn
->sched
& 0x01f800) >> 11;
3660 // Emit the reuse flag which allows to make use of the new memory hierarchy
3661 // introduced since Maxwell, the operand reuse cache.
3663 // It allows to reduce bank conflicts by caching operands. Each time you issue
3664 // an instruction, that flag can tell the hw which operands are going to be
3665 // re-used by the next instruction. Note that the next instruction has to use
3666 // the same GPR id in the same operand slot.
3668 SchedDataCalculatorGM107::setReuseFlag(Instruction
*insn
)
3670 Instruction
*next
= insn
->next
;
3671 BitSet
defs(255, 1);
3673 if (!targ
->isReuseSupported(insn
))
3676 for (int d
= 0; insn
->defExists(d
); ++d
) {
3677 const Value
*def
= insn
->def(d
).rep();
3678 if (insn
->def(d
).getFile() != FILE_GPR
)
3680 if (typeSizeof(insn
->dType
) != 4 || def
->reg
.data
.id
== 255)
3682 defs
.set(def
->reg
.data
.id
);
3685 for (int s
= 0; insn
->srcExists(s
); s
++) {
3686 const Value
*src
= insn
->src(s
).rep();
3687 if (insn
->src(s
).getFile() != FILE_GPR
)
3689 if (typeSizeof(insn
->sType
) != 4 || src
->reg
.data
.id
== 255)
3691 if (defs
.test(src
->reg
.data
.id
))
3693 if (!next
->srcExists(s
) || next
->src(s
).getFile() != FILE_GPR
)
3695 if (src
->reg
.data
.id
!= next
->getSrc(s
)->reg
.data
.id
)
3703 SchedDataCalculatorGM107::recordWr(const Value
*v
, int cycle
, int ready
)
3705 int a
= v
->reg
.data
.id
, b
;
3707 switch (v
->reg
.file
) {
3709 b
= a
+ v
->reg
.size
/ 4;
3710 for (int r
= a
; r
< b
; ++r
)
3711 score
->rd
.r
[r
] = ready
;
3713 case FILE_PREDICATE
:
3714 // To immediately use a predicate set by any instructions, the minimum
3715 // number of stall counts is 13.
3716 score
->rd
.p
[a
] = cycle
+ 13;
3719 score
->rd
.c
= ready
;
3727 SchedDataCalculatorGM107::checkRd(const Value
*v
, int cycle
, int &delay
) const
3729 int a
= v
->reg
.data
.id
, b
;
3732 switch (v
->reg
.file
) {
3734 b
= a
+ v
->reg
.size
/ 4;
3735 for (int r
= a
; r
< b
; ++r
)
3736 ready
= MAX2(ready
, score
->rd
.r
[r
]);
3738 case FILE_PREDICATE
:
3739 ready
= MAX2(ready
, score
->rd
.p
[a
]);
3742 ready
= MAX2(ready
, score
->rd
.c
);
3748 delay
= MAX2(delay
, ready
- cycle
);
3752 SchedDataCalculatorGM107::commitInsn(const Instruction
*insn
, int cycle
)
3754 const int ready
= cycle
+ targ
->getLatency(insn
);
3756 for (int d
= 0; insn
->defExists(d
); ++d
)
3757 recordWr(insn
->getDef(d
), cycle
, ready
);
3759 #ifdef GM107_DEBUG_SCHED_DATA
3760 score
->print(cycle
);
3764 #define GM107_MIN_ISSUE_DELAY 0x1
3765 #define GM107_MAX_ISSUE_DELAY 0xf
3768 SchedDataCalculatorGM107::calcDelay(const Instruction
*insn
, int cycle
) const
3770 int delay
= 0, ready
= cycle
;
3772 for (int s
= 0; insn
->srcExists(s
); ++s
)
3773 checkRd(insn
->getSrc(s
), cycle
, delay
);
3775 // TODO: make use of getReadLatency()!
3777 return MAX2(delay
, ready
- cycle
);
3781 SchedDataCalculatorGM107::setDelay(Instruction
*insn
, int delay
,
3782 const Instruction
*next
)
3784 const OpClass cl
= targ
->getOpClass(insn
->op
);
3787 if (insn
->op
== OP_EXIT
||
3788 insn
->op
== OP_BAR
||
3789 insn
->op
== OP_MEMBAR
) {
3790 delay
= GM107_MAX_ISSUE_DELAY
;
3792 if (insn
->op
== OP_QUADON
||
3793 insn
->op
== OP_QUADPOP
) {
3796 if (cl
== OPCLASS_FLOW
|| insn
->join
) {
3800 if (!next
|| !targ
->canDualIssue(insn
, next
)) {
3801 delay
= CLAMP(delay
, GM107_MIN_ISSUE_DELAY
, GM107_MAX_ISSUE_DELAY
);
3803 delay
= 0x0; // dual-issue
3806 wr
= getWrDepBar(insn
);
3807 rd
= getRdDepBar(insn
);
3809 if (delay
== GM107_MIN_ISSUE_DELAY
&& (wr
& rd
) != 7) {
3810 // Barriers take one additional clock cycle to become active on top of
3811 // the clock consumed by the instruction producing it.
3812 if (!next
|| insn
->bb
!= next
->bb
) {
3815 int wt
= getWtDepBar(next
);
3816 if ((wt
& (1 << wr
)) | (wt
& (1 << rd
)))
3821 emitStall(insn
, delay
);
3825 // Return true when the given instruction needs to emit a read dependency
3826 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3827 // setting the maximum number of stall counts is not enough.
3829 SchedDataCalculatorGM107::needRdDepBar(const Instruction
*insn
) const
3831 BitSet
srcs(255, 1), defs(255, 1);
3834 if (!targ
->isBarrierRequired(insn
))
3837 // Do not emit a read dependency barrier when the instruction doesn't use
3838 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3839 for (int s
= 0; insn
->srcExists(s
); ++s
) {
3840 const Value
*src
= insn
->src(s
).rep();
3841 if (insn
->src(s
).getFile() != FILE_GPR
)
3843 if (src
->reg
.data
.id
== 255)
3846 a
= src
->reg
.data
.id
;
3847 b
= a
+ src
->reg
.size
/ 4;
3848 for (int r
= a
; r
< b
; ++r
)
3852 if (!srcs
.popCount())
3855 // Do not emit a read dependency barrier when the output GPRs are equal to
3856 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3857 // be produced and WaR hazards are prevented.
3858 for (int d
= 0; insn
->defExists(d
); ++d
) {
3859 const Value
*def
= insn
->def(d
).rep();
3860 if (insn
->def(d
).getFile() != FILE_GPR
)
3862 if (def
->reg
.data
.id
== 255)
3865 a
= def
->reg
.data
.id
;
3866 b
= a
+ def
->reg
.size
/ 4;
3867 for (int r
= a
; r
< b
; ++r
)
3872 if (!srcs
.popCount())
3878 // Return true when the given instruction needs to emit a write dependency
3879 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3880 // setting the maximum number of stall counts is not enough. This is only legal
3881 // if the instruction output something.
3883 SchedDataCalculatorGM107::needWrDepBar(const Instruction
*insn
) const
3885 if (!targ
->isBarrierRequired(insn
))
3888 for (int d
= 0; insn
->defExists(d
); ++d
) {
3889 if (insn
->def(d
).getFile() == FILE_GPR
||
3890 insn
->def(d
).getFile() == FILE_PREDICATE
)
3896 // Find the next instruction inside the same basic block which uses the output
3897 // of the given instruction in order to avoid RaW hazards.
3899 SchedDataCalculatorGM107::findFirstUse(const Instruction
*bari
) const
3901 Instruction
*insn
, *next
;
3904 if (!bari
->defExists(0))
3907 minGPR
= bari
->def(0).rep()->reg
.data
.id
;
3908 maxGPR
= minGPR
+ bari
->def(0).rep()->reg
.size
/ 4 - 1;
3910 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
3913 for (int s
= 0; insn
->srcExists(s
); ++s
) {
3914 const Value
*src
= insn
->src(s
).rep();
3915 if (bari
->def(0).getFile() == FILE_GPR
) {
3916 if (insn
->src(s
).getFile() != FILE_GPR
||
3917 src
->reg
.data
.id
+ src
->reg
.size
/ 4 - 1 < minGPR
||
3918 src
->reg
.data
.id
> maxGPR
)
3922 if (bari
->def(0).getFile() == FILE_PREDICATE
) {
3923 if (insn
->src(s
).getFile() != FILE_PREDICATE
||
3924 src
->reg
.data
.id
!= minGPR
)
3933 // Find the next instruction inside the same basic block which overwrites, at
3934 // least, one source of the given instruction in order to avoid WaR hazards.
3936 SchedDataCalculatorGM107::findFirstDef(const Instruction
*bari
) const
3938 Instruction
*insn
, *next
;
3941 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
3944 for (int d
= 0; insn
->defExists(d
); ++d
) {
3945 const Value
*def
= insn
->def(d
).rep();
3946 if (insn
->def(d
).getFile() != FILE_GPR
)
3949 minGPR
= def
->reg
.data
.id
;
3950 maxGPR
= minGPR
+ def
->reg
.size
/ 4 - 1;
3952 for (int s
= 0; bari
->srcExists(s
); ++s
) {
3953 const Value
*src
= bari
->src(s
).rep();
3954 if (bari
->src(s
).getFile() != FILE_GPR
||
3955 src
->reg
.data
.id
+ src
->reg
.size
/ 4 - 1 < minGPR
||
3956 src
->reg
.data
.id
> maxGPR
)
3965 // Dependency barriers:
3966 // This pass is a bit ugly and could probably be improved by performing a
3967 // better allocation.
3969 // The main idea is to avoid WaR and RaW hazards by emitting read/write
3970 // dependency barriers using the control codes.
3972 SchedDataCalculatorGM107::insertBarriers(BasicBlock
*bb
)
3974 std::list
<LiveBarUse
> live_uses
;
3975 std::list
<LiveBarDef
> live_defs
;
3976 Instruction
*insn
, *next
;
3980 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
3981 Instruction
*usei
= NULL
, *defi
= NULL
;
3982 bool need_wr_bar
, need_rd_bar
;
3986 // Expire old barrier uses.
3987 for (std::list
<LiveBarUse
>::iterator it
= live_uses
.begin();
3988 it
!= live_uses
.end();) {
3989 if (insn
->serial
>= it
->usei
->serial
) {
3990 int wr
= getWrDepBar(it
->insn
);
3991 emitWtDepBar(insn
, wr
);
3992 bars
.clr(wr
); // free barrier
3993 it
= live_uses
.erase(it
);
3999 // Expire old barrier defs.
4000 for (std::list
<LiveBarDef
>::iterator it
= live_defs
.begin();
4001 it
!= live_defs
.end();) {
4002 if (insn
->serial
>= it
->defi
->serial
) {
4003 int rd
= getRdDepBar(it
->insn
);
4004 emitWtDepBar(insn
, rd
);
4005 bars
.clr(rd
); // free barrier
4006 it
= live_defs
.erase(it
);
4012 need_wr_bar
= needWrDepBar(insn
);
4013 need_rd_bar
= needRdDepBar(insn
);
4016 // When the instruction requires to emit a write dependency barrier
4017 // (all which write something at a variable latency), find the next
4018 // instruction which reads the outputs.
4019 usei
= findFirstUse(insn
);
4021 // Allocate and emit a new barrier.
4022 bar_id
= bars
.findFreeRange(1);
4026 emitWrDepBar(insn
, bar_id
);
4028 live_uses
.push_back(LiveBarUse(insn
, usei
));
4032 // When the instruction requires to emit a read dependency barrier
4033 // (all which read something at a variable latency), find the next
4034 // instruction which will write the inputs.
4035 defi
= findFirstDef(insn
);
4037 if (usei
&& defi
&& usei
->serial
<= defi
->serial
)
4040 // Allocate and emit a new barrier.
4041 bar_id
= bars
.findFreeRange(1);
4045 emitRdDepBar(insn
, bar_id
);
4047 live_defs
.push_back(LiveBarDef(insn
, defi
));
4051 // Remove unnecessary barrier waits.
4052 BitSet
alive_bars(6, 1);
4053 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4058 wr
= getWrDepBar(insn
);
4059 rd
= getRdDepBar(insn
);
4060 wt
= getWtDepBar(insn
);
4062 for (int idx
= 0; idx
< 6; ++idx
) {
4063 if (!(wt
& (1 << idx
)))
4065 if (!alive_bars
.test(idx
)) {
4066 insn
->sched
&= ~(1 << (11 + idx
));
4068 alive_bars
.clr(idx
);
4082 SchedDataCalculatorGM107::visit(Function
*func
)
4086 func
->orderInstructions(insns
);
4088 scoreBoards
.resize(func
->cfg
.getSize());
4089 for (size_t i
= 0; i
< scoreBoards
.size(); ++i
)
4090 scoreBoards
[i
].wipe();
4095 SchedDataCalculatorGM107::visit(BasicBlock
*bb
)
4097 Instruction
*insn
, *next
= NULL
;
4100 for (Instruction
*insn
= bb
->getEntry(); insn
; insn
= insn
->next
) {
4102 insn
->sched
= 0x7e0;
4105 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4108 // Insert read/write dependency barriers for instructions which don't
4109 // operate at a fixed latency.
4112 score
= &scoreBoards
.at(bb
->getId());
4114 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
4115 // back branches will wait until all target dependencies are satisfied
4116 if (ei
.getType() == Graph::Edge::BACK
) // sched would be uninitialized
4118 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
4119 score
->setMax(&scoreBoards
.at(in
->getId()));
4122 #ifdef GM107_DEBUG_SCHED_DATA
4123 INFO("=== BB:%i initial scores\n", bb
->getId());
4124 score
->print(cycle
);
4127 // Because barriers are allocated locally (intra-BB), we have to make sure
4128 // that all produced barriers have been consumed before entering inside a
4129 // new basic block. The best way is to do a global allocation pre RA but
4130 // it's really more difficult, especially because of the phi nodes. Anyways,
4131 // it seems like that waiting on a barrier which has already been consumed
4132 // doesn't add any additional cost, it's just not elegant!
4133 Instruction
*start
= bb
->getEntry();
4134 if (start
&& bb
->cfg
.incidentCount() > 0) {
4135 for (int b
= 0; b
< 6; b
++)
4136 emitWtDepBar(start
, b
);
4139 for (insn
= bb
->getEntry(); insn
&& insn
->next
; insn
= insn
->next
) {
4142 commitInsn(insn
, cycle
);
4143 int delay
= calcDelay(next
, cycle
);
4144 setDelay(insn
, delay
, next
);
4145 cycle
+= getStall(insn
);
4149 // XXX: The yield flag seems to destroy a bunch of things when it is
4150 // set on every instruction, need investigation.
4153 #ifdef GM107_DEBUG_SCHED_DATA
4154 printSchedInfo(cycle
, insn
);
4162 commitInsn(insn
, cycle
);
4166 #ifdef GM107_DEBUG_SCHED_DATA
4167 fprintf(stderr
, "last instruction is : ");
4169 fprintf(stderr
, "cycle=%d\n", cycle
);
4172 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
4173 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
4175 if (ei
.getType() != Graph::Edge::BACK
) {
4176 // Only test the first instruction of the outgoing block.
4177 next
= out
->getEntry();
4179 bbDelay
= MAX2(bbDelay
, calcDelay(next
, cycle
));
4181 // When the outgoing BB is empty, make sure to set the number of
4182 // stall counts needed by the instruction because we don't know the
4183 // next instruction.
4184 bbDelay
= MAX2(bbDelay
, targ
->getLatency(insn
));
4187 // Wait until all dependencies are satisfied.
4188 const int regsFree
= score
->getLatest();
4189 next
= out
->getFirst();
4190 for (int c
= cycle
; next
&& c
< regsFree
; next
= next
->next
) {
4191 bbDelay
= MAX2(bbDelay
, calcDelay(next
, c
));
4192 c
+= getStall(next
);
4197 if (bb
->cfg
.outgoingCount() != 1)
4199 setDelay(insn
, bbDelay
, next
);
4200 cycle
+= getStall(insn
);
4202 score
->rebase(cycle
); // common base for initializing out blocks' scores
4206 /*******************************************************************************
4208 ******************************************************************************/
4211 CodeEmitterGM107::prepareEmission(Function
*func
)
4213 SchedDataCalculatorGM107
sched(targGM107
);
4214 CodeEmitter::prepareEmission(func
);
4215 sched
.run(func
, true, true);
4218 static inline uint32_t sizeToBundlesGM107(uint32_t size
)
4220 return (size
+ 23) / 24;
4224 CodeEmitterGM107::prepareEmission(Program
*prog
)
4226 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
4227 !fi
.end(); fi
.next()) {
4228 Function
*func
= reinterpret_cast<Function
*>(fi
.get());
4229 func
->binPos
= prog
->binSize
;
4230 prepareEmission(func
);
4232 // adjust sizes & positions for schedulding info:
4233 if (prog
->getTarget()->hasSWSched
) {
4234 uint32_t adjPos
= func
->binPos
;
4235 BasicBlock
*bb
= NULL
;
4236 for (int i
= 0; i
< func
->bbCount
; ++i
) {
4237 bb
= func
->bbArray
[i
];
4238 int32_t adjSize
= bb
->binSize
;
4240 adjSize
-= 32 - adjPos
% 32;
4244 adjSize
= bb
->binSize
+ sizeToBundlesGM107(adjSize
) * 8;
4245 bb
->binPos
= adjPos
;
4246 bb
->binSize
= adjSize
;
4250 func
->binSize
= adjPos
- func
->binPos
;
4253 prog
->binSize
+= func
->binSize
;
4257 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107
*target
)
4258 : CodeEmitter(target
),
4260 writeIssueDelays(target
->hasSWSched
)
4263 codeSize
= codeSizeLimit
= 0;
4268 TargetGM107::createCodeEmitterGM107(Program::Type type
)
4270 CodeEmitterGM107
*emit
= new CodeEmitterGM107(this);
4271 emit
->setProgramType(type
);
4275 } // namespace nv50_ir