2 * Copyright 2014 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
25 #include "codegen/nv50_ir_target_gm107.h"
27 //#define GM107_DEBUG_SCHED_DATA
// Instruction encoder class, derived from CodeEmitter.
//
// Visible in this excerpt:
//  - entry points: emitInstruction, getMinEncodingSize, prepareEmission
//    (Program and Function overloads) and setProgramType;
//  - state: targGM107, progType, the instruction currently being encoded
//    (insn) and the writeIssueDelays flag;
//  - the bit-field helper family (emitField, emitInsn, emitPred, emitGPR,
//    emitSYS, emitPRED, emitADDR, emitCBUF, longIMMD, emitIMMD, emitCond*,
//    and the single-bit modifier helpers).
//
// The convenience overloads defined inline all forward to the
// (int, const Value *) form, passing NULL when the ValueRef/ValueDef (or the
// ValueRef pointer) holds no value.  emitCond5 forwards to emitCond4, and the
// one-argument emitRND passes insn->rnd with -1 as the third argument.
//
// NOTE(review): access specifiers, braces and a large range of member
// declarations are absent from this excerpt.
31 class CodeEmitterGM107
: public CodeEmitter
34 CodeEmitterGM107(const TargetGM107
*);
36 virtual bool emitInstruction(Instruction
*);
37 virtual uint32_t getMinEncodingSize(const Instruction
*) const;
39 virtual void prepareEmission(Program
*);
40 virtual void prepareEmission(Function
*);
42 inline void setProgramType(Program::Type pType
) { progType
= pType
; }
45 const TargetGM107
*targGM107
;
47 Program::Type progType
;
49 const Instruction
*insn
;
50 const bool writeIssueDelays
;
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b
, int s
, uint32_t v
) { emitField(code
, b
, s
, v
); }
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o
) { emitInsn(o
, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value
*);
61 inline void emitGPR(int pos
) {
62 emitGPR(pos
, (const Value
*)NULL
);
64 inline void emitGPR(int pos
, const ValueRef
&ref
) {
65 emitGPR(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
67 inline void emitGPR(int pos
, const ValueRef
*ref
) {
68 emitGPR(pos
, ref
? ref
->rep() : (const Value
*)NULL
);
70 inline void emitGPR(int pos
, const ValueDef
&def
) {
71 emitGPR(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
73 inline void emitSYS(int, const Value
*);
74 inline void emitSYS(int pos
, const ValueRef
&ref
) {
75 emitSYS(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
77 inline void emitPRED(int, const Value
*);
78 inline void emitPRED(int pos
) {
79 emitPRED(pos
, (const Value
*)NULL
);
81 inline void emitPRED(int pos
, const ValueRef
&ref
) {
82 emitPRED(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
84 inline void emitPRED(int pos
, const ValueDef
&def
) {
85 emitPRED(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
87 inline void emitADDR(int, int, int, int, const ValueRef
&);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef
&);
89 inline bool longIMMD(const ValueRef
&);
90 inline void emitIMMD(int, int, const ValueRef
&);
92 void emitCond3(int, CondCode
);
93 void emitCond4(int, CondCode
);
94 void emitCond5(int pos
, CondCode cc
) { emitCond4(pos
, cc
); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef
&);
101 inline void emitNEG(int, const ValueRef
&);
102 inline void emitNEG2(int, const ValueRef
&, const ValueRef
&);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode
, int);
105 inline void emitRND(int pos
) {
106 emitRND(pos
, insn
->rnd
, -1);
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef
&);
174 void emitLDSTs(int, DataType
);
215 void emitSUHandle(const int s
);
221 /*******************************************************************************
222 * general instruction layout/fields
223 ******************************************************************************/
// Builds the value for an s-bit field located at bit offset b.
//   m - mask of the low s bits (computed in 64 bits so the shift is wide
//       enough, then truncated to 32 bits)
//   d - the masked value shifted into position as a 64-bit quantity
// The assert requires the bits of v above the field to be either all zero or
// all one, i.e. v must fit the field as an unsigned or a sign-extended value.
// NOTE(review): the statements that store d into `data` are not visible in
// this excerpt.
226 CodeEmitterGM107::emitField(uint32_t *data
, int b
, int s
, uint32_t v
)
229 uint32_t m
= ((1ULL << s
) - 1);
230 uint64_t d
= (uint64_t)(v
& m
) << b
;
231 assert(!(v
& ~m
) || (v
& ~m
) == ~m
);
// Encodes the instruction's predicate.  When a predicate source exists
// (predSrc >= 0), its register id goes into the 3-bit field at bit 16 and
// bit 19 is set when the condition code is CC_NOT_P.
// NOTE(review): the branch for unpredicated instructions is not visible here.
238 CodeEmitterGM107::emitPred()
240 if (insn
->predSrc
>= 0) {
241 emitField(16, 3, insn
->getSrc(insn
->predSrc
)->rep()->reg
.data
.id
);
242 emitField(19, 1, insn
->cc
== CC_NOT_P
);
// Starts the encoding of a new instruction by clearing the first code word.
// NOTE(review): how `hi` and `pred` are consumed is not visible in this
// excerpt; the one-argument overload in the class passes pred = true.
249 CodeEmitterGM107::emitInsn(uint32_t hi
, bool pred
)
251 code
[0] = 0x00000000;
// Writes an 8-bit register number at bit offset pos: the register id of val,
// or 255 when val is NULL or lives in FILE_FLAGS.
258 CodeEmitterGM107::emitGPR(int pos
, const Value
*val
)
260 emitField(pos
, 8, val
&& !val
->inFile(FILE_FLAGS
) ?
261 val
->reg
.data
.id
: 255);
// Translates a system value into an 8-bit id and emits it at bit offset pos.
// id starts as val's reg.data.id (-1 when val is NULL) and is then replaced
// by the table below; SV_TID, SV_CTAID and SV_CLOCK add the system value's
// index to their base id.  An unhandled value trips the assert.
// NOTE(review): the switch header and default label are not visible here.
265 CodeEmitterGM107::emitSYS(int pos
, const Value
*val
)
267 int id
= val
? val
->reg
.data
.id
: -1;
270 case SV_LANEID
: id
= 0x00; break;
271 case SV_VERTEX_COUNT
: id
= 0x10; break;
272 case SV_INVOCATION_ID
: id
= 0x11; break;
273 case SV_THREAD_KILL
: id
= 0x13; break;
274 case SV_INVOCATION_INFO
: id
= 0x1d; break;
275 case SV_COMBINED_TID
: id
= 0x20; break;
276 case SV_TID
: id
= 0x21 + val
->reg
.data
.sv
.index
; break;
277 case SV_CTAID
: id
= 0x25 + val
->reg
.data
.sv
.index
; break;
278 case SV_LANEMASK_EQ
: id
= 0x38; break;
279 case SV_LANEMASK_LT
: id
= 0x39; break;
280 case SV_LANEMASK_LE
: id
= 0x3a; break;
281 case SV_LANEMASK_GT
: id
= 0x3b; break;
282 case SV_LANEMASK_GE
: id
= 0x3c; break;
283 case SV_CLOCK
: id
= 0x50 + val
->reg
.data
.sv
.index
; break;
285 assert(!"invalid system value");
290 emitField(pos
, 8, id
);
// Writes a 3-bit predicate register number at bit offset pos: the register id
// of val, or 7 when val is NULL.
294 CodeEmitterGM107::emitPRED(int pos
, const Value
*val
)
296 emitField(pos
, 3, val
? val
->reg
.data
.id
: 7);
// Emits an address operand taken from ref:
//   gpr - bit offset receiving the indirect register (ref.getIndirect(0))
//   off - bit offset of the offset field
//   len - width of the offset field in bits
//   shr - right shift applied to the offset before it is stored
// The assert requires the low `shr` bits of the offset to be zero, so the
// shift never discards set bits.
// NOTE(review): the tail of the parameter list (declaring ref) and any guard
// around the emitGPR call are not visible in this excerpt.
300 CodeEmitterGM107::emitADDR(int gpr
, int off
, int len
, int shr
,
303 const Value
*v
= ref
.get();
304 assert(!(v
->reg
.data
.offset
& ((1 << shr
) - 1)));
306 emitGPR(gpr
, ref
.getIndirect(0));
307 emitField(off
, len
, v
->reg
.data
.offset
>> shr
);
311 CodeEmitterGM107::emitCBUF(int buf
, int gpr
, int off
, int len
, int shr
,
314 const Value
*v
= ref
.get();
315 const Symbol
*s
= v
->asSym();
317 assert(!(s
->reg
.data
.offset
& ((1 << shr
) - 1)));
319 emitField(buf
, 5, v
->reg
.fileIndex
);
321 emitGPR(gpr
, ref
.getIndirect(0));
322 emitField(off
, 16, s
->reg
.data
.offset
>> shr
);
// Tests whether an immediate operand needs more bits than the short form:
//  - float source type: any of the low 12 bits of the raw 32-bit value is set;
//  - otherwise: the signed value lies outside [-0x80000, 0x7ffff].
// NOTE(review): the result for non-immediate operands is not visible in this
// excerpt.
326 CodeEmitterGM107::longIMMD(const ValueRef
&ref
)
328 if (ref
.getFile() == FILE_IMMEDIATE
) {
329 const ImmediateValue
*imm
= ref
.get()->asImm();
330 if (isFloatType(insn
->sType
))
331 return imm
->reg
.data
.u32
& 0xfff;
333 return imm
->reg
.data
.s32
> 0x7ffff || imm
->reg
.data
.s32
< -0x80000;
// Emits the immediate held by ref into the len-bit field at bit offset pos.
// Visible handling:
//  - F32/F16 source type: the low 12 bits of the value must be zero;
//  - F64 source type: the low 44 bits of the 64-bit value must be zero and
//    the value is replaced by the remaining high bits (u64 >> 44);
//  - otherwise the bits above bit 18 must be all zero or all one.
// One path stores bit 19 of the value in the single-bit field at bit 56 and
// the low 19 bits at pos; the other stores the value unmodified.
// NOTE(review): the condition selecting between those two paths, and the
// statement following the F32/F16 assert, are not visible in this excerpt.
339 CodeEmitterGM107::emitIMMD(int pos
, int len
, const ValueRef
&ref
)
341 const ImmediateValue
*imm
= ref
.get()->asImm();
342 uint32_t val
= imm
->reg
.data
.u32
;
345 if (insn
->sType
== TYPE_F32
|| insn
->sType
== TYPE_F16
) {
346 assert(!(val
& 0x00000fff));
348 } else if (insn
->sType
== TYPE_F64
) {
349 assert(!(imm
->reg
.data
.u64
& 0x00000fffffffffffULL
));
350 val
= imm
->reg
.data
.u64
>> 44;
352 assert(!(val
& 0xfff80000) || (val
& 0xfff80000) == 0xfff80000);
354 emitField( 56, 1, (val
& 0x80000) >> 19);
355 emitField(pos
, len
, (val
& 0x7ffff));
357 emitField(pos
, len
, val
);
361 /*******************************************************************************
363 ******************************************************************************/
// Encodes a condition code as a 3-bit field at bit offset pos using the
// table below (CC_FL = 0 through CC_TR = 7).  Any other code trips the
// assert.
// NOTE(review): the switch header, the declaration of `data` and the default
// label are not visible in this excerpt.
366 CodeEmitterGM107::emitCond3(int pos
, CondCode code
)
371 case CC_FL
: data
= 0x00; break;
373 case CC_LT
: data
= 0x01; break;
375 case CC_EQ
: data
= 0x02; break;
377 case CC_LE
: data
= 0x03; break;
379 case CC_GT
: data
= 0x04; break;
381 case CC_NE
: data
= 0x05; break;
383 case CC_GE
: data
= 0x06; break;
384 case CC_TR
: data
= 0x07; break;
386 assert(!"invalid cond3");
390 emitField(pos
, 3, data
);
// Encodes a condition code as a 4-bit field at bit offset pos.  Values 0-6
// match the 3-bit table; 0x09-0x0e hold the *U variants and 0x0f is CC_TR.
// Values 0x07 and 0x08 are reserved by the commented-out NUM/NAN entries.
// Any other code trips the assert.
// NOTE(review): the switch header, the declaration of `data` and the default
// label are not visible in this excerpt.
394 CodeEmitterGM107::emitCond4(int pos
, CondCode code
)
399 case CC_FL
: data
= 0x00; break;
400 case CC_LT
: data
= 0x01; break;
401 case CC_EQ
: data
= 0x02; break;
402 case CC_LE
: data
= 0x03; break;
403 case CC_GT
: data
= 0x04; break;
404 case CC_NE
: data
= 0x05; break;
405 case CC_GE
: data
= 0x06; break;
406 // case CC_NUM: data = 0x07; break;
407 // case CC_NAN: data = 0x08; break;
408 case CC_LTU
: data
= 0x09; break;
409 case CC_EQU
: data
= 0x0a; break;
410 case CC_LEU
: data
= 0x0b; break;
411 case CC_GTU
: data
= 0x0c; break;
412 case CC_NEU
: data
= 0x0d; break;
413 case CC_GEU
: data
= 0x0e; break;
414 case CC_TR
: data
= 0x0f; break;
416 assert(!"invalid cond4");
420 emitField(pos
, 4, data
);
// Sets the bit at pos when source 0 lives in FILE_SHADER_OUTPUT.
424 CodeEmitterGM107::emitO(int pos
)
426 emitField(pos
, 1, insn
->getSrc(0)->reg
.file
== FILE_SHADER_OUTPUT
);
// Copies the instruction's perPatch flag into the bit at pos.
430 CodeEmitterGM107::emitP(int pos
)
432 emitField(pos
, 1, insn
->perPatch
);
// Copies the instruction's saturate flag into the bit at pos.
436 CodeEmitterGM107::emitSAT(int pos
)
438 emitField(pos
, 1, insn
->saturate
);
// Sets the bit at pos when the instruction has a flags definition
// (flagsDef >= 0).
442 CodeEmitterGM107::emitCC(int pos
)
444 emitField(pos
, 1, insn
->flagsDef
>= 0);
// Sets the bit at pos when the instruction has a flags source
// (flagsSrc >= 0).
448 CodeEmitterGM107::emitX(int pos
)
450 emitField(pos
, 1, insn
->flagsSrc
>= 0);
// Writes the absolute-value modifier of ref into the bit at pos.
454 CodeEmitterGM107::emitABS(int pos
, const ValueRef
&ref
)
456 emitField(pos
, 1, ref
.mod
.abs());
// Writes the negate modifier of ref into the bit at pos.
460 CodeEmitterGM107::emitNEG(int pos
, const ValueRef
&ref
)
462 emitField(pos
, 1, ref
.mod
.neg());
// Writes the combined negate of two operands into the bit at pos: the XOR of
// their negate modifiers, so two negations cancel out.
466 CodeEmitterGM107::emitNEG2(int pos
, const ValueRef
&a
, const ValueRef
&b
)
468 emitField(pos
, 1, a
.mod
.neg() ^ b
.mod
.neg());
// Packs the instruction's dnz and ftz flags into a len-bit field at pos:
// dnz in bit 1, ftz in bit 0.
472 CodeEmitterGM107::emitFMZ(int pos
, int len
)
474 emitField(pos
, len
, insn
->dnz
<< 1 | insn
->ftz
);
// Encodes a rounding mode as a 2-bit mode (rm) at bit offset rmp plus a
// 1-bit flag (ri) at bit offset rip.  The *I variants set ri and then share
// the rm value of their base mode by falling through to the next case.
// Any other mode trips the assert.
// NOTE(review): the switch header, the declarations/initial values of rm and
// ri, and the default label are not visible in this excerpt.
478 CodeEmitterGM107::emitRND(int rmp
, RoundMode rnd
, int rip
)
482 case ROUND_NI
: ri
= 1;
// no break: falls through to ROUND_N for rm
483 case ROUND_N
: rm
= 0; break;
484 case ROUND_MI
: ri
= 1;
// no break: falls through to ROUND_M for rm
485 case ROUND_M
: rm
= 1; break;
486 case ROUND_PI
: ri
= 1;
// no break: falls through to ROUND_P for rm
487 case ROUND_P
: rm
= 2; break;
488 case ROUND_ZI
: ri
= 1;
// no break: falls through to ROUND_Z for rm
489 case ROUND_Z
: rm
= 3; break;
491 assert(!"invalid round mode");
494 emitField(rip
, 1, ri
);
495 emitField(rmp
, 2, rm
);
// Encodes the instruction's postFactor, which must lie in [-3, 3], as a
// 3-bit field at pos.  Positive factors are stored as 7 - postFactor; the
// other emitField stores 0 - postFactor.
// NOTE(review): the `else` separating the two emitField calls is not visible
// in this excerpt; presumably the second call is the non-positive branch.
499 CodeEmitterGM107::emitPDIV(int pos
)
501 assert(insn
->postFactor
>= -3 && insn
->postFactor
<= 3);
502 if (insn
->postFactor
> 0)
503 emitField(pos
, 3, 7 - insn
->postFactor
);
505 emitField(pos
, 3, 0 - insn
->postFactor
);
// Sets the bit at pos when ref carries the NV50_IR_MOD_NOT modifier; the
// double negation normalises the masked result to 0 or 1.
509 CodeEmitterGM107::emitINV(int pos
, const ValueRef
&ref
)
511 emitField(pos
, 1, !!(ref
.mod
& Modifier(NV50_IR_MOD_NOT
)));
514 /*******************************************************************************
516 ******************************************************************************/
// Encodes EXIT: opcode word 0xe3000000 with an always-true (CC_TR) condition
// in the condition field at bit 0.
519 CodeEmitterGM107::emitEXIT()
521 emitInsn (0xe3000000);
522 emitCond5(0x00, CC_TR
);
// Encodes a branch.  Four opcodes are selected: JMX/BRX for indirect
// branches and JMP/BRA otherwise; the BRA form also records insn->allWarp in
// bit 7.  insn->limit goes to bit 6 and the condition is always CC_TR.
// Target encoding:
//  - no source, or a source that is not in FILE_MEMORY_CONST: the target
//    block's binPos, either as a 24-bit offset relative to the end of this
//    instruction (codeSize + 8) or as a 32-bit value;
//  - constant-buffer source: emitCBUF plus bit 5 set.
// NOTE(review): the conditions choosing between the opcode pairs and between
// the relative/32-bit forms, the declaration of `gpr`, and the statement
// guarded by the writeIssueDelays test are not visible in this excerpt.
526 CodeEmitterGM107::emitBRA()
528 const FlowInstruction
*insn
= this->insn
->asFlow();
531 if (insn
->indirect
) {
533 emitInsn(0xe2000000); // JMX
535 emitInsn(0xe2500000); // BRX
539 emitInsn(0xe2100000); // JMP
541 emitInsn(0xe2400000); // BRA
542 emitField(0x07, 1, insn
->allWarp
);
545 emitField(0x06, 1, insn
->limit
);
546 emitCond5(0x00, CC_TR
);
548 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
549 int32_t pos
= insn
->target
.bb
->binPos
;
550 if (writeIssueDelays
&& !(pos
& 0x1f))
553 emitField(0x14, 24, pos
- (codeSize
+ 8));
555 emitField(0x14, 32, pos
);
557 emitCBUF (0x24, gpr
, 20, 16, 0, insn
->src(0));
558 emitField(0x05, 1, 1);
// Encodes a call: JCAL for absolute calls, CAL otherwise, both emitted with
// pred = 0.  Target encoding:
//  - no source, or a source that is not in FILE_MEMORY_CONST: either a 24-bit
//    offset relative to the end of this instruction (codeSize + 8), or a
//    32-bit binPos; one path also looks up a builtin's offset and registers
//    two TYPE_BUILTIN relocations (one per code word) for it;
//  - constant-buffer source: emitCBUF plus bit 5 set.
// NOTE(review): the conditions selecting between the relative, builtin and
// 32-bit paths are not visible in this excerpt.
563 CodeEmitterGM107::emitCAL()
565 const FlowInstruction
*insn
= this->insn
->asFlow();
567 if (insn
->absolute
) {
568 emitInsn(0xe2200000, 0); // JCAL
570 emitInsn(0xe2600000, 0); // CAL
573 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
575 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
578 int pcAbs
= targGM107
->getBuiltinOffset(insn
->target
.builtin
);
579 addReloc(RelocEntry::TYPE_BUILTIN
, 0, pcAbs
, 0xfff00000, 20);
580 addReloc(RelocEntry::TYPE_BUILTIN
, 1, pcAbs
, 0x000fffff, -12);
582 emitField(0x14, 32, insn
->target
.bb
->binPos
);
586 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
587 emitField(0x05, 1, 1);
// Encodes PCNT (opcode word 0xe2b00000, pred = 0).  Without a
// constant-buffer source the target block's binPos is stored as a 24-bit
// offset relative to the end of this instruction (codeSize + 8); with one,
// the operand is emitted via emitCBUF and bit 5 is set.
// NOTE(review): the `else` between the two forms is not visible here.
592 CodeEmitterGM107::emitPCNT()
594 const FlowInstruction
*insn
= this->insn
->asFlow();
596 emitInsn(0xe2b00000, 0);
598 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
599 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
601 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
602 emitField(0x05, 1, 1);
// Encodes CONT: opcode word 0xe3500000 with an always-true (CC_TR) condition
// in the condition field at bit 0.
607 CodeEmitterGM107::emitCONT()
609 emitInsn (0xe3500000);
610 emitCond5(0x00, CC_TR
);
// Encodes PBK (opcode word 0xe2a00000, pred = 0).  Without a
// constant-buffer source the target block's binPos is stored as a 24-bit
// offset relative to the end of this instruction (codeSize + 8); with one,
// the operand is emitted via emitCBUF and bit 5 is set.
// NOTE(review): the `else` between the two forms is not visible here.
614 CodeEmitterGM107::emitPBK()
616 const FlowInstruction
*insn
= this->insn
->asFlow();
618 emitInsn(0xe2a00000, 0);
620 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
621 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
623 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
624 emitField(0x05, 1, 1);
// Encodes BRK: opcode word 0xe3400000 with an always-true (CC_TR) condition
// in the condition field at bit 0.
629 CodeEmitterGM107::emitBRK()
631 emitInsn (0xe3400000);
632 emitCond5(0x00, CC_TR
);
// Encodes PRET (opcode word 0xe2700000, pred = 0).  Without a
// constant-buffer source the target block's binPos is stored as a 24-bit
// offset relative to the end of this instruction (codeSize + 8); with one,
// the operand is emitted via emitCBUF and bit 5 is set.
// NOTE(review): the `else` between the two forms is not visible here.
636 CodeEmitterGM107::emitPRET()
638 const FlowInstruction
*insn
= this->insn
->asFlow();
640 emitInsn(0xe2700000, 0);
642 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
643 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
645 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
646 emitField(0x05, 1, 1);
// Encodes RET: opcode word 0xe3200000 with an always-true (CC_TR) condition
// in the condition field at bit 0.
651 CodeEmitterGM107::emitRET()
653 emitInsn (0xe3200000);
654 emitCond5(0x00, CC_TR
);
// Encodes SSY (opcode word 0xe2900000, pred = 0).  Without a
// constant-buffer source the target block's binPos is stored as a 24-bit
// offset relative to the end of this instruction (codeSize + 8); with one,
// the operand is emitted via emitCBUF and bit 5 is set.
// NOTE(review): the `else` between the two forms is not visible here.
658 CodeEmitterGM107::emitSSY()
660 const FlowInstruction
*insn
= this->insn
->asFlow();
662 emitInsn(0xe2900000, 0);
664 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
665 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
667 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
668 emitField(0x05, 1, 1);
// Encodes SYNC: opcode word 0xf0f80000 with an always-true (CC_TR) condition
// in the condition field at bit 0.
673 CodeEmitterGM107::emitSYNC()
675 emitInsn (0xf0f80000);
676 emitCond5(0x00, CC_TR
);
// Encodes SAM: opcode word 0xe3700000 emitted with pred = 0; no operands.
680 CodeEmitterGM107::emitSAM()
682 emitInsn(0xe3700000, 0);
// Encodes RAM: opcode word 0xe3800000 emitted with pred = 0; no operands.
686 CodeEmitterGM107::emitRAM()
688 emitInsn(0xe3800000, 0);
691 /*******************************************************************************
693 ******************************************************************************/
// Encodes a predicate logic operation (opcode word 0x50900000).  The 3-bit
// field at 0x18 selects AND (0), OR (1) or XOR (2); other operations trip the
// assert.  The third predicate slot at 0x27 is left at the NULL encoding
// (see the TODO), followed by the two sources with their NOT modifiers and
// the destination predicate at 0x03.
// NOTE(review): the switch header and default label are not visible here.
696 CodeEmitterGM107::emitPSETP()
699 emitInsn(0x50900000);
702 case OP_AND
: emitField(0x18, 3, 0); break;
703 case OP_OR
: emitField(0x18, 3, 1); break;
704 case OP_XOR
: emitField(0x18, 3, 2); break;
706 assert(!"unexpected operation");
711 emitPRED(0x27); // TODO: support 3-arg
712 emitINV (0x20, insn
->src(1));
713 emitPRED(0x1d, insn
->src(1));
714 emitINV (0x0f, insn
->src(0));
715 emitPRED(0x0c, insn
->src(0));
716 emitPRED(0x03, insn
->def(0));
720 /*******************************************************************************
721 * movement / conversion
722 ******************************************************************************/
// Encodes a move.  When source 0 is not an immediate, the opcode is chosen
// from the source's register file (register, constant buffer, short
// immediate and predicate forms are visible), and insn->lanes is written to
// the 4-bit field at 0x27 unless source or destination is a predicate.
// The other path emits opcode word 0x01000000 with a 32-bit immediate at
// 0x14 and insn->lanes at 0x0c.  The destination is finally emitted as a
// predicate at 0x03 or as a GPR at 0x00.
// NOTE(review): most case labels, the `else` lines and the default label are
// not visible in this excerpt, so the exact pairing of files to opcodes
// cannot be confirmed from here.
725 CodeEmitterGM107::emitMOV()
727 if (insn
->src(0).getFile() != FILE_IMMEDIATE
) {
728 switch (insn
->src(0).getFile()) {
730 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
731 emitInsn(0x5b6a0000);
734 emitInsn(0x5c980000);
736 emitGPR (0x14, insn
->src(0));
738 case FILE_MEMORY_CONST
:
739 emitInsn(0x4c980000);
740 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
743 emitInsn(0x38980000);
744 emitIMMD(0x14, 19, insn
->src(0));
747 emitInsn(0x50880000);
748 emitPRED(0x0c, insn
->src(0));
753 assert(!"bad src file");
756 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
757 insn
->src(0).getFile() != FILE_PREDICATE
)
758 emitField(0x27, 4, insn
->lanes
);
760 emitInsn (0x01000000);
761 emitIMMD (0x14, 32, insn
->src(0));
762 emitField(0x0c, 4, insn
->lanes
);
765 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
767 emitPRED(0x03, insn
->def(0));
770 emitGPR(0x00, insn
->def(0));
// Encodes S2R (opcode word 0xf0c80000): the system-value id of source 0 at
// 0x14 and the destination GPR at 0x00.
775 CodeEmitterGM107::emitS2R()
777 emitInsn(0xf0c80000);
778 emitSYS (0x14, insn
->src(0));
779 emitGPR (0x00, insn
->def(0));
// Encodes CS2R (opcode word 0x50c80000): the system-value id of source 0 at
// 0x14 and the destination GPR at 0x00.
783 CodeEmitterGM107::emitCS2R()
785 emitInsn(0x50c80000);
786 emitSYS (0x14, insn
->src(0));
787 emitGPR (0x00, insn
->def(0));
// Encodes a float-to-float conversion.  The rounding mode defaults to
// insn->rnd; OP_FLOOR, OP_CEIL and OP_TRUNC override it with the *I variants
// (ROUND_MI / ROUND_PI / ROUND_ZI).  The opcode depends on the file of
// source 0 (register, constant buffer or 19-bit immediate forms).
// OP_SAT, OP_ABS and OP_NEG are folded into the saturate/abs/neg bits, so
// those operations and the corresponding modifiers share the same encoding.
// Source and destination sizes are stored as log2 of the type size in bytes.
// NOTE(review): the switch header over the operation and several case/default
// labels are not visible in this excerpt.
791 CodeEmitterGM107::emitF2F()
793 RoundMode rnd
= insn
->rnd
;
796 case OP_FLOOR
: rnd
= ROUND_MI
; break;
797 case OP_CEIL
: rnd
= ROUND_PI
; break;
798 case OP_TRUNC
: rnd
= ROUND_ZI
; break;
803 switch (insn
->src(0).getFile()) {
805 emitInsn(0x5ca80000);
806 emitGPR (0x14, insn
->src(0));
808 case FILE_MEMORY_CONST
:
809 emitInsn(0x4ca80000);
810 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
813 emitInsn(0x38a80000);
814 emitIMMD(0x14, 19, insn
->src(0));
817 assert(!"bad src0 file");
821 emitField(0x32, 1, (insn
->op
== OP_SAT
) || insn
->saturate
);
822 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
824 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
826 emitField(0x29, 1, insn
->subOp
);
827 emitRND (0x27, rnd
, 0x2a);
828 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
829 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
830 emitGPR (0x00, insn
->def(0));
// Encodes a float-to-integer conversion.  The rounding mode defaults to
// insn->rnd; OP_FLOOR, OP_CEIL and OP_TRUNC override it with ROUND_M,
// ROUND_P and ROUND_Z.  The opcode depends on the file of source 0
// (register, constant buffer or 19-bit immediate forms).  OP_ABS/OP_NEG are
// folded into the abs/neg bits, bit 0x0c records whether the destination type
// is signed, and the type sizes are stored as log2 of the size in bytes.
// NOTE(review): the switch header over the operation and several case/default
// labels are not visible in this excerpt.
834 CodeEmitterGM107::emitF2I()
836 RoundMode rnd
= insn
->rnd
;
839 case OP_FLOOR
: rnd
= ROUND_M
; break;
840 case OP_CEIL
: rnd
= ROUND_P
; break;
841 case OP_TRUNC
: rnd
= ROUND_Z
; break;
846 switch (insn
->src(0).getFile()) {
848 emitInsn(0x5cb00000);
849 emitGPR (0x14, insn
->src(0));
851 case FILE_MEMORY_CONST
:
852 emitInsn(0x4cb00000);
853 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
856 emitInsn(0x38b00000);
857 emitIMMD(0x14, 19, insn
->src(0));
860 assert(!"bad src0 file");
864 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
866 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
868 emitRND (0x27, rnd
, 0x2a);
869 emitField(0x0c, 1, isSignedType(insn
->dType
));
870 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
871 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
872 emitGPR (0x00, insn
->def(0));
// Encodes an integer-to-float conversion.  The rounding mode defaults to
// insn->rnd; OP_FLOOR, OP_CEIL and OP_TRUNC override it with ROUND_M,
// ROUND_P and ROUND_Z.  emitRND is called with -1 as its third argument
// (the bit offset used for the ri flag).  The opcode depends on the file of
// source 0.  OP_ABS/OP_NEG are folded into the abs/neg bits, subOp fills the
// 2-bit field at 0x29, bit 0x0d records whether the source type is signed,
// and the type sizes are stored as log2 of the size in bytes.
// NOTE(review): the switch header over the operation and several case/default
// labels are not visible in this excerpt.
876 CodeEmitterGM107::emitI2F()
878 RoundMode rnd
= insn
->rnd
;
881 case OP_FLOOR
: rnd
= ROUND_M
; break;
882 case OP_CEIL
: rnd
= ROUND_P
; break;
883 case OP_TRUNC
: rnd
= ROUND_Z
; break;
888 switch (insn
->src(0).getFile()) {
890 emitInsn(0x5cb80000);
891 emitGPR (0x14, insn
->src(0));
893 case FILE_MEMORY_CONST
:
894 emitInsn(0x4cb80000);
895 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
898 emitInsn(0x38b80000);
899 emitIMMD(0x14, 19, insn
->src(0));
902 assert(!"bad src0 file");
906 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
908 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
909 emitField(0x29, 2, insn
->subOp
);
910 emitRND (0x27, rnd
, -1);
911 emitField(0x0d, 1, isSignedType(insn
->sType
));
912 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
913 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
914 emitGPR (0x00, insn
->def(0));
// Encodes an integer-to-integer conversion.  The opcode depends on the file
// of source 0 (register, constant buffer or 19-bit immediate forms).
// OP_ABS/OP_NEG are folded into the abs/neg bits, subOp fills the 2-bit field
// at 0x29, bits 0x0d and 0x0c record the signedness of the source and
// destination types, and the type sizes are stored as log2 of the size in
// bytes.
// NOTE(review): several case/default labels are not visible in this excerpt.
918 CodeEmitterGM107::emitI2I()
920 switch (insn
->src(0).getFile()) {
922 emitInsn(0x5ce00000);
923 emitGPR (0x14, insn
->src(0));
925 case FILE_MEMORY_CONST
:
926 emitInsn(0x4ce00000);
927 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
930 emitInsn(0x38e00000);
931 emitIMMD(0x14, 19, insn
->src(0));
934 assert(!"bad src0 file");
939 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
941 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
942 emitField(0x29, 2, insn
->subOp
);
943 emitField(0x0d, 1, isSignedType(insn
->sType
));
944 emitField(0x0c, 1, isSignedType(insn
->dType
));
945 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
946 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
947 emitGPR (0x00, insn
->def(0));
// Fixup callback that patches an already-emitted instruction: bit 10 of the
// second code word at entry->loc is set when data.force_persample_interp is
// true and cleared by the other statement.
// NOTE(review): the `else` between the two statements is not visible in this
// excerpt.
951 selpFlip(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
953 int loc
= entry
->loc
;
954 if (data
.force_persample_interp
)
955 code
[loc
+ 1] |= 1 << 10;
957 code
[loc
+ 1] &= ~(1 << 10);
// Encodes a select.  The opcode depends on the file of source 1 (register,
// constant buffer or 19-bit immediate forms).  Source 2 is the selecting
// predicate (with its NOT modifier at 0x2a), source 0 goes to 0x08 and the
// destination to 0x00.  When subOp is 1, selpFlip is registered through
// addInterp so the instruction can be patched after emission.
// NOTE(review): several case/default labels are not visible in this excerpt.
961 CodeEmitterGM107::emitSEL()
963 switch (insn
->src(1).getFile()) {
965 emitInsn(0x5ca00000);
966 emitGPR (0x14, insn
->src(1));
968 case FILE_MEMORY_CONST
:
969 emitInsn(0x4ca00000);
970 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
973 emitInsn(0x38a00000);
974 emitIMMD(0x14, 19, insn
->src(1));
977 assert(!"bad src1 file");
981 emitINV (0x2a, insn
->src(2));
982 emitPRED(0x27, insn
->src(2));
983 emitGPR (0x08, insn
->src(0));
984 emitGPR (0x00, insn
->def(0));
986 if (insn
->subOp
== 1) {
987 addInterp(0, 0, selpFlip
);
// Encodes SHFL (opcode word 0xef100000).  Source 1 is emitted as a GPR at
// 0x14 or as a 5-bit immediate at 0x14; source 2 as a GPR at 0x27 or as a
// 13-bit immediate at 0x22.  An optional second definition must be a
// predicate and is written at 0x30.  subOp goes to the 2-bit field at 0x1e,
// `type` to the 2-bit field at 0x1c, followed by source 0 and the
// destination.
// NOTE(review): the declaration and updates of `type`, the statement executed
// when def(1) does not exist, and several case/default labels are not visible
// in this excerpt.
992 CodeEmitterGM107::emitSHFL()
996 emitInsn (0xef100000);
998 switch (insn
->src(1).getFile()) {
1000 emitGPR(0x14, insn
->src(1));
1002 case FILE_IMMEDIATE
:
1003 emitIMMD(0x14, 5, insn
->src(1));
1007 assert(!"invalid src1 file");
1011 switch (insn
->src(2).getFile()) {
1013 emitGPR(0x27, insn
->src(2));
1015 case FILE_IMMEDIATE
:
1016 emitIMMD(0x22, 13, insn
->src(2));
1020 assert(!"invalid src2 file");
1024 if (!insn
->defExists(1))
1027 assert(insn
->def(1).getFile() == FILE_PREDICATE
);
1028 emitPRED(0x30, insn
->def(1));
1031 emitField(0x1e, 2, insn
->subOp
);
1032 emitField(0x1c, 2, type
);
1033 emitGPR (0x08, insn
->src(0));
1034 emitGPR (0x00, insn
->def(0));
1037 /*******************************************************************************
1039 ******************************************************************************/
// Encodes a double-precision add/subtract.  The opcode depends on the file of
// source 1 (register, constant buffer or 19-bit immediate forms), followed by
// the abs/neg modifier bits of both sources.  For OP_SUB, bit 13 of the
// second code word is toggled; that is overall bit 45 (0x2d), the position
// where the negate bit of source 1 was just written.
// NOTE(review): the register-form case label and default label are not
// visible in this excerpt.
1042 CodeEmitterGM107::emitDADD()
1044 switch (insn
->src(1).getFile()) {
1046 emitInsn(0x5c700000);
1047 emitGPR (0x14, insn
->src(1));
1049 case FILE_MEMORY_CONST
:
1050 emitInsn(0x4c700000);
1051 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1053 case FILE_IMMEDIATE
:
1054 emitInsn(0x38700000);
1055 emitIMMD(0x14, 19, insn
->src(1));
1058 assert(!"bad src1 file");
1061 emitABS(0x31, insn
->src(1));
1062 emitNEG(0x30, insn
->src(0));
1064 emitABS(0x2e, insn
->src(0));
1065 emitNEG(0x2d, insn
->src(1));
1067 if (insn
->op
== OP_SUB
)
1068 code
[1] ^= 0x00002000;
1070 emitGPR(0x08, insn
->src(0));
1071 emitGPR(0x00, insn
->def(0));
// Encodes a double-precision multiply.  The opcode depends on the file of
// source 1 (register, constant buffer or 19-bit immediate forms).  The
// combined negate of both sources goes to bit 0x30, followed by source 0 and
// the destination.
// NOTE(review): the register-form case label and default label are not
// visible in this excerpt.
1075 CodeEmitterGM107::emitDMUL()
1077 switch (insn
->src(1).getFile()) {
1079 emitInsn(0x5c800000);
1080 emitGPR (0x14, insn
->src(1));
1082 case FILE_MEMORY_CONST
:
1083 emitInsn(0x4c800000);
1084 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1086 case FILE_IMMEDIATE
:
1087 emitInsn(0x38800000);
1088 emitIMMD(0x14, 19, insn
->src(1));
1091 assert(!"bad src1 file");
1095 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1098 emitGPR (0x08, insn
->src(0));
1099 emitGPR (0x00, insn
->def(0));
// Encodes a double-precision fused multiply-add.  The outer switch is on the
// file of source 2:
//  - one arm picks the opcode from the file of source 1 (register, constant
//    buffer or 19-bit immediate) and places source 2 in the GPR slot at 0x27;
//  - FILE_MEMORY_CONST uses opcode word 0x53700000, places source 1 in the
//    GPR slot at 0x27 and source 2 in the constant-buffer operand.
// The negate of source 2 goes to 0x31 and the combined negate of sources 0
// and 1 to 0x30.
// NOTE(review): the label of the first outer arm and the default labels are
// not visible in this excerpt.
1103 CodeEmitterGM107::emitDFMA()
1105 switch(insn
->src(2).getFile()) {
1107 switch (insn
->src(1).getFile()) {
1109 emitInsn(0x5b700000);
1110 emitGPR (0x14, insn
->src(1));
1112 case FILE_MEMORY_CONST
:
1113 emitInsn(0x4b700000);
1114 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1116 case FILE_IMMEDIATE
:
1117 emitInsn(0x36700000);
1118 emitIMMD(0x14, 19, insn
->src(1));
1121 assert(!"bad src1 file");
1124 emitGPR (0x27, insn
->src(2));
1126 case FILE_MEMORY_CONST
:
1127 emitInsn(0x53700000);
1128 emitGPR (0x27, insn
->src(1));
1129 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1132 assert(!"bad src2 file");
1137 emitNEG (0x31, insn
->src(2));
1138 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1140 emitGPR (0x08, insn
->src(0));
1141 emitGPR (0x00, insn
->def(0));
// Encodes a double-precision min/max.  The opcode depends on the file of
// source 1 (register, constant buffer or 19-bit immediate forms), followed by
// the abs/neg modifier bits of both sources.  Bit 0x2a is set for OP_MAX.
// NOTE(review): the register-form case label and default label are not
// visible in this excerpt.
1145 CodeEmitterGM107::emitDMNMX()
1147 switch (insn
->src(1).getFile()) {
1149 emitInsn(0x5c500000);
1150 emitGPR (0x14, insn
->src(1));
1152 case FILE_MEMORY_CONST
:
1153 emitInsn(0x4c500000);
1154 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1156 case FILE_IMMEDIATE
:
1157 emitInsn(0x38500000);
1158 emitIMMD(0x14, 19, insn
->src(1));
1161 assert(!"bad src1 file");
1165 emitABS (0x31, insn
->src(1));
1166 emitNEG (0x30, insn
->src(0));
1168 emitABS (0x2e, insn
->src(0));
1169 emitNEG (0x2d, insn
->src(1));
1170 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1172 emitGPR (0x08, insn
->src(0));
1173 emitGPR (0x00, insn
->def(0));
// Encodes a double-precision compare that writes a GPR.  The opcode depends
// on the file of source 1.  For operations other than OP_SET, the 2-bit
// field at 0x2d selects how the result is combined (AND = 0, OR = 1,
// XOR = 2) and source 2 is emitted as a predicate at 0x27.  Bit 0x34 is set
// when the destination type is TYPE_F32, and the compare condition goes to
// the 4-bit field at 0x30.
// NOTE(review): the inner switch header, the branch taken for plain OP_SET
// and the register-form/default labels are not visible in this excerpt.
1177 CodeEmitterGM107::emitDSET()
1179 const CmpInstruction
*insn
= this->insn
->asCmp();
1181 switch (insn
->src(1).getFile()) {
1183 emitInsn(0x59000000);
1184 emitGPR (0x14, insn
->src(1));
1186 case FILE_MEMORY_CONST
:
1187 emitInsn(0x49000000);
1188 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1190 case FILE_IMMEDIATE
:
1191 emitInsn(0x32000000);
1192 emitIMMD(0x14, 19, insn
->src(1));
1195 assert(!"bad src1 file");
1199 if (insn
->op
!= OP_SET
) {
1201 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1202 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1203 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1205 assert(!"invalid set op");
1208 emitPRED(0x27, insn
->src(2));
1213 emitABS (0x36, insn
->src(0));
1214 emitNEG (0x35, insn
->src(1));
1215 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1216 emitCond4(0x30, insn
->setCond
);
1218 emitABS (0x2c, insn
->src(1));
1219 emitNEG (0x2b, insn
->src(0));
1220 emitGPR (0x08, insn
->src(0));
1221 emitGPR (0x00, insn
->def(0));
// Encodes a double-precision compare that writes predicates.  The opcode
// depends on the file of source 1.  For operations other than OP_SET, the
// 2-bit field at 0x2d selects how the result is combined (AND = 0, OR = 1,
// XOR = 2) and source 2 is emitted as a predicate at 0x27.  The compare
// condition goes to the 4-bit field at 0x30.  The first destination
// predicate is written at 0x03 and, when present, the second at 0x00.
// NOTE(review): the inner switch header, the branch taken for plain OP_SET,
// what happens when def(1) is absent, and the register-form/default labels
// are not visible in this excerpt.
1225 CodeEmitterGM107::emitDSETP()
1227 const CmpInstruction
*insn
= this->insn
->asCmp();
1229 switch (insn
->src(1).getFile()) {
1231 emitInsn(0x5b800000);
1232 emitGPR (0x14, insn
->src(1));
1234 case FILE_MEMORY_CONST
:
1235 emitInsn(0x4b800000);
1236 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1238 case FILE_IMMEDIATE
:
1239 emitInsn(0x36800000);
1240 emitIMMD(0x14, 19, insn
->src(1));
1243 assert(!"bad src1 file");
1247 if (insn
->op
!= OP_SET
) {
1249 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1250 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1251 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1253 assert(!"invalid set op");
1256 emitPRED(0x27, insn
->src(2));
1261 emitCond4(0x30, insn
->setCond
);
1262 emitABS (0x2c, insn
->src(1));
1263 emitNEG (0x2b, insn
->src(0));
1264 emitGPR (0x08, insn
->src(0));
1265 emitABS (0x07, insn
->src(0));
1266 emitNEG (0x06, insn
->src(1));
1267 emitPRED (0x03, insn
->def(0));
1268 if (insn
->defExists(1))
1269 emitPRED(0x00, insn
->def(1));
1274 /*******************************************************************************
1276 ******************************************************************************/
// Encodes a single-precision add/subtract in one of two forms.
//  - Short form (source 1 is not a long immediate): the opcode depends on the
//    file of source 1; abs/neg bits sit at 0x31/0x30/0x2e/0x2d.  For OP_SUB,
//    bit 13 of the second code word (overall bit 0x2d, the negate bit of
//    source 1) is toggled.
//  - Long form: opcode word 0x08000000 with a 32-bit immediate at 0x14 and
//    abs/neg bits at 0x39/0x38/0x36/0x35.  For OP_SUB, bit 19 of the second
//    code word is toggled, which is overall bit 51, the top bit of that
//    32-bit immediate.
// NOTE(review): the `else` separating the two forms, the register-form case
// label, the default label and some statements in between are not visible in
// this excerpt.
1279 CodeEmitterGM107::emitFADD()
1281 if (!longIMMD(insn
->src(1))) {
1282 switch (insn
->src(1).getFile()) {
1284 emitInsn(0x5c580000);
1285 emitGPR (0x14, insn
->src(1));
1287 case FILE_MEMORY_CONST
:
1288 emitInsn(0x4c580000);
1289 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1291 case FILE_IMMEDIATE
:
1292 emitInsn(0x38580000);
1293 emitIMMD(0x14, 19, insn
->src(1));
1296 assert(!"bad src1 file");
1300 emitABS(0x31, insn
->src(1));
1301 emitNEG(0x30, insn
->src(0));
1303 emitABS(0x2e, insn
->src(0));
1304 emitNEG(0x2d, insn
->src(1));
1307 if (insn
->op
== OP_SUB
)
1308 code
[1] ^= 0x00002000;
1310 emitInsn(0x08000000);
1311 emitABS(0x39, insn
->src(1));
1312 emitNEG(0x38, insn
->src(0));
1314 emitABS(0x36, insn
->src(0));
1315 emitNEG(0x35, insn
->src(1));
1317 emitIMMD(0x14, 32, insn
->src(1));
1319 if (insn
->op
== OP_SUB
)
1320 code
[1] ^= 0x00080000;
1323 emitGPR(0x08, insn
->src(0));
1324 emitGPR(0x00, insn
->def(0));
// Encodes a single-precision multiply in one of two forms.
//  - Short form (source 1 is not a long immediate): the opcode depends on the
//    file of source 1 and the combined negate of both sources goes to 0x30.
//  - Long form: opcode word 0x1e000000 with a 32-bit immediate at 0x14; a
//    net negation of the two sources is applied by flipping the immediate's
//    sign bit in the second code word.
// NOTE(review): the `else` separating the two forms, the register-form case
// label, the default label and some statements in between are not visible in
// this excerpt.
1328 CodeEmitterGM107::emitFMUL()
1330 if (!longIMMD(insn
->src(1))) {
1331 switch (insn
->src(1).getFile()) {
1333 emitInsn(0x5c680000);
1334 emitGPR (0x14, insn
->src(1));
1336 case FILE_MEMORY_CONST
:
1337 emitInsn(0x4c680000);
1338 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1340 case FILE_IMMEDIATE
:
1341 emitInsn(0x38680000);
1342 emitIMMD(0x14, 19, insn
->src(1));
1345 assert(!"bad src1 file");
1349 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1355 emitInsn(0x1e000000);
1359 emitIMMD(0x14, 32, insn
->src(1));
1360 if (insn
->src(0).mod
.neg() ^ insn
->src(1).mod
.neg())
1361 code
[1] ^= 0x00080000; /* flip immd sign bit */
1364 emitGPR(0x08, insn
->src(0));
1365 emitGPR(0x00, insn
->def(0));
// Encodes a single-precision fused multiply-add.  The outer switch is on the
// file of source 2:
//  - one arm picks the opcode from the file of source 1 (register, constant
//    buffer, or immediate) and places source 2 in the GPR slot at 0x27.  For
//    a long immediate the opcode word is 0x0c000000 with a 32-bit immediate,
//    and the assert requires the destination and source 2 to have the same
//    register id; otherwise the 19-bit immediate form is used;
//  - FILE_MEMORY_CONST uses opcode word 0x51800000, places source 1 in the
//    GPR slot at 0x27 and source 2 in the constant-buffer operand.
// The negate bits are written at 0x39/0x38 in one form and at 0x31/0x30 in
// the other.
// NOTE(review): the updates of isLongIMMD, the condition choosing between the
// two negate-bit positions, and several case/default/else lines are not
// visible in this excerpt.
1369 CodeEmitterGM107::emitFFMA()
1371 bool isLongIMMD
= false;
1372 switch(insn
->src(2).getFile()) {
1374 switch (insn
->src(1).getFile()) {
1376 emitInsn(0x59800000);
1377 emitGPR (0x14, insn
->src(1));
1379 case FILE_MEMORY_CONST
:
1380 emitInsn(0x49800000);
1381 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1383 case FILE_IMMEDIATE
:
1384 if (longIMMD(insn
->getSrc(1))) {
1385 assert(insn
->getDef(0)->reg
.data
.id
== insn
->getSrc(2)->reg
.data
.id
);
1387 emitInsn(0x0c000000);
1388 emitIMMD(0x14, 32, insn
->src(1));
1390 emitInsn(0x32800000);
1391 emitIMMD(0x14, 19, insn
->src(1));
1395 assert(!"bad src1 file");
1399 emitGPR (0x27, insn
->src(2));
1401 case FILE_MEMORY_CONST
:
1402 emitInsn(0x51800000);
1403 emitGPR (0x27, insn
->src(1));
1404 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1407 assert(!"bad src2 file");
1412 emitNEG (0x39, insn
->src(2));
1413 emitNEG2(0x38, insn
->src(0), insn
->src(1));
1419 emitNEG (0x31, insn
->src(2));
1420 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1425 emitGPR(0x08, insn
->src(0));
1426 emitGPR(0x00, insn
->def(0));
// Encodes a multi-function-unit operation (opcode word 0x50800000).  The
// 4-bit function selector at 0x14 is: COS 0, SIN 1, EX2 2, LG2 3,
// RCP 4 + 2 * subOp, RSQ 5 + 2 * subOp, SQRT 8.  Other operations trip the
// assert.  The source's neg/abs modifiers go to 0x30/0x2e.
// NOTE(review): the switch header, the declaration of `mufu` and the default
// label are not visible in this excerpt.
1430 CodeEmitterGM107::emitMUFU()
1435 case OP_COS
: mufu
= 0; break;
1436 case OP_SIN
: mufu
= 1; break;
1437 case OP_EX2
: mufu
= 2; break;
1438 case OP_LG2
: mufu
= 3; break;
1439 case OP_RCP
: mufu
= 4 + 2 * insn
->subOp
; break;
1440 case OP_RSQ
: mufu
= 5 + 2 * insn
->subOp
; break;
1441 case OP_SQRT
: mufu
= 8; break;
1443 assert(!"invalid mufu");
1447 emitInsn (0x50800000);
1449 emitNEG (0x30, insn
->src(0));
1450 emitABS (0x2e, insn
->src(0));
1451 emitField(0x14, 4, mufu
);
1452 emitGPR (0x08, insn
->src(0));
1453 emitGPR (0x00, insn
->def(0));
// Encodes a single-precision min/max.  The opcode depends on the file of
// source 1 (register, constant buffer or 19-bit immediate forms).  Bit 0x2a
// is set for OP_MAX, followed by the abs/neg modifier bits of both sources.
// NOTE(review): the register-form case label and default label are not
// visible in this excerpt.
1457 CodeEmitterGM107::emitFMNMX()
1459 switch (insn
->src(1).getFile()) {
1461 emitInsn(0x5c600000);
1462 emitGPR (0x14, insn
->src(1));
1464 case FILE_MEMORY_CONST
:
1465 emitInsn(0x4c600000);
1466 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1468 case FILE_IMMEDIATE
:
1469 emitInsn(0x38600000);
1470 emitIMMD(0x14, 19, insn
->src(1));
1473 assert(!"bad src1 file");
1477 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1480 emitABS(0x31, insn
->src(1));
1481 emitNEG(0x30, insn
->src(0));
1483 emitABS(0x2e, insn
->src(0));
1484 emitNEG(0x2d, insn
->src(1));
1486 emitGPR(0x08, insn
->src(0));
1487 emitGPR(0x00, insn
->def(0));
// Encodes RRO.  The opcode depends on the file of source 0 (register,
// constant buffer or 19-bit immediate forms).  The source's abs/neg
// modifiers go to 0x31/0x2d and bit 0x27 is set when the operation is
// OP_PREEX2.
// NOTE(review): the register-form case label and default label are not
// visible in this excerpt.
1491 CodeEmitterGM107::emitRRO()
1493 switch (insn
->src(0).getFile()) {
1495 emitInsn(0x5c900000);
1496 emitGPR (0x14, insn
->src(0));
1498 case FILE_MEMORY_CONST
:
1499 emitInsn(0x4c900000);
1500 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1502 case FILE_IMMEDIATE
:
1503 emitInsn(0x38900000);
1504 emitIMMD(0x14, 19, insn
->src(0));
1507 assert(!"bad src file");
1511 emitABS (0x31, insn
->src(0));
1512 emitNEG (0x2d, insn
->src(0));
1513 emitField(0x27, 1, insn
->op
== OP_PREEX2
);
1514 emitGPR (0x00, insn
->def(0));
// Encodes FCMP.  The condition code starts as insn->setCond and is passed
// through reverseCondCode when source 2 carries a negate modifier.  The outer
// switch is on the file of source 2:
//  - one arm picks the opcode from the file of source 1 (register, constant
//    buffer or 19-bit immediate) and places source 2 in the GPR slot at 0x27;
//  - FILE_MEMORY_CONST uses opcode word 0x53a00000, places source 1 in the
//    GPR slot at 0x27 and source 2 in the constant-buffer operand.
// The condition goes to the 4-bit field at 0x30.
// NOTE(review): the label of the first outer arm and the default labels are
// not visible in this excerpt.
1518 CodeEmitterGM107::emitFCMP()
1520 const CmpInstruction
*insn
= this->insn
->asCmp();
1521 CondCode cc
= insn
->setCond
;
1523 if (insn
->src(2).mod
.neg())
1524 cc
= reverseCondCode(cc
);
1526 switch(insn
->src(2).getFile()) {
1528 switch (insn
->src(1).getFile()) {
1530 emitInsn(0x5ba00000);
1531 emitGPR (0x14, insn
->src(1));
1533 case FILE_MEMORY_CONST
:
1534 emitInsn(0x4ba00000);
1535 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1537 case FILE_IMMEDIATE
:
1538 emitInsn(0x36a00000);
1539 emitIMMD(0x14, 19, insn
->src(1));
1542 assert(!"bad src1 file");
1545 emitGPR (0x27, insn
->src(2));
1547 case FILE_MEMORY_CONST
:
1548 emitInsn(0x53a00000);
1549 emitGPR (0x27, insn
->src(1));
1550 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1553 assert(!"bad src2 file");
1557 emitCond4(0x30, cc
);
1559 emitGPR (0x08, insn
->src(0));
1560 emitGPR (0x00, insn
->def(0));
// Encodes a single-precision compare that writes a GPR.  The opcode depends
// on the file of source 1.  For operations other than OP_SET, the 2-bit
// field at 0x2d selects how the result is combined (AND = 0, OR = 1,
// XOR = 2) and source 2 is emitted as a predicate at 0x27.  Bit 0x34 is set
// when the destination type is TYPE_F32, and the compare condition goes to
// the 4-bit field at 0x30.
// NOTE(review): the inner switch header, the branch taken for plain OP_SET
// and the register-form/default labels are not visible in this excerpt.
1564 CodeEmitterGM107::emitFSET()
1566 const CmpInstruction
*insn
= this->insn
->asCmp();
1568 switch (insn
->src(1).getFile()) {
1570 emitInsn(0x58000000);
1571 emitGPR (0x14, insn
->src(1));
1573 case FILE_MEMORY_CONST
:
1574 emitInsn(0x48000000);
1575 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1577 case FILE_IMMEDIATE
:
1578 emitInsn(0x30000000);
1579 emitIMMD(0x14, 19, insn
->src(1));
1582 assert(!"bad src1 file");
1586 if (insn
->op
!= OP_SET
) {
1588 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1589 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1590 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1592 assert(!"invalid set op");
1595 emitPRED(0x27, insn
->src(2));
1601 emitABS (0x36, insn
->src(0));
1602 emitNEG (0x35, insn
->src(1));
1603 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1604 emitCond4(0x30, insn
->setCond
);
1606 emitABS (0x2c, insn
->src(1));
1607 emitNEG (0x2b, insn
->src(0));
1608 emitGPR (0x08, insn
->src(0));
1609 emitGPR (0x00, insn
->def(0));
/*
 * Encode FSETP.  The opcode depends on the register file of src(1).  For ops
 * other than OP_SET a combine selector (AND = 0, OR = 1, XOR = 2) is written
 * at 0x2d and src(2) is emitted as a predicate at 0x27.  def(0) is written
 * as a predicate at 0x03 and, when a second definition exists, def(1) is
 * written as a predicate at 0x00.
 */
1613 CodeEmitterGM107::emitFSETP()
1615 const CmpInstruction
*insn
= this->insn
->asCmp();
1617 switch (insn
->src(1).getFile()) {
1619 emitInsn(0x5bb00000);
1620 emitGPR (0x14, insn
->src(1));
1622 case FILE_MEMORY_CONST
:
1623 emitInsn(0x4bb00000);
1624 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1626 case FILE_IMMEDIATE
:
1627 emitInsn(0x36b00000);
1628 emitIMMD(0x14, 19, insn
->src(1));
1631 assert(!"bad src1 file");
1635 if (insn
->op
!= OP_SET
) {
1637 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1638 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1639 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1641 assert(!"invalid set op");
1644 emitPRED(0x27, insn
->src(2));
1649 emitCond4(0x30, insn
->setCond
);
1651 emitABS (0x2c, insn
->src(1));
1652 emitNEG (0x2b, insn
->src(0));
1653 emitGPR (0x08, insn
->src(0));
1654 emitABS (0x07, insn
->src(0));
1655 emitNEG (0x06, insn
->src(1));
1656 emitPRED (0x03, insn
->def(0));
1657 if (insn
->defExists(1))
1658 emitPRED(0x00, insn
->def(1));
/*
 * Encode FSWZADD (opcode 0x50f80000).  insn->lanes is stored at 0x26 and
 * insn->subOp at 0x1c.  src(1) is only emitted as a GPR (at 0x14) when it is
 * not the predicate source (predSrc != 1); src(0) and def(0) are emitted as
 * GPRs at 0x08 and 0x00.
 */
1664 CodeEmitterGM107::emitFSWZADD()
1666 emitInsn (0x50f80000);
1670 emitField(0x26, 1, insn
->lanes
); /* abused for .ndv */
1671 emitField(0x1c, 8, insn
->subOp
);
1672 if (insn
->predSrc
!= 1)
1673 emitGPR (0x14, insn
->src(1));
1676 emitGPR (0x08, insn
->src(0));
1677 emitGPR (0x00, insn
->def(0));
1680 /*******************************************************************************
1682 ******************************************************************************/
/*
 * Encode a logic operation.  lop is 0, 1 or 2 for OP_AND, OP_OR and OP_XOR.
 * When src(1) is not a long immediate the opcode depends on src(1)'s
 * register file, lop is written at 0x29 and the INV flags of src(1)/src(0)
 * at 0x28/0x27.  Otherwise opcode 0x04000000 is used with the INV flags at
 * 0x38/0x37, lop at 0x35 and src(1) emitted through emitIMMD(0x14, 32, ...).
 */
1685 CodeEmitterGM107::emitLOP()
1690 case OP_AND
: lop
= 0; break;
1691 case OP_OR
: lop
= 1; break;
1692 case OP_XOR
: lop
= 2; break;
1694 assert(!"invalid lop");
1698 if (!longIMMD(insn
->src(1))) {
1699 switch (insn
->src(1).getFile()) {
1701 emitInsn(0x5c400000);
1702 emitGPR (0x14, insn
->src(1));
1704 case FILE_MEMORY_CONST
:
1705 emitInsn(0x4c400000);
1706 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1708 case FILE_IMMEDIATE
:
1709 emitInsn(0x38400000);
1710 emitIMMD(0x14, 19, insn
->src(1));
1713 assert(!"bad src1 file");
1719 emitField(0x29, 2, lop
);
1720 emitINV (0x28, insn
->src(1));
1721 emitINV (0x27, insn
->src(0));
1723 emitInsn (0x04000000);
1725 emitINV (0x38, insn
->src(1));
1726 emitINV (0x37, insn
->src(0));
1727 emitField(0x35, 2, lop
);
1729 emitIMMD (0x14, 32, insn
->src(1));
1732 emitGPR (0x08, insn
->src(0));
1733 emitGPR (0x00, insn
->def(0));
1736 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
/*
 * Encode NOT.  The single operand is src(0): when it is not a long immediate
 * the opcode depends on its register file, otherwise opcode 0x05600000 is
 * used with the operand emitted through emitIMMD(0x14, 32, ...).  def(0) is
 * emitted as a GPR at 0x00.
 *
 * The long-immediate path encodes src(0), the operand that longIMMD() just
 * tested.  It previously referenced src(1), which no other path in this
 * function reads.
 */
1738 CodeEmitterGM107::emitNOT()
1740 if (!longIMMD(insn
->src(0))) {
1741 switch (insn
->src(0).getFile()) {
1743 emitInsn(0x5c400700);
1744 emitGPR (0x14, insn
->src(0));
1746 case FILE_MEMORY_CONST
:
1747 emitInsn(0x4c400700);
1748 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1750 case FILE_IMMEDIATE
:
1751 emitInsn(0x38400700);
1752 emitIMMD(0x14, 19, insn
->src(0));
1755 assert(!"bad src1 file");
1760 emitInsn (0x05600000);
1761 emitIMMD (0x14, 32, insn
->src(0));
1765 emitGPR(0x00, insn
->def(0));
/*
 * Encode an integer add.  When src(1) is not a long immediate the opcode
 * depends on src(1)'s register file and the NEG flags of src(0)/src(1) go to
 * 0x31/0x30.  Otherwise opcode 0x1c000000 is used, with the NEG flag of
 * src(0) at 0x38 and src(1) emitted through emitIMMD(0x14, 32, ...).
 * For OP_SUB, bit 0x00010000 of code[1] is toggled.
 */
1769 CodeEmitterGM107::emitIADD()
1771 if (!longIMMD(insn
->src(1))) {
1772 switch (insn
->src(1).getFile()) {
1774 emitInsn(0x5c100000);
1775 emitGPR (0x14, insn
->src(1));
1777 case FILE_MEMORY_CONST
:
1778 emitInsn(0x4c100000);
1779 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1781 case FILE_IMMEDIATE
:
1782 emitInsn(0x38100000);
1783 emitIMMD(0x14, 19, insn
->src(1));
1786 assert(!"bad src1 file");
1790 emitNEG(0x31, insn
->src(0));
1791 emitNEG(0x30, insn
->src(1));
1795 emitInsn(0x1c000000);
1796 emitNEG (0x38, insn
->src(0));
1800 emitIMMD(0x14, 32, insn
->src(1));
1803 if (insn
->op
== OP_SUB
)
1804 code
[1] ^= 0x00010000;
1806 emitGPR(0x08, insn
->src(0));
1807 emitGPR(0x00, insn
->def(0));
/*
 * Encode an integer multiply.  Three flags are written in both forms:
 * isSignedType(sType), isSignedType(dType) and subOp == MUL_HIGH.  When
 * src(1) is not a long immediate they go to 0x29/0x28/0x27 and the opcode
 * depends on src(1)'s register file; otherwise opcode 0x1f000000 is used,
 * the flags go to 0x37/0x36/0x35 and src(1) is emitted through
 * emitIMMD(0x14, 32, ...).
 */
1811 CodeEmitterGM107::emitIMUL()
1813 if (!longIMMD(insn
->src(1))) {
1814 switch (insn
->src(1).getFile()) {
1816 emitInsn(0x5c380000);
1817 emitGPR (0x14, insn
->src(1));
1819 case FILE_MEMORY_CONST
:
1820 emitInsn(0x4c380000);
1821 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1823 case FILE_IMMEDIATE
:
1824 emitInsn(0x38380000);
1825 emitIMMD(0x14, 19, insn
->src(1));
1828 assert(!"bad src1 file");
1832 emitField(0x29, 1, isSignedType(insn
->sType
));
1833 emitField(0x28, 1, isSignedType(insn
->dType
));
1834 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1836 emitInsn (0x1f000000);
1837 emitField(0x37, 1, isSignedType(insn
->sType
));
1838 emitField(0x36, 1, isSignedType(insn
->dType
));
1839 emitField(0x35, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1841 emitIMMD (0x14, 32, insn
->src(1));
1844 emitGPR(0x08, insn
->src(0));
1845 emitGPR(0x00, insn
->def(0));
/*
 * Encode an integer multiply-add.  The opcode is chosen from the register
 * files of src(2) and src(1).  Flags written afterwards: subOp == MUL_HIGH
 * at 0x36, isSignedType(sType) at 0x35, the NEG flag of src(2) at 0x34,
 * emitNEG2 of src(0)/src(1) at 0x33 and isSignedType(dType) at 0x30.
 */
1849 CodeEmitterGM107::emitIMAD()
1851 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1852 switch(insn
->src(2).getFile()) {
1854 switch (insn
->src(1).getFile()) {
1856 emitInsn(0x5a000000);
1857 emitGPR (0x14, insn
->src(1));
1859 case FILE_MEMORY_CONST
:
1860 emitInsn(0x4a000000);
1861 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1863 case FILE_IMMEDIATE
:
1864 emitInsn(0x34000000);
1865 emitIMMD(0x14, 19, insn
->src(1));
1868 assert(!"bad src1 file");
1871 emitGPR (0x27, insn
->src(2));
1873 case FILE_MEMORY_CONST
:
1874 emitInsn(0x52000000);
1875 emitGPR (0x27, insn
->src(1));
1876 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1879 assert(!"bad src2 file");
1883 emitField(0x36, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1884 emitField(0x35, 1, isSignedType(insn
->sType
));
1885 emitNEG (0x34, insn
->src(2));
1886 emitNEG2 (0x33, insn
->src(0), insn
->src(1));
1889 emitField(0x30, 1, isSignedType(insn
->dType
));
1891 emitGPR (0x08, insn
->src(0));
1892 emitGPR (0x00, insn
->def(0));
/*
 * Encode ISCADD.  src(1) must be an immediate (asserted) and is emitted
 * through emitIMMD(0x27, 5, ...).  The opcode depends on the register file
 * of src(2), which is emitted at 0x14.  The NEG flags of src(0) and src(2)
 * go to 0x31 and 0x30.
 */
1896 CodeEmitterGM107::emitISCADD()
1898 assert(insn
->src(1).get()->asImm());
1900 switch (insn
->src(2).getFile()) {
1902 emitInsn(0x5c180000);
1903 emitGPR (0x14, insn
->src(2));
1905 case FILE_MEMORY_CONST
:
1906 emitInsn(0x4c180000);
1907 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1909 case FILE_IMMEDIATE
:
1910 emitInsn(0x38180000);
1911 emitIMMD(0x14, 19, insn
->src(2));
1914 assert(!"bad src1 file");
1917 emitNEG (0x31, insn
->src(0));
1918 emitNEG (0x30, insn
->src(2));
1920 emitIMMD(0x27, 5, insn
->src(1));
1921 emitGPR (0x08, insn
->src(0));
1922 emitGPR (0x00, insn
->def(0));
/*
 * Encode XMAD.  src(0) must be a GPR (asserted).  Four operand layouts are
 * handled, each with its own opcode:
 *   - src(2) in a constant buffer, src(1) a GPR       (0x51000000)
 *   - src(1) in a constant buffer, src(2) a GPR       (0x4e000000)
 *   - src(1) an immediate, src(2) a GPR; the H1(1) sub-op bit must be
 *     clear                                            (0x36000000)
 *   - src(1) and src(2) both GPRs                      (0x5b000000)
 * The positions of several sub-op derived fields depend on the local
 * constbuf flag.
 */
1926 CodeEmitterGM107::emitXMAD()
1928 assert(insn
->src(0).getFile() == FILE_GPR
);
1930 bool constbuf
= false;
1931 bool psl_mrg
= true;
1932 bool immediate
= false;
1933 if (insn
->src(2).getFile() == FILE_MEMORY_CONST
) {
1934 assert(insn
->src(1).getFile() == FILE_GPR
);
1937 emitInsn(0x51000000);
1938 emitGPR(0x27, insn
->src(1));
1939 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1940 } else if (insn
->src(1).getFile() == FILE_MEMORY_CONST
) {
1941 assert(insn
->src(2).getFile() == FILE_GPR
);
1943 emitInsn(0x4e000000);
1944 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1945 emitGPR(0x27, insn
->src(2));
1946 } else if (insn
->src(1).getFile() == FILE_IMMEDIATE
) {
1947 assert(insn
->src(2).getFile() == FILE_GPR
);
1948 assert(!(insn
->subOp
& NV50_IR_SUBOP_XMAD_H1(1)));
1950 emitInsn(0x36000000);
1951 emitIMMD(0x14, 16, insn
->src(1));
1952 emitGPR(0x27, insn
->src(2));
1954 assert(insn
->src(1).getFile() == FILE_GPR
);
1955 assert(insn
->src(2).getFile() == FILE_GPR
);
1956 emitInsn(0x5b000000);
1957 emitGPR(0x14, insn
->src(1));
1958 emitGPR(0x27, insn
->src(2));
1962 emitField(constbuf
? 0x37 : 0x24, 2, insn
->subOp
& 0x3);
1964 unsigned cmode
= (insn
->subOp
& NV50_IR_SUBOP_XMAD_CMODE_MASK
);
1965 cmode
>>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT
;
1966 emitField(0x32, constbuf
? 2 : 3, cmode
);
1968 emitX(constbuf
? 0x36 : 0x26);
1971 emitGPR(0x0, insn
->def(0));
1972 emitGPR(0x8, insn
->src(0));
1975 if (isSignedType(insn
->sType
)) {
1976 uint16_t h1s
= insn
->subOp
& NV50_IR_SUBOP_XMAD_H1_MASK
;
1977 emitField(0x30, 2, h1s
>> NV50_IR_SUBOP_XMAD_H1_SHIFT
);
1979 emitField(0x35, 1, insn
->subOp
& NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1981 bool h1
= insn
->subOp
& NV50_IR_SUBOP_XMAD_H1(1);
1982 emitField(constbuf
? 0x34 : 0x23, 1, h1
);
/*
 * Encode an integer min/max.  The opcode depends on the register file of
 * src(1).  isSignedType(dType) goes to 0x30, subOp to 0x2b, and the flag at
 * 0x2a is set when the op is OP_MAX.
 */
1987 CodeEmitterGM107::emitIMNMX()
1989 switch (insn
->src(1).getFile()) {
1991 emitInsn(0x5c200000);
1992 emitGPR (0x14, insn
->src(1));
1994 case FILE_MEMORY_CONST
:
1995 emitInsn(0x4c200000);
1996 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1998 case FILE_IMMEDIATE
:
1999 emitInsn(0x38200000);
2000 emitIMMD(0x14, 19, insn
->src(1));
2003 assert(!"bad src1 file");
2007 emitField(0x30, 1, isSignedType(insn
->dType
));
2009 emitField(0x2b, 2, insn
->subOp
);
2010 emitField(0x2a, 1, insn
->op
== OP_MAX
);
2012 emitGPR (0x08, insn
->src(0));
2013 emitGPR (0x00, insn
->def(0));
/*
 * Encode ICMP.  The instruction is viewed as a CmpInstruction; when src(2)
 * carries a neg modifier the condition code is replaced by
 * reverseCondCode(cc).  The opcode is chosen from the register files of
 * src(2) and src(1).  The condition code is written with emitCond3 at 0x31
 * and isSignedType(sType) at 0x30.
 */
2017 CodeEmitterGM107::emitICMP()
2019 const CmpInstruction
*insn
= this->insn
->asCmp();
2020 CondCode cc
= insn
->setCond
;
2022 if (insn
->src(2).mod
.neg())
2023 cc
= reverseCondCode(cc
);
2025 switch(insn
->src(2).getFile()) {
2027 switch (insn
->src(1).getFile()) {
2029 emitInsn(0x5b400000);
2030 emitGPR (0x14, insn
->src(1));
2032 case FILE_MEMORY_CONST
:
2033 emitInsn(0x4b400000);
2034 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2036 case FILE_IMMEDIATE
:
2037 emitInsn(0x36400000);
2038 emitIMMD(0x14, 19, insn
->src(1));
2041 assert(!"bad src1 file");
2044 emitGPR (0x27, insn
->src(2));
2046 case FILE_MEMORY_CONST
:
2047 emitInsn(0x53400000);
2048 emitGPR (0x27, insn
->src(1));
2049 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
2052 assert(!"bad src2 file");
2056 emitCond3(0x31, cc
);
2057 emitField(0x30, 1, isSignedType(insn
->sType
));
2058 emitGPR (0x08, insn
->src(0));
2059 emitGPR (0x00, insn
->def(0));
/*
 * Encode ISET.  The opcode depends on the register file of src(1).  For ops
 * other than OP_SET a combine selector (AND = 0, OR = 1, XOR = 2) is written
 * at 0x2d and src(2) is emitted as a predicate at 0x27.  The condition code
 * is written with emitCond3 at 0x31, isSignedType(sType) at 0x30, and the
 * flag at 0x2c is set when dType is TYPE_F32.
 */
2063 CodeEmitterGM107::emitISET()
2065 const CmpInstruction
*insn
= this->insn
->asCmp();
2067 switch (insn
->src(1).getFile()) {
2069 emitInsn(0x5b500000);
2070 emitGPR (0x14, insn
->src(1));
2072 case FILE_MEMORY_CONST
:
2073 emitInsn(0x4b500000);
2074 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2076 case FILE_IMMEDIATE
:
2077 emitInsn(0x36500000);
2078 emitIMMD(0x14, 19, insn
->src(1));
2081 assert(!"bad src1 file");
2085 if (insn
->op
!= OP_SET
) {
2087 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
2088 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
2089 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
2091 assert(!"invalid set op");
2094 emitPRED(0x27, insn
->src(2));
2099 emitCond3(0x31, insn
->setCond
);
2100 emitField(0x30, 1, isSignedType(insn
->sType
));
2102 emitField(0x2c, 1, insn
->dType
== TYPE_F32
);
2104 emitGPR (0x08, insn
->src(0));
2105 emitGPR (0x00, insn
->def(0));
/*
 * Encode ISETP.  The opcode depends on the register file of src(1).  For ops
 * other than OP_SET a combine selector (AND = 0, OR = 1, XOR = 2) is written
 * at 0x2d and src(2) is emitted as a predicate at 0x27.  def(0) is written
 * as a predicate at 0x03 and, when a second definition exists, def(1) is
 * written as a predicate at 0x00.
 */
2109 CodeEmitterGM107::emitISETP()
2111 const CmpInstruction
*insn
= this->insn
->asCmp();
2113 switch (insn
->src(1).getFile()) {
2115 emitInsn(0x5b600000);
2116 emitGPR (0x14, insn
->src(1));
2118 case FILE_MEMORY_CONST
:
2119 emitInsn(0x4b600000);
2120 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2122 case FILE_IMMEDIATE
:
2123 emitInsn(0x36600000);
2124 emitIMMD(0x14, 19, insn
->src(1));
2127 assert(!"bad src1 file");
2131 if (insn
->op
!= OP_SET
) {
2133 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
2134 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
2135 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
2137 assert(!"invalid set op");
2140 emitPRED(0x27, insn
->src(2));
2145 emitCond3(0x31, insn
->setCond
);
2146 emitField(0x30, 1, isSignedType(insn
->sType
));
2148 emitGPR (0x08, insn
->src(0));
2149 emitPRED (0x03, insn
->def(0));
2150 if (insn
->defExists(1))
2151 emitPRED(0x00, insn
->def(1));
/*
 * Encode a shift-left.  The opcode depends on the register file of src(1);
 * the flag at 0x27 is set when subOp equals NV50_IR_SUBOP_SHIFT_WRAP.
 */
2157 CodeEmitterGM107::emitSHL()
2159 switch (insn
->src(1).getFile()) {
2161 emitInsn(0x5c480000);
2162 emitGPR (0x14, insn
->src(1));
2164 case FILE_MEMORY_CONST
:
2165 emitInsn(0x4c480000);
2166 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2168 case FILE_IMMEDIATE
:
2169 emitInsn(0x38480000);
2170 emitIMMD(0x14, 19, insn
->src(1));
2173 assert(!"bad src1 file");
2179 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2180 emitGPR (0x08, insn
->src(0));
2181 emitGPR (0x00, insn
->def(0));
/*
 * Encode a shift-right.  The opcode depends on the register file of src(1).
 * isSignedType(dType) goes to 0x30 and the flag at 0x27 is set when subOp
 * equals NV50_IR_SUBOP_SHIFT_WRAP.
 */
2185 CodeEmitterGM107::emitSHR()
2187 switch (insn
->src(1).getFile()) {
2189 emitInsn(0x5c280000);
2190 emitGPR (0x14, insn
->src(1));
2192 case FILE_MEMORY_CONST
:
2193 emitInsn(0x4c280000);
2194 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2196 case FILE_IMMEDIATE
:
2197 emitInsn(0x38280000);
2198 emitIMMD(0x14, 19, insn
->src(1));
2201 assert(!"bad src1 file");
2205 emitField(0x30, 1, isSignedType(insn
->dType
));
2208 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2209 emitGPR (0x08, insn
->src(0));
2210 emitGPR (0x00, insn
->def(0));
/*
 * Encode SHF.  The opcode depends on the register file of src(1) and on
 * whether the op is OP_SHL.  The SHIFT_WRAP and SHIFT_HIGH bits of subOp are
 * written as flags at 0x32 and 0x30, src(2) is emitted as a GPR at 0x27 and
 * the local type code (selected from sType) at 0x25.
 */
2214 CodeEmitterGM107::emitSHF()
2218 switch (insn
->src(1).getFile()) {
2220 emitInsn(insn
->op
== OP_SHL
? 0x5bf80000 : 0x5cf80000);
2221 emitGPR(0x14, insn
->src(1));
2223 case FILE_IMMEDIATE
:
2224 emitInsn(insn
->op
== OP_SHL
? 0x36f80000 : 0x38f80000);
2225 emitIMMD(0x14, 19, insn
->src(1));
2228 assert(!"bad src1 file");
2232 switch (insn
->sType
) {
2244 emitField(0x32, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_WRAP
));
2246 emitField(0x30, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_HIGH
));
2248 emitGPR (0x27, insn
->src(2));
2249 emitField(0x25, 2, type
);
2250 emitGPR (0x08, insn
->src(0));
2251 emitGPR (0x00, insn
->def(0));
/*
 * Encode POPC.  The opcode depends on the register file of src(0), which is
 * emitted at 0x14; the INV flag of src(0) goes to 0x28 and def(0) is emitted
 * as a GPR at 0x00.
 */
2255 CodeEmitterGM107::emitPOPC()
2257 switch (insn
->src(0).getFile()) {
2259 emitInsn(0x5c080000);
2260 emitGPR (0x14, insn
->src(0));
2262 case FILE_MEMORY_CONST
:
2263 emitInsn(0x4c080000);
2264 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2266 case FILE_IMMEDIATE
:
2267 emitInsn(0x38080000);
2268 emitIMMD(0x14, 19, insn
->src(0));
2271 assert(!"bad src1 file");
2275 emitINV(0x28, insn
->src(0));
2276 emitGPR(0x00, insn
->def(0));
/*
 * Encode BFI.  The opcode is chosen from the register files of src(2) and
 * src(1); src(0) and def(0) are emitted as GPRs at 0x08 and 0x00.
 */
2280 CodeEmitterGM107::emitBFI()
2282 switch(insn
->src(2).getFile()) {
2284 switch (insn
->src(1).getFile()) {
2286 emitInsn(0x5bf00000);
2287 emitGPR (0x14, insn
->src(1));
2289 case FILE_MEMORY_CONST
:
2290 emitInsn(0x4bf00000);
2291 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2293 case FILE_IMMEDIATE
:
2294 emitInsn(0x36f00000);
2295 emitIMMD(0x14, 19, insn
->src(1));
2298 assert(!"bad src1 file");
2301 emitGPR (0x27, insn
->src(2));
2303 case FILE_MEMORY_CONST
:
2304 emitInsn(0x53f00000);
2305 emitGPR (0x27, insn
->src(1));
2306 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
2309 assert(!"bad src2 file");
2314 emitGPR (0x08, insn
->src(0));
2315 emitGPR (0x00, insn
->def(0));
/*
 * Encode BFE.  The opcode depends on the register file of src(1).
 * isSignedType(dType) goes to 0x30 and the flag at 0x28 is set when subOp
 * equals NV50_IR_SUBOP_EXTBF_REV.
 */
2319 CodeEmitterGM107::emitBFE()
2321 switch (insn
->src(1).getFile()) {
2323 emitInsn(0x5c000000);
2324 emitGPR (0x14, insn
->src(1));
2326 case FILE_MEMORY_CONST
:
2327 emitInsn(0x4c000000);
2328 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2330 case FILE_IMMEDIATE
:
2331 emitInsn(0x38000000);
2332 emitIMMD(0x14, 19, insn
->src(1));
2335 assert(!"bad src1 file");
2339 emitField(0x30, 1, isSignedType(insn
->dType
));
2341 emitField(0x28, 1, insn
->subOp
== NV50_IR_SUBOP_EXTBF_REV
);
2342 emitGPR (0x08, insn
->src(0));
2343 emitGPR (0x00, insn
->def(0));
/*
 * Encode FLO.  The opcode depends on the register file of src(0), which is
 * emitted at 0x14.  isSignedType(dType) goes to 0x30, the flag at 0x29 is
 * set when subOp equals NV50_IR_SUBOP_BFIND_SAMT, and the INV flag of src(0)
 * goes to 0x28.
 */
2347 CodeEmitterGM107::emitFLO()
2349 switch (insn
->src(0).getFile()) {
2351 emitInsn(0x5c300000);
2352 emitGPR (0x14, insn
->src(0));
2354 case FILE_MEMORY_CONST
:
2355 emitInsn(0x4c300000);
2356 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2358 case FILE_IMMEDIATE
:
2359 emitInsn(0x38300000);
2360 emitIMMD(0x14, 19, insn
->src(0));
2363 assert(!"bad src1 file");
2367 emitField(0x30, 1, isSignedType(insn
->dType
));
2369 emitField(0x29, 1, insn
->subOp
== NV50_IR_SUBOP_BFIND_SAMT
);
2370 emitINV (0x28, insn
->src(0));
2371 emitGPR (0x00, insn
->def(0));
2374 /*******************************************************************************
2376 ******************************************************************************/
/*
 * Write the load/store size code for `type` at position `pos`.
 * The code is derived from typeSizeof(type):
 *   1 byte  -> 0 (unsigned) or 1 (signed)
 *   2 bytes -> 2 (unsigned) or 3 (signed)
 *   4 bytes -> 4,  8 bytes -> 5,  16 bytes -> 6
 */
2379 CodeEmitterGM107::emitLDSTs(int pos
, DataType type
)
2383 switch (typeSizeof(type
)) {
2384 case 1: data
= isSignedType(type
) ? 1 : 0; break;
2385 case 2: data
= isSignedType(type
) ? 3 : 2; break;
2386 case 4: data
= 4; break;
2387 case 8: data
= 5; break;
2388 case 16: data
= 6; break;
2390 assert(!"bad type");
2394 emitField(pos
, 3, data
);
/*
 * Write the caching-mode code of the current instruction at position `pos`:
 * CACHE_CA = 0, CACHE_CG = 1, CACHE_CS = 2, CACHE_CV = 3.
 */
2398 CodeEmitterGM107::emitLDSTc(int pos
)
2402 switch (insn
->cache
) {
2403 case CACHE_CA
: mode
= 0; break;
2404 case CACHE_CG
: mode
= 1; break;
2405 case CACHE_CS
: mode
= 2; break;
2406 case CACHE_CV
: mode
= 3; break;
2408 assert(!"invalid caching mode");
2412 emitField(pos
, 2, mode
);
/*
 * Encode LDC (opcode 0xef900000): size code of dType at 0x30, subOp at 0x2c,
 * src(0) as a constant-buffer reference via emitCBUF, def(0) as a GPR.
 */
2416 CodeEmitterGM107::emitLDC()
2418 emitInsn (0xef900000);
2419 emitLDSTs(0x30, insn
->dType
);
2420 emitField(0x2c, 2, insn
->subOp
);
2421 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn
->src(0));
2422 emitGPR (0x00, insn
->def(0));
/*
 * Encode LDL (opcode 0xef400000): size code of dType at 0x30, src(0) as an
 * address via emitADDR, def(0) as a GPR.
 */
2426 CodeEmitterGM107::emitLDL()
2428 emitInsn (0xef400000);
2429 emitLDSTs(0x30, insn
->dType
);
2431 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2432 emitGPR (0x00, insn
->def(0));
/*
 * Encode LDS (opcode 0xef480000): size code of dType at 0x30, src(0) as an
 * address via emitADDR, def(0) as a GPR.
 */
2436 CodeEmitterGM107::emitLDS()
2438 emitInsn (0xef480000);
2439 emitLDSTs(0x30, insn
->dType
);
2440 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2441 emitGPR (0x00, insn
->def(0));
/*
 * Encode LD (opcode 0x80000000): size code of dType at 0x35, a flag at 0x34
 * that is set when the indirect address register of src(0) is 8 bytes wide,
 * src(0) as an address via emitADDR, def(0) as a GPR.
 */
2445 CodeEmitterGM107::emitLD()
2447 emitInsn (0x80000000);
2450 emitLDSTs(0x35, insn
->dType
);
2451 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2452 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2453 emitGPR (0x00, insn
->def(0));
/*
 * Encode STL (opcode 0xef500000): size code of dType at 0x30, src(0) as an
 * address via emitADDR, src(1) (the stored value) as a GPR at 0x00.
 */
2457 CodeEmitterGM107::emitSTL()
2459 emitInsn (0xef500000);
2460 emitLDSTs(0x30, insn
->dType
);
2462 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2463 emitGPR (0x00, insn
->src(1));
/*
 * Encode STS (opcode 0xef580000): size code of dType at 0x30, src(0) as an
 * address via emitADDR, src(1) (the stored value) as a GPR at 0x00.
 */
2467 CodeEmitterGM107::emitSTS()
2469 emitInsn (0xef580000);
2470 emitLDSTs(0x30, insn
->dType
);
2471 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2472 emitGPR (0x00, insn
->src(1));
/*
 * Encode ST (opcode 0xa0000000): size code of dType at 0x35, a flag at 0x34
 * that is set when the indirect address register of src(0) is 8 bytes wide,
 * src(0) as an address via emitADDR, src(1) (the stored value) as a GPR at
 * 0x00.
 */
2476 CodeEmitterGM107::emitST()
2478 emitInsn (0xa0000000);
2481 emitLDSTs(0x35, insn
->dType
);
2482 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2483 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2484 emitGPR (0x00, insn
->src(1));
/*
 * Encode ALD (opcode 0xefd80000).  The field at 0x2f holds
 * (size of def(0) in bytes / 4) - 1; the second indirect of src(0) is
 * emitted as a GPR at 0x27, src(0) as an address via emitADDR, and def(0) as
 * a GPR at 0x00.
 */
2488 CodeEmitterGM107::emitALD()
2490 emitInsn (0xefd80000);
2491 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2492 emitGPR (0x27, insn
->src(0).getIndirect(1));
2495 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2496 emitGPR (0x00, insn
->def(0));
/*
 * Encode AST (opcode 0xeff00000).  The field at 0x2f holds
 * (typeSizeof(dType) / 4) - 1; the second indirect of src(0) is emitted as a
 * GPR at 0x27, src(0) as an address via emitADDR, and src(1) (the stored
 * value) as a GPR at 0x00.
 */
2500 CodeEmitterGM107::emitAST()
2502 emitInsn (0xeff00000);
2503 emitField(0x2f, 2, (typeSizeof(insn
->dType
) / 4) - 1);
2504 emitGPR (0x27, insn
->src(0).getIndirect(1));
2506 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2507 emitGPR (0x00, insn
->src(1));
/*
 * Encode ISBERD (opcode 0xefd00000) with src(0) and def(0) emitted as GPRs
 * at 0x08 and 0x00.
 */
2511 CodeEmitterGM107::emitISBERD()
2513 emitInsn(0xefd00000);
2514 emitGPR (0x08, insn
->src(0));
2515 emitGPR (0x00, insn
->def(0));
/*
 * Encode AL2P (opcode 0xefa00000).  The field at 0x2f holds
 * (size of def(0) in bytes / 4) - 1 and the field at 0x14 holds the data
 * offset of src(0).  The first indirect of src(0) is emitted as a GPR at
 * 0x08 and def(0) as a GPR at 0x00.
 */
2519 CodeEmitterGM107::emitAL2P()
2521 emitInsn (0xefa00000);
2522 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2525 emitField(0x14, 11, insn
->src(0).get()->reg
.data
.offset
);
2526 emitGPR (0x08, insn
->src(0).getIndirect(0));
2527 emitGPR (0x00, insn
->def(0));
/*
 * Fixup callback that patches the interpolation settings of an instruction
 * already present in `code` at word index entry->loc.
 *
 * The mode starts from entry->ipa and is adjusted from `data`:
 *   - data.flatshade with an SC-mode entry selects NV50_IR_INTERP_FLAT;
 *   - otherwise data.force_persample_interp, with default sampling and a
 *     non-flat mode, ORs in NV50_IR_INTERP_CENTROID.
 * The four bits at 0x14 of code[loc + 1] are then cleared and refilled from
 * the mode (low two bits at 0x16, next two bits at 0x14), and the eight bits
 * at 0x14 of code[loc + 0] are replaced by `reg`.
 */
2531 interpApply(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
2533 int ipa
= entry
->ipa
;
2534 int reg
= entry
->reg
;
2535 int loc
= entry
->loc
;
2537 if (data
.flatshade
&&
2538 (ipa
& NV50_IR_INTERP_MODE_MASK
) == NV50_IR_INTERP_SC
) {
2539 ipa
= NV50_IR_INTERP_FLAT
;
2541 } else if (data
.force_persample_interp
&&
2542 (ipa
& NV50_IR_INTERP_SAMPLE_MASK
) == NV50_IR_INTERP_DEFAULT
&&
2543 (ipa
& NV50_IR_INTERP_MODE_MASK
) != NV50_IR_INTERP_FLAT
) {
2544 ipa
|= NV50_IR_INTERP_CENTROID
;
2546 code
[loc
+ 1] &= ~(0xf << 0x14);
2547 code
[loc
+ 1] |= (ipa
& 0x3) << 0x16;
2548 code
[loc
+ 1] |= (ipa
& 0xc) << (0x14 - 2);
2549 code
[loc
+ 0] &= ~(0xff << 0x14);
2550 code
[loc
+ 0] |= reg
<< 0x14;
/*
 * Encode IPA (opcode 0xe0000000).
 *
 * ipam is the interpolation-mode code (LINEAR = 0, PERSPECTIVE = 1,
 * FLAT = 2, SC = 3) written at 0x36; ipas is the sample-mode code
 * (DEFAULT = 0, CENTROID = 1, OFFSET = 2) written at 0x34.  After the
 * address of src(0) has been emitted, bit 0x40 of code[1] (.idx) is set
 * unless bits 8..15 of code[0] are all ones.
 *
 * For OP_PINTERP src(1) is emitted as a GPR at 0x14 and, in OFFSET sample
 * mode, src(2) at 0x27; the fixup is registered with src(1)'s register id.
 * Otherwise, in OFFSET sample mode src(1) is emitted at 0x27 and the fixup
 * is registered with 0xff.  interpApply is the fixup callback in both cases.
 */
2554 CodeEmitterGM107::emitIPA()
2556 int ipam
= 0, ipas
= 0;
2558 switch (insn
->getInterpMode()) {
2559 case NV50_IR_INTERP_LINEAR
: ipam
= 0; break;
2560 case NV50_IR_INTERP_PERSPECTIVE
: ipam
= 1; break;
2561 case NV50_IR_INTERP_FLAT
: ipam
= 2; break;
2562 case NV50_IR_INTERP_SC
: ipam
= 3; break;
2564 assert(!"invalid ipa mode");
2568 switch (insn
->getSampleMode()) {
2569 case NV50_IR_INTERP_DEFAULT
: ipas
= 0; break;
2570 case NV50_IR_INTERP_CENTROID
: ipas
= 1; break;
2571 case NV50_IR_INTERP_OFFSET
: ipas
= 2; break;
2573 assert(!"invalid ipa sample mode");
2577 emitInsn (0xe0000000);
2578 emitField(0x36, 2, ipam
);
2579 emitField(0x34, 2, ipas
);
2581 emitField(0x2f, 3, 7);
2582 emitADDR (0x08, 0x1c, 10, 0, insn
->src(0));
2583 if ((code
[0] & 0x0000ff00) != 0x0000ff00)
2584 code
[1] |= 0x00000040; /* .idx */
2585 emitGPR(0x00, insn
->def(0));
2587 if (insn
->op
== OP_PINTERP
) {
2588 emitGPR(0x14, insn
->src(1));
2589 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2590 emitGPR(0x27, insn
->src(2));
2591 addInterp(insn
->ipa
, insn
->getSrc(1)->reg
.data
.id
, interpApply
);
2593 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2594 emitGPR(0x27, insn
->src(1));
2596 addInterp(insn
->ipa
, 0xff, interpApply
);
2599 if (insn
->getSampleMode() != NV50_IR_INTERP_OFFSET
)
/*
 * Encode ATOM.
 *
 * For the CAS sub-op (opcode 0xee000000) the data-type code is 0 for
 * TYPE_U32 and 1 for TYPE_U64.  For other sub-ops (opcode 0xed000000) it is
 * U32 = 0, S32 = 1, U64 = 2, F32 = 3, B128 = 4, S64 = 5.  Any other type
 * asserts and falls back to code 0.
 *
 * subOp is written at 0x34, the data-type code at 0x31, and the flag at 0x30
 * is set when the indirect address register of src(0) is 8 bytes wide.
 */
2604 CodeEmitterGM107::emitATOM()
2606 unsigned dType
, subOp
;
2608 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2609 switch (insn
->dType
) {
2610 case TYPE_U32
: dType
= 0; break;
2611 case TYPE_U64
: dType
= 1; break;
2612 default: assert(!"unexpected dType"); dType
= 0; break;
2616 emitInsn (0xee000000);
2618 switch (insn
->dType
) {
2619 case TYPE_U32
: dType
= 0; break;
2620 case TYPE_S32
: dType
= 1; break;
2621 case TYPE_U64
: dType
= 2; break;
2622 case TYPE_F32
: dType
= 3; break;
2623 case TYPE_B128
: dType
= 4; break;
2624 case TYPE_S64
: dType
= 5; break;
2625 default: assert(!"unexpected dType"); dType
= 0; break;
2627 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2630 subOp
= insn
->subOp
;
2632 emitInsn (0xed000000);
2635 emitField(0x34, 4, subOp
);
2636 emitField(0x31, 3, dType
);
2637 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2638 emitGPR (0x14, insn
->src(1));
2639 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2640 emitGPR (0x00, insn
->def(0));
/*
 * Encode ATOMS.
 *
 * For the CAS sub-op (opcode 0xee000000) the data-type code is 0 for
 * TYPE_U32 and 1 for TYPE_U64, written at 0x34.  For other sub-ops (opcode
 * 0xec000000) it is U32 = 0, S32 = 1, U64 = 2, S64 = 3, written at 0x1c.
 * Any other type asserts and falls back to code 0.
 *
 * subOp is written at 0x34, src(1) as a GPR at 0x14, src(0) as an address
 * via emitADDR and def(0) as a GPR at 0x00.
 */
2644 CodeEmitterGM107::emitATOMS()
2646 unsigned dType
, subOp
;
2648 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2649 switch (insn
->dType
) {
2650 case TYPE_U32
: dType
= 0; break;
2651 case TYPE_U64
: dType
= 1; break;
2652 default: assert(!"unexpected dType"); dType
= 0; break;
2656 emitInsn (0xee000000);
2657 emitField(0x34, 1, dType
);
2659 switch (insn
->dType
) {
2660 case TYPE_U32
: dType
= 0; break;
2661 case TYPE_S32
: dType
= 1; break;
2662 case TYPE_U64
: dType
= 2; break;
2663 case TYPE_S64
: dType
= 3; break;
2664 default: assert(!"unexpected dType"); dType
= 0; break;
2667 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2670 subOp
= insn
->subOp
;
2672 emitInsn (0xec000000);
2673 emitField(0x1c, 3, dType
);
2676 emitField(0x34, 4, subOp
);
2677 emitGPR (0x14, insn
->src(1));
2678 emitADDR (0x08, 0x1e, 22, 2, insn
->src(0));
2679 emitGPR (0x00, insn
->def(0));
/*
 * Encode RED (opcode 0xebf80000).  The data-type code is U32 = 0, S32 = 1,
 * U64 = 2, F32 = 3, B128 = 4, S64 = 5; any other type asserts and falls back
 * to code 0.  The flag at 0x30 is set when the indirect address register of
 * src(0) is 8 bytes wide, subOp goes to 0x17 and the data-type code to 0x14.
 * src(1) is emitted as a GPR at 0x00.
 */
2683 CodeEmitterGM107::emitRED()
2687 switch (insn
->dType
) {
2688 case TYPE_U32
: dType
= 0; break;
2689 case TYPE_S32
: dType
= 1; break;
2690 case TYPE_U64
: dType
= 2; break;
2691 case TYPE_F32
: dType
= 3; break;
2692 case TYPE_B128
: dType
= 4; break;
2693 case TYPE_S64
: dType
= 5; break;
2694 default: assert(!"unexpected dType"); dType
= 0; break;
2697 emitInsn (0xebf80000);
2698 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2699 emitField(0x17, 3, insn
->subOp
);
2700 emitField(0x14, 3, dType
);
2701 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2702 emitGPR (0x00, insn
->src(1));
/*
 * Encode CCTL.  Opcode 0xef600000 is used when src(0) lives in
 * FILE_MEMORY_GLOBAL; 0xef800000 is the other visible opcode.  The flag at
 * 0x34 is set when the indirect address register of src(0) is 8 bytes wide;
 * src(0) is emitted as an address using the local `width`, and subOp is
 * written at 0x00.
 */
2706 CodeEmitterGM107::emitCCTL()
2709 if (insn
->src(0).getFile() == FILE_MEMORY_GLOBAL
) {
2710 emitInsn(0xef600000);
2713 emitInsn(0xef800000);
2716 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2717 emitADDR (0x08, 0x16, width
, 2, insn
->src(0));
2718 emitField(0x00, 4, insn
->subOp
);
2721 /*******************************************************************************
2723 ******************************************************************************/
/*
 * Encode PIXLD (opcode 0xefe80000): subOp at 0x1f, src(0) and def(0) as GPRs
 * at 0x08 and 0x00.
 */
2726 CodeEmitterGM107::emitPIXLD()
2728 emitInsn (0xefe80000);
2730 emitField(0x1f, 3, insn
->subOp
);
2731 emitGPR (0x08, insn
->src(0));
2732 emitGPR (0x00, insn
->def(0));
2735 /*******************************************************************************
2737 ******************************************************************************/
/*
 * Emit the second texture source register at position `pos`.  The source
 * index is 1, or 2 when source 1 is the predicate (predSrc == 1); the GPR is
 * only emitted when that source exists.
 */
2740 CodeEmitterGM107::emitTEXs(int pos
)
2742 int src1
= insn
->predSrc
== 1 ? 2 : 1;
2743 if (insn
->srcExists(src1
))
2744 emitGPR(pos
, insn
->src(src1
));
/*
 * Translate a component write mask into the mask code used by
 * CodeEmitterGM107::emitTEXS().  Only the masks listed below are accepted;
 * anything else trips the "invalid mask" assertion.  Several masks share a
 * code (e.g. 0x1 and 0x7 both give 0x0).
 */
2750 getTEXSMask(uint8_t mask
)
2753 case 0x1: return 0x0;
2754 case 0x2: return 0x1;
2755 case 0x3: return 0x4;
2756 case 0x4: return 0x2;
2757 case 0x7: return 0x0;
2758 case 0x8: return 0x3;
2759 case 0x9: return 0x5;
2760 case 0xa: return 0x6;
2761 case 0xb: return 0x1;
2762 case 0xc: return 0x7;
2763 case 0xd: return 0x2;
2764 case 0xe: return 0x3;
2765 case 0xf: return 0x4;
2767 assert(!"invalid mask");
/*
 * Select the target code for a texture instruction based on its texture
 * target, its levelZero flag and whether the op is OP_TXL.  Only OP_TEX and
 * OP_TXL are accepted (asserted).  Some targets assert on levelZero or on
 * the op, so not every target/op combination is allowed.
 */
2773 getTEXSTarget(const TexInstruction
*tex
)
2775 assert(tex
->op
== OP_TEX
|| tex
->op
== OP_TXL
);
2777 switch (tex
->tex
.target
.getEnum()) {
2779 assert(tex
->tex
.levelZero
);
2782 case TEX_TARGET_RECT
:
2783 if (tex
->tex
.levelZero
)
2785 if (tex
->op
== OP_TXL
)
2788 case TEX_TARGET_2D_SHADOW
:
2789 case TEX_TARGET_RECT_SHADOW
:
2790 if (tex
->tex
.levelZero
)
2792 if (tex
->op
== OP_TXL
)
2795 case TEX_TARGET_2D_ARRAY
:
2796 if (tex
->tex
.levelZero
)
2799 case TEX_TARGET_2D_ARRAY_SHADOW
:
2800 assert(tex
->tex
.levelZero
);
2803 if (tex
->tex
.levelZero
)
2805 assert(tex
->op
!= OP_TXL
);
2807 case TEX_TARGET_CUBE
:
2808 assert(!tex
->tex
.levelZero
);
2809 if (tex
->op
== OP_TXL
)
/*
 * Select the target code for a texture instruction based on its texture
 * target, its levelZero flag and (for TEX_TARGET_RECT) whether offsets are
 * used.  Several targets assert that levelZero is set.
 */
2819 getTLDSTarget(const TexInstruction
*tex
)
2821 switch (tex
->tex
.target
.getEnum()) {
2823 if (tex
->tex
.levelZero
)
2827 case TEX_TARGET_RECT
:
2828 if (tex
->tex
.levelZero
)
2829 return tex
->tex
.useOffsets
? 0x4 : 0x2;
2830 return tex
->tex
.useOffsets
? 0xc : 0x5;
2831 case TEX_TARGET_2D_MS
:
2832 assert(tex
->tex
.levelZero
);
2835 assert(tex
->tex
.levelZero
);
2837 case TEX_TARGET_2D_ARRAY
:
2838 assert(tex
->tex
.levelZero
);
2848 CodeEmitterGM107::emitTEX()
2850 const TexInstruction
*insn
= this->insn
->asTex();
2853 if (!insn
->tex
.levelZero
) {
2855 case OP_TEX
: lodm
= 0; break;
2856 case OP_TXB
: lodm
= 2; break;
2857 case OP_TXL
: lodm
= 3; break;
2859 assert(!"invalid tex op");
2866 if (insn
->tex
.rIndirectSrc
>= 0) {
2867 emitInsn (0xdeb80000);
2868 emitField(0x25, 2, lodm
);
2869 emitField(0x24, 1, insn
->tex
.useOffsets
== 1);
2871 emitInsn (0xc0380000);
2872 emitField(0x37, 2, lodm
);
2873 emitField(0x36, 1, insn
->tex
.useOffsets
== 1);
2874 emitField(0x24, 13, insn
->tex
.r
);
2877 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2878 emitField(0x31, 1, insn
->tex
.liveOnly
);
2879 emitField(0x23, 1, insn
->tex
.derivAll
);
2880 emitField(0x1f, 4, insn
->tex
.mask
);
2881 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2882 insn
->tex
.target
.getDim() - 1);
2883 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2885 emitGPR (0x08, insn
->src(0));
2886 emitGPR (0x00, insn
->def(0));
2848 CodeEmitterGM107::emitTEX()
2850 const TexInstruction
*insn
= this->insn
->asTex();
2853 if (!insn
->tex
.levelZero
) {
2855 case OP_TEX
: lodm
= 0; break;
2856 case OP_TXB
: lodm
= 2; break;
2857 case OP_TXL
: lodm
= 3; break;
2859 assert(!"invalid tex op");
2866 if (insn
->tex
.rIndirectSrc
>= 0) {
2867 emitInsn (0xdeb80000);
2868 emitField(0x25, 2, lodm
);
2869 emitField(0x24, 1, insn
->tex
.useOffsets
== 1);
2871 emitInsn (0xc0380000);
2872 emitField(0x37, 2, lodm
);
2873 emitField(0x36, 1, insn
->tex
.useOffsets
== 1);
2874 emitField(0x24, 13, insn
->tex
.r
);
2877 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2878 emitField(0x31, 1, insn
->tex
.liveOnly
);
2879 emitField(0x23, 1, insn
->tex
.derivAll
);
2880 emitField(0x1f, 4, insn
->tex
.mask
);
2881 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2882 insn
->tex
.target
.getDim() - 1);
2883 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2885 emitGPR (0x08, insn
->src(0));
2886 emitGPR (0x00, insn
->def(0));
/*
 * Encode one of the compact texture forms.  derivAll must be clear
 * (asserted).  Three opcodes are visible:
 *   0xd8000000 - target code from getTEXSTarget(), mask code from
 *                getTEXSMask();
 *   0xda000000 - target code from getTLDSTarget(), mask code from
 *                getTEXSMask();
 *   0xdf000000 - gatherComp at 0x34, offsets flag at 0x33, shadow flag at
 *                0x32; useOffsets must not be 4 (asserted).
 * An unhandled op reaches unreachable().  Common fields: liveOnly at 0x31,
 * tex.r at 0x24, an optional def(1) GPR at 0x1c and an optional src(1) GPR
 * at 0x14.
 */
2890 CodeEmitterGM107::emitTEXS()
2892 const TexInstruction
*insn
= this->insn
->asTex();
2893 assert(!insn
->tex
.derivAll
);
2898 emitInsn (0xd8000000);
2899 emitField(0x35, 4, getTEXSTarget(insn
));
2900 emitField(0x32, 3, getTEXSMask(insn
->tex
.mask
));
2903 emitInsn (0xda000000);
2904 emitField(0x35, 4, getTLDSTarget(insn
));
2905 emitField(0x32, 3, getTEXSMask(insn
->tex
.mask
));
2908 assert(insn
->tex
.useOffsets
!= 4);
2909 emitInsn (0xdf000000);
2910 emitField(0x34, 2, insn
->tex
.gatherComp
);
2911 emitField(0x33, 1, insn
->tex
.useOffsets
== 1);
2912 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2915 unreachable("unknown op in emitTEXS()");
2919 emitField(0x31, 1, insn
->tex
.liveOnly
);
2920 emitField(0x24, 13, insn
->tex
.r
);
2921 if (insn
->defExists(1))
2922 emitGPR(0x1c, insn
->def(1));
2925 if (insn
->srcExists(1))
2926 emitGPR(0x14, insn
->getSrc(1));
2929 emitGPR (0x08, insn
->src(0));
2930 emitGPR (0x00, insn
->def(0));
/*
 * Encode TLD.  Opcode 0xdd380000 is used when the texture has an indirect
 * resource source (rIndirectSrc >= 0); otherwise 0xdc380000 is used with
 * tex.r at 0x24.  Flags: 0x37 set when levelZero is 0, 0x32 for multisample
 * targets, 0x31 liveOnly, 0x23 when useOffsets is 1.  The component mask
 * goes to 0x1f, the dimension code (3 for cube, else getDim() - 1) to 0x1d
 * and the array flag to 0x1c.
 */
2934 CodeEmitterGM107::emitTLD()
2936 const TexInstruction
*insn
= this->insn
->asTex();
2938 if (insn
->tex
.rIndirectSrc
>= 0) {
2939 emitInsn (0xdd380000);
2941 emitInsn (0xdc380000);
2942 emitField(0x24, 13, insn
->tex
.r
);
2945 emitField(0x37, 1, insn
->tex
.levelZero
== 0);
2946 emitField(0x32, 1, insn
->tex
.target
.isMS());
2947 emitField(0x31, 1, insn
->tex
.liveOnly
);
2948 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2949 emitField(0x1f, 4, insn
->tex
.mask
);
2950 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2951 insn
->tex
.target
.getDim() - 1);
2952 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2954 emitGPR (0x08, insn
->src(0));
2955 emitGPR (0x00, insn
->def(0));
/*
 * Encode TLD4.  With an indirect resource source (rIndirectSrc >= 0) opcode
 * 0xdef80000 is used, with gatherComp at 0x26 and the useOffsets == 4 /
 * useOffsets == 1 flags at 0x25 / 0x24.  Otherwise opcode 0xc8380000 is
 * used, with the same three values at 0x38 / 0x37 / 0x36 and tex.r at 0x24.
 * Common fields: shadow flag 0x32, liveOnly 0x31, derivAll 0x23, component
 * mask 0x1f, dimension code (3 for cube, else getDim() - 1) 0x1d, array
 * flag 0x1c.
 */
2959 CodeEmitterGM107::emitTLD4()
2961 const TexInstruction
*insn
= this->insn
->asTex();
2963 if (insn
->tex
.rIndirectSrc
>= 0) {
2964 emitInsn (0xdef80000);
2965 emitField(0x26, 2, insn
->tex
.gatherComp
);
2966 emitField(0x25, 2, insn
->tex
.useOffsets
== 4);
2967 emitField(0x24, 2, insn
->tex
.useOffsets
== 1);
2969 emitInsn (0xc8380000);
2970 emitField(0x38, 2, insn
->tex
.gatherComp
);
2971 emitField(0x37, 2, insn
->tex
.useOffsets
== 4);
2972 emitField(0x36, 2, insn
->tex
.useOffsets
== 1);
2973 emitField(0x24, 13, insn
->tex
.r
);
2976 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2977 emitField(0x31, 1, insn
->tex
.liveOnly
);
2978 emitField(0x23, 1, insn
->tex
.derivAll
);
2979 emitField(0x1f, 4, insn
->tex
.mask
);
2980 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2981 insn
->tex
.target
.getDim() - 1);
2982 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2984 emitGPR (0x08, insn
->src(0));
2985 emitGPR (0x00, insn
->def(0));
/*
 * Encode TXD.  Opcode 0xde780000 is used when the texture has an indirect
 * resource source (rIndirectSrc >= 0); otherwise 0xde380000 is used with
 * tex.r at 0x24.  Common fields: liveOnly 0x31, useOffsets == 1 flag 0x23,
 * component mask 0x1f, dimension code (3 for cube, else getDim() - 1) 0x1d,
 * array flag 0x1c.
 */
2989 CodeEmitterGM107::emitTXD()
2991 const TexInstruction
*insn
= this->insn
->asTex();
2993 if (insn
->tex
.rIndirectSrc
>= 0) {
2994 emitInsn (0xde780000);
2996 emitInsn (0xde380000);
2997 emitField(0x24, 13, insn
->tex
.r
);
3000 emitField(0x31, 1, insn
->tex
.liveOnly
);
3001 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
3002 emitField(0x1f, 4, insn
->tex
.mask
);
3003 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
3004 insn
->tex
.target
.getDim() - 1);
3005 emitField(0x1c, 1, insn
->tex
.target
.isArray());
3007 emitGPR (0x08, insn
->src(0));
3008 emitGPR (0x00, insn
->def(0));
/*
 * Encode TMML.  Opcode 0xdf600000 is used when the texture has an indirect
 * resource source (rIndirectSrc >= 0); otherwise 0xdf580000 is used with
 * tex.r at 0x24.  Common fields: liveOnly 0x31, derivAll 0x23, component
 * mask 0x1f, dimension code (3 for cube, else getDim() - 1) 0x1d, array
 * flag 0x1c.
 */
3012 CodeEmitterGM107::emitTMML()
3014 const TexInstruction
*insn
= this->insn
->asTex();
3016 if (insn
->tex
.rIndirectSrc
>= 0) {
3017 emitInsn (0xdf600000);
3019 emitInsn (0xdf580000);
3020 emitField(0x24, 13, insn
->tex
.r
);
3023 emitField(0x31, 1, insn
->tex
.liveOnly
);
3024 emitField(0x23, 1, insn
->tex
.derivAll
);
3025 emitField(0x1f, 4, insn
->tex
.mask
);
3026 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
3027 insn
->tex
.target
.getDim() - 1);
3028 emitField(0x1c, 1, insn
->tex
.target
.isArray());
3030 emitGPR (0x08, insn
->src(0));
3031 emitGPR (0x00, insn
->def(0));
/*
 * Encode TXQ.  The query kind is translated to a type code:
 *   DIMS = 0x01, TYPE = 0x02, SAMPLE_POSITION = 0x05, FILTER = 0x10,
 *   LOD = 0x12, WRAP = 0x14, BORDER_COLOUR = 0x16.
 * Opcode 0xdf500000 is used when the texture has an indirect resource
 * source (rIndirectSrc >= 0); otherwise 0xdf480000 is used with tex.r at
 * 0x24.  liveOnly goes to 0x31, the component mask to 0x1f and the type
 * code to 0x16.
 */
3035 CodeEmitterGM107::emitTXQ()
3037 const TexInstruction
*insn
= this->insn
->asTex();
3040 switch (insn
->tex
.query
) {
3041 case TXQ_DIMS
: type
= 0x01; break;
3042 case TXQ_TYPE
: type
= 0x02; break;
3043 case TXQ_SAMPLE_POSITION
: type
= 0x05; break;
3044 case TXQ_FILTER
: type
= 0x10; break;
3045 case TXQ_LOD
: type
= 0x12; break;
3046 case TXQ_WRAP
: type
= 0x14; break;
3047 case TXQ_BORDER_COLOUR
: type
= 0x16; break;
3049 assert(!"invalid txq query");
3053 if (insn
->tex
.rIndirectSrc
>= 0) {
3054 emitInsn (0xdf500000);
3056 emitInsn (0xdf480000);
3057 emitField(0x24, 13, insn
->tex
.r
);
3060 emitField(0x31, 1, insn
->tex
.liveOnly
);
3061 emitField(0x1f, 4, insn
->tex
.mask
);
3062 emitField(0x16, 6, type
);
3063 emitGPR (0x08, insn
->src(0));
3064 emitGPR (0x00, insn
->def(0));
/*
 * Encode DEPBAR (opcode 0xf0f00000).  The "le" flag at 0x1d is set, the
 * field at 0x1a is fixed to 5, and subOp is written to both 0x14 and 0x00.
 */
3068 CodeEmitterGM107::emitDEPBAR()
3070 emitInsn (0xf0f00000);
3071 emitField(0x1d, 1, 1); /* le */
3072 emitField(0x1a, 3, 5);
3073 emitField(0x14, 6, insn
->subOp
);
3074 emitField(0x00, 6, insn
->subOp
);
3077 /*******************************************************************************
3079 ******************************************************************************/
/* Encode NOP: only the opcode 0x50b00000 is emitted. */
3082 CodeEmitterGM107::emitNOP()
3084 emitInsn(0x50b00000);
/*
 * Encode KIL (opcode 0xe3300000) with the condition CC_TR written through
 * emitCond5 at 0x00.
 */
3088 CodeEmitterGM107::emitKIL()
3090 emitInsn (0xe3300000);
3091 emitCond5(0x00, CC_TR
);
/*
 * Encode OUT.  `cut` is set for OP_RESTART or a non-zero subOp, `emit` for
 * OP_EMIT; they are packed as (cut << 1) | emit at 0x27.  The opcode depends
 * on the register file of src(1); src(0) and def(0) are emitted as GPRs at
 * 0x08 and 0x00.
 */
3095 CodeEmitterGM107::emitOUT()
3097 const int cut
= insn
->op
== OP_RESTART
|| insn
->subOp
;
3098 const int emit
= insn
->op
== OP_EMIT
;
3100 switch (insn
->src(1).getFile()) {
3102 emitInsn(0xfbe00000);
3103 emitGPR (0x14, insn
->src(1));
3105 case FILE_IMMEDIATE
:
3106 emitInsn(0xf6e00000);
3107 emitIMMD(0x14, 19, insn
->src(1));
3109 case FILE_MEMORY_CONST
:
3110 emitInsn(0xebe00000);
3111 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
3114 assert(!"bad src1 file");
3118 emitField(0x27, 2, (cut
<< 1) | emit
);
3119 emitGPR (0x08, insn
->src(0));
3120 emitGPR (0x00, insn
->def(0));
3124 CodeEmitterGM107::emitBAR()
3128 emitInsn (0xf0a80000);
3130 switch (insn
->subOp
) {
3131 case NV50_IR_SUBOP_BAR_RED_POPC
: subop
= 0x02; break;
3132 case NV50_IR_SUBOP_BAR_RED_AND
: subop
= 0x0a; break;
3133 case NV50_IR_SUBOP_BAR_RED_OR
: subop
= 0x12; break;
3134 case NV50_IR_SUBOP_BAR_ARRIVE
: subop
= 0x81; break;
3137 assert(insn
->subOp
== NV50_IR_SUBOP_BAR_SYNC
);
3141 emitField(0x20, 8, subop
);
3144 if (insn
->src(0).getFile() == FILE_GPR
) {
3145 emitGPR(0x08, insn
->src(0));
3147 ImmediateValue
*imm
= insn
->getSrc(0)->asImm();
3149 emitField(0x08, 8, imm
->reg
.data
.u32
);
3150 emitField(0x2b, 1, 1);
3154 if (insn
->src(1).getFile() == FILE_GPR
) {
3155 emitGPR(0x14, insn
->src(1));
3157 ImmediateValue
*imm
= insn
->getSrc(0)->asImm();
3159 emitField(0x14, 12, imm
->reg
.data
.u32
);
3160 emitField(0x2c, 1, 1);
3163 if (insn
->srcExists(2) && (insn
->predSrc
!= 2)) {
3164 emitPRED (0x27, insn
->src(2));
3165 emitField(0x2a, 1, insn
->src(2).mod
== Modifier(NV50_IR_MOD_NOT
));
3167 emitField(0x27, 3, 7);
3172 CodeEmitterGM107::emitMEMBAR()
3174 emitInsn (0xef980000);
3175 emitField(0x08, 2, insn
->subOp
>> 2);
3179 CodeEmitterGM107::emitVOTE()
3181 const ImmediateValue
*imm
;
3185 for (int i
= 0; insn
->defExists(i
); i
++) {
3186 if (insn
->def(i
).getFile() == FILE_GPR
)
3188 else if (insn
->def(i
).getFile() == FILE_PREDICATE
)
3192 emitInsn (0x50d80000);
3193 emitField(0x30, 2, insn
->subOp
);
3195 emitGPR (0x00, insn
->def(r
));
3199 emitPRED (0x2d, insn
->def(p
));
3203 switch (insn
->src(0).getFile()) {
3204 case FILE_PREDICATE
:
3205 emitField(0x2a, 1, insn
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
));
3206 emitPRED (0x27, insn
->src(0));
3208 case FILE_IMMEDIATE
:
3209 imm
= insn
->getSrc(0)->asImm();
3211 u32
= imm
->reg
.data
.u32
;
3212 assert(u32
== 0 || u32
== 1);
3214 emitField(0x2a, 1, u32
== 0);
3217 assert(!"Unhandled src");
3223 CodeEmitterGM107::emitSUTarget()
3225 const TexInstruction
*insn
= this->insn
->asTex();
3228 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
3230 if (insn
->tex
.target
== TEX_TARGET_BUFFER
) {
3232 } else if (insn
->tex
.target
== TEX_TARGET_1D_ARRAY
) {
3234 } else if (insn
->tex
.target
== TEX_TARGET_2D
||
3235 insn
->tex
.target
== TEX_TARGET_RECT
) {
3237 } else if (insn
->tex
.target
== TEX_TARGET_2D_ARRAY
||
3238 insn
->tex
.target
== TEX_TARGET_CUBE
||
3239 insn
->tex
.target
== TEX_TARGET_CUBE_ARRAY
) {
3241 } else if (insn
->tex
.target
== TEX_TARGET_3D
) {
3244 assert(insn
->tex
.target
== TEX_TARGET_1D
);
3246 emitField(0x20, 4, target
);
3250 CodeEmitterGM107::emitSUHandle(const int s
)
3252 const TexInstruction
*insn
= this->insn
->asTex();
3254 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
3256 if (insn
->src(s
).getFile() == FILE_GPR
) {
3257 emitGPR(0x27, insn
->src(s
));
3259 ImmediateValue
*imm
= insn
->getSrc(s
)->asImm();
3261 emitField(0x33, 1, 1);
3262 emitField(0x24, 13, imm
->reg
.data
.u32
);
3267 CodeEmitterGM107::emitSUSTx()
3269 const TexInstruction
*insn
= this->insn
->asTex();
3271 emitInsn(0xeb200000);
3272 if (insn
->op
== OP_SUSTB
)
3273 emitField(0x34, 1, 1);
3277 emitField(0x14, 4, 0xf); // rgba
3278 emitGPR (0x08, insn
->src(0));
3279 emitGPR (0x00, insn
->src(1));
3285 CodeEmitterGM107::emitSULDx()
3287 const TexInstruction
*insn
= this->insn
->asTex();
3290 emitInsn(0xeb000000);
3291 if (insn
->op
== OP_SULDB
)
3292 emitField(0x34, 1, 1);
3295 switch (insn
->dType
) {
3296 case TYPE_S8
: type
= 1; break;
3297 case TYPE_U16
: type
= 2; break;
3298 case TYPE_S16
: type
= 3; break;
3299 case TYPE_U32
: type
= 4; break;
3300 case TYPE_U64
: type
= 5; break;
3301 case TYPE_B128
: type
= 6; break;
3303 assert(insn
->dType
== TYPE_U8
);
3307 emitField(0x14, 3, type
);
3308 emitGPR (0x00, insn
->def(0));
3309 emitGPR (0x08, insn
->src(0));
3315 CodeEmitterGM107::emitSUREDx()
3317 const TexInstruction
*insn
= this->insn
->asTex();
3318 uint8_t type
= 0, subOp
;
3320 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
)
3321 emitInsn(0xeac00000);
3323 emitInsn(0xea600000);
3325 if (insn
->op
== OP_SUREDB
)
3326 emitField(0x34, 1, 1);
3330 switch (insn
->dType
) {
3331 case TYPE_S32
: type
= 1; break;
3332 case TYPE_U64
: type
= 2; break;
3333 case TYPE_F32
: type
= 3; break;
3334 case TYPE_S64
: type
= 5; break;
3336 assert(insn
->dType
== TYPE_U32
);
3341 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
3343 } else if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
) {
3346 subOp
= insn
->subOp
;
3349 emitField(0x24, 3, type
);
3350 emitField(0x1d, 4, subOp
);
3351 emitGPR (0x14, insn
->src(1));
3352 emitGPR (0x08, insn
->src(0));
3353 emitGPR (0x00, insn
->def(0));
3358 /*******************************************************************************
3359 * assembler front-end
3360 ******************************************************************************/
3363 CodeEmitterGM107::emitInstruction(Instruction
*i
)
3365 const unsigned int size
= (writeIssueDelays
&& !(codeSize
& 0x1f)) ? 16 : 8;
3370 if (insn
->encSize
!= 8) {
3371 ERROR("skipping undecodable instruction: "); insn
->print();
3374 if (codeSize
+ size
> codeSizeLimit
) {
3375 ERROR("code emitter output buffer too small\n");
3379 if (writeIssueDelays
) {
3380 int n
= ((codeSize
& 0x1f) / 8) - 1;
3383 data
[0] = 0x00000000;
3384 data
[1] = 0x00000000;
3390 emitField(data
, n
* 21, 21, insn
->sched
);
3437 if (targGM107
->isCS2RSV(insn
->getSrc(0)->reg
.data
.sv
.sv
))
3449 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
3450 insn
->src(0).getFile() == FILE_PREDICATE
)) {
3452 } else if (isFloatType(insn
->dType
)) {
3453 if (isFloatType(insn
->sType
))
3458 if (isFloatType(insn
->sType
))
3469 if (isFloatType(insn
->dType
)) {
3470 if (insn
->dType
== TYPE_F64
)
3479 if (isFloatType(insn
->dType
)) {
3480 if (insn
->dType
== TYPE_F64
)
3490 if (isFloatType(insn
->dType
)) {
3491 if (insn
->dType
== TYPE_F64
)
3507 if (isFloatType(insn
->dType
)) {
3508 if (insn
->dType
== TYPE_F64
)
3517 if (typeSizeof(insn
->sType
) == 8)
3523 if (typeSizeof(insn
->sType
) == 8)
3541 if (isFloatType(insn
->dType
))
3550 if (insn
->def(0).getFile() != FILE_PREDICATE
) {
3551 if (isFloatType(insn
->sType
))
3552 if (insn
->sType
== TYPE_F64
)
3559 if (isFloatType(insn
->sType
))
3560 if (insn
->sType
== TYPE_F64
)
3587 switch (insn
->def(0).getFile()) {
3588 case FILE_GPR
: emitLOP(); break;
3589 case FILE_PREDICATE
: emitPSETP(); break;
3591 assert(!"invalid bool op");
3598 switch (insn
->src(0).getFile()) {
3599 case FILE_MEMORY_CONST
: emitLDC(); break;
3600 case FILE_MEMORY_LOCAL
: emitLDL(); break;
3601 case FILE_MEMORY_SHARED
: emitLDS(); break;
3602 case FILE_MEMORY_GLOBAL
: emitLD(); break;
3604 assert(!"invalid load");
3610 switch (insn
->src(0).getFile()) {
3611 case FILE_MEMORY_LOCAL
: emitSTL(); break;
3612 case FILE_MEMORY_SHARED
: emitSTS(); break;
3613 case FILE_MEMORY_GLOBAL
: emitST(); break;
3615 assert(!"invalid store");
3621 if (insn
->src(0).getFile() == FILE_MEMORY_SHARED
)
3624 if (!insn
->defExists(0) && insn
->subOp
< NV50_IR_SUBOP_ATOM_CAS
)
3653 if (insn
->asTex()->tex
.scalar
)
3662 if (insn
->asTex()->tex
.scalar
)
3668 if (insn
->asTex()->tex
.scalar
)
3720 assert(!"invalid opcode");
3736 CodeEmitterGM107::getMinEncodingSize(const Instruction
*i
) const
3741 /*******************************************************************************
3742 * sched data calculator
3743 ******************************************************************************/
3745 class SchedDataCalculatorGM107
: public Pass
3748 SchedDataCalculatorGM107(const TargetGM107
*targ
) : targ(targ
) {}
3760 void rebase(const int base
)
3762 const int delta
= this->base
- base
;
3767 for (int i
= 0; i
< 256; ++i
) {
3771 for (int i
= 0; i
< 8; ++i
) {
3780 memset(&rd
, 0, sizeof(rd
));
3781 memset(&wr
, 0, sizeof(wr
));
3783 int getLatest(const ScoreData
& d
) const
3786 for (int i
= 0; i
< 256; ++i
)
3789 for (int i
= 0; i
< 8; ++i
)
3796 inline int getLatestRd() const
3798 return getLatest(rd
);
3800 inline int getLatestWr() const
3802 return getLatest(wr
);
3804 inline int getLatest() const
3806 return MAX2(getLatestRd(), getLatestWr());
3808 void setMax(const RegScores
*that
)
3810 for (int i
= 0; i
< 256; ++i
) {
3811 rd
.r
[i
] = MAX2(rd
.r
[i
], that
->rd
.r
[i
]);
3812 wr
.r
[i
] = MAX2(wr
.r
[i
], that
->wr
.r
[i
]);
3814 for (int i
= 0; i
< 8; ++i
) {
3815 rd
.p
[i
] = MAX2(rd
.p
[i
], that
->rd
.p
[i
]);
3816 wr
.p
[i
] = MAX2(wr
.p
[i
], that
->wr
.p
[i
]);
3818 rd
.c
= MAX2(rd
.c
, that
->rd
.c
);
3819 wr
.c
= MAX2(wr
.c
, that
->wr
.c
);
3821 void print(int cycle
)
3823 for (int i
= 0; i
< 256; ++i
) {
3824 if (rd
.r
[i
] > cycle
)
3825 INFO("rd $r%i @ %i\n", i
, rd
.r
[i
]);
3826 if (wr
.r
[i
] > cycle
)
3827 INFO("wr $r%i @ %i\n", i
, wr
.r
[i
]);
3829 for (int i
= 0; i
< 8; ++i
) {
3830 if (rd
.p
[i
] > cycle
)
3831 INFO("rd $p%i @ %i\n", i
, rd
.p
[i
]);
3832 if (wr
.p
[i
] > cycle
)
3833 INFO("wr $p%i @ %i\n", i
, wr
.p
[i
]);
3836 INFO("rd $c @ %i\n", rd
.c
);
3838 INFO("wr $c @ %i\n", wr
.c
);
3842 RegScores
*score
; // for current BB
3843 std::vector
<RegScores
> scoreBoards
;
3845 const TargetGM107
*targ
;
3846 bool visit(Function
*);
3847 bool visit(BasicBlock
*);
3849 void commitInsn(const Instruction
*, int);
3850 int calcDelay(const Instruction
*, int) const;
3851 void setDelay(Instruction
*, int, const Instruction
*);
3852 void recordWr(const Value
*, int, int);
3853 void checkRd(const Value
*, int, int&) const;
3855 inline void emitYield(Instruction
*);
3856 inline void emitStall(Instruction
*, uint8_t);
3857 inline void emitReuse(Instruction
*, uint8_t);
3858 inline void emitWrDepBar(Instruction
*, uint8_t);
3859 inline void emitRdDepBar(Instruction
*, uint8_t);
3860 inline void emitWtDepBar(Instruction
*, uint8_t);
3862 inline int getStall(const Instruction
*) const;
3863 inline int getWrDepBar(const Instruction
*) const;
3864 inline int getRdDepBar(const Instruction
*) const;
3865 inline int getWtDepBar(const Instruction
*) const;
3867 void setReuseFlag(Instruction
*);
3869 inline void printSchedInfo(int, const Instruction
*) const;
3872 LiveBarUse(Instruction
*insn
, Instruction
*usei
)
3873 : insn(insn
), usei(usei
) { }
3879 LiveBarDef(Instruction
*insn
, Instruction
*defi
)
3880 : insn(insn
), defi(defi
) { }
3885 bool insertBarriers(BasicBlock
*);
3887 bool doesInsnWriteTo(const Instruction
*insn
, const Value
*val
) const;
3888 Instruction
*findFirstUse(const Instruction
*) const;
3889 Instruction
*findFirstDef(const Instruction
*) const;
3891 bool needRdDepBar(const Instruction
*) const;
3892 bool needWrDepBar(const Instruction
*) const;
3896 SchedDataCalculatorGM107::emitStall(Instruction
*insn
, uint8_t cnt
)
3903 SchedDataCalculatorGM107::emitYield(Instruction
*insn
)
3905 insn
->sched
|= 1 << 4;
3909 SchedDataCalculatorGM107::emitWrDepBar(Instruction
*insn
, uint8_t id
)
3912 if ((insn
->sched
& 0xe0) == 0xe0)
3913 insn
->sched
^= 0xe0;
3914 insn
->sched
|= id
<< 5;
3918 SchedDataCalculatorGM107::emitRdDepBar(Instruction
*insn
, uint8_t id
)
3921 if ((insn
->sched
& 0x700) == 0x700)
3922 insn
->sched
^= 0x700;
3923 insn
->sched
|= id
<< 8;
3927 SchedDataCalculatorGM107::emitWtDepBar(Instruction
*insn
, uint8_t id
)
3930 insn
->sched
|= 1 << (11 + id
);
3934 SchedDataCalculatorGM107::emitReuse(Instruction
*insn
, uint8_t id
)
3937 insn
->sched
|= 1 << (17 + id
);
3941 SchedDataCalculatorGM107::printSchedInfo(int cycle
,
3942 const Instruction
*insn
) const
3944 uint8_t st
, yl
, wr
, rd
, wt
, ru
;
3946 st
= (insn
->sched
& 0x00000f) >> 0;
3947 yl
= (insn
->sched
& 0x000010) >> 4;
3948 wr
= (insn
->sched
& 0x0000e0) >> 5;
3949 rd
= (insn
->sched
& 0x000700) >> 8;
3950 wt
= (insn
->sched
& 0x01f800) >> 11;
3951 ru
= (insn
->sched
& 0x1e0000) >> 17;
3953 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3954 cycle
, st
, yl
, wr
, rd
, wt
, ru
);
3958 SchedDataCalculatorGM107::getStall(const Instruction
*insn
) const
3960 return insn
->sched
& 0xf;
3964 SchedDataCalculatorGM107::getWrDepBar(const Instruction
*insn
) const
3966 return (insn
->sched
& 0x0000e0) >> 5;
3970 SchedDataCalculatorGM107::getRdDepBar(const Instruction
*insn
) const
3972 return (insn
->sched
& 0x000700) >> 8;
3976 SchedDataCalculatorGM107::getWtDepBar(const Instruction
*insn
) const
3978 return (insn
->sched
& 0x01f800) >> 11;
// Emit the reuse flag, which makes use of the operand reuse cache, a new
// level of the memory hierarchy introduced with Maxwell.
//
// It reduces bank conflicts by caching operands. Each time an instruction is
// issued, the flag tells the hw which operands are going to be re-used by the
// next instruction. Note that the next instruction has to use the same GPR id
// in the same operand slot.
3989 SchedDataCalculatorGM107::setReuseFlag(Instruction
*insn
)
3991 Instruction
*next
= insn
->next
;
3992 BitSet
defs(255, 1);
3994 if (!targ
->isReuseSupported(insn
))
3997 for (int d
= 0; insn
->defExists(d
); ++d
) {
3998 const Value
*def
= insn
->def(d
).rep();
3999 if (insn
->def(d
).getFile() != FILE_GPR
)
4001 if (typeSizeof(insn
->dType
) != 4 || def
->reg
.data
.id
== 255)
4003 defs
.set(def
->reg
.data
.id
);
4006 for (int s
= 0; insn
->srcExists(s
); s
++) {
4007 const Value
*src
= insn
->src(s
).rep();
4008 if (insn
->src(s
).getFile() != FILE_GPR
)
4010 if (typeSizeof(insn
->sType
) != 4 || src
->reg
.data
.id
== 255)
4012 if (defs
.test(src
->reg
.data
.id
))
4014 if (!next
->srcExists(s
) || next
->src(s
).getFile() != FILE_GPR
)
4016 if (src
->reg
.data
.id
!= next
->getSrc(s
)->reg
.data
.id
)
4024 SchedDataCalculatorGM107::recordWr(const Value
*v
, int cycle
, int ready
)
4026 int a
= v
->reg
.data
.id
, b
;
4028 switch (v
->reg
.file
) {
4030 b
= a
+ v
->reg
.size
/ 4;
4031 for (int r
= a
; r
< b
; ++r
)
4032 score
->rd
.r
[r
] = ready
;
4034 case FILE_PREDICATE
:
4035 // To immediately use a predicate set by any instructions, the minimum
4036 // number of stall counts is 13.
4037 score
->rd
.p
[a
] = cycle
+ 13;
4040 score
->rd
.c
= ready
;
4048 SchedDataCalculatorGM107::checkRd(const Value
*v
, int cycle
, int &delay
) const
4050 int a
= v
->reg
.data
.id
, b
;
4053 switch (v
->reg
.file
) {
4055 b
= a
+ v
->reg
.size
/ 4;
4056 for (int r
= a
; r
< b
; ++r
)
4057 ready
= MAX2(ready
, score
->rd
.r
[r
]);
4059 case FILE_PREDICATE
:
4060 ready
= MAX2(ready
, score
->rd
.p
[a
]);
4063 ready
= MAX2(ready
, score
->rd
.c
);
4069 delay
= MAX2(delay
, ready
- cycle
);
4073 SchedDataCalculatorGM107::commitInsn(const Instruction
*insn
, int cycle
)
4075 const int ready
= cycle
+ targ
->getLatency(insn
);
4077 for (int d
= 0; insn
->defExists(d
); ++d
)
4078 recordWr(insn
->getDef(d
), cycle
, ready
);
4080 #ifdef GM107_DEBUG_SCHED_DATA
4081 score
->print(cycle
);
4085 #define GM107_MIN_ISSUE_DELAY 0x1
4086 #define GM107_MAX_ISSUE_DELAY 0xf
4089 SchedDataCalculatorGM107::calcDelay(const Instruction
*insn
, int cycle
) const
4091 int delay
= 0, ready
= cycle
;
4093 for (int s
= 0; insn
->srcExists(s
); ++s
)
4094 checkRd(insn
->getSrc(s
), cycle
, delay
);
4096 // TODO: make use of getReadLatency()!
4098 return MAX2(delay
, ready
- cycle
);
4102 SchedDataCalculatorGM107::setDelay(Instruction
*insn
, int delay
,
4103 const Instruction
*next
)
4105 const OpClass cl
= targ
->getOpClass(insn
->op
);
4108 if (insn
->op
== OP_EXIT
||
4109 insn
->op
== OP_BAR
||
4110 insn
->op
== OP_MEMBAR
) {
4111 delay
= GM107_MAX_ISSUE_DELAY
;
4113 if (insn
->op
== OP_QUADON
||
4114 insn
->op
== OP_QUADPOP
) {
4117 if (cl
== OPCLASS_FLOW
|| insn
->join
) {
4121 if (!next
|| !targ
->canDualIssue(insn
, next
)) {
4122 delay
= CLAMP(delay
, GM107_MIN_ISSUE_DELAY
, GM107_MAX_ISSUE_DELAY
);
4124 delay
= 0x0; // dual-issue
4127 wr
= getWrDepBar(insn
);
4128 rd
= getRdDepBar(insn
);
4130 if (delay
== GM107_MIN_ISSUE_DELAY
&& (wr
& rd
) != 7) {
4131 // Barriers take one additional clock cycle to become active on top of
4132 // the clock consumed by the instruction producing it.
4133 if (!next
|| insn
->bb
!= next
->bb
) {
4136 int wt
= getWtDepBar(next
);
4137 if ((wt
& (1 << wr
)) | (wt
& (1 << rd
)))
4142 emitStall(insn
, delay
);
4146 // Return true when the given instruction needs to emit a read dependency
4147 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4148 // setting the maximum number of stall counts is not enough.
4150 SchedDataCalculatorGM107::needRdDepBar(const Instruction
*insn
) const
4152 BitSet
srcs(255, 1), defs(255, 1);
4155 if (!targ
->isBarrierRequired(insn
))
4158 // Do not emit a read dependency barrier when the instruction doesn't use
4159 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4160 for (int s
= 0; insn
->srcExists(s
); ++s
) {
4161 const Value
*src
= insn
->src(s
).rep();
4162 if (insn
->src(s
).getFile() != FILE_GPR
)
4164 if (src
->reg
.data
.id
== 255)
4167 a
= src
->reg
.data
.id
;
4168 b
= a
+ src
->reg
.size
/ 4;
4169 for (int r
= a
; r
< b
; ++r
)
4173 if (!srcs
.popCount())
4176 // Do not emit a read dependency barrier when the output GPRs are equal to
4177 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4178 // be produced and WaR hazards are prevented.
4179 for (int d
= 0; insn
->defExists(d
); ++d
) {
4180 const Value
*def
= insn
->def(d
).rep();
4181 if (insn
->def(d
).getFile() != FILE_GPR
)
4183 if (def
->reg
.data
.id
== 255)
4186 a
= def
->reg
.data
.id
;
4187 b
= a
+ def
->reg
.size
/ 4;
4188 for (int r
= a
; r
< b
; ++r
)
4193 if (!srcs
.popCount())
4199 // Return true when the given instruction needs to emit a write dependency
4200 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4201 // setting the maximum number of stall counts is not enough. This is only legal
4202 // if the instruction output something.
4204 SchedDataCalculatorGM107::needWrDepBar(const Instruction
*insn
) const
4206 if (!targ
->isBarrierRequired(insn
))
4209 for (int d
= 0; insn
->defExists(d
); ++d
) {
4210 if (insn
->def(d
).getFile() == FILE_GPR
||
4211 insn
->def(d
).getFile() == FILE_FLAGS
||
4212 insn
->def(d
).getFile() == FILE_PREDICATE
)
4218 // Helper function for findFirstUse() and findFirstDef()
4220 SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction
*insn
,
4221 const Value
*val
) const
4223 if (val
->reg
.file
!= FILE_GPR
&&
4224 val
->reg
.file
!= FILE_PREDICATE
&&
4225 val
->reg
.file
!= FILE_FLAGS
)
4228 for (int d
= 0; insn
->defExists(d
); ++d
) {
4229 const Value
* def
= insn
->getDef(d
);
4230 int minGPR
= def
->reg
.data
.id
;
4231 int maxGPR
= minGPR
+ def
->reg
.size
/ 4 - 1;
4233 if (def
->reg
.file
!= val
->reg
.file
)
4236 if (def
->reg
.file
== FILE_GPR
) {
4237 if (val
->reg
.data
.id
+ val
->reg
.size
/ 4 - 1 < minGPR
||
4238 val
->reg
.data
.id
> maxGPR
)
4242 if (def
->reg
.file
== FILE_PREDICATE
) {
4243 if (val
->reg
.data
.id
!= minGPR
)
4247 if (def
->reg
.file
== FILE_FLAGS
) {
4248 if (val
->reg
.data
.id
!= minGPR
)
// Find the next instruction inside the same basic block which uses (reads
// from or writes to) the output of the given instruction in order to avoid
// RaW and WaW hazards.
4261 SchedDataCalculatorGM107::findFirstUse(const Instruction
*bari
) const
4263 Instruction
*insn
, *next
;
4265 if (!bari
->defExists(0))
4268 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
4271 for (int s
= 0; insn
->srcExists(s
); ++s
)
4272 if (doesInsnWriteTo(bari
, insn
->getSrc(s
)))
4275 for (int d
= 0; insn
->defExists(d
); ++d
)
4276 if (doesInsnWriteTo(bari
, insn
->getDef(d
)))
4282 // Find the next instruction inside the same basic block which overwrites, at
4283 // least, one source of the given instruction in order to avoid WaR hazards.
4285 SchedDataCalculatorGM107::findFirstDef(const Instruction
*bari
) const
4287 Instruction
*insn
, *next
;
4289 if (!bari
->srcExists(0))
4292 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
4295 for (int s
= 0; bari
->srcExists(s
); ++s
)
4296 if (doesInsnWriteTo(insn
, bari
->getSrc(s
)))
4302 // Dependency barriers:
4303 // This pass is a bit ugly and could probably be improved by performing a
4304 // better allocation.
4306 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4307 // dependency barriers using the control codes.
4309 SchedDataCalculatorGM107::insertBarriers(BasicBlock
*bb
)
4311 std::list
<LiveBarUse
> live_uses
;
4312 std::list
<LiveBarDef
> live_defs
;
4313 Instruction
*insn
, *next
;
4317 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4318 Instruction
*usei
= NULL
, *defi
= NULL
;
4319 bool need_wr_bar
, need_rd_bar
;
4323 // Expire old barrier uses.
4324 for (std::list
<LiveBarUse
>::iterator it
= live_uses
.begin();
4325 it
!= live_uses
.end();) {
4326 if (insn
->serial
>= it
->usei
->serial
) {
4327 int wr
= getWrDepBar(it
->insn
);
4328 emitWtDepBar(insn
, wr
);
4329 bars
.clr(wr
); // free barrier
4330 it
= live_uses
.erase(it
);
4336 // Expire old barrier defs.
4337 for (std::list
<LiveBarDef
>::iterator it
= live_defs
.begin();
4338 it
!= live_defs
.end();) {
4339 if (insn
->serial
>= it
->defi
->serial
) {
4340 int rd
= getRdDepBar(it
->insn
);
4341 emitWtDepBar(insn
, rd
);
4342 bars
.clr(rd
); // free barrier
4343 it
= live_defs
.erase(it
);
4349 need_wr_bar
= needWrDepBar(insn
);
4350 need_rd_bar
= needRdDepBar(insn
);
         // When the instruction requires to emit a write dependency barrier
         // (all which write something at a variable latency), find the next
         // instruction which reads the outputs (or writes to them, potentially
         // completing before this insn).
4357 usei
= findFirstUse(insn
);
4359 // Allocate and emit a new barrier.
4360 bar_id
= bars
.findFreeRange(1);
4364 emitWrDepBar(insn
, bar_id
);
4366 live_uses
.push_back(LiveBarUse(insn
, usei
));
4370 // When the instruction requires to emit a read dependency barrier
4371 // (all which read something at a variable latency), find the next
4372 // instruction which will write the inputs.
4373 defi
= findFirstDef(insn
);
4375 if (usei
&& defi
&& usei
->serial
<= defi
->serial
)
4378 // Allocate and emit a new barrier.
4379 bar_id
= bars
.findFreeRange(1);
4383 emitRdDepBar(insn
, bar_id
);
4385 live_defs
.push_back(LiveBarDef(insn
, defi
));
4389 // Remove unnecessary barrier waits.
4390 BitSet
alive_bars(6, 1);
4391 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4396 wr
= getWrDepBar(insn
);
4397 rd
= getRdDepBar(insn
);
4398 wt
= getWtDepBar(insn
);
4400 for (int idx
= 0; idx
< 6; ++idx
) {
4401 if (!(wt
& (1 << idx
)))
4403 if (!alive_bars
.test(idx
)) {
4404 insn
->sched
&= ~(1 << (11 + idx
));
4406 alive_bars
.clr(idx
);
4420 SchedDataCalculatorGM107::visit(Function
*func
)
4424 func
->orderInstructions(insns
);
4426 scoreBoards
.resize(func
->cfg
.getSize());
4427 for (size_t i
= 0; i
< scoreBoards
.size(); ++i
)
4428 scoreBoards
[i
].wipe();
4433 SchedDataCalculatorGM107::visit(BasicBlock
*bb
)
4435 Instruction
*insn
, *next
= NULL
;
4438 for (Instruction
*insn
= bb
->getEntry(); insn
; insn
= insn
->next
) {
4440 insn
->sched
= 0x7e0;
4443 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4446 // Insert read/write dependency barriers for instructions which don't
4447 // operate at a fixed latency.
4450 score
= &scoreBoards
.at(bb
->getId());
4452 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
4453 // back branches will wait until all target dependencies are satisfied
4454 if (ei
.getType() == Graph::Edge::BACK
) // sched would be uninitialized
4456 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
4457 score
->setMax(&scoreBoards
.at(in
->getId()));
4460 #ifdef GM107_DEBUG_SCHED_DATA
4461 INFO("=== BB:%i initial scores\n", bb
->getId());
4462 score
->print(cycle
);
   // Because barriers are allocated locally (intra-BB), we have to make sure
   // that all produced barriers have been consumed before entering a new
   // basic block. The best way would be to do a global allocation pre-RA, but
   // that is considerably more difficult, especially because of the phi nodes.
   // Anyway, it seems that waiting on a barrier which has already been
   // consumed doesn't add any additional cost; it's just not elegant!
4471 Instruction
*start
= bb
->getEntry();
4472 if (start
&& bb
->cfg
.incidentCount() > 0) {
4473 for (int b
= 0; b
< 6; b
++)
4474 emitWtDepBar(start
, b
);
4477 for (insn
= bb
->getEntry(); insn
&& insn
->next
; insn
= insn
->next
) {
4480 commitInsn(insn
, cycle
);
4481 int delay
= calcDelay(next
, cycle
);
4482 setDelay(insn
, delay
, next
);
4483 cycle
+= getStall(insn
);
4487 // XXX: The yield flag seems to destroy a bunch of things when it is
4488 // set on every instruction, need investigation.
4491 #ifdef GM107_DEBUG_SCHED_DATA
4492 printSchedInfo(cycle
, insn
);
4500 commitInsn(insn
, cycle
);
4504 #ifdef GM107_DEBUG_SCHED_DATA
4505 fprintf(stderr
, "last instruction is : ");
4507 fprintf(stderr
, "cycle=%d\n", cycle
);
4510 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
4511 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
4513 if (ei
.getType() != Graph::Edge::BACK
) {
4514 // Only test the first instruction of the outgoing block.
4515 next
= out
->getEntry();
4517 bbDelay
= MAX2(bbDelay
, calcDelay(next
, cycle
));
4519 // When the outgoing BB is empty, make sure to set the number of
4520 // stall counts needed by the instruction because we don't know the
4521 // next instruction.
4522 bbDelay
= MAX2(bbDelay
, targ
->getLatency(insn
));
4525 // Wait until all dependencies are satisfied.
4526 const int regsFree
= score
->getLatest();
4527 next
= out
->getFirst();
4528 for (int c
= cycle
; next
&& c
< regsFree
; next
= next
->next
) {
4529 bbDelay
= MAX2(bbDelay
, calcDelay(next
, c
));
4530 c
+= getStall(next
);
4535 if (bb
->cfg
.outgoingCount() != 1)
4537 setDelay(insn
, bbDelay
, next
);
4538 cycle
+= getStall(insn
);
4540 score
->rebase(cycle
); // common base for initializing out blocks' scores
4544 /*******************************************************************************
4546 ******************************************************************************/
4549 CodeEmitterGM107::prepareEmission(Function
*func
)
4551 SchedDataCalculatorGM107
sched(targGM107
);
4552 CodeEmitter::prepareEmission(func
);
4553 sched
.run(func
, true, true);
// Return the number of 24-byte groups needed to hold size bytes of code,
// rounding up. The caller adds 8 bytes of scheduling data per group.
// The addition wraps like any uint32_t arithmetic for sizes close to
// UINT32_MAX.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bundleBytes = 24;

   return (size + (bundleBytes - 1)) / bundleBytes;
}
4562 CodeEmitterGM107::prepareEmission(Program
*prog
)
4564 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
4565 !fi
.end(); fi
.next()) {
4566 Function
*func
= reinterpret_cast<Function
*>(fi
.get());
4567 func
->binPos
= prog
->binSize
;
4568 prepareEmission(func
);
      // adjust sizes & positions for scheduling info:
4571 if (prog
->getTarget()->hasSWSched
) {
4572 uint32_t adjPos
= func
->binPos
;
4573 BasicBlock
*bb
= NULL
;
4574 for (int i
= 0; i
< func
->bbCount
; ++i
) {
4575 bb
= func
->bbArray
[i
];
4576 int32_t adjSize
= bb
->binSize
;
4578 adjSize
-= 32 - adjPos
% 32;
4582 adjSize
= bb
->binSize
+ sizeToBundlesGM107(adjSize
) * 8;
4583 bb
->binPos
= adjPos
;
4584 bb
->binSize
= adjSize
;
4588 func
->binSize
= adjPos
- func
->binPos
;
4591 prog
->binSize
+= func
->binSize
;
4595 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107
*target
)
4596 : CodeEmitter(target
),
4598 writeIssueDelays(target
->hasSWSched
)
4601 codeSize
= codeSizeLimit
= 0;
4606 TargetGM107::createCodeEmitterGM107(Program::Type type
)
4608 CodeEmitterGM107
*emit
= new CodeEmitterGM107(this);
4609 emit
->setProgramType(type
);
4613 } // namespace nv50_ir