2 * Copyright 2014 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
25 #include "codegen/nv50_ir_target_gm107.h"
27 //#define GM107_DEBUG_SCHED_DATA
31 class CodeEmitterGM107
: public CodeEmitter
34 CodeEmitterGM107(const TargetGM107
*);
36 virtual bool emitInstruction(Instruction
*);
37 virtual uint32_t getMinEncodingSize(const Instruction
*) const;
39 virtual void prepareEmission(Program
*);
40 virtual void prepareEmission(Function
*);
42 inline void setProgramType(Program::Type pType
) { progType
= pType
; }
45 const TargetGM107
*targGM107
;
47 Program::Type progType
;
49 const Instruction
*insn
;
50 const bool writeIssueDelays
;
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b
, int s
, uint32_t v
) { emitField(code
, b
, s
, v
); }
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o
) { emitInsn(o
, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value
*);
61 inline void emitGPR(int pos
) {
62 emitGPR(pos
, (const Value
*)NULL
);
64 inline void emitGPR(int pos
, const ValueRef
&ref
) {
65 emitGPR(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
67 inline void emitGPR(int pos
, const ValueRef
*ref
) {
68 emitGPR(pos
, ref
? ref
->rep() : (const Value
*)NULL
);
70 inline void emitGPR(int pos
, const ValueDef
&def
) {
71 emitGPR(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
73 inline void emitSYS(int, const Value
*);
74 inline void emitSYS(int pos
, const ValueRef
&ref
) {
75 emitSYS(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
77 inline void emitPRED(int, const Value
*);
78 inline void emitPRED(int pos
) {
79 emitPRED(pos
, (const Value
*)NULL
);
81 inline void emitPRED(int pos
, const ValueRef
&ref
) {
82 emitPRED(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
84 inline void emitPRED(int pos
, const ValueDef
&def
) {
85 emitPRED(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
87 inline void emitADDR(int, int, int, int, const ValueRef
&);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef
&);
89 inline bool longIMMD(const ValueRef
&);
90 inline void emitIMMD(int, int, const ValueRef
&);
92 void emitCond3(int, CondCode
);
93 void emitCond4(int, CondCode
);
94 void emitCond5(int pos
, CondCode cc
) { emitCond4(pos
, cc
); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef
&);
101 inline void emitNEG(int, const ValueRef
&);
102 inline void emitNEG2(int, const ValueRef
&, const ValueRef
&);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode
, int);
105 inline void emitRND(int pos
) {
106 emitRND(pos
, insn
->rnd
, -1);
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef
&);
170 void emitLDSTs(int, DataType
);
210 void emitSUHandle(const int s
);
216 /*******************************************************************************
217 * general instruction layout/fields
218 ******************************************************************************/
221 CodeEmitterGM107::emitField(uint32_t *data
, int b
, int s
, uint32_t v
)
224 uint32_t m
= ((1ULL << s
) - 1);
225 uint64_t d
= (uint64_t)(v
& m
) << b
;
226 assert(!(v
& ~m
) || (v
& ~m
) == ~m
);
233 CodeEmitterGM107::emitPred()
235 if (insn
->predSrc
>= 0) {
236 emitField(16, 3, insn
->getSrc(insn
->predSrc
)->rep()->reg
.data
.id
);
237 emitField(19, 1, insn
->cc
== CC_NOT_P
);
244 CodeEmitterGM107::emitInsn(uint32_t hi
, bool pred
)
246 code
[0] = 0x00000000;
253 CodeEmitterGM107::emitGPR(int pos
, const Value
*val
)
255 emitField(pos
, 8, val
&& !val
->inFile(FILE_FLAGS
) ?
256 val
->reg
.data
.id
: 255);
260 CodeEmitterGM107::emitSYS(int pos
, const Value
*val
)
262 int id
= val
? val
->reg
.data
.id
: -1;
265 case SV_LANEID
: id
= 0x00; break;
266 case SV_VERTEX_COUNT
: id
= 0x10; break;
267 case SV_INVOCATION_ID
: id
= 0x11; break;
268 case SV_THREAD_KILL
: id
= 0x13; break;
269 case SV_INVOCATION_INFO
: id
= 0x1d; break;
270 case SV_TID
: id
= 0x21 + val
->reg
.data
.sv
.index
; break;
271 case SV_CTAID
: id
= 0x25 + val
->reg
.data
.sv
.index
; break;
272 case SV_CLOCK
: id
= 0x50 + val
->reg
.data
.sv
.index
; break;
274 assert(!"invalid system value");
279 emitField(pos
, 8, id
);
283 CodeEmitterGM107::emitPRED(int pos
, const Value
*val
)
285 emitField(pos
, 3, val
? val
->reg
.data
.id
: 7);
289 CodeEmitterGM107::emitADDR(int gpr
, int off
, int len
, int shr
,
292 const Value
*v
= ref
.get();
293 assert(!(v
->reg
.data
.offset
& ((1 << shr
) - 1)));
295 emitGPR(gpr
, ref
.getIndirect(0));
296 emitField(off
, len
, v
->reg
.data
.offset
>> shr
);
300 CodeEmitterGM107::emitCBUF(int buf
, int gpr
, int off
, int len
, int shr
,
303 const Value
*v
= ref
.get();
304 const Symbol
*s
= v
->asSym();
306 assert(!(s
->reg
.data
.offset
& ((1 << shr
) - 1)));
308 emitField(buf
, 5, v
->reg
.fileIndex
);
310 emitGPR(gpr
, ref
.getIndirect(0));
311 emitField(off
, 16, s
->reg
.data
.offset
>> shr
);
315 CodeEmitterGM107::longIMMD(const ValueRef
&ref
)
317 if (ref
.getFile() == FILE_IMMEDIATE
) {
318 const ImmediateValue
*imm
= ref
.get()->asImm();
319 if (isFloatType(insn
->sType
)) {
320 if ((imm
->reg
.data
.u32
& 0x00000fff) != 0x00000000)
323 if ((imm
->reg
.data
.u32
& 0xfff00000) != 0x00000000 &&
324 (imm
->reg
.data
.u32
& 0xfff00000) != 0xfff00000)
332 CodeEmitterGM107::emitIMMD(int pos
, int len
, const ValueRef
&ref
)
334 const ImmediateValue
*imm
= ref
.get()->asImm();
335 uint32_t val
= imm
->reg
.data
.u32
;
338 if (insn
->sType
== TYPE_F32
|| insn
->sType
== TYPE_F16
) {
339 assert(!(val
& 0x00000fff));
341 } else if (insn
->sType
== TYPE_F64
) {
342 assert(!(imm
->reg
.data
.u64
& 0x00000fffffffffffULL
));
343 val
= imm
->reg
.data
.u64
>> 44;
345 assert(!(val
& 0xfff00000) || (val
& 0xfff00000) == 0xfff00000);
346 emitField( 56, 1, (val
& 0x80000) >> 19);
347 emitField(pos
, len
, (val
& 0x7ffff));
349 emitField(pos
, len
, val
);
353 /*******************************************************************************
355 ******************************************************************************/
358 CodeEmitterGM107::emitCond3(int pos
, CondCode code
)
363 case CC_FL
: data
= 0x00; break;
365 case CC_LT
: data
= 0x01; break;
367 case CC_EQ
: data
= 0x02; break;
369 case CC_LE
: data
= 0x03; break;
371 case CC_GT
: data
= 0x04; break;
373 case CC_NE
: data
= 0x05; break;
375 case CC_GE
: data
= 0x06; break;
376 case CC_TR
: data
= 0x07; break;
378 assert(!"invalid cond3");
382 emitField(pos
, 3, data
);
386 CodeEmitterGM107::emitCond4(int pos
, CondCode code
)
391 case CC_FL
: data
= 0x00; break;
392 case CC_LT
: data
= 0x01; break;
393 case CC_EQ
: data
= 0x02; break;
394 case CC_LE
: data
= 0x03; break;
395 case CC_GT
: data
= 0x04; break;
396 case CC_NE
: data
= 0x05; break;
397 case CC_GE
: data
= 0x06; break;
398 // case CC_NUM: data = 0x07; break;
399 // case CC_NAN: data = 0x08; break;
400 case CC_LTU
: data
= 0x09; break;
401 case CC_EQU
: data
= 0x0a; break;
402 case CC_LEU
: data
= 0x0b; break;
403 case CC_GTU
: data
= 0x0c; break;
404 case CC_NEU
: data
= 0x0d; break;
405 case CC_GEU
: data
= 0x0e; break;
406 case CC_TR
: data
= 0x0f; break;
408 assert(!"invalid cond4");
412 emitField(pos
, 4, data
);
416 CodeEmitterGM107::emitO(int pos
)
418 emitField(pos
, 1, insn
->getSrc(0)->reg
.file
== FILE_SHADER_OUTPUT
);
422 CodeEmitterGM107::emitP(int pos
)
424 emitField(pos
, 1, insn
->perPatch
);
428 CodeEmitterGM107::emitSAT(int pos
)
430 emitField(pos
, 1, insn
->saturate
);
434 CodeEmitterGM107::emitCC(int pos
)
436 emitField(pos
, 1, insn
->flagsDef
>= 0);
440 CodeEmitterGM107::emitX(int pos
)
442 emitField(pos
, 1, insn
->flagsSrc
>= 0);
446 CodeEmitterGM107::emitABS(int pos
, const ValueRef
&ref
)
448 emitField(pos
, 1, ref
.mod
.abs());
452 CodeEmitterGM107::emitNEG(int pos
, const ValueRef
&ref
)
454 emitField(pos
, 1, ref
.mod
.neg());
458 CodeEmitterGM107::emitNEG2(int pos
, const ValueRef
&a
, const ValueRef
&b
)
460 emitField(pos
, 1, a
.mod
.neg() ^ b
.mod
.neg());
464 CodeEmitterGM107::emitFMZ(int pos
, int len
)
466 emitField(pos
, len
, insn
->dnz
<< 1 | insn
->ftz
);
470 CodeEmitterGM107::emitRND(int rmp
, RoundMode rnd
, int rip
)
474 case ROUND_NI
: ri
= 1;
475 case ROUND_N
: rm
= 0; break;
476 case ROUND_MI
: ri
= 1;
477 case ROUND_M
: rm
= 1; break;
478 case ROUND_PI
: ri
= 1;
479 case ROUND_P
: rm
= 2; break;
480 case ROUND_ZI
: ri
= 1;
481 case ROUND_Z
: rm
= 3; break;
483 assert(!"invalid round mode");
486 emitField(rip
, 1, ri
);
487 emitField(rmp
, 2, rm
);
491 CodeEmitterGM107::emitPDIV(int pos
)
493 assert(insn
->postFactor
>= -3 && insn
->postFactor
<= 3);
494 if (insn
->postFactor
> 0)
495 emitField(pos
, 3, 7 - insn
->postFactor
);
497 emitField(pos
, 3, 0 - insn
->postFactor
);
501 CodeEmitterGM107::emitINV(int pos
, const ValueRef
&ref
)
503 emitField(pos
, 1, !!(ref
.mod
& Modifier(NV50_IR_MOD_NOT
)));
506 /*******************************************************************************
508 ******************************************************************************/
511 CodeEmitterGM107::emitEXIT()
513 emitInsn (0xe3000000);
514 emitCond5(0x00, CC_TR
);
518 CodeEmitterGM107::emitBRA()
520 const FlowInstruction
*insn
= this->insn
->asFlow();
523 if (insn
->indirect
) {
525 emitInsn(0xe2000000); // JMX
527 emitInsn(0xe2500000); // BRX
531 emitInsn(0xe2100000); // JMP
533 emitInsn(0xe2400000); // BRA
534 emitField(0x07, 1, insn
->allWarp
);
537 emitField(0x06, 1, insn
->limit
);
538 emitCond5(0x00, CC_TR
);
540 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
541 int32_t pos
= insn
->target
.bb
->binPos
;
542 if (writeIssueDelays
&& !(pos
& 0x1f))
545 emitField(0x14, 24, pos
- (codeSize
+ 8));
547 emitField(0x14, 32, pos
);
549 emitCBUF (0x24, gpr
, 20, 16, 0, insn
->src(0));
550 emitField(0x05, 1, 1);
555 CodeEmitterGM107::emitCAL()
557 const FlowInstruction
*insn
= this->insn
->asFlow();
559 if (insn
->absolute
) {
560 emitInsn(0xe2200000, 0); // JCAL
562 emitInsn(0xe2600000, 0); // CAL
565 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
567 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
570 int pcAbs
= targGM107
->getBuiltinOffset(insn
->target
.builtin
);
571 addReloc(RelocEntry::TYPE_BUILTIN
, 0, pcAbs
, 0xfff00000, 20);
572 addReloc(RelocEntry::TYPE_BUILTIN
, 1, pcAbs
, 0x000fffff, -12);
574 emitField(0x14, 32, insn
->target
.bb
->binPos
);
578 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
579 emitField(0x05, 1, 1);
584 CodeEmitterGM107::emitPCNT()
586 const FlowInstruction
*insn
= this->insn
->asFlow();
588 emitInsn(0xe2b00000, 0);
590 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
591 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
593 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
594 emitField(0x05, 1, 1);
599 CodeEmitterGM107::emitCONT()
601 emitInsn (0xe3500000);
602 emitCond5(0x00, CC_TR
);
606 CodeEmitterGM107::emitPBK()
608 const FlowInstruction
*insn
= this->insn
->asFlow();
610 emitInsn(0xe2a00000, 0);
612 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
613 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
615 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
616 emitField(0x05, 1, 1);
621 CodeEmitterGM107::emitBRK()
623 emitInsn (0xe3400000);
624 emitCond5(0x00, CC_TR
);
628 CodeEmitterGM107::emitPRET()
630 const FlowInstruction
*insn
= this->insn
->asFlow();
632 emitInsn(0xe2700000, 0);
634 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
635 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
637 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
638 emitField(0x05, 1, 1);
643 CodeEmitterGM107::emitRET()
645 emitInsn (0xe3200000);
646 emitCond5(0x00, CC_TR
);
650 CodeEmitterGM107::emitSSY()
652 const FlowInstruction
*insn
= this->insn
->asFlow();
654 emitInsn(0xe2900000, 0);
656 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
657 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
659 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
660 emitField(0x05, 1, 1);
665 CodeEmitterGM107::emitSYNC()
667 emitInsn (0xf0f80000);
668 emitCond5(0x00, CC_TR
);
672 CodeEmitterGM107::emitSAM()
674 emitInsn(0xe3700000, 0);
678 CodeEmitterGM107::emitRAM()
680 emitInsn(0xe3800000, 0);
683 /*******************************************************************************
685 ******************************************************************************/
687 /*******************************************************************************
688 * movement / conversion
689 ******************************************************************************/
692 CodeEmitterGM107::emitMOV()
694 if (insn
->src(0).getFile() != FILE_IMMEDIATE
) {
695 switch (insn
->src(0).getFile()) {
697 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
698 emitInsn(0x5b6a0000);
701 emitInsn(0x5c980000);
703 emitGPR (0x14, insn
->src(0));
705 case FILE_MEMORY_CONST
:
706 emitInsn(0x4c980000);
707 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
710 emitInsn(0x38980000);
711 emitIMMD(0x14, 19, insn
->src(0));
714 emitInsn(0x50880000);
715 emitPRED(0x0c, insn
->src(0));
720 assert(!"bad src file");
723 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
724 insn
->src(0).getFile() != FILE_PREDICATE
)
725 emitField(0x27, 4, insn
->lanes
);
727 emitInsn (0x01000000);
728 emitIMMD (0x14, 32, insn
->src(0));
729 emitField(0x0c, 4, insn
->lanes
);
732 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
734 emitPRED(0x03, insn
->def(0));
737 emitGPR(0x00, insn
->def(0));
742 CodeEmitterGM107::emitS2R()
744 emitInsn(0xf0c80000);
745 emitSYS (0x14, insn
->src(0));
746 emitGPR (0x00, insn
->def(0));
750 CodeEmitterGM107::emitF2F()
752 RoundMode rnd
= insn
->rnd
;
755 case OP_FLOOR
: rnd
= ROUND_MI
; break;
756 case OP_CEIL
: rnd
= ROUND_PI
; break;
757 case OP_TRUNC
: rnd
= ROUND_ZI
; break;
762 switch (insn
->src(0).getFile()) {
764 emitInsn(0x5ca80000);
765 emitGPR (0x14, insn
->src(0));
767 case FILE_MEMORY_CONST
:
768 emitInsn(0x4ca80000);
769 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
772 emitInsn(0x38a80000);
773 emitIMMD(0x14, 19, insn
->src(0));
776 assert(!"bad src0 file");
780 emitField(0x32, 1, (insn
->op
== OP_SAT
) || insn
->saturate
);
781 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
783 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
785 emitField(0x29, 1, insn
->subOp
);
786 emitRND (0x27, rnd
, 0x2a);
787 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
788 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
789 emitGPR (0x00, insn
->def(0));
793 CodeEmitterGM107::emitF2I()
795 RoundMode rnd
= insn
->rnd
;
798 case OP_FLOOR
: rnd
= ROUND_M
; break;
799 case OP_CEIL
: rnd
= ROUND_P
; break;
800 case OP_TRUNC
: rnd
= ROUND_Z
; break;
805 switch (insn
->src(0).getFile()) {
807 emitInsn(0x5cb00000);
808 emitGPR (0x14, insn
->src(0));
810 case FILE_MEMORY_CONST
:
811 emitInsn(0x4cb00000);
812 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
815 emitInsn(0x38b00000);
816 emitIMMD(0x14, 19, insn
->src(0));
819 assert(!"bad src0 file");
823 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
825 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
827 emitRND (0x27, rnd
, 0x2a);
828 emitField(0x0c, 1, isSignedType(insn
->dType
));
829 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
830 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
831 emitGPR (0x00, insn
->def(0));
835 CodeEmitterGM107::emitI2F()
837 RoundMode rnd
= insn
->rnd
;
840 case OP_FLOOR
: rnd
= ROUND_M
; break;
841 case OP_CEIL
: rnd
= ROUND_P
; break;
842 case OP_TRUNC
: rnd
= ROUND_Z
; break;
847 switch (insn
->src(0).getFile()) {
849 emitInsn(0x5cb80000);
850 emitGPR (0x14, insn
->src(0));
852 case FILE_MEMORY_CONST
:
853 emitInsn(0x4cb80000);
854 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
857 emitInsn(0x38b80000);
858 emitIMMD(0x14, 19, insn
->src(0));
861 assert(!"bad src0 file");
865 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
867 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
868 emitField(0x29, 2, insn
->subOp
);
869 emitRND (0x27, rnd
, -1);
870 emitField(0x0d, 1, isSignedType(insn
->sType
));
871 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
872 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
873 emitGPR (0x00, insn
->def(0));
877 CodeEmitterGM107::emitI2I()
879 switch (insn
->src(0).getFile()) {
881 emitInsn(0x5ce00000);
882 emitGPR (0x14, insn
->src(0));
884 case FILE_MEMORY_CONST
:
885 emitInsn(0x4ce00000);
886 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
889 emitInsn(0x38e00000);
890 emitIMMD(0x14, 19, insn
->src(0));
893 assert(!"bad src0 file");
898 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
900 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
901 emitField(0x29, 2, insn
->subOp
);
902 emitField(0x0d, 1, isSignedType(insn
->sType
));
903 emitField(0x0c, 1, isSignedType(insn
->dType
));
904 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
905 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
906 emitGPR (0x00, insn
->def(0));
910 selpFlip(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
912 int loc
= entry
->loc
;
913 if (data
.force_persample_interp
)
914 code
[loc
+ 1] |= 1 << 10;
916 code
[loc
+ 1] &= ~(1 << 10);
920 CodeEmitterGM107::emitSEL()
922 switch (insn
->src(1).getFile()) {
924 emitInsn(0x5ca00000);
925 emitGPR (0x14, insn
->src(1));
927 case FILE_MEMORY_CONST
:
928 emitInsn(0x4ca00000);
929 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
932 emitInsn(0x38a00000);
933 emitIMMD(0x14, 19, insn
->src(1));
936 assert(!"bad src1 file");
940 emitINV (0x2a, insn
->src(2));
941 emitPRED(0x27, insn
->src(2));
942 emitGPR (0x08, insn
->src(0));
943 emitGPR (0x00, insn
->def(0));
945 if (insn
->subOp
== 1) {
946 addInterp(0, 0, selpFlip
);
951 CodeEmitterGM107::emitSHFL()
955 emitInsn (0xef100000);
957 switch (insn
->src(1).getFile()) {
959 emitGPR(0x14, insn
->src(1));
962 emitIMMD(0x14, 5, insn
->src(1));
966 assert(!"invalid src1 file");
970 switch (insn
->src(2).getFile()) {
972 emitGPR(0x27, insn
->src(2));
975 emitIMMD(0x22, 13, insn
->src(2));
979 assert(!"invalid src2 file");
983 if (!insn
->defExists(1))
986 assert(insn
->def(1).getFile() == FILE_PREDICATE
);
987 emitPRED(0x30, insn
->def(1));
990 emitField(0x1e, 2, insn
->subOp
);
991 emitField(0x1c, 2, type
);
992 emitGPR (0x08, insn
->src(0));
993 emitGPR (0x00, insn
->def(0));
996 /*******************************************************************************
998 ******************************************************************************/
1001 CodeEmitterGM107::emitDADD()
1003 switch (insn
->src(1).getFile()) {
1005 emitInsn(0x5c700000);
1006 emitGPR (0x14, insn
->src(1));
1008 case FILE_MEMORY_CONST
:
1009 emitInsn(0x4c700000);
1010 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1012 case FILE_IMMEDIATE
:
1013 emitInsn(0x38700000);
1014 emitIMMD(0x14, 19, insn
->src(1));
1017 assert(!"bad src1 file");
1020 emitABS(0x31, insn
->src(1));
1021 emitNEG(0x30, insn
->src(0));
1023 emitABS(0x2e, insn
->src(0));
1024 emitNEG(0x2d, insn
->src(1));
1026 if (insn
->op
== OP_SUB
)
1027 code
[1] ^= 0x00002000;
1029 emitGPR(0x08, insn
->src(0));
1030 emitGPR(0x00, insn
->def(0));
1034 CodeEmitterGM107::emitDMUL()
1036 switch (insn
->src(1).getFile()) {
1038 emitInsn(0x5c800000);
1039 emitGPR (0x14, insn
->src(1));
1041 case FILE_MEMORY_CONST
:
1042 emitInsn(0x4c800000);
1043 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1045 case FILE_IMMEDIATE
:
1046 emitInsn(0x38800000);
1047 emitIMMD(0x14, 19, insn
->src(1));
1050 assert(!"bad src1 file");
1054 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1057 emitGPR (0x08, insn
->src(0));
1058 emitGPR (0x00, insn
->def(0));
1062 CodeEmitterGM107::emitDFMA()
1064 switch(insn
->src(2).getFile()) {
1066 switch (insn
->src(1).getFile()) {
1068 emitInsn(0x5b700000);
1069 emitGPR (0x14, insn
->src(1));
1071 case FILE_MEMORY_CONST
:
1072 emitInsn(0x4b700000);
1073 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1075 case FILE_IMMEDIATE
:
1076 emitInsn(0x36700000);
1077 emitIMMD(0x14, 19, insn
->src(1));
1080 assert(!"bad src1 file");
1083 emitGPR (0x27, insn
->src(2));
1085 case FILE_MEMORY_CONST
:
1086 emitInsn(0x53700000);
1087 emitGPR (0x27, insn
->src(1));
1088 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1091 assert(!"bad src2 file");
1096 emitNEG (0x31, insn
->src(2));
1097 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1099 emitGPR (0x08, insn
->src(0));
1100 emitGPR (0x00, insn
->def(0));
1104 CodeEmitterGM107::emitDMNMX()
1106 switch (insn
->src(1).getFile()) {
1108 emitInsn(0x5c500000);
1109 emitGPR (0x14, insn
->src(1));
1111 case FILE_MEMORY_CONST
:
1112 emitInsn(0x4c500000);
1113 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1115 case FILE_IMMEDIATE
:
1116 emitInsn(0x38500000);
1117 emitIMMD(0x14, 19, insn
->src(1));
1120 assert(!"bad src1 file");
1124 emitABS (0x31, insn
->src(1));
1125 emitNEG (0x30, insn
->src(0));
1127 emitABS (0x2e, insn
->src(0));
1128 emitNEG (0x2d, insn
->src(1));
1129 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1131 emitGPR (0x08, insn
->src(0));
1132 emitGPR (0x00, insn
->def(0));
1136 CodeEmitterGM107::emitDSET()
1138 const CmpInstruction
*insn
= this->insn
->asCmp();
1140 switch (insn
->src(1).getFile()) {
1142 emitInsn(0x59000000);
1143 emitGPR (0x14, insn
->src(1));
1145 case FILE_MEMORY_CONST
:
1146 emitInsn(0x49000000);
1147 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1149 case FILE_IMMEDIATE
:
1150 emitInsn(0x32000000);
1151 emitIMMD(0x14, 19, insn
->src(1));
1154 assert(!"bad src1 file");
1158 if (insn
->op
!= OP_SET
) {
1160 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1161 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1162 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1164 assert(!"invalid set op");
1167 emitPRED(0x27, insn
->src(2));
1172 emitABS (0x36, insn
->src(0));
1173 emitNEG (0x35, insn
->src(1));
1174 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1175 emitCond4(0x30, insn
->setCond
);
1177 emitABS (0x2c, insn
->src(1));
1178 emitNEG (0x2b, insn
->src(0));
1179 emitGPR (0x08, insn
->src(0));
1180 emitGPR (0x00, insn
->def(0));
1184 CodeEmitterGM107::emitDSETP()
1186 const CmpInstruction
*insn
= this->insn
->asCmp();
1188 switch (insn
->src(1).getFile()) {
1190 emitInsn(0x5b800000);
1191 emitGPR (0x14, insn
->src(1));
1193 case FILE_MEMORY_CONST
:
1194 emitInsn(0x4b800000);
1195 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1197 case FILE_IMMEDIATE
:
1198 emitInsn(0x36800000);
1199 emitIMMD(0x14, 19, insn
->src(1));
1202 assert(!"bad src1 file");
1206 if (insn
->op
!= OP_SET
) {
1208 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1209 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1210 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1212 assert(!"invalid set op");
1215 emitPRED(0x27, insn
->src(2));
1220 emitCond4(0x30, insn
->setCond
);
1221 emitABS (0x2c, insn
->src(1));
1222 emitNEG (0x2b, insn
->src(0));
1223 emitGPR (0x08, insn
->src(0));
1224 emitABS (0x07, insn
->src(0));
1225 emitNEG (0x06, insn
->src(1));
1226 emitPRED (0x03, insn
->def(0));
1227 if (insn
->defExists(1))
1228 emitPRED(0x00, insn
->def(1));
1233 /*******************************************************************************
1235 ******************************************************************************/
1238 CodeEmitterGM107::emitFADD()
1240 if (!longIMMD(insn
->src(1))) {
1241 switch (insn
->src(1).getFile()) {
1243 emitInsn(0x5c580000);
1244 emitGPR (0x14, insn
->src(1));
1246 case FILE_MEMORY_CONST
:
1247 emitInsn(0x4c580000);
1248 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1250 case FILE_IMMEDIATE
:
1251 emitInsn(0x38580000);
1252 emitIMMD(0x14, 19, insn
->src(1));
1255 assert(!"bad src1 file");
1259 emitABS(0x31, insn
->src(1));
1260 emitNEG(0x30, insn
->src(0));
1262 emitABS(0x2e, insn
->src(0));
1263 emitNEG(0x2d, insn
->src(1));
1266 if (insn
->op
== OP_SUB
)
1267 code
[1] ^= 0x00002000;
1269 emitInsn(0x08000000);
1270 emitABS(0x39, insn
->src(1));
1271 emitNEG(0x38, insn
->src(0));
1273 emitABS(0x36, insn
->src(0));
1274 emitNEG(0x35, insn
->src(1));
1276 emitIMMD(0x14, 32, insn
->src(1));
1278 if (insn
->op
== OP_SUB
)
1279 code
[1] ^= 0x00080000;
1282 emitGPR(0x08, insn
->src(0));
1283 emitGPR(0x00, insn
->def(0));
1287 CodeEmitterGM107::emitFMUL()
1289 if (!longIMMD(insn
->src(1))) {
1290 switch (insn
->src(1).getFile()) {
1292 emitInsn(0x5c680000);
1293 emitGPR (0x14, insn
->src(1));
1295 case FILE_MEMORY_CONST
:
1296 emitInsn(0x4c680000);
1297 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1299 case FILE_IMMEDIATE
:
1300 emitInsn(0x38680000);
1301 emitIMMD(0x14, 19, insn
->src(1));
1304 assert(!"bad src1 file");
1308 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1314 emitInsn(0x1e000000);
1318 emitIMMD(0x14, 32, insn
->src(1));
1319 if (insn
->src(0).mod
.neg() ^ insn
->src(1).mod
.neg())
1320 code
[1] ^= 0x00080000; /* flip immd sign bit */
1323 emitGPR(0x08, insn
->src(0));
1324 emitGPR(0x00, insn
->def(0));
1328 CodeEmitterGM107::emitFFMA()
1330 bool isLongIMMD
= false;
1331 switch(insn
->src(2).getFile()) {
1333 switch (insn
->src(1).getFile()) {
1335 emitInsn(0x59800000);
1336 emitGPR (0x14, insn
->src(1));
1338 case FILE_MEMORY_CONST
:
1339 emitInsn(0x49800000);
1340 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1342 case FILE_IMMEDIATE
:
1343 if (longIMMD(insn
->getSrc(1))) {
1344 assert(insn
->getDef(0)->reg
.data
.id
== insn
->getSrc(2)->reg
.data
.id
);
1346 emitInsn(0x0c000000);
1347 emitIMMD(0x14, 32, insn
->src(1));
1349 emitInsn(0x32800000);
1350 emitIMMD(0x14, 19, insn
->src(1));
1354 assert(!"bad src1 file");
1358 emitGPR (0x27, insn
->src(2));
1360 case FILE_MEMORY_CONST
:
1361 emitInsn(0x51800000);
1362 emitGPR (0x27, insn
->src(1));
1363 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1366 assert(!"bad src2 file");
1371 emitNEG (0x39, insn
->src(2));
1372 emitNEG2(0x38, insn
->src(0), insn
->src(1));
1378 emitNEG (0x31, insn
->src(2));
1379 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1384 emitGPR(0x08, insn
->src(0));
1385 emitGPR(0x00, insn
->def(0));
1389 CodeEmitterGM107::emitMUFU()
1394 case OP_COS
: mufu
= 0; break;
1395 case OP_SIN
: mufu
= 1; break;
1396 case OP_EX2
: mufu
= 2; break;
1397 case OP_LG2
: mufu
= 3; break;
1398 case OP_RCP
: mufu
= 4 + 2 * insn
->subOp
; break;
1399 case OP_RSQ
: mufu
= 5 + 2 * insn
->subOp
; break;
1401 assert(!"invalid mufu");
1405 emitInsn (0x50800000);
1407 emitNEG (0x30, insn
->src(0));
1408 emitABS (0x2e, insn
->src(0));
1409 emitField(0x14, 3, mufu
);
1410 emitGPR (0x08, insn
->src(0));
1411 emitGPR (0x00, insn
->def(0));
1415 CodeEmitterGM107::emitFMNMX()
1417 switch (insn
->src(1).getFile()) {
1419 emitInsn(0x5c600000);
1420 emitGPR (0x14, insn
->src(1));
1422 case FILE_MEMORY_CONST
:
1423 emitInsn(0x4c600000);
1424 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1426 case FILE_IMMEDIATE
:
1427 emitInsn(0x38600000);
1428 emitIMMD(0x14, 19, insn
->src(1));
1431 assert(!"bad src1 file");
1435 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1438 emitABS(0x31, insn
->src(1));
1439 emitNEG(0x30, insn
->src(0));
1441 emitABS(0x2e, insn
->src(0));
1442 emitNEG(0x2d, insn
->src(1));
1444 emitGPR(0x08, insn
->src(0));
1445 emitGPR(0x00, insn
->def(0));
1449 CodeEmitterGM107::emitRRO()
1451 switch (insn
->src(0).getFile()) {
1453 emitInsn(0x5c900000);
1454 emitGPR (0x14, insn
->src(0));
1456 case FILE_MEMORY_CONST
:
1457 emitInsn(0x4c900000);
1458 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1460 case FILE_IMMEDIATE
:
1461 emitInsn(0x38900000);
1462 emitIMMD(0x14, 19, insn
->src(0));
1465 assert(!"bad src file");
1469 emitABS (0x31, insn
->src(0));
1470 emitNEG (0x2d, insn
->src(0));
1471 emitField(0x27, 1, insn
->op
== OP_PREEX2
);
1472 emitGPR (0x00, insn
->def(0));
1476 CodeEmitterGM107::emitFCMP()
1478 const CmpInstruction
*insn
= this->insn
->asCmp();
1479 CondCode cc
= insn
->setCond
;
1481 if (insn
->src(2).mod
.neg())
1482 cc
= reverseCondCode(cc
);
1484 switch(insn
->src(2).getFile()) {
1486 switch (insn
->src(1).getFile()) {
1488 emitInsn(0x5ba00000);
1489 emitGPR (0x14, insn
->src(1));
1491 case FILE_MEMORY_CONST
:
1492 emitInsn(0x4ba00000);
1493 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1495 case FILE_IMMEDIATE
:
1496 emitInsn(0x36a00000);
1497 emitIMMD(0x14, 19, insn
->src(1));
1500 assert(!"bad src1 file");
1503 emitGPR (0x27, insn
->src(2));
1505 case FILE_MEMORY_CONST
:
1506 emitInsn(0x53a00000);
1507 emitGPR (0x27, insn
->src(1));
1508 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1511 assert(!"bad src2 file");
1515 emitCond4(0x30, cc
);
1517 emitGPR (0x08, insn
->src(0));
1518 emitGPR (0x00, insn
->def(0));
1522 CodeEmitterGM107::emitFSET()
1524 const CmpInstruction
*insn
= this->insn
->asCmp();
1526 switch (insn
->src(1).getFile()) {
1528 emitInsn(0x58000000);
1529 emitGPR (0x14, insn
->src(1));
1531 case FILE_MEMORY_CONST
:
1532 emitInsn(0x48000000);
1533 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1535 case FILE_IMMEDIATE
:
1536 emitInsn(0x30000000);
1537 emitIMMD(0x14, 19, insn
->src(1));
1540 assert(!"bad src1 file");
1544 if (insn
->op
!= OP_SET
) {
1546 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1547 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1548 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1550 assert(!"invalid set op");
1553 emitPRED(0x27, insn
->src(2));
1559 emitABS (0x36, insn
->src(0));
1560 emitNEG (0x35, insn
->src(1));
1561 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1562 emitCond4(0x30, insn
->setCond
);
1564 emitABS (0x2c, insn
->src(1));
1565 emitNEG (0x2b, insn
->src(0));
1566 emitGPR (0x08, insn
->src(0));
1567 emitGPR (0x00, insn
->def(0));
/*
 * Encode a float compare that writes predicates (FSETP).
 *  - the opcode form is selected by the register file of src(1)
 *  - for ops other than OP_SET a 2-bit combine mode is written at 0x2d
 *    (AND = 0, OR = 1, XOR = 2) and predicate src(2) at 0x27
 *  - def(0) is written as a predicate at 0x03; def(1), if it exists, at 0x00
 * NOTE(review): the else path of the OP_SET check and the handling of a
 * missing def(1) are not visible in this excerpt.
 */
1571 CodeEmitterGM107::emitFSETP()
1573 const CmpInstruction
*insn
= this->insn
->asCmp();
1575 switch (insn
->src(1).getFile()) {
1577 emitInsn(0x5bb00000);
1578 emitGPR (0x14, insn
->src(1));
1580 case FILE_MEMORY_CONST
:
1581 emitInsn(0x4bb00000);
1582 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1584 case FILE_IMMEDIATE
:
1585 emitInsn(0x36b00000);
1586 emitIMMD(0x14, 19, insn
->src(1));
1589 assert(!"bad src1 file");
1593 if (insn
->op
!= OP_SET
) {
1595 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1596 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1597 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1599 assert(!"invalid set op");
1602 emitPRED(0x27, insn
->src(2));
1607 emitCond4(0x30, insn
->setCond
);
/* abs(src1) at 0x2c, neg(src0) at 0x2b, abs(src0) at 0x07, neg(src1) at 0x06 */
1609 emitABS (0x2c, insn
->src(1));
1610 emitNEG (0x2b, insn
->src(0));
1611 emitGPR (0x08, insn
->src(0));
1612 emitABS (0x07, insn
->src(0));
1613 emitNEG (0x06, insn
->src(1));
1614 emitPRED (0x03, insn
->def(0));
1615 if (insn
->defExists(1))
1616 emitPRED(0x00, insn
->def(1));
/*
 * Encode FSWZADD (opcode 0x50f80000).
 * insn->lanes goes to bit 0x26 and the 8-bit subOp to 0x1c.  src(1) is only
 * encoded (at 0x14) when it is not the predicate source (predSrc != 1).
 * NOTE(review): what is emitted at 0x14 when predSrc == 1 is not visible here.
 */
1622 CodeEmitterGM107::emitFSWZADD()
1624 emitInsn (0x50f80000);
1628 emitField(0x26, 1, insn
->lanes
); /* abused for .ndv */
1629 emitField(0x1c, 8, insn
->subOp
);
1630 if (insn
->predSrc
!= 1)
1631 emitGPR (0x14, insn
->src(1));
1634 emitGPR (0x08, insn
->src(0));
1635 emitGPR (0x00, insn
->def(0));
1638 /*******************************************************************************
1640 ******************************************************************************/
/*
 * Encode a two-source logic op.  `lop` is 0 for OP_AND, 1 for OP_OR and 2 for
 * OP_XOR.  Two encodings are visible:
 *  - src(1) not an immediate: opcode chosen by src(1)'s file, lop at 0x29,
 *    invert bits at 0x28 (src1) and 0x27 (src0)
 *  - otherwise: opcode 0x04000000 with a 32-bit immediate at 0x14, lop at
 *    0x35, invert bits at 0x38 (src1) and 0x37 (src0)
 * NOTE(review): the FILE_IMMEDIATE arm of the inner switch looks unreachable,
 * because the enclosing `if` already excludes FILE_IMMEDIATE.
 */
1643 CodeEmitterGM107::emitLOP()
1648 case OP_AND
: lop
= 0; break;
1649 case OP_OR
: lop
= 1; break;
1650 case OP_XOR
: lop
= 2; break;
1652 assert(!"invalid lop");
1656 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1657 switch (insn
->src(1).getFile()) {
1659 emitInsn(0x5c400000);
1660 emitGPR (0x14, insn
->src(1));
1662 case FILE_MEMORY_CONST
:
1663 emitInsn(0x4c400000);
1664 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1666 case FILE_IMMEDIATE
:
1667 emitInsn(0x38400000);
1668 emitIMMD(0x14, 19, insn
->src(1));
1671 assert(!"bad src1 file");
1677 emitField(0x29, 2, lop
);
1678 emitINV (0x28, insn
->src(1));
1679 emitINV (0x27, insn
->src(0));
/* 32-bit immediate form */
1681 emitInsn (0x04000000);
1683 emitINV (0x38, insn
->src(1));
1684 emitINV (0x37, insn
->src(0));
1685 emitField(0x35, 2, lop
);
1687 emitIMMD (0x14, 32, insn
->src(1));
1690 emitGPR (0x08, insn
->src(0));
1691 emitGPR (0x00, insn
->def(0));
1694 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
/*
 * Encode a bitwise NOT of the single operand src(0).
 *  - when src(0) is not a long immediate, the opcode is chosen by src(0)'s
 *    file and the operand is encoded at 0x14
 *  - otherwise opcode 0x05600000 is used with a 32-bit immediate at 0x14
 * The destination goes to 0x00.
 *
 * Fix: the 32-bit immediate path previously read src(1).  NOT is unary and
 * the path is selected by longIMMD(src(0)), so the immediate is src(0).
 */
1696 CodeEmitterGM107::emitNOT()
1698 if (!longIMMD(insn
->src(0))) {
1699 switch (insn
->src(0).getFile()) {
1701 emitInsn(0x5c400700);
1702 emitGPR (0x14, insn
->src(0));
1704 case FILE_MEMORY_CONST
:
1705 emitInsn(0x4c400700);
1706 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1708 case FILE_IMMEDIATE
:
1709 emitInsn(0x38400700);
1710 emitIMMD(0x14, 19, insn
->src(0));
1713 assert(!"bad src1 file");
/* long-immediate form: the only operand is src(0) */
1718 emitInsn (0x05600000);
1719 emitIMMD (0x14, 32, insn
->src(0));
1723 emitGPR(0x00, insn
->def(0));
/*
 * Encode an integer add / subtract.
 *  - src(1) not an immediate: opcode chosen by src(1)'s file, negate bits at
 *    0x31 (src0) and 0x30 (src1)
 *  - otherwise: opcode 0x1c000000 with a 32-bit immediate at 0x14 and the
 *    src0 negate bit at 0x38
 * For OP_SUB, bit 0x00010000 of code[1] is toggled after the operands are
 * encoded.
 * NOTE(review): the FILE_IMMEDIATE arm of the inner switch looks unreachable,
 * because the enclosing `if` already excludes FILE_IMMEDIATE.
 */
1727 CodeEmitterGM107::emitIADD()
1729 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1730 switch (insn
->src(1).getFile()) {
1732 emitInsn(0x5c100000);
1733 emitGPR (0x14, insn
->src(1));
1735 case FILE_MEMORY_CONST
:
1736 emitInsn(0x4c100000);
1737 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1739 case FILE_IMMEDIATE
:
1740 emitInsn(0x38100000);
1741 emitIMMD(0x14, 19, insn
->src(1));
1744 assert(!"bad src1 file");
1748 emitNEG(0x31, insn
->src(0));
1749 emitNEG(0x30, insn
->src(1));
/* 32-bit immediate form */
1753 emitInsn(0x1c000000);
1754 emitNEG (0x38, insn
->src(0));
1758 emitIMMD(0x14, 32, insn
->src(1));
/* subtraction flips one bit in the upper instruction word */
1761 if (insn
->op
== OP_SUB
)
1762 code
[1] ^= 0x00010000;
1764 emitGPR(0x08, insn
->src(0));
1765 emitGPR(0x00, insn
->def(0));
/*
 * Encode an integer multiply.
 *  - src(1) not an immediate: opcode chosen by src(1)'s file; sType
 *    signedness at 0x29, dType signedness at 0x28, MUL_HIGH flag at 0x27
 *  - otherwise: opcode 0x1f000000 with a 32-bit immediate at 0x14; the same
 *    three flags go to 0x37, 0x36 and 0x35
 * NOTE(review): the FILE_IMMEDIATE arm of the inner switch looks unreachable,
 * because the enclosing `if` already excludes FILE_IMMEDIATE.
 */
1769 CodeEmitterGM107::emitIMUL()
1771 if (insn
->src(1).getFile() != FILE_IMMEDIATE
) {
1772 switch (insn
->src(1).getFile()) {
1774 emitInsn(0x5c380000);
1775 emitGPR (0x14, insn
->src(1));
1777 case FILE_MEMORY_CONST
:
1778 emitInsn(0x4c380000);
1779 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1781 case FILE_IMMEDIATE
:
1782 emitInsn(0x38380000);
1783 emitIMMD(0x14, 19, insn
->src(1));
1786 assert(!"bad src1 file");
1790 emitField(0x29, 1, isSignedType(insn
->sType
));
1791 emitField(0x28, 1, isSignedType(insn
->dType
));
1792 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
/* 32-bit immediate form */
1794 emitInsn (0x1f000000);
1795 emitField(0x37, 1, isSignedType(insn
->sType
));
1796 emitField(0x36, 1, isSignedType(insn
->dType
));
1797 emitField(0x35, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1799 emitIMMD (0x14, 32, insn
->src(1));
1802 emitGPR(0x08, insn
->src(0));
1803 emitGPR(0x00, insn
->def(0));
/*
 * Encode an integer multiply-add.  The outer switch is on src(2)'s file:
 *  - first arm (label not visible here): opcode chosen by src(1)'s file,
 *    src(1) encoded at 0x14 and src(2) as a GPR at 0x27
 *  - FILE_MEMORY_CONST: opcode 0x52000000, src(1) as a GPR at 0x27 and src(2)
 *    as the constant-buffer operand
 * Common fields: MUL_HIGH flag at 0x36, sType signedness at 0x35, neg(src2)
 * at 0x34, combined neg of src0/src1 (emitNEG2) at 0x33, dType signedness at
 * 0x30.
 */
1807 CodeEmitterGM107::emitIMAD()
1809 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1810 switch(insn
->src(2).getFile()) {
1812 switch (insn
->src(1).getFile()) {
1814 emitInsn(0x5a000000);
1815 emitGPR (0x14, insn
->src(1));
1817 case FILE_MEMORY_CONST
:
1818 emitInsn(0x4a000000);
1819 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1821 case FILE_IMMEDIATE
:
1822 emitInsn(0x34000000);
1823 emitIMMD(0x14, 19, insn
->src(1));
1826 assert(!"bad src1 file");
1829 emitGPR (0x27, insn
->src(2));
1831 case FILE_MEMORY_CONST
:
1832 emitInsn(0x52000000);
1833 emitGPR (0x27, insn
->src(1));
1834 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1837 assert(!"bad src2 file");
1841 emitField(0x36, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1842 emitField(0x35, 1, isSignedType(insn
->sType
));
1843 emitNEG (0x34, insn
->src(2));
1844 emitNEG2 (0x33, insn
->src(0), insn
->src(1));
1847 emitField(0x30, 1, isSignedType(insn
->dType
));
1849 emitGPR (0x08, insn
->src(0));
1850 emitGPR (0x00, insn
->def(0));
/*
 * Encode ISCADD.  The opcode form is chosen by the file of src(2), which is
 * encoded at 0x14; src(1) is written as a 5-bit immediate at 0x27.  Negate
 * bits: src0 at 0x31, src2 at 0x30.
 * NOTE(review): the assert text says "src1" although the switch inspects
 * src(2).
 */
1854 CodeEmitterGM107::emitISCADD()
1856 switch (insn
->src(2).getFile()) {
1858 emitInsn(0x5c180000);
1859 emitGPR (0x14, insn
->src(2));
1861 case FILE_MEMORY_CONST
:
1862 emitInsn(0x4c180000);
1863 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1865 case FILE_IMMEDIATE
:
1866 emitInsn(0x38180000);
1867 emitIMMD(0x14, 19, insn
->src(2));
1870 assert(!"bad src1 file");
1873 emitNEG (0x31, insn
->src(0));
1874 emitNEG (0x30, insn
->src(2));
1876 emitIMMD(0x27, 5, insn
->src(1));
1877 emitGPR (0x08, insn
->src(0));
1878 emitGPR (0x00, insn
->def(0));
/*
 * Encode an integer min/max.  The opcode form is chosen by src(1)'s file.
 * dType signedness goes to 0x30, the 2-bit subOp to 0x2b, and bit 0x2a is set
 * when the op is OP_MAX.
 */
1882 CodeEmitterGM107::emitIMNMX()
1884 switch (insn
->src(1).getFile()) {
1886 emitInsn(0x5c200000);
1887 emitGPR (0x14, insn
->src(1));
1889 case FILE_MEMORY_CONST
:
1890 emitInsn(0x4c200000);
1891 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1893 case FILE_IMMEDIATE
:
1894 emitInsn(0x38200000);
1895 emitIMMD(0x14, 19, insn
->src(1));
1898 assert(!"bad src1 file");
1902 emitField(0x30, 1, isSignedType(insn
->dType
));
1904 emitField(0x2b, 2, insn
->subOp
);
1905 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1907 emitGPR (0x08, insn
->src(0));
1908 emitGPR (0x00, insn
->def(0));
/*
 * Encode ICMP.  The condition code is taken from setCond and replaced by
 * reverseCondCode(cc) when src(2) carries a negate modifier.  The outer
 * switch is on src(2)'s file:
 *  - first arm (label not visible here): opcode chosen by src(1)'s file,
 *    src(1) at 0x14 and src(2) as a GPR at 0x27
 *  - FILE_MEMORY_CONST: opcode 0x53400000, src(1) as a GPR at 0x27 and src(2)
 *    as the constant-buffer operand
 * cc is written with emitCond3 at 0x31 and sType signedness at 0x30.
 */
1912 CodeEmitterGM107::emitICMP()
1914 const CmpInstruction
*insn
= this->insn
->asCmp();
1915 CondCode cc
= insn
->setCond
;
1917 if (insn
->src(2).mod
.neg())
1918 cc
= reverseCondCode(cc
);
1920 switch(insn
->src(2).getFile()) {
1922 switch (insn
->src(1).getFile()) {
1924 emitInsn(0x5b400000);
1925 emitGPR (0x14, insn
->src(1));
1927 case FILE_MEMORY_CONST
:
1928 emitInsn(0x4b400000);
1929 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1931 case FILE_IMMEDIATE
:
1932 emitInsn(0x36400000);
1933 emitIMMD(0x14, 19, insn
->src(1));
1936 assert(!"bad src1 file");
1939 emitGPR (0x27, insn
->src(2));
1941 case FILE_MEMORY_CONST
:
1942 emitInsn(0x53400000);
1943 emitGPR (0x27, insn
->src(1));
1944 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1947 assert(!"bad src2 file");
1951 emitCond3(0x31, cc
);
1952 emitField(0x30, 1, isSignedType(insn
->sType
));
1953 emitGPR (0x08, insn
->src(0));
1954 emitGPR (0x00, insn
->def(0));
/*
 * Encode an integer compare-and-set (ISET).
 *  - the opcode form is selected by the register file of src(1)
 *  - for ops other than OP_SET a 2-bit combine mode is written at 0x2d
 *    (AND = 0, OR = 1, XOR = 2) and predicate src(2) at 0x27
 *  - setCond via emitCond3 at 0x31, sType signedness at 0x30
 *  - bit 0x2c is set when dType is TYPE_F32
 * NOTE(review): the else path of the OP_SET check is not visible here.
 */
1958 CodeEmitterGM107::emitISET()
1960 const CmpInstruction
*insn
= this->insn
->asCmp();
1962 switch (insn
->src(1).getFile()) {
1964 emitInsn(0x5b500000);
1965 emitGPR (0x14, insn
->src(1));
1967 case FILE_MEMORY_CONST
:
1968 emitInsn(0x4b500000);
1969 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1971 case FILE_IMMEDIATE
:
1972 emitInsn(0x36500000);
1973 emitIMMD(0x14, 19, insn
->src(1));
1976 assert(!"bad src1 file");
1980 if (insn
->op
!= OP_SET
) {
1982 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1983 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1984 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1986 assert(!"invalid set op");
1989 emitPRED(0x27, insn
->src(2));
1994 emitCond3(0x31, insn
->setCond
);
1995 emitField(0x30, 1, isSignedType(insn
->sType
));
1997 emitField(0x2c, 1, insn
->dType
== TYPE_F32
);
1999 emitGPR (0x08, insn
->src(0));
2000 emitGPR (0x00, insn
->def(0));
/*
 * Encode an integer compare that writes predicates (ISETP).
 *  - the opcode form is selected by the register file of src(1)
 *  - for ops other than OP_SET a 2-bit combine mode is written at 0x2d
 *    (AND = 0, OR = 1, XOR = 2) and predicate src(2) at 0x27
 *  - setCond via emitCond3 at 0x31, sType signedness at 0x30
 *  - def(0) is written as a predicate at 0x03; def(1), if it exists, at 0x00
 * NOTE(review): the else paths are not visible in this excerpt.
 */
2004 CodeEmitterGM107::emitISETP()
2006 const CmpInstruction
*insn
= this->insn
->asCmp();
2008 switch (insn
->src(1).getFile()) {
2010 emitInsn(0x5b600000);
2011 emitGPR (0x14, insn
->src(1));
2013 case FILE_MEMORY_CONST
:
2014 emitInsn(0x4b600000);
2015 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2017 case FILE_IMMEDIATE
:
2018 emitInsn(0x36600000);
2019 emitIMMD(0x14, 19, insn
->src(1));
2022 assert(!"bad src1 file");
2026 if (insn
->op
!= OP_SET
) {
2028 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
2029 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
2030 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
2032 assert(!"invalid set op");
2035 emitPRED(0x27, insn
->src(2));
2040 emitCond3(0x31, insn
->setCond
);
2041 emitField(0x30, 1, isSignedType(insn
->sType
));
2043 emitGPR (0x08, insn
->src(0));
2044 emitPRED (0x03, insn
->def(0));
2045 if (insn
->defExists(1))
2046 emitPRED(0x00, insn
->def(1));
/*
 * Encode a shift-left.  The opcode form is chosen by src(1)'s file (the shift
 * amount, encoded at 0x14).  Bit 0x27 is set when subOp equals
 * NV50_IR_SUBOP_SHIFT_WRAP.
 */
2052 CodeEmitterGM107::emitSHL()
2054 switch (insn
->src(1).getFile()) {
2056 emitInsn(0x5c480000);
2057 emitGPR (0x14, insn
->src(1));
2059 case FILE_MEMORY_CONST
:
2060 emitInsn(0x4c480000);
2061 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2063 case FILE_IMMEDIATE
:
2064 emitInsn(0x38480000);
2065 emitIMMD(0x14, 19, insn
->src(1));
2068 assert(!"bad src1 file");
2074 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2075 emitGPR (0x08, insn
->src(0));
2076 emitGPR (0x00, insn
->def(0));
/*
 * Encode a shift-right.  The opcode form is chosen by src(1)'s file (encoded
 * at 0x14).  dType signedness goes to bit 0x30 and bit 0x27 is set when subOp
 * equals NV50_IR_SUBOP_SHIFT_WRAP.
 */
2080 CodeEmitterGM107::emitSHR()
2082 switch (insn
->src(1).getFile()) {
2084 emitInsn(0x5c280000);
2085 emitGPR (0x14, insn
->src(1));
2087 case FILE_MEMORY_CONST
:
2088 emitInsn(0x4c280000);
2089 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2091 case FILE_IMMEDIATE
:
2092 emitInsn(0x38280000);
2093 emitIMMD(0x14, 19, insn
->src(1));
2096 assert(!"bad src1 file");
2100 emitField(0x30, 1, isSignedType(insn
->dType
));
2103 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2104 emitGPR (0x08, insn
->src(0));
2105 emitGPR (0x00, insn
->def(0));
/*
 * Encode SHF, a three-source shift.  The opcode depends on src(1)'s file and
 * on whether the op is OP_SHL.  SHIFT_WRAP is tested as a bit of subOp and
 * written at 0x32, SHIFT_HIGH likewise at 0x30.  src(2) goes to 0x27 and the
 * 2-bit `type` value to 0x25.
 * NOTE(review): `type` is derived from sType in a switch whose cases are not
 * visible in this excerpt.
 */
2109 CodeEmitterGM107::emitSHF()
2113 switch (insn
->src(1).getFile()) {
2115 emitInsn(insn
->op
== OP_SHL
? 0x5bf80000 : 0x5cf80000);
2116 emitGPR(0x14, insn
->src(1));
2118 case FILE_IMMEDIATE
:
2119 emitInsn(insn
->op
== OP_SHL
? 0x36f80000 : 0x38f80000);
2120 emitIMMD(0x14, 19, insn
->src(1));
2123 assert(!"bad src1 file");
2127 switch (insn
->sType
) {
2139 emitField(0x32, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_WRAP
));
2141 emitField(0x30, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_HIGH
));
2143 emitGPR (0x27, insn
->src(2));
2144 emitField(0x25, 2, type
);
2145 emitGPR (0x08, insn
->src(0));
2146 emitGPR (0x00, insn
->def(0));
/*
 * Encode POPC on the single operand src(0).  The opcode form is chosen by
 * src(0)'s file and the operand is encoded at 0x14; its invert modifier goes
 * to bit 0x28.
 * NOTE(review): the assert text says "src1" although src(0) is inspected.
 */
2150 CodeEmitterGM107::emitPOPC()
2152 switch (insn
->src(0).getFile()) {
2154 emitInsn(0x5c080000);
2155 emitGPR (0x14, insn
->src(0));
2157 case FILE_MEMORY_CONST
:
2158 emitInsn(0x4c080000);
2159 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2161 case FILE_IMMEDIATE
:
2162 emitInsn(0x38080000);
2163 emitIMMD(0x14, 19, insn
->src(0));
2166 assert(!"bad src1 file");
2170 emitINV(0x28, insn
->src(0));
2171 emitGPR(0x00, insn
->def(0));
/*
 * Encode BFI.  The outer switch is on src(2)'s file:
 *  - first arm (label not visible here): opcode chosen by src(1)'s file,
 *    src(1) at 0x14 and src(2) as a GPR at 0x27
 *  - FILE_MEMORY_CONST: opcode 0x53f00000, src(1) as a GPR at 0x27 and src(2)
 *    as the constant-buffer operand
 */
2175 CodeEmitterGM107::emitBFI()
2177 switch(insn
->src(2).getFile()) {
2179 switch (insn
->src(1).getFile()) {
2181 emitInsn(0x5bf00000);
2182 emitGPR (0x14, insn
->src(1));
2184 case FILE_MEMORY_CONST
:
2185 emitInsn(0x4bf00000);
2186 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2188 case FILE_IMMEDIATE
:
2189 emitInsn(0x36f00000);
2190 emitIMMD(0x14, 19, insn
->src(1));
2193 assert(!"bad src1 file");
2196 emitGPR (0x27, insn
->src(2));
2198 case FILE_MEMORY_CONST
:
2199 emitInsn(0x53f00000);
2200 emitGPR (0x27, insn
->src(1));
2201 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
2204 assert(!"bad src2 file");
2209 emitGPR (0x08, insn
->src(0));
2210 emitGPR (0x00, insn
->def(0));
/*
 * Encode BFE.  The opcode form is chosen by src(1)'s file (encoded at 0x14).
 * dType signedness goes to bit 0x30 and bit 0x28 is set when subOp equals
 * NV50_IR_SUBOP_EXTBF_REV.
 */
2214 CodeEmitterGM107::emitBFE()
2216 switch (insn
->src(1).getFile()) {
2218 emitInsn(0x5c000000);
2219 emitGPR (0x14, insn
->src(1));
2221 case FILE_MEMORY_CONST
:
2222 emitInsn(0x4c000000);
2223 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2225 case FILE_IMMEDIATE
:
2226 emitInsn(0x38000000);
2227 emitIMMD(0x14, 19, insn
->src(1));
2230 assert(!"bad src1 file");
2234 emitField(0x30, 1, isSignedType(insn
->dType
));
2236 emitField(0x28, 1, insn
->subOp
== NV50_IR_SUBOP_EXTBF_REV
);
2237 emitGPR (0x08, insn
->src(0));
2238 emitGPR (0x00, insn
->def(0));
/*
 * Encode FLO on the single operand src(0).  The opcode form is chosen by
 * src(0)'s file and the operand is encoded at 0x14.  dType signedness goes to
 * bit 0x30, bit 0x29 is set when subOp equals NV50_IR_SUBOP_BFIND_SAMT, and
 * the operand's invert modifier goes to bit 0x28.
 * NOTE(review): the assert text says "src1" although src(0) is inspected.
 */
2242 CodeEmitterGM107::emitFLO()
2244 switch (insn
->src(0).getFile()) {
2246 emitInsn(0x5c300000);
2247 emitGPR (0x14, insn
->src(0));
2249 case FILE_MEMORY_CONST
:
2250 emitInsn(0x4c300000);
2251 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2253 case FILE_IMMEDIATE
:
2254 emitInsn(0x38300000);
2255 emitIMMD(0x14, 19, insn
->src(0));
2258 assert(!"bad src1 file");
2262 emitField(0x30, 1, isSignedType(insn
->dType
));
2264 emitField(0x29, 1, insn
->subOp
== NV50_IR_SUBOP_BFIND_SAMT
);
2265 emitINV (0x28, insn
->src(0));
2266 emitGPR (0x00, insn
->def(0));
2269 /*******************************************************************************
2271 ******************************************************************************/
/*
 * Write the 3-bit load/store data-size code for `type` at bit position `pos`.
 * Mapping by typeSizeof(type):
 *   1 byte  -> 1 if signed, else 0
 *   2 bytes -> 3 if signed, else 2
 *   4 -> 4,  8 -> 5,  16 -> 6
 * Any other size trips the "bad type" assert.
 */
2274 CodeEmitterGM107::emitLDSTs(int pos
, DataType type
)
2278 switch (typeSizeof(type
)) {
2279 case 1: data
= isSignedType(type
) ? 1 : 0; break;
2280 case 2: data
= isSignedType(type
) ? 3 : 2; break;
2281 case 4: data
= 4; break;
2282 case 8: data
= 5; break;
2283 case 16: data
= 6; break;
2285 assert(!"bad type");
2289 emitField(pos
, 3, data
);
/*
 * Write the 2-bit cache-mode code for insn->cache at bit position `pos`:
 * CACHE_CA -> 0, CACHE_CG -> 1, CACHE_CS -> 2, CACHE_CV -> 3.
 * Any other value trips the "invalid caching mode" assert.
 */
2293 CodeEmitterGM107::emitLDSTc(int pos
)
2297 switch (insn
->cache
) {
2298 case CACHE_CA
: mode
= 0; break;
2299 case CACHE_CG
: mode
= 1; break;
2300 case CACHE_CS
: mode
= 2; break;
2301 case CACHE_CV
: mode
= 3; break;
2303 assert(!"invalid caching mode");
2307 emitField(pos
, 2, mode
);
/*
 * Encode a constant-buffer load (opcode 0xef900000): data size from dType at
 * 0x30, 2-bit subOp at 0x2c, the constant-buffer address from src(0) and the
 * destination register at 0x00.
 */
2311 CodeEmitterGM107::emitLDC()
2313 emitInsn (0xef900000);
2314 emitLDSTs(0x30, insn
->dType
);
2315 emitField(0x2c, 2, insn
->subOp
);
2316 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn
->src(0));
2317 emitGPR (0x00, insn
->def(0));
/*
 * Encode LDL (opcode 0xef400000): data size from dType at 0x30, address from
 * src(0) via emitADDR, destination register at 0x00.
 */
2321 CodeEmitterGM107::emitLDL()
2323 emitInsn (0xef400000);
2324 emitLDSTs(0x30, insn
->dType
);
2326 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2327 emitGPR (0x00, insn
->def(0));
/*
 * Encode LDS (opcode 0xef480000): data size from dType at 0x30, address from
 * src(0) via emitADDR, destination register at 0x00.
 */
2331 CodeEmitterGM107::emitLDS()
2333 emitInsn (0xef480000);
2334 emitLDSTs(0x30, insn
->dType
);
2335 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2336 emitGPR (0x00, insn
->def(0));
/*
 * Encode LD (opcode 0x80000000): data size from dType at 0x35, address from
 * src(0) via emitADDR, destination register at 0x00.  Bit 0x34 is set when
 * the address's indirect register is 8 bytes wide.
 * NOTE(review): getIndirect(0) is dereferenced unconditionally -- presumably
 * every instruction reaching this path has an indirect address; confirm.
 */
2340 CodeEmitterGM107::emitLD()
2342 emitInsn (0x80000000);
2345 emitLDSTs(0x35, insn
->dType
);
2346 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2347 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2348 emitGPR (0x00, insn
->def(0));
/*
 * Encode STL (opcode 0xef500000): data size from dType at 0x30, address from
 * src(0) via emitADDR, and the value to store (src(1)) in the register field
 * at 0x00.
 */
2352 CodeEmitterGM107::emitSTL()
2354 emitInsn (0xef500000);
2355 emitLDSTs(0x30, insn
->dType
);
2357 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2358 emitGPR (0x00, insn
->src(1));
/*
 * Encode STS (opcode 0xef580000): data size from dType at 0x30, address from
 * src(0) via emitADDR, and the value to store (src(1)) in the register field
 * at 0x00.
 */
2362 CodeEmitterGM107::emitSTS()
2364 emitInsn (0xef580000);
2365 emitLDSTs(0x30, insn
->dType
);
2366 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2367 emitGPR (0x00, insn
->src(1));
/*
 * Encode ST (opcode 0xa0000000): data size from dType at 0x35, address from
 * src(0) via emitADDR, value to store (src(1)) in the register field at 0x00.
 * Bit 0x34 is set when the address's indirect register is 8 bytes wide.
 * NOTE(review): getIndirect(0) is dereferenced unconditionally -- presumably
 * every instruction reaching this path has an indirect address; confirm.
 */
2371 CodeEmitterGM107::emitST()
2373 emitInsn (0xa0000000);
2376 emitLDSTs(0x35, insn
->dType
);
2377 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2378 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2379 emitGPR (0x00, insn
->src(1));
/*
 * Encode ALD (opcode 0xefd80000).  The 2-bit field at 0x2f holds
 * (size of def(0) in bytes / 4) - 1.  The second indirect of src(0) goes to
 * the register field at 0x27, the address to emitADDR, and the destination to
 * 0x00.
 */
2383 CodeEmitterGM107::emitALD()
2385 emitInsn (0xefd80000);
2386 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2387 emitGPR (0x27, insn
->src(0).getIndirect(1));
2390 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2391 emitGPR (0x00, insn
->def(0));
/*
 * Encode AST (opcode 0xeff00000).  The 2-bit field at 0x2f holds
 * (typeSizeof(dType) / 4) - 1.  The second indirect of src(0) goes to the
 * register field at 0x27, the address to emitADDR, and the value to store
 * (src(1)) to 0x00.
 */
2395 CodeEmitterGM107::emitAST()
2397 emitInsn (0xeff00000);
2398 emitField(0x2f, 2, (typeSizeof(insn
->dType
) / 4) - 1);
2399 emitGPR (0x27, insn
->src(0).getIndirect(1));
2401 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2402 emitGPR (0x00, insn
->src(1));
/* Encode ISBERD (opcode 0xefd00000): src(0) at 0x08, destination at 0x00. */
2406 CodeEmitterGM107::emitISBERD()
2408 emitInsn(0xefd00000);
2409 emitGPR (0x08, insn
->src(0));
2410 emitGPR (0x00, insn
->def(0));
/*
 * Encode AL2P (opcode 0xefa00000).  The 2-bit field at 0x2f holds
 * (size of def(0) in bytes / 4) - 1; the offset of src(0) is written as an
 * 11-bit field at 0x14, the first indirect of src(0) at 0x08 and the
 * destination at 0x00.
 */
2414 CodeEmitterGM107::emitAL2P()
2416 emitInsn (0xefa00000);
2417 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2420 emitField(0x14, 11, insn
->src(0).get()->reg
.data
.offset
);
2421 emitGPR (0x08, insn
->src(0).getIndirect(0));
2422 emitGPR (0x00, insn
->def(0));
/*
 * Fixup callback that patches an already emitted interpolation instruction.
 *
 * entry: supplies the interpolation mode (ipa), a register number (reg) and
 *        the word index of the instruction in `code` (loc)
 * code:  instruction words to patch in place
 * data:  state used for the adjustment (flatshade, force_persample_interp)
 *
 * Mode adjustment:
 *  - flatshade set and the mode bits equal NV50_IR_INTERP_SC: ipa becomes
 *    NV50_IR_INTERP_FLAT
 *  - otherwise, with force_persample_interp set, default sample mode and a
 *    non-flat mode: NV50_IR_INTERP_CENTROID is OR-ed in
 * NOTE(review): some lines of the first branch are not visible here (it may
 * also change `reg`) -- confirm against the full file.
 */
2426 interpApply(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
2428 int ipa
= entry
->ipa
;
2429 int reg
= entry
->reg
;
2430 int loc
= entry
->loc
;
2432 if (data
.flatshade
&&
2433 (ipa
& NV50_IR_INTERP_MODE_MASK
) == NV50_IR_INTERP_SC
) {
2434 ipa
= NV50_IR_INTERP_FLAT
;
2436 } else if (data
.force_persample_interp
&&
2437 (ipa
& NV50_IR_INTERP_SAMPLE_MASK
) == NV50_IR_INTERP_DEFAULT
&&
2438 (ipa
& NV50_IR_INTERP_MODE_MASK
) != NV50_IR_INTERP_FLAT
) {
2439 ipa
|= NV50_IR_INTERP_CENTROID
;
/* upper word: clear bits 0x14..0x17, then ipa[1:0] -> 0x16 and ipa[3:2] -> 0x14 */
2441 code
[loc
+ 1] &= ~(0xf << 0x14);
2442 code
[loc
+ 1] |= (ipa
& 0x3) << 0x16;
2443 code
[loc
+ 1] |= (ipa
& 0xc) << (0x14 - 2);
/* lower word: replace the 8-bit register field at 0x14 */
2444 code
[loc
+ 0] &= ~(0xff << 0x14);
2445 code
[loc
+ 0] |= reg
<< 0x14;
/*
 * Encode an interpolation instruction (opcode 0xe0000000).
 *  - ipam (field 0x36): LINEAR = 0, PERSPECTIVE = 1, FLAT = 2, SC = 3
 *  - ipas (field 0x34): DEFAULT = 0, CENTROID = 1, OFFSET = 2
 *  - field 0x2f (3 bits) is set to 7
 *  - bit 0x40 of code[1] (".idx") is set when bits 8..15 of code[0] are not
 *    all ones after the address is emitted
 * For OP_PINTERP, src(1) goes to 0x14, the OFFSET sample mode takes src(2) at
 * 0x27, and a fixup is registered with src(1)'s register id.  In the other
 * path the OFFSET operand is src(1) and the fixup is registered with 0xff.
 * NOTE(review): the statement guarded by the final `if` is not visible here.
 */
2449 CodeEmitterGM107::emitIPA()
2451 int ipam
= 0, ipas
= 0;
2453 switch (insn
->getInterpMode()) {
2454 case NV50_IR_INTERP_LINEAR
: ipam
= 0; break;
2455 case NV50_IR_INTERP_PERSPECTIVE
: ipam
= 1; break;
2456 case NV50_IR_INTERP_FLAT
: ipam
= 2; break;
2457 case NV50_IR_INTERP_SC
: ipam
= 3; break;
2459 assert(!"invalid ipa mode");
2463 switch (insn
->getSampleMode()) {
2464 case NV50_IR_INTERP_DEFAULT
: ipas
= 0; break;
2465 case NV50_IR_INTERP_CENTROID
: ipas
= 1; break;
2466 case NV50_IR_INTERP_OFFSET
: ipas
= 2; break;
2468 assert(!"invalid ipa sample mode");
2472 emitInsn (0xe0000000);
2473 emitField(0x36, 2, ipam
);
2474 emitField(0x34, 2, ipas
);
2476 emitField(0x2f, 3, 7);
2477 emitADDR (0x08, 0x1c, 10, 0, insn
->src(0));
2478 if ((code
[0] & 0x0000ff00) != 0x0000ff00)
2479 code
[1] |= 0x00000040; /* .idx */
2480 emitGPR(0x00, insn
->def(0));
2482 if (insn
->op
== OP_PINTERP
) {
2483 emitGPR(0x14, insn
->src(1));
2484 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2485 emitGPR(0x27, insn
->src(2));
2486 addInterp(insn
->ipa
, insn
->getSrc(1)->reg
.data
.id
, interpApply
);
2488 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2489 emitGPR(0x27, insn
->src(1));
2491 addInterp(insn
->ipa
, 0xff, interpApply
);
2494 if (insn
->getSampleMode() != NV50_IR_INTERP_OFFSET
)
/*
 * Encode ATOM.
 *  - subOp == NV50_IR_SUBOP_ATOM_CAS: opcode 0xee000000, dType code
 *    U32 = 0, U64 = 1
 *  - otherwise: opcode 0xed000000, dType code U32 = 0, S32 = 1, U64 = 2,
 *    F32 = 3, B128 = 4, S64 = 5
 * An unexpected dType asserts and falls back to code 0.  Common fields: 4-bit
 * subOp at 0x34, 3-bit dType code at 0x31, bit 0x30 set when the address's
 * indirect register is 8 bytes wide, src(1) at 0x14, destination at 0x00.
 * NOTE(review): the subOp values chosen for the CAS and EXCH cases are on
 * lines not visible in this excerpt.
 */
2499 CodeEmitterGM107::emitATOM()
2501 unsigned dType
, subOp
;
2503 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2504 switch (insn
->dType
) {
2505 case TYPE_U32
: dType
= 0; break;
2506 case TYPE_U64
: dType
= 1; break;
2507 default: assert(!"unexpected dType"); dType
= 0; break;
2511 emitInsn (0xee000000);
2513 switch (insn
->dType
) {
2514 case TYPE_U32
: dType
= 0; break;
2515 case TYPE_S32
: dType
= 1; break;
2516 case TYPE_U64
: dType
= 2; break;
2517 case TYPE_F32
: dType
= 3; break;
2518 case TYPE_B128
: dType
= 4; break;
2519 case TYPE_S64
: dType
= 5; break;
2520 default: assert(!"unexpected dType"); dType
= 0; break;
2522 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2525 subOp
= insn
->subOp
;
2527 emitInsn (0xed000000);
2530 emitField(0x34, 4, subOp
);
2531 emitField(0x31, 3, dType
);
2532 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2533 emitGPR (0x14, insn
->src(1));
2534 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2535 emitGPR (0x00, insn
->def(0));
/*
 * Encode ATOMS.
 *  - subOp == NV50_IR_SUBOP_ATOM_CAS: opcode 0xee000000, dType code
 *    (U32 = 0, U64 = 1) written as a single bit at 0x34
 *  - otherwise: opcode 0xec000000, dType code (U32 = 0, S32 = 1, U64 = 2,
 *    S64 = 3) written as 3 bits at 0x1c
 * An unexpected dType asserts and falls back to code 0.  Common fields: 4-bit
 * subOp at 0x34, src(1) at 0x14, address via emitADDR, destination at 0x00.
 * NOTE(review): the subOp values chosen for the CAS and EXCH cases are on
 * lines not visible in this excerpt.
 */
2539 CodeEmitterGM107::emitATOMS()
2541 unsigned dType
, subOp
;
2543 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2544 switch (insn
->dType
) {
2545 case TYPE_U32
: dType
= 0; break;
2546 case TYPE_U64
: dType
= 1; break;
2547 default: assert(!"unexpected dType"); dType
= 0; break;
2551 emitInsn (0xee000000);
2552 emitField(0x34, 1, dType
);
2554 switch (insn
->dType
) {
2555 case TYPE_U32
: dType
= 0; break;
2556 case TYPE_S32
: dType
= 1; break;
2557 case TYPE_U64
: dType
= 2; break;
2558 case TYPE_S64
: dType
= 3; break;
2559 default: assert(!"unexpected dType"); dType
= 0; break;
2562 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2565 subOp
= insn
->subOp
;
2567 emitInsn (0xec000000);
2568 emitField(0x1c, 3, dType
);
2571 emitField(0x34, 4, subOp
);
2572 emitGPR (0x14, insn
->src(1));
2573 emitADDR (0x08, 0x1e, 22, 2, insn
->src(0));
2574 emitGPR (0x00, insn
->def(0));
/*
 * Encode RED (opcode 0xebf80000).  dType code: U32 = 0, S32 = 1, U64 = 2,
 * F32 = 3, B128 = 4, S64 = 5; an unexpected dType asserts and falls back to
 * 0.  Bit 0x30 is set when the address's indirect register is 8 bytes wide,
 * the 3-bit subOp goes to 0x17, the dType code to 0x14, and the operand
 * src(1) to the register field at 0x00.
 */
2578 CodeEmitterGM107::emitRED()
2582 switch (insn
->dType
) {
2583 case TYPE_U32
: dType
= 0; break;
2584 case TYPE_S32
: dType
= 1; break;
2585 case TYPE_U64
: dType
= 2; break;
2586 case TYPE_F32
: dType
= 3; break;
2587 case TYPE_B128
: dType
= 4; break;
2588 case TYPE_S64
: dType
= 5; break;
2589 default: assert(!"unexpected dType"); dType
= 0; break;
2592 emitInsn (0xebf80000);
2593 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2594 emitField(0x17, 3, insn
->subOp
);
2595 emitField(0x14, 3, dType
);
2596 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2597 emitGPR (0x00, insn
->src(1));
/*
 * Encode CCTL.  Opcode 0xef600000 is used when src(0) is in
 * FILE_MEMORY_GLOBAL, otherwise 0xef800000.  Bit 0x34 is set when the
 * address's indirect register is 8 bytes wide; the 4-bit subOp goes to 0x00.
 * NOTE(review): `width` is assigned on lines not visible in this excerpt
 * (presumably a different value per branch).
 */
2601 CodeEmitterGM107::emitCCTL()
2604 if (insn
->src(0).getFile() == FILE_MEMORY_GLOBAL
) {
2605 emitInsn(0xef600000);
2608 emitInsn(0xef800000);
2611 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2612 emitADDR (0x08, 0x16, width
, 2, insn
->src(0));
2613 emitField(0x00, 4, insn
->subOp
);
2616 /*******************************************************************************
2618 ******************************************************************************/
/*
 * Encode PIXLD (opcode 0xefe80000): 3-bit subOp at 0x1f, src(0) at 0x08 and
 * the destination at 0x00.
 */
2621 CodeEmitterGM107::emitPIXLD()
2623 emitInsn (0xefe80000);
2625 emitField(0x1f, 3, insn
->subOp
);
2626 emitGPR (0x08, insn
->src(0));
2627 emitGPR (0x00, insn
->def(0));
2630 /*******************************************************************************
2632 ******************************************************************************/
/*
 * Emit the second texture source register at bit position `pos`.  The source
 * index is 1, or 2 when source 1 is the predicate source (predSrc == 1).
 * NOTE(review): what is emitted when that source does not exist is not
 * visible in this excerpt.
 */
2635 CodeEmitterGM107::emitTEXs(int pos
)
2637 int src1
= insn
->predSrc
== 1 ? 2 : 1;
2638 if (insn
->srcExists(src1
))
2639 emitGPR(pos
, insn
->src(src1
));
/*
 * Encode TEX / TXB / TXL.  With levelZero clear, `lodm` is 0 for OP_TEX, 2
 * for OP_TXB and 3 for OP_TXL.
 *  - tex.rIndirectSrc >= 0: opcode 0xdeb80000, lodm at 0x25, offsets flag
 *    (useOffsets == 1) at 0x24
 *  - otherwise: opcode 0xc0380000, lodm at 0x37, offsets flag at 0x36 and
 *    the 13-bit tex.r at 0x24
 * Common fields: shadow flag at 0x32, liveOnly at 0x31, derivAll at 0x23,
 * 4-bit mask at 0x1f, dimension at 0x1d (3 for cube targets, else
 * getDim() - 1), array flag at 0x1c.
 * NOTE(review): the lodm value for the levelZero case is on lines not
 * visible here.
 */
2645 CodeEmitterGM107::emitTEX()
2647 const TexInstruction
*insn
= this->insn
->asTex();
2650 if (!insn
->tex
.levelZero
) {
2652 case OP_TEX
: lodm
= 0; break;
2653 case OP_TXB
: lodm
= 2; break;
2654 case OP_TXL
: lodm
= 3; break;
2656 assert(!"invalid tex op");
2663 if (insn
->tex
.rIndirectSrc
>= 0) {
2664 emitInsn (0xdeb80000);
2665 emitField(0x25, 2, lodm
);
2666 emitField(0x24, 1, insn
->tex
.useOffsets
== 1);
2668 emitInsn (0xc0380000);
2669 emitField(0x37, 2, lodm
);
2670 emitField(0x36, 1, insn
->tex
.useOffsets
== 1);
2671 emitField(0x24, 13, insn
->tex
.r
);
2674 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2675 emitField(0x31, 1, insn
->tex
.liveOnly
);
2676 emitField(0x23, 1, insn
->tex
.derivAll
);
2677 emitField(0x1f, 4, insn
->tex
.mask
);
2678 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2679 insn
->tex
.target
.getDim() - 1);
2680 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2682 emitGPR (0x08, insn
->src(0));
2683 emitGPR (0x00, insn
->def(0));
/*
 * Encode TLD.  Opcode 0xdd380000 is used when tex.rIndirectSrc >= 0;
 * otherwise 0xdc380000 with the 13-bit tex.r at 0x24.
 * Common fields: bit 0x37 set when levelZero is 0, multisample flag at 0x32,
 * liveOnly at 0x31, offsets flag (useOffsets == 1) at 0x23, 4-bit mask at
 * 0x1f, dimension at 0x1d (3 for cube targets, else getDim() - 1), array
 * flag at 0x1c.
 */
2687 CodeEmitterGM107::emitTLD()
2689 const TexInstruction
*insn
= this->insn
->asTex();
2691 if (insn
->tex
.rIndirectSrc
>= 0) {
2692 emitInsn (0xdd380000);
2694 emitInsn (0xdc380000);
2695 emitField(0x24, 13, insn
->tex
.r
);
2698 emitField(0x37, 1, insn
->tex
.levelZero
== 0);
2699 emitField(0x32, 1, insn
->tex
.target
.isMS());
2700 emitField(0x31, 1, insn
->tex
.liveOnly
);
2701 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2702 emitField(0x1f, 4, insn
->tex
.mask
);
2703 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2704 insn
->tex
.target
.getDim() - 1);
2705 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2707 emitGPR (0x08, insn
->src(0));
2708 emitGPR (0x00, insn
->def(0));
/*
 * Encode TLD4.
 *  - tex.rIndirectSrc >= 0: opcode 0xdef80000, gatherComp at 0x26, flags
 *    (useOffsets == 4) at 0x25 and (useOffsets == 1) at 0x24
 *  - otherwise: opcode 0xc8380000, the same three fields at 0x38, 0x37 and
 *    0x36, plus the 13-bit tex.r at 0x24
 * Common fields: shadow flag at 0x32, liveOnly at 0x31, derivAll at 0x23,
 * 4-bit mask at 0x1f, dimension at 0x1d (3 for cube targets, else
 * getDim() - 1), array flag at 0x1c.
 * NOTE(review): the two useOffsets flags are emitted as 2-bit fields one bit
 * apart, so the declared ranges overlap each other and gatherComp.  The
 * values are 0/1 comparison results, so only the low bit of each can be set.
 */
2712 CodeEmitterGM107::emitTLD4()
2714 const TexInstruction
*insn
= this->insn
->asTex();
2716 if (insn
->tex
.rIndirectSrc
>= 0) {
2717 emitInsn (0xdef80000);
2718 emitField(0x26, 2, insn
->tex
.gatherComp
);
2719 emitField(0x25, 2, insn
->tex
.useOffsets
== 4);
2720 emitField(0x24, 2, insn
->tex
.useOffsets
== 1);
2722 emitInsn (0xc8380000);
2723 emitField(0x38, 2, insn
->tex
.gatherComp
);
2724 emitField(0x37, 2, insn
->tex
.useOffsets
== 4);
2725 emitField(0x36, 2, insn
->tex
.useOffsets
== 1);
2726 emitField(0x24, 13, insn
->tex
.r
);
2729 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2730 emitField(0x31, 1, insn
->tex
.liveOnly
);
2731 emitField(0x23, 1, insn
->tex
.derivAll
);
2732 emitField(0x1f, 4, insn
->tex
.mask
);
2733 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2734 insn
->tex
.target
.getDim() - 1);
2735 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2737 emitGPR (0x08, insn
->src(0));
2738 emitGPR (0x00, insn
->def(0));
/*
 * Encode TXD.  Opcode 0xde780000 is used when tex.rIndirectSrc >= 0;
 * otherwise 0xde380000 with the 13-bit tex.r at 0x24.
 * Common fields: liveOnly at 0x31, offsets flag (useOffsets == 1) at 0x23,
 * 4-bit mask at 0x1f, dimension at 0x1d (3 for cube targets, else
 * getDim() - 1), array flag at 0x1c.
 */
2742 CodeEmitterGM107::emitTXD()
2744 const TexInstruction
*insn
= this->insn
->asTex();
2746 if (insn
->tex
.rIndirectSrc
>= 0) {
2747 emitInsn (0xde780000);
2749 emitInsn (0xde380000);
2750 emitField(0x24, 13, insn
->tex
.r
);
2753 emitField(0x31, 1, insn
->tex
.liveOnly
);
2754 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2755 emitField(0x1f, 4, insn
->tex
.mask
);
2756 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2757 insn
->tex
.target
.getDim() - 1);
2758 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2760 emitGPR (0x08, insn
->src(0));
2761 emitGPR (0x00, insn
->def(0));
/*
 * Encode TMML.  Opcode 0xdf600000 is used when tex.rIndirectSrc >= 0;
 * otherwise 0xdf580000 with the 13-bit tex.r at 0x24.
 * Common fields: liveOnly at 0x31, derivAll at 0x23, 4-bit mask at 0x1f,
 * dimension at 0x1d (3 for cube targets, else getDim() - 1), array flag at
 * 0x1c.
 */
2765 CodeEmitterGM107::emitTMML()
2767 const TexInstruction
*insn
= this->insn
->asTex();
2769 if (insn
->tex
.rIndirectSrc
>= 0) {
2770 emitInsn (0xdf600000);
2772 emitInsn (0xdf580000);
2773 emitField(0x24, 13, insn
->tex
.r
);
2776 emitField(0x31, 1, insn
->tex
.liveOnly
);
2777 emitField(0x23, 1, insn
->tex
.derivAll
);
2778 emitField(0x1f, 4, insn
->tex
.mask
);
2779 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2780 insn
->tex
.target
.getDim() - 1);
2781 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2783 emitGPR (0x08, insn
->src(0));
2784 emitGPR (0x00, insn
->def(0));
/*
 * Encode TXQ.  The query kind maps to a 6-bit `type` code written at 0x16:
 *   DIMS = 0x01, TYPE = 0x02, SAMPLE_POSITION = 0x05, FILTER = 0x10,
 *   LOD = 0x12, WRAP = 0x14, BORDER_COLOUR = 0x16
 * Any other query trips the "invalid txq query" assert.  Opcode 0xdf500000
 * is used when tex.rIndirectSrc >= 0; otherwise 0xdf480000 with the 13-bit
 * tex.r at 0x24.  liveOnly goes to 0x31 and the 4-bit mask to 0x1f.
 */
2788 CodeEmitterGM107::emitTXQ()
2790 const TexInstruction
*insn
= this->insn
->asTex();
2793 switch (insn
->tex
.query
) {
2794 case TXQ_DIMS
: type
= 0x01; break;
2795 case TXQ_TYPE
: type
= 0x02; break;
2796 case TXQ_SAMPLE_POSITION
: type
= 0x05; break;
2797 case TXQ_FILTER
: type
= 0x10; break;
2798 case TXQ_LOD
: type
= 0x12; break;
2799 case TXQ_WRAP
: type
= 0x14; break;
2800 case TXQ_BORDER_COLOUR
: type
= 0x16; break;
2802 assert(!"invalid txq query");
2806 if (insn
->tex
.rIndirectSrc
>= 0) {
2807 emitInsn (0xdf500000);
2809 emitInsn (0xdf480000);
2810 emitField(0x24, 13, insn
->tex
.r
);
2813 emitField(0x31, 1, insn
->tex
.liveOnly
);
2814 emitField(0x1f, 4, insn
->tex
.mask
);
2815 emitField(0x16, 6, type
);
2816 emitGPR (0x08, insn
->src(0));
2817 emitGPR (0x00, insn
->def(0));
/*
 * Encode DEPBAR (opcode 0xf0f00000).  Bit 0x1d is set ("le"), the 3-bit
 * field at 0x1a is 5, and subOp is written as a 6-bit value at both 0x14 and
 * 0x00.
 * NOTE(review): the same subOp value feeds two different fields -- confirm
 * this is intended.
 */
2821 CodeEmitterGM107::emitDEPBAR()
2823 emitInsn (0xf0f00000);
2824 emitField(0x1d, 1, 1); /* le */
2825 emitField(0x1a, 3, 5);
2826 emitField(0x14, 6, insn
->subOp
);
2827 emitField(0x00, 6, insn
->subOp
);
2830 /*******************************************************************************
2832 ******************************************************************************/
/* Encode NOP: only the opcode 0x50b00000 is emitted, with no operand fields. */
2835 CodeEmitterGM107::emitNOP()
2837 emitInsn(0x50b00000);
/*
 * Encode KIL (opcode 0xe3300000) with the condition field at 0x00 fixed to
 * CC_TR.
 */
2841 CodeEmitterGM107::emitKIL()
2843 emitInsn (0xe3300000);
2844 emitCond5(0x00, CC_TR
);
/*
 * Encode OUT.  `cut` is non-zero for OP_RESTART or when subOp is non-zero;
 * `emit` is 1 for OP_EMIT.  The opcode form is chosen by src(1)'s file and
 * the 2-bit field at 0x27 holds (cut << 1) | emit.
 */
2848 CodeEmitterGM107::emitOUT()
2850 const int cut
= insn
->op
== OP_RESTART
|| insn
->subOp
;
2851 const int emit
= insn
->op
== OP_EMIT
;
2853 switch (insn
->src(1).getFile()) {
2855 emitInsn(0xfbe00000);
2856 emitGPR (0x14, insn
->src(1));
2858 case FILE_IMMEDIATE
:
2859 emitInsn(0xf6e00000);
2860 emitIMMD(0x14, 19, insn
->src(1));
2862 case FILE_MEMORY_CONST
:
2863 emitInsn(0xebe00000);
2864 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2867 assert(!"bad src1 file");
2871 emitField(0x27, 2, (cut
<< 1) | emit
);
2872 emitGPR (0x08, insn
->src(0));
2873 emitGPR (0x00, insn
->def(0));
/*
 * Encode BAR (opcode 0xf0a80000).
 *  - `subop` (8 bits at 0x20): RED_POPC = 0x02, RED_AND = 0x0a,
 *    RED_OR = 0x12, ARRIVE = 0x81; the remaining arm asserts
 *    NV50_IR_SUBOP_BAR_SYNC (its value is on a line not visible here)
 *  - src(0): a GPR at 0x08, or an 8-bit immediate at 0x08 with bit 0x2b set
 *  - src(1): a GPR at 0x14, or a 12-bit immediate at 0x14 with bit 0x2c set
 *  - src(2), when present and not the predicate source: predicate at 0x27
 *    with its NOT modifier at 0x2a; the other path writes 7 to field 0x27
 *
 * Fix: the immediate for the src(1) field was fetched from getSrc(0).  It is
 * now read from getSrc(1), the operand this branch tests and encodes.
 */
2877 CodeEmitterGM107::emitBAR()
2881 emitInsn (0xf0a80000);
2883 switch (insn
->subOp
) {
2884 case NV50_IR_SUBOP_BAR_RED_POPC
: subop
= 0x02; break;
2885 case NV50_IR_SUBOP_BAR_RED_AND
: subop
= 0x0a; break;
2886 case NV50_IR_SUBOP_BAR_RED_OR
: subop
= 0x12; break;
2887 case NV50_IR_SUBOP_BAR_ARRIVE
: subop
= 0x81; break;
2890 assert(insn
->subOp
== NV50_IR_SUBOP_BAR_SYNC
);
2894 emitField(0x20, 8, subop
);
/* first operand: src(0) */
2897 if (insn
->src(0).getFile() == FILE_GPR
) {
2898 emitGPR(0x08, insn
->src(0));
2900 ImmediateValue
*imm
= insn
->getSrc(0)->asImm();
2902 emitField(0x08, 8, imm
->reg
.data
.u32
);
2903 emitField(0x2b, 1, 1);
/* second operand: src(1) */
2907 if (insn
->src(1).getFile() == FILE_GPR
) {
2908 emitGPR(0x14, insn
->src(1));
2910 ImmediateValue
*imm
= insn
->getSrc(1)->asImm();
2912 emitField(0x14, 12, imm
->reg
.data
.u32
);
2913 emitField(0x2c, 1, 1);
2916 if (insn
->srcExists(2) && (insn
->predSrc
!= 2)) {
2917 emitPRED (0x27, insn
->src(2));
2918 emitField(0x2a, 1, insn
->src(2).mod
== Modifier(NV50_IR_MOD_NOT
));
2920 emitField(0x27, 3, 7);
/*
 * Encode MEMBAR (opcode 0xef980000): the 2-bit field at 0x08 holds
 * subOp >> 2.
 */
2925 CodeEmitterGM107::emitMEMBAR()
2927 emitInsn (0xef980000);
2928 emitField(0x08, 2, insn
->subOp
>> 2);
/*
 * Encode VOTE (opcode 0x50d80000).  src(0) must be a predicate (asserted).
 * The defs are scanned for a GPR and for a predicate destination; the GPR
 * def index `r` is written at 0x00 and the predicate def index `p` at 0x2d.
 * The 2-bit subOp goes to 0x30, the NOT modifier of src(0) to 0x2a and
 * src(0) itself to 0x27.
 * NOTE(review): the assignments to `r` and `p` and the handling of a missing
 * def are on lines not visible in this excerpt.
 */
2932 CodeEmitterGM107::emitVOTE()
2934 assert(insn
->src(0).getFile() == FILE_PREDICATE
);
2937 for (int i
= 0; insn
->defExists(i
); i
++) {
2938 if (insn
->def(i
).getFile() == FILE_GPR
)
2940 else if (insn
->def(i
).getFile() == FILE_PREDICATE
)
2944 emitInsn (0x50d80000);
2945 emitField(0x30, 2, insn
->subOp
);
2947 emitGPR (0x00, insn
->def(r
));
2951 emitPRED (0x2d, insn
->def(p
));
2954 emitField(0x2a, 1, insn
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
));
2955 emitPRED (0x27, insn
->src(0));
/*
 * Write the 4-bit surface target code at 0x20 for a surface op (the op must
 * lie in OP_SULDB..OP_SUREDP, asserted).  Targets are grouped as: BUFFER;
 * 1D_ARRAY; 2D or RECT; 2D_ARRAY, CUBE or CUBE_ARRAY; 3D; and finally 1D
 * (asserted in the last branch).
 * NOTE(review): the numeric code assigned to `target` in each branch is on
 * lines not visible in this excerpt.
 */
2959 CodeEmitterGM107::emitSUTarget()
2961 const TexInstruction
*insn
= this->insn
->asTex();
2964 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
2966 if (insn
->tex
.target
== TEX_TARGET_BUFFER
) {
2968 } else if (insn
->tex
.target
== TEX_TARGET_1D_ARRAY
) {
2970 } else if (insn
->tex
.target
== TEX_TARGET_2D
||
2971 insn
->tex
.target
== TEX_TARGET_RECT
) {
2973 } else if (insn
->tex
.target
== TEX_TARGET_2D_ARRAY
||
2974 insn
->tex
.target
== TEX_TARGET_CUBE
||
2975 insn
->tex
.target
== TEX_TARGET_CUBE_ARRAY
) {
2977 } else if (insn
->tex
.target
== TEX_TARGET_3D
) {
2980 assert(insn
->tex
.target
== TEX_TARGET_1D
);
2982 emitField(0x20, 4, target
);
/*
 * Encode the surface handle taken from source `s` of a surface op (the op
 * must lie in OP_SULDB..OP_SUREDP, asserted).  A GPR source is written at
 * 0x27; otherwise the source is read as an immediate, bit 0x33 is set and
 * the value is written as a 13-bit field at 0x24.
 */
2986 CodeEmitterGM107::emitSUHandle(const int s
)
2988 const TexInstruction
*insn
= this->insn
->asTex();
2990 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
2992 if (insn
->src(s
).getFile() == FILE_GPR
) {
2993 emitGPR(0x27, insn
->src(s
));
2995 ImmediateValue
*imm
= insn
->getSrc(s
)->asImm();
2997 emitField(0x33, 1, 1);
2998 emitField(0x24, 13, imm
->reg
.data
.u32
);
/*
 * Encode a surface store (opcode 0xeb200000).  Bit 0x34 is set for OP_SUSTB.
 * The 4-bit field at 0x14 is 0xf (all of rgba), src(0) goes to 0x08 and
 * src(1) to the register field at 0x00.
 * NOTE(review): further emit calls of this function are on lines not visible
 * in this excerpt.
 */
3003 CodeEmitterGM107::emitSUSTx()
3005 const TexInstruction
*insn
= this->insn
->asTex();
3007 emitInsn(0xeb200000);
3008 if (insn
->op
== OP_SUSTB
)
3009 emitField(0x34, 1, 1);
3013 emitField(0x14, 4, 0xf); // rgba
3014 emitGPR (0x08, insn
->src(0));
3015 emitGPR (0x00, insn
->src(1));
/*
 * Encode a surface load (opcode 0xeb000000).  Bit 0x34 is set for OP_SULDB.
 * The 3-bit `type` code at 0x14 is derived from dType: S8 = 1, U16 = 2,
 * S16 = 3, U32 = 4, U64 = 5, B128 = 6; the remaining arm asserts TYPE_U8.
 * The destination goes to 0x00 and src(0) to 0x08.
 * NOTE(review): the declaration and initial value of `type` are on lines not
 * visible in this excerpt.
 */
3021 CodeEmitterGM107::emitSULDx()
3023 const TexInstruction
*insn
= this->insn
->asTex();
3026 emitInsn(0xeb000000);
3027 if (insn
->op
== OP_SULDB
)
3028 emitField(0x34, 1, 1);
3031 switch (insn
->dType
) {
3032 case TYPE_S8
: type
= 1; break;
3033 case TYPE_U16
: type
= 2; break;
3034 case TYPE_S16
: type
= 3; break;
3035 case TYPE_U32
: type
= 4; break;
3036 case TYPE_U64
: type
= 5; break;
3037 case TYPE_B128
: type
= 6; break;
3039 assert(insn
->dType
== TYPE_U8
);
3043 emitField(0x14, 3, type
);
3044 emitGPR (0x00, insn
->def(0));
3045 emitGPR (0x08, insn
->src(0));
/*
 * Encode a surface reduction / atomic.  Opcode 0xeac00000 is used for
 * NV50_IR_SUBOP_ATOM_CAS, otherwise 0xea600000.  Bit 0x34 is set for
 * OP_SUREDB.  `type` starts at 0 and is set from dType: S32 = 1, U64 = 2,
 * F32 = 3, S64 = 5; the remaining arm asserts TYPE_U32.  The 3-bit type goes
 * to 0x24, the 4-bit subOp to 0x1d, src(1) to 0x14, src(0) to 0x08 and the
 * destination to 0x00.
 * NOTE(review): the subOp values used for the CAS and EXCH cases are on
 * lines not visible in this excerpt.
 */
3051 CodeEmitterGM107::emitSUREDx()
3053 const TexInstruction
*insn
= this->insn
->asTex();
3054 uint8_t type
= 0, subOp
;
3056 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
)
3057 emitInsn(0xeac00000);
3059 emitInsn(0xea600000);
3061 if (insn
->op
== OP_SUREDB
)
3062 emitField(0x34, 1, 1);
3066 switch (insn
->dType
) {
3067 case TYPE_S32
: type
= 1; break;
3068 case TYPE_U64
: type
= 2; break;
3069 case TYPE_F32
: type
= 3; break;
3070 case TYPE_S64
: type
= 5; break;
3072 assert(insn
->dType
== TYPE_U32
);
3077 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
3079 } else if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
) {
3082 subOp
= insn
->subOp
;
3085 emitField(0x24, 3, type
);
3086 emitField(0x1d, 4, subOp
);
3087 emitGPR (0x14, insn
->src(1));
3088 emitGPR (0x08, insn
->src(0));
3089 emitGPR (0x00, insn
->def(0));
3094 /*******************************************************************************
3095 * assembler front-end
3096 ******************************************************************************/
/*
 * Front-end entry point: encode one instruction.
 *
 * `size` is 16 when writeIssueDelays is set and codeSize has its low five
 * bits clear, otherwise 8.  An instruction whose encSize is not 8 is
 * reported via ERROR and printed; an instruction that would push codeSize
 * past codeSizeLimit is reported via ERROR.  With writeIssueDelays set,
 * insn->sched is written as a 21-bit field at bit n * 21 of `data`, where
 * n = ((codeSize & 0x1f) / 8) - 1.
 *
 * The remainder dispatches on the opcode to the emit*() helpers, choosing
 * between variants by dType / sType (float vs. integer, F64, 8-byte source)
 * and by the register files of def(0) and src(0).
 * NOTE(review): most of the dispatch switch, the return statements and the
 * code that sets up `data` are on lines not visible in this excerpt.
 */
3099 CodeEmitterGM107::emitInstruction(Instruction
*i
)
3101 const unsigned int size
= (writeIssueDelays
&& !(codeSize
& 0x1f)) ? 16 : 8;
3106 if (insn
->encSize
!= 8) {
3107 ERROR("skipping undecodable instruction: "); insn
->print();
3110 if (codeSize
+ size
> codeSizeLimit
) {
3111 ERROR("code emitter output buffer too small\n");
3115 if (writeIssueDelays
) {
3116 int n
= ((codeSize
& 0x1f) / 8) - 1;
3119 data
[0] = 0x00000000;
3120 data
[1] = 0x00000000;
3126 emitField(data
, n
* 21, 21, insn
->sched
);
3182 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
3183 insn
->src(0).getFile() == FILE_PREDICATE
)) {
3185 } else if (isFloatType(insn
->dType
)) {
3186 if (isFloatType(insn
->sType
))
3191 if (isFloatType(insn
->sType
))
3202 if (isFloatType(insn
->dType
)) {
3203 if (insn
->dType
== TYPE_F64
)
3212 if (isFloatType(insn
->dType
)) {
3213 if (insn
->dType
== TYPE_F64
)
3223 if (isFloatType(insn
->dType
)) {
3224 if (insn
->dType
== TYPE_F64
)
3237 if (isFloatType(insn
->dType
)) {
3238 if (insn
->dType
== TYPE_F64
)
3247 if (typeSizeof(insn
->sType
) == 8)
3253 if (typeSizeof(insn
->sType
) == 8)
3271 if (isFloatType(insn
->dType
))
3280 if (insn
->def(0).getFile() != FILE_PREDICATE
) {
3281 if (isFloatType(insn
->sType
))
3282 if (insn
->sType
== TYPE_F64
)
3289 if (isFloatType(insn
->sType
))
3290 if (insn
->sType
== TYPE_F64
)
/* loads are dispatched on the memory file of src(0) */
3322 switch (insn
->src(0).getFile()) {
3323 case FILE_MEMORY_CONST
: emitLDC(); break;
3324 case FILE_MEMORY_LOCAL
: emitLDL(); break;
3325 case FILE_MEMORY_SHARED
: emitLDS(); break;
3326 case FILE_MEMORY_GLOBAL
: emitLD(); break;
3328 assert(!"invalid load");
/* stores are dispatched on the memory file of src(0) */
3334 switch (insn
->src(0).getFile()) {
3335 case FILE_MEMORY_LOCAL
: emitSTL(); break;
3336 case FILE_MEMORY_SHARED
: emitSTS(); break;
3337 case FILE_MEMORY_GLOBAL
: emitST(); break;
3339 assert(!"invalid store");
3345 if (insn
->src(0).getFile() == FILE_MEMORY_SHARED
)
3348 if (!insn
->defExists(0) && insn
->subOp
< NV50_IR_SUBOP_ATOM_CAS
)
3433 assert(!"invalid opcode");
3449 CodeEmitterGM107::getMinEncodingSize(const Instruction
*i
) const
3454 /*******************************************************************************
3455 * sched data calculator
3456 ******************************************************************************/
3458 class SchedDataCalculatorGM107
: public Pass
3461 SchedDataCalculatorGM107(const TargetGM107
*targ
) : targ(targ
) {}
3473 void rebase(const int base
)
3475 const int delta
= this->base
- base
;
3480 for (int i
= 0; i
< 256; ++i
) {
3484 for (int i
= 0; i
< 8; ++i
) {
3493 memset(&rd
, 0, sizeof(rd
));
3494 memset(&wr
, 0, sizeof(wr
));
3496 int getLatest(const ScoreData
& d
) const
3499 for (int i
= 0; i
< 256; ++i
)
3502 for (int i
= 0; i
< 8; ++i
)
3509 inline int getLatestRd() const
3511 return getLatest(rd
);
3513 inline int getLatestWr() const
3515 return getLatest(wr
);
3517 inline int getLatest() const
3519 return MAX2(getLatestRd(), getLatestWr());
3521 void setMax(const RegScores
*that
)
3523 for (int i
= 0; i
< 256; ++i
) {
3524 rd
.r
[i
] = MAX2(rd
.r
[i
], that
->rd
.r
[i
]);
3525 wr
.r
[i
] = MAX2(wr
.r
[i
], that
->wr
.r
[i
]);
3527 for (int i
= 0; i
< 8; ++i
) {
3528 rd
.p
[i
] = MAX2(rd
.p
[i
], that
->rd
.p
[i
]);
3529 wr
.p
[i
] = MAX2(wr
.p
[i
], that
->wr
.p
[i
]);
3531 rd
.c
= MAX2(rd
.c
, that
->rd
.c
);
3532 wr
.c
= MAX2(wr
.c
, that
->wr
.c
);
3534 void print(int cycle
)
3536 for (int i
= 0; i
< 256; ++i
) {
3537 if (rd
.r
[i
] > cycle
)
3538 INFO("rd $r%i @ %i\n", i
, rd
.r
[i
]);
3539 if (wr
.r
[i
] > cycle
)
3540 INFO("wr $r%i @ %i\n", i
, wr
.r
[i
]);
3542 for (int i
= 0; i
< 8; ++i
) {
3543 if (rd
.p
[i
] > cycle
)
3544 INFO("rd $p%i @ %i\n", i
, rd
.p
[i
]);
3545 if (wr
.p
[i
] > cycle
)
3546 INFO("wr $p%i @ %i\n", i
, wr
.p
[i
]);
3549 INFO("rd $c @ %i\n", rd
.c
);
3551 INFO("wr $c @ %i\n", wr
.c
);
3555 RegScores
*score
; // for current BB
3556 std::vector
<RegScores
> scoreBoards
;
3558 const TargetGM107
*targ
;
3559 bool visit(Function
*);
3560 bool visit(BasicBlock
*);
3562 void commitInsn(const Instruction
*, int);
3563 int calcDelay(const Instruction
*, int) const;
3564 void setDelay(Instruction
*, int, const Instruction
*);
3565 void recordWr(const Value
*, int, int);
3566 void checkRd(const Value
*, int, int&) const;
3568 inline void emitYield(Instruction
*);
3569 inline void emitStall(Instruction
*, uint8_t);
3570 inline void emitReuse(Instruction
*, uint8_t);
3571 inline void emitWrDepBar(Instruction
*, uint8_t);
3572 inline void emitRdDepBar(Instruction
*, uint8_t);
3573 inline void emitWtDepBar(Instruction
*, uint8_t);
3575 inline int getStall(const Instruction
*) const;
3576 inline int getWrDepBar(const Instruction
*) const;
3577 inline int getRdDepBar(const Instruction
*) const;
3578 inline int getWtDepBar(const Instruction
*) const;
3580 void setReuseFlag(Instruction
*);
3582 inline void printSchedInfo(int, const Instruction
*) const;
3585 LiveBarUse(Instruction
*insn
, Instruction
*usei
)
3586 : insn(insn
), usei(usei
) { }
3592 LiveBarDef(Instruction
*insn
, Instruction
*defi
)
3593 : insn(insn
), defi(defi
) { }
3598 bool insertBarriers(BasicBlock
*);
3600 Instruction
*findFirstUse(const Instruction
*) const;
3601 Instruction
*findFirstDef(const Instruction
*) const;
3603 bool needRdDepBar(const Instruction
*) const;
3604 bool needWrDepBar(const Instruction
*) const;
3608 SchedDataCalculatorGM107::emitStall(Instruction
*insn
, uint8_t cnt
)
3615 SchedDataCalculatorGM107::emitYield(Instruction
*insn
)
3617 insn
->sched
|= 1 << 4;
3621 SchedDataCalculatorGM107::emitWrDepBar(Instruction
*insn
, uint8_t id
)
3624 if ((insn
->sched
& 0xe0) == 0xe0)
3625 insn
->sched
^= 0xe0;
3626 insn
->sched
|= id
<< 5;
3630 SchedDataCalculatorGM107::emitRdDepBar(Instruction
*insn
, uint8_t id
)
3633 if ((insn
->sched
& 0x700) == 0x700)
3634 insn
->sched
^= 0x700;
3635 insn
->sched
|= id
<< 8;
3639 SchedDataCalculatorGM107::emitWtDepBar(Instruction
*insn
, uint8_t id
)
3642 insn
->sched
|= 1 << (11 + id
);
3646 SchedDataCalculatorGM107::emitReuse(Instruction
*insn
, uint8_t id
)
3649 insn
->sched
|= 1 << (17 + id
);
3653 SchedDataCalculatorGM107::printSchedInfo(int cycle
,
3654 const Instruction
*insn
) const
3656 uint8_t st
, yl
, wr
, rd
, wt
, ru
;
3658 st
= (insn
->sched
& 0x00000f) >> 0;
3659 yl
= (insn
->sched
& 0x000010) >> 4;
3660 wr
= (insn
->sched
& 0x0000e0) >> 5;
3661 rd
= (insn
->sched
& 0x000700) >> 8;
3662 wt
= (insn
->sched
& 0x01f800) >> 11;
3663 ru
= (insn
->sched
& 0x1e0000) >> 17;
3665 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3666 cycle
, st
, yl
, wr
, rd
, wt
, ru
);
3670 SchedDataCalculatorGM107::getStall(const Instruction
*insn
) const
3672 return insn
->sched
& 0xf;
3676 SchedDataCalculatorGM107::getWrDepBar(const Instruction
*insn
) const
3678 return (insn
->sched
& 0x0000e0) >> 5;
3682 SchedDataCalculatorGM107::getRdDepBar(const Instruction
*insn
) const
3684 return (insn
->sched
& 0x000700) >> 8;
3688 SchedDataCalculatorGM107::getWtDepBar(const Instruction
*insn
) const
3690 return (insn
->sched
& 0x01f800) >> 11;
3693 // Emit the reuse flag which allows to make use of the new memory hierarchy
3694 // introduced since Maxwell, the operand reuse cache.
3696 // It allows to reduce bank conflicts by caching operands. Each time you issue
3697 // an instruction, that flag can tell the hw which operands are going to be
3698 // re-used by the next instruction. Note that the next instruction has to use
3699 // the same GPR id in the same operand slot.
3701 SchedDataCalculatorGM107::setReuseFlag(Instruction
*insn
)
3703 Instruction
*next
= insn
->next
;
3704 BitSet
defs(255, 1);
3706 if (!targ
->isReuseSupported(insn
))
3709 for (int d
= 0; insn
->defExists(d
); ++d
) {
3710 const Value
*def
= insn
->def(d
).rep();
3711 if (insn
->def(d
).getFile() != FILE_GPR
)
3713 if (typeSizeof(insn
->dType
) != 4 || def
->reg
.data
.id
== 255)
3715 defs
.set(def
->reg
.data
.id
);
3718 for (int s
= 0; insn
->srcExists(s
); s
++) {
3719 const Value
*src
= insn
->src(s
).rep();
3720 if (insn
->src(s
).getFile() != FILE_GPR
)
3722 if (typeSizeof(insn
->sType
) != 4 || src
->reg
.data
.id
== 255)
3724 if (defs
.test(src
->reg
.data
.id
))
3726 if (!next
->srcExists(s
) || next
->src(s
).getFile() != FILE_GPR
)
3728 if (src
->reg
.data
.id
!= next
->getSrc(s
)->reg
.data
.id
)
3736 SchedDataCalculatorGM107::recordWr(const Value
*v
, int cycle
, int ready
)
3738 int a
= v
->reg
.data
.id
, b
;
3740 switch (v
->reg
.file
) {
3742 b
= a
+ v
->reg
.size
/ 4;
3743 for (int r
= a
; r
< b
; ++r
)
3744 score
->rd
.r
[r
] = ready
;
3746 case FILE_PREDICATE
:
3747 // To immediately use a predicate set by any instructions, the minimum
3748 // number of stall counts is 13.
3749 score
->rd
.p
[a
] = cycle
+ 13;
3752 score
->rd
.c
= ready
;
3760 SchedDataCalculatorGM107::checkRd(const Value
*v
, int cycle
, int &delay
) const
3762 int a
= v
->reg
.data
.id
, b
;
3765 switch (v
->reg
.file
) {
3767 b
= a
+ v
->reg
.size
/ 4;
3768 for (int r
= a
; r
< b
; ++r
)
3769 ready
= MAX2(ready
, score
->rd
.r
[r
]);
3771 case FILE_PREDICATE
:
3772 ready
= MAX2(ready
, score
->rd
.p
[a
]);
3775 ready
= MAX2(ready
, score
->rd
.c
);
3781 delay
= MAX2(delay
, ready
- cycle
);
3785 SchedDataCalculatorGM107::commitInsn(const Instruction
*insn
, int cycle
)
3787 const int ready
= cycle
+ targ
->getLatency(insn
);
3789 for (int d
= 0; insn
->defExists(d
); ++d
)
3790 recordWr(insn
->getDef(d
), cycle
, ready
);
3792 #ifdef GM107_DEBUG_SCHED_DATA
3793 score
->print(cycle
);
3797 #define GM107_MIN_ISSUE_DELAY 0x1
3798 #define GM107_MAX_ISSUE_DELAY 0xf
3801 SchedDataCalculatorGM107::calcDelay(const Instruction
*insn
, int cycle
) const
3803 int delay
= 0, ready
= cycle
;
3805 for (int s
= 0; insn
->srcExists(s
); ++s
)
3806 checkRd(insn
->getSrc(s
), cycle
, delay
);
3808 // TODO: make use of getReadLatency()!
3810 return MAX2(delay
, ready
- cycle
);
3814 SchedDataCalculatorGM107::setDelay(Instruction
*insn
, int delay
,
3815 const Instruction
*next
)
3817 const OpClass cl
= targ
->getOpClass(insn
->op
);
3820 if (insn
->op
== OP_EXIT
||
3821 insn
->op
== OP_BAR
||
3822 insn
->op
== OP_MEMBAR
) {
3823 delay
= GM107_MAX_ISSUE_DELAY
;
3825 if (insn
->op
== OP_QUADON
||
3826 insn
->op
== OP_QUADPOP
) {
3829 if (cl
== OPCLASS_FLOW
|| insn
->join
) {
3833 if (!next
|| !targ
->canDualIssue(insn
, next
)) {
3834 delay
= CLAMP(delay
, GM107_MIN_ISSUE_DELAY
, GM107_MAX_ISSUE_DELAY
);
3836 delay
= 0x0; // dual-issue
3839 wr
= getWrDepBar(insn
);
3840 rd
= getRdDepBar(insn
);
3842 if (delay
== GM107_MIN_ISSUE_DELAY
&& (wr
& rd
) != 7) {
3843 // Barriers take one additional clock cycle to become active on top of
3844 // the clock consumed by the instruction producing it.
3845 if (!next
|| insn
->bb
!= next
->bb
) {
3848 int wt
= getWtDepBar(next
);
3849 if ((wt
& (1 << wr
)) | (wt
& (1 << rd
)))
3854 emitStall(insn
, delay
);
3858 // Return true when the given instruction needs to emit a read dependency
3859 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3860 // setting the maximum number of stall counts is not enough.
3862 SchedDataCalculatorGM107::needRdDepBar(const Instruction
*insn
) const
3864 BitSet
srcs(255, 1), defs(255, 1);
3867 if (!targ
->isBarrierRequired(insn
))
3870 // Do not emit a read dependency barrier when the instruction doesn't use
3871 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3872 for (int s
= 0; insn
->srcExists(s
); ++s
) {
3873 const Value
*src
= insn
->src(s
).rep();
3874 if (insn
->src(s
).getFile() != FILE_GPR
)
3876 if (src
->reg
.data
.id
== 255)
3879 a
= src
->reg
.data
.id
;
3880 b
= a
+ src
->reg
.size
/ 4;
3881 for (int r
= a
; r
< b
; ++r
)
3885 if (!srcs
.popCount())
3888 // Do not emit a read dependency barrier when the output GPRs are equal to
3889 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3890 // be produced and WaR hazards are prevented.
3891 for (int d
= 0; insn
->defExists(d
); ++d
) {
3892 const Value
*def
= insn
->def(d
).rep();
3893 if (insn
->def(d
).getFile() != FILE_GPR
)
3895 if (def
->reg
.data
.id
== 255)
3898 a
= def
->reg
.data
.id
;
3899 b
= a
+ def
->reg
.size
/ 4;
3900 for (int r
= a
; r
< b
; ++r
)
3905 if (!srcs
.popCount())
3911 // Return true when the given instruction needs to emit a write dependency
3912 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3913 // setting the maximum number of stall counts is not enough. This is only legal
3914 // if the instruction output something.
3916 SchedDataCalculatorGM107::needWrDepBar(const Instruction
*insn
) const
3918 if (!targ
->isBarrierRequired(insn
))
3921 for (int d
= 0; insn
->defExists(d
); ++d
) {
3922 if (insn
->def(d
).getFile() == FILE_GPR
||
3923 insn
->def(d
).getFile() == FILE_PREDICATE
)
3929 // Find the next instruction inside the same basic block which uses the output
3930 // of the given instruction in order to avoid RaW hazards.
3932 SchedDataCalculatorGM107::findFirstUse(const Instruction
*bari
) const
3934 Instruction
*insn
, *next
;
3937 if (!bari
->defExists(0))
3940 minGPR
= bari
->def(0).rep()->reg
.data
.id
;
3941 maxGPR
= minGPR
+ bari
->def(0).rep()->reg
.size
/ 4 - 1;
3943 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
3946 for (int s
= 0; insn
->srcExists(s
); ++s
) {
3947 const Value
*src
= insn
->src(s
).rep();
3948 if (bari
->def(0).getFile() == FILE_GPR
) {
3949 if (insn
->src(s
).getFile() != FILE_GPR
||
3950 src
->reg
.data
.id
+ src
->reg
.size
/ 4 - 1 < minGPR
||
3951 src
->reg
.data
.id
> maxGPR
)
3955 if (bari
->def(0).getFile() == FILE_PREDICATE
) {
3956 if (insn
->src(s
).getFile() != FILE_PREDICATE
||
3957 src
->reg
.data
.id
!= minGPR
)
3966 // Find the next instruction inside the same basic block which overwrites, at
3967 // least, one source of the given instruction in order to avoid WaR hazards.
3969 SchedDataCalculatorGM107::findFirstDef(const Instruction
*bari
) const
3971 Instruction
*insn
, *next
;
3974 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
3977 for (int d
= 0; insn
->defExists(d
); ++d
) {
3978 const Value
*def
= insn
->def(d
).rep();
3979 if (insn
->def(d
).getFile() != FILE_GPR
)
3982 minGPR
= def
->reg
.data
.id
;
3983 maxGPR
= minGPR
+ def
->reg
.size
/ 4 - 1;
3985 for (int s
= 0; bari
->srcExists(s
); ++s
) {
3986 const Value
*src
= bari
->src(s
).rep();
3987 if (bari
->src(s
).getFile() != FILE_GPR
||
3988 src
->reg
.data
.id
+ src
->reg
.size
/ 4 - 1 < minGPR
||
3989 src
->reg
.data
.id
> maxGPR
)
3998 // Dependency barriers:
3999 // This pass is a bit ugly and could probably be improved by performing a
4000 // better allocation.
4002 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4003 // dependency barriers using the control codes.
4005 SchedDataCalculatorGM107::insertBarriers(BasicBlock
*bb
)
4007 std::list
<LiveBarUse
> live_uses
;
4008 std::list
<LiveBarDef
> live_defs
;
4009 Instruction
*insn
, *next
;
4013 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4014 Instruction
*usei
= NULL
, *defi
= NULL
;
4015 bool need_wr_bar
, need_rd_bar
;
4019 // Expire old barrier uses.
4020 for (std::list
<LiveBarUse
>::iterator it
= live_uses
.begin();
4021 it
!= live_uses
.end();) {
4022 if (insn
->serial
>= it
->usei
->serial
) {
4023 int wr
= getWrDepBar(it
->insn
);
4024 emitWtDepBar(insn
, wr
);
4025 bars
.clr(wr
); // free barrier
4026 it
= live_uses
.erase(it
);
4032 // Expire old barrier defs.
4033 for (std::list
<LiveBarDef
>::iterator it
= live_defs
.begin();
4034 it
!= live_defs
.end();) {
4035 if (insn
->serial
>= it
->defi
->serial
) {
4036 int rd
= getRdDepBar(it
->insn
);
4037 emitWtDepBar(insn
, rd
);
4038 bars
.clr(rd
); // free barrier
4039 it
= live_defs
.erase(it
);
4045 need_wr_bar
= needWrDepBar(insn
);
4046 need_rd_bar
= needRdDepBar(insn
);
4049 // When the instruction requires to emit a write dependency barrier
4050 // (all which write something at a variable latency), find the next
4051 // instruction which reads the outputs.
4052 usei
= findFirstUse(insn
);
4054 // Allocate and emit a new barrier.
4055 bar_id
= bars
.findFreeRange(1);
4059 emitWrDepBar(insn
, bar_id
);
4061 live_uses
.push_back(LiveBarUse(insn
, usei
));
4065 // When the instruction requires to emit a read dependency barrier
4066 // (all which read something at a variable latency), find the next
4067 // instruction which will write the inputs.
4068 defi
= findFirstDef(insn
);
4070 if (usei
&& defi
&& usei
->serial
<= defi
->serial
)
4073 // Allocate and emit a new barrier.
4074 bar_id
= bars
.findFreeRange(1);
4078 emitRdDepBar(insn
, bar_id
);
4080 live_defs
.push_back(LiveBarDef(insn
, defi
));
4084 // Remove unnecessary barrier waits.
4085 BitSet
alive_bars(6, 1);
4086 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4091 wr
= getWrDepBar(insn
);
4092 rd
= getRdDepBar(insn
);
4093 wt
= getWtDepBar(insn
);
4095 for (int idx
= 0; idx
< 6; ++idx
) {
4096 if (!(wt
& (1 << idx
)))
4098 if (!alive_bars
.test(idx
)) {
4099 insn
->sched
&= ~(1 << (11 + idx
));
4101 alive_bars
.clr(idx
);
4115 SchedDataCalculatorGM107::visit(Function
*func
)
4119 func
->orderInstructions(insns
);
4121 scoreBoards
.resize(func
->cfg
.getSize());
4122 for (size_t i
= 0; i
< scoreBoards
.size(); ++i
)
4123 scoreBoards
[i
].wipe();
4128 SchedDataCalculatorGM107::visit(BasicBlock
*bb
)
4130 Instruction
*insn
, *next
= NULL
;
4133 for (Instruction
*insn
= bb
->getEntry(); insn
; insn
= insn
->next
) {
4135 insn
->sched
= 0x7e0;
4138 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4141 // Insert read/write dependency barriers for instructions which don't
4142 // operate at a fixed latency.
4145 score
= &scoreBoards
.at(bb
->getId());
4147 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
4148 // back branches will wait until all target dependencies are satisfied
4149 if (ei
.getType() == Graph::Edge::BACK
) // sched would be uninitialized
4151 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
4152 score
->setMax(&scoreBoards
.at(in
->getId()));
4155 #ifdef GM107_DEBUG_SCHED_DATA
4156 INFO("=== BB:%i initial scores\n", bb
->getId());
4157 score
->print(cycle
);
4160 // Because barriers are allocated locally (intra-BB), we have to make sure
4161 // that all produced barriers have been consumed before entering inside a
4162 // new basic block. The best way is to do a global allocation pre RA but
4163 // it's really more difficult, especially because of the phi nodes. Anyways,
4164 // it seems like that waiting on a barrier which has already been consumed
4165 // doesn't add any additional cost, it's just not elegant!
4166 Instruction
*start
= bb
->getEntry();
4167 if (start
&& bb
->cfg
.incidentCount() > 0) {
4168 for (int b
= 0; b
< 6; b
++)
4169 emitWtDepBar(start
, b
);
4172 for (insn
= bb
->getEntry(); insn
&& insn
->next
; insn
= insn
->next
) {
4175 commitInsn(insn
, cycle
);
4176 int delay
= calcDelay(next
, cycle
);
4177 setDelay(insn
, delay
, next
);
4178 cycle
+= getStall(insn
);
4182 // XXX: The yield flag seems to destroy a bunch of things when it is
4183 // set on every instruction, need investigation.
4186 #ifdef GM107_DEBUG_SCHED_DATA
4187 printSchedInfo(cycle
, insn
);
4195 commitInsn(insn
, cycle
);
4199 #ifdef GM107_DEBUG_SCHED_DATA
4200 fprintf(stderr
, "last instruction is : ");
4202 fprintf(stderr
, "cycle=%d\n", cycle
);
4205 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
4206 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
4208 if (ei
.getType() != Graph::Edge::BACK
) {
4209 // Only test the first instruction of the outgoing block.
4210 next
= out
->getEntry();
4212 bbDelay
= MAX2(bbDelay
, calcDelay(next
, cycle
));
4214 // When the outgoing BB is empty, make sure to set the number of
4215 // stall counts needed by the instruction because we don't know the
4216 // next instruction.
4217 bbDelay
= MAX2(bbDelay
, targ
->getLatency(insn
));
4220 // Wait until all dependencies are satisfied.
4221 const int regsFree
= score
->getLatest();
4222 next
= out
->getFirst();
4223 for (int c
= cycle
; next
&& c
< regsFree
; next
= next
->next
) {
4224 bbDelay
= MAX2(bbDelay
, calcDelay(next
, c
));
4225 c
+= getStall(next
);
4230 if (bb
->cfg
.outgoingCount() != 1)
4232 setDelay(insn
, bbDelay
, next
);
4233 cycle
+= getStall(insn
);
4235 score
->rebase(cycle
); // common base for initializing out blocks' scores
4239 /*******************************************************************************
4241 ******************************************************************************/
4244 CodeEmitterGM107::prepareEmission(Function
*func
)
4246 SchedDataCalculatorGM107
sched(targGM107
);
4247 CodeEmitter::prepareEmission(func
);
4248 sched
.run(func
, true, true);
4251 static inline uint32_t sizeToBundlesGM107(uint32_t size
)
4253 return (size
+ 23) / 24;
4257 CodeEmitterGM107::prepareEmission(Program
*prog
)
4259 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
4260 !fi
.end(); fi
.next()) {
4261 Function
*func
= reinterpret_cast<Function
*>(fi
.get());
4262 func
->binPos
= prog
->binSize
;
4263 prepareEmission(func
);
4265 // adjust sizes & positions for schedulding info:
4266 if (prog
->getTarget()->hasSWSched
) {
4267 uint32_t adjPos
= func
->binPos
;
4268 BasicBlock
*bb
= NULL
;
4269 for (int i
= 0; i
< func
->bbCount
; ++i
) {
4270 bb
= func
->bbArray
[i
];
4271 int32_t adjSize
= bb
->binSize
;
4273 adjSize
-= 32 - adjPos
% 32;
4277 adjSize
= bb
->binSize
+ sizeToBundlesGM107(adjSize
) * 8;
4278 bb
->binPos
= adjPos
;
4279 bb
->binSize
= adjSize
;
4283 func
->binSize
= adjPos
- func
->binPos
;
4286 prog
->binSize
+= func
->binSize
;
4290 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107
*target
)
4291 : CodeEmitter(target
),
4293 writeIssueDelays(target
->hasSWSched
)
4296 codeSize
= codeSizeLimit
= 0;
4301 TargetGM107::createCodeEmitterGM107(Program::Type type
)
4303 CodeEmitterGM107
*emit
= new CodeEmitterGM107(this);
4304 emit
->setProgramType(type
);
4308 } // namespace nv50_ir