/*
 * Copyright 2014 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs <bskeggs@redhat.com>
 */
25 #include "codegen/nv50_ir_target_gm107.h"
27 //#define GM107_DEBUG_SCHED_DATA
// Code emitter class for the GM107 target, derived from CodeEmitter.
// Visible members: the virtual entry points (emitInstruction,
// getMinEncodingSize, prepareEmission), setProgramType(), the emission state
// (targGM107, progType, insn, writeIssueDelays) and the field-level helpers
// used by the emit*() methods.
// The inline emitGPR/emitSYS/emitPRED overloads forward to the
// (int, const Value *) variant, passing the rep() of the reference or
// definition, or NULL when it is empty; emitField(b, s, v) forwards to the
// four-argument form with `code` as the target buffer, emitInsn(o) passes
// pred = true, emitCond5 forwards to emitCond4, and emitRND(pos) uses
// insn->rnd with rip = -1.
31 class CodeEmitterGM107
: public CodeEmitter
34 CodeEmitterGM107(const TargetGM107
*);
36 virtual bool emitInstruction(Instruction
*);
37 virtual uint32_t getMinEncodingSize(const Instruction
*) const;
39 virtual void prepareEmission(Program
*);
40 virtual void prepareEmission(Function
*);
42 inline void setProgramType(Program::Type pType
) { progType
= pType
; }
45 const TargetGM107
*targGM107
;
47 Program::Type progType
;
49 const Instruction
*insn
;
50 const bool writeIssueDelays
;
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b
, int s
, uint32_t v
) { emitField(code
, b
, s
, v
); }
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o
) { emitInsn(o
, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value
*);
61 inline void emitGPR(int pos
) {
62 emitGPR(pos
, (const Value
*)NULL
);
64 inline void emitGPR(int pos
, const ValueRef
&ref
) {
65 emitGPR(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
67 inline void emitGPR(int pos
, const ValueRef
*ref
) {
68 emitGPR(pos
, ref
? ref
->rep() : (const Value
*)NULL
);
70 inline void emitGPR(int pos
, const ValueDef
&def
) {
71 emitGPR(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
73 inline void emitSYS(int, const Value
*);
74 inline void emitSYS(int pos
, const ValueRef
&ref
) {
75 emitSYS(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
77 inline void emitPRED(int, const Value
*);
78 inline void emitPRED(int pos
) {
79 emitPRED(pos
, (const Value
*)NULL
);
81 inline void emitPRED(int pos
, const ValueRef
&ref
) {
82 emitPRED(pos
, ref
.get() ? ref
.rep() : (const Value
*)NULL
);
84 inline void emitPRED(int pos
, const ValueDef
&def
) {
85 emitPRED(pos
, def
.get() ? def
.rep() : (const Value
*)NULL
);
87 inline void emitADDR(int, int, int, int, const ValueRef
&);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef
&);
89 inline bool longIMMD(const ValueRef
&);
90 inline void emitIMMD(int, int, const ValueRef
&);
92 void emitCond3(int, CondCode
);
93 void emitCond4(int, CondCode
);
94 void emitCond5(int pos
, CondCode cc
) { emitCond4(pos
, cc
); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef
&);
101 inline void emitNEG(int, const ValueRef
&);
102 inline void emitNEG2(int, const ValueRef
&, const ValueRef
&);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode
, int);
105 inline void emitRND(int pos
) {
106 emitRND(pos
, insn
->rnd
, -1);
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef
&);
172 void emitLDSTs(int, DataType
);
213 void emitSUHandle(const int s
);
/*******************************************************************************
 * general instruction layout/fields
 ******************************************************************************/
// Builds the bits of a field of width s at bit offset b: v is masked to its
// low s bits (mask m) and shifted left by b as a 64-bit quantity d.
// The assert requires the bits of v above the field to be either all clear
// or all set, i.e. v must be a zero- or sign-extended s-bit value.
224 CodeEmitterGM107::emitField(uint32_t *data
, int b
, int s
, uint32_t v
)
227 uint32_t m
= ((1ULL << s
) - 1);
228 uint64_t d
= (uint64_t)(v
& m
) << b
;
229 assert(!(v
& ~m
) || (v
& ~m
) == ~m
);
// When the instruction has a predicate source (predSrc >= 0), writes that
// source's register id to the 3-bit field at bit 16 and sets bit 19 when
// insn->cc is CC_NOT_P.
236 CodeEmitterGM107::emitPred()
238 if (insn
->predSrc
>= 0) {
239 emitField(16, 3, insn
->getSrc(insn
->predSrc
)->rep()->reg
.data
.id
);
240 emitField(19, 1, insn
->cc
== CC_NOT_P
);
// Starts an instruction encoding; the visible statement clears code[0].
// NOTE(review): presumably `hi` seeds code[1] and `pred` controls whether
// emitPred() runs - confirm against the full definition.
247 CodeEmitterGM107::emitInsn(uint32_t hi
, bool pred
)
249 code
[0] = 0x00000000;
256 CodeEmitterGM107::emitGPR(int pos
, const Value
*val
)
258 emitField(pos
, 8, val
&& !val
->inFile(FILE_FLAGS
) ?
259 val
->reg
.data
.id
: 255);
// Encodes a system-value operand in the 8-bit field at pos.
// id starts as val->reg.data.id (or -1 for a NULL value) and is replaced by
// the numeric id listed for the matching SV_* case; SV_TID, SV_CTAID and
// SV_CLOCK add reg.data.sv.index to their base id (0x21, 0x25, 0x50).
// An assert flags an invalid system value.
263 CodeEmitterGM107::emitSYS(int pos
, const Value
*val
)
265 int id
= val
? val
->reg
.data
.id
: -1;
268 case SV_LANEID
: id
= 0x00; break;
269 case SV_VERTEX_COUNT
: id
= 0x10; break;
270 case SV_INVOCATION_ID
: id
= 0x11; break;
271 case SV_THREAD_KILL
: id
= 0x13; break;
272 case SV_INVOCATION_INFO
: id
= 0x1d; break;
273 case SV_COMBINED_TID
: id
= 0x20; break;
274 case SV_TID
: id
= 0x21 + val
->reg
.data
.sv
.index
; break;
275 case SV_CTAID
: id
= 0x25 + val
->reg
.data
.sv
.index
; break;
276 case SV_LANEMASK_EQ
: id
= 0x38; break;
277 case SV_LANEMASK_LT
: id
= 0x39; break;
278 case SV_LANEMASK_LE
: id
= 0x3a; break;
279 case SV_LANEMASK_GT
: id
= 0x3b; break;
280 case SV_LANEMASK_GE
: id
= 0x3c; break;
281 case SV_CLOCK
: id
= 0x50 + val
->reg
.data
.sv
.index
; break;
283 assert(!"invalid system value");
288 emitField(pos
, 8, id
);
292 CodeEmitterGM107::emitPRED(int pos
, const Value
*val
)
294 emitField(pos
, 3, val
? val
->reg
.data
.id
: 7);
// Encodes an address operand taken from ref: the indirect register
// (ref.getIndirect(0)) goes to the GPR field at `gpr`, and the value's
// offset, shifted right by `shr`, goes to the `len`-bit field at `off`.
// The assert requires the offset to have no bits set below bit `shr`,
// i.e. to be a multiple of (1 << shr).
298 CodeEmitterGM107::emitADDR(int gpr
, int off
, int len
, int shr
,
301 const Value
*v
= ref
.get();
302 assert(!(v
->reg
.data
.offset
& ((1 << shr
) - 1)));
304 emitGPR(gpr
, ref
.getIndirect(0));
305 emitField(off
, len
, v
->reg
.data
.offset
>> shr
);
// Encodes a constant-buffer operand taken from ref: the value's
// reg.fileIndex goes to the 5-bit field at `buf`, the indirect register
// (ref.getIndirect(0)) to the GPR field at `gpr`, and the symbol's offset,
// shifted right by `shr`, to the field at `off`.
// The assert requires the offset to be a multiple of (1 << shr).
// Note: the visible offset emitField uses a fixed width of 16 rather than
// the `len` argument.
309 CodeEmitterGM107::emitCBUF(int buf
, int gpr
, int off
, int len
, int shr
,
312 const Value
*v
= ref
.get();
313 const Symbol
*s
= v
->asSym();
315 assert(!(s
->reg
.data
.offset
& ((1 << shr
) - 1)));
317 emitField(buf
, 5, v
->reg
.fileIndex
);
319 emitGPR(gpr
, ref
.getIndirect(0));
320 emitField(off
, 16, s
->reg
.data
.offset
>> shr
);
// Size test for an immediate source (ref in FILE_IMMEDIATE).
// For float source types the result is non-zero when any of the low 12 bits
// of the u32 pattern are set; the other visible return is true when the s32
// value lies outside [-0x80000, 0x7ffff].
324 CodeEmitterGM107::longIMMD(const ValueRef
&ref
)
326 if (ref
.getFile() == FILE_IMMEDIATE
) {
327 const ImmediateValue
*imm
= ref
.get()->asImm();
328 if (isFloatType(insn
->sType
))
329 return imm
->reg
.data
.u32
& 0xfff;
331 return imm
->reg
.data
.s32
> 0x7ffff || imm
->reg
.data
.s32
< -0x80000;
// Encodes the immediate held by ref into the field (pos, len).
// val starts as the u32 pattern. For TYPE_F32/TYPE_F16 sources the low
// 12 bits must be clear; for TYPE_F64 the low 44 bits of the u64 pattern
// must be clear and val becomes u64 >> 44.
// One visible path asserts that bits 19..31 of val are all clear or all set,
// writes bit 19 of val to bit 56 and the low 19 bits to (pos, len); the
// final visible call writes val unchanged to (pos, len).
// NOTE(review): the condition choosing between these two paths is not
// visible here - presumably it depends on `len`; confirm.
337 CodeEmitterGM107::emitIMMD(int pos
, int len
, const ValueRef
&ref
)
339 const ImmediateValue
*imm
= ref
.get()->asImm();
340 uint32_t val
= imm
->reg
.data
.u32
;
343 if (insn
->sType
== TYPE_F32
|| insn
->sType
== TYPE_F16
) {
344 assert(!(val
& 0x00000fff));
346 } else if (insn
->sType
== TYPE_F64
) {
347 assert(!(imm
->reg
.data
.u64
& 0x00000fffffffffffULL
));
348 val
= imm
->reg
.data
.u64
>> 44;
350 assert(!(val
& 0xfff80000) || (val
& 0xfff80000) == 0xfff80000);
352 emitField( 56, 1, (val
& 0x80000) >> 19);
353 emitField(pos
, len
, (val
& 0x7ffff));
355 emitField(pos
, len
, val
);
359 /*******************************************************************************
361 ******************************************************************************/
// Translates a CondCode into a 3-bit value (CC_FL = 0 ... CC_TR = 7, per the
// cases below) and writes it to the 3-bit field at pos.
// An assert flags a condition code with no 3-bit encoding.
364 CodeEmitterGM107::emitCond3(int pos
, CondCode code
)
369 case CC_FL
: data
= 0x00; break;
371 case CC_LT
: data
= 0x01; break;
373 case CC_EQ
: data
= 0x02; break;
375 case CC_LE
: data
= 0x03; break;
377 case CC_GT
: data
= 0x04; break;
379 case CC_NE
: data
= 0x05; break;
381 case CC_GE
: data
= 0x06; break;
382 case CC_TR
: data
= 0x07; break;
384 assert(!"invalid cond3");
388 emitField(pos
, 3, data
);
// Translates a CondCode into a 4-bit value and writes it to the 4-bit field
// at pos. The *U variants (CC_LTU ... CC_GEU) use the value of the matching
// plain code plus 8; CC_TR is 0x0f. Values 0x07 and 0x08 are left
// unassigned (their cases are commented out).
// An assert flags a condition code with no 4-bit encoding.
392 CodeEmitterGM107::emitCond4(int pos
, CondCode code
)
397 case CC_FL
: data
= 0x00; break;
398 case CC_LT
: data
= 0x01; break;
399 case CC_EQ
: data
= 0x02; break;
400 case CC_LE
: data
= 0x03; break;
401 case CC_GT
: data
= 0x04; break;
402 case CC_NE
: data
= 0x05; break;
403 case CC_GE
: data
= 0x06; break;
404 // case CC_NUM: data = 0x07; break;
405 // case CC_NAN: data = 0x08; break;
406 case CC_LTU
: data
= 0x09; break;
407 case CC_EQU
: data
= 0x0a; break;
408 case CC_LEU
: data
= 0x0b; break;
409 case CC_GTU
: data
= 0x0c; break;
410 case CC_NEU
: data
= 0x0d; break;
411 case CC_GEU
: data
= 0x0e; break;
412 case CC_TR
: data
= 0x0f; break;
414 assert(!"invalid cond4");
418 emitField(pos
, 4, data
);
422 CodeEmitterGM107::emitO(int pos
)
424 emitField(pos
, 1, insn
->getSrc(0)->reg
.file
== FILE_SHADER_OUTPUT
);
428 CodeEmitterGM107::emitP(int pos
)
430 emitField(pos
, 1, insn
->perPatch
);
434 CodeEmitterGM107::emitSAT(int pos
)
436 emitField(pos
, 1, insn
->saturate
);
440 CodeEmitterGM107::emitCC(int pos
)
442 emitField(pos
, 1, insn
->flagsDef
>= 0);
446 CodeEmitterGM107::emitX(int pos
)
448 emitField(pos
, 1, insn
->flagsSrc
>= 0);
452 CodeEmitterGM107::emitABS(int pos
, const ValueRef
&ref
)
454 emitField(pos
, 1, ref
.mod
.abs());
458 CodeEmitterGM107::emitNEG(int pos
, const ValueRef
&ref
)
460 emitField(pos
, 1, ref
.mod
.neg());
464 CodeEmitterGM107::emitNEG2(int pos
, const ValueRef
&a
, const ValueRef
&b
)
466 emitField(pos
, 1, a
.mod
.neg() ^ b
.mod
.neg());
470 CodeEmitterGM107::emitFMZ(int pos
, int len
)
472 emitField(pos
, len
, insn
->dnz
<< 1 | insn
->ftz
);
// Encodes a rounding mode as two fields: rm (2 bits at rmp) and ri (1 bit at
// rip). ROUND_N/M/P/Z select rm = 0/1/2/3; the ROUND_NI/MI/PI/ZI cases set
// ri = 1 and fall through to the matching base mode (there is no break
// after `ri = 1`). An assert flags an invalid round mode.
476 CodeEmitterGM107::emitRND(int rmp
, RoundMode rnd
, int rip
)
480 case ROUND_NI
: ri
= 1;
481 case ROUND_N
: rm
= 0; break;
482 case ROUND_MI
: ri
= 1;
483 case ROUND_M
: rm
= 1; break;
484 case ROUND_PI
: ri
= 1;
485 case ROUND_P
: rm
= 2; break;
486 case ROUND_ZI
: ri
= 1;
487 case ROUND_Z
: rm
= 3; break;
489 assert(!"invalid round mode");
492 emitField(rip
, 1, ri
);
493 emitField(rmp
, 2, rm
);
// Encodes insn->postFactor in the 3-bit field at pos. The assert restricts
// postFactor to [-3, 3]. A positive factor is written as 7 - postFactor;
// the other visible call writes 0 - postFactor.
497 CodeEmitterGM107::emitPDIV(int pos
)
499 assert(insn
->postFactor
>= -3 && insn
->postFactor
<= 3);
500 if (insn
->postFactor
> 0)
501 emitField(pos
, 3, 7 - insn
->postFactor
);
503 emitField(pos
, 3, 0 - insn
->postFactor
);
507 CodeEmitterGM107::emitINV(int pos
, const ValueRef
&ref
)
509 emitField(pos
, 1, !!(ref
.mod
& Modifier(NV50_IR_MOD_NOT
)));
512 /*******************************************************************************
514 ******************************************************************************/
517 CodeEmitterGM107::emitEXIT()
519 emitInsn (0xe3000000);
520 emitCond5(0x00, CC_TR
);
// Emits a branch for a FlowInstruction. The opcode is one of JMX
// (0xe2000000), BRX (0xe2500000), JMP (0xe2100000) or BRA (0xe2400000); the
// selection starts from insn->indirect. allWarp is written to bit 7 next to
// the BRA opcode, limit to bit 6, and the condition field at bit 0 is CC_TR.
// When source 0 is absent or not in FILE_MEMORY_CONST, the target block's
// binPos is encoded at bit 0x14 either as the 24-bit displacement
// pos - (codeSize + 8) or as a 32-bit value. The constant-buffer form
// encodes source 0 with emitCBUF and sets bit 5.
// pos is special-cased when writeIssueDelays is set and pos is a multiple of
// 0x20 (the adjustment itself is not visible here).
524 CodeEmitterGM107::emitBRA()
526 const FlowInstruction
*insn
= this->insn
->asFlow();
529 if (insn
->indirect
) {
531 emitInsn(0xe2000000); // JMX
533 emitInsn(0xe2500000); // BRX
537 emitInsn(0xe2100000); // JMP
539 emitInsn(0xe2400000); // BRA
540 emitField(0x07, 1, insn
->allWarp
);
543 emitField(0x06, 1, insn
->limit
);
544 emitCond5(0x00, CC_TR
);
546 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
547 int32_t pos
= insn
->target
.bb
->binPos
;
548 if (writeIssueDelays
&& !(pos
& 0x1f))
551 emitField(0x14, 24, pos
- (codeSize
+ 8));
553 emitField(0x14, 32, pos
);
555 emitCBUF (0x24, gpr
, 20, 16, 0, insn
->src(0));
556 emitField(0x05, 1, 1);
// Emits a call for a FlowInstruction, with pred = 0 passed to emitInsn.
// JCAL (0xe2200000) is selected under insn->absolute; CAL (0xe2600000) is
// the other opcode. When source 0 is absent or not in FILE_MEMORY_CONST, the
// visible target encodings are: the 24-bit displacement
// binPos - (codeSize + 8) at bit 0x14; a builtin target, for which pcAbs
// comes from targGM107->getBuiltinOffset() and two TYPE_BUILTIN relocations
// are added (arguments 0/0xfff00000/20 and 1/0x000fffff/-12); and the
// 32-bit binPos at bit 0x14. The constant-buffer form encodes source 0 with
// emitCBUF and sets bit 5.
561 CodeEmitterGM107::emitCAL()
563 const FlowInstruction
*insn
= this->insn
->asFlow();
565 if (insn
->absolute
) {
566 emitInsn(0xe2200000, 0); // JCAL
568 emitInsn(0xe2600000, 0); // CAL
571 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
573 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
576 int pcAbs
= targGM107
->getBuiltinOffset(insn
->target
.builtin
);
577 addReloc(RelocEntry::TYPE_BUILTIN
, 0, pcAbs
, 0xfff00000, 20);
578 addReloc(RelocEntry::TYPE_BUILTIN
, 1, pcAbs
, 0x000fffff, -12);
580 emitField(0x14, 32, insn
->target
.bb
->binPos
);
584 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
585 emitField(0x05, 1, 1);
// Emits opcode 0xe2b00000 (pred = 0) for a FlowInstruction. When source 0 is
// absent or not in FILE_MEMORY_CONST, the target is the 24-bit displacement
// binPos - (codeSize + 8) at bit 0x14. The constant-buffer form encodes
// source 0 with emitCBUF and sets bit 5.
590 CodeEmitterGM107::emitPCNT()
592 const FlowInstruction
*insn
= this->insn
->asFlow();
594 emitInsn(0xe2b00000, 0);
596 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
597 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
599 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
600 emitField(0x05, 1, 1);
605 CodeEmitterGM107::emitCONT()
607 emitInsn (0xe3500000);
608 emitCond5(0x00, CC_TR
);
// Emits opcode 0xe2a00000 (pred = 0) for a FlowInstruction. When source 0 is
// absent or not in FILE_MEMORY_CONST, the target is the 24-bit displacement
// binPos - (codeSize + 8) at bit 0x14. The constant-buffer form encodes
// source 0 with emitCBUF and sets bit 5.
612 CodeEmitterGM107::emitPBK()
614 const FlowInstruction
*insn
= this->insn
->asFlow();
616 emitInsn(0xe2a00000, 0);
618 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
619 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
621 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
622 emitField(0x05, 1, 1);
627 CodeEmitterGM107::emitBRK()
629 emitInsn (0xe3400000);
630 emitCond5(0x00, CC_TR
);
// Emits opcode 0xe2700000 (pred = 0) for a FlowInstruction. When source 0 is
// absent or not in FILE_MEMORY_CONST, the target is the 24-bit displacement
// binPos - (codeSize + 8) at bit 0x14. The constant-buffer form encodes
// source 0 with emitCBUF and sets bit 5.
634 CodeEmitterGM107::emitPRET()
636 const FlowInstruction
*insn
= this->insn
->asFlow();
638 emitInsn(0xe2700000, 0);
640 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
641 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
643 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
644 emitField(0x05, 1, 1);
649 CodeEmitterGM107::emitRET()
651 emitInsn (0xe3200000);
652 emitCond5(0x00, CC_TR
);
// Emits opcode 0xe2900000 (pred = 0) for a FlowInstruction. When source 0 is
// absent or not in FILE_MEMORY_CONST, the target is the 24-bit displacement
// binPos - (codeSize + 8) at bit 0x14. The constant-buffer form encodes
// source 0 with emitCBUF and sets bit 5.
656 CodeEmitterGM107::emitSSY()
658 const FlowInstruction
*insn
= this->insn
->asFlow();
660 emitInsn(0xe2900000, 0);
662 if (!insn
->srcExists(0) || insn
->src(0).getFile() != FILE_MEMORY_CONST
) {
663 emitField(0x14, 24, insn
->target
.bb
->binPos
- (codeSize
+ 8));
665 emitCBUF (0x24, -1, 20, 16, 0, insn
->src(0));
666 emitField(0x05, 1, 1);
671 CodeEmitterGM107::emitSYNC()
673 emitInsn (0xf0f80000);
674 emitCond5(0x00, CC_TR
);
678 CodeEmitterGM107::emitSAM()
680 emitInsn(0xe3700000, 0);
684 CodeEmitterGM107::emitRAM()
686 emitInsn(0xe3800000, 0);
689 /*******************************************************************************
691 ******************************************************************************/
/*******************************************************************************
 * movement / conversion
 ******************************************************************************/
// Emits a move. For a source outside FILE_IMMEDIATE the opcode is chosen by
// the source file: 0x5b6a0000 when the destination is a predicate, else
// 0x5c980000, both with the source GPR at 0x14; 0x4c980000 for a constant
// buffer; 0x38980000 with a 19-bit immediate; 0x50880000 with the source
// predicate at 0x0c. insn->lanes is then written to the 4-bit field at 0x27
// when neither destination nor source is a predicate.
// Opcode 0x01000000 takes the source as a 32-bit immediate at 0x14 with
// lanes in the 4-bit field at 0x0c.
// The destination goes to the predicate field at 0x03 or the GPR field at
// 0x00, depending on its file.
698 CodeEmitterGM107::emitMOV()
700 if (insn
->src(0).getFile() != FILE_IMMEDIATE
) {
701 switch (insn
->src(0).getFile()) {
703 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
704 emitInsn(0x5b6a0000);
707 emitInsn(0x5c980000);
709 emitGPR (0x14, insn
->src(0));
711 case FILE_MEMORY_CONST
:
712 emitInsn(0x4c980000);
713 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
716 emitInsn(0x38980000);
717 emitIMMD(0x14, 19, insn
->src(0));
720 emitInsn(0x50880000);
721 emitPRED(0x0c, insn
->src(0));
726 assert(!"bad src file");
729 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
730 insn
->src(0).getFile() != FILE_PREDICATE
)
731 emitField(0x27, 4, insn
->lanes
);
733 emitInsn (0x01000000);
734 emitIMMD (0x14, 32, insn
->src(0));
735 emitField(0x0c, 4, insn
->lanes
);
738 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
740 emitPRED(0x03, insn
->def(0));
743 emitGPR(0x00, insn
->def(0));
748 CodeEmitterGM107::emitS2R()
750 emitInsn(0xf0c80000);
751 emitSYS (0x14, insn
->src(0));
752 emitGPR (0x00, insn
->def(0));
756 CodeEmitterGM107::emitCS2R()
758 emitInsn(0x50c80000);
759 emitSYS (0x14, insn
->src(0));
760 emitGPR (0x00, insn
->def(0));
// Emits a float-to-float conversion. rnd starts as insn->rnd and is
// overridden to ROUND_MI/ROUND_PI/ROUND_ZI for OP_FLOOR/OP_CEIL/OP_TRUNC.
// The opcode is chosen by the file of source 0: 0x5ca80000 (GPR at 0x14),
// 0x4ca80000 (constant buffer) or 0x38a80000 (19-bit immediate).
// Then: saturate at 0x32, abs at 0x31 and neg at 0x2d (each also set when
// insn->op is OP_SAT/OP_ABS/OP_NEG), subOp at 0x29, rounding at 0x27 with
// the ri bit at 0x2a, log2 of the source and destination type sizes at 0x0a
// and 0x08, and the destination GPR at 0x00.
764 CodeEmitterGM107::emitF2F()
766 RoundMode rnd
= insn
->rnd
;
769 case OP_FLOOR
: rnd
= ROUND_MI
; break;
770 case OP_CEIL
: rnd
= ROUND_PI
; break;
771 case OP_TRUNC
: rnd
= ROUND_ZI
; break;
776 switch (insn
->src(0).getFile()) {
778 emitInsn(0x5ca80000);
779 emitGPR (0x14, insn
->src(0));
781 case FILE_MEMORY_CONST
:
782 emitInsn(0x4ca80000);
783 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
786 emitInsn(0x38a80000);
787 emitIMMD(0x14, 19, insn
->src(0));
790 assert(!"bad src0 file");
794 emitField(0x32, 1, (insn
->op
== OP_SAT
) || insn
->saturate
);
795 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
797 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
799 emitField(0x29, 1, insn
->subOp
);
800 emitRND (0x27, rnd
, 0x2a);
801 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
802 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
803 emitGPR (0x00, insn
->def(0));
// Emits a float-to-integer conversion. rnd starts as insn->rnd and is
// overridden to ROUND_M/ROUND_P/ROUND_Z for OP_FLOOR/OP_CEIL/OP_TRUNC.
// The opcode is chosen by the file of source 0: 0x5cb00000 (GPR at 0x14),
// 0x4cb00000 (constant buffer) or 0x38b00000 (19-bit immediate).
// Then: abs at 0x31 and neg at 0x2d (each also set when insn->op is
// OP_ABS/OP_NEG), rounding at 0x27 with the ri bit at 0x2a, destination
// signedness at 0x0c, log2 of the source and destination type sizes at 0x0a
// and 0x08, and the destination GPR at 0x00.
807 CodeEmitterGM107::emitF2I()
809 RoundMode rnd
= insn
->rnd
;
812 case OP_FLOOR
: rnd
= ROUND_M
; break;
813 case OP_CEIL
: rnd
= ROUND_P
; break;
814 case OP_TRUNC
: rnd
= ROUND_Z
; break;
819 switch (insn
->src(0).getFile()) {
821 emitInsn(0x5cb00000);
822 emitGPR (0x14, insn
->src(0));
824 case FILE_MEMORY_CONST
:
825 emitInsn(0x4cb00000);
826 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
829 emitInsn(0x38b00000);
830 emitIMMD(0x14, 19, insn
->src(0));
833 assert(!"bad src0 file");
837 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
839 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
841 emitRND (0x27, rnd
, 0x2a);
842 emitField(0x0c, 1, isSignedType(insn
->dType
));
843 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
844 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
845 emitGPR (0x00, insn
->def(0));
// Emits an integer-to-float conversion. rnd starts as insn->rnd and is
// overridden to ROUND_M/ROUND_P/ROUND_Z for OP_FLOOR/OP_CEIL/OP_TRUNC.
// The opcode is chosen by the file of source 0: 0x5cb80000 (GPR at 0x14),
// 0x4cb80000 (constant buffer) or 0x38b80000 (19-bit immediate).
// Then: abs at 0x31 and neg at 0x2d (each also set when insn->op is
// OP_ABS/OP_NEG), subOp in 2 bits at 0x29, rounding at 0x27 (rip = -1),
// source signedness at 0x0d, log2 of the source and destination type sizes
// at 0x0a and 0x08, and the destination GPR at 0x00.
849 CodeEmitterGM107::emitI2F()
851 RoundMode rnd
= insn
->rnd
;
854 case OP_FLOOR
: rnd
= ROUND_M
; break;
855 case OP_CEIL
: rnd
= ROUND_P
; break;
856 case OP_TRUNC
: rnd
= ROUND_Z
; break;
861 switch (insn
->src(0).getFile()) {
863 emitInsn(0x5cb80000);
864 emitGPR (0x14, insn
->src(0));
866 case FILE_MEMORY_CONST
:
867 emitInsn(0x4cb80000);
868 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
871 emitInsn(0x38b80000);
872 emitIMMD(0x14, 19, insn
->src(0));
875 assert(!"bad src0 file");
879 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
881 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
882 emitField(0x29, 2, insn
->subOp
);
883 emitRND (0x27, rnd
, -1);
884 emitField(0x0d, 1, isSignedType(insn
->sType
));
885 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
886 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
887 emitGPR (0x00, insn
->def(0));
// Emits an integer-to-integer conversion. The opcode is chosen by the file
// of source 0: 0x5ce00000 (GPR at 0x14), 0x4ce00000 (constant buffer) or
// 0x38e00000 (19-bit immediate).
// Then: abs at 0x31 and neg at 0x2d (each also set when insn->op is
// OP_ABS/OP_NEG), subOp in 2 bits at 0x29, source signedness at 0x0d,
// destination signedness at 0x0c, log2 of the source and destination type
// sizes at 0x0a and 0x08, and the destination GPR at 0x00.
891 CodeEmitterGM107::emitI2I()
893 switch (insn
->src(0).getFile()) {
895 emitInsn(0x5ce00000);
896 emitGPR (0x14, insn
->src(0));
898 case FILE_MEMORY_CONST
:
899 emitInsn(0x4ce00000);
900 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
903 emitInsn(0x38e00000);
904 emitIMMD(0x14, 19, insn
->src(0));
907 assert(!"bad src0 file");
912 emitField(0x31, 1, (insn
->op
== OP_ABS
) || insn
->src(0).mod
.abs());
914 emitField(0x2d, 1, (insn
->op
== OP_NEG
) || insn
->src(0).mod
.neg());
915 emitField(0x29, 2, insn
->subOp
);
916 emitField(0x0d, 1, isSignedType(insn
->sType
));
917 emitField(0x0c, 1, isSignedType(insn
->dType
));
918 emitField(0x0a, 2, util_logbase2(typeSizeof(insn
->sType
)));
919 emitField(0x08, 2, util_logbase2(typeSizeof(insn
->dType
)));
920 emitGPR (0x00, insn
->def(0));
// Fixup callback operating on the code word at entry->loc + 1: bit 10 is set
// when data.force_persample_interp is true; the other visible statement
// clears that bit.
924 selpFlip(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
926 int loc
= entry
->loc
;
927 if (data
.force_persample_interp
)
928 code
[loc
+ 1] |= 1 << 10;
930 code
[loc
+ 1] &= ~(1 << 10);
// Emits a select. The opcode is chosen by the file of source 1: 0x5ca00000
// (GPR at 0x14), 0x4ca00000 (constant buffer) or 0x38a00000 (19-bit
// immediate). Source 2 supplies the predicate at 0x27 and its NOT modifier
// at 0x2a; source 0 goes to the GPR field at 0x08 and the destination to
// 0x00. When subOp == 1, selpFlip is registered through
// addInterp(0, 0, selpFlip).
934 CodeEmitterGM107::emitSEL()
936 switch (insn
->src(1).getFile()) {
938 emitInsn(0x5ca00000);
939 emitGPR (0x14, insn
->src(1));
941 case FILE_MEMORY_CONST
:
942 emitInsn(0x4ca00000);
943 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
946 emitInsn(0x38a00000);
947 emitIMMD(0x14, 19, insn
->src(1));
950 assert(!"bad src1 file");
954 emitINV (0x2a, insn
->src(2));
955 emitPRED(0x27, insn
->src(2));
956 emitGPR (0x08, insn
->src(0));
957 emitGPR (0x00, insn
->def(0));
959 if (insn
->subOp
== 1) {
960 addInterp(0, 0, selpFlip
);
// Emits opcode 0xef100000. Source 1 is encoded either as a GPR at 0x14 or as
// a 5-bit immediate at 0x14; source 2 either as a GPR at 0x27 or as a 13-bit
// immediate at 0x22. A second definition, when handled, is asserted to be in
// FILE_PREDICATE and written to the predicate field at 0x30.
// subOp goes to the 2-bit field at 0x1e, the local `type` to the 2-bit field
// at 0x1c, source 0 to the GPR field at 0x08 and the destination to 0x00.
965 CodeEmitterGM107::emitSHFL()
969 emitInsn (0xef100000);
971 switch (insn
->src(1).getFile()) {
973 emitGPR(0x14, insn
->src(1));
976 emitIMMD(0x14, 5, insn
->src(1));
980 assert(!"invalid src1 file");
984 switch (insn
->src(2).getFile()) {
986 emitGPR(0x27, insn
->src(2));
989 emitIMMD(0x22, 13, insn
->src(2));
993 assert(!"invalid src2 file");
997 if (!insn
->defExists(1))
1000 assert(insn
->def(1).getFile() == FILE_PREDICATE
);
1001 emitPRED(0x30, insn
->def(1));
1004 emitField(0x1e, 2, insn
->subOp
);
1005 emitField(0x1c, 2, type
);
1006 emitGPR (0x08, insn
->src(0));
1007 emitGPR (0x00, insn
->def(0));
1010 /*******************************************************************************
1012 ******************************************************************************/
// Emits a double-precision add. The opcode is chosen by the file of source
// 1: 0x5c700000 (GPR at 0x14), 0x4c700000 (constant buffer) or 0x38700000
// (19-bit immediate). Modifiers: abs(src1) at 0x31, neg(src0) at 0x30,
// abs(src0) at 0x2e, neg(src1) at 0x2d. For OP_SUB, bit 0x00002000 of
// code[1] is toggled, which is bit 0x2d of the instruction - the neg(src1)
// position. Source 0 goes to the GPR field at 0x08, the destination to 0x00.
1015 CodeEmitterGM107::emitDADD()
1017 switch (insn
->src(1).getFile()) {
1019 emitInsn(0x5c700000);
1020 emitGPR (0x14, insn
->src(1));
1022 case FILE_MEMORY_CONST
:
1023 emitInsn(0x4c700000);
1024 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1026 case FILE_IMMEDIATE
:
1027 emitInsn(0x38700000);
1028 emitIMMD(0x14, 19, insn
->src(1));
1031 assert(!"bad src1 file");
1034 emitABS(0x31, insn
->src(1));
1035 emitNEG(0x30, insn
->src(0));
1037 emitABS(0x2e, insn
->src(0));
1038 emitNEG(0x2d, insn
->src(1));
1040 if (insn
->op
== OP_SUB
)
1041 code
[1] ^= 0x00002000;
1043 emitGPR(0x08, insn
->src(0));
1044 emitGPR(0x00, insn
->def(0));
// Emits a double-precision multiply. The opcode is chosen by the file of
// source 1: 0x5c800000 (GPR at 0x14), 0x4c800000 (constant buffer) or
// 0x38800000 (19-bit immediate). The combined negation of both sources goes
// to bit 0x30, source 0 to the GPR field at 0x08 and the destination to 0x00.
1048 CodeEmitterGM107::emitDMUL()
1050 switch (insn
->src(1).getFile()) {
1052 emitInsn(0x5c800000);
1053 emitGPR (0x14, insn
->src(1));
1055 case FILE_MEMORY_CONST
:
1056 emitInsn(0x4c800000);
1057 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1059 case FILE_IMMEDIATE
:
1060 emitInsn(0x38800000);
1061 emitIMMD(0x14, 19, insn
->src(1));
1064 assert(!"bad src1 file");
1068 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1071 emitGPR (0x08, insn
->src(0));
1072 emitGPR (0x00, insn
->def(0));
// Emits a double-precision fused multiply-add, dispatching first on the file
// of source 2. With source 2 as a GPR (written at 0x27) the opcode follows
// the file of source 1: 0x5b700000 (GPR at 0x14), 0x4b700000 (constant
// buffer) or 0x36700000 (19-bit immediate). With source 2 in a constant
// buffer the opcode is 0x53700000, source 1 goes to the GPR field at 0x27
// and source 2 is encoded with emitCBUF.
// neg(src2) goes to bit 0x31, the combined negation of sources 0 and 1 to
// bit 0x30, source 0 to the GPR field at 0x08 and the destination to 0x00.
1076 CodeEmitterGM107::emitDFMA()
1078 switch(insn
->src(2).getFile()) {
1080 switch (insn
->src(1).getFile()) {
1082 emitInsn(0x5b700000);
1083 emitGPR (0x14, insn
->src(1));
1085 case FILE_MEMORY_CONST
:
1086 emitInsn(0x4b700000);
1087 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1089 case FILE_IMMEDIATE
:
1090 emitInsn(0x36700000);
1091 emitIMMD(0x14, 19, insn
->src(1));
1094 assert(!"bad src1 file");
1097 emitGPR (0x27, insn
->src(2));
1099 case FILE_MEMORY_CONST
:
1100 emitInsn(0x53700000);
1101 emitGPR (0x27, insn
->src(1));
1102 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1105 assert(!"bad src2 file");
1110 emitNEG (0x31, insn
->src(2));
1111 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1113 emitGPR (0x08, insn
->src(0));
1114 emitGPR (0x00, insn
->def(0));
// Emits a double-precision min/max. The opcode is chosen by the file of
// source 1: 0x5c500000 (GPR at 0x14), 0x4c500000 (constant buffer) or
// 0x38500000 (19-bit immediate). Modifiers: abs(src1) at 0x31, neg(src0) at
// 0x30, abs(src0) at 0x2e, neg(src1) at 0x2d. Bit 0x2a is set for OP_MAX.
// Source 0 goes to the GPR field at 0x08 and the destination to 0x00.
1118 CodeEmitterGM107::emitDMNMX()
1120 switch (insn
->src(1).getFile()) {
1122 emitInsn(0x5c500000);
1123 emitGPR (0x14, insn
->src(1));
1125 case FILE_MEMORY_CONST
:
1126 emitInsn(0x4c500000);
1127 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1129 case FILE_IMMEDIATE
:
1130 emitInsn(0x38500000);
1131 emitIMMD(0x14, 19, insn
->src(1));
1134 assert(!"bad src1 file");
1138 emitABS (0x31, insn
->src(1));
1139 emitNEG (0x30, insn
->src(0));
1141 emitABS (0x2e, insn
->src(0));
1142 emitNEG (0x2d, insn
->src(1));
1143 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1145 emitGPR (0x08, insn
->src(0));
1146 emitGPR (0x00, insn
->def(0));
// Emits a double-precision compare for a CmpInstruction. The opcode is
// chosen by the file of source 1: 0x59000000 (GPR at 0x14), 0x49000000
// (constant buffer) or 0x32000000 (19-bit immediate).
// When insn->op is not OP_SET, the 2-bit field at 0x2d holds 0/1/2 for
// OP_SET_AND/OP_SET_OR/OP_SET_XOR and source 2 is the predicate at 0x27.
// Then: abs(src0) at 0x36, neg(src1) at 0x35, (dType == TYPE_F32) at 0x34,
// the 4-bit condition at 0x30, abs(src1) at 0x2c, neg(src0) at 0x2b,
// source 0 at the GPR field 0x08 and the destination at 0x00.
1150 CodeEmitterGM107::emitDSET()
1152 const CmpInstruction
*insn
= this->insn
->asCmp();
1154 switch (insn
->src(1).getFile()) {
1156 emitInsn(0x59000000);
1157 emitGPR (0x14, insn
->src(1));
1159 case FILE_MEMORY_CONST
:
1160 emitInsn(0x49000000);
1161 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1163 case FILE_IMMEDIATE
:
1164 emitInsn(0x32000000);
1165 emitIMMD(0x14, 19, insn
->src(1));
1168 assert(!"bad src1 file");
1172 if (insn
->op
!= OP_SET
) {
1174 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1175 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1176 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1178 assert(!"invalid set op");
1181 emitPRED(0x27, insn
->src(2));
1186 emitABS (0x36, insn
->src(0));
1187 emitNEG (0x35, insn
->src(1));
1188 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1189 emitCond4(0x30, insn
->setCond
);
1191 emitABS (0x2c, insn
->src(1));
1192 emitNEG (0x2b, insn
->src(0));
1193 emitGPR (0x08, insn
->src(0));
1194 emitGPR (0x00, insn
->def(0));
// Emits a double-precision compare that writes predicates. The opcode is
// chosen by the file of source 1: 0x5b800000 (GPR at 0x14), 0x4b800000
// (constant buffer) or 0x36800000 (19-bit immediate).
// When insn->op is not OP_SET, the 2-bit field at 0x2d holds 0/1/2 for
// OP_SET_AND/OP_SET_OR/OP_SET_XOR and source 2 is the predicate at 0x27.
// Then: the 4-bit condition at 0x30, abs(src1) at 0x2c, neg(src0) at 0x2b,
// source 0 at the GPR field 0x08, abs(src0) at 0x07, neg(src1) at 0x06,
// definition 0 at the predicate field 0x03 and, when it exists,
// definition 1 at the predicate field 0x00.
1198 CodeEmitterGM107::emitDSETP()
1200 const CmpInstruction
*insn
= this->insn
->asCmp();
1202 switch (insn
->src(1).getFile()) {
1204 emitInsn(0x5b800000);
1205 emitGPR (0x14, insn
->src(1));
1207 case FILE_MEMORY_CONST
:
1208 emitInsn(0x4b800000);
1209 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1211 case FILE_IMMEDIATE
:
1212 emitInsn(0x36800000);
1213 emitIMMD(0x14, 19, insn
->src(1));
1216 assert(!"bad src1 file");
1220 if (insn
->op
!= OP_SET
) {
1222 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1223 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1224 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1226 assert(!"invalid set op");
1229 emitPRED(0x27, insn
->src(2));
1234 emitCond4(0x30, insn
->setCond
);
1235 emitABS (0x2c, insn
->src(1));
1236 emitNEG (0x2b, insn
->src(0));
1237 emitGPR (0x08, insn
->src(0));
1238 emitABS (0x07, insn
->src(0));
1239 emitNEG (0x06, insn
->src(1));
1240 emitPRED (0x03, insn
->def(0));
1241 if (insn
->defExists(1))
1242 emitPRED(0x00, insn
->def(1));
1247 /*******************************************************************************
1249 ******************************************************************************/
// Emits a float add in one of two forms, selected by longIMMD(src1).
// Short form: opcode by the file of source 1 - 0x5c580000 (GPR at 0x14),
// 0x4c580000 (constant buffer) or 0x38580000 (19-bit immediate); modifiers
// abs(src1) 0x31, neg(src0) 0x30, abs(src0) 0x2e, neg(src1) 0x2d; OP_SUB
// toggles bit 0x00002000 of code[1] (instruction bit 0x2d, the neg(src1)
// position).
// Long form: opcode 0x08000000 with a 32-bit immediate at 0x14; modifiers
// abs(src1) 0x39, neg(src0) 0x38, abs(src0) 0x36, neg(src1) 0x35; OP_SUB
// toggles bit 0x00080000 of code[1], the top bit of the 32-bit immediate
// field.
// Both forms put source 0 in the GPR field at 0x08 and the destination at
// 0x00.
1252 CodeEmitterGM107::emitFADD()
1254 if (!longIMMD(insn
->src(1))) {
1255 switch (insn
->src(1).getFile()) {
1257 emitInsn(0x5c580000);
1258 emitGPR (0x14, insn
->src(1));
1260 case FILE_MEMORY_CONST
:
1261 emitInsn(0x4c580000);
1262 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1264 case FILE_IMMEDIATE
:
1265 emitInsn(0x38580000);
1266 emitIMMD(0x14, 19, insn
->src(1));
1269 assert(!"bad src1 file");
1273 emitABS(0x31, insn
->src(1));
1274 emitNEG(0x30, insn
->src(0));
1276 emitABS(0x2e, insn
->src(0));
1277 emitNEG(0x2d, insn
->src(1));
1280 if (insn
->op
== OP_SUB
)
1281 code
[1] ^= 0x00002000;
1283 emitInsn(0x08000000);
1284 emitABS(0x39, insn
->src(1));
1285 emitNEG(0x38, insn
->src(0));
1287 emitABS(0x36, insn
->src(0));
1288 emitNEG(0x35, insn
->src(1));
1290 emitIMMD(0x14, 32, insn
->src(1));
1292 if (insn
->op
== OP_SUB
)
1293 code
[1] ^= 0x00080000;
1296 emitGPR(0x08, insn
->src(0));
1297 emitGPR(0x00, insn
->def(0));
// Emits a float multiply in one of two forms, selected by longIMMD(src1).
// Short form: opcode by the file of source 1 - 0x5c680000 (GPR at 0x14),
// 0x4c680000 (constant buffer) or 0x38680000 (19-bit immediate); the
// combined negation of both sources goes to bit 0x30.
// Long form: opcode 0x1e000000 with a 32-bit immediate at 0x14; when exactly
// one source is negated, the immediate's sign bit (0x00080000 in code[1]) is
// flipped.
// Both forms put source 0 in the GPR field at 0x08 and the destination at
// 0x00.
1301 CodeEmitterGM107::emitFMUL()
1303 if (!longIMMD(insn
->src(1))) {
1304 switch (insn
->src(1).getFile()) {
1306 emitInsn(0x5c680000);
1307 emitGPR (0x14, insn
->src(1));
1309 case FILE_MEMORY_CONST
:
1310 emitInsn(0x4c680000);
1311 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1313 case FILE_IMMEDIATE
:
1314 emitInsn(0x38680000);
1315 emitIMMD(0x14, 19, insn
->src(1));
1318 assert(!"bad src1 file");
1322 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1328 emitInsn(0x1e000000);
1332 emitIMMD(0x14, 32, insn
->src(1));
1333 if (insn
->src(0).mod
.neg() ^ insn
->src(1).mod
.neg())
1334 code
[1] ^= 0x00080000; /* flip immd sign bit */
1337 emitGPR(0x08, insn
->src(0));
1338 emitGPR(0x00, insn
->def(0));
// Emits a float fused multiply-add, dispatching first on the file of
// source 2. With source 2 as a GPR (written at 0x27) the opcode follows the
// file of source 1: 0x59800000 (GPR at 0x14), 0x49800000 (constant buffer),
// or for an immediate either 0x0c000000 with a 32-bit immediate (when
// longIMMD is true; the destination register id is asserted equal to the
// source 2 register id) or 0x32800000 with a 19-bit immediate.
// With source 2 in a constant buffer the opcode is 0x51800000, source 1 goes
// to the GPR field at 0x27 and source 2 is encoded with emitCBUF.
// Negation is written as neg(src2)/neg2(src0, src1) at either 0x39/0x38 or
// 0x31/0x30; source 0 goes to the GPR field at 0x08 and the destination to
// 0x00.
1342 CodeEmitterGM107::emitFFMA()
1344 bool isLongIMMD
= false;
1345 switch(insn
->src(2).getFile()) {
1347 switch (insn
->src(1).getFile()) {
1349 emitInsn(0x59800000);
1350 emitGPR (0x14, insn
->src(1));
1352 case FILE_MEMORY_CONST
:
1353 emitInsn(0x49800000);
1354 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1356 case FILE_IMMEDIATE
:
1357 if (longIMMD(insn
->getSrc(1))) {
1358 assert(insn
->getDef(0)->reg
.data
.id
== insn
->getSrc(2)->reg
.data
.id
);
1360 emitInsn(0x0c000000);
1361 emitIMMD(0x14, 32, insn
->src(1));
1363 emitInsn(0x32800000);
1364 emitIMMD(0x14, 19, insn
->src(1));
1368 assert(!"bad src1 file");
1372 emitGPR (0x27, insn
->src(2));
1374 case FILE_MEMORY_CONST
:
1375 emitInsn(0x51800000);
1376 emitGPR (0x27, insn
->src(1));
1377 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1380 assert(!"bad src2 file");
1385 emitNEG (0x39, insn
->src(2));
1386 emitNEG2(0x38, insn
->src(0), insn
->src(1));
1392 emitNEG (0x31, insn
->src(2));
1393 emitNEG2(0x30, insn
->src(0), insn
->src(1));
1398 emitGPR(0x08, insn
->src(0));
1399 emitGPR(0x00, insn
->def(0));
// Emits opcode 0x50800000 with a 4-bit function selector at 0x14:
// OP_COS 0, OP_SIN 1, OP_EX2 2, OP_LG2 3, OP_RCP 4 + 2 * subOp,
// OP_RSQ 5 + 2 * subOp, OP_SQRT 8. An assert flags an invalid operation.
// neg(src0) goes to 0x30, abs(src0) to 0x2e, source 0 to the GPR field at
// 0x08 and the destination to 0x00.
1403 CodeEmitterGM107::emitMUFU()
1408 case OP_COS
: mufu
= 0; break;
1409 case OP_SIN
: mufu
= 1; break;
1410 case OP_EX2
: mufu
= 2; break;
1411 case OP_LG2
: mufu
= 3; break;
1412 case OP_RCP
: mufu
= 4 + 2 * insn
->subOp
; break;
1413 case OP_RSQ
: mufu
= 5 + 2 * insn
->subOp
; break;
1414 case OP_SQRT
: mufu
= 8; break;
1416 assert(!"invalid mufu");
1420 emitInsn (0x50800000);
1422 emitNEG (0x30, insn
->src(0));
1423 emitABS (0x2e, insn
->src(0));
1424 emitField(0x14, 4, mufu
);
1425 emitGPR (0x08, insn
->src(0));
1426 emitGPR (0x00, insn
->def(0));
// Emits a float min/max. The opcode is chosen by the file of source 1:
// 0x5c600000 (GPR at 0x14), 0x4c600000 (constant buffer) or 0x38600000
// (19-bit immediate). Bit 0x2a is set for OP_MAX. Modifiers: abs(src1) at
// 0x31, neg(src0) at 0x30, abs(src0) at 0x2e, neg(src1) at 0x2d.
// Source 0 goes to the GPR field at 0x08 and the destination to 0x00.
1430 CodeEmitterGM107::emitFMNMX()
1432 switch (insn
->src(1).getFile()) {
1434 emitInsn(0x5c600000);
1435 emitGPR (0x14, insn
->src(1));
1437 case FILE_MEMORY_CONST
:
1438 emitInsn(0x4c600000);
1439 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1441 case FILE_IMMEDIATE
:
1442 emitInsn(0x38600000);
1443 emitIMMD(0x14, 19, insn
->src(1));
1446 assert(!"bad src1 file");
1450 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1453 emitABS(0x31, insn
->src(1));
1454 emitNEG(0x30, insn
->src(0));
1456 emitABS(0x2e, insn
->src(0));
1457 emitNEG(0x2d, insn
->src(1));
1459 emitGPR(0x08, insn
->src(0));
1460 emitGPR(0x00, insn
->def(0));
// Emits the operation whose opcode is chosen by the file of source 0:
// 0x5c900000 (GPR at 0x14), 0x4c900000 (constant buffer) or 0x38900000
// (19-bit immediate). abs(src0) goes to 0x31, neg(src0) to 0x2d, bit 0x27 is
// set when insn->op is OP_PREEX2, and the destination goes to the GPR field
// at 0x00.
1464 CodeEmitterGM107::emitRRO()
1466 switch (insn
->src(0).getFile()) {
1468 emitInsn(0x5c900000);
1469 emitGPR (0x14, insn
->src(0));
1471 case FILE_MEMORY_CONST
:
1472 emitInsn(0x4c900000);
1473 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1475 case FILE_IMMEDIATE
:
1476 emitInsn(0x38900000);
1477 emitIMMD(0x14, 19, insn
->src(0));
1480 assert(!"bad src file");
1484 emitABS (0x31, insn
->src(0));
1485 emitNEG (0x2d, insn
->src(0));
1486 emitField(0x27, 1, insn
->op
== OP_PREEX2
);
1487 emitGPR (0x00, insn
->def(0));
// Emits a float compare for a CmpInstruction. The condition starts as
// insn->setCond and is passed through reverseCondCode() when source 2 has
// the neg modifier. Dispatch is first on the file of source 2: with source 2
// as a GPR (written at 0x27) the opcode follows the file of source 1 -
// 0x5ba00000 (GPR at 0x14), 0x4ba00000 (constant buffer) or 0x36a00000
// (19-bit immediate); with source 2 in a constant buffer the opcode is
// 0x53a00000, source 1 goes to the GPR field at 0x27 and source 2 is encoded
// with emitCBUF.
// The 4-bit condition goes to 0x30, source 0 to the GPR field at 0x08 and
// the destination to 0x00.
1491 CodeEmitterGM107::emitFCMP()
1493 const CmpInstruction
*insn
= this->insn
->asCmp();
1494 CondCode cc
= insn
->setCond
;
1496 if (insn
->src(2).mod
.neg())
1497 cc
= reverseCondCode(cc
);
1499 switch(insn
->src(2).getFile()) {
1501 switch (insn
->src(1).getFile()) {
1503 emitInsn(0x5ba00000);
1504 emitGPR (0x14, insn
->src(1));
1506 case FILE_MEMORY_CONST
:
1507 emitInsn(0x4ba00000);
1508 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1510 case FILE_IMMEDIATE
:
1511 emitInsn(0x36a00000);
1512 emitIMMD(0x14, 19, insn
->src(1));
1515 assert(!"bad src1 file");
1518 emitGPR (0x27, insn
->src(2));
1520 case FILE_MEMORY_CONST
:
1521 emitInsn(0x53a00000);
1522 emitGPR (0x27, insn
->src(1));
1523 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1526 assert(!"bad src2 file");
1530 emitCond4(0x30, cc
);
1532 emitGPR (0x08, insn
->src(0));
1533 emitGPR (0x00, insn
->def(0));
// Emits a float compare for a CmpInstruction. The opcode is chosen by the
// file of source 1: 0x58000000 (GPR at 0x14), 0x48000000 (constant buffer)
// or 0x30000000 (19-bit immediate).
// When insn->op is not OP_SET, the 2-bit field at 0x2d holds 0/1/2 for
// OP_SET_AND/OP_SET_OR/OP_SET_XOR and source 2 is the predicate at 0x27.
// Then: abs(src0) at 0x36, neg(src1) at 0x35, (dType == TYPE_F32) at 0x34,
// the 4-bit condition at 0x30, abs(src1) at 0x2c, neg(src0) at 0x2b,
// source 0 at the GPR field 0x08 and the destination at 0x00.
1537 CodeEmitterGM107::emitFSET()
1539 const CmpInstruction
*insn
= this->insn
->asCmp();
1541 switch (insn
->src(1).getFile()) {
1543 emitInsn(0x58000000);
1544 emitGPR (0x14, insn
->src(1));
1546 case FILE_MEMORY_CONST
:
1547 emitInsn(0x48000000);
1548 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1550 case FILE_IMMEDIATE
:
1551 emitInsn(0x30000000);
1552 emitIMMD(0x14, 19, insn
->src(1));
1555 assert(!"bad src1 file");
1559 if (insn
->op
!= OP_SET
) {
1561 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1562 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1563 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1565 assert(!"invalid set op");
1568 emitPRED(0x27, insn
->src(2));
1574 emitABS (0x36, insn
->src(0));
1575 emitNEG (0x35, insn
->src(1));
1576 emitField(0x34, 1, insn
->dType
== TYPE_F32
);
1577 emitCond4(0x30, insn
->setCond
);
1579 emitABS (0x2c, insn
->src(1));
1580 emitNEG (0x2b, insn
->src(0));
1581 emitGPR (0x08, insn
->src(0));
1582 emitGPR (0x00, insn
->def(0));
// Emits a float compare that writes predicates. The opcode is chosen by the
// file of source 1: 0x5bb00000 (GPR at 0x14), 0x4bb00000 (constant buffer)
// or 0x36b00000 (19-bit immediate).
// When insn->op is not OP_SET, the 2-bit field at 0x2d holds 0/1/2 for
// OP_SET_AND/OP_SET_OR/OP_SET_XOR and source 2 is the predicate at 0x27.
// Then: the 4-bit condition at 0x30, abs(src1) at 0x2c, neg(src0) at 0x2b,
// source 0 at the GPR field 0x08, abs(src0) at 0x07, neg(src1) at 0x06,
// definition 0 at the predicate field 0x03 and, when it exists,
// definition 1 at the predicate field 0x00.
1586 CodeEmitterGM107::emitFSETP()
1588 const CmpInstruction
*insn
= this->insn
->asCmp();
1590 switch (insn
->src(1).getFile()) {
1592 emitInsn(0x5bb00000);
1593 emitGPR (0x14, insn
->src(1));
1595 case FILE_MEMORY_CONST
:
1596 emitInsn(0x4bb00000);
1597 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1599 case FILE_IMMEDIATE
:
1600 emitInsn(0x36b00000);
1601 emitIMMD(0x14, 19, insn
->src(1));
1604 assert(!"bad src1 file");
1608 if (insn
->op
!= OP_SET
) {
1610 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
1611 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
1612 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
1614 assert(!"invalid set op");
1617 emitPRED(0x27, insn
->src(2));
1622 emitCond4(0x30, insn
->setCond
);
1624 emitABS (0x2c, insn
->src(1));
1625 emitNEG (0x2b, insn
->src(0));
1626 emitGPR (0x08, insn
->src(0));
1627 emitABS (0x07, insn
->src(0));
1628 emitNEG (0x06, insn
->src(1));
1629 emitPRED (0x03, insn
->def(0));
1630 if (insn
->defExists(1))
1631 emitPRED(0x00, insn
->def(1));
/* Encode FSWZADD (opcode 0x50f80000). insn->lanes is written as a 1-bit
 * flag at 0x26 and subOp as an 8-bit field at 0x1c. src(1) is only emitted
 * as a GPR when it is not the predicate source (predSrc != 1). */
1637 CodeEmitterGM107::emitFSWZADD()
1639 emitInsn (0x50f80000);
1643 emitField(0x26, 1, insn
->lanes
); /* abused for .ndv */
1644 emitField(0x1c, 8, insn
->subOp
);
1645 if (insn
->predSrc
!= 1)
1646 emitGPR (0x14, insn
->src(1));
1649 emitGPR (0x08, insn
->src(0));
1650 emitGPR (0x00, insn
->def(0));
1653 /*******************************************************************************
1655 ******************************************************************************/
/* Encode a two-source logic op. The op is mapped to a 2-bit code
 * (OP_AND=0, OP_OR=1, OP_XOR=2). When src(1) is not a long immediate the
 * short forms 0x5c400000 / 0x4c400000 / 0x38400000 are used with the code
 * at 0x29; otherwise opcode 0x04000000 is used with the code at 0x35 and a
 * 32-bit immediate. Invert modifiers are encoded for both sources. */
1658 CodeEmitterGM107::emitLOP()
1663 case OP_AND
: lop
= 0; break;
1664 case OP_OR
: lop
= 1; break;
1665 case OP_XOR
: lop
= 2; break;
1667 assert(!"invalid lop");
1671 if (!longIMMD(insn
->src(1))) {
1672 switch (insn
->src(1).getFile()) {
1674 emitInsn(0x5c400000);
1675 emitGPR (0x14, insn
->src(1));
1677 case FILE_MEMORY_CONST
:
1678 emitInsn(0x4c400000);
1679 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1681 case FILE_IMMEDIATE
:
1682 emitInsn(0x38400000);
1683 emitIMMD(0x14, 19, insn
->src(1));
1686 assert(!"bad src1 file");
1692 emitField(0x29, 2, lop
);
1693 emitINV (0x28, insn
->src(1));
1694 emitINV (0x27, insn
->src(0));
1696 emitInsn (0x04000000);
1698 emitINV (0x38, insn
->src(1));
1699 emitINV (0x37, insn
->src(0));
1700 emitField(0x35, 2, lop
);
1702 emitIMMD (0x14, 32, insn
->src(1));
1705 emitGPR (0x08, insn
->src(0));
1706 emitGPR (0x00, insn
->def(0));
1709 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
/* Encode NOT. The single operand is src(0): it drives the long-immediate
 * test, the opcode selection (0x5c400700 / 0x4c400700 / 0x38400700) and the
 * operand encoding at 0x14. The long-immediate form (0x05600000) must
 * therefore encode src(0) as well; it previously referenced src(1), which
 * no other line of this emitter uses. */
1711 CodeEmitterGM107::emitNOT()
1713 if (!longIMMD(insn
->src(0))) {
1714 switch (insn
->src(0).getFile()) {
1716 emitInsn(0x5c400700);
1717 emitGPR (0x14, insn
->src(0));
1719 case FILE_MEMORY_CONST
:
1720 emitInsn(0x4c400700);
1721 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
1723 case FILE_IMMEDIATE
:
1724 emitInsn(0x38400700);
1725 emitIMMD(0x14, 19, insn
->src(0));
1728 assert(!"bad src0 file");
1733 emitInsn (0x05600000);
1734 emitIMMD (0x14, 32, insn
->src(0));
1738 emitGPR(0x00, insn
->def(0));
/* Encode integer add/sub. Short forms (0x5c100000 / 0x4c100000 /
 * 0x38100000) carry negate bits for both sources at 0x31/0x30; the
 * long-immediate form (0x1c000000) carries a negate bit for src(0) at 0x38
 * and a 32-bit immediate. For OP_SUB, bit 0x00010000 of code[1] is toggled
 * afterwards. */
1742 CodeEmitterGM107::emitIADD()
1744 if (!longIMMD(insn
->src(1))) {
1745 switch (insn
->src(1).getFile()) {
1747 emitInsn(0x5c100000);
1748 emitGPR (0x14, insn
->src(1));
1750 case FILE_MEMORY_CONST
:
1751 emitInsn(0x4c100000);
1752 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1754 case FILE_IMMEDIATE
:
1755 emitInsn(0x38100000);
1756 emitIMMD(0x14, 19, insn
->src(1));
1759 assert(!"bad src1 file");
1763 emitNEG(0x31, insn
->src(0));
1764 emitNEG(0x30, insn
->src(1));
1768 emitInsn(0x1c000000);
1769 emitNEG (0x38, insn
->src(0));
1773 emitIMMD(0x14, 32, insn
->src(1));
1776 if (insn
->op
== OP_SUB
)
1777 code
[1] ^= 0x00010000;
1779 emitGPR(0x08, insn
->src(0));
1780 emitGPR(0x00, insn
->def(0));
/* Encode integer multiply. Both the short forms (0x5c380000 / 0x4c380000 /
 * 0x38380000) and the long-immediate form (0x1f000000) record signedness
 * of sType and dType and whether subOp is NV50_IR_SUBOP_MUL_HIGH; only the
 * bit positions differ (0x29/0x28/0x27 vs. 0x37/0x36/0x35). */
1784 CodeEmitterGM107::emitIMUL()
1786 if (!longIMMD(insn
->src(1))) {
1787 switch (insn
->src(1).getFile()) {
1789 emitInsn(0x5c380000);
1790 emitGPR (0x14, insn
->src(1));
1792 case FILE_MEMORY_CONST
:
1793 emitInsn(0x4c380000);
1794 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1796 case FILE_IMMEDIATE
:
1797 emitInsn(0x38380000);
1798 emitIMMD(0x14, 19, insn
->src(1));
1801 assert(!"bad src1 file");
1805 emitField(0x29, 1, isSignedType(insn
->sType
));
1806 emitField(0x28, 1, isSignedType(insn
->dType
));
1807 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1809 emitInsn (0x1f000000);
1810 emitField(0x37, 1, isSignedType(insn
->sType
));
1811 emitField(0x36, 1, isSignedType(insn
->dType
));
1812 emitField(0x35, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1814 emitIMMD (0x14, 32, insn
->src(1));
1817 emitGPR(0x08, insn
->src(0));
1818 emitGPR(0x00, insn
->def(0));
/* Encode integer multiply-add. The outer switch is on the file of src(2):
 * in the first branch src(1) selects the opcode (0x5a000000 / 0x4a000000 /
 * 0x34000000) and src(2) goes to the GPR slot at 0x27; when src(2) is
 * FILE_MEMORY_CONST, opcode 0x52000000 is used with src(1) in the 0x27 slot
 * and src(2) in the constant-buffer slot. */
1822 CodeEmitterGM107::emitIMAD()
1824 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1825 switch(insn
->src(2).getFile()) {
1827 switch (insn
->src(1).getFile()) {
1829 emitInsn(0x5a000000);
1830 emitGPR (0x14, insn
->src(1));
1832 case FILE_MEMORY_CONST
:
1833 emitInsn(0x4a000000);
1834 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1836 case FILE_IMMEDIATE
:
1837 emitInsn(0x34000000);
1838 emitIMMD(0x14, 19, insn
->src(1));
1841 assert(!"bad src1 file");
1844 emitGPR (0x27, insn
->src(2));
1846 case FILE_MEMORY_CONST
:
1847 emitInsn(0x52000000);
1848 emitGPR (0x27, insn
->src(1));
1849 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1852 assert(!"bad src2 file");
1856 emitField(0x36, 1, insn
->subOp
== NV50_IR_SUBOP_MUL_HIGH
);
1857 emitField(0x35, 1, isSignedType(insn
->sType
));
1858 emitNEG (0x34, insn
->src(2));
1859 emitNEG2 (0x33, insn
->src(0), insn
->src(1));
1862 emitField(0x30, 1, isSignedType(insn
->dType
));
1864 emitGPR (0x08, insn
->src(0));
1865 emitGPR (0x00, insn
->def(0));
/* Encode ISCADD. src(1) must be an immediate and is written as a 5-bit
 * value at 0x27; the opcode (0x5c180000 / 0x4c180000 / 0x38180000) is
 * selected by the file of src(2), which is also the operand encoded at
 * 0x14. The assert text now names src2, the operand the switch actually
 * inspects. */
1869 CodeEmitterGM107::emitISCADD()
1871 assert(insn
->src(1).get()->asImm());
1873 switch (insn
->src(2).getFile()) {
1875 emitInsn(0x5c180000);
1876 emitGPR (0x14, insn
->src(2));
1878 case FILE_MEMORY_CONST
:
1879 emitInsn(0x4c180000);
1880 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1882 case FILE_IMMEDIATE
:
1883 emitInsn(0x38180000);
1884 emitIMMD(0x14, 19, insn
->src(2));
1887 assert(!"bad src2 file");
1890 emitNEG (0x31, insn
->src(0));
1891 emitNEG (0x30, insn
->src(2));
1893 emitIMMD(0x27, 5, insn
->src(1));
1894 emitGPR (0x08, insn
->src(0));
1895 emitGPR (0x00, insn
->def(0));
/* Encode XMAD. src(0) must be a GPR. Four operand layouts are handled:
 * src(2) in a constant buffer (0x51000000), src(1) in a constant buffer
 * (0x4e000000), src(1) immediate (0x36000000, 16-bit, and the
 * NV50_IR_SUBOP_XMAD_H1(1) flag must be clear), and all-GPR (0x5b000000).
 * The local flags constbuf/psl_mrg/immediate select the bit positions of
 * the remaining fields; the lines that update those flags per branch are
 * not visible in this view. */
1899 CodeEmitterGM107::emitXMAD()
1901 assert(insn
->src(0).getFile() == FILE_GPR
);
1903 bool constbuf
= false;
1904 bool psl_mrg
= true;
1905 bool immediate
= false;
1906 if (insn
->src(2).getFile() == FILE_MEMORY_CONST
) {
1907 assert(insn
->src(1).getFile() == FILE_GPR
);
1910 emitInsn(0x51000000);
1911 emitGPR(0x27, insn
->src(1));
1912 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
1913 } else if (insn
->src(1).getFile() == FILE_MEMORY_CONST
) {
1914 assert(insn
->src(2).getFile() == FILE_GPR
);
1916 emitInsn(0x4e000000);
1917 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1918 emitGPR(0x27, insn
->src(2));
1919 } else if (insn
->src(1).getFile() == FILE_IMMEDIATE
) {
1920 assert(insn
->src(2).getFile() == FILE_GPR
);
1921 assert(!(insn
->subOp
& NV50_IR_SUBOP_XMAD_H1(1)));
1923 emitInsn(0x36000000);
1924 emitIMMD(0x14, 16, insn
->src(1));
1925 emitGPR(0x27, insn
->src(2));
1927 assert(insn
->src(1).getFile() == FILE_GPR
);
1928 assert(insn
->src(2).getFile() == FILE_GPR
);
1929 emitInsn(0x5b000000);
1930 emitGPR(0x14, insn
->src(1));
1931 emitGPR(0x27, insn
->src(2));
1935 emitField(constbuf
? 0x37 : 0x24, 2, insn
->subOp
& 0x3);
1937 unsigned cmode
= (insn
->subOp
& NV50_IR_SUBOP_XMAD_CMODE_MASK
);
1938 cmode
>>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT
;
1939 emitField(0x32, constbuf
? 2 : 3, cmode
);
1941 emitX(constbuf
? 0x36 : 0x26);
1944 emitGPR(0x0, insn
->def(0));
1945 emitGPR(0x8, insn
->src(0));
1948 if (isSignedType(insn
->sType
)) {
1949 uint16_t h1s
= insn
->subOp
& NV50_IR_SUBOP_XMAD_H1_MASK
;
1950 emitField(0x30, 2, h1s
>> NV50_IR_SUBOP_XMAD_H1_SHIFT
);
1952 emitField(0x35, 1, insn
->subOp
& NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1954 bool h1
= insn
->subOp
& NV50_IR_SUBOP_XMAD_H1(1);
1955 emitField(constbuf
? 0x34 : 0x23, 1, h1
);
/* Encode integer min/max (0x5c200000 / 0x4c200000 / 0x38200000 by file of
 * src(1)). Bit 0x30 records signedness of dType, subOp is a 2-bit field at
 * 0x2b, and bit 0x2a is set when the op is OP_MAX. */
1960 CodeEmitterGM107::emitIMNMX()
1962 switch (insn
->src(1).getFile()) {
1964 emitInsn(0x5c200000);
1965 emitGPR (0x14, insn
->src(1));
1967 case FILE_MEMORY_CONST
:
1968 emitInsn(0x4c200000);
1969 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
1971 case FILE_IMMEDIATE
:
1972 emitInsn(0x38200000);
1973 emitIMMD(0x14, 19, insn
->src(1));
1976 assert(!"bad src1 file");
1980 emitField(0x30, 1, isSignedType(insn
->dType
));
1982 emitField(0x2b, 2, insn
->subOp
);
1983 emitField(0x2a, 1, insn
->op
== OP_MAX
);
1985 emitGPR (0x08, insn
->src(0));
1986 emitGPR (0x00, insn
->def(0));
/* Encode ICMP. The condition code starts as setCond and is passed through
 * reverseCondCode() when src(2) carries a negate modifier. The outer switch
 * is on the file of src(2); when src(2) is FILE_MEMORY_CONST, src(1) moves
 * to the GPR slot at 0x27 (opcode 0x53400000). */
1990 CodeEmitterGM107::emitICMP()
1992 const CmpInstruction
*insn
= this->insn
->asCmp();
1993 CondCode cc
= insn
->setCond
;
1995 if (insn
->src(2).mod
.neg())
1996 cc
= reverseCondCode(cc
);
1998 switch(insn
->src(2).getFile()) {
2000 switch (insn
->src(1).getFile()) {
2002 emitInsn(0x5b400000);
2003 emitGPR (0x14, insn
->src(1));
2005 case FILE_MEMORY_CONST
:
2006 emitInsn(0x4b400000);
2007 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2009 case FILE_IMMEDIATE
:
2010 emitInsn(0x36400000);
2011 emitIMMD(0x14, 19, insn
->src(1));
2014 assert(!"bad src1 file");
2017 emitGPR (0x27, insn
->src(2));
2019 case FILE_MEMORY_CONST
:
2020 emitInsn(0x53400000);
2021 emitGPR (0x27, insn
->src(1));
2022 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
2025 assert(!"bad src2 file");
2029 emitCond3(0x31, cc
);
2030 emitField(0x30, 1, isSignedType(insn
->sType
));
2031 emitGPR (0x08, insn
->src(0));
2032 emitGPR (0x00, insn
->def(0));
/* Encode ISET (0x5b500000 / 0x4b500000 / 0x36500000 by file of src(1)).
 * When op != OP_SET a 2-bit combine code (AND=0, OR=1, XOR=2) goes to 0x2d
 * and src(2) is emitted as a predicate. Bit 0x30 records signedness of
 * sType; bit 0x2c is set when dType is TYPE_F32. */
2036 CodeEmitterGM107::emitISET()
2038 const CmpInstruction
*insn
= this->insn
->asCmp();
2040 switch (insn
->src(1).getFile()) {
2042 emitInsn(0x5b500000);
2043 emitGPR (0x14, insn
->src(1));
2045 case FILE_MEMORY_CONST
:
2046 emitInsn(0x4b500000);
2047 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2049 case FILE_IMMEDIATE
:
2050 emitInsn(0x36500000);
2051 emitIMMD(0x14, 19, insn
->src(1));
2054 assert(!"bad src1 file");
2058 if (insn
->op
!= OP_SET
) {
2060 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
2061 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
2062 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
2064 assert(!"invalid set op");
2067 emitPRED(0x27, insn
->src(2));
2072 emitCond3(0x31, insn
->setCond
);
2073 emitField(0x30, 1, isSignedType(insn
->sType
));
2075 emitField(0x2c, 1, insn
->dType
== TYPE_F32
);
2077 emitGPR (0x08, insn
->src(0));
2078 emitGPR (0x00, insn
->def(0));
/* Encode ISETP (0x5b600000 / 0x4b600000 / 0x36600000 by file of src(1)).
 * Same combine-op handling as ISET; the result goes to predicate def(0) at
 * 0x03 and, if it exists, def(1) at 0x00. */
2082 CodeEmitterGM107::emitISETP()
2084 const CmpInstruction
*insn
= this->insn
->asCmp();
2086 switch (insn
->src(1).getFile()) {
2088 emitInsn(0x5b600000);
2089 emitGPR (0x14, insn
->src(1));
2091 case FILE_MEMORY_CONST
:
2092 emitInsn(0x4b600000);
2093 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2095 case FILE_IMMEDIATE
:
2096 emitInsn(0x36600000);
2097 emitIMMD(0x14, 19, insn
->src(1));
2100 assert(!"bad src1 file");
2104 if (insn
->op
!= OP_SET
) {
2106 case OP_SET_AND
: emitField(0x2d, 2, 0); break;
2107 case OP_SET_OR
: emitField(0x2d, 2, 1); break;
2108 case OP_SET_XOR
: emitField(0x2d, 2, 2); break;
2110 assert(!"invalid set op");
2113 emitPRED(0x27, insn
->src(2));
2118 emitCond3(0x31, insn
->setCond
);
2119 emitField(0x30, 1, isSignedType(insn
->sType
));
2121 emitGPR (0x08, insn
->src(0));
2122 emitPRED (0x03, insn
->def(0));
2123 if (insn
->defExists(1))
2124 emitPRED(0x00, insn
->def(1));
/* Encode SHL (0x5c480000 / 0x4c480000 / 0x38480000 by file of src(1)).
 * Bit 0x27 is set when subOp equals NV50_IR_SUBOP_SHIFT_WRAP. */
2130 CodeEmitterGM107::emitSHL()
2132 switch (insn
->src(1).getFile()) {
2134 emitInsn(0x5c480000);
2135 emitGPR (0x14, insn
->src(1));
2137 case FILE_MEMORY_CONST
:
2138 emitInsn(0x4c480000);
2139 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2141 case FILE_IMMEDIATE
:
2142 emitInsn(0x38480000);
2143 emitIMMD(0x14, 19, insn
->src(1));
2146 assert(!"bad src1 file");
2152 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2153 emitGPR (0x08, insn
->src(0));
2154 emitGPR (0x00, insn
->def(0));
/* Encode SHR (0x5c280000 / 0x4c280000 / 0x38280000 by file of src(1)).
 * Bit 0x30 records signedness of dType; bit 0x27 is set when subOp equals
 * NV50_IR_SUBOP_SHIFT_WRAP. */
2158 CodeEmitterGM107::emitSHR()
2160 switch (insn
->src(1).getFile()) {
2162 emitInsn(0x5c280000);
2163 emitGPR (0x14, insn
->src(1));
2165 case FILE_MEMORY_CONST
:
2166 emitInsn(0x4c280000);
2167 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2169 case FILE_IMMEDIATE
:
2170 emitInsn(0x38280000);
2171 emitIMMD(0x14, 19, insn
->src(1));
2174 assert(!"bad src1 file");
2178 emitField(0x30, 1, isSignedType(insn
->dType
));
2181 emitField(0x27, 1, insn
->subOp
== NV50_IR_SUBOP_SHIFT_WRAP
);
2182 emitGPR (0x08, insn
->src(0));
2183 emitGPR (0x00, insn
->def(0));
/* Encode SHF, the three-source shift. The opcode depends on both the file
 * of src(1) (register or immediate form) and on whether the op is OP_SHL.
 * Unlike SHL/SHR, subOp is tested as a bit mask here (SHIFT_WRAP -> bit
 * 0x32, SHIFT_HIGH -> bit 0x30). The 2-bit `type` field at 0x25 is derived
 * from sType; the case labels of that switch are not visible in this
 * view. */
2187 CodeEmitterGM107::emitSHF()
2191 switch (insn
->src(1).getFile()) {
2193 emitInsn(insn
->op
== OP_SHL
? 0x5bf80000 : 0x5cf80000);
2194 emitGPR(0x14, insn
->src(1));
2196 case FILE_IMMEDIATE
:
2197 emitInsn(insn
->op
== OP_SHL
? 0x36f80000 : 0x38f80000);
2198 emitIMMD(0x14, 19, insn
->src(1));
2201 assert(!"bad src1 file");
2205 switch (insn
->sType
) {
2217 emitField(0x32, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_WRAP
));
2219 emitField(0x30, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHIFT_HIGH
));
2221 emitGPR (0x27, insn
->src(2));
2222 emitField(0x25, 2, type
);
2223 emitGPR (0x08, insn
->src(0));
2224 emitGPR (0x00, insn
->def(0));
/* Encode POPC. The single operand is src(0); its file selects the opcode
 * (0x5c080000 / 0x4c080000 / 0x38080000) and its invert modifier is encoded
 * at 0x28. The assert text now names src0, the operand the switch actually
 * inspects. */
2228 CodeEmitterGM107::emitPOPC()
2230 switch (insn
->src(0).getFile()) {
2232 emitInsn(0x5c080000);
2233 emitGPR (0x14, insn
->src(0));
2235 case FILE_MEMORY_CONST
:
2236 emitInsn(0x4c080000);
2237 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2239 case FILE_IMMEDIATE
:
2240 emitInsn(0x38080000);
2241 emitIMMD(0x14, 19, insn
->src(0));
2244 assert(!"bad src0 file");
2248 emitINV(0x28, insn
->src(0));
2249 emitGPR(0x00, insn
->def(0));
/* Encode BFI. The outer switch is on the file of src(2): in the first
 * branch src(1) selects the opcode (0x5bf00000 / 0x4bf00000 / 0x36f00000)
 * and src(2) goes to the GPR slot at 0x27; when src(2) is
 * FILE_MEMORY_CONST, opcode 0x53f00000 is used with src(1) in the 0x27
 * slot. */
2253 CodeEmitterGM107::emitBFI()
2255 switch(insn
->src(2).getFile()) {
2257 switch (insn
->src(1).getFile()) {
2259 emitInsn(0x5bf00000);
2260 emitGPR (0x14, insn
->src(1));
2262 case FILE_MEMORY_CONST
:
2263 emitInsn(0x4bf00000);
2264 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2266 case FILE_IMMEDIATE
:
2267 emitInsn(0x36f00000);
2268 emitIMMD(0x14, 19, insn
->src(1));
2271 assert(!"bad src1 file");
2274 emitGPR (0x27, insn
->src(2));
2276 case FILE_MEMORY_CONST
:
2277 emitInsn(0x53f00000);
2278 emitGPR (0x27, insn
->src(1));
2279 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(2));
2282 assert(!"bad src2 file");
2287 emitGPR (0x08, insn
->src(0));
2288 emitGPR (0x00, insn
->def(0));
/* Encode BFE (0x5c000000 / 0x4c000000 / 0x38000000 by file of src(1)).
 * Bit 0x30 records signedness of dType; bit 0x28 is set when subOp equals
 * NV50_IR_SUBOP_EXTBF_REV. */
2292 CodeEmitterGM107::emitBFE()
2294 switch (insn
->src(1).getFile()) {
2296 emitInsn(0x5c000000);
2297 emitGPR (0x14, insn
->src(1));
2299 case FILE_MEMORY_CONST
:
2300 emitInsn(0x4c000000);
2301 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
2303 case FILE_IMMEDIATE
:
2304 emitInsn(0x38000000);
2305 emitIMMD(0x14, 19, insn
->src(1));
2308 assert(!"bad src1 file");
2312 emitField(0x30, 1, isSignedType(insn
->dType
));
2314 emitField(0x28, 1, insn
->subOp
== NV50_IR_SUBOP_EXTBF_REV
);
2315 emitGPR (0x08, insn
->src(0));
2316 emitGPR (0x00, insn
->def(0));
/* Encode FLO. The single operand is src(0); its file selects the opcode
 * (0x5c300000 / 0x4c300000 / 0x38300000). Bit 0x30 records signedness of
 * dType, bit 0x29 is set when subOp equals NV50_IR_SUBOP_BFIND_SAMT, and
 * the invert modifier of src(0) is encoded at 0x28. The assert text now
 * names src0, the operand the switch actually inspects. */
2320 CodeEmitterGM107::emitFLO()
2322 switch (insn
->src(0).getFile()) {
2324 emitInsn(0x5c300000);
2325 emitGPR (0x14, insn
->src(0));
2327 case FILE_MEMORY_CONST
:
2328 emitInsn(0x4c300000);
2329 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(0));
2331 case FILE_IMMEDIATE
:
2332 emitInsn(0x38300000);
2333 emitIMMD(0x14, 19, insn
->src(0));
2336 assert(!"bad src0 file");
2340 emitField(0x30, 1, isSignedType(insn
->dType
));
2342 emitField(0x29, 1, insn
->subOp
== NV50_IR_SUBOP_BFIND_SAMT
);
2343 emitINV (0x28, insn
->src(0));
2344 emitGPR (0x00, insn
->def(0));
2347 /*******************************************************************************
2349 ******************************************************************************/
/* Write the 3-bit load/store size code for `type` at bit position `pos`.
 * Mapping by typeSizeof(type): 1 byte -> 0 (unsigned) / 1 (signed),
 * 2 bytes -> 2 (unsigned) / 3 (signed), 4 -> 4, 8 -> 5, 16 -> 6. Any other
 * size trips the "bad type" assert. */
2352 CodeEmitterGM107::emitLDSTs(int pos
, DataType type
)
2356 switch (typeSizeof(type
)) {
2357 case 1: data
= isSignedType(type
) ? 1 : 0; break;
2358 case 2: data
= isSignedType(type
) ? 3 : 2; break;
2359 case 4: data
= 4; break;
2360 case 8: data
= 5; break;
2361 case 16: data
= 6; break;
2363 assert(!"bad type");
2367 emitField(pos
, 3, data
);
/* Write the 2-bit cache-mode code for insn->cache at bit position `pos`:
 * CACHE_CA -> 0, CACHE_CG -> 1, CACHE_CS -> 2, CACHE_CV -> 3. Any other
 * value trips the "invalid caching mode" assert. */
2371 CodeEmitterGM107::emitLDSTc(int pos
)
2375 switch (insn
->cache
) {
2376 case CACHE_CA
: mode
= 0; break;
2377 case CACHE_CG
: mode
= 1; break;
2378 case CACHE_CS
: mode
= 2; break;
2379 case CACHE_CV
: mode
= 3; break;
2381 assert(!"invalid caching mode");
2385 emitField(pos
, 2, mode
);
/* Encode LDC (opcode 0xef900000): size code of dType at 0x30, subOp as a
 * 2-bit field at 0x2c, src(0) through emitCBUF, def(0) as the destination
 * GPR. */
2389 CodeEmitterGM107::emitLDC()
2391 emitInsn (0xef900000);
2392 emitLDSTs(0x30, insn
->dType
);
2393 emitField(0x2c, 2, insn
->subOp
);
2394 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn
->src(0));
2395 emitGPR (0x00, insn
->def(0));
/* Encode LDL (opcode 0xef400000): size code of dType at 0x30, src(0) as
 * the address, def(0) as the destination GPR. */
2399 CodeEmitterGM107::emitLDL()
2401 emitInsn (0xef400000);
2402 emitLDSTs(0x30, insn
->dType
);
2404 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2405 emitGPR (0x00, insn
->def(0));
/* Encode LDS (opcode 0xef480000): size code of dType at 0x30, src(0) as
 * the address, def(0) as the destination GPR. */
2409 CodeEmitterGM107::emitLDS()
2411 emitInsn (0xef480000);
2412 emitLDSTs(0x30, insn
->dType
);
2413 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2414 emitGPR (0x00, insn
->def(0));
/* Encode LD (opcode 0x80000000): size code of dType at 0x35; bit 0x34 is
 * set when the first indirect of src(0) has size 8.
 * NOTE(review): getIndirect(0) is dereferenced unconditionally -- confirm
 * callers always supply an indirect address here. */
2418 CodeEmitterGM107::emitLD()
2420 emitInsn (0x80000000);
2423 emitLDSTs(0x35, insn
->dType
);
2424 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2425 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2426 emitGPR (0x00, insn
->def(0));
/* Encode STL (opcode 0xef500000): size code of dType at 0x30, src(0) as
 * the address, src(1) as the GPR holding the value to store. */
2430 CodeEmitterGM107::emitSTL()
2432 emitInsn (0xef500000);
2433 emitLDSTs(0x30, insn
->dType
);
2435 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2436 emitGPR (0x00, insn
->src(1));
/* Encode STS (opcode 0xef580000): size code of dType at 0x30, src(0) as
 * the address, src(1) as the GPR holding the value to store. */
2440 CodeEmitterGM107::emitSTS()
2442 emitInsn (0xef580000);
2443 emitLDSTs(0x30, insn
->dType
);
2444 emitADDR (0x08, 0x14, 24, 0, insn
->src(0));
2445 emitGPR (0x00, insn
->src(1));
/* Encode ST (opcode 0xa0000000): size code of dType at 0x35; bit 0x34 is
 * set when the first indirect of src(0) has size 8; src(1) is the GPR
 * holding the value to store.
 * NOTE(review): getIndirect(0) is dereferenced unconditionally -- confirm
 * callers always supply an indirect address here. */
2449 CodeEmitterGM107::emitST()
2451 emitInsn (0xa0000000);
2454 emitLDSTs(0x35, insn
->dType
);
2455 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2456 emitADDR (0x08, 0x14, 32, 0, insn
->src(0));
2457 emitGPR (0x00, insn
->src(1));
/* Encode ALD (opcode 0xefd80000). The 2-bit field at 0x2f holds
 * (size of def(0) in bytes / 4) - 1; the second indirect of src(0) goes to
 * the GPR slot at 0x27. */
2461 CodeEmitterGM107::emitALD()
2463 emitInsn (0xefd80000);
2464 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2465 emitGPR (0x27, insn
->src(0).getIndirect(1));
2468 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2469 emitGPR (0x00, insn
->def(0));
/* Encode AST (opcode 0xeff00000). The 2-bit field at 0x2f holds
 * (typeSizeof(dType) / 4) - 1; the second indirect of src(0) goes to the
 * GPR slot at 0x27 and src(1) is the GPR holding the value to store. */
2473 CodeEmitterGM107::emitAST()
2475 emitInsn (0xeff00000);
2476 emitField(0x2f, 2, (typeSizeof(insn
->dType
) / 4) - 1);
2477 emitGPR (0x27, insn
->src(0).getIndirect(1));
2479 emitADDR (0x08, 20, 10, 0, insn
->src(0));
2480 emitGPR (0x00, insn
->src(1));
/* Encode ISBERD (opcode 0xefd00000) with src(0) and def(0) as GPRs. */
2484 CodeEmitterGM107::emitISBERD()
2486 emitInsn(0xefd00000);
2487 emitGPR (0x08, insn
->src(0));
2488 emitGPR (0x00, insn
->def(0));
/* Encode AL2P (opcode 0xefa00000). The 2-bit field at 0x2f holds
 * (size of def(0) in bytes / 4) - 1; the offset of src(0) is written as an
 * 11-bit field at 0x14 and its first indirect goes to the GPR slot at
 * 0x08. */
2492 CodeEmitterGM107::emitAL2P()
2494 emitInsn (0xefa00000);
2495 emitField(0x2f, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
2498 emitField(0x14, 11, insn
->src(0).get()->reg
.data
.offset
);
2499 emitGPR (0x08, insn
->src(0).getIndirect(0));
2500 emitGPR (0x00, insn
->def(0));
/* Fixup callback that patches an already-emitted instruction at
 * code[entry->loc .. entry->loc + 1] according to `data`.
 *  - With data.flatshade set and the entry's interpolation mode equal to
 *    NV50_IR_INTERP_SC, the mode is replaced by NV50_IR_INTERP_FLAT.
 *  - Otherwise, with data.force_persample_interp set, a default sample
 *    mode and a non-flat interpolation mode, NV50_IR_INTERP_CENTROID is
 *    OR-ed in.
 * The four bits at 0x14 of the second word are then cleared and rewritten
 * from `ipa` (low two bits to 0x16, bits 2-3 to 0x14), and the 8-bit field
 * at 0x14 of the first word is replaced by `reg`. Lines between the two
 * branches and the rewrite are not visible in this view. */
2504 interpApply(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
2506 int ipa
= entry
->ipa
;
2507 int reg
= entry
->reg
;
2508 int loc
= entry
->loc
;
2510 if (data
.flatshade
&&
2511 (ipa
& NV50_IR_INTERP_MODE_MASK
) == NV50_IR_INTERP_SC
) {
2512 ipa
= NV50_IR_INTERP_FLAT
;
2514 } else if (data
.force_persample_interp
&&
2515 (ipa
& NV50_IR_INTERP_SAMPLE_MASK
) == NV50_IR_INTERP_DEFAULT
&&
2516 (ipa
& NV50_IR_INTERP_MODE_MASK
) != NV50_IR_INTERP_FLAT
) {
2517 ipa
|= NV50_IR_INTERP_CENTROID
;
2519 code
[loc
+ 1] &= ~(0xf << 0x14);
2520 code
[loc
+ 1] |= (ipa
& 0x3) << 0x16;
2521 code
[loc
+ 1] |= (ipa
& 0xc) << (0x14 - 2);
2522 code
[loc
+ 0] &= ~(0xff << 0x14);
2523 code
[loc
+ 0] |= reg
<< 0x14;
/* Encode IPA (opcode 0xe0000000). The interpolation mode maps to a 2-bit
 * code at 0x36 (LINEAR=0, PERSPECTIVE=1, FLAT=2, SC=3) and the sample mode
 * to a 2-bit code at 0x34 (DEFAULT=0, CENTROID=1, OFFSET=2). After the
 * address is emitted, bit 0x40 of code[1] (".idx") is set unless bits 8-15
 * of code[0] are all ones. For OP_PINTERP, src(1) goes to the GPR slot at
 * 0x14 and its register id is registered with addInterp(); otherwise 0xff
 * is registered. The offset operand (src(2) for OP_PINTERP, src(1)
 * otherwise) is emitted at 0x27 only in NV50_IR_INTERP_OFFSET sample mode.
 * The statement guarded by the final `if` is not visible in this view. */
2527 CodeEmitterGM107::emitIPA()
2529 int ipam
= 0, ipas
= 0;
2531 switch (insn
->getInterpMode()) {
2532 case NV50_IR_INTERP_LINEAR
: ipam
= 0; break;
2533 case NV50_IR_INTERP_PERSPECTIVE
: ipam
= 1; break;
2534 case NV50_IR_INTERP_FLAT
: ipam
= 2; break;
2535 case NV50_IR_INTERP_SC
: ipam
= 3; break;
2537 assert(!"invalid ipa mode");
2541 switch (insn
->getSampleMode()) {
2542 case NV50_IR_INTERP_DEFAULT
: ipas
= 0; break;
2543 case NV50_IR_INTERP_CENTROID
: ipas
= 1; break;
2544 case NV50_IR_INTERP_OFFSET
: ipas
= 2; break;
2546 assert(!"invalid ipa sample mode");
2550 emitInsn (0xe0000000);
2551 emitField(0x36, 2, ipam
);
2552 emitField(0x34, 2, ipas
);
2554 emitField(0x2f, 3, 7);
2555 emitADDR (0x08, 0x1c, 10, 0, insn
->src(0));
2556 if ((code
[0] & 0x0000ff00) != 0x0000ff00)
2557 code
[1] |= 0x00000040; /* .idx */
2558 emitGPR(0x00, insn
->def(0));
2560 if (insn
->op
== OP_PINTERP
) {
2561 emitGPR(0x14, insn
->src(1));
2562 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2563 emitGPR(0x27, insn
->src(2));
2564 addInterp(insn
->ipa
, insn
->getSrc(1)->reg
.data
.id
, interpApply
);
2566 if (insn
->getSampleMode() == NV50_IR_INTERP_OFFSET
)
2567 emitGPR(0x27, insn
->src(1));
2569 addInterp(insn
->ipa
, 0xff, interpApply
);
2572 if (insn
->getSampleMode() != NV50_IR_INTERP_OFFSET
)
/* Encode ATOM. For NV50_IR_SUBOP_ATOM_CAS the opcode is 0xee000000 and
 * dType is 0 (U32) or 1 (U64). Otherwise the opcode is 0xed000000 and
 * dType is U32=0, S32=1, U64=2, F32=3, B128=4, S64=5; subOp is taken from
 * insn->subOp, with special handling for NV50_IR_SUBOP_ATOM_EXCH whose
 * body is not visible in this view. Unexpected types assert and fall back
 * to 0. Bit 0x30 is set when the first indirect of src(0) has size 8. */
2577 CodeEmitterGM107::emitATOM()
2579 unsigned dType
, subOp
;
2581 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2582 switch (insn
->dType
) {
2583 case TYPE_U32
: dType
= 0; break;
2584 case TYPE_U64
: dType
= 1; break;
2585 default: assert(!"unexpected dType"); dType
= 0; break;
2589 emitInsn (0xee000000);
2591 switch (insn
->dType
) {
2592 case TYPE_U32
: dType
= 0; break;
2593 case TYPE_S32
: dType
= 1; break;
2594 case TYPE_U64
: dType
= 2; break;
2595 case TYPE_F32
: dType
= 3; break;
2596 case TYPE_B128
: dType
= 4; break;
2597 case TYPE_S64
: dType
= 5; break;
2598 default: assert(!"unexpected dType"); dType
= 0; break;
2600 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2603 subOp
= insn
->subOp
;
2605 emitInsn (0xed000000);
2608 emitField(0x34, 4, subOp
);
2609 emitField(0x31, 3, dType
);
2610 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2611 emitGPR (0x14, insn
->src(1));
2612 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2613 emitGPR (0x00, insn
->def(0));
/* Encode ATOMS. For NV50_IR_SUBOP_ATOM_CAS the opcode is 0xee000000 and a
 * 1-bit dType (U32=0, U64=1) is written at 0x34. Otherwise the opcode is
 * 0xec000000 and a 3-bit dType (U32=0, S32=1, U64=2, S64=3) is written at
 * 0x1c. In both cases the 4-bit subOp is then written at 0x34.
 * NOTE(review): in the CAS path the 1-bit dType field at 0x34 shares its
 * bit with the low bit of the later 4-bit subOp field at 0x34 -- confirm
 * this overlap is intended. */
2617 CodeEmitterGM107::emitATOMS()
2619 unsigned dType
, subOp
;
2621 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
2622 switch (insn
->dType
) {
2623 case TYPE_U32
: dType
= 0; break;
2624 case TYPE_U64
: dType
= 1; break;
2625 default: assert(!"unexpected dType"); dType
= 0; break;
2629 emitInsn (0xee000000);
2630 emitField(0x34, 1, dType
);
2632 switch (insn
->dType
) {
2633 case TYPE_U32
: dType
= 0; break;
2634 case TYPE_S32
: dType
= 1; break;
2635 case TYPE_U64
: dType
= 2; break;
2636 case TYPE_S64
: dType
= 3; break;
2637 default: assert(!"unexpected dType"); dType
= 0; break;
2640 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
2643 subOp
= insn
->subOp
;
2645 emitInsn (0xec000000);
2646 emitField(0x1c, 3, dType
);
2649 emitField(0x34, 4, subOp
);
2650 emitGPR (0x14, insn
->src(1));
2651 emitADDR (0x08, 0x1e, 22, 2, insn
->src(0));
2652 emitGPR (0x00, insn
->def(0));
/* Encode RED (opcode 0xebf80000). dType maps to a 3-bit code at 0x14
 * (U32=0, S32=1, U64=2, F32=3, B128=4, S64=5; anything else asserts and
 * uses 0), subOp is a 3-bit field at 0x17, and bit 0x30 is set when the
 * first indirect of src(0) has size 8. src(1) is the data GPR. */
2656 CodeEmitterGM107::emitRED()
2660 switch (insn
->dType
) {
2661 case TYPE_U32
: dType
= 0; break;
2662 case TYPE_S32
: dType
= 1; break;
2663 case TYPE_U64
: dType
= 2; break;
2664 case TYPE_F32
: dType
= 3; break;
2665 case TYPE_B128
: dType
= 4; break;
2666 case TYPE_S64
: dType
= 5; break;
2667 default: assert(!"unexpected dType"); dType
= 0; break;
2670 emitInsn (0xebf80000);
2671 emitField(0x30, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2672 emitField(0x17, 3, insn
->subOp
);
2673 emitField(0x14, 3, dType
);
2674 emitADDR (0x08, 0x1c, 20, 0, insn
->src(0));
2675 emitGPR (0x00, insn
->src(1));
/* Encode CCTL. The opcode is 0xef600000 when src(0) is in
 * FILE_MEMORY_GLOBAL and 0xef800000 otherwise; the assignments to `width`
 * in each branch are not visible in this view. Bit 0x34 is set when the
 * first indirect of src(0) has size 8, and subOp is a 4-bit field at
 * 0x00. */
2679 CodeEmitterGM107::emitCCTL()
2682 if (insn
->src(0).getFile() == FILE_MEMORY_GLOBAL
) {
2683 emitInsn(0xef600000);
2686 emitInsn(0xef800000);
2689 emitField(0x34, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
2690 emitADDR (0x08, 0x16, width
, 2, insn
->src(0));
2691 emitField(0x00, 4, insn
->subOp
);
2694 /*******************************************************************************
2696 ******************************************************************************/
/* Encode PIXLD (opcode 0xefe80000): subOp as a 3-bit field at 0x1f, with
 * src(0) and def(0) as GPRs. */
2699 CodeEmitterGM107::emitPIXLD()
2701 emitInsn (0xefe80000);
2703 emitField(0x1f, 3, insn
->subOp
);
2704 emitGPR (0x08, insn
->src(0));
2705 emitGPR (0x00, insn
->def(0));
2708 /*******************************************************************************
2710 ******************************************************************************/
/* Emit the second texture source register at bit position `pos`. The
 * source index is 1, or 2 when source 1 is the instruction's predicate
 * (predSrc == 1). Nothing is emitted here when that source does not
 * exist. */
2713 CodeEmitterGM107::emitTEXs(int pos
)
2715 int src1
= insn
->predSrc
== 1 ? 2 : 1;
2716 if (insn
->srcExists(src1
))
2717 emitGPR(pos
, insn
->src(src1
));
/* Translate a component write mask into the mask code used by the TEXS
 * and TLDS encodings (see emitTEXS). The mapping is not injective: for
 * example 0x1 and 0x7 both map to 0x0. What distinguishes such pairs in
 * the final encoding is not visible in this function. Masks 0x0, 0x5 and
 * 0x6 have no case and reach the "invalid mask" assert. */
2723 getTEXSMask(uint8_t mask
)
2726 case 0x1: return 0x0;
2727 case 0x2: return 0x1;
2728 case 0x3: return 0x4;
2729 case 0x4: return 0x2;
2730 case 0x7: return 0x0;
2731 case 0x8: return 0x3;
2732 case 0x9: return 0x5;
2733 case 0xa: return 0x6;
2734 case 0xb: return 0x1;
2735 case 0xc: return 0x7;
2736 case 0xd: return 0x2;
2737 case 0xe: return 0x3;
2738 case 0xf: return 0x4;
2740 assert(!"invalid mask");
/* Choose the target code for a TEXS encoding from the texture target,
 * tex.levelZero and whether the op is OP_TXL. Only OP_TEX and OP_TXL are
 * accepted. The returned codes themselves, and some case labels, are not
 * visible in this view; what is visible are the preconditions: some
 * targets require levelZero, TEX_TARGET_CUBE requires !levelZero, and one
 * target rejects OP_TXL. */
2746 getTEXSTarget(const TexInstruction
*tex
)
2748 assert(tex
->op
== OP_TEX
|| tex
->op
== OP_TXL
);
2750 switch (tex
->tex
.target
.getEnum()) {
2752 assert(tex
->tex
.levelZero
);
2755 case TEX_TARGET_RECT
:
2756 if (tex
->tex
.levelZero
)
2758 if (tex
->op
== OP_TXL
)
2761 case TEX_TARGET_2D_SHADOW
:
2762 case TEX_TARGET_RECT_SHADOW
:
2763 if (tex
->tex
.levelZero
)
2765 if (tex
->op
== OP_TXL
)
2768 case TEX_TARGET_2D_ARRAY
:
2769 if (tex
->tex
.levelZero
)
2772 case TEX_TARGET_2D_ARRAY_SHADOW
:
2773 assert(tex
->tex
.levelZero
);
2776 if (tex
->tex
.levelZero
)
2778 assert(tex
->op
!= OP_TXL
);
2780 case TEX_TARGET_CUBE
:
2781 assert(!tex
->tex
.levelZero
);
2782 if (tex
->op
== OP_TXL
)
2792 getTLDSTarget(const TexInstruction
*tex
)
2794 switch (tex
->tex
.target
.getEnum()) {
2796 if (tex
->tex
.levelZero
)
2800 case TEX_TARGET_RECT
:
2801 if (tex
->tex
.levelZero
)
2802 return tex
->tex
.useOffsets
? 0x4 : 0x2;
2803 return tex
->tex
.useOffsets
? 0xc : 0x5;
2804 case TEX_TARGET_2D_MS
:
2805 assert(tex
->tex
.levelZero
);
2808 assert(tex
->tex
.levelZero
);
2810 case TEX_TARGET_2D_ARRAY
:
2811 assert(tex
->tex
.levelZero
);
/* Encode TEX/TXB/TXL. When tex.levelZero is not set, the LOD mode is
 * OP_TEX=0, OP_TXB=2, OP_TXL=3; the levelZero value is assigned on a line
 * not visible in this view. With an indirect resource (rIndirectSrc >= 0)
 * opcode 0xdeb80000 is used with the LOD mode at 0x25 and the offset flag
 * at 0x24; otherwise opcode 0xc0380000 is used with the LOD mode at 0x37,
 * the offset flag at 0x36 and tex.r as a 13-bit field at 0x24. The 2-bit
 * field at 0x1d is 3 for cube targets, else getDim() - 1. */
2821 CodeEmitterGM107::emitTEX()
2823 const TexInstruction
*insn
= this->insn
->asTex();
2826 if (!insn
->tex
.levelZero
) {
2828 case OP_TEX
: lodm
= 0; break;
2829 case OP_TXB
: lodm
= 2; break;
2830 case OP_TXL
: lodm
= 3; break;
2832 assert(!"invalid tex op");
2839 if (insn
->tex
.rIndirectSrc
>= 0) {
2840 emitInsn (0xdeb80000);
2841 emitField(0x25, 2, lodm
);
2842 emitField(0x24, 1, insn
->tex
.useOffsets
== 1);
2844 emitInsn (0xc0380000);
2845 emitField(0x37, 2, lodm
);
2846 emitField(0x36, 1, insn
->tex
.useOffsets
== 1);
2847 emitField(0x24, 13, insn
->tex
.r
);
2850 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2851 emitField(0x31, 1, insn
->tex
.liveOnly
);
2852 emitField(0x23, 1, insn
->tex
.derivAll
);
2853 emitField(0x1f, 4, insn
->tex
.mask
);
2854 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2855 insn
->tex
.target
.getDim() - 1);
2856 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2858 emitGPR (0x08, insn
->src(0));
2859 emitGPR (0x00, insn
->def(0));
/* Encode the compact texture forms. Three opcodes are visible:
 * 0xd8000000 (target from getTEXSTarget), 0xda000000 (target from
 * getTLDSTarget) and 0xdf000000 (gather: component at 0x34, offset flag at
 * 0x33, shadow flag at 0x32; useOffsets == 4 is rejected). The op-to-branch
 * dispatch labels are not visible in this view. Common fields follow:
 * liveOnly at 0x31, tex.r as 13 bits at 0x24, optional def(1) at 0x1c and
 * optional src(1) at 0x14. */
2863 CodeEmitterGM107::emitTEXS()
2865 const TexInstruction
*insn
= this->insn
->asTex();
2870 emitInsn (0xd8000000);
2871 emitField(0x35, 4, getTEXSTarget(insn
));
2872 emitField(0x32, 3, getTEXSMask(insn
->tex
.mask
));
2875 emitInsn (0xda000000);
2876 emitField(0x35, 4, getTLDSTarget(insn
));
2877 emitField(0x32, 3, getTEXSMask(insn
->tex
.mask
));
2880 assert(insn
->tex
.useOffsets
!= 4);
2881 emitInsn (0xdf000000);
2882 emitField(0x34, 2, insn
->tex
.gatherComp
);
2883 emitField(0x33, 1, insn
->tex
.useOffsets
== 1);
2884 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2887 unreachable("unknown op in emitTEXS()");
2891 emitField(0x31, 1, insn
->tex
.liveOnly
);
2892 emitField(0x24, 13, insn
->tex
.r
);
2893 if (insn
->defExists(1))
2894 emitGPR(0x1c, insn
->def(1));
2897 if (insn
->srcExists(1))
2898 emitGPR(0x14, insn
->getSrc(1));
2901 emitGPR (0x08, insn
->src(0));
2902 emitGPR (0x00, insn
->def(0));
/* Encode TLD. Opcode 0xdd380000 is used with an indirect resource
 * (rIndirectSrc >= 0); otherwise 0xdc380000 with tex.r as a 13-bit field
 * at 0x24. Bit 0x37 is set when levelZero is 0, bit 0x32 for multisample
 * targets, and bit 0x23 when useOffsets == 1. */
2906 CodeEmitterGM107::emitTLD()
2908 const TexInstruction
*insn
= this->insn
->asTex();
2910 if (insn
->tex
.rIndirectSrc
>= 0) {
2911 emitInsn (0xdd380000);
2913 emitInsn (0xdc380000);
2914 emitField(0x24, 13, insn
->tex
.r
);
2917 emitField(0x37, 1, insn
->tex
.levelZero
== 0);
2918 emitField(0x32, 1, insn
->tex
.target
.isMS());
2919 emitField(0x31, 1, insn
->tex
.liveOnly
);
2920 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2921 emitField(0x1f, 4, insn
->tex
.mask
);
2922 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2923 insn
->tex
.target
.getDim() - 1);
2924 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2926 emitGPR (0x08, insn
->src(0));
2927 emitGPR (0x00, insn
->def(0));
/* Encode TLD4 (texture gather). Opcode 0xdef80000 is used with an indirect
 * resource (rIndirectSrc >= 0); otherwise 0xc8380000 with tex.r as a
 * 13-bit field at 0x24. The gather component is a 2-bit field; the two
 * offset flags (useOffsets == 4 and useOffsets == 1) are booleans placed on
 * adjacent bits directly below it, so each is written as a 1-bit field.
 * They were previously declared 2 bits wide, which made each field overlap
 * its neighbour (and the low bit of the component field). The emitted
 * value is 0 or 1 in every case, so the narrower width does not change
 * what is encoded. */
2931 CodeEmitterGM107::emitTLD4()
2933 const TexInstruction
*insn
= this->insn
->asTex();
2935 if (insn
->tex
.rIndirectSrc
>= 0) {
2936 emitInsn (0xdef80000);
2937 emitField(0x26, 2, insn
->tex
.gatherComp
);
2938 emitField(0x25, 1, insn
->tex
.useOffsets
== 4);
2939 emitField(0x24, 1, insn
->tex
.useOffsets
== 1);
2941 emitInsn (0xc8380000);
2942 emitField(0x38, 2, insn
->tex
.gatherComp
);
2943 emitField(0x37, 1, insn
->tex
.useOffsets
== 4);
2944 emitField(0x36, 1, insn
->tex
.useOffsets
== 1);
2945 emitField(0x24, 13, insn
->tex
.r
);
2948 emitField(0x32, 1, insn
->tex
.target
.isShadow());
2949 emitField(0x31, 1, insn
->tex
.liveOnly
);
2950 emitField(0x23, 1, insn
->tex
.derivAll
);
2951 emitField(0x1f, 4, insn
->tex
.mask
);
2952 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2953 insn
->tex
.target
.getDim() - 1);
2954 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2956 emitGPR (0x08, insn
->src(0));
2957 emitGPR (0x00, insn
->def(0));
/* Encode TXD. Opcode 0xde780000 is used with an indirect resource
 * (rIndirectSrc >= 0); otherwise 0xde380000 with tex.r as a 13-bit field
 * at 0x24. Bit 0x23 is set when useOffsets == 1. */
2961 CodeEmitterGM107::emitTXD()
2963 const TexInstruction
*insn
= this->insn
->asTex();
2965 if (insn
->tex
.rIndirectSrc
>= 0) {
2966 emitInsn (0xde780000);
2968 emitInsn (0xde380000);
2969 emitField(0x24, 13, insn
->tex
.r
);
2972 emitField(0x31, 1, insn
->tex
.liveOnly
);
2973 emitField(0x23, 1, insn
->tex
.useOffsets
== 1);
2974 emitField(0x1f, 4, insn
->tex
.mask
);
2975 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2976 insn
->tex
.target
.getDim() - 1);
2977 emitField(0x1c, 1, insn
->tex
.target
.isArray());
2979 emitGPR (0x08, insn
->src(0));
2980 emitGPR (0x00, insn
->def(0));
/* Encode TMML. Opcode 0xdf600000 is used with an indirect resource
 * (rIndirectSrc >= 0); otherwise 0xdf580000 with tex.r as a 13-bit field
 * at 0x24. Bit 0x23 carries tex.derivAll. */
2984 CodeEmitterGM107::emitTMML()
2986 const TexInstruction
*insn
= this->insn
->asTex();
2988 if (insn
->tex
.rIndirectSrc
>= 0) {
2989 emitInsn (0xdf600000);
2991 emitInsn (0xdf580000);
2992 emitField(0x24, 13, insn
->tex
.r
);
2995 emitField(0x31, 1, insn
->tex
.liveOnly
);
2996 emitField(0x23, 1, insn
->tex
.derivAll
);
2997 emitField(0x1f, 4, insn
->tex
.mask
);
2998 emitField(0x1d, 2, insn
->tex
.target
.isCube() ? 3 :
2999 insn
->tex
.target
.getDim() - 1);
3000 emitField(0x1c, 1, insn
->tex
.target
.isArray());
3002 emitGPR (0x08, insn
->src(0));
3003 emitGPR (0x00, insn
->def(0));
/* Encode TXQ. The query kind maps to a 6-bit code at 0x16: DIMS=0x01,
 * TYPE=0x02, SAMPLE_POSITION=0x05, FILTER=0x10, LOD=0x12, WRAP=0x14,
 * BORDER_COLOUR=0x16; other queries trip the "invalid txq query" assert.
 * Opcode 0xdf500000 is used with an indirect resource (rIndirectSrc >= 0);
 * otherwise 0xdf480000 with tex.r as a 13-bit field at 0x24. */
3007 CodeEmitterGM107::emitTXQ()
3009 const TexInstruction
*insn
= this->insn
->asTex();
3012 switch (insn
->tex
.query
) {
3013 case TXQ_DIMS
: type
= 0x01; break;
3014 case TXQ_TYPE
: type
= 0x02; break;
3015 case TXQ_SAMPLE_POSITION
: type
= 0x05; break;
3016 case TXQ_FILTER
: type
= 0x10; break;
3017 case TXQ_LOD
: type
= 0x12; break;
3018 case TXQ_WRAP
: type
= 0x14; break;
3019 case TXQ_BORDER_COLOUR
: type
= 0x16; break;
3021 assert(!"invalid txq query");
3025 if (insn
->tex
.rIndirectSrc
>= 0) {
3026 emitInsn (0xdf500000);
3028 emitInsn (0xdf480000);
3029 emitField(0x24, 13, insn
->tex
.r
);
3032 emitField(0x31, 1, insn
->tex
.liveOnly
);
3033 emitField(0x1f, 4, insn
->tex
.mask
);
3034 emitField(0x16, 6, type
);
3035 emitGPR (0x08, insn
->src(0));
3036 emitGPR (0x00, insn
->def(0));
/* Encode DEPBAR (opcode 0xf0f00000). Bit 0x1d is set (annotated "le"), the
 * 3-bit field at 0x1a is the constant 5, and subOp is written as a 6-bit
 * field at both 0x14 and 0x00.
 * NOTE(review): the same subOp value is written to two different fields --
 * confirm both are meant to carry it. */
3040 CodeEmitterGM107::emitDEPBAR()
3042 emitInsn (0xf0f00000);
3043 emitField(0x1d, 1, 1); /* le */
3044 emitField(0x1a, 3, 5);
3045 emitField(0x14, 6, insn
->subOp
);
3046 emitField(0x00, 6, insn
->subOp
);
3049 /*******************************************************************************
3051 ******************************************************************************/
/* Encode NOP (opcode 0x50b00000); no operand fields are written here. */
3054 CodeEmitterGM107::emitNOP()
3056 emitInsn(0x50b00000);
/* Encode KIL (opcode 0xe3300000) with the condition code CC_TR written at
 * 0x00 through emitCond5(). */
3060 CodeEmitterGM107::emitKIL()
3062 emitInsn (0xe3300000);
3063 emitCond5(0x00, CC_TR
);
/* Encode OUT (0xfbe00000 / 0xf6e00000 / 0xebe00000 by file of src(1)).
 * `cut` is true for OP_RESTART or when subOp is non-zero; `emit` is true
 * for OP_EMIT. They are packed as (cut << 1) | emit into the 2-bit field
 * at 0x27. */
3067 CodeEmitterGM107::emitOUT()
3069 const int cut
= insn
->op
== OP_RESTART
|| insn
->subOp
;
3070 const int emit
= insn
->op
== OP_EMIT
;
3072 switch (insn
->src(1).getFile()) {
3074 emitInsn(0xfbe00000);
3075 emitGPR (0x14, insn
->src(1));
3077 case FILE_IMMEDIATE
:
3078 emitInsn(0xf6e00000);
3079 emitIMMD(0x14, 19, insn
->src(1));
3081 case FILE_MEMORY_CONST
:
3082 emitInsn(0xebe00000);
3083 emitCBUF(0x22, -1, 0x14, 16, 2, insn
->src(1));
3086 assert(!"bad src1 file");
3090 emitField(0x27, 2, (cut
<< 1) | emit
);
3091 emitGPR (0x08, insn
->src(0));
3092 emitGPR (0x00, insn
->def(0));
/* Encode BAR (opcode 0xf0a80000). subOp maps to an 8-bit code at 0x20:
 * RED_POPC=0x02, RED_AND=0x0a, RED_OR=0x12, ARRIVE=0x81; any other subOp
 * is asserted to be NV50_IR_SUBOP_BAR_SYNC.
 *
 * src(0) is encoded at 0x08, either as a GPR or as an 8-bit immediate with
 * flag bit 0x2b set. src(1) is encoded at 0x14, either as a GPR or as a
 * 12-bit immediate with flag bit 0x2c set. The immediate for the 0x14
 * field is taken from source 1, the operand whose file was just tested; it
 * was previously read from source 0, which encoded the wrong value (or
 * dereferenced a null asImm() result when source 0 is a register).
 *
 * An existing, non-predicate src(2) is emitted as a predicate at 0x27 with
 * its NOT modifier at 0x2a; otherwise 0x27 is filled with 7. */
3096 CodeEmitterGM107::emitBAR()
3100 emitInsn (0xf0a80000);
3102 switch (insn
->subOp
) {
3103 case NV50_IR_SUBOP_BAR_RED_POPC
: subop
= 0x02; break;
3104 case NV50_IR_SUBOP_BAR_RED_AND
: subop
= 0x0a; break;
3105 case NV50_IR_SUBOP_BAR_RED_OR
: subop
= 0x12; break;
3106 case NV50_IR_SUBOP_BAR_ARRIVE
: subop
= 0x81; break;
3109 assert(insn
->subOp
== NV50_IR_SUBOP_BAR_SYNC
);
3113 emitField(0x20, 8, subop
);
3116 if (insn
->src(0).getFile() == FILE_GPR
) {
3117 emitGPR(0x08, insn
->src(0));
3119 ImmediateValue
*imm
= insn
->getSrc(0)->asImm();
3121 emitField(0x08, 8, imm
->reg
.data
.u32
);
3122 emitField(0x2b, 1, 1);
3126 if (insn
->src(1).getFile() == FILE_GPR
) {
3127 emitGPR(0x14, insn
->src(1));
3129 ImmediateValue
*imm
= insn
->getSrc(1)->asImm();
3131 emitField(0x14, 12, imm
->reg
.data
.u32
);
3132 emitField(0x2c, 1, 1);
3135 if (insn
->srcExists(2) && (insn
->predSrc
!= 2)) {
3136 emitPRED (0x27, insn
->src(2));
3137 emitField(0x2a, 1, insn
->src(2).mod
== Modifier(NV50_IR_MOD_NOT
));
3139 emitField(0x27, 3, 7);
3144 CodeEmitterGM107::emitMEMBAR()
3146 emitInsn (0xef980000);
3147 emitField(0x08, 2, insn
->subOp
>> 2);
3151 CodeEmitterGM107::emitVOTE()
3153 const ImmediateValue
*imm
;
3157 for (int i
= 0; insn
->defExists(i
); i
++) {
3158 if (insn
->def(i
).getFile() == FILE_GPR
)
3160 else if (insn
->def(i
).getFile() == FILE_PREDICATE
)
3164 emitInsn (0x50d80000);
3165 emitField(0x30, 2, insn
->subOp
);
3167 emitGPR (0x00, insn
->def(r
));
3171 emitPRED (0x2d, insn
->def(p
));
3175 switch (insn
->src(0).getFile()) {
3176 case FILE_PREDICATE
:
3177 emitField(0x2a, 1, insn
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
));
3178 emitPRED (0x27, insn
->src(0));
3180 case FILE_IMMEDIATE
:
3181 imm
= insn
->getSrc(0)->asImm();
3183 u32
= imm
->reg
.data
.u32
;
3184 assert(u32
== 0 || u32
== 1);
3186 emitField(0x2a, 1, u32
== 0);
3189 assert(!"Unhandled src");
3195 CodeEmitterGM107::emitSUTarget()
3197 const TexInstruction
*insn
= this->insn
->asTex();
3200 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
3202 if (insn
->tex
.target
== TEX_TARGET_BUFFER
) {
3204 } else if (insn
->tex
.target
== TEX_TARGET_1D_ARRAY
) {
3206 } else if (insn
->tex
.target
== TEX_TARGET_2D
||
3207 insn
->tex
.target
== TEX_TARGET_RECT
) {
3209 } else if (insn
->tex
.target
== TEX_TARGET_2D_ARRAY
||
3210 insn
->tex
.target
== TEX_TARGET_CUBE
||
3211 insn
->tex
.target
== TEX_TARGET_CUBE_ARRAY
) {
3213 } else if (insn
->tex
.target
== TEX_TARGET_3D
) {
3216 assert(insn
->tex
.target
== TEX_TARGET_1D
);
3218 emitField(0x20, 4, target
);
3222 CodeEmitterGM107::emitSUHandle(const int s
)
3224 const TexInstruction
*insn
= this->insn
->asTex();
3226 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
3228 if (insn
->src(s
).getFile() == FILE_GPR
) {
3229 emitGPR(0x27, insn
->src(s
));
3231 ImmediateValue
*imm
= insn
->getSrc(s
)->asImm();
3233 emitField(0x33, 1, 1);
3234 emitField(0x24, 13, imm
->reg
.data
.u32
);
3239 CodeEmitterGM107::emitSUSTx()
3241 const TexInstruction
*insn
= this->insn
->asTex();
3243 emitInsn(0xeb200000);
3244 if (insn
->op
== OP_SUSTB
)
3245 emitField(0x34, 1, 1);
3249 emitField(0x14, 4, 0xf); // rgba
3250 emitGPR (0x08, insn
->src(0));
3251 emitGPR (0x00, insn
->src(1));
3257 CodeEmitterGM107::emitSULDx()
3259 const TexInstruction
*insn
= this->insn
->asTex();
3262 emitInsn(0xeb000000);
3263 if (insn
->op
== OP_SULDB
)
3264 emitField(0x34, 1, 1);
3267 switch (insn
->dType
) {
3268 case TYPE_S8
: type
= 1; break;
3269 case TYPE_U16
: type
= 2; break;
3270 case TYPE_S16
: type
= 3; break;
3271 case TYPE_U32
: type
= 4; break;
3272 case TYPE_U64
: type
= 5; break;
3273 case TYPE_B128
: type
= 6; break;
3275 assert(insn
->dType
== TYPE_U8
);
3279 emitField(0x14, 3, type
);
3280 emitGPR (0x00, insn
->def(0));
3281 emitGPR (0x08, insn
->src(0));
3287 CodeEmitterGM107::emitSUREDx()
3289 const TexInstruction
*insn
= this->insn
->asTex();
3290 uint8_t type
= 0, subOp
;
3292 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
)
3293 emitInsn(0xeac00000);
3295 emitInsn(0xea600000);
3297 if (insn
->op
== OP_SUREDB
)
3298 emitField(0x34, 1, 1);
3302 switch (insn
->dType
) {
3303 case TYPE_S32
: type
= 1; break;
3304 case TYPE_U64
: type
= 2; break;
3305 case TYPE_F32
: type
= 3; break;
3306 case TYPE_S64
: type
= 5; break;
3308 assert(insn
->dType
== TYPE_U32
);
3313 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
3315 } else if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
) {
3318 subOp
= insn
->subOp
;
3321 emitField(0x24, 3, type
);
3322 emitField(0x1d, 4, subOp
);
3323 emitGPR (0x14, insn
->src(1));
3324 emitGPR (0x08, insn
->src(0));
3325 emitGPR (0x00, insn
->def(0));
3330 /*******************************************************************************
3331 * assembler front-end
3332 ******************************************************************************/
3335 CodeEmitterGM107::emitInstruction(Instruction
*i
)
3337 const unsigned int size
= (writeIssueDelays
&& !(codeSize
& 0x1f)) ? 16 : 8;
3342 if (insn
->encSize
!= 8) {
3343 ERROR("skipping undecodable instruction: "); insn
->print();
3346 if (codeSize
+ size
> codeSizeLimit
) {
3347 ERROR("code emitter output buffer too small\n");
3351 if (writeIssueDelays
) {
3352 int n
= ((codeSize
& 0x1f) / 8) - 1;
3355 data
[0] = 0x00000000;
3356 data
[1] = 0x00000000;
3362 emitField(data
, n
* 21, 21, insn
->sched
);
3409 if (targGM107
->isCS2RSV(insn
->getSrc(0)->reg
.data
.sv
.sv
))
3421 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
3422 insn
->src(0).getFile() == FILE_PREDICATE
)) {
3424 } else if (isFloatType(insn
->dType
)) {
3425 if (isFloatType(insn
->sType
))
3430 if (isFloatType(insn
->sType
))
3441 if (isFloatType(insn
->dType
)) {
3442 if (insn
->dType
== TYPE_F64
)
3451 if (isFloatType(insn
->dType
)) {
3452 if (insn
->dType
== TYPE_F64
)
3462 if (isFloatType(insn
->dType
)) {
3463 if (insn
->dType
== TYPE_F64
)
3479 if (isFloatType(insn
->dType
)) {
3480 if (insn
->dType
== TYPE_F64
)
3489 if (typeSizeof(insn
->sType
) == 8)
3495 if (typeSizeof(insn
->sType
) == 8)
3513 if (isFloatType(insn
->dType
))
3522 if (insn
->def(0).getFile() != FILE_PREDICATE
) {
3523 if (isFloatType(insn
->sType
))
3524 if (insn
->sType
== TYPE_F64
)
3531 if (isFloatType(insn
->sType
))
3532 if (insn
->sType
== TYPE_F64
)
3565 switch (insn
->src(0).getFile()) {
3566 case FILE_MEMORY_CONST
: emitLDC(); break;
3567 case FILE_MEMORY_LOCAL
: emitLDL(); break;
3568 case FILE_MEMORY_SHARED
: emitLDS(); break;
3569 case FILE_MEMORY_GLOBAL
: emitLD(); break;
3571 assert(!"invalid load");
3577 switch (insn
->src(0).getFile()) {
3578 case FILE_MEMORY_LOCAL
: emitSTL(); break;
3579 case FILE_MEMORY_SHARED
: emitSTS(); break;
3580 case FILE_MEMORY_GLOBAL
: emitST(); break;
3582 assert(!"invalid store");
3588 if (insn
->src(0).getFile() == FILE_MEMORY_SHARED
)
3591 if (!insn
->defExists(0) && insn
->subOp
< NV50_IR_SUBOP_ATOM_CAS
)
3620 if (insn
->asTex()->tex
.scalar
)
3629 if (insn
->asTex()->tex
.scalar
)
3635 if (insn
->asTex()->tex
.scalar
)
3687 assert(!"invalid opcode");
3703 CodeEmitterGM107::getMinEncodingSize(const Instruction
*i
) const
3708 /*******************************************************************************
3709 * sched data calculator
3710 ******************************************************************************/
3712 class SchedDataCalculatorGM107
: public Pass
3715 SchedDataCalculatorGM107(const TargetGM107
*targ
) : targ(targ
) {}
3727 void rebase(const int base
)
3729 const int delta
= this->base
- base
;
3734 for (int i
= 0; i
< 256; ++i
) {
3738 for (int i
= 0; i
< 8; ++i
) {
3747 memset(&rd
, 0, sizeof(rd
));
3748 memset(&wr
, 0, sizeof(wr
));
3750 int getLatest(const ScoreData
& d
) const
3753 for (int i
= 0; i
< 256; ++i
)
3756 for (int i
= 0; i
< 8; ++i
)
3763 inline int getLatestRd() const
3765 return getLatest(rd
);
3767 inline int getLatestWr() const
3769 return getLatest(wr
);
3771 inline int getLatest() const
3773 return MAX2(getLatestRd(), getLatestWr());
3775 void setMax(const RegScores
*that
)
3777 for (int i
= 0; i
< 256; ++i
) {
3778 rd
.r
[i
] = MAX2(rd
.r
[i
], that
->rd
.r
[i
]);
3779 wr
.r
[i
] = MAX2(wr
.r
[i
], that
->wr
.r
[i
]);
3781 for (int i
= 0; i
< 8; ++i
) {
3782 rd
.p
[i
] = MAX2(rd
.p
[i
], that
->rd
.p
[i
]);
3783 wr
.p
[i
] = MAX2(wr
.p
[i
], that
->wr
.p
[i
]);
3785 rd
.c
= MAX2(rd
.c
, that
->rd
.c
);
3786 wr
.c
= MAX2(wr
.c
, that
->wr
.c
);
3788 void print(int cycle
)
3790 for (int i
= 0; i
< 256; ++i
) {
3791 if (rd
.r
[i
] > cycle
)
3792 INFO("rd $r%i @ %i\n", i
, rd
.r
[i
]);
3793 if (wr
.r
[i
] > cycle
)
3794 INFO("wr $r%i @ %i\n", i
, wr
.r
[i
]);
3796 for (int i
= 0; i
< 8; ++i
) {
3797 if (rd
.p
[i
] > cycle
)
3798 INFO("rd $p%i @ %i\n", i
, rd
.p
[i
]);
3799 if (wr
.p
[i
] > cycle
)
3800 INFO("wr $p%i @ %i\n", i
, wr
.p
[i
]);
3803 INFO("rd $c @ %i\n", rd
.c
);
3805 INFO("wr $c @ %i\n", wr
.c
);
3809 RegScores
*score
; // for current BB
3810 std::vector
<RegScores
> scoreBoards
;
3812 const TargetGM107
*targ
;
3813 bool visit(Function
*);
3814 bool visit(BasicBlock
*);
3816 void commitInsn(const Instruction
*, int);
3817 int calcDelay(const Instruction
*, int) const;
3818 void setDelay(Instruction
*, int, const Instruction
*);
3819 void recordWr(const Value
*, int, int);
3820 void checkRd(const Value
*, int, int&) const;
3822 inline void emitYield(Instruction
*);
3823 inline void emitStall(Instruction
*, uint8_t);
3824 inline void emitReuse(Instruction
*, uint8_t);
3825 inline void emitWrDepBar(Instruction
*, uint8_t);
3826 inline void emitRdDepBar(Instruction
*, uint8_t);
3827 inline void emitWtDepBar(Instruction
*, uint8_t);
3829 inline int getStall(const Instruction
*) const;
3830 inline int getWrDepBar(const Instruction
*) const;
3831 inline int getRdDepBar(const Instruction
*) const;
3832 inline int getWtDepBar(const Instruction
*) const;
3834 void setReuseFlag(Instruction
*);
3836 inline void printSchedInfo(int, const Instruction
*) const;
3839 LiveBarUse(Instruction
*insn
, Instruction
*usei
)
3840 : insn(insn
), usei(usei
) { }
3846 LiveBarDef(Instruction
*insn
, Instruction
*defi
)
3847 : insn(insn
), defi(defi
) { }
3852 bool insertBarriers(BasicBlock
*);
3854 bool doesInsnWriteTo(const Instruction
*insn
, const Value
*val
) const;
3855 Instruction
*findFirstUse(const Instruction
*) const;
3856 Instruction
*findFirstDef(const Instruction
*) const;
3858 bool needRdDepBar(const Instruction
*) const;
3859 bool needWrDepBar(const Instruction
*) const;
3863 SchedDataCalculatorGM107::emitStall(Instruction
*insn
, uint8_t cnt
)
3870 SchedDataCalculatorGM107::emitYield(Instruction
*insn
)
3872 insn
->sched
|= 1 << 4;
3876 SchedDataCalculatorGM107::emitWrDepBar(Instruction
*insn
, uint8_t id
)
3879 if ((insn
->sched
& 0xe0) == 0xe0)
3880 insn
->sched
^= 0xe0;
3881 insn
->sched
|= id
<< 5;
3885 SchedDataCalculatorGM107::emitRdDepBar(Instruction
*insn
, uint8_t id
)
3888 if ((insn
->sched
& 0x700) == 0x700)
3889 insn
->sched
^= 0x700;
3890 insn
->sched
|= id
<< 8;
3894 SchedDataCalculatorGM107::emitWtDepBar(Instruction
*insn
, uint8_t id
)
3897 insn
->sched
|= 1 << (11 + id
);
3901 SchedDataCalculatorGM107::emitReuse(Instruction
*insn
, uint8_t id
)
3904 insn
->sched
|= 1 << (17 + id
);
3908 SchedDataCalculatorGM107::printSchedInfo(int cycle
,
3909 const Instruction
*insn
) const
3911 uint8_t st
, yl
, wr
, rd
, wt
, ru
;
3913 st
= (insn
->sched
& 0x00000f) >> 0;
3914 yl
= (insn
->sched
& 0x000010) >> 4;
3915 wr
= (insn
->sched
& 0x0000e0) >> 5;
3916 rd
= (insn
->sched
& 0x000700) >> 8;
3917 wt
= (insn
->sched
& 0x01f800) >> 11;
3918 ru
= (insn
->sched
& 0x1e0000) >> 17;
3920 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3921 cycle
, st
, yl
, wr
, rd
, wt
, ru
);
3925 SchedDataCalculatorGM107::getStall(const Instruction
*insn
) const
3927 return insn
->sched
& 0xf;
3931 SchedDataCalculatorGM107::getWrDepBar(const Instruction
*insn
) const
3933 return (insn
->sched
& 0x0000e0) >> 5;
3937 SchedDataCalculatorGM107::getRdDepBar(const Instruction
*insn
) const
3939 return (insn
->sched
& 0x000700) >> 8;
3943 SchedDataCalculatorGM107::getWtDepBar(const Instruction
*insn
) const
3945 return (insn
->sched
& 0x01f800) >> 11;
3948 // Emit the reuse flag, which enables the operand reuse cache, a new level of
3949 // the memory hierarchy introduced with Maxwell.
3951 // It reduces bank conflicts by caching operands. Each time you issue
3952 // an instruction, that flag can tell the hw which operands are going to be
3953 // re-used by the next instruction. Note that the next instruction has to use
3954 // the same GPR id in the same operand slot.
3956 SchedDataCalculatorGM107::setReuseFlag(Instruction
*insn
)
3958 Instruction
*next
= insn
->next
;
3959 BitSet
defs(255, 1);
3961 if (!targ
->isReuseSupported(insn
))
3964 for (int d
= 0; insn
->defExists(d
); ++d
) {
3965 const Value
*def
= insn
->def(d
).rep();
3966 if (insn
->def(d
).getFile() != FILE_GPR
)
3968 if (typeSizeof(insn
->dType
) != 4 || def
->reg
.data
.id
== 255)
3970 defs
.set(def
->reg
.data
.id
);
3973 for (int s
= 0; insn
->srcExists(s
); s
++) {
3974 const Value
*src
= insn
->src(s
).rep();
3975 if (insn
->src(s
).getFile() != FILE_GPR
)
3977 if (typeSizeof(insn
->sType
) != 4 || src
->reg
.data
.id
== 255)
3979 if (defs
.test(src
->reg
.data
.id
))
3981 if (!next
->srcExists(s
) || next
->src(s
).getFile() != FILE_GPR
)
3983 if (src
->reg
.data
.id
!= next
->getSrc(s
)->reg
.data
.id
)
3991 SchedDataCalculatorGM107::recordWr(const Value
*v
, int cycle
, int ready
)
3993 int a
= v
->reg
.data
.id
, b
;
3995 switch (v
->reg
.file
) {
3997 b
= a
+ v
->reg
.size
/ 4;
3998 for (int r
= a
; r
< b
; ++r
)
3999 score
->rd
.r
[r
] = ready
;
4001 case FILE_PREDICATE
:
4002 // To immediately use a predicate set by any instructions, the minimum
4003 // number of stall counts is 13.
4004 score
->rd
.p
[a
] = cycle
+ 13;
4007 score
->rd
.c
= ready
;
4015 SchedDataCalculatorGM107::checkRd(const Value
*v
, int cycle
, int &delay
) const
4017 int a
= v
->reg
.data
.id
, b
;
4020 switch (v
->reg
.file
) {
4022 b
= a
+ v
->reg
.size
/ 4;
4023 for (int r
= a
; r
< b
; ++r
)
4024 ready
= MAX2(ready
, score
->rd
.r
[r
]);
4026 case FILE_PREDICATE
:
4027 ready
= MAX2(ready
, score
->rd
.p
[a
]);
4030 ready
= MAX2(ready
, score
->rd
.c
);
4036 delay
= MAX2(delay
, ready
- cycle
);
4040 SchedDataCalculatorGM107::commitInsn(const Instruction
*insn
, int cycle
)
4042 const int ready
= cycle
+ targ
->getLatency(insn
);
4044 for (int d
= 0; insn
->defExists(d
); ++d
)
4045 recordWr(insn
->getDef(d
), cycle
, ready
);
4047 #ifdef GM107_DEBUG_SCHED_DATA
4048 score
->print(cycle
);
4052 #define GM107_MIN_ISSUE_DELAY 0x1
4053 #define GM107_MAX_ISSUE_DELAY 0xf
4056 SchedDataCalculatorGM107::calcDelay(const Instruction
*insn
, int cycle
) const
4058 int delay
= 0, ready
= cycle
;
4060 for (int s
= 0; insn
->srcExists(s
); ++s
)
4061 checkRd(insn
->getSrc(s
), cycle
, delay
);
4063 // TODO: make use of getReadLatency()!
4065 return MAX2(delay
, ready
- cycle
);
4069 SchedDataCalculatorGM107::setDelay(Instruction
*insn
, int delay
,
4070 const Instruction
*next
)
4072 const OpClass cl
= targ
->getOpClass(insn
->op
);
4075 if (insn
->op
== OP_EXIT
||
4076 insn
->op
== OP_BAR
||
4077 insn
->op
== OP_MEMBAR
) {
4078 delay
= GM107_MAX_ISSUE_DELAY
;
4080 if (insn
->op
== OP_QUADON
||
4081 insn
->op
== OP_QUADPOP
) {
4084 if (cl
== OPCLASS_FLOW
|| insn
->join
) {
4088 if (!next
|| !targ
->canDualIssue(insn
, next
)) {
4089 delay
= CLAMP(delay
, GM107_MIN_ISSUE_DELAY
, GM107_MAX_ISSUE_DELAY
);
4091 delay
= 0x0; // dual-issue
4094 wr
= getWrDepBar(insn
);
4095 rd
= getRdDepBar(insn
);
4097 if (delay
== GM107_MIN_ISSUE_DELAY
&& (wr
& rd
) != 7) {
4098 // Barriers take one additional clock cycle to become active on top of
4099 // the clock consumed by the instruction producing it.
4100 if (!next
|| insn
->bb
!= next
->bb
) {
4103 int wt
= getWtDepBar(next
);
4104 if ((wt
& (1 << wr
)) | (wt
& (1 << rd
)))
4109 emitStall(insn
, delay
);
4113 // Return true when the given instruction needs to emit a read dependency
4114 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4115 // setting the maximum number of stall counts is not enough.
4117 SchedDataCalculatorGM107::needRdDepBar(const Instruction
*insn
) const
4119 BitSet
srcs(255, 1), defs(255, 1);
4122 if (!targ
->isBarrierRequired(insn
))
4125 // Do not emit a read dependency barrier when the instruction doesn't use
4126 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4127 for (int s
= 0; insn
->srcExists(s
); ++s
) {
4128 const Value
*src
= insn
->src(s
).rep();
4129 if (insn
->src(s
).getFile() != FILE_GPR
)
4131 if (src
->reg
.data
.id
== 255)
4134 a
= src
->reg
.data
.id
;
4135 b
= a
+ src
->reg
.size
/ 4;
4136 for (int r
= a
; r
< b
; ++r
)
4140 if (!srcs
.popCount())
4143 // Do not emit a read dependency barrier when the output GPRs are equal to
4144 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4145 // be produced and WaR hazards are prevented.
4146 for (int d
= 0; insn
->defExists(d
); ++d
) {
4147 const Value
*def
= insn
->def(d
).rep();
4148 if (insn
->def(d
).getFile() != FILE_GPR
)
4150 if (def
->reg
.data
.id
== 255)
4153 a
= def
->reg
.data
.id
;
4154 b
= a
+ def
->reg
.size
/ 4;
4155 for (int r
= a
; r
< b
; ++r
)
4160 if (!srcs
.popCount())
4166 // Return true when the given instruction needs to emit a write dependency
4167 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4168 // setting the maximum number of stall counts is not enough. This is only legal
4169 // if the instruction output something.
4171 SchedDataCalculatorGM107::needWrDepBar(const Instruction
*insn
) const
4173 if (!targ
->isBarrierRequired(insn
))
4176 for (int d
= 0; insn
->defExists(d
); ++d
) {
4177 if (insn
->def(d
).getFile() == FILE_GPR
||
4178 insn
->def(d
).getFile() == FILE_FLAGS
||
4179 insn
->def(d
).getFile() == FILE_PREDICATE
)
4185 // Helper function for findFirstUse() and findFirstDef()
4187 SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction
*insn
,
4188 const Value
*val
) const
4190 if (val
->reg
.file
!= FILE_GPR
&&
4191 val
->reg
.file
!= FILE_PREDICATE
&&
4192 val
->reg
.file
!= FILE_FLAGS
)
4195 for (int d
= 0; insn
->defExists(d
); ++d
) {
4196 const Value
* def
= insn
->getDef(d
);
4197 int minGPR
= def
->reg
.data
.id
;
4198 int maxGPR
= minGPR
+ def
->reg
.size
/ 4 - 1;
4200 if (def
->reg
.file
!= val
->reg
.file
)
4203 if (def
->reg
.file
== FILE_GPR
) {
4204 if (val
->reg
.data
.id
+ val
->reg
.size
/ 4 - 1 < minGPR
||
4205 val
->reg
.data
.id
> maxGPR
)
4209 if (def
->reg
.file
== FILE_PREDICATE
) {
4210 if (val
->reg
.data
.id
!= minGPR
)
4214 if (def
->reg
.file
== FILE_FLAGS
) {
4215 if (val
->reg
.data
.id
!= minGPR
)
4224 // Find the next instruction inside the same basic block which uses (reads or
4225 // writes to) the output of the given instruction in order to avoid RaW and
4226 // WaW hazards.
4228 SchedDataCalculatorGM107::findFirstUse(const Instruction
*bari
) const
4230 Instruction
*insn
, *next
;
4232 if (!bari
->defExists(0))
4235 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
4238 for (int s
= 0; insn
->srcExists(s
); ++s
)
4239 if (doesInsnWriteTo(bari
, insn
->getSrc(s
)))
4242 for (int d
= 0; insn
->defExists(d
); ++d
)
4243 if (doesInsnWriteTo(bari
, insn
->getDef(d
)))
4249 // Find the next instruction inside the same basic block which overwrites, at
4250 // least, one source of the given instruction in order to avoid WaR hazards.
4252 SchedDataCalculatorGM107::findFirstDef(const Instruction
*bari
) const
4254 Instruction
*insn
, *next
;
4256 if (!bari
->srcExists(0))
4259 for (insn
= bari
->next
; insn
!= NULL
; insn
= next
) {
4262 for (int s
= 0; bari
->srcExists(s
); ++s
)
4263 if (doesInsnWriteTo(insn
, bari
->getSrc(s
)))
4269 // Dependency barriers:
4270 // This pass is a bit ugly and could probably be improved by performing a
4271 // better allocation.
4273 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4274 // dependency barriers using the control codes.
4276 SchedDataCalculatorGM107::insertBarriers(BasicBlock
*bb
)
4278 std::list
<LiveBarUse
> live_uses
;
4279 std::list
<LiveBarDef
> live_defs
;
4280 Instruction
*insn
, *next
;
4284 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4285 Instruction
*usei
= NULL
, *defi
= NULL
;
4286 bool need_wr_bar
, need_rd_bar
;
4290 // Expire old barrier uses.
4291 for (std::list
<LiveBarUse
>::iterator it
= live_uses
.begin();
4292 it
!= live_uses
.end();) {
4293 if (insn
->serial
>= it
->usei
->serial
) {
4294 int wr
= getWrDepBar(it
->insn
);
4295 emitWtDepBar(insn
, wr
);
4296 bars
.clr(wr
); // free barrier
4297 it
= live_uses
.erase(it
);
4303 // Expire old barrier defs.
4304 for (std::list
<LiveBarDef
>::iterator it
= live_defs
.begin();
4305 it
!= live_defs
.end();) {
4306 if (insn
->serial
>= it
->defi
->serial
) {
4307 int rd
= getRdDepBar(it
->insn
);
4308 emitWtDepBar(insn
, rd
);
4309 bars
.clr(rd
); // free barrier
4310 it
= live_defs
.erase(it
);
4316 need_wr_bar
= needWrDepBar(insn
);
4317 need_rd_bar
= needRdDepBar(insn
);
4320 // When the instruction requires to emit a write dependency barrier
4321 // (all which write something at a variable latency), find the next
4322 // instruction which reads the outputs (or writes to them, potentially
4323 // completing before this insn).
4324 usei
= findFirstUse(insn
);
4326 // Allocate and emit a new barrier.
4327 bar_id
= bars
.findFreeRange(1);
4331 emitWrDepBar(insn
, bar_id
);
4333 live_uses
.push_back(LiveBarUse(insn
, usei
));
4337 // When the instruction requires to emit a read dependency barrier
4338 // (all which read something at a variable latency), find the next
4339 // instruction which will write the inputs.
4340 defi
= findFirstDef(insn
);
4342 if (usei
&& defi
&& usei
->serial
<= defi
->serial
)
4345 // Allocate and emit a new barrier.
4346 bar_id
= bars
.findFreeRange(1);
4350 emitRdDepBar(insn
, bar_id
);
4352 live_defs
.push_back(LiveBarDef(insn
, defi
));
4356 // Remove unnecessary barrier waits.
4357 BitSet
alive_bars(6, 1);
4358 for (insn
= bb
->getEntry(); insn
!= NULL
; insn
= next
) {
4363 wr
= getWrDepBar(insn
);
4364 rd
= getRdDepBar(insn
);
4365 wt
= getWtDepBar(insn
);
4367 for (int idx
= 0; idx
< 6; ++idx
) {
4368 if (!(wt
& (1 << idx
)))
4370 if (!alive_bars
.test(idx
)) {
4371 insn
->sched
&= ~(1 << (11 + idx
));
4373 alive_bars
.clr(idx
);
4387 SchedDataCalculatorGM107::visit(Function
*func
)
4391 func
->orderInstructions(insns
);
4393 scoreBoards
.resize(func
->cfg
.getSize());
4394 for (size_t i
= 0; i
< scoreBoards
.size(); ++i
)
4395 scoreBoards
[i
].wipe();
4400 SchedDataCalculatorGM107::visit(BasicBlock
*bb
)
4402 Instruction
*insn
, *next
= NULL
;
4405 for (Instruction
*insn
= bb
->getEntry(); insn
; insn
= insn
->next
) {
4407 insn
->sched
= 0x7e0;
4410 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4413 // Insert read/write dependency barriers for instructions which don't
4414 // operate at a fixed latency.
4417 score
= &scoreBoards
.at(bb
->getId());
4419 for (Graph::EdgeIterator ei
= bb
->cfg
.incident(); !ei
.end(); ei
.next()) {
4420 // back branches will wait until all target dependencies are satisfied
4421 if (ei
.getType() == Graph::Edge::BACK
) // sched would be uninitialized
4423 BasicBlock
*in
= BasicBlock::get(ei
.getNode());
4424 score
->setMax(&scoreBoards
.at(in
->getId()));
4427 #ifdef GM107_DEBUG_SCHED_DATA
4428 INFO("=== BB:%i initial scores\n", bb
->getId());
4429 score
->print(cycle
);
4432 // Because barriers are allocated locally (intra-BB), we have to make sure
4433 // that all produced barriers have been consumed before entering inside a
4434 // new basic block. The best way is to do a global allocation pre RA but
4435 // it's really more difficult, especially because of the phi nodes. Anyways,
4436 // it seems that waiting on a barrier which has already been consumed
4437 // doesn't add any additional cost, it's just not elegant!
4438 Instruction
*start
= bb
->getEntry();
4439 if (start
&& bb
->cfg
.incidentCount() > 0) {
4440 for (int b
= 0; b
< 6; b
++)
4441 emitWtDepBar(start
, b
);
4444 for (insn
= bb
->getEntry(); insn
&& insn
->next
; insn
= insn
->next
) {
4447 commitInsn(insn
, cycle
);
4448 int delay
= calcDelay(next
, cycle
);
4449 setDelay(insn
, delay
, next
);
4450 cycle
+= getStall(insn
);
4454 // XXX: The yield flag seems to destroy a bunch of things when it is
4455 // set on every instruction, need investigation.
4458 #ifdef GM107_DEBUG_SCHED_DATA
4459 printSchedInfo(cycle
, insn
);
4467 commitInsn(insn
, cycle
);
4471 #ifdef GM107_DEBUG_SCHED_DATA
4472 fprintf(stderr
, "last instruction is : ");
4474 fprintf(stderr
, "cycle=%d\n", cycle
);
4477 for (Graph::EdgeIterator ei
= bb
->cfg
.outgoing(); !ei
.end(); ei
.next()) {
4478 BasicBlock
*out
= BasicBlock::get(ei
.getNode());
4480 if (ei
.getType() != Graph::Edge::BACK
) {
4481 // Only test the first instruction of the outgoing block.
4482 next
= out
->getEntry();
4484 bbDelay
= MAX2(bbDelay
, calcDelay(next
, cycle
));
4486 // When the outgoing BB is empty, make sure to set the number of
4487 // stall counts needed by the instruction because we don't know the
4488 // next instruction.
4489 bbDelay
= MAX2(bbDelay
, targ
->getLatency(insn
));
4492 // Wait until all dependencies are satisfied.
4493 const int regsFree
= score
->getLatest();
4494 next
= out
->getFirst();
4495 for (int c
= cycle
; next
&& c
< regsFree
; next
= next
->next
) {
4496 bbDelay
= MAX2(bbDelay
, calcDelay(next
, c
));
4497 c
+= getStall(next
);
4502 if (bb
->cfg
.outgoingCount() != 1)
4504 setDelay(insn
, bbDelay
, next
);
4505 cycle
+= getStall(insn
);
4507 score
->rebase(cycle
); // common base for initializing out blocks' scores
4511 /*******************************************************************************
4513 ******************************************************************************/
4516 CodeEmitterGM107::prepareEmission(Function
*func
)
4518 SchedDataCalculatorGM107
sched(targGM107
);
4519 CodeEmitter::prepareEmission(func
);
4520 sched
.run(func
, true, true);
// Return how many 24-byte groups are needed to hold `size` bytes, rounding up.
// The caller multiplies the result by 8 to obtain the extra bytes to add to a
// basic block's size.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bundleBytes = 24;

   return (size + (bundleBytes - 1)) / bundleBytes;
}
4529 CodeEmitterGM107::prepareEmission(Program
*prog
)
4531 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
4532 !fi
.end(); fi
.next()) {
4533 Function
*func
= reinterpret_cast<Function
*>(fi
.get());
4534 func
->binPos
= prog
->binSize
;
4535 prepareEmission(func
);
4537 // adjust sizes & positions for scheduling info:
4538 if (prog
->getTarget()->hasSWSched
) {
4539 uint32_t adjPos
= func
->binPos
;
4540 BasicBlock
*bb
= NULL
;
4541 for (int i
= 0; i
< func
->bbCount
; ++i
) {
4542 bb
= func
->bbArray
[i
];
4543 int32_t adjSize
= bb
->binSize
;
4545 adjSize
-= 32 - adjPos
% 32;
4549 adjSize
= bb
->binSize
+ sizeToBundlesGM107(adjSize
) * 8;
4550 bb
->binPos
= adjPos
;
4551 bb
->binSize
= adjSize
;
4555 func
->binSize
= adjPos
- func
->binPos
;
4558 prog
->binSize
+= func
->binSize
;
4562 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107
*target
)
4563 : CodeEmitter(target
),
4565 writeIssueDelays(target
->hasSWSched
)
4568 codeSize
= codeSizeLimit
= 0;
4573 TargetGM107::createCodeEmitterGM107(Program::Type type
)
4575 CodeEmitterGM107
*emit
= new CodeEmitterGM107(this);
4576 emit
->setProgramType(type
);
4580 } // namespace nv50_ir