2 * Copyright 2020 Red Hat Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 #include "codegen/nv50_ir_emit_gv100.h"
23 #include "codegen/nv50_ir_sched_gm107.h"
27 /*******************************************************************************
28 * instruction format helpers
29 ******************************************************************************/
/* Bits of the `forms` mask taken by emitFormA(): FA_NODEF suppresses the
 * destination GPR, the remaining bits name the operand layouts that
 * emitFormA() asserts on before selecting an encoding.
 */
#define FA_NODEF (0x01)
#define FA_RRR   (0x02)
#define FA_RRI   (0x04)
#define FA_RRC   (0x08)
#define FA_RIR   (0x10)
#define FA_RCR   (0x20)

/* Source descriptor layout used by the emitFormA*() helpers: the low byte
 * is the source index, the two bits above it are the flags forwarded to
 * emitNEG()/emitABS().
 */
#define FA_SRC_MASK (0x0ff)
#define FA_SRC_NEG  (0x100)
#define FA_SRC_ABS  (0x200)

/* Shorthands for building a source descriptor from a source index.
 * NOTE(review): `__` and `_A` are identifiers reserved to the implementation
 * in C++; they are kept because the rest of the file uses them.
 */
#define __(a) (a) // no source modifiers
#define _A(a) ((a) | FA_SRC_ABS)
#define N_(a) ((a) | FA_SRC_NEG)
#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS)
49 CodeEmitterGV100::emitFormA_I32(int src
)
51 emitIMMD(32, 32, insn
->src(src
));
52 if (insn
->src(src
).mod
.abs())
53 code
[1] &= 0x7fffffff;
54 if (insn
->src(src
).mod
.neg())
55 code
[1] ^= 0x80000000;
59 CodeEmitterGV100::emitFormA_RRC(uint16_t op
, int src1
, int src2
)
63 emitNEG (75, (src1
& FA_SRC_MASK
), (src1
& FA_SRC_NEG
));
64 emitABS (74, (src1
& FA_SRC_MASK
), (src1
& FA_SRC_ABS
));
65 emitGPR (64, insn
->src(src1
& FA_SRC_MASK
));
68 emitNEG (63, (src2
& FA_SRC_MASK
), (src2
& FA_SRC_NEG
));
69 emitABS (62, (src2
& FA_SRC_MASK
), (src2
& FA_SRC_ABS
));
70 emitCBUF(54, -1, 38, 0, 2, insn
->src(src2
& FA_SRC_MASK
));
75 CodeEmitterGV100::emitFormA_RRI(uint16_t op
, int src1
, int src2
)
79 emitNEG (75, (src1
& FA_SRC_MASK
), (src1
& FA_SRC_NEG
));
80 emitABS (74, (src1
& FA_SRC_MASK
), (src1
& FA_SRC_ABS
));
81 emitGPR (64, insn
->src(src1
& FA_SRC_MASK
));
84 emitFormA_I32(src2
& FA_SRC_MASK
);
88 CodeEmitterGV100::emitFormA_RRR(uint16_t op
, int src1
, int src2
)
92 emitNEG (75, (src2
& FA_SRC_MASK
), (src2
& FA_SRC_NEG
));
93 emitABS (74, (src2
& FA_SRC_MASK
), (src2
& FA_SRC_ABS
));
94 emitGPR (64, insn
->src(src2
& FA_SRC_MASK
));
98 emitNEG (63, (src1
& FA_SRC_MASK
), (src1
& FA_SRC_NEG
));
99 emitABS (62, (src1
& FA_SRC_MASK
), (src1
& FA_SRC_ABS
));
100 emitGPR (32, insn
->src(src1
& FA_SRC_MASK
));
105 CodeEmitterGV100::emitFormA(uint16_t op
, uint8_t forms
,
106 int src0
, int src1
, int src2
)
108 switch ((src1
< 0) ? FILE_GPR
: insn
->src(src1
& FA_SRC_MASK
).getFile()) {
110 switch ((src2
< 0) ? FILE_GPR
: insn
->src(src2
& FA_SRC_MASK
).getFile()) {
112 assert(forms
& FA_RRR
);
113 emitFormA_RRR((1 << 9) | op
, src1
, src2
);
116 assert(forms
& FA_RRI
);
117 emitFormA_RRI((2 << 9) | op
, src1
, src2
);
119 case FILE_MEMORY_CONST
:
120 assert(forms
& FA_RRC
);
121 emitFormA_RRC((3 << 9) | op
, src1
, src2
);
124 assert(!"bad src2 file");
129 assert((src2
< 0) || insn
->src(src2
& FA_SRC_MASK
).getFile() == FILE_GPR
);
130 assert(forms
& FA_RIR
);
131 emitFormA_RRI((4 << 9) | op
, src2
, src1
);
133 case FILE_MEMORY_CONST
:
134 assert((src2
< 0) || insn
->src(src2
& FA_SRC_MASK
).getFile() == FILE_GPR
);
135 assert(forms
& FA_RCR
);
136 emitFormA_RRC((5 << 9) | op
, src2
, src1
);
139 assert(!"bad src1 file");
144 assert(insn
->src(src0
& FA_SRC_MASK
).getFile() == FILE_GPR
);
145 emitABS(73, (src0
& FA_SRC_MASK
), (src0
& FA_SRC_ABS
));
146 emitNEG(72, (src0
& FA_SRC_MASK
), (src0
& FA_SRC_NEG
));
147 emitGPR(24, insn
->src(src0
& FA_SRC_MASK
));
150 if (!(forms
& FA_NODEF
))
151 emitGPR(16, insn
->def(0));
154 /*******************************************************************************
156 ******************************************************************************/
159 CodeEmitterGV100::emitBRA()
161 const FlowInstruction
*insn
= this->insn
->asFlow();
162 int64_t target
= ((int64_t)insn
->target
.bb
->binPos
- (codeSize
+ 0x10)) / 4;
164 assert(!insn
->indirect
&& !insn
->absolute
);
167 emitField(34, 48, target
);
169 emitField(86, 2, 0); // ./.INC/.DEC
173 CodeEmitterGV100::emitEXIT()
178 emitField(85, 1, 0); // .NO_ATEXIT
179 emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3
183 CodeEmitterGV100::emitKILL()
190 CodeEmitterGV100::emitNOP()
196 CodeEmitterGV100::emitWARPSYNC()
198 emitFormA(0x148, FA_NODEF
| FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, __(0), EMPTY
);
203 /*******************************************************************************
204 * movement / conversion
205 ******************************************************************************/
208 CodeEmitterGV100::emitCS2R()
211 emitSYS (72, insn
->src(0));
212 emitGPR (16, insn
->def(0));
216 CodeEmitterGV100::emitF2F()
218 if (typeSizeof(insn
->sType
) != 8 && typeSizeof(insn
->dType
) != 8)
219 emitFormA(0x104, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, NA(0), EMPTY
);
221 emitFormA(0x110, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, NA(0), EMPTY
);
222 emitField(84, 2, util_logbase2(typeSizeof(insn
->sType
)));
225 emitField(75, 2, util_logbase2(typeSizeof(insn
->dType
)));
226 emitField(60, 2, insn
->subOp
); // ./.H1/.INVALID2/.INVALID3
230 CodeEmitterGV100::emitF2I()
232 if (typeSizeof(insn
->sType
) != 8 && typeSizeof(insn
->dType
) != 8)
233 emitFormA(0x105, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, NA(0), EMPTY
);
235 emitFormA(0x111, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, NA(0), EMPTY
);
236 emitField(84, 2, util_logbase2(typeSizeof(insn
->sType
)));
239 emitField(77, 1, 0); // .NTZ
240 emitField(75, 2, util_logbase2(typeSizeof(insn
->dType
)));
241 emitField(72, 1, isSignedType(insn
->dType
));
245 CodeEmitterGV100::emitFRND()
252 case ROUND_NI
: subop
= 0; break;
253 case ROUND_MI
: subop
= 1; break;
254 case ROUND_PI
: subop
= 2; break;
255 case ROUND_ZI
: subop
= 3; break;
257 assert(!"invalid FRND mode");
261 case OP_FLOOR
: subop
= 1; break;
262 case OP_CEIL
: subop
= 2; break;
263 case OP_TRUNC
: subop
= 3; break;
265 assert(!"invalid FRND opcode");
269 if (typeSizeof(insn
->sType
) != 8 && typeSizeof(insn
->dType
) != 8)
270 emitFormA(0x107, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, NA(0), EMPTY
);
272 emitFormA(0x113, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, NA(0), EMPTY
);
273 emitField(84, 2, util_logbase2(typeSizeof(insn
->sType
)));
275 emitField(78, 2, subop
);
276 emitField(75, 2, util_logbase2(typeSizeof(insn
->dType
)));
280 CodeEmitterGV100::emitI2F()
282 if (typeSizeof(insn
->sType
) != 8 && typeSizeof(insn
->dType
) != 8)
283 emitFormA(0x106, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, __(0), EMPTY
);
285 emitFormA(0x112, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, __(0), EMPTY
);
286 emitField(84, 2, util_logbase2(typeSizeof(insn
->sType
)));
288 emitField(75, 2, util_logbase2(typeSizeof(insn
->dType
)));
289 emitField(74, 1, isSignedType(insn
->sType
));
290 if (typeSizeof(insn
->sType
) == 2)
291 emitField(60, 2, insn
->subOp
>> 1);
293 emitField(60, 2, insn
->subOp
); // ./.B1/.B2/.B3
297 CodeEmitterGV100::emitMOV()
299 switch (insn
->def(0).getFile()) {
301 switch (insn
->src(0).getFile()) {
303 case FILE_MEMORY_CONST
:
305 emitFormA(0x002, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, __(0), EMPTY
);
306 emitField(72, 4, insn
->lanes
);
310 emitGPR (16, insn
->def(0));
312 emitField(32, 32, 0xffffffff);
314 emitPRED (87, insn
->src(0));
317 assert(!"bad src file");
327 emitPRED (81, insn
->def(0));
328 emitCond3(76, CC_NE
);
329 emitGPR (24, insn
->src(0));
333 assert(!"bad dst file");
339 CodeEmitterGV100::emitPRMT()
341 emitFormA(0x016, FA_RRR
| FA_RRI
| FA_RRC
| FA_RIR
| FA_RCR
, __(0), __(1), __(2));
342 emitField(72, 3, insn
->subOp
);
346 CodeEmitterGV100::emitS2R()
349 emitSYS (72, insn
->src(0));
350 emitGPR (16, insn
->def(0));
354 gv100_selpFlip(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
356 int loc
= entry
->loc
;
357 if (data
.force_persample_interp
)
358 code
[loc
+ 2] |= 1 << 26;
360 code
[loc
+ 2] &= ~(1 << 26);
364 CodeEmitterGV100::emitSEL()
366 emitFormA(0x007, FA_RRR
| FA_RIR
| FA_RCR
, __(0), __(1), EMPTY
);
367 emitNOT (90, insn
->src(2));
368 emitPRED (87, insn
->src(2));
369 if (insn
->subOp
== 1)
370 addInterp(0, 0, gv100_selpFlip
);
374 CodeEmitterGV100::emitSHFL()
376 switch (insn
->src(1).getFile()) {
378 switch (insn
->src(2).getFile()) {
381 emitGPR (64, insn
->src(2));
385 emitIMMD(40, 13, insn
->src(2));
388 assert(!"bad src2 file");
391 emitGPR(32, insn
->src(1));
394 switch (insn
->src(2).getFile()) {
397 emitGPR (64, insn
->src(2));
401 emitIMMD(40, 13, insn
->src(2));
404 assert(!"bad src2 file");
407 emitIMMD(53, 5, insn
->src(1));
410 assert(!"bad src1 file");
414 if (insn
->defExists(1))
415 emitPRED(81, insn
->def(1));
419 emitField(58, 2, insn
->subOp
);
420 emitGPR (24, insn
->src(0));
421 emitGPR (16, insn
->def(0));
424 /*******************************************************************************
426 ******************************************************************************/
429 CodeEmitterGV100::emitFADD()
431 if (insn
->src(1).getFile() == FILE_GPR
)
432 emitFormA(0x021, FA_RRR
, NA(0), NA(1), EMPTY
);
434 emitFormA(0x021, FA_RRI
| FA_RRC
, NA(0), EMPTY
, NA(1));
441 CodeEmitterGV100::emitFFMA()
443 emitFormA(0x023, FA_RRR
| FA_RRI
| FA_RRC
| FA_RIR
| FA_RCR
, NA(0), NA(1), NA(2));
444 emitField(80, 1, insn
->ftz
);
447 emitField(76, 1, insn
->dnz
);
451 CodeEmitterGV100::emitFMNMX()
453 emitFormA(0x009, FA_RRR
| FA_RIR
| FA_RCR
, NA(0), NA(1), EMPTY
);
454 emitField(90, 1, insn
->op
== OP_MAX
);
460 CodeEmitterGV100::emitFMUL()
462 emitFormA(0x020, FA_RRR
| FA_RIR
| FA_RCR
, NA(0), NA(1), EMPTY
);
463 emitField(80, 1, insn
->ftz
);
467 emitField(76, 1, insn
->dnz
);
471 CodeEmitterGV100::emitFSET_BF()
473 const CmpInstruction
*insn
= this->insn
->asCmp();
475 emitFormA(0x00a, FA_RRR
| FA_RIR
| FA_RCR
, NA(0), NA(1), EMPTY
);
477 emitCond4(76, insn
->setCond
);
479 if (insn
->op
!= OP_SET
) {
481 case OP_SET_AND
: emitField(74, 2, 0); break;
482 case OP_SET_OR
: emitField(74, 2, 1); break;
483 case OP_SET_XOR
: emitField(74, 2, 2); break;
485 assert(!"invalid set op");
488 emitNOT (90, insn
->src(2));
489 emitPRED(87, insn
->src(2));
496 CodeEmitterGV100::emitFSETP()
498 const CmpInstruction
*insn
= this->insn
->asCmp();
500 emitFormA(0x00b, FA_NODEF
| FA_RRR
| FA_RIR
| FA_RCR
, NA(0), NA(1), EMPTY
);
502 emitCond4(76, insn
->setCond
);
504 if (insn
->op
!= OP_SET
) {
506 case OP_SET_AND
: emitField(74, 2, 0); break;
507 case OP_SET_OR
: emitField(74, 2, 1); break;
508 case OP_SET_XOR
: emitField(74, 2, 2); break;
510 assert(!"invalid set op");
513 emitNOT (90, insn
->src(2));
514 emitPRED(87, insn
->src(2));
519 if (insn
->defExists(1))
520 emitPRED(84, insn
->def(1));
523 emitPRED(81, insn
->def(0));
527 CodeEmitterGV100::emitFSWZADD()
531 // NP/PN swapped vs SM60
532 for (int i
= 0; i
< 4; i
++) {
533 uint8_t p
= ((insn
->subOp
>> (i
* 2)) & 3);
534 if (p
== 1 || p
== 2)
536 subOp
|= p
<< (i
* 2);
542 emitField(77, 1, insn
->lanes
); /* abused for .ndv */
543 emitGPR (64, insn
->src(1));
544 emitField(32, 8, subOp
);
545 emitGPR (24, insn
->src(0));
546 emitGPR (16, insn
->def(0));
550 CodeEmitterGV100::emitMUFU()
555 case OP_COS
: mufu
= 0; break;
556 case OP_SIN
: mufu
= 1; break;
557 case OP_EX2
: mufu
= 2; break;
558 case OP_LG2
: mufu
= 3; break;
559 case OP_RCP
: mufu
= 4 + 2 * insn
->subOp
; break;
560 case OP_RSQ
: mufu
= 5 + 2 * insn
->subOp
; break;
561 case OP_SQRT
: mufu
= 8; break;
563 assert(!"invalid mufu");
567 emitFormA(0x108, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, NA(0), EMPTY
);
568 emitField(74, 4, mufu
);
571 /*******************************************************************************
573 ******************************************************************************/
576 CodeEmitterGV100::emitDADD()
578 emitFormA(0x029, FA_RRR
| FA_RRI
| FA_RRC
, NA(0), EMPTY
, NA(1));
583 CodeEmitterGV100::emitDFMA()
585 emitFormA(0x02b, FA_RRR
| FA_RRI
| FA_RRC
| FA_RIR
| FA_RCR
, NA(0), NA(1), NA(2));
590 CodeEmitterGV100::emitDMUL()
592 emitFormA(0x028, FA_RRR
| FA_RIR
| FA_RCR
, NA(0), NA(1), EMPTY
);
597 CodeEmitterGV100::emitDSETP()
599 const CmpInstruction
*insn
= this->insn
->asCmp();
601 if (insn
->src(1).getFile() == FILE_GPR
)
602 emitFormA(0x02a, FA_NODEF
| FA_RRR
, NA(0), NA(1), EMPTY
);
604 emitFormA(0x02a, FA_NODEF
| FA_RRI
| FA_RRC
, NA(0), EMPTY
, NA(1));
606 if (insn
->op
!= OP_SET
) {
608 case OP_SET_AND
: emitField(74, 2, 0); break;
609 case OP_SET_OR
: emitField(74, 2, 1); break;
610 case OP_SET_XOR
: emitField(74, 2, 2); break;
612 assert(!"invalid set op");
615 emitNOT (90, insn
->src(2));
616 emitPRED(87, insn
->src(2));
621 if (insn
->defExists(1))
622 emitPRED(84, insn
->def(1));
625 emitPRED (81, insn
->def(0));
626 emitCond4(76, insn
->setCond
);
629 /*******************************************************************************
631 ******************************************************************************/
634 CodeEmitterGV100::emitBMSK()
636 emitFormA(0x01b, FA_RRR
| FA_RIR
| FA_RCR
, __(0), __(1), EMPTY
);
637 emitField(75, 1, insn
->subOp
); // .C/.W
641 CodeEmitterGV100::emitBREV()
643 emitFormA(0x101, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, __(0), EMPTY
);
647 CodeEmitterGV100::emitFLO()
649 emitFormA(0x100, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, __(0), EMPTY
);
651 emitField(74, 1, insn
->subOp
== NV50_IR_SUBOP_BFIND_SAMT
);
652 emitField(73, 1, isSignedType(insn
->dType
));
653 emitNOT (63, insn
->src(0));
657 CodeEmitterGV100::emitIABS()
659 emitFormA(0x013, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, __(0), EMPTY
);
663 CodeEmitterGV100::emitIADD3()
665 // emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2));
666 emitFormA(0x010, FA_RRR
| FA_RIR
| FA_RCR
, N_(0), N_(1), EMPTY
);
667 emitGPR (64); //XXX: fix when switching back to N_(2)
668 emitPRED (84, NULL
); // .CC1
669 emitPRED (81, insn
->flagsDef
>= 0 ? insn
->getDef(insn
->flagsDef
) : NULL
);
670 if (insn
->flagsSrc
>= 0) {
671 emitField(74, 1, 1); // .X
672 emitPRED (87, insn
->getSrc(insn
->flagsSrc
));
673 emitField(77, 4, 0xf); // .X1
678 CodeEmitterGV100::emitIMAD()
680 emitFormA(0x024, FA_RRR
| FA_RRI
| FA_RRC
| FA_RIR
| FA_RCR
, __(0), __(1), N_(2));
681 emitField(73, 1, isSignedType(insn
->sType
));
685 CodeEmitterGV100::emitIMAD_WIDE()
687 emitFormA(0x025, FA_RRR
| FA_RRC
| FA_RIR
| FA_RCR
, __(0), __(1), N_(2));
689 emitField(73, 1, isSignedType(insn
->sType
));
693 CodeEmitterGV100::emitISETP()
695 const CmpInstruction
*insn
= this->insn
->asCmp();
697 emitFormA(0x00c, FA_NODEF
| FA_RRR
| FA_RIR
| FA_RCR
, __(0), __(1), EMPTY
);
699 if (insn
->op
!= OP_SET
) {
701 case OP_SET_AND
: emitField(74, 2, 0); break;
702 case OP_SET_OR
: emitField(74, 2, 1); break;
703 case OP_SET_XOR
: emitField(74, 2, 2); break;
705 assert(!"invalid set op");
708 emitNOT (90, insn
->src(2));
709 emitPRED(87, insn
->src(2));
715 if (insn
->flagsSrc
>= 0) {
724 if (insn
->defExists(1))
725 emitPRED(84, insn
->def(1));
728 emitPRED (81, insn
->def(0));
729 emitCond3(76, insn
->setCond
);
730 emitField(73, 1, isSignedType(insn
->sType
));
732 if (insn
->subOp
) { // .EX
735 emitPRED (68, insn
->srcExists(3) ? insn
->src(3) : insn
->src(2));
740 CodeEmitterGV100::emitLEA()
742 assert(insn
->src(1).get()->asImm());
744 emitFormA(0x011, FA_RRR
| FA_RIR
| FA_RCR
, N_(0), N_(2), EMPTY
);
746 emitIMMD (75, 5, insn
->src(1));
751 CodeEmitterGV100::emitLOP3_LUT()
753 emitFormA(0x012, FA_RRR
| FA_RIR
| FA_RCR
, __(0), __(1), __(2));
757 emitField(80, 1, 0); // .PAND
758 emitField(72, 8, insn
->subOp
);
762 CodeEmitterGV100::emitPOPC()
764 emitFormA(0x109, FA_RRR
| FA_RIR
| FA_RCR
, EMPTY
, __(0), EMPTY
);
765 emitNOT (63, insn
->src(0));
769 CodeEmitterGV100::emitSGXT()
771 emitFormA(0x01a, FA_RRR
| FA_RIR
| FA_RCR
, __(0), __(1), EMPTY
);
772 emitField(75, 1, 0); // .W
773 emitField(73, 1, 1); // /.U32
777 CodeEmitterGV100::emitSHF()
779 emitFormA(0x019, FA_RRR
| FA_RRI
| FA_RRC
| FA_RIR
| FA_RCR
, __(0), __(1), __(2));
780 emitField(80, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHF_HI
));
781 emitField(76, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHF_R
));
782 emitField(75, 1, !!(insn
->subOp
& NV50_IR_SUBOP_SHF_W
));
784 switch (insn
->sType
) {
785 case TYPE_S64
: emitField(73, 2, 0); break;
786 case TYPE_U64
: emitField(73, 2, 1); break;
787 case TYPE_S32
: emitField(73, 2, 2); break;
795 /*******************************************************************************
797 ******************************************************************************/
800 CodeEmitterGV100::emitALD()
803 emitField(74, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
804 emitGPR (32, insn
->src(0).getIndirect(1));
807 emitADDR (24, 40, 10, 0, insn
->src(0));
808 emitGPR (16, insn
->def(0));
812 CodeEmitterGV100::emitAST()
815 emitField(74, 2, (typeSizeof(insn
->dType
) / 4) - 1);
816 emitGPR (64, insn
->src(0).getIndirect(1));
818 emitADDR (24, 40, 10, 0, insn
->src(0));
819 emitGPR (32, insn
->src(1));
823 CodeEmitterGV100::emitATOM()
825 unsigned subOp
, dType
;
827 if (insn
->subOp
!= NV50_IR_SUBOP_ATOM_CAS
) {
830 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
834 emitField(87, 4, subOp
);
836 switch (insn
->dType
) {
837 case TYPE_U32
: dType
= 0; break;
838 case TYPE_S32
: dType
= 1; break;
839 case TYPE_U64
: dType
= 2; break;
840 case TYPE_F32
: dType
= 3; break;
841 case TYPE_B128
: dType
= 4; break;
842 case TYPE_S64
: dType
= 5; break;
844 assert(!"unexpected dType");
848 emitField(73, 3, dType
);
852 switch (insn
->dType
) {
853 case TYPE_U32
: dType
= 0; break;
854 case TYPE_U64
: dType
= 2; break;
856 assert(!"unexpected dType");
860 emitField(73, 3, dType
);
861 emitGPR (64, insn
->src(2));
866 emitField(72, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
867 emitGPR (32, insn
->src(1));
868 emitADDR (24, 40, 24, 0, insn
->src(0));
869 emitGPR (16, insn
->def(0));
873 CodeEmitterGV100::emitATOMS()
875 unsigned dType
, subOp
;
877 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
878 switch (insn
->dType
) {
879 case TYPE_U32
: dType
= 0; break;
880 case TYPE_S32
: dType
= 1; break;
881 case TYPE_U64
: dType
= 2; break;
882 default: assert(!"unexpected dType"); dType
= 0; break;
886 emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST
887 emitField(73, 2, dType
);
888 emitGPR (64, insn
->src(2));
892 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
)
896 emitField(87, 4, subOp
);
898 switch (insn
->dType
) {
899 case TYPE_U32
: dType
= 0; break;
900 case TYPE_S32
: dType
= 1; break;
901 case TYPE_U64
: dType
= 2; break;
902 default: assert(!"unexpected dType"); dType
= 0; break;
905 emitField(73, 2, dType
);
908 emitGPR (32, insn
->src(1));
909 emitADDR (24, 40, 24, 0, insn
->src(0));
910 emitGPR (16, insn
->def(0));
914 gv100_interpApply(const FixupEntry
*entry
, uint32_t *code
, const FixupData
& data
)
916 int ipa
= entry
->ipa
;
917 int loc
= entry
->loc
;
919 if (data
.force_persample_interp
&&
920 (ipa
& NV50_IR_INTERP_SAMPLE_MASK
) == NV50_IR_INTERP_DEFAULT
&&
921 (ipa
& NV50_IR_INTERP_MODE_MASK
) != NV50_IR_INTERP_FLAT
) {
922 ipa
|= NV50_IR_INTERP_CENTROID
;
926 switch (ipa
& NV50_IR_INTERP_SAMPLE_MASK
) {
927 case NV50_IR_INTERP_DEFAULT
: sample
= 0; break;
928 case NV50_IR_INTERP_CENTROID
: sample
= 1; break;
929 case NV50_IR_INTERP_OFFSET
: sample
= 2; break;
930 default: assert(!"invalid sample mode");
934 switch (ipa
& NV50_IR_INTERP_MODE_MASK
) {
935 case NV50_IR_INTERP_LINEAR
:
936 case NV50_IR_INTERP_PERSPECTIVE
: interp
= 0; break;
937 case NV50_IR_INTERP_FLAT
: interp
= 1; break;
938 case NV50_IR_INTERP_SC
: interp
= 2; break;
939 default: assert(!"invalid ipa mode");
942 code
[loc
+ 2] &= ~(0xf << 12);
943 code
[loc
+ 2] |= sample
<< 12;
944 code
[loc
+ 2] |= interp
<< 14;
948 CodeEmitterGV100::emitIPA()
951 emitPRED (81, insn
->defExists(1) ? insn
->def(1) : NULL
);
953 switch (insn
->getInterpMode()) {
954 case NV50_IR_INTERP_LINEAR
:
955 case NV50_IR_INTERP_PERSPECTIVE
: emitField(78, 2, 0); break;
956 case NV50_IR_INTERP_FLAT
: emitField(78, 2, 1); break;
957 case NV50_IR_INTERP_SC
: emitField(78, 2, 2); break;
959 assert(!"invalid ipa mode");
963 switch (insn
->getSampleMode()) {
964 case NV50_IR_INTERP_DEFAULT
: emitField(76, 2, 0); break;
965 case NV50_IR_INTERP_CENTROID
: emitField(76, 2, 1); break;
966 case NV50_IR_INTERP_OFFSET
: emitField(76, 2, 2); break;
968 assert(!"invalid sample mode");
972 if (insn
->getSampleMode() != NV50_IR_INTERP_OFFSET
) {
974 addInterp(insn
->ipa
, 0xff, gv100_interpApply
);
976 emitGPR (32, insn
->src(1));
977 addInterp(insn
->ipa
, insn
->getSrc(1)->reg
.data
.id
, gv100_interpApply
);
980 assert(!insn
->src(0).isIndirect(0));
981 emitADDR (-1, 64, 8, 2, insn
->src(0));
982 emitGPR (16, insn
->def(0));
986 CodeEmitterGV100::emitISBERD()
989 emitGPR (24, insn
->src(0));
990 emitGPR (16, insn
->def(0));
994 CodeEmitterGV100::emitLDSTc(int posm
, int poso
)
999 switch (insn
->cache
) {
1000 case CACHE_CA
: mode
= 0; order
= 1; break;
1001 case CACHE_CG
: mode
= 2; order
= 2; break;
1002 case CACHE_CV
: mode
= 3; order
= 2; break;
1004 assert(!"invalid caching mode");
1008 emitField(poso
, 2, order
);
1009 emitField(posm
, 2, mode
);
1013 CodeEmitterGV100::emitLDSTs(int pos
, DataType type
)
1017 switch (typeSizeof(type
)) {
1018 case 1: data
= isSignedType(type
) ? 1 : 0; break;
1019 case 2: data
= isSignedType(type
) ? 3 : 2; break;
1020 case 4: data
= 4; break;
1021 case 8: data
= 5; break;
1022 case 16: data
= 6; break;
1024 assert(!"bad type");
1028 emitField(pos
, 3, data
);
1032 CodeEmitterGV100::emitLD()
1035 emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO
1036 emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
1037 emitLDSTs(73, insn
->dType
);
1038 emitField(72, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
1039 emitADDR (24, 32, 32, 0, insn
->src(0));
1040 emitGPR (16, insn
->def(0));
1044 CodeEmitterGV100::emitLDC()
1046 emitFormA(0x182, FA_RCR
, EMPTY
, __(0), EMPTY
);
1047 emitField(78, 2, insn
->subOp
);
1048 emitLDSTs(73, insn
->dType
);
1049 emitGPR (24, insn
->src(0).getIndirect(0));
1053 CodeEmitterGV100::emitLDL()
1056 emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
1057 emitLDSTs(73, insn
->dType
);
1058 emitADDR (24, 40, 24, 0, insn
->src(0));
1059 emitGPR (16, insn
->def(0));
1063 CodeEmitterGV100::emitLDS()
1066 emitLDSTs(73, insn
->dType
);
1067 emitADDR (24, 40, 24, 0, insn
->src(0));
1068 emitGPR (16, insn
->def(0));
1072 CodeEmitterGV100::emitOUT()
1074 const int cut
= insn
->op
== OP_RESTART
|| insn
->subOp
;
1075 const int emit
= insn
->op
== OP_EMIT
;
1077 if (insn
->op
!= OP_FINAL
)
1078 emitFormA(0x124, FA_RRR
| FA_RIR
, __(0), __(1), EMPTY
);
1080 emitFormA(0x124, FA_RRR
| FA_RIR
, __(0), EMPTY
, EMPTY
);
1081 emitField(78, 2, (cut
<< 1) | emit
);
1085 CodeEmitterGV100::emitRED()
1089 switch (insn
->dType
) {
1090 case TYPE_U32
: dType
= 0; break;
1091 case TYPE_S32
: dType
= 1; break;
1092 case TYPE_U64
: dType
= 2; break;
1093 case TYPE_F32
: dType
= 3; break;
1094 case TYPE_B128
: dType
= 4; break;
1095 case TYPE_S64
: dType
= 5; break;
1096 default: assert(!"unexpected dType"); dType
= 0; break;
1100 emitField(87, 3, insn
->subOp
);
1101 emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
1102 emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3
1103 emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
1104 emitField(73, 3, dType
);
1105 emitField(72, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
1106 emitGPR (32, insn
->src(1));
1107 emitADDR (24, 40, 24, 0, insn
->src(0));
1111 CodeEmitterGV100::emitST()
1114 emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO
1115 emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
1116 emitLDSTs(73, insn
->dType
);
1117 emitField(72, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
1118 emitGPR (64, insn
->src(1));
1119 emitADDR (24, 32, 32, 0, insn
->src(0));
1123 CodeEmitterGV100::emitSTL()
1126 emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
1127 emitLDSTs(73, insn
->dType
);
1128 emitADDR (24, 40, 24, 0, insn
->src(0));
1129 emitGPR (32, insn
->src(1));
1133 CodeEmitterGV100::emitSTS()
1136 emitLDSTs(73, insn
->dType
);
1137 emitADDR (24, 40, 24, 0, insn
->src(0));
1138 emitGPR (32, insn
->src(1));
1141 /*******************************************************************************
1143 ******************************************************************************/
1146 CodeEmitterGV100::emitTEXs(int pos
)
1148 int src1
= insn
->predSrc
== 1 ? 2 : 1;
1149 if (insn
->srcExists(src1
))
1150 emitGPR(pos
, insn
->src(src1
));
1156 CodeEmitterGV100::emitTEX()
1158 const TexInstruction
*insn
= this->insn
->asTex();
1161 if (!insn
->tex
.levelZero
) {
1163 case OP_TEX
: lodm
= 0; break;
1164 case OP_TXB
: lodm
= 2; break;
1165 case OP_TXL
: lodm
= 3; break;
1167 assert(!"invalid tex op");
1174 if (insn
->tex
.rIndirectSrc
< 0) {
1176 emitField(54, 5, prog
->driver
->io
.auxCBSlot
);
1177 emitField(40, 14, insn
->tex
.r
);
1180 emitField(59, 1, 1); // .B
1182 emitField(90, 1, insn
->tex
.liveOnly
); // .NODEP
1183 emitField(87, 3, lodm
);
1184 emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
1185 emitField(78, 1, insn
->tex
.target
.isShadow()); // .DC
1186 emitField(77, 1, insn
->tex
.derivAll
); // .NDV
1187 emitField(76, 1, insn
->tex
.useOffsets
== 1); // .AOFFI
1189 emitGPR (64, insn
->def(1));
1190 emitGPR (16, insn
->def(0));
1191 emitGPR (24, insn
->src(0));
1193 emitField(63, 1, insn
->tex
.target
.isArray());
1194 emitField(61, 2, insn
->tex
.target
.isCube() ? 3 :
1195 insn
->tex
.target
.getDim() - 1);
1196 emitField(72, 4, insn
->tex
.mask
);
1200 CodeEmitterGV100::emitTLD()
1202 const TexInstruction
*insn
= this->insn
->asTex();
1204 if (insn
->tex
.rIndirectSrc
< 0) {
1206 emitField(54, 5, prog
->driver
->io
.auxCBSlot
);
1207 emitField(40, 14, insn
->tex
.r
);
1210 emitField(59, 1, 1); // .B
1212 emitField(90, 1, insn
->tex
.liveOnly
);
1213 emitField(87, 3, insn
->tex
.levelZero
? 1 /* .LZ */ : 3 /* .LL */);
1215 emitField(78, 1, insn
->tex
.target
.isMS());
1216 emitField(76, 1, insn
->tex
.useOffsets
== 1);
1217 emitField(72, 4, insn
->tex
.mask
);
1218 emitGPR (64, insn
->def(1));
1219 emitField(63, 1, insn
->tex
.target
.isArray());
1220 emitField(61, 2, insn
->tex
.target
.isCube() ? 3 :
1221 insn
->tex
.target
.getDim() - 1);
1223 emitGPR (24, insn
->src(0));
1224 emitGPR (16, insn
->def(0));
1228 CodeEmitterGV100::emitTLD4()
1230 const TexInstruction
*insn
= this->insn
->asTex();
1233 switch (insn
->tex
.useOffsets
) {
1234 case 4: offsets
= 2; break;
1235 case 1: offsets
= 1; break;
1236 case 0: offsets
= 0; break;
1237 default: assert(!"invalid offsets count"); break;
1240 if (insn
->tex
.rIndirectSrc
< 0) {
1242 emitField(54, 5, prog
->driver
->io
.auxCBSlot
);
1243 emitField(40, 14, insn
->tex
.r
);
1246 emitField(59, 1, 1); // .B
1248 emitField(90, 1, insn
->tex
.liveOnly
);
1249 emitField(87, 2, insn
->tex
.gatherComp
);
1250 emitField(84, 1, 1); // !.EF
1252 emitField(78, 1, insn
->tex
.target
.isShadow());
1253 emitField(76, 2, offsets
);
1254 emitField(72, 4, insn
->tex
.mask
);
1255 emitGPR (64, insn
->def(1));
1256 emitField(63, 1, insn
->tex
.target
.isArray());
1257 emitField(61, 2, insn
->tex
.target
.isCube() ? 3 :
1258 insn
->tex
.target
.getDim() - 1);
1260 emitGPR (24, insn
->src(0));
1261 emitGPR (16, insn
->def(0));
1265 CodeEmitterGV100::emitTMML()
1267 const TexInstruction
*insn
= this->insn
->asTex();
1269 if (insn
->tex
.rIndirectSrc
< 0) {
1271 emitField(54, 5, prog
->driver
->io
.auxCBSlot
);
1272 emitField(40, 14, insn
->tex
.r
);
1275 emitField(59, 1, 1); // .B
1277 emitField(90, 1, insn
->tex
.liveOnly
);
1278 emitField(77, 1, insn
->tex
.derivAll
);
1279 emitField(72, 4, insn
->tex
.mask
);
1280 emitGPR (64, insn
->def(1));
1281 emitField(63, 1, insn
->tex
.target
.isArray());
1282 emitField(61, 2, insn
->tex
.target
.isCube() ? 3 :
1283 insn
->tex
.target
.getDim() - 1);
1285 emitGPR (24, insn
->src(0));
1286 emitGPR (16, insn
->def(0));
1290 CodeEmitterGV100::emitTXD()
1292 const TexInstruction
*insn
= this->insn
->asTex();
1294 if (insn
->tex
.rIndirectSrc
< 0) {
1296 emitField(54, 5, prog
->driver
->io
.auxCBSlot
);
1297 emitField(40, 14, insn
->tex
.r
);
1300 emitField(59, 1, 1); // .B
1302 emitField(90, 1, insn
->tex
.liveOnly
);
1304 emitField(76, 1, insn
->tex
.useOffsets
== 1);
1305 emitField(72, 4, insn
->tex
.mask
);
1306 emitGPR (64, insn
->def(1));
1307 emitField(63, 1, insn
->tex
.target
.isArray());
1308 emitField(61, 2, insn
->tex
.target
.isCube() ? 3 :
1309 insn
->tex
.target
.getDim() - 1);
1311 emitGPR (24, insn
->src(0));
1312 emitGPR (16, insn
->def(0));
1316 CodeEmitterGV100::emitTXQ()
1318 const TexInstruction
*insn
= this->insn
->asTex();
1321 switch (insn
->tex
.query
) {
1322 case TXQ_DIMS
: type
= 0x00; break;
1323 case TXQ_TYPE
: type
= 0x01; break;
1324 case TXQ_SAMPLE_POSITION
: type
= 0x02; break;
1326 assert(!"invalid txq query");
1330 if (insn
->tex
.rIndirectSrc
< 0) {
1332 emitField(54, 5, prog
->driver
->io
.auxCBSlot
);
1333 emitField(40, 14, insn
->tex
.r
);
1336 emitField(59, 1, 1); // .B
1338 emitField(90, 1, insn
->tex
.liveOnly
);
1339 emitField(72, 4, insn
->tex
.mask
);
1340 emitGPR (64, insn
->def(1));
1341 emitField(62, 2, type
);
1342 emitGPR (24, insn
->src(0));
1343 emitGPR (16, insn
->def(0));
1346 /*******************************************************************************
1348 ******************************************************************************/
1351 CodeEmitterGV100::emitSUHandle(const int s
)
1353 const TexInstruction
*insn
= this->insn
->asTex();
1355 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
1357 if (insn
->src(s
).getFile() == FILE_GPR
) {
1358 emitGPR(64, insn
->src(s
));
1362 ImmediateValue
*imm
= insn
->getSrc(s
)->asImm();
1364 emitField(0x33, 1, 1);
1365 emitField(0x24, 13, imm
->reg
.data
.u32
);
1370 CodeEmitterGV100::emitSUTarget()
1372 const TexInstruction
*insn
= this->insn
->asTex();
1375 assert(insn
->op
>= OP_SULDB
&& insn
->op
<= OP_SUREDP
);
1377 if (insn
->tex
.target
== TEX_TARGET_BUFFER
) {
1379 } else if (insn
->tex
.target
== TEX_TARGET_1D_ARRAY
) {
1381 } else if (insn
->tex
.target
== TEX_TARGET_2D
||
1382 insn
->tex
.target
== TEX_TARGET_RECT
) {
1384 } else if (insn
->tex
.target
== TEX_TARGET_2D_ARRAY
||
1385 insn
->tex
.target
== TEX_TARGET_CUBE
||
1386 insn
->tex
.target
== TEX_TARGET_CUBE_ARRAY
) {
1388 } else if (insn
->tex
.target
== TEX_TARGET_3D
) {
1391 assert(insn
->tex
.target
== TEX_TARGET_1D
);
1393 emitField(61, 3, target
);
1397 CodeEmitterGV100::emitSUATOM()
1399 const TexInstruction
*insn
= this->insn
->asTex();
1400 uint8_t type
= 0, subOp
;
1402 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
)
1403 emitInsn(0x396); // SUATOM.D.CAS
1405 emitInsn(0x394); // SUATOM.D
1410 switch (insn
->dType
) {
1411 case TYPE_S32
: type
= 1; break;
1412 case TYPE_U64
: type
= 2; break;
1413 case TYPE_F32
: type
= 3; break;
1414 case TYPE_S64
: type
= 5; break;
1416 assert(insn
->dType
== TYPE_U32
);
1421 if (insn
->subOp
== NV50_IR_SUBOP_ATOM_CAS
) {
1423 } else if (insn
->subOp
== NV50_IR_SUBOP_ATOM_EXCH
) {
1426 subOp
= insn
->subOp
;
1429 emitField(87, 4, subOp
);
1431 emitField(79, 2, 1);
1432 emitField(73, 3, type
);
1433 emitField(72, 1, 0); // .BA
1434 emitGPR (32, insn
->src(1));
1435 emitGPR (24, insn
->src(0));
1436 emitGPR (16, insn
->def(0));
// Encode a surface load.
// For OP_SULDB, insn->dType is mapped to a `type` code (U8=0, S8=1, U16=2,
// S16=3, U32=4, U64=5, B128=6) that is written with emitField(73, 3, type).
// The other encoding seen here writes emitField(72, 4, 0xf), annotated
// "rgba". def(0) is emitted at 16 and src(0) at 24.
1442 CodeEmitterGV100::emitSULD()
1444 const TexInstruction
*insn
= this->insn
->asTex();
1447 if (insn
->op
== OP_SULDB
) {
1451 switch (insn
->dType
) {
1452 case TYPE_U8
: type
= 0; break;
1453 case TYPE_S8
: type
= 1; break;
1454 case TYPE_U16
: type
= 2; break;
1455 case TYPE_S16
: type
= 3; break;
1456 case TYPE_U32
: type
= 4; break;
1457 case TYPE_U64
: type
= 5; break;
1458 case TYPE_B128
: type
= 6; break;
1463 emitField(73, 3, type
);
// NOTE(review): presumably the non-SULDB form of the load - confirm.
1467 emitField(72, 4, 0xf); // rgba
1473 emitGPR (16, insn
->def(0));
1474 emitGPR (24, insn
->src(0));
// Encode a surface store with opcode 0x99c (SUST.P).
// The OP_SUSTB test guards emitField(0x34, 1, 1).
// NOTE(review): confirm that this check is active in the build.
// emitField(72, 4, 0xf) carries the value annotated "rgba"; src(1) is
// emitted at 32 and src(0) at 24.
1480 CodeEmitterGV100::emitSUST()
1482 const TexInstruction
*insn
= this->insn
->asTex();
1484 emitInsn(0x99c); // SUST.P
1486 if (insn
->op
== OP_SUSTB
)
1487 emitField(0x34, 1, 1);
1492 emitField(72, 4, 0xf); // rgba
1493 emitGPR(32, insn
->src(1));
1494 emitGPR(24, insn
->src(0));
1498 /*******************************************************************************
1500 ******************************************************************************/
// Encode AL2P for the current instruction.
//  - emitField(74, 2, ...) receives (size of def(0) / 4) - 1.
//  - emitField(40, 11, ...) receives the data offset of src(0).
//  - The indirect register of src(0) is emitted at 24, def(0) at 16.
1503 CodeEmitterGV100::emitAL2P()
1507 emitField(74, 2, (insn
->getDef(0)->reg
.size
/ 4) - 1);
1508 emitField(40, 11, insn
->src(0).get()->reg
.data
.offset
);
1509 emitGPR (24, insn
->src(0).getIndirect(0));
1510 emitGPR (16, insn
->def(0));
// Encode a barrier instruction.
//  - insn->subOp selects (subop, redop): RED_POPC -> (0x02, 0x00),
//    RED_AND -> (0x02, 0x01), RED_OR -> (0x02, 0x02), ARRIVE -> subop 0x01;
//    the assert expects BAR_SYNC for the remaining case. redop defaults
//    to 0x00.
//  - The opcode is (n << 9) | 0x11d, where n depends on whether src(0) and
//    src(1) live in GPRs (n = 1, 4 or 5 in the forms seen here).
//  - subop goes to emitField(77, 2, ...), redop to emitField(74, 2, ...).
//  - The field at 87 takes a predicate from src(2) when that source exists
//    and predSrc != 2; emitField(87, 3, 7) is the other encoding of it.
1514 CodeEmitterGV100::emitBAR()
1516 uint8_t subop
, redop
= 0x00;
1519 // 01: DEFER_BLOCKING
1530 switch (insn
->subOp
) {
1531 case NV50_IR_SUBOP_BAR_RED_POPC
: subop
= 0x02; redop
= 0x00; break;
1532 case NV50_IR_SUBOP_BAR_RED_AND
: subop
= 0x02; redop
= 0x01; break;
1533 case NV50_IR_SUBOP_BAR_RED_OR
: subop
= 0x02; redop
= 0x02; break;
1534 case NV50_IR_SUBOP_BAR_ARRIVE
: subop
= 0x01; break;
1537 assert(insn
->subOp
== NV50_IR_SUBOP_BAR_SYNC
);
// src(0) in a GPR: variant 1, with that register emitted at 32.
1541 if (insn
->src(0).getFile() == FILE_GPR
) {
1542 emitInsn ((1 << 9) | 0x11d);
1543 emitGPR (32, insn
->src(0)); //XXX: nvdisasm shows src0==src1
// src(0) read as an immediate; its u32 value ends up in emitField(54, 4, ...).
1545 ImmediateValue
*imm
= insn
->getSrc(0)->asImm();
1547 if (insn
->src(1).getFile() == FILE_GPR
) {
1548 emitInsn ((4 << 9) | 0x11d);
1549 emitGPR (32, insn
->src(1));
1551 emitInsn ((5 << 9) | 0x11d);
1553 emitField(54, 4, imm
->reg
.data
.u32
);
1556 emitField(77, 2, subop
);
1557 emitField(74, 2, redop
);
// Optional predicate operand: bit 90 is set when src(2) carries the NOT
// modifier, and the predicate itself is emitted at 87.
1559 if (insn
->srcExists(2) && (insn
->predSrc
!= 2)) {
1560 emitField(90, 1, insn
->src(2).mod
== Modifier(NV50_IR_MOD_NOT
));
1561 emitPRED (87, insn
->src(2));
1563 emitField(87, 3, 7);
// Encode CCTL for the current instruction.
//  - The FILE_MEMORY_GLOBAL test on src(0) decides what is emitted first.
//    NOTE(review): presumably the opcode variant - confirm.
//  - insn->subOp is written with emitField(87, 4, ...).
//  - emitField(72, 1, ...) is 1 when the indirect register of src(0) has
//    size 8.
//  - The address is emitted with emitADDR(24, 32, 32, 0, src(0)).
1568 CodeEmitterGV100::emitCCTL()
1570 if (insn
->src(0).getFile() == FILE_MEMORY_GLOBAL
)
1574 emitField(87, 4, insn
->subOp
);
1575 emitField(72, 1, insn
->src(0).getIndirect(0)->getSize() == 8);
1576 emitADDR (24, 32, 32, 0, insn
->src(0));
// Encode a memory barrier. The scope extracted from insn->subOp with
// NV50_IR_SUBOP_MEMBAR_SCOPE() is written via emitField(76, 3, ...):
// CTA -> 0, GL -> 2, SYS -> 3. The "invalid scope" assert covers any other
// scope value.
1580 CodeEmitterGV100::emitMEMBAR()
1583 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn
->subOp
)) {
1584 case NV50_IR_SUBOP_MEMBAR_CTA
: emitField(76, 3, 0); break;
1585 case NV50_IR_SUBOP_MEMBAR_GL
: emitField(76, 3, 2); break;
1586 case NV50_IR_SUBOP_MEMBAR_SYS
: emitField(76, 3, 3); break;
1588 assert(!"invalid scope");
// Encode PIXLD. insn->subOp selects the value given to emitField(78, 3, ...):
// COVMASK -> 1 and SAMPLEID -> 3 (.MY_INDEX). def(0) is emitted at 16.
1594 CodeEmitterGV100::emitPIXLD()
1597 switch (insn
->subOp
) {
1598 case NV50_IR_SUBOP_PIXLD_COVMASK
: emitField(78, 3, 1); break; // .COVMASK
1599 case NV50_IR_SUBOP_PIXLD_SAMPLEID
: emitField(78, 3, 3); break; // .MY_INDEX
1605 emitGPR (16, insn
->def(0));
// Encode a two-input predicate logic operation as PLOP3 driven by a
// lookup table. op[0] is obtained by applying the operator (AND, OR or XOR)
// to the constants 0xf0 and 0xcc; any other operator trips "invalid PLOP3".
// op[0] is split across two fields: op[0] >> 3 goes to emitField(72, 5, ...)
// and op[0] & 7 to emitField(64, 3, ...). op[1] goes to emitField(16, 8, ...).
// Slots annotated def(1) and src(2) are emitted without an operand.
1609 CodeEmitterGV100::emitPLOP3_LUT()
1614 case OP_AND
: op
[0] = 0xf0 & 0xcc; break;
1615 case OP_OR
: op
[0] = 0xf0 | 0xcc; break;
1616 case OP_XOR
: op
[0] = 0xf0 ^ 0xcc; break;
1618 assert(!"invalid PLOP3");
1623 emitNOT (90, insn
->src(0));
1624 emitPRED(87, insn
->src(0));
1625 emitPRED(84); // def(1)
1626 emitPRED(81, insn
->def(0));
1627 emitNOT (80, insn
->src(1));
1628 emitPRED(77, insn
->src(1));
1629 emitField(72, 5, op
[0] >> 3);
1630 emitNOT (71); // src(2)
1631 emitPRED(68); // src(2)
1632 emitField(64, 3, op
[0] & 7);
1633 emitField(16, 8, op
[1]);
// Encode a vote instruction.
//  - The defs are scanned and classified by register file (FILE_GPR vs.
//    FILE_PREDICATE). NOTE(review): `r` and `p` used below are presumably
//    the indices found by this scan - confirm.
//  - insn->subOp is written with emitField(72, 2, ...).
//  - def(r) is emitted at 16 and def(p) at 81.
//  - src(0) may be a predicate (emitted at 87, NOT modifier in bit 90) or an
//    immediate that must be 0 or 1; other files trip "Unhandled src".
1637 CodeEmitterGV100::emitVOTE()
1639 const ImmediateValue
*imm
;
1643 for (int i
= 0; insn
->defExists(i
); i
++) {
1644 if (insn
->def(i
).getFile() == FILE_GPR
)
1646 else if (insn
->def(i
).getFile() == FILE_PREDICATE
)
1651 emitField(72, 2, insn
->subOp
);
1653 emitGPR (16, insn
->def(r
));
1657 emitPRED (81, insn
->def(p
));
1661 switch (insn
->src(0).getFile()) {
1662 case FILE_PREDICATE
:
1663 emitField(90, 1, insn
->src(0).mod
== Modifier(NV50_IR_MOD_NOT
));
1664 emitPRED (87, insn
->src(0));
1666 case FILE_IMMEDIATE
:
1667 imm
= insn
->getSrc(0)->asImm();
// Only the constants 0 and 1 are accepted; bit 90 is set for 0.
1669 u32
= imm
->reg
.data
.u32
;
1670 assert(u32
== 0 || u32
== 1);
1671 emitField(90, 1, u32
== 0);
1675 assert(!"Unhandled src");
// Top-level encoder entry point for one instruction `i`.
// The checks below choose an emitter from properties of the instruction:
// its dType/sType (float vs. non-float, F32 vs. other, 8-byte vs. other),
// the register file of def(0) and src(0) (predicate, const/local/shared/
// global memory) and its subOp. Combinations that are not supported trip
// asserts such as "invalid logop", "invalid load" or "invalid opcode".
// At the end, code[3] keeps only its low 9 bits and insn->sched is OR-ed in
// shifted left by 9.
1681 CodeEmitterGV100::emitInstruction(Instruction
*i
)
1687 assert(!isFloatType(insn
->dType
));
1691 if (isFloatType(insn
->dType
)) {
1692 if (insn
->dType
== TYPE_F32
)
1706 if (insn
->def(0).getFile() == FILE_PREDICATE
) {
1709 assert(!"invalid logop");
1714 if (insn
->src(0).getFile() == FILE_MEMORY_SHARED
)
1717 if (!insn
->defExists(0) && insn
->subOp
< NV50_IR_SUBOP_ATOM_CAS
)
// Conversions: predicate-related OP_CVT is tested first, then the choice
// depends on whether dType and sType are float types.
1745 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
1746 insn
->src(0).getFile() == FILE_PREDICATE
)) {
1748 } else if (isFloatType(insn
->dType
)) {
1749 if (isFloatType(insn
->sType
)) {
1750 if (insn
->sType
== insn
->dType
)
1758 if (isFloatType(insn
->sType
)) {
1791 if (isFloatType(insn
->dType
)) {
1792 if (insn
->dType
== TYPE_F32
)
1797 if (typeSizeof(insn
->dType
) != 8)
1803 case OP_JOINAT
: //XXX
// Loads dispatch on the memory file of src(0).
1810 switch (insn
->src(0).getFile()) {
1811 case FILE_MEMORY_CONST
: emitLDC(); break;
1812 case FILE_MEMORY_LOCAL
: emitLDL(); break;
1813 case FILE_MEMORY_SHARED
: emitLDS(); break;
1814 case FILE_MEMORY_GLOBAL
: emitLD(); break;
1816 assert(!"invalid load");
1826 if (isFloatType(insn
->dType
)) {
1827 if (insn
->dType
== TYPE_F32
) {
1830 assert(!"invalid FMNMX");
1834 assert(!"invalid MNMX");
1845 if (isFloatType(insn
->dType
)) {
1846 if (insn
->dType
== TYPE_F32
)
1851 assert(!"invalid IMUL");
1871 if (targ
->isCS2RSV(insn
->getSrc(0)->reg
.data
.sv
.sv
))
1883 if (insn
->def(0).getFile() != FILE_PREDICATE
) {
1884 if (isFloatType(insn
->dType
)) {
1885 if (insn
->dType
== TYPE_F32
) {
1888 assert(!"invalid FSET");
1892 assert(!"invalid SET");
1896 if (isFloatType(insn
->sType
))
1897 if (insn
->sType
== TYPE_F64
)
// Stores dispatch on the memory file of src(0).
1918 switch (insn
->src(0).getFile()) {
1919 case FILE_MEMORY_LOCAL
: emitSTL(); break;
1920 case FILE_MEMORY_SHARED
: emitSTS(); break;
1921 case FILE_MEMORY_GLOBAL
: emitST(); break;
1923 assert(!"invalid store");
1970 assert(!"invalid opcode");
// Keep the low 9 bits of code[3] and place insn->sched above them.
1975 code
[3] &= 0x000001ff;
1976 code
[3] |= insn
->sched
<< 9;
// Lay out basic block `bb` inside its function before encoding.
//  1. Skip backwards over already-placed blocks whose binSize is zero.
//  2. Walk the remaining placed blocks backwards. When a block's exit is an
//     OP_BRA whose target is `bb`, 16 is subtracted from func->binSize and
//     from the binPos of every later block in func->bbArray.
//  3. bb->binPos is set to the end of the inspected block (binPos + binSize).
//  4. `bb` is appended to func->bbArray, every instruction gets its encSize
//     from getMinEncodingSize(), and the sizes are accumulated into
//     bb->binSize and then func->binSize.
// A non-empty block is asserted to have an exit instruction of encSize 16.
1983 CodeEmitterGV100::prepareEmission(BasicBlock
*bb
)
1985 Function
*func
= bb
->getFunction();
// Empty-bodied loop: only positions j on the last block with binSize != 0.
1989 for (j
= func
->bbCount
- 1; j
>= 0 && !func
->bbArray
[j
]->binSize
; --j
);
1991 for (; j
>= 0; --j
) {
1992 BasicBlock
*in
= func
->bbArray
[j
];
1993 Instruction
*exit
= in
->getExit();
// Exit branch of `in` goes straight to `bb`.
1995 if (exit
&& exit
->op
== OP_BRA
&& exit
->asFlow()->target
.bb
== bb
) {
1997 func
->binSize
-= 16;
// Shift every block placed after `in` down by 16.
1999 for (++j
; j
< func
->bbCount
; ++j
)
2000 func
->bbArray
[j
]->binPos
-= 16;
2004 bb
->binPos
= in
->binPos
+ in
->binSize
;
2005 if (in
->binSize
) // no more no-op branches to bb
2008 func
->bbArray
[func
->bbCount
++] = bb
;
// Size the block from the minimum encoding size of each instruction.
2013 for (i
= bb
->getEntry(); i
; i
= i
->next
) {
2014 i
->encSize
= getMinEncodingSize(i
);
2015 bb
->binSize
+= i
->encSize
;
2018 assert(!bb
->getEntry() || (bb
->getExit() && bb
->getExit()->encSize
== 16));
2020 func
->binSize
+= bb
->binSize
;
// Prepare `func` for emission: run the generic CodeEmitter::prepareEmission()
// on it, then run a SchedDataCalculatorGM107 built from `targ` via
// sched.run(func, true, true).
2024 CodeEmitterGV100::prepareEmission(Function
*func
)
2026 SchedDataCalculatorGM107
sched(targ
);
2027 CodeEmitter::prepareEmission(func
);
2028 sched
.run(func
, true, true);
// Prepare every function of `prog` for emission. Each function in
// prog->allFuncs gets binPos set to the program's running binSize, is
// prepared with prepareEmission(func), and then adds its own binSize to
// prog->binSize.
2032 CodeEmitterGV100::prepareEmission(Program
*prog
)
2034 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
2035 !fi
.end(); fi
.next()) {
2036 Function
*func
= reinterpret_cast<Function
*>(fi
.get());
2037 func
->binPos
= prog
->binSize
;
2038 prepareEmission(func
);
2039 prog
->binSize
+= func
->binSize
;
2045 CodeEmitterGV100::CodeEmitterGV100(TargetGV100
*target
)
2046 : CodeEmitter(target
), targ(target
)
2049 codeSize
= codeSizeLimit
= 0;