nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gv100.cpp
1 /*
2 * Copyright 2020 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #include "codegen/nv50_ir_emit_gv100.h"
23 #include "codegen/nv50_ir_sched_gm107.h"
24
25 namespace nv50_ir {
26
27 /*******************************************************************************
28 * instruction format helpers
29 ******************************************************************************/
30
/* Encodings emitFormA() is allowed to choose from (bitmask passed as 'forms').
 * The three letters name the files emitFormA() sees for the sources:
 * R = GPR, I = immediate, C = constant buffer.
 */
#define FA_NODEF (1 << 0) // do not emit def(0) into the destination field
#define FA_RRR   (1 << 1)
#define FA_RRI   (1 << 2)
#define FA_RRC   (1 << 3)
#define FA_RIR   (1 << 4)
#define FA_RCR   (1 << 5)

/* A source argument carries the source index in its low byte; the bits above
 * it are forwarded as the last argument of emitNEG()/emitABS().
 */
#define FA_SRC_MASK 0x0ff
#define FA_SRC_NEG  0x100
#define FA_SRC_ABS  0x200

// Marks an unused source slot.  Parenthesized so the macro expands to a single
// primary expression wherever it is used.
#define EMPTY (-1)
#define __(a) (a) // no source modifiers
#define _A(a) ((a) | FA_SRC_ABS)
#define N_(a) ((a) | FA_SRC_NEG)
#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS)
47
48 void
49 CodeEmitterGV100::emitFormA_I32(int src)
50 {
51 emitIMMD(32, 32, insn->src(src));
52 if (insn->src(src).mod.abs())
53 code[1] &= 0x7fffffff;
54 if (insn->src(src).mod.neg())
55 code[1] ^= 0x80000000;
56 }
57
58 void
59 CodeEmitterGV100::emitFormA_RRC(uint16_t op, int src1, int src2)
60 {
61 emitInsn(op);
62 if (src1 >= 0) {
63 emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
64 emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
65 emitGPR (64, insn->src(src1 & FA_SRC_MASK));
66 }
67 if (src2 >= 0) {
68 emitNEG (63, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
69 emitABS (62, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
70 emitCBUF(54, -1, 38, 0, 2, insn->src(src2 & FA_SRC_MASK));
71 }
72 }
73
74 void
75 CodeEmitterGV100::emitFormA_RRI(uint16_t op, int src1, int src2)
76 {
77 emitInsn(op);
78 if (src1 >= 0) {
79 emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
80 emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
81 emitGPR (64, insn->src(src1 & FA_SRC_MASK));
82 }
83 if (src2 >= 0)
84 emitFormA_I32(src2 & FA_SRC_MASK);
85 }
86
87 void
88 CodeEmitterGV100::emitFormA_RRR(uint16_t op, int src1, int src2)
89 {
90 emitInsn(op);
91 if (src2 >= 0) {
92 emitNEG (75, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
93 emitABS (74, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
94 emitGPR (64, insn->src(src2 & FA_SRC_MASK));
95 }
96
97 if (src1 >= 0) {
98 emitNEG (63, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
99 emitABS (62, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
100 emitGPR (32, insn->src(src1 & FA_SRC_MASK));
101 }
102 }
103
104 void
105 CodeEmitterGV100::emitFormA(uint16_t op, uint8_t forms,
106 int src0, int src1, int src2)
107 {
108 switch ((src1 < 0) ? FILE_GPR : insn->src(src1 & FA_SRC_MASK).getFile()) {
109 case FILE_GPR:
110 switch ((src2 < 0) ? FILE_GPR : insn->src(src2 & FA_SRC_MASK).getFile()) {
111 case FILE_GPR:
112 assert(forms & FA_RRR);
113 emitFormA_RRR((1 << 9) | op, src1, src2);
114 break;
115 case FILE_IMMEDIATE:
116 assert(forms & FA_RRI);
117 emitFormA_RRI((2 << 9) | op, src1, src2);
118 break;
119 case FILE_MEMORY_CONST:
120 assert(forms & FA_RRC);
121 emitFormA_RRC((3 << 9) | op, src1, src2);
122 break;
123 default:
124 assert(!"bad src2 file");
125 break;
126 }
127 break;
128 case FILE_IMMEDIATE:
129 assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
130 assert(forms & FA_RIR);
131 emitFormA_RRI((4 << 9) | op, src2, src1);
132 break;
133 case FILE_MEMORY_CONST:
134 assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
135 assert(forms & FA_RCR);
136 emitFormA_RRC((5 << 9) | op, src2, src1);
137 break;
138 default:
139 assert(!"bad src1 file");
140 break;
141 }
142
143 if (src0 >= 0) {
144 assert(insn->src(src0 & FA_SRC_MASK).getFile() == FILE_GPR);
145 emitABS(73, (src0 & FA_SRC_MASK), (src0 & FA_SRC_ABS));
146 emitNEG(72, (src0 & FA_SRC_MASK), (src0 & FA_SRC_NEG));
147 emitGPR(24, insn->src(src0 & FA_SRC_MASK));
148 }
149
150 if (!(forms & FA_NODEF))
151 emitGPR(16, insn->def(0));
152 }
153
154 /*******************************************************************************
155 * control
156 ******************************************************************************/
157
158 void
159 CodeEmitterGV100::emitBRA()
160 {
161 const FlowInstruction *insn = this->insn->asFlow();
162 int64_t target = ((int64_t)insn->target.bb->binPos - (codeSize + 0x10)) / 4;
163
164 assert(!insn->indirect && !insn->absolute);
165
166 emitInsn (0x947);
167 emitField(34, 48, target);
168 emitPRED (87);
169 emitField(86, 2, 0); // ./.INC/.DEC
170 }
171
172 void
173 CodeEmitterGV100::emitEXIT()
174 {
175 emitInsn (0x94d);
176 emitNOT (90);
177 emitPRED (87);
178 emitField(85, 1, 0); // .NO_ATEXIT
179 emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3
180 }
181
182 void
183 CodeEmitterGV100::emitKILL()
184 {
185 emitInsn(0x95b);
186 emitPRED(87);
187 }
188
189 void
190 CodeEmitterGV100::emitNOP()
191 {
192 emitInsn(0x918);
193 }
194
195 void
196 CodeEmitterGV100::emitWARPSYNC()
197 {
198 emitFormA(0x148, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
199 emitNOT (90);
200 emitPRED (87);
201 }
202
203 /*******************************************************************************
204 * movement / conversion
205 ******************************************************************************/
206
207 void
208 CodeEmitterGV100::emitCS2R()
209 {
210 emitInsn(0x805);
211 emitSYS (72, insn->src(0));
212 emitGPR (16, insn->def(0));
213 }
214
215 void
216 CodeEmitterGV100::emitF2F()
217 {
218 if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
219 emitFormA(0x104, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
220 else
221 emitFormA(0x110, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
222 emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
223 emitFMZ (80, 1);
224 emitRND (78);
225 emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
226 emitField(60, 2, insn->subOp); // ./.H1/.INVALID2/.INVALID3
227 }
228
229 void
230 CodeEmitterGV100::emitF2I()
231 {
232 if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
233 emitFormA(0x105, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
234 else
235 emitFormA(0x111, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
236 emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
237 emitFMZ (80, 1);
238 emitRND (78);
239 emitField(77, 1, 0); // .NTZ
240 emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
241 emitField(72, 1, isSignedType(insn->dType));
242 }
243
244 void
245 CodeEmitterGV100::emitFRND()
246 {
247 int subop = 0;
248
249 switch (insn->op) {
250 case OP_CVT:
251 switch (insn->rnd) {
252 case ROUND_NI: subop = 0; break;
253 case ROUND_MI: subop = 1; break;
254 case ROUND_PI: subop = 2; break;
255 case ROUND_ZI: subop = 3; break;
256 default:
257 assert(!"invalid FRND mode");
258 break;
259 }
260 break;
261 case OP_FLOOR: subop = 1; break;
262 case OP_CEIL : subop = 2; break;
263 case OP_TRUNC: subop = 3; break;
264 default:
265 assert(!"invalid FRND opcode");
266 break;
267 }
268
269 if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
270 emitFormA(0x107, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
271 else
272 emitFormA(0x113, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
273 emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
274 emitFMZ (80, 1);
275 emitField(78, 2, subop);
276 emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
277 }
278
279 void
280 CodeEmitterGV100::emitI2F()
281 {
282 if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
283 emitFormA(0x106, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
284 else
285 emitFormA(0x112, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
286 emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
287 emitRND (78);
288 emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
289 emitField(74, 1, isSignedType(insn->sType));
290 if (typeSizeof(insn->sType) == 2)
291 emitField(60, 2, insn->subOp >> 1);
292 else
293 emitField(60, 2, insn->subOp); // ./.B1/.B2/.B3
294 }
295
296 void
297 CodeEmitterGV100::emitMOV()
298 {
299 switch (insn->def(0).getFile()) {
300 case FILE_GPR:
301 switch (insn->src(0).getFile()) {
302 case FILE_GPR:
303 case FILE_MEMORY_CONST:
304 case FILE_IMMEDIATE:
305 emitFormA(0x002, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
306 emitField(72, 4, insn->lanes);
307 break;
308 case FILE_PREDICATE:
309 emitInsn (0x807);
310 emitGPR (16, insn->def(0));
311 emitGPR (24);
312 emitField(32, 32, 0xffffffff);
313 emitField(90, 1, 1);
314 emitPRED (87, insn->src(0));
315 break;
316 default:
317 assert(!"bad src file");
318 break;
319 }
320 break;
321 case FILE_PREDICATE:
322 emitInsn (0x20c);
323 emitPRED (87);
324 emitPRED (84);
325 emitNOT (71);
326 emitPRED (68);
327 emitPRED (81, insn->def(0));
328 emitCond3(76, CC_NE);
329 emitGPR (24, insn->src(0));
330 emitGPR (32);
331 break;
332 default:
333 assert(!"bad dst file");
334 break;
335 }
336 }
337
338 void
339 CodeEmitterGV100::emitPRMT()
340 {
341 emitFormA(0x016, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
342 emitField(72, 3, insn->subOp);
343 }
344
345 void
346 CodeEmitterGV100::emitS2R()
347 {
348 emitInsn(0x919);
349 emitSYS (72, insn->src(0));
350 emitGPR (16, insn->def(0));
351 }
352
353 void
354 gv100_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
355 {
356 int loc = entry->loc;
357 if (data.force_persample_interp)
358 code[loc + 2] |= 1 << 26;
359 else
360 code[loc + 2] &= ~(1 << 26);
361 }
362
363 void
364 CodeEmitterGV100::emitSEL()
365 {
366 emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
367 emitNOT (90, insn->src(2));
368 emitPRED (87, insn->src(2));
369 if (insn->subOp == 1)
370 addInterp(0, 0, gv100_selpFlip);
371 }
372
373 void
374 CodeEmitterGV100::emitSHFL()
375 {
376 switch (insn->src(1).getFile()) {
377 case FILE_GPR:
378 switch (insn->src(2).getFile()) {
379 case FILE_GPR:
380 emitInsn(0x389);
381 emitGPR (64, insn->src(2));
382 break;
383 case FILE_IMMEDIATE:
384 emitInsn(0x589);
385 emitIMMD(40, 13, insn->src(2));
386 break;
387 default:
388 assert(!"bad src2 file");
389 break;
390 }
391 emitGPR(32, insn->src(1));
392 break;
393 case FILE_IMMEDIATE:
394 switch (insn->src(2).getFile()) {
395 case FILE_GPR:
396 emitInsn(0x989);
397 emitGPR (64, insn->src(2));
398 break;
399 case FILE_IMMEDIATE:
400 emitInsn(0xf89);
401 emitIMMD(40, 13, insn->src(2));
402 break;
403 default:
404 assert(!"bad src2 file");
405 break;
406 }
407 emitIMMD(53, 5, insn->src(1));
408 break;
409 default:
410 assert(!"bad src1 file");
411 break;
412 }
413
414 if (insn->defExists(1))
415 emitPRED(81, insn->def(1));
416 else
417 emitPRED(81);
418
419 emitField(58, 2, insn->subOp);
420 emitGPR (24, insn->src(0));
421 emitGPR (16, insn->def(0));
422 }
423
424 /*******************************************************************************
425 * fp32
426 ******************************************************************************/
427
428 void
429 CodeEmitterGV100::emitFADD()
430 {
431 if (insn->src(1).getFile() == FILE_GPR)
432 emitFormA(0x021, FA_RRR , NA(0), NA(1), EMPTY);
433 else
434 emitFormA(0x021, FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
435 emitFMZ (80, 1);
436 emitRND (78);
437 emitSAT (77);
438 }
439
440 void
441 CodeEmitterGV100::emitFFMA()
442 {
443 emitFormA(0x023, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
444 emitField(80, 1, insn->ftz);
445 emitRND (78);
446 emitSAT (77);
447 emitField(76, 1, insn->dnz);
448 }
449
450 void
451 CodeEmitterGV100::emitFMNMX()
452 {
453 emitFormA(0x009, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
454 emitField(90, 1, insn->op == OP_MAX);
455 emitPRED (87);
456 emitFMZ (80, 1);
457 }
458
459 void
460 CodeEmitterGV100::emitFMUL()
461 {
462 emitFormA(0x020, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
463 emitField(80, 1, insn->ftz);
464 emitPDIV (84);
465 emitRND (78);
466 emitSAT (77);
467 emitField(76, 1, insn->dnz);
468 }
469
470 void
471 CodeEmitterGV100::emitFSET_BF()
472 {
473 const CmpInstruction *insn = this->insn->asCmp();
474
475 emitFormA(0x00a, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
476 emitFMZ (80, 1);
477 emitCond4(76, insn->setCond);
478
479 if (insn->op != OP_SET) {
480 switch (insn->op) {
481 case OP_SET_AND: emitField(74, 2, 0); break;
482 case OP_SET_OR : emitField(74, 2, 1); break;
483 case OP_SET_XOR: emitField(74, 2, 2); break;
484 default:
485 assert(!"invalid set op");
486 break;
487 }
488 emitNOT (90, insn->src(2));
489 emitPRED(87, insn->src(2));
490 } else {
491 emitPRED(87);
492 }
493 }
494
495 void
496 CodeEmitterGV100::emitFSETP()
497 {
498 const CmpInstruction *insn = this->insn->asCmp();
499
500 emitFormA(0x00b, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
501 emitFMZ (80, 1);
502 emitCond4(76, insn->setCond);
503
504 if (insn->op != OP_SET) {
505 switch (insn->op) {
506 case OP_SET_AND: emitField(74, 2, 0); break;
507 case OP_SET_OR : emitField(74, 2, 1); break;
508 case OP_SET_XOR: emitField(74, 2, 2); break;
509 default:
510 assert(!"invalid set op");
511 break;
512 }
513 emitNOT (90, insn->src(2));
514 emitPRED(87, insn->src(2));
515 } else {
516 emitPRED(87);
517 }
518
519 if (insn->defExists(1))
520 emitPRED(84, insn->def(1));
521 else
522 emitPRED(84);
523 emitPRED(81, insn->def(0));
524 }
525
526 void
527 CodeEmitterGV100::emitFSWZADD()
528 {
529 uint8_t subOp = 0;
530
531 // NP/PN swapped vs SM60
532 for (int i = 0; i < 4; i++) {
533 uint8_t p = ((insn->subOp >> (i * 2)) & 3);
534 if (p == 1 || p == 2)
535 p ^= 3;
536 subOp |= p << (i * 2);
537 }
538
539 emitInsn (0x822);
540 emitFMZ (80, 1);
541 emitRND (78);
542 emitField(77, 1, insn->lanes); /* abused for .ndv */
543 emitGPR (64, insn->src(1));
544 emitField(32, 8, subOp);
545 emitGPR (24, insn->src(0));
546 emitGPR (16, insn->def(0));
547 }
548
549 void
550 CodeEmitterGV100::emitMUFU()
551 {
552 int mufu = 0;
553
554 switch (insn->op) {
555 case OP_COS : mufu = 0; break;
556 case OP_SIN : mufu = 1; break;
557 case OP_EX2 : mufu = 2; break;
558 case OP_LG2 : mufu = 3; break;
559 case OP_RCP : mufu = 4 + 2 * insn->subOp; break;
560 case OP_RSQ : mufu = 5 + 2 * insn->subOp; break;
561 case OP_SQRT: mufu = 8; break;
562 default:
563 assert(!"invalid mufu");
564 break;
565 }
566
567 emitFormA(0x108, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
568 emitField(74, 4, mufu);
569 }
570
571 /*******************************************************************************
572 * fp64
573 ******************************************************************************/
574
575 void
576 CodeEmitterGV100::emitDADD()
577 {
578 emitFormA(0x029, FA_RRR | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
579 emitRND(78);
580 }
581
582 void
583 CodeEmitterGV100::emitDFMA()
584 {
585 emitFormA(0x02b, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
586 emitRND(78);
587 }
588
589 void
590 CodeEmitterGV100::emitDMUL()
591 {
592 emitFormA(0x028, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
593 emitRND(78);
594 }
595
596 void
597 CodeEmitterGV100::emitDSETP()
598 {
599 const CmpInstruction *insn = this->insn->asCmp();
600
601 if (insn->src(1).getFile() == FILE_GPR)
602 emitFormA(0x02a, FA_NODEF | FA_RRR , NA(0), NA(1), EMPTY);
603 else
604 emitFormA(0x02a, FA_NODEF | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
605
606 if (insn->op != OP_SET) {
607 switch (insn->op) {
608 case OP_SET_AND: emitField(74, 2, 0); break;
609 case OP_SET_OR : emitField(74, 2, 1); break;
610 case OP_SET_XOR: emitField(74, 2, 2); break;
611 default:
612 assert(!"invalid set op");
613 break;
614 }
615 emitNOT (90, insn->src(2));
616 emitPRED(87, insn->src(2));
617 } else {
618 emitPRED(87);
619 }
620
621 if (insn->defExists(1))
622 emitPRED(84, insn->def(1));
623 else
624 emitPRED(84);
625 emitPRED (81, insn->def(0));
626 emitCond4(76, insn->setCond);
627 }
628
629 /*******************************************************************************
630 * integer
631 ******************************************************************************/
632
633 void
634 CodeEmitterGV100::emitBMSK()
635 {
636 emitFormA(0x01b, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
637 emitField(75, 1, insn->subOp); // .C/.W
638 }
639
640 void
641 CodeEmitterGV100::emitBREV()
642 {
643 emitFormA(0x101, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
644 }
645
646 void
647 CodeEmitterGV100::emitFLO()
648 {
649 emitFormA(0x100, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
650 emitPRED (81);
651 emitField(74, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
652 emitField(73, 1, isSignedType(insn->dType));
653 emitNOT (63, insn->src(0));
654 }
655
656 void
657 CodeEmitterGV100::emitIABS()
658 {
659 emitFormA(0x013, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
660 }
661
662 void
663 CodeEmitterGV100::emitIADD3()
664 {
665 // emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2));
666 emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), EMPTY);
667 emitGPR (64); //XXX: fix when switching back to N_(2)
668 emitPRED (84, NULL); // .CC1
669 emitPRED (81, insn->flagsDef >= 0 ? insn->getDef(insn->flagsDef) : NULL);
670 if (insn->flagsSrc >= 0) {
671 emitField(74, 1, 1); // .X
672 emitPRED (87, insn->getSrc(insn->flagsSrc));
673 emitField(77, 4, 0xf); // .X1
674 }
675 }
676
677 void
678 CodeEmitterGV100::emitIMAD()
679 {
680 emitFormA(0x024, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
681 emitField(73, 1, isSignedType(insn->sType));
682 }
683
684 void
685 CodeEmitterGV100::emitIMAD_WIDE()
686 {
687 emitFormA(0x025, FA_RRR | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
688 emitPRED (81);
689 emitField(73, 1, isSignedType(insn->sType));
690 }
691
692 void
693 CodeEmitterGV100::emitISETP()
694 {
695 const CmpInstruction *insn = this->insn->asCmp();
696
697 emitFormA(0x00c, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
698
699 if (insn->op != OP_SET) {
700 switch (insn->op) {
701 case OP_SET_AND: emitField(74, 2, 0); break;
702 case OP_SET_OR : emitField(74, 2, 1); break;
703 case OP_SET_XOR: emitField(74, 2, 2); break;
704 default:
705 assert(!"invalid set op");
706 break;
707 }
708 emitNOT (90, insn->src(2));
709 emitPRED(87, insn->src(2));
710 } else {
711 emitPRED(87);
712 }
713
714 //XXX: CC->pred
715 if (insn->flagsSrc >= 0) {
716 assert(0);
717 emitField(68, 4, 6);
718 } else {
719 emitNOT (71);
720 if (!insn->subOp)
721 emitPRED(68);
722 }
723
724 if (insn->defExists(1))
725 emitPRED(84, insn->def(1));
726 else
727 emitPRED(84);
728 emitPRED (81, insn->def(0));
729 emitCond3(76, insn->setCond);
730 emitField(73, 1, isSignedType(insn->sType));
731
732 if (insn->subOp) { // .EX
733 assert(0);
734 emitField(72, 1, 1);
735 emitPRED (68, insn->srcExists(3) ? insn->src(3) : insn->src(2));
736 }
737 }
738
739 void
740 CodeEmitterGV100::emitLEA()
741 {
742 assert(insn->src(1).get()->asImm());
743
744 emitFormA(0x011, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(2), EMPTY);
745 emitPRED (81);
746 emitIMMD (75, 5, insn->src(1));
747 emitGPR (64);
748 }
749
750 void
751 CodeEmitterGV100::emitLOP3_LUT()
752 {
753 emitFormA(0x012, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), __(2));
754 emitField(90, 1, 1);
755 emitPRED (87);
756 emitPRED (81);
757 emitField(80, 1, 0); // .PAND
758 emitField(72, 8, insn->subOp);
759 }
760
761 void
762 CodeEmitterGV100::emitPOPC()
763 {
764 emitFormA(0x109, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
765 emitNOT (63, insn->src(0));
766 }
767
768 void
769 CodeEmitterGV100::emitSGXT()
770 {
771 emitFormA(0x01a, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
772 emitField(75, 1, 0); // .W
773 emitField(73, 1, 1); // /.U32
774 }
775
776 void
777 CodeEmitterGV100::emitSHF()
778 {
779 emitFormA(0x019, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
780 emitField(80, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_HI));
781 emitField(76, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_R));
782 emitField(75, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_W));
783
784 switch (insn->sType) {
785 case TYPE_S64: emitField(73, 2, 0); break;
786 case TYPE_U64: emitField(73, 2, 1); break;
787 case TYPE_S32: emitField(73, 2, 2); break;
788 case TYPE_U32:
789 default:
790 emitField(73, 2, 3);
791 break;
792 }
793 }
794
795 /*******************************************************************************
796 * load/stores
797 ******************************************************************************/
798
799 void
800 CodeEmitterGV100::emitALD()
801 {
802 emitInsn (0x321);
803 emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1);
804 emitGPR (32, insn->src(0).getIndirect(1));
805 emitO (79);
806 emitP (76);
807 emitADDR (24, 40, 10, 0, insn->src(0));
808 emitGPR (16, insn->def(0));
809 }
810
811 void
812 CodeEmitterGV100::emitAST()
813 {
814 emitInsn (0x322);
815 emitField(74, 2, (typeSizeof(insn->dType) / 4) - 1);
816 emitGPR (64, insn->src(0).getIndirect(1));
817 emitP (76);
818 emitADDR (24, 40, 10, 0, insn->src(0));
819 emitGPR (32, insn->src(1));
820 }
821
822 void
823 CodeEmitterGV100::emitATOM()
824 {
825 unsigned subOp, dType;
826
827 if (insn->subOp != NV50_IR_SUBOP_ATOM_CAS) {
828 emitInsn(0x38a);
829
830 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
831 subOp = 8;
832 else
833 subOp = insn->subOp;
834 emitField(87, 4, subOp);
835
836 switch (insn->dType) {
837 case TYPE_U32 : dType = 0; break;
838 case TYPE_S32 : dType = 1; break;
839 case TYPE_U64 : dType = 2; break;
840 case TYPE_F32 : dType = 3; break;
841 case TYPE_B128: dType = 4; break;
842 case TYPE_S64 : dType = 5; break;
843 default:
844 assert(!"unexpected dType");
845 dType = 0;
846 break;
847 }
848 emitField(73, 3, dType);
849 } else {
850 emitInsn(0x38b);
851
852 switch (insn->dType) {
853 case TYPE_U32: dType = 0; break;
854 case TYPE_U64: dType = 2; break;
855 default:
856 assert(!"unexpected dType");
857 dType = 0;
858 break;
859 }
860 emitField(73, 3, dType);
861 emitGPR (64, insn->src(2));
862 }
863
864 emitPRED (81);
865 emitField(79, 2, 1);
866 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
867 emitGPR (32, insn->src(1));
868 emitADDR (24, 40, 24, 0, insn->src(0));
869 emitGPR (16, insn->def(0));
870 }
871
872 void
873 CodeEmitterGV100::emitATOMS()
874 {
875 unsigned dType, subOp;
876
877 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
878 switch (insn->dType) {
879 case TYPE_U32: dType = 0; break;
880 case TYPE_S32: dType = 1; break;
881 case TYPE_U64: dType = 2; break;
882 default: assert(!"unexpected dType"); dType = 0; break;
883 }
884
885 emitInsn (0x38d);
886 emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST
887 emitField(73, 2, dType);
888 emitGPR (64, insn->src(2));
889 } else {
890 emitInsn(0x38c);
891
892 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
893 subOp = 8;
894 else
895 subOp = insn->subOp;
896 emitField(87, 4, subOp);
897
898 switch (insn->dType) {
899 case TYPE_U32: dType = 0; break;
900 case TYPE_S32: dType = 1; break;
901 case TYPE_U64: dType = 2; break;
902 default: assert(!"unexpected dType"); dType = 0; break;
903 }
904
905 emitField(73, 2, dType);
906 }
907
908 emitGPR (32, insn->src(1));
909 emitADDR (24, 40, 24, 0, insn->src(0));
910 emitGPR (16, insn->def(0));
911 }
912
913 void
914 gv100_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
915 {
916 int ipa = entry->ipa;
917 int loc = entry->loc;
918
919 if (data.force_persample_interp &&
920 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
921 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
922 ipa |= NV50_IR_INTERP_CENTROID;
923 }
924
925 int sample;
926 switch (ipa & NV50_IR_INTERP_SAMPLE_MASK) {
927 case NV50_IR_INTERP_DEFAULT : sample = 0; break;
928 case NV50_IR_INTERP_CENTROID: sample = 1; break;
929 case NV50_IR_INTERP_OFFSET : sample = 2; break;
930 default: assert(!"invalid sample mode");
931 }
932
933 int interp;
934 switch (ipa & NV50_IR_INTERP_MODE_MASK) {
935 case NV50_IR_INTERP_LINEAR :
936 case NV50_IR_INTERP_PERSPECTIVE: interp = 0; break;
937 case NV50_IR_INTERP_FLAT : interp = 1; break;
938 case NV50_IR_INTERP_SC : interp = 2; break;
939 default: assert(!"invalid ipa mode");
940 }
941
942 code[loc + 2] &= ~(0xf << 12);
943 code[loc + 2] |= sample << 12;
944 code[loc + 2] |= interp << 14;
945 }
946
947 void
948 CodeEmitterGV100::emitIPA()
949 {
950 emitInsn (0x326);
951 emitPRED (81, insn->defExists(1) ? insn->def(1) : NULL);
952
953 switch (insn->getInterpMode()) {
954 case NV50_IR_INTERP_LINEAR :
955 case NV50_IR_INTERP_PERSPECTIVE: emitField(78, 2, 0); break;
956 case NV50_IR_INTERP_FLAT : emitField(78, 2, 1); break;
957 case NV50_IR_INTERP_SC : emitField(78, 2, 2); break;
958 default:
959 assert(!"invalid ipa mode");
960 break;
961 }
962
963 switch (insn->getSampleMode()) {
964 case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break;
965 case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break;
966 case NV50_IR_INTERP_OFFSET : emitField(76, 2, 2); break;
967 default:
968 assert(!"invalid sample mode");
969 break;
970 }
971
972 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) {
973 emitGPR (32);
974 addInterp(insn->ipa, 0xff, gv100_interpApply);
975 } else {
976 emitGPR (32, insn->src(1));
977 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gv100_interpApply);
978 }
979
980 assert(!insn->src(0).isIndirect(0));
981 emitADDR (-1, 64, 8, 2, insn->src(0));
982 emitGPR (16, insn->def(0));
983 }
984
985 void
986 CodeEmitterGV100::emitISBERD()
987 {
988 emitInsn(0x923);
989 emitGPR (24, insn->src(0));
990 emitGPR (16, insn->def(0));
991 }
992
993 void
994 CodeEmitterGV100::emitLDSTc(int posm, int poso)
995 {
996 int mode = 0;
997 int order = 1;
998
999 switch (insn->cache) {
1000 case CACHE_CA: mode = 0; order = 1; break;
1001 case CACHE_CG: mode = 2; order = 2; break;
1002 case CACHE_CV: mode = 3; order = 2; break;
1003 default:
1004 assert(!"invalid caching mode");
1005 break;
1006 }
1007
1008 emitField(poso, 2, order);
1009 emitField(posm, 2, mode);
1010 }
1011
1012 void
1013 CodeEmitterGV100::emitLDSTs(int pos, DataType type)
1014 {
1015 int data = 0;
1016
1017 switch (typeSizeof(type)) {
1018 case 1: data = isSignedType(type) ? 1 : 0; break;
1019 case 2: data = isSignedType(type) ? 3 : 2; break;
1020 case 4: data = 4; break;
1021 case 8: data = 5; break;
1022 case 16: data = 6; break;
1023 default:
1024 assert(!"bad type");
1025 break;
1026 }
1027
1028 emitField(pos, 3, data);
1029 }
1030
1031 void
1032 CodeEmitterGV100::emitLD()
1033 {
1034 emitInsn (0x980);
1035 emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO
1036 emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
1037 emitLDSTs(73, insn->dType);
1038 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
1039 emitADDR (24, 32, 32, 0, insn->src(0));
1040 emitGPR (16, insn->def(0));
1041 }
1042
1043 void
1044 CodeEmitterGV100::emitLDC()
1045 {
1046 emitFormA(0x182, FA_RCR, EMPTY, __(0), EMPTY);
1047 emitField(78, 2, insn->subOp);
1048 emitLDSTs(73, insn->dType);
1049 emitGPR (24, insn->src(0).getIndirect(0));
1050 }
1051
1052 void
1053 CodeEmitterGV100::emitLDL()
1054 {
1055 emitInsn (0x983);
1056 emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
1057 emitLDSTs(73, insn->dType);
1058 emitADDR (24, 40, 24, 0, insn->src(0));
1059 emitGPR (16, insn->def(0));
1060 }
1061
1062 void
1063 CodeEmitterGV100::emitLDS()
1064 {
1065 emitInsn (0x984);
1066 emitLDSTs(73, insn->dType);
1067 emitADDR (24, 40, 24, 0, insn->src(0));
1068 emitGPR (16, insn->def(0));
1069 }
1070
1071 void
1072 CodeEmitterGV100::emitOUT()
1073 {
1074 const int cut = insn->op == OP_RESTART || insn->subOp;
1075 const int emit = insn->op == OP_EMIT;
1076
1077 if (insn->op != OP_FINAL)
1078 emitFormA(0x124, FA_RRR | FA_RIR, __(0), __(1), EMPTY);
1079 else
1080 emitFormA(0x124, FA_RRR | FA_RIR, __(0), EMPTY, EMPTY);
1081 emitField(78, 2, (cut << 1) | emit);
1082 }
1083
1084 void
1085 CodeEmitterGV100::emitRED()
1086 {
1087 unsigned dType;
1088
1089 switch (insn->dType) {
1090 case TYPE_U32: dType = 0; break;
1091 case TYPE_S32: dType = 1; break;
1092 case TYPE_U64: dType = 2; break;
1093 case TYPE_F32: dType = 3; break;
1094 case TYPE_B128: dType = 4; break;
1095 case TYPE_S64: dType = 5; break;
1096 default: assert(!"unexpected dType"); dType = 0; break;
1097 }
1098
1099 emitInsn (0x98e);
1100 emitField(87, 3, insn->subOp);
1101 emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
1102 emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3
1103 emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
1104 emitField(73, 3, dType);
1105 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
1106 emitGPR (32, insn->src(1));
1107 emitADDR (24, 40, 24, 0, insn->src(0));
1108 }
1109
1110 void
1111 CodeEmitterGV100::emitST()
1112 {
1113 emitInsn (0x385);
1114 emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO
1115 emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
1116 emitLDSTs(73, insn->dType);
1117 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
1118 emitGPR (64, insn->src(1));
1119 emitADDR (24, 32, 32, 0, insn->src(0));
1120 }
1121
1122 void
1123 CodeEmitterGV100::emitSTL()
1124 {
1125 emitInsn (0x387);
1126 emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
1127 emitLDSTs(73, insn->dType);
1128 emitADDR (24, 40, 24, 0, insn->src(0));
1129 emitGPR (32, insn->src(1));
1130 }
1131
1132 void
1133 CodeEmitterGV100::emitSTS()
1134 {
1135 emitInsn (0x388);
1136 emitLDSTs(73, insn->dType);
1137 emitADDR (24, 40, 24, 0, insn->src(0));
1138 emitGPR (32, insn->src(1));
1139 }
1140
1141 /*******************************************************************************
1142 * texture
1143 ******************************************************************************/
1144
1145 void
1146 CodeEmitterGV100::emitTEXs(int pos)
1147 {
1148 int src1 = insn->predSrc == 1 ? 2 : 1;
1149 if (insn->srcExists(src1))
1150 emitGPR(pos, insn->src(src1));
1151 else
1152 emitGPR(pos);
1153 }
1154
1155 void
1156 CodeEmitterGV100::emitTEX()
1157 {
1158 const TexInstruction *insn = this->insn->asTex();
1159 int lodm = 0;
1160
1161 if (!insn->tex.levelZero) {
1162 switch (insn->op) {
1163 case OP_TEX: lodm = 0; break;
1164 case OP_TXB: lodm = 2; break;
1165 case OP_TXL: lodm = 3; break;
1166 default:
1167 assert(!"invalid tex op");
1168 break;
1169 }
1170 } else {
1171 lodm = 1;
1172 }
1173
1174 if (insn->tex.rIndirectSrc < 0) {
1175 emitInsn (0xb60);
1176 emitField(54, 5, prog->driver->io.auxCBSlot);
1177 emitField(40, 14, insn->tex.r);
1178 } else {
1179 emitInsn (0x361);
1180 emitField(59, 1, 1); // .B
1181 }
1182 emitField(90, 1, insn->tex.liveOnly); // .NODEP
1183 emitField(87, 3, lodm);
1184 emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
1185 emitField(78, 1, insn->tex.target.isShadow()); // .DC
1186 emitField(77, 1, insn->tex.derivAll); // .NDV
1187 emitField(76, 1, insn->tex.useOffsets == 1); // .AOFFI
1188 emitPRED (81);
1189 emitGPR (64, insn->def(1));
1190 emitGPR (16, insn->def(0));
1191 emitGPR (24, insn->src(0));
1192 emitTEXs (32);
1193 emitField(63, 1, insn->tex.target.isArray());
1194 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1195 insn->tex.target.getDim() - 1);
1196 emitField(72, 4, insn->tex.mask);
1197 }
1198
1199 void
1200 CodeEmitterGV100::emitTLD()
1201 {
1202 const TexInstruction *insn = this->insn->asTex();
1203
1204 if (insn->tex.rIndirectSrc < 0) {
1205 emitInsn (0xb66);
1206 emitField(54, 5, prog->driver->io.auxCBSlot);
1207 emitField(40, 14, insn->tex.r);
1208 } else {
1209 emitInsn (0x367);
1210 emitField(59, 1, 1); // .B
1211 }
1212 emitField(90, 1, insn->tex.liveOnly);
1213 emitField(87, 3, insn->tex.levelZero ? 1 /* .LZ */ : 3 /* .LL */);
1214 emitPRED (81);
1215 emitField(78, 1, insn->tex.target.isMS());
1216 emitField(76, 1, insn->tex.useOffsets == 1);
1217 emitField(72, 4, insn->tex.mask);
1218 emitGPR (64, insn->def(1));
1219 emitField(63, 1, insn->tex.target.isArray());
1220 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1221 insn->tex.target.getDim() - 1);
1222 emitTEXs (32);
1223 emitGPR (24, insn->src(0));
1224 emitGPR (16, insn->def(0));
1225 }
1226
1227 void
1228 CodeEmitterGV100::emitTLD4()
1229 {
1230 const TexInstruction *insn = this->insn->asTex();
1231
1232 int offsets = 0;
1233 switch (insn->tex.useOffsets) {
1234 case 4: offsets = 2; break;
1235 case 1: offsets = 1; break;
1236 case 0: offsets = 0; break;
1237 default: assert(!"invalid offsets count"); break;
1238 }
1239
1240 if (insn->tex.rIndirectSrc < 0) {
1241 emitInsn (0xb63);
1242 emitField(54, 5, prog->driver->io.auxCBSlot);
1243 emitField(40, 14, insn->tex.r);
1244 } else {
1245 emitInsn (0x364);
1246 emitField(59, 1, 1); // .B
1247 }
1248 emitField(90, 1, insn->tex.liveOnly);
1249 emitField(87, 2, insn->tex.gatherComp);
1250 emitField(84, 1, 1); // !.EF
1251 emitPRED (81);
1252 emitField(78, 1, insn->tex.target.isShadow());
1253 emitField(76, 2, offsets);
1254 emitField(72, 4, insn->tex.mask);
1255 emitGPR (64, insn->def(1));
1256 emitField(63, 1, insn->tex.target.isArray());
1257 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1258 insn->tex.target.getDim() - 1);
1259 emitTEXs (32);
1260 emitGPR (24, insn->src(0));
1261 emitGPR (16, insn->def(0));
1262 }
1263
1264 void
1265 CodeEmitterGV100::emitTMML()
1266 {
1267 const TexInstruction *insn = this->insn->asTex();
1268
1269 if (insn->tex.rIndirectSrc < 0) {
1270 emitInsn (0xb69);
1271 emitField(54, 5, prog->driver->io.auxCBSlot);
1272 emitField(40, 14, insn->tex.r);
1273 } else {
1274 emitInsn (0x36a);
1275 emitField(59, 1, 1); // .B
1276 }
1277 emitField(90, 1, insn->tex.liveOnly);
1278 emitField(77, 1, insn->tex.derivAll);
1279 emitField(72, 4, insn->tex.mask);
1280 emitGPR (64, insn->def(1));
1281 emitField(63, 1, insn->tex.target.isArray());
1282 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1283 insn->tex.target.getDim() - 1);
1284 emitTEXs (32);
1285 emitGPR (24, insn->src(0));
1286 emitGPR (16, insn->def(0));
1287 }
1288
1289 void
1290 CodeEmitterGV100::emitTXD()
1291 {
1292 const TexInstruction *insn = this->insn->asTex();
1293
1294 if (insn->tex.rIndirectSrc < 0) {
1295 emitInsn (0xb6c);
1296 emitField(54, 5, prog->driver->io.auxCBSlot);
1297 emitField(40, 14, insn->tex.r);
1298 } else {
1299 emitInsn (0x36d);
1300 emitField(59, 1, 1); // .B
1301 }
1302 emitField(90, 1, insn->tex.liveOnly);
1303 emitPRED (81);
1304 emitField(76, 1, insn->tex.useOffsets == 1);
1305 emitField(72, 4, insn->tex.mask);
1306 emitGPR (64, insn->def(1));
1307 emitField(63, 1, insn->tex.target.isArray());
1308 emitField(61, 2, insn->tex.target.isCube() ? 3 :
1309 insn->tex.target.getDim() - 1);
1310 emitTEXs (32);
1311 emitGPR (24, insn->src(0));
1312 emitGPR (16, insn->def(0));
1313 }
1314
1315 void
1316 CodeEmitterGV100::emitTXQ()
1317 {
1318 const TexInstruction *insn = this->insn->asTex();
1319 int type = 0;
1320
1321 switch (insn->tex.query) {
1322 case TXQ_DIMS : type = 0x00; break;
1323 case TXQ_TYPE : type = 0x01; break;
1324 case TXQ_SAMPLE_POSITION: type = 0x02; break;
1325 default:
1326 assert(!"invalid txq query");
1327 break;
1328 }
1329
1330 if (insn->tex.rIndirectSrc < 0) {
1331 emitInsn (0xb6f);
1332 emitField(54, 5, prog->driver->io.auxCBSlot);
1333 emitField(40, 14, insn->tex.r);
1334 } else {
1335 emitInsn (0x370);
1336 emitField(59, 1, 1); // .B
1337 }
1338 emitField(90, 1, insn->tex.liveOnly);
1339 emitField(72, 4, insn->tex.mask);
1340 emitGPR (64, insn->def(1));
1341 emitField(62, 2, type);
1342 emitGPR (24, insn->src(0));
1343 emitGPR (16, insn->def(0));
1344 }
1345
1346 /*******************************************************************************
1347 * surface
1348 ******************************************************************************/
1349
1350 void
1351 CodeEmitterGV100::emitSUHandle(const int s)
1352 {
1353 const TexInstruction *insn = this->insn->asTex();
1354
1355 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
1356
1357 if (insn->src(s).getFile() == FILE_GPR) {
1358 emitGPR(64, insn->src(s));
1359 } else {
1360 assert(0);
1361 //XXX: not done
1362 ImmediateValue *imm = insn->getSrc(s)->asImm();
1363 assert(imm);
1364 emitField(0x33, 1, 1);
1365 emitField(0x24, 13, imm->reg.data.u32);
1366 }
1367 }
1368
1369 void
1370 CodeEmitterGV100::emitSUTarget()
1371 {
1372 const TexInstruction *insn = this->insn->asTex();
1373 int target = 0;
1374
1375 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
1376
1377 if (insn->tex.target == TEX_TARGET_BUFFER) {
1378 target = 1;
1379 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
1380 target = 2;
1381 } else if (insn->tex.target == TEX_TARGET_2D ||
1382 insn->tex.target == TEX_TARGET_RECT) {
1383 target = 3;
1384 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
1385 insn->tex.target == TEX_TARGET_CUBE ||
1386 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
1387 target = 4;
1388 } else if (insn->tex.target == TEX_TARGET_3D) {
1389 target = 5;
1390 } else {
1391 assert(insn->tex.target == TEX_TARGET_1D);
1392 }
1393 emitField(61, 3, target);
1394 }
1395
1396 void
1397 CodeEmitterGV100::emitSUATOM()
1398 {
1399 const TexInstruction *insn = this->insn->asTex();
1400 uint8_t type = 0, subOp;
1401
1402 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
1403 emitInsn(0x396); // SUATOM.D.CAS
1404 else
1405 emitInsn(0x394); // SUATOM.D
1406
1407 emitSUTarget();
1408
1409 // destination type
1410 switch (insn->dType) {
1411 case TYPE_S32: type = 1; break;
1412 case TYPE_U64: type = 2; break;
1413 case TYPE_F32: type = 3; break;
1414 case TYPE_S64: type = 5; break;
1415 default:
1416 assert(insn->dType == TYPE_U32);
1417 break;
1418 }
1419
1420 // atomic operation
1421 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
1422 subOp = 0;
1423 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
1424 subOp = 8;
1425 } else {
1426 subOp = insn->subOp;
1427 }
1428
1429 emitField(87, 4, subOp);
1430 emitPRED (81);
1431 emitField(79, 2, 1);
1432 emitField(73, 3, type);
1433 emitField(72, 1, 0); // .BA
1434 emitGPR (32, insn->src(1));
1435 emitGPR (24, insn->src(0));
1436 emitGPR (16, insn->def(0));
1437
1438 emitSUHandle(2);
1439 }
1440
1441 void
1442 CodeEmitterGV100::emitSULD()
1443 {
1444 const TexInstruction *insn = this->insn->asTex();
1445 int type = 0;
1446
1447 if (insn->op == OP_SULDB) {
1448 emitInsn(0x99a);
1449 emitSUTarget();
1450
1451 switch (insn->dType) {
1452 case TYPE_U8: type = 0; break;
1453 case TYPE_S8: type = 1; break;
1454 case TYPE_U16: type = 2; break;
1455 case TYPE_S16: type = 3; break;
1456 case TYPE_U32: type = 4; break;
1457 case TYPE_U64: type = 5; break;
1458 case TYPE_B128: type = 6; break;
1459 default:
1460 assert(0);
1461 break;
1462 }
1463 emitField(73, 3, type);
1464 } else {
1465 emitInsn(0x998);
1466 emitSUTarget();
1467 emitField(72, 4, 0xf); // rgba
1468 }
1469
1470 emitPRED (81);
1471 emitLDSTc(77, 79);
1472
1473 emitGPR (16, insn->def(0));
1474 emitGPR (24, insn->src(0));
1475
1476 emitSUHandle(1);
1477 }
1478
1479 void
1480 CodeEmitterGV100::emitSUST()
1481 {
1482 const TexInstruction *insn = this->insn->asTex();
1483
1484 emitInsn(0x99c); // SUST.P
1485 #if 0
1486 if (insn->op == OP_SUSTB)
1487 emitField(0x34, 1, 1);
1488 #endif
1489 emitSUTarget();
1490
1491 emitLDSTc(77, 79);
1492 emitField(72, 4, 0xf); // rgba
1493 emitGPR(32, insn->src(1));
1494 emitGPR(24, insn->src(0));
1495 emitSUHandle(2);
1496 }
1497
1498 /*******************************************************************************
1499 * misc
1500 ******************************************************************************/
1501
1502 void
1503 CodeEmitterGV100::emitAL2P()
1504 {
1505 emitInsn (0x920);
1506 emitO (79);
1507 emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1);
1508 emitField(40, 11, insn->src(0).get()->reg.data.offset);
1509 emitGPR (24, insn->src(0).getIndirect(0));
1510 emitGPR (16, insn->def(0));
1511 }
1512
1513 void
1514 CodeEmitterGV100::emitBAR()
1515 {
1516 uint8_t subop, redop = 0x00;
1517
1518 // 80
1519 // 01: DEFER_BLOCKING
1520 // 78:77
1521 // 00: SYNC
1522 // 01: ARV
1523 // 02: RED
1524 // 03: SCAN
1525 // 75:74
1526 // 00: RED.POPC
1527 // 01: RED.AND
1528 // 02: RED.OR
1529
1530 switch (insn->subOp) {
1531 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; redop = 0x00; break;
1532 case NV50_IR_SUBOP_BAR_RED_AND : subop = 0x02; redop = 0x01; break;
1533 case NV50_IR_SUBOP_BAR_RED_OR : subop = 0x02; redop = 0x02; break;
1534 case NV50_IR_SUBOP_BAR_ARRIVE : subop = 0x01; break;
1535 default:
1536 subop = 0x00;
1537 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
1538 break;
1539 }
1540
1541 if (insn->src(0).getFile() == FILE_GPR) {
1542 emitInsn ((1 << 9) | 0x11d);
1543 emitGPR (32, insn->src(0)); //XXX: nvdisasm shows src0==src1
1544 } else {
1545 ImmediateValue *imm = insn->getSrc(0)->asImm();
1546 assert(imm);
1547 if (insn->src(1).getFile() == FILE_GPR) {
1548 emitInsn ((4 << 9) | 0x11d);
1549 emitGPR (32, insn->src(1));
1550 } else {
1551 emitInsn ((5 << 9) | 0x11d);
1552 }
1553 emitField(54, 4, imm->reg.data.u32);
1554 }
1555
1556 emitField(77, 2, subop);
1557 emitField(74, 2, redop);
1558
1559 if (insn->srcExists(2) && (insn->predSrc != 2)) {
1560 emitField(90, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
1561 emitPRED (87, insn->src(2));
1562 } else {
1563 emitField(87, 3, 7);
1564 }
1565 }
1566
1567 void
1568 CodeEmitterGV100::emitCCTL()
1569 {
1570 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL)
1571 emitInsn(0x98f);
1572 else
1573 emitInsn(0x990);
1574 emitField(87, 4, insn->subOp);
1575 emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
1576 emitADDR (24, 32, 32, 0, insn->src(0));
1577 }
1578
1579 void
1580 CodeEmitterGV100::emitMEMBAR()
1581 {
1582 emitInsn (0x992);
1583 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn->subOp)) {
1584 case NV50_IR_SUBOP_MEMBAR_CTA: emitField(76, 3, 0); break;
1585 case NV50_IR_SUBOP_MEMBAR_GL : emitField(76, 3, 2); break;
1586 case NV50_IR_SUBOP_MEMBAR_SYS: emitField(76, 3, 3); break;
1587 default:
1588 assert(!"invalid scope");
1589 break;
1590 }
1591 }
1592
1593 void
1594 CodeEmitterGV100::emitPIXLD()
1595 {
1596 emitInsn (0x925);
1597 switch (insn->subOp) {
1598 case NV50_IR_SUBOP_PIXLD_COVMASK : emitField(78, 3, 1); break; // .COVMASK
1599 case NV50_IR_SUBOP_PIXLD_SAMPLEID: emitField(78, 3, 3); break; // .MY_INDEX
1600 default:
1601 assert(0);
1602 break;
1603 }
1604 emitPRED (71);
1605 emitGPR (16, insn->def(0));
1606 }
1607
1608 void
1609 CodeEmitterGV100::emitPLOP3_LUT()
1610 {
1611 uint8_t op[2] = {};
1612
1613 switch (insn->op) {
1614 case OP_AND: op[0] = 0xf0 & 0xcc; break;
1615 case OP_OR : op[0] = 0xf0 | 0xcc; break;
1616 case OP_XOR: op[0] = 0xf0 ^ 0xcc; break;
1617 default:
1618 assert(!"invalid PLOP3");
1619 break;
1620 }
1621
1622 emitInsn(0x81c);
1623 emitNOT (90, insn->src(0));
1624 emitPRED(87, insn->src(0));
1625 emitPRED(84); // def(1)
1626 emitPRED(81, insn->def(0));
1627 emitNOT (80, insn->src(1));
1628 emitPRED(77, insn->src(1));
1629 emitField(72, 5, op[0] >> 3);
1630 emitNOT (71); // src(2)
1631 emitPRED(68); // src(2)
1632 emitField(64, 3, op[0] & 7);
1633 emitField(16, 8, op[1]);
1634 }
1635
1636 void
1637 CodeEmitterGV100::emitVOTE()
1638 {
1639 const ImmediateValue *imm;
1640 uint32_t u32;
1641
1642 int r = -1, p = -1;
1643 for (int i = 0; insn->defExists(i); i++) {
1644 if (insn->def(i).getFile() == FILE_GPR)
1645 r = i;
1646 else if (insn->def(i).getFile() == FILE_PREDICATE)
1647 p = i;
1648 }
1649
1650 emitInsn (0x806);
1651 emitField(72, 2, insn->subOp);
1652 if (r >= 0)
1653 emitGPR (16, insn->def(r));
1654 else
1655 emitGPR (16);
1656 if (p >= 0)
1657 emitPRED (81, insn->def(p));
1658 else
1659 emitPRED (81);
1660
1661 switch (insn->src(0).getFile()) {
1662 case FILE_PREDICATE:
1663 emitField(90, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
1664 emitPRED (87, insn->src(0));
1665 break;
1666 case FILE_IMMEDIATE:
1667 imm = insn->getSrc(0)->asImm();
1668 assert(imm);
1669 u32 = imm->reg.data.u32;
1670 assert(u32 == 0 || u32 == 1);
1671 emitField(90, 1, u32 == 0);
1672 emitPRED (87);
1673 break;
1674 default:
1675 assert(!"Unhandled src");
1676 break;
1677 }
1678 }
1679
1680 bool
1681 CodeEmitterGV100::emitInstruction(Instruction *i)
1682 {
1683 insn = i;
1684
1685 switch (insn->op) {
1686 case OP_ABS:
1687 assert(!isFloatType(insn->dType));
1688 emitIABS();
1689 break;
1690 case OP_ADD:
1691 if (isFloatType(insn->dType)) {
1692 if (insn->dType == TYPE_F32)
1693 emitFADD();
1694 else
1695 emitDADD();
1696 } else {
1697 emitIADD3();
1698 }
1699 break;
1700 case OP_AFETCH:
1701 emitAL2P();
1702 break;
1703 case OP_AND:
1704 case OP_OR:
1705 case OP_XOR:
1706 if (insn->def(0).getFile() == FILE_PREDICATE) {
1707 emitPLOP3_LUT();
1708 } else {
1709 assert(!"invalid logop");
1710 emitNOP();
1711 }
1712 break;
1713 case OP_ATOM:
1714 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
1715 emitATOMS();
1716 else
1717 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
1718 emitRED();
1719 else
1720 emitATOM();
1721 break;
1722 case OP_BAR:
1723 emitBAR();
1724 break;
1725 case OP_BFIND:
1726 emitFLO();
1727 break;
1728 case OP_BMSK:
1729 emitBMSK();
1730 break;
1731 case OP_BREV:
1732 emitBREV();
1733 break;
1734 case OP_BRA:
1735 case OP_JOIN: //XXX
1736 emitBRA();
1737 break;
1738 case OP_CCTL:
1739 emitCCTL();
1740 break;
1741 case OP_CEIL:
1742 case OP_CVT:
1743 case OP_FLOOR:
1744 case OP_TRUNC:
1745 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
1746 insn->src(0).getFile() == FILE_PREDICATE)) {
1747 emitMOV();
1748 } else if (isFloatType(insn->dType)) {
1749 if (isFloatType(insn->sType)) {
1750 if (insn->sType == insn->dType)
1751 emitFRND();
1752 else
1753 emitF2F();
1754 } else {
1755 emitI2F();
1756 }
1757 } else {
1758 if (isFloatType(insn->sType)) {
1759 emitF2I();
1760 } else {
1761 assert(!"I2I");
1762 emitNOP();
1763 }
1764 }
1765 break;
1766 case OP_COS:
1767 case OP_EX2:
1768 case OP_LG2:
1769 case OP_RCP:
1770 case OP_RSQ:
1771 case OP_SIN:
1772 case OP_SQRT:
1773 emitMUFU();
1774 break;
1775 case OP_DISCARD:
1776 emitKILL();
1777 break;
1778 case OP_EMIT:
1779 case OP_FINAL:
1780 case OP_RESTART:
1781 emitOUT();
1782 break;
1783 case OP_EXIT:
1784 emitEXIT();
1785 break;
1786 case OP_EXPORT:
1787 emitAST();
1788 break;
1789 case OP_FMA:
1790 case OP_MAD:
1791 if (isFloatType(insn->dType)) {
1792 if (insn->dType == TYPE_F32)
1793 emitFFMA();
1794 else
1795 emitDFMA();
1796 } else {
1797 if (typeSizeof(insn->dType) != 8)
1798 emitIMAD();
1799 else
1800 emitIMAD_WIDE();
1801 }
1802 break;
1803 case OP_JOINAT: //XXX
1804 emitNOP();
1805 break;
1806 case OP_LINTERP:
1807 emitIPA();
1808 break;
1809 case OP_LOAD:
1810 switch (insn->src(0).getFile()) {
1811 case FILE_MEMORY_CONST : emitLDC(); break;
1812 case FILE_MEMORY_LOCAL : emitLDL(); break;
1813 case FILE_MEMORY_SHARED: emitLDS(); break;
1814 case FILE_MEMORY_GLOBAL: emitLD(); break;
1815 default:
1816 assert(!"invalid load");
1817 emitNOP();
1818 break;
1819 }
1820 break;
1821 case OP_LOP3_LUT:
1822 emitLOP3_LUT();
1823 break;
1824 case OP_MAX:
1825 case OP_MIN:
1826 if (isFloatType(insn->dType)) {
1827 if (insn->dType == TYPE_F32) {
1828 emitFMNMX();
1829 } else {
1830 assert(!"invalid FMNMX");
1831 emitNOP();
1832 }
1833 } else {
1834 assert(!"invalid MNMX");
1835 emitNOP();
1836 }
1837 break;
1838 case OP_MEMBAR:
1839 emitMEMBAR();
1840 break;
1841 case OP_MOV:
1842 emitMOV();
1843 break;
1844 case OP_MUL:
1845 if (isFloatType(insn->dType)) {
1846 if (insn->dType == TYPE_F32)
1847 emitFMUL();
1848 else
1849 emitDMUL();
1850 } else {
1851 assert(!"invalid IMUL");
1852 emitNOP();
1853 }
1854 break;
1855 case OP_PERMT:
1856 emitPRMT();
1857 break;
1858 case OP_PFETCH:
1859 emitISBERD();
1860 break;
1861 case OP_PIXLD:
1862 emitPIXLD();
1863 break;
1864 case OP_POPCNT:
1865 emitPOPC();
1866 break;
1867 case OP_QUADOP:
1868 emitFSWZADD();
1869 break;
1870 case OP_RDSV:
1871 if (targ->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
1872 emitCS2R();
1873 else
1874 emitS2R();
1875 break;
1876 case OP_SELP:
1877 emitSEL();
1878 break;
1879 case OP_SET:
1880 case OP_SET_AND:
1881 case OP_SET_OR:
1882 case OP_SET_XOR:
1883 if (insn->def(0).getFile() != FILE_PREDICATE) {
1884 if (isFloatType(insn->dType)) {
1885 if (insn->dType == TYPE_F32) {
1886 emitFSET_BF();
1887 } else {
1888 assert(!"invalid FSET");
1889 emitNOP();
1890 }
1891 } else {
1892 assert(!"invalid SET");
1893 emitNOP();
1894 }
1895 } else {
1896 if (isFloatType(insn->sType))
1897 if (insn->sType == TYPE_F64)
1898 emitDSETP();
1899 else
1900 emitFSETP();
1901 else
1902 emitISETP();
1903 }
1904 break;
1905 case OP_SGXT:
1906 emitSGXT();
1907 break;
1908 case OP_SHF:
1909 emitSHF();
1910 break;
1911 case OP_SHFL:
1912 emitSHFL();
1913 break;
1914 case OP_SHLADD:
1915 emitLEA();
1916 break;
1917 case OP_STORE:
1918 switch (insn->src(0).getFile()) {
1919 case FILE_MEMORY_LOCAL : emitSTL(); break;
1920 case FILE_MEMORY_SHARED: emitSTS(); break;
1921 case FILE_MEMORY_GLOBAL: emitST(); break;
1922 default:
1923 assert(!"invalid store");
1924 emitNOP();
1925 break;
1926 }
1927 break;
1928 case OP_SULDB:
1929 case OP_SULDP:
1930 emitSULD();
1931 break;
1932 case OP_SUREDB:
1933 case OP_SUREDP:
1934 emitSUATOM();
1935 break;
1936 case OP_SUSTB:
1937 case OP_SUSTP:
1938 emitSUST();
1939 break;
1940 case OP_TEX:
1941 case OP_TXB:
1942 case OP_TXL:
1943 emitTEX();
1944 break;
1945 case OP_TXD:
1946 emitTXD();
1947 break;
1948 case OP_TXF:
1949 emitTLD();
1950 break;
1951 case OP_TXG:
1952 emitTLD4();
1953 break;
1954 case OP_TXLQ:
1955 emitTMML();
1956 break;
1957 case OP_TXQ:
1958 emitTXQ();
1959 break;
1960 case OP_VFETCH:
1961 emitALD();
1962 break;
1963 case OP_VOTE:
1964 emitVOTE();
1965 break;
1966 case OP_WARPSYNC:
1967 emitWARPSYNC();
1968 break;
1969 default:
1970 assert(!"invalid opcode");
1971 emitNOP();
1972 break;
1973 }
1974
1975 code[3] &= 0x000001ff;
1976 code[3] |= insn->sched << 9;
1977 code += 4;
1978 codeSize += 16;
1979 return true;
1980 }
1981
1982 void
1983 CodeEmitterGV100::prepareEmission(BasicBlock *bb)
1984 {
1985 Function *func = bb->getFunction();
1986 Instruction *i;
1987 int j;
1988
1989 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
1990
1991 for (; j >= 0; --j) {
1992 BasicBlock *in = func->bbArray[j];
1993 Instruction *exit = in->getExit();
1994
1995 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
1996 in->binSize -= 16;
1997 func->binSize -= 16;
1998
1999 for (++j; j < func->bbCount; ++j)
2000 func->bbArray[j]->binPos -= 16;
2001
2002 in->remove(exit);
2003 }
2004 bb->binPos = in->binPos + in->binSize;
2005 if (in->binSize) // no more no-op branches to bb
2006 break;
2007 }
2008 func->bbArray[func->bbCount++] = bb;
2009
2010 if (!bb->getExit())
2011 return;
2012
2013 for (i = bb->getEntry(); i; i = i->next) {
2014 i->encSize = getMinEncodingSize(i);
2015 bb->binSize += i->encSize;
2016 }
2017
2018 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 16));
2019
2020 func->binSize += bb->binSize;
2021 }
2022
2023 void
2024 CodeEmitterGV100::prepareEmission(Function *func)
2025 {
2026 SchedDataCalculatorGM107 sched(targ);
2027 CodeEmitter::prepareEmission(func);
2028 sched.run(func, true, true);
2029 }
2030
2031 void
2032 CodeEmitterGV100::prepareEmission(Program *prog)
2033 {
2034 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
2035 !fi.end(); fi.next()) {
2036 Function *func = reinterpret_cast<Function *>(fi.get());
2037 func->binPos = prog->binSize;
2038 prepareEmission(func);
2039 prog->binSize += func->binSize;
2040 }
2041
2042 this->prog = prog;
2043 }
2044
2045 CodeEmitterGV100::CodeEmitterGV100(TargetGV100 *target)
2046 : CodeEmitter(target), targ(target)
2047 {
2048 code = NULL;
2049 codeSize = codeSizeLimit = 0;
2050 relocInfo = NULL;
2051 }
2052 };