nvc0/ir: add support for emitting partial min/max ops for int64
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_A(const Instruction *, uint64_t);
49 void emitForm_B(const Instruction *, uint64_t);
50 void emitForm_S(const Instruction *, uint32_t, bool pred);
51
52 void emitPredicate(const Instruction *);
53
54 void setAddress16(const ValueRef&);
55 void setAddress24(const ValueRef&);
56 void setAddressByFile(const ValueRef&);
57 void setImmediate(const Instruction *, const int s); // needs op already set
58 void setImmediateS8(const ValueRef&);
59 void setSUConst16(const Instruction *, const int s);
60 void setSUPred(const Instruction *, const int s);
61
62 void emitCondCode(CondCode cc, int pos);
63 void emitInterpMode(const Instruction *);
64 void emitLoadStoreType(DataType ty);
65 void emitSUGType(DataType);
66 void emitSUAddr(const TexInstruction *);
67 void emitSUDim(const TexInstruction *);
68 void emitCachingMode(CacheMode c);
69
70 void emitShortSrc2(const ValueRef&);
71
72 inline uint8_t getSRegEncoding(const ValueRef&);
73
74 void roundMode_A(const Instruction *);
75 void roundMode_C(const Instruction *);
76 void roundMode_CS(const Instruction *);
77
78 void emitNegAbs12(const Instruction *);
79
80 void emitNOP(const Instruction *);
81
82 void emitLOAD(const Instruction *);
83 void emitSTORE(const Instruction *);
84 void emitMOV(const Instruction *);
85 void emitATOM(const Instruction *);
86 void emitMEMBAR(const Instruction *);
87 void emitCCTL(const Instruction *);
88
89 void emitINTERP(const Instruction *);
90 void emitAFETCH(const Instruction *);
91 void emitPFETCH(const Instruction *);
92 void emitVFETCH(const Instruction *);
93 void emitEXPORT(const Instruction *);
94 void emitOUT(const Instruction *);
95
96 void emitUADD(const Instruction *);
97 void emitFADD(const Instruction *);
98 void emitDADD(const Instruction *);
99 void emitUMUL(const Instruction *);
100 void emitFMUL(const Instruction *);
101 void emitDMUL(const Instruction *);
102 void emitIMAD(const Instruction *);
103 void emitISAD(const Instruction *);
104 void emitSHLADD(const Instruction *a);
105 void emitFMAD(const Instruction *);
106 void emitDMAD(const Instruction *);
107 void emitMADSP(const Instruction *);
108
109 void emitNOT(Instruction *);
110 void emitLogicOp(const Instruction *, uint8_t subOp);
111 void emitPOPC(const Instruction *);
112 void emitINSBF(const Instruction *);
113 void emitEXTBF(const Instruction *);
114 void emitBFIND(const Instruction *);
115 void emitPERMT(const Instruction *);
116 void emitShift(const Instruction *);
117
118 void emitSFnOp(const Instruction *, uint8_t subOp);
119
120 void emitCVT(Instruction *);
121 void emitMINMAX(const Instruction *);
122 void emitPreOp(const Instruction *);
123
124 void emitSET(const CmpInstruction *);
125 void emitSLCT(const CmpInstruction *);
126 void emitSELP(const Instruction *);
127
128 void emitTEXBAR(const Instruction *);
129 void emitTEX(const TexInstruction *);
130 void emitTEXCSAA(const TexInstruction *);
131 void emitTXQ(const TexInstruction *);
132
133 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
134
135 void emitFlow(const Instruction *);
136 void emitBAR(const Instruction *);
137
138 void emitSUCLAMPMode(uint16_t);
139 void emitSUCalc(Instruction *);
140 void emitSULDGB(const TexInstruction *);
141 void emitSUSTGx(const TexInstruction *);
142
143 void emitSULDB(const TexInstruction *);
144 void emitSUSTx(const TexInstruction *);
145 void emitSULEA(const TexInstruction *);
146
147 void emitVSHL(const Instruction *);
148 void emitVectorSubOp(const Instruction *);
149
150 void emitPIXLD(const Instruction *);
151
152 void emitVOTE(const Instruction *);
153
154 inline void defId(const ValueDef&, const int pos);
155 inline void defId(const Instruction *, int d, const int pos);
156 inline void srcId(const ValueRef&, const int pos);
157 inline void srcId(const ValueRef *, const int pos);
158 inline void srcId(const Instruction *, int s, const int pos);
159 inline void srcAddr32(const ValueRef&, int pos, int shr);
160
161 inline bool isLIMM(const ValueRef&, DataType ty);
162 };
163
// Paste two 8-digit hex halves into one 64-bit literal so that the high and
// low instruction words stay visually separated at the call site.
#define HEX64(hi, lo) 0x##hi##lo##ULL

// Register data of the representative value behind a source (SDATA) or
// destination (DDATA) reference; both expand to the same expression.
#define SDATA(ref) ((ref).rep()->reg.data)
#define DDATA(ref) ((ref).rep()->reg.data)
169
170 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
171 {
172 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
173 }
174
175 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
176 {
177 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
178 }
179
180 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
181 {
182 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
183 code[pos / 32] |= r << (pos % 32);
184 }
185
186 void
187 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
188 {
189 const uint32_t offset = SDATA(src).offset >> shr;
190
191 code[pos / 32] |= offset << (pos % 32);
192 if (pos && (pos < 32))
193 code[1] |= offset >> (32 - pos);
194 }
195
196 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
197 {
198 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
199 }
200
201 void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
202 {
203 int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
204 code[pos / 32] |= r << (pos % 32);
205 }
206
207 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
208 {
209 const ImmediateValue *imm = ref.get()->asImm();
210
211 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
212 }
213
214 void
215 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
216 {
217 switch (insn->rnd) {
218 case ROUND_M: code[1] |= 1 << 23; break;
219 case ROUND_P: code[1] |= 2 << 23; break;
220 case ROUND_Z: code[1] |= 3 << 23; break;
221 default:
222 assert(insn->rnd == ROUND_N);
223 break;
224 }
225 }
226
227 void
228 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
229 {
230 if (i->src(1).mod.abs()) code[0] |= 1 << 6;
231 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
232 if (i->src(1).mod.neg()) code[0] |= 1 << 8;
233 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
234 }
235
236 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
237 {
238 uint8_t val;
239
240 switch (cc) {
241 case CC_LT: val = 0x1; break;
242 case CC_LTU: val = 0x9; break;
243 case CC_EQ: val = 0x2; break;
244 case CC_EQU: val = 0xa; break;
245 case CC_LE: val = 0x3; break;
246 case CC_LEU: val = 0xb; break;
247 case CC_GT: val = 0x4; break;
248 case CC_GTU: val = 0xc; break;
249 case CC_NE: val = 0x5; break;
250 case CC_NEU: val = 0xd; break;
251 case CC_GE: val = 0x6; break;
252 case CC_GEU: val = 0xe; break;
253 case CC_TR: val = 0xf; break;
254 case CC_FL: val = 0x0; break;
255
256 case CC_A: val = 0x14; break;
257 case CC_NA: val = 0x13; break;
258 case CC_S: val = 0x15; break;
259 case CC_NS: val = 0x12; break;
260 case CC_C: val = 0x16; break;
261 case CC_NC: val = 0x11; break;
262 case CC_O: val = 0x17; break;
263 case CC_NO: val = 0x10; break;
264
265 default:
266 val = 0;
267 assert(!"invalid condition code");
268 break;
269 }
270 code[pos / 32] |= val << (pos % 32);
271 }
272
273 void
274 CodeEmitterNVC0::emitPredicate(const Instruction *i)
275 {
276 if (i->predSrc >= 0) {
277 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
278 srcId(i->src(i->predSrc), 10);
279 if (i->cc == CC_NOT_P)
280 code[0] |= 0x2000; // negate
281 } else {
282 code[0] |= 0x1c00;
283 }
284 }
285
286 void
287 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
288 {
289 switch (src.getFile()) {
290 case FILE_MEMORY_GLOBAL:
291 srcAddr32(src, 26, 0);
292 break;
293 case FILE_MEMORY_LOCAL:
294 case FILE_MEMORY_SHARED:
295 setAddress24(src);
296 break;
297 default:
298 assert(src.getFile() == FILE_MEMORY_CONST);
299 setAddress16(src);
300 break;
301 }
302 }
303
304 void
305 CodeEmitterNVC0::setAddress16(const ValueRef& src)
306 {
307 Symbol *sym = src.get()->asSym();
308
309 assert(sym);
310
311 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
312 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
313 }
314
315 void
316 CodeEmitterNVC0::setAddress24(const ValueRef& src)
317 {
318 Symbol *sym = src.get()->asSym();
319
320 assert(sym);
321
322 code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
323 code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
324 }
325
326 void
327 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
328 {
329 const ImmediateValue *imm = i->src(s).get()->asImm();
330 uint32_t u32;
331
332 assert(imm);
333 u32 = imm->reg.data.u32;
334
335 if ((code[0] & 0xf) == 0x1) {
336 // double immediate
337 uint64_t u64 = imm->reg.data.u64;
338 assert(!(u64 & 0x00000fffffffffffULL));
339 assert(!(code[1] & 0xc000));
340 code[0] |= ((u64 >> 44) & 0x3f) << 26;
341 code[1] |= 0xc000 | (u64 >> 50);
342 } else
343 if ((code[0] & 0xf) == 0x2) {
344 // LIMM
345 code[0] |= (u32 & 0x3f) << 26;
346 code[1] |= u32 >> 6;
347 } else
348 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
349 // integer immediate
350 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
351 assert(!(code[1] & 0xc000));
352 u32 &= 0xfffff;
353 code[0] |= (u32 & 0x3f) << 26;
354 code[1] |= 0xc000 | (u32 >> 6);
355 } else {
356 // float immediate
357 assert(!(u32 & 0x00000fff));
358 assert(!(code[1] & 0xc000));
359 code[0] |= ((u32 >> 12) & 0x3f) << 26;
360 code[1] |= 0xc000 | (u32 >> 18);
361 }
362 }
363
364 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
365 {
366 const ImmediateValue *imm = ref.get()->asImm();
367
368 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
369
370 assert(s8 == imm->reg.data.s32);
371
372 code[0] |= (s8 & 0x3f) << 26;
373 code[0] |= (s8 >> 6) << 8;
374 }
375
376 void
377 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
378 {
379 code[0] = opc;
380 code[1] = opc >> 32;
381
382 emitPredicate(i);
383
384 defId(i->def(0), 14);
385
386 int s1 = 26;
387 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
388 s1 = 49;
389
390 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
391 switch (i->getSrc(s)->reg.file) {
392 case FILE_MEMORY_CONST:
393 assert(!(code[1] & 0xc000));
394 code[1] |= (s == 2) ? 0x8000 : 0x4000;
395 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
396 setAddress16(i->src(s));
397 break;
398 case FILE_IMMEDIATE:
399 assert(s == 1 ||
400 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
401 assert(!(code[1] & 0xc000));
402 setImmediate(i, s);
403 break;
404 case FILE_GPR:
405 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
406 break;
407 srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
408 break;
409 default:
410 if (i->op == OP_SELP) {
411 // OP_SELP is used to implement shared+atomics on Fermi.
412 assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
413 srcId(i->src(s), 49);
414 }
415 // ignore here, can be predicate or flags, but must not be address
416 break;
417 }
418 }
419 }
420
421 void
422 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
423 {
424 code[0] = opc;
425 code[1] = opc >> 32;
426
427 emitPredicate(i);
428
429 defId(i->def(0), 14);
430
431 switch (i->src(0).getFile()) {
432 case FILE_MEMORY_CONST:
433 assert(!(code[1] & 0xc000));
434 code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
435 setAddress16(i->src(0));
436 break;
437 case FILE_IMMEDIATE:
438 assert(!(code[1] & 0xc000));
439 setImmediate(i, 0);
440 break;
441 case FILE_GPR:
442 srcId(i->src(0), 26);
443 break;
444 default:
445 // ignore here, can be predicate or flags, but must not be address
446 break;
447 }
448 }
449
450 void
451 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
452 {
453 code[0] = opc;
454
455 int ss2a = 0;
456 if (opc == 0x0d || opc == 0x0e)
457 ss2a = 2;
458
459 defId(i->def(0), 14);
460 srcId(i->src(0), 20);
461
462 assert(pred || (i->predSrc < 0));
463 if (pred)
464 emitPredicate(i);
465
466 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
467 if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
468 assert(!(code[0] & (0x300 >> ss2a)));
469 switch (i->src(s).get()->reg.fileIndex) {
470 case 0: code[0] |= 0x100 >> ss2a; break;
471 case 1: code[0] |= 0x200 >> ss2a; break;
472 case 16: code[0] |= 0x300 >> ss2a; break;
473 default:
474 ERROR("invalid c[] space for short form\n");
475 break;
476 }
477 if (s == 1)
478 code[0] |= i->getSrc(s)->reg.data.offset << 24;
479 else
480 code[0] |= i->getSrc(s)->reg.data.offset << 6;
481 } else
482 if (i->src(s).getFile() == FILE_IMMEDIATE) {
483 assert(s == 1);
484 setImmediateS8(i->src(s));
485 } else
486 if (i->src(s).getFile() == FILE_GPR) {
487 srcId(i->src(s), (s == 1) ? 26 : 8);
488 }
489 }
490 }
491
492 void
493 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
494 {
495 if (src.getFile() == FILE_MEMORY_CONST) {
496 switch (src.get()->reg.fileIndex) {
497 case 0: code[0] |= 0x100; break;
498 case 1: code[0] |= 0x200; break;
499 case 16: code[0] |= 0x300; break;
500 default:
501 assert(!"unsupported file index for short op");
502 break;
503 }
504 srcAddr32(src, 20, 2);
505 } else {
506 srcId(src, 20);
507 assert(src.getFile() == FILE_GPR);
508 }
509 }
510
511 void
512 CodeEmitterNVC0::emitNOP(const Instruction *i)
513 {
514 code[0] = 0x000001e4;
515 code[1] = 0x40000000;
516 emitPredicate(i);
517 }
518
519 void
520 CodeEmitterNVC0::emitFMAD(const Instruction *i)
521 {
522 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
523
524 if (i->encSize == 8) {
525 if (isLIMM(i->src(1), TYPE_F32)) {
526 emitForm_A(i, HEX64(20000000, 00000002));
527 } else {
528 emitForm_A(i, HEX64(30000000, 00000000));
529
530 if (i->src(2).mod.neg())
531 code[0] |= 1 << 8;
532 }
533 roundMode_A(i);
534
535 if (neg1)
536 code[0] |= 1 << 9;
537
538 if (i->saturate)
539 code[0] |= 1 << 5;
540
541 if (i->dnz)
542 code[0] |= 1 << 7;
543 else
544 if (i->ftz)
545 code[0] |= 1 << 6;
546 } else {
547 assert(!i->saturate && !i->src(2).mod.neg());
548 emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
549 false);
550 if (neg1)
551 code[0] |= 1 << 4;
552 }
553 }
554
555 void
556 CodeEmitterNVC0::emitDMAD(const Instruction *i)
557 {
558 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
559
560 emitForm_A(i, HEX64(20000000, 00000001));
561
562 if (i->src(2).mod.neg())
563 code[0] |= 1 << 8;
564
565 roundMode_A(i);
566
567 if (neg1)
568 code[0] |= 1 << 9;
569
570 assert(!i->saturate);
571 assert(!i->ftz);
572 }
573
574 void
575 CodeEmitterNVC0::emitFMUL(const Instruction *i)
576 {
577 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
578
579 assert(i->postFactor >= -3 && i->postFactor <= 3);
580
581 if (i->encSize == 8) {
582 if (isLIMM(i->src(1), TYPE_F32)) {
583 assert(i->postFactor == 0); // constant folded, hopefully
584 emitForm_A(i, HEX64(30000000, 00000002));
585 } else {
586 emitForm_A(i, HEX64(58000000, 00000000));
587 roundMode_A(i);
588 code[1] |= ((i->postFactor > 0) ?
589 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
590 }
591 if (neg)
592 code[1] ^= 1 << 25; // aliases with LIMM sign bit
593
594 if (i->saturate)
595 code[0] |= 1 << 5;
596
597 if (i->dnz)
598 code[0] |= 1 << 7;
599 else
600 if (i->ftz)
601 code[0] |= 1 << 6;
602 } else {
603 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
604 emitForm_S(i, 0xa8, true);
605 }
606 }
607
608 void
609 CodeEmitterNVC0::emitDMUL(const Instruction *i)
610 {
611 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
612
613 emitForm_A(i, HEX64(50000000, 00000001));
614 roundMode_A(i);
615
616 if (neg)
617 code[0] |= 1 << 9;
618
619 assert(!i->saturate);
620 assert(!i->ftz);
621 assert(!i->dnz);
622 assert(!i->postFactor);
623 }
624
625 void
626 CodeEmitterNVC0::emitUMUL(const Instruction *i)
627 {
628 if (i->encSize == 8) {
629 if (i->src(1).getFile() == FILE_IMMEDIATE) {
630 emitForm_A(i, HEX64(10000000, 00000002));
631 } else {
632 emitForm_A(i, HEX64(50000000, 00000003));
633 }
634 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
635 code[0] |= 1 << 6;
636 if (i->sType == TYPE_S32)
637 code[0] |= 1 << 5;
638 if (i->dType == TYPE_S32)
639 code[0] |= 1 << 7;
640 } else {
641 emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
642
643 if (i->sType == TYPE_S32)
644 code[0] |= 1 << 6;
645 }
646 }
647
648 void
649 CodeEmitterNVC0::emitFADD(const Instruction *i)
650 {
651 if (i->encSize == 8) {
652 if (isLIMM(i->src(1), TYPE_F32)) {
653 assert(!i->saturate);
654 emitForm_A(i, HEX64(28000000, 00000002));
655
656 code[0] |= i->src(0).mod.abs() << 7;
657 code[0] |= i->src(0).mod.neg() << 9;
658
659 if (i->src(1).mod.abs())
660 code[1] &= 0xfdffffff;
661 if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
662 code[1] ^= 0x02000000;
663 } else {
664 emitForm_A(i, HEX64(50000000, 00000000));
665
666 roundMode_A(i);
667 if (i->saturate)
668 code[1] |= 1 << 17;
669
670 emitNegAbs12(i);
671 if (i->op == OP_SUB) code[0] ^= 1 << 8;
672 }
673 if (i->ftz)
674 code[0] |= 1 << 5;
675 } else {
676 assert(!i->saturate && i->op != OP_SUB &&
677 !i->src(0).mod.abs() &&
678 !i->src(1).mod.neg() && !i->src(1).mod.abs());
679
680 emitForm_S(i, 0x49, true);
681
682 if (i->src(0).mod.neg())
683 code[0] |= 1 << 7;
684 }
685 }
686
687 void
688 CodeEmitterNVC0::emitDADD(const Instruction *i)
689 {
690 assert(i->encSize == 8);
691 emitForm_A(i, HEX64(48000000, 00000001));
692 roundMode_A(i);
693 assert(!i->saturate);
694 assert(!i->ftz);
695 emitNegAbs12(i);
696 if (i->op == OP_SUB)
697 code[0] ^= 1 << 8;
698 }
699
700 void
701 CodeEmitterNVC0::emitUADD(const Instruction *i)
702 {
703 uint32_t addOp = 0;
704
705 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
706
707 if (i->src(0).mod.neg())
708 addOp |= 0x200;
709 if (i->src(1).mod.neg())
710 addOp |= 0x100;
711 if (i->op == OP_SUB)
712 addOp ^= 0x100;
713
714 assert(addOp != 0x300); // would be add-plus-one
715
716 if (i->encSize == 8) {
717 if (isLIMM(i->src(1), TYPE_U32)) {
718 emitForm_A(i, HEX64(08000000, 00000002));
719 if (i->defExists(1))
720 code[1] |= 1 << 26; // write carry
721 } else {
722 emitForm_A(i, HEX64(48000000, 00000003));
723 if (i->defExists(1))
724 code[1] |= 1 << 16; // write carry
725 }
726 code[0] |= addOp;
727
728 if (i->saturate)
729 code[0] |= 1 << 5;
730 if (i->flagsSrc >= 0) // add carry
731 code[0] |= 1 << 6;
732 } else {
733 assert(!(addOp & 0x100));
734 emitForm_S(i, (addOp >> 3) |
735 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
736 }
737 }
738
739 void
740 CodeEmitterNVC0::emitIMAD(const Instruction *i)
741 {
742 uint8_t addOp =
743 i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
744
745 assert(i->encSize == 8);
746 emitForm_A(i, HEX64(20000000, 00000003));
747
748 assert(addOp != 3);
749 code[0] |= addOp << 8;
750
751 if (isSignedType(i->dType))
752 code[0] |= 1 << 7;
753 if (isSignedType(i->sType))
754 code[0] |= 1 << 5;
755
756 code[1] |= i->saturate << 24;
757
758 if (i->flagsDef >= 0) code[1] |= 1 << 16;
759 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
760
761 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
762 code[0] |= 1 << 6;
763 }
764
765 void
766 CodeEmitterNVC0::emitSHLADD(const Instruction *i)
767 {
768 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
769 const ImmediateValue *imm = i->src(1).get()->asImm();
770 assert(imm);
771
772 code[0] = 0x00000003;
773 code[1] = 0x40000000 | addOp << 23;
774
775 emitPredicate(i);
776
777 defId(i->def(0), 14);
778 srcId(i->src(0), 20);
779
780 if (i->flagsDef >= 0)
781 code[1] |= 1 << 16;
782
783 assert(!(imm->reg.data.u32 & 0xffffffe0));
784 code[0] |= imm->reg.data.u32 << 5;
785
786 switch (i->src(2).getFile()) {
787 case FILE_GPR:
788 srcId(i->src(2), 26);
789 break;
790 case FILE_MEMORY_CONST:
791 code[1] |= 0x4000;
792 code[1] |= i->getSrc(2)->reg.fileIndex << 10;
793 setAddress16(i->src(2));
794 break;
795 case FILE_IMMEDIATE:
796 setImmediate(i, 2);
797 break;
798 default:
799 assert(!"bad src2 file");
800 break;
801 }
802 }
803
804 void
805 CodeEmitterNVC0::emitMADSP(const Instruction *i)
806 {
807 assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
808
809 emitForm_A(i, HEX64(00000000, 00000003));
810
811 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
812 code[1] |= 0x01800000;
813 } else {
814 code[0] |= (i->subOp & 0x00f) << 7;
815 code[0] |= (i->subOp & 0x0f0) << 1;
816 code[0] |= (i->subOp & 0x100) >> 3;
817 code[0] |= (i->subOp & 0x200) >> 2;
818 code[1] |= (i->subOp & 0xc00) << 13;
819 }
820
821 if (i->flagsDef >= 0)
822 code[1] |= 1 << 16;
823 }
824
825 void
826 CodeEmitterNVC0::emitISAD(const Instruction *i)
827 {
828 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
829 assert(i->encSize == 8);
830
831 emitForm_A(i, HEX64(38000000, 00000003));
832
833 if (i->dType == TYPE_S32)
834 code[0] |= 1 << 5;
835 }
836
837 void
838 CodeEmitterNVC0::emitNOT(Instruction *i)
839 {
840 assert(i->encSize == 8);
841 i->setSrc(1, i->src(0));
842 emitForm_A(i, HEX64(68000000, 000001c3));
843 }
844
845 void
846 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
847 {
848 if (i->def(0).getFile() == FILE_PREDICATE) {
849 code[0] = 0x00000004 | (subOp << 30);
850 code[1] = 0x0c000000;
851
852 emitPredicate(i);
853
854 defId(i->def(0), 17);
855 srcId(i->src(0), 20);
856 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
857 srcId(i->src(1), 26);
858 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
859
860 if (i->defExists(1)) {
861 defId(i->def(1), 14);
862 } else {
863 code[0] |= 7 << 14;
864 }
865 // (a OP b) OP c
866 if (i->predSrc != 2 && i->srcExists(2)) {
867 code[1] |= subOp << 21;
868 srcId(i->src(2), 49);
869 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
870 } else {
871 code[1] |= 0x000e0000;
872 }
873 } else
874 if (i->encSize == 8) {
875 if (isLIMM(i->src(1), TYPE_U32)) {
876 emitForm_A(i, HEX64(38000000, 00000002));
877
878 if (i->flagsDef >= 0)
879 code[1] |= 1 << 26;
880 } else {
881 emitForm_A(i, HEX64(68000000, 00000003));
882
883 if (i->flagsDef >= 0)
884 code[1] |= 1 << 16;
885 }
886 code[0] |= subOp << 6;
887
888 if (i->flagsSrc >= 0) // carry
889 code[0] |= 1 << 5;
890
891 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
892 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
893 } else {
894 emitForm_S(i, (subOp << 5) |
895 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
896 }
897 }
898
899 void
900 CodeEmitterNVC0::emitPOPC(const Instruction *i)
901 {
902 emitForm_A(i, HEX64(54000000, 00000004));
903
904 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
905 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
906 }
907
908 void
909 CodeEmitterNVC0::emitINSBF(const Instruction *i)
910 {
911 emitForm_A(i, HEX64(28000000, 00000003));
912 }
913
914 void
915 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
916 {
917 emitForm_A(i, HEX64(70000000, 00000003));
918
919 if (i->dType == TYPE_S32)
920 code[0] |= 1 << 5;
921 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
922 code[0] |= 1 << 8;
923 }
924
925 void
926 CodeEmitterNVC0::emitBFIND(const Instruction *i)
927 {
928 emitForm_B(i, HEX64(78000000, 00000003));
929
930 if (i->dType == TYPE_S32)
931 code[0] |= 1 << 5;
932 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
933 code[0] |= 1 << 8;
934 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
935 code[0] |= 1 << 6;
936 }
937
938 void
939 CodeEmitterNVC0::emitPERMT(const Instruction *i)
940 {
941 emitForm_A(i, HEX64(24000000, 00000004));
942
943 code[0] |= i->subOp << 5;
944 }
945
946 void
947 CodeEmitterNVC0::emitShift(const Instruction *i)
948 {
949 if (i->op == OP_SHR) {
950 emitForm_A(i, HEX64(58000000, 00000003)
951 | (isSignedType(i->dType) ? 0x20 : 0x00));
952 } else {
953 emitForm_A(i, HEX64(60000000, 00000003));
954 }
955
956 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
957 code[0] |= 1 << 9;
958 }
959
960 void
961 CodeEmitterNVC0::emitPreOp(const Instruction *i)
962 {
963 if (i->encSize == 8) {
964 emitForm_B(i, HEX64(60000000, 00000000));
965
966 if (i->op == OP_PREEX2)
967 code[0] |= 0x20;
968
969 if (i->src(0).mod.abs()) code[0] |= 1 << 6;
970 if (i->src(0).mod.neg()) code[0] |= 1 << 8;
971 } else {
972 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
973 }
974 }
975
976 void
977 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
978 {
979 if (i->encSize == 8) {
980 code[0] = 0x00000000 | (subOp << 26);
981 code[1] = 0xc8000000;
982
983 emitPredicate(i);
984
985 defId(i->def(0), 14);
986 srcId(i->src(0), 20);
987
988 assert(i->src(0).getFile() == FILE_GPR);
989
990 if (i->saturate) code[0] |= 1 << 5;
991
992 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
993 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
994 } else {
995 emitForm_S(i, 0x80000008 | (subOp << 26), true);
996
997 assert(!i->src(0).mod.neg());
998 if (i->src(0).mod.abs()) code[0] |= 1 << 30;
999 }
1000 }
1001
1002 void
1003 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
1004 {
1005 uint64_t op;
1006
1007 assert(i->encSize == 8);
1008
1009 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
1010
1011 if (i->ftz)
1012 op |= 1 << 5;
1013 else
1014 if (!isFloatType(i->dType)) {
1015 op |= isSignedType(i->dType) ? 0x23 : 0x03;
1016 op |= i->subOp << 6;
1017 }
1018 if (i->dType == TYPE_F64)
1019 op |= 0x01;
1020
1021 emitForm_A(i, op);
1022 emitNegAbs12(i);
1023
1024 if (i->flagsDef >= 0)
1025 code[1] |= 1 << 16;
1026 }
1027
1028 void
1029 CodeEmitterNVC0::roundMode_C(const Instruction *i)
1030 {
1031 switch (i->rnd) {
1032 case ROUND_M: code[1] |= 1 << 17; break;
1033 case ROUND_P: code[1] |= 2 << 17; break;
1034 case ROUND_Z: code[1] |= 3 << 17; break;
1035 case ROUND_NI: code[0] |= 1 << 7; break;
1036 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
1037 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
1038 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
1039 case ROUND_N: break;
1040 default:
1041 assert(!"invalid round mode");
1042 break;
1043 }
1044 }
1045
1046 void
1047 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
1048 {
1049 switch (i->rnd) {
1050 case ROUND_M:
1051 case ROUND_MI: code[0] |= 1 << 16; break;
1052 case ROUND_P:
1053 case ROUND_PI: code[0] |= 2 << 16; break;
1054 case ROUND_Z:
1055 case ROUND_ZI: code[0] |= 3 << 16; break;
1056 default:
1057 break;
1058 }
1059 }
1060
1061 void
1062 CodeEmitterNVC0::emitCVT(Instruction *i)
1063 {
1064 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1065 DataType dType;
1066
1067 switch (i->op) {
1068 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
1069 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
1070 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1071 default:
1072 break;
1073 }
1074
1075 const bool sat = (i->op == OP_SAT) || i->saturate;
1076 const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
1077 const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
1078
1079 if (i->op == OP_NEG && i->dType == TYPE_U32)
1080 dType = TYPE_S32;
1081 else
1082 dType = i->dType;
1083
1084 if (i->encSize == 8) {
1085 emitForm_B(i, HEX64(10000000, 00000004));
1086
1087 roundMode_C(i);
1088
1089 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
1090 code[0] |= util_logbase2(typeSizeof(dType)) << 20;
1091 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
1092
1093 // for 8/16 source types, the byte/word is in subOp. word 1 is
1094 // represented as 2.
1095 if (!isFloatType(i->sType))
1096 code[1] |= i->subOp << 0x17;
1097 else
1098 code[1] |= i->subOp << 0x18;
1099
1100 if (sat)
1101 code[0] |= 0x20;
1102 if (abs)
1103 code[0] |= 1 << 6;
1104 if (neg && i->op != OP_ABS)
1105 code[0] |= 1 << 8;
1106
1107 if (i->ftz)
1108 code[1] |= 1 << 23;
1109
1110 if (isSignedIntType(dType))
1111 code[0] |= 0x080;
1112 if (isSignedIntType(i->sType))
1113 code[0] |= 0x200;
1114
1115 if (isFloatType(dType)) {
1116 if (!isFloatType(i->sType))
1117 code[1] |= 0x08000000;
1118 } else {
1119 if (isFloatType(i->sType))
1120 code[1] |= 0x04000000;
1121 else
1122 code[1] |= 0x0c000000;
1123 }
1124 } else {
1125 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
1126 code[0] = 0x298;
1127 } else
1128 if (isFloatType(dType)) {
1129 if (isFloatType(i->sType))
1130 code[0] = 0x098;
1131 else
1132 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
1133 } else {
1134 assert(isFloatType(i->sType));
1135
1136 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
1137 }
1138
1139 if (neg) code[0] |= 1 << 16;
1140 if (sat) code[0] |= 1 << 18;
1141 if (abs) code[0] |= 1 << 19;
1142
1143 roundMode_CS(i);
1144 }
1145 }
1146
1147 void
1148 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1149 {
1150 uint32_t hi;
1151 uint32_t lo = 0;
1152
1153 if (i->sType == TYPE_F64)
1154 lo = 0x1;
1155 else
1156 if (!isFloatType(i->sType))
1157 lo = 0x3;
1158
1159 if (isSignedIntType(i->sType))
1160 lo |= 0x20;
1161 if (isFloatType(i->dType)) {
1162 if (isFloatType(i->sType))
1163 lo |= 0x20;
1164 else
1165 lo |= 0x80;
1166 }
1167
1168 switch (i->op) {
1169 case OP_SET_AND: hi = 0x10000000; break;
1170 case OP_SET_OR: hi = 0x10200000; break;
1171 case OP_SET_XOR: hi = 0x10400000; break;
1172 default:
1173 hi = 0x100e0000;
1174 break;
1175 }
1176 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1177
1178 if (i->op != OP_SET)
1179 srcId(i->src(2), 32 + 17);
1180
1181 if (i->def(0).getFile() == FILE_PREDICATE) {
1182 if (i->sType == TYPE_F32)
1183 code[1] += 0x10000000;
1184 else
1185 code[1] += 0x08000000;
1186
1187 code[0] &= ~0xfc000;
1188 defId(i->def(0), 17);
1189 if (i->defExists(1))
1190 defId(i->def(1), 14);
1191 else
1192 code[0] |= 0x1c000;
1193 }
1194
1195 if (i->ftz)
1196 code[1] |= 1 << 27;
1197
1198 emitCondCode(i->setCond, 32 + 23);
1199 emitNegAbs12(i);
1200 }
1201
1202 void
1203 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1204 {
1205 uint64_t op;
1206
1207 switch (i->dType) {
1208 case TYPE_S32:
1209 op = HEX64(30000000, 00000023);
1210 break;
1211 case TYPE_U32:
1212 op = HEX64(30000000, 00000003);
1213 break;
1214 case TYPE_F32:
1215 op = HEX64(38000000, 00000000);
1216 break;
1217 default:
1218 assert(!"invalid type for SLCT");
1219 op = 0;
1220 break;
1221 }
1222 emitForm_A(i, op);
1223
1224 CondCode cc = i->setCond;
1225
1226 if (i->src(2).mod.neg())
1227 cc = reverseCondCode(cc);
1228
1229 emitCondCode(cc, 32 + 23);
1230
1231 if (i->ftz)
1232 code[0] |= 1 << 5;
1233 }
1234
1235 static void
1236 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1237 {
1238 int loc = entry->loc;
1239 if (data.force_persample_interp)
1240 code[loc + 1] |= 1 << 20;
1241 else
1242 code[loc + 1] &= ~(1 << 20);
1243 }
1244
1245 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1246 {
1247 emitForm_A(i, HEX64(20000000, 00000004));
1248
1249 if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1250 code[1] |= 1 << 20;
1251
1252 if (i->subOp == 1) {
1253 addInterp(0, 0, selpFlip);
1254 }
1255 }
1256
1257 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1258 {
1259 code[0] = 0x00000006 | (i->subOp << 26);
1260 code[1] = 0xf0000000;
1261 emitPredicate(i);
1262 emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1263 }
1264
1265 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1266 {
1267 code[0] = 0x00000086;
1268 code[1] = 0xd0000000;
1269
1270 code[1] |= i->tex.r;
1271 code[1] |= i->tex.s << 8;
1272
1273 if (i->tex.liveOnly)
1274 code[0] |= 1 << 9;
1275
1276 defId(i->def(0), 14);
1277 srcId(i->src(0), 20);
1278 }
1279
1280 static inline bool
1281 isNextIndependentTex(const TexInstruction *i)
1282 {
1283 if (!i->next || !isTextureOp(i->next->op))
1284 return false;
1285 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1286 return false;
1287 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1288 }
1289
1290 void
1291 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1292 {
1293 code[0] = 0x00000006;
1294
1295 if (isNextIndependentTex(i))
1296 code[0] |= 0x080; // t mode
1297 else
1298 code[0] |= 0x100; // p mode
1299
1300 if (i->tex.liveOnly)
1301 code[0] |= 1 << 9;
1302
1303 switch (i->op) {
1304 case OP_TEX: code[1] = 0x80000000; break;
1305 case OP_TXB: code[1] = 0x84000000; break;
1306 case OP_TXL: code[1] = 0x86000000; break;
1307 case OP_TXF: code[1] = 0x90000000; break;
1308 case OP_TXG: code[1] = 0xa0000000; break;
1309 case OP_TXLQ: code[1] = 0xb0000000; break;
1310 case OP_TXD: code[1] = 0xe0000000; break;
1311 default:
1312 assert(!"invalid texture op");
1313 break;
1314 }
1315 if (i->op == OP_TXF) {
1316 if (!i->tex.levelZero)
1317 code[1] |= 0x02000000;
1318 } else
1319 if (i->tex.levelZero) {
1320 code[1] |= 0x02000000;
1321 }
1322
1323 if (i->op != OP_TXD && i->tex.derivAll)
1324 code[1] |= 1 << 13;
1325
1326 defId(i->def(0), 14);
1327 srcId(i->src(0), 20);
1328
1329 emitPredicate(i);
1330
1331 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1332
1333 code[1] |= i->tex.mask << 14;
1334
1335 code[1] |= i->tex.r;
1336 code[1] |= i->tex.s << 8;
1337 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1338 code[1] |= 1 << 18; // in 1st source (with array index)
1339
1340 // texture target:
1341 code[1] |= (i->tex.target.getDim() - 1) << 20;
1342 if (i->tex.target.isCube())
1343 code[1] += 2 << 20;
1344 if (i->tex.target.isArray())
1345 code[1] |= 1 << 19;
1346 if (i->tex.target.isShadow())
1347 code[1] |= 1 << 24;
1348
1349 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1350
1351 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1352 // lzero
1353 if (i->op == OP_TXL)
1354 code[1] &= ~(1 << 26);
1355 else
1356 if (i->op == OP_TXF)
1357 code[1] &= ~(1 << 25);
1358 }
1359 if (i->tex.target == TEX_TARGET_2D_MS ||
1360 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1361 code[1] |= 1 << 23;
1362
1363 if (i->tex.useOffsets == 1)
1364 code[1] |= 1 << 22;
1365 if (i->tex.useOffsets == 4)
1366 code[1] |= 1 << 23;
1367
1368 srcId(i, src1, 26);
1369 }
1370
1371 void
1372 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1373 {
1374 code[0] = 0x00000086;
1375 code[1] = 0xc0000000;
1376
1377 switch (i->tex.query) {
1378 case TXQ_DIMS: code[1] |= 0 << 22; break;
1379 case TXQ_TYPE: code[1] |= 1 << 22; break;
1380 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1381 case TXQ_FILTER: code[1] |= 3 << 22; break;
1382 case TXQ_LOD: code[1] |= 4 << 22; break;
1383 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1384 default:
1385 assert(!"invalid texture query");
1386 break;
1387 }
1388
1389 code[1] |= i->tex.mask << 14;
1390
1391 code[1] |= i->tex.r;
1392 code[1] |= i->tex.s << 8;
1393 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1394 code[1] |= 1 << 18;
1395
1396 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1397
1398 defId(i->def(0), 14);
1399 srcId(i->src(0), 20);
1400 srcId(i, src1, 26);
1401
1402 emitPredicate(i);
1403 }
1404
1405 void
1406 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1407 {
1408 code[0] = 0x00000200 | (laneMask << 6); // dall
1409 code[1] = 0x48000000 | qOp;
1410
1411 defId(i->def(0), 14);
1412 srcId(i->src(0), 20);
1413 srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
1414
1415 emitPredicate(i);
1416 }
1417
1418 void
1419 CodeEmitterNVC0::emitFlow(const Instruction *i)
1420 {
1421 const FlowInstruction *f = i->asFlow();
1422
1423 unsigned mask; // bit 0: predicate, bit 1: target
1424
1425 code[0] = 0x00000007;
1426
1427 switch (i->op) {
1428 case OP_BRA:
1429 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1430 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1431 code[0] |= 0x4000;
1432 mask = 3;
1433 break;
1434 case OP_CALL:
1435 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1436 if (f->indirect)
1437 code[0] |= 0x4000; // indirect calls always use c[] source
1438 mask = 2;
1439 break;
1440
1441 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1442 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1443 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1444 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1445 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1446
1447 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1448 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1449 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1450 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1451
1452 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1453 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1454 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1455 default:
1456 assert(!"invalid flow operation");
1457 return;
1458 }
1459
1460 if (mask & 1) {
1461 emitPredicate(i);
1462 if (i->flagsSrc < 0)
1463 code[0] |= 0x1e0;
1464 }
1465
1466 if (!f)
1467 return;
1468
1469 if (f->allWarp)
1470 code[0] |= 1 << 15;
1471 if (f->limit)
1472 code[0] |= 1 << 16;
1473
1474 if (f->indirect) {
1475 if (code[0] & 0x4000) {
1476 assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1477 setAddress16(i->src(0));
1478 code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1479 if (f->op == OP_BRA)
1480 srcId(f->src(0).getIndirect(0), 20);
1481 } else {
1482 srcId(f, 0, 20);
1483 }
1484 }
1485
1486 if (f->op == OP_CALL) {
1487 if (f->indirect) {
1488 // nothing
1489 } else
1490 if (f->builtin) {
1491 assert(f->absolute);
1492 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1493 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1494 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1495 } else {
1496 assert(!f->absolute);
1497 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1498 code[0] |= (pcRel & 0x3f) << 26;
1499 code[1] |= (pcRel >> 6) & 0x3ffff;
1500 }
1501 } else
1502 if (mask & 2) {
1503 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1504 if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1505 pcRel += 8;
1506 // currently we don't want absolute branches
1507 assert(!f->absolute);
1508 code[0] |= (pcRel & 0x3f) << 26;
1509 code[1] |= (pcRel >> 6) & 0x3ffff;
1510 }
1511 }
1512
1513 void
1514 CodeEmitterNVC0::emitBAR(const Instruction *i)
1515 {
1516 Value *rDef = NULL, *pDef = NULL;
1517
1518 switch (i->subOp) {
1519 case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break;
1520 case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break;
1521 case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break;
1522 case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1523 default:
1524 code[0] = 0x04;
1525 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1526 break;
1527 }
1528 code[1] = 0x50000000;
1529
1530 code[0] |= 63 << 14;
1531 code[1] |= 7 << 21;
1532
1533 emitPredicate(i);
1534
1535 // barrier id
1536 if (i->src(0).getFile() == FILE_GPR) {
1537 srcId(i->src(0), 20);
1538 } else {
1539 ImmediateValue *imm = i->getSrc(0)->asImm();
1540 assert(imm);
1541 code[0] |= imm->reg.data.u32 << 20;
1542 code[1] |= 0x8000;
1543 }
1544
1545 // thread count
1546 if (i->src(1).getFile() == FILE_GPR) {
1547 srcId(i->src(1), 26);
1548 } else {
1549 ImmediateValue *imm = i->getSrc(1)->asImm();
1550 assert(imm);
1551 assert(imm->reg.data.u32 <= 0xfff);
1552 code[0] |= imm->reg.data.u32 << 26;
1553 code[1] |= imm->reg.data.u32 >> 6;
1554 code[1] |= 0x4000;
1555 }
1556
1557 if (i->srcExists(2) && (i->predSrc != 2)) {
1558 srcId(i->src(2), 32 + 17);
1559 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1560 code[1] |= 1 << 20;
1561 } else {
1562 code[1] |= 7 << 17;
1563 }
1564
1565 if (i->defExists(0)) {
1566 if (i->def(0).getFile() == FILE_GPR)
1567 rDef = i->getDef(0);
1568 else
1569 pDef = i->getDef(0);
1570
1571 if (i->defExists(1)) {
1572 if (i->def(1).getFile() == FILE_GPR)
1573 rDef = i->getDef(1);
1574 else
1575 pDef = i->getDef(1);
1576 }
1577 }
1578 if (rDef) {
1579 code[0] &= ~(63 << 14);
1580 defId(rDef, 14);
1581 }
1582 if (pDef) {
1583 code[1] &= ~(7 << 21);
1584 defId(pDef, 32 + 21);
1585 }
1586 }
1587
1588 void
1589 CodeEmitterNVC0::emitAFETCH(const Instruction *i)
1590 {
1591 code[0] = 0x00000006;
1592 code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
1593
1594 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1595 code[0] |= 0x200;
1596
1597 emitPredicate(i);
1598
1599 defId(i->def(0), 14);
1600 srcId(i->src(0).getIndirect(0), 20);
1601 }
1602
1603 void
1604 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1605 {
1606 uint32_t prim = i->src(0).get()->reg.data.u32;
1607
1608 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1609 code[1] = 0x00000000 | (prim >> 6);
1610
1611 emitPredicate(i);
1612
1613 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1614
1615 defId(i->def(0), 14);
1616 srcId(i, src1, 20);
1617 }
1618
1619 void
1620 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1621 {
1622 code[0] = 0x00000006;
1623 code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1624
1625 if (i->perPatch)
1626 code[0] |= 0x100;
1627 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1628 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1629
1630 emitPredicate(i);
1631
1632 code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1633
1634 defId(i->def(0), 14);
1635 srcId(i->src(0).getIndirect(0), 20);
1636 srcId(i->src(0).getIndirect(1), 26); // vertex address
1637 }
1638
1639 void
1640 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1641 {
1642 unsigned int size = typeSizeof(i->dType);
1643
1644 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1645 code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1646
1647 assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1648
1649 if (i->perPatch)
1650 code[0] |= 0x100;
1651
1652 emitPredicate(i);
1653
1654 assert(i->src(1).getFile() == FILE_GPR);
1655
1656 srcId(i->src(0).getIndirect(0), 20);
1657 srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1658 srcId(i->src(1), 26);
1659 }
1660
1661 void
1662 CodeEmitterNVC0::emitOUT(const Instruction *i)
1663 {
1664 code[0] = 0x00000006;
1665 code[1] = 0x1c000000;
1666
1667 emitPredicate(i);
1668
1669 defId(i->def(0), 14); // new secret address
1670 srcId(i->src(0), 20); // old secret address, should be 0 initially
1671
1672 assert(i->src(0).getFile() == FILE_GPR);
1673
1674 if (i->op == OP_EMIT)
1675 code[0] |= 1 << 5;
1676 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1677 code[0] |= 1 << 6;
1678
1679 // vertex stream
1680 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1681 unsigned int stream = SDATA(i->src(1)).u32;
1682 assert(stream < 4);
1683 if (stream) {
1684 code[1] |= 0xc000;
1685 code[0] |= stream << 26;
1686 } else {
1687 srcId(NULL, 26);
1688 }
1689 } else {
1690 srcId(i->src(1), 26);
1691 }
1692 }
1693
1694 void
1695 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1696 {
1697 if (i->encSize == 8) {
1698 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1699 } else {
1700 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1701 code[0] |= 0x80;
1702 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1703 }
1704 }
1705
1706 static void
1707 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1708 {
1709 int ipa = entry->ipa;
1710 int reg = entry->reg;
1711 int loc = entry->loc;
1712
1713 if (data.flatshade &&
1714 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1715 ipa = NV50_IR_INTERP_FLAT;
1716 reg = 0x3f;
1717 } else if (data.force_persample_interp &&
1718 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1719 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1720 ipa |= NV50_IR_INTERP_CENTROID;
1721 }
1722 code[loc + 0] &= ~(0xf << 6);
1723 code[loc + 0] |= ipa << 6;
1724 code[loc + 0] &= ~(0x3f << 26);
1725 code[loc + 0] |= reg << 26;
1726 }
1727
1728 void
1729 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1730 {
1731 const uint32_t base = i->getSrc(0)->reg.data.offset;
1732
1733 if (i->encSize == 8) {
1734 code[0] = 0x00000000;
1735 code[1] = 0xc0000000 | (base & 0xffff);
1736
1737 if (i->saturate)
1738 code[0] |= 1 << 5;
1739
1740 if (i->op == OP_PINTERP) {
1741 srcId(i->src(1), 26);
1742 addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
1743 } else {
1744 code[0] |= 0x3f << 26;
1745 addInterp(i->ipa, 0x3f, interpApply);
1746 }
1747
1748 srcId(i->src(0).getIndirect(0), 20);
1749 } else {
1750 assert(i->op == OP_PINTERP);
1751 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1752 srcId(i->src(1), 20);
1753 }
1754 emitInterpMode(i);
1755
1756 emitPredicate(i);
1757 defId(i->def(0), 14);
1758
1759 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1760 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
1761 else
1762 code[1] |= 0x3f << 17;
1763 }
1764
1765 void
1766 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1767 {
1768 uint8_t val;
1769
1770 switch (ty) {
1771 case TYPE_U8:
1772 val = 0x00;
1773 break;
1774 case TYPE_S8:
1775 val = 0x20;
1776 break;
1777 case TYPE_F16:
1778 case TYPE_U16:
1779 val = 0x40;
1780 break;
1781 case TYPE_S16:
1782 val = 0x60;
1783 break;
1784 case TYPE_F32:
1785 case TYPE_U32:
1786 case TYPE_S32:
1787 val = 0x80;
1788 break;
1789 case TYPE_F64:
1790 case TYPE_U64:
1791 case TYPE_S64:
1792 val = 0xa0;
1793 break;
1794 case TYPE_B128:
1795 val = 0xc0;
1796 break;
1797 default:
1798 val = 0x80;
1799 assert(!"invalid type");
1800 break;
1801 }
1802 code[0] |= val;
1803 }
1804
1805 void
1806 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1807 {
1808 uint32_t val;
1809
1810 switch (c) {
1811 case CACHE_CA:
1812 // case CACHE_WB:
1813 val = 0x000;
1814 break;
1815 case CACHE_CG:
1816 val = 0x100;
1817 break;
1818 case CACHE_CS:
1819 val = 0x200;
1820 break;
1821 case CACHE_CV:
1822 // case CACHE_WT:
1823 val = 0x300;
1824 break;
1825 default:
1826 val = 0;
1827 assert(!"invalid caching mode");
1828 break;
1829 }
1830 code[0] |= val;
1831 }
1832
1833 static inline bool
1834 uses64bitAddress(const Instruction *ldst)
1835 {
1836 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1837 ldst->src(0).isIndirect(0) &&
1838 ldst->getIndirect(0, 0)->reg.size == 8;
1839 }
1840
1841 void
1842 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1843 {
1844 uint32_t opc;
1845
1846 switch (i->src(0).getFile()) {
1847 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1848 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1849 case FILE_MEMORY_SHARED:
1850 if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1851 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1852 opc = 0xb8000000;
1853 else
1854 opc = 0xcc000000;
1855 } else {
1856 opc = 0xc9000000;
1857 }
1858 break;
1859 default:
1860 assert(!"invalid memory file");
1861 opc = 0;
1862 break;
1863 }
1864 code[0] = 0x00000005;
1865 code[1] = opc;
1866
1867 if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
1868 // Unlocked store on shared memory can fail.
1869 if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
1870 i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
1871 assert(i->defExists(0));
1872 defId(i->def(0), 8);
1873 }
1874 }
1875
1876 setAddressByFile(i->src(0));
1877 srcId(i->src(1), 14);
1878 srcId(i->src(0).getIndirect(0), 20);
1879 if (uses64bitAddress(i))
1880 code[1] |= 1 << 26;
1881
1882 emitPredicate(i);
1883
1884 emitLoadStoreType(i->dType);
1885 emitCachingMode(i->cache);
1886 }
1887
1888 void
1889 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1890 {
1891 uint32_t opc;
1892
1893 code[0] = 0x00000005;
1894
1895 switch (i->src(0).getFile()) {
1896 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1897 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1898 case FILE_MEMORY_SHARED:
1899 if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1900 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1901 opc = 0xa8000000;
1902 else
1903 opc = 0xc4000000;
1904 } else {
1905 opc = 0xc1000000;
1906 }
1907 break;
1908 case FILE_MEMORY_CONST:
1909 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1910 emitMOV(i); // not sure if this is any better
1911 return;
1912 }
1913 opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1914 code[0] = 0x00000006 | (i->subOp << 8);
1915 break;
1916 default:
1917 assert(!"invalid memory file");
1918 opc = 0;
1919 break;
1920 }
1921 code[1] = opc;
1922
1923 int r = 0, p = -1;
1924 if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
1925 if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
1926 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1927 r = -1;
1928 p = 0;
1929 } else if (i->defExists(1)) { // r, p
1930 p = 1;
1931 } else {
1932 assert(!"Expected predicate dest for load locked");
1933 }
1934 }
1935 }
1936
1937 if (r >= 0)
1938 defId(i->def(r), 14);
1939 else
1940 code[0] |= 63 << 14;
1941
1942 if (p >= 0) {
1943 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
1944 defId(i->def(p), 8);
1945 else
1946 defId(i->def(p), 32 + 18);
1947 }
1948
1949 setAddressByFile(i->src(0));
1950 srcId(i->src(0).getIndirect(0), 20);
1951 if (uses64bitAddress(i))
1952 code[1] |= 1 << 26;
1953
1954 emitPredicate(i);
1955
1956 emitLoadStoreType(i->dType);
1957 emitCachingMode(i->cache);
1958 }
1959
1960 uint8_t
1961 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1962 {
1963 switch (SDATA(ref).sv.sv) {
1964 case SV_LANEID: return 0x00;
1965 case SV_PHYSID: return 0x03;
1966 case SV_VERTEX_COUNT: return 0x10;
1967 case SV_INVOCATION_ID: return 0x11;
1968 case SV_YDIR: return 0x12;
1969 case SV_THREAD_KILL: return 0x13;
1970 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1971 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1972 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1973 case SV_GRIDID: return 0x2c;
1974 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1975 case SV_LBASE: return 0x34;
1976 case SV_SBASE: return 0x30;
1977 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1978 default:
1979 assert(!"no sreg for system value");
1980 return 0;
1981 }
1982 }
1983
1984 void
1985 CodeEmitterNVC0::emitMOV(const Instruction *i)
1986 {
1987 if (i->def(0).getFile() == FILE_PREDICATE) {
1988 if (i->src(0).getFile() == FILE_GPR) {
1989 code[0] = 0xfc01c003;
1990 code[1] = 0x1a8e0000;
1991 srcId(i->src(0), 20);
1992 } else {
1993 code[0] = 0x0001c004;
1994 code[1] = 0x0c0e0000;
1995 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1996 code[0] |= 7 << 20;
1997 if (!i->getSrc(0)->reg.data.u32)
1998 code[0] |= 1 << 23;
1999 } else {
2000 srcId(i->src(0), 20);
2001 }
2002 }
2003 defId(i->def(0), 17);
2004 emitPredicate(i);
2005 } else
2006 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2007 uint8_t sr = getSRegEncoding(i->src(0));
2008
2009 if (i->encSize == 8) {
2010 code[0] = 0x00000004 | (sr << 26);
2011 code[1] = 0x2c000000;
2012 } else {
2013 code[0] = 0x40000008 | (sr << 20);
2014 }
2015 defId(i->def(0), 14);
2016
2017 emitPredicate(i);
2018 } else
2019 if (i->encSize == 8) {
2020 uint64_t opc;
2021
2022 if (i->src(0).getFile() == FILE_IMMEDIATE)
2023 opc = HEX64(18000000, 000001e2);
2024 else
2025 if (i->src(0).getFile() == FILE_PREDICATE)
2026 opc = HEX64(080e0000, 1c000004);
2027 else
2028 opc = HEX64(28000000, 00000004);
2029
2030 if (i->src(0).getFile() != FILE_PREDICATE)
2031 opc |= i->lanes << 5;
2032
2033 emitForm_B(i, opc);
2034
2035 // Explicitly emit the predicate source as emitForm_B skips it.
2036 if (i->src(0).getFile() == FILE_PREDICATE)
2037 srcId(i->src(0), 20);
2038 } else {
2039 uint32_t imm;
2040
2041 if (i->src(0).getFile() == FILE_IMMEDIATE) {
2042 imm = SDATA(i->src(0)).u32;
2043 if (imm & 0xfff00000) {
2044 assert(!(imm & 0x000fffff));
2045 code[0] = 0x00000318 | imm;
2046 } else {
2047 assert(imm < 0x800 || ((int32_t)imm >= -0x800));
2048 code[0] = 0x00000118 | (imm << 20);
2049 }
2050 } else {
2051 code[0] = 0x0028;
2052 emitShortSrc2(i->src(0));
2053 }
2054 defId(i->def(0), 14);
2055
2056 emitPredicate(i);
2057 }
2058 }
2059
2060 void
2061 CodeEmitterNVC0::emitATOM(const Instruction *i)
2062 {
2063 const bool hasDst = i->defExists(0);
2064 const bool casOrExch =
2065 i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
2066 i->subOp == NV50_IR_SUBOP_ATOM_CAS;
2067
2068 if (i->dType == TYPE_U64) {
2069 switch (i->subOp) {
2070 case NV50_IR_SUBOP_ATOM_ADD:
2071 code[0] = 0x205;
2072 if (hasDst)
2073 code[1] = 0x507e0000;
2074 else
2075 code[1] = 0x10000000;
2076 break;
2077 case NV50_IR_SUBOP_ATOM_EXCH:
2078 code[0] = 0x305;
2079 code[1] = 0x507e0000;
2080 break;
2081 case NV50_IR_SUBOP_ATOM_CAS:
2082 code[0] = 0x325;
2083 code[1] = 0x50000000;
2084 break;
2085 default:
2086 assert(!"invalid u64 red op");
2087 break;
2088 }
2089 } else
2090 if (i->dType == TYPE_U32) {
2091 switch (i->subOp) {
2092 case NV50_IR_SUBOP_ATOM_EXCH:
2093 code[0] = 0x105;
2094 code[1] = 0x507e0000;
2095 break;
2096 case NV50_IR_SUBOP_ATOM_CAS:
2097 code[0] = 0x125;
2098 code[1] = 0x50000000;
2099 break;
2100 default:
2101 code[0] = 0x5 | (i->subOp << 5);
2102 if (hasDst)
2103 code[1] = 0x507e0000;
2104 else
2105 code[1] = 0x10000000;
2106 break;
2107 }
2108 } else
2109 if (i->dType == TYPE_S32) {
2110 assert(i->subOp <= 2);
2111 code[0] = 0x205 | (i->subOp << 5);
2112 if (hasDst)
2113 code[1] = 0x587e0000;
2114 else
2115 code[1] = 0x18000000;
2116 } else
2117 if (i->dType == TYPE_F32) {
2118 assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
2119 code[0] = 0x205;
2120 if (hasDst)
2121 code[1] = 0x687e0000;
2122 else
2123 code[1] = 0x28000000;
2124 }
2125
2126 emitPredicate(i);
2127
2128 srcId(i->src(1), 14);
2129
2130 if (hasDst)
2131 defId(i->def(0), 32 + 11);
2132 else
2133 if (casOrExch)
2134 code[1] |= 63 << 11;
2135
2136 if (hasDst || casOrExch) {
2137 const int32_t offset = SDATA(i->src(0)).offset;
2138 assert(offset < 0x80000 && offset >= -0x80000);
2139 code[0] |= offset << 26;
2140 code[1] |= (offset & 0x1ffc0) >> 6;
2141 code[1] |= (offset & 0xe0000) << 6;
2142 } else {
2143 srcAddr32(i->src(0), 26, 0);
2144 }
2145 if (i->getIndirect(0, 0)) {
2146 srcId(i->getIndirect(0, 0), 20);
2147 if (i->getIndirect(0, 0)->reg.size == 8)
2148 code[1] |= 1 << 26;
2149 } else {
2150 code[0] |= 63 << 20;
2151 }
2152
2153 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2154 assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
2155 code[1] |= (SDATA(i->src(1)).id + 1) << 17;
2156 }
2157 }
2158
2159 void
2160 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
2161 {
2162 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
2163 case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
2164 case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break;
2165 default:
2166 code[0] = 0x45;
2167 assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
2168 break;
2169 }
2170 code[1] = 0xe0000000;
2171
2172 emitPredicate(i);
2173 }
2174
2175 void
2176 CodeEmitterNVC0::emitCCTL(const Instruction *i)
2177 {
2178 code[0] = 0x00000005 | (i->subOp << 5);
2179
2180 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2181 code[1] = 0x98000000;
2182 srcAddr32(i->src(0), 28, 2);
2183 } else {
2184 code[1] = 0xd0000000;
2185 setAddress24(i->src(0));
2186 }
2187 if (uses64bitAddress(i))
2188 code[1] |= 1 << 26;
2189 srcId(i->src(0).getIndirect(0), 20);
2190
2191 emitPredicate(i);
2192
2193 defId(i, 0, 14);
2194 }
2195
2196 void
2197 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
2198 {
2199 uint8_t m;
2200 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
2201 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
2202 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
2203 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
2204 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
2205 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
2206 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
2207 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
2208 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
2209 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
2210 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
2211 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
2212 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
2213 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
2214 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
2215 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
2216 default:
2217 return;
2218 }
2219 code[0] |= m << 5;
2220 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
2221 code[1] |= 1 << 16;
2222 }
2223
2224 void
2225 CodeEmitterNVC0::emitSUCalc(Instruction *i)
2226 {
2227 ImmediateValue *imm = NULL;
2228 uint64_t opc;
2229
2230 if (i->srcExists(2)) {
2231 imm = i->getSrc(2)->asImm();
2232 if (imm)
2233 i->setSrc(2, NULL); // special case, make emitForm_A not assert
2234 }
2235
2236 switch (i->op) {
2237 case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
2238 case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
2239 case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
2240 default:
2241 assert(0);
2242 return;
2243 }
2244 emitForm_A(i, opc);
2245
2246 if (i->op == OP_SUCLAMP) {
2247 if (i->dType == TYPE_S32)
2248 code[0] |= 1 << 9;
2249 emitSUCLAMPMode(i->subOp);
2250 }
2251
2252 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
2253 code[1] |= 1 << 16;
2254
2255 if (i->op != OP_SUEAU) {
2256 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2257 code[0] |= 63 << 14;
2258 code[1] |= i->getDef(0)->reg.data.id << 23;
2259 } else
2260 if (i->defExists(1)) { // r, p
2261 assert(i->def(1).getFile() == FILE_PREDICATE);
2262 code[1] |= i->getDef(1)->reg.data.id << 23;
2263 } else { // r, #
2264 code[1] |= 7 << 23;
2265 }
2266 }
2267 if (imm) {
2268 assert(i->op == OP_SUCLAMP);
2269 i->setSrc(2, imm);
2270 code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
2271 }
2272 }
2273
2274 void
2275 CodeEmitterNVC0::emitSUGType(DataType ty)
2276 {
2277 switch (ty) {
2278 case TYPE_S32: code[1] |= 1 << 13; break;
2279 case TYPE_U8: code[1] |= 2 << 13; break;
2280 case TYPE_S8: code[1] |= 3 << 13; break;
2281 default:
2282 assert(ty == TYPE_U32);
2283 break;
2284 }
2285 }
2286
2287 void
2288 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2289 {
2290 const uint32_t offset = i->getSrc(s)->reg.data.offset;
2291
2292 assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2293 assert(offset == (offset & 0xfffc));
2294
2295 code[1] |= 1 << 21;
2296 code[0] |= offset << 24;
2297 code[1] |= offset >> 8;
2298 code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2299 }
2300
2301 void
2302 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2303 {
2304 if (!i->srcExists(s) || (i->predSrc == s)) {
2305 code[1] |= 0x7 << 17;
2306 } else {
2307 if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2308 code[1] |= 1 << 20;
2309 srcId(i->src(s), 32 + 17);
2310 }
2311 }
2312
2313 void
2314 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2315 {
2316 code[0] = 0x5;
2317 code[1] = 0xd4000000 | (i->subOp << 15);
2318
2319 emitLoadStoreType(i->dType);
2320 emitSUGType(i->sType);
2321 emitCachingMode(i->cache);
2322
2323 emitPredicate(i);
2324 defId(i->def(0), 14); // destination
2325 srcId(i->src(0), 20); // address
2326 // format
2327 if (i->src(1).getFile() == FILE_GPR)
2328 srcId(i->src(1), 26);
2329 else
2330 setSUConst16(i, 1);
2331 setSUPred(i, 2);
2332 }
2333
2334 void
2335 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2336 {
2337 code[0] = 0x5;
2338 code[1] = 0xdc000000 | (i->subOp << 15);
2339
2340 if (i->op == OP_SUSTP)
2341 code[1] |= i->tex.mask << 22;
2342 else
2343 emitLoadStoreType(i->dType);
2344 emitSUGType(i->sType);
2345 emitCachingMode(i->cache);
2346
2347 emitPredicate(i);
2348 srcId(i->src(0), 20); // address
2349 // format
2350 if (i->src(1).getFile() == FILE_GPR)
2351 srcId(i->src(1), 26);
2352 else
2353 setSUConst16(i, 1);
2354 srcId(i->src(3), 14); // values
2355 setSUPred(i, 2);
2356 }
2357
2358 void
2359 CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)
2360 {
2361 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2362
2363 if (i->tex.rIndirectSrc < 0) {
2364 code[1] |= 0x00004000;
2365 code[0] |= i->tex.r << 26;
2366 } else {
2367 srcId(i, i->tex.rIndirectSrc, 26);
2368 }
2369 }
2370
2371 void
2372 CodeEmitterNVC0::emitSUDim(const TexInstruction *i)
2373 {
2374 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2375
2376 code[1] |= (i->tex.target.getDim() - 1) << 12;
2377 if (i->tex.target.isArray() || i->tex.target.isCube() ||
2378 i->tex.target.getDim() == 3) {
2379 // use e2d mode for 3-dim images, arrays and cubes.
2380 code[1] |= 3 << 12;
2381 }
2382
2383 srcId(i->src(0), 20);
2384 }
2385
2386 void
2387 CodeEmitterNVC0::emitSULEA(const TexInstruction *i)
2388 {
2389 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2390
2391 code[0] = 0x5;
2392 code[1] = 0xf0000000;
2393
2394 emitPredicate(i);
2395 emitLoadStoreType(i->sType);
2396
2397 defId(i->def(0), 14);
2398
2399 if (i->defExists(1)) {
2400 defId(i->def(1), 32 + 22);
2401 } else {
2402 code[1] |= 7 << 22;
2403 }
2404
2405 emitSUAddr(i);
2406 emitSUDim(i);
2407 }
2408
2409 void
2410 CodeEmitterNVC0::emitSULDB(const TexInstruction *i)
2411 {
2412 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2413
2414 code[0] = 0x5;
2415 code[1] = 0xd4000000 | (i->subOp << 15);
2416
2417 emitPredicate(i);
2418 emitLoadStoreType(i->dType);
2419
2420 defId(i->def(0), 14);
2421
2422 emitCachingMode(i->cache);
2423 emitSUAddr(i);
2424 emitSUDim(i);
2425 }
2426
2427 void
2428 CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)
2429 {
2430 assert(targ->getChipset() < NVISA_GK104_CHIPSET);
2431
2432 code[0] = 0x5;
2433 code[1] = 0xdc000000 | (i->subOp << 15);
2434
2435 if (i->op == OP_SUSTP)
2436 code[1] |= i->tex.mask << 17;
2437 else
2438 emitLoadStoreType(i->dType);
2439
2440 emitPredicate(i);
2441
2442 srcId(i->src(1), 14);
2443
2444 emitCachingMode(i->cache);
2445 emitSUAddr(i);
2446 emitSUDim(i);
2447 }
2448
2449 void
2450 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2451 {
2452 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2453 case 0:
2454 code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2455 code[1] |= (i->subOp & 0x00e0) >> 5; // vsrc2
2456 code[1] |= (i->subOp & 0x0100) << 7; // vsrc2
2457 code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2458 break;
2459 case 1:
2460 code[1] |= (i->subOp & 0x000f) << 8; // v2src1
2461 code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2462 code[1] |= (i->subOp & 0x01e0) >> 1; // v2src2
2463 code[1] |= (i->subOp & 0x0200) << 6; // v2src2
2464 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2465 code[1] |= (i->mask & 0x3) << 2;
2466 break;
2467 case 2:
2468 code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2469 code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2470 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2471 code[1] |= (i->mask & 0x3) << 2;
2472 code[1] |= (i->mask & 0xc) << 21;
2473 break;
2474 default:
2475 assert(0);
2476 break;
2477 }
2478 }
2479
2480 void
2481 CodeEmitterNVC0::emitVSHL(const Instruction *i)
2482 {
2483 uint64_t opc = 0x4;
2484
2485 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2486 case 0: opc |= 0xe8ULL << 56; break;
2487 case 1: opc |= 0xb4ULL << 56; break;
2488 case 2: opc |= 0x94ULL << 56; break;
2489 default:
2490 assert(0);
2491 break;
2492 }
2493 if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2494 if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2495 if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2496 } else {
2497 if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2498 if (isSignedType(i->sType)) opc |= 1 << 6;
2499 }
2500 emitForm_A(i, opc);
2501 emitVectorSubOp(i);
2502
2503 if (i->saturate)
2504 code[0] |= 1 << 9;
2505 if (i->flagsDef >= 0)
2506 code[1] |= 1 << 16;
2507 }
2508
2509 void
2510 CodeEmitterNVC0::emitPIXLD(const Instruction *i)
2511 {
2512 assert(i->encSize == 8);
2513 emitForm_A(i, HEX64(10000000, 00000006));
2514 code[0] |= i->subOp << 5;
2515 code[1] |= 0x00e00000;
2516 }
2517
2518 void
2519 CodeEmitterNVC0::emitVOTE(const Instruction *i)
2520 {
2521 assert(i->src(0).getFile() == FILE_PREDICATE);
2522
2523 code[0] = 0x00000004 | (i->subOp << 5);
2524 code[1] = 0x48000000;
2525
2526 emitPredicate(i);
2527
2528 unsigned rp = 0;
2529 for (int d = 0; i->defExists(d); d++) {
2530 if (i->def(d).getFile() == FILE_PREDICATE) {
2531 assert(!(rp & 2));
2532 rp |= 2;
2533 defId(i->def(d), 32 + 22);
2534 } else if (i->def(d).getFile() == FILE_GPR) {
2535 assert(!(rp & 1));
2536 rp |= 1;
2537 defId(i->def(d), 14);
2538 } else {
2539 assert(!"Unhandled def");
2540 }
2541 }
2542 if (!(rp & 1))
2543 code[0] |= 63 << 14;
2544 if (!(rp & 2))
2545 code[1] |= 7 << 22;
2546 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
2547 code[0] |= 1 << 23;
2548 srcId(i->src(0), 20);
2549 }
2550
2551 bool
2552 CodeEmitterNVC0::emitInstruction(Instruction *insn)
2553 {
2554 unsigned int size = insn->encSize;
2555
2556 if (writeIssueDelays && !(codeSize & 0x3f))
2557 size += 8;
2558
2559 if (!insn->encSize) {
2560 ERROR("skipping unencodable instruction: "); insn->print();
2561 return false;
2562 } else
2563 if (codeSize + size > codeSizeLimit) {
2564 ERROR("code emitter output buffer too small\n");
2565 return false;
2566 }
2567
2568 if (writeIssueDelays) {
2569 if (!(codeSize & 0x3f)) {
2570 code[0] = 0x00000007; // cf issue delay "instruction"
2571 code[1] = 0x20000000;
2572 code += 2;
2573 codeSize += 8;
2574 }
2575 const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2576 uint32_t *data = code - (id * 2 + 2);
2577 if (id <= 2) {
2578 data[0] |= insn->sched << (id * 8 + 4);
2579 } else
2580 if (id == 3) {
2581 data[0] |= insn->sched << 28;
2582 data[1] |= insn->sched >> 4;
2583 } else {
2584 data[1] |= insn->sched << ((id - 4) * 8 + 4);
2585 }
2586 }
2587
2588 // assert that instructions with multiple defs don't corrupt registers
2589 for (int d = 0; insn->defExists(d); ++d)
2590 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2591
2592 switch (insn->op) {
2593 case OP_MOV:
2594 case OP_RDSV:
2595 emitMOV(insn);
2596 break;
2597 case OP_NOP:
2598 break;
2599 case OP_LOAD:
2600 emitLOAD(insn);
2601 break;
2602 case OP_STORE:
2603 emitSTORE(insn);
2604 break;
2605 case OP_LINTERP:
2606 case OP_PINTERP:
2607 emitINTERP(insn);
2608 break;
2609 case OP_VFETCH:
2610 emitVFETCH(insn);
2611 break;
2612 case OP_EXPORT:
2613 emitEXPORT(insn);
2614 break;
2615 case OP_PFETCH:
2616 emitPFETCH(insn);
2617 break;
2618 case OP_AFETCH:
2619 emitAFETCH(insn);
2620 break;
2621 case OP_EMIT:
2622 case OP_RESTART:
2623 emitOUT(insn);
2624 break;
2625 case OP_ADD:
2626 case OP_SUB:
2627 if (insn->dType == TYPE_F64)
2628 emitDADD(insn);
2629 else if (isFloatType(insn->dType))
2630 emitFADD(insn);
2631 else
2632 emitUADD(insn);
2633 break;
2634 case OP_MUL:
2635 if (insn->dType == TYPE_F64)
2636 emitDMUL(insn);
2637 else if (isFloatType(insn->dType))
2638 emitFMUL(insn);
2639 else
2640 emitUMUL(insn);
2641 break;
2642 case OP_MAD:
2643 case OP_FMA:
2644 if (insn->dType == TYPE_F64)
2645 emitDMAD(insn);
2646 else if (isFloatType(insn->dType))
2647 emitFMAD(insn);
2648 else
2649 emitIMAD(insn);
2650 break;
2651 case OP_SAD:
2652 emitISAD(insn);
2653 break;
2654 case OP_SHLADD:
2655 emitSHLADD(insn);
2656 break;
2657 case OP_NOT:
2658 emitNOT(insn);
2659 break;
2660 case OP_AND:
2661 emitLogicOp(insn, 0);
2662 break;
2663 case OP_OR:
2664 emitLogicOp(insn, 1);
2665 break;
2666 case OP_XOR:
2667 emitLogicOp(insn, 2);
2668 break;
2669 case OP_SHL:
2670 case OP_SHR:
2671 emitShift(insn);
2672 break;
2673 case OP_SET:
2674 case OP_SET_AND:
2675 case OP_SET_OR:
2676 case OP_SET_XOR:
2677 emitSET(insn->asCmp());
2678 break;
2679 case OP_SELP:
2680 emitSELP(insn);
2681 break;
2682 case OP_SLCT:
2683 emitSLCT(insn->asCmp());
2684 break;
2685 case OP_MIN:
2686 case OP_MAX:
2687 emitMINMAX(insn);
2688 break;
2689 case OP_ABS:
2690 case OP_NEG:
2691 case OP_CEIL:
2692 case OP_FLOOR:
2693 case OP_TRUNC:
2694 case OP_SAT:
2695 emitCVT(insn);
2696 break;
2697 case OP_CVT:
2698 if (insn->def(0).getFile() == FILE_PREDICATE ||
2699 insn->src(0).getFile() == FILE_PREDICATE)
2700 emitMOV(insn);
2701 else
2702 emitCVT(insn);
2703 break;
2704 case OP_RSQ:
2705 emitSFnOp(insn, 5 + 2 * insn->subOp);
2706 break;
2707 case OP_RCP:
2708 emitSFnOp(insn, 4 + 2 * insn->subOp);
2709 break;
2710 case OP_LG2:
2711 emitSFnOp(insn, 3);
2712 break;
2713 case OP_EX2:
2714 emitSFnOp(insn, 2);
2715 break;
2716 case OP_SIN:
2717 emitSFnOp(insn, 1);
2718 break;
2719 case OP_COS:
2720 emitSFnOp(insn, 0);
2721 break;
2722 case OP_PRESIN:
2723 case OP_PREEX2:
2724 emitPreOp(insn);
2725 break;
2726 case OP_TEX:
2727 case OP_TXB:
2728 case OP_TXL:
2729 case OP_TXD:
2730 case OP_TXF:
2731 case OP_TXG:
2732 case OP_TXLQ:
2733 emitTEX(insn->asTex());
2734 break;
2735 case OP_TXQ:
2736 emitTXQ(insn->asTex());
2737 break;
2738 case OP_TEXBAR:
2739 emitTEXBAR(insn);
2740 break;
2741 case OP_SUBFM:
2742 case OP_SUCLAMP:
2743 case OP_SUEAU:
2744 emitSUCalc(insn);
2745 break;
2746 case OP_MADSP:
2747 emitMADSP(insn);
2748 break;
2749 case OP_SULDB:
2750 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2751 emitSULDGB(insn->asTex());
2752 else
2753 emitSULDB(insn->asTex());
2754 break;
2755 case OP_SUSTB:
2756 case OP_SUSTP:
2757 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2758 emitSUSTGx(insn->asTex());
2759 else
2760 emitSUSTx(insn->asTex());
2761 break;
2762 case OP_SULEA:
2763 emitSULEA(insn->asTex());
2764 break;
2765 case OP_ATOM:
2766 emitATOM(insn);
2767 break;
2768 case OP_BRA:
2769 case OP_CALL:
2770 case OP_PRERET:
2771 case OP_RET:
2772 case OP_DISCARD:
2773 case OP_EXIT:
2774 case OP_PRECONT:
2775 case OP_CONT:
2776 case OP_PREBREAK:
2777 case OP_BREAK:
2778 case OP_JOINAT:
2779 case OP_BRKPT:
2780 case OP_QUADON:
2781 case OP_QUADPOP:
2782 emitFlow(insn);
2783 break;
2784 case OP_QUADOP:
2785 emitQUADOP(insn, insn->subOp, insn->lanes);
2786 break;
2787 case OP_DFDX:
2788 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2789 break;
2790 case OP_DFDY:
2791 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2792 break;
2793 case OP_POPCNT:
2794 emitPOPC(insn);
2795 break;
2796 case OP_INSBF:
2797 emitINSBF(insn);
2798 break;
2799 case OP_EXTBF:
2800 emitEXTBF(insn);
2801 break;
2802 case OP_BFIND:
2803 emitBFIND(insn);
2804 break;
2805 case OP_PERMT:
2806 emitPERMT(insn);
2807 break;
2808 case OP_JOIN:
2809 emitNOP(insn);
2810 insn->join = 1;
2811 break;
2812 case OP_BAR:
2813 emitBAR(insn);
2814 break;
2815 case OP_MEMBAR:
2816 emitMEMBAR(insn);
2817 break;
2818 case OP_CCTL:
2819 emitCCTL(insn);
2820 break;
2821 case OP_VSHL:
2822 emitVSHL(insn);
2823 break;
2824 case OP_PIXLD:
2825 emitPIXLD(insn);
2826 break;
2827 case OP_VOTE:
2828 emitVOTE(insn);
2829 break;
2830 case OP_PHI:
2831 case OP_UNION:
2832 case OP_CONSTRAINT:
2833 ERROR("operation should have been eliminated");
2834 return false;
2835 case OP_EXP:
2836 case OP_LOG:
2837 case OP_SQRT:
2838 case OP_POW:
2839 ERROR("operation should have been lowered\n");
2840 return false;
2841 default:
2842 ERROR("unknown op: %u\n", insn->op);
2843 return false;
2844 }
2845
2846 if (insn->join) {
2847 code[0] |= 0x10;
2848 assert(insn->encSize == 8);
2849 }
2850
2851 code += insn->encSize / 4;
2852 codeSize += insn->encSize;
2853 return true;
2854 }
2855
2856 uint32_t
2857 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2858 {
2859 const Target::OpInfo &info = targ->getOpInfo(i);
2860
2861 if (writeIssueDelays || info.minEncSize == 8 || 1)
2862 return 8;
2863
2864 if (i->ftz || i->saturate || i->join)
2865 return 8;
2866 if (i->rnd != ROUND_N)
2867 return 8;
2868 if (i->predSrc >= 0 && i->op == OP_MAD)
2869 return 8;
2870
2871 if (i->op == OP_PINTERP) {
2872 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2873 return 8;
2874 } else
2875 if (i->op == OP_MOV && i->lanes != 0xf) {
2876 return 8;
2877 }
2878
2879 for (int s = 0; i->srcExists(s); ++s) {
2880 if (i->src(s).isIndirect(0))
2881 return 8;
2882
2883 if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2884 if (SDATA(i->src(s)).offset >= 0x100)
2885 return 8;
2886 if (i->getSrc(s)->reg.fileIndex > 1 &&
2887 i->getSrc(s)->reg.fileIndex != 16)
2888 return 8;
2889 } else
2890 if (i->src(s).getFile() == FILE_IMMEDIATE) {
2891 if (i->dType == TYPE_F32) {
2892 if (SDATA(i->src(s)).u32 >= 0x100)
2893 return 8;
2894 } else {
2895 if (SDATA(i->src(s)).u32 > 0xff)
2896 return 8;
2897 }
2898 }
2899
2900 if (i->op == OP_CVT)
2901 continue;
2902 if (i->src(s).mod != Modifier(0)) {
2903 if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2904 if (i->op != OP_RSQ)
2905 return 8;
2906 if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2907 if (i->op != OP_ADD || s != 0)
2908 return 8;
2909 }
2910 }
2911
2912 return 4;
2913 }
2914
2915 // Simplified, erring on safe side.
2916 class SchedDataCalculator : public Pass
2917 {
2918 public:
2919 SchedDataCalculator(const Target *targ) : targ(targ) { }
2920
2921 private:
2922 struct RegScores
2923 {
2924 struct Resource {
2925 int st[DATA_FILE_COUNT]; // LD to LD delay 3
2926 int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2927 int tex; // TEX to non-TEX delay 17 (0x11)
2928 int sfu; // SFU to SFU delay 3 (except PRE-ops)
2929 int imul; // integer MUL to MUL delay 3
2930 } res;
2931 struct ScoreData {
2932 int r[256];
2933 int p[8];
2934 int c;
2935 } rd, wr;
2936 int base;
2937 int regs;
2938
2939 void rebase(const int base)
2940 {
2941 const int delta = this->base - base;
2942 if (!delta)
2943 return;
2944 this->base = 0;
2945
2946 for (int i = 0; i < regs; ++i) {
2947 rd.r[i] += delta;
2948 wr.r[i] += delta;
2949 }
2950 for (int i = 0; i < 8; ++i) {
2951 rd.p[i] += delta;
2952 wr.p[i] += delta;
2953 }
2954 rd.c += delta;
2955 wr.c += delta;
2956
2957 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2958 res.ld[f] += delta;
2959 res.st[f] += delta;
2960 }
2961 res.sfu += delta;
2962 res.imul += delta;
2963 res.tex += delta;
2964 }
2965 void wipe(int regs)
2966 {
2967 memset(&rd, 0, sizeof(rd));
2968 memset(&wr, 0, sizeof(wr));
2969 memset(&res, 0, sizeof(res));
2970 this->regs = regs;
2971 }
2972 int getLatest(const ScoreData& d) const
2973 {
2974 int max = 0;
2975 for (int i = 0; i < regs; ++i)
2976 if (d.r[i] > max)
2977 max = d.r[i];
2978 for (int i = 0; i < 8; ++i)
2979 if (d.p[i] > max)
2980 max = d.p[i];
2981 if (d.c > max)
2982 max = d.c;
2983 return max;
2984 }
2985 inline int getLatestRd() const
2986 {
2987 return getLatest(rd);
2988 }
2989 inline int getLatestWr() const
2990 {
2991 return getLatest(wr);
2992 }
2993 inline int getLatest() const
2994 {
2995 const int a = getLatestRd();
2996 const int b = getLatestWr();
2997
2998 int max = MAX2(a, b);
2999 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3000 max = MAX2(res.ld[f], max);
3001 max = MAX2(res.st[f], max);
3002 }
3003 max = MAX2(res.sfu, max);
3004 max = MAX2(res.imul, max);
3005 max = MAX2(res.tex, max);
3006 return max;
3007 }
3008 void setMax(const RegScores *that)
3009 {
3010 for (int i = 0; i < regs; ++i) {
3011 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3012 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3013 }
3014 for (int i = 0; i < 8; ++i) {
3015 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3016 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3017 }
3018 rd.c = MAX2(rd.c, that->rd.c);
3019 wr.c = MAX2(wr.c, that->wr.c);
3020
3021 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
3022 res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
3023 res.st[f] = MAX2(res.st[f], that->res.st[f]);
3024 }
3025 res.sfu = MAX2(res.sfu, that->res.sfu);
3026 res.imul = MAX2(res.imul, that->res.imul);
3027 res.tex = MAX2(res.tex, that->res.tex);
3028 }
3029 void print(int cycle)
3030 {
3031 for (int i = 0; i < regs; ++i) {
3032 if (rd.r[i] > cycle)
3033 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3034 if (wr.r[i] > cycle)
3035 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3036 }
3037 for (int i = 0; i < 8; ++i) {
3038 if (rd.p[i] > cycle)
3039 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3040 if (wr.p[i] > cycle)
3041 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3042 }
3043 if (rd.c > cycle)
3044 INFO("rd $c @ %i\n", rd.c);
3045 if (wr.c > cycle)
3046 INFO("wr $c @ %i\n", wr.c);
3047 if (res.sfu > cycle)
3048 INFO("sfu @ %i\n", res.sfu);
3049 if (res.imul > cycle)
3050 INFO("imul @ %i\n", res.imul);
3051 if (res.tex > cycle)
3052 INFO("tex @ %i\n", res.tex);
3053 }
3054 };
3055
3056 RegScores *score; // for current BB
3057 std::vector<RegScores> scoreBoards;
3058 int prevData;
3059 operation prevOp;
3060
3061 const Target *targ;
3062
3063 bool visit(Function *);
3064 bool visit(BasicBlock *);
3065
3066 void commitInsn(const Instruction *, int cycle);
3067 int calcDelay(const Instruction *, int cycle) const;
3068 void setDelay(Instruction *, int delay, Instruction *next);
3069
3070 void recordRd(const Value *, const int ready);
3071 void recordWr(const Value *, const int ready);
3072 void checkRd(const Value *, int cycle, int& delay) const;
3073 void checkWr(const Value *, int cycle, int& delay) const;
3074
3075 int getCycles(const Instruction *, int origDelay) const;
3076 };
3077
3078 void
3079 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
3080 {
3081 if (insn->op == OP_EXIT || insn->op == OP_RET)
3082 delay = MAX2(delay, 14);
3083
3084 if (insn->op == OP_TEXBAR) {
3085 // TODO: except if results not used before EXIT
3086 insn->sched = 0xc2;
3087 } else
3088 if (insn->op == OP_JOIN || insn->join) {
3089 insn->sched = 0x00;
3090 } else
3091 if (delay >= 0 || prevData == 0x04 ||
3092 !next || !targ->canDualIssue(insn, next)) {
3093 insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
3094 if (prevOp == OP_EXPORT)
3095 insn->sched |= 0x40;
3096 else
3097 insn->sched |= 0x20;
3098 } else {
3099 insn->sched = 0x04; // dual-issue
3100 }
3101
3102 if (prevData != 0x04 || prevOp != OP_EXPORT)
3103 if (insn->sched != 0x04 || insn->op == OP_EXPORT)
3104 prevOp = insn->op;
3105
3106 prevData = insn->sched;
3107 }
3108
3109 int
3110 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
3111 {
3112 if (insn->sched & 0x80) {
3113 int c = (insn->sched & 0x0f) * 2 + 1;
3114 if (insn->op == OP_TEXBAR && origDelay > 0)
3115 c += origDelay;
3116 return c;
3117 }
3118 if (insn->sched & 0x60)
3119 return (insn->sched & 0x1f) + 1;
3120 return (insn->sched == 0x04) ? 0 : 32;
3121 }
3122
3123 bool
3124 SchedDataCalculator::visit(Function *func)
3125 {
3126 int regs = targ->getFileSize(FILE_GPR) + 1;
3127 scoreBoards.resize(func->cfg.getSize());
3128 for (size_t i = 0; i < scoreBoards.size(); ++i)
3129 scoreBoards[i].wipe(regs);
3130 return true;
3131 }
3132
3133 bool
3134 SchedDataCalculator::visit(BasicBlock *bb)
3135 {
3136 Instruction *insn;
3137 Instruction *next = NULL;
3138
3139 int cycle = 0;
3140
3141 prevData = 0x00;
3142 prevOp = OP_NOP;
3143 score = &scoreBoards.at(bb->getId());
3144
3145 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
3146 // back branches will wait until all target dependencies are satisfied
3147 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
3148 continue;
3149 BasicBlock *in = BasicBlock::get(ei.getNode());
3150 if (in->getExit()) {
3151 if (prevData != 0x04)
3152 prevData = in->getExit()->sched;
3153 prevOp = in->getExit()->op;
3154 }
3155 score->setMax(&scoreBoards.at(in->getId()));
3156 }
3157 if (bb->cfg.incidentCount() > 1)
3158 prevOp = OP_NOP;
3159
3160 #ifdef NVC0_DEBUG_SCHED_DATA
3161 INFO("=== BB:%i initial scores\n", bb->getId());
3162 score->print(cycle);
3163 #endif
3164
3165 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
3166 next = insn->next;
3167
3168 commitInsn(insn, cycle);
3169 int delay = calcDelay(next, cycle);
3170 setDelay(insn, delay, next);
3171 cycle += getCycles(insn, delay);
3172
3173 #ifdef NVC0_DEBUG_SCHED_DATA
3174 INFO("cycle %i, sched %02x\n", cycle, insn->sched);
3175 insn->print();
3176 next->print();
3177 #endif
3178 }
3179 if (!insn)
3180 return true;
3181 commitInsn(insn, cycle);
3182
3183 int bbDelay = -1;
3184
3185 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
3186 BasicBlock *out = BasicBlock::get(ei.getNode());
3187
3188 if (ei.getType() != Graph::Edge::BACK) {
3189 // only test the first instruction of the outgoing block
3190 next = out->getEntry();
3191 if (next)
3192 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
3193 } else {
3194 // wait until all dependencies are satisfied
3195 const int regsFree = score->getLatest();
3196 next = out->getFirst();
3197 for (int c = cycle; next && c < regsFree; next = next->next) {
3198 bbDelay = MAX2(bbDelay, calcDelay(next, c));
3199 c += getCycles(next, bbDelay);
3200 }
3201 next = NULL;
3202 }
3203 }
3204 if (bb->cfg.outgoingCount() != 1)
3205 next = NULL;
3206 setDelay(insn, bbDelay, next);
3207 cycle += getCycles(insn, bbDelay);
3208
3209 score->rebase(cycle); // common base for initializing out blocks' scores
3210 return true;
3211 }
3212
3213 #define NVE4_MAX_ISSUE_DELAY 0x1f
3214 int
3215 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
3216 {
3217 int delay = 0, ready = cycle;
3218
3219 for (int s = 0; insn->srcExists(s); ++s)
3220 checkRd(insn->getSrc(s), cycle, delay);
3221 // WAR & WAW don't seem to matter
3222 // for (int s = 0; insn->srcExists(s); ++s)
3223 // recordRd(insn->getSrc(s), cycle);
3224
3225 switch (Target::getOpClass(insn->op)) {
3226 case OPCLASS_SFU:
3227 ready = score->res.sfu;
3228 break;
3229 case OPCLASS_ARITH:
3230 if (insn->op == OP_MUL && !isFloatType(insn->dType))
3231 ready = score->res.imul;
3232 break;
3233 case OPCLASS_TEXTURE:
3234 ready = score->res.tex;
3235 break;
3236 case OPCLASS_LOAD:
3237 ready = score->res.ld[insn->src(0).getFile()];
3238 break;
3239 case OPCLASS_STORE:
3240 ready = score->res.st[insn->src(0).getFile()];
3241 break;
3242 default:
3243 break;
3244 }
3245 if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
3246 ready = MAX2(ready, score->res.tex);
3247
3248 delay = MAX2(delay, ready - cycle);
3249
3250 // if can issue next cycle, delay is 0, not 1
3251 return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
3252 }
3253
3254 void
3255 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
3256 {
3257 const int ready = cycle + targ->getLatency(insn);
3258
3259 for (int d = 0; insn->defExists(d); ++d)
3260 recordWr(insn->getDef(d), ready);
3261 // WAR & WAW don't seem to matter
3262 // for (int s = 0; insn->srcExists(s); ++s)
3263 // recordRd(insn->getSrc(s), cycle);
3264
3265 switch (Target::getOpClass(insn->op)) {
3266 case OPCLASS_SFU:
3267 score->res.sfu = cycle + 4;
3268 break;
3269 case OPCLASS_ARITH:
3270 if (insn->op == OP_MUL && !isFloatType(insn->dType))
3271 score->res.imul = cycle + 4;
3272 break;
3273 case OPCLASS_TEXTURE:
3274 score->res.tex = cycle + 18;
3275 break;
3276 case OPCLASS_LOAD:
3277 if (insn->src(0).getFile() == FILE_MEMORY_CONST)
3278 break;
3279 score->res.ld[insn->src(0).getFile()] = cycle + 4;
3280 score->res.st[insn->src(0).getFile()] = ready;
3281 break;
3282 case OPCLASS_STORE:
3283 score->res.st[insn->src(0).getFile()] = cycle + 4;
3284 score->res.ld[insn->src(0).getFile()] = ready;
3285 break;
3286 case OPCLASS_OTHER:
3287 if (insn->op == OP_TEXBAR)
3288 score->res.tex = cycle;
3289 break;
3290 default:
3291 break;
3292 }
3293
3294 #ifdef NVC0_DEBUG_SCHED_DATA
3295 score->print(cycle);
3296 #endif
3297 }
3298
3299 void
3300 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
3301 {
3302 int ready = cycle;
3303 int a, b;
3304
3305 switch (v->reg.file) {
3306 case FILE_GPR:
3307 a = v->reg.data.id;
3308 b = a + v->reg.size / 4;
3309 for (int r = a; r < b; ++r)
3310 ready = MAX2(ready, score->rd.r[r]);
3311 break;
3312 case FILE_PREDICATE:
3313 ready = MAX2(ready, score->rd.p[v->reg.data.id]);
3314 break;
3315 case FILE_FLAGS:
3316 ready = MAX2(ready, score->rd.c);
3317 break;
3318 case FILE_SHADER_INPUT:
3319 case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
3320 case FILE_MEMORY_LOCAL:
3321 case FILE_MEMORY_CONST:
3322 case FILE_MEMORY_SHARED:
3323 case FILE_MEMORY_GLOBAL:
3324 case FILE_SYSTEM_VALUE:
3325 // TODO: any restrictions here ?
3326 break;
3327 case FILE_IMMEDIATE:
3328 break;
3329 default:
3330 assert(0);
3331 break;
3332 }
3333 if (cycle < ready)
3334 delay = MAX2(delay, ready - cycle);
3335 }
3336
3337 void
3338 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
3339 {
3340 int ready = cycle;
3341 int a, b;
3342
3343 switch (v->reg.file) {
3344 case FILE_GPR:
3345 a = v->reg.data.id;
3346 b = a + v->reg.size / 4;
3347 for (int r = a; r < b; ++r)
3348 ready = MAX2(ready, score->wr.r[r]);
3349 break;
3350 case FILE_PREDICATE:
3351 ready = MAX2(ready, score->wr.p[v->reg.data.id]);
3352 break;
3353 default:
3354 assert(v->reg.file == FILE_FLAGS);
3355 ready = MAX2(ready, score->wr.c);
3356 break;
3357 }
3358 if (cycle < ready)
3359 delay = MAX2(delay, ready - cycle);
3360 }
3361
3362 void
3363 SchedDataCalculator::recordWr(const Value *v, const int ready)
3364 {
3365 int a = v->reg.data.id;
3366
3367 if (v->reg.file == FILE_GPR) {
3368 int b = a + v->reg.size / 4;
3369 for (int r = a; r < b; ++r)
3370 score->rd.r[r] = ready;
3371 } else
3372 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
3373 if (v->reg.file == FILE_PREDICATE) {
3374 score->rd.p[a] = ready + 4;
3375 } else {
3376 assert(v->reg.file == FILE_FLAGS);
3377 score->rd.c = ready + 4;
3378 }
3379 }
3380
3381 void
3382 SchedDataCalculator::recordRd(const Value *v, const int ready)
3383 {
3384 int a = v->reg.data.id;
3385
3386 if (v->reg.file == FILE_GPR) {
3387 int b = a + v->reg.size / 4;
3388 for (int r = a; r < b; ++r)
3389 score->wr.r[r] = ready;
3390 } else
3391 if (v->reg.file == FILE_PREDICATE) {
3392 score->wr.p[a] = ready;
3393 } else
3394 if (v->reg.file == FILE_FLAGS) {
3395 score->wr.c = ready;
3396 }
3397 }
3398
3399 bool
3400 calculateSchedDataNVC0(const Target *targ, Function *func)
3401 {
3402 SchedDataCalculator sched(targ);
3403 return sched.run(func, true, true);
3404 }
3405
3406 void
3407 CodeEmitterNVC0::prepareEmission(Function *func)
3408 {
3409 CodeEmitter::prepareEmission(func);
3410
3411 if (targ->hasSWSched)
3412 calculateSchedDataNVC0(targ, func);
3413 }
3414
3415 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
3416 : CodeEmitter(target),
3417 targNVC0(target),
3418 writeIssueDelays(target->hasSWSched)
3419 {
3420 code = NULL;
3421 codeSize = codeSizeLimit = 0;
3422 relocInfo = NULL;
3423 }
3424
3425 CodeEmitter *
3426 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
3427 {
3428 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
3429 emit->setProgramType(type);
3430 return emit;
3431 }
3432
3433 CodeEmitter *
3434 TargetNVC0::getCodeEmitter(Program::Type type)
3435 {
3436 if (chipset >= NVISA_GK20A_CHIPSET)
3437 return createCodeEmitterGK110(type);
3438 return createCodeEmitterNVC0(type);
3439 }
3440
3441 } // namespace nv50_ir