6cf69e5339fd8a039bbb431b7374fbf5acffe5d7
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_A(const Instruction *, uint64_t);
49 void emitForm_B(const Instruction *, uint64_t);
50 void emitForm_S(const Instruction *, uint32_t, bool pred);
51
52 void emitPredicate(const Instruction *);
53
54 void setAddress16(const ValueRef&);
55 void setAddress24(const ValueRef&);
56 void setAddressByFile(const ValueRef&);
57 void setImmediate(const Instruction *, const int s); // needs op already set
58 void setImmediateS8(const ValueRef&);
59 void setSUConst16(const Instruction *, const int s);
60 void setSUPred(const Instruction *, const int s);
61
62 void emitCondCode(CondCode cc, int pos);
63 void emitInterpMode(const Instruction *);
64 void emitLoadStoreType(DataType ty);
65 void emitSUGType(DataType);
66 void emitCachingMode(CacheMode c);
67
68 void emitShortSrc2(const ValueRef&);
69
70 inline uint8_t getSRegEncoding(const ValueRef&);
71
72 void roundMode_A(const Instruction *);
73 void roundMode_C(const Instruction *);
74 void roundMode_CS(const Instruction *);
75
76 void emitNegAbs12(const Instruction *);
77
78 void emitNOP(const Instruction *);
79
80 void emitLOAD(const Instruction *);
81 void emitSTORE(const Instruction *);
82 void emitMOV(const Instruction *);
83 void emitATOM(const Instruction *);
84 void emitMEMBAR(const Instruction *);
85 void emitCCTL(const Instruction *);
86
87 void emitINTERP(const Instruction *);
88 void emitAFETCH(const Instruction *);
89 void emitPFETCH(const Instruction *);
90 void emitVFETCH(const Instruction *);
91 void emitEXPORT(const Instruction *);
92 void emitOUT(const Instruction *);
93
94 void emitUADD(const Instruction *);
95 void emitFADD(const Instruction *);
96 void emitDADD(const Instruction *);
97 void emitUMUL(const Instruction *);
98 void emitFMUL(const Instruction *);
99 void emitDMUL(const Instruction *);
100 void emitIMAD(const Instruction *);
101 void emitISAD(const Instruction *);
102 void emitFMAD(const Instruction *);
103 void emitDMAD(const Instruction *);
104 void emitMADSP(const Instruction *);
105
106 void emitNOT(Instruction *);
107 void emitLogicOp(const Instruction *, uint8_t subOp);
108 void emitPOPC(const Instruction *);
109 void emitINSBF(const Instruction *);
110 void emitEXTBF(const Instruction *);
111 void emitBFIND(const Instruction *);
112 void emitPERMT(const Instruction *);
113 void emitShift(const Instruction *);
114
115 void emitSFnOp(const Instruction *, uint8_t subOp);
116
117 void emitCVT(Instruction *);
118 void emitMINMAX(const Instruction *);
119 void emitPreOp(const Instruction *);
120
121 void emitSET(const CmpInstruction *);
122 void emitSLCT(const CmpInstruction *);
123 void emitSELP(const Instruction *);
124
125 void emitTEXBAR(const Instruction *);
126 void emitTEX(const TexInstruction *);
127 void emitTEXCSAA(const TexInstruction *);
128 void emitTXQ(const TexInstruction *);
129
130 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
131
132 void emitFlow(const Instruction *);
133 void emitBAR(const Instruction *);
134
135 void emitSUCLAMPMode(uint16_t);
136 void emitSUCalc(Instruction *);
137 void emitSULDGB(const TexInstruction *);
138 void emitSUSTGx(const TexInstruction *);
139
140 void emitVSHL(const Instruction *);
141 void emitVectorSubOp(const Instruction *);
142
143 void emitPIXLD(const Instruction *);
144
145 inline void defId(const ValueDef&, const int pos);
146 inline void defId(const Instruction *, int d, const int pos);
147 inline void srcId(const ValueRef&, const int pos);
148 inline void srcId(const ValueRef *, const int pos);
149 inline void srcId(const Instruction *, int s, const int pos);
150 inline void srcAddr32(const ValueRef&, int pos, int shr);
151
152 inline bool isLIMM(const ValueRef&, DataType ty);
153 };
154
// Helper macros.
//
// HEX64(h, l) pastes two groups of hex digits into a single unsigned 64-bit
// literal, so the high and low opcode words stay visually separate at the
// call site.
#define HEX64(h, l) 0x##h##l##ULL

// Register data of the representative value behind a source reference
// (SDATA) or a definition (DDATA); both expand to the same expression.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) SDATA(a)
160
161 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
162 {
163 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
164 }
165
166 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
167 {
168 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
169 }
170
171 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
172 {
173 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
174 code[pos / 32] |= r << (pos % 32);
175 }
176
177 void
178 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
179 {
180 const uint32_t offset = SDATA(src).offset >> shr;
181
182 code[pos / 32] |= offset << (pos % 32);
183 if (pos && (pos < 32))
184 code[1] |= offset >> (32 - pos);
185 }
186
187 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
188 {
189 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
190 }
191
192 void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
193 {
194 int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
195 code[pos / 32] |= r << (pos % 32);
196 }
197
198 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
199 {
200 const ImmediateValue *imm = ref.get()->asImm();
201
202 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
203 }
204
205 void
206 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
207 {
208 switch (insn->rnd) {
209 case ROUND_M: code[1] |= 1 << 23; break;
210 case ROUND_P: code[1] |= 2 << 23; break;
211 case ROUND_Z: code[1] |= 3 << 23; break;
212 default:
213 assert(insn->rnd == ROUND_N);
214 break;
215 }
216 }
217
218 void
219 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
220 {
221 if (i->src(1).mod.abs()) code[0] |= 1 << 6;
222 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
223 if (i->src(1).mod.neg()) code[0] |= 1 << 8;
224 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
225 }
226
227 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
228 {
229 uint8_t val;
230
231 switch (cc) {
232 case CC_LT: val = 0x1; break;
233 case CC_LTU: val = 0x9; break;
234 case CC_EQ: val = 0x2; break;
235 case CC_EQU: val = 0xa; break;
236 case CC_LE: val = 0x3; break;
237 case CC_LEU: val = 0xb; break;
238 case CC_GT: val = 0x4; break;
239 case CC_GTU: val = 0xc; break;
240 case CC_NE: val = 0x5; break;
241 case CC_NEU: val = 0xd; break;
242 case CC_GE: val = 0x6; break;
243 case CC_GEU: val = 0xe; break;
244 case CC_TR: val = 0xf; break;
245 case CC_FL: val = 0x0; break;
246
247 case CC_A: val = 0x14; break;
248 case CC_NA: val = 0x13; break;
249 case CC_S: val = 0x15; break;
250 case CC_NS: val = 0x12; break;
251 case CC_C: val = 0x16; break;
252 case CC_NC: val = 0x11; break;
253 case CC_O: val = 0x17; break;
254 case CC_NO: val = 0x10; break;
255
256 default:
257 val = 0;
258 assert(!"invalid condition code");
259 break;
260 }
261 code[pos / 32] |= val << (pos % 32);
262 }
263
264 void
265 CodeEmitterNVC0::emitPredicate(const Instruction *i)
266 {
267 if (i->predSrc >= 0) {
268 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
269 srcId(i->src(i->predSrc), 10);
270 if (i->cc == CC_NOT_P)
271 code[0] |= 0x2000; // negate
272 } else {
273 code[0] |= 0x1c00;
274 }
275 }
276
277 void
278 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
279 {
280 switch (src.getFile()) {
281 case FILE_MEMORY_GLOBAL:
282 srcAddr32(src, 26, 0);
283 break;
284 case FILE_MEMORY_LOCAL:
285 case FILE_MEMORY_SHARED:
286 setAddress24(src);
287 break;
288 default:
289 assert(src.getFile() == FILE_MEMORY_CONST);
290 setAddress16(src);
291 break;
292 }
293 }
294
295 void
296 CodeEmitterNVC0::setAddress16(const ValueRef& src)
297 {
298 Symbol *sym = src.get()->asSym();
299
300 assert(sym);
301
302 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
303 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
304 }
305
306 void
307 CodeEmitterNVC0::setAddress24(const ValueRef& src)
308 {
309 Symbol *sym = src.get()->asSym();
310
311 assert(sym);
312
313 code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
314 code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
315 }
316
317 void
318 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
319 {
320 const ImmediateValue *imm = i->src(s).get()->asImm();
321 uint32_t u32;
322
323 assert(imm);
324 u32 = imm->reg.data.u32;
325
326 if ((code[0] & 0xf) == 0x1) {
327 // double immediate
328 uint64_t u64 = imm->reg.data.u64;
329 assert(!(u64 & 0x00000fffffffffffULL));
330 assert(!(code[1] & 0xc000));
331 code[0] |= ((u64 >> 44) & 0x3f) << 26;
332 code[1] |= 0xc000 | (u64 >> 50);
333 } else
334 if ((code[0] & 0xf) == 0x2) {
335 // LIMM
336 code[0] |= (u32 & 0x3f) << 26;
337 code[1] |= u32 >> 6;
338 } else
339 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
340 // integer immediate
341 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
342 assert(!(code[1] & 0xc000));
343 u32 &= 0xfffff;
344 code[0] |= (u32 & 0x3f) << 26;
345 code[1] |= 0xc000 | (u32 >> 6);
346 } else {
347 // float immediate
348 assert(!(u32 & 0x00000fff));
349 assert(!(code[1] & 0xc000));
350 code[0] |= ((u32 >> 12) & 0x3f) << 26;
351 code[1] |= 0xc000 | (u32 >> 18);
352 }
353 }
354
355 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
356 {
357 const ImmediateValue *imm = ref.get()->asImm();
358
359 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
360
361 assert(s8 == imm->reg.data.s32);
362
363 code[0] |= (s8 & 0x3f) << 26;
364 code[0] |= (s8 >> 6) << 8;
365 }
366
367 void
368 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
369 {
370 code[0] = opc;
371 code[1] = opc >> 32;
372
373 emitPredicate(i);
374
375 defId(i->def(0), 14);
376
377 int s1 = 26;
378 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
379 s1 = 49;
380
381 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
382 switch (i->getSrc(s)->reg.file) {
383 case FILE_MEMORY_CONST:
384 assert(!(code[1] & 0xc000));
385 code[1] |= (s == 2) ? 0x8000 : 0x4000;
386 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
387 setAddress16(i->src(s));
388 break;
389 case FILE_IMMEDIATE:
390 assert(s == 1 ||
391 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
392 assert(!(code[1] & 0xc000));
393 setImmediate(i, s);
394 break;
395 case FILE_GPR:
396 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
397 break;
398 srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
399 break;
400 default:
401 // ignore here, can be predicate or flags, but must not be address
402 break;
403 }
404 }
405 }
406
407 void
408 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
409 {
410 code[0] = opc;
411 code[1] = opc >> 32;
412
413 emitPredicate(i);
414
415 defId(i->def(0), 14);
416
417 switch (i->src(0).getFile()) {
418 case FILE_MEMORY_CONST:
419 assert(!(code[1] & 0xc000));
420 code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
421 setAddress16(i->src(0));
422 break;
423 case FILE_IMMEDIATE:
424 assert(!(code[1] & 0xc000));
425 setImmediate(i, 0);
426 break;
427 case FILE_GPR:
428 srcId(i->src(0), 26);
429 break;
430 default:
431 // ignore here, can be predicate or flags, but must not be address
432 break;
433 }
434 }
435
436 void
437 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
438 {
439 code[0] = opc;
440
441 int ss2a = 0;
442 if (opc == 0x0d || opc == 0x0e)
443 ss2a = 2;
444
445 defId(i->def(0), 14);
446 srcId(i->src(0), 20);
447
448 assert(pred || (i->predSrc < 0));
449 if (pred)
450 emitPredicate(i);
451
452 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
453 if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
454 assert(!(code[0] & (0x300 >> ss2a)));
455 switch (i->src(s).get()->reg.fileIndex) {
456 case 0: code[0] |= 0x100 >> ss2a; break;
457 case 1: code[0] |= 0x200 >> ss2a; break;
458 case 16: code[0] |= 0x300 >> ss2a; break;
459 default:
460 ERROR("invalid c[] space for short form\n");
461 break;
462 }
463 if (s == 1)
464 code[0] |= i->getSrc(s)->reg.data.offset << 24;
465 else
466 code[0] |= i->getSrc(s)->reg.data.offset << 6;
467 } else
468 if (i->src(s).getFile() == FILE_IMMEDIATE) {
469 assert(s == 1);
470 setImmediateS8(i->src(s));
471 } else
472 if (i->src(s).getFile() == FILE_GPR) {
473 srcId(i->src(s), (s == 1) ? 26 : 8);
474 }
475 }
476 }
477
478 void
479 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
480 {
481 if (src.getFile() == FILE_MEMORY_CONST) {
482 switch (src.get()->reg.fileIndex) {
483 case 0: code[0] |= 0x100; break;
484 case 1: code[0] |= 0x200; break;
485 case 16: code[0] |= 0x300; break;
486 default:
487 assert(!"unsupported file index for short op");
488 break;
489 }
490 srcAddr32(src, 20, 2);
491 } else {
492 srcId(src, 20);
493 assert(src.getFile() == FILE_GPR);
494 }
495 }
496
497 void
498 CodeEmitterNVC0::emitNOP(const Instruction *i)
499 {
500 code[0] = 0x000001e4;
501 code[1] = 0x40000000;
502 emitPredicate(i);
503 }
504
505 void
506 CodeEmitterNVC0::emitFMAD(const Instruction *i)
507 {
508 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
509
510 if (i->encSize == 8) {
511 if (isLIMM(i->src(1), TYPE_F32)) {
512 emitForm_A(i, HEX64(20000000, 00000002));
513 } else {
514 emitForm_A(i, HEX64(30000000, 00000000));
515
516 if (i->src(2).mod.neg())
517 code[0] |= 1 << 8;
518 }
519 roundMode_A(i);
520
521 if (neg1)
522 code[0] |= 1 << 9;
523
524 if (i->saturate)
525 code[0] |= 1 << 5;
526 if (i->ftz)
527 code[0] |= 1 << 6;
528 } else {
529 assert(!i->saturate && !i->src(2).mod.neg());
530 emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
531 false);
532 if (neg1)
533 code[0] |= 1 << 4;
534 }
535 }
536
537 void
538 CodeEmitterNVC0::emitDMAD(const Instruction *i)
539 {
540 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
541
542 emitForm_A(i, HEX64(20000000, 00000001));
543
544 if (i->src(2).mod.neg())
545 code[0] |= 1 << 8;
546
547 roundMode_A(i);
548
549 if (neg1)
550 code[0] |= 1 << 9;
551
552 assert(!i->saturate);
553 assert(!i->ftz);
554 }
555
556 void
557 CodeEmitterNVC0::emitFMUL(const Instruction *i)
558 {
559 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
560
561 assert(i->postFactor >= -3 && i->postFactor <= 3);
562
563 if (i->encSize == 8) {
564 if (isLIMM(i->src(1), TYPE_F32)) {
565 assert(i->postFactor == 0); // constant folded, hopefully
566 emitForm_A(i, HEX64(30000000, 00000002));
567 } else {
568 emitForm_A(i, HEX64(58000000, 00000000));
569 roundMode_A(i);
570 code[1] |= ((i->postFactor > 0) ?
571 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
572 }
573 if (neg)
574 code[1] ^= 1 << 25; // aliases with LIMM sign bit
575
576 if (i->saturate)
577 code[0] |= 1 << 5;
578
579 if (i->dnz)
580 code[0] |= 1 << 7;
581 else
582 if (i->ftz)
583 code[0] |= 1 << 6;
584 } else {
585 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
586 emitForm_S(i, 0xa8, true);
587 }
588 }
589
590 void
591 CodeEmitterNVC0::emitDMUL(const Instruction *i)
592 {
593 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
594
595 emitForm_A(i, HEX64(50000000, 00000001));
596 roundMode_A(i);
597
598 if (neg)
599 code[0] |= 1 << 9;
600
601 assert(!i->saturate);
602 assert(!i->ftz);
603 assert(!i->dnz);
604 assert(!i->postFactor);
605 }
606
607 void
608 CodeEmitterNVC0::emitUMUL(const Instruction *i)
609 {
610 if (i->encSize == 8) {
611 if (i->src(1).getFile() == FILE_IMMEDIATE) {
612 emitForm_A(i, HEX64(10000000, 00000002));
613 } else {
614 emitForm_A(i, HEX64(50000000, 00000003));
615 }
616 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
617 code[0] |= 1 << 6;
618 if (i->sType == TYPE_S32)
619 code[0] |= 1 << 5;
620 if (i->dType == TYPE_S32)
621 code[0] |= 1 << 7;
622 } else {
623 emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
624
625 if (i->sType == TYPE_S32)
626 code[0] |= 1 << 6;
627 }
628 }
629
630 void
631 CodeEmitterNVC0::emitFADD(const Instruction *i)
632 {
633 if (i->encSize == 8) {
634 if (isLIMM(i->src(1), TYPE_F32)) {
635 assert(!i->saturate);
636 emitForm_A(i, HEX64(28000000, 00000002));
637
638 code[0] |= i->src(0).mod.abs() << 7;
639 code[0] |= i->src(0).mod.neg() << 9;
640
641 if (i->src(1).mod.abs())
642 code[1] &= 0xfdffffff;
643 if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
644 code[1] ^= 0x02000000;
645 } else {
646 emitForm_A(i, HEX64(50000000, 00000000));
647
648 roundMode_A(i);
649 if (i->saturate)
650 code[1] |= 1 << 17;
651
652 emitNegAbs12(i);
653 if (i->op == OP_SUB) code[0] ^= 1 << 8;
654 }
655 if (i->ftz)
656 code[0] |= 1 << 5;
657 } else {
658 assert(!i->saturate && i->op != OP_SUB &&
659 !i->src(0).mod.abs() &&
660 !i->src(1).mod.neg() && !i->src(1).mod.abs());
661
662 emitForm_S(i, 0x49, true);
663
664 if (i->src(0).mod.neg())
665 code[0] |= 1 << 7;
666 }
667 }
668
669 void
670 CodeEmitterNVC0::emitDADD(const Instruction *i)
671 {
672 assert(i->encSize == 8);
673 emitForm_A(i, HEX64(48000000, 00000001));
674 roundMode_A(i);
675 assert(!i->saturate);
676 assert(!i->ftz);
677 emitNegAbs12(i);
678 if (i->op == OP_SUB)
679 code[0] ^= 1 << 8;
680 }
681
682 void
683 CodeEmitterNVC0::emitUADD(const Instruction *i)
684 {
685 uint32_t addOp = 0;
686
687 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
688 assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
689
690 if (i->src(0).mod.neg())
691 addOp |= 0x200;
692 if (i->src(1).mod.neg())
693 addOp |= 0x100;
694 if (i->op == OP_SUB) {
695 addOp ^= 0x100;
696 assert(addOp != 0x300); // would be add-plus-one
697 }
698
699 if (i->encSize == 8) {
700 if (isLIMM(i->src(1), TYPE_U32)) {
701 emitForm_A(i, HEX64(08000000, 00000002));
702 if (i->defExists(1))
703 code[1] |= 1 << 26; // write carry
704 } else {
705 emitForm_A(i, HEX64(48000000, 00000003));
706 if (i->defExists(1))
707 code[1] |= 1 << 16; // write carry
708 }
709 code[0] |= addOp;
710
711 if (i->saturate)
712 code[0] |= 1 << 5;
713 if (i->flagsSrc >= 0) // add carry
714 code[0] |= 1 << 6;
715 } else {
716 assert(!(addOp & 0x100));
717 emitForm_S(i, (addOp >> 3) |
718 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
719 }
720 }
721
722 // TODO: shl-add
723 void
724 CodeEmitterNVC0::emitIMAD(const Instruction *i)
725 {
726 assert(i->encSize == 8);
727 emitForm_A(i, HEX64(20000000, 00000003));
728
729 if (isSignedType(i->dType))
730 code[0] |= 1 << 7;
731 if (isSignedType(i->sType))
732 code[0] |= 1 << 5;
733
734 code[1] |= i->saturate << 24;
735
736 if (i->flagsDef >= 0) code[1] |= 1 << 16;
737 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
738
739 if (i->src(2).mod.neg()) code[0] |= 0x10;
740 if (i->src(1).mod.neg() ^
741 i->src(0).mod.neg()) code[0] |= 0x20;
742
743 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
744 code[0] |= 1 << 6;
745 }
746
747 void
748 CodeEmitterNVC0::emitMADSP(const Instruction *i)
749 {
750 assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
751
752 emitForm_A(i, HEX64(00000000, 00000003));
753
754 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
755 code[1] |= 0x01800000;
756 } else {
757 code[0] |= (i->subOp & 0x00f) << 7;
758 code[0] |= (i->subOp & 0x0f0) << 1;
759 code[0] |= (i->subOp & 0x100) >> 3;
760 code[0] |= (i->subOp & 0x200) >> 2;
761 code[1] |= (i->subOp & 0xc00) << 13;
762 }
763
764 if (i->flagsDef >= 0)
765 code[1] |= 1 << 16;
766 }
767
768 void
769 CodeEmitterNVC0::emitISAD(const Instruction *i)
770 {
771 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
772 assert(i->encSize == 8);
773
774 emitForm_A(i, HEX64(38000000, 00000003));
775
776 if (i->dType == TYPE_S32)
777 code[0] |= 1 << 5;
778 }
779
780 void
781 CodeEmitterNVC0::emitNOT(Instruction *i)
782 {
783 assert(i->encSize == 8);
784 i->setSrc(1, i->src(0));
785 emitForm_A(i, HEX64(68000000, 000001c3));
786 }
787
788 void
789 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
790 {
791 if (i->def(0).getFile() == FILE_PREDICATE) {
792 code[0] = 0x00000004 | (subOp << 30);
793 code[1] = 0x0c000000;
794
795 emitPredicate(i);
796
797 defId(i->def(0), 17);
798 srcId(i->src(0), 20);
799 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
800 srcId(i->src(1), 26);
801 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
802
803 if (i->defExists(1)) {
804 defId(i->def(1), 14);
805 } else {
806 code[0] |= 7 << 14;
807 }
808 // (a OP b) OP c
809 if (i->predSrc != 2 && i->srcExists(2)) {
810 code[1] |= subOp << 21;
811 srcId(i->src(2), 17);
812 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
813 } else {
814 code[1] |= 0x000e0000;
815 }
816 } else
817 if (i->encSize == 8) {
818 if (isLIMM(i->src(1), TYPE_U32)) {
819 emitForm_A(i, HEX64(38000000, 00000002));
820
821 if (i->flagsDef >= 0)
822 code[1] |= 1 << 26;
823 } else {
824 emitForm_A(i, HEX64(68000000, 00000003));
825
826 if (i->flagsDef >= 0)
827 code[1] |= 1 << 16;
828 }
829 code[0] |= subOp << 6;
830
831 if (i->flagsSrc >= 0) // carry
832 code[0] |= 1 << 5;
833
834 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
835 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
836 } else {
837 emitForm_S(i, (subOp << 5) |
838 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
839 }
840 }
841
842 void
843 CodeEmitterNVC0::emitPOPC(const Instruction *i)
844 {
845 emitForm_A(i, HEX64(54000000, 00000004));
846
847 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
848 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
849 }
850
851 void
852 CodeEmitterNVC0::emitINSBF(const Instruction *i)
853 {
854 emitForm_A(i, HEX64(28000000, 00000003));
855 }
856
857 void
858 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
859 {
860 emitForm_A(i, HEX64(70000000, 00000003));
861
862 if (i->dType == TYPE_S32)
863 code[0] |= 1 << 5;
864 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
865 code[0] |= 1 << 8;
866 }
867
868 void
869 CodeEmitterNVC0::emitBFIND(const Instruction *i)
870 {
871 emitForm_B(i, HEX64(78000000, 00000003));
872
873 if (i->dType == TYPE_S32)
874 code[0] |= 1 << 5;
875 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
876 code[0] |= 1 << 8;
877 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
878 code[0] |= 1 << 6;
879 }
880
881 void
882 CodeEmitterNVC0::emitPERMT(const Instruction *i)
883 {
884 emitForm_A(i, HEX64(24000000, 00000004));
885
886 code[0] |= i->subOp << 5;
887 }
888
889 void
890 CodeEmitterNVC0::emitShift(const Instruction *i)
891 {
892 if (i->op == OP_SHR) {
893 emitForm_A(i, HEX64(58000000, 00000003)
894 | (isSignedType(i->dType) ? 0x20 : 0x00));
895 } else {
896 emitForm_A(i, HEX64(60000000, 00000003));
897 }
898
899 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
900 code[0] |= 1 << 9;
901 }
902
903 void
904 CodeEmitterNVC0::emitPreOp(const Instruction *i)
905 {
906 if (i->encSize == 8) {
907 emitForm_B(i, HEX64(60000000, 00000000));
908
909 if (i->op == OP_PREEX2)
910 code[0] |= 0x20;
911
912 if (i->src(0).mod.abs()) code[0] |= 1 << 6;
913 if (i->src(0).mod.neg()) code[0] |= 1 << 8;
914 } else {
915 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
916 }
917 }
918
919 void
920 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
921 {
922 if (i->encSize == 8) {
923 code[0] = 0x00000000 | (subOp << 26);
924 code[1] = 0xc8000000;
925
926 emitPredicate(i);
927
928 defId(i->def(0), 14);
929 srcId(i->src(0), 20);
930
931 assert(i->src(0).getFile() == FILE_GPR);
932
933 if (i->saturate) code[0] |= 1 << 5;
934
935 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
936 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
937 } else {
938 emitForm_S(i, 0x80000008 | (subOp << 26), true);
939
940 assert(!i->src(0).mod.neg());
941 if (i->src(0).mod.abs()) code[0] |= 1 << 30;
942 }
943 }
944
945 void
946 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
947 {
948 uint64_t op;
949
950 assert(i->encSize == 8);
951
952 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
953
954 if (i->ftz)
955 op |= 1 << 5;
956 else
957 if (!isFloatType(i->dType))
958 op |= isSignedType(i->dType) ? 0x23 : 0x03;
959 if (i->dType == TYPE_F64)
960 op |= 0x01;
961
962 emitForm_A(i, op);
963 emitNegAbs12(i);
964 }
965
966 void
967 CodeEmitterNVC0::roundMode_C(const Instruction *i)
968 {
969 switch (i->rnd) {
970 case ROUND_M: code[1] |= 1 << 17; break;
971 case ROUND_P: code[1] |= 2 << 17; break;
972 case ROUND_Z: code[1] |= 3 << 17; break;
973 case ROUND_NI: code[0] |= 1 << 7; break;
974 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
975 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
976 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
977 case ROUND_N: break;
978 default:
979 assert(!"invalid round mode");
980 break;
981 }
982 }
983
984 void
985 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
986 {
987 switch (i->rnd) {
988 case ROUND_M:
989 case ROUND_MI: code[0] |= 1 << 16; break;
990 case ROUND_P:
991 case ROUND_PI: code[0] |= 2 << 16; break;
992 case ROUND_Z:
993 case ROUND_ZI: code[0] |= 3 << 16; break;
994 default:
995 break;
996 }
997 }
998
999 void
1000 CodeEmitterNVC0::emitCVT(Instruction *i)
1001 {
1002 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1003 DataType dType;
1004
1005 switch (i->op) {
1006 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
1007 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
1008 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1009 default:
1010 break;
1011 }
1012
1013 const bool sat = (i->op == OP_SAT) || i->saturate;
1014 const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
1015 const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
1016
1017 if (i->op == OP_NEG && i->dType == TYPE_U32)
1018 dType = TYPE_S32;
1019 else
1020 dType = i->dType;
1021
1022 if (i->encSize == 8) {
1023 emitForm_B(i, HEX64(10000000, 00000004));
1024
1025 roundMode_C(i);
1026
1027 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
1028 code[0] |= util_logbase2(typeSizeof(dType)) << 20;
1029 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
1030
1031 // for 8/16 source types, the byte/word is in subOp. word 1 is
1032 // represented as 2.
1033 if (!isFloatType(i->sType))
1034 code[1] |= i->subOp << 0x17;
1035 else
1036 code[1] |= i->subOp << 0x18;
1037
1038 if (sat)
1039 code[0] |= 0x20;
1040 if (abs)
1041 code[0] |= 1 << 6;
1042 if (neg && i->op != OP_ABS)
1043 code[0] |= 1 << 8;
1044
1045 if (i->ftz)
1046 code[1] |= 1 << 23;
1047
1048 if (isSignedIntType(dType))
1049 code[0] |= 0x080;
1050 if (isSignedIntType(i->sType))
1051 code[0] |= 0x200;
1052
1053 if (isFloatType(dType)) {
1054 if (!isFloatType(i->sType))
1055 code[1] |= 0x08000000;
1056 } else {
1057 if (isFloatType(i->sType))
1058 code[1] |= 0x04000000;
1059 else
1060 code[1] |= 0x0c000000;
1061 }
1062 } else {
1063 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
1064 code[0] = 0x298;
1065 } else
1066 if (isFloatType(dType)) {
1067 if (isFloatType(i->sType))
1068 code[0] = 0x098;
1069 else
1070 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
1071 } else {
1072 assert(isFloatType(i->sType));
1073
1074 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
1075 }
1076
1077 if (neg) code[0] |= 1 << 16;
1078 if (sat) code[0] |= 1 << 18;
1079 if (abs) code[0] |= 1 << 19;
1080
1081 roundMode_CS(i);
1082 }
1083 }
1084
1085 void
1086 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1087 {
1088 uint32_t hi;
1089 uint32_t lo = 0;
1090
1091 if (i->sType == TYPE_F64)
1092 lo = 0x1;
1093 else
1094 if (!isFloatType(i->sType))
1095 lo = 0x3;
1096
1097 if (isSignedIntType(i->sType))
1098 lo |= 0x20;
1099 if (isFloatType(i->dType)) {
1100 if (isFloatType(i->sType))
1101 lo |= 0x20;
1102 else
1103 lo |= 0x80;
1104 }
1105
1106 switch (i->op) {
1107 case OP_SET_AND: hi = 0x10000000; break;
1108 case OP_SET_OR: hi = 0x10200000; break;
1109 case OP_SET_XOR: hi = 0x10400000; break;
1110 default:
1111 hi = 0x100e0000;
1112 break;
1113 }
1114 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1115
1116 if (i->op != OP_SET)
1117 srcId(i->src(2), 32 + 17);
1118
1119 if (i->def(0).getFile() == FILE_PREDICATE) {
1120 if (i->sType == TYPE_F32)
1121 code[1] += 0x10000000;
1122 else
1123 code[1] += 0x08000000;
1124
1125 code[0] &= ~0xfc000;
1126 defId(i->def(0), 17);
1127 if (i->defExists(1))
1128 defId(i->def(1), 14);
1129 else
1130 code[0] |= 0x1c000;
1131 }
1132
1133 if (i->ftz)
1134 code[1] |= 1 << 27;
1135
1136 emitCondCode(i->setCond, 32 + 23);
1137 emitNegAbs12(i);
1138 }
1139
1140 void
1141 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1142 {
1143 uint64_t op;
1144
1145 switch (i->dType) {
1146 case TYPE_S32:
1147 op = HEX64(30000000, 00000023);
1148 break;
1149 case TYPE_U32:
1150 op = HEX64(30000000, 00000003);
1151 break;
1152 case TYPE_F32:
1153 op = HEX64(38000000, 00000000);
1154 break;
1155 default:
1156 assert(!"invalid type for SLCT");
1157 op = 0;
1158 break;
1159 }
1160 emitForm_A(i, op);
1161
1162 CondCode cc = i->setCond;
1163
1164 if (i->src(2).mod.neg())
1165 cc = reverseCondCode(cc);
1166
1167 emitCondCode(cc, 32 + 23);
1168
1169 if (i->ftz)
1170 code[0] |= 1 << 5;
1171 }
1172
1173 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1174 {
1175 emitForm_A(i, HEX64(20000000, 00000004));
1176
1177 if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1178 code[1] |= 1 << 20;
1179 }
1180
1181 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1182 {
1183 code[0] = 0x00000006 | (i->subOp << 26);
1184 code[1] = 0xf0000000;
1185 emitPredicate(i);
1186 emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1187 }
1188
1189 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1190 {
1191 code[0] = 0x00000086;
1192 code[1] = 0xd0000000;
1193
1194 code[1] |= i->tex.r;
1195 code[1] |= i->tex.s << 8;
1196
1197 if (i->tex.liveOnly)
1198 code[0] |= 1 << 9;
1199
1200 defId(i->def(0), 14);
1201 srcId(i->src(0), 20);
1202 }
1203
1204 static inline bool
1205 isNextIndependentTex(const TexInstruction *i)
1206 {
1207 if (!i->next || !isTextureOp(i->next->op))
1208 return false;
1209 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1210 return false;
1211 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1212 }
1213
1214 void
1215 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1216 {
1217 code[0] = 0x00000006;
1218
1219 if (isNextIndependentTex(i))
1220 code[0] |= 0x080; // t mode
1221 else
1222 code[0] |= 0x100; // p mode
1223
1224 if (i->tex.liveOnly)
1225 code[0] |= 1 << 9;
1226
1227 switch (i->op) {
1228 case OP_TEX: code[1] = 0x80000000; break;
1229 case OP_TXB: code[1] = 0x84000000; break;
1230 case OP_TXL: code[1] = 0x86000000; break;
1231 case OP_TXF: code[1] = 0x90000000; break;
1232 case OP_TXG: code[1] = 0xa0000000; break;
1233 case OP_TXLQ: code[1] = 0xb0000000; break;
1234 case OP_TXD: code[1] = 0xe0000000; break;
1235 default:
1236 assert(!"invalid texture op");
1237 break;
1238 }
1239 if (i->op == OP_TXF) {
1240 if (!i->tex.levelZero)
1241 code[1] |= 0x02000000;
1242 } else
1243 if (i->tex.levelZero) {
1244 code[1] |= 0x02000000;
1245 }
1246
1247 if (i->op != OP_TXD && i->tex.derivAll)
1248 code[1] |= 1 << 13;
1249
1250 defId(i->def(0), 14);
1251 srcId(i->src(0), 20);
1252
1253 emitPredicate(i);
1254
1255 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1256
1257 code[1] |= i->tex.mask << 14;
1258
1259 code[1] |= i->tex.r;
1260 code[1] |= i->tex.s << 8;
1261 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1262 code[1] |= 1 << 18; // in 1st source (with array index)
1263
1264 // texture target:
1265 code[1] |= (i->tex.target.getDim() - 1) << 20;
1266 if (i->tex.target.isCube())
1267 code[1] += 2 << 20;
1268 if (i->tex.target.isArray())
1269 code[1] |= 1 << 19;
1270 if (i->tex.target.isShadow())
1271 code[1] |= 1 << 24;
1272
1273 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1274
1275 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1276 // lzero
1277 if (i->op == OP_TXL)
1278 code[1] &= ~(1 << 26);
1279 else
1280 if (i->op == OP_TXF)
1281 code[1] &= ~(1 << 25);
1282 }
1283 if (i->tex.target == TEX_TARGET_2D_MS ||
1284 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1285 code[1] |= 1 << 23;
1286
1287 if (i->tex.useOffsets == 1)
1288 code[1] |= 1 << 22;
1289 if (i->tex.useOffsets == 4)
1290 code[1] |= 1 << 23;
1291
1292 srcId(i, src1, 26);
1293 }
1294
1295 void
1296 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1297 {
1298 code[0] = 0x00000086;
1299 code[1] = 0xc0000000;
1300
1301 switch (i->tex.query) {
1302 case TXQ_DIMS: code[1] |= 0 << 22; break;
1303 case TXQ_TYPE: code[1] |= 1 << 22; break;
1304 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1305 case TXQ_FILTER: code[1] |= 3 << 22; break;
1306 case TXQ_LOD: code[1] |= 4 << 22; break;
1307 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1308 default:
1309 assert(!"invalid texture query");
1310 break;
1311 }
1312
1313 code[1] |= i->tex.mask << 14;
1314
1315 code[1] |= i->tex.r;
1316 code[1] |= i->tex.s << 8;
1317 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1318 code[1] |= 1 << 18;
1319
1320 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1321
1322 defId(i->def(0), 14);
1323 srcId(i->src(0), 20);
1324 srcId(i, src1, 26);
1325
1326 emitPredicate(i);
1327 }
1328
1329 void
1330 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1331 {
1332 code[0] = 0x00000000 | (laneMask << 6);
1333 code[1] = 0x48000000 | qOp;
1334
1335 defId(i->def(0), 14);
1336 srcId(i->src(0), 20);
1337 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
1338
1339 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1340 code[0] |= 1 << 9; // dall
1341
1342 emitPredicate(i);
1343 }
1344
1345 void
1346 CodeEmitterNVC0::emitFlow(const Instruction *i)
1347 {
1348 const FlowInstruction *f = i->asFlow();
1349
1350 unsigned mask; // bit 0: predicate, bit 1: target
1351
1352 code[0] = 0x00000007;
1353
1354 switch (i->op) {
1355 case OP_BRA:
1356 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1357 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1358 code[0] |= 0x4000;
1359 mask = 3;
1360 break;
1361 case OP_CALL:
1362 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1363 if (f->indirect)
1364 code[0] |= 0x4000; // indirect calls always use c[] source
1365 mask = 2;
1366 break;
1367
1368 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1369 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1370 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1371 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1372 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1373
1374 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1375 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1376 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1377 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1378
1379 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1380 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1381 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1382 default:
1383 assert(!"invalid flow operation");
1384 return;
1385 }
1386
1387 if (mask & 1) {
1388 emitPredicate(i);
1389 if (i->flagsSrc < 0)
1390 code[0] |= 0x1e0;
1391 }
1392
1393 if (!f)
1394 return;
1395
1396 if (f->allWarp)
1397 code[0] |= 1 << 15;
1398 if (f->limit)
1399 code[0] |= 1 << 16;
1400
1401 if (f->indirect) {
1402 if (code[0] & 0x4000) {
1403 assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1404 setAddress16(i->src(0));
1405 code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1406 if (f->op == OP_BRA)
1407 srcId(f->src(0).getIndirect(0), 20);
1408 } else {
1409 srcId(f, 0, 20);
1410 }
1411 }
1412
1413 if (f->op == OP_CALL) {
1414 if (f->indirect) {
1415 // nothing
1416 } else
1417 if (f->builtin) {
1418 assert(f->absolute);
1419 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1420 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1421 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1422 } else {
1423 assert(!f->absolute);
1424 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1425 code[0] |= (pcRel & 0x3f) << 26;
1426 code[1] |= (pcRel >> 6) & 0x3ffff;
1427 }
1428 } else
1429 if (mask & 2) {
1430 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1431 if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1432 pcRel += 8;
1433 // currently we don't want absolute branches
1434 assert(!f->absolute);
1435 code[0] |= (pcRel & 0x3f) << 26;
1436 code[1] |= (pcRel >> 6) & 0x3ffff;
1437 }
1438 }
1439
1440 void
1441 CodeEmitterNVC0::emitBAR(const Instruction *i)
1442 {
1443 Value *rDef = NULL, *pDef = NULL;
1444
1445 switch (i->subOp) {
1446 case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break;
1447 case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break;
1448 case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break;
1449 case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1450 default:
1451 code[0] = 0x04;
1452 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1453 break;
1454 }
1455 code[1] = 0x50000000;
1456
1457 code[0] |= 63 << 14;
1458 code[1] |= 7 << 21;
1459
1460 emitPredicate(i);
1461
1462 // barrier id
1463 if (i->src(0).getFile() == FILE_GPR) {
1464 srcId(i->src(0), 20);
1465 } else {
1466 ImmediateValue *imm = i->getSrc(0)->asImm();
1467 assert(imm);
1468 code[0] |= imm->reg.data.u32 << 20;
1469 code[1] |= 0x8000;
1470 }
1471
1472 // thread count
1473 if (i->src(1).getFile() == FILE_GPR) {
1474 srcId(i->src(1), 26);
1475 } else {
1476 ImmediateValue *imm = i->getSrc(1)->asImm();
1477 assert(imm);
1478 code[0] |= imm->reg.data.u32 << 26;
1479 code[1] |= imm->reg.data.u32 >> 6;
1480 code[1] |= 0x4000;
1481 }
1482
1483 if (i->srcExists(2) && (i->predSrc != 2)) {
1484 srcId(i->src(2), 32 + 17);
1485 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1486 code[1] |= 1 << 20;
1487 } else {
1488 code[1] |= 7 << 17;
1489 }
1490
1491 if (i->defExists(0)) {
1492 if (i->def(0).getFile() == FILE_GPR)
1493 rDef = i->getDef(0);
1494 else
1495 pDef = i->getDef(0);
1496
1497 if (i->defExists(1)) {
1498 if (i->def(1).getFile() == FILE_GPR)
1499 rDef = i->getDef(1);
1500 else
1501 pDef = i->getDef(1);
1502 }
1503 }
1504 if (rDef) {
1505 code[0] &= ~(63 << 14);
1506 defId(rDef, 14);
1507 }
1508 if (pDef) {
1509 code[1] &= ~(7 << 21);
1510 defId(pDef, 32 + 21);
1511 }
1512 }
1513
1514 void
1515 CodeEmitterNVC0::emitAFETCH(const Instruction *i)
1516 {
1517 code[0] = 0x00000006;
1518 code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
1519
1520 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1521 code[0] |= 0x200;
1522
1523 emitPredicate(i);
1524
1525 defId(i->def(0), 14);
1526 srcId(i->src(0).getIndirect(0), 20);
1527 }
1528
1529 void
1530 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1531 {
1532 uint32_t prim = i->src(0).get()->reg.data.u32;
1533
1534 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1535 code[1] = 0x00000000 | (prim >> 6);
1536
1537 emitPredicate(i);
1538
1539 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1540
1541 defId(i->def(0), 14);
1542 srcId(i, src1, 20);
1543 }
1544
1545 void
1546 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1547 {
1548 code[0] = 0x00000006;
1549 code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1550
1551 if (i->perPatch)
1552 code[0] |= 0x100;
1553 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1554 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1555
1556 emitPredicate(i);
1557
1558 code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1559
1560 defId(i->def(0), 14);
1561 srcId(i->src(0).getIndirect(0), 20);
1562 srcId(i->src(0).getIndirect(1), 26); // vertex address
1563 }
1564
1565 void
1566 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1567 {
1568 unsigned int size = typeSizeof(i->dType);
1569
1570 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1571 code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1572
1573 assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1574
1575 if (i->perPatch)
1576 code[0] |= 0x100;
1577
1578 emitPredicate(i);
1579
1580 assert(i->src(1).getFile() == FILE_GPR);
1581
1582 srcId(i->src(0).getIndirect(0), 20);
1583 srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1584 srcId(i->src(1), 26);
1585 }
1586
1587 void
1588 CodeEmitterNVC0::emitOUT(const Instruction *i)
1589 {
1590 code[0] = 0x00000006;
1591 code[1] = 0x1c000000;
1592
1593 emitPredicate(i);
1594
1595 defId(i->def(0), 14); // new secret address
1596 srcId(i->src(0), 20); // old secret address, should be 0 initially
1597
1598 assert(i->src(0).getFile() == FILE_GPR);
1599
1600 if (i->op == OP_EMIT)
1601 code[0] |= 1 << 5;
1602 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1603 code[0] |= 1 << 6;
1604
1605 // vertex stream
1606 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1607 unsigned int stream = SDATA(i->src(1)).u32;
1608 assert(stream < 4);
1609 if (stream) {
1610 code[1] |= 0xc000;
1611 code[0] |= stream << 26;
1612 } else {
1613 srcId(NULL, 26);
1614 }
1615 } else {
1616 srcId(i->src(1), 26);
1617 }
1618 }
1619
1620 void
1621 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1622 {
1623 if (i->encSize == 8) {
1624 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1625 } else {
1626 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1627 code[0] |= 0x80;
1628 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1629 }
1630 }
1631
1632 static void
1633 interpApply(const InterpEntry *entry, uint32_t *code,
1634 bool force_persample_interp, bool flatshade)
1635 {
1636 int ipa = entry->ipa;
1637 int reg = entry->reg;
1638 int loc = entry->loc;
1639
1640 if (flatshade &&
1641 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1642 ipa = NV50_IR_INTERP_FLAT;
1643 reg = 0x3f;
1644 } else if (force_persample_interp &&
1645 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1646 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1647 ipa |= NV50_IR_INTERP_CENTROID;
1648 }
1649 code[loc + 0] &= ~(0xf << 6);
1650 code[loc + 0] |= ipa << 6;
1651 code[loc + 0] &= ~(0x3f << 26);
1652 code[loc + 0] |= reg << 26;
1653 }
1654
1655 void
1656 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1657 {
1658 const uint32_t base = i->getSrc(0)->reg.data.offset;
1659
1660 if (i->encSize == 8) {
1661 code[0] = 0x00000000;
1662 code[1] = 0xc0000000 | (base & 0xffff);
1663
1664 if (i->saturate)
1665 code[0] |= 1 << 5;
1666
1667 if (i->op == OP_PINTERP) {
1668 srcId(i->src(1), 26);
1669 addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
1670 } else {
1671 code[0] |= 0x3f << 26;
1672 addInterp(i->ipa, 0x3f, interpApply);
1673 }
1674
1675 srcId(i->src(0).getIndirect(0), 20);
1676 } else {
1677 assert(i->op == OP_PINTERP);
1678 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1679 srcId(i->src(1), 20);
1680 }
1681 emitInterpMode(i);
1682
1683 emitPredicate(i);
1684 defId(i->def(0), 14);
1685
1686 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1687 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
1688 else
1689 code[1] |= 0x3f << 17;
1690 }
1691
1692 void
1693 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1694 {
1695 uint8_t val;
1696
1697 switch (ty) {
1698 case TYPE_U8:
1699 val = 0x00;
1700 break;
1701 case TYPE_S8:
1702 val = 0x20;
1703 break;
1704 case TYPE_F16:
1705 case TYPE_U16:
1706 val = 0x40;
1707 break;
1708 case TYPE_S16:
1709 val = 0x60;
1710 break;
1711 case TYPE_F32:
1712 case TYPE_U32:
1713 case TYPE_S32:
1714 val = 0x80;
1715 break;
1716 case TYPE_F64:
1717 case TYPE_U64:
1718 case TYPE_S64:
1719 val = 0xa0;
1720 break;
1721 case TYPE_B128:
1722 val = 0xc0;
1723 break;
1724 default:
1725 val = 0x80;
1726 assert(!"invalid type");
1727 break;
1728 }
1729 code[0] |= val;
1730 }
1731
1732 void
1733 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1734 {
1735 uint32_t val;
1736
1737 switch (c) {
1738 case CACHE_CA:
1739 // case CACHE_WB:
1740 val = 0x000;
1741 break;
1742 case CACHE_CG:
1743 val = 0x100;
1744 break;
1745 case CACHE_CS:
1746 val = 0x200;
1747 break;
1748 case CACHE_CV:
1749 // case CACHE_WT:
1750 val = 0x300;
1751 break;
1752 default:
1753 val = 0;
1754 assert(!"invalid caching mode");
1755 break;
1756 }
1757 code[0] |= val;
1758 }
1759
1760 static inline bool
1761 uses64bitAddress(const Instruction *ldst)
1762 {
1763 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1764 ldst->src(0).isIndirect(0) &&
1765 ldst->getIndirect(0, 0)->reg.size == 8;
1766 }
1767
1768 void
1769 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1770 {
1771 uint32_t opc;
1772
1773 switch (i->src(0).getFile()) {
1774 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1775 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1776 case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1777 default:
1778 assert(!"invalid memory file");
1779 opc = 0;
1780 break;
1781 }
1782 code[0] = 0x00000005;
1783 code[1] = opc;
1784
1785 setAddressByFile(i->src(0));
1786 srcId(i->src(1), 14);
1787 srcId(i->src(0).getIndirect(0), 20);
1788 if (uses64bitAddress(i))
1789 code[1] |= 1 << 26;
1790
1791 emitPredicate(i);
1792
1793 emitLoadStoreType(i->dType);
1794 emitCachingMode(i->cache);
1795 }
1796
1797 void
1798 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1799 {
1800 uint32_t opc;
1801
1802 code[0] = 0x00000005;
1803
1804 switch (i->src(0).getFile()) {
1805 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1806 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1807 case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1808 case FILE_MEMORY_CONST:
1809 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1810 emitMOV(i); // not sure if this is any better
1811 return;
1812 }
1813 opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1814 code[0] = 0x00000006 | (i->subOp << 8);
1815 break;
1816 default:
1817 assert(!"invalid memory file");
1818 opc = 0;
1819 break;
1820 }
1821 code[1] = opc;
1822
1823 defId(i->def(0), 14);
1824
1825 setAddressByFile(i->src(0));
1826 srcId(i->src(0).getIndirect(0), 20);
1827 if (uses64bitAddress(i))
1828 code[1] |= 1 << 26;
1829
1830 emitPredicate(i);
1831
1832 emitLoadStoreType(i->dType);
1833 emitCachingMode(i->cache);
1834 }
1835
1836 uint8_t
1837 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1838 {
1839 switch (SDATA(ref).sv.sv) {
1840 case SV_LANEID: return 0x00;
1841 case SV_PHYSID: return 0x03;
1842 case SV_VERTEX_COUNT: return 0x10;
1843 case SV_INVOCATION_ID: return 0x11;
1844 case SV_YDIR: return 0x12;
1845 case SV_THREAD_KILL: return 0x13;
1846 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1847 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1848 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1849 case SV_GRIDID: return 0x2c;
1850 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1851 case SV_LBASE: return 0x34;
1852 case SV_SBASE: return 0x30;
1853 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1854 default:
1855 assert(!"no sreg for system value");
1856 return 0;
1857 }
1858 }
1859
1860 void
1861 CodeEmitterNVC0::emitMOV(const Instruction *i)
1862 {
1863 if (i->def(0).getFile() == FILE_PREDICATE) {
1864 if (i->src(0).getFile() == FILE_GPR) {
1865 code[0] = 0xfc01c003;
1866 code[1] = 0x1a8e0000;
1867 srcId(i->src(0), 20);
1868 } else {
1869 code[0] = 0x0001c004;
1870 code[1] = 0x0c0e0000;
1871 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1872 code[0] |= 7 << 20;
1873 if (!i->getSrc(0)->reg.data.u32)
1874 code[0] |= 1 << 23;
1875 } else {
1876 srcId(i->src(0), 20);
1877 }
1878 }
1879 defId(i->def(0), 17);
1880 emitPredicate(i);
1881 } else
1882 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1883 uint8_t sr = getSRegEncoding(i->src(0));
1884
1885 if (i->encSize == 8) {
1886 code[0] = 0x00000004 | (sr << 26);
1887 code[1] = 0x2c000000;
1888 } else {
1889 code[0] = 0x40000008 | (sr << 20);
1890 }
1891 defId(i->def(0), 14);
1892
1893 emitPredicate(i);
1894 } else
1895 if (i->encSize == 8) {
1896 uint64_t opc;
1897
1898 if (i->src(0).getFile() == FILE_IMMEDIATE)
1899 opc = HEX64(18000000, 000001e2);
1900 else
1901 if (i->src(0).getFile() == FILE_PREDICATE)
1902 opc = HEX64(080e0000, 1c000004);
1903 else
1904 opc = HEX64(28000000, 00000004);
1905
1906 opc |= i->lanes << 5;
1907
1908 emitForm_B(i, opc);
1909 } else {
1910 uint32_t imm;
1911
1912 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1913 imm = SDATA(i->src(0)).u32;
1914 if (imm & 0xfff00000) {
1915 assert(!(imm & 0x000fffff));
1916 code[0] = 0x00000318 | imm;
1917 } else {
1918 assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1919 code[0] = 0x00000118 | (imm << 20);
1920 }
1921 } else {
1922 code[0] = 0x0028;
1923 emitShortSrc2(i->src(0));
1924 }
1925 defId(i->def(0), 14);
1926
1927 emitPredicate(i);
1928 }
1929 }
1930
1931 void
1932 CodeEmitterNVC0::emitATOM(const Instruction *i)
1933 {
1934 const bool hasDst = i->defExists(0);
1935 const bool casOrExch =
1936 i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
1937 i->subOp == NV50_IR_SUBOP_ATOM_CAS;
1938
1939 if (i->dType == TYPE_U64) {
1940 switch (i->subOp) {
1941 case NV50_IR_SUBOP_ATOM_ADD:
1942 code[0] = 0x205;
1943 if (hasDst)
1944 code[1] = 0x507e0000;
1945 else
1946 code[1] = 0x10000000;
1947 break;
1948 case NV50_IR_SUBOP_ATOM_EXCH:
1949 code[0] = 0x305;
1950 code[1] = 0x507e0000;
1951 break;
1952 case NV50_IR_SUBOP_ATOM_CAS:
1953 code[0] = 0x325;
1954 code[1] = 0x50000000;
1955 break;
1956 default:
1957 assert(!"invalid u64 red op");
1958 break;
1959 }
1960 } else
1961 if (i->dType == TYPE_U32) {
1962 switch (i->subOp) {
1963 case NV50_IR_SUBOP_ATOM_EXCH:
1964 code[0] = 0x105;
1965 code[1] = 0x507e0000;
1966 break;
1967 case NV50_IR_SUBOP_ATOM_CAS:
1968 code[0] = 0x125;
1969 code[1] = 0x50000000;
1970 break;
1971 default:
1972 code[0] = 0x5 | (i->subOp << 5);
1973 if (hasDst)
1974 code[1] = 0x507e0000;
1975 else
1976 code[1] = 0x10000000;
1977 break;
1978 }
1979 } else
1980 if (i->dType == TYPE_S32) {
1981 assert(i->subOp <= 2);
1982 code[0] = 0x205 | (i->subOp << 5);
1983 if (hasDst)
1984 code[1] = 0x587e0000;
1985 else
1986 code[1] = 0x18000000;
1987 } else
1988 if (i->dType == TYPE_F32) {
1989 assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
1990 code[0] = 0x205;
1991 if (hasDst)
1992 code[1] = 0x687e0000;
1993 else
1994 code[1] = 0x28000000;
1995 }
1996
1997 emitPredicate(i);
1998
1999 srcId(i->src(1), 14);
2000
2001 if (hasDst)
2002 defId(i->def(0), 32 + 11);
2003 else
2004 if (casOrExch)
2005 code[1] |= 63 << 11;
2006
2007 if (hasDst || casOrExch) {
2008 const int32_t offset = SDATA(i->src(0)).offset;
2009 assert(offset < 0x80000 && offset >= -0x80000);
2010 code[0] |= offset << 26;
2011 code[1] |= (offset & 0x1ffc0) >> 6;
2012 code[1] |= (offset & 0xe0000) << 6;
2013 } else {
2014 srcAddr32(i->src(0), 26, 0);
2015 }
2016 if (i->getIndirect(0, 0)) {
2017 srcId(i->getIndirect(0, 0), 20);
2018 if (i->getIndirect(0, 0)->reg.size == 8)
2019 code[1] |= 1 << 26;
2020 } else {
2021 code[0] |= 63 << 20;
2022 }
2023
2024 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2025 assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
2026 code[1] |= (SDATA(i->src(1)).id + 1) << 17;
2027 }
2028 }
2029
2030 void
2031 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
2032 {
2033 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
2034 case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
2035 case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break;
2036 default:
2037 code[0] = 0x45;
2038 assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
2039 break;
2040 }
2041 code[1] = 0xe0000000;
2042
2043 emitPredicate(i);
2044 }
2045
2046 void
2047 CodeEmitterNVC0::emitCCTL(const Instruction *i)
2048 {
2049 code[0] = 0x00000005 | (i->subOp << 5);
2050
2051 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2052 code[1] = 0x98000000;
2053 srcAddr32(i->src(0), 28, 2);
2054 } else {
2055 code[1] = 0xd0000000;
2056 setAddress24(i->src(0));
2057 }
2058 if (uses64bitAddress(i))
2059 code[1] |= 1 << 26;
2060 srcId(i->src(0).getIndirect(0), 20);
2061
2062 emitPredicate(i);
2063
2064 defId(i, 0, 14);
2065 }
2066
2067 void
2068 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
2069 {
2070 uint8_t m;
2071 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
2072 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
2073 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
2074 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
2075 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
2076 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
2077 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
2078 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
2079 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
2080 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
2081 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
2082 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
2083 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
2084 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
2085 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
2086 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
2087 default:
2088 return;
2089 }
2090 code[0] |= m << 5;
2091 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
2092 code[1] |= 1 << 16;
2093 }
2094
2095 void
2096 CodeEmitterNVC0::emitSUCalc(Instruction *i)
2097 {
2098 ImmediateValue *imm = NULL;
2099 uint64_t opc;
2100
2101 if (i->srcExists(2)) {
2102 imm = i->getSrc(2)->asImm();
2103 if (imm)
2104 i->setSrc(2, NULL); // special case, make emitForm_A not assert
2105 }
2106
2107 switch (i->op) {
2108 case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
2109 case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
2110 case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
2111 default:
2112 assert(0);
2113 return;
2114 }
2115 emitForm_A(i, opc);
2116
2117 if (i->op == OP_SUCLAMP) {
2118 if (i->dType == TYPE_S32)
2119 code[0] |= 1 << 9;
2120 emitSUCLAMPMode(i->subOp);
2121 }
2122
2123 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
2124 code[1] |= 1 << 16;
2125
2126 if (i->op != OP_SUEAU) {
2127 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2128 code[0] |= 63 << 14;
2129 code[1] |= i->getDef(0)->reg.data.id << 23;
2130 } else
2131 if (i->defExists(1)) { // r, p
2132 assert(i->def(1).getFile() == FILE_PREDICATE);
2133 code[1] |= i->getDef(1)->reg.data.id << 23;
2134 } else { // r, #
2135 code[1] |= 7 << 23;
2136 }
2137 }
2138 if (imm) {
2139 assert(i->op == OP_SUCLAMP);
2140 i->setSrc(2, imm);
2141 code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
2142 }
2143 }
2144
2145 void
2146 CodeEmitterNVC0::emitSUGType(DataType ty)
2147 {
2148 switch (ty) {
2149 case TYPE_S32: code[1] |= 1 << 13; break;
2150 case TYPE_U8: code[1] |= 2 << 13; break;
2151 case TYPE_S8: code[1] |= 3 << 13; break;
2152 default:
2153 assert(ty == TYPE_U32);
2154 break;
2155 }
2156 }
2157
2158 void
2159 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2160 {
2161 const uint32_t offset = i->getSrc(s)->reg.data.offset;
2162
2163 assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2164 assert(offset == (offset & 0xfffc));
2165
2166 code[1] |= 1 << 21;
2167 code[0] |= offset << 24;
2168 code[1] |= offset >> 8;
2169 code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2170 }
2171
2172 void
2173 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2174 {
2175 if (!i->srcExists(s) || (i->predSrc == s)) {
2176 code[1] |= 0x7 << 17;
2177 } else {
2178 if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2179 code[1] |= 1 << 20;
2180 srcId(i->src(s), 32 + 17);
2181 }
2182 }
2183
2184 void
2185 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2186 {
2187 code[0] = 0x5;
2188 code[1] = 0xd4000000 | (i->subOp << 15);
2189
2190 emitLoadStoreType(i->dType);
2191 emitSUGType(i->sType);
2192 emitCachingMode(i->cache);
2193
2194 emitPredicate(i);
2195 defId(i->def(0), 14); // destination
2196 srcId(i->src(0), 20); // address
2197 // format
2198 if (i->src(1).getFile() == FILE_GPR)
2199 srcId(i->src(1), 26);
2200 else
2201 setSUConst16(i, 1);
2202 setSUPred(i, 2);
2203 }
2204
2205 void
2206 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2207 {
2208 code[0] = 0x5;
2209 code[1] = 0xdc000000 | (i->subOp << 15);
2210
2211 if (i->op == OP_SUSTP)
2212 code[1] |= i->tex.mask << 22;
2213 else
2214 emitLoadStoreType(i->dType);
2215 emitSUGType(i->sType);
2216 emitCachingMode(i->cache);
2217
2218 emitPredicate(i);
2219 srcId(i->src(0), 20); // address
2220 // format
2221 if (i->src(1).getFile() == FILE_GPR)
2222 srcId(i->src(1), 26);
2223 else
2224 setSUConst16(i, 1);
2225 srcId(i->src(3), 14); // values
2226 setSUPred(i, 2);
2227 }
2228
2229 void
2230 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2231 {
2232 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2233 case 0:
2234 code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2235 code[1] |= (i->subOp & 0x00e0) >> 5; // vsrc2
2236 code[1] |= (i->subOp & 0x0100) << 7; // vsrc2
2237 code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2238 break;
2239 case 1:
2240 code[1] |= (i->subOp & 0x000f) << 8; // v2src1
2241 code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2242 code[1] |= (i->subOp & 0x01e0) >> 1; // v2src2
2243 code[1] |= (i->subOp & 0x0200) << 6; // v2src2
2244 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2245 code[1] |= (i->mask & 0x3) << 2;
2246 break;
2247 case 2:
2248 code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2249 code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2250 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2251 code[1] |= (i->mask & 0x3) << 2;
2252 code[1] |= (i->mask & 0xc) << 21;
2253 break;
2254 default:
2255 assert(0);
2256 break;
2257 }
2258 }
2259
2260 void
2261 CodeEmitterNVC0::emitVSHL(const Instruction *i)
2262 {
2263 uint64_t opc = 0x4;
2264
2265 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2266 case 0: opc |= 0xe8ULL << 56; break;
2267 case 1: opc |= 0xb4ULL << 56; break;
2268 case 2: opc |= 0x94ULL << 56; break;
2269 default:
2270 assert(0);
2271 break;
2272 }
2273 if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2274 if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2275 if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2276 } else {
2277 if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2278 if (isSignedType(i->sType)) opc |= 1 << 6;
2279 }
2280 emitForm_A(i, opc);
2281 emitVectorSubOp(i);
2282
2283 if (i->saturate)
2284 code[0] |= 1 << 9;
2285 if (i->flagsDef >= 0)
2286 code[1] |= 1 << 16;
2287 }
2288
2289 void
2290 CodeEmitterNVC0::emitPIXLD(const Instruction *i)
2291 {
2292 assert(i->encSize == 8);
2293 emitForm_A(i, HEX64(10000000, 00000006));
2294 code[0] |= i->subOp << 5;
2295 code[1] |= 0x00e00000;
2296 }
2297
2298 bool
2299 CodeEmitterNVC0::emitInstruction(Instruction *insn)
2300 {
2301 unsigned int size = insn->encSize;
2302
2303 if (writeIssueDelays && !(codeSize & 0x3f))
2304 size += 8;
2305
2306 if (!insn->encSize) {
2307 ERROR("skipping unencodable instruction: "); insn->print();
2308 return false;
2309 } else
2310 if (codeSize + size > codeSizeLimit) {
2311 ERROR("code emitter output buffer too small\n");
2312 return false;
2313 }
2314
2315 if (writeIssueDelays) {
2316 if (!(codeSize & 0x3f)) {
2317 code[0] = 0x00000007; // cf issue delay "instruction"
2318 code[1] = 0x20000000;
2319 code += 2;
2320 codeSize += 8;
2321 }
2322 const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2323 uint32_t *data = code - (id * 2 + 2);
2324 if (id <= 2) {
2325 data[0] |= insn->sched << (id * 8 + 4);
2326 } else
2327 if (id == 3) {
2328 data[0] |= insn->sched << 28;
2329 data[1] |= insn->sched >> 4;
2330 } else {
2331 data[1] |= insn->sched << ((id - 4) * 8 + 4);
2332 }
2333 }
2334
2335 // assert that instructions with multiple defs don't corrupt registers
2336 for (int d = 0; insn->defExists(d); ++d)
2337 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2338
2339 switch (insn->op) {
2340 case OP_MOV:
2341 case OP_RDSV:
2342 emitMOV(insn);
2343 break;
2344 case OP_NOP:
2345 break;
2346 case OP_LOAD:
2347 emitLOAD(insn);
2348 break;
2349 case OP_STORE:
2350 emitSTORE(insn);
2351 break;
2352 case OP_LINTERP:
2353 case OP_PINTERP:
2354 emitINTERP(insn);
2355 break;
2356 case OP_VFETCH:
2357 emitVFETCH(insn);
2358 break;
2359 case OP_EXPORT:
2360 emitEXPORT(insn);
2361 break;
2362 case OP_PFETCH:
2363 emitPFETCH(insn);
2364 break;
2365 case OP_AFETCH:
2366 emitAFETCH(insn);
2367 break;
2368 case OP_EMIT:
2369 case OP_RESTART:
2370 emitOUT(insn);
2371 break;
2372 case OP_ADD:
2373 case OP_SUB:
2374 if (insn->dType == TYPE_F64)
2375 emitDADD(insn);
2376 else if (isFloatType(insn->dType))
2377 emitFADD(insn);
2378 else
2379 emitUADD(insn);
2380 break;
2381 case OP_MUL:
2382 if (insn->dType == TYPE_F64)
2383 emitDMUL(insn);
2384 else if (isFloatType(insn->dType))
2385 emitFMUL(insn);
2386 else
2387 emitUMUL(insn);
2388 break;
2389 case OP_MAD:
2390 case OP_FMA:
2391 if (insn->dType == TYPE_F64)
2392 emitDMAD(insn);
2393 else if (isFloatType(insn->dType))
2394 emitFMAD(insn);
2395 else
2396 emitIMAD(insn);
2397 break;
2398 case OP_SAD:
2399 emitISAD(insn);
2400 break;
2401 case OP_NOT:
2402 emitNOT(insn);
2403 break;
2404 case OP_AND:
2405 emitLogicOp(insn, 0);
2406 break;
2407 case OP_OR:
2408 emitLogicOp(insn, 1);
2409 break;
2410 case OP_XOR:
2411 emitLogicOp(insn, 2);
2412 break;
2413 case OP_SHL:
2414 case OP_SHR:
2415 emitShift(insn);
2416 break;
2417 case OP_SET:
2418 case OP_SET_AND:
2419 case OP_SET_OR:
2420 case OP_SET_XOR:
2421 emitSET(insn->asCmp());
2422 break;
2423 case OP_SELP:
2424 emitSELP(insn);
2425 break;
2426 case OP_SLCT:
2427 emitSLCT(insn->asCmp());
2428 break;
2429 case OP_MIN:
2430 case OP_MAX:
2431 emitMINMAX(insn);
2432 break;
2433 case OP_ABS:
2434 case OP_NEG:
2435 case OP_CEIL:
2436 case OP_FLOOR:
2437 case OP_TRUNC:
2438 case OP_CVT:
2439 case OP_SAT:
2440 emitCVT(insn);
2441 break;
2442 case OP_RSQ:
2443 emitSFnOp(insn, 5 + 2 * insn->subOp);
2444 break;
2445 case OP_RCP:
2446 emitSFnOp(insn, 4 + 2 * insn->subOp);
2447 break;
2448 case OP_LG2:
2449 emitSFnOp(insn, 3);
2450 break;
2451 case OP_EX2:
2452 emitSFnOp(insn, 2);
2453 break;
2454 case OP_SIN:
2455 emitSFnOp(insn, 1);
2456 break;
2457 case OP_COS:
2458 emitSFnOp(insn, 0);
2459 break;
2460 case OP_PRESIN:
2461 case OP_PREEX2:
2462 emitPreOp(insn);
2463 break;
2464 case OP_TEX:
2465 case OP_TXB:
2466 case OP_TXL:
2467 case OP_TXD:
2468 case OP_TXF:
2469 case OP_TXG:
2470 case OP_TXLQ:
2471 emitTEX(insn->asTex());
2472 break;
2473 case OP_TXQ:
2474 emitTXQ(insn->asTex());
2475 break;
2476 case OP_TEXBAR:
2477 emitTEXBAR(insn);
2478 break;
2479 case OP_SUBFM:
2480 case OP_SUCLAMP:
2481 case OP_SUEAU:
2482 emitSUCalc(insn);
2483 break;
2484 case OP_MADSP:
2485 emitMADSP(insn);
2486 break;
2487 case OP_SULDB:
2488 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2489 emitSULDGB(insn->asTex());
2490 else
2491 ERROR("SULDB not yet supported on < nve4\n");
2492 break;
2493 case OP_SUSTB:
2494 case OP_SUSTP:
2495 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2496 emitSUSTGx(insn->asTex());
2497 else
2498 ERROR("SUSTx not yet supported on < nve4\n");
2499 break;
2500 case OP_ATOM:
2501 emitATOM(insn);
2502 break;
2503 case OP_BRA:
2504 case OP_CALL:
2505 case OP_PRERET:
2506 case OP_RET:
2507 case OP_DISCARD:
2508 case OP_EXIT:
2509 case OP_PRECONT:
2510 case OP_CONT:
2511 case OP_PREBREAK:
2512 case OP_BREAK:
2513 case OP_JOINAT:
2514 case OP_BRKPT:
2515 case OP_QUADON:
2516 case OP_QUADPOP:
2517 emitFlow(insn);
2518 break;
2519 case OP_QUADOP:
2520 emitQUADOP(insn, insn->subOp, insn->lanes);
2521 break;
2522 case OP_DFDX:
2523 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2524 break;
2525 case OP_DFDY:
2526 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2527 break;
2528 case OP_POPCNT:
2529 emitPOPC(insn);
2530 break;
2531 case OP_INSBF:
2532 emitINSBF(insn);
2533 break;
2534 case OP_EXTBF:
2535 emitEXTBF(insn);
2536 break;
2537 case OP_BFIND:
2538 emitBFIND(insn);
2539 break;
2540 case OP_PERMT:
2541 emitPERMT(insn);
2542 break;
2543 case OP_JOIN:
2544 emitNOP(insn);
2545 insn->join = 1;
2546 break;
2547 case OP_BAR:
2548 emitBAR(insn);
2549 break;
2550 case OP_MEMBAR:
2551 emitMEMBAR(insn);
2552 break;
2553 case OP_CCTL:
2554 emitCCTL(insn);
2555 break;
2556 case OP_VSHL:
2557 emitVSHL(insn);
2558 break;
2559 case OP_PIXLD:
2560 emitPIXLD(insn);
2561 break;
2562 case OP_PHI:
2563 case OP_UNION:
2564 case OP_CONSTRAINT:
2565 ERROR("operation should have been eliminated");
2566 return false;
2567 case OP_EXP:
2568 case OP_LOG:
2569 case OP_SQRT:
2570 case OP_POW:
2571 ERROR("operation should have been lowered\n");
2572 return false;
2573 default:
2574 ERROR("unknown op: %u\n", insn->op);
2575 return false;
2576 }
2577
2578 if (insn->join) {
2579 code[0] |= 0x10;
2580 assert(insn->encSize == 8);
2581 }
2582
2583 code += insn->encSize / 4;
2584 codeSize += insn->encSize;
2585 return true;
2586 }
2587
2588 uint32_t
2589 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2590 {
2591 const Target::OpInfo &info = targ->getOpInfo(i);
2592
2593 if (writeIssueDelays || info.minEncSize == 8 || 1)
2594 return 8;
2595
2596 if (i->ftz || i->saturate || i->join)
2597 return 8;
2598 if (i->rnd != ROUND_N)
2599 return 8;
2600 if (i->predSrc >= 0 && i->op == OP_MAD)
2601 return 8;
2602
2603 if (i->op == OP_PINTERP) {
2604 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2605 return 8;
2606 } else
2607 if (i->op == OP_MOV && i->lanes != 0xf) {
2608 return 8;
2609 }
2610
2611 for (int s = 0; i->srcExists(s); ++s) {
2612 if (i->src(s).isIndirect(0))
2613 return 8;
2614
2615 if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2616 if (SDATA(i->src(s)).offset >= 0x100)
2617 return 8;
2618 if (i->getSrc(s)->reg.fileIndex > 1 &&
2619 i->getSrc(s)->reg.fileIndex != 16)
2620 return 8;
2621 } else
2622 if (i->src(s).getFile() == FILE_IMMEDIATE) {
2623 if (i->dType == TYPE_F32) {
2624 if (SDATA(i->src(s)).u32 >= 0x100)
2625 return 8;
2626 } else {
2627 if (SDATA(i->src(s)).u32 > 0xff)
2628 return 8;
2629 }
2630 }
2631
2632 if (i->op == OP_CVT)
2633 continue;
2634 if (i->src(s).mod != Modifier(0)) {
2635 if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2636 if (i->op != OP_RSQ)
2637 return 8;
2638 if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2639 if (i->op != OP_ADD || s != 0)
2640 return 8;
2641 }
2642 }
2643
2644 return 4;
2645 }
2646
2647 // Simplified, erring on safe side.
2648 class SchedDataCalculator : public Pass
2649 {
2650 public:
2651 SchedDataCalculator(const Target *targ) : targ(targ) { }
2652
2653 private:
2654 struct RegScores
2655 {
2656 struct Resource {
2657 int st[DATA_FILE_COUNT]; // LD to LD delay 3
2658 int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2659 int tex; // TEX to non-TEX delay 17 (0x11)
2660 int sfu; // SFU to SFU delay 3 (except PRE-ops)
2661 int imul; // integer MUL to MUL delay 3
2662 } res;
2663 struct ScoreData {
2664 int r[256];
2665 int p[8];
2666 int c;
2667 } rd, wr;
2668 int base;
2669 int regs;
2670
2671 void rebase(const int base)
2672 {
2673 const int delta = this->base - base;
2674 if (!delta)
2675 return;
2676 this->base = 0;
2677
2678 for (int i = 0; i < regs; ++i) {
2679 rd.r[i] += delta;
2680 wr.r[i] += delta;
2681 }
2682 for (int i = 0; i < 8; ++i) {
2683 rd.p[i] += delta;
2684 wr.p[i] += delta;
2685 }
2686 rd.c += delta;
2687 wr.c += delta;
2688
2689 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2690 res.ld[f] += delta;
2691 res.st[f] += delta;
2692 }
2693 res.sfu += delta;
2694 res.imul += delta;
2695 res.tex += delta;
2696 }
2697 void wipe(int regs)
2698 {
2699 memset(&rd, 0, sizeof(rd));
2700 memset(&wr, 0, sizeof(wr));
2701 memset(&res, 0, sizeof(res));
2702 this->regs = regs;
2703 }
2704 int getLatest(const ScoreData& d) const
2705 {
2706 int max = 0;
2707 for (int i = 0; i < regs; ++i)
2708 if (d.r[i] > max)
2709 max = d.r[i];
2710 for (int i = 0; i < 8; ++i)
2711 if (d.p[i] > max)
2712 max = d.p[i];
2713 if (d.c > max)
2714 max = d.c;
2715 return max;
2716 }
2717 inline int getLatestRd() const
2718 {
2719 return getLatest(rd);
2720 }
2721 inline int getLatestWr() const
2722 {
2723 return getLatest(wr);
2724 }
2725 inline int getLatest() const
2726 {
2727 const int a = getLatestRd();
2728 const int b = getLatestWr();
2729
2730 int max = MAX2(a, b);
2731 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2732 max = MAX2(res.ld[f], max);
2733 max = MAX2(res.st[f], max);
2734 }
2735 max = MAX2(res.sfu, max);
2736 max = MAX2(res.imul, max);
2737 max = MAX2(res.tex, max);
2738 return max;
2739 }
2740 void setMax(const RegScores *that)
2741 {
2742 for (int i = 0; i < regs; ++i) {
2743 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
2744 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
2745 }
2746 for (int i = 0; i < 8; ++i) {
2747 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
2748 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
2749 }
2750 rd.c = MAX2(rd.c, that->rd.c);
2751 wr.c = MAX2(wr.c, that->wr.c);
2752
2753 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2754 res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
2755 res.st[f] = MAX2(res.st[f], that->res.st[f]);
2756 }
2757 res.sfu = MAX2(res.sfu, that->res.sfu);
2758 res.imul = MAX2(res.imul, that->res.imul);
2759 res.tex = MAX2(res.tex, that->res.tex);
2760 }
2761 void print(int cycle)
2762 {
2763 for (int i = 0; i < regs; ++i) {
2764 if (rd.r[i] > cycle)
2765 INFO("rd $r%i @ %i\n", i, rd.r[i]);
2766 if (wr.r[i] > cycle)
2767 INFO("wr $r%i @ %i\n", i, wr.r[i]);
2768 }
2769 for (int i = 0; i < 8; ++i) {
2770 if (rd.p[i] > cycle)
2771 INFO("rd $p%i @ %i\n", i, rd.p[i]);
2772 if (wr.p[i] > cycle)
2773 INFO("wr $p%i @ %i\n", i, wr.p[i]);
2774 }
2775 if (rd.c > cycle)
2776 INFO("rd $c @ %i\n", rd.c);
2777 if (wr.c > cycle)
2778 INFO("wr $c @ %i\n", wr.c);
2779 if (res.sfu > cycle)
2780 INFO("sfu @ %i\n", res.sfu);
2781 if (res.imul > cycle)
2782 INFO("imul @ %i\n", res.imul);
2783 if (res.tex > cycle)
2784 INFO("tex @ %i\n", res.tex);
2785 }
2786 };
2787
2788 RegScores *score; // for current BB
2789 std::vector<RegScores> scoreBoards;
2790 int prevData;
2791 operation prevOp;
2792
2793 const Target *targ;
2794
2795 bool visit(Function *);
2796 bool visit(BasicBlock *);
2797
2798 void commitInsn(const Instruction *, int cycle);
2799 int calcDelay(const Instruction *, int cycle) const;
2800 void setDelay(Instruction *, int delay, Instruction *next);
2801
2802 void recordRd(const Value *, const int ready);
2803 void recordWr(const Value *, const int ready);
2804 void checkRd(const Value *, int cycle, int& delay) const;
2805 void checkWr(const Value *, int cycle, int& delay) const;
2806
2807 int getCycles(const Instruction *, int origDelay) const;
2808 };
2809
2810 void
2811 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
2812 {
2813 if (insn->op == OP_EXIT || insn->op == OP_RET)
2814 delay = MAX2(delay, 14);
2815
2816 if (insn->op == OP_TEXBAR) {
2817 // TODO: except if results not used before EXIT
2818 insn->sched = 0xc2;
2819 } else
2820 if (insn->op == OP_JOIN || insn->join) {
2821 insn->sched = 0x00;
2822 } else
2823 if (delay >= 0 || prevData == 0x04 ||
2824 !next || !targ->canDualIssue(insn, next)) {
2825 insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
2826 if (prevOp == OP_EXPORT)
2827 insn->sched |= 0x40;
2828 else
2829 insn->sched |= 0x20;
2830 } else {
2831 insn->sched = 0x04; // dual-issue
2832 }
2833
2834 if (prevData != 0x04 || prevOp != OP_EXPORT)
2835 if (insn->sched != 0x04 || insn->op == OP_EXPORT)
2836 prevOp = insn->op;
2837
2838 prevData = insn->sched;
2839 }
2840
2841 int
2842 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
2843 {
2844 if (insn->sched & 0x80) {
2845 int c = (insn->sched & 0x0f) * 2 + 1;
2846 if (insn->op == OP_TEXBAR && origDelay > 0)
2847 c += origDelay;
2848 return c;
2849 }
2850 if (insn->sched & 0x60)
2851 return (insn->sched & 0x1f) + 1;
2852 return (insn->sched == 0x04) ? 0 : 32;
2853 }
2854
2855 bool
2856 SchedDataCalculator::visit(Function *func)
2857 {
2858 int regs = targ->getFileSize(FILE_GPR) + 1;
2859 scoreBoards.resize(func->cfg.getSize());
2860 for (size_t i = 0; i < scoreBoards.size(); ++i)
2861 scoreBoards[i].wipe(regs);
2862 return true;
2863 }
2864
2865 bool
2866 SchedDataCalculator::visit(BasicBlock *bb)
2867 {
2868 Instruction *insn;
2869 Instruction *next = NULL;
2870
2871 int cycle = 0;
2872
2873 prevData = 0x00;
2874 prevOp = OP_NOP;
2875 score = &scoreBoards.at(bb->getId());
2876
2877 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
2878 // back branches will wait until all target dependencies are satisfied
2879 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
2880 continue;
2881 BasicBlock *in = BasicBlock::get(ei.getNode());
2882 if (in->getExit()) {
2883 if (prevData != 0x04)
2884 prevData = in->getExit()->sched;
2885 prevOp = in->getExit()->op;
2886 }
2887 score->setMax(&scoreBoards.at(in->getId()));
2888 }
2889 if (bb->cfg.incidentCount() > 1)
2890 prevOp = OP_NOP;
2891
2892 #ifdef NVC0_DEBUG_SCHED_DATA
2893 INFO("=== BB:%i initial scores\n", bb->getId());
2894 score->print(cycle);
2895 #endif
2896
2897 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
2898 next = insn->next;
2899
2900 commitInsn(insn, cycle);
2901 int delay = calcDelay(next, cycle);
2902 setDelay(insn, delay, next);
2903 cycle += getCycles(insn, delay);
2904
2905 #ifdef NVC0_DEBUG_SCHED_DATA
2906 INFO("cycle %i, sched %02x\n", cycle, insn->sched);
2907 insn->print();
2908 next->print();
2909 #endif
2910 }
2911 if (!insn)
2912 return true;
2913 commitInsn(insn, cycle);
2914
2915 int bbDelay = -1;
2916
2917 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
2918 BasicBlock *out = BasicBlock::get(ei.getNode());
2919
2920 if (ei.getType() != Graph::Edge::BACK) {
2921 // only test the first instruction of the outgoing block
2922 next = out->getEntry();
2923 if (next)
2924 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
2925 } else {
2926 // wait until all dependencies are satisfied
2927 const int regsFree = score->getLatest();
2928 next = out->getFirst();
2929 for (int c = cycle; next && c < regsFree; next = next->next) {
2930 bbDelay = MAX2(bbDelay, calcDelay(next, c));
2931 c += getCycles(next, bbDelay);
2932 }
2933 next = NULL;
2934 }
2935 }
2936 if (bb->cfg.outgoingCount() != 1)
2937 next = NULL;
2938 setDelay(insn, bbDelay, next);
2939 cycle += getCycles(insn, bbDelay);
2940
2941 score->rebase(cycle); // common base for initializing out blocks' scores
2942 return true;
2943 }
2944
2945 #define NVE4_MAX_ISSUE_DELAY 0x1f
2946 int
2947 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
2948 {
2949 int delay = 0, ready = cycle;
2950
2951 for (int s = 0; insn->srcExists(s); ++s)
2952 checkRd(insn->getSrc(s), cycle, delay);
2953 // WAR & WAW don't seem to matter
2954 // for (int s = 0; insn->srcExists(s); ++s)
2955 // recordRd(insn->getSrc(s), cycle);
2956
2957 switch (Target::getOpClass(insn->op)) {
2958 case OPCLASS_SFU:
2959 ready = score->res.sfu;
2960 break;
2961 case OPCLASS_ARITH:
2962 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2963 ready = score->res.imul;
2964 break;
2965 case OPCLASS_TEXTURE:
2966 ready = score->res.tex;
2967 break;
2968 case OPCLASS_LOAD:
2969 ready = score->res.ld[insn->src(0).getFile()];
2970 break;
2971 case OPCLASS_STORE:
2972 ready = score->res.st[insn->src(0).getFile()];
2973 break;
2974 default:
2975 break;
2976 }
2977 if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
2978 ready = MAX2(ready, score->res.tex);
2979
2980 delay = MAX2(delay, ready - cycle);
2981
2982 // if can issue next cycle, delay is 0, not 1
2983 return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
2984 }
2985
2986 void
2987 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
2988 {
2989 const int ready = cycle + targ->getLatency(insn);
2990
2991 for (int d = 0; insn->defExists(d); ++d)
2992 recordWr(insn->getDef(d), ready);
2993 // WAR & WAW don't seem to matter
2994 // for (int s = 0; insn->srcExists(s); ++s)
2995 // recordRd(insn->getSrc(s), cycle);
2996
2997 switch (Target::getOpClass(insn->op)) {
2998 case OPCLASS_SFU:
2999 score->res.sfu = cycle + 4;
3000 break;
3001 case OPCLASS_ARITH:
3002 if (insn->op == OP_MUL && !isFloatType(insn->dType))
3003 score->res.imul = cycle + 4;
3004 break;
3005 case OPCLASS_TEXTURE:
3006 score->res.tex = cycle + 18;
3007 break;
3008 case OPCLASS_LOAD:
3009 if (insn->src(0).getFile() == FILE_MEMORY_CONST)
3010 break;
3011 score->res.ld[insn->src(0).getFile()] = cycle + 4;
3012 score->res.st[insn->src(0).getFile()] = ready;
3013 break;
3014 case OPCLASS_STORE:
3015 score->res.st[insn->src(0).getFile()] = cycle + 4;
3016 score->res.ld[insn->src(0).getFile()] = ready;
3017 break;
3018 case OPCLASS_OTHER:
3019 if (insn->op == OP_TEXBAR)
3020 score->res.tex = cycle;
3021 break;
3022 default:
3023 break;
3024 }
3025
3026 #ifdef NVC0_DEBUG_SCHED_DATA
3027 score->print(cycle);
3028 #endif
3029 }
3030
3031 void
3032 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
3033 {
3034 int ready = cycle;
3035 int a, b;
3036
3037 switch (v->reg.file) {
3038 case FILE_GPR:
3039 a = v->reg.data.id;
3040 b = a + v->reg.size / 4;
3041 for (int r = a; r < b; ++r)
3042 ready = MAX2(ready, score->rd.r[r]);
3043 break;
3044 case FILE_PREDICATE:
3045 ready = MAX2(ready, score->rd.p[v->reg.data.id]);
3046 break;
3047 case FILE_FLAGS:
3048 ready = MAX2(ready, score->rd.c);
3049 break;
3050 case FILE_SHADER_INPUT:
3051 case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
3052 case FILE_MEMORY_LOCAL:
3053 case FILE_MEMORY_CONST:
3054 case FILE_MEMORY_SHARED:
3055 case FILE_MEMORY_GLOBAL:
3056 case FILE_SYSTEM_VALUE:
3057 // TODO: any restrictions here ?
3058 break;
3059 case FILE_IMMEDIATE:
3060 break;
3061 default:
3062 assert(0);
3063 break;
3064 }
3065 if (cycle < ready)
3066 delay = MAX2(delay, ready - cycle);
3067 }
3068
3069 void
3070 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
3071 {
3072 int ready = cycle;
3073 int a, b;
3074
3075 switch (v->reg.file) {
3076 case FILE_GPR:
3077 a = v->reg.data.id;
3078 b = a + v->reg.size / 4;
3079 for (int r = a; r < b; ++r)
3080 ready = MAX2(ready, score->wr.r[r]);
3081 break;
3082 case FILE_PREDICATE:
3083 ready = MAX2(ready, score->wr.p[v->reg.data.id]);
3084 break;
3085 default:
3086 assert(v->reg.file == FILE_FLAGS);
3087 ready = MAX2(ready, score->wr.c);
3088 break;
3089 }
3090 if (cycle < ready)
3091 delay = MAX2(delay, ready - cycle);
3092 }
3093
3094 void
3095 SchedDataCalculator::recordWr(const Value *v, const int ready)
3096 {
3097 int a = v->reg.data.id;
3098
3099 if (v->reg.file == FILE_GPR) {
3100 int b = a + v->reg.size / 4;
3101 for (int r = a; r < b; ++r)
3102 score->rd.r[r] = ready;
3103 } else
3104 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
3105 if (v->reg.file == FILE_PREDICATE) {
3106 score->rd.p[a] = ready + 4;
3107 } else {
3108 assert(v->reg.file == FILE_FLAGS);
3109 score->rd.c = ready + 4;
3110 }
3111 }
3112
3113 void
3114 SchedDataCalculator::recordRd(const Value *v, const int ready)
3115 {
3116 int a = v->reg.data.id;
3117
3118 if (v->reg.file == FILE_GPR) {
3119 int b = a + v->reg.size / 4;
3120 for (int r = a; r < b; ++r)
3121 score->wr.r[r] = ready;
3122 } else
3123 if (v->reg.file == FILE_PREDICATE) {
3124 score->wr.p[a] = ready;
3125 } else
3126 if (v->reg.file == FILE_FLAGS) {
3127 score->wr.c = ready;
3128 }
3129 }
3130
3131 bool
3132 calculateSchedDataNVC0(const Target *targ, Function *func)
3133 {
3134 SchedDataCalculator sched(targ);
3135 return sched.run(func, true, true);
3136 }
3137
3138 void
3139 CodeEmitterNVC0::prepareEmission(Function *func)
3140 {
3141 CodeEmitter::prepareEmission(func);
3142
3143 if (targ->hasSWSched)
3144 calculateSchedDataNVC0(targ, func);
3145 }
3146
3147 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
3148 : CodeEmitter(target),
3149 targNVC0(target),
3150 writeIssueDelays(target->hasSWSched)
3151 {
3152 code = NULL;
3153 codeSize = codeSizeLimit = 0;
3154 relocInfo = NULL;
3155 }
3156
3157 CodeEmitter *
3158 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
3159 {
3160 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
3161 emit->setProgramType(type);
3162 return emit;
3163 }
3164
3165 CodeEmitter *
3166 TargetNVC0::getCodeEmitter(Program::Type type)
3167 {
3168 if (chipset >= NVISA_GK20A_CHIPSET)
3169 return createCodeEmitterGK110(type);
3170 return createCodeEmitterNVC0(type);
3171 }
3172
3173 } // namespace nv50_ir