Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_A(const Instruction *, uint64_t);
49 void emitForm_B(const Instruction *, uint64_t);
50 void emitForm_S(const Instruction *, uint32_t, bool pred);
51
52 void emitPredicate(const Instruction *);
53
54 void setAddress16(const ValueRef&);
55 void setAddress24(const ValueRef&);
56 void setAddressByFile(const ValueRef&);
57 void setImmediate(const Instruction *, const int s); // needs op already set
58 void setImmediateS8(const ValueRef&);
59 void setSUConst16(const Instruction *, const int s);
60 void setSUPred(const Instruction *, const int s);
61
62 void emitCondCode(CondCode cc, int pos);
63 void emitInterpMode(const Instruction *);
64 void emitLoadStoreType(DataType ty);
65 void emitSUGType(DataType);
66 void emitCachingMode(CacheMode c);
67
68 void emitShortSrc2(const ValueRef&);
69
70 inline uint8_t getSRegEncoding(const ValueRef&);
71
72 void roundMode_A(const Instruction *);
73 void roundMode_C(const Instruction *);
74 void roundMode_CS(const Instruction *);
75
76 void emitNegAbs12(const Instruction *);
77
78 void emitNOP(const Instruction *);
79
80 void emitLOAD(const Instruction *);
81 void emitSTORE(const Instruction *);
82 void emitMOV(const Instruction *);
83 void emitATOM(const Instruction *);
84 void emitMEMBAR(const Instruction *);
85 void emitCCTL(const Instruction *);
86
87 void emitINTERP(const Instruction *);
88 void emitAFETCH(const Instruction *);
89 void emitPFETCH(const Instruction *);
90 void emitVFETCH(const Instruction *);
91 void emitEXPORT(const Instruction *);
92 void emitOUT(const Instruction *);
93
94 void emitUADD(const Instruction *);
95 void emitFADD(const Instruction *);
96 void emitDADD(const Instruction *);
97 void emitUMUL(const Instruction *);
98 void emitFMUL(const Instruction *);
99 void emitDMUL(const Instruction *);
100 void emitIMAD(const Instruction *);
101 void emitISAD(const Instruction *);
102 void emitFMAD(const Instruction *);
103 void emitDMAD(const Instruction *);
104 void emitMADSP(const Instruction *);
105
106 void emitNOT(Instruction *);
107 void emitLogicOp(const Instruction *, uint8_t subOp);
108 void emitPOPC(const Instruction *);
109 void emitINSBF(const Instruction *);
110 void emitEXTBF(const Instruction *);
111 void emitBFIND(const Instruction *);
112 void emitPERMT(const Instruction *);
113 void emitShift(const Instruction *);
114
115 void emitSFnOp(const Instruction *, uint8_t subOp);
116
117 void emitCVT(Instruction *);
118 void emitMINMAX(const Instruction *);
119 void emitPreOp(const Instruction *);
120
121 void emitSET(const CmpInstruction *);
122 void emitSLCT(const CmpInstruction *);
123 void emitSELP(const Instruction *);
124
125 void emitTEXBAR(const Instruction *);
126 void emitTEX(const TexInstruction *);
127 void emitTEXCSAA(const TexInstruction *);
128 void emitTXQ(const TexInstruction *);
129
130 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
131
132 void emitFlow(const Instruction *);
133 void emitBAR(const Instruction *);
134
135 void emitSUCLAMPMode(uint16_t);
136 void emitSUCalc(Instruction *);
137 void emitSULDGB(const TexInstruction *);
138 void emitSUSTGx(const TexInstruction *);
139
140 void emitVSHL(const Instruction *);
141 void emitVectorSubOp(const Instruction *);
142
143 void emitPIXLD(const Instruction *);
144
145 inline void defId(const ValueDef&, const int pos);
146 inline void defId(const Instruction *, int d, const int pos);
147 inline void srcId(const ValueRef&, const int pos);
148 inline void srcId(const ValueRef *, const int pos);
149 inline void srcId(const Instruction *, int s, const int pos);
150 inline void srcAddr32(const ValueRef&, int pos, int shr);
151
152 inline bool isLIMM(const ValueRef&, DataType ty);
153 };
154
// HEX64 pastes two hex digit groups into a single unsigned 64-bit literal,
// which keeps the high and low opcode words visually separate at call sites.
#define HEX64(h, l) 0x##h##l##ULL

// Register data of the representative value behind a reference; SDATA is
// used for sources and DDATA for definitions (the expansions are identical).
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
160
161 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
162 {
163 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
164 }
165
166 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
167 {
168 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
169 }
170
171 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
172 {
173 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
174 code[pos / 32] |= r << (pos % 32);
175 }
176
177 void
178 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
179 {
180 const uint32_t offset = SDATA(src).offset >> shr;
181
182 code[pos / 32] |= offset << (pos % 32);
183 if (pos && (pos < 32))
184 code[1] |= offset >> (32 - pos);
185 }
186
187 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
188 {
189 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
190 }
191
192 void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
193 {
194 int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
195 code[pos / 32] |= r << (pos % 32);
196 }
197
198 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
199 {
200 const ImmediateValue *imm = ref.get()->asImm();
201
202 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
203 }
204
205 void
206 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
207 {
208 switch (insn->rnd) {
209 case ROUND_M: code[1] |= 1 << 23; break;
210 case ROUND_P: code[1] |= 2 << 23; break;
211 case ROUND_Z: code[1] |= 3 << 23; break;
212 default:
213 assert(insn->rnd == ROUND_N);
214 break;
215 }
216 }
217
218 void
219 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
220 {
221 if (i->src(1).mod.abs()) code[0] |= 1 << 6;
222 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
223 if (i->src(1).mod.neg()) code[0] |= 1 << 8;
224 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
225 }
226
227 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
228 {
229 uint8_t val;
230
231 switch (cc) {
232 case CC_LT: val = 0x1; break;
233 case CC_LTU: val = 0x9; break;
234 case CC_EQ: val = 0x2; break;
235 case CC_EQU: val = 0xa; break;
236 case CC_LE: val = 0x3; break;
237 case CC_LEU: val = 0xb; break;
238 case CC_GT: val = 0x4; break;
239 case CC_GTU: val = 0xc; break;
240 case CC_NE: val = 0x5; break;
241 case CC_NEU: val = 0xd; break;
242 case CC_GE: val = 0x6; break;
243 case CC_GEU: val = 0xe; break;
244 case CC_TR: val = 0xf; break;
245 case CC_FL: val = 0x0; break;
246
247 case CC_A: val = 0x14; break;
248 case CC_NA: val = 0x13; break;
249 case CC_S: val = 0x15; break;
250 case CC_NS: val = 0x12; break;
251 case CC_C: val = 0x16; break;
252 case CC_NC: val = 0x11; break;
253 case CC_O: val = 0x17; break;
254 case CC_NO: val = 0x10; break;
255
256 default:
257 val = 0;
258 assert(!"invalid condition code");
259 break;
260 }
261 code[pos / 32] |= val << (pos % 32);
262 }
263
264 void
265 CodeEmitterNVC0::emitPredicate(const Instruction *i)
266 {
267 if (i->predSrc >= 0) {
268 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
269 srcId(i->src(i->predSrc), 10);
270 if (i->cc == CC_NOT_P)
271 code[0] |= 0x2000; // negate
272 } else {
273 code[0] |= 0x1c00;
274 }
275 }
276
277 void
278 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
279 {
280 switch (src.getFile()) {
281 case FILE_MEMORY_GLOBAL:
282 srcAddr32(src, 26, 0);
283 break;
284 case FILE_MEMORY_LOCAL:
285 case FILE_MEMORY_SHARED:
286 setAddress24(src);
287 break;
288 default:
289 assert(src.getFile() == FILE_MEMORY_CONST);
290 setAddress16(src);
291 break;
292 }
293 }
294
295 void
296 CodeEmitterNVC0::setAddress16(const ValueRef& src)
297 {
298 Symbol *sym = src.get()->asSym();
299
300 assert(sym);
301
302 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
303 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
304 }
305
306 void
307 CodeEmitterNVC0::setAddress24(const ValueRef& src)
308 {
309 Symbol *sym = src.get()->asSym();
310
311 assert(sym);
312
313 code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
314 code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
315 }
316
317 void
318 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
319 {
320 const ImmediateValue *imm = i->src(s).get()->asImm();
321 uint32_t u32;
322
323 assert(imm);
324 u32 = imm->reg.data.u32;
325
326 if ((code[0] & 0xf) == 0x2) {
327 // LIMM
328 code[0] |= (u32 & 0x3f) << 26;
329 code[1] |= u32 >> 6;
330 } else
331 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
332 // integer immediate
333 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
334 assert(!(code[1] & 0xc000));
335 u32 &= 0xfffff;
336 code[0] |= (u32 & 0x3f) << 26;
337 code[1] |= 0xc000 | (u32 >> 6);
338 } else {
339 // float immediate
340 assert(!(u32 & 0x00000fff));
341 assert(!(code[1] & 0xc000));
342 code[0] |= ((u32 >> 12) & 0x3f) << 26;
343 code[1] |= 0xc000 | (u32 >> 18);
344 }
345 }
346
347 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
348 {
349 const ImmediateValue *imm = ref.get()->asImm();
350
351 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
352
353 assert(s8 == imm->reg.data.s32);
354
355 code[0] |= (s8 & 0x3f) << 26;
356 code[0] |= (s8 >> 6) << 8;
357 }
358
359 void
360 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
361 {
362 code[0] = opc;
363 code[1] = opc >> 32;
364
365 emitPredicate(i);
366
367 defId(i->def(0), 14);
368
369 int s1 = 26;
370 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
371 s1 = 49;
372
373 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
374 switch (i->getSrc(s)->reg.file) {
375 case FILE_MEMORY_CONST:
376 assert(!(code[1] & 0xc000));
377 code[1] |= (s == 2) ? 0x8000 : 0x4000;
378 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
379 setAddress16(i->src(s));
380 break;
381 case FILE_IMMEDIATE:
382 assert(s == 1 ||
383 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
384 assert(!(code[1] & 0xc000));
385 setImmediate(i, s);
386 break;
387 case FILE_GPR:
388 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
389 break;
390 srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
391 break;
392 default:
393 // ignore here, can be predicate or flags, but must not be address
394 break;
395 }
396 }
397 }
398
399 void
400 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
401 {
402 code[0] = opc;
403 code[1] = opc >> 32;
404
405 emitPredicate(i);
406
407 defId(i->def(0), 14);
408
409 switch (i->src(0).getFile()) {
410 case FILE_MEMORY_CONST:
411 assert(!(code[1] & 0xc000));
412 code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
413 setAddress16(i->src(0));
414 break;
415 case FILE_IMMEDIATE:
416 assert(!(code[1] & 0xc000));
417 setImmediate(i, 0);
418 break;
419 case FILE_GPR:
420 srcId(i->src(0), 26);
421 break;
422 default:
423 // ignore here, can be predicate or flags, but must not be address
424 break;
425 }
426 }
427
428 void
429 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
430 {
431 code[0] = opc;
432
433 int ss2a = 0;
434 if (opc == 0x0d || opc == 0x0e)
435 ss2a = 2;
436
437 defId(i->def(0), 14);
438 srcId(i->src(0), 20);
439
440 assert(pred || (i->predSrc < 0));
441 if (pred)
442 emitPredicate(i);
443
444 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
445 if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
446 assert(!(code[0] & (0x300 >> ss2a)));
447 switch (i->src(s).get()->reg.fileIndex) {
448 case 0: code[0] |= 0x100 >> ss2a; break;
449 case 1: code[0] |= 0x200 >> ss2a; break;
450 case 16: code[0] |= 0x300 >> ss2a; break;
451 default:
452 ERROR("invalid c[] space for short form\n");
453 break;
454 }
455 if (s == 1)
456 code[0] |= i->getSrc(s)->reg.data.offset << 24;
457 else
458 code[0] |= i->getSrc(s)->reg.data.offset << 6;
459 } else
460 if (i->src(s).getFile() == FILE_IMMEDIATE) {
461 assert(s == 1);
462 setImmediateS8(i->src(s));
463 } else
464 if (i->src(s).getFile() == FILE_GPR) {
465 srcId(i->src(s), (s == 1) ? 26 : 8);
466 }
467 }
468 }
469
470 void
471 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
472 {
473 if (src.getFile() == FILE_MEMORY_CONST) {
474 switch (src.get()->reg.fileIndex) {
475 case 0: code[0] |= 0x100; break;
476 case 1: code[0] |= 0x200; break;
477 case 16: code[0] |= 0x300; break;
478 default:
479 assert(!"unsupported file index for short op");
480 break;
481 }
482 srcAddr32(src, 20, 2);
483 } else {
484 srcId(src, 20);
485 assert(src.getFile() == FILE_GPR);
486 }
487 }
488
489 void
490 CodeEmitterNVC0::emitNOP(const Instruction *i)
491 {
492 code[0] = 0x000001e4;
493 code[1] = 0x40000000;
494 emitPredicate(i);
495 }
496
497 void
498 CodeEmitterNVC0::emitFMAD(const Instruction *i)
499 {
500 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
501
502 if (i->encSize == 8) {
503 if (isLIMM(i->src(1), TYPE_F32)) {
504 emitForm_A(i, HEX64(20000000, 00000002));
505 } else {
506 emitForm_A(i, HEX64(30000000, 00000000));
507
508 if (i->src(2).mod.neg())
509 code[0] |= 1 << 8;
510 }
511 roundMode_A(i);
512
513 if (neg1)
514 code[0] |= 1 << 9;
515
516 if (i->saturate)
517 code[0] |= 1 << 5;
518 if (i->ftz)
519 code[0] |= 1 << 6;
520 } else {
521 assert(!i->saturate && !i->src(2).mod.neg());
522 emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
523 false);
524 if (neg1)
525 code[0] |= 1 << 4;
526 }
527 }
528
529 void
530 CodeEmitterNVC0::emitDMAD(const Instruction *i)
531 {
532 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
533
534 emitForm_A(i, HEX64(20000000, 00000001));
535
536 if (i->src(2).mod.neg())
537 code[0] |= 1 << 8;
538
539 roundMode_A(i);
540
541 if (neg1)
542 code[0] |= 1 << 9;
543
544 assert(!i->saturate);
545 assert(!i->ftz);
546 }
547
548 void
549 CodeEmitterNVC0::emitFMUL(const Instruction *i)
550 {
551 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
552
553 assert(i->postFactor >= -3 && i->postFactor <= 3);
554
555 if (i->encSize == 8) {
556 if (isLIMM(i->src(1), TYPE_F32)) {
557 assert(i->postFactor == 0); // constant folded, hopefully
558 emitForm_A(i, HEX64(30000000, 00000002));
559 } else {
560 emitForm_A(i, HEX64(58000000, 00000000));
561 roundMode_A(i);
562 code[1] |= ((i->postFactor > 0) ?
563 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
564 }
565 if (neg)
566 code[1] ^= 1 << 25; // aliases with LIMM sign bit
567
568 if (i->saturate)
569 code[0] |= 1 << 5;
570
571 if (i->dnz)
572 code[0] |= 1 << 7;
573 else
574 if (i->ftz)
575 code[0] |= 1 << 6;
576 } else {
577 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
578 emitForm_S(i, 0xa8, true);
579 }
580 }
581
582 void
583 CodeEmitterNVC0::emitDMUL(const Instruction *i)
584 {
585 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
586
587 emitForm_A(i, HEX64(50000000, 00000001));
588 roundMode_A(i);
589
590 if (neg)
591 code[0] |= 1 << 9;
592
593 assert(!i->saturate);
594 assert(!i->ftz);
595 assert(!i->dnz);
596 assert(!i->postFactor);
597 }
598
599 void
600 CodeEmitterNVC0::emitUMUL(const Instruction *i)
601 {
602 if (i->encSize == 8) {
603 if (i->src(1).getFile() == FILE_IMMEDIATE) {
604 emitForm_A(i, HEX64(10000000, 00000002));
605 } else {
606 emitForm_A(i, HEX64(50000000, 00000003));
607 }
608 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
609 code[0] |= 1 << 6;
610 if (i->sType == TYPE_S32)
611 code[0] |= 1 << 5;
612 if (i->dType == TYPE_S32)
613 code[0] |= 1 << 7;
614 } else {
615 emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
616
617 if (i->sType == TYPE_S32)
618 code[0] |= 1 << 6;
619 }
620 }
621
622 void
623 CodeEmitterNVC0::emitFADD(const Instruction *i)
624 {
625 if (i->encSize == 8) {
626 if (isLIMM(i->src(1), TYPE_F32)) {
627 assert(!i->saturate);
628 emitForm_A(i, HEX64(28000000, 00000002));
629
630 code[0] |= i->src(0).mod.abs() << 7;
631 code[0] |= i->src(0).mod.neg() << 9;
632
633 if (i->src(1).mod.abs())
634 code[1] &= 0xfdffffff;
635 if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
636 code[1] ^= 0x02000000;
637 } else {
638 emitForm_A(i, HEX64(50000000, 00000000));
639
640 roundMode_A(i);
641 if (i->saturate)
642 code[1] |= 1 << 17;
643
644 emitNegAbs12(i);
645 if (i->op == OP_SUB) code[0] ^= 1 << 8;
646 }
647 if (i->ftz)
648 code[0] |= 1 << 5;
649 } else {
650 assert(!i->saturate && i->op != OP_SUB &&
651 !i->src(0).mod.abs() &&
652 !i->src(1).mod.neg() && !i->src(1).mod.abs());
653
654 emitForm_S(i, 0x49, true);
655
656 if (i->src(0).mod.neg())
657 code[0] |= 1 << 7;
658 }
659 }
660
661 void
662 CodeEmitterNVC0::emitDADD(const Instruction *i)
663 {
664 assert(i->encSize == 8);
665 emitForm_A(i, HEX64(48000000, 00000001));
666 roundMode_A(i);
667 assert(!i->saturate);
668 assert(!i->ftz);
669 emitNegAbs12(i);
670 if (i->op == OP_SUB)
671 code[0] ^= 1 << 8;
672 }
673
674 void
675 CodeEmitterNVC0::emitUADD(const Instruction *i)
676 {
677 uint32_t addOp = 0;
678
679 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
680 assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
681
682 if (i->src(0).mod.neg())
683 addOp |= 0x200;
684 if (i->src(1).mod.neg())
685 addOp |= 0x100;
686 if (i->op == OP_SUB) {
687 addOp ^= 0x100;
688 assert(addOp != 0x300); // would be add-plus-one
689 }
690
691 if (i->encSize == 8) {
692 if (isLIMM(i->src(1), TYPE_U32)) {
693 emitForm_A(i, HEX64(08000000, 00000002));
694 if (i->defExists(1))
695 code[1] |= 1 << 26; // write carry
696 } else {
697 emitForm_A(i, HEX64(48000000, 00000003));
698 if (i->defExists(1))
699 code[1] |= 1 << 16; // write carry
700 }
701 code[0] |= addOp;
702
703 if (i->saturate)
704 code[0] |= 1 << 5;
705 if (i->flagsSrc >= 0) // add carry
706 code[0] |= 1 << 6;
707 } else {
708 assert(!(addOp & 0x100));
709 emitForm_S(i, (addOp >> 3) |
710 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
711 }
712 }
713
714 // TODO: shl-add
715 void
716 CodeEmitterNVC0::emitIMAD(const Instruction *i)
717 {
718 assert(i->encSize == 8);
719 emitForm_A(i, HEX64(20000000, 00000003));
720
721 if (isSignedType(i->dType))
722 code[0] |= 1 << 7;
723 if (isSignedType(i->sType))
724 code[0] |= 1 << 5;
725
726 code[1] |= i->saturate << 24;
727
728 if (i->flagsDef >= 0) code[1] |= 1 << 16;
729 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
730
731 if (i->src(2).mod.neg()) code[0] |= 0x10;
732 if (i->src(1).mod.neg() ^
733 i->src(0).mod.neg()) code[0] |= 0x20;
734
735 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
736 code[0] |= 1 << 6;
737 }
738
739 void
740 CodeEmitterNVC0::emitMADSP(const Instruction *i)
741 {
742 assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
743
744 emitForm_A(i, HEX64(00000000, 00000003));
745
746 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
747 code[1] |= 0x01800000;
748 } else {
749 code[0] |= (i->subOp & 0x00f) << 7;
750 code[0] |= (i->subOp & 0x0f0) << 1;
751 code[0] |= (i->subOp & 0x100) >> 3;
752 code[0] |= (i->subOp & 0x200) >> 2;
753 code[1] |= (i->subOp & 0xc00) << 13;
754 }
755
756 if (i->flagsDef >= 0)
757 code[1] |= 1 << 16;
758 }
759
760 void
761 CodeEmitterNVC0::emitISAD(const Instruction *i)
762 {
763 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
764 assert(i->encSize == 8);
765
766 emitForm_A(i, HEX64(38000000, 00000003));
767
768 if (i->dType == TYPE_S32)
769 code[0] |= 1 << 5;
770 }
771
772 void
773 CodeEmitterNVC0::emitNOT(Instruction *i)
774 {
775 assert(i->encSize == 8);
776 i->setSrc(1, i->src(0));
777 emitForm_A(i, HEX64(68000000, 000001c3));
778 }
779
780 void
781 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
782 {
783 if (i->def(0).getFile() == FILE_PREDICATE) {
784 code[0] = 0x00000004 | (subOp << 30);
785 code[1] = 0x0c000000;
786
787 emitPredicate(i);
788
789 defId(i->def(0), 17);
790 srcId(i->src(0), 20);
791 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
792 srcId(i->src(1), 26);
793 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
794
795 if (i->defExists(1)) {
796 defId(i->def(1), 14);
797 } else {
798 code[0] |= 7 << 14;
799 }
800 // (a OP b) OP c
801 if (i->predSrc != 2 && i->srcExists(2)) {
802 code[1] |= subOp << 21;
803 srcId(i->src(2), 17);
804 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
805 } else {
806 code[1] |= 0x000e0000;
807 }
808 } else
809 if (i->encSize == 8) {
810 if (isLIMM(i->src(1), TYPE_U32)) {
811 emitForm_A(i, HEX64(38000000, 00000002));
812
813 if (i->flagsDef >= 0)
814 code[1] |= 1 << 26;
815 } else {
816 emitForm_A(i, HEX64(68000000, 00000003));
817
818 if (i->flagsDef >= 0)
819 code[1] |= 1 << 16;
820 }
821 code[0] |= subOp << 6;
822
823 if (i->flagsSrc >= 0) // carry
824 code[0] |= 1 << 5;
825
826 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
827 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
828 } else {
829 emitForm_S(i, (subOp << 5) |
830 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
831 }
832 }
833
834 void
835 CodeEmitterNVC0::emitPOPC(const Instruction *i)
836 {
837 emitForm_A(i, HEX64(54000000, 00000004));
838
839 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
840 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
841 }
842
843 void
844 CodeEmitterNVC0::emitINSBF(const Instruction *i)
845 {
846 emitForm_A(i, HEX64(28000000, 00000003));
847 }
848
849 void
850 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
851 {
852 emitForm_A(i, HEX64(70000000, 00000003));
853
854 if (i->dType == TYPE_S32)
855 code[0] |= 1 << 5;
856 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
857 code[0] |= 1 << 8;
858 }
859
860 void
861 CodeEmitterNVC0::emitBFIND(const Instruction *i)
862 {
863 emitForm_B(i, HEX64(78000000, 00000003));
864
865 if (i->dType == TYPE_S32)
866 code[0] |= 1 << 5;
867 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
868 code[0] |= 1 << 8;
869 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
870 code[0] |= 1 << 6;
871 }
872
873 void
874 CodeEmitterNVC0::emitPERMT(const Instruction *i)
875 {
876 emitForm_A(i, HEX64(24000000, 00000004));
877
878 code[0] |= i->subOp << 5;
879 }
880
881 void
882 CodeEmitterNVC0::emitShift(const Instruction *i)
883 {
884 if (i->op == OP_SHR) {
885 emitForm_A(i, HEX64(58000000, 00000003)
886 | (isSignedType(i->dType) ? 0x20 : 0x00));
887 } else {
888 emitForm_A(i, HEX64(60000000, 00000003));
889 }
890
891 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
892 code[0] |= 1 << 9;
893 }
894
895 void
896 CodeEmitterNVC0::emitPreOp(const Instruction *i)
897 {
898 if (i->encSize == 8) {
899 emitForm_B(i, HEX64(60000000, 00000000));
900
901 if (i->op == OP_PREEX2)
902 code[0] |= 0x20;
903
904 if (i->src(0).mod.abs()) code[0] |= 1 << 6;
905 if (i->src(0).mod.neg()) code[0] |= 1 << 8;
906 } else {
907 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
908 }
909 }
910
911 void
912 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
913 {
914 if (i->encSize == 8) {
915 code[0] = 0x00000000 | (subOp << 26);
916 code[1] = 0xc8000000;
917
918 emitPredicate(i);
919
920 defId(i->def(0), 14);
921 srcId(i->src(0), 20);
922
923 assert(i->src(0).getFile() == FILE_GPR);
924
925 if (i->saturate) code[0] |= 1 << 5;
926
927 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
928 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
929 } else {
930 emitForm_S(i, 0x80000008 | (subOp << 26), true);
931
932 assert(!i->src(0).mod.neg());
933 if (i->src(0).mod.abs()) code[0] |= 1 << 30;
934 }
935 }
936
937 void
938 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
939 {
940 uint64_t op;
941
942 assert(i->encSize == 8);
943
944 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
945
946 if (i->ftz)
947 op |= 1 << 5;
948 else
949 if (!isFloatType(i->dType))
950 op |= isSignedType(i->dType) ? 0x23 : 0x03;
951 if (i->dType == TYPE_F64)
952 op |= 0x01;
953
954 emitForm_A(i, op);
955 emitNegAbs12(i);
956 }
957
958 void
959 CodeEmitterNVC0::roundMode_C(const Instruction *i)
960 {
961 switch (i->rnd) {
962 case ROUND_M: code[1] |= 1 << 17; break;
963 case ROUND_P: code[1] |= 2 << 17; break;
964 case ROUND_Z: code[1] |= 3 << 17; break;
965 case ROUND_NI: code[0] |= 1 << 7; break;
966 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
967 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
968 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
969 case ROUND_N: break;
970 default:
971 assert(!"invalid round mode");
972 break;
973 }
974 }
975
976 void
977 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
978 {
979 switch (i->rnd) {
980 case ROUND_M:
981 case ROUND_MI: code[0] |= 1 << 16; break;
982 case ROUND_P:
983 case ROUND_PI: code[0] |= 2 << 16; break;
984 case ROUND_Z:
985 case ROUND_ZI: code[0] |= 3 << 16; break;
986 default:
987 break;
988 }
989 }
990
991 void
992 CodeEmitterNVC0::emitCVT(Instruction *i)
993 {
994 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
995 DataType dType;
996
997 switch (i->op) {
998 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
999 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
1000 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1001 default:
1002 break;
1003 }
1004
1005 const bool sat = (i->op == OP_SAT) || i->saturate;
1006 const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
1007 const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
1008
1009 if (i->op == OP_NEG && i->dType == TYPE_U32)
1010 dType = TYPE_S32;
1011 else
1012 dType = i->dType;
1013
1014 if (i->encSize == 8) {
1015 emitForm_B(i, HEX64(10000000, 00000004));
1016
1017 roundMode_C(i);
1018
1019 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
1020 code[0] |= util_logbase2(typeSizeof(dType)) << 20;
1021 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
1022
1023 // for 8/16 source types, the byte/word is in subOp. word 1 is
1024 // represented as 2.
1025 code[1] |= i->subOp << 0x17;
1026
1027 if (sat)
1028 code[0] |= 0x20;
1029 if (abs)
1030 code[0] |= 1 << 6;
1031 if (neg && i->op != OP_ABS)
1032 code[0] |= 1 << 8;
1033
1034 if (i->ftz)
1035 code[1] |= 1 << 23;
1036
1037 if (isSignedIntType(dType))
1038 code[0] |= 0x080;
1039 if (isSignedIntType(i->sType))
1040 code[0] |= 0x200;
1041
1042 if (isFloatType(dType)) {
1043 if (!isFloatType(i->sType))
1044 code[1] |= 0x08000000;
1045 } else {
1046 if (isFloatType(i->sType))
1047 code[1] |= 0x04000000;
1048 else
1049 code[1] |= 0x0c000000;
1050 }
1051 } else {
1052 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
1053 code[0] = 0x298;
1054 } else
1055 if (isFloatType(dType)) {
1056 if (isFloatType(i->sType))
1057 code[0] = 0x098;
1058 else
1059 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
1060 } else {
1061 assert(isFloatType(i->sType));
1062
1063 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
1064 }
1065
1066 if (neg) code[0] |= 1 << 16;
1067 if (sat) code[0] |= 1 << 18;
1068 if (abs) code[0] |= 1 << 19;
1069
1070 roundMode_CS(i);
1071 }
1072 }
1073
1074 void
1075 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1076 {
1077 uint32_t hi;
1078 uint32_t lo = 0;
1079
1080 if (i->sType == TYPE_F64)
1081 lo = 0x1;
1082 else
1083 if (!isFloatType(i->sType))
1084 lo = 0x3;
1085
1086 if (isSignedIntType(i->sType))
1087 lo |= 0x20;
1088 if (isFloatType(i->dType)) {
1089 if (isFloatType(i->sType))
1090 lo |= 0x20;
1091 else
1092 lo |= 0x80;
1093 }
1094
1095 switch (i->op) {
1096 case OP_SET_AND: hi = 0x10000000; break;
1097 case OP_SET_OR: hi = 0x10200000; break;
1098 case OP_SET_XOR: hi = 0x10400000; break;
1099 default:
1100 hi = 0x100e0000;
1101 break;
1102 }
1103 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1104
1105 if (i->op != OP_SET)
1106 srcId(i->src(2), 32 + 17);
1107
1108 if (i->def(0).getFile() == FILE_PREDICATE) {
1109 if (i->sType == TYPE_F32)
1110 code[1] += 0x10000000;
1111 else
1112 code[1] += 0x08000000;
1113
1114 code[0] &= ~0xfc000;
1115 defId(i->def(0), 17);
1116 if (i->defExists(1))
1117 defId(i->def(1), 14);
1118 else
1119 code[0] |= 0x1c000;
1120 }
1121
1122 if (i->ftz)
1123 code[1] |= 1 << 27;
1124
1125 emitCondCode(i->setCond, 32 + 23);
1126 emitNegAbs12(i);
1127 }
1128
1129 void
1130 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1131 {
1132 uint64_t op;
1133
1134 switch (i->dType) {
1135 case TYPE_S32:
1136 op = HEX64(30000000, 00000023);
1137 break;
1138 case TYPE_U32:
1139 op = HEX64(30000000, 00000003);
1140 break;
1141 case TYPE_F32:
1142 op = HEX64(38000000, 00000000);
1143 break;
1144 default:
1145 assert(!"invalid type for SLCT");
1146 op = 0;
1147 break;
1148 }
1149 emitForm_A(i, op);
1150
1151 CondCode cc = i->setCond;
1152
1153 if (i->src(2).mod.neg())
1154 cc = reverseCondCode(cc);
1155
1156 emitCondCode(cc, 32 + 23);
1157
1158 if (i->ftz)
1159 code[0] |= 1 << 5;
1160 }
1161
1162 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1163 {
1164 emitForm_A(i, HEX64(20000000, 00000004));
1165
1166 if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1167 code[1] |= 1 << 20;
1168 }
1169
1170 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1171 {
1172 code[0] = 0x00000006 | (i->subOp << 26);
1173 code[1] = 0xf0000000;
1174 emitPredicate(i);
1175 emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1176 }
1177
1178 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1179 {
1180 code[0] = 0x00000086;
1181 code[1] = 0xd0000000;
1182
1183 code[1] |= i->tex.r;
1184 code[1] |= i->tex.s << 8;
1185
1186 if (i->tex.liveOnly)
1187 code[0] |= 1 << 9;
1188
1189 defId(i->def(0), 14);
1190 srcId(i->src(0), 20);
1191 }
1192
1193 static inline bool
1194 isNextIndependentTex(const TexInstruction *i)
1195 {
1196 if (!i->next || !isTextureOp(i->next->op))
1197 return false;
1198 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1199 return false;
1200 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1201 }
1202
1203 void
1204 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1205 {
1206 code[0] = 0x00000006;
1207
1208 if (isNextIndependentTex(i))
1209 code[0] |= 0x080; // t mode
1210 else
1211 code[0] |= 0x100; // p mode
1212
1213 if (i->tex.liveOnly)
1214 code[0] |= 1 << 9;
1215
1216 switch (i->op) {
1217 case OP_TEX: code[1] = 0x80000000; break;
1218 case OP_TXB: code[1] = 0x84000000; break;
1219 case OP_TXL: code[1] = 0x86000000; break;
1220 case OP_TXF: code[1] = 0x90000000; break;
1221 case OP_TXG: code[1] = 0xa0000000; break;
1222 case OP_TXLQ: code[1] = 0xb0000000; break;
1223 case OP_TXD: code[1] = 0xe0000000; break;
1224 default:
1225 assert(!"invalid texture op");
1226 break;
1227 }
1228 if (i->op == OP_TXF) {
1229 if (!i->tex.levelZero)
1230 code[1] |= 0x02000000;
1231 } else
1232 if (i->tex.levelZero) {
1233 code[1] |= 0x02000000;
1234 }
1235
1236 if (i->op != OP_TXD && i->tex.derivAll)
1237 code[1] |= 1 << 13;
1238
1239 defId(i->def(0), 14);
1240 srcId(i->src(0), 20);
1241
1242 emitPredicate(i);
1243
1244 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1245
1246 code[1] |= i->tex.mask << 14;
1247
1248 code[1] |= i->tex.r;
1249 code[1] |= i->tex.s << 8;
1250 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1251 code[1] |= 1 << 18; // in 1st source (with array index)
1252
1253 // texture target:
1254 code[1] |= (i->tex.target.getDim() - 1) << 20;
1255 if (i->tex.target.isCube())
1256 code[1] += 2 << 20;
1257 if (i->tex.target.isArray())
1258 code[1] |= 1 << 19;
1259 if (i->tex.target.isShadow())
1260 code[1] |= 1 << 24;
1261
1262 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1263
1264 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1265 // lzero
1266 if (i->op == OP_TXL)
1267 code[1] &= ~(1 << 26);
1268 else
1269 if (i->op == OP_TXF)
1270 code[1] &= ~(1 << 25);
1271 }
1272 if (i->tex.target == TEX_TARGET_2D_MS ||
1273 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1274 code[1] |= 1 << 23;
1275
1276 if (i->tex.useOffsets == 1)
1277 code[1] |= 1 << 22;
1278 if (i->tex.useOffsets == 4)
1279 code[1] |= 1 << 23;
1280
1281 srcId(i, src1, 26);
1282 }
1283
1284 void
1285 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1286 {
1287 code[0] = 0x00000086;
1288 code[1] = 0xc0000000;
1289
1290 switch (i->tex.query) {
1291 case TXQ_DIMS: code[1] |= 0 << 22; break;
1292 case TXQ_TYPE: code[1] |= 1 << 22; break;
1293 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1294 case TXQ_FILTER: code[1] |= 3 << 22; break;
1295 case TXQ_LOD: code[1] |= 4 << 22; break;
1296 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1297 default:
1298 assert(!"invalid texture query");
1299 break;
1300 }
1301
1302 code[1] |= i->tex.mask << 14;
1303
1304 code[1] |= i->tex.r;
1305 code[1] |= i->tex.s << 8;
1306 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1307 code[1] |= 1 << 18;
1308
1309 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1310
1311 defId(i->def(0), 14);
1312 srcId(i->src(0), 20);
1313 srcId(i, src1, 26);
1314
1315 emitPredicate(i);
1316 }
1317
1318 void
1319 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1320 {
1321 code[0] = 0x00000000 | (laneMask << 6);
1322 code[1] = 0x48000000 | qOp;
1323
1324 defId(i->def(0), 14);
1325 srcId(i->src(0), 20);
1326 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
1327
1328 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1329 code[0] |= 1 << 9; // dall
1330
1331 emitPredicate(i);
1332 }
1333
1334 void
1335 CodeEmitterNVC0::emitFlow(const Instruction *i)
1336 {
1337 const FlowInstruction *f = i->asFlow();
1338
1339 unsigned mask; // bit 0: predicate, bit 1: target
1340
1341 code[0] = 0x00000007;
1342
1343 switch (i->op) {
1344 case OP_BRA:
1345 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1346 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1347 code[0] |= 0x4000;
1348 mask = 3;
1349 break;
1350 case OP_CALL:
1351 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1352 if (f->indirect)
1353 code[0] |= 0x4000; // indirect calls always use c[] source
1354 mask = 2;
1355 break;
1356
1357 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1358 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1359 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1360 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1361 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1362
1363 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1364 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1365 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1366 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1367
1368 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1369 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1370 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1371 default:
1372 assert(!"invalid flow operation");
1373 return;
1374 }
1375
1376 if (mask & 1) {
1377 emitPredicate(i);
1378 if (i->flagsSrc < 0)
1379 code[0] |= 0x1e0;
1380 }
1381
1382 if (!f)
1383 return;
1384
1385 if (f->allWarp)
1386 code[0] |= 1 << 15;
1387 if (f->limit)
1388 code[0] |= 1 << 16;
1389
1390 if (f->indirect) {
1391 if (code[0] & 0x4000) {
1392 assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1393 setAddress16(i->src(0));
1394 code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1395 if (f->op == OP_BRA)
1396 srcId(f->src(0).getIndirect(0), 20);
1397 } else {
1398 srcId(f, 0, 20);
1399 }
1400 }
1401
1402 if (f->op == OP_CALL) {
1403 if (f->indirect) {
1404 // nothing
1405 } else
1406 if (f->builtin) {
1407 assert(f->absolute);
1408 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1409 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1410 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1411 } else {
1412 assert(!f->absolute);
1413 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1414 code[0] |= (pcRel & 0x3f) << 26;
1415 code[1] |= (pcRel >> 6) & 0x3ffff;
1416 }
1417 } else
1418 if (mask & 2) {
1419 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1420 if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1421 pcRel += 8;
1422 // currently we don't want absolute branches
1423 assert(!f->absolute);
1424 code[0] |= (pcRel & 0x3f) << 26;
1425 code[1] |= (pcRel >> 6) & 0x3ffff;
1426 }
1427 }
1428
1429 void
1430 CodeEmitterNVC0::emitBAR(const Instruction *i)
1431 {
1432 Value *rDef = NULL, *pDef = NULL;
1433
1434 switch (i->subOp) {
1435 case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break;
1436 case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break;
1437 case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break;
1438 case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1439 default:
1440 code[0] = 0x04;
1441 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1442 break;
1443 }
1444 code[1] = 0x50000000;
1445
1446 code[0] |= 63 << 14;
1447 code[1] |= 7 << 21;
1448
1449 emitPredicate(i);
1450
1451 // barrier id
1452 if (i->src(0).getFile() == FILE_GPR) {
1453 srcId(i->src(0), 20);
1454 } else {
1455 ImmediateValue *imm = i->getSrc(0)->asImm();
1456 assert(imm);
1457 code[0] |= imm->reg.data.u32 << 20;
1458 code[1] |= 0x8000;
1459 }
1460
1461 // thread count
1462 if (i->src(1).getFile() == FILE_GPR) {
1463 srcId(i->src(1), 26);
1464 } else {
1465 ImmediateValue *imm = i->getSrc(1)->asImm();
1466 assert(imm);
1467 code[0] |= imm->reg.data.u32 << 26;
1468 code[1] |= imm->reg.data.u32 >> 6;
1469 code[1] |= 0x4000;
1470 }
1471
1472 if (i->srcExists(2) && (i->predSrc != 2)) {
1473 srcId(i->src(2), 32 + 17);
1474 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1475 code[1] |= 1 << 20;
1476 } else {
1477 code[1] |= 7 << 17;
1478 }
1479
1480 if (i->defExists(0)) {
1481 if (i->def(0).getFile() == FILE_GPR)
1482 rDef = i->getDef(0);
1483 else
1484 pDef = i->getDef(0);
1485
1486 if (i->defExists(1)) {
1487 if (i->def(1).getFile() == FILE_GPR)
1488 rDef = i->getDef(1);
1489 else
1490 pDef = i->getDef(1);
1491 }
1492 }
1493 if (rDef) {
1494 code[0] &= ~(63 << 14);
1495 defId(rDef, 14);
1496 }
1497 if (pDef) {
1498 code[1] &= ~(7 << 21);
1499 defId(pDef, 32 + 21);
1500 }
1501 }
1502
1503 void
1504 CodeEmitterNVC0::emitAFETCH(const Instruction *i)
1505 {
1506 code[0] = 0x00000006;
1507 code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
1508
1509 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1510 code[0] |= 0x200;
1511
1512 emitPredicate(i);
1513
1514 defId(i->def(0), 14);
1515 srcId(i->src(0).getIndirect(0), 20);
1516 }
1517
1518 void
1519 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1520 {
1521 uint32_t prim = i->src(0).get()->reg.data.u32;
1522
1523 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1524 code[1] = 0x00000000 | (prim >> 6);
1525
1526 emitPredicate(i);
1527
1528 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1529
1530 defId(i->def(0), 14);
1531 srcId(i, src1, 20);
1532 }
1533
1534 void
1535 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1536 {
1537 code[0] = 0x00000006;
1538 code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1539
1540 if (i->perPatch)
1541 code[0] |= 0x100;
1542 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1543 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1544
1545 emitPredicate(i);
1546
1547 code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1548
1549 defId(i->def(0), 14);
1550 srcId(i->src(0).getIndirect(0), 20);
1551 srcId(i->src(0).getIndirect(1), 26); // vertex address
1552 }
1553
1554 void
1555 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1556 {
1557 unsigned int size = typeSizeof(i->dType);
1558
1559 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1560 code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1561
1562 assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1563
1564 if (i->perPatch)
1565 code[0] |= 0x100;
1566
1567 emitPredicate(i);
1568
1569 assert(i->src(1).getFile() == FILE_GPR);
1570
1571 srcId(i->src(0).getIndirect(0), 20);
1572 srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1573 srcId(i->src(1), 26);
1574 }
1575
1576 void
1577 CodeEmitterNVC0::emitOUT(const Instruction *i)
1578 {
1579 code[0] = 0x00000006;
1580 code[1] = 0x1c000000;
1581
1582 emitPredicate(i);
1583
1584 defId(i->def(0), 14); // new secret address
1585 srcId(i->src(0), 20); // old secret address, should be 0 initially
1586
1587 assert(i->src(0).getFile() == FILE_GPR);
1588
1589 if (i->op == OP_EMIT)
1590 code[0] |= 1 << 5;
1591 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1592 code[0] |= 1 << 6;
1593
1594 // vertex stream
1595 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1596 unsigned int stream = SDATA(i->src(1)).u32;
1597 assert(stream < 4);
1598 if (stream) {
1599 code[1] |= 0xc000;
1600 code[0] |= stream << 26;
1601 } else {
1602 srcId(NULL, 26);
1603 }
1604 } else {
1605 srcId(i->src(1), 26);
1606 }
1607 }
1608
1609 void
1610 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1611 {
1612 if (i->encSize == 8) {
1613 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1614 } else {
1615 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1616 code[0] |= 0x80;
1617 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1618 }
1619 }
1620
1621 void
1622 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1623 {
1624 const uint32_t base = i->getSrc(0)->reg.data.offset;
1625
1626 if (i->encSize == 8) {
1627 code[0] = 0x00000000;
1628 code[1] = 0xc0000000 | (base & 0xffff);
1629
1630 if (i->saturate)
1631 code[0] |= 1 << 5;
1632
1633 if (i->op == OP_PINTERP)
1634 srcId(i->src(1), 26);
1635 else
1636 code[0] |= 0x3f << 26;
1637
1638 srcId(i->src(0).getIndirect(0), 20);
1639 } else {
1640 assert(i->op == OP_PINTERP);
1641 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1642 srcId(i->src(1), 20);
1643 }
1644 emitInterpMode(i);
1645
1646 emitPredicate(i);
1647 defId(i->def(0), 14);
1648
1649 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1650 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
1651 else
1652 code[1] |= 0x3f << 17;
1653 }
1654
1655 void
1656 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1657 {
1658 uint8_t val;
1659
1660 switch (ty) {
1661 case TYPE_U8:
1662 val = 0x00;
1663 break;
1664 case TYPE_S8:
1665 val = 0x20;
1666 break;
1667 case TYPE_F16:
1668 case TYPE_U16:
1669 val = 0x40;
1670 break;
1671 case TYPE_S16:
1672 val = 0x60;
1673 break;
1674 case TYPE_F32:
1675 case TYPE_U32:
1676 case TYPE_S32:
1677 val = 0x80;
1678 break;
1679 case TYPE_F64:
1680 case TYPE_U64:
1681 case TYPE_S64:
1682 val = 0xa0;
1683 break;
1684 case TYPE_B128:
1685 val = 0xc0;
1686 break;
1687 default:
1688 val = 0x80;
1689 assert(!"invalid type");
1690 break;
1691 }
1692 code[0] |= val;
1693 }
1694
1695 void
1696 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1697 {
1698 uint32_t val;
1699
1700 switch (c) {
1701 case CACHE_CA:
1702 // case CACHE_WB:
1703 val = 0x000;
1704 break;
1705 case CACHE_CG:
1706 val = 0x100;
1707 break;
1708 case CACHE_CS:
1709 val = 0x200;
1710 break;
1711 case CACHE_CV:
1712 // case CACHE_WT:
1713 val = 0x300;
1714 break;
1715 default:
1716 val = 0;
1717 assert(!"invalid caching mode");
1718 break;
1719 }
1720 code[0] |= val;
1721 }
1722
1723 static inline bool
1724 uses64bitAddress(const Instruction *ldst)
1725 {
1726 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1727 ldst->src(0).isIndirect(0) &&
1728 ldst->getIndirect(0, 0)->reg.size == 8;
1729 }
1730
1731 void
1732 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1733 {
1734 uint32_t opc;
1735
1736 switch (i->src(0).getFile()) {
1737 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1738 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1739 case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1740 default:
1741 assert(!"invalid memory file");
1742 opc = 0;
1743 break;
1744 }
1745 code[0] = 0x00000005;
1746 code[1] = opc;
1747
1748 setAddressByFile(i->src(0));
1749 srcId(i->src(1), 14);
1750 srcId(i->src(0).getIndirect(0), 20);
1751 if (uses64bitAddress(i))
1752 code[1] |= 1 << 26;
1753
1754 emitPredicate(i);
1755
1756 emitLoadStoreType(i->dType);
1757 emitCachingMode(i->cache);
1758 }
1759
1760 void
1761 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1762 {
1763 uint32_t opc;
1764
1765 code[0] = 0x00000005;
1766
1767 switch (i->src(0).getFile()) {
1768 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1769 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1770 case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1771 case FILE_MEMORY_CONST:
1772 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1773 emitMOV(i); // not sure if this is any better
1774 return;
1775 }
1776 opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1777 code[0] = 0x00000006 | (i->subOp << 8);
1778 break;
1779 default:
1780 assert(!"invalid memory file");
1781 opc = 0;
1782 break;
1783 }
1784 code[1] = opc;
1785
1786 defId(i->def(0), 14);
1787
1788 setAddressByFile(i->src(0));
1789 srcId(i->src(0).getIndirect(0), 20);
1790 if (uses64bitAddress(i))
1791 code[1] |= 1 << 26;
1792
1793 emitPredicate(i);
1794
1795 emitLoadStoreType(i->dType);
1796 emitCachingMode(i->cache);
1797 }
1798
1799 uint8_t
1800 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1801 {
1802 switch (SDATA(ref).sv.sv) {
1803 case SV_LANEID: return 0x00;
1804 case SV_PHYSID: return 0x03;
1805 case SV_VERTEX_COUNT: return 0x10;
1806 case SV_INVOCATION_ID: return 0x11;
1807 case SV_YDIR: return 0x12;
1808 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1809 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1810 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1811 case SV_GRIDID: return 0x2c;
1812 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1813 case SV_LBASE: return 0x34;
1814 case SV_SBASE: return 0x30;
1815 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1816 default:
1817 assert(!"no sreg for system value");
1818 return 0;
1819 }
1820 }
1821
1822 void
1823 CodeEmitterNVC0::emitMOV(const Instruction *i)
1824 {
1825 if (i->def(0).getFile() == FILE_PREDICATE) {
1826 if (i->src(0).getFile() == FILE_GPR) {
1827 code[0] = 0xfc01c003;
1828 code[1] = 0x1a8e0000;
1829 srcId(i->src(0), 20);
1830 } else {
1831 code[0] = 0x0001c004;
1832 code[1] = 0x0c0e0000;
1833 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1834 code[0] |= 7 << 20;
1835 if (!i->getSrc(0)->reg.data.u32)
1836 code[0] |= 1 << 23;
1837 } else {
1838 srcId(i->src(0), 20);
1839 }
1840 }
1841 defId(i->def(0), 17);
1842 emitPredicate(i);
1843 } else
1844 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1845 uint8_t sr = getSRegEncoding(i->src(0));
1846
1847 if (i->encSize == 8) {
1848 code[0] = 0x00000004 | (sr << 26);
1849 code[1] = 0x2c000000;
1850 } else {
1851 code[0] = 0x40000008 | (sr << 20);
1852 }
1853 defId(i->def(0), 14);
1854
1855 emitPredicate(i);
1856 } else
1857 if (i->encSize == 8) {
1858 uint64_t opc;
1859
1860 if (i->src(0).getFile() == FILE_IMMEDIATE)
1861 opc = HEX64(18000000, 000001e2);
1862 else
1863 if (i->src(0).getFile() == FILE_PREDICATE)
1864 opc = HEX64(080e0000, 1c000004);
1865 else
1866 opc = HEX64(28000000, 00000004);
1867
1868 opc |= i->lanes << 5;
1869
1870 emitForm_B(i, opc);
1871 } else {
1872 uint32_t imm;
1873
1874 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1875 imm = SDATA(i->src(0)).u32;
1876 if (imm & 0xfff00000) {
1877 assert(!(imm & 0x000fffff));
1878 code[0] = 0x00000318 | imm;
1879 } else {
1880 assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1881 code[0] = 0x00000118 | (imm << 20);
1882 }
1883 } else {
1884 code[0] = 0x0028;
1885 emitShortSrc2(i->src(0));
1886 }
1887 defId(i->def(0), 14);
1888
1889 emitPredicate(i);
1890 }
1891 }
1892
1893 void
1894 CodeEmitterNVC0::emitATOM(const Instruction *i)
1895 {
1896 const bool hasDst = i->defExists(0);
1897 const bool casOrExch =
1898 i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
1899 i->subOp == NV50_IR_SUBOP_ATOM_CAS;
1900
1901 if (i->dType == TYPE_U64) {
1902 switch (i->subOp) {
1903 case NV50_IR_SUBOP_ATOM_ADD:
1904 code[0] = 0x205;
1905 if (hasDst)
1906 code[1] = 0x507e0000;
1907 else
1908 code[1] = 0x10000000;
1909 break;
1910 case NV50_IR_SUBOP_ATOM_EXCH:
1911 code[0] = 0x305;
1912 code[1] = 0x507e0000;
1913 break;
1914 case NV50_IR_SUBOP_ATOM_CAS:
1915 code[0] = 0x325;
1916 code[1] = 0x50000000;
1917 break;
1918 default:
1919 assert(!"invalid u64 red op");
1920 break;
1921 }
1922 } else
1923 if (i->dType == TYPE_U32) {
1924 switch (i->subOp) {
1925 case NV50_IR_SUBOP_ATOM_EXCH:
1926 code[0] = 0x105;
1927 code[1] = 0x507e0000;
1928 break;
1929 case NV50_IR_SUBOP_ATOM_CAS:
1930 code[0] = 0x125;
1931 code[1] = 0x50000000;
1932 break;
1933 default:
1934 code[0] = 0x5 | (i->subOp << 5);
1935 if (hasDst)
1936 code[1] = 0x507e0000;
1937 else
1938 code[1] = 0x10000000;
1939 break;
1940 }
1941 } else
1942 if (i->dType == TYPE_S32) {
1943 assert(i->subOp <= 2);
1944 code[0] = 0x205 | (i->subOp << 5);
1945 if (hasDst)
1946 code[1] = 0x587e0000;
1947 else
1948 code[1] = 0x18000000;
1949 } else
1950 if (i->dType == TYPE_F32) {
1951 assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
1952 code[0] = 0x205;
1953 if (hasDst)
1954 code[1] = 0x687e0000;
1955 else
1956 code[1] = 0x28000000;
1957 }
1958
1959 emitPredicate(i);
1960
1961 srcId(i->src(1), 14);
1962
1963 if (hasDst)
1964 defId(i->def(0), 32 + 11);
1965 else
1966 if (casOrExch)
1967 code[1] |= 63 << 11;
1968
1969 if (hasDst || casOrExch) {
1970 const int32_t offset = SDATA(i->src(0)).offset;
1971 assert(offset < 0x80000 && offset >= -0x80000);
1972 code[0] |= offset << 26;
1973 code[1] |= (offset & 0x1ffc0) >> 6;
1974 code[1] |= (offset & 0xe0000) << 6;
1975 } else {
1976 srcAddr32(i->src(0), 26, 0);
1977 }
1978 if (i->getIndirect(0, 0)) {
1979 srcId(i->getIndirect(0, 0), 20);
1980 if (i->getIndirect(0, 0)->reg.size == 8)
1981 code[1] |= 1 << 26;
1982 } else {
1983 code[0] |= 63 << 20;
1984 }
1985
1986 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1987 srcId(i->src(2), 32 + 17);
1988 }
1989
1990 void
1991 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
1992 {
1993 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
1994 case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
1995 case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break;
1996 default:
1997 code[0] = 0x45;
1998 assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
1999 break;
2000 }
2001 code[1] = 0xe0000000;
2002
2003 emitPredicate(i);
2004 }
2005
2006 void
2007 CodeEmitterNVC0::emitCCTL(const Instruction *i)
2008 {
2009 code[0] = 0x00000005 | (i->subOp << 5);
2010
2011 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2012 code[1] = 0x98000000;
2013 srcAddr32(i->src(0), 28, 2);
2014 } else {
2015 code[1] = 0xd0000000;
2016 setAddress24(i->src(0));
2017 }
2018 if (uses64bitAddress(i))
2019 code[1] |= 1 << 26;
2020 srcId(i->src(0).getIndirect(0), 20);
2021
2022 emitPredicate(i);
2023
2024 defId(i, 0, 14);
2025 }
2026
2027 void
2028 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
2029 {
2030 uint8_t m;
2031 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
2032 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
2033 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
2034 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
2035 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
2036 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
2037 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
2038 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
2039 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
2040 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
2041 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
2042 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
2043 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
2044 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
2045 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
2046 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
2047 default:
2048 return;
2049 }
2050 code[0] |= m << 5;
2051 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
2052 code[1] |= 1 << 16;
2053 }
2054
2055 void
2056 CodeEmitterNVC0::emitSUCalc(Instruction *i)
2057 {
2058 ImmediateValue *imm = NULL;
2059 uint64_t opc;
2060
2061 if (i->srcExists(2)) {
2062 imm = i->getSrc(2)->asImm();
2063 if (imm)
2064 i->setSrc(2, NULL); // special case, make emitForm_A not assert
2065 }
2066
2067 switch (i->op) {
2068 case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
2069 case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
2070 case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
2071 default:
2072 assert(0);
2073 return;
2074 }
2075 emitForm_A(i, opc);
2076
2077 if (i->op == OP_SUCLAMP) {
2078 if (i->dType == TYPE_S32)
2079 code[0] |= 1 << 9;
2080 emitSUCLAMPMode(i->subOp);
2081 }
2082
2083 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
2084 code[1] |= 1 << 16;
2085
2086 if (i->op != OP_SUEAU) {
2087 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2088 code[0] |= 63 << 14;
2089 code[1] |= i->getDef(0)->reg.data.id << 23;
2090 } else
2091 if (i->defExists(1)) { // r, p
2092 assert(i->def(1).getFile() == FILE_PREDICATE);
2093 code[1] |= i->getDef(1)->reg.data.id << 23;
2094 } else { // r, #
2095 code[1] |= 7 << 23;
2096 }
2097 }
2098 if (imm) {
2099 assert(i->op == OP_SUCLAMP);
2100 i->setSrc(2, imm);
2101 code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
2102 }
2103 }
2104
2105 void
2106 CodeEmitterNVC0::emitSUGType(DataType ty)
2107 {
2108 switch (ty) {
2109 case TYPE_S32: code[1] |= 1 << 13; break;
2110 case TYPE_U8: code[1] |= 2 << 13; break;
2111 case TYPE_S8: code[1] |= 3 << 13; break;
2112 default:
2113 assert(ty == TYPE_U32);
2114 break;
2115 }
2116 }
2117
2118 void
2119 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2120 {
2121 const uint32_t offset = i->getSrc(s)->reg.data.offset;
2122
2123 assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2124 assert(offset == (offset & 0xfffc));
2125
2126 code[1] |= 1 << 21;
2127 code[0] |= offset << 24;
2128 code[1] |= offset >> 8;
2129 code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2130 }
2131
2132 void
2133 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2134 {
2135 if (!i->srcExists(s) || (i->predSrc == s)) {
2136 code[1] |= 0x7 << 17;
2137 } else {
2138 if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2139 code[1] |= 1 << 20;
2140 srcId(i->src(s), 32 + 17);
2141 }
2142 }
2143
2144 void
2145 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2146 {
2147 code[0] = 0x5;
2148 code[1] = 0xd4000000 | (i->subOp << 15);
2149
2150 emitLoadStoreType(i->dType);
2151 emitSUGType(i->sType);
2152 emitCachingMode(i->cache);
2153
2154 emitPredicate(i);
2155 defId(i->def(0), 14); // destination
2156 srcId(i->src(0), 20); // address
2157 // format
2158 if (i->src(1).getFile() == FILE_GPR)
2159 srcId(i->src(1), 26);
2160 else
2161 setSUConst16(i, 1);
2162 setSUPred(i, 2);
2163 }
2164
2165 void
2166 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2167 {
2168 code[0] = 0x5;
2169 code[1] = 0xdc000000 | (i->subOp << 15);
2170
2171 if (i->op == OP_SUSTP)
2172 code[1] |= i->tex.mask << 22;
2173 else
2174 emitLoadStoreType(i->dType);
2175 emitSUGType(i->sType);
2176 emitCachingMode(i->cache);
2177
2178 emitPredicate(i);
2179 srcId(i->src(0), 20); // address
2180 // format
2181 if (i->src(1).getFile() == FILE_GPR)
2182 srcId(i->src(1), 26);
2183 else
2184 setSUConst16(i, 1);
2185 srcId(i->src(3), 14); // values
2186 setSUPred(i, 2);
2187 }
2188
2189 void
2190 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2191 {
2192 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2193 case 0:
2194 code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2195 code[1] |= (i->subOp & 0x00e0) >> 5; // vsrc2
2196 code[1] |= (i->subOp & 0x0100) << 7; // vsrc2
2197 code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2198 break;
2199 case 1:
2200 code[1] |= (i->subOp & 0x000f) << 8; // v2src1
2201 code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2202 code[1] |= (i->subOp & 0x01e0) >> 1; // v2src2
2203 code[1] |= (i->subOp & 0x0200) << 6; // v2src2
2204 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2205 code[1] |= (i->mask & 0x3) << 2;
2206 break;
2207 case 2:
2208 code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2209 code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2210 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2211 code[1] |= (i->mask & 0x3) << 2;
2212 code[1] |= (i->mask & 0xc) << 21;
2213 break;
2214 default:
2215 assert(0);
2216 break;
2217 }
2218 }
2219
2220 void
2221 CodeEmitterNVC0::emitVSHL(const Instruction *i)
2222 {
2223 uint64_t opc = 0x4;
2224
2225 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2226 case 0: opc |= 0xe8ULL << 56; break;
2227 case 1: opc |= 0xb4ULL << 56; break;
2228 case 2: opc |= 0x94ULL << 56; break;
2229 default:
2230 assert(0);
2231 break;
2232 }
2233 if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2234 if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2235 if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2236 } else {
2237 if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2238 if (isSignedType(i->sType)) opc |= 1 << 6;
2239 }
2240 emitForm_A(i, opc);
2241 emitVectorSubOp(i);
2242
2243 if (i->saturate)
2244 code[0] |= 1 << 9;
2245 if (i->flagsDef >= 0)
2246 code[1] |= 1 << 16;
2247 }
2248
2249 void
2250 CodeEmitterNVC0::emitPIXLD(const Instruction *i)
2251 {
2252 assert(i->encSize == 8);
2253 emitForm_A(i, HEX64(10000000, 00000006));
2254 code[0] |= i->subOp << 5;
2255 code[1] |= 0x00e00000;
2256 }
2257
2258 bool
2259 CodeEmitterNVC0::emitInstruction(Instruction *insn)
2260 {
2261 unsigned int size = insn->encSize;
2262
2263 if (writeIssueDelays && !(codeSize & 0x3f))
2264 size += 8;
2265
2266 if (!insn->encSize) {
2267 ERROR("skipping unencodable instruction: "); insn->print();
2268 return false;
2269 } else
2270 if (codeSize + size > codeSizeLimit) {
2271 ERROR("code emitter output buffer too small\n");
2272 return false;
2273 }
2274
2275 if (writeIssueDelays) {
2276 if (!(codeSize & 0x3f)) {
2277 code[0] = 0x00000007; // cf issue delay "instruction"
2278 code[1] = 0x20000000;
2279 code += 2;
2280 codeSize += 8;
2281 }
2282 const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2283 uint32_t *data = code - (id * 2 + 2);
2284 if (id <= 2) {
2285 data[0] |= insn->sched << (id * 8 + 4);
2286 } else
2287 if (id == 3) {
2288 data[0] |= insn->sched << 28;
2289 data[1] |= insn->sched >> 4;
2290 } else {
2291 data[1] |= insn->sched << ((id - 4) * 8 + 4);
2292 }
2293 }
2294
2295 // assert that instructions with multiple defs don't corrupt registers
2296 for (int d = 0; insn->defExists(d); ++d)
2297 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2298
2299 switch (insn->op) {
2300 case OP_MOV:
2301 case OP_RDSV:
2302 emitMOV(insn);
2303 break;
2304 case OP_NOP:
2305 break;
2306 case OP_LOAD:
2307 emitLOAD(insn);
2308 break;
2309 case OP_STORE:
2310 emitSTORE(insn);
2311 break;
2312 case OP_LINTERP:
2313 case OP_PINTERP:
2314 emitINTERP(insn);
2315 break;
2316 case OP_VFETCH:
2317 emitVFETCH(insn);
2318 break;
2319 case OP_EXPORT:
2320 emitEXPORT(insn);
2321 break;
2322 case OP_PFETCH:
2323 emitPFETCH(insn);
2324 break;
2325 case OP_EMIT:
2326 case OP_RESTART:
2327 emitOUT(insn);
2328 break;
2329 case OP_ADD:
2330 case OP_SUB:
2331 if (insn->dType == TYPE_F64)
2332 emitDADD(insn);
2333 else if (isFloatType(insn->dType))
2334 emitFADD(insn);
2335 else
2336 emitUADD(insn);
2337 break;
2338 case OP_MUL:
2339 if (insn->dType == TYPE_F64)
2340 emitDMUL(insn);
2341 else if (isFloatType(insn->dType))
2342 emitFMUL(insn);
2343 else
2344 emitUMUL(insn);
2345 break;
2346 case OP_MAD:
2347 case OP_FMA:
2348 if (insn->dType == TYPE_F64)
2349 emitDMAD(insn);
2350 else if (isFloatType(insn->dType))
2351 emitFMAD(insn);
2352 else
2353 emitIMAD(insn);
2354 break;
2355 case OP_SAD:
2356 emitISAD(insn);
2357 break;
2358 case OP_NOT:
2359 emitNOT(insn);
2360 break;
2361 case OP_AND:
2362 emitLogicOp(insn, 0);
2363 break;
2364 case OP_OR:
2365 emitLogicOp(insn, 1);
2366 break;
2367 case OP_XOR:
2368 emitLogicOp(insn, 2);
2369 break;
2370 case OP_SHL:
2371 case OP_SHR:
2372 emitShift(insn);
2373 break;
2374 case OP_SET:
2375 case OP_SET_AND:
2376 case OP_SET_OR:
2377 case OP_SET_XOR:
2378 emitSET(insn->asCmp());
2379 break;
2380 case OP_SELP:
2381 emitSELP(insn);
2382 break;
2383 case OP_SLCT:
2384 emitSLCT(insn->asCmp());
2385 break;
2386 case OP_MIN:
2387 case OP_MAX:
2388 emitMINMAX(insn);
2389 break;
2390 case OP_ABS:
2391 case OP_NEG:
2392 case OP_CEIL:
2393 case OP_FLOOR:
2394 case OP_TRUNC:
2395 case OP_CVT:
2396 case OP_SAT:
2397 emitCVT(insn);
2398 break;
2399 case OP_RSQ:
2400 emitSFnOp(insn, 5 + 2 * insn->subOp);
2401 break;
2402 case OP_RCP:
2403 emitSFnOp(insn, 4 + 2 * insn->subOp);
2404 break;
2405 case OP_LG2:
2406 emitSFnOp(insn, 3);
2407 break;
2408 case OP_EX2:
2409 emitSFnOp(insn, 2);
2410 break;
2411 case OP_SIN:
2412 emitSFnOp(insn, 1);
2413 break;
2414 case OP_COS:
2415 emitSFnOp(insn, 0);
2416 break;
2417 case OP_PRESIN:
2418 case OP_PREEX2:
2419 emitPreOp(insn);
2420 break;
2421 case OP_TEX:
2422 case OP_TXB:
2423 case OP_TXL:
2424 case OP_TXD:
2425 case OP_TXF:
2426 case OP_TXG:
2427 case OP_TXLQ:
2428 emitTEX(insn->asTex());
2429 break;
2430 case OP_TXQ:
2431 emitTXQ(insn->asTex());
2432 break;
2433 case OP_TEXBAR:
2434 emitTEXBAR(insn);
2435 break;
2436 case OP_SUBFM:
2437 case OP_SUCLAMP:
2438 case OP_SUEAU:
2439 emitSUCalc(insn);
2440 break;
2441 case OP_MADSP:
2442 emitMADSP(insn);
2443 break;
2444 case OP_SULDB:
2445 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2446 emitSULDGB(insn->asTex());
2447 else
2448 ERROR("SULDB not yet supported on < nve4\n");
2449 break;
2450 case OP_SUSTB:
2451 case OP_SUSTP:
2452 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2453 emitSUSTGx(insn->asTex());
2454 else
2455 ERROR("SUSTx not yet supported on < nve4\n");
2456 break;
2457 case OP_ATOM:
2458 emitATOM(insn);
2459 break;
2460 case OP_BRA:
2461 case OP_CALL:
2462 case OP_PRERET:
2463 case OP_RET:
2464 case OP_DISCARD:
2465 case OP_EXIT:
2466 case OP_PRECONT:
2467 case OP_CONT:
2468 case OP_PREBREAK:
2469 case OP_BREAK:
2470 case OP_JOINAT:
2471 case OP_BRKPT:
2472 case OP_QUADON:
2473 case OP_QUADPOP:
2474 emitFlow(insn);
2475 break;
2476 case OP_QUADOP:
2477 emitQUADOP(insn, insn->subOp, insn->lanes);
2478 break;
2479 case OP_DFDX:
2480 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2481 break;
2482 case OP_DFDY:
2483 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2484 break;
2485 case OP_POPCNT:
2486 emitPOPC(insn);
2487 break;
2488 case OP_INSBF:
2489 emitINSBF(insn);
2490 break;
2491 case OP_EXTBF:
2492 emitEXTBF(insn);
2493 break;
2494 case OP_BFIND:
2495 emitBFIND(insn);
2496 break;
2497 case OP_PERMT:
2498 emitPERMT(insn);
2499 break;
2500 case OP_JOIN:
2501 emitNOP(insn);
2502 insn->join = 1;
2503 break;
2504 case OP_BAR:
2505 emitBAR(insn);
2506 break;
2507 case OP_MEMBAR:
2508 emitMEMBAR(insn);
2509 break;
2510 case OP_CCTL:
2511 emitCCTL(insn);
2512 break;
2513 case OP_VSHL:
2514 emitVSHL(insn);
2515 break;
2516 case OP_PIXLD:
2517 emitPIXLD(insn);
2518 break;
2519 case OP_PHI:
2520 case OP_UNION:
2521 case OP_CONSTRAINT:
2522 ERROR("operation should have been eliminated");
2523 return false;
2524 case OP_EXP:
2525 case OP_LOG:
2526 case OP_SQRT:
2527 case OP_POW:
2528 ERROR("operation should have been lowered\n");
2529 return false;
2530 default:
2531 ERROR("unknow op\n");
2532 return false;
2533 }
2534
2535 if (insn->join) {
2536 code[0] |= 0x10;
2537 assert(insn->encSize == 8);
2538 }
2539
2540 code += insn->encSize / 4;
2541 codeSize += insn->encSize;
2542 return true;
2543 }
2544
2545 uint32_t
2546 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2547 {
2548 const Target::OpInfo &info = targ->getOpInfo(i);
2549
2550 if (writeIssueDelays || info.minEncSize == 8 || 1)
2551 return 8;
2552
2553 if (i->ftz || i->saturate || i->join)
2554 return 8;
2555 if (i->rnd != ROUND_N)
2556 return 8;
2557 if (i->predSrc >= 0 && i->op == OP_MAD)
2558 return 8;
2559
2560 if (i->op == OP_PINTERP) {
2561 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2562 return 8;
2563 } else
2564 if (i->op == OP_MOV && i->lanes != 0xf) {
2565 return 8;
2566 }
2567
2568 for (int s = 0; i->srcExists(s); ++s) {
2569 if (i->src(s).isIndirect(0))
2570 return 8;
2571
2572 if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2573 if (SDATA(i->src(s)).offset >= 0x100)
2574 return 8;
2575 if (i->getSrc(s)->reg.fileIndex > 1 &&
2576 i->getSrc(s)->reg.fileIndex != 16)
2577 return 8;
2578 } else
2579 if (i->src(s).getFile() == FILE_IMMEDIATE) {
2580 if (i->dType == TYPE_F32) {
2581 if (SDATA(i->src(s)).u32 >= 0x100)
2582 return 8;
2583 } else {
2584 if (SDATA(i->src(s)).u32 > 0xff)
2585 return 8;
2586 }
2587 }
2588
2589 if (i->op == OP_CVT)
2590 continue;
2591 if (i->src(s).mod != Modifier(0)) {
2592 if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2593 if (i->op != OP_RSQ)
2594 return 8;
2595 if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2596 if (i->op != OP_ADD || s != 0)
2597 return 8;
2598 }
2599 }
2600
2601 return 4;
2602 }
2603
2604 // Simplified, erring on safe side.
2605 class SchedDataCalculator : public Pass
2606 {
2607 public:
2608 SchedDataCalculator(const Target *targ) : targ(targ) { }
2609
2610 private:
2611 struct RegScores
2612 {
2613 struct Resource {
2614 int st[DATA_FILE_COUNT]; // LD to LD delay 3
2615 int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2616 int tex; // TEX to non-TEX delay 17 (0x11)
2617 int sfu; // SFU to SFU delay 3 (except PRE-ops)
2618 int imul; // integer MUL to MUL delay 3
2619 } res;
2620 struct ScoreData {
2621 int r[256];
2622 int p[8];
2623 int c;
2624 } rd, wr;
2625 int base;
2626 int regs;
2627
2628 void rebase(const int base)
2629 {
2630 const int delta = this->base - base;
2631 if (!delta)
2632 return;
2633 this->base = 0;
2634
2635 for (int i = 0; i < regs; ++i) {
2636 rd.r[i] += delta;
2637 wr.r[i] += delta;
2638 }
2639 for (int i = 0; i < 8; ++i) {
2640 rd.p[i] += delta;
2641 wr.p[i] += delta;
2642 }
2643 rd.c += delta;
2644 wr.c += delta;
2645
2646 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2647 res.ld[f] += delta;
2648 res.st[f] += delta;
2649 }
2650 res.sfu += delta;
2651 res.imul += delta;
2652 res.tex += delta;
2653 }
2654 void wipe(int regs)
2655 {
2656 memset(&rd, 0, sizeof(rd));
2657 memset(&wr, 0, sizeof(wr));
2658 memset(&res, 0, sizeof(res));
2659 this->regs = regs;
2660 }
2661 int getLatest(const ScoreData& d) const
2662 {
2663 int max = 0;
2664 for (int i = 0; i < regs; ++i)
2665 if (d.r[i] > max)
2666 max = d.r[i];
2667 for (int i = 0; i < 8; ++i)
2668 if (d.p[i] > max)
2669 max = d.p[i];
2670 if (d.c > max)
2671 max = d.c;
2672 return max;
2673 }
2674 inline int getLatestRd() const
2675 {
2676 return getLatest(rd);
2677 }
2678 inline int getLatestWr() const
2679 {
2680 return getLatest(wr);
2681 }
2682 inline int getLatest() const
2683 {
2684 const int a = getLatestRd();
2685 const int b = getLatestWr();
2686
2687 int max = MAX2(a, b);
2688 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2689 max = MAX2(res.ld[f], max);
2690 max = MAX2(res.st[f], max);
2691 }
2692 max = MAX2(res.sfu, max);
2693 max = MAX2(res.imul, max);
2694 max = MAX2(res.tex, max);
2695 return max;
2696 }
2697 void setMax(const RegScores *that)
2698 {
2699 for (int i = 0; i < regs; ++i) {
2700 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
2701 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
2702 }
2703 for (int i = 0; i < 8; ++i) {
2704 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
2705 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
2706 }
2707 rd.c = MAX2(rd.c, that->rd.c);
2708 wr.c = MAX2(wr.c, that->wr.c);
2709
2710 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2711 res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
2712 res.st[f] = MAX2(res.st[f], that->res.st[f]);
2713 }
2714 res.sfu = MAX2(res.sfu, that->res.sfu);
2715 res.imul = MAX2(res.imul, that->res.imul);
2716 res.tex = MAX2(res.tex, that->res.tex);
2717 }
2718 void print(int cycle)
2719 {
2720 for (int i = 0; i < regs; ++i) {
2721 if (rd.r[i] > cycle)
2722 INFO("rd $r%i @ %i\n", i, rd.r[i]);
2723 if (wr.r[i] > cycle)
2724 INFO("wr $r%i @ %i\n", i, wr.r[i]);
2725 }
2726 for (int i = 0; i < 8; ++i) {
2727 if (rd.p[i] > cycle)
2728 INFO("rd $p%i @ %i\n", i, rd.p[i]);
2729 if (wr.p[i] > cycle)
2730 INFO("wr $p%i @ %i\n", i, wr.p[i]);
2731 }
2732 if (rd.c > cycle)
2733 INFO("rd $c @ %i\n", rd.c);
2734 if (wr.c > cycle)
2735 INFO("wr $c @ %i\n", wr.c);
2736 if (res.sfu > cycle)
2737 INFO("sfu @ %i\n", res.sfu);
2738 if (res.imul > cycle)
2739 INFO("imul @ %i\n", res.imul);
2740 if (res.tex > cycle)
2741 INFO("tex @ %i\n", res.tex);
2742 }
2743 };
2744
2745 RegScores *score; // for current BB
2746 std::vector<RegScores> scoreBoards;
2747 int prevData;
2748 operation prevOp;
2749
2750 const Target *targ;
2751
2752 bool visit(Function *);
2753 bool visit(BasicBlock *);
2754
2755 void commitInsn(const Instruction *, int cycle);
2756 int calcDelay(const Instruction *, int cycle) const;
2757 void setDelay(Instruction *, int delay, Instruction *next);
2758
2759 void recordRd(const Value *, const int ready);
2760 void recordWr(const Value *, const int ready);
2761 void checkRd(const Value *, int cycle, int& delay) const;
2762 void checkWr(const Value *, int cycle, int& delay) const;
2763
2764 int getCycles(const Instruction *, int origDelay) const;
2765 };
2766
2767 void
2768 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
2769 {
2770 if (insn->op == OP_EXIT || insn->op == OP_RET)
2771 delay = MAX2(delay, 14);
2772
2773 if (insn->op == OP_TEXBAR) {
2774 // TODO: except if results not used before EXIT
2775 insn->sched = 0xc2;
2776 } else
2777 if (insn->op == OP_JOIN || insn->join) {
2778 insn->sched = 0x00;
2779 } else
2780 if (delay >= 0 || prevData == 0x04 ||
2781 !next || !targ->canDualIssue(insn, next)) {
2782 insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
2783 if (prevOp == OP_EXPORT)
2784 insn->sched |= 0x40;
2785 else
2786 insn->sched |= 0x20;
2787 } else {
2788 insn->sched = 0x04; // dual-issue
2789 }
2790
2791 if (prevData != 0x04 || prevOp != OP_EXPORT)
2792 if (insn->sched != 0x04 || insn->op == OP_EXPORT)
2793 prevOp = insn->op;
2794
2795 prevData = insn->sched;
2796 }
2797
2798 int
2799 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
2800 {
2801 if (insn->sched & 0x80) {
2802 int c = (insn->sched & 0x0f) * 2 + 1;
2803 if (insn->op == OP_TEXBAR && origDelay > 0)
2804 c += origDelay;
2805 return c;
2806 }
2807 if (insn->sched & 0x60)
2808 return (insn->sched & 0x1f) + 1;
2809 return (insn->sched == 0x04) ? 0 : 32;
2810 }
2811
2812 bool
2813 SchedDataCalculator::visit(Function *func)
2814 {
2815 int regs = targ->getFileSize(FILE_GPR) + 1;
2816 scoreBoards.resize(func->cfg.getSize());
2817 for (size_t i = 0; i < scoreBoards.size(); ++i)
2818 scoreBoards[i].wipe(regs);
2819 return true;
2820 }
2821
2822 bool
2823 SchedDataCalculator::visit(BasicBlock *bb)
2824 {
2825 Instruction *insn;
2826 Instruction *next = NULL;
2827
2828 int cycle = 0;
2829
2830 prevData = 0x00;
2831 prevOp = OP_NOP;
2832 score = &scoreBoards.at(bb->getId());
2833
2834 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
2835 // back branches will wait until all target dependencies are satisfied
2836 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
2837 continue;
2838 BasicBlock *in = BasicBlock::get(ei.getNode());
2839 if (in->getExit()) {
2840 if (prevData != 0x04)
2841 prevData = in->getExit()->sched;
2842 prevOp = in->getExit()->op;
2843 }
2844 score->setMax(&scoreBoards.at(in->getId()));
2845 }
2846 if (bb->cfg.incidentCount() > 1)
2847 prevOp = OP_NOP;
2848
2849 #ifdef NVC0_DEBUG_SCHED_DATA
2850 INFO("=== BB:%i initial scores\n", bb->getId());
2851 score->print(cycle);
2852 #endif
2853
2854 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
2855 next = insn->next;
2856
2857 commitInsn(insn, cycle);
2858 int delay = calcDelay(next, cycle);
2859 setDelay(insn, delay, next);
2860 cycle += getCycles(insn, delay);
2861
2862 #ifdef NVC0_DEBUG_SCHED_DATA
2863 INFO("cycle %i, sched %02x\n", cycle, insn->sched);
2864 insn->print();
2865 next->print();
2866 #endif
2867 }
2868 if (!insn)
2869 return true;
2870 commitInsn(insn, cycle);
2871
2872 int bbDelay = -1;
2873
2874 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
2875 BasicBlock *out = BasicBlock::get(ei.getNode());
2876
2877 if (ei.getType() != Graph::Edge::BACK) {
2878 // only test the first instruction of the outgoing block
2879 next = out->getEntry();
2880 if (next)
2881 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
2882 } else {
2883 // wait until all dependencies are satisfied
2884 const int regsFree = score->getLatest();
2885 next = out->getFirst();
2886 for (int c = cycle; next && c < regsFree; next = next->next) {
2887 bbDelay = MAX2(bbDelay, calcDelay(next, c));
2888 c += getCycles(next, bbDelay);
2889 }
2890 next = NULL;
2891 }
2892 }
2893 if (bb->cfg.outgoingCount() != 1)
2894 next = NULL;
2895 setDelay(insn, bbDelay, next);
2896 cycle += getCycles(insn, bbDelay);
2897
2898 score->rebase(cycle); // common base for initializing out blocks' scores
2899 return true;
2900 }
2901
2902 #define NVE4_MAX_ISSUE_DELAY 0x1f
2903 int
2904 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
2905 {
2906 int delay = 0, ready = cycle;
2907
2908 for (int s = 0; insn->srcExists(s); ++s)
2909 checkRd(insn->getSrc(s), cycle, delay);
2910 // WAR & WAW don't seem to matter
2911 // for (int s = 0; insn->srcExists(s); ++s)
2912 // recordRd(insn->getSrc(s), cycle);
2913
2914 switch (Target::getOpClass(insn->op)) {
2915 case OPCLASS_SFU:
2916 ready = score->res.sfu;
2917 break;
2918 case OPCLASS_ARITH:
2919 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2920 ready = score->res.imul;
2921 break;
2922 case OPCLASS_TEXTURE:
2923 ready = score->res.tex;
2924 break;
2925 case OPCLASS_LOAD:
2926 ready = score->res.ld[insn->src(0).getFile()];
2927 break;
2928 case OPCLASS_STORE:
2929 ready = score->res.st[insn->src(0).getFile()];
2930 break;
2931 default:
2932 break;
2933 }
2934 if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
2935 ready = MAX2(ready, score->res.tex);
2936
2937 delay = MAX2(delay, ready - cycle);
2938
2939 // if can issue next cycle, delay is 0, not 1
2940 return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
2941 }
2942
2943 void
2944 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
2945 {
2946 const int ready = cycle + targ->getLatency(insn);
2947
2948 for (int d = 0; insn->defExists(d); ++d)
2949 recordWr(insn->getDef(d), ready);
2950 // WAR & WAW don't seem to matter
2951 // for (int s = 0; insn->srcExists(s); ++s)
2952 // recordRd(insn->getSrc(s), cycle);
2953
2954 switch (Target::getOpClass(insn->op)) {
2955 case OPCLASS_SFU:
2956 score->res.sfu = cycle + 4;
2957 break;
2958 case OPCLASS_ARITH:
2959 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2960 score->res.imul = cycle + 4;
2961 break;
2962 case OPCLASS_TEXTURE:
2963 score->res.tex = cycle + 18;
2964 break;
2965 case OPCLASS_LOAD:
2966 if (insn->src(0).getFile() == FILE_MEMORY_CONST)
2967 break;
2968 score->res.ld[insn->src(0).getFile()] = cycle + 4;
2969 score->res.st[insn->src(0).getFile()] = ready;
2970 break;
2971 case OPCLASS_STORE:
2972 score->res.st[insn->src(0).getFile()] = cycle + 4;
2973 score->res.ld[insn->src(0).getFile()] = ready;
2974 break;
2975 case OPCLASS_OTHER:
2976 if (insn->op == OP_TEXBAR)
2977 score->res.tex = cycle;
2978 break;
2979 default:
2980 break;
2981 }
2982
2983 #ifdef NVC0_DEBUG_SCHED_DATA
2984 score->print(cycle);
2985 #endif
2986 }
2987
2988 void
2989 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
2990 {
2991 int ready = cycle;
2992 int a, b;
2993
2994 switch (v->reg.file) {
2995 case FILE_GPR:
2996 a = v->reg.data.id;
2997 b = a + v->reg.size / 4;
2998 for (int r = a; r < b; ++r)
2999 ready = MAX2(ready, score->rd.r[r]);
3000 break;
3001 case FILE_PREDICATE:
3002 ready = MAX2(ready, score->rd.p[v->reg.data.id]);
3003 break;
3004 case FILE_FLAGS:
3005 ready = MAX2(ready, score->rd.c);
3006 break;
3007 case FILE_SHADER_INPUT:
3008 case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
3009 case FILE_MEMORY_LOCAL:
3010 case FILE_MEMORY_CONST:
3011 case FILE_MEMORY_SHARED:
3012 case FILE_MEMORY_GLOBAL:
3013 case FILE_SYSTEM_VALUE:
3014 // TODO: any restrictions here ?
3015 break;
3016 case FILE_IMMEDIATE:
3017 break;
3018 default:
3019 assert(0);
3020 break;
3021 }
3022 if (cycle < ready)
3023 delay = MAX2(delay, ready - cycle);
3024 }
3025
3026 void
3027 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
3028 {
3029 int ready = cycle;
3030 int a, b;
3031
3032 switch (v->reg.file) {
3033 case FILE_GPR:
3034 a = v->reg.data.id;
3035 b = a + v->reg.size / 4;
3036 for (int r = a; r < b; ++r)
3037 ready = MAX2(ready, score->wr.r[r]);
3038 break;
3039 case FILE_PREDICATE:
3040 ready = MAX2(ready, score->wr.p[v->reg.data.id]);
3041 break;
3042 default:
3043 assert(v->reg.file == FILE_FLAGS);
3044 ready = MAX2(ready, score->wr.c);
3045 break;
3046 }
3047 if (cycle < ready)
3048 delay = MAX2(delay, ready - cycle);
3049 }
3050
3051 void
3052 SchedDataCalculator::recordWr(const Value *v, const int ready)
3053 {
3054 int a = v->reg.data.id;
3055
3056 if (v->reg.file == FILE_GPR) {
3057 int b = a + v->reg.size / 4;
3058 for (int r = a; r < b; ++r)
3059 score->rd.r[r] = ready;
3060 } else
3061 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
3062 if (v->reg.file == FILE_PREDICATE) {
3063 score->rd.p[a] = ready + 4;
3064 } else {
3065 assert(v->reg.file == FILE_FLAGS);
3066 score->rd.c = ready + 4;
3067 }
3068 }
3069
3070 void
3071 SchedDataCalculator::recordRd(const Value *v, const int ready)
3072 {
3073 int a = v->reg.data.id;
3074
3075 if (v->reg.file == FILE_GPR) {
3076 int b = a + v->reg.size / 4;
3077 for (int r = a; r < b; ++r)
3078 score->wr.r[r] = ready;
3079 } else
3080 if (v->reg.file == FILE_PREDICATE) {
3081 score->wr.p[a] = ready;
3082 } else
3083 if (v->reg.file == FILE_FLAGS) {
3084 score->wr.c = ready;
3085 }
3086 }
3087
3088 bool
3089 calculateSchedDataNVC0(const Target *targ, Function *func)
3090 {
3091 SchedDataCalculator sched(targ);
3092 return sched.run(func, true, true);
3093 }
3094
3095 void
3096 CodeEmitterNVC0::prepareEmission(Function *func)
3097 {
3098 CodeEmitter::prepareEmission(func);
3099
3100 if (targ->hasSWSched)
3101 calculateSchedDataNVC0(targ, func);
3102 }
3103
3104 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
3105 : CodeEmitter(target),
3106 targNVC0(target),
3107 writeIssueDelays(target->hasSWSched)
3108 {
3109 code = NULL;
3110 codeSize = codeSizeLimit = 0;
3111 relocInfo = NULL;
3112 }
3113
3114 CodeEmitter *
3115 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
3116 {
3117 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
3118 emit->setProgramType(type);
3119 return emit;
3120 }
3121
3122 CodeEmitter *
3123 TargetNVC0::getCodeEmitter(Program::Type type)
3124 {
3125 if (chipset >= NVISA_GK20A_CHIPSET)
3126 return createCodeEmitterGK110(type);
3127 return createCodeEmitterNVC0(type);
3128 }
3129
3130 } // namespace nv50_ir