nv50/ir: add support for indirect BRA,CALL
[mesa.git] / src / gallium / drivers / nvc0 / codegen / nv50_ir_emit_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_A(const Instruction *, uint64_t);
49 void emitForm_B(const Instruction *, uint64_t);
50 void emitForm_S(const Instruction *, uint32_t, bool pred);
51
52 void emitPredicate(const Instruction *);
53
54 void setAddress16(const ValueRef&);
55 void setImmediate(const Instruction *, const int s); // needs op already set
56 void setImmediateS8(const ValueRef&);
57 void setSUConst16(const Instruction *, const int s);
58 void setSUPred(const Instruction *, const int s);
59
60 void emitCondCode(CondCode cc, int pos);
61 void emitInterpMode(const Instruction *);
62 void emitLoadStoreType(DataType ty);
63 void emitSUGType(DataType);
64 void emitCachingMode(CacheMode c);
65
66 void emitShortSrc2(const ValueRef&);
67
68 inline uint8_t getSRegEncoding(const ValueRef&);
69
70 void roundMode_A(const Instruction *);
71 void roundMode_C(const Instruction *);
72 void roundMode_CS(const Instruction *);
73
74 void emitNegAbs12(const Instruction *);
75
76 void emitNOP(const Instruction *);
77
78 void emitLOAD(const Instruction *);
79 void emitSTORE(const Instruction *);
80 void emitMOV(const Instruction *);
81
82 void emitINTERP(const Instruction *);
83 void emitPFETCH(const Instruction *);
84 void emitVFETCH(const Instruction *);
85 void emitEXPORT(const Instruction *);
86 void emitOUT(const Instruction *);
87
88 void emitUADD(const Instruction *);
89 void emitFADD(const Instruction *);
90 void emitUMUL(const Instruction *);
91 void emitFMUL(const Instruction *);
92 void emitIMAD(const Instruction *);
93 void emitISAD(const Instruction *);
94 void emitFMAD(const Instruction *);
95 void emitMADSP(const Instruction *);
96
97 void emitNOT(Instruction *);
98 void emitLogicOp(const Instruction *, uint8_t subOp);
99 void emitPOPC(const Instruction *);
100 void emitINSBF(const Instruction *);
101 void emitShift(const Instruction *);
102
103 void emitSFnOp(const Instruction *, uint8_t subOp);
104
105 void emitCVT(Instruction *);
106 void emitMINMAX(const Instruction *);
107 void emitPreOp(const Instruction *);
108
109 void emitSET(const CmpInstruction *);
110 void emitSLCT(const CmpInstruction *);
111 void emitSELP(const Instruction *);
112
113 void emitTEXBAR(const Instruction *);
114 void emitTEX(const TexInstruction *);
115 void emitTEXCSAA(const TexInstruction *);
116 void emitTXQ(const TexInstruction *);
117
118 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
119
120 void emitFlow(const Instruction *);
121
122 void emitSUCLAMPMode(uint16_t);
123 void emitSUCalc(Instruction *);
124 void emitSULDGB(const TexInstruction *);
125 void emitSUSTGx(const TexInstruction *);
126
127 void emitVSHL(const Instruction *);
128 void emitVectorSubOp(const Instruction *);
129
130 inline void defId(const ValueDef&, const int pos);
131 inline void srcId(const ValueRef&, const int pos);
132 inline void srcId(const ValueRef *, const int pos);
133 inline void srcId(const Instruction *, int s, const int pos);
134
135 inline void srcAddr32(const ValueRef&, const int pos); // address / 4
136
137 inline bool isLIMM(const ValueRef&, DataType ty);
138 };
139
// for better visibility
// HEX64(h, l) pastes two hex digit groups into one 64-bit literal:
// h supplies the upper digits and l the lower ones, e.g.
// HEX64(20000000, 00000003) expands to 0x2000000000000003ULL.
#define HEX64(h, l) 0x##h##l##ULL

// Register data of the representative value behind a source reference
// (SDATA) or a definition (DDATA); both expand to the same expression.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
145
146 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
147 {
148 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
149 }
150
151 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
152 {
153 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
154 }
155
156 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
157 {
158 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
159 code[pos / 32] |= r << (pos % 32);
160 }
161
162 void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos)
163 {
164 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
165 }
166
167 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
168 {
169 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
170 }
171
172 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
173 {
174 const ImmediateValue *imm = ref.get()->asImm();
175
176 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
177 }
178
179 void
180 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
181 {
182 switch (insn->rnd) {
183 case ROUND_M: code[1] |= 1 << 23; break;
184 case ROUND_P: code[1] |= 2 << 23; break;
185 case ROUND_Z: code[1] |= 3 << 23; break;
186 default:
187 assert(insn->rnd == ROUND_N);
188 break;
189 }
190 }
191
192 void
193 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
194 {
195 if (i->src(1).mod.abs()) code[0] |= 1 << 6;
196 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
197 if (i->src(1).mod.neg()) code[0] |= 1 << 8;
198 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
199 }
200
201 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
202 {
203 uint8_t val;
204
205 switch (cc) {
206 case CC_LT: val = 0x1; break;
207 case CC_LTU: val = 0x9; break;
208 case CC_EQ: val = 0x2; break;
209 case CC_EQU: val = 0xa; break;
210 case CC_LE: val = 0x3; break;
211 case CC_LEU: val = 0xb; break;
212 case CC_GT: val = 0x4; break;
213 case CC_GTU: val = 0xc; break;
214 case CC_NE: val = 0x5; break;
215 case CC_NEU: val = 0xd; break;
216 case CC_GE: val = 0x6; break;
217 case CC_GEU: val = 0xe; break;
218 case CC_TR: val = 0xf; break;
219 case CC_FL: val = 0x0; break;
220
221 case CC_A: val = 0x14; break;
222 case CC_NA: val = 0x13; break;
223 case CC_S: val = 0x15; break;
224 case CC_NS: val = 0x12; break;
225 case CC_C: val = 0x16; break;
226 case CC_NC: val = 0x11; break;
227 case CC_O: val = 0x17; break;
228 case CC_NO: val = 0x10; break;
229
230 default:
231 val = 0;
232 assert(!"invalid condition code");
233 break;
234 }
235 code[pos / 32] |= val << (pos % 32);
236 }
237
238 void
239 CodeEmitterNVC0::emitPredicate(const Instruction *i)
240 {
241 if (i->predSrc >= 0) {
242 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
243 srcId(i->src(i->predSrc), 10);
244 if (i->cc == CC_NOT_P)
245 code[0] |= 0x2000; // negate
246 } else {
247 code[0] |= 0x1c00;
248 }
249 }
250
251 void
252 CodeEmitterNVC0::setAddress16(const ValueRef& src)
253 {
254 Symbol *sym = src.get()->asSym();
255
256 assert(sym);
257
258 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
259 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
260 }
261
262 void
263 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
264 {
265 const ImmediateValue *imm = i->src(s).get()->asImm();
266 uint32_t u32;
267
268 assert(imm);
269 u32 = imm->reg.data.u32;
270
271 if ((code[0] & 0xf) == 0x2) {
272 // LIMM
273 code[0] |= (u32 & 0x3f) << 26;
274 code[1] |= u32 >> 6;
275 } else
276 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
277 // integer immediate
278 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
279 assert(!(code[1] & 0xc000));
280 u32 &= 0xfffff;
281 code[0] |= (u32 & 0x3f) << 26;
282 code[1] |= 0xc000 | (u32 >> 6);
283 } else {
284 // float immediate
285 assert(!(u32 & 0x00000fff));
286 assert(!(code[1] & 0xc000));
287 code[0] |= ((u32 >> 12) & 0x3f) << 26;
288 code[1] |= 0xc000 | (u32 >> 18);
289 }
290 }
291
292 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
293 {
294 const ImmediateValue *imm = ref.get()->asImm();
295
296 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
297
298 assert(s8 == imm->reg.data.s32);
299
300 code[0] |= (s8 & 0x3f) << 26;
301 code[0] |= (s8 >> 6) << 8;
302 }
303
304 void
305 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
306 {
307 code[0] = opc;
308 code[1] = opc >> 32;
309
310 emitPredicate(i);
311
312 defId(i->def(0), 14);
313
314 int s1 = 26;
315 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
316 s1 = 49;
317
318 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
319 switch (i->getSrc(s)->reg.file) {
320 case FILE_MEMORY_CONST:
321 assert(!(code[1] & 0xc000));
322 code[1] |= (s == 2) ? 0x8000 : 0x4000;
323 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
324 setAddress16(i->src(s));
325 break;
326 case FILE_IMMEDIATE:
327 assert(s == 1 ||
328 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
329 assert(!(code[1] & 0xc000));
330 setImmediate(i, s);
331 break;
332 case FILE_GPR:
333 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
334 break;
335 srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
336 break;
337 default:
338 // ignore here, can be predicate or flags, but must not be address
339 break;
340 }
341 }
342 }
343
344 void
345 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
346 {
347 code[0] = opc;
348 code[1] = opc >> 32;
349
350 emitPredicate(i);
351
352 defId(i->def(0), 14);
353
354 switch (i->src(0).getFile()) {
355 case FILE_MEMORY_CONST:
356 assert(!(code[1] & 0xc000));
357 code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
358 setAddress16(i->src(0));
359 break;
360 case FILE_IMMEDIATE:
361 assert(!(code[1] & 0xc000));
362 setImmediate(i, 0);
363 break;
364 case FILE_GPR:
365 srcId(i->src(0), 26);
366 break;
367 default:
368 // ignore here, can be predicate or flags, but must not be address
369 break;
370 }
371 }
372
373 void
374 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
375 {
376 code[0] = opc;
377
378 int ss2a = 0;
379 if (opc == 0x0d || opc == 0x0e)
380 ss2a = 2;
381
382 defId(i->def(0), 14);
383 srcId(i->src(0), 20);
384
385 assert(pred || (i->predSrc < 0));
386 if (pred)
387 emitPredicate(i);
388
389 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
390 if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
391 assert(!(code[0] & (0x300 >> ss2a)));
392 switch (i->src(s).get()->reg.fileIndex) {
393 case 0: code[0] |= 0x100 >> ss2a; break;
394 case 1: code[0] |= 0x200 >> ss2a; break;
395 case 16: code[0] |= 0x300 >> ss2a; break;
396 default:
397 ERROR("invalid c[] space for short form\n");
398 break;
399 }
400 if (s == 1)
401 code[0] |= i->getSrc(s)->reg.data.offset << 24;
402 else
403 code[0] |= i->getSrc(s)->reg.data.offset << 6;
404 } else
405 if (i->src(s).getFile() == FILE_IMMEDIATE) {
406 assert(s == 1);
407 setImmediateS8(i->src(s));
408 } else
409 if (i->src(s).getFile() == FILE_GPR) {
410 srcId(i->src(s), (s == 1) ? 26 : 8);
411 }
412 }
413 }
414
415 void
416 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
417 {
418 if (src.getFile() == FILE_MEMORY_CONST) {
419 switch (src.get()->reg.fileIndex) {
420 case 0: code[0] |= 0x100; break;
421 case 1: code[0] |= 0x200; break;
422 case 16: code[0] |= 0x300; break;
423 default:
424 assert(!"unsupported file index for short op");
425 break;
426 }
427 srcAddr32(src, 20);
428 } else {
429 srcId(src, 20);
430 assert(src.getFile() == FILE_GPR);
431 }
432 }
433
434 void
435 CodeEmitterNVC0::emitNOP(const Instruction *i)
436 {
437 code[0] = 0x000001e4;
438 code[1] = 0x40000000;
439 emitPredicate(i);
440 }
441
442 void
443 CodeEmitterNVC0::emitFMAD(const Instruction *i)
444 {
445 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
446
447 if (i->encSize == 8) {
448 if (isLIMM(i->src(1), TYPE_F32)) {
449 emitForm_A(i, HEX64(20000000, 00000002));
450 } else {
451 emitForm_A(i, HEX64(30000000, 00000000));
452
453 if (i->src(2).mod.neg())
454 code[0] |= 1 << 8;
455 }
456 roundMode_A(i);
457
458 if (neg1)
459 code[0] |= 1 << 9;
460
461 if (i->saturate)
462 code[0] |= 1 << 5;
463 if (i->ftz)
464 code[0] |= 1 << 6;
465 } else {
466 assert(!i->saturate && !i->src(2).mod.neg());
467 emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
468 false);
469 if (neg1)
470 code[0] |= 1 << 4;
471 }
472 }
473
474 void
475 CodeEmitterNVC0::emitFMUL(const Instruction *i)
476 {
477 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
478
479 assert(i->postFactor >= -3 && i->postFactor <= 3);
480
481 if (i->encSize == 8) {
482 if (isLIMM(i->src(1), TYPE_F32)) {
483 assert(i->postFactor == 0); // constant folded, hopefully
484 emitForm_A(i, HEX64(30000000, 00000002));
485 } else {
486 emitForm_A(i, HEX64(58000000, 00000000));
487 roundMode_A(i);
488 code[1] |= ((i->postFactor > 0) ?
489 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
490 }
491 if (neg)
492 code[1] ^= 1 << 25; // aliases with LIMM sign bit
493
494 if (i->saturate)
495 code[0] |= 1 << 5;
496
497 if (i->dnz)
498 code[0] |= 1 << 7;
499 else
500 if (i->ftz)
501 code[0] |= 1 << 6;
502 } else {
503 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
504 emitForm_S(i, 0xa8, true);
505 }
506 }
507
508 void
509 CodeEmitterNVC0::emitUMUL(const Instruction *i)
510 {
511 if (i->encSize == 8) {
512 if (i->src(1).getFile() == FILE_IMMEDIATE) {
513 emitForm_A(i, HEX64(10000000, 00000002));
514 } else {
515 emitForm_A(i, HEX64(50000000, 00000003));
516 }
517 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
518 code[0] |= 1 << 6;
519 if (i->sType == TYPE_S32)
520 code[0] |= 1 << 5;
521 if (i->dType == TYPE_S32)
522 code[0] |= 1 << 7;
523 } else {
524 emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
525
526 if (i->sType == TYPE_S32)
527 code[0] |= 1 << 6;
528 }
529 }
530
531 void
532 CodeEmitterNVC0::emitFADD(const Instruction *i)
533 {
534 if (i->encSize == 8) {
535 if (isLIMM(i->src(1), TYPE_F32)) {
536 assert(!i->saturate);
537 emitForm_A(i, HEX64(28000000, 00000002));
538
539 code[0] |= i->src(0).mod.abs() << 7;
540 code[0] |= i->src(0).mod.neg() << 9;
541
542 if (i->src(1).mod.abs())
543 code[1] &= 0xfdffffff;
544 if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
545 code[1] ^= 0x02000000;
546 } else {
547 emitForm_A(i, HEX64(50000000, 00000000));
548
549 roundMode_A(i);
550 if (i->saturate)
551 code[1] |= 1 << 17;
552
553 emitNegAbs12(i);
554 if (i->op == OP_SUB) code[0] ^= 1 << 8;
555 }
556 if (i->ftz)
557 code[0] |= 1 << 5;
558 } else {
559 assert(!i->saturate && i->op != OP_SUB &&
560 !i->src(0).mod.abs() &&
561 !i->src(1).mod.neg() && !i->src(1).mod.abs());
562
563 emitForm_S(i, 0x49, true);
564
565 if (i->src(0).mod.neg())
566 code[0] |= 1 << 7;
567 }
568 }
569
570 void
571 CodeEmitterNVC0::emitUADD(const Instruction *i)
572 {
573 uint32_t addOp = 0;
574
575 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
576 assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
577
578 if (i->src(0).mod.neg())
579 addOp |= 0x200;
580 if (i->src(1).mod.neg())
581 addOp |= 0x100;
582 if (i->op == OP_SUB) {
583 addOp ^= 0x100;
584 assert(addOp != 0x300); // would be add-plus-one
585 }
586
587 if (i->encSize == 8) {
588 if (isLIMM(i->src(1), TYPE_U32)) {
589 emitForm_A(i, HEX64(08000000, 00000002));
590 if (i->defExists(1))
591 code[1] |= 1 << 26; // write carry
592 } else {
593 emitForm_A(i, HEX64(48000000, 00000003));
594 if (i->defExists(1))
595 code[1] |= 1 << 16; // write carry
596 }
597 code[0] |= addOp;
598
599 if (i->saturate)
600 code[0] |= 1 << 5;
601 if (i->flagsSrc >= 0) // add carry
602 code[0] |= 1 << 6;
603 } else {
604 assert(!(addOp & 0x100));
605 emitForm_S(i, (addOp >> 3) |
606 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
607 }
608 }
609
610 // TODO: shl-add
611 void
612 CodeEmitterNVC0::emitIMAD(const Instruction *i)
613 {
614 assert(i->encSize == 8);
615 emitForm_A(i, HEX64(20000000, 00000003));
616
617 if (isSignedType(i->dType))
618 code[0] |= 1 << 7;
619 if (isSignedType(i->sType))
620 code[0] |= 1 << 5;
621
622 code[1] |= i->saturate << 24;
623
624 if (i->flagsDef >= 0) code[1] |= 1 << 16;
625 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
626
627 if (i->src(2).mod.neg()) code[0] |= 0x10;
628 if (i->src(1).mod.neg() ^
629 i->src(0).mod.neg()) code[0] |= 0x20;
630
631 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
632 code[0] |= 1 << 6;
633 }
634
635 void
636 CodeEmitterNVC0::emitMADSP(const Instruction *i)
637 {
638 assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
639
640 emitForm_A(i, HEX64(00000000, 00000003));
641
642 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
643 code[1] |= 0x01800000;
644 } else {
645 code[0] |= (i->subOp & 0x00f) << 7;
646 code[0] |= (i->subOp & 0x0f0) << 1;
647 code[0] |= (i->subOp & 0x100) >> 3;
648 code[0] |= (i->subOp & 0x200) >> 2;
649 code[1] |= (i->subOp & 0xc00) << 13;
650 }
651
652 if (i->flagsDef >= 0)
653 code[1] |= 1 << 16;
654 }
655
656 void
657 CodeEmitterNVC0::emitISAD(const Instruction *i)
658 {
659 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
660 assert(i->encSize == 8);
661
662 emitForm_A(i, HEX64(38000000, 00000003));
663
664 if (i->dType == TYPE_S32)
665 code[0] |= 1 << 5;
666 }
667
668 void
669 CodeEmitterNVC0::emitNOT(Instruction *i)
670 {
671 assert(i->encSize == 8);
672 i->setSrc(1, i->src(0));
673 emitForm_A(i, HEX64(68000000, 000001c3));
674 }
675
676 void
677 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
678 {
679 if (i->def(0).getFile() == FILE_PREDICATE) {
680 code[0] = 0x00000004 | (subOp << 30);
681 code[1] = 0x0c000000;
682
683 emitPredicate(i);
684
685 defId(i->def(0), 17);
686 srcId(i->src(0), 20);
687 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
688 srcId(i->src(1), 26);
689 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
690
691 if (i->defExists(1)) {
692 defId(i->def(1), 14);
693 } else {
694 code[0] |= 7 << 14;
695 }
696 // (a OP b) OP c
697 if (i->predSrc != 2 && i->srcExists(2)) {
698 code[1] |= subOp << 21;
699 srcId(i->src(2), 17);
700 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
701 } else {
702 code[1] |= 0x000e0000;
703 }
704 } else
705 if (i->encSize == 8) {
706 if (isLIMM(i->src(1), TYPE_U32)) {
707 emitForm_A(i, HEX64(38000000, 00000002));
708
709 if (i->flagsDef >= 0)
710 code[1] |= 1 << 26;
711 } else {
712 emitForm_A(i, HEX64(68000000, 00000003));
713
714 if (i->flagsDef >= 0)
715 code[1] |= 1 << 16;
716 }
717 code[0] |= subOp << 6;
718
719 if (i->flagsSrc >= 0) // carry
720 code[0] |= 1 << 5;
721
722 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
723 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
724 } else {
725 emitForm_S(i, (subOp << 5) |
726 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
727 }
728 }
729
730 void
731 CodeEmitterNVC0::emitPOPC(const Instruction *i)
732 {
733 emitForm_A(i, HEX64(54000000, 00000004));
734
735 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
736 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
737 }
738
739 void
740 CodeEmitterNVC0::emitINSBF(const Instruction *i)
741 {
742 emitForm_A(i, HEX64(28000000, 30000000));
743 }
744
745 void
746 CodeEmitterNVC0::emitShift(const Instruction *i)
747 {
748 if (i->op == OP_SHR) {
749 emitForm_A(i, HEX64(58000000, 00000003)
750 | (isSignedType(i->dType) ? 0x20 : 0x00));
751 } else {
752 emitForm_A(i, HEX64(60000000, 00000003));
753 }
754
755 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
756 code[0] |= 1 << 9;
757 }
758
759 void
760 CodeEmitterNVC0::emitPreOp(const Instruction *i)
761 {
762 if (i->encSize == 8) {
763 emitForm_B(i, HEX64(60000000, 00000000));
764
765 if (i->op == OP_PREEX2)
766 code[0] |= 0x20;
767
768 if (i->src(0).mod.abs()) code[0] |= 1 << 6;
769 if (i->src(0).mod.neg()) code[0] |= 1 << 8;
770 } else {
771 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
772 }
773 }
774
775 void
776 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
777 {
778 if (i->encSize == 8) {
779 code[0] = 0x00000000 | (subOp << 26);
780 code[1] = 0xc8000000;
781
782 emitPredicate(i);
783
784 defId(i->def(0), 14);
785 srcId(i->src(0), 20);
786
787 assert(i->src(0).getFile() == FILE_GPR);
788
789 if (i->saturate) code[0] |= 1 << 5;
790
791 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
792 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
793 } else {
794 emitForm_S(i, 0x80000008 | (subOp << 26), true);
795
796 assert(!i->src(0).mod.neg());
797 if (i->src(0).mod.abs()) code[0] |= 1 << 30;
798 }
799 }
800
801 void
802 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
803 {
804 uint64_t op;
805
806 assert(i->encSize == 8);
807
808 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
809
810 if (i->ftz)
811 op |= 1 << 5;
812 else
813 if (!isFloatType(i->dType))
814 op |= isSignedType(i->dType) ? 0x23 : 0x03;
815
816 emitForm_A(i, op);
817 emitNegAbs12(i);
818 }
819
820 void
821 CodeEmitterNVC0::roundMode_C(const Instruction *i)
822 {
823 switch (i->rnd) {
824 case ROUND_M: code[1] |= 1 << 17; break;
825 case ROUND_P: code[1] |= 2 << 17; break;
826 case ROUND_Z: code[1] |= 3 << 17; break;
827 case ROUND_NI: code[0] |= 1 << 7; break;
828 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
829 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
830 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
831 case ROUND_N: break;
832 default:
833 assert(!"invalid round mode");
834 break;
835 }
836 }
837
838 void
839 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
840 {
841 switch (i->rnd) {
842 case ROUND_M:
843 case ROUND_MI: code[0] |= 1 << 16; break;
844 case ROUND_P:
845 case ROUND_PI: code[0] |= 2 << 16; break;
846 case ROUND_Z:
847 case ROUND_ZI: code[0] |= 3 << 16; break;
848 default:
849 break;
850 }
851 }
852
853 void
854 CodeEmitterNVC0::emitCVT(Instruction *i)
855 {
856 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
857
858 switch (i->op) {
859 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
860 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
861 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
862 default:
863 break;
864 }
865
866 const bool sat = (i->op == OP_SAT) || i->saturate;
867 const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
868 const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
869
870 if (i->encSize == 8) {
871 emitForm_B(i, HEX64(10000000, 00000004));
872
873 roundMode_C(i);
874
875 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
876 code[0] |= util_logbase2(typeSizeof(i->dType)) << 20;
877 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
878
879 if (sat)
880 code[0] |= 0x20;
881 if (abs)
882 code[0] |= 1 << 6;
883 if (neg && i->op != OP_ABS)
884 code[0] |= 1 << 8;
885
886 if (i->ftz)
887 code[1] |= 1 << 23;
888
889 if (isSignedIntType(i->dType))
890 code[0] |= 0x080;
891 if (isSignedIntType(i->sType))
892 code[0] |= 0x200;
893
894 if (isFloatType(i->dType)) {
895 if (!isFloatType(i->sType))
896 code[1] |= 0x08000000;
897 } else {
898 if (isFloatType(i->sType))
899 code[1] |= 0x04000000;
900 else
901 code[1] |= 0x0c000000;
902 }
903 } else {
904 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
905 code[0] = 0x298;
906 } else
907 if (isFloatType(i->dType)) {
908 if (isFloatType(i->sType))
909 code[0] = 0x098;
910 else
911 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
912 } else {
913 assert(isFloatType(i->sType));
914
915 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
916 }
917
918 if (neg) code[0] |= 1 << 16;
919 if (sat) code[0] |= 1 << 18;
920 if (abs) code[0] |= 1 << 19;
921
922 roundMode_CS(i);
923 }
924 }
925
926 void
927 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
928 {
929 uint32_t hi;
930 uint32_t lo = 0;
931
932 if (i->sType == TYPE_F64)
933 lo = 0x1;
934 else
935 if (!isFloatType(i->sType))
936 lo = 0x3;
937
938 if (isFloatType(i->dType) || isSignedIntType(i->sType))
939 lo |= 0x20;
940
941 switch (i->op) {
942 case OP_SET_AND: hi = 0x10000000; break;
943 case OP_SET_OR: hi = 0x10200000; break;
944 case OP_SET_XOR: hi = 0x10400000; break;
945 default:
946 hi = 0x100e0000;
947 break;
948 }
949 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
950
951 if (i->op != OP_SET)
952 srcId(i->src(2), 32 + 17);
953
954 if (i->def(0).getFile() == FILE_PREDICATE) {
955 if (i->sType == TYPE_F32)
956 code[1] += 0x10000000;
957 else
958 code[1] += 0x08000000;
959
960 code[0] &= ~0xfc000;
961 defId(i->def(0), 17);
962 if (i->defExists(1))
963 defId(i->def(1), 14);
964 else
965 code[0] |= 0x1c000;
966 }
967
968 if (i->ftz)
969 code[1] |= 1 << 27;
970
971 emitCondCode(i->setCond, 32 + 23);
972 emitNegAbs12(i);
973 }
974
975 void
976 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
977 {
978 uint64_t op;
979
980 switch (i->dType) {
981 case TYPE_S32:
982 op = HEX64(30000000, 00000023);
983 break;
984 case TYPE_U32:
985 op = HEX64(30000000, 00000003);
986 break;
987 case TYPE_F32:
988 op = HEX64(38000000, 00000000);
989 break;
990 default:
991 assert(!"invalid type for SLCT");
992 op = 0;
993 break;
994 }
995 emitForm_A(i, op);
996
997 CondCode cc = i->setCond;
998
999 if (i->src(2).mod.neg())
1000 cc = reverseCondCode(cc);
1001
1002 emitCondCode(cc, 32 + 23);
1003
1004 if (i->ftz)
1005 code[0] |= 1 << 5;
1006 }
1007
1008 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1009 {
1010 emitForm_A(i, HEX64(20000000, 00000004));
1011
1012 if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1013 code[1] |= 1 << 20;
1014 }
1015
1016 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1017 {
1018 code[0] = 0x00000006 | (i->subOp << 26);
1019 code[1] = 0xf0000000;
1020 emitPredicate(i);
1021 emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1022 }
1023
1024 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1025 {
1026 code[0] = 0x00000086;
1027 code[1] = 0xd0000000;
1028
1029 code[1] |= i->tex.r;
1030 code[1] |= i->tex.s << 8;
1031
1032 if (i->tex.liveOnly)
1033 code[0] |= 1 << 9;
1034
1035 defId(i->def(0), 14);
1036 srcId(i->src(0), 20);
1037 }
1038
1039 static inline bool
1040 isNextIndependentTex(const TexInstruction *i)
1041 {
1042 if (!i->next || !isTextureOp(i->next->op))
1043 return false;
1044 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1045 return false;
1046 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1047 }
1048
1049 void
1050 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1051 {
1052 code[0] = 0x00000006;
1053
1054 if (isNextIndependentTex(i))
1055 code[0] |= 0x080; // t mode
1056 else
1057 code[0] |= 0x100; // p mode
1058
1059 if (i->tex.liveOnly)
1060 code[0] |= 1 << 9;
1061
1062 switch (i->op) {
1063 case OP_TEX: code[1] = 0x80000000; break;
1064 case OP_TXB: code[1] = 0x84000000; break;
1065 case OP_TXL: code[1] = 0x86000000; break;
1066 case OP_TXF: code[1] = 0x90000000; break;
1067 case OP_TXG: code[1] = 0xa0000000; break;
1068 case OP_TXD: code[1] = 0xe0000000; break;
1069 default:
1070 assert(!"invalid texture op");
1071 break;
1072 }
1073 if (i->op == OP_TXF) {
1074 if (!i->tex.levelZero)
1075 code[1] |= 0x02000000;
1076 } else
1077 if (i->tex.levelZero) {
1078 code[1] |= 0x02000000;
1079 }
1080
1081 if (i->op != OP_TXD && i->tex.derivAll)
1082 code[1] |= 1 << 13;
1083
1084 defId(i->def(0), 14);
1085 srcId(i->src(0), 20);
1086
1087 emitPredicate(i);
1088
1089 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1090
1091 code[1] |= i->tex.mask << 14;
1092
1093 code[1] |= i->tex.r;
1094 code[1] |= i->tex.s << 8;
1095 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1096 code[1] |= 1 << 18; // in 1st source (with array index)
1097
1098 // texture target:
1099 code[1] |= (i->tex.target.getDim() - 1) << 20;
1100 if (i->tex.target.isCube())
1101 code[1] += 2 << 20;
1102 if (i->tex.target.isArray())
1103 code[1] |= 1 << 19;
1104 if (i->tex.target.isShadow())
1105 code[1] |= 1 << 24;
1106
1107 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1108
1109 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1110 // lzero
1111 if (i->op == OP_TXL)
1112 code[1] &= ~(1 << 26);
1113 else
1114 if (i->op == OP_TXF)
1115 code[1] &= ~(1 << 25);
1116 }
1117 if (i->tex.target == TEX_TARGET_2D_MS ||
1118 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1119 code[1] |= 1 << 23;
1120
1121 if (i->tex.useOffsets) // in vecSrc0.w
1122 code[1] |= 1 << 22;
1123
1124 srcId(i, src1, 26);
1125 }
1126
1127 void
1128 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1129 {
1130 code[0] = 0x00000086;
1131 code[1] = 0xc0000000;
1132
1133 switch (i->tex.query) {
1134 case TXQ_DIMS: code[1] |= 0 << 22; break;
1135 case TXQ_TYPE: code[1] |= 1 << 22; break;
1136 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1137 case TXQ_FILTER: code[1] |= 3 << 22; break;
1138 case TXQ_LOD: code[1] |= 4 << 22; break;
1139 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1140 default:
1141 assert(!"invalid texture query");
1142 break;
1143 }
1144
1145 code[1] |= i->tex.mask << 14;
1146
1147 code[1] |= i->tex.r;
1148 code[1] |= i->tex.s << 8;
1149 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1150 code[1] |= 1 << 18;
1151
1152 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1153
1154 defId(i->def(0), 14);
1155 srcId(i->src(0), 20);
1156 srcId(i, src1, 26);
1157
1158 emitPredicate(i);
1159 }
1160
1161 void
1162 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1163 {
1164 code[0] = 0x00000000 | (laneMask << 6);
1165 code[1] = 0x48000000 | qOp;
1166
1167 defId(i->def(0), 14);
1168 srcId(i->src(0), 20);
1169 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
1170
1171 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1172 code[0] |= 1 << 9; // dall
1173
1174 emitPredicate(i);
1175 }
1176
1177 void
1178 CodeEmitterNVC0::emitFlow(const Instruction *i)
1179 {
1180 const FlowInstruction *f = i->asFlow();
1181
1182 unsigned mask; // bit 0: predicate, bit 1: target
1183
1184 code[0] = 0x00000007;
1185
1186 switch (i->op) {
1187 case OP_BRA:
1188 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1189 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1190 code[0] |= 0x4000;
1191 mask = 3;
1192 break;
1193 case OP_CALL:
1194 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1195 if (f->indirect)
1196 code[0] |= 0x4000; // indirect calls always use c[] source
1197 mask = 2;
1198 break;
1199
1200 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1201 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1202 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1203 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1204 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1205
1206 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1207 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1208 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1209 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1210
1211 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1212 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1213 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1214 default:
1215 assert(!"invalid flow operation");
1216 return;
1217 }
1218
1219 if (mask & 1) {
1220 emitPredicate(i);
1221 if (i->flagsSrc < 0)
1222 code[0] |= 0x1e0;
1223 }
1224
1225 if (!f)
1226 return;
1227
1228 if (f->allWarp)
1229 code[0] |= 1 << 15;
1230 if (f->limit)
1231 code[0] |= 1 << 16;
1232
1233 if (f->indirect) {
1234 if (code[0] & 0x4000) {
1235 assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1236 setAddress16(i->src(0));
1237 code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1238 if (f->op == OP_BRA)
1239 srcId(f->src(0).getIndirect(0), 20);
1240 } else {
1241 srcId(f, 0, 20);
1242 }
1243 }
1244
1245 if (f->op == OP_CALL) {
1246 if (f->indirect) {
1247 // nothing
1248 } else
1249 if (f->builtin) {
1250 assert(f->absolute);
1251 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1252 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1253 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1254 } else {
1255 assert(!f->absolute);
1256 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1257 code[0] |= (pcRel & 0x3f) << 26;
1258 code[1] |= (pcRel >> 6) & 0x3ffff;
1259 }
1260 } else
1261 if (mask & 2) {
1262 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1263 // currently we don't want absolute branches
1264 assert(!f->absolute);
1265 code[0] |= (pcRel & 0x3f) << 26;
1266 code[1] |= (pcRel >> 6) & 0x3ffff;
1267 }
1268 }
1269
1270 void
1271 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1272 {
1273 uint32_t prim = i->src(0).get()->reg.data.u32;
1274
1275 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1276 code[1] = 0x00000000 | (prim >> 6);
1277
1278 emitPredicate(i);
1279
1280 defId(i->def(0), 14);
1281 srcId(i->src(1), 20);
1282 }
1283
1284 void
1285 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1286 {
1287 code[0] = 0x00000006;
1288 code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1289
1290 if (i->perPatch)
1291 code[0] |= 0x100;
1292 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1293 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1294
1295 emitPredicate(i);
1296
1297 code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1298
1299 defId(i->def(0), 14);
1300 srcId(i->src(0).getIndirect(0), 20);
1301 srcId(i->src(0).getIndirect(1), 26); // vertex address
1302 }
1303
1304 void
1305 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1306 {
1307 unsigned int size = typeSizeof(i->dType);
1308
1309 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1310 code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1311
1312 assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1313
1314 if (i->perPatch)
1315 code[0] |= 0x100;
1316
1317 emitPredicate(i);
1318
1319 assert(i->src(1).getFile() == FILE_GPR);
1320
1321 srcId(i->src(0).getIndirect(0), 20);
1322 srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1323 srcId(i->src(1), 26);
1324 }
1325
1326 void
1327 CodeEmitterNVC0::emitOUT(const Instruction *i)
1328 {
1329 code[0] = 0x00000006;
1330 code[1] = 0x1c000000;
1331
1332 emitPredicate(i);
1333
1334 defId(i->def(0), 14); // new secret address
1335 srcId(i->src(0), 20); // old secret address, should be 0 initially
1336
1337 assert(i->src(0).getFile() == FILE_GPR);
1338
1339 if (i->op == OP_EMIT)
1340 code[0] |= 1 << 5;
1341 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1342 code[0] |= 1 << 6;
1343
1344 // vertex stream
1345 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1346 code[1] |= 0xc000;
1347 code[0] |= SDATA(i->src(1)).u32 << 26;
1348 } else {
1349 srcId(i->src(1), 26);
1350 }
1351 }
1352
1353 void
1354 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1355 {
1356 if (i->encSize == 8) {
1357 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1358 } else {
1359 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1360 code[0] |= 0x80;
1361 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1362 }
1363 }
1364
1365 void
1366 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1367 {
1368 const uint32_t base = i->getSrc(0)->reg.data.offset;
1369
1370 if (i->encSize == 8) {
1371 code[0] = 0x00000000;
1372 code[1] = 0xc0000000 | (base & 0xffff);
1373
1374 if (i->saturate)
1375 code[0] |= 1 << 5;
1376
1377 if (i->op == OP_PINTERP)
1378 srcId(i->src(1), 26);
1379 else
1380 code[0] |= 0x3f << 26;
1381
1382 srcId(i->src(0).getIndirect(0), 20);
1383 } else {
1384 assert(i->op == OP_PINTERP);
1385 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1386 srcId(i->src(1), 20);
1387 }
1388 emitInterpMode(i);
1389
1390 emitPredicate(i);
1391 defId(i->def(0), 14);
1392
1393 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1394 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 17);
1395 else
1396 code[1] |= 0x3f << 17;
1397 }
1398
1399 void
1400 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1401 {
1402 uint8_t val;
1403
1404 switch (ty) {
1405 case TYPE_U8:
1406 val = 0x00;
1407 break;
1408 case TYPE_S8:
1409 val = 0x20;
1410 break;
1411 case TYPE_F16:
1412 case TYPE_U16:
1413 val = 0x40;
1414 break;
1415 case TYPE_S16:
1416 val = 0x60;
1417 break;
1418 case TYPE_F32:
1419 case TYPE_U32:
1420 case TYPE_S32:
1421 val = 0x80;
1422 break;
1423 case TYPE_F64:
1424 case TYPE_U64:
1425 case TYPE_S64:
1426 val = 0xa0;
1427 break;
1428 case TYPE_B128:
1429 val = 0xc0;
1430 break;
1431 default:
1432 val = 0x80;
1433 assert(!"invalid type");
1434 break;
1435 }
1436 code[0] |= val;
1437 }
1438
1439 void
1440 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1441 {
1442 uint32_t val;
1443
1444 switch (c) {
1445 case CACHE_CA:
1446 // case CACHE_WB:
1447 val = 0x000;
1448 break;
1449 case CACHE_CG:
1450 val = 0x100;
1451 break;
1452 case CACHE_CS:
1453 val = 0x200;
1454 break;
1455 case CACHE_CV:
1456 // case CACHE_WT:
1457 val = 0x300;
1458 break;
1459 default:
1460 val = 0;
1461 assert(!"invalid caching mode");
1462 break;
1463 }
1464 code[0] |= val;
1465 }
1466
1467 static inline bool
1468 uses64bitAddress(const Instruction *ldst)
1469 {
1470 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1471 ldst->src(0).isIndirect(0) &&
1472 ldst->getIndirect(0, 0)->reg.size == 8;
1473 }
1474
1475 void
1476 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1477 {
1478 uint32_t opc;
1479
1480 switch (i->src(0).getFile()) {
1481 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1482 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1483 case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1484 default:
1485 assert(!"invalid memory file");
1486 opc = 0;
1487 break;
1488 }
1489 code[0] = 0x00000005;
1490 code[1] = opc;
1491
1492 setAddress16(i->src(0));
1493 srcId(i->src(1), 14);
1494 srcId(i->src(0).getIndirect(0), 20);
1495 if (uses64bitAddress(i))
1496 code[1] |= 1 << 26;
1497
1498 emitPredicate(i);
1499
1500 emitLoadStoreType(i->dType);
1501 emitCachingMode(i->cache);
1502 }
1503
1504 void
1505 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1506 {
1507 uint32_t opc;
1508
1509 code[0] = 0x00000005;
1510
1511 switch (i->src(0).getFile()) {
1512 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1513 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1514 case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1515 case FILE_MEMORY_CONST:
1516 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1517 emitMOV(i); // not sure if this is any better
1518 return;
1519 }
1520 opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1521 code[0] = 0x00000006 | (i->subOp << 8);
1522 break;
1523 default:
1524 assert(!"invalid memory file");
1525 opc = 0;
1526 break;
1527 }
1528 code[1] = opc;
1529
1530 defId(i->def(0), 14);
1531
1532 setAddress16(i->src(0));
1533 srcId(i->src(0).getIndirect(0), 20);
1534 if (uses64bitAddress(i))
1535 code[1] |= 1 << 26;
1536
1537 emitPredicate(i);
1538
1539 emitLoadStoreType(i->dType);
1540 emitCachingMode(i->cache);
1541 }
1542
1543 uint8_t
1544 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1545 {
1546 switch (SDATA(ref).sv.sv) {
1547 case SV_LANEID: return 0x00;
1548 case SV_PHYSID: return 0x03;
1549 case SV_VERTEX_COUNT: return 0x10;
1550 case SV_INVOCATION_ID: return 0x11;
1551 case SV_YDIR: return 0x12;
1552 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1553 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1554 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1555 case SV_GRIDID: return 0x2c;
1556 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1557 case SV_LBASE: return 0x34;
1558 case SV_SBASE: return 0x30;
1559 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1560 default:
1561 assert(!"no sreg for system value");
1562 return 0;
1563 }
1564 }
1565
1566 void
1567 CodeEmitterNVC0::emitMOV(const Instruction *i)
1568 {
1569 if (i->def(0).getFile() == FILE_PREDICATE) {
1570 if (i->src(0).getFile() == FILE_GPR) {
1571 code[0] = 0xfc01c003;
1572 code[1] = 0x1a8e0000;
1573 srcId(i->src(0), 20);
1574 } else {
1575 code[0] = 0x0001c004;
1576 code[1] = 0x0c0e0000;
1577 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1578 code[0] |= 7 << 20;
1579 if (!i->getSrc(0)->reg.data.u32)
1580 code[0] |= 1 << 23;
1581 } else {
1582 srcId(i->src(0), 20);
1583 }
1584 }
1585 defId(i->def(0), 17);
1586 emitPredicate(i);
1587 } else
1588 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1589 uint8_t sr = getSRegEncoding(i->src(0));
1590
1591 if (i->encSize == 8) {
1592 code[0] = 0x00000004 | (sr << 26);
1593 code[1] = 0x2c000000;
1594 } else {
1595 code[0] = 0x40000008 | (sr << 20);
1596 }
1597 defId(i->def(0), 14);
1598
1599 emitPredicate(i);
1600 } else
1601 if (i->encSize == 8) {
1602 uint64_t opc;
1603
1604 if (i->src(0).getFile() == FILE_IMMEDIATE)
1605 opc = HEX64(18000000, 000001e2);
1606 else
1607 if (i->src(0).getFile() == FILE_PREDICATE)
1608 opc = HEX64(080e0000, 1c000004);
1609 else
1610 opc = HEX64(28000000, 00000004);
1611
1612 opc |= i->lanes << 5;
1613
1614 emitForm_B(i, opc);
1615 } else {
1616 uint32_t imm;
1617
1618 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1619 imm = SDATA(i->src(0)).u32;
1620 if (imm & 0xfff00000) {
1621 assert(!(imm & 0x000fffff));
1622 code[0] = 0x00000318 | imm;
1623 } else {
1624 assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1625 code[0] = 0x00000118 | (imm << 20);
1626 }
1627 } else {
1628 code[0] = 0x0028;
1629 emitShortSrc2(i->src(0));
1630 }
1631 defId(i->def(0), 14);
1632
1633 emitPredicate(i);
1634 }
1635 }
1636
1637 void
1638 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
1639 {
1640 uint8_t m;
1641 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1642 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1643 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1644 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1645 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1646 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1647 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1648 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1649 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1650 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1651 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1652 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1653 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1654 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1655 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1656 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1657 default:
1658 return;
1659 }
1660 code[0] |= m << 5;
1661 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1662 code[1] |= 1 << 16;
1663 }
1664
1665 void
1666 CodeEmitterNVC0::emitSUCalc(Instruction *i)
1667 {
1668 ImmediateValue *imm = NULL;
1669 uint64_t opc;
1670
1671 if (i->srcExists(2)) {
1672 imm = i->getSrc(2)->asImm();
1673 if (imm)
1674 i->setSrc(2, NULL); // special case, make emitForm_A not assert
1675 }
1676
1677 switch (i->op) {
1678 case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
1679 case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
1680 case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
1681 default:
1682 assert(0);
1683 return;
1684 }
1685 emitForm_A(i, opc);
1686
1687 if (i->op == OP_SUCLAMP) {
1688 if (i->dType == TYPE_S32)
1689 code[0] |= 1 << 9;
1690 emitSUCLAMPMode(i->subOp);
1691 }
1692
1693 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1694 code[1] |= 1 << 16;
1695
1696 if (i->op != OP_SUEAU) {
1697 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1698 code[0] |= 63 << 14;
1699 code[1] |= i->getDef(0)->reg.data.id << 23;
1700 } else
1701 if (i->defExists(1)) { // r, p
1702 assert(i->def(1).getFile() == FILE_PREDICATE);
1703 code[1] |= i->getDef(1)->reg.data.id << 23;
1704 } else { // r, #
1705 code[1] |= 7 << 23;
1706 }
1707 }
1708 if (imm) {
1709 assert(i->op == OP_SUCLAMP);
1710 i->setSrc(2, imm);
1711 code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
1712 }
1713 }
1714
1715 void
1716 CodeEmitterNVC0::emitSUGType(DataType ty)
1717 {
1718 switch (ty) {
1719 case TYPE_S32: code[1] |= 1 << 13; break;
1720 case TYPE_U8: code[1] |= 2 << 13; break;
1721 case TYPE_S8: code[1] |= 3 << 13; break;
1722 default:
1723 assert(ty == TYPE_U32);
1724 break;
1725 }
1726 }
1727
1728 void
1729 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
1730 {
1731 const uint32_t offset = i->getSrc(s)->reg.data.offset;
1732
1733 assert(i->src(s).getFile() == FILE_MEMORY_CONST);
1734 assert(offset == (offset & 0xfffc));
1735
1736 code[1] |= 1 << 21;
1737 code[0] |= offset << 24;
1738 code[1] |= offset >> 8;
1739 code[1] |= i->getSrc(s)->reg.fileIndex << 8;
1740 }
1741
1742 void
1743 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
1744 {
1745 if (!i->srcExists(s) || (i->predSrc == s)) {
1746 code[1] |= 0x7 << 17;
1747 } else {
1748 if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
1749 code[1] |= 1 << 20;
1750 srcId(i->src(s), 32 + 17);
1751 }
1752 }
1753
1754 void
1755 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
1756 {
1757 code[0] = 0x5;
1758 code[1] = 0xd4000000 | (i->subOp << 15);
1759
1760 emitLoadStoreType(i->dType);
1761 emitSUGType(i->sType);
1762 emitCachingMode(i->cache);
1763
1764 emitPredicate(i);
1765 defId(i->def(0), 14); // destination
1766 srcId(i->src(0), 20); // address
1767 // format
1768 if (i->src(1).getFile() == FILE_GPR)
1769 srcId(i->src(1), 26);
1770 else
1771 setSUConst16(i, 1);
1772 setSUPred(i, 2);
1773 }
1774
1775 void
1776 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
1777 {
1778 code[0] = 0x5;
1779 code[1] = 0xdc000000 | (i->subOp << 15);
1780
1781 if (i->op == OP_SUSTP)
1782 code[1] |= i->tex.mask << 22;
1783 else
1784 emitLoadStoreType(i->dType);
1785 emitSUGType(i->sType);
1786 emitCachingMode(i->cache);
1787
1788 emitPredicate(i);
1789 srcId(i->src(0), 20); // address
1790 // format
1791 if (i->src(1).getFile() == FILE_GPR)
1792 srcId(i->src(1), 26);
1793 else
1794 setSUConst16(i, 1);
1795 srcId(i->src(3), 14); // values
1796 setSUPred(i, 2);
1797 }
1798
1799 void
1800 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
1801 {
1802 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
1803 case 0:
1804 code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
1805 code[1] |= (i->subOp & 0x00e0) >> 5; // vsrc2
1806 code[1] |= (i->subOp & 0x0100) << 7; // vsrc2
1807 code[1] |= (i->subOp & 0x3c00) << 13; // vdst
1808 break;
1809 case 1:
1810 code[1] |= (i->subOp & 0x000f) << 8; // v2src1
1811 code[1] |= (i->subOp & 0x0010) << 11; // v2src1
1812 code[1] |= (i->subOp & 0x01e0) >> 1; // v2src2
1813 code[1] |= (i->subOp & 0x0200) << 6; // v2src2
1814 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
1815 code[1] |= (i->mask & 0x3) << 2;
1816 break;
1817 case 2:
1818 code[1] |= (i->subOp & 0x000f) << 8; // v4src1
1819 code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
1820 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
1821 code[1] |= (i->mask & 0x3) << 2;
1822 code[1] |= (i->mask & 0xc) << 21;
1823 break;
1824 default:
1825 assert(0);
1826 break;
1827 }
1828 }
1829
1830 void
1831 CodeEmitterNVC0::emitVSHL(const Instruction *i)
1832 {
1833 uint64_t opc = 0x4;
1834
1835 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
1836 case 0: opc |= 0xe8ULL << 56; break;
1837 case 1: opc |= 0xb4ULL << 56; break;
1838 case 2: opc |= 0x94ULL << 56; break;
1839 default:
1840 assert(0);
1841 break;
1842 }
1843 if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
1844 if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
1845 if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
1846 } else {
1847 if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
1848 if (isSignedType(i->sType)) opc |= 1 << 6;
1849 }
1850 emitForm_A(i, opc);
1851 emitVectorSubOp(i);
1852
1853 if (i->saturate)
1854 code[0] |= 1 << 9;
1855 if (i->flagsDef >= 0)
1856 code[1] |= 1 << 16;
1857 }
1858
1859 bool
1860 CodeEmitterNVC0::emitInstruction(Instruction *insn)
1861 {
1862 unsigned int size = insn->encSize;
1863
1864 if (writeIssueDelays && !(codeSize & 0x3f))
1865 size += 8;
1866
1867 if (!insn->encSize) {
1868 ERROR("skipping unencodable instruction: "); insn->print();
1869 return false;
1870 } else
1871 if (codeSize + size > codeSizeLimit) {
1872 ERROR("code emitter output buffer too small\n");
1873 return false;
1874 }
1875
1876 if (writeIssueDelays) {
1877 if (!(codeSize & 0x3f)) {
1878 code[0] = 0x00000007; // cf issue delay "instruction"
1879 code[1] = 0x20000000;
1880 code += 2;
1881 codeSize += 8;
1882 }
1883 const unsigned int id = (codeSize & 0x3f) / 8 - 1;
1884 uint32_t *data = code - (id * 2 + 2);
1885 if (id <= 2) {
1886 data[0] |= insn->sched << (id * 8 + 4);
1887 } else
1888 if (id == 3) {
1889 data[0] |= insn->sched << 28;
1890 data[1] |= insn->sched >> 4;
1891 } else {
1892 data[1] |= insn->sched << ((id - 4) * 8 + 4);
1893 }
1894 }
1895
1896 // assert that instructions with multiple defs don't corrupt registers
1897 for (int d = 0; insn->defExists(d); ++d)
1898 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
1899
1900 switch (insn->op) {
1901 case OP_MOV:
1902 case OP_RDSV:
1903 emitMOV(insn);
1904 break;
1905 case OP_NOP:
1906 break;
1907 case OP_LOAD:
1908 emitLOAD(insn);
1909 break;
1910 case OP_STORE:
1911 emitSTORE(insn);
1912 break;
1913 case OP_LINTERP:
1914 case OP_PINTERP:
1915 emitINTERP(insn);
1916 break;
1917 case OP_VFETCH:
1918 emitVFETCH(insn);
1919 break;
1920 case OP_EXPORT:
1921 emitEXPORT(insn);
1922 break;
1923 case OP_PFETCH:
1924 emitPFETCH(insn);
1925 break;
1926 case OP_EMIT:
1927 case OP_RESTART:
1928 emitOUT(insn);
1929 break;
1930 case OP_ADD:
1931 case OP_SUB:
1932 if (isFloatType(insn->dType))
1933 emitFADD(insn);
1934 else
1935 emitUADD(insn);
1936 break;
1937 case OP_MUL:
1938 if (isFloatType(insn->dType))
1939 emitFMUL(insn);
1940 else
1941 emitUMUL(insn);
1942 break;
1943 case OP_MAD:
1944 case OP_FMA:
1945 if (isFloatType(insn->dType))
1946 emitFMAD(insn);
1947 else
1948 emitIMAD(insn);
1949 break;
1950 case OP_SAD:
1951 emitISAD(insn);
1952 break;
1953 case OP_NOT:
1954 emitNOT(insn);
1955 break;
1956 case OP_AND:
1957 emitLogicOp(insn, 0);
1958 break;
1959 case OP_OR:
1960 emitLogicOp(insn, 1);
1961 break;
1962 case OP_XOR:
1963 emitLogicOp(insn, 2);
1964 break;
1965 case OP_SHL:
1966 case OP_SHR:
1967 emitShift(insn);
1968 break;
1969 case OP_SET:
1970 case OP_SET_AND:
1971 case OP_SET_OR:
1972 case OP_SET_XOR:
1973 emitSET(insn->asCmp());
1974 break;
1975 case OP_SELP:
1976 emitSELP(insn);
1977 break;
1978 case OP_SLCT:
1979 emitSLCT(insn->asCmp());
1980 break;
1981 case OP_MIN:
1982 case OP_MAX:
1983 emitMINMAX(insn);
1984 break;
1985 case OP_ABS:
1986 case OP_NEG:
1987 case OP_CEIL:
1988 case OP_FLOOR:
1989 case OP_TRUNC:
1990 case OP_CVT:
1991 case OP_SAT:
1992 emitCVT(insn);
1993 break;
1994 case OP_RSQ:
1995 emitSFnOp(insn, 5);
1996 break;
1997 case OP_RCP:
1998 emitSFnOp(insn, 4);
1999 break;
2000 case OP_LG2:
2001 emitSFnOp(insn, 3);
2002 break;
2003 case OP_EX2:
2004 emitSFnOp(insn, 2);
2005 break;
2006 case OP_SIN:
2007 emitSFnOp(insn, 1);
2008 break;
2009 case OP_COS:
2010 emitSFnOp(insn, 0);
2011 break;
2012 case OP_PRESIN:
2013 case OP_PREEX2:
2014 emitPreOp(insn);
2015 break;
2016 case OP_TEX:
2017 case OP_TXB:
2018 case OP_TXL:
2019 case OP_TXD:
2020 case OP_TXF:
2021 emitTEX(insn->asTex());
2022 break;
2023 case OP_TXQ:
2024 emitTXQ(insn->asTex());
2025 break;
2026 case OP_TEXBAR:
2027 emitTEXBAR(insn);
2028 break;
2029 case OP_SUBFM:
2030 case OP_SUCLAMP:
2031 case OP_SUEAU:
2032 emitSUCalc(insn);
2033 break;
2034 case OP_MADSP:
2035 emitMADSP(insn);
2036 break;
2037 case OP_SULDB:
2038 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2039 emitSULDGB(insn->asTex());
2040 else
2041 ERROR("SULDB not yet supported on < nve4\n");
2042 break;
2043 case OP_SUSTB:
2044 case OP_SUSTP:
2045 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2046 emitSUSTGx(insn->asTex());
2047 else
2048 ERROR("SUSTx not yet supported on < nve4\n");
2049 break;
2050 case OP_BRA:
2051 case OP_CALL:
2052 case OP_PRERET:
2053 case OP_RET:
2054 case OP_DISCARD:
2055 case OP_EXIT:
2056 case OP_PRECONT:
2057 case OP_CONT:
2058 case OP_PREBREAK:
2059 case OP_BREAK:
2060 case OP_JOINAT:
2061 case OP_BRKPT:
2062 case OP_QUADON:
2063 case OP_QUADPOP:
2064 emitFlow(insn);
2065 break;
2066 case OP_QUADOP:
2067 emitQUADOP(insn, insn->subOp, insn->lanes);
2068 break;
2069 case OP_DFDX:
2070 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2071 break;
2072 case OP_DFDY:
2073 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2074 break;
2075 case OP_POPCNT:
2076 emitPOPC(insn);
2077 break;
2078 case OP_JOIN:
2079 emitNOP(insn);
2080 insn->join = 1;
2081 break;
2082 case OP_VSHL:
2083 emitVSHL(insn);
2084 break;
2085 case OP_PHI:
2086 case OP_UNION:
2087 case OP_CONSTRAINT:
2088 ERROR("operation should have been eliminated");
2089 return false;
2090 case OP_EXP:
2091 case OP_LOG:
2092 case OP_SQRT:
2093 case OP_POW:
2094 ERROR("operation should have been lowered\n");
2095 return false;
2096 default:
2097 ERROR("unknow op\n");
2098 return false;
2099 }
2100
2101 if (insn->join) {
2102 code[0] |= 0x10;
2103 assert(insn->encSize == 8);
2104 }
2105
2106 code += insn->encSize / 4;
2107 codeSize += insn->encSize;
2108 return true;
2109 }
2110
2111 uint32_t
2112 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2113 {
2114 const Target::OpInfo &info = targ->getOpInfo(i);
2115
2116 if (writeIssueDelays || info.minEncSize == 8 || 1)
2117 return 8;
2118
2119 if (i->ftz || i->saturate || i->join)
2120 return 8;
2121 if (i->rnd != ROUND_N)
2122 return 8;
2123 if (i->predSrc >= 0 && i->op == OP_MAD)
2124 return 8;
2125
2126 if (i->op == OP_PINTERP) {
2127 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2128 return 8;
2129 } else
2130 if (i->op == OP_MOV && i->lanes != 0xf) {
2131 return 8;
2132 }
2133
2134 for (int s = 0; i->srcExists(s); ++s) {
2135 if (i->src(s).isIndirect(0))
2136 return 8;
2137
2138 if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2139 if (SDATA(i->src(s)).offset >= 0x100)
2140 return 8;
2141 if (i->getSrc(s)->reg.fileIndex > 1 &&
2142 i->getSrc(s)->reg.fileIndex != 16)
2143 return 8;
2144 } else
2145 if (i->src(s).getFile() == FILE_IMMEDIATE) {
2146 if (i->dType == TYPE_F32) {
2147 if (SDATA(i->src(s)).u32 >= 0x100)
2148 return 8;
2149 } else {
2150 if (SDATA(i->src(s)).u32 > 0xff)
2151 return 8;
2152 }
2153 }
2154
2155 if (i->op == OP_CVT)
2156 continue;
2157 if (i->src(s).mod != Modifier(0)) {
2158 if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2159 if (i->op != OP_RSQ)
2160 return 8;
2161 if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2162 if (i->op != OP_ADD || s != 0)
2163 return 8;
2164 }
2165 }
2166
2167 return 4;
2168 }
2169
2170 // Simplified, erring on safe side.
2171 class SchedDataCalculator : public Pass
2172 {
2173 public:
2174 SchedDataCalculator(const Target *targ) : targ(targ) { }
2175
2176 private:
2177 struct RegScores
2178 {
2179 struct Resource {
2180 int st[DATA_FILE_COUNT]; // LD to LD delay 3
2181 int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2182 int tex; // TEX to non-TEX delay 17 (0x11)
2183 int sfu; // SFU to SFU delay 3 (except PRE-ops)
2184 int imul; // integer MUL to MUL delay 3
2185 } res;
2186 struct ScoreData {
2187 int r[64];
2188 int p[8];
2189 int c;
2190 } rd, wr;
2191 int base;
2192
2193 void rebase(const int base)
2194 {
2195 const int delta = this->base - base;
2196 if (!delta)
2197 return;
2198 this->base = 0;
2199
2200 for (int i = 0; i < 64; ++i) {
2201 rd.r[i] += delta;
2202 wr.r[i] += delta;
2203 }
2204 for (int i = 0; i < 8; ++i) {
2205 rd.p[i] += delta;
2206 wr.p[i] += delta;
2207 }
2208 rd.c += delta;
2209 wr.c += delta;
2210
2211 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2212 res.ld[f] += delta;
2213 res.st[f] += delta;
2214 }
2215 res.sfu += delta;
2216 res.imul += delta;
2217 res.tex += delta;
2218 }
2219 void wipe()
2220 {
2221 memset(&rd, 0, sizeof(rd));
2222 memset(&wr, 0, sizeof(wr));
2223 memset(&res, 0, sizeof(res));
2224 }
2225 int getLatest(const ScoreData& d) const
2226 {
2227 int max = 0;
2228 for (int i = 0; i < 64; ++i)
2229 if (d.r[i] > max)
2230 max = d.r[i];
2231 for (int i = 0; i < 8; ++i)
2232 if (d.p[i] > max)
2233 max = d.p[i];
2234 if (d.c > max)
2235 max = d.c;
2236 return max;
2237 }
2238 inline int getLatestRd() const
2239 {
2240 return getLatest(rd);
2241 }
2242 inline int getLatestWr() const
2243 {
2244 return getLatest(wr);
2245 }
2246 inline int getLatest() const
2247 {
2248 const int a = getLatestRd();
2249 const int b = getLatestWr();
2250
2251 int max = MAX2(a, b);
2252 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2253 max = MAX2(res.ld[f], max);
2254 max = MAX2(res.st[f], max);
2255 }
2256 max = MAX2(res.sfu, max);
2257 max = MAX2(res.imul, max);
2258 max = MAX2(res.tex, max);
2259 return max;
2260 }
2261 void setMax(const RegScores *that)
2262 {
2263 for (int i = 0; i < 64; ++i) {
2264 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
2265 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
2266 }
2267 for (int i = 0; i < 8; ++i) {
2268 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
2269 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
2270 }
2271 rd.c = MAX2(rd.c, that->rd.c);
2272 wr.c = MAX2(wr.c, that->wr.c);
2273
2274 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2275 res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
2276 res.st[f] = MAX2(res.st[f], that->res.st[f]);
2277 }
2278 res.sfu = MAX2(res.sfu, that->res.sfu);
2279 res.imul = MAX2(res.imul, that->res.imul);
2280 res.tex = MAX2(res.tex, that->res.tex);
2281 }
2282 void print(int cycle)
2283 {
2284 for (int i = 0; i < 64; ++i) {
2285 if (rd.r[i] > cycle)
2286 INFO("rd $r%i @ %i\n", i, rd.r[i]);
2287 if (wr.r[i] > cycle)
2288 INFO("wr $r%i @ %i\n", i, wr.r[i]);
2289 }
2290 for (int i = 0; i < 8; ++i) {
2291 if (rd.p[i] > cycle)
2292 INFO("rd $p%i @ %i\n", i, rd.p[i]);
2293 if (wr.p[i] > cycle)
2294 INFO("wr $p%i @ %i\n", i, wr.p[i]);
2295 }
2296 if (rd.c > cycle)
2297 INFO("rd $c @ %i\n", rd.c);
2298 if (wr.c > cycle)
2299 INFO("wr $c @ %i\n", wr.c);
2300 if (res.sfu > cycle)
2301 INFO("sfu @ %i\n", res.sfu);
2302 if (res.imul > cycle)
2303 INFO("imul @ %i\n", res.imul);
2304 if (res.tex > cycle)
2305 INFO("tex @ %i\n", res.tex);
2306 }
2307 };
2308
2309 RegScores *score; // for current BB
2310 std::vector<RegScores> scoreBoards;
2311 int cycle;
2312 int prevData;
2313 operation prevOp;
2314
2315 const Target *targ;
2316
2317 bool visit(Function *);
2318 bool visit(BasicBlock *);
2319
2320 void commitInsn(const Instruction *, int cycle);
2321 int calcDelay(const Instruction *, int cycle) const;
2322 void setDelay(Instruction *, int delay, Instruction *next);
2323
2324 void recordRd(const Value *, const int ready);
2325 void recordWr(const Value *, const int ready);
2326 void checkRd(const Value *, int cycle, int& delay) const;
2327 void checkWr(const Value *, int cycle, int& delay) const;
2328
2329 int getCycles(const Instruction *, int origDelay) const;
2330 };
2331
2332 void
2333 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
2334 {
2335 if (insn->op == OP_EXIT)
2336 delay = MAX2(delay, 14);
2337
2338 if (insn->op == OP_TEXBAR) {
2339 // TODO: except if results not used before EXIT
2340 insn->sched = 0xc2;
2341 } else
2342 if (insn->op == OP_JOIN || insn->join) {
2343 insn->sched = 0x00;
2344 } else
2345 if (delay >= 0 || prevData == 0x04 ||
2346 !next || !targ->canDualIssue(insn, next)) {
2347 insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
2348 if (prevOp == OP_EXPORT)
2349 insn->sched |= 0x40;
2350 else
2351 insn->sched |= 0x20;
2352 } else {
2353 insn->sched = 0x04; // dual-issue
2354 }
2355
2356 if (prevData != 0x04 || prevOp != OP_EXPORT)
2357 if (insn->sched != 0x04 || insn->op == OP_EXPORT)
2358 prevOp = insn->op;
2359
2360 prevData = insn->sched;
2361 }
2362
2363 int
2364 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
2365 {
2366 if (insn->sched & 0x80) {
2367 int c = (insn->sched & 0x0f) * 2 + 1;
2368 if (insn->op == OP_TEXBAR && origDelay > 0)
2369 c += origDelay;
2370 return c;
2371 }
2372 if (insn->sched & 0x60)
2373 return (insn->sched & 0x1f) + 1;
2374 return (insn->sched == 0x04) ? 0 : 32;
2375 }
2376
2377 bool
2378 SchedDataCalculator::visit(Function *func)
2379 {
2380 scoreBoards.resize(func->cfg.getSize());
2381 for (size_t i = 0; i < scoreBoards.size(); ++i)
2382 scoreBoards[i].wipe();
2383 return true;
2384 }
2385
2386 bool
2387 SchedDataCalculator::visit(BasicBlock *bb)
2388 {
2389 Instruction *insn;
2390 Instruction *next = NULL;
2391
2392 int cycle = 0;
2393
2394 prevData = 0x00;
2395 prevOp = OP_NOP;
2396 score = &scoreBoards.at(bb->getId());
2397
2398 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
2399 BasicBlock *in = BasicBlock::get(ei.getNode());
2400 if (in->getExit()) {
2401 if (prevData != 0x04)
2402 prevData = in->getExit()->sched;
2403 prevOp = in->getExit()->op;
2404 }
2405 if (ei.getType() != Graph::Edge::BACK)
2406 score->setMax(&scoreBoards.at(in->getId()));
2407 // back branches will wait until all target dependencies are satisfied
2408 }
2409 if (bb->cfg.incidentCount() > 1)
2410 prevOp = OP_NOP;
2411
2412 #ifdef NVC0_DEBUG_SCHED_DATA
2413 INFO("=== BB:%i initial scores\n", bb->getId());
2414 score->print(cycle);
2415 #endif
2416
2417 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
2418 next = insn->next;
2419
2420 commitInsn(insn, cycle);
2421 int delay = calcDelay(next, cycle);
2422 setDelay(insn, delay, next);
2423 cycle += getCycles(insn, delay);
2424
2425 #ifdef NVC0_DEBUG_SCHED_DATA
2426 INFO("cycle %i, sched %02x\n", cycle, insn->sched);
2427 insn->print();
2428 next->print();
2429 #endif
2430 }
2431 if (!insn)
2432 return true;
2433 commitInsn(insn, cycle);
2434
2435 int bbDelay = -1;
2436
2437 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
2438 BasicBlock *out = BasicBlock::get(ei.getNode());
2439
2440 if (ei.getType() != Graph::Edge::BACK) {
2441 // only test the first instruction of the outgoing block
2442 next = out->getEntry();
2443 if (next)
2444 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
2445 } else {
2446 // wait until all dependencies are satisfied
2447 const int regsFree = score->getLatest();
2448 next = out->getFirst();
2449 for (int c = cycle; next && c < regsFree; next = next->next) {
2450 bbDelay = MAX2(bbDelay, calcDelay(next, c));
2451 c += getCycles(next, bbDelay);
2452 }
2453 next = NULL;
2454 }
2455 }
2456 if (bb->cfg.outgoingCount() != 1)
2457 next = NULL;
2458 setDelay(insn, bbDelay, next);
2459 cycle += getCycles(insn, bbDelay);
2460
2461 score->rebase(cycle); // common base for initializing out blocks' scores
2462 return true;
2463 }
2464
2465 #define NVE4_MAX_ISSUE_DELAY 0x1f
2466 int
2467 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
2468 {
2469 int delay = 0, ready = cycle;
2470
2471 for (int s = 0; insn->srcExists(s); ++s)
2472 checkRd(insn->getSrc(s), cycle, delay);
2473 // WAR & WAW don't seem to matter
2474 // for (int s = 0; insn->srcExists(s); ++s)
2475 // recordRd(insn->getSrc(s), cycle);
2476
2477 switch (Target::getOpClass(insn->op)) {
2478 case OPCLASS_SFU:
2479 ready = score->res.sfu;
2480 break;
2481 case OPCLASS_ARITH:
2482 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2483 ready = score->res.imul;
2484 break;
2485 case OPCLASS_TEXTURE:
2486 ready = score->res.tex;
2487 break;
2488 case OPCLASS_LOAD:
2489 ready = score->res.ld[insn->src(0).getFile()];
2490 break;
2491 case OPCLASS_STORE:
2492 ready = score->res.st[insn->src(0).getFile()];
2493 break;
2494 default:
2495 break;
2496 }
2497 if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
2498 ready = MAX2(ready, score->res.tex);
2499
2500 delay = MAX2(delay, ready - cycle);
2501
2502 // if can issue next cycle, delay is 0, not 1
2503 return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
2504 }
2505
2506 void
2507 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
2508 {
2509 const int ready = cycle + targ->getLatency(insn);
2510
2511 for (int d = 0; insn->defExists(d); ++d)
2512 recordWr(insn->getDef(d), ready);
2513 // WAR & WAW don't seem to matter
2514 // for (int s = 0; insn->srcExists(s); ++s)
2515 // recordRd(insn->getSrc(s), cycle);
2516
2517 switch (Target::getOpClass(insn->op)) {
2518 case OPCLASS_SFU:
2519 score->res.sfu = cycle + 4;
2520 break;
2521 case OPCLASS_ARITH:
2522 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2523 score->res.imul = cycle + 4;
2524 break;
2525 case OPCLASS_TEXTURE:
2526 score->res.tex = cycle + 18;
2527 break;
2528 case OPCLASS_LOAD:
2529 if (insn->src(0).getFile() == FILE_MEMORY_CONST)
2530 break;
2531 score->res.ld[insn->src(0).getFile()] = cycle + 4;
2532 score->res.st[insn->src(0).getFile()] = ready;
2533 break;
2534 case OPCLASS_STORE:
2535 score->res.st[insn->src(0).getFile()] = cycle + 4;
2536 score->res.ld[insn->src(0).getFile()] = ready;
2537 break;
2538 case OPCLASS_OTHER:
2539 if (insn->op == OP_TEXBAR)
2540 score->res.tex = cycle;
2541 break;
2542 default:
2543 break;
2544 }
2545
2546 #ifdef NVC0_DEBUG_SCHED_DATA
2547 score->print(cycle);
2548 #endif
2549 }
2550
2551 void
2552 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
2553 {
2554 int ready = cycle;
2555 int a, b;
2556
2557 switch (v->reg.file) {
2558 case FILE_GPR:
2559 a = v->reg.data.id;
2560 b = a + v->reg.size / 4;
2561 for (int r = a; r < b; ++r)
2562 ready = MAX2(ready, score->rd.r[r]);
2563 break;
2564 case FILE_PREDICATE:
2565 ready = MAX2(ready, score->rd.p[v->reg.data.id]);
2566 break;
2567 case FILE_FLAGS:
2568 ready = MAX2(ready, score->rd.c);
2569 break;
2570 case FILE_SHADER_INPUT:
2571 case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
2572 case FILE_MEMORY_LOCAL:
2573 case FILE_MEMORY_CONST:
2574 case FILE_MEMORY_SHARED:
2575 case FILE_MEMORY_GLOBAL:
2576 case FILE_SYSTEM_VALUE:
2577 // TODO: any restrictions here ?
2578 break;
2579 case FILE_IMMEDIATE:
2580 break;
2581 default:
2582 assert(0);
2583 break;
2584 }
2585 if (cycle < ready)
2586 delay = MAX2(delay, ready - cycle);
2587 }
2588
2589 void
2590 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
2591 {
2592 int ready = cycle;
2593 int a, b;
2594
2595 switch (v->reg.file) {
2596 case FILE_GPR:
2597 a = v->reg.data.id;
2598 b = a + v->reg.size / 4;
2599 for (int r = a; r < b; ++r)
2600 ready = MAX2(ready, score->wr.r[r]);
2601 break;
2602 case FILE_PREDICATE:
2603 ready = MAX2(ready, score->wr.p[v->reg.data.id]);
2604 break;
2605 default:
2606 assert(v->reg.file == FILE_FLAGS);
2607 ready = MAX2(ready, score->wr.c);
2608 break;
2609 }
2610 if (cycle < ready)
2611 delay = MAX2(delay, ready - cycle);
2612 }
2613
2614 void
2615 SchedDataCalculator::recordWr(const Value *v, const int ready)
2616 {
2617 int a = v->reg.data.id;
2618
2619 if (v->reg.file == FILE_GPR) {
2620 int b = a + v->reg.size / 4;
2621 for (int r = a; r < b; ++r)
2622 score->rd.r[r] = ready;
2623 } else
2624 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
2625 if (v->reg.file == FILE_PREDICATE) {
2626 score->rd.p[a] = ready + 4;
2627 } else {
2628 assert(v->reg.file == FILE_FLAGS);
2629 score->rd.c = ready + 4;
2630 }
2631 }
2632
2633 void
2634 SchedDataCalculator::recordRd(const Value *v, const int ready)
2635 {
2636 int a = v->reg.data.id;
2637
2638 if (v->reg.file == FILE_GPR) {
2639 int b = a + v->reg.size / 4;
2640 for (int r = a; r < b; ++r)
2641 score->wr.r[r] = ready;
2642 } else
2643 if (v->reg.file == FILE_PREDICATE) {
2644 score->wr.p[a] = ready;
2645 } else
2646 if (v->reg.file == FILE_FLAGS) {
2647 score->wr.c = ready;
2648 }
2649 }
2650
2651 bool
2652 calculateSchedDataNVC0(const Target *targ, Function *func)
2653 {
2654 SchedDataCalculator sched(targ);
2655 return sched.run(func, true, true);
2656 }
2657
2658 void
2659 CodeEmitterNVC0::prepareEmission(Function *func)
2660 {
2661 CodeEmitter::prepareEmission(func);
2662
2663 if (targ->hasSWSched)
2664 calculateSchedDataNVC0(targ, func);
2665 }
2666
2667 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
2668 : CodeEmitter(target),
2669 targNVC0(target),
2670 writeIssueDelays(target->hasSWSched)
2671 {
2672 code = NULL;
2673 codeSize = codeSizeLimit = 0;
2674 relocInfo = NULL;
2675 }
2676
2677 CodeEmitter *
2678 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
2679 {
2680 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
2681 emit->setProgramType(type);
2682 return emit;
2683 }
2684
2685 CodeEmitter *
2686 TargetNVC0::getCodeEmitter(Program::Type type)
2687 {
2688 if (chipset >= NVISA_GK110_CHIPSET)
2689 return createCodeEmitterGK110(type);
2690 return createCodeEmitterNVC0(type);
2691 }
2692
2693 } // namespace nv50_ir