d486c8d39e2bd78268b25638c82f73aff3922237
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_A(const Instruction *, uint64_t);
49 void emitForm_B(const Instruction *, uint64_t);
50 void emitForm_S(const Instruction *, uint32_t, bool pred);
51
52 void emitPredicate(const Instruction *);
53
54 void setAddress16(const ValueRef&);
55 void setAddress24(const ValueRef&);
56 void setAddressByFile(const ValueRef&);
57 void setImmediate(const Instruction *, const int s); // needs op already set
58 void setImmediateS8(const ValueRef&);
59 void setSUConst16(const Instruction *, const int s);
60 void setSUPred(const Instruction *, const int s);
61
62 void emitCondCode(CondCode cc, int pos);
63 void emitInterpMode(const Instruction *);
64 void emitLoadStoreType(DataType ty);
65 void emitSUGType(DataType);
66 void emitCachingMode(CacheMode c);
67
68 void emitShortSrc2(const ValueRef&);
69
70 inline uint8_t getSRegEncoding(const ValueRef&);
71
72 void roundMode_A(const Instruction *);
73 void roundMode_C(const Instruction *);
74 void roundMode_CS(const Instruction *);
75
76 void emitNegAbs12(const Instruction *);
77
78 void emitNOP(const Instruction *);
79
80 void emitLOAD(const Instruction *);
81 void emitSTORE(const Instruction *);
82 void emitMOV(const Instruction *);
83 void emitATOM(const Instruction *);
84 void emitMEMBAR(const Instruction *);
85 void emitCCTL(const Instruction *);
86
87 void emitINTERP(const Instruction *);
88 void emitPFETCH(const Instruction *);
89 void emitVFETCH(const Instruction *);
90 void emitEXPORT(const Instruction *);
91 void emitOUT(const Instruction *);
92
93 void emitUADD(const Instruction *);
94 void emitFADD(const Instruction *);
95 void emitUMUL(const Instruction *);
96 void emitFMUL(const Instruction *);
97 void emitIMAD(const Instruction *);
98 void emitISAD(const Instruction *);
99 void emitFMAD(const Instruction *);
100 void emitMADSP(const Instruction *);
101
102 void emitNOT(Instruction *);
103 void emitLogicOp(const Instruction *, uint8_t subOp);
104 void emitPOPC(const Instruction *);
105 void emitINSBF(const Instruction *);
106 void emitEXTBF(const Instruction *);
107 void emitPERMT(const Instruction *);
108 void emitShift(const Instruction *);
109
110 void emitSFnOp(const Instruction *, uint8_t subOp);
111
112 void emitCVT(Instruction *);
113 void emitMINMAX(const Instruction *);
114 void emitPreOp(const Instruction *);
115
116 void emitSET(const CmpInstruction *);
117 void emitSLCT(const CmpInstruction *);
118 void emitSELP(const Instruction *);
119
120 void emitTEXBAR(const Instruction *);
121 void emitTEX(const TexInstruction *);
122 void emitTEXCSAA(const TexInstruction *);
123 void emitTXQ(const TexInstruction *);
124
125 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
126
127 void emitFlow(const Instruction *);
128 void emitBAR(const Instruction *);
129
130 void emitSUCLAMPMode(uint16_t);
131 void emitSUCalc(Instruction *);
132 void emitSULDGB(const TexInstruction *);
133 void emitSUSTGx(const TexInstruction *);
134
135 void emitVSHL(const Instruction *);
136 void emitVectorSubOp(const Instruction *);
137
138 inline void defId(const ValueDef&, const int pos);
139 inline void defId(const Instruction *, int d, const int pos);
140 inline void srcId(const ValueRef&, const int pos);
141 inline void srcId(const ValueRef *, const int pos);
142 inline void srcId(const Instruction *, int s, const int pos);
143 inline void srcAddr32(const ValueRef&, int pos, int shr);
144
145 inline bool isLIMM(const ValueRef&, DataType ty);
146 };
147
// Build a 64-bit opcode literal from its high and low 32-bit halves, each
// written as 8 hex digits, so both instruction words stay visually separate.
#define HEX64(hi, lo) 0x##hi##lo##ULL

// Storage data of the representative value behind a source reference (SDATA)
// or a definition (DDATA); both expand to the same expression.
#define SDATA(ref) ((ref).rep()->reg.data)
#define DDATA(ref) ((ref).rep()->reg.data)
153
154 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
155 {
156 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
157 }
158
159 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
160 {
161 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
162 }
163
164 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
165 {
166 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
167 code[pos / 32] |= r << (pos % 32);
168 }
169
170 void
171 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
172 {
173 const uint32_t offset = SDATA(src).offset >> shr;
174
175 code[pos / 32] |= offset << (pos % 32);
176 if (pos && (pos < 32))
177 code[1] |= offset >> (32 - pos);
178 }
179
180 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
181 {
182 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
183 }
184
185 void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
186 {
187 int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
188 code[pos / 32] |= r << (pos % 32);
189 }
190
191 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
192 {
193 const ImmediateValue *imm = ref.get()->asImm();
194
195 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
196 }
197
198 void
199 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
200 {
201 switch (insn->rnd) {
202 case ROUND_M: code[1] |= 1 << 23; break;
203 case ROUND_P: code[1] |= 2 << 23; break;
204 case ROUND_Z: code[1] |= 3 << 23; break;
205 default:
206 assert(insn->rnd == ROUND_N);
207 break;
208 }
209 }
210
211 void
212 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
213 {
214 if (i->src(1).mod.abs()) code[0] |= 1 << 6;
215 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
216 if (i->src(1).mod.neg()) code[0] |= 1 << 8;
217 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
218 }
219
220 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
221 {
222 uint8_t val;
223
224 switch (cc) {
225 case CC_LT: val = 0x1; break;
226 case CC_LTU: val = 0x9; break;
227 case CC_EQ: val = 0x2; break;
228 case CC_EQU: val = 0xa; break;
229 case CC_LE: val = 0x3; break;
230 case CC_LEU: val = 0xb; break;
231 case CC_GT: val = 0x4; break;
232 case CC_GTU: val = 0xc; break;
233 case CC_NE: val = 0x5; break;
234 case CC_NEU: val = 0xd; break;
235 case CC_GE: val = 0x6; break;
236 case CC_GEU: val = 0xe; break;
237 case CC_TR: val = 0xf; break;
238 case CC_FL: val = 0x0; break;
239
240 case CC_A: val = 0x14; break;
241 case CC_NA: val = 0x13; break;
242 case CC_S: val = 0x15; break;
243 case CC_NS: val = 0x12; break;
244 case CC_C: val = 0x16; break;
245 case CC_NC: val = 0x11; break;
246 case CC_O: val = 0x17; break;
247 case CC_NO: val = 0x10; break;
248
249 default:
250 val = 0;
251 assert(!"invalid condition code");
252 break;
253 }
254 code[pos / 32] |= val << (pos % 32);
255 }
256
257 void
258 CodeEmitterNVC0::emitPredicate(const Instruction *i)
259 {
260 if (i->predSrc >= 0) {
261 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
262 srcId(i->src(i->predSrc), 10);
263 if (i->cc == CC_NOT_P)
264 code[0] |= 0x2000; // negate
265 } else {
266 code[0] |= 0x1c00;
267 }
268 }
269
270 void
271 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
272 {
273 switch (src.getFile()) {
274 case FILE_MEMORY_GLOBAL:
275 srcAddr32(src, 26, 0);
276 break;
277 case FILE_MEMORY_LOCAL:
278 case FILE_MEMORY_SHARED:
279 setAddress24(src);
280 break;
281 default:
282 assert(src.getFile() == FILE_MEMORY_CONST);
283 setAddress16(src);
284 break;
285 }
286 }
287
288 void
289 CodeEmitterNVC0::setAddress16(const ValueRef& src)
290 {
291 Symbol *sym = src.get()->asSym();
292
293 assert(sym);
294
295 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
296 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
297 }
298
299 void
300 CodeEmitterNVC0::setAddress24(const ValueRef& src)
301 {
302 Symbol *sym = src.get()->asSym();
303
304 assert(sym);
305
306 code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
307 code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
308 }
309
310 void
311 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
312 {
313 const ImmediateValue *imm = i->src(s).get()->asImm();
314 uint32_t u32;
315
316 assert(imm);
317 u32 = imm->reg.data.u32;
318
319 if ((code[0] & 0xf) == 0x2) {
320 // LIMM
321 code[0] |= (u32 & 0x3f) << 26;
322 code[1] |= u32 >> 6;
323 } else
324 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
325 // integer immediate
326 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
327 assert(!(code[1] & 0xc000));
328 u32 &= 0xfffff;
329 code[0] |= (u32 & 0x3f) << 26;
330 code[1] |= 0xc000 | (u32 >> 6);
331 } else {
332 // float immediate
333 assert(!(u32 & 0x00000fff));
334 assert(!(code[1] & 0xc000));
335 code[0] |= ((u32 >> 12) & 0x3f) << 26;
336 code[1] |= 0xc000 | (u32 >> 18);
337 }
338 }
339
340 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
341 {
342 const ImmediateValue *imm = ref.get()->asImm();
343
344 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
345
346 assert(s8 == imm->reg.data.s32);
347
348 code[0] |= (s8 & 0x3f) << 26;
349 code[0] |= (s8 >> 6) << 8;
350 }
351
352 void
353 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
354 {
355 code[0] = opc;
356 code[1] = opc >> 32;
357
358 emitPredicate(i);
359
360 defId(i->def(0), 14);
361
362 int s1 = 26;
363 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
364 s1 = 49;
365
366 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
367 switch (i->getSrc(s)->reg.file) {
368 case FILE_MEMORY_CONST:
369 assert(!(code[1] & 0xc000));
370 code[1] |= (s == 2) ? 0x8000 : 0x4000;
371 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
372 setAddress16(i->src(s));
373 break;
374 case FILE_IMMEDIATE:
375 assert(s == 1 ||
376 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
377 assert(!(code[1] & 0xc000));
378 setImmediate(i, s);
379 break;
380 case FILE_GPR:
381 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
382 break;
383 srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
384 break;
385 default:
386 // ignore here, can be predicate or flags, but must not be address
387 break;
388 }
389 }
390 }
391
392 void
393 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
394 {
395 code[0] = opc;
396 code[1] = opc >> 32;
397
398 emitPredicate(i);
399
400 defId(i->def(0), 14);
401
402 switch (i->src(0).getFile()) {
403 case FILE_MEMORY_CONST:
404 assert(!(code[1] & 0xc000));
405 code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
406 setAddress16(i->src(0));
407 break;
408 case FILE_IMMEDIATE:
409 assert(!(code[1] & 0xc000));
410 setImmediate(i, 0);
411 break;
412 case FILE_GPR:
413 srcId(i->src(0), 26);
414 break;
415 default:
416 // ignore here, can be predicate or flags, but must not be address
417 break;
418 }
419 }
420
421 void
422 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
423 {
424 code[0] = opc;
425
426 int ss2a = 0;
427 if (opc == 0x0d || opc == 0x0e)
428 ss2a = 2;
429
430 defId(i->def(0), 14);
431 srcId(i->src(0), 20);
432
433 assert(pred || (i->predSrc < 0));
434 if (pred)
435 emitPredicate(i);
436
437 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
438 if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
439 assert(!(code[0] & (0x300 >> ss2a)));
440 switch (i->src(s).get()->reg.fileIndex) {
441 case 0: code[0] |= 0x100 >> ss2a; break;
442 case 1: code[0] |= 0x200 >> ss2a; break;
443 case 16: code[0] |= 0x300 >> ss2a; break;
444 default:
445 ERROR("invalid c[] space for short form\n");
446 break;
447 }
448 if (s == 1)
449 code[0] |= i->getSrc(s)->reg.data.offset << 24;
450 else
451 code[0] |= i->getSrc(s)->reg.data.offset << 6;
452 } else
453 if (i->src(s).getFile() == FILE_IMMEDIATE) {
454 assert(s == 1);
455 setImmediateS8(i->src(s));
456 } else
457 if (i->src(s).getFile() == FILE_GPR) {
458 srcId(i->src(s), (s == 1) ? 26 : 8);
459 }
460 }
461 }
462
463 void
464 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
465 {
466 if (src.getFile() == FILE_MEMORY_CONST) {
467 switch (src.get()->reg.fileIndex) {
468 case 0: code[0] |= 0x100; break;
469 case 1: code[0] |= 0x200; break;
470 case 16: code[0] |= 0x300; break;
471 default:
472 assert(!"unsupported file index for short op");
473 break;
474 }
475 srcAddr32(src, 20, 2);
476 } else {
477 srcId(src, 20);
478 assert(src.getFile() == FILE_GPR);
479 }
480 }
481
482 void
483 CodeEmitterNVC0::emitNOP(const Instruction *i)
484 {
485 code[0] = 0x000001e4;
486 code[1] = 0x40000000;
487 emitPredicate(i);
488 }
489
490 void
491 CodeEmitterNVC0::emitFMAD(const Instruction *i)
492 {
493 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
494
495 if (i->encSize == 8) {
496 if (isLIMM(i->src(1), TYPE_F32)) {
497 emitForm_A(i, HEX64(20000000, 00000002));
498 } else {
499 emitForm_A(i, HEX64(30000000, 00000000));
500
501 if (i->src(2).mod.neg())
502 code[0] |= 1 << 8;
503 }
504 roundMode_A(i);
505
506 if (neg1)
507 code[0] |= 1 << 9;
508
509 if (i->saturate)
510 code[0] |= 1 << 5;
511 if (i->ftz)
512 code[0] |= 1 << 6;
513 } else {
514 assert(!i->saturate && !i->src(2).mod.neg());
515 emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
516 false);
517 if (neg1)
518 code[0] |= 1 << 4;
519 }
520 }
521
522 void
523 CodeEmitterNVC0::emitFMUL(const Instruction *i)
524 {
525 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
526
527 assert(i->postFactor >= -3 && i->postFactor <= 3);
528
529 if (i->encSize == 8) {
530 if (isLIMM(i->src(1), TYPE_F32)) {
531 assert(i->postFactor == 0); // constant folded, hopefully
532 emitForm_A(i, HEX64(30000000, 00000002));
533 } else {
534 emitForm_A(i, HEX64(58000000, 00000000));
535 roundMode_A(i);
536 code[1] |= ((i->postFactor > 0) ?
537 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
538 }
539 if (neg)
540 code[1] ^= 1 << 25; // aliases with LIMM sign bit
541
542 if (i->saturate)
543 code[0] |= 1 << 5;
544
545 if (i->dnz)
546 code[0] |= 1 << 7;
547 else
548 if (i->ftz)
549 code[0] |= 1 << 6;
550 } else {
551 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
552 emitForm_S(i, 0xa8, true);
553 }
554 }
555
556 void
557 CodeEmitterNVC0::emitUMUL(const Instruction *i)
558 {
559 if (i->encSize == 8) {
560 if (i->src(1).getFile() == FILE_IMMEDIATE) {
561 emitForm_A(i, HEX64(10000000, 00000002));
562 } else {
563 emitForm_A(i, HEX64(50000000, 00000003));
564 }
565 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
566 code[0] |= 1 << 6;
567 if (i->sType == TYPE_S32)
568 code[0] |= 1 << 5;
569 if (i->dType == TYPE_S32)
570 code[0] |= 1 << 7;
571 } else {
572 emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
573
574 if (i->sType == TYPE_S32)
575 code[0] |= 1 << 6;
576 }
577 }
578
579 void
580 CodeEmitterNVC0::emitFADD(const Instruction *i)
581 {
582 if (i->encSize == 8) {
583 if (isLIMM(i->src(1), TYPE_F32)) {
584 assert(!i->saturate);
585 emitForm_A(i, HEX64(28000000, 00000002));
586
587 code[0] |= i->src(0).mod.abs() << 7;
588 code[0] |= i->src(0).mod.neg() << 9;
589
590 if (i->src(1).mod.abs())
591 code[1] &= 0xfdffffff;
592 if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
593 code[1] ^= 0x02000000;
594 } else {
595 emitForm_A(i, HEX64(50000000, 00000000));
596
597 roundMode_A(i);
598 if (i->saturate)
599 code[1] |= 1 << 17;
600
601 emitNegAbs12(i);
602 if (i->op == OP_SUB) code[0] ^= 1 << 8;
603 }
604 if (i->ftz)
605 code[0] |= 1 << 5;
606 } else {
607 assert(!i->saturate && i->op != OP_SUB &&
608 !i->src(0).mod.abs() &&
609 !i->src(1).mod.neg() && !i->src(1).mod.abs());
610
611 emitForm_S(i, 0x49, true);
612
613 if (i->src(0).mod.neg())
614 code[0] |= 1 << 7;
615 }
616 }
617
618 void
619 CodeEmitterNVC0::emitUADD(const Instruction *i)
620 {
621 uint32_t addOp = 0;
622
623 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
624 assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
625
626 if (i->src(0).mod.neg())
627 addOp |= 0x200;
628 if (i->src(1).mod.neg())
629 addOp |= 0x100;
630 if (i->op == OP_SUB) {
631 addOp ^= 0x100;
632 assert(addOp != 0x300); // would be add-plus-one
633 }
634
635 if (i->encSize == 8) {
636 if (isLIMM(i->src(1), TYPE_U32)) {
637 emitForm_A(i, HEX64(08000000, 00000002));
638 if (i->defExists(1))
639 code[1] |= 1 << 26; // write carry
640 } else {
641 emitForm_A(i, HEX64(48000000, 00000003));
642 if (i->defExists(1))
643 code[1] |= 1 << 16; // write carry
644 }
645 code[0] |= addOp;
646
647 if (i->saturate)
648 code[0] |= 1 << 5;
649 if (i->flagsSrc >= 0) // add carry
650 code[0] |= 1 << 6;
651 } else {
652 assert(!(addOp & 0x100));
653 emitForm_S(i, (addOp >> 3) |
654 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
655 }
656 }
657
658 // TODO: shl-add
659 void
660 CodeEmitterNVC0::emitIMAD(const Instruction *i)
661 {
662 assert(i->encSize == 8);
663 emitForm_A(i, HEX64(20000000, 00000003));
664
665 if (isSignedType(i->dType))
666 code[0] |= 1 << 7;
667 if (isSignedType(i->sType))
668 code[0] |= 1 << 5;
669
670 code[1] |= i->saturate << 24;
671
672 if (i->flagsDef >= 0) code[1] |= 1 << 16;
673 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
674
675 if (i->src(2).mod.neg()) code[0] |= 0x10;
676 if (i->src(1).mod.neg() ^
677 i->src(0).mod.neg()) code[0] |= 0x20;
678
679 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
680 code[0] |= 1 << 6;
681 }
682
683 void
684 CodeEmitterNVC0::emitMADSP(const Instruction *i)
685 {
686 assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
687
688 emitForm_A(i, HEX64(00000000, 00000003));
689
690 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
691 code[1] |= 0x01800000;
692 } else {
693 code[0] |= (i->subOp & 0x00f) << 7;
694 code[0] |= (i->subOp & 0x0f0) << 1;
695 code[0] |= (i->subOp & 0x100) >> 3;
696 code[0] |= (i->subOp & 0x200) >> 2;
697 code[1] |= (i->subOp & 0xc00) << 13;
698 }
699
700 if (i->flagsDef >= 0)
701 code[1] |= 1 << 16;
702 }
703
704 void
705 CodeEmitterNVC0::emitISAD(const Instruction *i)
706 {
707 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
708 assert(i->encSize == 8);
709
710 emitForm_A(i, HEX64(38000000, 00000003));
711
712 if (i->dType == TYPE_S32)
713 code[0] |= 1 << 5;
714 }
715
716 void
717 CodeEmitterNVC0::emitNOT(Instruction *i)
718 {
719 assert(i->encSize == 8);
720 i->setSrc(1, i->src(0));
721 emitForm_A(i, HEX64(68000000, 000001c3));
722 }
723
724 void
725 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
726 {
727 if (i->def(0).getFile() == FILE_PREDICATE) {
728 code[0] = 0x00000004 | (subOp << 30);
729 code[1] = 0x0c000000;
730
731 emitPredicate(i);
732
733 defId(i->def(0), 17);
734 srcId(i->src(0), 20);
735 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
736 srcId(i->src(1), 26);
737 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
738
739 if (i->defExists(1)) {
740 defId(i->def(1), 14);
741 } else {
742 code[0] |= 7 << 14;
743 }
744 // (a OP b) OP c
745 if (i->predSrc != 2 && i->srcExists(2)) {
746 code[1] |= subOp << 21;
747 srcId(i->src(2), 17);
748 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
749 } else {
750 code[1] |= 0x000e0000;
751 }
752 } else
753 if (i->encSize == 8) {
754 if (isLIMM(i->src(1), TYPE_U32)) {
755 emitForm_A(i, HEX64(38000000, 00000002));
756
757 if (i->flagsDef >= 0)
758 code[1] |= 1 << 26;
759 } else {
760 emitForm_A(i, HEX64(68000000, 00000003));
761
762 if (i->flagsDef >= 0)
763 code[1] |= 1 << 16;
764 }
765 code[0] |= subOp << 6;
766
767 if (i->flagsSrc >= 0) // carry
768 code[0] |= 1 << 5;
769
770 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
771 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
772 } else {
773 emitForm_S(i, (subOp << 5) |
774 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
775 }
776 }
777
778 void
779 CodeEmitterNVC0::emitPOPC(const Instruction *i)
780 {
781 emitForm_A(i, HEX64(54000000, 00000004));
782
783 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
784 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
785 }
786
787 void
788 CodeEmitterNVC0::emitINSBF(const Instruction *i)
789 {
790 emitForm_A(i, HEX64(28000000, 00000003));
791 }
792
793 void
794 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
795 {
796 emitForm_A(i, HEX64(70000000, 00000003));
797
798 if (i->dType == TYPE_S32)
799 code[0] |= 1 << 5;
800 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
801 code[0] |= 1 << 8;
802 }
803
804 void
805 CodeEmitterNVC0::emitPERMT(const Instruction *i)
806 {
807 emitForm_A(i, HEX64(24000000, 00000004));
808
809 code[0] |= i->subOp << 5;
810 }
811
812 void
813 CodeEmitterNVC0::emitShift(const Instruction *i)
814 {
815 if (i->op == OP_SHR) {
816 emitForm_A(i, HEX64(58000000, 00000003)
817 | (isSignedType(i->dType) ? 0x20 : 0x00));
818 } else {
819 emitForm_A(i, HEX64(60000000, 00000003));
820 }
821
822 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
823 code[0] |= 1 << 9;
824 }
825
826 void
827 CodeEmitterNVC0::emitPreOp(const Instruction *i)
828 {
829 if (i->encSize == 8) {
830 emitForm_B(i, HEX64(60000000, 00000000));
831
832 if (i->op == OP_PREEX2)
833 code[0] |= 0x20;
834
835 if (i->src(0).mod.abs()) code[0] |= 1 << 6;
836 if (i->src(0).mod.neg()) code[0] |= 1 << 8;
837 } else {
838 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
839 }
840 }
841
842 void
843 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
844 {
845 if (i->encSize == 8) {
846 code[0] = 0x00000000 | (subOp << 26);
847 code[1] = 0xc8000000;
848
849 emitPredicate(i);
850
851 defId(i->def(0), 14);
852 srcId(i->src(0), 20);
853
854 assert(i->src(0).getFile() == FILE_GPR);
855
856 if (i->saturate) code[0] |= 1 << 5;
857
858 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
859 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
860 } else {
861 emitForm_S(i, 0x80000008 | (subOp << 26), true);
862
863 assert(!i->src(0).mod.neg());
864 if (i->src(0).mod.abs()) code[0] |= 1 << 30;
865 }
866 }
867
868 void
869 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
870 {
871 uint64_t op;
872
873 assert(i->encSize == 8);
874
875 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
876
877 if (i->ftz)
878 op |= 1 << 5;
879 else
880 if (!isFloatType(i->dType))
881 op |= isSignedType(i->dType) ? 0x23 : 0x03;
882
883 emitForm_A(i, op);
884 emitNegAbs12(i);
885 }
886
887 void
888 CodeEmitterNVC0::roundMode_C(const Instruction *i)
889 {
890 switch (i->rnd) {
891 case ROUND_M: code[1] |= 1 << 17; break;
892 case ROUND_P: code[1] |= 2 << 17; break;
893 case ROUND_Z: code[1] |= 3 << 17; break;
894 case ROUND_NI: code[0] |= 1 << 7; break;
895 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
896 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
897 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
898 case ROUND_N: break;
899 default:
900 assert(!"invalid round mode");
901 break;
902 }
903 }
904
905 void
906 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
907 {
908 switch (i->rnd) {
909 case ROUND_M:
910 case ROUND_MI: code[0] |= 1 << 16; break;
911 case ROUND_P:
912 case ROUND_PI: code[0] |= 2 << 16; break;
913 case ROUND_Z:
914 case ROUND_ZI: code[0] |= 3 << 16; break;
915 default:
916 break;
917 }
918 }
919
920 void
921 CodeEmitterNVC0::emitCVT(Instruction *i)
922 {
923 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
924 DataType dType;
925
926 switch (i->op) {
927 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
928 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
929 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
930 default:
931 break;
932 }
933
934 const bool sat = (i->op == OP_SAT) || i->saturate;
935 const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
936 const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
937
938 if (i->op == OP_NEG && i->dType == TYPE_U32)
939 dType = TYPE_S32;
940 else
941 dType = i->dType;
942
943 if (i->encSize == 8) {
944 emitForm_B(i, HEX64(10000000, 00000004));
945
946 roundMode_C(i);
947
948 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
949 code[0] |= util_logbase2(typeSizeof(dType)) << 20;
950 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
951
952 if (sat)
953 code[0] |= 0x20;
954 if (abs)
955 code[0] |= 1 << 6;
956 if (neg && i->op != OP_ABS)
957 code[0] |= 1 << 8;
958
959 if (i->ftz)
960 code[1] |= 1 << 23;
961
962 if (isSignedIntType(dType))
963 code[0] |= 0x080;
964 if (isSignedIntType(i->sType))
965 code[0] |= 0x200;
966
967 if (isFloatType(dType)) {
968 if (!isFloatType(i->sType))
969 code[1] |= 0x08000000;
970 } else {
971 if (isFloatType(i->sType))
972 code[1] |= 0x04000000;
973 else
974 code[1] |= 0x0c000000;
975 }
976 } else {
977 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
978 code[0] = 0x298;
979 } else
980 if (isFloatType(dType)) {
981 if (isFloatType(i->sType))
982 code[0] = 0x098;
983 else
984 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
985 } else {
986 assert(isFloatType(i->sType));
987
988 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
989 }
990
991 if (neg) code[0] |= 1 << 16;
992 if (sat) code[0] |= 1 << 18;
993 if (abs) code[0] |= 1 << 19;
994
995 roundMode_CS(i);
996 }
997 }
998
999 void
1000 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1001 {
1002 uint32_t hi;
1003 uint32_t lo = 0;
1004
1005 if (i->sType == TYPE_F64)
1006 lo = 0x1;
1007 else
1008 if (!isFloatType(i->sType))
1009 lo = 0x3;
1010
1011 if (isFloatType(i->dType) || isSignedIntType(i->sType))
1012 lo |= 0x20;
1013
1014 switch (i->op) {
1015 case OP_SET_AND: hi = 0x10000000; break;
1016 case OP_SET_OR: hi = 0x10200000; break;
1017 case OP_SET_XOR: hi = 0x10400000; break;
1018 default:
1019 hi = 0x100e0000;
1020 break;
1021 }
1022 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1023
1024 if (i->op != OP_SET)
1025 srcId(i->src(2), 32 + 17);
1026
1027 if (i->def(0).getFile() == FILE_PREDICATE) {
1028 if (i->sType == TYPE_F32)
1029 code[1] += 0x10000000;
1030 else
1031 code[1] += 0x08000000;
1032
1033 code[0] &= ~0xfc000;
1034 defId(i->def(0), 17);
1035 if (i->defExists(1))
1036 defId(i->def(1), 14);
1037 else
1038 code[0] |= 0x1c000;
1039 }
1040
1041 if (i->ftz)
1042 code[1] |= 1 << 27;
1043
1044 emitCondCode(i->setCond, 32 + 23);
1045 emitNegAbs12(i);
1046 }
1047
1048 void
1049 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1050 {
1051 uint64_t op;
1052
1053 switch (i->dType) {
1054 case TYPE_S32:
1055 op = HEX64(30000000, 00000023);
1056 break;
1057 case TYPE_U32:
1058 op = HEX64(30000000, 00000003);
1059 break;
1060 case TYPE_F32:
1061 op = HEX64(38000000, 00000000);
1062 break;
1063 default:
1064 assert(!"invalid type for SLCT");
1065 op = 0;
1066 break;
1067 }
1068 emitForm_A(i, op);
1069
1070 CondCode cc = i->setCond;
1071
1072 if (i->src(2).mod.neg())
1073 cc = reverseCondCode(cc);
1074
1075 emitCondCode(cc, 32 + 23);
1076
1077 if (i->ftz)
1078 code[0] |= 1 << 5;
1079 }
1080
1081 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1082 {
1083 emitForm_A(i, HEX64(20000000, 00000004));
1084
1085 if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1086 code[1] |= 1 << 20;
1087 }
1088
1089 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1090 {
1091 code[0] = 0x00000006 | (i->subOp << 26);
1092 code[1] = 0xf0000000;
1093 emitPredicate(i);
1094 emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1095 }
1096
1097 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1098 {
1099 code[0] = 0x00000086;
1100 code[1] = 0xd0000000;
1101
1102 code[1] |= i->tex.r;
1103 code[1] |= i->tex.s << 8;
1104
1105 if (i->tex.liveOnly)
1106 code[0] |= 1 << 9;
1107
1108 defId(i->def(0), 14);
1109 srcId(i->src(0), 20);
1110 }
1111
1112 static inline bool
1113 isNextIndependentTex(const TexInstruction *i)
1114 {
1115 if (!i->next || !isTextureOp(i->next->op))
1116 return false;
1117 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1118 return false;
1119 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1120 }
1121
1122 void
1123 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1124 {
1125 code[0] = 0x00000006;
1126
1127 if (isNextIndependentTex(i))
1128 code[0] |= 0x080; // t mode
1129 else
1130 code[0] |= 0x100; // p mode
1131
1132 if (i->tex.liveOnly)
1133 code[0] |= 1 << 9;
1134
1135 switch (i->op) {
1136 case OP_TEX: code[1] = 0x80000000; break;
1137 case OP_TXB: code[1] = 0x84000000; break;
1138 case OP_TXL: code[1] = 0x86000000; break;
1139 case OP_TXF: code[1] = 0x90000000; break;
1140 case OP_TXG: code[1] = 0xa0000000; break;
1141 case OP_TXLQ: code[1] = 0xb0000000; break;
1142 case OP_TXD: code[1] = 0xe0000000; break;
1143 default:
1144 assert(!"invalid texture op");
1145 break;
1146 }
1147 if (i->op == OP_TXF) {
1148 if (!i->tex.levelZero)
1149 code[1] |= 0x02000000;
1150 } else
1151 if (i->tex.levelZero) {
1152 code[1] |= 0x02000000;
1153 }
1154
1155 if (i->op != OP_TXD && i->tex.derivAll)
1156 code[1] |= 1 << 13;
1157
1158 defId(i->def(0), 14);
1159 srcId(i->src(0), 20);
1160
1161 emitPredicate(i);
1162
1163 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1164
1165 code[1] |= i->tex.mask << 14;
1166
1167 code[1] |= i->tex.r;
1168 code[1] |= i->tex.s << 8;
1169 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1170 code[1] |= 1 << 18; // in 1st source (with array index)
1171
1172 // texture target:
1173 code[1] |= (i->tex.target.getDim() - 1) << 20;
1174 if (i->tex.target.isCube())
1175 code[1] += 2 << 20;
1176 if (i->tex.target.isArray())
1177 code[1] |= 1 << 19;
1178 if (i->tex.target.isShadow())
1179 code[1] |= 1 << 24;
1180
1181 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1182
1183 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1184 // lzero
1185 if (i->op == OP_TXL)
1186 code[1] &= ~(1 << 26);
1187 else
1188 if (i->op == OP_TXF)
1189 code[1] &= ~(1 << 25);
1190 }
1191 if (i->tex.target == TEX_TARGET_2D_MS ||
1192 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1193 code[1] |= 1 << 23;
1194
1195 if (i->tex.useOffsets) // in vecSrc0.w
1196 code[1] |= 1 << 22;
1197
1198 srcId(i, src1, 26);
1199 }
1200
1201 void
1202 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1203 {
1204 code[0] = 0x00000086;
1205 code[1] = 0xc0000000;
1206
1207 switch (i->tex.query) {
1208 case TXQ_DIMS: code[1] |= 0 << 22; break;
1209 case TXQ_TYPE: code[1] |= 1 << 22; break;
1210 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1211 case TXQ_FILTER: code[1] |= 3 << 22; break;
1212 case TXQ_LOD: code[1] |= 4 << 22; break;
1213 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1214 default:
1215 assert(!"invalid texture query");
1216 break;
1217 }
1218
1219 code[1] |= i->tex.mask << 14;
1220
1221 code[1] |= i->tex.r;
1222 code[1] |= i->tex.s << 8;
1223 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1224 code[1] |= 1 << 18;
1225
1226 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1227
1228 defId(i->def(0), 14);
1229 srcId(i->src(0), 20);
1230 srcId(i, src1, 26);
1231
1232 emitPredicate(i);
1233 }
1234
1235 void
1236 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1237 {
1238 code[0] = 0x00000000 | (laneMask << 6);
1239 code[1] = 0x48000000 | qOp;
1240
1241 defId(i->def(0), 14);
1242 srcId(i->src(0), 20);
1243 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
1244
1245 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1246 code[0] |= 1 << 9; // dall
1247
1248 emitPredicate(i);
1249 }
1250
1251 void
1252 CodeEmitterNVC0::emitFlow(const Instruction *i)
1253 {
1254 const FlowInstruction *f = i->asFlow();
1255
1256 unsigned mask; // bit 0: predicate, bit 1: target
1257
1258 code[0] = 0x00000007;
1259
1260 switch (i->op) {
1261 case OP_BRA:
1262 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1263 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1264 code[0] |= 0x4000;
1265 mask = 3;
1266 break;
1267 case OP_CALL:
1268 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1269 if (f->indirect)
1270 code[0] |= 0x4000; // indirect calls always use c[] source
1271 mask = 2;
1272 break;
1273
1274 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1275 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1276 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1277 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1278 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1279
1280 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1281 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1282 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1283 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1284
1285 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1286 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1287 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1288 default:
1289 assert(!"invalid flow operation");
1290 return;
1291 }
1292
1293 if (mask & 1) {
1294 emitPredicate(i);
1295 if (i->flagsSrc < 0)
1296 code[0] |= 0x1e0;
1297 }
1298
1299 if (!f)
1300 return;
1301
1302 if (f->allWarp)
1303 code[0] |= 1 << 15;
1304 if (f->limit)
1305 code[0] |= 1 << 16;
1306
1307 if (f->indirect) {
1308 if (code[0] & 0x4000) {
1309 assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1310 setAddress16(i->src(0));
1311 code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1312 if (f->op == OP_BRA)
1313 srcId(f->src(0).getIndirect(0), 20);
1314 } else {
1315 srcId(f, 0, 20);
1316 }
1317 }
1318
1319 if (f->op == OP_CALL) {
1320 if (f->indirect) {
1321 // nothing
1322 } else
1323 if (f->builtin) {
1324 assert(f->absolute);
1325 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1326 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1327 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1328 } else {
1329 assert(!f->absolute);
1330 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1331 code[0] |= (pcRel & 0x3f) << 26;
1332 code[1] |= (pcRel >> 6) & 0x3ffff;
1333 }
1334 } else
1335 if (mask & 2) {
1336 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1337 // currently we don't want absolute branches
1338 assert(!f->absolute);
1339 code[0] |= (pcRel & 0x3f) << 26;
1340 code[1] |= (pcRel >> 6) & 0x3ffff;
1341 }
1342 }
1343
1344 void
1345 CodeEmitterNVC0::emitBAR(const Instruction *i)
1346 {
1347 Value *rDef = NULL, *pDef = NULL;
1348
1349 switch (i->subOp) {
1350 case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break;
1351 case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break;
1352 case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break;
1353 case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1354 default:
1355 code[0] = 0x04;
1356 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1357 break;
1358 }
1359 code[1] = 0x50000000;
1360
1361 code[0] |= 63 << 14;
1362 code[1] |= 7 << 21;
1363
1364 emitPredicate(i);
1365
1366 // barrier id
1367 if (i->src(0).getFile() == FILE_GPR) {
1368 srcId(i->src(0), 20);
1369 } else {
1370 ImmediateValue *imm = i->getSrc(0)->asImm();
1371 assert(imm);
1372 code[0] |= imm->reg.data.u32 << 20;
1373 }
1374
1375 // thread count
1376 if (i->src(1).getFile() == FILE_GPR) {
1377 srcId(i->src(1), 26);
1378 } else {
1379 ImmediateValue *imm = i->getSrc(1)->asImm();
1380 assert(imm);
1381 code[0] |= imm->reg.data.u32 << 26;
1382 code[1] |= imm->reg.data.u32 >> 6;
1383 }
1384
1385 if (i->srcExists(2) && (i->predSrc != 2)) {
1386 srcId(i->src(2), 32 + 17);
1387 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1388 code[1] |= 1 << 20;
1389 } else {
1390 code[1] |= 7 << 17;
1391 }
1392
1393 if (i->defExists(0)) {
1394 if (i->def(0).getFile() == FILE_GPR)
1395 rDef = i->getDef(0);
1396 else
1397 pDef = i->getDef(0);
1398
1399 if (i->defExists(1)) {
1400 if (i->def(1).getFile() == FILE_GPR)
1401 rDef = i->getDef(1);
1402 else
1403 pDef = i->getDef(1);
1404 }
1405 }
1406 if (rDef) {
1407 code[0] &= ~(63 << 14);
1408 defId(rDef, 14);
1409 }
1410 if (pDef) {
1411 code[1] &= ~(7 << 21);
1412 defId(pDef, 32 + 21);
1413 }
1414 }
1415
1416 void
1417 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1418 {
1419 uint32_t prim = i->src(0).get()->reg.data.u32;
1420
1421 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1422 code[1] = 0x00000000 | (prim >> 6);
1423
1424 emitPredicate(i);
1425
1426 defId(i->def(0), 14);
1427 srcId(i->src(1), 20);
1428 }
1429
1430 void
1431 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1432 {
1433 code[0] = 0x00000006;
1434 code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1435
1436 if (i->perPatch)
1437 code[0] |= 0x100;
1438 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1439 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1440
1441 emitPredicate(i);
1442
1443 code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1444
1445 defId(i->def(0), 14);
1446 srcId(i->src(0).getIndirect(0), 20);
1447 srcId(i->src(0).getIndirect(1), 26); // vertex address
1448 }
1449
1450 void
1451 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1452 {
1453 unsigned int size = typeSizeof(i->dType);
1454
1455 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1456 code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1457
1458 assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1459
1460 if (i->perPatch)
1461 code[0] |= 0x100;
1462
1463 emitPredicate(i);
1464
1465 assert(i->src(1).getFile() == FILE_GPR);
1466
1467 srcId(i->src(0).getIndirect(0), 20);
1468 srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1469 srcId(i->src(1), 26);
1470 }
1471
1472 void
1473 CodeEmitterNVC0::emitOUT(const Instruction *i)
1474 {
1475 code[0] = 0x00000006;
1476 code[1] = 0x1c000000;
1477
1478 emitPredicate(i);
1479
1480 defId(i->def(0), 14); // new secret address
1481 srcId(i->src(0), 20); // old secret address, should be 0 initially
1482
1483 assert(i->src(0).getFile() == FILE_GPR);
1484
1485 if (i->op == OP_EMIT)
1486 code[0] |= 1 << 5;
1487 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1488 code[0] |= 1 << 6;
1489
1490 // vertex stream
1491 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1492 // Using immediate encoding here triggers an invalid opcode error
1493 // or random results when error reporting is disabled.
1494 // TODO: figure this out when we get multiple vertex streams
1495 assert(SDATA(i->src(1)).u32 == 0);
1496 srcId(NULL, 26);
1497 // code[1] |= 0xc000;
1498 // code[0] |= SDATA(i->src(1)).u32 << 26;
1499 } else {
1500 srcId(i->src(1), 26);
1501 }
1502 }
1503
1504 void
1505 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1506 {
1507 if (i->encSize == 8) {
1508 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1509 } else {
1510 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1511 code[0] |= 0x80;
1512 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1513 }
1514 }
1515
1516 void
1517 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1518 {
1519 const uint32_t base = i->getSrc(0)->reg.data.offset;
1520
1521 if (i->encSize == 8) {
1522 code[0] = 0x00000000;
1523 code[1] = 0xc0000000 | (base & 0xffff);
1524
1525 if (i->saturate)
1526 code[0] |= 1 << 5;
1527
1528 if (i->op == OP_PINTERP)
1529 srcId(i->src(1), 26);
1530 else
1531 code[0] |= 0x3f << 26;
1532
1533 srcId(i->src(0).getIndirect(0), 20);
1534 } else {
1535 assert(i->op == OP_PINTERP);
1536 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1537 srcId(i->src(1), 20);
1538 }
1539 emitInterpMode(i);
1540
1541 emitPredicate(i);
1542 defId(i->def(0), 14);
1543
1544 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1545 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 17);
1546 else
1547 code[1] |= 0x3f << 17;
1548 }
1549
1550 void
1551 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1552 {
1553 uint8_t val;
1554
1555 switch (ty) {
1556 case TYPE_U8:
1557 val = 0x00;
1558 break;
1559 case TYPE_S8:
1560 val = 0x20;
1561 break;
1562 case TYPE_F16:
1563 case TYPE_U16:
1564 val = 0x40;
1565 break;
1566 case TYPE_S16:
1567 val = 0x60;
1568 break;
1569 case TYPE_F32:
1570 case TYPE_U32:
1571 case TYPE_S32:
1572 val = 0x80;
1573 break;
1574 case TYPE_F64:
1575 case TYPE_U64:
1576 case TYPE_S64:
1577 val = 0xa0;
1578 break;
1579 case TYPE_B128:
1580 val = 0xc0;
1581 break;
1582 default:
1583 val = 0x80;
1584 assert(!"invalid type");
1585 break;
1586 }
1587 code[0] |= val;
1588 }
1589
1590 void
1591 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1592 {
1593 uint32_t val;
1594
1595 switch (c) {
1596 case CACHE_CA:
1597 // case CACHE_WB:
1598 val = 0x000;
1599 break;
1600 case CACHE_CG:
1601 val = 0x100;
1602 break;
1603 case CACHE_CS:
1604 val = 0x200;
1605 break;
1606 case CACHE_CV:
1607 // case CACHE_WT:
1608 val = 0x300;
1609 break;
1610 default:
1611 val = 0;
1612 assert(!"invalid caching mode");
1613 break;
1614 }
1615 code[0] |= val;
1616 }
1617
1618 static inline bool
1619 uses64bitAddress(const Instruction *ldst)
1620 {
1621 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1622 ldst->src(0).isIndirect(0) &&
1623 ldst->getIndirect(0, 0)->reg.size == 8;
1624 }
1625
1626 void
1627 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1628 {
1629 uint32_t opc;
1630
1631 switch (i->src(0).getFile()) {
1632 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1633 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1634 case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1635 default:
1636 assert(!"invalid memory file");
1637 opc = 0;
1638 break;
1639 }
1640 code[0] = 0x00000005;
1641 code[1] = opc;
1642
1643 setAddressByFile(i->src(0));
1644 srcId(i->src(1), 14);
1645 srcId(i->src(0).getIndirect(0), 20);
1646 if (uses64bitAddress(i))
1647 code[1] |= 1 << 26;
1648
1649 emitPredicate(i);
1650
1651 emitLoadStoreType(i->dType);
1652 emitCachingMode(i->cache);
1653 }
1654
1655 void
1656 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1657 {
1658 uint32_t opc;
1659
1660 code[0] = 0x00000005;
1661
1662 switch (i->src(0).getFile()) {
1663 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1664 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1665 case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1666 case FILE_MEMORY_CONST:
1667 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1668 emitMOV(i); // not sure if this is any better
1669 return;
1670 }
1671 opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1672 code[0] = 0x00000006 | (i->subOp << 8);
1673 break;
1674 default:
1675 assert(!"invalid memory file");
1676 opc = 0;
1677 break;
1678 }
1679 code[1] = opc;
1680
1681 defId(i->def(0), 14);
1682
1683 setAddressByFile(i->src(0));
1684 srcId(i->src(0).getIndirect(0), 20);
1685 if (uses64bitAddress(i))
1686 code[1] |= 1 << 26;
1687
1688 emitPredicate(i);
1689
1690 emitLoadStoreType(i->dType);
1691 emitCachingMode(i->cache);
1692 }
1693
1694 uint8_t
1695 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1696 {
1697 switch (SDATA(ref).sv.sv) {
1698 case SV_LANEID: return 0x00;
1699 case SV_PHYSID: return 0x03;
1700 case SV_VERTEX_COUNT: return 0x10;
1701 case SV_INVOCATION_ID: return 0x11;
1702 case SV_YDIR: return 0x12;
1703 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1704 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1705 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1706 case SV_GRIDID: return 0x2c;
1707 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1708 case SV_LBASE: return 0x34;
1709 case SV_SBASE: return 0x30;
1710 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1711 default:
1712 assert(!"no sreg for system value");
1713 return 0;
1714 }
1715 }
1716
1717 void
1718 CodeEmitterNVC0::emitMOV(const Instruction *i)
1719 {
1720 if (i->def(0).getFile() == FILE_PREDICATE) {
1721 if (i->src(0).getFile() == FILE_GPR) {
1722 code[0] = 0xfc01c003;
1723 code[1] = 0x1a8e0000;
1724 srcId(i->src(0), 20);
1725 } else {
1726 code[0] = 0x0001c004;
1727 code[1] = 0x0c0e0000;
1728 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1729 code[0] |= 7 << 20;
1730 if (!i->getSrc(0)->reg.data.u32)
1731 code[0] |= 1 << 23;
1732 } else {
1733 srcId(i->src(0), 20);
1734 }
1735 }
1736 defId(i->def(0), 17);
1737 emitPredicate(i);
1738 } else
1739 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1740 uint8_t sr = getSRegEncoding(i->src(0));
1741
1742 if (i->encSize == 8) {
1743 code[0] = 0x00000004 | (sr << 26);
1744 code[1] = 0x2c000000;
1745 } else {
1746 code[0] = 0x40000008 | (sr << 20);
1747 }
1748 defId(i->def(0), 14);
1749
1750 emitPredicate(i);
1751 } else
1752 if (i->encSize == 8) {
1753 uint64_t opc;
1754
1755 if (i->src(0).getFile() == FILE_IMMEDIATE)
1756 opc = HEX64(18000000, 000001e2);
1757 else
1758 if (i->src(0).getFile() == FILE_PREDICATE)
1759 opc = HEX64(080e0000, 1c000004);
1760 else
1761 opc = HEX64(28000000, 00000004);
1762
1763 opc |= i->lanes << 5;
1764
1765 emitForm_B(i, opc);
1766 } else {
1767 uint32_t imm;
1768
1769 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1770 imm = SDATA(i->src(0)).u32;
1771 if (imm & 0xfff00000) {
1772 assert(!(imm & 0x000fffff));
1773 code[0] = 0x00000318 | imm;
1774 } else {
1775 assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1776 code[0] = 0x00000118 | (imm << 20);
1777 }
1778 } else {
1779 code[0] = 0x0028;
1780 emitShortSrc2(i->src(0));
1781 }
1782 defId(i->def(0), 14);
1783
1784 emitPredicate(i);
1785 }
1786 }
1787
1788 void
1789 CodeEmitterNVC0::emitATOM(const Instruction *i)
1790 {
1791 const bool hasDst = i->defExists(0);
1792 const bool casOrExch =
1793 i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
1794 i->subOp == NV50_IR_SUBOP_ATOM_CAS;
1795
1796 if (i->dType == TYPE_U64) {
1797 switch (i->subOp) {
1798 case NV50_IR_SUBOP_ATOM_ADD:
1799 code[0] = 0x205;
1800 if (hasDst)
1801 code[1] = 0x507e0000;
1802 else
1803 code[1] = 0x10000000;
1804 break;
1805 case NV50_IR_SUBOP_ATOM_EXCH:
1806 code[0] = 0x305;
1807 code[1] = 0x507e0000;
1808 break;
1809 case NV50_IR_SUBOP_ATOM_CAS:
1810 code[0] = 0x325;
1811 code[1] = 0x50000000;
1812 break;
1813 default:
1814 assert(!"invalid u64 red op");
1815 break;
1816 }
1817 } else
1818 if (i->dType == TYPE_U32) {
1819 switch (i->subOp) {
1820 case NV50_IR_SUBOP_ATOM_EXCH:
1821 code[0] = 0x105;
1822 code[1] = 0x507e0000;
1823 break;
1824 case NV50_IR_SUBOP_ATOM_CAS:
1825 code[0] = 0x125;
1826 code[1] = 0x50000000;
1827 break;
1828 default:
1829 code[0] = 0x5 | (i->subOp << 5);
1830 if (hasDst)
1831 code[1] = 0x507e0000;
1832 else
1833 code[1] = 0x10000000;
1834 break;
1835 }
1836 } else
1837 if (i->dType == TYPE_S32) {
1838 assert(i->subOp <= 2);
1839 code[0] = 0x205 | (i->subOp << 5);
1840 if (hasDst)
1841 code[1] = 0x587e0000;
1842 else
1843 code[1] = 0x18000000;
1844 } else
1845 if (i->dType == TYPE_F32) {
1846 assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
1847 code[0] = 0x205;
1848 if (hasDst)
1849 code[1] = 0x687e0000;
1850 else
1851 code[1] = 0x28000000;
1852 }
1853
1854 emitPredicate(i);
1855
1856 srcId(i->src(1), 14);
1857
1858 if (hasDst)
1859 defId(i->def(0), 32 + 11);
1860 else
1861 if (casOrExch)
1862 code[1] |= 63 << 11;
1863
1864 if (hasDst || casOrExch) {
1865 const int32_t offset = SDATA(i->src(0)).offset;
1866 assert(offset < 0x80000 && offset >= -0x80000);
1867 code[0] |= offset << 26;
1868 code[1] |= (offset & 0x1ffc0) >> 6;
1869 code[1] |= (offset & 0xe0000) << 6;
1870 } else {
1871 srcAddr32(i->src(0), 26, 0);
1872 }
1873 if (i->getIndirect(0, 0)) {
1874 srcId(i->getIndirect(0, 0), 20);
1875 if (i->getIndirect(0, 0)->reg.size == 8)
1876 code[1] |= 1 << 26;
1877 } else {
1878 code[0] |= 63 << 20;
1879 }
1880
1881 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1882 srcId(i->src(2), 32 + 17);
1883 }
1884
1885 void
1886 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
1887 {
1888 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
1889 case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
1890 case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break;
1891 default:
1892 code[0] = 0x45;
1893 assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
1894 break;
1895 }
1896 code[1] = 0xe0000000;
1897
1898 emitPredicate(i);
1899 }
1900
1901 void
1902 CodeEmitterNVC0::emitCCTL(const Instruction *i)
1903 {
1904 code[0] = 0x00000005 | (i->subOp << 5);
1905
1906 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
1907 code[1] = 0x98000000;
1908 srcAddr32(i->src(0), 28, 2);
1909 } else {
1910 code[1] = 0xd0000000;
1911 setAddress24(i->src(0));
1912 }
1913 if (uses64bitAddress(i))
1914 code[1] |= 1 << 26;
1915 srcId(i->src(0).getIndirect(0), 20);
1916
1917 emitPredicate(i);
1918
1919 defId(i, 0, 14);
1920 }
1921
1922 void
1923 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
1924 {
1925 uint8_t m;
1926 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1927 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1928 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1929 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1930 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1931 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1932 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1933 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1934 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1935 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1936 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1937 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1938 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1939 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1940 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1941 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1942 default:
1943 return;
1944 }
1945 code[0] |= m << 5;
1946 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1947 code[1] |= 1 << 16;
1948 }
1949
1950 void
1951 CodeEmitterNVC0::emitSUCalc(Instruction *i)
1952 {
1953 ImmediateValue *imm = NULL;
1954 uint64_t opc;
1955
1956 if (i->srcExists(2)) {
1957 imm = i->getSrc(2)->asImm();
1958 if (imm)
1959 i->setSrc(2, NULL); // special case, make emitForm_A not assert
1960 }
1961
1962 switch (i->op) {
1963 case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
1964 case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
1965 case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
1966 default:
1967 assert(0);
1968 return;
1969 }
1970 emitForm_A(i, opc);
1971
1972 if (i->op == OP_SUCLAMP) {
1973 if (i->dType == TYPE_S32)
1974 code[0] |= 1 << 9;
1975 emitSUCLAMPMode(i->subOp);
1976 }
1977
1978 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1979 code[1] |= 1 << 16;
1980
1981 if (i->op != OP_SUEAU) {
1982 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1983 code[0] |= 63 << 14;
1984 code[1] |= i->getDef(0)->reg.data.id << 23;
1985 } else
1986 if (i->defExists(1)) { // r, p
1987 assert(i->def(1).getFile() == FILE_PREDICATE);
1988 code[1] |= i->getDef(1)->reg.data.id << 23;
1989 } else { // r, #
1990 code[1] |= 7 << 23;
1991 }
1992 }
1993 if (imm) {
1994 assert(i->op == OP_SUCLAMP);
1995 i->setSrc(2, imm);
1996 code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
1997 }
1998 }
1999
2000 void
2001 CodeEmitterNVC0::emitSUGType(DataType ty)
2002 {
2003 switch (ty) {
2004 case TYPE_S32: code[1] |= 1 << 13; break;
2005 case TYPE_U8: code[1] |= 2 << 13; break;
2006 case TYPE_S8: code[1] |= 3 << 13; break;
2007 default:
2008 assert(ty == TYPE_U32);
2009 break;
2010 }
2011 }
2012
2013 void
2014 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2015 {
2016 const uint32_t offset = i->getSrc(s)->reg.data.offset;
2017
2018 assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2019 assert(offset == (offset & 0xfffc));
2020
2021 code[1] |= 1 << 21;
2022 code[0] |= offset << 24;
2023 code[1] |= offset >> 8;
2024 code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2025 }
2026
2027 void
2028 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2029 {
2030 if (!i->srcExists(s) || (i->predSrc == s)) {
2031 code[1] |= 0x7 << 17;
2032 } else {
2033 if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2034 code[1] |= 1 << 20;
2035 srcId(i->src(s), 32 + 17);
2036 }
2037 }
2038
2039 void
2040 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2041 {
2042 code[0] = 0x5;
2043 code[1] = 0xd4000000 | (i->subOp << 15);
2044
2045 emitLoadStoreType(i->dType);
2046 emitSUGType(i->sType);
2047 emitCachingMode(i->cache);
2048
2049 emitPredicate(i);
2050 defId(i->def(0), 14); // destination
2051 srcId(i->src(0), 20); // address
2052 // format
2053 if (i->src(1).getFile() == FILE_GPR)
2054 srcId(i->src(1), 26);
2055 else
2056 setSUConst16(i, 1);
2057 setSUPred(i, 2);
2058 }
2059
2060 void
2061 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2062 {
2063 code[0] = 0x5;
2064 code[1] = 0xdc000000 | (i->subOp << 15);
2065
2066 if (i->op == OP_SUSTP)
2067 code[1] |= i->tex.mask << 22;
2068 else
2069 emitLoadStoreType(i->dType);
2070 emitSUGType(i->sType);
2071 emitCachingMode(i->cache);
2072
2073 emitPredicate(i);
2074 srcId(i->src(0), 20); // address
2075 // format
2076 if (i->src(1).getFile() == FILE_GPR)
2077 srcId(i->src(1), 26);
2078 else
2079 setSUConst16(i, 1);
2080 srcId(i->src(3), 14); // values
2081 setSUPred(i, 2);
2082 }
2083
2084 void
2085 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2086 {
2087 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2088 case 0:
2089 code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2090 code[1] |= (i->subOp & 0x00e0) >> 5; // vsrc2
2091 code[1] |= (i->subOp & 0x0100) << 7; // vsrc2
2092 code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2093 break;
2094 case 1:
2095 code[1] |= (i->subOp & 0x000f) << 8; // v2src1
2096 code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2097 code[1] |= (i->subOp & 0x01e0) >> 1; // v2src2
2098 code[1] |= (i->subOp & 0x0200) << 6; // v2src2
2099 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2100 code[1] |= (i->mask & 0x3) << 2;
2101 break;
2102 case 2:
2103 code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2104 code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2105 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2106 code[1] |= (i->mask & 0x3) << 2;
2107 code[1] |= (i->mask & 0xc) << 21;
2108 break;
2109 default:
2110 assert(0);
2111 break;
2112 }
2113 }
2114
2115 void
2116 CodeEmitterNVC0::emitVSHL(const Instruction *i)
2117 {
2118 uint64_t opc = 0x4;
2119
2120 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2121 case 0: opc |= 0xe8ULL << 56; break;
2122 case 1: opc |= 0xb4ULL << 56; break;
2123 case 2: opc |= 0x94ULL << 56; break;
2124 default:
2125 assert(0);
2126 break;
2127 }
2128 if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2129 if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2130 if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2131 } else {
2132 if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2133 if (isSignedType(i->sType)) opc |= 1 << 6;
2134 }
2135 emitForm_A(i, opc);
2136 emitVectorSubOp(i);
2137
2138 if (i->saturate)
2139 code[0] |= 1 << 9;
2140 if (i->flagsDef >= 0)
2141 code[1] |= 1 << 16;
2142 }
2143
2144 bool
2145 CodeEmitterNVC0::emitInstruction(Instruction *insn)
2146 {
2147 unsigned int size = insn->encSize;
2148
2149 if (writeIssueDelays && !(codeSize & 0x3f))
2150 size += 8;
2151
2152 if (!insn->encSize) {
2153 ERROR("skipping unencodable instruction: "); insn->print();
2154 return false;
2155 } else
2156 if (codeSize + size > codeSizeLimit) {
2157 ERROR("code emitter output buffer too small\n");
2158 return false;
2159 }
2160
2161 if (writeIssueDelays) {
2162 if (!(codeSize & 0x3f)) {
2163 code[0] = 0x00000007; // cf issue delay "instruction"
2164 code[1] = 0x20000000;
2165 code += 2;
2166 codeSize += 8;
2167 }
2168 const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2169 uint32_t *data = code - (id * 2 + 2);
2170 if (id <= 2) {
2171 data[0] |= insn->sched << (id * 8 + 4);
2172 } else
2173 if (id == 3) {
2174 data[0] |= insn->sched << 28;
2175 data[1] |= insn->sched >> 4;
2176 } else {
2177 data[1] |= insn->sched << ((id - 4) * 8 + 4);
2178 }
2179 }
2180
2181 // assert that instructions with multiple defs don't corrupt registers
2182 for (int d = 0; insn->defExists(d); ++d)
2183 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2184
2185 switch (insn->op) {
2186 case OP_MOV:
2187 case OP_RDSV:
2188 emitMOV(insn);
2189 break;
2190 case OP_NOP:
2191 break;
2192 case OP_LOAD:
2193 emitLOAD(insn);
2194 break;
2195 case OP_STORE:
2196 emitSTORE(insn);
2197 break;
2198 case OP_LINTERP:
2199 case OP_PINTERP:
2200 emitINTERP(insn);
2201 break;
2202 case OP_VFETCH:
2203 emitVFETCH(insn);
2204 break;
2205 case OP_EXPORT:
2206 emitEXPORT(insn);
2207 break;
2208 case OP_PFETCH:
2209 emitPFETCH(insn);
2210 break;
2211 case OP_EMIT:
2212 case OP_RESTART:
2213 emitOUT(insn);
2214 break;
2215 case OP_ADD:
2216 case OP_SUB:
2217 if (isFloatType(insn->dType))
2218 emitFADD(insn);
2219 else
2220 emitUADD(insn);
2221 break;
2222 case OP_MUL:
2223 if (isFloatType(insn->dType))
2224 emitFMUL(insn);
2225 else
2226 emitUMUL(insn);
2227 break;
2228 case OP_MAD:
2229 case OP_FMA:
2230 if (isFloatType(insn->dType))
2231 emitFMAD(insn);
2232 else
2233 emitIMAD(insn);
2234 break;
2235 case OP_SAD:
2236 emitISAD(insn);
2237 break;
2238 case OP_NOT:
2239 emitNOT(insn);
2240 break;
2241 case OP_AND:
2242 emitLogicOp(insn, 0);
2243 break;
2244 case OP_OR:
2245 emitLogicOp(insn, 1);
2246 break;
2247 case OP_XOR:
2248 emitLogicOp(insn, 2);
2249 break;
2250 case OP_SHL:
2251 case OP_SHR:
2252 emitShift(insn);
2253 break;
2254 case OP_SET:
2255 case OP_SET_AND:
2256 case OP_SET_OR:
2257 case OP_SET_XOR:
2258 emitSET(insn->asCmp());
2259 break;
2260 case OP_SELP:
2261 emitSELP(insn);
2262 break;
2263 case OP_SLCT:
2264 emitSLCT(insn->asCmp());
2265 break;
2266 case OP_MIN:
2267 case OP_MAX:
2268 emitMINMAX(insn);
2269 break;
2270 case OP_ABS:
2271 case OP_NEG:
2272 case OP_CEIL:
2273 case OP_FLOOR:
2274 case OP_TRUNC:
2275 case OP_CVT:
2276 case OP_SAT:
2277 emitCVT(insn);
2278 break;
2279 case OP_RSQ:
2280 emitSFnOp(insn, 5);
2281 break;
2282 case OP_RCP:
2283 emitSFnOp(insn, 4);
2284 break;
2285 case OP_LG2:
2286 emitSFnOp(insn, 3);
2287 break;
2288 case OP_EX2:
2289 emitSFnOp(insn, 2);
2290 break;
2291 case OP_SIN:
2292 emitSFnOp(insn, 1);
2293 break;
2294 case OP_COS:
2295 emitSFnOp(insn, 0);
2296 break;
2297 case OP_PRESIN:
2298 case OP_PREEX2:
2299 emitPreOp(insn);
2300 break;
2301 case OP_TEX:
2302 case OP_TXB:
2303 case OP_TXL:
2304 case OP_TXD:
2305 case OP_TXF:
2306 case OP_TXG:
2307 case OP_TXLQ:
2308 emitTEX(insn->asTex());
2309 break;
2310 case OP_TXQ:
2311 emitTXQ(insn->asTex());
2312 break;
2313 case OP_TEXBAR:
2314 emitTEXBAR(insn);
2315 break;
2316 case OP_SUBFM:
2317 case OP_SUCLAMP:
2318 case OP_SUEAU:
2319 emitSUCalc(insn);
2320 break;
2321 case OP_MADSP:
2322 emitMADSP(insn);
2323 break;
2324 case OP_SULDB:
2325 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2326 emitSULDGB(insn->asTex());
2327 else
2328 ERROR("SULDB not yet supported on < nve4\n");
2329 break;
2330 case OP_SUSTB:
2331 case OP_SUSTP:
2332 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2333 emitSUSTGx(insn->asTex());
2334 else
2335 ERROR("SUSTx not yet supported on < nve4\n");
2336 break;
2337 case OP_ATOM:
2338 emitATOM(insn);
2339 break;
2340 case OP_BRA:
2341 case OP_CALL:
2342 case OP_PRERET:
2343 case OP_RET:
2344 case OP_DISCARD:
2345 case OP_EXIT:
2346 case OP_PRECONT:
2347 case OP_CONT:
2348 case OP_PREBREAK:
2349 case OP_BREAK:
2350 case OP_JOINAT:
2351 case OP_BRKPT:
2352 case OP_QUADON:
2353 case OP_QUADPOP:
2354 emitFlow(insn);
2355 break;
2356 case OP_QUADOP:
2357 emitQUADOP(insn, insn->subOp, insn->lanes);
2358 break;
2359 case OP_DFDX:
2360 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2361 break;
2362 case OP_DFDY:
2363 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2364 break;
2365 case OP_POPCNT:
2366 emitPOPC(insn);
2367 break;
2368 case OP_INSBF:
2369 emitINSBF(insn);
2370 break;
2371 case OP_EXTBF:
2372 emitEXTBF(insn);
2373 break;
2374 case OP_PERMT:
2375 emitPERMT(insn);
2376 break;
2377 case OP_JOIN:
2378 emitNOP(insn);
2379 insn->join = 1;
2380 break;
2381 case OP_BAR:
2382 emitBAR(insn);
2383 break;
2384 case OP_MEMBAR:
2385 emitMEMBAR(insn);
2386 break;
2387 case OP_CCTL:
2388 emitCCTL(insn);
2389 break;
2390 case OP_VSHL:
2391 emitVSHL(insn);
2392 break;
2393 case OP_PHI:
2394 case OP_UNION:
2395 case OP_CONSTRAINT:
2396 ERROR("operation should have been eliminated");
2397 return false;
2398 case OP_EXP:
2399 case OP_LOG:
2400 case OP_SQRT:
2401 case OP_POW:
2402 ERROR("operation should have been lowered\n");
2403 return false;
2404 default:
2405 ERROR("unknow op\n");
2406 return false;
2407 }
2408
2409 if (insn->join) {
2410 code[0] |= 0x10;
2411 assert(insn->encSize == 8);
2412 }
2413
2414 code += insn->encSize / 4;
2415 codeSize += insn->encSize;
2416 return true;
2417 }
2418
2419 uint32_t
2420 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2421 {
2422 const Target::OpInfo &info = targ->getOpInfo(i);
2423
2424 if (writeIssueDelays || info.minEncSize == 8 || 1)
2425 return 8;
2426
2427 if (i->ftz || i->saturate || i->join)
2428 return 8;
2429 if (i->rnd != ROUND_N)
2430 return 8;
2431 if (i->predSrc >= 0 && i->op == OP_MAD)
2432 return 8;
2433
2434 if (i->op == OP_PINTERP) {
2435 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2436 return 8;
2437 } else
2438 if (i->op == OP_MOV && i->lanes != 0xf) {
2439 return 8;
2440 }
2441
2442 for (int s = 0; i->srcExists(s); ++s) {
2443 if (i->src(s).isIndirect(0))
2444 return 8;
2445
2446 if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2447 if (SDATA(i->src(s)).offset >= 0x100)
2448 return 8;
2449 if (i->getSrc(s)->reg.fileIndex > 1 &&
2450 i->getSrc(s)->reg.fileIndex != 16)
2451 return 8;
2452 } else
2453 if (i->src(s).getFile() == FILE_IMMEDIATE) {
2454 if (i->dType == TYPE_F32) {
2455 if (SDATA(i->src(s)).u32 >= 0x100)
2456 return 8;
2457 } else {
2458 if (SDATA(i->src(s)).u32 > 0xff)
2459 return 8;
2460 }
2461 }
2462
2463 if (i->op == OP_CVT)
2464 continue;
2465 if (i->src(s).mod != Modifier(0)) {
2466 if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2467 if (i->op != OP_RSQ)
2468 return 8;
2469 if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2470 if (i->op != OP_ADD || s != 0)
2471 return 8;
2472 }
2473 }
2474
2475 return 4;
2476 }
2477
2478 // Simplified, erring on safe side.
2479 class SchedDataCalculator : public Pass
2480 {
2481 public:
2482 SchedDataCalculator(const Target *targ) : targ(targ) { }
2483
2484 private:
2485 struct RegScores
2486 {
2487 struct Resource {
2488 int st[DATA_FILE_COUNT]; // LD to LD delay 3
2489 int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2490 int tex; // TEX to non-TEX delay 17 (0x11)
2491 int sfu; // SFU to SFU delay 3 (except PRE-ops)
2492 int imul; // integer MUL to MUL delay 3
2493 } res;
2494 struct ScoreData {
2495 int r[64];
2496 int p[8];
2497 int c;
2498 } rd, wr;
2499 int base;
2500
2501 void rebase(const int base)
2502 {
2503 const int delta = this->base - base;
2504 if (!delta)
2505 return;
2506 this->base = 0;
2507
2508 for (int i = 0; i < 64; ++i) {
2509 rd.r[i] += delta;
2510 wr.r[i] += delta;
2511 }
2512 for (int i = 0; i < 8; ++i) {
2513 rd.p[i] += delta;
2514 wr.p[i] += delta;
2515 }
2516 rd.c += delta;
2517 wr.c += delta;
2518
2519 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2520 res.ld[f] += delta;
2521 res.st[f] += delta;
2522 }
2523 res.sfu += delta;
2524 res.imul += delta;
2525 res.tex += delta;
2526 }
2527 void wipe()
2528 {
2529 memset(&rd, 0, sizeof(rd));
2530 memset(&wr, 0, sizeof(wr));
2531 memset(&res, 0, sizeof(res));
2532 }
2533 int getLatest(const ScoreData& d) const
2534 {
2535 int max = 0;
2536 for (int i = 0; i < 64; ++i)
2537 if (d.r[i] > max)
2538 max = d.r[i];
2539 for (int i = 0; i < 8; ++i)
2540 if (d.p[i] > max)
2541 max = d.p[i];
2542 if (d.c > max)
2543 max = d.c;
2544 return max;
2545 }
2546 inline int getLatestRd() const
2547 {
2548 return getLatest(rd);
2549 }
2550 inline int getLatestWr() const
2551 {
2552 return getLatest(wr);
2553 }
2554 inline int getLatest() const
2555 {
2556 const int a = getLatestRd();
2557 const int b = getLatestWr();
2558
2559 int max = MAX2(a, b);
2560 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2561 max = MAX2(res.ld[f], max);
2562 max = MAX2(res.st[f], max);
2563 }
2564 max = MAX2(res.sfu, max);
2565 max = MAX2(res.imul, max);
2566 max = MAX2(res.tex, max);
2567 return max;
2568 }
2569 void setMax(const RegScores *that)
2570 {
2571 for (int i = 0; i < 64; ++i) {
2572 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
2573 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
2574 }
2575 for (int i = 0; i < 8; ++i) {
2576 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
2577 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
2578 }
2579 rd.c = MAX2(rd.c, that->rd.c);
2580 wr.c = MAX2(wr.c, that->wr.c);
2581
2582 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2583 res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
2584 res.st[f] = MAX2(res.st[f], that->res.st[f]);
2585 }
2586 res.sfu = MAX2(res.sfu, that->res.sfu);
2587 res.imul = MAX2(res.imul, that->res.imul);
2588 res.tex = MAX2(res.tex, that->res.tex);
2589 }
2590 void print(int cycle)
2591 {
2592 for (int i = 0; i < 64; ++i) {
2593 if (rd.r[i] > cycle)
2594 INFO("rd $r%i @ %i\n", i, rd.r[i]);
2595 if (wr.r[i] > cycle)
2596 INFO("wr $r%i @ %i\n", i, wr.r[i]);
2597 }
2598 for (int i = 0; i < 8; ++i) {
2599 if (rd.p[i] > cycle)
2600 INFO("rd $p%i @ %i\n", i, rd.p[i]);
2601 if (wr.p[i] > cycle)
2602 INFO("wr $p%i @ %i\n", i, wr.p[i]);
2603 }
2604 if (rd.c > cycle)
2605 INFO("rd $c @ %i\n", rd.c);
2606 if (wr.c > cycle)
2607 INFO("wr $c @ %i\n", wr.c);
2608 if (res.sfu > cycle)
2609 INFO("sfu @ %i\n", res.sfu);
2610 if (res.imul > cycle)
2611 INFO("imul @ %i\n", res.imul);
2612 if (res.tex > cycle)
2613 INFO("tex @ %i\n", res.tex);
2614 }
2615 };
2616
2617 RegScores *score; // for current BB
2618 std::vector<RegScores> scoreBoards;
2619 int cycle;
2620 int prevData;
2621 operation prevOp;
2622
2623 const Target *targ;
2624
2625 bool visit(Function *);
2626 bool visit(BasicBlock *);
2627
2628 void commitInsn(const Instruction *, int cycle);
2629 int calcDelay(const Instruction *, int cycle) const;
2630 void setDelay(Instruction *, int delay, Instruction *next);
2631
2632 void recordRd(const Value *, const int ready);
2633 void recordWr(const Value *, const int ready);
2634 void checkRd(const Value *, int cycle, int& delay) const;
2635 void checkWr(const Value *, int cycle, int& delay) const;
2636
2637 int getCycles(const Instruction *, int origDelay) const;
2638 };
2639
2640 void
2641 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
2642 {
2643 if (insn->op == OP_EXIT || insn->op == OP_RET)
2644 delay = MAX2(delay, 14);
2645
2646 if (insn->op == OP_TEXBAR) {
2647 // TODO: except if results not used before EXIT
2648 insn->sched = 0xc2;
2649 } else
2650 if (insn->op == OP_JOIN || insn->join) {
2651 insn->sched = 0x00;
2652 } else
2653 if (delay >= 0 || prevData == 0x04 ||
2654 !next || !targ->canDualIssue(insn, next)) {
2655 insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
2656 if (prevOp == OP_EXPORT)
2657 insn->sched |= 0x40;
2658 else
2659 insn->sched |= 0x20;
2660 } else {
2661 insn->sched = 0x04; // dual-issue
2662 }
2663
2664 if (prevData != 0x04 || prevOp != OP_EXPORT)
2665 if (insn->sched != 0x04 || insn->op == OP_EXPORT)
2666 prevOp = insn->op;
2667
2668 prevData = insn->sched;
2669 }
2670
2671 int
2672 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
2673 {
2674 if (insn->sched & 0x80) {
2675 int c = (insn->sched & 0x0f) * 2 + 1;
2676 if (insn->op == OP_TEXBAR && origDelay > 0)
2677 c += origDelay;
2678 return c;
2679 }
2680 if (insn->sched & 0x60)
2681 return (insn->sched & 0x1f) + 1;
2682 return (insn->sched == 0x04) ? 0 : 32;
2683 }
2684
2685 bool
2686 SchedDataCalculator::visit(Function *func)
2687 {
2688 scoreBoards.resize(func->cfg.getSize());
2689 for (size_t i = 0; i < scoreBoards.size(); ++i)
2690 scoreBoards[i].wipe();
2691 return true;
2692 }
2693
2694 bool
2695 SchedDataCalculator::visit(BasicBlock *bb)
2696 {
2697 Instruction *insn;
2698 Instruction *next = NULL;
2699
2700 int cycle = 0;
2701
2702 prevData = 0x00;
2703 prevOp = OP_NOP;
2704 score = &scoreBoards.at(bb->getId());
2705
2706 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
2707 // back branches will wait until all target dependencies are satisfied
2708 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
2709 continue;
2710 BasicBlock *in = BasicBlock::get(ei.getNode());
2711 if (in->getExit()) {
2712 if (prevData != 0x04)
2713 prevData = in->getExit()->sched;
2714 prevOp = in->getExit()->op;
2715 }
2716 score->setMax(&scoreBoards.at(in->getId()));
2717 }
2718 if (bb->cfg.incidentCount() > 1)
2719 prevOp = OP_NOP;
2720
2721 #ifdef NVC0_DEBUG_SCHED_DATA
2722 INFO("=== BB:%i initial scores\n", bb->getId());
2723 score->print(cycle);
2724 #endif
2725
2726 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
2727 next = insn->next;
2728
2729 commitInsn(insn, cycle);
2730 int delay = calcDelay(next, cycle);
2731 setDelay(insn, delay, next);
2732 cycle += getCycles(insn, delay);
2733
2734 #ifdef NVC0_DEBUG_SCHED_DATA
2735 INFO("cycle %i, sched %02x\n", cycle, insn->sched);
2736 insn->print();
2737 next->print();
2738 #endif
2739 }
2740 if (!insn)
2741 return true;
2742 commitInsn(insn, cycle);
2743
2744 int bbDelay = -1;
2745
2746 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
2747 BasicBlock *out = BasicBlock::get(ei.getNode());
2748
2749 if (ei.getType() != Graph::Edge::BACK) {
2750 // only test the first instruction of the outgoing block
2751 next = out->getEntry();
2752 if (next)
2753 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
2754 } else {
2755 // wait until all dependencies are satisfied
2756 const int regsFree = score->getLatest();
2757 next = out->getFirst();
2758 for (int c = cycle; next && c < regsFree; next = next->next) {
2759 bbDelay = MAX2(bbDelay, calcDelay(next, c));
2760 c += getCycles(next, bbDelay);
2761 }
2762 next = NULL;
2763 }
2764 }
2765 if (bb->cfg.outgoingCount() != 1)
2766 next = NULL;
2767 setDelay(insn, bbDelay, next);
2768 cycle += getCycles(insn, bbDelay);
2769
2770 score->rebase(cycle); // common base for initializing out blocks' scores
2771 return true;
2772 }
2773
2774 #define NVE4_MAX_ISSUE_DELAY 0x1f
2775 int
2776 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
2777 {
2778 int delay = 0, ready = cycle;
2779
2780 for (int s = 0; insn->srcExists(s); ++s)
2781 checkRd(insn->getSrc(s), cycle, delay);
2782 // WAR & WAW don't seem to matter
2783 // for (int s = 0; insn->srcExists(s); ++s)
2784 // recordRd(insn->getSrc(s), cycle);
2785
2786 switch (Target::getOpClass(insn->op)) {
2787 case OPCLASS_SFU:
2788 ready = score->res.sfu;
2789 break;
2790 case OPCLASS_ARITH:
2791 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2792 ready = score->res.imul;
2793 break;
2794 case OPCLASS_TEXTURE:
2795 ready = score->res.tex;
2796 break;
2797 case OPCLASS_LOAD:
2798 ready = score->res.ld[insn->src(0).getFile()];
2799 break;
2800 case OPCLASS_STORE:
2801 ready = score->res.st[insn->src(0).getFile()];
2802 break;
2803 default:
2804 break;
2805 }
2806 if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
2807 ready = MAX2(ready, score->res.tex);
2808
2809 delay = MAX2(delay, ready - cycle);
2810
2811 // if can issue next cycle, delay is 0, not 1
2812 return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
2813 }
2814
2815 void
2816 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
2817 {
2818 const int ready = cycle + targ->getLatency(insn);
2819
2820 for (int d = 0; insn->defExists(d); ++d)
2821 recordWr(insn->getDef(d), ready);
2822 // WAR & WAW don't seem to matter
2823 // for (int s = 0; insn->srcExists(s); ++s)
2824 // recordRd(insn->getSrc(s), cycle);
2825
2826 switch (Target::getOpClass(insn->op)) {
2827 case OPCLASS_SFU:
2828 score->res.sfu = cycle + 4;
2829 break;
2830 case OPCLASS_ARITH:
2831 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2832 score->res.imul = cycle + 4;
2833 break;
2834 case OPCLASS_TEXTURE:
2835 score->res.tex = cycle + 18;
2836 break;
2837 case OPCLASS_LOAD:
2838 if (insn->src(0).getFile() == FILE_MEMORY_CONST)
2839 break;
2840 score->res.ld[insn->src(0).getFile()] = cycle + 4;
2841 score->res.st[insn->src(0).getFile()] = ready;
2842 break;
2843 case OPCLASS_STORE:
2844 score->res.st[insn->src(0).getFile()] = cycle + 4;
2845 score->res.ld[insn->src(0).getFile()] = ready;
2846 break;
2847 case OPCLASS_OTHER:
2848 if (insn->op == OP_TEXBAR)
2849 score->res.tex = cycle;
2850 break;
2851 default:
2852 break;
2853 }
2854
2855 #ifdef NVC0_DEBUG_SCHED_DATA
2856 score->print(cycle);
2857 #endif
2858 }
2859
2860 void
2861 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
2862 {
2863 int ready = cycle;
2864 int a, b;
2865
2866 switch (v->reg.file) {
2867 case FILE_GPR:
2868 a = v->reg.data.id;
2869 b = a + v->reg.size / 4;
2870 for (int r = a; r < b; ++r)
2871 ready = MAX2(ready, score->rd.r[r]);
2872 break;
2873 case FILE_PREDICATE:
2874 ready = MAX2(ready, score->rd.p[v->reg.data.id]);
2875 break;
2876 case FILE_FLAGS:
2877 ready = MAX2(ready, score->rd.c);
2878 break;
2879 case FILE_SHADER_INPUT:
2880 case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
2881 case FILE_MEMORY_LOCAL:
2882 case FILE_MEMORY_CONST:
2883 case FILE_MEMORY_SHARED:
2884 case FILE_MEMORY_GLOBAL:
2885 case FILE_SYSTEM_VALUE:
2886 // TODO: any restrictions here ?
2887 break;
2888 case FILE_IMMEDIATE:
2889 break;
2890 default:
2891 assert(0);
2892 break;
2893 }
2894 if (cycle < ready)
2895 delay = MAX2(delay, ready - cycle);
2896 }
2897
2898 void
2899 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
2900 {
2901 int ready = cycle;
2902 int a, b;
2903
2904 switch (v->reg.file) {
2905 case FILE_GPR:
2906 a = v->reg.data.id;
2907 b = a + v->reg.size / 4;
2908 for (int r = a; r < b; ++r)
2909 ready = MAX2(ready, score->wr.r[r]);
2910 break;
2911 case FILE_PREDICATE:
2912 ready = MAX2(ready, score->wr.p[v->reg.data.id]);
2913 break;
2914 default:
2915 assert(v->reg.file == FILE_FLAGS);
2916 ready = MAX2(ready, score->wr.c);
2917 break;
2918 }
2919 if (cycle < ready)
2920 delay = MAX2(delay, ready - cycle);
2921 }
2922
2923 void
2924 SchedDataCalculator::recordWr(const Value *v, const int ready)
2925 {
2926 int a = v->reg.data.id;
2927
2928 if (v->reg.file == FILE_GPR) {
2929 int b = a + v->reg.size / 4;
2930 for (int r = a; r < b; ++r)
2931 score->rd.r[r] = ready;
2932 } else
2933 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
2934 if (v->reg.file == FILE_PREDICATE) {
2935 score->rd.p[a] = ready + 4;
2936 } else {
2937 assert(v->reg.file == FILE_FLAGS);
2938 score->rd.c = ready + 4;
2939 }
2940 }
2941
2942 void
2943 SchedDataCalculator::recordRd(const Value *v, const int ready)
2944 {
2945 int a = v->reg.data.id;
2946
2947 if (v->reg.file == FILE_GPR) {
2948 int b = a + v->reg.size / 4;
2949 for (int r = a; r < b; ++r)
2950 score->wr.r[r] = ready;
2951 } else
2952 if (v->reg.file == FILE_PREDICATE) {
2953 score->wr.p[a] = ready;
2954 } else
2955 if (v->reg.file == FILE_FLAGS) {
2956 score->wr.c = ready;
2957 }
2958 }
2959
2960 bool
2961 calculateSchedDataNVC0(const Target *targ, Function *func)
2962 {
2963 SchedDataCalculator sched(targ);
2964 return sched.run(func, true, true);
2965 }
2966
2967 void
2968 CodeEmitterNVC0::prepareEmission(Function *func)
2969 {
2970 CodeEmitter::prepareEmission(func);
2971
2972 if (targ->hasSWSched)
2973 calculateSchedDataNVC0(targ, func);
2974 }
2975
2976 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
2977 : CodeEmitter(target),
2978 targNVC0(target),
2979 writeIssueDelays(target->hasSWSched)
2980 {
2981 code = NULL;
2982 codeSize = codeSizeLimit = 0;
2983 relocInfo = NULL;
2984 }
2985
2986 CodeEmitter *
2987 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
2988 {
2989 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
2990 emit->setProgramType(type);
2991 return emit;
2992 }
2993
2994 CodeEmitter *
2995 TargetNVC0::getCodeEmitter(Program::Type type)
2996 {
2997 if (chipset >= NVISA_GK110_CHIPSET)
2998 return createCodeEmitterGK110(type);
2999 return createCodeEmitterNVC0(type);
3000 }
3001
3002 } // namespace nv50_ir