nvc0/ir/emit: hardcode vertex output stream to 0 for now
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_A(const Instruction *, uint64_t);
49 void emitForm_B(const Instruction *, uint64_t);
50 void emitForm_S(const Instruction *, uint32_t, bool pred);
51
52 void emitPredicate(const Instruction *);
53
54 void setAddress16(const ValueRef&);
55 void setAddress24(const ValueRef&);
56 void setAddressByFile(const ValueRef&);
57 void setImmediate(const Instruction *, const int s); // needs op already set
58 void setImmediateS8(const ValueRef&);
59 void setSUConst16(const Instruction *, const int s);
60 void setSUPred(const Instruction *, const int s);
61
62 void emitCondCode(CondCode cc, int pos);
63 void emitInterpMode(const Instruction *);
64 void emitLoadStoreType(DataType ty);
65 void emitSUGType(DataType);
66 void emitCachingMode(CacheMode c);
67
68 void emitShortSrc2(const ValueRef&);
69
70 inline uint8_t getSRegEncoding(const ValueRef&);
71
72 void roundMode_A(const Instruction *);
73 void roundMode_C(const Instruction *);
74 void roundMode_CS(const Instruction *);
75
76 void emitNegAbs12(const Instruction *);
77
78 void emitNOP(const Instruction *);
79
80 void emitLOAD(const Instruction *);
81 void emitSTORE(const Instruction *);
82 void emitMOV(const Instruction *);
83 void emitATOM(const Instruction *);
84 void emitMEMBAR(const Instruction *);
85 void emitCCTL(const Instruction *);
86
87 void emitINTERP(const Instruction *);
88 void emitPFETCH(const Instruction *);
89 void emitVFETCH(const Instruction *);
90 void emitEXPORT(const Instruction *);
91 void emitOUT(const Instruction *);
92
93 void emitUADD(const Instruction *);
94 void emitFADD(const Instruction *);
95 void emitUMUL(const Instruction *);
96 void emitFMUL(const Instruction *);
97 void emitIMAD(const Instruction *);
98 void emitISAD(const Instruction *);
99 void emitFMAD(const Instruction *);
100 void emitMADSP(const Instruction *);
101
102 void emitNOT(Instruction *);
103 void emitLogicOp(const Instruction *, uint8_t subOp);
104 void emitPOPC(const Instruction *);
105 void emitINSBF(const Instruction *);
106 void emitEXTBF(const Instruction *);
107 void emitPERMT(const Instruction *);
108 void emitShift(const Instruction *);
109
110 void emitSFnOp(const Instruction *, uint8_t subOp);
111
112 void emitCVT(Instruction *);
113 void emitMINMAX(const Instruction *);
114 void emitPreOp(const Instruction *);
115
116 void emitSET(const CmpInstruction *);
117 void emitSLCT(const CmpInstruction *);
118 void emitSELP(const Instruction *);
119
120 void emitTEXBAR(const Instruction *);
121 void emitTEX(const TexInstruction *);
122 void emitTEXCSAA(const TexInstruction *);
123 void emitTXQ(const TexInstruction *);
124
125 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
126
127 void emitFlow(const Instruction *);
128 void emitBAR(const Instruction *);
129
130 void emitSUCLAMPMode(uint16_t);
131 void emitSUCalc(Instruction *);
132 void emitSULDGB(const TexInstruction *);
133 void emitSUSTGx(const TexInstruction *);
134
135 void emitVSHL(const Instruction *);
136 void emitVectorSubOp(const Instruction *);
137
138 inline void defId(const ValueDef&, const int pos);
139 inline void defId(const Instruction *, int d, const int pos);
140 inline void srcId(const ValueRef&, const int pos);
141 inline void srcId(const ValueRef *, const int pos);
142 inline void srcId(const Instruction *, int s, const int pos);
143 inline void srcAddr32(const ValueRef&, int pos, int shr);
144
145 inline bool isLIMM(const ValueRef&, DataType ty);
146 };
147
// for better visibility
// HEX64(h, l) token-pastes two groups of hex digits into one 64-bit
// literal 0x<h><l>ULL, so an opcode can be written as two separate halves.
#define HEX64(h, l) 0x##h##l##ULL

// Shorthands for the reg.data of the value returned by rep() on a source
// reference (SDATA) or a definition (DDATA); both expand identically.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
153
154 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
155 {
156 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
157 }
158
159 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
160 {
161 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
162 }
163
164 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
165 {
166 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
167 code[pos / 32] |= r << (pos % 32);
168 }
169
170 void
171 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
172 {
173 const uint32_t offset = SDATA(src).offset >> shr;
174
175 code[pos / 32] |= offset << (pos % 32);
176 if (pos && (pos < 32))
177 code[1] |= offset >> (32 - pos);
178 }
179
180 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
181 {
182 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
183 }
184
185 void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
186 {
187 int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
188 code[pos / 32] |= r << (pos % 32);
189 }
190
191 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
192 {
193 const ImmediateValue *imm = ref.get()->asImm();
194
195 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
196 }
197
198 void
199 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
200 {
201 switch (insn->rnd) {
202 case ROUND_M: code[1] |= 1 << 23; break;
203 case ROUND_P: code[1] |= 2 << 23; break;
204 case ROUND_Z: code[1] |= 3 << 23; break;
205 default:
206 assert(insn->rnd == ROUND_N);
207 break;
208 }
209 }
210
211 void
212 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
213 {
214 if (i->src(1).mod.abs()) code[0] |= 1 << 6;
215 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
216 if (i->src(1).mod.neg()) code[0] |= 1 << 8;
217 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
218 }
219
220 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
221 {
222 uint8_t val;
223
224 switch (cc) {
225 case CC_LT: val = 0x1; break;
226 case CC_LTU: val = 0x9; break;
227 case CC_EQ: val = 0x2; break;
228 case CC_EQU: val = 0xa; break;
229 case CC_LE: val = 0x3; break;
230 case CC_LEU: val = 0xb; break;
231 case CC_GT: val = 0x4; break;
232 case CC_GTU: val = 0xc; break;
233 case CC_NE: val = 0x5; break;
234 case CC_NEU: val = 0xd; break;
235 case CC_GE: val = 0x6; break;
236 case CC_GEU: val = 0xe; break;
237 case CC_TR: val = 0xf; break;
238 case CC_FL: val = 0x0; break;
239
240 case CC_A: val = 0x14; break;
241 case CC_NA: val = 0x13; break;
242 case CC_S: val = 0x15; break;
243 case CC_NS: val = 0x12; break;
244 case CC_C: val = 0x16; break;
245 case CC_NC: val = 0x11; break;
246 case CC_O: val = 0x17; break;
247 case CC_NO: val = 0x10; break;
248
249 default:
250 val = 0;
251 assert(!"invalid condition code");
252 break;
253 }
254 code[pos / 32] |= val << (pos % 32);
255 }
256
257 void
258 CodeEmitterNVC0::emitPredicate(const Instruction *i)
259 {
260 if (i->predSrc >= 0) {
261 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
262 srcId(i->src(i->predSrc), 10);
263 if (i->cc == CC_NOT_P)
264 code[0] |= 0x2000; // negate
265 } else {
266 code[0] |= 0x1c00;
267 }
268 }
269
270 void
271 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
272 {
273 switch (src.getFile()) {
274 case FILE_MEMORY_GLOBAL:
275 srcAddr32(src, 26, 0);
276 break;
277 case FILE_MEMORY_LOCAL:
278 case FILE_MEMORY_SHARED:
279 setAddress24(src);
280 break;
281 default:
282 assert(src.getFile() == FILE_MEMORY_CONST);
283 setAddress16(src);
284 break;
285 }
286 }
287
288 void
289 CodeEmitterNVC0::setAddress16(const ValueRef& src)
290 {
291 Symbol *sym = src.get()->asSym();
292
293 assert(sym);
294
295 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
296 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
297 }
298
299 void
300 CodeEmitterNVC0::setAddress24(const ValueRef& src)
301 {
302 Symbol *sym = src.get()->asSym();
303
304 assert(sym);
305
306 code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
307 code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
308 }
309
310 void
311 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
312 {
313 const ImmediateValue *imm = i->src(s).get()->asImm();
314 uint32_t u32;
315
316 assert(imm);
317 u32 = imm->reg.data.u32;
318
319 if ((code[0] & 0xf) == 0x2) {
320 // LIMM
321 code[0] |= (u32 & 0x3f) << 26;
322 code[1] |= u32 >> 6;
323 } else
324 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
325 // integer immediate
326 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
327 assert(!(code[1] & 0xc000));
328 u32 &= 0xfffff;
329 code[0] |= (u32 & 0x3f) << 26;
330 code[1] |= 0xc000 | (u32 >> 6);
331 } else {
332 // float immediate
333 assert(!(u32 & 0x00000fff));
334 assert(!(code[1] & 0xc000));
335 code[0] |= ((u32 >> 12) & 0x3f) << 26;
336 code[1] |= 0xc000 | (u32 >> 18);
337 }
338 }
339
340 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
341 {
342 const ImmediateValue *imm = ref.get()->asImm();
343
344 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
345
346 assert(s8 == imm->reg.data.s32);
347
348 code[0] |= (s8 & 0x3f) << 26;
349 code[0] |= (s8 >> 6) << 8;
350 }
351
352 void
353 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
354 {
355 code[0] = opc;
356 code[1] = opc >> 32;
357
358 emitPredicate(i);
359
360 defId(i->def(0), 14);
361
362 int s1 = 26;
363 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
364 s1 = 49;
365
366 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
367 switch (i->getSrc(s)->reg.file) {
368 case FILE_MEMORY_CONST:
369 assert(!(code[1] & 0xc000));
370 code[1] |= (s == 2) ? 0x8000 : 0x4000;
371 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
372 setAddress16(i->src(s));
373 break;
374 case FILE_IMMEDIATE:
375 assert(s == 1 ||
376 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
377 assert(!(code[1] & 0xc000));
378 setImmediate(i, s);
379 break;
380 case FILE_GPR:
381 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
382 break;
383 srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
384 break;
385 default:
386 // ignore here, can be predicate or flags, but must not be address
387 break;
388 }
389 }
390 }
391
392 void
393 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
394 {
395 code[0] = opc;
396 code[1] = opc >> 32;
397
398 emitPredicate(i);
399
400 defId(i->def(0), 14);
401
402 switch (i->src(0).getFile()) {
403 case FILE_MEMORY_CONST:
404 assert(!(code[1] & 0xc000));
405 code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
406 setAddress16(i->src(0));
407 break;
408 case FILE_IMMEDIATE:
409 assert(!(code[1] & 0xc000));
410 setImmediate(i, 0);
411 break;
412 case FILE_GPR:
413 srcId(i->src(0), 26);
414 break;
415 default:
416 // ignore here, can be predicate or flags, but must not be address
417 break;
418 }
419 }
420
421 void
422 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
423 {
424 code[0] = opc;
425
426 int ss2a = 0;
427 if (opc == 0x0d || opc == 0x0e)
428 ss2a = 2;
429
430 defId(i->def(0), 14);
431 srcId(i->src(0), 20);
432
433 assert(pred || (i->predSrc < 0));
434 if (pred)
435 emitPredicate(i);
436
437 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
438 if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
439 assert(!(code[0] & (0x300 >> ss2a)));
440 switch (i->src(s).get()->reg.fileIndex) {
441 case 0: code[0] |= 0x100 >> ss2a; break;
442 case 1: code[0] |= 0x200 >> ss2a; break;
443 case 16: code[0] |= 0x300 >> ss2a; break;
444 default:
445 ERROR("invalid c[] space for short form\n");
446 break;
447 }
448 if (s == 1)
449 code[0] |= i->getSrc(s)->reg.data.offset << 24;
450 else
451 code[0] |= i->getSrc(s)->reg.data.offset << 6;
452 } else
453 if (i->src(s).getFile() == FILE_IMMEDIATE) {
454 assert(s == 1);
455 setImmediateS8(i->src(s));
456 } else
457 if (i->src(s).getFile() == FILE_GPR) {
458 srcId(i->src(s), (s == 1) ? 26 : 8);
459 }
460 }
461 }
462
463 void
464 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
465 {
466 if (src.getFile() == FILE_MEMORY_CONST) {
467 switch (src.get()->reg.fileIndex) {
468 case 0: code[0] |= 0x100; break;
469 case 1: code[0] |= 0x200; break;
470 case 16: code[0] |= 0x300; break;
471 default:
472 assert(!"unsupported file index for short op");
473 break;
474 }
475 srcAddr32(src, 20, 2);
476 } else {
477 srcId(src, 20);
478 assert(src.getFile() == FILE_GPR);
479 }
480 }
481
482 void
483 CodeEmitterNVC0::emitNOP(const Instruction *i)
484 {
485 code[0] = 0x000001e4;
486 code[1] = 0x40000000;
487 emitPredicate(i);
488 }
489
490 void
491 CodeEmitterNVC0::emitFMAD(const Instruction *i)
492 {
493 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
494
495 if (i->encSize == 8) {
496 if (isLIMM(i->src(1), TYPE_F32)) {
497 emitForm_A(i, HEX64(20000000, 00000002));
498 } else {
499 emitForm_A(i, HEX64(30000000, 00000000));
500
501 if (i->src(2).mod.neg())
502 code[0] |= 1 << 8;
503 }
504 roundMode_A(i);
505
506 if (neg1)
507 code[0] |= 1 << 9;
508
509 if (i->saturate)
510 code[0] |= 1 << 5;
511 if (i->ftz)
512 code[0] |= 1 << 6;
513 } else {
514 assert(!i->saturate && !i->src(2).mod.neg());
515 emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
516 false);
517 if (neg1)
518 code[0] |= 1 << 4;
519 }
520 }
521
522 void
523 CodeEmitterNVC0::emitFMUL(const Instruction *i)
524 {
525 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
526
527 assert(i->postFactor >= -3 && i->postFactor <= 3);
528
529 if (i->encSize == 8) {
530 if (isLIMM(i->src(1), TYPE_F32)) {
531 assert(i->postFactor == 0); // constant folded, hopefully
532 emitForm_A(i, HEX64(30000000, 00000002));
533 } else {
534 emitForm_A(i, HEX64(58000000, 00000000));
535 roundMode_A(i);
536 code[1] |= ((i->postFactor > 0) ?
537 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
538 }
539 if (neg)
540 code[1] ^= 1 << 25; // aliases with LIMM sign bit
541
542 if (i->saturate)
543 code[0] |= 1 << 5;
544
545 if (i->dnz)
546 code[0] |= 1 << 7;
547 else
548 if (i->ftz)
549 code[0] |= 1 << 6;
550 } else {
551 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
552 emitForm_S(i, 0xa8, true);
553 }
554 }
555
556 void
557 CodeEmitterNVC0::emitUMUL(const Instruction *i)
558 {
559 if (i->encSize == 8) {
560 if (i->src(1).getFile() == FILE_IMMEDIATE) {
561 emitForm_A(i, HEX64(10000000, 00000002));
562 } else {
563 emitForm_A(i, HEX64(50000000, 00000003));
564 }
565 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
566 code[0] |= 1 << 6;
567 if (i->sType == TYPE_S32)
568 code[0] |= 1 << 5;
569 if (i->dType == TYPE_S32)
570 code[0] |= 1 << 7;
571 } else {
572 emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
573
574 if (i->sType == TYPE_S32)
575 code[0] |= 1 << 6;
576 }
577 }
578
579 void
580 CodeEmitterNVC0::emitFADD(const Instruction *i)
581 {
582 if (i->encSize == 8) {
583 if (isLIMM(i->src(1), TYPE_F32)) {
584 assert(!i->saturate);
585 emitForm_A(i, HEX64(28000000, 00000002));
586
587 code[0] |= i->src(0).mod.abs() << 7;
588 code[0] |= i->src(0).mod.neg() << 9;
589
590 if (i->src(1).mod.abs())
591 code[1] &= 0xfdffffff;
592 if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
593 code[1] ^= 0x02000000;
594 } else {
595 emitForm_A(i, HEX64(50000000, 00000000));
596
597 roundMode_A(i);
598 if (i->saturate)
599 code[1] |= 1 << 17;
600
601 emitNegAbs12(i);
602 if (i->op == OP_SUB) code[0] ^= 1 << 8;
603 }
604 if (i->ftz)
605 code[0] |= 1 << 5;
606 } else {
607 assert(!i->saturate && i->op != OP_SUB &&
608 !i->src(0).mod.abs() &&
609 !i->src(1).mod.neg() && !i->src(1).mod.abs());
610
611 emitForm_S(i, 0x49, true);
612
613 if (i->src(0).mod.neg())
614 code[0] |= 1 << 7;
615 }
616 }
617
618 void
619 CodeEmitterNVC0::emitUADD(const Instruction *i)
620 {
621 uint32_t addOp = 0;
622
623 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
624 assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
625
626 if (i->src(0).mod.neg())
627 addOp |= 0x200;
628 if (i->src(1).mod.neg())
629 addOp |= 0x100;
630 if (i->op == OP_SUB) {
631 addOp ^= 0x100;
632 assert(addOp != 0x300); // would be add-plus-one
633 }
634
635 if (i->encSize == 8) {
636 if (isLIMM(i->src(1), TYPE_U32)) {
637 emitForm_A(i, HEX64(08000000, 00000002));
638 if (i->defExists(1))
639 code[1] |= 1 << 26; // write carry
640 } else {
641 emitForm_A(i, HEX64(48000000, 00000003));
642 if (i->defExists(1))
643 code[1] |= 1 << 16; // write carry
644 }
645 code[0] |= addOp;
646
647 if (i->saturate)
648 code[0] |= 1 << 5;
649 if (i->flagsSrc >= 0) // add carry
650 code[0] |= 1 << 6;
651 } else {
652 assert(!(addOp & 0x100));
653 emitForm_S(i, (addOp >> 3) |
654 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
655 }
656 }
657
658 // TODO: shl-add
659 void
660 CodeEmitterNVC0::emitIMAD(const Instruction *i)
661 {
662 assert(i->encSize == 8);
663 emitForm_A(i, HEX64(20000000, 00000003));
664
665 if (isSignedType(i->dType))
666 code[0] |= 1 << 7;
667 if (isSignedType(i->sType))
668 code[0] |= 1 << 5;
669
670 code[1] |= i->saturate << 24;
671
672 if (i->flagsDef >= 0) code[1] |= 1 << 16;
673 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
674
675 if (i->src(2).mod.neg()) code[0] |= 0x10;
676 if (i->src(1).mod.neg() ^
677 i->src(0).mod.neg()) code[0] |= 0x20;
678
679 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
680 code[0] |= 1 << 6;
681 }
682
683 void
684 CodeEmitterNVC0::emitMADSP(const Instruction *i)
685 {
686 assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
687
688 emitForm_A(i, HEX64(00000000, 00000003));
689
690 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
691 code[1] |= 0x01800000;
692 } else {
693 code[0] |= (i->subOp & 0x00f) << 7;
694 code[0] |= (i->subOp & 0x0f0) << 1;
695 code[0] |= (i->subOp & 0x100) >> 3;
696 code[0] |= (i->subOp & 0x200) >> 2;
697 code[1] |= (i->subOp & 0xc00) << 13;
698 }
699
700 if (i->flagsDef >= 0)
701 code[1] |= 1 << 16;
702 }
703
704 void
705 CodeEmitterNVC0::emitISAD(const Instruction *i)
706 {
707 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
708 assert(i->encSize == 8);
709
710 emitForm_A(i, HEX64(38000000, 00000003));
711
712 if (i->dType == TYPE_S32)
713 code[0] |= 1 << 5;
714 }
715
716 void
717 CodeEmitterNVC0::emitNOT(Instruction *i)
718 {
719 assert(i->encSize == 8);
720 i->setSrc(1, i->src(0));
721 emitForm_A(i, HEX64(68000000, 000001c3));
722 }
723
724 void
725 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
726 {
727 if (i->def(0).getFile() == FILE_PREDICATE) {
728 code[0] = 0x00000004 | (subOp << 30);
729 code[1] = 0x0c000000;
730
731 emitPredicate(i);
732
733 defId(i->def(0), 17);
734 srcId(i->src(0), 20);
735 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
736 srcId(i->src(1), 26);
737 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
738
739 if (i->defExists(1)) {
740 defId(i->def(1), 14);
741 } else {
742 code[0] |= 7 << 14;
743 }
744 // (a OP b) OP c
745 if (i->predSrc != 2 && i->srcExists(2)) {
746 code[1] |= subOp << 21;
747 srcId(i->src(2), 17);
748 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
749 } else {
750 code[1] |= 0x000e0000;
751 }
752 } else
753 if (i->encSize == 8) {
754 if (isLIMM(i->src(1), TYPE_U32)) {
755 emitForm_A(i, HEX64(38000000, 00000002));
756
757 if (i->flagsDef >= 0)
758 code[1] |= 1 << 26;
759 } else {
760 emitForm_A(i, HEX64(68000000, 00000003));
761
762 if (i->flagsDef >= 0)
763 code[1] |= 1 << 16;
764 }
765 code[0] |= subOp << 6;
766
767 if (i->flagsSrc >= 0) // carry
768 code[0] |= 1 << 5;
769
770 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
771 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
772 } else {
773 emitForm_S(i, (subOp << 5) |
774 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
775 }
776 }
777
778 void
779 CodeEmitterNVC0::emitPOPC(const Instruction *i)
780 {
781 emitForm_A(i, HEX64(54000000, 00000004));
782
783 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
784 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
785 }
786
787 void
788 CodeEmitterNVC0::emitINSBF(const Instruction *i)
789 {
790 emitForm_A(i, HEX64(28000000, 00000003));
791 }
792
793 void
794 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
795 {
796 emitForm_A(i, HEX64(70000000, 00000003));
797
798 if (i->dType == TYPE_S32)
799 code[0] |= 1 << 5;
800 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
801 code[0] |= 1 << 8;
802 }
803
804 void
805 CodeEmitterNVC0::emitPERMT(const Instruction *i)
806 {
807 emitForm_A(i, HEX64(24000000, 00000004));
808
809 code[0] |= i->subOp << 5;
810 }
811
812 void
813 CodeEmitterNVC0::emitShift(const Instruction *i)
814 {
815 if (i->op == OP_SHR) {
816 emitForm_A(i, HEX64(58000000, 00000003)
817 | (isSignedType(i->dType) ? 0x20 : 0x00));
818 } else {
819 emitForm_A(i, HEX64(60000000, 00000003));
820 }
821
822 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
823 code[0] |= 1 << 9;
824 }
825
826 void
827 CodeEmitterNVC0::emitPreOp(const Instruction *i)
828 {
829 if (i->encSize == 8) {
830 emitForm_B(i, HEX64(60000000, 00000000));
831
832 if (i->op == OP_PREEX2)
833 code[0] |= 0x20;
834
835 if (i->src(0).mod.abs()) code[0] |= 1 << 6;
836 if (i->src(0).mod.neg()) code[0] |= 1 << 8;
837 } else {
838 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
839 }
840 }
841
842 void
843 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
844 {
845 if (i->encSize == 8) {
846 code[0] = 0x00000000 | (subOp << 26);
847 code[1] = 0xc8000000;
848
849 emitPredicate(i);
850
851 defId(i->def(0), 14);
852 srcId(i->src(0), 20);
853
854 assert(i->src(0).getFile() == FILE_GPR);
855
856 if (i->saturate) code[0] |= 1 << 5;
857
858 if (i->src(0).mod.abs()) code[0] |= 1 << 7;
859 if (i->src(0).mod.neg()) code[0] |= 1 << 9;
860 } else {
861 emitForm_S(i, 0x80000008 | (subOp << 26), true);
862
863 assert(!i->src(0).mod.neg());
864 if (i->src(0).mod.abs()) code[0] |= 1 << 30;
865 }
866 }
867
868 void
869 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
870 {
871 uint64_t op;
872
873 assert(i->encSize == 8);
874
875 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
876
877 if (i->ftz)
878 op |= 1 << 5;
879 else
880 if (!isFloatType(i->dType))
881 op |= isSignedType(i->dType) ? 0x23 : 0x03;
882
883 emitForm_A(i, op);
884 emitNegAbs12(i);
885 }
886
887 void
888 CodeEmitterNVC0::roundMode_C(const Instruction *i)
889 {
890 switch (i->rnd) {
891 case ROUND_M: code[1] |= 1 << 17; break;
892 case ROUND_P: code[1] |= 2 << 17; break;
893 case ROUND_Z: code[1] |= 3 << 17; break;
894 case ROUND_NI: code[0] |= 1 << 7; break;
895 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
896 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
897 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
898 case ROUND_N: break;
899 default:
900 assert(!"invalid round mode");
901 break;
902 }
903 }
904
905 void
906 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
907 {
908 switch (i->rnd) {
909 case ROUND_M:
910 case ROUND_MI: code[0] |= 1 << 16; break;
911 case ROUND_P:
912 case ROUND_PI: code[0] |= 2 << 16; break;
913 case ROUND_Z:
914 case ROUND_ZI: code[0] |= 3 << 16; break;
915 default:
916 break;
917 }
918 }
919
920 void
921 CodeEmitterNVC0::emitCVT(Instruction *i)
922 {
923 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
924 DataType dType;
925
926 switch (i->op) {
927 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
928 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
929 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
930 default:
931 break;
932 }
933
934 const bool sat = (i->op == OP_SAT) || i->saturate;
935 const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
936 const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
937
938 if (i->op == OP_NEG && i->dType == TYPE_U32)
939 dType = TYPE_S32;
940 else
941 dType = i->dType;
942
943 if (i->encSize == 8) {
944 emitForm_B(i, HEX64(10000000, 00000004));
945
946 roundMode_C(i);
947
948 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
949 code[0] |= util_logbase2(typeSizeof(dType)) << 20;
950 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
951
952 if (sat)
953 code[0] |= 0x20;
954 if (abs)
955 code[0] |= 1 << 6;
956 if (neg && i->op != OP_ABS)
957 code[0] |= 1 << 8;
958
959 if (i->ftz)
960 code[1] |= 1 << 23;
961
962 if (isSignedIntType(dType))
963 code[0] |= 0x080;
964 if (isSignedIntType(i->sType))
965 code[0] |= 0x200;
966
967 if (isFloatType(dType)) {
968 if (!isFloatType(i->sType))
969 code[1] |= 0x08000000;
970 } else {
971 if (isFloatType(i->sType))
972 code[1] |= 0x04000000;
973 else
974 code[1] |= 0x0c000000;
975 }
976 } else {
977 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
978 code[0] = 0x298;
979 } else
980 if (isFloatType(dType)) {
981 if (isFloatType(i->sType))
982 code[0] = 0x098;
983 else
984 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
985 } else {
986 assert(isFloatType(i->sType));
987
988 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
989 }
990
991 if (neg) code[0] |= 1 << 16;
992 if (sat) code[0] |= 1 << 18;
993 if (abs) code[0] |= 1 << 19;
994
995 roundMode_CS(i);
996 }
997 }
998
999 void
1000 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1001 {
1002 uint32_t hi;
1003 uint32_t lo = 0;
1004
1005 if (i->sType == TYPE_F64)
1006 lo = 0x1;
1007 else
1008 if (!isFloatType(i->sType))
1009 lo = 0x3;
1010
1011 if (isFloatType(i->dType) || isSignedIntType(i->sType))
1012 lo |= 0x20;
1013
1014 switch (i->op) {
1015 case OP_SET_AND: hi = 0x10000000; break;
1016 case OP_SET_OR: hi = 0x10200000; break;
1017 case OP_SET_XOR: hi = 0x10400000; break;
1018 default:
1019 hi = 0x100e0000;
1020 break;
1021 }
1022 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
1023
1024 if (i->op != OP_SET)
1025 srcId(i->src(2), 32 + 17);
1026
1027 if (i->def(0).getFile() == FILE_PREDICATE) {
1028 if (i->sType == TYPE_F32)
1029 code[1] += 0x10000000;
1030 else
1031 code[1] += 0x08000000;
1032
1033 code[0] &= ~0xfc000;
1034 defId(i->def(0), 17);
1035 if (i->defExists(1))
1036 defId(i->def(1), 14);
1037 else
1038 code[0] |= 0x1c000;
1039 }
1040
1041 if (i->ftz)
1042 code[1] |= 1 << 27;
1043
1044 emitCondCode(i->setCond, 32 + 23);
1045 emitNegAbs12(i);
1046 }
1047
1048 void
1049 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1050 {
1051 uint64_t op;
1052
1053 switch (i->dType) {
1054 case TYPE_S32:
1055 op = HEX64(30000000, 00000023);
1056 break;
1057 case TYPE_U32:
1058 op = HEX64(30000000, 00000003);
1059 break;
1060 case TYPE_F32:
1061 op = HEX64(38000000, 00000000);
1062 break;
1063 default:
1064 assert(!"invalid type for SLCT");
1065 op = 0;
1066 break;
1067 }
1068 emitForm_A(i, op);
1069
1070 CondCode cc = i->setCond;
1071
1072 if (i->src(2).mod.neg())
1073 cc = reverseCondCode(cc);
1074
1075 emitCondCode(cc, 32 + 23);
1076
1077 if (i->ftz)
1078 code[0] |= 1 << 5;
1079 }
1080
1081 void CodeEmitterNVC0::emitSELP(const Instruction *i)
1082 {
1083 emitForm_A(i, HEX64(20000000, 00000004));
1084
1085 if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1086 code[1] |= 1 << 20;
1087 }
1088
1089 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1090 {
1091 code[0] = 0x00000006 | (i->subOp << 26);
1092 code[1] = 0xf0000000;
1093 emitPredicate(i);
1094 emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1095 }
1096
1097 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1098 {
1099 code[0] = 0x00000086;
1100 code[1] = 0xd0000000;
1101
1102 code[1] |= i->tex.r;
1103 code[1] |= i->tex.s << 8;
1104
1105 if (i->tex.liveOnly)
1106 code[0] |= 1 << 9;
1107
1108 defId(i->def(0), 14);
1109 srcId(i->src(0), 20);
1110 }
1111
1112 static inline bool
1113 isNextIndependentTex(const TexInstruction *i)
1114 {
1115 if (!i->next || !isTextureOp(i->next->op))
1116 return false;
1117 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1118 return false;
1119 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1120 }
1121
1122 void
1123 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1124 {
1125 code[0] = 0x00000006;
1126
1127 if (isNextIndependentTex(i))
1128 code[0] |= 0x080; // t mode
1129 else
1130 code[0] |= 0x100; // p mode
1131
1132 if (i->tex.liveOnly)
1133 code[0] |= 1 << 9;
1134
1135 switch (i->op) {
1136 case OP_TEX: code[1] = 0x80000000; break;
1137 case OP_TXB: code[1] = 0x84000000; break;
1138 case OP_TXL: code[1] = 0x86000000; break;
1139 case OP_TXF: code[1] = 0x90000000; break;
1140 case OP_TXG: code[1] = 0xa0000000; break;
1141 case OP_TXD: code[1] = 0xe0000000; break;
1142 default:
1143 assert(!"invalid texture op");
1144 break;
1145 }
1146 if (i->op == OP_TXF) {
1147 if (!i->tex.levelZero)
1148 code[1] |= 0x02000000;
1149 } else
1150 if (i->tex.levelZero) {
1151 code[1] |= 0x02000000;
1152 }
1153
1154 if (i->op != OP_TXD && i->tex.derivAll)
1155 code[1] |= 1 << 13;
1156
1157 defId(i->def(0), 14);
1158 srcId(i->src(0), 20);
1159
1160 emitPredicate(i);
1161
1162 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1163
1164 code[1] |= i->tex.mask << 14;
1165
1166 code[1] |= i->tex.r;
1167 code[1] |= i->tex.s << 8;
1168 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1169 code[1] |= 1 << 18; // in 1st source (with array index)
1170
1171 // texture target:
1172 code[1] |= (i->tex.target.getDim() - 1) << 20;
1173 if (i->tex.target.isCube())
1174 code[1] += 2 << 20;
1175 if (i->tex.target.isArray())
1176 code[1] |= 1 << 19;
1177 if (i->tex.target.isShadow())
1178 code[1] |= 1 << 24;
1179
1180 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1181
1182 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1183 // lzero
1184 if (i->op == OP_TXL)
1185 code[1] &= ~(1 << 26);
1186 else
1187 if (i->op == OP_TXF)
1188 code[1] &= ~(1 << 25);
1189 }
1190 if (i->tex.target == TEX_TARGET_2D_MS ||
1191 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1192 code[1] |= 1 << 23;
1193
1194 if (i->tex.useOffsets) // in vecSrc0.w
1195 code[1] |= 1 << 22;
1196
1197 srcId(i, src1, 26);
1198 }
1199
1200 void
1201 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1202 {
1203 code[0] = 0x00000086;
1204 code[1] = 0xc0000000;
1205
1206 switch (i->tex.query) {
1207 case TXQ_DIMS: code[1] |= 0 << 22; break;
1208 case TXQ_TYPE: code[1] |= 1 << 22; break;
1209 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1210 case TXQ_FILTER: code[1] |= 3 << 22; break;
1211 case TXQ_LOD: code[1] |= 4 << 22; break;
1212 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1213 default:
1214 assert(!"invalid texture query");
1215 break;
1216 }
1217
1218 code[1] |= i->tex.mask << 14;
1219
1220 code[1] |= i->tex.r;
1221 code[1] |= i->tex.s << 8;
1222 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1223 code[1] |= 1 << 18;
1224
1225 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1226
1227 defId(i->def(0), 14);
1228 srcId(i->src(0), 20);
1229 srcId(i, src1, 26);
1230
1231 emitPredicate(i);
1232 }
1233
1234 void
1235 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1236 {
1237 code[0] = 0x00000000 | (laneMask << 6);
1238 code[1] = 0x48000000 | qOp;
1239
1240 defId(i->def(0), 14);
1241 srcId(i->src(0), 20);
1242 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
1243
1244 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1245 code[0] |= 1 << 9; // dall
1246
1247 emitPredicate(i);
1248 }
1249
1250 void
1251 CodeEmitterNVC0::emitFlow(const Instruction *i)
1252 {
1253 const FlowInstruction *f = i->asFlow();
1254
1255 unsigned mask; // bit 0: predicate, bit 1: target
1256
1257 code[0] = 0x00000007;
1258
1259 switch (i->op) {
1260 case OP_BRA:
1261 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1262 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1263 code[0] |= 0x4000;
1264 mask = 3;
1265 break;
1266 case OP_CALL:
1267 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1268 if (f->indirect)
1269 code[0] |= 0x4000; // indirect calls always use c[] source
1270 mask = 2;
1271 break;
1272
1273 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1274 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1275 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1276 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1277 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1278
1279 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1280 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1281 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1282 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1283
1284 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1285 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1286 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1287 default:
1288 assert(!"invalid flow operation");
1289 return;
1290 }
1291
1292 if (mask & 1) {
1293 emitPredicate(i);
1294 if (i->flagsSrc < 0)
1295 code[0] |= 0x1e0;
1296 }
1297
1298 if (!f)
1299 return;
1300
1301 if (f->allWarp)
1302 code[0] |= 1 << 15;
1303 if (f->limit)
1304 code[0] |= 1 << 16;
1305
1306 if (f->indirect) {
1307 if (code[0] & 0x4000) {
1308 assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1309 setAddress16(i->src(0));
1310 code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1311 if (f->op == OP_BRA)
1312 srcId(f->src(0).getIndirect(0), 20);
1313 } else {
1314 srcId(f, 0, 20);
1315 }
1316 }
1317
1318 if (f->op == OP_CALL) {
1319 if (f->indirect) {
1320 // nothing
1321 } else
1322 if (f->builtin) {
1323 assert(f->absolute);
1324 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1325 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1326 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1327 } else {
1328 assert(!f->absolute);
1329 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1330 code[0] |= (pcRel & 0x3f) << 26;
1331 code[1] |= (pcRel >> 6) & 0x3ffff;
1332 }
1333 } else
1334 if (mask & 2) {
1335 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1336 // currently we don't want absolute branches
1337 assert(!f->absolute);
1338 code[0] |= (pcRel & 0x3f) << 26;
1339 code[1] |= (pcRel >> 6) & 0x3ffff;
1340 }
1341 }
1342
1343 void
1344 CodeEmitterNVC0::emitBAR(const Instruction *i)
1345 {
1346 Value *rDef = NULL, *pDef = NULL;
1347
1348 switch (i->subOp) {
1349 case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break;
1350 case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break;
1351 case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break;
1352 case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1353 default:
1354 code[0] = 0x04;
1355 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1356 break;
1357 }
1358 code[1] = 0x50000000;
1359
1360 code[0] |= 63 << 14;
1361 code[1] |= 7 << 21;
1362
1363 emitPredicate(i);
1364
1365 // barrier id
1366 if (i->src(0).getFile() == FILE_GPR) {
1367 srcId(i->src(0), 20);
1368 } else {
1369 ImmediateValue *imm = i->getSrc(0)->asImm();
1370 assert(imm);
1371 code[0] |= imm->reg.data.u32 << 20;
1372 }
1373
1374 // thread count
1375 if (i->src(1).getFile() == FILE_GPR) {
1376 srcId(i->src(1), 26);
1377 } else {
1378 ImmediateValue *imm = i->getSrc(1)->asImm();
1379 assert(imm);
1380 code[0] |= imm->reg.data.u32 << 26;
1381 code[1] |= imm->reg.data.u32 >> 6;
1382 }
1383
1384 if (i->srcExists(2) && (i->predSrc != 2)) {
1385 srcId(i->src(2), 32 + 17);
1386 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1387 code[1] |= 1 << 20;
1388 } else {
1389 code[1] |= 7 << 17;
1390 }
1391
1392 if (i->defExists(0)) {
1393 if (i->def(0).getFile() == FILE_GPR)
1394 rDef = i->getDef(0);
1395 else
1396 pDef = i->getDef(0);
1397
1398 if (i->defExists(1)) {
1399 if (i->def(1).getFile() == FILE_GPR)
1400 rDef = i->getDef(1);
1401 else
1402 pDef = i->getDef(1);
1403 }
1404 }
1405 if (rDef) {
1406 code[0] &= ~(63 << 14);
1407 defId(rDef, 14);
1408 }
1409 if (pDef) {
1410 code[1] &= ~(7 << 21);
1411 defId(pDef, 32 + 21);
1412 }
1413 }
1414
1415 void
1416 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1417 {
1418 uint32_t prim = i->src(0).get()->reg.data.u32;
1419
1420 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1421 code[1] = 0x00000000 | (prim >> 6);
1422
1423 emitPredicate(i);
1424
1425 defId(i->def(0), 14);
1426 srcId(i->src(1), 20);
1427 }
1428
1429 void
1430 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1431 {
1432 code[0] = 0x00000006;
1433 code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1434
1435 if (i->perPatch)
1436 code[0] |= 0x100;
1437 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1438 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1439
1440 emitPredicate(i);
1441
1442 code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1443
1444 defId(i->def(0), 14);
1445 srcId(i->src(0).getIndirect(0), 20);
1446 srcId(i->src(0).getIndirect(1), 26); // vertex address
1447 }
1448
1449 void
1450 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1451 {
1452 unsigned int size = typeSizeof(i->dType);
1453
1454 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1455 code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1456
1457 assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1458
1459 if (i->perPatch)
1460 code[0] |= 0x100;
1461
1462 emitPredicate(i);
1463
1464 assert(i->src(1).getFile() == FILE_GPR);
1465
1466 srcId(i->src(0).getIndirect(0), 20);
1467 srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1468 srcId(i->src(1), 26);
1469 }
1470
1471 void
1472 CodeEmitterNVC0::emitOUT(const Instruction *i)
1473 {
1474 code[0] = 0x00000006;
1475 code[1] = 0x1c000000;
1476
1477 emitPredicate(i);
1478
1479 defId(i->def(0), 14); // new secret address
1480 srcId(i->src(0), 20); // old secret address, should be 0 initially
1481
1482 assert(i->src(0).getFile() == FILE_GPR);
1483
1484 if (i->op == OP_EMIT)
1485 code[0] |= 1 << 5;
1486 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1487 code[0] |= 1 << 6;
1488
1489 // vertex stream
1490 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1491 // Using immediate encoding here triggers an invalid opcode error
1492 // or random results when error reporting is disabled.
1493 // TODO: figure this out when we get multiple vertex streams
1494 assert(SDATA(i->src(1)).u32 == 0);
1495 srcId(NULL, 26);
1496 // code[1] |= 0xc000;
1497 // code[0] |= SDATA(i->src(1)).u32 << 26;
1498 } else {
1499 srcId(i->src(1), 26);
1500 }
1501 }
1502
1503 void
1504 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1505 {
1506 if (i->encSize == 8) {
1507 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1508 } else {
1509 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1510 code[0] |= 0x80;
1511 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1512 }
1513 }
1514
1515 void
1516 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1517 {
1518 const uint32_t base = i->getSrc(0)->reg.data.offset;
1519
1520 if (i->encSize == 8) {
1521 code[0] = 0x00000000;
1522 code[1] = 0xc0000000 | (base & 0xffff);
1523
1524 if (i->saturate)
1525 code[0] |= 1 << 5;
1526
1527 if (i->op == OP_PINTERP)
1528 srcId(i->src(1), 26);
1529 else
1530 code[0] |= 0x3f << 26;
1531
1532 srcId(i->src(0).getIndirect(0), 20);
1533 } else {
1534 assert(i->op == OP_PINTERP);
1535 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1536 srcId(i->src(1), 20);
1537 }
1538 emitInterpMode(i);
1539
1540 emitPredicate(i);
1541 defId(i->def(0), 14);
1542
1543 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1544 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 17);
1545 else
1546 code[1] |= 0x3f << 17;
1547 }
1548
1549 void
1550 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1551 {
1552 uint8_t val;
1553
1554 switch (ty) {
1555 case TYPE_U8:
1556 val = 0x00;
1557 break;
1558 case TYPE_S8:
1559 val = 0x20;
1560 break;
1561 case TYPE_F16:
1562 case TYPE_U16:
1563 val = 0x40;
1564 break;
1565 case TYPE_S16:
1566 val = 0x60;
1567 break;
1568 case TYPE_F32:
1569 case TYPE_U32:
1570 case TYPE_S32:
1571 val = 0x80;
1572 break;
1573 case TYPE_F64:
1574 case TYPE_U64:
1575 case TYPE_S64:
1576 val = 0xa0;
1577 break;
1578 case TYPE_B128:
1579 val = 0xc0;
1580 break;
1581 default:
1582 val = 0x80;
1583 assert(!"invalid type");
1584 break;
1585 }
1586 code[0] |= val;
1587 }
1588
1589 void
1590 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1591 {
1592 uint32_t val;
1593
1594 switch (c) {
1595 case CACHE_CA:
1596 // case CACHE_WB:
1597 val = 0x000;
1598 break;
1599 case CACHE_CG:
1600 val = 0x100;
1601 break;
1602 case CACHE_CS:
1603 val = 0x200;
1604 break;
1605 case CACHE_CV:
1606 // case CACHE_WT:
1607 val = 0x300;
1608 break;
1609 default:
1610 val = 0;
1611 assert(!"invalid caching mode");
1612 break;
1613 }
1614 code[0] |= val;
1615 }
1616
1617 static inline bool
1618 uses64bitAddress(const Instruction *ldst)
1619 {
1620 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1621 ldst->src(0).isIndirect(0) &&
1622 ldst->getIndirect(0, 0)->reg.size == 8;
1623 }
1624
1625 void
1626 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1627 {
1628 uint32_t opc;
1629
1630 switch (i->src(0).getFile()) {
1631 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1632 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1633 case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1634 default:
1635 assert(!"invalid memory file");
1636 opc = 0;
1637 break;
1638 }
1639 code[0] = 0x00000005;
1640 code[1] = opc;
1641
1642 setAddressByFile(i->src(0));
1643 srcId(i->src(1), 14);
1644 srcId(i->src(0).getIndirect(0), 20);
1645 if (uses64bitAddress(i))
1646 code[1] |= 1 << 26;
1647
1648 emitPredicate(i);
1649
1650 emitLoadStoreType(i->dType);
1651 emitCachingMode(i->cache);
1652 }
1653
1654 void
1655 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1656 {
1657 uint32_t opc;
1658
1659 code[0] = 0x00000005;
1660
1661 switch (i->src(0).getFile()) {
1662 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1663 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1664 case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1665 case FILE_MEMORY_CONST:
1666 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1667 emitMOV(i); // not sure if this is any better
1668 return;
1669 }
1670 opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1671 code[0] = 0x00000006 | (i->subOp << 8);
1672 break;
1673 default:
1674 assert(!"invalid memory file");
1675 opc = 0;
1676 break;
1677 }
1678 code[1] = opc;
1679
1680 defId(i->def(0), 14);
1681
1682 setAddressByFile(i->src(0));
1683 srcId(i->src(0).getIndirect(0), 20);
1684 if (uses64bitAddress(i))
1685 code[1] |= 1 << 26;
1686
1687 emitPredicate(i);
1688
1689 emitLoadStoreType(i->dType);
1690 emitCachingMode(i->cache);
1691 }
1692
1693 uint8_t
1694 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1695 {
1696 switch (SDATA(ref).sv.sv) {
1697 case SV_LANEID: return 0x00;
1698 case SV_PHYSID: return 0x03;
1699 case SV_VERTEX_COUNT: return 0x10;
1700 case SV_INVOCATION_ID: return 0x11;
1701 case SV_YDIR: return 0x12;
1702 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1703 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1704 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1705 case SV_GRIDID: return 0x2c;
1706 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1707 case SV_LBASE: return 0x34;
1708 case SV_SBASE: return 0x30;
1709 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1710 default:
1711 assert(!"no sreg for system value");
1712 return 0;
1713 }
1714 }
1715
1716 void
1717 CodeEmitterNVC0::emitMOV(const Instruction *i)
1718 {
1719 if (i->def(0).getFile() == FILE_PREDICATE) {
1720 if (i->src(0).getFile() == FILE_GPR) {
1721 code[0] = 0xfc01c003;
1722 code[1] = 0x1a8e0000;
1723 srcId(i->src(0), 20);
1724 } else {
1725 code[0] = 0x0001c004;
1726 code[1] = 0x0c0e0000;
1727 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1728 code[0] |= 7 << 20;
1729 if (!i->getSrc(0)->reg.data.u32)
1730 code[0] |= 1 << 23;
1731 } else {
1732 srcId(i->src(0), 20);
1733 }
1734 }
1735 defId(i->def(0), 17);
1736 emitPredicate(i);
1737 } else
1738 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1739 uint8_t sr = getSRegEncoding(i->src(0));
1740
1741 if (i->encSize == 8) {
1742 code[0] = 0x00000004 | (sr << 26);
1743 code[1] = 0x2c000000;
1744 } else {
1745 code[0] = 0x40000008 | (sr << 20);
1746 }
1747 defId(i->def(0), 14);
1748
1749 emitPredicate(i);
1750 } else
1751 if (i->encSize == 8) {
1752 uint64_t opc;
1753
1754 if (i->src(0).getFile() == FILE_IMMEDIATE)
1755 opc = HEX64(18000000, 000001e2);
1756 else
1757 if (i->src(0).getFile() == FILE_PREDICATE)
1758 opc = HEX64(080e0000, 1c000004);
1759 else
1760 opc = HEX64(28000000, 00000004);
1761
1762 opc |= i->lanes << 5;
1763
1764 emitForm_B(i, opc);
1765 } else {
1766 uint32_t imm;
1767
1768 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1769 imm = SDATA(i->src(0)).u32;
1770 if (imm & 0xfff00000) {
1771 assert(!(imm & 0x000fffff));
1772 code[0] = 0x00000318 | imm;
1773 } else {
1774 assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1775 code[0] = 0x00000118 | (imm << 20);
1776 }
1777 } else {
1778 code[0] = 0x0028;
1779 emitShortSrc2(i->src(0));
1780 }
1781 defId(i->def(0), 14);
1782
1783 emitPredicate(i);
1784 }
1785 }
1786
1787 void
1788 CodeEmitterNVC0::emitATOM(const Instruction *i)
1789 {
1790 const bool hasDst = i->defExists(0);
1791 const bool casOrExch =
1792 i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
1793 i->subOp == NV50_IR_SUBOP_ATOM_CAS;
1794
1795 if (i->dType == TYPE_U64) {
1796 switch (i->subOp) {
1797 case NV50_IR_SUBOP_ATOM_ADD:
1798 code[0] = 0x205;
1799 if (hasDst)
1800 code[1] = 0x507e0000;
1801 else
1802 code[1] = 0x10000000;
1803 break;
1804 case NV50_IR_SUBOP_ATOM_EXCH:
1805 code[0] = 0x305;
1806 code[1] = 0x507e0000;
1807 break;
1808 case NV50_IR_SUBOP_ATOM_CAS:
1809 code[0] = 0x325;
1810 code[1] = 0x50000000;
1811 break;
1812 default:
1813 assert(!"invalid u64 red op");
1814 break;
1815 }
1816 } else
1817 if (i->dType == TYPE_U32) {
1818 switch (i->subOp) {
1819 case NV50_IR_SUBOP_ATOM_EXCH:
1820 code[0] = 0x105;
1821 code[1] = 0x507e0000;
1822 break;
1823 case NV50_IR_SUBOP_ATOM_CAS:
1824 code[0] = 0x125;
1825 code[1] = 0x50000000;
1826 break;
1827 default:
1828 code[0] = 0x5 | (i->subOp << 5);
1829 if (hasDst)
1830 code[1] = 0x507e0000;
1831 else
1832 code[1] = 0x10000000;
1833 break;
1834 }
1835 } else
1836 if (i->dType == TYPE_S32) {
1837 assert(i->subOp <= 2);
1838 code[0] = 0x205 | (i->subOp << 5);
1839 if (hasDst)
1840 code[1] = 0x587e0000;
1841 else
1842 code[1] = 0x18000000;
1843 } else
1844 if (i->dType == TYPE_F32) {
1845 assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
1846 code[0] = 0x205;
1847 if (hasDst)
1848 code[1] = 0x687e0000;
1849 else
1850 code[1] = 0x28000000;
1851 }
1852
1853 emitPredicate(i);
1854
1855 srcId(i->src(1), 14);
1856
1857 if (hasDst)
1858 defId(i->def(0), 32 + 11);
1859 else
1860 if (casOrExch)
1861 code[1] |= 63 << 11;
1862
1863 if (hasDst || casOrExch) {
1864 const int32_t offset = SDATA(i->src(0)).offset;
1865 assert(offset < 0x80000 && offset >= -0x80000);
1866 code[0] |= offset << 26;
1867 code[1] |= (offset & 0x1ffc0) >> 6;
1868 code[1] |= (offset & 0xe0000) << 6;
1869 } else {
1870 srcAddr32(i->src(0), 26, 0);
1871 }
1872 if (i->getIndirect(0, 0)) {
1873 srcId(i->getIndirect(0, 0), 20);
1874 if (i->getIndirect(0, 0)->reg.size == 8)
1875 code[1] |= 1 << 26;
1876 } else {
1877 code[0] |= 63 << 20;
1878 }
1879
1880 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1881 srcId(i->src(2), 32 + 17);
1882 }
1883
1884 void
1885 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
1886 {
1887 switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
1888 case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
1889 case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break;
1890 default:
1891 code[0] = 0x45;
1892 assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
1893 break;
1894 }
1895 code[1] = 0xe0000000;
1896
1897 emitPredicate(i);
1898 }
1899
1900 void
1901 CodeEmitterNVC0::emitCCTL(const Instruction *i)
1902 {
1903 code[0] = 0x00000005 | (i->subOp << 5);
1904
1905 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
1906 code[1] = 0x98000000;
1907 srcAddr32(i->src(0), 28, 2);
1908 } else {
1909 code[1] = 0xd0000000;
1910 setAddress24(i->src(0));
1911 }
1912 if (uses64bitAddress(i))
1913 code[1] |= 1 << 26;
1914 srcId(i->src(0).getIndirect(0), 20);
1915
1916 emitPredicate(i);
1917
1918 defId(i, 0, 14);
1919 }
1920
1921 void
1922 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
1923 {
1924 uint8_t m;
1925 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1926 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1927 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1928 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1929 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1930 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1931 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1932 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1933 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1934 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1935 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1936 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1937 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1938 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1939 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1940 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1941 default:
1942 return;
1943 }
1944 code[0] |= m << 5;
1945 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1946 code[1] |= 1 << 16;
1947 }
1948
1949 void
1950 CodeEmitterNVC0::emitSUCalc(Instruction *i)
1951 {
1952 ImmediateValue *imm = NULL;
1953 uint64_t opc;
1954
1955 if (i->srcExists(2)) {
1956 imm = i->getSrc(2)->asImm();
1957 if (imm)
1958 i->setSrc(2, NULL); // special case, make emitForm_A not assert
1959 }
1960
1961 switch (i->op) {
1962 case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
1963 case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
1964 case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
1965 default:
1966 assert(0);
1967 return;
1968 }
1969 emitForm_A(i, opc);
1970
1971 if (i->op == OP_SUCLAMP) {
1972 if (i->dType == TYPE_S32)
1973 code[0] |= 1 << 9;
1974 emitSUCLAMPMode(i->subOp);
1975 }
1976
1977 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1978 code[1] |= 1 << 16;
1979
1980 if (i->op != OP_SUEAU) {
1981 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1982 code[0] |= 63 << 14;
1983 code[1] |= i->getDef(0)->reg.data.id << 23;
1984 } else
1985 if (i->defExists(1)) { // r, p
1986 assert(i->def(1).getFile() == FILE_PREDICATE);
1987 code[1] |= i->getDef(1)->reg.data.id << 23;
1988 } else { // r, #
1989 code[1] |= 7 << 23;
1990 }
1991 }
1992 if (imm) {
1993 assert(i->op == OP_SUCLAMP);
1994 i->setSrc(2, imm);
1995 code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
1996 }
1997 }
1998
1999 void
2000 CodeEmitterNVC0::emitSUGType(DataType ty)
2001 {
2002 switch (ty) {
2003 case TYPE_S32: code[1] |= 1 << 13; break;
2004 case TYPE_U8: code[1] |= 2 << 13; break;
2005 case TYPE_S8: code[1] |= 3 << 13; break;
2006 default:
2007 assert(ty == TYPE_U32);
2008 break;
2009 }
2010 }
2011
2012 void
2013 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2014 {
2015 const uint32_t offset = i->getSrc(s)->reg.data.offset;
2016
2017 assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2018 assert(offset == (offset & 0xfffc));
2019
2020 code[1] |= 1 << 21;
2021 code[0] |= offset << 24;
2022 code[1] |= offset >> 8;
2023 code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2024 }
2025
2026 void
2027 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2028 {
2029 if (!i->srcExists(s) || (i->predSrc == s)) {
2030 code[1] |= 0x7 << 17;
2031 } else {
2032 if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2033 code[1] |= 1 << 20;
2034 srcId(i->src(s), 32 + 17);
2035 }
2036 }
2037
2038 void
2039 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2040 {
2041 code[0] = 0x5;
2042 code[1] = 0xd4000000 | (i->subOp << 15);
2043
2044 emitLoadStoreType(i->dType);
2045 emitSUGType(i->sType);
2046 emitCachingMode(i->cache);
2047
2048 emitPredicate(i);
2049 defId(i->def(0), 14); // destination
2050 srcId(i->src(0), 20); // address
2051 // format
2052 if (i->src(1).getFile() == FILE_GPR)
2053 srcId(i->src(1), 26);
2054 else
2055 setSUConst16(i, 1);
2056 setSUPred(i, 2);
2057 }
2058
2059 void
2060 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2061 {
2062 code[0] = 0x5;
2063 code[1] = 0xdc000000 | (i->subOp << 15);
2064
2065 if (i->op == OP_SUSTP)
2066 code[1] |= i->tex.mask << 22;
2067 else
2068 emitLoadStoreType(i->dType);
2069 emitSUGType(i->sType);
2070 emitCachingMode(i->cache);
2071
2072 emitPredicate(i);
2073 srcId(i->src(0), 20); // address
2074 // format
2075 if (i->src(1).getFile() == FILE_GPR)
2076 srcId(i->src(1), 26);
2077 else
2078 setSUConst16(i, 1);
2079 srcId(i->src(3), 14); // values
2080 setSUPred(i, 2);
2081 }
2082
2083 void
2084 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2085 {
2086 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2087 case 0:
2088 code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2089 code[1] |= (i->subOp & 0x00e0) >> 5; // vsrc2
2090 code[1] |= (i->subOp & 0x0100) << 7; // vsrc2
2091 code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2092 break;
2093 case 1:
2094 code[1] |= (i->subOp & 0x000f) << 8; // v2src1
2095 code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2096 code[1] |= (i->subOp & 0x01e0) >> 1; // v2src2
2097 code[1] |= (i->subOp & 0x0200) << 6; // v2src2
2098 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2099 code[1] |= (i->mask & 0x3) << 2;
2100 break;
2101 case 2:
2102 code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2103 code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2104 code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2105 code[1] |= (i->mask & 0x3) << 2;
2106 code[1] |= (i->mask & 0xc) << 21;
2107 break;
2108 default:
2109 assert(0);
2110 break;
2111 }
2112 }
2113
2114 void
2115 CodeEmitterNVC0::emitVSHL(const Instruction *i)
2116 {
2117 uint64_t opc = 0x4;
2118
2119 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2120 case 0: opc |= 0xe8ULL << 56; break;
2121 case 1: opc |= 0xb4ULL << 56; break;
2122 case 2: opc |= 0x94ULL << 56; break;
2123 default:
2124 assert(0);
2125 break;
2126 }
2127 if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2128 if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2129 if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2130 } else {
2131 if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2132 if (isSignedType(i->sType)) opc |= 1 << 6;
2133 }
2134 emitForm_A(i, opc);
2135 emitVectorSubOp(i);
2136
2137 if (i->saturate)
2138 code[0] |= 1 << 9;
2139 if (i->flagsDef >= 0)
2140 code[1] |= 1 << 16;
2141 }
2142
2143 bool
2144 CodeEmitterNVC0::emitInstruction(Instruction *insn)
2145 {
2146 unsigned int size = insn->encSize;
2147
2148 if (writeIssueDelays && !(codeSize & 0x3f))
2149 size += 8;
2150
2151 if (!insn->encSize) {
2152 ERROR("skipping unencodable instruction: "); insn->print();
2153 return false;
2154 } else
2155 if (codeSize + size > codeSizeLimit) {
2156 ERROR("code emitter output buffer too small\n");
2157 return false;
2158 }
2159
2160 if (writeIssueDelays) {
2161 if (!(codeSize & 0x3f)) {
2162 code[0] = 0x00000007; // cf issue delay "instruction"
2163 code[1] = 0x20000000;
2164 code += 2;
2165 codeSize += 8;
2166 }
2167 const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2168 uint32_t *data = code - (id * 2 + 2);
2169 if (id <= 2) {
2170 data[0] |= insn->sched << (id * 8 + 4);
2171 } else
2172 if (id == 3) {
2173 data[0] |= insn->sched << 28;
2174 data[1] |= insn->sched >> 4;
2175 } else {
2176 data[1] |= insn->sched << ((id - 4) * 8 + 4);
2177 }
2178 }
2179
2180 // assert that instructions with multiple defs don't corrupt registers
2181 for (int d = 0; insn->defExists(d); ++d)
2182 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2183
2184 switch (insn->op) {
2185 case OP_MOV:
2186 case OP_RDSV:
2187 emitMOV(insn);
2188 break;
2189 case OP_NOP:
2190 break;
2191 case OP_LOAD:
2192 emitLOAD(insn);
2193 break;
2194 case OP_STORE:
2195 emitSTORE(insn);
2196 break;
2197 case OP_LINTERP:
2198 case OP_PINTERP:
2199 emitINTERP(insn);
2200 break;
2201 case OP_VFETCH:
2202 emitVFETCH(insn);
2203 break;
2204 case OP_EXPORT:
2205 emitEXPORT(insn);
2206 break;
2207 case OP_PFETCH:
2208 emitPFETCH(insn);
2209 break;
2210 case OP_EMIT:
2211 case OP_RESTART:
2212 emitOUT(insn);
2213 break;
2214 case OP_ADD:
2215 case OP_SUB:
2216 if (isFloatType(insn->dType))
2217 emitFADD(insn);
2218 else
2219 emitUADD(insn);
2220 break;
2221 case OP_MUL:
2222 if (isFloatType(insn->dType))
2223 emitFMUL(insn);
2224 else
2225 emitUMUL(insn);
2226 break;
2227 case OP_MAD:
2228 case OP_FMA:
2229 if (isFloatType(insn->dType))
2230 emitFMAD(insn);
2231 else
2232 emitIMAD(insn);
2233 break;
2234 case OP_SAD:
2235 emitISAD(insn);
2236 break;
2237 case OP_NOT:
2238 emitNOT(insn);
2239 break;
2240 case OP_AND:
2241 emitLogicOp(insn, 0);
2242 break;
2243 case OP_OR:
2244 emitLogicOp(insn, 1);
2245 break;
2246 case OP_XOR:
2247 emitLogicOp(insn, 2);
2248 break;
2249 case OP_SHL:
2250 case OP_SHR:
2251 emitShift(insn);
2252 break;
2253 case OP_SET:
2254 case OP_SET_AND:
2255 case OP_SET_OR:
2256 case OP_SET_XOR:
2257 emitSET(insn->asCmp());
2258 break;
2259 case OP_SELP:
2260 emitSELP(insn);
2261 break;
2262 case OP_SLCT:
2263 emitSLCT(insn->asCmp());
2264 break;
2265 case OP_MIN:
2266 case OP_MAX:
2267 emitMINMAX(insn);
2268 break;
2269 case OP_ABS:
2270 case OP_NEG:
2271 case OP_CEIL:
2272 case OP_FLOOR:
2273 case OP_TRUNC:
2274 case OP_CVT:
2275 case OP_SAT:
2276 emitCVT(insn);
2277 break;
2278 case OP_RSQ:
2279 emitSFnOp(insn, 5);
2280 break;
2281 case OP_RCP:
2282 emitSFnOp(insn, 4);
2283 break;
2284 case OP_LG2:
2285 emitSFnOp(insn, 3);
2286 break;
2287 case OP_EX2:
2288 emitSFnOp(insn, 2);
2289 break;
2290 case OP_SIN:
2291 emitSFnOp(insn, 1);
2292 break;
2293 case OP_COS:
2294 emitSFnOp(insn, 0);
2295 break;
2296 case OP_PRESIN:
2297 case OP_PREEX2:
2298 emitPreOp(insn);
2299 break;
2300 case OP_TEX:
2301 case OP_TXB:
2302 case OP_TXL:
2303 case OP_TXD:
2304 case OP_TXF:
2305 emitTEX(insn->asTex());
2306 break;
2307 case OP_TXQ:
2308 emitTXQ(insn->asTex());
2309 break;
2310 case OP_TEXBAR:
2311 emitTEXBAR(insn);
2312 break;
2313 case OP_SUBFM:
2314 case OP_SUCLAMP:
2315 case OP_SUEAU:
2316 emitSUCalc(insn);
2317 break;
2318 case OP_MADSP:
2319 emitMADSP(insn);
2320 break;
2321 case OP_SULDB:
2322 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2323 emitSULDGB(insn->asTex());
2324 else
2325 ERROR("SULDB not yet supported on < nve4\n");
2326 break;
2327 case OP_SUSTB:
2328 case OP_SUSTP:
2329 if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2330 emitSUSTGx(insn->asTex());
2331 else
2332 ERROR("SUSTx not yet supported on < nve4\n");
2333 break;
2334 case OP_ATOM:
2335 emitATOM(insn);
2336 break;
2337 case OP_BRA:
2338 case OP_CALL:
2339 case OP_PRERET:
2340 case OP_RET:
2341 case OP_DISCARD:
2342 case OP_EXIT:
2343 case OP_PRECONT:
2344 case OP_CONT:
2345 case OP_PREBREAK:
2346 case OP_BREAK:
2347 case OP_JOINAT:
2348 case OP_BRKPT:
2349 case OP_QUADON:
2350 case OP_QUADPOP:
2351 emitFlow(insn);
2352 break;
2353 case OP_QUADOP:
2354 emitQUADOP(insn, insn->subOp, insn->lanes);
2355 break;
2356 case OP_DFDX:
2357 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2358 break;
2359 case OP_DFDY:
2360 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2361 break;
2362 case OP_POPCNT:
2363 emitPOPC(insn);
2364 break;
2365 case OP_INSBF:
2366 emitINSBF(insn);
2367 break;
2368 case OP_EXTBF:
2369 emitEXTBF(insn);
2370 break;
2371 case OP_PERMT:
2372 emitPERMT(insn);
2373 break;
2374 case OP_JOIN:
2375 emitNOP(insn);
2376 insn->join = 1;
2377 break;
2378 case OP_BAR:
2379 emitBAR(insn);
2380 break;
2381 case OP_MEMBAR:
2382 emitMEMBAR(insn);
2383 break;
2384 case OP_CCTL:
2385 emitCCTL(insn);
2386 break;
2387 case OP_VSHL:
2388 emitVSHL(insn);
2389 break;
2390 case OP_PHI:
2391 case OP_UNION:
2392 case OP_CONSTRAINT:
2393 ERROR("operation should have been eliminated");
2394 return false;
2395 case OP_EXP:
2396 case OP_LOG:
2397 case OP_SQRT:
2398 case OP_POW:
2399 ERROR("operation should have been lowered\n");
2400 return false;
2401 default:
2402 ERROR("unknow op\n");
2403 return false;
2404 }
2405
2406 if (insn->join) {
2407 code[0] |= 0x10;
2408 assert(insn->encSize == 8);
2409 }
2410
2411 code += insn->encSize / 4;
2412 codeSize += insn->encSize;
2413 return true;
2414 }
2415
2416 uint32_t
2417 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2418 {
2419 const Target::OpInfo &info = targ->getOpInfo(i);
2420
2421 if (writeIssueDelays || info.minEncSize == 8 || 1)
2422 return 8;
2423
2424 if (i->ftz || i->saturate || i->join)
2425 return 8;
2426 if (i->rnd != ROUND_N)
2427 return 8;
2428 if (i->predSrc >= 0 && i->op == OP_MAD)
2429 return 8;
2430
2431 if (i->op == OP_PINTERP) {
2432 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2433 return 8;
2434 } else
2435 if (i->op == OP_MOV && i->lanes != 0xf) {
2436 return 8;
2437 }
2438
2439 for (int s = 0; i->srcExists(s); ++s) {
2440 if (i->src(s).isIndirect(0))
2441 return 8;
2442
2443 if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2444 if (SDATA(i->src(s)).offset >= 0x100)
2445 return 8;
2446 if (i->getSrc(s)->reg.fileIndex > 1 &&
2447 i->getSrc(s)->reg.fileIndex != 16)
2448 return 8;
2449 } else
2450 if (i->src(s).getFile() == FILE_IMMEDIATE) {
2451 if (i->dType == TYPE_F32) {
2452 if (SDATA(i->src(s)).u32 >= 0x100)
2453 return 8;
2454 } else {
2455 if (SDATA(i->src(s)).u32 > 0xff)
2456 return 8;
2457 }
2458 }
2459
2460 if (i->op == OP_CVT)
2461 continue;
2462 if (i->src(s).mod != Modifier(0)) {
2463 if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2464 if (i->op != OP_RSQ)
2465 return 8;
2466 if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2467 if (i->op != OP_ADD || s != 0)
2468 return 8;
2469 }
2470 }
2471
2472 return 4;
2473 }
2474
2475 // Simplified, erring on safe side.
2476 class SchedDataCalculator : public Pass
2477 {
2478 public:
2479 SchedDataCalculator(const Target *targ) : targ(targ) { }
2480
2481 private:
2482 struct RegScores
2483 {
2484 struct Resource {
2485 int st[DATA_FILE_COUNT]; // LD to LD delay 3
2486 int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2487 int tex; // TEX to non-TEX delay 17 (0x11)
2488 int sfu; // SFU to SFU delay 3 (except PRE-ops)
2489 int imul; // integer MUL to MUL delay 3
2490 } res;
2491 struct ScoreData {
2492 int r[64];
2493 int p[8];
2494 int c;
2495 } rd, wr;
2496 int base;
2497
2498 void rebase(const int base)
2499 {
2500 const int delta = this->base - base;
2501 if (!delta)
2502 return;
2503 this->base = 0;
2504
2505 for (int i = 0; i < 64; ++i) {
2506 rd.r[i] += delta;
2507 wr.r[i] += delta;
2508 }
2509 for (int i = 0; i < 8; ++i) {
2510 rd.p[i] += delta;
2511 wr.p[i] += delta;
2512 }
2513 rd.c += delta;
2514 wr.c += delta;
2515
2516 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2517 res.ld[f] += delta;
2518 res.st[f] += delta;
2519 }
2520 res.sfu += delta;
2521 res.imul += delta;
2522 res.tex += delta;
2523 }
2524 void wipe()
2525 {
2526 memset(&rd, 0, sizeof(rd));
2527 memset(&wr, 0, sizeof(wr));
2528 memset(&res, 0, sizeof(res));
2529 }
2530 int getLatest(const ScoreData& d) const
2531 {
2532 int max = 0;
2533 for (int i = 0; i < 64; ++i)
2534 if (d.r[i] > max)
2535 max = d.r[i];
2536 for (int i = 0; i < 8; ++i)
2537 if (d.p[i] > max)
2538 max = d.p[i];
2539 if (d.c > max)
2540 max = d.c;
2541 return max;
2542 }
2543 inline int getLatestRd() const
2544 {
2545 return getLatest(rd);
2546 }
2547 inline int getLatestWr() const
2548 {
2549 return getLatest(wr);
2550 }
2551 inline int getLatest() const
2552 {
2553 const int a = getLatestRd();
2554 const int b = getLatestWr();
2555
2556 int max = MAX2(a, b);
2557 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2558 max = MAX2(res.ld[f], max);
2559 max = MAX2(res.st[f], max);
2560 }
2561 max = MAX2(res.sfu, max);
2562 max = MAX2(res.imul, max);
2563 max = MAX2(res.tex, max);
2564 return max;
2565 }
2566 void setMax(const RegScores *that)
2567 {
2568 for (int i = 0; i < 64; ++i) {
2569 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
2570 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
2571 }
2572 for (int i = 0; i < 8; ++i) {
2573 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
2574 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
2575 }
2576 rd.c = MAX2(rd.c, that->rd.c);
2577 wr.c = MAX2(wr.c, that->wr.c);
2578
2579 for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2580 res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
2581 res.st[f] = MAX2(res.st[f], that->res.st[f]);
2582 }
2583 res.sfu = MAX2(res.sfu, that->res.sfu);
2584 res.imul = MAX2(res.imul, that->res.imul);
2585 res.tex = MAX2(res.tex, that->res.tex);
2586 }
2587 void print(int cycle)
2588 {
2589 for (int i = 0; i < 64; ++i) {
2590 if (rd.r[i] > cycle)
2591 INFO("rd $r%i @ %i\n", i, rd.r[i]);
2592 if (wr.r[i] > cycle)
2593 INFO("wr $r%i @ %i\n", i, wr.r[i]);
2594 }
2595 for (int i = 0; i < 8; ++i) {
2596 if (rd.p[i] > cycle)
2597 INFO("rd $p%i @ %i\n", i, rd.p[i]);
2598 if (wr.p[i] > cycle)
2599 INFO("wr $p%i @ %i\n", i, wr.p[i]);
2600 }
2601 if (rd.c > cycle)
2602 INFO("rd $c @ %i\n", rd.c);
2603 if (wr.c > cycle)
2604 INFO("wr $c @ %i\n", wr.c);
2605 if (res.sfu > cycle)
2606 INFO("sfu @ %i\n", res.sfu);
2607 if (res.imul > cycle)
2608 INFO("imul @ %i\n", res.imul);
2609 if (res.tex > cycle)
2610 INFO("tex @ %i\n", res.tex);
2611 }
2612 };
2613
2614 RegScores *score; // for current BB
2615 std::vector<RegScores> scoreBoards;
2616 int cycle;
2617 int prevData;
2618 operation prevOp;
2619
2620 const Target *targ;
2621
2622 bool visit(Function *);
2623 bool visit(BasicBlock *);
2624
2625 void commitInsn(const Instruction *, int cycle);
2626 int calcDelay(const Instruction *, int cycle) const;
2627 void setDelay(Instruction *, int delay, Instruction *next);
2628
2629 void recordRd(const Value *, const int ready);
2630 void recordWr(const Value *, const int ready);
2631 void checkRd(const Value *, int cycle, int& delay) const;
2632 void checkWr(const Value *, int cycle, int& delay) const;
2633
2634 int getCycles(const Instruction *, int origDelay) const;
2635 };
2636
2637 void
2638 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
2639 {
2640 if (insn->op == OP_EXIT || insn->op == OP_RET)
2641 delay = MAX2(delay, 14);
2642
2643 if (insn->op == OP_TEXBAR) {
2644 // TODO: except if results not used before EXIT
2645 insn->sched = 0xc2;
2646 } else
2647 if (insn->op == OP_JOIN || insn->join) {
2648 insn->sched = 0x00;
2649 } else
2650 if (delay >= 0 || prevData == 0x04 ||
2651 !next || !targ->canDualIssue(insn, next)) {
2652 insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
2653 if (prevOp == OP_EXPORT)
2654 insn->sched |= 0x40;
2655 else
2656 insn->sched |= 0x20;
2657 } else {
2658 insn->sched = 0x04; // dual-issue
2659 }
2660
2661 if (prevData != 0x04 || prevOp != OP_EXPORT)
2662 if (insn->sched != 0x04 || insn->op == OP_EXPORT)
2663 prevOp = insn->op;
2664
2665 prevData = insn->sched;
2666 }
2667
2668 int
2669 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
2670 {
2671 if (insn->sched & 0x80) {
2672 int c = (insn->sched & 0x0f) * 2 + 1;
2673 if (insn->op == OP_TEXBAR && origDelay > 0)
2674 c += origDelay;
2675 return c;
2676 }
2677 if (insn->sched & 0x60)
2678 return (insn->sched & 0x1f) + 1;
2679 return (insn->sched == 0x04) ? 0 : 32;
2680 }
2681
2682 bool
2683 SchedDataCalculator::visit(Function *func)
2684 {
2685 scoreBoards.resize(func->cfg.getSize());
2686 for (size_t i = 0; i < scoreBoards.size(); ++i)
2687 scoreBoards[i].wipe();
2688 return true;
2689 }
2690
2691 bool
2692 SchedDataCalculator::visit(BasicBlock *bb)
2693 {
2694 Instruction *insn;
2695 Instruction *next = NULL;
2696
2697 int cycle = 0;
2698
2699 prevData = 0x00;
2700 prevOp = OP_NOP;
2701 score = &scoreBoards.at(bb->getId());
2702
2703 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
2704 // back branches will wait until all target dependencies are satisfied
2705 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
2706 continue;
2707 BasicBlock *in = BasicBlock::get(ei.getNode());
2708 if (in->getExit()) {
2709 if (prevData != 0x04)
2710 prevData = in->getExit()->sched;
2711 prevOp = in->getExit()->op;
2712 }
2713 score->setMax(&scoreBoards.at(in->getId()));
2714 }
2715 if (bb->cfg.incidentCount() > 1)
2716 prevOp = OP_NOP;
2717
2718 #ifdef NVC0_DEBUG_SCHED_DATA
2719 INFO("=== BB:%i initial scores\n", bb->getId());
2720 score->print(cycle);
2721 #endif
2722
2723 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
2724 next = insn->next;
2725
2726 commitInsn(insn, cycle);
2727 int delay = calcDelay(next, cycle);
2728 setDelay(insn, delay, next);
2729 cycle += getCycles(insn, delay);
2730
2731 #ifdef NVC0_DEBUG_SCHED_DATA
2732 INFO("cycle %i, sched %02x\n", cycle, insn->sched);
2733 insn->print();
2734 next->print();
2735 #endif
2736 }
2737 if (!insn)
2738 return true;
2739 commitInsn(insn, cycle);
2740
2741 int bbDelay = -1;
2742
2743 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
2744 BasicBlock *out = BasicBlock::get(ei.getNode());
2745
2746 if (ei.getType() != Graph::Edge::BACK) {
2747 // only test the first instruction of the outgoing block
2748 next = out->getEntry();
2749 if (next)
2750 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
2751 } else {
2752 // wait until all dependencies are satisfied
2753 const int regsFree = score->getLatest();
2754 next = out->getFirst();
2755 for (int c = cycle; next && c < regsFree; next = next->next) {
2756 bbDelay = MAX2(bbDelay, calcDelay(next, c));
2757 c += getCycles(next, bbDelay);
2758 }
2759 next = NULL;
2760 }
2761 }
2762 if (bb->cfg.outgoingCount() != 1)
2763 next = NULL;
2764 setDelay(insn, bbDelay, next);
2765 cycle += getCycles(insn, bbDelay);
2766
2767 score->rebase(cycle); // common base for initializing out blocks' scores
2768 return true;
2769 }
2770
2771 #define NVE4_MAX_ISSUE_DELAY 0x1f
2772 int
2773 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
2774 {
2775 int delay = 0, ready = cycle;
2776
2777 for (int s = 0; insn->srcExists(s); ++s)
2778 checkRd(insn->getSrc(s), cycle, delay);
2779 // WAR & WAW don't seem to matter
2780 // for (int s = 0; insn->srcExists(s); ++s)
2781 // recordRd(insn->getSrc(s), cycle);
2782
2783 switch (Target::getOpClass(insn->op)) {
2784 case OPCLASS_SFU:
2785 ready = score->res.sfu;
2786 break;
2787 case OPCLASS_ARITH:
2788 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2789 ready = score->res.imul;
2790 break;
2791 case OPCLASS_TEXTURE:
2792 ready = score->res.tex;
2793 break;
2794 case OPCLASS_LOAD:
2795 ready = score->res.ld[insn->src(0).getFile()];
2796 break;
2797 case OPCLASS_STORE:
2798 ready = score->res.st[insn->src(0).getFile()];
2799 break;
2800 default:
2801 break;
2802 }
2803 if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
2804 ready = MAX2(ready, score->res.tex);
2805
2806 delay = MAX2(delay, ready - cycle);
2807
2808 // if can issue next cycle, delay is 0, not 1
2809 return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
2810 }
2811
2812 void
2813 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
2814 {
2815 const int ready = cycle + targ->getLatency(insn);
2816
2817 for (int d = 0; insn->defExists(d); ++d)
2818 recordWr(insn->getDef(d), ready);
2819 // WAR & WAW don't seem to matter
2820 // for (int s = 0; insn->srcExists(s); ++s)
2821 // recordRd(insn->getSrc(s), cycle);
2822
2823 switch (Target::getOpClass(insn->op)) {
2824 case OPCLASS_SFU:
2825 score->res.sfu = cycle + 4;
2826 break;
2827 case OPCLASS_ARITH:
2828 if (insn->op == OP_MUL && !isFloatType(insn->dType))
2829 score->res.imul = cycle + 4;
2830 break;
2831 case OPCLASS_TEXTURE:
2832 score->res.tex = cycle + 18;
2833 break;
2834 case OPCLASS_LOAD:
2835 if (insn->src(0).getFile() == FILE_MEMORY_CONST)
2836 break;
2837 score->res.ld[insn->src(0).getFile()] = cycle + 4;
2838 score->res.st[insn->src(0).getFile()] = ready;
2839 break;
2840 case OPCLASS_STORE:
2841 score->res.st[insn->src(0).getFile()] = cycle + 4;
2842 score->res.ld[insn->src(0).getFile()] = ready;
2843 break;
2844 case OPCLASS_OTHER:
2845 if (insn->op == OP_TEXBAR)
2846 score->res.tex = cycle;
2847 break;
2848 default:
2849 break;
2850 }
2851
2852 #ifdef NVC0_DEBUG_SCHED_DATA
2853 score->print(cycle);
2854 #endif
2855 }
2856
2857 void
2858 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
2859 {
2860 int ready = cycle;
2861 int a, b;
2862
2863 switch (v->reg.file) {
2864 case FILE_GPR:
2865 a = v->reg.data.id;
2866 b = a + v->reg.size / 4;
2867 for (int r = a; r < b; ++r)
2868 ready = MAX2(ready, score->rd.r[r]);
2869 break;
2870 case FILE_PREDICATE:
2871 ready = MAX2(ready, score->rd.p[v->reg.data.id]);
2872 break;
2873 case FILE_FLAGS:
2874 ready = MAX2(ready, score->rd.c);
2875 break;
2876 case FILE_SHADER_INPUT:
2877 case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
2878 case FILE_MEMORY_LOCAL:
2879 case FILE_MEMORY_CONST:
2880 case FILE_MEMORY_SHARED:
2881 case FILE_MEMORY_GLOBAL:
2882 case FILE_SYSTEM_VALUE:
2883 // TODO: any restrictions here ?
2884 break;
2885 case FILE_IMMEDIATE:
2886 break;
2887 default:
2888 assert(0);
2889 break;
2890 }
2891 if (cycle < ready)
2892 delay = MAX2(delay, ready - cycle);
2893 }
2894
2895 void
2896 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
2897 {
2898 int ready = cycle;
2899 int a, b;
2900
2901 switch (v->reg.file) {
2902 case FILE_GPR:
2903 a = v->reg.data.id;
2904 b = a + v->reg.size / 4;
2905 for (int r = a; r < b; ++r)
2906 ready = MAX2(ready, score->wr.r[r]);
2907 break;
2908 case FILE_PREDICATE:
2909 ready = MAX2(ready, score->wr.p[v->reg.data.id]);
2910 break;
2911 default:
2912 assert(v->reg.file == FILE_FLAGS);
2913 ready = MAX2(ready, score->wr.c);
2914 break;
2915 }
2916 if (cycle < ready)
2917 delay = MAX2(delay, ready - cycle);
2918 }
2919
2920 void
2921 SchedDataCalculator::recordWr(const Value *v, const int ready)
2922 {
2923 int a = v->reg.data.id;
2924
2925 if (v->reg.file == FILE_GPR) {
2926 int b = a + v->reg.size / 4;
2927 for (int r = a; r < b; ++r)
2928 score->rd.r[r] = ready;
2929 } else
2930 // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
2931 if (v->reg.file == FILE_PREDICATE) {
2932 score->rd.p[a] = ready + 4;
2933 } else {
2934 assert(v->reg.file == FILE_FLAGS);
2935 score->rd.c = ready + 4;
2936 }
2937 }
2938
2939 void
2940 SchedDataCalculator::recordRd(const Value *v, const int ready)
2941 {
2942 int a = v->reg.data.id;
2943
2944 if (v->reg.file == FILE_GPR) {
2945 int b = a + v->reg.size / 4;
2946 for (int r = a; r < b; ++r)
2947 score->wr.r[r] = ready;
2948 } else
2949 if (v->reg.file == FILE_PREDICATE) {
2950 score->wr.p[a] = ready;
2951 } else
2952 if (v->reg.file == FILE_FLAGS) {
2953 score->wr.c = ready;
2954 }
2955 }
2956
2957 bool
2958 calculateSchedDataNVC0(const Target *targ, Function *func)
2959 {
2960 SchedDataCalculator sched(targ);
2961 return sched.run(func, true, true);
2962 }
2963
2964 void
2965 CodeEmitterNVC0::prepareEmission(Function *func)
2966 {
2967 CodeEmitter::prepareEmission(func);
2968
2969 if (targ->hasSWSched)
2970 calculateSchedDataNVC0(targ, func);
2971 }
2972
2973 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
2974 : CodeEmitter(target),
2975 targNVC0(target),
2976 writeIssueDelays(target->hasSWSched)
2977 {
2978 code = NULL;
2979 codeSize = codeSizeLimit = 0;
2980 relocInfo = NULL;
2981 }
2982
2983 CodeEmitter *
2984 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
2985 {
2986 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
2987 emit->setProgramType(type);
2988 return emit;
2989 }
2990
2991 CodeEmitter *
2992 TargetNVC0::getCodeEmitter(Program::Type type)
2993 {
2994 if (chipset >= NVISA_GK110_CHIPSET)
2995 return createCodeEmitterGK110(type);
2996 return createCodeEmitterNVC0(type);
2997 }
2998
2999 } // namespace nv50_ir