nv50/ir: add missing license headers
[mesa.git] / src / gallium / drivers / nvc0 / codegen / nv50_ir_emit_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36
37 inline void setProgramType(Program::Type pType) { progType = pType; }
38
39 private:
40 const TargetNVC0 *targ;
41
42 Program::Type progType;
43
44 private:
45 void emitForm_A(const Instruction *, uint64_t);
46 void emitForm_B(const Instruction *, uint64_t);
47 void emitForm_S(const Instruction *, uint32_t, bool pred);
48
49 void emitPredicate(const Instruction *);
50
51 void setAddress16(const ValueRef&);
52 void setImmediate(const Instruction *, const int s); // needs op already set
53 void setImmediateS8(const ValueRef&);
54
55 void emitCondCode(CondCode cc, int pos);
56 void emitInterpMode(const Instruction *);
57 void emitLoadStoreType(DataType ty);
58 void emitCachingMode(CacheMode c);
59
60 void emitShortSrc2(const ValueRef&);
61
62 inline uint8_t getSRegEncoding(const ValueRef&);
63
64 void roundMode_A(const Instruction *);
65 void roundMode_C(const Instruction *);
66 void roundMode_CS(const Instruction *);
67
68 void emitNegAbs12(const Instruction *);
69
70 void emitNOP(const Instruction *);
71
72 void emitLOAD(const Instruction *);
73 void emitSTORE(const Instruction *);
74 void emitMOV(const Instruction *);
75
76 void emitINTERP(const Instruction *);
77 void emitPFETCH(const Instruction *);
78 void emitVFETCH(const Instruction *);
79 void emitEXPORT(const Instruction *);
80 void emitOUT(const Instruction *);
81
82 void emitUADD(const Instruction *);
83 void emitFADD(const Instruction *);
84 void emitUMUL(const Instruction *);
85 void emitFMUL(const Instruction *);
86 void emitIMAD(const Instruction *);
87 void emitFMAD(const Instruction *);
88
89 void emitNOT(Instruction *);
90 void emitLogicOp(const Instruction *, uint8_t subOp);
91 void emitPOPC(const Instruction *);
92 void emitINSBF(const Instruction *);
93 void emitShift(const Instruction *);
94
95 void emitSFnOp(const Instruction *, uint8_t subOp);
96
97 void emitCVT(Instruction *);
98 void emitMINMAX(const Instruction *);
99 void emitPreOp(const Instruction *);
100
101 void emitSET(const CmpInstruction *);
102 void emitSLCT(const CmpInstruction *);
103 void emitSELP(const Instruction *);
104
105 void emitTEX(const TexInstruction *);
106 void emitTEXCSAA(const TexInstruction *);
107 void emitTXQ(const TexInstruction *);
108 void emitPIXLD(const TexInstruction *);
109
110 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
111
112 void emitFlow(const Instruction *);
113
114 inline void defId(const ValueDef&, const int pos);
115 inline void srcId(const ValueRef&, const int pos);
116
117 inline void srcAddr32(const ValueRef&, const int pos); // address / 4
118
119 inline void srcId(const ValueRef *, const int pos);
120
121 inline bool isLIMM(const ValueRef&, DataType ty);
122 };
123
// For better visibility: glue two hex digit groups (high part, low part)
// into a single unsigned 64-bit literal.
#define HEX64(hi, lo) 0x##hi##lo##ULL

// Register data of the representative value behind a source reference
// (SDATA) or a definition (DDATA); both expand to the same expression.
#define SDATA(v) ((v).rep()->reg.data)
#define DDATA(v) ((v).rep()->reg.data)
129
130 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
131 {
132 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
133 }
134
135 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
136 {
137 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
138 }
139
140 void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos)
141 {
142 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
143 }
144
145 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
146 {
147 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
148 }
149
150 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
151 {
152 const ImmediateValue *imm = ref.get()->asImm();
153
154 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
155 }
156
157 void
158 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
159 {
160 switch (insn->rnd) {
161 case ROUND_M: code[1] |= 1 << 23; break;
162 case ROUND_P: code[1] |= 2 << 23; break;
163 case ROUND_Z: code[1] |= 3 << 23; break;
164 default:
165 assert(insn->rnd == ROUND_N);
166 break;
167 }
168 }
169
170 void
171 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
172 {
173 if (i->src[1].mod.abs()) code[0] |= 1 << 6;
174 if (i->src[0].mod.abs()) code[0] |= 1 << 7;
175 if (i->src[1].mod.neg()) code[0] |= 1 << 8;
176 if (i->src[0].mod.neg()) code[0] |= 1 << 9;
177 }
178
179 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
180 {
181 uint8_t val;
182
183 switch (cc) {
184 case CC_LT: val = 0x1; break;
185 case CC_LTU: val = 0x9; break;
186 case CC_EQ: val = 0x2; break;
187 case CC_EQU: val = 0xa; break;
188 case CC_LE: val = 0x3; break;
189 case CC_LEU: val = 0xb; break;
190 case CC_GT: val = 0x4; break;
191 case CC_GTU: val = 0xc; break;
192 case CC_NE: val = 0x5; break;
193 case CC_NEU: val = 0xd; break;
194 case CC_GE: val = 0x6; break;
195 case CC_GEU: val = 0xe; break;
196 case CC_TR: val = 0xf; break;
197 case CC_FL: val = 0x0; break;
198
199 case CC_A: val = 0x14; break;
200 case CC_NA: val = 0x13; break;
201 case CC_S: val = 0x15; break;
202 case CC_NS: val = 0x12; break;
203 case CC_C: val = 0x16; break;
204 case CC_NC: val = 0x11; break;
205 case CC_O: val = 0x17; break;
206 case CC_NO: val = 0x10; break;
207
208 default:
209 val = 0;
210 assert(!"invalid condition code");
211 break;
212 }
213 code[pos / 32] |= val << (pos % 32);
214 }
215
216 void
217 CodeEmitterNVC0::emitPredicate(const Instruction *i)
218 {
219 if (i->predSrc >= 0) {
220 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
221 srcId(i->src[i->predSrc], 10);
222 if (i->cc == CC_NOT_P)
223 code[0] |= 0x2000; // negate
224 } else {
225 code[0] |= 0x1c00;
226 }
227 }
228
229 void
230 CodeEmitterNVC0::setAddress16(const ValueRef& src)
231 {
232 Symbol *sym = src.get()->asSym();
233
234 assert(sym);
235
236 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
237 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
238 }
239
240 void
241 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
242 {
243 const ImmediateValue *imm = i->src[s].get()->asImm();
244 uint32_t u32;
245
246 assert(imm);
247 u32 = imm->reg.data.u32;
248
249 if ((code[0] & 0xf) == 0x2) {
250 // LIMM
251 code[0] |= (u32 & 0x3f) << 26;
252 code[1] |= u32 >> 6;
253 } else
254 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
255 // integer immediate
256 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
257 assert(!(code[1] & 0xc000));
258 u32 &= 0xfffff;
259 code[0] |= (u32 & 0x3f) << 26;
260 code[1] |= 0xc000 | (u32 >> 6);
261 } else {
262 // float immediate
263 assert(!(u32 & 0x00000fff));
264 assert(!(code[1] & 0xc000));
265 code[0] |= ((u32 >> 12) & 0x3f) << 26;
266 code[1] |= 0xc000 | (u32 >> 18);
267 }
268 }
269
270 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
271 {
272 const ImmediateValue *imm = ref.get()->asImm();
273
274 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
275
276 assert(s8 == imm->reg.data.s32);
277
278 code[0] |= (s8 & 0x3f) << 26;
279 code[0] |= (s8 >> 6) << 8;
280 }
281
282 void
283 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
284 {
285 code[0] = opc;
286 code[1] = opc >> 32;
287
288 emitPredicate(i);
289
290 defId(i->def[0], 14);
291
292 int s1 = 26;
293 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
294 s1 = 49;
295
296 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
297 switch (i->getSrc(s)->reg.file) {
298 case FILE_MEMORY_CONST:
299 assert(!(code[1] & 0xc000));
300 code[1] |= (s == 2) ? 0x8000 : 0x4000;
301 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
302 setAddress16(i->src[s]);
303 break;
304 case FILE_IMMEDIATE:
305 assert(s == 1 ||
306 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
307 assert(!(code[1] & 0xc000));
308 setImmediate(i, s);
309 break;
310 case FILE_GPR:
311 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
312 break;
313 srcId(i->src[s], s ? ((s == 2) ? 49 : s1) : 20);
314 break;
315 default:
316 // ignore here, can be predicate or flags, but must not be address
317 break;
318 }
319 }
320 }
321
322 void
323 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
324 {
325 code[0] = opc;
326 code[1] = opc >> 32;
327
328 emitPredicate(i);
329
330 defId(i->def[0], 14);
331
332 switch (i->src[0].getFile()) {
333 case FILE_MEMORY_CONST:
334 assert(!(code[1] & 0xc000));
335 code[1] |= 0x4000 | (i->src[0].get()->reg.fileIndex << 10);
336 setAddress16(i->src[0]);
337 break;
338 case FILE_IMMEDIATE:
339 assert(!(code[1] & 0xc000));
340 setImmediate(i, 0);
341 break;
342 case FILE_GPR:
343 srcId(i->src[0], 26);
344 break;
345 default:
346 // ignore here, can be predicate or flags, but must not be address
347 break;
348 }
349 }
350
351 void
352 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
353 {
354 code[0] = opc;
355
356 int ss2a = 0;
357 if (opc == 0x0d || opc == 0x0e)
358 ss2a = 2;
359
360 defId(i->def[0], 14);
361 srcId(i->src[0], 20);
362
363 assert(pred || (i->predSrc < 0));
364 if (pred)
365 emitPredicate(i);
366
367 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
368 if (i->src[s].get()->reg.file == FILE_MEMORY_CONST) {
369 assert(!(code[0] & (0x300 >> ss2a)));
370 switch (i->src[s].get()->reg.fileIndex) {
371 case 0: code[0] |= 0x100 >> ss2a; break;
372 case 1: code[0] |= 0x200 >> ss2a; break;
373 case 16: code[0] |= 0x300 >> ss2a; break;
374 default:
375 ERROR("invalid c[] space for short form\n");
376 break;
377 }
378 if (s == 1)
379 code[0] |= i->getSrc(s)->reg.data.offset << 24;
380 else
381 code[0] |= i->getSrc(s)->reg.data.offset << 6;
382 } else
383 if (i->src[s].getFile() == FILE_IMMEDIATE) {
384 assert(s == 1);
385 setImmediateS8(i->src[s]);
386 } else
387 if (i->src[s].getFile() == FILE_GPR) {
388 srcId(i->src[s], (s == 1) ? 26 : 8);
389 }
390 }
391 }
392
393 void
394 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
395 {
396 if (src.getFile() == FILE_MEMORY_CONST) {
397 switch (src.get()->reg.fileIndex) {
398 case 0: code[0] |= 0x100; break;
399 case 1: code[0] |= 0x200; break;
400 case 16: code[0] |= 0x300; break;
401 default:
402 assert(!"unsupported file index for short op");
403 break;
404 }
405 srcAddr32(src, 20);
406 } else {
407 srcId(src, 20);
408 assert(src.getFile() == FILE_GPR);
409 }
410 }
411
412 void
413 CodeEmitterNVC0::emitNOP(const Instruction *i)
414 {
415 code[0] = 0x000001e4;
416 code[1] = 0x40000000;
417 emitPredicate(i);
418 }
419
420 void
421 CodeEmitterNVC0::emitFMAD(const Instruction *i)
422 {
423 bool neg1 = (i->src[0].mod ^ i->src[1].mod).neg();
424
425 if (i->encSize == 8) {
426 if (isLIMM(i->src[1], TYPE_F32)) {
427 emitForm_A(i, HEX64(20000000, 00000002));
428 } else {
429 emitForm_A(i, HEX64(30000000, 00000000));
430
431 if (i->src[2].mod.neg())
432 code[0] |= 1 << 8;
433 }
434 roundMode_A(i);
435
436 if (neg1)
437 code[0] |= 1 << 9;
438
439 if (i->saturate)
440 code[0] |= 1 << 5;
441 if (i->ftz)
442 code[0] |= 1 << 6;
443 } else {
444 assert(!i->saturate && !i->src[2].mod.neg());
445 emitForm_S(i, (i->src[2].getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
446 false);
447 if (neg1)
448 code[0] |= 1 << 4;
449 }
450 }
451
452 void
453 CodeEmitterNVC0::emitFMUL(const Instruction *i)
454 {
455 bool neg = (i->src[0].mod ^ i->src[1].mod).neg();
456
457 assert(i->postFactor >= -3 && i->postFactor <= 3);
458
459 if (i->encSize == 8) {
460 if (isLIMM(i->src[1], TYPE_F32)) {
461 assert(i->postFactor == 0); // constant folded, hopefully
462 emitForm_A(i, HEX64(30000000, 00000002));
463 } else {
464 emitForm_A(i, HEX64(58000000, 00000000));
465 roundMode_A(i);
466 code[1] |= ((i->postFactor > 0) ?
467 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
468 }
469 if (neg)
470 code[1] ^= 1 << 25; // aliases with LIMM sign bit
471
472 if (i->saturate)
473 code[0] |= 1 << 5;
474
475 if (i->dnz)
476 code[0] |= 1 << 7;
477 else
478 if (i->ftz)
479 code[0] |= 1 << 6;
480 } else {
481 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
482 emitForm_S(i, 0xa8, true);
483 }
484 }
485
486 void
487 CodeEmitterNVC0::emitUMUL(const Instruction *i)
488 {
489 if (i->encSize == 8) {
490 if (i->src[1].getFile() == FILE_IMMEDIATE) {
491 emitForm_A(i, HEX64(10000000, 00000002));
492 } else {
493 emitForm_A(i, HEX64(50000000, 00000003));
494 }
495 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
496 code[0] |= 1 << 6;
497 if (i->sType == TYPE_S32)
498 code[0] |= 1 << 5;
499 if (i->dType == TYPE_S32)
500 code[0] |= 1 << 7;
501 } else {
502 emitForm_S(i, i->src[1].getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
503
504 if (i->sType == TYPE_S32)
505 code[0] |= 1 << 6;
506 }
507 }
508
509 void
510 CodeEmitterNVC0::emitFADD(const Instruction *i)
511 {
512 if (i->encSize == 8) {
513 if (isLIMM(i->src[1], TYPE_F32)) {
514 emitForm_A(i, HEX64(28000000, 00000002));
515
516 assert(!i->src[1].mod.neg() && !i->src[1].mod.abs() && !i->saturate);
517 } else {
518 emitForm_A(i, HEX64(50000000, 00000000));
519
520 roundMode_A(i);
521 if (i->saturate)
522 code[1] |= 1 << 17;
523 }
524 emitNegAbs12(i);
525
526 if (i->op == OP_SUB) code[0] ^= 1 << 8;
527
528 if (i->ftz)
529 code[0] |= 1 << 5;
530 } else {
531 assert(!i->saturate && i->op != OP_SUB &&
532 !i->src[0].mod.abs() &&
533 !i->src[1].mod.neg() && !i->src[1].mod.abs());
534
535 emitForm_S(i, 0x49, true);
536
537 if (i->src[0].mod.neg())
538 code[0] |= 1 << 7;
539 }
540 }
541
542 void
543 CodeEmitterNVC0::emitUADD(const Instruction *i)
544 {
545 uint32_t addOp = 0;
546
547 assert(!i->src[0].mod.abs() && !i->src[1].mod.abs());
548 assert(!i->src[0].mod.neg() || !i->src[1].mod.neg());
549
550 if (i->src[0].mod.neg())
551 addOp |= 0x200;
552 if (i->src[1].mod.neg())
553 addOp |= 0x100;
554 if (i->op == OP_SUB) {
555 addOp ^= 0x100;
556 assert(addOp != 0x300); // would be add-plus-one
557 }
558
559 if (i->encSize == 8) {
560 if (isLIMM(i->src[1], TYPE_U32)) {
561 emitForm_A(i, HEX64(08000000, 00000002));
562 if (i->def[1].exists())
563 code[1] |= 1 << 26; // write carry
564 } else {
565 emitForm_A(i, HEX64(48000000, 00000003));
566 if (i->def[1].exists())
567 code[1] |= 1 << 16; // write carry
568 }
569 code[0] |= addOp;
570
571 if (i->saturate)
572 code[0] |= 1 << 5;
573 if (i->flagsSrc >= 0) // add carry
574 code[0] |= 1 << 6;
575 } else {
576 assert(!(addOp & 0x100));
577 emitForm_S(i, (addOp >> 3) |
578 ((i->src[1].getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
579 }
580 }
581
582 // TODO: shl-add
583 void
584 CodeEmitterNVC0::emitIMAD(const Instruction *i)
585 {
586 assert(i->encSize == 8);
587 emitForm_A(i, HEX64(20000000, 00000003));
588
589 if (isSignedType(i->dType))
590 code[0] |= 1 << 7;
591 if (isSignedType(i->sType))
592 code[0] |= 1 << 5;
593
594 code[1] |= i->saturate << 24;
595
596 if (i->flagsDef >= 0) code[1] |= 1 << 16;
597 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
598
599 if (i->src[2].mod.neg()) code[0] |= 0x10;
600 if (i->src[1].mod.neg() ^
601 i->src[0].mod.neg()) code[0] |= 0x20;
602
603 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
604 code[0] |= 1 << 6;
605 }
606
607 void
608 CodeEmitterNVC0::emitNOT(Instruction *i)
609 {
610 assert(i->encSize == 8);
611 i->src[1].set(i->src[0]);
612 emitForm_A(i, HEX64(68000000, 000001c3));
613 }
614
615 void
616 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
617 {
618 if (i->encSize == 8) {
619 if (isLIMM(i->src[1], TYPE_U32)) {
620 emitForm_A(i, HEX64(38000000, 00000002));
621
622 if (i->src[2].exists())
623 code[1] |= 1 << 26;
624 } else {
625 emitForm_A(i, HEX64(68000000, 00000003));
626
627 if (i->src[2].exists())
628 code[1] |= 1 << 16;
629 }
630 code[0] |= subOp << 6;
631
632 if (i->src[2].exists()) // carry
633 code[0] |= 1 << 5;
634
635 if (i->src[0].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
636 if (i->src[1].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
637 } else {
638 emitForm_S(i, (subOp << 5) |
639 ((i->src[1].getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
640 }
641 }
642
643 void
644 CodeEmitterNVC0::emitPOPC(const Instruction *i)
645 {
646 emitForm_A(i, HEX64(54000000, 00000004));
647
648 if (i->src[0].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
649 if (i->src[1].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
650 }
651
652 void
653 CodeEmitterNVC0::emitINSBF(const Instruction *i)
654 {
655 emitForm_A(i, HEX64(28000000, 30000000));
656 }
657
658 void
659 CodeEmitterNVC0::emitShift(const Instruction *i)
660 {
661 if (i->op == OP_SHR) {
662 emitForm_A(i, HEX64(58000000, 00000003)
663 | (isSignedType(i->dType) ? 0x20 : 0x00));
664 } else {
665 emitForm_A(i, HEX64(60000000, 00000003));
666 }
667
668 if (0)
669 code[0] |= 1 << 9; // clamp shift amount
670 }
671
672 void
673 CodeEmitterNVC0::emitPreOp(const Instruction *i)
674 {
675 if (i->encSize == 8) {
676 emitForm_B(i, HEX64(60000000, 00000000));
677
678 if (i->op == OP_PREEX2)
679 code[0] |= 0x20;
680
681 if (i->src[0].mod.abs()) code[0] |= 1 << 6;
682 if (i->src[0].mod.neg()) code[0] |= 1 << 8;
683 } else {
684 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
685 }
686 }
687
688 void
689 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
690 {
691 if (i->encSize == 8) {
692 code[0] = 0x00000000 | (subOp << 26);
693 code[1] = 0xc8000000;
694
695 emitPredicate(i);
696
697 defId(i->def[0], 14);
698 srcId(i->src[0], 20);
699
700 assert(i->src[0].getFile() == FILE_GPR);
701
702 if (i->saturate) code[0] |= 1 << 5;
703
704 if (i->src[0].mod.abs()) code[0] |= 1 << 7;
705 if (i->src[0].mod.neg()) code[0] |= 1 << 9;
706 } else {
707 emitForm_S(i, 0x80000008 | (subOp << 26), true);
708
709 assert(!i->src[0].mod.neg());
710 if (i->src[0].mod.abs()) code[0] |= 1 << 30;
711 }
712 }
713
714 void
715 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
716 {
717 uint64_t op;
718
719 assert(i->encSize == 8);
720
721 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
722
723 if (i->ftz)
724 op |= 1 << 5;
725 else
726 if (!isFloatType(i->dType))
727 op |= isSignedType(i->dType) ? 0x23 : 0x03;
728
729 emitForm_A(i, op);
730 emitNegAbs12(i);
731 }
732
733 void
734 CodeEmitterNVC0::roundMode_C(const Instruction *i)
735 {
736 switch (i->rnd) {
737 case ROUND_M: code[1] |= 1 << 17; break;
738 case ROUND_P: code[1] |= 2 << 17; break;
739 case ROUND_Z: code[1] |= 3 << 17; break;
740 case ROUND_NI: code[0] |= 1 << 7; break;
741 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
742 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
743 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
744 case ROUND_N: break;
745 default:
746 assert(!"invalid round mode");
747 break;
748 }
749 }
750
751 void
752 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
753 {
754 switch (i->rnd) {
755 case ROUND_M:
756 case ROUND_MI: code[0] |= 1 << 16; break;
757 case ROUND_P:
758 case ROUND_PI: code[0] |= 2 << 16; break;
759 case ROUND_Z:
760 case ROUND_ZI: code[0] |= 3 << 16; break;
761 default:
762 break;
763 }
764 }
765
766 void
767 CodeEmitterNVC0::emitCVT(Instruction *i)
768 {
769 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
770
771 switch (i->op) {
772 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
773 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
774 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
775 default:
776 break;
777 }
778
779 const bool sat = (i->op == OP_SAT) || i->saturate;
780 const bool abs = (i->op == OP_ABS) || i->src[0].mod.abs();
781 const bool neg = (i->op == OP_NEG) || i->src[0].mod.neg();
782
783 if (i->encSize == 8) {
784 emitForm_B(i, HEX64(10000000, 00000004));
785
786 roundMode_C(i);
787
788 code[0] |= util_logbase2(i->def[0].getSize()) << 20;
789 code[0] |= util_logbase2(i->src[0].getSize()) << 23;
790
791 if (sat)
792 code[0] |= 0x20;
793 if (abs)
794 code[0] |= 1 << 6;
795 if (neg && i->op != OP_ABS)
796 code[0] |= 1 << 8;
797
798 if (i->ftz)
799 code[1] |= 1 << 23;
800
801 if (isSignedIntType(i->dType))
802 code[0] |= 0x080;
803 if (isSignedIntType(i->sType))
804 code[0] |= 0x200;
805
806 if (isFloatType(i->dType)) {
807 if (!isFloatType(i->sType))
808 code[1] |= 0x08000000;
809 } else {
810 if (isFloatType(i->sType))
811 code[1] |= 0x04000000;
812 else
813 code[1] |= 0x0c000000;
814 }
815 } else {
816 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
817 code[0] = 0x298;
818 } else
819 if (isFloatType(i->dType)) {
820 if (isFloatType(i->sType))
821 code[0] = 0x098;
822 else
823 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
824 } else {
825 assert(isFloatType(i->sType));
826
827 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
828 }
829
830 if (neg) code[0] |= 1 << 16;
831 if (sat) code[0] |= 1 << 18;
832 if (abs) code[0] |= 1 << 19;
833
834 roundMode_CS(i);
835 }
836 }
837
838 void
839 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
840 {
841 uint32_t hi;
842 uint32_t lo = 0;
843
844 if (i->sType == TYPE_F64)
845 lo = 0x1;
846 else
847 if (!isFloatType(i->sType))
848 lo = 0x3;
849
850 if (isFloatType(i->dType) || isSignedIntType(i->sType))
851 lo |= 0x20;
852
853 switch (i->op) {
854 case OP_SET_AND: hi = 0x10000000; break;
855 case OP_SET_OR: hi = 0x10200000; break;
856 case OP_SET_XOR: hi = 0x10400000; break;
857 default:
858 hi = 0x100e0000;
859 break;
860 }
861 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
862
863 if (i->def[0].getFile() == FILE_PREDICATE) {
864 if (i->sType == TYPE_F32)
865 code[1] += 0x10000000;
866 else
867 code[1] += 0x08000000;
868
869 code[0] &= ~0xfc000;
870 defId(i->def[0], 17);
871 if (i->defExists(1))
872 defId(i->def[1], 14);
873 else
874 code[0] |= 0x1c000;
875 }
876
877 if (i->ftz)
878 code[1] |= 1 << 27;
879
880 emitCondCode(i->setCond, 32 + 23);
881 emitNegAbs12(i);
882 }
883
884 void
885 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
886 {
887 uint64_t op;
888
889 switch (i->dType) {
890 case TYPE_S32:
891 op = HEX64(30000000, 00000023);
892 break;
893 case TYPE_U32:
894 op = HEX64(30000000, 00000003);
895 break;
896 case TYPE_F32:
897 op = HEX64(38000000, 00000000);
898 break;
899 default:
900 assert(!"invalid type for SLCT");
901 op = 0;
902 break;
903 }
904 emitForm_A(i, op);
905
906 CondCode cc = i->setCond;
907
908 if (i->src[2].mod.neg())
909 cc = reverseCondCode(cc);
910
911 emitCondCode(cc, 32 + 23);
912
913 if (i->ftz)
914 code[0] |= 1 << 5;
915 }
916
917 void CodeEmitterNVC0::emitSELP(const Instruction *i)
918 {
919 emitForm_A(i, HEX64(20000000, 00000004));
920
921 if (i->cc == CC_NOT_P || i->src[2].mod & Modifier(NV50_IR_MOD_NOT))
922 code[1] |= 1 << 20;
923 }
924
925 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
926 {
927 code[0] = 0x00000086;
928 code[1] = 0xd0000000;
929
930 code[1] |= i->tex.r;
931 code[1] |= i->tex.s << 8;
932
933 if (i->tex.liveOnly)
934 code[0] |= 1 << 9;
935
936 defId(i->def[0], 14);
937 srcId(i->src[0], 20);
938 }
939
940 void
941 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
942 {
943 code[0] = 0x00000006;
944
945 if (1)
946 code[0] |= 0x80; // normal/t/p mode = t, XXX: what is this ?
947
948 if (i->tex.liveOnly)
949 code[0] |= 1 << 9;
950
951 switch (i->op) {
952 case OP_TEX: code[1] = 0x80000000; break;
953 case OP_TXB: code[1] = 0x84000000; break;
954 case OP_TXL: code[1] = 0x86000000; break;
955 case OP_TXF: code[1] = 0x92000000; break;
956 case OP_TXG: code[1] = 0xa0000000; break;
957 case OP_TXD: code[1] = 0xe0000000; break;
958 default:
959 assert(!"invalid texture op");
960 break;
961 }
962 defId(i->def[0], 14);
963 srcId(i->src[0], 20);
964
965 emitPredicate(i);
966
967 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
968
969 code[1] |= i->tex.mask << 14;
970
971 code[1] |= i->tex.r;
972 code[1] |= i->tex.s << 8;
973 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
974 code[1] |= 1 << 18; // in 1st source (with array index)
975
976 // texture target:
977 code[1] |= (i->tex.target.getDim() - 1) << 20;
978 if (i->tex.target.isCube())
979 code[1] += 2 << 20;
980 if (i->tex.target.isArray())
981 code[1] |= 1 << 19;
982 if (i->tex.target.isShadow())
983 code[1] |= 1 << 24;
984
985 int src1 = i->tex.target.getArgCount();
986
987 if (i->src[src1].getFile() == FILE_IMMEDIATE) { // lzero
988 if (i->op == OP_TXL)
989 code[1] &= ~(1 << 26);
990 else
991 if (i->op == OP_TXF)
992 code[1] &= ~(1 << 25);
993 }
994 if (i->tex.target == TEX_TARGET_2D_MS ||
995 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
996 code[1] |= 1 << 23;
997
998 if (i->tex.useOffsets) // in vecSrc0.w
999 code[1] |= 1 << 22;
1000
1001 srcId(i->src[src1], 26);
1002 }
1003
1004 void
1005 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1006 {
1007 code[0] = 0x00000086;
1008 code[1] = 0xc0000000;
1009
1010 switch (i->tex.query) {
1011 case TXQ_DIMS: code[1] |= 0 << 22; break;
1012 case TXQ_TYPE: code[1] |= 1 << 22; break;
1013 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1014 case TXQ_FILTER: code[1] |= 3 << 22; break;
1015 case TXQ_LOD: code[1] |= 4 << 22; break;
1016 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1017 default:
1018 assert(!"invalid texture query");
1019 break;
1020 }
1021
1022 code[1] |= i->tex.mask << 14;
1023
1024 code[1] |= i->tex.r;
1025 code[1] |= i->tex.s << 8;
1026 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1027 code[1] |= 1 << 18;
1028
1029 defId(i->def[0], 14);
1030 srcId(i->src[0], 20);
1031 srcId(i->src[1], 26);
1032
1033 emitPredicate(i);
1034 }
1035
1036 void
1037 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1038 {
1039 code[0] = 0x00000000 | (laneMask << 6);
1040 code[1] = 0x48000000 | qOp;
1041
1042 defId(i->def[0], 14);
1043 srcId(i->src[0], 20);
1044 srcId(i->srcExists(1) ? i->src[1] : i->src[0], 26);
1045
1046 emitPredicate(i);
1047 }
1048
1049 void
1050 CodeEmitterNVC0::emitFlow(const Instruction *i)
1051 {
1052 const FlowInstruction *f = i->asFlow();
1053
1054 unsigned mask; // bit 0: predicate, bit 1: target
1055
1056 code[0] = 0x00000007;
1057
1058 switch (i->op) {
1059 case OP_BRA:
1060 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1061 if (i->src[0].getFile() == FILE_MEMORY_CONST ||
1062 i->src[1].getFile() == FILE_MEMORY_CONST)
1063 code[1] |= 0x4000;
1064 mask = 3;
1065 break;
1066 case OP_CALL:
1067 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1068 if (i->src[0].getFile() == FILE_MEMORY_CONST)
1069 code[1] |= 0x4000;
1070 mask = 2;
1071 break;
1072
1073 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1074 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1075 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1076 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1077 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1078
1079 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1080 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1081 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1082 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1083
1084 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1085 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1086 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1087 default:
1088 assert(!"invalid flow operation");
1089 return;
1090 }
1091
1092 if (mask & 1) {
1093 emitPredicate(i);
1094 if (i->flagsSrc < 0)
1095 code[0] |= 0x1e0;
1096 }
1097
1098 if (!f)
1099 return;
1100
1101 if (f->allWarp)
1102 code[0] |= 1 << 15;
1103 if (f->limit)
1104 code[0] |= 1 << 16;
1105
1106 if (f->op == OP_CALL) {
1107 if (f->builtin) {
1108 assert(f->absolute);
1109 uint32_t pcAbs = targ->getBuiltinOffset(f->target.builtin);
1110 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1111 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1112 } else {
1113 assert(!f->absolute);
1114 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1115 code[0] |= (pcRel & 0x3f) << 26;
1116 code[1] |= (pcRel >> 6) & 0x3ffff;
1117 }
1118 } else
1119 if (mask & 2) {
1120 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1121 // currently we don't want absolute branches
1122 assert(!f->absolute);
1123 code[0] |= (pcRel & 0x3f) << 26;
1124 code[1] |= (pcRel >> 6) & 0x3ffff;
1125 }
1126 }
1127
1128 void
1129 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1130 {
1131 uint32_t prim = i->src[0].get()->reg.data.u32;
1132
1133 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1134 code[1] = 0x00000000 | (prim >> 6);
1135
1136 emitPredicate(i);
1137
1138 defId(i->def[0], 14);
1139 srcId(i->src[1], 20);
1140 }
1141
1142 void
1143 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1144 {
1145 code[0] = 0x00000006;
1146 code[1] = 0x06000000 | i->src[0].get()->reg.data.offset;
1147
1148 if (i->perPatch)
1149 code[0] |= 0x100;
1150 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1151 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1152
1153 emitPredicate(i);
1154
1155 code[0] |= (i->defCount(0xf) - 1) << 5;
1156
1157 defId(i->def[0], 14);
1158 srcId(i->src[0].getIndirect(0), 20);
1159 srcId(i->src[0].getIndirect(1), 26); // vertex address
1160 }
1161
1162 void
1163 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1164 {
1165 unsigned int size = typeSizeof(i->dType);
1166
1167 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1168 code[1] = 0x0a000000 | i->src[0].get()->reg.data.offset;
1169
1170 assert(size != 12 && !(code[1] & (size - 1)));
1171
1172 if (i->perPatch)
1173 code[0] |= 0x100;
1174
1175 emitPredicate(i);
1176
1177 assert(i->src[1].getFile() == FILE_GPR);
1178
1179 srcId(i->src[0].getIndirect(0), 20);
1180 srcId(i->src[0].getIndirect(1), 32 + 17); // vertex base address
1181 srcId(i->src[1], 26);
1182 }
1183
1184 void
1185 CodeEmitterNVC0::emitOUT(const Instruction *i)
1186 {
1187 code[0] = 0x00000006;
1188 code[1] = 0x1c000000;
1189
1190 emitPredicate(i);
1191
1192 defId(i->def[0], 14); // new secret address
1193 srcId(i->src[0], 20); // old secret address, should be 0 initially
1194
1195 assert(i->src[0].getFile() == FILE_GPR);
1196
1197 if (i->op == OP_EMIT)
1198 code[0] |= 1 << 5;
1199 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1200 code[0] |= 1 << 6;
1201
1202 // vertex stream
1203 if (i->src[1].getFile() == FILE_IMMEDIATE) {
1204 code[1] |= 0xc000;
1205 code[0] |= SDATA(i->src[1]).u32 << 26;
1206 } else {
1207 srcId(i->src[1], 26);
1208 }
1209 }
1210
1211 void
1212 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1213 {
1214 if (i->encSize == 8) {
1215 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1216 } else {
1217 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1218 code[0] |= 0x80;
1219 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1220 }
1221 }
1222
1223 void
1224 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1225 {
1226 const uint32_t base = i->getSrc(0)->reg.data.offset;
1227
1228 if (i->encSize == 8) {
1229 code[0] = 0x00000000;
1230 code[1] = 0xc0000000 | (base & 0xffff);
1231
1232 if (i->saturate)
1233 code[0] |= 1 << 5;
1234
1235 if (i->op == OP_PINTERP)
1236 srcId(i->src[1], 26);
1237 else
1238 code[0] |= 0x3f << 26;
1239
1240 srcId(i->src[0].getIndirect(0), 20);
1241 } else {
1242 assert(i->op == OP_PINTERP);
1243 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1244 srcId(i->src[1], 20);
1245 }
1246 emitInterpMode(i);
1247
1248 emitPredicate(i);
1249 defId(i->def[0], 14);
1250
1251 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1252 srcId(i->src[i->op == OP_PINTERP ? 2 : 1], 17);
1253 else
1254 code[1] |= 0x3f << 17;
1255 }
1256
1257 void
1258 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1259 {
1260 uint8_t val;
1261
1262 switch (ty) {
1263 case TYPE_U8:
1264 val = 0x00;
1265 break;
1266 case TYPE_S8:
1267 val = 0x20;
1268 break;
1269 case TYPE_F16:
1270 case TYPE_U16:
1271 val = 0x40;
1272 break;
1273 case TYPE_S16:
1274 val = 0x60;
1275 break;
1276 case TYPE_F32:
1277 case TYPE_U32:
1278 case TYPE_S32:
1279 val = 0x80;
1280 break;
1281 case TYPE_F64:
1282 case TYPE_U64:
1283 case TYPE_S64:
1284 val = 0xa0;
1285 break;
1286 case TYPE_B128:
1287 val = 0xc0;
1288 break;
1289 default:
1290 val = 0x80;
1291 assert(!"invalid type");
1292 break;
1293 }
1294 code[0] |= val;
1295 }
1296
1297 void
1298 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1299 {
1300 uint32_t val;
1301
1302 switch (c) {
1303 case CACHE_CA:
1304 // case CACHE_WB:
1305 val = 0x000;
1306 break;
1307 case CACHE_CG:
1308 val = 0x100;
1309 break;
1310 case CACHE_CS:
1311 val = 0x200;
1312 break;
1313 case CACHE_CV:
1314 // case CACHE_WT:
1315 val = 0x300;
1316 break;
1317 default:
1318 val = 0;
1319 assert(!"invalid caching mode");
1320 break;
1321 }
1322 code[0] |= val;
1323 }
1324
1325 void
1326 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1327 {
1328 uint32_t opc;
1329
1330 switch (i->src[0].getFile()) {
1331 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1332 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1333 case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1334 default:
1335 assert(!"invalid memory file");
1336 opc = 0;
1337 break;
1338 }
1339 code[0] = 0x00000005;
1340 code[1] = opc;
1341
1342 setAddress16(i->src[0]);
1343 srcId(i->src[1], 14);
1344 srcId(i->src[0].getIndirect(0), 20);
1345
1346 emitPredicate(i);
1347
1348 emitLoadStoreType(i->dType);
1349 emitCachingMode(i->cache);
1350 }
1351
1352 void
1353 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1354 {
1355 uint32_t opc;
1356
1357 code[0] = 0x00000005;
1358
1359 switch (i->src[0].getFile()) {
1360 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1361 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1362 case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1363 case FILE_MEMORY_CONST:
1364 if (!i->src[0].isIndirect(0) && typeSizeof(i->dType) == 4) {
1365 emitMOV(i); // not sure if this is any better
1366 return;
1367 }
1368 opc = 0x14000000 | (i->src[0].get()->reg.fileIndex << 10);
1369 code[0] = 0x00000006 | (i->subOp << 8);
1370 break;
1371 default:
1372 assert(!"invalid memory file");
1373 opc = 0;
1374 break;
1375 }
1376 code[1] = opc;
1377
1378 defId(i->def[0], 14);
1379
1380 setAddress16(i->src[0]);
1381 srcId(i->src[0].getIndirect(0), 20);
1382
1383 emitPredicate(i);
1384
1385 emitLoadStoreType(i->dType);
1386 emitCachingMode(i->cache);
1387 }
1388
1389 uint8_t
1390 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1391 {
1392 switch (SDATA(ref).sv.sv) {
1393 case SV_LANEID: return 0x00;
1394 case SV_PHYSID: return 0x03;
1395 case SV_VERTEX_COUNT: return 0x10;
1396 case SV_INVOCATION_ID: return 0x11;
1397 case SV_YDIR: return 0x12;
1398 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1399 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1400 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1401 case SV_GRIDID: return 0x2c;
1402 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1403 case SV_LBASE: return 0x34;
1404 case SV_SBASE: return 0x30;
1405 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1406 default:
1407 assert(!"no sreg for system value");
1408 return 0;
1409 }
1410 }
1411
1412 void
1413 CodeEmitterNVC0::emitMOV(const Instruction *i)
1414 {
1415 if (i->src[0].getFile() == FILE_SYSTEM_VALUE) {
1416 uint8_t sr = getSRegEncoding(i->src[0]);
1417
1418 if (i->encSize == 8) {
1419 code[0] = 0x00000004 | (sr << 26);
1420 code[1] = 0x2c000000;
1421 } else {
1422 code[0] = 0x40000008 | (sr << 20);
1423 }
1424 defId(i->def[0], 14);
1425
1426 emitPredicate(i);
1427 } else
1428 if (i->encSize == 8) {
1429 uint64_t opc;
1430
1431 if (i->src[0].getFile() == FILE_IMMEDIATE)
1432 opc = HEX64(18000000, 000001e2);
1433 else
1434 if (i->src[0].getFile() == FILE_PREDICATE)
1435 opc = HEX64(080e0000, 1c000004);
1436 else
1437 opc = HEX64(28000000, 00000004);
1438
1439 opc |= i->lanes << 5;
1440
1441 emitForm_B(i, opc);
1442 } else {
1443 uint32_t imm;
1444
1445 if (i->src[0].getFile() == FILE_IMMEDIATE) {
1446 imm = SDATA(i->src[0]).u32;
1447 if (imm & 0xfff00000) {
1448 assert(!(imm & 0x000fffff));
1449 code[0] = 0x00000318 | imm;
1450 } else {
1451 assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1452 code[0] = 0x00000118 | (imm << 20);
1453 }
1454 } else {
1455 code[0] = 0x0028;
1456 emitShortSrc2(i->src[0]);
1457 }
1458 defId(i->def[0], 14);
1459
1460 emitPredicate(i);
1461 }
1462 }
1463
1464 bool
1465 CodeEmitterNVC0::emitInstruction(Instruction *insn)
1466 {
1467 if (!insn->encSize) {
1468 ERROR("skipping unencodable instruction: "); insn->print();
1469 return false;
1470 } else
1471 if (codeSize + insn->encSize > codeSizeLimit) {
1472 ERROR("code emitter output buffer too small\n");
1473 return false;
1474 }
1475
1476 // assert that instructions with multiple defs don't corrupt registers
1477 for (int d = 0; insn->defExists(d); ++d)
1478 assert(insn->asTex() || insn->def[d].rep()->reg.data.id >= 0);
1479
1480 switch (insn->op) {
1481 case OP_MOV:
1482 case OP_RDSV:
1483 emitMOV(insn);
1484 break;
1485 case OP_NOP:
1486 break;
1487 case OP_LOAD:
1488 emitLOAD(insn);
1489 break;
1490 case OP_STORE:
1491 emitSTORE(insn);
1492 break;
1493 case OP_LINTERP:
1494 case OP_PINTERP:
1495 emitINTERP(insn);
1496 break;
1497 case OP_VFETCH:
1498 emitVFETCH(insn);
1499 break;
1500 case OP_EXPORT:
1501 emitEXPORT(insn);
1502 break;
1503 case OP_PFETCH:
1504 emitPFETCH(insn);
1505 break;
1506 case OP_EMIT:
1507 case OP_RESTART:
1508 emitOUT(insn);
1509 break;
1510 case OP_ADD:
1511 case OP_SUB:
1512 if (isFloatType(insn->dType))
1513 emitFADD(insn);
1514 else
1515 emitUADD(insn);
1516 break;
1517 case OP_MUL:
1518 if (isFloatType(insn->dType))
1519 emitFMUL(insn);
1520 else
1521 emitUMUL(insn);
1522 break;
1523 case OP_MAD:
1524 case OP_FMA:
1525 if (isFloatType(insn->dType))
1526 emitFMAD(insn);
1527 else
1528 emitIMAD(insn);
1529 break;
1530 case OP_NOT:
1531 emitNOT(insn);
1532 break;
1533 case OP_AND:
1534 emitLogicOp(insn, 0);
1535 break;
1536 case OP_OR:
1537 emitLogicOp(insn, 1);
1538 break;
1539 case OP_XOR:
1540 emitLogicOp(insn, 2);
1541 break;
1542 case OP_SHL:
1543 case OP_SHR:
1544 emitShift(insn);
1545 break;
1546 case OP_SET:
1547 case OP_SET_AND:
1548 case OP_SET_OR:
1549 case OP_SET_XOR:
1550 emitSET(insn->asCmp());
1551 break;
1552 case OP_SELP:
1553 emitSELP(insn);
1554 break;
1555 case OP_SLCT:
1556 emitSLCT(insn->asCmp());
1557 break;
1558 case OP_MIN:
1559 case OP_MAX:
1560 emitMINMAX(insn);
1561 break;
1562 case OP_ABS:
1563 case OP_NEG:
1564 case OP_CEIL:
1565 case OP_FLOOR:
1566 case OP_TRUNC:
1567 case OP_CVT:
1568 case OP_SAT:
1569 emitCVT(insn);
1570 break;
1571 case OP_RSQ:
1572 emitSFnOp(insn, 5);
1573 break;
1574 case OP_RCP:
1575 emitSFnOp(insn, 4);
1576 break;
1577 case OP_LG2:
1578 emitSFnOp(insn, 3);
1579 break;
1580 case OP_EX2:
1581 emitSFnOp(insn, 2);
1582 break;
1583 case OP_SIN:
1584 emitSFnOp(insn, 1);
1585 break;
1586 case OP_COS:
1587 emitSFnOp(insn, 0);
1588 break;
1589 case OP_PRESIN:
1590 case OP_PREEX2:
1591 emitPreOp(insn);
1592 break;
1593 case OP_TEX:
1594 case OP_TXB:
1595 case OP_TXL:
1596 case OP_TXD:
1597 case OP_TXF:
1598 emitTEX(insn->asTex());
1599 break;
1600 case OP_TXQ:
1601 emitTXQ(insn->asTex());
1602 break;
1603 case OP_BRA:
1604 case OP_CALL:
1605 case OP_PRERET:
1606 case OP_RET:
1607 case OP_DISCARD:
1608 case OP_EXIT:
1609 case OP_PRECONT:
1610 case OP_CONT:
1611 case OP_PREBREAK:
1612 case OP_BREAK:
1613 case OP_JOINAT:
1614 case OP_BRKPT:
1615 case OP_QUADON:
1616 case OP_QUADPOP:
1617 emitFlow(insn);
1618 break;
1619 case OP_QUADOP:
1620 emitQUADOP(insn, insn->subOp, insn->lanes);
1621 break;
1622 case OP_DFDX:
1623 emitQUADOP(insn, insn->src[0].mod.neg() ? 0x66 : 0x99, 0x4);
1624 break;
1625 case OP_DFDY:
1626 emitQUADOP(insn, insn->src[0].mod.neg() ? 0x5a : 0xa5, 0x5);
1627 break;
1628 case OP_POPCNT:
1629 emitPOPC(insn);
1630 break;
1631 case OP_JOIN:
1632 emitNOP(insn);
1633 insn->join = 1;
1634 break;
1635 case OP_PHI:
1636 case OP_UNION:
1637 case OP_CONSTRAINT:
1638 ERROR("operation should have been eliminated");
1639 return false;
1640 case OP_EXP:
1641 case OP_LOG:
1642 case OP_SQRT:
1643 case OP_POW:
1644 ERROR("operation should have been lowered\n");
1645 return false;
1646 default:
1647 ERROR("unknow op\n");
1648 return false;
1649 }
1650
1651 if (insn->join) {
1652 code[0] |= 0x10;
1653 assert(insn->encSize == 8);
1654 }
1655
1656 code += insn->encSize / 4;
1657 codeSize += insn->encSize;
1658 return true;
1659 }
1660
1661 uint32_t
1662 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
1663 {
1664 const Target::OpInfo &info = targ->getOpInfo(i);
1665
1666 if (info.minEncSize == 8 || 1)
1667 return 8;
1668
1669 if (i->ftz || i->saturate || i->join)
1670 return 8;
1671 if (i->rnd != ROUND_N)
1672 return 8;
1673 if (i->predSrc >= 0 && i->op == OP_MAD)
1674 return 8;
1675
1676 if (i->op == OP_PINTERP) {
1677 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
1678 return 8;
1679 } else
1680 if (i->op == OP_MOV && i->lanes != 0xf) {
1681 return 8;
1682 }
1683
1684 for (int s = 0; i->srcExists(s); ++s) {
1685 if (i->src[s].isIndirect(0))
1686 return 8;
1687
1688 if (i->src[s].getFile() == FILE_MEMORY_CONST) {
1689 if (SDATA(i->src[s]).offset >= 0x100)
1690 return 8;
1691 if (i->getSrc(s)->reg.fileIndex > 1 &&
1692 i->getSrc(s)->reg.fileIndex != 16)
1693 return 8;
1694 } else
1695 if (i->src[s].getFile() == FILE_IMMEDIATE) {
1696 if (i->dType == TYPE_F32) {
1697 if (SDATA(i->src[s]).u32 >= 0x100)
1698 return 8;
1699 } else {
1700 if (SDATA(i->src[s]).u32 > 0xff)
1701 return 8;
1702 }
1703 }
1704
1705 if (i->op == OP_CVT)
1706 continue;
1707 if (i->src[s].mod != Modifier(0)) {
1708 if (i->src[s].mod == Modifier(NV50_IR_MOD_ABS))
1709 if (i->op != OP_RSQ)
1710 return 8;
1711 if (i->src[s].mod == Modifier(NV50_IR_MOD_NEG))
1712 if (i->op != OP_ADD || s != 0)
1713 return 8;
1714 }
1715 }
1716
1717 return 4;
1718 }
1719
1720 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target) : targ(target)
1721 {
1722 code = NULL;
1723 codeSize = codeSizeLimit = 0;
1724 relocInfo = NULL;
1725 }
1726
1727 CodeEmitter *
1728 TargetNVC0::getCodeEmitter(Program::Type type)
1729 {
1730 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
1731 emit->setProgramType(type);
1732 return emit;
1733 }
1734
1735 } // namespace nv50_ir