nv50/ir: fix textureGrad with offsets and in non-FPs
[mesa.git] / src / gallium / drivers / nvc0 / codegen / nv50_ir_emit_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 // Argh, all these assertions ...
28
29 class CodeEmitterNVC0 : public CodeEmitter
30 {
31 public:
32 CodeEmitterNVC0(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36
37 inline void setProgramType(Program::Type pType) { progType = pType; }
38
39 private:
40 const TargetNVC0 *targ;
41
42 Program::Type progType;
43
44 private:
45 void emitForm_A(const Instruction *, uint64_t);
46 void emitForm_B(const Instruction *, uint64_t);
47 void emitForm_S(const Instruction *, uint32_t, bool pred);
48
49 void emitPredicate(const Instruction *);
50
51 void setAddress16(const ValueRef&);
52 void setImmediate(const Instruction *, const int s); // needs op already set
53 void setImmediateS8(const ValueRef&);
54
55 void emitCondCode(CondCode cc, int pos);
56 void emitInterpMode(const Instruction *);
57 void emitLoadStoreType(DataType ty);
58 void emitCachingMode(CacheMode c);
59
60 void emitShortSrc2(const ValueRef&);
61
62 inline uint8_t getSRegEncoding(const ValueRef&);
63
64 void roundMode_A(const Instruction *);
65 void roundMode_C(const Instruction *);
66 void roundMode_CS(const Instruction *);
67
68 void emitNegAbs12(const Instruction *);
69
70 void emitNOP(const Instruction *);
71
72 void emitLOAD(const Instruction *);
73 void emitSTORE(const Instruction *);
74 void emitMOV(const Instruction *);
75
76 void emitINTERP(const Instruction *);
77 void emitPFETCH(const Instruction *);
78 void emitVFETCH(const Instruction *);
79 void emitEXPORT(const Instruction *);
80 void emitOUT(const Instruction *);
81
82 void emitUADD(const Instruction *);
83 void emitFADD(const Instruction *);
84 void emitUMUL(const Instruction *);
85 void emitFMUL(const Instruction *);
86 void emitIMAD(const Instruction *);
87 void emitFMAD(const Instruction *);
88
89 void emitNOT(Instruction *);
90 void emitLogicOp(const Instruction *, uint8_t subOp);
91 void emitPOPC(const Instruction *);
92 void emitINSBF(const Instruction *);
93 void emitShift(const Instruction *);
94
95 void emitSFnOp(const Instruction *, uint8_t subOp);
96
97 void emitCVT(Instruction *);
98 void emitMINMAX(const Instruction *);
99 void emitPreOp(const Instruction *);
100
101 void emitSET(const CmpInstruction *);
102 void emitSLCT(const CmpInstruction *);
103 void emitSELP(const Instruction *);
104
105 void emitTEX(const TexInstruction *);
106 void emitTEXCSAA(const TexInstruction *);
107 void emitTXQ(const TexInstruction *);
108 void emitPIXLD(const TexInstruction *);
109
110 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
111
112 void emitFlow(const Instruction *);
113
114 inline void defId(const ValueDef&, const int pos);
115 inline void srcId(const ValueRef&, const int pos);
116
117 inline void srcAddr32(const ValueRef&, const int pos); // address / 4
118
119 inline void srcId(const ValueRef *, const int pos);
120
121 inline bool isLIMM(const ValueRef&, DataType ty);
122 };
123
// HEX64 pastes two hex digit groups (high, low) into a single 64-bit
// literal; splitting the opcode this way is only for better visibility.
#define HEX64(h, l) 0x##h##l##ULL

// Register data of the value that a source reference (SDATA) or a
// definition (DDATA) resolves to via rep().
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) SDATA(a)
129
130 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
131 {
132 code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
133 }
134
135 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
136 {
137 code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
138 }
139
140 void CodeEmitterNVC0::srcAddr32(const ValueRef& src, const int pos)
141 {
142 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
143 }
144
145 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
146 {
147 code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
148 }
149
150 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
151 {
152 const ImmediateValue *imm = ref.get()->asImm();
153
154 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
155 }
156
157 void
158 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
159 {
160 switch (insn->rnd) {
161 case ROUND_M: code[1] |= 1 << 23; break;
162 case ROUND_P: code[1] |= 2 << 23; break;
163 case ROUND_Z: code[1] |= 3 << 23; break;
164 default:
165 assert(insn->rnd == ROUND_N);
166 break;
167 }
168 }
169
170 void
171 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
172 {
173 if (i->src[1].mod.abs()) code[0] |= 1 << 6;
174 if (i->src[0].mod.abs()) code[0] |= 1 << 7;
175 if (i->src[1].mod.neg()) code[0] |= 1 << 8;
176 if (i->src[0].mod.neg()) code[0] |= 1 << 9;
177 }
178
179 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
180 {
181 uint8_t val;
182
183 switch (cc) {
184 case CC_LT: val = 0x1; break;
185 case CC_LTU: val = 0x9; break;
186 case CC_EQ: val = 0x2; break;
187 case CC_EQU: val = 0xa; break;
188 case CC_LE: val = 0x3; break;
189 case CC_LEU: val = 0xb; break;
190 case CC_GT: val = 0x4; break;
191 case CC_GTU: val = 0xc; break;
192 case CC_NE: val = 0x5; break;
193 case CC_NEU: val = 0xd; break;
194 case CC_GE: val = 0x6; break;
195 case CC_GEU: val = 0xe; break;
196 case CC_TR: val = 0xf; break;
197 case CC_FL: val = 0x0; break;
198
199 case CC_A: val = 0x14; break;
200 case CC_NA: val = 0x13; break;
201 case CC_S: val = 0x15; break;
202 case CC_NS: val = 0x12; break;
203 case CC_C: val = 0x16; break;
204 case CC_NC: val = 0x11; break;
205 case CC_O: val = 0x17; break;
206 case CC_NO: val = 0x10; break;
207
208 default:
209 val = 0;
210 assert(!"invalid condition code");
211 break;
212 }
213 code[pos / 32] |= val << (pos % 32);
214 }
215
216 void
217 CodeEmitterNVC0::emitPredicate(const Instruction *i)
218 {
219 if (i->predSrc >= 0) {
220 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
221 srcId(i->src[i->predSrc], 10);
222 if (i->cc == CC_NOT_P)
223 code[0] |= 0x2000; // negate
224 } else {
225 code[0] |= 0x1c00;
226 }
227 }
228
229 void
230 CodeEmitterNVC0::setAddress16(const ValueRef& src)
231 {
232 Symbol *sym = src.get()->asSym();
233
234 assert(sym);
235
236 code[0] |= (sym->reg.data.offset & 0x003f) << 26;
237 code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
238 }
239
240 void
241 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
242 {
243 const ImmediateValue *imm = i->src[s].get()->asImm();
244 uint32_t u32;
245
246 assert(imm);
247 u32 = imm->reg.data.u32;
248
249 if ((code[0] & 0xf) == 0x2) {
250 // LIMM
251 code[0] |= (u32 & 0x3f) << 26;
252 code[1] |= u32 >> 6;
253 } else
254 if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
255 // integer immediate
256 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
257 assert(!(code[1] & 0xc000));
258 u32 &= 0xfffff;
259 code[0] |= (u32 & 0x3f) << 26;
260 code[1] |= 0xc000 | (u32 >> 6);
261 } else {
262 // float immediate
263 assert(!(u32 & 0x00000fff));
264 assert(!(code[1] & 0xc000));
265 code[0] |= ((u32 >> 12) & 0x3f) << 26;
266 code[1] |= 0xc000 | (u32 >> 18);
267 }
268 }
269
270 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
271 {
272 const ImmediateValue *imm = ref.get()->asImm();
273
274 int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
275
276 assert(s8 == imm->reg.data.s32);
277
278 code[0] |= (s8 & 0x3f) << 26;
279 code[0] |= (s8 >> 6) << 8;
280 }
281
282 void
283 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
284 {
285 code[0] = opc;
286 code[1] = opc >> 32;
287
288 emitPredicate(i);
289
290 defId(i->def[0], 14);
291
292 int s1 = 26;
293 if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
294 s1 = 49;
295
296 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
297 switch (i->getSrc(s)->reg.file) {
298 case FILE_MEMORY_CONST:
299 assert(!(code[1] & 0xc000));
300 code[1] |= (s == 2) ? 0x8000 : 0x4000;
301 code[1] |= i->getSrc(s)->reg.fileIndex << 10;
302 setAddress16(i->src[s]);
303 break;
304 case FILE_IMMEDIATE:
305 assert(s == 1 ||
306 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
307 assert(!(code[1] & 0xc000));
308 setImmediate(i, s);
309 break;
310 case FILE_GPR:
311 if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
312 break;
313 srcId(i->src[s], s ? ((s == 2) ? 49 : s1) : 20);
314 break;
315 default:
316 // ignore here, can be predicate or flags, but must not be address
317 break;
318 }
319 }
320 }
321
322 void
323 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
324 {
325 code[0] = opc;
326 code[1] = opc >> 32;
327
328 emitPredicate(i);
329
330 defId(i->def[0], 14);
331
332 switch (i->src[0].getFile()) {
333 case FILE_MEMORY_CONST:
334 assert(!(code[1] & 0xc000));
335 code[1] |= 0x4000 | (i->src[0].get()->reg.fileIndex << 10);
336 setAddress16(i->src[0]);
337 break;
338 case FILE_IMMEDIATE:
339 assert(!(code[1] & 0xc000));
340 setImmediate(i, 0);
341 break;
342 case FILE_GPR:
343 srcId(i->src[0], 26);
344 break;
345 default:
346 // ignore here, can be predicate or flags, but must not be address
347 break;
348 }
349 }
350
351 void
352 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
353 {
354 code[0] = opc;
355
356 int ss2a = 0;
357 if (opc == 0x0d || opc == 0x0e)
358 ss2a = 2;
359
360 defId(i->def[0], 14);
361 srcId(i->src[0], 20);
362
363 assert(pred || (i->predSrc < 0));
364 if (pred)
365 emitPredicate(i);
366
367 for (int s = 1; s < 3 && i->srcExists(s); ++s) {
368 if (i->src[s].get()->reg.file == FILE_MEMORY_CONST) {
369 assert(!(code[0] & (0x300 >> ss2a)));
370 switch (i->src[s].get()->reg.fileIndex) {
371 case 0: code[0] |= 0x100 >> ss2a; break;
372 case 1: code[0] |= 0x200 >> ss2a; break;
373 case 16: code[0] |= 0x300 >> ss2a; break;
374 default:
375 ERROR("invalid c[] space for short form\n");
376 break;
377 }
378 if (s == 1)
379 code[0] |= i->getSrc(s)->reg.data.offset << 24;
380 else
381 code[0] |= i->getSrc(s)->reg.data.offset << 6;
382 } else
383 if (i->src[s].getFile() == FILE_IMMEDIATE) {
384 assert(s == 1);
385 setImmediateS8(i->src[s]);
386 } else
387 if (i->src[s].getFile() == FILE_GPR) {
388 srcId(i->src[s], (s == 1) ? 26 : 8);
389 }
390 }
391 }
392
393 void
394 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
395 {
396 if (src.getFile() == FILE_MEMORY_CONST) {
397 switch (src.get()->reg.fileIndex) {
398 case 0: code[0] |= 0x100; break;
399 case 1: code[0] |= 0x200; break;
400 case 16: code[0] |= 0x300; break;
401 default:
402 assert(!"unsupported file index for short op");
403 break;
404 }
405 srcAddr32(src, 20);
406 } else {
407 srcId(src, 20);
408 assert(src.getFile() == FILE_GPR);
409 }
410 }
411
412 void
413 CodeEmitterNVC0::emitNOP(const Instruction *i)
414 {
415 code[0] = 0x000001e4;
416 code[1] = 0x40000000;
417 emitPredicate(i);
418 }
419
420 void
421 CodeEmitterNVC0::emitFMAD(const Instruction *i)
422 {
423 bool neg1 = (i->src[0].mod ^ i->src[1].mod).neg();
424
425 if (i->encSize == 8) {
426 if (isLIMM(i->src[1], TYPE_F32)) {
427 emitForm_A(i, HEX64(20000000, 00000002));
428 } else {
429 emitForm_A(i, HEX64(30000000, 00000000));
430
431 if (i->src[2].mod.neg())
432 code[0] |= 1 << 8;
433 }
434 roundMode_A(i);
435
436 if (neg1)
437 code[0] |= 1 << 9;
438
439 if (i->saturate)
440 code[0] |= 1 << 5;
441 if (i->ftz)
442 code[0] |= 1 << 6;
443 } else {
444 assert(!i->saturate && !i->src[2].mod.neg());
445 emitForm_S(i, (i->src[2].getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
446 false);
447 if (neg1)
448 code[0] |= 1 << 4;
449 }
450 }
451
452 void
453 CodeEmitterNVC0::emitFMUL(const Instruction *i)
454 {
455 bool neg = (i->src[0].mod ^ i->src[1].mod).neg();
456
457 assert(i->postFactor >= -3 && i->postFactor <= 3);
458
459 if (i->encSize == 8) {
460 if (isLIMM(i->src[1], TYPE_F32)) {
461 assert(i->postFactor == 0); // constant folded, hopefully
462 emitForm_A(i, HEX64(30000000, 00000002));
463 } else {
464 emitForm_A(i, HEX64(58000000, 00000000));
465 roundMode_A(i);
466 code[1] |= ((i->postFactor > 0) ?
467 (7 - i->postFactor) : (0 - i->postFactor)) << 17;
468 }
469 if (neg)
470 code[1] ^= 1 << 25; // aliases with LIMM sign bit
471
472 if (i->saturate)
473 code[0] |= 1 << 5;
474
475 if (i->dnz)
476 code[0] |= 1 << 7;
477 else
478 if (i->ftz)
479 code[0] |= 1 << 6;
480 } else {
481 assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
482 emitForm_S(i, 0xa8, true);
483 }
484 }
485
486 void
487 CodeEmitterNVC0::emitUMUL(const Instruction *i)
488 {
489 if (i->encSize == 8) {
490 if (i->src[1].getFile() == FILE_IMMEDIATE) {
491 emitForm_A(i, HEX64(10000000, 00000002));
492 } else {
493 emitForm_A(i, HEX64(50000000, 00000003));
494 }
495 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
496 code[0] |= 1 << 6;
497 if (i->sType == TYPE_S32)
498 code[0] |= 1 << 5;
499 if (i->dType == TYPE_S32)
500 code[0] |= 1 << 7;
501 } else {
502 emitForm_S(i, i->src[1].getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
503
504 if (i->sType == TYPE_S32)
505 code[0] |= 1 << 6;
506 }
507 }
508
509 void
510 CodeEmitterNVC0::emitFADD(const Instruction *i)
511 {
512 if (i->encSize == 8) {
513 if (isLIMM(i->src[1], TYPE_F32)) {
514 emitForm_A(i, HEX64(28000000, 00000002));
515
516 assert(!i->src[1].mod.neg() && !i->src[1].mod.abs() && !i->saturate);
517 } else {
518 emitForm_A(i, HEX64(50000000, 00000000));
519
520 roundMode_A(i);
521 if (i->saturate)
522 code[1] |= 1 << 17;
523 }
524 emitNegAbs12(i);
525
526 if (i->op == OP_SUB) code[0] ^= 1 << 8;
527
528 if (i->ftz)
529 code[0] |= 1 << 5;
530 } else {
531 assert(!i->saturate && i->op != OP_SUB &&
532 !i->src[0].mod.abs() &&
533 !i->src[1].mod.neg() && !i->src[1].mod.abs());
534
535 emitForm_S(i, 0x49, true);
536
537 if (i->src[0].mod.neg())
538 code[0] |= 1 << 7;
539 }
540 }
541
542 void
543 CodeEmitterNVC0::emitUADD(const Instruction *i)
544 {
545 uint32_t addOp = 0;
546
547 assert(!i->src[0].mod.abs() && !i->src[1].mod.abs());
548 assert(!i->src[0].mod.neg() || !i->src[1].mod.neg());
549
550 if (i->src[0].mod.neg())
551 addOp |= 0x200;
552 if (i->src[1].mod.neg())
553 addOp |= 0x100;
554 if (i->op == OP_SUB) {
555 addOp ^= 0x100;
556 assert(addOp != 0x300); // would be add-plus-one
557 }
558
559 if (i->encSize == 8) {
560 if (isLIMM(i->src[1], TYPE_U32)) {
561 emitForm_A(i, HEX64(08000000, 00000002));
562 if (i->def[1].exists())
563 code[1] |= 1 << 26; // write carry
564 } else {
565 emitForm_A(i, HEX64(48000000, 00000003));
566 if (i->def[1].exists())
567 code[1] |= 1 << 16; // write carry
568 }
569 code[0] |= addOp;
570
571 if (i->saturate)
572 code[0] |= 1 << 5;
573 if (i->flagsSrc >= 0) // add carry
574 code[0] |= 1 << 6;
575 } else {
576 assert(!(addOp & 0x100));
577 emitForm_S(i, (addOp >> 3) |
578 ((i->src[1].getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
579 }
580 }
581
582 // TODO: shl-add
583 void
584 CodeEmitterNVC0::emitIMAD(const Instruction *i)
585 {
586 assert(i->encSize == 8);
587 emitForm_A(i, HEX64(20000000, 00000003));
588
589 if (isSignedType(i->dType))
590 code[0] |= 1 << 7;
591 if (isSignedType(i->sType))
592 code[0] |= 1 << 5;
593
594 code[1] |= i->saturate << 24;
595
596 if (i->flagsDef >= 0) code[1] |= 1 << 16;
597 if (i->flagsSrc >= 0) code[1] |= 1 << 23;
598
599 if (i->src[2].mod.neg()) code[0] |= 0x10;
600 if (i->src[1].mod.neg() ^
601 i->src[0].mod.neg()) code[0] |= 0x20;
602
603 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
604 code[0] |= 1 << 6;
605 }
606
607 void
608 CodeEmitterNVC0::emitNOT(Instruction *i)
609 {
610 assert(i->encSize == 8);
611 i->src[1].set(i->src[0]);
612 emitForm_A(i, HEX64(68000000, 000001c3));
613 }
614
615 void
616 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
617 {
618 if (i->encSize == 8) {
619 if (isLIMM(i->src[1], TYPE_U32)) {
620 emitForm_A(i, HEX64(38000000, 00000002));
621
622 if (i->src[2].exists())
623 code[1] |= 1 << 26;
624 } else {
625 emitForm_A(i, HEX64(68000000, 00000003));
626
627 if (i->src[2].exists())
628 code[1] |= 1 << 16;
629 }
630 code[0] |= subOp << 6;
631
632 if (i->src[2].exists()) // carry
633 code[0] |= 1 << 5;
634
635 if (i->src[0].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
636 if (i->src[1].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
637 } else {
638 emitForm_S(i, (subOp << 5) |
639 ((i->src[1].getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
640 }
641 }
642
643 void
644 CodeEmitterNVC0::emitPOPC(const Instruction *i)
645 {
646 emitForm_A(i, HEX64(54000000, 00000004));
647
648 if (i->src[0].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
649 if (i->src[1].mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
650 }
651
652 void
653 CodeEmitterNVC0::emitINSBF(const Instruction *i)
654 {
655 emitForm_A(i, HEX64(28000000, 30000000));
656 }
657
658 void
659 CodeEmitterNVC0::emitShift(const Instruction *i)
660 {
661 if (i->op == OP_SHR) {
662 emitForm_A(i, HEX64(58000000, 00000003)
663 | (isSignedType(i->dType) ? 0x20 : 0x00));
664 } else {
665 emitForm_A(i, HEX64(60000000, 00000003));
666 }
667
668 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
669 code[0] |= 1 << 9;
670 }
671
672 void
673 CodeEmitterNVC0::emitPreOp(const Instruction *i)
674 {
675 if (i->encSize == 8) {
676 emitForm_B(i, HEX64(60000000, 00000000));
677
678 if (i->op == OP_PREEX2)
679 code[0] |= 0x20;
680
681 if (i->src[0].mod.abs()) code[0] |= 1 << 6;
682 if (i->src[0].mod.neg()) code[0] |= 1 << 8;
683 } else {
684 emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
685 }
686 }
687
688 void
689 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
690 {
691 if (i->encSize == 8) {
692 code[0] = 0x00000000 | (subOp << 26);
693 code[1] = 0xc8000000;
694
695 emitPredicate(i);
696
697 defId(i->def[0], 14);
698 srcId(i->src[0], 20);
699
700 assert(i->src[0].getFile() == FILE_GPR);
701
702 if (i->saturate) code[0] |= 1 << 5;
703
704 if (i->src[0].mod.abs()) code[0] |= 1 << 7;
705 if (i->src[0].mod.neg()) code[0] |= 1 << 9;
706 } else {
707 emitForm_S(i, 0x80000008 | (subOp << 26), true);
708
709 assert(!i->src[0].mod.neg());
710 if (i->src[0].mod.abs()) code[0] |= 1 << 30;
711 }
712 }
713
714 void
715 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
716 {
717 uint64_t op;
718
719 assert(i->encSize == 8);
720
721 op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
722
723 if (i->ftz)
724 op |= 1 << 5;
725 else
726 if (!isFloatType(i->dType))
727 op |= isSignedType(i->dType) ? 0x23 : 0x03;
728
729 emitForm_A(i, op);
730 emitNegAbs12(i);
731 }
732
733 void
734 CodeEmitterNVC0::roundMode_C(const Instruction *i)
735 {
736 switch (i->rnd) {
737 case ROUND_M: code[1] |= 1 << 17; break;
738 case ROUND_P: code[1] |= 2 << 17; break;
739 case ROUND_Z: code[1] |= 3 << 17; break;
740 case ROUND_NI: code[0] |= 1 << 7; break;
741 case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
742 case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
743 case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
744 case ROUND_N: break;
745 default:
746 assert(!"invalid round mode");
747 break;
748 }
749 }
750
751 void
752 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
753 {
754 switch (i->rnd) {
755 case ROUND_M:
756 case ROUND_MI: code[0] |= 1 << 16; break;
757 case ROUND_P:
758 case ROUND_PI: code[0] |= 2 << 16; break;
759 case ROUND_Z:
760 case ROUND_ZI: code[0] |= 3 << 16; break;
761 default:
762 break;
763 }
764 }
765
766 void
767 CodeEmitterNVC0::emitCVT(Instruction *i)
768 {
769 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
770
771 switch (i->op) {
772 case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;
773 case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
774 case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
775 default:
776 break;
777 }
778
779 const bool sat = (i->op == OP_SAT) || i->saturate;
780 const bool abs = (i->op == OP_ABS) || i->src[0].mod.abs();
781 const bool neg = (i->op == OP_NEG) || i->src[0].mod.neg();
782
783 if (i->encSize == 8) {
784 emitForm_B(i, HEX64(10000000, 00000004));
785
786 roundMode_C(i);
787
788 // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
789 code[0] |= util_logbase2(typeSizeof(i->dType)) << 20;
790 code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
791
792 if (sat)
793 code[0] |= 0x20;
794 if (abs)
795 code[0] |= 1 << 6;
796 if (neg && i->op != OP_ABS)
797 code[0] |= 1 << 8;
798
799 if (i->ftz)
800 code[1] |= 1 << 23;
801
802 if (isSignedIntType(i->dType))
803 code[0] |= 0x080;
804 if (isSignedIntType(i->sType))
805 code[0] |= 0x200;
806
807 if (isFloatType(i->dType)) {
808 if (!isFloatType(i->sType))
809 code[1] |= 0x08000000;
810 } else {
811 if (isFloatType(i->sType))
812 code[1] |= 0x04000000;
813 else
814 code[1] |= 0x0c000000;
815 }
816 } else {
817 if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
818 code[0] = 0x298;
819 } else
820 if (isFloatType(i->dType)) {
821 if (isFloatType(i->sType))
822 code[0] = 0x098;
823 else
824 code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
825 } else {
826 assert(isFloatType(i->sType));
827
828 code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
829 }
830
831 if (neg) code[0] |= 1 << 16;
832 if (sat) code[0] |= 1 << 18;
833 if (abs) code[0] |= 1 << 19;
834
835 roundMode_CS(i);
836 }
837 }
838
839 void
840 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
841 {
842 uint32_t hi;
843 uint32_t lo = 0;
844
845 if (i->sType == TYPE_F64)
846 lo = 0x1;
847 else
848 if (!isFloatType(i->sType))
849 lo = 0x3;
850
851 if (isFloatType(i->dType) || isSignedIntType(i->sType))
852 lo |= 0x20;
853
854 switch (i->op) {
855 case OP_SET_AND: hi = 0x10000000; break;
856 case OP_SET_OR: hi = 0x10200000; break;
857 case OP_SET_XOR: hi = 0x10400000; break;
858 default:
859 hi = 0x100e0000;
860 break;
861 }
862 emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
863
864 if (i->def[0].getFile() == FILE_PREDICATE) {
865 if (i->sType == TYPE_F32)
866 code[1] += 0x10000000;
867 else
868 code[1] += 0x08000000;
869
870 code[0] &= ~0xfc000;
871 defId(i->def[0], 17);
872 if (i->defExists(1))
873 defId(i->def[1], 14);
874 else
875 code[0] |= 0x1c000;
876 }
877
878 if (i->ftz)
879 code[1] |= 1 << 27;
880
881 emitCondCode(i->setCond, 32 + 23);
882 emitNegAbs12(i);
883 }
884
885 void
886 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
887 {
888 uint64_t op;
889
890 switch (i->dType) {
891 case TYPE_S32:
892 op = HEX64(30000000, 00000023);
893 break;
894 case TYPE_U32:
895 op = HEX64(30000000, 00000003);
896 break;
897 case TYPE_F32:
898 op = HEX64(38000000, 00000000);
899 break;
900 default:
901 assert(!"invalid type for SLCT");
902 op = 0;
903 break;
904 }
905 emitForm_A(i, op);
906
907 CondCode cc = i->setCond;
908
909 if (i->src[2].mod.neg())
910 cc = reverseCondCode(cc);
911
912 emitCondCode(cc, 32 + 23);
913
914 if (i->ftz)
915 code[0] |= 1 << 5;
916 }
917
918 void CodeEmitterNVC0::emitSELP(const Instruction *i)
919 {
920 emitForm_A(i, HEX64(20000000, 00000004));
921
922 if (i->cc == CC_NOT_P || i->src[2].mod & Modifier(NV50_IR_MOD_NOT))
923 code[1] |= 1 << 20;
924 }
925
926 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
927 {
928 code[0] = 0x00000086;
929 code[1] = 0xd0000000;
930
931 code[1] |= i->tex.r;
932 code[1] |= i->tex.s << 8;
933
934 if (i->tex.liveOnly)
935 code[0] |= 1 << 9;
936
937 defId(i->def[0], 14);
938 srcId(i->src[0], 20);
939 }
940
941 void
942 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
943 {
944 code[0] = 0x00000006;
945
946 if (1)
947 code[0] |= 0x80; // normal/t/p mode = t, XXX: what is this ?
948
949 if (i->tex.liveOnly)
950 code[0] |= 1 << 9;
951
952 switch (i->op) {
953 case OP_TEX: code[1] = 0x80000000; break;
954 case OP_TXB: code[1] = 0x84000000; break;
955 case OP_TXL: code[1] = 0x86000000; break;
956 case OP_TXF: code[1] = 0x90000000; break;
957 case OP_TXG: code[1] = 0xa0000000; break;
958 case OP_TXD: code[1] = 0xe0000000; break;
959 default:
960 assert(!"invalid texture op");
961 break;
962 }
963 if (i->op == OP_TXF) {
964 if (!i->tex.levelZero)
965 code[1] |= 0x02000000;
966 } else
967 if (i->tex.levelZero) {
968 code[1] |= 0x02000000;
969 }
970
971 if (i->tex.derivAll)
972 code[1] |= 1 << 13;
973
974 defId(i->def[0], 14);
975 srcId(i->src[0], 20);
976
977 emitPredicate(i);
978
979 if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
980
981 code[1] |= i->tex.mask << 14;
982
983 code[1] |= i->tex.r;
984 code[1] |= i->tex.s << 8;
985 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
986 code[1] |= 1 << 18; // in 1st source (with array index)
987
988 // texture target:
989 code[1] |= (i->tex.target.getDim() - 1) << 20;
990 if (i->tex.target.isCube())
991 code[1] += 2 << 20;
992 if (i->tex.target.isArray())
993 code[1] |= 1 << 19;
994 if (i->tex.target.isShadow())
995 code[1] |= 1 << 24;
996
997 int src1 = i->tex.target.getArgCount();
998 if (i->op == OP_TXD && i->tex.useOffsets)
999 ++src1;
1000
1001 if (i->src[src1].getFile() == FILE_IMMEDIATE) { // lzero
1002 if (i->op == OP_TXL)
1003 code[1] &= ~(1 << 26);
1004 else
1005 if (i->op == OP_TXF)
1006 code[1] &= ~(1 << 25);
1007 }
1008 if (i->tex.target == TEX_TARGET_2D_MS ||
1009 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1010 code[1] |= 1 << 23;
1011
1012 if (i->tex.useOffsets) // in vecSrc0.w
1013 code[1] |= 1 << 22;
1014
1015 srcId(i->src[src1], 26);
1016 }
1017
1018 void
1019 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1020 {
1021 code[0] = 0x00000086;
1022 code[1] = 0xc0000000;
1023
1024 switch (i->tex.query) {
1025 case TXQ_DIMS: code[1] |= 0 << 22; break;
1026 case TXQ_TYPE: code[1] |= 1 << 22; break;
1027 case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1028 case TXQ_FILTER: code[1] |= 3 << 22; break;
1029 case TXQ_LOD: code[1] |= 4 << 22; break;
1030 case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;
1031 default:
1032 assert(!"invalid texture query");
1033 break;
1034 }
1035
1036 code[1] |= i->tex.mask << 14;
1037
1038 code[1] |= i->tex.r;
1039 code[1] |= i->tex.s << 8;
1040 if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1041 code[1] |= 1 << 18;
1042
1043 defId(i->def[0], 14);
1044 srcId(i->src[0], 20);
1045 srcId(i->src[1], 26);
1046
1047 emitPredicate(i);
1048 }
1049
1050 void
1051 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1052 {
1053 code[0] = 0x00000000 | (laneMask << 6);
1054 code[1] = 0x48000000 | qOp;
1055
1056 defId(i->def[0], 14);
1057 srcId(i->src[0], 20);
1058 srcId(i->srcExists(1) ? i->src[1] : i->src[0], 26);
1059
1060 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1061 code[0] |= 1 << 9; // dall
1062
1063 emitPredicate(i);
1064 }
1065
1066 void
1067 CodeEmitterNVC0::emitFlow(const Instruction *i)
1068 {
1069 const FlowInstruction *f = i->asFlow();
1070
1071 unsigned mask; // bit 0: predicate, bit 1: target
1072
1073 code[0] = 0x00000007;
1074
1075 switch (i->op) {
1076 case OP_BRA:
1077 code[1] = f->absolute ? 0x00000000 : 0x40000000;
1078 if (i->src[0].getFile() == FILE_MEMORY_CONST ||
1079 i->src[1].getFile() == FILE_MEMORY_CONST)
1080 code[1] |= 0x4000;
1081 mask = 3;
1082 break;
1083 case OP_CALL:
1084 code[1] = f->absolute ? 0x10000000 : 0x50000000;
1085 if (i->src[0].getFile() == FILE_MEMORY_CONST)
1086 code[1] |= 0x4000;
1087 mask = 2;
1088 break;
1089
1090 case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
1091 case OP_RET: code[1] = 0x90000000; mask = 1; break;
1092 case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1093 case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
1094 case OP_CONT: code[1] = 0xb0000000; mask = 1; break;
1095
1096 case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
1097 case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1098 case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
1099 case OP_PRERET: code[1] = 0x78000000; mask = 2; break;
1100
1101 case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
1102 case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1103 case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
1104 default:
1105 assert(!"invalid flow operation");
1106 return;
1107 }
1108
1109 if (mask & 1) {
1110 emitPredicate(i);
1111 if (i->flagsSrc < 0)
1112 code[0] |= 0x1e0;
1113 }
1114
1115 if (!f)
1116 return;
1117
1118 if (f->allWarp)
1119 code[0] |= 1 << 15;
1120 if (f->limit)
1121 code[0] |= 1 << 16;
1122
1123 if (f->op == OP_CALL) {
1124 if (f->builtin) {
1125 assert(f->absolute);
1126 uint32_t pcAbs = targ->getBuiltinOffset(f->target.builtin);
1127 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1128 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1129 } else {
1130 assert(!f->absolute);
1131 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1132 code[0] |= (pcRel & 0x3f) << 26;
1133 code[1] |= (pcRel >> 6) & 0x3ffff;
1134 }
1135 } else
1136 if (mask & 2) {
1137 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1138 // currently we don't want absolute branches
1139 assert(!f->absolute);
1140 code[0] |= (pcRel & 0x3f) << 26;
1141 code[1] |= (pcRel >> 6) & 0x3ffff;
1142 }
1143 }
1144
1145 void
1146 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1147 {
1148 uint32_t prim = i->src[0].get()->reg.data.u32;
1149
1150 code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1151 code[1] = 0x00000000 | (prim >> 6);
1152
1153 emitPredicate(i);
1154
1155 defId(i->def[0], 14);
1156 srcId(i->src[1], 20);
1157 }
1158
1159 void
1160 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1161 {
1162 code[0] = 0x00000006;
1163 code[1] = 0x06000000 | i->src[0].get()->reg.data.offset;
1164
1165 if (i->perPatch)
1166 code[0] |= 0x100;
1167 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1168 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1169
1170 emitPredicate(i);
1171
1172 code[0] |= (i->defCount(0xf) - 1) << 5;
1173
1174 defId(i->def[0], 14);
1175 srcId(i->src[0].getIndirect(0), 20);
1176 srcId(i->src[0].getIndirect(1), 26); // vertex address
1177 }
1178
1179 void
1180 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1181 {
1182 unsigned int size = typeSizeof(i->dType);
1183
1184 code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1185 code[1] = 0x0a000000 | i->src[0].get()->reg.data.offset;
1186
1187 assert(size != 12 && !(code[1] & (size - 1)));
1188
1189 if (i->perPatch)
1190 code[0] |= 0x100;
1191
1192 emitPredicate(i);
1193
1194 assert(i->src[1].getFile() == FILE_GPR);
1195
1196 srcId(i->src[0].getIndirect(0), 20);
1197 srcId(i->src[0].getIndirect(1), 32 + 17); // vertex base address
1198 srcId(i->src[1], 26);
1199 }
1200
1201 void
1202 CodeEmitterNVC0::emitOUT(const Instruction *i)
1203 {
1204 code[0] = 0x00000006;
1205 code[1] = 0x1c000000;
1206
1207 emitPredicate(i);
1208
1209 defId(i->def[0], 14); // new secret address
1210 srcId(i->src[0], 20); // old secret address, should be 0 initially
1211
1212 assert(i->src[0].getFile() == FILE_GPR);
1213
1214 if (i->op == OP_EMIT)
1215 code[0] |= 1 << 5;
1216 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1217 code[0] |= 1 << 6;
1218
1219 // vertex stream
1220 if (i->src[1].getFile() == FILE_IMMEDIATE) {
1221 code[1] |= 0xc000;
1222 code[0] |= SDATA(i->src[1]).u32 << 26;
1223 } else {
1224 srcId(i->src[1], 26);
1225 }
1226 }
1227
1228 void
1229 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1230 {
1231 if (i->encSize == 8) {
1232 code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1233 } else {
1234 if (i->getInterpMode() == NV50_IR_INTERP_SC)
1235 code[0] |= 0x80;
1236 assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1237 }
1238 }
1239
1240 void
1241 CodeEmitterNVC0::emitINTERP(const Instruction *i)
1242 {
1243 const uint32_t base = i->getSrc(0)->reg.data.offset;
1244
1245 if (i->encSize == 8) {
1246 code[0] = 0x00000000;
1247 code[1] = 0xc0000000 | (base & 0xffff);
1248
1249 if (i->saturate)
1250 code[0] |= 1 << 5;
1251
1252 if (i->op == OP_PINTERP)
1253 srcId(i->src[1], 26);
1254 else
1255 code[0] |= 0x3f << 26;
1256
1257 srcId(i->src[0].getIndirect(0), 20);
1258 } else {
1259 assert(i->op == OP_PINTERP);
1260 code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1261 srcId(i->src[1], 20);
1262 }
1263 emitInterpMode(i);
1264
1265 emitPredicate(i);
1266 defId(i->def[0], 14);
1267
1268 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1269 srcId(i->src[i->op == OP_PINTERP ? 2 : 1], 17);
1270 else
1271 code[1] |= 0x3f << 17;
1272 }
1273
1274 void
1275 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1276 {
1277 uint8_t val;
1278
1279 switch (ty) {
1280 case TYPE_U8:
1281 val = 0x00;
1282 break;
1283 case TYPE_S8:
1284 val = 0x20;
1285 break;
1286 case TYPE_F16:
1287 case TYPE_U16:
1288 val = 0x40;
1289 break;
1290 case TYPE_S16:
1291 val = 0x60;
1292 break;
1293 case TYPE_F32:
1294 case TYPE_U32:
1295 case TYPE_S32:
1296 val = 0x80;
1297 break;
1298 case TYPE_F64:
1299 case TYPE_U64:
1300 case TYPE_S64:
1301 val = 0xa0;
1302 break;
1303 case TYPE_B128:
1304 val = 0xc0;
1305 break;
1306 default:
1307 val = 0x80;
1308 assert(!"invalid type");
1309 break;
1310 }
1311 code[0] |= val;
1312 }
1313
1314 void
1315 CodeEmitterNVC0::emitCachingMode(CacheMode c)
1316 {
1317 uint32_t val;
1318
1319 switch (c) {
1320 case CACHE_CA:
1321 // case CACHE_WB:
1322 val = 0x000;
1323 break;
1324 case CACHE_CG:
1325 val = 0x100;
1326 break;
1327 case CACHE_CS:
1328 val = 0x200;
1329 break;
1330 case CACHE_CV:
1331 // case CACHE_WT:
1332 val = 0x300;
1333 break;
1334 default:
1335 val = 0;
1336 assert(!"invalid caching mode");
1337 break;
1338 }
1339 code[0] |= val;
1340 }
1341
1342 void
1343 CodeEmitterNVC0::emitSTORE(const Instruction *i)
1344 {
1345 uint32_t opc;
1346
1347 switch (i->src[0].getFile()) {
1348 case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1349 case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;
1350 case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1351 default:
1352 assert(!"invalid memory file");
1353 opc = 0;
1354 break;
1355 }
1356 code[0] = 0x00000005;
1357 code[1] = opc;
1358
1359 setAddress16(i->src[0]);
1360 srcId(i->src[1], 14);
1361 srcId(i->src[0].getIndirect(0), 20);
1362
1363 emitPredicate(i);
1364
1365 emitLoadStoreType(i->dType);
1366 emitCachingMode(i->cache);
1367 }
1368
1369 void
1370 CodeEmitterNVC0::emitLOAD(const Instruction *i)
1371 {
1372 uint32_t opc;
1373
1374 code[0] = 0x00000005;
1375
1376 switch (i->src[0].getFile()) {
1377 case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1378 case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;
1379 case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1380 case FILE_MEMORY_CONST:
1381 if (!i->src[0].isIndirect(0) && typeSizeof(i->dType) == 4) {
1382 emitMOV(i); // not sure if this is any better
1383 return;
1384 }
1385 opc = 0x14000000 | (i->src[0].get()->reg.fileIndex << 10);
1386 code[0] = 0x00000006 | (i->subOp << 8);
1387 break;
1388 default:
1389 assert(!"invalid memory file");
1390 opc = 0;
1391 break;
1392 }
1393 code[1] = opc;
1394
1395 defId(i->def[0], 14);
1396
1397 setAddress16(i->src[0]);
1398 srcId(i->src[0].getIndirect(0), 20);
1399
1400 emitPredicate(i);
1401
1402 emitLoadStoreType(i->dType);
1403 emitCachingMode(i->cache);
1404 }
1405
1406 uint8_t
1407 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
1408 {
1409 switch (SDATA(ref).sv.sv) {
1410 case SV_LANEID: return 0x00;
1411 case SV_PHYSID: return 0x03;
1412 case SV_VERTEX_COUNT: return 0x10;
1413 case SV_INVOCATION_ID: return 0x11;
1414 case SV_YDIR: return 0x12;
1415 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1416 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1417 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1418 case SV_GRIDID: return 0x2c;
1419 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1420 case SV_LBASE: return 0x34;
1421 case SV_SBASE: return 0x30;
1422 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1423 default:
1424 assert(!"no sreg for system value");
1425 return 0;
1426 }
1427 }
1428
1429 void
1430 CodeEmitterNVC0::emitMOV(const Instruction *i)
1431 {
1432 if (i->src[0].getFile() == FILE_SYSTEM_VALUE) {
1433 uint8_t sr = getSRegEncoding(i->src[0]);
1434
1435 if (i->encSize == 8) {
1436 code[0] = 0x00000004 | (sr << 26);
1437 code[1] = 0x2c000000;
1438 } else {
1439 code[0] = 0x40000008 | (sr << 20);
1440 }
1441 defId(i->def[0], 14);
1442
1443 emitPredicate(i);
1444 } else
1445 if (i->encSize == 8) {
1446 uint64_t opc;
1447
1448 if (i->src[0].getFile() == FILE_IMMEDIATE)
1449 opc = HEX64(18000000, 000001e2);
1450 else
1451 if (i->src[0].getFile() == FILE_PREDICATE)
1452 opc = HEX64(080e0000, 1c000004);
1453 else
1454 opc = HEX64(28000000, 00000004);
1455
1456 opc |= i->lanes << 5;
1457
1458 emitForm_B(i, opc);
1459 } else {
1460 uint32_t imm;
1461
1462 if (i->src[0].getFile() == FILE_IMMEDIATE) {
1463 imm = SDATA(i->src[0]).u32;
1464 if (imm & 0xfff00000) {
1465 assert(!(imm & 0x000fffff));
1466 code[0] = 0x00000318 | imm;
1467 } else {
1468 assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1469 code[0] = 0x00000118 | (imm << 20);
1470 }
1471 } else {
1472 code[0] = 0x0028;
1473 emitShortSrc2(i->src[0]);
1474 }
1475 defId(i->def[0], 14);
1476
1477 emitPredicate(i);
1478 }
1479 }
1480
1481 bool
1482 CodeEmitterNVC0::emitInstruction(Instruction *insn)
1483 {
1484 if (!insn->encSize) {
1485 ERROR("skipping unencodable instruction: "); insn->print();
1486 return false;
1487 } else
1488 if (codeSize + insn->encSize > codeSizeLimit) {
1489 ERROR("code emitter output buffer too small\n");
1490 return false;
1491 }
1492
1493 // assert that instructions with multiple defs don't corrupt registers
1494 for (int d = 0; insn->defExists(d); ++d)
1495 assert(insn->asTex() || insn->def[d].rep()->reg.data.id >= 0);
1496
1497 switch (insn->op) {
1498 case OP_MOV:
1499 case OP_RDSV:
1500 emitMOV(insn);
1501 break;
1502 case OP_NOP:
1503 break;
1504 case OP_LOAD:
1505 emitLOAD(insn);
1506 break;
1507 case OP_STORE:
1508 emitSTORE(insn);
1509 break;
1510 case OP_LINTERP:
1511 case OP_PINTERP:
1512 emitINTERP(insn);
1513 break;
1514 case OP_VFETCH:
1515 emitVFETCH(insn);
1516 break;
1517 case OP_EXPORT:
1518 emitEXPORT(insn);
1519 break;
1520 case OP_PFETCH:
1521 emitPFETCH(insn);
1522 break;
1523 case OP_EMIT:
1524 case OP_RESTART:
1525 emitOUT(insn);
1526 break;
1527 case OP_ADD:
1528 case OP_SUB:
1529 if (isFloatType(insn->dType))
1530 emitFADD(insn);
1531 else
1532 emitUADD(insn);
1533 break;
1534 case OP_MUL:
1535 if (isFloatType(insn->dType))
1536 emitFMUL(insn);
1537 else
1538 emitUMUL(insn);
1539 break;
1540 case OP_MAD:
1541 case OP_FMA:
1542 if (isFloatType(insn->dType))
1543 emitFMAD(insn);
1544 else
1545 emitIMAD(insn);
1546 break;
1547 case OP_NOT:
1548 emitNOT(insn);
1549 break;
1550 case OP_AND:
1551 emitLogicOp(insn, 0);
1552 break;
1553 case OP_OR:
1554 emitLogicOp(insn, 1);
1555 break;
1556 case OP_XOR:
1557 emitLogicOp(insn, 2);
1558 break;
1559 case OP_SHL:
1560 case OP_SHR:
1561 emitShift(insn);
1562 break;
1563 case OP_SET:
1564 case OP_SET_AND:
1565 case OP_SET_OR:
1566 case OP_SET_XOR:
1567 emitSET(insn->asCmp());
1568 break;
1569 case OP_SELP:
1570 emitSELP(insn);
1571 break;
1572 case OP_SLCT:
1573 emitSLCT(insn->asCmp());
1574 break;
1575 case OP_MIN:
1576 case OP_MAX:
1577 emitMINMAX(insn);
1578 break;
1579 case OP_ABS:
1580 case OP_NEG:
1581 case OP_CEIL:
1582 case OP_FLOOR:
1583 case OP_TRUNC:
1584 case OP_CVT:
1585 case OP_SAT:
1586 emitCVT(insn);
1587 break;
1588 case OP_RSQ:
1589 emitSFnOp(insn, 5);
1590 break;
1591 case OP_RCP:
1592 emitSFnOp(insn, 4);
1593 break;
1594 case OP_LG2:
1595 emitSFnOp(insn, 3);
1596 break;
1597 case OP_EX2:
1598 emitSFnOp(insn, 2);
1599 break;
1600 case OP_SIN:
1601 emitSFnOp(insn, 1);
1602 break;
1603 case OP_COS:
1604 emitSFnOp(insn, 0);
1605 break;
1606 case OP_PRESIN:
1607 case OP_PREEX2:
1608 emitPreOp(insn);
1609 break;
1610 case OP_TEX:
1611 case OP_TXB:
1612 case OP_TXL:
1613 case OP_TXD:
1614 case OP_TXF:
1615 emitTEX(insn->asTex());
1616 break;
1617 case OP_TXQ:
1618 emitTXQ(insn->asTex());
1619 break;
1620 case OP_BRA:
1621 case OP_CALL:
1622 case OP_PRERET:
1623 case OP_RET:
1624 case OP_DISCARD:
1625 case OP_EXIT:
1626 case OP_PRECONT:
1627 case OP_CONT:
1628 case OP_PREBREAK:
1629 case OP_BREAK:
1630 case OP_JOINAT:
1631 case OP_BRKPT:
1632 case OP_QUADON:
1633 case OP_QUADPOP:
1634 emitFlow(insn);
1635 break;
1636 case OP_QUADOP:
1637 emitQUADOP(insn, insn->subOp, insn->lanes);
1638 break;
1639 case OP_DFDX:
1640 emitQUADOP(insn, insn->src[0].mod.neg() ? 0x66 : 0x99, 0x4);
1641 break;
1642 case OP_DFDY:
1643 emitQUADOP(insn, insn->src[0].mod.neg() ? 0x5a : 0xa5, 0x5);
1644 break;
1645 case OP_POPCNT:
1646 emitPOPC(insn);
1647 break;
1648 case OP_JOIN:
1649 emitNOP(insn);
1650 insn->join = 1;
1651 break;
1652 case OP_PHI:
1653 case OP_UNION:
1654 case OP_CONSTRAINT:
1655 ERROR("operation should have been eliminated");
1656 return false;
1657 case OP_EXP:
1658 case OP_LOG:
1659 case OP_SQRT:
1660 case OP_POW:
1661 ERROR("operation should have been lowered\n");
1662 return false;
1663 default:
1664 ERROR("unknow op\n");
1665 return false;
1666 }
1667
1668 if (insn->join) {
1669 code[0] |= 0x10;
1670 assert(insn->encSize == 8);
1671 }
1672
1673 code += insn->encSize / 4;
1674 codeSize += insn->encSize;
1675 return true;
1676 }
1677
1678 uint32_t
1679 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
1680 {
1681 const Target::OpInfo &info = targ->getOpInfo(i);
1682
1683 if (info.minEncSize == 8 || 1)
1684 return 8;
1685
1686 if (i->ftz || i->saturate || i->join)
1687 return 8;
1688 if (i->rnd != ROUND_N)
1689 return 8;
1690 if (i->predSrc >= 0 && i->op == OP_MAD)
1691 return 8;
1692
1693 if (i->op == OP_PINTERP) {
1694 if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
1695 return 8;
1696 } else
1697 if (i->op == OP_MOV && i->lanes != 0xf) {
1698 return 8;
1699 }
1700
1701 for (int s = 0; i->srcExists(s); ++s) {
1702 if (i->src[s].isIndirect(0))
1703 return 8;
1704
1705 if (i->src[s].getFile() == FILE_MEMORY_CONST) {
1706 if (SDATA(i->src[s]).offset >= 0x100)
1707 return 8;
1708 if (i->getSrc(s)->reg.fileIndex > 1 &&
1709 i->getSrc(s)->reg.fileIndex != 16)
1710 return 8;
1711 } else
1712 if (i->src[s].getFile() == FILE_IMMEDIATE) {
1713 if (i->dType == TYPE_F32) {
1714 if (SDATA(i->src[s]).u32 >= 0x100)
1715 return 8;
1716 } else {
1717 if (SDATA(i->src[s]).u32 > 0xff)
1718 return 8;
1719 }
1720 }
1721
1722 if (i->op == OP_CVT)
1723 continue;
1724 if (i->src[s].mod != Modifier(0)) {
1725 if (i->src[s].mod == Modifier(NV50_IR_MOD_ABS))
1726 if (i->op != OP_RSQ)
1727 return 8;
1728 if (i->src[s].mod == Modifier(NV50_IR_MOD_NEG))
1729 if (i->op != OP_ADD || s != 0)
1730 return 8;
1731 }
1732 }
1733
1734 return 4;
1735 }
1736
1737 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target) : targ(target)
1738 {
1739 code = NULL;
1740 codeSize = codeSizeLimit = 0;
1741 relocInfo = NULL;
1742 }
1743
1744 CodeEmitter *
1745 TargetNVC0::getCodeEmitter(Program::Type type)
1746 {
1747 CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
1748 emit->setProgramType(type);
1749 return emit;
1750 }
1751
1752 } // namespace nv50_ir