gm107/ir: fix indirect txq emission
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gk110.cpp
1 /*
2 * Copyright 2012 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 // CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
26
27 namespace nv50_ir {
28
29 class CodeEmitterGK110 : public CodeEmitter
30 {
31 public:
32 CodeEmitterGK110(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
49 void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
50 void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier);
51
52 void emitPredicate(const Instruction *);
53
54 void setCAddress14(const ValueRef&);
55 void setShortImmediate(const Instruction *, const int s);
56 void setImmediate32(const Instruction *, const int s, Modifier);
57
58 void modNegAbsF32_3b(const Instruction *, const int s);
59
60 void emitCondCode(CondCode cc, int pos, uint8_t mask);
61 void emitInterpMode(const Instruction *);
62 void emitLoadStoreType(DataType ty, const int pos);
63 void emitCachingMode(CacheMode c, const int pos);
64
65 inline uint8_t getSRegEncoding(const ValueRef&);
66
67 void emitRoundMode(RoundMode, const int pos, const int rintPos);
68 void emitRoundModeF(RoundMode, const int pos);
69 void emitRoundModeI(RoundMode, const int pos);
70
71 void emitNegAbs12(const Instruction *);
72
73 void emitNOP(const Instruction *);
74
75 void emitLOAD(const Instruction *);
76 void emitSTORE(const Instruction *);
77 void emitMOV(const Instruction *);
78
79 void emitINTERP(const Instruction *);
80 void emitPFETCH(const Instruction *);
81 void emitVFETCH(const Instruction *);
82 void emitEXPORT(const Instruction *);
83 void emitOUT(const Instruction *);
84
85 void emitUADD(const Instruction *);
86 void emitFADD(const Instruction *);
87 void emitDADD(const Instruction *);
88 void emitIMUL(const Instruction *);
89 void emitFMUL(const Instruction *);
90 void emitDMUL(const Instruction *);
91 void emitIMAD(const Instruction *);
92 void emitISAD(const Instruction *);
93 void emitFMAD(const Instruction *);
94 void emitDMAD(const Instruction *);
95
96 void emitNOT(const Instruction *);
97 void emitLogicOp(const Instruction *, uint8_t subOp);
98 void emitPOPC(const Instruction *);
99 void emitINSBF(const Instruction *);
100 void emitEXTBF(const Instruction *);
101 void emitBFIND(const Instruction *);
102 void emitShift(const Instruction *);
103
104 void emitSFnOp(const Instruction *, uint8_t subOp);
105
106 void emitCVT(const Instruction *);
107 void emitMINMAX(const Instruction *);
108 void emitPreOp(const Instruction *);
109
110 void emitSET(const CmpInstruction *);
111 void emitSLCT(const CmpInstruction *);
112 void emitSELP(const Instruction *);
113
114 void emitTEXBAR(const Instruction *);
115 void emitTEX(const TexInstruction *);
116 void emitTEXCSAA(const TexInstruction *);
117 void emitTXQ(const TexInstruction *);
118
119 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
120
121 void emitPIXLD(const Instruction *);
122
123 void emitFlow(const Instruction *);
124
125 inline void defId(const ValueDef&, const int pos);
126 inline void srcId(const ValueRef&, const int pos);
127 inline void srcId(const ValueRef *, const int pos);
128 inline void srcId(const Instruction *, int s, const int pos);
129
130 inline void srcAddr32(const ValueRef&, const int pos); // address / 4
131
132 inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
133 };
134
// Register id written by srcId()/defId() when an operand has no value.
#define GK110_GPR_ZERO 255

// Flag helpers.  The bit index is given as hex digits WITHOUT the 0x prefix
// (e.g. NEG_(34, 2) addresses bit 0x34 = 52, i.e. bit 20 of code[1]).  They
// expect `i` (the instruction) and `code` (the encoding words) to be in
// scope at the point of use.
//
// Each statement macro is wrapped in do { } while (0) so that it behaves as
// a single statement: a bare `if` in the expansion would capture an `else`
// that follows the macro at the use site.
#define NEG_(b, s)                                                      \
   do {                                                                 \
      if (i->src(s).mod.neg())                                          \
         code[(0x##b) / 32] |= 1 << ((0x##b) % 32);                     \
   } while (0)
#define ABS_(b, s)                                                      \
   do {                                                                 \
      if (i->src(s).mod.abs())                                          \
         code[(0x##b) / 32] |= 1 << ((0x##b) % 32);                     \
   } while (0)

#define NOT_(b, s)                                                      \
   do {                                                                 \
      if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))                    \
         code[(0x##b) / 32] |= 1 << ((0x##b) % 32);                     \
   } while (0)

#define FTZ_(b)                                                         \
   do {                                                                 \
      if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32);            \
   } while (0)
#define DNZ_(b)                                                         \
   do {                                                                 \
      if (i->dnz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32);            \
   } while (0)

#define SAT_(b)                                                         \
   do {                                                                 \
      if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32);       \
   } while (0)

// Dispatch to emitRoundMode<t>() (t is F or I) with the instruction's
// rounding mode and the hex bit position b.
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)

// Register data of the representative value behind a source / def.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
154
155 void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
156 {
157 code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);
158 }
159
160 void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
161 {
162 code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);
163 }
164
165 void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
166 {
167 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;
168 code[pos / 32] |= r << (pos % 32);
169 }
170
171 void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
172 {
173 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
174 }
175
176 void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
177 {
178 code[pos / 32] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);
179 }
180
181 bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
182 {
183 const ImmediateValue *imm = ref.get()->asImm();
184
185 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
186 }
187
188 void
189 CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
190 {
191 bool rint = false;
192 uint8_t n;
193
194 switch (rnd) {
195 case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break;
196 case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break;
197 case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break;
198 default:
199 rint = rnd == ROUND_NI;
200 n = 0;
201 assert(rnd == ROUND_N || rnd == ROUND_NI);
202 break;
203 }
204 code[pos / 32] |= n << (pos % 32);
205 if (rint && rintPos >= 0)
206 code[rintPos / 32] |= 1 << (rintPos % 32);
207 }
208
209 void
210 CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
211 {
212 uint8_t n;
213
214 switch (rnd) {
215 case ROUND_M: n = 1; break;
216 case ROUND_P: n = 2; break;
217 case ROUND_Z: n = 3; break;
218 default:
219 n = 0;
220 assert(rnd == ROUND_N);
221 break;
222 }
223 code[pos / 32] |= n << (pos % 32);
224 }
225
226 void
227 CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
228 {
229 uint8_t n;
230
231 switch (rnd) {
232 case ROUND_MI: n = 1; break;
233 case ROUND_PI: n = 2; break;
234 case ROUND_ZI: n = 3; break;
235 default:
236 n = 0;
237 assert(rnd == ROUND_NI);
238 break;
239 }
240 code[pos / 32] |= n << (pos % 32);
241 }
242
243 void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
244 {
245 uint8_t n;
246
247 switch (cc) {
248 case CC_FL: n = 0x00; break;
249 case CC_LT: n = 0x01; break;
250 case CC_EQ: n = 0x02; break;
251 case CC_LE: n = 0x03; break;
252 case CC_GT: n = 0x04; break;
253 case CC_NE: n = 0x05; break;
254 case CC_GE: n = 0x06; break;
255 case CC_LTU: n = 0x09; break;
256 case CC_EQU: n = 0x0a; break;
257 case CC_LEU: n = 0x0b; break;
258 case CC_GTU: n = 0x0c; break;
259 case CC_NEU: n = 0x0d; break;
260 case CC_GEU: n = 0x0e; break;
261 case CC_TR: n = 0x0f; break;
262 case CC_NO: n = 0x10; break;
263 case CC_NC: n = 0x11; break;
264 case CC_NS: n = 0x12; break;
265 case CC_NA: n = 0x13; break;
266 case CC_A: n = 0x14; break;
267 case CC_S: n = 0x15; break;
268 case CC_C: n = 0x16; break;
269 case CC_O: n = 0x17; break;
270 default:
271 n = 0;
272 assert(!"invalid condition code");
273 break;
274 }
275 code[pos / 32] |= (n & mask) << (pos % 32);
276 }
277
278 void
279 CodeEmitterGK110::emitPredicate(const Instruction *i)
280 {
281 if (i->predSrc >= 0) {
282 srcId(i->src(i->predSrc), 18);
283 if (i->cc == CC_NOT_P)
284 code[0] |= 8 << 18; // negate
285 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
286 } else {
287 code[0] |= 7 << 18;
288 }
289 }
290
291 void
292 CodeEmitterGK110::setCAddress14(const ValueRef& src)
293 {
294 const Storage& res = src.get()->asSym()->reg;
295 const int32_t addr = res.data.offset / 4;
296
297 code[0] |= (addr & 0x01ff) << 23;
298 code[1] |= (addr & 0x3e00) >> 9;
299 code[1] |= res.fileIndex << 5;
300 }
301
302 void
303 CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
304 {
305 const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
306 const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
307
308 if (i->sType == TYPE_F32) {
309 assert(!(u32 & 0x00000fff));
310 code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
311 code[1] |= ((u32 & 0x7fe00000) >> 21);
312 code[1] |= ((u32 & 0x80000000) >> 4);
313 } else
314 if (i->sType == TYPE_F64) {
315 assert(!(u64 & 0x00000fffffffffffULL));
316 code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
317 code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
318 code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
319 } else {
320 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
321 code[0] |= (u32 & 0x001ff) << 23;
322 code[1] |= (u32 & 0x7fe00) >> 9;
323 code[1] |= (u32 & 0x80000) << 8;
324 }
325 }
326
327 void
328 CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
329 Modifier mod)
330 {
331 uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
332
333 if (mod) {
334 ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
335 mod.applyTo(imm);
336 u32 = imm.reg.data.u32;
337 }
338
339 code[0] |= u32 << 23;
340 code[1] |= u32 >> 9;
341 }
342
343 void
344 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
345 Modifier mod)
346 {
347 code[0] = ctg;
348 code[1] = opc << 20;
349
350 emitPredicate(i);
351
352 defId(i->def(0), 2);
353
354 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
355 switch (i->src(s).getFile()) {
356 case FILE_GPR:
357 srcId(i->src(s), s ? 42 : 10);
358 break;
359 case FILE_IMMEDIATE:
360 setImmediate32(i, s, mod);
361 break;
362 default:
363 break;
364 }
365 }
366 }
367
368
369 void
370 CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
371 {
372 code[0] = ctg;
373 code[1] = opc << 20;
374
375 emitPredicate(i);
376
377 defId(i->def(0), 2);
378
379 switch (i->src(0).getFile()) {
380 case FILE_MEMORY_CONST:
381 code[1] |= 0x4 << 28;
382 setCAddress14(i->src(0));
383 break;
384 case FILE_GPR:
385 code[1] |= 0xc << 28;
386 srcId(i->src(0), 23);
387 break;
388 default:
389 assert(0);
390 break;
391 }
392 }
393
394 // 0x2 for GPR, c[] and 0x1 for short immediate
395 void
396 CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
397 uint32_t opc1)
398 {
399 const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;
400
401 int s1 = 23;
402 if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
403 s1 = 42;
404
405 if (imm) {
406 code[0] = 0x1;
407 code[1] = opc1 << 20;
408 } else {
409 code[0] = 0x2;
410 code[1] = (0xc << 28) | (opc2 << 20);
411 }
412
413 emitPredicate(i);
414
415 defId(i->def(0), 2);
416
417 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
418 switch (i->src(s).getFile()) {
419 case FILE_MEMORY_CONST:
420 code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
421 setCAddress14(i->src(s));
422 break;
423 case FILE_IMMEDIATE:
424 setShortImmediate(i, s);
425 break;
426 case FILE_GPR:
427 srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
428 break;
429 default:
430 // ignore here, can be predicate or flags, but must not be address
431 break;
432 }
433 }
434 // 0x0 = invalid
435 // 0xc = rrr
436 // 0x8 = rrc
437 // 0x4 = rcr
438 assert(imm || (code[1] & (0xc << 28)));
439 }
440
441 inline void
442 CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
443 {
444 if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);
445 if (i->src(s).mod.neg()) code[1] ^= (1 << 27);
446 }
447
448 void
449 CodeEmitterGK110::emitNOP(const Instruction *i)
450 {
451 code[0] = 0x00003c02;
452 code[1] = 0x85800000;
453
454 if (i)
455 emitPredicate(i);
456 else
457 code[0] = 0x001c3c02;
458 }
459
460 void
461 CodeEmitterGK110::emitFMAD(const Instruction *i)
462 {
463 assert(!isLIMM(i->src(1), TYPE_F32));
464
465 emitForm_21(i, 0x0c0, 0x940);
466
467 NEG_(34, 2);
468 SAT_(35);
469 RND_(36, F);
470 FTZ_(38);
471 DNZ_(39);
472
473 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
474
475 if (code[0] & 0x1) {
476 if (neg1)
477 code[1] ^= 1 << 27;
478 } else
479 if (neg1) {
480 code[1] |= 1 << 19;
481 }
482 }
483
484 void
485 CodeEmitterGK110::emitDMAD(const Instruction *i)
486 {
487 assert(!i->saturate);
488 assert(!i->ftz);
489
490 emitForm_21(i, 0x1b8, 0xb38);
491
492 NEG_(34, 2);
493 RND_(36, F);
494
495 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
496
497 if (code[0] & 0x1) {
498 if (neg1)
499 code[1] ^= 1 << 27;
500 } else
501 if (neg1) {
502 code[1] |= 1 << 19;
503 }
504 }
505
506 void
507 CodeEmitterGK110::emitFMUL(const Instruction *i)
508 {
509 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
510
511 assert(i->postFactor >= -3 && i->postFactor <= 3);
512
513 if (isLIMM(i->src(1), TYPE_F32)) {
514 emitForm_L(i, 0x200, 0x2, Modifier(0));
515
516 FTZ_(38);
517 DNZ_(39);
518 SAT_(3a);
519 if (neg)
520 code[1] ^= 1 << 22;
521
522 assert(i->postFactor == 0);
523 } else {
524 emitForm_21(i, 0x234, 0xc34);
525 code[1] |= ((i->postFactor > 0) ?
526 (7 - i->postFactor) : (0 - i->postFactor)) << 12;
527
528 RND_(2a, F);
529 FTZ_(2f);
530 DNZ_(30);
531 SAT_(35);
532
533 if (code[0] & 0x1) {
534 if (neg)
535 code[1] ^= 1 << 27;
536 } else
537 if (neg) {
538 code[1] |= 1 << 19;
539 }
540 }
541 }
542
543 void
544 CodeEmitterGK110::emitDMUL(const Instruction *i)
545 {
546 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
547
548 assert(!i->postFactor);
549 assert(!i->saturate);
550 assert(!i->ftz);
551 assert(!i->dnz);
552
553 emitForm_21(i, 0x240, 0xc40);
554
555 RND_(2a, F);
556
557 if (code[0] & 0x1) {
558 if (neg)
559 code[1] ^= 1 << 27;
560 } else
561 if (neg) {
562 code[1] |= 1 << 19;
563 }
564 }
565
566 void
567 CodeEmitterGK110::emitIMUL(const Instruction *i)
568 {
569 assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
570 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
571
572 if (isLIMM(i->src(1), TYPE_S32)) {
573 emitForm_L(i, 0x280, 2, Modifier(0));
574
575 assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH);
576
577 if (i->sType == TYPE_S32)
578 code[1] |= 3 << 25;
579 } else {
580 emitForm_21(i, 0x21c, 0xc1c);
581
582 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
583 code[1] |= 1 << 10;
584 if (i->sType == TYPE_S32)
585 code[1] |= 3 << 11;
586 }
587 }
588
589 void
590 CodeEmitterGK110::emitFADD(const Instruction *i)
591 {
592 if (isLIMM(i->src(1), TYPE_F32)) {
593 assert(i->rnd == ROUND_N);
594 assert(!i->saturate);
595
596 Modifier mod = i->src(1).mod ^
597 Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
598
599 emitForm_L(i, 0x400, 0, mod);
600
601 FTZ_(3a);
602 NEG_(3b, 0);
603 ABS_(39, 0);
604 } else {
605 emitForm_21(i, 0x22c, 0xc2c);
606
607 FTZ_(2f);
608 RND_(2a, F);
609 ABS_(31, 0);
610 NEG_(33, 0);
611 SAT_(35);
612
613 if (code[0] & 0x1) {
614 modNegAbsF32_3b(i, 1);
615 if (i->op == OP_SUB) code[1] ^= 1 << 27;
616 } else {
617 ABS_(34, 1);
618 NEG_(30, 1);
619 if (i->op == OP_SUB) code[1] ^= 1 << 16;
620 }
621 }
622 }
623
624 void
625 CodeEmitterGK110::emitDADD(const Instruction *i)
626 {
627 assert(!i->saturate);
628 assert(!i->ftz);
629
630 emitForm_21(i, 0x238, 0xc38);
631 RND_(2a, F);
632 ABS_(31, 0);
633 NEG_(33, 0);
634 if (code[0] & 0x1) {
635 modNegAbsF32_3b(i, 1);
636 if (i->op == OP_SUB) code[1] ^= 1 << 27;
637 } else {
638 NEG_(30, 1);
639 ABS_(34, 1);
640 if (i->op == OP_SUB) code[1] ^= 1 << 16;
641 }
642 }
643
644 void
645 CodeEmitterGK110::emitUADD(const Instruction *i)
646 {
647 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
648
649 if (i->op == OP_SUB)
650 addOp ^= 1;
651
652 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
653
654 if (isLIMM(i->src(1), TYPE_S32)) {
655 emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
656
657 if (addOp & 2)
658 code[1] |= 1 << 27;
659
660 assert(!i->defExists(1));
661 assert(i->flagsSrc < 0);
662
663 SAT_(39);
664 } else {
665 emitForm_21(i, 0x208, 0xc08);
666
667 assert(addOp != 3); // would be add-plus-one
668
669 code[1] |= addOp << 19;
670
671 if (i->defExists(1))
672 code[1] |= 1 << 18; // write carry
673 if (i->flagsSrc >= 0)
674 code[1] |= 1 << 14; // add carry
675
676 SAT_(35);
677 }
678 }
679
680 // TODO: shl-add
681 void
682 CodeEmitterGK110::emitIMAD(const Instruction *i)
683 {
684 uint8_t addOp =
685 (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
686
687 emitForm_21(i, 0x100, 0xa00);
688
689 assert(addOp != 3);
690 code[1] |= addOp << 26;
691
692 if (i->sType == TYPE_S32)
693 code[1] |= (1 << 19) | (1 << 24);
694
695 if (code[0] & 0x1) {
696 assert(!i->subOp);
697 SAT_(39);
698 } else {
699 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
700 code[1] |= 1 << 25;
701 SAT_(35);
702 }
703 }
704
705 void
706 CodeEmitterGK110::emitISAD(const Instruction *i)
707 {
708 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
709
710 emitForm_21(i, 0x1f4, 0xb74);
711
712 if (i->dType == TYPE_S32)
713 code[1] |= 1 << 19;
714 }
715
716 void
717 CodeEmitterGK110::emitNOT(const Instruction *i)
718 {
719 code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
720 code[1] = 0x22003800;
721
722 emitPredicate(i);
723
724 defId(i->def(0), 2);
725
726 switch (i->src(0).getFile()) {
727 case FILE_GPR:
728 code[1] |= 0xc << 28;
729 srcId(i->src(0), 23);
730 break;
731 case FILE_MEMORY_CONST:
732 code[1] |= 0x4 << 28;
733 setCAddress14(i->src(1));
734 break;
735 default:
736 assert(0);
737 break;
738 }
739 }
740
741 void
742 CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
743 {
744 if (isLIMM(i->src(1), TYPE_S32)) {
745 emitForm_L(i, 0x200, 0, i->src(1).mod);
746 code[1] |= subOp << 24;
747 NOT_(3a, 0);
748 } else {
749 emitForm_21(i, 0x220, 0xc20);
750 code[1] |= subOp << 12;
751 NOT_(2a, 0);
752 NOT_(2b, 1);
753 }
754 }
755
756 void
757 CodeEmitterGK110::emitPOPC(const Instruction *i)
758 {
759 assert(!isLIMM(i->src(1), TYPE_S32, true));
760
761 emitForm_21(i, 0x204, 0xc04);
762
763 NOT_(2a, 0);
764 if (!(code[0] & 0x1))
765 NOT_(2b, 1);
766 }
767
768 void
769 CodeEmitterGK110::emitINSBF(const Instruction *i)
770 {
771 emitForm_21(i, 0x1f8, 0xb78);
772 }
773
774 void
775 CodeEmitterGK110::emitEXTBF(const Instruction *i)
776 {
777 emitForm_21(i, 0x600, 0xc00);
778
779 if (i->dType == TYPE_S32)
780 code[1] |= 0x80000;
781 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
782 code[1] |= 0x800;
783 }
784
785 void
786 CodeEmitterGK110::emitBFIND(const Instruction *i)
787 {
788 emitForm_C(i, 0x218, 0x2);
789
790 if (i->dType == TYPE_S32)
791 code[1] |= 0x80000;
792 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
793 code[1] |= 0x800;
794 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
795 code[1] |= 0x1000;
796 }
797
798 void
799 CodeEmitterGK110::emitShift(const Instruction *i)
800 {
801 if (i->op == OP_SHR) {
802 emitForm_21(i, 0x214, 0xc14);
803 if (isSignedType(i->dType))
804 code[1] |= 1 << 19;
805 } else {
806 emitForm_21(i, 0x224, 0xc24);
807 }
808
809 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
810 code[1] |= 1 << 10;
811 }
812
813 void
814 CodeEmitterGK110::emitPreOp(const Instruction *i)
815 {
816 emitForm_C(i, 0x248, 0x2);
817
818 if (i->op == OP_PREEX2)
819 code[1] |= 1 << 10;
820
821 NEG_(30, 0);
822 ABS_(34, 0);
823 }
824
825 void
826 CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
827 {
828 code[0] = 0x00000002 | (subOp << 23);
829 code[1] = 0x84000000;
830
831 emitPredicate(i);
832
833 defId(i->def(0), 2);
834 srcId(i->src(0), 10);
835
836 NEG_(33, 0);
837 ABS_(31, 0);
838 SAT_(35);
839 }
840
841 void
842 CodeEmitterGK110::emitMINMAX(const Instruction *i)
843 {
844 uint32_t op2, op1;
845
846 switch (i->dType) {
847 case TYPE_U32:
848 case TYPE_S32:
849 op2 = 0x210;
850 op1 = 0xc10;
851 break;
852 case TYPE_F32:
853 op2 = 0x230;
854 op1 = 0xc30;
855 break;
856 case TYPE_F64:
857 op2 = 0x228;
858 op1 = 0xc28;
859 break;
860 default:
861 assert(0);
862 op2 = 0;
863 op1 = 0;
864 break;
865 }
866 emitForm_21(i, op2, op1);
867
868 if (i->dType == TYPE_S32)
869 code[1] |= 1 << 19;
870 code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
871
872 FTZ_(2f);
873 ABS_(31, 0);
874 NEG_(33, 0);
875 if (code[0] & 0x1) {
876 modNegAbsF32_3b(i, 1);
877 } else {
878 ABS_(34, 1);
879 NEG_(30, 1);
880 }
881 }
882
883 void
884 CodeEmitterGK110::emitCVT(const Instruction *i)
885 {
886 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
887 const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
888 const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
889
890 bool sat = i->saturate;
891 bool abs = i->src(0).mod.abs();
892 bool neg = i->src(0).mod.neg();
893
894 RoundMode rnd = i->rnd;
895
896 switch (i->op) {
897 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
898 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
899 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
900 case OP_SAT: sat = true; break;
901 case OP_NEG: neg = !neg; break;
902 case OP_ABS: abs = true; neg = false; break;
903 default:
904 break;
905 }
906
907 DataType dType;
908
909 if (i->op == OP_NEG && i->dType == TYPE_U32)
910 dType = TYPE_S32;
911 else
912 dType = i->dType;
913
914
915 uint32_t op;
916
917 if (f2f) op = 0x254;
918 else if (f2i) op = 0x258;
919 else if (i2f) op = 0x25c;
920 else op = 0x260;
921
922 emitForm_C(i, op, 0x2);
923
924 FTZ_(2f);
925 if (neg) code[1] |= 1 << 16;
926 if (abs) code[1] |= 1 << 20;
927 if (sat) code[1] |= 1 << 21;
928
929 emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
930
931 code[0] |= typeSizeofLog2(dType) << 10;
932 code[0] |= typeSizeofLog2(i->sType) << 12;
933
934 if (isSignedIntType(dType))
935 code[0] |= 0x4000;
936 if (isSignedIntType(i->sType))
937 code[0] |= 0x8000;
938 }
939
940 void
941 CodeEmitterGK110::emitSET(const CmpInstruction *i)
942 {
943 uint16_t op1, op2;
944
945 if (i->def(0).getFile() == FILE_PREDICATE) {
946 switch (i->sType) {
947 case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
948 case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
949 default:
950 op2 = 0x1b0;
951 op1 = 0xb30;
952 break;
953 }
954 emitForm_21(i, op2, op1);
955
956 NEG_(2e, 0);
957 ABS_(9, 0);
958 if (!(code[0] & 0x1)) {
959 NEG_(8, 1);
960 ABS_(2f, 1);
961 } else {
962 modNegAbsF32_3b(i, 1);
963 }
964 FTZ_(32);
965
966 // normal DST field is negated predicate result
967 code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
968 if (i->defExists(1))
969 defId(i->def(1), 2);
970 else
971 code[0] |= 0x1c;
972 } else {
973 switch (i->sType) {
974 case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
975 case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
976 default:
977 op2 = 0x1a8;
978 op1 = 0xb28;
979 break;
980 }
981 emitForm_21(i, op2, op1);
982
983 NEG_(2e, 0);
984 ABS_(39, 0);
985 if (!(code[0] & 0x1)) {
986 NEG_(38, 1);
987 ABS_(2f, 1);
988 } else {
989 modNegAbsF32_3b(i, 1);
990 }
991 FTZ_(3a);
992
993 if (i->dType == TYPE_F32) {
994 if (isFloatType(i->sType))
995 code[1] |= 1 << 23;
996 else
997 code[1] |= 1 << 15;
998 }
999 }
1000 if (i->sType == TYPE_S32)
1001 code[1] |= 1 << 19;
1002
1003 if (i->op != OP_SET) {
1004 switch (i->op) {
1005 case OP_SET_AND: code[1] |= 0x0 << 16; break;
1006 case OP_SET_OR: code[1] |= 0x1 << 16; break;
1007 case OP_SET_XOR: code[1] |= 0x2 << 16; break;
1008 default:
1009 assert(0);
1010 break;
1011 }
1012 srcId(i->src(2), 0x2a);
1013 } else {
1014 code[1] |= 0x7 << 10;
1015 }
1016 emitCondCode(i->setCond,
1017 isFloatType(i->sType) ? 0x33 : 0x34,
1018 isFloatType(i->sType) ? 0xf : 0x7);
1019 }
1020
1021 void
1022 CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
1023 {
1024 CondCode cc = i->setCond;
1025 if (i->src(2).mod.neg())
1026 cc = reverseCondCode(cc);
1027
1028 if (i->dType == TYPE_F32) {
1029 emitForm_21(i, 0x1d0, 0xb50);
1030 FTZ_(32);
1031 emitCondCode(cc, 0x33, 0xf);
1032 } else {
1033 emitForm_21(i, 0x1a0, 0xb20);
1034 emitCondCode(cc, 0x34, 0x7);
1035 }
1036 }
1037
1038 void CodeEmitterGK110::emitSELP(const Instruction *i)
1039 {
1040 emitForm_21(i, 0x250, 0x050);
1041
1042 if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)))
1043 code[1] |= 1 << 13;
1044 }
1045
1046 void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
1047 {
1048 code[0] = 0x0000003e | (i->subOp << 23);
1049 code[1] = 0x77000000;
1050
1051 emitPredicate(i);
1052 }
1053
1054 void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
1055 {
1056 code[0] = 0x00000002;
1057 code[1] = 0x76c00000;
1058
1059 code[1] |= i->tex.r << 9;
1060 // code[1] |= i->tex.s << (9 + 8);
1061
1062 if (i->tex.liveOnly)
1063 code[0] |= 0x80000000;
1064
1065 defId(i->def(0), 2);
1066 srcId(i->src(0), 10);
1067 }
1068
1069 static inline bool
1070 isNextIndependentTex(const TexInstruction *i)
1071 {
1072 if (!i->next || !isTextureOp(i->next->op))
1073 return false;
1074 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1075 return false;
1076 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1077 }
1078
1079 void
1080 CodeEmitterGK110::emitTEX(const TexInstruction *i)
1081 {
1082 const bool ind = i->tex.rIndirectSrc >= 0;
1083
1084 if (ind) {
1085 code[0] = 0x00000002;
1086 switch (i->op) {
1087 case OP_TXD:
1088 code[1] = 0x7e000000;
1089 break;
1090 case OP_TXLQ:
1091 code[1] = 0x7e800000;
1092 break;
1093 case OP_TXF:
1094 code[1] = 0x78000000;
1095 break;
1096 case OP_TXG:
1097 code[1] = 0x7dc00000;
1098 break;
1099 default:
1100 code[1] = 0x7d800000;
1101 break;
1102 }
1103 } else {
1104 switch (i->op) {
1105 case OP_TXD:
1106 code[0] = 0x00000002;
1107 code[1] = 0x76000000;
1108 code[1] |= i->tex.r << 9;
1109 break;
1110 case OP_TXLQ:
1111 code[0] = 0x00000002;
1112 code[1] = 0x76800000;
1113 code[1] |= i->tex.r << 9;
1114 break;
1115 case OP_TXF:
1116 code[0] = 0x00000002;
1117 code[1] = 0x70000000;
1118 code[1] |= i->tex.r << 13;
1119 break;
1120 case OP_TXG:
1121 code[0] = 0x00000001;
1122 code[1] = 0x70000000;
1123 code[1] |= i->tex.r << 15;
1124 break;
1125 default:
1126 code[0] = 0x00000001;
1127 code[1] = 0x60000000;
1128 code[1] |= i->tex.r << 15;
1129 break;
1130 }
1131 }
1132
1133 code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode
1134
1135 if (i->tex.liveOnly)
1136 code[0] |= 0x80000000;
1137
1138 switch (i->op) {
1139 case OP_TEX: break;
1140 case OP_TXB: code[1] |= 0x2000; break;
1141 case OP_TXL: code[1] |= 0x3000; break;
1142 case OP_TXF: break;
1143 case OP_TXG: break;
1144 case OP_TXD: break;
1145 case OP_TXLQ: break;
1146 default:
1147 assert(!"invalid texture op");
1148 break;
1149 }
1150
1151 if (i->op == OP_TXF) {
1152 if (!i->tex.levelZero)
1153 code[1] |= 0x1000;
1154 } else
1155 if (i->tex.levelZero) {
1156 code[1] |= 0x1000;
1157 }
1158
1159 if (i->op != OP_TXD && i->tex.derivAll)
1160 code[1] |= 0x200;
1161
1162 emitPredicate(i);
1163
1164 code[1] |= i->tex.mask << 2;
1165
1166 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1167
1168 defId(i->def(0), 2);
1169 srcId(i->src(0), 10);
1170 srcId(i, src1, 23);
1171
1172 if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;
1173
1174 // texture target:
1175 code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
1176 if (i->tex.target.isArray())
1177 code[1] |= 0x40;
1178 if (i->tex.target.isShadow())
1179 code[1] |= 0x400;
1180 if (i->tex.target == TEX_TARGET_2D_MS ||
1181 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1182 code[1] |= 0x800;
1183
1184 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1185 // ?
1186 }
1187
1188 if (i->tex.useOffsets == 1) {
1189 switch (i->op) {
1190 case OP_TXF: code[1] |= 0x200; break;
1191 case OP_TXD: code[1] |= 0x00400000; break;
1192 default: code[1] |= 0x800; break;
1193 }
1194 }
1195 if (i->tex.useOffsets == 4)
1196 code[1] |= 0x1000;
1197 }
1198
1199 void
1200 CodeEmitterGK110::emitTXQ(const TexInstruction *i)
1201 {
1202 code[0] = 0x00000002;
1203 code[1] = 0x75400001;
1204
1205 switch (i->tex.query) {
1206 case TXQ_DIMS: code[0] |= 0x01 << 25; break;
1207 case TXQ_TYPE: code[0] |= 0x02 << 25; break;
1208 case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
1209 case TXQ_FILTER: code[0] |= 0x10 << 25; break;
1210 case TXQ_LOD: code[0] |= 0x12 << 25; break;
1211 case TXQ_BORDER_COLOUR: code[0] |= 0x16 << 25; break;
1212 default:
1213 assert(!"invalid texture query");
1214 break;
1215 }
1216
1217 code[1] |= i->tex.mask << 2;
1218 code[1] |= i->tex.r << 9;
1219 if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
1220 code[1] |= 0x08000000;
1221
1222 defId(i->def(0), 2);
1223 srcId(i->src(0), 10);
1224
1225 emitPredicate(i);
1226 }
1227
1228 void
1229 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1230 {
1231 code[0] = 0x00000002 | ((qOp & 1) << 31);
1232 code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12);
1233
1234 defId(i->def(0), 2);
1235 srcId(i->src(0), 10);
1236 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23);
1237
1238 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1239 code[1] |= 1 << 9; // dall
1240
1241 emitPredicate(i);
1242 }
1243
1244 void
1245 CodeEmitterGK110::emitPIXLD(const Instruction *i)
1246 {
1247 emitForm_L(i, 0x7f4, 2, Modifier(0));
1248 code[1] |= i->subOp << 2;
1249 code[1] |= 0x00070000;
1250 }
1251
1252 void
1253 CodeEmitterGK110::emitFlow(const Instruction *i)
1254 {
1255 const FlowInstruction *f = i->asFlow();
1256
1257 unsigned mask; // bit 0: predicate, bit 1: target
1258
1259 code[0] = 0x00000000;
1260
1261 switch (i->op) {
1262 case OP_BRA:
1263 code[1] = f->absolute ? 0x10800000 : 0x12000000;
1264 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1265 code[0] |= 0x80;
1266 mask = 3;
1267 break;
1268 case OP_CALL:
1269 code[1] = f->absolute ? 0x11000000 : 0x13000000;
1270 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1271 code[0] |= 0x80;
1272 mask = 2;
1273 break;
1274
1275 case OP_EXIT: code[1] = 0x18000000; mask = 1; break;
1276 case OP_RET: code[1] = 0x19000000; mask = 1; break;
1277 case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
1278 case OP_BREAK: code[1] = 0x1a000000; mask = 1; break;
1279 case OP_CONT: code[1] = 0x1a800000; mask = 1; break;
1280
1281 case OP_JOINAT: code[1] = 0x14800000; mask = 2; break;
1282 case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
1283 case OP_PRECONT: code[1] = 0x15800000; mask = 2; break;
1284 case OP_PRERET: code[1] = 0x13800000; mask = 2; break;
1285
1286 case OP_QUADON: code[1] = 0x1b800000; mask = 0; break;
1287 case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
1288 case OP_BRKPT: code[1] = 0x00000000; mask = 0; break;
1289 default:
1290 assert(!"invalid flow operation");
1291 return;
1292 }
1293
1294 if (mask & 1) {
1295 emitPredicate(i);
1296 if (i->flagsSrc < 0)
1297 code[0] |= 0x3c;
1298 }
1299
1300 if (!f)
1301 return;
1302
1303 if (f->allWarp)
1304 code[0] |= 1 << 9;
1305 if (f->limit)
1306 code[0] |= 1 << 8;
1307
1308 if (f->op == OP_CALL) {
1309 if (f->builtin) {
1310 assert(f->absolute);
1311 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1312 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
1313 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
1314 } else {
1315 assert(!f->absolute);
1316 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1317 code[0] |= (pcRel & 0x1ff) << 23;
1318 code[1] |= (pcRel >> 9) & 0x7fff;
1319 }
1320 } else
1321 if (mask & 2) {
1322 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1323 if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1324 pcRel += 8;
1325 // currently we don't want absolute branches
1326 assert(!f->absolute);
1327 code[0] |= (pcRel & 0x1ff) << 23;
1328 code[1] |= (pcRel >> 9) & 0x7fff;
1329 }
1330 }
1331
1332 void
1333 CodeEmitterGK110::emitPFETCH(const Instruction *i)
1334 {
1335 uint32_t prim = i->src(0).get()->reg.data.u32;
1336
1337 code[0] = 0x00000002 | ((prim & 0xff) << 23);
1338 code[1] = 0x7f800000;
1339
1340 emitPredicate(i);
1341
1342 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1343
1344 defId(i->def(0), 2);
1345 srcId(i, src1, 10);
1346 }
1347
1348 void
1349 CodeEmitterGK110::emitVFETCH(const Instruction *i)
1350 {
1351 unsigned int size = typeSizeof(i->dType);
1352 uint32_t offset = i->src(0).get()->reg.data.offset;
1353
1354 code[0] = 0x00000002 | (offset << 23);
1355 code[1] = 0x7ec00000 | (offset >> 9);
1356 code[1] |= (size / 4 - 1) << 18;
1357
1358 if (i->perPatch)
1359 code[1] |= 0x4;
1360 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1361 code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
1362
1363 emitPredicate(i);
1364
1365 defId(i->def(0), 2);
1366 srcId(i->src(0).getIndirect(0), 10);
1367 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
1368 }
1369
1370 void
1371 CodeEmitterGK110::emitEXPORT(const Instruction *i)
1372 {
1373 unsigned int size = typeSizeof(i->dType);
1374 uint32_t offset = i->src(0).get()->reg.data.offset;
1375
1376 code[0] = 0x00000002 | (offset << 23);
1377 code[1] = 0x7f000000 | (offset >> 9);
1378 code[1] |= (size / 4 - 1) << 18;
1379
1380 if (i->perPatch)
1381 code[1] |= 0x4;
1382
1383 emitPredicate(i);
1384
1385 assert(i->src(1).getFile() == FILE_GPR);
1386
1387 srcId(i->src(0).getIndirect(0), 10);
1388 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
1389 srcId(i->src(1), 2);
1390 }
1391
1392 void
1393 CodeEmitterGK110::emitOUT(const Instruction *i)
1394 {
1395 assert(i->src(0).getFile() == FILE_GPR);
1396
1397 emitForm_21(i, 0x1f0, 0xb70);
1398
1399 if (i->op == OP_EMIT)
1400 code[1] |= 1 << 10;
1401 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1402 code[1] |= 1 << 11;
1403 }
1404
1405 void
1406 CodeEmitterGK110::emitInterpMode(const Instruction *i)
1407 {
1408 code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
1409 code[1] |= (i->ipa & 0xc) << (19 - 2);
1410 }
1411
1412 void
1413 CodeEmitterGK110::emitINTERP(const Instruction *i)
1414 {
1415 const uint32_t base = i->getSrc(0)->reg.data.offset;
1416
1417 code[0] = 0x00000002 | (base << 31);
1418 code[1] = 0x74800000 | (base >> 1);
1419
1420 if (i->saturate)
1421 code[1] |= 1 << 18;
1422
1423 if (i->op == OP_PINTERP)
1424 srcId(i->src(1), 23);
1425 else
1426 code[0] |= 0xff << 23;
1427
1428 srcId(i->src(0).getIndirect(0), 10);
1429 emitInterpMode(i);
1430
1431 emitPredicate(i);
1432 defId(i->def(0), 2);
1433
1434 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1435 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
1436 else
1437 code[1] |= 0xff << 10;
1438 }
1439
1440 void
1441 CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
1442 {
1443 uint8_t n;
1444
1445 switch (ty) {
1446 case TYPE_U8:
1447 n = 0;
1448 break;
1449 case TYPE_S8:
1450 n = 1;
1451 break;
1452 case TYPE_U16:
1453 n = 2;
1454 break;
1455 case TYPE_S16:
1456 n = 3;
1457 break;
1458 case TYPE_F32:
1459 case TYPE_U32:
1460 case TYPE_S32:
1461 n = 4;
1462 break;
1463 case TYPE_F64:
1464 case TYPE_U64:
1465 case TYPE_S64:
1466 n = 5;
1467 break;
1468 case TYPE_B128:
1469 n = 6;
1470 break;
1471 default:
1472 n = 0;
1473 assert(!"invalid ld/st type");
1474 break;
1475 }
1476 code[pos / 32] |= n << (pos % 32);
1477 }
1478
1479 void
1480 CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
1481 {
1482 uint8_t n;
1483
1484 switch (c) {
1485 case CACHE_CA:
1486 // case CACHE_WB:
1487 n = 0;
1488 break;
1489 case CACHE_CG:
1490 n = 1;
1491 break;
1492 case CACHE_CS:
1493 n = 2;
1494 break;
1495 case CACHE_CV:
1496 // case CACHE_WT:
1497 n = 3;
1498 break;
1499 default:
1500 n = 0;
1501 assert(!"invalid caching mode");
1502 break;
1503 }
1504 code[pos / 32] |= n << (pos % 32);
1505 }
1506
1507 void
1508 CodeEmitterGK110::emitSTORE(const Instruction *i)
1509 {
1510 int32_t offset = SDATA(i->src(0)).offset;
1511
1512 switch (i->src(0).getFile()) {
1513 case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
1514 case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break;
1515 case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
1516 default:
1517 assert(!"invalid memory file");
1518 break;
1519 }
1520
1521 if (i->src(0).getFile() != FILE_MEMORY_GLOBAL)
1522 offset &= 0xffffff;
1523
1524 if (code[0] & 0x2) {
1525 emitLoadStoreType(i->dType, 0x33);
1526 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
1527 emitCachingMode(i->cache, 0x2f);
1528 } else {
1529 emitLoadStoreType(i->dType, 0x38);
1530 emitCachingMode(i->cache, 0x3b);
1531 }
1532 code[0] |= offset << 23;
1533 code[1] |= offset >> 9;
1534
1535 emitPredicate(i);
1536
1537 srcId(i->src(1), 2);
1538 srcId(i->src(0).getIndirect(0), 10);
1539 }
1540
1541 void
1542 CodeEmitterGK110::emitLOAD(const Instruction *i)
1543 {
1544 int32_t offset = SDATA(i->src(0)).offset;
1545
1546 switch (i->src(0).getFile()) {
1547 case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
1548 case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break;
1549 case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
1550 case FILE_MEMORY_CONST:
1551 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1552 emitMOV(i);
1553 return;
1554 }
1555 offset &= 0xffff;
1556 code[0] = 0x00000002;
1557 code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
1558 code[1] |= i->subOp << 15;
1559 break;
1560 default:
1561 assert(!"invalid memory file");
1562 break;
1563 }
1564
1565 if (code[0] & 0x2) {
1566 offset &= 0xffffff;
1567 emitLoadStoreType(i->dType, 0x33);
1568 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
1569 emitCachingMode(i->cache, 0x2f);
1570 } else {
1571 emitLoadStoreType(i->dType, 0x38);
1572 emitCachingMode(i->cache, 0x3b);
1573 }
1574 code[0] |= offset << 23;
1575 code[1] |= offset >> 9;
1576
1577 emitPredicate(i);
1578
1579 defId(i->def(0), 2);
1580 srcId(i->src(0).getIndirect(0), 10);
1581 }
1582
1583 uint8_t
1584 CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
1585 {
1586 switch (SDATA(ref).sv.sv) {
1587 case SV_LANEID: return 0x00;
1588 case SV_PHYSID: return 0x03;
1589 case SV_VERTEX_COUNT: return 0x10;
1590 case SV_INVOCATION_ID: return 0x11;
1591 case SV_YDIR: return 0x12;
1592 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1593 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1594 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1595 case SV_GRIDID: return 0x2c;
1596 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1597 case SV_LBASE: return 0x34;
1598 case SV_SBASE: return 0x30;
1599 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1600 default:
1601 assert(!"no sreg for system value");
1602 return 0;
1603 }
1604 }
1605
1606 void
1607 CodeEmitterGK110::emitMOV(const Instruction *i)
1608 {
1609 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1610 code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
1611 code[1] = 0x86400000;
1612 emitPredicate(i);
1613 defId(i->def(0), 2);
1614 } else
1615 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1616 code[0] = 0x00000002 | (i->lanes << 14);
1617 code[1] = 0x74000000;
1618 emitPredicate(i);
1619 defId(i->def(0), 2);
1620 setImmediate32(i, 0, Modifier(0));
1621 } else
1622 if (i->src(0).getFile() == FILE_PREDICATE) {
1623 code[0] = 0x00000002;
1624 code[1] = 0x84401c07;
1625 emitPredicate(i);
1626 defId(i->def(0), 2);
1627 srcId(i->src(0), 14);
1628 } else {
1629 emitForm_C(i, 0x24c, 2);
1630 code[1] |= i->lanes << 10;
1631 }
1632 }
1633
1634 bool
1635 CodeEmitterGK110::emitInstruction(Instruction *insn)
1636 {
1637 const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;
1638
1639 if (insn->encSize != 8) {
1640 ERROR("skipping unencodable instruction: ");
1641 insn->print();
1642 return false;
1643 } else
1644 if (codeSize + size > codeSizeLimit) {
1645 ERROR("code emitter output buffer too small\n");
1646 return false;
1647 }
1648
1649 if (writeIssueDelays) {
1650 int id = (codeSize & 0x3f) / 8 - 1;
1651 if (id < 0) {
1652 id += 1;
1653 code[0] = 0x00000000; // cf issue delay "instruction"
1654 code[1] = 0x08000000;
1655 code += 2;
1656 codeSize += 8;
1657 }
1658 uint32_t *data = code - (id * 2 + 2);
1659
1660 switch (id) {
1661 case 0: data[0] |= insn->sched << 2; break;
1662 case 1: data[0] |= insn->sched << 10; break;
1663 case 2: data[0] |= insn->sched << 18; break;
1664 case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
1665 case 4: data[1] |= insn->sched << 2; break;
1666 case 5: data[1] |= insn->sched << 10; break;
1667 case 6: data[1] |= insn->sched << 18; break;
1668 default:
1669 assert(0);
1670 break;
1671 }
1672 }
1673
1674 // assert that instructions with multiple defs don't corrupt registers
1675 for (int d = 0; insn->defExists(d); ++d)
1676 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
1677
1678 switch (insn->op) {
1679 case OP_MOV:
1680 case OP_RDSV:
1681 emitMOV(insn);
1682 break;
1683 case OP_NOP:
1684 break;
1685 case OP_LOAD:
1686 emitLOAD(insn);
1687 break;
1688 case OP_STORE:
1689 emitSTORE(insn);
1690 break;
1691 case OP_LINTERP:
1692 case OP_PINTERP:
1693 emitINTERP(insn);
1694 break;
1695 case OP_VFETCH:
1696 emitVFETCH(insn);
1697 break;
1698 case OP_EXPORT:
1699 emitEXPORT(insn);
1700 break;
1701 case OP_PFETCH:
1702 emitPFETCH(insn);
1703 break;
1704 case OP_EMIT:
1705 case OP_RESTART:
1706 emitOUT(insn);
1707 break;
1708 case OP_ADD:
1709 case OP_SUB:
1710 if (insn->dType == TYPE_F64)
1711 emitDADD(insn);
1712 else if (isFloatType(insn->dType))
1713 emitFADD(insn);
1714 else
1715 emitUADD(insn);
1716 break;
1717 case OP_MUL:
1718 if (insn->dType == TYPE_F64)
1719 emitDMUL(insn);
1720 else if (isFloatType(insn->dType))
1721 emitFMUL(insn);
1722 else
1723 emitIMUL(insn);
1724 break;
1725 case OP_MAD:
1726 case OP_FMA:
1727 if (insn->dType == TYPE_F64)
1728 emitDMAD(insn);
1729 else if (isFloatType(insn->dType))
1730 emitFMAD(insn);
1731 else
1732 emitIMAD(insn);
1733 break;
1734 case OP_SAD:
1735 emitISAD(insn);
1736 break;
1737 case OP_NOT:
1738 emitNOT(insn);
1739 break;
1740 case OP_AND:
1741 emitLogicOp(insn, 0);
1742 break;
1743 case OP_OR:
1744 emitLogicOp(insn, 1);
1745 break;
1746 case OP_XOR:
1747 emitLogicOp(insn, 2);
1748 break;
1749 case OP_SHL:
1750 case OP_SHR:
1751 emitShift(insn);
1752 break;
1753 case OP_SET:
1754 case OP_SET_AND:
1755 case OP_SET_OR:
1756 case OP_SET_XOR:
1757 emitSET(insn->asCmp());
1758 break;
1759 case OP_SELP:
1760 emitSELP(insn);
1761 break;
1762 case OP_SLCT:
1763 emitSLCT(insn->asCmp());
1764 break;
1765 case OP_MIN:
1766 case OP_MAX:
1767 emitMINMAX(insn);
1768 break;
1769 case OP_ABS:
1770 case OP_NEG:
1771 case OP_CEIL:
1772 case OP_FLOOR:
1773 case OP_TRUNC:
1774 case OP_CVT:
1775 case OP_SAT:
1776 emitCVT(insn);
1777 break;
1778 case OP_RSQ:
1779 emitSFnOp(insn, 5 + 2 * insn->subOp);
1780 break;
1781 case OP_RCP:
1782 emitSFnOp(insn, 4 + 2 * insn->subOp);
1783 break;
1784 case OP_LG2:
1785 emitSFnOp(insn, 3);
1786 break;
1787 case OP_EX2:
1788 emitSFnOp(insn, 2);
1789 break;
1790 case OP_SIN:
1791 emitSFnOp(insn, 1);
1792 break;
1793 case OP_COS:
1794 emitSFnOp(insn, 0);
1795 break;
1796 case OP_PRESIN:
1797 case OP_PREEX2:
1798 emitPreOp(insn);
1799 break;
1800 case OP_TEX:
1801 case OP_TXB:
1802 case OP_TXL:
1803 case OP_TXD:
1804 case OP_TXF:
1805 case OP_TXG:
1806 case OP_TXLQ:
1807 emitTEX(insn->asTex());
1808 break;
1809 case OP_TXQ:
1810 emitTXQ(insn->asTex());
1811 break;
1812 case OP_TEXBAR:
1813 emitTEXBAR(insn);
1814 break;
1815 case OP_PIXLD:
1816 emitPIXLD(insn);
1817 break;
1818 case OP_BRA:
1819 case OP_CALL:
1820 case OP_PRERET:
1821 case OP_RET:
1822 case OP_DISCARD:
1823 case OP_EXIT:
1824 case OP_PRECONT:
1825 case OP_CONT:
1826 case OP_PREBREAK:
1827 case OP_BREAK:
1828 case OP_JOINAT:
1829 case OP_BRKPT:
1830 case OP_QUADON:
1831 case OP_QUADPOP:
1832 emitFlow(insn);
1833 break;
1834 case OP_QUADOP:
1835 emitQUADOP(insn, insn->subOp, insn->lanes);
1836 break;
1837 case OP_DFDX:
1838 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
1839 break;
1840 case OP_DFDY:
1841 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
1842 break;
1843 case OP_POPCNT:
1844 emitPOPC(insn);
1845 break;
1846 case OP_INSBF:
1847 emitINSBF(insn);
1848 break;
1849 case OP_EXTBF:
1850 emitEXTBF(insn);
1851 break;
1852 case OP_BFIND:
1853 emitBFIND(insn);
1854 break;
1855 case OP_JOIN:
1856 emitNOP(insn);
1857 insn->join = 1;
1858 break;
1859 case OP_PHI:
1860 case OP_UNION:
1861 case OP_CONSTRAINT:
1862 ERROR("operation should have been eliminated");
1863 return false;
1864 case OP_EXP:
1865 case OP_LOG:
1866 case OP_SQRT:
1867 case OP_POW:
1868 ERROR("operation should have been lowered\n");
1869 return false;
1870 default:
1871 ERROR("unknow op\n");
1872 return false;
1873 }
1874
1875 if (insn->join)
1876 code[0] |= 1 << 22;
1877
1878 code += 2;
1879 codeSize += 8;
1880 return true;
1881 }
1882
1883 uint32_t
1884 CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const
1885 {
1886 // No more short instruction encodings.
1887 return 8;
1888 }
1889
1890 void
1891 CodeEmitterGK110::prepareEmission(Function *func)
1892 {
1893 const Target *targ = func->getProgram()->getTarget();
1894
1895 CodeEmitter::prepareEmission(func);
1896
1897 if (targ->hasSWSched)
1898 calculateSchedDataNVC0(targ, func);
1899 }
1900
1901 CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target)
1902 : CodeEmitter(target),
1903 targNVC0(target),
1904 writeIssueDelays(target->hasSWSched)
1905 {
1906 code = NULL;
1907 codeSize = codeSizeLimit = 0;
1908 relocInfo = NULL;
1909 }
1910
1911 CodeEmitter *
1912 TargetNVC0::createCodeEmitterGK110(Program::Type type)
1913 {
1914 CodeEmitterGK110 *emit = new CodeEmitterGK110(this);
1915 emit->setProgramType(type);
1916 return emit;
1917 }
1918
1919 } // namespace nv50_ir