1121ae0912343fcf979109b7a4b9c8eda89cf32e
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gk110.cpp
1 /*
2 * Copyright 2012 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 // CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
26
27 namespace nv50_ir {
28
29 class CodeEmitterGK110 : public CodeEmitter
30 {
31 public:
32 CodeEmitterGK110(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
49 void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
50 void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier, int sCount = 3);
51
52 void emitPredicate(const Instruction *);
53
54 void setCAddress14(const ValueRef&);
55 void setShortImmediate(const Instruction *, const int s);
56 void setImmediate32(const Instruction *, const int s, Modifier);
57 void setSUConst16(const Instruction *, const int s);
58
59 void modNegAbsF32_3b(const Instruction *, const int s);
60
61 void emitCondCode(CondCode cc, int pos, uint8_t mask);
62 void emitInterpMode(const Instruction *);
63 void emitLoadStoreType(DataType ty, const int pos);
64 void emitCachingMode(CacheMode c, const int pos);
65 void emitSUGType(DataType, const int pos);
66 void emitSUCachingMode(CacheMode c);
67
68 inline uint8_t getSRegEncoding(const ValueRef&);
69
70 void emitRoundMode(RoundMode, const int pos, const int rintPos);
71 void emitRoundModeF(RoundMode, const int pos);
72 void emitRoundModeI(RoundMode, const int pos);
73
74 void emitNegAbs12(const Instruction *);
75
76 void emitNOP(const Instruction *);
77
78 void emitLOAD(const Instruction *);
79 void emitSTORE(const Instruction *);
80 void emitMOV(const Instruction *);
81 void emitATOM(const Instruction *);
82 void emitCCTL(const Instruction *);
83
84 void emitINTERP(const Instruction *);
85 void emitAFETCH(const Instruction *);
86 void emitPFETCH(const Instruction *);
87 void emitVFETCH(const Instruction *);
88 void emitEXPORT(const Instruction *);
89 void emitOUT(const Instruction *);
90
91 void emitUADD(const Instruction *);
92 void emitFADD(const Instruction *);
93 void emitDADD(const Instruction *);
94 void emitIMUL(const Instruction *);
95 void emitFMUL(const Instruction *);
96 void emitDMUL(const Instruction *);
97 void emitIMAD(const Instruction *);
98 void emitISAD(const Instruction *);
99 void emitSHLADD(const Instruction *);
100 void emitFMAD(const Instruction *);
101 void emitDMAD(const Instruction *);
102 void emitMADSP(const Instruction *i);
103
104 void emitNOT(const Instruction *);
105 void emitLogicOp(const Instruction *, uint8_t subOp);
106 void emitPOPC(const Instruction *);
107 void emitINSBF(const Instruction *);
108 void emitEXTBF(const Instruction *);
109 void emitBFIND(const Instruction *);
110 void emitPERMT(const Instruction *);
111 void emitShift(const Instruction *);
112 void emitShift64(const Instruction *);
113
114 void emitSFnOp(const Instruction *, uint8_t subOp);
115
116 void emitCVT(const Instruction *);
117 void emitMINMAX(const Instruction *);
118 void emitPreOp(const Instruction *);
119
120 void emitSET(const CmpInstruction *);
121 void emitSLCT(const CmpInstruction *);
122 void emitSELP(const Instruction *);
123
124 void emitTEXBAR(const Instruction *);
125 void emitTEX(const TexInstruction *);
126 void emitTEXCSAA(const TexInstruction *);
127 void emitTXQ(const TexInstruction *);
128
129 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
130
131 void emitPIXLD(const Instruction *);
132
133 void emitBAR(const Instruction *);
134 void emitMEMBAR(const Instruction *);
135
136 void emitFlow(const Instruction *);
137
138 void emitVOTE(const Instruction *);
139
140 void emitSULDGB(const TexInstruction *);
141 void emitSUSTGx(const TexInstruction *);
142 void emitSUCLAMPMode(uint16_t);
143 void emitSUCalc(Instruction *);
144
145 void emitVSHL(const Instruction *);
146 void emitVectorSubOp(const Instruction *);
147
148 inline void defId(const ValueDef&, const int pos);
149 inline void srcId(const ValueRef&, const int pos);
150 inline void srcId(const ValueRef *, const int pos);
151 inline void srcId(const Instruction *, int s, const int pos);
152
153 inline void srcAddr32(const ValueRef&, const int pos); // address / 4
154
155 inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
156 };
157
// Register id written into a register field when the operand is absent
// (see srcId() / defId()).
#define GK110_GPR_ZERO 255

// The helpers below expect an Instruction pointer named `i` and the
// instruction words `code` to be in scope. The first argument is a bit
// position written as hex digits WITHOUT the 0x prefix: bit 0x<b> of the
// 64-bit encoding, i.e. bit (0x<b> % 32) of code[0x<b> / 32].
//
// NOTE: these expand to a bare `if` statement; wrap uses in braces when
// they appear inside an if/else.

// Set bit <b> if source <s> carries the NEG / ABS modifier.
#define NEG_(b, s) \
   if (i->src(s).mod.neg()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
#define ABS_(b, s) \
   if (i->src(s).mod.abs()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Set bit <b> if source <s> carries the NOT modifier.
#define NOT_(b, s) \
   if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) \
      code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Set bit <b> if the instruction's ftz / dnz / saturate flag is set.
#define FTZ_(b) \
   if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
#define DNZ_(b) \
   if (i->dnz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

#define SAT_(b) \
   if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)

// Emit i->rnd at bit <b> through emitRoundMode<t>() (t is F or I).
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)

// Register data of the representative value of a source / definition.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
177
178 void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
179 {
180 code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);
181 }
182
183 void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
184 {
185 code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);
186 }
187
188 void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
189 {
190 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;
191 code[pos / 32] |= r << (pos % 32);
192 }
193
194 void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
195 {
196 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
197 }
198
199 void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
200 {
201 code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);
202 }
203
204 bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
205 {
206 const ImmediateValue *imm = ref.get()->asImm();
207
208 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
209 }
210
211 void
212 CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
213 {
214 bool rint = false;
215 uint8_t n;
216
217 switch (rnd) {
218 case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break;
219 case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break;
220 case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break;
221 default:
222 rint = rnd == ROUND_NI;
223 n = 0;
224 assert(rnd == ROUND_N || rnd == ROUND_NI);
225 break;
226 }
227 code[pos / 32] |= n << (pos % 32);
228 if (rint && rintPos >= 0)
229 code[rintPos / 32] |= 1 << (rintPos % 32);
230 }
231
232 void
233 CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
234 {
235 uint8_t n;
236
237 switch (rnd) {
238 case ROUND_M: n = 1; break;
239 case ROUND_P: n = 2; break;
240 case ROUND_Z: n = 3; break;
241 default:
242 n = 0;
243 assert(rnd == ROUND_N);
244 break;
245 }
246 code[pos / 32] |= n << (pos % 32);
247 }
248
249 void
250 CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
251 {
252 uint8_t n;
253
254 switch (rnd) {
255 case ROUND_MI: n = 1; break;
256 case ROUND_PI: n = 2; break;
257 case ROUND_ZI: n = 3; break;
258 default:
259 n = 0;
260 assert(rnd == ROUND_NI);
261 break;
262 }
263 code[pos / 32] |= n << (pos % 32);
264 }
265
266 void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
267 {
268 uint8_t n;
269
270 switch (cc) {
271 case CC_FL: n = 0x00; break;
272 case CC_LT: n = 0x01; break;
273 case CC_EQ: n = 0x02; break;
274 case CC_LE: n = 0x03; break;
275 case CC_GT: n = 0x04; break;
276 case CC_NE: n = 0x05; break;
277 case CC_GE: n = 0x06; break;
278 case CC_LTU: n = 0x09; break;
279 case CC_EQU: n = 0x0a; break;
280 case CC_LEU: n = 0x0b; break;
281 case CC_GTU: n = 0x0c; break;
282 case CC_NEU: n = 0x0d; break;
283 case CC_GEU: n = 0x0e; break;
284 case CC_TR: n = 0x0f; break;
285 case CC_NO: n = 0x10; break;
286 case CC_NC: n = 0x11; break;
287 case CC_NS: n = 0x12; break;
288 case CC_NA: n = 0x13; break;
289 case CC_A: n = 0x14; break;
290 case CC_S: n = 0x15; break;
291 case CC_C: n = 0x16; break;
292 case CC_O: n = 0x17; break;
293 default:
294 n = 0;
295 assert(!"invalid condition code");
296 break;
297 }
298 code[pos / 32] |= (n & mask) << (pos % 32);
299 }
300
301 void
302 CodeEmitterGK110::emitPredicate(const Instruction *i)
303 {
304 if (i->predSrc >= 0) {
305 srcId(i->src(i->predSrc), 18);
306 if (i->cc == CC_NOT_P)
307 code[0] |= 8 << 18; // negate
308 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
309 } else {
310 code[0] |= 7 << 18;
311 }
312 }
313
314 void
315 CodeEmitterGK110::setCAddress14(const ValueRef& src)
316 {
317 const Storage& res = src.get()->asSym()->reg;
318 const int32_t addr = res.data.offset / 4;
319
320 code[0] |= (addr & 0x01ff) << 23;
321 code[1] |= (addr & 0x3e00) >> 9;
322 code[1] |= res.fileIndex << 5;
323 }
324
325 void
326 CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
327 {
328 const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
329 const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
330
331 if (i->sType == TYPE_F32) {
332 assert(!(u32 & 0x00000fff));
333 code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
334 code[1] |= ((u32 & 0x7fe00000) >> 21);
335 code[1] |= ((u32 & 0x80000000) >> 4);
336 } else
337 if (i->sType == TYPE_F64) {
338 assert(!(u64 & 0x00000fffffffffffULL));
339 code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
340 code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
341 code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
342 } else {
343 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
344 code[0] |= (u32 & 0x001ff) << 23;
345 code[1] |= (u32 & 0x7fe00) >> 9;
346 code[1] |= (u32 & 0x80000) << 8;
347 }
348 }
349
350 void
351 CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
352 Modifier mod)
353 {
354 uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
355
356 if (mod) {
357 ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
358 mod.applyTo(imm);
359 u32 = imm.reg.data.u32;
360 }
361
362 code[0] |= u32 << 23;
363 code[1] |= u32 >> 9;
364 }
365
366 void
367 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
368 Modifier mod, int sCount)
369 {
370 code[0] = ctg;
371 code[1] = opc << 20;
372
373 emitPredicate(i);
374
375 defId(i->def(0), 2);
376
377 for (int s = 0; s < sCount && i->srcExists(s); ++s) {
378 switch (i->src(s).getFile()) {
379 case FILE_GPR:
380 srcId(i->src(s), s ? 42 : 10);
381 break;
382 case FILE_IMMEDIATE:
383 setImmediate32(i, s, mod);
384 break;
385 default:
386 break;
387 }
388 }
389 }
390
391
392 void
393 CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
394 {
395 code[0] = ctg;
396 code[1] = opc << 20;
397
398 emitPredicate(i);
399
400 defId(i->def(0), 2);
401
402 switch (i->src(0).getFile()) {
403 case FILE_MEMORY_CONST:
404 code[1] |= 0x4 << 28;
405 setCAddress14(i->src(0));
406 break;
407 case FILE_GPR:
408 code[1] |= 0xc << 28;
409 srcId(i->src(0), 23);
410 break;
411 default:
412 assert(0);
413 break;
414 }
415 }
416
417 // 0x2 for GPR, c[] and 0x1 for short immediate
418 void
419 CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
420 uint32_t opc1)
421 {
422 const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;
423
424 int s1 = 23;
425 if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
426 s1 = 42;
427
428 if (imm) {
429 code[0] = 0x1;
430 code[1] = opc1 << 20;
431 } else {
432 code[0] = 0x2;
433 code[1] = (0xc << 28) | (opc2 << 20);
434 }
435
436 emitPredicate(i);
437
438 defId(i->def(0), 2);
439
440 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
441 switch (i->src(s).getFile()) {
442 case FILE_MEMORY_CONST:
443 code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
444 setCAddress14(i->src(s));
445 break;
446 case FILE_IMMEDIATE:
447 setShortImmediate(i, s);
448 break;
449 case FILE_GPR:
450 srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
451 break;
452 default:
453 if (i->op == OP_SELP) {
454 assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
455 srcId(i->src(s), 42);
456 }
457 // ignore here, can be predicate or flags, but must not be address
458 break;
459 }
460 }
461 // 0x0 = invalid
462 // 0xc = rrr
463 // 0x8 = rrc
464 // 0x4 = rcr
465 assert(imm || (code[1] & (0xc << 28)));
466 }
467
468 inline void
469 CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
470 {
471 if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);
472 if (i->src(s).mod.neg()) code[1] ^= (1 << 27);
473 }
474
475 void
476 CodeEmitterGK110::emitNOP(const Instruction *i)
477 {
478 code[0] = 0x00003c02;
479 code[1] = 0x85800000;
480
481 if (i)
482 emitPredicate(i);
483 else
484 code[0] = 0x001c3c02;
485 }
486
487 void
488 CodeEmitterGK110::emitFMAD(const Instruction *i)
489 {
490 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
491
492 if (isLIMM(i->src(1), TYPE_F32)) {
493 assert(i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id);
494
495 // last source is dst, so force 2 sources
496 emitForm_L(i, 0x600, 0x0, 0, 2);
497
498 if (i->flagsDef >= 0)
499 code[1] |= 1 << 23;
500
501 SAT_(3a);
502 NEG_(3c, 2);
503
504 if (neg1) {
505 code[1] |= 1 << 27;
506 }
507 } else {
508 emitForm_21(i, 0x0c0, 0x940);
509
510 NEG_(34, 2);
511 SAT_(35);
512 RND_(36, F);
513
514 if (code[0] & 0x1) {
515 if (neg1)
516 code[1] ^= 1 << 27;
517 } else
518 if (neg1) {
519 code[1] |= 1 << 19;
520 }
521 }
522
523 FTZ_(38);
524 DNZ_(39);
525 }
526
527 void
528 CodeEmitterGK110::emitDMAD(const Instruction *i)
529 {
530 assert(!i->saturate);
531 assert(!i->ftz);
532
533 emitForm_21(i, 0x1b8, 0xb38);
534
535 NEG_(34, 2);
536 RND_(36, F);
537
538 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
539
540 if (code[0] & 0x1) {
541 if (neg1)
542 code[1] ^= 1 << 27;
543 } else
544 if (neg1) {
545 code[1] |= 1 << 19;
546 }
547 }
548
549 void
550 CodeEmitterGK110::emitMADSP(const Instruction *i)
551 {
552 emitForm_21(i, 0x140, 0xa40);
553
554 if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
555 code[1] |= 0x00c00000;
556 } else {
557 code[1] |= (i->subOp & 0x00f) << 19; // imadp1
558 code[1] |= (i->subOp & 0x0f0) << 20; // imadp2
559 code[1] |= (i->subOp & 0x100) << 11; // imadp3
560 code[1] |= (i->subOp & 0x200) << 15; // imadp3
561 code[1] |= (i->subOp & 0xc00) << 12; // imadp3
562 }
563
564 if (i->flagsDef >= 0)
565 code[1] |= 1 << 18;
566 }
567
568 void
569 CodeEmitterGK110::emitFMUL(const Instruction *i)
570 {
571 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
572
573 assert(i->postFactor >= -3 && i->postFactor <= 3);
574
575 if (isLIMM(i->src(1), TYPE_F32)) {
576 emitForm_L(i, 0x200, 0x2, Modifier(0));
577
578 FTZ_(38);
579 DNZ_(39);
580 SAT_(3a);
581 if (neg)
582 code[1] ^= 1 << 22;
583
584 assert(i->postFactor == 0);
585 } else {
586 emitForm_21(i, 0x234, 0xc34);
587 code[1] |= ((i->postFactor > 0) ?
588 (7 - i->postFactor) : (0 - i->postFactor)) << 12;
589
590 RND_(2a, F);
591 FTZ_(2f);
592 DNZ_(30);
593 SAT_(35);
594
595 if (code[0] & 0x1) {
596 if (neg)
597 code[1] ^= 1 << 27;
598 } else
599 if (neg) {
600 code[1] |= 1 << 19;
601 }
602 }
603 }
604
605 void
606 CodeEmitterGK110::emitDMUL(const Instruction *i)
607 {
608 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
609
610 assert(!i->postFactor);
611 assert(!i->saturate);
612 assert(!i->ftz);
613 assert(!i->dnz);
614
615 emitForm_21(i, 0x240, 0xc40);
616
617 RND_(2a, F);
618
619 if (code[0] & 0x1) {
620 if (neg)
621 code[1] ^= 1 << 27;
622 } else
623 if (neg) {
624 code[1] |= 1 << 19;
625 }
626 }
627
628 void
629 CodeEmitterGK110::emitIMUL(const Instruction *i)
630 {
631 assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
632 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
633
634 if (i->src(1).getFile() == FILE_IMMEDIATE) {
635 emitForm_L(i, 0x280, 2, Modifier(0));
636
637 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
638 code[1] |= 1 << 24;
639 if (i->sType == TYPE_S32)
640 code[1] |= 3 << 25;
641 } else {
642 emitForm_21(i, 0x21c, 0xc1c);
643
644 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
645 code[1] |= 1 << 10;
646 if (i->sType == TYPE_S32)
647 code[1] |= 3 << 11;
648 }
649 }
650
651 void
652 CodeEmitterGK110::emitFADD(const Instruction *i)
653 {
654 if (isLIMM(i->src(1), TYPE_F32)) {
655 assert(i->rnd == ROUND_N);
656 assert(!i->saturate);
657
658 Modifier mod = i->src(1).mod ^
659 Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
660
661 emitForm_L(i, 0x400, 0, mod);
662
663 FTZ_(3a);
664 NEG_(3b, 0);
665 ABS_(39, 0);
666 } else {
667 emitForm_21(i, 0x22c, 0xc2c);
668
669 FTZ_(2f);
670 RND_(2a, F);
671 ABS_(31, 0);
672 NEG_(33, 0);
673 SAT_(35);
674
675 if (code[0] & 0x1) {
676 modNegAbsF32_3b(i, 1);
677 if (i->op == OP_SUB) code[1] ^= 1 << 27;
678 } else {
679 ABS_(34, 1);
680 NEG_(30, 1);
681 if (i->op == OP_SUB) code[1] ^= 1 << 16;
682 }
683 }
684 }
685
686 void
687 CodeEmitterGK110::emitDADD(const Instruction *i)
688 {
689 assert(!i->saturate);
690 assert(!i->ftz);
691
692 emitForm_21(i, 0x238, 0xc38);
693 RND_(2a, F);
694 ABS_(31, 0);
695 NEG_(33, 0);
696 if (code[0] & 0x1) {
697 modNegAbsF32_3b(i, 1);
698 if (i->op == OP_SUB) code[1] ^= 1 << 27;
699 } else {
700 NEG_(30, 1);
701 ABS_(34, 1);
702 if (i->op == OP_SUB) code[1] ^= 1 << 16;
703 }
704 }
705
706 void
707 CodeEmitterGK110::emitUADD(const Instruction *i)
708 {
709 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
710
711 if (i->op == OP_SUB)
712 addOp ^= 1;
713
714 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
715
716 if (isLIMM(i->src(1), TYPE_S32)) {
717 emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
718
719 if (addOp & 2)
720 code[1] |= 1 << 27;
721
722 assert(i->flagsDef < 0);
723 assert(i->flagsSrc < 0);
724
725 SAT_(39);
726 } else {
727 emitForm_21(i, 0x208, 0xc08);
728
729 assert(addOp != 3); // would be add-plus-one
730
731 code[1] |= addOp << 19;
732
733 if (i->flagsDef >= 0)
734 code[1] |= 1 << 18; // write carry
735 if (i->flagsSrc >= 0)
736 code[1] |= 1 << 14; // add carry
737
738 SAT_(35);
739 }
740 }
741
742 void
743 CodeEmitterGK110::emitIMAD(const Instruction *i)
744 {
745 uint8_t addOp =
746 i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
747
748 emitForm_21(i, 0x100, 0xa00);
749
750 assert(addOp != 3);
751 code[1] |= addOp << 26;
752
753 if (i->sType == TYPE_S32)
754 code[1] |= (1 << 19) | (1 << 24);
755
756 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
757 code[1] |= 1 << 25;
758
759 if (i->flagsDef >= 0) code[1] |= 1 << 18;
760 if (i->flagsSrc >= 0) code[1] |= 1 << 20;
761
762 SAT_(35);
763 }
764
765 void
766 CodeEmitterGK110::emitISAD(const Instruction *i)
767 {
768 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
769
770 emitForm_21(i, 0x1f4, 0xb74);
771
772 if (i->dType == TYPE_S32)
773 code[1] |= 1 << 19;
774 }
775
776 void
777 CodeEmitterGK110::emitSHLADD(const Instruction *i)
778 {
779 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
780 const ImmediateValue *imm = i->src(1).get()->asImm();
781 assert(imm);
782
783 if (i->src(2).getFile() == FILE_IMMEDIATE) {
784 code[0] = 0x1;
785 code[1] = 0xc0c << 20;
786 } else {
787 code[0] = 0x2;
788 code[1] = 0x20c << 20;
789 }
790 code[1] |= addOp << 19;
791
792 emitPredicate(i);
793
794 defId(i->def(0), 2);
795 srcId(i->src(0), 10);
796
797 if (i->flagsDef >= 0)
798 code[1] |= 1 << 18;
799
800 assert(!(imm->reg.data.u32 & 0xffffffe0));
801 code[1] |= imm->reg.data.u32 << 10;
802
803 switch (i->src(2).getFile()) {
804 case FILE_GPR:
805 assert(code[0] & 0x2);
806 code[1] |= 0xc << 28;
807 srcId(i->src(2), 23);
808 break;
809 case FILE_MEMORY_CONST:
810 assert(code[0] & 0x2);
811 code[1] |= 0x4 << 28;
812 setCAddress14(i->src(2));
813 break;
814 case FILE_IMMEDIATE:
815 assert(code[0] & 0x1);
816 setShortImmediate(i, 2);
817 break;
818 default:
819 assert(!"bad src2 file");
820 break;
821 }
822 }
823
824 void
825 CodeEmitterGK110::emitNOT(const Instruction *i)
826 {
827 code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
828 code[1] = 0x22003800;
829
830 emitPredicate(i);
831
832 defId(i->def(0), 2);
833
834 switch (i->src(0).getFile()) {
835 case FILE_GPR:
836 code[1] |= 0xc << 28;
837 srcId(i->src(0), 23);
838 break;
839 case FILE_MEMORY_CONST:
840 code[1] |= 0x4 << 28;
841 setCAddress14(i->src(0));
842 break;
843 default:
844 assert(0);
845 break;
846 }
847 }
848
849 void
850 CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
851 {
852 if (i->def(0).getFile() == FILE_PREDICATE) {
853 code[0] = 0x00000002 | (subOp << 27);
854 code[1] = 0x84800000;
855
856 emitPredicate(i);
857
858 defId(i->def(0), 5);
859 srcId(i->src(0), 14);
860 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17;
861 srcId(i->src(1), 32);
862 if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3;
863
864 if (i->defExists(1)) {
865 defId(i->def(1), 2);
866 } else {
867 code[0] |= 7 << 2;
868 }
869 // (a OP b) OP c
870 if (i->predSrc != 2 && i->srcExists(2)) {
871 code[1] |= subOp << 16;
872 srcId(i->src(2), 42);
873 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13;
874 } else {
875 code[1] |= 7 << 10;
876 }
877 } else
878 if (isLIMM(i->src(1), TYPE_S32)) {
879 emitForm_L(i, 0x200, 0, i->src(1).mod);
880 code[1] |= subOp << 24;
881 NOT_(3a, 0);
882 } else {
883 emitForm_21(i, 0x220, 0xc20);
884 code[1] |= subOp << 12;
885 NOT_(2a, 0);
886 NOT_(2b, 1);
887 }
888 }
889
890 void
891 CodeEmitterGK110::emitPOPC(const Instruction *i)
892 {
893 assert(!isLIMM(i->src(1), TYPE_S32, true));
894
895 emitForm_21(i, 0x204, 0xc04);
896
897 NOT_(2a, 0);
898 if (!(code[0] & 0x1))
899 NOT_(2b, 1);
900 }
901
902 void
903 CodeEmitterGK110::emitINSBF(const Instruction *i)
904 {
905 emitForm_21(i, 0x1f8, 0xb78);
906 }
907
908 void
909 CodeEmitterGK110::emitEXTBF(const Instruction *i)
910 {
911 emitForm_21(i, 0x600, 0xc00);
912
913 if (i->dType == TYPE_S32)
914 code[1] |= 0x80000;
915 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
916 code[1] |= 0x800;
917 }
918
919 void
920 CodeEmitterGK110::emitBFIND(const Instruction *i)
921 {
922 emitForm_C(i, 0x218, 0x2);
923
924 if (i->dType == TYPE_S32)
925 code[1] |= 0x80000;
926 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
927 code[1] |= 0x800;
928 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
929 code[1] |= 0x1000;
930 }
931
932 void
933 CodeEmitterGK110::emitPERMT(const Instruction *i)
934 {
935 emitForm_21(i, 0x1e0, 0xb60);
936
937 code[1] |= i->subOp << 19;
938 }
939
940 void
941 CodeEmitterGK110::emitShift(const Instruction *i)
942 {
943 if (i->op == OP_SHR) {
944 emitForm_21(i, 0x214, 0xc14);
945 if (isSignedType(i->dType))
946 code[1] |= 1 << 19;
947 } else {
948 emitForm_21(i, 0x224, 0xc24);
949 }
950
951 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
952 code[1] |= 1 << 10;
953 }
954
955 void
956 CodeEmitterGK110::emitShift64(const Instruction *i)
957 {
958 if (i->op == OP_SHR) {
959 emitForm_21(i, 0x27c, 0xc7c);
960 if (isSignedType(i->sType))
961 code[1] |= 0x100;
962 if (i->subOp & NV50_IR_SUBOP_SHIFT_HIGH)
963 code[1] |= 1 << 19;
964 } else {
965 emitForm_21(i, 0xdfc, 0xf7c);
966 }
967 code[1] |= 0x200;
968
969 if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)
970 code[1] |= 1 << 21;
971 }
972
973 void
974 CodeEmitterGK110::emitPreOp(const Instruction *i)
975 {
976 emitForm_C(i, 0x248, 0x2);
977
978 if (i->op == OP_PREEX2)
979 code[1] |= 1 << 10;
980
981 NEG_(30, 0);
982 ABS_(34, 0);
983 }
984
985 void
986 CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
987 {
988 code[0] = 0x00000002 | (subOp << 23);
989 code[1] = 0x84000000;
990
991 emitPredicate(i);
992
993 defId(i->def(0), 2);
994 srcId(i->src(0), 10);
995
996 NEG_(33, 0);
997 ABS_(31, 0);
998 SAT_(35);
999 }
1000
1001 void
1002 CodeEmitterGK110::emitMINMAX(const Instruction *i)
1003 {
1004 uint32_t op2, op1;
1005
1006 switch (i->dType) {
1007 case TYPE_U32:
1008 case TYPE_S32:
1009 op2 = 0x210;
1010 op1 = 0xc10;
1011 break;
1012 case TYPE_F32:
1013 op2 = 0x230;
1014 op1 = 0xc30;
1015 break;
1016 case TYPE_F64:
1017 op2 = 0x228;
1018 op1 = 0xc28;
1019 break;
1020 default:
1021 assert(0);
1022 op2 = 0;
1023 op1 = 0;
1024 break;
1025 }
1026 emitForm_21(i, op2, op1);
1027
1028 if (i->dType == TYPE_S32)
1029 code[1] |= 1 << 19;
1030 code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
1031 code[1] |= i->subOp << 14;
1032 if (i->flagsDef >= 0)
1033 code[1] |= i->subOp << 18;
1034
1035 FTZ_(2f);
1036 ABS_(31, 0);
1037 NEG_(33, 0);
1038 if (code[0] & 0x1) {
1039 modNegAbsF32_3b(i, 1);
1040 } else {
1041 ABS_(34, 1);
1042 NEG_(30, 1);
1043 }
1044 }
1045
1046 void
1047 CodeEmitterGK110::emitCVT(const Instruction *i)
1048 {
1049 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1050 const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
1051 const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
1052
1053 bool sat = i->saturate;
1054 bool abs = i->src(0).mod.abs();
1055 bool neg = i->src(0).mod.neg();
1056
1057 RoundMode rnd = i->rnd;
1058
1059 switch (i->op) {
1060 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
1061 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1062 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1063 case OP_SAT: sat = true; break;
1064 case OP_NEG: neg = !neg; break;
1065 case OP_ABS: abs = true; neg = false; break;
1066 default:
1067 break;
1068 }
1069
1070 DataType dType;
1071
1072 if (i->op == OP_NEG && i->dType == TYPE_U32)
1073 dType = TYPE_S32;
1074 else
1075 dType = i->dType;
1076
1077
1078 uint32_t op;
1079
1080 if (f2f) op = 0x254;
1081 else if (f2i) op = 0x258;
1082 else if (i2f) op = 0x25c;
1083 else op = 0x260;
1084
1085 emitForm_C(i, op, 0x2);
1086
1087 FTZ_(2f);
1088 if (neg) code[1] |= 1 << 16;
1089 if (abs) code[1] |= 1 << 20;
1090 if (sat) code[1] |= 1 << 21;
1091
1092 emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
1093
1094 code[0] |= typeSizeofLog2(dType) << 10;
1095 code[0] |= typeSizeofLog2(i->sType) << 12;
1096 code[1] |= i->subOp << 12;
1097
1098 if (isSignedIntType(dType))
1099 code[0] |= 0x4000;
1100 if (isSignedIntType(i->sType))
1101 code[0] |= 0x8000;
1102 }
1103
1104 void
1105 CodeEmitterGK110::emitSET(const CmpInstruction *i)
1106 {
1107 uint16_t op1, op2;
1108
1109 if (i->def(0).getFile() == FILE_PREDICATE) {
1110 switch (i->sType) {
1111 case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
1112 case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
1113 default:
1114 op2 = 0x1b0;
1115 op1 = 0xb30;
1116 break;
1117 }
1118 emitForm_21(i, op2, op1);
1119
1120 NEG_(2e, 0);
1121 ABS_(9, 0);
1122 if (!(code[0] & 0x1)) {
1123 NEG_(8, 1);
1124 ABS_(2f, 1);
1125 } else {
1126 modNegAbsF32_3b(i, 1);
1127 }
1128 FTZ_(32);
1129
1130 // normal DST field is negated predicate result
1131 code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
1132 if (i->defExists(1))
1133 defId(i->def(1), 2);
1134 else
1135 code[0] |= 0x1c;
1136 } else {
1137 switch (i->sType) {
1138 case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
1139 case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
1140 default:
1141 op2 = 0x1a8;
1142 op1 = 0xb28;
1143 break;
1144 }
1145 emitForm_21(i, op2, op1);
1146
1147 NEG_(2e, 0);
1148 ABS_(39, 0);
1149 if (!(code[0] & 0x1)) {
1150 NEG_(38, 1);
1151 ABS_(2f, 1);
1152 } else {
1153 modNegAbsF32_3b(i, 1);
1154 }
1155 FTZ_(3a);
1156
1157 if (i->dType == TYPE_F32) {
1158 if (isFloatType(i->sType))
1159 code[1] |= 1 << 23;
1160 else
1161 code[1] |= 1 << 15;
1162 }
1163 }
1164 if (i->sType == TYPE_S32)
1165 code[1] |= 1 << 19;
1166
1167 if (i->op != OP_SET) {
1168 switch (i->op) {
1169 case OP_SET_AND: code[1] |= 0x0 << 16; break;
1170 case OP_SET_OR: code[1] |= 0x1 << 16; break;
1171 case OP_SET_XOR: code[1] |= 0x2 << 16; break;
1172 default:
1173 assert(0);
1174 break;
1175 }
1176 srcId(i->src(2), 0x2a);
1177 } else {
1178 code[1] |= 0x7 << 10;
1179 }
1180 if (i->flagsSrc >= 0)
1181 code[1] |= 1 << 14;
1182 emitCondCode(i->setCond,
1183 isFloatType(i->sType) ? 0x33 : 0x34,
1184 isFloatType(i->sType) ? 0xf : 0x7);
1185 }
1186
1187 void
1188 CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
1189 {
1190 CondCode cc = i->setCond;
1191 if (i->src(2).mod.neg())
1192 cc = reverseCondCode(cc);
1193
1194 if (i->dType == TYPE_F32) {
1195 emitForm_21(i, 0x1d0, 0xb50);
1196 FTZ_(32);
1197 emitCondCode(cc, 0x33, 0xf);
1198 } else {
1199 emitForm_21(i, 0x1a0, 0xb20);
1200 emitCondCode(cc, 0x34, 0x7);
1201 if (i->dType == TYPE_S32)
1202 code[1] |= 1 << 19;
1203 }
1204 }
1205
1206 static void
1207 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1208 {
1209 int loc = entry->loc;
1210 if (data.force_persample_interp)
1211 code[loc + 1] |= 1 << 13;
1212 else
1213 code[loc + 1] &= ~(1 << 13);
1214 }
1215
1216 void CodeEmitterGK110::emitSELP(const Instruction *i)
1217 {
1218 emitForm_21(i, 0x250, 0x050);
1219
1220 if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1221 code[1] |= 1 << 13;
1222
1223 if (i->subOp == 1) {
1224 addInterp(0, 0, selpFlip);
1225 }
1226 }
1227
1228 void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
1229 {
1230 code[0] = 0x0000003e | (i->subOp << 23);
1231 code[1] = 0x77000000;
1232
1233 emitPredicate(i);
1234 }
1235
1236 void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
1237 {
1238 code[0] = 0x00000002;
1239 code[1] = 0x76c00000;
1240
1241 code[1] |= i->tex.r << 9;
1242 // code[1] |= i->tex.s << (9 + 8);
1243
1244 if (i->tex.liveOnly)
1245 code[0] |= 0x80000000;
1246
1247 defId(i->def(0), 2);
1248 srcId(i->src(0), 10);
1249 }
1250
1251 static inline bool
1252 isNextIndependentTex(const TexInstruction *i)
1253 {
1254 if (!i->next || !isTextureOp(i->next->op))
1255 return false;
1256 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1257 return false;
1258 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1259 }
1260
1261 void
1262 CodeEmitterGK110::emitTEX(const TexInstruction *i)
1263 {
1264 const bool ind = i->tex.rIndirectSrc >= 0;
1265
1266 if (ind) {
1267 code[0] = 0x00000002;
1268 switch (i->op) {
1269 case OP_TXD:
1270 code[1] = 0x7e000000;
1271 break;
1272 case OP_TXLQ:
1273 code[1] = 0x7e800000;
1274 break;
1275 case OP_TXF:
1276 code[1] = 0x78000000;
1277 break;
1278 case OP_TXG:
1279 code[1] = 0x7dc00000;
1280 break;
1281 default:
1282 code[1] = 0x7d800000;
1283 break;
1284 }
1285 } else {
1286 switch (i->op) {
1287 case OP_TXD:
1288 code[0] = 0x00000002;
1289 code[1] = 0x76000000;
1290 code[1] |= i->tex.r << 9;
1291 break;
1292 case OP_TXLQ:
1293 code[0] = 0x00000002;
1294 code[1] = 0x76800000;
1295 code[1] |= i->tex.r << 9;
1296 break;
1297 case OP_TXF:
1298 code[0] = 0x00000002;
1299 code[1] = 0x70000000;
1300 code[1] |= i->tex.r << 13;
1301 break;
1302 case OP_TXG:
1303 code[0] = 0x00000001;
1304 code[1] = 0x70000000;
1305 code[1] |= i->tex.r << 15;
1306 break;
1307 default:
1308 code[0] = 0x00000001;
1309 code[1] = 0x60000000;
1310 code[1] |= i->tex.r << 15;
1311 break;
1312 }
1313 }
1314
1315 code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode
1316
1317 if (i->tex.liveOnly)
1318 code[0] |= 0x80000000;
1319
1320 switch (i->op) {
1321 case OP_TEX: break;
1322 case OP_TXB: code[1] |= 0x2000; break;
1323 case OP_TXL: code[1] |= 0x3000; break;
1324 case OP_TXF: break;
1325 case OP_TXG: break;
1326 case OP_TXD: break;
1327 case OP_TXLQ: break;
1328 default:
1329 assert(!"invalid texture op");
1330 break;
1331 }
1332
1333 if (i->op == OP_TXF) {
1334 if (!i->tex.levelZero)
1335 code[1] |= 0x1000;
1336 } else
1337 if (i->tex.levelZero) {
1338 code[1] |= 0x1000;
1339 }
1340
1341 if (i->op != OP_TXD && i->tex.derivAll)
1342 code[1] |= 0x200;
1343
1344 emitPredicate(i);
1345
1346 code[1] |= i->tex.mask << 2;
1347
1348 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1349
1350 defId(i->def(0), 2);
1351 srcId(i->src(0), 10);
1352 srcId(i, src1, 23);
1353
1354 if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;
1355
1356 // texture target:
1357 code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
1358 if (i->tex.target.isArray())
1359 code[1] |= 0x40;
1360 if (i->tex.target.isShadow())
1361 code[1] |= 0x400;
1362 if (i->tex.target == TEX_TARGET_2D_MS ||
1363 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1364 code[1] |= 0x800;
1365
1366 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1367 // ?
1368 }
1369
1370 if (i->tex.useOffsets == 1) {
1371 switch (i->op) {
1372 case OP_TXF: code[1] |= 0x200; break;
1373 case OP_TXD: code[1] |= 0x00400000; break;
1374 default: code[1] |= 0x800; break;
1375 }
1376 }
1377 if (i->tex.useOffsets == 4)
1378 code[1] |= 0x1000;
1379 }
1380
1381 void
1382 CodeEmitterGK110::emitTXQ(const TexInstruction *i)
1383 {
1384 code[0] = 0x00000002;
1385 code[1] = 0x75400001;
1386
1387 switch (i->tex.query) {
1388 case TXQ_DIMS: code[0] |= 0x01 << 25; break;
1389 case TXQ_TYPE: code[0] |= 0x02 << 25; break;
1390 case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
1391 case TXQ_FILTER: code[0] |= 0x10 << 25; break;
1392 case TXQ_LOD: code[0] |= 0x12 << 25; break;
1393 case TXQ_BORDER_COLOUR: code[0] |= 0x16 << 25; break;
1394 default:
1395 assert(!"invalid texture query");
1396 break;
1397 }
1398
1399 code[1] |= i->tex.mask << 2;
1400 code[1] |= i->tex.r << 9;
1401 if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
1402 code[1] |= 0x08000000;
1403
1404 defId(i->def(0), 2);
1405 srcId(i->src(0), 10);
1406
1407 emitPredicate(i);
1408 }
1409
1410 void
1411 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1412 {
1413 code[0] = 0x00000002 | ((qOp & 1) << 31);
1414 code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall
1415
1416 defId(i->def(0), 2);
1417 srcId(i->src(0), 10);
1418 srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
1419
1420 emitPredicate(i);
1421 }
1422
1423 void
1424 CodeEmitterGK110::emitPIXLD(const Instruction *i)
1425 {
1426 emitForm_L(i, 0x7f4, 2, Modifier(0));
1427 code[1] |= i->subOp << 2;
1428 code[1] |= 0x00070000;
1429 }
1430
1431 void
1432 CodeEmitterGK110::emitBAR(const Instruction *i)
1433 {
1434 code[0] = 0x00000002;
1435 code[1] = 0x85400000;
1436
1437 switch (i->subOp) {
1438 case NV50_IR_SUBOP_BAR_ARRIVE: code[1] |= 0x08; break;
1439 case NV50_IR_SUBOP_BAR_RED_AND: code[1] |= 0x50; break;
1440 case NV50_IR_SUBOP_BAR_RED_OR: code[1] |= 0x90; break;
1441 case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;
1442 default:
1443 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1444 break;
1445 }
1446
1447 emitPredicate(i);
1448
1449 // barrier id
1450 if (i->src(0).getFile() == FILE_GPR) {
1451 srcId(i->src(0), 10);
1452 } else {
1453 ImmediateValue *imm = i->getSrc(0)->asImm();
1454 assert(imm);
1455 code[0] |= imm->reg.data.u32 << 10;
1456 code[1] |= 0x8000;
1457 }
1458
1459 // thread count
1460 if (i->src(1).getFile() == FILE_GPR) {
1461 srcId(i->src(1), 23);
1462 } else {
1463 ImmediateValue *imm = i->getSrc(0)->asImm();
1464 assert(imm);
1465 assert(imm->reg.data.u32 <= 0xfff);
1466 code[0] |= imm->reg.data.u32 << 23;
1467 code[1] |= imm->reg.data.u32 >> 9;
1468 code[1] |= 0x4000;
1469 }
1470
1471 if (i->srcExists(2) && (i->predSrc != 2)) {
1472 srcId(i->src(2), 32 + 10);
1473 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1474 code[1] |= 1 << 13;
1475 } else {
1476 code[1] |= 7 << 10;
1477 }
1478 }
1479
1480 void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
1481 {
1482 code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;
1483 code[1] = 0x7cc00000;
1484
1485 emitPredicate(i);
1486 }
1487
1488 void
1489 CodeEmitterGK110::emitFlow(const Instruction *i)
1490 {
1491 const FlowInstruction *f = i->asFlow();
1492
1493 unsigned mask; // bit 0: predicate, bit 1: target
1494
1495 code[0] = 0x00000000;
1496
1497 switch (i->op) {
1498 case OP_BRA:
1499 code[1] = f->absolute ? 0x10800000 : 0x12000000;
1500 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1501 code[0] |= 0x80;
1502 mask = 3;
1503 break;
1504 case OP_CALL:
1505 code[1] = f->absolute ? 0x11000000 : 0x13000000;
1506 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1507 code[0] |= 0x80;
1508 mask = 2;
1509 break;
1510
1511 case OP_EXIT: code[1] = 0x18000000; mask = 1; break;
1512 case OP_RET: code[1] = 0x19000000; mask = 1; break;
1513 case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
1514 case OP_BREAK: code[1] = 0x1a000000; mask = 1; break;
1515 case OP_CONT: code[1] = 0x1a800000; mask = 1; break;
1516
1517 case OP_JOINAT: code[1] = 0x14800000; mask = 2; break;
1518 case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
1519 case OP_PRECONT: code[1] = 0x15800000; mask = 2; break;
1520 case OP_PRERET: code[1] = 0x13800000; mask = 2; break;
1521
1522 case OP_QUADON: code[1] = 0x1b800000; mask = 0; break;
1523 case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
1524 case OP_BRKPT: code[1] = 0x00000000; mask = 0; break;
1525 default:
1526 assert(!"invalid flow operation");
1527 return;
1528 }
1529
1530 if (mask & 1) {
1531 emitPredicate(i);
1532 if (i->flagsSrc < 0)
1533 code[0] |= 0x3c;
1534 }
1535
1536 if (!f)
1537 return;
1538
1539 if (f->allWarp)
1540 code[0] |= 1 << 9;
1541 if (f->limit)
1542 code[0] |= 1 << 8;
1543
1544 if (f->op == OP_CALL) {
1545 if (f->builtin) {
1546 assert(f->absolute);
1547 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1548 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
1549 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
1550 } else {
1551 assert(!f->absolute);
1552 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1553 code[0] |= (pcRel & 0x1ff) << 23;
1554 code[1] |= (pcRel >> 9) & 0x7fff;
1555 }
1556 } else
1557 if (mask & 2) {
1558 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1559 if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1560 pcRel += 8;
1561 // currently we don't want absolute branches
1562 assert(!f->absolute);
1563 code[0] |= (pcRel & 0x1ff) << 23;
1564 code[1] |= (pcRel >> 9) & 0x7fff;
1565 }
1566 }
1567
1568 void
1569 CodeEmitterGK110::emitVOTE(const Instruction *i)
1570 {
1571 assert(i->src(0).getFile() == FILE_PREDICATE);
1572
1573 code[0] = 0x00000002;
1574 code[1] = 0x86c00000 | (i->subOp << 19);
1575
1576 emitPredicate(i);
1577
1578 unsigned rp = 0;
1579 for (int d = 0; i->defExists(d); d++) {
1580 if (i->def(d).getFile() == FILE_PREDICATE) {
1581 assert(!(rp & 2));
1582 rp |= 2;
1583 defId(i->def(d), 48);
1584 } else if (i->def(d).getFile() == FILE_GPR) {
1585 assert(!(rp & 1));
1586 rp |= 1;
1587 defId(i->def(d), 2);
1588 } else {
1589 assert(!"Unhandled def");
1590 }
1591 }
1592 if (!(rp & 1))
1593 code[0] |= 255 << 2;
1594 if (!(rp & 2))
1595 code[1] |= 7 << 16;
1596 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
1597 code[1] |= 1 << 13;
1598 srcId(i->src(0), 42);
1599 }
1600
1601 void
1602 CodeEmitterGK110::emitSUGType(DataType ty, const int pos)
1603 {
1604 uint8_t n = 0;
1605
1606 switch (ty) {
1607 case TYPE_S32: n = 1; break;
1608 case TYPE_U8: n = 2; break;
1609 case TYPE_S8: n = 3; break;
1610 default:
1611 assert(ty == TYPE_U32);
1612 break;
1613 }
1614 code[pos / 32] |= n << (pos % 32);
1615 }
1616
1617 void
1618 CodeEmitterGK110::emitSUCachingMode(CacheMode c)
1619 {
1620 uint8_t n = 0;
1621
1622 switch (c) {
1623 case CACHE_CA:
1624 // case CACHE_WB:
1625 n = 0;
1626 break;
1627 case CACHE_CG:
1628 n = 1;
1629 break;
1630 case CACHE_CS:
1631 n = 2;
1632 break;
1633 case CACHE_CV:
1634 // case CACHE_WT:
1635 n = 3;
1636 break;
1637 default:
1638 assert(!"invalid caching mode");
1639 break;
1640 }
1641 code[0] |= (n & 1) << 31;
1642 code[1] |= (n & 2) >> 1;
1643 }
1644
1645 void
1646 CodeEmitterGK110::setSUConst16(const Instruction *i, const int s)
1647 {
1648 const uint32_t offset = i->getSrc(s)->reg.data.offset;
1649
1650 assert(offset == (offset & 0xfffc));
1651
1652 code[0] |= offset << 21;
1653 code[1] |= offset >> 11;
1654 code[1] |= i->getSrc(s)->reg.fileIndex << 5;
1655 }
1656
1657 void
1658 CodeEmitterGK110::emitSULDGB(const TexInstruction *i)
1659 {
1660 code[0] = 0x00000002;
1661 code[1] = 0x30000000 | (i->subOp << 14);
1662
1663 if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1664 emitLoadStoreType(i->dType, 0x38);
1665 emitCachingMode(i->cache, 0x36);
1666
1667 // format
1668 setSUConst16(i, 1);
1669 } else {
1670 assert(i->src(1).getFile() == FILE_GPR);
1671 code[1] |= 0x49800000;
1672
1673 emitLoadStoreType(i->dType, 0x21);
1674 emitSUCachingMode(i->cache);
1675
1676 srcId(i->src(1), 23);
1677 }
1678
1679 emitSUGType(i->sType, 0x34);
1680
1681 emitPredicate(i);
1682 defId(i->def(0), 2); // destination
1683 srcId(i->src(0), 10); // address
1684
1685 // surface predicate
1686 if (!i->srcExists(2) || (i->predSrc == 2)) {
1687 code[1] |= 0x7 << 10;
1688 } else {
1689 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1690 code[1] |= 1 << 13;
1691 srcId(i->src(2), 32 + 10);
1692 }
1693 }
1694
1695 void
1696 CodeEmitterGK110::emitSUSTGx(const TexInstruction *i)
1697 {
1698 assert(i->op == OP_SUSTP);
1699
1700 code[0] = 0x00000002;
1701 code[1] = 0x38000000;
1702
1703 if (i->src(1).getFile() == FILE_MEMORY_CONST) {
1704 code[0] |= i->subOp << 2;
1705
1706 if (i->op == OP_SUSTP)
1707 code[0] |= i->tex.mask << 4;
1708
1709 emitSUGType(i->sType, 0x8);
1710 emitCachingMode(i->cache, 0x36);
1711
1712 // format
1713 setSUConst16(i, 1);
1714 } else {
1715 assert(i->src(1).getFile() == FILE_GPR);
1716
1717 code[0] |= i->subOp << 23;
1718 code[1] |= 0x41c00000;
1719
1720 if (i->op == OP_SUSTP)
1721 code[0] |= i->tex.mask << 25;
1722
1723 emitSUGType(i->sType, 0x1d);
1724 emitSUCachingMode(i->cache);
1725
1726 srcId(i->src(1), 2);
1727 }
1728
1729 emitPredicate(i);
1730 srcId(i->src(0), 10); // address
1731 srcId(i->src(3), 42); // values
1732
1733 // surface predicate
1734 if (!i->srcExists(2) || (i->predSrc == 2)) {
1735 code[1] |= 0x7 << 18;
1736 } else {
1737 if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1738 code[1] |= 1 << 21;
1739 srcId(i->src(2), 32 + 18);
1740 }
1741 }
1742
1743 void
1744 CodeEmitterGK110::emitSUCLAMPMode(uint16_t subOp)
1745 {
1746 uint8_t m;
1747 switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1748 case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1749 case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1750 case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1751 case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1752 case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1753 case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1754 case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1755 case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1756 case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1757 case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1758 case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1759 case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1760 case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1761 case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1762 case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1763 default:
1764 return;
1765 }
1766 code[1] |= m << 20;
1767 if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1768 code[1] |= 1 << 24;
1769 }
1770
1771 void
1772 CodeEmitterGK110::emitSUCalc(Instruction *i)
1773 {
1774 ImmediateValue *imm = NULL;
1775 uint64_t opc1, opc2;
1776
1777 if (i->srcExists(2)) {
1778 imm = i->getSrc(2)->asImm();
1779 if (imm)
1780 i->setSrc(2, NULL); // special case, make emitForm_21 not assert
1781 }
1782
1783 switch (i->op) {
1784 case OP_SUCLAMP: opc1 = 0xb00; opc2 = 0x580; break;
1785 case OP_SUBFM: opc1 = 0xb68; opc2 = 0x1e8; break;
1786 case OP_SUEAU: opc1 = 0xb6c; opc2 = 0x1ec; break;
1787 default:
1788 assert(0);
1789 return;
1790 }
1791 emitForm_21(i, opc2, opc1);
1792
1793 if (i->op == OP_SUCLAMP) {
1794 if (i->dType == TYPE_S32)
1795 code[1] |= 1 << 19;
1796 emitSUCLAMPMode(i->subOp);
1797 }
1798
1799 if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1800 code[1] |= 1 << 18;
1801
1802 if (i->op != OP_SUEAU) {
1803 const uint8_t pos = i->op == OP_SUBFM ? 19 : 16;
1804 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1805 code[0] |= 255 << 2;
1806 code[1] |= i->getDef(1)->reg.data.id << pos;
1807 } else
1808 if (i->defExists(1)) { // r, p
1809 assert(i->def(1).getFile() == FILE_PREDICATE);
1810 code[1] |= i->getDef(1)->reg.data.id << pos;
1811 } else { // r, #
1812 code[1] |= 7 << pos;
1813 }
1814 }
1815
1816 if (imm) {
1817 assert(i->op == OP_SUCLAMP);
1818 i->setSrc(2, imm);
1819 code[1] |= (imm->reg.data.u32 & 0x3f) << 10; // sint6
1820 }
1821 }
1822
1823
1824 void
1825 CodeEmitterGK110::emitVectorSubOp(const Instruction *i)
1826 {
1827 switch (NV50_IR_SUBOP_Vn(i->subOp)) {
1828 case 0:
1829 code[1] |= (i->subOp & 0x000f) << 7; // vsrc1
1830 code[1] |= (i->subOp & 0x00e0) >> 6; // vsrc2
1831 code[1] |= (i->subOp & 0x0100) << 13; // vsrc2
1832 code[1] |= (i->subOp & 0x3c00) << 12; // vdst
1833 break;
1834 default:
1835 assert(0);
1836 break;
1837 }
1838 }
1839
1840 void
1841 CodeEmitterGK110::emitVSHL(const Instruction *i)
1842 {
1843 code[0] = 0x00000002;
1844 code[1] = 0xb8000000;
1845
1846 assert(NV50_IR_SUBOP_Vn(i->subOp) == 0);
1847
1848 if (isSignedType(i->dType)) code[1] |= 1 << 25;
1849 if (isSignedType(i->sType)) code[1] |= 1 << 19;
1850
1851 emitVectorSubOp(i);
1852
1853 emitPredicate(i);
1854 defId(i->def(0), 2);
1855 srcId(i->src(0), 10);
1856
1857 if (i->getSrc(1)->reg.file == FILE_IMMEDIATE) {
1858 ImmediateValue *imm = i->getSrc(1)->asImm();
1859 assert(imm);
1860 code[0] |= (imm->reg.data.u32 & 0x01ff) << 23;
1861 code[1] |= (imm->reg.data.u32 & 0xfe00) >> 9;
1862 } else {
1863 assert(i->getSrc(1)->reg.file == FILE_GPR);
1864 code[1] |= 1 << 21;
1865 srcId(i->src(1), 23);
1866 }
1867 srcId(i->src(2), 42);
1868
1869 if (i->saturate)
1870 code[0] |= 1 << 22;
1871 if (i->flagsDef >= 0)
1872 code[1] |= 1 << 18;
1873 }
1874
1875 void
1876 CodeEmitterGK110::emitAFETCH(const Instruction *i)
1877 {
1878 uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
1879
1880 code[0] = 0x00000002 | (offset << 23);
1881 code[1] = 0x7d000000 | (offset >> 9);
1882
1883 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1884 code[1] |= 0x8;
1885
1886 emitPredicate(i);
1887
1888 defId(i->def(0), 2);
1889 srcId(i->src(0).getIndirect(0), 10);
1890 }
1891
1892 void
1893 CodeEmitterGK110::emitPFETCH(const Instruction *i)
1894 {
1895 uint32_t prim = i->src(0).get()->reg.data.u32;
1896
1897 code[0] = 0x00000002 | ((prim & 0xff) << 23);
1898 code[1] = 0x7f800000;
1899
1900 emitPredicate(i);
1901
1902 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1903
1904 defId(i->def(0), 2);
1905 srcId(i, src1, 10);
1906 }
1907
1908 void
1909 CodeEmitterGK110::emitVFETCH(const Instruction *i)
1910 {
1911 unsigned int size = typeSizeof(i->dType);
1912 uint32_t offset = i->src(0).get()->reg.data.offset;
1913
1914 code[0] = 0x00000002 | (offset << 23);
1915 code[1] = 0x7ec00000 | (offset >> 9);
1916 code[1] |= (size / 4 - 1) << 18;
1917
1918 if (i->perPatch)
1919 code[1] |= 0x4;
1920 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1921 code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
1922
1923 emitPredicate(i);
1924
1925 defId(i->def(0), 2);
1926 srcId(i->src(0).getIndirect(0), 10);
1927 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
1928 }
1929
1930 void
1931 CodeEmitterGK110::emitEXPORT(const Instruction *i)
1932 {
1933 unsigned int size = typeSizeof(i->dType);
1934 uint32_t offset = i->src(0).get()->reg.data.offset;
1935
1936 code[0] = 0x00000002 | (offset << 23);
1937 code[1] = 0x7f000000 | (offset >> 9);
1938 code[1] |= (size / 4 - 1) << 18;
1939
1940 if (i->perPatch)
1941 code[1] |= 0x4;
1942
1943 emitPredicate(i);
1944
1945 assert(i->src(1).getFile() == FILE_GPR);
1946
1947 srcId(i->src(0).getIndirect(0), 10);
1948 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
1949 srcId(i->src(1), 2);
1950 }
1951
1952 void
1953 CodeEmitterGK110::emitOUT(const Instruction *i)
1954 {
1955 assert(i->src(0).getFile() == FILE_GPR);
1956
1957 emitForm_21(i, 0x1f0, 0xb70);
1958
1959 if (i->op == OP_EMIT)
1960 code[1] |= 1 << 10;
1961 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1962 code[1] |= 1 << 11;
1963 }
1964
1965 void
1966 CodeEmitterGK110::emitInterpMode(const Instruction *i)
1967 {
1968 code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
1969 code[1] |= (i->ipa & 0xc) << (19 - 2);
1970 }
1971
1972 static void
1973 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
1974 {
1975 int ipa = entry->ipa;
1976 int reg = entry->reg;
1977 int loc = entry->loc;
1978
1979 if (data.flatshade &&
1980 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1981 ipa = NV50_IR_INTERP_FLAT;
1982 reg = 0xff;
1983 } else if (data.force_persample_interp &&
1984 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1985 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1986 ipa |= NV50_IR_INTERP_CENTROID;
1987 }
1988 code[loc + 1] &= ~(0xf << 19);
1989 code[loc + 1] |= (ipa & 0x3) << 21;
1990 code[loc + 1] |= (ipa & 0xc) << (19 - 2);
1991 code[loc + 0] &= ~(0xff << 23);
1992 code[loc + 0] |= reg << 23;
1993 }
1994
1995 void
1996 CodeEmitterGK110::emitINTERP(const Instruction *i)
1997 {
1998 const uint32_t base = i->getSrc(0)->reg.data.offset;
1999
2000 code[0] = 0x00000002 | (base << 31);
2001 code[1] = 0x74800000 | (base >> 1);
2002
2003 if (i->saturate)
2004 code[1] |= 1 << 18;
2005
2006 if (i->op == OP_PINTERP) {
2007 srcId(i->src(1), 23);
2008 addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
2009 } else {
2010 code[0] |= 0xff << 23;
2011 addInterp(i->ipa, 0xff, interpApply);
2012 }
2013
2014 srcId(i->src(0).getIndirect(0), 10);
2015 emitInterpMode(i);
2016
2017 emitPredicate(i);
2018 defId(i->def(0), 2);
2019
2020 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
2021 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
2022 else
2023 code[1] |= 0xff << 10;
2024 }
2025
2026 void
2027 CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
2028 {
2029 uint8_t n;
2030
2031 switch (ty) {
2032 case TYPE_U8:
2033 n = 0;
2034 break;
2035 case TYPE_S8:
2036 n = 1;
2037 break;
2038 case TYPE_U16:
2039 n = 2;
2040 break;
2041 case TYPE_S16:
2042 n = 3;
2043 break;
2044 case TYPE_F32:
2045 case TYPE_U32:
2046 case TYPE_S32:
2047 n = 4;
2048 break;
2049 case TYPE_F64:
2050 case TYPE_U64:
2051 case TYPE_S64:
2052 n = 5;
2053 break;
2054 case TYPE_B128:
2055 n = 6;
2056 break;
2057 default:
2058 n = 0;
2059 assert(!"invalid ld/st type");
2060 break;
2061 }
2062 code[pos / 32] |= n << (pos % 32);
2063 }
2064
2065 void
2066 CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
2067 {
2068 uint8_t n;
2069
2070 switch (c) {
2071 case CACHE_CA:
2072 // case CACHE_WB:
2073 n = 0;
2074 break;
2075 case CACHE_CG:
2076 n = 1;
2077 break;
2078 case CACHE_CS:
2079 n = 2;
2080 break;
2081 case CACHE_CV:
2082 // case CACHE_WT:
2083 n = 3;
2084 break;
2085 default:
2086 n = 0;
2087 assert(!"invalid caching mode");
2088 break;
2089 }
2090 code[pos / 32] |= n << (pos % 32);
2091 }
2092
2093 void
2094 CodeEmitterGK110::emitSTORE(const Instruction *i)
2095 {
2096 int32_t offset = SDATA(i->src(0)).offset;
2097
2098 switch (i->src(0).getFile()) {
2099 case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
2100 case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break;
2101 case FILE_MEMORY_SHARED:
2102 code[0] = 0x00000002;
2103 if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
2104 code[1] = 0x78400000;
2105 else
2106 code[1] = 0x7ac00000;
2107 break;
2108 default:
2109 assert(!"invalid memory file");
2110 break;
2111 }
2112
2113 if (code[0] & 0x2) {
2114 offset &= 0xffffff;
2115 emitLoadStoreType(i->dType, 0x33);
2116 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2117 emitCachingMode(i->cache, 0x2f);
2118 } else {
2119 emitLoadStoreType(i->dType, 0x38);
2120 emitCachingMode(i->cache, 0x3b);
2121 }
2122 code[0] |= offset << 23;
2123 code[1] |= offset >> 9;
2124
2125 // Unlocked store on shared memory can fail.
2126 if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2127 i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
2128 assert(i->defExists(0));
2129 defId(i->def(0), 32 + 16);
2130 }
2131
2132 emitPredicate(i);
2133
2134 srcId(i->src(1), 2);
2135 srcId(i->src(0).getIndirect(0), 10);
2136 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2137 i->src(0).isIndirect(0) &&
2138 i->getIndirect(0, 0)->reg.size == 8)
2139 code[1] |= 1 << 23;
2140 }
2141
2142 void
2143 CodeEmitterGK110::emitLOAD(const Instruction *i)
2144 {
2145 int32_t offset = SDATA(i->src(0)).offset;
2146
2147 switch (i->src(0).getFile()) {
2148 case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
2149 case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break;
2150 case FILE_MEMORY_SHARED:
2151 code[0] = 0x00000002;
2152 if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
2153 code[1] = 0x77400000;
2154 else
2155 code[1] = 0x7a400000;
2156 break;
2157 case FILE_MEMORY_CONST:
2158 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
2159 emitMOV(i);
2160 return;
2161 }
2162 offset &= 0xffff;
2163 code[0] = 0x00000002;
2164 code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
2165 code[1] |= i->subOp << 15;
2166 break;
2167 default:
2168 assert(!"invalid memory file");
2169 break;
2170 }
2171
2172 if (code[0] & 0x2) {
2173 offset &= 0xffffff;
2174 emitLoadStoreType(i->dType, 0x33);
2175 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
2176 emitCachingMode(i->cache, 0x2f);
2177 } else {
2178 emitLoadStoreType(i->dType, 0x38);
2179 emitCachingMode(i->cache, 0x3b);
2180 }
2181 code[0] |= offset << 23;
2182 code[1] |= offset >> 9;
2183
2184 // Locked store on shared memory can fail.
2185 int r = 0, p = -1;
2186 if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
2187 i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
2188 if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2189 r = -1;
2190 p = 0;
2191 } else if (i->defExists(1)) { // r, p
2192 p = 1;
2193 } else {
2194 assert(!"Expected predicate dest for load locked");
2195 }
2196 }
2197
2198 emitPredicate(i);
2199
2200 if (r >= 0)
2201 defId(i->def(r), 2);
2202 else
2203 code[0] |= 255 << 2;
2204
2205 if (p >= 0)
2206 defId(i->def(p), 32 + 16);
2207
2208 if (i->getIndirect(0, 0)) {
2209 srcId(i->src(0).getIndirect(0), 10);
2210 if (i->getIndirect(0, 0)->reg.size == 8)
2211 code[1] |= 1 << 23;
2212 } else {
2213 code[0] |= 255 << 10;
2214 }
2215 }
2216
2217 uint8_t
2218 CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
2219 {
2220 switch (SDATA(ref).sv.sv) {
2221 case SV_LANEID: return 0x00;
2222 case SV_PHYSID: return 0x03;
2223 case SV_VERTEX_COUNT: return 0x10;
2224 case SV_INVOCATION_ID: return 0x11;
2225 case SV_YDIR: return 0x12;
2226 case SV_THREAD_KILL: return 0x13;
2227 case SV_TID: return 0x21 + SDATA(ref).sv.index;
2228 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
2229 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
2230 case SV_GRIDID: return 0x2c;
2231 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
2232 case SV_LBASE: return 0x34;
2233 case SV_SBASE: return 0x30;
2234 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
2235 default:
2236 assert(!"no sreg for system value");
2237 return 0;
2238 }
2239 }
2240
2241 void
2242 CodeEmitterGK110::emitMOV(const Instruction *i)
2243 {
2244 if (i->def(0).getFile() == FILE_PREDICATE) {
2245 if (i->src(0).getFile() == FILE_GPR) {
2246 // Use ISETP.NE.AND dst, PT, src, RZ, PT
2247 code[0] = 0x00000002;
2248 code[1] = 0xdb500000;
2249
2250 code[0] |= 0x7 << 2;
2251 code[0] |= 0xff << 23;
2252 code[1] |= 0x7 << 10;
2253 srcId(i->src(0), 10);
2254 } else
2255 if (i->src(0).getFile() == FILE_PREDICATE) {
2256 // Use PSETP.AND.AND dst, PT, src, PT, PT
2257 code[0] = 0x00000002;
2258 code[1] = 0x84800000;
2259
2260 code[0] |= 0x7 << 2;
2261 code[1] |= 0x7 << 0;
2262 code[1] |= 0x7 << 10;
2263
2264 srcId(i->src(0), 14);
2265 } else {
2266 assert(!"Unexpected source for predicate destination");
2267 emitNOP(i);
2268 }
2269 emitPredicate(i);
2270 defId(i->def(0), 5);
2271 } else
2272 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
2273 code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
2274 code[1] = 0x86400000;
2275 emitPredicate(i);
2276 defId(i->def(0), 2);
2277 } else
2278 if (i->src(0).getFile() == FILE_IMMEDIATE) {
2279 code[0] = 0x00000002 | (i->lanes << 14);
2280 code[1] = 0x74000000;
2281 emitPredicate(i);
2282 defId(i->def(0), 2);
2283 setImmediate32(i, 0, Modifier(0));
2284 } else
2285 if (i->src(0).getFile() == FILE_PREDICATE) {
2286 code[0] = 0x00000002;
2287 code[1] = 0x84401c07;
2288 emitPredicate(i);
2289 defId(i->def(0), 2);
2290 srcId(i->src(0), 14);
2291 } else {
2292 emitForm_C(i, 0x24c, 2);
2293 code[1] |= i->lanes << 10;
2294 }
2295 }
2296
2297 static inline bool
2298 uses64bitAddress(const Instruction *ldst)
2299 {
2300 return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
2301 ldst->src(0).isIndirect(0) &&
2302 ldst->getIndirect(0, 0)->reg.size == 8;
2303 }
2304
2305 void
2306 CodeEmitterGK110::emitATOM(const Instruction *i)
2307 {
2308 const bool hasDst = i->defExists(0);
2309 const bool exch = i->subOp == NV50_IR_SUBOP_ATOM_EXCH;
2310
2311 code[0] = 0x00000002;
2312 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
2313 code[1] = 0x77800000;
2314 else
2315 code[1] = 0x68000000;
2316
2317 switch (i->subOp) {
2318 case NV50_IR_SUBOP_ATOM_CAS: break;
2319 case NV50_IR_SUBOP_ATOM_EXCH: code[1] |= 0x04000000; break;
2320 default: code[1] |= i->subOp << 23; break;
2321 }
2322
2323 switch (i->dType) {
2324 case TYPE_U32: break;
2325 case TYPE_S32: code[1] |= 0x00100000; break;
2326 case TYPE_U64: code[1] |= 0x00200000; break;
2327 case TYPE_F32: code[1] |= 0x00300000; break;
2328 case TYPE_B128: code[1] |= 0x00400000; break; /* TODO: U128 */
2329 case TYPE_S64: code[1] |= 0x00500000; break;
2330 default: assert(!"unsupported type"); break;
2331 }
2332
2333 emitPredicate(i);
2334
2335 /* TODO: cas: check that src regs line up */
2336 /* TODO: cas: flip bits if $r255 is used */
2337 srcId(i->src(1), 23);
2338
2339 if (hasDst) {
2340 defId(i->def(0), 2);
2341 } else
2342 if (!exch) {
2343 code[0] |= 255 << 2;
2344 }
2345
2346 if (hasDst || !exch) {
2347 const int32_t offset = SDATA(i->src(0)).offset;
2348 assert(offset < 0x80000 && offset >= -0x80000);
2349 code[0] |= (offset & 1) << 31;
2350 code[1] |= (offset & 0xffffe) >> 1;
2351 } else {
2352 srcAddr32(i->src(0), 31);
2353 }
2354
2355 if (i->getIndirect(0, 0)) {
2356 srcId(i->getIndirect(0, 0), 10);
2357 if (i->getIndirect(0, 0)->reg.size == 8)
2358 code[1] |= 1 << 19;
2359 } else {
2360 code[0] |= 255 << 10;
2361 }
2362 }
2363
2364 void
2365 CodeEmitterGK110::emitCCTL(const Instruction *i)
2366 {
2367 int32_t offset = SDATA(i->src(0)).offset;
2368
2369 code[0] = 0x00000002 | (i->subOp << 2);
2370
2371 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2372 code[1] = 0x7b000000;
2373 } else {
2374 code[1] = 0x7c000000;
2375 offset &= 0xffffff;
2376 }
2377 code[0] |= offset << 23;
2378 code[1] |= offset >> 9;
2379
2380 if (uses64bitAddress(i))
2381 code[1] |= 1 << 23;
2382 srcId(i->src(0).getIndirect(0), 10);
2383
2384 emitPredicate(i);
2385 }
2386
2387 bool
2388 CodeEmitterGK110::emitInstruction(Instruction *insn)
2389 {
2390 const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;
2391
2392 if (insn->encSize != 8) {
2393 ERROR("skipping unencodable instruction: ");
2394 insn->print();
2395 return false;
2396 } else
2397 if (codeSize + size > codeSizeLimit) {
2398 ERROR("code emitter output buffer too small\n");
2399 return false;
2400 }
2401
2402 if (writeIssueDelays) {
2403 int id = (codeSize & 0x3f) / 8 - 1;
2404 if (id < 0) {
2405 id += 1;
2406 code[0] = 0x00000000; // cf issue delay "instruction"
2407 code[1] = 0x08000000;
2408 code += 2;
2409 codeSize += 8;
2410 }
2411 uint32_t *data = code - (id * 2 + 2);
2412
2413 switch (id) {
2414 case 0: data[0] |= insn->sched << 2; break;
2415 case 1: data[0] |= insn->sched << 10; break;
2416 case 2: data[0] |= insn->sched << 18; break;
2417 case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
2418 case 4: data[1] |= insn->sched << 2; break;
2419 case 5: data[1] |= insn->sched << 10; break;
2420 case 6: data[1] |= insn->sched << 18; break;
2421 default:
2422 assert(0);
2423 break;
2424 }
2425 }
2426
2427 // assert that instructions with multiple defs don't corrupt registers
2428 for (int d = 0; insn->defExists(d); ++d)
2429 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2430
2431 switch (insn->op) {
2432 case OP_MOV:
2433 case OP_RDSV:
2434 emitMOV(insn);
2435 break;
2436 case OP_NOP:
2437 break;
2438 case OP_LOAD:
2439 emitLOAD(insn);
2440 break;
2441 case OP_STORE:
2442 emitSTORE(insn);
2443 break;
2444 case OP_LINTERP:
2445 case OP_PINTERP:
2446 emitINTERP(insn);
2447 break;
2448 case OP_VFETCH:
2449 emitVFETCH(insn);
2450 break;
2451 case OP_EXPORT:
2452 emitEXPORT(insn);
2453 break;
2454 case OP_AFETCH:
2455 emitAFETCH(insn);
2456 break;
2457 case OP_PFETCH:
2458 emitPFETCH(insn);
2459 break;
2460 case OP_EMIT:
2461 case OP_RESTART:
2462 emitOUT(insn);
2463 break;
2464 case OP_ADD:
2465 case OP_SUB:
2466 if (insn->dType == TYPE_F64)
2467 emitDADD(insn);
2468 else if (isFloatType(insn->dType))
2469 emitFADD(insn);
2470 else
2471 emitUADD(insn);
2472 break;
2473 case OP_MUL:
2474 if (insn->dType == TYPE_F64)
2475 emitDMUL(insn);
2476 else if (isFloatType(insn->dType))
2477 emitFMUL(insn);
2478 else
2479 emitIMUL(insn);
2480 break;
2481 case OP_MAD:
2482 case OP_FMA:
2483 if (insn->dType == TYPE_F64)
2484 emitDMAD(insn);
2485 else if (isFloatType(insn->dType))
2486 emitFMAD(insn);
2487 else
2488 emitIMAD(insn);
2489 break;
2490 case OP_MADSP:
2491 emitMADSP(insn);
2492 break;
2493 case OP_SAD:
2494 emitISAD(insn);
2495 break;
2496 case OP_SHLADD:
2497 emitSHLADD(insn);
2498 break;
2499 case OP_NOT:
2500 emitNOT(insn);
2501 break;
2502 case OP_AND:
2503 emitLogicOp(insn, 0);
2504 break;
2505 case OP_OR:
2506 emitLogicOp(insn, 1);
2507 break;
2508 case OP_XOR:
2509 emitLogicOp(insn, 2);
2510 break;
2511 case OP_SHL:
2512 case OP_SHR:
2513 if (typeSizeof(insn->sType) == 8)
2514 emitShift64(insn);
2515 else
2516 emitShift(insn);
2517 break;
2518 case OP_SET:
2519 case OP_SET_AND:
2520 case OP_SET_OR:
2521 case OP_SET_XOR:
2522 emitSET(insn->asCmp());
2523 break;
2524 case OP_SELP:
2525 emitSELP(insn);
2526 break;
2527 case OP_SLCT:
2528 emitSLCT(insn->asCmp());
2529 break;
2530 case OP_MIN:
2531 case OP_MAX:
2532 emitMINMAX(insn);
2533 break;
2534 case OP_ABS:
2535 case OP_NEG:
2536 case OP_CEIL:
2537 case OP_FLOOR:
2538 case OP_TRUNC:
2539 case OP_SAT:
2540 emitCVT(insn);
2541 break;
2542 case OP_CVT:
2543 if (insn->def(0).getFile() == FILE_PREDICATE ||
2544 insn->src(0).getFile() == FILE_PREDICATE)
2545 emitMOV(insn);
2546 else
2547 emitCVT(insn);
2548 break;
2549 case OP_RSQ:
2550 emitSFnOp(insn, 5 + 2 * insn->subOp);
2551 break;
2552 case OP_RCP:
2553 emitSFnOp(insn, 4 + 2 * insn->subOp);
2554 break;
2555 case OP_LG2:
2556 emitSFnOp(insn, 3);
2557 break;
2558 case OP_EX2:
2559 emitSFnOp(insn, 2);
2560 break;
2561 case OP_SIN:
2562 emitSFnOp(insn, 1);
2563 break;
2564 case OP_COS:
2565 emitSFnOp(insn, 0);
2566 break;
2567 case OP_PRESIN:
2568 case OP_PREEX2:
2569 emitPreOp(insn);
2570 break;
2571 case OP_TEX:
2572 case OP_TXB:
2573 case OP_TXL:
2574 case OP_TXD:
2575 case OP_TXF:
2576 case OP_TXG:
2577 case OP_TXLQ:
2578 emitTEX(insn->asTex());
2579 break;
2580 case OP_TXQ:
2581 emitTXQ(insn->asTex());
2582 break;
2583 case OP_TEXBAR:
2584 emitTEXBAR(insn);
2585 break;
2586 case OP_PIXLD:
2587 emitPIXLD(insn);
2588 break;
2589 case OP_BRA:
2590 case OP_CALL:
2591 case OP_PRERET:
2592 case OP_RET:
2593 case OP_DISCARD:
2594 case OP_EXIT:
2595 case OP_PRECONT:
2596 case OP_CONT:
2597 case OP_PREBREAK:
2598 case OP_BREAK:
2599 case OP_JOINAT:
2600 case OP_BRKPT:
2601 case OP_QUADON:
2602 case OP_QUADPOP:
2603 emitFlow(insn);
2604 break;
2605 case OP_QUADOP:
2606 emitQUADOP(insn, insn->subOp, insn->lanes);
2607 break;
2608 case OP_DFDX:
2609 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2610 break;
2611 case OP_DFDY:
2612 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2613 break;
2614 case OP_POPCNT:
2615 emitPOPC(insn);
2616 break;
2617 case OP_INSBF:
2618 emitINSBF(insn);
2619 break;
2620 case OP_EXTBF:
2621 emitEXTBF(insn);
2622 break;
2623 case OP_BFIND:
2624 emitBFIND(insn);
2625 break;
2626 case OP_PERMT:
2627 emitPERMT(insn);
2628 break;
2629 case OP_JOIN:
2630 emitNOP(insn);
2631 insn->join = 1;
2632 break;
2633 case OP_BAR:
2634 emitBAR(insn);
2635 break;
2636 case OP_MEMBAR:
2637 emitMEMBAR(insn);
2638 break;
2639 case OP_ATOM:
2640 emitATOM(insn);
2641 break;
2642 case OP_CCTL:
2643 emitCCTL(insn);
2644 break;
2645 case OP_VOTE:
2646 emitVOTE(insn);
2647 break;
2648 case OP_SULDB:
2649 emitSULDGB(insn->asTex());
2650 break;
2651 case OP_SUSTB:
2652 case OP_SUSTP:
2653 emitSUSTGx(insn->asTex());
2654 break;
2655 case OP_SUBFM:
2656 case OP_SUCLAMP:
2657 case OP_SUEAU:
2658 emitSUCalc(insn);
2659 break;
2660 case OP_VSHL:
2661 emitVSHL(insn);
2662 break;
2663 case OP_PHI:
2664 case OP_UNION:
2665 case OP_CONSTRAINT:
2666 ERROR("operation should have been eliminated");
2667 return false;
2668 case OP_EXP:
2669 case OP_LOG:
2670 case OP_SQRT:
2671 case OP_POW:
2672 ERROR("operation should have been lowered\n");
2673 return false;
2674 default:
2675 ERROR("unknown op: %u\n", insn->op);
2676 return false;
2677 }
2678
2679 if (insn->join)
2680 code[0] |= 1 << 22;
2681
2682 code += 2;
2683 codeSize += 8;
2684 return true;
2685 }
2686
2687 uint32_t
2688 CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const
2689 {
2690 // No more short instruction encodings.
2691 return 8;
2692 }
2693
2694 void
2695 CodeEmitterGK110::prepareEmission(Function *func)
2696 {
2697 const Target *targ = func->getProgram()->getTarget();
2698
2699 CodeEmitter::prepareEmission(func);
2700
2701 if (targ->hasSWSched)
2702 calculateSchedDataNVC0(targ, func);
2703 }
2704
2705 CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target)
2706 : CodeEmitter(target),
2707 targNVC0(target),
2708 writeIssueDelays(target->hasSWSched)
2709 {
2710 code = NULL;
2711 codeSize = codeSizeLimit = 0;
2712 relocInfo = NULL;
2713 }
2714
2715 CodeEmitter *
2716 TargetNVC0::createCodeEmitterGK110(Program::Type type)
2717 {
2718 CodeEmitterGK110 *emit = new CodeEmitterGK110(this);
2719 emit->setProgramType(type);
2720 return emit;
2721 }
2722
2723 } // namespace nv50_ir