gk110/ir: add partial BAR support
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gk110.cpp
1 /*
2 * Copyright 2012 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 // CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
26
27 namespace nv50_ir {
28
29 class CodeEmitterGK110 : public CodeEmitter
30 {
31 public:
32 CodeEmitterGK110(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
49 void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
50 void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier);
51
52 void emitPredicate(const Instruction *);
53
54 void setCAddress14(const ValueRef&);
55 void setShortImmediate(const Instruction *, const int s);
56 void setImmediate32(const Instruction *, const int s, Modifier);
57
58 void modNegAbsF32_3b(const Instruction *, const int s);
59
60 void emitCondCode(CondCode cc, int pos, uint8_t mask);
61 void emitInterpMode(const Instruction *);
62 void emitLoadStoreType(DataType ty, const int pos);
63 void emitCachingMode(CacheMode c, const int pos);
64
65 inline uint8_t getSRegEncoding(const ValueRef&);
66
67 void emitRoundMode(RoundMode, const int pos, const int rintPos);
68 void emitRoundModeF(RoundMode, const int pos);
69 void emitRoundModeI(RoundMode, const int pos);
70
71 void emitNegAbs12(const Instruction *);
72
73 void emitNOP(const Instruction *);
74
75 void emitLOAD(const Instruction *);
76 void emitSTORE(const Instruction *);
77 void emitMOV(const Instruction *);
78 void emitMEMBAR(const Instruction *);
79
80 void emitINTERP(const Instruction *);
81 void emitAFETCH(const Instruction *);
82 void emitPFETCH(const Instruction *);
83 void emitVFETCH(const Instruction *);
84 void emitEXPORT(const Instruction *);
85 void emitOUT(const Instruction *);
86
87 void emitUADD(const Instruction *);
88 void emitFADD(const Instruction *);
89 void emitDADD(const Instruction *);
90 void emitIMUL(const Instruction *);
91 void emitFMUL(const Instruction *);
92 void emitDMUL(const Instruction *);
93 void emitIMAD(const Instruction *);
94 void emitISAD(const Instruction *);
95 void emitFMAD(const Instruction *);
96 void emitDMAD(const Instruction *);
97
98 void emitNOT(const Instruction *);
99 void emitLogicOp(const Instruction *, uint8_t subOp);
100 void emitPOPC(const Instruction *);
101 void emitINSBF(const Instruction *);
102 void emitEXTBF(const Instruction *);
103 void emitBFIND(const Instruction *);
104 void emitShift(const Instruction *);
105
106 void emitSFnOp(const Instruction *, uint8_t subOp);
107
108 void emitCVT(const Instruction *);
109 void emitMINMAX(const Instruction *);
110 void emitPreOp(const Instruction *);
111
112 void emitSET(const CmpInstruction *);
113 void emitSLCT(const CmpInstruction *);
114 void emitSELP(const Instruction *);
115
116 void emitTEXBAR(const Instruction *);
117 void emitTEX(const TexInstruction *);
118 void emitTEXCSAA(const TexInstruction *);
119 void emitTXQ(const TexInstruction *);
120
121 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
122
123 void emitPIXLD(const Instruction *);
124
125 void emitBAR(const Instruction *);
126
127 void emitFlow(const Instruction *);
128
129 inline void defId(const ValueDef&, const int pos);
130 inline void srcId(const ValueRef&, const int pos);
131 inline void srcId(const ValueRef *, const int pos);
132 inline void srcId(const Instruction *, int s, const int pos);
133
134 inline void srcAddr32(const ValueRef&, const int pos); // address / 4
135
136 inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
137 };
138
// Register id written when an operand slot has no value attached.
#define GK110_GPR_ZERO 255

// The helpers below expect a local `i` (the instruction being emitted) and
// the emitter's `code` words to be in scope. `bit` is a bit position written
// as bare hex digits (no 0x prefix) and counted across code[0]/code[1].

// Set the bit if source `idx` carries a negate / abs modifier.
#define NEG_(bit, idx) \
   if (i->src(idx).mod.neg()) code[(0x##bit) / 32] |= 1 << ((0x##bit) % 32)
#define ABS_(bit, idx) \
   if (i->src(idx).mod.abs()) code[(0x##bit) / 32] |= 1 << ((0x##bit) % 32)

// Set the bit if source `idx` carries the NOT modifier.
#define NOT_(bit, idx) if (i->src(idx).mod & Modifier(NV50_IR_MOD_NOT)) \
   code[(0x##bit) / 32] |= 1 << ((0x##bit) % 32)

// Set the bit if the instruction has its ftz / dnz flag set.
#define FTZ_(bit) if (i->ftz) code[(0x##bit) / 32] |= 1 << ((0x##bit) % 32)
#define DNZ_(bit) if (i->dnz) code[(0x##bit) / 32] |= 1 << ((0x##bit) % 32)

// Set the bit if the instruction saturates its result.
#define SAT_(bit) if (i->saturate) code[(0x##bit) / 32] |= 1 << ((0x##bit) % 32)

// Forward to emitRoundMode<kind>() with the instruction's rounding mode.
#define RND_(bit, kind) emitRoundMode##kind(i->rnd, 0x##bit)

// Storage data of the representative value behind a source / definition.
#define SDATA(v) ((v).rep()->reg.data)
#define DDATA(v) ((v).rep()->reg.data)
158
159 void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
160 {
161 code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);
162 }
163
164 void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
165 {
166 code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);
167 }
168
169 void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
170 {
171 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;
172 code[pos / 32] |= r << (pos % 32);
173 }
174
175 void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
176 {
177 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
178 }
179
180 void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
181 {
182 code[pos / 32] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);
183 }
184
185 bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
186 {
187 const ImmediateValue *imm = ref.get()->asImm();
188
189 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
190 }
191
192 void
193 CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
194 {
195 bool rint = false;
196 uint8_t n;
197
198 switch (rnd) {
199 case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break;
200 case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break;
201 case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break;
202 default:
203 rint = rnd == ROUND_NI;
204 n = 0;
205 assert(rnd == ROUND_N || rnd == ROUND_NI);
206 break;
207 }
208 code[pos / 32] |= n << (pos % 32);
209 if (rint && rintPos >= 0)
210 code[rintPos / 32] |= 1 << (rintPos % 32);
211 }
212
213 void
214 CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
215 {
216 uint8_t n;
217
218 switch (rnd) {
219 case ROUND_M: n = 1; break;
220 case ROUND_P: n = 2; break;
221 case ROUND_Z: n = 3; break;
222 default:
223 n = 0;
224 assert(rnd == ROUND_N);
225 break;
226 }
227 code[pos / 32] |= n << (pos % 32);
228 }
229
230 void
231 CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
232 {
233 uint8_t n;
234
235 switch (rnd) {
236 case ROUND_MI: n = 1; break;
237 case ROUND_PI: n = 2; break;
238 case ROUND_ZI: n = 3; break;
239 default:
240 n = 0;
241 assert(rnd == ROUND_NI);
242 break;
243 }
244 code[pos / 32] |= n << (pos % 32);
245 }
246
247 void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
248 {
249 uint8_t n;
250
251 switch (cc) {
252 case CC_FL: n = 0x00; break;
253 case CC_LT: n = 0x01; break;
254 case CC_EQ: n = 0x02; break;
255 case CC_LE: n = 0x03; break;
256 case CC_GT: n = 0x04; break;
257 case CC_NE: n = 0x05; break;
258 case CC_GE: n = 0x06; break;
259 case CC_LTU: n = 0x09; break;
260 case CC_EQU: n = 0x0a; break;
261 case CC_LEU: n = 0x0b; break;
262 case CC_GTU: n = 0x0c; break;
263 case CC_NEU: n = 0x0d; break;
264 case CC_GEU: n = 0x0e; break;
265 case CC_TR: n = 0x0f; break;
266 case CC_NO: n = 0x10; break;
267 case CC_NC: n = 0x11; break;
268 case CC_NS: n = 0x12; break;
269 case CC_NA: n = 0x13; break;
270 case CC_A: n = 0x14; break;
271 case CC_S: n = 0x15; break;
272 case CC_C: n = 0x16; break;
273 case CC_O: n = 0x17; break;
274 default:
275 n = 0;
276 assert(!"invalid condition code");
277 break;
278 }
279 code[pos / 32] |= (n & mask) << (pos % 32);
280 }
281
282 void
283 CodeEmitterGK110::emitPredicate(const Instruction *i)
284 {
285 if (i->predSrc >= 0) {
286 srcId(i->src(i->predSrc), 18);
287 if (i->cc == CC_NOT_P)
288 code[0] |= 8 << 18; // negate
289 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
290 } else {
291 code[0] |= 7 << 18;
292 }
293 }
294
295 void
296 CodeEmitterGK110::setCAddress14(const ValueRef& src)
297 {
298 const Storage& res = src.get()->asSym()->reg;
299 const int32_t addr = res.data.offset / 4;
300
301 code[0] |= (addr & 0x01ff) << 23;
302 code[1] |= (addr & 0x3e00) >> 9;
303 code[1] |= res.fileIndex << 5;
304 }
305
306 void
307 CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
308 {
309 const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
310 const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
311
312 if (i->sType == TYPE_F32) {
313 assert(!(u32 & 0x00000fff));
314 code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
315 code[1] |= ((u32 & 0x7fe00000) >> 21);
316 code[1] |= ((u32 & 0x80000000) >> 4);
317 } else
318 if (i->sType == TYPE_F64) {
319 assert(!(u64 & 0x00000fffffffffffULL));
320 code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
321 code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
322 code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
323 } else {
324 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
325 code[0] |= (u32 & 0x001ff) << 23;
326 code[1] |= (u32 & 0x7fe00) >> 9;
327 code[1] |= (u32 & 0x80000) << 8;
328 }
329 }
330
331 void
332 CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
333 Modifier mod)
334 {
335 uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
336
337 if (mod) {
338 ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
339 mod.applyTo(imm);
340 u32 = imm.reg.data.u32;
341 }
342
343 code[0] |= u32 << 23;
344 code[1] |= u32 >> 9;
345 }
346
347 void
348 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
349 Modifier mod)
350 {
351 code[0] = ctg;
352 code[1] = opc << 20;
353
354 emitPredicate(i);
355
356 defId(i->def(0), 2);
357
358 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
359 switch (i->src(s).getFile()) {
360 case FILE_GPR:
361 srcId(i->src(s), s ? 42 : 10);
362 break;
363 case FILE_IMMEDIATE:
364 setImmediate32(i, s, mod);
365 break;
366 default:
367 break;
368 }
369 }
370 }
371
372
373 void
374 CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
375 {
376 code[0] = ctg;
377 code[1] = opc << 20;
378
379 emitPredicate(i);
380
381 defId(i->def(0), 2);
382
383 switch (i->src(0).getFile()) {
384 case FILE_MEMORY_CONST:
385 code[1] |= 0x4 << 28;
386 setCAddress14(i->src(0));
387 break;
388 case FILE_GPR:
389 code[1] |= 0xc << 28;
390 srcId(i->src(0), 23);
391 break;
392 default:
393 assert(0);
394 break;
395 }
396 }
397
398 // 0x2 for GPR, c[] and 0x1 for short immediate
399 void
400 CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
401 uint32_t opc1)
402 {
403 const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;
404
405 int s1 = 23;
406 if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
407 s1 = 42;
408
409 if (imm) {
410 code[0] = 0x1;
411 code[1] = opc1 << 20;
412 } else {
413 code[0] = 0x2;
414 code[1] = (0xc << 28) | (opc2 << 20);
415 }
416
417 emitPredicate(i);
418
419 defId(i->def(0), 2);
420
421 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
422 switch (i->src(s).getFile()) {
423 case FILE_MEMORY_CONST:
424 code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
425 setCAddress14(i->src(s));
426 break;
427 case FILE_IMMEDIATE:
428 setShortImmediate(i, s);
429 break;
430 case FILE_GPR:
431 srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
432 break;
433 default:
434 // ignore here, can be predicate or flags, but must not be address
435 break;
436 }
437 }
438 // 0x0 = invalid
439 // 0xc = rrr
440 // 0x8 = rrc
441 // 0x4 = rcr
442 assert(imm || (code[1] & (0xc << 28)));
443 }
444
445 inline void
446 CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
447 {
448 if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);
449 if (i->src(s).mod.neg()) code[1] ^= (1 << 27);
450 }
451
452 void
453 CodeEmitterGK110::emitNOP(const Instruction *i)
454 {
455 code[0] = 0x00003c02;
456 code[1] = 0x85800000;
457
458 if (i)
459 emitPredicate(i);
460 else
461 code[0] = 0x001c3c02;
462 }
463
464 void
465 CodeEmitterGK110::emitFMAD(const Instruction *i)
466 {
467 assert(!isLIMM(i->src(1), TYPE_F32));
468
469 emitForm_21(i, 0x0c0, 0x940);
470
471 NEG_(34, 2);
472 SAT_(35);
473 RND_(36, F);
474 FTZ_(38);
475 DNZ_(39);
476
477 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
478
479 if (code[0] & 0x1) {
480 if (neg1)
481 code[1] ^= 1 << 27;
482 } else
483 if (neg1) {
484 code[1] |= 1 << 19;
485 }
486 }
487
488 void
489 CodeEmitterGK110::emitDMAD(const Instruction *i)
490 {
491 assert(!i->saturate);
492 assert(!i->ftz);
493
494 emitForm_21(i, 0x1b8, 0xb38);
495
496 NEG_(34, 2);
497 RND_(36, F);
498
499 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
500
501 if (code[0] & 0x1) {
502 if (neg1)
503 code[1] ^= 1 << 27;
504 } else
505 if (neg1) {
506 code[1] |= 1 << 19;
507 }
508 }
509
510 void
511 CodeEmitterGK110::emitFMUL(const Instruction *i)
512 {
513 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
514
515 assert(i->postFactor >= -3 && i->postFactor <= 3);
516
517 if (isLIMM(i->src(1), TYPE_F32)) {
518 emitForm_L(i, 0x200, 0x2, Modifier(0));
519
520 FTZ_(38);
521 DNZ_(39);
522 SAT_(3a);
523 if (neg)
524 code[1] ^= 1 << 22;
525
526 assert(i->postFactor == 0);
527 } else {
528 emitForm_21(i, 0x234, 0xc34);
529 code[1] |= ((i->postFactor > 0) ?
530 (7 - i->postFactor) : (0 - i->postFactor)) << 12;
531
532 RND_(2a, F);
533 FTZ_(2f);
534 DNZ_(30);
535 SAT_(35);
536
537 if (code[0] & 0x1) {
538 if (neg)
539 code[1] ^= 1 << 27;
540 } else
541 if (neg) {
542 code[1] |= 1 << 19;
543 }
544 }
545 }
546
547 void
548 CodeEmitterGK110::emitDMUL(const Instruction *i)
549 {
550 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
551
552 assert(!i->postFactor);
553 assert(!i->saturate);
554 assert(!i->ftz);
555 assert(!i->dnz);
556
557 emitForm_21(i, 0x240, 0xc40);
558
559 RND_(2a, F);
560
561 if (code[0] & 0x1) {
562 if (neg)
563 code[1] ^= 1 << 27;
564 } else
565 if (neg) {
566 code[1] |= 1 << 19;
567 }
568 }
569
570 void
571 CodeEmitterGK110::emitIMUL(const Instruction *i)
572 {
573 assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
574 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
575
576 if (isLIMM(i->src(1), TYPE_S32)) {
577 emitForm_L(i, 0x280, 2, Modifier(0));
578
579 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
580 code[1] |= 1 << 24;
581 if (i->sType == TYPE_S32)
582 code[1] |= 3 << 25;
583 } else {
584 emitForm_21(i, 0x21c, 0xc1c);
585
586 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
587 code[1] |= 1 << 10;
588 if (i->sType == TYPE_S32)
589 code[1] |= 3 << 11;
590 }
591 }
592
593 void
594 CodeEmitterGK110::emitFADD(const Instruction *i)
595 {
596 if (isLIMM(i->src(1), TYPE_F32)) {
597 assert(i->rnd == ROUND_N);
598 assert(!i->saturate);
599
600 Modifier mod = i->src(1).mod ^
601 Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
602
603 emitForm_L(i, 0x400, 0, mod);
604
605 FTZ_(3a);
606 NEG_(3b, 0);
607 ABS_(39, 0);
608 } else {
609 emitForm_21(i, 0x22c, 0xc2c);
610
611 FTZ_(2f);
612 RND_(2a, F);
613 ABS_(31, 0);
614 NEG_(33, 0);
615 SAT_(35);
616
617 if (code[0] & 0x1) {
618 modNegAbsF32_3b(i, 1);
619 if (i->op == OP_SUB) code[1] ^= 1 << 27;
620 } else {
621 ABS_(34, 1);
622 NEG_(30, 1);
623 if (i->op == OP_SUB) code[1] ^= 1 << 16;
624 }
625 }
626 }
627
628 void
629 CodeEmitterGK110::emitDADD(const Instruction *i)
630 {
631 assert(!i->saturate);
632 assert(!i->ftz);
633
634 emitForm_21(i, 0x238, 0xc38);
635 RND_(2a, F);
636 ABS_(31, 0);
637 NEG_(33, 0);
638 if (code[0] & 0x1) {
639 modNegAbsF32_3b(i, 1);
640 if (i->op == OP_SUB) code[1] ^= 1 << 27;
641 } else {
642 NEG_(30, 1);
643 ABS_(34, 1);
644 if (i->op == OP_SUB) code[1] ^= 1 << 16;
645 }
646 }
647
648 void
649 CodeEmitterGK110::emitUADD(const Instruction *i)
650 {
651 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
652
653 if (i->op == OP_SUB)
654 addOp ^= 1;
655
656 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
657
658 if (isLIMM(i->src(1), TYPE_S32)) {
659 emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
660
661 if (addOp & 2)
662 code[1] |= 1 << 27;
663
664 assert(!i->defExists(1));
665 assert(i->flagsSrc < 0);
666
667 SAT_(39);
668 } else {
669 emitForm_21(i, 0x208, 0xc08);
670
671 assert(addOp != 3); // would be add-plus-one
672
673 code[1] |= addOp << 19;
674
675 if (i->defExists(1))
676 code[1] |= 1 << 18; // write carry
677 if (i->flagsSrc >= 0)
678 code[1] |= 1 << 14; // add carry
679
680 SAT_(35);
681 }
682 }
683
684 // TODO: shl-add
685 void
686 CodeEmitterGK110::emitIMAD(const Instruction *i)
687 {
688 uint8_t addOp =
689 (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
690
691 emitForm_21(i, 0x100, 0xa00);
692
693 assert(addOp != 3);
694 code[1] |= addOp << 26;
695
696 if (i->sType == TYPE_S32)
697 code[1] |= (1 << 19) | (1 << 24);
698
699 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
700 code[1] |= 1 << 25;
701 SAT_(35);
702 }
703
704 void
705 CodeEmitterGK110::emitISAD(const Instruction *i)
706 {
707 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
708
709 emitForm_21(i, 0x1f4, 0xb74);
710
711 if (i->dType == TYPE_S32)
712 code[1] |= 1 << 19;
713 }
714
715 void
716 CodeEmitterGK110::emitNOT(const Instruction *i)
717 {
718 code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
719 code[1] = 0x22003800;
720
721 emitPredicate(i);
722
723 defId(i->def(0), 2);
724
725 switch (i->src(0).getFile()) {
726 case FILE_GPR:
727 code[1] |= 0xc << 28;
728 srcId(i->src(0), 23);
729 break;
730 case FILE_MEMORY_CONST:
731 code[1] |= 0x4 << 28;
732 setCAddress14(i->src(1));
733 break;
734 default:
735 assert(0);
736 break;
737 }
738 }
739
740 void
741 CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
742 {
743 if (isLIMM(i->src(1), TYPE_S32)) {
744 emitForm_L(i, 0x200, 0, i->src(1).mod);
745 code[1] |= subOp << 24;
746 NOT_(3a, 0);
747 } else {
748 emitForm_21(i, 0x220, 0xc20);
749 code[1] |= subOp << 12;
750 NOT_(2a, 0);
751 NOT_(2b, 1);
752 }
753 }
754
755 void
756 CodeEmitterGK110::emitPOPC(const Instruction *i)
757 {
758 assert(!isLIMM(i->src(1), TYPE_S32, true));
759
760 emitForm_21(i, 0x204, 0xc04);
761
762 NOT_(2a, 0);
763 if (!(code[0] & 0x1))
764 NOT_(2b, 1);
765 }
766
767 void
768 CodeEmitterGK110::emitINSBF(const Instruction *i)
769 {
770 emitForm_21(i, 0x1f8, 0xb78);
771 }
772
773 void
774 CodeEmitterGK110::emitEXTBF(const Instruction *i)
775 {
776 emitForm_21(i, 0x600, 0xc00);
777
778 if (i->dType == TYPE_S32)
779 code[1] |= 0x80000;
780 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
781 code[1] |= 0x800;
782 }
783
784 void
785 CodeEmitterGK110::emitBFIND(const Instruction *i)
786 {
787 emitForm_C(i, 0x218, 0x2);
788
789 if (i->dType == TYPE_S32)
790 code[1] |= 0x80000;
791 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
792 code[1] |= 0x800;
793 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
794 code[1] |= 0x1000;
795 }
796
797 void
798 CodeEmitterGK110::emitShift(const Instruction *i)
799 {
800 if (i->op == OP_SHR) {
801 emitForm_21(i, 0x214, 0xc14);
802 if (isSignedType(i->dType))
803 code[1] |= 1 << 19;
804 } else {
805 emitForm_21(i, 0x224, 0xc24);
806 }
807
808 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
809 code[1] |= 1 << 10;
810 }
811
812 void
813 CodeEmitterGK110::emitPreOp(const Instruction *i)
814 {
815 emitForm_C(i, 0x248, 0x2);
816
817 if (i->op == OP_PREEX2)
818 code[1] |= 1 << 10;
819
820 NEG_(30, 0);
821 ABS_(34, 0);
822 }
823
824 void
825 CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
826 {
827 code[0] = 0x00000002 | (subOp << 23);
828 code[1] = 0x84000000;
829
830 emitPredicate(i);
831
832 defId(i->def(0), 2);
833 srcId(i->src(0), 10);
834
835 NEG_(33, 0);
836 ABS_(31, 0);
837 SAT_(35);
838 }
839
840 void
841 CodeEmitterGK110::emitMINMAX(const Instruction *i)
842 {
843 uint32_t op2, op1;
844
845 switch (i->dType) {
846 case TYPE_U32:
847 case TYPE_S32:
848 op2 = 0x210;
849 op1 = 0xc10;
850 break;
851 case TYPE_F32:
852 op2 = 0x230;
853 op1 = 0xc30;
854 break;
855 case TYPE_F64:
856 op2 = 0x228;
857 op1 = 0xc28;
858 break;
859 default:
860 assert(0);
861 op2 = 0;
862 op1 = 0;
863 break;
864 }
865 emitForm_21(i, op2, op1);
866
867 if (i->dType == TYPE_S32)
868 code[1] |= 1 << 19;
869 code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
870
871 FTZ_(2f);
872 ABS_(31, 0);
873 NEG_(33, 0);
874 if (code[0] & 0x1) {
875 modNegAbsF32_3b(i, 1);
876 } else {
877 ABS_(34, 1);
878 NEG_(30, 1);
879 }
880 }
881
882 void
883 CodeEmitterGK110::emitCVT(const Instruction *i)
884 {
885 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
886 const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
887 const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
888
889 bool sat = i->saturate;
890 bool abs = i->src(0).mod.abs();
891 bool neg = i->src(0).mod.neg();
892
893 RoundMode rnd = i->rnd;
894
895 switch (i->op) {
896 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
897 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
898 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
899 case OP_SAT: sat = true; break;
900 case OP_NEG: neg = !neg; break;
901 case OP_ABS: abs = true; neg = false; break;
902 default:
903 break;
904 }
905
906 DataType dType;
907
908 if (i->op == OP_NEG && i->dType == TYPE_U32)
909 dType = TYPE_S32;
910 else
911 dType = i->dType;
912
913
914 uint32_t op;
915
916 if (f2f) op = 0x254;
917 else if (f2i) op = 0x258;
918 else if (i2f) op = 0x25c;
919 else op = 0x260;
920
921 emitForm_C(i, op, 0x2);
922
923 FTZ_(2f);
924 if (neg) code[1] |= 1 << 16;
925 if (abs) code[1] |= 1 << 20;
926 if (sat) code[1] |= 1 << 21;
927
928 emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
929
930 code[0] |= typeSizeofLog2(dType) << 10;
931 code[0] |= typeSizeofLog2(i->sType) << 12;
932 code[1] |= i->subOp << 12;
933
934 if (isSignedIntType(dType))
935 code[0] |= 0x4000;
936 if (isSignedIntType(i->sType))
937 code[0] |= 0x8000;
938 }
939
940 void
941 CodeEmitterGK110::emitSET(const CmpInstruction *i)
942 {
943 uint16_t op1, op2;
944
945 if (i->def(0).getFile() == FILE_PREDICATE) {
946 switch (i->sType) {
947 case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
948 case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
949 default:
950 op2 = 0x1b0;
951 op1 = 0xb30;
952 break;
953 }
954 emitForm_21(i, op2, op1);
955
956 NEG_(2e, 0);
957 ABS_(9, 0);
958 if (!(code[0] & 0x1)) {
959 NEG_(8, 1);
960 ABS_(2f, 1);
961 } else {
962 modNegAbsF32_3b(i, 1);
963 }
964 FTZ_(32);
965
966 // normal DST field is negated predicate result
967 code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
968 if (i->defExists(1))
969 defId(i->def(1), 2);
970 else
971 code[0] |= 0x1c;
972 } else {
973 switch (i->sType) {
974 case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
975 case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
976 default:
977 op2 = 0x1a8;
978 op1 = 0xb28;
979 break;
980 }
981 emitForm_21(i, op2, op1);
982
983 NEG_(2e, 0);
984 ABS_(39, 0);
985 if (!(code[0] & 0x1)) {
986 NEG_(38, 1);
987 ABS_(2f, 1);
988 } else {
989 modNegAbsF32_3b(i, 1);
990 }
991 FTZ_(3a);
992
993 if (i->dType == TYPE_F32) {
994 if (isFloatType(i->sType))
995 code[1] |= 1 << 23;
996 else
997 code[1] |= 1 << 15;
998 }
999 }
1000 if (i->sType == TYPE_S32)
1001 code[1] |= 1 << 19;
1002
1003 if (i->op != OP_SET) {
1004 switch (i->op) {
1005 case OP_SET_AND: code[1] |= 0x0 << 16; break;
1006 case OP_SET_OR: code[1] |= 0x1 << 16; break;
1007 case OP_SET_XOR: code[1] |= 0x2 << 16; break;
1008 default:
1009 assert(0);
1010 break;
1011 }
1012 srcId(i->src(2), 0x2a);
1013 } else {
1014 code[1] |= 0x7 << 10;
1015 }
1016 emitCondCode(i->setCond,
1017 isFloatType(i->sType) ? 0x33 : 0x34,
1018 isFloatType(i->sType) ? 0xf : 0x7);
1019 }
1020
1021 void
1022 CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
1023 {
1024 CondCode cc = i->setCond;
1025 if (i->src(2).mod.neg())
1026 cc = reverseCondCode(cc);
1027
1028 if (i->dType == TYPE_F32) {
1029 emitForm_21(i, 0x1d0, 0xb50);
1030 FTZ_(32);
1031 emitCondCode(cc, 0x33, 0xf);
1032 } else {
1033 emitForm_21(i, 0x1a0, 0xb20);
1034 emitCondCode(cc, 0x34, 0x7);
1035 }
1036 }
1037
1038 void CodeEmitterGK110::emitSELP(const Instruction *i)
1039 {
1040 emitForm_21(i, 0x250, 0x050);
1041
1042 if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)))
1043 code[1] |= 1 << 13;
1044 }
1045
1046 void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
1047 {
1048 code[0] = 0x0000003e | (i->subOp << 23);
1049 code[1] = 0x77000000;
1050
1051 emitPredicate(i);
1052 }
1053
1054 void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
1055 {
1056 code[0] = 0x00000002;
1057 code[1] = 0x76c00000;
1058
1059 code[1] |= i->tex.r << 9;
1060 // code[1] |= i->tex.s << (9 + 8);
1061
1062 if (i->tex.liveOnly)
1063 code[0] |= 0x80000000;
1064
1065 defId(i->def(0), 2);
1066 srcId(i->src(0), 10);
1067 }
1068
1069 static inline bool
1070 isNextIndependentTex(const TexInstruction *i)
1071 {
1072 if (!i->next || !isTextureOp(i->next->op))
1073 return false;
1074 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1075 return false;
1076 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1077 }
1078
1079 void
1080 CodeEmitterGK110::emitTEX(const TexInstruction *i)
1081 {
1082 const bool ind = i->tex.rIndirectSrc >= 0;
1083
1084 if (ind) {
1085 code[0] = 0x00000002;
1086 switch (i->op) {
1087 case OP_TXD:
1088 code[1] = 0x7e000000;
1089 break;
1090 case OP_TXLQ:
1091 code[1] = 0x7e800000;
1092 break;
1093 case OP_TXF:
1094 code[1] = 0x78000000;
1095 break;
1096 case OP_TXG:
1097 code[1] = 0x7dc00000;
1098 break;
1099 default:
1100 code[1] = 0x7d800000;
1101 break;
1102 }
1103 } else {
1104 switch (i->op) {
1105 case OP_TXD:
1106 code[0] = 0x00000002;
1107 code[1] = 0x76000000;
1108 code[1] |= i->tex.r << 9;
1109 break;
1110 case OP_TXLQ:
1111 code[0] = 0x00000002;
1112 code[1] = 0x76800000;
1113 code[1] |= i->tex.r << 9;
1114 break;
1115 case OP_TXF:
1116 code[0] = 0x00000002;
1117 code[1] = 0x70000000;
1118 code[1] |= i->tex.r << 13;
1119 break;
1120 case OP_TXG:
1121 code[0] = 0x00000001;
1122 code[1] = 0x70000000;
1123 code[1] |= i->tex.r << 15;
1124 break;
1125 default:
1126 code[0] = 0x00000001;
1127 code[1] = 0x60000000;
1128 code[1] |= i->tex.r << 15;
1129 break;
1130 }
1131 }
1132
1133 code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode
1134
1135 if (i->tex.liveOnly)
1136 code[0] |= 0x80000000;
1137
1138 switch (i->op) {
1139 case OP_TEX: break;
1140 case OP_TXB: code[1] |= 0x2000; break;
1141 case OP_TXL: code[1] |= 0x3000; break;
1142 case OP_TXF: break;
1143 case OP_TXG: break;
1144 case OP_TXD: break;
1145 case OP_TXLQ: break;
1146 default:
1147 assert(!"invalid texture op");
1148 break;
1149 }
1150
1151 if (i->op == OP_TXF) {
1152 if (!i->tex.levelZero)
1153 code[1] |= 0x1000;
1154 } else
1155 if (i->tex.levelZero) {
1156 code[1] |= 0x1000;
1157 }
1158
1159 if (i->op != OP_TXD && i->tex.derivAll)
1160 code[1] |= 0x200;
1161
1162 emitPredicate(i);
1163
1164 code[1] |= i->tex.mask << 2;
1165
1166 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1167
1168 defId(i->def(0), 2);
1169 srcId(i->src(0), 10);
1170 srcId(i, src1, 23);
1171
1172 if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;
1173
1174 // texture target:
1175 code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
1176 if (i->tex.target.isArray())
1177 code[1] |= 0x40;
1178 if (i->tex.target.isShadow())
1179 code[1] |= 0x400;
1180 if (i->tex.target == TEX_TARGET_2D_MS ||
1181 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1182 code[1] |= 0x800;
1183
1184 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1185 // ?
1186 }
1187
1188 if (i->tex.useOffsets == 1) {
1189 switch (i->op) {
1190 case OP_TXF: code[1] |= 0x200; break;
1191 case OP_TXD: code[1] |= 0x00400000; break;
1192 default: code[1] |= 0x800; break;
1193 }
1194 }
1195 if (i->tex.useOffsets == 4)
1196 code[1] |= 0x1000;
1197 }
1198
1199 void
1200 CodeEmitterGK110::emitTXQ(const TexInstruction *i)
1201 {
1202 code[0] = 0x00000002;
1203 code[1] = 0x75400001;
1204
1205 switch (i->tex.query) {
1206 case TXQ_DIMS: code[0] |= 0x01 << 25; break;
1207 case TXQ_TYPE: code[0] |= 0x02 << 25; break;
1208 case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
1209 case TXQ_FILTER: code[0] |= 0x10 << 25; break;
1210 case TXQ_LOD: code[0] |= 0x12 << 25; break;
1211 case TXQ_BORDER_COLOUR: code[0] |= 0x16 << 25; break;
1212 default:
1213 assert(!"invalid texture query");
1214 break;
1215 }
1216
1217 code[1] |= i->tex.mask << 2;
1218 code[1] |= i->tex.r << 9;
1219 if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
1220 code[1] |= 0x08000000;
1221
1222 defId(i->def(0), 2);
1223 srcId(i->src(0), 10);
1224
1225 emitPredicate(i);
1226 }
1227
1228 void
1229 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1230 {
1231 code[0] = 0x00000002 | ((qOp & 1) << 31);
1232 code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12);
1233
1234 defId(i->def(0), 2);
1235 srcId(i->src(0), 10);
1236 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23);
1237
1238 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1239 code[1] |= 1 << 9; // dall
1240
1241 emitPredicate(i);
1242 }
1243
1244 void
1245 CodeEmitterGK110::emitPIXLD(const Instruction *i)
1246 {
1247 emitForm_L(i, 0x7f4, 2, Modifier(0));
1248 code[1] |= i->subOp << 2;
1249 code[1] |= 0x00070000;
1250 }
1251
1252 void
1253 CodeEmitterGK110::emitBAR(const Instruction *i)
1254 {
1255 code[0] = 0x00000002;
1256 code[1] = 0x85400000;
1257
1258 switch (i->subOp) {
1259 case NV50_IR_SUBOP_BAR_ARRIVE: code[1] |= 0x08; break;
1260 case NV50_IR_SUBOP_BAR_RED_AND: code[1] |= 0x50; break;
1261 case NV50_IR_SUBOP_BAR_RED_OR: code[1] |= 0x90; break;
1262 case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;
1263 default:
1264 code[1] |= 0x20;
1265 assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1266 break;
1267 }
1268
1269 emitPredicate(i);
1270
1271 srcId(i->src(0), 10);
1272 srcId(i->src(1), 23);
1273 }
1274
1275 void
1276 CodeEmitterGK110::emitFlow(const Instruction *i)
1277 {
1278 const FlowInstruction *f = i->asFlow();
1279
1280 unsigned mask; // bit 0: predicate, bit 1: target
1281
1282 code[0] = 0x00000000;
1283
1284 switch (i->op) {
1285 case OP_BRA:
1286 code[1] = f->absolute ? 0x10800000 : 0x12000000;
1287 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1288 code[0] |= 0x80;
1289 mask = 3;
1290 break;
1291 case OP_CALL:
1292 code[1] = f->absolute ? 0x11000000 : 0x13000000;
1293 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1294 code[0] |= 0x80;
1295 mask = 2;
1296 break;
1297
1298 case OP_EXIT: code[1] = 0x18000000; mask = 1; break;
1299 case OP_RET: code[1] = 0x19000000; mask = 1; break;
1300 case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
1301 case OP_BREAK: code[1] = 0x1a000000; mask = 1; break;
1302 case OP_CONT: code[1] = 0x1a800000; mask = 1; break;
1303
1304 case OP_JOINAT: code[1] = 0x14800000; mask = 2; break;
1305 case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
1306 case OP_PRECONT: code[1] = 0x15800000; mask = 2; break;
1307 case OP_PRERET: code[1] = 0x13800000; mask = 2; break;
1308
1309 case OP_QUADON: code[1] = 0x1b800000; mask = 0; break;
1310 case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
1311 case OP_BRKPT: code[1] = 0x00000000; mask = 0; break;
1312 default:
1313 assert(!"invalid flow operation");
1314 return;
1315 }
1316
1317 if (mask & 1) {
1318 emitPredicate(i);
1319 if (i->flagsSrc < 0)
1320 code[0] |= 0x3c;
1321 }
1322
1323 if (!f)
1324 return;
1325
1326 if (f->allWarp)
1327 code[0] |= 1 << 9;
1328 if (f->limit)
1329 code[0] |= 1 << 8;
1330
1331 if (f->op == OP_CALL) {
1332 if (f->builtin) {
1333 assert(f->absolute);
1334 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1335 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
1336 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
1337 } else {
1338 assert(!f->absolute);
1339 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1340 code[0] |= (pcRel & 0x1ff) << 23;
1341 code[1] |= (pcRel >> 9) & 0x7fff;
1342 }
1343 } else
1344 if (mask & 2) {
1345 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1346 if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
1347 pcRel += 8;
1348 // currently we don't want absolute branches
1349 assert(!f->absolute);
1350 code[0] |= (pcRel & 0x1ff) << 23;
1351 code[1] |= (pcRel >> 9) & 0x7fff;
1352 }
1353 }
1354
1355 void
1356 CodeEmitterGK110::emitAFETCH(const Instruction *i)
1357 {
1358 uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
1359
1360 code[0] = 0x00000002 | (offset << 23);
1361 code[1] = 0x7d000000 | (offset >> 9);
1362
1363 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1364 code[1] |= 0x8;
1365
1366 emitPredicate(i);
1367
1368 defId(i->def(0), 2);
1369 srcId(i->src(0).getIndirect(0), 10);
1370 }
1371
1372 void
1373 CodeEmitterGK110::emitPFETCH(const Instruction *i)
1374 {
1375 uint32_t prim = i->src(0).get()->reg.data.u32;
1376
1377 code[0] = 0x00000002 | ((prim & 0xff) << 23);
1378 code[1] = 0x7f800000;
1379
1380 emitPredicate(i);
1381
1382 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1383
1384 defId(i->def(0), 2);
1385 srcId(i, src1, 10);
1386 }
1387
1388 void
1389 CodeEmitterGK110::emitVFETCH(const Instruction *i)
1390 {
1391 unsigned int size = typeSizeof(i->dType);
1392 uint32_t offset = i->src(0).get()->reg.data.offset;
1393
1394 code[0] = 0x00000002 | (offset << 23);
1395 code[1] = 0x7ec00000 | (offset >> 9);
1396 code[1] |= (size / 4 - 1) << 18;
1397
1398 if (i->perPatch)
1399 code[1] |= 0x4;
1400 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1401 code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads
1402
1403 emitPredicate(i);
1404
1405 defId(i->def(0), 2);
1406 srcId(i->src(0).getIndirect(0), 10);
1407 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
1408 }
1409
1410 void
1411 CodeEmitterGK110::emitEXPORT(const Instruction *i)
1412 {
1413 unsigned int size = typeSizeof(i->dType);
1414 uint32_t offset = i->src(0).get()->reg.data.offset;
1415
1416 code[0] = 0x00000002 | (offset << 23);
1417 code[1] = 0x7f000000 | (offset >> 9);
1418 code[1] |= (size / 4 - 1) << 18;
1419
1420 if (i->perPatch)
1421 code[1] |= 0x4;
1422
1423 emitPredicate(i);
1424
1425 assert(i->src(1).getFile() == FILE_GPR);
1426
1427 srcId(i->src(0).getIndirect(0), 10);
1428 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
1429 srcId(i->src(1), 2);
1430 }
1431
1432 void
1433 CodeEmitterGK110::emitOUT(const Instruction *i)
1434 {
1435 assert(i->src(0).getFile() == FILE_GPR);
1436
1437 emitForm_21(i, 0x1f0, 0xb70);
1438
1439 if (i->op == OP_EMIT)
1440 code[1] |= 1 << 10;
1441 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1442 code[1] |= 1 << 11;
1443 }
1444
1445 void
1446 CodeEmitterGK110::emitInterpMode(const Instruction *i)
1447 {
1448 code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
1449 code[1] |= (i->ipa & 0xc) << (19 - 2);
1450 }
1451
1452 static void
1453 interpApply(const InterpEntry *entry, uint32_t *code,
1454 bool force_persample_interp, bool flatshade)
1455 {
1456 int ipa = entry->ipa;
1457 int reg = entry->reg;
1458 int loc = entry->loc;
1459
1460 if (flatshade &&
1461 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
1462 ipa = NV50_IR_INTERP_FLAT;
1463 reg = 0xff;
1464 } else if (force_persample_interp &&
1465 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
1466 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
1467 ipa |= NV50_IR_INTERP_CENTROID;
1468 }
1469 code[loc + 1] &= ~(0xf << 19);
1470 code[loc + 1] |= (ipa & 0x3) << 21;
1471 code[loc + 1] |= (ipa & 0xc) << (19 - 2);
1472 code[loc + 0] &= ~(0xff << 23);
1473 code[loc + 0] |= reg << 23;
1474 }
1475
1476 void
1477 CodeEmitterGK110::emitINTERP(const Instruction *i)
1478 {
1479 const uint32_t base = i->getSrc(0)->reg.data.offset;
1480
1481 code[0] = 0x00000002 | (base << 31);
1482 code[1] = 0x74800000 | (base >> 1);
1483
1484 if (i->saturate)
1485 code[1] |= 1 << 18;
1486
1487 if (i->op == OP_PINTERP) {
1488 srcId(i->src(1), 23);
1489 addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
1490 } else {
1491 code[0] |= 0xff << 23;
1492 addInterp(i->ipa, 0xff, interpApply);
1493 }
1494
1495 srcId(i->src(0).getIndirect(0), 10);
1496 emitInterpMode(i);
1497
1498 emitPredicate(i);
1499 defId(i->def(0), 2);
1500
1501 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1502 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
1503 else
1504 code[1] |= 0xff << 10;
1505 }
1506
1507 void
1508 CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
1509 {
1510 uint8_t n;
1511
1512 switch (ty) {
1513 case TYPE_U8:
1514 n = 0;
1515 break;
1516 case TYPE_S8:
1517 n = 1;
1518 break;
1519 case TYPE_U16:
1520 n = 2;
1521 break;
1522 case TYPE_S16:
1523 n = 3;
1524 break;
1525 case TYPE_F32:
1526 case TYPE_U32:
1527 case TYPE_S32:
1528 n = 4;
1529 break;
1530 case TYPE_F64:
1531 case TYPE_U64:
1532 case TYPE_S64:
1533 n = 5;
1534 break;
1535 case TYPE_B128:
1536 n = 6;
1537 break;
1538 default:
1539 n = 0;
1540 assert(!"invalid ld/st type");
1541 break;
1542 }
1543 code[pos / 32] |= n << (pos % 32);
1544 }
1545
1546 void
1547 CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
1548 {
1549 uint8_t n;
1550
1551 switch (c) {
1552 case CACHE_CA:
1553 // case CACHE_WB:
1554 n = 0;
1555 break;
1556 case CACHE_CG:
1557 n = 1;
1558 break;
1559 case CACHE_CS:
1560 n = 2;
1561 break;
1562 case CACHE_CV:
1563 // case CACHE_WT:
1564 n = 3;
1565 break;
1566 default:
1567 n = 0;
1568 assert(!"invalid caching mode");
1569 break;
1570 }
1571 code[pos / 32] |= n << (pos % 32);
1572 }
1573
1574 void
1575 CodeEmitterGK110::emitSTORE(const Instruction *i)
1576 {
1577 int32_t offset = SDATA(i->src(0)).offset;
1578
1579 switch (i->src(0).getFile()) {
1580 case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
1581 case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break;
1582 case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
1583 default:
1584 assert(!"invalid memory file");
1585 break;
1586 }
1587
1588 if (i->src(0).getFile() != FILE_MEMORY_GLOBAL)
1589 offset &= 0xffffff;
1590
1591 if (code[0] & 0x2) {
1592 emitLoadStoreType(i->dType, 0x33);
1593 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
1594 emitCachingMode(i->cache, 0x2f);
1595 } else {
1596 emitLoadStoreType(i->dType, 0x38);
1597 emitCachingMode(i->cache, 0x3b);
1598 }
1599 code[0] |= offset << 23;
1600 code[1] |= offset >> 9;
1601
1602 emitPredicate(i);
1603
1604 srcId(i->src(1), 2);
1605 srcId(i->src(0).getIndirect(0), 10);
1606 }
1607
1608 void
1609 CodeEmitterGK110::emitLOAD(const Instruction *i)
1610 {
1611 int32_t offset = SDATA(i->src(0)).offset;
1612
1613 switch (i->src(0).getFile()) {
1614 case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
1615 case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break;
1616 case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
1617 case FILE_MEMORY_CONST:
1618 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1619 emitMOV(i);
1620 return;
1621 }
1622 offset &= 0xffff;
1623 code[0] = 0x00000002;
1624 code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
1625 code[1] |= i->subOp << 15;
1626 break;
1627 default:
1628 assert(!"invalid memory file");
1629 break;
1630 }
1631
1632 if (code[0] & 0x2) {
1633 offset &= 0xffffff;
1634 emitLoadStoreType(i->dType, 0x33);
1635 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
1636 emitCachingMode(i->cache, 0x2f);
1637 } else {
1638 emitLoadStoreType(i->dType, 0x38);
1639 emitCachingMode(i->cache, 0x3b);
1640 }
1641 code[0] |= offset << 23;
1642 code[1] |= offset >> 9;
1643
1644 emitPredicate(i);
1645
1646 defId(i->def(0), 2);
1647 srcId(i->src(0).getIndirect(0), 10);
1648 }
1649
1650 uint8_t
1651 CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
1652 {
1653 switch (SDATA(ref).sv.sv) {
1654 case SV_LANEID: return 0x00;
1655 case SV_PHYSID: return 0x03;
1656 case SV_VERTEX_COUNT: return 0x10;
1657 case SV_INVOCATION_ID: return 0x11;
1658 case SV_YDIR: return 0x12;
1659 case SV_THREAD_KILL: return 0x13;
1660 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1661 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1662 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1663 case SV_GRIDID: return 0x2c;
1664 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1665 case SV_LBASE: return 0x34;
1666 case SV_SBASE: return 0x30;
1667 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1668 default:
1669 assert(!"no sreg for system value");
1670 return 0;
1671 }
1672 }
1673
1674 void
1675 CodeEmitterGK110::emitMOV(const Instruction *i)
1676 {
1677 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1678 code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
1679 code[1] = 0x86400000;
1680 emitPredicate(i);
1681 defId(i->def(0), 2);
1682 } else
1683 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1684 code[0] = 0x00000002 | (i->lanes << 14);
1685 code[1] = 0x74000000;
1686 emitPredicate(i);
1687 defId(i->def(0), 2);
1688 setImmediate32(i, 0, Modifier(0));
1689 } else
1690 if (i->src(0).getFile() == FILE_PREDICATE) {
1691 code[0] = 0x00000002;
1692 code[1] = 0x84401c07;
1693 emitPredicate(i);
1694 defId(i->def(0), 2);
1695 srcId(i->src(0), 14);
1696 } else {
1697 emitForm_C(i, 0x24c, 2);
1698 code[1] |= i->lanes << 10;
1699 }
1700 }
1701
1702 void CodeEmitterGK110::emitMEMBAR(const Instruction *i)
1703 {
1704 code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;
1705 code[1] = 0x7cc00000;
1706
1707 emitPredicate(i);
1708 }
1709
1710 bool
1711 CodeEmitterGK110::emitInstruction(Instruction *insn)
1712 {
1713 const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;
1714
1715 if (insn->encSize != 8) {
1716 ERROR("skipping unencodable instruction: ");
1717 insn->print();
1718 return false;
1719 } else
1720 if (codeSize + size > codeSizeLimit) {
1721 ERROR("code emitter output buffer too small\n");
1722 return false;
1723 }
1724
1725 if (writeIssueDelays) {
1726 int id = (codeSize & 0x3f) / 8 - 1;
1727 if (id < 0) {
1728 id += 1;
1729 code[0] = 0x00000000; // cf issue delay "instruction"
1730 code[1] = 0x08000000;
1731 code += 2;
1732 codeSize += 8;
1733 }
1734 uint32_t *data = code - (id * 2 + 2);
1735
1736 switch (id) {
1737 case 0: data[0] |= insn->sched << 2; break;
1738 case 1: data[0] |= insn->sched << 10; break;
1739 case 2: data[0] |= insn->sched << 18; break;
1740 case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
1741 case 4: data[1] |= insn->sched << 2; break;
1742 case 5: data[1] |= insn->sched << 10; break;
1743 case 6: data[1] |= insn->sched << 18; break;
1744 default:
1745 assert(0);
1746 break;
1747 }
1748 }
1749
1750 // assert that instructions with multiple defs don't corrupt registers
1751 for (int d = 0; insn->defExists(d); ++d)
1752 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
1753
1754 switch (insn->op) {
1755 case OP_MOV:
1756 case OP_RDSV:
1757 emitMOV(insn);
1758 break;
1759 case OP_NOP:
1760 break;
1761 case OP_LOAD:
1762 emitLOAD(insn);
1763 break;
1764 case OP_STORE:
1765 emitSTORE(insn);
1766 break;
1767 case OP_LINTERP:
1768 case OP_PINTERP:
1769 emitINTERP(insn);
1770 break;
1771 case OP_VFETCH:
1772 emitVFETCH(insn);
1773 break;
1774 case OP_EXPORT:
1775 emitEXPORT(insn);
1776 break;
1777 case OP_AFETCH:
1778 emitAFETCH(insn);
1779 break;
1780 case OP_PFETCH:
1781 emitPFETCH(insn);
1782 break;
1783 case OP_EMIT:
1784 case OP_RESTART:
1785 emitOUT(insn);
1786 break;
1787 case OP_ADD:
1788 case OP_SUB:
1789 if (insn->dType == TYPE_F64)
1790 emitDADD(insn);
1791 else if (isFloatType(insn->dType))
1792 emitFADD(insn);
1793 else
1794 emitUADD(insn);
1795 break;
1796 case OP_MUL:
1797 if (insn->dType == TYPE_F64)
1798 emitDMUL(insn);
1799 else if (isFloatType(insn->dType))
1800 emitFMUL(insn);
1801 else
1802 emitIMUL(insn);
1803 break;
1804 case OP_MAD:
1805 case OP_FMA:
1806 if (insn->dType == TYPE_F64)
1807 emitDMAD(insn);
1808 else if (isFloatType(insn->dType))
1809 emitFMAD(insn);
1810 else
1811 emitIMAD(insn);
1812 break;
1813 case OP_SAD:
1814 emitISAD(insn);
1815 break;
1816 case OP_NOT:
1817 emitNOT(insn);
1818 break;
1819 case OP_AND:
1820 emitLogicOp(insn, 0);
1821 break;
1822 case OP_OR:
1823 emitLogicOp(insn, 1);
1824 break;
1825 case OP_XOR:
1826 emitLogicOp(insn, 2);
1827 break;
1828 case OP_SHL:
1829 case OP_SHR:
1830 emitShift(insn);
1831 break;
1832 case OP_SET:
1833 case OP_SET_AND:
1834 case OP_SET_OR:
1835 case OP_SET_XOR:
1836 emitSET(insn->asCmp());
1837 break;
1838 case OP_SELP:
1839 emitSELP(insn);
1840 break;
1841 case OP_SLCT:
1842 emitSLCT(insn->asCmp());
1843 break;
1844 case OP_MIN:
1845 case OP_MAX:
1846 emitMINMAX(insn);
1847 break;
1848 case OP_ABS:
1849 case OP_NEG:
1850 case OP_CEIL:
1851 case OP_FLOOR:
1852 case OP_TRUNC:
1853 case OP_CVT:
1854 case OP_SAT:
1855 emitCVT(insn);
1856 break;
1857 case OP_RSQ:
1858 emitSFnOp(insn, 5 + 2 * insn->subOp);
1859 break;
1860 case OP_RCP:
1861 emitSFnOp(insn, 4 + 2 * insn->subOp);
1862 break;
1863 case OP_LG2:
1864 emitSFnOp(insn, 3);
1865 break;
1866 case OP_EX2:
1867 emitSFnOp(insn, 2);
1868 break;
1869 case OP_SIN:
1870 emitSFnOp(insn, 1);
1871 break;
1872 case OP_COS:
1873 emitSFnOp(insn, 0);
1874 break;
1875 case OP_PRESIN:
1876 case OP_PREEX2:
1877 emitPreOp(insn);
1878 break;
1879 case OP_TEX:
1880 case OP_TXB:
1881 case OP_TXL:
1882 case OP_TXD:
1883 case OP_TXF:
1884 case OP_TXG:
1885 case OP_TXLQ:
1886 emitTEX(insn->asTex());
1887 break;
1888 case OP_TXQ:
1889 emitTXQ(insn->asTex());
1890 break;
1891 case OP_TEXBAR:
1892 emitTEXBAR(insn);
1893 break;
1894 case OP_PIXLD:
1895 emitPIXLD(insn);
1896 break;
1897 case OP_BRA:
1898 case OP_CALL:
1899 case OP_PRERET:
1900 case OP_RET:
1901 case OP_DISCARD:
1902 case OP_EXIT:
1903 case OP_PRECONT:
1904 case OP_CONT:
1905 case OP_PREBREAK:
1906 case OP_BREAK:
1907 case OP_JOINAT:
1908 case OP_BRKPT:
1909 case OP_QUADON:
1910 case OP_QUADPOP:
1911 emitFlow(insn);
1912 break;
1913 case OP_QUADOP:
1914 emitQUADOP(insn, insn->subOp, insn->lanes);
1915 break;
1916 case OP_DFDX:
1917 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
1918 break;
1919 case OP_DFDY:
1920 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
1921 break;
1922 case OP_POPCNT:
1923 emitPOPC(insn);
1924 break;
1925 case OP_INSBF:
1926 emitINSBF(insn);
1927 break;
1928 case OP_EXTBF:
1929 emitEXTBF(insn);
1930 break;
1931 case OP_BFIND:
1932 emitBFIND(insn);
1933 break;
1934 case OP_JOIN:
1935 emitNOP(insn);
1936 insn->join = 1;
1937 break;
1938 case OP_BAR:
1939 emitBAR(insn);
1940 break;
1941 case OP_MEMBAR:
1942 emitMEMBAR(insn);
1943 break;
1944 case OP_PHI:
1945 case OP_UNION:
1946 case OP_CONSTRAINT:
1947 ERROR("operation should have been eliminated");
1948 return false;
1949 case OP_EXP:
1950 case OP_LOG:
1951 case OP_SQRT:
1952 case OP_POW:
1953 ERROR("operation should have been lowered\n");
1954 return false;
1955 default:
1956 ERROR("unknown op: %u\n", insn->op);
1957 return false;
1958 }
1959
1960 if (insn->join)
1961 code[0] |= 1 << 22;
1962
1963 code += 2;
1964 codeSize += 8;
1965 return true;
1966 }
1967
1968 uint32_t
1969 CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const
1970 {
1971 // No more short instruction encodings.
1972 return 8;
1973 }
1974
1975 void
1976 CodeEmitterGK110::prepareEmission(Function *func)
1977 {
1978 const Target *targ = func->getProgram()->getTarget();
1979
1980 CodeEmitter::prepareEmission(func);
1981
1982 if (targ->hasSWSched)
1983 calculateSchedDataNVC0(targ, func);
1984 }
1985
1986 CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target)
1987 : CodeEmitter(target),
1988 targNVC0(target),
1989 writeIssueDelays(target->hasSWSched)
1990 {
1991 code = NULL;
1992 codeSize = codeSizeLimit = 0;
1993 relocInfo = NULL;
1994 }
1995
1996 CodeEmitter *
1997 TargetNVC0::createCodeEmitterGK110(Program::Type type)
1998 {
1999 CodeEmitterGK110 *emit = new CodeEmitterGK110(this);
2000 emit->setProgramType(type);
2001 return emit;
2002 }
2003
2004 } // namespace nv50_ir