radeon: enable Hyper-Z on r600g and radeonsi by default
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gk110.cpp
1 /*
2 * Copyright 2012 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir_target_nvc0.h"
24
25 // CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.
26
27 namespace nv50_ir {
28
29 class CodeEmitterGK110 : public CodeEmitter
30 {
31 public:
32 CodeEmitterGK110(const TargetNVC0 *);
33
34 virtual bool emitInstruction(Instruction *);
35 virtual uint32_t getMinEncodingSize(const Instruction *) const;
36 virtual void prepareEmission(Function *);
37
38 inline void setProgramType(Program::Type pType) { progType = pType; }
39
40 private:
41 const TargetNVC0 *targNVC0;
42
43 Program::Type progType;
44
45 const bool writeIssueDelays;
46
47 private:
48 void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
49 void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
50 void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier);
51
52 void emitPredicate(const Instruction *);
53
54 void setCAddress14(const ValueRef&);
55 void setShortImmediate(const Instruction *, const int s);
56 void setImmediate32(const Instruction *, const int s, Modifier);
57
58 void modNegAbsF32_3b(const Instruction *, const int s);
59
60 void emitCondCode(CondCode cc, int pos, uint8_t mask);
61 void emitInterpMode(const Instruction *);
62 void emitLoadStoreType(DataType ty, const int pos);
63 void emitCachingMode(CacheMode c, const int pos);
64
65 inline uint8_t getSRegEncoding(const ValueRef&);
66
67 void emitRoundMode(RoundMode, const int pos, const int rintPos);
68 void emitRoundModeF(RoundMode, const int pos);
69 void emitRoundModeI(RoundMode, const int pos);
70
71 void emitNegAbs12(const Instruction *);
72
73 void emitNOP(const Instruction *);
74
75 void emitLOAD(const Instruction *);
76 void emitSTORE(const Instruction *);
77 void emitMOV(const Instruction *);
78
79 void emitINTERP(const Instruction *);
80 void emitPFETCH(const Instruction *);
81 void emitVFETCH(const Instruction *);
82 void emitEXPORT(const Instruction *);
83 void emitOUT(const Instruction *);
84
85 void emitUADD(const Instruction *);
86 void emitFADD(const Instruction *);
87 void emitIMUL(const Instruction *);
88 void emitFMUL(const Instruction *);
89 void emitIMAD(const Instruction *);
90 void emitISAD(const Instruction *);
91 void emitFMAD(const Instruction *);
92
93 void emitNOT(const Instruction *);
94 void emitLogicOp(const Instruction *, uint8_t subOp);
95 void emitPOPC(const Instruction *);
96 void emitINSBF(const Instruction *);
97 void emitEXTBF(const Instruction *);
98 void emitBFIND(const Instruction *);
99 void emitShift(const Instruction *);
100
101 void emitSFnOp(const Instruction *, uint8_t subOp);
102
103 void emitCVT(const Instruction *);
104 void emitMINMAX(const Instruction *);
105 void emitPreOp(const Instruction *);
106
107 void emitSET(const CmpInstruction *);
108 void emitSLCT(const CmpInstruction *);
109 void emitSELP(const Instruction *);
110
111 void emitTEXBAR(const Instruction *);
112 void emitTEX(const TexInstruction *);
113 void emitTEXCSAA(const TexInstruction *);
114 void emitTXQ(const TexInstruction *);
115
116 void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
117
118 void emitPIXLD(const Instruction *);
119
120 void emitFlow(const Instruction *);
121
122 inline void defId(const ValueDef&, const int pos);
123 inline void srcId(const ValueRef&, const int pos);
124 inline void srcId(const ValueRef *, const int pos);
125 inline void srcId(const Instruction *, int s, const int pos);
126
127 inline void srcAddr32(const ValueRef&, const int pos); // address / 4
128
129 inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);
130 };
131
// Register id written by srcId()/defId() when an operand has no value.
#define GK110_GPR_ZERO 255

// The helpers below expect the enclosing function to have an instruction
// pointer named `i` and the output words `code[]` in scope.  The argument `b`
// is a bit position written in hexadecimal WITHOUT the 0x prefix; bit b lives
// in code[b / 32] at position b % 32.
//
// Each macro is wrapped in do { } while (0) so that it behaves as a single
// statement (no dangling-else surprises), and uses an unsigned 1 so that
// setting bit 31 does not shift into the sign bit of an int.

// Set bit b if source s carries the NEG modifier.
#define NEG_(b, s)                                                       \
   do {                                                                  \
      if (i->src(s).mod.neg())                                           \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                     \
   } while (0)

// Set bit b if source s carries the ABS modifier.
#define ABS_(b, s)                                                       \
   do {                                                                  \
      if (i->src(s).mod.abs())                                           \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                     \
   } while (0)

// Set bit b if source s carries the NOT modifier.
#define NOT_(b, s)                                                       \
   do {                                                                  \
      if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))                     \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                     \
   } while (0)

// Set bit b if the instruction's ftz / dnz flag is set.
#define FTZ_(b)                                                          \
   do {                                                                  \
      if (i->ftz)                                                        \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                     \
   } while (0)
#define DNZ_(b)                                                          \
   do {                                                                  \
      if (i->dnz)                                                        \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                     \
   } while (0)

// Set bit b if the instruction saturates its result.
#define SAT_(b)                                                          \
   do {                                                                  \
      if (i->saturate)                                                   \
         code[(0x##b) / 32] |= 1u << ((0x##b) % 32);                     \
   } while (0)

// Emit the rounding mode at bit b using emitRoundMode<t>() (t is F or I).
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)

// Storage data of the representative value behind a source / def reference.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
151
152 void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)
153 {
154 code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);
155 }
156
157 void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)
158 {
159 code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);
160 }
161
162 void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)
163 {
164 int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;
165 code[pos / 32] |= r << (pos % 32);
166 }
167
168 void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)
169 {
170 code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);
171 }
172
173 void CodeEmitterGK110::defId(const ValueDef& def, const int pos)
174 {
175 code[pos / 32] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);
176 }
177
178 bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)
179 {
180 const ImmediateValue *imm = ref.get()->asImm();
181
182 return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
183 }
184
185 void
186 CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)
187 {
188 bool rint = false;
189 uint8_t n;
190
191 switch (rnd) {
192 case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break;
193 case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break;
194 case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break;
195 default:
196 rint = rnd == ROUND_NI;
197 n = 0;
198 assert(rnd == ROUND_N || rnd == ROUND_NI);
199 break;
200 }
201 code[pos / 32] |= n << (pos % 32);
202 if (rint && rintPos >= 0)
203 code[rintPos / 32] |= 1 << (rintPos % 32);
204 }
205
206 void
207 CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)
208 {
209 uint8_t n;
210
211 switch (rnd) {
212 case ROUND_M: n = 1; break;
213 case ROUND_P: n = 2; break;
214 case ROUND_Z: n = 3; break;
215 default:
216 n = 0;
217 assert(rnd == ROUND_N);
218 break;
219 }
220 code[pos / 32] |= n << (pos % 32);
221 }
222
223 void
224 CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)
225 {
226 uint8_t n;
227
228 switch (rnd) {
229 case ROUND_MI: n = 1; break;
230 case ROUND_PI: n = 2; break;
231 case ROUND_ZI: n = 3; break;
232 default:
233 n = 0;
234 assert(rnd == ROUND_NI);
235 break;
236 }
237 code[pos / 32] |= n << (pos % 32);
238 }
239
240 void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
241 {
242 uint8_t n;
243
244 switch (cc) {
245 case CC_FL: n = 0x00; break;
246 case CC_LT: n = 0x01; break;
247 case CC_EQ: n = 0x02; break;
248 case CC_LE: n = 0x03; break;
249 case CC_GT: n = 0x04; break;
250 case CC_NE: n = 0x05; break;
251 case CC_GE: n = 0x06; break;
252 case CC_LTU: n = 0x09; break;
253 case CC_EQU: n = 0x0a; break;
254 case CC_LEU: n = 0x0b; break;
255 case CC_GTU: n = 0x0c; break;
256 case CC_NEU: n = 0x0d; break;
257 case CC_GEU: n = 0x0e; break;
258 case CC_TR: n = 0x0f; break;
259 case CC_NO: n = 0x10; break;
260 case CC_NC: n = 0x11; break;
261 case CC_NS: n = 0x12; break;
262 case CC_NA: n = 0x13; break;
263 case CC_A: n = 0x14; break;
264 case CC_S: n = 0x15; break;
265 case CC_C: n = 0x16; break;
266 case CC_O: n = 0x17; break;
267 default:
268 n = 0;
269 assert(!"invalid condition code");
270 break;
271 }
272 code[pos / 32] |= (n & mask) << (pos % 32);
273 }
274
275 void
276 CodeEmitterGK110::emitPredicate(const Instruction *i)
277 {
278 if (i->predSrc >= 0) {
279 srcId(i->src(i->predSrc), 18);
280 if (i->cc == CC_NOT_P)
281 code[0] |= 8 << 18; // negate
282 assert(i->getPredicate()->reg.file == FILE_PREDICATE);
283 } else {
284 code[0] |= 7 << 18;
285 }
286 }
287
288 void
289 CodeEmitterGK110::setCAddress14(const ValueRef& src)
290 {
291 const Storage& res = src.get()->asSym()->reg;
292 const int32_t addr = res.data.offset / 4;
293
294 code[0] |= (addr & 0x01ff) << 23;
295 code[1] |= (addr & 0x3e00) >> 9;
296 code[1] |= res.fileIndex << 5;
297 }
298
299 void
300 CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)
301 {
302 const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
303 const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;
304
305 if (i->sType == TYPE_F32) {
306 assert(!(u32 & 0x00000fff));
307 code[0] |= ((u32 & 0x001ff000) >> 12) << 23;
308 code[1] |= ((u32 & 0x7fe00000) >> 21);
309 code[1] |= ((u32 & 0x80000000) >> 4);
310 } else
311 if (i->sType == TYPE_F64) {
312 assert(!(u64 & 0x00000fffffffffffULL));
313 code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;
314 code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);
315 code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);
316 } else {
317 assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
318 code[0] |= (u32 & 0x001ff) << 23;
319 code[1] |= (u32 & 0x7fe00) >> 9;
320 code[1] |= (u32 & 0x80000) << 8;
321 }
322 }
323
324 void
325 CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
326 Modifier mod)
327 {
328 uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;
329
330 if (mod) {
331 ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);
332 mod.applyTo(imm);
333 u32 = imm.reg.data.u32;
334 }
335
336 code[0] |= u32 << 23;
337 code[1] |= u32 >> 9;
338 }
339
340 void
341 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
342 Modifier mod)
343 {
344 code[0] = ctg;
345 code[1] = opc << 20;
346
347 emitPredicate(i);
348
349 defId(i->def(0), 2);
350
351 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
352 switch (i->src(s).getFile()) {
353 case FILE_GPR:
354 srcId(i->src(s), s ? 42 : 10);
355 break;
356 case FILE_IMMEDIATE:
357 setImmediate32(i, s, mod);
358 break;
359 default:
360 break;
361 }
362 }
363 }
364
365
366 void
367 CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)
368 {
369 code[0] = ctg;
370 code[1] = opc << 20;
371
372 emitPredicate(i);
373
374 defId(i->def(0), 2);
375
376 switch (i->src(0).getFile()) {
377 case FILE_MEMORY_CONST:
378 code[1] |= 0x4 << 28;
379 setCAddress14(i->src(0));
380 break;
381 case FILE_GPR:
382 code[1] |= 0xc << 28;
383 srcId(i->src(0), 23);
384 break;
385 default:
386 assert(0);
387 break;
388 }
389 }
390
391 // 0x2 for GPR, c[] and 0x1 for short immediate
392 void
393 CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,
394 uint32_t opc1)
395 {
396 const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;
397
398 int s1 = 23;
399 if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)
400 s1 = 42;
401
402 if (imm) {
403 code[0] = 0x1;
404 code[1] = opc1 << 20;
405 } else {
406 code[0] = 0x2;
407 code[1] = (0xc << 28) | (opc2 << 20);
408 }
409
410 emitPredicate(i);
411
412 defId(i->def(0), 2);
413
414 for (int s = 0; s < 3 && i->srcExists(s); ++s) {
415 switch (i->src(s).getFile()) {
416 case FILE_MEMORY_CONST:
417 code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);
418 setCAddress14(i->src(s));
419 break;
420 case FILE_IMMEDIATE:
421 setShortImmediate(i, s);
422 break;
423 case FILE_GPR:
424 srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);
425 break;
426 default:
427 // ignore here, can be predicate or flags, but must not be address
428 break;
429 }
430 }
431 // 0x0 = invalid
432 // 0xc = rrr
433 // 0x8 = rrc
434 // 0x4 = rcr
435 assert(imm || (code[1] & (0xc << 28)));
436 }
437
438 inline void
439 CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)
440 {
441 if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);
442 if (i->src(s).mod.neg()) code[1] ^= (1 << 27);
443 }
444
445 void
446 CodeEmitterGK110::emitNOP(const Instruction *i)
447 {
448 code[0] = 0x00003c02;
449 code[1] = 0x85800000;
450
451 if (i)
452 emitPredicate(i);
453 else
454 code[0] = 0x001c3c02;
455 }
456
457 void
458 CodeEmitterGK110::emitFMAD(const Instruction *i)
459 {
460 assert(!isLIMM(i->src(1), TYPE_F32));
461
462 emitForm_21(i, 0x0c0, 0x940);
463
464 NEG_(34, 2);
465 SAT_(35);
466 RND_(36, F);
467 FTZ_(38);
468 DNZ_(39);
469
470 bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
471
472 if (code[0] & 0x1) {
473 if (neg1)
474 code[1] ^= 1 << 27;
475 } else
476 if (neg1) {
477 code[1] |= 1 << 19;
478 }
479 }
480
481 void
482 CodeEmitterGK110::emitFMUL(const Instruction *i)
483 {
484 bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
485
486 assert(i->postFactor >= -3 && i->postFactor <= 3);
487
488 if (isLIMM(i->src(1), TYPE_F32)) {
489 emitForm_L(i, 0x200, 0x2, Modifier(0));
490
491 FTZ_(38);
492 DNZ_(39);
493 SAT_(3a);
494 if (neg)
495 code[1] ^= 1 << 22;
496
497 assert(i->postFactor == 0);
498 } else {
499 emitForm_21(i, 0x234, 0xc34);
500 code[1] |= ((i->postFactor > 0) ?
501 (7 - i->postFactor) : (0 - i->postFactor)) << 12;
502
503 RND_(2a, F);
504 FTZ_(2f);
505 DNZ_(30);
506 SAT_(35);
507
508 if (code[0] & 0x1) {
509 if (neg)
510 code[1] ^= 1 << 27;
511 } else
512 if (neg) {
513 code[1] |= 1 << 19;
514 }
515 }
516 }
517
518 void
519 CodeEmitterGK110::emitIMUL(const Instruction *i)
520 {
521 assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
522 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
523
524 if (isLIMM(i->src(1), TYPE_S32)) {
525 emitForm_L(i, 0x280, 2, Modifier(0));
526
527 assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH);
528
529 if (i->sType == TYPE_S32)
530 code[1] |= 3 << 25;
531 } else {
532 emitForm_21(i, 0x21c, 0xc1c);
533
534 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
535 code[1] |= 1 << 10;
536 if (i->sType == TYPE_S32)
537 code[1] |= 3 << 11;
538 }
539 }
540
541 void
542 CodeEmitterGK110::emitFADD(const Instruction *i)
543 {
544 if (isLIMM(i->src(1), TYPE_F32)) {
545 assert(i->rnd == ROUND_N);
546 assert(!i->saturate);
547
548 Modifier mod = i->src(1).mod ^
549 Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);
550
551 emitForm_L(i, 0x400, 0, mod);
552
553 FTZ_(3a);
554 NEG_(3b, 0);
555 ABS_(39, 0);
556 } else {
557 emitForm_21(i, 0x22c, 0xc2c);
558
559 FTZ_(2f);
560 RND_(2a, F);
561 ABS_(31, 0);
562 NEG_(33, 0);
563 SAT_(35);
564
565 if (code[0] & 0x1) {
566 modNegAbsF32_3b(i, 1);
567 if (i->op == OP_SUB) code[1] ^= 1 << 27;
568 } else {
569 ABS_(34, 1);
570 NEG_(30, 1);
571 if (i->op == OP_SUB) code[1] ^= 1 << 16;
572 }
573 }
574 }
575
576 void
577 CodeEmitterGK110::emitUADD(const Instruction *i)
578 {
579 uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
580
581 if (i->op == OP_SUB)
582 addOp ^= 1;
583
584 assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
585
586 if (isLIMM(i->src(1), TYPE_S32)) {
587 emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));
588
589 if (addOp & 2)
590 code[1] |= 1 << 27;
591
592 assert(!i->defExists(1));
593 assert(i->flagsSrc < 0);
594
595 SAT_(39);
596 } else {
597 emitForm_21(i, 0x208, 0xc08);
598
599 assert(addOp != 3); // would be add-plus-one
600
601 code[1] |= addOp << 19;
602
603 if (i->defExists(1))
604 code[1] |= 1 << 18; // write carry
605 if (i->flagsSrc >= 0)
606 code[1] |= 1 << 14; // add carry
607
608 SAT_(35);
609 }
610 }
611
612 // TODO: shl-add
613 void
614 CodeEmitterGK110::emitIMAD(const Instruction *i)
615 {
616 uint8_t addOp =
617 (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
618
619 emitForm_21(i, 0x100, 0xa00);
620
621 assert(addOp != 3);
622 code[1] |= addOp << 26;
623
624 if (i->sType == TYPE_S32)
625 code[1] |= (1 << 19) | (1 << 24);
626
627 if (code[0] & 0x1) {
628 assert(!i->subOp);
629 SAT_(39);
630 } else {
631 if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
632 code[1] |= 1 << 25;
633 SAT_(35);
634 }
635 }
636
637 void
638 CodeEmitterGK110::emitISAD(const Instruction *i)
639 {
640 assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
641
642 emitForm_21(i, 0x1f4, 0xb74);
643
644 if (i->dType == TYPE_S32)
645 code[1] |= 1 << 19;
646 }
647
648 void
649 CodeEmitterGK110::emitNOT(const Instruction *i)
650 {
651 code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
652 code[1] = 0x22003800;
653
654 emitPredicate(i);
655
656 defId(i->def(0), 2);
657
658 switch (i->src(0).getFile()) {
659 case FILE_GPR:
660 code[1] |= 0xc << 28;
661 srcId(i->src(0), 23);
662 break;
663 case FILE_MEMORY_CONST:
664 code[1] |= 0x4 << 28;
665 setCAddress14(i->src(1));
666 break;
667 default:
668 assert(0);
669 break;
670 }
671 }
672
673 void
674 CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
675 {
676 if (isLIMM(i->src(1), TYPE_S32)) {
677 emitForm_L(i, 0x200, 0, i->src(1).mod);
678 code[1] |= subOp << 24;
679 NOT_(3a, 0);
680 } else {
681 emitForm_21(i, 0x220, 0xc20);
682 code[1] |= subOp << 12;
683 NOT_(2a, 0);
684 NOT_(2b, 1);
685 }
686 }
687
688 void
689 CodeEmitterGK110::emitPOPC(const Instruction *i)
690 {
691 assert(!isLIMM(i->src(1), TYPE_S32, true));
692
693 emitForm_21(i, 0x204, 0xc04);
694
695 NOT_(2a, 0);
696 if (!(code[0] & 0x1))
697 NOT_(2b, 1);
698 }
699
700 void
701 CodeEmitterGK110::emitINSBF(const Instruction *i)
702 {
703 emitForm_21(i, 0x1f8, 0xb78);
704 }
705
706 void
707 CodeEmitterGK110::emitEXTBF(const Instruction *i)
708 {
709 emitForm_21(i, 0x600, 0xc00);
710
711 if (i->dType == TYPE_S32)
712 code[1] |= 0x80000;
713 if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
714 code[1] |= 0x800;
715 }
716
717 void
718 CodeEmitterGK110::emitBFIND(const Instruction *i)
719 {
720 emitForm_C(i, 0x218, 0x2);
721
722 if (i->dType == TYPE_S32)
723 code[1] |= 0x80000;
724 if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
725 code[1] |= 0x800;
726 if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
727 code[1] |= 0x1000;
728 }
729
730 void
731 CodeEmitterGK110::emitShift(const Instruction *i)
732 {
733 if (i->op == OP_SHR) {
734 emitForm_21(i, 0x214, 0xc14);
735 if (isSignedType(i->dType))
736 code[1] |= 1 << 19;
737 } else {
738 emitForm_21(i, 0x224, 0xc24);
739 }
740
741 if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
742 code[1] |= 1 << 10;
743 }
744
745 void
746 CodeEmitterGK110::emitPreOp(const Instruction *i)
747 {
748 emitForm_C(i, 0x248, 0x2);
749
750 if (i->op == OP_PREEX2)
751 code[1] |= 1 << 10;
752
753 NEG_(30, 0);
754 ABS_(34, 0);
755 }
756
757 void
758 CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)
759 {
760 code[0] = 0x00000002 | (subOp << 23);
761 code[1] = 0x84000000;
762
763 emitPredicate(i);
764
765 defId(i->def(0), 2);
766 srcId(i->src(0), 10);
767
768 NEG_(33, 0);
769 ABS_(31, 0);
770 SAT_(35);
771 }
772
773 void
774 CodeEmitterGK110::emitMINMAX(const Instruction *i)
775 {
776 uint32_t op2, op1;
777
778 switch (i->dType) {
779 case TYPE_U32:
780 case TYPE_S32:
781 op2 = 0x210;
782 op1 = 0xc10;
783 break;
784 case TYPE_F32:
785 op2 = 0x230;
786 op1 = 0xc30;
787 break;
788 case TYPE_F64:
789 op2 = 0x228;
790 op1 = 0xc28;
791 break;
792 default:
793 assert(0);
794 op2 = 0;
795 op1 = 0;
796 break;
797 }
798 emitForm_21(i, op2, op1);
799
800 if (i->dType == TYPE_S32)
801 code[1] |= 1 << 19;
802 code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt
803
804 FTZ_(2f);
805 ABS_(31, 0);
806 NEG_(33, 0);
807 if (code[0] & 0x1) {
808 modNegAbsF32_3b(i, 1);
809 } else {
810 ABS_(34, 1);
811 NEG_(30, 1);
812 }
813 }
814
815 void
816 CodeEmitterGK110::emitCVT(const Instruction *i)
817 {
818 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
819 const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);
820 const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);
821
822 bool sat = i->saturate;
823 bool abs = i->src(0).mod.abs();
824 bool neg = i->src(0).mod.neg();
825
826 RoundMode rnd = i->rnd;
827
828 switch (i->op) {
829 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
830 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
831 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
832 case OP_SAT: sat = true; break;
833 case OP_NEG: neg = !neg; break;
834 case OP_ABS: abs = true; neg = false; break;
835 default:
836 break;
837 }
838
839 DataType dType;
840
841 if (i->op == OP_NEG && i->dType == TYPE_U32)
842 dType = TYPE_S32;
843 else
844 dType = i->dType;
845
846
847 uint32_t op;
848
849 if (f2f) op = 0x254;
850 else if (f2i) op = 0x258;
851 else if (i2f) op = 0x25c;
852 else op = 0x260;
853
854 emitForm_C(i, op, 0x2);
855
856 FTZ_(2f);
857 if (neg) code[1] |= 1 << 16;
858 if (abs) code[1] |= 1 << 20;
859 if (sat) code[1] |= 1 << 21;
860
861 emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);
862
863 code[0] |= typeSizeofLog2(dType) << 10;
864 code[0] |= typeSizeofLog2(i->sType) << 12;
865
866 if (isSignedIntType(dType))
867 code[0] |= 0x4000;
868 if (isSignedIntType(i->sType))
869 code[0] |= 0x8000;
870 }
871
872 void
873 CodeEmitterGK110::emitSET(const CmpInstruction *i)
874 {
875 uint16_t op1, op2;
876
877 if (i->def(0).getFile() == FILE_PREDICATE) {
878 switch (i->sType) {
879 case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;
880 case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;
881 default:
882 op2 = 0x1b0;
883 op1 = 0xb30;
884 break;
885 }
886 emitForm_21(i, op2, op1);
887
888 NEG_(2e, 0);
889 ABS_(9, 0);
890 if (!(code[0] & 0x1)) {
891 NEG_(8, 1);
892 ABS_(2f, 1);
893 } else {
894 modNegAbsF32_3b(i, 1);
895 }
896 FTZ_(32);
897
898 // normal DST field is negated predicate result
899 code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
900 if (i->defExists(1))
901 defId(i->def(1), 2);
902 else
903 code[0] |= 0x1c;
904 } else {
905 switch (i->sType) {
906 case TYPE_F32: op2 = 0x000; op1 = 0x820; break;
907 case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
908 default:
909 op2 = 0x1a8;
910 op1 = 0xb28;
911 break;
912 }
913 emitForm_21(i, op2, op1);
914
915 NEG_(2e, 0);
916 ABS_(39, 0);
917 if (!(code[0] & 0x1)) {
918 NEG_(38, 1);
919 ABS_(2f, 1);
920 } else {
921 modNegAbsF32_3b(i, 1);
922 }
923 FTZ_(3a);
924
925 if (i->dType == TYPE_F32)
926 code[1] |= 1 << 23;
927 }
928 if (i->sType == TYPE_S32)
929 code[1] |= 1 << 19;
930
931 if (i->op != OP_SET) {
932 switch (i->op) {
933 case OP_SET_AND: code[1] |= 0x0 << 16; break;
934 case OP_SET_OR: code[1] |= 0x1 << 16; break;
935 case OP_SET_XOR: code[1] |= 0x2 << 16; break;
936 default:
937 assert(0);
938 break;
939 }
940 srcId(i->src(2), 0x2a);
941 } else {
942 code[1] |= 0x7 << 10;
943 }
944 emitCondCode(i->setCond,
945 isFloatType(i->sType) ? 0x33 : 0x34,
946 isFloatType(i->sType) ? 0xf : 0x7);
947 }
948
949 void
950 CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
951 {
952 CondCode cc = i->setCond;
953 if (i->src(2).mod.neg())
954 cc = reverseCondCode(cc);
955
956 if (i->dType == TYPE_F32) {
957 emitForm_21(i, 0x1d0, 0xb50);
958 FTZ_(32);
959 emitCondCode(cc, 0x33, 0xf);
960 } else {
961 emitForm_21(i, 0x1a0, 0xb20);
962 emitCondCode(cc, 0x34, 0x7);
963 }
964 }
965
966 void CodeEmitterGK110::emitSELP(const Instruction *i)
967 {
968 emitForm_21(i, 0x250, 0x050);
969
970 if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)))
971 code[1] |= 1 << 13;
972 }
973
974 void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
975 {
976 code[0] = 0x0000003e | (i->subOp << 23);
977 code[1] = 0x77000000;
978
979 emitPredicate(i);
980 }
981
982 void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)
983 {
984 code[0] = 0x00000002;
985 code[1] = 0x76c00000;
986
987 code[1] |= i->tex.r << 9;
988 // code[1] |= i->tex.s << (9 + 8);
989
990 if (i->tex.liveOnly)
991 code[0] |= 0x80000000;
992
993 defId(i->def(0), 2);
994 srcId(i->src(0), 10);
995 }
996
997 static inline bool
998 isNextIndependentTex(const TexInstruction *i)
999 {
1000 if (!i->next || !isTextureOp(i->next->op))
1001 return false;
1002 if (i->getDef(0)->interfers(i->next->getSrc(0)))
1003 return false;
1004 return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1005 }
1006
1007 void
1008 CodeEmitterGK110::emitTEX(const TexInstruction *i)
1009 {
1010 const bool ind = i->tex.rIndirectSrc >= 0;
1011
1012 if (ind) {
1013 code[0] = 0x00000002;
1014 switch (i->op) {
1015 case OP_TXD:
1016 code[1] = 0x7e000000;
1017 break;
1018 case OP_TXLQ:
1019 code[1] = 0x7e800000;
1020 break;
1021 case OP_TXF:
1022 code[1] = 0x78000000;
1023 break;
1024 case OP_TXG:
1025 code[1] = 0x7dc00000;
1026 break;
1027 default:
1028 code[1] = 0x7d800000;
1029 break;
1030 }
1031 } else {
1032 switch (i->op) {
1033 case OP_TXD:
1034 code[0] = 0x00000002;
1035 code[1] = 0x76000000;
1036 code[1] |= i->tex.r << 9;
1037 break;
1038 case OP_TXLQ:
1039 code[0] = 0x00000002;
1040 code[1] = 0x76800000;
1041 code[1] |= i->tex.r << 9;
1042 break;
1043 case OP_TXF:
1044 code[0] = 0x00000002;
1045 code[1] = 0x70000000;
1046 code[1] |= i->tex.r << 13;
1047 break;
1048 case OP_TXG:
1049 code[0] = 0x00000001;
1050 code[1] = 0x70000000;
1051 code[1] |= i->tex.r << 15;
1052 break;
1053 default:
1054 code[0] = 0x00000001;
1055 code[1] = 0x60000000;
1056 code[1] |= i->tex.r << 15;
1057 break;
1058 }
1059 }
1060
1061 code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode
1062
1063 if (i->tex.liveOnly)
1064 code[0] |= 0x80000000;
1065
1066 switch (i->op) {
1067 case OP_TEX: break;
1068 case OP_TXB: code[1] |= 0x2000; break;
1069 case OP_TXL: code[1] |= 0x3000; break;
1070 case OP_TXF: break;
1071 case OP_TXG: break;
1072 case OP_TXD: break;
1073 case OP_TXLQ: break;
1074 default:
1075 assert(!"invalid texture op");
1076 break;
1077 }
1078
1079 if (i->op == OP_TXF) {
1080 if (!i->tex.levelZero)
1081 code[1] |= 0x1000;
1082 } else
1083 if (i->tex.levelZero) {
1084 code[1] |= 0x1000;
1085 }
1086
1087 if (i->op != OP_TXD && i->tex.derivAll)
1088 code[1] |= 0x200;
1089
1090 emitPredicate(i);
1091
1092 code[1] |= i->tex.mask << 2;
1093
1094 const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1095
1096 defId(i->def(0), 2);
1097 srcId(i->src(0), 10);
1098 srcId(i, src1, 23);
1099
1100 if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;
1101
1102 // texture target:
1103 code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;
1104 if (i->tex.target.isArray())
1105 code[1] |= 0x40;
1106 if (i->tex.target.isShadow())
1107 code[1] |= 0x400;
1108 if (i->tex.target == TEX_TARGET_2D_MS ||
1109 i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1110 code[1] |= 0x800;
1111
1112 if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1113 // ?
1114 }
1115
1116 if (i->tex.useOffsets == 1) {
1117 switch (i->op) {
1118 case OP_TXF: code[1] |= 0x200; break;
1119 default: code[1] |= 0x800; break;
1120 }
1121 }
1122 if (i->tex.useOffsets == 4)
1123 code[1] |= 0x1000;
1124 }
1125
1126 void
1127 CodeEmitterGK110::emitTXQ(const TexInstruction *i)
1128 {
1129 code[0] = 0x00000002;
1130 code[1] = 0x75400001;
1131
1132 switch (i->tex.query) {
1133 case TXQ_DIMS: code[0] |= 0x01 << 25; break;
1134 case TXQ_TYPE: code[0] |= 0x02 << 25; break;
1135 case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;
1136 case TXQ_FILTER: code[0] |= 0x10 << 25; break;
1137 case TXQ_LOD: code[0] |= 0x12 << 25; break;
1138 case TXQ_BORDER_COLOUR: code[0] |= 0x16 << 25; break;
1139 default:
1140 assert(!"invalid texture query");
1141 break;
1142 }
1143
1144 code[1] |= i->tex.mask << 2;
1145 code[1] |= i->tex.r << 9;
1146 if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)
1147 code[1] |= 0x08000000;
1148
1149 defId(i->def(0), 2);
1150 srcId(i->src(0), 10);
1151
1152 emitPredicate(i);
1153 }
1154
1155 void
1156 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1157 {
1158 code[0] = 0x00000002 | ((qOp & 1) << 31);
1159 code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12);
1160
1161 defId(i->def(0), 2);
1162 srcId(i->src(0), 10);
1163 srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23);
1164
1165 if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1166 code[1] |= 1 << 9; // dall
1167
1168 emitPredicate(i);
1169 }
1170
1171 void
1172 CodeEmitterGK110::emitPIXLD(const Instruction *i)
1173 {
1174 emitForm_L(i, 0x7f4, 2, Modifier(0));
1175 code[1] |= i->subOp << 2;
1176 code[1] |= 0x00070000;
1177 }
1178
1179 void
1180 CodeEmitterGK110::emitFlow(const Instruction *i)
1181 {
1182 const FlowInstruction *f = i->asFlow();
1183
1184 unsigned mask; // bit 0: predicate, bit 1: target
1185
1186 code[0] = 0x00000000;
1187
1188 switch (i->op) {
1189 case OP_BRA:
1190 code[1] = f->absolute ? 0x10800000 : 0x12000000;
1191 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1192 code[0] |= 0x80;
1193 mask = 3;
1194 break;
1195 case OP_CALL:
1196 code[1] = f->absolute ? 0x11000000 : 0x13000000;
1197 if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1198 code[0] |= 0x80;
1199 mask = 2;
1200 break;
1201
1202 case OP_EXIT: code[1] = 0x18000000; mask = 1; break;
1203 case OP_RET: code[1] = 0x19000000; mask = 1; break;
1204 case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;
1205 case OP_BREAK: code[1] = 0x1a000000; mask = 1; break;
1206 case OP_CONT: code[1] = 0x1a800000; mask = 1; break;
1207
1208 case OP_JOINAT: code[1] = 0x14800000; mask = 2; break;
1209 case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;
1210 case OP_PRECONT: code[1] = 0x15800000; mask = 2; break;
1211 case OP_PRERET: code[1] = 0x13800000; mask = 2; break;
1212
1213 case OP_QUADON: code[1] = 0x1b800000; mask = 0; break;
1214 case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;
1215 case OP_BRKPT: code[1] = 0x00000000; mask = 0; break;
1216 default:
1217 assert(!"invalid flow operation");
1218 return;
1219 }
1220
1221 if (mask & 1) {
1222 emitPredicate(i);
1223 if (i->flagsSrc < 0)
1224 code[0] |= 0x3c;
1225 }
1226
1227 if (!f)
1228 return;
1229
1230 if (f->allWarp)
1231 code[0] |= 1 << 9;
1232 if (f->limit)
1233 code[0] |= 1 << 8;
1234
1235 if (f->op == OP_CALL) {
1236 if (f->builtin) {
1237 assert(f->absolute);
1238 uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1239 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);
1240 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);
1241 } else {
1242 assert(!f->absolute);
1243 int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1244 code[0] |= (pcRel & 0x1ff) << 23;
1245 code[1] |= (pcRel >> 9) & 0x7fff;
1246 }
1247 } else
1248 if (mask & 2) {
1249 int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1250 // currently we don't want absolute branches
1251 assert(!f->absolute);
1252 code[0] |= (pcRel & 0x1ff) << 23;
1253 code[1] |= (pcRel >> 9) & 0x7fff;
1254 }
1255 }
1256
1257 void
1258 CodeEmitterGK110::emitPFETCH(const Instruction *i)
1259 {
1260 uint32_t prim = i->src(0).get()->reg.data.u32;
1261
1262 code[0] = 0x00000002 | ((prim & 0xff) << 23);
1263 code[1] = 0x7f800000;
1264
1265 emitPredicate(i);
1266
1267 defId(i->def(0), 2);
1268 srcId(i->src(1), 10);
1269 }
1270
1271 void
1272 CodeEmitterGK110::emitVFETCH(const Instruction *i)
1273 {
1274 unsigned int size = typeSizeof(i->dType);
1275 uint32_t offset = i->src(0).get()->reg.data.offset;
1276
1277 code[0] = 0x00000002 | (offset << 23);
1278 code[1] = 0x7ec00000 | (offset >> 9);
1279 code[1] |= (size / 4 - 1) << 18;
1280
1281 #if 0
1282 if (i->perPatch)
1283 code[0] |= 0x100;
1284 if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1285 code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1286 #endif
1287
1288 emitPredicate(i);
1289
1290 defId(i->def(0), 2);
1291 srcId(i->src(0).getIndirect(0), 10);
1292 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address
1293 }
1294
1295 void
1296 CodeEmitterGK110::emitEXPORT(const Instruction *i)
1297 {
1298 unsigned int size = typeSizeof(i->dType);
1299 uint32_t offset = i->src(0).get()->reg.data.offset;
1300
1301 code[0] = 0x00000002 | (offset << 23);
1302 code[1] = 0x7f000000 | (offset >> 9);
1303 code[1] |= (size / 4 - 1) << 18;
1304
1305 #if 0
1306 if (i->perPatch)
1307 code[0] |= 0x100;
1308 #endif
1309
1310 emitPredicate(i);
1311
1312 assert(i->src(1).getFile() == FILE_GPR);
1313
1314 srcId(i->src(0).getIndirect(0), 10);
1315 srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address
1316 srcId(i->src(1), 2);
1317 }
1318
1319 void
1320 CodeEmitterGK110::emitOUT(const Instruction *i)
1321 {
1322 assert(i->src(0).getFile() == FILE_GPR);
1323
1324 emitForm_21(i, 0x1f0, 0xb70);
1325
1326 if (i->op == OP_EMIT)
1327 code[1] |= 1 << 10;
1328 if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1329 code[1] |= 1 << 11;
1330 }
1331
1332 void
1333 CodeEmitterGK110::emitInterpMode(const Instruction *i)
1334 {
1335 code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID
1336 code[1] |= (i->ipa & 0xc) << (19 - 2);
1337 }
1338
1339 void
1340 CodeEmitterGK110::emitINTERP(const Instruction *i)
1341 {
1342 const uint32_t base = i->getSrc(0)->reg.data.offset;
1343
1344 code[0] = 0x00000002 | (base << 31);
1345 code[1] = 0x74800000 | (base >> 1);
1346
1347 if (i->saturate)
1348 code[1] |= 1 << 18;
1349
1350 if (i->op == OP_PINTERP)
1351 srcId(i->src(1), 23);
1352 else
1353 code[0] |= 0xff << 23;
1354
1355 srcId(i->src(0).getIndirect(0), 10);
1356 emitInterpMode(i);
1357
1358 emitPredicate(i);
1359 defId(i->def(0), 2);
1360
1361 if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1362 srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);
1363 else
1364 code[1] |= 0xff << 10;
1365 }
1366
1367 void
1368 CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)
1369 {
1370 uint8_t n;
1371
1372 switch (ty) {
1373 case TYPE_U8:
1374 n = 0;
1375 break;
1376 case TYPE_S8:
1377 n = 1;
1378 break;
1379 case TYPE_U16:
1380 n = 2;
1381 break;
1382 case TYPE_S16:
1383 n = 3;
1384 break;
1385 case TYPE_F32:
1386 case TYPE_U32:
1387 case TYPE_S32:
1388 n = 4;
1389 break;
1390 case TYPE_F64:
1391 case TYPE_U64:
1392 case TYPE_S64:
1393 n = 5;
1394 break;
1395 case TYPE_B128:
1396 n = 6;
1397 break;
1398 default:
1399 n = 0;
1400 assert(!"invalid ld/st type");
1401 break;
1402 }
1403 code[pos / 32] |= n << (pos % 32);
1404 }
1405
1406 void
1407 CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)
1408 {
1409 uint8_t n;
1410
1411 switch (c) {
1412 case CACHE_CA:
1413 // case CACHE_WB:
1414 n = 0;
1415 break;
1416 case CACHE_CG:
1417 n = 1;
1418 break;
1419 case CACHE_CS:
1420 n = 2;
1421 break;
1422 case CACHE_CV:
1423 // case CACHE_WT:
1424 n = 3;
1425 break;
1426 default:
1427 n = 0;
1428 assert(!"invalid caching mode");
1429 break;
1430 }
1431 code[pos / 32] |= n << (pos % 32);
1432 }
1433
1434 void
1435 CodeEmitterGK110::emitSTORE(const Instruction *i)
1436 {
1437 int32_t offset = SDATA(i->src(0)).offset;
1438
1439 switch (i->src(0).getFile()) {
1440 case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;
1441 case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break;
1442 case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
1443 default:
1444 assert(!"invalid memory file");
1445 break;
1446 }
1447
1448 if (i->src(0).getFile() != FILE_MEMORY_GLOBAL)
1449 offset &= 0xffffff;
1450
1451 if (code[0] & 0x2) {
1452 emitLoadStoreType(i->dType, 0x33);
1453 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
1454 emitCachingMode(i->cache, 0x2f);
1455 } else {
1456 emitLoadStoreType(i->dType, 0x38);
1457 emitCachingMode(i->cache, 0x3b);
1458 }
1459 code[0] |= offset << 23;
1460 code[1] |= offset >> 9;
1461
1462 emitPredicate(i);
1463
1464 srcId(i->src(1), 2);
1465 srcId(i->src(0).getIndirect(0), 10);
1466 }
1467
1468 void
1469 CodeEmitterGK110::emitLOAD(const Instruction *i)
1470 {
1471 int32_t offset = SDATA(i->src(0)).offset;
1472
1473 switch (i->src(0).getFile()) {
1474 case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;
1475 case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break;
1476 case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break;
1477 case FILE_MEMORY_CONST:
1478 if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1479 emitMOV(i);
1480 return;
1481 }
1482 offset &= 0xffff;
1483 code[0] = 0x00000002;
1484 code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);
1485 code[1] |= i->subOp << 15;
1486 break;
1487 default:
1488 assert(!"invalid memory file");
1489 break;
1490 }
1491
1492 if (code[0] & 0x2) {
1493 offset &= 0xffffff;
1494 emitLoadStoreType(i->dType, 0x33);
1495 if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
1496 emitCachingMode(i->cache, 0x2f);
1497 } else {
1498 emitLoadStoreType(i->dType, 0x38);
1499 emitCachingMode(i->cache, 0x3b);
1500 }
1501 code[0] |= offset << 23;
1502 code[1] |= offset >> 9;
1503
1504 emitPredicate(i);
1505
1506 defId(i->def(0), 2);
1507 srcId(i->src(0).getIndirect(0), 10);
1508 }
1509
1510 uint8_t
1511 CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
1512 {
1513 switch (SDATA(ref).sv.sv) {
1514 case SV_LANEID: return 0x00;
1515 case SV_PHYSID: return 0x03;
1516 case SV_VERTEX_COUNT: return 0x10;
1517 case SV_INVOCATION_ID: return 0x11;
1518 case SV_YDIR: return 0x12;
1519 case SV_TID: return 0x21 + SDATA(ref).sv.index;
1520 case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
1521 case SV_NTID: return 0x29 + SDATA(ref).sv.index;
1522 case SV_GRIDID: return 0x2c;
1523 case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;
1524 case SV_LBASE: return 0x34;
1525 case SV_SBASE: return 0x30;
1526 case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;
1527 default:
1528 assert(!"no sreg for system value");
1529 return 0;
1530 }
1531 }
1532
1533 void
1534 CodeEmitterGK110::emitMOV(const Instruction *i)
1535 {
1536 if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1537 code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);
1538 code[1] = 0x86400000;
1539 emitPredicate(i);
1540 defId(i->def(0), 2);
1541 } else
1542 if (i->src(0).getFile() == FILE_IMMEDIATE) {
1543 code[0] = 0x00000002 | (i->lanes << 14);
1544 code[1] = 0x74000000;
1545 emitPredicate(i);
1546 defId(i->def(0), 2);
1547 setImmediate32(i, 0, Modifier(0));
1548 } else
1549 if (i->src(0).getFile() == FILE_PREDICATE) {
1550 code[0] = 0x00000002;
1551 code[1] = 0x84401c07;
1552 emitPredicate(i);
1553 defId(i->def(0), 2);
1554 srcId(i->src(0), 14);
1555 } else {
1556 emitForm_C(i, 0x24c, 2);
1557 code[1] |= i->lanes << 10;
1558 }
1559 }
1560
1561 bool
1562 CodeEmitterGK110::emitInstruction(Instruction *insn)
1563 {
1564 const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;
1565
1566 if (insn->encSize != 8) {
1567 ERROR("skipping unencodable instruction: ");
1568 insn->print();
1569 return false;
1570 } else
1571 if (codeSize + size > codeSizeLimit) {
1572 ERROR("code emitter output buffer too small\n");
1573 return false;
1574 }
1575
1576 if (writeIssueDelays) {
1577 int id = (codeSize & 0x3f) / 8 - 1;
1578 if (id < 0) {
1579 id += 1;
1580 code[0] = 0x00000000; // cf issue delay "instruction"
1581 code[1] = 0x08000000;
1582 code += 2;
1583 codeSize += 8;
1584 }
1585 uint32_t *data = code - (id * 2 + 2);
1586
1587 switch (id) {
1588 case 0: data[0] |= insn->sched << 2; break;
1589 case 1: data[0] |= insn->sched << 10; break;
1590 case 2: data[0] |= insn->sched << 18; break;
1591 case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;
1592 case 4: data[1] |= insn->sched << 2; break;
1593 case 5: data[1] |= insn->sched << 10; break;
1594 case 6: data[1] |= insn->sched << 18; break;
1595 default:
1596 assert(0);
1597 break;
1598 }
1599 }
1600
1601 // assert that instructions with multiple defs don't corrupt registers
1602 for (int d = 0; insn->defExists(d); ++d)
1603 assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
1604
1605 switch (insn->op) {
1606 case OP_MOV:
1607 case OP_RDSV:
1608 emitMOV(insn);
1609 break;
1610 case OP_NOP:
1611 break;
1612 case OP_LOAD:
1613 emitLOAD(insn);
1614 break;
1615 case OP_STORE:
1616 emitSTORE(insn);
1617 break;
1618 case OP_LINTERP:
1619 case OP_PINTERP:
1620 emitINTERP(insn);
1621 break;
1622 case OP_VFETCH:
1623 emitVFETCH(insn);
1624 break;
1625 case OP_EXPORT:
1626 emitEXPORT(insn);
1627 break;
1628 case OP_PFETCH:
1629 emitPFETCH(insn);
1630 break;
1631 case OP_EMIT:
1632 case OP_RESTART:
1633 emitOUT(insn);
1634 break;
1635 case OP_ADD:
1636 case OP_SUB:
1637 if (isFloatType(insn->dType))
1638 emitFADD(insn);
1639 else
1640 emitUADD(insn);
1641 break;
1642 case OP_MUL:
1643 if (isFloatType(insn->dType))
1644 emitFMUL(insn);
1645 else
1646 emitIMUL(insn);
1647 break;
1648 case OP_MAD:
1649 case OP_FMA:
1650 if (isFloatType(insn->dType))
1651 emitFMAD(insn);
1652 else
1653 emitIMAD(insn);
1654 break;
1655 case OP_SAD:
1656 emitISAD(insn);
1657 break;
1658 case OP_NOT:
1659 emitNOT(insn);
1660 break;
1661 case OP_AND:
1662 emitLogicOp(insn, 0);
1663 break;
1664 case OP_OR:
1665 emitLogicOp(insn, 1);
1666 break;
1667 case OP_XOR:
1668 emitLogicOp(insn, 2);
1669 break;
1670 case OP_SHL:
1671 case OP_SHR:
1672 emitShift(insn);
1673 break;
1674 case OP_SET:
1675 case OP_SET_AND:
1676 case OP_SET_OR:
1677 case OP_SET_XOR:
1678 emitSET(insn->asCmp());
1679 break;
1680 case OP_SELP:
1681 emitSELP(insn);
1682 break;
1683 case OP_SLCT:
1684 emitSLCT(insn->asCmp());
1685 break;
1686 case OP_MIN:
1687 case OP_MAX:
1688 emitMINMAX(insn);
1689 break;
1690 case OP_ABS:
1691 case OP_NEG:
1692 case OP_CEIL:
1693 case OP_FLOOR:
1694 case OP_TRUNC:
1695 case OP_CVT:
1696 case OP_SAT:
1697 emitCVT(insn);
1698 break;
1699 case OP_RSQ:
1700 emitSFnOp(insn, 5);
1701 break;
1702 case OP_RCP:
1703 emitSFnOp(insn, 4);
1704 break;
1705 case OP_LG2:
1706 emitSFnOp(insn, 3);
1707 break;
1708 case OP_EX2:
1709 emitSFnOp(insn, 2);
1710 break;
1711 case OP_SIN:
1712 emitSFnOp(insn, 1);
1713 break;
1714 case OP_COS:
1715 emitSFnOp(insn, 0);
1716 break;
1717 case OP_PRESIN:
1718 case OP_PREEX2:
1719 emitPreOp(insn);
1720 break;
1721 case OP_TEX:
1722 case OP_TXB:
1723 case OP_TXL:
1724 case OP_TXD:
1725 case OP_TXF:
1726 case OP_TXG:
1727 case OP_TXLQ:
1728 emitTEX(insn->asTex());
1729 break;
1730 case OP_TXQ:
1731 emitTXQ(insn->asTex());
1732 break;
1733 case OP_TEXBAR:
1734 emitTEXBAR(insn);
1735 break;
1736 case OP_PIXLD:
1737 emitPIXLD(insn);
1738 break;
1739 case OP_BRA:
1740 case OP_CALL:
1741 case OP_PRERET:
1742 case OP_RET:
1743 case OP_DISCARD:
1744 case OP_EXIT:
1745 case OP_PRECONT:
1746 case OP_CONT:
1747 case OP_PREBREAK:
1748 case OP_BREAK:
1749 case OP_JOINAT:
1750 case OP_BRKPT:
1751 case OP_QUADON:
1752 case OP_QUADPOP:
1753 emitFlow(insn);
1754 break;
1755 case OP_QUADOP:
1756 emitQUADOP(insn, insn->subOp, insn->lanes);
1757 break;
1758 case OP_DFDX:
1759 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
1760 break;
1761 case OP_DFDY:
1762 emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
1763 break;
1764 case OP_POPCNT:
1765 emitPOPC(insn);
1766 break;
1767 case OP_INSBF:
1768 emitINSBF(insn);
1769 break;
1770 case OP_EXTBF:
1771 emitEXTBF(insn);
1772 break;
1773 case OP_BFIND:
1774 emitBFIND(insn);
1775 break;
1776 case OP_JOIN:
1777 emitNOP(insn);
1778 insn->join = 1;
1779 break;
1780 case OP_PHI:
1781 case OP_UNION:
1782 case OP_CONSTRAINT:
1783 ERROR("operation should have been eliminated");
1784 return false;
1785 case OP_EXP:
1786 case OP_LOG:
1787 case OP_SQRT:
1788 case OP_POW:
1789 ERROR("operation should have been lowered\n");
1790 return false;
1791 default:
1792 ERROR("unknow op\n");
1793 return false;
1794 }
1795
1796 if (insn->join)
1797 code[0] |= 1 << 22;
1798
1799 code += 2;
1800 codeSize += 8;
1801 return true;
1802 }
1803
1804 uint32_t
1805 CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const
1806 {
1807 // No more short instruction encodings.
1808 return 8;
1809 }
1810
1811 void
1812 CodeEmitterGK110::prepareEmission(Function *func)
1813 {
1814 const Target *targ = func->getProgram()->getTarget();
1815
1816 CodeEmitter::prepareEmission(func);
1817
1818 if (targ->hasSWSched)
1819 calculateSchedDataNVC0(targ, func);
1820 }
1821
1822 CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target)
1823 : CodeEmitter(target),
1824 targNVC0(target),
1825 writeIssueDelays(target->hasSWSched)
1826 {
1827 code = NULL;
1828 codeSize = codeSizeLimit = 0;
1829 relocInfo = NULL;
1830 }
1831
1832 CodeEmitter *
1833 TargetNVC0::createCodeEmitterGK110(Program::Type type)
1834 {
1835 CodeEmitterGK110 *emit = new CodeEmitterGK110(this);
1836 emit->setProgramType(type);
1837 return emit;
1838 }
1839
1840 } // namespace nv50_ir