8fec6a85800a8e224fb45f00bc31cc55ac61ecf4
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gm107.cpp
1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "codegen/nv50_ir_target_gm107.h"
26
27 //#define GM107_DEBUG_SCHED_DATA
28
29 namespace nv50_ir {
30
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34 CodeEmitterGM107(const TargetGM107 *);
35
36 virtual bool emitInstruction(Instruction *);
37 virtual uint32_t getMinEncodingSize(const Instruction *) const;
38
39 virtual void prepareEmission(Program *);
40 virtual void prepareEmission(Function *);
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 private:
45 const TargetGM107 *targGM107;
46
47 Program::Type progType;
48
49 const Instruction *insn;
50 const bool writeIssueDelays;
51 uint32_t *data;
52
53 private:
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value *);
61 inline void emitGPR(int pos) {
62 emitGPR(pos, (const Value *)NULL);
63 }
64 inline void emitGPR(int pos, const ValueRef &ref) {
65 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66 }
67 inline void emitGPR(int pos, const ValueRef *ref) {
68 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69 }
70 inline void emitGPR(int pos, const ValueDef &def) {
71 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72 }
73 inline void emitSYS(int, const Value *);
74 inline void emitSYS(int pos, const ValueRef &ref) {
75 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76 }
77 inline void emitPRED(int, const Value *);
78 inline void emitPRED(int pos) {
79 emitPRED(pos, (const Value *)NULL);
80 }
81 inline void emitPRED(int pos, const ValueRef &ref) {
82 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83 }
84 inline void emitPRED(int pos, const ValueDef &def) {
85 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86 }
87 inline void emitADDR(int, int, int, int, const ValueRef &);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89 inline bool longIMMD(const ValueRef &);
90 inline void emitIMMD(int, int, const ValueRef &);
91
92 void emitCond3(int, CondCode);
93 void emitCond4(int, CondCode);
94 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef &);
101 inline void emitNEG(int, const ValueRef &);
102 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode, int);
105 inline void emitRND(int pos) {
106 emitRND(pos, insn->rnd, -1);
107 }
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef &);
110
111 void emitEXIT();
112 void emitBRA();
113 void emitCAL();
114 void emitPCNT();
115 void emitCONT();
116 void emitPBK();
117 void emitBRK();
118 void emitPRET();
119 void emitRET();
120 void emitSSY();
121 void emitSYNC();
122 void emitSAM();
123 void emitRAM();
124
125 void emitMOV();
126 void emitS2R();
127 void emitF2F();
128 void emitF2I();
129 void emitI2F();
130 void emitI2I();
131 void emitSEL();
132 void emitSHFL();
133
134 void emitDADD();
135 void emitDMUL();
136 void emitDFMA();
137 void emitDMNMX();
138 void emitDSET();
139 void emitDSETP();
140
141 void emitFADD();
142 void emitFMUL();
143 void emitFFMA();
144 void emitMUFU();
145 void emitFMNMX();
146 void emitRRO();
147 void emitFCMP();
148 void emitFSET();
149 void emitFSETP();
150 void emitFSWZADD();
151
152 void emitLOP();
153 void emitNOT();
154 void emitIADD();
155 void emitIMUL();
156 void emitIMAD();
157 void emitISCADD();
158 void emitIMNMX();
159 void emitICMP();
160 void emitISET();
161 void emitISETP();
162 void emitSHL();
163 void emitSHR();
164 void emitSHF();
165 void emitPOPC();
166 void emitBFI();
167 void emitBFE();
168 void emitFLO();
169
170 void emitLDSTs(int, DataType);
171 void emitLDSTc(int);
172 void emitLDC();
173 void emitLDL();
174 void emitLDS();
175 void emitLD();
176 void emitSTL();
177 void emitSTS();
178 void emitST();
179 void emitALD();
180 void emitAST();
181 void emitISBERD();
182 void emitAL2P();
183 void emitIPA();
184 void emitATOM();
185 void emitATOMS();
186 void emitRED();
187 void emitCCTL();
188
189 void emitPIXLD();
190
191 void emitTEXs(int);
192 void emitTEX();
193 void emitTLD();
194 void emitTLD4();
195 void emitTXD();
196 void emitTXQ();
197 void emitTMML();
198 void emitDEPBAR();
199
200 void emitNOP();
201 void emitKIL();
202 void emitOUT();
203
204 void emitBAR();
205 void emitMEMBAR();
206
207 void emitVOTE();
208
209 void emitSUTarget();
210 void emitSUHandle(const int s);
211 void emitSUSTx();
212 void emitSULDx();
213 void emitSUREDx();
214 };
215
216 /*******************************************************************************
217 * general instruction layout/fields
218 ******************************************************************************/
219
220 void
221 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
222 {
223 if (b >= 0) {
224 uint32_t m = ((1ULL << s) - 1);
225 uint64_t d = (uint64_t)(v & m) << b;
226 assert(!(v & ~m) || (v & ~m) == ~m);
227 data[1] |= d >> 32;
228 data[0] |= d;
229 }
230 }
231
232 void
233 CodeEmitterGM107::emitPred()
234 {
235 if (insn->predSrc >= 0) {
236 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
237 emitField(19, 1, insn->cc == CC_NOT_P);
238 } else {
239 emitField(16, 3, 7);
240 }
241 }
242
243 void
244 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
245 {
246 code[0] = 0x00000000;
247 code[1] = hi;
248 if (pred)
249 emitPred();
250 }
251
252 void
253 CodeEmitterGM107::emitGPR(int pos, const Value *val)
254 {
255 emitField(pos, 8, val ? val->reg.data.id : 255);
256 }
257
258 void
259 CodeEmitterGM107::emitSYS(int pos, const Value *val)
260 {
261 int id = val ? val->reg.data.id : -1;
262
263 switch (id) {
264 case SV_LANEID : id = 0x00; break;
265 case SV_VERTEX_COUNT : id = 0x10; break;
266 case SV_INVOCATION_ID : id = 0x11; break;
267 case SV_THREAD_KILL : id = 0x13; break;
268 case SV_INVOCATION_INFO: id = 0x1d; break;
269 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
270 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
271 default:
272 assert(!"invalid system value");
273 id = 0;
274 break;
275 }
276
277 emitField(pos, 8, id);
278 }
279
280 void
281 CodeEmitterGM107::emitPRED(int pos, const Value *val)
282 {
283 emitField(pos, 3, val ? val->reg.data.id : 7);
284 }
285
286 void
287 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
288 const ValueRef &ref)
289 {
290 const Value *v = ref.get();
291 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
292 if (gpr >= 0)
293 emitGPR(gpr, ref.getIndirect(0));
294 emitField(off, len, v->reg.data.offset >> shr);
295 }
296
297 void
298 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
299 const ValueRef &ref)
300 {
301 const Value *v = ref.get();
302 const Symbol *s = v->asSym();
303
304 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
305
306 emitField(buf, 5, v->reg.fileIndex);
307 if (gpr >= 0)
308 emitGPR(gpr, ref.getIndirect(0));
309 emitField(off, 16, s->reg.data.offset >> shr);
310 }
311
312 bool
313 CodeEmitterGM107::longIMMD(const ValueRef &ref)
314 {
315 if (ref.getFile() == FILE_IMMEDIATE) {
316 const ImmediateValue *imm = ref.get()->asImm();
317 if (isFloatType(insn->sType)) {
318 if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
319 return true;
320 } else {
321 if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
322 (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
323 return true;
324 }
325 }
326 return false;
327 }
328
329 void
330 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
331 {
332 const ImmediateValue *imm = ref.get()->asImm();
333 uint32_t val = imm->reg.data.u32;
334
335 if (len == 19) {
336 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
337 assert(!(val & 0x00000fff));
338 val >>= 12;
339 } else if (insn->sType == TYPE_F64) {
340 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
341 val = imm->reg.data.u64 >> 44;
342 }
343 assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
344 emitField( 56, 1, (val & 0x80000) >> 19);
345 emitField(pos, len, (val & 0x7ffff));
346 } else {
347 emitField(pos, len, val);
348 }
349 }
350
351 /*******************************************************************************
352 * modifiers
353 ******************************************************************************/
354
355 void
356 CodeEmitterGM107::emitCond3(int pos, CondCode code)
357 {
358 int data = 0;
359
360 switch (code) {
361 case CC_FL : data = 0x00; break;
362 case CC_LTU:
363 case CC_LT : data = 0x01; break;
364 case CC_EQU:
365 case CC_EQ : data = 0x02; break;
366 case CC_LEU:
367 case CC_LE : data = 0x03; break;
368 case CC_GTU:
369 case CC_GT : data = 0x04; break;
370 case CC_NEU:
371 case CC_NE : data = 0x05; break;
372 case CC_GEU:
373 case CC_GE : data = 0x06; break;
374 case CC_TR : data = 0x07; break;
375 default:
376 assert(!"invalid cond3");
377 break;
378 }
379
380 emitField(pos, 3, data);
381 }
382
383 void
384 CodeEmitterGM107::emitCond4(int pos, CondCode code)
385 {
386 int data = 0;
387
388 switch (code) {
389 case CC_FL: data = 0x00; break;
390 case CC_LT: data = 0x01; break;
391 case CC_EQ: data = 0x02; break;
392 case CC_LE: data = 0x03; break;
393 case CC_GT: data = 0x04; break;
394 case CC_NE: data = 0x05; break;
395 case CC_GE: data = 0x06; break;
396 // case CC_NUM: data = 0x07; break;
397 // case CC_NAN: data = 0x08; break;
398 case CC_LTU: data = 0x09; break;
399 case CC_EQU: data = 0x0a; break;
400 case CC_LEU: data = 0x0b; break;
401 case CC_GTU: data = 0x0c; break;
402 case CC_NEU: data = 0x0d; break;
403 case CC_GEU: data = 0x0e; break;
404 case CC_TR: data = 0x0f; break;
405 default:
406 assert(!"invalid cond4");
407 break;
408 }
409
410 emitField(pos, 4, data);
411 }
412
413 void
414 CodeEmitterGM107::emitO(int pos)
415 {
416 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
417 }
418
419 void
420 CodeEmitterGM107::emitP(int pos)
421 {
422 emitField(pos, 1, insn->perPatch);
423 }
424
425 void
426 CodeEmitterGM107::emitSAT(int pos)
427 {
428 emitField(pos, 1, insn->saturate);
429 }
430
431 void
432 CodeEmitterGM107::emitCC(int pos)
433 {
434 emitField(pos, 1, insn->flagsDef >= 0);
435 }
436
437 void
438 CodeEmitterGM107::emitX(int pos)
439 {
440 emitField(pos, 1, insn->flagsSrc >= 0);
441 }
442
443 void
444 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
445 {
446 emitField(pos, 1, ref.mod.abs());
447 }
448
449 void
450 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
451 {
452 emitField(pos, 1, ref.mod.neg());
453 }
454
455 void
456 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
457 {
458 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
459 }
460
461 void
462 CodeEmitterGM107::emitFMZ(int pos, int len)
463 {
464 emitField(pos, len, insn->dnz << 1 | insn->ftz);
465 }
466
467 void
468 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
469 {
470 int rm = 0, ri = 0;
471 switch (rnd) {
472 case ROUND_NI: ri = 1;
473 case ROUND_N : rm = 0; break;
474 case ROUND_MI: ri = 1;
475 case ROUND_M : rm = 1; break;
476 case ROUND_PI: ri = 1;
477 case ROUND_P : rm = 2; break;
478 case ROUND_ZI: ri = 1;
479 case ROUND_Z : rm = 3; break;
480 default:
481 assert(!"invalid round mode");
482 break;
483 }
484 emitField(rip, 1, ri);
485 emitField(rmp, 2, rm);
486 }
487
488 void
489 CodeEmitterGM107::emitPDIV(int pos)
490 {
491 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
492 if (insn->postFactor > 0)
493 emitField(pos, 3, 7 - insn->postFactor);
494 else
495 emitField(pos, 3, 0 - insn->postFactor);
496 }
497
498 void
499 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
500 {
501 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
502 }
503
504 /*******************************************************************************
505 * control flow
506 ******************************************************************************/
507
508 void
509 CodeEmitterGM107::emitEXIT()
510 {
511 emitInsn (0xe3000000);
512 emitCond5(0x00, CC_TR);
513 }
514
515 void
516 CodeEmitterGM107::emitBRA()
517 {
518 const FlowInstruction *insn = this->insn->asFlow();
519 int gpr = -1;
520
521 if (insn->indirect) {
522 if (insn->absolute)
523 emitInsn(0xe2000000); // JMX
524 else
525 emitInsn(0xe2500000); // BRX
526 gpr = 0x08;
527 } else {
528 if (insn->absolute)
529 emitInsn(0xe2100000); // JMP
530 else
531 emitInsn(0xe2400000); // BRA
532 emitField(0x07, 1, insn->allWarp);
533 }
534
535 emitField(0x06, 1, insn->limit);
536 emitCond5(0x00, CC_TR);
537
538 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
539 int32_t pos = insn->target.bb->binPos;
540 if (writeIssueDelays && !(pos & 0x1f))
541 pos += 8;
542 if (!insn->absolute)
543 emitField(0x14, 24, pos - (codeSize + 8));
544 else
545 emitField(0x14, 32, pos);
546 } else {
547 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
548 emitField(0x05, 1, 1);
549 }
550 }
551
552 void
553 CodeEmitterGM107::emitCAL()
554 {
555 const FlowInstruction *insn = this->insn->asFlow();
556
557 if (insn->absolute) {
558 emitInsn(0xe2200000, 0); // JCAL
559 } else {
560 emitInsn(0xe2600000, 0); // CAL
561 }
562
563 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
564 if (!insn->absolute)
565 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
566 else {
567 if (insn->builtin) {
568 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
569 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
570 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
571 } else {
572 emitField(0x14, 32, insn->target.bb->binPos);
573 }
574 }
575 } else {
576 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
577 emitField(0x05, 1, 1);
578 }
579 }
580
581 void
582 CodeEmitterGM107::emitPCNT()
583 {
584 const FlowInstruction *insn = this->insn->asFlow();
585
586 emitInsn(0xe2b00000, 0);
587
588 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
589 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
590 } else {
591 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
592 emitField(0x05, 1, 1);
593 }
594 }
595
596 void
597 CodeEmitterGM107::emitCONT()
598 {
599 emitInsn (0xe3500000);
600 emitCond5(0x00, CC_TR);
601 }
602
603 void
604 CodeEmitterGM107::emitPBK()
605 {
606 const FlowInstruction *insn = this->insn->asFlow();
607
608 emitInsn(0xe2a00000, 0);
609
610 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
611 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
612 } else {
613 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
614 emitField(0x05, 1, 1);
615 }
616 }
617
618 void
619 CodeEmitterGM107::emitBRK()
620 {
621 emitInsn (0xe3400000);
622 emitCond5(0x00, CC_TR);
623 }
624
625 void
626 CodeEmitterGM107::emitPRET()
627 {
628 const FlowInstruction *insn = this->insn->asFlow();
629
630 emitInsn(0xe2700000, 0);
631
632 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
633 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
634 } else {
635 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
636 emitField(0x05, 1, 1);
637 }
638 }
639
640 void
641 CodeEmitterGM107::emitRET()
642 {
643 emitInsn (0xe3200000);
644 emitCond5(0x00, CC_TR);
645 }
646
647 void
648 CodeEmitterGM107::emitSSY()
649 {
650 const FlowInstruction *insn = this->insn->asFlow();
651
652 emitInsn(0xe2900000, 0);
653
654 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
655 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
656 } else {
657 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
658 emitField(0x05, 1, 1);
659 }
660 }
661
662 void
663 CodeEmitterGM107::emitSYNC()
664 {
665 emitInsn (0xf0f80000);
666 emitCond5(0x00, CC_TR);
667 }
668
669 void
670 CodeEmitterGM107::emitSAM()
671 {
672 emitInsn(0xe3700000, 0);
673 }
674
675 void
676 CodeEmitterGM107::emitRAM()
677 {
678 emitInsn(0xe3800000, 0);
679 }
680
681 /*******************************************************************************
682 * predicate/cc
683 ******************************************************************************/
684
685 /*******************************************************************************
686 * movement / conversion
687 ******************************************************************************/
688
689 void
690 CodeEmitterGM107::emitMOV()
691 {
692 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
693 switch (insn->src(0).getFile()) {
694 case FILE_GPR:
695 if (insn->def(0).getFile() == FILE_PREDICATE) {
696 emitInsn(0x5b6a0000);
697 emitGPR (0x08);
698 } else {
699 emitInsn(0x5c980000);
700 }
701 emitGPR (0x14, insn->src(0));
702 break;
703 case FILE_MEMORY_CONST:
704 emitInsn(0x4c980000);
705 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
706 break;
707 case FILE_IMMEDIATE:
708 emitInsn(0x38980000);
709 emitIMMD(0x14, 19, insn->src(0));
710 break;
711 case FILE_PREDICATE:
712 emitInsn(0x50880000);
713 emitPRED(0x0c, insn->src(0));
714 emitPRED(0x1d);
715 emitPRED(0x27);
716 break;
717 default:
718 assert(!"bad src file");
719 break;
720 }
721 if (insn->def(0).getFile() != FILE_PREDICATE &&
722 insn->src(0).getFile() != FILE_PREDICATE)
723 emitField(0x27, 4, insn->lanes);
724 } else {
725 emitInsn (0x01000000);
726 emitIMMD (0x14, 32, insn->src(0));
727 emitField(0x0c, 4, insn->lanes);
728 }
729
730 if (insn->def(0).getFile() == FILE_PREDICATE) {
731 emitPRED(0x27);
732 emitPRED(0x03, insn->def(0));
733 emitPRED(0x00);
734 } else {
735 emitGPR(0x00, insn->def(0));
736 }
737 }
738
739 void
740 CodeEmitterGM107::emitS2R()
741 {
742 emitInsn(0xf0c80000);
743 emitSYS (0x14, insn->src(0));
744 emitGPR (0x00, insn->def(0));
745 }
746
747 void
748 CodeEmitterGM107::emitF2F()
749 {
750 RoundMode rnd = insn->rnd;
751
752 switch (insn->op) {
753 case OP_FLOOR: rnd = ROUND_MI; break;
754 case OP_CEIL : rnd = ROUND_PI; break;
755 case OP_TRUNC: rnd = ROUND_ZI; break;
756 default:
757 break;
758 }
759
760 switch (insn->src(0).getFile()) {
761 case FILE_GPR:
762 emitInsn(0x5ca80000);
763 emitGPR (0x14, insn->src(0));
764 break;
765 case FILE_MEMORY_CONST:
766 emitInsn(0x4ca80000);
767 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
768 break;
769 case FILE_IMMEDIATE:
770 emitInsn(0x38a80000);
771 emitIMMD(0x14, 19, insn->src(0));
772 break;
773 default:
774 assert(!"bad src0 file");
775 break;
776 }
777
778 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
779 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
780 emitCC (0x2f);
781 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
782 emitFMZ (0x2c, 1);
783 emitField(0x29, 1, insn->subOp);
784 emitRND (0x27, rnd, 0x2a);
785 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
786 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
787 emitGPR (0x00, insn->def(0));
788 }
789
790 void
791 CodeEmitterGM107::emitF2I()
792 {
793 RoundMode rnd = insn->rnd;
794
795 switch (insn->op) {
796 case OP_FLOOR: rnd = ROUND_M; break;
797 case OP_CEIL : rnd = ROUND_P; break;
798 case OP_TRUNC: rnd = ROUND_Z; break;
799 default:
800 break;
801 }
802
803 switch (insn->src(0).getFile()) {
804 case FILE_GPR:
805 emitInsn(0x5cb00000);
806 emitGPR (0x14, insn->src(0));
807 break;
808 case FILE_MEMORY_CONST:
809 emitInsn(0x4cb00000);
810 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
811 break;
812 case FILE_IMMEDIATE:
813 emitInsn(0x38b00000);
814 emitIMMD(0x14, 19, insn->src(0));
815 break;
816 default:
817 assert(!"bad src0 file");
818 break;
819 }
820
821 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
822 emitCC (0x2f);
823 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
824 emitFMZ (0x2c, 1);
825 emitRND (0x27, rnd, 0x2a);
826 emitField(0x0c, 1, isSignedType(insn->dType));
827 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
828 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
829 emitGPR (0x00, insn->def(0));
830 }
831
832 void
833 CodeEmitterGM107::emitI2F()
834 {
835 RoundMode rnd = insn->rnd;
836
837 switch (insn->op) {
838 case OP_FLOOR: rnd = ROUND_M; break;
839 case OP_CEIL : rnd = ROUND_P; break;
840 case OP_TRUNC: rnd = ROUND_Z; break;
841 default:
842 break;
843 }
844
845 switch (insn->src(0).getFile()) {
846 case FILE_GPR:
847 emitInsn(0x5cb80000);
848 emitGPR (0x14, insn->src(0));
849 break;
850 case FILE_MEMORY_CONST:
851 emitInsn(0x4cb80000);
852 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
853 break;
854 case FILE_IMMEDIATE:
855 emitInsn(0x38b80000);
856 emitIMMD(0x14, 19, insn->src(0));
857 break;
858 default:
859 assert(!"bad src0 file");
860 break;
861 }
862
863 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
864 emitCC (0x2f);
865 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
866 emitField(0x29, 2, insn->subOp);
867 emitRND (0x27, rnd, -1);
868 emitField(0x0d, 1, isSignedType(insn->sType));
869 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
870 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
871 emitGPR (0x00, insn->def(0));
872 }
873
874 void
875 CodeEmitterGM107::emitI2I()
876 {
877 switch (insn->src(0).getFile()) {
878 case FILE_GPR:
879 emitInsn(0x5ce00000);
880 emitGPR (0x14, insn->src(0));
881 break;
882 case FILE_MEMORY_CONST:
883 emitInsn(0x4ce00000);
884 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
885 break;
886 case FILE_IMMEDIATE:
887 emitInsn(0x38e00000);
888 emitIMMD(0x14, 19, insn->src(0));
889 break;
890 default:
891 assert(!"bad src0 file");
892 break;
893 }
894
895 emitSAT (0x32);
896 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
897 emitCC (0x2f);
898 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
899 emitField(0x29, 2, insn->subOp);
900 emitField(0x0d, 1, isSignedType(insn->sType));
901 emitField(0x0c, 1, isSignedType(insn->dType));
902 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
903 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
904 emitGPR (0x00, insn->def(0));
905 }
906
907 static void
908 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
909 {
910 int loc = entry->loc;
911 if (data.force_persample_interp)
912 code[loc + 1] |= 1 << 10;
913 else
914 code[loc + 1] &= ~(1 << 10);
915 }
916
917 void
918 CodeEmitterGM107::emitSEL()
919 {
920 switch (insn->src(1).getFile()) {
921 case FILE_GPR:
922 emitInsn(0x5ca00000);
923 emitGPR (0x14, insn->src(1));
924 break;
925 case FILE_MEMORY_CONST:
926 emitInsn(0x4ca00000);
927 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
928 break;
929 case FILE_IMMEDIATE:
930 emitInsn(0x38a00000);
931 emitIMMD(0x14, 19, insn->src(1));
932 break;
933 default:
934 assert(!"bad src1 file");
935 break;
936 }
937
938 emitINV (0x2a, insn->src(2));
939 emitPRED(0x27, insn->src(2));
940 emitGPR (0x08, insn->src(0));
941 emitGPR (0x00, insn->def(0));
942
943 if (insn->subOp == 1) {
944 addInterp(0, 0, selpFlip);
945 }
946 }
947
948 void
949 CodeEmitterGM107::emitSHFL()
950 {
951 int type = 0;
952
953 emitInsn (0xef100000);
954
955 switch (insn->src(1).getFile()) {
956 case FILE_GPR:
957 emitGPR(0x14, insn->src(1));
958 break;
959 case FILE_IMMEDIATE:
960 emitIMMD(0x14, 5, insn->src(1));
961 type |= 1;
962 break;
963 default:
964 assert(!"invalid src1 file");
965 break;
966 }
967
968 /*XXX: what is this arg? hardcode immediate for now */
969 emitField(0x22, 13, 0x1c03);
970 type |= 2;
971
972 emitPRED (0x30);
973 emitField(0x1e, 2, insn->subOp);
974 emitField(0x1c, 2, type);
975 emitGPR (0x08, insn->src(0));
976 emitGPR (0x00, insn->def(0));
977 }
978
979 /*******************************************************************************
980 * double
981 ******************************************************************************/
982
983 void
984 CodeEmitterGM107::emitDADD()
985 {
986 switch (insn->src(1).getFile()) {
987 case FILE_GPR:
988 emitInsn(0x5c700000);
989 emitGPR (0x14, insn->src(1));
990 break;
991 case FILE_MEMORY_CONST:
992 emitInsn(0x4c700000);
993 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
994 break;
995 case FILE_IMMEDIATE:
996 emitInsn(0x38700000);
997 emitIMMD(0x14, 19, insn->src(1));
998 break;
999 default:
1000 assert(!"bad src1 file");
1001 break;
1002 }
1003 emitABS(0x31, insn->src(1));
1004 emitNEG(0x30, insn->src(0));
1005 emitCC (0x2f);
1006 emitABS(0x2e, insn->src(0));
1007 emitNEG(0x2d, insn->src(1));
1008
1009 if (insn->op == OP_SUB)
1010 code[1] ^= 0x00002000;
1011
1012 emitGPR(0x08, insn->src(0));
1013 emitGPR(0x00, insn->def(0));
1014 }
1015
1016 void
1017 CodeEmitterGM107::emitDMUL()
1018 {
1019 switch (insn->src(1).getFile()) {
1020 case FILE_GPR:
1021 emitInsn(0x5c800000);
1022 emitGPR (0x14, insn->src(1));
1023 break;
1024 case FILE_MEMORY_CONST:
1025 emitInsn(0x4c800000);
1026 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1027 break;
1028 case FILE_IMMEDIATE:
1029 emitInsn(0x38800000);
1030 emitIMMD(0x14, 19, insn->src(1));
1031 break;
1032 default:
1033 assert(!"bad src1 file");
1034 break;
1035 }
1036
1037 emitNEG2(0x30, insn->src(0), insn->src(1));
1038 emitCC (0x2f);
1039 emitRND (0x27);
1040 emitGPR (0x08, insn->src(0));
1041 emitGPR (0x00, insn->def(0));
1042 }
1043
1044 void
1045 CodeEmitterGM107::emitDFMA()
1046 {
1047 switch(insn->src(2).getFile()) {
1048 case FILE_GPR:
1049 switch (insn->src(1).getFile()) {
1050 case FILE_GPR:
1051 emitInsn(0x5b700000);
1052 emitGPR (0x14, insn->src(1));
1053 break;
1054 case FILE_MEMORY_CONST:
1055 emitInsn(0x4b700000);
1056 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1057 break;
1058 case FILE_IMMEDIATE:
1059 emitInsn(0x36700000);
1060 emitIMMD(0x14, 19, insn->src(1));
1061 break;
1062 default:
1063 assert(!"bad src1 file");
1064 break;
1065 }
1066 emitGPR (0x27, insn->src(2));
1067 break;
1068 case FILE_MEMORY_CONST:
1069 emitInsn(0x53700000);
1070 emitGPR (0x27, insn->src(1));
1071 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1072 break;
1073 default:
1074 assert(!"bad src2 file");
1075 break;
1076 }
1077
1078 emitRND (0x32);
1079 emitNEG (0x31, insn->src(2));
1080 emitNEG2(0x30, insn->src(0), insn->src(1));
1081 emitCC (0x2f);
1082 emitGPR (0x08, insn->src(0));
1083 emitGPR (0x00, insn->def(0));
1084 }
1085
1086 void
1087 CodeEmitterGM107::emitDMNMX()
1088 {
1089 switch (insn->src(1).getFile()) {
1090 case FILE_GPR:
1091 emitInsn(0x5c500000);
1092 emitGPR (0x14, insn->src(1));
1093 break;
1094 case FILE_MEMORY_CONST:
1095 emitInsn(0x4c500000);
1096 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1097 break;
1098 case FILE_IMMEDIATE:
1099 emitInsn(0x38500000);
1100 emitIMMD(0x14, 19, insn->src(1));
1101 break;
1102 default:
1103 assert(!"bad src1 file");
1104 break;
1105 }
1106
1107 emitABS (0x31, insn->src(1));
1108 emitNEG (0x30, insn->src(0));
1109 emitCC (0x2f);
1110 emitABS (0x2e, insn->src(0));
1111 emitNEG (0x2d, insn->src(1));
1112 emitField(0x2a, 1, insn->op == OP_MAX);
1113 emitPRED (0x27);
1114 emitGPR (0x08, insn->src(0));
1115 emitGPR (0x00, insn->def(0));
1116 }
1117
1118 void
1119 CodeEmitterGM107::emitDSET()
1120 {
1121 const CmpInstruction *insn = this->insn->asCmp();
1122
1123 switch (insn->src(1).getFile()) {
1124 case FILE_GPR:
1125 emitInsn(0x59000000);
1126 emitGPR (0x14, insn->src(1));
1127 break;
1128 case FILE_MEMORY_CONST:
1129 emitInsn(0x49000000);
1130 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1131 break;
1132 case FILE_IMMEDIATE:
1133 emitInsn(0x32000000);
1134 emitIMMD(0x14, 19, insn->src(1));
1135 break;
1136 default:
1137 assert(!"bad src1 file");
1138 break;
1139 }
1140
1141 if (insn->op != OP_SET) {
1142 switch (insn->op) {
1143 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1144 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1145 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1146 default:
1147 assert(!"invalid set op");
1148 break;
1149 }
1150 emitPRED(0x27, insn->src(2));
1151 } else {
1152 emitPRED(0x27);
1153 }
1154
1155 emitABS (0x36, insn->src(0));
1156 emitNEG (0x35, insn->src(1));
1157 emitField(0x34, 1, insn->dType == TYPE_F32);
1158 emitCond4(0x30, insn->setCond);
1159 emitCC (0x2f);
1160 emitABS (0x2c, insn->src(1));
1161 emitNEG (0x2b, insn->src(0));
1162 emitGPR (0x08, insn->src(0));
1163 emitGPR (0x00, insn->def(0));
1164 }
1165
1166 void
1167 CodeEmitterGM107::emitDSETP()
1168 {
1169 const CmpInstruction *insn = this->insn->asCmp();
1170
1171 switch (insn->src(1).getFile()) {
1172 case FILE_GPR:
1173 emitInsn(0x5b800000);
1174 emitGPR (0x14, insn->src(1));
1175 break;
1176 case FILE_MEMORY_CONST:
1177 emitInsn(0x4b800000);
1178 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1179 break;
1180 case FILE_IMMEDIATE:
1181 emitInsn(0x36800000);
1182 emitIMMD(0x14, 19, insn->src(1));
1183 break;
1184 default:
1185 assert(!"bad src1 file");
1186 break;
1187 }
1188
1189 if (insn->op != OP_SET) {
1190 switch (insn->op) {
1191 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1192 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1193 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1194 default:
1195 assert(!"invalid set op");
1196 break;
1197 }
1198 emitPRED(0x27, insn->src(2));
1199 } else {
1200 emitPRED(0x27);
1201 }
1202
1203 emitCond4(0x30, insn->setCond);
1204 emitABS (0x2c, insn->src(1));
1205 emitNEG (0x2b, insn->src(0));
1206 emitGPR (0x08, insn->src(0));
1207 emitABS (0x07, insn->src(0));
1208 emitNEG (0x06, insn->src(1));
1209 emitPRED (0x03, insn->def(0));
1210 if (insn->defExists(1))
1211 emitPRED(0x00, insn->def(1));
1212 else
1213 emitPRED(0x00);
1214 }
1215
1216 /*******************************************************************************
1217 * float
1218 ******************************************************************************/
1219
1220 void
1221 CodeEmitterGM107::emitFADD()
1222 {
1223 if (!longIMMD(insn->src(1))) {
1224 switch (insn->src(1).getFile()) {
1225 case FILE_GPR:
1226 emitInsn(0x5c580000);
1227 emitGPR (0x14, insn->src(1));
1228 break;
1229 case FILE_MEMORY_CONST:
1230 emitInsn(0x4c580000);
1231 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1232 break;
1233 case FILE_IMMEDIATE:
1234 emitInsn(0x38580000);
1235 emitIMMD(0x14, 19, insn->src(1));
1236 break;
1237 default:
1238 assert(!"bad src1 file");
1239 break;
1240 }
1241 emitSAT(0x32);
1242 emitABS(0x31, insn->src(1));
1243 emitNEG(0x30, insn->src(0));
1244 emitCC (0x2f);
1245 emitABS(0x2e, insn->src(0));
1246 emitNEG(0x2d, insn->src(1));
1247 emitFMZ(0x2c, 1);
1248
1249 if (insn->op == OP_SUB)
1250 code[1] ^= 0x00002000;
1251 } else {
1252 emitInsn(0x08000000);
1253 emitABS(0x39, insn->src(1));
1254 emitNEG(0x38, insn->src(0));
1255 emitFMZ(0x37, 1);
1256 emitABS(0x36, insn->src(0));
1257 emitNEG(0x35, insn->src(1));
1258 emitCC (0x34);
1259 emitIMMD(0x14, 32, insn->src(1));
1260
1261 if (insn->op == OP_SUB)
1262 code[1] ^= 0x00080000;
1263 }
1264
1265 emitGPR(0x08, insn->src(0));
1266 emitGPR(0x00, insn->def(0));
1267 }
1268
1269 void
1270 CodeEmitterGM107::emitFMUL()
1271 {
1272 if (!longIMMD(insn->src(1))) {
1273 switch (insn->src(1).getFile()) {
1274 case FILE_GPR:
1275 emitInsn(0x5c680000);
1276 emitGPR (0x14, insn->src(1));
1277 break;
1278 case FILE_MEMORY_CONST:
1279 emitInsn(0x4c680000);
1280 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1281 break;
1282 case FILE_IMMEDIATE:
1283 emitInsn(0x38680000);
1284 emitIMMD(0x14, 19, insn->src(1));
1285 break;
1286 default:
1287 assert(!"bad src1 file");
1288 break;
1289 }
1290 emitSAT (0x32);
1291 emitNEG2(0x30, insn->src(0), insn->src(1));
1292 emitCC (0x2f);
1293 emitFMZ (0x2c, 2);
1294 emitPDIV(0x29);
1295 emitRND (0x27);
1296 } else {
1297 emitInsn(0x1e000000);
1298 emitSAT (0x37);
1299 emitFMZ (0x35, 2);
1300 emitCC (0x34);
1301 emitIMMD(0x14, 32, insn->src(1));
1302 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1303 code[1] ^= 0x00080000; /* flip immd sign bit */
1304 }
1305
1306 emitGPR(0x08, insn->src(0));
1307 emitGPR(0x00, insn->def(0));
1308 }
1309
1310 void
1311 CodeEmitterGM107::emitFFMA()
1312 {
1313 /*XXX: ffma32i exists, but not using it as third src overlaps dst */
1314 switch(insn->src(2).getFile()) {
1315 case FILE_GPR:
1316 switch (insn->src(1).getFile()) {
1317 case FILE_GPR:
1318 emitInsn(0x59800000);
1319 emitGPR (0x14, insn->src(1));
1320 break;
1321 case FILE_MEMORY_CONST:
1322 emitInsn(0x49800000);
1323 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1324 break;
1325 case FILE_IMMEDIATE:
1326 emitInsn(0x32800000);
1327 emitIMMD(0x14, 19, insn->src(1));
1328 break;
1329 default:
1330 assert(!"bad src1 file");
1331 break;
1332 }
1333 emitGPR (0x27, insn->src(2));
1334 break;
1335 case FILE_MEMORY_CONST:
1336 emitInsn(0x51800000);
1337 emitGPR (0x27, insn->src(1));
1338 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1339 break;
1340 default:
1341 assert(!"bad src2 file");
1342 break;
1343 }
1344 emitRND (0x33);
1345 emitSAT (0x32);
1346 emitNEG (0x31, insn->src(2));
1347 emitNEG2(0x30, insn->src(0), insn->src(1));
1348 emitCC (0x2f);
1349
1350 emitFMZ(0x35, 2);
1351 emitGPR(0x08, insn->src(0));
1352 emitGPR(0x00, insn->def(0));
1353 }
1354
1355 void
1356 CodeEmitterGM107::emitMUFU()
1357 {
1358 int mufu = 0;
1359
1360 switch (insn->op) {
1361 case OP_COS: mufu = 0; break;
1362 case OP_SIN: mufu = 1; break;
1363 case OP_EX2: mufu = 2; break;
1364 case OP_LG2: mufu = 3; break;
1365 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1366 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1367 default:
1368 assert(!"invalid mufu");
1369 break;
1370 }
1371
1372 emitInsn (0x50800000);
1373 emitSAT (0x32);
1374 emitNEG (0x30, insn->src(0));
1375 emitABS (0x2e, insn->src(0));
1376 emitField(0x14, 3, mufu);
1377 emitGPR (0x08, insn->src(0));
1378 emitGPR (0x00, insn->def(0));
1379 }
1380
1381 void
1382 CodeEmitterGM107::emitFMNMX()
1383 {
1384 switch (insn->src(1).getFile()) {
1385 case FILE_GPR:
1386 emitInsn(0x5c600000);
1387 emitGPR (0x14, insn->src(1));
1388 break;
1389 case FILE_MEMORY_CONST:
1390 emitInsn(0x4c600000);
1391 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1392 break;
1393 case FILE_IMMEDIATE:
1394 emitInsn(0x38600000);
1395 emitIMMD(0x14, 19, insn->src(1));
1396 break;
1397 default:
1398 assert(!"bad src1 file");
1399 break;
1400 }
1401
1402 emitField(0x2a, 1, insn->op == OP_MAX);
1403 emitPRED (0x27);
1404
1405 emitABS(0x31, insn->src(1));
1406 emitNEG(0x30, insn->src(0));
1407 emitCC (0x2f);
1408 emitABS(0x2e, insn->src(0));
1409 emitNEG(0x2d, insn->src(1));
1410 emitFMZ(0x2c, 1);
1411 emitGPR(0x08, insn->src(0));
1412 emitGPR(0x00, insn->def(0));
1413 }
1414
1415 void
1416 CodeEmitterGM107::emitRRO()
1417 {
1418 switch (insn->src(0).getFile()) {
1419 case FILE_GPR:
1420 emitInsn(0x5c900000);
1421 emitGPR (0x14, insn->src(0));
1422 break;
1423 case FILE_MEMORY_CONST:
1424 emitInsn(0x4c900000);
1425 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1426 break;
1427 case FILE_IMMEDIATE:
1428 emitInsn(0x38900000);
1429 emitIMMD(0x14, 19, insn->src(0));
1430 break;
1431 default:
1432 assert(!"bad src file");
1433 break;
1434 }
1435
1436 emitABS (0x31, insn->src(0));
1437 emitNEG (0x2d, insn->src(0));
1438 emitField(0x27, 1, insn->op == OP_PREEX2);
1439 emitGPR (0x00, insn->def(0));
1440 }
1441
1442 void
1443 CodeEmitterGM107::emitFCMP()
1444 {
1445 const CmpInstruction *insn = this->insn->asCmp();
1446 CondCode cc = insn->setCond;
1447
1448 if (insn->src(2).mod.neg())
1449 cc = reverseCondCode(cc);
1450
1451 switch(insn->src(2).getFile()) {
1452 case FILE_GPR:
1453 switch (insn->src(1).getFile()) {
1454 case FILE_GPR:
1455 emitInsn(0x5ba00000);
1456 emitGPR (0x14, insn->src(1));
1457 break;
1458 case FILE_MEMORY_CONST:
1459 emitInsn(0x4ba00000);
1460 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1461 break;
1462 case FILE_IMMEDIATE:
1463 emitInsn(0x36a00000);
1464 emitIMMD(0x14, 19, insn->src(1));
1465 break;
1466 default:
1467 assert(!"bad src1 file");
1468 break;
1469 }
1470 emitGPR (0x27, insn->src(2));
1471 break;
1472 case FILE_MEMORY_CONST:
1473 emitInsn(0x53a00000);
1474 emitGPR (0x27, insn->src(1));
1475 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1476 break;
1477 default:
1478 assert(!"bad src2 file");
1479 break;
1480 }
1481
1482 emitCond4(0x30, cc);
1483 emitFMZ (0x2f, 1);
1484 emitGPR (0x08, insn->src(0));
1485 emitGPR (0x00, insn->def(0));
1486 }
1487
1488 void
1489 CodeEmitterGM107::emitFSET()
1490 {
1491 const CmpInstruction *insn = this->insn->asCmp();
1492
1493 switch (insn->src(1).getFile()) {
1494 case FILE_GPR:
1495 emitInsn(0x58000000);
1496 emitGPR (0x14, insn->src(1));
1497 break;
1498 case FILE_MEMORY_CONST:
1499 emitInsn(0x48000000);
1500 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1501 break;
1502 case FILE_IMMEDIATE:
1503 emitInsn(0x30000000);
1504 emitIMMD(0x14, 19, insn->src(1));
1505 break;
1506 default:
1507 assert(!"bad src1 file");
1508 break;
1509 }
1510
1511 if (insn->op != OP_SET) {
1512 switch (insn->op) {
1513 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1514 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1515 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1516 default:
1517 assert(!"invalid set op");
1518 break;
1519 }
1520 emitPRED(0x27, insn->src(2));
1521 } else {
1522 emitPRED(0x27);
1523 }
1524
1525 emitFMZ (0x37, 1);
1526 emitABS (0x36, insn->src(0));
1527 emitNEG (0x35, insn->src(1));
1528 emitField(0x34, 1, insn->dType == TYPE_F32);
1529 emitCond4(0x30, insn->setCond);
1530 emitCC (0x2f);
1531 emitABS (0x2c, insn->src(1));
1532 emitNEG (0x2b, insn->src(0));
1533 emitGPR (0x08, insn->src(0));
1534 emitGPR (0x00, insn->def(0));
1535 }
1536
1537 void
1538 CodeEmitterGM107::emitFSETP()
1539 {
1540 const CmpInstruction *insn = this->insn->asCmp();
1541
1542 switch (insn->src(1).getFile()) {
1543 case FILE_GPR:
1544 emitInsn(0x5bb00000);
1545 emitGPR (0x14, insn->src(1));
1546 break;
1547 case FILE_MEMORY_CONST:
1548 emitInsn(0x4bb00000);
1549 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1550 break;
1551 case FILE_IMMEDIATE:
1552 emitInsn(0x36b00000);
1553 emitIMMD(0x14, 19, insn->src(1));
1554 break;
1555 default:
1556 assert(!"bad src1 file");
1557 break;
1558 }
1559
1560 if (insn->op != OP_SET) {
1561 switch (insn->op) {
1562 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1563 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1564 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1565 default:
1566 assert(!"invalid set op");
1567 break;
1568 }
1569 emitPRED(0x27, insn->src(2));
1570 } else {
1571 emitPRED(0x27);
1572 }
1573
1574 emitCond4(0x30, insn->setCond);
1575 emitFMZ (0x2f, 1);
1576 emitABS (0x2c, insn->src(1));
1577 emitNEG (0x2b, insn->src(0));
1578 emitGPR (0x08, insn->src(0));
1579 emitABS (0x07, insn->src(0));
1580 emitNEG (0x06, insn->src(1));
1581 emitPRED (0x03, insn->def(0));
1582 if (insn->defExists(1))
1583 emitPRED(0x00, insn->def(1));
1584 else
1585 emitPRED(0x00);
1586 }
1587
/* Encode an FSWZADD instruction.
 *
 * insn->subOp is written verbatim into the 8-wide field at 0x1c and
 * insn->lanes into the single bit at 0x26.
 */
void
CodeEmitterGM107::emitFSWZADD()
{
   emitInsn (0x50f80000);
   emitCC   (0x2f);
   emitFMZ  (0x2c, 1);
   emitRND  (0x27);
   emitField(0x26, 1, insn->lanes); /* abused for .ndv */
   emitField(0x1c, 8, insn->subOp);
   /* src(1) is only encoded as a register when predSrc != 1; otherwise the
    * field at 0x14 gets emitGPR()'s default (presumably because source slot
    * 1 then holds the predicate -- confirm against predSrc's definition) */
   if (insn->predSrc != 1)
      emitGPR (0x14, insn->src(1));
   else
      emitGPR (0x14);
   emitGPR  (0x08, insn->src(0));
   emitGPR  (0x00, insn->def(0));
}
1604
1605 /*******************************************************************************
1606 * integer
1607 ******************************************************************************/
1608
1609 void
1610 CodeEmitterGM107::emitLOP()
1611 {
1612 int lop = 0;
1613
1614 switch (insn->op) {
1615 case OP_AND: lop = 0; break;
1616 case OP_OR : lop = 1; break;
1617 case OP_XOR: lop = 2; break;
1618 default:
1619 assert(!"invalid lop");
1620 break;
1621 }
1622
1623 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1624 switch (insn->src(1).getFile()) {
1625 case FILE_GPR:
1626 emitInsn(0x5c400000);
1627 emitGPR (0x14, insn->src(1));
1628 break;
1629 case FILE_MEMORY_CONST:
1630 emitInsn(0x4c400000);
1631 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1632 break;
1633 case FILE_IMMEDIATE:
1634 emitInsn(0x38400000);
1635 emitIMMD(0x14, 19, insn->src(1));
1636 break;
1637 default:
1638 assert(!"bad src1 file");
1639 break;
1640 }
1641 emitPRED (0x30);
1642 emitCC (0x2f);
1643 emitX (0x2b);
1644 emitField(0x29, 2, lop);
1645 emitINV (0x28, insn->src(1));
1646 emitINV (0x27, insn->src(0));
1647 } else {
1648 emitInsn (0x04000000);
1649 emitX (0x39);
1650 emitINV (0x38, insn->src(1));
1651 emitINV (0x37, insn->src(0));
1652 emitField(0x35, 2, lop);
1653 emitCC (0x34);
1654 emitIMMD (0x14, 32, insn->src(1));
1655 }
1656
1657 emitGPR (0x08, insn->src(0));
1658 emitGPR (0x00, insn->def(0));
1659 }
1660
1661 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1662 void
1663 CodeEmitterGM107::emitNOT()
1664 {
1665 if (!longIMMD(insn->src(0))) {
1666 switch (insn->src(0).getFile()) {
1667 case FILE_GPR:
1668 emitInsn(0x5c400700);
1669 emitGPR (0x14, insn->src(0));
1670 break;
1671 case FILE_MEMORY_CONST:
1672 emitInsn(0x4c400700);
1673 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1674 break;
1675 case FILE_IMMEDIATE:
1676 emitInsn(0x38400700);
1677 emitIMMD(0x14, 19, insn->src(0));
1678 break;
1679 default:
1680 assert(!"bad src1 file");
1681 break;
1682 }
1683 emitPRED (0x30);
1684 } else {
1685 emitInsn (0x05600000);
1686 emitIMMD (0x14, 32, insn->src(1));
1687 }
1688
1689 emitGPR(0x08);
1690 emitGPR(0x00, insn->def(0));
1691 }
1692
1693 void
1694 CodeEmitterGM107::emitIADD()
1695 {
1696 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1697 switch (insn->src(1).getFile()) {
1698 case FILE_GPR:
1699 emitInsn(0x5c100000);
1700 emitGPR (0x14, insn->src(1));
1701 break;
1702 case FILE_MEMORY_CONST:
1703 emitInsn(0x4c100000);
1704 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1705 break;
1706 case FILE_IMMEDIATE:
1707 emitInsn(0x38100000);
1708 emitIMMD(0x14, 19, insn->src(1));
1709 break;
1710 default:
1711 assert(!"bad src1 file");
1712 break;
1713 }
1714 emitSAT(0x32);
1715 emitNEG(0x31, insn->src(0));
1716 emitNEG(0x30, insn->src(1));
1717 emitCC (0x2f);
1718 emitX (0x2b);
1719 } else {
1720 emitInsn(0x1c000000);
1721 emitNEG (0x38, insn->src(0));
1722 emitSAT (0x36);
1723 emitX (0x35);
1724 emitCC (0x34);
1725 emitIMMD(0x14, 32, insn->src(1));
1726 }
1727
1728 if (insn->op == OP_SUB)
1729 code[1] ^= 0x00010000;
1730
1731 emitGPR(0x08, insn->src(0));
1732 emitGPR(0x00, insn->def(0));
1733 }
1734
1735 void
1736 CodeEmitterGM107::emitIMUL()
1737 {
1738 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1739 switch (insn->src(1).getFile()) {
1740 case FILE_GPR:
1741 emitInsn(0x5c380000);
1742 emitGPR (0x14, insn->src(1));
1743 break;
1744 case FILE_MEMORY_CONST:
1745 emitInsn(0x4c380000);
1746 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1747 break;
1748 case FILE_IMMEDIATE:
1749 emitInsn(0x38380000);
1750 emitIMMD(0x14, 19, insn->src(1));
1751 break;
1752 default:
1753 assert(!"bad src1 file");
1754 break;
1755 }
1756 emitCC (0x2f);
1757 emitField(0x29, 1, isSignedType(insn->sType));
1758 emitField(0x28, 1, isSignedType(insn->dType));
1759 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1760 } else {
1761 emitInsn (0x1f000000);
1762 emitField(0x37, 1, isSignedType(insn->sType));
1763 emitField(0x36, 1, isSignedType(insn->dType));
1764 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1765 emitCC (0x34);
1766 emitIMMD (0x14, 32, insn->src(1));
1767 }
1768
1769 emitGPR(0x08, insn->src(0));
1770 emitGPR(0x00, insn->def(0));
1771 }
1772
1773 void
1774 CodeEmitterGM107::emitIMAD()
1775 {
1776 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1777 switch(insn->src(2).getFile()) {
1778 case FILE_GPR:
1779 switch (insn->src(1).getFile()) {
1780 case FILE_GPR:
1781 emitInsn(0x5a000000);
1782 emitGPR (0x14, insn->src(1));
1783 break;
1784 case FILE_MEMORY_CONST:
1785 emitInsn(0x4a000000);
1786 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1787 break;
1788 case FILE_IMMEDIATE:
1789 emitInsn(0x34000000);
1790 emitIMMD(0x14, 19, insn->src(1));
1791 break;
1792 default:
1793 assert(!"bad src1 file");
1794 break;
1795 }
1796 emitGPR (0x27, insn->src(2));
1797 break;
1798 case FILE_MEMORY_CONST:
1799 emitInsn(0x52000000);
1800 emitGPR (0x27, insn->src(1));
1801 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1802 break;
1803 default:
1804 assert(!"bad src2 file");
1805 break;
1806 }
1807
1808 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1809 emitField(0x35, 1, isSignedType(insn->sType));
1810 emitNEG (0x34, insn->src(2));
1811 emitNEG2 (0x33, insn->src(0), insn->src(1));
1812 emitSAT (0x32);
1813 emitX (0x31);
1814 emitField(0x30, 1, isSignedType(insn->dType));
1815 emitCC (0x2f);
1816 emitGPR (0x08, insn->src(0));
1817 emitGPR (0x00, insn->def(0));
1818 }
1819
1820 void
1821 CodeEmitterGM107::emitISCADD()
1822 {
1823 switch (insn->src(2).getFile()) {
1824 case FILE_GPR:
1825 emitInsn(0x5c180000);
1826 emitGPR (0x14, insn->src(2));
1827 break;
1828 case FILE_MEMORY_CONST:
1829 emitInsn(0x4c180000);
1830 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1831 break;
1832 case FILE_IMMEDIATE:
1833 emitInsn(0x38180000);
1834 emitIMMD(0x14, 19, insn->src(2));
1835 break;
1836 default:
1837 assert(!"bad src1 file");
1838 break;
1839 }
1840 emitNEG (0x31, insn->src(0));
1841 emitNEG (0x30, insn->src(2));
1842 emitCC (0x2f);
1843 emitIMMD(0x27, 5, insn->src(1));
1844 emitGPR (0x08, insn->src(0));
1845 emitGPR (0x00, insn->def(0));
1846 }
1847
1848 void
1849 CodeEmitterGM107::emitIMNMX()
1850 {
1851 switch (insn->src(1).getFile()) {
1852 case FILE_GPR:
1853 emitInsn(0x5c200000);
1854 emitGPR (0x14, insn->src(1));
1855 break;
1856 case FILE_MEMORY_CONST:
1857 emitInsn(0x4c200000);
1858 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1859 break;
1860 case FILE_IMMEDIATE:
1861 emitInsn(0x38200000);
1862 emitIMMD(0x14, 19, insn->src(1));
1863 break;
1864 default:
1865 assert(!"bad src1 file");
1866 break;
1867 }
1868
1869 emitField(0x30, 1, isSignedType(insn->dType));
1870 emitCC (0x2f);
1871 emitField(0x2b, 2, insn->subOp);
1872 emitField(0x2a, 1, insn->op == OP_MAX);
1873 emitPRED (0x27);
1874 emitGPR (0x08, insn->src(0));
1875 emitGPR (0x00, insn->def(0));
1876 }
1877
1878 void
1879 CodeEmitterGM107::emitICMP()
1880 {
1881 const CmpInstruction *insn = this->insn->asCmp();
1882 CondCode cc = insn->setCond;
1883
1884 if (insn->src(2).mod.neg())
1885 cc = reverseCondCode(cc);
1886
1887 switch(insn->src(2).getFile()) {
1888 case FILE_GPR:
1889 switch (insn->src(1).getFile()) {
1890 case FILE_GPR:
1891 emitInsn(0x5b400000);
1892 emitGPR (0x14, insn->src(1));
1893 break;
1894 case FILE_MEMORY_CONST:
1895 emitInsn(0x4b400000);
1896 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1897 break;
1898 case FILE_IMMEDIATE:
1899 emitInsn(0x36400000);
1900 emitIMMD(0x14, 19, insn->src(1));
1901 break;
1902 default:
1903 assert(!"bad src1 file");
1904 break;
1905 }
1906 emitGPR (0x27, insn->src(2));
1907 break;
1908 case FILE_MEMORY_CONST:
1909 emitInsn(0x53400000);
1910 emitGPR (0x27, insn->src(1));
1911 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1912 break;
1913 default:
1914 assert(!"bad src2 file");
1915 break;
1916 }
1917
1918 emitCond3(0x31, cc);
1919 emitField(0x30, 1, isSignedType(insn->sType));
1920 emitGPR (0x08, insn->src(0));
1921 emitGPR (0x00, insn->def(0));
1922 }
1923
1924 void
1925 CodeEmitterGM107::emitISET()
1926 {
1927 const CmpInstruction *insn = this->insn->asCmp();
1928
1929 switch (insn->src(1).getFile()) {
1930 case FILE_GPR:
1931 emitInsn(0x5b500000);
1932 emitGPR (0x14, insn->src(1));
1933 break;
1934 case FILE_MEMORY_CONST:
1935 emitInsn(0x4b500000);
1936 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1937 break;
1938 case FILE_IMMEDIATE:
1939 emitInsn(0x36500000);
1940 emitIMMD(0x14, 19, insn->src(1));
1941 break;
1942 default:
1943 assert(!"bad src1 file");
1944 break;
1945 }
1946
1947 if (insn->op != OP_SET) {
1948 switch (insn->op) {
1949 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1950 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1951 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1952 default:
1953 assert(!"invalid set op");
1954 break;
1955 }
1956 emitPRED(0x27, insn->src(2));
1957 } else {
1958 emitPRED(0x27);
1959 }
1960
1961 emitCond3(0x31, insn->setCond);
1962 emitField(0x30, 1, isSignedType(insn->sType));
1963 emitCC (0x2f);
1964 emitField(0x2c, 1, insn->dType == TYPE_F32);
1965 emitX (0x2b);
1966 emitGPR (0x08, insn->src(0));
1967 emitGPR (0x00, insn->def(0));
1968 }
1969
1970 void
1971 CodeEmitterGM107::emitISETP()
1972 {
1973 const CmpInstruction *insn = this->insn->asCmp();
1974
1975 switch (insn->src(1).getFile()) {
1976 case FILE_GPR:
1977 emitInsn(0x5b600000);
1978 emitGPR (0x14, insn->src(1));
1979 break;
1980 case FILE_MEMORY_CONST:
1981 emitInsn(0x4b600000);
1982 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1983 break;
1984 case FILE_IMMEDIATE:
1985 emitInsn(0x36600000);
1986 emitIMMD(0x14, 19, insn->src(1));
1987 break;
1988 default:
1989 assert(!"bad src1 file");
1990 break;
1991 }
1992
1993 if (insn->op != OP_SET) {
1994 switch (insn->op) {
1995 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1996 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1997 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1998 default:
1999 assert(!"invalid set op");
2000 break;
2001 }
2002 emitPRED(0x27, insn->src(2));
2003 } else {
2004 emitPRED(0x27);
2005 }
2006
2007 emitCond3(0x31, insn->setCond);
2008 emitField(0x30, 1, isSignedType(insn->sType));
2009 emitX (0x2b);
2010 emitGPR (0x08, insn->src(0));
2011 emitPRED (0x03, insn->def(0));
2012 if (insn->defExists(1))
2013 emitPRED(0x00, insn->def(1));
2014 else
2015 emitPRED(0x00);
2016 }
2017
2018 void
2019 CodeEmitterGM107::emitSHL()
2020 {
2021 switch (insn->src(1).getFile()) {
2022 case FILE_GPR:
2023 emitInsn(0x5c480000);
2024 emitGPR (0x14, insn->src(1));
2025 break;
2026 case FILE_MEMORY_CONST:
2027 emitInsn(0x4c480000);
2028 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2029 break;
2030 case FILE_IMMEDIATE:
2031 emitInsn(0x38480000);
2032 emitIMMD(0x14, 19, insn->src(1));
2033 break;
2034 default:
2035 assert(!"bad src1 file");
2036 break;
2037 }
2038
2039 emitCC (0x2f);
2040 emitX (0x2b);
2041 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2042 emitGPR (0x08, insn->src(0));
2043 emitGPR (0x00, insn->def(0));
2044 }
2045
2046 void
2047 CodeEmitterGM107::emitSHR()
2048 {
2049 switch (insn->src(1).getFile()) {
2050 case FILE_GPR:
2051 emitInsn(0x5c280000);
2052 emitGPR (0x14, insn->src(1));
2053 break;
2054 case FILE_MEMORY_CONST:
2055 emitInsn(0x4c280000);
2056 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2057 break;
2058 case FILE_IMMEDIATE:
2059 emitInsn(0x38280000);
2060 emitIMMD(0x14, 19, insn->src(1));
2061 break;
2062 default:
2063 assert(!"bad src1 file");
2064 break;
2065 }
2066
2067 emitField(0x30, 1, isSignedType(insn->dType));
2068 emitCC (0x2f);
2069 emitX (0x2c);
2070 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2071 emitGPR (0x08, insn->src(0));
2072 emitGPR (0x00, insn->def(0));
2073 }
2074
2075 void
2076 CodeEmitterGM107::emitSHF()
2077 {
2078 unsigned type;
2079
2080 switch (insn->src(1).getFile()) {
2081 case FILE_GPR:
2082 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2083 emitGPR(0x14, insn->src(1));
2084 break;
2085 case FILE_IMMEDIATE:
2086 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2087 emitIMMD(0x14, 19, insn->src(1));
2088 break;
2089 default:
2090 assert(!"bad src1 file");
2091 break;
2092 }
2093
2094 switch (insn->sType) {
2095 case TYPE_U64:
2096 type = 2;
2097 break;
2098 case TYPE_S64:
2099 type = 3;
2100 break;
2101 default:
2102 type = 0;
2103 break;
2104 }
2105
2106 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2107 emitX (0x31);
2108 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2109 emitCC (0x2f);
2110 emitGPR (0x27, insn->src(2));
2111 emitField(0x25, 2, type);
2112 emitGPR (0x08, insn->src(0));
2113 emitGPR (0x00, insn->def(0));
2114 }
2115
2116 void
2117 CodeEmitterGM107::emitPOPC()
2118 {
2119 switch (insn->src(0).getFile()) {
2120 case FILE_GPR:
2121 emitInsn(0x5c080000);
2122 emitGPR (0x14, insn->src(0));
2123 break;
2124 case FILE_MEMORY_CONST:
2125 emitInsn(0x4c080000);
2126 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2127 break;
2128 case FILE_IMMEDIATE:
2129 emitInsn(0x38080000);
2130 emitIMMD(0x14, 19, insn->src(0));
2131 break;
2132 default:
2133 assert(!"bad src1 file");
2134 break;
2135 }
2136
2137 emitINV(0x28, insn->src(0));
2138 emitGPR(0x00, insn->def(0));
2139 }
2140
2141 void
2142 CodeEmitterGM107::emitBFI()
2143 {
2144 switch(insn->src(2).getFile()) {
2145 case FILE_GPR:
2146 switch (insn->src(1).getFile()) {
2147 case FILE_GPR:
2148 emitInsn(0x5bf00000);
2149 emitGPR (0x14, insn->src(1));
2150 break;
2151 case FILE_MEMORY_CONST:
2152 emitInsn(0x4bf00000);
2153 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2154 break;
2155 case FILE_IMMEDIATE:
2156 emitInsn(0x36f00000);
2157 emitIMMD(0x14, 19, insn->src(1));
2158 break;
2159 default:
2160 assert(!"bad src1 file");
2161 break;
2162 }
2163 emitGPR (0x27, insn->src(2));
2164 break;
2165 case FILE_MEMORY_CONST:
2166 emitInsn(0x53f00000);
2167 emitGPR (0x27, insn->src(1));
2168 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2169 break;
2170 default:
2171 assert(!"bad src2 file");
2172 break;
2173 }
2174
2175 emitCC (0x2f);
2176 emitGPR (0x08, insn->src(0));
2177 emitGPR (0x00, insn->def(0));
2178 }
2179
2180 void
2181 CodeEmitterGM107::emitBFE()
2182 {
2183 switch (insn->src(1).getFile()) {
2184 case FILE_GPR:
2185 emitInsn(0x5c000000);
2186 emitGPR (0x14, insn->src(1));
2187 break;
2188 case FILE_MEMORY_CONST:
2189 emitInsn(0x4c000000);
2190 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2191 break;
2192 case FILE_IMMEDIATE:
2193 emitInsn(0x38000000);
2194 emitIMMD(0x14, 19, insn->src(1));
2195 break;
2196 default:
2197 assert(!"bad src1 file");
2198 break;
2199 }
2200
2201 emitField(0x30, 1, isSignedType(insn->dType));
2202 emitCC (0x2f);
2203 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2204 emitGPR (0x08, insn->src(0));
2205 emitGPR (0x00, insn->def(0));
2206 }
2207
2208 void
2209 CodeEmitterGM107::emitFLO()
2210 {
2211 switch (insn->src(0).getFile()) {
2212 case FILE_GPR:
2213 emitInsn(0x5c300000);
2214 emitGPR (0x14, insn->src(0));
2215 break;
2216 case FILE_MEMORY_CONST:
2217 emitInsn(0x4c300000);
2218 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2219 break;
2220 case FILE_IMMEDIATE:
2221 emitInsn(0x38300000);
2222 emitIMMD(0x14, 19, insn->src(0));
2223 break;
2224 default:
2225 assert(!"bad src1 file");
2226 break;
2227 }
2228
2229 emitField(0x30, 1, isSignedType(insn->dType));
2230 emitCC (0x2f);
2231 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2232 emitINV (0x28, insn->src(0));
2233 emitGPR (0x00, insn->def(0));
2234 }
2235
2236 /*******************************************************************************
2237 * memory
2238 ******************************************************************************/
2239
2240 void
2241 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2242 {
2243 int data = 0;
2244
2245 switch (typeSizeof(type)) {
2246 case 1: data = isSignedType(type) ? 1 : 0; break;
2247 case 2: data = isSignedType(type) ? 3 : 2; break;
2248 case 4: data = 4; break;
2249 case 8: data = 5; break;
2250 case 16: data = 6; break;
2251 default:
2252 assert(!"bad type");
2253 break;
2254 }
2255
2256 emitField(pos, 3, data);
2257 }
2258
2259 void
2260 CodeEmitterGM107::emitLDSTc(int pos)
2261 {
2262 int mode = 0;
2263
2264 switch (insn->cache) {
2265 case CACHE_CA: mode = 0; break;
2266 case CACHE_CG: mode = 1; break;
2267 case CACHE_CS: mode = 2; break;
2268 case CACHE_CV: mode = 3; break;
2269 default:
2270 assert(!"invalid caching mode");
2271 break;
2272 }
2273
2274 emitField(pos, 2, mode);
2275 }
2276
/* Encode an LDC instruction.
 *
 * The access size code is derived from insn->dType via emitLDSTs(), subOp
 * is written verbatim into the 2-wide field at 0x2c, and src(0) is encoded
 * through emitCBUF().
 */
void
CodeEmitterGM107::emitLDC()
{
   emitInsn (0xef900000);
   emitLDSTs(0x30, insn->dType);
   emitField(0x2c, 2, insn->subOp);
   emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
   emitGPR  (0x00, insn->def(0));
}
2286
/* Encode an LDL instruction: access size from insn->dType at 0x30, caching
 * mode at 0x2c, address from src(0) via emitADDR(), destination register
 * at 0x00.
 */
void
CodeEmitterGM107::emitLDL()
{
   emitInsn (0xef400000);
   emitLDSTs(0x30, insn->dType);
   emitLDSTc(0x2c);
   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
   emitGPR  (0x00, insn->def(0));
}
2296
/* Encode an LDS instruction: access size from insn->dType at 0x30, address
 * from src(0) via emitADDR(), destination register at 0x00.  Unlike
 * emitLDL(), no caching mode is written.
 */
void
CodeEmitterGM107::emitLDS()
{
   emitInsn (0xef480000);
   emitLDSTs(0x30, insn->dType);
   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
   emitGPR  (0x00, insn->def(0));
}
2305
/* Encode an LD instruction: caching mode at 0x38, access size from
 * insn->dType at 0x35, address from src(0) via emitADDR(), destination
 * register at 0x00.
 */
void
CodeEmitterGM107::emitLD()
{
   emitInsn (0x80000000);
   emitPRED (0x3a);
   emitLDSTc(0x38);
   emitLDSTs(0x35, insn->dType);
   /* bit 0x34 is set when the indirect address value reports getSize() == 8
    * (presumably a 64-bit address -- confirm); getIndirect(0) is
    * dereferenced unconditionally, so src(0) must have an indirect */
   emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
   emitADDR (0x08, 0x14, 32, 0, insn->src(0));
   emitGPR  (0x00, insn->def(0));
}
2317
/* Encode an STL instruction: access size from insn->dType at 0x30, caching
 * mode at 0x2c, address from src(0) via emitADDR().  The value to store,
 * src(1), is written to the register field at 0x00.
 */
void
CodeEmitterGM107::emitSTL()
{
   emitInsn (0xef500000);
   emitLDSTs(0x30, insn->dType);
   emitLDSTc(0x2c);
   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
   emitGPR  (0x00, insn->src(1));
}
2327
/* Encode an STS instruction: access size from insn->dType at 0x30, address
 * from src(0) via emitADDR().  The value to store, src(1), is written to
 * the register field at 0x00.  Unlike emitSTL(), no caching mode is
 * written.
 */
void
CodeEmitterGM107::emitSTS()
{
   emitInsn (0xef580000);
   emitLDSTs(0x30, insn->dType);
   emitADDR (0x08, 0x14, 24, 0, insn->src(0));
   emitGPR  (0x00, insn->src(1));
}
2336
/* Encode an ST instruction: caching mode at 0x38, access size from
 * insn->dType at 0x35, address from src(0) via emitADDR().  The value to
 * store, src(1), is written to the register field at 0x00.
 */
void
CodeEmitterGM107::emitST()
{
   emitInsn (0xa0000000);
   emitPRED (0x3a);
   emitLDSTc(0x38);
   emitLDSTs(0x35, insn->dType);
   /* bit 0x34 is set when the indirect address value reports getSize() == 8
    * (presumably a 64-bit address -- confirm); getIndirect(0) is
    * dereferenced unconditionally, so src(0) must have an indirect */
   emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
   emitADDR (0x08, 0x14, 32, 0, insn->src(0));
   emitGPR  (0x00, insn->src(1));
}
2348
/* Encode an ALD instruction.
 *
 * src(0) supplies the address (via emitADDR()); its second indirect,
 * getIndirect(1), is written to the register field at 0x27.
 */
void
CodeEmitterGM107::emitALD()
{
   emitInsn (0xefd80000);
   /* 2-wide size field: destination size in 4-byte units, minus one
    * (assumes reg.size is in bytes -- confirm) */
   emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
   emitGPR  (0x27, insn->src(0).getIndirect(1));
   emitO    (0x20);
   emitP    (0x1f);
   emitADDR (0x08, 20, 10, 0, insn->src(0));
   emitGPR  (0x00, insn->def(0));
}
2360
/* Encode an AST instruction.
 *
 * src(0) supplies the address (via emitADDR()); its second indirect,
 * getIndirect(1), is written to the register field at 0x27.  The value to
 * store, src(1), is written to the register field at 0x00.
 */
void
CodeEmitterGM107::emitAST()
{
   emitInsn (0xeff00000);
   /* 2-wide size field: typeSizeof(dType) in 4-byte units, minus one */
   emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
   emitGPR  (0x27, insn->src(0).getIndirect(1));
   emitP    (0x1f);
   emitADDR (0x08, 20, 10, 0, insn->src(0));
   emitGPR  (0x00, insn->src(1));
}
2371
/* Encode an ISBERD instruction: src(0) in the register field at 0x08,
 * def(0) in the register field at 0x00.
 */
void
CodeEmitterGM107::emitISBERD()
{
   emitInsn(0xefd00000);
   emitGPR (0x08, insn->src(0));
   emitGPR (0x00, insn->def(0));
}
2379
/* Encode an AL2P instruction.
 *
 * The offset stored in src(0)'s value is written into the 11-wide field at
 * 0x14 and src(0)'s first indirect into the register field at 0x08.
 */
void
CodeEmitterGM107::emitAL2P()
{
   emitInsn (0xefa00000);
   /* 2-wide size field: destination size in 4-byte units, minus one
    * (assumes reg.size is in bytes -- confirm) */
   emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
   emitPRED (0x2c);
   emitO    (0x20);
   emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
   emitGPR  (0x08, insn->src(0).getIndirect(0));
   emitGPR  (0x00, insn->def(0));
}
2391
2392 static void
2393 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2394 {
2395 int ipa = entry->ipa;
2396 int reg = entry->reg;
2397 int loc = entry->loc;
2398
2399 if (data.flatshade &&
2400 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2401 ipa = NV50_IR_INTERP_FLAT;
2402 reg = 0xff;
2403 } else if (data.force_persample_interp &&
2404 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2405 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2406 ipa |= NV50_IR_INTERP_CENTROID;
2407 }
2408 code[loc + 1] &= ~(0xf << 0x14);
2409 code[loc + 1] |= (ipa & 0x3) << 0x16;
2410 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2411 code[loc + 0] &= ~(0xff << 0x14);
2412 code[loc + 0] |= reg << 0x14;
2413 }
2414
2415 void
2416 CodeEmitterGM107::emitIPA()
2417 {
2418 int ipam = 0, ipas = 0;
2419
2420 switch (insn->getInterpMode()) {
2421 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2422 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2423 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2424 case NV50_IR_INTERP_SC : ipam = 3; break;
2425 default:
2426 assert(!"invalid ipa mode");
2427 break;
2428 }
2429
2430 switch (insn->getSampleMode()) {
2431 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2432 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2433 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2434 default:
2435 assert(!"invalid ipa sample mode");
2436 break;
2437 }
2438
2439 emitInsn (0xe0000000);
2440 emitField(0x36, 2, ipam);
2441 emitField(0x34, 2, ipas);
2442 emitSAT (0x33);
2443 emitField(0x2f, 3, 7);
2444 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2445 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2446 code[1] |= 0x00000040; /* .idx */
2447 emitGPR(0x00, insn->def(0));
2448
2449 if (insn->op == OP_PINTERP) {
2450 emitGPR(0x14, insn->src(1));
2451 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2452 emitGPR(0x27, insn->src(2));
2453 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2454 } else {
2455 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2456 emitGPR(0x27, insn->src(1));
2457 emitGPR(0x14);
2458 addInterp(insn->ipa, 0xff, interpApply);
2459 }
2460
2461 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2462 emitGPR(0x27);
2463 }
2464
2465 void
2466 CodeEmitterGM107::emitATOM()
2467 {
2468 unsigned dType, subOp;
2469
2470 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2471 switch (insn->dType) {
2472 case TYPE_U32: dType = 0; break;
2473 case TYPE_U64: dType = 1; break;
2474 default: assert(!"unexpected dType"); dType = 0; break;
2475 }
2476 subOp = 15;
2477
2478 emitInsn (0xee000000);
2479 } else {
2480 switch (insn->dType) {
2481 case TYPE_U32: dType = 0; break;
2482 case TYPE_S32: dType = 1; break;
2483 case TYPE_U64: dType = 2; break;
2484 case TYPE_F32: dType = 3; break;
2485 case TYPE_B128: dType = 4; break;
2486 case TYPE_S64: dType = 5; break;
2487 default: assert(!"unexpected dType"); dType = 0; break;
2488 }
2489 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2490 subOp = 8;
2491 else
2492 subOp = insn->subOp;
2493
2494 emitInsn (0xed000000);
2495 }
2496
2497 emitField(0x34, 4, subOp);
2498 emitField(0x31, 3, dType);
2499 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2500 emitGPR (0x14, insn->src(1));
2501 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2502 emitGPR (0x00, insn->def(0));
2503 }
2504
2505 void
2506 CodeEmitterGM107::emitATOMS()
2507 {
2508 unsigned dType, subOp;
2509
2510 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2511 switch (insn->dType) {
2512 case TYPE_U32: dType = 0; break;
2513 case TYPE_U64: dType = 1; break;
2514 default: assert(!"unexpected dType"); dType = 0; break;
2515 }
2516 subOp = 4;
2517
2518 emitInsn (0xee000000);
2519 emitField(0x34, 1, dType);
2520 } else {
2521 switch (insn->dType) {
2522 case TYPE_U32: dType = 0; break;
2523 case TYPE_S32: dType = 1; break;
2524 case TYPE_U64: dType = 2; break;
2525 case TYPE_S64: dType = 3; break;
2526 default: assert(!"unexpected dType"); dType = 0; break;
2527 }
2528
2529 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2530 subOp = 8;
2531 else
2532 subOp = insn->subOp;
2533
2534 emitInsn (0xec000000);
2535 emitField(0x1c, 3, dType);
2536 }
2537
2538 emitField(0x34, 4, subOp);
2539 emitGPR (0x14, insn->src(1));
2540 emitADDR (0x08, 0x12, 22, 0, insn->src(0));
2541 emitGPR (0x00, insn->def(0));
2542 }
2543
2544 void
2545 CodeEmitterGM107::emitRED()
2546 {
2547 unsigned dType;
2548
2549 switch (insn->dType) {
2550 case TYPE_U32: dType = 0; break;
2551 case TYPE_S32: dType = 1; break;
2552 case TYPE_U64: dType = 2; break;
2553 case TYPE_F32: dType = 3; break;
2554 case TYPE_B128: dType = 4; break;
2555 case TYPE_S64: dType = 5; break;
2556 default: assert(!"unexpected dType"); dType = 0; break;
2557 }
2558
2559 emitInsn (0xebf80000);
2560 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2561 emitField(0x17, 3, insn->subOp);
2562 emitField(0x14, 3, dType);
2563 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2564 emitGPR (0x00, insn->src(1));
2565 }
2566
2567 void
2568 CodeEmitterGM107::emitCCTL()
2569 {
2570 unsigned width;
2571 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2572 emitInsn(0xef600000);
2573 width = 30;
2574 } else {
2575 emitInsn(0xef800000);
2576 width = 22;
2577 }
2578 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2579 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2580 emitField(0x00, 4, insn->subOp);
2581 }
2582
2583 /*******************************************************************************
2584 * surface
2585 ******************************************************************************/
2586
2587 void
2588 CodeEmitterGM107::emitPIXLD()
2589 {
2590 emitInsn (0xefe80000);
2591 emitPRED (0x2d);
2592 emitField(0x1f, 3, insn->subOp);
2593 emitGPR (0x08, insn->src(0));
2594 emitGPR (0x00, insn->def(0));
2595 }
2596
2597 /*******************************************************************************
2598 * texture
2599 ******************************************************************************/
2600
2601 void
2602 CodeEmitterGM107::emitTEXs(int pos)
2603 {
2604 int src1 = insn->predSrc == 1 ? 2 : 1;
2605 if (insn->srcExists(src1))
2606 emitGPR(pos, insn->src(src1));
2607 else
2608 emitGPR(pos);
2609 }
2610
2611 void
2612 CodeEmitterGM107::emitTEX()
2613 {
2614 const TexInstruction *insn = this->insn->asTex();
2615 int lodm = 0;
2616
2617 if (!insn->tex.levelZero) {
2618 switch (insn->op) {
2619 case OP_TEX: lodm = 0; break;
2620 case OP_TXB: lodm = 2; break;
2621 case OP_TXL: lodm = 3; break;
2622 default:
2623 assert(!"invalid tex op");
2624 break;
2625 }
2626 } else {
2627 lodm = 1;
2628 }
2629
2630 if (insn->tex.rIndirectSrc >= 0) {
2631 emitInsn (0xdeb80000);
2632 emitField(0x25, 2, lodm);
2633 emitField(0x24, 1, insn->tex.useOffsets == 1);
2634 } else {
2635 emitInsn (0xc0380000);
2636 emitField(0x37, 2, lodm);
2637 emitField(0x36, 1, insn->tex.useOffsets == 1);
2638 emitField(0x24, 13, insn->tex.r);
2639 }
2640
2641 emitField(0x32, 1, insn->tex.target.isShadow());
2642 emitField(0x31, 1, insn->tex.liveOnly);
2643 emitField(0x23, 1, insn->tex.derivAll);
2644 emitField(0x1f, 4, insn->tex.mask);
2645 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2646 insn->tex.target.getDim() - 1);
2647 emitField(0x1c, 1, insn->tex.target.isArray());
2648 emitTEXs (0x14);
2649 emitGPR (0x08, insn->src(0));
2650 emitGPR (0x00, insn->def(0));
2651 }
2652
2653 void
2654 CodeEmitterGM107::emitTLD()
2655 {
2656 const TexInstruction *insn = this->insn->asTex();
2657
2658 if (insn->tex.rIndirectSrc >= 0) {
2659 emitInsn (0xdd380000);
2660 } else {
2661 emitInsn (0xdc380000);
2662 emitField(0x24, 13, insn->tex.r);
2663 }
2664
2665 emitField(0x37, 1, insn->tex.levelZero == 0);
2666 emitField(0x32, 1, insn->tex.target.isMS());
2667 emitField(0x31, 1, insn->tex.liveOnly);
2668 emitField(0x23, 1, insn->tex.useOffsets == 1);
2669 emitField(0x1f, 4, insn->tex.mask);
2670 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2671 insn->tex.target.getDim() - 1);
2672 emitField(0x1c, 1, insn->tex.target.isArray());
2673 emitTEXs (0x14);
2674 emitGPR (0x08, insn->src(0));
2675 emitGPR (0x00, insn->def(0));
2676 }
2677
2678 void
2679 CodeEmitterGM107::emitTLD4()
2680 {
2681 const TexInstruction *insn = this->insn->asTex();
2682
2683 if (insn->tex.rIndirectSrc >= 0) {
2684 emitInsn (0xdef80000);
2685 emitField(0x26, 2, insn->tex.gatherComp);
2686 emitField(0x25, 2, insn->tex.useOffsets == 4);
2687 emitField(0x24, 2, insn->tex.useOffsets == 1);
2688 } else {
2689 emitInsn (0xc8380000);
2690 emitField(0x38, 2, insn->tex.gatherComp);
2691 emitField(0x37, 2, insn->tex.useOffsets == 4);
2692 emitField(0x36, 2, insn->tex.useOffsets == 1);
2693 emitField(0x24, 13, insn->tex.r);
2694 }
2695
2696 emitField(0x32, 1, insn->tex.target.isShadow());
2697 emitField(0x31, 1, insn->tex.liveOnly);
2698 emitField(0x23, 1, insn->tex.derivAll);
2699 emitField(0x1f, 4, insn->tex.mask);
2700 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2701 insn->tex.target.getDim() - 1);
2702 emitField(0x1c, 1, insn->tex.target.isArray());
2703 emitTEXs (0x14);
2704 emitGPR (0x08, insn->src(0));
2705 emitGPR (0x00, insn->def(0));
2706 }
2707
2708 void
2709 CodeEmitterGM107::emitTXD()
2710 {
2711 const TexInstruction *insn = this->insn->asTex();
2712
2713 if (insn->tex.rIndirectSrc >= 0) {
2714 emitInsn (0xde780000);
2715 } else {
2716 emitInsn (0xde380000);
2717 emitField(0x24, 13, insn->tex.r);
2718 }
2719
2720 emitField(0x31, 1, insn->tex.liveOnly);
2721 emitField(0x23, 1, insn->tex.useOffsets == 1);
2722 emitField(0x1f, 4, insn->tex.mask);
2723 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2724 insn->tex.target.getDim() - 1);
2725 emitField(0x1c, 1, insn->tex.target.isArray());
2726 emitTEXs (0x14);
2727 emitGPR (0x08, insn->src(0));
2728 emitGPR (0x00, insn->def(0));
2729 }
2730
2731 void
2732 CodeEmitterGM107::emitTMML()
2733 {
2734 const TexInstruction *insn = this->insn->asTex();
2735
2736 if (insn->tex.rIndirectSrc >= 0) {
2737 emitInsn (0xdf600000);
2738 } else {
2739 emitInsn (0xdf580000);
2740 emitField(0x24, 13, insn->tex.r);
2741 }
2742
2743 emitField(0x31, 1, insn->tex.liveOnly);
2744 emitField(0x23, 1, insn->tex.derivAll);
2745 emitField(0x1f, 4, insn->tex.mask);
2746 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2747 insn->tex.target.getDim() - 1);
2748 emitField(0x1c, 1, insn->tex.target.isArray());
2749 emitTEXs (0x14);
2750 emitGPR (0x08, insn->src(0));
2751 emitGPR (0x00, insn->def(0));
2752 }
2753
2754 void
2755 CodeEmitterGM107::emitTXQ()
2756 {
2757 const TexInstruction *insn = this->insn->asTex();
2758 int type = 0;
2759
2760 switch (insn->tex.query) {
2761 case TXQ_DIMS : type = 0x01; break;
2762 case TXQ_TYPE : type = 0x02; break;
2763 case TXQ_SAMPLE_POSITION: type = 0x05; break;
2764 case TXQ_FILTER : type = 0x10; break;
2765 case TXQ_LOD : type = 0x12; break;
2766 case TXQ_WRAP : type = 0x14; break;
2767 case TXQ_BORDER_COLOUR : type = 0x16; break;
2768 default:
2769 assert(!"invalid txq query");
2770 break;
2771 }
2772
2773 if (insn->tex.rIndirectSrc >= 0) {
2774 emitInsn (0xdf500000);
2775 } else {
2776 emitInsn (0xdf480000);
2777 emitField(0x24, 13, insn->tex.r);
2778 }
2779
2780 emitField(0x31, 1, insn->tex.liveOnly);
2781 emitField(0x1f, 4, insn->tex.mask);
2782 emitField(0x16, 6, type);
2783 emitGPR (0x08, insn->src(0));
2784 emitGPR (0x00, insn->def(0));
2785 }
2786
2787 void
2788 CodeEmitterGM107::emitDEPBAR()
2789 {
2790 emitInsn (0xf0f00000);
2791 emitField(0x1d, 1, 1); /* le */
2792 emitField(0x1a, 3, 5);
2793 emitField(0x14, 6, insn->subOp);
2794 emitField(0x00, 6, insn->subOp);
2795 }
2796
2797 /*******************************************************************************
2798 * misc
2799 ******************************************************************************/
2800
2801 void
2802 CodeEmitterGM107::emitNOP()
2803 {
2804 emitInsn(0x50b00000);
2805 }
2806
2807 void
2808 CodeEmitterGM107::emitKIL()
2809 {
2810 emitInsn (0xe3300000);
2811 emitCond5(0x00, CC_TR);
2812 }
2813
2814 void
2815 CodeEmitterGM107::emitOUT()
2816 {
2817 const int cut = insn->op == OP_RESTART || insn->subOp;
2818 const int emit = insn->op == OP_EMIT;
2819
2820 switch (insn->src(1).getFile()) {
2821 case FILE_GPR:
2822 emitInsn(0xfbe00000);
2823 emitGPR (0x14, insn->src(1));
2824 break;
2825 case FILE_IMMEDIATE:
2826 emitInsn(0xf6e00000);
2827 emitIMMD(0x14, 19, insn->src(1));
2828 break;
2829 case FILE_MEMORY_CONST:
2830 emitInsn(0xebe00000);
2831 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2832 break;
2833 default:
2834 assert(!"bad src1 file");
2835 break;
2836 }
2837
2838 emitField(0x27, 2, (cut << 1) | emit);
2839 emitGPR (0x08, insn->src(0));
2840 emitGPR (0x00, insn->def(0));
2841 }
2842
2843 void
2844 CodeEmitterGM107::emitBAR()
2845 {
2846 uint8_t subop;
2847
2848 emitInsn (0xf0a80000);
2849
2850 switch (insn->subOp) {
2851 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
2852 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
2853 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
2854 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
2855 default:
2856 subop = 0x80;
2857 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
2858 break;
2859 }
2860
2861 emitField(0x20, 8, subop);
2862
2863 // barrier id
2864 if (insn->src(0).getFile() == FILE_GPR) {
2865 emitGPR(0x08, insn->src(0));
2866 } else {
2867 ImmediateValue *imm = insn->getSrc(0)->asImm();
2868 assert(imm);
2869 emitField(0x08, 8, imm->reg.data.u32);
2870 emitField(0x2b, 1, 1);
2871 }
2872
2873 // thread count
2874 if (insn->src(1).getFile() == FILE_GPR) {
2875 emitGPR(0x14, insn->src(1));
2876 } else {
2877 ImmediateValue *imm = insn->getSrc(0)->asImm();
2878 assert(imm);
2879 emitField(0x14, 12, imm->reg.data.u32);
2880 emitField(0x2c, 1, 1);
2881 }
2882
2883 if (insn->srcExists(2) && (insn->predSrc != 2)) {
2884 emitPRED (0x27, insn->src(2));
2885 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
2886 } else {
2887 emitField(0x27, 3, 7);
2888 }
2889 }
2890
2891 void
2892 CodeEmitterGM107::emitMEMBAR()
2893 {
2894 emitInsn (0xef980000);
2895 emitField(0x08, 2, insn->subOp >> 2);
2896 }
2897
2898 void
2899 CodeEmitterGM107::emitVOTE()
2900 {
2901 assert(insn->src(0).getFile() == FILE_PREDICATE);
2902
2903 int r = -1, p = -1;
2904 for (int i = 0; insn->defExists(i); i++) {
2905 if (insn->def(i).getFile() == FILE_GPR)
2906 r = i;
2907 else if (insn->def(i).getFile() == FILE_PREDICATE)
2908 p = i;
2909 }
2910
2911 emitInsn (0x50d80000);
2912 emitField(0x30, 2, insn->subOp);
2913 if (r >= 0)
2914 emitGPR (0x00, insn->def(r));
2915 else
2916 emitGPR (0x00);
2917 if (p >= 0)
2918 emitPRED (0x2d, insn->def(p));
2919 else
2920 emitPRED (0x2d);
2921 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
2922 emitPRED (0x27, insn->src(0));
2923 }
2924
2925 void
2926 CodeEmitterGM107::emitSUTarget()
2927 {
2928 const TexInstruction *insn = this->insn->asTex();
2929 int target = 0;
2930
2931 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2932
2933 if (insn->tex.target == TEX_TARGET_BUFFER) {
2934 target = 2;
2935 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
2936 target = 4;
2937 } else if (insn->tex.target == TEX_TARGET_2D ||
2938 insn->tex.target == TEX_TARGET_RECT) {
2939 target = 6;
2940 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
2941 insn->tex.target == TEX_TARGET_CUBE ||
2942 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
2943 target = 8;
2944 } else if (insn->tex.target == TEX_TARGET_3D) {
2945 target = 10;
2946 } else {
2947 assert(insn->tex.target == TEX_TARGET_1D);
2948 }
2949 emitField(0x20, 4, target);
2950 }
2951
2952 void
2953 CodeEmitterGM107::emitSUHandle(const int s)
2954 {
2955 const TexInstruction *insn = this->insn->asTex();
2956
2957 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2958
2959 if (insn->src(s).getFile() == FILE_GPR) {
2960 emitGPR(0x27, insn->src(s));
2961 } else {
2962 ImmediateValue *imm = insn->getSrc(s)->asImm();
2963 assert(imm);
2964 emitField(0x33, 1, 1);
2965 emitField(0x24, 13, imm->reg.data.u32);
2966 }
2967 }
2968
2969 void
2970 CodeEmitterGM107::emitSUSTx()
2971 {
2972 const TexInstruction *insn = this->insn->asTex();
2973
2974 emitInsn(0xeb200000);
2975 if (insn->op == OP_SUSTB)
2976 emitField(0x34, 1, 1);
2977 emitSUTarget();
2978
2979 emitLDSTc(0x18);
2980 emitField(0x14, 4, 0xf); // rgba
2981 emitGPR (0x08, insn->src(0));
2982 emitGPR (0x00, insn->src(1));
2983
2984 emitSUHandle(2);
2985 }
2986
2987 void
2988 CodeEmitterGM107::emitSULDx()
2989 {
2990 const TexInstruction *insn = this->insn->asTex();
2991 int type = 0;
2992
2993 emitInsn(0xeb000000);
2994 if (insn->op == OP_SULDB)
2995 emitField(0x34, 1, 1);
2996 emitSUTarget();
2997
2998 switch (insn->dType) {
2999 case TYPE_S8: type = 1; break;
3000 case TYPE_U16: type = 2; break;
3001 case TYPE_S16: type = 3; break;
3002 case TYPE_U32: type = 4; break;
3003 case TYPE_U64: type = 5; break;
3004 case TYPE_B128: type = 6; break;
3005 default:
3006 assert(insn->dType == TYPE_U8);
3007 break;
3008 }
3009 emitLDSTc(0x18);
3010 emitField(0x14, 3, type);
3011 emitGPR (0x00, insn->def(0));
3012 emitGPR (0x08, insn->src(0));
3013
3014 emitSUHandle(1);
3015 }
3016
3017 void
3018 CodeEmitterGM107::emitSUREDx()
3019 {
3020 const TexInstruction *insn = this->insn->asTex();
3021 uint8_t type = 0, subOp;
3022
3023 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3024 emitInsn(0xeac00000);
3025 else
3026 emitInsn(0xea600000);
3027
3028 if (insn->op == OP_SUREDB)
3029 emitField(0x34, 1, 1);
3030 emitSUTarget();
3031
3032 // destination type
3033 switch (insn->dType) {
3034 case TYPE_S32: type = 1; break;
3035 case TYPE_U64: type = 2; break;
3036 case TYPE_F32: type = 3; break;
3037 case TYPE_S64: type = 5; break;
3038 default:
3039 assert(insn->dType == TYPE_U32);
3040 break;
3041 }
3042
3043 // atomic operation
3044 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3045 subOp = 0;
3046 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3047 subOp = 8;
3048 } else {
3049 subOp = insn->subOp;
3050 }
3051
3052 emitField(0x24, 3, type);
3053 emitField(0x1d, 4, subOp);
3054 emitGPR (0x14, insn->src(1));
3055 emitGPR (0x08, insn->src(0));
3056 emitGPR (0x00, insn->def(0));
3057
3058 emitSUHandle(2);
3059 }
3060
3061 /*******************************************************************************
3062 * assembler front-end
3063 ******************************************************************************/
3064
3065 bool
3066 CodeEmitterGM107::emitInstruction(Instruction *i)
3067 {
3068 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3069 bool ret = true;
3070
3071 insn = i;
3072
3073 if (insn->encSize != 8) {
3074 ERROR("skipping undecodable instruction: "); insn->print();
3075 return false;
3076 } else
3077 if (codeSize + size > codeSizeLimit) {
3078 ERROR("code emitter output buffer too small\n");
3079 return false;
3080 }
3081
3082 if (writeIssueDelays) {
3083 int n = ((codeSize & 0x1f) / 8) - 1;
3084 if (n < 0) {
3085 data = code;
3086 data[0] = 0x00000000;
3087 data[1] = 0x00000000;
3088 code += 2;
3089 codeSize += 8;
3090 n++;
3091 }
3092
3093 emitField(data, n * 21, 21, insn->sched);
3094 }
3095
3096 switch (insn->op) {
3097 case OP_EXIT:
3098 emitEXIT();
3099 break;
3100 case OP_BRA:
3101 emitBRA();
3102 break;
3103 case OP_CALL:
3104 emitCAL();
3105 break;
3106 case OP_PRECONT:
3107 emitPCNT();
3108 break;
3109 case OP_CONT:
3110 emitCONT();
3111 break;
3112 case OP_PREBREAK:
3113 emitPBK();
3114 break;
3115 case OP_BREAK:
3116 emitBRK();
3117 break;
3118 case OP_PRERET:
3119 emitPRET();
3120 break;
3121 case OP_RET:
3122 emitRET();
3123 break;
3124 case OP_JOINAT:
3125 emitSSY();
3126 break;
3127 case OP_JOIN:
3128 emitSYNC();
3129 break;
3130 case OP_QUADON:
3131 emitSAM();
3132 break;
3133 case OP_QUADPOP:
3134 emitRAM();
3135 break;
3136 case OP_MOV:
3137 emitMOV();
3138 break;
3139 case OP_RDSV:
3140 emitS2R();
3141 break;
3142 case OP_ABS:
3143 case OP_NEG:
3144 case OP_SAT:
3145 case OP_FLOOR:
3146 case OP_CEIL:
3147 case OP_TRUNC:
3148 case OP_CVT:
3149 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3150 insn->src(0).getFile() == FILE_PREDICATE)) {
3151 emitMOV();
3152 } else if (isFloatType(insn->dType)) {
3153 if (isFloatType(insn->sType))
3154 emitF2F();
3155 else
3156 emitI2F();
3157 } else {
3158 if (isFloatType(insn->sType))
3159 emitF2I();
3160 else
3161 emitI2I();
3162 }
3163 break;
3164 case OP_SHFL:
3165 emitSHFL();
3166 break;
3167 case OP_ADD:
3168 case OP_SUB:
3169 if (isFloatType(insn->dType)) {
3170 if (insn->dType == TYPE_F64)
3171 emitDADD();
3172 else
3173 emitFADD();
3174 } else {
3175 emitIADD();
3176 }
3177 break;
3178 case OP_MUL:
3179 if (isFloatType(insn->dType)) {
3180 if (insn->dType == TYPE_F64)
3181 emitDMUL();
3182 else
3183 emitFMUL();
3184 } else {
3185 emitIMUL();
3186 }
3187 break;
3188 case OP_MAD:
3189 case OP_FMA:
3190 if (isFloatType(insn->dType)) {
3191 if (insn->dType == TYPE_F64)
3192 emitDFMA();
3193 else
3194 emitFFMA();
3195 } else {
3196 emitIMAD();
3197 }
3198 break;
3199 case OP_SHLADD:
3200 emitISCADD();
3201 break;
3202 case OP_MIN:
3203 case OP_MAX:
3204 if (isFloatType(insn->dType)) {
3205 if (insn->dType == TYPE_F64)
3206 emitDMNMX();
3207 else
3208 emitFMNMX();
3209 } else {
3210 emitIMNMX();
3211 }
3212 break;
3213 case OP_SHL:
3214 if (typeSizeof(insn->sType) == 8)
3215 emitSHF();
3216 else
3217 emitSHL();
3218 break;
3219 case OP_SHR:
3220 if (typeSizeof(insn->sType) == 8)
3221 emitSHF();
3222 else
3223 emitSHR();
3224 break;
3225 case OP_POPCNT:
3226 emitPOPC();
3227 break;
3228 case OP_INSBF:
3229 emitBFI();
3230 break;
3231 case OP_EXTBF:
3232 emitBFE();
3233 break;
3234 case OP_BFIND:
3235 emitFLO();
3236 break;
3237 case OP_SLCT:
3238 if (isFloatType(insn->dType))
3239 emitFCMP();
3240 else
3241 emitICMP();
3242 break;
3243 case OP_SET:
3244 case OP_SET_AND:
3245 case OP_SET_OR:
3246 case OP_SET_XOR:
3247 if (insn->def(0).getFile() != FILE_PREDICATE) {
3248 if (isFloatType(insn->sType))
3249 if (insn->sType == TYPE_F64)
3250 emitDSET();
3251 else
3252 emitFSET();
3253 else
3254 emitISET();
3255 } else {
3256 if (isFloatType(insn->sType))
3257 if (insn->sType == TYPE_F64)
3258 emitDSETP();
3259 else
3260 emitFSETP();
3261 else
3262 emitISETP();
3263 }
3264 break;
3265 case OP_SELP:
3266 emitSEL();
3267 break;
3268 case OP_PRESIN:
3269 case OP_PREEX2:
3270 emitRRO();
3271 break;
3272 case OP_COS:
3273 case OP_SIN:
3274 case OP_EX2:
3275 case OP_LG2:
3276 case OP_RCP:
3277 case OP_RSQ:
3278 emitMUFU();
3279 break;
3280 case OP_AND:
3281 case OP_OR:
3282 case OP_XOR:
3283 emitLOP();
3284 break;
3285 case OP_NOT:
3286 emitNOT();
3287 break;
3288 case OP_LOAD:
3289 switch (insn->src(0).getFile()) {
3290 case FILE_MEMORY_CONST : emitLDC(); break;
3291 case FILE_MEMORY_LOCAL : emitLDL(); break;
3292 case FILE_MEMORY_SHARED: emitLDS(); break;
3293 case FILE_MEMORY_GLOBAL: emitLD(); break;
3294 default:
3295 assert(!"invalid load");
3296 emitNOP();
3297 break;
3298 }
3299 break;
3300 case OP_STORE:
3301 switch (insn->src(0).getFile()) {
3302 case FILE_MEMORY_LOCAL : emitSTL(); break;
3303 case FILE_MEMORY_SHARED: emitSTS(); break;
3304 case FILE_MEMORY_GLOBAL: emitST(); break;
3305 default:
3306 assert(!"invalid store");
3307 emitNOP();
3308 break;
3309 }
3310 break;
3311 case OP_ATOM:
3312 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3313 emitATOMS();
3314 else
3315 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3316 emitRED();
3317 else
3318 emitATOM();
3319 break;
3320 case OP_CCTL:
3321 emitCCTL();
3322 break;
3323 case OP_VFETCH:
3324 emitALD();
3325 break;
3326 case OP_EXPORT:
3327 emitAST();
3328 break;
3329 case OP_PFETCH:
3330 emitISBERD();
3331 break;
3332 case OP_AFETCH:
3333 emitAL2P();
3334 break;
3335 case OP_LINTERP:
3336 case OP_PINTERP:
3337 emitIPA();
3338 break;
3339 case OP_PIXLD:
3340 emitPIXLD();
3341 break;
3342 case OP_TEX:
3343 case OP_TXB:
3344 case OP_TXL:
3345 emitTEX();
3346 break;
3347 case OP_TXF:
3348 emitTLD();
3349 break;
3350 case OP_TXG:
3351 emitTLD4();
3352 break;
3353 case OP_TXD:
3354 emitTXD();
3355 break;
3356 case OP_TXQ:
3357 emitTXQ();
3358 break;
3359 case OP_TXLQ:
3360 emitTMML();
3361 break;
3362 case OP_TEXBAR:
3363 emitDEPBAR();
3364 break;
3365 case OP_QUADOP:
3366 emitFSWZADD();
3367 break;
3368 case OP_NOP:
3369 emitNOP();
3370 break;
3371 case OP_DISCARD:
3372 emitKIL();
3373 break;
3374 case OP_EMIT:
3375 case OP_RESTART:
3376 emitOUT();
3377 break;
3378 case OP_BAR:
3379 emitBAR();
3380 break;
3381 case OP_MEMBAR:
3382 emitMEMBAR();
3383 break;
3384 case OP_VOTE:
3385 emitVOTE();
3386 break;
3387 case OP_SUSTB:
3388 case OP_SUSTP:
3389 emitSUSTx();
3390 break;
3391 case OP_SULDB:
3392 case OP_SULDP:
3393 emitSULDx();
3394 break;
3395 case OP_SUREDB:
3396 case OP_SUREDP:
3397 emitSUREDx();
3398 break;
3399 default:
3400 assert(!"invalid opcode");
3401 emitNOP();
3402 ret = false;
3403 break;
3404 }
3405
3406 if (insn->join) {
3407 /*XXX*/
3408 }
3409
3410 code += 2;
3411 codeSize += 8;
3412 return ret;
3413 }
3414
3415 uint32_t
3416 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3417 {
3418 return 8;
3419 }
3420
3421 /*******************************************************************************
3422 * sched data calculator
3423 ******************************************************************************/
3424
3425 class SchedDataCalculatorGM107 : public Pass
3426 {
3427 public:
3428 SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3429
3430 private:
3431 struct RegScores
3432 {
3433 struct ScoreData {
3434 int r[256];
3435 int p[8];
3436 int c;
3437 } rd, wr;
3438 int base;
3439
3440 void rebase(const int base)
3441 {
3442 const int delta = this->base - base;
3443 if (!delta)
3444 return;
3445 this->base = 0;
3446
3447 for (int i = 0; i < 256; ++i) {
3448 rd.r[i] += delta;
3449 wr.r[i] += delta;
3450 }
3451 for (int i = 0; i < 8; ++i) {
3452 rd.p[i] += delta;
3453 wr.p[i] += delta;
3454 }
3455 rd.c += delta;
3456 wr.c += delta;
3457 }
3458 void wipe()
3459 {
3460 memset(&rd, 0, sizeof(rd));
3461 memset(&wr, 0, sizeof(wr));
3462 }
3463 int getLatest(const ScoreData& d) const
3464 {
3465 int max = 0;
3466 for (int i = 0; i < 256; ++i)
3467 if (d.r[i] > max)
3468 max = d.r[i];
3469 for (int i = 0; i < 8; ++i)
3470 if (d.p[i] > max)
3471 max = d.p[i];
3472 if (d.c > max)
3473 max = d.c;
3474 return max;
3475 }
3476 inline int getLatestRd() const
3477 {
3478 return getLatest(rd);
3479 }
3480 inline int getLatestWr() const
3481 {
3482 return getLatest(wr);
3483 }
3484 inline int getLatest() const
3485 {
3486 return MAX2(getLatestRd(), getLatestWr());
3487 }
3488 void setMax(const RegScores *that)
3489 {
3490 for (int i = 0; i < 256; ++i) {
3491 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3492 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3493 }
3494 for (int i = 0; i < 8; ++i) {
3495 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3496 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3497 }
3498 rd.c = MAX2(rd.c, that->rd.c);
3499 wr.c = MAX2(wr.c, that->wr.c);
3500 }
3501 void print(int cycle)
3502 {
3503 for (int i = 0; i < 256; ++i) {
3504 if (rd.r[i] > cycle)
3505 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3506 if (wr.r[i] > cycle)
3507 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3508 }
3509 for (int i = 0; i < 8; ++i) {
3510 if (rd.p[i] > cycle)
3511 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3512 if (wr.p[i] > cycle)
3513 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3514 }
3515 if (rd.c > cycle)
3516 INFO("rd $c @ %i\n", rd.c);
3517 if (wr.c > cycle)
3518 INFO("wr $c @ %i\n", wr.c);
3519 }
3520 };
3521
3522 RegScores *score; // for current BB
3523 std::vector<RegScores> scoreBoards;
3524
3525 const TargetGM107 *targ;
3526 bool visit(Function *);
3527 bool visit(BasicBlock *);
3528
3529 void commitInsn(const Instruction *, int);
3530 int calcDelay(const Instruction *, int) const;
3531 void setDelay(Instruction *, int, const Instruction *);
3532 void recordWr(const Value *, int, int);
3533 void checkRd(const Value *, int, int&) const;
3534
3535 inline void emitYield(Instruction *);
3536 inline void emitStall(Instruction *, uint8_t);
3537 inline void emitReuse(Instruction *, uint8_t);
3538 inline void emitWrDepBar(Instruction *, uint8_t);
3539 inline void emitRdDepBar(Instruction *, uint8_t);
3540 inline void emitWtDepBar(Instruction *, uint8_t);
3541
3542 inline int getStall(const Instruction *) const;
3543 inline int getWrDepBar(const Instruction *) const;
3544 inline int getRdDepBar(const Instruction *) const;
3545 inline int getWtDepBar(const Instruction *) const;
3546
3547 void setReuseFlag(Instruction *);
3548
3549 inline void printSchedInfo(int, const Instruction *) const;
3550
3551 struct LiveBarUse {
3552 LiveBarUse(Instruction *insn, Instruction *usei)
3553 : insn(insn), usei(usei) { }
3554 Instruction *insn;
3555 Instruction *usei;
3556 };
3557
3558 struct LiveBarDef {
3559 LiveBarDef(Instruction *insn, Instruction *defi)
3560 : insn(insn), defi(defi) { }
3561 Instruction *insn;
3562 Instruction *defi;
3563 };
3564
3565 bool insertBarriers(BasicBlock *);
3566
3567 Instruction *findFirstUse(const Instruction *) const;
3568 Instruction *findFirstDef(const Instruction *) const;
3569
3570 bool needRdDepBar(const Instruction *) const;
3571 bool needWrDepBar(const Instruction *) const;
3572 };
3573
3574 inline void
3575 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3576 {
3577 assert(cnt < 16);
3578 insn->sched |= cnt;
3579 }
3580
3581 inline void
3582 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3583 {
3584 insn->sched |= 1 << 4;
3585 }
3586
3587 inline void
3588 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3589 {
3590 assert(id < 6);
3591 if ((insn->sched & 0xe0) == 0xe0)
3592 insn->sched ^= 0xe0;
3593 insn->sched |= id << 5;
3594 }
3595
3596 inline void
3597 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3598 {
3599 assert(id < 6);
3600 if ((insn->sched & 0x700) == 0x700)
3601 insn->sched ^= 0x700;
3602 insn->sched |= id << 8;
3603 }
3604
3605 inline void
3606 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3607 {
3608 assert(id < 6);
3609 insn->sched |= 1 << (11 + id);
3610 }
3611
3612 inline void
3613 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3614 {
3615 assert(id < 4);
3616 insn->sched |= 1 << (17 + id);
3617 }
3618
3619 inline void
3620 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3621 const Instruction *insn) const
3622 {
3623 uint8_t st, yl, wr, rd, wt, ru;
3624
3625 st = (insn->sched & 0x00000f) >> 0;
3626 yl = (insn->sched & 0x000010) >> 4;
3627 wr = (insn->sched & 0x0000e0) >> 5;
3628 rd = (insn->sched & 0x000700) >> 8;
3629 wt = (insn->sched & 0x01f800) >> 11;
3630 ru = (insn->sched & 0x1e0000) >> 17;
3631
3632 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3633 cycle, st, yl, wr, rd, wt, ru);
3634 }
3635
3636 inline int
3637 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3638 {
3639 return insn->sched & 0xf;
3640 }
3641
3642 inline int
3643 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3644 {
3645 return (insn->sched & 0x0000e0) >> 5;
3646 }
3647
3648 inline int
3649 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3650 {
3651 return (insn->sched & 0x000700) >> 8;
3652 }
3653
3654 inline int
3655 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3656 {
3657 return (insn->sched & 0x01f800) >> 11;
3658 }
3659
3660 // Emit the reuse flag which allows to make use of the new memory hierarchy
3661 // introduced since Maxwell, the operand reuse cache.
3662 //
3663 // It allows to reduce bank conflicts by caching operands. Each time you issue
3664 // an instruction, that flag can tell the hw which operands are going to be
3665 // re-used by the next instruction. Note that the next instruction has to use
3666 // the same GPR id in the same operand slot.
3667 void
3668 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3669 {
3670 Instruction *next = insn->next;
3671 BitSet defs(255, 1);
3672
3673 if (!targ->isReuseSupported(insn))
3674 return;
3675
3676 for (int d = 0; insn->defExists(d); ++d) {
3677 const Value *def = insn->def(d).rep();
3678 if (insn->def(d).getFile() != FILE_GPR)
3679 continue;
3680 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3681 continue;
3682 defs.set(def->reg.data.id);
3683 }
3684
3685 for (int s = 0; insn->srcExists(s); s++) {
3686 const Value *src = insn->src(s).rep();
3687 if (insn->src(s).getFile() != FILE_GPR)
3688 continue;
3689 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3690 continue;
3691 if (defs.test(src->reg.data.id))
3692 continue;
3693 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3694 continue;
3695 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3696 continue;
3697 assert(s < 4);
3698 emitReuse(insn, s);
3699 }
3700 }
3701
3702 void
3703 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3704 {
3705 int a = v->reg.data.id, b;
3706
3707 switch (v->reg.file) {
3708 case FILE_GPR:
3709 b = a + v->reg.size / 4;
3710 for (int r = a; r < b; ++r)
3711 score->rd.r[r] = ready;
3712 break;
3713 case FILE_PREDICATE:
3714 // To immediately use a predicate set by any instructions, the minimum
3715 // number of stall counts is 13.
3716 score->rd.p[a] = cycle + 13;
3717 break;
3718 case FILE_FLAGS:
3719 score->rd.c = ready;
3720 break;
3721 default:
3722 break;
3723 }
3724 }
3725
3726 void
3727 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3728 {
3729 int a = v->reg.data.id, b;
3730 int ready = cycle;
3731
3732 switch (v->reg.file) {
3733 case FILE_GPR:
3734 b = a + v->reg.size / 4;
3735 for (int r = a; r < b; ++r)
3736 ready = MAX2(ready, score->rd.r[r]);
3737 break;
3738 case FILE_PREDICATE:
3739 ready = MAX2(ready, score->rd.p[a]);
3740 break;
3741 case FILE_FLAGS:
3742 ready = MAX2(ready, score->rd.c);
3743 break;
3744 default:
3745 break;
3746 }
3747 if (cycle < ready)
3748 delay = MAX2(delay, ready - cycle);
3749 }
3750
3751 void
3752 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3753 {
3754 const int ready = cycle + targ->getLatency(insn);
3755
3756 for (int d = 0; insn->defExists(d); ++d)
3757 recordWr(insn->getDef(d), cycle, ready);
3758
3759 #ifdef GM107_DEBUG_SCHED_DATA
3760 score->print(cycle);
3761 #endif
3762 }
3763
3764 #define GM107_MIN_ISSUE_DELAY 0x1
3765 #define GM107_MAX_ISSUE_DELAY 0xf
3766
3767 int
3768 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3769 {
3770 int delay = 0, ready = cycle;
3771
3772 for (int s = 0; insn->srcExists(s); ++s)
3773 checkRd(insn->getSrc(s), cycle, delay);
3774
3775 // TODO: make use of getReadLatency()!
3776
3777 return MAX2(delay, ready - cycle);
3778 }
3779
3780 void
3781 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3782 const Instruction *next)
3783 {
3784 const OpClass cl = targ->getOpClass(insn->op);
3785 int wr, rd;
3786
3787 if (insn->op == OP_EXIT ||
3788 insn->op == OP_BAR ||
3789 insn->op == OP_MEMBAR) {
3790 delay = GM107_MAX_ISSUE_DELAY;
3791 } else
3792 if (insn->op == OP_QUADON ||
3793 insn->op == OP_QUADPOP) {
3794 delay = 0xd;
3795 } else
3796 if (cl == OPCLASS_FLOW || insn->join) {
3797 delay = 0xd;
3798 }
3799
3800 if (!next || !targ->canDualIssue(insn, next)) {
3801 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
3802 } else {
3803 delay = 0x0; // dual-issue
3804 }
3805
3806 wr = getWrDepBar(insn);
3807 rd = getRdDepBar(insn);
3808
3809 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
3810 // Barriers take one additional clock cycle to become active on top of
3811 // the clock consumed by the instruction producing it.
3812 if (!next || insn->bb != next->bb) {
3813 delay = 0x2;
3814 } else {
3815 int wt = getWtDepBar(next);
3816 if ((wt & (1 << wr)) | (wt & (1 << rd)))
3817 delay = 0x2;
3818 }
3819 }
3820
3821 emitStall(insn, delay);
3822 }
3823
3824
3825 // Return true when the given instruction needs to emit a read dependency
3826 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3827 // setting the maximum number of stall counts is not enough.
3828 bool
3829 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
3830 {
3831 BitSet srcs(255, 1), defs(255, 1);
3832 int a, b;
3833
3834 if (!targ->isBarrierRequired(insn))
3835 return false;
3836
3837 // Do not emit a read dependency barrier when the instruction doesn't use
3838 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3839 for (int s = 0; insn->srcExists(s); ++s) {
3840 const Value *src = insn->src(s).rep();
3841 if (insn->src(s).getFile() != FILE_GPR)
3842 continue;
3843 if (src->reg.data.id == 255)
3844 continue;
3845
3846 a = src->reg.data.id;
3847 b = a + src->reg.size / 4;
3848 for (int r = a; r < b; ++r)
3849 srcs.set(r);
3850 }
3851
3852 if (!srcs.popCount())
3853 return false;
3854
3855 // Do not emit a read dependency barrier when the output GPRs are equal to
3856 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3857 // be produced and WaR hazards are prevented.
3858 for (int d = 0; insn->defExists(d); ++d) {
3859 const Value *def = insn->def(d).rep();
3860 if (insn->def(d).getFile() != FILE_GPR)
3861 continue;
3862 if (def->reg.data.id == 255)
3863 continue;
3864
3865 a = def->reg.data.id;
3866 b = a + def->reg.size / 4;
3867 for (int r = a; r < b; ++r)
3868 defs.set(r);
3869 }
3870
3871 srcs.andNot(defs);
3872 if (!srcs.popCount())
3873 return false;
3874
3875 return true;
3876 }
3877
3878 // Return true when the given instruction needs to emit a write dependency
3879 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3880 // setting the maximum number of stall counts is not enough. This is only legal
3881 // if the instruction output something.
3882 bool
3883 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
3884 {
3885 if (!targ->isBarrierRequired(insn))
3886 return false;
3887
3888 for (int d = 0; insn->defExists(d); ++d) {
3889 if (insn->def(d).getFile() == FILE_GPR ||
3890 insn->def(d).getFile() == FILE_PREDICATE)
3891 return true;
3892 }
3893 return false;
3894 }
3895
3896 // Find the next instruction inside the same basic block which uses the output
3897 // of the given instruction in order to avoid RaW hazards.
3898 Instruction *
3899 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
3900 {
3901 Instruction *insn, *next;
3902 int minGPR, maxGPR;
3903
3904 if (!bari->defExists(0))
3905 return NULL;
3906
3907 minGPR = bari->def(0).rep()->reg.data.id;
3908 maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
3909
3910 for (insn = bari->next; insn != NULL; insn = next) {
3911 next = insn->next;
3912
3913 for (int s = 0; insn->srcExists(s); ++s) {
3914 const Value *src = insn->src(s).rep();
3915 if (bari->def(0).getFile() == FILE_GPR) {
3916 if (insn->src(s).getFile() != FILE_GPR ||
3917 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3918 src->reg.data.id > maxGPR)
3919 continue;
3920 return insn;
3921 } else
3922 if (bari->def(0).getFile() == FILE_PREDICATE) {
3923 if (insn->src(s).getFile() != FILE_PREDICATE ||
3924 src->reg.data.id != minGPR)
3925 continue;
3926 return insn;
3927 }
3928 }
3929 }
3930 return NULL;
3931 }
3932
3933 // Find the next instruction inside the same basic block which overwrites, at
3934 // least, one source of the given instruction in order to avoid WaR hazards.
3935 Instruction *
3936 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
3937 {
3938 Instruction *insn, *next;
3939 int minGPR, maxGPR;
3940
3941 for (insn = bari->next; insn != NULL; insn = next) {
3942 next = insn->next;
3943
3944 for (int d = 0; insn->defExists(d); ++d) {
3945 const Value *def = insn->def(d).rep();
3946 if (insn->def(d).getFile() != FILE_GPR)
3947 continue;
3948
3949 minGPR = def->reg.data.id;
3950 maxGPR = minGPR + def->reg.size / 4 - 1;
3951
3952 for (int s = 0; bari->srcExists(s); ++s) {
3953 const Value *src = bari->src(s).rep();
3954 if (bari->src(s).getFile() != FILE_GPR ||
3955 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3956 src->reg.data.id > maxGPR)
3957 continue;
3958 return insn;
3959 }
3960 }
3961 }
3962 return NULL;
3963 }
3964
3965 // Dependency barriers:
3966 // This pass is a bit ugly and could probably be improved by performing a
3967 // better allocation.
3968 //
3969 // The main idea is to avoid WaR and RaW hazards by emitting read/write
3970 // dependency barriers using the control codes.
3971 bool
3972 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
3973 {
3974 std::list<LiveBarUse> live_uses;
3975 std::list<LiveBarDef> live_defs;
3976 Instruction *insn, *next;
3977 BitSet bars(6, 1);
3978 int bar_id;
3979
3980 for (insn = bb->getEntry(); insn != NULL; insn = next) {
3981 Instruction *usei = NULL, *defi = NULL;
3982 bool need_wr_bar, need_rd_bar;
3983
3984 next = insn->next;
3985
3986 // Expire old barrier uses.
3987 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
3988 it != live_uses.end();) {
3989 if (insn->serial >= it->usei->serial) {
3990 int wr = getWrDepBar(it->insn);
3991 emitWtDepBar(insn, wr);
3992 bars.clr(wr); // free barrier
3993 it = live_uses.erase(it);
3994 continue;
3995 }
3996 ++it;
3997 }
3998
3999 // Expire old barrier defs.
4000 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4001 it != live_defs.end();) {
4002 if (insn->serial >= it->defi->serial) {
4003 int rd = getRdDepBar(it->insn);
4004 emitWtDepBar(insn, rd);
4005 bars.clr(rd); // free barrier
4006 it = live_defs.erase(it);
4007 continue;
4008 }
4009 ++it;
4010 }
4011
4012 need_wr_bar = needWrDepBar(insn);
4013 need_rd_bar = needRdDepBar(insn);
4014
4015 if (need_wr_bar) {
4016 // When the instruction requires to emit a write dependency barrier
4017 // (all which write something at a variable latency), find the next
4018 // instruction which reads the outputs.
4019 usei = findFirstUse(insn);
4020
4021 // Allocate and emit a new barrier.
4022 bar_id = bars.findFreeRange(1);
4023 if (bar_id == -1)
4024 bar_id = 5;
4025 bars.set(bar_id);
4026 emitWrDepBar(insn, bar_id);
4027 if (usei)
4028 live_uses.push_back(LiveBarUse(insn, usei));
4029 }
4030
4031 if (need_rd_bar) {
4032 // When the instruction requires to emit a read dependency barrier
4033 // (all which read something at a variable latency), find the next
4034 // instruction which will write the inputs.
4035 defi = findFirstDef(insn);
4036
4037 if (usei && defi && usei->serial <= defi->serial)
4038 continue;
4039
4040 // Allocate and emit a new barrier.
4041 bar_id = bars.findFreeRange(1);
4042 if (bar_id == -1)
4043 bar_id = 5;
4044 bars.set(bar_id);
4045 emitRdDepBar(insn, bar_id);
4046 if (defi)
4047 live_defs.push_back(LiveBarDef(insn, defi));
4048 }
4049 }
4050
4051 // Remove unnecessary barrier waits.
4052 BitSet alive_bars(6, 1);
4053 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4054 int wr, rd, wt;
4055
4056 next = insn->next;
4057
4058 wr = getWrDepBar(insn);
4059 rd = getRdDepBar(insn);
4060 wt = getWtDepBar(insn);
4061
4062 for (int idx = 0; idx < 6; ++idx) {
4063 if (!(wt & (1 << idx)))
4064 continue;
4065 if (!alive_bars.test(idx)) {
4066 insn->sched &= ~(1 << (11 + idx));
4067 } else {
4068 alive_bars.clr(idx);
4069 }
4070 }
4071
4072 if (wr < 6)
4073 alive_bars.set(wr);
4074 if (rd < 6)
4075 alive_bars.set(rd);
4076 }
4077
4078 return true;
4079 }
4080
4081 bool
4082 SchedDataCalculatorGM107::visit(Function *func)
4083 {
4084 ArrayList insns;
4085
4086 func->orderInstructions(insns);
4087
4088 scoreBoards.resize(func->cfg.getSize());
4089 for (size_t i = 0; i < scoreBoards.size(); ++i)
4090 scoreBoards[i].wipe();
4091 return true;
4092 }
4093
4094 bool
4095 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4096 {
4097 Instruction *insn, *next = NULL;
4098 int cycle = 0;
4099
4100 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4101 /*XXX*/
4102 insn->sched = 0x7e0;
4103 }
4104
4105 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4106 return true;
4107
4108 // Insert read/write dependency barriers for instructions which don't
4109 // operate at a fixed latency.
4110 insertBarriers(bb);
4111
4112 score = &scoreBoards.at(bb->getId());
4113
4114 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4115 // back branches will wait until all target dependencies are satisfied
4116 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4117 continue;
4118 BasicBlock *in = BasicBlock::get(ei.getNode());
4119 score->setMax(&scoreBoards.at(in->getId()));
4120 }
4121
4122 #ifdef GM107_DEBUG_SCHED_DATA
4123 INFO("=== BB:%i initial scores\n", bb->getId());
4124 score->print(cycle);
4125 #endif
4126
4127 // Because barriers are allocated locally (intra-BB), we have to make sure
4128 // that all produced barriers have been consumed before entering inside a
4129 // new basic block. The best way is to do a global allocation pre RA but
4130 // it's really more difficult, especially because of the phi nodes. Anyways,
4131 // it seems like that waiting on a barrier which has already been consumed
4132 // doesn't add any additional cost, it's just not elegant!
4133 Instruction *start = bb->getEntry();
4134 if (start && bb->cfg.incidentCount() > 0) {
4135 for (int b = 0; b < 6; b++)
4136 emitWtDepBar(start, b);
4137 }
4138
4139 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4140 next = insn->next;
4141
4142 commitInsn(insn, cycle);
4143 int delay = calcDelay(next, cycle);
4144 setDelay(insn, delay, next);
4145 cycle += getStall(insn);
4146
4147 setReuseFlag(insn);
4148
4149 // XXX: The yield flag seems to destroy a bunch of things when it is
4150 // set on every instruction, need investigation.
4151 //emitYield(insn);
4152
4153 #ifdef GM107_DEBUG_SCHED_DATA
4154 printSchedInfo(cycle, insn);
4155 insn->print();
4156 next->print();
4157 #endif
4158 }
4159
4160 if (!insn)
4161 return true;
4162 commitInsn(insn, cycle);
4163
4164 int bbDelay = -1;
4165
4166 #ifdef GM107_DEBUG_SCHED_DATA
4167 fprintf(stderr, "last instruction is : ");
4168 insn->print();
4169 fprintf(stderr, "cycle=%d\n", cycle);
4170 #endif
4171
4172 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4173 BasicBlock *out = BasicBlock::get(ei.getNode());
4174
4175 if (ei.getType() != Graph::Edge::BACK) {
4176 // Only test the first instruction of the outgoing block.
4177 next = out->getEntry();
4178 if (next) {
4179 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4180 } else {
4181 // When the outgoing BB is empty, make sure to set the number of
4182 // stall counts needed by the instruction because we don't know the
4183 // next instruction.
4184 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4185 }
4186 } else {
4187 // Wait until all dependencies are satisfied.
4188 const int regsFree = score->getLatest();
4189 next = out->getFirst();
4190 for (int c = cycle; next && c < regsFree; next = next->next) {
4191 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4192 c += getStall(next);
4193 }
4194 next = NULL;
4195 }
4196 }
4197 if (bb->cfg.outgoingCount() != 1)
4198 next = NULL;
4199 setDelay(insn, bbDelay, next);
4200 cycle += getStall(insn);
4201
4202 score->rebase(cycle); // common base for initializing out blocks' scores
4203 return true;
4204 }
4205
4206 /*******************************************************************************
4207 * main
4208 ******************************************************************************/
4209
4210 void
4211 CodeEmitterGM107::prepareEmission(Function *func)
4212 {
4213 SchedDataCalculatorGM107 sched(targGM107);
4214 CodeEmitter::prepareEmission(func);
4215 sched.run(func, true, true);
4216 }
4217
// Number of bundles needed to hold `size` bytes, at 24 bytes per bundle,
// rounded up.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bytesPerBundle = 24;

   return (size + bytesPerBundle - 1) / bytesPerBundle;
}
4222
4223 void
4224 CodeEmitterGM107::prepareEmission(Program *prog)
4225 {
4226 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4227 !fi.end(); fi.next()) {
4228 Function *func = reinterpret_cast<Function *>(fi.get());
4229 func->binPos = prog->binSize;
4230 prepareEmission(func);
4231
4232 // adjust sizes & positions for schedulding info:
4233 if (prog->getTarget()->hasSWSched) {
4234 uint32_t adjPos = func->binPos;
4235 BasicBlock *bb = NULL;
4236 for (int i = 0; i < func->bbCount; ++i) {
4237 bb = func->bbArray[i];
4238 int32_t adjSize = bb->binSize;
4239 if (adjPos % 32) {
4240 adjSize -= 32 - adjPos % 32;
4241 if (adjSize < 0)
4242 adjSize = 0;
4243 }
4244 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4245 bb->binPos = adjPos;
4246 bb->binSize = adjSize;
4247 adjPos += adjSize;
4248 }
4249 if (bb)
4250 func->binSize = adjPos - func->binPos;
4251 }
4252
4253 prog->binSize += func->binSize;
4254 }
4255 }
4256
4257 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4258 : CodeEmitter(target),
4259 targGM107(target),
4260 writeIssueDelays(target->hasSWSched)
4261 {
4262 code = NULL;
4263 codeSize = codeSizeLimit = 0;
4264 relocInfo = NULL;
4265 }
4266
4267 CodeEmitter *
4268 TargetGM107::createCodeEmitterGM107(Program::Type type)
4269 {
4270 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4271 emit->setProgramType(type);
4272 return emit;
4273 }
4274
4275 } // namespace nv50_ir