shader-packing
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gm107.cpp
1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "codegen/nv50_ir_target_gm107.h"
26
27 //#define GM107_DEBUG_SCHED_DATA
28
29 namespace nv50_ir {
30
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34 CodeEmitterGM107(const TargetGM107 *);
35
36 virtual bool emitInstruction(Instruction *);
37 virtual uint32_t getMinEncodingSize(const Instruction *) const;
38
39 virtual void prepareEmission(Program *);
40 virtual void prepareEmission(Function *);
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 private:
45 const TargetGM107 *targGM107;
46
47 Program::Type progType;
48
49 const Instruction *insn;
50 const bool writeIssueDelays;
51 uint32_t *data;
52
53 private:
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value *);
61 inline void emitGPR(int pos) {
62 emitGPR(pos, (const Value *)NULL);
63 }
64 inline void emitGPR(int pos, const ValueRef &ref) {
65 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66 }
67 inline void emitGPR(int pos, const ValueRef *ref) {
68 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69 }
70 inline void emitGPR(int pos, const ValueDef &def) {
71 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72 }
73 inline void emitSYS(int, const Value *);
74 inline void emitSYS(int pos, const ValueRef &ref) {
75 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76 }
77 inline void emitPRED(int, const Value *);
78 inline void emitPRED(int pos) {
79 emitPRED(pos, (const Value *)NULL);
80 }
81 inline void emitPRED(int pos, const ValueRef &ref) {
82 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83 }
84 inline void emitPRED(int pos, const ValueDef &def) {
85 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86 }
87 inline void emitADDR(int, int, int, int, const ValueRef &);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89 inline bool longIMMD(const ValueRef &);
90 inline void emitIMMD(int, int, const ValueRef &);
91
92 void emitCond3(int, CondCode);
93 void emitCond4(int, CondCode);
94 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef &);
101 inline void emitNEG(int, const ValueRef &);
102 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode, int);
105 inline void emitRND(int pos) {
106 emitRND(pos, insn->rnd, -1);
107 }
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef &);
110
111 void emitEXIT();
112 void emitBRA();
113 void emitCAL();
114 void emitPCNT();
115 void emitCONT();
116 void emitPBK();
117 void emitBRK();
118 void emitPRET();
119 void emitRET();
120 void emitSSY();
121 void emitSYNC();
122 void emitSAM();
123 void emitRAM();
124
125 void emitMOV();
126 void emitS2R();
127 void emitCS2R();
128 void emitF2F();
129 void emitF2I();
130 void emitI2F();
131 void emitI2I();
132 void emitSEL();
133 void emitSHFL();
134
135 void emitDADD();
136 void emitDMUL();
137 void emitDFMA();
138 void emitDMNMX();
139 void emitDSET();
140 void emitDSETP();
141
142 void emitFADD();
143 void emitFMUL();
144 void emitFFMA();
145 void emitMUFU();
146 void emitFMNMX();
147 void emitRRO();
148 void emitFCMP();
149 void emitFSET();
150 void emitFSETP();
151 void emitFSWZADD();
152
153 void emitLOP();
154 void emitNOT();
155 void emitIADD();
156 void emitIMUL();
157 void emitIMAD();
158 void emitISCADD();
159 void emitXMAD();
160 void emitIMNMX();
161 void emitICMP();
162 void emitISET();
163 void emitISETP();
164 void emitSHL();
165 void emitSHR();
166 void emitSHF();
167 void emitPOPC();
168 void emitBFI();
169 void emitBFE();
170 void emitFLO();
171
172 void emitLDSTs(int, DataType);
173 void emitLDSTc(int);
174 void emitLDC();
175 void emitLDL();
176 void emitLDS();
177 void emitLD();
178 void emitSTL();
179 void emitSTS();
180 void emitST();
181 void emitALD();
182 void emitAST();
183 void emitISBERD();
184 void emitAL2P();
185 void emitIPA();
186 void emitATOM();
187 void emitATOMS();
188 void emitRED();
189 void emitCCTL();
190
191 void emitPIXLD();
192
193 void emitTEXs(int);
194 void emitTEX();
195 void emitTEXS();
196 void emitTLD();
197 void emitTLD4();
198 void emitTXD();
199 void emitTXQ();
200 void emitTMML();
201 void emitDEPBAR();
202
203 void emitNOP();
204 void emitKIL();
205 void emitOUT();
206
207 void emitBAR();
208 void emitMEMBAR();
209
210 void emitVOTE();
211
212 void emitSUTarget();
213 void emitSUHandle(const int s);
214 void emitSUSTx();
215 void emitSULDx();
216 void emitSUREDx();
217 };
218
219 /*******************************************************************************
220 * general instruction layout/fields
221 ******************************************************************************/
222
223 void
224 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
225 {
226 if (b >= 0) {
227 uint32_t m = ((1ULL << s) - 1);
228 uint64_t d = (uint64_t)(v & m) << b;
229 assert(!(v & ~m) || (v & ~m) == ~m);
230 data[1] |= d >> 32;
231 data[0] |= d;
232 }
233 }
234
235 void
236 CodeEmitterGM107::emitPred()
237 {
238 if (insn->predSrc >= 0) {
239 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
240 emitField(19, 1, insn->cc == CC_NOT_P);
241 } else {
242 emitField(16, 3, 7);
243 }
244 }
245
246 void
247 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
248 {
249 code[0] = 0x00000000;
250 code[1] = hi;
251 if (pred)
252 emitPred();
253 }
254
255 void
256 CodeEmitterGM107::emitGPR(int pos, const Value *val)
257 {
258 emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
259 val->reg.data.id : 255);
260 }
261
262 void
263 CodeEmitterGM107::emitSYS(int pos, const Value *val)
264 {
265 int id = val ? val->reg.data.id : -1;
266
267 switch (id) {
268 case SV_LANEID : id = 0x00; break;
269 case SV_VERTEX_COUNT : id = 0x10; break;
270 case SV_INVOCATION_ID : id = 0x11; break;
271 case SV_THREAD_KILL : id = 0x13; break;
272 case SV_INVOCATION_INFO: id = 0x1d; break;
273 case SV_COMBINED_TID : id = 0x20; break;
274 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
275 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
276 case SV_LANEMASK_EQ : id = 0x38; break;
277 case SV_LANEMASK_LT : id = 0x39; break;
278 case SV_LANEMASK_LE : id = 0x3a; break;
279 case SV_LANEMASK_GT : id = 0x3b; break;
280 case SV_LANEMASK_GE : id = 0x3c; break;
281 case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
282 default:
283 assert(!"invalid system value");
284 id = 0;
285 break;
286 }
287
288 emitField(pos, 8, id);
289 }
290
291 void
292 CodeEmitterGM107::emitPRED(int pos, const Value *val)
293 {
294 emitField(pos, 3, val ? val->reg.data.id : 7);
295 }
296
297 void
298 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
299 const ValueRef &ref)
300 {
301 const Value *v = ref.get();
302 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
303 if (gpr >= 0)
304 emitGPR(gpr, ref.getIndirect(0));
305 emitField(off, len, v->reg.data.offset >> shr);
306 }
307
308 void
309 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
310 const ValueRef &ref)
311 {
312 const Value *v = ref.get();
313 const Symbol *s = v->asSym();
314
315 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
316
317 emitField(buf, 5, v->reg.fileIndex);
318 if (gpr >= 0)
319 emitGPR(gpr, ref.getIndirect(0));
320 emitField(off, 16, s->reg.data.offset >> shr);
321 }
322
323 bool
324 CodeEmitterGM107::longIMMD(const ValueRef &ref)
325 {
326 if (ref.getFile() == FILE_IMMEDIATE) {
327 const ImmediateValue *imm = ref.get()->asImm();
328 if (isFloatType(insn->sType))
329 return imm->reg.data.u32 & 0xfff;
330 else
331 return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
332 }
333 return false;
334 }
335
336 void
337 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
338 {
339 const ImmediateValue *imm = ref.get()->asImm();
340 uint32_t val = imm->reg.data.u32;
341
342 if (len == 19) {
343 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
344 assert(!(val & 0x00000fff));
345 val >>= 12;
346 } else if (insn->sType == TYPE_F64) {
347 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
348 val = imm->reg.data.u64 >> 44;
349 } else {
350 assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
351 }
352 emitField( 56, 1, (val & 0x80000) >> 19);
353 emitField(pos, len, (val & 0x7ffff));
354 } else {
355 emitField(pos, len, val);
356 }
357 }
358
359 /*******************************************************************************
360 * modifiers
361 ******************************************************************************/
362
363 void
364 CodeEmitterGM107::emitCond3(int pos, CondCode code)
365 {
366 int data = 0;
367
368 switch (code) {
369 case CC_FL : data = 0x00; break;
370 case CC_LTU:
371 case CC_LT : data = 0x01; break;
372 case CC_EQU:
373 case CC_EQ : data = 0x02; break;
374 case CC_LEU:
375 case CC_LE : data = 0x03; break;
376 case CC_GTU:
377 case CC_GT : data = 0x04; break;
378 case CC_NEU:
379 case CC_NE : data = 0x05; break;
380 case CC_GEU:
381 case CC_GE : data = 0x06; break;
382 case CC_TR : data = 0x07; break;
383 default:
384 assert(!"invalid cond3");
385 break;
386 }
387
388 emitField(pos, 3, data);
389 }
390
391 void
392 CodeEmitterGM107::emitCond4(int pos, CondCode code)
393 {
394 int data = 0;
395
396 switch (code) {
397 case CC_FL: data = 0x00; break;
398 case CC_LT: data = 0x01; break;
399 case CC_EQ: data = 0x02; break;
400 case CC_LE: data = 0x03; break;
401 case CC_GT: data = 0x04; break;
402 case CC_NE: data = 0x05; break;
403 case CC_GE: data = 0x06; break;
404 // case CC_NUM: data = 0x07; break;
405 // case CC_NAN: data = 0x08; break;
406 case CC_LTU: data = 0x09; break;
407 case CC_EQU: data = 0x0a; break;
408 case CC_LEU: data = 0x0b; break;
409 case CC_GTU: data = 0x0c; break;
410 case CC_NEU: data = 0x0d; break;
411 case CC_GEU: data = 0x0e; break;
412 case CC_TR: data = 0x0f; break;
413 default:
414 assert(!"invalid cond4");
415 break;
416 }
417
418 emitField(pos, 4, data);
419 }
420
421 void
422 CodeEmitterGM107::emitO(int pos)
423 {
424 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
425 }
426
427 void
428 CodeEmitterGM107::emitP(int pos)
429 {
430 emitField(pos, 1, insn->perPatch);
431 }
432
433 void
434 CodeEmitterGM107::emitSAT(int pos)
435 {
436 emitField(pos, 1, insn->saturate);
437 }
438
439 void
440 CodeEmitterGM107::emitCC(int pos)
441 {
442 emitField(pos, 1, insn->flagsDef >= 0);
443 }
444
445 void
446 CodeEmitterGM107::emitX(int pos)
447 {
448 emitField(pos, 1, insn->flagsSrc >= 0);
449 }
450
451 void
452 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
453 {
454 emitField(pos, 1, ref.mod.abs());
455 }
456
457 void
458 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
459 {
460 emitField(pos, 1, ref.mod.neg());
461 }
462
463 void
464 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
465 {
466 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
467 }
468
469 void
470 CodeEmitterGM107::emitFMZ(int pos, int len)
471 {
472 emitField(pos, len, insn->dnz << 1 | insn->ftz);
473 }
474
475 void
476 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
477 {
478 int rm = 0, ri = 0;
479 switch (rnd) {
480 case ROUND_NI: ri = 1;
481 case ROUND_N : rm = 0; break;
482 case ROUND_MI: ri = 1;
483 case ROUND_M : rm = 1; break;
484 case ROUND_PI: ri = 1;
485 case ROUND_P : rm = 2; break;
486 case ROUND_ZI: ri = 1;
487 case ROUND_Z : rm = 3; break;
488 default:
489 assert(!"invalid round mode");
490 break;
491 }
492 emitField(rip, 1, ri);
493 emitField(rmp, 2, rm);
494 }
495
496 void
497 CodeEmitterGM107::emitPDIV(int pos)
498 {
499 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
500 if (insn->postFactor > 0)
501 emitField(pos, 3, 7 - insn->postFactor);
502 else
503 emitField(pos, 3, 0 - insn->postFactor);
504 }
505
506 void
507 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
508 {
509 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
510 }
511
512 /*******************************************************************************
513 * control flow
514 ******************************************************************************/
515
516 void
517 CodeEmitterGM107::emitEXIT()
518 {
519 emitInsn (0xe3000000);
520 emitCond5(0x00, CC_TR);
521 }
522
523 void
524 CodeEmitterGM107::emitBRA()
525 {
526 const FlowInstruction *insn = this->insn->asFlow();
527 int gpr = -1;
528
529 if (insn->indirect) {
530 if (insn->absolute)
531 emitInsn(0xe2000000); // JMX
532 else
533 emitInsn(0xe2500000); // BRX
534 gpr = 0x08;
535 } else {
536 if (insn->absolute)
537 emitInsn(0xe2100000); // JMP
538 else
539 emitInsn(0xe2400000); // BRA
540 emitField(0x07, 1, insn->allWarp);
541 }
542
543 emitField(0x06, 1, insn->limit);
544 emitCond5(0x00, CC_TR);
545
546 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
547 int32_t pos = insn->target.bb->binPos;
548 if (writeIssueDelays && !(pos & 0x1f))
549 pos += 8;
550 if (!insn->absolute)
551 emitField(0x14, 24, pos - (codeSize + 8));
552 else
553 emitField(0x14, 32, pos);
554 } else {
555 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
556 emitField(0x05, 1, 1);
557 }
558 }
559
560 void
561 CodeEmitterGM107::emitCAL()
562 {
563 const FlowInstruction *insn = this->insn->asFlow();
564
565 if (insn->absolute) {
566 emitInsn(0xe2200000, 0); // JCAL
567 } else {
568 emitInsn(0xe2600000, 0); // CAL
569 }
570
571 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
572 if (!insn->absolute)
573 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
574 else {
575 if (insn->builtin) {
576 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
577 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
578 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
579 } else {
580 emitField(0x14, 32, insn->target.bb->binPos);
581 }
582 }
583 } else {
584 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
585 emitField(0x05, 1, 1);
586 }
587 }
588
589 void
590 CodeEmitterGM107::emitPCNT()
591 {
592 const FlowInstruction *insn = this->insn->asFlow();
593
594 emitInsn(0xe2b00000, 0);
595
596 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
597 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
598 } else {
599 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
600 emitField(0x05, 1, 1);
601 }
602 }
603
604 void
605 CodeEmitterGM107::emitCONT()
606 {
607 emitInsn (0xe3500000);
608 emitCond5(0x00, CC_TR);
609 }
610
611 void
612 CodeEmitterGM107::emitPBK()
613 {
614 const FlowInstruction *insn = this->insn->asFlow();
615
616 emitInsn(0xe2a00000, 0);
617
618 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
619 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
620 } else {
621 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
622 emitField(0x05, 1, 1);
623 }
624 }
625
626 void
627 CodeEmitterGM107::emitBRK()
628 {
629 emitInsn (0xe3400000);
630 emitCond5(0x00, CC_TR);
631 }
632
633 void
634 CodeEmitterGM107::emitPRET()
635 {
636 const FlowInstruction *insn = this->insn->asFlow();
637
638 emitInsn(0xe2700000, 0);
639
640 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
641 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
642 } else {
643 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
644 emitField(0x05, 1, 1);
645 }
646 }
647
648 void
649 CodeEmitterGM107::emitRET()
650 {
651 emitInsn (0xe3200000);
652 emitCond5(0x00, CC_TR);
653 }
654
655 void
656 CodeEmitterGM107::emitSSY()
657 {
658 const FlowInstruction *insn = this->insn->asFlow();
659
660 emitInsn(0xe2900000, 0);
661
662 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
663 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
664 } else {
665 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
666 emitField(0x05, 1, 1);
667 }
668 }
669
670 void
671 CodeEmitterGM107::emitSYNC()
672 {
673 emitInsn (0xf0f80000);
674 emitCond5(0x00, CC_TR);
675 }
676
677 void
678 CodeEmitterGM107::emitSAM()
679 {
680 emitInsn(0xe3700000, 0);
681 }
682
683 void
684 CodeEmitterGM107::emitRAM()
685 {
686 emitInsn(0xe3800000, 0);
687 }
688
689 /*******************************************************************************
690 * predicate/cc
691 ******************************************************************************/
692
693 /*******************************************************************************
694 * movement / conversion
695 ******************************************************************************/
696
697 void
698 CodeEmitterGM107::emitMOV()
699 {
700 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
701 switch (insn->src(0).getFile()) {
702 case FILE_GPR:
703 if (insn->def(0).getFile() == FILE_PREDICATE) {
704 emitInsn(0x5b6a0000);
705 emitGPR (0x08);
706 } else {
707 emitInsn(0x5c980000);
708 }
709 emitGPR (0x14, insn->src(0));
710 break;
711 case FILE_MEMORY_CONST:
712 emitInsn(0x4c980000);
713 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
714 break;
715 case FILE_IMMEDIATE:
716 emitInsn(0x38980000);
717 emitIMMD(0x14, 19, insn->src(0));
718 break;
719 case FILE_PREDICATE:
720 emitInsn(0x50880000);
721 emitPRED(0x0c, insn->src(0));
722 emitPRED(0x1d);
723 emitPRED(0x27);
724 break;
725 default:
726 assert(!"bad src file");
727 break;
728 }
729 if (insn->def(0).getFile() != FILE_PREDICATE &&
730 insn->src(0).getFile() != FILE_PREDICATE)
731 emitField(0x27, 4, insn->lanes);
732 } else {
733 emitInsn (0x01000000);
734 emitIMMD (0x14, 32, insn->src(0));
735 emitField(0x0c, 4, insn->lanes);
736 }
737
738 if (insn->def(0).getFile() == FILE_PREDICATE) {
739 emitPRED(0x27);
740 emitPRED(0x03, insn->def(0));
741 emitPRED(0x00);
742 } else {
743 emitGPR(0x00, insn->def(0));
744 }
745 }
746
747 void
748 CodeEmitterGM107::emitS2R()
749 {
750 emitInsn(0xf0c80000);
751 emitSYS (0x14, insn->src(0));
752 emitGPR (0x00, insn->def(0));
753 }
754
755 void
756 CodeEmitterGM107::emitCS2R()
757 {
758 emitInsn(0x50c80000);
759 emitSYS (0x14, insn->src(0));
760 emitGPR (0x00, insn->def(0));
761 }
762
763 void
764 CodeEmitterGM107::emitF2F()
765 {
766 RoundMode rnd = insn->rnd;
767
768 switch (insn->op) {
769 case OP_FLOOR: rnd = ROUND_MI; break;
770 case OP_CEIL : rnd = ROUND_PI; break;
771 case OP_TRUNC: rnd = ROUND_ZI; break;
772 default:
773 break;
774 }
775
776 switch (insn->src(0).getFile()) {
777 case FILE_GPR:
778 emitInsn(0x5ca80000);
779 emitGPR (0x14, insn->src(0));
780 break;
781 case FILE_MEMORY_CONST:
782 emitInsn(0x4ca80000);
783 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
784 break;
785 case FILE_IMMEDIATE:
786 emitInsn(0x38a80000);
787 emitIMMD(0x14, 19, insn->src(0));
788 break;
789 default:
790 assert(!"bad src0 file");
791 break;
792 }
793
794 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
795 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
796 emitCC (0x2f);
797 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
798 emitFMZ (0x2c, 1);
799 emitField(0x29, 1, insn->subOp);
800 emitRND (0x27, rnd, 0x2a);
801 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
802 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
803 emitGPR (0x00, insn->def(0));
804 }
805
806 void
807 CodeEmitterGM107::emitF2I()
808 {
809 RoundMode rnd = insn->rnd;
810
811 switch (insn->op) {
812 case OP_FLOOR: rnd = ROUND_M; break;
813 case OP_CEIL : rnd = ROUND_P; break;
814 case OP_TRUNC: rnd = ROUND_Z; break;
815 default:
816 break;
817 }
818
819 switch (insn->src(0).getFile()) {
820 case FILE_GPR:
821 emitInsn(0x5cb00000);
822 emitGPR (0x14, insn->src(0));
823 break;
824 case FILE_MEMORY_CONST:
825 emitInsn(0x4cb00000);
826 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
827 break;
828 case FILE_IMMEDIATE:
829 emitInsn(0x38b00000);
830 emitIMMD(0x14, 19, insn->src(0));
831 break;
832 default:
833 assert(!"bad src0 file");
834 break;
835 }
836
837 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
838 emitCC (0x2f);
839 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
840 emitFMZ (0x2c, 1);
841 emitRND (0x27, rnd, 0x2a);
842 emitField(0x0c, 1, isSignedType(insn->dType));
843 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
844 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
845 emitGPR (0x00, insn->def(0));
846 }
847
848 void
849 CodeEmitterGM107::emitI2F()
850 {
851 RoundMode rnd = insn->rnd;
852
853 switch (insn->op) {
854 case OP_FLOOR: rnd = ROUND_M; break;
855 case OP_CEIL : rnd = ROUND_P; break;
856 case OP_TRUNC: rnd = ROUND_Z; break;
857 default:
858 break;
859 }
860
861 switch (insn->src(0).getFile()) {
862 case FILE_GPR:
863 emitInsn(0x5cb80000);
864 emitGPR (0x14, insn->src(0));
865 break;
866 case FILE_MEMORY_CONST:
867 emitInsn(0x4cb80000);
868 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
869 break;
870 case FILE_IMMEDIATE:
871 emitInsn(0x38b80000);
872 emitIMMD(0x14, 19, insn->src(0));
873 break;
874 default:
875 assert(!"bad src0 file");
876 break;
877 }
878
879 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
880 emitCC (0x2f);
881 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
882 emitField(0x29, 2, insn->subOp);
883 emitRND (0x27, rnd, -1);
884 emitField(0x0d, 1, isSignedType(insn->sType));
885 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
886 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
887 emitGPR (0x00, insn->def(0));
888 }
889
890 void
891 CodeEmitterGM107::emitI2I()
892 {
893 switch (insn->src(0).getFile()) {
894 case FILE_GPR:
895 emitInsn(0x5ce00000);
896 emitGPR (0x14, insn->src(0));
897 break;
898 case FILE_MEMORY_CONST:
899 emitInsn(0x4ce00000);
900 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
901 break;
902 case FILE_IMMEDIATE:
903 emitInsn(0x38e00000);
904 emitIMMD(0x14, 19, insn->src(0));
905 break;
906 default:
907 assert(!"bad src0 file");
908 break;
909 }
910
911 emitSAT (0x32);
912 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
913 emitCC (0x2f);
914 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
915 emitField(0x29, 2, insn->subOp);
916 emitField(0x0d, 1, isSignedType(insn->sType));
917 emitField(0x0c, 1, isSignedType(insn->dType));
918 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
919 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
920 emitGPR (0x00, insn->def(0));
921 }
922
923 static void
924 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
925 {
926 int loc = entry->loc;
927 if (data.force_persample_interp)
928 code[loc + 1] |= 1 << 10;
929 else
930 code[loc + 1] &= ~(1 << 10);
931 }
932
933 void
934 CodeEmitterGM107::emitSEL()
935 {
936 switch (insn->src(1).getFile()) {
937 case FILE_GPR:
938 emitInsn(0x5ca00000);
939 emitGPR (0x14, insn->src(1));
940 break;
941 case FILE_MEMORY_CONST:
942 emitInsn(0x4ca00000);
943 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
944 break;
945 case FILE_IMMEDIATE:
946 emitInsn(0x38a00000);
947 emitIMMD(0x14, 19, insn->src(1));
948 break;
949 default:
950 assert(!"bad src1 file");
951 break;
952 }
953
954 emitINV (0x2a, insn->src(2));
955 emitPRED(0x27, insn->src(2));
956 emitGPR (0x08, insn->src(0));
957 emitGPR (0x00, insn->def(0));
958
959 if (insn->subOp == 1) {
960 addInterp(0, 0, selpFlip);
961 }
962 }
963
964 void
965 CodeEmitterGM107::emitSHFL()
966 {
967 int type = 0;
968
969 emitInsn (0xef100000);
970
971 switch (insn->src(1).getFile()) {
972 case FILE_GPR:
973 emitGPR(0x14, insn->src(1));
974 break;
975 case FILE_IMMEDIATE:
976 emitIMMD(0x14, 5, insn->src(1));
977 type |= 1;
978 break;
979 default:
980 assert(!"invalid src1 file");
981 break;
982 }
983
984 switch (insn->src(2).getFile()) {
985 case FILE_GPR:
986 emitGPR(0x27, insn->src(2));
987 break;
988 case FILE_IMMEDIATE:
989 emitIMMD(0x22, 13, insn->src(2));
990 type |= 2;
991 break;
992 default:
993 assert(!"invalid src2 file");
994 break;
995 }
996
997 if (!insn->defExists(1))
998 emitPRED(0x30);
999 else {
1000 assert(insn->def(1).getFile() == FILE_PREDICATE);
1001 emitPRED(0x30, insn->def(1));
1002 }
1003
1004 emitField(0x1e, 2, insn->subOp);
1005 emitField(0x1c, 2, type);
1006 emitGPR (0x08, insn->src(0));
1007 emitGPR (0x00, insn->def(0));
1008 }
1009
1010 /*******************************************************************************
1011 * double
1012 ******************************************************************************/
1013
1014 void
1015 CodeEmitterGM107::emitDADD()
1016 {
1017 switch (insn->src(1).getFile()) {
1018 case FILE_GPR:
1019 emitInsn(0x5c700000);
1020 emitGPR (0x14, insn->src(1));
1021 break;
1022 case FILE_MEMORY_CONST:
1023 emitInsn(0x4c700000);
1024 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1025 break;
1026 case FILE_IMMEDIATE:
1027 emitInsn(0x38700000);
1028 emitIMMD(0x14, 19, insn->src(1));
1029 break;
1030 default:
1031 assert(!"bad src1 file");
1032 break;
1033 }
1034 emitABS(0x31, insn->src(1));
1035 emitNEG(0x30, insn->src(0));
1036 emitCC (0x2f);
1037 emitABS(0x2e, insn->src(0));
1038 emitNEG(0x2d, insn->src(1));
1039
1040 if (insn->op == OP_SUB)
1041 code[1] ^= 0x00002000;
1042
1043 emitGPR(0x08, insn->src(0));
1044 emitGPR(0x00, insn->def(0));
1045 }
1046
1047 void
1048 CodeEmitterGM107::emitDMUL()
1049 {
1050 switch (insn->src(1).getFile()) {
1051 case FILE_GPR:
1052 emitInsn(0x5c800000);
1053 emitGPR (0x14, insn->src(1));
1054 break;
1055 case FILE_MEMORY_CONST:
1056 emitInsn(0x4c800000);
1057 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1058 break;
1059 case FILE_IMMEDIATE:
1060 emitInsn(0x38800000);
1061 emitIMMD(0x14, 19, insn->src(1));
1062 break;
1063 default:
1064 assert(!"bad src1 file");
1065 break;
1066 }
1067
1068 emitNEG2(0x30, insn->src(0), insn->src(1));
1069 emitCC (0x2f);
1070 emitRND (0x27);
1071 emitGPR (0x08, insn->src(0));
1072 emitGPR (0x00, insn->def(0));
1073 }
1074
1075 void
1076 CodeEmitterGM107::emitDFMA()
1077 {
1078 switch(insn->src(2).getFile()) {
1079 case FILE_GPR:
1080 switch (insn->src(1).getFile()) {
1081 case FILE_GPR:
1082 emitInsn(0x5b700000);
1083 emitGPR (0x14, insn->src(1));
1084 break;
1085 case FILE_MEMORY_CONST:
1086 emitInsn(0x4b700000);
1087 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1088 break;
1089 case FILE_IMMEDIATE:
1090 emitInsn(0x36700000);
1091 emitIMMD(0x14, 19, insn->src(1));
1092 break;
1093 default:
1094 assert(!"bad src1 file");
1095 break;
1096 }
1097 emitGPR (0x27, insn->src(2));
1098 break;
1099 case FILE_MEMORY_CONST:
1100 emitInsn(0x53700000);
1101 emitGPR (0x27, insn->src(1));
1102 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1103 break;
1104 default:
1105 assert(!"bad src2 file");
1106 break;
1107 }
1108
1109 emitRND (0x32);
1110 emitNEG (0x31, insn->src(2));
1111 emitNEG2(0x30, insn->src(0), insn->src(1));
1112 emitCC (0x2f);
1113 emitGPR (0x08, insn->src(0));
1114 emitGPR (0x00, insn->def(0));
1115 }
1116
1117 void
1118 CodeEmitterGM107::emitDMNMX()
1119 {
1120 switch (insn->src(1).getFile()) {
1121 case FILE_GPR:
1122 emitInsn(0x5c500000);
1123 emitGPR (0x14, insn->src(1));
1124 break;
1125 case FILE_MEMORY_CONST:
1126 emitInsn(0x4c500000);
1127 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1128 break;
1129 case FILE_IMMEDIATE:
1130 emitInsn(0x38500000);
1131 emitIMMD(0x14, 19, insn->src(1));
1132 break;
1133 default:
1134 assert(!"bad src1 file");
1135 break;
1136 }
1137
1138 emitABS (0x31, insn->src(1));
1139 emitNEG (0x30, insn->src(0));
1140 emitCC (0x2f);
1141 emitABS (0x2e, insn->src(0));
1142 emitNEG (0x2d, insn->src(1));
1143 emitField(0x2a, 1, insn->op == OP_MAX);
1144 emitPRED (0x27);
1145 emitGPR (0x08, insn->src(0));
1146 emitGPR (0x00, insn->def(0));
1147 }
1148
1149 void
1150 CodeEmitterGM107::emitDSET()
1151 {
1152 const CmpInstruction *insn = this->insn->asCmp();
1153
1154 switch (insn->src(1).getFile()) {
1155 case FILE_GPR:
1156 emitInsn(0x59000000);
1157 emitGPR (0x14, insn->src(1));
1158 break;
1159 case FILE_MEMORY_CONST:
1160 emitInsn(0x49000000);
1161 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1162 break;
1163 case FILE_IMMEDIATE:
1164 emitInsn(0x32000000);
1165 emitIMMD(0x14, 19, insn->src(1));
1166 break;
1167 default:
1168 assert(!"bad src1 file");
1169 break;
1170 }
1171
1172 if (insn->op != OP_SET) {
1173 switch (insn->op) {
1174 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1175 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1176 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1177 default:
1178 assert(!"invalid set op");
1179 break;
1180 }
1181 emitPRED(0x27, insn->src(2));
1182 } else {
1183 emitPRED(0x27);
1184 }
1185
1186 emitABS (0x36, insn->src(0));
1187 emitNEG (0x35, insn->src(1));
1188 emitField(0x34, 1, insn->dType == TYPE_F32);
1189 emitCond4(0x30, insn->setCond);
1190 emitCC (0x2f);
1191 emitABS (0x2c, insn->src(1));
1192 emitNEG (0x2b, insn->src(0));
1193 emitGPR (0x08, insn->src(0));
1194 emitGPR (0x00, insn->def(0));
1195 }
1196
1197 void
1198 CodeEmitterGM107::emitDSETP()
1199 {
1200 const CmpInstruction *insn = this->insn->asCmp();
1201
1202 switch (insn->src(1).getFile()) {
1203 case FILE_GPR:
1204 emitInsn(0x5b800000);
1205 emitGPR (0x14, insn->src(1));
1206 break;
1207 case FILE_MEMORY_CONST:
1208 emitInsn(0x4b800000);
1209 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1210 break;
1211 case FILE_IMMEDIATE:
1212 emitInsn(0x36800000);
1213 emitIMMD(0x14, 19, insn->src(1));
1214 break;
1215 default:
1216 assert(!"bad src1 file");
1217 break;
1218 }
1219
1220 if (insn->op != OP_SET) {
1221 switch (insn->op) {
1222 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1223 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1224 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1225 default:
1226 assert(!"invalid set op");
1227 break;
1228 }
1229 emitPRED(0x27, insn->src(2));
1230 } else {
1231 emitPRED(0x27);
1232 }
1233
1234 emitCond4(0x30, insn->setCond);
1235 emitABS (0x2c, insn->src(1));
1236 emitNEG (0x2b, insn->src(0));
1237 emitGPR (0x08, insn->src(0));
1238 emitABS (0x07, insn->src(0));
1239 emitNEG (0x06, insn->src(1));
1240 emitPRED (0x03, insn->def(0));
1241 if (insn->defExists(1))
1242 emitPRED(0x00, insn->def(1));
1243 else
1244 emitPRED(0x00);
1245 }
1246
1247 /*******************************************************************************
1248 * float
1249 ******************************************************************************/
1250
1251 void
1252 CodeEmitterGM107::emitFADD()
1253 {
1254 if (!longIMMD(insn->src(1))) {
1255 switch (insn->src(1).getFile()) {
1256 case FILE_GPR:
1257 emitInsn(0x5c580000);
1258 emitGPR (0x14, insn->src(1));
1259 break;
1260 case FILE_MEMORY_CONST:
1261 emitInsn(0x4c580000);
1262 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1263 break;
1264 case FILE_IMMEDIATE:
1265 emitInsn(0x38580000);
1266 emitIMMD(0x14, 19, insn->src(1));
1267 break;
1268 default:
1269 assert(!"bad src1 file");
1270 break;
1271 }
1272 emitSAT(0x32);
1273 emitABS(0x31, insn->src(1));
1274 emitNEG(0x30, insn->src(0));
1275 emitCC (0x2f);
1276 emitABS(0x2e, insn->src(0));
1277 emitNEG(0x2d, insn->src(1));
1278 emitFMZ(0x2c, 1);
1279
1280 if (insn->op == OP_SUB)
1281 code[1] ^= 0x00002000;
1282 } else {
1283 emitInsn(0x08000000);
1284 emitABS(0x39, insn->src(1));
1285 emitNEG(0x38, insn->src(0));
1286 emitFMZ(0x37, 1);
1287 emitABS(0x36, insn->src(0));
1288 emitNEG(0x35, insn->src(1));
1289 emitCC (0x34);
1290 emitIMMD(0x14, 32, insn->src(1));
1291
1292 if (insn->op == OP_SUB)
1293 code[1] ^= 0x00080000;
1294 }
1295
1296 emitGPR(0x08, insn->src(0));
1297 emitGPR(0x00, insn->def(0));
1298 }
1299
1300 void
1301 CodeEmitterGM107::emitFMUL()
1302 {
1303 if (!longIMMD(insn->src(1))) {
1304 switch (insn->src(1).getFile()) {
1305 case FILE_GPR:
1306 emitInsn(0x5c680000);
1307 emitGPR (0x14, insn->src(1));
1308 break;
1309 case FILE_MEMORY_CONST:
1310 emitInsn(0x4c680000);
1311 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1312 break;
1313 case FILE_IMMEDIATE:
1314 emitInsn(0x38680000);
1315 emitIMMD(0x14, 19, insn->src(1));
1316 break;
1317 default:
1318 assert(!"bad src1 file");
1319 break;
1320 }
1321 emitSAT (0x32);
1322 emitNEG2(0x30, insn->src(0), insn->src(1));
1323 emitCC (0x2f);
1324 emitFMZ (0x2c, 2);
1325 emitPDIV(0x29);
1326 emitRND (0x27);
1327 } else {
1328 emitInsn(0x1e000000);
1329 emitSAT (0x37);
1330 emitFMZ (0x35, 2);
1331 emitCC (0x34);
1332 emitIMMD(0x14, 32, insn->src(1));
1333 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1334 code[1] ^= 0x00080000; /* flip immd sign bit */
1335 }
1336
1337 emitGPR(0x08, insn->src(0));
1338 emitGPR(0x00, insn->def(0));
1339 }
1340
1341 void
1342 CodeEmitterGM107::emitFFMA()
1343 {
1344 bool isLongIMMD = false;
1345 switch(insn->src(2).getFile()) {
1346 case FILE_GPR:
1347 switch (insn->src(1).getFile()) {
1348 case FILE_GPR:
1349 emitInsn(0x59800000);
1350 emitGPR (0x14, insn->src(1));
1351 break;
1352 case FILE_MEMORY_CONST:
1353 emitInsn(0x49800000);
1354 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1355 break;
1356 case FILE_IMMEDIATE:
1357 if (longIMMD(insn->getSrc(1))) {
1358 assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1359 isLongIMMD = true;
1360 emitInsn(0x0c000000);
1361 emitIMMD(0x14, 32, insn->src(1));
1362 } else {
1363 emitInsn(0x32800000);
1364 emitIMMD(0x14, 19, insn->src(1));
1365 }
1366 break;
1367 default:
1368 assert(!"bad src1 file");
1369 break;
1370 }
1371 if (!isLongIMMD)
1372 emitGPR (0x27, insn->src(2));
1373 break;
1374 case FILE_MEMORY_CONST:
1375 emitInsn(0x51800000);
1376 emitGPR (0x27, insn->src(1));
1377 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1378 break;
1379 default:
1380 assert(!"bad src2 file");
1381 break;
1382 }
1383
1384 if (isLongIMMD) {
1385 emitNEG (0x39, insn->src(2));
1386 emitNEG2(0x38, insn->src(0), insn->src(1));
1387 emitSAT (0x37);
1388 emitCC (0x34);
1389 } else {
1390 emitRND (0x33);
1391 emitSAT (0x32);
1392 emitNEG (0x31, insn->src(2));
1393 emitNEG2(0x30, insn->src(0), insn->src(1));
1394 emitCC (0x2f);
1395 }
1396
1397 emitFMZ(0x35, 2);
1398 emitGPR(0x08, insn->src(0));
1399 emitGPR(0x00, insn->def(0));
1400 }
1401
1402 void
1403 CodeEmitterGM107::emitMUFU()
1404 {
1405 int mufu = 0;
1406
1407 switch (insn->op) {
1408 case OP_COS: mufu = 0; break;
1409 case OP_SIN: mufu = 1; break;
1410 case OP_EX2: mufu = 2; break;
1411 case OP_LG2: mufu = 3; break;
1412 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1413 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1414 case OP_SQRT: mufu = 8; break;
1415 default:
1416 assert(!"invalid mufu");
1417 break;
1418 }
1419
1420 emitInsn (0x50800000);
1421 emitSAT (0x32);
1422 emitNEG (0x30, insn->src(0));
1423 emitABS (0x2e, insn->src(0));
1424 emitField(0x14, 4, mufu);
1425 emitGPR (0x08, insn->src(0));
1426 emitGPR (0x00, insn->def(0));
1427 }
1428
1429 void
1430 CodeEmitterGM107::emitFMNMX()
1431 {
1432 switch (insn->src(1).getFile()) {
1433 case FILE_GPR:
1434 emitInsn(0x5c600000);
1435 emitGPR (0x14, insn->src(1));
1436 break;
1437 case FILE_MEMORY_CONST:
1438 emitInsn(0x4c600000);
1439 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1440 break;
1441 case FILE_IMMEDIATE:
1442 emitInsn(0x38600000);
1443 emitIMMD(0x14, 19, insn->src(1));
1444 break;
1445 default:
1446 assert(!"bad src1 file");
1447 break;
1448 }
1449
1450 emitField(0x2a, 1, insn->op == OP_MAX);
1451 emitPRED (0x27);
1452
1453 emitABS(0x31, insn->src(1));
1454 emitNEG(0x30, insn->src(0));
1455 emitCC (0x2f);
1456 emitABS(0x2e, insn->src(0));
1457 emitNEG(0x2d, insn->src(1));
1458 emitFMZ(0x2c, 1);
1459 emitGPR(0x08, insn->src(0));
1460 emitGPR(0x00, insn->def(0));
1461 }
1462
1463 void
1464 CodeEmitterGM107::emitRRO()
1465 {
1466 switch (insn->src(0).getFile()) {
1467 case FILE_GPR:
1468 emitInsn(0x5c900000);
1469 emitGPR (0x14, insn->src(0));
1470 break;
1471 case FILE_MEMORY_CONST:
1472 emitInsn(0x4c900000);
1473 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1474 break;
1475 case FILE_IMMEDIATE:
1476 emitInsn(0x38900000);
1477 emitIMMD(0x14, 19, insn->src(0));
1478 break;
1479 default:
1480 assert(!"bad src file");
1481 break;
1482 }
1483
1484 emitABS (0x31, insn->src(0));
1485 emitNEG (0x2d, insn->src(0));
1486 emitField(0x27, 1, insn->op == OP_PREEX2);
1487 emitGPR (0x00, insn->def(0));
1488 }
1489
1490 void
1491 CodeEmitterGM107::emitFCMP()
1492 {
1493 const CmpInstruction *insn = this->insn->asCmp();
1494 CondCode cc = insn->setCond;
1495
1496 if (insn->src(2).mod.neg())
1497 cc = reverseCondCode(cc);
1498
1499 switch(insn->src(2).getFile()) {
1500 case FILE_GPR:
1501 switch (insn->src(1).getFile()) {
1502 case FILE_GPR:
1503 emitInsn(0x5ba00000);
1504 emitGPR (0x14, insn->src(1));
1505 break;
1506 case FILE_MEMORY_CONST:
1507 emitInsn(0x4ba00000);
1508 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1509 break;
1510 case FILE_IMMEDIATE:
1511 emitInsn(0x36a00000);
1512 emitIMMD(0x14, 19, insn->src(1));
1513 break;
1514 default:
1515 assert(!"bad src1 file");
1516 break;
1517 }
1518 emitGPR (0x27, insn->src(2));
1519 break;
1520 case FILE_MEMORY_CONST:
1521 emitInsn(0x53a00000);
1522 emitGPR (0x27, insn->src(1));
1523 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1524 break;
1525 default:
1526 assert(!"bad src2 file");
1527 break;
1528 }
1529
1530 emitCond4(0x30, cc);
1531 emitFMZ (0x2f, 1);
1532 emitGPR (0x08, insn->src(0));
1533 emitGPR (0x00, insn->def(0));
1534 }
1535
1536 void
1537 CodeEmitterGM107::emitFSET()
1538 {
1539 const CmpInstruction *insn = this->insn->asCmp();
1540
1541 switch (insn->src(1).getFile()) {
1542 case FILE_GPR:
1543 emitInsn(0x58000000);
1544 emitGPR (0x14, insn->src(1));
1545 break;
1546 case FILE_MEMORY_CONST:
1547 emitInsn(0x48000000);
1548 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1549 break;
1550 case FILE_IMMEDIATE:
1551 emitInsn(0x30000000);
1552 emitIMMD(0x14, 19, insn->src(1));
1553 break;
1554 default:
1555 assert(!"bad src1 file");
1556 break;
1557 }
1558
1559 if (insn->op != OP_SET) {
1560 switch (insn->op) {
1561 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1562 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1563 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1564 default:
1565 assert(!"invalid set op");
1566 break;
1567 }
1568 emitPRED(0x27, insn->src(2));
1569 } else {
1570 emitPRED(0x27);
1571 }
1572
1573 emitFMZ (0x37, 1);
1574 emitABS (0x36, insn->src(0));
1575 emitNEG (0x35, insn->src(1));
1576 emitField(0x34, 1, insn->dType == TYPE_F32);
1577 emitCond4(0x30, insn->setCond);
1578 emitCC (0x2f);
1579 emitABS (0x2c, insn->src(1));
1580 emitNEG (0x2b, insn->src(0));
1581 emitGPR (0x08, insn->src(0));
1582 emitGPR (0x00, insn->def(0));
1583 }
1584
1585 void
1586 CodeEmitterGM107::emitFSETP()
1587 {
1588 const CmpInstruction *insn = this->insn->asCmp();
1589
1590 switch (insn->src(1).getFile()) {
1591 case FILE_GPR:
1592 emitInsn(0x5bb00000);
1593 emitGPR (0x14, insn->src(1));
1594 break;
1595 case FILE_MEMORY_CONST:
1596 emitInsn(0x4bb00000);
1597 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1598 break;
1599 case FILE_IMMEDIATE:
1600 emitInsn(0x36b00000);
1601 emitIMMD(0x14, 19, insn->src(1));
1602 break;
1603 default:
1604 assert(!"bad src1 file");
1605 break;
1606 }
1607
1608 if (insn->op != OP_SET) {
1609 switch (insn->op) {
1610 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1611 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1612 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1613 default:
1614 assert(!"invalid set op");
1615 break;
1616 }
1617 emitPRED(0x27, insn->src(2));
1618 } else {
1619 emitPRED(0x27);
1620 }
1621
1622 emitCond4(0x30, insn->setCond);
1623 emitFMZ (0x2f, 1);
1624 emitABS (0x2c, insn->src(1));
1625 emitNEG (0x2b, insn->src(0));
1626 emitGPR (0x08, insn->src(0));
1627 emitABS (0x07, insn->src(0));
1628 emitNEG (0x06, insn->src(1));
1629 emitPRED (0x03, insn->def(0));
1630 if (insn->defExists(1))
1631 emitPRED(0x00, insn->def(1));
1632 else
1633 emitPRED(0x00);
1634 }
1635
1636 void
1637 CodeEmitterGM107::emitFSWZADD()
1638 {
1639 emitInsn (0x50f80000);
1640 emitCC (0x2f);
1641 emitFMZ (0x2c, 1);
1642 emitRND (0x27);
1643 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1644 emitField(0x1c, 8, insn->subOp);
1645 if (insn->predSrc != 1)
1646 emitGPR (0x14, insn->src(1));
1647 else
1648 emitGPR (0x14);
1649 emitGPR (0x08, insn->src(0));
1650 emitGPR (0x00, insn->def(0));
1651 }
1652
1653 /*******************************************************************************
1654 * integer
1655 ******************************************************************************/
1656
1657 void
1658 CodeEmitterGM107::emitLOP()
1659 {
1660 int lop = 0;
1661
1662 switch (insn->op) {
1663 case OP_AND: lop = 0; break;
1664 case OP_OR : lop = 1; break;
1665 case OP_XOR: lop = 2; break;
1666 default:
1667 assert(!"invalid lop");
1668 break;
1669 }
1670
1671 if (!longIMMD(insn->src(1))) {
1672 switch (insn->src(1).getFile()) {
1673 case FILE_GPR:
1674 emitInsn(0x5c400000);
1675 emitGPR (0x14, insn->src(1));
1676 break;
1677 case FILE_MEMORY_CONST:
1678 emitInsn(0x4c400000);
1679 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1680 break;
1681 case FILE_IMMEDIATE:
1682 emitInsn(0x38400000);
1683 emitIMMD(0x14, 19, insn->src(1));
1684 break;
1685 default:
1686 assert(!"bad src1 file");
1687 break;
1688 }
1689 emitPRED (0x30);
1690 emitCC (0x2f);
1691 emitX (0x2b);
1692 emitField(0x29, 2, lop);
1693 emitINV (0x28, insn->src(1));
1694 emitINV (0x27, insn->src(0));
1695 } else {
1696 emitInsn (0x04000000);
1697 emitX (0x39);
1698 emitINV (0x38, insn->src(1));
1699 emitINV (0x37, insn->src(0));
1700 emitField(0x35, 2, lop);
1701 emitCC (0x34);
1702 emitIMMD (0x14, 32, insn->src(1));
1703 }
1704
1705 emitGPR (0x08, insn->src(0));
1706 emitGPR (0x00, insn->def(0));
1707 }
1708
1709 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1710 void
1711 CodeEmitterGM107::emitNOT()
1712 {
1713 if (!longIMMD(insn->src(0))) {
1714 switch (insn->src(0).getFile()) {
1715 case FILE_GPR:
1716 emitInsn(0x5c400700);
1717 emitGPR (0x14, insn->src(0));
1718 break;
1719 case FILE_MEMORY_CONST:
1720 emitInsn(0x4c400700);
1721 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1722 break;
1723 case FILE_IMMEDIATE:
1724 emitInsn(0x38400700);
1725 emitIMMD(0x14, 19, insn->src(0));
1726 break;
1727 default:
1728 assert(!"bad src1 file");
1729 break;
1730 }
1731 emitPRED (0x30);
1732 } else {
1733 emitInsn (0x05600000);
1734 emitIMMD (0x14, 32, insn->src(1));
1735 }
1736
1737 emitGPR(0x08);
1738 emitGPR(0x00, insn->def(0));
1739 }
1740
1741 void
1742 CodeEmitterGM107::emitIADD()
1743 {
1744 if (!longIMMD(insn->src(1))) {
1745 switch (insn->src(1).getFile()) {
1746 case FILE_GPR:
1747 emitInsn(0x5c100000);
1748 emitGPR (0x14, insn->src(1));
1749 break;
1750 case FILE_MEMORY_CONST:
1751 emitInsn(0x4c100000);
1752 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1753 break;
1754 case FILE_IMMEDIATE:
1755 emitInsn(0x38100000);
1756 emitIMMD(0x14, 19, insn->src(1));
1757 break;
1758 default:
1759 assert(!"bad src1 file");
1760 break;
1761 }
1762 emitSAT(0x32);
1763 emitNEG(0x31, insn->src(0));
1764 emitNEG(0x30, insn->src(1));
1765 emitCC (0x2f);
1766 emitX (0x2b);
1767 } else {
1768 emitInsn(0x1c000000);
1769 emitNEG (0x38, insn->src(0));
1770 emitSAT (0x36);
1771 emitX (0x35);
1772 emitCC (0x34);
1773 emitIMMD(0x14, 32, insn->src(1));
1774 }
1775
1776 if (insn->op == OP_SUB)
1777 code[1] ^= 0x00010000;
1778
1779 emitGPR(0x08, insn->src(0));
1780 emitGPR(0x00, insn->def(0));
1781 }
1782
1783 void
1784 CodeEmitterGM107::emitIMUL()
1785 {
1786 if (!longIMMD(insn->src(1))) {
1787 switch (insn->src(1).getFile()) {
1788 case FILE_GPR:
1789 emitInsn(0x5c380000);
1790 emitGPR (0x14, insn->src(1));
1791 break;
1792 case FILE_MEMORY_CONST:
1793 emitInsn(0x4c380000);
1794 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1795 break;
1796 case FILE_IMMEDIATE:
1797 emitInsn(0x38380000);
1798 emitIMMD(0x14, 19, insn->src(1));
1799 break;
1800 default:
1801 assert(!"bad src1 file");
1802 break;
1803 }
1804 emitCC (0x2f);
1805 emitField(0x29, 1, isSignedType(insn->sType));
1806 emitField(0x28, 1, isSignedType(insn->dType));
1807 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1808 } else {
1809 emitInsn (0x1f000000);
1810 emitField(0x37, 1, isSignedType(insn->sType));
1811 emitField(0x36, 1, isSignedType(insn->dType));
1812 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1813 emitCC (0x34);
1814 emitIMMD (0x14, 32, insn->src(1));
1815 }
1816
1817 emitGPR(0x08, insn->src(0));
1818 emitGPR(0x00, insn->def(0));
1819 }
1820
1821 void
1822 CodeEmitterGM107::emitIMAD()
1823 {
1824 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1825 switch(insn->src(2).getFile()) {
1826 case FILE_GPR:
1827 switch (insn->src(1).getFile()) {
1828 case FILE_GPR:
1829 emitInsn(0x5a000000);
1830 emitGPR (0x14, insn->src(1));
1831 break;
1832 case FILE_MEMORY_CONST:
1833 emitInsn(0x4a000000);
1834 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1835 break;
1836 case FILE_IMMEDIATE:
1837 emitInsn(0x34000000);
1838 emitIMMD(0x14, 19, insn->src(1));
1839 break;
1840 default:
1841 assert(!"bad src1 file");
1842 break;
1843 }
1844 emitGPR (0x27, insn->src(2));
1845 break;
1846 case FILE_MEMORY_CONST:
1847 emitInsn(0x52000000);
1848 emitGPR (0x27, insn->src(1));
1849 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1850 break;
1851 default:
1852 assert(!"bad src2 file");
1853 break;
1854 }
1855
1856 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1857 emitField(0x35, 1, isSignedType(insn->sType));
1858 emitNEG (0x34, insn->src(2));
1859 emitNEG2 (0x33, insn->src(0), insn->src(1));
1860 emitSAT (0x32);
1861 emitX (0x31);
1862 emitField(0x30, 1, isSignedType(insn->dType));
1863 emitCC (0x2f);
1864 emitGPR (0x08, insn->src(0));
1865 emitGPR (0x00, insn->def(0));
1866 }
1867
1868 void
1869 CodeEmitterGM107::emitISCADD()
1870 {
1871 assert(insn->src(1).get()->asImm());
1872
1873 switch (insn->src(2).getFile()) {
1874 case FILE_GPR:
1875 emitInsn(0x5c180000);
1876 emitGPR (0x14, insn->src(2));
1877 break;
1878 case FILE_MEMORY_CONST:
1879 emitInsn(0x4c180000);
1880 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1881 break;
1882 case FILE_IMMEDIATE:
1883 emitInsn(0x38180000);
1884 emitIMMD(0x14, 19, insn->src(2));
1885 break;
1886 default:
1887 assert(!"bad src1 file");
1888 break;
1889 }
1890 emitNEG (0x31, insn->src(0));
1891 emitNEG (0x30, insn->src(2));
1892 emitCC (0x2f);
1893 emitIMMD(0x27, 5, insn->src(1));
1894 emitGPR (0x08, insn->src(0));
1895 emitGPR (0x00, insn->def(0));
1896 }
1897
1898 void
1899 CodeEmitterGM107::emitXMAD()
1900 {
1901 assert(insn->src(0).getFile() == FILE_GPR);
1902
1903 bool constbuf = false;
1904 bool psl_mrg = true;
1905 bool immediate = false;
1906 if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
1907 assert(insn->src(1).getFile() == FILE_GPR);
1908 constbuf = true;
1909 psl_mrg = false;
1910 emitInsn(0x51000000);
1911 emitGPR(0x27, insn->src(1));
1912 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1913 } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
1914 assert(insn->src(2).getFile() == FILE_GPR);
1915 constbuf = true;
1916 emitInsn(0x4e000000);
1917 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1918 emitGPR(0x27, insn->src(2));
1919 } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
1920 assert(insn->src(2).getFile() == FILE_GPR);
1921 assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
1922 immediate = true;
1923 emitInsn(0x36000000);
1924 emitIMMD(0x14, 16, insn->src(1));
1925 emitGPR(0x27, insn->src(2));
1926 } else {
1927 assert(insn->src(1).getFile() == FILE_GPR);
1928 assert(insn->src(2).getFile() == FILE_GPR);
1929 emitInsn(0x5b000000);
1930 emitGPR(0x14, insn->src(1));
1931 emitGPR(0x27, insn->src(2));
1932 }
1933
1934 if (psl_mrg)
1935 emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
1936
1937 unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
1938 cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
1939 emitField(0x32, constbuf ? 2 : 3, cmode);
1940
1941 emitX(constbuf ? 0x36 : 0x26);
1942 emitCC(0x2f);
1943
1944 emitGPR(0x0, insn->def(0));
1945 emitGPR(0x8, insn->src(0));
1946
1947 // source flags
1948 if (isSignedType(insn->sType)) {
1949 uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
1950 emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
1951 }
1952 emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1953 if (!immediate) {
1954 bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
1955 emitField(constbuf ? 0x34 : 0x23, 1, h1);
1956 }
1957 }
1958
1959 void
1960 CodeEmitterGM107::emitIMNMX()
1961 {
1962 switch (insn->src(1).getFile()) {
1963 case FILE_GPR:
1964 emitInsn(0x5c200000);
1965 emitGPR (0x14, insn->src(1));
1966 break;
1967 case FILE_MEMORY_CONST:
1968 emitInsn(0x4c200000);
1969 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1970 break;
1971 case FILE_IMMEDIATE:
1972 emitInsn(0x38200000);
1973 emitIMMD(0x14, 19, insn->src(1));
1974 break;
1975 default:
1976 assert(!"bad src1 file");
1977 break;
1978 }
1979
1980 emitField(0x30, 1, isSignedType(insn->dType));
1981 emitCC (0x2f);
1982 emitField(0x2b, 2, insn->subOp);
1983 emitField(0x2a, 1, insn->op == OP_MAX);
1984 emitPRED (0x27);
1985 emitGPR (0x08, insn->src(0));
1986 emitGPR (0x00, insn->def(0));
1987 }
1988
1989 void
1990 CodeEmitterGM107::emitICMP()
1991 {
1992 const CmpInstruction *insn = this->insn->asCmp();
1993 CondCode cc = insn->setCond;
1994
1995 if (insn->src(2).mod.neg())
1996 cc = reverseCondCode(cc);
1997
1998 switch(insn->src(2).getFile()) {
1999 case FILE_GPR:
2000 switch (insn->src(1).getFile()) {
2001 case FILE_GPR:
2002 emitInsn(0x5b400000);
2003 emitGPR (0x14, insn->src(1));
2004 break;
2005 case FILE_MEMORY_CONST:
2006 emitInsn(0x4b400000);
2007 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2008 break;
2009 case FILE_IMMEDIATE:
2010 emitInsn(0x36400000);
2011 emitIMMD(0x14, 19, insn->src(1));
2012 break;
2013 default:
2014 assert(!"bad src1 file");
2015 break;
2016 }
2017 emitGPR (0x27, insn->src(2));
2018 break;
2019 case FILE_MEMORY_CONST:
2020 emitInsn(0x53400000);
2021 emitGPR (0x27, insn->src(1));
2022 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2023 break;
2024 default:
2025 assert(!"bad src2 file");
2026 break;
2027 }
2028
2029 emitCond3(0x31, cc);
2030 emitField(0x30, 1, isSignedType(insn->sType));
2031 emitGPR (0x08, insn->src(0));
2032 emitGPR (0x00, insn->def(0));
2033 }
2034
2035 void
2036 CodeEmitterGM107::emitISET()
2037 {
2038 const CmpInstruction *insn = this->insn->asCmp();
2039
2040 switch (insn->src(1).getFile()) {
2041 case FILE_GPR:
2042 emitInsn(0x5b500000);
2043 emitGPR (0x14, insn->src(1));
2044 break;
2045 case FILE_MEMORY_CONST:
2046 emitInsn(0x4b500000);
2047 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2048 break;
2049 case FILE_IMMEDIATE:
2050 emitInsn(0x36500000);
2051 emitIMMD(0x14, 19, insn->src(1));
2052 break;
2053 default:
2054 assert(!"bad src1 file");
2055 break;
2056 }
2057
2058 if (insn->op != OP_SET) {
2059 switch (insn->op) {
2060 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2061 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2062 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2063 default:
2064 assert(!"invalid set op");
2065 break;
2066 }
2067 emitPRED(0x27, insn->src(2));
2068 } else {
2069 emitPRED(0x27);
2070 }
2071
2072 emitCond3(0x31, insn->setCond);
2073 emitField(0x30, 1, isSignedType(insn->sType));
2074 emitCC (0x2f);
2075 emitField(0x2c, 1, insn->dType == TYPE_F32);
2076 emitX (0x2b);
2077 emitGPR (0x08, insn->src(0));
2078 emitGPR (0x00, insn->def(0));
2079 }
2080
2081 void
2082 CodeEmitterGM107::emitISETP()
2083 {
2084 const CmpInstruction *insn = this->insn->asCmp();
2085
2086 switch (insn->src(1).getFile()) {
2087 case FILE_GPR:
2088 emitInsn(0x5b600000);
2089 emitGPR (0x14, insn->src(1));
2090 break;
2091 case FILE_MEMORY_CONST:
2092 emitInsn(0x4b600000);
2093 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2094 break;
2095 case FILE_IMMEDIATE:
2096 emitInsn(0x36600000);
2097 emitIMMD(0x14, 19, insn->src(1));
2098 break;
2099 default:
2100 assert(!"bad src1 file");
2101 break;
2102 }
2103
2104 if (insn->op != OP_SET) {
2105 switch (insn->op) {
2106 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2107 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2108 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2109 default:
2110 assert(!"invalid set op");
2111 break;
2112 }
2113 emitPRED(0x27, insn->src(2));
2114 } else {
2115 emitPRED(0x27);
2116 }
2117
2118 emitCond3(0x31, insn->setCond);
2119 emitField(0x30, 1, isSignedType(insn->sType));
2120 emitX (0x2b);
2121 emitGPR (0x08, insn->src(0));
2122 emitPRED (0x03, insn->def(0));
2123 if (insn->defExists(1))
2124 emitPRED(0x00, insn->def(1));
2125 else
2126 emitPRED(0x00);
2127 }
2128
2129 void
2130 CodeEmitterGM107::emitSHL()
2131 {
2132 switch (insn->src(1).getFile()) {
2133 case FILE_GPR:
2134 emitInsn(0x5c480000);
2135 emitGPR (0x14, insn->src(1));
2136 break;
2137 case FILE_MEMORY_CONST:
2138 emitInsn(0x4c480000);
2139 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2140 break;
2141 case FILE_IMMEDIATE:
2142 emitInsn(0x38480000);
2143 emitIMMD(0x14, 19, insn->src(1));
2144 break;
2145 default:
2146 assert(!"bad src1 file");
2147 break;
2148 }
2149
2150 emitCC (0x2f);
2151 emitX (0x2b);
2152 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2153 emitGPR (0x08, insn->src(0));
2154 emitGPR (0x00, insn->def(0));
2155 }
2156
2157 void
2158 CodeEmitterGM107::emitSHR()
2159 {
2160 switch (insn->src(1).getFile()) {
2161 case FILE_GPR:
2162 emitInsn(0x5c280000);
2163 emitGPR (0x14, insn->src(1));
2164 break;
2165 case FILE_MEMORY_CONST:
2166 emitInsn(0x4c280000);
2167 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2168 break;
2169 case FILE_IMMEDIATE:
2170 emitInsn(0x38280000);
2171 emitIMMD(0x14, 19, insn->src(1));
2172 break;
2173 default:
2174 assert(!"bad src1 file");
2175 break;
2176 }
2177
2178 emitField(0x30, 1, isSignedType(insn->dType));
2179 emitCC (0x2f);
2180 emitX (0x2c);
2181 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2182 emitGPR (0x08, insn->src(0));
2183 emitGPR (0x00, insn->def(0));
2184 }
2185
2186 void
2187 CodeEmitterGM107::emitSHF()
2188 {
2189 unsigned type;
2190
2191 switch (insn->src(1).getFile()) {
2192 case FILE_GPR:
2193 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2194 emitGPR(0x14, insn->src(1));
2195 break;
2196 case FILE_IMMEDIATE:
2197 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2198 emitIMMD(0x14, 19, insn->src(1));
2199 break;
2200 default:
2201 assert(!"bad src1 file");
2202 break;
2203 }
2204
2205 switch (insn->sType) {
2206 case TYPE_U64:
2207 type = 2;
2208 break;
2209 case TYPE_S64:
2210 type = 3;
2211 break;
2212 default:
2213 type = 0;
2214 break;
2215 }
2216
2217 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2218 emitX (0x31);
2219 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2220 emitCC (0x2f);
2221 emitGPR (0x27, insn->src(2));
2222 emitField(0x25, 2, type);
2223 emitGPR (0x08, insn->src(0));
2224 emitGPR (0x00, insn->def(0));
2225 }
2226
2227 void
2228 CodeEmitterGM107::emitPOPC()
2229 {
2230 switch (insn->src(0).getFile()) {
2231 case FILE_GPR:
2232 emitInsn(0x5c080000);
2233 emitGPR (0x14, insn->src(0));
2234 break;
2235 case FILE_MEMORY_CONST:
2236 emitInsn(0x4c080000);
2237 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2238 break;
2239 case FILE_IMMEDIATE:
2240 emitInsn(0x38080000);
2241 emitIMMD(0x14, 19, insn->src(0));
2242 break;
2243 default:
2244 assert(!"bad src1 file");
2245 break;
2246 }
2247
2248 emitINV(0x28, insn->src(0));
2249 emitGPR(0x00, insn->def(0));
2250 }
2251
2252 void
2253 CodeEmitterGM107::emitBFI()
2254 {
2255 switch(insn->src(2).getFile()) {
2256 case FILE_GPR:
2257 switch (insn->src(1).getFile()) {
2258 case FILE_GPR:
2259 emitInsn(0x5bf00000);
2260 emitGPR (0x14, insn->src(1));
2261 break;
2262 case FILE_MEMORY_CONST:
2263 emitInsn(0x4bf00000);
2264 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2265 break;
2266 case FILE_IMMEDIATE:
2267 emitInsn(0x36f00000);
2268 emitIMMD(0x14, 19, insn->src(1));
2269 break;
2270 default:
2271 assert(!"bad src1 file");
2272 break;
2273 }
2274 emitGPR (0x27, insn->src(2));
2275 break;
2276 case FILE_MEMORY_CONST:
2277 emitInsn(0x53f00000);
2278 emitGPR (0x27, insn->src(1));
2279 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2280 break;
2281 default:
2282 assert(!"bad src2 file");
2283 break;
2284 }
2285
2286 emitCC (0x2f);
2287 emitGPR (0x08, insn->src(0));
2288 emitGPR (0x00, insn->def(0));
2289 }
2290
2291 void
2292 CodeEmitterGM107::emitBFE()
2293 {
2294 switch (insn->src(1).getFile()) {
2295 case FILE_GPR:
2296 emitInsn(0x5c000000);
2297 emitGPR (0x14, insn->src(1));
2298 break;
2299 case FILE_MEMORY_CONST:
2300 emitInsn(0x4c000000);
2301 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2302 break;
2303 case FILE_IMMEDIATE:
2304 emitInsn(0x38000000);
2305 emitIMMD(0x14, 19, insn->src(1));
2306 break;
2307 default:
2308 assert(!"bad src1 file");
2309 break;
2310 }
2311
2312 emitField(0x30, 1, isSignedType(insn->dType));
2313 emitCC (0x2f);
2314 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2315 emitGPR (0x08, insn->src(0));
2316 emitGPR (0x00, insn->def(0));
2317 }
2318
2319 void
2320 CodeEmitterGM107::emitFLO()
2321 {
2322 switch (insn->src(0).getFile()) {
2323 case FILE_GPR:
2324 emitInsn(0x5c300000);
2325 emitGPR (0x14, insn->src(0));
2326 break;
2327 case FILE_MEMORY_CONST:
2328 emitInsn(0x4c300000);
2329 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2330 break;
2331 case FILE_IMMEDIATE:
2332 emitInsn(0x38300000);
2333 emitIMMD(0x14, 19, insn->src(0));
2334 break;
2335 default:
2336 assert(!"bad src1 file");
2337 break;
2338 }
2339
2340 emitField(0x30, 1, isSignedType(insn->dType));
2341 emitCC (0x2f);
2342 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2343 emitINV (0x28, insn->src(0));
2344 emitGPR (0x00, insn->def(0));
2345 }
2346
2347 /*******************************************************************************
2348 * memory
2349 ******************************************************************************/
2350
2351 void
2352 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2353 {
2354 int data = 0;
2355
2356 switch (typeSizeof(type)) {
2357 case 1: data = isSignedType(type) ? 1 : 0; break;
2358 case 2: data = isSignedType(type) ? 3 : 2; break;
2359 case 4: data = 4; break;
2360 case 8: data = 5; break;
2361 case 16: data = 6; break;
2362 default:
2363 assert(!"bad type");
2364 break;
2365 }
2366
2367 emitField(pos, 3, data);
2368 }
2369
2370 void
2371 CodeEmitterGM107::emitLDSTc(int pos)
2372 {
2373 int mode = 0;
2374
2375 switch (insn->cache) {
2376 case CACHE_CA: mode = 0; break;
2377 case CACHE_CG: mode = 1; break;
2378 case CACHE_CS: mode = 2; break;
2379 case CACHE_CV: mode = 3; break;
2380 default:
2381 assert(!"invalid caching mode");
2382 break;
2383 }
2384
2385 emitField(pos, 2, mode);
2386 }
2387
2388 void
2389 CodeEmitterGM107::emitLDC()
2390 {
2391 emitInsn (0xef900000);
2392 emitLDSTs(0x30, insn->dType);
2393 emitField(0x2c, 2, insn->subOp);
2394 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2395 emitGPR (0x00, insn->def(0));
2396 }
2397
2398 void
2399 CodeEmitterGM107::emitLDL()
2400 {
2401 emitInsn (0xef400000);
2402 emitLDSTs(0x30, insn->dType);
2403 emitLDSTc(0x2c);
2404 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2405 emitGPR (0x00, insn->def(0));
2406 }
2407
2408 void
2409 CodeEmitterGM107::emitLDS()
2410 {
2411 emitInsn (0xef480000);
2412 emitLDSTs(0x30, insn->dType);
2413 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2414 emitGPR (0x00, insn->def(0));
2415 }
2416
2417 void
2418 CodeEmitterGM107::emitLD()
2419 {
2420 emitInsn (0x80000000);
2421 emitPRED (0x3a);
2422 emitLDSTc(0x38);
2423 emitLDSTs(0x35, insn->dType);
2424 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2425 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2426 emitGPR (0x00, insn->def(0));
2427 }
2428
2429 void
2430 CodeEmitterGM107::emitSTL()
2431 {
2432 emitInsn (0xef500000);
2433 emitLDSTs(0x30, insn->dType);
2434 emitLDSTc(0x2c);
2435 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2436 emitGPR (0x00, insn->src(1));
2437 }
2438
2439 void
2440 CodeEmitterGM107::emitSTS()
2441 {
2442 emitInsn (0xef580000);
2443 emitLDSTs(0x30, insn->dType);
2444 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2445 emitGPR (0x00, insn->src(1));
2446 }
2447
2448 void
2449 CodeEmitterGM107::emitST()
2450 {
2451 emitInsn (0xa0000000);
2452 emitPRED (0x3a);
2453 emitLDSTc(0x38);
2454 emitLDSTs(0x35, insn->dType);
2455 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2456 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2457 emitGPR (0x00, insn->src(1));
2458 }
2459
2460 void
2461 CodeEmitterGM107::emitALD()
2462 {
2463 emitInsn (0xefd80000);
2464 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2465 emitGPR (0x27, insn->src(0).getIndirect(1));
2466 emitO (0x20);
2467 emitP (0x1f);
2468 emitADDR (0x08, 20, 10, 0, insn->src(0));
2469 emitGPR (0x00, insn->def(0));
2470 }
2471
2472 void
2473 CodeEmitterGM107::emitAST()
2474 {
2475 emitInsn (0xeff00000);
2476 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2477 emitGPR (0x27, insn->src(0).getIndirect(1));
2478 emitP (0x1f);
2479 emitADDR (0x08, 20, 10, 0, insn->src(0));
2480 emitGPR (0x00, insn->src(1));
2481 }
2482
2483 void
2484 CodeEmitterGM107::emitISBERD()
2485 {
2486 emitInsn(0xefd00000);
2487 emitGPR (0x08, insn->src(0));
2488 emitGPR (0x00, insn->def(0));
2489 }
2490
2491 void
2492 CodeEmitterGM107::emitAL2P()
2493 {
2494 emitInsn (0xefa00000);
2495 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2496 emitPRED (0x2c);
2497 emitO (0x20);
2498 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2499 emitGPR (0x08, insn->src(0).getIndirect(0));
2500 emitGPR (0x00, insn->def(0));
2501 }
2502
2503 static void
2504 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2505 {
2506 int ipa = entry->ipa;
2507 int reg = entry->reg;
2508 int loc = entry->loc;
2509
2510 if (data.flatshade &&
2511 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2512 ipa = NV50_IR_INTERP_FLAT;
2513 reg = 0xff;
2514 } else if (data.force_persample_interp &&
2515 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2516 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2517 ipa |= NV50_IR_INTERP_CENTROID;
2518 }
2519 code[loc + 1] &= ~(0xf << 0x14);
2520 code[loc + 1] |= (ipa & 0x3) << 0x16;
2521 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2522 code[loc + 0] &= ~(0xff << 0x14);
2523 code[loc + 0] |= reg << 0x14;
2524 }
2525
2526 void
2527 CodeEmitterGM107::emitIPA()
2528 {
2529 int ipam = 0, ipas = 0;
2530
2531 switch (insn->getInterpMode()) {
2532 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2533 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2534 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2535 case NV50_IR_INTERP_SC : ipam = 3; break;
2536 default:
2537 assert(!"invalid ipa mode");
2538 break;
2539 }
2540
2541 switch (insn->getSampleMode()) {
2542 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2543 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2544 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2545 default:
2546 assert(!"invalid ipa sample mode");
2547 break;
2548 }
2549
2550 emitInsn (0xe0000000);
2551 emitField(0x36, 2, ipam);
2552 emitField(0x34, 2, ipas);
2553 emitSAT (0x33);
2554 emitField(0x2f, 3, 7);
2555 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2556 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2557 code[1] |= 0x00000040; /* .idx */
2558 emitGPR(0x00, insn->def(0));
2559
2560 if (insn->op == OP_PINTERP) {
2561 emitGPR(0x14, insn->src(1));
2562 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2563 emitGPR(0x27, insn->src(2));
2564 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2565 } else {
2566 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2567 emitGPR(0x27, insn->src(1));
2568 emitGPR(0x14);
2569 addInterp(insn->ipa, 0xff, interpApply);
2570 }
2571
2572 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2573 emitGPR(0x27);
2574 }
2575
2576 void
2577 CodeEmitterGM107::emitATOM()
2578 {
2579 unsigned dType, subOp;
2580
2581 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2582 switch (insn->dType) {
2583 case TYPE_U32: dType = 0; break;
2584 case TYPE_U64: dType = 1; break;
2585 default: assert(!"unexpected dType"); dType = 0; break;
2586 }
2587 subOp = 15;
2588
2589 emitInsn (0xee000000);
2590 } else {
2591 switch (insn->dType) {
2592 case TYPE_U32: dType = 0; break;
2593 case TYPE_S32: dType = 1; break;
2594 case TYPE_U64: dType = 2; break;
2595 case TYPE_F32: dType = 3; break;
2596 case TYPE_B128: dType = 4; break;
2597 case TYPE_S64: dType = 5; break;
2598 default: assert(!"unexpected dType"); dType = 0; break;
2599 }
2600 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2601 subOp = 8;
2602 else
2603 subOp = insn->subOp;
2604
2605 emitInsn (0xed000000);
2606 }
2607
2608 emitField(0x34, 4, subOp);
2609 emitField(0x31, 3, dType);
2610 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2611 emitGPR (0x14, insn->src(1));
2612 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2613 emitGPR (0x00, insn->def(0));
2614 }
2615
2616 void
2617 CodeEmitterGM107::emitATOMS()
2618 {
2619 unsigned dType, subOp;
2620
2621 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2622 switch (insn->dType) {
2623 case TYPE_U32: dType = 0; break;
2624 case TYPE_U64: dType = 1; break;
2625 default: assert(!"unexpected dType"); dType = 0; break;
2626 }
2627 subOp = 4;
2628
2629 emitInsn (0xee000000);
2630 emitField(0x34, 1, dType);
2631 } else {
2632 switch (insn->dType) {
2633 case TYPE_U32: dType = 0; break;
2634 case TYPE_S32: dType = 1; break;
2635 case TYPE_U64: dType = 2; break;
2636 case TYPE_S64: dType = 3; break;
2637 default: assert(!"unexpected dType"); dType = 0; break;
2638 }
2639
2640 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2641 subOp = 8;
2642 else
2643 subOp = insn->subOp;
2644
2645 emitInsn (0xec000000);
2646 emitField(0x1c, 3, dType);
2647 }
2648
2649 emitField(0x34, 4, subOp);
2650 emitGPR (0x14, insn->src(1));
2651 emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2652 emitGPR (0x00, insn->def(0));
2653 }
2654
2655 void
2656 CodeEmitterGM107::emitRED()
2657 {
2658 unsigned dType;
2659
2660 switch (insn->dType) {
2661 case TYPE_U32: dType = 0; break;
2662 case TYPE_S32: dType = 1; break;
2663 case TYPE_U64: dType = 2; break;
2664 case TYPE_F32: dType = 3; break;
2665 case TYPE_B128: dType = 4; break;
2666 case TYPE_S64: dType = 5; break;
2667 default: assert(!"unexpected dType"); dType = 0; break;
2668 }
2669
2670 emitInsn (0xebf80000);
2671 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2672 emitField(0x17, 3, insn->subOp);
2673 emitField(0x14, 3, dType);
2674 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2675 emitGPR (0x00, insn->src(1));
2676 }
2677
2678 void
2679 CodeEmitterGM107::emitCCTL()
2680 {
2681 unsigned width;
2682 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2683 emitInsn(0xef600000);
2684 width = 30;
2685 } else {
2686 emitInsn(0xef800000);
2687 width = 22;
2688 }
2689 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2690 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2691 emitField(0x00, 4, insn->subOp);
2692 }
2693
2694 /*******************************************************************************
2695 * surface
2696 ******************************************************************************/
2697
2698 void
2699 CodeEmitterGM107::emitPIXLD()
2700 {
2701 emitInsn (0xefe80000);
2702 emitPRED (0x2d);
2703 emitField(0x1f, 3, insn->subOp);
2704 emitGPR (0x08, insn->src(0));
2705 emitGPR (0x00, insn->def(0));
2706 }
2707
2708 /*******************************************************************************
2709 * texture
2710 ******************************************************************************/
2711
2712 void
2713 CodeEmitterGM107::emitTEXs(int pos)
2714 {
2715 int src1 = insn->predSrc == 1 ? 2 : 1;
2716 if (insn->srcExists(src1))
2717 emitGPR(pos, insn->src(src1));
2718 else
2719 emitGPR(pos);
2720 }
2721
/*
 * Translate a 4-bit texture component write mask into the 3-bit mask code
 * that emitTEXS() stores at bit 0x32.
 *
 * Masks 0x0, 0x5 and 0x6 (and anything above 0xf) have no code: they
 * trigger the assertion and yield 0.
 */
static uint8_t
getTEXSMask(uint8_t mask)
{
   /* indexed by mask; -1 marks masks without an encoding */
   static const int8_t encoding[16] = {
      -1,  0x0, 0x1, 0x4, 0x2, -1,  -1,  0x0,
      0x3, 0x5, 0x6, 0x1, 0x7, 0x2, 0x3, 0x4,
   };

   if (mask < 16 && encoding[mask] >= 0)
      return encoding[mask];

   assert(!"invalid mask");
   return 0;
}
2744
2745 static uint8_t
2746 getTEXSTarget(const TexInstruction *tex)
2747 {
2748 assert(tex->op == OP_TEX || tex->op == OP_TXL);
2749
2750 switch (tex->tex.target.getEnum()) {
2751 case TEX_TARGET_1D:
2752 assert(tex->tex.levelZero);
2753 return 0x0;
2754 case TEX_TARGET_2D:
2755 case TEX_TARGET_RECT:
2756 if (tex->tex.levelZero)
2757 return 0x2;
2758 if (tex->op == OP_TXL)
2759 return 0x3;
2760 return 0x1;
2761 case TEX_TARGET_2D_SHADOW:
2762 case TEX_TARGET_RECT_SHADOW:
2763 if (tex->tex.levelZero)
2764 return 0x6;
2765 if (tex->op == OP_TXL)
2766 return 0x5;
2767 return 0x4;
2768 case TEX_TARGET_2D_ARRAY:
2769 if (tex->tex.levelZero)
2770 return 0x8;
2771 return 0x7;
2772 case TEX_TARGET_2D_ARRAY_SHADOW:
2773 assert(tex->tex.levelZero);
2774 return 0x9;
2775 case TEX_TARGET_3D:
2776 if (tex->tex.levelZero)
2777 return 0xb;
2778 assert(tex->op != OP_TXL);
2779 return 0xa;
2780 case TEX_TARGET_CUBE:
2781 assert(!tex->tex.levelZero);
2782 if (tex->op == OP_TXL)
2783 return 0xd;
2784 return 0xc;
2785 default:
2786 assert(false);
2787 return 0x0;
2788 }
2789 }
2790
2791 static uint8_t
2792 getTLDSTarget(const TexInstruction *tex)
2793 {
2794 switch (tex->tex.target.getEnum()) {
2795 case TEX_TARGET_1D:
2796 if (tex->tex.levelZero)
2797 return 0x0;
2798 return 0x1;
2799 case TEX_TARGET_2D:
2800 case TEX_TARGET_RECT:
2801 if (tex->tex.levelZero)
2802 return tex->tex.useOffsets ? 0x4 : 0x2;
2803 return tex->tex.useOffsets ? 0xc : 0x5;
2804 case TEX_TARGET_2D_MS:
2805 assert(tex->tex.levelZero);
2806 return 0x6;
2807 case TEX_TARGET_3D:
2808 assert(tex->tex.levelZero);
2809 return 0x7;
2810 case TEX_TARGET_2D_ARRAY:
2811 assert(tex->tex.levelZero);
2812 return 0x8;
2813
2814 default:
2815 assert(false);
2816 return 0x0;
2817 }
2818 }
2819
2820 void
2821 CodeEmitterGM107::emitTEX()
2822 {
2823 const TexInstruction *insn = this->insn->asTex();
2824 int lodm = 0;
2825
2826 if (!insn->tex.levelZero) {
2827 switch (insn->op) {
2828 case OP_TEX: lodm = 0; break;
2829 case OP_TXB: lodm = 2; break;
2830 case OP_TXL: lodm = 3; break;
2831 default:
2832 assert(!"invalid tex op");
2833 break;
2834 }
2835 } else {
2836 lodm = 1;
2837 }
2838
2839 if (insn->tex.rIndirectSrc >= 0) {
2840 emitInsn (0xdeb80000);
2841 emitField(0x25, 2, lodm);
2842 emitField(0x24, 1, insn->tex.useOffsets == 1);
2843 } else {
2844 emitInsn (0xc0380000);
2845 emitField(0x37, 2, lodm);
2846 emitField(0x36, 1, insn->tex.useOffsets == 1);
2847 emitField(0x24, 13, insn->tex.r);
2848 }
2849
2850 emitField(0x32, 1, insn->tex.target.isShadow());
2851 emitField(0x31, 1, insn->tex.liveOnly);
2852 emitField(0x23, 1, insn->tex.derivAll);
2853 emitField(0x1f, 4, insn->tex.mask);
2854 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2855 insn->tex.target.getDim() - 1);
2856 emitField(0x1c, 1, insn->tex.target.isArray());
2857 emitTEXs (0x14);
2858 emitGPR (0x08, insn->src(0));
2859 emitGPR (0x00, insn->def(0));
2860 }
2861
2862 void
2863 CodeEmitterGM107::emitTEXS()
2864 {
2865 const TexInstruction *insn = this->insn->asTex();
2866
2867 switch (insn->op) {
2868 case OP_TEX:
2869 case OP_TXL:
2870 emitInsn (0xd8000000);
2871 emitField(0x35, 4, getTEXSTarget(insn));
2872 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2873 break;
2874 case OP_TXF:
2875 emitInsn (0xda000000);
2876 emitField(0x35, 4, getTLDSTarget(insn));
2877 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2878 break;
2879 case OP_TXG:
2880 assert(insn->tex.useOffsets != 4);
2881 emitInsn (0xdf000000);
2882 emitField(0x34, 2, insn->tex.gatherComp);
2883 emitField(0x33, 1, insn->tex.useOffsets == 1);
2884 emitField(0x32, 1, insn->tex.target.isShadow());
2885 break;
2886 default:
2887 unreachable("unknown op in emitTEXS()");
2888 break;
2889 }
2890
2891 emitField(0x31, 1, insn->tex.liveOnly);
2892 emitField(0x24, 13, insn->tex.r);
2893 if (insn->defExists(1))
2894 emitGPR(0x1c, insn->def(1));
2895 else
2896 emitGPR(0x1c);
2897 if (insn->srcExists(1))
2898 emitGPR(0x14, insn->getSrc(1));
2899 else
2900 emitGPR(0x14);
2901 emitGPR (0x08, insn->src(0));
2902 emitGPR (0x00, insn->def(0));
2903 }
2904
2905 void
2906 CodeEmitterGM107::emitTLD()
2907 {
2908 const TexInstruction *insn = this->insn->asTex();
2909
2910 if (insn->tex.rIndirectSrc >= 0) {
2911 emitInsn (0xdd380000);
2912 } else {
2913 emitInsn (0xdc380000);
2914 emitField(0x24, 13, insn->tex.r);
2915 }
2916
2917 emitField(0x37, 1, insn->tex.levelZero == 0);
2918 emitField(0x32, 1, insn->tex.target.isMS());
2919 emitField(0x31, 1, insn->tex.liveOnly);
2920 emitField(0x23, 1, insn->tex.useOffsets == 1);
2921 emitField(0x1f, 4, insn->tex.mask);
2922 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2923 insn->tex.target.getDim() - 1);
2924 emitField(0x1c, 1, insn->tex.target.isArray());
2925 emitTEXs (0x14);
2926 emitGPR (0x08, insn->src(0));
2927 emitGPR (0x00, insn->def(0));
2928 }
2929
2930 void
2931 CodeEmitterGM107::emitTLD4()
2932 {
2933 const TexInstruction *insn = this->insn->asTex();
2934
2935 if (insn->tex.rIndirectSrc >= 0) {
2936 emitInsn (0xdef80000);
2937 emitField(0x26, 2, insn->tex.gatherComp);
2938 emitField(0x25, 2, insn->tex.useOffsets == 4);
2939 emitField(0x24, 2, insn->tex.useOffsets == 1);
2940 } else {
2941 emitInsn (0xc8380000);
2942 emitField(0x38, 2, insn->tex.gatherComp);
2943 emitField(0x37, 2, insn->tex.useOffsets == 4);
2944 emitField(0x36, 2, insn->tex.useOffsets == 1);
2945 emitField(0x24, 13, insn->tex.r);
2946 }
2947
2948 emitField(0x32, 1, insn->tex.target.isShadow());
2949 emitField(0x31, 1, insn->tex.liveOnly);
2950 emitField(0x23, 1, insn->tex.derivAll);
2951 emitField(0x1f, 4, insn->tex.mask);
2952 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2953 insn->tex.target.getDim() - 1);
2954 emitField(0x1c, 1, insn->tex.target.isArray());
2955 emitTEXs (0x14);
2956 emitGPR (0x08, insn->src(0));
2957 emitGPR (0x00, insn->def(0));
2958 }
2959
2960 void
2961 CodeEmitterGM107::emitTXD()
2962 {
2963 const TexInstruction *insn = this->insn->asTex();
2964
2965 if (insn->tex.rIndirectSrc >= 0) {
2966 emitInsn (0xde780000);
2967 } else {
2968 emitInsn (0xde380000);
2969 emitField(0x24, 13, insn->tex.r);
2970 }
2971
2972 emitField(0x31, 1, insn->tex.liveOnly);
2973 emitField(0x23, 1, insn->tex.useOffsets == 1);
2974 emitField(0x1f, 4, insn->tex.mask);
2975 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2976 insn->tex.target.getDim() - 1);
2977 emitField(0x1c, 1, insn->tex.target.isArray());
2978 emitTEXs (0x14);
2979 emitGPR (0x08, insn->src(0));
2980 emitGPR (0x00, insn->def(0));
2981 }
2982
2983 void
2984 CodeEmitterGM107::emitTMML()
2985 {
2986 const TexInstruction *insn = this->insn->asTex();
2987
2988 if (insn->tex.rIndirectSrc >= 0) {
2989 emitInsn (0xdf600000);
2990 } else {
2991 emitInsn (0xdf580000);
2992 emitField(0x24, 13, insn->tex.r);
2993 }
2994
2995 emitField(0x31, 1, insn->tex.liveOnly);
2996 emitField(0x23, 1, insn->tex.derivAll);
2997 emitField(0x1f, 4, insn->tex.mask);
2998 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2999 insn->tex.target.getDim() - 1);
3000 emitField(0x1c, 1, insn->tex.target.isArray());
3001 emitTEXs (0x14);
3002 emitGPR (0x08, insn->src(0));
3003 emitGPR (0x00, insn->def(0));
3004 }
3005
3006 void
3007 CodeEmitterGM107::emitTXQ()
3008 {
3009 const TexInstruction *insn = this->insn->asTex();
3010 int type = 0;
3011
3012 switch (insn->tex.query) {
3013 case TXQ_DIMS : type = 0x01; break;
3014 case TXQ_TYPE : type = 0x02; break;
3015 case TXQ_SAMPLE_POSITION: type = 0x05; break;
3016 case TXQ_FILTER : type = 0x10; break;
3017 case TXQ_LOD : type = 0x12; break;
3018 case TXQ_WRAP : type = 0x14; break;
3019 case TXQ_BORDER_COLOUR : type = 0x16; break;
3020 default:
3021 assert(!"invalid txq query");
3022 break;
3023 }
3024
3025 if (insn->tex.rIndirectSrc >= 0) {
3026 emitInsn (0xdf500000);
3027 } else {
3028 emitInsn (0xdf480000);
3029 emitField(0x24, 13, insn->tex.r);
3030 }
3031
3032 emitField(0x31, 1, insn->tex.liveOnly);
3033 emitField(0x1f, 4, insn->tex.mask);
3034 emitField(0x16, 6, type);
3035 emitGPR (0x08, insn->src(0));
3036 emitGPR (0x00, insn->def(0));
3037 }
3038
3039 void
3040 CodeEmitterGM107::emitDEPBAR()
3041 {
3042 emitInsn (0xf0f00000);
3043 emitField(0x1d, 1, 1); /* le */
3044 emitField(0x1a, 3, 5);
3045 emitField(0x14, 6, insn->subOp);
3046 emitField(0x00, 6, insn->subOp);
3047 }
3048
3049 /*******************************************************************************
3050 * misc
3051 ******************************************************************************/
3052
3053 void
3054 CodeEmitterGM107::emitNOP()
3055 {
3056 emitInsn(0x50b00000);
3057 }
3058
3059 void
3060 CodeEmitterGM107::emitKIL()
3061 {
3062 emitInsn (0xe3300000);
3063 emitCond5(0x00, CC_TR);
3064 }
3065
3066 void
3067 CodeEmitterGM107::emitOUT()
3068 {
3069 const int cut = insn->op == OP_RESTART || insn->subOp;
3070 const int emit = insn->op == OP_EMIT;
3071
3072 switch (insn->src(1).getFile()) {
3073 case FILE_GPR:
3074 emitInsn(0xfbe00000);
3075 emitGPR (0x14, insn->src(1));
3076 break;
3077 case FILE_IMMEDIATE:
3078 emitInsn(0xf6e00000);
3079 emitIMMD(0x14, 19, insn->src(1));
3080 break;
3081 case FILE_MEMORY_CONST:
3082 emitInsn(0xebe00000);
3083 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3084 break;
3085 default:
3086 assert(!"bad src1 file");
3087 break;
3088 }
3089
3090 emitField(0x27, 2, (cut << 1) | emit);
3091 emitGPR (0x08, insn->src(0));
3092 emitGPR (0x00, insn->def(0));
3093 }
3094
3095 void
3096 CodeEmitterGM107::emitBAR()
3097 {
3098 uint8_t subop;
3099
3100 emitInsn (0xf0a80000);
3101
3102 switch (insn->subOp) {
3103 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3104 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
3105 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
3106 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
3107 default:
3108 subop = 0x80;
3109 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3110 break;
3111 }
3112
3113 emitField(0x20, 8, subop);
3114
3115 // barrier id
3116 if (insn->src(0).getFile() == FILE_GPR) {
3117 emitGPR(0x08, insn->src(0));
3118 } else {
3119 ImmediateValue *imm = insn->getSrc(0)->asImm();
3120 assert(imm);
3121 emitField(0x08, 8, imm->reg.data.u32);
3122 emitField(0x2b, 1, 1);
3123 }
3124
3125 // thread count
3126 if (insn->src(1).getFile() == FILE_GPR) {
3127 emitGPR(0x14, insn->src(1));
3128 } else {
3129 ImmediateValue *imm = insn->getSrc(0)->asImm();
3130 assert(imm);
3131 emitField(0x14, 12, imm->reg.data.u32);
3132 emitField(0x2c, 1, 1);
3133 }
3134
3135 if (insn->srcExists(2) && (insn->predSrc != 2)) {
3136 emitPRED (0x27, insn->src(2));
3137 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3138 } else {
3139 emitField(0x27, 3, 7);
3140 }
3141 }
3142
3143 void
3144 CodeEmitterGM107::emitMEMBAR()
3145 {
3146 emitInsn (0xef980000);
3147 emitField(0x08, 2, insn->subOp >> 2);
3148 }
3149
3150 void
3151 CodeEmitterGM107::emitVOTE()
3152 {
3153 const ImmediateValue *imm;
3154 uint32_t u32;
3155
3156 int r = -1, p = -1;
3157 for (int i = 0; insn->defExists(i); i++) {
3158 if (insn->def(i).getFile() == FILE_GPR)
3159 r = i;
3160 else if (insn->def(i).getFile() == FILE_PREDICATE)
3161 p = i;
3162 }
3163
3164 emitInsn (0x50d80000);
3165 emitField(0x30, 2, insn->subOp);
3166 if (r >= 0)
3167 emitGPR (0x00, insn->def(r));
3168 else
3169 emitGPR (0x00);
3170 if (p >= 0)
3171 emitPRED (0x2d, insn->def(p));
3172 else
3173 emitPRED (0x2d);
3174
3175 switch (insn->src(0).getFile()) {
3176 case FILE_PREDICATE:
3177 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3178 emitPRED (0x27, insn->src(0));
3179 break;
3180 case FILE_IMMEDIATE:
3181 imm = insn->getSrc(0)->asImm();
3182 assert(imm);
3183 u32 = imm->reg.data.u32;
3184 assert(u32 == 0 || u32 == 1);
3185 emitPRED(0x27);
3186 emitField(0x2a, 1, u32 == 0);
3187 break;
3188 default:
3189 assert(!"Unhandled src");
3190 break;
3191 }
3192 }
3193
3194 void
3195 CodeEmitterGM107::emitSUTarget()
3196 {
3197 const TexInstruction *insn = this->insn->asTex();
3198 int target = 0;
3199
3200 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3201
3202 if (insn->tex.target == TEX_TARGET_BUFFER) {
3203 target = 2;
3204 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3205 target = 4;
3206 } else if (insn->tex.target == TEX_TARGET_2D ||
3207 insn->tex.target == TEX_TARGET_RECT) {
3208 target = 6;
3209 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3210 insn->tex.target == TEX_TARGET_CUBE ||
3211 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3212 target = 8;
3213 } else if (insn->tex.target == TEX_TARGET_3D) {
3214 target = 10;
3215 } else {
3216 assert(insn->tex.target == TEX_TARGET_1D);
3217 }
3218 emitField(0x20, 4, target);
3219 }
3220
3221 void
3222 CodeEmitterGM107::emitSUHandle(const int s)
3223 {
3224 const TexInstruction *insn = this->insn->asTex();
3225
3226 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3227
3228 if (insn->src(s).getFile() == FILE_GPR) {
3229 emitGPR(0x27, insn->src(s));
3230 } else {
3231 ImmediateValue *imm = insn->getSrc(s)->asImm();
3232 assert(imm);
3233 emitField(0x33, 1, 1);
3234 emitField(0x24, 13, imm->reg.data.u32);
3235 }
3236 }
3237
3238 void
3239 CodeEmitterGM107::emitSUSTx()
3240 {
3241 const TexInstruction *insn = this->insn->asTex();
3242
3243 emitInsn(0xeb200000);
3244 if (insn->op == OP_SUSTB)
3245 emitField(0x34, 1, 1);
3246 emitSUTarget();
3247
3248 emitLDSTc(0x18);
3249 emitField(0x14, 4, 0xf); // rgba
3250 emitGPR (0x08, insn->src(0));
3251 emitGPR (0x00, insn->src(1));
3252
3253 emitSUHandle(2);
3254 }
3255
3256 void
3257 CodeEmitterGM107::emitSULDx()
3258 {
3259 const TexInstruction *insn = this->insn->asTex();
3260 int type = 0;
3261
3262 emitInsn(0xeb000000);
3263 if (insn->op == OP_SULDB)
3264 emitField(0x34, 1, 1);
3265 emitSUTarget();
3266
3267 switch (insn->dType) {
3268 case TYPE_S8: type = 1; break;
3269 case TYPE_U16: type = 2; break;
3270 case TYPE_S16: type = 3; break;
3271 case TYPE_U32: type = 4; break;
3272 case TYPE_U64: type = 5; break;
3273 case TYPE_B128: type = 6; break;
3274 default:
3275 assert(insn->dType == TYPE_U8);
3276 break;
3277 }
3278 emitLDSTc(0x18);
3279 emitField(0x14, 3, type);
3280 emitGPR (0x00, insn->def(0));
3281 emitGPR (0x08, insn->src(0));
3282
3283 emitSUHandle(1);
3284 }
3285
3286 void
3287 CodeEmitterGM107::emitSUREDx()
3288 {
3289 const TexInstruction *insn = this->insn->asTex();
3290 uint8_t type = 0, subOp;
3291
3292 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3293 emitInsn(0xeac00000);
3294 else
3295 emitInsn(0xea600000);
3296
3297 if (insn->op == OP_SUREDB)
3298 emitField(0x34, 1, 1);
3299 emitSUTarget();
3300
3301 // destination type
3302 switch (insn->dType) {
3303 case TYPE_S32: type = 1; break;
3304 case TYPE_U64: type = 2; break;
3305 case TYPE_F32: type = 3; break;
3306 case TYPE_S64: type = 5; break;
3307 default:
3308 assert(insn->dType == TYPE_U32);
3309 break;
3310 }
3311
3312 // atomic operation
3313 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3314 subOp = 0;
3315 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3316 subOp = 8;
3317 } else {
3318 subOp = insn->subOp;
3319 }
3320
3321 emitField(0x24, 3, type);
3322 emitField(0x1d, 4, subOp);
3323 emitGPR (0x14, insn->src(1));
3324 emitGPR (0x08, insn->src(0));
3325 emitGPR (0x00, insn->def(0));
3326
3327 emitSUHandle(2);
3328 }
3329
3330 /*******************************************************************************
3331 * assembler front-end
3332 ******************************************************************************/
3333
3334 bool
3335 CodeEmitterGM107::emitInstruction(Instruction *i)
3336 {
3337 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3338 bool ret = true;
3339
3340 insn = i;
3341
3342 if (insn->encSize != 8) {
3343 ERROR("skipping undecodable instruction: "); insn->print();
3344 return false;
3345 } else
3346 if (codeSize + size > codeSizeLimit) {
3347 ERROR("code emitter output buffer too small\n");
3348 return false;
3349 }
3350
3351 if (writeIssueDelays) {
3352 int n = ((codeSize & 0x1f) / 8) - 1;
3353 if (n < 0) {
3354 data = code;
3355 data[0] = 0x00000000;
3356 data[1] = 0x00000000;
3357 code += 2;
3358 codeSize += 8;
3359 n++;
3360 }
3361
3362 emitField(data, n * 21, 21, insn->sched);
3363 }
3364
3365 switch (insn->op) {
3366 case OP_EXIT:
3367 emitEXIT();
3368 break;
3369 case OP_BRA:
3370 emitBRA();
3371 break;
3372 case OP_CALL:
3373 emitCAL();
3374 break;
3375 case OP_PRECONT:
3376 emitPCNT();
3377 break;
3378 case OP_CONT:
3379 emitCONT();
3380 break;
3381 case OP_PREBREAK:
3382 emitPBK();
3383 break;
3384 case OP_BREAK:
3385 emitBRK();
3386 break;
3387 case OP_PRERET:
3388 emitPRET();
3389 break;
3390 case OP_RET:
3391 emitRET();
3392 break;
3393 case OP_JOINAT:
3394 emitSSY();
3395 break;
3396 case OP_JOIN:
3397 emitSYNC();
3398 break;
3399 case OP_QUADON:
3400 emitSAM();
3401 break;
3402 case OP_QUADPOP:
3403 emitRAM();
3404 break;
3405 case OP_MOV:
3406 emitMOV();
3407 break;
3408 case OP_RDSV:
3409 if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
3410 emitCS2R();
3411 else
3412 emitS2R();
3413 break;
3414 case OP_ABS:
3415 case OP_NEG:
3416 case OP_SAT:
3417 case OP_FLOOR:
3418 case OP_CEIL:
3419 case OP_TRUNC:
3420 case OP_CVT:
3421 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3422 insn->src(0).getFile() == FILE_PREDICATE)) {
3423 emitMOV();
3424 } else if (isFloatType(insn->dType)) {
3425 if (isFloatType(insn->sType))
3426 emitF2F();
3427 else
3428 emitI2F();
3429 } else {
3430 if (isFloatType(insn->sType))
3431 emitF2I();
3432 else
3433 emitI2I();
3434 }
3435 break;
3436 case OP_SHFL:
3437 emitSHFL();
3438 break;
3439 case OP_ADD:
3440 case OP_SUB:
3441 if (isFloatType(insn->dType)) {
3442 if (insn->dType == TYPE_F64)
3443 emitDADD();
3444 else
3445 emitFADD();
3446 } else {
3447 emitIADD();
3448 }
3449 break;
3450 case OP_MUL:
3451 if (isFloatType(insn->dType)) {
3452 if (insn->dType == TYPE_F64)
3453 emitDMUL();
3454 else
3455 emitFMUL();
3456 } else {
3457 emitIMUL();
3458 }
3459 break;
3460 case OP_MAD:
3461 case OP_FMA:
3462 if (isFloatType(insn->dType)) {
3463 if (insn->dType == TYPE_F64)
3464 emitDFMA();
3465 else
3466 emitFFMA();
3467 } else {
3468 emitIMAD();
3469 }
3470 break;
3471 case OP_SHLADD:
3472 emitISCADD();
3473 break;
3474 case OP_XMAD:
3475 emitXMAD();
3476 break;
3477 case OP_MIN:
3478 case OP_MAX:
3479 if (isFloatType(insn->dType)) {
3480 if (insn->dType == TYPE_F64)
3481 emitDMNMX();
3482 else
3483 emitFMNMX();
3484 } else {
3485 emitIMNMX();
3486 }
3487 break;
3488 case OP_SHL:
3489 if (typeSizeof(insn->sType) == 8)
3490 emitSHF();
3491 else
3492 emitSHL();
3493 break;
3494 case OP_SHR:
3495 if (typeSizeof(insn->sType) == 8)
3496 emitSHF();
3497 else
3498 emitSHR();
3499 break;
3500 case OP_POPCNT:
3501 emitPOPC();
3502 break;
3503 case OP_INSBF:
3504 emitBFI();
3505 break;
3506 case OP_EXTBF:
3507 emitBFE();
3508 break;
3509 case OP_BFIND:
3510 emitFLO();
3511 break;
3512 case OP_SLCT:
3513 if (isFloatType(insn->dType))
3514 emitFCMP();
3515 else
3516 emitICMP();
3517 break;
3518 case OP_SET:
3519 case OP_SET_AND:
3520 case OP_SET_OR:
3521 case OP_SET_XOR:
3522 if (insn->def(0).getFile() != FILE_PREDICATE) {
3523 if (isFloatType(insn->sType))
3524 if (insn->sType == TYPE_F64)
3525 emitDSET();
3526 else
3527 emitFSET();
3528 else
3529 emitISET();
3530 } else {
3531 if (isFloatType(insn->sType))
3532 if (insn->sType == TYPE_F64)
3533 emitDSETP();
3534 else
3535 emitFSETP();
3536 else
3537 emitISETP();
3538 }
3539 break;
3540 case OP_SELP:
3541 emitSEL();
3542 break;
3543 case OP_PRESIN:
3544 case OP_PREEX2:
3545 emitRRO();
3546 break;
3547 case OP_COS:
3548 case OP_SIN:
3549 case OP_EX2:
3550 case OP_LG2:
3551 case OP_RCP:
3552 case OP_RSQ:
3553 case OP_SQRT:
3554 emitMUFU();
3555 break;
3556 case OP_AND:
3557 case OP_OR:
3558 case OP_XOR:
3559 emitLOP();
3560 break;
3561 case OP_NOT:
3562 emitNOT();
3563 break;
3564 case OP_LOAD:
3565 switch (insn->src(0).getFile()) {
3566 case FILE_MEMORY_CONST : emitLDC(); break;
3567 case FILE_MEMORY_LOCAL : emitLDL(); break;
3568 case FILE_MEMORY_SHARED: emitLDS(); break;
3569 case FILE_MEMORY_GLOBAL: emitLD(); break;
3570 default:
3571 assert(!"invalid load");
3572 emitNOP();
3573 break;
3574 }
3575 break;
3576 case OP_STORE:
3577 switch (insn->src(0).getFile()) {
3578 case FILE_MEMORY_LOCAL : emitSTL(); break;
3579 case FILE_MEMORY_SHARED: emitSTS(); break;
3580 case FILE_MEMORY_GLOBAL: emitST(); break;
3581 default:
3582 assert(!"invalid store");
3583 emitNOP();
3584 break;
3585 }
3586 break;
3587 case OP_ATOM:
3588 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3589 emitATOMS();
3590 else
3591 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3592 emitRED();
3593 else
3594 emitATOM();
3595 break;
3596 case OP_CCTL:
3597 emitCCTL();
3598 break;
3599 case OP_VFETCH:
3600 emitALD();
3601 break;
3602 case OP_EXPORT:
3603 emitAST();
3604 break;
3605 case OP_PFETCH:
3606 emitISBERD();
3607 break;
3608 case OP_AFETCH:
3609 emitAL2P();
3610 break;
3611 case OP_LINTERP:
3612 case OP_PINTERP:
3613 emitIPA();
3614 break;
3615 case OP_PIXLD:
3616 emitPIXLD();
3617 break;
3618 case OP_TEX:
3619 case OP_TXL:
3620 if (insn->asTex()->tex.scalar)
3621 emitTEXS();
3622 else
3623 emitTEX();
3624 break;
3625 case OP_TXB:
3626 emitTEX();
3627 break;
3628 case OP_TXF:
3629 if (insn->asTex()->tex.scalar)
3630 emitTEXS();
3631 else
3632 emitTLD();
3633 break;
3634 case OP_TXG:
3635 if (insn->asTex()->tex.scalar)
3636 emitTEXS();
3637 else
3638 emitTLD4();
3639 break;
3640 case OP_TXD:
3641 emitTXD();
3642 break;
3643 case OP_TXQ:
3644 emitTXQ();
3645 break;
3646 case OP_TXLQ:
3647 emitTMML();
3648 break;
3649 case OP_TEXBAR:
3650 emitDEPBAR();
3651 break;
3652 case OP_QUADOP:
3653 emitFSWZADD();
3654 break;
3655 case OP_NOP:
3656 emitNOP();
3657 break;
3658 case OP_DISCARD:
3659 emitKIL();
3660 break;
3661 case OP_EMIT:
3662 case OP_RESTART:
3663 emitOUT();
3664 break;
3665 case OP_BAR:
3666 emitBAR();
3667 break;
3668 case OP_MEMBAR:
3669 emitMEMBAR();
3670 break;
3671 case OP_VOTE:
3672 emitVOTE();
3673 break;
3674 case OP_SUSTB:
3675 case OP_SUSTP:
3676 emitSUSTx();
3677 break;
3678 case OP_SULDB:
3679 case OP_SULDP:
3680 emitSULDx();
3681 break;
3682 case OP_SUREDB:
3683 case OP_SUREDP:
3684 emitSUREDx();
3685 break;
3686 default:
3687 assert(!"invalid opcode");
3688 emitNOP();
3689 ret = false;
3690 break;
3691 }
3692
3693 if (insn->join) {
3694 /*XXX*/
3695 }
3696
3697 code += 2;
3698 codeSize += 8;
3699 return ret;
3700 }
3701
3702 uint32_t
3703 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3704 {
3705 return 8;
3706 }
3707
3708 /*******************************************************************************
3709 * sched data calculator
3710 ******************************************************************************/
3711
3712 class SchedDataCalculatorGM107 : public Pass
3713 {
3714 public:
3715 SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3716
3717 private:
3718 struct RegScores
3719 {
3720 struct ScoreData {
3721 int r[256];
3722 int p[8];
3723 int c;
3724 } rd, wr;
3725 int base;
3726
3727 void rebase(const int base)
3728 {
3729 const int delta = this->base - base;
3730 if (!delta)
3731 return;
3732 this->base = 0;
3733
3734 for (int i = 0; i < 256; ++i) {
3735 rd.r[i] += delta;
3736 wr.r[i] += delta;
3737 }
3738 for (int i = 0; i < 8; ++i) {
3739 rd.p[i] += delta;
3740 wr.p[i] += delta;
3741 }
3742 rd.c += delta;
3743 wr.c += delta;
3744 }
3745 void wipe()
3746 {
3747 memset(&rd, 0, sizeof(rd));
3748 memset(&wr, 0, sizeof(wr));
3749 }
3750 int getLatest(const ScoreData& d) const
3751 {
3752 int max = 0;
3753 for (int i = 0; i < 256; ++i)
3754 if (d.r[i] > max)
3755 max = d.r[i];
3756 for (int i = 0; i < 8; ++i)
3757 if (d.p[i] > max)
3758 max = d.p[i];
3759 if (d.c > max)
3760 max = d.c;
3761 return max;
3762 }
3763 inline int getLatestRd() const
3764 {
3765 return getLatest(rd);
3766 }
3767 inline int getLatestWr() const
3768 {
3769 return getLatest(wr);
3770 }
3771 inline int getLatest() const
3772 {
3773 return MAX2(getLatestRd(), getLatestWr());
3774 }
3775 void setMax(const RegScores *that)
3776 {
3777 for (int i = 0; i < 256; ++i) {
3778 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3779 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3780 }
3781 for (int i = 0; i < 8; ++i) {
3782 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3783 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3784 }
3785 rd.c = MAX2(rd.c, that->rd.c);
3786 wr.c = MAX2(wr.c, that->wr.c);
3787 }
3788 void print(int cycle)
3789 {
3790 for (int i = 0; i < 256; ++i) {
3791 if (rd.r[i] > cycle)
3792 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3793 if (wr.r[i] > cycle)
3794 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3795 }
3796 for (int i = 0; i < 8; ++i) {
3797 if (rd.p[i] > cycle)
3798 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3799 if (wr.p[i] > cycle)
3800 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3801 }
3802 if (rd.c > cycle)
3803 INFO("rd $c @ %i\n", rd.c);
3804 if (wr.c > cycle)
3805 INFO("wr $c @ %i\n", wr.c);
3806 }
3807 };
3808
3809 RegScores *score; // for current BB
3810 std::vector<RegScores> scoreBoards;
3811
3812 const TargetGM107 *targ;
3813 bool visit(Function *);
3814 bool visit(BasicBlock *);
3815
3816 void commitInsn(const Instruction *, int);
3817 int calcDelay(const Instruction *, int) const;
3818 void setDelay(Instruction *, int, const Instruction *);
3819 void recordWr(const Value *, int, int);
3820 void checkRd(const Value *, int, int&) const;
3821
3822 inline void emitYield(Instruction *);
3823 inline void emitStall(Instruction *, uint8_t);
3824 inline void emitReuse(Instruction *, uint8_t);
3825 inline void emitWrDepBar(Instruction *, uint8_t);
3826 inline void emitRdDepBar(Instruction *, uint8_t);
3827 inline void emitWtDepBar(Instruction *, uint8_t);
3828
3829 inline int getStall(const Instruction *) const;
3830 inline int getWrDepBar(const Instruction *) const;
3831 inline int getRdDepBar(const Instruction *) const;
3832 inline int getWtDepBar(const Instruction *) const;
3833
3834 void setReuseFlag(Instruction *);
3835
3836 inline void printSchedInfo(int, const Instruction *) const;
3837
3838 struct LiveBarUse {
3839 LiveBarUse(Instruction *insn, Instruction *usei)
3840 : insn(insn), usei(usei) { }
3841 Instruction *insn;
3842 Instruction *usei;
3843 };
3844
3845 struct LiveBarDef {
3846 LiveBarDef(Instruction *insn, Instruction *defi)
3847 : insn(insn), defi(defi) { }
3848 Instruction *insn;
3849 Instruction *defi;
3850 };
3851
3852 bool insertBarriers(BasicBlock *);
3853
3854 bool doesInsnWriteTo(const Instruction *insn, const Value *val) const;
3855 Instruction *findFirstUse(const Instruction *) const;
3856 Instruction *findFirstDef(const Instruction *) const;
3857
3858 bool needRdDepBar(const Instruction *) const;
3859 bool needWrDepBar(const Instruction *) const;
3860 };
3861
3862 inline void
3863 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3864 {
3865 assert(cnt < 16);
3866 insn->sched |= cnt;
3867 }
3868
3869 inline void
3870 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3871 {
3872 insn->sched |= 1 << 4;
3873 }
3874
3875 inline void
3876 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3877 {
3878 assert(id < 6);
3879 if ((insn->sched & 0xe0) == 0xe0)
3880 insn->sched ^= 0xe0;
3881 insn->sched |= id << 5;
3882 }
3883
3884 inline void
3885 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3886 {
3887 assert(id < 6);
3888 if ((insn->sched & 0x700) == 0x700)
3889 insn->sched ^= 0x700;
3890 insn->sched |= id << 8;
3891 }
3892
3893 inline void
3894 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3895 {
3896 assert(id < 6);
3897 insn->sched |= 1 << (11 + id);
3898 }
3899
3900 inline void
3901 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3902 {
3903 assert(id < 4);
3904 insn->sched |= 1 << (17 + id);
3905 }
3906
3907 inline void
3908 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3909 const Instruction *insn) const
3910 {
3911 uint8_t st, yl, wr, rd, wt, ru;
3912
3913 st = (insn->sched & 0x00000f) >> 0;
3914 yl = (insn->sched & 0x000010) >> 4;
3915 wr = (insn->sched & 0x0000e0) >> 5;
3916 rd = (insn->sched & 0x000700) >> 8;
3917 wt = (insn->sched & 0x01f800) >> 11;
3918 ru = (insn->sched & 0x1e0000) >> 17;
3919
3920 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3921 cycle, st, yl, wr, rd, wt, ru);
3922 }
3923
3924 inline int
3925 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3926 {
3927 return insn->sched & 0xf;
3928 }
3929
3930 inline int
3931 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3932 {
3933 return (insn->sched & 0x0000e0) >> 5;
3934 }
3935
3936 inline int
3937 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3938 {
3939 return (insn->sched & 0x000700) >> 8;
3940 }
3941
3942 inline int
3943 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3944 {
3945 return (insn->sched & 0x01f800) >> 11;
3946 }
3947
3948 // Emit the reuse flag which allows to make use of the new memory hierarchy
3949 // introduced since Maxwell, the operand reuse cache.
3950 //
3951 // It allows to reduce bank conflicts by caching operands. Each time you issue
3952 // an instruction, that flag can tell the hw which operands are going to be
3953 // re-used by the next instruction. Note that the next instruction has to use
3954 // the same GPR id in the same operand slot.
3955 void
3956 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3957 {
3958 Instruction *next = insn->next;
3959 BitSet defs(255, 1);
3960
3961 if (!targ->isReuseSupported(insn))
3962 return;
3963
3964 for (int d = 0; insn->defExists(d); ++d) {
3965 const Value *def = insn->def(d).rep();
3966 if (insn->def(d).getFile() != FILE_GPR)
3967 continue;
3968 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3969 continue;
3970 defs.set(def->reg.data.id);
3971 }
3972
3973 for (int s = 0; insn->srcExists(s); s++) {
3974 const Value *src = insn->src(s).rep();
3975 if (insn->src(s).getFile() != FILE_GPR)
3976 continue;
3977 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3978 continue;
3979 if (defs.test(src->reg.data.id))
3980 continue;
3981 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3982 continue;
3983 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3984 continue;
3985 assert(s < 4);
3986 emitReuse(insn, s);
3987 }
3988 }
3989
3990 void
3991 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3992 {
3993 int a = v->reg.data.id, b;
3994
3995 switch (v->reg.file) {
3996 case FILE_GPR:
3997 b = a + v->reg.size / 4;
3998 for (int r = a; r < b; ++r)
3999 score->rd.r[r] = ready;
4000 break;
4001 case FILE_PREDICATE:
4002 // To immediately use a predicate set by any instructions, the minimum
4003 // number of stall counts is 13.
4004 score->rd.p[a] = cycle + 13;
4005 break;
4006 case FILE_FLAGS:
4007 score->rd.c = ready;
4008 break;
4009 default:
4010 break;
4011 }
4012 }
4013
4014 void
4015 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
4016 {
4017 int a = v->reg.data.id, b;
4018 int ready = cycle;
4019
4020 switch (v->reg.file) {
4021 case FILE_GPR:
4022 b = a + v->reg.size / 4;
4023 for (int r = a; r < b; ++r)
4024 ready = MAX2(ready, score->rd.r[r]);
4025 break;
4026 case FILE_PREDICATE:
4027 ready = MAX2(ready, score->rd.p[a]);
4028 break;
4029 case FILE_FLAGS:
4030 ready = MAX2(ready, score->rd.c);
4031 break;
4032 default:
4033 break;
4034 }
4035 if (cycle < ready)
4036 delay = MAX2(delay, ready - cycle);
4037 }
4038
4039 void
4040 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
4041 {
4042 const int ready = cycle + targ->getLatency(insn);
4043
4044 for (int d = 0; insn->defExists(d); ++d)
4045 recordWr(insn->getDef(d), cycle, ready);
4046
4047 #ifdef GM107_DEBUG_SCHED_DATA
4048 score->print(cycle);
4049 #endif
4050 }
4051
4052 #define GM107_MIN_ISSUE_DELAY 0x1
4053 #define GM107_MAX_ISSUE_DELAY 0xf
4054
4055 int
4056 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
4057 {
4058 int delay = 0, ready = cycle;
4059
4060 for (int s = 0; insn->srcExists(s); ++s)
4061 checkRd(insn->getSrc(s), cycle, delay);
4062
4063 // TODO: make use of getReadLatency()!
4064
4065 return MAX2(delay, ready - cycle);
4066 }
4067
4068 void
4069 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
4070 const Instruction *next)
4071 {
4072 const OpClass cl = targ->getOpClass(insn->op);
4073 int wr, rd;
4074
4075 if (insn->op == OP_EXIT ||
4076 insn->op == OP_BAR ||
4077 insn->op == OP_MEMBAR) {
4078 delay = GM107_MAX_ISSUE_DELAY;
4079 } else
4080 if (insn->op == OP_QUADON ||
4081 insn->op == OP_QUADPOP) {
4082 delay = 0xd;
4083 } else
4084 if (cl == OPCLASS_FLOW || insn->join) {
4085 delay = 0xd;
4086 }
4087
4088 if (!next || !targ->canDualIssue(insn, next)) {
4089 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4090 } else {
4091 delay = 0x0; // dual-issue
4092 }
4093
4094 wr = getWrDepBar(insn);
4095 rd = getRdDepBar(insn);
4096
4097 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4098 // Barriers take one additional clock cycle to become active on top of
4099 // the clock consumed by the instruction producing it.
4100 if (!next || insn->bb != next->bb) {
4101 delay = 0x2;
4102 } else {
4103 int wt = getWtDepBar(next);
4104 if ((wt & (1 << wr)) | (wt & (1 << rd)))
4105 delay = 0x2;
4106 }
4107 }
4108
4109 emitStall(insn, delay);
4110 }
4111
4112
4113 // Return true when the given instruction needs to emit a read dependency
4114 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4115 // setting the maximum number of stall counts is not enough.
4116 bool
4117 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4118 {
4119 BitSet srcs(255, 1), defs(255, 1);
4120 int a, b;
4121
4122 if (!targ->isBarrierRequired(insn))
4123 return false;
4124
4125 // Do not emit a read dependency barrier when the instruction doesn't use
4126 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4127 for (int s = 0; insn->srcExists(s); ++s) {
4128 const Value *src = insn->src(s).rep();
4129 if (insn->src(s).getFile() != FILE_GPR)
4130 continue;
4131 if (src->reg.data.id == 255)
4132 continue;
4133
4134 a = src->reg.data.id;
4135 b = a + src->reg.size / 4;
4136 for (int r = a; r < b; ++r)
4137 srcs.set(r);
4138 }
4139
4140 if (!srcs.popCount())
4141 return false;
4142
4143 // Do not emit a read dependency barrier when the output GPRs are equal to
4144 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4145 // be produced and WaR hazards are prevented.
4146 for (int d = 0; insn->defExists(d); ++d) {
4147 const Value *def = insn->def(d).rep();
4148 if (insn->def(d).getFile() != FILE_GPR)
4149 continue;
4150 if (def->reg.data.id == 255)
4151 continue;
4152
4153 a = def->reg.data.id;
4154 b = a + def->reg.size / 4;
4155 for (int r = a; r < b; ++r)
4156 defs.set(r);
4157 }
4158
4159 srcs.andNot(defs);
4160 if (!srcs.popCount())
4161 return false;
4162
4163 return true;
4164 }
4165
4166 // Return true when the given instruction needs to emit a write dependency
4167 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4168 // setting the maximum number of stall counts is not enough. This is only legal
4169 // if the instruction output something.
4170 bool
4171 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4172 {
4173 if (!targ->isBarrierRequired(insn))
4174 return false;
4175
4176 for (int d = 0; insn->defExists(d); ++d) {
4177 if (insn->def(d).getFile() == FILE_GPR ||
4178 insn->def(d).getFile() == FILE_FLAGS ||
4179 insn->def(d).getFile() == FILE_PREDICATE)
4180 return true;
4181 }
4182 return false;
4183 }
4184
4185 // Helper function for findFirstUse() and findFirstDef()
4186 bool
4187 SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4188 const Value *val) const
4189 {
4190 if (val->reg.file != FILE_GPR &&
4191 val->reg.file != FILE_PREDICATE &&
4192 val->reg.file != FILE_FLAGS)
4193 return false;
4194
4195 for (int d = 0; insn->defExists(d); ++d) {
4196 const Value* def = insn->getDef(d);
4197 int minGPR = def->reg.data.id;
4198 int maxGPR = minGPR + def->reg.size / 4 - 1;
4199
4200 if (def->reg.file != val->reg.file)
4201 continue;
4202
4203 if (def->reg.file == FILE_GPR) {
4204 if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4205 val->reg.data.id > maxGPR)
4206 continue;
4207 return true;
4208 } else
4209 if (def->reg.file == FILE_PREDICATE) {
4210 if (val->reg.data.id != minGPR)
4211 continue;
4212 return true;
4213 } else
4214 if (def->reg.file == FILE_FLAGS) {
4215 if (val->reg.data.id != minGPR)
4216 continue;
4217 return true;
4218 }
4219 }
4220
4221 return false;
4222 }
4223
4224 // Find the next instruction inside the same basic block which uses (reads or
4225 // writes from) the output of the given instruction in order to avoid RaW and
4226 // WaW hazards.
4227 Instruction *
4228 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4229 {
4230 Instruction *insn, *next;
4231
4232 if (!bari->defExists(0))
4233 return NULL;
4234
4235 for (insn = bari->next; insn != NULL; insn = next) {
4236 next = insn->next;
4237
4238 for (int s = 0; insn->srcExists(s); ++s)
4239 if (doesInsnWriteTo(bari, insn->getSrc(s)))
4240 return insn;
4241
4242 for (int d = 0; insn->defExists(d); ++d)
4243 if (doesInsnWriteTo(bari, insn->getDef(d)))
4244 return insn;
4245 }
4246 return NULL;
4247 }
4248
4249 // Find the next instruction inside the same basic block which overwrites, at
4250 // least, one source of the given instruction in order to avoid WaR hazards.
4251 Instruction *
4252 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4253 {
4254 Instruction *insn, *next;
4255
4256 if (!bari->srcExists(0))
4257 return NULL;
4258
4259 for (insn = bari->next; insn != NULL; insn = next) {
4260 next = insn->next;
4261
4262 for (int s = 0; bari->srcExists(s); ++s)
4263 if (doesInsnWriteTo(insn, bari->getSrc(s)))
4264 return insn;
4265 }
4266 return NULL;
4267 }
4268
4269 // Dependency barriers:
4270 // This pass is a bit ugly and could probably be improved by performing a
4271 // better allocation.
4272 //
4273 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4274 // dependency barriers using the control codes.
4275 bool
4276 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4277 {
4278 std::list<LiveBarUse> live_uses;
4279 std::list<LiveBarDef> live_defs;
4280 Instruction *insn, *next;
4281 BitSet bars(6, 1);
4282 int bar_id;
4283
4284 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4285 Instruction *usei = NULL, *defi = NULL;
4286 bool need_wr_bar, need_rd_bar;
4287
4288 next = insn->next;
4289
4290 // Expire old barrier uses.
4291 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4292 it != live_uses.end();) {
4293 if (insn->serial >= it->usei->serial) {
4294 int wr = getWrDepBar(it->insn);
4295 emitWtDepBar(insn, wr);
4296 bars.clr(wr); // free barrier
4297 it = live_uses.erase(it);
4298 continue;
4299 }
4300 ++it;
4301 }
4302
4303 // Expire old barrier defs.
4304 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4305 it != live_defs.end();) {
4306 if (insn->serial >= it->defi->serial) {
4307 int rd = getRdDepBar(it->insn);
4308 emitWtDepBar(insn, rd);
4309 bars.clr(rd); // free barrier
4310 it = live_defs.erase(it);
4311 continue;
4312 }
4313 ++it;
4314 }
4315
4316 need_wr_bar = needWrDepBar(insn);
4317 need_rd_bar = needRdDepBar(insn);
4318
4319 if (need_wr_bar) {
4320 // When the instruction requires to emit a write dependency barrier
4321 // (all which write something at a variable latency), find the next
4322 // instruction which reads the outputs (or writes to them, potentially
4323 // completing before this insn.
4324 usei = findFirstUse(insn);
4325
4326 // Allocate and emit a new barrier.
4327 bar_id = bars.findFreeRange(1);
4328 if (bar_id == -1)
4329 bar_id = 5;
4330 bars.set(bar_id);
4331 emitWrDepBar(insn, bar_id);
4332 if (usei)
4333 live_uses.push_back(LiveBarUse(insn, usei));
4334 }
4335
4336 if (need_rd_bar) {
4337 // When the instruction requires to emit a read dependency barrier
4338 // (all which read something at a variable latency), find the next
4339 // instruction which will write the inputs.
4340 defi = findFirstDef(insn);
4341
4342 if (usei && defi && usei->serial <= defi->serial)
4343 continue;
4344
4345 // Allocate and emit a new barrier.
4346 bar_id = bars.findFreeRange(1);
4347 if (bar_id == -1)
4348 bar_id = 5;
4349 bars.set(bar_id);
4350 emitRdDepBar(insn, bar_id);
4351 if (defi)
4352 live_defs.push_back(LiveBarDef(insn, defi));
4353 }
4354 }
4355
4356 // Remove unnecessary barrier waits.
4357 BitSet alive_bars(6, 1);
4358 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4359 int wr, rd, wt;
4360
4361 next = insn->next;
4362
4363 wr = getWrDepBar(insn);
4364 rd = getRdDepBar(insn);
4365 wt = getWtDepBar(insn);
4366
4367 for (int idx = 0; idx < 6; ++idx) {
4368 if (!(wt & (1 << idx)))
4369 continue;
4370 if (!alive_bars.test(idx)) {
4371 insn->sched &= ~(1 << (11 + idx));
4372 } else {
4373 alive_bars.clr(idx);
4374 }
4375 }
4376
4377 if (wr < 6)
4378 alive_bars.set(wr);
4379 if (rd < 6)
4380 alive_bars.set(rd);
4381 }
4382
4383 return true;
4384 }
4385
4386 bool
4387 SchedDataCalculatorGM107::visit(Function *func)
4388 {
4389 ArrayList insns;
4390
4391 func->orderInstructions(insns);
4392
4393 scoreBoards.resize(func->cfg.getSize());
4394 for (size_t i = 0; i < scoreBoards.size(); ++i)
4395 scoreBoards[i].wipe();
4396 return true;
4397 }
4398
4399 bool
4400 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4401 {
4402 Instruction *insn, *next = NULL;
4403 int cycle = 0;
4404
4405 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4406 /*XXX*/
4407 insn->sched = 0x7e0;
4408 }
4409
4410 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4411 return true;
4412
4413 // Insert read/write dependency barriers for instructions which don't
4414 // operate at a fixed latency.
4415 insertBarriers(bb);
4416
4417 score = &scoreBoards.at(bb->getId());
4418
4419 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4420 // back branches will wait until all target dependencies are satisfied
4421 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4422 continue;
4423 BasicBlock *in = BasicBlock::get(ei.getNode());
4424 score->setMax(&scoreBoards.at(in->getId()));
4425 }
4426
4427 #ifdef GM107_DEBUG_SCHED_DATA
4428 INFO("=== BB:%i initial scores\n", bb->getId());
4429 score->print(cycle);
4430 #endif
4431
4432 // Because barriers are allocated locally (intra-BB), we have to make sure
4433 // that all produced barriers have been consumed before entering inside a
4434 // new basic block. The best way is to do a global allocation pre RA but
4435 // it's really more difficult, especially because of the phi nodes. Anyways,
4436 // it seems like that waiting on a barrier which has already been consumed
4437 // doesn't add any additional cost, it's just not elegant!
4438 Instruction *start = bb->getEntry();
4439 if (start && bb->cfg.incidentCount() > 0) {
4440 for (int b = 0; b < 6; b++)
4441 emitWtDepBar(start, b);
4442 }
4443
4444 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4445 next = insn->next;
4446
4447 commitInsn(insn, cycle);
4448 int delay = calcDelay(next, cycle);
4449 setDelay(insn, delay, next);
4450 cycle += getStall(insn);
4451
4452 setReuseFlag(insn);
4453
4454 // XXX: The yield flag seems to destroy a bunch of things when it is
4455 // set on every instruction, need investigation.
4456 //emitYield(insn);
4457
4458 #ifdef GM107_DEBUG_SCHED_DATA
4459 printSchedInfo(cycle, insn);
4460 insn->print();
4461 next->print();
4462 #endif
4463 }
4464
4465 if (!insn)
4466 return true;
4467 commitInsn(insn, cycle);
4468
4469 int bbDelay = -1;
4470
4471 #ifdef GM107_DEBUG_SCHED_DATA
4472 fprintf(stderr, "last instruction is : ");
4473 insn->print();
4474 fprintf(stderr, "cycle=%d\n", cycle);
4475 #endif
4476
4477 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4478 BasicBlock *out = BasicBlock::get(ei.getNode());
4479
4480 if (ei.getType() != Graph::Edge::BACK) {
4481 // Only test the first instruction of the outgoing block.
4482 next = out->getEntry();
4483 if (next) {
4484 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4485 } else {
4486 // When the outgoing BB is empty, make sure to set the number of
4487 // stall counts needed by the instruction because we don't know the
4488 // next instruction.
4489 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4490 }
4491 } else {
4492 // Wait until all dependencies are satisfied.
4493 const int regsFree = score->getLatest();
4494 next = out->getFirst();
4495 for (int c = cycle; next && c < regsFree; next = next->next) {
4496 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4497 c += getStall(next);
4498 }
4499 next = NULL;
4500 }
4501 }
4502 if (bb->cfg.outgoingCount() != 1)
4503 next = NULL;
4504 setDelay(insn, bbDelay, next);
4505 cycle += getStall(insn);
4506
4507 score->rebase(cycle); // common base for initializing out blocks' scores
4508 return true;
4509 }
4510
4511 /*******************************************************************************
4512 * main
4513 ******************************************************************************/
4514
4515 void
4516 CodeEmitterGM107::prepareEmission(Function *func)
4517 {
4518 SchedDataCalculatorGM107 sched(targGM107);
4519 CodeEmitter::prepareEmission(func);
4520 sched.run(func, true, true);
4521 }
4522
// Number of 24-byte groups needed to hold `size` bytes, rounded up
// (0 for an empty size).
//
// Written as quotient plus remainder test rather than (size + 23) / 24 so
// that sizes within 23 of UINT32_MAX do not wrap around to a tiny result.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   return size / 24 + (size % 24 != 0 ? 1 : 0);
}
4527
4528 void
4529 CodeEmitterGM107::prepareEmission(Program *prog)
4530 {
4531 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4532 !fi.end(); fi.next()) {
4533 Function *func = reinterpret_cast<Function *>(fi.get());
4534 func->binPos = prog->binSize;
4535 prepareEmission(func);
4536
4537 // adjust sizes & positions for schedulding info:
4538 if (prog->getTarget()->hasSWSched) {
4539 uint32_t adjPos = func->binPos;
4540 BasicBlock *bb = NULL;
4541 for (int i = 0; i < func->bbCount; ++i) {
4542 bb = func->bbArray[i];
4543 int32_t adjSize = bb->binSize;
4544 if (adjPos % 32) {
4545 adjSize -= 32 - adjPos % 32;
4546 if (adjSize < 0)
4547 adjSize = 0;
4548 }
4549 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4550 bb->binPos = adjPos;
4551 bb->binSize = adjSize;
4552 adjPos += adjSize;
4553 }
4554 if (bb)
4555 func->binSize = adjPos - func->binPos;
4556 }
4557
4558 prog->binSize += func->binSize;
4559 }
4560 }
4561
4562 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4563 : CodeEmitter(target),
4564 targGM107(target),
4565 writeIssueDelays(target->hasSWSched)
4566 {
4567 code = NULL;
4568 codeSize = codeSizeLimit = 0;
4569 relocInfo = NULL;
4570 }
4571
4572 CodeEmitter *
4573 TargetGM107::createCodeEmitterGM107(Program::Type type)
4574 {
4575 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4576 emit->setProgramType(type);
4577 return emit;
4578 }
4579
4580 } // namespace nv50_ir