freedreno/a6xx: Disable the core layer-size setup.
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gm107.cpp
1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "codegen/nv50_ir_target_gm107.h"
26
27 //#define GM107_DEBUG_SCHED_DATA
28
29 namespace nv50_ir {
30
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34 CodeEmitterGM107(const TargetGM107 *);
35
36 virtual bool emitInstruction(Instruction *);
37 virtual uint32_t getMinEncodingSize(const Instruction *) const;
38
39 virtual void prepareEmission(Program *);
40 virtual void prepareEmission(Function *);
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 private:
45 const TargetGM107 *targGM107;
46
47 Program::Type progType;
48
49 const Instruction *insn;
50 const bool writeIssueDelays;
51 uint32_t *data;
52
53 private:
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value *);
61 inline void emitGPR(int pos) {
62 emitGPR(pos, (const Value *)NULL);
63 }
64 inline void emitGPR(int pos, const ValueRef &ref) {
65 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66 }
67 inline void emitGPR(int pos, const ValueRef *ref) {
68 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69 }
70 inline void emitGPR(int pos, const ValueDef &def) {
71 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72 }
73 inline void emitSYS(int, const Value *);
74 inline void emitSYS(int pos, const ValueRef &ref) {
75 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76 }
77 inline void emitPRED(int, const Value *);
78 inline void emitPRED(int pos) {
79 emitPRED(pos, (const Value *)NULL);
80 }
81 inline void emitPRED(int pos, const ValueRef &ref) {
82 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83 }
84 inline void emitPRED(int pos, const ValueDef &def) {
85 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86 }
87 inline void emitADDR(int, int, int, int, const ValueRef &);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89 inline bool longIMMD(const ValueRef &);
90 inline void emitIMMD(int, int, const ValueRef &);
91
92 void emitCond3(int, CondCode);
93 void emitCond4(int, CondCode);
94 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef &);
101 inline void emitNEG(int, const ValueRef &);
102 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode, int);
105 inline void emitRND(int pos) {
106 emitRND(pos, insn->rnd, -1);
107 }
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef &);
110
111 void emitEXIT();
112 void emitBRA();
113 void emitCAL();
114 void emitPCNT();
115 void emitCONT();
116 void emitPBK();
117 void emitBRK();
118 void emitPRET();
119 void emitRET();
120 void emitSSY();
121 void emitSYNC();
122 void emitSAM();
123 void emitRAM();
124
125 void emitPSETP();
126
127 void emitMOV();
128 void emitS2R();
129 void emitCS2R();
130 void emitF2F();
131 void emitF2I();
132 void emitI2F();
133 void emitI2I();
134 void emitSEL();
135 void emitSHFL();
136
137 void emitDADD();
138 void emitDMUL();
139 void emitDFMA();
140 void emitDMNMX();
141 void emitDSET();
142 void emitDSETP();
143
144 void emitFADD();
145 void emitFMUL();
146 void emitFFMA();
147 void emitMUFU();
148 void emitFMNMX();
149 void emitRRO();
150 void emitFCMP();
151 void emitFSET();
152 void emitFSETP();
153 void emitFSWZADD();
154
155 void emitLOP();
156 void emitNOT();
157 void emitIADD();
158 void emitIMUL();
159 void emitIMAD();
160 void emitISCADD();
161 void emitXMAD();
162 void emitIMNMX();
163 void emitICMP();
164 void emitISET();
165 void emitISETP();
166 void emitSHL();
167 void emitSHR();
168 void emitSHF();
169 void emitPOPC();
170 void emitBFI();
171 void emitBFE();
172 void emitFLO();
173
174 void emitLDSTs(int, DataType);
175 void emitLDSTc(int);
176 void emitLDC();
177 void emitLDL();
178 void emitLDS();
179 void emitLD();
180 void emitSTL();
181 void emitSTS();
182 void emitST();
183 void emitALD();
184 void emitAST();
185 void emitISBERD();
186 void emitAL2P();
187 void emitIPA();
188 void emitATOM();
189 void emitATOMS();
190 void emitRED();
191 void emitCCTL();
192
193 void emitPIXLD();
194
195 void emitTEXs(int);
196 void emitTEX();
197 void emitTEXS();
198 void emitTLD();
199 void emitTLD4();
200 void emitTXD();
201 void emitTXQ();
202 void emitTMML();
203 void emitDEPBAR();
204
205 void emitNOP();
206 void emitKIL();
207 void emitOUT();
208
209 void emitBAR();
210 void emitMEMBAR();
211
212 void emitVOTE();
213
214 void emitSUTarget();
215 void emitSUHandle(const int s);
216 void emitSUSTx();
217 void emitSULDx();
218 void emitSUREDx();
219 };
220
221 /*******************************************************************************
222 * general instruction layout/fields
223 ******************************************************************************/
224
225 void
226 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
227 {
228 if (b >= 0) {
229 uint32_t m = ((1ULL << s) - 1);
230 uint64_t d = (uint64_t)(v & m) << b;
231 assert(!(v & ~m) || (v & ~m) == ~m);
232 data[1] |= d >> 32;
233 data[0] |= d;
234 }
235 }
236
237 void
238 CodeEmitterGM107::emitPred()
239 {
240 if (insn->predSrc >= 0) {
241 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
242 emitField(19, 1, insn->cc == CC_NOT_P);
243 } else {
244 emitField(16, 3, 7);
245 }
246 }
247
248 void
249 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
250 {
251 code[0] = 0x00000000;
252 code[1] = hi;
253 if (pred)
254 emitPred();
255 }
256
257 void
258 CodeEmitterGM107::emitGPR(int pos, const Value *val)
259 {
260 emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
261 val->reg.data.id : 255);
262 }
263
264 void
265 CodeEmitterGM107::emitSYS(int pos, const Value *val)
266 {
267 int id = val ? val->reg.data.id : -1;
268
269 switch (id) {
270 case SV_LANEID : id = 0x00; break;
271 case SV_VERTEX_COUNT : id = 0x10; break;
272 case SV_INVOCATION_ID : id = 0x11; break;
273 case SV_THREAD_KILL : id = 0x13; break;
274 case SV_INVOCATION_INFO: id = 0x1d; break;
275 case SV_COMBINED_TID : id = 0x20; break;
276 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
277 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
278 case SV_LANEMASK_EQ : id = 0x38; break;
279 case SV_LANEMASK_LT : id = 0x39; break;
280 case SV_LANEMASK_LE : id = 0x3a; break;
281 case SV_LANEMASK_GT : id = 0x3b; break;
282 case SV_LANEMASK_GE : id = 0x3c; break;
283 case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
284 default:
285 assert(!"invalid system value");
286 id = 0;
287 break;
288 }
289
290 emitField(pos, 8, id);
291 }
292
293 void
294 CodeEmitterGM107::emitPRED(int pos, const Value *val)
295 {
296 emitField(pos, 3, val ? val->reg.data.id : 7);
297 }
298
299 void
300 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
301 const ValueRef &ref)
302 {
303 const Value *v = ref.get();
304 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
305 if (gpr >= 0)
306 emitGPR(gpr, ref.getIndirect(0));
307 emitField(off, len, v->reg.data.offset >> shr);
308 }
309
310 void
311 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
312 const ValueRef &ref)
313 {
314 const Value *v = ref.get();
315 const Symbol *s = v->asSym();
316
317 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
318
319 emitField(buf, 5, v->reg.fileIndex);
320 if (gpr >= 0)
321 emitGPR(gpr, ref.getIndirect(0));
322 emitField(off, 16, s->reg.data.offset >> shr);
323 }
324
325 bool
326 CodeEmitterGM107::longIMMD(const ValueRef &ref)
327 {
328 if (ref.getFile() == FILE_IMMEDIATE) {
329 const ImmediateValue *imm = ref.get()->asImm();
330 if (isFloatType(insn->sType))
331 return imm->reg.data.u32 & 0xfff;
332 else
333 return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
334 }
335 return false;
336 }
337
338 void
339 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
340 {
341 const ImmediateValue *imm = ref.get()->asImm();
342 uint32_t val = imm->reg.data.u32;
343
344 if (len == 19) {
345 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
346 assert(!(val & 0x00000fff));
347 val >>= 12;
348 } else if (insn->sType == TYPE_F64) {
349 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
350 val = imm->reg.data.u64 >> 44;
351 } else {
352 assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
353 }
354 emitField( 56, 1, (val & 0x80000) >> 19);
355 emitField(pos, len, (val & 0x7ffff));
356 } else {
357 emitField(pos, len, val);
358 }
359 }
360
361 /*******************************************************************************
362 * modifiers
363 ******************************************************************************/
364
365 void
366 CodeEmitterGM107::emitCond3(int pos, CondCode code)
367 {
368 int data = 0;
369
370 switch (code) {
371 case CC_FL : data = 0x00; break;
372 case CC_LTU:
373 case CC_LT : data = 0x01; break;
374 case CC_EQU:
375 case CC_EQ : data = 0x02; break;
376 case CC_LEU:
377 case CC_LE : data = 0x03; break;
378 case CC_GTU:
379 case CC_GT : data = 0x04; break;
380 case CC_NEU:
381 case CC_NE : data = 0x05; break;
382 case CC_GEU:
383 case CC_GE : data = 0x06; break;
384 case CC_TR : data = 0x07; break;
385 default:
386 assert(!"invalid cond3");
387 break;
388 }
389
390 emitField(pos, 3, data);
391 }
392
393 void
394 CodeEmitterGM107::emitCond4(int pos, CondCode code)
395 {
396 int data = 0;
397
398 switch (code) {
399 case CC_FL: data = 0x00; break;
400 case CC_LT: data = 0x01; break;
401 case CC_EQ: data = 0x02; break;
402 case CC_LE: data = 0x03; break;
403 case CC_GT: data = 0x04; break;
404 case CC_NE: data = 0x05; break;
405 case CC_GE: data = 0x06; break;
406 // case CC_NUM: data = 0x07; break;
407 // case CC_NAN: data = 0x08; break;
408 case CC_LTU: data = 0x09; break;
409 case CC_EQU: data = 0x0a; break;
410 case CC_LEU: data = 0x0b; break;
411 case CC_GTU: data = 0x0c; break;
412 case CC_NEU: data = 0x0d; break;
413 case CC_GEU: data = 0x0e; break;
414 case CC_TR: data = 0x0f; break;
415 default:
416 assert(!"invalid cond4");
417 break;
418 }
419
420 emitField(pos, 4, data);
421 }
422
423 void
424 CodeEmitterGM107::emitO(int pos)
425 {
426 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
427 }
428
429 void
430 CodeEmitterGM107::emitP(int pos)
431 {
432 emitField(pos, 1, insn->perPatch);
433 }
434
435 void
436 CodeEmitterGM107::emitSAT(int pos)
437 {
438 emitField(pos, 1, insn->saturate);
439 }
440
441 void
442 CodeEmitterGM107::emitCC(int pos)
443 {
444 emitField(pos, 1, insn->flagsDef >= 0);
445 }
446
447 void
448 CodeEmitterGM107::emitX(int pos)
449 {
450 emitField(pos, 1, insn->flagsSrc >= 0);
451 }
452
453 void
454 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
455 {
456 emitField(pos, 1, ref.mod.abs());
457 }
458
459 void
460 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
461 {
462 emitField(pos, 1, ref.mod.neg());
463 }
464
465 void
466 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
467 {
468 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
469 }
470
471 void
472 CodeEmitterGM107::emitFMZ(int pos, int len)
473 {
474 emitField(pos, len, insn->dnz << 1 | insn->ftz);
475 }
476
477 void
478 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
479 {
480 int rm = 0, ri = 0;
481 switch (rnd) {
482 case ROUND_NI: ri = 1;
483 case ROUND_N : rm = 0; break;
484 case ROUND_MI: ri = 1;
485 case ROUND_M : rm = 1; break;
486 case ROUND_PI: ri = 1;
487 case ROUND_P : rm = 2; break;
488 case ROUND_ZI: ri = 1;
489 case ROUND_Z : rm = 3; break;
490 default:
491 assert(!"invalid round mode");
492 break;
493 }
494 emitField(rip, 1, ri);
495 emitField(rmp, 2, rm);
496 }
497
498 void
499 CodeEmitterGM107::emitPDIV(int pos)
500 {
501 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
502 if (insn->postFactor > 0)
503 emitField(pos, 3, 7 - insn->postFactor);
504 else
505 emitField(pos, 3, 0 - insn->postFactor);
506 }
507
508 void
509 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
510 {
511 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
512 }
513
514 /*******************************************************************************
515 * control flow
516 ******************************************************************************/
517
518 void
519 CodeEmitterGM107::emitEXIT()
520 {
521 emitInsn (0xe3000000);
522 emitCond5(0x00, CC_TR);
523 }
524
525 void
526 CodeEmitterGM107::emitBRA()
527 {
528 const FlowInstruction *insn = this->insn->asFlow();
529 int gpr = -1;
530
531 if (insn->indirect) {
532 if (insn->absolute)
533 emitInsn(0xe2000000); // JMX
534 else
535 emitInsn(0xe2500000); // BRX
536 gpr = 0x08;
537 } else {
538 if (insn->absolute)
539 emitInsn(0xe2100000); // JMP
540 else
541 emitInsn(0xe2400000); // BRA
542 emitField(0x07, 1, insn->allWarp);
543 }
544
545 emitField(0x06, 1, insn->limit);
546 emitCond5(0x00, CC_TR);
547
548 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
549 int32_t pos = insn->target.bb->binPos;
550 if (writeIssueDelays && !(pos & 0x1f))
551 pos += 8;
552 if (!insn->absolute)
553 emitField(0x14, 24, pos - (codeSize + 8));
554 else
555 emitField(0x14, 32, pos);
556 } else {
557 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
558 emitField(0x05, 1, 1);
559 }
560 }
561
562 void
563 CodeEmitterGM107::emitCAL()
564 {
565 const FlowInstruction *insn = this->insn->asFlow();
566
567 if (insn->absolute) {
568 emitInsn(0xe2200000, 0); // JCAL
569 } else {
570 emitInsn(0xe2600000, 0); // CAL
571 }
572
573 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
574 if (!insn->absolute)
575 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
576 else {
577 if (insn->builtin) {
578 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
579 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
580 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
581 } else {
582 emitField(0x14, 32, insn->target.bb->binPos);
583 }
584 }
585 } else {
586 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
587 emitField(0x05, 1, 1);
588 }
589 }
590
591 void
592 CodeEmitterGM107::emitPCNT()
593 {
594 const FlowInstruction *insn = this->insn->asFlow();
595
596 emitInsn(0xe2b00000, 0);
597
598 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
599 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
600 } else {
601 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
602 emitField(0x05, 1, 1);
603 }
604 }
605
606 void
607 CodeEmitterGM107::emitCONT()
608 {
609 emitInsn (0xe3500000);
610 emitCond5(0x00, CC_TR);
611 }
612
613 void
614 CodeEmitterGM107::emitPBK()
615 {
616 const FlowInstruction *insn = this->insn->asFlow();
617
618 emitInsn(0xe2a00000, 0);
619
620 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
621 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
622 } else {
623 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
624 emitField(0x05, 1, 1);
625 }
626 }
627
628 void
629 CodeEmitterGM107::emitBRK()
630 {
631 emitInsn (0xe3400000);
632 emitCond5(0x00, CC_TR);
633 }
634
635 void
636 CodeEmitterGM107::emitPRET()
637 {
638 const FlowInstruction *insn = this->insn->asFlow();
639
640 emitInsn(0xe2700000, 0);
641
642 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
643 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
644 } else {
645 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
646 emitField(0x05, 1, 1);
647 }
648 }
649
650 void
651 CodeEmitterGM107::emitRET()
652 {
653 emitInsn (0xe3200000);
654 emitCond5(0x00, CC_TR);
655 }
656
657 void
658 CodeEmitterGM107::emitSSY()
659 {
660 const FlowInstruction *insn = this->insn->asFlow();
661
662 emitInsn(0xe2900000, 0);
663
664 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
665 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
666 } else {
667 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
668 emitField(0x05, 1, 1);
669 }
670 }
671
672 void
673 CodeEmitterGM107::emitSYNC()
674 {
675 emitInsn (0xf0f80000);
676 emitCond5(0x00, CC_TR);
677 }
678
679 void
680 CodeEmitterGM107::emitSAM()
681 {
682 emitInsn(0xe3700000, 0);
683 }
684
685 void
686 CodeEmitterGM107::emitRAM()
687 {
688 emitInsn(0xe3800000, 0);
689 }
690
691 /*******************************************************************************
692 * predicate/cc
693 ******************************************************************************/
694
695 void
696 CodeEmitterGM107::emitPSETP()
697 {
698
699 emitInsn(0x50900000);
700
701 switch (insn->op) {
702 case OP_AND: emitField(0x18, 3, 0); break;
703 case OP_OR: emitField(0x18, 3, 1); break;
704 case OP_XOR: emitField(0x18, 3, 2); break;
705 default:
706 assert(!"unexpected operation");
707 break;
708 }
709
710 // emitINV (0x2a);
711 emitPRED(0x27); // TODO: support 3-arg
712 emitINV (0x20, insn->src(1));
713 emitPRED(0x1d, insn->src(1));
714 emitINV (0x0f, insn->src(0));
715 emitPRED(0x0c, insn->src(0));
716 emitPRED(0x03, insn->def(0));
717 emitPRED(0x00);
718 }
719
720 /*******************************************************************************
721 * movement / conversion
722 ******************************************************************************/
723
724 void
725 CodeEmitterGM107::emitMOV()
726 {
727 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
728 switch (insn->src(0).getFile()) {
729 case FILE_GPR:
730 if (insn->def(0).getFile() == FILE_PREDICATE) {
731 emitInsn(0x5b6a0000);
732 emitGPR (0x08);
733 } else {
734 emitInsn(0x5c980000);
735 }
736 emitGPR (0x14, insn->src(0));
737 break;
738 case FILE_MEMORY_CONST:
739 emitInsn(0x4c980000);
740 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
741 break;
742 case FILE_IMMEDIATE:
743 emitInsn(0x38980000);
744 emitIMMD(0x14, 19, insn->src(0));
745 break;
746 case FILE_PREDICATE:
747 emitInsn(0x50880000);
748 emitPRED(0x0c, insn->src(0));
749 emitPRED(0x1d);
750 emitPRED(0x27);
751 break;
752 default:
753 assert(!"bad src file");
754 break;
755 }
756 if (insn->def(0).getFile() != FILE_PREDICATE &&
757 insn->src(0).getFile() != FILE_PREDICATE)
758 emitField(0x27, 4, insn->lanes);
759 } else {
760 emitInsn (0x01000000);
761 emitIMMD (0x14, 32, insn->src(0));
762 emitField(0x0c, 4, insn->lanes);
763 }
764
765 if (insn->def(0).getFile() == FILE_PREDICATE) {
766 emitPRED(0x27);
767 emitPRED(0x03, insn->def(0));
768 emitPRED(0x00);
769 } else {
770 emitGPR(0x00, insn->def(0));
771 }
772 }
773
774 void
775 CodeEmitterGM107::emitS2R()
776 {
777 emitInsn(0xf0c80000);
778 emitSYS (0x14, insn->src(0));
779 emitGPR (0x00, insn->def(0));
780 }
781
782 void
783 CodeEmitterGM107::emitCS2R()
784 {
785 emitInsn(0x50c80000);
786 emitSYS (0x14, insn->src(0));
787 emitGPR (0x00, insn->def(0));
788 }
789
790 void
791 CodeEmitterGM107::emitF2F()
792 {
793 RoundMode rnd = insn->rnd;
794
795 switch (insn->op) {
796 case OP_FLOOR: rnd = ROUND_MI; break;
797 case OP_CEIL : rnd = ROUND_PI; break;
798 case OP_TRUNC: rnd = ROUND_ZI; break;
799 default:
800 break;
801 }
802
803 switch (insn->src(0).getFile()) {
804 case FILE_GPR:
805 emitInsn(0x5ca80000);
806 emitGPR (0x14, insn->src(0));
807 break;
808 case FILE_MEMORY_CONST:
809 emitInsn(0x4ca80000);
810 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
811 break;
812 case FILE_IMMEDIATE:
813 emitInsn(0x38a80000);
814 emitIMMD(0x14, 19, insn->src(0));
815 break;
816 default:
817 assert(!"bad src0 file");
818 break;
819 }
820
821 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
822 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
823 emitCC (0x2f);
824 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
825 emitFMZ (0x2c, 1);
826 emitField(0x29, 1, insn->subOp);
827 emitRND (0x27, rnd, 0x2a);
828 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
829 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
830 emitGPR (0x00, insn->def(0));
831 }
832
833 void
834 CodeEmitterGM107::emitF2I()
835 {
836 RoundMode rnd = insn->rnd;
837
838 switch (insn->op) {
839 case OP_FLOOR: rnd = ROUND_M; break;
840 case OP_CEIL : rnd = ROUND_P; break;
841 case OP_TRUNC: rnd = ROUND_Z; break;
842 default:
843 break;
844 }
845
846 switch (insn->src(0).getFile()) {
847 case FILE_GPR:
848 emitInsn(0x5cb00000);
849 emitGPR (0x14, insn->src(0));
850 break;
851 case FILE_MEMORY_CONST:
852 emitInsn(0x4cb00000);
853 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
854 break;
855 case FILE_IMMEDIATE:
856 emitInsn(0x38b00000);
857 emitIMMD(0x14, 19, insn->src(0));
858 break;
859 default:
860 assert(!"bad src0 file");
861 break;
862 }
863
864 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
865 emitCC (0x2f);
866 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
867 emitFMZ (0x2c, 1);
868 emitRND (0x27, rnd, 0x2a);
869 emitField(0x0c, 1, isSignedType(insn->dType));
870 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
871 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
872 emitGPR (0x00, insn->def(0));
873 }
874
875 void
876 CodeEmitterGM107::emitI2F()
877 {
878 RoundMode rnd = insn->rnd;
879
880 switch (insn->op) {
881 case OP_FLOOR: rnd = ROUND_M; break;
882 case OP_CEIL : rnd = ROUND_P; break;
883 case OP_TRUNC: rnd = ROUND_Z; break;
884 default:
885 break;
886 }
887
888 switch (insn->src(0).getFile()) {
889 case FILE_GPR:
890 emitInsn(0x5cb80000);
891 emitGPR (0x14, insn->src(0));
892 break;
893 case FILE_MEMORY_CONST:
894 emitInsn(0x4cb80000);
895 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
896 break;
897 case FILE_IMMEDIATE:
898 emitInsn(0x38b80000);
899 emitIMMD(0x14, 19, insn->src(0));
900 break;
901 default:
902 assert(!"bad src0 file");
903 break;
904 }
905
906 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
907 emitCC (0x2f);
908 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
909 emitField(0x29, 2, insn->subOp);
910 emitRND (0x27, rnd, -1);
911 emitField(0x0d, 1, isSignedType(insn->sType));
912 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
913 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
914 emitGPR (0x00, insn->def(0));
915 }
916
917 void
918 CodeEmitterGM107::emitI2I()
919 {
920 switch (insn->src(0).getFile()) {
921 case FILE_GPR:
922 emitInsn(0x5ce00000);
923 emitGPR (0x14, insn->src(0));
924 break;
925 case FILE_MEMORY_CONST:
926 emitInsn(0x4ce00000);
927 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
928 break;
929 case FILE_IMMEDIATE:
930 emitInsn(0x38e00000);
931 emitIMMD(0x14, 19, insn->src(0));
932 break;
933 default:
934 assert(!"bad src0 file");
935 break;
936 }
937
938 emitSAT (0x32);
939 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
940 emitCC (0x2f);
941 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
942 emitField(0x29, 2, insn->subOp);
943 emitField(0x0d, 1, isSignedType(insn->sType));
944 emitField(0x0c, 1, isSignedType(insn->dType));
945 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
946 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
947 emitGPR (0x00, insn->def(0));
948 }
949
950 static void
951 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
952 {
953 int loc = entry->loc;
954 if (data.force_persample_interp)
955 code[loc + 1] |= 1 << 10;
956 else
957 code[loc + 1] &= ~(1 << 10);
958 }
959
960 void
961 CodeEmitterGM107::emitSEL()
962 {
963 switch (insn->src(1).getFile()) {
964 case FILE_GPR:
965 emitInsn(0x5ca00000);
966 emitGPR (0x14, insn->src(1));
967 break;
968 case FILE_MEMORY_CONST:
969 emitInsn(0x4ca00000);
970 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
971 break;
972 case FILE_IMMEDIATE:
973 emitInsn(0x38a00000);
974 emitIMMD(0x14, 19, insn->src(1));
975 break;
976 default:
977 assert(!"bad src1 file");
978 break;
979 }
980
981 emitINV (0x2a, insn->src(2));
982 emitPRED(0x27, insn->src(2));
983 emitGPR (0x08, insn->src(0));
984 emitGPR (0x00, insn->def(0));
985
986 if (insn->subOp == 1) {
987 addInterp(0, 0, selpFlip);
988 }
989 }
990
991 void
992 CodeEmitterGM107::emitSHFL()
993 {
994 int type = 0;
995
996 emitInsn (0xef100000);
997
998 switch (insn->src(1).getFile()) {
999 case FILE_GPR:
1000 emitGPR(0x14, insn->src(1));
1001 break;
1002 case FILE_IMMEDIATE:
1003 emitIMMD(0x14, 5, insn->src(1));
1004 type |= 1;
1005 break;
1006 default:
1007 assert(!"invalid src1 file");
1008 break;
1009 }
1010
1011 switch (insn->src(2).getFile()) {
1012 case FILE_GPR:
1013 emitGPR(0x27, insn->src(2));
1014 break;
1015 case FILE_IMMEDIATE:
1016 emitIMMD(0x22, 13, insn->src(2));
1017 type |= 2;
1018 break;
1019 default:
1020 assert(!"invalid src2 file");
1021 break;
1022 }
1023
1024 if (!insn->defExists(1))
1025 emitPRED(0x30);
1026 else {
1027 assert(insn->def(1).getFile() == FILE_PREDICATE);
1028 emitPRED(0x30, insn->def(1));
1029 }
1030
1031 emitField(0x1e, 2, insn->subOp);
1032 emitField(0x1c, 2, type);
1033 emitGPR (0x08, insn->src(0));
1034 emitGPR (0x00, insn->def(0));
1035 }
1036
1037 /*******************************************************************************
1038 * double
1039 ******************************************************************************/
1040
1041 void
1042 CodeEmitterGM107::emitDADD()
1043 {
1044 switch (insn->src(1).getFile()) {
1045 case FILE_GPR:
1046 emitInsn(0x5c700000);
1047 emitGPR (0x14, insn->src(1));
1048 break;
1049 case FILE_MEMORY_CONST:
1050 emitInsn(0x4c700000);
1051 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1052 break;
1053 case FILE_IMMEDIATE:
1054 emitInsn(0x38700000);
1055 emitIMMD(0x14, 19, insn->src(1));
1056 break;
1057 default:
1058 assert(!"bad src1 file");
1059 break;
1060 }
1061 emitABS(0x31, insn->src(1));
1062 emitNEG(0x30, insn->src(0));
1063 emitCC (0x2f);
1064 emitABS(0x2e, insn->src(0));
1065 emitNEG(0x2d, insn->src(1));
1066
1067 if (insn->op == OP_SUB)
1068 code[1] ^= 0x00002000;
1069
1070 emitGPR(0x08, insn->src(0));
1071 emitGPR(0x00, insn->def(0));
1072 }
1073
1074 void
1075 CodeEmitterGM107::emitDMUL()
1076 {
1077 switch (insn->src(1).getFile()) {
1078 case FILE_GPR:
1079 emitInsn(0x5c800000);
1080 emitGPR (0x14, insn->src(1));
1081 break;
1082 case FILE_MEMORY_CONST:
1083 emitInsn(0x4c800000);
1084 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1085 break;
1086 case FILE_IMMEDIATE:
1087 emitInsn(0x38800000);
1088 emitIMMD(0x14, 19, insn->src(1));
1089 break;
1090 default:
1091 assert(!"bad src1 file");
1092 break;
1093 }
1094
1095 emitNEG2(0x30, insn->src(0), insn->src(1));
1096 emitCC (0x2f);
1097 emitRND (0x27);
1098 emitGPR (0x08, insn->src(0));
1099 emitGPR (0x00, insn->def(0));
1100 }
1101
1102 void
1103 CodeEmitterGM107::emitDFMA()
1104 {
1105 switch(insn->src(2).getFile()) {
1106 case FILE_GPR:
1107 switch (insn->src(1).getFile()) {
1108 case FILE_GPR:
1109 emitInsn(0x5b700000);
1110 emitGPR (0x14, insn->src(1));
1111 break;
1112 case FILE_MEMORY_CONST:
1113 emitInsn(0x4b700000);
1114 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1115 break;
1116 case FILE_IMMEDIATE:
1117 emitInsn(0x36700000);
1118 emitIMMD(0x14, 19, insn->src(1));
1119 break;
1120 default:
1121 assert(!"bad src1 file");
1122 break;
1123 }
1124 emitGPR (0x27, insn->src(2));
1125 break;
1126 case FILE_MEMORY_CONST:
1127 emitInsn(0x53700000);
1128 emitGPR (0x27, insn->src(1));
1129 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1130 break;
1131 default:
1132 assert(!"bad src2 file");
1133 break;
1134 }
1135
1136 emitRND (0x32);
1137 emitNEG (0x31, insn->src(2));
1138 emitNEG2(0x30, insn->src(0), insn->src(1));
1139 emitCC (0x2f);
1140 emitGPR (0x08, insn->src(0));
1141 emitGPR (0x00, insn->def(0));
1142 }
1143
1144 void
1145 CodeEmitterGM107::emitDMNMX()
1146 {
1147 switch (insn->src(1).getFile()) {
1148 case FILE_GPR:
1149 emitInsn(0x5c500000);
1150 emitGPR (0x14, insn->src(1));
1151 break;
1152 case FILE_MEMORY_CONST:
1153 emitInsn(0x4c500000);
1154 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1155 break;
1156 case FILE_IMMEDIATE:
1157 emitInsn(0x38500000);
1158 emitIMMD(0x14, 19, insn->src(1));
1159 break;
1160 default:
1161 assert(!"bad src1 file");
1162 break;
1163 }
1164
1165 emitABS (0x31, insn->src(1));
1166 emitNEG (0x30, insn->src(0));
1167 emitCC (0x2f);
1168 emitABS (0x2e, insn->src(0));
1169 emitNEG (0x2d, insn->src(1));
1170 emitField(0x2a, 1, insn->op == OP_MAX);
1171 emitPRED (0x27);
1172 emitGPR (0x08, insn->src(0));
1173 emitGPR (0x00, insn->def(0));
1174 }
1175
1176 void
1177 CodeEmitterGM107::emitDSET()
1178 {
1179 const CmpInstruction *insn = this->insn->asCmp();
1180
1181 switch (insn->src(1).getFile()) {
1182 case FILE_GPR:
1183 emitInsn(0x59000000);
1184 emitGPR (0x14, insn->src(1));
1185 break;
1186 case FILE_MEMORY_CONST:
1187 emitInsn(0x49000000);
1188 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1189 break;
1190 case FILE_IMMEDIATE:
1191 emitInsn(0x32000000);
1192 emitIMMD(0x14, 19, insn->src(1));
1193 break;
1194 default:
1195 assert(!"bad src1 file");
1196 break;
1197 }
1198
1199 if (insn->op != OP_SET) {
1200 switch (insn->op) {
1201 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1202 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1203 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1204 default:
1205 assert(!"invalid set op");
1206 break;
1207 }
1208 emitPRED(0x27, insn->src(2));
1209 } else {
1210 emitPRED(0x27);
1211 }
1212
1213 emitABS (0x36, insn->src(0));
1214 emitNEG (0x35, insn->src(1));
1215 emitField(0x34, 1, insn->dType == TYPE_F32);
1216 emitCond4(0x30, insn->setCond);
1217 emitCC (0x2f);
1218 emitABS (0x2c, insn->src(1));
1219 emitNEG (0x2b, insn->src(0));
1220 emitGPR (0x08, insn->src(0));
1221 emitGPR (0x00, insn->def(0));
1222 }
1223
1224 void
1225 CodeEmitterGM107::emitDSETP()
1226 {
1227 const CmpInstruction *insn = this->insn->asCmp();
1228
1229 switch (insn->src(1).getFile()) {
1230 case FILE_GPR:
1231 emitInsn(0x5b800000);
1232 emitGPR (0x14, insn->src(1));
1233 break;
1234 case FILE_MEMORY_CONST:
1235 emitInsn(0x4b800000);
1236 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1237 break;
1238 case FILE_IMMEDIATE:
1239 emitInsn(0x36800000);
1240 emitIMMD(0x14, 19, insn->src(1));
1241 break;
1242 default:
1243 assert(!"bad src1 file");
1244 break;
1245 }
1246
1247 if (insn->op != OP_SET) {
1248 switch (insn->op) {
1249 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1250 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1251 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1252 default:
1253 assert(!"invalid set op");
1254 break;
1255 }
1256 emitPRED(0x27, insn->src(2));
1257 } else {
1258 emitPRED(0x27);
1259 }
1260
1261 emitCond4(0x30, insn->setCond);
1262 emitABS (0x2c, insn->src(1));
1263 emitNEG (0x2b, insn->src(0));
1264 emitGPR (0x08, insn->src(0));
1265 emitABS (0x07, insn->src(0));
1266 emitNEG (0x06, insn->src(1));
1267 emitPRED (0x03, insn->def(0));
1268 if (insn->defExists(1))
1269 emitPRED(0x00, insn->def(1));
1270 else
1271 emitPRED(0x00);
1272 }
1273
1274 /*******************************************************************************
1275 * float
1276 ******************************************************************************/
1277
1278 void
1279 CodeEmitterGM107::emitFADD()
1280 {
1281 if (!longIMMD(insn->src(1))) {
1282 switch (insn->src(1).getFile()) {
1283 case FILE_GPR:
1284 emitInsn(0x5c580000);
1285 emitGPR (0x14, insn->src(1));
1286 break;
1287 case FILE_MEMORY_CONST:
1288 emitInsn(0x4c580000);
1289 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1290 break;
1291 case FILE_IMMEDIATE:
1292 emitInsn(0x38580000);
1293 emitIMMD(0x14, 19, insn->src(1));
1294 break;
1295 default:
1296 assert(!"bad src1 file");
1297 break;
1298 }
1299 emitSAT(0x32);
1300 emitABS(0x31, insn->src(1));
1301 emitNEG(0x30, insn->src(0));
1302 emitCC (0x2f);
1303 emitABS(0x2e, insn->src(0));
1304 emitNEG(0x2d, insn->src(1));
1305 emitFMZ(0x2c, 1);
1306
1307 if (insn->op == OP_SUB)
1308 code[1] ^= 0x00002000;
1309 } else {
1310 emitInsn(0x08000000);
1311 emitABS(0x39, insn->src(1));
1312 emitNEG(0x38, insn->src(0));
1313 emitFMZ(0x37, 1);
1314 emitABS(0x36, insn->src(0));
1315 emitNEG(0x35, insn->src(1));
1316 emitCC (0x34);
1317 emitIMMD(0x14, 32, insn->src(1));
1318
1319 if (insn->op == OP_SUB)
1320 code[1] ^= 0x00080000;
1321 }
1322
1323 emitGPR(0x08, insn->src(0));
1324 emitGPR(0x00, insn->def(0));
1325 }
1326
1327 void
1328 CodeEmitterGM107::emitFMUL()
1329 {
1330 if (!longIMMD(insn->src(1))) {
1331 switch (insn->src(1).getFile()) {
1332 case FILE_GPR:
1333 emitInsn(0x5c680000);
1334 emitGPR (0x14, insn->src(1));
1335 break;
1336 case FILE_MEMORY_CONST:
1337 emitInsn(0x4c680000);
1338 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1339 break;
1340 case FILE_IMMEDIATE:
1341 emitInsn(0x38680000);
1342 emitIMMD(0x14, 19, insn->src(1));
1343 break;
1344 default:
1345 assert(!"bad src1 file");
1346 break;
1347 }
1348 emitSAT (0x32);
1349 emitNEG2(0x30, insn->src(0), insn->src(1));
1350 emitCC (0x2f);
1351 emitFMZ (0x2c, 2);
1352 emitPDIV(0x29);
1353 emitRND (0x27);
1354 } else {
1355 emitInsn(0x1e000000);
1356 emitSAT (0x37);
1357 emitFMZ (0x35, 2);
1358 emitCC (0x34);
1359 emitIMMD(0x14, 32, insn->src(1));
1360 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1361 code[1] ^= 0x00080000; /* flip immd sign bit */
1362 }
1363
1364 emitGPR(0x08, insn->src(0));
1365 emitGPR(0x00, insn->def(0));
1366 }
1367
1368 void
1369 CodeEmitterGM107::emitFFMA()
1370 {
1371 bool isLongIMMD = false;
1372 switch(insn->src(2).getFile()) {
1373 case FILE_GPR:
1374 switch (insn->src(1).getFile()) {
1375 case FILE_GPR:
1376 emitInsn(0x59800000);
1377 emitGPR (0x14, insn->src(1));
1378 break;
1379 case FILE_MEMORY_CONST:
1380 emitInsn(0x49800000);
1381 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1382 break;
1383 case FILE_IMMEDIATE:
1384 if (longIMMD(insn->getSrc(1))) {
1385 assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1386 isLongIMMD = true;
1387 emitInsn(0x0c000000);
1388 emitIMMD(0x14, 32, insn->src(1));
1389 } else {
1390 emitInsn(0x32800000);
1391 emitIMMD(0x14, 19, insn->src(1));
1392 }
1393 break;
1394 default:
1395 assert(!"bad src1 file");
1396 break;
1397 }
1398 if (!isLongIMMD)
1399 emitGPR (0x27, insn->src(2));
1400 break;
1401 case FILE_MEMORY_CONST:
1402 emitInsn(0x51800000);
1403 emitGPR (0x27, insn->src(1));
1404 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1405 break;
1406 default:
1407 assert(!"bad src2 file");
1408 break;
1409 }
1410
1411 if (isLongIMMD) {
1412 emitNEG (0x39, insn->src(2));
1413 emitNEG2(0x38, insn->src(0), insn->src(1));
1414 emitSAT (0x37);
1415 emitCC (0x34);
1416 } else {
1417 emitRND (0x33);
1418 emitSAT (0x32);
1419 emitNEG (0x31, insn->src(2));
1420 emitNEG2(0x30, insn->src(0), insn->src(1));
1421 emitCC (0x2f);
1422 }
1423
1424 emitFMZ(0x35, 2);
1425 emitGPR(0x08, insn->src(0));
1426 emitGPR(0x00, insn->def(0));
1427 }
1428
1429 void
1430 CodeEmitterGM107::emitMUFU()
1431 {
1432 int mufu = 0;
1433
1434 switch (insn->op) {
1435 case OP_COS: mufu = 0; break;
1436 case OP_SIN: mufu = 1; break;
1437 case OP_EX2: mufu = 2; break;
1438 case OP_LG2: mufu = 3; break;
1439 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1440 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1441 case OP_SQRT: mufu = 8; break;
1442 default:
1443 assert(!"invalid mufu");
1444 break;
1445 }
1446
1447 emitInsn (0x50800000);
1448 emitSAT (0x32);
1449 emitNEG (0x30, insn->src(0));
1450 emitABS (0x2e, insn->src(0));
1451 emitField(0x14, 4, mufu);
1452 emitGPR (0x08, insn->src(0));
1453 emitGPR (0x00, insn->def(0));
1454 }
1455
1456 void
1457 CodeEmitterGM107::emitFMNMX()
1458 {
1459 switch (insn->src(1).getFile()) {
1460 case FILE_GPR:
1461 emitInsn(0x5c600000);
1462 emitGPR (0x14, insn->src(1));
1463 break;
1464 case FILE_MEMORY_CONST:
1465 emitInsn(0x4c600000);
1466 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1467 break;
1468 case FILE_IMMEDIATE:
1469 emitInsn(0x38600000);
1470 emitIMMD(0x14, 19, insn->src(1));
1471 break;
1472 default:
1473 assert(!"bad src1 file");
1474 break;
1475 }
1476
1477 emitField(0x2a, 1, insn->op == OP_MAX);
1478 emitPRED (0x27);
1479
1480 emitABS(0x31, insn->src(1));
1481 emitNEG(0x30, insn->src(0));
1482 emitCC (0x2f);
1483 emitABS(0x2e, insn->src(0));
1484 emitNEG(0x2d, insn->src(1));
1485 emitFMZ(0x2c, 1);
1486 emitGPR(0x08, insn->src(0));
1487 emitGPR(0x00, insn->def(0));
1488 }
1489
1490 void
1491 CodeEmitterGM107::emitRRO()
1492 {
1493 switch (insn->src(0).getFile()) {
1494 case FILE_GPR:
1495 emitInsn(0x5c900000);
1496 emitGPR (0x14, insn->src(0));
1497 break;
1498 case FILE_MEMORY_CONST:
1499 emitInsn(0x4c900000);
1500 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1501 break;
1502 case FILE_IMMEDIATE:
1503 emitInsn(0x38900000);
1504 emitIMMD(0x14, 19, insn->src(0));
1505 break;
1506 default:
1507 assert(!"bad src file");
1508 break;
1509 }
1510
1511 emitABS (0x31, insn->src(0));
1512 emitNEG (0x2d, insn->src(0));
1513 emitField(0x27, 1, insn->op == OP_PREEX2);
1514 emitGPR (0x00, insn->def(0));
1515 }
1516
1517 void
1518 CodeEmitterGM107::emitFCMP()
1519 {
1520 const CmpInstruction *insn = this->insn->asCmp();
1521 CondCode cc = insn->setCond;
1522
1523 if (insn->src(2).mod.neg())
1524 cc = reverseCondCode(cc);
1525
1526 switch(insn->src(2).getFile()) {
1527 case FILE_GPR:
1528 switch (insn->src(1).getFile()) {
1529 case FILE_GPR:
1530 emitInsn(0x5ba00000);
1531 emitGPR (0x14, insn->src(1));
1532 break;
1533 case FILE_MEMORY_CONST:
1534 emitInsn(0x4ba00000);
1535 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1536 break;
1537 case FILE_IMMEDIATE:
1538 emitInsn(0x36a00000);
1539 emitIMMD(0x14, 19, insn->src(1));
1540 break;
1541 default:
1542 assert(!"bad src1 file");
1543 break;
1544 }
1545 emitGPR (0x27, insn->src(2));
1546 break;
1547 case FILE_MEMORY_CONST:
1548 emitInsn(0x53a00000);
1549 emitGPR (0x27, insn->src(1));
1550 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1551 break;
1552 default:
1553 assert(!"bad src2 file");
1554 break;
1555 }
1556
1557 emitCond4(0x30, cc);
1558 emitFMZ (0x2f, 1);
1559 emitGPR (0x08, insn->src(0));
1560 emitGPR (0x00, insn->def(0));
1561 }
1562
1563 void
1564 CodeEmitterGM107::emitFSET()
1565 {
1566 const CmpInstruction *insn = this->insn->asCmp();
1567
1568 switch (insn->src(1).getFile()) {
1569 case FILE_GPR:
1570 emitInsn(0x58000000);
1571 emitGPR (0x14, insn->src(1));
1572 break;
1573 case FILE_MEMORY_CONST:
1574 emitInsn(0x48000000);
1575 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1576 break;
1577 case FILE_IMMEDIATE:
1578 emitInsn(0x30000000);
1579 emitIMMD(0x14, 19, insn->src(1));
1580 break;
1581 default:
1582 assert(!"bad src1 file");
1583 break;
1584 }
1585
1586 if (insn->op != OP_SET) {
1587 switch (insn->op) {
1588 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1589 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1590 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1591 default:
1592 assert(!"invalid set op");
1593 break;
1594 }
1595 emitPRED(0x27, insn->src(2));
1596 } else {
1597 emitPRED(0x27);
1598 }
1599
1600 emitFMZ (0x37, 1);
1601 emitABS (0x36, insn->src(0));
1602 emitNEG (0x35, insn->src(1));
1603 emitField(0x34, 1, insn->dType == TYPE_F32);
1604 emitCond4(0x30, insn->setCond);
1605 emitCC (0x2f);
1606 emitABS (0x2c, insn->src(1));
1607 emitNEG (0x2b, insn->src(0));
1608 emitGPR (0x08, insn->src(0));
1609 emitGPR (0x00, insn->def(0));
1610 }
1611
1612 void
1613 CodeEmitterGM107::emitFSETP()
1614 {
1615 const CmpInstruction *insn = this->insn->asCmp();
1616
1617 switch (insn->src(1).getFile()) {
1618 case FILE_GPR:
1619 emitInsn(0x5bb00000);
1620 emitGPR (0x14, insn->src(1));
1621 break;
1622 case FILE_MEMORY_CONST:
1623 emitInsn(0x4bb00000);
1624 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1625 break;
1626 case FILE_IMMEDIATE:
1627 emitInsn(0x36b00000);
1628 emitIMMD(0x14, 19, insn->src(1));
1629 break;
1630 default:
1631 assert(!"bad src1 file");
1632 break;
1633 }
1634
1635 if (insn->op != OP_SET) {
1636 switch (insn->op) {
1637 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1638 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1639 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1640 default:
1641 assert(!"invalid set op");
1642 break;
1643 }
1644 emitPRED(0x27, insn->src(2));
1645 } else {
1646 emitPRED(0x27);
1647 }
1648
1649 emitCond4(0x30, insn->setCond);
1650 emitFMZ (0x2f, 1);
1651 emitABS (0x2c, insn->src(1));
1652 emitNEG (0x2b, insn->src(0));
1653 emitGPR (0x08, insn->src(0));
1654 emitABS (0x07, insn->src(0));
1655 emitNEG (0x06, insn->src(1));
1656 emitPRED (0x03, insn->def(0));
1657 if (insn->defExists(1))
1658 emitPRED(0x00, insn->def(1));
1659 else
1660 emitPRED(0x00);
1661 }
1662
1663 void
1664 CodeEmitterGM107::emitFSWZADD()
1665 {
1666 emitInsn (0x50f80000);
1667 emitCC (0x2f);
1668 emitFMZ (0x2c, 1);
1669 emitRND (0x27);
1670 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1671 emitField(0x1c, 8, insn->subOp);
1672 if (insn->predSrc != 1)
1673 emitGPR (0x14, insn->src(1));
1674 else
1675 emitGPR (0x14);
1676 emitGPR (0x08, insn->src(0));
1677 emitGPR (0x00, insn->def(0));
1678 }
1679
1680 /*******************************************************************************
1681 * integer
1682 ******************************************************************************/
1683
1684 void
1685 CodeEmitterGM107::emitLOP()
1686 {
1687 int lop = 0;
1688
1689 switch (insn->op) {
1690 case OP_AND: lop = 0; break;
1691 case OP_OR : lop = 1; break;
1692 case OP_XOR: lop = 2; break;
1693 default:
1694 assert(!"invalid lop");
1695 break;
1696 }
1697
1698 if (!longIMMD(insn->src(1))) {
1699 switch (insn->src(1).getFile()) {
1700 case FILE_GPR:
1701 emitInsn(0x5c400000);
1702 emitGPR (0x14, insn->src(1));
1703 break;
1704 case FILE_MEMORY_CONST:
1705 emitInsn(0x4c400000);
1706 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1707 break;
1708 case FILE_IMMEDIATE:
1709 emitInsn(0x38400000);
1710 emitIMMD(0x14, 19, insn->src(1));
1711 break;
1712 default:
1713 assert(!"bad src1 file");
1714 break;
1715 }
1716 emitPRED (0x30);
1717 emitCC (0x2f);
1718 emitX (0x2b);
1719 emitField(0x29, 2, lop);
1720 emitINV (0x28, insn->src(1));
1721 emitINV (0x27, insn->src(0));
1722 } else {
1723 emitInsn (0x04000000);
1724 emitX (0x39);
1725 emitINV (0x38, insn->src(1));
1726 emitINV (0x37, insn->src(0));
1727 emitField(0x35, 2, lop);
1728 emitCC (0x34);
1729 emitIMMD (0x14, 32, insn->src(1));
1730 }
1731
1732 emitGPR (0x08, insn->src(0));
1733 emitGPR (0x00, insn->def(0));
1734 }
1735
1736 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1737 void
1738 CodeEmitterGM107::emitNOT()
1739 {
1740 if (!longIMMD(insn->src(0))) {
1741 switch (insn->src(0).getFile()) {
1742 case FILE_GPR:
1743 emitInsn(0x5c400700);
1744 emitGPR (0x14, insn->src(0));
1745 break;
1746 case FILE_MEMORY_CONST:
1747 emitInsn(0x4c400700);
1748 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1749 break;
1750 case FILE_IMMEDIATE:
1751 emitInsn(0x38400700);
1752 emitIMMD(0x14, 19, insn->src(0));
1753 break;
1754 default:
1755 assert(!"bad src1 file");
1756 break;
1757 }
1758 emitPRED (0x30);
1759 } else {
1760 emitInsn (0x05600000);
1761 emitIMMD (0x14, 32, insn->src(1));
1762 }
1763
1764 emitGPR(0x08);
1765 emitGPR(0x00, insn->def(0));
1766 }
1767
1768 void
1769 CodeEmitterGM107::emitIADD()
1770 {
1771 if (!longIMMD(insn->src(1))) {
1772 switch (insn->src(1).getFile()) {
1773 case FILE_GPR:
1774 emitInsn(0x5c100000);
1775 emitGPR (0x14, insn->src(1));
1776 break;
1777 case FILE_MEMORY_CONST:
1778 emitInsn(0x4c100000);
1779 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1780 break;
1781 case FILE_IMMEDIATE:
1782 emitInsn(0x38100000);
1783 emitIMMD(0x14, 19, insn->src(1));
1784 break;
1785 default:
1786 assert(!"bad src1 file");
1787 break;
1788 }
1789 emitSAT(0x32);
1790 emitNEG(0x31, insn->src(0));
1791 emitNEG(0x30, insn->src(1));
1792 emitCC (0x2f);
1793 emitX (0x2b);
1794 } else {
1795 emitInsn(0x1c000000);
1796 emitNEG (0x38, insn->src(0));
1797 emitSAT (0x36);
1798 emitX (0x35);
1799 emitCC (0x34);
1800 emitIMMD(0x14, 32, insn->src(1));
1801 }
1802
1803 if (insn->op == OP_SUB)
1804 code[1] ^= 0x00010000;
1805
1806 emitGPR(0x08, insn->src(0));
1807 emitGPR(0x00, insn->def(0));
1808 }
1809
1810 void
1811 CodeEmitterGM107::emitIMUL()
1812 {
1813 if (!longIMMD(insn->src(1))) {
1814 switch (insn->src(1).getFile()) {
1815 case FILE_GPR:
1816 emitInsn(0x5c380000);
1817 emitGPR (0x14, insn->src(1));
1818 break;
1819 case FILE_MEMORY_CONST:
1820 emitInsn(0x4c380000);
1821 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1822 break;
1823 case FILE_IMMEDIATE:
1824 emitInsn(0x38380000);
1825 emitIMMD(0x14, 19, insn->src(1));
1826 break;
1827 default:
1828 assert(!"bad src1 file");
1829 break;
1830 }
1831 emitCC (0x2f);
1832 emitField(0x29, 1, isSignedType(insn->sType));
1833 emitField(0x28, 1, isSignedType(insn->dType));
1834 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1835 } else {
1836 emitInsn (0x1f000000);
1837 emitField(0x37, 1, isSignedType(insn->sType));
1838 emitField(0x36, 1, isSignedType(insn->dType));
1839 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1840 emitCC (0x34);
1841 emitIMMD (0x14, 32, insn->src(1));
1842 }
1843
1844 emitGPR(0x08, insn->src(0));
1845 emitGPR(0x00, insn->def(0));
1846 }
1847
1848 void
1849 CodeEmitterGM107::emitIMAD()
1850 {
1851 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1852 switch(insn->src(2).getFile()) {
1853 case FILE_GPR:
1854 switch (insn->src(1).getFile()) {
1855 case FILE_GPR:
1856 emitInsn(0x5a000000);
1857 emitGPR (0x14, insn->src(1));
1858 break;
1859 case FILE_MEMORY_CONST:
1860 emitInsn(0x4a000000);
1861 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1862 break;
1863 case FILE_IMMEDIATE:
1864 emitInsn(0x34000000);
1865 emitIMMD(0x14, 19, insn->src(1));
1866 break;
1867 default:
1868 assert(!"bad src1 file");
1869 break;
1870 }
1871 emitGPR (0x27, insn->src(2));
1872 break;
1873 case FILE_MEMORY_CONST:
1874 emitInsn(0x52000000);
1875 emitGPR (0x27, insn->src(1));
1876 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1877 break;
1878 default:
1879 assert(!"bad src2 file");
1880 break;
1881 }
1882
1883 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1884 emitField(0x35, 1, isSignedType(insn->sType));
1885 emitNEG (0x34, insn->src(2));
1886 emitNEG2 (0x33, insn->src(0), insn->src(1));
1887 emitSAT (0x32);
1888 emitX (0x31);
1889 emitField(0x30, 1, isSignedType(insn->dType));
1890 emitCC (0x2f);
1891 emitGPR (0x08, insn->src(0));
1892 emitGPR (0x00, insn->def(0));
1893 }
1894
1895 void
1896 CodeEmitterGM107::emitISCADD()
1897 {
1898 assert(insn->src(1).get()->asImm());
1899
1900 switch (insn->src(2).getFile()) {
1901 case FILE_GPR:
1902 emitInsn(0x5c180000);
1903 emitGPR (0x14, insn->src(2));
1904 break;
1905 case FILE_MEMORY_CONST:
1906 emitInsn(0x4c180000);
1907 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1908 break;
1909 case FILE_IMMEDIATE:
1910 emitInsn(0x38180000);
1911 emitIMMD(0x14, 19, insn->src(2));
1912 break;
1913 default:
1914 assert(!"bad src1 file");
1915 break;
1916 }
1917 emitNEG (0x31, insn->src(0));
1918 emitNEG (0x30, insn->src(2));
1919 emitCC (0x2f);
1920 emitIMMD(0x27, 5, insn->src(1));
1921 emitGPR (0x08, insn->src(0));
1922 emitGPR (0x00, insn->def(0));
1923 }
1924
1925 void
1926 CodeEmitterGM107::emitXMAD()
1927 {
1928 assert(insn->src(0).getFile() == FILE_GPR);
1929
1930 bool constbuf = false;
1931 bool psl_mrg = true;
1932 bool immediate = false;
1933 if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
1934 assert(insn->src(1).getFile() == FILE_GPR);
1935 constbuf = true;
1936 psl_mrg = false;
1937 emitInsn(0x51000000);
1938 emitGPR(0x27, insn->src(1));
1939 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1940 } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
1941 assert(insn->src(2).getFile() == FILE_GPR);
1942 constbuf = true;
1943 emitInsn(0x4e000000);
1944 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1945 emitGPR(0x27, insn->src(2));
1946 } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
1947 assert(insn->src(2).getFile() == FILE_GPR);
1948 assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
1949 immediate = true;
1950 emitInsn(0x36000000);
1951 emitIMMD(0x14, 16, insn->src(1));
1952 emitGPR(0x27, insn->src(2));
1953 } else {
1954 assert(insn->src(1).getFile() == FILE_GPR);
1955 assert(insn->src(2).getFile() == FILE_GPR);
1956 emitInsn(0x5b000000);
1957 emitGPR(0x14, insn->src(1));
1958 emitGPR(0x27, insn->src(2));
1959 }
1960
1961 if (psl_mrg)
1962 emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
1963
1964 unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
1965 cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
1966 emitField(0x32, constbuf ? 2 : 3, cmode);
1967
1968 emitX(constbuf ? 0x36 : 0x26);
1969 emitCC(0x2f);
1970
1971 emitGPR(0x0, insn->def(0));
1972 emitGPR(0x8, insn->src(0));
1973
1974 // source flags
1975 if (isSignedType(insn->sType)) {
1976 uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
1977 emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
1978 }
1979 emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1980 if (!immediate) {
1981 bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
1982 emitField(constbuf ? 0x34 : 0x23, 1, h1);
1983 }
1984 }
1985
1986 void
1987 CodeEmitterGM107::emitIMNMX()
1988 {
1989 switch (insn->src(1).getFile()) {
1990 case FILE_GPR:
1991 emitInsn(0x5c200000);
1992 emitGPR (0x14, insn->src(1));
1993 break;
1994 case FILE_MEMORY_CONST:
1995 emitInsn(0x4c200000);
1996 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1997 break;
1998 case FILE_IMMEDIATE:
1999 emitInsn(0x38200000);
2000 emitIMMD(0x14, 19, insn->src(1));
2001 break;
2002 default:
2003 assert(!"bad src1 file");
2004 break;
2005 }
2006
2007 emitField(0x30, 1, isSignedType(insn->dType));
2008 emitCC (0x2f);
2009 emitField(0x2b, 2, insn->subOp);
2010 emitField(0x2a, 1, insn->op == OP_MAX);
2011 emitPRED (0x27);
2012 emitGPR (0x08, insn->src(0));
2013 emitGPR (0x00, insn->def(0));
2014 }
2015
2016 void
2017 CodeEmitterGM107::emitICMP()
2018 {
2019 const CmpInstruction *insn = this->insn->asCmp();
2020 CondCode cc = insn->setCond;
2021
2022 if (insn->src(2).mod.neg())
2023 cc = reverseCondCode(cc);
2024
2025 switch(insn->src(2).getFile()) {
2026 case FILE_GPR:
2027 switch (insn->src(1).getFile()) {
2028 case FILE_GPR:
2029 emitInsn(0x5b400000);
2030 emitGPR (0x14, insn->src(1));
2031 break;
2032 case FILE_MEMORY_CONST:
2033 emitInsn(0x4b400000);
2034 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2035 break;
2036 case FILE_IMMEDIATE:
2037 emitInsn(0x36400000);
2038 emitIMMD(0x14, 19, insn->src(1));
2039 break;
2040 default:
2041 assert(!"bad src1 file");
2042 break;
2043 }
2044 emitGPR (0x27, insn->src(2));
2045 break;
2046 case FILE_MEMORY_CONST:
2047 emitInsn(0x53400000);
2048 emitGPR (0x27, insn->src(1));
2049 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2050 break;
2051 default:
2052 assert(!"bad src2 file");
2053 break;
2054 }
2055
2056 emitCond3(0x31, cc);
2057 emitField(0x30, 1, isSignedType(insn->sType));
2058 emitGPR (0x08, insn->src(0));
2059 emitGPR (0x00, insn->def(0));
2060 }
2061
2062 void
2063 CodeEmitterGM107::emitISET()
2064 {
2065 const CmpInstruction *insn = this->insn->asCmp();
2066
2067 switch (insn->src(1).getFile()) {
2068 case FILE_GPR:
2069 emitInsn(0x5b500000);
2070 emitGPR (0x14, insn->src(1));
2071 break;
2072 case FILE_MEMORY_CONST:
2073 emitInsn(0x4b500000);
2074 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2075 break;
2076 case FILE_IMMEDIATE:
2077 emitInsn(0x36500000);
2078 emitIMMD(0x14, 19, insn->src(1));
2079 break;
2080 default:
2081 assert(!"bad src1 file");
2082 break;
2083 }
2084
2085 if (insn->op != OP_SET) {
2086 switch (insn->op) {
2087 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2088 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2089 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2090 default:
2091 assert(!"invalid set op");
2092 break;
2093 }
2094 emitPRED(0x27, insn->src(2));
2095 } else {
2096 emitPRED(0x27);
2097 }
2098
2099 emitCond3(0x31, insn->setCond);
2100 emitField(0x30, 1, isSignedType(insn->sType));
2101 emitCC (0x2f);
2102 emitField(0x2c, 1, insn->dType == TYPE_F32);
2103 emitX (0x2b);
2104 emitGPR (0x08, insn->src(0));
2105 emitGPR (0x00, insn->def(0));
2106 }
2107
2108 void
2109 CodeEmitterGM107::emitISETP()
2110 {
2111 const CmpInstruction *insn = this->insn->asCmp();
2112
2113 switch (insn->src(1).getFile()) {
2114 case FILE_GPR:
2115 emitInsn(0x5b600000);
2116 emitGPR (0x14, insn->src(1));
2117 break;
2118 case FILE_MEMORY_CONST:
2119 emitInsn(0x4b600000);
2120 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2121 break;
2122 case FILE_IMMEDIATE:
2123 emitInsn(0x36600000);
2124 emitIMMD(0x14, 19, insn->src(1));
2125 break;
2126 default:
2127 assert(!"bad src1 file");
2128 break;
2129 }
2130
2131 if (insn->op != OP_SET) {
2132 switch (insn->op) {
2133 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2134 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2135 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2136 default:
2137 assert(!"invalid set op");
2138 break;
2139 }
2140 emitPRED(0x27, insn->src(2));
2141 } else {
2142 emitPRED(0x27);
2143 }
2144
2145 emitCond3(0x31, insn->setCond);
2146 emitField(0x30, 1, isSignedType(insn->sType));
2147 emitX (0x2b);
2148 emitGPR (0x08, insn->src(0));
2149 emitPRED (0x03, insn->def(0));
2150 if (insn->defExists(1))
2151 emitPRED(0x00, insn->def(1));
2152 else
2153 emitPRED(0x00);
2154 }
2155
2156 void
2157 CodeEmitterGM107::emitSHL()
2158 {
2159 switch (insn->src(1).getFile()) {
2160 case FILE_GPR:
2161 emitInsn(0x5c480000);
2162 emitGPR (0x14, insn->src(1));
2163 break;
2164 case FILE_MEMORY_CONST:
2165 emitInsn(0x4c480000);
2166 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2167 break;
2168 case FILE_IMMEDIATE:
2169 emitInsn(0x38480000);
2170 emitIMMD(0x14, 19, insn->src(1));
2171 break;
2172 default:
2173 assert(!"bad src1 file");
2174 break;
2175 }
2176
2177 emitCC (0x2f);
2178 emitX (0x2b);
2179 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2180 emitGPR (0x08, insn->src(0));
2181 emitGPR (0x00, insn->def(0));
2182 }
2183
2184 void
2185 CodeEmitterGM107::emitSHR()
2186 {
2187 switch (insn->src(1).getFile()) {
2188 case FILE_GPR:
2189 emitInsn(0x5c280000);
2190 emitGPR (0x14, insn->src(1));
2191 break;
2192 case FILE_MEMORY_CONST:
2193 emitInsn(0x4c280000);
2194 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2195 break;
2196 case FILE_IMMEDIATE:
2197 emitInsn(0x38280000);
2198 emitIMMD(0x14, 19, insn->src(1));
2199 break;
2200 default:
2201 assert(!"bad src1 file");
2202 break;
2203 }
2204
2205 emitField(0x30, 1, isSignedType(insn->dType));
2206 emitCC (0x2f);
2207 emitX (0x2c);
2208 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2209 emitGPR (0x08, insn->src(0));
2210 emitGPR (0x00, insn->def(0));
2211 }
2212
2213 void
2214 CodeEmitterGM107::emitSHF()
2215 {
2216 unsigned type;
2217
2218 switch (insn->src(1).getFile()) {
2219 case FILE_GPR:
2220 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2221 emitGPR(0x14, insn->src(1));
2222 break;
2223 case FILE_IMMEDIATE:
2224 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2225 emitIMMD(0x14, 19, insn->src(1));
2226 break;
2227 default:
2228 assert(!"bad src1 file");
2229 break;
2230 }
2231
2232 switch (insn->sType) {
2233 case TYPE_U64:
2234 type = 2;
2235 break;
2236 case TYPE_S64:
2237 type = 3;
2238 break;
2239 default:
2240 type = 0;
2241 break;
2242 }
2243
2244 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2245 emitX (0x31);
2246 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2247 emitCC (0x2f);
2248 emitGPR (0x27, insn->src(2));
2249 emitField(0x25, 2, type);
2250 emitGPR (0x08, insn->src(0));
2251 emitGPR (0x00, insn->def(0));
2252 }
2253
2254 void
2255 CodeEmitterGM107::emitPOPC()
2256 {
2257 switch (insn->src(0).getFile()) {
2258 case FILE_GPR:
2259 emitInsn(0x5c080000);
2260 emitGPR (0x14, insn->src(0));
2261 break;
2262 case FILE_MEMORY_CONST:
2263 emitInsn(0x4c080000);
2264 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2265 break;
2266 case FILE_IMMEDIATE:
2267 emitInsn(0x38080000);
2268 emitIMMD(0x14, 19, insn->src(0));
2269 break;
2270 default:
2271 assert(!"bad src1 file");
2272 break;
2273 }
2274
2275 emitINV(0x28, insn->src(0));
2276 emitGPR(0x00, insn->def(0));
2277 }
2278
2279 void
2280 CodeEmitterGM107::emitBFI()
2281 {
2282 switch(insn->src(2).getFile()) {
2283 case FILE_GPR:
2284 switch (insn->src(1).getFile()) {
2285 case FILE_GPR:
2286 emitInsn(0x5bf00000);
2287 emitGPR (0x14, insn->src(1));
2288 break;
2289 case FILE_MEMORY_CONST:
2290 emitInsn(0x4bf00000);
2291 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2292 break;
2293 case FILE_IMMEDIATE:
2294 emitInsn(0x36f00000);
2295 emitIMMD(0x14, 19, insn->src(1));
2296 break;
2297 default:
2298 assert(!"bad src1 file");
2299 break;
2300 }
2301 emitGPR (0x27, insn->src(2));
2302 break;
2303 case FILE_MEMORY_CONST:
2304 emitInsn(0x53f00000);
2305 emitGPR (0x27, insn->src(1));
2306 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2307 break;
2308 default:
2309 assert(!"bad src2 file");
2310 break;
2311 }
2312
2313 emitCC (0x2f);
2314 emitGPR (0x08, insn->src(0));
2315 emitGPR (0x00, insn->def(0));
2316 }
2317
2318 void
2319 CodeEmitterGM107::emitBFE()
2320 {
2321 switch (insn->src(1).getFile()) {
2322 case FILE_GPR:
2323 emitInsn(0x5c000000);
2324 emitGPR (0x14, insn->src(1));
2325 break;
2326 case FILE_MEMORY_CONST:
2327 emitInsn(0x4c000000);
2328 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2329 break;
2330 case FILE_IMMEDIATE:
2331 emitInsn(0x38000000);
2332 emitIMMD(0x14, 19, insn->src(1));
2333 break;
2334 default:
2335 assert(!"bad src1 file");
2336 break;
2337 }
2338
2339 emitField(0x30, 1, isSignedType(insn->dType));
2340 emitCC (0x2f);
2341 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2342 emitGPR (0x08, insn->src(0));
2343 emitGPR (0x00, insn->def(0));
2344 }
2345
2346 void
2347 CodeEmitterGM107::emitFLO()
2348 {
2349 switch (insn->src(0).getFile()) {
2350 case FILE_GPR:
2351 emitInsn(0x5c300000);
2352 emitGPR (0x14, insn->src(0));
2353 break;
2354 case FILE_MEMORY_CONST:
2355 emitInsn(0x4c300000);
2356 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2357 break;
2358 case FILE_IMMEDIATE:
2359 emitInsn(0x38300000);
2360 emitIMMD(0x14, 19, insn->src(0));
2361 break;
2362 default:
2363 assert(!"bad src1 file");
2364 break;
2365 }
2366
2367 emitField(0x30, 1, isSignedType(insn->dType));
2368 emitCC (0x2f);
2369 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2370 emitINV (0x28, insn->src(0));
2371 emitGPR (0x00, insn->def(0));
2372 }
2373
2374 /*******************************************************************************
2375 * memory
2376 ******************************************************************************/
2377
2378 void
2379 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2380 {
2381 int data = 0;
2382
2383 switch (typeSizeof(type)) {
2384 case 1: data = isSignedType(type) ? 1 : 0; break;
2385 case 2: data = isSignedType(type) ? 3 : 2; break;
2386 case 4: data = 4; break;
2387 case 8: data = 5; break;
2388 case 16: data = 6; break;
2389 default:
2390 assert(!"bad type");
2391 break;
2392 }
2393
2394 emitField(pos, 3, data);
2395 }
2396
2397 void
2398 CodeEmitterGM107::emitLDSTc(int pos)
2399 {
2400 int mode = 0;
2401
2402 switch (insn->cache) {
2403 case CACHE_CA: mode = 0; break;
2404 case CACHE_CG: mode = 1; break;
2405 case CACHE_CS: mode = 2; break;
2406 case CACHE_CV: mode = 3; break;
2407 default:
2408 assert(!"invalid caching mode");
2409 break;
2410 }
2411
2412 emitField(pos, 2, mode);
2413 }
2414
2415 void
2416 CodeEmitterGM107::emitLDC()
2417 {
2418 emitInsn (0xef900000);
2419 emitLDSTs(0x30, insn->dType);
2420 emitField(0x2c, 2, insn->subOp);
2421 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2422 emitGPR (0x00, insn->def(0));
2423 }
2424
2425 void
2426 CodeEmitterGM107::emitLDL()
2427 {
2428 emitInsn (0xef400000);
2429 emitLDSTs(0x30, insn->dType);
2430 emitLDSTc(0x2c);
2431 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2432 emitGPR (0x00, insn->def(0));
2433 }
2434
2435 void
2436 CodeEmitterGM107::emitLDS()
2437 {
2438 emitInsn (0xef480000);
2439 emitLDSTs(0x30, insn->dType);
2440 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2441 emitGPR (0x00, insn->def(0));
2442 }
2443
2444 void
2445 CodeEmitterGM107::emitLD()
2446 {
2447 emitInsn (0x80000000);
2448 emitPRED (0x3a);
2449 emitLDSTc(0x38);
2450 emitLDSTs(0x35, insn->dType);
2451 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2452 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2453 emitGPR (0x00, insn->def(0));
2454 }
2455
2456 void
2457 CodeEmitterGM107::emitSTL()
2458 {
2459 emitInsn (0xef500000);
2460 emitLDSTs(0x30, insn->dType);
2461 emitLDSTc(0x2c);
2462 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2463 emitGPR (0x00, insn->src(1));
2464 }
2465
2466 void
2467 CodeEmitterGM107::emitSTS()
2468 {
2469 emitInsn (0xef580000);
2470 emitLDSTs(0x30, insn->dType);
2471 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2472 emitGPR (0x00, insn->src(1));
2473 }
2474
2475 void
2476 CodeEmitterGM107::emitST()
2477 {
2478 emitInsn (0xa0000000);
2479 emitPRED (0x3a);
2480 emitLDSTc(0x38);
2481 emitLDSTs(0x35, insn->dType);
2482 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2483 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2484 emitGPR (0x00, insn->src(1));
2485 }
2486
2487 void
2488 CodeEmitterGM107::emitALD()
2489 {
2490 emitInsn (0xefd80000);
2491 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2492 emitGPR (0x27, insn->src(0).getIndirect(1));
2493 emitO (0x20);
2494 emitP (0x1f);
2495 emitADDR (0x08, 20, 10, 0, insn->src(0));
2496 emitGPR (0x00, insn->def(0));
2497 }
2498
2499 void
2500 CodeEmitterGM107::emitAST()
2501 {
2502 emitInsn (0xeff00000);
2503 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2504 emitGPR (0x27, insn->src(0).getIndirect(1));
2505 emitP (0x1f);
2506 emitADDR (0x08, 20, 10, 0, insn->src(0));
2507 emitGPR (0x00, insn->src(1));
2508 }
2509
2510 void
2511 CodeEmitterGM107::emitISBERD()
2512 {
2513 emitInsn(0xefd00000);
2514 emitGPR (0x08, insn->src(0));
2515 emitGPR (0x00, insn->def(0));
2516 }
2517
2518 void
2519 CodeEmitterGM107::emitAL2P()
2520 {
2521 emitInsn (0xefa00000);
2522 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2523 emitPRED (0x2c);
2524 emitO (0x20);
2525 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2526 emitGPR (0x08, insn->src(0).getIndirect(0));
2527 emitGPR (0x00, insn->def(0));
2528 }
2529
2530 static void
2531 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2532 {
2533 int ipa = entry->ipa;
2534 int reg = entry->reg;
2535 int loc = entry->loc;
2536
2537 if (data.flatshade &&
2538 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2539 ipa = NV50_IR_INTERP_FLAT;
2540 reg = 0xff;
2541 } else if (data.force_persample_interp &&
2542 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2543 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2544 ipa |= NV50_IR_INTERP_CENTROID;
2545 }
2546 code[loc + 1] &= ~(0xf << 0x14);
2547 code[loc + 1] |= (ipa & 0x3) << 0x16;
2548 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2549 code[loc + 0] &= ~(0xff << 0x14);
2550 code[loc + 0] |= reg << 0x14;
2551 }
2552
2553 void
2554 CodeEmitterGM107::emitIPA()
2555 {
2556 int ipam = 0, ipas = 0;
2557
2558 switch (insn->getInterpMode()) {
2559 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2560 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2561 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2562 case NV50_IR_INTERP_SC : ipam = 3; break;
2563 default:
2564 assert(!"invalid ipa mode");
2565 break;
2566 }
2567
2568 switch (insn->getSampleMode()) {
2569 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2570 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2571 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2572 default:
2573 assert(!"invalid ipa sample mode");
2574 break;
2575 }
2576
2577 emitInsn (0xe0000000);
2578 emitField(0x36, 2, ipam);
2579 emitField(0x34, 2, ipas);
2580 emitSAT (0x33);
2581 emitField(0x2f, 3, 7);
2582 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2583 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2584 code[1] |= 0x00000040; /* .idx */
2585 emitGPR(0x00, insn->def(0));
2586
2587 if (insn->op == OP_PINTERP) {
2588 emitGPR(0x14, insn->src(1));
2589 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2590 emitGPR(0x27, insn->src(2));
2591 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2592 } else {
2593 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2594 emitGPR(0x27, insn->src(1));
2595 emitGPR(0x14);
2596 addInterp(insn->ipa, 0xff, interpApply);
2597 }
2598
2599 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2600 emitGPR(0x27);
2601 }
2602
2603 void
2604 CodeEmitterGM107::emitATOM()
2605 {
2606 unsigned dType, subOp;
2607
2608 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2609 switch (insn->dType) {
2610 case TYPE_U32: dType = 0; break;
2611 case TYPE_U64: dType = 1; break;
2612 default: assert(!"unexpected dType"); dType = 0; break;
2613 }
2614 subOp = 15;
2615
2616 emitInsn (0xee000000);
2617 } else {
2618 switch (insn->dType) {
2619 case TYPE_U32: dType = 0; break;
2620 case TYPE_S32: dType = 1; break;
2621 case TYPE_U64: dType = 2; break;
2622 case TYPE_F32: dType = 3; break;
2623 case TYPE_B128: dType = 4; break;
2624 case TYPE_S64: dType = 5; break;
2625 default: assert(!"unexpected dType"); dType = 0; break;
2626 }
2627 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2628 subOp = 8;
2629 else
2630 subOp = insn->subOp;
2631
2632 emitInsn (0xed000000);
2633 }
2634
2635 emitField(0x34, 4, subOp);
2636 emitField(0x31, 3, dType);
2637 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2638 emitGPR (0x14, insn->src(1));
2639 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2640 emitGPR (0x00, insn->def(0));
2641 }
2642
2643 void
2644 CodeEmitterGM107::emitATOMS()
2645 {
2646 unsigned dType, subOp;
2647
2648 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2649 switch (insn->dType) {
2650 case TYPE_U32: dType = 0; break;
2651 case TYPE_U64: dType = 1; break;
2652 default: assert(!"unexpected dType"); dType = 0; break;
2653 }
2654 subOp = 4;
2655
2656 emitInsn (0xee000000);
2657 emitField(0x34, 1, dType);
2658 } else {
2659 switch (insn->dType) {
2660 case TYPE_U32: dType = 0; break;
2661 case TYPE_S32: dType = 1; break;
2662 case TYPE_U64: dType = 2; break;
2663 case TYPE_S64: dType = 3; break;
2664 default: assert(!"unexpected dType"); dType = 0; break;
2665 }
2666
2667 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2668 subOp = 8;
2669 else
2670 subOp = insn->subOp;
2671
2672 emitInsn (0xec000000);
2673 emitField(0x1c, 3, dType);
2674 }
2675
2676 emitField(0x34, 4, subOp);
2677 emitGPR (0x14, insn->src(1));
2678 emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2679 emitGPR (0x00, insn->def(0));
2680 }
2681
2682 void
2683 CodeEmitterGM107::emitRED()
2684 {
2685 unsigned dType;
2686
2687 switch (insn->dType) {
2688 case TYPE_U32: dType = 0; break;
2689 case TYPE_S32: dType = 1; break;
2690 case TYPE_U64: dType = 2; break;
2691 case TYPE_F32: dType = 3; break;
2692 case TYPE_B128: dType = 4; break;
2693 case TYPE_S64: dType = 5; break;
2694 default: assert(!"unexpected dType"); dType = 0; break;
2695 }
2696
2697 emitInsn (0xebf80000);
2698 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2699 emitField(0x17, 3, insn->subOp);
2700 emitField(0x14, 3, dType);
2701 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2702 emitGPR (0x00, insn->src(1));
2703 }
2704
2705 void
2706 CodeEmitterGM107::emitCCTL()
2707 {
2708 unsigned width;
2709 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2710 emitInsn(0xef600000);
2711 width = 30;
2712 } else {
2713 emitInsn(0xef800000);
2714 width = 22;
2715 }
2716 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2717 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2718 emitField(0x00, 4, insn->subOp);
2719 }
2720
2721 /*******************************************************************************
2722 * surface
2723 ******************************************************************************/
2724
2725 void
2726 CodeEmitterGM107::emitPIXLD()
2727 {
2728 emitInsn (0xefe80000);
2729 emitPRED (0x2d);
2730 emitField(0x1f, 3, insn->subOp);
2731 emitGPR (0x08, insn->src(0));
2732 emitGPR (0x00, insn->def(0));
2733 }
2734
2735 /*******************************************************************************
2736 * texture
2737 ******************************************************************************/
2738
2739 void
2740 CodeEmitterGM107::emitTEXs(int pos)
2741 {
2742 int src1 = insn->predSrc == 1 ? 2 : 1;
2743 if (insn->srcExists(src1))
2744 emitGPR(pos, insn->src(src1));
2745 else
2746 emitGPR(pos);
2747 }
2748
/* Translates a 4-bit component mask into the 3-bit value emitTEXS() stores
 * in its mask field.
 *
 * Masks 0x0, 0x5, 0x6 and anything above 0xf have no encoding: they trip
 * the assert and yield 0.  Several masks share an encoded value.
 * NOTE(review): presumably the shared values are told apart by whether a
 * second destination register is present - confirm.
 */
static uint8_t
getTEXSMask(uint8_t mask)
{
   // indexed by mask; -1 marks a mask without an encoding
   static const int8_t encoded[16] = {
      /* 0x0 */ -1,  /* 0x1 */ 0x0, /* 0x2 */ 0x1, /* 0x3 */ 0x4,
      /* 0x4 */ 0x2, /* 0x5 */ -1,  /* 0x6 */ -1,  /* 0x7 */ 0x0,
      /* 0x8 */ 0x3, /* 0x9 */ 0x5, /* 0xa */ 0x6, /* 0xb */ 0x1,
      /* 0xc */ 0x7, /* 0xd */ 0x2, /* 0xe */ 0x3, /* 0xf */ 0x4,
   };

   if (mask < 16 && encoded[mask] >= 0)
      return static_cast<uint8_t>(encoded[mask]);

   assert(!"invalid mask");
   return 0;
}
2771
2772 static uint8_t
2773 getTEXSTarget(const TexInstruction *tex)
2774 {
2775 assert(tex->op == OP_TEX || tex->op == OP_TXL);
2776
2777 switch (tex->tex.target.getEnum()) {
2778 case TEX_TARGET_1D:
2779 assert(tex->tex.levelZero);
2780 return 0x0;
2781 case TEX_TARGET_2D:
2782 case TEX_TARGET_RECT:
2783 if (tex->tex.levelZero)
2784 return 0x2;
2785 if (tex->op == OP_TXL)
2786 return 0x3;
2787 return 0x1;
2788 case TEX_TARGET_2D_SHADOW:
2789 case TEX_TARGET_RECT_SHADOW:
2790 if (tex->tex.levelZero)
2791 return 0x6;
2792 if (tex->op == OP_TXL)
2793 return 0x5;
2794 return 0x4;
2795 case TEX_TARGET_2D_ARRAY:
2796 if (tex->tex.levelZero)
2797 return 0x8;
2798 return 0x7;
2799 case TEX_TARGET_2D_ARRAY_SHADOW:
2800 assert(tex->tex.levelZero);
2801 return 0x9;
2802 case TEX_TARGET_3D:
2803 if (tex->tex.levelZero)
2804 return 0xb;
2805 assert(tex->op != OP_TXL);
2806 return 0xa;
2807 case TEX_TARGET_CUBE:
2808 assert(!tex->tex.levelZero);
2809 if (tex->op == OP_TXL)
2810 return 0xd;
2811 return 0xc;
2812 default:
2813 assert(false);
2814 return 0x0;
2815 }
2816 }
2817
2818 static uint8_t
2819 getTLDSTarget(const TexInstruction *tex)
2820 {
2821 switch (tex->tex.target.getEnum()) {
2822 case TEX_TARGET_1D:
2823 if (tex->tex.levelZero)
2824 return 0x0;
2825 return 0x1;
2826 case TEX_TARGET_2D:
2827 case TEX_TARGET_RECT:
2828 if (tex->tex.levelZero)
2829 return tex->tex.useOffsets ? 0x4 : 0x2;
2830 return tex->tex.useOffsets ? 0xc : 0x5;
2831 case TEX_TARGET_2D_MS:
2832 assert(tex->tex.levelZero);
2833 return 0x6;
2834 case TEX_TARGET_3D:
2835 assert(tex->tex.levelZero);
2836 return 0x7;
2837 case TEX_TARGET_2D_ARRAY:
2838 assert(tex->tex.levelZero);
2839 return 0x8;
2840
2841 default:
2842 assert(false);
2843 return 0x0;
2844 }
2845 }
2846
2847 void
2848 CodeEmitterGM107::emitTEX()
2849 {
2850 const TexInstruction *insn = this->insn->asTex();
2851 int lodm = 0;
2852
2853 if (!insn->tex.levelZero) {
2854 switch (insn->op) {
2855 case OP_TEX: lodm = 0; break;
2856 case OP_TXB: lodm = 2; break;
2857 case OP_TXL: lodm = 3; break;
2858 default:
2859 assert(!"invalid tex op");
2860 break;
2861 }
2862 } else {
2863 lodm = 1;
2864 }
2865
2866 if (insn->tex.rIndirectSrc >= 0) {
2867 emitInsn (0xdeb80000);
2868 emitField(0x25, 2, lodm);
2869 emitField(0x24, 1, insn->tex.useOffsets == 1);
2870 } else {
2871 emitInsn (0xc0380000);
2872 emitField(0x37, 2, lodm);
2873 emitField(0x36, 1, insn->tex.useOffsets == 1);
2874 emitField(0x24, 13, insn->tex.r);
2875 }
2876
2877 emitField(0x32, 1, insn->tex.target.isShadow());
2878 emitField(0x31, 1, insn->tex.liveOnly);
2879 emitField(0x23, 1, insn->tex.derivAll);
2880 emitField(0x1f, 4, insn->tex.mask);
2881 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2882 insn->tex.target.getDim() - 1);
2883 emitField(0x1c, 1, insn->tex.target.isArray());
2884 emitTEXs (0x14);
2885 emitGPR (0x08, insn->src(0));
2886 emitGPR (0x00, insn->def(0));
2887 }
2888
2889 void
2890 CodeEmitterGM107::emitTEXS()
2891 {
2892 const TexInstruction *insn = this->insn->asTex();
2893 assert(!insn->tex.derivAll);
2894
2895 switch (insn->op) {
2896 case OP_TEX:
2897 case OP_TXL:
2898 emitInsn (0xd8000000);
2899 emitField(0x35, 4, getTEXSTarget(insn));
2900 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2901 break;
2902 case OP_TXF:
2903 emitInsn (0xda000000);
2904 emitField(0x35, 4, getTLDSTarget(insn));
2905 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2906 break;
2907 case OP_TXG:
2908 assert(insn->tex.useOffsets != 4);
2909 emitInsn (0xdf000000);
2910 emitField(0x34, 2, insn->tex.gatherComp);
2911 emitField(0x33, 1, insn->tex.useOffsets == 1);
2912 emitField(0x32, 1, insn->tex.target.isShadow());
2913 break;
2914 default:
2915 unreachable("unknown op in emitTEXS()");
2916 break;
2917 }
2918
2919 emitField(0x31, 1, insn->tex.liveOnly);
2920 emitField(0x24, 13, insn->tex.r);
2921 if (insn->defExists(1))
2922 emitGPR(0x1c, insn->def(1));
2923 else
2924 emitGPR(0x1c);
2925 if (insn->srcExists(1))
2926 emitGPR(0x14, insn->getSrc(1));
2927 else
2928 emitGPR(0x14);
2929 emitGPR (0x08, insn->src(0));
2930 emitGPR (0x00, insn->def(0));
2931 }
2932
2933 void
2934 CodeEmitterGM107::emitTLD()
2935 {
2936 const TexInstruction *insn = this->insn->asTex();
2937
2938 if (insn->tex.rIndirectSrc >= 0) {
2939 emitInsn (0xdd380000);
2940 } else {
2941 emitInsn (0xdc380000);
2942 emitField(0x24, 13, insn->tex.r);
2943 }
2944
2945 emitField(0x37, 1, insn->tex.levelZero == 0);
2946 emitField(0x32, 1, insn->tex.target.isMS());
2947 emitField(0x31, 1, insn->tex.liveOnly);
2948 emitField(0x23, 1, insn->tex.useOffsets == 1);
2949 emitField(0x1f, 4, insn->tex.mask);
2950 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2951 insn->tex.target.getDim() - 1);
2952 emitField(0x1c, 1, insn->tex.target.isArray());
2953 emitTEXs (0x14);
2954 emitGPR (0x08, insn->src(0));
2955 emitGPR (0x00, insn->def(0));
2956 }
2957
2958 void
2959 CodeEmitterGM107::emitTLD4()
2960 {
2961 const TexInstruction *insn = this->insn->asTex();
2962
2963 if (insn->tex.rIndirectSrc >= 0) {
2964 emitInsn (0xdef80000);
2965 emitField(0x26, 2, insn->tex.gatherComp);
2966 emitField(0x25, 2, insn->tex.useOffsets == 4);
2967 emitField(0x24, 2, insn->tex.useOffsets == 1);
2968 } else {
2969 emitInsn (0xc8380000);
2970 emitField(0x38, 2, insn->tex.gatherComp);
2971 emitField(0x37, 2, insn->tex.useOffsets == 4);
2972 emitField(0x36, 2, insn->tex.useOffsets == 1);
2973 emitField(0x24, 13, insn->tex.r);
2974 }
2975
2976 emitField(0x32, 1, insn->tex.target.isShadow());
2977 emitField(0x31, 1, insn->tex.liveOnly);
2978 emitField(0x23, 1, insn->tex.derivAll);
2979 emitField(0x1f, 4, insn->tex.mask);
2980 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2981 insn->tex.target.getDim() - 1);
2982 emitField(0x1c, 1, insn->tex.target.isArray());
2983 emitTEXs (0x14);
2984 emitGPR (0x08, insn->src(0));
2985 emitGPR (0x00, insn->def(0));
2986 }
2987
2988 void
2989 CodeEmitterGM107::emitTXD()
2990 {
2991 const TexInstruction *insn = this->insn->asTex();
2992
2993 if (insn->tex.rIndirectSrc >= 0) {
2994 emitInsn (0xde780000);
2995 } else {
2996 emitInsn (0xde380000);
2997 emitField(0x24, 13, insn->tex.r);
2998 }
2999
3000 emitField(0x31, 1, insn->tex.liveOnly);
3001 emitField(0x23, 1, insn->tex.useOffsets == 1);
3002 emitField(0x1f, 4, insn->tex.mask);
3003 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3004 insn->tex.target.getDim() - 1);
3005 emitField(0x1c, 1, insn->tex.target.isArray());
3006 emitTEXs (0x14);
3007 emitGPR (0x08, insn->src(0));
3008 emitGPR (0x00, insn->def(0));
3009 }
3010
3011 void
3012 CodeEmitterGM107::emitTMML()
3013 {
3014 const TexInstruction *insn = this->insn->asTex();
3015
3016 if (insn->tex.rIndirectSrc >= 0) {
3017 emitInsn (0xdf600000);
3018 } else {
3019 emitInsn (0xdf580000);
3020 emitField(0x24, 13, insn->tex.r);
3021 }
3022
3023 emitField(0x31, 1, insn->tex.liveOnly);
3024 emitField(0x23, 1, insn->tex.derivAll);
3025 emitField(0x1f, 4, insn->tex.mask);
3026 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3027 insn->tex.target.getDim() - 1);
3028 emitField(0x1c, 1, insn->tex.target.isArray());
3029 emitTEXs (0x14);
3030 emitGPR (0x08, insn->src(0));
3031 emitGPR (0x00, insn->def(0));
3032 }
3033
3034 void
3035 CodeEmitterGM107::emitTXQ()
3036 {
3037 const TexInstruction *insn = this->insn->asTex();
3038 int type = 0;
3039
3040 switch (insn->tex.query) {
3041 case TXQ_DIMS : type = 0x01; break;
3042 case TXQ_TYPE : type = 0x02; break;
3043 case TXQ_SAMPLE_POSITION: type = 0x05; break;
3044 case TXQ_FILTER : type = 0x10; break;
3045 case TXQ_LOD : type = 0x12; break;
3046 case TXQ_WRAP : type = 0x14; break;
3047 case TXQ_BORDER_COLOUR : type = 0x16; break;
3048 default:
3049 assert(!"invalid txq query");
3050 break;
3051 }
3052
3053 if (insn->tex.rIndirectSrc >= 0) {
3054 emitInsn (0xdf500000);
3055 } else {
3056 emitInsn (0xdf480000);
3057 emitField(0x24, 13, insn->tex.r);
3058 }
3059
3060 emitField(0x31, 1, insn->tex.liveOnly);
3061 emitField(0x1f, 4, insn->tex.mask);
3062 emitField(0x16, 6, type);
3063 emitGPR (0x08, insn->src(0));
3064 emitGPR (0x00, insn->def(0));
3065 }
3066
3067 void
3068 CodeEmitterGM107::emitDEPBAR()
3069 {
3070 emitInsn (0xf0f00000);
3071 emitField(0x1d, 1, 1); /* le */
3072 emitField(0x1a, 3, 5);
3073 emitField(0x14, 6, insn->subOp);
3074 emitField(0x00, 6, insn->subOp);
3075 }
3076
3077 /*******************************************************************************
3078 * misc
3079 ******************************************************************************/
3080
3081 void
3082 CodeEmitterGM107::emitNOP()
3083 {
3084 emitInsn(0x50b00000);
3085 }
3086
3087 void
3088 CodeEmitterGM107::emitKIL()
3089 {
3090 emitInsn (0xe3300000);
3091 emitCond5(0x00, CC_TR);
3092 }
3093
3094 void
3095 CodeEmitterGM107::emitOUT()
3096 {
3097 const int cut = insn->op == OP_RESTART || insn->subOp;
3098 const int emit = insn->op == OP_EMIT;
3099
3100 switch (insn->src(1).getFile()) {
3101 case FILE_GPR:
3102 emitInsn(0xfbe00000);
3103 emitGPR (0x14, insn->src(1));
3104 break;
3105 case FILE_IMMEDIATE:
3106 emitInsn(0xf6e00000);
3107 emitIMMD(0x14, 19, insn->src(1));
3108 break;
3109 case FILE_MEMORY_CONST:
3110 emitInsn(0xebe00000);
3111 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3112 break;
3113 default:
3114 assert(!"bad src1 file");
3115 break;
3116 }
3117
3118 emitField(0x27, 2, (cut << 1) | emit);
3119 emitGPR (0x08, insn->src(0));
3120 emitGPR (0x00, insn->def(0));
3121 }
3122
3123 void
3124 CodeEmitterGM107::emitBAR()
3125 {
3126 uint8_t subop;
3127
3128 emitInsn (0xf0a80000);
3129
3130 switch (insn->subOp) {
3131 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3132 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
3133 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
3134 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
3135 default:
3136 subop = 0x80;
3137 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3138 break;
3139 }
3140
3141 emitField(0x20, 8, subop);
3142
3143 // barrier id
3144 if (insn->src(0).getFile() == FILE_GPR) {
3145 emitGPR(0x08, insn->src(0));
3146 } else {
3147 ImmediateValue *imm = insn->getSrc(0)->asImm();
3148 assert(imm);
3149 emitField(0x08, 8, imm->reg.data.u32);
3150 emitField(0x2b, 1, 1);
3151 }
3152
3153 // thread count
3154 if (insn->src(1).getFile() == FILE_GPR) {
3155 emitGPR(0x14, insn->src(1));
3156 } else {
3157 ImmediateValue *imm = insn->getSrc(0)->asImm();
3158 assert(imm);
3159 emitField(0x14, 12, imm->reg.data.u32);
3160 emitField(0x2c, 1, 1);
3161 }
3162
3163 if (insn->srcExists(2) && (insn->predSrc != 2)) {
3164 emitPRED (0x27, insn->src(2));
3165 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3166 } else {
3167 emitField(0x27, 3, 7);
3168 }
3169 }
3170
3171 void
3172 CodeEmitterGM107::emitMEMBAR()
3173 {
3174 emitInsn (0xef980000);
3175 emitField(0x08, 2, insn->subOp >> 2);
3176 }
3177
3178 void
3179 CodeEmitterGM107::emitVOTE()
3180 {
3181 const ImmediateValue *imm;
3182 uint32_t u32;
3183
3184 int r = -1, p = -1;
3185 for (int i = 0; insn->defExists(i); i++) {
3186 if (insn->def(i).getFile() == FILE_GPR)
3187 r = i;
3188 else if (insn->def(i).getFile() == FILE_PREDICATE)
3189 p = i;
3190 }
3191
3192 emitInsn (0x50d80000);
3193 emitField(0x30, 2, insn->subOp);
3194 if (r >= 0)
3195 emitGPR (0x00, insn->def(r));
3196 else
3197 emitGPR (0x00);
3198 if (p >= 0)
3199 emitPRED (0x2d, insn->def(p));
3200 else
3201 emitPRED (0x2d);
3202
3203 switch (insn->src(0).getFile()) {
3204 case FILE_PREDICATE:
3205 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3206 emitPRED (0x27, insn->src(0));
3207 break;
3208 case FILE_IMMEDIATE:
3209 imm = insn->getSrc(0)->asImm();
3210 assert(imm);
3211 u32 = imm->reg.data.u32;
3212 assert(u32 == 0 || u32 == 1);
3213 emitPRED(0x27);
3214 emitField(0x2a, 1, u32 == 0);
3215 break;
3216 default:
3217 assert(!"Unhandled src");
3218 break;
3219 }
3220 }
3221
3222 void
3223 CodeEmitterGM107::emitSUTarget()
3224 {
3225 const TexInstruction *insn = this->insn->asTex();
3226 int target = 0;
3227
3228 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3229
3230 if (insn->tex.target == TEX_TARGET_BUFFER) {
3231 target = 2;
3232 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3233 target = 4;
3234 } else if (insn->tex.target == TEX_TARGET_2D ||
3235 insn->tex.target == TEX_TARGET_RECT) {
3236 target = 6;
3237 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3238 insn->tex.target == TEX_TARGET_CUBE ||
3239 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3240 target = 8;
3241 } else if (insn->tex.target == TEX_TARGET_3D) {
3242 target = 10;
3243 } else {
3244 assert(insn->tex.target == TEX_TARGET_1D);
3245 }
3246 emitField(0x20, 4, target);
3247 }
3248
3249 void
3250 CodeEmitterGM107::emitSUHandle(const int s)
3251 {
3252 const TexInstruction *insn = this->insn->asTex();
3253
3254 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3255
3256 if (insn->src(s).getFile() == FILE_GPR) {
3257 emitGPR(0x27, insn->src(s));
3258 } else {
3259 ImmediateValue *imm = insn->getSrc(s)->asImm();
3260 assert(imm);
3261 emitField(0x33, 1, 1);
3262 emitField(0x24, 13, imm->reg.data.u32);
3263 }
3264 }
3265
3266 void
3267 CodeEmitterGM107::emitSUSTx()
3268 {
3269 const TexInstruction *insn = this->insn->asTex();
3270
3271 emitInsn(0xeb200000);
3272 if (insn->op == OP_SUSTB)
3273 emitField(0x34, 1, 1);
3274 emitSUTarget();
3275
3276 emitLDSTc(0x18);
3277 emitField(0x14, 4, 0xf); // rgba
3278 emitGPR (0x08, insn->src(0));
3279 emitGPR (0x00, insn->src(1));
3280
3281 emitSUHandle(2);
3282 }
3283
3284 void
3285 CodeEmitterGM107::emitSULDx()
3286 {
3287 const TexInstruction *insn = this->insn->asTex();
3288 int type = 0;
3289
3290 emitInsn(0xeb000000);
3291 if (insn->op == OP_SULDB)
3292 emitField(0x34, 1, 1);
3293 emitSUTarget();
3294
3295 switch (insn->dType) {
3296 case TYPE_S8: type = 1; break;
3297 case TYPE_U16: type = 2; break;
3298 case TYPE_S16: type = 3; break;
3299 case TYPE_U32: type = 4; break;
3300 case TYPE_U64: type = 5; break;
3301 case TYPE_B128: type = 6; break;
3302 default:
3303 assert(insn->dType == TYPE_U8);
3304 break;
3305 }
3306 emitLDSTc(0x18);
3307 emitField(0x14, 3, type);
3308 emitGPR (0x00, insn->def(0));
3309 emitGPR (0x08, insn->src(0));
3310
3311 emitSUHandle(1);
3312 }
3313
3314 void
3315 CodeEmitterGM107::emitSUREDx()
3316 {
3317 const TexInstruction *insn = this->insn->asTex();
3318 uint8_t type = 0, subOp;
3319
3320 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3321 emitInsn(0xeac00000);
3322 else
3323 emitInsn(0xea600000);
3324
3325 if (insn->op == OP_SUREDB)
3326 emitField(0x34, 1, 1);
3327 emitSUTarget();
3328
3329 // destination type
3330 switch (insn->dType) {
3331 case TYPE_S32: type = 1; break;
3332 case TYPE_U64: type = 2; break;
3333 case TYPE_F32: type = 3; break;
3334 case TYPE_S64: type = 5; break;
3335 default:
3336 assert(insn->dType == TYPE_U32);
3337 break;
3338 }
3339
3340 // atomic operation
3341 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3342 subOp = 0;
3343 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3344 subOp = 8;
3345 } else {
3346 subOp = insn->subOp;
3347 }
3348
3349 emitField(0x24, 3, type);
3350 emitField(0x1d, 4, subOp);
3351 emitGPR (0x14, insn->src(1));
3352 emitGPR (0x08, insn->src(0));
3353 emitGPR (0x00, insn->def(0));
3354
3355 emitSUHandle(2);
3356 }
3357
3358 /*******************************************************************************
3359 * assembler front-end
3360 ******************************************************************************/
3361
3362 bool
3363 CodeEmitterGM107::emitInstruction(Instruction *i)
3364 {
3365 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3366 bool ret = true;
3367
3368 insn = i;
3369
3370 if (insn->encSize != 8) {
3371 ERROR("skipping undecodable instruction: "); insn->print();
3372 return false;
3373 } else
3374 if (codeSize + size > codeSizeLimit) {
3375 ERROR("code emitter output buffer too small\n");
3376 return false;
3377 }
3378
3379 if (writeIssueDelays) {
3380 int n = ((codeSize & 0x1f) / 8) - 1;
3381 if (n < 0) {
3382 data = code;
3383 data[0] = 0x00000000;
3384 data[1] = 0x00000000;
3385 code += 2;
3386 codeSize += 8;
3387 n++;
3388 }
3389
3390 emitField(data, n * 21, 21, insn->sched);
3391 }
3392
3393 switch (insn->op) {
3394 case OP_EXIT:
3395 emitEXIT();
3396 break;
3397 case OP_BRA:
3398 emitBRA();
3399 break;
3400 case OP_CALL:
3401 emitCAL();
3402 break;
3403 case OP_PRECONT:
3404 emitPCNT();
3405 break;
3406 case OP_CONT:
3407 emitCONT();
3408 break;
3409 case OP_PREBREAK:
3410 emitPBK();
3411 break;
3412 case OP_BREAK:
3413 emitBRK();
3414 break;
3415 case OP_PRERET:
3416 emitPRET();
3417 break;
3418 case OP_RET:
3419 emitRET();
3420 break;
3421 case OP_JOINAT:
3422 emitSSY();
3423 break;
3424 case OP_JOIN:
3425 emitSYNC();
3426 break;
3427 case OP_QUADON:
3428 emitSAM();
3429 break;
3430 case OP_QUADPOP:
3431 emitRAM();
3432 break;
3433 case OP_MOV:
3434 emitMOV();
3435 break;
3436 case OP_RDSV:
3437 if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
3438 emitCS2R();
3439 else
3440 emitS2R();
3441 break;
3442 case OP_ABS:
3443 case OP_NEG:
3444 case OP_SAT:
3445 case OP_FLOOR:
3446 case OP_CEIL:
3447 case OP_TRUNC:
3448 case OP_CVT:
3449 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3450 insn->src(0).getFile() == FILE_PREDICATE)) {
3451 emitMOV();
3452 } else if (isFloatType(insn->dType)) {
3453 if (isFloatType(insn->sType))
3454 emitF2F();
3455 else
3456 emitI2F();
3457 } else {
3458 if (isFloatType(insn->sType))
3459 emitF2I();
3460 else
3461 emitI2I();
3462 }
3463 break;
3464 case OP_SHFL:
3465 emitSHFL();
3466 break;
3467 case OP_ADD:
3468 case OP_SUB:
3469 if (isFloatType(insn->dType)) {
3470 if (insn->dType == TYPE_F64)
3471 emitDADD();
3472 else
3473 emitFADD();
3474 } else {
3475 emitIADD();
3476 }
3477 break;
3478 case OP_MUL:
3479 if (isFloatType(insn->dType)) {
3480 if (insn->dType == TYPE_F64)
3481 emitDMUL();
3482 else
3483 emitFMUL();
3484 } else {
3485 emitIMUL();
3486 }
3487 break;
3488 case OP_MAD:
3489 case OP_FMA:
3490 if (isFloatType(insn->dType)) {
3491 if (insn->dType == TYPE_F64)
3492 emitDFMA();
3493 else
3494 emitFFMA();
3495 } else {
3496 emitIMAD();
3497 }
3498 break;
3499 case OP_SHLADD:
3500 emitISCADD();
3501 break;
3502 case OP_XMAD:
3503 emitXMAD();
3504 break;
3505 case OP_MIN:
3506 case OP_MAX:
3507 if (isFloatType(insn->dType)) {
3508 if (insn->dType == TYPE_F64)
3509 emitDMNMX();
3510 else
3511 emitFMNMX();
3512 } else {
3513 emitIMNMX();
3514 }
3515 break;
3516 case OP_SHL:
3517 if (typeSizeof(insn->sType) == 8)
3518 emitSHF();
3519 else
3520 emitSHL();
3521 break;
3522 case OP_SHR:
3523 if (typeSizeof(insn->sType) == 8)
3524 emitSHF();
3525 else
3526 emitSHR();
3527 break;
3528 case OP_POPCNT:
3529 emitPOPC();
3530 break;
3531 case OP_INSBF:
3532 emitBFI();
3533 break;
3534 case OP_EXTBF:
3535 emitBFE();
3536 break;
3537 case OP_BFIND:
3538 emitFLO();
3539 break;
3540 case OP_SLCT:
3541 if (isFloatType(insn->dType))
3542 emitFCMP();
3543 else
3544 emitICMP();
3545 break;
3546 case OP_SET:
3547 case OP_SET_AND:
3548 case OP_SET_OR:
3549 case OP_SET_XOR:
3550 if (insn->def(0).getFile() != FILE_PREDICATE) {
3551 if (isFloatType(insn->sType))
3552 if (insn->sType == TYPE_F64)
3553 emitDSET();
3554 else
3555 emitFSET();
3556 else
3557 emitISET();
3558 } else {
3559 if (isFloatType(insn->sType))
3560 if (insn->sType == TYPE_F64)
3561 emitDSETP();
3562 else
3563 emitFSETP();
3564 else
3565 emitISETP();
3566 }
3567 break;
3568 case OP_SELP:
3569 emitSEL();
3570 break;
3571 case OP_PRESIN:
3572 case OP_PREEX2:
3573 emitRRO();
3574 break;
3575 case OP_COS:
3576 case OP_SIN:
3577 case OP_EX2:
3578 case OP_LG2:
3579 case OP_RCP:
3580 case OP_RSQ:
3581 case OP_SQRT:
3582 emitMUFU();
3583 break;
3584 case OP_AND:
3585 case OP_OR:
3586 case OP_XOR:
3587 switch (insn->def(0).getFile()) {
3588 case FILE_GPR: emitLOP(); break;
3589 case FILE_PREDICATE: emitPSETP(); break;
3590 default:
3591 assert(!"invalid bool op");
3592 }
3593 break;
3594 case OP_NOT:
3595 emitNOT();
3596 break;
3597 case OP_LOAD:
3598 switch (insn->src(0).getFile()) {
3599 case FILE_MEMORY_CONST : emitLDC(); break;
3600 case FILE_MEMORY_LOCAL : emitLDL(); break;
3601 case FILE_MEMORY_SHARED: emitLDS(); break;
3602 case FILE_MEMORY_GLOBAL: emitLD(); break;
3603 default:
3604 assert(!"invalid load");
3605 emitNOP();
3606 break;
3607 }
3608 break;
3609 case OP_STORE:
3610 switch (insn->src(0).getFile()) {
3611 case FILE_MEMORY_LOCAL : emitSTL(); break;
3612 case FILE_MEMORY_SHARED: emitSTS(); break;
3613 case FILE_MEMORY_GLOBAL: emitST(); break;
3614 default:
3615 assert(!"invalid store");
3616 emitNOP();
3617 break;
3618 }
3619 break;
3620 case OP_ATOM:
3621 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3622 emitATOMS();
3623 else
3624 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3625 emitRED();
3626 else
3627 emitATOM();
3628 break;
3629 case OP_CCTL:
3630 emitCCTL();
3631 break;
3632 case OP_VFETCH:
3633 emitALD();
3634 break;
3635 case OP_EXPORT:
3636 emitAST();
3637 break;
3638 case OP_PFETCH:
3639 emitISBERD();
3640 break;
3641 case OP_AFETCH:
3642 emitAL2P();
3643 break;
3644 case OP_LINTERP:
3645 case OP_PINTERP:
3646 emitIPA();
3647 break;
3648 case OP_PIXLD:
3649 emitPIXLD();
3650 break;
3651 case OP_TEX:
3652 case OP_TXL:
3653 if (insn->asTex()->tex.scalar)
3654 emitTEXS();
3655 else
3656 emitTEX();
3657 break;
3658 case OP_TXB:
3659 emitTEX();
3660 break;
3661 case OP_TXF:
3662 if (insn->asTex()->tex.scalar)
3663 emitTEXS();
3664 else
3665 emitTLD();
3666 break;
3667 case OP_TXG:
3668 if (insn->asTex()->tex.scalar)
3669 emitTEXS();
3670 else
3671 emitTLD4();
3672 break;
3673 case OP_TXD:
3674 emitTXD();
3675 break;
3676 case OP_TXQ:
3677 emitTXQ();
3678 break;
3679 case OP_TXLQ:
3680 emitTMML();
3681 break;
3682 case OP_TEXBAR:
3683 emitDEPBAR();
3684 break;
3685 case OP_QUADOP:
3686 emitFSWZADD();
3687 break;
3688 case OP_NOP:
3689 emitNOP();
3690 break;
3691 case OP_DISCARD:
3692 emitKIL();
3693 break;
3694 case OP_EMIT:
3695 case OP_RESTART:
3696 emitOUT();
3697 break;
3698 case OP_BAR:
3699 emitBAR();
3700 break;
3701 case OP_MEMBAR:
3702 emitMEMBAR();
3703 break;
3704 case OP_VOTE:
3705 emitVOTE();
3706 break;
3707 case OP_SUSTB:
3708 case OP_SUSTP:
3709 emitSUSTx();
3710 break;
3711 case OP_SULDB:
3712 case OP_SULDP:
3713 emitSULDx();
3714 break;
3715 case OP_SUREDB:
3716 case OP_SUREDP:
3717 emitSUREDx();
3718 break;
3719 default:
3720 assert(!"invalid opcode");
3721 emitNOP();
3722 ret = false;
3723 break;
3724 }
3725
3726 if (insn->join) {
3727 /*XXX*/
3728 }
3729
3730 code += 2;
3731 codeSize += 8;
3732 return ret;
3733 }
3734
3735 uint32_t
3736 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3737 {
3738 return 8;
3739 }
3740
3741 /*******************************************************************************
3742 * sched data calculator
3743 ******************************************************************************/
3744
3745 class SchedDataCalculatorGM107 : public Pass
3746 {
3747 public:
3748 SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3749
3750 private:
3751 struct RegScores
3752 {
3753 struct ScoreData {
3754 int r[256];
3755 int p[8];
3756 int c;
3757 } rd, wr;
3758 int base;
3759
3760 void rebase(const int base)
3761 {
3762 const int delta = this->base - base;
3763 if (!delta)
3764 return;
3765 this->base = 0;
3766
3767 for (int i = 0; i < 256; ++i) {
3768 rd.r[i] += delta;
3769 wr.r[i] += delta;
3770 }
3771 for (int i = 0; i < 8; ++i) {
3772 rd.p[i] += delta;
3773 wr.p[i] += delta;
3774 }
3775 rd.c += delta;
3776 wr.c += delta;
3777 }
3778 void wipe()
3779 {
3780 memset(&rd, 0, sizeof(rd));
3781 memset(&wr, 0, sizeof(wr));
3782 }
3783 int getLatest(const ScoreData& d) const
3784 {
3785 int max = 0;
3786 for (int i = 0; i < 256; ++i)
3787 if (d.r[i] > max)
3788 max = d.r[i];
3789 for (int i = 0; i < 8; ++i)
3790 if (d.p[i] > max)
3791 max = d.p[i];
3792 if (d.c > max)
3793 max = d.c;
3794 return max;
3795 }
3796 inline int getLatestRd() const
3797 {
3798 return getLatest(rd);
3799 }
3800 inline int getLatestWr() const
3801 {
3802 return getLatest(wr);
3803 }
3804 inline int getLatest() const
3805 {
3806 return MAX2(getLatestRd(), getLatestWr());
3807 }
3808 void setMax(const RegScores *that)
3809 {
3810 for (int i = 0; i < 256; ++i) {
3811 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3812 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3813 }
3814 for (int i = 0; i < 8; ++i) {
3815 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3816 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3817 }
3818 rd.c = MAX2(rd.c, that->rd.c);
3819 wr.c = MAX2(wr.c, that->wr.c);
3820 }
3821 void print(int cycle)
3822 {
3823 for (int i = 0; i < 256; ++i) {
3824 if (rd.r[i] > cycle)
3825 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3826 if (wr.r[i] > cycle)
3827 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3828 }
3829 for (int i = 0; i < 8; ++i) {
3830 if (rd.p[i] > cycle)
3831 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3832 if (wr.p[i] > cycle)
3833 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3834 }
3835 if (rd.c > cycle)
3836 INFO("rd $c @ %i\n", rd.c);
3837 if (wr.c > cycle)
3838 INFO("wr $c @ %i\n", wr.c);
3839 }
3840 };
3841
// Scoreboard of the basic block being scheduled; visit(BasicBlock *) points
// it at that block's entry in scoreBoards.
3842 RegScores *score; // for current BB
// One scoreboard per CFG node: sized in visit(Function *) and indexed with
// BasicBlock::getId().
3843 std::vector<RegScores> scoreBoards;
3844
// Target queried for latencies, op classes, dual-issue and barrier rules.
3845 const TargetGM107 *targ;
3846 bool visit(Function *);
3847 bool visit(BasicBlock *);
3848
// Scoreboard bookkeeping and stall-count computation.
3849 void commitInsn(const Instruction *, int);
3850 int calcDelay(const Instruction *, int) const;
3851 void setDelay(Instruction *, int, const Instruction *);
3852 void recordWr(const Value *, int, int);
3853 void checkRd(const Value *, int, int&) const;
3854
// Writers for the fields of Instruction::sched: stall count in bits 0-3,
// yield in bit 4, write barrier id in bits 5-7, read barrier id in bits 8-10,
// barrier wait mask in bits 11-16 and reuse flags in bits 17-20.
3855 inline void emitYield(Instruction *);
3856 inline void emitStall(Instruction *, uint8_t);
3857 inline void emitReuse(Instruction *, uint8_t);
3858 inline void emitWrDepBar(Instruction *, uint8_t);
3859 inline void emitRdDepBar(Instruction *, uint8_t);
3860 inline void emitWtDepBar(Instruction *, uint8_t);
3861
// Readers for the same fields.
3862 inline int getStall(const Instruction *) const;
3863 inline int getWrDepBar(const Instruction *) const;
3864 inline int getRdDepBar(const Instruction *) const;
3865 inline int getWtDepBar(const Instruction *) const;
3866
3867 void setReuseFlag(Instruction *);
3868
// Debug helper: prints the decoded sched fields through INFO().
3869 inline void printSchedInfo(int, const Instruction *) const;
3870
3871 struct LiveBarUse {
3872 LiveBarUse(Instruction *insn, Instruction *usei)
3873 : insn(insn), usei(usei) { }
3874 Instruction *insn;
3875 Instruction *usei;
3876 };
3877
3878 struct LiveBarDef {
3879 LiveBarDef(Instruction *insn, Instruction *defi)
3880 : insn(insn), defi(defi) { }
3881 Instruction *insn;
3882 Instruction *defi;
3883 };
3884
// Per-block pass that assigns read/write dependency barriers and the
// matching wait masks.
3885 bool insertBarriers(BasicBlock *);
3886
// Helpers used to find the instruction that has to wait on a barrier.
3887 bool doesInsnWriteTo(const Instruction *insn, const Value *val) const;
3888 Instruction *findFirstUse(const Instruction *) const;
3889 Instruction *findFirstDef(const Instruction *) const;
3890
// Predicates deciding whether an instruction needs a barrier at all.
3891 bool needRdDepBar(const Instruction *) const;
3892 bool needWrDepBar(const Instruction *) const;
3893 };
3894
3895 inline void
3896 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3897 {
3898 assert(cnt < 16);
3899 insn->sched |= cnt;
3900 }
3901
3902 inline void
3903 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3904 {
3905 insn->sched |= 1 << 4;
3906 }
3907
3908 inline void
3909 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3910 {
3911 assert(id < 6);
3912 if ((insn->sched & 0xe0) == 0xe0)
3913 insn->sched ^= 0xe0;
3914 insn->sched |= id << 5;
3915 }
3916
3917 inline void
3918 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3919 {
3920 assert(id < 6);
3921 if ((insn->sched & 0x700) == 0x700)
3922 insn->sched ^= 0x700;
3923 insn->sched |= id << 8;
3924 }
3925
3926 inline void
3927 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3928 {
3929 assert(id < 6);
3930 insn->sched |= 1 << (11 + id);
3931 }
3932
3933 inline void
3934 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3935 {
3936 assert(id < 4);
3937 insn->sched |= 1 << (17 + id);
3938 }
3939
3940 inline void
3941 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3942 const Instruction *insn) const
3943 {
3944 uint8_t st, yl, wr, rd, wt, ru;
3945
3946 st = (insn->sched & 0x00000f) >> 0;
3947 yl = (insn->sched & 0x000010) >> 4;
3948 wr = (insn->sched & 0x0000e0) >> 5;
3949 rd = (insn->sched & 0x000700) >> 8;
3950 wt = (insn->sched & 0x01f800) >> 11;
3951 ru = (insn->sched & 0x1e0000) >> 17;
3952
3953 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3954 cycle, st, yl, wr, rd, wt, ru);
3955 }
3956
3957 inline int
3958 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3959 {
3960 return insn->sched & 0xf;
3961 }
3962
3963 inline int
3964 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3965 {
3966 return (insn->sched & 0x0000e0) >> 5;
3967 }
3968
3969 inline int
3970 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3971 {
3972 return (insn->sched & 0x000700) >> 8;
3973 }
3974
3975 inline int
3976 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3977 {
3978 return (insn->sched & 0x01f800) >> 11;
3979 }
3980
3981 // Emit the reuse flag which allows to make use of the new memory hierarchy
3982 // introduced since Maxwell, the operand reuse cache.
3983 //
3984 // It allows to reduce bank conflicts by caching operands. Each time you issue
3985 // an instruction, that flag can tell the hw which operands are going to be
3986 // re-used by the next instruction. Note that the next instruction has to use
3987 // the same GPR id in the same operand slot.
3988 void
3989 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3990 {
3991 Instruction *next = insn->next;
3992 BitSet defs(255, 1);
3993
3994 if (!targ->isReuseSupported(insn))
3995 return;
3996
3997 for (int d = 0; insn->defExists(d); ++d) {
3998 const Value *def = insn->def(d).rep();
3999 if (insn->def(d).getFile() != FILE_GPR)
4000 continue;
4001 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
4002 continue;
4003 defs.set(def->reg.data.id);
4004 }
4005
4006 for (int s = 0; insn->srcExists(s); s++) {
4007 const Value *src = insn->src(s).rep();
4008 if (insn->src(s).getFile() != FILE_GPR)
4009 continue;
4010 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
4011 continue;
4012 if (defs.test(src->reg.data.id))
4013 continue;
4014 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
4015 continue;
4016 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
4017 continue;
4018 assert(s < 4);
4019 emitReuse(insn, s);
4020 }
4021 }
4022
4023 void
4024 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
4025 {
4026 int a = v->reg.data.id, b;
4027
4028 switch (v->reg.file) {
4029 case FILE_GPR:
4030 b = a + v->reg.size / 4;
4031 for (int r = a; r < b; ++r)
4032 score->rd.r[r] = ready;
4033 break;
4034 case FILE_PREDICATE:
4035 // To immediately use a predicate set by any instructions, the minimum
4036 // number of stall counts is 13.
4037 score->rd.p[a] = cycle + 13;
4038 break;
4039 case FILE_FLAGS:
4040 score->rd.c = ready;
4041 break;
4042 default:
4043 break;
4044 }
4045 }
4046
4047 void
4048 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
4049 {
4050 int a = v->reg.data.id, b;
4051 int ready = cycle;
4052
4053 switch (v->reg.file) {
4054 case FILE_GPR:
4055 b = a + v->reg.size / 4;
4056 for (int r = a; r < b; ++r)
4057 ready = MAX2(ready, score->rd.r[r]);
4058 break;
4059 case FILE_PREDICATE:
4060 ready = MAX2(ready, score->rd.p[a]);
4061 break;
4062 case FILE_FLAGS:
4063 ready = MAX2(ready, score->rd.c);
4064 break;
4065 default:
4066 break;
4067 }
4068 if (cycle < ready)
4069 delay = MAX2(delay, ready - cycle);
4070 }
4071
4072 void
4073 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
4074 {
4075 const int ready = cycle + targ->getLatency(insn);
4076
4077 for (int d = 0; insn->defExists(d); ++d)
4078 recordWr(insn->getDef(d), cycle, ready);
4079
4080 #ifdef GM107_DEBUG_SCHED_DATA
4081 score->print(cycle);
4082 #endif
4083 }
4084
4085 #define GM107_MIN_ISSUE_DELAY 0x1
4086 #define GM107_MAX_ISSUE_DELAY 0xf
4087
4088 int
4089 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
4090 {
4091 int delay = 0, ready = cycle;
4092
4093 for (int s = 0; insn->srcExists(s); ++s)
4094 checkRd(insn->getSrc(s), cycle, delay);
4095
4096 // TODO: make use of getReadLatency()!
4097
4098 return MAX2(delay, ready - cycle);
4099 }
4100
4101 void
4102 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
4103 const Instruction *next)
4104 {
4105 const OpClass cl = targ->getOpClass(insn->op);
4106 int wr, rd;
4107
4108 if (insn->op == OP_EXIT ||
4109 insn->op == OP_BAR ||
4110 insn->op == OP_MEMBAR) {
4111 delay = GM107_MAX_ISSUE_DELAY;
4112 } else
4113 if (insn->op == OP_QUADON ||
4114 insn->op == OP_QUADPOP) {
4115 delay = 0xd;
4116 } else
4117 if (cl == OPCLASS_FLOW || insn->join) {
4118 delay = 0xd;
4119 }
4120
4121 if (!next || !targ->canDualIssue(insn, next)) {
4122 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4123 } else {
4124 delay = 0x0; // dual-issue
4125 }
4126
4127 wr = getWrDepBar(insn);
4128 rd = getRdDepBar(insn);
4129
4130 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4131 // Barriers take one additional clock cycle to become active on top of
4132 // the clock consumed by the instruction producing it.
4133 if (!next || insn->bb != next->bb) {
4134 delay = 0x2;
4135 } else {
4136 int wt = getWtDepBar(next);
4137 if ((wt & (1 << wr)) | (wt & (1 << rd)))
4138 delay = 0x2;
4139 }
4140 }
4141
4142 emitStall(insn, delay);
4143 }
4144
4145
4146 // Return true when the given instruction needs to emit a read dependency
4147 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4148 // setting the maximum number of stall counts is not enough.
4149 bool
4150 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4151 {
4152 BitSet srcs(255, 1), defs(255, 1);
4153 int a, b;
4154
4155 if (!targ->isBarrierRequired(insn))
4156 return false;
4157
4158 // Do not emit a read dependency barrier when the instruction doesn't use
4159 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4160 for (int s = 0; insn->srcExists(s); ++s) {
4161 const Value *src = insn->src(s).rep();
4162 if (insn->src(s).getFile() != FILE_GPR)
4163 continue;
4164 if (src->reg.data.id == 255)
4165 continue;
4166
4167 a = src->reg.data.id;
4168 b = a + src->reg.size / 4;
4169 for (int r = a; r < b; ++r)
4170 srcs.set(r);
4171 }
4172
4173 if (!srcs.popCount())
4174 return false;
4175
4176 // Do not emit a read dependency barrier when the output GPRs are equal to
4177 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4178 // be produced and WaR hazards are prevented.
4179 for (int d = 0; insn->defExists(d); ++d) {
4180 const Value *def = insn->def(d).rep();
4181 if (insn->def(d).getFile() != FILE_GPR)
4182 continue;
4183 if (def->reg.data.id == 255)
4184 continue;
4185
4186 a = def->reg.data.id;
4187 b = a + def->reg.size / 4;
4188 for (int r = a; r < b; ++r)
4189 defs.set(r);
4190 }
4191
4192 srcs.andNot(defs);
4193 if (!srcs.popCount())
4194 return false;
4195
4196 return true;
4197 }
4198
4199 // Return true when the given instruction needs to emit a write dependency
4200 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4201 // setting the maximum number of stall counts is not enough. This is only legal
4202 // if the instruction output something.
4203 bool
4204 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4205 {
4206 if (!targ->isBarrierRequired(insn))
4207 return false;
4208
4209 for (int d = 0; insn->defExists(d); ++d) {
4210 if (insn->def(d).getFile() == FILE_GPR ||
4211 insn->def(d).getFile() == FILE_FLAGS ||
4212 insn->def(d).getFile() == FILE_PREDICATE)
4213 return true;
4214 }
4215 return false;
4216 }
4217
4218 // Helper function for findFirstUse() and findFirstDef()
4219 bool
4220 SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4221 const Value *val) const
4222 {
4223 if (val->reg.file != FILE_GPR &&
4224 val->reg.file != FILE_PREDICATE &&
4225 val->reg.file != FILE_FLAGS)
4226 return false;
4227
4228 for (int d = 0; insn->defExists(d); ++d) {
4229 const Value* def = insn->getDef(d);
4230 int minGPR = def->reg.data.id;
4231 int maxGPR = minGPR + def->reg.size / 4 - 1;
4232
4233 if (def->reg.file != val->reg.file)
4234 continue;
4235
4236 if (def->reg.file == FILE_GPR) {
4237 if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4238 val->reg.data.id > maxGPR)
4239 continue;
4240 return true;
4241 } else
4242 if (def->reg.file == FILE_PREDICATE) {
4243 if (val->reg.data.id != minGPR)
4244 continue;
4245 return true;
4246 } else
4247 if (def->reg.file == FILE_FLAGS) {
4248 if (val->reg.data.id != minGPR)
4249 continue;
4250 return true;
4251 }
4252 }
4253
4254 return false;
4255 }
4256
4257 // Find the next instruction inside the same basic block which uses (reads or
4258 // writes from) the output of the given instruction in order to avoid RaW and
4259 // WaW hazards.
4260 Instruction *
4261 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4262 {
4263 Instruction *insn, *next;
4264
4265 if (!bari->defExists(0))
4266 return NULL;
4267
4268 for (insn = bari->next; insn != NULL; insn = next) {
4269 next = insn->next;
4270
4271 for (int s = 0; insn->srcExists(s); ++s)
4272 if (doesInsnWriteTo(bari, insn->getSrc(s)))
4273 return insn;
4274
4275 for (int d = 0; insn->defExists(d); ++d)
4276 if (doesInsnWriteTo(bari, insn->getDef(d)))
4277 return insn;
4278 }
4279 return NULL;
4280 }
4281
4282 // Find the next instruction inside the same basic block which overwrites, at
4283 // least, one source of the given instruction in order to avoid WaR hazards.
4284 Instruction *
4285 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4286 {
4287 Instruction *insn, *next;
4288
4289 if (!bari->srcExists(0))
4290 return NULL;
4291
4292 for (insn = bari->next; insn != NULL; insn = next) {
4293 next = insn->next;
4294
4295 for (int s = 0; bari->srcExists(s); ++s)
4296 if (doesInsnWriteTo(insn, bari->getSrc(s)))
4297 return insn;
4298 }
4299 return NULL;
4300 }
4301
4302 // Dependency barriers:
4303 // This pass is a bit ugly and could probably be improved by performing a
4304 // better allocation.
4305 //
4306 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4307 // dependency barriers using the control codes.
4308 bool
4309 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4310 {
4311 std::list<LiveBarUse> live_uses;
4312 std::list<LiveBarDef> live_defs;
4313 Instruction *insn, *next;
4314 BitSet bars(6, 1);
4315 int bar_id;
4316
4317 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4318 Instruction *usei = NULL, *defi = NULL;
4319 bool need_wr_bar, need_rd_bar;
4320
4321 next = insn->next;
4322
4323 // Expire old barrier uses.
4324 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4325 it != live_uses.end();) {
4326 if (insn->serial >= it->usei->serial) {
4327 int wr = getWrDepBar(it->insn);
4328 emitWtDepBar(insn, wr);
4329 bars.clr(wr); // free barrier
4330 it = live_uses.erase(it);
4331 continue;
4332 }
4333 ++it;
4334 }
4335
4336 // Expire old barrier defs.
4337 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4338 it != live_defs.end();) {
4339 if (insn->serial >= it->defi->serial) {
4340 int rd = getRdDepBar(it->insn);
4341 emitWtDepBar(insn, rd);
4342 bars.clr(rd); // free barrier
4343 it = live_defs.erase(it);
4344 continue;
4345 }
4346 ++it;
4347 }
4348
4349 need_wr_bar = needWrDepBar(insn);
4350 need_rd_bar = needRdDepBar(insn);
4351
4352 if (need_wr_bar) {
4353 // When the instruction requires to emit a write dependency barrier
4354 // (all which write something at a variable latency), find the next
4355 // instruction which reads the outputs (or writes to them, potentially
4356 // completing before this insn.
4357 usei = findFirstUse(insn);
4358
4359 // Allocate and emit a new barrier.
4360 bar_id = bars.findFreeRange(1);
4361 if (bar_id == -1)
4362 bar_id = 5;
4363 bars.set(bar_id);
4364 emitWrDepBar(insn, bar_id);
4365 if (usei)
4366 live_uses.push_back(LiveBarUse(insn, usei));
4367 }
4368
4369 if (need_rd_bar) {
4370 // When the instruction requires to emit a read dependency barrier
4371 // (all which read something at a variable latency), find the next
4372 // instruction which will write the inputs.
4373 defi = findFirstDef(insn);
4374
4375 if (usei && defi && usei->serial <= defi->serial)
4376 continue;
4377
4378 // Allocate and emit a new barrier.
4379 bar_id = bars.findFreeRange(1);
4380 if (bar_id == -1)
4381 bar_id = 5;
4382 bars.set(bar_id);
4383 emitRdDepBar(insn, bar_id);
4384 if (defi)
4385 live_defs.push_back(LiveBarDef(insn, defi));
4386 }
4387 }
4388
4389 // Remove unnecessary barrier waits.
4390 BitSet alive_bars(6, 1);
4391 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4392 int wr, rd, wt;
4393
4394 next = insn->next;
4395
4396 wr = getWrDepBar(insn);
4397 rd = getRdDepBar(insn);
4398 wt = getWtDepBar(insn);
4399
4400 for (int idx = 0; idx < 6; ++idx) {
4401 if (!(wt & (1 << idx)))
4402 continue;
4403 if (!alive_bars.test(idx)) {
4404 insn->sched &= ~(1 << (11 + idx));
4405 } else {
4406 alive_bars.clr(idx);
4407 }
4408 }
4409
4410 if (wr < 6)
4411 alive_bars.set(wr);
4412 if (rd < 6)
4413 alive_bars.set(rd);
4414 }
4415
4416 return true;
4417 }
4418
4419 bool
4420 SchedDataCalculatorGM107::visit(Function *func)
4421 {
4422 ArrayList insns;
4423
4424 func->orderInstructions(insns);
4425
4426 scoreBoards.resize(func->cfg.getSize());
4427 for (size_t i = 0; i < scoreBoards.size(); ++i)
4428 scoreBoards[i].wipe();
4429 return true;
4430 }
4431
4432 bool
4433 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4434 {
4435 Instruction *insn, *next = NULL;
4436 int cycle = 0;
4437
4438 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4439 /*XXX*/
4440 insn->sched = 0x7e0;
4441 }
4442
4443 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4444 return true;
4445
4446 // Insert read/write dependency barriers for instructions which don't
4447 // operate at a fixed latency.
4448 insertBarriers(bb);
4449
4450 score = &scoreBoards.at(bb->getId());
4451
4452 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4453 // back branches will wait until all target dependencies are satisfied
4454 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4455 continue;
4456 BasicBlock *in = BasicBlock::get(ei.getNode());
4457 score->setMax(&scoreBoards.at(in->getId()));
4458 }
4459
4460 #ifdef GM107_DEBUG_SCHED_DATA
4461 INFO("=== BB:%i initial scores\n", bb->getId());
4462 score->print(cycle);
4463 #endif
4464
4465 // Because barriers are allocated locally (intra-BB), we have to make sure
4466 // that all produced barriers have been consumed before entering inside a
4467 // new basic block. The best way is to do a global allocation pre RA but
4468 // it's really more difficult, especially because of the phi nodes. Anyways,
4469 // it seems like that waiting on a barrier which has already been consumed
4470 // doesn't add any additional cost, it's just not elegant!
4471 Instruction *start = bb->getEntry();
4472 if (start && bb->cfg.incidentCount() > 0) {
4473 for (int b = 0; b < 6; b++)
4474 emitWtDepBar(start, b);
4475 }
4476
4477 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4478 next = insn->next;
4479
4480 commitInsn(insn, cycle);
4481 int delay = calcDelay(next, cycle);
4482 setDelay(insn, delay, next);
4483 cycle += getStall(insn);
4484
4485 setReuseFlag(insn);
4486
4487 // XXX: The yield flag seems to destroy a bunch of things when it is
4488 // set on every instruction, need investigation.
4489 //emitYield(insn);
4490
4491 #ifdef GM107_DEBUG_SCHED_DATA
4492 printSchedInfo(cycle, insn);
4493 insn->print();
4494 next->print();
4495 #endif
4496 }
4497
4498 if (!insn)
4499 return true;
4500 commitInsn(insn, cycle);
4501
4502 int bbDelay = -1;
4503
4504 #ifdef GM107_DEBUG_SCHED_DATA
4505 fprintf(stderr, "last instruction is : ");
4506 insn->print();
4507 fprintf(stderr, "cycle=%d\n", cycle);
4508 #endif
4509
4510 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4511 BasicBlock *out = BasicBlock::get(ei.getNode());
4512
4513 if (ei.getType() != Graph::Edge::BACK) {
4514 // Only test the first instruction of the outgoing block.
4515 next = out->getEntry();
4516 if (next) {
4517 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4518 } else {
4519 // When the outgoing BB is empty, make sure to set the number of
4520 // stall counts needed by the instruction because we don't know the
4521 // next instruction.
4522 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4523 }
4524 } else {
4525 // Wait until all dependencies are satisfied.
4526 const int regsFree = score->getLatest();
4527 next = out->getFirst();
4528 for (int c = cycle; next && c < regsFree; next = next->next) {
4529 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4530 c += getStall(next);
4531 }
4532 next = NULL;
4533 }
4534 }
4535 if (bb->cfg.outgoingCount() != 1)
4536 next = NULL;
4537 setDelay(insn, bbDelay, next);
4538 cycle += getStall(insn);
4539
4540 score->rebase(cycle); // common base for initializing out blocks' scores
4541 return true;
4542 }
4543
4544 /*******************************************************************************
4545 * main
4546 ******************************************************************************/
4547
4548 void
4549 CodeEmitterGM107::prepareEmission(Function *func)
4550 {
4551 SchedDataCalculatorGM107 sched(targGM107);
4552 CodeEmitter::prepareEmission(func);
4553 sched.run(func, true, true);
4554 }
4555
// Number of 24-byte groups needed to hold 'size' bytes, rounded up.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t padded = size + 23;
   return padded / 24;
}
4560
4561 void
4562 CodeEmitterGM107::prepareEmission(Program *prog)
4563 {
4564 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4565 !fi.end(); fi.next()) {
4566 Function *func = reinterpret_cast<Function *>(fi.get());
4567 func->binPos = prog->binSize;
4568 prepareEmission(func);
4569
4570 // adjust sizes & positions for schedulding info:
4571 if (prog->getTarget()->hasSWSched) {
4572 uint32_t adjPos = func->binPos;
4573 BasicBlock *bb = NULL;
4574 for (int i = 0; i < func->bbCount; ++i) {
4575 bb = func->bbArray[i];
4576 int32_t adjSize = bb->binSize;
4577 if (adjPos % 32) {
4578 adjSize -= 32 - adjPos % 32;
4579 if (adjSize < 0)
4580 adjSize = 0;
4581 }
4582 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4583 bb->binPos = adjPos;
4584 bb->binSize = adjSize;
4585 adjPos += adjSize;
4586 }
4587 if (bb)
4588 func->binSize = adjPos - func->binPos;
4589 }
4590
4591 prog->binSize += func->binSize;
4592 }
4593 }
4594
4595 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4596 : CodeEmitter(target),
4597 targGM107(target),
4598 writeIssueDelays(target->hasSWSched)
4599 {
4600 code = NULL;
4601 codeSize = codeSizeLimit = 0;
4602 relocInfo = NULL;
4603 }
4604
4605 CodeEmitter *
4606 TargetGM107::createCodeEmitterGM107(Program::Type type)
4607 {
4608 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4609 emit->setProgramType(type);
4610 return emit;
4611 }
4612
4613 } // namespace nv50_ir