ae07d9672217b7600f0d01678717fbcd537c5f87
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25 #include "codegen/nv50_ir_driver.h"
26
27 extern "C" {
28 #include "nouveau_debug.h"
29 #include "nv50/nv50_program.h"
30 }
31
32 namespace nv50_ir {
33
34 Modifier::Modifier(operation op)
35 {
36 switch (op) {
37 case OP_NEG: bits = NV50_IR_MOD_NEG; break;
38 case OP_ABS: bits = NV50_IR_MOD_ABS; break;
39 case OP_SAT: bits = NV50_IR_MOD_SAT; break;
40 case OP_NOT: bits = NV50_IR_MOD_NOT; break;
41 default:
42 bits = 0;
43 break;
44 }
45 }
46
47 Modifier Modifier::operator*(const Modifier m) const
48 {
49 unsigned int a, b, c;
50
51 b = m.bits;
52 if (this->bits & NV50_IR_MOD_ABS)
53 b &= ~NV50_IR_MOD_NEG;
54
55 a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
56 c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
57
58 return Modifier(a | c);
59 }
60
61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
62 {
63 indirect[0] = -1;
64 indirect[1] = -1;
65 usedAsPtr = false;
66 set(v);
67 }
68
69 ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
70 {
71 set(ref);
72 usedAsPtr = ref.usedAsPtr;
73 }
74
75 ValueRef::~ValueRef()
76 {
77 this->set(NULL);
78 }
79
80 bool ValueRef::getImmediate(ImmediateValue &imm) const
81 {
82 const ValueRef *src = this;
83 Modifier m;
84 DataType type = src->insn->sType;
85
86 while (src) {
87 if (src->mod) {
88 if (src->insn->sType != type)
89 break;
90 m *= src->mod;
91 }
92 if (src->getFile() == FILE_IMMEDIATE) {
93 imm = *(src->value->asImm());
94 // The immediate's type isn't required to match its use, it's
95 // more of a hint; applying a modifier makes use of that hint.
96 imm.reg.type = type;
97 m.applyTo(imm);
98 return true;
99 }
100
101 Instruction *insn = src->value->getUniqueInsn();
102
103 if (insn && insn->op == OP_MOV) {
104 src = &insn->src(0);
105 if (src->mod)
106 WARN("OP_MOV with modifier encountered !\n");
107 } else {
108 src = NULL;
109 }
110 }
111 return false;
112 }
113
114 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
115 {
116 set(v);
117 }
118
119 ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
120 {
121 set(def.get());
122 }
123
124 ValueDef::~ValueDef()
125 {
126 this->set(NULL);
127 }
128
129 void
130 ValueRef::set(const ValueRef &ref)
131 {
132 this->set(ref.get());
133 mod = ref.mod;
134 indirect[0] = ref.indirect[0];
135 indirect[1] = ref.indirect[1];
136 }
137
138 void
139 ValueRef::set(Value *refVal)
140 {
141 if (value == refVal)
142 return;
143 if (value)
144 value->uses.erase(this);
145 if (refVal)
146 refVal->uses.insert(this);
147
148 value = refVal;
149 }
150
151 void
152 ValueDef::set(Value *defVal)
153 {
154 if (value == defVal)
155 return;
156 if (value)
157 value->defs.remove(this);
158 if (defVal)
159 defVal->defs.push_back(this);
160
161 value = defVal;
162 }
163
164 // Check if we can replace this definition's value by the value in @rep,
165 // including the source modifiers, i.e. make sure that all uses support
166 // @rep.mod.
167 bool
168 ValueDef::mayReplace(const ValueRef &rep)
169 {
170 if (!rep.mod)
171 return true;
172
173 if (!insn || !insn->bb) // Unbound instruction ?
174 return false;
175
176 const Target *target = insn->bb->getProgram()->getTarget();
177
178 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
179 ++it) {
180 Instruction *insn = (*it)->getInsn();
181 int s = -1;
182
183 for (int i = 0; insn->srcExists(i); ++i) {
184 if (insn->src(i).get() == value) {
185 // If there are multiple references to us we'd have to check if the
186 // combination of mods is still supported, but just bail for now.
187 if (&insn->src(i) != (*it))
188 return false;
189 s = i;
190 }
191 }
192 assert(s >= 0); // integrity of uses list
193
194 if (!target->isModSupported(insn, s, rep.mod))
195 return false;
196 }
197 return true;
198 }
199
200 void
201 ValueDef::replace(const ValueRef &repVal, bool doSet)
202 {
203 assert(mayReplace(repVal));
204
205 if (value == repVal.get())
206 return;
207
208 while (!value->uses.empty()) {
209 ValueRef *ref = *value->uses.begin();
210 ref->set(repVal.get());
211 ref->mod *= repVal.mod;
212 }
213
214 if (doSet)
215 set(repVal.get());
216 }
217
218 Value::Value()
219 {
220 join = this;
221 memset(&reg, 0, sizeof(reg));
222 reg.size = 4;
223 }
224
225 LValue::LValue(Function *fn, DataFile file)
226 {
227 reg.file = file;
228 reg.size = (file != FILE_PREDICATE) ? 4 : 1;
229 reg.data.id = -1;
230
231 compMask = 0;
232 compound = 0;
233 ssa = 0;
234 fixedReg = 0;
235 noSpill = 0;
236
237 fn->add(this, this->id);
238 }
239
240 LValue::LValue(Function *fn, LValue *lval)
241 {
242 assert(lval);
243
244 reg.file = lval->reg.file;
245 reg.size = lval->reg.size;
246 reg.data.id = -1;
247
248 compMask = 0;
249 compound = 0;
250 ssa = 0;
251 fixedReg = 0;
252 noSpill = 0;
253
254 fn->add(this, this->id);
255 }
256
257 LValue *
258 LValue::clone(ClonePolicy<Function>& pol) const
259 {
260 LValue *that = new_LValue(pol.context(), reg.file);
261
262 pol.set<Value>(this, that);
263
264 that->reg.size = this->reg.size;
265 that->reg.type = this->reg.type;
266 that->reg.data = this->reg.data;
267
268 return that;
269 }
270
271 bool
272 LValue::isUniform() const
273 {
274 if (defs.size() > 1)
275 return false;
276 Instruction *insn = getInsn();
277 if (!insn)
278 return false;
279 // let's not try too hard here for now ...
280 return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
281 }
282
283 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
284 {
285 baseSym = NULL;
286
287 reg.file = f;
288 reg.fileIndex = fidx;
289 reg.data.offset = 0;
290
291 prog->add(this, this->id);
292 }
293
294 Symbol *
295 Symbol::clone(ClonePolicy<Function>& pol) const
296 {
297 Program *prog = pol.context()->getProgram();
298
299 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
300
301 pol.set<Value>(this, that);
302
303 that->reg.size = this->reg.size;
304 that->reg.type = this->reg.type;
305 that->reg.data = this->reg.data;
306
307 that->baseSym = this->baseSym;
308
309 return that;
310 }
311
312 bool
313 Symbol::isUniform() const
314 {
315 return
316 reg.file != FILE_SYSTEM_VALUE &&
317 reg.file != FILE_MEMORY_LOCAL &&
318 reg.file != FILE_SHADER_INPUT;
319 }
320
321 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
322 {
323 memset(&reg, 0, sizeof(reg));
324
325 reg.file = FILE_IMMEDIATE;
326 reg.size = 4;
327 reg.type = TYPE_U32;
328
329 reg.data.u32 = uval;
330
331 prog->add(this, this->id);
332 }
333
334 ImmediateValue::ImmediateValue(Program *prog, float fval)
335 {
336 memset(&reg, 0, sizeof(reg));
337
338 reg.file = FILE_IMMEDIATE;
339 reg.size = 4;
340 reg.type = TYPE_F32;
341
342 reg.data.f32 = fval;
343
344 prog->add(this, this->id);
345 }
346
347 ImmediateValue::ImmediateValue(Program *prog, double dval)
348 {
349 memset(&reg, 0, sizeof(reg));
350
351 reg.file = FILE_IMMEDIATE;
352 reg.size = 8;
353 reg.type = TYPE_F64;
354
355 reg.data.f64 = dval;
356
357 prog->add(this, this->id);
358 }
359
360 ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
361 {
362 reg = proto->reg;
363
364 reg.type = ty;
365 reg.size = typeSizeof(ty);
366 }
367
368 ImmediateValue *
369 ImmediateValue::clone(ClonePolicy<Function>& pol) const
370 {
371 Program *prog = pol.context()->getProgram();
372 ImmediateValue *that = new_ImmediateValue(prog, 0u);
373
374 pol.set<Value>(this, that);
375
376 that->reg.size = this->reg.size;
377 that->reg.type = this->reg.type;
378 that->reg.data = this->reg.data;
379
380 return that;
381 }
382
383 bool
384 ImmediateValue::isInteger(const int i) const
385 {
386 switch (reg.type) {
387 case TYPE_S8:
388 return reg.data.s8 == i;
389 case TYPE_U8:
390 return reg.data.u8 == i;
391 case TYPE_S16:
392 return reg.data.s16 == i;
393 case TYPE_U16:
394 return reg.data.u16 == i;
395 case TYPE_S32:
396 case TYPE_U32:
397 return reg.data.s32 == i; // as if ...
398 case TYPE_S64:
399 case TYPE_U64:
400 return reg.data.s64 == i; // as if ...
401 case TYPE_F32:
402 return reg.data.f32 == static_cast<float>(i);
403 case TYPE_F64:
404 return reg.data.f64 == static_cast<double>(i);
405 default:
406 return false;
407 }
408 }
409
410 bool
411 ImmediateValue::isNegative() const
412 {
413 switch (reg.type) {
414 case TYPE_S8: return reg.data.s8 < 0;
415 case TYPE_S16: return reg.data.s16 < 0;
416 case TYPE_S32:
417 case TYPE_U32: return reg.data.s32 < 0;
418 case TYPE_F32: return reg.data.u32 & (1 << 31);
419 case TYPE_F64: return reg.data.u64 & (1ULL << 63);
420 default:
421 return false;
422 }
423 }
424
425 bool
426 ImmediateValue::isPow2() const
427 {
428 if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
429 return util_is_power_of_two_or_zero64(reg.data.u64);
430 else
431 return util_is_power_of_two_or_zero(reg.data.u32);
432 }
433
434 void
435 ImmediateValue::applyLog2()
436 {
437 switch (reg.type) {
438 case TYPE_S8:
439 case TYPE_S16:
440 case TYPE_S32:
441 assert(!this->isNegative());
442 // fall through
443 case TYPE_U8:
444 case TYPE_U16:
445 case TYPE_U32:
446 reg.data.u32 = util_logbase2(reg.data.u32);
447 break;
448 case TYPE_S64:
449 assert(!this->isNegative());
450 // fall through
451 case TYPE_U64:
452 reg.data.u64 = util_logbase2_64(reg.data.u64);
453 break;
454 case TYPE_F32:
455 reg.data.f32 = log2f(reg.data.f32);
456 break;
457 case TYPE_F64:
458 reg.data.f64 = log2(reg.data.f64);
459 break;
460 default:
461 assert(0);
462 break;
463 }
464 }
465
466 bool
467 ImmediateValue::compare(CondCode cc, float fval) const
468 {
469 if (reg.type != TYPE_F32)
470 ERROR("immediate value is not of type f32");
471
472 switch (static_cast<CondCode>(cc & 7)) {
473 case CC_TR: return true;
474 case CC_FL: return false;
475 case CC_LT: return reg.data.f32 < fval;
476 case CC_LE: return reg.data.f32 <= fval;
477 case CC_GT: return reg.data.f32 > fval;
478 case CC_GE: return reg.data.f32 >= fval;
479 case CC_EQ: return reg.data.f32 == fval;
480 case CC_NE: return reg.data.f32 != fval;
481 default:
482 assert(0);
483 return false;
484 }
485 }
486
487 ImmediateValue&
488 ImmediateValue::operator=(const ImmediateValue &that)
489 {
490 this->reg = that.reg;
491 return (*this);
492 }
493
494 bool
495 Value::interfers(const Value *that) const
496 {
497 uint32_t idA, idB;
498
499 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
500 return false;
501 if (this->asImm())
502 return false;
503
504 if (this->asSym()) {
505 idA = this->join->reg.data.offset;
506 idB = that->join->reg.data.offset;
507 } else {
508 idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
509 idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
510 }
511
512 if (idA < idB)
513 return (idA + this->reg.size > idB);
514 else
515 if (idA > idB)
516 return (idB + that->reg.size > idA);
517 else
518 return (idA == idB);
519 }
520
521 bool
522 Value::equals(const Value *that, bool strict) const
523 {
524 if (strict)
525 return this == that;
526
527 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
528 return false;
529 if (that->reg.size != this->reg.size)
530 return false;
531
532 if (that->reg.data.id != this->reg.data.id)
533 return false;
534
535 return true;
536 }
537
538 bool
539 ImmediateValue::equals(const Value *that, bool strict) const
540 {
541 const ImmediateValue *imm = that->asImm();
542 if (!imm)
543 return false;
544 return reg.data.u64 == imm->reg.data.u64;
545 }
546
547 bool
548 Symbol::equals(const Value *that, bool strict) const
549 {
550 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
551 return false;
552 assert(that->asSym());
553
554 if (this->baseSym != that->asSym()->baseSym)
555 return false;
556
557 if (reg.file == FILE_SYSTEM_VALUE)
558 return (this->reg.data.sv.sv == that->reg.data.sv.sv &&
559 this->reg.data.sv.index == that->reg.data.sv.index);
560 return this->reg.data.offset == that->reg.data.offset;
561 }
562
563 void Instruction::init()
564 {
565 next = prev = 0;
566
567 cc = CC_ALWAYS;
568 rnd = ROUND_N;
569 cache = CACHE_CA;
570 subOp = 0;
571
572 saturate = 0;
573 join = 0;
574 exit = 0;
575 terminator = 0;
576 ftz = 0;
577 dnz = 0;
578 perPatch = 0;
579 fixed = 0;
580 encSize = 0;
581 ipa = 0;
582 mask = 0;
583 precise = 0;
584
585 lanes = 0xf;
586
587 postFactor = 0;
588
589 predSrc = -1;
590 flagsDef = -1;
591 flagsSrc = -1;
592 }
593
594 Instruction::Instruction()
595 {
596 init();
597
598 op = OP_NOP;
599 dType = sType = TYPE_F32;
600
601 id = -1;
602 bb = 0;
603 }
604
605 Instruction::Instruction(Function *fn, operation opr, DataType ty)
606 {
607 init();
608
609 op = opr;
610 dType = sType = ty;
611
612 fn->add(this, id);
613 }
614
615 Instruction::~Instruction()
616 {
617 if (bb) {
618 Function *fn = bb->getFunction();
619 bb->remove(this);
620 fn->allInsns.remove(id);
621 }
622
623 for (int s = 0; srcExists(s); ++s)
624 setSrc(s, NULL);
625 // must unlink defs too since the list pointers will get deallocated
626 for (int d = 0; defExists(d); ++d)
627 setDef(d, NULL);
628 }
629
630 void
631 Instruction::setDef(int i, Value *val)
632 {
633 int size = defs.size();
634 if (i >= size) {
635 defs.resize(i + 1);
636 while (size <= i)
637 defs[size++].setInsn(this);
638 }
639 defs[i].set(val);
640 }
641
642 void
643 Instruction::setSrc(int s, Value *val)
644 {
645 int size = srcs.size();
646 if (s >= size) {
647 srcs.resize(s + 1);
648 while (size <= s)
649 srcs[size++].setInsn(this);
650 }
651 srcs[s].set(val);
652 }
653
654 void
655 Instruction::setSrc(int s, const ValueRef& ref)
656 {
657 setSrc(s, ref.get());
658 srcs[s].mod = ref.mod;
659 }
660
661 void
662 Instruction::swapSources(int a, int b)
663 {
664 Value *value = srcs[a].get();
665 Modifier m = srcs[a].mod;
666
667 setSrc(a, srcs[b]);
668
669 srcs[b].set(value);
670 srcs[b].mod = m;
671 }
672
// Adjust a source index for a move of sources [@s, last] by @delta:
// indices at or above @s are shifted by @delta; for a negative @delta,
// indices inside the erased range [@s + @delta, @s) are reset to -1.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }

   const bool erased = (delta < 0) && (index >= (s + delta));
   if (erased)
      index = -1;
}
681
682 // Moves sources [@s,last_source] by @delta.
683 // If @delta < 0, sources [@s - abs(@delta), @s) are erased.
684 void
685 Instruction::moveSources(const int s, const int delta)
686 {
687 if (delta == 0)
688 return;
689 assert(s + delta >= 0);
690
691 int k;
692
693 for (k = 0; srcExists(k); ++k) {
694 for (int i = 0; i < 2; ++i)
695 moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
696 }
697 moveSourcesAdjustIndex(predSrc, s, delta);
698 moveSourcesAdjustIndex(flagsSrc, s, delta);
699 if (asTex()) {
700 TexInstruction *tex = asTex();
701 moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
702 moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
703 }
704
705 if (delta > 0) {
706 --k;
707 for (int p = k + delta; k >= s; --k, --p)
708 setSrc(p, src(k));
709 } else {
710 int p;
711 for (p = s; p < k; ++p)
712 setSrc(p + delta, src(p));
713 for (; (p + delta) < k; ++p)
714 setSrc(p + delta, NULL);
715 }
716 }
717
718 void
719 Instruction::takeExtraSources(int s, Value *values[3])
720 {
721 values[0] = getIndirect(s, 0);
722 if (values[0])
723 setIndirect(s, 0, NULL);
724
725 values[1] = getIndirect(s, 1);
726 if (values[1])
727 setIndirect(s, 1, NULL);
728
729 values[2] = getPredicate();
730 if (values[2])
731 setPredicate(cc, NULL);
732 }
733
734 void
735 Instruction::putExtraSources(int s, Value *values[3])
736 {
737 if (values[0])
738 setIndirect(s, 0, values[0]);
739 if (values[1])
740 setIndirect(s, 1, values[1]);
741 if (values[2])
742 setPredicate(cc, values[2]);
743 }
744
745 Instruction *
746 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
747 {
748 if (!i)
749 i = new_Instruction(pol.context(), op, dType);
750 #ifndef NDEBUG // non-conformant assert, so this is required
751 assert(typeid(*i) == typeid(*this));
752 #endif
753
754 pol.set<Instruction>(this, i);
755
756 i->sType = sType;
757
758 i->rnd = rnd;
759 i->cache = cache;
760 i->subOp = subOp;
761
762 i->saturate = saturate;
763 i->join = join;
764 i->exit = exit;
765 i->mask = mask;
766 i->ftz = ftz;
767 i->dnz = dnz;
768 i->ipa = ipa;
769 i->lanes = lanes;
770 i->perPatch = perPatch;
771
772 i->postFactor = postFactor;
773
774 for (int d = 0; defExists(d); ++d)
775 i->setDef(d, pol.get(getDef(d)));
776
777 for (int s = 0; srcExists(s); ++s) {
778 i->setSrc(s, pol.get(getSrc(s)));
779 i->src(s).mod = src(s).mod;
780 }
781
782 i->cc = cc;
783 i->predSrc = predSrc;
784 i->flagsDef = flagsDef;
785 i->flagsSrc = flagsSrc;
786
787 return i;
788 }
789
790 unsigned int
791 Instruction::defCount(unsigned int mask, bool singleFile) const
792 {
793 unsigned int i, n;
794
795 if (singleFile) {
796 unsigned int d = ffs(mask);
797 if (!d)
798 return 0;
799 for (i = d--; defExists(i); ++i)
800 if (getDef(i)->reg.file != getDef(d)->reg.file)
801 mask &= ~(1 << i);
802 }
803
804 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
805 n += mask & 1;
806 return n;
807 }
808
809 unsigned int
810 Instruction::srcCount(unsigned int mask, bool singleFile) const
811 {
812 unsigned int i, n;
813
814 if (singleFile) {
815 unsigned int s = ffs(mask);
816 if (!s)
817 return 0;
818 for (i = s--; srcExists(i); ++i)
819 if (getSrc(i)->reg.file != getSrc(s)->reg.file)
820 mask &= ~(1 << i);
821 }
822
823 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
824 n += mask & 1;
825 return n;
826 }
827
828 bool
829 Instruction::setIndirect(int s, int dim, Value *value)
830 {
831 assert(this->srcExists(s));
832
833 int p = srcs[s].indirect[dim];
834 if (p < 0) {
835 if (!value)
836 return true;
837 p = srcs.size();
838 while (p > 0 && !srcExists(p - 1))
839 --p;
840 }
841 setSrc(p, value);
842 srcs[p].usedAsPtr = (value != 0);
843 srcs[s].indirect[dim] = value ? p : -1;
844 return true;
845 }
846
847 bool
848 Instruction::setPredicate(CondCode ccode, Value *value)
849 {
850 cc = ccode;
851
852 if (!value) {
853 if (predSrc >= 0) {
854 srcs[predSrc].set(NULL);
855 predSrc = -1;
856 }
857 return true;
858 }
859
860 if (predSrc < 0) {
861 predSrc = srcs.size();
862 while (predSrc > 0 && !srcExists(predSrc - 1))
863 --predSrc;
864 }
865
866 setSrc(predSrc, value);
867 return true;
868 }
869
870 bool
871 Instruction::writesPredicate() const
872 {
873 for (int d = 0; defExists(d); ++d)
874 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
875 return true;
876 return false;
877 }
878
879 bool
880 Instruction::canCommuteDefSrc(const Instruction *i) const
881 {
882 for (int d = 0; defExists(d); ++d)
883 for (int s = 0; i->srcExists(s); ++s)
884 if (getDef(d)->interfers(i->getSrc(s)))
885 return false;
886 return true;
887 }
888
889 bool
890 Instruction::canCommuteDefDef(const Instruction *i) const
891 {
892 for (int d = 0; defExists(d); ++d)
893 for (int c = 0; i->defExists(c); ++c)
894 if (getDef(d)->interfers(i->getDef(c)))
895 return false;
896 return true;
897 }
898
899 bool
900 Instruction::isCommutationLegal(const Instruction *i) const
901 {
902 return canCommuteDefDef(i) &&
903 canCommuteDefSrc(i) &&
904 i->canCommuteDefSrc(this);
905 }
906
907 TexInstruction::TexInstruction(Function *fn, operation op)
908 : Instruction(fn, op, TYPE_F32)
909 {
910 memset(&tex, 0, sizeof(tex));
911
912 tex.rIndirectSrc = -1;
913 tex.sIndirectSrc = -1;
914
915 if (op == OP_TXF)
916 sType = TYPE_U32;
917 }
918
919 TexInstruction::~TexInstruction()
920 {
921 for (int c = 0; c < 3; ++c) {
922 dPdx[c].set(NULL);
923 dPdy[c].set(NULL);
924 }
925 for (int n = 0; n < 4; ++n)
926 for (int c = 0; c < 3; ++c)
927 offset[n][c].set(NULL);
928 }
929
930 TexInstruction *
931 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
932 {
933 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
934 new_TexInstruction(pol.context(), op));
935
936 Instruction::clone(pol, tex);
937
938 tex->tex = this->tex;
939
940 if (op == OP_TXD) {
941 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
942 tex->dPdx[c].set(dPdx[c]);
943 tex->dPdy[c].set(dPdy[c]);
944 }
945 }
946
947 for (int n = 0; n < tex->tex.useOffsets; ++n)
948 for (int c = 0; c < 3; ++c)
949 tex->offset[n][c].set(offset[n][c]);
950
951 return tex;
952 }
953
954 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
955 {
956 { "1D", 1, 1, false, false, false },
957 { "2D", 2, 2, false, false, false },
958 { "2D_MS", 2, 3, false, false, false },
959 { "3D", 3, 3, false, false, false },
960 { "CUBE", 2, 3, false, true, false },
961 { "1D_SHADOW", 1, 1, false, false, true },
962 { "2D_SHADOW", 2, 2, false, false, true },
963 { "CUBE_SHADOW", 2, 3, false, true, true },
964 { "1D_ARRAY", 1, 2, true, false, false },
965 { "2D_ARRAY", 2, 3, true, false, false },
966 { "2D_MS_ARRAY", 2, 4, true, false, false },
967 { "CUBE_ARRAY", 2, 4, true, true, false },
968 { "1D_ARRAY_SHADOW", 1, 2, true, false, true },
969 { "2D_ARRAY_SHADOW", 2, 3, true, false, true },
970 { "RECT", 2, 2, false, false, false },
971 { "RECT_SHADOW", 2, 2, false, false, true },
972 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },
973 { "BUFFER", 1, 1, false, false, false },
974 };
975
976 const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
977 {
978 { "NONE", 0, { 0, 0, 0, 0 }, UINT },
979
980 { "RGBA32F", 4, { 32, 32, 32, 32 }, FLOAT },
981 { "RGBA16F", 4, { 16, 16, 16, 16 }, FLOAT },
982 { "RG32F", 2, { 32, 32, 0, 0 }, FLOAT },
983 { "RG16F", 2, { 16, 16, 0, 0 }, FLOAT },
984 { "R11G11B10F", 3, { 11, 11, 10, 0 }, FLOAT },
985 { "R32F", 1, { 32, 0, 0, 0 }, FLOAT },
986 { "R16F", 1, { 16, 0, 0, 0 }, FLOAT },
987
988 { "RGBA32UI", 4, { 32, 32, 32, 32 }, UINT },
989 { "RGBA16UI", 4, { 16, 16, 16, 16 }, UINT },
990 { "RGB10A2UI", 4, { 10, 10, 10, 2 }, UINT },
991 { "RGBA8UI", 4, { 8, 8, 8, 8 }, UINT },
992 { "RG32UI", 2, { 32, 32, 0, 0 }, UINT },
993 { "RG16UI", 2, { 16, 16, 0, 0 }, UINT },
994 { "RG8UI", 2, { 8, 8, 0, 0 }, UINT },
995 { "R32UI", 1, { 32, 0, 0, 0 }, UINT },
996 { "R16UI", 1, { 16, 0, 0, 0 }, UINT },
997 { "R8UI", 1, { 8, 0, 0, 0 }, UINT },
998
999 { "RGBA32I", 4, { 32, 32, 32, 32 }, SINT },
1000 { "RGBA16I", 4, { 16, 16, 16, 16 }, SINT },
1001 { "RGBA8I", 4, { 8, 8, 8, 8 }, SINT },
1002 { "RG32I", 2, { 32, 32, 0, 0 }, SINT },
1003 { "RG16I", 2, { 16, 16, 0, 0 }, SINT },
1004 { "RG8I", 2, { 8, 8, 0, 0 }, SINT },
1005 { "R32I", 1, { 32, 0, 0, 0 }, SINT },
1006 { "R16I", 1, { 16, 0, 0, 0 }, SINT },
1007 { "R8I", 1, { 8, 0, 0, 0 }, SINT },
1008
1009 { "RGBA16", 4, { 16, 16, 16, 16 }, UNORM },
1010 { "RGB10A2", 4, { 10, 10, 10, 2 }, UNORM },
1011 { "RGBA8", 4, { 8, 8, 8, 8 }, UNORM },
1012 { "RG16", 2, { 16, 16, 0, 0 }, UNORM },
1013 { "RG8", 2, { 8, 8, 0, 0 }, UNORM },
1014 { "R16", 1, { 16, 0, 0, 0 }, UNORM },
1015 { "R8", 1, { 8, 0, 0, 0 }, UNORM },
1016
1017 { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
1018 { "RGBA8_SNORM", 4, { 8, 8, 8, 8 }, SNORM },
1019 { "RG16_SNORM", 2, { 16, 16, 0, 0 }, SNORM },
1020 { "RG8_SNORM", 2, { 8, 8, 0, 0 }, SNORM },
1021 { "R16_SNORM", 1, { 16, 0, 0, 0 }, SNORM },
1022 { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM },
1023
1024 { "BGRA8", 4, { 8, 8, 8, 8 }, UNORM, true },
1025 };
1026
1027 void
1028 TexInstruction::setIndirectR(Value *v)
1029 {
1030 int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1031 if (p >= 0) {
1032 tex.rIndirectSrc = p;
1033 setSrc(p, v);
1034 srcs[p].usedAsPtr = !!v;
1035 }
1036 }
1037
1038 void
1039 TexInstruction::setIndirectS(Value *v)
1040 {
1041 int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1042 if (p >= 0) {
1043 tex.sIndirectSrc = p;
1044 setSrc(p, v);
1045 srcs[p].usedAsPtr = !!v;
1046 }
1047 }
1048
1049 CmpInstruction::CmpInstruction(Function *fn, operation op)
1050 : Instruction(fn, op, TYPE_F32)
1051 {
1052 setCond = CC_ALWAYS;
1053 }
1054
1055 CmpInstruction *
1056 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1057 {
1058 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1059 new_CmpInstruction(pol.context(), op));
1060 cmp->dType = dType;
1061 Instruction::clone(pol, cmp);
1062 cmp->setCond = setCond;
1063 return cmp;
1064 }
1065
1066 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1067 : Instruction(fn, op, TYPE_NONE)
1068 {
1069 if (op == OP_CALL)
1070 target.fn = reinterpret_cast<Function *>(targ);
1071 else
1072 target.bb = reinterpret_cast<BasicBlock *>(targ);
1073
1074 if (op == OP_BRA ||
1075 op == OP_CONT || op == OP_BREAK ||
1076 op == OP_RET || op == OP_EXIT)
1077 terminator = 1;
1078 else
1079 if (op == OP_JOIN)
1080 terminator = targ ? 1 : 0;
1081
1082 allWarp = absolute = limit = builtin = indirect = 0;
1083 }
1084
1085 FlowInstruction *
1086 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1087 {
1088 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1089 new_FlowInstruction(pol.context(), op, NULL));
1090
1091 Instruction::clone(pol, flow);
1092 flow->allWarp = allWarp;
1093 flow->absolute = absolute;
1094 flow->limit = limit;
1095 flow->builtin = builtin;
1096
1097 if (builtin)
1098 flow->target.builtin = target.builtin;
1099 else
1100 if (op == OP_CALL)
1101 flow->target.fn = target.fn;
1102 else
1103 if (target.bb)
1104 flow->target.bb = pol.get<BasicBlock>(target.bb);
1105
1106 return flow;
1107 }
1108
1109 Program::Program(Type type, Target *arch)
1110 : progType(type),
1111 target(arch),
1112 mem_Instruction(sizeof(Instruction), 6),
1113 mem_CmpInstruction(sizeof(CmpInstruction), 4),
1114 mem_TexInstruction(sizeof(TexInstruction), 4),
1115 mem_FlowInstruction(sizeof(FlowInstruction), 4),
1116 mem_LValue(sizeof(LValue), 8),
1117 mem_Symbol(sizeof(Symbol), 7),
1118 mem_ImmediateValue(sizeof(ImmediateValue), 7)
1119 {
1120 code = NULL;
1121 binSize = 0;
1122
1123 maxGPR = -1;
1124 fp64 = false;
1125
1126 main = new Function(this, "MAIN", ~0);
1127 calls.insert(&main->call);
1128
1129 dbgFlags = 0;
1130 optLevel = 0;
1131
1132 targetPriv = NULL;
1133 }
1134
1135 Program::~Program()
1136 {
1137 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1138 delete reinterpret_cast<Function *>(it.get());
1139
1140 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1141 releaseValue(reinterpret_cast<Value *>(it.get()));
1142 }
1143
1144 void Program::releaseInstruction(Instruction *insn)
1145 {
1146 // TODO: make this not suck so much
1147
1148 insn->~Instruction();
1149
1150 if (insn->asCmp())
1151 mem_CmpInstruction.release(insn);
1152 else
1153 if (insn->asTex())
1154 mem_TexInstruction.release(insn);
1155 else
1156 if (insn->asFlow())
1157 mem_FlowInstruction.release(insn);
1158 else
1159 mem_Instruction.release(insn);
1160 }
1161
1162 void Program::releaseValue(Value *value)
1163 {
1164 value->~Value();
1165
1166 if (value->asLValue())
1167 mem_LValue.release(value);
1168 else
1169 if (value->asImm())
1170 mem_ImmediateValue.release(value);
1171 else
1172 if (value->asSym())
1173 mem_Symbol.release(value);
1174 }
1175
1176
1177 } // namespace nv50_ir
1178
1179 extern "C" {
1180
1181 static void
1182 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
1183 {
1184 if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1185 info->prop.tp.domain = PIPE_PRIM_MAX;
1186 info->prop.tp.outputPrim = PIPE_PRIM_MAX;
1187 }
1188 if (info->type == PIPE_SHADER_GEOMETRY) {
1189 info->prop.gp.instanceCount = 1;
1190 info->prop.gp.maxVertices = 1;
1191 }
1192 if (info->type == PIPE_SHADER_COMPUTE) {
1193 info->prop.cp.numThreads[0] =
1194 info->prop.cp.numThreads[1] =
1195 info->prop.cp.numThreads[2] = 1;
1196 }
1197 info->io.pointSize = 0xff;
1198 info->io.instanceId = 0xff;
1199 info->io.vertexId = 0xff;
1200 info->io.edgeFlagIn = 0xff;
1201 info->io.edgeFlagOut = 0xff;
1202 info->io.fragDepth = 0xff;
1203 info->io.sampleMask = 0xff;
1204 info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
1205 }
1206
1207 int
1208 nv50_ir_generate_code(struct nv50_ir_prog_info *info)
1209 {
1210 int ret = 0;
1211
1212 nv50_ir::Program::Type type;
1213
1214 nv50_ir_init_prog_info(info);
1215
1216 #define PROG_TYPE_CASE(a, b) \
1217 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1218
1219 switch (info->type) {
1220 PROG_TYPE_CASE(VERTEX, VERTEX);
1221 PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1222 PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1223 PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1224 PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1225 PROG_TYPE_CASE(COMPUTE, COMPUTE);
1226 default:
1227 INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
1228 return -1;
1229 }
1230 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1231
1232 nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1233 if (!targ)
1234 return -1;
1235
1236 nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1237 if (!prog) {
1238 nv50_ir::Target::destroy(targ);
1239 return -1;
1240 }
1241 prog->driver = info;
1242 prog->dbgFlags = info->dbgFlags;
1243 prog->optLevel = info->optLevel;
1244
1245 switch (info->bin.sourceRep) {
1246 case PIPE_SHADER_IR_NIR:
1247 ret = prog->makeFromNIR(info) ? 0 : -2;
1248 break;
1249 case PIPE_SHADER_IR_TGSI:
1250 ret = prog->makeFromTGSI(info) ? 0 : -2;
1251 break;
1252 default:
1253 ret = -1;
1254 break;
1255 }
1256 if (ret < 0)
1257 goto out;
1258 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1259 prog->print();
1260
1261 targ->parseDriverInfo(info);
1262 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1263
1264 prog->convertToSSA();
1265
1266 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1267 prog->print();
1268
1269 prog->optimizeSSA(info->optLevel);
1270 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1271
1272 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1273 prog->print();
1274
1275 if (!prog->registerAllocation()) {
1276 ret = -4;
1277 goto out;
1278 }
1279 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1280
1281 prog->optimizePostRA(info->optLevel);
1282
1283 if (!prog->emitBinary(info)) {
1284 ret = -5;
1285 goto out;
1286 }
1287
1288 out:
1289 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1290
1291 info->bin.maxGPR = prog->maxGPR;
1292 info->bin.code = prog->code;
1293 info->bin.codeSize = prog->binSize;
1294 info->bin.tlsSpace = prog->tlsSize;
1295
1296 delete prog;
1297 nv50_ir::Target::destroy(targ);
1298
1299 return ret;
1300 }
1301
1302 } // extern "C"