gk110/ir: Use the new rcp/rsq in library
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25 #include "codegen/nv50_ir_driver.h"
26
27 extern "C" {
28 #include "nouveau_debug.h"
29 #include "nv50/nv50_program.h"
30 }
31
32 namespace nv50_ir {
33
34 Modifier::Modifier(operation op)
35 {
36 switch (op) {
37 case OP_NEG: bits = NV50_IR_MOD_NEG; break;
38 case OP_ABS: bits = NV50_IR_MOD_ABS; break;
39 case OP_SAT: bits = NV50_IR_MOD_SAT; break;
40 case OP_NOT: bits = NV50_IR_MOD_NOT; break;
41 default:
42 bits = 0;
43 break;
44 }
45 }
46
47 Modifier Modifier::operator*(const Modifier m) const
48 {
49 unsigned int a, b, c;
50
51 b = m.bits;
52 if (this->bits & NV50_IR_MOD_ABS)
53 b &= ~NV50_IR_MOD_NEG;
54
55 a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
56 c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
57
58 return Modifier(a | c);
59 }
60
61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
62 {
63 indirect[0] = -1;
64 indirect[1] = -1;
65 usedAsPtr = false;
66 set(v);
67 }
68
69 ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
70 {
71 set(ref);
72 usedAsPtr = ref.usedAsPtr;
73 }
74
75 ValueRef::~ValueRef()
76 {
77 this->set(NULL);
78 }
79
80 bool ValueRef::getImmediate(ImmediateValue &imm) const
81 {
82 const ValueRef *src = this;
83 Modifier m;
84 DataType type = src->insn->sType;
85
86 while (src) {
87 if (src->mod) {
88 if (src->insn->sType != type)
89 break;
90 m *= src->mod;
91 }
92 if (src->getFile() == FILE_IMMEDIATE) {
93 imm = *(src->value->asImm());
94 // The immediate's type isn't required to match its use, it's
95 // more of a hint; applying a modifier makes use of that hint.
96 imm.reg.type = type;
97 m.applyTo(imm);
98 return true;
99 }
100
101 Instruction *insn = src->value->getUniqueInsn();
102
103 if (insn && insn->op == OP_MOV) {
104 src = &insn->src(0);
105 if (src->mod)
106 WARN("OP_MOV with modifier encountered !\n");
107 } else {
108 src = NULL;
109 }
110 }
111 return false;
112 }
113
114 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
115 {
116 set(v);
117 }
118
119 ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
120 {
121 set(def.get());
122 }
123
124 ValueDef::~ValueDef()
125 {
126 this->set(NULL);
127 }
128
129 void
130 ValueRef::set(const ValueRef &ref)
131 {
132 this->set(ref.get());
133 mod = ref.mod;
134 indirect[0] = ref.indirect[0];
135 indirect[1] = ref.indirect[1];
136 }
137
138 void
139 ValueRef::set(Value *refVal)
140 {
141 if (value == refVal)
142 return;
143 if (value)
144 value->uses.erase(this);
145 if (refVal)
146 refVal->uses.insert(this);
147
148 value = refVal;
149 }
150
151 void
152 ValueDef::set(Value *defVal)
153 {
154 if (value == defVal)
155 return;
156 if (value)
157 value->defs.remove(this);
158 if (defVal)
159 defVal->defs.push_back(this);
160
161 value = defVal;
162 }
163
164 // Check if we can replace this definition's value by the value in @rep,
165 // including the source modifiers, i.e. make sure that all uses support
166 // @rep.mod.
167 bool
168 ValueDef::mayReplace(const ValueRef &rep)
169 {
170 if (!rep.mod)
171 return true;
172
173 if (!insn || !insn->bb) // Unbound instruction ?
174 return false;
175
176 const Target *target = insn->bb->getProgram()->getTarget();
177
178 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
179 ++it) {
180 Instruction *insn = (*it)->getInsn();
181 int s = -1;
182
183 for (int i = 0; insn->srcExists(i); ++i) {
184 if (insn->src(i).get() == value) {
185 // If there are multiple references to us we'd have to check if the
186 // combination of mods is still supported, but just bail for now.
187 if (&insn->src(i) != (*it))
188 return false;
189 s = i;
190 }
191 }
192 assert(s >= 0); // integrity of uses list
193
194 if (!target->isModSupported(insn, s, rep.mod))
195 return false;
196 }
197 return true;
198 }
199
200 void
201 ValueDef::replace(const ValueRef &repVal, bool doSet)
202 {
203 assert(mayReplace(repVal));
204
205 if (value == repVal.get())
206 return;
207
208 while (!value->uses.empty()) {
209 ValueRef *ref = *value->uses.begin();
210 ref->set(repVal.get());
211 ref->mod *= repVal.mod;
212 }
213
214 if (doSet)
215 set(repVal.get());
216 }
217
218 Value::Value()
219 {
220 join = this;
221 memset(&reg, 0, sizeof(reg));
222 reg.size = 4;
223 }
224
225 LValue::LValue(Function *fn, DataFile file)
226 {
227 reg.file = file;
228 reg.size = (file != FILE_PREDICATE) ? 4 : 1;
229 reg.data.id = -1;
230
231 compMask = 0;
232 compound = 0;
233 ssa = 0;
234 fixedReg = 0;
235 noSpill = 0;
236
237 fn->add(this, this->id);
238 }
239
240 LValue::LValue(Function *fn, LValue *lval)
241 {
242 assert(lval);
243
244 reg.file = lval->reg.file;
245 reg.size = lval->reg.size;
246 reg.data.id = -1;
247
248 compMask = 0;
249 compound = 0;
250 ssa = 0;
251 fixedReg = 0;
252 noSpill = 0;
253
254 fn->add(this, this->id);
255 }
256
257 LValue *
258 LValue::clone(ClonePolicy<Function>& pol) const
259 {
260 LValue *that = new_LValue(pol.context(), reg.file);
261
262 pol.set<Value>(this, that);
263
264 that->reg.size = this->reg.size;
265 that->reg.type = this->reg.type;
266 that->reg.data = this->reg.data;
267
268 return that;
269 }
270
271 bool
272 LValue::isUniform() const
273 {
274 if (defs.size() > 1)
275 return false;
276 Instruction *insn = getInsn();
277 // let's not try too hard here for now ...
278 return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
279 }
280
281 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
282 {
283 baseSym = NULL;
284
285 reg.file = f;
286 reg.fileIndex = fidx;
287 reg.data.offset = 0;
288
289 prog->add(this, this->id);
290 }
291
292 Symbol *
293 Symbol::clone(ClonePolicy<Function>& pol) const
294 {
295 Program *prog = pol.context()->getProgram();
296
297 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
298
299 pol.set<Value>(this, that);
300
301 that->reg.size = this->reg.size;
302 that->reg.type = this->reg.type;
303 that->reg.data = this->reg.data;
304
305 that->baseSym = this->baseSym;
306
307 return that;
308 }
309
310 bool
311 Symbol::isUniform() const
312 {
313 return
314 reg.file != FILE_SYSTEM_VALUE &&
315 reg.file != FILE_MEMORY_LOCAL &&
316 reg.file != FILE_SHADER_INPUT;
317 }
318
319 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
320 {
321 memset(&reg, 0, sizeof(reg));
322
323 reg.file = FILE_IMMEDIATE;
324 reg.size = 4;
325 reg.type = TYPE_U32;
326
327 reg.data.u32 = uval;
328
329 prog->add(this, this->id);
330 }
331
332 ImmediateValue::ImmediateValue(Program *prog, float fval)
333 {
334 memset(&reg, 0, sizeof(reg));
335
336 reg.file = FILE_IMMEDIATE;
337 reg.size = 4;
338 reg.type = TYPE_F32;
339
340 reg.data.f32 = fval;
341
342 prog->add(this, this->id);
343 }
344
345 ImmediateValue::ImmediateValue(Program *prog, double dval)
346 {
347 memset(&reg, 0, sizeof(reg));
348
349 reg.file = FILE_IMMEDIATE;
350 reg.size = 8;
351 reg.type = TYPE_F64;
352
353 reg.data.f64 = dval;
354
355 prog->add(this, this->id);
356 }
357
358 ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
359 {
360 reg = proto->reg;
361
362 reg.type = ty;
363 reg.size = typeSizeof(ty);
364 }
365
366 ImmediateValue *
367 ImmediateValue::clone(ClonePolicy<Function>& pol) const
368 {
369 Program *prog = pol.context()->getProgram();
370 ImmediateValue *that = new_ImmediateValue(prog, 0u);
371
372 pol.set<Value>(this, that);
373
374 that->reg.size = this->reg.size;
375 that->reg.type = this->reg.type;
376 that->reg.data = this->reg.data;
377
378 return that;
379 }
380
381 bool
382 ImmediateValue::isInteger(const int i) const
383 {
384 switch (reg.type) {
385 case TYPE_S8:
386 return reg.data.s8 == i;
387 case TYPE_U8:
388 return reg.data.u8 == i;
389 case TYPE_S16:
390 return reg.data.s16 == i;
391 case TYPE_U16:
392 return reg.data.u16 == i;
393 case TYPE_S32:
394 case TYPE_U32:
395 return reg.data.s32 == i; // as if ...
396 case TYPE_S64:
397 case TYPE_U64:
398 return reg.data.s64 == i; // as if ...
399 case TYPE_F32:
400 return reg.data.f32 == static_cast<float>(i);
401 case TYPE_F64:
402 return reg.data.f64 == static_cast<double>(i);
403 default:
404 return false;
405 }
406 }
407
408 bool
409 ImmediateValue::isNegative() const
410 {
411 switch (reg.type) {
412 case TYPE_S8: return reg.data.s8 < 0;
413 case TYPE_S16: return reg.data.s16 < 0;
414 case TYPE_S32:
415 case TYPE_U32: return reg.data.s32 < 0;
416 case TYPE_F32: return reg.data.u32 & (1 << 31);
417 case TYPE_F64: return reg.data.u64 & (1ULL << 63);
418 default:
419 return false;
420 }
421 }
422
423 bool
424 ImmediateValue::isPow2() const
425 {
426 if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
427 return util_is_power_of_two_or_zero64(reg.data.u64);
428 else
429 return util_is_power_of_two_or_zero(reg.data.u32);
430 }
431
432 void
433 ImmediateValue::applyLog2()
434 {
435 switch (reg.type) {
436 case TYPE_S8:
437 case TYPE_S16:
438 case TYPE_S32:
439 assert(!this->isNegative());
440 // fall through
441 case TYPE_U8:
442 case TYPE_U16:
443 case TYPE_U32:
444 reg.data.u32 = util_logbase2(reg.data.u32);
445 break;
446 case TYPE_S64:
447 assert(!this->isNegative());
448 // fall through
449 case TYPE_U64:
450 reg.data.u64 = util_logbase2_64(reg.data.u64);
451 break;
452 case TYPE_F32:
453 reg.data.f32 = log2f(reg.data.f32);
454 break;
455 case TYPE_F64:
456 reg.data.f64 = log2(reg.data.f64);
457 break;
458 default:
459 assert(0);
460 break;
461 }
462 }
463
464 bool
465 ImmediateValue::compare(CondCode cc, float fval) const
466 {
467 if (reg.type != TYPE_F32)
468 ERROR("immediate value is not of type f32");
469
470 switch (static_cast<CondCode>(cc & 7)) {
471 case CC_TR: return true;
472 case CC_FL: return false;
473 case CC_LT: return reg.data.f32 < fval;
474 case CC_LE: return reg.data.f32 <= fval;
475 case CC_GT: return reg.data.f32 > fval;
476 case CC_GE: return reg.data.f32 >= fval;
477 case CC_EQ: return reg.data.f32 == fval;
478 case CC_NE: return reg.data.f32 != fval;
479 default:
480 assert(0);
481 return false;
482 }
483 }
484
485 ImmediateValue&
486 ImmediateValue::operator=(const ImmediateValue &that)
487 {
488 this->reg = that.reg;
489 return (*this);
490 }
491
492 bool
493 Value::interfers(const Value *that) const
494 {
495 uint32_t idA, idB;
496
497 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
498 return false;
499 if (this->asImm())
500 return false;
501
502 if (this->asSym()) {
503 idA = this->join->reg.data.offset;
504 idB = that->join->reg.data.offset;
505 } else {
506 idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
507 idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
508 }
509
510 if (idA < idB)
511 return (idA + this->reg.size > idB);
512 else
513 if (idA > idB)
514 return (idB + that->reg.size > idA);
515 else
516 return (idA == idB);
517 }
518
519 bool
520 Value::equals(const Value *that, bool strict) const
521 {
522 if (strict)
523 return this == that;
524
525 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
526 return false;
527 if (that->reg.size != this->reg.size)
528 return false;
529
530 if (that->reg.data.id != this->reg.data.id)
531 return false;
532
533 return true;
534 }
535
536 bool
537 ImmediateValue::equals(const Value *that, bool strict) const
538 {
539 const ImmediateValue *imm = that->asImm();
540 if (!imm)
541 return false;
542 return reg.data.u64 == imm->reg.data.u64;
543 }
544
545 bool
546 Symbol::equals(const Value *that, bool strict) const
547 {
548 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
549 return false;
550 assert(that->asSym());
551
552 if (this->baseSym != that->asSym()->baseSym)
553 return false;
554
555 if (reg.file == FILE_SYSTEM_VALUE)
556 return (this->reg.data.sv.sv == that->reg.data.sv.sv &&
557 this->reg.data.sv.index == that->reg.data.sv.index);
558 return this->reg.data.offset == that->reg.data.offset;
559 }
560
561 void Instruction::init()
562 {
563 next = prev = 0;
564
565 cc = CC_ALWAYS;
566 rnd = ROUND_N;
567 cache = CACHE_CA;
568 subOp = 0;
569
570 saturate = 0;
571 join = 0;
572 exit = 0;
573 terminator = 0;
574 ftz = 0;
575 dnz = 0;
576 perPatch = 0;
577 fixed = 0;
578 encSize = 0;
579 ipa = 0;
580 mask = 0;
581 precise = 0;
582
583 lanes = 0xf;
584
585 postFactor = 0;
586
587 predSrc = -1;
588 flagsDef = -1;
589 flagsSrc = -1;
590 }
591
592 Instruction::Instruction()
593 {
594 init();
595
596 op = OP_NOP;
597 dType = sType = TYPE_F32;
598
599 id = -1;
600 bb = 0;
601 }
602
603 Instruction::Instruction(Function *fn, operation opr, DataType ty)
604 {
605 init();
606
607 op = opr;
608 dType = sType = ty;
609
610 fn->add(this, id);
611 }
612
613 Instruction::~Instruction()
614 {
615 if (bb) {
616 Function *fn = bb->getFunction();
617 bb->remove(this);
618 fn->allInsns.remove(id);
619 }
620
621 for (int s = 0; srcExists(s); ++s)
622 setSrc(s, NULL);
623 // must unlink defs too since the list pointers will get deallocated
624 for (int d = 0; defExists(d); ++d)
625 setDef(d, NULL);
626 }
627
628 void
629 Instruction::setDef(int i, Value *val)
630 {
631 int size = defs.size();
632 if (i >= size) {
633 defs.resize(i + 1);
634 while (size <= i)
635 defs[size++].setInsn(this);
636 }
637 defs[i].set(val);
638 }
639
640 void
641 Instruction::setSrc(int s, Value *val)
642 {
643 int size = srcs.size();
644 if (s >= size) {
645 srcs.resize(s + 1);
646 while (size <= s)
647 srcs[size++].setInsn(this);
648 }
649 srcs[s].set(val);
650 }
651
652 void
653 Instruction::setSrc(int s, const ValueRef& ref)
654 {
655 setSrc(s, ref.get());
656 srcs[s].mod = ref.mod;
657 }
658
659 void
660 Instruction::swapSources(int a, int b)
661 {
662 Value *value = srcs[a].get();
663 Modifier m = srcs[a].mod;
664
665 setSrc(a, srcs[b]);
666
667 srcs[b].set(value);
668 srcs[b].mod = m;
669 }
670
// Adjust a stored source index for a move of sources [@s, last] by @delta.
// Indices at or above @s are shifted by @delta; when sources are removed
// (@delta < 0), indices in the erased range [@s + @delta, @s) become -1.
// All other indices are left alone.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index < s) {
      const bool erased = (delta < 0) && (index >= (s + delta));
      if (erased)
         index = -1;
      return;
   }
   index += delta;
}
679
680 // Moves sources [@s,last_source] by @delta.
681 // If @delta < 0, sources [@s - abs(@delta), @s) are erased.
682 void
683 Instruction::moveSources(const int s, const int delta)
684 {
685 if (delta == 0)
686 return;
687 assert(s + delta >= 0);
688
689 int k;
690
691 for (k = 0; srcExists(k); ++k) {
692 for (int i = 0; i < 2; ++i)
693 moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
694 }
695 moveSourcesAdjustIndex(predSrc, s, delta);
696 moveSourcesAdjustIndex(flagsSrc, s, delta);
697 if (asTex()) {
698 TexInstruction *tex = asTex();
699 moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
700 moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
701 }
702
703 if (delta > 0) {
704 --k;
705 for (int p = k + delta; k >= s; --k, --p)
706 setSrc(p, src(k));
707 } else {
708 int p;
709 for (p = s; p < k; ++p)
710 setSrc(p + delta, src(p));
711 for (; (p + delta) < k; ++p)
712 setSrc(p + delta, NULL);
713 }
714 }
715
716 void
717 Instruction::takeExtraSources(int s, Value *values[3])
718 {
719 values[0] = getIndirect(s, 0);
720 if (values[0])
721 setIndirect(s, 0, NULL);
722
723 values[1] = getIndirect(s, 1);
724 if (values[1])
725 setIndirect(s, 1, NULL);
726
727 values[2] = getPredicate();
728 if (values[2])
729 setPredicate(cc, NULL);
730 }
731
732 void
733 Instruction::putExtraSources(int s, Value *values[3])
734 {
735 if (values[0])
736 setIndirect(s, 0, values[0]);
737 if (values[1])
738 setIndirect(s, 1, values[1]);
739 if (values[2])
740 setPredicate(cc, values[2]);
741 }
742
743 Instruction *
744 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
745 {
746 if (!i)
747 i = new_Instruction(pol.context(), op, dType);
748 #ifndef NDEBUG // non-conformant assert, so this is required
749 assert(typeid(*i) == typeid(*this));
750 #endif
751
752 pol.set<Instruction>(this, i);
753
754 i->sType = sType;
755
756 i->rnd = rnd;
757 i->cache = cache;
758 i->subOp = subOp;
759
760 i->saturate = saturate;
761 i->join = join;
762 i->exit = exit;
763 i->mask = mask;
764 i->ftz = ftz;
765 i->dnz = dnz;
766 i->ipa = ipa;
767 i->lanes = lanes;
768 i->perPatch = perPatch;
769
770 i->postFactor = postFactor;
771
772 for (int d = 0; defExists(d); ++d)
773 i->setDef(d, pol.get(getDef(d)));
774
775 for (int s = 0; srcExists(s); ++s) {
776 i->setSrc(s, pol.get(getSrc(s)));
777 i->src(s).mod = src(s).mod;
778 }
779
780 i->cc = cc;
781 i->predSrc = predSrc;
782 i->flagsDef = flagsDef;
783 i->flagsSrc = flagsSrc;
784
785 return i;
786 }
787
788 unsigned int
789 Instruction::defCount(unsigned int mask, bool singleFile) const
790 {
791 unsigned int i, n;
792
793 if (singleFile) {
794 unsigned int d = ffs(mask);
795 if (!d)
796 return 0;
797 for (i = d--; defExists(i); ++i)
798 if (getDef(i)->reg.file != getDef(d)->reg.file)
799 mask &= ~(1 << i);
800 }
801
802 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
803 n += mask & 1;
804 return n;
805 }
806
807 unsigned int
808 Instruction::srcCount(unsigned int mask, bool singleFile) const
809 {
810 unsigned int i, n;
811
812 if (singleFile) {
813 unsigned int s = ffs(mask);
814 if (!s)
815 return 0;
816 for (i = s--; srcExists(i); ++i)
817 if (getSrc(i)->reg.file != getSrc(s)->reg.file)
818 mask &= ~(1 << i);
819 }
820
821 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
822 n += mask & 1;
823 return n;
824 }
825
826 bool
827 Instruction::setIndirect(int s, int dim, Value *value)
828 {
829 assert(this->srcExists(s));
830
831 int p = srcs[s].indirect[dim];
832 if (p < 0) {
833 if (!value)
834 return true;
835 p = srcs.size();
836 while (p > 0 && !srcExists(p - 1))
837 --p;
838 }
839 setSrc(p, value);
840 srcs[p].usedAsPtr = (value != 0);
841 srcs[s].indirect[dim] = value ? p : -1;
842 return true;
843 }
844
845 bool
846 Instruction::setPredicate(CondCode ccode, Value *value)
847 {
848 cc = ccode;
849
850 if (!value) {
851 if (predSrc >= 0) {
852 srcs[predSrc].set(NULL);
853 predSrc = -1;
854 }
855 return true;
856 }
857
858 if (predSrc < 0) {
859 predSrc = srcs.size();
860 while (predSrc > 0 && !srcExists(predSrc - 1))
861 --predSrc;
862 }
863
864 setSrc(predSrc, value);
865 return true;
866 }
867
868 bool
869 Instruction::writesPredicate() const
870 {
871 for (int d = 0; defExists(d); ++d)
872 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
873 return true;
874 return false;
875 }
876
877 bool
878 Instruction::canCommuteDefSrc(const Instruction *i) const
879 {
880 for (int d = 0; defExists(d); ++d)
881 for (int s = 0; i->srcExists(s); ++s)
882 if (getDef(d)->interfers(i->getSrc(s)))
883 return false;
884 return true;
885 }
886
887 bool
888 Instruction::canCommuteDefDef(const Instruction *i) const
889 {
890 for (int d = 0; defExists(d); ++d)
891 for (int c = 0; i->defExists(c); ++c)
892 if (getDef(d)->interfers(i->getDef(c)))
893 return false;
894 return true;
895 }
896
897 bool
898 Instruction::isCommutationLegal(const Instruction *i) const
899 {
900 return canCommuteDefDef(i) &&
901 canCommuteDefSrc(i) &&
902 i->canCommuteDefSrc(this);
903 }
904
905 TexInstruction::TexInstruction(Function *fn, operation op)
906 : Instruction(fn, op, TYPE_F32)
907 {
908 memset(&tex, 0, sizeof(tex));
909
910 tex.rIndirectSrc = -1;
911 tex.sIndirectSrc = -1;
912
913 if (op == OP_TXF)
914 sType = TYPE_U32;
915 }
916
917 TexInstruction::~TexInstruction()
918 {
919 for (int c = 0; c < 3; ++c) {
920 dPdx[c].set(NULL);
921 dPdy[c].set(NULL);
922 }
923 for (int n = 0; n < 4; ++n)
924 for (int c = 0; c < 3; ++c)
925 offset[n][c].set(NULL);
926 }
927
928 TexInstruction *
929 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
930 {
931 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
932 new_TexInstruction(pol.context(), op));
933
934 Instruction::clone(pol, tex);
935
936 tex->tex = this->tex;
937
938 if (op == OP_TXD) {
939 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
940 tex->dPdx[c].set(dPdx[c]);
941 tex->dPdy[c].set(dPdy[c]);
942 }
943 }
944
945 for (int n = 0; n < tex->tex.useOffsets; ++n)
946 for (int c = 0; c < 3; ++c)
947 tex->offset[n][c].set(offset[n][c]);
948
949 return tex;
950 }
951
952 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
953 {
954 { "1D", 1, 1, false, false, false },
955 { "2D", 2, 2, false, false, false },
956 { "2D_MS", 2, 3, false, false, false },
957 { "3D", 3, 3, false, false, false },
958 { "CUBE", 2, 3, false, true, false },
959 { "1D_SHADOW", 1, 1, false, false, true },
960 { "2D_SHADOW", 2, 2, false, false, true },
961 { "CUBE_SHADOW", 2, 3, false, true, true },
962 { "1D_ARRAY", 1, 2, true, false, false },
963 { "2D_ARRAY", 2, 3, true, false, false },
964 { "2D_MS_ARRAY", 2, 4, true, false, false },
965 { "CUBE_ARRAY", 2, 4, true, true, false },
966 { "1D_ARRAY_SHADOW", 1, 2, true, false, true },
967 { "2D_ARRAY_SHADOW", 2, 3, true, false, true },
968 { "RECT", 2, 2, false, false, false },
969 { "RECT_SHADOW", 2, 2, false, false, true },
970 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },
971 { "BUFFER", 1, 1, false, false, false },
972 };
973
974 const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
975 {
976 { "NONE", 0, { 0, 0, 0, 0 }, UINT },
977
978 { "RGBA32F", 4, { 32, 32, 32, 32 }, FLOAT },
979 { "RGBA16F", 4, { 16, 16, 16, 16 }, FLOAT },
980 { "RG32F", 2, { 32, 32, 0, 0 }, FLOAT },
981 { "RG16F", 2, { 16, 16, 0, 0 }, FLOAT },
982 { "R11G11B10F", 3, { 11, 11, 10, 0 }, FLOAT },
983 { "R32F", 1, { 32, 0, 0, 0 }, FLOAT },
984 { "R16F", 1, { 16, 0, 0, 0 }, FLOAT },
985
986 { "RGBA32UI", 4, { 32, 32, 32, 32 }, UINT },
987 { "RGBA16UI", 4, { 16, 16, 16, 16 }, UINT },
988 { "RGB10A2UI", 4, { 10, 10, 10, 2 }, UINT },
989 { "RGBA8UI", 4, { 8, 8, 8, 8 }, UINT },
990 { "RG32UI", 2, { 32, 32, 0, 0 }, UINT },
991 { "RG16UI", 2, { 16, 16, 0, 0 }, UINT },
992 { "RG8UI", 2, { 8, 8, 0, 0 }, UINT },
993 { "R32UI", 1, { 32, 0, 0, 0 }, UINT },
994 { "R16UI", 1, { 16, 0, 0, 0 }, UINT },
995 { "R8UI", 1, { 8, 0, 0, 0 }, UINT },
996
997 { "RGBA32I", 4, { 32, 32, 32, 32 }, SINT },
998 { "RGBA16I", 4, { 16, 16, 16, 16 }, SINT },
999 { "RGBA8I", 4, { 8, 8, 8, 8 }, SINT },
1000 { "RG32I", 2, { 32, 32, 0, 0 }, SINT },
1001 { "RG16I", 2, { 16, 16, 0, 0 }, SINT },
1002 { "RG8I", 2, { 8, 8, 0, 0 }, SINT },
1003 { "R32I", 1, { 32, 0, 0, 0 }, SINT },
1004 { "R16I", 1, { 16, 0, 0, 0 }, SINT },
1005 { "R8I", 1, { 8, 0, 0, 0 }, SINT },
1006
1007 { "RGBA16", 4, { 16, 16, 16, 16 }, UNORM },
1008 { "RGB10A2", 4, { 10, 10, 10, 2 }, UNORM },
1009 { "RGBA8", 4, { 8, 8, 8, 8 }, UNORM },
1010 { "RG16", 2, { 16, 16, 0, 0 }, UNORM },
1011 { "RG8", 2, { 8, 8, 0, 0 }, UNORM },
1012 { "R16", 1, { 16, 0, 0, 0 }, UNORM },
1013 { "R8", 1, { 8, 0, 0, 0 }, UNORM },
1014
1015 { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
1016 { "RGBA8_SNORM", 4, { 8, 8, 8, 8 }, SNORM },
1017 { "RG16_SNORM", 2, { 16, 16, 0, 0 }, SNORM },
1018 { "RG8_SNORM", 2, { 8, 8, 0, 0 }, SNORM },
1019 { "R16_SNORM", 1, { 16, 0, 0, 0 }, SNORM },
1020 { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM },
1021
1022 { "BGRA8", 4, { 8, 8, 8, 8 }, UNORM, true },
1023 };
1024
1025 void
1026 TexInstruction::setIndirectR(Value *v)
1027 {
1028 int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1029 if (p >= 0) {
1030 tex.rIndirectSrc = p;
1031 setSrc(p, v);
1032 srcs[p].usedAsPtr = !!v;
1033 }
1034 }
1035
1036 void
1037 TexInstruction::setIndirectS(Value *v)
1038 {
1039 int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1040 if (p >= 0) {
1041 tex.sIndirectSrc = p;
1042 setSrc(p, v);
1043 srcs[p].usedAsPtr = !!v;
1044 }
1045 }
1046
1047 CmpInstruction::CmpInstruction(Function *fn, operation op)
1048 : Instruction(fn, op, TYPE_F32)
1049 {
1050 setCond = CC_ALWAYS;
1051 }
1052
1053 CmpInstruction *
1054 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1055 {
1056 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1057 new_CmpInstruction(pol.context(), op));
1058 cmp->dType = dType;
1059 Instruction::clone(pol, cmp);
1060 cmp->setCond = setCond;
1061 return cmp;
1062 }
1063
1064 FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1065 : Instruction(fn, op, TYPE_NONE)
1066 {
1067 if (op == OP_CALL)
1068 target.fn = reinterpret_cast<Function *>(targ);
1069 else
1070 target.bb = reinterpret_cast<BasicBlock *>(targ);
1071
1072 if (op == OP_BRA ||
1073 op == OP_CONT || op == OP_BREAK ||
1074 op == OP_RET || op == OP_EXIT)
1075 terminator = 1;
1076 else
1077 if (op == OP_JOIN)
1078 terminator = targ ? 1 : 0;
1079
1080 allWarp = absolute = limit = builtin = indirect = 0;
1081 }
1082
1083 FlowInstruction *
1084 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1085 {
1086 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1087 new_FlowInstruction(pol.context(), op, NULL));
1088
1089 Instruction::clone(pol, flow);
1090 flow->allWarp = allWarp;
1091 flow->absolute = absolute;
1092 flow->limit = limit;
1093 flow->builtin = builtin;
1094
1095 if (builtin)
1096 flow->target.builtin = target.builtin;
1097 else
1098 if (op == OP_CALL)
1099 flow->target.fn = target.fn;
1100 else
1101 if (target.bb)
1102 flow->target.bb = pol.get<BasicBlock>(target.bb);
1103
1104 return flow;
1105 }
1106
1107 Program::Program(Type type, Target *arch)
1108 : progType(type),
1109 target(arch),
1110 mem_Instruction(sizeof(Instruction), 6),
1111 mem_CmpInstruction(sizeof(CmpInstruction), 4),
1112 mem_TexInstruction(sizeof(TexInstruction), 4),
1113 mem_FlowInstruction(sizeof(FlowInstruction), 4),
1114 mem_LValue(sizeof(LValue), 8),
1115 mem_Symbol(sizeof(Symbol), 7),
1116 mem_ImmediateValue(sizeof(ImmediateValue), 7)
1117 {
1118 code = NULL;
1119 binSize = 0;
1120
1121 maxGPR = -1;
1122 fp64 = false;
1123
1124 main = new Function(this, "MAIN", ~0);
1125 calls.insert(&main->call);
1126
1127 dbgFlags = 0;
1128 optLevel = 0;
1129
1130 targetPriv = NULL;
1131 }
1132
1133 Program::~Program()
1134 {
1135 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1136 delete reinterpret_cast<Function *>(it.get());
1137
1138 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1139 releaseValue(reinterpret_cast<Value *>(it.get()));
1140 }
1141
1142 void Program::releaseInstruction(Instruction *insn)
1143 {
1144 // TODO: make this not suck so much
1145
1146 insn->~Instruction();
1147
1148 if (insn->asCmp())
1149 mem_CmpInstruction.release(insn);
1150 else
1151 if (insn->asTex())
1152 mem_TexInstruction.release(insn);
1153 else
1154 if (insn->asFlow())
1155 mem_FlowInstruction.release(insn);
1156 else
1157 mem_Instruction.release(insn);
1158 }
1159
1160 void Program::releaseValue(Value *value)
1161 {
1162 value->~Value();
1163
1164 if (value->asLValue())
1165 mem_LValue.release(value);
1166 else
1167 if (value->asImm())
1168 mem_ImmediateValue.release(value);
1169 else
1170 if (value->asSym())
1171 mem_Symbol.release(value);
1172 }
1173
1174
1175 } // namespace nv50_ir
1176
1177 extern "C" {
1178
1179 static void
1180 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
1181 {
1182 if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1183 info->prop.tp.domain = PIPE_PRIM_MAX;
1184 info->prop.tp.outputPrim = PIPE_PRIM_MAX;
1185 }
1186 if (info->type == PIPE_SHADER_GEOMETRY) {
1187 info->prop.gp.instanceCount = 1;
1188 info->prop.gp.maxVertices = 1;
1189 }
1190 if (info->type == PIPE_SHADER_COMPUTE) {
1191 info->prop.cp.numThreads[0] =
1192 info->prop.cp.numThreads[1] =
1193 info->prop.cp.numThreads[2] = 1;
1194 }
1195 info->io.pointSize = 0xff;
1196 info->io.instanceId = 0xff;
1197 info->io.vertexId = 0xff;
1198 info->io.edgeFlagIn = 0xff;
1199 info->io.edgeFlagOut = 0xff;
1200 info->io.fragDepth = 0xff;
1201 info->io.sampleMask = 0xff;
1202 info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
1203 }
1204
1205 int
1206 nv50_ir_generate_code(struct nv50_ir_prog_info *info)
1207 {
1208 int ret = 0;
1209
1210 nv50_ir::Program::Type type;
1211
1212 nv50_ir_init_prog_info(info);
1213
1214 #define PROG_TYPE_CASE(a, b) \
1215 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1216
1217 switch (info->type) {
1218 PROG_TYPE_CASE(VERTEX, VERTEX);
1219 PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1220 PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1221 PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1222 PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1223 PROG_TYPE_CASE(COMPUTE, COMPUTE);
1224 default:
1225 INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
1226 return -1;
1227 }
1228 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1229
1230 nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1231 if (!targ)
1232 return -1;
1233
1234 nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1235 if (!prog) {
1236 nv50_ir::Target::destroy(targ);
1237 return -1;
1238 }
1239 prog->driver = info;
1240 prog->dbgFlags = info->dbgFlags;
1241 prog->optLevel = info->optLevel;
1242
1243 switch (info->bin.sourceRep) {
1244 case PIPE_SHADER_IR_TGSI:
1245 ret = prog->makeFromTGSI(info) ? 0 : -2;
1246 break;
1247 default:
1248 ret = -1;
1249 break;
1250 }
1251 if (ret < 0)
1252 goto out;
1253 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1254 prog->print();
1255
1256 targ->parseDriverInfo(info);
1257 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1258
1259 prog->convertToSSA();
1260
1261 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1262 prog->print();
1263
1264 prog->optimizeSSA(info->optLevel);
1265 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1266
1267 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1268 prog->print();
1269
1270 if (!prog->registerAllocation()) {
1271 ret = -4;
1272 goto out;
1273 }
1274 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1275
1276 prog->optimizePostRA(info->optLevel);
1277
1278 if (!prog->emitBinary(info)) {
1279 ret = -5;
1280 goto out;
1281 }
1282
1283 out:
1284 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1285
1286 info->bin.maxGPR = prog->maxGPR;
1287 info->bin.code = prog->code;
1288 info->bin.codeSize = prog->binSize;
1289 info->bin.tlsSpace = prog->tlsSize;
1290
1291 delete prog;
1292 nv50_ir::Target::destroy(targ);
1293
1294 return ret;
1295 }
1296
1297 } // extern "C"