radeon: make texture logging more useful
[mesa.git] / src / gallium / drivers / r600 / sb / sb_ra_init.cpp
index 0447f29eb1b99e06e2c7acaa71c23514a538a017..0b332a9847aba7445d53022c0c5eb9233394e428 100644 (file)
 #endif
 
 #include <cstring>
-#include <iostream>
-#include <iomanip>
 
 #include "sb_bc.h"
 #include "sb_shader.h"
-
 #include "sb_pass.h"
 
 namespace r600_sb {
 
-using std::cerr;
-
 class regbits {
        typedef uint32_t basetype;
        static const unsigned bt_bytes = sizeof(basetype);
@@ -75,8 +70,9 @@ public:
 
        void set(unsigned index, unsigned val);
 
-       sel_chan find_free_bit(unsigned start);
+       sel_chan find_free_bit();
        sel_chan find_free_chans(unsigned mask);
+       sel_chan find_free_chan_by_mask(unsigned mask);
        sel_chan find_free_array(unsigned size, unsigned mask);
 
        void dump();
@@ -88,12 +84,14 @@ void regbits::dump() {
        for (unsigned i = 0; i < size * bt_bits; ++i) {
 
                if (!(i & 31))
-                       cerr << "\n";
+                       sblog << "\n";
 
-               if (!(i & 3))
-                       cerr << "    " << std::setw(3) << (i / 4) << " ";
+               if (!(i & 3)) {
+                       sblog.print_w(i / 4, 7);
+                       sblog << " ";
+               }
 
-               cerr << (get(i) ? 1 : 0);
+               sblog << (get(i) ? 1 : 0);
        }
 }
 
@@ -148,24 +146,21 @@ void regbits::set(unsigned index, unsigned val) {
 }
 
 // free register for ra means the bit is set
-sel_chan regbits::find_free_bit(unsigned start) {
-       unsigned elt = start >> bt_index_shift;
-       unsigned bit = start & bt_index_mask;
-
-       unsigned end = start < MAX_GPR - num_temps ? MAX_GPR - num_temps : MAX_GPR;
+sel_chan regbits::find_free_bit() { // scans from bit 0; result is bit position + 1, or 0 when no bit is set
+       unsigned elt = 0;
+       unsigned bit = 0;
 
-       while (elt < end && !dta[elt]) {
+       while (elt < size && !dta[elt]) // skip words of dta that have no bit set
                ++elt;
-               bit = 0;
-       }
 
-       if (elt >= end)
+       if (elt >= size) // every word was zero, so nothing is free
                return 0;
 
-       // FIXME this seems broken when not starting from 0
+       bit = __builtin_ctz(dta[elt]) + (elt << bt_index_shift); // lowest set bit of dta[elt], offset by elt << bt_index_shift
+
+       assert(bit < ((MAX_GPR - num_temps) << 2)); // NOTE(review): presumably 4 bits per GPR, guarding the temp GPR range - confirm
 
-       bit += __builtin_ctz(dta[elt]);
-       return ((elt << bt_index_shift) | bit) + 1;
+       return bit + 1; // +1 keeps 0 available as the "not found" result above
 }
 
 // find free gpr component to use as indirectly addressable array
@@ -192,34 +187,64 @@ sel_chan regbits::find_free_chans(unsigned mask) {
        unsigned elt = 0;
        unsigned bit = 0;
 
-       basetype cd = dta[elt] >> bit;
+       assert (!(mask & ~0xF));
+       basetype cd = dta[elt];
 
        do {
-
                if (!cd) {
-                       if (++elt < size)
+                       if (++elt < size) {
                                cd = dta[elt];
-                       else
+                               bit = 0;
+                               continue;
+                       } else
                                return 0;
-
-                       bit = 0;
                }
 
                unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
 
-               if (p > bt_bits - bit) {
-                       if (++elt < size)
+               assert (p <= bt_bits - bit);
+               bit += p;
+               cd >>= p;
+
+               if ((cd & mask) == mask) {
+                       return ((elt << bt_index_shift) | bit) + 1;
+               }
+
+               bit += 4;
+               cd >>= 4;
+
+       } while (1);
+
+       return 0;
+}
+
+sel_chan regbits::find_free_chan_by_mask(unsigned mask) {
+       unsigned elt = 0;
+       unsigned bit = 0;
+
+       assert (!(mask & ~0xF));
+       basetype cd = dta[elt];
+
+       do {
+               if (!cd) {
+                       if (++elt < size) {
                                cd = dta[elt];
-                       else
+                               bit = 0;
+                               continue;
+                       } else
                                return 0;
-                       bit = 0;
                }
 
+               unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
+
+               assert (p <= bt_bits - bit);
                bit += p;
                cd >>= p;
 
-               if ((cd & mask) == mask) {
-                       return ((elt << bt_index_shift) | bit) + 1;
+               if (cd & mask) {
+                       unsigned nb = __builtin_ctz(cd & mask);
+                       unsigned ofs = ((elt << bt_index_shift) | bit);
+                       return nb + ofs + 1;
                }
 
                bit += 4;
@@ -240,14 +265,20 @@ void ra_init::alloc_arrays() {
                gpr_array *a = *I;
 
                RA_DUMP(
-                       cerr << "array [" << a->array_size << "] at " << a->base_gpr << "\n";
-                       cerr << "\n";
+                       sblog << "array [" << a->array_size << "] at " << a->base_gpr << "\n";
+                       sblog << "\n";
                );
 
+               // skip preallocated arrays (e.g. with preloaded inputs)
+               if (a->gpr) {
+                       RA_DUMP( sblog << "   FIXED at " << a->gpr << "\n"; );
+                       continue;
+               }
+
                bool dead = a->is_dead();
 
                if (dead) {
-                       RA_DUMP( cerr << "   DEAD\n"; );
+                       RA_DUMP( sblog << "   DEAD\n"; );
                        continue;
                }
 
@@ -261,9 +292,9 @@ void ra_init::alloc_arrays() {
                }
 
                RA_DUMP(
-                       cerr << "  interf: ";
+                       sblog << "  interf: ";
                        dump::dump_set(sh, s);
-                       cerr << "\n";
+                       sblog << "\n";
                );
 
                regbits rb(sh, s);
@@ -271,7 +302,7 @@ void ra_init::alloc_arrays() {
                sel_chan base = rb.find_free_array(a->array_size,
                                                   (1 << a->base_gpr.chan()));
 
-               RA_DUMP( cerr << "  found base: " << base << "\n"; );
+               RA_DUMP( sblog << "  found base: " << base << "\n"; );
 
                a->gpr = base;
        }
@@ -304,9 +335,9 @@ void ra_init::process_op(node* n) {
        bool copy = n->is_copy_mov();
 
        RA_DUMP(
-               cerr << "ra_init: process_op : ";
+               sblog << "ra_init: process_op : ";
                dump::dump_op(n);
-               cerr << "\n";
+               sblog << "\n";
        );
 
        if (n->is_alu_packed()) {
@@ -352,9 +383,9 @@ void ra_init::color_bs_constraint(ra_constraint* c) {
        assert(vv.size() <= 8);
 
        RA_DUMP(
-               cerr << "color_bs_constraint: ";
+               sblog << "color_bs_constraint: ";
                dump::dump_vec(vv);
-               cerr << "\n";
+               sblog << "\n";
        );
 
        regbits rb(ctx.alu_temp_gprs);
@@ -377,9 +408,9 @@ void ra_init::color_bs_constraint(ra_constraint* c) {
                        interf = v->interferences;
 
                RA_DUMP(
-                       cerr << "   processing " << *v << "  interferences : ";
+                       sblog << "   processing " << *v << "  interferences : ";
                        dump::dump_set(sh, interf);
-                       cerr << "\n";
+                       sblog << "\n";
                );
 
                if (gpr) {
@@ -403,9 +434,9 @@ void ra_init::color_bs_constraint(ra_constraint* c) {
                rb.from_val_set(sh, interf);
 
                RA_DUMP(
-                       cerr << "   regbits : ";
+                       sblog << "   regbits : ";
                        rb.dump();
-                       cerr << "\n";
+                       sblog << "\n";
                );
 
                while (allowed_chans && gpr.sel() < sh.num_nontemp_gpr()) {
@@ -414,7 +445,7 @@ void ra_init::color_bs_constraint(ra_constraint* c) {
                                gpr = gpr + 1;
 
                        RA_DUMP(
-                               cerr << "    trying " << gpr << "\n";
+                               sblog << "    trying " << gpr << "\n";
                        );
 
                        unsigned chan = gpr.chan();
@@ -438,7 +469,7 @@ void ra_init::color_bs_constraint(ra_constraint* c) {
                }
 
                if (!gpr) {
-                       cerr << "color_bs_constraint: failed...\n";
+                       sblog << "color_bs_constraint: failed...\n";
                        assert(!"coloring failed");
                }
        }
@@ -455,11 +486,11 @@ void ra_init::color(value* v) {
                return;
 
        RA_DUMP(
-               cerr << "coloring ";
+               sblog << "coloring ";
                dump::dump_val(v);
-               cerr << "   interferences ";
+               sblog << "   interferences ";
                dump::dump_set(sh, v->interferences);
-               cerr << "\n";
+               sblog << "\n";
        );
 
        if (v->is_reg_pinned()) {
@@ -472,11 +503,13 @@ void ra_init::color(value* v) {
        sel_chan c;
 
        if (v->is_chan_pinned()) {
-               RA_DUMP( cerr << "chan_pinned = " << v->pin_gpr.chan() << "  "; );
+               RA_DUMP( sblog << "chan_pinned = " << v->pin_gpr.chan() << "  ";        );
                unsigned mask = 1 << v->pin_gpr.chan();
                c = rb.find_free_chans(mask) + v->pin_gpr.chan();
        } else {
-               c = rb.find_free_bit(0);
+               unsigned cm = get_preferable_chan_mask();
+               RA_DUMP( sblog << "pref chan mask: " << cm << "\n"; );
+               c = rb.find_free_chan_by_mask(cm);
        }
 
        assert(c && c.sel() < 128 - ctx.alu_temp_gprs && "color failed");
@@ -484,11 +517,12 @@ void ra_init::color(value* v) {
 }
 
 void ra_init::assign_color(value* v, sel_chan c) {
+       add_prev_chan(c.chan()); // push the assigned channel into the prev_chans history before storing the color
        v->gpr = c;
        RA_DUMP(
-               cerr << "colored ";
+               sblog << "colored ";
                dump::dump_val(v);
-               cerr << " to " << c << "\n";
+               sblog << " to " << c << "\n";
        );
 }
 
@@ -680,7 +714,7 @@ void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) {
 
                        value *t;
                        vvec::iterator F =
-                                       allow_swz ? find(v2.begin(), v2.end(), o) : v2.end();
+                                       allow_swz ? std::find(v2.begin(), v2.end(), o) : v2.end();
 
                        if (F != v2.end()) {
                                t = *(v1.begin() + (F - v2.begin()));
@@ -790,4 +824,20 @@ void ra_split::split_vector_inst(node* n) {
        }
 }
 
+void ra_init::add_prev_chan(unsigned chan) {
+       prev_chans = (1 << chan) | (prev_chans << 4); // older entries move up one nibble; newest sits in the low nibble
+}
+
+unsigned ra_init::get_preferable_chan_mask() {
+       unsigned i, used_chans = 0;
+       unsigned chans = prev_chans;
+
+       for (i = 0; i < ra_tune; ++i) {
+               used_chans |= chans;
+               chans >>= 4;
+       }
+
+       return (~used_chans) & 0xF;
+}
+
 } // namespace r600_sb