#endif
#include <cstring>
-#include <iostream>
-#include <iomanip>
#include "sb_bc.h"
#include "sb_shader.h"
-
#include "sb_pass.h"
namespace r600_sb {
-using std::cerr;
-
class regbits {
typedef uint32_t basetype;
static const unsigned bt_bytes = sizeof(basetype);
void set(unsigned index, unsigned val);
- sel_chan find_free_bit(unsigned start);
+ sel_chan find_free_bit();
sel_chan find_free_chans(unsigned mask);
+ sel_chan find_free_chan_by_mask(unsigned mask);
sel_chan find_free_array(unsigned size, unsigned mask);
void dump();
for (unsigned i = 0; i < size * bt_bits; ++i) {
if (!(i & 31))
- cerr << "\n";
+ sblog << "\n";
- if (!(i & 3))
- cerr << " " << std::setw(3) << (i / 4) << " ";
+ if (!(i & 3)) {
+ sblog.print_w(i / 4, 7);
+ sblog << " ";
+ }
- cerr << (get(i) ? 1 : 0);
+ sblog << (get(i) ? 1 : 0);
}
}
}
// free register for ra means the bit is set
-sel_chan regbits::find_free_bit(unsigned start) {
- unsigned elt = start >> bt_index_shift;
- unsigned bit = start & bt_index_mask;
-
- unsigned end = start < MAX_GPR - num_temps ? MAX_GPR - num_temps : MAX_GPR;
+sel_chan regbits::find_free_bit() { // yields (index of the lowest set bit) + 1, or 0 when no bit is set at all
+ unsigned elt = 0; // index of the dta word being examined; the scan always starts at word 0
+ unsigned bit = 0;
- while (elt < end && !dta[elt]) {
+ while (elt < size && !dta[elt]) // skip words that have no set (i.e. free) bit
++elt;
- bit = 0;
- }
- if (elt >= end)
+ if (elt >= size) // ran past the last word: nothing is free
return 0;
- // FIXME this seems broken when not starting from 0
+ bit = __builtin_ctz(dta[elt]) + (elt << bt_index_shift); // lowest set bit of this non-zero word plus the word's base bit index
+
+ assert(bit < ((MAX_GPR - num_temps) << 2)); // the hit must lie below bit (MAX_GPR - num_temps) * 4 - presumably 4 bits per GPR with the top num_temps GPRs reserved; confirm
- bit += __builtin_ctz(dta[elt]);
- return ((elt << bt_index_shift) | bit) + 1;
+ return bit + 1; // biased by one so that 0 stays available as the "not found" result
}
// find free gpr component to use as indirectly addressable array
unsigned elt = 0;
unsigned bit = 0;
- basetype cd = dta[elt] >> bit;
+ assert (!(mask & ~0xF));
+ basetype cd = dta[elt];
do {
-
if (!cd) {
- if (++elt < size)
+ if (++elt < size) {
cd = dta[elt];
- else
+ bit = 0;
+ continue;
+ } else
return 0;
-
- bit = 0;
}
unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
- if (p > bt_bits - bit) {
- if (++elt < size)
+ assert (p <= bt_bits - bit);
+ bit += p;
+ cd >>= p;
+
+ if ((cd & mask) == mask) {
+ return ((elt << bt_index_shift) | bit) + 1;
+ }
+
+ bit += 4;
+ cd >>= 4;
+
+ } while (1);
+
+ return 0;
+}
+
+sel_chan regbits::find_free_chan_by_mask(unsigned mask) {
+ unsigned elt = 0;
+ unsigned bit = 0;
+
+ assert (!(mask & ~0xF));
+ basetype cd = dta[elt];
+
+ do {
+ if (!cd) {
+ if (++elt < size) {
cd = dta[elt];
- else
+ bit = 0;
+ continue;
+ } else
return 0;
- bit = 0;
}
+ unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
+
+ assert (p <= bt_bits - bit);
bit += p;
cd >>= p;
- if ((cd & mask) == mask) {
- return ((elt << bt_index_shift) | bit) + 1;
+ if (cd & mask) {
+ unsigned nb = __builtin_ctz(cd & mask);
+ unsigned ofs = ((elt << bt_index_shift) | bit);
+ return nb + ofs + 1;
}
bit += 4;
gpr_array *a = *I;
RA_DUMP(
- cerr << "array [" << a->array_size << "] at " << a->base_gpr << "\n";
- cerr << "\n";
+ sblog << "array [" << a->array_size << "] at " << a->base_gpr << "\n";
+ sblog << "\n";
);
+ // skip preallocated arrays (e.g. with preloaded inputs)
+ if (a->gpr) {
+ RA_DUMP( sblog << " FIXED at " << a->gpr << "\n"; );
+ continue;
+ }
+
bool dead = a->is_dead();
if (dead) {
- RA_DUMP( cerr << " DEAD\n"; );
+ RA_DUMP( sblog << " DEAD\n"; );
continue;
}
}
RA_DUMP(
- cerr << " interf: ";
+ sblog << " interf: ";
dump::dump_set(sh, s);
- cerr << "\n";
+ sblog << "\n";
);
regbits rb(sh, s);
sel_chan base = rb.find_free_array(a->array_size,
(1 << a->base_gpr.chan()));
- RA_DUMP( cerr << " found base: " << base << "\n"; );
+ RA_DUMP( sblog << " found base: " << base << "\n"; );
a->gpr = base;
}
bool copy = n->is_copy_mov();
RA_DUMP(
- cerr << "ra_init: process_op : ";
+ sblog << "ra_init: process_op : ";
dump::dump_op(n);
- cerr << "\n";
+ sblog << "\n";
);
if (n->is_alu_packed()) {
assert(vv.size() <= 8);
RA_DUMP(
- cerr << "color_bs_constraint: ";
+ sblog << "color_bs_constraint: ";
dump::dump_vec(vv);
- cerr << "\n";
+ sblog << "\n";
);
regbits rb(ctx.alu_temp_gprs);
interf = v->interferences;
RA_DUMP(
- cerr << " processing " << *v << " interferences : ";
+ sblog << " processing " << *v << " interferences : ";
dump::dump_set(sh, interf);
- cerr << "\n";
+ sblog << "\n";
);
if (gpr) {
rb.from_val_set(sh, interf);
RA_DUMP(
- cerr << " regbits : ";
+ sblog << " regbits : ";
rb.dump();
- cerr << "\n";
+ sblog << "\n";
);
while (allowed_chans && gpr.sel() < sh.num_nontemp_gpr()) {
gpr = gpr + 1;
RA_DUMP(
- cerr << " trying " << gpr << "\n";
+ sblog << " trying " << gpr << "\n";
);
unsigned chan = gpr.chan();
}
if (!gpr) {
- cerr << "color_bs_constraint: failed...\n";
+ sblog << "color_bs_constraint: failed...\n";
assert(!"coloring failed");
}
}
return;
RA_DUMP(
- cerr << "coloring ";
+ sblog << "coloring ";
dump::dump_val(v);
- cerr << " interferences ";
+ sblog << " interferences ";
dump::dump_set(sh, v->interferences);
- cerr << "\n";
+ sblog << "\n";
);
if (v->is_reg_pinned()) {
sel_chan c;
if (v->is_chan_pinned()) {
- RA_DUMP( cerr << "chan_pinned = " << v->pin_gpr.chan() << " "; );
+ RA_DUMP( sblog << "chan_pinned = " << v->pin_gpr.chan() << " "; );
unsigned mask = 1 << v->pin_gpr.chan();
c = rb.find_free_chans(mask) + v->pin_gpr.chan();
} else {
- c = rb.find_free_bit(0);
+ unsigned cm = get_preferable_chan_mask();
+ RA_DUMP( sblog << "pref chan mask: " << cm << "\n"; );
+ c = rb.find_free_chan_by_mask(cm);
}
assert(c && c.sel() < 128 - ctx.alu_temp_gprs && "color failed");
}
void ra_init::assign_color(value* v, sel_chan c) {
+ add_prev_chan(c.chan()); // record this channel in the recent-channel history that get_preferable_chan_mask() reads
v->gpr = c;
RA_DUMP(
- cerr << "colored ";
+ sblog << "colored ";
dump::dump_val(v);
- cerr << " to " << c << "\n";
+ sblog << " to " << c << "\n";
);
}
value *t;
vvec::iterator F =
- allow_swz ? find(v2.begin(), v2.end(), o) : v2.end();
+ allow_swz ? std::find(v2.begin(), v2.end(), o) : v2.end();
if (F != v2.end()) {
t = *(v1.begin() + (F - v2.begin()));
}
}
+// Appends a channel to the recent-channel history kept in prev_chans.
+// Existing entries move up by four bits and the bit for `chan` is OR-ed
+// into the freed low bits (the lowest nibble when chan is 0..3).
+void ra_init::add_prev_chan(unsigned chan) {
+	prev_chans <<= 4;
+	prev_chans |= (1 << chan);
+}
+
+// Builds a 4-bit mask of the channels that do not appear in the newest
+// ra_tune entries of the history written by add_prev_chan() (one nibble
+// per entry, newest entry in the low bits).
+//
+// Returns the mask of preferred channels. When every channel appears in
+// the inspected history the preference would be empty; an empty mask can
+// never be matched by find_free_chan_by_mask(), so all four channels are
+// returned instead of 0.
+unsigned ra_init::get_preferable_chan_mask() {
+	unsigned used_chans = 0;
+	unsigned chans = prev_chans;
+
+	// OR the inspected nibbles together; only the low nibble of the
+	// accumulated value is kept below
+	for (unsigned i = 0; i < ra_tune; ++i) {
+		used_chans |= chans;
+		chans >>= 4;
+	}
+
+	const unsigned free_chans = (~used_chans) & 0xF;
+
+	// no channel left to prefer: drop the preference rather than hand
+	// the allocator a mask it cannot satisfy
+	return free_chans ? free_chans : 0xF;
+}
+
} // namespace r600_sb