#include "sb_shader.h"
#include "sb_pass.h"
#include "sb_sched.h"
+#include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
namespace r600_sb {
}
int post_scheduler::run() {
- run_on(sh.root);
- return 0;
+ return run_on(sh.root) ? 0 : 1;
}
-void post_scheduler::run_on(container_node* n) {
-
+bool post_scheduler::run_on(container_node* n) {
+ bool r = true;
for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
if (I->is_container()) {
if (I->subtype == NST_BB) {
bb_node* bb = static_cast<bb_node*>(*I);
- schedule_bb(bb);
+ r = schedule_bb(bb);
} else {
- run_on(static_cast<container_node*>(*I));
+ r = run_on(static_cast<container_node*>(*I));
}
+ if (!r)
+ break;
}
}
+ return r;
}
void post_scheduler::init_uc_val(container_node *c, value *v) {
return F == ucm.end() ? 0 : F->second;
}
-void post_scheduler::schedule_bb(bb_node* bb) {
+bool post_scheduler::schedule_bb(bb_node* bb) {
PSC_DUMP(
sblog << "scheduling BB " << bb->id << "\n";
if (!pending.empty())
sblog << "\n";
);
- if (n->subtype == NST_ALU_CLAUSE) {
+ // May require emitting ALU ops to load index registers
+ if (n->is_fetch_clause()) {
n->remove();
- process_alu(static_cast<container_node*>(n));
+ process_fetch(static_cast<container_node *>(n));
continue;
}
+ if (n->is_alu_clause()) {
+ n->remove();
+ bool r = process_alu(static_cast<container_node*>(n));
+ if (r)
+ continue;
+ return false;
+ }
+
n->remove();
bb->push_front(n);
}
this->cur_bb = NULL;
+ return true;
}
void post_scheduler::init_regmap() {
}
}
-void post_scheduler::process_alu(container_node *c) {
+// Build an ALU op that latches the address-register value into CF index
+// register 0 or 1 via SET_CF_IDX0/1 (Evergreen path; paired with an AR
+// load -- see load_index_register).  ar_idx must be V_SQ_CF_INDEX_0 or
+// V_SQ_CF_INDEX_1; the node is created but not yet reserved in a group.
+static alu_node *create_set_idx(shader &sh, unsigned ar_idx) {
+ alu_node *a = sh.create_alu();
+
+ assert(ar_idx == V_SQ_CF_INDEX_0 || ar_idx == V_SQ_CF_INDEX_1);
+ if (ar_idx == V_SQ_CF_INDEX_0)
+  a->bc.set_op(ALU_OP0_SET_CF_IDX0);
+ else
+  a->bc.set_op(ALU_OP0_SET_CF_IDX1);
+ a->bc.slot = SLOT_X;
+ a->dst.resize(1); // Dummy needed for recolor
+
+ PSC_DUMP(
+  sblog << "created IDX load: ";
+  dump::dump_op(a);
+  sblog << "\n";
+ );
+
+ return a;
+}
+
+// Emit a small ALU clause that loads index value v into CF index
+// register 0 or 1 (ar_idx is V_SQ_CF_INDEX_0/1), enabling indexed
+// (non-constant) resource/sampler access in a following clause.
+// On Cayman the MOVA-type load targets the index register directly
+// (create_ar_load picks the channel/dst); Evergreen additionally needs
+// a SET_CF_IDX0/1 op built by create_set_idx.
+void post_scheduler::load_index_register(value *v, unsigned ar_idx)
+{
+ alu.reset();
+
+ if (!sh.get_ctx().is_cayman()) {
+  // Evergreen has to first load address register, then use CF_SET_IDX0/1
+ alu_group_tracker &rt = alu.grp();
+ alu_node *set_idx = create_set_idx(sh, ar_idx);
+ if (!rt.try_reserve(set_idx)) {
+ // NOTE(review): reservation failure is only logged, not propagated
+ sblog << "can't emit SET_CF_IDX";
+ dump::dump_op(set_idx);
+ sblog << "\n";
+ }
+ process_group();
+
+ if (!alu.check_clause_limits()) {
+ // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
+ }
+ alu.emit_group();
+ }
+ // Both paths: load v through the AR mechanism; channel selects the
+ // target index register (Z -> IDX1, Y -> IDX0 per create_ar_load).
+ alu_group_tracker &rt = alu.grp();
+ alu_node *a = alu.create_ar_load(v, ar_idx == V_SQ_CF_INDEX_1 ? SEL_Z : SEL_Y);
+
+ if (!rt.try_reserve(a)) {
+ sblog << "can't emit AR load : ";
+ dump::dump_op(a);
+ sblog << "\n";
+ }
+
+ process_group();
+
+ if (!alu.check_clause_limits()) {
+ // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
+ }
+
+ alu.emit_group();
+ alu.emit_clause(cur_bb);
+}
+
+// Schedule a fetch (TEX/VTX) clause.  Scans the clause for an op using
+// a non-constant sampler or resource index; if found, emits the ALU
+// clause loading the CF index register, strips the index operand (last
+// src) from the fetch op, and pushes the clause onto the current BB.
+void post_scheduler::process_fetch(container_node *c) {
if (c->empty())
return;
+ for (node_iterator N, I = c->begin(), E = c->end(); I != E; I = N) {
+ N = I;
+ ++N;
+
+ node *n = *I;
+
+ fetch_node *f = static_cast<fetch_node*>(n);
+
+ PSC_DUMP(
+ sblog << "process_tex ";
+ dump::dump_op(n);
+ sblog << " ";
+ );
+
+ // TODO: If same values used can avoid reloading index register
+ if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ||
+ f->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
+ unsigned index_mode = f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ?
+ f->bc.sampler_index_mode : f->bc.resource_index_mode;
+
+ // Currently require prior opt passes to use one TEX per indexed op
+ assert(f->parent->count() == 1);
+
+ value *v = f->src.back(); // Last src is index offset
+ assert(v);
+
+ cur_bb->push_front(c);
+
+ load_index_register(v, index_mode);
+ f->src.pop_back(); // Don't need index value any more
+
+ // Clause holds exactly one indexed fetch (asserted above) -- done.
+ return;
+ }
+ }
+
+ cur_bb->push_front(c);
+}
+
+bool post_scheduler::process_alu(container_node *c) {
+
+ if (c->empty())
+ return true;
+
ucm.clear();
alu.reset();
if (uc) {
n->remove();
+
pending.push_back(n);
PSC_DUMP( sblog << "pending\n"; );
} else {
}
}
- schedule_alu(c);
+ return schedule_alu(c);
}
void post_scheduler::update_local_interferences() {
}
}
+// Emit any pending CF index-register loads recorded in alu.current_idx[]
+// (set by map_src_vec for kcache-indexed sources), restoring the saved
+// register map and discarding the in-progress group first.
+void post_scheduler::emit_index_registers() {
+ for (unsigned i = 0; i < 2; i++) {
+ if (alu.current_idx[i]) {
+ regmap = prev_regmap;
+ alu.discard_current_group();
+
+ // NOTE(review): passes KC_INDEX_0 + i, but load_index_register /
+ // create_set_idx assert V_SQ_CF_INDEX_0/1 -- confirm these enum
+ // values coincide.
+ load_index_register(alu.current_idx[i], KC_INDEX_0 + i);
+ alu.current_idx[i] = NULL;
+ }
+ }
+}
+
void post_scheduler::emit_clause() {
if (alu.current_ar) {
emit_load_ar();
process_group();
+ if (!alu.check_clause_limits()) {
+ // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
+ }
alu.emit_group();
}
- alu.emit_clause(cur_bb);
+ if (!alu.is_empty()) {
+ alu.emit_clause(cur_bb);
+ }
+
+ emit_index_registers();
}
-void post_scheduler::schedule_alu(container_node *c) {
+bool post_scheduler::schedule_alu(container_node *c) {
assert(!ready.empty() || !ready_copies.empty());
- while (1) {
-
+ bool improving = true;
+ int last_pending = pending.count();
+ while (improving) {
prev_regmap = regmap;
-
if (!prepare_alu_group()) {
+
+ int new_pending = pending.count();
+ improving = (new_pending < last_pending) || (last_pending == 0);
+ last_pending = new_pending;
+
+ if (alu.current_idx[0] || alu.current_idx[1]) {
+ regmap = prev_regmap;
+ emit_clause();
+ init_globals(live, false);
+
+ continue;
+ }
+
if (alu.current_ar) {
emit_load_ar();
continue;
regmap = prev_regmap;
emit_clause();
init_globals(live, false);
+
continue;
}
dump::dump_op_list(&pending);
assert(!"unscheduled pending instructions");
}
+ return improving;
}
void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
alu.discard_current_group();
alu_group_tracker &rt = alu.grp();
- alu_node *a = alu.create_ar_load();
+ alu_node *a = alu.create_ar_load(alu.current_ar, SEL_X);
if (!rt.try_reserve(a)) {
sblog << "can't emit AR load : ";
}
bool post_scheduler::map_src_vec(vvec &vv, bool src) {
+ if (src) {
+ // Handle possible UBO indexing
+ bool ubo_indexing[2] = { false, false };
+ for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
+ value *v = *I;
+ if (!v)
+ continue;
+
+ if (v->is_kcache()) {
+ unsigned index_mode = v->select.kcache_index_mode();
+ if (index_mode == KC_INDEX_0 || index_mode == KC_INDEX_1) {
+ ubo_indexing[index_mode - KC_INDEX_0] = true;
+ }
+ }
+ }
+
+ // idx values stored at end of src vec, see bc_parser::prepare_alu_group
+ for (unsigned i = 2; i != 0; i--) {
+ if (ubo_indexing[i-1]) {
+ // TODO: skip adding value to kcache reservation somehow, causes
+ // unnecessary group breaks and cache line locks
+ value *v = vv.back();
+ if (alu.current_idx[i-1] && alu.current_idx[i-1] != v) {
+ PSC_DUMP(
+ sblog << "IDX" << i-1 << " already set to " <<
+ *alu.current_idx[i-1] << ", trying to set " << *v << "\n";
+ );
+ return false;
+ }
+
+ alu.current_idx[i-1] = v;
+ PSC_DUMP(sblog << "IDX" << i-1 << " set to " << *v << "\n";);
+ }
+ }
+ }
+
for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
value *v = *I;
if (!v)
sblog << " current_AR: " << *alu.current_ar << "\n";
if (alu.current_pr)
sblog << " current_PR: " << *alu.current_pr << "\n";
+ if (alu.current_idx[0])
+ sblog << " current IDX0: " << *alu.current_idx[0] << "\n";
+ if (alu.current_idx[1])
+ sblog << " current IDX1: " << *alu.current_idx[1] << "\n";
}
void post_scheduler::recolor_locals() {
unsigned avail_slots = rt.avail_slots();
+ // Cannot schedule in same clause as instructions using this index value
+ if (!n->dst.empty() && n->dst[0] &&
+ (n->dst[0] == alu.current_idx[0] || n->dst[0] == alu.current_idx[1])) {
+ PSC_DUMP(sblog << " CF_IDX source: " << *n->dst[0] << "\n";);
+ return 0;
+ }
+
if (n->is_alu_packed()) {
alu_packed_node *p = static_cast<alu_packed_node*>(n);
unsigned slots = p->get_slot_mask();
grp0(sh), grp1(sh),
group(), clause(),
push_exec_mask(),
- current_ar(), current_pr() {}
+ current_ar(), current_pr(), current_idx() {}
void alu_clause_tracker::emit_group() {
// reserving slots to load AR and PR values
unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
+ // ...and index registers
+ reserve_slots += (current_idx[0] != NULL) + (current_idx[1] != NULL);
if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
return false;
unsigned cnt = 0;
for (unsigned i = 0; i < sel_count; ++i) {
- unsigned line = rp[i];
+ unsigned line = rp[i] & 0x1fffffffu;
+ unsigned index_mode = rp[i] >> 29;
if (!line)
return cnt;
--line;
line = (sel_count == 2) ? line >> 5 : line >> 6;
+ line |= index_mode << 29;
if (lines.insert(line).second)
++cnt;
memcpy(old_kc, kc, sizeof(kc));
for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) {
- unsigned line = *I;
+ unsigned index_mode = *I >> 29;
+ unsigned line = *I & 0x1fffffffu;
unsigned bank = line >> 8;
+ assert(index_mode <= KC_INDEX_INVALID);
line &= 0xFF;
- if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line))
- ++kc[c-1].mode;
- else {
+ if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line) &&
+ kc[c-1].index_mode == index_mode)
+ {
+ kc[c-1].mode = KC_LOCK_2;
+ } else {
if (c == max_kcs) {
memcpy(kc, old_kc, sizeof(kc));
return false;
kc[c].bank = bank;
kc[c].addr = line;
+ kc[c].index_mode = index_mode;
++c;
}
}
return true;
}
-alu_node* alu_clause_tracker::create_ar_load() {
+alu_node* alu_clause_tracker::create_ar_load(value *v, chan_select ar_channel) {
alu_node *a = sh.create_alu();
- // FIXME use MOVA_GPR on R6xx
-
if (sh.get_ctx().uses_mova_gpr) {
a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
a->bc.slot = SLOT_TRANS;
a->bc.set_op(ALU_OP1_MOVA_INT);
a->bc.slot = SLOT_X;
}
+ a->bc.dst_chan = ar_channel;
+ if (ar_channel != SEL_X && sh.get_ctx().is_cayman()) {
+ a->bc.dst_gpr = ar_channel == SEL_Y ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
+ }
a->dst.resize(1);
- a->src.push_back(current_ar);
+ a->src.push_back(v);
PSC_DUMP(
sblog << "created AR load: ";