debug_assert(instr->block == addr->block);
instr->address = addr;
- array_insert(ir, ir->indirects, instr);
+ debug_assert(reg_num(addr->regs[0]) == REG_A0);
+ unsigned comp = reg_comp(addr->regs[0]);
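+ /* a0.x and a1.x users are tracked in separate lists, so the
+  * scheduler can spill each address register independently:
+  */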
+ if (comp == 0) {
+ array_insert(ir, ir->a0_users, instr);
+ } else {
+ debug_assert(comp == 1);
+ array_insert(ir, ir->a1_users, instr);
+ }
}
}
* convenient list of instructions that reference some address
* register simplifies this.
*/
- DECLARE_ARRAY(struct ir3_instruction *, indirects);
+ DECLARE_ARRAY(struct ir3_instruction *, a0_users);
+
+ /* same for a1.x: */
+ DECLARE_ARRAY(struct ir3_instruction *, a1_users);
/* and same for instructions that consume predicate register: */
DECLARE_ARRAY(struct ir3_instruction *, predicates);
dst = instr->regs[0];
- /* mov's that write to a0.x or p0.x are special: */
+ /* mov's that write to a0 or p0.x are special: */
if (dst->num == regid(REG_P0, 0))
return false;
- if (dst->num == regid(REG_A0, 0))
+ if (reg_num(dst) == REG_A0)
return false;
if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
return util_last_bit(instr->regs[0]->wrmask);
}
-static inline bool writes_addr(struct ir3_instruction *instr)
+static inline bool writes_addr0(struct ir3_instruction *instr)
+{
+ if (instr->regs_count > 0) {
+ struct ir3_register *dst = instr->regs[0];
+ return dst->num == regid(REG_A0, 0);
+ }
+ return false;
+}
+
+static inline bool writes_addr1(struct ir3_instruction *instr)
{
if (instr->regs_count > 0) {
struct ir3_register *dst = instr->regs[0];
- return reg_num(dst) == REG_A0;
+ return dst->num == regid(REG_A0, 1);
}
return false;
}
base_lo = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz));
base_hi = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz) + 1);
} else {
- base_lo = create_uniform_indirect(b, ubo, ir3_get_addr(ctx, src0, ptrsz));
- base_hi = create_uniform_indirect(b, ubo + 1, ir3_get_addr(ctx, src0, ptrsz));
+ base_lo = create_uniform_indirect(b, ubo, ir3_get_addr0(ctx, src0, ptrsz));
+ base_hi = create_uniform_indirect(b, ubo + 1, ir3_get_addr0(ctx, src0, ptrsz));
/* NOTE: since relative addressing is used, make sure constlen is
* at least big enough to cover all the UBO addresses, since the
src = ir3_get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
dst[i] = create_uniform_indirect(b, idx + i,
- ir3_get_addr(ctx, src[0], 1));
+ ir3_get_addr0(ctx, src[0], 1));
}
/* NOTE: if relative addressing is used, we set
* constlen in the compiler (to worst-case value)
src = ir3_get_src(ctx, &intr->src[0]);
struct ir3_instruction *collect =
ir3_create_collect(ctx, ctx->ir->inputs, ctx->ninputs);
- struct ir3_instruction *addr = ir3_get_addr(ctx, src[0], 4);
+ struct ir3_instruction *addr = ir3_get_addr0(ctx, src[0], 4);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i + comp;
dst[i] = create_indirect_load(ctx, ctx->ninputs,
list_addtail(&block->node, &ctx->ir->block_list);
/* re-emit addr register in each block if needed: */
- for (int i = 0; i < ARRAY_SIZE(ctx->addr_ht); i++) {
- _mesa_hash_table_destroy(ctx->addr_ht[i], NULL);
- ctx->addr_ht[i] = NULL;
+ for (int i = 0; i < ARRAY_SIZE(ctx->addr0_ht); i++) {
+ _mesa_hash_table_destroy(ctx->addr0_ht[i], NULL);
+ ctx->addr0_ht[i] = NULL;
}
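+ /* and clear the a1.x cache, so a1.x is also re-emitted per-block: */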
+ _mesa_hash_table_u64_destroy(ctx->addr1_ht, NULL);
+ ctx->addr1_ht = NULL;
+
nir_foreach_instr (instr, nblock) {
ctx->cur_instr = instr;
emit_instr(ctx, instr);
ralloc_array(ctx, struct ir3_instruction *, num_components);
if (src->reg.indirect)
- addr = ir3_get_addr(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
+ addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
reg->num_components);
for (unsigned i = 0; i < num_components; i++) {
struct ir3_instruction *addr = NULL;
if (dst->reg.indirect)
- addr = ir3_get_addr(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
+ addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
reg->num_components);
for (unsigned i = 0; i < num_components; i++) {
}
static struct ir3_instruction *
-create_addr(struct ir3_block *block, struct ir3_instruction *src, int align)
+create_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
{
struct ir3_instruction *instr, *immed;
return instr;
}
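+/* a1.x is so far only ever loaded with an immediate value (see
+ * ir3_get_addr1()), emitted as a 16-bit mov of the constant:
+ */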
+static struct ir3_instruction *
+create_addr1(struct ir3_block *block, unsigned const_val)
+{
+ struct ir3_instruction *immed = create_immed(block, const_val);
+ struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_S16);
+ instr->regs[0]->num = regid(REG_A0, 1);
+ instr->regs[0]->flags &= ~IR3_REG_SSA;
+ instr->regs[0]->flags |= IR3_REG_HALF;
+ instr->regs[1]->flags |= IR3_REG_HALF;
+ return instr;
+}
+
/* caches addr values to avoid generating multiple cov/shl/mova
* sequences for each use of a given NIR level src as address
*/
struct ir3_instruction *
-ir3_get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align)
+ir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
{
struct ir3_instruction *addr;
unsigned idx = align - 1;
- compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht));
+ compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));
- if (!ctx->addr_ht[idx]) {
- ctx->addr_ht[idx] = _mesa_hash_table_create(ctx,
+ if (!ctx->addr0_ht[idx]) {
+ ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
} else {
struct hash_entry *entry;
- entry = _mesa_hash_table_search(ctx->addr_ht[idx], src);
+ entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
if (entry)
return entry->data;
}
- addr = create_addr(ctx->block, src, align);
- _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr);
+ addr = create_addr0(ctx->block, src, align);
+ _mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);
+
+ return addr;
+}
+
+/* Similar to ir3_get_addr0, but for a1.x. */
+struct ir3_instruction *
+ir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
+{
+ struct ir3_instruction *addr;
+
+ if (!ctx->addr1_ht) {
+ ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
+ } else {
+ addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
+ if (addr)
+ return addr;
+ }
+
+ addr = create_addr1(ctx->block, const_val);
+ _mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);
return addr;
}
* src used for an array of vec1 cannot be also used for an
* array of vec4.
*/
- struct hash_table *addr_ht[4];
+ struct hash_table *addr0_ht[4];
+
+ /* The same for a1.x. We only support immediate values for a1.x, as this
+ * is the only use so far.
+ */
+ struct hash_table_u64 *addr1_ht;
/* last dst array, for indirect we need to insert a var-store.
*/
if (!(cond)) ir3_context_error((ctx), "failed assert: "#cond"\n"); \
} while (0)
-struct ir3_instruction * ir3_get_addr(struct ir3_context *ctx,
+struct ir3_instruction * ir3_get_addr0(struct ir3_context *ctx,
struct ir3_instruction *src, int align);
+struct ir3_instruction * ir3_get_addr1(struct ir3_context *ctx,
+ unsigned const_val);
struct ir3_instruction * ir3_get_predicate(struct ir3_context *ctx,
struct ir3_instruction *src);
if (is_meta(assigner) || is_meta(consumer))
return 0;
- if (writes_addr(assigner))
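+ /* writes to either address register need 6 delay slots before a
+  * consumer can read the result:
+  */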
+ if (writes_addr0(assigner) || writes_addr1(assigner))
return 6;
/* On a6xx, it takes the number of delay slots to get a SFU result
/* note that we can end up with unused indirects, but we should
* not end up with unused predicates.
*/
- for (i = 0; i < ir->indirects_count; i++) {
- struct ir3_instruction *instr = ir->indirects[i];
+ for (i = 0; i < ir->a0_users_count; i++) {
+ struct ir3_instruction *instr = ir->a0_users[i];
if (instr && (instr->flags & IR3_INSTR_UNUSED))
- ir->indirects[i] = NULL;
+ ir->a0_users[i] = NULL;
+ }
+
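+ /* and same for unused a1.x users: */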
+ for (i = 0; i < ir->a1_users_count; i++) {
+ struct ir3_instruction *instr = ir->a1_users[i];
+ if (instr && (instr->flags & IR3_INSTR_UNUSED))
+ ir->a1_users[i] = NULL;
}
/* cleanup unused inputs: */
calculate_deps(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *node)
{
- static const struct ir3_register half_reg = { .flags = IR3_REG_HALF };
struct ir3_register *reg;
int b;
* in the reverse direction) wrote any of our src registers:
*/
foreach_src_n (reg, i, node->instr) {
- /* NOTE: relative access for a src can be either const or gpr: */
- if (reg->flags & IR3_REG_RELATIV) {
- /* also reads a0.x: */
- add_reg_dep(state, node, &half_reg, regid(REG_A0, 0), false);
- }
-
if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
continue;
}
}
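+ /* if the instruction uses relative addressing, it also depends on
+  * the register written by its ->address instruction (a0.x or a1.x):
+  */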
+ if (node->instr->address) {
+ add_reg_dep(state, node, node->instr->address->regs[0],
+ node->instr->address->regs[0]->num,
+ false);
+ }
+
if (dest_regs(node->instr) == 0)
return;
for (unsigned i = 0; i < arr->length; i++) {
add_reg_dep(state, node, reg, arr->reg + i, true);
}
-
- /* also reads a0.x: */
- add_reg_dep(state, node, &half_reg, regid(REG_A0, 0), false);
} else {
foreach_bit (b, reg->wrmask) {
add_reg_dep(state, node, reg, reg->num + b, true);
if (instr->regs_count == 0)
continue;
/* couple special cases: */
- if (writes_addr(instr) || writes_pred(instr)) {
+ if (writes_addr0(instr) || writes_addr1(instr) || writes_pred(instr)) {
id->cls = -1;
} else if (instr->regs[0]->flags & IR3_REG_ARRAY) {
id->cls = total_class_count;
/* is dest a normal temp register: */
struct ir3_register *reg = instr->regs[0];
debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)));
- if ((reg->num == regid(REG_A0, 0)) ||
+ if ((reg_num(reg) == REG_A0) ||
(reg->num == regid(REG_P0, 0)))
return false;
return true;
struct ir3_block *block; /* the current block */
struct list_head depth_list; /* depth sorted unscheduled instrs */
struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
- struct ir3_instruction *addr; /* current a0.x user, if any */
+ struct ir3_instruction *addr0; /* current a0.x user, if any */
+ struct ir3_instruction *addr1; /* current a1.x user, if any */
struct ir3_instruction *pred; /* current p0.x user, if any */
int live_values; /* estimate of current live values */
int half_live_values; /* estimate of current half precision live values */
*/
list_delinit(&instr->node);
- if (writes_addr(instr)) {
- debug_assert(ctx->addr == NULL);
- ctx->addr = instr;
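+ /* track the live a0.x/a1.x writer, so a conflicting write can be
+  * detected (and resolved via split_addr()):
+  */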
+ if (writes_addr0(instr)) {
+ debug_assert(ctx->addr0 == NULL);
+ ctx->addr0 = instr;
+ }
+
+ if (writes_addr1(instr)) {
+ debug_assert(ctx->addr1 == NULL);
+ ctx->addr1 = instr;
}
if (writes_pred(instr)) {
update_live_values(ctx, instr);
- if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
+ if (writes_addr0(instr) || writes_addr1(instr) ||
+ 			writes_pred(instr) || is_input(instr)) {
clear_cache(ctx, NULL);
} else {
/* invalidate only the necessary entries.. */
/* there is at least one instruction that could be scheduled,
* except for conflicting address/predicate register usage:
*/
- bool addr_conflict, pred_conflict;
+ bool addr0_conflict, addr1_conflict, pred_conflict;
};
/* could an instruction be scheduled if specified ssa src was scheduled? */
* TODO if any instructions use pred register and have other
* src args, we would need to do the same for writes_pred()..
*/
- if (writes_addr(instr)) {
+ if (writes_addr0(instr)) {
struct ir3 *ir = instr->block->shader;
bool ready = false;
- for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
- struct ir3_instruction *indirect = ir->indirects[i];
+ for (unsigned i = 0; (i < ir->a0_users_count) && !ready; i++) {
+ struct ir3_instruction *indirect = ir->a0_users[i];
+ if (!indirect)
+ continue;
+ if (indirect->address != instr)
+ continue;
+ ready = could_sched(indirect, instr);
+ }
+
+ /* nothing could be scheduled, so keep looking: */
+ if (!ready)
+ return false;
+ }
+
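+ /* and the same check for pending a1.x users: */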
+ if (writes_addr1(instr)) {
+ struct ir3 *ir = instr->block->shader;
+ bool ready = false;
+ for (unsigned i = 0; (i < ir->a1_users_count) && !ready; i++) {
+ struct ir3_instruction *indirect = ir->a1_users[i];
if (!indirect)
continue;
if (indirect->address != instr)
* register is currently in use, we need to defer until it is
* free:
*/
- if (writes_addr(instr) && ctx->addr) {
- debug_assert(ctx->addr != instr);
- notes->addr_conflict = true;
+ if (writes_addr0(instr) && ctx->addr0) {
+ debug_assert(ctx->addr0 != instr);
+ notes->addr0_conflict = true;
+ return false;
+ }
+
+ if (writes_addr1(instr) && ctx->addr1) {
+ debug_assert(ctx->addr1 != instr);
+ notes->addr1_conflict = true;
return false;
}
return new_instr;
}
-/* "spill" the address register by remapping any unscheduled
+/* "spill" the address registers by remapping any unscheduled
* instructions which depend on the current address register
* to a clone of the instruction which wrote the address reg.
*/
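+/* The caller passes the scheduler's per-register state (&ctx->addr0 or
+ * &ctx->addr1) plus the matching a0_users/a1_users list.
+ */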
static struct ir3_instruction *
-split_addr(struct ir3_sched_ctx *ctx)
+split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr,
+ struct ir3_instruction **users, unsigned users_count)
{
- struct ir3 *ir;
struct ir3_instruction *new_addr = NULL;
unsigned i;
- debug_assert(ctx->addr);
-
- ir = ctx->addr->block->shader;
+ debug_assert(*addr);
- for (i = 0; i < ir->indirects_count; i++) {
- struct ir3_instruction *indirect = ir->indirects[i];
+ for (i = 0; i < users_count; i++) {
+ struct ir3_instruction *indirect = users[i];
if (!indirect)
continue;
/* remap remaining instructions using current addr
* to new addr:
*/
- if (indirect->address == ctx->addr) {
+ if (indirect->address == *addr) {
if (!new_addr) {
- new_addr = split_instr(ctx, ctx->addr);
+ new_addr = split_instr(ctx, *addr);
/* original addr is scheduled, but new one isn't: */
new_addr->flags &= ~IR3_INSTR_MARK;
}
}
/* all remaining indirects remapped to new addr: */
- ctx->addr = NULL;
+ *addr = NULL;
return new_addr;
}
ctx->block = block;
/* addr/pred writes are per-block: */
- ctx->addr = NULL;
+ ctx->addr0 = NULL;
+ ctx->addr1 = NULL;
ctx->pred = NULL;
/* move all instructions to the unscheduled list, and
schedule(ctx, instr);
} else {
struct ir3_instruction *new_instr = NULL;
+ struct ir3 *ir = block->shader;
/* nothing available to schedule.. if we are blocked on
* address/predicate register conflict, then break the
* deadlock by cloning the instruction that wrote that
* reg:
*/
- if (notes.addr_conflict) {
- new_instr = split_addr(ctx);
+ if (notes.addr0_conflict) {
+ new_instr = split_addr(ctx, &ctx->addr0,
+ ir->a0_users, ir->a0_users_count);
+ } else if (notes.addr1_conflict) {
+ new_instr = split_addr(ctx, &ctx->addr1,
+ ir->a1_users, ir->a1_users_count);
} else if (notes.pred_conflict) {
new_instr = split_pred(ctx);
} else {