From: Rob Clark Date: Sat, 21 Mar 2020 21:44:44 +0000 (-0700) Subject: freedreno/ir3/ra: add def/use iterators X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=6347c2ea89bde624dd16cff6741db57e89d88ad5 freedreno/ir3/ra: add def/use iterators Decouple the messy logic of figuring out vreg names defined/used by an instruction from the logic of what to do about it by introducing iterators. There is still *some* array vs ssa special casing in ra_block_compute_live_ranges(), but less than before. And this will avoid introducing a second copy of the def/use logic in a following patch which uses the liveranges to calculate the maximum # of live values (which is the optimal target for max physical register window to round-robin within). Signed-off-by: Rob Clark Part-of: --- diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index d4663f6167d..fa379c3495b 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -585,159 +585,69 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) } foreach_instr (instr, &block->instr_list) { - struct ir3_instruction *src; - struct ir3_register *reg; - - if (writes_gpr(instr)) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - struct ir3_register *dst = instr->regs[0]; - - if (dst->flags & IR3_REG_ARRAY) { - struct ir3_array *arr = - ir3_lookup_array(ctx->ir, dst->array.id); - unsigned i; + foreach_def (name, ctx, instr) { + if (name_is_array(ctx, name)) { + struct ir3_array *arr = name_to_array(ctx, name); arr->start_ip = MIN2(arr->start_ip, instr->ip); arr->end_ip = MAX2(arr->end_ip, instr->ip); - /* set the node class now.. in case we don't encounter - * this array dst again. 
From register_alloc algo's - * perspective, these are all single/scalar regs: - */ - for (i = 0; i < arr->length; i++) { + for (unsigned i = 0; i < arr->length; i++) { unsigned name = arr->base + i; if(arr->half) ra_set_node_class(ctx->g, name, ctx->set->half_classes[0]); else ra_set_node_class(ctx->g, name, ctx->set->classes[0]); } - - /* indirect write is treated like a write to all array - * elements, since we don't know which one is actually - * written: - */ - if (dst->flags & IR3_REG_RELATIV) { - for (i = 0; i < arr->length; i++) { - unsigned name = arr->base + i; - def(name, instr); - } + } else { + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + if (is_high(instr)) { + ra_set_node_class(ctx->g, name, + ctx->set->high_classes[id->cls - HIGH_OFFSET]); + } else if (is_half(instr)) { + ra_set_node_class(ctx->g, name, + ctx->set->half_classes[id->cls - HALF_OFFSET]); } else { - unsigned name = arr->base + dst->array.offset; - def(name, instr); - } - } else if (id->defn == instr) { - /* in scalar pass, we aren't considering virtual register - * classes, ie. if an instruction writes a vec2, then it - * defines two different scalar register names. - */ - unsigned n = ctx->scalar_pass ? dest_regs(instr) : 1; - for (unsigned i = 0; i < n; i++) { - unsigned name = scalar_name(ctx, instr, i); - - /* split/collect instructions have duplicate names - * as real instructions, so they skip the hashtable: - */ - if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) || - (instr->opc == OPC_META_COLLECT))) { - /* this is slightly annoying, we can't just use an - * integer on the stack - */ - unsigned *key = ralloc(ctx->name_to_instr, unsigned); - *key = name; - debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key)); - _mesa_hash_table_insert(ctx->name_to_instr, key, instr); - } - - /* tex instructions actually have a wrmask, and - * don't touch masked out components. 
We can't do - * anything useful about that in the first pass, - * but in the scalar pass we can realize these - * registers are available: - */ - if (ctx->scalar_pass && is_tex_or_prefetch(instr) && - !(instr->regs[0]->wrmask & (1 << i))) - continue; - - def(name, instr); - - if ((instr->opc == OPC_META_INPUT) && first_non_input) - use(name, first_non_input); - - if (is_high(instr)) { - ra_set_node_class(ctx->g, name, - ctx->set->high_classes[id->cls - HIGH_OFFSET]); - } else if (is_half(instr)) { - ra_set_node_class(ctx->g, name, - ctx->set->half_classes[id->cls - HALF_OFFSET]); - } else { - ra_set_node_class(ctx->g, name, - ctx->set->classes[id->cls]); - } + ra_set_node_class(ctx->g, name, + ctx->set->classes[id->cls]); } } + + def(name, instr); + + if ((instr->opc == OPC_META_INPUT) && first_non_input) + use(name, first_non_input); } - foreach_src (reg, instr) { - if (reg->flags & IR3_REG_ARRAY) { - struct ir3_array *arr = - ir3_lookup_array(ctx->ir, reg->array.id); + foreach_use (name, ctx, instr) { + if (name_is_array(ctx, name)) { + struct ir3_array *arr = name_to_array(ctx, name); + arr->start_ip = MIN2(arr->start_ip, instr->ip); arr->end_ip = MAX2(arr->end_ip, instr->ip); - /* indirect read is treated like a read from all array - * elements, since we don't know which one is actually - * read: + /* NOTE: arrays are not SSA so unconditionally + * set use bit: */ - if (reg->flags & IR3_REG_RELATIV) { - unsigned i; - for (i = 0; i < arr->length; i++) { - unsigned name = arr->base + i; - use(name, instr); - BITSET_SET(bd->use, name); - } - } else { - unsigned name = arr->base + reg->array.offset; - use(name, instr); - /* NOTE: arrays are not SSA so unconditionally - * set use bit: - */ - BITSET_SET(bd->use, name); - debug_assert(reg->array.offset < arr->length); - } - } else if (ctx->scalar_pass) { - struct ir3_instruction *src = reg->instr; - /* skip things that aren't SSA: */ - unsigned n = src ? 
dest_regs(src) : 0; - - /* in scalar pass, we aren't considering virtual register - * classes, ie. if an instruction writes a vec2, then it - * defines two different scalar register names. - * - * We need to traverse up thru collect/split to find the - * actual non-meta instruction names for each of the - * components: + BITSET_SET(bd->use, name); + } + + use(name, instr); + } + + foreach_name (name, ctx, instr) { + /* split/collect instructions have duplicate names + * as real instructions, so they skip the hashtable: + */ + if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) || + (instr->opc == OPC_META_COLLECT))) { + /* this is slightly annoying, we can't just use an + * integer on the stack */ - for (unsigned i = 0; i < n; i++) { - /* Need to filter out a couple special cases, ie. - * writes to a0.x or p0.x: - */ - if (!writes_gpr(src)) - continue; - - /* split takes a src w/ wrmask potentially greater - * than 0x1, but it really only cares about a single - * component. This shows up in splits coming out of - * a tex instruction w/ wrmask=.z, for example. - */ - if ((instr->opc == OPC_META_SPLIT) && - !(i == instr->split.off)) - continue; - - use(scalar_name(ctx, src, i), instr); - } - } else if ((src = ssa(reg)) && writes_gpr(src)) { - unsigned name = ra_name(ctx, &ctx->instrd[src->ip]); - use(name, instr); + unsigned *key = ralloc(ctx->name_to_instr, unsigned); + *key = name; + debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key)); + _mesa_hash_table_insert(ctx->name_to_instr, key, instr); } } } diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h index f9c2155b7df..db21eb9f220 100644 --- a/src/freedreno/ir3/ir3_ra.h +++ b/src/freedreno/ir3/ir3_ra.h @@ -134,6 +134,18 @@ struct ir3_ra_ctx { /* Tracking for select_reg callback */ unsigned start_search_reg; unsigned max_target; + + /* Temporary buffer for def/use iterators + * + * The worst case should probably be an array w/ relative access (ie. 
+ * all elements are def'd or use'd), and that can't be larger than + * the number of registers. + * + * NOTE we could declare this on the stack if needed, but I don't + * think there is a need for nested iterators. + */ + unsigned namebuf[NUM_REGS]; + unsigned namecnt, nameidx; }; static inline int @@ -182,6 +194,153 @@ writes_gpr(struct ir3_instruction *instr) return true; } +#define NO_NAME ~0 + +/* + * Iterators to iterate the vreg names of an instruction's defs and uses + */ + +static inline unsigned +__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr) +{ + if (!instr) + return 0; + + /* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */ + if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY)) + return 0; + + /* in scalar pass, we aren't considering virtual register classes, ie. + * if an instruction writes a vec2, then it defines two different scalar + * register names. + */ + if (ctx->scalar_pass) + return dest_regs(instr); + + return 1; +} + +#define foreach_name_n(__name, __n, __ctx, __instr) \ + for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \ + (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++) + +#define foreach_name(__name, __ctx, __instr) \ + foreach_name_n(__name, __n, __ctx, __instr) + +static inline unsigned +__ra_itr_pop(struct ir3_ra_ctx *ctx) +{ + if (ctx->nameidx < ctx->namecnt) + return ctx->namebuf[ctx->nameidx++]; + return NO_NAME; +} + +static inline void +__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name) +{ + assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf)); + ctx->namebuf[ctx->namecnt++] = name; +} + +static inline unsigned +__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr) +{ + /* nested use is not supported: */ + assert(ctx->namecnt == ctx->nameidx); + + ctx->namecnt = ctx->nameidx = 0; + + if (!writes_gpr(instr)) + return NO_NAME; + + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + struct ir3_register *dst = 
instr->regs[0]; + + if (dst->flags & IR3_REG_ARRAY) { + struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id); + + /* indirect write is treated like a write to all array + * elements, since we don't know which one is actually + * written: + */ + if (dst->flags & IR3_REG_RELATIV) { + for (unsigned i = 0; i < arr->length; i++) { + __ra_itr_push(ctx, arr->base + i); + } + } else { + __ra_itr_push(ctx, arr->base + dst->array.offset); + debug_assert(dst->array.offset < arr->length); + } + } else if (id->defn == instr) { + foreach_name_n (name, i, ctx, instr) { + /* tex instructions actually have a wrmask, and + * don't touch masked out components. We can't do + * anything useful about that in the first pass, + * but in the scalar pass we can realize these + * registers are available: + */ + if (ctx->scalar_pass && is_tex_or_prefetch(instr) && + !(instr->regs[0]->wrmask & (1 << i))) + continue; + __ra_itr_push(ctx, name); + } + } + + return __ra_itr_pop(ctx); +} + +static inline unsigned +__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr) +{ + /* nested use is not supported: */ + assert(ctx->namecnt == ctx->nameidx); + + ctx->namecnt = ctx->nameidx = 0; + + struct ir3_register *reg; + foreach_src (reg, instr) { + if (reg->flags & IR3_REG_ARRAY) { + struct ir3_array *arr = + ir3_lookup_array(ctx->ir, reg->array.id); + + /* indirect read is treated like a read from all array + * elements, since we don't know which one is actually + * read: + */ + if (reg->flags & IR3_REG_RELATIV) { + for (unsigned i = 0; i < arr->length; i++) { + __ra_itr_push(ctx, arr->base + i); + } + } else { + __ra_itr_push(ctx, arr->base + reg->array.offset); + debug_assert(reg->array.offset < arr->length); + } + } else { + foreach_name_n (name, i, ctx, reg->instr) { + /* split takes a src w/ wrmask potentially greater + * than 0x1, but it really only cares about a single + * component. 
This shows up in splits coming out of + * a tex instruction w/ wrmask=.z, for example. + */ + if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) && + !(i == instr->split.off)) + continue; + __ra_itr_push(ctx, name); + } + } + } + + return __ra_itr_pop(ctx); +} + +#define foreach_def(__name, __ctx, __instr) \ + for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \ + __name != NO_NAME; __name = __ra_itr_pop(__ctx)) + +#define foreach_use(__name, __ctx, __instr) \ + for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \ + __name != NO_NAME; __name = __ra_itr_pop(__ctx)) + int ra_size_to_class(unsigned sz, bool half, bool high); #endif /* IR3_RA_H_ */