From: Rob Clark Date: Mon, 23 Mar 2020 17:25:38 +0000 (-0700) Subject: freedreno/ir3/ra: pick higher numbered scalars in first pass X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=90f7d12236c5250bc56699a3071941daef0f515a;p=mesa.git freedreno/ir3/ra: pick higher numbered scalars in first pass Since we are re-assigning the scalars anyways in the second pass, assign them to the highest free reg in the first pass (rather than lowest) to allow packing vecN regs as low as possible. Note this required some changes specifically for tex instructions with a single component writemask that is not necessarily .x, as previously these would get assigned in the first RA pass, and since they are still scalar, we'd end up w/ some r47.* and other similarly way-too-high assignments after the 2nd pass. Signed-off-by: Rob Clark Part-of: --- diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c index 9b0037baa8f..a400406a1eb 100644 --- a/src/freedreno/ir3/ir3_depth.c +++ b/src/freedreno/ir3/ir3_depth.c @@ -177,11 +177,27 @@ compute_depth_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so) ir3_instr_depth(block->condition, 6, false); } - /* mark un-used instructions: */ + /* remove un-used instructions: */ foreach_block (block, &ir->block_list) { progress |= remove_unused_by_block(block); } + /* fixup wrmask of split instructions to account for adjusted tex + * wrmask's: + */ + foreach_block (block, &ir->block_list) { + foreach_instr (instr, &block->instr_list) { + if (instr->opc != OPC_META_SPLIT) + continue; + + struct ir3_instruction *src = ssa(instr->regs[1]); + if (!is_tex_or_prefetch(src)) + continue; + + instr->regs[1]->wrmask = src->regs[0]->wrmask; + } + } + /* note that we can end up with unused indirects, but we should * not end up with unused predicates. 
*/ diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index db51f3f8d23..a630202a3e2 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -384,7 +384,18 @@ ra_set_register_target(struct ir3_ra_ctx *ctx, unsigned max_target) static int pick_in_range(BITSET_WORD *regs, unsigned min, unsigned max) { - for (unsigned i = min; i < max; i++) { + for (unsigned i = min; i <= max; i++) { + if (BITSET_TEST(regs, i)) { + return i; + } + } + return -1; +} + +static int +pick_in_range_rev(BITSET_WORD *regs, int min, int max) +{ + for (int i = max; i >= min; i--) { if (BITSET_TEST(regs, i)) { return i; } @@ -398,6 +409,10 @@ ra_select_reg_merged(unsigned int n, BITSET_WORD *regs, void *data) { struct ir3_ra_ctx *ctx = data; unsigned int class = ra_get_node_class(ctx->g, n); + bool half, high; + int sz = ra_class_to_size(class, &half, &high); + + assert (sz > 0); /* dimensions within the register class: */ unsigned max_target, start; @@ -410,23 +425,51 @@ ra_select_reg_merged(unsigned int n, BITSET_WORD *regs, void *data) */ unsigned base; + /* TODO I think eventually we want to round-robin in vector pass + * as well, but needs some more work to calculate # of live vals + * for this. (Maybe with some work, we could just figure out + * the scalar target and use that, since that is what we care + * about in the end.. but that would mean setting up use-def/ + * liveranges for scalar pass before doing vector pass.) + * + * For now, in the vector class, just move assignments for scalar + * vals higher to hopefully prevent them from limiting where vecN + * values can be placed. Since the scalar values are re-assigned + * in the 2nd pass, we don't really care where they end up in the + * vector pass. 
+ */ + if (!ctx->scalar_pass) { + base = ctx->set->gpr_to_ra_reg[class][0]; + if (high) { + max_target = HIGH_CLASS_REGS(sz); + } else if (half) { + max_target = HALF_CLASS_REGS(sz); + } else { + max_target = CLASS_REGS(sz); + } + + if ((sz == 1) && !high) { + return pick_in_range_rev(regs, base, base + max_target); + } else { + return pick_in_range(regs, base, base + max_target); + } + } else { + assert(sz == 1); + } + /* NOTE: this is only used in scalar pass, so the register * class will be one of the scalar classes (ie. idx==0): */ - if (class == ctx->set->high_classes[0]) { + base = ctx->set->gpr_to_ra_reg[class][0]; + if (high) { max_target = HIGH_CLASS_REGS(0); start = 0; - base = ctx->set->gpr_to_ra_reg[HIGH_OFFSET][0]; - } else if (class == ctx->set->half_classes[0]) { + } else if (half) { max_target = ctx->max_target; start = ctx->start_search_reg; - base = ctx->set->gpr_to_ra_reg[HALF_OFFSET][0]; - } else if (class == ctx->set->classes[0]) { + } else { max_target = ctx->max_target / 2; start = ctx->start_search_reg; - base = ctx->set->gpr_to_ra_reg[0][0]; - } else { - unreachable("unexpected register class!"); } /* For cat4 instructions, if the src reg is already assigned, and @@ -447,6 +490,19 @@ ra_select_reg_merged(unsigned int n, BITSET_WORD *regs, void *data) return reg; } } + } else if (is_tex_or_prefetch(instr)) { + /* we could have a tex fetch w/ wrmask .z, for example.. these + * cannot land in r0.x since that would underflow when we + * subtract the offset. Ie. 
if we pick r0.z, and subtract + * the offset, the register encoded for dst will be r0.x + */ + unsigned n = ffs(instr->regs[0]->wrmask); + debug_assert(n > 0); + unsigned offset = n - 1; + if (!half) + offset *= 2; + base += offset; + max_target -= offset; } int r = pick_in_range(regs, base + start, base + max_target); @@ -514,11 +570,13 @@ ra_init(struct ir3_ra_ctx *ctx) ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); /* TODO add selector callback for split (pre-a6xx) register file: */ - if (ctx->scalar_pass && (ctx->ir->compiler->gpu_id >= 600)) { + if (ctx->ir->compiler->gpu_id >= 600) { ra_set_select_reg_callback(ctx->g, ra_select_reg_merged, ctx); - ctx->name_to_instr = _mesa_hash_table_create(ctx->g, - _mesa_hash_int, _mesa_key_int_equal); + if (ctx->scalar_pass) { + ctx->name_to_instr = _mesa_hash_table_create(ctx->g, + _mesa_hash_int, _mesa_key_int_equal); + } } } @@ -1154,8 +1212,11 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg, static bool should_assign(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr) { - if ((instr->opc == OPC_META_SPLIT) || - (instr->opc == OPC_META_COLLECT)) + if ((instr->opc == OPC_META_SPLIT) && + (util_bitcount(instr->regs[1]->wrmask) > 1)) + return !ctx->scalar_pass; + if ((instr->opc == OPC_META_COLLECT) && + (util_bitcount(instr->regs[0]->wrmask) > 1)) return !ctx->scalar_pass; return ctx->scalar_pass; } @@ -1390,8 +1451,10 @@ ra_precolor_assigned(struct ir3_ra_ctx *ctx) foreach_block (block, &ctx->ir->block_list) { foreach_instr (instr, &block->instr_list) { - if ((instr->opc != OPC_META_SPLIT) && - (instr->opc != OPC_META_COLLECT)) + if (!writes_gpr(instr)) + continue; + + if (should_assign(ctx, instr)) continue; precolor(ctx, instr); diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h index 4c5cab0edfc..ab7211770d8 100644 --- a/src/freedreno/ir3/ir3_ra.h +++ b/src/freedreno/ir3/ir3_ra.h @@ -333,5 +333,6 @@ __ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction 
*instr) __name != NO_NAME; __name = __ra_itr_pop(__ctx)) int ra_size_to_class(unsigned sz, bool half, bool high); +int ra_class_to_size(unsigned class, bool *half, bool *high); #endif /* IR3_RA_H_ */ diff --git a/src/freedreno/ir3/ir3_ra_regset.c b/src/freedreno/ir3/ir3_ra_regset.c index d0e77bc5bf4..5ef309e43f7 100644 --- a/src/freedreno/ir3/ir3_ra_regset.c +++ b/src/freedreno/ir3/ir3_ra_regset.c @@ -229,3 +229,19 @@ ra_size_to_class(unsigned sz, bool half, bool high) debug_assert(0); return -1; } + +int +ra_class_to_size(unsigned class, bool *half, bool *high) +{ + *half = *high = false; + + if (class >= HIGH_OFFSET) { + *high = true; + return high_class_sizes[class - HIGH_OFFSET]; + } else if (class >= HALF_OFFSET) { + *half = true; + return half_class_sizes[class - HALF_OFFSET]; + } else { + return class_sizes[class]; + } +}