X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fpanfrost%2Fmidgard%2Fmir_promote_uniforms.c;h=12f860787c1daa8f14deb1145a701971b9d90d90;hb=a8beef332dbde0bb37d68bd2a53a00f9ad0c178d;hp=27e25cad8bf1142ea20211117ff2310d85914f4a;hpb=3d54ed2488c90873e78d3267e967f9bca4b75ab4;p=mesa.git

diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c
index 27e25cad8bf..12f860787c1 100644
--- a/src/panfrost/midgard/mir_promote_uniforms.c
+++ b/src/panfrost/midgard/mir_promote_uniforms.c
@@ -26,50 +26,117 @@
 
 #include "compiler.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 
 /* This pass promotes reads from uniforms from load/store ops to uniform
  * registers if it is beneficial to do so. Normally, this saves both
  * instructions and total register pressure, but it does take a toll on the
  * number of work registers that are available, so this is a balance.
  *
- * To cope, we take as an argument the maximum work register pressure in the
- * program so we allow that many registers through at minimum, to prevent
- * spilling. If we spill anyway, I mean, it's a lose-lose at that point. */
+ * We use a heuristic to determine the ideal count, implemented by
+ * mir_work_heuristic, which returns the ideal number of work registers.
+ */
+
+static bool
+mir_is_promoteable_ubo(midgard_instruction *ins)
+{
+        /* TODO: promote unaligned access via swizzle? */
+
+        return (ins->type == TAG_LOAD_STORE_4) &&
+                (OP_IS_UBO_READ(ins->load_store.op)) &&
+                !(ins->constants[0] & 0xF) &&
+                !(ins->load_store.arg_1) &&
+                (ins->load_store.arg_2 == 0x1E) &&
+                ((ins->constants[0] / 16) < 16);
+}
 
 static unsigned
-mir_ubo_offset(midgard_instruction *ins)
+mir_promoteable_uniform_count(compiler_context *ctx)
 {
-        assert(ins->type == TAG_LOAD_STORE_4);
-        assert(OP_IS_UBO_READ(ins->load_store.op));
+        unsigned count = 0;
 
-        /* Grab the offset as the hw understands it */
-        unsigned lo = ins->load_store.varying_parameters >> 7;
-        unsigned hi = ins->load_store.address;
-        unsigned raw = ((hi << 3) | lo);
+        mir_foreach_instr_global(ctx, ins) {
+                if (mir_is_promoteable_ubo(ins))
+                        count = MAX2(count, ins->constants[0] / 16);
+        }
 
-        /* Account for the op's shift */
-        unsigned shift = mir_ubo_shift(ins->load_store.op);
-        return (raw << shift);
+        return count;
 }
 
-void
-midgard_promote_uniforms(compiler_context *ctx, unsigned promoted_count)
+static unsigned
+mir_count_live(uint16_t *live, unsigned temp_count)
 {
-        mir_foreach_instr_global_safe(ctx, ins) {
-                if (ins->type != TAG_LOAD_STORE_4) continue;
-                if (!OP_IS_UBO_READ(ins->load_store.op)) continue;
+        unsigned count = 0;
 
-                /* Get the offset. TODO: can we promote unaligned access? */
-                unsigned off = mir_ubo_offset(ins);
-                if (off & 0xF) continue;
+        for (unsigned i = 0; i < temp_count; ++i)
+                count += util_bitcount(live[i]);
 
-                unsigned address = off / 16;
+        return count;
+}
+
+static unsigned
+mir_estimate_pressure(compiler_context *ctx)
+{
+        mir_invalidate_liveness(ctx);
+        mir_compute_liveness(ctx);
+
+        unsigned max_live = 0;
+
+        mir_foreach_block(ctx, block) {
+                uint16_t *live = mem_dup(block->live_out, ctx->temp_count * sizeof(uint16_t));
+
+                mir_foreach_instr_in_block_rev(block, ins) {
+                        unsigned count = mir_count_live(live, ctx->temp_count);
+                        max_live = MAX2(max_live, count);
+                        mir_liveness_ins_update(live, ins, ctx->temp_count);
+                }
+
+                free(live);
+        }
+
+        return DIV_ROUND_UP(max_live, 16);
+}
 
-                /* Check this is UBO 0 */
-                if (ins->load_store.arg_1) continue;
+static unsigned
+mir_work_heuristic(compiler_context *ctx)
+{
+        unsigned uniform_count = mir_promoteable_uniform_count(ctx);
+
+        /* If there are 8 or fewer uniforms, it doesn't matter what we do, so
+         * allow as many work registers as needed */
+
+        if (uniform_count <= 8)
+                return 16;
+
+        /* Otherwise, estimate the register pressure */
+
+        unsigned pressure = mir_estimate_pressure(ctx);
 
-                /* Check we're accessing directly */
-                if (ins->load_store.arg_2 != 0x1E) continue;
+        /* Prioritize not spilling above all else. The relation between the
+         * pressure estimate and the actual register pressure is a little
+         * murkier than we might like (due to scheduling, pipeline registers,
+         * failure to pack vector registers, load/store registers, texture
+         * registers...), hence why this is a heuristic parameter */
+
+        if (pressure > 6)
+                return 16;
+
+        /* If there's no chance of spilling, prioritize UBOs and thread count */
+
+        return 8;
+}
+
+void
+midgard_promote_uniforms(compiler_context *ctx)
+{
+        unsigned work_count = mir_work_heuristic(ctx);
+        unsigned promoted_count = 24 - work_count;
+
+        mir_foreach_instr_global_safe(ctx, ins) {
+                if (!mir_is_promoteable_ubo(ins)) continue;
+
+                unsigned off = ins->constants[0];
+                unsigned address = off / 16;
 
                 /* Check if it's a promotable range */
                 unsigned uniform_reg = 23 - address;
@@ -84,29 +151,19 @@ midgard_promote_uniforms(compiler_context *ctx, unsigned promoted_count)
                 /* We do need the move for safety for a non-SSA dest, or if
                  * we're being fed into a special class */
 
-                bool needs_move = ins->ssa_args.dest & IS_REG;
-                needs_move |= mir_special_index(ctx, ins->ssa_args.dest);
-
-                /* Ensure this is a contiguous X-bound mask. It should be since
-                 * we haven't done RA and per-component masked UBO reads don't
-                 * make much sense. */
-
-                assert(((ins->mask + 1) & ins->mask) == 0);
+                bool needs_move = ins->dest & IS_REG;
+                needs_move |= mir_special_index(ctx, ins->dest);
 
-                /* Check the component count from the mask so we can setup a
-                 * swizzle appropriately when promoting. The idea is to ensure
-                 * the component count is preserved so RA can be smarter if we
-                 * need to spill */
+                if (needs_move) {
+                        midgard_instruction mov = v_mov(promoted, ins->dest);
 
-                unsigned nr_components = util_bitcount(ins->mask);
+                        if (ins->load_64)
+                                mov.alu.reg_mode = midgard_reg_mode_64;
 
-                if (needs_move) {
-                        midgard_instruction mov = v_mov(promoted, blank_alu_src, ins->ssa_args.dest);
-                        mov.mask = ins->mask;
-                        mir_insert_instruction_before(ins, mov);
+                        mir_set_bytemask(&mov, mir_bytemask(ins));
+                        mir_insert_instruction_before(ctx, ins, mov);
                 } else {
-                        mir_rewrite_index_src_swizzle(ctx, ins->ssa_args.dest,
-                                        promoted, swizzle_of(nr_components));
+                        mir_rewrite_index_src(ctx, ins->dest, promoted);
                 }
 
                 mir_remove_instruction(ins);
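
Note on the heuristic: the new mir_work_heuristic() only gives up work registers when there are more than 8 promoteable uniforms and the liveness-based pressure estimate stays at or below 6. The snippet below is a minimal standalone sketch of that decision, not the Mesa code: estimate_pressure(), work_heuristic() and popcount16() are made-up stand-ins, and it assumes (as the patch's DIV_ROUND_UP(max_live, 16) suggests) that each 16-bit liveness mask counts live bytes of one 128-bit register, taken here from a single snapshot rather than the per-instruction maximum the pass computes.

/* Hypothetical illustration of the work-count heuristic; not Mesa API. */
#include <stdio.h>

static unsigned
popcount16(unsigned short x)
{
        unsigned n = 0;
        for (; x; x &= x - 1)
                ++n;
        return n;
}

/* Count live bytes across all temporaries and convert to whole
 * 16-byte (128-bit) registers, like DIV_ROUND_UP(max_live, 16). */
static unsigned
estimate_pressure(const unsigned short *live, unsigned temp_count)
{
        unsigned bytes = 0;
        for (unsigned i = 0; i < temp_count; ++i)
                bytes += popcount16(live[i]);
        return (bytes + 15) / 16;
}

/* Mirror of the decision in the patch: keep 16 work registers when
 * uniforms are few or spilling looks likely, otherwise drop to 8. */
static unsigned
work_heuristic(unsigned uniform_count, unsigned pressure)
{
        if (uniform_count <= 8)
                return 16;
        if (pressure > 6)
                return 16;
        return 8;
}

int
main(void)
{
        /* One fully-live vec4, one half-live, one single live byte. */
        unsigned short live[] = { 0xFFFF, 0x00FF, 0x0001 };
        unsigned pressure = estimate_pressure(live, 3);

        /* 25 live bytes -> pressure 2 -> 8 work registers for 12 uniforms. */
        printf("pressure %u -> work_count %u\n",
               pressure, work_heuristic(12, pressure));
        return 0;
}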
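
Note on the register budget: with the new signature, midgard_promote_uniforms() derives promoted_count as 24 - work_count, and a promoteable UBO word at byte offset off maps to uniform register 23 - off/16. The snippet below is a hedged sketch of that arithmetic only; promote_slot() is a hypothetical helper, and comparing the slot against promoted_count assumes the promotable-range check elided between the two hunks behaves the way the old promoted_count parameter did.

/* Hypothetical illustration of the offset -> uniform register mapping;
 * not Mesa API. */
#include <stdbool.h>
#include <stdio.h>

static bool
promote_slot(unsigned off, unsigned work_count, unsigned *reg_out)
{
        /* 24 registers are split between work and promoted uniforms. */
        unsigned promoted_count = 24 - work_count;

        /* Match mir_is_promoteable_ubo(): 16-byte aligned, within the
         * first 16 vec4 slots. */
        if (off & 0xF)
                return false;

        unsigned address = off / 16;
        if (address >= 16 || address >= promoted_count)
                return false;

        *reg_out = 23 - address;        /* highest register holds slot 0 */
        return true;
}

int
main(void)
{
        unsigned reg;

        /* Aggressive setting: 8 work registers leave 16 promoted slots,
         * so offset 0x40 (slot 4) lands in r19. */
        if (promote_slot(0x40, 8, &reg))
                printf("offset 0x40 -> r%u\n", reg);

        /* Conservative setting: 16 work registers leave only 8 slots,
         * so slot 9 stays as a load/store UBO read. */
        if (!promote_slot(0x90, 16, &reg))
                printf("offset 0x90 -> not promoted\n");
        return 0;
}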