From dcc50f4302d9904e5c433d8bd81af6fcb3159479 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 12 Mar 2020 08:05:58 -0400 Subject: [PATCH] pan/bi: Interpret register allocation results Once LCRA has run, we have a map from IR indices to byte offsets into the register file, so we need to "install" these results, rewriting the IR to use native registers and fixing up writemasks/swizzles to substitute vectorization for adjacent registers (for LCRA, we're modeling in terms of real vectors). Signed-off-by: Alyssa Rosenzweig Tested-by: Marge Bot Part-of: --- src/panfrost/bifrost/bi_ra.c | 95 ++++++++++++++++++++++++++ src/panfrost/bifrost/bifrost_compile.c | 1 + 2 files changed, 96 insertions(+) diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c index b30f0e7aceb..8c0fa92a12e 100644 --- a/src/panfrost/bifrost/bi_ra.c +++ b/src/panfrost/bifrost/bi_ra.c @@ -89,6 +89,99 @@ bi_allocate_registers(bi_context *ctx, bool *success) return l; } +static unsigned +bi_reg_from_index(struct lcra_state *l, unsigned index, unsigned offset) +{ + /* Did we run RA for this index at all */ + if (index >= l->node_count) + return index; + + /* LCRA didn't bother solving this index (how lazy!) */ + signed solution = l->solutions[index]; + if (solution < 0) + return index; + + solution += offset; + + assert((solution & 0x3) == 0); + unsigned reg = solution / 4; + return BIR_INDEX_REGISTER | reg; +} + +static void +bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src) +{ + if (ins->src[src] >= l->node_count) + return; + + bool vector = (bi_class_props[ins->type] & BI_VECTOR); + unsigned offset = 0; + + if (vector) { + /* TODO: Do we do anything here? */ + } else { + /* Use the swizzle as component select */ + nir_alu_type T = ins->src_types[src]; + unsigned size = nir_alu_type_get_type_size(T); + unsigned bytes = (MAX2(size, 8) / 8); + unsigned comps_per_reg = 4 / bytes; + unsigned components = bi_get_component_count(ins); + + for (unsigned i = 0; i < components; ++i) { + unsigned off = ins->swizzle[src][i] / comps_per_reg; + off *= 4; /* 32-bit registers */ + + /* We can't cross register boundaries in a swizzle */ + if (i == 0) + offset = off; + else + assert(off == offset); + + ins->swizzle[src][i] %= comps_per_reg; + } + } + + ins->src[src] = bi_reg_from_index(l, ins->src[src], offset); +} + +static void +bi_adjust_dest_ra(bi_instruction *ins, struct lcra_state *l) +{ + if (ins->dest >= l->node_count) + return; + + bool vector = (bi_class_props[ins->type] & BI_VECTOR); + unsigned offset = 0; + + if (!vector) { + /* Look at the writemask to get an offset, specifically the + * trailing zeros */ + + unsigned tz = __builtin_ctz(ins->writemask); + + /* Recall writemask is one bit per byte, so tz is in bytes */ + unsigned regs = tz / 4; + offset = regs * 4; + + /* Adjust writemask to compensate */ + ins->writemask >>= offset; + } + + ins->dest = bi_reg_from_index(l, ins->dest, offset); + +} + +static void +bi_install_registers(bi_context *ctx, struct lcra_state *l) +{ + bi_foreach_instr_global(ctx, ins) { + bi_adjust_dest_ra(ins, l); + + bi_foreach_src(ins, s) + bi_adjust_src_ra(ins, l, s); + } +} + void bi_register_allocate(bi_context *ctx) { @@ -108,5 +201,7 @@ bi_register_allocate(bi_context *ctx) assert(success); } while(!success); + bi_install_registers(ctx, l); + lcra_free(l); } diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index b9cb89d961b..f25f7424409 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -864,6 +864,7 @@ bifrost_compile_shader_nir(nir_shader *nir, panfrost_program *program, unsigned bi_print_shader(ctx, stdout); bi_schedule(ctx); bi_register_allocate(ctx); + bi_print_shader(ctx, stdout); ralloc_free(ctx); } -- 2.30.2