From: Rob Clark Date: Wed, 6 May 2020 21:58:28 +0000 (-0700) Subject: freedreno/ir3: use lower_wrmasks pass X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=cf21b763832abc5739fc46eb0d30440587015840;p=mesa.git freedreno/ir3: use lower_wrmasks pass Signed-off-by: Rob Clark Reviewed-by: Kristian H. Kristensen Reviewed-by: Eric Anholt --- diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index 594fb9cd021..e460cd0b629 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -73,13 +73,11 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) { struct ir3_block *b = ctx->block; struct ir3_instruction *stgb, *src0, *src1, *src2, *byte_offset, *offset; - /* TODO handle wrmask properly, see _store_shared().. but I think - * it is more a PITA than that, since blob ends up loading the - * masked components and writing them back out. - */ unsigned wrmask = nir_intrinsic_write_mask(intr); unsigned ncomp = ffs(~wrmask) - 1; + assert(wrmask == BITFIELD_MASK(intr->num_components)); + /* can this be non-const buffer_index? how do we handle that? */ int ibo_idx = ir3_ssbo_to_ibo(ctx->so->shader, nir_src_as_uint(intr->src[1])); diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index d4cb74c39bd..e297e34fdf5 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -103,13 +103,11 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) { struct ir3_block *b = ctx->block; struct ir3_instruction *stib, *val, *offset; - /* TODO handle wrmask properly, see _store_shared().. but I think - * it is more a PITA than that, since blob ends up loading the - * masked components and writing them back out. - */ unsigned wrmask = nir_intrinsic_write_mask(intr); unsigned ncomp = ffs(~wrmask) - 1; + assert(wrmask == BITFIELD_MASK(intr->num_components)); + /* src0 is offset, src1 is value: */ val = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 13e180118c4..9e1105bce08 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -878,40 +878,26 @@ emit_intrinsic_store_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_block *b = ctx->block; struct ir3_instruction *stl, *offset; struct ir3_instruction * const *value; - unsigned base, wrmask; + unsigned base, wrmask, ncomp; value = ir3_get_src(ctx, &intr->src[0]); offset = ir3_get_src(ctx, &intr->src[1])[0]; base = nir_intrinsic_base(intr); wrmask = nir_intrinsic_write_mask(intr); + ncomp = ffs(~wrmask) - 1; - /* Combine groups of consecutive enabled channels in one write - * message. We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the length of - * the block of enabled bits. - * - * (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic()) - */ - while (wrmask) { - unsigned first_component = ffs(wrmask) - 1; - unsigned length = ffs(~(wrmask >> first_component)) - 1; - - stl = ir3_STL(b, offset, 0, - ir3_create_collect(ctx, &value[first_component], length), 0, - create_immed(b, length), 0); - stl->cat6.dst_offset = first_component + base; - stl->cat6.type = utype_src(intr->src[0]); - stl->barrier_class = IR3_BARRIER_SHARED_W; - stl->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; - - array_insert(b, b->keeps, stl); - - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - wrmask &= (15 << (first_component + length)); - } + assert(wrmask == BITFIELD_MASK(intr->num_components)); + + stl = ir3_STL(b, offset, 0, + ir3_create_collect(ctx, value, ncomp), 0, + create_immed(b, ncomp), 0); + stl->cat6.dst_offset = base; + stl->cat6.type = utype_src(intr->src[0]); + stl->barrier_class = IR3_BARRIER_SHARED_W; + stl->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; + + array_insert(b, b->keeps, stl); } /* src[] = { offset }. const_index[] = { base } */ diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index b3f784a557e..48dc9a340ab 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -210,6 +210,21 @@ ir3_optimize_loop(nir_shader *s) } while (progress); } +static bool +should_split_wrmask(const nir_instr *instr, const void *data) +{ + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_shared: + case nir_intrinsic_store_global: + return true; + default: + return false; + } +} + void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, const struct ir3_shader_key *key) @@ -274,6 +289,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, } OPT_V(s, nir_lower_regs_to_ssa); + OPT_V(s, nir_lower_wrmasks, should_split_wrmask, s); if (key) { if (s->info.stage == MESA_SHADER_VERTEX) {