From edc35c1f54a0f72b6c0f01b2156c10c904459b4f Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 22 Apr 2020 23:20:26 -0400 Subject: [PATCH] freedreno/ir3: fix 16-bit ssbo access Set the cat6 instruction type to TYPE_U16 for 16-bit SSBO loads and stores, and in lower_offset_for_ssbo shift the byte offset right by 1 instead of 2 for 16-bit accesses, because their offset is in 16-bit words rather than dwords. Signed-off-by: Jonathan Marek Part-of: --- src/freedreno/ir3/disasm-a3xx.c | 3 ++- src/freedreno/ir3/ir3_a6xx.c | 4 ++-- src/freedreno/ir3/ir3_nir_lower_io_offsets.c | 10 ++++++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index 23c3bc76ab2..9d63add3d2a 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -930,6 +930,7 @@ static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr) bool indirect_ssbo = desc_features[cat6->desc_mode].indirect; bool bindless = desc_features[cat6->desc_mode].bindless; bool uniform = desc_features[cat6->desc_mode].uniform; + bool type_full = cat6->type != TYPE_U16; memset(&src1, 0, sizeof(src1)); @@ -952,7 +953,7 @@ static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr) fprintf(ctx->out, " "); src2.reg = (reg_t)(cat6->src2); - src2.full = true; // XXX + src2.full = type_full; print_src(ctx, &src2); fprintf(ctx->out, ", "); diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index 23c1970bb36..6f0ba54dc36 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -89,7 +89,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, ldib->regs[0]->wrmask = MASK(intr->num_components); ldib->cat6.iim_val = intr->num_components; ldib->cat6.d = 1; - ldib->cat6.type = TYPE_U32; + ldib->cat6.type = intr->dest.ssa.bit_size == 16 ? 
TYPE_U16 : TYPE_U32; ldib->barrier_class = IR3_BARRIER_BUFFER_R; ldib->barrier_conflict = IR3_BARRIER_BUFFER_W; handle_bindless_cat6(ldib, intr->src[0]); @@ -118,7 +118,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) stib = ir3_STIB(b, ssbo_idx(ctx, intr->src[1]), 0, offset, 0, val, 0); stib->cat6.iim_val = ncomp; stib->cat6.d = 1; - stib->cat6.type = TYPE_U32; + stib->cat6.type = intr->src[0].ssa->bit_size == 16 ? TYPE_U16 : TYPE_U32; stib->barrier_class = IR3_BARRIER_BUFFER_W; stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; handle_bindless_cat6(stib, intr->src[1]); diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c index 8e80c40eeb8..456e331aba7 100644 --- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c +++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c @@ -168,10 +168,16 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b, unsigned ir3_ssbo_opcode, uint8_t offset_src_idx) { unsigned num_srcs = nir_intrinsic_infos[intrinsic->intrinsic].num_srcs; + int shift = 2; bool has_dest = nir_intrinsic_infos[intrinsic->intrinsic].has_dest; nir_ssa_def *new_dest = NULL; + /* for 16-bit ssbo access, offset is in 16-bit words instead of dwords */ + if ((has_dest && intrinsic->dest.ssa.bit_size == 16) || + (!has_dest && intrinsic->src[0].ssa->bit_size == 16)) + shift = 1; + /* Here we create a new intrinsic and copy over all contents from the old one. */ nir_intrinsic_instr *new_intrinsic; @@ -192,7 +198,7 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b, * Here we use the convention that shifting right is negative while shifting * left is positive. So 'x / 4' ~ 'x >> 2' or 'x << -2'. 
*/ - nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, offset, -2); + nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, offset, -shift); /* The new source that will hold the dword-offset is always the last * one for every intrinsic. @@ -224,7 +230,7 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b, if (new_offset) offset = new_offset; else - offset = nir_ushr(b, offset, nir_imm_int(b, 2)); + offset = nir_ushr(b, offset, nir_imm_int(b, shift)); /* Insert the new intrinsic right before the old one. */ nir_builder_instr_insert(b, &new_intrinsic->instr); -- 2.30.2