freedreno/ir3: fix 16-bit ssbo access
author Jonathan Marek <jonathan@marek.ca>
Thu, 23 Apr 2020 03:20:26 +0000 (23:20 -0400)
committer Marge Bot <eric+marge@anholt.net>
Fri, 24 Apr 2020 13:11:58 +0000 (13:11 +0000)
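Set the cat6 instruction type to TYPE_U16 for 16-bit SSBO loads and stores, and shift the byte offset right by 1 (instead of 2) in lower_offset_for_ssbo, since 16-bit accesses are addressed in 16-bit words rather than dwords.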

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4708>

src/freedreno/ir3/disasm-a3xx.c
src/freedreno/ir3/ir3_a6xx.c
src/freedreno/ir3/ir3_nir_lower_io_offsets.c

index 23c3bc76ab2e6dc527d8cd5b6dae24a4b9ec4297..9d63add3d2a8a576cd0df2266322a39984291740 100644 (file)
@@ -930,6 +930,7 @@ static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
        bool indirect_ssbo = desc_features[cat6->desc_mode].indirect;
        bool bindless = desc_features[cat6->desc_mode].bindless;
        bool uniform = desc_features[cat6->desc_mode].uniform;
+       bool type_full = cat6->type != TYPE_U16;
 
 
        memset(&src1, 0, sizeof(src1));
@@ -952,7 +953,7 @@ static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
        fprintf(ctx->out, " ");
 
        src2.reg = (reg_t)(cat6->src2);
-       src2.full = true; // XXX
+       src2.full = type_full;
        print_src(ctx, &src2);
        fprintf(ctx->out, ", ");
 
index 23c1970bb360c2a52e0b63f72322b82bd0410d83..6f0ba54dc36a186e3c87a5e9485134250681db0a 100644 (file)
@@ -89,7 +89,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        ldib->regs[0]->wrmask = MASK(intr->num_components);
        ldib->cat6.iim_val = intr->num_components;
        ldib->cat6.d = 1;
-       ldib->cat6.type = TYPE_U32;
+       ldib->cat6.type = intr->dest.ssa.bit_size == 16 ? TYPE_U16 : TYPE_U32;
        ldib->barrier_class = IR3_BARRIER_BUFFER_R;
        ldib->barrier_conflict = IR3_BARRIER_BUFFER_W;
        handle_bindless_cat6(ldib, intr->src[0]);
@@ -118,7 +118,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        stib = ir3_STIB(b, ssbo_idx(ctx, intr->src[1]), 0, offset, 0, val, 0);
        stib->cat6.iim_val = ncomp;
        stib->cat6.d = 1;
-       stib->cat6.type = TYPE_U32;
+       stib->cat6.type = intr->src[0].ssa->bit_size == 16 ? TYPE_U16 : TYPE_U32;
        stib->barrier_class = IR3_BARRIER_BUFFER_W;
        stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
        handle_bindless_cat6(stib, intr->src[1]);
index 8e80c40eeb884773815fd9f2b28b06dfd5fd9c54..456e331aba7800fdbfb58ff9dafab9a63d943a16 100644 (file)
@@ -168,10 +168,16 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
                                          unsigned ir3_ssbo_opcode, uint8_t offset_src_idx)
 {
        unsigned num_srcs = nir_intrinsic_infos[intrinsic->intrinsic].num_srcs;
+       int shift = 2;
 
        bool has_dest = nir_intrinsic_infos[intrinsic->intrinsic].has_dest;
        nir_ssa_def *new_dest = NULL;
 
+       /* for 16-bit ssbo access, offset is in 16-bit words instead of dwords */
+       if ((has_dest && intrinsic->dest.ssa.bit_size == 16) ||
+               (!has_dest && intrinsic->src[0].ssa->bit_size == 16))
+               shift = 1;
+
        /* Here we create a new intrinsic and copy over all contents from the old one. */
 
        nir_intrinsic_instr *new_intrinsic;
@@ -192,7 +198,7 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
         * Here we use the convention that shifting right is negative while shifting
         * left is positive. So 'x / 4' ~ 'x >> 2' or 'x << -2'.
         */
-       nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, offset, -2);
+       nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, offset, -shift);
 
        /* The new source that will hold the dword-offset is always the last
         * one for every intrinsic.
@@ -224,7 +230,7 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
        if (new_offset)
                offset = new_offset;
        else
-               offset = nir_ushr(b, offset, nir_imm_int(b, 2));
+               offset = nir_ushr(b, offset, nir_imm_int(b, shift));
 
        /* Insert the new intrinsic right before the old one. */
        nir_builder_instr_insert(b, &new_intrinsic->instr);