freedreno: Fix non-constbuf-upload UBO block indices and count.
author     Eric Anholt <eric@anholt.net>       Mon, 11 May 2020 18:18:02 +0000 (11:18 -0700)
committer  Marge Bot <eric+marge@anholt.net>   Tue, 12 May 2020 17:01:55 +0000 (17:01 +0000)
The ir3_nir_analyze_ubo_ranges pass removes all UBO block 0 loads, reversing
what nir_lower_uniforms_to_ubo() had done, and we only upload UBO pointers
to the HW for UBO blocks 1-N, so let's just fix up the shader state (block
indices and num_ubos) to match.

Fixes an off-by-one in const state layout setup, and some really dodgy
register addressing that tried to deal with dynamic UBO indices when the
UBO pointers happen to be at the start of the constbuf.

There's no Fixes: tag, even though this fixes a bug from September, because
backporting it would also require the num_ubos fix in
nir_lower_uniforms_to_ubo.

Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4992>
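
To make the resulting index spaces concrete, here is a hedged sketch (the
block counts are hypothetical, not taken from the patch) of how a shader
with three GL uniform blocks is renumbered:

/* Hypothetical shader with GL uniform blocks 0..2, after
 * nir_lower_uniforms_to_ubo() has inserted the default uniform block at
 * index 0:
 *
 *   NIR block 0 -> default uniforms; lowered to constbuf reads by this pass
 *   NIR block 1 -> GL block 0; load_ubo index rewritten to 0
 *   NIR block 2 -> GL block 1; load_ubo index rewritten to 1
 *   NIR block 3 -> GL block 2; load_ubo index rewritten to 2
 *
 * num_ubos correspondingly drops from 4 to 3, matching the UBO pointers
 * actually uploaded to the HW.
 */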

.gitlab-ci/deqp-freedreno-a630-noubo-fails.txt
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c

diff --git a/.gitlab-ci/deqp-freedreno-a630-noubo-fails.txt b/.gitlab-ci/deqp-freedreno-a630-noubo-fails.txt
index 2b00497a6caa8374513e5865bcee77d91afe650e..b8ffa8f0a1ce4a6a2468c4997468594e9f48734a 100644
--- a/.gitlab-ci/deqp-freedreno-a630-noubo-fails.txt
+++ b/.gitlab-ci/deqp-freedreno-a630-noubo-fails.txt
@@ -1,18 +1,2 @@
 dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_clear
 dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw
-dEQP-GLES31.functional.ubo.random.all_per_block_buffers.11
-dEQP-GLES31.functional.ubo.random.all_per_block_buffers.16
-dEQP-GLES31.functional.ubo.random.all_per_block_buffers.21
-dEQP-GLES31.functional.ubo.random.all_per_block_buffers.31
-dEQP-GLES31.functional.ubo.random.all_per_block_buffers.6
-dEQP-GLES31.functional.ubo.random.all_shared_buffer.12
-dEQP-GLES31.functional.ubo.random.all_shared_buffer.17
-dEQP-GLES31.functional.ubo.random.all_shared_buffer.2
-dEQP-GLES31.functional.ubo.random.all_shared_buffer.27
-dEQP-GLES31.functional.ubo.random.all_shared_buffer.37
-dEQP-GLES31.functional.ubo.random.all_shared_buffer.47
-dEQP-GLES31.functional.ubo.random.basic_type_arrays.1
-dEQP-GLES31.functional.ubo.random.basic_type_arrays.11
-dEQP-GLES31.functional.ubo.random.basic_type_arrays.16
-dEQP-GLES31.functional.ubo.random.basic_type_arrays.21
-dEQP-GLES31.functional.ubo.random.basic_type_arrays.6
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index d40978f574bc1c559369886379b9cc99e3020c50..dc2492e242aa7ba817e58ba85d138a0e0f066b27 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -760,11 +760,8 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 {
        struct ir3_block *b = ctx->block;
        struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1;
-       /* UBO addresses are the first driver params, but subtract 2 here to
-        * account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0
-        * is the uniforms: */
        struct ir3_const_state *const_state = &ctx->so->shader->const_state;
-       unsigned ubo = regid(const_state->offsets.ubo, 0) - 2;
+       unsigned ubo = regid(const_state->offsets.ubo, 0);
        const unsigned ptrsz = ir3_pointer_size(ctx->compiler);
 
        int off = 0;
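
For context on the removed "- 2": ir3 lays the UBO pointers out in the
const file starting at const_state->offsets.ubo, one ptrsz-sized pointer
per block. A minimal sketch of the addressing this hunk relies on, assuming
ir3's regid(num, comp) packs a register number and component as
(num << 2) | comp (ubo_ptr_offset is an illustrative name, not a helper in
the tree):

/* Sketch only: scalar const-file offset of the pointer for UBO block
 * `block` (0-based after the NIR-side decrement).  With the default
 * uniform block no longer occupying load_ubo index 0, the base needs no
 * "- 2" fixup.
 */
static unsigned
ubo_ptr_offset(unsigned ubo_base_reg, unsigned block, unsigned ptrsz)
{
        unsigned base = (ubo_base_reg << 2) | 0;  /* regid(ubo_base_reg, 0) */
        return base + block * ptrsz;
}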
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 17d79b97a2e8efb546c52ca9ce2a00f2dbc2999f..d2fe65cd0e39498e1cf9fc63c0abb5c7734a884c 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -179,36 +179,59 @@ handle_partial_const(nir_builder *b, nir_ssa_def **srcp, unsigned *offp)
        }
 }
 
+static void
+lower_ubo_block_decrement(nir_intrinsic_instr *instr, nir_builder *b)
+{
+       /* Skip shifting things for turnip's bindless resources. */
+       if (ir3_bindless_resource(instr->src[0]))
+               return;
+
+       /* Shift all GL nir_intrinsic_load_ubo UBO indices down by 1, because we
+        * have lowered block 0 off of load_ubo to constbuf and ir3_const only
+        * uploads pointers for blocks 1-N.
+        */
+       nir_ssa_def *old_idx = nir_ssa_for_src(b, instr->src[0], 1);
+       nir_ssa_def *new_idx = nir_iadd_imm(b, old_idx, -1);
+       nir_instr_rewrite_src(&instr->instr, &instr->src[0],
+                       nir_src_for_ssa(new_idx));
+}
+
 static void
 lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
                                                  struct ir3_ubo_analysis_state *state)
 {
+       b->cursor = nir_before_instr(&instr->instr);
+
        /* We don't lower dynamic block index UBO loads to load_uniform, but we
         * could probably with some effort determine a block stride in number of
         * registers.
         */
        struct ir3_ubo_range *range = get_existing_range(instr, state, false);
-       if (!range)
+       if (!range) {
+               lower_ubo_block_decrement(instr, b);
                return;
+       }
 
        if (range->bindless || range->block > 0) {
                /* We don't lower dynamic array indexing either, but we definitely should.
                 * We don't have a good way of determining the range of the dynamic
                 * access, so for now just fall back to pulling.
                 */
-               if (!nir_src_is_const(instr->src[1]))
+               if (!nir_src_is_const(instr->src[1])) {
+                       lower_ubo_block_decrement(instr, b);
                        return;
+               }
 
                /* After gathering the UBO access ranges, we limit the total
                 * upload. Reject if we're now outside the range.
                 */
                const struct ir3_ubo_range r = get_ubo_load_range(instr);
-               if (!(range->start <= r.start && r.end <= range->end))
+               if (!(range->start <= r.start && r.end <= range->end)) {
+                       lower_ubo_block_decrement(instr, b);
                        return;
+               }
        }
 
-       b->cursor = nir_before_instr(&instr->instr);
-
        nir_ssa_def *ubo_offset = nir_ssa_for_src(b, instr->src[1], 1);
        unsigned const_offset = 0;
 
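
For readers less familiar with NIR, a self-contained restatement of the
builder idiom used by lower_ubo_block_decrement() above (rewrite_block_index
is an illustrative name; the NIR calls are the same ones the patch uses, and
the include path may vary by build setup):

#include "nir_builder.h"

static void
rewrite_block_index(nir_builder *b, nir_intrinsic_instr *load, int delta)
{
        /* Instructions built below are inserted at the cursor. */
        b->cursor = nir_before_instr(&load->instr);

        /* Materialize src[0] (the UBO block index) as a 1-component SSA
         * value, whether it was already SSA or a register.
         */
        nir_ssa_def *idx = nir_ssa_for_src(b, load->src[0], 1);

        /* Add the (possibly negative) immediate; delta == -1 gives the
         * block decrement this patch performs.
         */
        nir_ssa_def *adjusted = nir_iadd_imm(b, idx, delta);

        /* Repoint the load's first source at the adjusted index. */
        nir_instr_rewrite_src(&load->instr, &load->src[0],
                        nir_src_for_ssa(adjusted));
}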
@@ -336,5 +359,11 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
                }
        }
 
+       /* If we previously had UBO 0, it's been lowered off of load_ubo and all
+        * the others were shifted down.
+        */
+       if (nir->info.num_ubos >= 1 && nir->info.first_ubo_is_default_ubo)
+               nir->info.num_ubos--;
+
        return state->lower_count > 0;
 }
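
Finally, a short usage sketch (names from the patch; the surrounding compile
flow is assumed, not shown in this commit) of what a caller observes after
the pass runs:

bool progress = ir3_nir_analyze_ubo_ranges(nir, shader);

/* From here on, nir->info.num_ubos counts only the original blocks 1-N,
 * i.e. exactly the blocks whose pointers ir3 uploads to the HW.
 */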