From f789c5975c83f12216cf915d9a791e654b3c9e15 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 11 May 2020 11:18:02 -0700 Subject: [PATCH] freedreno: Fix non-constbuf-upload UBO block indices and count. The ir3_nir_analyze_ubo_ranges pass removes all UBO block 0 loads to reverse what nir_lower_uniforms_to_ubo() had done, and we only upload UBO pointers to the HW for UBO blocks 1-N, so let's just fix up the shader state. Fixes an off-by-one in const state layout setup, and some really dodgy register addressing trying to deal with dynamic UBO indices when the UBO pointers happen to be at the start of the constbuf. There's no Fixes tag, even though this fixes a bug from September, because it would require the num_ubos fix in nir_lower_uniforms_to_ubo(). Reviewed-by: Kristian H. Kristensen Part-of: --- .../deqp-freedreno-a630-noubo-fails.txt | 16 -------- src/freedreno/ir3/ir3_compiler_nir.c | 5 +-- .../ir3/ir3_nir_analyze_ubo_ranges.c | 39 ++++++++++++++++--- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/.gitlab-ci/deqp-freedreno-a630-noubo-fails.txt b/.gitlab-ci/deqp-freedreno-a630-noubo-fails.txt index 2b00497a6ca..b8ffa8f0a1c 100644 --- a/.gitlab-ci/deqp-freedreno-a630-noubo-fails.txt +++ b/.gitlab-ci/deqp-freedreno-a630-noubo-fails.txt @@ -1,18 +1,2 @@ dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_clear dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw -dEQP-GLES31.functional.ubo.random.all_per_block_buffers.11 -dEQP-GLES31.functional.ubo.random.all_per_block_buffers.16 -dEQP-GLES31.functional.ubo.random.all_per_block_buffers.21 -dEQP-GLES31.functional.ubo.random.all_per_block_buffers.31 -dEQP-GLES31.functional.ubo.random.all_per_block_buffers.6 -dEQP-GLES31.functional.ubo.random.all_shared_buffer.12 -dEQP-GLES31.functional.ubo.random.all_shared_buffer.17 -dEQP-GLES31.functional.ubo.random.all_shared_buffer.2 -dEQP-GLES31.functional.ubo.random.all_shared_buffer.27 
-dEQP-GLES31.functional.ubo.random.all_shared_buffer.37 -dEQP-GLES31.functional.ubo.random.all_shared_buffer.47 -dEQP-GLES31.functional.ubo.random.basic_type_arrays.1 -dEQP-GLES31.functional.ubo.random.basic_type_arrays.11 -dEQP-GLES31.functional.ubo.random.basic_type_arrays.16 -dEQP-GLES31.functional.ubo.random.basic_type_arrays.21 -dEQP-GLES31.functional.ubo.random.basic_type_arrays.6 diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index d40978f574b..dc2492e242a 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -760,11 +760,8 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, { struct ir3_block *b = ctx->block; struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1; - /* UBO addresses are the first driver params, but subtract 2 here to - * account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0 - * is the uniforms: */ struct ir3_const_state *const_state = &ctx->so->shader->const_state; - unsigned ubo = regid(const_state->offsets.ubo, 0) - 2; + unsigned ubo = regid(const_state->offsets.ubo, 0); const unsigned ptrsz = ir3_pointer_size(ctx->compiler); int off = 0; diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 17d79b97a2e..d2fe65cd0e3 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -179,36 +179,59 @@ handle_partial_const(nir_builder *b, nir_ssa_def **srcp, unsigned *offp) } } +static void +lower_ubo_block_decrement(nir_intrinsic_instr *instr, nir_builder *b) +{ + /* Skip shifting things for turnip's bindless resources. */ + if (ir3_bindless_resource(instr->src[0])) + return; + + /* Shift all GL nir_intrinsic_load_ubo UBO indices down by 1, because we + * have lowered block 0 off of load_ubo to constbuf and ir3_const only + * uploads pointers for block 1-N. 
+ */ + nir_ssa_def *old_idx = nir_ssa_for_src(b, instr->src[0], 1); + nir_ssa_def *new_idx = nir_iadd_imm(b, old_idx, -1); + nir_instr_rewrite_src(&instr->instr, &instr->src[0], + nir_src_for_ssa(new_idx)); +} + static void lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b, struct ir3_ubo_analysis_state *state) { + b->cursor = nir_before_instr(&instr->instr); + /* We don't lower dynamic block index UBO loads to load_uniform, but we * could probably with some effort determine a block stride in number of * registers. */ struct ir3_ubo_range *range = get_existing_range(instr, state, false); - if (!range) + if (!range) { + lower_ubo_block_decrement(instr, b); return; + } if (range->bindless || range->block > 0) { /* We don't lower dynamic array indexing either, but we definitely should. * We don't have a good way of determining the range of the dynamic * access, so for now just fall back to pulling. */ - if (!nir_src_is_const(instr->src[1])) + if (!nir_src_is_const(instr->src[1])) { + lower_ubo_block_decrement(instr, b); return; + } /* After gathering the UBO access ranges, we limit the total * upload. Reject if we're now outside the range. */ const struct ir3_ubo_range r = get_ubo_load_range(instr); - if (!(range->start <= r.start && r.end <= range->end)) + if (!(range->start <= r.start && r.end <= range->end)) { + lower_ubo_block_decrement(instr, b); return; + } } - b->cursor = nir_before_instr(&instr->instr); - nir_ssa_def *ubo_offset = nir_ssa_for_src(b, instr->src[1], 1); unsigned const_offset = 0; @@ -336,5 +359,11 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader) } } + /* If we previously had UBO 0, it's been lowered off of load_ubo and all + * the others were shifted down. + */ + if (nir->info.num_ubos >= 1 && nir->info.first_ubo_is_default_ubo) + nir->info.num_ubos--; + return state->lower_count > 0; } -- 2.30.2