freedreno/ir3: remove indirect input load
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index d8a285073f194502fb8217e501c0cd3c865ae08c..8a9503feb747a7294869a47ad22f41b1d78e16a5 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
 #include "compiler/nir/nir_builder.h"
 #include "util/u_math.h"
 
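+/* Returns true for the GL default uniform block (non-bindless block 0), i.e.
+ * the regular GL uniforms, which get special treatment throughout this pass.
+ */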
+static bool
+ubo_is_gl_uniforms(const struct ir3_ubo_info *ubo)
+{
+       return !ubo->bindless && ubo->block == 0;
+}
+
 static inline struct ir3_ubo_range
-get_ubo_load_range(nir_intrinsic_instr *instr)
+get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr, uint32_t alignment)
 {
        struct ir3_ubo_range r;
 
-       int offset = nir_src_as_uint(instr->src[1]);
-       if (instr->intrinsic == nir_intrinsic_load_ubo_ir3)
-               offset *= 16;
-       const int bytes = nir_intrinsic_dest_components(instr) * 4;
+       if (nir_src_is_const(instr->src[1])) {
+               int offset = nir_src_as_uint(instr->src[1]);
+               const int bytes = nir_intrinsic_dest_components(instr) * 4;
 
-       r.start = ROUND_DOWN_TO(offset, 16 * 4);
-       r.end = ALIGN(offset + bytes, 16 * 4);
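+               /* Push-constant ranges are aligned to the constant upload unit
+                * ("alignment", in units of vec4s, i.e. alignment * 16 bytes).
+                */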
+               r.start = ROUND_DOWN_TO(offset, alignment * 16);
+               r.end = ALIGN(offset + bytes, alignment * 16);
+       } else {
+               /* The other valid place to call this is on the GL default uniform block */
+               assert(nir_src_as_uint(instr->src[0]) == 0);
+               r.start = 0;
+               r.end = ALIGN(nir->num_uniforms * 16, alignment * 16);
+       }
 
        return r;
 }
 
-static struct ir3_ubo_range *
-get_existing_range(nir_intrinsic_instr *instr,
-                                  struct ir3_ubo_analysis_state *state,
-                                  bool create_new)
+static bool
+get_ubo_info(nir_intrinsic_instr *instr, struct ir3_ubo_info *ubo)
 {
-       unsigned block, base = 0;
-       bool bindless;
        if (nir_src_is_const(instr->src[0])) {
-               block = nir_src_as_uint(instr->src[0]);
-               bindless = false;
+               ubo->block = nir_src_as_uint(instr->src[0]);
+               ubo->bindless_base = 0;
+               ubo->bindless = false;
+               return true;
        } else {
                nir_intrinsic_instr *rsrc = ir3_bindless_resource(instr->src[0]);
                if (rsrc && nir_src_is_const(rsrc->src[0])) {
-                       block = nir_src_as_uint(rsrc->src[0]);
-                       base = nir_intrinsic_desc_set(rsrc);
-                       bindless = true;
-               } else {
-                       return NULL;
+                       ubo->block = nir_src_as_uint(rsrc->src[0]);
+                       ubo->bindless_base = nir_intrinsic_desc_set(rsrc);
+                       ubo->bindless = true;
+                       return true;
                }
        }
+       return false;
+}
+
+/**
+ * Get the existing range associated with the UBO, but don't create a new one
+ * if one does not already exist.
+ */
+static const struct ir3_ubo_range *
+get_existing_range(nir_intrinsic_instr *instr,
+                                  const struct ir3_ubo_analysis_state *state)
+{
+       struct ir3_ubo_info ubo = {};
+
+       if (!get_ubo_info(instr, &ubo))
+               return NULL;
+
+       for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
+               const struct ir3_ubo_range *range = &state->range[i];
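+               /* Ranges are allocated in order, and an unused entry (end < start)
+                * marks the end of the valid ranges.
+                */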
+               if (range->end < range->start) {
+                       break;
+               } else if (!memcmp(&range->ubo, &ubo, sizeof(ubo))) {
+                       return range;
+               }
+       }
+
+       return NULL;
+}
+
+/**
+ * Get an existing range, or create a new one if necessary/possible.
+ */
+static struct ir3_ubo_range *
+get_range(nir_intrinsic_instr *instr, struct ir3_ubo_analysis_state *state)
+{
+       struct ir3_ubo_info ubo = {};
+
+       if (!get_ubo_info(instr, &ubo))
+               return NULL;
+
        for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
                struct ir3_ubo_range *range = &state->range[i];
                if (range->end < range->start) {
                        /* We don't have a matching range, but there are more available.
                         */
-                       if (create_new) {
-                               range->block = block;
-                               range->bindless_base = base;
-                               range->bindless = bindless;
-                               return range;
-                       } else {
-                               return NULL;
-                       }
-               } else if (range->block == block && range->bindless_base == base &&
-                                  range->bindless == bindless) {
+                       range->ubo = ubo;
+                       return range;
+               } else if (!memcmp(&range->ubo, &ubo, sizeof(ubo))) {
                        return range;
                }
        }
@@ -87,32 +127,23 @@ get_existing_range(nir_intrinsic_instr *instr,
 
 static void
 gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
-                                 struct ir3_ubo_analysis_state *state)
+                                 struct ir3_ubo_analysis_state *state, uint32_t alignment)
 {
-       struct ir3_ubo_range *old_r = get_existing_range(instr, state, true);
-       if (!old_r)
+       if (ir3_shader_debug & IR3_DBG_NOUBOOPT)
                return;
 
-       if (!nir_src_is_const(instr->src[1])) {
-               if (!old_r->bindless && old_r->block == 0) {
-                       /* If this is an indirect on UBO 0, we'll still lower it back to
-                        * load_uniform.  Set the range to cover all of UBO 0.
-                        */
-                       state->range[0].start = 0;
-                       state->range[0].end = ALIGN(nir->num_uniforms * 16, 16 * 4);
-               }
-
+       struct ir3_ubo_range *old_r = get_range(instr, state);
+       if (!old_r)
                return;
-       }
 
-       const struct ir3_ubo_range r = get_ubo_load_range(instr);
-
-       /* if UBO lowering is disabled, we still want to lower block 0
-        * (which is normal uniforms):
+       /* We don't know the size of a UBO accessed with a non-constant offset,
+        * except for the GL default uniform block, where shader_info
+        * (nir->num_uniforms) gives us the size.
         */
-       if ((old_r->bindless || old_r->block != 0) && (ir3_shader_debug & IR3_DBG_NOUBOOPT))
+       if (!nir_src_is_const(instr->src[1]) && !ubo_is_gl_uniforms(&old_r->ubo))
                return;
 
+       const struct ir3_ubo_range r = get_ubo_load_range(nir, instr, alignment);
+
        if (r.start < old_r->start)
                old_r->start = r.start;
        if (old_r->end < r.end)
@@ -179,26 +210,29 @@ handle_partial_const(nir_builder *b, nir_ssa_def **srcp, int *offp)
        }
 }
 
+/* Tracks the maximum bindful UBO accessed, so that we can reduce the number
+ * of UBO descriptors emitted in the fast path for GL.
+ */
 static void
-lower_ubo_block_decrement(nir_intrinsic_instr *instr, nir_builder *b)
+track_ubo_use(nir_intrinsic_instr *instr, nir_builder *b, int *num_ubos)
 {
-       /* Skip shifting things for turnip's bindless resources. */
-       if (ir3_bindless_resource(instr->src[0]))
+       if (ir3_bindless_resource(instr->src[0])) {
+               assert(!b->shader->info.first_ubo_is_default_ubo); /* only set for GL */
                return;
+       }
 
-       /* Shift all GL nir_intrinsic_load_ubo UBO indices down by 1, because we
-        * have lowered block 0 off of load_ubo to constbuf and ir3_const only
-        * uploads pointers for block 1-N.
-        */
-       nir_ssa_def *old_idx = nir_ssa_for_src(b, instr->src[0], 1);
-       nir_ssa_def *new_idx = nir_iadd_imm(b, old_idx, -1);
-       nir_instr_rewrite_src(&instr->instr, &instr->src[0],
-                       nir_src_for_ssa(new_idx));
+       if (nir_src_is_const(instr->src[0])) {
+               int block = nir_src_as_uint(instr->src[0]);
+               *num_ubos = MAX2(*num_ubos, block + 1);
+       } else {
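+               /* With a dynamic block index, conservatively assume that any of the
+                * declared UBOs may be accessed.
+                */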
+               *num_ubos = b->shader->info.num_ubos;
+       }
 }
 
-static void
+static bool
 lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
-                                                 struct ir3_ubo_analysis_state *state)
+               const struct ir3_ubo_analysis_state *state,
+               int *num_ubos, uint32_t alignment)
 {
        b->cursor = nir_before_instr(&instr->instr);
 
@@ -206,30 +240,28 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
         * could probably with some effort determine a block stride in number of
         * registers.
         */
-       struct ir3_ubo_range *range = get_existing_range(instr, state, false);
+       const struct ir3_ubo_range *range = get_existing_range(instr, state);
        if (!range) {
-               lower_ubo_block_decrement(instr, b);
-               return;
+               track_ubo_use(instr, b, num_ubos);
+               return false;
        }
 
-       if (range->bindless || range->block > 0) {
-               /* We don't lower dynamic array indexing either, but we definitely should.
-                * We don't have a good way of determining the range of the dynamic
-                * access, so for now just fall back to pulling.
-                */
-               if (!nir_src_is_const(instr->src[1])) {
-                       lower_ubo_block_decrement(instr, b);
-                       return;
-               }
+       /* We don't have a good way of determining the range of the dynamic
+        * access in general, so for now just fall back to pulling.
+        */
+       if (!nir_src_is_const(instr->src[1]) && !ubo_is_gl_uniforms(&range->ubo)) {
+               track_ubo_use(instr, b, num_ubos);
+               return false;
+       }
 
-               /* After gathering the UBO access ranges, we limit the total
-                * upload. Reject if we're now outside the range.
-                */
-               const struct ir3_ubo_range r = get_ubo_load_range(instr);
-               if (!(range->start <= r.start && r.end <= range->end)) {
-                       lower_ubo_block_decrement(instr, b);
-                       return;
-               }
+       /* After gathering the UBO access ranges, we limit the total
+        * upload. Don't lower if this load is outside the range.
+        */
+       const struct ir3_ubo_range r = get_ubo_load_range(b->shader,
+                       instr, alignment);
+       if (!(range->start <= r.start && r.end <= range->end)) {
+               track_ubo_use(instr, b, num_ubos);
+               return false;
        }
 
        nir_ssa_def *ubo_offset = nir_ssa_for_src(b, instr->src[1], 1);
@@ -242,9 +274,8 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
         * offset is in units of 16 bytes, so we need to multiply by 4. And
         * also the same for the constant part of the offset:
         */
-
-       const int shift = instr->intrinsic == nir_intrinsic_load_ubo_ir3 ? 2 : -2;
-       nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, shift);
+       const int shift = -2;
+       nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, shift);
        nir_ssa_def *uniform_offset = NULL;
        if (new_offset) {
                uniform_offset = new_offset;
@@ -254,13 +285,8 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
                        nir_ushr(b, ubo_offset, nir_imm_int(b, -shift));
        }
 
-       if (instr->intrinsic == nir_intrinsic_load_ubo_ir3) {
-               const_offset <<= 2;
-               const_offset += nir_intrinsic_base(instr);
-       } else {
-               debug_assert(!(const_offset & 0x3));
-               const_offset >>= 2;
-       }
+       debug_assert(!(const_offset & 0x3));
+       const_offset >>= 2;
 
        const int range_offset = ((int)range->offset - (int)range->start) / 4;
        const_offset += range_offset;
@@ -290,7 +316,7 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
 
        nir_instr_remove(&instr->instr);
 
-       state->lower_count++;
+       return true;
 }
 
 static bool
@@ -300,13 +326,19 @@ instr_is_load_ubo(nir_instr *instr)
                return false;
 
        nir_intrinsic_op op = nir_instr_as_intrinsic(instr)->intrinsic;
-       return op == nir_intrinsic_load_ubo || op == nir_intrinsic_load_ubo_ir3;
+
+       /* nir_lower_ubo_vec4 happens after this pass. */
+       assert(op != nir_intrinsic_load_ubo_vec4);
+
+       return op == nir_intrinsic_load_ubo;
 }
 
-bool
-ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
+void
+ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
 {
-       struct ir3_ubo_analysis_state *state = &shader->ubo_state;
+       struct ir3_const_state *const_state = ir3_const_state(v);
+       struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
+       struct ir3_compiler *compiler = v->shader->compiler;
 
        memset(state, 0, sizeof(*state));
        for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
@@ -318,7 +350,8 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
                        nir_foreach_block (block, function->impl) {
                                nir_foreach_instr (instr, block) {
                                        if (instr_is_load_ubo(instr))
-                                               gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), state);
+                                               gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr),
+                                                               state, compiler->const_upload_unit);
                                }
                        }
                }
@@ -332,8 +365,19 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
         * dynamically accessed ranges separately and upload static ranges
         * first.
         */
-       const uint32_t max_upload = 16 * 1024;
-       uint32_t offset = shader->const_state.num_reserved_user_consts * 16;
+
+       /* Limit our uploads to the amount of constant buffer space available in
+        * the hardware, minus what the shader compiler may need for various
+        * driver params.  We do this UBO-to-push-constant lowering before the
+        * real allocation of the driver params' const space, because UBO pointers
+        * can be driver params but this pass usually eliminates them.
+        */
+       struct ir3_const_state worst_case_const_state = { };
+       ir3_setup_const_state(nir, v, &worst_case_const_state);
+       const uint32_t max_upload = (ir3_max_const(v) -
+                       worst_case_const_state.offsets.immediate) * 16;
+
+       uint32_t offset = v->shader->num_reserved_user_consts * 16;
        state->num_enabled = ARRAY_SIZE(state->range);
        for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
                if (state->range[i].start >= state->range[i].end) {
@@ -353,15 +397,33 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 
        }
        state->size = offset;
+}
 
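+/* Lowers load_ubo instructions that fall within the ranges chosen by
+ * ir3_nir_analyze_ubo_ranges() to load_uniform, and tracks which bindful
+ * UBOs remain in use.
+ */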
+bool
+ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v)
+{
+       struct ir3_compiler *compiler = v->shader->compiler;
+       /* For the binning-pass variant, we re-use the corresponding draw-pass
+        * variant's const_state and UBO state.  To make this clear, they are
+        * const (read-only) in this pass.
+        */
+       const struct ir3_const_state *const_state = ir3_const_state(v);
+       const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
+
+       int num_ubos = 0;
+       bool progress = false;
        nir_foreach_function (function, nir) {
                if (function->impl) {
                        nir_builder builder;
                        nir_builder_init(&builder, function->impl);
                        nir_foreach_block (block, function->impl) {
                                nir_foreach_instr_safe (instr, block) {
-                                       if (instr_is_load_ubo(instr))
-                                               lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr), &builder, state);
+                                       if (!instr_is_load_ubo(instr))
+                                               continue;
+                                       progress |=
+                                               lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr),
+                                                               &builder, state, &num_ubos,
+                                                               compiler->const_upload_unit);
                                }
                        }
 
@@ -369,12 +431,12 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
                                                                  nir_metadata_dominance);
                }
        }
-
-       /* If we previously had UBO 0, it's been lowered off of load_ubo and all
-        * the others were shifted down.
+       /* Update the num_ubos field for GL (first_ubo_is_default_ubo).  With
+        * Vulkan's bindless, we don't use the num_ubos field, so we can leave it
+        * incremented.
         */
-       if (nir->info.num_ubos >= 1 && nir->info.first_ubo_is_default_ubo)
-               nir->info.num_ubos--;
+       if (nir->info.first_ubo_is_default_ubo)
+           nir->info.num_ubos = num_ubos;
 
-       return state->lower_count > 0;
+       return progress;
 }