#include "compiler/nir/nir_builder.h"
#include "util/u_math.h"
+static bool
+ubo_is_gl_uniforms(const struct ir3_ubo_info *ubo)
+{
+ return !ubo->bindless && ubo->block == 0;
+}
+
static inline struct ir3_ubo_range
-get_ubo_load_range(nir_intrinsic_instr *instr)
+get_ubo_load_range(nir_shader *nir, nir_intrinsic_instr *instr, uint32_t alignment)
{
struct ir3_ubo_range r;
- int offset = nir_src_as_uint(instr->src[1]);
- const int bytes = nir_intrinsic_dest_components(instr) * 4;
+ if (nir_src_is_const(instr->src[1])) {
+ int offset = nir_src_as_uint(instr->src[1]);
+ const int bytes = nir_intrinsic_dest_components(instr) * 4;
- r.start = ROUND_DOWN_TO(offset, 16 * 4);
- r.end = ALIGN(offset + bytes, 16 * 4);
+ r.start = ROUND_DOWN_TO(offset, alignment * 16);
+ r.end = ALIGN(offset + bytes, alignment * 16);
+ } else {
+ /* The other valid place to call this is on the GL default uniform block */
+ assert(nir_src_as_uint(instr->src[0]) == 0);
+ r.start = 0;
+ r.end = ALIGN(nir->num_uniforms * 16, alignment * 16);
+ }
return r;
}
-static struct ir3_ubo_range *
-get_existing_range(nir_intrinsic_instr *instr,
- struct ir3_ubo_analysis_state *state,
- bool create_new)
+static bool
+get_ubo_info(nir_intrinsic_instr *instr, struct ir3_ubo_info *ubo)
{
- unsigned block, base = 0;
- bool bindless;
if (nir_src_is_const(instr->src[0])) {
- block = nir_src_as_uint(instr->src[0]);
- bindless = false;
+ ubo->block = nir_src_as_uint(instr->src[0]);
+ ubo->bindless_base = 0;
+ ubo->bindless = false;
+ return true;
} else {
nir_intrinsic_instr *rsrc = ir3_bindless_resource(instr->src[0]);
if (rsrc && nir_src_is_const(rsrc->src[0])) {
- block = nir_src_as_uint(rsrc->src[0]);
- base = nir_intrinsic_desc_set(rsrc);
- bindless = true;
- } else {
- return NULL;
+ ubo->block = nir_src_as_uint(rsrc->src[0]);
+ ubo->bindless_base = nir_intrinsic_desc_set(rsrc);
+ ubo->bindless = true;
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * Get an existing range associated with the ubo, but don't create a new
+ * one if one does not already exist.
+ */
+static const struct ir3_ubo_range *
+get_existing_range(nir_intrinsic_instr *instr,
+ const struct ir3_ubo_analysis_state *state)
+{
+ struct ir3_ubo_info ubo = {};
+
+ if (!get_ubo_info(instr, &ubo))
+ return NULL;
+
+ for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
+ const struct ir3_ubo_range *range = &state->range[i];
+ if (range->end < range->start) {
+ break;
+ } else if (!memcmp(&range->ubo, &ubo, sizeof(ubo))) {
+ return range;
}
}
+
+ return NULL;
+}
+
+/**
+ * Get an existing range, or create a new one if necessary/possible.
+ */
+static struct ir3_ubo_range *
+get_range(nir_intrinsic_instr *instr, struct ir3_ubo_analysis_state *state)
+{
+ struct ir3_ubo_info ubo = {};
+
+ if (!get_ubo_info(instr, &ubo))
+ return NULL;
+
for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
struct ir3_ubo_range *range = &state->range[i];
if (range->end < range->start) {
/* We don't have a matching range, but there are more available.
*/
- if (create_new) {
- range->block = block;
- range->bindless_base = base;
- range->bindless = bindless;
- return range;
- } else {
- return NULL;
- }
- } else if (range->block == block && range->bindless_base == base &&
- range->bindless == bindless) {
+ range->ubo = ubo;
+ return range;
+ } else if (!memcmp(&range->ubo, &ubo, sizeof(ubo))) {
return range;
}
}
static void
gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
- struct ir3_ubo_analysis_state *state)
+ struct ir3_ubo_analysis_state *state, uint32_t alignment)
{
- struct ir3_ubo_range *old_r = get_existing_range(instr, state, true);
- if (!old_r)
+ if (ir3_shader_debug & IR3_DBG_NOUBOOPT)
return;
- if (!nir_src_is_const(instr->src[1])) {
- if (!old_r->bindless && old_r->block == 0) {
- /* If this is an indirect on UBO 0, we'll still lower it back to
- * load_uniform. Set the range to cover all of UBO 0.
- */
- old_r->start = 0;
- old_r->end = ALIGN(nir->num_uniforms * 16, 16 * 4);
- }
-
+ struct ir3_ubo_range *old_r = get_range(instr, state);
+ if (!old_r)
return;
- }
- const struct ir3_ubo_range r = get_ubo_load_range(instr);
-
- /* if UBO lowering is disabled, we still want to lower block 0
- * (which is normal uniforms):
+ /* We don't know how to get the size of UBOs being indirected on, other
+ * than on the GL uniforms where we have some other shader_info data.
*/
- if ((old_r->bindless || old_r->block != 0) && (ir3_shader_debug & IR3_DBG_NOUBOOPT))
+ if (!nir_src_is_const(instr->src[1]) && !ubo_is_gl_uniforms(&old_r->ubo))
return;
+ const struct ir3_ubo_range r = get_ubo_load_range(nir, instr, alignment);
+
if (r.start < old_r->start)
old_r->start = r.start;
if (old_r->end < r.end)
}
}
+/* Tracks the maximum bindful UBO accessed so that we reduce the UBO
+ * descriptors emitted in the fast path for GL.
+ */
static void
-lower_ubo_block_decrement(nir_intrinsic_instr *instr, nir_builder *b)
+track_ubo_use(nir_intrinsic_instr *instr, nir_builder *b, int *num_ubos)
{
- /* Skip shifting things for turnip's bindless resources. */
- if (ir3_bindless_resource(instr->src[0]))
+ if (ir3_bindless_resource(instr->src[0])) {
+ assert(!b->shader->info.first_ubo_is_default_ubo); /* only set for GL */
return;
+ }
- /* Shift all GL nir_intrinsic_load_ubo UBO indices down by 1, because we
- * have lowered block 0 off of load_ubo to constbuf and ir3_const only
- * uploads pointers for block 1-N.
- */
- nir_ssa_def *old_idx = nir_ssa_for_src(b, instr->src[0], 1);
- nir_ssa_def *new_idx = nir_iadd_imm(b, old_idx, -1);
- nir_instr_rewrite_src(&instr->instr, &instr->src[0],
- nir_src_for_ssa(new_idx));
+ if (nir_src_is_const(instr->src[0])) {
+ int block = nir_src_as_uint(instr->src[0]);
+ *num_ubos = MAX2(*num_ubos, block + 1);
+ } else {
+ *num_ubos = b->shader->info.num_ubos;
+ }
}
-static void
+static bool
lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
- struct ir3_ubo_analysis_state *state)
+ const struct ir3_ubo_analysis_state *state,
+ int *num_ubos, uint32_t alignment)
{
b->cursor = nir_before_instr(&instr->instr);
* could probably with some effort determine a block stride in number of
* registers.
*/
- struct ir3_ubo_range *range = get_existing_range(instr, state, false);
+ const struct ir3_ubo_range *range = get_existing_range(instr, state);
if (!range) {
- lower_ubo_block_decrement(instr, b);
- return;
+ track_ubo_use(instr, b, num_ubos);
+ return false;
}
- if (range->bindless || range->block > 0) {
- /* We don't lower dynamic array indexing either, but we definitely should.
- * We don't have a good way of determining the range of the dynamic
- * access, so for now just fall back to pulling.
- */
- if (!nir_src_is_const(instr->src[1])) {
- lower_ubo_block_decrement(instr, b);
- return;
- }
+ /* We don't have a good way of determining the range of the dynamic
+ * access in general, so for now just fall back to pulling.
+ */
+ if (!nir_src_is_const(instr->src[1]) && !ubo_is_gl_uniforms(&range->ubo))
+ return false;
- /* After gathering the UBO access ranges, we limit the total
- * upload. Reject if we're now outside the range.
- */
- const struct ir3_ubo_range r = get_ubo_load_range(instr);
- if (!(range->start <= r.start && r.end <= range->end)) {
- lower_ubo_block_decrement(instr, b);
- return;
- }
+ /* After gathering the UBO access ranges, we limit the total
+ * upload. Don't lower if this load is outside the range.
+ */
+ const struct ir3_ubo_range r = get_ubo_load_range(b->shader,
+ instr, alignment);
+ if (!(range->start <= r.start && r.end <= range->end)) {
+ track_ubo_use(instr, b, num_ubos);
+ return false;
}
nir_ssa_def *ubo_offset = nir_ssa_for_src(b, instr->src[1], 1);
nir_instr_remove(&instr->instr);
- state->lower_count++;
+ return true;
}
static bool
return op == nir_intrinsic_load_ubo;
}
-bool
-ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
+void
+ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
{
- struct ir3_ubo_analysis_state *state = &shader->ubo_state;
+ struct ir3_const_state *const_state = ir3_const_state(v);
+ struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
+ struct ir3_compiler *compiler = v->shader->compiler;
memset(state, 0, sizeof(*state));
for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
nir_foreach_block (block, function->impl) {
nir_foreach_instr (instr, block) {
if (instr_is_load_ubo(instr))
- gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), state);
+ gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr),
+ state, compiler->const_upload_unit);
}
}
}
 * dynamically accessed ranges separately and upload static ranges
* first.
*/
- const uint32_t max_upload = 16 * 1024;
- uint32_t offset = shader->const_state.num_reserved_user_consts * 16;
+
+ /* Limit our uploads to the amount of constant buffer space available in
+ * the hardware, minus what the shader compiler may need for various
+ * driver params. We do this UBO-to-push-constant before the real
+ * allocation of the driver params' const space, because UBO pointers can
+ * be driver params but this pass usually eliminates them.
+ */
+ struct ir3_const_state worst_case_const_state = { };
+ ir3_setup_const_state(nir, v, &worst_case_const_state);
+ const uint32_t max_upload = (ir3_max_const(v) -
+ worst_case_const_state.offsets.immediate) * 16;
+
+ uint32_t offset = v->shader->num_reserved_user_consts * 16;
state->num_enabled = ARRAY_SIZE(state->range);
for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
if (state->range[i].start >= state->range[i].end) {
}
state->size = offset;
+}
+bool
+ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v)
+{
+ struct ir3_compiler *compiler = v->shader->compiler;
+ /* For the binning pass variant, we re-use the corresponding draw-pass
+ * variant's const_state and UBO state. To make this clear, in this
+ * pass it is const (read-only).
+ */
+ const struct ir3_const_state *const_state = ir3_const_state(v);
+ const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
+
+ int num_ubos = 0;
+ bool progress = false;
nir_foreach_function (function, nir) {
if (function->impl) {
nir_builder builder;
nir_builder_init(&builder, function->impl);
nir_foreach_block (block, function->impl) {
nir_foreach_instr_safe (instr, block) {
- if (instr_is_load_ubo(instr))
- lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr), &builder, state);
+ if (!instr_is_load_ubo(instr))
+ continue;
+ progress |=
+ lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr),
+ &builder, state, &num_ubos,
+ compiler->const_upload_unit);
}
}
nir_metadata_dominance);
}
}
-
- /* If we previously had UBO 0, it's been lowered off of load_ubo and all
- * the others were shifted down.
+ /* Update the num_ubos field for GL (first_ubo_is_default_ubo). With
+ * Vulkan's bindless, we don't use the num_ubos field, so we can leave it
+ * incremented.
*/
- if (nir->info.num_ubos >= 1 && nir->info.first_ubo_is_default_ubo)
- nir->info.num_ubos--;
+ if (nir->info.first_ubo_is_default_ubo)
+ nir->info.num_ubos = num_ubos;
- return state->lower_count > 0;
+ return progress;
}