#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
+#include "util/debug.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
*out_num_cbufs = num_cbufs;
}
+/* Human-readable name of each surface group, indexed by the
+ * IRIS_SURFACE_GROUP_* value.  Used when printing a binding table.
+ */
+static const char *surface_group_names[] = {
+ [IRIS_SURFACE_GROUP_RENDER_TARGET] = "render target",
+ [IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = "CS work groups",
+ [IRIS_SURFACE_GROUP_TEXTURE] = "texture",
+ [IRIS_SURFACE_GROUP_UBO] = "ubo",
+ [IRIS_SURFACE_GROUP_SSBO] = "ssbo",
+ [IRIS_SURFACE_GROUP_IMAGE] = "image",
+};
+
+/**
+ * Write a human-readable dump of the binding table bt to fp.
+ *
+ * name labels the table in the output.  A header line reports the number
+ * of entries (and the uncompacted count when the two differ), followed by
+ * one line per used surface giving its group name and its index within
+ * that group.
+ */
static void
-rewrite_src_with_bti(nir_builder *b, nir_instr *instr,
- nir_src *src, uint32_t offset)
+iris_print_binding_table(FILE *fp, const char *name,
+ const struct iris_binding_table *bt)
{
- assert(offset != 0xd0d0d0d0);
+ STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
+
+ /* total sums the size of every group; compacted counts only the
+ * surfaces whose bit is set in the group's used_mask.
+ */
+ uint32_t total = 0;
+ uint32_t compacted = 0;
+
+ for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
+ uint32_t size = bt->sizes[i];
+ total += size;
+ if (size)
+ compacted += util_bitcount64(bt->used_mask[i]);
+ }
+
+ if (total == 0) {
+ fprintf(fp, "Binding table for %s is empty\n\n", name);
+ return;
+ }
+
+ if (total != compacted) {
+ fprintf(fp, "Binding table for %s "
+ "(compacted to %u entries from %u entries)\n",
+ name, compacted, total);
+ } else {
+ fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
+ }
+
+ /* Walk the groups in order and number the used surfaces consecutively. */
+ uint32_t entry = 0;
+ for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
+ uint64_t mask = bt->used_mask[i];
+ while (mask) {
+ int index = u_bit_scan64(&mask);
+ fprintf(fp, " [%u] %s #%d\n", entry++, surface_group_names[i], index);
+ }
+ }
+ fprintf(fp, "\n");
+}
+
+enum {
+ /* Max elements in a surface group.  The per-group used_mask values are
+ * handled as 64-bit bitmasks in this file, one bit per element.
+ */
+ SURFACE_GROUP_MAX_ELEMENTS = 64,
+};
+
+/**
+ * Translate a <group, index> pair into a binding table index.
+ *
+ * For example: <UBO, 5> => binding table index 12
+ *
+ * Returns IRIS_SURFACE_NOT_USED when the surface is not marked as used in
+ * the group's used_mask.
+ */
+uint32_t
+iris_group_index_to_bti(const struct iris_binding_table *bt,
+                        enum iris_surface_group group, uint32_t index)
+{
+   assert(index < bt->sizes[group]);
+
+   const uint64_t used = bt->used_mask[group];
+   const uint64_t slot = 1ull << index;
+
+   /* A surface without its bit set has no entry in the table. */
+   if (!(used & slot))
+      return IRIS_SURFACE_NOT_USED;
+
+   /* The position within the group is the number of used surfaces that
+    * come before this one.
+    */
+   return bt->offsets[group] + util_bitcount64(used & (slot - 1));
+}
+
+/**
+ * Translate a binding table index back into an index within a group.
+ *
+ * For example: binding table index 12 => <UBO, 5>
+ *
+ * Returns IRIS_SURFACE_NOT_USED when bti lies past the last used surface
+ * of the group.
+ */
+uint32_t
+iris_bti_to_group_index(const struct iris_binding_table *bt,
+                        enum iris_surface_group group, uint32_t bti)
+{
+   assert(bti >= bt->offsets[group]);
+
+   /* Step through the used surfaces in ascending order; the one reached
+    * after skipping (bti - offset) of them is the answer.
+    */
+   uint64_t remaining = bt->used_mask[group];
+   for (uint32_t skip = bti - bt->offsets[group]; remaining != 0; skip--) {
+      const int index = u_bit_scan64(&remaining);
+      if (skip == 0)
+         return index;
+   }
+
+   return IRIS_SURFACE_NOT_USED;
+}
+
+/**
+ * Rewrite src of instr, a surface index relative to group, into an index
+ * into the binding table described by bt.
+ *
+ * A constant index is translated through iris_group_index_to_bti(); a
+ * non-constant one gets the group's base offset added at run time.
+ */
+static void
+rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
+ nir_instr *instr, nir_src *src,
+ enum iris_surface_group group)
+{
+ assert(bt->sizes[group] > 0);
b->cursor = nir_before_instr(instr);
nir_ssa_def *bti;
if (nir_src_is_const(*src)) {
- bti = nir_imm_intN_t(b, nir_src_as_uint(*src) + offset,
+ uint32_t index = nir_src_as_uint(*src);
+ bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
src->ssa->bit_size);
} else {
- bti = nir_iadd_imm(b, src->ssa, offset);
+ /* An indirect access marks every surface of the group as used, so the
+ * group is not compacted and we can just add its base offset.
+ */
+ assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
+ bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
}
nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
}
+/**
+ * Record in bt->used_mask which surfaces of group are referenced by src.
+ *
+ * A constant source marks exactly one surface; any other source marks the
+ * whole group.
+ */
+static void
+mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
+                   enum iris_surface_group group)
+{
+   assert(bt->sizes[group] > 0);
+
+   if (!nir_src_is_const(*src)) {
+      /* The index is only known at run time, so keep every surface. */
+      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
+      return;
+   }
+
+   const uint64_t index = nir_src_as_uint(*src);
+   assert(index < bt->sizes[group]);
+   bt->used_mask[group] |= 1ull << index;
+}
+
+/**
+ * Report whether the INTEL_DISABLE_COMPACT_BINDING_TABLE environment
+ * variable asks for compaction to be skipped (defaults to false).
+ *
+ * The variable is read on the first call and the answer is cached in a
+ * function-local static.
+ */
+static bool
+skip_compacting_binding_tables(void)
+{
+   /* A negative value means the environment has not been queried yet. */
+   static int cached = -1;
+
+   if (cached >= 0)
+      return cached;
+
+   cached = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
+   return cached;
+}
+
/**
* Set up the binding table indices and apply to the shader.
- *
- * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
- * unused but also make sure that addition of small offsets to them will
- * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
*/
static void
iris_setup_binding_table(struct nir_shader *nir,
memset(bt, 0, sizeof(*bt));
- /* Calculate the initial binding table index for each group. */
- uint32_t next_offset;
+ /* Set the sizes for each surface group. For some groups, we already know
+ * upfront how many will be used, so mark them.
+ */
if (info->stage == MESA_SHADER_FRAGMENT) {
- next_offset = num_render_targets;
+ bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
+ /* All render targets used. */
+ bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
+ BITFIELD64_MASK(num_render_targets);
} else if (info->stage == MESA_SHADER_COMPUTE) {
- next_offset = 1;
- } else {
- next_offset = 0;
+ bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
}
- unsigned num_textures = util_last_bit(info->textures_used);
- if (num_textures) {
- bt->texture_start = next_offset;
- next_offset += num_textures;
- } else {
- bt->texture_start = 0xd0d0d0d0;
- }
+ bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = util_last_bit(info->textures_used);
+ bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used;
- if (info->num_images) {
- bt->image_start = next_offset;
- next_offset += info->num_images;
- } else {
- bt->image_start = 0xd0d0d0d0;
- }
+ bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
- /* Allocate a slot in the UBO section for NIR constants if present.
+ /* Allocate an extra slot in the UBO section for NIR constants.
+ * Binding table compaction will remove it if unnecessary.
+ *
* We don't include them in iris_compiled_shader::num_cbufs because
* they are uploaded separately from shs->constbuf[], but from a shader
* point of view, they're another UBO (at the end of the section).
*/
- if (nir->constant_data_size > 0)
- num_cbufs++;
+ bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
- if (num_cbufs) {
- //assert(info->num_ubos <= BRW_MAX_UBO);
- bt->ubo_start = next_offset;
- next_offset += num_cbufs;
- } else {
- bt->ubo_start = 0xd0d0d0d0;
+ /* The first IRIS_MAX_ABOS indices in the SSBO group are for atomics, real
+ * SSBOs start after that. Compaction will remove unused ABOs.
+ */
+ bt->sizes[IRIS_SURFACE_GROUP_SSBO] = IRIS_MAX_ABOS + info->num_ssbos;
+
+ for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
+ assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
+
+ /* Mark surfaces used for the cases we don't have the information available
+ * upfront.
+ */
+ nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+ nir_foreach_block (block, impl) {
+ nir_foreach_instr (instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_num_work_groups:
+ bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
+ break;
+
+ case nir_intrinsic_image_size:
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_load_raw_intel:
+ case nir_intrinsic_image_store_raw_intel:
+ mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
+ break;
+
+ case nir_intrinsic_load_ubo:
+ mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
+ break;
+
+ case nir_intrinsic_store_ssbo:
+ mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
+ break;
+
+ case nir_intrinsic_get_buffer_size:
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ case nir_intrinsic_ssbo_atomic_fmin:
+ case nir_intrinsic_ssbo_atomic_fmax:
+ case nir_intrinsic_ssbo_atomic_fcomp_swap:
+ case nir_intrinsic_load_ssbo:
+ mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
+ break;
+
+ default:
+ break;
+ }
+ }
}
- if (info->num_ssbos || info->num_abos) {
- bt->ssbo_start = next_offset;
- // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
- next_offset += IRIS_MAX_ABOS + info->num_ssbos;
- } else {
- bt->ssbo_start = 0xd0d0d0d0;
+ /* When compaction is disabled, we just mark everything as used. */
+ if (unlikely(skip_compacting_binding_tables())) {
+ for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
+ bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
}
- bt->size_bytes = next_offset * 4;
+ /* Calculate the offsets and the binding table size based on the used
+ * surfaces. After this point, the functions to go between "group indices"
+ * and binding table indices can be used.
+ */
+ uint32_t next = 0;
+ for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
+ if (bt->used_mask[i] != 0) {
+ bt->offsets[i] = next;
+ next += util_bitcount64(bt->used_mask[i]);
+ }
+ }
+ bt->size_bytes = next * 4;
- nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+ if (unlikely(INTEL_DEBUG & DEBUG_BT)) {
+ iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
+ }
/* Apply the binding table indices. The backend compiler is not expected
* to change those, as we haven't set any of the *_start entries in brw
nir_foreach_block (block, impl) {
nir_foreach_instr (instr, block) {
if (instr->type == nir_instr_type_tex) {
- assert(bt->texture_start != 0xd0d0d0d0);
- nir_instr_as_tex(instr)->texture_index += bt->texture_start;
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ tex->texture_index =
+ iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
+ tex->texture_index);
continue;
}
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_load_raw_intel:
case nir_intrinsic_image_store_raw_intel:
- rewrite_src_with_bti(&b, instr, &intrin->src[0], bt->image_start);
+ rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
+ IRIS_SURFACE_GROUP_IMAGE);
break;
case nir_intrinsic_load_ubo:
- rewrite_src_with_bti(&b, instr, &intrin->src[0], bt->ubo_start);
+ rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
+ IRIS_SURFACE_GROUP_UBO);
break;
case nir_intrinsic_store_ssbo:
- rewrite_src_with_bti(&b, instr, &intrin->src[1], bt->ssbo_start);
+ rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
+ IRIS_SURFACE_GROUP_SSBO);
break;
case nir_intrinsic_get_buffer_size:
case nir_intrinsic_ssbo_atomic_fmax:
case nir_intrinsic_ssbo_atomic_fcomp_swap:
case nir_intrinsic_load_ssbo:
- rewrite_src_with_bti(&b, instr, &intrin->src[0], bt->ssbo_start);
+ rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
+ IRIS_SURFACE_GROUP_SSBO);
break;
default:
/* Manually setup the TCS binding table. */
memset(&bt, 0, sizeof(bt));
+ bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
+ bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
bt.size_bytes = 4;
prog_data->ubo_ranges[0].length = 1;
ice->state.dirty |= IRIS_DIRTY_TCS |
IRIS_DIRTY_BINDINGS_TCS |
IRIS_DIRTY_CONSTANTS_TCS;
+
+ if (!tcs) {
+ /* We're binding a passthrough TCS, which doesn't have uniforms.
+ * Since there's no actual TCS, the state tracker doesn't bother
+ * to call set_constant_buffers to clear stale constant buffers.
+ *
+ * We do upload TCS constants for the default tesslevel system
+ * values, however. In this case, we would see stale constant
+ * data and try and read a dangling cbuf0->user_buffer pointer.
+ * Just zero out the stale constants to avoid the upload.
+ */
+ struct iris_shader_state *shs =
+ &ice->state.shaders[MESA_SHADER_TESS_CTRL];
+
+ memset(&shs->cbuf0, 0, sizeof(shs->cbuf0));
+ }
}
}
struct iris_stream_output_target *so =
(void *) ice->state.so_target[i];
if (so)
- so->stride = ish->stream_output.stride[i];
+ so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
}
}
if (!ish)
return NULL;
- nir = brw_preprocess_nir(screen->compiler, nir, NULL);
+ brw_preprocess_nir(screen->compiler, nir, NULL);
NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
NIR_PASS_V(nir, iris_lower_storage_image_derefs);
+ nir_sweep(nir);
+
if (nir->constant_data_size > 0) {
unsigned data_offset;
u_upload_data(ice->shaders.uploader, 0, nir->constant_data_size,
* Updates dirty tracking to account for the shader's NOS.
*/
static void
-bind_state(struct iris_context *ice,
- struct iris_uncompiled_shader *ish,
- gl_shader_stage stage)
+bind_shader_state(struct iris_context *ice,
+ struct iris_uncompiled_shader *ish,
+ gl_shader_stage stage)
{
uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage;
const uint64_t nos = ish ? ish->nos : 0;
+/**
+ * Forward state to bind_shader_state() as the MESA_SHADER_VERTEX shader.
+ * ctx is passed through a void pointer cast as the iris_context argument.
+ */
static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
- bind_state((void *) ctx, state, MESA_SHADER_VERTEX);
+ bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
}
+/**
+ * Forward state to bind_shader_state() as the MESA_SHADER_TESS_CTRL shader.
+ * ctx is passed through a void pointer cast as the iris_context argument.
+ */
static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
- bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
+ bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}
static void
if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
ice->state.dirty |= IRIS_DIRTY_URB;
- bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
+ bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}
static void
if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
ice->state.dirty |= IRIS_DIRTY_URB;
- bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
+ bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}
static void
(new_ish->nir->info.outputs_written & color_bits))
ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
- bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
+ bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}
+/**
+ * Forward state to bind_shader_state() as the MESA_SHADER_COMPUTE shader.
+ * ctx is passed through a void pointer cast as the iris_context argument.
+ */
static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
- bind_state((void *) ctx, state, MESA_SHADER_COMPUTE);
+ bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}
void