+ * For example: <UBO, 5> => binding table index 12
+ */
+uint32_t
+iris_group_index_to_bti(const struct iris_binding_table *bt,
+                        enum iris_surface_group group, uint32_t index)
+{
+   assert(index < bt->sizes[group]);
+
+   const uint64_t used = bt->used_mask[group];
+   const uint64_t slot_bit = 1ull << index;
+
+   /* A slot whose bit is clear has no binding table entry at all. */
+   if ((used & slot_bit) == 0)
+      return IRIS_SURFACE_NOT_USED;
+
+   /* The entry's position inside the group equals the number of used slots
+    * with a lower index; the group's base offset is added on top of that.
+    */
+   const uint64_t used_below = used & (slot_bit - 1);
+   return bt->offsets[group] + util_bitcount64(used_below);
+}
+
+/**
+ * Map a binding table index back to an index within the given group.
+ *
+ * For example: binding table index 12, asked for the UBO group => 5
+ *
+ * Walks the set bits of the group's used mask from lowest to highest and
+ * returns the position of the bit whose rank equals the distance between
+ * \p bti and the group's base offset.  Returns IRIS_SURFACE_NOT_USED when
+ * the group has fewer used surfaces than that distance requires.
+ */
+uint32_t
+iris_bti_to_group_index(const struct iris_binding_table *bt,
+                        enum iris_surface_group group, uint32_t bti)
+{
+   assert(bti >= bt->offsets[group]);
+
+   /* How many used slots still have to be skipped before the match. */
+   uint32_t remaining = bti - bt->offsets[group];
+
+   for (uint64_t pending = bt->used_mask[group]; pending; remaining--) {
+      const int slot = u_bit_scan64(&pending);
+      if (remaining == 0)
+         return slot;
+   }
+
+   return IRIS_SURFACE_NOT_USED;
+}
+
+/**
+ * Replace a group-relative surface index source with a binding table index.
+ *
+ * The builder cursor is placed right before \p instr.  A constant source is
+ * translated with iris_group_index_to_bti() and replaced by an immediate of
+ * the same bit size; a non-constant source has the group's base offset added
+ * to it.  The group must have a non-zero size.
+ */
+static void
+rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
+                     nir_instr *instr, nir_src *src,
+                     enum iris_surface_group group)
+{
+   assert(bt->sizes[group] > 0);
+
+   b->cursor = nir_before_instr(instr);
+
+   nir_ssa_def *replacement;
+   if (!nir_src_is_const(*src)) {
+      /* An indirect access marks every surface of the group as used, so the
+       * group is not compacted and adding the base offset is sufficient.
+       */
+      assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
+      replacement = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
+   } else {
+      const uint32_t group_index = nir_src_as_uint(*src);
+      const uint32_t table_index =
+         iris_group_index_to_bti(bt, group, group_index);
+      replacement = nir_imm_intN_t(b, table_index, src->ssa->bit_size);
+   }
+
+   nir_instr_rewrite_src(instr, src, nir_src_for_ssa(replacement));
+}
+
+/**
+ * Record in the group's used mask which surfaces a source can reference.
+ *
+ * A constant source sets only the bit of the surface it names; any other
+ * source sets the bit of every surface in the group.  The group must have a
+ * non-zero size.
+ */
+static void
+mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
+                   enum iris_surface_group group)
+{
+   assert(bt->sizes[group] > 0);
+
+   if (!nir_src_is_const(*src)) {
+      /* Indirect usage: any surface of the group may be accessed. */
+      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
+      return;
+   }
+
+   const uint64_t slot = nir_src_as_uint(*src);
+   assert(slot < bt->sizes[group]);
+   bt->used_mask[group] |= 1ull << slot;
+}
+
+/**
+ * Tell whether binding table compaction has been disabled through the
+ * INTEL_DISABLE_COMPACT_BINDING_TABLE environment variable.
+ *
+ * The variable is read on the first call (default: false) and the answer is
+ * kept in a function-local static for later calls.
+ */
+static bool
+skip_compacting_binding_tables(void)
+{
+   /* Negative means "not queried yet". */
+   static int cached = -1;
+
+   if (cached >= 0)
+      return cached;
+
+   cached = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
+   return cached;
+}
+
+/**
+ * Set up the binding table indices and apply them to the shader.
+ *
+ * Works in three steps: size every surface group and record which slots are
+ * used (from shader_info where possible, otherwise by scanning the
+ * entrypoint's intrinsics), assign each group with used slots an offset so
+ * that only used slots occupy binding table entries, then rewrite texture
+ * indices and image/UBO/SSBO intrinsic sources to binding table indices.
+ *
+ * \param nir shader to scan and rewrite in place
+ * \param bt output; zeroed and then filled in here
+ * \param num_render_targets size of the render target group (only read for
+ * fragment shaders)
+ * \param num_system_values not referenced in this function's body
+ * \param num_cbufs number of constant buffers; one extra UBO slot is
+ * reserved on top of this for NIR constants
+ */
+static void
+iris_setup_binding_table(struct nir_shader *nir,
+ struct iris_binding_table *bt,
+ unsigned num_render_targets,
+ unsigned num_system_values,
+ unsigned num_cbufs)
+{
+ const struct shader_info *info = &nir->info;
+
+ memset(bt, 0, sizeof(*bt));
+
+ /* Set the sizes for each surface group. For some groups, we already know
+ * upfront how many will be used, so mark them.
+ */
+ if (info->stage == MESA_SHADER_FRAGMENT) {
+ bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
+ /* All render targets used. */
+ bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
+ BITFIELD64_MASK(num_render_targets);
+ } else if (info->stage == MESA_SHADER_COMPUTE) {
+ /* Only sized here; marked as used below if the shader actually loads
+ * the number of work groups.
+ */
+ bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
+ }
+
+ bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = util_last_bit(info->textures_used);
+ bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used;
+
+ bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
+
+ /* Allocate an extra slot in the UBO section for NIR constants.
+ * Binding table compaction will remove it if unnecessary.
+ *
+ * We don't include them in iris_compiled_shader::num_cbufs because
+ * they are uploaded separately from shs->constbuf[], but from a shader
+ * point of view, they're another UBO (at the end of the section).
+ */
+ bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
+
+ /* The first IRIS_MAX_ABOs indices in the SSBO group are for atomics, real
+ * SSBOs start after that. Compaction will remove unused ABOs.
+ */
+ bt->sizes[IRIS_SURFACE_GROUP_SSBO] = IRIS_MAX_ABOS + info->num_ssbos;
+
+ /* Each group's used slots are tracked in a 64-bit mask, so a group must
+ * not be larger than SURFACE_GROUP_MAX_ELEMENTS.
+ */
+ for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
+ assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
+
+ /* Mark surfaces used for the cases we don't have the information available
+ * upfront.
+ */
+ nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+ nir_foreach_block (block, impl) {
+ nir_foreach_instr (instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_num_work_groups:
+ bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
+ break;
+
+ case nir_intrinsic_image_size:
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_load_raw_intel:
+ case nir_intrinsic_image_store_raw_intel:
+ mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
+ break;
+
+ case nir_intrinsic_load_ubo:
+ mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
+ break;
+
+ /* The surface index of store_ssbo is taken from src[1] rather than
+ * src[0], unlike the other SSBO intrinsics below.
+ */
+ case nir_intrinsic_store_ssbo:
+ mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
+ break;
+
+ case nir_intrinsic_get_buffer_size:
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ case nir_intrinsic_ssbo_atomic_fmin:
+ case nir_intrinsic_ssbo_atomic_fmax:
+ case nir_intrinsic_ssbo_atomic_fcomp_swap:
+ case nir_intrinsic_load_ssbo:
+ mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ /* When compaction is disabled we just mark everything as used. */
+ if (unlikely(skip_compacting_binding_tables())) {
+ for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
+ bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
+ }
+
+ /* Calculate the offsets and the binding table size based on the used
+ * surfaces. After this point, the functions to go between "group indices"
+ * and binding table indices can be used.
+ *
+ * Groups with no used surfaces keep the zero offset left by the memset
+ * above and contribute no entries.
+ */
+ uint32_t next = 0;
+ for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
+ if (bt->used_mask[i] != 0) {
+ bt->offsets[i] = next;
+ next += util_bitcount64(bt->used_mask[i]);
+ }
+ }
+ /* next now counts the table entries; each one is accounted as 4 bytes. */
+ bt->size_bytes = next * 4;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_BT)) {
+ iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
+ }
+
+ /* Apply the binding table indices. The backend compiler is not expected
+ * to change those, as we haven't set any of the *_start entries in brw
+ * binding_table.
+ */
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block (block, impl) {
+ nir_foreach_instr (instr, block) {
+ /* Texture instructions keep their index in a field rather than in
+ * a source, so it is translated and stored back directly.
+ */
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ tex->texture_index =
+ iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
+ tex->texture_index);
+ continue;
+ }
+
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ /* The cases below mirror the marking pass above: same intrinsics,
+ * same source slot, same group.
+ */
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_image_size:
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_load_raw_intel:
+ case nir_intrinsic_image_store_raw_intel:
+ rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
+ IRIS_SURFACE_GROUP_IMAGE);
+ break;
+
+ case nir_intrinsic_load_ubo:
+ rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
+ IRIS_SURFACE_GROUP_UBO);
+ break;
+
+ case nir_intrinsic_store_ssbo:
+ rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
+ IRIS_SURFACE_GROUP_SSBO);
+ break;
+
+ case nir_intrinsic_get_buffer_size:
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ case nir_intrinsic_ssbo_atomic_fmin:
+ case nir_intrinsic_ssbo_atomic_fmax:
+ case nir_intrinsic_ssbo_atomic_fcomp_swap:
+ case nir_intrinsic_load_ssbo:
+ rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
+ IRIS_SURFACE_GROUP_SSBO);
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+}
+
+/**
+ * Report that a shader is being recompiled.
+ *
+ * Does nothing when \p info is NULL.  Otherwise logs a message through the
+ * compiler's shader_perf_log hook, looks up an earlier key for the same
+ * program with iris_find_previous_compile(), and hands both the earlier and
+ * the current key to brw_debug_key_recompile().
+ */
+static void
+iris_debug_recompile(struct iris_context *ice,
+                     struct shader_info *info,
+                     const struct brw_base_prog_key *key)
+{
+   const struct brw_compiler *compiler =
+      ((struct iris_screen *) ice->ctx.screen)->compiler;
+
+   if (!info)
+      return;
+
+   const char *stage_name = _mesa_shader_stage_to_string(info->stage);
+   const char *program_name = info->name ? info->name : "(no identifier)";
+   const char *program_label = info->label ? info->label : "";
+
+   compiler->shader_perf_log(&ice->dbg,
+                             "Recompiling %s shader for program %s: %s\n",
+                             stage_name, program_name, program_label);
+
+   const void *previous_key =
+      iris_find_previous_compile(ice, info->stage, key->program_string_id);
+
+   brw_debug_key_recompile(compiler, &ice->dbg, info->stage,
+                           previous_key, key);
+}
+
+/**
+ * Get the last enabled geometry stage.
+ *
+ * This stage is the one which will feed stream output and the rasterizer.
+ * A bound geometry shader wins over a tessellation evaluation shader, which
+ * in turn wins over the vertex shader fallback.
+ */
+static gl_shader_stage
+last_vue_stage(struct iris_context *ice)
+{
+   gl_shader_stage stage = MESA_SHADER_VERTEX;
+
+   /* Later assignments override earlier ones, so check in rising priority. */
+   if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
+      stage = MESA_SHADER_TESS_EVAL;
+
+   if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
+      stage = MESA_SHADER_GEOMETRY;
+
+   return stage;
+}
+
+/**
+ * Compile a vertex shader, and upload the assembly.
+ *
+ * Works on a clone of ish->nir allocated from a temporary ralloc context;
+ * that context is released before returning on both the success and the
+ * failure path.
+ *
+ * \param ice  context providing the screen, debug callback and vtbl
+ * \param ish  uncompiled shader; its compiled_once flag is set on the first
+ *             successful compile and triggers a recompile report afterwards
+ * \param key  program key for the variant to build
+ *
+ * \return the uploaded shader variant, or NULL if the backend compiler
+ *         failed (the error string is reported through dbg_printf).
+ */
+static struct iris_compiled_shader *
+iris_compile_vs(struct iris_context *ice,
+                struct iris_uncompiled_shader *ish,
+                const struct brw_vs_prog_key *key)
+{
+   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+   const struct brw_compiler *compiler = screen->compiler;
+   const struct gen_device_info *devinfo = &screen->devinfo;
+   void *mem_ctx = ralloc_context(NULL);
+   struct brw_vs_prog_data *vs_prog_data =
+      rzalloc(mem_ctx, struct brw_vs_prog_data);
+   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
+   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
+   enum brw_param_builtin *system_values;
+   unsigned num_system_values;
+   unsigned num_cbufs;
+
+   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
+
+   /* Lower user clip planes in NIR when the key asks for any. */
+   if (key->nr_userclip_plane_consts) {
+      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+      nir_lower_clip_vs(nir, (1 << key->nr_userclip_plane_consts) - 1, true);
+      nir_lower_io_to_temporaries(nir, impl, true, false);
+      nir_lower_global_vars_to_local(nir);
+      nir_lower_vars_to_ssa(nir);
+      nir_shader_gather_info(nir, impl);
+   }
+
+   prog_data->use_alt_mode = ish->use_alt_mode;
+
+   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+                       &num_system_values, &num_cbufs);
+
+   struct iris_binding_table bt;
+   iris_setup_binding_table(nir, &bt, /* num_render_targets */ 0,
+                            num_system_values, num_cbufs);
+
+   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
+
+   brw_compute_vue_map(devinfo,
+                       &vue_prog_data->vue_map, nir->info.outputs_written,
+                       nir->info.separate_shader);
+
+   /* Don't tell the backend about our clip plane constants, we've already
+    * lowered them in NIR and we don't want it doing it again.
+    */
+   struct brw_vs_prog_key key_no_ucp = *key;
+   key_no_ucp.nr_userclip_plane_consts = 0;
+
+   char *error_str = NULL;
+   const unsigned *program =
+      brw_compile_vs(compiler, &ice->dbg, mem_ctx, &key_no_ucp, vs_prog_data,
+                     nir, -1, &error_str);
+   if (program == NULL) {
+      dbg_printf("Failed to compile vertex shader: %s\n", error_str);
+      ralloc_free(mem_ctx);
+      /* This function returns a pointer, so signal failure with NULL. */
+      return NULL;
+   }
+
+   if (ish->compiled_once) {
+      iris_debug_recompile(ice, &nir->info, &key->base);
+   } else {
+      ish->compiled_once = true;
+   }
+
+   uint32_t *so_decls =
+      ice->vtbl.create_so_decl_list(&ish->stream_output,
+                                    &vue_prog_data->vue_map);
+
+   /* The original key (with the clip plane count intact) identifies the
+    * variant in the in-memory cache and in the disk cache.
+    */
+   struct iris_compiled_shader *shader =
+      iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
+                         prog_data, so_decls, system_values, num_system_values,
+                         num_cbufs, &bt);
+
+   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
+
+   ralloc_free(mem_ctx);
+   return shader;
+}
+
+/**
+ * Update the current vertex shader variant.
+ *
+ * Fill out the key, look in the cache, compile and bind if needed.