+iris_update_compiled_compute_shader(struct iris_context *ice)
+{
+ struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
+ struct iris_uncompiled_shader *ish =
+ ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
+
+ struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+ const struct gen_device_info *devinfo = &screen->devinfo;
+ struct brw_cs_prog_key key = { KEY_INIT(devinfo->gen) };
+ ice->vtbl.populate_cs_key(ice, &key);
+
+ struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
+ struct iris_compiled_shader *shader =
+ iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key);
+
+ if (!shader)
+ shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));
+
+ if (!shader)
+ shader = iris_compile_cs(ice, ish, &key);
+
+ if (old != shader) {
+ ice->shaders.prog[IRIS_CACHE_CS] = shader;
+ ice->state.dirty |= IRIS_DIRTY_CS |
+ IRIS_DIRTY_BINDINGS_CS |
+ IRIS_DIRTY_CONSTANTS_CS;
+ shs->sysvals_need_upload = true;
+ }
+}
+
+/**
+ * Fill a compute shader push constant buffer with per-thread data.
+ *
+ * Only one layout is supported, and the asserts below enforce it: there is
+ * some push data, none of it is cross-thread, and the per-thread portion is
+ * exactly one dword whose parameter is the built-in subgroup ID.
+ *
+ * For each of the cs_prog_data->threads threads, the thread's index is
+ * written as its subgroup ID.  dst must be large enough to hold
+ * 8 * (threads - 1) + 1 dwords.
+ */
+void
+iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
+                               uint32_t *dst)
+{
+   assert(cs_prog_data->push.total.size > 0);
+   assert(cs_prog_data->push.cross_thread.size == 0);
+   assert(cs_prog_data->push.per_thread.dwords == 1);
+   assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
+
+   unsigned thread = 0;
+   while (thread < cs_prog_data->threads) {
+      /* Per-thread slots are spaced 8 dwords apart; only the first dword
+       * of each slot is written.
+       */
+      dst[8 * thread] = thread;
+      thread++;
+   }
+}
+
+/**
+ * Return the scratch BO for the given per-thread scratch size and shader
+ * stage, allocating it on first use.
+ *
+ * BOs are cached in ice->shaders.scratch_bos, indexed by the encoded
+ * per-thread size (ffs(per_thread_scratch) - 11) and by stage, so later
+ * calls with the same size and stage return the same BO.
+ */
+struct iris_bo *
+iris_get_scratch_space(struct iris_context *ice,
+                       unsigned per_thread_scratch,
+                       gl_shader_stage stage)
+{
+   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+   const struct gen_device_info *devinfo = &screen->devinfo;
+
+   const unsigned size_index = ffs(per_thread_scratch) - 11;
+   assert(size_index < (1 << 16));
+
+   struct iris_bo **slot = &ice->shaders.scratch_bos[size_index][stage];
+
+   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
+    *
+    *    "Scratch Space per slice is computed based on 4 sub-slices.  SW
+    *     must allocate scratch space enough so that each slice has 4
+    *     slices allowed."
+    *
+    * According to the other driver team, this applies to compute shaders
+    * as well.  This is not currently documented at all.
+    *
+    * This hack is no longer necessary on Gen11+.
+    */
+   const unsigned subslice_total =
+      devinfo->gen < 11 ? 4 * devinfo->num_slices : screen->subslice_total;
+   assert(subslice_total >= screen->subslice_total);
+
+   /* Already allocated for this size and stage: reuse it. */
+   if (*slot)
+      return *slot;
+
+   const unsigned cs_ids_per_subslice = devinfo->max_cs_threads;
+   const uint32_t stage_max_threads[] = {
+      [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
+      [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
+      [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
+      [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
+      [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
+      [MESA_SHADER_COMPUTE]   = cs_ids_per_subslice * subslice_total,
+   };
+
+   /* One per-thread scratch region for every thread the stage can run. */
+   const uint32_t bo_size = per_thread_scratch * stage_max_threads[stage];
+
+   *slot = iris_bo_alloc(screen->bufmgr, "scratch", bo_size,
+                         IRIS_MEMZONE_SHADER);
+   return *slot;
+}
+
+/* ------------------------------------------------------------------- */
+
+/**
+ * Common backend of the pipe->create_[stage]_state() driver hooks.
+ *
+ * Wraps a NIR shader in a newly allocated iris_uncompiled_shader, which is
+ * what the hooks hand back as the Gallium CSO.  Along the way it runs the
+ * NIR preprocessing and lowering passes, uploads the shader's constant data
+ * (if any), copies the stream output info (if given), and, when the screen
+ * has a disk cache, records a SHA-1 of the stripped, serialized NIR.
+ *
+ * This function does not compile the shader to assembly.
+ *
+ * Returns NULL if the iris_uncompiled_shader cannot be allocated.
+ */
+static void *
+iris_create_uncompiled_shader(struct pipe_context *ctx,
+                              nir_shader *nir,
+                              const struct pipe_stream_output_info *so_info)
+{
+   struct iris_context *ice = (struct iris_context *)ctx;
+   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
+   const struct gen_device_info *devinfo = &screen->devinfo;
+
+   struct iris_uncompiled_shader *ish = calloc(1, sizeof(*ish));
+   if (ish == NULL)
+      return NULL;
+
+   brw_preprocess_nir(screen->compiler, nir, NULL);
+
+   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
+   NIR_PASS_V(nir, iris_lower_storage_image_derefs);
+
+   nir_sweep(nir);
+
+   /* Upload the shader's constant data and build surface state for it. */
+   if (nir->constant_data_size > 0) {
+      unsigned upload_offset;
+      u_upload_data(ice->shaders.uploader, 0, nir->constant_data_size,
+                    32, nir->constant_data, &upload_offset,
+                    &ish->const_data);
+
+      struct pipe_shader_buffer const_buf = {
+         .buffer = ish->const_data,
+         .buffer_offset = upload_offset,
+         .buffer_size = nir->constant_data_size,
+      };
+      iris_upload_ubo_ssbo_surf_state(ice, &const_buf,
+                                      &ish->const_data_state, false);
+   }
+
+   ish->program_id = get_new_program_id(screen);
+   ish->nir = nir;
+
+   if (so_info != NULL) {
+      memcpy(&ish->stream_output, so_info, sizeof(*so_info));
+      update_so_info(&ish->stream_output, nir->info.outputs_written);
+   }
+
+   /* Save this now before potentially dropping nir->info.name.  ish was
+    * zero-initialized by calloc, so a plain assignment is sufficient.
+    */
+   ish->use_alt_mode =
+      nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0;
+
+   if (!screen->disk_cache)
+      return ish;
+
+   /* Serialize the NIR to a binary blob that we can hash for the disk
+    * cache.  Unnecessary information (like variable names) is dropped
+    * first, which makes the serialized NIR smaller and lets more
+    * isomorphic shaders hash identically, increasing cache hits.  The
+    * stripping is done on a clone because that information can be useful
+    * when inspecting and debugging shaders.
+    */
+   nir_shader *stripped = nir_shader_clone(NULL, nir);
+   nir_strip(stripped);
+
+   struct blob serialized;
+   blob_init(&serialized);
+   nir_serialize(&serialized, stripped);
+   _mesa_sha1_compute(serialized.data, serialized.size, ish->nir_sha1);
+   blob_finish(&serialized);
+
+   ralloc_free(stripped);
+
+   return ish;
+}
+
+/**
+ * Build an iris_uncompiled_shader from a pipe_shader_state.
+ *
+ * TGSI shaders are translated to NIR with tgsi_to_nir(); otherwise the NIR
+ * supplied in state->ir.nir is used directly.  The state's stream output
+ * info is forwarded to iris_create_uncompiled_shader(), whose result
+ * (possibly NULL) is returned unchanged.
+ */
+static struct iris_uncompiled_shader *
+iris_create_shader_state(struct pipe_context *ctx,
+                         const struct pipe_shader_state *state)
+{
+   struct nir_shader *shader_nir =
+      state->type == PIPE_SHADER_IR_TGSI
+         ? tgsi_to_nir(state->tokens, ctx->screen)
+         : state->ir.nir;
+
+   return iris_create_uncompiled_shader(ctx, shader_nir,
+                                        &state->stream_output);
+}
+
+/**
+ * The pipe->create_vs_state() driver hook.
+ *
+ * Creates the uncompiled vertex shader CSO, records its non-orthogonal
+ * state dependencies, and, when screen->precompile is set, fetches a
+ * variant from the disk cache or compiles one with a default key.
+ *
+ * Returns the new iris_uncompiled_shader, or NULL if it could not be
+ * created.
+ */
+static void *
+iris_create_vs_state(struct pipe_context *ctx,
+                     const struct pipe_shader_state *state)
+{
+   struct iris_context *ice = (void *) ctx;
+   struct iris_screen *screen = (void *) ctx->screen;
+   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
+
+   /* iris_create_uncompiled_shader() returns NULL on allocation failure;
+    * bail out rather than dereferencing it below.
+    */
+   if (!ish)
+      return NULL;
+
+   /* User clip planes: a shader with no clip distance array depends on
+    * rasterizer state.
+    */
+   if (ish->nir->info.clip_distance_array_size == 0)
+      ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
+
+   if (screen->precompile) {
+      const struct gen_device_info *devinfo = &screen->devinfo;
+      struct brw_vs_prog_key key = { KEY_INIT(devinfo->gen) };
+
+      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
+         iris_compile_vs(ice, ish, &key);
+   }
+
+   return ish;
+}
+
+/**
+ * The pipe->create_tcs_state() driver hook.
+ *
+ * Creates the uncompiled tessellation control shader CSO and, when
+ * screen->precompile is set, fetches a variant from the disk cache or
+ * compiles one using a key guessed from the shader's own info.
+ *
+ * Returns the new iris_uncompiled_shader, or NULL if it could not be
+ * created.
+ */
+static void *
+iris_create_tcs_state(struct pipe_context *ctx,
+                      const struct pipe_shader_state *state)
+{
+   struct iris_context *ice = (void *) ctx;
+   struct iris_screen *screen = (void *) ctx->screen;
+   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
+
+   /* iris_create_uncompiled_shader() returns NULL on allocation failure;
+    * bail out rather than dereferencing it below.
+    */
+   if (!ish)
+      return NULL;
+
+   if (screen->precompile) {
+      /* Numeric value of the GL_TRIANGLES enum, used as the fallback when
+       * the shader info carries no primitive mode.  Named in lower case
+       * because identifiers starting with an underscore and an upper-case
+       * letter are reserved.
+       */
+      const unsigned gl_triangles = 0x0004;
+      const struct brw_compiler *compiler = screen->compiler;
+      const struct gen_device_info *devinfo = &screen->devinfo;
+      const struct shader_info *info = &ish->nir->info;
+
+      struct brw_tcs_prog_key key = {
+         KEY_INIT(devinfo->gen),
+         /* XXX: make sure the linker fills this out from the TES... */
+         .tes_primitive_mode =
+            info->tess.primitive_mode ? info->tess.primitive_mode
+                                      : gl_triangles,
+         .outputs_written = info->outputs_written,
+         .patch_outputs_written = info->patch_outputs_written,
+      };
+
+      /* 8_PATCH mode needs the key to contain the input patch
+       * dimensionality.  We don't have that information, so we guess that
+       * the input and output patches are the same size.  This is a bad
+       * guess, but we can't do much better.
+       */
+      if (compiler->use_tcs_8_patch)
+         key.input_vertices = info->tess.tcs_vertices_out;
+
+      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
+         iris_compile_tcs(ice, ish, &key);
+   }
+
+   return ish;
+}
+
+/**
+ * The pipe->create_tes_state() driver hook.
+ *
+ * Creates the uncompiled tessellation evaluation shader CSO and, when
+ * screen->precompile is set, fetches a variant from the disk cache or
+ * compiles one with a key built from the shader's own input masks.
+ *
+ * Returns the new iris_uncompiled_shader, or NULL if it could not be
+ * created.
+ */
+static void *
+iris_create_tes_state(struct pipe_context *ctx,
+                      const struct pipe_shader_state *state)
+{
+   struct iris_context *ice = (void *) ctx;
+   struct iris_screen *screen = (void *) ctx->screen;
+   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
+
+   /* iris_create_uncompiled_shader() returns NULL on allocation failure;
+    * bail out rather than dereferencing it below.
+    */
+   if (!ish)
+      return NULL;
+
+   if (screen->precompile) {
+      const struct gen_device_info *devinfo = &screen->devinfo;
+      const struct shader_info *info = &ish->nir->info;
+
+      struct brw_tes_prog_key key = {
+         KEY_INIT(devinfo->gen),
+         /* XXX: not ideal, need TCS output/TES input unification */
+         .inputs_read = info->inputs_read,
+         .patch_inputs_read = info->patch_inputs_read,
+      };
+
+      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
+         iris_compile_tes(ice, ish, &key);
+   }
+
+   return ish;
+}
+
+/**
+ * The pipe->create_gs_state() driver hook.
+ *
+ * Creates the uncompiled geometry shader CSO and, when screen->precompile
+ * is set, fetches a variant from the disk cache or compiles one with a
+ * default key.
+ *
+ * Returns the new iris_uncompiled_shader, or NULL if it could not be
+ * created.
+ */
+static void *
+iris_create_gs_state(struct pipe_context *ctx,
+                     const struct pipe_shader_state *state)
+{
+   struct iris_context *ice = (void *) ctx;
+   struct iris_screen *screen = (void *) ctx->screen;
+   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
+
+   /* iris_create_uncompiled_shader() returns NULL on allocation failure;
+    * do not hand a NULL shader to the disk cache or the compiler.
+    */
+   if (!ish)
+      return NULL;
+
+   if (screen->precompile) {
+      const struct gen_device_info *devinfo = &screen->devinfo;
+      struct brw_gs_prog_key key = { KEY_INIT(devinfo->gen) };
+
+      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
+         iris_compile_gs(ice, ish, &key);
+   }
+
+   return ish;
+}
+
+/**
+ * The pipe->create_fs_state() driver hook.
+ *
+ * Creates the uncompiled fragment shader CSO, records its non-orthogonal
+ * state dependencies, and, when screen->precompile is set, fetches a
+ * variant from the disk cache or compiles one using a key guessed from the
+ * shader's own info.
+ *
+ * Returns the new iris_uncompiled_shader, or NULL if it could not be
+ * created.
+ */
+static void *
+iris_create_fs_state(struct pipe_context *ctx,
+                     const struct pipe_shader_state *state)
+{
+   struct iris_context *ice = (void *) ctx;
+   struct iris_screen *screen = (void *) ctx->screen;
+   struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
+
+   /* iris_create_uncompiled_shader() returns NULL on allocation failure;
+    * bail out rather than dereferencing it below.
+    */
+   if (!ish)
+      return NULL;
+
+   const struct shader_info *info = &ish->nir->info;
+
+   ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
+               (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
+               (1ull << IRIS_NOS_RASTERIZER) |
+               (1ull << IRIS_NOS_BLEND);
+
+   /* Computed once and used both for the state dependency and for the
+    * precompile key below.
+    */
+   const bool can_rearrange_varyings =
+      util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
+
+   /* The program key needs the VUE map if there are > 16 inputs */
+   if (!can_rearrange_varyings)
+      ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
+
+   if (screen->precompile) {
+      /* Every written output except depth, stencil and sample mask. */
+      const uint64_t color_outputs = info->outputs_written &
+         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
+           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
+
+      const struct gen_device_info *devinfo = &screen->devinfo;
+      struct brw_wm_prog_key key = {
+         KEY_INIT(devinfo->gen),
+         .nr_color_regions = util_bitcount(color_outputs),
+         .coherent_fb_fetch = true,
+         .input_slots_valid =
+            can_rearrange_varyings ? 0
+                                   : (info->inputs_read | VARYING_BIT_POS),
+      };
+
+      if (!iris_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
+         iris_compile_fs(ice, ish, &key, NULL);
+   }
+
+   return ish;
+}
+
+static void *
+iris_create_compute_state(struct pipe_context *ctx,
+ const struct pipe_compute_state *state)