-static inline void si_shader_selector_key(struct pipe_context *ctx,
- struct si_shader_selector *sel,
- union si_vgt_stages_key stages_key,
- struct si_shader_key *key)
-{
- struct si_context *sctx = (struct si_context *)ctx;
-
- memset(key, 0, sizeof(*key));
-
- switch (sel->type) {
- case PIPE_SHADER_VERTEX:
- si_shader_selector_key_vs(sctx, sel, key, &key->part.vs.prolog);
-
- if (sctx->tes_shader.cso)
- key->as_ls = 1;
- else if (sctx->gs_shader.cso)
- key->as_es = 1;
- else {
- key->as_ngg = stages_key.u.ngg;
- si_shader_selector_key_hw_vs(sctx, sel, key);
-
- if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
- key->mono.u.vs_export_prim_id = 1;
- }
- break;
- case PIPE_SHADER_TESS_CTRL:
- if (sctx->chip_class >= GFX9) {
- si_shader_selector_key_vs(sctx, sctx->vs_shader.cso,
- key, &key->part.tcs.ls_prolog);
- key->part.tcs.ls = sctx->vs_shader.cso;
-
- /* When the LS VGPR fix is needed, monolithic shaders
- * can:
- * - avoid initializing EXEC in both the LS prolog
- * and the LS main part when !vs_needs_prolog
- * - remove the fixup for unused input VGPRs
- */
- key->part.tcs.ls_prolog.ls_vgpr_fix = sctx->ls_vgpr_fix;
-
- /* The LS output / HS input layout can be communicated
- * directly instead of via user SGPRs for merged LS-HS.
- * The LS VGPR fix prefers this too.
- */
- key->opt.prefer_mono = 1;
- }
-
- key->part.tcs.epilog.prim_mode =
- sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
- key->part.tcs.epilog.invoc0_tess_factors_are_def =
- sel->tcs_info.tessfactors_are_def_in_all_invocs;
- key->part.tcs.epilog.tes_reads_tess_factors =
- sctx->tes_shader.cso->info.reads_tess_factors;
-
- if (sel == sctx->fixed_func_tcs_shader.cso)
- key->mono.u.ff_tcs_inputs_to_copy = sctx->vs_shader.cso->outputs_written;
- break;
- case PIPE_SHADER_TESS_EVAL:
- if (sctx->gs_shader.cso)
- key->as_es = 1;
- else {
- key->as_ngg = stages_key.u.ngg;
- si_shader_selector_key_hw_vs(sctx, sel, key);
-
- if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
- key->mono.u.vs_export_prim_id = 1;
- }
- break;
- case PIPE_SHADER_GEOMETRY:
- if (sctx->chip_class >= GFX9) {
- if (sctx->tes_shader.cso) {
- key->part.gs.es = sctx->tes_shader.cso;
- } else {
- si_shader_selector_key_vs(sctx, sctx->vs_shader.cso,
- key, &key->part.gs.vs_prolog);
- key->part.gs.es = sctx->vs_shader.cso;
- key->part.gs.prolog.gfx9_prev_is_vs = 1;
- }
-
- key->as_ngg = stages_key.u.ngg;
-
- /* Merged ES-GS can have unbalanced wave usage.
- *
- * ES threads are per-vertex, while GS threads are
- * per-primitive. So without any amplification, there
- * are fewer GS threads than ES threads, which can result
- * in empty (no-op) GS waves. With too much amplification,
- * there are more GS threads than ES threads, which
- * can result in empty (no-op) ES waves.
- *
- * Non-monolithic shaders are implemented by setting EXEC
- * at the beginning of shader parts, and don't jump to
- * the end if EXEC is 0.
- *
- * Monolithic shaders use conditional blocks, so they can
- * jump and skip empty waves of ES or GS. So set this to
- * always use optimized variants, which are monolithic.
- */
- key->opt.prefer_mono = 1;
- }
- key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix;
- break;
- case PIPE_SHADER_FRAGMENT: {
- struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- struct si_state_blend *blend = sctx->queued.named.blend;
-
- if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
- sel->info.colors_written == 0x1)
- key->part.ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
-
- if (blend) {
- /* Select the shader color format based on whether
- * blending or alpha are needed.
- */
- key->part.ps.epilog.spi_shader_col_format =
- (blend->blend_enable_4bit & blend->need_src_alpha_4bit &
- sctx->framebuffer.spi_shader_col_format_blend_alpha) |
- (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
- sctx->framebuffer.spi_shader_col_format_blend) |
- (~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
- sctx->framebuffer.spi_shader_col_format_alpha) |
- (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
- sctx->framebuffer.spi_shader_col_format);
- key->part.ps.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
-
- /* The output for dual source blending should have
- * the same format as the first output.
- */
- if (blend->dual_src_blend)
- key->part.ps.epilog.spi_shader_col_format |=
- (key->part.ps.epilog.spi_shader_col_format & 0xf) << 4;
- } else
- key->part.ps.epilog.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
-
- /* If alpha-to-coverage is enabled, we have to export alpha
- * even if there is no color buffer.
- */
- if (!(key->part.ps.epilog.spi_shader_col_format & 0xf) &&
- blend && blend->alpha_to_coverage)
- key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
-
- /* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs
- * to the range supported by the type if a channel has less
- * than 16 bits and the export format is 16_ABGR.
- */
- if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) {
- key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
- key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
- }
-
- /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
- if (!key->part.ps.epilog.last_cbuf) {
- key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
- key->part.ps.epilog.color_is_int8 &= sel->info.colors_written;
- key->part.ps.epilog.color_is_int10 &= sel->info.colors_written;
- }
-
- bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim);
- bool is_line = util_prim_is_lines(sctx->current_rast_prim);
-
- key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
- key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.colors_read;
-
- if (sctx->queued.named.blend) {
- key->part.ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one &&
- rs->multisample_enable;
- }
-
- key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
- key->part.ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) ||
- (is_line && rs->line_smooth)) &&
- sctx->framebuffer.nr_samples <= 1;
- key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
-
- if (sctx->ps_iter_samples > 1 &&
- sel->info.reads_samplemask) {
- key->part.ps.prolog.samplemask_log_ps_iter =
- util_logbase2(sctx->ps_iter_samples);
- }
-
- if (rs->force_persample_interp &&
- rs->multisample_enable &&
- sctx->framebuffer.nr_samples > 1 &&
- sctx->ps_iter_samples > 1) {
- key->part.ps.prolog.force_persp_sample_interp =
- sel->info.uses_persp_center ||
- sel->info.uses_persp_centroid;
-
- key->part.ps.prolog.force_linear_sample_interp =
- sel->info.uses_linear_center ||
- sel->info.uses_linear_centroid;
- } else if (rs->multisample_enable &&
- sctx->framebuffer.nr_samples > 1) {
- key->part.ps.prolog.bc_optimize_for_persp =
- sel->info.uses_persp_center &&
- sel->info.uses_persp_centroid;
- key->part.ps.prolog.bc_optimize_for_linear =
- sel->info.uses_linear_center &&
- sel->info.uses_linear_centroid;
- } else {
- /* Make sure SPI doesn't compute more than 1 pair
- * of (i,j), which is the optimization here. */
- key->part.ps.prolog.force_persp_center_interp =
- sel->info.uses_persp_center +
- sel->info.uses_persp_centroid +
- sel->info.uses_persp_sample > 1;
-
- key->part.ps.prolog.force_linear_center_interp =
- sel->info.uses_linear_center +
- sel->info.uses_linear_centroid +
- sel->info.uses_linear_sample > 1;
-
- if (sel->info.opcode_count[TGSI_OPCODE_INTERP_SAMPLE])
- key->mono.u.ps.interpolate_at_sample_force_center = 1;
- }
-
- key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
-
- /* ps_uses_fbfetch is true only if the color buffer is bound. */
- if (sctx->ps_uses_fbfetch && !sctx->blitter->running) {
- struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
- struct pipe_resource *tex = cb0->texture;
-
- /* 1D textures are allocated and used as 2D on GFX9. */
- key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
- key->mono.u.ps.fbfetch_is_1D = sctx->chip_class != GFX9 &&
- (tex->target == PIPE_TEXTURE_1D ||
- tex->target == PIPE_TEXTURE_1D_ARRAY);
- key->mono.u.ps.fbfetch_layered = tex->target == PIPE_TEXTURE_1D_ARRAY ||
- tex->target == PIPE_TEXTURE_2D_ARRAY ||
- tex->target == PIPE_TEXTURE_CUBE ||
- tex->target == PIPE_TEXTURE_CUBE_ARRAY ||
- tex->target == PIPE_TEXTURE_3D;
- }
- break;
- }
- default:
- assert(0);
- }
-
- if (unlikely(sctx->screen->debug_flags & DBG(NO_OPT_VARIANT)))
- memset(&key->opt, 0, sizeof(key->opt));
-}
-
-static void si_build_shader_variant(struct si_shader *shader,
- int thread_index,
- bool low_priority)
-{
- struct si_shader_selector *sel = shader->selector;
- struct si_screen *sscreen = sel->screen;
- struct ac_llvm_compiler *compiler;
- struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
-
- if (thread_index >= 0) {
- if (low_priority) {
- assert(thread_index < ARRAY_SIZE(sscreen->compiler_lowp));
- compiler = &sscreen->compiler_lowp[thread_index];
- } else {
- assert(thread_index < ARRAY_SIZE(sscreen->compiler));
- compiler = &sscreen->compiler[thread_index];
- }
- if (!debug->async)
- debug = NULL;
- } else {
- assert(!low_priority);
- compiler = shader->compiler_ctx_state.compiler;
- }
-
- if (unlikely(!si_shader_create(sscreen, compiler, shader, debug))) {
- PRINT_ERR("Failed to build shader variant (type=%u)\n",
- sel->type);
- shader->compilation_failed = true;
- return;
- }
-
- if (shader->compiler_ctx_state.is_debug_context) {
- FILE *f = open_memstream(&shader->shader_log,
- &shader->shader_log_size);
- if (f) {
- si_shader_dump(sscreen, shader, NULL, f, false);
- fclose(f);
- }
- }
-
- si_shader_init_pm4_state(sscreen, shader);
+static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_shader_selector *sel,
+ union si_vgt_stages_key stages_key,
+ struct si_shader_key *key)
+{ /* Fills *key for selector sel from the current context state; only the case matching sel->info.stage runs. */
+ struct si_context *sctx = (struct si_context *)ctx;
+
+ memset(key, 0, sizeof(*key)); /* Start from an all-zero key; every field not assigned below stays 0. */
+
+ switch (sel->info.stage) {
+ case MESA_SHADER_VERTEX:
+ si_shader_selector_key_vs(sctx, sel, key, &key->part.vs.prolog);
+
+ if (sctx->tes_shader.cso) /* tes_shader.cso is non-NULL: flag the VS key as LS. */
+ key->as_ls = 1;
+ else if (sctx->gs_shader.cso) { /* No TES cso but a GS cso: flag the VS key as ES. */
+ key->as_es = 1;
+ key->as_ngg = stages_key.u.ngg; /* The NGG bit is taken from the caller-supplied stages key. */
+ } else { /* Neither a TES nor a GS cso is set. */
+ key->as_ngg = stages_key.u.ngg;
+ si_shader_selector_key_hw_vs(sctx, sel, key);
+
+ if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) /* Only when a PS cso exists and its info reports uses_primid. */
+ key->mono.u.vs_export_prim_id = 1;
+ }
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (sctx->chip_class >= GFX9) {
+ si_shader_selector_key_vs(sctx, sctx->vs_shader.cso, key, &key->part.tcs.ls_prolog); /* Called with the bound VS selector, not sel. */
+ key->part.tcs.ls = sctx->vs_shader.cso; /* The bound VS selector is stored in the TCS key as its LS. */
+
+ /* When the LS VGPR fix is needed, monolithic shaders
+ * can:
+ * - avoid initializing EXEC in both the LS prolog
+ * and the LS main part when !vs_needs_prolog
+ * - remove the fixup for unused input VGPRs
+ */
+ key->part.tcs.ls_prolog.ls_vgpr_fix = sctx->ls_vgpr_fix;
+
+ /* The LS output / HS input layout can be communicated
+ * directly instead of via user SGPRs for merged LS-HS.
+ * The LS VGPR fix prefers this too.
+ */
+ key->opt.prefer_mono = 1;
+ }
+
+ key->part.tcs.epilog.prim_mode =
+ sctx->tes_shader.cso->info.base.tess.primitive_mode; /* NOTE(review): tes_shader.cso is dereferenced unchecked; presumably a TCS is only keyed while a TES is bound - confirm. */
+ key->part.tcs.epilog.invoc0_tess_factors_are_def =
+ sel->info.tessfactors_are_def_in_all_invocs;
+ key->part.tcs.epilog.tes_reads_tess_factors = sctx->tes_shader.cso->info.reads_tess_factors;
+
+ if (sel == sctx->fixed_func_tcs_shader.cso) /* Only the context's fixed-function TCS receives the VS outputs_written value. */
+ key->mono.u.ff_tcs_inputs_to_copy = sctx->vs_shader.cso->outputs_written;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ key->as_ngg = stages_key.u.ngg; /* Assigned before the branch, so it applies to both the ES and the non-ES path. */
+
+ if (sctx->gs_shader.cso) /* A GS cso is set: flag the TES key as ES. */
+ key->as_es = 1;
+ else {
+ si_shader_selector_key_hw_vs(sctx, sel, key);
+
+ if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) /* Same condition as in the vertex case. */
+ key->mono.u.vs_export_prim_id = 1;
+ }
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (sctx->chip_class >= GFX9) {
+ if (sctx->tes_shader.cso) {
+ key->part.gs.es = sctx->tes_shader.cso; /* With a TES cso, the TES selector is stored as the ES of this GS key. */
+ } else {
+ si_shader_selector_key_vs(sctx, sctx->vs_shader.cso, key, &key->part.gs.vs_prolog);
+ key->part.gs.es = sctx->vs_shader.cso; /* Without a TES cso, the VS selector is stored as the ES. */
+ key->part.gs.prolog.gfx9_prev_is_vs = 1;
+ }
+
+ key->as_ngg = stages_key.u.ngg;
+
+ /* Merged ES-GS can have unbalanced wave usage.
+ *
+ * ES threads are per-vertex, while GS threads are
+ * per-primitive. So without any amplification, there
+ * are fewer GS threads than ES threads, which can result
+ * in empty (no-op) GS waves. With too much amplification,
+ * there are more GS threads than ES threads, which
+ * can result in empty (no-op) ES waves.
+ *
+ * Non-monolithic shaders are implemented by setting EXEC
+ * at the beginning of shader parts, and don't jump to
+ * the end if EXEC is 0.
+ *
+ * Monolithic shaders use conditional blocks, so they can
+ * jump and skip empty waves of ES or GS. So set this to
+ * always use optimized variants, which are monolithic.
+ */
+ key->opt.prefer_mono = 1;
+ }
+ key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix; /* Assigned regardless of chip_class. */
+ break;
+ case MESA_SHADER_FRAGMENT: {
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ struct si_state_blend *blend = sctx->queued.named.blend; /* NOTE(review): blend is dereferenced below without a NULL check; presumably a blend state is always queued - confirm. */
+
+ if (sel->info.color0_writes_all_cbufs &&
+ sel->info.colors_written == 0x1) /* Requires the flag and that colors_written has only bit 0 set. */
+ key->part.ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; /* Evaluates to 0 when nr_cbufs is 0 or 1. */
+
+ /* Select the shader color format based on whether
+ * blending or alpha are needed.
+ */
+ key->part.ps.epilog.spi_shader_col_format = /* OR of four framebuffer formats, each masked by one (blend_enable, need_src_alpha) combination. */
+ (blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_blend_alpha) |
+ (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_blend) |
+ (~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format_alpha) |
+ (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+ sctx->framebuffer.spi_shader_col_format);
+ key->part.ps.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit; /* Keep only bits that are also set in cb_target_enabled_4bit. */
+
+ /* The output for dual source blending should have
+ * the same format as the first output.
+ */
+ if (blend->dual_src_blend) {
+ key->part.ps.epilog.spi_shader_col_format |=
+ (key->part.ps.epilog.spi_shader_col_format & 0xf) << 4; /* ORs a copy of bits 3:0 into bits 7:4. */
+ }
+
+ /* If alpha-to-coverage is enabled, we have to export alpha
+ * even if there is no color buffer.
+ */
+ if (!(key->part.ps.epilog.spi_shader_col_format & 0xf) && blend->alpha_to_coverage) /* Bits 3:0 are all zero at this point. */
+ key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
+
+ /* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs
+ * to the range supported by the type if a channel has less
+ * than 16 bits and the export format is 16_ABGR.
+ */
+ if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) {
+ key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
+ key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
+ }
+
+ /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */
+ if (!key->part.ps.epilog.last_cbuf) { /* last_cbuf is still 0 unless the write-all branch above assigned a non-zero value. */
+ key->part.ps.epilog.spi_shader_col_format &= sel->colors_written_4bit;
+ key->part.ps.epilog.color_is_int8 &= sel->info.colors_written;
+ key->part.ps.epilog.color_is_int10 &= sel->info.colors_written;
+ }
+
+ bool is_poly = !util_prim_is_points_or_lines(sctx->current_rast_prim); /* Anything that is neither points nor lines counts as a polygon here. */
+ bool is_line = util_prim_is_lines(sctx->current_rast_prim);
+
+ key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read; /* Both prolog color flags are set only when colors_read is non-zero. */
+ key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.colors_read;
+
+ key->part.ps.epilog.alpha_to_one = blend->alpha_to_one && rs->multisample_enable;
+
+ key->part.ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly;
+ key->part.ps.epilog.poly_line_smoothing =
+ ((is_poly && rs->poly_smooth) || (is_line && rs->line_smooth)) &&
+ sctx->framebuffer.nr_samples <= 1; /* Smoothing is keyed only when nr_samples <= 1. */
+ key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
+
+ if (sctx->ps_iter_samples > 1 && sel->info.reads_samplemask) {
+ key->part.ps.prolog.samplemask_log_ps_iter = util_logbase2(sctx->ps_iter_samples);
+ }
+
+ if (rs->force_persample_interp && rs->multisample_enable &&
+ sctx->framebuffer.nr_samples > 1 && sctx->ps_iter_samples > 1) { /* First of three mutually exclusive interpolation cases. */
+ key->part.ps.prolog.force_persp_sample_interp =
+ sel->info.uses_persp_center || sel->info.uses_persp_centroid;
+
+ key->part.ps.prolog.force_linear_sample_interp =
+ sel->info.uses_linear_center || sel->info.uses_linear_centroid;
+ } else if (rs->multisample_enable && sctx->framebuffer.nr_samples > 1) { /* Second case: multisampling without the forced per-sample condition above. */
+ key->part.ps.prolog.bc_optimize_for_persp =
+ sel->info.uses_persp_center && sel->info.uses_persp_centroid;
+ key->part.ps.prolog.bc_optimize_for_linear =
+ sel->info.uses_linear_center && sel->info.uses_linear_centroid;
+ } else { /* Third case: multisample_enable is off or nr_samples <= 1. */
+ /* Make sure SPI doesn't compute more than 1 pair
+ * of (i,j), which is the optimization here. */
+ key->part.ps.prolog.force_persp_center_interp = sel->info.uses_persp_center +
+ sel->info.uses_persp_centroid +
+ sel->info.uses_persp_sample >
+ 1; /* The sum exceeds 1 when at least two of the three are set (assuming each is a 0/1 flag). */
+
+ key->part.ps.prolog.force_linear_center_interp = sel->info.uses_linear_center +
+ sel->info.uses_linear_centroid +
+ sel->info.uses_linear_sample >
+ 1; /* Same test as above, for the linear flags. */
+
+ if (sel->info.uses_persp_opcode_interp_sample ||
+ sel->info.uses_linear_opcode_interp_sample)
+ key->mono.u.ps.interpolate_at_sample_force_center = 1;
+ }
+
+ key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
+
+ /* ps_uses_fbfetch is true only if the color buffer is bound. */
+ if (sctx->ps_uses_fbfetch && !sctx->blitter->running) {
+ struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; /* Dereferenced unchecked; relies on the guarantee stated in the comment above. */
+ struct pipe_resource *tex = cb0->texture;
+
+ /* 1D textures are allocated and used as 2D on GFX9. */
+ key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
+ key->mono.u.ps.fbfetch_is_1D =
+ sctx->chip_class != GFX9 &&
+ (tex->target == PIPE_TEXTURE_1D || tex->target == PIPE_TEXTURE_1D_ARRAY);
+ key->mono.u.ps.fbfetch_layered =
+ tex->target == PIPE_TEXTURE_1D_ARRAY || tex->target == PIPE_TEXTURE_2D_ARRAY ||
+ tex->target == PIPE_TEXTURE_CUBE || tex->target == PIPE_TEXTURE_CUBE_ARRAY ||
+ tex->target == PIPE_TEXTURE_3D; /* Array, cube and 3D targets are flagged as layered. */
+ }
+ break;
+ }
+ default:
+ assert(0); /* Any other stage value trips the assert; with asserts compiled out the key stays all zero. */
+ }
+
+ if (unlikely(sctx->screen->debug_flags & DBG(NO_OPT_VARIANT)))
+ memset(&key->opt, 0, sizeof(key->opt)); /* Clears everything under key->opt, including prefer_mono set above. */
+}
+
+static void si_build_shader_variant(struct si_shader *shader, int thread_index, bool low_priority)
+{
+ struct si_shader_selector *sel = shader->selector;
+ struct si_screen *sscreen = sel->screen;
+ struct ac_llvm_compiler *compiler;
+ struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
+
+ if (thread_index >= 0) {
+ if (low_priority) {
+ assert(thread_index < ARRAY_SIZE(sscreen->compiler_lowp));
+ compiler = &sscreen->compiler_lowp[thread_index];
+ } else {
+ assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+ compiler = &sscreen->compiler[thread_index];
+ }
+ if (!debug->async)
+ debug = NULL;
+ } else {
+ assert(!low_priority);
+ compiler = shader->compiler_ctx_state.compiler;
+ }
+
+ if (!compiler->passes)
+ si_init_compiler(sscreen, compiler);
+
+ if (unlikely(!si_create_shader_variant(sscreen, compiler, shader, debug))) {
+ PRINT_ERR("Failed to build shader variant (type=%u)\n", sel->info.stage);
+ shader->compilation_failed = true;
+ return;
+ }
+
+ if (shader->compiler_ctx_state.is_debug_context) {
+ FILE *f = open_memstream(&shader->shader_log, &shader->shader_log_size);
+ if (f) {
+ si_shader_dump(sscreen, shader, NULL, f, false);
+ fclose(f);
+ }
+ }
+
+ si_shader_init_pm4_state(sscreen, shader);