- const struct pipe_shader_state *state)
-{
- struct si_screen *sscreen = (struct si_screen *)ctx->screen;
- struct si_context *sctx = (struct si_context*)ctx;
- struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
- int i;
-
- if (!sel)
- return NULL;
-
- pipe_reference_init(&sel->reference, 1);
- sel->screen = sscreen;
- sel->compiler_ctx_state.debug = sctx->debug;
- sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
-
- sel->so = state->stream_output;
-
- if (state->type == PIPE_SHADER_IR_TGSI &&
- !sscreen->options.enable_nir) {
- sel->tokens = tgsi_dup_tokens(state->tokens);
- if (!sel->tokens) {
- FREE(sel);
- return NULL;
- }
-
- tgsi_scan_shader(state->tokens, &sel->info);
- tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
-
- /* Fixup for TGSI: Set which opcode uses which (i,j) pair. */
- if (sel->info.uses_persp_opcode_interp_centroid)
- sel->info.uses_persp_centroid = true;
-
- if (sel->info.uses_linear_opcode_interp_centroid)
- sel->info.uses_linear_centroid = true;
-
- if (sel->info.uses_persp_opcode_interp_offset ||
- sel->info.uses_persp_opcode_interp_sample)
- sel->info.uses_persp_center = true;
-
- if (sel->info.uses_linear_opcode_interp_offset ||
- sel->info.uses_linear_opcode_interp_sample)
- sel->info.uses_linear_center = true;
- } else {
- if (state->type == PIPE_SHADER_IR_TGSI) {
- sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
- } else {
- assert(state->type == PIPE_SHADER_IR_NIR);
- sel->nir = state->ir.nir;
- }
-
- si_nir_lower_ps_inputs(sel->nir);
- si_nir_opts(sel->nir);
- si_nir_scan_shader(sel->nir, &sel->info);
- si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
- }
-
- sel->type = sel->info.processor;
- p_atomic_inc(&sscreen->num_shaders_created);
- si_get_active_slot_masks(&sel->info,
- &sel->active_const_and_shader_buffers,
- &sel->active_samplers_and_images);
-
- /* Record which streamout buffers are enabled. */
- for (i = 0; i < sel->so.num_outputs; i++) {
- sel->enabled_streamout_buffer_mask |=
- (1 << sel->so.output[i].output_buffer) <<
- (sel->so.output[i].stream * 4);
- }
-
- /* The prolog is a no-op if there are no inputs. */
- sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX &&
- sel->info.num_inputs &&
- !sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD];
-
- sel->force_correct_derivs_after_kill =
- sel->type == PIPE_SHADER_FRAGMENT &&
- sel->info.uses_derivatives &&
- sel->info.uses_kill &&
- sctx->screen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL);
-
- sel->prim_discard_cs_allowed =
- sel->type == PIPE_SHADER_VERTEX &&
- !sel->info.uses_bindless_images &&
- !sel->info.uses_bindless_samplers &&
- !sel->info.writes_memory &&
- !sel->info.writes_viewport_index &&
- !sel->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] &&
- !sel->so.num_outputs;
-
- switch (sel->type) {
- case PIPE_SHADER_GEOMETRY:
- sel->gs_output_prim =
- sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
-
- /* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */
- sel->rast_prim = sel->gs_output_prim;
- if (util_rast_prim_is_triangles(sel->rast_prim))
- sel->rast_prim = PIPE_PRIM_TRIANGLES;
-
- sel->gs_max_out_vertices =
- sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
- sel->gs_num_invocations =
- sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
- sel->gsvs_vertex_size = sel->info.num_outputs * 16;
- sel->max_gsvs_emit_size = sel->gsvs_vertex_size *
- sel->gs_max_out_vertices;
-
- sel->max_gs_stream = 0;
- for (i = 0; i < sel->so.num_outputs; i++)
- sel->max_gs_stream = MAX2(sel->max_gs_stream,
- sel->so.output[i].stream);
-
- sel->gs_input_verts_per_prim =
- u_vertices_per_prim(sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
-
- /* EN_MAX_VERT_OUT_PER_GS_INSTANCE does not work with tesselation. */
- sel->tess_turns_off_ngg =
- (sscreen->info.family == CHIP_NAVI10 ||
- sscreen->info.family == CHIP_NAVI12 ||
- sscreen->info.family == CHIP_NAVI14) &&
- sel->gs_num_invocations * sel->gs_max_out_vertices > 256;
- break;
-
- case PIPE_SHADER_TESS_CTRL:
- /* Always reserve space for these. */
- sel->patch_outputs_written |=
- (1ull << si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0)) |
- (1ull << si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0));
- /* fall through */
- case PIPE_SHADER_VERTEX:
- case PIPE_SHADER_TESS_EVAL:
- for (i = 0; i < sel->info.num_outputs; i++) {
- unsigned name = sel->info.output_semantic_name[i];
- unsigned index = sel->info.output_semantic_index[i];
-
- switch (name) {
- case TGSI_SEMANTIC_TESSINNER:
- case TGSI_SEMANTIC_TESSOUTER:
- case TGSI_SEMANTIC_PATCH:
- sel->patch_outputs_written |=
- 1ull << si_shader_io_get_unique_index_patch(name, index);
- break;
-
- case TGSI_SEMANTIC_GENERIC:
- /* don't process indices the function can't handle */
- if (index >= SI_MAX_IO_GENERIC)
- break;
- /* fall through */
- default:
- sel->outputs_written |=
- 1ull << si_shader_io_get_unique_index(name, index, false);
- sel->outputs_written_before_ps |=
- 1ull << si_shader_io_get_unique_index(name, index, true);
- break;
- case TGSI_SEMANTIC_EDGEFLAG:
- break;
- }
- }
- sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
- sel->lshs_vertex_stride = sel->esgs_itemsize;
-
- /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
- * will start on a different bank. (except for the maximum 32*16).
- */
- if (sel->lshs_vertex_stride < 32*16)
- sel->lshs_vertex_stride += 4;
-
- /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
- * conflicts, i.e. each vertex will start at a different bank.
- */
- if (sctx->chip_class >= GFX9)
- sel->esgs_itemsize += 4;
-
- assert(((sel->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
-
- /* Only for TES: */
- if (sel->info.properties[TGSI_PROPERTY_TES_POINT_MODE])
- sel->rast_prim = PIPE_PRIM_POINTS;
- else if (sel->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_LINES)
- sel->rast_prim = PIPE_PRIM_LINE_STRIP;
- else
- sel->rast_prim = PIPE_PRIM_TRIANGLES;
- break;
-
- case PIPE_SHADER_FRAGMENT:
- for (i = 0; i < sel->info.num_inputs; i++) {
- unsigned name = sel->info.input_semantic_name[i];
- unsigned index = sel->info.input_semantic_index[i];
-
- switch (name) {
- case TGSI_SEMANTIC_GENERIC:
- /* don't process indices the function can't handle */
- if (index >= SI_MAX_IO_GENERIC)
- break;
- /* fall through */
- default:
- sel->inputs_read |=
- 1ull << si_shader_io_get_unique_index(name, index, true);
- break;
- case TGSI_SEMANTIC_PCOORD: /* ignore this */
- break;
- }
- }
-
- for (i = 0; i < 8; i++)
- if (sel->info.colors_written & (1 << i))
- sel->colors_written_4bit |= 0xf << (4 * i);
-
- for (i = 0; i < sel->info.num_inputs; i++) {
- if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
- int index = sel->info.input_semantic_index[i];
- sel->color_attr_index[index] = i;
- }
- }
- break;
- default:;
- }
-
- /* PA_CL_VS_OUT_CNTL */
- if (sctx->chip_class <= GFX9)
- sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false);
-
- sel->clipdist_mask = sel->info.writes_clipvertex ?
- SIX_BITS : sel->info.clipdist_writemask;
- sel->culldist_mask = sel->info.culldist_writemask <<
- sel->info.num_written_clipdistance;
-
- /* DB_SHADER_CONTROL */
- sel->db_shader_control =
- S_02880C_Z_EXPORT_ENABLE(sel->info.writes_z) |
- S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(sel->info.writes_stencil) |
- S_02880C_MASK_EXPORT_ENABLE(sel->info.writes_samplemask) |
- S_02880C_KILL_ENABLE(sel->info.uses_kill);
-
- switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) {
- case TGSI_FS_DEPTH_LAYOUT_GREATER:
- sel->db_shader_control |=
- S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
- break;
- case TGSI_FS_DEPTH_LAYOUT_LESS:
- sel->db_shader_control |=
- S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
- break;
- }
-
- /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as following:
- *
- * | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP
- * --|-----------|------------|------------|--------------------|-------------------|-------------
- * 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0
- * 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0
- * 2 | false | true | n/a | LateZ | 1 | 0
- * 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0
- * 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1
- *
- * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register.
- * In case 2, NOOP_CULL is a don't care field. In case 2, 3 and 4, ReZ doesn't make sense.
- *
- * Don't use ReZ without profiling !!!
- *
- * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex
- * shaders.
- */
- if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) {
- /* Cases 3, 4. */
- sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
- S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
- S_02880C_EXEC_ON_NOOP(sel->info.writes_memory);
- } else if (sel->info.writes_memory) {
- /* Case 2. */
- sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) |
- S_02880C_EXEC_ON_HIER_FAIL(1);
- } else {
- /* Case 1. */
- sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
- }
-
- if (sel->info.properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE])
- sel->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1);
-
- (void) simple_mtx_init(&sel->mutex, mtx_plain);
-
- si_schedule_initial_compile(sctx, sel->info.processor, &sel->ready,
- &sel->compiler_ctx_state, sel,
- si_init_shader_selector_async);
- return sel;
+ const struct pipe_shader_state *state)
+{
+ struct si_screen *sscreen = (struct si_screen *)ctx->screen;
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
+ int i;
+
+ if (!sel)
+ return NULL;
+
+ sel->screen = sscreen;
+ sel->compiler_ctx_state.debug = sctx->debug;
+ sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
+
+ sel->so = state->stream_output;
+
+ if (state->type == PIPE_SHADER_IR_TGSI) {
+ sel->nir = tgsi_to_nir(state->tokens, ctx->screen, true);
+ } else {
+ assert(state->type == PIPE_SHADER_IR_NIR);
+ sel->nir = state->ir.nir;
+ }
+
+ si_nir_scan_shader(sel->nir, &sel->info);
+
+ const enum pipe_shader_type type = pipe_shader_type_from_mesa(sel->info.stage);
+ sel->const_and_shader_buf_descriptors_index =
+ si_const_and_shader_buffer_descriptors_idx(type);
+ sel->sampler_and_images_descriptors_index =
+ si_sampler_and_image_descriptors_idx(type);
+
+ p_atomic_inc(&sscreen->num_shaders_created);
+ si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
+ &sel->active_samplers_and_images);
+
+ /* Record which streamout buffers are enabled. */
+ for (i = 0; i < sel->so.num_outputs; i++) {
+ sel->enabled_streamout_buffer_mask |= (1 << sel->so.output[i].output_buffer)
+ << (sel->so.output[i].stream * 4);
+ }
+
+ sel->num_vs_inputs =
+ sel->info.stage == MESA_SHADER_VERTEX && !sel->info.base.vs.blit_sgprs_amd
+ ? sel->info.num_inputs
+ : 0;
+ sel->num_vbos_in_user_sgprs = MIN2(sel->num_vs_inputs, sscreen->num_vbos_in_user_sgprs);
+
+ /* The prolog is a no-op if there are no inputs. */
+ sel->vs_needs_prolog = sel->info.stage == MESA_SHADER_VERTEX && sel->info.num_inputs &&
+ !sel->info.base.vs.blit_sgprs_amd;
+
+ sel->prim_discard_cs_allowed =
+ sel->info.stage == MESA_SHADER_VERTEX && !sel->info.uses_bindless_images &&
+ !sel->info.uses_bindless_samplers && !sel->info.writes_memory &&
+ !sel->info.writes_viewport_index &&
+ !sel->info.base.vs.window_space_position && !sel->so.num_outputs;
+
+ switch (sel->info.stage) {
+ case MESA_SHADER_GEOMETRY:
+ sel->gs_output_prim = sel->info.base.gs.output_primitive;
+
+ /* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */
+ sel->rast_prim = sel->gs_output_prim;
+ if (util_rast_prim_is_triangles(sel->rast_prim))
+ sel->rast_prim = PIPE_PRIM_TRIANGLES;
+
+ sel->gs_max_out_vertices = sel->info.base.gs.vertices_out;
+ sel->gs_num_invocations = sel->info.base.gs.invocations;
+ sel->gsvs_vertex_size = sel->info.num_outputs * 16;
+ sel->max_gsvs_emit_size = sel->gsvs_vertex_size * sel->gs_max_out_vertices;
+
+ sel->max_gs_stream = 0;
+ for (i = 0; i < sel->so.num_outputs; i++)
+ sel->max_gs_stream = MAX2(sel->max_gs_stream, sel->so.output[i].stream);
+
+ sel->gs_input_verts_per_prim =
+ u_vertices_per_prim(sel->info.base.gs.input_primitive);
+
+ /* EN_MAX_VERT_OUT_PER_GS_INSTANCE does not work with tessellation so
+ * we can't split workgroups. Disable ngg if any of the following conditions is true:
+ * - num_invocations * gs_max_out_vertices > 256
+ * - LDS usage is too high
+ */
+ sel->tess_turns_off_ngg = sscreen->info.chip_class >= GFX10 &&
+ (sel->gs_num_invocations * sel->gs_max_out_vertices > 256 ||
+ sel->gs_num_invocations * sel->gs_max_out_vertices *
+ (sel->info.num_outputs * 4 + 1) > 6500 /* max dw per GS primitive */);
+ break;
+
+ case MESA_SHADER_TESS_CTRL:
+ /* Always reserve space for these. */
+ sel->patch_outputs_written |=
+ (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) |
+ (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER));
+ /* fall through */
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
+ for (i = 0; i < sel->info.num_outputs; i++) {
+ unsigned semantic = sel->info.output_semantic[i];
+
+ if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
+ semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
+ (semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
+ sel->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic);
+ } else if (semantic < VARYING_SLOT_MAX &&
+ semantic != VARYING_SLOT_EDGE) {
+ sel->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false);
+ sel->outputs_written_before_ps |= 1ull
+ << si_shader_io_get_unique_index(semantic, true);
+ }
+ }
+ sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
+ sel->lshs_vertex_stride = sel->esgs_itemsize;
+
+ /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
+ * will start on a different bank. (except for the maximum 32*16).
+ */
+ if (sel->lshs_vertex_stride < 32 * 16)
+ sel->lshs_vertex_stride += 4;
+
+ /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
+ * conflicts, i.e. each vertex will start at a different bank.
+ */
+ if (sctx->chip_class >= GFX9)
+ sel->esgs_itemsize += 4;
+
+ assert(((sel->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
+
+ /* Only for TES: */
+ if (sel->info.stage == MESA_SHADER_TESS_EVAL) {
+ if (sel->info.base.tess.point_mode)
+ sel->rast_prim = PIPE_PRIM_POINTS;
+ else if (sel->info.base.tess.primitive_mode == GL_LINES)
+ sel->rast_prim = PIPE_PRIM_LINE_STRIP;
+ else
+ sel->rast_prim = PIPE_PRIM_TRIANGLES;
+ } else {
+ sel->rast_prim = PIPE_PRIM_TRIANGLES;
+ }
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ for (i = 0; i < sel->info.num_inputs; i++) {
+ unsigned semantic = sel->info.input_semantic[i];
+
+ if (semantic < VARYING_SLOT_MAX &&
+ semantic != VARYING_SLOT_PNTC) {
+ sel->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true);
+ }
+ }
+
+ for (i = 0; i < 8; i++)
+ if (sel->info.colors_written & (1 << i))
+ sel->colors_written_4bit |= 0xf << (4 * i);
+
+ for (i = 0; i < sel->info.num_inputs; i++) {
+ if (sel->info.input_semantic[i] == VARYING_SLOT_COL0)
+ sel->color_attr_index[0] = i;
+ else if (sel->info.input_semantic[i] == VARYING_SLOT_COL1)
+ sel->color_attr_index[1] = i;
+ }
+ break;
+ default:;
+ }
+
+ sel->ngg_culling_allowed =
+ sscreen->info.chip_class >= GFX10 &&
+ sscreen->info.has_dedicated_vram &&
+ sscreen->use_ngg_culling &&
+ (sel->info.stage == MESA_SHADER_VERTEX ||
+ (sel->info.stage == MESA_SHADER_TESS_EVAL &&
+ (sscreen->always_use_ngg_culling_all ||
+ sscreen->always_use_ngg_culling_tess))) &&
+ sel->info.writes_position &&
+ !sel->info.writes_viewport_index && /* cull only against viewport 0 */
+ !sel->info.writes_memory && !sel->so.num_outputs &&
+ (sel->info.stage != MESA_SHADER_VERTEX ||
+ (!sel->info.base.vs.blit_sgprs_amd &&
+ !sel->info.base.vs.window_space_position));
+
+ /* PA_CL_VS_OUT_CNTL */
+ if (sctx->chip_class <= GFX9)
+ sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false);
+
+ sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS :
+ u_bit_consecutive(0, sel->info.base.clip_distance_array_size);
+ sel->culldist_mask = u_bit_consecutive(0, sel->info.base.cull_distance_array_size) <<
+ sel->info.base.clip_distance_array_size;
+
+ /* DB_SHADER_CONTROL */
+ sel->db_shader_control = S_02880C_Z_EXPORT_ENABLE(sel->info.writes_z) |
+ S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(sel->info.writes_stencil) |
+ S_02880C_MASK_EXPORT_ENABLE(sel->info.writes_samplemask) |
+ S_02880C_KILL_ENABLE(sel->info.base.fs.uses_discard);
+
+ if (sel->info.stage == MESA_SHADER_FRAGMENT) {
+ switch (sel->info.base.fs.depth_layout) {
+ case FRAG_DEPTH_LAYOUT_GREATER:
+ sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
+ break;
+ case FRAG_DEPTH_LAYOUT_LESS:
+ sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
+ break;
+ default:;
+ }
+
+ /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as follows:
+ *
+ * | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP
+ * --|-----------|------------|------------|--------------------|-------------------|-------------
+ * 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0
+ * 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0
+ * 2 | false | true | n/a | LateZ | 1 | 0
+ * 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0
+ * 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1
+ *
+ * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register.
+ * In case 2, NOOP_CULL is a don't care field. In case 2, 3 and 4, ReZ doesn't make sense.
+ *
+ * Don't use ReZ without profiling !!!
+ *
+ * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex
+ * shaders.
+ */
+ if (sel->info.base.fs.early_fragment_tests) {
+ /* Cases 3, 4. */
+ sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
+ S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
+ S_02880C_EXEC_ON_NOOP(sel->info.writes_memory);
+ } else if (sel->info.writes_memory) {
+ /* Case 2. */
+ sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1);
+ } else {
+ /* Case 1. */
+ sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
+ }
+
+ if (sel->info.base.fs.post_depth_coverage)
+ sel->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1);
+ }
+
+ (void)simple_mtx_init(&sel->mutex, mtx_plain);
+
+ si_schedule_initial_compile(sctx, sel->info.stage, &sel->ready, &sel->compiler_ctx_state,
+ sel, si_init_shader_selector_async);
+ return sel;
+}
+
+static void *si_create_shader(struct pipe_context *ctx, const struct pipe_shader_state *state)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_screen *sscreen = (struct si_screen *)ctx->screen;
+ bool cache_hit;
+ struct si_shader_selector *sel = (struct si_shader_selector *)util_live_shader_cache_get(
+ ctx, &sscreen->live_shader_cache, state, &cache_hit);
+
+ if (sel && cache_hit && sctx->debug.debug_message) {
+ if (sel->main_shader_part)
+ si_shader_dump_stats_for_shader_db(sscreen, sel->main_shader_part, &sctx->debug);
+ if (sel->main_shader_part_ls)
+ si_shader_dump_stats_for_shader_db(sscreen, sel->main_shader_part_ls, &sctx->debug);
+ if (sel->main_shader_part_es)
+ si_shader_dump_stats_for_shader_db(sscreen, sel->main_shader_part_es, &sctx->debug);
+ if (sel->main_shader_part_ngg)
+ si_shader_dump_stats_for_shader_db(sscreen, sel->main_shader_part_ngg, &sctx->debug);
+ if (sel->main_shader_part_ngg_es)
+ si_shader_dump_stats_for_shader_db(sscreen, sel->main_shader_part_ngg_es, &sctx->debug);
+ }
+ return sel;