X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_state_shaders.c;h=9e1f088e16f49a6bdb29229296c32f5ab98eb287;hb=5ef50078fced6331c8731389785762b62262afba;hp=c629c28fa614c12304968640e81c3b48089d6a2e;hpb=877b9ec567b2c620c5e5f36fcc68f5d620b7a785;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index c629c28fa61..9e1f088e16f 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -72,7 +72,10 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es, shader_variant_flags |= 1 << 1; if (si_get_wave_size(sel->screen, sel->info.stage, ngg, es, false, false) == 32) shader_variant_flags |= 1 << 2; - if (sel->info.stage == MESA_SHADER_FRAGMENT && sel->info.uses_derivatives && sel->info.uses_kill && + if (sel->info.stage == MESA_SHADER_FRAGMENT && + /* Derivatives imply helper invocations so check for needs_helper_invocations. */ + sel->info.base.fs.needs_helper_invocations && + sel->info.base.fs.uses_discard && sel->screen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) shader_variant_flags |= 1 << 3; @@ -312,20 +315,20 @@ static void si_set_tesseval_regs(struct si_screen *sscreen, const struct si_shad struct si_pm4_state *pm4) { const struct si_shader_info *info = &tes->info; - unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE]; - unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING]; - bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]; - bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE]; + unsigned tes_prim_mode = info->base.tess.primitive_mode; + unsigned tes_spacing = info->base.tess.spacing; + bool tes_vertex_order_cw = !info->base.tess.ccw; + bool tes_point_mode = info->base.tess.point_mode; unsigned type, partitioning, topology, distribution_mode; switch (tes_prim_mode) { - case PIPE_PRIM_LINES: + case GL_LINES: type = V_028B6C_TESS_ISOLINE; break; - case PIPE_PRIM_TRIANGLES: + case GL_TRIANGLES: type = V_028B6C_TESS_TRIANGLE; break; - case PIPE_PRIM_QUADS: + case GL_QUADS: type = V_028B6C_TESS_QUAD; break; default: @@ -334,13 +337,13 @@ static void si_set_tesseval_regs(struct si_screen *sscreen, const struct si_shad } switch (tes_spacing) { - case PIPE_TESS_SPACING_FRACTIONAL_ODD: + case TESS_SPACING_FRACTIONAL_ODD: partitioning = V_028B6C_PART_FRAC_ODD; break; - case PIPE_TESS_SPACING_FRACTIONAL_EVEN: + case TESS_SPACING_FRACTIONAL_EVEN: partitioning = V_028B6C_PART_FRAC_EVEN; break; - case PIPE_TESS_SPACING_EQUAL: + case TESS_SPACING_EQUAL: partitioning = V_028B6C_PART_INTEGER; break; default: @@ -350,7 +353,7 @@ static void si_set_tesseval_regs(struct si_screen *sscreen, const struct si_shad if (tes_point_mode) topology = V_028B6C_OUTPUT_POINT; - else if (tes_prim_mode == PIPE_PRIM_LINES) + else if (tes_prim_mode == GL_LINES) topology = V_028B6C_OUTPUT_LINE; else if (tes_vertex_order_cw) /* for some reason, this must be the other way around */ @@ -400,7 +403,7 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen, struct si_sh unsigned vtx_reuse_depth = 30; if (sel->info.stage == MESA_SHADER_TESS_EVAL && - sel->info.properties[TGSI_PROPERTY_TES_SPACING] == PIPE_TESS_SPACING_FRACTIONAL_ODD) + sel->info.base.tess.spacing == TESS_SPACING_FRACTIONAL_ODD) vtx_reuse_depth = 14; assert(pm4->shader); @@ -617,8 +620,8 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *gs, struct gfx9_gs_info *out) { - unsigned gs_num_invocations = MAX2(gs->gs_num_invocations, 1); - unsigned input_prim = gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; + unsigned gs_num_invocations = MAX2(gs->info.base.gs.invocations, 1); + unsigned input_prim = gs->info.base.gs.input_primitive; bool uses_adjacency = input_prim >= PIPE_PRIM_LINES_ADJACENCY && input_prim <= PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY; @@ -644,9 +647,9 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector * /* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations. * Make sure we don't go over the maximum value. */ - if (gs->gs_max_out_vertices > 0) { + if (gs->info.base.gs.vertices_out > 0) { max_gs_prims = - MIN2(max_gs_prims, max_out_prims / (gs->gs_max_out_vertices * gs_num_invocations)); + MIN2(max_gs_prims, max_out_prims / (gs->info.base.gs.vertices_out * gs_num_invocations)); } assert(max_gs_prims > 0); @@ -701,7 +704,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector * out->es_verts_per_subgroup = es_verts; out->gs_prims_per_subgroup = gs_prims; out->gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations; - out->max_prims_per_subgroup = out->gs_inst_prims_in_subgroup * gs->gs_max_out_vertices; + out->max_prims_per_subgroup = out->gs_inst_prims_in_subgroup * gs->info.base.gs.vertices_out; out->esgs_ring_size = esgs_lds_size; assert(out->max_prims_per_subgroup <= max_out_prims); @@ -772,10 +775,10 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) { struct si_shader_selector *sel = shader->selector; const ubyte *num_components = sel->info.num_stream_output_components; - unsigned gs_num_invocations = sel->gs_num_invocations; + unsigned gs_num_invocations = sel->info.base.gs.invocations; struct si_pm4_state *pm4; uint64_t va; - unsigned max_stream = sel->max_gs_stream; + unsigned max_stream = util_last_bit(sel->info.base.gs.active_stream_mask); unsigned offset; pm4 = si_get_shader_pm4_state(shader); @@ -784,30 +787,30 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) pm4->atom.emit = si_emit_shader_gs; - offset = num_components[0] * sel->gs_max_out_vertices; + offset = num_components[0] * sel->info.base.gs.vertices_out; shader->ctx_reg.gs.vgt_gsvs_ring_offset_1 = offset; - if (max_stream >= 1) - offset += num_components[1] * sel->gs_max_out_vertices; + if (max_stream >= 2) + offset += num_components[1] * sel->info.base.gs.vertices_out; shader->ctx_reg.gs.vgt_gsvs_ring_offset_2 = offset; - if (max_stream >= 2) - offset += num_components[2] * sel->gs_max_out_vertices; + if (max_stream >= 3) + offset += num_components[2] * sel->info.base.gs.vertices_out; shader->ctx_reg.gs.vgt_gsvs_ring_offset_3 = offset; - if (max_stream >= 3) - offset += num_components[3] * sel->gs_max_out_vertices; + if (max_stream >= 4) + offset += num_components[3] * sel->info.base.gs.vertices_out; shader->ctx_reg.gs.vgt_gsvs_ring_itemsize = offset; /* The GSVS_RING_ITEMSIZE register takes 15 bits */ assert(offset < (1 << 15)); - shader->ctx_reg.gs.vgt_gs_max_vert_out = sel->gs_max_out_vertices; + shader->ctx_reg.gs.vgt_gs_max_vert_out = sel->info.base.gs.vertices_out; shader->ctx_reg.gs.vgt_gs_vert_itemsize = num_components[0]; - shader->ctx_reg.gs.vgt_gs_vert_itemsize_1 = (max_stream >= 1) ? num_components[1] : 0; - shader->ctx_reg.gs.vgt_gs_vert_itemsize_2 = (max_stream >= 2) ? num_components[2] : 0; - shader->ctx_reg.gs.vgt_gs_vert_itemsize_3 = (max_stream >= 3) ? num_components[3] : 0; + shader->ctx_reg.gs.vgt_gs_vert_itemsize_1 = (max_stream >= 2) ? num_components[1] : 0; + shader->ctx_reg.gs.vgt_gs_vert_itemsize_2 = (max_stream >= 3) ? num_components[2] : 0; + shader->ctx_reg.gs.vgt_gs_vert_itemsize_3 = (max_stream >= 4) ? num_components[3] : 0; shader->ctx_reg.gs.vgt_gs_instance_cnt = S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0); @@ -815,7 +818,7 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) va = shader->bo->gpu_address; if (sscreen->info.chip_class >= GFX9) { - unsigned input_prim = sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; + unsigned input_prim = sel->info.base.gs.input_primitive; gl_shader_stage es_stage = shader->key.part.gs.es->info.stage; unsigned es_vgpr_comp_cnt, gs_vgpr_comp_cnt; @@ -1022,12 +1025,12 @@ static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx) unsigned si_get_input_prim(const struct si_shader_selector *gs) { if (gs->info.stage == MESA_SHADER_GEOMETRY) - return gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; + return gs->info.base.gs.input_primitive; if (gs->info.stage == MESA_SHADER_TESS_EVAL) { - if (gs->info.properties[TGSI_PROPERTY_TES_POINT_MODE]) + if (gs->info.base.tess.point_mode) return PIPE_PRIM_POINTS; - if (gs->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_LINES) + if (gs->info.base.tess.primitive_mode == GL_LINES) return PIPE_PRIM_LINES; return PIPE_PRIM_TRIANGLES; } @@ -1036,11 +1039,17 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs) return PIPE_PRIM_TRIANGLES; /* worst case for all callers */ } -static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, bool ngg) +static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, + const struct si_shader *shader, bool ngg) { - bool misc_vec_ena = sel->info.writes_psize || (sel->info.writes_edgeflag && !ngg) || + bool writes_psize = sel->info.writes_psize; + + if (shader) + writes_psize &= !shader->key.opt.kill_pointsize; + + bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) || sel->info.writes_layer || sel->info.writes_viewport_index; - return S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) | + return S_02881C_USE_VTX_POINT_SIZE(writes_psize) | S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) | S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) | S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) | @@ -1067,7 +1076,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader bool window_space = gs_info->stage == MESA_SHADER_VERTEX ? gs_info->base.vs.window_space_position : 0; bool es_enable_prim_id = shader->key.mono.u.vs_export_prim_id || es_info->uses_primid; - unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1); + unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1); unsigned input_prim = si_get_input_prim(gs_sel); bool break_wave_at_eoi = false; struct si_pm4_state *pm4 = si_get_shader_pm4_state(shader); @@ -1187,7 +1196,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader if (gs_stage == MESA_SHADER_GEOMETRY) { shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = es_sel->esgs_itemsize / 4; - shader->ctx_reg.ngg.vgt_gs_max_vert_out = gs_sel->gs_max_out_vertices; + shader->ctx_reg.ngg.vgt_gs_max_vert_out = gs_sel->info.base.gs.vertices_out; } else { shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = 1; } @@ -1216,7 +1225,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_stage == MESA_SHADER_VERTEX) | /* Reuse for NGG. */ S_028838_VERTEX_REUSE_DEPTH(sscreen->info.chip_class >= GFX10_3 ? 30 : 0); - shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true); + shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, true); /* Oversubscribe PC. This improves performance when there are too many varyings. */ float oversub_pc_factor = 0.25; @@ -1375,7 +1384,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, shader->ctx_reg.vs.vgt_primitiveid_en = enable_prim_id; } else { shader->ctx_reg.vs.vgt_gs_mode = - ac_vgt_gs_mode(gs->gs_max_out_vertices, sscreen->info.chip_class); + ac_vgt_gs_mode(gs->info.base.gs.vertices_out, sscreen->info.chip_class); shader->ctx_reg.vs.vgt_primitiveid_en = 0; } @@ -1422,7 +1431,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, : V_02870C_SPI_SHADER_NONE); shader->ctx_reg.vs.ge_pc_alloc = S_030980_OVERSUB_EN(sscreen->info.use_late_alloc) | S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1); - shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false); + shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, false); oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0; @@ -1595,7 +1604,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) */ spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2); - if (info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] == TGSI_FS_COORD_PIXEL_CENTER_INTEGER) + if (info->base.fs.pixel_center_integer) spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1); spi_shader_col_format = si_get_spi_shader_col_format(shader); @@ -1616,7 +1625,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) * the color and Z formats to SPI_SHADER_ZERO. The hw will skip export * instructions if any are present. */ - if ((sscreen->info.chip_class <= GFX9 || info->uses_kill || + if ((sscreen->info.chip_class <= GFX9 || info->base.fs.uses_discard || shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS) && !spi_shader_col_format && !info->writes_z && !info->writes_stencil && !info->writes_samplemask) @@ -1750,20 +1759,20 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx, struct si_shad struct si_shader_selector *ps = sctx->ps_shader.cso; key->opt.clip_disable = sctx->queued.named.rasterizer->clip_plane_enable == 0 && - (vs->info.clipdist_writemask || vs->info.writes_clipvertex) && - !vs->info.culldist_writemask; + (vs->info.base.clip_distance_array_size || vs->info.writes_clipvertex) && + !vs->info.base.cull_distance_array_size; /* Find out if PS is disabled. */ bool ps_disabled = true; if (ps) { - bool ps_modifies_zs = ps->info.uses_kill || ps->info.writes_z || ps->info.writes_stencil || + bool ps_modifies_zs = ps->info.base.fs.uses_discard || ps->info.writes_z || ps->info.writes_stencil || ps->info.writes_samplemask || sctx->queued.named.blend->alpha_to_coverage || si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS; unsigned ps_colormask = si_get_total_colormask(sctx); ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard || - (!ps_colormask && !ps_modifies_zs && !ps->info.writes_memory); + (!ps_colormask && !ps_modifies_zs && !ps->info.base.writes_memory); } /* Find out which VS outputs aren't used by the PS. */ @@ -1783,6 +1792,16 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx, struct si_shad key->opt.kill_outputs = ~linked & outputs_written; key->opt.ngg_culling = sctx->ngg_culling; + + if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) + key->mono.u.vs_export_prim_id = 1; + + /* We need PKT3_CONTEXT_REG_RMW, which we currently only use on GFX10+. */ + if (sctx->chip_class >= GFX10 && + vs->info.writes_psize && + sctx->current_rast_prim != PIPE_PRIM_POINTS && + !sctx->queued.named.rasterizer->polygon_mode_is_points) + key->opt.kill_pointsize = 1; } /* Compute the key for the hw shader variant */ @@ -1806,9 +1825,6 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh } else { key->as_ngg = stages_key.u.ngg; si_shader_selector_key_hw_vs(sctx, sel, key); - - if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) - key->mono.u.vs_export_prim_id = 1; } break; case MESA_SHADER_TESS_CTRL: @@ -1832,7 +1848,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh } key->part.tcs.epilog.prim_mode = - sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; + sctx->tes_shader.cso->info.base.tess.primitive_mode; key->part.tcs.epilog.invoc0_tess_factors_are_def = sel->info.tessfactors_are_def_in_all_invocs; key->part.tcs.epilog.tes_reads_tess_factors = sctx->tes_shader.cso->info.reads_tess_factors; @@ -1847,9 +1863,6 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh key->as_es = 1; else { si_shader_selector_key_hw_vs(sctx, sel, key); - - if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) - key->mono.u.vs_export_prim_id = 1; } break; case MESA_SHADER_GEOMETRY: @@ -1889,7 +1902,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct si_state_blend *blend = sctx->queued.named.blend; - if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && + if (sel->info.color0_writes_all_cbufs && sel->info.colors_written == 0x1) key->part.ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; @@ -1980,8 +1993,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, struct si_sh sel->info.uses_linear_sample > 1; - if (sel->info.uses_persp_opcode_interp_sample || - sel->info.uses_linear_opcode_interp_sample) + if (sel->info.uses_interp_at_sample) key->mono.u.ps.interpolate_at_sample_force_center = 1; } @@ -2543,12 +2555,12 @@ void si_get_active_slot_masks(const struct si_shader_info *info, uint64_t *const { unsigned start, num_shaderbufs, num_constbufs, num_images, num_msaa_images, num_samplers; - num_shaderbufs = util_last_bit(info->shader_buffers_declared); - num_constbufs = util_last_bit(info->const_buffers_declared); + num_shaderbufs = info->base.num_ssbos; + num_constbufs = info->base.num_ubos; /* two 8-byte images share one 16-byte slot */ - num_images = align(util_last_bit(info->images_declared), 2); - num_msaa_images = align(util_last_bit(info->msaa_images_declared), 2); - num_samplers = util_last_bit(info->samplers_declared); + num_images = align(info->base.num_images, 2); + num_msaa_images = align(util_last_bit(info->base.msaa_images), 2); + num_samplers = util_last_bit(info->base.textures_used); /* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */ start = si_get_shaderbuf_slot(num_shaderbufs - 1); @@ -2624,39 +2636,30 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->prim_discard_cs_allowed = sel->info.stage == MESA_SHADER_VERTEX && !sel->info.uses_bindless_images && - !sel->info.uses_bindless_samplers && !sel->info.writes_memory && + !sel->info.uses_bindless_samplers && !sel->info.base.writes_memory && !sel->info.writes_viewport_index && !sel->info.base.vs.window_space_position && !sel->so.num_outputs; switch (sel->info.stage) { case MESA_SHADER_GEOMETRY: - sel->gs_output_prim = sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; - /* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */ - sel->rast_prim = sel->gs_output_prim; + sel->rast_prim = sel->info.base.gs.output_primitive; if (util_rast_prim_is_triangles(sel->rast_prim)) sel->rast_prim = PIPE_PRIM_TRIANGLES; - sel->gs_max_out_vertices = sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; - sel->gs_num_invocations = sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS]; sel->gsvs_vertex_size = sel->info.num_outputs * 16; - sel->max_gsvs_emit_size = sel->gsvs_vertex_size * sel->gs_max_out_vertices; - - sel->max_gs_stream = 0; - for (i = 0; i < sel->so.num_outputs; i++) - sel->max_gs_stream = MAX2(sel->max_gs_stream, sel->so.output[i].stream); - + sel->max_gsvs_emit_size = sel->gsvs_vertex_size * sel->info.base.gs.vertices_out; sel->gs_input_verts_per_prim = - u_vertices_per_prim(sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]); + u_vertices_per_prim(sel->info.base.gs.input_primitive); /* EN_MAX_VERT_OUT_PER_GS_INSTANCE does not work with tesselation so * we can't split workgroups. Disable ngg if any of the following conditions is true: - * - num_invocations * gs_max_out_vertices > 256 + * - num_invocations * gs.vertices_out > 256 * - LDS usage is too high */ sel->tess_turns_off_ngg = sscreen->info.chip_class >= GFX10 && - (sel->gs_num_invocations * sel->gs_max_out_vertices > 256 || - sel->gs_num_invocations * sel->gs_max_out_vertices * + (sel->info.base.gs.invocations * sel->info.base.gs.vertices_out > 256 || + sel->info.base.gs.invocations * sel->info.base.gs.vertices_out * (sel->info.num_outputs * 4 + 1) > 6500 /* max dw per GS primitive */); break; @@ -2700,12 +2703,16 @@ static void *si_create_shader_selector(struct pipe_context *ctx, assert(((sel->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0); /* Only for TES: */ - if (sel->info.properties[TGSI_PROPERTY_TES_POINT_MODE]) - sel->rast_prim = PIPE_PRIM_POINTS; - else if (sel->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_LINES) - sel->rast_prim = PIPE_PRIM_LINE_STRIP; - else + if (sel->info.stage == MESA_SHADER_TESS_EVAL) { + if (sel->info.base.tess.point_mode) + sel->rast_prim = PIPE_PRIM_POINTS; + else if (sel->info.base.tess.primitive_mode == GL_LINES) + sel->rast_prim = PIPE_PRIM_LINE_STRIP; + else + sel->rast_prim = PIPE_PRIM_TRIANGLES; + } else { sel->rast_prim = PIPE_PRIM_TRIANGLES; + } break; case MESA_SHADER_FRAGMENT: @@ -2742,66 +2749,71 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sscreen->always_use_ngg_culling_tess))) && sel->info.writes_position && !sel->info.writes_viewport_index && /* cull only against viewport 0 */ - !sel->info.writes_memory && !sel->so.num_outputs && + !sel->info.base.writes_memory && !sel->so.num_outputs && (sel->info.stage != MESA_SHADER_VERTEX || (!sel->info.base.vs.blit_sgprs_amd && !sel->info.base.vs.window_space_position)); /* PA_CL_VS_OUT_CNTL */ if (sctx->chip_class <= GFX9) - sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false); + sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, NULL, false); - sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS : sel->info.clipdist_writemask; - sel->culldist_mask = sel->info.culldist_writemask << sel->info.num_written_clipdistance; + sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS : + u_bit_consecutive(0, sel->info.base.clip_distance_array_size); + sel->culldist_mask = u_bit_consecutive(0, sel->info.base.cull_distance_array_size) << + sel->info.base.clip_distance_array_size; /* DB_SHADER_CONTROL */ sel->db_shader_control = S_02880C_Z_EXPORT_ENABLE(sel->info.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(sel->info.writes_stencil) | S_02880C_MASK_EXPORT_ENABLE(sel->info.writes_samplemask) | - S_02880C_KILL_ENABLE(sel->info.uses_kill); + S_02880C_KILL_ENABLE(sel->info.base.fs.uses_discard); - switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) { - case TGSI_FS_DEPTH_LAYOUT_GREATER: - sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z); - break; - case TGSI_FS_DEPTH_LAYOUT_LESS: - sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z); - break; - } + if (sel->info.stage == MESA_SHADER_FRAGMENT) { + switch (sel->info.base.fs.depth_layout) { + case FRAG_DEPTH_LAYOUT_GREATER: + sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z); + break; + case FRAG_DEPTH_LAYOUT_LESS: + sel->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z); + break; + default:; + } - /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as following: - * - * | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP - * --|-----------|------------|------------|--------------------|-------------------|------------- - * 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0 - * 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0 - * 2 | false | true | n/a | LateZ | 1 | 0 - * 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0 - * 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1 - * - * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register. - * In case 2, NOOP_CULL is a don't care field. In case 2, 3 and 4, ReZ doesn't make sense. - * - * Don't use ReZ without profiling !!! - * - * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex - * shaders. - */ - if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) { - /* Cases 3, 4. */ - sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) | - S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) | - S_02880C_EXEC_ON_NOOP(sel->info.writes_memory); - } else if (sel->info.writes_memory) { - /* Case 2. */ - sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1); - } else { - /* Case 1. */ - sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); - } + /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as following: + * + * | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP + * --|-----------|------------|------------|--------------------|-------------------|------------- + * 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0 + * 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0 + * 2 | false | true | n/a | LateZ | 1 | 0 + * 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0 + * 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1 + * + * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register. + * In case 2, NOOP_CULL is a don't care field. In case 2, 3 and 4, ReZ doesn't make sense. + * + * Don't use ReZ without profiling !!! + * + * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex + * shaders. + */ + if (sel->info.base.fs.early_fragment_tests) { + /* Cases 3, 4. */ + sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) | + S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) | + S_02880C_EXEC_ON_NOOP(sel->info.base.writes_memory); + } else if (sel->info.base.writes_memory) { + /* Case 2. */ + sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1); + } else { + /* Case 1. */ + sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + } - if (sel->info.properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE]) - sel->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1); + if (sel->info.base.fs.post_depth_coverage) + sel->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1); + } (void)simple_mtx_init(&sel->mutex, mtx_plain); @@ -3050,9 +3062,9 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); if (sctx->screen->has_out_of_order_rast && - (!old_sel || old_sel->info.writes_memory != sel->info.writes_memory || - old_sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] != - sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])) + (!old_sel || old_sel->info.base.writes_memory != sel->info.base.writes_memory || + old_sel->info.base.fs.early_fragment_tests != + sel->info.base.fs.early_fragment_tests)) si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); } si_set_active_descriptors_for_shader(sctx, sel);