From: Marek Olšák Date: Sun, 6 Sep 2020 05:22:01 +0000 (-0400) Subject: radeonsi: kill point size VS output if it's not used by the rasterizer X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=5ef50078fced6331c8731389785762b62262afba radeonsi: kill point size VS output if it's not used by the rasterizer Fixed-func shaders can contain the output, because their generator doesn't take the current primitive type into account. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 805a8b1e87a..d26f36a4388 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -637,6 +637,7 @@ struct si_shader_key { struct { /* For HW VS (it can be VS, TES, GS) */ uint64_t kill_outputs; /* "get_unique_index" bits */ + unsigned kill_pointsize : 1; unsigned clip_disable : 1; /* For NGG VS and TES. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index daa992b42d0..96313d11175 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -593,12 +593,13 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, pos_args[0].out[3] = ctx->ac.f32_1; /* W */ } + bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize; bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg; /* Write the misc vector (point size, edgeflag, layer, viewport). 
*/ - if (shader->selector->info.writes_psize || pos_writes_edgeflag || + if (writes_psize || pos_writes_edgeflag || shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) { - pos_args[1].enabled_channels = shader->selector->info.writes_psize | + pos_args[1].enabled_channels = writes_psize | (pos_writes_edgeflag << 1) | (shader->selector->info.writes_layer << 2); @@ -611,7 +612,7 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, pos_args[1].out[2] = ctx->ac.f32_0; /* Z */ pos_args[1].out[3] = ctx->ac.f32_0; /* W */ - if (shader->selector->info.writes_psize) + if (writes_psize) pos_args[1].out[0] = psize_value; if (pos_writes_edgeflag) { diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 4c2e0c7a6c1..75507a30cc4 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -871,6 +871,9 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast rs->polygon_mode_is_lines = (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) || (state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK)); + rs->polygon_mode_is_points = + (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) || + (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK)); rs->pa_sc_line_stipple = state->line_stipple_enable ? 
S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | S_028A0C_REPEAT_COUNT(state->line_stipple_factor) @@ -1020,7 +1023,8 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) old_rs->poly_stipple_enable != rs->poly_stipple_enable || old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth || old_rs->clamp_fragment_color != rs->clamp_fragment_color || - old_rs->force_persample_interp != rs->force_persample_interp) + old_rs->force_persample_interp != rs->force_persample_interp || + old_rs->polygon_mode_is_points != rs->polygon_mode_is_points) sctx->do_update_shaders = true; } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index bb7a73c938e..4d42a40d517 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -95,6 +95,7 @@ struct si_state_rasterizer { unsigned provoking_vertex_first : 1; unsigned polygon_mode_enabled : 1; unsigned polygon_mode_is_lines : 1; + unsigned polygon_mode_is_points : 1; }; struct si_dsa_stencil_ref_part { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index df89c9dfe6b..9e1f088e16f 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1039,11 +1039,17 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs) return PIPE_PRIM_TRIANGLES; /* worst case for all callers */ } -static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, bool ngg) +static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, + const struct si_shader *shader, bool ngg) { - bool misc_vec_ena = sel->info.writes_psize || (sel->info.writes_edgeflag && !ngg) || + bool writes_psize = sel->info.writes_psize; + + if (shader) + writes_psize &= !shader->key.opt.kill_pointsize; + + bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) || sel->info.writes_layer || 
sel->info.writes_viewport_index; - return S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) | + return S_02881C_USE_VTX_POINT_SIZE(writes_psize) | S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) | S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) | S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) | @@ -1219,7 +1225,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_stage == MESA_SHADER_VERTEX) | /* Reuse for NGG. */ S_028838_VERTEX_REUSE_DEPTH(sscreen->info.chip_class >= GFX10_3 ? 30 : 0); - shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true); + shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, true); /* Oversubscribe PC. This improves performance when there are too many varyings. */ float oversub_pc_factor = 0.25; @@ -1425,7 +1431,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, : V_02870C_SPI_SHADER_NONE); shader->ctx_reg.vs.ge_pc_alloc = S_030980_OVERSUB_EN(sscreen->info.use_late_alloc) | S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1); - shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false); + shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, false); oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0; @@ -1789,6 +1795,13 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx, struct si_shad if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) key->mono.u.vs_export_prim_id = 1; + + /* We need PKT3_CONTEXT_REG_RMW, which we currently only use on GFX10+. 
*/ + if (sctx->chip_class >= GFX10 && + vs->info.writes_psize && + sctx->current_rast_prim != PIPE_PRIM_POINTS && + !sctx->queued.named.rasterizer->polygon_mode_is_points) + key->opt.kill_pointsize = 1; } /* Compute the key for the hw shader variant */ @@ -2743,7 +2756,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, /* PA_CL_VS_OUT_CNTL */ if (sctx->chip_class <= GFX9) - sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false); + sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, NULL, false); sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS : u_bit_consecutive(0, sel->info.base.clip_distance_array_size);