radeonsi: kill point size VS output if it's not used by the rasterizer
author Marek Olšák <marek.olsak@amd.com>
Sun, 6 Sep 2020 05:22:01 +0000 (01:22 -0400)
committer Vivek Pandya <vivekvpandya@gmail.com>
Mon, 7 Sep 2020 15:55:17 +0000 (21:25 +0530)
Fixed-func shaders can contain the point size output, because their
generator doesn't take the current primitive type into account.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6620>

src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 805a8b1e87aeb15e194ce3e65d979fc591286c0d..d26f36a43880e57667df03a0fdc194b18b679f84 100644 (file)
@@ -637,6 +637,7 @@ struct si_shader_key {
    struct {
       /* For HW VS (it can be VS, TES, GS) */
       uint64_t kill_outputs; /* "get_unique_index" bits */
    struct {
       /* For HW VS (it can be VS, TES, GS) */
       uint64_t kill_outputs; /* "get_unique_index" bits */
+      unsigned kill_pointsize : 1;
       unsigned clip_disable : 1;
 
       /* For NGG VS and TES. */
       unsigned clip_disable : 1;
 
       /* For NGG VS and TES. */
index daa992b42d01c36f199eedf2aa7c2fba814330ee..96313d11175ed540c6a7c862044a9e400ee3a5a0 100644 (file)
@@ -593,12 +593,13 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
       pos_args[0].out[3] = ctx->ac.f32_1; /* W */
    }
 
       pos_args[0].out[3] = ctx->ac.f32_1; /* W */
    }
 
+   bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize;
    bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
 
    /* Write the misc vector (point size, edgeflag, layer, viewport). */
    bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
 
    /* Write the misc vector (point size, edgeflag, layer, viewport). */
-   if (shader->selector->info.writes_psize || pos_writes_edgeflag ||
+   if (writes_psize || pos_writes_edgeflag ||
        shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
        shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
-      pos_args[1].enabled_channels = shader->selector->info.writes_psize |
+      pos_args[1].enabled_channels = writes_psize |
                                      (pos_writes_edgeflag << 1) |
                                      (shader->selector->info.writes_layer << 2);
 
                                      (pos_writes_edgeflag << 1) |
                                      (shader->selector->info.writes_layer << 2);
 
@@ -611,7 +612,7 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
       pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
       pos_args[1].out[3] = ctx->ac.f32_0; /* W */
 
       pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
       pos_args[1].out[3] = ctx->ac.f32_0; /* W */
 
-      if (shader->selector->info.writes_psize)
+      if (writes_psize)
          pos_args[1].out[0] = psize_value;
 
       if (pos_writes_edgeflag) {
          pos_args[1].out[0] = psize_value;
 
       if (pos_writes_edgeflag) {
index 4c2e0c7a6c102af751ddd61722a621b4fcc9b771..75507a30cc4e37c068b944fb6cc2165007c559c2 100644 (file)
@@ -871,6 +871,9 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
    rs->polygon_mode_is_lines =
       (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) ||
       (state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK));
    rs->polygon_mode_is_lines =
       (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) ||
       (state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK));
+   rs->polygon_mode_is_points =
+      (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) ||
+      (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK));
    rs->pa_sc_line_stipple = state->line_stipple_enable
                                ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
                                     S_028A0C_REPEAT_COUNT(state->line_stipple_factor)
    rs->pa_sc_line_stipple = state->line_stipple_enable
                                ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
                                     S_028A0C_REPEAT_COUNT(state->line_stipple_factor)
@@ -1020,7 +1023,8 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
        old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
        old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth ||
        old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
        old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
        old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth ||
        old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
-       old_rs->force_persample_interp != rs->force_persample_interp)
+       old_rs->force_persample_interp != rs->force_persample_interp ||
+       old_rs->polygon_mode_is_points != rs->polygon_mode_is_points)
       sctx->do_update_shaders = true;
 }
 
       sctx->do_update_shaders = true;
 }
 
index bb7a73c938e58bb8a9eb2dc2956fb16717470827..4d42a40d5171277f7cfe167cc93c610ea1ba9cbb 100644 (file)
@@ -95,6 +95,7 @@ struct si_state_rasterizer {
    unsigned provoking_vertex_first : 1;
    unsigned polygon_mode_enabled : 1;
    unsigned polygon_mode_is_lines : 1;
    unsigned provoking_vertex_first : 1;
    unsigned polygon_mode_enabled : 1;
    unsigned polygon_mode_is_lines : 1;
+   unsigned polygon_mode_is_points : 1;
 };
 
 struct si_dsa_stencil_ref_part {
 };
 
 struct si_dsa_stencil_ref_part {
index df89c9dfe6b66e559116e158fbf96091e32cb7ce..9e1f088e16f49a6bdb29229296c32f5ab98eb287 100644 (file)
@@ -1039,11 +1039,17 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs)
    return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
 }
 
    return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
 }
 
-static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, bool ngg)
+static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
+                                   const struct si_shader *shader, bool ngg)
 {
 {
-   bool misc_vec_ena = sel->info.writes_psize || (sel->info.writes_edgeflag && !ngg) ||
+   bool writes_psize = sel->info.writes_psize;
+
+   if (shader)
+      writes_psize &= !shader->key.opt.kill_pointsize;
+
+   bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
                        sel->info.writes_layer || sel->info.writes_viewport_index;
                        sel->info.writes_layer || sel->info.writes_viewport_index;
-   return S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) |
+   return S_02881C_USE_VTX_POINT_SIZE(writes_psize) |
           S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) |
           S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
           S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
           S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) |
           S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
           S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
@@ -1219,7 +1225,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
       S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_stage == MESA_SHADER_VERTEX) |
       /* Reuse for NGG. */
       S_028838_VERTEX_REUSE_DEPTH(sscreen->info.chip_class >= GFX10_3 ? 30 : 0);
       S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_stage == MESA_SHADER_VERTEX) |
       /* Reuse for NGG. */
       S_028838_VERTEX_REUSE_DEPTH(sscreen->info.chip_class >= GFX10_3 ? 30 : 0);
-   shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
+   shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, true);
 
    /* Oversubscribe PC. This improves performance when there are too many varyings. */
    float oversub_pc_factor = 0.25;
 
    /* Oversubscribe PC. This improves performance when there are too many varyings. */
    float oversub_pc_factor = 0.25;
@@ -1425,7 +1431,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
                                                                   : V_02870C_SPI_SHADER_NONE);
    shader->ctx_reg.vs.ge_pc_alloc = S_030980_OVERSUB_EN(sscreen->info.use_late_alloc) |
                                     S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1);
                                                                   : V_02870C_SPI_SHADER_NONE);
    shader->ctx_reg.vs.ge_pc_alloc = S_030980_OVERSUB_EN(sscreen->info.use_late_alloc) |
                                     S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1);
-   shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false);
+   shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, false);
 
    oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
 
 
    oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
 
@@ -1789,6 +1795,13 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx, struct si_shad
 
    if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
       key->mono.u.vs_export_prim_id = 1;
 
    if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
       key->mono.u.vs_export_prim_id = 1;
+
+   /* We need PKT3_CONTEXT_REG_RMW, which we currently only use on GFX10+. */
+   if (sctx->chip_class >= GFX10 &&
+       vs->info.writes_psize &&
+       sctx->current_rast_prim != PIPE_PRIM_POINTS &&
+       !sctx->queued.named.rasterizer->polygon_mode_is_points)
+      key->opt.kill_pointsize = 1;
 }
 
 /* Compute the key for the hw shader variant */
 }
 
 /* Compute the key for the hw shader variant */
@@ -2743,7 +2756,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
    /* PA_CL_VS_OUT_CNTL */
    if (sctx->chip_class <= GFX9)
 
    /* PA_CL_VS_OUT_CNTL */
    if (sctx->chip_class <= GFX9)
-      sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false);
+      sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, NULL, false);
 
    sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS :
                            u_bit_consecutive(0, sel->info.base.clip_distance_array_size);
 
    sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS :
                            u_bit_consecutive(0, sel->info.base.clip_distance_array_size);