pos_args[0].out[3] = ctx->ac.f32_1; /* W */
}
+ bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize;
bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
/* Write the misc vector (point size, edgeflag, layer, viewport). */
- if (shader->selector->info.writes_psize || pos_writes_edgeflag ||
+ if (writes_psize || pos_writes_edgeflag ||
shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
- pos_args[1].enabled_channels = shader->selector->info.writes_psize |
+ pos_args[1].enabled_channels = writes_psize |
(pos_writes_edgeflag << 1) |
(shader->selector->info.writes_layer << 2);
pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
pos_args[1].out[3] = ctx->ac.f32_0; /* W */
- if (shader->selector->info.writes_psize)
+ if (writes_psize)
pos_args[1].out[0] = psize_value;
if (pos_writes_edgeflag) {
rs->polygon_mode_is_lines =
(state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) ||
(state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK));
+ rs->polygon_mode_is_points =
+ (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) ||
+ (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK));
rs->pa_sc_line_stipple = state->line_stipple_enable
? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
S_028A0C_REPEAT_COUNT(state->line_stipple_factor)
old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth ||
old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
- old_rs->force_persample_interp != rs->force_persample_interp)
+ old_rs->force_persample_interp != rs->force_persample_interp ||
+ old_rs->polygon_mode_is_points != rs->polygon_mode_is_points)
sctx->do_update_shaders = true;
}
return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
}
-static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, bool ngg)
+static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
+ const struct si_shader *shader, bool ngg)
{
- bool misc_vec_ena = sel->info.writes_psize || (sel->info.writes_edgeflag && !ngg) ||
+ bool writes_psize = sel->info.writes_psize;
+
+ if (shader)
+ writes_psize &= !shader->key.opt.kill_pointsize;
+
+ bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
sel->info.writes_layer || sel->info.writes_viewport_index;
- return S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) |
+ return S_02881C_USE_VTX_POINT_SIZE(writes_psize) |
S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_stage == MESA_SHADER_VERTEX) |
/* Reuse for NGG. */
S_028838_VERTEX_REUSE_DEPTH(sscreen->info.chip_class >= GFX10_3 ? 30 : 0);
- shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
+ shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, true);
/* Oversubscribe PC. This improves performance when there are too many varyings. */
float oversub_pc_factor = 0.25;
: V_02870C_SPI_SHADER_NONE);
shader->ctx_reg.vs.ge_pc_alloc = S_030980_OVERSUB_EN(sscreen->info.use_late_alloc) |
S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1);
- shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false);
+ shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, false);
oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
key->mono.u.vs_export_prim_id = 1;
+
+ /* We need PKT3_CONTEXT_REG_RMW, which we currently only use on GFX10+. */
+ if (sctx->chip_class >= GFX10 &&
+ vs->info.writes_psize &&
+ sctx->current_rast_prim != PIPE_PRIM_POINTS &&
+ !sctx->queued.named.rasterizer->polygon_mode_is_points)
+ key->opt.kill_pointsize = 1;
}
/* Compute the key for the hw shader variant */
/* PA_CL_VS_OUT_CNTL */
if (sctx->chip_class <= GFX9)
- sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false);
+ sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, NULL, false);
sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS :
u_bit_consecutive(0, sel->info.base.clip_distance_array_size);