emitted_prims = nggso.emit[0];
}
+ /* Copy Primitive IDs from GS threads to the LDS address corresponding
+ * to the ES thread of the provoking vertex.
+ */
+ if (ctx->type == PIPE_SHADER_VERTEX &&
+ ctx->shader->key.mono.u.vs_export_prim_id) {
+ /* Streamout uses LDS. We need to wait for it before we can reuse it. */
+ if (sel->so.num_outputs)
+ ac_build_s_barrier(&ctx->ac);
+
+ ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
+ /* Extract the PROVOKING_VTX_INDEX field. */
+ LLVMValueRef provoking_vtx_in_prim =
+ si_unpack_param(ctx, ctx->param_vs_state_bits, 4, 2);
+
+ /* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */
+ LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3);
+ LLVMValueRef provoking_vtx_index =
+ LLVMBuildExtractElement(builder, indices, provoking_vtx_in_prim, "");
+
+ LLVMBuildStore(builder, ctx->abi.gs_prim_id,
+ ac_build_gep0(&ctx->ac, ctx->esgs_ring, provoking_vtx_index));
+ ac_build_endif(&ctx->ac, 5400);
+ }
+
/* TODO: primitive culling */
build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
}
}
- /* TODO: Vertex shaders have to get PrimitiveID from GS VGPRs. */
- if (ctx->type == PIPE_SHADER_TESS_EVAL &&
- ctx->shader->key.mono.u.vs_export_prim_id) {
+ if (ctx->shader->key.mono.u.vs_export_prim_id) {
outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
outputs[i].semantic_index = 0;
- outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
+
+ if (ctx->type == PIPE_SHADER_VERTEX) {
+ /* Wait for GS stores to finish. */
+ ac_build_s_barrier(&ctx->ac);
+
+ tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring,
+ get_thread_id_in_tg(ctx));
+ outputs[i].values[0] = LLVMBuildLoad(builder, tmp, "");
+ } else {
+ assert(ctx->type == PIPE_SHADER_TESS_EVAL);
+ outputs[i].values[0] = si_get_primitive_id(ctx, 0);
+ }
+
+ outputs[i].values[0] = ac_to_float(&ctx->ac, outputs[i].values[0]);
for (unsigned j = 1; j < 4; j++)
outputs[i].values[j] = LLVMGetUndef(ctx->f32);
ctx->last_prim = -1;
ctx->last_multi_vgt_param = -1;
ctx->last_rast_prim = -1;
+ ctx->last_flatshade_first = -1;
ctx->last_sc_line_stipple = ~0;
ctx->last_vs_state = ~0;
ctx->last_ls = NULL;
int last_prim;
int last_multi_vgt_param;
int last_rast_prim;
+ int last_flatshade_first;
unsigned last_sc_line_stipple;
unsigned current_vs_state;
unsigned last_vs_state;
esgs_ring_size = shader->gs_info.esgs_ring_size;;
}
- if (sel && shader->key.as_ngg && sel->so.num_outputs) {
- unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
- esgs_ring_size = MAX2(esgs_ring_size,
- shader->ngg.max_out_verts * esgs_vertex_bytes);
+ if (sel && shader->key.as_ngg) {
+ if (sel->so.num_outputs) {
+ unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1);
+ esgs_ring_size = MAX2(esgs_ring_size,
+ shader->ngg.max_out_verts * esgs_vertex_bytes);
+ }
+
+ /* GS stores Primitive IDs into LDS at the address corresponding
+ * to the provoking vertex. All vertex threads load and export
+ * PrimitiveID for their thread.
+ */
+ if (sel->type == PIPE_SHADER_VERTEX &&
+ shader->key.mono.u.vs_export_prim_id)
+ esgs_ring_size = MAX2(esgs_ring_size, shader->ngg.max_out_verts * 4);
}
if (esgs_ring_size) {
};
/* Fields of driver-defined VS state SGPR. */
-/* Clamp vertex color output (only used in VS as VS). */
#define S_VS_STATE_CLAMP_VERTEX_COLOR(x) (((unsigned)(x) & 0x1) << 0) /* S_*: place x into the field; here 1 bit at bit 0 */
#define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE /* C_*: AND-mask that clears the field (bit 0) */
#define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1) /* 1 bit at bit 1 */
#define C_VS_STATE_INDEXED 0xFFFFFFFD /* clears bit 1 */
#define S_VS_STATE_OUTPRIM(x) (((unsigned)(x) & 0x3) << 2) /* 2 bits at bits 2-3 */
#define C_VS_STATE_OUTPRIM 0xFFFFFFF3 /* clears bits 2-3 */
+#define S_VS_STATE_PROVOKING_VTX_INDEX(x) (((unsigned)(x) & 0x3) << 4) /* 2 bits at bits 4-5; shader side extracts it with si_unpack_param(..., 4, 2) */
+#define C_VS_STATE_PROVOKING_VTX_INDEX 0xFFFFFFCF /* clears bits 4-5; applied before OR-ing in the new S_ value */
#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8) /* 13 bits at bits 8-20 */
#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF /* clears bits 8-20 */
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24) /* 8 bits at bits 24-31 */
rs->clamp_fragment_color = state->clamp_fragment_color;
rs->clamp_vertex_color = state->clamp_vertex_color;
rs->flatshade = state->flatshade;
+ rs->flatshade_first = state->flatshade_first;
rs->sprite_coord_enable = state->sprite_coord_enable;
rs->rasterizer_discard = state->rasterizer_discard;
rs->pa_sc_line_stipple = state->line_stipple_enable ?
unsigned clip_plane_enable:8;
unsigned half_pixel_center:1;
unsigned flatshade:1;
+ unsigned flatshade_first:1;
unsigned two_side:1;
unsigned multisample_enable:1;
unsigned force_persample_interp:1;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
if (likely(rast_prim == sctx->last_rast_prim &&
- rs->pa_sc_line_stipple == sctx->last_sc_line_stipple))
+ rs->pa_sc_line_stipple == sctx->last_sc_line_stipple &&
+ (sctx->chip_class <= GFX9 ||
+ rs->flatshade_first == sctx->last_flatshade_first)))
return;
if (util_prim_is_lines(rast_prim)) {
sctx->context_roll = true;
}
+ unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
+
if (rast_prim != sctx->last_rast_prim &&
(sctx->ngg || sctx->gs_shader.cso)) {
- unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
sctx->context_roll = true;
}
}
+ if (sctx->chip_class >= GFX10) {
+ unsigned vtx_index = rs->flatshade_first ? 0 : gs_out;
+ sctx->current_vs_state &= C_VS_STATE_PROVOKING_VTX_INDEX;
+ sctx->current_vs_state |= S_VS_STATE_PROVOKING_VTX_INDEX(vtx_index);
+ }
+
sctx->last_rast_prim = rast_prim;
sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
+ sctx->last_flatshade_first = rs->flatshade_first;
}
static void si_emit_vs_state(struct si_context *sctx,