From b680f723f8c2edd3382631d3481bfcb58d2952a5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 5 Jun 2019 20:20:47 -0400 Subject: [PATCH] radeonsi/gfx10: export correct PrimitiveID from NGG vertex shaders Acked-by: Bas Nieuwenhuizen --- .../drivers/radeonsi/gfx10_shader_ngg.c | 43 +++++++++++++++++-- src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_shader.c | 18 ++++++-- src/gallium/drivers/radeonsi/si_shader.h | 3 +- src/gallium/drivers/radeonsi/si_state.c | 1 + src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 14 +++++- 8 files changed, 71 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 8461a39488e..8fbce10012f 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -616,6 +616,30 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, emitted_prims = nggso.emit[0]; } + /* Copy Primitive IDs from GS threads to the LDS address corresponding + * to the ES thread of the provoking vertex. + */ + if (ctx->type == PIPE_SHADER_VERTEX && + ctx->shader->key.mono.u.vs_export_prim_id) { + /* Streamout uses LDS. We need to wait for it before we can reuse it. */ + if (sel->so.num_outputs) + ac_build_s_barrier(&ctx->ac); + + ac_build_ifcc(&ctx->ac, is_gs_thread, 5400); + /* Extract the PROVOKING_VTX_INDEX field. */ + LLVMValueRef provoking_vtx_in_prim = + si_unpack_param(ctx, ctx->param_vs_state_bits, 4, 2); + + /* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */ + LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3); + LLVMValueRef provoking_vtx_index = + LLVMBuildExtractElement(builder, indices, provoking_vtx_in_prim, ""); + + LLVMBuildStore(builder, ctx->abi.gs_prim_id, + ac_build_gep0(&ctx->ac, ctx->esgs_ring, provoking_vtx_index)); + ac_build_endif(&ctx->ac, 5400); + } + /* TODO: primitive culling */ build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx)); @@ -700,12 +724,23 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, } } - /* TODO: Vertex shaders have to get PrimitiveID from GS VGPRs. */ - if (ctx->type == PIPE_SHADER_TESS_EVAL && - ctx->shader->key.mono.u.vs_export_prim_id) { + if (ctx->shader->key.mono.u.vs_export_prim_id) { outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID; outputs[i].semantic_index = 0; - outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0)); + + if (ctx->type == PIPE_SHADER_VERTEX) { + /* Wait for GS stores to finish. */ + ac_build_s_barrier(&ctx->ac); + + tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring, + get_thread_id_in_tg(ctx)); + outputs[i].values[0] = LLVMBuildLoad(builder, tmp, ""); + } else { + assert(ctx->type == PIPE_SHADER_TESS_EVAL); + outputs[i].values[0] = si_get_primitive_id(ctx, 0); + } + + outputs[i].values[0] = ac_to_float(&ctx->ac, outputs[i].values[0]); for (unsigned j = 1; j < 4; j++) outputs[i].values[j] = LLVMGetUndef(ctx->f32); diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index f32e64ea570..277a25a0b3e 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -441,6 +441,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->last_prim = -1; ctx->last_multi_vgt_param = -1; ctx->last_rast_prim = -1; + ctx->last_flatshade_first = -1; ctx->last_sc_line_stipple = ~0; ctx->last_vs_state = ~0; ctx->last_ls = NULL; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index a9080c93505..298e63738c4 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1059,6 +1059,7 @@ struct si_context { int last_prim; int last_multi_vgt_param; int last_rast_prim; + int last_flatshade_first; unsigned last_sc_line_stipple; unsigned current_vs_state; unsigned last_vs_state; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 032b5a7bd8b..be3b897c791 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5228,10 +5228,20 @@ static bool si_shader_binary_open(struct si_screen *screen, esgs_ring_size = shader->gs_info.esgs_ring_size;; } - if (sel && shader->key.as_ngg && sel->so.num_outputs) { - unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1); - esgs_ring_size = MAX2(esgs_ring_size, - shader->ngg.max_out_verts * esgs_vertex_bytes); + if (sel && shader->key.as_ngg) { + if (sel->so.num_outputs) { + unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1); + esgs_ring_size = MAX2(esgs_ring_size, + shader->ngg.max_out_verts * esgs_vertex_bytes); + } + + /* GS stores Primitive IDs into LDS at the address corresponding + * to the provoking vertex. All vertex threads load and export + * PrimitiveID for their thread. + */ + if (sel->type == PIPE_SHADER_VERTEX && + shader->key.mono.u.vs_export_prim_id) + esgs_ring_size = MAX2(esgs_ring_size, shader->ngg.max_out_verts * 4); } if (esgs_ring_size) { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index b545bf1bc23..801895b240c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -241,13 +241,14 @@ enum { }; /* Fields of driver-defined VS state SGPR. */ -/* Clamp vertex color output (only used in VS as VS). */ #define S_VS_STATE_CLAMP_VERTEX_COLOR(x) (((unsigned)(x) & 0x1) << 0) #define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE #define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1) #define C_VS_STATE_INDEXED 0xFFFFFFFD #define S_VS_STATE_OUTPRIM(x) (((unsigned)(x) & 0x3) << 2) #define C_VS_STATE_OUTPRIM 0xFFFFFFF3 +#define S_VS_STATE_PROVOKING_VTX_INDEX(x) (((unsigned)(x) & 0x3) << 4) +#define C_VS_STATE_PROVOKING_VTX_INDEX 0xFFFFFFCF #define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8) #define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index e9388e6252c..9f3e08675ac 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -892,6 +892,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, rs->clamp_fragment_color = state->clamp_fragment_color; rs->clamp_vertex_color = state->clamp_vertex_color; rs->flatshade = state->flatshade; + rs->flatshade_first = state->flatshade_first; rs->sprite_coord_enable = state->sprite_coord_enable; rs->rasterizer_discard = state->rasterizer_discard; rs->pa_sc_line_stipple = state->line_stipple_enable ? diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 4493969037c..91b4f1ea13e 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -74,6 +74,7 @@ struct si_state_rasterizer { unsigned clip_plane_enable:8; unsigned half_pixel_center:1; unsigned flatshade:1; + unsigned flatshade_first:1; unsigned two_side:1; unsigned multisample_enable:1; unsigned force_persample_interp:1; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index eddfdd65da2..2f142bc6781 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -586,7 +586,9 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx) struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; if (likely(rast_prim == sctx->last_rast_prim && - rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)) + rs->pa_sc_line_stipple == sctx->last_sc_line_stipple && + (sctx->chip_class <= GFX9 || + rs->flatshade_first == sctx->last_flatshade_first))) return; if (util_prim_is_lines(rast_prim)) { @@ -599,9 +601,10 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx) sctx->context_roll = true; } + unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim); + if (rast_prim != sctx->last_rast_prim && (sctx->ngg || sctx->gs_shader.cso)) { - unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim); radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); sctx->context_roll = true; @@ -611,8 +614,15 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx) } } + if (sctx->chip_class >= GFX10) { + unsigned vtx_index = rs->flatshade_first ? 0 : gs_out; + sctx->current_vs_state &= C_VS_STATE_PROVOKING_VTX_INDEX; + sctx->current_vs_state |= S_VS_STATE_PROVOKING_VTX_INDEX(vtx_index); + } + sctx->last_rast_prim = rast_prim; sctx->last_sc_line_stipple = rs->pa_sc_line_stipple; + sctx->last_flatshade_first = rs->flatshade_first; } static void si_emit_vs_state(struct si_context *sctx, -- 2.30.2