From 9fd9a7d0ba39ed2328b1d48cd8ae83f070202f51 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 22 Apr 2017 18:04:00 +0200 Subject: [PATCH] radeonsi: remove VS epilog code, compile VS with PrimID export on demand MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The use of PrimID in the pixel shader is too rare to deserve such a sizable support code. The initial idea of the VS epilog was to move the clipping code there and remove it based on states, but optimized variants are now used to do that and are easier to support, so the VS epilog has turned out to be not so useful. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_pipe.c | 1 - src/gallium/drivers/radeonsi/si_pipe.h | 11 - src/gallium/drivers/radeonsi/si_shader.c | 208 +++--------------- src/gallium/drivers/radeonsi/si_shader.h | 15 +- .../drivers/radeonsi/si_state_shaders.c | 6 +- 5 files changed, 31 insertions(+), 210 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 296732dce8e..47d170af0ae 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -699,7 +699,6 @@ static void si_destroy_screen(struct pipe_screen* pscreen) struct si_screen *sscreen = (struct si_screen *)pscreen; struct si_shader_part *parts[] = { sscreen->vs_prologs, - sscreen->vs_epilogs, sscreen->tcs_epilogs, sscreen->gs_prologs, sscreen->ps_prologs, diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 918aa0f0717..ea61e1e4e7e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -87,7 +87,6 @@ struct si_screen { mtx_t shader_parts_mutex; struct si_shader_part *vs_prologs; - struct si_shader_part *vs_epilogs; struct si_shader_part *tcs_epilogs; struct si_shader_part *gs_prologs; struct si_shader_part *ps_prologs; @@ -509,16 +508,6 @@ static inline struct si_shader* si_get_vs_state(struct si_context *sctx) return sctx->vs_shader.current; } -static inline bool si_vs_exports_prim_id(struct si_shader *shader) -{ - if (shader->selector->type == PIPE_SHADER_VERTEX) - return shader->key.part.vs.epilog.export_prim_id; - else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) - return shader->key.part.tes.epilog.export_prim_id; - else - return false; -} - static inline unsigned si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size) { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 59abdb755e1..a5d7373bdd4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -76,8 +76,6 @@ static unsigned llvm_get_type_size(LLVMTypeRef type); static void si_build_vs_prolog_function(struct si_shader_context *ctx, union si_shader_part_key *key); -static void si_build_vs_epilog_function(struct si_shader_context *ctx, - union si_shader_part_key *key); static void si_build_tcs_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key); static void si_build_ps_prolog_function(struct si_shader_context *ctx, @@ -90,11 +88,6 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, */ #define PS_EPILOG_SAMPLEMASK_MIN_LOC 13 -/* The VS location of the PrimitiveID input is the same in the epilog, - * so that the main shader part doesn't have to move it. - */ -#define VS_EPILOG_PRIMID_LOC 2 - enum { CONST_ADDR_SPACE = 2, LOCAL_ADDR_SPACE = 3, @@ -2990,19 +2983,25 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3; } - } - /* Return the primitive ID from the LLVM function. */ - ctx->return_value = - LLVMBuildInsertValue(gallivm->builder, - ctx->return_value, - bitcast(bld_base, TGSI_TYPE_FLOAT, - get_primitive_id(bld_base, 0)), - VS_EPILOG_PRIMID_LOC, ""); - if (ctx->shader->selector->so.num_outputs) si_llvm_emit_streamout(ctx, outputs, i, 0); + + /* Export PrimitiveID. */ + if (ctx->shader->key.mono.vs_export_prim_id) { + outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID; + outputs[i].semantic_index = 0; + outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, + get_primitive_id(bld_base, 0)); + for (j = 1; j < 4; j++) + outputs[i].values[j] = LLVMConstReal(ctx->f32, 0); + + memset(outputs[i].vertex_stream, 0, + sizeof(outputs[i].vertex_stream)); + i++; + } + si_llvm_export_vs(bld_base, outputs, i); FREE(outputs); } @@ -5924,13 +5923,6 @@ static void create_function(struct si_shader_context *ctx) /* VGPRs */ declare_vs_input_vgprs(ctx, params, &num_params, &num_prolog_vgprs); - - /* PrimitiveID output. */ - if (!shader->is_gs_copy_shader && - !shader->key.as_es && !shader->key.as_ls) { - for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++) - returns[num_returns++] = ctx->f32; - } break; case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */ @@ -6084,11 +6076,6 @@ static void create_function(struct si_shader_context *ctx) /* VGPRs */ declare_tes_input_vgprs(ctx, params, &num_params); - - /* PrimitiveID output. */ - if (!shader->key.as_es) - for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++) - returns[num_returns++] = ctx->f32; break; case PIPE_SHADER_GEOMETRY: @@ -7045,8 +7032,8 @@ static void si_dump_shader_key(unsigned processor, struct si_shader *shader, "part.vs.prolog", f); fprintf(f, " as_es = %u\n", key->as_es); fprintf(f, " as_ls = %u\n", key->as_ls); - fprintf(f, " part.vs.epilog.export_prim_id = %u\n", - key->part.vs.epilog.export_prim_id); + fprintf(f, " mono.vs_export_prim_id = %u\n", + key->mono.vs_export_prim_id); break; case PIPE_SHADER_TESS_CTRL: @@ -7059,8 +7046,9 @@ static void si_dump_shader_key(unsigned processor, struct si_shader *shader, break; case PIPE_SHADER_TESS_EVAL: - fprintf(f, " part.tes.epilog.export_prim_id = %u\n", key->part.tes.epilog.export_prim_id); fprintf(f, " as_es = %u\n", key->as_es); + fprintf(f, " mono.vs_export_prim_id = %u\n", + key->mono.vs_export_prim_id); break; case PIPE_SHADER_GEOMETRY: @@ -7388,28 +7376,6 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info, shader_out->info.uses_instanceid = true; } -/** - * Compute the VS epilog key, which contains all the information needed to - * build the VS epilog function, and set the PrimitiveID output offset. - */ -static void si_get_vs_epilog_key(struct si_shader *shader, - struct si_vs_epilog_bits *states, - union si_shader_part_key *key) -{ - memset(key, 0, sizeof(*key)); - key->vs_epilog.states = *states; - - /* Set up the PrimitiveID output. */ - if (shader->key.part.vs.epilog.export_prim_id) { - unsigned index = shader->selector->info.num_outputs; - unsigned offset = shader->info.nr_param_exports++; - - key->vs_epilog.prim_id_param_offset = offset; - assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset)); - shader->info.vs_output_param_offset[index] = offset; - } -} - /** * Compute the PS prolog key, which contains all the information needed to * build the PS prolog function, and set related bits in shader->config. @@ -7962,14 +7928,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, } if (is_monolithic && ctx.type == PIPE_SHADER_VERTEX) { - LLVMValueRef parts[3]; - bool need_prolog; - bool need_epilog; - - need_prolog = sel->vs_needs_prolog; - need_epilog = !shader->key.as_es && !shader->key.as_ls; + LLVMValueRef parts[2]; + bool need_prolog = sel->vs_needs_prolog; - parts[need_prolog ? 1 : 0] = ctx.main_fn; + parts[1] = ctx.main_fn; if (need_prolog) { union si_shader_part_key prolog_key; @@ -7981,15 +7943,8 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, parts[0] = ctx.main_fn; } - if (need_epilog) { - union si_shader_part_key epilog_key; - si_get_vs_epilog_key(shader, &shader->key.part.vs.epilog, &epilog_key); - si_build_vs_epilog_function(&ctx, &epilog_key); - parts[need_prolog ? 2 : 1] = ctx.main_fn; - } - - si_build_wrapper_function(&ctx, parts, 1 + need_prolog + need_epilog, - need_prolog ? 1 : 0, 0); + si_build_wrapper_function(&ctx, parts + !need_prolog, + 1 + need_prolog, need_prolog, 0); } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) { if (sscreen->b.chip_class >= GFX9) { struct si_shader_selector *ls = shader->key.part.tcs.ls; @@ -8053,18 +8008,6 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, si_build_wrapper_function(&ctx, parts, 2, 0, 0); } - } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_EVAL && - !shader->key.as_es) { - LLVMValueRef parts[2]; - union si_shader_part_key epilog_key; - - parts[0] = ctx.main_fn; - - si_get_vs_epilog_key(shader, &shader->key.part.tes.epilog, &epilog_key); - si_build_vs_epilog_function(&ctx, &epilog_key); - parts[1] = ctx.main_fn; - - si_build_wrapper_function(&ctx, parts, 2, 0, 0); } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) { if (ctx.screen->b.chip_class >= GFX9) { struct si_shader_selector *es = shader->key.part.gs.es; @@ -8464,57 +8407,6 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, si_llvm_build_ret(ctx, ret); } -/** - * Build the vertex shader epilog function. This is also used by the tessellation - * evaluation shader compiled as VS. - * - * The input is PrimitiveID. - * - * If PrimitiveID is required by the pixel shader, export it. - * Otherwise, do nothing. - */ -static void si_build_vs_epilog_function(struct si_shader_context *ctx, - union si_shader_part_key *key) -{ - struct gallivm_state *gallivm = &ctx->gallivm; - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; - LLVMTypeRef params[5]; - int num_params, i; - - /* Declare input VGPRs. */ - num_params = key->vs_epilog.states.export_prim_id ? - (VS_EPILOG_PRIMID_LOC + 1) : 0; - assert(num_params <= ARRAY_SIZE(params)); - - for (i = 0; i < num_params; i++) - params[i] = ctx->f32; - - /* Create the function. */ - si_create_function(ctx, "vs_epilog", NULL, 0, params, num_params, -1); - - /* Emit exports. */ - if (key->vs_epilog.states.export_prim_id) { - struct lp_build_context *base = &bld_base->base; - struct ac_export_args args; - - args.enabled_channels = 0x1; /* enabled channels */ - args.valid_mask = 0; /* whether the EXEC mask is valid */ - args.done = 0; /* DONE bit */ - args.target = V_008DFC_SQ_EXP_PARAM + - key->vs_epilog.prim_id_param_offset; - args.compr = 0; /* COMPR flag (0 = 32-bit export) */ - args.out[0] = LLVMGetParam(ctx->main_fn, - VS_EPILOG_PRIMID_LOC); /* X */ - args.out[1] = base->undef; /* Y */ - args.out[2] = base->undef; /* Z */ - args.out[3] = base->undef; /* W */ - - ac_build_export(&ctx->ac, &args); - } - - LLVMBuildRetVoid(gallivm->builder); -} - static bool si_get_vs_prolog(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader, @@ -8541,27 +8433,6 @@ static bool si_get_vs_prolog(struct si_screen *sscreen, return shader->prolog != NULL; } -/** - * Create & compile a vertex shader epilog. This a helper used by VS and TES. - */ -static bool si_get_vs_epilog(struct si_screen *sscreen, - LLVMTargetMachineRef tm, - struct si_shader *shader, - struct pipe_debug_callback *debug, - struct si_vs_epilog_bits *states) -{ - union si_shader_part_key epilog_key; - - si_get_vs_epilog_key(shader, states, &epilog_key); - - shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs, - PIPE_SHADER_VERTEX, true, - &epilog_key, tm, debug, - si_build_vs_epilog_function, - "Vertex Shader Epilog"); - return shader->epilog != NULL; -} - /** * Select and compile (or reuse) vertex shader parts (prolog & epilog). */ @@ -8570,33 +8441,8 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug) { - if (!si_get_vs_prolog(sscreen, tm, shader, debug, shader, - &shader->key.part.vs.prolog)) - return false; - - /* Get the epilog. */ - if (!shader->key.as_es && !shader->key.as_ls && - !si_get_vs_epilog(sscreen, tm, shader, debug, - &shader->key.part.vs.epilog)) - return false; - - return true; -} - -/** - * Select and compile (or reuse) TES parts (epilog). - */ -static bool si_shader_select_tes_parts(struct si_screen *sscreen, - LLVMTargetMachineRef tm, - struct si_shader *shader, - struct pipe_debug_callback *debug) -{ - if (shader->key.as_es) - return true; - - /* TES compiled as VS. */ - return si_get_vs_epilog(sscreen, tm, shader, debug, - &shader->key.part.tes.epilog); + return si_get_vs_prolog(sscreen, tm, shader, debug, shader, + &shader->key.part.vs.prolog); } /** @@ -9260,8 +9106,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, return -1; break; case PIPE_SHADER_TESS_EVAL: - if (!si_shader_select_tes_parts(sscreen, tm, shader, debug)) - return -1; break; case PIPE_SHADER_GEOMETRY: if (!si_shader_select_gs_parts(sscreen, tm, shader, debug)) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1eb9c0bc29d..ad50df92327 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -390,11 +390,6 @@ struct si_vs_prolog_bits { unsigned instance_divisors[SI_MAX_ATTRIBS]; }; -/* Common VS bits between the shader key and the epilog key. */ -struct si_vs_epilog_bits { - unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ -}; - /* Common TCS bits between the shader key and the epilog key. */ struct si_tcs_epilog_bits { unsigned prim_mode:3; @@ -441,10 +436,6 @@ union si_shader_part_key { /* Prologs for monolithic shaders shouldn't set EXEC. */ unsigned is_monolithic:1; } vs_prolog; - struct { - struct si_vs_epilog_bits states; - unsigned prim_id_param_offset:5; - } vs_epilog; struct { struct si_tcs_epilog_bits states; } tcs_epilog; @@ -479,16 +470,12 @@ struct si_shader_key { union { struct { struct si_vs_prolog_bits prolog; - struct si_vs_epilog_bits epilog; } vs; struct { struct si_vs_prolog_bits ls_prolog; /* for merged LS-HS */ struct si_shader_selector *ls; /* for merged LS-HS */ struct si_tcs_epilog_bits epilog; } tcs; /* tessellation control shader */ - struct { - struct si_vs_epilog_bits epilog; /* same as VS */ - } tes; /* tessellation evaluation shader */ struct { struct si_vs_prolog_bits vs_prolog; /* for merged ES-GS */ struct si_shader_selector *es; /* for merged ES-GS */ @@ -511,6 +498,8 @@ struct si_shader_key { /* One byte for every input: SI_FIX_FETCH_* enums. */ uint8_t vs_fix_fetch[SI_MAX_ATTRIBS]; uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */ + /* When PS needs PrimID and GS is disabled. */ + unsigned vs_export_prim_id:1; } mono; /* Optimization flags for asynchronous compilation only. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 6b910778536..2150eb9896c 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -853,7 +853,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, unsigned oc_lds_en; unsigned window_space = shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; - bool enable_prim_id = si_vs_exports_prim_id(shader); + bool enable_prim_id = shader->key.mono.vs_export_prim_id; pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -1271,7 +1271,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, si_shader_selector_key_hw_vs(sctx, sel, key); if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) - key->part.vs.epilog.export_prim_id = 1; + key->mono.vs_export_prim_id = 1; } break; case PIPE_SHADER_TESS_CTRL: @@ -1296,7 +1296,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, si_shader_selector_key_hw_vs(sctx, sel, key); if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) - key->part.tes.epilog.export_prim_id = 1; + key->mono.vs_export_prim_id = 1; } break; case PIPE_SHADER_GEOMETRY: -- 2.30.2