From: Samuel Pitoiset Date: Wed, 26 Jun 2019 14:35:44 +0000 (+0200) Subject: radv: only export clip/cull distances if PS reads them X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d5004f60be7e5f950f5bd8fc8ced75f7a6dc3421;p=mesa.git radv: only export clip/cull distances if PS reads them The only exception is the GS copy shader which emits them unconditionally. Totals from affected shaders: SGPRS: 71320 -> 71008 (-0.44 %) VGPRS: 54372 -> 54240 (-0.24 %) Code Size: 2952628 -> 2941368 (-0.38 %) bytes Max Waves: 9689 -> 9723 (0.35 %) This helps Dota2, Doom, GTAV and Hitman 2. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index d6f286fe4ec..ead4e379a82 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2861,6 +2861,7 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream) static void handle_vs_outputs_post(struct radv_shader_context *ctx, bool export_prim_id, bool export_layer_id, + bool export_clip_dists, struct radv_vs_output_info *outinfo) { uint32_t param_count = 0; @@ -2917,9 +2918,11 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], &args, sizeof(args)); - /* Export the clip/cull distances values to the next stage. */ - radv_export_param(ctx, param_count, &slots[0], 0xf); - outinfo->vs_output_param_offset[location] = param_count++; + if (export_clip_dists) { + /* Export the clip/cull distances values to the next stage. */ + radv_export_param(ctx, param_count, &slots[0], 0xf); + outinfo->vs_output_param_offset[location] = param_count++; + } } } @@ -3446,6 +3449,7 @@ handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, else handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id, ctx->options->key.vs.export_layer_id, + ctx->options->key.vs.export_clip_dists, &ctx->shader_info->vs.outinfo); break; case MESA_SHADER_FRAGMENT: @@ -3463,6 +3467,7 @@ handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, else handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id, ctx->options->key.tes.export_layer_id, + ctx->options->key.tes.export_clip_dists, &ctx->shader_info->tes.outinfo); break; default: @@ -4117,7 +4122,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) radv_emit_streamout(ctx, stream); if (stream == 0) { - handle_vs_outputs_post(ctx, false, false, + handle_vs_outputs_post(ctx, false, false, true, &ctx->shader_info->vs.outinfo); } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index e61f9647596..985627be398 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2241,10 +2241,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline, pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.prim_id_input; keys[MESA_SHADER_VERTEX].vs.export_layer_id = pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.layer_input; + keys[MESA_SHADER_VERTEX].vs.export_clip_dists = + !!pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.num_input_clips_culls; keys[MESA_SHADER_TESS_EVAL].tes.export_prim_id = pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.prim_id_input; keys[MESA_SHADER_TESS_EVAL].tes.export_layer_id = pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.layer_input; + keys[MESA_SHADER_TESS_EVAL].tes.export_clip_dists = + !!pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.num_input_clips_culls; } if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) { diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index bfd2787a123..17779cd0c42 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -81,12 +81,14 @@ struct radv_vs_variant_key { uint32_t as_ls:1; uint32_t export_prim_id:1; uint32_t export_layer_id:1; + uint32_t export_clip_dists:1; }; struct radv_tes_variant_key { uint32_t as_es:1; uint32_t export_prim_id:1; uint32_t export_layer_id:1; + uint32_t export_clip_dists:1; uint8_t num_patches; uint8_t tcs_num_outputs; };