From e7f4cadd02ca88fd3af5a396c71a33e91088228b Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 12 Nov 2019 15:38:46 +0100 Subject: [PATCH] radv: Replace supports_spill with explict_scratch_args The former was always true and hence dead code. We will want to explicitly declare the ring offset register with ACO, but we also want to declare the scratch offset too, and we can't try to disable it since ACO also supports spilling and the determination of whether spilling has to happen occurs well after setting up registers. So replace supports_spill with something that will actually be used for ACO. Reviewed-by: Samuel Pitoiset --- .../aco_instruction_selection_setup.cpp | 22 ++----- src/amd/vulkan/radv_nir_to_llvm.c | 14 ++--- src/amd/vulkan/radv_shader.c | 5 +- src/amd/vulkan/radv_shader.h | 2 +- src/amd/vulkan/radv_shader_args.c | 59 +++++++++++-------- src/amd/vulkan/radv_shader_args.h | 1 + 6 files changed, 49 insertions(+), 54 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index c3f2832fa8f..fbab89417cd 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -95,9 +95,6 @@ struct isel_context { bool exec_potentially_empty = false; } cf_info; - /* scratch */ - bool scratch_enabled = false; - /* inputs common for merged stages */ Temp merged_wave_info = Temp(0, s1); @@ -639,8 +636,7 @@ static void allocate_user_sgprs(isel_context *ctx, user_sgpr_info.need_ring_offsets = true; /* 2 user sgprs will nearly always be allocated for scratch/rings */ - if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) - user_sgpr_count += 2; + user_sgpr_count += 2; switch (ctx->stage) { case vertex_vs: @@ -895,10 +891,8 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx) arg_info args = {}; /* this needs to be in sgprs 0 and 1 */ - if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) { - add_arg(&args, s2, &ctx->program->private_segment_buffer, 0); - set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx); - } + add_arg(&args, s2, &ctx->program->private_segment_buffer, 0); + set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx); unsigned vgpr_idx = 0; switch (ctx->stage) { @@ -928,8 +922,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx) else declare_streamout_sgprs(ctx, &args, &idx); - if (ctx->options->supports_spill || ctx->scratch_enabled) - add_arg(&args, s1, &ctx->program->scratch_offset, idx++); + add_arg(&args, s1, &ctx->program->scratch_offset, idx++); declare_vs_input_vgprs(ctx, &args); break; @@ -940,8 +933,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx) assert(user_sgpr_info.user_sgpr_idx == user_sgpr_info.num_sgpr); add_arg(&args, s1, &ctx->prim_mask, user_sgpr_info.user_sgpr_idx); - if (ctx->options->supports_spill || ctx->scratch_enabled) - add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1); + add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1); ctx->program->config->spi_ps_input_addr = 0; ctx->program->config->spi_ps_input_ena = 0; @@ -1004,8 +996,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx) if (ctx->program->info->cs.uses_local_invocation_idx) add_arg(&args, s1, &ctx->tg_size, idx++); - if (ctx->options->supports_spill || ctx->scratch_enabled) - add_arg(&args, s1, &ctx->program->scratch_offset, idx++); + add_arg(&args, s1, &ctx->program->scratch_offset, idx++); add_arg(&args, v3, &ctx->local_invocation_ids, vgpr_idx++); break; @@ -1357,7 +1348,6 @@ setup_isel_context(Program* program, unsigned scratch_size = 0; for (unsigned i = 0; i < shader_count; i++) scratch_size = std::max(scratch_size, shaders[i]->scratch_size); - ctx.scratch_enabled = scratch_size > 0; ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024); ctx.block = ctx.program->create_and_insert_block(); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 11f983974d6..5d87b9a675a 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -411,15 +411,11 @@ static void create_function(struct radv_shader_context *ctx, ctx->max_workgroup_size, ctx->args->options); - if (ctx->args->options->supports_spill) { - ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", - LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST), - NULL, 0, AC_FUNC_ATTR_READNONE); - ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets, - ac_array_in_const_addr_space(ctx->ac.v4i32), ""); - } else if (ctx->args->ring_offsets.used) { - ctx->ring_offsets = ac_get_arg(&ctx->ac, ctx->args->ring_offsets); - } + ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", + LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST), + NULL, 0, AC_FUNC_ATTR_READNONE); + ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets, + ac_array_in_const_addr_space(ctx->ac.v4i32), ""); load_descriptor_sets(ctx); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 81526c7eca0..a7253976f67 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1118,8 +1118,7 @@ shader_variant_compile(struct radv_device *device, struct ac_llvm_compiler ac_llvm; bool thread_compiler; - if (options->supports_spill) - tm_options |= AC_TM_SUPPORTS_SPILL; + tm_options |= AC_TM_SUPPORTS_SPILL; if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED) tm_options |= AC_TM_SISCHED; if (options->check_ir) @@ -1200,7 +1199,7 @@ radv_shader_variant_compile(struct radv_device *device, if (key) options.key = *key; - options.supports_spill = true; + options.explicit_scratch_args = use_aco; options.robust_buffer_access = device->robust_buffer_access; return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info, diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 0dde52e1303..7ffce47bdd8 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -125,7 +125,7 @@ struct radv_shader_variant_key { struct radv_nir_compiler_options { struct radv_pipeline_layout *layout; struct radv_shader_variant_key key; - bool supports_spill; + bool explicit_scratch_args; bool clamp_shadow_reference; bool robust_buffer_access; bool dump_shader; diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 949b91dcf94..bcec3e9d2e7 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -72,7 +72,6 @@ set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) } struct user_sgpr_info { - bool need_ring_offsets; bool indirect_all_descriptor_sets; uint8_t remaining_sgprs; }; @@ -168,22 +167,8 @@ static void allocate_user_sgprs(struct radv_shader_args *args, memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); - /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ - if (stage == MESA_SHADER_GEOMETRY || - stage == MESA_SHADER_VERTEX || - stage == MESA_SHADER_TESS_CTRL || - stage == MESA_SHADER_TESS_EVAL || - args->is_gs_copy_shader) - user_sgpr_info->need_ring_offsets = true; - - if (stage == MESA_SHADER_FRAGMENT && - args->shader_info->ps.needs_sample_positions) - user_sgpr_info->need_ring_offsets = true; - - /* 2 user sgprs will nearly always be allocated for scratch/rings */ - if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) { - user_sgpr_count += 2; - } + /* 2 user sgprs will always be allocated for scratch/rings */ + user_sgpr_count += 2; switch (stage) { case MESA_SHADER_COMPUTE: @@ -464,7 +449,7 @@ radv_declare_shader_args(struct radv_shader_args *args, allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index, &user_sgpr_info); - if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) { + if (args->options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets); } @@ -490,6 +475,11 @@ radv_declare_shader_args(struct radv_shader_args *args, &args->ac.tg_size); } + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } + ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids); break; @@ -513,6 +503,11 @@ radv_declare_shader_args(struct radv_shader_args *args, declare_streamout_sgprs(args, stage); } + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } + declare_vs_input_vgprs(args); break; case MESA_SHADER_TESS_CTRL: @@ -524,7 +519,7 @@ radv_declare_shader_args(struct radv_shader_args *args, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tess_factor_offset); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown @@ -556,6 +551,10 @@ radv_declare_shader_args(struct radv_shader_args *args, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tess_factor_offset); + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, @@ -578,6 +577,10 @@ radv_declare_shader_args(struct radv_shader_args *args, declare_streamout_sgprs(args, stage); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); } + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } declare_tes_input_vgprs(args); break; case MESA_SHADER_GEOMETRY: @@ -595,7 +598,7 @@ radv_declare_shader_args(struct radv_shader_args *args, &args->merged_wave_info); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown @@ -638,6 +641,10 @@ radv_declare_shader_args(struct radv_shader_args *args, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id); + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->gs_vtx_offset[0]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, @@ -660,6 +667,10 @@ radv_declare_shader_args(struct radv_shader_args *args, declare_global_input_sgprs(args, &user_sgpr_info); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask); + if (args->options->explicit_scratch_args) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->scratch_offset); + } ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample); ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center); ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid); @@ -682,7 +693,7 @@ radv_declare_shader_args(struct radv_shader_args *args, } args->shader_info->num_input_vgprs = 0; - args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0; + args->shader_info->num_input_sgprs = 2; args->shader_info->num_input_sgprs += args->ac.num_sgprs_used; if (stage != MESA_SHADER_FRAGMENT) @@ -690,10 +701,8 @@ radv_declare_shader_args(struct radv_shader_args *args, uint8_t user_sgpr_idx = 0; - if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) { - set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, - &user_sgpr_idx); - } + set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, + &user_sgpr_idx); /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h index a7442c617de..0f57058bd6e 100644 --- a/src/amd/vulkan/radv_shader_args.h +++ b/src/amd/vulkan/radv_shader_args.h @@ -34,6 +34,7 @@ struct radv_shader_args { struct ac_arg descriptor_sets[MAX_SETS]; struct ac_arg ring_offsets; + struct ac_arg scratch_offset; struct ac_arg vertex_buffers; struct ac_arg rel_auto_id; -- 2.30.2