From 36a4d6d08164344cbb4766944b3c45b2b223cf22 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 16 May 2018 17:40:47 +0200 Subject: [PATCH] radv: add support for 32-bit pointers in user data SGPRs We still use 64-bit GPU pointers for all ring buffers because llvm.amdgcn.implicit.buffer.ptr doesn't seem to support 32-bit GPU pointers for now. This can be improved later anyways. Vega10: Totals from affected shaders: SGPRS: 1008722 -> 1026710 (1.78 %) VGPRS: 706580 -> 707136 (0.08 %) Spilled SGPRs: 22555 -> 22209 (-1.53 %) Spilled VGPRs: 75 -> 75 (0.00 %) Code Size: 34819208 -> 35202140 (1.10 %) bytes Max Waves: 175423 -> 175086 (-0.19 %) Polaris10: Totals from affected shaders: SGPRS: 1029849 -> 1036517 (0.65 %) VGPRS: 709984 -> 708872 (-0.16 %) Spilled SGPRs: 22672 -> 22309 (-1.60 %) Spilled VGPRs: 82 -> 66 (-19.51 %) Scratch size: 76 -> 60 (-21.05 %) dwords per thread Code Size: 34915336 -> 35309752 (1.13 %) bytes Max Waves: 151221 -> 151677 (0.30 %) Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_cmd_buffer.c | 13 +++++++------ src/amd/vulkan/radv_device.c | 6 ++++-- src/amd/vulkan/radv_nir_to_llvm.c | 24 +++++++++++++++--------- src/amd/vulkan/radv_private.h | 18 ++++++++++++++---- 4 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 3636b2c8d9c..5ab577b4c59 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -586,11 +586,12 @@ radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, uint32_t base_reg = pipeline->user_data_0[stage]; if (loc->sgpr_idx == -1) return; - assert(loc->num_sgprs == 2); + + assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2)); assert(!loc->indirect); - radv_emit_shader_pointer(cmd_buffer->cs, - base_reg + loc->sgpr_idx * 4, va); + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, + base_reg + loc->sgpr_idx * 4, va, false); } static void @@ -1442,10 +1443,10 @@ emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer, return; assert(!desc_set_loc->indirect); - assert(desc_set_loc->num_sgprs == 2); + assert(desc_set_loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2)); - radv_emit_shader_pointer(cmd_buffer->cs, - base_reg + desc_set_loc->sgpr_idx * 4, va); + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, + base_reg + desc_set_loc->sgpr_idx * 4, va, false); } static void diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index d6abab338eb..61b4fba23f8 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1963,7 +1963,8 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; for (int i = 0; i < ARRAY_SIZE(regs); ++i) { - radv_emit_shader_pointer(cs, regs[i], va); + radv_emit_shader_pointer(queue->device, cs, regs[i], + va, true); } } else { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, @@ -1974,7 +1975,8 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, R_00B530_SPI_SHADER_USER_DATA_LS_0}; for (int i = 0; i < ARRAY_SIZE(regs); ++i) { - radv_emit_shader_pointer(cs, regs[i], va); + radv_emit_shader_pointer(queue->device, cs, regs[i], + va, true); } } } diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 9e42983cd17..c2cc5038b8f 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -569,7 +569,10 @@ set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, static void set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx) { - set_loc_shader(ctx, idx, sgpr_idx, 2); + bool use_32bit_pointers = HAVE_32BIT_POINTERS && + idx != AC_UD_SCRATCH_RING_OFFSETS; + + set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); } static void @@ -580,7 +583,7 @@ set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx]; assert(ud_info); - set_loc(ud_info, sgpr_idx, 2, indirect_offset); + set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect_offset); } struct user_sgpr_info { @@ -618,7 +621,8 @@ count_vs_user_sgprs(struct radv_shader_context *ctx) { uint8_t count = 0; - count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0; + if (ctx->shader_info->info.vs.has_vertex_buffers) + count += HAVE_32BIT_POINTERS ? 1 : 2; count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2; return count; @@ -685,13 +689,13 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx, user_sgpr_info->sgpr_count++; if (ctx->shader_info->info.loads_push_constants) - user_sgpr_info->sgpr_count += 2; + user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2; uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16; uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { - user_sgpr_info->sgpr_count += 2; + user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2; user_sgpr_info->indirect_all_descriptor_sets = true; } else { user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; @@ -707,7 +711,7 @@ declare_global_input_sgprs(struct radv_shader_context *ctx, struct arg_info *args, LLVMValueRef *desc_sets) { - LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8); + LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8); unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0; unsigned stage_mask = 1 << stage; @@ -725,7 +729,7 @@ declare_global_input_sgprs(struct radv_shader_context *ctx, } } } else { - add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets); + add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets); } if (ctx->shader_info->info.loads_push_constants) { @@ -745,7 +749,8 @@ declare_vs_specific_input_sgprs(struct radv_shader_context *ctx, (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { if (ctx->shader_info->info.vs.has_vertex_buffers) { - add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), + add_arg(args, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->ac.v4i32), &ctx->vertex_buffers); } add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex); @@ -1878,7 +1883,8 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); - list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), ""); + list = LLVMBuildPointerCast(builder, list, + ac_array_in_const32_addr_space(type), ""); return ac_build_load_to_sgpr(&ctx->ac, list, index); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index adfd75c2a87..e2fa58d8d1e 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -57,6 +57,7 @@ #include "ac_nir_to_llvm.h" #include "ac_gpu_info.h" #include "ac_surface.h" +#include "ac_llvm_build.h" #include "radv_descriptor_set.h" #include "radv_extensions.h" #include "radv_cs.h" @@ -1130,12 +1131,21 @@ bool radv_get_memory_fd(struct radv_device *device, int *pFD); static inline void -radv_emit_shader_pointer(struct radeon_winsys_cs *cs, - uint32_t sh_offset, uint64_t va) +radv_emit_shader_pointer(struct radv_device *device, + struct radeon_winsys_cs *cs, + uint32_t sh_offset, uint64_t va, bool global) { - radeon_set_sh_reg_seq(cs, sh_offset, 2); + bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global; + + radeon_set_sh_reg_seq(cs, sh_offset, use_32bit_pointers ? 1 : 2); radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + + if (use_32bit_pointers) { + assert(va == 0 || + (va >> 32) == device->physical_device->rad_info.address32_hi); + } else { + radeon_emit(cs, va >> 32); + } } static inline struct radv_descriptor_state * -- 2.30.2