From c105e6169ce753584761b2ec03f64a5b5f4163cf Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Thu, 7 Nov 2019 15:56:35 +0100
Subject: [PATCH] radv,ac/nir: lower deref operations for shared memory

This shouldn't introduce any functional changes for RadeonSI
when NIR is enabled because these operations are already lowered.

pipeline-db (NAVI10/LLVM):
SGPRS: 9043 -> 9051 (0.09 %)
VGPRS: 7272 -> 7292 (0.28 %)
Code Size: 638892 -> 621628 (-2.70 %) bytes
LDS: 1333 -> 1331 (-0.15 %) blocks
Max Waves: 1614 -> 1608 (-0.37 %)

Found this while glancing at some F12019 shaders.

Signed-off-by: Samuel Pitoiset
Reviewed-by: Bas Nieuwenhuizen
---
 src/amd/llvm/ac_nir_to_llvm.c | 48 +++++++++++++++++++----------------
 src/amd/vulkan/radv_shader.c  | 19 ++++++++++++++
 2 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index c6e699567c9..db3ed1436b7 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -101,14 +101,16 @@ static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
 }
 
 static LLVMValueRef
-get_memory_ptr(struct ac_nir_context *ctx, nir_src src)
+get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size)
 {
 	LLVMValueRef ptr = get_src(ctx, src);
 	ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
 	int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
 
+	LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);
+
 	return LLVMBuildBitCast(ctx->ac.builder, ptr,
-				LLVMPointerType(ctx->ac.i32, addr_space), "");
+				LLVMPointerType(type, addr_space), "");
 }
 
 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
@@ -2144,13 +2146,6 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 			}
 		}
 		break;
-	case nir_var_mem_shared: {
-		LLVMValueRef address = get_src(ctx, instr->src[0]);
-		LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
-		return LLVMBuildBitCast(ctx->ac.builder, val,
-					get_def_type(ctx, &instr->dest.ssa),
-					"");
-	}
 	case nir_var_shader_out:
 		if (ctx->stage == MESA_SHADER_TESS_CTRL) {
 			return load_tess_varyings(ctx, instr, false);
@@ -2329,8 +2324,7 @@ visit_store_var(struct ac_nir_context *ctx,
 		}
 		break;
 
-	case nir_var_mem_global:
-	case nir_var_mem_shared: {
+	case nir_var_mem_global: {
 		int writemask = instr->const_index[0];
 		LLVMValueRef address = get_src(ctx, instr->src[0]);
 		LLVMValueRef val = get_src(ctx, instr->src[1]);
@@ -2983,7 +2977,8 @@ visit_load_shared(struct ac_nir_context *ctx,
 {
 	LLVMValueRef values[4], derived_ptr, index, ret;
 
-	LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
+	LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
+					  instr->dest.ssa.bit_size);
 
 	for (int chan = 0; chan < instr->num_components; chan++) {
 		index = LLVMConstInt(ctx->ac.i32, chan, 0);
@@ -3002,7 +2997,8 @@ visit_store_shared(struct ac_nir_context *ctx,
 	LLVMValueRef derived_ptr, data,index;
 	LLVMBuilderRef builder = ctx->ac.builder;
 
-	LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1]);
+	LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1],
+					  instr->src[0].ssa->bit_size);
 	LLVMValueRef src = get_src(ctx, instr->src[0]);
 
 	int writemask = nir_intrinsic_write_mask(instr);
@@ -3551,7 +3547,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 	case nir_intrinsic_shared_atomic_xor:
 	case nir_intrinsic_shared_atomic_exchange:
 	case nir_intrinsic_shared_atomic_comp_swap: {
-		LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
+		LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
+						  instr->src[1].ssa->bit_size);
 		result = visit_var_atomic(ctx, instr, ptr, 1);
 		break;
 	}
@@ -4736,14 +4733,21 @@ static void
 setup_shared(struct ac_nir_context *ctx,
 	     struct nir_shader *nir)
 {
-	nir_foreach_variable(variable, &nir->shared) {
-		LLVMValueRef shared =
-			LLVMAddGlobalInAddressSpace(
-			   ctx->ac.module, glsl_to_llvm_type(&ctx->ac, variable->type),
-			   variable->name ? variable->name : "",
-			   AC_ADDR_SPACE_LDS);
-		_mesa_hash_table_insert(ctx->vars, variable, shared);
-	}
+	if (ctx->ac.lds)
+		return;
+
+	LLVMTypeRef type = LLVMArrayType(ctx->ac.i8,
+					 nir->info.cs.shared_size);
+
+	LLVMValueRef lds =
+		LLVMAddGlobalInAddressSpace(ctx->ac.module, type,
+					    "compute_lds",
+					    AC_ADDR_SPACE_LDS);
+	LLVMSetAlignment(lds, 64 * 1024);
+
+	ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, lds,
+				       LLVMPointerType(ctx->ac.i8,
+						       AC_ADDR_SPACE_LDS), "");
 }
 
 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index a7253976f67..ee09a0fa62b 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -301,6 +301,17 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
 	NIR_PASS(progress, shader, nir_opt_move, nir_move_load_ubo);
 }
 
+static void
+shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+	assert(glsl_type_is_vector_or_scalar(type));
+
+	uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
+	unsigned length = glsl_get_vector_elements(type);
+	*size = comp_size * length;
+	*align = comp_size;
+}
+
 nir_shader *
 radv_shader_compile_to_nir(struct radv_device *device,
 			   struct radv_shader_module *module,
@@ -503,6 +514,14 @@ radv_shader_compile_to_nir(struct radv_device *device,
 	 */
 	nir_lower_var_copies(nir);
 
+	/* Lower deref operations for compute shared memory. */
+	if (nir->info.stage == MESA_SHADER_COMPUTE) {
+		NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
+			   nir_var_mem_shared, shared_var_info);
+		NIR_PASS_V(nir, nir_lower_explicit_io,
+			   nir_var_mem_shared, nir_address_format_32bit_offset);
+	}
+
 	/* Lower large variables that are always constant with load_constant
 	 * intrinsics, which get turned into PC-relative loads from a data
 	 * section next to the shader.
-- 
2.30.2