radv,ac/nir: lower deref operations for shared memory
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 7 Nov 2019 14:56:35 +0000 (15:56 +0100)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 29 Nov 2019 20:58:18 +0000 (21:58 +0100)
This shouldn't introduce any functional changes for RadeonSI
when NIR is enabled because these operations are already lowered.

pipeline-db (NAVI10/LLVM):
SGPRS: 9043 -> 9051 (0.09 %)
VGPRS: 7272 -> 7292 (0.28 %)
Code Size: 638892 -> 621628 (-2.70 %) bytes
LDS: 1333 -> 1331 (-0.15 %) blocks
Max Waves: 1614 -> 1608 (-0.37 %)

Found this while glancing at some F12019 shaders.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/llvm/ac_nir_to_llvm.c
src/amd/vulkan/radv_shader.c

index c6e699567c9d70e06b264245f12572cb895fee6a..db3ed1436b744bdccc91d97082633c314e4c0b10 100644 (file)
@@ -101,14 +101,16 @@ static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
 }
 
 static LLVMValueRef
-get_memory_ptr(struct ac_nir_context *ctx, nir_src src)
+get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size)
 {
        LLVMValueRef ptr = get_src(ctx, src);
        ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
        int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
 
+       LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);
+
        return LLVMBuildBitCast(ctx->ac.builder, ptr,
-                               LLVMPointerType(ctx->ac.i32, addr_space), "");
+                               LLVMPointerType(type, addr_space), "");
 }
 
 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
@@ -2144,13 +2146,6 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
                        }
                }
                break;
-       case nir_var_mem_shared: {
-               LLVMValueRef address = get_src(ctx, instr->src[0]);
-               LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
-               return LLVMBuildBitCast(ctx->ac.builder, val,
-                                       get_def_type(ctx, &instr->dest.ssa),
-                                       "");
-       }
        case nir_var_shader_out:
                if (ctx->stage == MESA_SHADER_TESS_CTRL) {
                        return load_tess_varyings(ctx, instr, false);
@@ -2329,8 +2324,7 @@ visit_store_var(struct ac_nir_context *ctx,
                }
                break;
 
-       case nir_var_mem_global:
-       case nir_var_mem_shared: {
+       case nir_var_mem_global: {
                int writemask = instr->const_index[0];
                LLVMValueRef address = get_src(ctx, instr->src[0]);
                LLVMValueRef val = get_src(ctx, instr->src[1]);
@@ -2983,7 +2977,8 @@ visit_load_shared(struct ac_nir_context *ctx,
 {
        LLVMValueRef values[4], derived_ptr, index, ret;
 
-       LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
+       LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
+                                         instr->dest.ssa.bit_size);
 
        for (int chan = 0; chan < instr->num_components; chan++) {
                index = LLVMConstInt(ctx->ac.i32, chan, 0);
@@ -3002,7 +2997,8 @@ visit_store_shared(struct ac_nir_context *ctx,
        LLVMValueRef derived_ptr, data,index;
        LLVMBuilderRef builder = ctx->ac.builder;
 
-       LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1]);
+       LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1],
+                                         instr->src[0].ssa->bit_size);
        LLVMValueRef src = get_src(ctx, instr->src[0]);
 
        int writemask = nir_intrinsic_write_mask(instr);
@@ -3551,7 +3547,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        case nir_intrinsic_shared_atomic_xor:
        case nir_intrinsic_shared_atomic_exchange:
        case nir_intrinsic_shared_atomic_comp_swap: {
-               LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
+               LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
+                                                 instr->src[1].ssa->bit_size);
                result = visit_var_atomic(ctx, instr, ptr, 1);
                break;
        }
@@ -4736,14 +4733,21 @@ static void
 setup_shared(struct ac_nir_context *ctx,
             struct nir_shader *nir)
 {
-       nir_foreach_variable(variable, &nir->shared) {
-               LLVMValueRef shared =
-                       LLVMAddGlobalInAddressSpace(
-                          ctx->ac.module, glsl_to_llvm_type(&ctx->ac, variable->type),
-                          variable->name ? variable->name : "",
-                          AC_ADDR_SPACE_LDS);
-               _mesa_hash_table_insert(ctx->vars, variable, shared);
-       }
+       if (ctx->ac.lds)
+               return;
+
+       LLVMTypeRef type = LLVMArrayType(ctx->ac.i8,
+                                        nir->info.cs.shared_size);
+
+       LLVMValueRef lds =
+               LLVMAddGlobalInAddressSpace(ctx->ac.module, type,
+                                           "compute_lds",
+                                           AC_ADDR_SPACE_LDS);
+       LLVMSetAlignment(lds, 64 * 1024);
+
+       ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, lds,
+                                      LLVMPointerType(ctx->ac.i8,
+                                                      AC_ADDR_SPACE_LDS), "");
 }
 
 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
index a7253976f6737cebff246e0d13ee3625c34235e6..ee09a0fa62b303201d7e3ce62785bf7ef4693d96 100644 (file)
@@ -301,6 +301,17 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
         NIR_PASS(progress, shader, nir_opt_move, nir_move_load_ubo);
 }
 
+static void
+shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+	assert(glsl_type_is_vector_or_scalar(type));
+
+	uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
+	unsigned length = glsl_get_vector_elements(type);
+	*size = comp_size * length;
+	*align = comp_size;
+}
+
 nir_shader *
 radv_shader_compile_to_nir(struct radv_device *device,
                           struct radv_shader_module *module,
@@ -503,6 +514,14 @@ radv_shader_compile_to_nir(struct radv_device *device,
         */
        nir_lower_var_copies(nir);
 
+       /* Lower deref operations for compute shared memory. */
+       if (nir->info.stage == MESA_SHADER_COMPUTE) {
+               NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
+                          nir_var_mem_shared, shared_var_info);
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                          nir_var_mem_shared, nir_address_format_32bit_offset);
+       }
+
        /* Lower large variables that are always constant with load_constant
         * intrinsics, which get turned into PC-relative loads from a data
         * section next to the shader.