radeonsi: add shared memory
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 29 Mar 2016 15:51:49 +0000 (17:51 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 19 Apr 2016 16:10:30 +0000 (18:10 +0200)
Declares the shared memory as a global variable so that
LLVM is aware of it and it does not conflict with passes
like AMDGPUPromoteAlloca.

v2: - Use ctx->i8.
    - Dropped null-check for declare_memory_region.
    - Changed memory region array to single region.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
src/gallium/drivers/radeon/radeon_llvm.h
src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h

index 0a164bba307af20ea7a1d9892a3c5d1c562c311d..3e11b36b1fddb68c19b3834153bd2665b0afe0fe 100644 (file)
@@ -68,6 +68,9 @@ struct radeon_llvm_context {
                        unsigned index,
                        const struct tgsi_full_declaration *decl);
 
+       void (*declare_memory_region)(struct radeon_llvm_context *,
+                       const struct tgsi_full_declaration *decl);
+
        /** This array contains the input values for the shader.  Typically these
          * values will be in the form of a target intrinsic that will inform the
          * backend how to load the actual inputs to the shader. 
index fb883cb585ef4c60ed314ca623d81c08a183e29e..08281973b2962bce4fa9306e492138d827c4090f 100644 (file)
@@ -366,6 +366,10 @@ static void emit_declaration(
                break;
        }
 
+       case TGSI_FILE_MEMORY:
+               ctx->declare_memory_region(ctx, decl);
+               break;
+
        default:
                break;
        }
index 68171c831d281ea24891c4121cf67c89570f2f22..9229fa104501265e79e14779615a785be6004c3c 100644 (file)
@@ -121,6 +121,8 @@ struct si_shader_context
        LLVMTypeRef v4i32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
+
+       LLVMValueRef shared_memory;
 };
 
 static struct si_shader_context *si_shader_context(
@@ -1320,6 +1322,30 @@ static void declare_system_value(
        radeon_bld->system_values[index] = value;
 }
 
+static void declare_compute_memory(struct radeon_llvm_context *radeon_bld,
+                                   const struct tgsi_full_declaration *decl)
+{ /* Adds one "compute_lds" global to the module and stores an i8 pointer to it in ctx->shared_memory. */
+       struct si_shader_context *ctx =
+               si_shader_context(&radeon_bld->soa.bld_base);
+       struct si_shader_selector *sel = ctx->shader->selector;
+       struct gallivm_state *gallivm = &radeon_bld->gallivm;
+
+       LLVMTypeRef i8p = LLVMPointerType(ctx->i8, LOCAL_ADDR_SPACE); /* i8 pointer type in LOCAL_ADDR_SPACE */
+       LLVMValueRef var;
+
+       assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED); /* only shared-type memory declarations are accepted */
+       assert(decl->Range.First == decl->Range.Last); /* the declaration must cover exactly one index */
+       assert(!ctx->shared_memory); /* the region must not have been declared before */
+
+       var = LLVMAddGlobalInAddressSpace(gallivm->module,
+                                         LLVMArrayType(ctx->i8, sel->local_size), /* array of sel->local_size i8 elements */
+                                         "compute_lds",
+                                         LOCAL_ADDR_SPACE);
+       LLVMSetAlignment(var, 4); /* request an alignment of 4 for the global */
+
+       ctx->shared_memory = LLVMBuildBitCast(gallivm->builder, var, i8p, ""); /* cache the array global cast to i8p */
+}
+
 static LLVMValueRef fetch_constant(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
@@ -5824,6 +5850,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                        bld_base->emit_epilogue = si_llvm_return_fs_outputs;
                break;
        case TGSI_PROCESSOR_COMPUTE:
+               ctx.radeon_bld.declare_memory_region = declare_compute_memory;
                break;
        default:
                assert(!"Unsupported shader type");
index 738ddf65b57f5981de284e61792291a2fbe1e415..6ea849d2eae4776fa9a6f123d44841b77ddbaf70 100644 (file)
@@ -250,6 +250,9 @@ struct si_shader_selector {
         */
        unsigned        colors_written_4bit;
 
+       /* CS parameters */
+       unsigned local_size;
+
        /* masks of "get_unique_index" bits */
        uint64_t        outputs_written;
        uint32_t        patch_outputs_written;