radeonsi: add shader buffer support to TGSI_OPCODE_STORE
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 16 Mar 2016 22:03:02 +0000 (17:03 -0500)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 12 Apr 2016 21:30:43 +0000 (16:30 -0500)
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
src/gallium/drivers/radeonsi/si_shader.c

index 2b4c684d719384af1c324fbb7a991b96562a34a7..18f75daf7f5777411c586d60636f23e56c635064 100644 (file)
@@ -3115,41 +3115,129 @@ static void store_fetch_args(
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
-       struct tgsi_full_src_register image;
-       unsigned target = inst->Memory.Texture;
+       struct tgsi_full_src_register memory;
        LLVMValueRef chans[4];
        LLVMValueRef data;
-       LLVMValueRef coords;
        LLVMValueRef rsrc;
        unsigned chan;
 
        emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
 
-       image = tgsi_full_src_register_from_dst(&inst->Dst[0]);
-       coords = image_fetch_coords(bld_base, inst, 0);
-
        for (chan = 0; chan < 4; ++chan) {
                chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
        }
        data = lp_build_gather_values(gallivm, chans, 4);
 
-       if (target == TGSI_TEXTURE_BUFFER) {
-               image_fetch_rsrc(bld_base, &image, false, &rsrc);
-               emit_data->args[0] = data;
-               emit_data->arg_count = 1;
+       emit_data->args[emit_data->arg_count++] = data;
+
+       memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);
 
-               rsrc = extract_rsrc_top_half(ctx, rsrc);
-               buffer_append_args(ctx, emit_data, rsrc, coords,
-                                  bld_base->uint_bld.zero, false);
+       if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
+               LLVMValueRef offset;
+               LLVMValueRef tmp;
+
+               rsrc = shader_buffer_fetch_rsrc(ctx, &memory);
+
+               tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
+               offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+
+               buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
+                                  offset, false);
        } else {
+               unsigned target = inst->Memory.Texture;
+               LLVMValueRef coords;
+
+               coords = image_fetch_coords(bld_base, inst, 0);
+
+               if (target == TGSI_TEXTURE_BUFFER) {
+                       image_fetch_rsrc(bld_base, &memory, false, &rsrc);
+
+                       rsrc = extract_rsrc_top_half(ctx, rsrc);
+                       buffer_append_args(ctx, emit_data, rsrc, coords,
+                                       bld_base->uint_bld.zero, false);
+               } else {
+                       emit_data->args[1] = coords;
+                       image_fetch_rsrc(bld_base, &memory, true, &emit_data->args[2]);
+                       emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
+                       emit_data->arg_count = 4;
+
+                       image_append_args(ctx, emit_data, target, false);
+               }
+       }
+}
+
+static void store_emit_buffer(
+               struct si_shader_context *ctx,
+               struct lp_build_emit_data *emit_data)
+{
+       const struct tgsi_full_instruction *inst = emit_data->inst;
+       struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
+       struct lp_build_context *uint_bld = &ctx->radeon_bld.soa.bld_base.uint_bld;
+       LLVMValueRef base_data = emit_data->args[0];
+       LLVMValueRef base_offset = emit_data->args[3];
+       unsigned writemask = inst->Dst[0].Register.WriteMask;
+
+       while (writemask) {
+               int start, count;
+               const char *intrinsic_name;
+               LLVMValueRef data;
+               LLVMValueRef offset;
+               LLVMValueRef tmp;
+
+               u_bit_scan_consecutive_range(&writemask, &start, &count);
+
+               /* Due to an LLVM limitation, split 3-element writes
+                * into a 2-element and a 1-element write. */
+               if (count == 3) {
+                       writemask |= 1 << (start + 2);
+                       count = 2;
+               }
+
+               if (count == 4) {
+                       data = base_data;
+                       intrinsic_name = "llvm.amdgcn.buffer.store.v4f32";
+               } else if (count == 2) {
+                       LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
+
+                       tmp = LLVMBuildExtractElement(
+                               builder, base_data,
+                               lp_build_const_int32(gallivm, start), "");
+                       data = LLVMBuildInsertElement(
+                               builder, LLVMGetUndef(v2f32), tmp,
+                               uint_bld->zero, "");
+
+                       tmp = LLVMBuildExtractElement(
+                               builder, base_data,
+                               lp_build_const_int32(gallivm, start + 1), "");
+                       data = LLVMBuildInsertElement(
+                               builder, data, tmp, uint_bld->one, "");
+
+                       intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
+               } else {
+                       assert(count == 1);
+                       data = LLVMBuildExtractElement(
+                               builder, base_data,
+                               lp_build_const_int32(gallivm, start), "");
+                       intrinsic_name = "llvm.amdgcn.buffer.store.f32";
+               }
+
+               offset = base_offset;
+               if (start != 0) {
+                       offset = LLVMBuildAdd(
+                               builder, offset,
+                               lp_build_const_int32(gallivm, start * 4), "");
+               }
+
                emit_data->args[0] = data;
-               emit_data->args[1] = coords;
-               image_fetch_rsrc(bld_base, &image, true, &emit_data->args[2]);
-               emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
-               emit_data->arg_count = 4;
+               emit_data->args[3] = offset;
 
-               image_append_args(ctx, emit_data, target, false);
+               lp_build_intrinsic(
+                       builder, intrinsic_name, emit_data->dst_type,
+                       emit_data->args, emit_data->arg_count,
+                       LLVMNoUnwindAttribute);
        }
 }
 
@@ -3165,6 +3253,11 @@ static void store_emit(
        char intrinsic_name[32];
        char coords_type[8];
 
+       if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
+               store_emit_buffer(si_shader_context(bld_base), emit_data);
+               return;
+       }
+
        if (target == TGSI_TEXTURE_BUFFER) {
                emit_data->output[emit_data->chan] = lp_build_intrinsic(
                        builder, "llvm.amdgcn.buffer.store.format.v4f32",