From 01109282ce47094f19495b9928845d4b47d9629f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 16 Mar 2016 17:03:02 -0500 Subject: [PATCH] radeonsi: add shader buffer support to TGSI_OPCODE_STORE MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Reviewed-by: Edward O'Callaghan --- src/gallium/drivers/radeonsi/si_shader.c | 129 +++++++++++++++++++---- 1 file changed, 111 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 2b4c684d719..18f75daf7f5 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3115,41 +3115,129 @@ static void store_fetch_args( { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; const struct tgsi_full_instruction * inst = emit_data->inst; - struct tgsi_full_src_register image; - unsigned target = inst->Memory.Texture; + struct tgsi_full_src_register memory; LLVMValueRef chans[4]; LLVMValueRef data; - LLVMValueRef coords; LLVMValueRef rsrc; unsigned chan; emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context); - image = tgsi_full_src_register_from_dst(&inst->Dst[0]); - coords = image_fetch_coords(bld_base, inst, 0); - for (chan = 0; chan < 4; ++chan) { chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); } data = lp_build_gather_values(gallivm, chans, 4); - if (target == TGSI_TEXTURE_BUFFER) { - image_fetch_rsrc(bld_base, &image, false, &rsrc); - emit_data->args[0] = data; - emit_data->arg_count = 1; + emit_data->args[emit_data->arg_count++] = data; + + memory = tgsi_full_src_register_from_dst(&inst->Dst[0]); - rsrc = extract_rsrc_top_half(ctx, rsrc); - buffer_append_args(ctx, emit_data, rsrc, coords, - bld_base->uint_bld.zero, false); + if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { + LLVMValueRef offset; + LLVMValueRef tmp; + + rsrc = shader_buffer_fetch_rsrc(ctx, &memory); + + tmp = lp_build_emit_fetch(bld_base, inst, 0, 0); + offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, ""); + + buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero, + offset, false); } else { + unsigned target = inst->Memory.Texture; + LLVMValueRef coords; + + coords = image_fetch_coords(bld_base, inst, 0); + + if (target == TGSI_TEXTURE_BUFFER) { + image_fetch_rsrc(bld_base, &memory, false, &rsrc); + + rsrc = extract_rsrc_top_half(ctx, rsrc); + buffer_append_args(ctx, emit_data, rsrc, coords, + bld_base->uint_bld.zero, false); + } else { + emit_data->args[1] = coords; + image_fetch_rsrc(bld_base, &memory, true, &emit_data->args[2]); + emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */ + emit_data->arg_count = 4; + + image_append_args(ctx, emit_data, target, false); + } + } +} + +static void store_emit_buffer( + struct si_shader_context *ctx, + struct lp_build_emit_data *emit_data) +{ + const struct tgsi_full_instruction *inst = emit_data->inst; + struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &ctx->radeon_bld.soa.bld_base.uint_bld; + LLVMValueRef base_data = emit_data->args[0]; + LLVMValueRef base_offset = emit_data->args[3]; + unsigned writemask = inst->Dst[0].Register.WriteMask; + + while (writemask) { + int start, count; + const char *intrinsic_name; + LLVMValueRef data; + LLVMValueRef offset; + LLVMValueRef tmp; + + u_bit_scan_consecutive_range(&writemask, &start, &count); + + /* Due to an LLVM limitation, split 3-element writes + * into a 2-element and a 1-element write. */ + if (count == 3) { + writemask |= 1 << (start + 2); + count = 2; + } + + if (count == 4) { + data = base_data; + intrinsic_name = "llvm.amdgcn.buffer.store.v4f32"; + } else if (count == 2) { + LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2); + + tmp = LLVMBuildExtractElement( + builder, base_data, + lp_build_const_int32(gallivm, start), ""); + data = LLVMBuildInsertElement( + builder, LLVMGetUndef(v2f32), tmp, + uint_bld->zero, ""); + + tmp = LLVMBuildExtractElement( + builder, base_data, + lp_build_const_int32(gallivm, start + 1), ""); + data = LLVMBuildInsertElement( + builder, data, tmp, uint_bld->one, ""); + + intrinsic_name = "llvm.amdgcn.buffer.store.v2f32"; + } else { + assert(count == 1); + data = LLVMBuildExtractElement( + builder, base_data, + lp_build_const_int32(gallivm, start), ""); + intrinsic_name = "llvm.amdgcn.buffer.store.f32"; + } + + offset = base_offset; + if (start != 0) { + offset = LLVMBuildAdd( + builder, offset, + lp_build_const_int32(gallivm, start * 4), ""); + } + emit_data->args[0] = data; - emit_data->args[1] = coords; - image_fetch_rsrc(bld_base, &image, true, &emit_data->args[2]); - emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */ - emit_data->arg_count = 4; + emit_data->args[3] = offset; - image_append_args(ctx, emit_data, target, false); + lp_build_intrinsic( + builder, intrinsic_name, emit_data->dst_type, + emit_data->args, emit_data->arg_count, + LLVMNoUnwindAttribute); } } @@ -3165,6 +3253,11 @@ static void store_emit( char intrinsic_name[32]; char coords_type[8]; + if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { + store_emit_buffer(si_shader_context(bld_base), emit_data); + return; + } + if (target == TGSI_TEXTURE_BUFFER) { emit_data->output[emit_data->chan] = lp_build_intrinsic( builder, "llvm.amdgcn.buffer.store.format.v4f32", -- 2.30.2