From b5ac381d8f66fbf2aef92772879ac906ce9d68b2 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Wed, 26 Jun 2019 15:53:53 +1000
Subject: [PATCH] gallivm: add buffer operations to the tgsi->llvm conversion.

This adds load, store and atomic operations.

These operations have to respect the exec_mask, and can't
operate in lanes where the execute is off. This is needed
to avoid side effects seen outside the shaders.

There is also bounds checking on the ssbo accesses vs
the size ptr.

Reviewed-by: Roland Scheidegger
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h   |   2 +
 .../auxiliary/gallivm/lp_bld_tgsi_info.c      |   2 +
 .../auxiliary/gallivm/lp_bld_tgsi_soa.c       | 360 +++++++++++++++++-
 3 files changed, 359 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 824eff2af41..3296a27ce4a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -456,6 +456,8 @@ struct lp_build_tgsi_soa_context
 
    LLVMValueRef ssbo_ptr;
    LLVMValueRef ssbo_sizes_ptr;
+   LLVMValueRef ssbos[LP_MAX_TGSI_SHADER_BUFFERS];
+   LLVMValueRef ssbo_sizes[LP_MAX_TGSI_SHADER_BUFFERS];
 
    const struct lp_build_sampler_soa *sampler;
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
index 9fc9b8c77ec..7871dce9103 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -307,6 +307,8 @@ analyse_instruction(struct analysis_context *ctx,
          max_regs = ARRAY_SIZE(info->output);
       } else if (dst->File == TGSI_FILE_ADDRESS) {
          continue;
+      } else if (dst->File == TGSI_FILE_BUFFER) {
+         continue;
       } else {
          assert(0);
          continue;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index c8002c232d5..cda9429f52f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -57,6 +57,7 @@
 #include "lp_bld_gather.h"
 #include "lp_bld_init.h"
 #include "lp_bld_logic.h"
+#include "lp_bld_misc.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_flow.h"
 #include "lp_bld_quad.h"
@@ -133,21 +134,29 @@ mask_has_loop(struct lp_exec_mask *mask)
    return FALSE;
 }
 
+/*
+ * Combine the execution mask, if there is one, with the current mask.
+ * Either may be absent; when both are, every lane counts as active.
+ */
 static LLVMValueRef
 mask_vec(struct lp_build_tgsi_context *bld_base)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    struct lp_exec_mask *exec_mask = &bld->exec_mask;
-
+   LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
    if (!exec_mask->has_mask) {
-      return lp_build_mask_value(bld->mask);
+      /* callers AND the result with other masks, so never return NULL */
+      return bld_mask ? bld_mask :
+         lp_build_const_int_vec(bld->bld_base.base.gallivm,
+                                bld_base->uint_bld.type, ~0);
    }
+   if (!bld_mask)
+      return exec_mask->exec_mask;
-   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
+   return LLVMBuildAnd(builder, bld_mask,
                        exec_mask->exec_mask, "");
 }
 
-
 /*
  * Returns true if we're inside a switch statement.
  * It's global, meaning that it returns true even if there's
@@ -2975,8 +2984,19 @@ lp_emit_declaration_soa(
       bld->consts_sizes[idx2D] =
          lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
    }
-      break;
+   break;
 
+   case TGSI_FILE_BUFFER:
+   {
+      unsigned idx = decl->Range.First;
+      LLVMValueRef index = lp_build_const_int32(gallivm, idx);
+      assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
+      bld->ssbos[idx] =
+         lp_build_array_get(gallivm, bld->ssbo_ptr, index);
+      bld->ssbo_sizes[idx] =
+         lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
+   }
+   break;
    default:
       /* don't need to declare other vars */
       break;
@@ -3371,6 +3391,322 @@ lod_emit(
                FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
 }
 
+/**
+ * Emit TGSI LOAD from a shader buffer (SSBO).
+ *
+ * Src[0] names the buffer and Src[1].x holds the per-lane byte offset,
+ * which is turned into a dword index.  Each enabled destination channel
+ * reads dword (index + channel).  Lanes are visited one at a time so that
+ * inactive or out-of-bounds lanes never touch memory; those lanes read 0.
+ */
+static void
+load_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+   unsigned buf = bufreg->Register.Index;
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+
+   if (0) {
+      /* for indirect support with ARB_gpu_shader5 */
+   } else {
+      LLVMValueRef index;
+      LLVMValueRef scalar, scalar_ptr;
+      unsigned chan_index;
+
+      /* byte offset -> dword index */
+      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
+      index = lp_build_shr_imm(uint_bld, index, 2);
+
+      scalar_ptr = bld->ssbos[buf];
+
+      LLVMValueRef ssbo_limit;
+
+      /* buffer size in dwords; logical shift, a size is never negative */
+      ssbo_limit = LLVMBuildLShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+
+      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
+         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
+
+         LLVMValueRef exec_mask = mask_vec(bld_base);
+         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
+         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+
+         LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
+         struct lp_build_loop_state loop_state;
+         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+         struct lp_build_if_state ifthen;
+         LLVMValueRef cond, temp_res;
+
+         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
+                                              loop_state.counter, "");
+
+         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+
+         lp_build_if(&ifthen, gallivm, cond);
+         scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
+
+         temp_res = LLVMBuildLoad(builder, result, "");
+         temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+         LLVMBuildStore(builder, temp_res, result);
+         lp_build_else(&ifthen);
+         temp_res = LLVMBuildLoad(builder, result, "");
+         temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+         LLVMBuildStore(builder, temp_res, result);
+         lp_build_endif(&ifthen);
+         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                                NULL, LLVMIntUGE);
+         emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
+      }
+   }
+}
+
+/**
+ * Emit TGSI STORE to a shader buffer (SSBO).
+ *
+ * Dst[0] names the buffer, Src[0].x holds the per-lane byte offset and
+ * Src[1] the values.  Each enabled destination channel writes dword
+ * (index + channel).  The write is skipped for lanes that are inactive or
+ * whose dword index is not below the buffer size.
+ */
+static void
+store_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
+   unsigned buf = bufreg->Register.Index;
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+
+   if (0) {
+      /* indirect buffer indexing is not handled yet (see load_emit) */
+   } else {
+      LLVMValueRef index;  /* dword index into the SSBO */
+      LLVMValueRef scalar_ptr;
+      LLVMValueRef value;
+      unsigned chan_index;
+
+      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
+      index = lp_build_shr_imm(uint_bld, index, 2);
+
+      scalar_ptr = bld->ssbos[buf];
+
+      LLVMValueRef ssbo_limit;
+
+      /* buffer size in dwords; logical shift, a size is never negative */
+      ssbo_limit = LLVMBuildLShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+
+      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
+         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
+
+         value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
+
+         LLVMValueRef exec_mask = mask_vec(bld_base);
+         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
+         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+
+         struct lp_build_loop_state loop_state;
+         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
+                                                          loop_state.counter, "");
+         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
+
+         struct lp_build_if_state ifthen;
+         LLVMValueRef cond;
+
+         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
+                                              loop_state.counter, "");
+
+         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+         lp_build_if(&ifthen, gallivm, cond);
+
+         lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
+
+         lp_build_endif(&ifthen);
+         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                                NULL, LLVMIntUGE);
+      }
+   }
+}
+
+/**
+ * Emit TGSI RESQ on a shader buffer: broadcast the size recorded for the
+ * buffer named by Src[0] to every lane of the current channel.
+ */
+static void
+resq_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+
+   unsigned buf = bufreg->Register.Index;
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+
+   LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
+
+   emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
+}
+
+/**
+ * Emit the TGSI atomic opcodes on a shader buffer (SSBO).
+ *
+ * Src[0] names the buffer, Src[1].x holds the per-lane byte offset and
+ * Src[2].x the operand.  For ATOMCAS, Src[2].x is the value to compare
+ * against and Src[3].x the replacement.  The atomic is issued lane by
+ * lane, only for lanes that are active and in bounds; the previous memory
+ * value is written to the current channel, and skipped lanes get 0.
+ */
+static void
+atomic_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+   unsigned buf = bufreg->Register.Index;
+
+   LLVMAtomicRMWBinOp op = (LLVMAtomicRMWBinOp) -1;
+   switch (emit_data->inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ATOMUADD:
+      op = LLVMAtomicRMWBinOpAdd;
+      break;
+   case TGSI_OPCODE_ATOMXCHG:
+      op = LLVMAtomicRMWBinOpXchg;
+      break;
+   case TGSI_OPCODE_ATOMAND:
+      op = LLVMAtomicRMWBinOpAnd;
+      break;
+   case TGSI_OPCODE_ATOMOR:
+      op = LLVMAtomicRMWBinOpOr;
+      break;
+   case TGSI_OPCODE_ATOMXOR:
+      op = LLVMAtomicRMWBinOpXor;
+      break;
+   case TGSI_OPCODE_ATOMUMIN:
+      op = LLVMAtomicRMWBinOpUMin;
+      break;
+   case TGSI_OPCODE_ATOMUMAX:
+      op = LLVMAtomicRMWBinOpUMax;
+      break;
+   case TGSI_OPCODE_ATOMIMIN:
+      op = LLVMAtomicRMWBinOpMin;
+      break;
+   case TGSI_OPCODE_ATOMIMAX:
+      op = LLVMAtomicRMWBinOpMax;
+      break;
+   case TGSI_OPCODE_ATOMCAS:
+      /* emitted as a cmpxchg below, op stays unused */
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   if (0) {
+   } else {
+      LLVMValueRef index;  /* dword index into the SSBO */
+      LLVMValueRef scalar, scalar_ptr;
+      LLVMValueRef value;
+
+      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
+      value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
+
+      index = lp_build_shr_imm(uint_bld, index, 2);
+      index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
+
+      scalar_ptr = bld->ssbos[buf];
+
+      LLVMValueRef atom_res = lp_build_alloca(gallivm,
+                                              uint_bld->vec_type, "");
+
+      LLVMValueRef ssbo_limit;
+      /* buffer size in dwords; logical shift, a size is never negative */
+      ssbo_limit = LLVMBuildLShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+
+      LLVMValueRef exec_mask = mask_vec(bld_base);
+      LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
+      exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+
+      struct lp_build_loop_state loop_state;
+      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
+                                                       loop_state.counter, "");
+      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
+
+      index = LLVMBuildExtractElement(gallivm->builder, index,
+                                      loop_state.counter, "");
+
+      scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
+                                &index, 1, "");
+
+      struct lp_build_if_state ifthen;
+      LLVMValueRef cond, temp_res;
+
+      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+      lp_build_if(&ifthen, gallivm, cond);
+
+      if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+         LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
+         LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
+                                                            loop_state.counter, "");
+         cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
+         scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
+                                         cas_src_ptr,
+                                         LLVMAtomicOrderingSequentiallyConsistent,
+                                         LLVMAtomicOrderingSequentiallyConsistent,
+                                         false);
+         scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
+      } else {
+         scalar = LLVMBuildAtomicRMW(builder, op,
+                                     scalar_ptr, value_ptr,
+                                     LLVMAtomicOrderingSequentiallyConsistent,
+                                     false);
+      }
+      temp_res = LLVMBuildLoad(builder, atom_res, "");
+      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+      LLVMBuildStore(builder, temp_res, atom_res);
+      lp_build_else(&ifthen);
+      temp_res = LLVMBuildLoad(builder, atom_res, "");
+      temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+      LLVMBuildStore(builder, temp_res, atom_res);
+      lp_build_endif(&ifthen);
+
+      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                             NULL, LLVMIntUGE);
+      emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
+   }
+}
+
 static void
 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
                           LLVMValueRef ptr,
@@ -3973,6 +4309,20 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
 
    bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
    bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
+
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
 
    if (gs_iface) {
       /* There's no specific value for this because it should always
-- 
2.30.2