gallivm: add buffer operations to the tgsi->llvm conversion.
author: Dave Airlie <airlied@redhat.com>
Wed, 26 Jun 2019 05:53:53 +0000 (15:53 +1000)
committer: Dave Airlie <airlied@redhat.com>
Sun, 7 Jul 2019 06:24:05 +0000 (16:24 +1000)
This adds load, store, and atomic operations. These operations
must respect the exec_mask and must not operate on lanes where
execution is disabled. This is needed to avoid side effects that
would be visible outside the shader.

SSBO accesses are also bounds-checked against the buffer size
read through the SSBO sizes pointer.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index 824eff2af4180e824c20285fb0c7bf8e5dabdc24..3296a27ce4a47cf8d2d17f39ce9afc6d1d95611a 100644 (file)
@@ -456,6 +456,8 @@ struct lp_build_tgsi_soa_context
 
    LLVMValueRef ssbo_ptr;
    LLVMValueRef ssbo_sizes_ptr;
+   LLVMValueRef ssbos[LP_MAX_TGSI_SHADER_BUFFERS];
+   LLVMValueRef ssbo_sizes[LP_MAX_TGSI_SHADER_BUFFERS];
 
    const struct lp_build_sampler_soa *sampler;
 
index 9fc9b8c77ecd30cb97fae4885c49e5c309444acf..7871dce9103a9abbd40f71994f6a7dd9406b7856 100644 (file)
@@ -307,6 +307,8 @@ analyse_instruction(struct analysis_context *ctx,
          max_regs = ARRAY_SIZE(info->output);
       } else if (dst->File == TGSI_FILE_ADDRESS) {
          continue;
+      } else if (dst->File == TGSI_FILE_BUFFER) {
+         continue;
       } else {
          assert(0);
          continue;
index c8002c232d59fff2e42980f83ad264459128ab1e..cda9429f52f6f1a77ab04a2ec2cbd7d931a84fcd 100644 (file)
@@ -57,6 +57,7 @@
 #include "lp_bld_gather.h"
 #include "lp_bld_init.h"
 #include "lp_bld_logic.h"
+#include "lp_bld_misc.h"
 #include "lp_bld_swizzle.h"
 #include "lp_bld_flow.h"
 #include "lp_bld_quad.h"
@@ -133,21 +134,25 @@ mask_has_loop(struct lp_exec_mask *mask)
    return FALSE;
 }
 
+/*
+ * AND the builder mask with the exec mask; if only one exists return it, if neither return NULL.
+ */
 static LLVMValueRef
 mask_vec(struct lp_build_tgsi_context *bld_base)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    struct lp_exec_mask *exec_mask = &bld->exec_mask;
-
+   LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
    if (!exec_mask->has_mask) {
-      return lp_build_mask_value(bld->mask);
+      return bld_mask;
    }
+   if (!bld_mask)
+      return exec_mask->exec_mask;
-   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
+   return LLVMBuildAnd(builder, bld_mask,
                        exec_mask->exec_mask, "");
 }
 
-
 /*
  * Returns true if we're inside a switch statement.
  * It's global, meaning that it returns true even if there's
@@ -2975,8 +2980,19 @@ lp_emit_declaration_soa(
       bld->consts_sizes[idx2D] =
          lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
    }
-      break;
+   break;
+   case TGSI_FILE_BUFFER:
+   {
+      unsigned idx = decl->Range.First;
+      LLVMValueRef index = lp_build_const_int32(gallivm, idx);
+      assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
+      bld->ssbos[idx] =
+         lp_build_array_get(gallivm, bld->ssbo_ptr, index);
+      bld->ssbo_sizes[idx] =
+         lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
 
+   }
+   break;
    default:
       /* don't need to declare other vars */
       break;
@@ -3371,6 +3387,283 @@ lod_emit(
                FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
 }
 
+/**
+ * Emit LLVM IR for a TGSI LOAD whose resource is a shader buffer.
+ *
+ * Src[0] names the buffer (must be TGSI_FILE_BUFFER).  Channel 0 of source
+ * operand 1 supplies a per-lane offset which is shifted right by 2 to give
+ * an element index (presumably a byte offset turned into a dword index --
+ * confirm against the TGSI documentation).  Enabled destination channel N
+ * reads element (index + N).
+ *
+ * The read is performed one lane at a time inside a generated loop so that
+ * a lane only touches memory when it is live in the current mask and its
+ * index is below the buffer limit (ssbo_sizes[buf] >> 2).  All other lanes
+ * get 0 in the result vector.
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context; must belong to a SoA context
+ * \param emit_data  instruction being translated; output[chan] is written
+ *                   for every enabled destination channel
+ */
+static void
+load_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+   unsigned buf = bufreg->Register.Index;
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   unsigned chan_index;
+
+   /* Only direct buffer selection is handled here; indirect selection
+    * (for ARB_gpu_shader5) is not implemented. */
+
+   LLVMValueRef index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
+   index = lp_build_shr_imm(uint_bld, index, 2);
+
+   LLVMValueRef scalar_ptr = bld->ssbos[buf];
+
+   /* Buffer size converted to the same element units as the index. */
+   LLVMValueRef ssbo_limit = LLVMBuildAShr(builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+   ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+
+   TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
+      LLVMValueRef index_vec = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
+
+      LLVMValueRef exec_mask = mask_vec(bld_base);
+      LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index_vec, ssbo_limit);
+      /* mask_vec() returns NULL when there is neither an exec mask nor a
+       * builder mask; in that case only the bounds check restricts lanes. */
+      exec_mask = exec_mask ? LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "")
+                            : ssbo_oob_cmp;
+
+      LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
+
+      /* One loop iteration per vector lane. */
+      struct lp_build_loop_state loop_state;
+      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+      struct lp_build_if_state ifthen;
+      LLVMValueRef cond, temp_res, scalar;
+
+      LLVMValueRef lane_index = LLVMBuildExtractElement(builder, index_vec,
+                                                        loop_state.counter, "");
+
+      cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+      cond = LLVMBuildExtractElement(builder, cond, loop_state.counter, "");
+
+      lp_build_if(&ifthen, gallivm, cond);
+      /* Live, in-bounds lane: read the element and insert it. */
+      scalar = lp_build_pointer_get(builder, scalar_ptr, lane_index);
+
+      temp_res = LLVMBuildLoad(builder, result, "");
+      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+      LLVMBuildStore(builder, temp_res, result);
+      lp_build_else(&ifthen);
+      /* Masked-off or out-of-bounds lane: result is 0, memory untouched. */
+      temp_res = LLVMBuildLoad(builder, result, "");
+      temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+      LLVMBuildStore(builder, temp_res, result);
+      lp_build_endif(&ifthen);
+      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                             NULL, LLVMIntUGE);
+      emit_data->output[chan_index] = LLVMBuildLoad(builder, result, "");
+   }
+}
+
+/**
+ * Emit LLVM IR for a TGSI STORE whose destination is a shader buffer.
+ *
+ * Dst[0] names the buffer (must be TGSI_FILE_BUFFER).  Channel 0 of source
+ * operand 0 supplies a per-lane offset which is shifted right by 2 to give
+ * an element index (presumably a byte offset turned into a dword index --
+ * confirm against the TGSI documentation).  For every enabled destination
+ * channel N, channel N of source operand 1 is written to element
+ * (index + N).
+ *
+ * The write is performed one lane at a time inside a generated loop so that
+ * a lane only writes memory when it is live in the current mask and its
+ * index is below the buffer limit (ssbo_sizes[buf] >> 2).
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context; must belong to a SoA context
+ * \param emit_data  instruction being translated; no output is produced
+ */
+static void
+store_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
+   unsigned buf = bufreg->Register.Index;
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+   unsigned chan_index;
+
+   /* Element index into the shader buffer, one per lane. */
+   LLVMValueRef index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
+   index = lp_build_shr_imm(uint_bld, index, 2);
+
+   LLVMValueRef scalar_ptr = bld->ssbos[buf];
+
+   /* Buffer size converted to the same element units as the index. */
+   LLVMValueRef ssbo_limit = LLVMBuildAShr(builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+   ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+
+   TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
+      LLVMValueRef index_vec = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
+
+      LLVMValueRef value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
+
+      LLVMValueRef exec_mask = mask_vec(bld_base);
+      LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index_vec, ssbo_limit);
+      /* mask_vec() returns NULL when there is neither an exec mask nor a
+       * builder mask; in that case only the bounds check restricts lanes. */
+      exec_mask = exec_mask ? LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "")
+                            : ssbo_oob_cmp;
+
+      /* One loop iteration per vector lane. */
+      struct lp_build_loop_state loop_state;
+      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+      /* This lane's value, reinterpreted as the unsigned element type. */
+      LLVMValueRef lane_value = LLVMBuildExtractElement(builder, value,
+                                                        loop_state.counter, "");
+      lane_value = LLVMBuildBitCast(builder, lane_value, uint_bld->elem_type, "");
+
+      struct lp_build_if_state ifthen;
+      LLVMValueRef cond;
+
+      LLVMValueRef lane_index = LLVMBuildExtractElement(builder, index_vec,
+                                                        loop_state.counter, "");
+
+      cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+      cond = LLVMBuildExtractElement(builder, cond, loop_state.counter, "");
+      lp_build_if(&ifthen, gallivm, cond);
+
+      /* Only live, in-bounds lanes reach the actual memory write. */
+      lp_build_pointer_set(builder, scalar_ptr, lane_index, lane_value);
+
+      lp_build_endif(&ifthen);
+      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                             NULL, LLVMIntUGE);
+   }
+}
+
+/**
+ * Emit LLVM IR for TGSI RESQ on a shader buffer.
+ *
+ * Looks up the size value recorded for the buffer named by Src[0] (which
+ * must be TGSI_FILE_BUFFER) and broadcasts it to every lane of
+ * output[emit_data->chan].
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context; must belong to a SoA context
+ * \param emit_data  instruction being translated
+ */
+static void
+resq_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   const unsigned buf = bufreg->Register.Index;
+
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_broadcast_scalar(&bld_base->uint_bld, bld->ssbo_sizes[buf]);
+}
+
+/**
+ * Emit LLVM IR for the TGSI shader-buffer atomics: ATOMUADD, ATOMXCHG,
+ * ATOMCAS, ATOMAND, ATOMOR, ATOMXOR, ATOMUMIN, ATOMUMAX, ATOMIMIN and
+ * ATOMIMAX.
+ *
+ * Src[0] names the buffer (must be TGSI_FILE_BUFFER).  Channel 0 of source
+ * operand 1 supplies a per-lane offset which is shifted right by 2 and
+ * then offset by emit_data->chan to give an element index.  Channel 0 of
+ * source operand 2 is the atomic operand; for ATOMCAS it is the value to
+ * compare against and channel 0 of source operand 3 is the replacement.
+ *
+ * The atomic is issued one lane at a time inside a generated loop, and only
+ * for lanes that are live in the current mask and whose index is below the
+ * buffer limit (ssbo_sizes[buf] >> 2).  Those lanes receive the value
+ * returned by the atomic instruction; all other lanes receive 0.
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context; must belong to a SoA context
+ * \param emit_data  instruction being translated; output[emit_data->chan]
+ *                   is written
+ */
+static void
+atomic_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+
+   assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+   unsigned buf = bufreg->Register.Index;
+
+   LLVMAtomicRMWBinOp op;
+   switch (emit_data->inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ATOMUADD:
+      op = LLVMAtomicRMWBinOpAdd;
+      break;
+   case TGSI_OPCODE_ATOMXCHG:
+      op = LLVMAtomicRMWBinOpXchg;
+      break;
+   case TGSI_OPCODE_ATOMAND:
+      op = LLVMAtomicRMWBinOpAnd;
+      break;
+   case TGSI_OPCODE_ATOMOR:
+      op = LLVMAtomicRMWBinOpOr;
+      break;
+   case TGSI_OPCODE_ATOMXOR:
+      op = LLVMAtomicRMWBinOpXor;
+      break;
+   case TGSI_OPCODE_ATOMUMIN:
+      op = LLVMAtomicRMWBinOpUMin;
+      break;
+   case TGSI_OPCODE_ATOMUMAX:
+      op = LLVMAtomicRMWBinOpUMax;
+      break;
+   case TGSI_OPCODE_ATOMIMIN:
+      op = LLVMAtomicRMWBinOpMin;
+      break;
+   case TGSI_OPCODE_ATOMIMAX:
+      op = LLVMAtomicRMWBinOpMax;
+      break;
+   case TGSI_OPCODE_ATOMCAS:
+      /* Emitted as cmpxchg below, so op is never read on this path; it is
+       * still given a defined value to avoid an uninitialized variable. */
+      op = LLVMAtomicRMWBinOpXchg;
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   /* Element index into the shader buffer, one per lane. */
+   LLVMValueRef index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
+   LLVMValueRef value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
+
+   index = lp_build_shr_imm(uint_bld, index, 2);
+   index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
+
+   LLVMValueRef buffer_ptr = bld->ssbos[buf];
+
+   LLVMValueRef atom_res = lp_build_alloca(gallivm,
+                                           uint_bld->vec_type, "");
+
+   /* Buffer size converted to the same element units as the index. */
+   LLVMValueRef ssbo_limit = LLVMBuildAShr(builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+   ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+
+   LLVMValueRef exec_mask = mask_vec(bld_base);
+   LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
+   /* mask_vec() returns NULL when there is neither an exec mask nor a
+    * builder mask; in that case only the bounds check restricts lanes. */
+   exec_mask = exec_mask ? LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "")
+                         : ssbo_oob_cmp;
+
+   /* One loop iteration per vector lane. */
+   struct lp_build_loop_state loop_state;
+   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+   /* This lane's operand, reinterpreted as the unsigned element type. */
+   LLVMValueRef lane_value = LLVMBuildExtractElement(builder, value,
+                                                     loop_state.counter, "");
+   lane_value = LLVMBuildBitCast(builder, lane_value, uint_bld->elem_type, "");
+
+   LLVMValueRef lane_index = LLVMBuildExtractElement(builder, index,
+                                                     loop_state.counter, "");
+
+   /* Address of this lane's element; it is only accessed inside the
+    * guarded branch below. */
+   LLVMValueRef elem_ptr = LLVMBuildGEP(builder, buffer_ptr,
+                                        &lane_index, 1, "");
+
+   struct lp_build_if_state ifthen;
+   LLVMValueRef cond, temp_res, scalar;
+
+   cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+   cond = LLVMBuildExtractElement(builder, cond, loop_state.counter, "");
+   lp_build_if(&ifthen, gallivm, cond);
+
+   if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+      LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
+      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(builder, cas_src,
+                                                         loop_state.counter, "");
+      cas_src_ptr = LLVMBuildBitCast(builder, cas_src_ptr, uint_bld->elem_type, "");
+      scalar = LLVMBuildAtomicCmpXchg(builder, elem_ptr, lane_value,
+                                      cas_src_ptr,
+                                      LLVMAtomicOrderingSequentiallyConsistent,
+                                      LLVMAtomicOrderingSequentiallyConsistent,
+                                      false);
+      /* cmpxchg yields an aggregate; member 0 is the loaded value. */
+      scalar = LLVMBuildExtractValue(builder, scalar, 0, "");
+   } else {
+      scalar = LLVMBuildAtomicRMW(builder, op,
+                                  elem_ptr, lane_value,
+                                  LLVMAtomicOrderingSequentiallyConsistent,
+                                  false);
+   }
+   temp_res = LLVMBuildLoad(builder, atom_res, "");
+   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+   LLVMBuildStore(builder, temp_res, atom_res);
+   lp_build_else(&ifthen);
+   /* Masked-off or out-of-bounds lane: result is 0, memory untouched. */
+   temp_res = LLVMBuildLoad(builder, atom_res, "");
+   temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+   LLVMBuildStore(builder, temp_res, atom_res);
+   lp_build_endif(&ifthen);
+
+   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                          NULL, LLVMIntUGE);
+   emit_data->output[emit_data->chan] = LLVMBuildLoad(builder, atom_res, "");
+}
+
 static void
 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
                           LLVMValueRef ptr,
@@ -3973,6 +4266,20 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
    bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
    bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
 
+   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
+
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
 
    if (gs_iface) {
       /* There's no specific value for this because it should always