gallivm/llvmpipe: add support for global operations.
author Dave Airlie <airlied@redhat.com>
Tue, 10 Dec 2019 04:53:51 +0000 (14:53 +1000)
committer Dave Airlie <airlied@redhat.com>
Fri, 27 Dec 2019 03:26:33 +0000 (13:26 +1000)
Acked-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_nir.c
src/gallium/auxiliary/gallivm/lp_bld_nir.h
src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
src/gallium/drivers/llvmpipe/lp_context.h
src/gallium/drivers/llvmpipe/lp_state_cs.c
src/gallium/drivers/llvmpipe/lp_state_cs.h

index 9c378d174d81a38f3d332b8f45f05187e06caa41..b1a812d919347bd379a44e9228365d7a3ff496a8 100644 (file)
@@ -1212,6 +1212,41 @@ static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
                              offset_is_uniform, offset, result);
 }
 
+/**
+ * Translate a nir_intrinsic_load_global instruction.
+ *
+ * Fetches the address operand (src[0]) and forwards it to the backend
+ * load_global hook together with the destination's component count and
+ * bit size and the bit size of the address operand.
+ *
+ * \param result  receives the values produced by the backend hook
+ */
+static void visit_load_global(struct lp_build_nir_context *bld_base,
+                              nir_intrinsic_instr *instr, LLVMValueRef result[4])
+{
+   LLVMValueRef address = get_src(bld_base, instr->src[0]);
+   const unsigned num_comps = nir_dest_num_components(instr->dest);
+   const unsigned dest_bits = nir_dest_bit_size(instr->dest);
+   const unsigned address_bits = nir_src_bit_size(instr->src[0]);
+
+   bld_base->load_global(bld_base, num_comps, dest_bits, address_bits,
+                         address, result);
+}
+
+/**
+ * Translate a nir_intrinsic_store_global instruction.
+ *
+ * src[0] is the value to write and src[1] the destination address; the
+ * write mask is taken from const_index[0].  Everything is forwarded to the
+ * backend store_global hook.
+ */
+static void visit_store_global(struct lp_build_nir_context *bld_base,
+                               nir_intrinsic_instr *instr)
+{
+   LLVMValueRef value = get_src(bld_base, instr->src[0]);
+   LLVMValueRef address = get_src(bld_base, instr->src[1]);
+   const int num_comps = nir_src_num_components(instr->src[0]);
+   const int value_bits = nir_src_bit_size(instr->src[0]);
+   const int address_bits = nir_src_bit_size(instr->src[1]);
+   const int mask = instr->const_index[0];
+
+   bld_base->store_global(bld_base, mask, num_comps, value_bits,
+                          address_bits, address, value);
+}
+
+/**
+ * Translate one of the nir_intrinsic_global_atomic_* instructions.
+ *
+ * src[0] is the address and src[1] the operand.  Only compare-and-swap has
+ * a third operand (src[2]); for every other op the backend hook is handed
+ * NULL in its place.  The hook writes its result to result[0].
+ */
+static void visit_global_atomic(struct lp_build_nir_context *bld_base,
+                                nir_intrinsic_instr *instr,
+                                LLVMValueRef result[4])
+{
+   const bool is_cas = instr->intrinsic == nir_intrinsic_global_atomic_comp_swap;
+   LLVMValueRef address = get_src(bld_base, instr->src[0]);
+   LLVMValueRef operand = get_src(bld_base, instr->src[1]);
+   LLVMValueRef cas_operand = is_cas ? get_src(bld_base, instr->src[2]) : NULL;
+   const int address_bits = nir_src_bit_size(instr->src[0]);
+
+   bld_base->atomic_global(bld_base, instr->intrinsic, address_bits,
+                           address, operand, cas_operand, result);
+}
+
 static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
 {
@@ -1318,6 +1353,24 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
       break;
    case nir_intrinsic_load_kernel_input:
       visit_load_kernel_input(bld_base, instr, result);
+     break;
+   case nir_intrinsic_load_global:
+      visit_load_global(bld_base, instr, result);
+      break;
+   case nir_intrinsic_store_global:
+      visit_store_global(bld_base, instr);
+      break;
+   case nir_intrinsic_global_atomic_add:
+   case nir_intrinsic_global_atomic_imin:
+   case nir_intrinsic_global_atomic_umin:
+   case nir_intrinsic_global_atomic_imax:
+   case nir_intrinsic_global_atomic_umax:
+   case nir_intrinsic_global_atomic_and:
+   case nir_intrinsic_global_atomic_or:
+   case nir_intrinsic_global_atomic_xor:
+   case nir_intrinsic_global_atomic_exchange:
+   case nir_intrinsic_global_atomic_comp_swap:
+      visit_global_atomic(bld_base, instr, result);
       break;
    default:
       assert(0);
index dbfed24074b629453e63cd45187d5f93aa92c0d3..c8a80428e7dafd99d25d1f132b3dfcb3a68f3c53 100644 (file)
@@ -72,6 +72,24 @@ struct lp_build_nir_context
                            bool offset_is_uniform,
                            LLVMValueRef offset, LLVMValueRef result[4]);
 
+   /* for global memory accessed through raw addresses */
+
+   /* Load nc components of bit_size bits each from the address(es) in
+    * offset; offset_bit_size is the bit size of the address operand.
+    */
+   void (*load_global)(struct lp_build_nir_context *bld_base,
+                       unsigned nc, unsigned bit_size,
+                       unsigned offset_bit_size,
+                       LLVMValueRef offset, LLVMValueRef result[4]);
+
+   /* Store the components of dst selected by writemask to addr. */
+   void (*store_global)(struct lp_build_nir_context *bld_base,
+                        unsigned writemask,
+                        unsigned nc, unsigned bit_size,
+                        unsigned addr_bit_size,
+                        LLVMValueRef addr, LLVMValueRef dst);
+
+   /* Perform atomic operation op at addr with operand val; val2 is the
+    * extra operand of compare-and-swap and is NULL for the other ops.
+    */
+   void (*atomic_global)(struct lp_build_nir_context *bld_base,
+                         nir_intrinsic_op op,
+                         unsigned addr_bit_size,
+                         LLVMValueRef addr,
+                         LLVMValueRef val, LLVMValueRef val2,
+                         LLVMValueRef *result);
+
+
    /* for SSBO and shared memory */
    void (*load_mem)(struct lp_build_nir_context *bld_base,
                     unsigned nc, unsigned bit_size,
index 99f49a942fe71d3e66628d9ae19e9d1d38489c9d..bf03e0a39ef2334c2a7cc22d17ac0375475e2f2d 100644 (file)
@@ -527,6 +527,201 @@ static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
    }
 }
 
+/**
+ * Convert an integer address into a pointer (address space 0) to an
+ * integer of the requested width.
+ *
+ * bit_size values of 8, 16 and 64 select the matching integer type; 32 and
+ * any other value produce a pointer to a 32-bit integer.
+ */
+static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
+{
+   LLVMTypeRef elem_type;
+
+   switch (bit_size) {
+   case 8:
+      elem_type = LLVMInt8TypeInContext(gallivm->context);
+      break;
+   case 16:
+      elem_type = LLVMInt16TypeInContext(gallivm->context);
+      break;
+   case 64:
+      elem_type = LLVMInt64TypeInContext(gallivm->context);
+      break;
+   case 32:
+   default:
+      elem_type = LLVMInt32TypeInContext(gallivm->context);
+      break;
+   }
+
+   return LLVMBuildIntToPtr(gallivm->builder, addr_ptr,
+                            LLVMPointerType(elem_type, 0), "");
+}
+
+/**
+ * Emit a load of nc components of bit_size bits each from global memory.
+ *
+ * addr is a vector holding one integer address per lane.  For every
+ * component a scalar loop walks the lanes, turns the lane's address into a
+ * pointer and reads element c relative to it.
+ *
+ * Only lanes enabled in the current execution mask touch memory: a
+ * disabled lane may hold an arbitrary address, so dereferencing it could
+ * fault.  Disabled lanes read back as zero.  This matches the per-lane
+ * guard used by the global store and atomic emitters.
+ *
+ * \param addr_bit_size  bit size of the address operand (currently unused)
+ * \param outval         receives one result vector per component
+ */
+static void emit_load_global(struct lp_build_nir_context *bld_base,
+                             unsigned nc,
+                             unsigned bit_size,
+                             unsigned addr_bit_size,
+                             LLVMValueRef addr,
+                             LLVMValueRef outval[4])
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   struct lp_build_context *res_bld = get_int_bld(bld_base, true, bit_size);
+
+   /* The mask does not change while this instruction is emitted, so the
+    * vector compare is done once and only the lane extract is per-iteration.
+    */
+   LLVMValueRef exec_mask = mask_vec(bld_base);
+   LLVMValueRef active = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+
+   for (unsigned c = 0; c < nc; c++) {
+      LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
+
+      /* Lanes that are skipped below keep this zero value. */
+      LLVMBuildStore(builder, res_bld->zero, result);
+
+      struct lp_build_loop_state loop_state;
+      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+      struct lp_build_if_state ifthen;
+      LLVMValueRef cond = LLVMBuildExtractElement(builder, active, loop_state.counter, "");
+      lp_build_if(&ifthen, gallivm, cond);
+
+      LLVMValueRef addr_ptr = LLVMBuildExtractElement(builder, addr,
+                                                      loop_state.counter, "");
+      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
+
+      LLVMValueRef scalar = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
+
+      LLVMValueRef temp_res;
+      temp_res = LLVMBuildLoad(builder, result, "");
+      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+      LLVMBuildStore(builder, temp_res, result);
+
+      lp_build_endif(&ifthen);
+      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                             NULL, LLVMIntUGE);
+      outval[c] = LLVMBuildLoad(builder, result, "");
+   }
+}
+
+/**
+ * Emit a store of up to nc components of bit_size bits each to global
+ * memory.
+ *
+ * addr is a vector holding one integer address per lane.  When nc is 1,
+ * dst is the value vector itself; otherwise component c is pulled out of
+ * dst with an extract-value.  Components not set in writemask are skipped.
+ * For each remaining component a scalar loop walks the lanes and writes
+ * element c relative to the lane's address, but only for lanes enabled in
+ * the execution mask.
+ *
+ * \param addr_bit_size  bit size of the address operand (currently unused)
+ */
+static void emit_store_global(struct lp_build_nir_context *bld_base,
+                              unsigned writemask,
+                              unsigned nc, unsigned bit_size,
+                              unsigned addr_bit_size,
+                              LLVMValueRef addr,
+                              LLVMValueRef dst)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   LLVMValueRef lane_count = lp_build_const_int32(gallivm, uint_bld->type.length);
+
+   for (unsigned chan = 0; chan < nc; chan++) {
+      if ((writemask & (1u << chan)) == 0)
+         continue;
+
+      LLVMValueRef chan_vec = dst;
+      if (nc != 1)
+         chan_vec = LLVMBuildExtractValue(builder, dst, chan, "");
+
+      LLVMValueRef exec_mask = mask_vec(bld_base);
+
+      struct lp_build_loop_state lanes;
+      lp_build_loop_begin(&lanes, gallivm, lp_build_const_int32(gallivm, 0));
+
+      LLVMValueRef lane_val = LLVMBuildExtractElement(builder, chan_vec, lanes.counter, "");
+      LLVMValueRef lane_ptr = LLVMBuildExtractElement(builder, addr, lanes.counter, "");
+      lane_ptr = global_addr_to_ptr(gallivm, lane_ptr, bit_size);
+
+      /* 32- and 64-bit values are cast to the matching integer type; other
+       * widths are stored as they are.
+       */
+      if (bit_size == 32)
+         lane_val = LLVMBuildBitCast(builder, lane_val, LLVMInt32TypeInContext(gallivm->context), "");
+      else if (bit_size == 64)
+         lane_val = LLVMBuildBitCast(builder, lane_val, LLVMInt64TypeInContext(gallivm->context), "");
+
+      LLVMValueRef active = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+      active = LLVMBuildExtractElement(builder, active, lanes.counter, "");
+
+      struct lp_build_if_state if_active;
+      lp_build_if(&if_active, gallivm, active);
+      lp_build_pointer_set(builder, lane_ptr, lp_build_const_int32(gallivm, chan), lane_val);
+      lp_build_endif(&if_active);
+
+      lp_build_loop_end_cond(&lanes, lane_count, NULL, LLVMIntUGE);
+   }
+}
+
+/**
+ * Emit a global-memory atomic operation.
+ *
+ * addr is a vector holding one integer address per lane; each address is
+ * treated as a pointer to a 32-bit integer.  A scalar loop walks the lanes:
+ * lanes enabled in the execution mask perform the atomic (a cmpxchg for
+ * nir_intrinsic_global_atomic_comp_swap, an atomicrmw otherwise) with
+ * sequentially consistent ordering, and the value it returns is placed in
+ * the lane's result slot.  Disabled lanes get 0.
+ *
+ * \param addr_bit_size  bit size of the address operand (currently unused)
+ * \param val            operand vector (the compare value for comp_swap)
+ * \param val2           new-value vector for comp_swap, unused otherwise
+ * \param result         receives the result vector
+ */
+static void emit_atomic_global(struct lp_build_nir_context *bld_base,
+                               nir_intrinsic_op nir_op,
+                               unsigned addr_bit_size,
+                               LLVMValueRef addr,
+                               LLVMValueRef val, LLVMValueRef val2,
+                               LLVMValueRef *result)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   LLVMAtomicRMWBinOp op;
+   switch (nir_op) {
+   case nir_intrinsic_global_atomic_add:
+      op = LLVMAtomicRMWBinOpAdd;
+      break;
+   case nir_intrinsic_global_atomic_exchange:
+      op = LLVMAtomicRMWBinOpXchg;
+      break;
+   case nir_intrinsic_global_atomic_and:
+      op = LLVMAtomicRMWBinOpAnd;
+      break;
+   case nir_intrinsic_global_atomic_or:
+      op = LLVMAtomicRMWBinOpOr;
+      break;
+   case nir_intrinsic_global_atomic_xor:
+      op = LLVMAtomicRMWBinOpXor;
+      break;
+   case nir_intrinsic_global_atomic_umin:
+      op = LLVMAtomicRMWBinOpUMin;
+      break;
+   case nir_intrinsic_global_atomic_umax:
+      op = LLVMAtomicRMWBinOpUMax;
+      break;
+   case nir_intrinsic_global_atomic_imin:
+      op = LLVMAtomicRMWBinOpMin;
+      break;
+   case nir_intrinsic_global_atomic_imax:
+      op = LLVMAtomicRMWBinOpMax;
+      break;
+   case nir_intrinsic_global_atomic_comp_swap:
+   default:
+      /* comp_swap is emitted as a cmpxchg below and never reads op.  It is
+       * still given a defined value so that no indeterminate enum is ever
+       * passed on.
+       */
+      op = LLVMAtomicRMWBinOpXchg;
+      break;
+   }
+
+   LLVMValueRef atom_res = lp_build_alloca(gallivm,
+                                           uint_bld->vec_type, "");
+   LLVMValueRef exec_mask = mask_vec(bld_base);
+   struct lp_build_loop_state loop_state;
+   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+   LLVMValueRef value_ptr = LLVMBuildExtractElement(builder, val,
+                                                    loop_state.counter, "");
+
+   LLVMValueRef addr_ptr = LLVMBuildExtractElement(builder, addr,
+                                                   loop_state.counter, "");
+   /* Atomics are always performed on 32-bit integers here. */
+   addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
+   struct lp_build_if_state ifthen;
+   LLVMValueRef cond, temp_res;
+   LLVMValueRef scalar;
+   cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+   cond = LLVMBuildExtractElement(builder, cond, loop_state.counter, "");
+   lp_build_if(&ifthen, gallivm, cond);
+
+   if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
+      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(builder, val2,
+                                                         loop_state.counter, "");
+      cas_src_ptr = LLVMBuildBitCast(builder, cas_src_ptr, uint_bld->elem_type, "");
+      scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
+                                      cas_src_ptr,
+                                      LLVMAtomicOrderingSequentiallyConsistent,
+                                      LLVMAtomicOrderingSequentiallyConsistent,
+                                      false);
+      /* cmpxchg yields an aggregate; element 0 is taken as the result. */
+      scalar = LLVMBuildExtractValue(builder, scalar, 0, "");
+   } else {
+      scalar = LLVMBuildAtomicRMW(builder, op,
+                                  addr_ptr, value_ptr,
+                                  LLVMAtomicOrderingSequentiallyConsistent,
+                                  false);
+   }
+   temp_res = LLVMBuildLoad(builder, atom_res, "");
+   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+   LLVMBuildStore(builder, temp_res, atom_res);
+   lp_build_else(&ifthen);
+   /* Disabled lane: no memory access, result slot is set to 0. */
+   temp_res = LLVMBuildLoad(builder, atom_res, "");
+   temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+   LLVMBuildStore(builder, temp_res, atom_res);
+   lp_build_endif(&ifthen);
+   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                          NULL, LLVMIntUGE);
+   *result = LLVMBuildLoad(builder, atom_res, "");
+}
+
 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
                           unsigned nc,
                           unsigned bit_size,
@@ -1276,6 +1471,9 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
    bld.bld_base.emit_var_decl = emit_var_decl;
    bld.bld_base.load_ubo = emit_load_ubo;
    bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
+   bld.bld_base.load_global = emit_load_global;
+   bld.bld_base.store_global = emit_store_global;
+   bld.bld_base.atomic_global = emit_atomic_global;
    bld.bld_base.tex = emit_tex;
    bld.bld_base.tex_size = emit_tex_size;
    bld.bld_base.bgnloop = bgnloop;
index 1d36f4968cc640c666cef6a10120dd12b9323597..0e029f591224b384cb5086be66c8ada04d02f263 100644 (file)
@@ -169,6 +169,10 @@ struct llvmpipe_context {
 
    /** The LLVMContext to use for LLVM related work */
    LLVMContextRef context;
+
+   int max_global_buffers;
+   struct pipe_resource **global_buffers;
+
 };
 
 
index 83876dd98d4e8182d8b91bac915510a3b6901752..83affd7d429d0bf9472f862990254fbbbbd3fdb9 100644 (file)
@@ -499,6 +499,12 @@ llvmpipe_delete_compute_state(struct pipe_context *pipe,
    struct lp_compute_shader *shader = cs;
    struct lp_cs_variant_list_item *li;
 
+   if (llvmpipe->cs == cs)
+      llvmpipe->cs = NULL;
+   for (unsigned i = 0; i < shader->max_global_buffers; i++)
+      pipe_resource_reference(&shader->global_buffers[i], NULL);
+   FREE(shader->global_buffers);
+
    /* Delete all the variants */
    li = first_elem(&shader->variants);
    while(!at_end(&shader->variants, li)) {
@@ -1249,12 +1255,62 @@ static void llvmpipe_launch_grid(struct pipe_context *pipe,
    llvmpipe->pipeline_statistics.cs_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2];
 }
 
+/**
+ * pipe_context::set_compute_resources hook.
+ *
+ * Currently a no-op: all arguments are ignored.
+ */
+static void
+llvmpipe_set_compute_resources(struct pipe_context *pipe,
+                               unsigned start, unsigned count,
+                               struct pipe_surface **resources)
+{
+}
+
+/**
+ * pipe_context::set_global_binding hook.
+ *
+ * Binds resources[0..count) to global buffer slots [first, first + count)
+ * of the currently bound compute shader, growing the slot array when
+ * needed.  For each bound resource the 32-bit offset found at handles[i]
+ * is replaced by the 64-bit address of the resource's data plus that
+ * offset (sizeof(uint64_t) bytes are written to handles[i]).
+ *
+ * A NULL resources array drops the references held by the given slots.
+ *
+ * The call does nothing when no compute shader is bound, and leaves the
+ * existing bindings untouched when the slot array cannot be grown.
+ */
+static void
+llvmpipe_set_global_binding(struct pipe_context *pipe,
+                            unsigned first, unsigned count,
+                            struct pipe_resource **resources,
+                            uint32_t **handles)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_compute_shader *cs = llvmpipe->cs;
+   unsigned i;
+
+   /* The bindings live on the compute shader; without one there is
+    * nowhere to record them.
+    */
+   if (!cs)
+      return;
+
+   if (first + count > (unsigned)cs->max_global_buffers) {
+      unsigned old_max = cs->max_global_buffers;
+      unsigned new_max = first + count;
+      struct pipe_resource **grown;
+
+      /* Keep the old array and its size intact if realloc fails, so the
+       * references it holds are neither leaked nor mis-sized.
+       */
+      grown = realloc(cs->global_buffers, new_max * sizeof(cs->global_buffers[0]));
+      if (!grown) {
+         return;
+      }
+
+      memset(&grown[old_max], 0, (new_max - old_max) * sizeof(grown[0]));
+      cs->global_buffers = grown;
+      cs->max_global_buffers = new_max;
+   }
+
+   if (!resources) {
+      for (i = 0; i < count; i++)
+         pipe_resource_reference(&cs->global_buffers[first + i], NULL);
+      return;
+   }
+
+   for (i = 0; i < count; i++) {
+      uint64_t va;
+      uint32_t offset;
+      pipe_resource_reference(&cs->global_buffers[first + i], resources[i]);
+      struct llvmpipe_resource *lp_res = llvmpipe_resource(resources[i]);
+      offset = *handles[i];
+      /* Go through uintptr_t so the pointer-to-integer conversion is well
+       * defined regardless of pointer width.
+       */
+      va = (uint64_t)(uintptr_t)((char *)lp_res->data + offset);
+      memcpy(handles[i], &va, sizeof(va));
+   }
+}
+
+/**
+ * Install llvmpipe's compute entry points (state create/bind/delete,
+ * compute resource and global buffer binding, grid launch) on the
+ * context's pipe_context.
+ */
 void
 llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe)
 {
    llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state;
    llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state;
    llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state;
+   llvmpipe->pipe.set_compute_resources = llvmpipe_set_compute_resources;
+   llvmpipe->pipe.set_global_binding = llvmpipe_set_global_binding;
    llvmpipe->pipe.launch_grid = llvmpipe_launch_grid;
 }
 
index 50f0856832b74bd221f2f068b25beabba99f463f..8f11889d0eca34250be22c18a82ff1fa6fb66379 100644 (file)
@@ -89,6 +89,9 @@ struct lp_compute_shader {
    unsigned no;
    unsigned variants_created;
    unsigned variants_cached;
+
+   int max_global_buffers;
+   struct pipe_resource **global_buffers;
 };
 
 struct lp_cs_exec {