llvmpipe/gallivm: add kernel inputs
author Dave Airlie <airlied@redhat.com>
Tue, 10 Dec 2019 04:45:19 +0000 (14:45 +1000)
committer Dave Airlie <airlied@redhat.com>
Fri, 27 Dec 2019 03:22:40 +0000 (13:22 +1000)
Compute shaders need kernel input support.

Acked-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_nir.c
src/gallium/auxiliary/gallivm/lp_bld_nir.h
src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/drivers/llvmpipe/lp_jit.c
src/gallium/drivers/llvmpipe/lp_jit.h
src/gallium/drivers/llvmpipe/lp_state_cs.c
src/gallium/drivers/llvmpipe/lp_state_cs.h

index 770352b8daa643007233d61a446eb7d89d8a5c5d..a3b57e86a6fd1ef4771c31a9af2ae5a690e1240d 100644 (file)
@@ -1152,6 +1152,17 @@ static void visit_discard(struct lp_build_nir_context *bld_base,
    bld_base->discard(bld_base, cond);
 }
 
+/**
+ * Handle a load_kernel_input intrinsic by forwarding it to the
+ * load_kernel_arg hook of the build context.
+ *
+ * src[0] of the intrinsic supplies the offset.  The destination's component
+ * count and bit size, the bit size of the offset source, and whether that
+ * source is dynamically uniform are passed along; result[] is handed to the
+ * hook to be filled in.
+ */
+static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
+                                    nir_intrinsic_instr *instr, LLVMValueRef result[4])
+{
+   LLVMValueRef arg_offset = get_src(bld_base, instr->src[0]);
+   const bool uniform_offset = nir_src_is_dynamically_uniform(instr->src[0]);
+   const unsigned num_comps = nir_dest_num_components(instr->dest);
+   const unsigned dest_bits = nir_dest_bit_size(instr->dest);
+   const unsigned offset_bits = nir_src_bit_size(instr->src[0]);
+
+   bld_base->load_kernel_arg(bld_base, num_comps, dest_bits, offset_bits,
+                             uniform_offset, arg_offset, result);
+}
+
 static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
 {
@@ -1254,6 +1265,9 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
       break;
    case nir_intrinsic_memory_barrier:
       break;
+   case nir_intrinsic_load_kernel_input:
+      visit_load_kernel_input(bld_base, instr, result);
+      break;
    default:
       assert(0);
       break;
index 194b109760c47af59952e581b6681e2d49e3c1d6..9a20d14834d66f93a3cc0cbd26a68c8616f8445c 100644 (file)
@@ -61,6 +61,13 @@ struct lp_build_nir_context
                     bool offset_is_uniform,
                     LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[4]);
 
+   void (*load_kernel_arg)(struct lp_build_nir_context *bld_base,
+                           unsigned nc,
+                           unsigned bit_size,
+                           unsigned offset_bit_size,
+                           bool offset_is_uniform,
+                           LLVMValueRef offset, LLVMValueRef result[4]);
+
    /* for SSBO and shared memory */
    void (*load_mem)(struct lp_build_nir_context *bld_base,
                     unsigned nc, unsigned bit_size,
@@ -186,6 +193,8 @@ struct lp_build_nir_soa_context
     * set. The inputs[] array above is unused then.
     */
    LLVMValueRef inputs_array;
+
+   LLVMValueRef kernel_args_ptr;
 };
 
 bool
index 4d4408bacb3d66e7e3d8f9475c3097126dff7d4e..0c848db45561651eee00f086f1e174b488237640 100644 (file)
@@ -488,6 +488,45 @@ static void emit_store_reg(struct lp_build_nir_context *bld_base,
    }
 }
 
+/**
+ * Emit IR that loads kernel arguments from the kernel_args buffer.
+ *
+ * \param bld_base           build context; must really be an
+ *                           lp_build_nir_soa_context
+ * \param nc                 number of components to write into result[]
+ * \param bit_size           bit size of each loaded component
+ * \param offset_bit_size    bit size of the offset value; 64 selects 64-bit
+ *                           index constants, anything else 32-bit ones
+ * \param offset_is_uniform  whether the offset is the same in every lane
+ * \param offset             offset vector; it is shifted right by
+ *                           log2(bit_size / 8), i.e. presumably a byte offset
+ *                           that becomes an element index
+ * \param result             receives one broadcast vector per component
+ *
+ * Only uniform offsets are implemented: lane 0 of the offset is used for one
+ * scalar load per component, which is then broadcast.  A non-uniform offset
+ * trips an assert; so that result[] is never left uninitialised when asserts
+ * are compiled out, it is filled with zero vectors in that case.
+ */
+static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
+                                 unsigned nc,
+                                 unsigned bit_size,
+                                 unsigned offset_bit_size,
+                                 bool offset_is_uniform,
+                                 LLVMValueRef offset,
+                                 LLVMValueRef result[4])
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
+   LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
+   unsigned size_shift = 0;
+   struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
+
+   if (!offset_is_uniform) {
+      /* Per-lane (gather) loads are not implemented. */
+      assert(!"load_kernel_arg requires a dynamically uniform offset");
+      for (unsigned c = 0; c < nc; c++)
+         result[c] = lp_build_broadcast_scalar(bld_broad, LLVMConstNull(bld_broad->elem_type));
+      return;
+   }
+
+   /* log2 of the component size in bytes; 8-bit loads need no shift. */
+   if (bit_size == 16)
+      size_shift = 1;
+   else if (bit_size == 32)
+      size_shift = 2;
+   else if (bit_size == 64)
+      size_shift = 3;
+   if (size_shift)
+      offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
+
+   /* View the argument buffer as an array of bit_size-wide integers. */
+   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
+   kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
+
+   /* The offset is uniform, so lane 0 stands for every lane. */
+   offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
+
+   for (unsigned c = 0; c < nc; c++) {
+      /* The constant must have the same integer width as the offset. */
+      LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
+
+      LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
+      result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
+   }
+}
+
 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
                           unsigned nc,
                           unsigned bit_size,
@@ -1205,6 +1244,7 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
    bld.bld_base.store_reg = emit_store_reg;
    bld.bld_base.emit_var_decl = emit_var_decl;
    bld.bld_base.load_ubo = emit_load_ubo;
+   bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
    bld.bld_base.tex = emit_tex;
    bld.bld_base.tex_size = emit_tex_size;
    bld.bld_base.bgnloop = bgnloop;
@@ -1241,7 +1281,7 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
    bld.image = params->image;
    bld.shared_ptr = params->shared_ptr;
    bld.coro = params->coro;
-
+   bld.kernel_args_ptr = params->kernel_args;
    bld.indirects = 0;
    if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
       bld.indirects |= nir_var_shader_in;
index 5019c1dd8f6e3ec2f6599602b06a496a947a6d8b..63a5bc3d621df664b92ed0ada10e9b31a6144f26 100644 (file)
@@ -256,6 +256,7 @@ struct lp_build_tgsi_params {
    const struct lp_build_image_soa *image;
    LLVMValueRef shared_ptr;
    const struct lp_build_coro_suspend_info *coro;
+   LLVMValueRef kernel_args;
 };
 
 void
index 649f1ea0148f5dfe1c7f67668affec74bfbb235d..f4339e4c5dc58c38807c0a339a481712613f9f10 100644 (file)
@@ -362,6 +362,8 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp)
 
       elem_types[LP_JIT_CS_CTX_SHARED_SIZE] = LLVMInt32TypeInContext(lc);
 
+      elem_types[LP_JIT_CS_CTX_KERNEL_ARGS] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
+
       cs_context_type = LLVMStructTypeInContext(lc, elem_types,
                                              ARRAY_SIZE(elem_types), 0);
 
@@ -389,6 +391,9 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp)
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, shared_size,
                              gallivm->target, cs_context_type,
                              LP_JIT_CS_CTX_SHARED_SIZE);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, kernel_args,
+                             gallivm->target, cs_context_type,
+                             LP_JIT_CS_CTX_KERNEL_ARGS);
       LP_CHECK_STRUCT_SIZE(struct lp_jit_cs_context,
                            gallivm->target, cs_context_type);
 
index 8c89cad29394f4fbd495668d6d50804374a43256..0e549a05c0a961877303d8841c28f1b26cbcd410 100644 (file)
@@ -324,6 +324,8 @@ struct lp_jit_cs_context
    const uint32_t *ssbos[LP_MAX_TGSI_SHADER_BUFFERS];
    int num_ssbos[LP_MAX_TGSI_SHADER_BUFFERS];
 
+   void *kernel_args;
+
    uint32_t shared_size;
 };
 
@@ -339,6 +341,7 @@ enum {
    LP_JIT_CS_CTX_IMAGES,
    LP_JIT_CS_CTX_SSBOS,
    LP_JIT_CS_CTX_NUM_SSBOS,
+   LP_JIT_CS_CTX_KERNEL_ARGS,
    LP_JIT_CS_CTX_SHARED_SIZE,
    LP_JIT_CS_CTX_COUNT
 };
@@ -367,6 +370,9 @@ enum {
 #define lp_jit_cs_context_shared_size(_gallivm, _ptr) \
    lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size")
 
+/* Accessor for the kernel_args member of the compute-shader JIT context:
+ * expands to lp_build_struct_get() on _ptr with member index
+ * LP_JIT_CS_CTX_KERNEL_ARGS and the value name "kernel_args". */
+#define lp_jit_cs_context_kernel_args(_gallivm, _ptr) \
+   lp_build_struct_get(_gallivm, _ptr, LP_JIT_CS_CTX_KERNEL_ARGS, "kernel_args")
+
 
 typedef void
 (*lp_jit_cs_func)(const struct lp_jit_cs_context *context,
index e98927acf7bf62801bdb8a436fcf952fc0245ebd..f4f021742d69aa288afb2ac7571a6b90afcdaf89 100644 (file)
@@ -277,6 +277,7 @@ generate_compute(struct llvmpipe_context *lp,
       LLVMValueRef consts_ptr, num_consts_ptr;
       LLVMValueRef ssbo_ptr, num_ssbo_ptr;
       LLVMValueRef shared_ptr;
+      LLVMValueRef kernel_args_ptr;
       struct lp_build_mask_context mask;
       struct lp_bld_tgsi_system_values system_values;
 
@@ -285,6 +286,8 @@ generate_compute(struct llvmpipe_context *lp,
       num_consts_ptr = lp_jit_cs_context_num_constants(gallivm, context_ptr);
       ssbo_ptr = lp_jit_cs_context_ssbos(gallivm, context_ptr);
       num_ssbo_ptr = lp_jit_cs_context_num_ssbos(gallivm, context_ptr);
+      kernel_args_ptr = lp_jit_cs_context_kernel_args(gallivm, context_ptr);
+
       shared_ptr = lp_jit_cs_thread_data_shared(gallivm, thread_data_ptr);
 
       /* these are coroutine entrypoint necessities */
@@ -360,6 +363,7 @@ generate_compute(struct llvmpipe_context *lp,
       params.image = image;
       params.shared_ptr = shared_ptr;
       params.coro = &coro_info;
+      params.kernel_args = kernel_args_ptr;
 
       if (shader->base.type == PIPE_SHADER_IR_TGSI)
          lp_build_tgsi_soa(gallivm, shader->base.tokens, &params, NULL);
@@ -1093,7 +1097,7 @@ update_csctx_ssbo(struct llvmpipe_context *llvmpipe)
 }
 
 static void
-llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe)
+llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe, void *input)
 {
    if (llvmpipe->cs_dirty & (LP_CSNEW_CS))
       llvmpipe_update_cs(llvmpipe);
@@ -1127,6 +1131,12 @@ llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe)
                               ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_COMPUTE]),
                               llvmpipe->images[PIPE_SHADER_COMPUTE]);
 
+   if (input) {
+      struct lp_cs_context *csctx = llvmpipe->csctx;
+      csctx->input = input;
+      csctx->cs.current.jit_context.kernel_args = input;
+   }
+
    llvmpipe->cs_dirty = 0;
 }
 
@@ -1193,7 +1203,7 @@ static void llvmpipe_launch_grid(struct pipe_context *pipe,
 
    memset(&job_info, 0, sizeof(job_info));
 
-   llvmpipe_cs_update_derived(llvmpipe);
+   llvmpipe_cs_update_derived(llvmpipe, info->input);
 
    fill_grid_size(pipe, info, job_info.grid_size);
 
index 4bc434edd28bfb6e29085d9315135aaa2c03fb3f..50f0856832b74bd221f2f068b25beabba99f463f 100644 (file)
@@ -120,6 +120,8 @@ struct lp_cs_context {
    struct {
       struct pipe_image_view current;
    } images[LP_MAX_TGSI_SHADER_IMAGES];
+
+   void *input;
 };
 
 struct lp_cs_context *lp_csctx_create(struct pipe_context *pipe);