From 258b9bc02eb7069a8f85cbf568e73c788da819a3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 10 Dec 2019 14:45:19 +1000 Subject: [PATCH] llvmpipe/gallivm: add kernel inputs compute shaders need kernel input support Acked-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_nir.c | 14 +++++++ src/gallium/auxiliary/gallivm/lp_bld_nir.h | 9 ++++ .../auxiliary/gallivm/lp_bld_nir_soa.c | 42 ++++++++++++++++++- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 1 + src/gallium/drivers/llvmpipe/lp_jit.c | 5 +++ src/gallium/drivers/llvmpipe/lp_jit.h | 6 +++ src/gallium/drivers/llvmpipe/lp_state_cs.c | 14 ++++++- src/gallium/drivers/llvmpipe/lp_state_cs.h | 2 + 8 files changed, 90 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 770352b8daa..a3b57e86a6f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -1152,6 +1152,17 @@ static void visit_discard(struct lp_build_nir_context *bld_base, bld_base->discard(bld_base, cond); } +static void visit_load_kernel_input(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, LLVMValueRef result[4]) +{ + LLVMValueRef offset = get_src(bld_base, instr->src[0]); + + bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]); + bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), + nir_src_bit_size(instr->src[0]), + offset_is_uniform, offset, result); +} + static void visit_intrinsic(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr) { @@ -1254,6 +1265,9 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base, break; case nir_intrinsic_memory_barrier: break; + case nir_intrinsic_load_kernel_input: + visit_load_kernel_input(bld_base, instr, result); + break; default: assert(0); break; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h 
b/src/gallium/auxiliary/gallivm/lp_bld_nir.h index 194b109760c..9a20d14834d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h @@ -61,6 +61,13 @@ struct lp_build_nir_context bool offset_is_uniform, LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[4]); + void (*load_kernel_arg)(struct lp_build_nir_context *bld_base, + unsigned nc, + unsigned bit_size, + unsigned offset_bit_size, + bool offset_is_uniform, + LLVMValueRef offset, LLVMValueRef result[4]); + /* for SSBO and shared memory */ void (*load_mem)(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, @@ -186,6 +193,8 @@ struct lp_build_nir_soa_context * set. The inputs[] array above is unused then. */ LLVMValueRef inputs_array; + + LLVMValueRef kernel_args_ptr; }; bool diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index 4d4408bacb3..0c848db4556 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -488,6 +488,45 @@ static void emit_store_reg(struct lp_build_nir_context *bld_base, } } +static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base, + unsigned nc, + unsigned bit_size, + unsigned offset_bit_size, + bool offset_is_uniform, + LLVMValueRef offset, + LLVMValueRef result[4]) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size); + LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr; + unsigned size_shift = 0; + struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size); + if (bit_size == 16) + size_shift = 1; + else if (bit_size == 32) + size_shift = 2; + else if (bit_size == 64) + size_shift = 3; + if (size_shift) + offset = lp_build_shr(bld_offset, 
offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift)); + + LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0); + kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, ""); + + if (offset_is_uniform) { + offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), ""); + + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), ""); + + LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset); + result[c] = lp_build_broadcast_scalar(bld_broad, scalar); + } + } +} + static void emit_load_ubo(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, @@ -1205,6 +1244,7 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, bld.bld_base.store_reg = emit_store_reg; bld.bld_base.emit_var_decl = emit_var_decl; bld.bld_base.load_ubo = emit_load_ubo; + bld.bld_base.load_kernel_arg = emit_load_kernel_arg; bld.bld_base.tex = emit_tex; bld.bld_base.tex_size = emit_tex_size; bld.bld_base.bgnloop = bgnloop; @@ -1241,7 +1281,7 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, bld.image = params->image; bld.shared_ptr = params->shared_ptr; bld.coro = params->coro; - + bld.kernel_args_ptr = params->kernel_args; bld.indirects = 0; if (params->info->indirect_files & (1 << TGSI_FILE_INPUT)) bld.indirects |= nir_var_shader_in; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 5019c1dd8f6..63a5bc3d621 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -256,6 +256,7 @@ struct lp_build_tgsi_params { const struct lp_build_image_soa *image; LLVMValueRef shared_ptr; const struct lp_build_coro_suspend_info *coro; + LLVMValueRef kernel_args; }; void diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c 
b/src/gallium/drivers/llvmpipe/lp_jit.c index 649f1ea0148..f4339e4c5dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -362,6 +362,8 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp) elem_types[LP_JIT_CS_CTX_SHARED_SIZE] = LLVMInt32TypeInContext(lc); + elem_types[LP_JIT_CS_CTX_KERNEL_ARGS] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); + cs_context_type = LLVMStructTypeInContext(lc, elem_types, ARRAY_SIZE(elem_types), 0); @@ -389,6 +391,9 @@ lp_jit_create_cs_types(struct lp_compute_shader_variant *lp) LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, shared_size, gallivm->target, cs_context_type, LP_JIT_CS_CTX_SHARED_SIZE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_cs_context, kernel_args, + gallivm->target, cs_context_type, + LP_JIT_CS_CTX_KERNEL_ARGS); LP_CHECK_STRUCT_SIZE(struct lp_jit_cs_context, gallivm->target, cs_context_type); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 8c89cad2939..0e549a05c0a 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -324,6 +324,8 @@ struct lp_jit_cs_context const uint32_t *ssbos[LP_MAX_TGSI_SHADER_BUFFERS]; int num_ssbos[LP_MAX_TGSI_SHADER_BUFFERS]; + void *kernel_args; + uint32_t shared_size; }; @@ -339,6 +341,7 @@ enum { LP_JIT_CS_CTX_IMAGES, LP_JIT_CS_CTX_SSBOS, LP_JIT_CS_CTX_NUM_SSBOS, + LP_JIT_CS_CTX_KERNEL_ARGS, LP_JIT_CS_CTX_SHARED_SIZE, LP_JIT_CS_CTX_COUNT }; @@ -367,6 +370,9 @@ enum { #define lp_jit_cs_context_shared_size(_gallivm, _ptr) \ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size") +#define lp_jit_cs_context_kernel_args(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CS_CTX_KERNEL_ARGS, "kernel_args") + typedef void (*lp_jit_cs_func)(const struct lp_jit_cs_context *context, diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.c b/src/gallium/drivers/llvmpipe/lp_state_cs.c index e98927acf7b..f4f021742d6 
100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.c @@ -277,6 +277,7 @@ generate_compute(struct llvmpipe_context *lp, LLVMValueRef consts_ptr, num_consts_ptr; LLVMValueRef ssbo_ptr, num_ssbo_ptr; LLVMValueRef shared_ptr; + LLVMValueRef kernel_args_ptr; struct lp_build_mask_context mask; struct lp_bld_tgsi_system_values system_values; @@ -285,6 +286,8 @@ generate_compute(struct llvmpipe_context *lp, num_consts_ptr = lp_jit_cs_context_num_constants(gallivm, context_ptr); ssbo_ptr = lp_jit_cs_context_ssbos(gallivm, context_ptr); num_ssbo_ptr = lp_jit_cs_context_num_ssbos(gallivm, context_ptr); + kernel_args_ptr = lp_jit_cs_context_kernel_args(gallivm, context_ptr); + shared_ptr = lp_jit_cs_thread_data_shared(gallivm, thread_data_ptr); /* these are coroutine entrypoint necessities */ @@ -360,6 +363,7 @@ generate_compute(struct llvmpipe_context *lp, params.image = image; params.shared_ptr = shared_ptr; params.coro = &coro_info; + params.kernel_args = kernel_args_ptr; if (shader->base.type == PIPE_SHADER_IR_TGSI) lp_build_tgsi_soa(gallivm, shader->base.tokens, &params, NULL); @@ -1093,7 +1097,7 @@ update_csctx_ssbo(struct llvmpipe_context *llvmpipe) } static void -llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe) +llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe, void *input) { if (llvmpipe->cs_dirty & (LP_CSNEW_CS)) llvmpipe_update_cs(llvmpipe); @@ -1127,6 +1131,12 @@ llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe) ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_COMPUTE]), llvmpipe->images[PIPE_SHADER_COMPUTE]); + if (input) { + struct lp_cs_context *csctx = llvmpipe->csctx; + csctx->input = input; + csctx->cs.current.jit_context.kernel_args = input; + } + llvmpipe->cs_dirty = 0; } @@ -1193,7 +1203,7 @@ static void llvmpipe_launch_grid(struct pipe_context *pipe, memset(&job_info, 0, sizeof(job_info)); - llvmpipe_cs_update_derived(llvmpipe); + llvmpipe_cs_update_derived(llvmpipe, info->input); 
fill_grid_size(pipe, info, job_info.grid_size); diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.h b/src/gallium/drivers/llvmpipe/lp_state_cs.h index 4bc434edd28..50f0856832b 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.h @@ -120,6 +120,8 @@ struct lp_cs_context { struct { struct pipe_image_view current; } images[LP_MAX_TGSI_SHADER_IMAGES]; + + void *input; }; struct lp_cs_context *lp_csctx_create(struct pipe_context *pipe); -- 2.30.2