From 264663d55d321225a4962073ce4b7389d3d42287 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 10 Dec 2019 14:53:51 +1000 Subject: [PATCH] gallivm/llvmpipe: add support for global operations. Acked-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_nir.c | 53 +++++ src/gallium/auxiliary/gallivm/lp_bld_nir.h | 18 ++ .../auxiliary/gallivm/lp_bld_nir_soa.c | 198 ++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_context.h | 4 + src/gallium/drivers/llvmpipe/lp_state_cs.c | 56 +++++ src/gallium/drivers/llvmpipe/lp_state_cs.h | 3 + 6 files changed, 332 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 9c378d174d8..b1a812d9193 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -1212,6 +1212,41 @@ static void visit_load_kernel_input(struct lp_build_nir_context *bld_base, offset_is_uniform, offset, result); } +static void visit_load_global(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, LLVMValueRef result[4]) +{ + LLVMValueRef addr = get_src(bld_base, instr->src[0]); + bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), + nir_src_bit_size(instr->src[0]), + addr, result); +} + +static void visit_store_global(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr) +{ + LLVMValueRef val = get_src(bld_base, instr->src[0]); + int nc = nir_src_num_components(instr->src[0]); + int bitsize = nir_src_bit_size(instr->src[0]); + LLVMValueRef addr = get_src(bld_base, instr->src[1]); + int addr_bitsize = nir_src_bit_size(instr->src[1]); + int writemask = instr->const_index[0]; + bld_base->store_global(bld_base, writemask, nc, bitsize, addr_bitsize, addr, val); +} + +static void visit_global_atomic(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]) +{ + LLVMValueRef addr = get_src(bld_base, instr->src[0]); + LLVMValueRef val = get_src(bld_base, instr->src[1]); + LLVMValueRef val2 = NULL; + int addr_bitsize = nir_src_bit_size(instr->src[0]); + if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap) + val2 = get_src(bld_base, instr->src[2]); + + bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize, addr, val, val2, &result[0]); +} + static void visit_intrinsic(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr) { @@ -1318,6 +1353,24 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base, break; case nir_intrinsic_load_kernel_input: visit_load_kernel_input(bld_base, instr, result); + break; + case nir_intrinsic_load_global: + visit_load_global(bld_base, instr, result); + break; + case nir_intrinsic_store_global: + visit_store_global(bld_base, instr); + break; + case nir_intrinsic_global_atomic_add: + case nir_intrinsic_global_atomic_imin: + case nir_intrinsic_global_atomic_umin: + case nir_intrinsic_global_atomic_imax: + case nir_intrinsic_global_atomic_umax: + case nir_intrinsic_global_atomic_and: + case nir_intrinsic_global_atomic_or: + case nir_intrinsic_global_atomic_xor: + case nir_intrinsic_global_atomic_exchange: + case nir_intrinsic_global_atomic_comp_swap: + visit_global_atomic(bld_base, instr, result); break; default: assert(0); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h index dbfed24074b..c8a80428e7d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h @@ -72,6 +72,24 @@ struct lp_build_nir_context bool offset_is_uniform, LLVMValueRef offset, LLVMValueRef result[4]); + void (*load_global)(struct lp_build_nir_context *bld_base, + unsigned nc, unsigned bit_size, + unsigned offset_bit_size, + LLVMValueRef offset, LLVMValueRef result[4]); + + void (*store_global)(struct lp_build_nir_context *bld_base, + unsigned writemask, + unsigned nc, unsigned bit_size, + unsigned addr_bit_size, + LLVMValueRef addr, LLVMValueRef dst); + + void (*atomic_global)(struct lp_build_nir_context *bld_base, + nir_intrinsic_op op, + unsigned addr_bit_size, + LLVMValueRef addr, + LLVMValueRef val, LLVMValueRef val2, + LLVMValueRef *result); + /* for SSBO and shared memory */ void (*load_mem)(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index 99f49a942fe..bf03e0a39ef 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -527,6 +527,201 @@ static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base, } } +static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size) +{ + LLVMBuilderRef builder = gallivm->builder; + switch (bit_size) { + case 8: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), ""); + break; + case 16: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), ""); + break; + case 32: + default: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); + break; + case 64: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), ""); + break; + } + return addr_ptr; +} + +static void emit_load_global(struct lp_build_nir_context *bld_base, + unsigned nc, + unsigned bit_size, + unsigned addr_bit_size, + LLVMValueRef addr, + LLVMValueRef outval[4]) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + struct lp_build_context *res_bld; + + res_bld = get_int_bld(bld_base, true, bit_size); + + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, ""); + + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + + LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr, + loop_state.counter, ""); + addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size); + + LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c)); + + LLVMValueRef temp_res; + temp_res = LLVMBuildLoad(builder, result, ""); + temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, ""); + LLVMBuildStore(builder, temp_res, result); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + outval[c] = LLVMBuildLoad(builder, result, ""); + } +} + +static void emit_store_global(struct lp_build_nir_context *bld_base, + unsigned writemask, + unsigned nc, unsigned bit_size, + unsigned addr_bit_size, + LLVMValueRef addr, + LLVMValueRef dst) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + + for (unsigned c = 0; c < nc; c++) { + if (!(writemask & (1u << c))) + continue; + LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, ""); + + LLVMValueRef exec_mask = mask_vec(bld_base); + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, + loop_state.counter, ""); + + LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr, + loop_state.counter, ""); + addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size); + switch (bit_size) { + case 32: + value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), ""); + break; + case 64: + value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), ""); + break; + default: + break; + } + struct lp_build_if_state ifthen; + + LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); + cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + lp_build_if(&ifthen, gallivm, cond); + lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + } +} + +static void emit_atomic_global(struct lp_build_nir_context *bld_base, + nir_intrinsic_op nir_op, + unsigned addr_bit_size, + LLVMValueRef addr, + LLVMValueRef val, LLVMValueRef val2, + LLVMValueRef *result) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMAtomicRMWBinOp op; + switch (nir_op) { + case nir_intrinsic_global_atomic_add: + op = LLVMAtomicRMWBinOpAdd; + break; + case nir_intrinsic_global_atomic_exchange: + op = LLVMAtomicRMWBinOpXchg; + break; + case nir_intrinsic_global_atomic_and: + op = LLVMAtomicRMWBinOpAnd; + break; + case nir_intrinsic_global_atomic_or: + op = LLVMAtomicRMWBinOpOr; + break; + case nir_intrinsic_global_atomic_xor: + op = LLVMAtomicRMWBinOpXor; + break; + case nir_intrinsic_global_atomic_umin: + op = LLVMAtomicRMWBinOpUMin; + break; + case nir_intrinsic_global_atomic_umax: + op = LLVMAtomicRMWBinOpUMax; + break; + case nir_intrinsic_global_atomic_imin: + op = LLVMAtomicRMWBinOpMin; + break; + case nir_intrinsic_global_atomic_imax: + op = LLVMAtomicRMWBinOpMax; + break; + default: + break; + } + + LLVMValueRef atom_res = lp_build_alloca(gallivm, + uint_bld->vec_type, ""); + LLVMValueRef exec_mask = mask_vec(bld_base); + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, + loop_state.counter, ""); + + LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr, + loop_state.counter, ""); + addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32); + struct lp_build_if_state ifthen; + LLVMValueRef cond, temp_res; + LLVMValueRef scalar; + cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); + cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + lp_build_if(&ifthen, gallivm, cond); + + if (nir_op == nir_intrinsic_global_atomic_comp_swap) { + LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2, + loop_state.counter, ""); + cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, ""); + scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr, + cas_src_ptr, + LLVMAtomicOrderingSequentiallyConsistent, + LLVMAtomicOrderingSequentiallyConsistent, + false); + scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, ""); + } else { + scalar = LLVMBuildAtomicRMW(builder, op, + addr_ptr, value_ptr, + LLVMAtomicOrderingSequentiallyConsistent, + false); + } + temp_res = LLVMBuildLoad(builder, atom_res, ""); + temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); + LLVMBuildStore(builder, temp_res, atom_res); + lp_build_else(&ifthen); + temp_res = LLVMBuildLoad(builder, atom_res, ""); + temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, ""); + LLVMBuildStore(builder, temp_res, atom_res); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + *result = LLVMBuildLoad(builder, atom_res, ""); +} + static void emit_load_ubo(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, @@ -1276,6 +1471,9 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, bld.bld_base.emit_var_decl = emit_var_decl; bld.bld_base.load_ubo = emit_load_ubo; bld.bld_base.load_kernel_arg = emit_load_kernel_arg; + bld.bld_base.load_global = emit_load_global; + bld.bld_base.store_global = emit_store_global; + bld.bld_base.atomic_global = emit_atomic_global; bld.bld_base.tex = emit_tex; bld.bld_base.tex_size = emit_tex_size; bld.bld_base.bgnloop = bgnloop; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 1d36f4968cc..0e029f59122 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -169,6 +169,10 @@ struct llvmpipe_context { /** The LLVMContext to use for LLVM related work */ LLVMContextRef context; + + int max_global_buffers; + struct pipe_resource **global_buffers; + }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.c b/src/gallium/drivers/llvmpipe/lp_state_cs.c index 83876dd98d4..83affd7d429 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.c @@ -499,6 +499,12 @@ llvmpipe_delete_compute_state(struct pipe_context *pipe, struct lp_compute_shader *shader = cs; struct lp_cs_variant_list_item *li; + if (llvmpipe->cs == cs) + llvmpipe->cs = NULL; + for (unsigned i = 0; i < shader->max_global_buffers; i++) + pipe_resource_reference(&shader->global_buffers[i], NULL); + FREE(shader->global_buffers); + /* Delete all the variants */ li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { @@ -1249,12 +1255,62 @@ static void llvmpipe_launch_grid(struct pipe_context *pipe, llvmpipe->pipeline_statistics.cs_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2]; } +static void +llvmpipe_set_compute_resources(struct pipe_context *pipe, + unsigned start, unsigned count, + struct pipe_surface **resources) +{ + + +} + +static void +llvmpipe_set_global_binding(struct pipe_context *pipe, + unsigned first, unsigned count, + struct pipe_resource **resources, + uint32_t **handles) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct lp_compute_shader *cs = llvmpipe->cs; + unsigned i; + + if (first + count > cs->max_global_buffers) { + unsigned old_max = cs->max_global_buffers; + cs->max_global_buffers = first + count; + cs->global_buffers = realloc(cs->global_buffers, + cs->max_global_buffers * sizeof(cs->global_buffers[0])); + if (!cs->global_buffers) { + return; + } + + memset(&cs->global_buffers[old_max], 0, (cs->max_global_buffers - old_max) * sizeof(cs->global_buffers[0])); + } + + if (!resources) { + for (i = 0; i < count; i++) + pipe_resource_reference(&cs->global_buffers[first + i], NULL); + return; + } + + for (i = 0; i < count; i++) { + uint64_t va; + uint32_t offset; + pipe_resource_reference(&cs->global_buffers[first + i], resources[i]); + struct llvmpipe_resource *lp_res = llvmpipe_resource(resources[i]); + offset = *handles[i]; + va = (uint64_t)((char *)lp_res->data + offset); + memcpy(handles[i], &va, sizeof(va)); + } +} + void llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe) { llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state; llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state; llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state; + llvmpipe->pipe.set_compute_resources = llvmpipe_set_compute_resources; + llvmpipe->pipe.set_global_binding = llvmpipe_set_global_binding; llvmpipe->pipe.launch_grid = llvmpipe_launch_grid; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.h b/src/gallium/drivers/llvmpipe/lp_state_cs.h index 50f0856832b..8f11889d0ec 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.h @@ -89,6 +89,9 @@ struct lp_compute_shader { unsigned no; unsigned variants_created; unsigned variants_cached; + + int max_global_buffers; + struct pipe_resource **global_buffers; }; struct lp_cs_exec { -- 2.30.2