From: Jason Ekstrand
Date: Thu, 19 Oct 2017 00:59:47 +0000 (-0700)
Subject: spirv: Add support for lowering workgroup access to offsets
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ae54a4f84fb6b440d7a8a57651965a4d9eed725b;p=mesa.git

spirv: Add support for lowering workgroup access to offsets

Before, we always left workgroup variables as shared nir_variables and
let the driver call nir_lower_io.  This adds an option to do the
lowering directly in spirv_to_nir.  To do this, we implicitly assign
the variables a std430 layout and then treat them like a UBO or SSBO
and immediately lower all the way to an offset.

As a side-effect, the spirv_to_nir pass now handles variable pointers
for workgroup variables.

Reviewed-by: Kristian H. Kristensen
---

diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h
index f129a205764..eb7146cb89d 100644
--- a/src/compiler/spirv/nir_spirv.h
+++ b/src/compiler/spirv/nir_spirv.h
@@ -49,6 +49,14 @@ enum nir_spirv_debug_level {
 };
 
 struct spirv_to_nir_options {
+   /* Whether or not to lower all workgroup variable access to offsets
+    * up-front.  This means you will get _shared intrinsics instead of
+    * _var for workgroup data access.
+    *
+    * This is currently required for full variable pointers support.
+    */
+   bool lower_workgroup_access_to_offsets;
+
    struct {
       bool float64;
       bool image_ms_array;
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index d4daa53f0e8..5c2f53dc130 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -809,6 +809,64 @@ translate_image_format(struct vtn_builder *b, SpvImageFormat format)
    }
 }
 
+static struct vtn_type *
+vtn_type_layout_std430(struct vtn_builder *b, struct vtn_type *type,
+                       uint32_t *size_out, uint32_t *align_out)
+{
+   switch (type->base_type) {
+   case vtn_base_type_scalar: {
+      uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
+      *size_out = comp_size;
+      *align_out = comp_size;
+      return type;
+   }
+
+   case vtn_base_type_vector: {
+      uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
+      assert(type->length > 0 && type->length <= 4);
+      unsigned align_comps = type->length == 3 ? 4 : type->length;
+      *size_out = comp_size * type->length,
+      *align_out = comp_size * align_comps;
+      return type;
+   }
+
+   case vtn_base_type_matrix:
+   case vtn_base_type_array: {
+      /* We're going to add an array stride */
+      type = vtn_type_copy(b, type);
+      uint32_t elem_size, elem_align;
+      type->array_element = vtn_type_layout_std430(b, type->array_element,
+                                                   &elem_size, &elem_align);
+      type->stride = vtn_align_u32(elem_size, elem_align);
+      *size_out = type->stride * type->length;
+      *align_out = elem_align;
+      return type;
+   }
+
+   case vtn_base_type_struct: {
+      /* We're going to add member offsets */
+      type = vtn_type_copy(b, type);
+      uint32_t offset = 0;
+      uint32_t align = 0;
+      for (unsigned i = 0; i < type->length; i++) {
+         uint32_t mem_size, mem_align;
+         type->members[i] = vtn_type_layout_std430(b, type->members[i],
+                                                   &mem_size, &mem_align);
+         offset = vtn_align_u32(offset, mem_align);
+         type->offsets[i] = offset;
+         offset += mem_size;
+         align = MAX2(align, mem_align);
+      }
+      *size_out = offset;
+      *align_out = align;
+      return type;
+   }
+
+   default:
+      unreachable("Invalid SPIR-V type for std430");
+   }
+}
+
 static void
 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
                 const uint32_t *w, unsigned count)
@@ -958,6 +1016,19 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
           */
          val->type->type = glsl_vector_type(GLSL_TYPE_UINT, 2);
       }
+
+      if (storage_class == SpvStorageClassWorkgroup &&
+          b->options->lower_workgroup_access_to_offsets) {
+         uint32_t size, align;
+         val->type->deref = vtn_type_layout_std430(b, val->type->deref,
+                                                   &size, &align);
+         val->type->length = size;
+         val->type->align = align;
+         /* These can actually be stored to nir_variables and used as SSA
+          * values so they need a real glsl_type.
+          */
+         val->type->type = glsl_uint_type();
+      }
       break;
    }
 
@@ -2181,6 +2252,32 @@ get_ssbo_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
    }
 }
 
+static nir_intrinsic_op
+get_shared_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
+{
+   switch (opcode) {
+   case SpvOpAtomicLoad:  return nir_intrinsic_load_shared;
+   case SpvOpAtomicStore: return nir_intrinsic_store_shared;
+#define OP(S, N) case SpvOp##S: return nir_intrinsic_shared_##N;
+   OP(AtomicExchange,        atomic_exchange)
+   OP(AtomicCompareExchange, atomic_comp_swap)
+   OP(AtomicIIncrement,      atomic_add)
+   OP(AtomicIDecrement,      atomic_add)
+   OP(AtomicIAdd,            atomic_add)
+   OP(AtomicISub,            atomic_add)
+   OP(AtomicSMin,            atomic_imin)
+   OP(AtomicUMin,            atomic_umin)
+   OP(AtomicSMax,            atomic_imax)
+   OP(AtomicUMax,            atomic_umax)
+   OP(AtomicAnd,             atomic_and)
+   OP(AtomicOr,              atomic_or)
+   OP(AtomicXor,             atomic_xor)
+#undef OP
+   default:
+      vtn_fail("Invalid shared atomic");
+   }
+}
+
 static nir_intrinsic_op
 get_var_nir_atomic_op(struct vtn_builder *b, SpvOp opcode)
 {
@@ -2246,7 +2343,8 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
    SpvMemorySemanticsMask semantics = w[5];
    */
 
-   if (ptr->mode == vtn_variable_mode_workgroup) {
+   if (ptr->mode == vtn_variable_mode_workgroup &&
+       !b->options->lower_workgroup_access_to_offsets) {
       nir_deref_var *deref = vtn_pointer_to_deref(b, ptr);
       const struct glsl_type *deref_type = nir_deref_tail(&deref->deref)->type;
       nir_intrinsic_op op = get_var_nir_atomic_op(b, opcode);
@@ -2286,27 +2384,36 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
       }
 
    } else {
-      vtn_assert(ptr->mode == vtn_variable_mode_ssbo);
       nir_ssa_def *offset, *index;
       offset = vtn_pointer_to_offset(b, ptr, &index, NULL);
 
-      nir_intrinsic_op op = get_ssbo_nir_atomic_op(b, opcode);
+      nir_intrinsic_op op;
+      if (ptr->mode == vtn_variable_mode_ssbo) {
+         op = get_ssbo_nir_atomic_op(b, opcode);
+      } else {
+         vtn_assert(ptr->mode == vtn_variable_mode_workgroup &&
+                    b->options->lower_workgroup_access_to_offsets);
+         op = get_shared_nir_atomic_op(b, opcode);
+      }
 
       atomic = nir_intrinsic_instr_create(b->nb.shader, op);
+      int src = 0;
 
       switch (opcode) {
       case SpvOpAtomicLoad:
          atomic->num_components = glsl_get_vector_elements(ptr->type->type);
-         atomic->src[0] = nir_src_for_ssa(index);
-         atomic->src[1] = nir_src_for_ssa(offset);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
          break;
 
       case SpvOpAtomicStore:
         atomic->num_components = glsl_get_vector_elements(ptr->type->type);
         nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
-         atomic->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
-         atomic->src[1] = nir_src_for_ssa(index);
-         atomic->src[2] = nir_src_for_ssa(offset);
+         atomic->src[src++] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
          break;
 
       case SpvOpAtomicExchange:
@@ -2323,9 +2430,10 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
       case SpvOpAtomicAnd:
       case SpvOpAtomicOr:
       case SpvOpAtomicXor:
-         atomic->src[0] = nir_src_for_ssa(index);
-         atomic->src[1] = nir_src_for_ssa(offset);
-         fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
+         fill_common_atomic_sources(b, opcode, w, &atomic->src[src]);
          break;
 
       default:
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 0f92a74ec84..5f140b4618f 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -276,7 +276,10 @@ struct vtn_type {
    /* The value that declares this type.  Used for finding decorations */
    struct vtn_value *val;
 
-   /* Specifies the length of complex types. */
+   /* Specifies the length of complex types.
+    *
+    * For Workgroup pointers, this is the size of the referenced type.
+    */
    unsigned length;
 
    /* for arrays, matrices and pointers, the array stride */
@@ -327,6 +330,9 @@ struct vtn_type {
 
          /* Storage class for pointers */
          SpvStorageClass storage_class;
+
+         /* Required alignment for pointers */
+         uint32_t align;
       };
 
       /* Members for image types */
@@ -441,6 +447,8 @@ struct vtn_variable {
    nir_variable *var;
    nir_variable **members;
 
+   int shared_location;
+
    /**
     * In some early released versions of GLSLang, it implemented all function
    * calls by making copies of all parameters into temporary variables and
@@ -686,6 +694,13 @@ void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
 bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
                                     const uint32_t *words, unsigned count);
 
+static inline uint32_t
+vtn_align_u32(uint32_t v, uint32_t a)
+{
+   assert(a != 0 && a == (a & -a));
+   return (v + a - 1) & ~(a - 1);
+}
+
 static inline uint64_t
 vtn_u64_literal(const uint32_t *w)
 {
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index 3369ae05a95..06eab2dc15c 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -62,7 +62,9 @@ vtn_pointer_uses_ssa_offset(struct vtn_builder *b,
                             struct vtn_pointer *ptr)
 {
    return ptr->mode == vtn_variable_mode_ubo ||
-          ptr->mode == vtn_variable_mode_ssbo;
+          ptr->mode == vtn_variable_mode_ssbo ||
+          (ptr->mode == vtn_variable_mode_workgroup &&
+           b->options->lower_workgroup_access_to_offsets);
 }
 
 static bool
@@ -71,7 +73,9 @@ vtn_pointer_is_external_block(struct vtn_builder *b,
 {
    return ptr->mode == vtn_variable_mode_ssbo ||
           ptr->mode == vtn_variable_mode_ubo ||
-          ptr->mode == vtn_variable_mode_push_constant;
+          ptr->mode == vtn_variable_mode_push_constant ||
+          (ptr->mode == vtn_variable_mode_workgroup &&
+           b->options->lower_workgroup_access_to_offsets);
 }
 
 /* Dereference the given base pointer by the access chain */
@@ -167,7 +171,8 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
       /* We need ptr_type for the stride */
      vtn_assert(base->ptr_type);
       /* This must be a pointer to an actual element somewhere */
-      vtn_assert(block_index && offset);
+      vtn_assert(offset);
+      vtn_assert(block_index || base->mode == vtn_variable_mode_workgroup);
 
       /* We need at least one element in the chain */
       vtn_assert(deref_chain->length >= 1);
@@ -183,6 +188,7 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
       vtn_assert(!block_index);
 
       vtn_assert(base->var);
+      vtn_assert(base->ptr_type);
       switch (base->mode) {
       case vtn_variable_mode_ubo:
       case vtn_variable_mode_ssbo:
@@ -201,6 +207,22 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
         offset = nir_imm_int(&b->nb, 0);
         break;
 
+      case vtn_variable_mode_workgroup:
+         /* Assign location on first use so that we don't end up bloating SLM
+          * address space for variables which are never statically used.
+          */
+         if (base->var->shared_location < 0) {
+            assert(base->ptr_type->length > 0 && base->ptr_type->align > 0);
+            b->shader->num_shared = vtn_align_u32(b->shader->num_shared,
+                                                  base->ptr_type->align);
+            base->var->shared_location = b->shader->num_shared;
+            b->shader->num_shared += base->ptr_type->length;
+         }
+
+         block_index = NULL;
+         offset = nir_imm_int(&b->nb, base->var->shared_location);
+         break;
+
       default:
          vtn_fail("Invalid offset pointer mode");
       }
@@ -837,6 +859,9 @@ vtn_block_load(struct vtn_builder *b, struct vtn_pointer *src)
       vtn_access_chain_get_offset_size(b, src->chain, src->var->type,
                                        &access_offset, &access_size);
       break;
+   case vtn_variable_mode_workgroup:
+      op = nir_intrinsic_load_shared;
+      break;
    default:
       vtn_fail("Invalid block variable mode");
    }
@@ -861,6 +886,9 @@ vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
    case vtn_variable_mode_ssbo:
       op = nir_intrinsic_store_ssbo;
       break;
+   case vtn_variable_mode_workgroup:
+      op = nir_intrinsic_store_shared;
+      break;
    default:
       vtn_fail("Invalid block variable mode");
    }
@@ -946,7 +974,8 @@ vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
                    struct vtn_pointer *dest)
 {
    if (vtn_pointer_is_external_block(b, dest)) {
-      vtn_assert(dest->mode == vtn_variable_mode_ssbo);
+      vtn_assert(dest->mode == vtn_variable_mode_ssbo ||
+                 dest->mode == vtn_variable_mode_workgroup);
       vtn_block_store(b, src, dest);
    } else {
       _vtn_variable_load_store(b, false, dest, &src);
@@ -1526,7 +1555,7 @@ vtn_pointer_to_ssa(struct vtn_builder *b, struct vtn_pointer *ptr)
                  ptr->mode == vtn_variable_mode_ssbo);
       return nir_vec2(&b->nb, ptr->block_index, ptr->offset);
    } else {
-      vtn_fail("Invalid pointer");
+      vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
       return ptr->offset;
    }
 }
@@ -1555,7 +1584,7 @@ vtn_pointer_from_ssa(struct vtn_builder *b, nir_ssa_def *ssa,
       ptr->offset = nir_channel(&b->nb, ssa, 1);
    } else {
       vtn_assert(ssa->num_components == 1);
-      unreachable("Invalid pointer");
+      vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
       ptr->block_index = NULL;
       ptr->offset = ssa;
    }
@@ -1630,7 +1659,6 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
    case vtn_variable_mode_global:
    case vtn_variable_mode_image:
    case vtn_variable_mode_sampler:
-   case vtn_variable_mode_workgroup:
       /* For these, we create the variable normally */
       var->var = rzalloc(b->shader, nir_variable);
       var->var->name = ralloc_strdup(var->var, val->name);
@@ -1648,6 +1676,18 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
       }
       break;
 
+   case vtn_variable_mode_workgroup:
+      if (b->options->lower_workgroup_access_to_offsets) {
+         var->shared_location = -1;
+      } else {
+         /* Create the variable normally */
+         var->var = rzalloc(b->shader, nir_variable);
+         var->var->name = ralloc_strdup(var->var, val->name);
+         var->var->type = var->type->type;
+         var->var->data.mode = nir_var_shared;
+      }
+      break;
+
    case vtn_variable_mode_input:
    case vtn_variable_mode_output: {
       /* In order to know whether or not we're a per-vertex inout, we need
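
As a standalone sketch of the allocation scheme introduced above (not part of the patch; the align_u32, shared_var and alloc_shared_location names are invented for illustration), the following program mirrors how vtn_ssa_offset_pointer_dereference() gives each workgroup variable a std430-aligned byte offset in shared local memory the first time it is dereferenced, bumping a running total the same way the patch bumps shader->num_shared with vtn_align_u32():

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Round v up to the next multiple of a; a must be a power of two
 * (same contract as vtn_align_u32() in the patch). */
static uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}

/* Hypothetical stand-in for the per-variable state the patch keeps. */
struct shared_var {
   uint32_t size;            /* std430 size of the variable's type   */
   uint32_t align;           /* std430 alignment of the type         */
   int      shared_location; /* -1 until the variable is first used  */
};

/* Lazily assign a byte offset in shared local memory on first use. */
static uint32_t
alloc_shared_location(struct shared_var *var, uint32_t *num_shared)
{
   if (var->shared_location < 0) {
      *num_shared = align_u32(*num_shared, var->align);
      var->shared_location = (int)*num_shared;
      *num_shared += var->size;
   }
   return (uint32_t)var->shared_location;
}

int
main(void)
{
   uint32_t num_shared = 0;
   /* A vec3 is 12 bytes but 16-byte aligned under std430; this matches
    * vtn_type_layout_std430(), where three components round up to four
    * for alignment purposes. */
   struct shared_var a = { .size = 12, .align = 16, .shared_location = -1 };
   struct shared_var b = { .size = 4,  .align = 4,  .shared_location = -1 };

   printf("a -> offset %u\n", (unsigned)alloc_shared_location(&a, &num_shared)); /* 0  */
   printf("b -> offset %u\n", (unsigned)alloc_shared_location(&b, &num_shared)); /* 12 */
   printf("total shared size: %u\n", (unsigned)num_shared);                      /* 16 */
   return 0;
}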