From fd73ed1bd7e4d5a6af05d29908d6c4e2cca868b5 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 9 Apr 2019 20:18:11 +0100 Subject: [PATCH] nir: add nir_lower_to_explicit() v2: use glsl_type_size_align_func v2: move get_explicit_type() to glsl_types.cpp/nir_types.cpp v2: use align() instead of util_align_npot() v2: pack arrays a bit tighter v2: rename mem_* to field_* v2: don't attempt to handle when struct offsets are already set v2: use column_type() instead of recreating it v2: use a branch instead of |= in nir_lower_to_explicit_impl() v2: assign locations to variables and update shared_size and num_shared v2: allow the pass to be used with nir_var_{shader_temp,function_temp} v4: rebase v5: add TODO v5: small formatting changes v5: remove incorrect assert in get_explicit_type() v5: rename to nir_lower_vars_to_explicit_types v5: correctly update progress when only variables are updated v5: rename get_explicit_type() to get_explicit_shared_type() v5: add comment explaining how get_explicit_shared_type() is different v5: update cast strides v6: update progress when lowering nir_var_function_temp variables v6: formatting changes v6: add more detailed documentation comment for get_explicit_shared_type v6: rename get_explicit_shared_type to get_explicit_type_for_size_align v7: fix comment in nir_lower_vars_to_explicit_types_impl() Signed-off-by: Rhys Perry Reviewed-by: Caio Marcelo de Oliveira Filho (v5) Reviewed-by: Jason Ekstrand --- src/compiler/glsl_types.cpp | 61 ++++++++++++++++++ src/compiler/glsl_types.h | 23 +++++++ src/compiler/nir/nir.h | 5 ++ src/compiler/nir/nir_lower_io.c | 107 ++++++++++++++++++++++++++++++++ src/compiler/nir_types.cpp | 8 +++ src/compiler/nir_types.h | 7 ++- 6 files changed, 208 insertions(+), 3 deletions(-) diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 10cfa3b4b1e..943ab020487 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -2365,6 +2365,67 @@ 
glsl_type::get_explicit_interface_type(bool supports_std430) const } } +/* This differs from get_explicit_std430_type() in that it: + * - can size arrays slightly smaller ("stride * (len - 1) + elem_size" instead + * of "stride * len") + * - consumes a glsl_type_size_align_func which allows 8 and 16-bit values to be + * packed more tightly + * - overrides any struct field offsets but get_explicit_std430_type() tries to + * respect any existing ones + */ +const glsl_type * +glsl_type::get_explicit_type_for_size_align(glsl_type_size_align_func type_info, + unsigned *size, unsigned *alignment) const +{ + if (this->is_scalar() || this->is_vector()) { + type_info(this, size, alignment); + return this; + } else if (this->is_array()) { + unsigned elem_size, elem_align; + const struct glsl_type *explicit_element = + this->fields.array->get_explicit_type_for_size_align(type_info, &elem_size, &elem_align); + + unsigned stride = align(elem_size, elem_align); + + *size = stride * (this->length - 1) + elem_size; + *alignment = elem_align; + return glsl_type::get_array_instance(explicit_element, this->length, stride); + } else if (this->is_struct()) { + struct glsl_struct_field *fields = (struct glsl_struct_field *) + malloc(sizeof(struct glsl_struct_field) * this->length); + + *size = 0; + *alignment = 0; + for (unsigned i = 0; i < this->length; i++) { + fields[i] = this->fields.structure[i]; + assert(fields[i].matrix_layout != GLSL_MATRIX_LAYOUT_ROW_MAJOR); + + unsigned field_size, field_align; + fields[i].type = + fields[i].type->get_explicit_type_for_size_align(type_info, &field_size, &field_align); + fields[i].offset = align(*size, field_align); + + *size = fields[i].offset + field_size; + *alignment = MAX2(*alignment, field_align); + } + + const glsl_type *type = glsl_type::get_struct_instance(fields, this->length, this->name, false); + free(fields); + return type; + } else if (this->is_matrix()) { + unsigned col_size, col_align; + type_info(this->column_type(), &col_size, 
&col_align); + unsigned stride = align(col_size, col_align); + + *size = this->matrix_columns * stride; + *alignment = col_align; + return glsl_type::get_instance(this->base_type, this->vector_elements, + this->matrix_columns, stride, false); + } else { + unreachable("Unhandled type."); + } +} + unsigned glsl_type::count_attribute_slots(bool is_gl_vertex_input) const { diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h index 40f148a2171..dcd7eea6dc0 100644 --- a/src/compiler/glsl_types.h +++ b/src/compiler/glsl_types.h @@ -63,6 +63,9 @@ const struct glsl_type *decode_type_from_blob(struct blob_reader *blob); } #endif +typedef void (*glsl_type_size_align_func)(const struct glsl_type *type, + unsigned *size, unsigned *align); + enum glsl_base_type { /* Note: GLSL_TYPE_UINT, GLSL_TYPE_INT, and GLSL_TYPE_FLOAT must be 0, 1, * and 2 so that they will fit in the 2 bits of glsl_type::sampled_type. @@ -533,6 +536,26 @@ public: */ const glsl_type *get_explicit_interface_type(bool supports_std430) const; + /** Returns an explicitly laid out type given a type and size/align func + * + * The size/align func is only called for scalar and vector types and the + * returned type is otherwise laid out in the natural way as follows: + * + * - Arrays and matrices have a stride of ALIGN(elem_size, elem_align). + * + * - Structure types have their elements in-order and as tightly packed as + * possible following the alignment required by the size/align func. + * + * - All composite types (structures, matrices, and arrays) have an + * alignment equal to the highest alignment of any member of the composite. + * + * The types returned by this function are likely not suitable for most UBO + * or SSBO layout because they do not add the extra array and substructure + * alignment that is required by std140 and std430. 
+ */ + const glsl_type *get_explicit_type_for_size_align(glsl_type_size_align_func type_info, + unsigned *size, unsigned *align) const; + /** * Alignment in bytes of the start of this type in OpenCL memory. */ diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 06acfc6c562..8bec9760767 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3464,6 +3464,11 @@ bool nir_lower_io(nir_shader *shader, bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode); +bool +nir_lower_vars_to_explicit_types(nir_shader *shader, + nir_variable_mode modes, + glsl_type_size_align_func type_info); + typedef enum { /** * An address format which is a simple 32-bit global GPU address. diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index f8bcf4420e0..271c2a895f1 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -35,6 +35,8 @@ #include "nir_builder.h" #include "nir_deref.h" +#include "util/u_math.h" + struct lower_io_state { void *dead_ctx; nir_builder builder; @@ -1332,6 +1334,111 @@ nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes, return progress; } +static bool +nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl, + nir_variable_mode modes, + glsl_type_size_align_func type_info) +{ + bool progress = false; + + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_deref) + continue; + + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (!(deref->mode & modes)) + continue; + + unsigned size, alignment; + const struct glsl_type *new_type = + glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment); + if (new_type != deref->type) { + progress = true; + deref->type = new_type; + } + if (deref->deref_type == nir_deref_type_cast) { + /* See also glsl_type::get_explicit_type_for_size_align() */ + unsigned new_stride = align(size, alignment); + if (new_stride != 
deref->cast.ptr_stride) { + deref->cast.ptr_stride = new_stride; + progress = true; + } + } + } + } + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance | + nir_metadata_live_ssa_defs | + nir_metadata_loop_analysis); + } + + return progress; +} + +static bool +lower_vars_to_explicit(nir_shader *shader, + struct exec_list *vars, nir_variable_mode mode, + glsl_type_size_align_func type_info) +{ + bool progress = false; + unsigned offset = 0; + nir_foreach_variable(var, vars) { + unsigned size, align; + const struct glsl_type *explicit_type = + glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align); + + if (explicit_type != var->type) { + progress = true; + var->type = explicit_type; + } + + var->data.driver_location = ALIGN_POT(offset, align); + offset = var->data.driver_location + size; + } + + if (mode == nir_var_mem_shared) { + shader->info.cs.shared_size = offset; + shader->num_shared = offset; + } + + return progress; +} + +bool +nir_lower_vars_to_explicit_types(nir_shader *shader, + nir_variable_mode modes, + glsl_type_size_align_func type_info) +{ + /* TODO: Situations which need to be handled to support more modes: + * - row-major matrices + * - compact shader inputs/outputs + * - interface types + */ + nir_variable_mode supported = nir_var_mem_shared | nir_var_shader_temp | nir_var_function_temp; + assert(!(modes & ~supported) && "unsupported"); + + bool progress = false; + + if (modes & nir_var_mem_shared) + progress |= lower_vars_to_explicit(shader, &shader->shared, nir_var_mem_shared, type_info); + if (modes & nir_var_shader_temp) + progress |= lower_vars_to_explicit(shader, &shader->globals, nir_var_shader_temp, type_info); + + nir_foreach_function(function, shader) { + if (function->impl) { + if (modes & nir_var_function_temp) + progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info); + + progress |= 
nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info); + } + } + + return progress; +} + /** * Return the offset source for a load/store intrinsic. */ diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index 15c752676f4..aa4a7df0cbe 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -746,3 +746,11 @@ glsl_type_is_leaf(const struct glsl_type *type) return true; } } + +const struct glsl_type * +glsl_get_explicit_type_for_size_align(const struct glsl_type *type, + glsl_type_size_align_func type_info, + unsigned *size, unsigned *align) +{ + return type->get_explicit_type_for_size_align(type_info, size, align); +} diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index 7b2512792b8..0e7639e9018 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -192,14 +192,15 @@ const struct glsl_type *glsl_transposed_type(const struct glsl_type *type); const struct glsl_type *glsl_channel_type(const struct glsl_type *type); -typedef void (*glsl_type_size_align_func)(const struct glsl_type *type, - unsigned *size, unsigned *align); - void glsl_get_natural_size_align_bytes(const struct glsl_type *type, unsigned *size, unsigned *align); const struct glsl_type *glsl_atomic_uint_type(void); +const struct glsl_type *glsl_get_explicit_type_for_size_align(const struct glsl_type *type, + glsl_type_size_align_func type_info, + unsigned *size, unsigned *align); + unsigned glsl_type_get_sampler_count(const struct glsl_type *type); unsigned glsl_type_get_image_count(const struct glsl_type *type); -- 2.30.2