From: Jason Ekstrand
Date: Mon, 19 Oct 2015 18:15:32 +0000 (-0700)
Subject: Merge remote-tracking branch 'mesa-public/master' into vulkan
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=958fc04dc51a2561c8598f42df59e3d9139e56a7;p=mesa.git

Merge remote-tracking branch 'mesa-public/master' into vulkan
---

958fc04dc51a2561c8598f42df59e3d9139e56a7
diff --cc src/glsl/Makefile.am
index 08368311b8a,33a34e4ccc8..8b0a73b250a
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@@ -160,18 -157,9 +157,19 @@@ glsl_compiler_SOURCES =
 glsl_compiler_LDADD = \
 	libglsl.la \
 	$(top_builddir)/src/libglsl_util.la \
+	$(top_builddir)/src/util/libmesautil.la \
 	$(PTHREAD_LIBS)

+spirv2nir_SOURCES = \
+	standalone_scaffolding.cpp \
+	standalone_scaffolding.h \
+	nir/spirv2nir.c
+
+spirv2nir_LDADD = \
+	libglsl.la \
+	$(top_builddir)/src/libglsl_util.la \
+	$(PTHREAD_LIBS)
+
 glsl_test_SOURCES = \
 	standalone_scaffolding.cpp \
 	test.cpp \
diff --cc src/glsl/Makefile.sources
index 65a26268c2e,ca870367640..47dc628101d
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@@ -81,8 -84,8 +85,10 @@@ NIR_FILES = 
 	nir/nir_worklist.c \
 	nir/nir_worklist.h \
 	nir/nir_types.cpp \
+	nir/shader_enums.h \
-	nir/shader_enums.c
++	nir/shader_enums.c \
+	nir/spirv_to_nir.c \
+	nir/spirv_glsl450_to_nir.c

 # libglsl
diff --cc src/glsl/nir/glsl_types.cpp
index 00000000000,1c66dce85c4..309f9dca61e
mode 000000,100644..100644
--- a/src/glsl/nir/glsl_types.cpp
+++ b/src/glsl/nir/glsl_types.cpp
@@@ -1,0 -1,1745 +1,1850 @@@
+ /*
+  * Copyright © 2009 Intel Corporation
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice (including the next
+  * paragraph) shall be included in all copies or substantial portions of the
+  * Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+  * DEALINGS IN THE SOFTWARE.
+ */ + + #include + #include "main/core.h" /* for Elements, MAX2 */ + #include "glsl_parser_extras.h" + #include "glsl_types.h" + #include "util/hash_table.h" + + + mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP; + hash_table *glsl_type::array_types = NULL; + hash_table *glsl_type::record_types = NULL; + hash_table *glsl_type::interface_types = NULL; ++hash_table *glsl_type::function_types = NULL; + hash_table *glsl_type::subroutine_types = NULL; + void *glsl_type::mem_ctx = NULL; + + void + glsl_type::init_ralloc_type_ctx(void) + { + if (glsl_type::mem_ctx == NULL) { + glsl_type::mem_ctx = ralloc_autofree_context(); + assert(glsl_type::mem_ctx != NULL); + } + } + + glsl_type::glsl_type(GLenum gl_type, + glsl_base_type base_type, unsigned vector_elements, + unsigned matrix_columns, const char *name) : + gl_type(gl_type), + base_type(base_type), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(vector_elements), matrix_columns(matrix_columns), + length(0) + { + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + + mtx_unlock(&glsl_type::mutex); + + /* Neither dimension is zero or both dimensions are zero. + */ + assert((vector_elements == 0) == (matrix_columns == 0)); + memset(& fields, 0, sizeof(fields)); + } + + glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, + enum glsl_sampler_dim dim, bool shadow, bool array, + unsigned type, const char *name) : + gl_type(gl_type), + base_type(base_type), + sampler_dimensionality(dim), sampler_shadow(shadow), + sampler_array(array), sampler_type(type), interface_packing(0), + length(0) + { + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + + mtx_unlock(&glsl_type::mutex); + + memset(& fields, 0, sizeof(fields)); + + if (base_type == GLSL_TYPE_SAMPLER) { + /* Samplers take no storage whatsoever. 
*/ + matrix_columns = vector_elements = 0; + } else { + matrix_columns = vector_elements = 1; + } + } + + glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, + const char *name) : + gl_type(0), + base_type(GLSL_TYPE_STRUCT), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(num_fields) + { + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + this->fields.structure = ralloc_array(this->mem_ctx, + glsl_struct_field, length); + + for (i = 0; i < length; i++) { + this->fields.structure[i].type = fields[i].type; + this->fields.structure[i].name = ralloc_strdup(this->fields.structure, + fields[i].name); + this->fields.structure[i].location = fields[i].location; + this->fields.structure[i].interpolation = fields[i].interpolation; + this->fields.structure[i].centroid = fields[i].centroid; + this->fields.structure[i].sample = fields[i].sample; + this->fields.structure[i].matrix_layout = fields[i].matrix_layout; + this->fields.structure[i].patch = fields[i].patch; + this->fields.structure[i].image_read_only = fields[i].image_read_only; + this->fields.structure[i].image_write_only = fields[i].image_write_only; + this->fields.structure[i].image_coherent = fields[i].image_coherent; + this->fields.structure[i].image_volatile = fields[i].image_volatile; + this->fields.structure[i].image_restrict = fields[i].image_restrict; + } + + mtx_unlock(&glsl_type::mutex); + } + + glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, + enum glsl_interface_packing packing, const char *name) : + gl_type(0), + base_type(GLSL_TYPE_INTERFACE), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing((unsigned) packing), + vector_elements(0), matrix_columns(0), + length(num_fields) + { + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + this->fields.structure = ralloc_array(this->mem_ctx, + glsl_struct_field, length); + for (i = 0; i < length; i++) { + this->fields.structure[i].type = fields[i].type; + this->fields.structure[i].name = ralloc_strdup(this->fields.structure, + fields[i].name); + this->fields.structure[i].location = fields[i].location; + this->fields.structure[i].interpolation = fields[i].interpolation; + this->fields.structure[i].centroid = fields[i].centroid; + this->fields.structure[i].sample = fields[i].sample; + this->fields.structure[i].matrix_layout = fields[i].matrix_layout; + this->fields.structure[i].patch = fields[i].patch; + } + + mtx_unlock(&glsl_type::mutex); + } + ++glsl_type::glsl_type(const glsl_type *return_type, ++ const glsl_function_param *params, unsigned num_params) : ++ gl_type(0), ++ base_type(GLSL_TYPE_FUNCTION), ++ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), ++ sampler_type(0), interface_packing(0), ++ vector_elements(0), matrix_columns(0), ++ length(num_params) ++{ ++ unsigned int i; ++ ++ mtx_lock(&glsl_type::mutex); ++ ++ init_ralloc_type_ctx(); ++ ++ this->fields.parameters = rzalloc_array(this->mem_ctx, ++ glsl_function_param, num_params + 1); ++ ++ /* We store the return type as the first parameter */ ++ this->fields.parameters[0].type = return_type; ++ this->fields.parameters[0].in = false; ++ this->fields.parameters[0].out = true; ++ ++ /* We store the i'th 
parameter in slot i+1 */ ++ for (i = 0; i < length; i++) { ++ this->fields.parameters[i + 1].type = params[i].type; ++ this->fields.parameters[i + 1].in = params[i].in; ++ this->fields.parameters[i + 1].out = params[i].out; ++ } ++ ++ mtx_unlock(&glsl_type::mutex); ++} ++ + glsl_type::glsl_type(const char *subroutine_name) : + gl_type(0), + base_type(GLSL_TYPE_SUBROUTINE), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(1), matrix_columns(1), + length(0) + { + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(subroutine_name != NULL); + this->name = ralloc_strdup(this->mem_ctx, subroutine_name); + mtx_unlock(&glsl_type::mutex); + } + + bool + glsl_type::contains_sampler() const + { + if (this->is_array()) { + return this->fields.array->contains_sampler(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_sampler()) + return true; + } + return false; + } else { + return this->is_sampler(); + } + } + + + bool + glsl_type::contains_integer() const + { + if (this->is_array()) { + return this->fields.array->contains_integer(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_integer()) + return true; + } + return false; + } else { + return this->is_integer(); + } + } + + bool + glsl_type::contains_double() const + { + if (this->is_array()) { + return this->fields.array->contains_double(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_double()) + return true; + } + return false; + } else { + return this->is_double(); + } + } + + bool + glsl_type::contains_opaque() const { + switch (base_type) { + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + return true; + case GLSL_TYPE_ARRAY: + return fields.array->contains_opaque(); + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < length; i++) { + if (fields.structure[i].type->contains_opaque()) + return true; + } + return false; + default: + return false; + } + } + + bool + glsl_type::contains_subroutine() const + { + if (this->is_array()) { + return this->fields.array->contains_subroutine(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_subroutine()) + return true; + } + return false; + } else { + return this->is_subroutine(); + } + } + + gl_texture_index + glsl_type::sampler_index() const + { + const glsl_type *const t = (this->is_array()) ? this->fields.array : this; + + assert(t->is_sampler()); + + switch (t->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + return (t->sampler_array) ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + case GLSL_SAMPLER_DIM_2D: + return (t->sampler_array) ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + case GLSL_SAMPLER_DIM_3D: + return TEXTURE_3D_INDEX; + case GLSL_SAMPLER_DIM_CUBE: + return (t->sampler_array) ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; + case GLSL_SAMPLER_DIM_RECT: + return TEXTURE_RECT_INDEX; + case GLSL_SAMPLER_DIM_BUF: + return TEXTURE_BUFFER_INDEX; + case GLSL_SAMPLER_DIM_EXTERNAL: + return TEXTURE_EXTERNAL_INDEX; + case GLSL_SAMPLER_DIM_MS: + return (t->sampler_array) ? 
TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; + default: + assert(!"Should not get here."); + return TEXTURE_BUFFER_INDEX; + } + } + + bool + glsl_type::contains_image() const + { + if (this->is_array()) { + return this->fields.array->contains_image(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_image()) + return true; + } + return false; + } else { + return this->is_image(); + } + } + + const glsl_type *glsl_type::get_base_type() const + { + switch (base_type) { + case GLSL_TYPE_UINT: + return uint_type; + case GLSL_TYPE_INT: + return int_type; + case GLSL_TYPE_FLOAT: + return float_type; + case GLSL_TYPE_DOUBLE: + return double_type; + case GLSL_TYPE_BOOL: + return bool_type; + default: + return error_type; + } + } + + + const glsl_type *glsl_type::get_scalar_type() const + { + const glsl_type *type = this; + + /* Handle arrays */ + while (type->base_type == GLSL_TYPE_ARRAY) + type = type->fields.array; + + /* Handle vectors and matrices */ + switch (type->base_type) { + case GLSL_TYPE_UINT: + return uint_type; + case GLSL_TYPE_INT: + return int_type; + case GLSL_TYPE_FLOAT: + return float_type; + case GLSL_TYPE_DOUBLE: + return double_type; + case GLSL_TYPE_BOOL: + return bool_type; + default: + /* Handle everything else */ + return type; + } + } + + + void + _mesa_glsl_release_types(void) + { + /* Should only be called during atexit (either when unloading shared + * object, or if process terminates), so no mutex-locking should be + * necessary. + */ + if (glsl_type::array_types != NULL) { + _mesa_hash_table_destroy(glsl_type::array_types, NULL); + glsl_type::array_types = NULL; + } + + if (glsl_type::record_types != NULL) { + _mesa_hash_table_destroy(glsl_type::record_types, NULL); + glsl_type::record_types = NULL; + } + + if (glsl_type::interface_types != NULL) { + _mesa_hash_table_destroy(glsl_type::interface_types, NULL); + glsl_type::interface_types = NULL; + } + } + + + glsl_type::glsl_type(const glsl_type *array, unsigned length) : + base_type(GLSL_TYPE_ARRAY), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(length), name(NULL) + { + this->fields.array = array; + /* Inherit the gl type of the base. The GL type is used for + * uniform/statevar handling in Mesa and the arrayness of the type + * is represented by the size rather than the type. + */ + this->gl_type = array->gl_type; + + /* Allow a maximum of 10 characters for the array size. This is enough + * for 32-bits of ~0. The extra 3 are for the '[', ']', and terminating + * NUL. 
+ */ + const unsigned name_length = strlen(array->name) + 10 + 3; + + mtx_lock(&glsl_type::mutex); + char *const n = (char *) ralloc_size(this->mem_ctx, name_length); + mtx_unlock(&glsl_type::mutex); + + if (length == 0) + snprintf(n, name_length, "%s[]", array->name); + else { + /* insert outermost dimensions in the correct spot + * otherwise the dimension order will be backwards + */ + const char *pos = strchr(array->name, '['); + if (pos) { + int idx = pos - array->name; + snprintf(n, idx+1, "%s", array->name); + snprintf(n + idx, name_length - idx, "[%u]%s", + length, array->name + idx); + } else { + snprintf(n, name_length, "%s[%u]", array->name, length); + } + } + + this->name = n; + } + + + const glsl_type * + glsl_type::vec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + float_type, vec2_type, vec3_type, vec4_type + }; + return ts[components - 1]; + } + + const glsl_type * + glsl_type::dvec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + double_type, dvec2_type, dvec3_type, dvec4_type + }; + return ts[components - 1]; + } + + const glsl_type * + glsl_type::ivec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + int_type, ivec2_type, ivec3_type, ivec4_type + }; + return ts[components - 1]; + } + + + const glsl_type * + glsl_type::uvec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + uint_type, uvec2_type, uvec3_type, uvec4_type + }; + return ts[components - 1]; + } + + + const glsl_type * + glsl_type::bvec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + bool_type, bvec2_type, bvec3_type, bvec4_type + }; + return ts[components - 1]; + } + + + const glsl_type * + glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns) + { + if (base_type == GLSL_TYPE_VOID) + return void_type; + + if ((rows < 1) || (rows > 4) || (columns < 1) || (columns > 4)) + return error_type; + + /* Treat GLSL vectors as Nx1 matrices. + */ + if (columns == 1) { + switch (base_type) { + case GLSL_TYPE_UINT: + return uvec(rows); + case GLSL_TYPE_INT: + return ivec(rows); + case GLSL_TYPE_FLOAT: + return vec(rows); + case GLSL_TYPE_DOUBLE: + return dvec(rows); + case GLSL_TYPE_BOOL: + return bvec(rows); + default: + return error_type; + } + } else { + if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1)) + return error_type; + + /* GLSL matrix types are named mat{COLUMNS}x{ROWS}. 
Only the following + * combinations are valid: + * + * 1 2 3 4 + * 1 + * 2 x x x + * 3 x x x + * 4 x x x + */ + #define IDX(c,r) (((c-1)*3) + (r-1)) + + if (base_type == GLSL_TYPE_DOUBLE) { + switch (IDX(columns, rows)) { + case IDX(2,2): return dmat2_type; + case IDX(2,3): return dmat2x3_type; + case IDX(2,4): return dmat2x4_type; + case IDX(3,2): return dmat3x2_type; + case IDX(3,3): return dmat3_type; + case IDX(3,4): return dmat3x4_type; + case IDX(4,2): return dmat4x2_type; + case IDX(4,3): return dmat4x3_type; + case IDX(4,4): return dmat4_type; + default: return error_type; + } + } else { + switch (IDX(columns, rows)) { + case IDX(2,2): return mat2_type; + case IDX(2,3): return mat2x3_type; + case IDX(2,4): return mat2x4_type; + case IDX(3,2): return mat3x2_type; + case IDX(3,3): return mat3_type; + case IDX(3,4): return mat3x4_type; + case IDX(4,2): return mat4x2_type; + case IDX(4,3): return mat4x3_type; + case IDX(4,4): return mat4_type; + default: return error_type; + } + } + } + + assert(!"Should not get here."); + return error_type; + } + + const glsl_type * + glsl_type::get_sampler_instance(enum glsl_sampler_dim dim, + bool shadow, + bool array, + glsl_base_type type) + { + switch (type) { + case GLSL_TYPE_FLOAT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + if (shadow) + return (array ? sampler1DArrayShadow_type : sampler1DShadow_type); + else + return (array ? sampler1DArray_type : sampler1D_type); + case GLSL_SAMPLER_DIM_2D: + if (shadow) + return (array ? sampler2DArrayShadow_type : sampler2DShadow_type); + else + return (array ? sampler2DArray_type : sampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (shadow || array) + return error_type; + else + return sampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + if (shadow) + return (array ? samplerCubeArrayShadow_type : samplerCubeShadow_type); + else + return (array ? samplerCubeArray_type : samplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + if (shadow) + return sampler2DRectShadow_type; + else + return sampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (shadow || array) + return error_type; + else + return samplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + if (shadow) + return error_type; + return (array ? sampler2DMSArray_type : sampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + if (shadow || array) + return error_type; + else + return samplerExternalOES_type; + } + case GLSL_TYPE_INT: + if (shadow) + return error_type; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? isampler1DArray_type : isampler1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? isampler2DArray_type : isampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return isampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? isamplerCubeArray_type : isamplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return isampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return isamplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? isampler2DMSArray_type : isampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_UINT: + if (shadow) + return error_type; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? usampler1DArray_type : usampler1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? 
usampler2DArray_type : usampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return usampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? usamplerCubeArray_type : usamplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return usampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return usamplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? usampler2DMSArray_type : usampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + default: + return error_type; + } + + unreachable("switch statement above should be complete"); + } + + const glsl_type * + glsl_type::get_array_instance(const glsl_type *base, unsigned array_size) + { + /* Generate a name using the base type pointer in the key. This is + * done because the name of the base type may not be unique across + * shaders. For example, two shaders may have different record types + * named 'foo'. + */ + char key[128]; + snprintf(key, sizeof(key), "%p[%u]", (void *) base, array_size); + + mtx_lock(&glsl_type::mutex); + + if (array_types == NULL) { + array_types = _mesa_hash_table_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal); + } + + const struct hash_entry *entry = _mesa_hash_table_search(array_types, key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(base, array_size); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(array_types, + ralloc_strdup(mem_ctx, key), + (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_ARRAY); + assert(((glsl_type *) entry->data)->length == array_size); + assert(((glsl_type *) entry->data)->fields.array == base); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; + } + + + bool + glsl_type::record_compare(const glsl_type *b) const + { + if (this->length != b->length) + return false; + + if (this->interface_packing != b->interface_packing) + return false; + + /* From the GLSL 4.20 specification (Sec 4.2): + * + * "Structures must have the same name, sequence of type names, and + * type definitions, and field names to be considered the same type." + * + * GLSL ES behaves the same (Ver 1.00 Sec 4.2.4, Ver 3.00 Sec 4.2.5). + * + * Note that we cannot force type name check when comparing unnamed + * structure types, these have a unique name assigned during parsing. 
+ */ + if (!this->is_anonymous() && !b->is_anonymous()) + if (strcmp(this->name, b->name) != 0) + return false; + + for (unsigned i = 0; i < this->length; i++) { + if (this->fields.structure[i].type != b->fields.structure[i].type) + return false; + if (strcmp(this->fields.structure[i].name, + b->fields.structure[i].name) != 0) + return false; + if (this->fields.structure[i].matrix_layout + != b->fields.structure[i].matrix_layout) + return false; + if (this->fields.structure[i].location + != b->fields.structure[i].location) + return false; + if (this->fields.structure[i].interpolation + != b->fields.structure[i].interpolation) + return false; + if (this->fields.structure[i].centroid + != b->fields.structure[i].centroid) + return false; + if (this->fields.structure[i].sample + != b->fields.structure[i].sample) + return false; + if (this->fields.structure[i].patch + != b->fields.structure[i].patch) + return false; + if (this->fields.structure[i].image_read_only + != b->fields.structure[i].image_read_only) + return false; + if (this->fields.structure[i].image_write_only + != b->fields.structure[i].image_write_only) + return false; + if (this->fields.structure[i].image_coherent + != b->fields.structure[i].image_coherent) + return false; + if (this->fields.structure[i].image_volatile + != b->fields.structure[i].image_volatile) + return false; + if (this->fields.structure[i].image_restrict + != b->fields.structure[i].image_restrict) + return false; + } + + return true; + } + + + bool + glsl_type::record_key_compare(const void *a, const void *b) + { + const glsl_type *const key1 = (glsl_type *) a; + const glsl_type *const key2 = (glsl_type *) b; + + return strcmp(key1->name, key2->name) == 0 && key1->record_compare(key2); + } + + + /** + * Generate an integer hash value for a glsl_type structure type. 
+ */ + unsigned + glsl_type::record_key_hash(const void *a) + { + const glsl_type *const key = (glsl_type *) a; + uintptr_t hash = key->length; + unsigned retval; + + for (unsigned i = 0; i < key->length; i++) { + /* casting pointer to uintptr_t */ + hash = (hash * 13 ) + (uintptr_t) key->fields.structure[i].type; + } + + if (sizeof(hash) == 8) + retval = (hash & 0xffffffff) ^ ((uint64_t) hash >> 32); + else + retval = hash; + + return retval; + } + + + const glsl_type * + glsl_type::get_record_instance(const glsl_struct_field *fields, + unsigned num_fields, + const char *name) + { + const glsl_type key(fields, num_fields, name); + + mtx_lock(&glsl_type::mutex); + + if (record_types == NULL) { + record_types = _mesa_hash_table_create(NULL, record_key_hash, + record_key_compare); + } + + const struct hash_entry *entry = _mesa_hash_table_search(record_types, + &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(fields, num_fields, name); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(record_types, t, (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_STRUCT); + assert(((glsl_type *) entry->data)->length == num_fields); + assert(strcmp(((glsl_type *) entry->data)->name, name) == 0); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; + } + + + const glsl_type * + glsl_type::get_interface_instance(const glsl_struct_field *fields, + unsigned num_fields, + enum glsl_interface_packing packing, + const char *block_name) + { + const glsl_type key(fields, num_fields, packing, block_name); + + mtx_lock(&glsl_type::mutex); + + if (interface_types == NULL) { + interface_types = _mesa_hash_table_create(NULL, record_key_hash, + record_key_compare); + } + + const struct hash_entry *entry = _mesa_hash_table_search(interface_types, + &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(fields, num_fields, + packing, block_name); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(interface_types, t, (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_INTERFACE); + assert(((glsl_type *) entry->data)->length == num_fields); + assert(strcmp(((glsl_type *) entry->data)->name, block_name) == 0); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; + } + + const glsl_type * + glsl_type::get_subroutine_instance(const char *subroutine_name) + { + const glsl_type key(subroutine_name); + + mtx_lock(&glsl_type::mutex); + + if (subroutine_types == NULL) { + subroutine_types = _mesa_hash_table_create(NULL, record_key_hash, + record_key_compare); + } + + const struct hash_entry *entry = _mesa_hash_table_search(subroutine_types, + &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(subroutine_name); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(subroutine_types, t, (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE); + assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; + } + + ++static bool ++function_key_compare(const void *a, const void *b) ++{ ++ const glsl_type *const key1 = (glsl_type *) a; ++ const glsl_type *const key2 = (glsl_type *) b; ++ ++ if (key1->length != key2->length) ++ return 1; ++ ++ return memcmp(key1->fields.parameters, key2->fields.parameters, ++ (key1->length + 1) * 
sizeof(*key1->fields.parameters)); ++} ++ ++ ++static uint32_t ++function_key_hash(const void *a) ++{ ++ const glsl_type *const key = (glsl_type *) a; ++ char hash_key[128]; ++ unsigned size = 0; ++ ++ size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length); ++ ++ for (unsigned i = 0; i < key->length; i++) { ++ if (size >= sizeof(hash_key)) ++ break; ++ ++ size += snprintf(& hash_key[size], sizeof(hash_key) - size, ++ "%p", (void *) key->fields.structure[i].type); ++ } ++ ++ return _mesa_hash_string(hash_key); ++} ++ ++const glsl_type * ++glsl_type::get_function_instance(const glsl_type *return_type, ++ const glsl_function_param *params, ++ unsigned num_params) ++{ ++ const glsl_type key(return_type, params, num_params); ++ ++ mtx_lock(&glsl_type::mutex); ++ ++ if (function_types == NULL) { ++ function_types = _mesa_hash_table_create(NULL, function_key_hash, ++ function_key_compare); ++ } ++ ++ struct hash_entry *entry = _mesa_hash_table_search(function_types, &key); ++ if (entry == NULL) { ++ mtx_unlock(&glsl_type::mutex); ++ const glsl_type *t = new glsl_type(return_type, params, num_params); ++ mtx_lock(&glsl_type::mutex); ++ ++ entry = _mesa_hash_table_insert(function_types, t, (void *) t); ++ } ++ ++ const glsl_type *t = (const glsl_type *)entry->data; ++ ++ assert(t->base_type == GLSL_TYPE_FUNCTION); ++ assert(t->length == num_params); ++ ++ mtx_unlock(&glsl_type::mutex); ++ ++ return t; ++} ++ ++ + const glsl_type * + glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) + { + if (type_a == type_b) { + return type_a; + } else if (type_a->is_matrix() && type_b->is_matrix()) { + /* Matrix multiply. The columns of A must match the rows of B. Given + * the other previously tested constraints, this means the vector type + * of a row from A must be the same as the vector type of a column from + * B. + */ + if (type_a->row_type() == type_b->column_type()) { + /* The resulting matrix has the number of columns of matrix B and + * the number of rows of matrix A. We get the row count of A by + * looking at the size of a vector that makes up a column. The + * transpose (size of a row) is done for B. + */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_a->column_type()->vector_elements, + type_b->row_type()->vector_elements); + assert(type != error_type); + + return type; + } + } else if (type_a->is_matrix()) { + /* A is a matrix and B is a column vector. Columns of A must match + * rows of B. Given the other previously tested constraints, this + * means the vector type of a row from A must be the same as the + * vector the type of B. + */ + if (type_a->row_type() == type_b) { + /* The resulting vector has a number of elements equal to + * the number of rows of matrix A. */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_a->column_type()->vector_elements, + 1); + assert(type != error_type); + + return type; + } + } else { + assert(type_b->is_matrix()); + + /* A is a row vector and B is a matrix. Columns of A must match rows + * of B. Given the other previously tested constraints, this means + * the type of A must be the same as the vector type of a column from + * B. + */ + if (type_a == type_b->column_type()) { + /* The resulting vector has a number of elements equal to + * the number of columns of matrix B. 
*/ + const glsl_type *const type = + get_instance(type_a->base_type, + type_b->row_type()->vector_elements, + 1); + assert(type != error_type); + + return type; + } + } + + return error_type; + } + + + const glsl_type * + glsl_type::field_type(const char *name) const + { + if (this->base_type != GLSL_TYPE_STRUCT + && this->base_type != GLSL_TYPE_INTERFACE) + return error_type; + + for (unsigned i = 0; i < this->length; i++) { + if (strcmp(name, this->fields.structure[i].name) == 0) + return this->fields.structure[i].type; + } + + return error_type; + } + + + int + glsl_type::field_index(const char *name) const + { + if (this->base_type != GLSL_TYPE_STRUCT + && this->base_type != GLSL_TYPE_INTERFACE) + return -1; + + for (unsigned i = 0; i < this->length; i++) { + if (strcmp(name, this->fields.structure[i].name) == 0) + return i; + } + + return -1; + } + + + unsigned + glsl_type::component_slots() const + { + switch (this->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return this->components(); + + case GLSL_TYPE_DOUBLE: + return 2 * this->components(); + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + + for (unsigned i = 0; i < this->length; i++) + size += this->fields.structure[i].type->component_slots(); + + return size; + } + + case GLSL_TYPE_ARRAY: + return this->length * this->fields.array->component_slots(); + + case GLSL_TYPE_IMAGE: + return 1; + case GLSL_TYPE_SUBROUTINE: + return 1; ++ ++ case GLSL_TYPE_FUNCTION: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + break; + } + + return 0; + } + + unsigned + glsl_type::record_location_offset(unsigned length) const + { + unsigned offset = 0; + const glsl_type *t = this->without_array(); + if (t->is_record()) { + assert(length <= t->length); + + for (unsigned i = 0; i < length; i++) { + const glsl_type *st = t->fields.structure[i].type; + const glsl_type *wa = st->without_array(); + if (wa->is_record()) { + unsigned r_offset = wa->record_location_offset(wa->length); + offset += st->is_array() ? + st->arrays_of_arrays_size() * r_offset : r_offset; + } else if (st->is_array() && st->fields.array->is_array()) { + unsigned outer_array_size = st->length; + const glsl_type *base_type = st->fields.array; + + /* For arrays of arrays the outer arrays take up a uniform + * slot for each element. The innermost array elements share a + * single slot so we ignore the innermost array when calculating + * the offset. + */ + while (base_type->fields.array->is_array()) { + outer_array_size = outer_array_size * base_type->length; + base_type = base_type->fields.array; + } + offset += outer_array_size; + } else { + /* We dont worry about arrays here because unless the array + * contains a structure or another array it only takes up a single + * uniform slot. 
+ */ + offset += 1; + } + } + } + return offset; + } + + unsigned + glsl_type::uniform_locations() const + { + unsigned size = 0; + + switch (this->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SUBROUTINE: + return 1; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: + for (unsigned i = 0; i < this->length; i++) + size += this->fields.structure[i].type->uniform_locations(); + return size; + case GLSL_TYPE_ARRAY: + return this->length * this->fields.array->uniform_locations(); + default: + return 0; + } + } + + bool + glsl_type::can_implicitly_convert_to(const glsl_type *desired, + _mesa_glsl_parse_state *state) const + { + if (this == desired) + return true; + + /* There is no conversion among matrix types. */ + if (this->matrix_columns > 1 || desired->matrix_columns > 1) + return false; + + /* Vector size must match. */ + if (this->vector_elements != desired->vector_elements) + return false; + + /* int and uint can be converted to float. */ + if (desired->is_float() && this->is_integer()) + return true; + + /* With GLSL 4.0 / ARB_gpu_shader5, int can be converted to uint. + * Note that state may be NULL here, when resolving function calls in the + * linker. By this time, all the state-dependent checks have already + * happened though, so allow anything that's allowed in any shader version. */ + if ((!state || state->is_version(400, 0) || state->ARB_gpu_shader5_enable) && + desired->base_type == GLSL_TYPE_UINT && this->base_type == GLSL_TYPE_INT) + return true; + + /* No implicit conversions from double. */ + if ((!state || state->has_double()) && this->is_double()) + return false; + + /* Conversions from different types to double. */ + if ((!state || state->has_double()) && desired->is_double()) { + if (this->is_float()) + return true; + if (this->is_integer()) + return true; + } + + return false; + } + + unsigned + glsl_type::std140_base_alignment(bool row_major) const + { + unsigned N = is_double() ? 8 : 4; + + /* (1) If the member is a scalar consuming basic machine units, the + * base alignment is . + * + * (2) If the member is a two- or four-component vector with components + * consuming basic machine units, the base alignment is 2 or + * 4, respectively. + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_scalar() || this->is_vector()) { + switch (this->vector_elements) { + case 1: + return N; + case 2: + return 2 * N; + case 3: + case 4: + return 4 * N; + } + } + + /* (4) If the member is an array of scalars or vectors, the base alignment + * and array stride are set to match the base alignment of a single + * array element, according to rules (1), (2), and (3), and rounded up + * to the base alignment of a vec4. The array may have padding at the + * end; the base offset of the member following the array is rounded up + * to the next multiple of the base alignment. + * + * (6) If the member is an array of column-major matrices with + * columns and rows, the matrix is stored identically to a row of + * * column vectors with components each, according to rule + * (4). + * + * (8) If the member is an array of row-major matrices with columns + * and rows, the matrix is stored identically to a row of * + * row vectors with components each, according to rule (4). 
+ * + * (10) If the member is an array of structures, the elements of + * the array are laid out in order, according to rule (9). + */ + if (this->is_array()) { + if (this->fields.array->is_scalar() || + this->fields.array->is_vector() || + this->fields.array->is_matrix()) { + return MAX2(this->fields.array->std140_base_alignment(row_major), 16); + } else { + assert(this->fields.array->is_record() || + this->fields.array->is_array()); + return this->fields.array->std140_base_alignment(row_major); + } + } + + /* (5) If the member is a column-major matrix with columns and + * rows, the matrix is stored identically to an array of + * column vectors with components each, according to + * rule (4). + * + * (7) If the member is a row-major matrix with columns and + * rows, the matrix is stored identically to an array of + * row vectors with components each, according to rule (4). + */ + if (this->is_matrix()) { + const struct glsl_type *vec_type, *array_type; + int c = this->matrix_columns; + int r = this->vector_elements; + + if (row_major) { + vec_type = get_instance(base_type, c, 1); + array_type = glsl_type::get_array_instance(vec_type, r); + } else { + vec_type = get_instance(base_type, r, 1); + array_type = glsl_type::get_array_instance(vec_type, c); + } + + return array_type->std140_base_alignment(false); + } + + /* (9) If the member is a structure, the base alignment of the + * structure is , where is the largest base alignment + * value of any of its members, and rounded up to the base + * alignment of a vec4. The individual members of this + * sub-structure are then assigned offsets by applying this set + * of rules recursively, where the base offset of the first + * member of the sub-structure is equal to the aligned offset + * of the structure. The structure may have padding at the end; + * the base offset of the member following the sub-structure is + * rounded up to the next multiple of the base alignment of the + * structure. + */ + if (this->is_record()) { + unsigned base_alignment = 16; + for (unsigned i = 0; i < this->length; i++) { + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(this->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + const struct glsl_type *field_type = this->fields.structure[i].type; + base_alignment = MAX2(base_alignment, + field_type->std140_base_alignment(field_row_major)); + } + return base_alignment; + } + + assert(!"not reached"); + return -1; + } + + unsigned + glsl_type::std140_size(bool row_major) const + { + unsigned N = is_double() ? 8 : 4; + + /* (1) If the member is a scalar consuming basic machine units, the + * base alignment is . + * + * (2) If the member is a two- or four-component vector with components + * consuming basic machine units, the base alignment is 2 or + * 4, respectively. + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_scalar() || this->is_vector()) { + return this->vector_elements * N; + } + + /* (5) If the member is a column-major matrix with columns and + * rows, the matrix is stored identically to an array of + * column vectors with components each, according to + * rule (4). 
+ * + * (6) If the member is an array of column-major matrices with + * columns and rows, the matrix is stored identically to a row of + * * column vectors with components each, according to rule + * (4). + * + * (7) If the member is a row-major matrix with columns and + * rows, the matrix is stored identically to an array of + * row vectors with components each, according to rule (4). + * + * (8) If the member is an array of row-major matrices with columns + * and rows, the matrix is stored identically to a row of * + * row vectors with components each, according to rule (4). + */ + if (this->without_array()->is_matrix()) { + const struct glsl_type *element_type; + const struct glsl_type *vec_type; + unsigned int array_len; + + if (this->is_array()) { + element_type = this->without_array(); + array_len = this->arrays_of_arrays_size(); + } else { + element_type = this; + array_len = 1; + } + + if (row_major) { + vec_type = get_instance(element_type->base_type, + element_type->matrix_columns, 1); + + array_len *= element_type->vector_elements; + } else { + vec_type = get_instance(element_type->base_type, + element_type->vector_elements, 1); + array_len *= element_type->matrix_columns; + } + const glsl_type *array_type = glsl_type::get_array_instance(vec_type, + array_len); + + return array_type->std140_size(false); + } + + /* (4) If the member is an array of scalars or vectors, the base alignment + * and array stride are set to match the base alignment of a single + * array element, according to rules (1), (2), and (3), and rounded up + * to the base alignment of a vec4. The array may have padding at the + * end; the base offset of the member following the array is rounded up + * to the next multiple of the base alignment. + * + * (10) If the member is an array of structures, the elements of + * the array are laid out in order, according to rule (9). + */ + if (this->is_array()) { + if (this->without_array()->is_record()) { + return this->arrays_of_arrays_size() * + this->without_array()->std140_size(row_major); + } else { + unsigned element_base_align = + this->without_array()->std140_base_alignment(row_major); + return this->arrays_of_arrays_size() * MAX2(element_base_align, 16); + } + } + + /* (9) If the member is a structure, the base alignment of the + * structure is , where is the largest base alignment + * value of any of its members, and rounded up to the base + * alignment of a vec4. The individual members of this + * sub-structure are then assigned offsets by applying this set + * of rules recursively, where the base offset of the first + * member of the sub-structure is equal to the aligned offset + * of the structure. The structure may have padding at the end; + * the base offset of the member following the sub-structure is + * rounded up to the next multiple of the base alignment of the + * structure. 
+ */ + if (this->is_record() || this->is_interface()) { + unsigned size = 0; + unsigned max_align = 0; + + for (unsigned i = 0; i < this->length; i++) { + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(this->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + const struct glsl_type *field_type = this->fields.structure[i].type; + unsigned align = field_type->std140_base_alignment(field_row_major); + + /* Ignore unsized arrays when calculating size */ + if (field_type->is_unsized_array()) + continue; + + size = glsl_align(size, align); + size += field_type->std140_size(field_row_major); + + max_align = MAX2(align, max_align); + + if (field_type->is_record() && (i + 1 < this->length)) + size = glsl_align(size, 16); + } + size = glsl_align(size, MAX2(max_align, 16)); + return size; + } + + assert(!"not reached"); + return -1; + } + + unsigned + glsl_type::std430_base_alignment(bool row_major) const + { + + unsigned N = is_double() ? 8 : 4; + + /* (1) If the member is a scalar consuming basic machine units, the + * base alignment is . + * + * (2) If the member is a two- or four-component vector with components + * consuming basic machine units, the base alignment is 2 or + * 4, respectively. + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_scalar() || this->is_vector()) { + switch (this->vector_elements) { + case 1: + return N; + case 2: + return 2 * N; + case 3: + case 4: + return 4 * N; + } + } + + /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout": + * + * "When using the std430 storage layout, shader storage blocks will be + * laid out in buffer storage identically to uniform and shader storage + * blocks using the std140 layout, except that the base alignment and + * stride of arrays of scalars and vectors in rule 4 and of structures + * in rule 9 are not rounded up a multiple of the base alignment of a vec4. + */ + + /* (1) If the member is a scalar consuming basic machine units, the + * base alignment is . + * + * (2) If the member is a two- or four-component vector with components + * consuming basic machine units, the base alignment is 2 or + * 4, respectively. + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_array()) + return this->fields.array->std430_base_alignment(row_major); + + /* (5) If the member is a column-major matrix with columns and + * rows, the matrix is stored identically to an array of + * column vectors with components each, according to + * rule (4). + * + * (7) If the member is a row-major matrix with columns and + * rows, the matrix is stored identically to an array of + * row vectors with components each, according to rule (4). 
+ */ + if (this->is_matrix()) { + const struct glsl_type *vec_type, *array_type; + int c = this->matrix_columns; + int r = this->vector_elements; + + if (row_major) { + vec_type = get_instance(base_type, c, 1); + array_type = glsl_type::get_array_instance(vec_type, r); + } else { + vec_type = get_instance(base_type, r, 1); + array_type = glsl_type::get_array_instance(vec_type, c); + } + + return array_type->std430_base_alignment(false); + } + + /* (9) If the member is a structure, the base alignment of the + * structure is , where is the largest base alignment + * value of any of its members, and rounded up to the base + * alignment of a vec4. The individual members of this + * sub-structure are then assigned offsets by applying this set + * of rules recursively, where the base offset of the first + * member of the sub-structure is equal to the aligned offset + * of the structure. The structure may have padding at the end; + * the base offset of the member following the sub-structure is + * rounded up to the next multiple of the base alignment of the + * structure. + */ + if (this->is_record()) { + unsigned base_alignment = 0; + for (unsigned i = 0; i < this->length; i++) { + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(this->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + const struct glsl_type *field_type = this->fields.structure[i].type; + base_alignment = MAX2(base_alignment, + field_type->std430_base_alignment(field_row_major)); + } + assert(base_alignment > 0); + return base_alignment; + } + assert(!"not reached"); + return -1; + } + + unsigned + glsl_type::std430_array_stride(bool row_major) const + { + unsigned N = is_double() ? 8 : 4; + + /* Notice that the array stride of a vec3 is not 3 * N but 4 * N. + * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout" + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_vector() && this->vector_elements == 3) + return 4 * N; + + /* By default use std430_size(row_major) */ + return this->std430_size(row_major); + } + + unsigned + glsl_type::std430_size(bool row_major) const + { + unsigned N = is_double() ? 8 : 4; + + /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout": + * + * "When using the std430 storage layout, shader storage blocks will be + * laid out in buffer storage identically to uniform and shader storage + * blocks using the std140 layout, except that the base alignment and + * stride of arrays of scalars and vectors in rule 4 and of structures + * in rule 9 are not rounded up a multiple of the base alignment of a vec4. 
+ */ + if (this->is_scalar() || this->is_vector()) + return this->vector_elements * N; + + if (this->without_array()->is_matrix()) { + const struct glsl_type *element_type; + const struct glsl_type *vec_type; + unsigned int array_len; + + if (this->is_array()) { + element_type = this->without_array(); + array_len = this->arrays_of_arrays_size(); + } else { + element_type = this; + array_len = 1; + } + + if (row_major) { + vec_type = get_instance(element_type->base_type, + element_type->matrix_columns, 1); + + array_len *= element_type->vector_elements; + } else { + vec_type = get_instance(element_type->base_type, + element_type->vector_elements, 1); + array_len *= element_type->matrix_columns; + } + const glsl_type *array_type = glsl_type::get_array_instance(vec_type, + array_len); + + return array_type->std430_size(false); + } + + if (this->is_array()) { + if (this->without_array()->is_record()) + return this->arrays_of_arrays_size() * + this->without_array()->std430_size(row_major); + else + return this->arrays_of_arrays_size() * + this->without_array()->std430_base_alignment(row_major); + } + + if (this->is_record() || this->is_interface()) { + unsigned size = 0; + unsigned max_align = 0; + + for (unsigned i = 0; i < this->length; i++) { + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(this->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + const struct glsl_type *field_type = this->fields.structure[i].type; + unsigned align = field_type->std430_base_alignment(field_row_major); + size = glsl_align(size, align); + size += field_type->std430_size(field_row_major); + + max_align = MAX2(align, max_align); + } + size = glsl_align(size, max_align); + return size; + } + + assert(!"not reached"); + return -1; + } + + unsigned + glsl_type::count_attribute_slots() const + { + /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec: + * + * "A scalar input counts the same amount against this limit as a vec4, + * so applications may want to consider packing groups of four + * unrelated float inputs together into a vector to better utilize the + * capabilities of the underlying hardware. A matrix input will use up + * multiple locations. The number of locations used will equal the + * number of columns in the matrix." + * + * The spec does not explicitly say how arrays are counted. However, it + * should be safe to assume the total number of slots consumed by an array + * is the number of entries in the array multiplied by the number of slots + * consumed by a single element of the array. + * + * The spec says nothing about how structs are counted, because vertex + * attributes are not allowed to be (or contain) structs. However, Mesa + * allows varying structs, the number of varying slots taken up by a + * varying struct is simply equal to the sum of the number of slots taken + * up by each element. 
+ */ + switch (this->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: + return this->matrix_columns; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + + for (unsigned i = 0; i < this->length; i++) + size += this->fields.structure[i].type->count_attribute_slots(); + + return size; + } + + case GLSL_TYPE_ARRAY: + return this->length * this->fields.array->count_attribute_slots(); + ++ case GLSL_TYPE_FUNCTION: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_VOID: + case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_ERROR: + break; + } + + assert(!"Unexpected type in count_attribute_slots()"); + + return 0; + } + + int + glsl_type::coordinate_components() const + { + int size; + + switch (sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + size = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_EXTERNAL: + size = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + size = 3; + break; + default: + assert(!"Should not get here."); + size = 1; + break; + } + + /* Array textures need an additional component for the array index, except + * for cubemap array images that behave like a 2D array of interleaved + * cubemap faces. + */ + if (sampler_array && + !(base_type == GLSL_TYPE_IMAGE && + sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE)) + size += 1; + + return size; + } + + /** + * Declarations of type flyweights (glsl_type::_foo_type) and + * convenience pointers (glsl_type::foo_type). + * @{ + */ + #define DECL_TYPE(NAME, ...) \ + const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \ + const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type; + + #define STRUCT_TYPE(NAME) + + #include "builtin_type_macros.h" + /** @} */ diff --cc src/glsl/nir/glsl_types.h index 00000000000,3ec764219de..b83e1ca3d2c mode 000000,100644..100644 --- a/src/glsl/nir/glsl_types.h +++ b/src/glsl/nir/glsl_types.h @@@ -1,0 -1,867 +1,890 @@@ + /* -*- c++ -*- */ + /* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + + #pragma once + #ifndef GLSL_TYPES_H + #define GLSL_TYPES_H + + #include + #include + + #ifdef __cplusplus + extern "C" { + #endif + + struct _mesa_glsl_parse_state; + struct glsl_symbol_table; + + extern void + _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state); + + extern void + _mesa_glsl_release_types(void); + + #ifdef __cplusplus + } + #endif + + enum glsl_base_type { + GLSL_TYPE_UINT = 0, + GLSL_TYPE_INT, + GLSL_TYPE_FLOAT, + GLSL_TYPE_DOUBLE, + GLSL_TYPE_BOOL, + GLSL_TYPE_SAMPLER, + GLSL_TYPE_IMAGE, + GLSL_TYPE_ATOMIC_UINT, + GLSL_TYPE_STRUCT, ++ GLSL_TYPE_FUNCTION, + GLSL_TYPE_INTERFACE, + GLSL_TYPE_ARRAY, + GLSL_TYPE_VOID, + GLSL_TYPE_SUBROUTINE, + GLSL_TYPE_ERROR + }; + + enum glsl_sampler_dim { + GLSL_SAMPLER_DIM_1D = 0, + GLSL_SAMPLER_DIM_2D, + GLSL_SAMPLER_DIM_3D, + GLSL_SAMPLER_DIM_CUBE, + GLSL_SAMPLER_DIM_RECT, + GLSL_SAMPLER_DIM_BUF, + GLSL_SAMPLER_DIM_EXTERNAL, + GLSL_SAMPLER_DIM_MS + }; + + enum glsl_interface_packing { + GLSL_INTERFACE_PACKING_STD140, + GLSL_INTERFACE_PACKING_SHARED, + GLSL_INTERFACE_PACKING_PACKED, + GLSL_INTERFACE_PACKING_STD430 + }; + + enum glsl_matrix_layout { + /** + * The layout of the matrix is inherited from the object containing the + * matrix (the top level structure or the uniform block). + */ + GLSL_MATRIX_LAYOUT_INHERITED, + + /** + * Explicit column-major layout + * + * If a uniform block doesn't have an explicit layout set, it will default + * to this layout. + */ + GLSL_MATRIX_LAYOUT_COLUMN_MAJOR, + + /** + * Row-major layout + */ + GLSL_MATRIX_LAYOUT_ROW_MAJOR + }; + + #ifdef __cplusplus + #include "GL/gl.h" + #include "util/ralloc.h" + #include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */ + + struct glsl_type { + GLenum gl_type; + glsl_base_type base_type; + + unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */ + unsigned sampler_shadow:1; + unsigned sampler_array:1; + unsigned sampler_type:2; /**< Type of data returned using this + * sampler or image. Only \c + * GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT, + * and \c GLSL_TYPE_UINT are valid. + */ + unsigned interface_packing:2; + + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */ + static void* operator new(size_t size) + { + mtx_lock(&glsl_type::mutex); + + /* mem_ctx should have been created by the static members */ + assert(glsl_type::mem_ctx != NULL); + + void *type; + + type = ralloc_size(glsl_type::mem_ctx, size); + assert(type != NULL); + + mtx_unlock(&glsl_type::mutex); + + return type; + } + + /* If the user *does* call delete, that's OK, we will just + * ralloc_free in that case. */ + static void operator delete(void *type) + { + mtx_lock(&glsl_type::mutex); + ralloc_free(type); + mtx_unlock(&glsl_type::mutex); + } + + /** + * \name Vector and matrix element counts + * + * For scalars, each of these values will be 1. For non-numeric types + * these will be 0. + */ + /*@{*/ + uint8_t vector_elements; /**< 1, 2, 3, or 4 vector elements. */ + uint8_t matrix_columns; /**< 1, 2, 3, or 4 matrix columns. */ + /*@}*/ + + /** + * For \c GLSL_TYPE_ARRAY, this is the length of the array. For + * \c GLSL_TYPE_STRUCT or \c GLSL_TYPE_INTERFACE, it is the number of + * elements in the structure and the number of values pointed to by + * \c fields.structure (below). + */ + unsigned length; + + /** + * Name of the data type + * + * Will never be \c NULL. + */ + const char *name; + + /** + * Subtype of composite data types. 
+ */ + union { + const struct glsl_type *array; /**< Type of array elements. */ - const struct glsl_type *parameters; /**< Parameters to function. */ ++ struct glsl_function_param *parameters; /**< Parameters to function. */ + struct glsl_struct_field *structure; /**< List of struct fields. */ + } fields; + + /** + * \name Pointers to various public type singletons + */ + /*@{*/ + #undef DECL_TYPE + #define DECL_TYPE(NAME, ...) \ + static const glsl_type *const NAME##_type; + #undef STRUCT_TYPE + #define STRUCT_TYPE(NAME) \ + static const glsl_type *const struct_##NAME##_type; + #include "builtin_type_macros.h" + /*@}*/ + + /** + * Convenience accessors for vector types (shorter than get_instance()). + * @{ + */ + static const glsl_type *vec(unsigned components); + static const glsl_type *dvec(unsigned components); + static const glsl_type *ivec(unsigned components); + static const glsl_type *uvec(unsigned components); + static const glsl_type *bvec(unsigned components); + /**@}*/ + + /** + * For numeric and boolean derived types returns the basic scalar type + * + * If the type is a numeric or boolean scalar, vector, or matrix type, + * this function gets the scalar type of the individual components. For + * all other types, including arrays of numeric or boolean types, the + * error type is returned. + */ + const glsl_type *get_base_type() const; + + /** + * Get the basic scalar type which this type aggregates. + * + * If the type is a numeric or boolean scalar, vector, or matrix, or an + * array of any of those, this function gets the scalar type of the + * individual components. For structs and arrays of structs, this function + * returns the struct type. For samplers and arrays of samplers, this + * function returns the sampler type. + */ + const glsl_type *get_scalar_type() const; + + /** + * Get the instance of a built-in scalar, vector, or matrix type + */ + static const glsl_type *get_instance(unsigned base_type, unsigned rows, + unsigned columns); + + /** + * Get the instance of a sampler type + */ + static const glsl_type *get_sampler_instance(enum glsl_sampler_dim dim, + bool shadow, + bool array, + glsl_base_type type); + + + /** + * Get the instance of an array type + */ + static const glsl_type *get_array_instance(const glsl_type *base, + unsigned elements); + + /** + * Get the instance of a record type + */ + static const glsl_type *get_record_instance(const glsl_struct_field *fields, + unsigned num_fields, + const char *name); + + /** + * Get the instance of an interface block type + */ + static const glsl_type *get_interface_instance(const glsl_struct_field *fields, + unsigned num_fields, + enum glsl_interface_packing packing, + const char *block_name); + + /** + * Get the instance of an subroutine type + */ + static const glsl_type *get_subroutine_instance(const char *subroutine_name); + ++ /** ++ * Get the instance of a function type ++ */ ++ static const glsl_type *get_function_instance(const struct glsl_type *return_type, ++ const glsl_function_param *parameters, ++ unsigned num_params); ++ + /** + * Get the type resulting from a multiplication of \p type_a * \p type_b + */ + static const glsl_type *get_mul_type(const glsl_type *type_a, + const glsl_type *type_b); + + /** + * Query the total number of scalars that make up a scalar, vector or matrix + */ + unsigned components() const + { + return vector_elements * matrix_columns; + } + + /** + * Calculate the number of components slots required to hold this type + * + * This is used to determine how many uniform or 
varying locations a type + * might occupy. + */ + unsigned component_slots() const; + + /** + * Calculate offset between the base location of the struct in + * uniform storage and a struct member. + * For the initial call, length is the index of the member to find the + * offset for. + */ + unsigned record_location_offset(unsigned length) const; + + /** + * Calculate the number of unique values from glGetUniformLocation for the + * elements of the type. + * + * This is used to allocate slots in the UniformRemapTable, the amount of + * locations may not match with actual used storage space by the driver. + */ + unsigned uniform_locations() const; + + /** + * Calculate the number of attribute slots required to hold this type + * + * This implements the language rules of GLSL 1.50 for counting the number + * of slots used by a vertex attribute. It also determines the number of + * varying slots the type will use up in the absence of varying packing + * (and thus, it can be used to measure the number of varying slots used by + * the varyings that are generated by lower_packed_varyings). + */ + unsigned count_attribute_slots() const; + + + /** + * Alignment in bytes of the start of this type in a std140 uniform + * block. + */ + unsigned std140_base_alignment(bool row_major) const; + + /** Size in bytes of this type in a std140 uniform block. + * + * Note that this is not GL_UNIFORM_SIZE (which is the number of + * elements in the array) + */ + unsigned std140_size(bool row_major) const; + + /** + * Alignment in bytes of the start of this type in a std430 shader + * storage block. + */ + unsigned std430_base_alignment(bool row_major) const; + + /** + * Calculate array stride in bytes of this type in a std430 shader storage + * block. + */ + unsigned std430_array_stride(bool row_major) const; + + /** + * Size in bytes of this type in a std430 shader storage block. + * + * Note that this is not GL_BUFFER_SIZE + */ + unsigned std430_size(bool row_major) const; + + /** + * \brief Can this type be implicitly converted to another? + * + * \return True if the types are identical or if this type can be converted + * to \c desired according to Section 4.1.10 of the GLSL spec. + * + * \verbatim + * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10 + * Implicit Conversions: + * + * In some situations, an expression and its type will be implicitly + * converted to a different type. The following table shows all allowed + * implicit conversions: + * + * Type of expression | Can be implicitly converted to + * -------------------------------------------------- + * int float + * uint + * + * ivec2 vec2 + * uvec2 + * + * ivec3 vec3 + * uvec3 + * + * ivec4 vec4 + * uvec4 + * + * There are no implicit array or structure conversions. For example, + * an array of int cannot be implicitly converted to an array of float. + * There are no implicit conversions between signed and unsigned + * integers. + * \endverbatim + */ + bool can_implicitly_convert_to(const glsl_type *desired, + _mesa_glsl_parse_state *state) const; + + /** + * Query whether or not a type is a scalar (non-vector and non-matrix). 
+ */ + bool is_scalar() const + { + return (vector_elements == 1) + && (base_type >= GLSL_TYPE_UINT) + && (base_type <= GLSL_TYPE_BOOL); + } + + /** + * Query whether or not a type is a vector + */ + bool is_vector() const + { + return (vector_elements > 1) + && (matrix_columns == 1) + && (base_type >= GLSL_TYPE_UINT) + && (base_type <= GLSL_TYPE_BOOL); + } + + /** + * Query whether or not a type is a matrix + */ + bool is_matrix() const + { + /* GLSL only has float matrices. */ + return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT || base_type == GLSL_TYPE_DOUBLE); + } + + /** + * Query whether or not a type is a non-array numeric type + */ + bool is_numeric() const + { + return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_DOUBLE); + } + + /** + * Query whether or not a type is an integral type + */ + bool is_integer() const + { + return (base_type == GLSL_TYPE_UINT) || (base_type == GLSL_TYPE_INT); + } + + /** + * Query whether or not type is an integral type, or for struct and array + * types, contains an integral type. + */ + bool contains_integer() const; + + /** + * Query whether or not type is a double type, or for struct and array + * types, contains a double type. + */ + bool contains_double() const; + + /** + * Query whether or not a type is a float type + */ + bool is_float() const + { + return base_type == GLSL_TYPE_FLOAT; + } + + /** + * Query whether or not a type is a double type + */ + bool is_double() const + { + return base_type == GLSL_TYPE_DOUBLE; + } + + /** + * Query whether or not a type is a non-array boolean type + */ + bool is_boolean() const + { + return base_type == GLSL_TYPE_BOOL; + } + + /** + * Query whether or not a type is a sampler + */ + bool is_sampler() const + { + return base_type == GLSL_TYPE_SAMPLER; + } + + /** + * Query whether or not type is a sampler, or for struct and array + * types, contains a sampler. + */ + bool contains_sampler() const; + + /** + * Get the Mesa texture target index for a sampler type. + */ + gl_texture_index sampler_index() const; + + /** + * Query whether or not type is an image, or for struct and array + * types, contains an image. + */ + bool contains_image() const; + + /** + * Query whether or not a type is an image + */ + bool is_image() const + { + return base_type == GLSL_TYPE_IMAGE; + } + + /** + * Query whether or not a type is an array + */ + bool is_array() const + { + return base_type == GLSL_TYPE_ARRAY; + } + + /** + * Query whether or not a type is a record + */ + bool is_record() const + { + return base_type == GLSL_TYPE_STRUCT; + } + + /** + * Query whether or not a type is an interface + */ + bool is_interface() const + { + return base_type == GLSL_TYPE_INTERFACE; + } + + /** + * Query whether or not a type is the void type singleton. + */ + bool is_void() const + { + return base_type == GLSL_TYPE_VOID; + } + + /** + * Query whether or not a type is the error type singleton. + */ + bool is_error() const + { + return base_type == GLSL_TYPE_ERROR; + } + + /** + * Query if a type is unnamed/anonymous (named by the parser) + */ + + bool is_subroutine() const + { + return base_type == GLSL_TYPE_SUBROUTINE; + } + bool contains_subroutine() const; + + bool is_anonymous() const + { + return !strncmp(name, "#anon", 5); + } + + /** + * Get the type stripped of any arrays + * + * \return + * Pointer to the type of elements of the first non-array type for array + * types, or pointer to itself for non-array types. 
+ */ + const glsl_type *without_array() const + { + const glsl_type *t = this; + + while (t->is_array()) + t = t->fields.array; + + return t; + } + + /** + * Return the total number of elements in an array including the elements + * in arrays of arrays. + */ + unsigned arrays_of_arrays_size() const + { + if (!is_array()) + return 0; + + unsigned size = length; + const glsl_type *base_type = fields.array; + + while (base_type->is_array()) { + size = size * base_type->length; + base_type = base_type->fields.array; + } + return size; + } + + /** + * Return the amount of atomic counter storage required for a type. + */ + unsigned atomic_size() const + { + if (base_type == GLSL_TYPE_ATOMIC_UINT) + return ATOMIC_COUNTER_SIZE; + else if (is_array()) + return length * fields.array->atomic_size(); + else + return 0; + } + + /** + * Return whether a type contains any atomic counters. + */ + bool contains_atomic() const + { + return atomic_size() > 0; + } + + /** + * Return whether a type contains any opaque types. + */ + bool contains_opaque() const; + + /** + * Query the full type of a matrix row + * + * \return + * If the type is not a matrix, \c glsl_type::error_type is returned. + * Otherwise a type matching the rows of the matrix is returned. + */ + const glsl_type *row_type() const + { + return is_matrix() + ? get_instance(base_type, matrix_columns, 1) + : error_type; + } + + /** + * Query the full type of a matrix column + * + * \return + * If the type is not a matrix, \c glsl_type::error_type is returned. + * Otherwise a type matching the columns of the matrix is returned. + */ + const glsl_type *column_type() const + { + return is_matrix() + ? get_instance(base_type, vector_elements, 1) + : error_type; + } + + /** + * Get the type of a structure field + * + * \return + * Pointer to the type of the named field. If the type is not a structure + * or the named field does not exist, \c glsl_type::error_type is returned. + */ + const glsl_type *field_type(const char *name) const; + + /** + * Get the location of a field within a record type + */ + int field_index(const char *name) const; + + /** + * Query the number of elements in an array type + * + * \return + * The number of elements in the array for array types or -1 for non-array + * types. If the number of elements in the array has not yet been declared, + * zero is returned. + */ + int array_size() const + { + return is_array() ? length : -1; + } + + /** + * Query whether the array size for all dimensions has been declared. + */ + bool is_unsized_array() const + { + return is_array() && length == 0; + } + + /** + * Return the number of coordinate components needed for this + * sampler or image type. + * + * This is based purely on the sampler's dimensionality. For example, this + * returns 1 for sampler1D, and 3 for sampler2DArray. + * + * Note that this is often different than actual coordinate type used in + * a texturing built-in function, since those pack additional values (such + * as the shadow comparitor or projector) into the coordinate type. + */ + int coordinate_components() const; + + /** + * Compare a record type against another record type. + * + * This is useful for matching record types declared across shader stages. + */ + bool record_compare(const glsl_type *b) const; + + private: + + static mtx_t mutex; + + /** + * ralloc context for all glsl_type allocations + * + * Set on the first call to \c glsl_type::new. 
+ */ + static void *mem_ctx; + + void init_ralloc_type_ctx(void); + + /** Constructor for vector and matrix types */ + glsl_type(GLenum gl_type, + glsl_base_type base_type, unsigned vector_elements, + unsigned matrix_columns, const char *name); + + /** Constructor for sampler or image types */ + glsl_type(GLenum gl_type, glsl_base_type base_type, + enum glsl_sampler_dim dim, bool shadow, bool array, + unsigned type, const char *name); + + /** Constructor for record types */ + glsl_type(const glsl_struct_field *fields, unsigned num_fields, + const char *name); + + /** Constructor for interface types */ + glsl_type(const glsl_struct_field *fields, unsigned num_fields, + enum glsl_interface_packing packing, const char *name); + ++ /** Constructor for interface types */ ++ glsl_type(const glsl_type *return_type, ++ const glsl_function_param *params, unsigned num_params); ++ + /** Constructor for array types */ + glsl_type(const glsl_type *array, unsigned length); + + /** Constructor for subroutine types */ + glsl_type(const char *name); + + /** Hash table containing the known array types. */ + static struct hash_table *array_types; + + /** Hash table containing the known record types. */ + static struct hash_table *record_types; + + /** Hash table containing the known interface types. */ + static struct hash_table *interface_types; + + /** Hash table containing the known subroutine types. */ + static struct hash_table *subroutine_types; + ++ /** Hash table containing the known function types. */ ++ static struct hash_table *function_types; ++ + static bool record_key_compare(const void *a, const void *b); + static unsigned record_key_hash(const void *key); + + /** + * \name Built-in type flyweights + */ + /*@{*/ + #undef DECL_TYPE + #define DECL_TYPE(NAME, ...) static const glsl_type _##NAME##_type; + #undef STRUCT_TYPE + #define STRUCT_TYPE(NAME) static const glsl_type _struct_##NAME##_type; + #include "builtin_type_macros.h" + /*@}*/ + + /** + * \name Friend functions. + * + * These functions are friends because they must have C linkage and the + * need to call various private methods or access various private static + * data. + */ + /*@{*/ + friend void _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *); + friend void _mesa_glsl_release_types(void); + /*@}*/ + }; + ++#undef DECL_TYPE ++#undef STRUCT_TYPE ++#endif /* __cplusplus */ ++ + struct glsl_struct_field { + const struct glsl_type *type; + const char *name; + + /** + * For interface blocks, gl_varying_slot corresponding to the input/output + * if this is a built-in input/output (i.e. a member of the built-in + * gl_PerVertex interface block); -1 otherwise. + * + * Ignored for structs. + */ + int location; + + /** + * For interface blocks, the interpolation mode (as in + * ir_variable::interpolation). 0 otherwise. + */ + unsigned interpolation:2; + + /** + * For interface blocks, 1 if this variable uses centroid interpolation (as + * in ir_variable::centroid). 0 otherwise. + */ + unsigned centroid:1; + + /** + * For interface blocks, 1 if this variable uses sample interpolation (as + * in ir_variable::sample). 0 otherwise. + */ + unsigned sample:1; + + /** + * Layout of the matrix. Uses glsl_matrix_layout values. + */ + unsigned matrix_layout:2; + + /** + * For interface blocks, 1 if this variable is a per-patch input or output + * (as in ir_variable::patch). 0 otherwise. + */ + unsigned patch:1; + + /** + * For interface blocks, it has a value if this variable uses multiple vertex + * streams (as in ir_variable::stream). 
-1 otherwise. + */ + int stream; + - + /** + * Image qualifiers, applicable to buffer variables defined in shader + * storage buffer objects (SSBOs) + */ + unsigned image_read_only:1; + unsigned image_write_only:1; + unsigned image_coherent:1; + unsigned image_volatile:1; + unsigned image_restrict:1; + ++#ifdef __cplusplus + glsl_struct_field(const struct glsl_type *_type, const char *_name) + : type(_type), name(_name), location(-1), interpolation(0), centroid(0), + sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), + stream(-1) + { + /* empty */ + } + + glsl_struct_field() + { + /* empty */ + } ++#endif ++}; ++ ++struct glsl_function_param { ++ const struct glsl_type *type; ++ ++ bool in; ++ bool out; + }; + + static inline unsigned int + glsl_align(unsigned int a, unsigned int align) + { + return (a + align - 1) / align * align; + } + -#undef DECL_TYPE -#undef STRUCT_TYPE -#endif /* __cplusplus */ - + #endif /* GLSL_TYPES_H */ diff --cc src/vulkan/Makefile.am index 985864a87fe,00000000000..5abbd379b54 mode 100644,000000..100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@@ -1,138 -1,0 +1,139 @@@ +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . 
tests + +vulkan_includedir = $(includedir)/vulkan + +vulkan_include_HEADERS = \ + $(top_srcdir)/include/vulkan/vk_platform.h \ + $(top_srcdir)/include/vulkan/vulkan.h \ + $(top_srcdir)/include/vulkan/vulkan_intel.h \ + $(top_srcdir)/include/vulkan/vk_ext_khr_swapchain.h \ + $(top_srcdir)/include/vulkan/vk_ext_khr_device_swapchain.h + +lib_LTLIBRARIES = libvulkan.la + +check_LTLIBRARIES = libvulkan-test.la + +# The gallium includes are for the util/u_math.h include from main/macros.h + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ ++ -I$(top_srcdir)/src/glsl/nir \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_builddir)/src/vulkan + +libvulkan_la_CFLAGS = \ + -Wall -Wno-unused-parameter -fvisibility=hidden -O0 -g \ + -Wstrict-prototypes -Wmissing-prototypes -Wno-override-init + +libvulkan_la_CXXFLAGS = \ + -Wall -Wno-unused-parameter -fvisibility=hidden -O0 -g + +VULKAN_SOURCES = \ + anv_allocator.c \ + anv_cmd_buffer.c \ + anv_batch_chain.c \ + anv_compiler.cpp \ + anv_device.c \ + anv_dump.c \ + anv_entrypoints.c \ + anv_entrypoints.h \ + anv_formats.c \ + anv_image.c \ + anv_intel.c \ + anv_meta.c \ + anv_nir_apply_dynamic_offsets.c \ + anv_nir_apply_pipeline_layout.c \ + anv_pipeline.c \ + anv_private.h \ + anv_query.c \ + anv_util.c \ + anv_wsi.c \ + anv_wsi_x11.c \ + gen8_state.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen7_state.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c + +BUILT_SOURCES = \ + anv_entrypoints.h \ + anv_entrypoints.c + +if HAVE_EGL_PLATFORM_WAYLAND +BUILT_SOURCES += \ + wayland-drm-protocol.c \ + wayland-drm-client-protocol.h + +%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@ + +%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@ + +AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm +VULKAN_SOURCES += \ + wayland-drm-protocol.c \ + anv_wsi_wayland.c +libvulkan_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM +endif + +libvulkan_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem.c + +anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ + +anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ + +CLEANFILES = $(BUILT_SOURCES) + +libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ + ../mesa/libmesa.la \ + ../mesa/drivers/dri/common/libdri_test_stubs.la \ + -lpthread -ldl + +# Libvulkan with dummy gem. Used for unit tests. 
+ +libvulkan_test_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem_stubs.c + +libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) +libvulkan_test_la_CXXFLAGS = $(libvulkan_la_CXXFLAGS) +libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) + +include $(top_srcdir)/install-lib-links.mk diff --cc src/vulkan/anv_compiler.cpp index a3b8d1cc80c,00000000000..2b8e7cee9aa mode 100644,000000..100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@@ -1,1386 -1,0 +1,1398 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "anv_private.h" +#include "anv_nir.h" + +#include +#include /* brw_new_shader_program is here */ +#include + +#include +#include +#include +#include "brw_vec4_gs_visitor.h" ++#include + +#include +#include +#include +#include +#include + +/* XXX: We need this to keep symbols in nir.h from conflicting with the + * generated GEN command packing headers. We need to fix *both* to not + * define something as generic as LOAD. + */ +#undef LOAD + +#include + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +static void +fail_if(int cond, const char *format, ...) 
+{ + va_list args; + + if (!cond) + return; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + exit(1); +} + +static VkResult +set_binding_table_layout(struct brw_stage_prog_data *prog_data, + struct anv_pipeline *pipeline, uint32_t stage) +{ + unsigned bias; + if (stage == VK_SHADER_STAGE_FRAGMENT) + bias = MAX_RTS; + else + bias = 0; + + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.image_start = bias; + + return VK_SUCCESS; +} + +static uint32_t +upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size) +{ + struct anv_state state = + anv_state_stream_alloc(&pipeline->program_stream, size, 64); + + assert(size < pipeline->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + return state.offset; +} + +static void +create_params_array(struct anv_pipeline *pipeline, + struct gl_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + VkShaderStage stage = anv_vk_shader_stage_for_mesa_stage(shader->Stage); + unsigned num_params = 0; + + if (shader->num_uniform_components) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + num_params += MAX_DYNAMIC_BUFFERS; + + if (num_params == 0) + return; + + prog_data->param = (const gl_constant_value **) + anv_device_alloc(pipeline->device, + num_params * sizeof(gl_constant_value *), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER); + + /* We now set the param values to be offsets into a + * anv_push_constant_data structure. Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < num_params; i++) + prog_data->param[i] = + (const gl_constant_value *)&null_data->client_data[i * sizeof(float)]; +} + +/** + * Return a bitfield where bit n is set if barycentric interpolation mode n + * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. + */ +unsigned +brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo, + bool shade_model_flat, + bool persample_shading, + nir_shader *shader) +{ + unsigned barycentric_interp_modes = 0; + + nir_foreach_variable(var, &shader->inputs) { + enum glsl_interp_qualifier interp_qualifier = + (enum glsl_interp_qualifier) var->data.interpolation; + bool is_centroid = var->data.centroid && !persample_shading; + bool is_sample = var->data.sample || persample_shading; + bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) || + (var->data.location == VARYING_SLOT_COL1); + + /* Ignore WPOS and FACE, because they don't require interpolation. */ + if (var->data.location == VARYING_SLOT_POS || + var->data.location == VARYING_SLOT_FACE) + continue; + + /* Determine the set (or sets) of barycentric coordinates needed to + * interpolate this variable. Note that when + * brw->needs_unlit_centroid_workaround is set, centroid interpolation + * uses PIXEL interpolation for unlit pixels and CENTROID interpolation + * for lit pixels, so we need both sets of barycentric coordinates. 
+ */ + if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) { + if (is_centroid) { + barycentric_interp_modes |= + 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC; + } else if (is_sample) { + barycentric_interp_modes |= + 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC; + } + if ((!is_centroid && !is_sample) || + devinfo->needs_unlit_centroid_workaround) { + barycentric_interp_modes |= + 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC; + } + } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH || + (!(shade_model_flat && is_gl_Color) && + interp_qualifier == INTERP_QUALIFIER_NONE)) { + if (is_centroid) { + barycentric_interp_modes |= + 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC; + } else if (is_sample) { + barycentric_interp_modes |= + 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC; + } + if ((!is_centroid && !is_sample) || + devinfo->needs_unlit_centroid_workaround) { + barycentric_interp_modes |= + 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; + } + } + } + + return barycentric_interp_modes; +} + +static void +brw_vs_populate_key(struct brw_context *brw, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_VERTEX_PROGRAM */ + struct gl_program *prog = (struct gl_program *) vp; + + memset(key, 0, sizeof(*key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. + */ + key->program_string_id = vp->id; + + /* _NEW_POLYGON */ + if (brw->gen < 6) { + key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + } + + if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | + VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) { + /* _NEW_LIGHT | _NEW_BUFFERS */ + key->clamp_vertex_color = ctx->Light._ClampVertexColor; + } + + /* _NEW_POINT */ + if (brw->gen < 6 && ctx->Point.PointSprite) { + for (int i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key->point_coord_replace |= (1 << i); + } + } +} + +static bool +really_do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key, struct anv_pipeline *pipeline) +{ + GLuint program_size; + const GLuint *program; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + void *mem_ctx; + struct gl_shader *vs = NULL; + + if (prog) + vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; + + memset(prog_data, 0, sizeof(*prog_data)); + + mem_ctx = ralloc_context(NULL); + + create_params_array(pipeline, vs, &prog_data->base.base); + anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir, + &prog_data->base.base); + anv_nir_apply_pipeline_layout(vs->Program->nir, pipeline->layout); + + GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; + prog_data->inputs_read = vp->program.Base.InputsRead; + + if (key->copy_edgeflag) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); + prog_data->inputs_read |= VERT_BIT_EDGEFLAG; + } + + if (brw->gen < 6) { + /* Put dummy slots into the VUE for the SF to put the replaced + * point sprite coords in. We shouldn't need these dummy slots, + * which take up precious URB space, but it would mean that the SF + * doesn't get nice aligned pairs of input coords into output + * coords, which would be a pain to handle. 
+ */ + for (int i = 0; i < 8; i++) { + if (key->point_coord_replace & (1 << i)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); + } + + /* if back colors are written, allocate slots for front colors too */ + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); + } + + /* In order for legacy clipping to work, we need to populate the clip + * distance varying slots whenever clipping is enabled, even if the vertex + * shader doesn't write to gl_ClipDistance. + */ + if (key->nr_userclip_plane_consts) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); + } + + brw_compute_vue_map(brw->intelScreen->devinfo, + &prog_data->base.vue_map, outputs_written, + prog ? prog->SeparateShader : false); + + set_binding_table_layout(&prog_data->base.base, pipeline, + VK_SHADER_STAGE_VERTEX); + + /* Emit GEN4 code. + */ - program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program, - prog, -1, &program_size); ++ program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, ++ key, prog_data, vs->Program->nir, NULL, false, -1, ++ &program_size, NULL); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + const uint32_t offset = upload_kernel(pipeline, program, program_size); + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = offset; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = offset; + } + + ralloc_free(mem_ctx); + + return true; +} + +void brw_wm_populate_key(struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + GLuint lookup = 0; + GLuint line_aa; + bool program_uses_dfdy = fp->program.UsesDFdy; + struct gl_framebuffer draw_buffer; + bool multisample_fbo; + + memset(key, 0, sizeof(*key)); + + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. */ + key->tex.swizzles[i] = SWIZZLE_XYZW; + } + + /* A non-zero framebuffer name indicates that the framebuffer was created by + * the user rather than the window system. */ + draw_buffer.Name = 1; + draw_buffer.Visual.samples = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer.Width = 400; + draw_buffer.Height = 400; + ctx->DrawBuffer = &draw_buffer; + + multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; + + /* Build the index for table lookup + */ + if (brw->gen < 6) { + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* _NEW_DEPTH */ + if (ctx->Depth.Test) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? 
*/ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + /* _NEW_STENCIL | _NEW_BUFFERS */ + if (ctx->Stencil._Enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + key->iz_lookup = lookup; + } + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (ctx->Line.SmoothFlag) { + if (brw->reduced_primitive == GL_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == GL_TRIANGLES) { + if (ctx->Polygon.FrontMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if (ctx->Polygon.BackMode == GL_LINE || + (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_BACK)) + line_aa = AA_ALWAYS; + } + else if (ctx->Polygon.BackMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if ((ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT)) + line_aa = AA_ALWAYS; + } + } + } + + key->line_aa = line_aa; + + /* _NEW_HINT */ + key->high_quality_derivatives = + ctx->Hint.FragmentShaderDerivative == GL_NICEST; + + if (brw->gen < 6) + key->stats_wm = brw->stats_wm; + + /* _NEW_LIGHT */ + key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + + /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ + key->clamp_fragment_color = ctx->Color._ClampFragmentColor; + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. + */ + if (fp->program.Base.InputsRead & VARYING_BIT_POS) { + key->drawable_height = ctx->DrawBuffer->Height; + } + + if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); + } + + /* _NEW_BUFFERS */ + key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; + + /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ + key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); + + /* _NEW_BUFFERS _NEW_MULTISAMPLE */ + /* Ignore sample qualifier while computing this flag. 
*/ + key->persample_shading = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; + if (key->persample_shading) + key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; + + key->compute_pos_offset = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && + fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; + + key->compute_sample_id = + multisample_fbo && + ctx->Multisample.Enabled && + (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); + + /* BRW_NEW_VUE_MAP_GEOM_OUT */ + if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key->input_slots_valid = brw->vue_map_geom_out.slots_valid; + + + /* _NEW_COLOR | _NEW_BUFFERS */ + /* Pre-gen6, the hardware alpha test always used each render + * target's alpha to do alpha test, as opposed to render target 0's alpha + * like GL requires. Fix that by building the alpha test into the + * shader, and we'll skip enabling the fixed function alpha test. + */ + if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) { + key->alpha_test_func = ctx->Color.AlphaFunc; + key->alpha_test_ref = ctx->Color.AlphaRef; + } + + /* The unique fragment program ID */ + key->program_string_id = fp->id; + + ctx->DrawBuffer = NULL; +} + +static uint8_t +computed_depth_mode(struct gl_fragment_program *fp) +{ + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + switch (fp->FragDepthLayout) { + case FRAG_DEPTH_LAYOUT_NONE: + case FRAG_DEPTH_LAYOUT_ANY: + return BRW_PSCDEPTH_ON; + case FRAG_DEPTH_LAYOUT_GREATER: + return BRW_PSCDEPTH_ON_GE; + case FRAG_DEPTH_LAYOUT_LESS: + return BRW_PSCDEPTH_ON_LE; + case FRAG_DEPTH_LAYOUT_UNCHANGED: + return BRW_PSCDEPTH_OFF; + } + } + return BRW_PSCDEPTH_OFF; +} + +static bool +really_do_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key, struct anv_pipeline *pipeline) +{ + void *mem_ctx = ralloc_context(NULL); + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct gl_shader *fs = NULL; + unsigned int program_size; + const uint32_t *program; + + if (prog) + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + + memset(prog_data, 0, sizeof(*prog_data)); + + /* key->alpha_test_func means simulating alpha testing via discards, + * so the shader definitely kills pixels. + */ + prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func; + + prog_data->computed_depth_mode = computed_depth_mode(&fp->program); + + create_params_array(pipeline, fs, &prog_data->base); + anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base); + anv_nir_apply_pipeline_layout(fs->Program->nir, pipeline->layout); + + prog_data->barycentric_interp_modes = + brw_compute_barycentric_interp_modes(brw->intelScreen->devinfo, + key->flat_shade, + key->persample_shading, + fp->program.Base.nir); + + set_binding_table_layout(&prog_data->base, pipeline, + VK_SHADER_STAGE_FRAGMENT); + /* This needs to come after shader time and pull constant entries, but we + * don't have those set up now, so just put it after the layout entries. 
+ */ + prog_data->binding_table.render_target_start = 0; + - program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data, - &fp->program, prog, -1, -1, &program_size); ++ program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx, key, ++ prog_data, fp->program.Base.nir, fs->Program, ++ -1, -1, brw->use_rep_send, &program_size, NULL); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + uint32_t offset = upload_kernel(pipeline, program, program_size); + + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + ralloc_free(mem_ctx); + + return true; +} + +bool +anv_codegen_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key, + struct anv_pipeline *pipeline) +{ + struct brw_gs_compile c; + + memset(&c, 0, sizeof(c)); + c.key = *key; + c.gp = gp; + + c.prog_data.include_primitive_id = + (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0; + + c.prog_data.invocations = gp->program.Invocations; + + set_binding_table_layout(&c.prog_data.base.base, + pipeline, VK_SHADER_STAGE_GEOMETRY); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + * + * Note: param_count needs to be num_uniform_components * 4, since we add + * padding around uniform values below vec4 size, so the worst case is that + * every uniform is a float which gets padded to the size of a vec4. + */ + struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; + int param_count = gp->program.Base.nir->num_uniforms * 4; + + c.prog_data.base.base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + c.prog_data.base.base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + c.prog_data.base.base.image_param = + rzalloc_array(NULL, struct brw_image_param, gs->NumImages); + c.prog_data.base.base.nr_params = param_count; + c.prog_data.base.base.nr_image_params = gs->NumImages; + + brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base, + &c.prog_data.base.base, false); + + if (brw->gen >= 8) { + c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 : + nir_gs_count_vertices(gp->program.Base.nir); + } + + if (brw->gen >= 7) { + if (gp->program.OutputType == GL_POINTS) { + /* When the output type is points, the geometry shader may output data + * to multiple streams, and EndPrimitive() has no effect. So we + * configure the hardware to interpret the control data as stream ID. + */ + c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; + + /* We only have to emit control bits if we are using streams */ + if (prog->Geom.UsesStreams) + c.control_data_bits_per_vertex = 2; + else + c.control_data_bits_per_vertex = 0; + } else { + /* When the output type is triangle_strip or line_strip, EndPrimitive() + * may be used to terminate the current strip and start a new one + * (similar to primitive restart), and outputting data to multiple + * streams is not supported. So we configure the hardware to interpret + * the control data as EndPrimitive information (a.k.a. "cut bits"). 
+ */ + c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT; + + /* We only need to output control data if the shader actually calls + * EndPrimitive(). + */ + c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0; + } + } else { + /* There are no control data bits in gen6. */ + c.control_data_bits_per_vertex = 0; + + /* If it is using transform feedback, enable it */ + if (prog->TransformFeedback.NumVarying) + c.prog_data.gen6_xfb_enabled = true; + else + c.prog_data.gen6_xfb_enabled = false; + } + c.control_data_header_size_bits = + gp->program.VerticesOut * c.control_data_bits_per_vertex; + + /* 1 HWORD = 32 bytes = 256 bits */ + c.prog_data.control_data_header_size_hwords = + ALIGN(c.control_data_header_size_bits, 256) / 256; + + GLbitfield64 outputs_written = gp->program.Base.OutputsWritten; + + brw_compute_vue_map(brw->intelScreen->devinfo, + &c.prog_data.base.vue_map, outputs_written, + prog ? prog->SeparateShader : false); + + /* Compute the output vertex size. + * + * From the Ivy Bridge PRM, Vol2 Part1 7.2.1.1 STATE_GS - Output Vertex + * Size (p168): + * + * [0,62] indicating [1,63] 16B units + * + * Specifies the size of each vertex stored in the GS output entry + * (following any Control Header data) as a number of 128-bit units + * (minus one). + * + * Programming Restrictions: The vertex size must be programmed as a + * multiple of 32B units with the following exception: Rendering is + * disabled (as per SOL stage state) and the vertex size output by the + * GS thread is 16B. + * + * If rendering is enabled (as per SOL state) the vertex size must be + * programmed as a multiple of 32B units. In other words, the only time + * software can program a vertex size with an odd number of 16B units + * is when rendering is disabled. + * + * Note: B=bytes in the above text. + * + * It doesn't seem worth the extra trouble to optimize the case where the + * vertex size is 16B (especially since this would require special-casing + * the GEN assembly that writes to the URB). So we just set the vertex + * size to a multiple of 32B (2 vec4's) in all cases. + * + * The maximum output vertex size is 62*16 = 992 bytes (31 hwords). We + * budget that as follows: + * + * 512 bytes for varyings (a varying component is 4 bytes and + * gl_MaxGeometryOutputComponents = 128) + * 16 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 + * bytes) + * 16 bytes overhead for gl_Position (we allocate it a slot in the VUE + * even if it's not used) + * 32 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots + * whenever clip planes are enabled, even if the shader doesn't + * write to gl_ClipDistance) + * 16 bytes overhead since the VUE size must be a multiple of 32 bytes + * (see below)--this causes up to 1 VUE slot to be wasted + * 400 bytes available for varying packing overhead + * + * Worst-case varying packing overhead is 3/4 of a varying slot (12 bytes) + * per interpolation type, so this is plenty. + * + */ + unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16; + assert(brw->gen == 6 || + output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES); + c.prog_data.output_vertex_size_hwords = + ALIGN(output_vertex_size_bytes, 32) / 32; + + /* Compute URB entry size. The maximum allowed URB entry size is 32k. 
+ * That divides up as follows: + * + * 64 bytes for the control data header (cut indices or StreamID bits) + * 4096 bytes for varyings (a varying component is 4 bytes and + * gl_MaxGeometryTotalOutputComponents = 1024) + * 4096 bytes overhead for VARYING_SLOT_PSIZ (each varying slot is 16 + * bytes/vertex and gl_MaxGeometryOutputVertices is 256) + * 4096 bytes overhead for gl_Position (we allocate it a slot in the VUE + * even if it's not used) + * 8192 bytes overhead for gl_ClipDistance (we allocate it 2 VUE slots + * whenever clip planes are enabled, even if the shader doesn't + * write to gl_ClipDistance) + * 4096 bytes overhead since the VUE size must be a multiple of 32 + * bytes (see above)--this causes up to 1 VUE slot to be wasted + * 8128 bytes available for varying packing overhead + * + * Worst-case varying packing overhead is 3/4 of a varying slot per + * interpolation type, which works out to 3072 bytes, so this would allow + * us to accommodate 2 interpolation types without any danger of running + * out of URB space. + * + * In practice, the risk of running out of URB space is very small, since + * the above figures are all worst-case, and most of them scale with the + * number of output vertices. So we'll just calculate the amount of space + * we need, and if it's too large, fail to compile. + * + * The above is for gen7+ where we have a single URB entry that will hold + * all the output. In gen6, we will have to allocate URB entries for every + * vertex we emit, so our URB entries only need to be large enough to hold + * a single vertex. Also, gen6 does not have a control data header. + */ + unsigned output_size_bytes; + if (brw->gen >= 7) { + output_size_bytes = + c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut; + output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords; + } else { + output_size_bytes = c.prog_data.output_vertex_size_hwords * 32; + } + + /* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output, + * which comes before the control header. + */ + if (brw->gen >= 8) + output_size_bytes += 32; + + assert(output_size_bytes >= 1); + int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; + if (brw->gen == 6) + max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; + if (output_size_bytes > max_output_size_bytes) + return false; + + + /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and + * a multiple of 128 bytes in gen6. + */ + if (brw->gen >= 7) + c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; + else + c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; + + /* FIXME: Need to pull this from nir shader. */ + c.prog_data.output_topology = _3DPRIM_TRISTRIP; + + /* The GLSL linker will have already matched up GS inputs and the outputs + * of prior stages. The driver does extend VS outputs in some cases, but + * only for legacy OpenGL or Gen4-5 hardware, neither of which offer + * geometry shader support. So we can safely ignore that. + * + * For SSO pipelines, we use a fixed VUE map layout based on variable + * locations, so we can rely on rendezvous-by-location making this work. + * + * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not + * written by previous stages and shows up via payload magic. 
+ */ + GLbitfield64 inputs_read = + gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID; + brw_compute_vue_map(brw->intelScreen->devinfo, + &c.input_vue_map, inputs_read, + prog->SeparateShader); + + /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we + * need to program a URB read length of ceiling(num_slots / 2). + */ + c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2; + + void *mem_ctx = ralloc_context(NULL); + unsigned program_size; + const unsigned *program = - brw_gs_emit(brw, prog, &c, mem_ctx, -1, &program_size); ++ brw_compile_gs(brw->intelScreen->compiler, brw, &c, gp->program.Base.nir, ++ prog, mem_ctx, -1, &program_size, NULL); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + pipeline->gs_vec4 = upload_kernel(pipeline, program, program_size); + pipeline->gs_vertex_count = gp->program.VerticesIn; + + ralloc_free(mem_ctx); + + return true; +} + +static bool +brw_codegen_cs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_compute_program *cp, + struct brw_cs_prog_key *key, struct anv_pipeline *pipeline) +{ + const GLuint *program; + void *mem_ctx = ralloc_context(NULL); + GLuint program_size; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + assert (cs); + + memset(prog_data, 0, sizeof(*prog_data)); + + set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); + + create_params_array(pipeline, cs, &prog_data->base); + anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base); + anv_nir_apply_pipeline_layout(cs->Program->nir, pipeline->layout); + - program = brw_cs_emit(brw, mem_ctx, key, prog_data, - &cp->program, prog, -1, &program_size); ++ program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx, key, ++ prog_data, cs->Program->nir, -1, ++ &program_size, NULL); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + if (unlikely(INTEL_DEBUG & DEBUG_CS)) + fprintf(stderr, "\n"); + + pipeline->cs_simd = upload_kernel(pipeline, program, program_size); + + ralloc_free(mem_ctx); + + return true; +} + +static void +brw_cs_populate_key(struct brw_context *brw, + struct brw_compute_program *bcp, struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* The unique compute program ID */ + key->program_string_id = bcp->id; +} + +struct anv_compiler { + struct anv_device *device; + struct intel_screen *screen; + struct brw_context *brw; + struct gl_pipeline_object pipeline; +}; + +extern "C" { + +struct anv_compiler * +anv_compiler_create(struct anv_device *device) +{ + const struct brw_device_info *devinfo = &device->info; + struct anv_compiler *compiler; + struct gl_context *ctx; + + compiler = rzalloc(NULL, struct anv_compiler); + if (compiler == NULL) + return NULL; + + compiler->screen = rzalloc(compiler, struct intel_screen); + if (compiler->screen == NULL) + goto fail; + + compiler->brw = rzalloc(compiler, struct brw_context); + if (compiler->brw == NULL) + goto fail; + + compiler->device = device; + + compiler->brw->gen = devinfo->gen; + compiler->brw->is_g4x = devinfo->is_g4x; + compiler->brw->is_baytrail = devinfo->is_baytrail; + compiler->brw->is_haswell = devinfo->is_haswell; + compiler->brw->is_cherryview = devinfo->is_cherryview; + + /* We need this at least for CS, which will check brw->max_cs_threads + * against the work group size. 
*/ + compiler->brw->max_vs_threads = devinfo->max_vs_threads; + compiler->brw->max_hs_threads = devinfo->max_hs_threads; + compiler->brw->max_ds_threads = devinfo->max_ds_threads; + compiler->brw->max_gs_threads = devinfo->max_gs_threads; + compiler->brw->max_wm_threads = devinfo->max_wm_threads; + compiler->brw->max_cs_threads = devinfo->max_cs_threads; + compiler->brw->urb.size = devinfo->urb.size; + compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; + compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; + compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries; + compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; + compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; + + compiler->brw->intelScreen = compiler->screen; + compiler->screen->devinfo = &device->info; + + brw_process_intel_debug_variable(); + + compiler->screen->compiler = brw_compiler_create(compiler, &device->info); + + ctx = &compiler->brw->ctx; + _mesa_init_shader_object_functions(&ctx->Driver); + + /* brw_select_clip_planes() needs this for bogus reasons. */ + ctx->_Shader = &compiler->pipeline; + + return compiler; + + fail: + ralloc_free(compiler); + return NULL; +} + +void +anv_compiler_destroy(struct anv_compiler *compiler) +{ + _mesa_free_errors_data(&compiler->brw->ctx); + ralloc_free(compiler); +} + +/* From gen7_urb.c */ + +/* FIXME: Add to struct intel_device_info */ + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->vs_simd8 != NO_KERNEL; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->gs_vec4 != NO_KERNEL; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. 
+ * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. + */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. 
+ */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static const struct { + uint32_t token; + gl_shader_stage stage; + const char *name; +} stage_info[] = { + { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" }, + { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1,"tess control" }, + { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" }, + { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" }, + { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" }, + { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" }, +}; + +struct spirv_header{ + uint32_t magic; + uint32_t version; + uint32_t gen_magic; +}; + +static void +setup_nir_io(struct gl_shader *mesa_shader, + nir_shader *shader) +{ + struct gl_program *prog = mesa_shader->Program; + foreach_list_typed(nir_variable, var, node, &shader->inputs) { + prog->InputsRead |= BITFIELD64_BIT(var->data.location); + if (shader->stage == MESA_SHADER_FRAGMENT) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *)prog; + + fprog->InterpQualifier[var->data.location] = + (glsl_interp_qualifier)var->data.interpolation; + if (var->data.centroid) + fprog->IsCentroid |= BITFIELD64_BIT(var->data.location); + if (var->data.sample) + fprog->IsSample |= BITFIELD64_BIT(var->data.location); + } + } + + foreach_list_typed(nir_variable, var, node, &shader->outputs) { + prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); + } + ++ shader->info.system_values_read = 0; ++ foreach_list_typed(nir_variable, var, node, &shader->system_values) { ++ shader->info.system_values_read |= BITFIELD64_BIT(var->data.location); ++ } ++ + shader->info.inputs_read = prog->InputsRead; + shader->info.outputs_written = prog->OutputsWritten; - - mesa_shader->num_uniform_components = shader->num_uniforms; +} + +static void +anv_compile_shader_spirv(struct anv_compiler *compiler, + struct gl_shader_program *program, + struct anv_pipeline *pipeline, uint32_t stage) +{ + struct brw_context *brw = compiler->brw; + struct anv_shader *shader = pipeline->shaders[stage]; + struct gl_shader *mesa_shader; + int name = 0; + + mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); + fail_if(mesa_shader == NULL, + "failed to create %s shader\n", stage_info[stage].name); + +#define CREATE_PROGRAM(stage) \ - _mesa_init_##stage##_program(&brw->ctx, &ralloc(mesa_shader, struct brw_##stage##_program)->program, 0, 0) ++ &ralloc(mesa_shader, struct brw_##stage##_program)->program.Base + + bool is_scalar; + struct gl_program *prog; + switch (stage) { + case VK_SHADER_STAGE_VERTEX: + prog = CREATE_PROGRAM(vertex); + is_scalar = compiler->screen->compiler->scalar_vs; + break; + case VK_SHADER_STAGE_GEOMETRY: + prog = CREATE_PROGRAM(geometry); + is_scalar = false; + break; + case VK_SHADER_STAGE_FRAGMENT: + prog = CREATE_PROGRAM(fragment); + is_scalar = true; + break; + case VK_SHADER_STAGE_COMPUTE: + prog = CREATE_PROGRAM(compute); + is_scalar = true; + break; + default: + unreachable("Unsupported shader stage"); + } ++ _mesa_init_gl_program(prog, 0, 0); + _mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog); + + 
mesa_shader->Program->Parameters = + rzalloc(mesa_shader, struct gl_program_parameter_list); + + mesa_shader->Type = stage_info[stage].token; + mesa_shader->Stage = stage_info[stage].stage; + + struct gl_shader_compiler_options *glsl_options = + &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; + + if (shader->module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly. In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + mesa_shader->Program->nir = shader->module->nir; + mesa_shader->Program->nir->options = glsl_options->NirOptions; + } else { + uint32_t *spirv = (uint32_t *) shader->module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(shader->module->size % 4 == 0); + + mesa_shader->Program->nir = + spirv_to_nir(spirv, shader->module->size / 4, + stage_info[stage].stage, glsl_options->NirOptions); + } + nir_validate_shader(mesa_shader->Program->nir); + ++ setup_nir_io(mesa_shader, mesa_shader->Program->nir); ++ + brw_process_nir(mesa_shader->Program->nir, + compiler->screen->devinfo, + NULL, mesa_shader->Stage, is_scalar); + - setup_nir_io(mesa_shader, mesa_shader->Program->nir); ++ mesa_shader->num_uniform_components = ++ mesa_shader->Program->nir->num_uniforms; + + fail_if(mesa_shader->Program->nir == NULL, + "failed to translate SPIR-V to NIR\n"); + + _mesa_reference_shader(&brw->ctx, &program->Shaders[program->NumShaders], + mesa_shader); + program->NumShaders++; +} + +static void +add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, + [VK_SHADER_STAGE_TESS_CONTROL] = 0, + [VK_SHADER_STAGE_TESS_EVALUATION] = 0, + [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, + [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, + [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= 1 << stage; + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +int +anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) +{ + struct gl_shader_program *program; + int name = 0; + struct brw_context *brw = compiler->brw; + + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. 
+ */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + brw->use_rep_send = pipeline->use_repclear; + brw->no_simd8 = pipeline->use_repclear; + + program = _mesa_new_shader_program(name); + program->Shaders = (struct gl_shader **) + calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *)); + fail_if(program == NULL || program->Shaders == NULL, + "failed to create program\n"); + + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { + if (pipeline->shaders[i]) + anv_compile_shader_spirv(compiler, program, pipeline, i); + } + + for (unsigned i = 0; i < program->NumShaders; i++) { + struct gl_shader *shader = program->Shaders[i]; + program->_LinkedShaders[shader->Stage] = shader; + } + + bool success; + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { + struct brw_vs_prog_key vs_key; + struct gl_vertex_program *vp = (struct gl_vertex_program *) + program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + + brw_vs_populate_key(brw, bvp, &vs_key); + + success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, + &pipeline->vs_prog_data.base.base); + + if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; + } else { + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + } + + + if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { + struct brw_gs_prog_key gs_key; + struct gl_geometry_program *gp = (struct gl_geometry_program *) + program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program; + struct brw_geometry_program *bgp = brw_geometry_program(gp); + + success = anv_codegen_gs_prog(brw, program, bgp, &gs_key, pipeline); + fail_if(!success, "do_gs_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, + &pipeline->gs_prog_data.base.base); + + if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; + } else { + pipeline->gs_vec4 = NO_KERNEL; + } + + if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) { + struct brw_wm_prog_key wm_key; + struct gl_fragment_program *fp = (struct gl_fragment_program *) + program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + + brw_wm_populate_key(brw, bfp, &wm_key); + + success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, + &pipeline->wm_prog_data.base); + } + + if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { + struct brw_cs_prog_key cs_key; + struct gl_compute_program *cp = (struct gl_compute_program *) + program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program; + struct brw_compute_program *bcp = brw_compute_program(cp); + + brw_cs_populate_key(brw, bcp, &cs_key); + + success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); + fail_if(!success, "brw_codegen_cs_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, + &pipeline->cs_prog_data.base); + } + + _mesa_delete_shader_program(&brw->ctx, program); + + struct anv_device *device = compiler->device; + while (device->scratch_block_pool.bo.size < pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + + 
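   /* A rough sketch of the scratch bookkeeping above (illustrative numbers,
    * not taken from any hardware documentation): add_compiled_stage()
    * accumulates
    *
    *    total_scratch = align_u32(total_scratch, 1024) +
    *                    prog_data->total_scratch * max_threads[stage];
    *
    * so, for example, a stage needing 2 kB of per-thread scratch on a part
    * with 64 threads for that stage contributes 128 kB. The loop above then
    * grows the device-wide scratch block pool until it is large enough to
    * back that worst case.
    */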
gen7_compute_urb_partition(pipeline); + + return 0; +} + +/* This badly named function frees the struct anv_pipeline data that the compiler + * allocates. Currently just the prog_data structs. + */ +void +anv_compiler_free(struct anv_pipeline *pipeline) +{ + for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) { + if (pipeline->prog_data[stage]) { + /* We only ever set up the params array because we don't do + * non-UBO pull constants + */ + anv_device_free(pipeline->device, pipeline->prog_data[stage]->param); + } + } +} + +} diff --cc src/vulkan/anv_meta.c index 8f6bc421194,00000000000..76b8c4173e6 mode 100644,000000..100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@@ -1,1808 -1,0 +1,1802 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" +#include "anv_nir_builder.h" + +static nir_shader * +build_nir_vertex_shader(bool attr_flat) +{ + nir_builder b; + + const struct glsl_type *vertex_type = glsl_vec4_type(); + + nir_builder_init_simple_shader(&b, MESA_SHADER_VERTEX); + - nir_variable *pos_in = nir_variable_create(b.shader, "a_pos", - vertex_type, - nir_var_shader_in); ++ nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, ++ vertex_type, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; - nir_variable *pos_out = nir_variable_create(b.shader, "gl_Position", - vertex_type, - nir_var_shader_out); ++ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, ++ vertex_type, "gl_Position"); + pos_in->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + /* Add one more pass-through attribute. For clear shaders, this is used + * to store the color and for blit shaders it's the texture coordinate. + */ + const struct glsl_type *attr_type = glsl_vec4_type(); - nir_variable *attr_in = nir_variable_create(b.shader, "a_attr", attr_type, - nir_var_shader_in); ++ nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in, ++ attr_type, "a_attr"); + attr_in->data.location = VERT_ATTRIB_GENERIC1; - nir_variable *attr_out = nir_variable_create(b.shader, "v_attr", attr_type, - nir_var_shader_out); ++ nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, ++ attr_type, "v_attr"); + attr_out->data.location = VARYING_SLOT_VAR0; + attr_out->data.interpolation = attr_flat ? 
INTERP_QUALIFIER_FLAT : + INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, attr_out, attr_in); + + return b.shader; +} + +static nir_shader * +build_nir_clear_fragment_shader(void) +{ + nir_builder b; + + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT); + - nir_variable *color_in = nir_variable_create(b.shader, "v_attr", - color_type, - nir_var_shader_in); ++ nir_variable *color_in = nir_variable_create(b.shader, nir_var_shader_in, ++ color_type, "v_attr"); + color_in->data.location = VARYING_SLOT_VAR0; + color_in->data.interpolation = INTERP_QUALIFIER_FLAT; - nir_variable *color_out = nir_variable_create(b.shader, "f_color", - color_type, - nir_var_shader_out); ++ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, ++ color_type, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_copy_var(&b, color_out, color_in); + + return b.shader; +} + +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + nir_builder b; + + nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT); + + const struct glsl_type *color_type = glsl_vec4_type(); + - nir_variable *tex_pos_in = nir_variable_create(b.shader, "v_attr", - glsl_vec4_type(), - nir_var_shader_in); ++ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, ++ glsl_vec4_type(), "v_attr"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, false, glsl_get_base_type(color_type)); - nir_variable *sampler = nir_variable_create(b.shader, "s_tex", sampler_type, - nir_var_uniform); ++ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, ++ sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_load_var(&b, tex_pos_in)); + tex->dest_type = nir_type_float; /* TODO */ + + switch (tex_dim) { + case GLSL_SAMPLER_DIM_2D: + tex->coord_components = 2; + break; + case GLSL_SAMPLER_DIM_3D: + tex->coord_components = 3; + break; + default: + assert(!"Unsupported texture dimension"); + } + + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + - nir_variable *color_out = nir_variable_create(b.shader, "f_color", - color_type, - nir_var_shader_out); ++ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, ++ color_type, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa); + + return b.shader; +} + +static void +anv_device_init_meta_clear_state(struct anv_device *device) +{ + struct anv_shader_module vsm = { + .nir = build_nir_vertex_shader(true), + }; + + struct anv_shader_module fsm = { + .nir = build_nir_clear_fragment_shader(), + }; + + VkShader vs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = anv_shader_module_to_handle(&vsm), + .pName = "main", + }, &vs); + + VkShader fs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = anv_shader_module_to_handle(&fsm), + .pName = "main", + }, &fs); + + /* We use instanced rendering to clear multiple 
render targets. We have two + * vertex buffers: the first vertex buffer holds per-vertex data and + * provides the vertices for the clear rectangle. The second one holds + * per-instance data, which consists of the VUE header (which selects the + * layer) and the color (Vulkan supports per-RT clear colors). + */ + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 12, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 32, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE + }, + }, + .attributeCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offsetInBytes = 0 + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offsetInBytes = 0 + }, + { + /* Color */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 16 + } + } + }; + + anv_graphics_pipeline_create(anv_device_to_handle(device), + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .pSpecializationInfo = NULL, + } + }, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterState = &(VkPipelineRasterStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }, + .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = true, + .depthWriteEnable = true, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = false, + .stencilTestEnable = true, + .front = (VkStencilOpState) { + .stencilPassOp = VK_STENCIL_OP_REPLACE, + .stencilCompareOp = VK_COMPARE_OP_ALWAYS, + }, + .back = (VkStencilOpState) { + .stencilPassOp = VK_STENCIL_OP_REPLACE, + .stencilCompareOp = VK_COMPARE_OP_ALWAYS, + }, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + 
VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .use_repclear = true, + .disable_viewport = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.clear.pipeline); + + anv_DestroyShader(anv_device_to_handle(device), vs); + anv_DestroyShader(anv_device_to_handle(device), fs); + ralloc_free(vsm.nir); + ralloc_free(fsm.nir); +} + +#define NUM_VB_USED 2 +struct anv_saved_state { + struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED]; + struct anv_descriptor_set *old_descriptor_set0; + struct anv_pipeline *old_pipeline; + uint32_t dynamic_flags; + struct anv_dynamic_state dynamic; +}; + +static void +anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, + struct anv_saved_state *state, + uint32_t dynamic_state) +{ + state->old_pipeline = cmd_buffer->state.pipeline; + state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; + memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, + sizeof(state->old_vertex_bindings)); + state->dynamic_flags = dynamic_state; + anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, + dynamic_state); +} + +static void +anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, + const struct anv_saved_state *state) +{ + cmd_buffer->state.pipeline = state->old_pipeline; + cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; + memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, + sizeof(state->old_vertex_bindings)); + + cmd_buffer->state.vb_dirty |= (1 << NUM_VB_USED) - 1; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; + + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, + state->dynamic_flags); + cmd_buffer->state.dirty |= state->dynamic_flags; +} + +struct vue_header { + uint32_t Reserved; + uint32_t RTAIndex; + uint32_t ViewportIndex; + float PointWidth; +}; + +struct clear_instance_data { + struct vue_header vue_header; + VkClearColorValue color; +}; + +static void +meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, + int num_instances, + struct clear_instance_data *instance_data, + VkClearDepthStencilValue ds_clear_value) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_state state; + uint32_t size; + + const float vertex_data[] = { + /* Rect-list coordinates */ + 0.0, 0.0, ds_clear_value.depth, + fb->width, 0.0, ds_clear_value.depth, + fb->width, fb->height, ds_clear_value.depth, + + /* Align to 16 bytes */ + 0.0, 0.0, 0.0, + }; + + size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 16); + + /* Copy in the vertex and instance data */ + memcpy(state.map, vertex_data, sizeof(vertex_data)); + memcpy(state.map + sizeof(vertex_data), instance_data, + num_instances * sizeof(*instance_data)); + + struct anv_buffer vertex_buffer = { + .device = cmd_buffer->device, + .size = size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + 
anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(vertex_data) + }); + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.clear.pipeline); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), + 3, num_instances, 0, 0); +} + +void +anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values) +{ + struct anv_saved_state saved_state; + + if (pass->has_stencil_clear_attachment) + anv_finishme("stencil clear"); + + /* FINISHME: Rethink how we count clear attachments in light of + * 0.138.2 -> 0.170.2 diff. + */ + if (pass->num_color_clear_attachments == 0 && + !pass->has_depth_clear_attachment) + return; + + struct clear_instance_data instance_data[pass->num_color_clear_attachments]; + uint32_t color_attachments[pass->num_color_clear_attachments]; + uint32_t ds_attachment = VK_ATTACHMENT_UNUSED; + VkClearDepthStencilValue ds_clear_value = {0}; + + int layer = 0; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + const struct anv_render_pass_attachment *att = &pass->attachments[i]; + + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + if (anv_format_is_color(att->format)) { + instance_data[layer] = (struct clear_instance_data) { + .vue_header = { + .RTAIndex = i, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = clear_values[i].color, + }; + color_attachments[layer] = i; + layer++; + } else if (att->format->depth_format) { + assert(ds_attachment == VK_ATTACHMENT_UNUSED); + ds_attachment = i; + ds_clear_value = clear_values[ds_attachment].depthStencil; + } + } else if (att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + assert(att->format->has_stencil); + anv_finishme("stencil clear"); + } + } + + anv_cmd_buffer_save(cmd_buffer, &saved_state, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); + cmd_buffer->state.dynamic.viewport.count = 0; + + struct anv_subpass subpass = { + .input_count = 0, + .color_count = pass->num_color_clear_attachments, + .color_attachments = color_attachments, + .depth_stencil_attachment = ds_attachment, + }; + + anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); + + meta_emit_clear(cmd_buffer, pass->num_color_clear_attachments, + instance_data, ds_clear_value); + + /* Restore API state */ + anv_cmd_buffer_restore(cmd_buffer, &saved_state); +} + +static VkImageViewType +meta_blit_get_src_image_view_type(const struct anv_image *src_image) +{ + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + return VK_IMAGE_VIEW_TYPE_1D; + case VK_IMAGE_TYPE_2D: + return VK_IMAGE_VIEW_TYPE_2D; + case VK_IMAGE_TYPE_3D: + return VK_IMAGE_VIEW_TYPE_3D; + default: + assert(!"bad VkImageType"); + return 0; + } +} + +static uint32_t +meta_blit_get_dest_view_base_array_slice(const struct anv_image *dest_image, + const VkImageSubresourceCopy *dest_subresource, + const VkOffset3D *dest_offset) +{ + switch (dest_image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + return dest_subresource->arrayLayer; + case VK_IMAGE_TYPE_3D: + /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, + * but meta does it anyway. When doing so, we translate the + * destination's z offset into an array offset. 
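    * For example, a copy whose destOffset.z is 5 is rendered into what the
    * destination view treats as base array slice 5.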
+ */ + return dest_offset->z; + default: + assert(!"bad VkImageType"); + return 0; + } +} + +static void +anv_device_init_meta_blit_state(struct anv_device *device) +{ + /* We don't use a vertex shader for clearing, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. + */ + struct anv_shader_module vsm = { + .nir = build_nir_vertex_shader(false), + }; + + struct anv_shader_module fsm_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; + + struct anv_shader_module fsm_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; + + VkShader vs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = anv_shader_module_to_handle(&vsm), + .pName = "main", + }, &vs); + + VkShader fs_2d; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = anv_shader_module_to_handle(&fsm_2d), + .pName = "main", + }, &fs_2d); + + VkShader fs_3d; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = anv_shader_module_to_handle(&fsm_3d), + .pName = "main", + }, &fs_3d); + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 0, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 5 * sizeof(float), + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + }, + .attributeCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offsetInBytes = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offsetInBytes = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .count = 1, + .pBinding = (VkDescriptorSetLayoutBinding[]) { + { + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .arraySize = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info, + &device->meta_state.blit.ds_layout); + + anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.blit.pipeline_layout); + + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = {0}, /* TEMPLATE VALUE! FILL ME IN! 
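    * (It is filled in below: pipeline_shader_stages[1].shader is set to
    * fs_2d or fs_3d immediately before each anv_graphics_pipeline_create()
    * call.)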
*/ + .pSpecializationInfo = NULL + }, + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterState = &(VkPipelineRasterStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].shader = fs_2d; + anv_graphics_pipeline_create(anv_device_to_handle(device), + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.blit.pipeline_2d_src); + + pipeline_shader_stages[1].shader = fs_3d; + anv_graphics_pipeline_create(anv_device_to_handle(device), + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.blit.pipeline_3d_src); + + anv_DestroyShader(anv_device_to_handle(device), vs); + anv_DestroyShader(anv_device_to_handle(device), fs_2d); + anv_DestroyShader(anv_device_to_handle(device), fs_3d); + ralloc_free(vsm.nir); + ralloc_free(fsm_2d.nir); + ralloc_free(fsm_3d.nir); +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_saved_state *saved_state) +{ + anv_cmd_buffer_save(cmd_buffer, saved_state, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); +} + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent) +{ + struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = { .handle = 1 }; + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + 
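   /* Layout of the dynamic-state allocation built just below (a sketch of
    * what the following code sets up):
    *
    *    byte 0                          : struct vue_header, zero-filled
    *    byte sizeof(struct vue_header)  : blit_vb_data[3], one record per
    *                                      rect-list vertex (pos + tex_coord)
    *
    * The same anv_buffer is then bound twice via anv_CmdBindVertexBuffers():
    * binding 0 at offset 0 for the VUE header and binding 1 at offset
    * sizeof(struct vue_header) for the per-vertex data, matching
    * vi_create_info in anv_device_init_meta_blit_state().
    */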
unsigned vb_size = sizeof(struct vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct vue_header)); + vb_data = vb_state.map + sizeof(struct vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)(src_offset.z + src_extent.depth) / (float)src_iview->extent.depth, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)(src_offset.z + src_extent.depth) / (float)src_iview->extent.depth, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct vue_header), + }); + + VkDescriptorSet set; + anv_AllocDescriptorSets(anv_device_to_handle(device), dummy_desc_pool, + VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, + 1, &device->meta_state.blit.ds_layout, &set); + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .destSet = set, + .destBinding = 0, + .destArrayElement = 0, + .count = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pDescriptors = (VkDescriptorInfo[]) { + { + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .layers = 1 + }, &fb); + + VkRenderPass pass; + anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, + .format = dest_iview->format->vk_format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputCount = 0, + .colorCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, 
+ .depthStencilAttachment = (VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveCount = 1, + .pPreserveAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + .dependencyCount = 0, + }, &pass); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_RENDER_PASS_CONTENTS_INLINE); + + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + anv_finishme("VK_IMAGE_TYPE_1D"); + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable(!"bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 1, + &(VkViewport) { + .originX = 0.0f, + .originY = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
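    * The set was allocated from the dummy pool handle above, so it is
    * destroyed directly with anv_descriptor_set_destroy() rather than being
    * returned to a real descriptor pool.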
+ */ + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb); + anv_DestroyRenderPass(anv_device_to_handle(device), pass); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_saved_state *saved_state) +{ + anv_cmd_buffer_restore(cmd_buffer, saved_state); +} + +static VkFormat +vk_format_for_cpp(int cpp) +{ + switch (cpp) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format cpp"); + } +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = 0, + .flags = 0, + }; + + VkImage src_image; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + anv_CreateImage(vk_device, &image_info, &src_image); + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArrayLayer = 0, + .arraySize = 1 + }, + }, + cmd_buffer); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .channels = { + .r = VK_CHANNEL_SWIZZLE_R, + .g = VK_CHANNEL_SWIZZLE_G, + .b = VK_CHANNEL_SWIZZLE_B, + .a = VK_CHANNEL_SWIZZLE_A, + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArrayLayer = 0, + .arraySize = 1, + }, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + anv_image_from_handle(dest_image), + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }); + + anv_DestroyImage(vk_device, src_image); + anv_DestroyImage(vk_device, dest_image); +} + +void anv_CmdCopyBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t dest_offset = dest_buffer->offset + pRegions[r].destOffset; + uint64_t copy_size = pRegions[r].copySize; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. 
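    * In effect cpp becomes the largest power of two, capped at 16 bytes,
    * that divides the source offset, the destination offset and the copy
    * size. For example (hypothetical values): src_offset = 48,
    * dest_offset = 64 and copySize = 1000 have largest power-of-two factors
    * 16, 64 and 8 respectively, so cpp = 8 and the copy is done with
    * VK_FORMAT_R16G16B16A16_UINT texels.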
+ */ + int cpp = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + cpp = MIN2(cpp, 1 << fs); + assert(src_offset % cpp == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + cpp = MIN2(cpp, 1 << fs); + assert(dest_offset % cpp == 0); + + fs = ffs(pRegions[r].copySize) - 1; + if (fs != -1) + cpp = MIN2(cpp, 1 << fs); + assert(pRegions[r].copySize % cpp == 0); + + VkFormat copy_format = vk_format_for_cpp(cpp); + + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * cpp; + while (copy_size > max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, copy_format); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * cpp); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * cpp; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, copy_format); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / cpp, 1, copy_format); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + + const VkImageViewType src_iview_type = + meta_blit_get_src_image_view_type(src_image); + + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = src_iview_type, + .format = src_image->format->vk_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = 1 << pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = pRegions[r].srcSubresource.arrayLayer, + .arraySize = 1 + }, + }, + cmd_buffer); + + const VkOffset3D dest_offset = { + .x = pRegions[r].destOffset.x, + .y = pRegions[r].destOffset.y, + .z = 0, + }; + + const uint32_t dest_array_slice = + meta_blit_get_dest_view_base_array_slice(dest_image, + &pRegions[r].destSubresource, + &pRegions[r].destOffset); + + if (pRegions[r].srcSubresource.arraySize > 1) + anv_finishme("FINISHME: copy multiple array layers"); + + if (pRegions[r].extent.depth > 1) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->format->vk_format, + .channels = { + 
VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].destSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = dest_array_slice, + .arraySize = 1 + }, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + pRegions[r].srcOffset, + pRegions[r].extent, + dest_image, &dest_iview, + dest_offset, + pRegions[r].extent); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdBlitImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions, + VkTexFilter filter) + +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + + const VkImageViewType src_iview_type = + meta_blit_get_src_image_view_type(src_image); + + struct anv_saved_state saved_state; + + anv_finishme("respect VkTexFilter"); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = src_iview_type, + .format = src_image->format->vk_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = 1 << pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = pRegions[r].srcSubresource.arrayLayer, + .arraySize = 1 + }, + }, + cmd_buffer); + + const VkOffset3D dest_offset = { + .x = pRegions[r].destOffset.x, + .y = pRegions[r].destOffset.y, + .z = 0, + }; + + const uint32_t dest_array_slice = + meta_blit_get_dest_view_base_array_slice(dest_image, + &pRegions[r].destSubresource, + &pRegions[r].destOffset); + + if (pRegions[r].srcSubresource.arraySize > 1) + anv_finishme("FINISHME: copy multiple array layers"); + + if (pRegions[r].destExtent.depth > 1) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->format->vk_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].destSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = dest_array_slice, + .arraySize = 1 + }, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + pRegions[r].srcOffset, + pRegions[r].srcExtent, + dest_image, &dest_iview, + dest_offset, + pRegions[r].destExtent); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +static VkImage +make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, + VkImageUsageFlags usage, + const VkBufferImageCopy *copy) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); + + VkExtent3D extent = copy->imageExtent; + if (copy->bufferRowLength) + extent.width = copy->bufferRowLength; + if (copy->bufferImageHeight) + extent.height = 
copy->bufferImageHeight; + extent.depth = 1; + + VkImage vk_image; + VkResult result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = format, + .extent = extent, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = usage, + .flags = 0, + }, &vk_image); + assert(result == VK_SUCCESS); + + ANV_FROM_HANDLE(anv_image, image, vk_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + image->bo = buffer->bo; + image->offset = buffer->offset + copy->bufferOffset; + + return anv_image_to_handle(image); +} + +void anv_CmdCopyBufferToImage( + VkCmdBuffer cmdBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + const VkFormat orig_format = dest_image->format->vk_format; + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkFormat proxy_format = orig_format; + VkImageAspect proxy_aspect = pRegions[r].imageSubresource.aspect; + + if (orig_format == VK_FORMAT_S8_UINT) { + proxy_format = VK_FORMAT_R8_UINT; + proxy_aspect = VK_IMAGE_ASPECT_COLOR; + } + + VkImage srcImage = make_image_for_buffer(vk_device, srcBuffer, + proxy_format, VK_IMAGE_USAGE_SAMPLED_BIT, &pRegions[r]); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = proxy_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = 1 << proxy_aspect, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArrayLayer = 0, + .arraySize = 1 + }, + }, + cmd_buffer); + + const VkOffset3D dest_offset = { + .x = pRegions[r].imageOffset.x, + .y = pRegions[r].imageOffset.y, + .z = 0, + }; + + const uint32_t dest_array_slice = + meta_blit_get_dest_view_base_array_slice(dest_image, + &pRegions[r].imageSubresource, + &pRegions[r].imageOffset); + + if (pRegions[r].imageExtent.depth > 1) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = proxy_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = dest_array_slice, + .arraySize = 1 + }, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), + &src_iview, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, + dest_image, + &dest_iview, + dest_offset, + pRegions[r].imageExtent); + + anv_DestroyImage(vk_device, srcImage); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImageToBuffer( + 
VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_saved_state saved_state; + + const VkImageViewType src_iview_type = + meta_blit_get_src_image_view_type(src_image); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + if (pRegions[r].imageSubresource.arraySize > 1) + anv_finishme("FINISHME: copy multiple array layers"); + + if (pRegions[r].imageExtent.depth > 1) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = src_iview_type, + .format = src_image->format->vk_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = 1 << pRegions[r].imageSubresource.aspect, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .mipLevels = 1, + .baseArrayLayer = pRegions[r].imageSubresource.arrayLayer, + .arraySize = 1 + }, + }, + cmd_buffer); + + VkFormat dest_format = src_image->format->vk_format; + if (dest_format == VK_FORMAT_S8_UINT) { + dest_format = VK_FORMAT_R8_UINT; + } + + VkImage destImage = make_image_for_buffer(vk_device, destBuffer, + dest_format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, &pRegions[r]); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArrayLayer = 0, + .arraySize = 1 + }, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), + &src_iview, + pRegions[r].imageOffset, + pRegions[r].imageExtent, + anv_image_from_handle(destImage), + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent); + + anv_DestroyImage(vk_device, destImage); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + stub(); +} + +void anv_CmdFillBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + stub(); +} + +void anv_CmdClearColorImage( + VkCmdBuffer cmdBuffer, + VkImage _image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, image, _image); + struct anv_saved_state saved_state; + + anv_cmd_buffer_save(cmd_buffer, &saved_state, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); + cmd_buffer->state.dynamic.viewport.count = 0; + + for (uint32_t r = 0; r < rangeCount; r++) { + for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { + for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { + struct 
+            anv_image_view_init(&iview, cmd_buffer->device,
+               &(VkImageViewCreateInfo) {
+                  .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+                  .image = _image,
+                  .viewType = VK_IMAGE_VIEW_TYPE_2D,
+                  .format = image->format->vk_format,
+                  .channels = {
+                     VK_CHANNEL_SWIZZLE_R,
+                     VK_CHANNEL_SWIZZLE_G,
+                     VK_CHANNEL_SWIZZLE_B,
+                     VK_CHANNEL_SWIZZLE_A
+                  },
+                  .subresourceRange = {
+                     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                     .baseMipLevel = pRanges[r].baseMipLevel + l,
+                     .mipLevels = 1,
+                     .baseArrayLayer = pRanges[r].baseArrayLayer + s,
+                     .arraySize = 1
+                  },
+               },
+               cmd_buffer);
+
+            VkFramebuffer fb;
+            anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device),
+               &(VkFramebufferCreateInfo) {
+                  .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+                  .attachmentCount = 1,
+                  .pAttachments = (VkImageView[]) {
+                     anv_image_view_to_handle(&iview),
+                  },
+                  .width = iview.extent.width,
+                  .height = iview.extent.height,
+                  .layers = 1
+               }, &fb);
+
+            VkRenderPass pass;
+            anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device),
+               &(VkRenderPassCreateInfo) {
+                  .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+                  .attachmentCount = 1,
+                  .pAttachments = &(VkAttachmentDescription) {
+                     .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION,
+                     .format = iview.format->vk_format,
+                     .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+                     .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+                     .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+                     .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+                  },
+                  .subpassCount = 1,
+                  .pSubpasses = &(VkSubpassDescription) {
+                     .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION,
+                     .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+                     .inputCount = 0,
+                     .colorCount = 1,
+                     .pColorAttachments = &(VkAttachmentReference) {
+                        .attachment = 0,
+                        .layout = VK_IMAGE_LAYOUT_GENERAL,
+                     },
+                     .pResolveAttachments = NULL,
+                     .depthStencilAttachment = (VkAttachmentReference) {
+                        .attachment = VK_ATTACHMENT_UNUSED,
+                        .layout = VK_IMAGE_LAYOUT_GENERAL,
+                     },
+                     .preserveCount = 1,
+                     .pPreserveAttachments = &(VkAttachmentReference) {
+                        .attachment = 0,
+                        .layout = VK_IMAGE_LAYOUT_GENERAL,
+                     },
+                  },
+                  .dependencyCount = 0,
+               }, &pass);
+
+            ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+               &(VkRenderPassBeginInfo) {
+                  .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+                  .renderArea = {
+                     .offset = { 0, 0, },
+                     .extent = {
+                        .width = iview.extent.width,
+                        .height = iview.extent.height,
+                     },
+                  },
+                  .renderPass = pass,
+                  .framebuffer = fb,
+                  .clearValueCount = 1,
+                  .pClearValues = NULL,
+               }, VK_RENDER_PASS_CONTENTS_INLINE);
+
+            struct clear_instance_data instance_data = {
+               .vue_header = {
+                  .RTAIndex = 0,
+                  .ViewportIndex = 0,
+                  .PointWidth = 0.0
+               },
+               .color = *pColor,
+            };
+
+            meta_emit_clear(cmd_buffer, 1, &instance_data,
+                            (VkClearDepthStencilValue) {0});
+
+            ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+         }
+      }
+   }
+
+   /* Restore API state */
+   anv_cmd_buffer_restore(cmd_buffer, &saved_state);
+}
+
+void anv_CmdClearDepthStencilImage(
+    VkCmdBuffer                                 cmdBuffer,
+    VkImage                                     image,
+    VkImageLayout                               imageLayout,
+    const VkClearDepthStencilValue*             pDepthStencil,
+    uint32_t                                    rangeCount,
+    const VkImageSubresourceRange*              pRanges)
+{
+   stub();
+}
+
+void anv_CmdClearColorAttachment(
+    VkCmdBuffer                                 cmdBuffer,
+    uint32_t                                    colorAttachment,
+    VkImageLayout                               imageLayout,
+    const VkClearColorValue*                    pColor,
+    uint32_t                                    rectCount,
+    const VkRect3D*                             pRects)
+{
+   stub();
+}
+
+void anv_CmdClearDepthStencilAttachment(
+    VkCmdBuffer                                 cmdBuffer,
+    VkImageAspectFlags                          aspectMask,
+    VkImageLayout                               imageLayout,
+    const VkClearDepthStencilValue*             pDepthStencil,
+    uint32_t                                    rectCount,
+    const VkRect3D*                             pRects)
+{
+   stub();
+}
+
+void anv_CmdResolveImage(
+    VkCmdBuffer                                 cmdBuffer,
+    VkImage                                     srcImage,
+    VkImageLayout                               srcImageLayout,
+    VkImage                                     destImage,
+    VkImageLayout                               destImageLayout,
+    uint32_t                                    regionCount,
+    const VkImageResolve*                       pRegions)
+{
+   stub();
+}
+
+void
+anv_device_init_meta(struct anv_device *device)
+{
+   anv_device_init_meta_clear_state(device);
+   anv_device_init_meta_blit_state(device);
+}
+
+void
+anv_device_finish_meta(struct anv_device *device)
+{
+   /* Clear */
+   anv_DestroyPipeline(anv_device_to_handle(device),
+                       device->meta_state.clear.pipeline);
+
+   /* Blit */
+   anv_DestroyPipeline(anv_device_to_handle(device),
+                       device->meta_state.blit.pipeline_2d_src);
+   anv_DestroyPipeline(anv_device_to_handle(device),
+                       device->meta_state.blit.pipeline_3d_src);
+   anv_DestroyPipelineLayout(anv_device_to_handle(device),
+                             device->meta_state.blit.pipeline_layout);
+   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
+                                  device->meta_state.blit.ds_layout);
+}
diff --cc src/vulkan/anv_nir_builder.h
index 299c8c1aad0,00000000000..f26cb046a6b
mode 100644,000000..100644
--- a/src/vulkan/anv_nir_builder.h
+++ b/src/vulkan/anv_nir_builder.h
@@@ -1,102 -1,0 +1,56 @@@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
+#include "glsl/nir/nir_builder.h"
+#include "util/ralloc.h"
+
+/* This file includes NIR helpers used by meta shaders in the Vulkan
+ * driver. Eventually, these will all be merged into nir_builder.
+ * However, for now, keeping them in their own file helps to prevent merge
+ * conflicts.
+ */
+
+static inline void
+nir_builder_init_simple_shader(nir_builder *b, gl_shader_stage stage)
+{
+   b->shader = nir_shader_create(NULL, stage, NULL);
+
+   nir_function *func = nir_function_create(b->shader,
+                                            ralloc_strdup(b->shader, "main"));
+   nir_function_overload *overload = nir_function_overload_create(func);
+   overload->num_params = 0;
+
+   b->impl = nir_function_impl_create(overload);
+   b->cursor = nir_after_cf_list(&b->impl->body);
+}
+
+static inline void
+nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src)
+{
+   nir_intrinsic_instr *copy =
+      nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var);
+   copy->variables[0] = nir_deref_var_create(copy, dest);
+   copy->variables[1] = nir_deref_var_create(copy, src);
+   nir_builder_instr_insert(build, &copy->instr);
+}
- 
- static inline nir_variable *
- nir_variable_create(nir_shader *shader, const char *name,
-                     const struct glsl_type *type, nir_variable_mode mode)
- {
-    nir_variable *var = rzalloc(shader, nir_variable);
-    var->name = ralloc_strdup(var, name);
-    var->type = type;
-    var->data.mode = mode;
- 
-    if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) ||
-        (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT))
-       var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
- 
-    switch (var->data.mode) {
-    case nir_var_local:
-       assert(!"nir_variable_create cannot be used for local variables");
-       break;
- 
-    case nir_var_global:
-       exec_list_push_tail(&shader->globals, &var->node);
-       break;
- 
-    case nir_var_shader_in:
-       exec_list_push_tail(&shader->inputs, &var->node);
-       break;
- 
-    case nir_var_shader_out:
-       exec_list_push_tail(&shader->outputs, &var->node);
-       break;
- 
-    case nir_var_uniform:
-    case nir_var_shader_storage:
-       exec_list_push_tail(&shader->uniforms, &var->node);
-       break;
- 
-    case nir_var_system_value:
-       exec_list_push_tail(&shader->system_values, &var->node);
-       break;
- 
-    default:
-       unreachable("not reached");
-    }
- 
-    return var;
- }
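
For context only, and not part of the commit above: a minimal sketch of how the two helpers kept in anv_nir_builder.h are typically combined when a meta shader is assembled. The function name, variable names, and the choice of fragment stage are assumptions made for the example; the variable setup deliberately mirrors the nir_variable_create() helper being removed in this merge (that functionality now lives in upstream NIR), and glsl_vec4_type() is assumed to come from nir_types.h.

/* Illustrative sketch under the assumptions stated above. */
static inline nir_shader *
build_passthrough_fs(void)
{
   nir_builder b;

   /* Create a shader with a single, empty "main" and leave the builder's
    * cursor at the end of its body, so later emits append to the shader. */
   nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT);

   /* Declare one vec4 input and one vec4 output on the new shader, following
    * the same pattern as the removed nir_variable_create() helper. */
   nir_variable *color_in = rzalloc(b.shader, nir_variable);
   color_in->name = ralloc_strdup(color_in, "v_color");
   color_in->type = glsl_vec4_type();
   color_in->data.mode = nir_var_shader_in;
   color_in->data.interpolation = INTERP_QUALIFIER_SMOOTH;
   exec_list_push_tail(&b.shader->inputs, &color_in->node);

   nir_variable *color_out = rzalloc(b.shader, nir_variable);
   color_out->name = ralloc_strdup(color_out, "f_color");
   color_out->type = glsl_vec4_type();
   color_out->data.mode = nir_var_shader_out;
   exec_list_push_tail(&b.shader->outputs, &color_out->node);

   /* Emit a single copy_var intrinsic at the cursor: output = input. */
   nir_copy_var(&b, color_out, color_in);

   return b.shader;
}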