From 19064b8c3a8e0f7b9081a0564f1c7c79a1953d0b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 5 Oct 2018 09:13:25 -0500 Subject: [PATCH] nir: Add a pass for gathering transform feedback info This is different from the GL_ARB_spirv pass because it generates a much simpler data structure that isn't tied to OpenGL and mtypes.h. Reviewed-by: Samuel Pitoiset --- src/compiler/Makefile.sources | 4 +- src/compiler/nir/meson.build | 2 + src/compiler/nir/nir_gather_xfb_info.c | 150 +++++++++++++++++++++++++ src/compiler/nir/nir_xfb_info.h | 59 ++++++++++ 4 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 src/compiler/nir/nir_gather_xfb_info.c create mode 100644 src/compiler/nir/nir_xfb_info.h diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index b65bb9b80b9..ae170f02e82 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -216,6 +216,7 @@ NIR_FILES = \ nir/nir_format_convert.h \ nir/nir_from_ssa.c \ nir/nir_gather_info.c \ + nir/nir_gather_xfb_info.c \ nir/nir_gs_count_vertices.c \ nir/nir_inline_functions.c \ nir/nir_instr_set.c \ @@ -308,7 +309,8 @@ NIR_FILES = \ nir/nir_validate.c \ nir/nir_vla.h \ nir/nir_worklist.c \ - nir/nir_worklist.h + nir/nir_worklist.h \ + nir/nir_xfb_info.h SPIRV_GENERATED_FILES = \ spirv/spirv_info.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index d8f65640004..b0c3a7feb31 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -100,6 +100,7 @@ files_libnir = files( 'nir_format_convert.h', 'nir_from_ssa.c', 'nir_gather_info.c', + 'nir_gather_xfb_info.c', 'nir_gs_count_vertices.c', 'nir_inline_functions.c', 'nir_instr_set.c', @@ -193,6 +194,7 @@ files_libnir = files( 'nir_vla.h', 'nir_worklist.c', 'nir_worklist.h', + 'nir_xfb_info.h', '../spirv/GLSL.ext.AMD.h', '../spirv/GLSL.std.450.h', '../spirv/gl_spirv.c', diff --git a/src/compiler/nir/nir_gather_xfb_info.c b/src/compiler/nir/nir_gather_xfb_info.c new file mode 100644 index 00000000000..a53703bb9bf --- /dev/null +++ b/src/compiler/nir/nir_gather_xfb_info.c @@ -0,0 +1,150 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir_xfb_info.h" + +#include + +static void +add_var_xfb_outputs(nir_xfb_info *xfb, + nir_variable *var, + unsigned *location, + unsigned *offset, + const struct glsl_type *type) +{ + if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) { + unsigned length = glsl_get_length(type); + const struct glsl_type *child_type = glsl_get_array_element(type); + for (unsigned i = 0; i < length; i++) + add_var_xfb_outputs(xfb, var, location, offset, child_type); + } else if (glsl_type_is_struct(type)) { + unsigned length = glsl_get_length(type); + for (unsigned i = 0; i < length; i++) { + const struct glsl_type *child_type = glsl_get_struct_field(type, i); + add_var_xfb_outputs(xfb, var, location, offset, child_type); + } + } else { + assert(var->data.xfb_buffer < NIR_MAX_XFB_BUFFERS); + if (xfb->buffers_written & (1 << var->data.xfb_buffer)) { + assert(xfb->strides[var->data.xfb_buffer] == var->data.xfb_stride); + assert(xfb->buffer_to_stream[var->data.xfb_buffer] == var->data.stream); + } else { + xfb->buffers_written |= (1 << var->data.xfb_buffer); + xfb->strides[var->data.xfb_buffer] = var->data.xfb_stride; + xfb->buffer_to_stream[var->data.xfb_buffer] = var->data.stream; + } + + assert(var->data.stream < NIR_MAX_XFB_STREAMS); + xfb->streams_written |= (1 << var->data.stream); + + unsigned comp_slots = glsl_get_component_slots(type); + unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4); + assert(attrib_slots == glsl_count_attribute_slots(type, false)); + + /* Ensure that we don't have, for instance, a dvec2 with a location_frac + * of 2 which would make it crass a location boundary even though it + * fits in a single slot. However, you can have a dvec3 which crosses + * the slot boundary with a location_frac of 2. + */ + assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == attrib_slots); + + assert(var->data.location_frac + comp_slots <= 8); + uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac; + + assert(attrib_slots <= 2); + for (unsigned s = 0; s < attrib_slots; s++) { + nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++]; + + output->buffer = var->data.xfb_buffer; + output->offset = *offset; + output->location = *location; + output->component_mask = (comp_mask >> (s * 4)) & 0xf; + + (*location)++; + *offset += comp_slots * 4; + } + } +} + +static int +compare_xfb_output_offsets(const void *_a, const void *_b) +{ + const nir_xfb_output_info *a = _a, *b = _b; + return a->offset - b->offset; +} + +nir_xfb_info * +nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx) +{ + assert(shader->info.stage == MESA_SHADER_VERTEX || + shader->info.stage == MESA_SHADER_TESS_EVAL || + shader->info.stage == MESA_SHADER_GEOMETRY); + + /* Compute the number of outputs we have. This is simply the number of + * cumulative locations consumed by all the variables. If a location is + * represented by multiple variables, then they each count separately in + * number of outputs. + */ + unsigned num_outputs = 0; + nir_foreach_variable(var, &shader->outputs) { + if (var->data.explicit_xfb_buffer || + var->data.explicit_xfb_stride) { + assert(var->data.explicit_xfb_buffer && + var->data.explicit_xfb_stride && + var->data.explicit_offset); + num_outputs += glsl_count_attribute_slots(var->type, false); + } + } + if (num_outputs == 0) + return NULL; + + nir_xfb_info *xfb = rzalloc_size(mem_ctx, nir_xfb_info_size(num_outputs)); + + /* Walk the list of outputs and add them to the array */ + nir_foreach_variable(var, &shader->outputs) { + if (var->data.explicit_xfb_buffer || + var->data.explicit_xfb_stride) { + unsigned location = var->data.location; + unsigned offset = var->data.offset; + add_var_xfb_outputs(xfb, var, &location, &offset, var->type); + } + } + assert(xfb->output_count == num_outputs); + + /* Everything is easier in the state setup code if the list is sorted in + * order of output offset. + */ + qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]), + compare_xfb_output_offsets); + + /* Finally, do a sanity check */ + unsigned max_offset[NIR_MAX_XFB_BUFFERS] = { }; + for (unsigned i = 0; i < xfb->output_count; i++) { + assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]); + assert(xfb->outputs[i].component_mask != 0); + unsigned slots = util_bitcount(xfb->outputs[i].component_mask); + max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + slots * 4; + } + + return xfb; +} diff --git a/src/compiler/nir/nir_xfb_info.h b/src/compiler/nir/nir_xfb_info.h new file mode 100644 index 00000000000..9b543df5f47 --- /dev/null +++ b/src/compiler/nir/nir_xfb_info.h @@ -0,0 +1,59 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef NIR_XFB_INFO_H +#define NIR_XFB_INFO_H + +#include "nir.h" + +#define NIR_MAX_XFB_BUFFERS 4 +#define NIR_MAX_XFB_STREAMS 4 + +typedef struct { + uint8_t buffer; + uint16_t offset; + uint8_t location; + uint8_t component_mask; +} nir_xfb_output_info; + +typedef struct { + uint8_t buffers_written; + uint8_t streams_written; + + uint16_t strides[NIR_MAX_XFB_BUFFERS]; + uint8_t buffer_to_stream[NIR_MAX_XFB_STREAMS]; + + uint16_t output_count; + nir_xfb_output_info outputs[0]; +} nir_xfb_info; + +static inline size_t +nir_xfb_info_size(uint16_t output_count) +{ + return sizeof(nir_xfb_info) + sizeof(nir_xfb_output_info) * output_count; +} + +nir_xfb_info * +nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx); + +#endif /* NIR_XFB_INFO_H */ -- 2.30.2