nir/xfb: don't assert when xfb_buffer/stride is present but not xfb_offset
[mesa.git] / src / compiler / nir / nir_gather_xfb_info.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_xfb_info.h"
25
26 #include <util/u_math.h>
27
28 static void
29 add_var_xfb_outputs(nir_xfb_info *xfb,
30 nir_variable *var,
31 unsigned *location,
32 unsigned *offset,
33 const struct glsl_type *type)
34 {
35 if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) {
36 unsigned length = glsl_get_length(type);
37 const struct glsl_type *child_type = glsl_get_array_element(type);
38 for (unsigned i = 0; i < length; i++)
39 add_var_xfb_outputs(xfb, var, location, offset, child_type);
40 } else if (glsl_type_is_struct(type)) {
41 unsigned length = glsl_get_length(type);
42 for (unsigned i = 0; i < length; i++) {
43 const struct glsl_type *child_type = glsl_get_struct_field(type, i);
44 add_var_xfb_outputs(xfb, var, location, offset, child_type);
45 }
46 } else {
47 assert(var->data.xfb_buffer < NIR_MAX_XFB_BUFFERS);
48 if (xfb->buffers_written & (1 << var->data.xfb_buffer)) {
49 assert(xfb->strides[var->data.xfb_buffer] == var->data.xfb_stride);
50 assert(xfb->buffer_to_stream[var->data.xfb_buffer] == var->data.stream);
51 } else {
52 xfb->buffers_written |= (1 << var->data.xfb_buffer);
53 xfb->strides[var->data.xfb_buffer] = var->data.xfb_stride;
54 xfb->buffer_to_stream[var->data.xfb_buffer] = var->data.stream;
55 }
56
57 assert(var->data.stream < NIR_MAX_XFB_STREAMS);
58 xfb->streams_written |= (1 << var->data.stream);
59
60 unsigned comp_slots = glsl_get_component_slots(type);
61 unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
62 assert(attrib_slots == glsl_count_attribute_slots(type, false));
63
64 /* Ensure that we don't have, for instance, a dvec2 with a location_frac
65 * of 2 which would make it crass a location boundary even though it
66 * fits in a single slot. However, you can have a dvec3 which crosses
67 * the slot boundary with a location_frac of 2.
68 */
69 assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == attrib_slots);
70
71 assert(var->data.location_frac + comp_slots <= 8);
72 uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
73
74 assert(attrib_slots <= 2);
75 for (unsigned s = 0; s < attrib_slots; s++) {
76 nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
77
78 output->buffer = var->data.xfb_buffer;
79 output->offset = *offset + s * 16;
80 output->location = *location;
81 output->component_mask = (comp_mask >> (s * 4)) & 0xf;
82
83 (*location)++;
84 }
85 *offset += comp_slots * 4;
86 }
87 }
88
89 static int
90 compare_xfb_output_offsets(const void *_a, const void *_b)
91 {
92 const nir_xfb_output_info *a = _a, *b = _b;
93 return a->offset - b->offset;
94 }
95
96 nir_xfb_info *
97 nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)
98 {
99 assert(shader->info.stage == MESA_SHADER_VERTEX ||
100 shader->info.stage == MESA_SHADER_TESS_EVAL ||
101 shader->info.stage == MESA_SHADER_GEOMETRY);
102
103 /* Compute the number of outputs we have. This is simply the number of
104 * cumulative locations consumed by all the variables. If a location is
105 * represented by multiple variables, then they each count separately in
106 * number of outputs.
107 */
108 unsigned num_outputs = 0;
109 nir_foreach_variable(var, &shader->outputs) {
110 if (var->data.explicit_xfb_buffer &&
111 var->data.explicit_offset) {
112
113 num_outputs += glsl_count_attribute_slots(var->type, false);
114 }
115 }
116 if (num_outputs == 0)
117 return NULL;
118
119 nir_xfb_info *xfb = rzalloc_size(mem_ctx, nir_xfb_info_size(num_outputs));
120
121 /* Walk the list of outputs and add them to the array */
122 nir_foreach_variable(var, &shader->outputs) {
123 if (var->data.explicit_xfb_buffer &&
124 var->data.explicit_offset) {
125
126 unsigned location = var->data.location;
127 unsigned offset = var->data.offset;
128 add_var_xfb_outputs(xfb, var, &location, &offset, var->type);
129 }
130 }
131 assert(xfb->output_count == num_outputs);
132
133 /* Everything is easier in the state setup code if the list is sorted in
134 * order of output offset.
135 */
136 qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]),
137 compare_xfb_output_offsets);
138
139 /* Finally, do a sanity check */
140 unsigned max_offset[NIR_MAX_XFB_BUFFERS] = {0};
141 for (unsigned i = 0; i < xfb->output_count; i++) {
142 assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]);
143 assert(xfb->outputs[i].component_mask != 0);
144 unsigned slots = util_bitcount(xfb->outputs[i].component_mask);
145 max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + slots * 4;
146 }
147
148 return xfb;
149 }