081ef77b48a3896ca39decd497bd811da432fb19
[mesa.git] / src / compiler / nir / nir_gather_xfb_info.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_xfb_info.h"
25
26 #include <util/u_math.h>
27
28 static void
29 add_var_xfb_outputs(nir_xfb_info *xfb,
30 nir_variable *var,
31 unsigned buffer,
32 unsigned *location,
33 unsigned *offset,
34 const struct glsl_type *type)
35 {
36 /* If this type contains a 64-bit value, align to 8 bytes */
37 if (glsl_type_contains_64bit(type))
38 *offset = ALIGN_POT(*offset, 8);
39
40 if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) {
41 unsigned length = glsl_get_length(type);
42 const struct glsl_type *child_type = glsl_get_array_element(type);
43 for (unsigned i = 0; i < length; i++)
44 add_var_xfb_outputs(xfb, var, buffer, location, offset, child_type);
45 } else if (glsl_type_is_struct(type)) {
46 unsigned length = glsl_get_length(type);
47 for (unsigned i = 0; i < length; i++) {
48 const struct glsl_type *child_type = glsl_get_struct_field(type, i);
49 add_var_xfb_outputs(xfb, var, buffer, location, offset, child_type);
50 }
51 } else {
52 assert(buffer < NIR_MAX_XFB_BUFFERS);
53 if (xfb->buffers_written & (1 << buffer)) {
54 assert(xfb->strides[buffer] == var->data.xfb_stride);
55 assert(xfb->buffer_to_stream[buffer] == var->data.stream);
56 } else {
57 xfb->buffers_written |= (1 << buffer);
58 xfb->strides[buffer] = var->data.xfb_stride;
59 xfb->buffer_to_stream[buffer] = var->data.stream;
60 }
61
62 assert(var->data.stream < NIR_MAX_XFB_STREAMS);
63 xfb->streams_written |= (1 << var->data.stream);
64
65 unsigned comp_slots = glsl_get_component_slots(type);
66 unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
67 assert(attrib_slots == glsl_count_attribute_slots(type, false));
68
69 /* Ensure that we don't have, for instance, a dvec2 with a location_frac
70 * of 2 which would make it crass a location boundary even though it
71 * fits in a single slot. However, you can have a dvec3 which crosses
72 * the slot boundary with a location_frac of 2.
73 */
74 assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == attrib_slots);
75
76 assert(var->data.location_frac + comp_slots <= 8);
77 uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
78
79 while (comp_mask) {
80 nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
81
82 output->buffer = buffer;
83 output->offset = *offset;
84 output->location = *location;
85 output->component_mask = comp_mask & 0xf;
86
87 *offset += util_bitcount(output->component_mask) * 4;
88 (*location)++;
89 comp_mask >>= 4;
90 }
91 }
92 }
93
94 static int
95 compare_xfb_output_offsets(const void *_a, const void *_b)
96 {
97 const nir_xfb_output_info *a = _a, *b = _b;
98 return a->offset - b->offset;
99 }
100
101 nir_xfb_info *
102 nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)
103 {
104 assert(shader->info.stage == MESA_SHADER_VERTEX ||
105 shader->info.stage == MESA_SHADER_TESS_EVAL ||
106 shader->info.stage == MESA_SHADER_GEOMETRY);
107
108 /* Compute the number of outputs we have. This is simply the number of
109 * cumulative locations consumed by all the variables. If a location is
110 * represented by multiple variables, then they each count separately in
111 * number of outputs. This is only an estimate as some variables may have
112 * an xfb_buffer but not an output so it may end up larger than we need but
113 * it should be good enough for allocation.
114 */
115 unsigned num_outputs = 0;
116 nir_foreach_variable(var, &shader->outputs) {
117 if (var->data.explicit_xfb_buffer)
118 num_outputs += glsl_count_attribute_slots(var->type, false);
119 }
120 if (num_outputs == 0)
121 return NULL;
122
123 nir_xfb_info *xfb = rzalloc_size(mem_ctx, nir_xfb_info_size(num_outputs));
124
125 /* Walk the list of outputs and add them to the array */
126 nir_foreach_variable(var, &shader->outputs) {
127 if (!var->data.explicit_xfb_buffer)
128 continue;
129
130 unsigned location = var->data.location;
131
132 /* In order to know if we have a array of blocks can't be done just by
133 * checking if we have an interface type and is an array, because due
134 * splitting we could end on a case were we received a split struct
135 * that contains an array.
136 */
137 bool is_array_block = var->interface_type != NULL &&
138 glsl_type_is_array(var->type) &&
139 glsl_without_array(var->type) == glsl_get_bare_type(var->interface_type);
140
141 if (var->data.explicit_offset && !is_array_block) {
142 unsigned offset = var->data.offset;
143 add_var_xfb_outputs(xfb, var, var->data.xfb_buffer,
144 &location, &offset, var->type);
145 } else if (is_array_block) {
146 assert(glsl_type_is_struct(var->interface_type));
147
148 unsigned aoa_size = glsl_get_aoa_size(var->type);
149 const struct glsl_type *itype = var->interface_type;
150 unsigned nfields = glsl_get_length(itype);
151 for (unsigned b = 0; b < aoa_size; b++) {
152 for (unsigned f = 0; f < nfields; f++) {
153 int foffset = glsl_get_struct_field_offset(itype, f);
154 const struct glsl_type *ftype = glsl_get_struct_field(itype, f);
155 if (foffset < 0) {
156 location += glsl_count_attribute_slots(ftype, false);
157 continue;
158 }
159
160 unsigned offset = foffset;
161 add_var_xfb_outputs(xfb, var, var->data.xfb_buffer + b,
162 &location, &offset, ftype);
163 }
164 }
165 }
166 }
167
168 /* Everything is easier in the state setup code if the list is sorted in
169 * order of output offset.
170 */
171 qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]),
172 compare_xfb_output_offsets);
173
174 #ifndef NDEBUG
175 /* Finally, do a sanity check */
176 unsigned max_offset[NIR_MAX_XFB_BUFFERS] = {0};
177 for (unsigned i = 0; i < xfb->output_count; i++) {
178 assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]);
179 assert(xfb->outputs[i].component_mask != 0);
180 unsigned slots = util_bitcount(xfb->outputs[i].component_mask);
181 max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + slots * 4;
182 }
183 #endif
184
185 return xfb;
186 }