nir: Add a pass to combine store_derefs to same vector
[mesa.git] / src / compiler / nir / nir_gather_xfb_info.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_xfb_info.h"
25
26 #include <util/u_math.h>
27
28 static void
29 add_var_xfb_varying(nir_xfb_info *xfb,
30 nir_variable *var,
31 unsigned offset,
32 const struct glsl_type *type)
33 {
34 nir_xfb_varying_info *varying = &xfb->varyings[xfb->varying_count++];
35
36 varying->type = type;
37 varying->buffer = var->data.xfb_buffer;
38 varying->offset = offset;
39 xfb->buffers[var->data.xfb_buffer].varying_count++;
40 }
41
42
43 static nir_xfb_info *
44 nir_gather_xfb_info_create(void *mem_ctx, uint16_t output_count, uint16_t varying_count)
45 {
46 nir_xfb_info *xfb = rzalloc_size(mem_ctx, sizeof(nir_xfb_info));
47
48 xfb->varyings = rzalloc_size(xfb, sizeof(nir_xfb_varying_info) * varying_count);
49 xfb->outputs = rzalloc_size(xfb, sizeof(nir_xfb_output_info) * output_count);
50
51 return xfb;
52 }
53
54 static void
55 add_var_xfb_outputs(nir_xfb_info *xfb,
56 nir_variable *var,
57 unsigned buffer,
58 unsigned *location,
59 unsigned *offset,
60 const struct glsl_type *type,
61 bool varying_added)
62 {
63 /* If this type contains a 64-bit value, align to 8 bytes */
64 if (glsl_type_contains_64bit(type))
65 *offset = ALIGN_POT(*offset, 8);
66
67 if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {
68 unsigned length = glsl_get_length(type);
69
70 const struct glsl_type *child_type = glsl_get_array_element(type);
71 if (!glsl_type_is_array(child_type) &&
72 !glsl_type_is_struct(child_type)) {
73
74 add_var_xfb_varying(xfb, var, *offset, type);
75 varying_added = true;
76 }
77
78 for (unsigned i = 0; i < length; i++)
79 add_var_xfb_outputs(xfb, var, buffer, location, offset, child_type, varying_added);
80 } else if (glsl_type_is_struct_or_ifc(type)) {
81 unsigned length = glsl_get_length(type);
82 for (unsigned i = 0; i < length; i++) {
83 const struct glsl_type *child_type = glsl_get_struct_field(type, i);
84 add_var_xfb_outputs(xfb, var, buffer, location, offset, child_type, varying_added);
85 }
86 } else {
87 assert(buffer < NIR_MAX_XFB_BUFFERS);
88 if (xfb->buffers_written & (1 << buffer)) {
89 assert(xfb->buffers[buffer].stride == var->data.xfb_stride);
90 assert(xfb->buffer_to_stream[buffer] == var->data.stream);
91 } else {
92 xfb->buffers_written |= (1 << buffer);
93 xfb->buffers[buffer].stride = var->data.xfb_stride;
94 xfb->buffer_to_stream[buffer] = var->data.stream;
95 }
96
97 assert(var->data.stream < NIR_MAX_XFB_STREAMS);
98 xfb->streams_written |= (1 << var->data.stream);
99
100 unsigned comp_slots;
101 if (var->data.compact) {
102 /* This only happens for clip/cull which are float arrays */
103 assert(glsl_without_array(type) == glsl_float_type());
104 assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
105 var->data.location == VARYING_SLOT_CLIP_DIST1);
106 comp_slots = glsl_get_length(type);
107 } else {
108 comp_slots = glsl_get_component_slots(type);
109
110 UNUSED unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
111 assert(attrib_slots == glsl_count_attribute_slots(type, false));
112
113 /* Ensure that we don't have, for instance, a dvec2 with a
114 * location_frac of 2 which would make it crass a location boundary
115 * even though it fits in a single slot. However, you can have a
116 * dvec3 which crosses the slot boundary with a location_frac of 2.
117 */
118 assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
119 attrib_slots);
120 }
121
122 assert(var->data.location_frac + comp_slots <= 8);
123 uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
124 unsigned comp_offset = var->data.location_frac;
125
126 if (!varying_added) {
127 add_var_xfb_varying(xfb, var, *offset, type);
128 }
129
130 while (comp_mask) {
131 nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
132
133 output->buffer = buffer;
134 output->offset = *offset;
135 output->location = *location;
136 output->component_mask = comp_mask & 0xf;
137 output->component_offset = comp_offset;
138
139 *offset += util_bitcount(output->component_mask) * 4;
140 (*location)++;
141 comp_mask >>= 4;
142 comp_offset = 0;
143 }
144 }
145 }
146
147 static int
148 compare_xfb_varying_offsets(const void *_a, const void *_b)
149 {
150 const nir_xfb_varying_info *a = _a, *b = _b;
151
152 if (a->buffer != b->buffer)
153 return a->buffer - b->buffer;
154
155 return a->offset - b->offset;
156 }
157
158 static int
159 compare_xfb_output_offsets(const void *_a, const void *_b)
160 {
161 const nir_xfb_output_info *a = _a, *b = _b;
162
163 return a->offset - b->offset;
164 }
165
166 nir_xfb_info *
167 nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)
168 {
169 assert(shader->info.stage == MESA_SHADER_VERTEX ||
170 shader->info.stage == MESA_SHADER_TESS_EVAL ||
171 shader->info.stage == MESA_SHADER_GEOMETRY);
172
173 /* Compute the number of outputs we have. This is simply the number of
174 * cumulative locations consumed by all the variables. If a location is
175 * represented by multiple variables, then they each count separately in
176 * number of outputs. This is only an estimate as some variables may have
177 * an xfb_buffer but not an output so it may end up larger than we need but
178 * it should be good enough for allocation.
179 */
180 unsigned num_outputs = 0;
181 unsigned num_varyings = 0;
182 nir_foreach_variable(var, &shader->outputs) {
183 if (var->data.explicit_xfb_buffer) {
184 num_outputs += glsl_count_attribute_slots(var->type, false);
185 num_varyings += glsl_varying_count(var->type);
186 }
187 }
188 if (num_outputs == 0 || num_varyings == 0)
189 return NULL;
190
191 nir_xfb_info *xfb = nir_gather_xfb_info_create(mem_ctx, num_outputs, num_varyings);
192
193 /* Walk the list of outputs and add them to the array */
194 nir_foreach_variable(var, &shader->outputs) {
195 if (!var->data.explicit_xfb_buffer)
196 continue;
197
198 unsigned location = var->data.location;
199
200 /* In order to know if we have a array of blocks can't be done just by
201 * checking if we have an interface type and is an array, because due
202 * splitting we could end on a case were we received a split struct
203 * that contains an array.
204 */
205 bool is_array_block = var->interface_type != NULL &&
206 glsl_type_is_array(var->type) &&
207 glsl_without_array(var->type) == glsl_get_bare_type(var->interface_type);
208
209 if (var->data.explicit_offset && !is_array_block) {
210 unsigned offset = var->data.offset;
211 add_var_xfb_outputs(xfb, var, var->data.xfb_buffer,
212 &location, &offset, var->type, false);
213 } else if (is_array_block) {
214 assert(glsl_type_is_struct_or_ifc(var->interface_type));
215
216 unsigned aoa_size = glsl_get_aoa_size(var->type);
217 const struct glsl_type *itype = var->interface_type;
218 unsigned nfields = glsl_get_length(itype);
219 for (unsigned b = 0; b < aoa_size; b++) {
220 for (unsigned f = 0; f < nfields; f++) {
221 int foffset = glsl_get_struct_field_offset(itype, f);
222 const struct glsl_type *ftype = glsl_get_struct_field(itype, f);
223 if (foffset < 0) {
224 location += glsl_count_attribute_slots(ftype, false);
225 continue;
226 }
227
228 unsigned offset = foffset;
229 add_var_xfb_outputs(xfb, var, var->data.xfb_buffer + b,
230 &location, &offset, ftype, false);
231 }
232 }
233 }
234 }
235
236 /* Everything is easier in the state setup code if outputs and varyings are
237 * sorted in order of output offset (and buffer for varyings).
238 */
239 qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]),
240 compare_xfb_output_offsets);
241
242 qsort(xfb->varyings, xfb->varying_count, sizeof(xfb->varyings[0]),
243 compare_xfb_varying_offsets);
244
245 #ifndef NDEBUG
246 /* Finally, do a sanity check */
247 unsigned max_offset[NIR_MAX_XFB_BUFFERS] = {0};
248 for (unsigned i = 0; i < xfb->output_count; i++) {
249 assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]);
250 assert(xfb->outputs[i].component_mask != 0);
251 unsigned slots = util_bitcount(xfb->outputs[i].component_mask);
252 max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + slots * 4;
253 }
254 #endif
255
256 return xfb;
257 }