r600/sfn: Fix memring print output
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_nir_vectorize_vs_inputs.c
1 /*
2 * Copyright © 2018 Timothy Arceri
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "util/u_dynarray.h"
28 #include "util/u_math.h"
29
/** @file sfn_nir_vectorize_vs_inputs.c
31 *
32 * Replaces scalar nir_load_input/nir_store_output operations with
33 * vectorized instructions.
34 */
35 bool
36 r600_vectorize_vs_inputs(nir_shader *shader);
37
38 static nir_deref_instr *
39 r600_clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
40 const nir_deref_instr *src_head)
41 {
42 const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
43
44 if (!parent)
45 return dst_tail;
46
47 assert(src_head->deref_type == nir_deref_type_array);
48
49 dst_tail = r600_clone_deref_array(b, dst_tail, parent);
50
51 return nir_build_deref_array(b, dst_tail,
52 nir_ssa_for_src(b, src_head->arr.index, 1));
53 }
54
55 static bool
56 r600_variable_can_rewrite(nir_variable *var)
57 {
58
59 /* Skip complex types we don't split in the first place */
60 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
61 return false;
62
63
64 /* TODO: add 64/16bit support ? */
65 if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
66 return false;
67
68 /* We only check VSand attribute imputs */
69 return (var->data.location >= VERT_ATTRIB_GENERIC0 &&
70 var->data.location <= VERT_ATTRIB_GENERIC15);
71 }
72
73 static bool
74 r600_instr_can_rewrite(nir_instr *instr)
75 {
76 if (instr->type != nir_instr_type_intrinsic)
77 return false;
78
79 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
80
81 if (intr->num_components > 3)
82 return false;
83
84 if (intr->intrinsic != nir_intrinsic_load_deref)
85 return false;
86
87 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
88 if (deref->mode != nir_var_shader_in)
89 return false;
90
91 return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref));
92 }
93
94 static bool
95 r600_io_access_same_var(const nir_instr *instr1, const nir_instr *instr2)
96 {
97 assert(instr1->type == nir_instr_type_intrinsic &&
98 instr2->type == nir_instr_type_intrinsic);
99
100 nir_intrinsic_instr *intr1 = nir_instr_as_intrinsic(instr1);
101 nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(instr2);
102
103 nir_variable *var1 =
104 nir_deref_instr_get_variable(nir_src_as_deref(intr1->src[0]));
105 nir_variable *var2 =
106 nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
107
108 /* We don't handle combining vars of different base types, so skip those */
109 if (glsl_get_base_type(var1->type) != glsl_get_base_type(var2->type))
110 return false;
111
112 if (var1->data.location != var2->data.location)
113 return false;
114
115 return true;
116 }
117
118 static struct util_dynarray *
119 r600_vec_instr_stack_create(void *mem_ctx)
120 {
121 struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray);
122 util_dynarray_init(stack, mem_ctx);
123 return stack;
124 }
125
126 static void
127 r600_vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr)
128 {
129 util_dynarray_append(stack, nir_instr *, instr);
130 }
131
132 static unsigned r600_correct_location(nir_variable *var)
133 {
134 return var->data.location - VERT_ATTRIB_GENERIC0;
135 }
136
/* Emit a load of the merged variable 'var' in place of 'intr', swizzle
 * out the components the original load produced, rewrite all uses, and
 * remove the original load.
 *
 * comp:          absolute start component of the original variable
 *                (its location_frac)
 * num_comps:     component count of the merged replacement variable
 * old_num_comps: component count of the original load's destination
 */
static void
r600_create_new_load(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
                     unsigned comp, unsigned num_comps, unsigned old_num_comps)
{
   unsigned channels[4];

   b->cursor = nir_before_instr(&intr->instr);

   assert(intr->dest.is_ssa);

   /* Build the replacement load of the full merged vector. */
   nir_intrinsic_instr *new_intr =
      nir_intrinsic_instr_create(b->shader, intr->intrinsic);
   nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, num_comps,
                     intr->dest.ssa.bit_size, NULL);
   new_intr->num_components = num_comps;

   /* Re-root the deref chain on the merged variable, cloning any array
    * indices from the original load's deref. */
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   deref = r600_clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));

   new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);

   /* Interpolation intrinsics carry an extra offset/sample source.
    * NOTE(review): r600_instr_can_rewrite() only accepts load_deref, so
    * this branch appears unreachable here — inherited from the generic
    * vectorize-io pass; confirm before removing. */
   if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
      nir_src_copy(&new_intr->src[1], &intr->src[1], &new_intr->instr);

   nir_builder_instr_insert(b, &new_intr->instr);

   /* Pick the channels of the merged load corresponding to the
    * components the original (narrower) load produced. */
   for (unsigned i = 0; i < old_num_comps; ++i)
      channels[i] = comp - var->data.location_frac + i;
   nir_ssa_def *load = nir_swizzle(b, &new_intr->dest.ssa, channels, old_num_comps);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

   /* Remove the old load intrinsic */
   nir_instr_remove(&intr->instr);
}
172
173
/* Pop 'instr' off its group stack and, if a merged variable was created
 * for its slot/component, rewrite the load to use it.
 * Returns true when a replacement load was emitted.
 */
static bool
r600_vec_instr_stack_pop(nir_builder *b, struct util_dynarray *stack,
                         nir_instr *instr,
                         nir_variable *updated_vars[16][4])
{
   nir_instr *last = util_dynarray_pop(stack, nir_instr *);

   /* Instructions are pushed in forward block order and popped while
    * walking the block in reverse, so the stack top must be 'instr'. */
   assert(last == instr);
   assert(last->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last);
   nir_variable *var =
      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   unsigned loc = r600_correct_location(var);

   /* Merged variable registered for this slot/component, if any
    * (r600_create_new_io_vars() leaves the original var in place when
    * no merge happened). */
   nir_variable *new_var;
   new_var = updated_vars[loc][var->data.location_frac];

   unsigned num_comps =
      glsl_get_vector_elements(glsl_without_array(new_var->type));

   unsigned old_num_comps =
      glsl_get_vector_elements(glsl_without_array(var->type));

   /* Don't bother walking the stack if this component can't be vectorised. */
   if (old_num_comps > 3) {
      return false;
   }

   /* Slot was never merged: nothing to rewrite. */
   if (new_var == var) {
      return false;
   }

   r600_create_new_load(b, intr, new_var, var->data.location_frac,
                        num_comps, old_num_comps);
   return true;
}
211
212 static bool
213 r600_cmp_func(const void *data1, const void *data2)
214 {
215 const struct util_dynarray *arr1 = data1;
216 const struct util_dynarray *arr2 = data2;
217
218 const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1);
219 const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2);
220
221 return r600_io_access_same_var(instr1, instr2);
222 }
223
224 #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
225
226 static uint32_t
227 r600_hash_instr(const nir_instr *instr)
228 {
229 assert(instr->type == nir_instr_type_intrinsic);
230
231 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
232 nir_variable *var =
233 nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
234
235 uint32_t hash = _mesa_fnv32_1a_offset_bias;
236
237 hash = HASH(hash, var->type);
238 return HASH(hash, var->data.location);
239 }
240
241 static uint32_t
242 r600_hash_stack(const void *data)
243 {
244 const struct util_dynarray *stack = data;
245 const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack);
246 return r600_hash_instr(first);
247 }
248
249 static struct set *
250 r600_vec_instr_set_create(void)
251 {
252 return _mesa_set_create(NULL, r600_hash_stack, r600_cmp_func);
253 }
254
255 static void
256 r600_vec_instr_set_destroy(struct set *instr_set)
257 {
258 _mesa_set_destroy(instr_set, NULL);
259 }
260
261 static void
262 r600_vec_instr_set_add(struct set *instr_set, nir_instr *instr)
263 {
264 if (!r600_instr_can_rewrite(instr)) {
265 return;
266 }
267
268 struct util_dynarray *new_stack = r600_vec_instr_stack_create(instr_set);
269 r600_vec_instr_stack_push(new_stack, instr);
270
271 struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
272
273 if (entry) {
274 ralloc_free(new_stack);
275 struct util_dynarray *stack = (struct util_dynarray *) entry->key;
276 r600_vec_instr_stack_push(stack, instr);
277 return;
278 }
279
280 _mesa_set_add(instr_set, new_stack);
281
282 return;
283 }
284
285 static bool
286 r600_vec_instr_set_remove(nir_builder *b, struct set *instr_set, nir_instr *instr,
287 nir_variable *updated_vars[16][4])
288 {
289 if (!r600_instr_can_rewrite(instr)) {
290 return false;
291 }
292 /*
293 * It's pretty unfortunate that we have to do this, but it's a side effect
294 * of the hash set interfaces. The hash set assumes that we're only
295 * interested in storing one equivalent element at a time, and if we try to
296 * insert a duplicate element it will remove the original. We could hack up
297 * the comparison function to "know" which input is an instruction we
298 * passed in and which is an array that's part of the entry, but that
299 * wouldn't work because we need to pass an array to _mesa_set_add() in
300 * vec_instr_add() above, and _mesa_set_add() will call our comparison
301 * function as well.
302 */
303 struct util_dynarray *temp = r600_vec_instr_stack_create(instr_set);
304 r600_vec_instr_stack_push(temp, instr);
305 struct set_entry *entry = _mesa_set_search(instr_set, temp);
306 ralloc_free(temp);
307
308 if (entry) {
309 struct util_dynarray *stack = (struct util_dynarray *) entry->key;
310 bool progress = r600_vec_instr_stack_pop(b, stack, instr, updated_vars);
311
312 if (!util_dynarray_num_elements(stack, nir_instr *))
313 _mesa_set_remove(instr_set, entry);
314
315 return progress;
316 }
317
318 return false;
319 }
320
321 static bool
322 r600_vectorize_block(nir_builder *b, nir_block *block, struct set *instr_set,
323 nir_variable *updated_vars[16][4])
324 {
325 bool progress = false;
326
327 nir_foreach_instr_safe(instr, block) {
328 r600_vec_instr_set_add(instr_set, instr);
329 }
330
331 for (unsigned i = 0; i < block->num_dom_children; i++) {
332 nir_block *child = block->dom_children[i];
333 progress |= r600_vectorize_block(b, child, instr_set, updated_vars);
334 }
335
336 nir_foreach_instr_reverse_safe(instr, block) {
337 progress |= r600_vec_instr_set_remove(b, instr_set, instr, updated_vars);
338 }
339
340 return progress;
341 }
342
343 static void
344 r600_create_new_io_var(nir_shader *shader,
345 nir_variable *vars[16][4],
346 unsigned location, unsigned comps)
347 {
348 unsigned num_comps = util_bitcount(comps);
349 assert(num_comps > 1);
350
351 /* Note: u_bit_scan() strips a component of the comps bitfield here */
352 unsigned first_comp = u_bit_scan(&comps);
353
354 nir_variable *var = nir_variable_clone(vars[location][first_comp], shader);
355 var->data.location_frac = first_comp;
356 var->type = glsl_replace_vector_type(var->type, num_comps);
357
358 nir_shader_add_variable(shader, var);
359
360 vars[location][first_comp] = var;
361
362 while (comps) {
363 const int comp = u_bit_scan(&comps);
364 if (vars[location][comp]) {
365 vars[location][comp] = var;
366 }
367 }
368 }
369
370 static inline bool
371 r600_variables_can_merge(const nir_variable *lhs, const nir_variable *rhs)
372 {
373 return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type));
374 }
375
/* Scan the shader's input list for mergeable VS attributes, index them
 * in vars[slot][start_component], and create one merged variable for
 * every slot where at least two components can be combined.
 */
static void
r600_create_new_io_vars(nir_shader *shader, struct exec_list *io_list,
                        nir_variable *vars[16][4])
{
   if (exec_list_is_empty(io_list))
      return;

   /* Index candidate variables by attribute slot and start component. */
   nir_foreach_variable(var, io_list) {
      if (r600_variable_can_rewrite(var)) {
         unsigned loc = r600_correct_location(var);
         vars[loc][var->data.location_frac] = var;
      }
   }

   /* We don't handle combining vars of different type e.g. different array
    * lengths.
    */
   for (unsigned i = 0; i < 16; i++) {
      unsigned comps = 0;

      /* j only runs to 2: a variable starting at component 3 has no later
       * component to pair with. */
      for (unsigned j = 0; j < 3; j++) {

         if (!vars[i][j])
            continue;

         /* Pair var j with every later var k in the slot and collect the
          * union of components each mergeable pair covers. */
         for (unsigned k = j + 1; k < 4; k++) {
            if (!vars[i][k])
               continue;

            if (!r600_variables_can_merge(vars[i][j], vars[i][k]))
               continue;

            /* Set comps */
            for (unsigned n = 0; n < glsl_get_components(vars[i][j]->type); ++n)
               comps |= 1 << (vars[i][j]->data.location_frac + n);

            for (unsigned n = 0; n < glsl_get_components(vars[i][k]->type); ++n)
               comps |= 1 << (vars[i][k]->data.location_frac + n);

         }
      }
      if (comps)
         r600_create_new_io_var(shader, vars, i, comps);
   }
}
421
422 static bool
423 r600_vectorize_io_impl(nir_function_impl *impl)
424 {
425 nir_builder b;
426 nir_builder_init(&b, impl);
427
428 nir_metadata_require(impl, nir_metadata_dominance);
429
430 nir_shader *shader = impl->function->shader;
431 nir_variable *updated_vars[16][4] = {0};
432
433 r600_create_new_io_vars(shader, &shader->inputs, updated_vars);
434
435 struct set *instr_set = r600_vec_instr_set_create();
436 bool progress = r600_vectorize_block(&b, nir_start_block(impl), instr_set,
437 updated_vars);
438
439 if (progress) {
440 nir_metadata_preserve(impl, nir_metadata_block_index |
441 nir_metadata_dominance);
442 }
443
444 r600_vec_instr_set_destroy(instr_set);
445 return false;
446 }
447
448 bool
449 r600_vectorize_vs_inputs(nir_shader *shader)
450 {
451 bool progress = false;
452
453 if (shader->info.stage != MESA_SHADER_VERTEX)
454 return false;
455
456 nir_foreach_function(function, shader) {
457 if (function->impl)
458 progress |= r600_vectorize_io_impl(function->impl);
459 }
460
461 return progress;
462 }