/*
 * Copyright © 2018 Timothy Arceri
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_builder.h"
#include "nir_deref.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

/** @file nir_opt_vectorize_io.c
 *
 * Replaces scalar nir_load_input/nir_store_output operations with
 * vectorized instructions.
 */
38 r600_vectorize_vs_inputs(nir_shader
*shader
);
40 static nir_deref_instr
*
41 r600_clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
42 const nir_deref_instr
*src_head
)
44 const nir_deref_instr
*parent
= nir_deref_instr_parent(src_head
);
49 assert(src_head
->deref_type
== nir_deref_type_array
);
51 dst_tail
= r600_clone_deref_array(b
, dst_tail
, parent
);
53 return nir_build_deref_array(b
, dst_tail
,
54 nir_ssa_for_src(b
, src_head
->arr
.index
, 1));
58 r600_variable_can_rewrite(nir_variable
*var
)
61 /* Skip complex types we don't split in the first place */
62 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var
->type
)))
66 /* TODO: add 64/16bit support ? */
67 if (glsl_get_bit_size(glsl_without_array(var
->type
)) != 32)
70 /* We only check VSand attribute imputs */
71 return (var
->data
.location
>= VERT_ATTRIB_GENERIC0
&&
72 var
->data
.location
<= VERT_ATTRIB_GENERIC15
);
76 r600_instr_can_rewrite(nir_instr
*instr
)
78 if (instr
->type
!= nir_instr_type_intrinsic
)
81 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
83 if (intr
->num_components
> 3)
86 if (intr
->intrinsic
!= nir_intrinsic_load_deref
)
89 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
90 if (deref
->mode
!= nir_var_shader_in
)
93 return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref
));
97 r600_io_access_same_var(const nir_instr
*instr1
, const nir_instr
*instr2
)
99 assert(instr1
->type
== nir_instr_type_intrinsic
&&
100 instr2
->type
== nir_instr_type_intrinsic
);
102 nir_intrinsic_instr
*intr1
= nir_instr_as_intrinsic(instr1
);
103 nir_intrinsic_instr
*intr2
= nir_instr_as_intrinsic(instr2
);
106 nir_deref_instr_get_variable(nir_src_as_deref(intr1
->src
[0]));
108 nir_deref_instr_get_variable(nir_src_as_deref(intr2
->src
[0]));
110 /* We don't handle combining vars of different base types, so skip those */
111 if (glsl_get_base_type(var1
->type
) != glsl_get_base_type(var2
->type
))
114 if (var1
->data
.location
!= var2
->data
.location
)
120 static struct util_dynarray
*
121 r600_vec_instr_stack_create(void *mem_ctx
)
123 struct util_dynarray
*stack
= ralloc(mem_ctx
, struct util_dynarray
);
124 util_dynarray_init(stack
, mem_ctx
);
129 r600_vec_instr_stack_push(struct util_dynarray
*stack
, nir_instr
*instr
)
131 util_dynarray_append(stack
, nir_instr
*, instr
);
134 static unsigned r600_correct_location(nir_variable
*var
)
136 return var
->data
.location
- VERT_ATTRIB_GENERIC0
;
140 r600_create_new_load(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
141 unsigned comp
, unsigned num_comps
, unsigned old_num_comps
)
143 unsigned channels
[4];
145 b
->cursor
= nir_before_instr(&intr
->instr
);
147 assert(intr
->dest
.is_ssa
);
149 nir_intrinsic_instr
*new_intr
=
150 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
151 nir_ssa_dest_init(&new_intr
->instr
, &new_intr
->dest
, num_comps
,
152 intr
->dest
.ssa
.bit_size
, NULL
);
153 new_intr
->num_components
= num_comps
;
155 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
156 deref
= r600_clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
158 new_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
160 if (intr
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
161 intr
->intrinsic
== nir_intrinsic_interp_deref_at_sample
)
162 nir_src_copy(&new_intr
->src
[1], &intr
->src
[1], &new_intr
->instr
);
164 nir_builder_instr_insert(b
, &new_intr
->instr
);
166 for (unsigned i
= 0; i
< old_num_comps
; ++i
)
167 channels
[i
] = comp
- var
->data
.location_frac
+ i
;
168 nir_ssa_def
*load
= nir_swizzle(b
, &new_intr
->dest
.ssa
, channels
, old_num_comps
);
169 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
, nir_src_for_ssa(load
));
171 /* Remove the old load intrinsic */
172 nir_instr_remove(&intr
->instr
);
177 r600_vec_instr_stack_pop(nir_builder
*b
, struct util_dynarray
*stack
,
179 nir_variable
*updated_vars
[16][4])
181 nir_instr
*last
= util_dynarray_pop(stack
, nir_instr
*);
183 assert(last
== instr
);
184 assert(last
->type
== nir_instr_type_intrinsic
);
186 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(last
);
188 nir_deref_instr_get_variable(nir_src_as_deref(intr
->src
[0]));
189 unsigned loc
= r600_correct_location(var
);
191 nir_variable
*new_var
;
192 new_var
= updated_vars
[loc
][var
->data
.location_frac
];
195 glsl_get_vector_elements(glsl_without_array(new_var
->type
));
197 unsigned old_num_comps
=
198 glsl_get_vector_elements(glsl_without_array(var
->type
));
200 /* Don't bother walking the stack if this component can't be vectorised. */
201 if (old_num_comps
> 3) {
205 if (new_var
== var
) {
209 r600_create_new_load(b
, intr
, new_var
, var
->data
.location_frac
,
210 num_comps
, old_num_comps
);
215 r600_cmp_func(const void *data1
, const void *data2
)
217 const struct util_dynarray
*arr1
= data1
;
218 const struct util_dynarray
*arr2
= data2
;
220 const nir_instr
*instr1
= *(nir_instr
**)util_dynarray_begin(arr1
);
221 const nir_instr
*instr2
= *(nir_instr
**)util_dynarray_begin(arr2
);
223 return r600_io_access_same_var(instr1
, instr2
);
/* Feeds the raw bytes of 'data' to XXH32, passing 'hash' as the third
 * argument.  'data' must be an lvalue because its address is taken. */
#define HASH(hash, data) XXH32(&(data), sizeof(data), (hash))

229 r600_hash_instr(const nir_instr
*instr
)
231 assert(instr
->type
== nir_instr_type_intrinsic
);
233 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
235 nir_deref_instr_get_variable(nir_src_as_deref(intr
->src
[0]));
239 hash
= HASH(hash
, var
->type
);
240 return HASH(hash
, var
->data
.location
);
244 r600_hash_stack(const void *data
)
246 const struct util_dynarray
*stack
= data
;
247 const nir_instr
*first
= *(nir_instr
**)util_dynarray_begin(stack
);
248 return r600_hash_instr(first
);
252 r600_vec_instr_set_create(void)
254 return _mesa_set_create(NULL
, r600_hash_stack
, r600_cmp_func
);
258 r600_vec_instr_set_destroy(struct set
*instr_set
)
260 _mesa_set_destroy(instr_set
, NULL
);
264 r600_vec_instr_set_add(struct set
*instr_set
, nir_instr
*instr
)
266 if (!r600_instr_can_rewrite(instr
)) {
270 struct util_dynarray
*new_stack
= r600_vec_instr_stack_create(instr_set
);
271 r600_vec_instr_stack_push(new_stack
, instr
);
273 struct set_entry
*entry
= _mesa_set_search(instr_set
, new_stack
);
276 ralloc_free(new_stack
);
277 struct util_dynarray
*stack
= (struct util_dynarray
*) entry
->key
;
278 r600_vec_instr_stack_push(stack
, instr
);
282 _mesa_set_add(instr_set
, new_stack
);
288 r600_vec_instr_set_remove(nir_builder
*b
, struct set
*instr_set
, nir_instr
*instr
,
289 nir_variable
*updated_vars
[16][4])
291 if (!r600_instr_can_rewrite(instr
)) {
295 * It's pretty unfortunate that we have to do this, but it's a side effect
296 * of the hash set interfaces. The hash set assumes that we're only
297 * interested in storing one equivalent element at a time, and if we try to
298 * insert a duplicate element it will remove the original. We could hack up
299 * the comparison function to "know" which input is an instruction we
300 * passed in and which is an array that's part of the entry, but that
301 * wouldn't work because we need to pass an array to _mesa_set_add() in
302 * vec_instr_add() above, and _mesa_set_add() will call our comparison
305 struct util_dynarray
*temp
= r600_vec_instr_stack_create(instr_set
);
306 r600_vec_instr_stack_push(temp
, instr
);
307 struct set_entry
*entry
= _mesa_set_search(instr_set
, temp
);
311 struct util_dynarray
*stack
= (struct util_dynarray
*) entry
->key
;
312 bool progress
= r600_vec_instr_stack_pop(b
, stack
, instr
, updated_vars
);
314 if (!util_dynarray_num_elements(stack
, nir_instr
*))
315 _mesa_set_remove(instr_set
, entry
);
324 r600_vectorize_block(nir_builder
*b
, nir_block
*block
, struct set
*instr_set
,
325 nir_variable
*updated_vars
[16][4])
327 bool progress
= false;
329 nir_foreach_instr_safe(instr
, block
) {
330 r600_vec_instr_set_add(instr_set
, instr
);
333 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++) {
334 nir_block
*child
= block
->dom_children
[i
];
335 progress
|= r600_vectorize_block(b
, child
, instr_set
, updated_vars
);
338 nir_foreach_instr_reverse_safe(instr
, block
) {
339 progress
|= r600_vec_instr_set_remove(b
, instr_set
, instr
, updated_vars
);
346 r600_create_new_io_var(nir_shader
*shader
,
347 nir_variable
*vars
[16][4],
348 unsigned location
, unsigned comps
)
350 unsigned num_comps
= util_bitcount(comps
);
351 assert(num_comps
> 1);
353 /* Note: u_bit_scan() strips a component of the comps bitfield here */
354 unsigned first_comp
= u_bit_scan(&comps
);
356 nir_variable
*var
= nir_variable_clone(vars
[location
][first_comp
], shader
);
357 var
->data
.location_frac
= first_comp
;
358 var
->type
= glsl_replace_vector_type(var
->type
, num_comps
);
360 nir_shader_add_variable(shader
, var
);
362 vars
[location
][first_comp
] = var
;
365 const int comp
= u_bit_scan(&comps
);
366 if (vars
[location
][comp
]) {
367 vars
[location
][comp
] = var
;
373 r600_variables_can_merge(const nir_variable
*lhs
, const nir_variable
*rhs
)
375 return (glsl_get_base_type(lhs
->type
) == glsl_get_base_type(rhs
->type
));
379 r600_create_new_io_vars(nir_shader
*shader
, struct exec_list
*io_list
,
380 nir_variable
*vars
[16][4])
382 if (exec_list_is_empty(io_list
))
385 nir_foreach_variable(var
, io_list
) {
386 if (r600_variable_can_rewrite(var
)) {
387 unsigned loc
= r600_correct_location(var
);
388 vars
[loc
][var
->data
.location_frac
] = var
;
392 /* We don't handle combining vars of different type e.g. different array
395 for (unsigned i
= 0; i
< 16; i
++) {
398 for (unsigned j
= 0; j
< 3; j
++) {
403 for (unsigned k
= j
+ 1; k
< 4; k
++) {
407 if (!r600_variables_can_merge(vars
[i
][j
], vars
[i
][k
]))
411 for (unsigned n
= 0; n
< glsl_get_components(vars
[i
][j
]->type
); ++n
)
412 comps
|= 1 << (vars
[i
][j
]->data
.location_frac
+ n
);
414 for (unsigned n
= 0; n
< glsl_get_components(vars
[i
][k
]->type
); ++n
)
415 comps
|= 1 << (vars
[i
][k
]->data
.location_frac
+ n
);
420 r600_create_new_io_var(shader
, vars
, i
, comps
);
425 r600_vectorize_io_impl(nir_function_impl
*impl
)
428 nir_builder_init(&b
, impl
);
430 nir_metadata_require(impl
, nir_metadata_dominance
);
432 nir_shader
*shader
= impl
->function
->shader
;
433 nir_variable
*updated_vars
[16][4] = {0};
435 r600_create_new_io_vars(shader
, &shader
->inputs
, updated_vars
);
437 struct set
*instr_set
= r600_vec_instr_set_create();
438 bool progress
= r600_vectorize_block(&b
, nir_start_block(impl
), instr_set
,
442 nir_metadata_preserve(impl
, nir_metadata_block_index
|
443 nir_metadata_dominance
);
446 r600_vec_instr_set_destroy(instr_set
);
451 r600_vectorize_vs_inputs(nir_shader
*shader
)
453 bool progress
= false;
455 if (shader
->info
.stage
!= MESA_SHADER_VERTEX
)
458 nir_foreach_function(function
, shader
) {
460 progress
|= r600_vectorize_io_impl(function
->impl
);