/*
 * Copyright © 2018 Timothy Arceri
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "util/u_dynarray.h"
28 #include "util/u_math.h"
/** @file nir_opt_vectorize_io.c
 *
 * Replaces scalar nir_load_input/nir_store_output operations with
 * vectorized instructions.
 */
36 r600_vectorize_vs_inputs(nir_shader
*shader
);
38 static nir_deref_instr
*
39 r600_clone_deref_array(nir_builder
*b
, nir_deref_instr
*dst_tail
,
40 const nir_deref_instr
*src_head
)
42 const nir_deref_instr
*parent
= nir_deref_instr_parent(src_head
);
47 assert(src_head
->deref_type
== nir_deref_type_array
);
49 dst_tail
= r600_clone_deref_array(b
, dst_tail
, parent
);
51 return nir_build_deref_array(b
, dst_tail
,
52 nir_ssa_for_src(b
, src_head
->arr
.index
, 1));
56 r600_variable_can_rewrite(nir_variable
*var
)
59 /* Skip complex types we don't split in the first place */
60 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var
->type
)))
64 /* TODO: add 64/16bit support ? */
65 if (glsl_get_bit_size(glsl_without_array(var
->type
)) != 32)
68 /* We only check VSand attribute imputs */
69 return (var
->data
.location
>= VERT_ATTRIB_GENERIC0
&&
70 var
->data
.location
<= VERT_ATTRIB_GENERIC15
);
74 r600_instr_can_rewrite(nir_instr
*instr
)
76 if (instr
->type
!= nir_instr_type_intrinsic
)
79 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
81 if (intr
->num_components
> 3)
84 if (intr
->intrinsic
!= nir_intrinsic_load_deref
)
87 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
88 if (deref
->mode
!= nir_var_shader_in
)
91 return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref
));
95 r600_io_access_same_var(const nir_instr
*instr1
, const nir_instr
*instr2
)
97 assert(instr1
->type
== nir_instr_type_intrinsic
&&
98 instr2
->type
== nir_instr_type_intrinsic
);
100 nir_intrinsic_instr
*intr1
= nir_instr_as_intrinsic(instr1
);
101 nir_intrinsic_instr
*intr2
= nir_instr_as_intrinsic(instr2
);
104 nir_deref_instr_get_variable(nir_src_as_deref(intr1
->src
[0]));
106 nir_deref_instr_get_variable(nir_src_as_deref(intr2
->src
[0]));
108 /* We don't handle combining vars of different base types, so skip those */
109 if (glsl_get_base_type(var1
->type
) != glsl_get_base_type(var2
->type
))
112 if (var1
->data
.location
!= var2
->data
.location
)
118 static struct util_dynarray
*
119 r600_vec_instr_stack_create(void *mem_ctx
)
121 struct util_dynarray
*stack
= ralloc(mem_ctx
, struct util_dynarray
);
122 util_dynarray_init(stack
, mem_ctx
);
127 r600_vec_instr_stack_push(struct util_dynarray
*stack
, nir_instr
*instr
)
129 util_dynarray_append(stack
, nir_instr
*, instr
);
132 static unsigned r600_correct_location(nir_variable
*var
)
134 return var
->data
.location
- VERT_ATTRIB_GENERIC0
;
138 r600_create_new_load(nir_builder
*b
, nir_intrinsic_instr
*intr
, nir_variable
*var
,
139 unsigned comp
, unsigned num_comps
, unsigned old_num_comps
)
141 unsigned channels
[4];
143 b
->cursor
= nir_before_instr(&intr
->instr
);
145 assert(intr
->dest
.is_ssa
);
147 nir_intrinsic_instr
*new_intr
=
148 nir_intrinsic_instr_create(b
->shader
, intr
->intrinsic
);
149 nir_ssa_dest_init(&new_intr
->instr
, &new_intr
->dest
, num_comps
,
150 intr
->dest
.ssa
.bit_size
, NULL
);
151 new_intr
->num_components
= num_comps
;
153 nir_deref_instr
*deref
= nir_build_deref_var(b
, var
);
154 deref
= r600_clone_deref_array(b
, deref
, nir_src_as_deref(intr
->src
[0]));
156 new_intr
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
158 if (intr
->intrinsic
== nir_intrinsic_interp_deref_at_offset
||
159 intr
->intrinsic
== nir_intrinsic_interp_deref_at_sample
)
160 nir_src_copy(&new_intr
->src
[1], &intr
->src
[1], &new_intr
->instr
);
162 nir_builder_instr_insert(b
, &new_intr
->instr
);
164 for (unsigned i
= 0; i
< old_num_comps
; ++i
)
165 channels
[i
] = comp
- var
->data
.location_frac
+ i
;
166 nir_ssa_def
*load
= nir_swizzle(b
, &new_intr
->dest
.ssa
, channels
, old_num_comps
);
167 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
, nir_src_for_ssa(load
));
169 /* Remove the old load intrinsic */
170 nir_instr_remove(&intr
->instr
);
175 r600_vec_instr_stack_pop(nir_builder
*b
, struct util_dynarray
*stack
,
177 nir_variable
*updated_vars
[16][4])
179 nir_instr
*last
= util_dynarray_pop(stack
, nir_instr
*);
181 assert(last
== instr
);
182 assert(last
->type
== nir_instr_type_intrinsic
);
184 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(last
);
186 nir_deref_instr_get_variable(nir_src_as_deref(intr
->src
[0]));
187 unsigned loc
= r600_correct_location(var
);
189 nir_variable
*new_var
;
190 new_var
= updated_vars
[loc
][var
->data
.location_frac
];
193 glsl_get_vector_elements(glsl_without_array(new_var
->type
));
195 unsigned old_num_comps
=
196 glsl_get_vector_elements(glsl_without_array(var
->type
));
198 /* Don't bother walking the stack if this component can't be vectorised. */
199 if (old_num_comps
> 3) {
203 if (new_var
== var
) {
207 r600_create_new_load(b
, intr
, new_var
, var
->data
.location_frac
,
208 num_comps
, old_num_comps
);
213 r600_cmp_func(const void *data1
, const void *data2
)
215 const struct util_dynarray
*arr1
= data1
;
216 const struct util_dynarray
*arr2
= data2
;
218 const nir_instr
*instr1
= *(nir_instr
**)util_dynarray_begin(arr1
);
219 const nir_instr
*instr2
= *(nir_instr
**)util_dynarray_begin(arr2
);
221 return r600_io_access_same_var(instr1
, instr2
);
224 #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
227 r600_hash_instr(const nir_instr
*instr
)
229 assert(instr
->type
== nir_instr_type_intrinsic
);
231 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
233 nir_deref_instr_get_variable(nir_src_as_deref(intr
->src
[0]));
235 uint32_t hash
= _mesa_fnv32_1a_offset_bias
;
237 hash
= HASH(hash
, var
->type
);
238 return HASH(hash
, var
->data
.location
);
242 r600_hash_stack(const void *data
)
244 const struct util_dynarray
*stack
= data
;
245 const nir_instr
*first
= *(nir_instr
**)util_dynarray_begin(stack
);
246 return r600_hash_instr(first
);
250 r600_vec_instr_set_create(void)
252 return _mesa_set_create(NULL
, r600_hash_stack
, r600_cmp_func
);
/* Release the instruction set (stacks are ralloc-parented to it — see
 * r600_vec_instr_stack_create() callers — and go away with it). */
static void
r600_vec_instr_set_destroy(struct set *instr_set)
{
   _mesa_set_destroy(instr_set, NULL);
}
262 r600_vec_instr_set_add(struct set
*instr_set
, nir_instr
*instr
)
264 if (!r600_instr_can_rewrite(instr
)) {
268 struct util_dynarray
*new_stack
= r600_vec_instr_stack_create(instr_set
);
269 r600_vec_instr_stack_push(new_stack
, instr
);
271 struct set_entry
*entry
= _mesa_set_search(instr_set
, new_stack
);
274 ralloc_free(new_stack
);
275 struct util_dynarray
*stack
= (struct util_dynarray
*) entry
->key
;
276 r600_vec_instr_stack_push(stack
, instr
);
280 _mesa_set_add(instr_set
, new_stack
);
286 r600_vec_instr_set_remove(nir_builder
*b
, struct set
*instr_set
, nir_instr
*instr
,
287 nir_variable
*updated_vars
[16][4])
289 if (!r600_instr_can_rewrite(instr
)) {
293 * It's pretty unfortunate that we have to do this, but it's a side effect
294 * of the hash set interfaces. The hash set assumes that we're only
295 * interested in storing one equivalent element at a time, and if we try to
296 * insert a duplicate element it will remove the original. We could hack up
297 * the comparison function to "know" which input is an instruction we
298 * passed in and which is an array that's part of the entry, but that
299 * wouldn't work because we need to pass an array to _mesa_set_add() in
300 * vec_instr_add() above, and _mesa_set_add() will call our comparison
303 struct util_dynarray
*temp
= r600_vec_instr_stack_create(instr_set
);
304 r600_vec_instr_stack_push(temp
, instr
);
305 struct set_entry
*entry
= _mesa_set_search(instr_set
, temp
);
309 struct util_dynarray
*stack
= (struct util_dynarray
*) entry
->key
;
310 bool progress
= r600_vec_instr_stack_pop(b
, stack
, instr
, updated_vars
);
312 if (!util_dynarray_num_elements(stack
, nir_instr
*))
313 _mesa_set_remove(instr_set
, entry
);
322 r600_vectorize_block(nir_builder
*b
, nir_block
*block
, struct set
*instr_set
,
323 nir_variable
*updated_vars
[16][4])
325 bool progress
= false;
327 nir_foreach_instr_safe(instr
, block
) {
328 r600_vec_instr_set_add(instr_set
, instr
);
331 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++) {
332 nir_block
*child
= block
->dom_children
[i
];
333 progress
|= r600_vectorize_block(b
, child
, instr_set
, updated_vars
);
336 nir_foreach_instr_reverse_safe(instr
, block
) {
337 progress
|= r600_vec_instr_set_remove(b
, instr_set
, instr
, updated_vars
);
344 r600_create_new_io_var(nir_shader
*shader
,
345 nir_variable
*vars
[16][4],
346 unsigned location
, unsigned comps
)
348 unsigned num_comps
= util_bitcount(comps
);
349 assert(num_comps
> 1);
351 /* Note: u_bit_scan() strips a component of the comps bitfield here */
352 unsigned first_comp
= u_bit_scan(&comps
);
354 nir_variable
*var
= nir_variable_clone(vars
[location
][first_comp
], shader
);
355 var
->data
.location_frac
= first_comp
;
356 var
->type
= glsl_replace_vector_type(var
->type
, num_comps
);
358 nir_shader_add_variable(shader
, var
);
360 vars
[location
][first_comp
] = var
;
363 const int comp
= u_bit_scan(&comps
);
364 if (vars
[location
][comp
]) {
365 vars
[location
][comp
] = var
;
371 r600_variables_can_merge(const nir_variable
*lhs
, const nir_variable
*rhs
)
373 return (glsl_get_base_type(lhs
->type
) == glsl_get_base_type(rhs
->type
));
377 r600_create_new_io_vars(nir_shader
*shader
, struct exec_list
*io_list
,
378 nir_variable
*vars
[16][4])
380 if (exec_list_is_empty(io_list
))
383 nir_foreach_variable(var
, io_list
) {
384 if (r600_variable_can_rewrite(var
)) {
385 unsigned loc
= r600_correct_location(var
);
386 vars
[loc
][var
->data
.location_frac
] = var
;
390 /* We don't handle combining vars of different type e.g. different array
393 for (unsigned i
= 0; i
< 16; i
++) {
396 for (unsigned j
= 0; j
< 3; j
++) {
401 for (unsigned k
= j
+ 1; k
< 4; k
++) {
405 if (!r600_variables_can_merge(vars
[i
][j
], vars
[i
][k
]))
409 for (unsigned n
= 0; n
< glsl_get_components(vars
[i
][j
]->type
); ++n
)
410 comps
|= 1 << (vars
[i
][j
]->data
.location_frac
+ n
);
412 for (unsigned n
= 0; n
< glsl_get_components(vars
[i
][k
]->type
); ++n
)
413 comps
|= 1 << (vars
[i
][k
]->data
.location_frac
+ n
);
418 r600_create_new_io_var(shader
, vars
, i
, comps
);
423 r600_vectorize_io_impl(nir_function_impl
*impl
)
426 nir_builder_init(&b
, impl
);
428 nir_metadata_require(impl
, nir_metadata_dominance
);
430 nir_shader
*shader
= impl
->function
->shader
;
431 nir_variable
*updated_vars
[16][4] = {0};
433 r600_create_new_io_vars(shader
, &shader
->inputs
, updated_vars
);
435 struct set
*instr_set
= r600_vec_instr_set_create();
436 bool progress
= r600_vectorize_block(&b
, nir_start_block(impl
), instr_set
,
440 nir_metadata_preserve(impl
, nir_metadata_block_index
|
441 nir_metadata_dominance
);
444 r600_vec_instr_set_destroy(instr_set
);
449 r600_vectorize_vs_inputs(nir_shader
*shader
)
451 bool progress
= false;
453 if (shader
->info
.stage
!= MESA_SHADER_VERTEX
)
456 nir_foreach_function(function
, shader
) {
458 progress
|= r600_vectorize_io_impl(function
->impl
);