2 * Copyright © 2015 Connor Abbott
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
27 #include "nir_builder.h"
28 #include "util/u_dynarray.h"
30 #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
33 hash_src(uint32_t hash
, const nir_src
*src
)
36 void *hash_data
= nir_src_is_const(*src
) ? NULL
: src
->ssa
;
38 return HASH(hash
, hash_data
);
42 hash_alu_src(uint32_t hash
, const nir_alu_src
*src
)
44 assert(!src
->abs
&& !src
->negate
);
46 /* intentionally don't hash swizzle */
48 return hash_src(hash
, &src
->src
);
52 hash_alu(uint32_t hash
, const nir_alu_instr
*instr
)
54 hash
= HASH(hash
, instr
->op
);
56 hash
= HASH(hash
, instr
->dest
.dest
.ssa
.bit_size
);
58 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++)
59 hash
= hash_alu_src(hash
, &instr
->src
[i
]);
65 hash_instr(const nir_instr
*instr
)
67 uint32_t hash
= _mesa_fnv32_1a_offset_bias
;
69 switch (instr
->type
) {
70 case nir_instr_type_alu
:
71 return hash_alu(hash
, nir_instr_as_alu(instr
));
73 unreachable("bad instruction type");
78 srcs_equal(const nir_src
*src1
, const nir_src
*src2
)
83 return src1
->ssa
== src2
->ssa
||
84 nir_src_is_const(*src1
) == nir_src_is_const(*src2
);
88 alu_srcs_equal(const nir_alu_src
*src1
, const nir_alu_src
*src2
)
91 assert(!src1
->negate
);
93 assert(!src2
->negate
);
95 return srcs_equal(&src1
->src
, &src2
->src
);
99 instrs_equal(const nir_instr
*instr1
, const nir_instr
*instr2
)
101 switch (instr1
->type
) {
102 case nir_instr_type_alu
: {
103 nir_alu_instr
*alu1
= nir_instr_as_alu(instr1
);
104 nir_alu_instr
*alu2
= nir_instr_as_alu(instr2
);
106 if (alu1
->op
!= alu2
->op
)
109 if (alu1
->dest
.dest
.ssa
.bit_size
!= alu2
->dest
.dest
.ssa
.bit_size
)
112 for (unsigned i
= 0; i
< nir_op_infos
[alu1
->op
].num_inputs
; i
++) {
113 if (!alu_srcs_equal(&alu1
->src
[i
], &alu2
->src
[i
]))
121 unreachable("bad instruction type");
126 instr_can_rewrite(nir_instr
*instr
)
128 switch (instr
->type
) {
129 case nir_instr_type_alu
: {
130 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
132 /* Don't try and vectorize mov's. Either they'll be handled by copy
133 * prop, or they're actually necessary and trying to vectorize them
134 * would result in fighting with copy prop.
136 if (alu
->op
== nir_op_mov
)
139 if (nir_op_infos
[alu
->op
].output_size
!= 0)
142 for (unsigned i
= 0; i
< nir_op_infos
[alu
->op
].num_inputs
; i
++) {
143 if (nir_op_infos
[alu
->op
].input_sizes
[i
] != 0)
150 /* TODO support phi nodes */
159 * Tries to combine two instructions whose sources are different components of
160 * the same instructions into one vectorized instruction. Note that instr1
161 * should dominate instr2.
165 instr_try_combine(nir_instr
*instr1
, nir_instr
*instr2
)
167 assert(instr1
->type
== nir_instr_type_alu
);
168 assert(instr2
->type
== nir_instr_type_alu
);
169 nir_alu_instr
*alu1
= nir_instr_as_alu(instr1
);
170 nir_alu_instr
*alu2
= nir_instr_as_alu(instr2
);
172 assert(alu1
->dest
.dest
.ssa
.bit_size
== alu2
->dest
.dest
.ssa
.bit_size
);
173 unsigned alu1_components
= alu1
->dest
.dest
.ssa
.num_components
;
174 unsigned alu2_components
= alu2
->dest
.dest
.ssa
.num_components
;
175 unsigned total_components
= alu1_components
+ alu2_components
;
177 if (total_components
> 4)
181 nir_builder_init(&b
, nir_cf_node_get_function(&instr1
->block
->cf_node
));
182 b
.cursor
= nir_after_instr(instr1
);
184 nir_alu_instr
*new_alu
= nir_alu_instr_create(b
.shader
, alu1
->op
);
185 nir_ssa_dest_init(&new_alu
->instr
, &new_alu
->dest
.dest
,
186 total_components
, alu1
->dest
.dest
.ssa
.bit_size
, NULL
);
187 new_alu
->dest
.write_mask
= (1 << total_components
) - 1;
189 for (unsigned i
= 0; i
< nir_op_infos
[alu1
->op
].num_inputs
; i
++) {
190 /* handle constant merging case */
191 if (alu1
->src
[i
].src
.ssa
!= alu2
->src
[i
].src
.ssa
) {
192 nir_const_value
*c1
= nir_src_as_const_value(alu1
->src
[i
].src
);
193 nir_const_value
*c2
= nir_src_as_const_value(alu2
->src
[i
].src
);
195 nir_const_value value
[4];
196 unsigned bit_size
= alu1
->src
[i
].src
.ssa
->bit_size
;
198 for (unsigned j
= 0; j
< total_components
; j
++) {
199 value
[j
].u64
= j
< alu1_components
?
200 c1
[alu1
->src
[i
].swizzle
[j
]].u64
:
201 c2
[alu2
->src
[i
].swizzle
[j
- alu1_components
]].u64
;
203 nir_ssa_def
*def
= nir_build_imm(&b
, total_components
, bit_size
, value
);
205 new_alu
->src
[i
].src
= nir_src_for_ssa(def
);
206 for (unsigned j
= 0; j
< total_components
; j
++)
207 new_alu
->src
[i
].swizzle
[j
] = j
;
211 new_alu
->src
[i
].src
= alu1
->src
[i
].src
;
213 for (unsigned j
= 0; j
< alu1_components
; j
++)
214 new_alu
->src
[i
].swizzle
[j
] = alu1
->src
[i
].swizzle
[j
];
216 for (unsigned j
= 0; j
< alu2_components
; j
++) {
217 new_alu
->src
[i
].swizzle
[j
+ alu1_components
] =
218 alu2
->src
[i
].swizzle
[j
];
222 nir_builder_instr_insert(&b
, &new_alu
->instr
);
224 unsigned swiz
[4] = {0, 1, 2, 3};
225 nir_ssa_def
*new_alu1
= nir_swizzle(&b
, &new_alu
->dest
.dest
.ssa
, swiz
,
228 for (unsigned i
= 0; i
< alu2_components
; i
++)
229 swiz
[i
] += alu1_components
;
230 nir_ssa_def
*new_alu2
= nir_swizzle(&b
, &new_alu
->dest
.dest
.ssa
, swiz
,
233 nir_foreach_use_safe(src
, &alu1
->dest
.dest
.ssa
) {
234 if (src
->parent_instr
->type
== nir_instr_type_alu
) {
235 /* For ALU instructions, rewrite the source directly to avoid a
236 * round-trip through copy propagation.
239 nir_instr_rewrite_src(src
->parent_instr
, src
,
240 nir_src_for_ssa(&new_alu
->dest
.dest
.ssa
));
242 nir_instr_rewrite_src(src
->parent_instr
, src
,
243 nir_src_for_ssa(new_alu1
));
247 nir_foreach_if_use_safe(src
, &alu1
->dest
.dest
.ssa
) {
248 nir_if_rewrite_condition(src
->parent_if
, nir_src_for_ssa(new_alu1
));
251 assert(list_is_empty(&alu1
->dest
.dest
.ssa
.uses
));
252 assert(list_is_empty(&alu1
->dest
.dest
.ssa
.if_uses
));
254 nir_foreach_use_safe(src
, &alu2
->dest
.dest
.ssa
) {
255 if (src
->parent_instr
->type
== nir_instr_type_alu
) {
256 /* For ALU instructions, rewrite the source directly to avoid a
257 * round-trip through copy propagation.
260 nir_alu_instr
*use
= nir_instr_as_alu(src
->parent_instr
);
262 unsigned src_index
= 5;
263 for (unsigned i
= 0; i
< nir_op_infos
[use
->op
].num_inputs
; i
++) {
264 if (&use
->src
[i
].src
== src
) {
269 assert(src_index
!= 5);
271 nir_instr_rewrite_src(src
->parent_instr
, src
,
272 nir_src_for_ssa(&new_alu
->dest
.dest
.ssa
));
275 i
< nir_ssa_alu_instr_src_components(use
, src_index
); i
++) {
276 use
->src
[src_index
].swizzle
[i
] += alu1_components
;
279 nir_instr_rewrite_src(src
->parent_instr
, src
,
280 nir_src_for_ssa(new_alu2
));
284 nir_foreach_if_use_safe(src
, &alu2
->dest
.dest
.ssa
) {
285 nir_if_rewrite_condition(src
->parent_if
, nir_src_for_ssa(new_alu2
));
288 assert(list_is_empty(&alu2
->dest
.dest
.ssa
.uses
));
289 assert(list_is_empty(&alu2
->dest
.dest
.ssa
.if_uses
));
291 nir_instr_remove(instr1
);
292 nir_instr_remove(instr2
);
294 return &new_alu
->instr
;
298 * Use an array to represent a stack of instructions that are equivalent.
300 * We push and pop instructions off the stack in dominance order. The first
301 * element dominates the second element which dominates the third, etc. When
302 * trying to add to the stack, first we try and combine the instruction with
303 * each of the instructions on the stack and, if successful, replace the
304 * instruction on the stack with the newly-combined instruction.
307 static struct util_dynarray
*
308 vec_instr_stack_create(void *mem_ctx
)
310 struct util_dynarray
*stack
= ralloc(mem_ctx
, struct util_dynarray
);
311 util_dynarray_init(stack
, mem_ctx
);
315 /* returns true if we were able to successfully replace the instruction */
318 vec_instr_stack_push(struct util_dynarray
*stack
, nir_instr
*instr
)
320 /* Walk the stack from child to parent to make live ranges shorter by
321 * matching the closest thing we can
323 util_dynarray_foreach_reverse(stack
, nir_instr
*, stack_instr
) {
324 nir_instr
*new_instr
= instr_try_combine(*stack_instr
, instr
);
326 *stack_instr
= new_instr
;
331 util_dynarray_append(stack
, nir_instr
*, instr
);
336 vec_instr_stack_pop(struct util_dynarray
*stack
, nir_instr
*instr
)
338 ASSERTED nir_instr
*last
= util_dynarray_pop(stack
, nir_instr
*);
339 assert(last
== instr
);
343 cmp_func(const void *data1
, const void *data2
)
345 const struct util_dynarray
*arr1
= data1
;
346 const struct util_dynarray
*arr2
= data2
;
348 const nir_instr
*instr1
= *(nir_instr
**)util_dynarray_begin(arr1
);
349 const nir_instr
*instr2
= *(nir_instr
**)util_dynarray_begin(arr2
);
351 return instrs_equal(instr1
, instr2
);
355 hash_stack(const void *data
)
357 const struct util_dynarray
*stack
= data
;
358 const nir_instr
*first
= *(nir_instr
**)util_dynarray_begin(stack
);
359 return hash_instr(first
);
363 vec_instr_set_create(void)
365 return _mesa_set_create(NULL
, hash_stack
, cmp_func
);
369 vec_instr_set_destroy(struct set
*instr_set
)
371 _mesa_set_destroy(instr_set
, NULL
);
375 vec_instr_set_add_or_rewrite(struct set
*instr_set
, nir_instr
*instr
)
377 if (!instr_can_rewrite(instr
))
380 struct util_dynarray
*new_stack
= vec_instr_stack_create(instr_set
);
381 vec_instr_stack_push(new_stack
, instr
);
383 struct set_entry
*entry
= _mesa_set_search(instr_set
, new_stack
);
386 ralloc_free(new_stack
);
387 struct util_dynarray
*stack
= (struct util_dynarray
*) entry
->key
;
388 return vec_instr_stack_push(stack
, instr
);
391 _mesa_set_add(instr_set
, new_stack
);
396 vec_instr_set_remove(struct set
*instr_set
, nir_instr
*instr
)
398 if (!instr_can_rewrite(instr
))
402 * It's pretty unfortunate that we have to do this, but it's a side effect
403 * of the hash set interfaces. The hash set assumes that we're only
404 * interested in storing one equivalent element at a time, and if we try to
405 * insert a duplicate element it will remove the original. We could hack up
406 * the comparison function to "know" which input is an instruction we
407 * passed in and which is an array that's part of the entry, but that
408 * wouldn't work because we need to pass an array to _mesa_set_add() in
409 * vec_instr_add_or_rewrite() above, and _mesa_set_add() will call our
410 * comparison function as well.
412 struct util_dynarray
*temp
= vec_instr_stack_create(instr_set
);
413 vec_instr_stack_push(temp
, instr
);
414 struct set_entry
*entry
= _mesa_set_search(instr_set
, temp
);
418 struct util_dynarray
*stack
= (struct util_dynarray
*) entry
->key
;
420 if (util_dynarray_num_elements(stack
, nir_instr
*) > 1)
421 vec_instr_stack_pop(stack
, instr
);
423 _mesa_set_remove(instr_set
, entry
);
428 vectorize_block(nir_block
*block
, struct set
*instr_set
)
430 bool progress
= false;
432 nir_foreach_instr_safe(instr
, block
) {
433 if (vec_instr_set_add_or_rewrite(instr_set
, instr
))
437 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++) {
438 nir_block
*child
= block
->dom_children
[i
];
439 progress
|= vectorize_block(child
, instr_set
);
442 nir_foreach_instr_reverse(instr
, block
)
443 vec_instr_set_remove(instr_set
, instr
);
449 nir_opt_vectorize_impl(nir_function_impl
*impl
)
451 struct set
*instr_set
= vec_instr_set_create();
453 nir_metadata_require(impl
, nir_metadata_dominance
);
455 bool progress
= vectorize_block(nir_start_block(impl
), instr_set
);
458 nir_metadata_preserve(impl
, nir_metadata_block_index
|
459 nir_metadata_dominance
);
461 vec_instr_set_destroy(instr_set
);
466 nir_opt_vectorize(nir_shader
*shader
)
468 bool progress
= false;
470 nir_foreach_function(function
, shader
) {
472 progress
|= nir_opt_vectorize_impl(function
->impl
);