/*
 * Copyright © 2015 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
27 #include "nir_builder.h"
28 #include "util/u_dynarray.h"
30 #define HASH(hash, data) XXH32(&data, sizeof(data), hash)
33 hash_src(uint32_t hash
, const nir_src
*src
)
36 void *hash_data
= nir_src_is_const(*src
) ? NULL
: src
->ssa
;
38 return HASH(hash
, hash_data
);
42 hash_alu_src(uint32_t hash
, const nir_alu_src
*src
)
44 assert(!src
->abs
&& !src
->negate
);
46 /* intentionally don't hash swizzle */
48 return hash_src(hash
, &src
->src
);
52 hash_alu(uint32_t hash
, const nir_alu_instr
*instr
)
54 hash
= HASH(hash
, instr
->op
);
56 hash
= HASH(hash
, instr
->dest
.dest
.ssa
.bit_size
);
58 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++)
59 hash
= hash_alu_src(hash
, &instr
->src
[i
]);
65 hash_instr(const nir_instr
*instr
)
69 switch (instr
->type
) {
70 case nir_instr_type_alu
:
71 return hash_alu(hash
, nir_instr_as_alu(instr
));
73 unreachable("bad instruction type");
78 srcs_equal(const nir_src
*src1
, const nir_src
*src2
)
83 return src1
->ssa
== src2
->ssa
||
84 nir_src_is_const(*src1
) == nir_src_is_const(*src2
);
88 alu_srcs_equal(const nir_alu_src
*src1
, const nir_alu_src
*src2
)
91 assert(!src1
->negate
);
93 assert(!src2
->negate
);
95 return srcs_equal(&src1
->src
, &src2
->src
);
99 instrs_equal(const nir_instr
*instr1
, const nir_instr
*instr2
)
101 switch (instr1
->type
) {
102 case nir_instr_type_alu
: {
103 nir_alu_instr
*alu1
= nir_instr_as_alu(instr1
);
104 nir_alu_instr
*alu2
= nir_instr_as_alu(instr2
);
106 if (alu1
->op
!= alu2
->op
)
109 if (alu1
->dest
.dest
.ssa
.bit_size
!= alu2
->dest
.dest
.ssa
.bit_size
)
112 for (unsigned i
= 0; i
< nir_op_infos
[alu1
->op
].num_inputs
; i
++) {
113 if (!alu_srcs_equal(&alu1
->src
[i
], &alu2
->src
[i
]))
121 unreachable("bad instruction type");
126 instr_can_rewrite(nir_instr
*instr
)
128 switch (instr
->type
) {
129 case nir_instr_type_alu
: {
130 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
132 /* Don't try and vectorize mov's. Either they'll be handled by copy
133 * prop, or they're actually necessary and trying to vectorize them
134 * would result in fighting with copy prop.
136 if (alu
->op
== nir_op_mov
)
139 if (nir_op_infos
[alu
->op
].output_size
!= 0)
142 for (unsigned i
= 0; i
< nir_op_infos
[alu
->op
].num_inputs
; i
++) {
143 if (nir_op_infos
[alu
->op
].input_sizes
[i
] != 0)
150 /* TODO support phi nodes */
159 * Tries to combine two instructions whose sources are different components of
160 * the same instructions into one vectorized instruction. Note that instr1
161 * should dominate instr2.
165 instr_try_combine(struct nir_shader
*nir
, nir_instr
*instr1
, nir_instr
*instr2
,
166 nir_opt_vectorize_cb filter
, void *data
)
168 assert(instr1
->type
== nir_instr_type_alu
);
169 assert(instr2
->type
== nir_instr_type_alu
);
170 nir_alu_instr
*alu1
= nir_instr_as_alu(instr1
);
171 nir_alu_instr
*alu2
= nir_instr_as_alu(instr2
);
173 assert(alu1
->dest
.dest
.ssa
.bit_size
== alu2
->dest
.dest
.ssa
.bit_size
);
174 unsigned alu1_components
= alu1
->dest
.dest
.ssa
.num_components
;
175 unsigned alu2_components
= alu2
->dest
.dest
.ssa
.num_components
;
176 unsigned total_components
= alu1_components
+ alu2_components
;
178 if (total_components
> 4)
181 if (nir
->options
->vectorize_vec2_16bit
&&
182 (total_components
> 2 || alu1
->dest
.dest
.ssa
.bit_size
!= 16))
185 if (filter
&& !filter(&alu1
->instr
, &alu2
->instr
, data
))
189 nir_builder_init(&b
, nir_cf_node_get_function(&instr1
->block
->cf_node
));
190 b
.cursor
= nir_after_instr(instr1
);
192 nir_alu_instr
*new_alu
= nir_alu_instr_create(b
.shader
, alu1
->op
);
193 nir_ssa_dest_init(&new_alu
->instr
, &new_alu
->dest
.dest
,
194 total_components
, alu1
->dest
.dest
.ssa
.bit_size
, NULL
);
195 new_alu
->dest
.write_mask
= (1 << total_components
) - 1;
197 for (unsigned i
= 0; i
< nir_op_infos
[alu1
->op
].num_inputs
; i
++) {
198 /* handle constant merging case */
199 if (alu1
->src
[i
].src
.ssa
!= alu2
->src
[i
].src
.ssa
) {
200 nir_const_value
*c1
= nir_src_as_const_value(alu1
->src
[i
].src
);
201 nir_const_value
*c2
= nir_src_as_const_value(alu2
->src
[i
].src
);
203 nir_const_value value
[4];
204 unsigned bit_size
= alu1
->src
[i
].src
.ssa
->bit_size
;
206 for (unsigned j
= 0; j
< total_components
; j
++) {
207 value
[j
].u64
= j
< alu1_components
?
208 c1
[alu1
->src
[i
].swizzle
[j
]].u64
:
209 c2
[alu2
->src
[i
].swizzle
[j
- alu1_components
]].u64
;
211 nir_ssa_def
*def
= nir_build_imm(&b
, total_components
, bit_size
, value
);
213 new_alu
->src
[i
].src
= nir_src_for_ssa(def
);
214 for (unsigned j
= 0; j
< total_components
; j
++)
215 new_alu
->src
[i
].swizzle
[j
] = j
;
219 new_alu
->src
[i
].src
= alu1
->src
[i
].src
;
221 for (unsigned j
= 0; j
< alu1_components
; j
++)
222 new_alu
->src
[i
].swizzle
[j
] = alu1
->src
[i
].swizzle
[j
];
224 for (unsigned j
= 0; j
< alu2_components
; j
++) {
225 new_alu
->src
[i
].swizzle
[j
+ alu1_components
] =
226 alu2
->src
[i
].swizzle
[j
];
230 nir_builder_instr_insert(&b
, &new_alu
->instr
);
232 unsigned swiz
[4] = {0, 1, 2, 3};
233 nir_ssa_def
*new_alu1
= nir_swizzle(&b
, &new_alu
->dest
.dest
.ssa
, swiz
,
236 for (unsigned i
= 0; i
< alu2_components
; i
++)
237 swiz
[i
] += alu1_components
;
238 nir_ssa_def
*new_alu2
= nir_swizzle(&b
, &new_alu
->dest
.dest
.ssa
, swiz
,
241 nir_foreach_use_safe(src
, &alu1
->dest
.dest
.ssa
) {
242 if (src
->parent_instr
->type
== nir_instr_type_alu
) {
243 /* For ALU instructions, rewrite the source directly to avoid a
244 * round-trip through copy propagation.
247 nir_instr_rewrite_src(src
->parent_instr
, src
,
248 nir_src_for_ssa(&new_alu
->dest
.dest
.ssa
));
250 nir_instr_rewrite_src(src
->parent_instr
, src
,
251 nir_src_for_ssa(new_alu1
));
255 nir_foreach_if_use_safe(src
, &alu1
->dest
.dest
.ssa
) {
256 nir_if_rewrite_condition(src
->parent_if
, nir_src_for_ssa(new_alu1
));
259 assert(list_is_empty(&alu1
->dest
.dest
.ssa
.uses
));
260 assert(list_is_empty(&alu1
->dest
.dest
.ssa
.if_uses
));
262 nir_foreach_use_safe(src
, &alu2
->dest
.dest
.ssa
) {
263 if (src
->parent_instr
->type
== nir_instr_type_alu
) {
264 /* For ALU instructions, rewrite the source directly to avoid a
265 * round-trip through copy propagation.
268 nir_alu_instr
*use
= nir_instr_as_alu(src
->parent_instr
);
270 unsigned src_index
= 5;
271 for (unsigned i
= 0; i
< nir_op_infos
[use
->op
].num_inputs
; i
++) {
272 if (&use
->src
[i
].src
== src
) {
277 assert(src_index
!= 5);
279 nir_instr_rewrite_src(src
->parent_instr
, src
,
280 nir_src_for_ssa(&new_alu
->dest
.dest
.ssa
));
283 i
< nir_ssa_alu_instr_src_components(use
, src_index
); i
++) {
284 use
->src
[src_index
].swizzle
[i
] += alu1_components
;
287 nir_instr_rewrite_src(src
->parent_instr
, src
,
288 nir_src_for_ssa(new_alu2
));
292 nir_foreach_if_use_safe(src
, &alu2
->dest
.dest
.ssa
) {
293 nir_if_rewrite_condition(src
->parent_if
, nir_src_for_ssa(new_alu2
));
296 assert(list_is_empty(&alu2
->dest
.dest
.ssa
.uses
));
297 assert(list_is_empty(&alu2
->dest
.dest
.ssa
.if_uses
));
299 nir_instr_remove(instr1
);
300 nir_instr_remove(instr2
);
302 return &new_alu
->instr
;
306 * Use an array to represent a stack of instructions that are equivalent.
308 * We push and pop instructions off the stack in dominance order. The first
309 * element dominates the second element which dominates the third, etc. When
310 * trying to add to the stack, first we try and combine the instruction with
311 * each of the instructions on the stack and, if successful, replace the
312 * instruction on the stack with the newly-combined instruction.
315 static struct util_dynarray
*
316 vec_instr_stack_create(void *mem_ctx
)
318 struct util_dynarray
*stack
= ralloc(mem_ctx
, struct util_dynarray
);
319 util_dynarray_init(stack
, mem_ctx
);
323 /* returns true if we were able to successfully replace the instruction */
326 vec_instr_stack_push(struct nir_shader
*nir
, struct util_dynarray
*stack
,
328 nir_opt_vectorize_cb filter
, void *data
)
330 /* Walk the stack from child to parent to make live ranges shorter by
331 * matching the closest thing we can
333 util_dynarray_foreach_reverse(stack
, nir_instr
*, stack_instr
) {
334 nir_instr
*new_instr
= instr_try_combine(nir
, *stack_instr
, instr
,
337 *stack_instr
= new_instr
;
342 util_dynarray_append(stack
, nir_instr
*, instr
);
347 vec_instr_stack_pop(struct util_dynarray
*stack
, nir_instr
*instr
)
349 ASSERTED nir_instr
*last
= util_dynarray_pop(stack
, nir_instr
*);
350 assert(last
== instr
);
354 cmp_func(const void *data1
, const void *data2
)
356 const struct util_dynarray
*arr1
= data1
;
357 const struct util_dynarray
*arr2
= data2
;
359 const nir_instr
*instr1
= *(nir_instr
**)util_dynarray_begin(arr1
);
360 const nir_instr
*instr2
= *(nir_instr
**)util_dynarray_begin(arr2
);
362 return instrs_equal(instr1
, instr2
);
366 hash_stack(const void *data
)
368 const struct util_dynarray
*stack
= data
;
369 const nir_instr
*first
= *(nir_instr
**)util_dynarray_begin(stack
);
370 return hash_instr(first
);
374 vec_instr_set_create(void)
376 return _mesa_set_create(NULL
, hash_stack
, cmp_func
);
380 vec_instr_set_destroy(struct set
*instr_set
)
382 _mesa_set_destroy(instr_set
, NULL
);
386 vec_instr_set_add_or_rewrite(struct nir_shader
*nir
, struct set
*instr_set
,
388 nir_opt_vectorize_cb filter
, void *data
)
390 if (!instr_can_rewrite(instr
))
393 struct util_dynarray
*new_stack
= vec_instr_stack_create(instr_set
);
394 vec_instr_stack_push(nir
, new_stack
, instr
, filter
, data
);
396 struct set_entry
*entry
= _mesa_set_search(instr_set
, new_stack
);
399 ralloc_free(new_stack
);
400 struct util_dynarray
*stack
= (struct util_dynarray
*) entry
->key
;
401 return vec_instr_stack_push(nir
, stack
, instr
, filter
, data
);
404 _mesa_set_add(instr_set
, new_stack
);
409 vec_instr_set_remove(struct nir_shader
*nir
, struct set
*instr_set
,
410 nir_instr
*instr
, nir_opt_vectorize_cb filter
, void *data
)
412 if (!instr_can_rewrite(instr
))
416 * It's pretty unfortunate that we have to do this, but it's a side effect
417 * of the hash set interfaces. The hash set assumes that we're only
418 * interested in storing one equivalent element at a time, and if we try to
419 * insert a duplicate element it will remove the original. We could hack up
420 * the comparison function to "know" which input is an instruction we
421 * passed in and which is an array that's part of the entry, but that
422 * wouldn't work because we need to pass an array to _mesa_set_add() in
423 * vec_instr_add_or_rewrite() above, and _mesa_set_add() will call our
424 * comparison function as well.
426 struct util_dynarray
*temp
= vec_instr_stack_create(instr_set
);
427 vec_instr_stack_push(nir
, temp
, instr
, filter
, data
);
428 struct set_entry
*entry
= _mesa_set_search(instr_set
, temp
);
432 struct util_dynarray
*stack
= (struct util_dynarray
*) entry
->key
;
434 if (util_dynarray_num_elements(stack
, nir_instr
*) > 1)
435 vec_instr_stack_pop(stack
, instr
);
437 _mesa_set_remove(instr_set
, entry
);
442 vectorize_block(struct nir_shader
*nir
, nir_block
*block
,
443 struct set
*instr_set
,
444 nir_opt_vectorize_cb filter
, void *data
)
446 bool progress
= false;
448 nir_foreach_instr_safe(instr
, block
) {
449 if (vec_instr_set_add_or_rewrite(nir
, instr_set
, instr
, filter
, data
))
453 for (unsigned i
= 0; i
< block
->num_dom_children
; i
++) {
454 nir_block
*child
= block
->dom_children
[i
];
455 progress
|= vectorize_block(nir
, child
, instr_set
, filter
, data
);
458 nir_foreach_instr_reverse(instr
, block
)
459 vec_instr_set_remove(nir
, instr_set
, instr
, filter
, data
);
465 nir_opt_vectorize_impl(struct nir_shader
*nir
, nir_function_impl
*impl
,
466 nir_opt_vectorize_cb filter
, void *data
)
468 struct set
*instr_set
= vec_instr_set_create();
470 nir_metadata_require(impl
, nir_metadata_dominance
);
472 bool progress
= vectorize_block(nir
, nir_start_block(impl
), instr_set
,
476 nir_metadata_preserve(impl
, nir_metadata_block_index
|
477 nir_metadata_dominance
);
479 vec_instr_set_destroy(instr_set
);
484 nir_opt_vectorize(nir_shader
*shader
, nir_opt_vectorize_cb filter
,
487 bool progress
= false;
489 nir_foreach_function(function
, shader
) {
491 progress
|= nir_opt_vectorize_impl(shader
, function
->impl
, filter
, data
);