 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */
/*
 * Implements a pass that tries to move uses of vecN sources to their
 * destinations.  This is kind of like an inverse copy-propagation pass.
 * For instance, if you have
 *
 *    ssa_1 = vec4(a, b, c, d)
 *    ssa_2 = fadd(a, b)
 *
 * This will be turned into
 *
 *    ssa_1 = vec4(a, b, c, d)
 *    ssa_2 = fadd(ssa_1.x, ssa_1.y)
 *
 * While this is "worse" because it adds a bunch of unneeded dependencies, it
 * actually makes it much easier for vec4-based backends to coalesce the MOVs
 * that result from the vec4 operation because it doesn't have to worry about
 * quite as many reads.
 */
49 /* Returns true if the given SSA def dominates the instruction. An SSA def is
50 * considered to *not* dominate the instruction that defines it.
53 ssa_def_dominates_instr(nir_ssa_def
*def
, nir_instr
*instr
)
55 if (instr
->index
<= def
->parent_instr
->index
) {
57 } else if (def
->parent_instr
->block
== instr
->block
) {
58 return def
->parent_instr
->index
< instr
->index
;
60 return nir_block_dominates(def
->parent_instr
->block
, instr
->block
);
65 move_vec_src_uses_to_dest_block(nir_block
*block
)
67 bool progress
= false;
69 nir_foreach_instr(instr
, block
) {
70 if (instr
->type
!= nir_instr_type_alu
)
73 nir_alu_instr
*vec
= nir_instr_as_alu(instr
);
81 continue; /* The loop */
84 /* Can't handle non-SSA vec operations */
85 if (!vec
->dest
.dest
.is_ssa
)
88 /* Can't handle saturation */
89 if (vec
->dest
.saturate
)
92 /* First, mark all of the sources we are going to consider for rewriting
95 int srcs_remaining
= 0;
96 for (unsigned i
= 0; i
< nir_op_infos
[vec
->op
].num_inputs
; i
++) {
97 /* We can't rewrite a source if it's not in SSA form */
98 if (!vec
->src
[i
].src
.is_ssa
)
101 /* We can't rewrite a source if it has modifiers */
102 if (vec
->src
[i
].abs
|| vec
->src
[i
].negate
)
105 srcs_remaining
|= 1 << i
;
108 /* We can't actually do anything with this instruction */
109 if (srcs_remaining
== 0)
112 for (unsigned i
; i
= ffs(srcs_remaining
) - 1, srcs_remaining
;) {
113 int8_t swizzle
[4] = { -1, -1, -1, -1 };
115 for (unsigned j
= i
; j
< nir_op_infos
[vec
->op
].num_inputs
; j
++) {
116 if (vec
->src
[j
].src
.ssa
!= vec
->src
[i
].src
.ssa
)
119 /* Mark the given channel as having been handled */
120 srcs_remaining
&= ~(1 << j
);
122 /* Mark the appropriate channel as coming from src j */
123 swizzle
[vec
->src
[j
].swizzle
[0]] = j
;
126 nir_foreach_use_safe(use
, vec
->src
[i
].src
.ssa
) {
127 if (use
->parent_instr
== &vec
->instr
)
130 /* We need to dominate the use if we are going to rewrite it */
131 if (!ssa_def_dominates_instr(&vec
->dest
.dest
.ssa
, use
->parent_instr
))
134 /* For now, we'll just rewrite ALU instructions */
135 if (use
->parent_instr
->type
!= nir_instr_type_alu
)
140 nir_alu_instr
*use_alu
= nir_instr_as_alu(use
->parent_instr
);
142 /* Figure out which source we're actually looking at */
143 nir_alu_src
*use_alu_src
= exec_node_data(nir_alu_src
, use
, src
);
144 unsigned src_idx
= use_alu_src
- use_alu
->src
;
145 assert(src_idx
< nir_op_infos
[use_alu
->op
].num_inputs
);
147 bool can_reswizzle
= true;
148 for (unsigned j
= 0; j
< 4; j
++) {
149 if (!nir_alu_instr_channel_used(use_alu
, src_idx
, j
))
152 if (swizzle
[use_alu_src
->swizzle
[j
]] == -1) {
153 can_reswizzle
= false;
161 /* At this point, we have determined that the given use can be
162 * reswizzled to actually use the destination of the vecN operation.
163 * Go ahead and rewrite it as needed.
165 nir_instr_rewrite_src(use
->parent_instr
, use
,
166 nir_src_for_ssa(&vec
->dest
.dest
.ssa
));
167 for (unsigned j
= 0; j
< 4; j
++) {
168 if (!nir_alu_instr_channel_used(use_alu
, src_idx
, j
))
171 use_alu_src
->swizzle
[j
] = swizzle
[use_alu_src
->swizzle
[j
]];
182 nir_move_vec_src_uses_to_dest_impl(nir_shader
*shader
, nir_function_impl
*impl
)
184 bool progress
= false;
186 nir_metadata_require(impl
, nir_metadata_dominance
);
188 nir_index_instrs(impl
);
190 nir_foreach_block(block
, impl
) {
191 progress
|= move_vec_src_uses_to_dest_block(block
);
194 nir_metadata_preserve(impl
, nir_metadata_block_index
|
195 nir_metadata_dominance
);
201 nir_move_vec_src_uses_to_dest(nir_shader
*shader
)
203 bool progress
= false;
205 nir_foreach_function(function
, shader
) {
207 progress
|= nir_move_vec_src_uses_to_dest_impl(shader
,