/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */
29 #include <main/imports.h>
/**
 * SSA-based copy propagation
 */
35 static bool is_move(nir_alu_instr
*instr
)
37 if (instr
->op
!= nir_op_fmov
&&
38 instr
->op
!= nir_op_imov
)
41 if (instr
->dest
.saturate
)
44 /* we handle modifiers in a separate pass */
46 if (instr
->src
[0].abs
|| instr
->src
[0].negate
)
49 if (!instr
->src
[0].src
.is_ssa
)
56 static bool is_vec(nir_alu_instr
*instr
)
58 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++)
59 if (!instr
->src
[i
].src
.is_ssa
)
62 return instr
->op
== nir_op_vec2
||
63 instr
->op
== nir_op_vec3
||
64 instr
->op
== nir_op_vec4
;
68 is_swizzleless_move(nir_alu_instr
*instr
)
71 for (unsigned i
= 0; i
< 4; i
++) {
72 if (!((instr
->dest
.write_mask
>> i
) & 1))
74 if (instr
->src
[0].swizzle
[i
] != i
)
78 } else if (is_vec(instr
)) {
79 nir_ssa_def
*def
= NULL
;
80 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
81 if (instr
->src
[i
].swizzle
[0] != i
)
85 def
= instr
->src
[i
].src
.ssa
;
86 } else if (instr
->src
[i
].src
.ssa
!= def
) {
97 copy_prop_src(nir_src
*src
, nir_instr
*parent_instr
, nir_if
*parent_if
)
100 if (src
->reg
.indirect
)
101 return copy_prop_src(src
, parent_instr
, parent_if
);
105 nir_instr
*src_instr
= src
->ssa
->parent_instr
;
106 if (src_instr
->type
!= nir_instr_type_alu
)
109 nir_alu_instr
*alu_instr
= nir_instr_as_alu(src_instr
);
110 if (!is_swizzleless_move(alu_instr
))
113 /* Don't let copy propagation land us with a phi that has more
114 * components in its source than it has in its destination. That badly
115 * messes up out-of-ssa.
117 if (parent_instr
&& parent_instr
->type
== nir_instr_type_phi
) {
118 nir_phi_instr
*phi
= nir_instr_as_phi(parent_instr
);
119 assert(phi
->dest
.is_ssa
);
120 if (phi
->dest
.ssa
.num_components
!=
121 alu_instr
->src
[0].src
.ssa
->num_components
)
126 nir_instr_rewrite_src(parent_instr
, src
,
127 nir_src_for_ssa(alu_instr
->src
[0].src
.ssa
));
129 assert(src
== &parent_if
->condition
);
130 nir_if_rewrite_condition(parent_if
,
131 nir_src_for_ssa(alu_instr
->src
[0].src
.ssa
));
138 copy_prop_alu_src(nir_alu_instr
*parent_alu_instr
, unsigned index
)
140 nir_alu_src
*src
= &parent_alu_instr
->src
[index
];
141 if (!src
->src
.is_ssa
) {
142 if (src
->src
.reg
.indirect
)
143 return copy_prop_src(src
->src
.reg
.indirect
, &parent_alu_instr
->instr
,
148 nir_instr
*src_instr
= src
->src
.ssa
->parent_instr
;
149 if (src_instr
->type
!= nir_instr_type_alu
)
152 nir_alu_instr
*alu_instr
= nir_instr_as_alu(src_instr
);
153 if (!is_move(alu_instr
) && !is_vec(alu_instr
))
157 unsigned new_swizzle
[4] = {0, 0, 0, 0};
159 if (alu_instr
->op
== nir_op_fmov
||
160 alu_instr
->op
== nir_op_imov
) {
161 for (unsigned i
= 0; i
< 4; i
++)
162 new_swizzle
[i
] = alu_instr
->src
[0].swizzle
[src
->swizzle
[i
]];
163 def
= alu_instr
->src
[0].src
.ssa
;
167 for (unsigned i
= 0; i
< 4; i
++) {
168 if (!nir_alu_instr_channel_used(parent_alu_instr
, index
, i
))
171 nir_ssa_def
*new_def
= alu_instr
->src
[src
->swizzle
[i
]].src
.ssa
;
178 new_swizzle
[i
] = alu_instr
->src
[src
->swizzle
[i
]].swizzle
[0];
182 for (unsigned i
= 0; i
< 4; i
++)
183 src
->swizzle
[i
] = new_swizzle
[i
];
185 nir_instr_rewrite_src(&parent_alu_instr
->instr
, &src
->src
,
186 nir_src_for_ssa(def
));
192 nir_instr
*parent_instr
;
197 copy_prop_src_cb(nir_src
*src
, void *_state
)
199 copy_prop_state
*state
= (copy_prop_state
*) _state
;
200 while (copy_prop_src(src
, state
->parent_instr
, NULL
))
201 state
->progress
= true;
207 copy_prop_instr(nir_instr
*instr
)
209 if (instr
->type
== nir_instr_type_alu
) {
210 nir_alu_instr
*alu_instr
= nir_instr_as_alu(instr
);
211 bool progress
= false;
213 for (unsigned i
= 0; i
< nir_op_infos
[alu_instr
->op
].num_inputs
; i
++)
214 while (copy_prop_alu_src(alu_instr
, i
))
217 if (!alu_instr
->dest
.dest
.is_ssa
&& alu_instr
->dest
.dest
.reg
.indirect
)
218 while (copy_prop_src(alu_instr
->dest
.dest
.reg
.indirect
, instr
, NULL
))
224 copy_prop_state state
;
225 state
.parent_instr
= instr
;
226 state
.progress
= false;
227 nir_foreach_src(instr
, copy_prop_src_cb
, &state
);
229 return state
.progress
;
233 copy_prop_if(nir_if
*if_stmt
)
235 return copy_prop_src(&if_stmt
->condition
, NULL
, if_stmt
);
239 copy_prop_block(nir_block
*block
, void *_state
)
241 bool *progress
= (bool *) _state
;
243 nir_foreach_instr(block
, instr
) {
244 if (copy_prop_instr(instr
))
248 if (block
->cf_node
.node
.next
!= NULL
&& /* check that we aren't the end node */
249 !nir_cf_node_is_last(&block
->cf_node
) &&
250 nir_cf_node_next(&block
->cf_node
)->type
== nir_cf_node_if
) {
251 nir_if
*if_stmt
= nir_cf_node_as_if(nir_cf_node_next(&block
->cf_node
));
252 if (copy_prop_if(if_stmt
))
260 nir_copy_prop_impl(nir_function_impl
*impl
)
262 bool progress
= false;
264 nir_foreach_block(impl
, copy_prop_block
, &progress
);
269 nir_copy_prop(nir_shader
*shader
)
271 bool progress
= false;
273 nir_foreach_overload(shader
, overload
) {
274 if (overload
->impl
&& nir_copy_prop_impl(overload
->impl
))