/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * \file opt_vectorize.cpp
 *
 * Combines scalar assignments of the same expression (modulo swizzle) to
 * multiple channels of the same variable into a single vectorized expression
 * and assignment.
 *
 * Many generated shaders contain scalarized code. That is, they contain
 *
 * r1.x = log2(v0.x);
 * r1.y = log2(v0.y);
 * r1.z = log2(v0.z);
 *
 * rather than
 *
 * r1.xyz = log2(v0.xyz);
 *
 * We look for consecutive assignments of the same expression (modulo swizzle)
 * to each channel of the same variable.
 *
 * For instance, we want to convert these three scalar operations
 *
 * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
 * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
 * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
 *
 * into a single vector operation
 *
 * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
 */
56 #include "ir_visitor.h"
57 #include "ir_optimization.h"
58 #include "glsl_types.h"
59 #include "program/prog_instruction.h"
63 class ir_vectorize_visitor
: public ir_hierarchical_visitor
{
71 current_assignment
= NULL
;
72 last_assignment
= NULL
;
77 ir_vectorize_visitor()
83 virtual ir_visitor_status
visit_enter(ir_assignment
*);
84 virtual ir_visitor_status
visit_enter(ir_swizzle
*);
85 virtual ir_visitor_status
visit_enter(ir_dereference_array
*);
86 virtual ir_visitor_status
visit_enter(ir_expression
*);
87 virtual ir_visitor_status
visit_enter(ir_if
*);
88 virtual ir_visitor_status
visit_enter(ir_loop
*);
89 virtual ir_visitor_status
visit_enter(ir_texture
*);
91 virtual ir_visitor_status
visit_leave(ir_assignment
*);
95 ir_assignment
*assignment
[4];
96 ir_assignment
*current_assignment
, *last_assignment
;
103 } /* unnamed namespace */
106 * Rewrites the swizzles and types of a right-hand side of an assignment.
108 * From the example above, this function would be called (by visit_tree()) on
109 * the nodes of the tree (expression float log2 (swiz z (var_ref v0))),
110 * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))).
112 * The function operates on ir_expressions (and its operands) and ir_swizzles.
113 * For expressions it sets a new type and swizzles any non-expression and non-
114 * swizzle scalar operands into appropriately sized vector arguments. For
115 * example, if combining
117 * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
118 * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
120 * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
121 * (var_ref v1) such that the final result was
123 * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
124 * (swiz xx (var_ref v1))))
126 * For swizzles, it sets a new type, and if the variable being swizzled is a
127 * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
128 * data parameter. If the swizzled variable is scalar, then the swizzle was
129 * added by an earlier call to rewrite_swizzle() on an expression, so the
130 * mask should not be modified.
133 rewrite_swizzle(ir_instruction
*ir
, void *data
)
135 ir_swizzle_mask
*mask
= (ir_swizzle_mask
*)data
;
137 switch (ir
->ir_type
) {
138 case ir_type_swizzle
: {
139 ir_swizzle
*swz
= (ir_swizzle
*)ir
;
140 if (swz
->val
->type
->is_vector()) {
143 swz
->type
= glsl_type::get_instance(swz
->type
->base_type
,
144 mask
->num_components
, 1);
147 case ir_type_expression
: {
148 ir_expression
*expr
= (ir_expression
*)ir
;
149 expr
->type
= glsl_type::get_instance(expr
->type
->base_type
,
150 mask
->num_components
, 1);
151 for (unsigned i
= 0; i
< 4; i
++) {
152 if (expr
->operands
[i
]) {
153 ir_rvalue
*rval
= expr
->operands
[i
]->as_rvalue();
154 if (rval
&& rval
->type
->is_scalar() &&
155 !rval
->as_expression() && !rval
->as_swizzle()) {
156 expr
->operands
[i
] = new(ir
) ir_swizzle(rval
, 0, 0, 0, 0,
157 mask
->num_components
);
169 * Attempt to vectorize the previously saved assignments, and clear them from
172 * If the assignments are able to be combined, it modifies in-place the last
173 * assignment seen to be an equivalent vector form of the scalar assignments.
174 * It then removes the other now obsolete scalar assignments.
177 ir_vectorize_visitor::try_vectorize()
179 if (this->last_assignment
&& this->channels
> 1) {
180 ir_swizzle_mask mask
= {0, 0, 0, 0, channels
, 0};
182 this->last_assignment
->write_mask
= 0;
184 for (unsigned i
= 0, j
= 0; i
< 4; i
++) {
185 if (this->assignment
[i
]) {
186 this->last_assignment
->write_mask
|= 1 << i
;
188 if (this->assignment
[i
] != this->last_assignment
) {
189 this->assignment
[i
]->remove();
193 case 0: mask
.x
= i
; break;
194 case 1: mask
.y
= i
; break;
195 case 2: mask
.z
= i
; break;
196 case 3: mask
.w
= i
; break;
203 visit_tree(this->last_assignment
->rhs
, rewrite_swizzle
, &mask
);
205 this->progress
= true;
/**
 * Returns whether the write mask is a single channel.
 *
 * \param write_mask  bit mask of written channels.
 * \return true when exactly one bit of \p write_mask is set.
 */
static bool
single_channel_write_mask(unsigned write_mask)
{
   /* x & (x - 1) clears the lowest set bit, so the result is zero only when
    * at most one bit was set; the != 0 test rejects the empty mask.
    */
   return write_mask != 0 && (write_mask & (write_mask - 1)) == 0;
}
220 * Translates single-channeled write mask to single-channeled swizzle.
223 write_mask_to_swizzle(unsigned write_mask
)
225 switch (write_mask
) {
226 case WRITEMASK_X
: return SWIZZLE_X
;
227 case WRITEMASK_Y
: return SWIZZLE_Y
;
228 case WRITEMASK_Z
: return SWIZZLE_Z
;
229 case WRITEMASK_W
: return SWIZZLE_W
;
231 unreachable("not reached");
235 * Returns whether a single-channeled write mask matches a swizzle.
238 write_mask_matches_swizzle(unsigned write_mask
,
239 const ir_swizzle
*swz
)
241 return ((write_mask
== WRITEMASK_X
&& swz
->mask
.x
== SWIZZLE_X
) ||
242 (write_mask
== WRITEMASK_Y
&& swz
->mask
.x
== SWIZZLE_Y
) ||
243 (write_mask
== WRITEMASK_Z
&& swz
->mask
.x
== SWIZZLE_Z
) ||
244 (write_mask
== WRITEMASK_W
&& swz
->mask
.x
== SWIZZLE_W
));
248 * Upon entering an ir_assignment, attempt to vectorize the currently tracked
249 * assignments if the current assignment is not suitable. Keep a pointer to
250 * the current assignment.
253 ir_vectorize_visitor::visit_enter(ir_assignment
*ir
)
255 ir_dereference
*lhs
= this->last_assignment
!= NULL
?
256 this->last_assignment
->lhs
: NULL
;
257 ir_rvalue
*rhs
= this->last_assignment
!= NULL
?
258 this->last_assignment
->rhs
: NULL
;
261 this->channels
>= 4 ||
262 !single_channel_write_mask(ir
->write_mask
) ||
263 this->assignment
[write_mask_to_swizzle(ir
->write_mask
)] != NULL
||
264 (lhs
&& !ir
->lhs
->equals(lhs
)) ||
265 (rhs
&& !ir
->rhs
->equals(rhs
, ir_type_swizzle
))) {
269 this->current_assignment
= ir
;
271 return visit_continue
;
275 * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
276 * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
277 * matches the current assignment's write mask.
279 * If the write mask doesn't match the swizzle mask, remove the current
280 * assignment from further consideration.
283 ir_vectorize_visitor::visit_enter(ir_swizzle
*ir
)
285 if (this->current_assignment
) {
286 if (write_mask_matches_swizzle(this->current_assignment
->write_mask
, ir
)) {
287 this->has_swizzle
= true;
289 this->current_assignment
= NULL
;
292 return visit_continue
;
295 /* Upon entering an ir_array_dereference, remove the current assignment from
296 * further consideration. Since the index of an array dereference must scalar,
297 * we are not able to vectorize it.
299 * FINISHME: If all of scalar indices are identical we could vectorize.
302 ir_vectorize_visitor::visit_enter(ir_dereference_array
*)
304 this->current_assignment
= NULL
;
305 return visit_continue_with_parent
;
309 * Upon entering an ir_expression, remove the current assignment from further
310 * consideration if the expression operates horizontally on vectors.
313 ir_vectorize_visitor::visit_enter(ir_expression
*ir
)
315 if (ir
->is_horizontal()) {
316 this->current_assignment
= NULL
;
317 return visit_continue_with_parent
;
319 return visit_continue
;
322 /* Since there is no statement to visit between the "then" and "else"
323 * instructions try to vectorize before, in between, and after them to avoid
324 * combining statements from different basic blocks.
327 ir_vectorize_visitor::visit_enter(ir_if
*ir
)
331 visit_list_elements(this, &ir
->then_instructions
);
334 visit_list_elements(this, &ir
->else_instructions
);
337 return visit_continue_with_parent
;
340 /* Since there is no statement to visit between the instructions in the body of
341 * the loop and the instructions after it try to vectorize before and after the
342 * body to avoid combining statements from different basic blocks.
345 ir_vectorize_visitor::visit_enter(ir_loop
*ir
)
349 visit_list_elements(this, &ir
->body_instructions
);
352 return visit_continue_with_parent
;
356 * Upon entering an ir_texture, remove the current assignment from
357 * further consideration. Vectorizing multiple texture lookups into one
361 ir_vectorize_visitor::visit_enter(ir_texture
*)
363 this->current_assignment
= NULL
;
364 return visit_continue_with_parent
;
368 * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
369 * the swizzle mask(s) found were appropriate. Also save a pointer in
370 * ::last_assignment so that we can compare future assignments with it.
372 * Finally, clear ::current_assignment and ::has_swizzle.
375 ir_vectorize_visitor::visit_leave(ir_assignment
*ir
)
377 if (this->has_swizzle
&& this->current_assignment
) {
378 assert(this->current_assignment
== ir
);
380 unsigned channel
= write_mask_to_swizzle(this->current_assignment
->write_mask
);
381 this->assignment
[channel
] = ir
;
384 this->last_assignment
= this->current_assignment
;
386 this->current_assignment
= NULL
;
387 this->has_swizzle
= false;
388 return visit_continue
;
/**
 * Combines scalar assignments of the same expression (modulo swizzle) to
 * multiple channels of the same variable into a single vectorized expression
 * and assignment.
 */
397 do_vectorize(exec_list
*instructions
)
399 ir_vectorize_visitor v
;
403 /* Try to vectorize the last assignments seen. */