2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file brw_wm_channel_expressions.cpp
27 * Breaks vector operations down into operations on each component.
29 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
30 * channel of a vector is laid out as 1 or 2 8-float registers. Each
31 * ALU operation operates on one of those channel registers. As a
32 * result, there is no value to the 965 fragment shader in tracking
33 * "vector" expressions in the sense of GLSL fragment shaders, when
34 * doing a channel at a time may help in constant folding, algebraic
35 * simplification, and reducing the liveness of channel registers.
37 * The exception to the desire to break everything down to floats is
38 * texturing.  The texture sampler returns a writemasked
39 * 4/8-register sequence containing the texture values. We don't want
40 * to dispatch to the sampler separately for each channel we need, so
41 * we do retain the vector types in that case.
45 #include "main/core.h"
48 #include "../glsl/ir.h"
49 #include "../glsl/ir_expression_flattening.h"
50 #include "../glsl/glsl_types.h"
// NOTE(review): this chunk is a garbled extraction -- the original file's
// line numbers are fused into the text and many lines (access specifiers,
// member declarations such as the `progress`/`mem_ctx` fields, braces, and
// the closing `};`) are missing.  Code tokens are left byte-identical;
// only comments are added.  Recover the pristine file before building.
//
// Hierarchical-visitor that rewrites vector ALU expressions into
// per-channel scalar operations (see the file header comment above).
52 class ir_channel_expressions_visitor
: public ir_hierarchical_visitor
{
// Constructor: initializes the progress flag to false; it is set to true
// by visit_leave() when a vector expression is actually broken down.
54 ir_channel_expressions_visitor()
56 this->progress
= false;
// Visitor hook: fires after an assignment's children have been visited,
// and performs the per-channel breakdown of its RHS expression.
60 ir_visitor_status
visit_leave(ir_assignment
*);
// Returns an rvalue selecting one channel of `var` (a swizzle, or the
// plain dereference when `var` is scalar -- see get_element below).
62 ir_rvalue
*get_element(ir_variable
*var
, unsigned int element
);
// Emits, before `ir`, a single-channel assignment of `val` into channel
// `elem` of ir's LHS (see assign below).
63 void assign(ir_assignment
*ir
, int elem
, ir_rvalue
*val
);
// NOTE(review): garbled extraction -- the return-type line, braces, the
// guard for `ir` not being an expression, and the function's `return`
// statements are missing from this fragment.  Code tokens are left
// byte-identical; only comments are added.
//
// Predicate handed to do_expression_flattening(): selects instructions
// whose expression has at least one vector-typed operand, so those
// operands get pulled out into temporaries before the breakdown pass.
70 channel_expressions_predicate(ir_instruction
*ir
)
72 ir_expression
*expr
= ir
->as_expression();
// Scan every operand; any vector operand qualifies the instruction
// for flattening.
78 for (i
= 0; i
< expr
->get_num_operands(); i
++) {
79 if (expr
->operands
[i
]->type
->is_vector())
// NOTE(review): garbled extraction -- the return-type line, braces, the
// closing `*/` of the interior comment, and the final return of
// `v.progress` are missing from this fragment.  Code tokens are left
// byte-identical; only comments are added.
//
// Pass entry point: flattens vector/matrix expressions into temporaries,
// then runs the channel-expressions visitor over the instruction list.
87 brw_do_channel_expressions(exec_list
*instructions
)
89 ir_channel_expressions_visitor v
;
91 /* Pull out any matrix expression to a separate assignment to a
92 * temp. This will make our handling of the breakdown to
93 * operations on the matrix's vector components much easier.
95 do_expression_flattening(instructions
, channel_expressions_predicate
);
97 visit_list_elements(&v
, instructions
);
// NOTE(review): garbled extraction -- the `ir_rvalue *` return-type line
// and the function braces are missing from this fragment.  Code tokens
// are left byte-identical; only comments are added.
//
// Builds an rvalue that reads channel `elem` of `var`: a plain variable
// dereference when `var` is scalar, otherwise a single-component swizzle
// of the dereference.  All nodes are allocated out of `mem_ctx`.
103 ir_channel_expressions_visitor::get_element(ir_variable
*var
, unsigned int elem
)
105 ir_dereference
*deref
;
// Scalars have no channels to select; the dereference itself suffices.
107 if (var
->type
->is_scalar())
108 return new(mem_ctx
) ir_dereference_variable(var
);
// Guard against selecting a channel past the end of the vector.
110 assert(elem
< var
->type
->components());
111 deref
= new(mem_ctx
) ir_dereference_variable(var
);
// One-component swizzle: the trailing `0, 0, 0, 1` arguments are the
// remaining swizzle slots plus the component count of 1.
112 return new(mem_ctx
) ir_swizzle(deref
, elem
, 0, 0, 0, 1);
// NOTE(review): garbled extraction -- the `void` return-type line, the
// function braces, and the closing `*/` of the interior comment are
// missing from this fragment.  Code tokens are left byte-identical;
// only comments are added.
//
// Emits, immediately before `ir`, a new assignment writing `val` into
// channel `elem` of ir's LHS (write mask `1 << elem`).  The LHS is
// cloned so the original assignment stays intact.
116 ir_channel_expressions_visitor::assign(ir_assignment
*ir
, int elem
, ir_rvalue
*val
)
118 ir_dereference
*lhs
= ir
->lhs
->clone(mem_ctx
, NULL
);
119 ir_assignment
*assign
;
121 /* This assign-of-expression should have been generated by the
122 * expression flattening visitor (since we never short circuit to
123 * not flatten, even for plain assignments of variables), so the
124 * writemask is always full.
126 assert(ir
->write_mask
== (1 << ir
->lhs
->type
->components()) - 1);
// Single-channel write: only bit `elem` of the write mask is set.
128 assign
= new(mem_ctx
) ir_assignment(lhs
, val
, NULL
, (1 << elem
));
129 ir
->insert_before(assign
);
// NOTE(review): garbled extraction -- the `ir_visitor_status` return-type
// line, braces, several guards (e.g. the early return when the RHS is not
// an expression), comment terminators, and many switch-case bodies are
// missing from this fragment.  Code tokens are left byte-identical; only
// comments are added.  Recover the pristine file before building.
//
// Core of the pass: after visiting an assignment, break its vector RHS
// expression down into one scalar expression per channel, each emitted
// as a separate single-channel assignment via assign().
133 ir_channel_expressions_visitor::visit_leave(ir_assignment
*ir
)
135 ir_expression
*expr
= ir
->rhs
->as_expression();
136 bool found_vector
= false;
137 unsigned int i
, vector_elements
= 1;
// Temporaries holding the (at most two) flattened operands.
138 ir_variable
*op_var
[2];
141 return visit_continue
;
// Allocate all new IR out of the same talloc context as `ir`.
144 this->mem_ctx
= talloc_parent(ir
);
// Find the vector width of the operation from its first vector operand;
// scalar-only expressions are left untouched (visit_continue below).
146 for (i
= 0; i
< expr
->get_num_operands(); i
++) {
147 if (expr
->operands
[i
]->type
->is_vector()) {
149 vector_elements
= expr
->operands
[i
]->type
->vector_elements
;
154 return visit_continue
;
156 /* Store the expression operands in temps so we can use them
// (NOTE(review): the closing of the comment above is missing here.)
// Each operand is copied into a fresh "channel_expressions" temporary
// declared and assigned immediately before `ir`.
159 for (i
= 0; i
< expr
->get_num_operands(); i
++) {
160 ir_assignment
*assign
;
161 ir_dereference
*deref
;
// Matrices should already have been flattened away by the predicate.
163 assert(!expr
->operands
[i
]->type
->is_matrix());
165 op_var
[i
] = new(mem_ctx
) ir_variable(expr
->operands
[i
]->type
,
166 "channel_expressions",
168 ir
->insert_before(op_var
[i
]);
170 deref
= new(mem_ctx
) ir_dereference_variable(op_var
[i
]);
171 assign
= new(mem_ctx
) ir_assignment(deref
,
174 ir
->insert_before(assign
);
// Scalar type with the same base type as the assignment's LHS; used as
// the result type of each per-channel expression.
177 const glsl_type
*element_type
= glsl_type::get_instance(ir
->lhs
->type
->base_type
,
180 /* OK, time to break down this vector operation. */
181 switch (expr
->operation
) {
// Component-wise unary operators: one scalar expression per channel.
182 case ir_unop_bit_not
:
183 case ir_unop_logic_not
:
209 for (i
= 0; i
< vector_elements
; i
++) {
210 ir_rvalue
*op0
= get_element(op_var
[0], i
);
212 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
// Component-wise binary operators: one scalar expression per channel,
// pairing channel i of both operands.
227 case ir_binop_lshift
:
228 case ir_binop_rshift
:
229 case ir_binop_bit_and
:
230 case ir_binop_bit_xor
:
231 case ir_binop_bit_or
:
233 case ir_binop_greater
:
234 case ir_binop_lequal
:
235 case ir_binop_gequal
:
237 case ir_binop_nequal
:
238 for (i
= 0; i
< vector_elements
; i
++) {
239 ir_rvalue
*op0
= get_element(op_var
[0], i
);
240 ir_rvalue
*op1
= get_element(op_var
[1], i
);
242 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
// Horizontal reduction (presumably ir_unop_any -- the case label is
// missing from this fragment): logic_or of channels 0 and 1, then
// folded with each remaining channel.
251 temp
= new(mem_ctx
) ir_expression(ir_binop_logic_or
,
253 get_element(op_var
[0], 0),
254 get_element(op_var
[0], 1));
256 for (i
= 2; i
< vector_elements
; i
++) {
257 temp
= new(mem_ctx
) ir_expression(ir_binop_logic_or
,
259 get_element(op_var
[0], i
),
// Dot product (case label missing from this fragment): sum of
// per-channel products, accumulated through `last`.
267 ir_expression
*last
= NULL
;
268 for (i
= 0; i
< vector_elements
; i
++) {
269 ir_rvalue
*op0
= get_element(op_var
[0], i
);
270 ir_rvalue
*op1
= get_element(op_var
[1], i
);
273 temp
= new(mem_ctx
) ir_expression(ir_binop_mul
,
278 last
= new(mem_ctx
) ir_expression(ir_binop_add
,
// Cross product: channel i = a[(i+1)%3]*b[(i+2)%3] - b[(i+1)%3]*a[(i+2)%3],
// with the subtraction expressed as add of a negated product.
290 case ir_binop_cross
: {
291 for (i
= 0; i
< vector_elements
; i
++) {
292 int swiz0
= (i
+ 1) % 3;
293 int swiz1
= (i
+ 2) % 3;
294 ir_expression
*temp1
, *temp2
;
296 temp1
= new(mem_ctx
) ir_expression(ir_binop_mul
,
298 get_element(op_var
[0], swiz0
),
299 get_element(op_var
[1], swiz1
));
301 temp2
= new(mem_ctx
) ir_expression(ir_binop_mul
,
303 get_element(op_var
[1], swiz0
),
304 get_element(op_var
[0], swiz1
));
// Negate the second product so the following add computes a difference.
306 temp2
= new(mem_ctx
) ir_expression(ir_unop_neg
,
311 assign(ir
, i
, new(mem_ctx
) ir_expression(ir_binop_add
,
// Logic ops are defined on scalars only in GLSL IR; reaching here
// indicates a malformed tree.
318 case ir_binop_logic_and
:
319 case ir_binop_logic_xor
:
320 case ir_binop_logic_or
:
323 assert(!"not reached: expression operates on scalars only");
// all_equal / any_nequal: per-channel comparison folded with logic_and
// (all equal) or logic_or (any not-equal) through `last`.
325 case ir_binop_all_equal
:
326 case ir_binop_any_nequal
: {
327 ir_expression
*last
= NULL
;
328 for (i
= 0; i
< vector_elements
; i
++) {
329 ir_rvalue
*op0
= get_element(op_var
[0], i
);
330 ir_rvalue
*op1
= get_element(op_var
[1], i
);
332 ir_expression_operation join
;
334 if (expr
->operation
== ir_binop_all_equal
)
335 join
= ir_binop_logic_and
;
337 join
= ir_binop_logic_or
;
339 temp
= new(mem_ctx
) ir_expression(expr
->operation
,
344 last
= new(mem_ctx
) ir_expression(join
,
// Noise built-ins must have been lowered to function calls earlier.
356 assert(!"noise should have been broken down to function call");
// A vector expression was broken down; record progress for the caller.
361 this->progress
= true;
363 return visit_continue
;