2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file brw_wm_channel_expressions.cpp
27 * Breaks vector operations down into operations on each component.
29 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
30 * channel of a vector is laid out as 1 or 2 8-float registers. Each
31 * ALU operation operates on one of those channel registers. As a
32 * result, there is no value to the 965 fragment shader in tracking
33 * "vector" expressions in the sense of GLSL fragment shaders, when
34 * doing a channel at a time may help in constant folding, algebraic
35 * simplification, and reducing the liveness of channel registers.
37 * The exception to the desire to break everything down to floats is
38 * texturing.  The texture sampler returns a writemasked
39 * 4/8-register sequence containing the texture values. We don't want
40 * to dispatch to the sampler separately for each channel we need, so
41 * we do retain the vector types in that case.
44 #include "brw_program.h"
45 #include "compiler/glsl/ir.h"
46 #include "compiler/glsl/ir_expression_flattening.h"
47 #include "compiler/glsl_types.h"
49 class ir_channel_expressions_visitor
: public ir_hierarchical_visitor
{
51 ir_channel_expressions_visitor()
53 this->progress
= false;
57 ir_visitor_status
visit_leave(ir_assignment
*);
59 ir_rvalue
*get_element(ir_variable
*var
, unsigned int element
);
60 void assign(ir_assignment
*ir
, int elem
, ir_rvalue
*val
);
67 channel_expressions_predicate(ir_instruction
*ir
)
69 ir_expression
*expr
= ir
->as_expression();
75 switch (expr
->operation
) {
76 case ir_unop_pack_half_2x16
:
77 case ir_unop_pack_snorm_2x16
:
78 case ir_unop_pack_snorm_4x8
:
79 case ir_unop_pack_unorm_2x16
:
80 case ir_unop_pack_unorm_4x8
:
83 /* these opcodes need to act on the whole vector,
84 * just like texturing.
86 case ir_unop_interpolate_at_centroid
:
87 case ir_binop_interpolate_at_offset
:
88 case ir_binop_interpolate_at_sample
:
89 case ir_unop_pack_double_2x32
:
90 case ir_unop_pack_int_2x32
:
91 case ir_unop_pack_uint_2x32
:
97 for (i
= 0; i
< expr
->get_num_operands(); i
++) {
98 if (expr
->operands
[i
]->type
->is_vector())
106 brw_do_channel_expressions(exec_list
*instructions
)
108 ir_channel_expressions_visitor v
;
110 /* Pull out any matrix expression to a separate assignment to a
111 * temp. This will make our handling of the breakdown to
112 * operations on the matrix's vector components much easier.
114 do_expression_flattening(instructions
, channel_expressions_predicate
);
116 visit_list_elements(&v
, instructions
);
122 ir_channel_expressions_visitor::get_element(ir_variable
*var
, unsigned int elem
)
124 ir_dereference
*deref
;
126 if (var
->type
->is_scalar())
127 return new(mem_ctx
) ir_dereference_variable(var
);
129 assert(elem
< var
->type
->components());
130 deref
= new(mem_ctx
) ir_dereference_variable(var
);
131 return new(mem_ctx
) ir_swizzle(deref
, elem
, 0, 0, 0, 1);
135 ir_channel_expressions_visitor::assign(ir_assignment
*ir
, int elem
, ir_rvalue
*val
)
137 ir_dereference
*lhs
= ir
->lhs
->clone(mem_ctx
, NULL
);
138 ir_assignment
*assign
;
140 /* This assign-of-expression should have been generated by the
141 * expression flattening visitor (since we never short circit to
142 * not flatten, even for plain assignments of variables), so the
143 * writemask is always full.
145 assert(ir
->write_mask
== (1 << ir
->lhs
->type
->components()) - 1);
147 assign
= new(mem_ctx
) ir_assignment(lhs
, val
, NULL
, (1 << elem
));
148 ir
->insert_before(assign
);
152 ir_channel_expressions_visitor::visit_leave(ir_assignment
*ir
)
154 ir_expression
*expr
= ir
->rhs
->as_expression();
155 bool found_vector
= false;
156 unsigned int i
, vector_elements
= 1;
157 ir_variable
*op_var
[4];
160 return visit_continue
;
163 this->mem_ctx
= ralloc_parent(ir
);
165 for (i
= 0; i
< expr
->get_num_operands(); i
++) {
166 if (expr
->operands
[i
]->type
->is_vector()) {
168 vector_elements
= expr
->operands
[i
]->type
->vector_elements
;
173 return visit_continue
;
175 switch (expr
->operation
) {
176 case ir_unop_pack_half_2x16
:
177 case ir_unop_pack_snorm_2x16
:
178 case ir_unop_pack_snorm_4x8
:
179 case ir_unop_pack_unorm_2x16
:
180 case ir_unop_pack_unorm_4x8
:
181 case ir_unop_interpolate_at_centroid
:
182 case ir_binop_interpolate_at_offset
:
183 case ir_binop_interpolate_at_sample
:
184 /* We scalarize these in NIR, so no need to do it here */
185 case ir_unop_pack_double_2x32
:
186 case ir_unop_pack_int_2x32
:
187 case ir_unop_pack_uint_2x32
:
188 return visit_continue
;
194 /* Store the expression operands in temps so we can use them
197 for (i
= 0; i
< expr
->get_num_operands(); i
++) {
198 ir_assignment
*assign
;
199 ir_dereference
*deref
;
201 assert(!expr
->operands
[i
]->type
->is_matrix());
203 op_var
[i
] = new(mem_ctx
) ir_variable(expr
->operands
[i
]->type
,
204 "channel_expressions",
206 ir
->insert_before(op_var
[i
]);
208 deref
= new(mem_ctx
) ir_dereference_variable(op_var
[i
]);
209 assign
= new(mem_ctx
) ir_assignment(deref
,
212 ir
->insert_before(assign
);
215 const glsl_type
*element_type
= glsl_type::get_instance(ir
->lhs
->type
->base_type
,
218 /* OK, time to break down this vector operation. */
219 switch (expr
->operation
) {
220 case ir_unop_bit_not
:
221 case ir_unop_logic_not
:
232 case ir_unop_bitcast_i2f
:
233 case ir_unop_bitcast_f2i
:
234 case ir_unop_bitcast_f2u
:
235 case ir_unop_bitcast_u2f
:
236 case ir_unop_bitcast_u642d
:
237 case ir_unop_bitcast_i642d
:
238 case ir_unop_bitcast_d2u64
:
239 case ir_unop_bitcast_d2i64
:
275 case ir_unop_u642i64
:
276 case ir_unop_i642u64
:
281 case ir_unop_round_even
:
285 case ir_unop_dFdx_coarse
:
286 case ir_unop_dFdx_fine
:
288 case ir_unop_dFdy_coarse
:
289 case ir_unop_dFdy_fine
:
290 case ir_unop_bitfield_reverse
:
291 case ir_unop_bit_count
:
292 case ir_unop_find_msb
:
293 case ir_unop_find_lsb
:
294 case ir_unop_saturate
:
295 case ir_unop_subroutine_to_int
:
296 for (i
= 0; i
< vector_elements
; i
++) {
297 ir_rvalue
*op0
= get_element(op_var
[0], i
);
299 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
309 case ir_binop_imul_high
:
312 case ir_binop_borrow
:
317 case ir_binop_lshift
:
318 case ir_binop_rshift
:
319 case ir_binop_bit_and
:
320 case ir_binop_bit_xor
:
321 case ir_binop_bit_or
:
322 case ir_binop_logic_and
:
323 case ir_binop_logic_xor
:
324 case ir_binop_logic_or
:
326 case ir_binop_greater
:
327 case ir_binop_lequal
:
328 case ir_binop_gequal
:
330 case ir_binop_nequal
:
332 for (i
= 0; i
< vector_elements
; i
++) {
333 ir_rvalue
*op0
= get_element(op_var
[0], i
);
334 ir_rvalue
*op1
= get_element(op_var
[1], i
);
336 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
344 ir_expression
*last
= NULL
;
345 for (i
= 0; i
< vector_elements
; i
++) {
346 ir_rvalue
*op0
= get_element(op_var
[0], i
);
347 ir_rvalue
*op1
= get_element(op_var
[1], i
);
350 temp
= new(mem_ctx
) ir_expression(ir_binop_mul
,
355 last
= new(mem_ctx
) ir_expression(ir_binop_add
,
367 case ir_binop_all_equal
:
368 case ir_binop_any_nequal
: {
369 ir_expression
*last
= NULL
;
370 for (i
= 0; i
< vector_elements
; i
++) {
371 ir_rvalue
*op0
= get_element(op_var
[0], i
);
372 ir_rvalue
*op1
= get_element(op_var
[1], i
);
374 ir_expression_operation join
;
376 if (expr
->operation
== ir_binop_all_equal
)
377 join
= ir_binop_logic_and
;
379 join
= ir_binop_logic_or
;
381 temp
= new(mem_ctx
) ir_expression(expr
->operation
,
386 last
= new(mem_ctx
) ir_expression(join
,
398 unreachable("noise should have been broken down to function call");
400 case ir_binop_ubo_load
:
401 case ir_unop_get_buffer_size
:
402 unreachable("not yet supported");
407 case ir_triop_bitfield_extract
:
408 for (i
= 0; i
< vector_elements
; i
++) {
409 ir_rvalue
*op0
= get_element(op_var
[0], i
);
410 ir_rvalue
*op1
= get_element(op_var
[1], i
);
411 ir_rvalue
*op2
= get_element(op_var
[2], i
);
413 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
421 case ir_quadop_bitfield_insert
:
422 for (i
= 0; i
< vector_elements
; i
++) {
423 ir_rvalue
*op0
= get_element(op_var
[0], i
);
424 ir_rvalue
*op1
= get_element(op_var
[1], i
);
425 ir_rvalue
*op2
= get_element(op_var
[2], i
);
426 ir_rvalue
*op3
= get_element(op_var
[3], i
);
428 assign(ir
, i
, new(mem_ctx
) ir_expression(expr
->operation
,
437 case ir_unop_pack_snorm_2x16
:
438 case ir_unop_pack_snorm_4x8
:
439 case ir_unop_pack_unorm_2x16
:
440 case ir_unop_pack_unorm_4x8
:
441 case ir_unop_pack_half_2x16
:
442 case ir_unop_unpack_snorm_2x16
:
443 case ir_unop_unpack_snorm_4x8
:
444 case ir_unop_unpack_unorm_2x16
:
445 case ir_unop_unpack_unorm_4x8
:
446 case ir_unop_unpack_half_2x16
:
447 case ir_binop_vector_extract
:
448 case ir_triop_vector_insert
:
449 case ir_quadop_vector
:
450 case ir_unop_ssbo_unsized_array_length
:
451 unreachable("should have been lowered");
453 case ir_unop_interpolate_at_centroid
:
454 case ir_binop_interpolate_at_offset
:
455 case ir_binop_interpolate_at_sample
:
456 case ir_unop_unpack_double_2x32
:
457 unreachable("not reached: expression operates on scalars only");
459 case ir_unop_pack_double_2x32
:
460 case ir_unop_pack_int_2x32
:
461 case ir_unop_pack_uint_2x32
:
462 unreachable("not reached: to be lowered in NIR, should've been skipped");
464 case ir_unop_frexp_sig
:
465 case ir_unop_frexp_exp
:
466 unreachable("should have been lowered by lower_instructions");
468 case ir_unop_vote_any
:
469 case ir_unop_vote_all
:
470 case ir_unop_vote_eq
:
471 case ir_unop_unpack_int_2x32
:
472 case ir_unop_unpack_uint_2x32
:
473 unreachable("unsupported");
477 this->progress
= true;
479 return visit_continue
;