/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_fs_channel_expressions.cpp
 *
 * Breaks vector operations down into operations on each component.
 *
 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
 * channel of a vector is laid out as 1 or 2 8-float registers. Each
 * ALU operation operates on one of those channel registers. As a
 * result, the 965 fragment shader gains nothing from tracking "vector"
 * expressions in the sense of GLSL fragment shaders, while working a
 * channel at a time helps constant folding, algebraic simplification,
 * and reducing the liveness of channel registers.
 *
 * The exception to the desire to break everything down to floats is
 * texturing. The texture sampler returns a writemasked 4/8-register
 * sequence containing the texture values. We don't want to dispatch
 * to the sampler separately for each channel we need, so we do retain
 * the vector types in that case.
 */
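
/*
 * As a rough GLSL-level sketch (the pass actually rewrites flattened IR
 * assignments, not source code), a vector assignment such as
 *
 *    result = a + b;            // vec4 result, a, b
 *
 * is broken into one writemasked scalar assignment per channel:
 *
 *    result.x = a.x + b.x;
 *    result.y = a.y + b.y;
 *    result.z = a.z + b.z;
 *    result.w = a.w + b.w;
 */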

extern "C" {
#include "main/core.h"
#include "brw_wm.h"
}
#include "glsl/ir.h"
#include "glsl/ir_expression_flattening.h"
#include "glsl/glsl_types.h"

class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
   ir_channel_expressions_visitor()
   {
      this->progress = false;
      this->mem_ctx = NULL;
   }

   ir_visitor_status visit_leave(ir_assignment *);

   ir_rvalue *get_element(ir_variable *var, unsigned int element);
   void assign(ir_assignment *ir, int elem, ir_rvalue *val);

   bool progress;
   void *mem_ctx;
};

static bool
channel_expressions_predicate(ir_instruction *ir)
{
   ir_expression *expr = ir->as_expression();
   unsigned int i;

   if (!expr)
      return false;

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector())
         return true;
   }

   return false;
}

bool
brw_do_channel_expressions(exec_list *instructions)
{
   ir_channel_expressions_visitor v;

   /* Pull out any matrix expression to a separate assignment to a
    * temp. This will make our handling of the breakdown to
    * operations on the matrix's vector components much easier.
    */
   do_expression_flattening(instructions, channel_expressions_predicate);

   visit_list_elements(&v, instructions);

   return v.progress;
}

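/* Get an rvalue for one channel of a variable: the variable itself if it
 * is scalar, otherwise a single-component swizzle selecting channel "elem".
 */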
ir_rvalue *
ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
{
   ir_dereference *deref;

   if (var->type->is_scalar())
      return new(mem_ctx) ir_dereference_variable(var);

   assert(elem < var->type->components());
   deref = new(mem_ctx) ir_dereference_variable(var);
   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
}

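/* Emit, before the original assignment, a copy of "val" into just channel
 * "elem" of ir's LHS, using a one-bit writemask.
 */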
void
ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
{
   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
   ir_assignment *assign;

   /* This assign-of-expression should have been generated by the
    * expression flattening visitor (since we never short-circuit to
    * not flatten, even for plain assignments of variables), so the
    * writemask is always full.
    */
   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);

   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
   ir->insert_before(assign);
}

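/* Scalarize one flattened vector assignment: copy each operand into a
 * temporary so it can be dereferenced once per channel, emit a writemasked
 * scalar assignment per channel, and finally remove the original.
 */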
ir_visitor_status
ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
{
   ir_expression *expr = ir->rhs->as_expression();
   bool found_vector = false;
   unsigned int i, vector_elements = 1;
   ir_variable *op_var[3];

   if (!expr)
      return visit_continue;

   if (!this->mem_ctx)
      this->mem_ctx = ralloc_parent(ir);

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector()) {
         found_vector = true;
         vector_elements = expr->operands[i]->type->vector_elements;
         break;
      }
   }
   if (!found_vector)
      return visit_continue;

   /* Store the expression operands in temps so we can use them
    * multiple times.
    */
   for (i = 0; i < expr->get_num_operands(); i++) {
      ir_assignment *assign;
      ir_dereference *deref;

      assert(!expr->operands[i]->type->is_matrix());

      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
                                           "channel_expressions",
                                           ir_var_temporary);
      ir->insert_before(op_var[i]);

      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
      assign = new(mem_ctx) ir_assignment(deref,
                                          expr->operands[i],
                                          NULL);
      ir->insert_before(assign);
   }

   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
                                                           1, 1);

   /* OK, time to break down this vector operation. */
   switch (expr->operation) {
   case ir_unop_bit_not:
   case ir_unop_logic_not:
   case ir_unop_neg:
   case ir_unop_abs:
   case ir_unop_sign:
   case ir_unop_rcp:
   case ir_unop_rsq:
   case ir_unop_sqrt:
   case ir_unop_exp:
   case ir_unop_log:
   case ir_unop_exp2:
   case ir_unop_log2:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_u2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_i2f:
   case ir_unop_f2b:
   case ir_unop_b2f:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_u2f:
   case ir_unop_trunc:
   case ir_unop_ceil:
   case ir_unop_floor:
   case ir_unop_fract:
   case ir_unop_round_even:
   case ir_unop_sin:
   case ir_unop_cos:
   case ir_unop_sin_reduced:
   case ir_unop_cos_reduced:
   case ir_unop_dFdx:
   case ir_unop_dFdy:
   case ir_unop_bitfield_reverse:
   case ir_unop_bit_count:
   case ir_unop_find_msb:
   case ir_unop_find_lsb:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  NULL));
      }
      break;

   case ir_binop_add:
   case ir_binop_sub:
   case ir_binop_mul:
   case ir_binop_imul_high:
   case ir_binop_div:
   case ir_binop_carry:
   case ir_binop_borrow:
   case ir_binop_mod:
   case ir_binop_min:
   case ir_binop_max:
   case ir_binop_pow:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1));
      }
      break;

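   /* any(v) is reduced with a chain of logical ORs, e.g. for a bvec3 the
    * result is (v.z || (v.x || v.y)).
    */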
   case ir_unop_any: {
      ir_expression *temp;
      temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                        element_type,
                                        get_element(op_var[0], 0),
                                        get_element(op_var[0], 1));

      for (i = 2; i < vector_elements; i++) {
         temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                           element_type,
                                           get_element(op_var[0], i),
                                           temp);
      }
      assign(ir, 0, temp);
      break;
   }

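   /* dot(a, b) becomes one multiply per channel, summed by a chain of adds,
    * e.g. for vec3: (a.z * b.z + (a.y * b.y + a.x * b.x)).
    */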
   case ir_binop_dot: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;

         temp = new(mem_ctx) ir_expression(ir_binop_mul,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(ir_binop_add,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }

   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
      ir->fprint(stderr);
      fprintf(stderr, "\n");
      assert(!"not reached: expression operates on scalars only");
      break;
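
   /* all_equal(a, b) ANDs the per-channel comparisons together, while
    * any_nequal(a, b) ORs them.
    */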
   case ir_binop_all_equal:
   case ir_binop_any_nequal: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;
         ir_expression_operation join;

         if (expr->operation == ir_binop_all_equal)
            join = ir_binop_logic_and;
         else
            join = ir_binop_logic_or;

         temp = new(mem_ctx) ir_expression(expr->operation,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(join,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }

   case ir_unop_noise:
      assert(!"noise should have been broken down to function call");
      break;

   case ir_binop_bfm: {
      /* Does not need to be scalarized, since its result will be identical
       * for all channels.
       */
      ir_rvalue *op0 = get_element(op_var[0], 0);
      ir_rvalue *op1 = get_element(op_var[1], 0);

      assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
                                               element_type,
                                               op0,
                                               op1));
      break;
   }

   case ir_binop_ubo_load:
      assert(!"not yet supported");
      break;

   case ir_triop_fma:
   case ir_triop_lrp:
   case ir_triop_csel:
   case ir_triop_bitfield_extract:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1,
                                                  op2));
      }
      break;

   case ir_triop_bfi: {
      /* Only a single BFM is needed for multiple BFIs. */
      ir_rvalue *op0 = get_element(op_var[0], 0);

      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0->clone(mem_ctx, NULL),
                                                  op1,
                                                  op2));
      }
      break;
   }

   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_pack_half_2x16:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_binop_ldexp:
   case ir_binop_vector_extract:
   case ir_triop_vector_insert:
   case ir_quadop_bitfield_insert:
   case ir_quadop_vector:
      assert(!"should have been lowered");
      break;

   case ir_unop_unpack_half_2x16_split_x:
   case ir_unop_unpack_half_2x16_split_y:
   case ir_binop_pack_half_2x16_split:
      assert(!"not reached: expression operates on scalars only");
      break;
   }

   ir->remove();
   this->progress = true;

   return visit_continue;
}