i965: Use brw_wm_prog_data::uses_kill, not gl_fragment_program::UsesKill
[mesa.git] src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_fs_channel_expressions.cpp
 *
 * Breaks vector operations down into operations on each component.
 *
 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
 * channel of a vector is laid out as 1 or 2 8-float registers. Each
 * ALU operation operates on one of those channel registers. As a
 * result, there is no value to the 965 fragment shader in tracking
 * "vector" expressions in the sense of GLSL fragment shaders, while
 * operating on a channel at a time helps constant folding, algebraic
 * simplification, and reducing the liveness of channel registers.
 *
 * The exception to the desire to break everything down to floats is
 * texturing. The texture sampler returns a writemasked 4/8-register
 * sequence containing the texture values. We don't want to dispatch
 * to the sampler separately for each channel we need, so we do retain
 * the vector types in that case.
 */
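
/* As a rough illustration of the transformation (a sketch of the intent,
 * not literal compiler output), a GLSL-level assignment such as
 *
 *    vec2 r = a + b;
 *
 * ends up expressed at the IR level as one scalar assignment per channel:
 *
 *    r.x = a.x + b.x;
 *    r.y = a.y + b.y;
 */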

extern "C" {
#include "main/core.h"
#include "brw_wm.h"
}
#include "glsl/ir.h"
#include "glsl/ir_expression_flattening.h"
#include "glsl/glsl_types.h"

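/* Visitor that rewrites each assignment of a vector-valued expression into a
 * series of per-channel scalar assignments (see visit_leave below).
 */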
class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
   ir_channel_expressions_visitor()
   {
      this->progress = false;
      this->mem_ctx = NULL;
   }

   ir_visitor_status visit_leave(ir_assignment *);

   ir_rvalue *get_element(ir_variable *var, unsigned int element);
   void assign(ir_assignment *ir, int elem, ir_rvalue *val);

   bool progress;
   void *mem_ctx;
};

static bool
channel_expressions_predicate(ir_instruction *ir)
{
   ir_expression *expr = ir->as_expression();
   unsigned int i;

   if (!expr)
      return false;

   switch (expr->operation) {
   /* these opcodes need to act on the whole vector,
    * just like texturing.
    */
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
      return false;
   default:
      break;
   }

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector())
         return true;
   }

   return false;
}

bool
brw_do_channel_expressions(exec_list *instructions)
{
   ir_channel_expressions_visitor v;

   /* Pull out any expression with vector operands into a separate
    * assignment to a temp.  This makes the breakdown into per-channel
    * operations much easier.
    */
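   /* An illustrative sketch (not literal output): with vec4 operands, an
    * assignment such as
    *
    *    v = a + b * c;
    *
    * is flattened into something along the lines of
    *
    *    t1 = b * c;
    *    t2 = a + t1;
    *    v = t2;
    *
    * so that every vector expression ends up as the entire right-hand side
    * of an assignment with a full writemask.
    */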
   do_expression_flattening(instructions, channel_expressions_predicate);

   visit_list_elements(&v, instructions);

   return v.progress;
}

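/* Returns an rvalue referencing a single channel of \c var: the variable
 * itself if it is scalar, otherwise a one-component swizzle selecting
 * element \c elem.
 */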
ir_rvalue *
ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
{
   ir_dereference *deref;

   if (var->type->is_scalar())
      return new(mem_ctx) ir_dereference_variable(var);

   assert(elem < var->type->components());
   deref = new(mem_ctx) ir_dereference_variable(var);
   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
}

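/* Emits, immediately before \c ir, an assignment of \c val into channel
 * \c elem of ir's left-hand side.
 */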
void
ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
{
   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
   ir_assignment *assign;

   /* This assign-of-expression should have been generated by the
    * expression flattening visitor (since we never short-circuit to
    * not flatten, even for plain assignments of variables), so the
    * writemask is always full.
    */
   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);

   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
   ir->insert_before(assign);
}

ir_visitor_status
ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
{
   ir_expression *expr = ir->rhs->as_expression();
   bool found_vector = false;
   unsigned int i, vector_elements = 1;
   ir_variable *op_var[3];

   if (!expr)
      return visit_continue;

   if (!this->mem_ctx)
      this->mem_ctx = ralloc_parent(ir);

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector()) {
         found_vector = true;
         vector_elements = expr->operands[i]->type->vector_elements;
         break;
      }
   }
   if (!found_vector)
      return visit_continue;

   switch (expr->operation) {
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
      return visit_continue;

   default:
      break;
   }

   /* Store the expression operands in temps so we can use them
    * multiple times.
    */
   for (i = 0; i < expr->get_num_operands(); i++) {
      ir_assignment *assign;
      ir_dereference *deref;

      assert(!expr->operands[i]->type->is_matrix());

      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
                                           "channel_expressions",
                                           ir_var_temporary);
      ir->insert_before(op_var[i]);

      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
      assign = new(mem_ctx) ir_assignment(deref,
                                          expr->operands[i],
                                          NULL);
      ir->insert_before(assign);
   }

   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
                                                           1, 1);

   /* OK, time to break down this vector operation. */
   switch (expr->operation) {
   case ir_unop_bit_not:
   case ir_unop_logic_not:
   case ir_unop_neg:
   case ir_unop_abs:
   case ir_unop_sign:
   case ir_unop_rcp:
   case ir_unop_rsq:
   case ir_unop_sqrt:
   case ir_unop_exp:
   case ir_unop_log:
   case ir_unop_exp2:
   case ir_unop_log2:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_u2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_i2f:
   case ir_unop_f2b:
   case ir_unop_b2f:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_u2f:
   case ir_unop_trunc:
   case ir_unop_ceil:
   case ir_unop_floor:
   case ir_unop_fract:
   case ir_unop_round_even:
   case ir_unop_sin:
   case ir_unop_cos:
   case ir_unop_sin_reduced:
   case ir_unop_cos_reduced:
   case ir_unop_dFdx:
   case ir_unop_dFdx_coarse:
   case ir_unop_dFdx_fine:
   case ir_unop_dFdy:
   case ir_unop_dFdy_coarse:
   case ir_unop_dFdy_fine:
   case ir_unop_bitfield_reverse:
   case ir_unop_bit_count:
   case ir_unop_find_msb:
   case ir_unop_find_lsb:
   case ir_unop_saturate:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  NULL));
      }
      break;

   case ir_binop_add:
   case ir_binop_sub:
   case ir_binop_mul:
   case ir_binop_imul_high:
   case ir_binop_div:
   case ir_binop_carry:
   case ir_binop_borrow:
   case ir_binop_mod:
   case ir_binop_min:
   case ir_binop_max:
   case ir_binop_pow:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1));
      }
      break;

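   /* any(v) reduces to a chain of logical ORs over the vector's channels. */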
   case ir_unop_any: {
      ir_expression *temp;
      temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                        element_type,
                                        get_element(op_var[0], 0),
                                        get_element(op_var[0], 1));

      for (i = 2; i < vector_elements; i++) {
         temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                           element_type,
                                           get_element(op_var[0], i),
                                           temp);
      }
      assign(ir, 0, temp);
      break;
   }

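   /* dot(a, b) becomes per-channel multiplies accumulated with adds. */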
   case ir_binop_dot: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;

         temp = new(mem_ctx) ir_expression(ir_binop_mul,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(ir_binop_add,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }

   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
      ir->fprint(stderr);
      fprintf(stderr, "\n");
      unreachable("not reached: expression operates on scalars only");
   case ir_binop_all_equal:
   case ir_binop_any_nequal: {
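      /* Compare each channel pair with the scalar form of the operation,
       * then fold the results together with AND (all_equal) or OR
       * (any_nequal).
       */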
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;
         ir_expression_operation join;

         if (expr->operation == ir_binop_all_equal)
            join = ir_binop_logic_and;
         else
            join = ir_binop_logic_or;

         temp = new(mem_ctx) ir_expression(expr->operation,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(join,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }
   case ir_unop_noise:
      unreachable("noise should have been broken down to function call");

   case ir_binop_bfm: {
      /* Does not need to be scalarized, since its result will be identical
       * for all channels.
       */
      ir_rvalue *op0 = get_element(op_var[0], 0);
      ir_rvalue *op1 = get_element(op_var[1], 0);

      assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
                                               element_type,
                                               op0,
                                               op1));
      break;
   }

   case ir_binop_ubo_load:
      unreachable("not yet supported");

   case ir_triop_fma:
   case ir_triop_lrp:
   case ir_triop_csel:
   case ir_triop_bitfield_extract:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1,
                                                  op2));
      }
      break;

   case ir_triop_bfi: {
      /* Only a single BFM is needed for multiple BFIs. */
      ir_rvalue *op0 = get_element(op_var[0], 0);

      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0->clone(mem_ctx, NULL),
                                                  op1,
                                                  op2));
      }
      break;
   }

   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_pack_half_2x16:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_binop_ldexp:
   case ir_binop_vector_extract:
   case ir_triop_vector_insert:
   case ir_quadop_bitfield_insert:
   case ir_quadop_vector:
      unreachable("should have been lowered");

   case ir_unop_unpack_half_2x16_split_x:
   case ir_unop_unpack_half_2x16_split_y:
   case ir_binop_pack_half_2x16_split:
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
      unreachable("not reached: expression operates on scalars only");
   }

   ir->remove();
   this->progress = true;

   return visit_continue;
}