/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_fs_channel_expressions.cpp
 *
 * Breaks vector operations down into operations on each component.
 *
 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
 * channel of a vector is laid out as 1 or 2 8-float registers. Each
 * ALU operation operates on one of those channel registers. As a
 * result, there is no value to the 965 fragment shader in tracking
 * "vector" expressions in the sense of GLSL fragment shaders, while
 * handling a channel at a time may help in constant folding, algebraic
 * simplification, and reducing the liveness of channel registers.
 *
 * The exception to the desire to break everything down to floats is
 * texturing. The texture sampler returns a writemasked 4/8-register
 * sequence containing the texture values. We don't want to dispatch
 * to the sampler separately for each channel we need, so we do retain
 * the vector types in that case.
 */
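
/* Conceptually (an illustrative GLSL-level sketch, not literal IR), this
 * pass rewrites an assignment of a vector expression such as
 *
 *    result = a + b;            // result, a, b are vec4
 *
 * into one scalar assignment per channel:
 *
 *    result.x = a.x + b.x;
 *    result.y = a.y + b.y;
 *    result.z = a.z + b.z;
 *    result.w = a.w + b.w;
 */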

#include "brw_program.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/ir_expression_flattening.h"
#include "compiler/glsl_types.h"

class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
   ir_channel_expressions_visitor()
   {
      this->progress = false;
      this->mem_ctx = NULL;
   }

   ir_visitor_status visit_leave(ir_assignment *);

   ir_rvalue *get_element(ir_variable *var, unsigned int element);
   void assign(ir_assignment *ir, int elem, ir_rvalue *val);

   bool progress;
   void *mem_ctx;
};

static bool
channel_expressions_predicate(ir_instruction *ir)
{
   ir_expression *expr = ir->as_expression();
   unsigned int i;

   if (!expr)
      return false;

   switch (expr->operation) {
   case ir_unop_pack_half_2x16:
   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
      return false;

   /* these opcodes need to act on the whole vector,
    * just like texturing.
    */
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
   case ir_unop_pack_double_2x32:
   case ir_unop_pack_int_2x32:
   case ir_unop_pack_uint_2x32:
      return false;
   default:
      break;
   }

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector())
         return true;
   }

   return false;
}

bool
brw_do_channel_expressions(exec_list *instructions)
{
   ir_channel_expressions_visitor v;

   /* Pull out any vector expression to a separate assignment to a
    * temp. This makes it much easier to break the expression down
    * into per-channel operations afterwards.
    */
   do_expression_flattening(instructions, channel_expressions_predicate);

   visit_list_elements(&v, instructions);

   return v.progress;
}
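
/* As an illustrative sketch of what the flattening above does: a nested
 * expression like
 *
 *    result = (a * b) + c;      // all vec4
 *
 * is first flattened so each vector expression feeds a temporary:
 *
 *    vec4 t = a * b;
 *    result = t + c;
 *
 * leaving only simple assignment-of-expression statements for the
 * visitor below to split per channel.
 */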

/* Return a scalar r-value selecting channel \p elem of \p var, or a plain
 * dereference of \p var if it is already scalar.
 */
ir_rvalue *
ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
{
   ir_dereference *deref;

   if (var->type->is_scalar())
      return new(mem_ctx) ir_dereference_variable(var);

   assert(elem < var->type->components());
   deref = new(mem_ctx) ir_dereference_variable(var);
   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
}

void
ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
{
   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
   ir_assignment *assign;

   /* This assign-of-expression should have been generated by the
    * expression flattening visitor (since we never short circuit to
    * not flatten, even for plain assignments of variables), so the
    * writemask is always full.
    */
   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);

   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
   ir->insert_before(assign);
}
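
/* For example, assign(ir, 2, val) inserts the equivalent of "lhs.z = val;"
 * immediately before the original assignment, using a write mask of only
 * that channel (1 << 2).
 */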

ir_visitor_status
ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
{
   ir_expression *expr = ir->rhs->as_expression();
   bool found_vector = false;
   unsigned int i, vector_elements = 1;
   ir_variable *op_var[4];

   if (!expr)
      return visit_continue;

   if (!this->mem_ctx)
      this->mem_ctx = ralloc_parent(ir);

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector()) {
         found_vector = true;
         vector_elements = expr->operands[i]->type->vector_elements;
         break;
      }
   }
   if (!found_vector)
      return visit_continue;

   switch (expr->operation) {
   case ir_unop_pack_half_2x16:
   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
   /* We scalarize these in NIR, so no need to do it here */
   case ir_unop_pack_double_2x32:
   case ir_unop_pack_int_2x32:
   case ir_unop_pack_uint_2x32:
      return visit_continue;

   default:
      break;
   }

   /* Store the expression operands in temps so we can use them
    * multiple times.
    */
   for (i = 0; i < expr->get_num_operands(); i++) {
      ir_assignment *assign;
      ir_dereference *deref;

      assert(!expr->operands[i]->type->is_matrix());

      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
                                           "channel_expressions",
                                           ir_var_temporary);
      ir->insert_before(op_var[i]);

      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
      assign = new(mem_ctx) ir_assignment(deref,
                                          expr->operands[i],
                                          NULL);
      ir->insert_before(assign);
   }

   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
                                                           1, 1);

   /* OK, time to break down this vector operation. */
   switch (expr->operation) {
   case ir_unop_bit_not:
   case ir_unop_logic_not:
   case ir_unop_neg:
   case ir_unop_abs:
   case ir_unop_sign:
   case ir_unop_rcp:
   case ir_unop_rsq:
   case ir_unop_sqrt:
   case ir_unop_exp:
   case ir_unop_log:
   case ir_unop_exp2:
   case ir_unop_log2:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_u2f:
   case ir_unop_bitcast_u642d:
   case ir_unop_bitcast_i642d:
   case ir_unop_bitcast_d2u64:
   case ir_unop_bitcast_d2i64:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_i2f:
   case ir_unop_f2b:
   case ir_unop_b2f:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_u2f:
   case ir_unop_d2f:
   case ir_unop_f2d:
   case ir_unop_d2i:
   case ir_unop_i2d:
   case ir_unop_d2u:
   case ir_unop_u2d:
   case ir_unop_d2b:
   case ir_unop_i642i:
   case ir_unop_u642i:
   case ir_unop_i642u:
   case ir_unop_u642u:
   case ir_unop_i642b:
   case ir_unop_i642f:
   case ir_unop_u642f:
   case ir_unop_i642d:
   case ir_unop_u642d:
   case ir_unop_i2i64:
   case ir_unop_u2i64:
   case ir_unop_b2i64:
   case ir_unop_f2i64:
   case ir_unop_d2i64:
   case ir_unop_i2u64:
   case ir_unop_u2u64:
   case ir_unop_f2u64:
   case ir_unop_d2u64:
   case ir_unop_u642i64:
   case ir_unop_i642u64:
   case ir_unop_trunc:
   case ir_unop_ceil:
   case ir_unop_floor:
   case ir_unop_fract:
   case ir_unop_round_even:
   case ir_unop_sin:
   case ir_unop_cos:
   case ir_unop_dFdx:
   case ir_unop_dFdx_coarse:
   case ir_unop_dFdx_fine:
   case ir_unop_dFdy:
   case ir_unop_dFdy_coarse:
   case ir_unop_dFdy_fine:
   case ir_unop_bitfield_reverse:
   case ir_unop_bit_count:
   case ir_unop_find_msb:
   case ir_unop_find_lsb:
   case ir_unop_saturate:
   case ir_unop_subroutine_to_int:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  NULL));
      }
      break;

   case ir_binop_add:
   case ir_binop_sub:
   case ir_binop_mul:
   case ir_binop_imul_high:
   case ir_binop_div:
   case ir_binop_carry:
   case ir_binop_borrow:
   case ir_binop_mod:
   case ir_binop_min:
   case ir_binop_max:
   case ir_binop_pow:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal:
   case ir_binop_ldexp:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1));
      }
      break;

   case ir_binop_dot: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;

         temp = new(mem_ctx) ir_expression(ir_binop_mul,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(ir_binop_add,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }
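
   /* The loop above folds the per-channel products into a chain of adds;
    * e.g. for vec3 operands the dot product conceptually becomes
    *
    *    result.x = a.z * b.z + (a.y * b.y + a.x * b.x);
    */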

   case ir_binop_all_equal:
   case ir_binop_any_nequal: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;
         ir_expression_operation join;

         if (expr->operation == ir_binop_all_equal)
            join = ir_binop_logic_and;
         else
            join = ir_binop_logic_or;

         temp = new(mem_ctx) ir_expression(expr->operation,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(join,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }
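
   /* all_equal/any_nequal similarly fold the per-channel comparisons with
    * logical AND/OR; e.g. all_equal on vec2 operands conceptually becomes
    *
    *    result = (a.y == b.y) && (a.x == b.x);
    */
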
   case ir_unop_noise:
      unreachable("noise should have been broken down to function call");

   case ir_binop_ubo_load:
   case ir_unop_get_buffer_size:
      unreachable("not yet supported");

   case ir_triop_fma:
   case ir_triop_lrp:
   case ir_triop_csel:
   case ir_triop_bitfield_extract:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1,
                                                  op2));
      }
      break;

   case ir_quadop_bitfield_insert:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);
         ir_rvalue *op3 = get_element(op_var[3], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1,
                                                  op2,
                                                  op3));
      }
      break;

   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_pack_half_2x16:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_binop_vector_extract:
   case ir_triop_vector_insert:
   case ir_quadop_vector:
   case ir_unop_ssbo_unsized_array_length:
      unreachable("should have been lowered");

   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
   case ir_unop_unpack_double_2x32:
      unreachable("not reached: expression operates on scalars only");

   case ir_unop_pack_double_2x32:
   case ir_unop_pack_int_2x32:
   case ir_unop_pack_uint_2x32:
      unreachable("not reached: to be lowered in NIR, should've been skipped");

   case ir_unop_frexp_sig:
   case ir_unop_frexp_exp:
      unreachable("should have been lowered by lower_instructions");

   case ir_unop_vote_any:
   case ir_unop_vote_all:
   case ir_unop_vote_eq:
   case ir_unop_unpack_int_2x32:
   case ir_unop_unpack_uint_2x32:
      unreachable("unsupported");
   }

   ir->remove();
   this->progress = true;

   return visit_continue;
}