glsl: Delete the ir_binop_bfm and ir_triop_bfi opcodes.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_channel_expressions.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file brw_fs_channel_expressions.cpp
26 *
27 * Breaks vector operations down into operations on each component.
28 *
29 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
30 * channel of a vector is laid out as 1 or 2 8-float registers. Each
31 * ALU operation operates on one of those channel registers. As a
32 * result, there is no value to the 965 fragment shader in tracking
33 * "vector" expressions in the sense of GLSL fragment shaders, when
34 * doing a channel at a time may help in constant folding, algebraic
35 * simplification, and reducing the liveness of channel registers.
36 *
37 * The exception to the desire to break everything down to floats is
38 * texturing. The texture sampler returns a writemasked
39 * 4/8-register sequence containing the texture values. We don't want
40 * to dispatch to the sampler separately for each channel we need, so
41 * we do retain the vector types in that case.
42 */
43
44 #include "glsl/ir.h"
45 #include "glsl/ir_expression_flattening.h"
46 #include "glsl/nir/glsl_types.h"
47
48 class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
49 public:
50 ir_channel_expressions_visitor()
51 {
52 this->progress = false;
53 this->mem_ctx = NULL;
54 }
55
56 ir_visitor_status visit_leave(ir_assignment *);
57
58 ir_rvalue *get_element(ir_variable *var, unsigned int element);
59 void assign(ir_assignment *ir, int elem, ir_rvalue *val);
60
61 bool progress;
62 void *mem_ctx;
63 };
64
65 static bool
66 channel_expressions_predicate(ir_instruction *ir)
67 {
68 ir_expression *expr = ir->as_expression();
69 unsigned int i;
70
71 if (!expr)
72 return false;
73
74 switch (expr->operation) {
75 /* these opcodes need to act on the whole vector,
76 * just like texturing.
77 */
78 case ir_unop_interpolate_at_centroid:
79 case ir_binop_interpolate_at_offset:
80 case ir_binop_interpolate_at_sample:
81 return false;
82 default:
83 break;
84 }
85
86 for (i = 0; i < expr->get_num_operands(); i++) {
87 if (expr->operands[i]->type->is_vector())
88 return true;
89 }
90
91 return false;
92 }
93
94 bool
95 brw_do_channel_expressions(exec_list *instructions)
96 {
97 ir_channel_expressions_visitor v;
98
99 /* Pull out any matrix expression to a separate assignment to a
100 * temp. This will make our handling of the breakdown to
101 * operations on the matrix's vector components much easier.
102 */
103 do_expression_flattening(instructions, channel_expressions_predicate);
104
105 visit_list_elements(&v, instructions);
106
107 return v.progress;
108 }
109
110 ir_rvalue *
111 ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
112 {
113 ir_dereference *deref;
114
115 if (var->type->is_scalar())
116 return new(mem_ctx) ir_dereference_variable(var);
117
118 assert(elem < var->type->components());
119 deref = new(mem_ctx) ir_dereference_variable(var);
120 return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
121 }
122
123 void
124 ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
125 {
126 ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
127 ir_assignment *assign;
128
129 /* This assign-of-expression should have been generated by the
130 * expression flattening visitor (since we never short circit to
131 * not flatten, even for plain assignments of variables), so the
132 * writemask is always full.
133 */
134 assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
135
136 assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
137 ir->insert_before(assign);
138 }
139
140 ir_visitor_status
141 ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
142 {
143 ir_expression *expr = ir->rhs->as_expression();
144 bool found_vector = false;
145 unsigned int i, vector_elements = 1;
146 ir_variable *op_var[4];
147
148 if (!expr)
149 return visit_continue;
150
151 if (!this->mem_ctx)
152 this->mem_ctx = ralloc_parent(ir);
153
154 for (i = 0; i < expr->get_num_operands(); i++) {
155 if (expr->operands[i]->type->is_vector()) {
156 found_vector = true;
157 vector_elements = expr->operands[i]->type->vector_elements;
158 break;
159 }
160 }
161 if (!found_vector)
162 return visit_continue;
163
164 switch (expr->operation) {
165 case ir_unop_interpolate_at_centroid:
166 case ir_binop_interpolate_at_offset:
167 case ir_binop_interpolate_at_sample:
168 return visit_continue;
169
170 default:
171 break;
172 }
173
174 /* Store the expression operands in temps so we can use them
175 * multiple times.
176 */
177 for (i = 0; i < expr->get_num_operands(); i++) {
178 ir_assignment *assign;
179 ir_dereference *deref;
180
181 assert(!expr->operands[i]->type->is_matrix());
182
183 op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
184 "channel_expressions",
185 ir_var_temporary);
186 ir->insert_before(op_var[i]);
187
188 deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
189 assign = new(mem_ctx) ir_assignment(deref,
190 expr->operands[i],
191 NULL);
192 ir->insert_before(assign);
193 }
194
195 const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
196 1, 1);
197
198 /* OK, time to break down this vector operation. */
199 switch (expr->operation) {
200 case ir_unop_bit_not:
201 case ir_unop_logic_not:
202 case ir_unop_neg:
203 case ir_unop_abs:
204 case ir_unop_sign:
205 case ir_unop_rcp:
206 case ir_unop_rsq:
207 case ir_unop_sqrt:
208 case ir_unop_exp:
209 case ir_unop_log:
210 case ir_unop_exp2:
211 case ir_unop_log2:
212 case ir_unop_bitcast_i2f:
213 case ir_unop_bitcast_f2i:
214 case ir_unop_bitcast_f2u:
215 case ir_unop_bitcast_u2f:
216 case ir_unop_i2u:
217 case ir_unop_u2i:
218 case ir_unop_f2i:
219 case ir_unop_f2u:
220 case ir_unop_i2f:
221 case ir_unop_f2b:
222 case ir_unop_b2f:
223 case ir_unop_i2b:
224 case ir_unop_b2i:
225 case ir_unop_u2f:
226 case ir_unop_trunc:
227 case ir_unop_ceil:
228 case ir_unop_floor:
229 case ir_unop_fract:
230 case ir_unop_round_even:
231 case ir_unop_sin:
232 case ir_unop_cos:
233 case ir_unop_dFdx:
234 case ir_unop_dFdx_coarse:
235 case ir_unop_dFdx_fine:
236 case ir_unop_dFdy:
237 case ir_unop_dFdy_coarse:
238 case ir_unop_dFdy_fine:
239 case ir_unop_bitfield_reverse:
240 case ir_unop_bit_count:
241 case ir_unop_find_msb:
242 case ir_unop_find_lsb:
243 case ir_unop_saturate:
244 case ir_unop_subroutine_to_int:
245 for (i = 0; i < vector_elements; i++) {
246 ir_rvalue *op0 = get_element(op_var[0], i);
247
248 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
249 element_type,
250 op0,
251 NULL));
252 }
253 break;
254
255 case ir_binop_add:
256 case ir_binop_sub:
257 case ir_binop_mul:
258 case ir_binop_imul_high:
259 case ir_binop_div:
260 case ir_binop_carry:
261 case ir_binop_borrow:
262 case ir_binop_mod:
263 case ir_binop_min:
264 case ir_binop_max:
265 case ir_binop_pow:
266 case ir_binop_lshift:
267 case ir_binop_rshift:
268 case ir_binop_bit_and:
269 case ir_binop_bit_xor:
270 case ir_binop_bit_or:
271 case ir_binop_logic_and:
272 case ir_binop_logic_xor:
273 case ir_binop_logic_or:
274 case ir_binop_less:
275 case ir_binop_greater:
276 case ir_binop_lequal:
277 case ir_binop_gequal:
278 case ir_binop_equal:
279 case ir_binop_nequal:
280 for (i = 0; i < vector_elements; i++) {
281 ir_rvalue *op0 = get_element(op_var[0], i);
282 ir_rvalue *op1 = get_element(op_var[1], i);
283
284 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
285 element_type,
286 op0,
287 op1));
288 }
289 break;
290
291 case ir_binop_dot: {
292 ir_expression *last = NULL;
293 for (i = 0; i < vector_elements; i++) {
294 ir_rvalue *op0 = get_element(op_var[0], i);
295 ir_rvalue *op1 = get_element(op_var[1], i);
296 ir_expression *temp;
297
298 temp = new(mem_ctx) ir_expression(ir_binop_mul,
299 element_type,
300 op0,
301 op1);
302 if (last) {
303 last = new(mem_ctx) ir_expression(ir_binop_add,
304 element_type,
305 temp,
306 last);
307 } else {
308 last = temp;
309 }
310 }
311 assign(ir, 0, last);
312 break;
313 }
314
315 case ir_binop_all_equal:
316 case ir_binop_any_nequal: {
317 ir_expression *last = NULL;
318 for (i = 0; i < vector_elements; i++) {
319 ir_rvalue *op0 = get_element(op_var[0], i);
320 ir_rvalue *op1 = get_element(op_var[1], i);
321 ir_expression *temp;
322 ir_expression_operation join;
323
324 if (expr->operation == ir_binop_all_equal)
325 join = ir_binop_logic_and;
326 else
327 join = ir_binop_logic_or;
328
329 temp = new(mem_ctx) ir_expression(expr->operation,
330 element_type,
331 op0,
332 op1);
333 if (last) {
334 last = new(mem_ctx) ir_expression(join,
335 element_type,
336 temp,
337 last);
338 } else {
339 last = temp;
340 }
341 }
342 assign(ir, 0, last);
343 break;
344 }
345 case ir_unop_noise:
346 unreachable("noise should have been broken down to function call");
347
348 case ir_binop_ubo_load:
349 case ir_unop_get_buffer_size:
350 unreachable("not yet supported");
351
352 case ir_triop_fma:
353 case ir_triop_lrp:
354 case ir_triop_csel:
355 case ir_triop_bitfield_extract:
356 for (i = 0; i < vector_elements; i++) {
357 ir_rvalue *op0 = get_element(op_var[0], i);
358 ir_rvalue *op1 = get_element(op_var[1], i);
359 ir_rvalue *op2 = get_element(op_var[2], i);
360
361 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
362 element_type,
363 op0,
364 op1,
365 op2));
366 }
367 break;
368
369 case ir_quadop_bitfield_insert:
370 for (i = 0; i < vector_elements; i++) {
371 ir_rvalue *op0 = get_element(op_var[0], i);
372 ir_rvalue *op1 = get_element(op_var[1], i);
373 ir_rvalue *op2 = get_element(op_var[2], i);
374 ir_rvalue *op3 = get_element(op_var[3], i);
375
376 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
377 element_type,
378 op0,
379 op1,
380 op2,
381 op3));
382 }
383 break;
384
385 case ir_unop_pack_snorm_2x16:
386 case ir_unop_pack_snorm_4x8:
387 case ir_unop_pack_unorm_2x16:
388 case ir_unop_pack_unorm_4x8:
389 case ir_unop_pack_half_2x16:
390 case ir_unop_unpack_snorm_2x16:
391 case ir_unop_unpack_snorm_4x8:
392 case ir_unop_unpack_unorm_2x16:
393 case ir_unop_unpack_unorm_4x8:
394 case ir_unop_unpack_half_2x16:
395 case ir_binop_ldexp:
396 case ir_binop_vector_extract:
397 case ir_triop_vector_insert:
398 case ir_quadop_vector:
399 case ir_unop_ssbo_unsized_array_length:
400 unreachable("should have been lowered");
401
402 case ir_unop_unpack_half_2x16_split_x:
403 case ir_unop_unpack_half_2x16_split_y:
404 case ir_binop_pack_half_2x16_split:
405 case ir_unop_interpolate_at_centroid:
406 case ir_binop_interpolate_at_offset:
407 case ir_binop_interpolate_at_sample:
408 unreachable("not reached: expression operates on scalars only");
409
410 case ir_unop_pack_double_2x32:
411 case ir_unop_unpack_double_2x32:
412 case ir_unop_frexp_sig:
413 case ir_unop_frexp_exp:
414 case ir_unop_d2f:
415 case ir_unop_f2d:
416 case ir_unop_d2i:
417 case ir_unop_i2d:
418 case ir_unop_d2u:
419 case ir_unop_u2d:
420 case ir_unop_d2b:
421 unreachable("no fp64 support yet");
422 }
423
424 ir->remove();
425 this->progress = true;
426
427 return visit_continue;
428 }