glsl: Add a pass to lower bitfield-insert into bfm+bfi.
[mesa.git] / src / glsl / lower_instructions.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_instructions.cpp
26 *
27 * Many GPUs lack native instructions for certain expression operations, and
28 * must replace them with some other expression tree. This pass lowers some
29 * of the most common cases, allowing the lowering code to be implemented once
30 * rather than in each driver backend.
31 *
32 * Currently supported transformations:
33 * - SUB_TO_ADD_NEG
34 * - DIV_TO_MUL_RCP
35 * - INT_DIV_TO_MUL_RCP
36 * - EXP_TO_EXP2
37 * - POW_TO_EXP2
38 * - LOG_TO_LOG2
39 * - MOD_TO_FRACT
40 * - LRP_TO_ARITH
41 * - BITFIELD_INSERT_TO_BFM_BFI
42 *
43 * SUB_TO_ADD_NEG:
44 * ---------------
45 * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
46 *
47 * This simplifies expression reassociation, and for many backends
48 * there is no subtract operation separate from adding the negation.
49 * For backends with native subtract operations, they will probably
50 * want to recognize add(op0, neg(op1)) or the other way around to
51 * produce a subtract anyway.
52 *
53 * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
54 * --------------------------------------
55 * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
56 *
57 * Many GPUs don't have a divide instruction (945 and 965 included),
58 * but they do have an RCP instruction to compute an approximate
59 * reciprocal. By breaking the operation down, constant reciprocals
60 * can get constant folded.
61 *
62 * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
63 * handles the integer case, converting to and from floating point so that
64 * RCP is possible.
65 *
66 * EXP_TO_EXP2 and LOG_TO_LOG2:
67 * ----------------------------
68 * Many GPUs don't have a base e log or exponent instruction, but they
69 * do have base 2 versions, so this pass converts exp and log to exp2
70 * and log2 operations.
71 *
72 * POW_TO_EXP2:
73 * -----------
74 * Many older GPUs don't have an x**y instruction. For these GPUs, convert
75 * x**y to 2**(y * log2(x)).
76 *
77 * MOD_TO_FRACT:
78 * -------------
79 * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
80 *
81 * Many GPUs don't have a MOD instruction (945 and 965 included), and
82 * if we have to break it down like this anyway, it gives an
83 * opportunity to do things like constant fold the (1.0 / op1) easily.
84 *
85 * LRP_TO_ARITH:
86 * -------------
87 * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
88 *
89 * BITFIELD_INSERT_TO_BFM_BFI:
90 * ---------------------------
91 * Breaks ir_quadop_bitfield_insert into ir_binop_bfm (bitfield mask) and
92 * ir_triop_bfi (bitfield insert).
93 *
94 * Many GPUs implement the bitfieldInsert() built-in from ARB_gpu_shader5
95 * with a pair of instructions.
96 *
97 */
98
99 #include "main/core.h" /* for M_LOG2E */
100 #include "glsl_types.h"
101 #include "ir.h"
102 #include "ir_builder.h"
103 #include "ir_optimization.h"
104
105 using namespace ir_builder;
106
107 class lower_instructions_visitor : public ir_hierarchical_visitor {
108 public:
109 lower_instructions_visitor(unsigned lower)
110 : progress(false), lower(lower) { }
111
112 ir_visitor_status visit_leave(ir_expression *);
113
114 bool progress;
115
116 private:
117 unsigned lower; /** Bitfield of which operations to lower */
118
119 void sub_to_add_neg(ir_expression *);
120 void div_to_mul_rcp(ir_expression *);
121 void int_div_to_mul_rcp(ir_expression *);
122 void mod_to_fract(ir_expression *);
123 void exp_to_exp2(ir_expression *);
124 void pow_to_exp2(ir_expression *);
125 void log_to_log2(ir_expression *);
126 void lrp_to_arith(ir_expression *);
127 void bitfield_insert_to_bfm_bfi(ir_expression *);
128 };
129
/**
 * Determine if a particular type of lowering should occur
 *
 * Only usable inside lower_instructions_visitor member functions, since it
 * reads this->lower.  The argument is parenthesized so that a compound mask
 * such as lowering(A | B) is tested as a whole instead of being split apart
 * by operator precedence.
 */
#define lowering(x) (this->lower & (x))
134
135 bool
136 lower_instructions(exec_list *instructions, unsigned what_to_lower)
137 {
138 lower_instructions_visitor v(what_to_lower);
139
140 visit_list_elements(&v, instructions);
141 return v.progress;
142 }
143
144 void
145 lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
146 {
147 ir->operation = ir_binop_add;
148 ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
149 ir->operands[1], NULL);
150 this->progress = true;
151 }
152
153 void
154 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
155 {
156 assert(ir->operands[1]->type->is_float());
157
158 /* New expression for the 1.0 / op1 */
159 ir_rvalue *expr;
160 expr = new(ir) ir_expression(ir_unop_rcp,
161 ir->operands[1]->type,
162 ir->operands[1]);
163
164 /* op0 / op1 -> op0 * (1.0 / op1) */
165 ir->operation = ir_binop_mul;
166 ir->operands[1] = expr;
167
168 this->progress = true;
169 }
170
171 void
172 lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
173 {
174 assert(ir->operands[1]->type->is_integer());
175
176 /* Be careful with integer division -- we need to do it as a
177 * float and re-truncate, since rcp(n > 1) of an integer would
178 * just be 0.
179 */
180 ir_rvalue *op0, *op1;
181 const struct glsl_type *vec_type;
182
183 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
184 ir->operands[1]->type->vector_elements,
185 ir->operands[1]->type->matrix_columns);
186
187 if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
188 op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
189 else
190 op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
191
192 op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
193
194 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
195 ir->operands[0]->type->vector_elements,
196 ir->operands[0]->type->matrix_columns);
197
198 if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
199 op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
200 else
201 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
202
203 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
204 ir->type->vector_elements,
205 ir->type->matrix_columns);
206
207 op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
208
209 if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {
210 ir->operation = ir_unop_f2i;
211 ir->operands[0] = op0;
212 } else {
213 ir->operation = ir_unop_i2u;
214 ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
215 }
216 ir->operands[1] = NULL;
217
218 this->progress = true;
219 }
220
221 void
222 lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
223 {
224 ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
225
226 ir->operation = ir_unop_exp2;
227 ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
228 ir->operands[0], log2_e);
229 this->progress = true;
230 }
231
232 void
233 lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
234 {
235 ir_expression *const log2_x =
236 new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
237 ir->operands[0]);
238
239 ir->operation = ir_unop_exp2;
240 ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
241 ir->operands[1], log2_x);
242 ir->operands[1] = NULL;
243 this->progress = true;
244 }
245
246 void
247 lower_instructions_visitor::log_to_log2(ir_expression *ir)
248 {
249 ir->operation = ir_binop_mul;
250 ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
251 ir->operands[0], NULL);
252 ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
253 this->progress = true;
254 }
255
256 void
257 lower_instructions_visitor::mod_to_fract(ir_expression *ir)
258 {
259 ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
260 ir_var_temporary);
261 this->base_ir->insert_before(temp);
262
263 ir_assignment *const assign =
264 new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
265 ir->operands[1], NULL);
266
267 this->base_ir->insert_before(assign);
268
269 ir_expression *const div_expr =
270 new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
271 ir->operands[0],
272 new(ir) ir_dereference_variable(temp));
273
274 /* Don't generate new IR that would need to be lowered in an additional
275 * pass.
276 */
277 if (lowering(DIV_TO_MUL_RCP))
278 div_to_mul_rcp(div_expr);
279
280 ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
281 ir->operands[0]->type,
282 div_expr,
283 NULL);
284
285 ir->operation = ir_binop_mul;
286 ir->operands[0] = new(ir) ir_dereference_variable(temp);
287 ir->operands[1] = expr;
288 this->progress = true;
289 }
290
291 void
292 lower_instructions_visitor::lrp_to_arith(ir_expression *ir)
293 {
294 /* (lrp x y a) -> x*(1-a) + y*a */
295
296 /* Save op2 */
297 ir_variable *temp = new(ir) ir_variable(ir->operands[2]->type, "lrp_factor",
298 ir_var_temporary);
299 this->base_ir->insert_before(temp);
300 this->base_ir->insert_before(assign(temp, ir->operands[2]));
301
302 ir_constant *one = new(ir) ir_constant(1.0f);
303
304 ir->operation = ir_binop_add;
305 ir->operands[0] = mul(ir->operands[0], sub(one, temp));
306 ir->operands[1] = mul(ir->operands[1], temp);
307 ir->operands[2] = NULL;
308
309 this->progress = true;
310 }
311
312 void
313 lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)
314 {
315 /* Translates
316 * ir_quadop_bitfield_insert base insert offset bits
317 * into
318 * ir_triop_bfi (ir_binop_bfm bits offset) insert base
319 */
320
321 ir_rvalue *base_expr = ir->operands[0];
322
323 ir->operation = ir_triop_bfi;
324 ir->operands[0] = new(ir) ir_expression(ir_binop_bfm,
325 ir->type->get_base_type(),
326 ir->operands[3],
327 ir->operands[2]);
328 /* ir->operands[1] is still the value to insert. */
329 ir->operands[2] = base_expr;
330 ir->operands[3] = NULL;
331
332 this->progress = true;
333 }
334
335 ir_visitor_status
336 lower_instructions_visitor::visit_leave(ir_expression *ir)
337 {
338 switch (ir->operation) {
339 case ir_binop_sub:
340 if (lowering(SUB_TO_ADD_NEG))
341 sub_to_add_neg(ir);
342 break;
343
344 case ir_binop_div:
345 if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
346 int_div_to_mul_rcp(ir);
347 else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP))
348 div_to_mul_rcp(ir);
349 break;
350
351 case ir_unop_exp:
352 if (lowering(EXP_TO_EXP2))
353 exp_to_exp2(ir);
354 break;
355
356 case ir_unop_log:
357 if (lowering(LOG_TO_LOG2))
358 log_to_log2(ir);
359 break;
360
361 case ir_binop_mod:
362 if (lowering(MOD_TO_FRACT) && ir->type->is_float())
363 mod_to_fract(ir);
364 break;
365
366 case ir_binop_pow:
367 if (lowering(POW_TO_EXP2))
368 pow_to_exp2(ir);
369 break;
370
371 case ir_triop_lrp:
372 if (lowering(LRP_TO_ARITH))
373 lrp_to_arith(ir);
374 break;
375
376 case ir_quadop_bitfield_insert:
377 if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
378 bitfield_insert_to_bfm_bfi(ir);
379 break;
380
381 default:
382 return visit_continue;
383 }
384
385 return visit_continue;
386 }