r300/compiler: Clear empty registers after constant folding
[mesa.git] / src / glsl / ir_div_to_mul_rcp.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file ir_div_to_mul_rcp.cpp
26 *
27 * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
28 *
29 * Many GPUs don't have a divide instruction (945 and 965 included),
30 * but they do have an RCP instruction to compute an approximate
31 * reciprocal. By breaking the operation down, constant reciprocals
32 * can get constant folded.
33 */
34
35 #include "ir.h"
36 #include "glsl_types.h"
37
38 class ir_div_to_mul_rcp_visitor : public ir_hierarchical_visitor {
39 public:
40 ir_div_to_mul_rcp_visitor()
41 {
42 this->made_progress = false;
43 }
44
45 ir_visitor_status visit_leave(ir_expression *);
46
47 bool made_progress;
48 };
49
50 bool
51 do_div_to_mul_rcp(exec_list *instructions)
52 {
53 ir_div_to_mul_rcp_visitor v;
54
55 visit_list_elements(&v, instructions);
56 return v.made_progress;
57 }
58
59 ir_visitor_status
60 ir_div_to_mul_rcp_visitor::visit_leave(ir_expression *ir)
61 {
62 if (ir->operation != ir_binop_div)
63 return visit_continue;
64
65 if (ir->operands[1]->type->base_type != GLSL_TYPE_INT &&
66 ir->operands[1]->type->base_type != GLSL_TYPE_UINT) {
67 /* New expression for the 1.0 / op1 */
68 ir_rvalue *expr;
69 expr = new(ir) ir_expression(ir_unop_rcp,
70 ir->operands[1]->type,
71 ir->operands[1],
72 NULL);
73
74 /* op0 / op1 -> op0 * (1.0 / op1) */
75 ir->operation = ir_binop_mul;
76 ir->operands[1] = expr;
77 } else {
78 /* Be careful with integer division -- we need to do it as a
79 * float and re-truncate, since rcp(n > 1) of an integer would
80 * just be 0.
81 */
82 ir_rvalue *op0, *op1;
83 const struct glsl_type *vec_type;
84
85 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
86 ir->operands[1]->type->vector_elements,
87 ir->operands[1]->type->matrix_columns);
88
89 if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
90 op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
91 else
92 op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
93
94 op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
95
96 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
97 ir->operands[0]->type->vector_elements,
98 ir->operands[0]->type->matrix_columns);
99
100 if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
101 op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
102 else
103 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
104
105 op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
106
107 ir->operation = ir_unop_f2i;
108 ir->operands[0] = op0;
109 ir->operands[1] = NULL;
110 }
111
112 this->made_progress = true;
113
114 return visit_continue;
115 }