src/glsl/lower_instructions.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file lower_instructions.cpp
  26  *
  27  * Many GPUs lack native instructions for certain expression operations, and
  28  * must replace them with some other expression tree.  This pass lowers some
  29  * of the most common cases, allowing the lowering code to be implemented once
  30  * rather than in each driver backend.
  31  *
  32  * Currently supported transformations:
  33  * - SUB_TO_ADD_NEG
  34  * - DIV_TO_MUL_RCP
  35  * - EXP_TO_EXP2
  36  * - LOG_TO_LOG2
  37  * - MOD_TO_FRACT
  38  *
  39  * SUB_TO_ADD_NEG:
  40  * ---------------
  41  * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
  42  *
  43  * This simplifies expression reassociation, and for many backends
  44  * there is no subtract operation separate from adding the negation.
  45  * For backends with native subtract operations, they will probably
  46  * want to recognize add(op0, neg(op1)) or the other way around to
  47  * produce a subtract anyway.
  48  *
  49  * DIV_TO_MUL_RCP:
  50  * ---------------
  51  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  52  *
  53  * Many GPUs don't have a divide instruction (945 and 965 included),
  54  * but they do have an RCP instruction to compute an approximate
  55  * reciprocal.  By breaking the operation down, constant reciprocals
  56  * can get constant folded.
  57  *
  58  * EXP_TO_EXP2 and LOG_TO_LOG2:
  59  * ----------------------------
  60  * Many GPUs don't have a base e log or exponent instruction, but they
  61  * do have base 2 versions, so this pass converts exp and log to exp2
  62  * and log2 operations.
  63  *
  64  * MOD_TO_FRACT:
  65  * -------------
  66  * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
  67  *
  68  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  69  * if we have to break it down like this anyway, it gives an
  70  * opportunity to do things like constant fold the (1.0 / op1) easily.
  71  */
  72
  73 #include "main/core.h" /* for M_E */
  74 #include "glsl_types.h"
  75 #include "ir.h"
  76 #include "ir_optimization.h"
  77
  78 class lower_instructions_visitor : public ir_hierarchical_visitor {
  79 public:
  80    lower_instructions_visitor(unsigned lower)
  81       : progress(false), lower(lower) { }
  82
  83    ir_visitor_status visit_leave(ir_expression *);
  84
  85    bool progress;
  86
  87 private:
  88    unsigned lower; /** Bitfield of which operations to lower */
  89
  90    void sub_to_add_neg(ir_expression *);
  91    void div_to_mul_rcp(ir_expression *);
  92    void mod_to_fract(ir_expression *);
  93    void exp_to_exp2(ir_expression *);
  94    void log_to_log2(ir_expression *);
  95 };
  96
  97 /**
  98  * Determine if a particular type of lowering should occur
  99  */
 100 #define lowering(x) (this->lower & x)
 101
 102 bool
 103 lower_instructions(exec_list *instructions, unsigned what_to_lower)
 104 {
 105    lower_instructions_visitor v(what_to_lower);
 106
 107    visit_list_elements(&v, instructions);
 108    return v.progress;
 109 }
 110
 111 void
 112 lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
 113 {
 114    ir->operation = ir_binop_add;
 115    ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
 116                                            ir->operands[1], NULL);
 117    this->progress = true;
 118 }
 119
 120 void
 121 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
 122 {
 123    if (!ir->operands[1]->type->is_integer()) {
 124       /* New expression for the 1.0 / op1 */
 125       ir_rvalue *expr;
 126       expr = new(ir) ir_expression(ir_unop_rcp,
 127                                    ir->operands[1]->type,
 128                                    ir->operands[1],
 129                                    NULL);
 130
 131       /* op0 / op1 -> op0 * (1.0 / op1) */
 132       ir->operation = ir_binop_mul;
 133       ir->operands[1] = expr;
 134    } else {
 135       /* Be careful with integer division -- we need to do it as a
 136        * float and re-truncate, since rcp(n > 1) of an integer would
 137        * just be 0.
 138        */
 139       ir_rvalue *op0, *op1;
 140       const struct glsl_type *vec_type;
 141
 142       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 143                                          ir->operands[1]->type->vector_elements,
 144                                          ir->operands[1]->type->matrix_columns);
 145
 146       if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
 147          op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
 148       else
 149          op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
 150
 151       op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
 152
 153       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 154                                          ir->operands[0]->type->vector_elements,
 155                                          ir->operands[0]->type->matrix_columns);
 156
 157       if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
 158          op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
 159       else
 160          op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
 161
 162       op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
 163
 164       ir->operation = ir_unop_f2i;
 165       ir->operands[0] = op0;
 166       ir->operands[1] = NULL;
 167    }
 168
 169    this->progress = true;
 170 }
 171
 172 void
 173 lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
 174 {
 175    ir_constant *log2_e = new(ir) ir_constant(log2f(M_E));
 176
 177    ir->operation = ir_unop_exp2;
 178    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
 179                                            ir->operands[0], log2_e);
 180    this->progress = true;
 181 }
 182
 183 void
 184 lower_instructions_visitor::log_to_log2(ir_expression *ir)
 185 {
 186    ir->operation = ir_binop_mul;
 187    ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
 188                                            ir->operands[0], NULL);
 189    ir->operands[1] = new(ir) ir_constant(1.0f / log2f(M_E));
 190    this->progress = true;
 191 }
 192
 193 void
 194 lower_instructions_visitor::mod_to_fract(ir_expression *ir)
 195 {
 196    ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
 197                                            ir_var_temporary);
 198    this->base_ir->insert_before(temp);
 199
 200    ir_assignment *const assign =
 201       new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
 202                             ir->operands[1], NULL);
 203
 204    this->base_ir->insert_before(assign);
 205
 206    ir_expression *const div_expr =
 207       new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
 208                             ir->operands[0],
 209                             new(ir) ir_dereference_variable(temp));
 210
 211    /* Don't generate new IR that would need to be lowered in an additional
 212     * pass.
 213     */
 214    if (lowering(DIV_TO_MUL_RCP))
 215       div_to_mul_rcp(div_expr);
 216
 217    ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
 218                                            ir->operands[0]->type,
 219                                            div_expr,
 220                                            NULL);
 221
 222    ir->operation = ir_binop_mul;
 223    ir->operands[0] = new(ir) ir_dereference_variable(temp);
 224    ir->operands[1] = expr;
 225    this->progress = true;
 226 }
 227
 228 ir_visitor_status
 229 lower_instructions_visitor::visit_leave(ir_expression *ir)
 230 {
 231    switch (ir->operation) {
 232    case ir_binop_sub:
 233       if (lowering(SUB_TO_ADD_NEG))
 234          sub_to_add_neg(ir);
 235       break;
 236
 237    case ir_binop_div:
 238       if (lowering(DIV_TO_MUL_RCP))
 239          div_to_mul_rcp(ir);
 240       break;
 241
 242    case ir_unop_exp:
 243       if (lowering(EXP_TO_EXP2))
 244          exp_to_exp2(ir);
 245       break;
 246
 247    case ir_unop_log:
 248       if (lowering(LOG_TO_LOG2))
 249          log_to_log2(ir);
 250       break;
 251
 252    case ir_binop_mod:
 253       if (lowering(MOD_TO_FRACT))
 254          mod_to_fract(ir);
 255       break;
 256
 257    default:
 258       return visit_continue;
 259    }
 260
 261    return visit_continue;
 262 }