src/glsl/lower_instructions.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file lower_instructions.cpp
  26  *
  27  * Many GPUs lack native instructions for certain expression operations, and
  28  * must replace them with some other expression tree.  This pass lowers some
  29  * of the most common cases, allowing the lowering code to be implemented once
  30  * rather than in each driver backend.
  31  *
  32  * Currently supported transformations:
  33  * - SUB_TO_ADD_NEG
  34  * - DIV_TO_MUL_RCP
  35  * - INT_DIV_TO_MUL_RCP
  36  * - EXP_TO_EXP2
  37  * - POW_TO_EXP2
  38  * - LOG_TO_LOG2
  39  * - MOD_TO_FRACT
  40  * - LRP_TO_ARITH
  41  * - BITFIELD_INSERT_TO_BFM_BFI
  42  *
  43  * SUB_TO_ADD_NEG:
  44  * ---------------
  45  * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
  46  *
  47  * This simplifies expression reassociation, and for many backends
  48  * there is no subtract operation separate from adding the negation.
  49  * For backends with native subtract operations, they will probably
  50  * want to recognize add(op0, neg(op1)) or the other way around to
  51  * produce a subtract anyway.
  52  *
  53  * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
  54  * --------------------------------------
  55  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  56  *
  57  * Many GPUs don't have a divide instruction (945 and 965 included),
  58  * but they do have an RCP instruction to compute an approximate
  59  * reciprocal.  By breaking the operation down, constant reciprocals
  60  * can get constant folded.
  61  *
  62  * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
  63  * handles the integer case, converting to and from floating point so that
  64  * RCP is possible.
  65  *
  66  * EXP_TO_EXP2 and LOG_TO_LOG2:
  67  * ----------------------------
  68  * Many GPUs don't have a base e log or exponent instruction, but they
  69  * do have base 2 versions, so this pass converts exp and log to exp2
  70  * and log2 operations.
  71  *
  72  * POW_TO_EXP2:
  73  * -----------
  74  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
  75  * x**y to 2**(y * log2(x)).
  76  *
  77  * MOD_TO_FRACT:
  78  * -------------
  79  * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
  80  *
  81  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  82  * if we have to break it down like this anyway, it gives an
  83  * opportunity to do things like constant fold the (1.0 / op1) easily.
  84  *
  85  * LRP_TO_ARITH:
  86  * -------------
  87  * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
  88  *
  89  * BITFIELD_INSERT_TO_BFM_BFI:
  90  * ---------------------------
  91  * Breaks ir_quadop_bitfield_insert into ir_binop_bfm (bitfield mask) and
  92  * ir_triop_bfi (bitfield insert).
  93  *
   94  * Many GPUs implement the bitfieldInsert() built-in from ARB_gpu_shader5
  95  * with a pair of instructions.
  96  *
  97  */
  98
  99 #include "main/core.h" /* for M_LOG2E */
 100 #include "glsl_types.h"
 101 #include "ir.h"
 102 #include "ir_builder.h"
 103 #include "ir_optimization.h"
 104
 105 using namespace ir_builder;
 106
 107 class lower_instructions_visitor : public ir_hierarchical_visitor {
 108 public:
 109    lower_instructions_visitor(unsigned lower)
 110       : progress(false), lower(lower) { }
 111
 112    ir_visitor_status visit_leave(ir_expression *);
 113
 114    bool progress;
 115
 116 private:
 117    unsigned lower; /** Bitfield of which operations to lower */
 118
 119    void sub_to_add_neg(ir_expression *);
 120    void div_to_mul_rcp(ir_expression *);
 121    void int_div_to_mul_rcp(ir_expression *);
 122    void mod_to_fract(ir_expression *);
 123    void exp_to_exp2(ir_expression *);
 124    void pow_to_exp2(ir_expression *);
 125    void log_to_log2(ir_expression *);
 126    void lrp_to_arith(ir_expression *);
 127    void bitfield_insert_to_bfm_bfi(ir_expression *);
 128 };
 129
 130 /**
 131  * Determine if a particular type of lowering should occur
 132  */
 133 #define lowering(x) (this->lower & x)
 134
 135 bool
 136 lower_instructions(exec_list *instructions, unsigned what_to_lower)
 137 {
 138    lower_instructions_visitor v(what_to_lower);
 139
 140    visit_list_elements(&v, instructions);
 141    return v.progress;
 142 }
 143
 144 void
 145 lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
 146 {
 147    ir->operation = ir_binop_add;
 148    ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
 149                                            ir->operands[1], NULL);
 150    this->progress = true;
 151 }
 152
 153 void
 154 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
 155 {
 156    assert(ir->operands[1]->type->is_float());
 157
 158    /* New expression for the 1.0 / op1 */
 159    ir_rvalue *expr;
 160    expr = new(ir) ir_expression(ir_unop_rcp,
 161                                 ir->operands[1]->type,
 162                                 ir->operands[1]);
 163
 164    /* op0 / op1 -> op0 * (1.0 / op1) */
 165    ir->operation = ir_binop_mul;
 166    ir->operands[1] = expr;
 167
 168    this->progress = true;
 169 }
 170
 171 void
 172 lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
 173 {
 174    assert(ir->operands[1]->type->is_integer());
 175
 176    /* Be careful with integer division -- we need to do it as a
 177     * float and re-truncate, since rcp(n > 1) of an integer would
 178     * just be 0.
 179     */
 180    ir_rvalue *op0, *op1;
 181    const struct glsl_type *vec_type;
 182
 183    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 184                                       ir->operands[1]->type->vector_elements,
 185                                       ir->operands[1]->type->matrix_columns);
 186
 187    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
 188       op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
 189    else
 190       op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
 191
 192    op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
 193
 194    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 195                                       ir->operands[0]->type->vector_elements,
 196                                       ir->operands[0]->type->matrix_columns);
 197
 198    if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
 199       op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
 200    else
 201       op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
 202
 203    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 204                                       ir->type->vector_elements,
 205                                       ir->type->matrix_columns);
 206
 207    op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
 208
 209    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {
 210       ir->operation = ir_unop_f2i;
 211       ir->operands[0] = op0;
 212    } else {
 213       ir->operation = ir_unop_i2u;
 214       ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
 215    }
 216    ir->operands[1] = NULL;
 217
 218    this->progress = true;
 219 }
 220
 221 void
 222 lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
 223 {
 224    ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
 225
 226    ir->operation = ir_unop_exp2;
 227    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
 228                                            ir->operands[0], log2_e);
 229    this->progress = true;
 230 }
 231
 232 void
 233 lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
 234 {
 235    ir_expression *const log2_x =
 236       new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
 237                             ir->operands[0]);
 238
 239    ir->operation = ir_unop_exp2;
 240    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
 241                                            ir->operands[1], log2_x);
 242    ir->operands[1] = NULL;
 243    this->progress = true;
 244 }
 245
 246 void
 247 lower_instructions_visitor::log_to_log2(ir_expression *ir)
 248 {
 249    ir->operation = ir_binop_mul;
 250    ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
 251                                            ir->operands[0], NULL);
 252    ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
 253    this->progress = true;
 254 }
 255
 256 void
 257 lower_instructions_visitor::mod_to_fract(ir_expression *ir)
 258 {
 259    ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
 260                                            ir_var_temporary);
 261    this->base_ir->insert_before(temp);
 262
 263    ir_assignment *const assign =
 264       new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
 265                             ir->operands[1], NULL);
 266
 267    this->base_ir->insert_before(assign);
 268
 269    ir_expression *const div_expr =
 270       new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
 271                             ir->operands[0],
 272                             new(ir) ir_dereference_variable(temp));
 273
 274    /* Don't generate new IR that would need to be lowered in an additional
 275     * pass.
 276     */
 277    if (lowering(DIV_TO_MUL_RCP))
 278       div_to_mul_rcp(div_expr);
 279
 280    ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
 281                                            ir->operands[0]->type,
 282                                            div_expr,
 283                                            NULL);
 284
 285    ir->operation = ir_binop_mul;
 286    ir->operands[0] = new(ir) ir_dereference_variable(temp);
 287    ir->operands[1] = expr;
 288    this->progress = true;
 289 }
 290
 291 void
 292 lower_instructions_visitor::lrp_to_arith(ir_expression *ir)
 293 {
 294    /* (lrp x y a) -> x*(1-a) + y*a */
 295
 296    /* Save op2 */
 297    ir_variable *temp = new(ir) ir_variable(ir->operands[2]->type, "lrp_factor",
 298                                            ir_var_temporary);
 299    this->base_ir->insert_before(temp);
 300    this->base_ir->insert_before(assign(temp, ir->operands[2]));
 301
 302    ir_constant *one = new(ir) ir_constant(1.0f);
 303
 304    ir->operation = ir_binop_add;
 305    ir->operands[0] = mul(ir->operands[0], sub(one, temp));
 306    ir->operands[1] = mul(ir->operands[1], temp);
 307    ir->operands[2] = NULL;
 308
 309    this->progress = true;
 310 }
 311
 312 void
 313 lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)
 314 {
 315    /* Translates
 316     *    ir_quadop_bitfield_insert base insert offset bits
 317     * into
 318     *    ir_triop_bfi (ir_binop_bfm bits offset) insert base
 319     */
 320
 321    ir_rvalue *base_expr = ir->operands[0];
 322
 323    ir->operation = ir_triop_bfi;
 324    ir->operands[0] = new(ir) ir_expression(ir_binop_bfm,
 325                                            ir->type->get_base_type(),
 326                                            ir->operands[3],
 327                                            ir->operands[2]);
 328    /* ir->operands[1] is still the value to insert. */
 329    ir->operands[2] = base_expr;
 330    ir->operands[3] = NULL;
 331
 332    this->progress = true;
 333 }
 334
 335 ir_visitor_status
 336 lower_instructions_visitor::visit_leave(ir_expression *ir)
 337 {
 338    switch (ir->operation) {
 339    case ir_binop_sub:
 340       if (lowering(SUB_TO_ADD_NEG))
 341          sub_to_add_neg(ir);
 342       break;
 343
 344    case ir_binop_div:
 345       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
 346          int_div_to_mul_rcp(ir);
 347       else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP))
 348          div_to_mul_rcp(ir);
 349       break;
 350
 351    case ir_unop_exp:
 352       if (lowering(EXP_TO_EXP2))
 353          exp_to_exp2(ir);
 354       break;
 355
 356    case ir_unop_log:
 357       if (lowering(LOG_TO_LOG2))
 358          log_to_log2(ir);
 359       break;
 360
 361    case ir_binop_mod:
 362       if (lowering(MOD_TO_FRACT) && ir->type->is_float())
 363          mod_to_fract(ir);
 364       break;
 365
 366    case ir_binop_pow:
 367       if (lowering(POW_TO_EXP2))
 368          pow_to_exp2(ir);
 369       break;
 370
 371    case ir_triop_lrp:
 372       if (lowering(LRP_TO_ARITH))
 373          lrp_to_arith(ir);
 374       break;
 375
 376    case ir_quadop_bitfield_insert:
 377       if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
 378          bitfield_insert_to_bfm_bfi(ir);
 379       break;
 380
 381    default:
 382       return visit_continue;
 383    }
 384
 385    return visit_continue;
 386 }