src/glsl/ir_div_to_mul_rcp.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file ir_div_to_mul_rcp.cpp
  26  *
   27  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  28  *
  29  * Many GPUs don't have a divide instruction (945 and 965 included),
  30  * but they do have an RCP instruction to compute an approximate
  31  * reciprocal.  By breaking the operation down, constant reciprocals
  32  * can get constant folded.
  33  */
  34
  35 #include "ir.h"
  36 #include "glsl_types.h"
  37
  38 class ir_div_to_mul_rcp_visitor : public ir_hierarchical_visitor {
  39 public:
  40    ir_div_to_mul_rcp_visitor()
  41    {
  42       this->made_progress = false;
  43    }
  44
  45    ir_visitor_status visit_leave(ir_expression *);
  46
  47    bool made_progress;
  48 };
  49
  50 bool
  51 do_div_to_mul_rcp(exec_list *instructions)
  52 {
  53    ir_div_to_mul_rcp_visitor v;
  54
  55    visit_list_elements(&v, instructions);
  56    return v.made_progress;
  57 }
  58
  59 ir_visitor_status
  60 ir_div_to_mul_rcp_visitor::visit_leave(ir_expression *ir)
  61 {
  62    if (ir->operation != ir_binop_div)
  63       return visit_continue;
  64
  65    if (ir->operands[1]->type->base_type != GLSL_TYPE_INT &&
  66        ir->operands[1]->type->base_type != GLSL_TYPE_UINT) {
  67       /* New expression for the 1.0 / op1 */
  68       ir_rvalue *expr;
  69       expr = new(ir) ir_expression(ir_unop_rcp,
  70                                    ir->operands[1]->type,
  71                                    ir->operands[1],
  72                                    NULL);
  73
  74       /* op0 / op1 -> op0 * (1.0 / op1) */
  75       ir->operation = ir_binop_mul;
  76       ir->operands[1] = expr;
  77    } else {
  78       /* Be careful with integer division -- we need to do it as a
  79        * float and re-truncate, since rcp(n > 1) of an integer would
  80        * just be 0.
  81        */
  82       ir_rvalue *op0, *op1;
  83       const struct glsl_type *vec_type;
  84
  85       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
  86                                          ir->operands[1]->type->vector_elements,
  87                                          ir->operands[1]->type->matrix_columns);
  88
  89       if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
  90          op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
  91       else
  92          op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
  93
  94       op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
  95
  96       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
  97                                          ir->operands[0]->type->vector_elements,
  98                                          ir->operands[0]->type->matrix_columns);
  99
 100       if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
 101          op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
 102       else
 103          op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
 104
 105       op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
 106
 107       ir->operation = ir_unop_f2i;
 108       ir->operands[0] = op0;
 109       ir->operands[1] = NULL;
 110    }
 111
 112    this->made_progress = true;
 113
 114    return visit_continue;
 115 }