glsl: Use a separate div_to_mul_rcp lowering flag for integers.
authorBryan Cain <bryancain3@gmail.com>
Sat, 27 Aug 2011 23:32:58 +0000 (18:32 -0500)
committerKenneth Graunke <kenneth@whitecape.org>
Wed, 31 Aug 2011 19:02:18 +0000 (12:02 -0700)
Using multiply and reciprocal for integer division involves potentially
lossy floating point conversions.  This is okay for older GPUs that
represent integers as floating point, but undesirable for GPUs with
native integer division instructions.

TGSI, for example, has UDIV/IDIV instructions for integer division,
so it makes sense to handle this directly.  Likewise for i965.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Signed-off-by: Bryan Cain <bryancain3@gmail.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
src/glsl/ir_optimization.h
src/glsl/lower_instructions.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/program/ir_to_mesa.cpp
src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index f7808bdda9ad9fafc39c090ccfcf9c4e8d2f90b6..48448d4a16a92216f80b490d7853e6b09990b729 100644 (file)
  */
 
 /* Operations for lower_instructions() */
-#define SUB_TO_ADD_NEG 0x01
-#define DIV_TO_MUL_RCP 0x02
-#define EXP_TO_EXP2    0x04
-#define POW_TO_EXP2    0x08
-#define LOG_TO_LOG2    0x10
-#define MOD_TO_FRACT   0x20
+#define SUB_TO_ADD_NEG     0x01
+#define DIV_TO_MUL_RCP     0x02
+#define EXP_TO_EXP2        0x04
+#define POW_TO_EXP2        0x08
+#define LOG_TO_LOG2        0x10
+#define MOD_TO_FRACT       0x20
+#define INT_DIV_TO_MUL_RCP 0x40
 
 bool do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iterations);
 
index 23aa19bde6f12037161bee46f3b85ad69aadbf66..d79eb0a7ffb0d69f41d5fcef8e9d1674514dcb45 100644 (file)
@@ -32,6 +32,7 @@
  * Currently supported transformations:
  * - SUB_TO_ADD_NEG
  * - DIV_TO_MUL_RCP
+ * - INT_DIV_TO_MUL_RCP
  * - EXP_TO_EXP2
  * - POW_TO_EXP2
  * - LOG_TO_LOG2
@@ -47,8 +48,8 @@
  * want to recognize add(op0, neg(op1)) or the other way around to
  * produce a subtract anyway.
  *
- * DIV_TO_MUL_RCP:
- * ---------------
+ * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
+ * --------------------------------------
  * Breaks an ir_unop_div expression down to op0 * (rcp(op1)).
  *
  * Many GPUs don't have a divide instruction (945 and 965 included),
  * reciprocal.  By breaking the operation down, constant reciprocals
  * can get constant folded.
  *
+ * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
+ * handles the integer case, converting to and from floating point so that
+ * RCP is possible.
+ *
  * EXP_TO_EXP2 and LOG_TO_LOG2:
  * ----------------------------
  * Many GPUs don't have a base e log or exponent instruction, but they
@@ -95,6 +100,7 @@ private:
 
    void sub_to_add_neg(ir_expression *);
    void div_to_mul_rcp(ir_expression *);
+   void int_div_to_mul_rcp(ir_expression *);
    void mod_to_fract(ir_expression *);
    void exp_to_exp2(ir_expression *);
    void pow_to_exp2(ir_expression *);
@@ -127,60 +133,67 @@ lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
 void
 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
 {
-   if (!ir->operands[1]->type->is_integer()) {
-      /* New expression for the 1.0 / op1 */
-      ir_rvalue *expr;
-      expr = new(ir) ir_expression(ir_unop_rcp,
-                                  ir->operands[1]->type,
-                                  ir->operands[1],
-                                  NULL);
-
-      /* op0 / op1 -> op0 * (1.0 / op1) */
-      ir->operation = ir_binop_mul;
-      ir->operands[1] = expr;
+   assert(ir->operands[1]->type->is_float());
+
+   /* New expression for the 1.0 / op1 */
+   ir_rvalue *expr;
+   expr = new(ir) ir_expression(ir_unop_rcp,
+                               ir->operands[1]->type,
+                               ir->operands[1]);
+
+   /* op0 / op1 -> op0 * (1.0 / op1) */
+   ir->operation = ir_binop_mul;
+   ir->operands[1] = expr;
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
+{
+   assert(ir->operands[1]->type->is_integer());
+
+   /* Be careful with integer division -- we need to do it as a
+    * float and re-truncate, since rcp(n > 1) of an integer would
+    * just be 0.
+    */
+   ir_rvalue *op0, *op1;
+   const struct glsl_type *vec_type;
+
+   vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+                                     ir->operands[1]->type->vector_elements,
+                                     ir->operands[1]->type->matrix_columns);
+
+   if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
+      op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
+   else
+      op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
+
+   op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
+
+   vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+                                     ir->operands[0]->type->vector_elements,
+                                     ir->operands[0]->type->matrix_columns);
+
+   if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
+      op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
+   else
+      op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
+
+   vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+                                     ir->type->vector_elements,
+                                     ir->type->matrix_columns);
+
+   op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
+
+   if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {
+      ir->operation = ir_unop_f2i;
+      ir->operands[0] = op0;
    } else {
-      /* Be careful with integer division -- we need to do it as a
-       * float and re-truncate, since rcp(n > 1) of an integer would
-       * just be 0.
-       */
-      ir_rvalue *op0, *op1;
-      const struct glsl_type *vec_type;
-
-      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
-                                        ir->operands[1]->type->vector_elements,
-                                        ir->operands[1]->type->matrix_columns);
-
-      if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
-        op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
-      else
-        op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
-
-      op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
-
-      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
-                                        ir->operands[0]->type->vector_elements,
-                                        ir->operands[0]->type->matrix_columns);
-
-      if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
-        op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
-      else
-        op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
-
-      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
-                                        ir->type->vector_elements,
-                                        ir->type->matrix_columns);
-
-      op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
-
-      if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {
-        ir->operation = ir_unop_f2i;
-        ir->operands[0] = op0;
-      } else {
-        ir->operation = ir_unop_i2u;
-        ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
-      }
-      ir->operands[1] = NULL;
+      ir->operation = ir_unop_i2u;
+      ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
    }
+   ir->operands[1] = NULL;
 
    this->progress = true;
 }
@@ -265,7 +278,9 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
       break;
 
    case ir_binop_div:
-      if (lowering(DIV_TO_MUL_RCP))
+      if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
+        int_div_to_mul_rcp(ir);
+      else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP))
         div_to_mul_rcp(ir);
       break;
 
index 0a21094708bfc74ba51f8027a96215cb01fd2c3a..a6ed810a16e17a88f99aa4f72f3543d9fcdd737a 100644 (file)
@@ -100,6 +100,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       lower_instructions(shader->ir,
                         MOD_TO_FRACT |
                         DIV_TO_MUL_RCP |
+                        INT_DIV_TO_MUL_RCP |
                         SUB_TO_ADD_NEG |
                         EXP_TO_EXP2 |
                         LOG_TO_LOG2);
index 6820e4c6ba746bc2fad281e6d721c21953059dd1..dd154db8b039ec43255b887513f04e30b006cc8b 100644 (file)
@@ -3232,7 +3232,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
         /* Lowering */
         do_mat_op_to_vec(ir);
         lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
-                                | LOG_TO_LOG2
+                                | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
                                 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
 
         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
index 3fbb0cdd248da4022c18e4ac76e4cc68a3957ef7..98bea69b9596ad12e15bbda550e400d326744772 100644 (file)
@@ -4982,7 +4982,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
          /* Lowering */
          do_mat_op_to_vec(ir);
          lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
-                                | LOG_TO_LOG2
+                                | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
                                 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
 
          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;