glsl: split DIV_TO_MUL_RCP into single- and double-precision flags

author    Nicolai Hähnle <nicolai.haehnle@amd.com>  Mon, 16 Jan 2017 15:39:06 +0000 (16:39 +0100)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>  Mon, 23 Jan 2017 15:17:19 +0000 (16:17 +0100)

diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h

index d46b9579941b30b02bde6d61d81b686cb53bf317..67a7514c7dbc22056856b9ea8e4a9642ee6bc527 100644 (file)
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -30,7 +30,7 @@
  
  /* Operations for lower_instructions() */
  #define SUB_TO_ADD_NEG     0x01
-#define DIV_TO_MUL_RCP     0x02
+#define FDIV_TO_MUL_RCP    0x02
  #define EXP_TO_EXP2        0x04
  #define POW_TO_EXP2        0x08
  #define LOG_TO_LOG2        0x10
@@ -49,6 +49,8 @@
  #define FIND_LSB_TO_FLOAT_CAST    0x20000
  #define FIND_MSB_TO_FLOAT_CAST    0x40000
  #define IMUL_HIGH_TO_MUL          0x80000
+#define DDIV_TO_MUL_RCP           0x100000
+#define DIV_TO_MUL_RCP            (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
  
  /* Opertaions for lower_64bit_integer_instructions() */
  #define MUL64                     (1U << 0)
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp

index 9fc83d1583f02e38ab842f34a9c991d42e7199f8..729cb13f84f4a1fd4673e96858155e70e3a59664 100644 (file)
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -54,8 +54,8 @@
   * want to recognize add(op0, neg(op1)) or the other way around to
   * produce a subtract anyway.
   *
- * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
- * --------------------------------------
+ * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
+ * ---------------------------------------------------------
   * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
   *
   * Many GPUs don't have a divide instruction (945 and 965 included),
@@ -63,9 +63,11 @@
   * reciprocal.  By breaking the operation down, constant reciprocals
   * can get constant folded.
   *
- * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
- * handles the integer case, converting to and from floating point so that
- * RCP is possible.
+ * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
+ * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
+ * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
+ * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
+ * point so that RCP is possible.
   *
   * EXP_TO_EXP2 and LOG_TO_LOG2:
   * ----------------------------
@@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
     /* Don't generate new IR that would need to be lowered in an additional
      * pass.
      */
-   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
+   if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
+       (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
        div_to_mul_rcp(div_expr);
  
     ir_expression *const floor_expr =
@@ -1599,8 +1602,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
     case ir_binop_div:
        if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
          int_div_to_mul_rcp(ir);
-      else if ((ir->operands[1]->type->is_float() ||
-                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
+      else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
+               (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
          div_to_mul_rcp(ir);
        break;
author	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Mon, 16 Jan 2017 15:39:06 +0000 (16:39 +0100)
committer	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Mon, 23 Jan 2017 15:17:19 +0000 (16:17 +0100)
src/compiler/glsl/ir_optimization.h		patch | blob | history
src/compiler/glsl/lower_instructions.cpp		patch | blob | history