glsl: Improve precision of mod(x,y)

author Iago Toral Quiroga <itoral@igalia.com>

Tue, 20 Jan 2015 16:09:59 +0000 (17:09 +0100)

committer Iago Toral Quiroga <itoral@igalia.com>

Tue, 3 Feb 2015 12:19:36 +0000 (13:19 +0100)
author Iago Toral Quiroga <itoral@igalia.com>
Tue, 20 Jan 2015 16:09:59 +0000 (17:09 +0100)
committer Iago Toral Quiroga <itoral@igalia.com>
Tue, 3 Feb 2015 12:19:36 +0000 (13:19 +0100)
diff --git a/src/glsl/README b/src/glsl/README

index 2f93f12ffa8a4b3f355dcaf8115d85ce02fc1ecf..bfcf69f903af13237e82c9bb81c0f5ff4433a046 100644 (file)
--- a/src/glsl/README
+++ b/src/glsl/README
@@ -187,7 +187,7 @@ You may also need to update the backends if they will see the new expr type:
  
  You can then use the new expression from builtins (if all backends
  would rather see it), or scan the IR and convert to use your new
-expression type (see ir_mod_to_fract, for example).
+expression type (see ir_mod_to_floor, for example).
  
  Q: How is memory management handled in the compiler?
  
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h

index 34e0b4b947d1f59531da2a9235c25be6c015c868..912d9103e174e40bf7aeeb9ae1d5781961fb0f94 100644 (file)
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -34,7 +34,7 @@
  #define EXP_TO_EXP2        0x04
  #define POW_TO_EXP2        0x08
  #define LOG_TO_LOG2        0x10
-#define MOD_TO_FRACT       0x20
+#define MOD_TO_FLOOR       0x20
  #define INT_DIV_TO_MUL_RCP 0x40
  #define BITFIELD_INSERT_TO_BFM_BFI 0x80
  #define LDEXP_TO_ARITH     0x100
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp

index 684285350d05a685270b3da79eb9a2d836c76602..09afe556454a662ef8ca4dbba19f32f9e3d0b687 100644 (file)
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -36,7 +36,7 @@
   * - EXP_TO_EXP2
   * - POW_TO_EXP2
   * - LOG_TO_LOG2
- * - MOD_TO_FRACT
+ * - MOD_TO_FLOOR
   * - LDEXP_TO_ARITH
   * - BITFIELD_INSERT_TO_BFM_BFI
   * - CARRY_TO_ARITH
@@ -77,14 +77,17 @@
   * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
   * x**y to 2**(y * log2(x)).
   *
- * MOD_TO_FRACT:
+ * MOD_TO_FLOOR:
   * -------------
- * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
+ * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1))
   *
   * Many GPUs don't have a MOD instruction (945 and 965 included), and
   * if we have to break it down like this anyway, it gives an
   * opportunity to do things like constant fold the (1.0 / op1) easily.
   *
+ * Note: before we used to implement this as op1 * fract(op / op1) but this
+ * implementation had significant precision errors.
+ *
   * LDEXP_TO_ARITH:
   * -------------
   * Converts ir_binop_ldexp to arithmetic and bit operations.
@@ -136,7 +139,7 @@ private:
     void sub_to_add_neg(ir_expression *);
     void div_to_mul_rcp(ir_expression *);
     void int_div_to_mul_rcp(ir_expression *);
-   void mod_to_fract(ir_expression *);
+   void mod_to_floor(ir_expression *);
     void exp_to_exp2(ir_expression *);
     void pow_to_exp2(ir_expression *);
     void log_to_log2(ir_expression *);
@@ -276,22 +279,29 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir)
  }
  
  void
-lower_instructions_visitor::mod_to_fract(ir_expression *ir)
+lower_instructions_visitor::mod_to_floor(ir_expression *ir)
  {
-   ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
-                                          ir_var_temporary);
-   this->base_ir->insert_before(temp);
-
-   ir_assignment *const assign =
-      new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
-                           ir->operands[1], NULL);
-
-   this->base_ir->insert_before(assign);
+   ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x",
+                                         ir_var_temporary);
+   ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y",
+                                         ir_var_temporary);
+   this->base_ir->insert_before(x);
+   this->base_ir->insert_before(y);
+
+   ir_assignment *const assign_x =
+      new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
+                            ir->operands[0], NULL);
+   ir_assignment *const assign_y =
+      new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
+                            ir->operands[1], NULL);
+
+   this->base_ir->insert_before(assign_x);
+   this->base_ir->insert_before(assign_y);
  
     ir_expression *const div_expr =
-      new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
-                           ir->operands[0],
-                           new(ir) ir_dereference_variable(temp));
+      new(ir) ir_expression(ir_binop_div, x->type,
+                            new(ir) ir_dereference_variable(x),
+                            new(ir) ir_dereference_variable(y));
  
     /* Don't generate new IR that would need to be lowered in an additional
      * pass.
@@ -299,14 +309,17 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
     if (lowering(DIV_TO_MUL_RCP))
        div_to_mul_rcp(div_expr);
  
-   ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
-                                          ir->operands[0]->type,
-                                          div_expr,
-                                          NULL);
+   ir_expression *const floor_expr =
+      new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
  
-   ir->operation = ir_binop_mul;
-   ir->operands[0] = new(ir) ir_dereference_variable(temp);
-   ir->operands[1] = expr;
+   ir_expression *const mul_expr =
+      new(ir) ir_expression(ir_binop_mul,
+                            new(ir) ir_dereference_variable(y),
+                            floor_expr);
+
+   ir->operation = ir_binop_sub;
+   ir->operands[0] = new(ir) ir_dereference_variable(x);
+   ir->operands[1] = mul_expr;
     this->progress = true;
  }
  
@@ -535,8 +548,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
        break;
  
     case ir_binop_mod:
-      if (lowering(MOD_TO_FRACT) && ir->type->is_float())
-        mod_to_fract(ir);
+      if (lowering(MOD_TO_FLOOR) && ir->type->is_float())
+        mod_to_floor(ir);
        break;
  
     case ir_binop_pow:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index f5d73838763d924a52fe3c62cf73ecfcd3413668..6cddcf5e7e97d019562e63eba6090ab3ea65b046 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -877,7 +877,7 @@ fs_visitor::visit(ir_expression *ir)
        break;
     }
     case ir_binop_mod:
-      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+      /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
        assert(ir->type->is_integer());
        emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
        break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp

index be30d976c14fd6a5ab8a6b78c92d6c6e7a86813d..7321665c55617c99bead430e23f1ce6d8d4abdb5 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -157,7 +157,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
                                    ? BITFIELD_INSERT_TO_BFM_BFI
                                    : 0;
        lower_instructions(shader->base.ir,
-                        MOD_TO_FRACT |
+                        MOD_TO_FLOOR |
                          DIV_TO_MUL_RCP |
                          SUB_TO_ADD_NEG |
                          EXP_TO_EXP2 |
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index 8b8b27f8e7b43659dcdbcafb15040ab99e2b62de..8129118e28dca7e3be6ca4c834f9d803b157bcd5 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1522,7 +1522,7 @@ vec4_visitor::visit(ir_expression *ir)
        break;
     }
     case ir_binop_mod:
-      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+      /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
        assert(ir->type->is_integer());
        emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
        break;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp

index 038f66b6d9ee2c6969fa7e701bd3f48714f4f8e7..725e51d63c3b889d3a31aa7ecc2ad1851004d47e 100644 (file)
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1152,7 +1152,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
        assert(!"not reached: should be handled by ir_div_to_mul_rcp");
        break;
     case ir_binop_mod:
-      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+      /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
        assert(ir->type->is_integer());
        emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
        break;
@@ -2943,7 +2943,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
  
          /* Lowering */
          do_mat_op_to_vec(ir);
-        lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+        lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
                                  | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
                                  | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
  
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index 2ed7a3bf9b8552e8cb37ee508ae826a06d68a39c..b84f39dd183edd451d7dca893b5e0d133ba75214 100644 (file)
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5419,7 +5419,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
           lower_offset_arrays(ir);
        do_mat_op_to_vec(ir);
        lower_instructions(ir,
-                         MOD_TO_FRACT |
+                         MOD_TO_FLOOR |
                           DIV_TO_MUL_RCP |
                           EXP_TO_EXP2 |
                           LOG_TO_LOG2 |
author	Iago Toral Quiroga <itoral@igalia.com>
	Tue, 20 Jan 2015 16:09:59 +0000 (17:09 +0100)
committer	Iago Toral Quiroga <itoral@igalia.com>
	Tue, 3 Feb 2015 12:19:36 +0000 (13:19 +0100)
src/glsl/README		patch \| blob \| history
src/glsl/ir_optimization.h		patch \| blob \| history
src/glsl/lower_instructions.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_shader.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp		patch \| blob \| history
src/mesa/program/ir_to_mesa.cpp		patch \| blob \| history
src/mesa/state_tracker/st_glsl_to_tgsi.cpp		patch \| blob \| history