From 5dfb085ff325df3dbefda515f06106469babbefc Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 20 Jan 2015 17:09:59 +0100 Subject: [PATCH] glsl: Improve precision of mod(x,y) Currently, Mesa uses the lowering pass MOD_TO_FRACT to implement mod(x,y) as y * fract(x/y). This implementation has a down side though: it introduces precision errors due to the fract() operation. Even worse, since the result of fract() is multiplied by y, the larger y gets the larger the precision error we produce, so for large enough numbers the precision loss is significant. Some examples on i965: Operation Precision error ----------------------------------------------------- mod(-1.951171875, 1.9980468750) 0.0000000447 mod(121.57, 13.29) 0.0000023842 mod(3769.12, 321.99) 0.0000762939 mod(3769.12, 1321.99) 0.0001220703 mod(-987654.125, 123456.984375) 0.0160663128 mod( 987654.125, 123456.984375) 0.0312500000 This patch replaces the current lowering pass with a different one (MOD_TO_FLOOR) that follows the recommended implementation in the GLSL man pages: mod(x,y) = x - y * floor(x/y) This implementation eliminates the precision errors at the expense of an additional add instruction on some systems. On systems that can do negate with multiply-add in a single operation this new implementation would come at no additional cost. v2 (Ian Romanick) - Do not clone operands because when they are expressions we would be duplicating them and that can lead to suboptimal code. Fixes the following 16 dEQP tests: dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.mediump_* dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.highp_* Reviewed-by: Ian Romanick --- src/glsl/README | 2 +- src/glsl/ir_optimization.h | 2 +- src/glsl/lower_instructions.cpp | 65 +++++++++++-------- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +- .../drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- src/mesa/program/ir_to_mesa.cpp | 4 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 8 files changed, 47 insertions(+), 34 deletions(-) diff --git a/src/glsl/README b/src/glsl/README index 2f93f12ffa8..bfcf69f903a 100644 --- a/src/glsl/README +++ b/src/glsl/README @@ -187,7 +187,7 @@ You may also need to update the backends if they will see the new expr type: You can then use the new expression from builtins (if all backends would rather see it), or scan the IR and convert to use your new -expression type (see ir_mod_to_fract, for example). +expression type (see ir_mod_to_floor, for example). Q: How is memory management handled in the compiler? diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 34e0b4b947d..912d9103e17 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -34,7 +34,7 @@ #define EXP_TO_EXP2 0x04 #define POW_TO_EXP2 0x08 #define LOG_TO_LOG2 0x10 -#define MOD_TO_FRACT 0x20 +#define MOD_TO_FLOOR 0x20 #define INT_DIV_TO_MUL_RCP 0x40 #define BITFIELD_INSERT_TO_BFM_BFI 0x80 #define LDEXP_TO_ARITH 0x100 diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 684285350d0..09afe556454 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -36,7 +36,7 @@ * - EXP_TO_EXP2 * - POW_TO_EXP2 * - LOG_TO_LOG2 - * - MOD_TO_FRACT + * - MOD_TO_FLOOR * - LDEXP_TO_ARITH * - BITFIELD_INSERT_TO_BFM_BFI * - CARRY_TO_ARITH @@ -77,14 +77,17 @@ * Many older GPUs don't have an x**y instruction. For these GPUs, convert * x**y to 2**(y * log2(x)). * - * MOD_TO_FRACT: + * MOD_TO_FLOOR: * ------------- - * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1)) + * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1)) * * Many GPUs don't have a MOD instruction (945 and 965 included), and * if we have to break it down like this anyway, it gives an * opportunity to do things like constant fold the (1.0 / op1) easily. * + * Note: before we used to implement this as op1 * fract(op / op1) but this + * implementation had significant precision errors. + * * LDEXP_TO_ARITH: * ------------- * Converts ir_binop_ldexp to arithmetic and bit operations. @@ -136,7 +139,7 @@ private: void sub_to_add_neg(ir_expression *); void div_to_mul_rcp(ir_expression *); void int_div_to_mul_rcp(ir_expression *); - void mod_to_fract(ir_expression *); + void mod_to_floor(ir_expression *); void exp_to_exp2(ir_expression *); void pow_to_exp2(ir_expression *); void log_to_log2(ir_expression *); @@ -276,22 +279,29 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir) } void -lower_instructions_visitor::mod_to_fract(ir_expression *ir) +lower_instructions_visitor::mod_to_floor(ir_expression *ir) { - ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b", - ir_var_temporary); - this->base_ir->insert_before(temp); - - ir_assignment *const assign = - new(ir) ir_assignment(new(ir) ir_dereference_variable(temp), - ir->operands[1], NULL); - - this->base_ir->insert_before(assign); + ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x", + ir_var_temporary); + ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y", + ir_var_temporary); + this->base_ir->insert_before(x); + this->base_ir->insert_before(y); + + ir_assignment *const assign_x = + new(ir) ir_assignment(new(ir) ir_dereference_variable(x), + ir->operands[0], NULL); + ir_assignment *const assign_y = + new(ir) ir_assignment(new(ir) ir_dereference_variable(y), + ir->operands[1], NULL); + + this->base_ir->insert_before(assign_x); + this->base_ir->insert_before(assign_y); ir_expression *const div_expr = - new(ir) ir_expression(ir_binop_div, ir->operands[0]->type, - ir->operands[0], - new(ir) ir_dereference_variable(temp)); + new(ir) ir_expression(ir_binop_div, x->type, + new(ir) ir_dereference_variable(x), + new(ir) ir_dereference_variable(y)); /* Don't generate new IR that would need to be lowered in an additional * pass. @@ -299,14 +309,17 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir) if (lowering(DIV_TO_MUL_RCP)) div_to_mul_rcp(div_expr); - ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract, - ir->operands[0]->type, - div_expr, - NULL); + ir_expression *const floor_expr = + new(ir) ir_expression(ir_unop_floor, x->type, div_expr); - ir->operation = ir_binop_mul; - ir->operands[0] = new(ir) ir_dereference_variable(temp); - ir->operands[1] = expr; + ir_expression *const mul_expr = + new(ir) ir_expression(ir_binop_mul, + new(ir) ir_dereference_variable(y), + floor_expr); + + ir->operation = ir_binop_sub; + ir->operands[0] = new(ir) ir_dereference_variable(x); + ir->operands[1] = mul_expr; this->progress = true; } @@ -535,8 +548,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) break; case ir_binop_mod: - if (lowering(MOD_TO_FRACT) && ir->type->is_float()) - mod_to_fract(ir); + if (lowering(MOD_TO_FLOOR) && ir->type->is_float()) + mod_to_floor(ir); break; case ir_binop_pow: diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index f5d73838763..6cddcf5e7e9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -877,7 +877,7 @@ fs_visitor::visit(ir_expression *ir) break; } case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ + /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ assert(ir->type->is_integer()); emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index be30d976c14..7321665c556 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -157,7 +157,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) ? BITFIELD_INSERT_TO_BFM_BFI : 0; lower_instructions(shader->base.ir, - MOD_TO_FRACT | + MOD_TO_FLOOR | DIV_TO_MUL_RCP | SUB_TO_ADD_NEG | EXP_TO_EXP2 | diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 8b8b27f8e7b..8129118e28d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1522,7 +1522,7 @@ vec4_visitor::visit(ir_expression *ir) break; } case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ + /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ assert(ir->type->is_integer()); emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]); break; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 038f66b6d9e..725e51d63c3 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1152,7 +1152,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir) assert(!"not reached: should be handled by ir_div_to_mul_rcp"); break; case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ + /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ assert(ir->type->is_integer()); emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); break; @@ -2943,7 +2943,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); - lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 + lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 2ed7a3bf9b8..b84f39dd183 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5419,7 +5419,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) lower_offset_arrays(ir); do_mat_op_to_vec(ir); lower_instructions(ir, - MOD_TO_FRACT | + MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | -- 2.30.2