From 2dfbbeca50b95ccdd714d9baa4411c779f6a20d9 Mon Sep 17 00:00:00 2001
From: Juha-Pekka Heikkila
Date: Fri, 28 Mar 2014 15:28:31 +0200
Subject: [PATCH] i965/vec4: Change vec4_visitor::emit_lrp to use MAC for gen<6

This allows us to emit ADD/MUL/MAC instead of MUL/ADD/MUL/ADD,
saving one instruction and two temporary registers.

Reviewed-by: Kenneth Graunke
Reviewed-by: Matt Turner
Signed-off-by: Juha-Pekka Heikkila
---
 .../drivers/dri/i965/brw_vec4_visitor.cpp | 22 +++++-------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 059dc73adcc..8fa0aee996b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1165,24 +1165,14 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
    } else {
       /* Earlier generations don't support three source operations, so we
        * need to emit x*(1-a) + y*a.
-       *
-       * A better way to do this would be:
-       *    ADD one_minus_a, negate(a), 1.0f
-       *    MUL null, y, a
-       *    MAC dst, x, one_minus_a
-       * but we would need to support MAC and implicit accumulator.
        */
-      dst_reg y_times_a           = dst_reg(this, glsl_type::vec4_type);
-      dst_reg one_minus_a         = dst_reg(this, glsl_type::vec4_type);
-      dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
-      y_times_a.writemask           = dst.writemask;
-      one_minus_a.writemask         = dst.writemask;
-      x_times_one_minus_a.writemask = dst.writemask;
-
-      emit(MUL(y_times_a, y, a));
+      dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
+      one_minus_a.writemask = dst.writemask;
+
       emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
-      emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
-      emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
+      vec4_instruction *mul = emit(MUL(dst_null_f(), y, a));
+      mul->writes_accumulator = true;
+      emit(MAC(dst, x, src_reg(one_minus_a)));
    }
 }
 
-- 
2.30.2