From 3b032732753b18c84482e30dd3675403eec7919f Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 28 Nov 2013 15:07:06 -0800 Subject: [PATCH] i965/vec4: Trivial improvements to the with_writemask() function. Add assertion that the register is not in the HW_REG or IMM file, calculate the conjunction of the old and new mask instead of replacing the old [consistent with the behavior of brw_writemask(), causes no functional changes right now], make it static inline to let the compiler do a slightly better job at optimizing things, and shorten its name. v2: Assert that the new writemask is not zero to avoid undefined hardware behaviour. Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_vec4.h | 10 ++++++++-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +---------- src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 12 ++++++------ 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 9dc4f0162d1..1e0d8824f17 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -214,8 +214,14 @@ offset(dst_reg reg, unsigned delta) return reg; } -dst_reg -with_writemask(dst_reg const &r, int mask); +static inline dst_reg +writemask(dst_reg reg, unsigned mask) +{ + assert(reg.file != HW_REG && reg.file != IMM); + assert((reg.writemask & mask) != 0); + reg.writemask &= mask; + return reg; +} class vec4_instruction : public backend_instruction { public: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 1448a61f315..ba299be72de 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -929,15 +929,6 @@ vec4_visitor::emit_if_gen6(ir_if *ir) emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ)); } -dst_reg -with_writemask(dst_reg const & r, int mask) -{ - dst_reg result = r; - result.writemask = mask; - return result; -} - - void vec4_visitor::visit(ir_variable *ir) { @@ -2495,7 +2486,7 @@ vec4_visitor::visit(ir_texture *ir) if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && type->sampler_array) { emit_math(SHADER_OPCODE_INT_QUOTIENT, - with_writemask(inst->dst, WRITEMASK_Z), + writemask(inst->dst, WRITEMASK_Z), src_reg(inst->dst), src_reg(6)); } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 6bfbf93e657..c739e41d1d4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -59,8 +59,8 @@ vec4_vs_visitor::emit_prolog() if (sign_recovery_shift.file == BAD_FILE) { /* shift constant: <22,22,22,30> */ sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type); - emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u))); - emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u))); + emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u))); + emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u))); } emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift))); @@ -87,9 +87,9 @@ vec4_vs_visitor::emit_prolog() if (es3_normalize_factor.file == BAD_FILE) { /* mul constant: 1 / (2^(b-1) - 1) */ es3_normalize_factor = dst_reg(this, glsl_type::vec4_type); - emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ), + emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ), src_reg(1.0f / ((1<<9) - 1)))); - emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W), + emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W), src_reg(1.0f / ((1<<1) - 1)))); } @@ -113,9 +113,9 @@ vec4_vs_visitor::emit_prolog() if (normalize_factor.file == BAD_FILE) { /* 1 / (2^b - 1) for b=<10,10,10,2> */ normalize_factor = dst_reg(this, glsl_type::vec4_type); - emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ), + emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ), src_reg(1.0f / ((1<<10) - 1)))); - emit(MOV(with_writemask(normalize_factor, WRITEMASK_W), + emit(MOV(writemask(normalize_factor, WRITEMASK_W), src_reg(1.0f / ((1<<2) - 1)))); } -- 2.30.2