i965/vec4: Trivial improvements to the with_writemask() function.
authorFrancisco Jerez <currojerez@riseup.net>
Thu, 28 Nov 2013 23:07:06 +0000 (15:07 -0800)
committerFrancisco Jerez <currojerez@riseup.net>
Wed, 19 Feb 2014 15:27:25 +0000 (16:27 +0100)
Add assertion that the register is not in the HW_REG or IMM file,
calculate the conjunction of the old and new mask instead of replacing
the old [consistent with the behavior of brw_writemask(), causes no
functional changes right now], make it static inline to let the
compiler do a slightly better job at optimizing things, and shorten
its name.

v2: Assert that the new writemask is not zero to avoid undefined
    hardware behaviour.

Reviewed-by: Paul Berry <stereotype441@gmail.com>
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp

index 9dc4f0162d16b0835383000f9f7b7474411461c8..1e0d8824f17cef610c0ade170e2b418540bd2e74 100644 (file)
@@ -214,8 +214,14 @@ offset(dst_reg reg, unsigned delta)
    return reg;
 }
 
-dst_reg
-with_writemask(dst_reg const &r, int mask);
+static inline dst_reg
+writemask(dst_reg reg, unsigned mask)
+{
+   assert(reg.file != HW_REG && reg.file != IMM);
+   assert((reg.writemask & mask) != 0);
+   reg.writemask &= mask;
+   return reg;
+}
 
 class vec4_instruction : public backend_instruction {
 public:
index 1448a61f31500872a8add102aa654e5601a5057a..ba299be72de877172a1d48cf35c05dd58839bb14 100644 (file)
@@ -929,15 +929,6 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
    emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
 }
 
-dst_reg
-with_writemask(dst_reg const & r, int mask)
-{
-   dst_reg result = r;
-   result.writemask = mask;
-   return result;
-}
-
-
 void
 vec4_visitor::visit(ir_variable *ir)
 {
@@ -2495,7 +2486,7 @@ vec4_visitor::visit(ir_texture *ir)
       if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
           type->sampler_array) {
          emit_math(SHADER_OPCODE_INT_QUOTIENT,
-                   with_writemask(inst->dst, WRITEMASK_Z),
+                   writemask(inst->dst, WRITEMASK_Z),
                    src_reg(inst->dst), src_reg(6));
       }
    }
index 6bfbf93e6577629644c6907af576f31b1edd532d..c739e41d1d44aeaafcd19ef0e20f13278fb4d7f6 100644 (file)
@@ -59,8 +59,8 @@ vec4_vs_visitor::emit_prolog()
             if (sign_recovery_shift.file == BAD_FILE) {
                /* shift constant: <22,22,22,30> */
                sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
-               emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
-               emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
+               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
+               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
             }
 
             emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
@@ -87,9 +87,9 @@ vec4_vs_visitor::emit_prolog()
                if (es3_normalize_factor.file == BAD_FILE) {
                   /* mul constant: 1 / (2^(b-1) - 1) */
                   es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
-                  emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
+                  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
                            src_reg(1.0f / ((1<<9) - 1))));
-                  emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
+                  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W),
                            src_reg(1.0f / ((1<<1) - 1))));
                }
 
@@ -113,9 +113,9 @@ vec4_vs_visitor::emit_prolog()
                if (normalize_factor.file == BAD_FILE) {
                   /* 1 / (2^b - 1) for b=<10,10,10,2> */
                   normalize_factor = dst_reg(this, glsl_type::vec4_type);
-                  emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
+                  emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ),
                            src_reg(1.0f / ((1<<10) - 1))));
-                  emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
+                  emit(MOV(writemask(normalize_factor, WRITEMASK_W),
                            src_reg(1.0f / ((1<<2) - 1))));
                }