intel: Change the register type from UW to UD in blorp engine

author Anuj Phogat <anuj.phogat@gmail.com>

Wed, 8 May 2013 00:06:23 +0000 (17:06 -0700)

committer Anuj Phogat <anuj.phogat@gmail.com>

Thu, 30 May 2013 17:50:29 +0000 (10:50 -0700)
author Anuj Phogat <anuj.phogat@gmail.com>
Wed, 8 May 2013 00:06:23 +0000 (17:06 -0700)
committer Anuj Phogat <anuj.phogat@gmail.com>
Thu, 30 May 2013 17:50:29 +0000 (10:50 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h

index 891508074b61d45c421d55642044366cd414ab3e..70e3933e11a6b84ab6f6702c172310d55564e233 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -161,22 +161,19 @@ struct brw_blorp_coord_transform_params
     void setup(GLuint src0, GLuint dst0, GLuint dst1,
                bool mirror);
  
-   int16_t multiplier;
-   int16_t offset;
+   int32_t multiplier;
+   int32_t offset;
  };
  
  
  struct brw_blorp_wm_push_constants
  {
-   uint16_t dst_x0;
-   uint16_t dst_x1;
-   uint16_t dst_y0;
-   uint16_t dst_y1;
+   uint32_t dst_x0;
+   uint32_t dst_x1;
+   uint32_t dst_y0;
+   uint32_t dst_y1;
     brw_blorp_coord_transform_params x_transform;
     brw_blorp_coord_transform_params y_transform;
-
-   /* Pad out to an integral number of registers */
-   uint16_t pad[8];
  };
  
  /* Every 32 bytes of push constant data constitutes one GEN register. */
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp

index c3ef054b2b4a812a5ce88ff7b0a58cb951a9517d..60837cae765de9351c9e72fffcdfbce962910353 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -590,13 +590,12 @@ private:
     void encode_msaa(unsigned num_samples, intel_msaa_layout layout);
     void decode_msaa(unsigned num_samples, intel_msaa_layout layout);
     void kill_if_outside_dst_rect();
-   void translate_dst_to_src();
+   void translate_dst_to_src(unsigned intel_gen);
     void single_to_blend();
     void manual_blend(unsigned num_samples);
     void sample(struct brw_reg dst);
     void texel_fetch(struct brw_reg dst);
     void mcs_fetch();
-   void expand_to_32_bits(struct brw_reg src, struct brw_reg dst);
     void texture_lookup(struct brw_reg dst, GLuint msg_type,
                         const sampler_message_arg *args, int num_args);
     void render_target_write();
@@ -773,7 +772,7 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
        kill_if_outside_dst_rect();
  
     /* Next, apply a translation to obtain coordinates in the source image. */
-   translate_dst_to_src();
+   translate_dst_to_src(brw->intel.gen);
  
     /* If the source image is not multisampled, then we want to fetch sample
      * number 0, because that's the only sample there is.
@@ -845,7 +844,7 @@ brw_blorp_blit_program::alloc_push_const_regs(int base_reg)
  #define CONST_LOC(name) offsetof(brw_blorp_wm_push_constants, name)
  #define ALLOC_REG(name) \
     this->name = \
-      brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) / 2)
+      brw_ud1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) / 4)
  
     ALLOC_REG(dst_x0);
     ALLOC_REG(dst_x1);
@@ -875,17 +874,23 @@ brw_blorp_blit_program::alloc_regs()
     }
     this->mcs_data =
        retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD); reg += 8;
+
     for (int i = 0; i < 2; ++i) {
        this->x_coords[i]
-         = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
+         = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+      reg += 2;
        this->y_coords[i]
-         = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
+         = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+      reg += 2;
     }
     this->xy_coord_index = 0;
     this->sample_index
-      = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
-   this->t1 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
-   this->t2 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));
+      = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+   reg += 2;
+   this->t1 = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+   reg += 2;
+   this->t2 = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
+   reg += 2;
  
     /* Make sure we didn't run out of registers */
     assert(reg <= GEN7_MRF_HACK_START);
@@ -942,7 +947,8 @@ brw_blorp_blit_program::compute_frag_coords()
      * Then, we need to add the repeating sequence (0, 1, 0, 1, ...) to the
      * result, since pixels n+1 and n+3 are in the right half of the subspan.
      */
-   brw_ADD(&func, X, stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));
+   brw_ADD(&func, vec16(retype(X, BRW_REGISTER_TYPE_UW)),
+           stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));
  
     /* Similarly, Y coordinates for subspans come from R1.2[31:16] through
      * R1.5[31:16], so to get pixel Y coordinates we need to start at the 5th
@@ -952,11 +958,17 @@ brw_blorp_blit_program::compute_frag_coords()
      * And we need to add the repeating sequence (0, 0, 1, 1, ...), since
      * pixels n+2 and n+3 are in the bottom half of the subspan.
      */
-   brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
+   brw_ADD(&func, vec16(retype(Y, BRW_REGISTER_TYPE_UW)),
+           stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
+
+   /* Move the coordinates to UD registers. */
+   brw_MOV(&func, vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW));
+   brw_MOV(&func, vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW));
+   SWAP_XY_AND_XPYP();
  
     if (key->persample_msaa_dispatch) {
        switch (key->rt_samples) {
-      case 4:
+      case 4: {
           /* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 4.
            * Therefore, subspan 0 will represent sample 0, subspan 1 will
            * represent sample 1, and so on.
@@ -966,9 +978,13 @@ brw_blorp_blit_program::compute_frag_coords()
            * populate a temporary variable with the sequence (0, 1, 2, 3), and
            * then copy from it using vstride=1, width=4, hstride=0.
            */
-         brw_MOV(&func, t1, brw_imm_v(0x3210));
-         brw_MOV(&func, S, stride(t1, 1, 4, 0));
+         struct brw_reg t1_uw1 = retype(t1, BRW_REGISTER_TYPE_UW);
+         brw_MOV(&func, vec16(t1_uw1), brw_imm_v(0x3210));
+         /* Move to UD sample_index register. */
+         brw_MOV(&func, S, stride(t1_uw1, 1, 4, 0));
+         brw_MOV(&func, offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2));
           break;
+      }
        case 8: {
           /* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 8.
            * Therefore, subspan 0 will represent sample N (where N is 0 or 4),
@@ -984,12 +1000,16 @@ brw_blorp_blit_program::compute_frag_coords()
            * using vstride=1, width=4, hstride=0.
            */
           struct brw_reg t1_ud1 = vec1(retype(t1, BRW_REGISTER_TYPE_UD));
+         struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW);
           struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
           brw_AND(&func, t1_ud1, r0_ud1, brw_imm_ud(0xc0));
           brw_SHR(&func, t1_ud1, t1_ud1, brw_imm_ud(5));
-         brw_MOV(&func, t2, brw_imm_v(0x3210));
-         brw_ADD(&func, S, retype(t1_ud1, BRW_REGISTER_TYPE_UW),
-                 stride(t2, 1, 4, 0));
+         brw_MOV(&func, vec16(t2_uw1), brw_imm_v(0x3210));
+         brw_ADD(&func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
+                 stride(t2_uw1, 1, 4, 0));
+         brw_ADD(&func, offset(S, 1),
+                 retype(t1_ud1, BRW_REGISTER_TYPE_UW),
+                 suboffset(stride(t2_uw1, 1, 4, 0), 2));
           break;
        }
        default:
@@ -1031,6 +1051,7 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w)
      */
     assert(s_is_zero);
  
+   brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
     if (new_tiled_w) {
        /* Given X and Y coordinates that describe an address using Y tiling,
         * translate to the X and Y coordinates that describe the same address
@@ -1100,6 +1121,7 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w)
        brw_OR(&func, Yp, t1, t2);
        SWAP_XY_AND_XPYP();
     }
+   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
  }
  
  /**
@@ -1116,6 +1138,7 @@ void
  brw_blorp_blit_program::encode_msaa(unsigned num_samples,
                                      intel_msaa_layout layout)
  {
+   brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
     switch (layout) {
     case INTEL_MSAA_LAYOUT_NONE:
        /* No translation necessary, and S should already be zero. */
@@ -1187,6 +1210,7 @@ brw_blorp_blit_program::encode_msaa(unsigned num_samples,
        s_is_zero = true;
        break;
     }
+   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
  }
  
  /**
@@ -1203,6 +1227,7 @@ void
  brw_blorp_blit_program::decode_msaa(unsigned num_samples,
                                      intel_msaa_layout layout)
  {
+   brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
     switch (layout) {
     case INTEL_MSAA_LAYOUT_NONE:
        /* No translation necessary, and S should already be zero. */
@@ -1265,6 +1290,7 @@ brw_blorp_blit_program::decode_msaa(unsigned num_samples,
        SWAP_XY_AND_XPYP();
        break;
     }
+   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
  }
  
  /**
@@ -1277,12 +1303,12 @@ brw_blorp_blit_program::kill_if_outside_dst_rect()
  {
     struct brw_reg f0 = brw_flag_reg(0, 0);
     struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-   struct brw_reg null16 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
+   struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
  
-   brw_CMP(&func, null16, BRW_CONDITIONAL_GE, X, dst_x0);
-   brw_CMP(&func, null16, BRW_CONDITIONAL_GE, Y, dst_y0);
-   brw_CMP(&func, null16, BRW_CONDITIONAL_L, X, dst_x1);
-   brw_CMP(&func, null16, BRW_CONDITIONAL_L, Y, dst_y1);
+   brw_CMP(&func, null32, BRW_CONDITIONAL_GE, X, dst_x0);
+   brw_CMP(&func, null32, BRW_CONDITIONAL_GE, Y, dst_y0);
+   brw_CMP(&func, null32, BRW_CONDITIONAL_L, X, dst_x1);
+   brw_CMP(&func, null32, BRW_CONDITIONAL_L, Y, dst_y1);
  
     brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
     brw_push_insn_state(&func);
@@ -1296,12 +1322,28 @@ brw_blorp_blit_program::kill_if_outside_dst_rect()
   * coordinates.
   */
  void
-brw_blorp_blit_program::translate_dst_to_src()
+brw_blorp_blit_program::translate_dst_to_src(unsigned intel_gen)
  {
-   brw_MUL(&func, Xp, X, x_transform.multiplier);
-   brw_MUL(&func, Yp, Y, y_transform.multiplier);
+   brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
+   /* For mul instruction:
+    * On SNB when both src0 and src1 are of type D or UD, only the low 16 bits
+    * of each element of src0 are used.
+    * On IVB when both src0 and src1 are of type D or UD, only the low 16 bits
+    * of each element of src1 are used.
+    * multiplier can be positive or negative. So keep the multiplier in a src
+    * register which doesn't get truncated during multiplication.
+    */
+   if (intel_gen == 6) {
+      brw_MUL(&func, Xp, X, x_transform.multiplier);
+      brw_MUL(&func, Yp, Y, y_transform.multiplier);
+   }
+   else {
+      brw_MUL(&func, Xp, x_transform.multiplier, X);
+      brw_MUL(&func, Yp, y_transform.multiplier, Y);
+   }
     brw_ADD(&func, Xp, Xp, x_transform.offset);
     brw_ADD(&func, Yp, Yp, y_transform.offset);
+   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
     SWAP_XY_AND_XPYP();
  }
  
@@ -1318,10 +1360,12 @@ brw_blorp_blit_program::single_to_blend()
      * that maxe up a pixel).  So we need to multiply our X and Y coordinates
      * each by 2 and then add 1.
      */
+   brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
     brw_SHL(&func, t1, X, brw_imm_w(1));
     brw_SHL(&func, t2, Y, brw_imm_w(1));
     brw_ADD(&func, Xp, t1, brw_imm_w(1));
     brw_ADD(&func, Yp, t2, brw_imm_w(1));
+   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
     SWAP_XY_AND_XPYP();
  }
  
@@ -1394,7 +1438,7 @@ brw_blorp_blit_program::manual_blend(unsigned num_samples)
           s_is_zero = true;
        } else {
           s_is_zero = false;
-         brw_MOV(&func, S, brw_imm_uw(i));
+         brw_MOV(&func, vec16(S), brw_imm_ud(i));
        }
        texel_fetch(texture_data[stack_depth++]);
  
@@ -1545,16 +1589,6 @@ brw_blorp_blit_program::mcs_fetch()
                    gen7_ld_mcs_args, ARRAY_SIZE(gen7_ld_mcs_args));
  }
  
-void
-brw_blorp_blit_program::expand_to_32_bits(struct brw_reg src,
-                                          struct brw_reg dst)
-{
-   brw_MOV(&func, vec8(dst), vec8(src));
-   brw_set_compression_control(&func, BRW_COMPRESSION_2NDHALF);
-   brw_MOV(&func, offset(vec8(dst), 1), suboffset(vec8(src), 8));
-   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
-}
-
  void
  brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
                                         GLuint msg_type,
@@ -1566,16 +1600,16 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
     for (int arg = 0; arg < num_args; ++arg) {
        switch (args[arg]) {
        case SAMPLER_MESSAGE_ARG_U_FLOAT:
-         expand_to_32_bits(X, retype(mrf, BRW_REGISTER_TYPE_F));
+         brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), X);
           break;
        case SAMPLER_MESSAGE_ARG_V_FLOAT:
-         expand_to_32_bits(Y, retype(mrf, BRW_REGISTER_TYPE_F));
+         brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), Y);
           break;
        case SAMPLER_MESSAGE_ARG_U_INT:
-         expand_to_32_bits(X, mrf);
+         brw_MOV(&func, mrf, X);
           break;
        case SAMPLER_MESSAGE_ARG_V_INT:
-         expand_to_32_bits(Y, mrf);
+         brw_MOV(&func, mrf, Y);
           break;
        case SAMPLER_MESSAGE_ARG_SI_INT:
           /* Note: on Gen7, this code may be reached with s_is_zero==true
@@ -1586,7 +1620,7 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
           if (s_is_zero)
              brw_MOV(&func, mrf, brw_imm_ud(0));
           else
-            expand_to_32_bits(S, mrf);
+            brw_MOV(&func, mrf, S);
           break;
        case SAMPLER_MESSAGE_ARG_MCS_INT:
           switch (key->tex_layout) {
@@ -1614,7 +1648,7 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
     }
  
     brw_SAMPLE(&func,
-              retype(dst, BRW_REGISTER_TYPE_UW) /* dest */,
+              retype(dst, BRW_REGISTER_TYPE_F) /* dest */,
                base_mrf /* msg_reg_nr */,
                brw_message_reg(base_mrf) /* src0 */,
                BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX,
@@ -1685,7 +1719,7 @@ brw_blorp_coord_transform_params::setup(GLuint src0, GLuint dst0, GLuint dst1,
         *   x' = 1*x + (src_x0 - dst_x0)
         */
        multiplier = 1;
-      offset = src0 - dst0;
+      offset = (int) (src0 - dst0);
     } else {
        /* When mirroring X we need:
         *   x' - src_x0 = dst_x1 - x - 1
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h

index 9ac25442eec03169521ed288e7783d9e38c2f30b..972ccf6bc42bbbd2cd261de9f3208594dd3452b5 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -344,6 +344,13 @@ brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
     return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
  }
  
+/** Construct unsigned dword[1] register */
+static inline struct brw_reg
+brw_ud1_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UD), subnr);
+}
+
  static inline struct brw_reg
  brw_imm_reg(unsigned type)
  {
author	Anuj Phogat <anuj.phogat@gmail.com>
	Wed, 8 May 2013 00:06:23 +0000 (17:06 -0700)
committer	Anuj Phogat <anuj.phogat@gmail.com>
	Thu, 30 May 2013 17:50:29 +0000 (10:50 -0700)
src/mesa/drivers/dri/i965/brw_blorp.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_reg.h		patch \| blob \| history