i965/vec4: Add and use byte-MOV instruction for unpack 4x8.
author Matt Turner <mattst88@gmail.com>
Thu, 12 Feb 2015 01:42:43 +0000 (01:42 +0000)
committer Matt Turner <mattst88@gmail.com>
Fri, 20 Feb 2015 05:16:44 +0000 (21:16 -0800)
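Previously we were using a B/UB source in an Align16 instruction, which
is illegal. For some reason this works on all platforms except Broadwell.
Add a VEC4_OPCODE_MOV_BYTES opcode that performs the byte move in Align1
mode with a <4,1,0> source region, and use it in the unpack 4x8 paths.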

Cc: "10.5" <mesa-stable@lists.freedesktop.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86811
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index a597d6bcced2f6426d603e93891f52a35900e5b9..17c27dd49fc51743f11dd5ddba5bd04e6a06de8f 100644 (file)
@@ -911,6 +911,7 @@ enum opcode {
 
    SHADER_OPCODE_URB_WRITE_SIMD8,
 
+   VEC4_OPCODE_MOV_BYTES,
    VEC4_OPCODE_PACK_BYTES,
    VEC4_OPCODE_UNPACK_UNIFORM,
 
index b0e9c82265c591180e1d10f5826fdccc6cfb7bbc..fbb20bc06bb261e07556c37c8e25b92c3625cf96 100644 (file)
@@ -471,6 +471,8 @@ brw_instruction_name(enum opcode op)
    case SHADER_OPCODE_URB_WRITE_SIMD8:
       return "gen8_urb_write_simd8";
 
+   case VEC4_OPCODE_MOV_BYTES:
+      return "mov_bytes";
    case VEC4_OPCODE_PACK_BYTES:
       return "pack_bytes";
    case VEC4_OPCODE_UNPACK_UNIFORM:
index 3d03b6096e3704f5bc1a9cb9ca2407c86709b30b..67c828501882b6e1a6f4fed22c4fdf2b93da1f4d 100644 (file)
@@ -1512,6 +1512,22 @@ vec4_generator::generate_code(const cfg_t *cfg)
          generate_unpack_flags(dst);
          break;
 
+      case VEC4_OPCODE_MOV_BYTES: {
+         /* Moves the low byte from each channel, using an Align1 access mode
+          * and a <4,1,0> source region.
+          */
+         assert(src[0].type == BRW_REGISTER_TYPE_UB ||
+                src[0].type == BRW_REGISTER_TYPE_B);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_1);
+         src[0].vstride = BRW_VERTICAL_STRIDE_4;
+         src[0].width = BRW_WIDTH_1;
+         src[0].hstride = BRW_HORIZONTAL_STRIDE_0;
+         brw_MOV(p, dst, src[0]);
+         brw_set_default_access_mode(p, BRW_ALIGN_16);
+         break;
+      }
+
       case VEC4_OPCODE_PACK_BYTES: {
          /* Is effectively:
           *
index be071d74baa2ed1a0db28e3357e804ab57b4f6a0..f6f589d71b4da3f72b5d4df68e660ef38d6fb68e 100644 (file)
@@ -489,7 +489,7 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
 
    shifted.type = BRW_REGISTER_TYPE_UB;
    dst_reg f(this, glsl_type::vec4_type);
-   emit(MOV(f, src_reg(shifted)));
+   emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
 
    emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
 }
@@ -511,7 +511,7 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
 
    shifted.type = BRW_REGISTER_TYPE_B;
    dst_reg f(this, glsl_type::vec4_type);
-   emit(MOV(f, src_reg(shifted)));
+   emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
 
    dst_reg scaled(this, glsl_type::vec4_type);
    emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));