i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations (v4)

author Chad Versace <chad.versace@linux.intel.com>

Wed, 9 Jan 2013 19:46:42 +0000 (11:46 -0800)

committer Chad Versace <chad.versace@linux.intel.com>

Fri, 25 Jan 2013 05:31:06 +0000 (21:31 -0800)
author Chad Versace <chad.versace@linux.intel.com>
Wed, 9 Jan 2013 19:46:42 +0000 (11:46 -0800)
committer Chad Versace <chad.versace@linux.intel.com>
Fri, 25 Jan 2013 05:31:06 +0000 (21:31 -0800)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h

index e2f1e653b126237259010c1683222dece750ce6c..79cc12f0f661246c598a2e553c0a1f9e572ad25b 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -726,6 +726,9 @@ enum opcode {
     FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
     FS_OPCODE_DISCARD_JUMP,
     FS_OPCODE_SET_GLOBAL_OFFSET,
+   FS_OPCODE_PACK_HALF_2x16_SPLIT,
+   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
+   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
  
     VS_OPCODE_URB_WRITE,
     VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index b47b0d066c8d2b30487a4d1bbf4dcfde2fdff372..d332502bde084c230365bb0489553777c02c0894 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -542,6 +542,14 @@ private:
                                     struct brw_reg offset);
     void generate_discard_jump(fs_inst *inst);
  
+   void generate_pack_half_2x16_split(fs_inst *inst,
+                                      struct brw_reg dst,
+                                      struct brw_reg x,
+                                      struct brw_reg y);
+   void generate_unpack_half_2x16_split(fs_inst *inst,
+                                        struct brw_reg dst,
+                                        struct brw_reg src);
+
     void patch_discard_jumps_to_fb_writes();
  
     struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp

index 58521ee6e6d3c3856e09ad3baedd96371b78ccc1..e19da51904952eaa92f817091d02f46252eaaaf6 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -342,9 +342,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
        assert(!"not yet supported");
        break;
  
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
     case ir_quadop_vector:
        assert(!"should have been lowered");
        break;
+
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
+   case ir_binop_pack_half_2x16_split:
+      assert(!"not reached: expression operates on scalars only");
+      break;
     }
  
     ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp

index 324e6656d1fabc88fa8d4c397a5466c49a02f654..27c5302b9f521ba8c4b5dac479dad7f85f0b73b2 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -922,6 +922,95 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
     brw_pop_insn_state(p);
  }
  
+/**
+ * Retype a UD register as W.  The element width is halved by the retyping, so
+ * the strides are doubled to compensate.
+ */
+static struct brw_reg
+ud_reg_to_w(struct brw_reg r)
+{
+   assert(r.type == BRW_REGISTER_TYPE_UD);
+
+   /* Bumping a BRW_*_STRIDE enum value by one doubles the real stride; a
+    * field value of 0 is left untouched.
+    */
+   if (r.vstride)
+      r.vstride += 1;
+   if (r.hstride)
+      r.hstride += 1;
+
+   r.type = BRW_REGISTER_TYPE_W;
+   return r;
+}
+
+/**
+ * Pack two floats into each 32-bit channel of dst as half floats: y ends up
+ * in the upper word and x in the lower word.  Requires gen >= 7, a UD dst,
+ * and F sources.
+ */
+void
+fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
+                                            struct brw_reg dst,
+                                            struct brw_reg x,
+                                            struct brw_reg y)
+{
+   assert(intel->gen >= 7);
+   assert(dst.type == BRW_REGISTER_TYPE_UD);
+   assert(x.type == BRW_REGISTER_TYPE_F);
+   assert(y.type == BRW_REGISTER_TYPE_F);
+
+   /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
+    *
+    *   Because this instruction does not have a 16-bit floating-point type,
+    *   the destination data type must be Word (W).
+    *
+    *   The destination must be DWord-aligned and specify a horizontal stride
+    *   (HorzStride) of 2. The 16-bit result is stored in the lower word of
+    *   each destination channel and the upper word is not modified.
+    */
+   struct brw_reg dst_w = ud_reg_to_w(dst);
+
+   /* Give each 32-bit channel of dst the form 0x....hhhh, where "." means
+    * unchanged.
+    */
+   brw_F32TO16(p, dst_w, y);
+
+   /* Now the form: 0xhhhh0000 */
+   brw_SHL(p, dst, dst, brw_imm_ud(16u));
+
+   /* And, finally, the form of packHalf2x16's output: 0xhhhhllll */
+   brw_F32TO16(p, dst_w, x);
+}
+
+/** Convert the low (X) or high (Y) half-float word of src to float in dst. */
+void
+fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
+                                              struct brw_reg dst,
+                                              struct brw_reg src)
+{
+   assert(intel->gen >= 7);
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+   assert(src.type == BRW_REGISTER_TYPE_UD);
+
+   /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
+    *
+    *   Because this instruction does not have a 16-bit floating-point type,
+    *   the source data type must be Word (W). The destination type must be
+    *   F (Float).
+    */
+   struct brw_reg src_w = ud_reg_to_w(src);
+
+   /* Each channel of src is unpackHalf2x16's input, 0xhhhhllll.  For Y only
+    * the upper word is wanted, so src_w gets a 16-bit subregister offset.
+    */
+   assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
+          inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
+   if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
+      src_w.subnr += 2;
+
+   brw_F16TO32(p, dst, src_w);
+}
+
  void
  fs_generator::generate_code(exec_list *instructions)
  {
@@ -1082,7 +1171,12 @@ fs_generator::generate_code(exec_list *instructions)
        case BRW_OPCODE_SHL:
          brw_SHL(p, dst, src[0], src[1]);
          break;
-
+      case BRW_OPCODE_F32TO16:
+         brw_F32TO16(p, dst, src[0]);
+         break;
+      case BRW_OPCODE_F16TO32:
+         brw_F16TO32(p, dst, src[0]);
+         break;
        case BRW_OPCODE_CMP:
          brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
          break;
@@ -1229,6 +1323,15 @@ fs_generator::generate_code(exec_list *instructions)
           generate_set_global_offset(inst, dst, src[0], src[1]);
           break;
  
+      case FS_OPCODE_PACK_HALF_2x16_SPLIT:
+          generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
+          break;
+
+      case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
+      case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
+         generate_unpack_half_2x16_split(inst, dst, src[0]);
+         break;
+
        default:
          if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
             _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index ebb37fd31138add35449e4b6a62114bf4bf6936f..2b1332f1acc526420e0fab5317858c6ddf75eac4 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -538,7 +538,20 @@ fs_visitor::visit(ir_expression *ir)
                    BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
                    this->result, op[0], op[1]);
        break;
-
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
+   case ir_unop_pack_half_2x16:
+      assert(!"not reached: should be handled by lower_packing_builtins");
+      break;
+   case ir_unop_unpack_half_2x16_split_x:
+      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
+      break;
+   case ir_unop_unpack_half_2x16_split_y:
+      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
+      break;
     case ir_binop_pow:
        emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
        break;
@@ -566,7 +579,9 @@ fs_visitor::visit(ir_expression *ir)
        else
          inst = emit(SHR(this->result, op[0], op[1]));
        break;
-
+   case ir_binop_pack_half_2x16_split:
+      emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
+      break;
     case ir_binop_ubo_load:
        /* This IR node takes a constant uniform block and a constant or
         * variable byte offset within the block and loads a vector from that.
author	Chad Versace <chad.versace@linux.intel.com>
	Wed, 9 Jan 2013 19:46:42 +0000 (11:46 -0800)
committer	Chad Versace <chad.versace@linux.intel.com>
	Fri, 25 Jan 2013 05:31:06 +0000 (21:31 -0800)
src/mesa/drivers/dri/i965/brw_defines.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_emit.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp		patch \| blob \| history