i965: add new VS_OPCODE_UNPACK_FLAGS_SIMD4X2
author Chris Forbes <chrisf@ijw.co.nz>
Wed, 7 Aug 2013 18:31:33 +0000 (06:31 +1200)
committer Chris Forbes <chrisf@ijw.co.nz>
Thu, 15 Aug 2013 19:24:38 +0000 (07:24 +1200)
Splits the bottom 8 bits of f0.0 for further wrangling
in a SIMD4x2 program. The 4 bits corresponding to the channels in each
program flow are copied to the LSBs of dst.x visible to each flow.

This is useful for working with clipping flags in the VS.

V3: - Fixup immediate types
    - Teach scheduler about the hidden dep on flags

Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
V2: Reviewed-by: Paul Berry <stereotype441@gmail.com>

src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp

index d8b3b1727cfe2a77c06c51293dca3b79ea61ef0a..2ab0a2b00ebf2bd2abebd1a87e751867434f8ac5 100644 (file)
@@ -798,6 +798,7 @@ enum opcode {
    VS_OPCODE_SCRATCH_WRITE,
    VS_OPCODE_PULL_CONSTANT_LOAD,
    VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+   VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
 };
 
 #define BRW_PREDICATE_NONE             0
index 9a2e8bebfd0ad51cff5baecc7f2c24fd7382341e..6c7e827e89564bfeabf9cf536a26f73822b8cec5 100644 (file)
@@ -494,6 +494,8 @@ brw_instruction_name(enum opcode op)
       return "pull_constant_load";
    case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
       return "pull_constant_load_gen7";
+   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
+      return "unpack_flags_simd4x2";
 
    default:
       /* Yes, this leaks.  It's in debug code, it should never occur, and if
index ee14cd82600f907337ccfe086c7533359c8c3c82..8986648793a1247f9998b3edf68f36c701f4f91b 100644 (file)
@@ -188,7 +188,7 @@ public:
 
    bool depends_on_flags()
    {
-      return predicate;
+      return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
    }
 };
 
@@ -592,6 +592,8 @@ private:
                                          struct brw_reg dst,
                                          struct brw_reg surf_index,
                                          struct brw_reg offset);
+   void generate_unpack_flags(vec4_instruction *inst,
+                              struct brw_reg dst);
 
    struct brw_context *brw;
    struct gl_context *ctx;
index c82af0e79a3beeda226c5662fe3bbccde24d2cdd..6ebc318a13d11db1fabb893276ee8557e0300037 100644 (file)
@@ -439,6 +439,25 @@ vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
    brw_pop_insn_state(p);
 }
 
+void
+vec4_generator::generate_unpack_flags(vec4_instruction *inst,
+                                      struct brw_reg dst)
+{  /* Splits the low 8 bits of the flag register: bits 0-3 go to dst element 0, bits 4-7 to dst element 4. */
+   brw_push_insn_state(p);  /* paired with the brw_pop_insn_state() at the end of this function */
+   brw_set_mask_control(p, BRW_MASK_DISABLE);  /* NOTE(review): presumably so both writes land regardless of channel enables -- confirm */
+   brw_set_access_mode(p, BRW_ALIGN_1);  /* the AND/SHR below are emitted in Align1 mode */
+   /* flags is brw_flag_reg(0, 0); dst_0 and dst_4 are vec1 views of dst at suboffsets 0 and 4. inst is not used here. */
+   struct brw_reg flags = brw_flag_reg(0, 0);
+   struct brw_reg dst_0 = suboffset(vec1(dst), 0);
+   struct brw_reg dst_4 = suboffset(vec1(dst), 4);  /* NOTE(review): presumably dst.x as seen by the second SIMD4x2 flow -- confirm */
+   /* Low nibble is masked straight into dst_0; high nibble is masked into dst_4 and then shifted down to its LSBs. */
+   brw_AND(p, dst_0, flags, brw_imm_ud(0x0f));
+   brw_AND(p, dst_4, flags, brw_imm_ud(0xf0));
+   brw_SHR(p, dst_4, dst_4, brw_imm_ud(4));  /* move bits 4-7 down into bits 0-3 */
+
+   brw_pop_insn_state(p);
+}
+
 void
 vec4_generator::generate_scratch_read(vec4_instruction *inst,
                                       struct brw_reg dst,
@@ -851,6 +870,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
       brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
       break;
 
+   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
+      generate_unpack_flags(inst, dst);
+      break;
+
    default:
       if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
          _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",