i965/gs: Add GS_OPCODE_SET_VERTEX_COUNT.
author Paul Berry <stereotype441@gmail.com>
Sat, 23 Mar 2013 15:18:43 +0000 (08:18 -0700)
committer Paul Berry <stereotype441@gmail.com>
Fri, 23 Aug 2013 18:03:27 +0000 (11:03 -0700)
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp

index ff270da1536138d49608cc630e339fdaa4dba581..2e1285fec0adb60b3143891f1777e4dccb4c6f3f 100644 (file)
@@ -833,6 +833,16 @@ enum opcode {
     * vec4_instruction::offset.
     */
    GS_OPCODE_SET_WRITE_OFFSET,
+
+   /**
+    * Set the "GS Number of Output Vertices for Slot {0,1}" fields of a
+    * URB_WRITE message header.
+    *
+    * - dst is the MRF containing the message header.
+    *
+    * - src0.x is the vertex count.  The upper 16 bits will be ignored.
+    */
+   GS_OPCODE_SET_VERTEX_COUNT,
 };
 
 #define BRW_PREDICATE_NONE             0
index e5d939af24f90bd71634f7e1b379103ccad37257..cec2d6082097c4b3acdd94acbf663a7db9bd3da9 100644 (file)
@@ -503,6 +503,8 @@ brw_instruction_name(enum opcode op)
       return "gs_thread_end";
    case GS_OPCODE_SET_WRITE_OFFSET:
       return "set_write_offset";
+   case GS_OPCODE_SET_VERTEX_COUNT:
+      return "set_vertex_count";
 
    default:
       /* Yes, this leaks.  It's in debug code, it should never occur, and if
index 484e5787e5dbe0e2dba31de73e4f29773366f5c2..730d6b728331259cc9a775fb9ad1710778b7a6d2 100644 (file)
@@ -633,6 +633,8 @@ private:
    void generate_gs_set_write_offset(struct brw_reg dst,
                                      struct brw_reg src0,
                                      struct brw_reg src1);
+   void generate_gs_set_vertex_count(struct brw_reg dst,
+                                     struct brw_reg src);
    void generate_oword_dual_block_offsets(struct brw_reg m1,
                                          struct brw_reg index);
    void generate_scratch_write(vec4_instruction *inst,
index c487ac85a230e44d75c2030414f7215396af843e..11eeca144f54ceea957231c848eacccfd70ebd29 100644 (file)
@@ -474,6 +474,33 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
    brw_pop_insn_state(p);
 }
 
+void
+vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
+                                             struct brw_reg src)
+{
+   brw_push_insn_state(p);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+   /* If we think of the src and dst registers as composed of 8 DWORDs each,
+    * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
+    * them to WORDs, and then pack them into DWORD 2 of dst.
+    *
+    * It's easier to get the EU to do this if we think of the src and dst
+    * registers as composed of 16 WORDS each; then, we want to pick up the
+    * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5 of
+    * dst.
+    *
+    * We can do that by the following EU instruction:
+    *
+    *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
+    */
+   brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
+           stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   brw_pop_insn_state(p);
+}
+
 void
 vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
                                                   struct brw_reg index)
@@ -954,6 +981,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
       generate_gs_set_write_offset(dst, src[0], src[1]);
       break;
 
+   case GS_OPCODE_SET_VERTEX_COUNT:
+      generate_gs_set_vertex_count(dst, src[0]);
+      break;
+
    case SHADER_OPCODE_SHADER_TIME_ADD:
       brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
       mark_surface_used(SURF_INDEX_VS_SHADER_TIME);