i965 gs: Clean up dodgy register re-use, at the cost of a few MOVs.

author Paul Berry <stereotype441@gmail.com>

Tue, 29 Nov 2011 22:54:02 +0000 (14:54 -0800)

committer Paul Berry <stereotype441@gmail.com>

Thu, 8 Dec 2011 00:38:01 +0000 (16:38 -0800)
author Paul Berry <stereotype441@gmail.com>
Tue, 29 Nov 2011 22:54:02 +0000 (14:54 -0800)
committer Paul Berry <stereotype441@gmail.com>
Thu, 8 Dec 2011 00:38:01 +0000 (16:38 -0800)
diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h

index 12889a62e6b2fe80d6c2e0adabe02e98b81a3ce4..93448a77f088f7418ade7e80685a92bee3e55c8f 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ b/src/mesa/drivers/dri/i965/brw_gs.h
@@ -60,6 +60,7 @@ struct brw_gs_compile {
     struct {
        struct brw_reg R0;
        struct brw_reg vertex[MAX_GS_VERTS];
+      struct brw_reg header;
        struct brw_reg temp;
     } reg;
  
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c

index e9875cdbf1191b9602517749d54539f5e8170114..6d39df195f6b01d0c504139f3aad4094d2209fa7 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -58,35 +58,66 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c,
        i += c->nr_regs;
     }
  
-   c->reg.temp = brw_vec8_grf(i, 0);
+   c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
+   c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
  
     c->prog_data.urb_read_length = c->nr_regs; 
     c->prog_data.total_grf = i;
  }
  
  
+/**
+ * Set up the initial value of c->reg.header register based on c->reg.R0.
+ *
+ * The following information is passed to the GS thread in R0, and needs to be
+ * included in the first URB_WRITE or FF_SYNC message sent by the GS:
+ *
+ * - DWORD 0 [31:0] handle info (Gen4 only)
+ * - DWORD 5 [7:0] FFTID
+ * - DWORD 6 [31:0] Debug info
+ * - DWORD 7 [31:0] Debug info
+ *
+ * This function sets up the above data by copying the contents of
+ * R0 to the header register.
+ */
+static void brw_gs_initialize_header(struct brw_gs_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   brw_MOV(p, c->reg.header, c->reg.R0);
+}
+
+/**
+ * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value.
+ *
+ * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart,
+ * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we
+ * need to be able to update on a per-vertex basis.
+ */
+static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c,
+                                        unsigned dw2)
+{
+   struct brw_compile *p = &c->func;
+   brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
+}
+
+/**
+ * Emit a vertex using the URB_WRITE message.  Use the contents of
+ * c->reg.header for the message header, and the registers starting at \c vert
+ * for the vertex data.
+ *
+ * If \c last is true, then this is the last vertex, so no further URB space
+ * should be allocated, and this message should end the thread.
+ *
+ * If \c last is false, then a new URB entry will be allocated, and its handle
+ * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
+ * message.
+ */
  static void brw_gs_emit_vue(struct brw_gs_compile *c, 
                             struct brw_reg vert,
-                           bool last,
-                           GLuint header)
+                           bool last)
  {
     struct brw_compile *p = &c->func;
-   struct intel_context *intel = &c->func.brw->intel;
     bool allocate = !last;
-   struct brw_reg temp;
-
-   if (intel->gen < 6)
-      temp = c->reg.R0;
-   else {
-      temp = c->reg.temp;
-      brw_MOV(p, retype(temp, BRW_REGISTER_TYPE_UD),
-             retype(c->reg.R0, BRW_REGISTER_TYPE_UD));
-   }
-
-   /* Overwrite PrimType and PrimStart in the message header, for
-    * each vertex in turn:
-    */
-   brw_MOV(p, get_element_ud(temp, 2), brw_imm_ud(header));
  
     /* Copy the vertex from vertn into m1..mN+1:
      */
@@ -99,9 +130,10 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
      * allocated each time.
      */
     brw_urb_WRITE(p, 
-                allocate ? temp : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+                allocate ? c->reg.temp
+                          : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                  0,
-                temp,
+                c->reg.header,
                  allocate,
                  1,             /* used */
                  c->nr_regs + 1, /* msg length */
@@ -111,38 +143,37 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
                  0,             /* urb offset */
                  BRW_URB_SWIZZLE_NONE);
  
-   if (intel->gen >= 6 && allocate)
-       brw_MOV(p, get_element_ud(c->reg.R0, 0), get_element_ud(temp, 0));
+   if (allocate) {
+      brw_MOV(p, get_element_ud(c->reg.header, 0),
+              get_element_ud(c->reg.temp, 0));
+   }
  }
  
+/**
+ * Send an FF_SYNC message to ensure that all previously spawned GS threads
+ * have finished sending primitives down the pipeline, and to allocate a URB
+ * entry for the first output vertex.  Only needed when intel->needs_ff_sync
+ * is true.
+ *
+ * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which
+ * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to
+ * the allocated URB entry (which will be needed by the URB_WRITE message that
+ * follows).
+ */
  static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
  {
     struct brw_compile *p = &c->func;
-   struct intel_context *intel = &c->func.brw->intel;
  
-   if (intel->gen < 6) {
-      brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
-      brw_ff_sync(p,
-                 c->reg.R0,
-                 0,
-                 c->reg.R0,
-                 1, /* allocate */
-                 1, /* response length */
-                 0 /* eot */);
-   } else {
-      brw_MOV(p, retype(c->reg.temp, BRW_REGISTER_TYPE_UD),
-             retype(c->reg.R0, BRW_REGISTER_TYPE_UD));
-      brw_MOV(p, get_element_ud(c->reg.temp, 1), brw_imm_ud(num_prim));
-      brw_ff_sync(p,
-                 c->reg.temp,
-                 0,
-                 c->reg.temp,
-                 1, /* allocate */
-                 1, /* response length */
-                 0 /* eot */);
-      brw_MOV(p, get_element_ud(c->reg.R0, 0),
-      get_element_ud(c->reg.temp, 0));
-   }
+   brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim));
+   brw_ff_sync(p,
+               c->reg.temp,
+               0,
+               c->reg.header,
+               1, /* allocate */
+               1, /* response length */
+               0 /* eot */);
+   brw_MOV(p, get_element_ud(c->reg.header, 0),
+           get_element_ud(c->reg.temp, 0));
  }
  
  
@@ -151,23 +182,28 @@ void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
     struct intel_context *intel = &c->func.brw->intel;
  
     brw_gs_alloc_regs(c, 4);
-   
+   brw_gs_initialize_header(c);
     /* Use polygons for correct edgeflag behaviour. Note that vertex 3
      * is the PV for quads, but vertex 0 for polygons:
      */
     if (intel->needs_ff_sync)
        brw_gs_ff_sync(c, 1);
+   brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_START);
     if (key->pv_first) {
-      brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
-      brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
-      brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2));
-      brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+      brw_gs_emit_vue(c, c->reg.vertex[0], 0);
+      brw_gs_overwrite_header_dw2(c, _3DPRIM_POLYGON << 2);
+      brw_gs_emit_vue(c, c->reg.vertex[1], 0);
+      brw_gs_emit_vue(c, c->reg.vertex[2], 0);
+      brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_END);
+      brw_gs_emit_vue(c, c->reg.vertex[3], 1);
     }
     else {
-      brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
-      brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
-      brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
-      brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+      brw_gs_emit_vue(c, c->reg.vertex[3], 0);
+      brw_gs_overwrite_header_dw2(c, _3DPRIM_POLYGON << 2);
+      brw_gs_emit_vue(c, c->reg.vertex[0], 0);
+      brw_gs_emit_vue(c, c->reg.vertex[1], 0);
+      brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_END);
+      brw_gs_emit_vue(c, c->reg.vertex[2], 1);
     }
  }
  
@@ -176,20 +212,26 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
     struct intel_context *intel = &c->func.brw->intel;
  
     brw_gs_alloc_regs(c, 4);
+   brw_gs_initialize_header(c);
     
     if (intel->needs_ff_sync)
        brw_gs_ff_sync(c, 1);
+   brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_START);
     if (key->pv_first) {
-      brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
-      brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
-      brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2));
-      brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+      brw_gs_emit_vue(c, c->reg.vertex[0], 0);
+      brw_gs_overwrite_header_dw2(c, _3DPRIM_POLYGON << 2);
+      brw_gs_emit_vue(c, c->reg.vertex[1], 0);
+      brw_gs_emit_vue(c, c->reg.vertex[2], 0);
+      brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_END);
+      brw_gs_emit_vue(c, c->reg.vertex[3], 1);
     }
     else {
-      brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
-      brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
-      brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
-      brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+      brw_gs_emit_vue(c, c->reg.vertex[2], 0);
+      brw_gs_overwrite_header_dw2(c, _3DPRIM_POLYGON << 2);
+      brw_gs_emit_vue(c, c->reg.vertex[3], 0);
+      brw_gs_emit_vue(c, c->reg.vertex[0], 0);
+      brw_gs_overwrite_header_dw2(c, (_3DPRIM_POLYGON << 2) | R02_PRIM_END);
+      brw_gs_emit_vue(c, c->reg.vertex[1], 1);
     }
  }
  
@@ -198,9 +240,12 @@ void brw_gs_lines( struct brw_gs_compile *c )
     struct intel_context *intel = &c->func.brw->intel;
  
     brw_gs_alloc_regs(c, 2);
+   brw_gs_initialize_header(c);
  
     if (intel->needs_ff_sync)
        brw_gs_ff_sync(c, 1);
-   brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
-   brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
+   brw_gs_overwrite_header_dw2(c, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0);
+   brw_gs_overwrite_header_dw2(c, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1);
  }
author	Paul Berry <stereotype441@gmail.com>
	Tue, 29 Nov 2011 22:54:02 +0000 (14:54 -0800)
committer	Paul Berry <stereotype441@gmail.com>
	Thu, 8 Dec 2011 00:38:01 +0000 (16:38 -0800)
src/mesa/drivers/dri/i965/brw_gs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_gs_emit.c		patch \| blob \| history