i965: Rewrite the HiZ op
[mesa.git] / src / mesa / drivers / dri / i965 / brw_gs.c
index 001cd62f8cac8a20f922409a8a1b4ef41d040bf0..f9c4f6aa0141dc69875dabf16494bd89bb48b3b5 100644 (file)
@@ -42,7 +42,7 @@
 #include "brw_state.h"
 #include "brw_gs.h"
 
-#include "../glsl/ralloc.h"
+#include "glsl/ralloc.h"
 
 static void compile_gs_prog( struct brw_context *brw,
                             struct brw_gs_prog_key *key )
@@ -53,26 +53,12 @@ static void compile_gs_prog( struct brw_context *brw,
    void *mem_ctx;
    GLuint program_size;
 
-   /* Gen6: VF has already converted into polygon, and LINELOOP is
-    * converted to LINESTRIP at the beginning of the 3D pipeline.
-    */
-   if (intel->gen >= 6)
-      return;
-
    memset(&c, 0, sizeof(c));
    
    c.key = *key;
-   /* Need to locate the two positions present in vertex + header.
-    * These are currently hardcoded:
-    */
-   c.nr_attrs = brw_count_bits(c.key.attrs);
-
-   if (intel->gen >= 5)
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
-   else
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
-
-   c.nr_bytes = c.nr_regs * REG_SIZE;
+   /* The geometry shader needs to access the entire VUE. */
+   brw_compute_vue_map(&c.vue_map, intel, c.key.userclip_active, c.key.attrs);
+   c.nr_regs = (c.vue_map.num_slots + 1)/2;
 
    mem_ctx = NULL;
    
@@ -87,24 +73,60 @@ static void compile_gs_prog( struct brw_context *brw,
     */
    brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
 
-
-   /* Note that primitives which don't require a GS program have
-    * already been weeded out by this stage:
-    */
-
-   switch (key->primitive) {
-   case GL_QUADS:
-      brw_gs_quads( &c, key );
-      break;
-   case GL_QUAD_STRIP:
-      brw_gs_quad_strip( &c, key );
-      break;
-   case GL_LINE_LOOP:
-      brw_gs_lines( &c );
-      break;
-   default:
-      ralloc_free(mem_ctx);
-      return;
+   if (intel->gen >= 6) {
+      unsigned num_verts;
+      bool check_edge_flag;
+      /* On Sandybridge, we use the GS for implementing transform feedback
+       * (called "Stream Out" in the PRM).
+       */
+      switch (key->primitive) {
+      case _3DPRIM_POINTLIST:
+         num_verts = 1;
+         check_edge_flag = false;
+        break;
+      case _3DPRIM_LINELIST:
+      case _3DPRIM_LINESTRIP:
+      case _3DPRIM_LINELOOP:
+         num_verts = 2;
+         check_edge_flag = false;
+        break;
+      case _3DPRIM_TRILIST:
+      case _3DPRIM_TRIFAN:
+      case _3DPRIM_TRISTRIP:
+      case _3DPRIM_RECTLIST:
+        num_verts = 3;
+         check_edge_flag = false;
+         break;
+      case _3DPRIM_QUADLIST:
+      case _3DPRIM_QUADSTRIP:
+      case _3DPRIM_POLYGON:
+         num_verts = 3;
+         check_edge_flag = true;
+         break;
+      default:
+        assert(!"Unexpected primitive type in Gen6 SOL program.");
+        return;
+      }
+      gen6_sol_program(&c, key, num_verts, check_edge_flag);
+   } else {
+      /* On Gen4-5, we use the GS to decompose certain types of primitives.
+       * Note that primitives which don't require a GS program have already
+       * been weeded out by now.
+       */
+      switch (key->primitive) {
+      case _3DPRIM_QUADLIST:
+        brw_gs_quads( &c, key );
+        break;
+      case _3DPRIM_QUADSTRIP:
+        brw_gs_quad_strip( &c, key );
+        break;
+      case _3DPRIM_LINELOOP:
+        brw_gs_lines( &c );
+        break;
+      default:
+        ralloc_free(mem_ctx);
+        return;
+      }
    }
 
    /* get the program
@@ -121,33 +143,24 @@ static void compile_gs_prog( struct brw_context *brw,
       printf("\n");
     }
 
-   /* Upload
-    */
-   drm_intel_bo_unreference(brw->gs.prog_bo);
-   brw->gs.prog_bo = brw_upload_cache(&brw->cache, BRW_GS_PROG,
-                                     &c.key, sizeof(c.key),
-                                     program, program_size,
-                                     &c.prog_data, sizeof(c.prog_data),
-                                     &brw->gs.prog_data);
+   brw_upload_cache(&brw->cache, BRW_GS_PROG,
+                   &c.key, sizeof(c.key),
+                   program, program_size,
+                   &c.prog_data, sizeof(c.prog_data),
+                   &brw->gs.prog_offset, &brw->gs.prog_data);
    ralloc_free(mem_ctx);
 }
 
-static const GLenum gs_prim[GL_POLYGON+1] = {  
-   GL_POINTS,
-   GL_LINES,
-   GL_LINE_LOOP,
-   GL_LINES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_QUADS,
-   GL_QUAD_STRIP,
-   GL_TRIANGLES
-};
-
 static void populate_key( struct brw_context *brw,
                          struct brw_gs_prog_key *key )
 {
+   static const unsigned swizzle_for_offset[4] = {
+      BRW_SWIZZLE4(0, 1, 2, 3),
+      BRW_SWIZZLE4(1, 2, 3, 3),
+      BRW_SWIZZLE4(2, 3, 3, 3),
+      BRW_SWIZZLE4(3, 3, 3, 3)
+   };
+
    struct gl_context *ctx = &brw->intel.ctx;
    struct intel_context *intel = &brw->intel;
 
@@ -157,27 +170,79 @@ static void populate_key( struct brw_context *brw,
    key->attrs = brw->vs.prog_data->outputs_written;
 
    /* BRW_NEW_PRIMITIVE */
-   key->primitive = gs_prim[brw->primitive];
+   key->primitive = brw->primitive;
 
    /* _NEW_LIGHT */
    key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
-   if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) {
+   if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) {
       /* Provide consistent primitive order with brw_set_prim's
        * optimization of single quads to trifans.
        */
-      key->pv_first = GL_TRUE;
+      key->pv_first = true;
    }
 
-   key->need_gs_prog = (intel->gen >= 6)
-      ? 0
-      : (brw->primitive == GL_QUADS ||
-        brw->primitive == GL_QUAD_STRIP ||
-        brw->primitive == GL_LINE_LOOP);
+   /* _NEW_TRANSFORM */
+   key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
+
+   if (intel->gen >= 7) {
+      /* On Gen7 and later, we don't use GS (yet). */
+      key->need_gs_prog = false;
+   } else if (intel->gen == 6) {
+      /* On Gen6, GS is used for transform feedback. */
+      /* _NEW_TRANSFORM_FEEDBACK */
+      if (ctx->TransformFeedback.CurrentObject->Active &&
+          !ctx->TransformFeedback.CurrentObject->Paused) {
+         const struct gl_shader_program *shaderprog =
+            ctx->Shader.CurrentVertexProgram;
+         const struct gl_transform_feedback_info *linked_xfb_info =
+            &shaderprog->LinkedTransformFeedback;
+         int i;
+
+         /* Make sure that the VUE slots won't overflow the unsigned chars in
+          * key->transform_feedback_bindings[].
+          */
+         STATIC_ASSERT(BRW_VERT_RESULT_MAX <= 256);
+
+         /* Make sure that we don't need more binding table entries than we've
+          * set aside for use in transform feedback.  (We shouldn't, since we
+          * set aside enough binding table entries to have one per component).
+          */
+         assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
+
+         key->need_gs_prog = true;
+         key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
+         for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
+            key->transform_feedback_bindings[i] =
+               linked_xfb_info->Outputs[i].OutputRegister;
+            key->transform_feedback_swizzles[i] =
+               swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
+         }
+      }
+      /* On Gen6, GS is also used for rasterizer discard. */
+      /* _NEW_RASTERIZER_DISCARD */
+      if (ctx->RasterDiscard) {
+         key->need_gs_prog = true;
+         key->rasterizer_discard = true;
+      }
+   } else {
+      /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
+       * into simpler primitives.
+       */
+      key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
+                           brw->primitive == _3DPRIM_QUADSTRIP ||
+                           brw->primitive == _3DPRIM_LINELOOP);
+   }
+   /* For testing, the environment variable INTEL_FORCE_GS can be used to
+    * force a GS program to be used, even if it's not necessary.
+    */
+   if (getenv("INTEL_FORCE_GS"))
+      key->need_gs_prog = true;
 }
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void prepare_gs_prog(struct brw_context *brw)
+static void
+brw_upload_gs_prog(struct brw_context *brw)
 {
    struct brw_gs_prog_key key;
    /* Populate the key:
@@ -189,24 +254,24 @@ static void prepare_gs_prog(struct brw_context *brw)
       brw->gs.prog_active = key.need_gs_prog;
    }
 
-   drm_intel_bo_unreference(brw->gs.prog_bo);
-   brw->gs.prog_bo = NULL;
-
    if (brw->gs.prog_active) {
-      brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
-                                        &key, sizeof(key),
-                                        &brw->gs.prog_data);
-      if (brw->gs.prog_bo == NULL)
+      if (!brw_search_cache(&brw->cache, BRW_GS_PROG,
+                           &key, sizeof(key),
+                           &brw->gs.prog_offset, &brw->gs.prog_data)) {
         compile_gs_prog( brw, &key );
+      }
    }
 }
 
 
 const struct brw_tracked_state brw_gs_prog = {
    .dirty = {
-      .mesa  = _NEW_LIGHT,
+      .mesa  = (_NEW_LIGHT |
+                _NEW_TRANSFORM |
+                _NEW_TRANSFORM_FEEDBACK |
+                _NEW_RASTERIZER_DISCARD),
       .brw   = BRW_NEW_PRIMITIVE,
       .cache = CACHE_NEW_VS_PROG
    },
-   .prepare = prepare_gs_prog
+   .emit = brw_upload_gs_prog
 };