i915: Remove most of the code under gen >= 4 checks.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_gs.c
index 3171e97d7af912b00bb8f0a059417a922053933d..f354dd9f62575ff65deb52e5a5e0ecf1f9b31d7c 100644 (file)
@@ -32,6 +32,7 @@
 #include "main/glheader.h"
 #include "main/macros.h"
 #include "main/enums.h"
+#include "main/transformfeedback.h"
 
 #include "intel_batchbuffer.h"
 
@@ -42,7 +43,7 @@
 #include "brw_state.h"
 #include "brw_gs.h"
 
-#include "../glsl/ralloc.h"
+#include "glsl/ralloc.h"
 
 static void compile_gs_prog( struct brw_context *brw,
                             struct brw_gs_prog_key *key )
@@ -53,28 +54,13 @@ static void compile_gs_prog( struct brw_context *brw,
    void *mem_ctx;
    GLuint program_size;
 
-   /* Gen6: VF has already converted into polygon, and LINELOOP is
-    * converted to LINESTRIP at the beginning of the 3D pipeline.
-    */
-   if (intel->gen >= 6)
-      return;
-
    memset(&c, 0, sizeof(c));
    
    c.key = *key;
-   /* Need to locate the two positions present in vertex + header.
-    * These are currently hardcoded:
-    */
-   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.vue_map = brw->vs.prog_data->base.vue_map;
+   c.nr_regs = (c.vue_map.num_slots + 1)/2;
 
-   if (intel->gen >= 5)
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
-   else
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
-
-   c.nr_bytes = c.nr_regs * REG_SIZE;
-
-   mem_ctx = NULL;
+   mem_ctx = ralloc_context(NULL);
    
    /* Begin the compilation:
     */
@@ -87,24 +73,60 @@ static void compile_gs_prog( struct brw_context *brw,
     */
    brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
 
-
-   /* Note that primitives which don't require a GS program have
-    * already been weeded out by this stage:
-    */
-
-   switch (key->primitive) {
-   case GL_QUADS:
-      brw_gs_quads( &c, key );
-      break;
-   case GL_QUAD_STRIP:
-      brw_gs_quad_strip( &c, key );
-      break;
-   case GL_LINE_LOOP:
-      brw_gs_lines( &c );
-      break;
-   default:
-      ralloc_free(mem_ctx);
-      return;
+   if (intel->gen >= 6) {
+      unsigned num_verts;
+      bool check_edge_flag;
+      /* On Sandybridge, we use the GS for implementing transform feedback
+       * (called "Stream Out" in the PRM).
+       */
+      switch (key->primitive) {
+      case _3DPRIM_POINTLIST:
+         num_verts = 1;
+         check_edge_flag = false;
+        break;
+      case _3DPRIM_LINELIST:
+      case _3DPRIM_LINESTRIP:
+      case _3DPRIM_LINELOOP:
+         num_verts = 2;
+         check_edge_flag = false;
+        break;
+      case _3DPRIM_TRILIST:
+      case _3DPRIM_TRIFAN:
+      case _3DPRIM_TRISTRIP:
+      case _3DPRIM_RECTLIST:
+        num_verts = 3;
+         check_edge_flag = false;
+         break;
+      case _3DPRIM_QUADLIST:
+      case _3DPRIM_QUADSTRIP:
+      case _3DPRIM_POLYGON:
+         num_verts = 3;
+         check_edge_flag = true;
+         break;
+      default:
+        assert(!"Unexpected primitive type in Gen6 SOL program.");
+        return;
+      }
+      gen6_sol_program(&c, key, num_verts, check_edge_flag);
+   } else {
+      /* On Gen4-5, we use the GS to decompose certain types of primitives.
+       * Note that primitives which don't require a GS program have already
+       * been weeded out by now.
+       */
+      switch (key->primitive) {
+      case _3DPRIM_QUADLIST:
+        brw_gs_quads( &c, key );
+        break;
+      case _3DPRIM_QUADSTRIP:
+        brw_gs_quad_strip( &c, key );
+        break;
+      case _3DPRIM_LINELOOP:
+        brw_gs_lines( &c );
+        break;
+      default:
+        ralloc_free(mem_ctx);
+        return;
+      }
    }
 
    /* get the program
@@ -129,52 +151,83 @@ static void compile_gs_prog( struct brw_context *brw,
    ralloc_free(mem_ctx);
 }
 
-static const GLenum gs_prim[GL_POLYGON+1] = {  
-   GL_POINTS,
-   GL_LINES,
-   GL_LINE_LOOP,
-   GL_LINES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_QUADS,
-   GL_QUAD_STRIP,
-   GL_TRIANGLES
-};
-
 static void populate_key( struct brw_context *brw,
                          struct brw_gs_prog_key *key )
 {
+   static const unsigned swizzle_for_offset[4] = {
+      BRW_SWIZZLE4(0, 1, 2, 3),
+      BRW_SWIZZLE4(1, 2, 3, 3),
+      BRW_SWIZZLE4(2, 3, 3, 3),
+      BRW_SWIZZLE4(3, 3, 3, 3)
+   };
+
    struct gl_context *ctx = &brw->intel.ctx;
    struct intel_context *intel = &brw->intel;
 
    memset(key, 0, sizeof(*key));
 
-   /* CACHE_NEW_VS_PROG */
-   key->attrs = brw->vs.prog_data->outputs_written;
+   /* CACHE_NEW_VS_PROG (part of VUE map) */
+   key->attrs = brw->vs.prog_data->base.vue_map.slots_valid;
 
    /* BRW_NEW_PRIMITIVE */
-   key->primitive = gs_prim[brw->primitive];
+   key->primitive = brw->primitive;
 
    /* _NEW_LIGHT */
    key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
-   if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) {
+   if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) {
       /* Provide consistent primitive order with brw_set_prim's
        * optimization of single quads to trifans.
        */
-      key->pv_first = GL_TRUE;
+      key->pv_first = true;
    }
 
-   key->need_gs_prog = (intel->gen >= 6)
-      ? 0
-      : (brw->primitive == GL_QUADS ||
-        brw->primitive == GL_QUAD_STRIP ||
-        brw->primitive == GL_LINE_LOOP);
+   if (intel->gen >= 7) {
+      /* On Gen7 and later, we don't use GS (yet). */
+      key->need_gs_prog = false;
+   } else if (intel->gen == 6) {
+      /* On Gen6, GS is used for transform feedback. */
+      /* BRW_NEW_TRANSFORM_FEEDBACK */
+      if (_mesa_is_xfb_active_and_unpaused(ctx)) {
+         const struct gl_shader_program *shaderprog =
+            ctx->Shader.CurrentVertexProgram;
+         const struct gl_transform_feedback_info *linked_xfb_info =
+            &shaderprog->LinkedTransformFeedback;
+         int i;
+
+         /* Make sure that the VUE slots won't overflow the unsigned chars in
+          * key->transform_feedback_bindings[].
+          */
+         STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
+
+         /* Make sure that we don't need more binding table entries than we've
+          * set aside for use in transform feedback.  (We shouldn't, since we
+          * set aside enough binding table entries to have one per component).
+          */
+         assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
+
+         key->need_gs_prog = true;
+         key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
+         for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
+            key->transform_feedback_bindings[i] =
+               linked_xfb_info->Outputs[i].OutputRegister;
+            key->transform_feedback_swizzles[i] =
+               swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
+         }
+      }
+   } else {
+      /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
+       * into simpler primitives.
+       */
+      key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
+                           brw->primitive == _3DPRIM_QUADSTRIP ||
+                           brw->primitive == _3DPRIM_LINELOOP);
+   }
 }
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void prepare_gs_prog(struct brw_context *brw)
+static void
+brw_upload_gs_prog(struct brw_context *brw)
 {
    struct brw_gs_prog_key key;
    /* Populate the key:
@@ -198,9 +251,10 @@ static void prepare_gs_prog(struct brw_context *brw)
 
 const struct brw_tracked_state brw_gs_prog = {
    .dirty = {
-      .mesa  = _NEW_LIGHT,
-      .brw   = BRW_NEW_PRIMITIVE,
+      .mesa  = (_NEW_LIGHT),
+      .brw   = (BRW_NEW_PRIMITIVE |
+                BRW_NEW_TRANSFORM_FEEDBACK),
       .cache = CACHE_NEW_VS_PROG
    },
-   .prepare = prepare_gs_prog
+   .emit = brw_upload_gs_prog
 };