i965: Make the userclip flag for the VUE map come from VS prog data.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_gs.c
index 1bc3eccf49d9ad05869b5d29f23db42ed3b6fc34..c6132df26d185367b3b5186ab9f89c4595ed8253 100644 (file)
@@ -42,7 +42,7 @@
 #include "brw_state.h"
 #include "brw_gs.h"
 
-
+#include "glsl/ralloc.h"
 
 static void compile_gs_prog( struct brw_context *brw,
                             struct brw_gs_prog_key *key )
@@ -50,27 +50,20 @@ static void compile_gs_prog( struct brw_context *brw,
    struct intel_context *intel = &brw->intel;
    struct brw_gs_compile c;
    const GLuint *program;
+   void *mem_ctx;
    GLuint program_size;
 
    memset(&c, 0, sizeof(c));
    
    c.key = *key;
-   /* Need to locate the two positions present in vertex + header.
-    * These are currently hardcoded:
-    */
-   c.nr_attrs = brw_count_bits(c.key.attrs);
-
-   if (intel->is_ironlake)
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
-   else
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
-
-   c.nr_bytes = c.nr_regs * REG_SIZE;
+   brw_compute_vue_map(&c.vue_map, intel, brw->vs.prog_data);
+   c.nr_regs = (c.vue_map.num_slots + 1)/2;
 
+   mem_ctx = NULL;
    
    /* Begin the compilation:
     */
-   brw_init_compile(brw, &c.func);
+   brw_init_compile(brw, &c.func, mem_ctx);
 
    c.func.single_program_flow = 1;
 
@@ -79,99 +72,176 @@ static void compile_gs_prog( struct brw_context *brw,
     */
    brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
 
-
-   /* Note that primitives which don't require a GS program have
-    * already been weeded out by this stage:
-    */
-   switch (key->primitive) {
-   case GL_QUADS:
-      brw_gs_quads( &c, key );
-      break;
-   case GL_QUAD_STRIP:
-      brw_gs_quad_strip( &c, key );
-      break;
-   case GL_LINE_LOOP:
-      brw_gs_lines( &c );
-      break;
-   case GL_LINES:
-      if (key->hint_gs_always)
-        brw_gs_lines( &c );
-      else {
+   if (intel->gen >= 6) {
+      unsigned num_verts;
+      bool check_edge_flag;
+      /* On Sandybridge, we use the GS for implementing transform feedback
+       * (called "Stream Out" in the PRM).
+       */
+      switch (key->primitive) {
+      case _3DPRIM_POINTLIST:
+         num_verts = 1;
+         check_edge_flag = false;
+        break;
+      case _3DPRIM_LINELIST:
+      case _3DPRIM_LINESTRIP:
+      case _3DPRIM_LINELOOP:
+         num_verts = 2;
+         check_edge_flag = false;
+        break;
+      case _3DPRIM_TRILIST:
+      case _3DPRIM_TRIFAN:
+      case _3DPRIM_TRISTRIP:
+      case _3DPRIM_RECTLIST:
+        num_verts = 3;
+         check_edge_flag = false;
+         break;
+      case _3DPRIM_QUADLIST:
+      case _3DPRIM_QUADSTRIP:
+      case _3DPRIM_POLYGON:
+         num_verts = 3;
+         check_edge_flag = true;
+         break;
+      default:
+        assert(!"Unexpected primitive type in Gen6 SOL program.");
         return;
       }
-      break;
-   case GL_TRIANGLES:
-      if (key->hint_gs_always)
-        brw_gs_tris( &c );
-      else {
-        return;
-      }
-      break;
-   case GL_POINTS:
-      if (key->hint_gs_always)
-        brw_gs_points( &c );
-      else {
+      gen6_sol_program(&c, key, num_verts, check_edge_flag);
+   } else {
+      /* On Gen4-5, we use the GS to decompose certain types of primitives.
+       * Note that primitives which don't require a GS program have already
+       * been weeded out by now.
+       */
+      switch (key->primitive) {
+      case _3DPRIM_QUADLIST:
+        brw_gs_quads( &c, key );
+        break;
+      case _3DPRIM_QUADSTRIP:
+        brw_gs_quad_strip( &c, key );
+        break;
+      case _3DPRIM_LINELOOP:
+        brw_gs_lines( &c );
+        break;
+      default:
+        ralloc_free(mem_ctx);
         return;
       }
-      break;      
-   default:
-      return;
    }
 
    /* get the program
     */
    program = brw_get_program(&c.func, &program_size);
 
-   /* Upload
-    */
-   dri_bo_unreference(brw->gs.prog_bo);
-   brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
-                                      &c.key, sizeof(c.key),
-                                      NULL, 0,
-                                      program, program_size,
-                                      &c.prog_data,
-                                      &brw->gs.prog_data );
-}
+   if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
+      int i;
 
-static const GLenum gs_prim[GL_POLYGON+1] = {  
-   GL_POINTS,
-   GL_LINES,
-   GL_LINE_LOOP,
-   GL_LINES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_QUADS,
-   GL_QUAD_STRIP,
-   GL_TRIANGLES
-};
+      printf("gs:\n");
+      for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
+        brw_disasm(stdout, &((struct brw_instruction *)program)[i],
+                   intel->gen);
+      printf("\n");
+    }
+
+   brw_upload_cache(&brw->cache, BRW_GS_PROG,
+                   &c.key, sizeof(c.key),
+                   program, program_size,
+                   &c.prog_data, sizeof(c.prog_data),
+                   &brw->gs.prog_offset, &brw->gs.prog_data);
+   ralloc_free(mem_ctx);
+}
 
 static void populate_key( struct brw_context *brw,
                          struct brw_gs_prog_key *key )
 {
-   GLcontext *ctx = &brw->intel.ctx;
+   static const unsigned swizzle_for_offset[4] = {
+      BRW_SWIZZLE4(0, 1, 2, 3),
+      BRW_SWIZZLE4(1, 2, 3, 3),
+      BRW_SWIZZLE4(2, 3, 3, 3),
+      BRW_SWIZZLE4(3, 3, 3, 3)
+   };
+
+   struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+
    memset(key, 0, sizeof(*key));
 
-   /* CACHE_NEW_VS_PROG */
+   /* CACHE_NEW_VS_PROG (part of VUE map) */
    key->attrs = brw->vs.prog_data->outputs_written;
 
    /* BRW_NEW_PRIMITIVE */
-   key->primitive = gs_prim[brw->primitive];
+   key->primitive = brw->primitive;
 
-   key->hint_gs_always = 0;    /* debug code? */
-   
    /* _NEW_LIGHT */
    key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
+   if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) {
+      /* Provide consistent primitive order with brw_set_prim's
+       * optimization of single quads to trifans.
+       */
+      key->pv_first = true;
+   }
+
+   /* CACHE_NEW_VS_PROG (part of VUE map)*/
+   key->userclip_active = brw->vs.prog_data->userclip;
+
+   if (intel->gen >= 7) {
+      /* On Gen7 and later, we don't use GS (yet). */
+      key->need_gs_prog = false;
+   } else if (intel->gen == 6) {
+      /* On Gen6, GS is used for transform feedback. */
+      /* _NEW_TRANSFORM_FEEDBACK */
+      if (ctx->TransformFeedback.CurrentObject->Active &&
+          !ctx->TransformFeedback.CurrentObject->Paused) {
+         const struct gl_shader_program *shaderprog =
+            ctx->Shader.CurrentVertexProgram;
+         const struct gl_transform_feedback_info *linked_xfb_info =
+            &shaderprog->LinkedTransformFeedback;
+         int i;
 
-   key->need_gs_prog = (key->hint_gs_always ||
-                       brw->primitive == GL_QUADS ||
-                       brw->primitive == GL_QUAD_STRIP ||
-                       brw->primitive == GL_LINE_LOOP);
+         /* Make sure that the VUE slots won't overflow the unsigned chars in
+          * key->transform_feedback_bindings[].
+          */
+         STATIC_ASSERT(BRW_VERT_RESULT_MAX <= 256);
+
+         /* Make sure that we don't need more binding table entries than we've
+          * set aside for use in transform feedback.  (We shouldn't, since we
+          * set aside enough binding table entries to have one per component).
+          */
+         assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
+
+         key->need_gs_prog = true;
+         key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
+         for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
+            key->transform_feedback_bindings[i] =
+               linked_xfb_info->Outputs[i].OutputRegister;
+            key->transform_feedback_swizzles[i] =
+               swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
+         }
+      }
+      /* On Gen6, GS is also used for rasterizer discard. */
+      /* _NEW_RASTERIZER_DISCARD */
+      if (ctx->RasterDiscard) {
+         key->need_gs_prog = true;
+         key->rasterizer_discard = true;
+      }
+   } else {
+      /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
+       * into simpler primitives.
+       */
+      key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
+                           brw->primitive == _3DPRIM_QUADSTRIP ||
+                           brw->primitive == _3DPRIM_LINELOOP);
+   }
+   /* For testing, the environment variable INTEL_FORCE_GS can be used to
+    * force a GS program to be used, even if it's not necessary.
+    */
+   if (getenv("INTEL_FORCE_GS"))
+      key->need_gs_prog = true;
 }
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void prepare_gs_prog(struct brw_context *brw)
+static void
+brw_upload_gs_prog(struct brw_context *brw)
 {
    struct brw_gs_prog_key key;
    /* Populate the key:
@@ -184,22 +254,22 @@ static void prepare_gs_prog(struct brw_context *brw)
    }
 
    if (brw->gs.prog_active) {
-      dri_bo_unreference(brw->gs.prog_bo);
-      brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
-                                        &key, sizeof(key),
-                                        NULL, 0,
-                                        &brw->gs.prog_data);
-      if (brw->gs.prog_bo == NULL)
+      if (!brw_search_cache(&brw->cache, BRW_GS_PROG,
+                           &key, sizeof(key),
+                           &brw->gs.prog_offset, &brw->gs.prog_data)) {
         compile_gs_prog( brw, &key );
+      }
    }
 }
 
 
 const struct brw_tracked_state brw_gs_prog = {
    .dirty = {
-      .mesa  = _NEW_LIGHT,
+      .mesa  = (_NEW_LIGHT |
+                _NEW_TRANSFORM_FEEDBACK |
+                _NEW_RASTERIZER_DISCARD),
       .brw   = BRW_NEW_PRIMITIVE,
       .cache = CACHE_NEW_VS_PROG
    },
-   .prepare = prepare_gs_prog
+   .emit = brw_upload_gs_prog
 };