i965/vec4: Make with_writemask() non-static.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_gs.c
index 69ffa19c40c1be1375ee206049cc46d7b2f263ae..3882c06b75cae525de5a0772f3a54c4fad80ebeb 100644 (file)
@@ -32,6 +32,7 @@
 #include "main/glheader.h"
 #include "main/macros.h"
 #include "main/enums.h"
+#include "main/transformfeedback.h"
 
 #include "intel_batchbuffer.h"
 
 
 #include "glsl/ralloc.h"
 
-static void compile_gs_prog( struct brw_context *brw,
-                            struct brw_gs_prog_key *key )
+static void compile_ff_gs_prog(struct brw_context *brw,
+                               struct brw_ff_gs_prog_key *key)
 {
-   struct intel_context *intel = &brw->intel;
-   struct brw_gs_compile c;
+   struct brw_ff_gs_compile c;
    const GLuint *program;
    void *mem_ctx;
    GLuint program_size;
@@ -56,12 +56,10 @@ static void compile_gs_prog( struct brw_context *brw,
    memset(&c, 0, sizeof(c));
    
    c.key = *key;
-   /* The geometry shader needs to access the entire VUE. */
-   struct brw_vue_map vue_map;
-   brw_compute_vue_map(&vue_map, intel, c.key.userclip_active, c.key.attrs);
-   c.nr_regs = (vue_map.num_slots + 1)/2;
+   c.vue_map = brw->vs.prog_data->base.vue_map;
+   c.nr_regs = (c.vue_map.num_slots + 1)/2;
 
-   mem_ctx = NULL;
+   mem_ctx = ralloc_context(NULL);
    
    /* Begin the compilation:
     */
@@ -74,7 +72,7 @@ static void compile_gs_prog( struct brw_context *brw,
     */
    brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
 
-   if (intel->gen >= 6) {
+   if (brw->gen >= 6) {
       unsigned num_verts;
       bool check_edge_flag;
       /* On Sandybridge, we use the GS for implementing transform feedback
@@ -116,13 +114,13 @@ static void compile_gs_prog( struct brw_context *brw,
        */
       switch (key->primitive) {
       case _3DPRIM_QUADLIST:
-        brw_gs_quads( &c, key );
+        brw_ff_gs_quads( &c, key );
         break;
       case _3DPRIM_QUADSTRIP:
-        brw_gs_quad_strip( &c, key );
+        brw_ff_gs_quad_strip( &c, key );
         break;
       case _3DPRIM_LINELOOP:
-        brw_gs_lines( &c );
+        brw_ff_gs_lines( &c );
         break;
       default:
         ralloc_free(mem_ctx);
@@ -140,28 +138,34 @@ static void compile_gs_prog( struct brw_context *brw,
       printf("gs:\n");
       for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
         brw_disasm(stdout, &((struct brw_instruction *)program)[i],
-                   intel->gen);
+                   brw->gen);
       printf("\n");
     }
 
-   brw_upload_cache(&brw->cache, BRW_GS_PROG,
+   brw_upload_cache(&brw->cache, BRW_FF_GS_PROG,
                    &c.key, sizeof(c.key),
                    program, program_size,
                    &c.prog_data, sizeof(c.prog_data),
-                   &brw->gs.prog_offset, &brw->gs.prog_data);
+                   &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data);
    ralloc_free(mem_ctx);
 }
 
-static void populate_key( struct brw_context *brw,
-                         struct brw_gs_prog_key *key )
+static void populate_key(struct brw_context *brw,
+                         struct brw_ff_gs_prog_key *key)
 {
-   struct gl_context *ctx = &brw->intel.ctx;
-   struct intel_context *intel = &brw->intel;
+   static const unsigned swizzle_for_offset[4] = {
+      BRW_SWIZZLE4(0, 1, 2, 3),
+      BRW_SWIZZLE4(1, 2, 3, 3),
+      BRW_SWIZZLE4(2, 3, 3, 3),
+      BRW_SWIZZLE4(3, 3, 3, 3)
+   };
+
+   struct gl_context *ctx = &brw->ctx;
 
    memset(key, 0, sizeof(*key));
 
-   /* CACHE_NEW_VS_PROG */
-   key->attrs = brw->vs.prog_data->outputs_written;
+   /* CACHE_NEW_VS_PROG (part of VUE map) */
+   key->attrs = brw->vs.prog_data->base.vue_map.slots_valid;
 
    /* BRW_NEW_PRIMITIVE */
    key->primitive = brw->primitive;
@@ -175,16 +179,39 @@ static void populate_key( struct brw_context *brw,
       key->pv_first = true;
    }
 
-   /* _NEW_TRANSFORM */
-   key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
-
-   if (intel->gen >= 7) {
+   if (brw->gen >= 7) {
       /* On Gen7 and later, we don't use GS (yet). */
       key->need_gs_prog = false;
-   } else if (intel->gen == 6) {
+   } else if (brw->gen == 6) {
       /* On Gen6, GS is used for transform feedback. */
-      /* _NEW_TRANSFORM_FEEDBACK */
-      key->need_gs_prog = ctx->TransformFeedback.CurrentObject->Active;
+      /* BRW_NEW_TRANSFORM_FEEDBACK */
+      if (_mesa_is_xfb_active_and_unpaused(ctx)) {
+         const struct gl_shader_program *shaderprog =
+            ctx->Shader.CurrentVertexProgram;
+         const struct gl_transform_feedback_info *linked_xfb_info =
+            &shaderprog->LinkedTransformFeedback;
+         int i;
+
+         /* Make sure that the VUE slots won't overflow the unsigned chars in
+          * key->transform_feedback_bindings[].
+          */
+         STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
+
+         /* Make sure that we don't need more binding table entries than we've
+          * set aside for use in transform feedback.  (We shouldn't, since we
+          * set aside enough binding table entries to have one per component).
+          */
+         assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
+
+         key->need_gs_prog = true;
+         key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
+         for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
+            key->transform_feedback_bindings[i] =
+               linked_xfb_info->Outputs[i].OutputRegister;
+            key->transform_feedback_swizzles[i] =
+               swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
+         }
+      }
    } else {
       /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
        * into simpler primitives.
@@ -193,45 +220,39 @@ static void populate_key( struct brw_context *brw,
                            brw->primitive == _3DPRIM_QUADSTRIP ||
                            brw->primitive == _3DPRIM_LINELOOP);
    }
-   /* For testing, the environment variable INTEL_FORCE_GS can be used to
-    * force a GS program to be used, even if it's not necessary.
-    */
-   if (getenv("INTEL_FORCE_GS"))
-      key->need_gs_prog = true;
 }
 
 /* Calculate interpolants for triangle and line rasterization.
  */
 static void
-brw_upload_gs_prog(struct brw_context *brw)
+brw_upload_ff_gs_prog(struct brw_context *brw)
 {
-   struct brw_gs_prog_key key;
+   struct brw_ff_gs_prog_key key;
    /* Populate the key:
     */
    populate_key(brw, &key);
 
-   if (brw->gs.prog_active != key.need_gs_prog) {
-      brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
-      brw->gs.prog_active = key.need_gs_prog;
+   if (brw->ff_gs.prog_active != key.need_gs_prog) {
+      brw->state.dirty.cache |= CACHE_NEW_FF_GS_PROG;
+      brw->ff_gs.prog_active = key.need_gs_prog;
    }
 
-   if (brw->gs.prog_active) {
-      if (!brw_search_cache(&brw->cache, BRW_GS_PROG,
+   if (brw->ff_gs.prog_active) {
+      if (!brw_search_cache(&brw->cache, BRW_FF_GS_PROG,
                            &key, sizeof(key),
-                           &brw->gs.prog_offset, &brw->gs.prog_data)) {
-        compile_gs_prog( brw, &key );
+                           &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data)) {
+        compile_ff_gs_prog( brw, &key );
       }
    }
 }
 
 
-const struct brw_tracked_state brw_gs_prog = {
+const struct brw_tracked_state brw_ff_gs_prog = {
    .dirty = {
-      .mesa  = (_NEW_LIGHT |
-                _NEW_TRANSFORM |
-                _NEW_TRANSFORM_FEEDBACK),
-      .brw   = BRW_NEW_PRIMITIVE,
+      .mesa  = (_NEW_LIGHT),
+      .brw   = (BRW_NEW_PRIMITIVE |
+                BRW_NEW_TRANSFORM_FEEDBACK),
       .cache = CACHE_NEW_VS_PROG
    },
-   .emit = brw_upload_gs_prog
+   .emit = brw_upload_ff_gs_prog
 };