nv50: fix build-predicate function
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_vcache.c
index 37ffbac4f92221e1c884e506dde9091c31e06015..a848b54f7d24ee7028821288f3074e50c8a1efda 100644 (file)
@@ -41,6 +41,7 @@
 #define FETCH_MAX 256
 #define DRAW_MAX (16*1024)
 
+
 struct vcache_frontend {
    struct draw_pt_front_end base;
    struct draw_context *draw;
@@ -54,7 +55,7 @@ struct vcache_frontend {
    unsigned draw_count;
    unsigned fetch_count;
    unsigned fetch_max;
-   
+
    struct draw_pt_middle_end *middle;
 
    unsigned input_prim;
@@ -64,14 +65,15 @@ struct vcache_frontend {
    unsigned opt;
 };
 
-static INLINE void 
+
+static INLINE void
 vcache_flush( struct vcache_frontend *vcache )
 {
    if (vcache->middle_prim != vcache->output_prim) {
       vcache->middle_prim = vcache->output_prim;
-      vcache->middle->prepare( vcache->middle, 
-                               vcache->middle_prim, 
-                               vcache->opt, 
+      vcache->middle->prepare( vcache->middle,
+                               vcache->middle_prim,
+                               vcache->opt,
                                &vcache->fetch_max );
    }
 
@@ -88,12 +90,12 @@ vcache_flush( struct vcache_frontend *vcache )
    vcache->draw_count = 0;
 }
 
+
 static INLINE void 
 vcache_check_flush( struct vcache_frontend *vcache )
 {
-   if ( vcache->draw_count + 6 >= DRAW_MAX ||
-        vcache->fetch_count + 4 >= FETCH_MAX )
-   {
+   if (vcache->draw_count + 6 >= DRAW_MAX ||
+       vcache->fetch_count + 6 >= FETCH_MAX) {
       vcache_flush( vcache );
    }
 }
@@ -145,6 +147,7 @@ vcache_triangle_flags( struct vcache_frontend *vcache,
    vcache_check_flush(vcache);
 }
 
+
 static INLINE void 
 vcache_line( struct vcache_frontend *vcache,
              unsigned i0,
@@ -176,65 +179,86 @@ vcache_point( struct vcache_frontend *vcache,
    vcache_check_flush(vcache);
 }
 
-static INLINE void 
-vcache_quad( struct vcache_frontend *vcache,
-             unsigned i0,
-             unsigned i1,
-             unsigned i2,
-             unsigned i3 )
+
+static INLINE void
+vcache_line_adj_flags( struct vcache_frontend *vcache,
+                       unsigned flags,
+                       unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
 {
-   vcache_triangle( vcache, i0, i1, i3 );
-   vcache_triangle( vcache, i1, i2, i3 );
+   vcache_elt(vcache, a0, 0);
+   vcache_elt(vcache, i0, flags);
+   vcache_elt(vcache, i1, 0);
+   vcache_elt(vcache, a1, 0);
+   vcache_check_flush(vcache);
 }
 
-static INLINE void 
-vcache_ef_quad( struct vcache_frontend *vcache,
-                unsigned i0,
-                unsigned i1,
-                unsigned i2,
-                unsigned i3 )
+
+static INLINE void
+vcache_line_adj( struct vcache_frontend *vcache,
+                 unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
 {
-   if (vcache->draw->rasterizer->flatshade_first) {
-      vcache_triangle_flags( vcache,
-                             ( DRAW_PIPE_RESET_STIPPLE |
-                               DRAW_PIPE_EDGE_FLAG_0 |
-                               DRAW_PIPE_EDGE_FLAG_1 ),
-                             i0, i1, i2 );
-
-      vcache_triangle_flags( vcache,
-                             ( DRAW_PIPE_EDGE_FLAG_2 |
-                               DRAW_PIPE_EDGE_FLAG_1 ),
-                             i0, i2, i3 );
-   }
-   else {
-      vcache_triangle_flags( vcache,
-                             ( DRAW_PIPE_RESET_STIPPLE |
-                               DRAW_PIPE_EDGE_FLAG_0 |
-                               DRAW_PIPE_EDGE_FLAG_2 ),
-                             i0, i1, i3 );
-
-      vcache_triangle_flags( vcache,
-                             ( DRAW_PIPE_EDGE_FLAG_0 |
-                               DRAW_PIPE_EDGE_FLAG_1 ),
-                             i1, i2, i3 );
-   }
+   vcache_elt(vcache, a0, 0);
+   vcache_elt(vcache, i0, 0);
+   vcache_elt(vcache, i1, 0);
+   vcache_elt(vcache, a1, 0);
+   vcache_check_flush(vcache);
 }
 
+/** Queue six elts (i0,a0,i1,a1,i2,a2) via vcache_elt(); flags go with i0 only. */
+static INLINE void
+vcache_triangle_adj_flags( struct vcache_frontend *vcache,
+                           unsigned flags,
+                           unsigned i0, unsigned a0,
+                           unsigned i1, unsigned a1,
+                           unsigned i2, unsigned a2 )
+{
+   vcache_elt(vcache, i0, flags);   /* the only element that receives the caller's flags */
+   vcache_elt(vcache, a0, 0);
+   vcache_elt(vcache, i1, 0);
+   vcache_elt(vcache, a1, 0);
+   vcache_elt(vcache, i2, 0);
+   vcache_elt(vcache, a2, 0);
+   vcache_check_flush(vcache);      /* flushes once draw/fetch counts get within 6 of their maximums */
+}
+
+/** Queue six elts (i0,a0,i1,a1,i2,a2) via vcache_elt(), every one with flags 0. */
+static INLINE void
+vcache_triangle_adj( struct vcache_frontend *vcache,
+                     unsigned i0, unsigned a0,
+                     unsigned i1, unsigned a1,
+                     unsigned i2, unsigned a2 )
+{
+   vcache_elt(vcache, i0, 0);
+   vcache_elt(vcache, a0, 0);
+   vcache_elt(vcache, i1, 0);
+   vcache_elt(vcache, a1, 0);
+   vcache_elt(vcache, i2, 0);
+   vcache_elt(vcache, a2, 0);
+   vcache_check_flush(vcache);      /* flushes once draw/fetch counts get within 6 of their maximums */
+}
+
+
 /* At least for now, we're back to using a template include file for
  * this.  The two paths aren't too different though - it may be
  * possible to reunify them.
  */
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3)        vcache_ef_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1)        vcache_line_flags(vc,flags,i0,i1)
-#define POINT(vc,i0)                vcache_point(vc,i0)
+#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2)
+#define LINE(flags,i0,i1)        vcache_line_flags(vcache,flags,i0,i1)
+#define POINT(i0)                vcache_point(vcache,i0)
+#define LINE_ADJ(flags,a0,i0,i1,a1) \
+   vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1)
+#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
+   vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2)
 #define FUNC vcache_run_extras
 #include "draw_pt_vcache_tmp.h"
 
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3)        vcache_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1)        vcache_line(vc,i0,i1)
-#define POINT(vc,i0)                vcache_point(vc,i0)
+#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2)
+#define LINE(flags,i0,i1)        vcache_line(vcache,i0,i1)
+#define POINT(i0)                vcache_point(vcache,i0)
+#define LINE_ADJ(flags,a0,i0,i1,a1) \
+   vcache_line_adj(vcache,a0,i0,i1,a1)
+#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
+   vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2)
 #define FUNC vcache_run
 #include "draw_pt_vcache_tmp.h"
 
@@ -245,23 +269,23 @@ rebase_uint_elts( const unsigned *src,
                   ushort *dest )
 {
    unsigned i;
-
    for (i = 0; i < count; i++) 
       dest[i] = (ushort)(src[i] + delta);
 }
 
+
 static INLINE void 
 rebase_ushort_elts( const ushort *src,
                     unsigned count,
                     int delta,
-                                ushort *dest )
+                    ushort *dest )
 {
    unsigned i;
-
    for (i = 0; i < count; i++) 
       dest[i] = (ushort)(src[i] + delta);
 }
 
+
 static INLINE void 
 rebase_ubyte_elts( const ubyte *src,
                    unsigned count,
@@ -269,42 +293,39 @@ rebase_ubyte_elts( const ubyte *src,
                    ushort *dest )
 {
    unsigned i;
-
    for (i = 0; i < count; i++) 
       dest[i] = (ushort)(src[i] + delta);
 }
 
 
-
 static INLINE void 
 translate_uint_elts( const unsigned *src,
                      unsigned count,
                      ushort *dest )
 {
    unsigned i;
-
    for (i = 0; i < count; i++) 
       dest[i] = (ushort)(src[i]);
 }
 
+
 static INLINE void 
 translate_ushort_elts( const ushort *src,
                        unsigned count,
                        ushort *dest )
 {
    unsigned i;
-
    for (i = 0; i < count; i++) 
       dest[i] = (ushort)(src[i]);
 }
 
+
 static INLINE void 
 translate_ubyte_elts( const ubyte *src,
                       unsigned count,
                       ushort *dest )
 {
    unsigned i;
-
    for (i = 0; i < count; i++) 
       dest[i] = (ushort)(src[i]);
 }
@@ -325,6 +346,26 @@ format_from_get_elt( pt_elt_func get_elt )
 }
 #endif
 
+
+/**
+ * Return TRUE if any of draw's vertex elements has a non-zero
+ * instance_divisor, FALSE otherwise.  Divisors complicate vertex
+ * fetching, so vcache_check_run() falls back to vcache_run() for them.
+ */
+static boolean
+any_instance_divisors(const struct draw_context *draw)
+{
+   uint i;
+
+   for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+      uint div = draw->pt.vertex_element[i].instance_divisor;
+      if (div)   /* one non-zero divisor is enough; stop scanning */
+         return TRUE;
+   }
+   return FALSE;   /* no element uses instancing (also the result for zero elements) */
+}
+
+
 static INLINE void 
 vcache_check_run( struct draw_pt_front_end *frontend, 
                   pt_elt_func get_elt,
@@ -334,45 +375,73 @@ vcache_check_run( struct draw_pt_front_end *frontend,
 {
    struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; 
    struct draw_context *draw = vcache->draw;
-   unsigned min_index = draw->pt.user.min_index;
-   unsigned max_index = draw->pt.user.max_index;
-   unsigned index_size = draw->pt.user.eltSize;
-   unsigned fetch_count = max_index + 1 - min_index;
+   const unsigned min_index = draw->pt.user.min_index;
+   const unsigned max_index = draw->pt.user.max_index;
+   const unsigned index_size = draw->pt.user.eltSize;
+   unsigned fetch_count;
    const ushort *transformed_elts;
    ushort *storage = NULL;
    boolean ok = FALSE;
 
+   /* debug: verify indexes are in range [min_index, max_index] */
+   if (0) {
+      unsigned i;
+      for (i = 0; i < draw_count; i++) {
+         if (index_size == 1) {
+            assert( ((const ubyte *) elts)[i] >= min_index);
+            assert( ((const ubyte *) elts)[i] <= max_index);
+         }
+         else if (index_size == 2) {
+            assert( ((const ushort *) elts)[i] >= min_index);
+            assert( ((const ushort *) elts)[i] <= max_index);
+         }
+         else {
+            assert(index_size == 4);
+            assert( ((const uint *) elts)[i] >= min_index);
+            assert( ((const uint *) elts)[i] <= max_index);
+         }
+      }
+   }
+
+   /* Note: max_index is frequently 0xffffffff so we have to be sure
+    * that any arithmetic involving max_index doesn't overflow!
+    */
+   if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES)
+      goto fail;
+
+   if (any_instance_divisors(draw))
+      goto fail;
+
+   fetch_count = max_index + 1 - min_index;
+
+   if (0)
+      debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, 
+                   vcache->fetch_max,
+                   draw_count);
 
-   if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, 
-                       vcache->fetch_max,
-                       draw_count);
-      
    if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
        fetch_count >= UNDEFINED_VERTEX_ID ||
        fetch_count > draw_count) {
       if (0) debug_printf("fail\n");
       goto fail;
    }
-      
+
    if (vcache->middle_prim != vcache->input_prim) {
       vcache->middle_prim = vcache->input_prim;
-      vcache->middle->prepare( vcache->middle, 
-                               vcache->middle_prim, 
-                               vcache->opt, 
+      vcache->middle->prepare( vcache->middle,
+                               vcache->middle_prim,
+                               vcache->opt,
                                &vcache->fetch_max );
    }
 
-
    assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
           (elt_bias <  0 && min_index + elt_bias <  min_index));
 
    if (min_index == 0 &&
-       index_size == 2)
-   {
+       index_size == 2) {
       transformed_elts = (const ushort *)elts;
    }
-   else 
-   {
+   else {
       storage = MALLOC( draw_count * sizeof(ushort) );
       if (!storage)
          goto fail;
@@ -407,23 +476,23 @@ vcache_check_run( struct draw_pt_front_end *frontend,
          switch(index_size) {
          case 1:
             rebase_ubyte_elts( (const ubyte *)elts,
-                                  draw_count,
-                                  0 - (int)min_index,
-                                  storage );
+                               draw_count,
+                               0 - (int)min_index,
+                               storage );
             break;
 
          case 2:
             rebase_ushort_elts( (const ushort *)elts,
-                                   draw_count,
-                                   0 - (int)min_index,
-                                   storage );
+                                draw_count,
+                                0 - (int)min_index,
+                                storage );
             break;
 
          case 4:
             rebase_uint_elts( (const uint *)elts,
-                                 draw_count,
-                                 0 - (int)min_index,
-                                 storage );
+                              draw_count,
+                              0 - (int)min_index,
+                              storage );
             break;
 
          default:
@@ -450,32 +519,47 @@ vcache_check_run( struct draw_pt_front_end *frontend,
    debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
                 fetch_count, draw_count);
 
- fail:
+fail:
    vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
 }
 
 
 
 
-static void 
+static void
 vcache_prepare( struct draw_pt_front_end *frontend,
-                unsigned prim,
+                unsigned in_prim,
                 struct draw_pt_middle_end *middle,
                 unsigned opt )
 {
    struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
 
-   if (opt & PT_PIPELINE)
-   {
+   if (opt & PT_PIPELINE) {
       vcache->base.run = vcache_run_extras;
    }
-   else 
-   {
+   else {
       vcache->base.run = vcache_check_run;
    }
 
-   vcache->input_prim = prim;
-   vcache->output_prim = u_reduced_prim(prim);
+   /* VCache will always emit the reduced version of its input
+    * primitive, ie STRIP/FANS become TRIS, etc.
+    *
+    * This is not to be confused with what the GS might be up to,
+    * which is a separate issue.
+    */
+   vcache->input_prim = in_prim;
+   switch (in_prim) {
+   case PIPE_PRIM_LINES_ADJACENCY:
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY;
+      break;
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY;
+      break;
+   default:
+      vcache->output_prim = u_reduced_prim(in_prim);
+   }
 
    vcache->middle = middle;
    vcache->opt = opt;
@@ -483,11 +567,13 @@ vcache_prepare( struct draw_pt_front_end *frontend,
    /* Have to run prepare here, but try and guess a good prim for
     * doing so:
     */
-   vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim;
-   middle->prepare( middle, vcache->middle_prim, opt, &vcache->fetch_max );
-}
-
+   vcache->middle_prim = (opt & PT_PIPELINE)
+      ? vcache->output_prim : vcache->input_prim;
 
+   middle->prepare( middle,
+                    vcache->middle_prim,
+                    opt, &vcache->fetch_max );
+}
 
 
 static void 
@@ -498,6 +584,7 @@ vcache_finish( struct draw_pt_front_end *frontend )
    vcache->middle = NULL;
 }
 
+
 static void 
 vcache_destroy( struct draw_pt_front_end *frontend )
 {