draw: add stream output decomposition file
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_vcache.c
index 2eafe270bc0a688724fef8290e4d34a5a8aaeabf..914c87a9dc485a38c8d63c4a6a7f51d8431a8b57 100644 (file)
@@ -30,7 +30,8 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "pipe/p_util.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_pt.h"
@@ -53,15 +54,28 @@ struct vcache_frontend {
    unsigned draw_count;
    unsigned fetch_count;
    unsigned fetch_max;
-   
+
    struct draw_pt_middle_end *middle;
 
    unsigned input_prim;
    unsigned output_prim;
+
+   unsigned middle_prim;
+   unsigned opt;
 };
 
-static void vcache_flush( struct vcache_frontend *vcache )
+static INLINE void
+vcache_flush( struct vcache_frontend *vcache )
 {
+   if (vcache->middle_prim != vcache->output_prim) {
+      vcache->middle_prim = vcache->output_prim;
+      vcache->middle->prepare( vcache->middle,
+                               vcache->input_prim,
+                               vcache->middle_prim,
+                               vcache->opt,
+                               &vcache->fetch_max );
+   }
+
    if (vcache->draw_count) {
       vcache->middle->run( vcache->middle,
                            vcache->fetch_elts,
@@ -75,7 +89,8 @@ static void vcache_flush( struct vcache_frontend *vcache )
    vcache->draw_count = 0;
 }
 
-static void vcache_check_flush( struct vcache_frontend *vcache )
+static INLINE void 
+vcache_check_flush( struct vcache_frontend *vcache )
 {
    if ( vcache->draw_count + 6 >= DRAW_MAX ||
         vcache->fetch_count + 4 >= FETCH_MAX )
@@ -85,9 +100,10 @@ static void vcache_check_flush( struct vcache_frontend *vcache )
 }
 
 
-static INLINE void vcache_elt( struct vcache_frontend *vcache,
-                               unsigned felt,
-                               ushort flags )
+static INLINE void 
+vcache_elt( struct vcache_frontend *vcache,
+            unsigned felt,
+            ushort flags )
 {
    unsigned idx = felt % CACHE_MAX;
 
@@ -104,10 +120,11 @@ static INLINE void vcache_elt( struct vcache_frontend *vcache,
 
 
                    
-static void vcache_triangle( struct vcache_frontend *vcache,
-                             unsigned i0,
-                             unsigned i1,
-                             unsigned i2 )
+static INLINE void 
+vcache_triangle( struct vcache_frontend *vcache,
+                 unsigned i0,
+                 unsigned i1,
+                 unsigned i2 )
 {
    vcache_elt(vcache, i0, 0);
    vcache_elt(vcache, i1, 0);
@@ -116,11 +133,12 @@ static void vcache_triangle( struct vcache_frontend *vcache,
 }
 
                          
-static void vcache_triangle_flags( struct vcache_frontend *vcache,
-                                   ushort flags,
-                                   unsigned i0,
-                                   unsigned i1,
-                                   unsigned i2 )
+static INLINE void 
+vcache_triangle_flags( struct vcache_frontend *vcache,
+                       ushort flags,
+                       unsigned i0,
+                       unsigned i1,
+                       unsigned i2 )
 {
    vcache_elt(vcache, i0, flags);
    vcache_elt(vcache, i1, 0);
@@ -128,9 +146,10 @@ static void vcache_triangle_flags( struct vcache_frontend *vcache,
    vcache_check_flush(vcache);
 }
 
-static void vcache_line( struct vcache_frontend *vcache,
-                         unsigned i0,
-                         unsigned i1 )
+static INLINE void 
+vcache_line( struct vcache_frontend *vcache,
+             unsigned i0,
+             unsigned i1 )
 {
    vcache_elt(vcache, i0, 0);
    vcache_elt(vcache, i1, 0);
@@ -138,10 +157,11 @@ static void vcache_line( struct vcache_frontend *vcache,
 }
 
 
-static void vcache_line_flags( struct vcache_frontend *vcache,
-                               ushort flags,
-                               unsigned i0,
-                               unsigned i1 )
+static INLINE void 
+vcache_line_flags( struct vcache_frontend *vcache,
+                   ushort flags,
+                   unsigned i0,
+                   unsigned i1 )
 {
    vcache_elt(vcache, i0, flags);
    vcache_elt(vcache, i1, 0);
@@ -149,39 +169,66 @@ static void vcache_line_flags( struct vcache_frontend *vcache,
 }
 
 
-static void vcache_point( struct vcache_frontend *vcache,
-                          unsigned i0 )
+static INLINE void 
+vcache_point( struct vcache_frontend *vcache,
+              unsigned i0 )
 {
    vcache_elt(vcache, i0, 0);
    vcache_check_flush(vcache);
 }
 
-static void vcache_quad( struct vcache_frontend *vcache,
-                         unsigned i0,
-                         unsigned i1,
-                         unsigned i2,
-                         unsigned i3 )
+static INLINE void 
+vcache_quad( struct vcache_frontend *vcache,
+             unsigned i0,
+             unsigned i1,
+             unsigned i2,
+             unsigned i3 )
 {
-   vcache_triangle( vcache, i0, i1, i3 );
-   vcache_triangle( vcache, i1, i2, i3 );
+   if (vcache->draw->rasterizer->flatshade_first) {
+      /* pass last quad vertex as first triangle vertex */
+      vcache_triangle( vcache, i3, i0, i1 );
+      vcache_triangle( vcache, i3, i1, i2 );
+   }
+   else {
+      /* pass last quad vertex as last triangle vertex */
+      vcache_triangle( vcache, i0, i1, i3 );
+      vcache_triangle( vcache, i1, i2, i3 );
+   }
 }
 
-static void vcache_ef_quad( struct vcache_frontend *vcache,
-                            unsigned i0,
-                            unsigned i1,
-                            unsigned i2,
-                            unsigned i3 )
+static INLINE void 
+vcache_ef_quad( struct vcache_frontend *vcache,
+                unsigned i0,
+                unsigned i1,
+                unsigned i2,
+                unsigned i3 )
 {
-   vcache_triangle_flags( vcache,
-                          ( DRAW_PIPE_RESET_STIPPLE |
-                            DRAW_PIPE_EDGE_FLAG_0 |
-                            DRAW_PIPE_EDGE_FLAG_2 ),
-                          i0, i1, i3 );
-
-   vcache_triangle_flags( vcache,
-                          ( DRAW_PIPE_EDGE_FLAG_0 |
-                            DRAW_PIPE_EDGE_FLAG_1 ),
-                          i1, i2, i3 );
+   if (vcache->draw->rasterizer->flatshade_first) {
+      /* pass last quad vertex as first triangle vertex */
+      vcache_triangle_flags( vcache,
+                             ( DRAW_PIPE_RESET_STIPPLE |
+                               DRAW_PIPE_EDGE_FLAG_0 |
+                               DRAW_PIPE_EDGE_FLAG_1 ),
+                             i3, i0, i1 );
+
+      vcache_triangle_flags( vcache,
+                             ( DRAW_PIPE_EDGE_FLAG_1 |
+                               DRAW_PIPE_EDGE_FLAG_2 ),
+                             i3, i1, i2 );
+   }
+   else {
+      /* pass last quad vertex as last triangle vertex */
+      vcache_triangle_flags( vcache,
+                             ( DRAW_PIPE_RESET_STIPPLE |
+                               DRAW_PIPE_EDGE_FLAG_0 |
+                               DRAW_PIPE_EDGE_FLAG_2 ),
+                             i0, i1, i3 );
+
+      vcache_triangle_flags( vcache,
+                             ( DRAW_PIPE_EDGE_FLAG_0 |
+                               DRAW_PIPE_EDGE_FLAG_1 ),
+                             i1, i2, i3 );
+   }
 }
 
 /* At least for now, we're back to using a template include file for
@@ -202,10 +249,11 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
 #define FUNC vcache_run
 #include "draw_pt_vcache_tmp.h"
 
-static void rebase_uint_elts( const unsigned *src,
-                              unsigned count,
-                              int delta,
-                              ushort *dest )
+static INLINE void 
+rebase_uint_elts( const unsigned *src,
+                  unsigned count,
+                  int delta,
+                  ushort *dest )
 {
    unsigned i;
 
@@ -213,9 +261,10 @@ static void rebase_uint_elts( const unsigned *src,
       dest[i] = (ushort)(src[i] + delta);
 }
 
-static void rebase_ushort_elts( const ushort *src,
-                                unsigned count,
-                                int delta,
+static INLINE void 
+rebase_ushort_elts( const ushort *src,
+                    unsigned count,
+                    int delta,
                                 ushort *dest )
 {
    unsigned i;
@@ -224,10 +273,11 @@ static void rebase_ushort_elts( const ushort *src,
       dest[i] = (ushort)(src[i] + delta);
 }
 
-static void rebase_ubyte_elts( const ubyte *src,
-                               unsigned count,
-                               int delta,
-                               ushort *dest )
+static INLINE void 
+rebase_ubyte_elts( const ubyte *src,
+                   unsigned count,
+                   int delta,
+                   ushort *dest )
 {
    unsigned i;
 
@@ -237,9 +287,10 @@ static void rebase_ubyte_elts( const ubyte *src,
 
 
 
-static void translate_uint_elts( const unsigned *src,
-                                 unsigned count,
-                                 ushort *dest )
+static INLINE void 
+translate_uint_elts( const unsigned *src,
+                     unsigned count,
+                     ushort *dest )
 {
    unsigned i;
 
@@ -247,9 +298,10 @@ static void translate_uint_elts( const unsigned *src,
       dest[i] = (ushort)(src[i]);
 }
 
-static void translate_ushort_elts( const ushort *src,
-                                   unsigned count,
-                                   ushort *dest )
+static INLINE void 
+translate_ushort_elts( const ushort *src,
+                       unsigned count,
+                       ushort *dest )
 {
    unsigned i;
 
@@ -257,9 +309,10 @@ static void translate_ushort_elts( const ushort *src,
       dest[i] = (ushort)(src[i]);
 }
 
-static void translate_ubyte_elts( const ubyte *src,
-                                  unsigned count,
-                                  ushort *dest )
+static INLINE void 
+translate_ubyte_elts( const ubyte *src,
+                      unsigned count,
+                      ushort *dest )
 {
    unsigned i;
 
@@ -271,7 +324,8 @@ static void translate_ubyte_elts( const ubyte *src,
 
 
 #if 0
-static enum pipe_format format_from_get_elt( pt_elt_func get_elt )
+static INLINE enum pipe_format 
+format_from_get_elt( pt_elt_func get_elt )
 {
    switch (draw->pt.user.eltSize) {
    case 1: return PIPE_FORMAT_R8_UNORM;
@@ -282,10 +336,12 @@ static enum pipe_format format_from_get_elt( pt_elt_func get_elt )
 }
 #endif
 
-static void vcache_check_run( struct draw_pt_front_end *frontend, 
-                              pt_elt_func get_elt,
-                              const void *elts,
-                              unsigned draw_count )
+static INLINE void 
+vcache_check_run( struct draw_pt_front_end *frontend, 
+                  pt_elt_func get_elt,
+                  const void *elts,
+                  int elt_bias,
+                  unsigned draw_count )
 {
    struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; 
    struct draw_context *draw = vcache->draw;
@@ -295,22 +351,35 @@ static void vcache_check_run( struct draw_pt_front_end *frontend,
    unsigned fetch_count = max_index + 1 - min_index;
    const ushort *transformed_elts;
    ushort *storage = NULL;
+   boolean ok = FALSE;
 
 
    if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, 
                        vcache->fetch_max,
                        draw_count);
-      
-   if (max_index == 0xffffffff ||
-       fetch_count >= vcache->fetch_max ||
+
+   if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
+       fetch_count >= UNDEFINED_VERTEX_ID ||
        fetch_count > draw_count) {
       if (0) debug_printf("fail\n");
       goto fail;
    }
-      
+
+   if (vcache->middle_prim != vcache->input_prim) {
+      vcache->middle_prim = vcache->input_prim;
+      vcache->middle->prepare( vcache->middle,
+                               vcache->input_prim,
+                               vcache->middle_prim,
+                               vcache->opt,
+                               &vcache->fetch_max );
+   }
+
+
+   assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
+          (elt_bias <  0 && min_index + elt_bias <  min_index));
 
    if (min_index == 0 &&
-       index_size == 2) 
+       index_size == 2)
    {
       transformed_elts = (const ushort *)elts;
    }
@@ -342,6 +411,7 @@ static void vcache_check_run( struct draw_pt_front_end *frontend,
 
          default:
             assert(0);
+            FREE(storage);
             return;
          }
       }
@@ -370,32 +440,41 @@ static void vcache_check_run( struct draw_pt_front_end *frontend,
 
          default:
             assert(0);
+            FREE(storage);
             return;
          }
       }
       transformed_elts = storage;
    }
 
-   vcache->middle->run_linear_elts( vcache->middle,
-                                    min_index, /* start */
-                                    fetch_count,
-                                    transformed_elts,
-                                    draw_count );
-
+   if (fetch_count < UNDEFINED_VERTEX_ID)
+      ok = vcache->middle->run_linear_elts( vcache->middle,
+                                            min_index + elt_bias, /* start */
+                                            fetch_count,
+                                            transformed_elts,
+                                            draw_count );
+   
    FREE(storage);
-   return;
+
+   if (ok)
+      return;
+
+   debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
+                fetch_count, draw_count);
 
  fail:
-   vcache_run( frontend, get_elt, elts, draw_count );
+   vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
 }
 
 
 
 
-static void vcache_prepare( struct draw_pt_front_end *frontend,
-                            unsigned prim,
-                            struct draw_pt_middle_end *middle,
-                           unsigned opt )
+static void
+vcache_prepare( struct draw_pt_front_end *frontend,
+                unsigned in_prim,
+                unsigned out_prim,
+                struct draw_pt_middle_end *middle,
+                unsigned opt )
 {
    struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
 
@@ -403,29 +482,38 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
    {
       vcache->base.run = vcache_run_extras;
    }
-   else 
+   else
    {
       vcache->base.run = vcache_check_run;
    }
 
-   vcache->input_prim = prim;
-   vcache->output_prim = draw_pt_reduced_prim(prim);
+   vcache->input_prim = in_prim;
+   vcache->output_prim = u_reduced_prim(out_prim);
 
    vcache->middle = middle;
-   middle->prepare( middle, vcache->output_prim, opt, &vcache->fetch_max );
+   vcache->opt = opt;
+
+   /* Have to run prepare here, but try and guess a good prim for
+    * doing so:
+    */
+   vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim;
+   middle->prepare( middle, vcache->input_prim,
+                    vcache->middle_prim, opt, &vcache->fetch_max );
 }
 
 
 
 
-static void vcache_finish( struct draw_pt_front_end *frontend )
+static void 
+vcache_finish( struct draw_pt_front_end *frontend )
 {
    struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
    vcache->middle->finish( vcache->middle );
    vcache->middle = NULL;
 }
 
-static void vcache_destroy( struct draw_pt_front_end *frontend )
+static void 
+vcache_destroy( struct draw_pt_front_end *frontend )
 {
    FREE(frontend);
 }