draw: draw_range_elements trial
author Keith Whitwell <keith@tungstengraphics.com>
Thu, 29 May 2008 10:46:43 +0000 (11:46 +0100)
committer Keith Whitwell <keith@tungstengraphics.com>
Thu, 29 May 2008 10:48:04 +0000 (11:48 +0100)
13 files changed:
src/gallium/auxiliary/draw/draw_context.c
src/gallium/auxiliary/draw/draw_context.h
src/gallium/auxiliary/draw/draw_private.h
src/gallium/auxiliary/draw/draw_pt.h
src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
src/gallium/auxiliary/draw/draw_pt_vcache.c
src/gallium/drivers/softpipe/sp_context.c
src/gallium/drivers/softpipe/sp_draw_arrays.c
src/gallium/drivers/softpipe/sp_state.h
src/gallium/include/pipe/p_context.h
src/mesa/state_tracker/st_draw.c

index 8509baf8654772360764b45c2724508c88b698b2..bcec85c2ef88fbb26bfb76591a6bec4d52346d43 100644 (file)
@@ -348,14 +348,30 @@ void draw_set_edgeflags( struct draw_context *draw,
  * \param elements  the element buffer ptr
  */
 void
-draw_set_mapped_element_buffer( struct draw_context *draw,
-                                unsigned eltSize, void *elements )
+draw_set_mapped_element_buffer_range( struct draw_context *draw,
+                                      unsigned eltSize,
+                                      unsigned min_index,
+                                      unsigned max_index,
+                                      void *elements )
 {
    draw->pt.user.elts = elements;
    draw->pt.user.eltSize = eltSize;
+   draw->pt.user.min_index = min_index;   /* bounds are stored unchecked; vcache_check_run() reads them back */
+   draw->pt.user.max_index = max_index;   /* 0xffffffff is what the non-range entry point stores here */
 }
 
 
+/**
+ * Bind a mapped element buffer without any index-range hint.
+ *
+ * Same effect as draw_set_mapped_element_buffer_range() called with the
+ * widest possible bounds: min_index 0 and max_index 0xffffffff.
+ */
+void
+draw_set_mapped_element_buffer( struct draw_context *draw,
+                                unsigned eltSize,
+                                void *elements )
+{
+   draw_set_mapped_element_buffer_range( draw, eltSize,
+                                         0, 0xffffffff,
+                                         elements );
+}
+
  
 /* Revamp me please:
  */
index c5c3d3b09e04ebe91c343f2fa4473a3ac85e8c81..8dd03cb79eff855ada8d538fb6918dfe83645baa 100644 (file)
@@ -118,8 +118,16 @@ void draw_set_vertex_elements(struct draw_context *draw,
                              unsigned count,
                               const struct pipe_vertex_element *elements);
 
+void
+draw_set_mapped_element_buffer_range( struct draw_context *draw,
+                                      unsigned eltSize,     /* bytes per index */
+                                      unsigned min_index,   /* stored in draw->pt.user.min_index */
+                                      unsigned max_index,   /* stored in draw->pt.user.max_index */
+                                      void *elements );     /* mapped element (index) buffer */
+
 void draw_set_mapped_element_buffer( struct draw_context *draw,
-                                     unsigned eltSize, void *elements );
+                                     unsigned eltSize, 
+                                     void *elements );   /* index bounds are set to 0 .. 0xffffffff */
 
 void draw_set_mapped_vertex_buffer(struct draw_context *draw,
                                    unsigned attr, const void *buffer);
index 4cbccc8b5bb22462e3939082fdde29b94e5c9584..40f1d978f211fd422368b8c11d31bec93635e500 100644 (file)
@@ -147,6 +147,8 @@ struct draw_context
          const void *elts;
          /** bytes per index (0, 1, 2 or 4) */
          unsigned eltSize;
+         unsigned min_index;
+         unsigned max_index;
          
          /** vertex arrays */
          const void *vbuffer[PIPE_MAX_ATTRIBS];
index e03816ebbc75b174c165c700929302ee5c1d2428..6b8ba1d171bd94d589880bd54c68e7f27ccb8a21 100644 (file)
@@ -96,6 +96,15 @@ struct draw_pt_middle_end {
                       unsigned start,
                       unsigned count);
 
+   /* Transform all vertices in a linear range and then draw them with
+    * the supplied element list.
+    */
+   void (*run_linear_elts)( struct draw_pt_middle_end *,
+                            unsigned fetch_start,
+                            unsigned fetch_count,
+                            const ushort *draw_elts,
+                            unsigned draw_count );
+
    void (*finish)( struct draw_pt_middle_end * );
    void (*destroy)( struct draw_pt_middle_end * );
 };
index a1d041a74f50d388eae78617916782efb81f3d35..09bdc5fb5e1fa982fa54cae2cfead106b7739e07 100644 (file)
@@ -311,6 +311,53 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
 }
 
 
+/* Middle-end entry point: translate a linear range of vertices
+ * straight into hardware vertex storage, then draw them with the
+ * supplied ushort element list.
+ */
+static void fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
+                                        unsigned start,
+                                        unsigned count,
+                                        const ushort *draw_elts,
+                                        unsigned draw_count )
+{
+   struct fetch_emit_middle_end *fe = (struct fetch_emit_middle_end *)middle;
+   struct draw_context *ctx = fe->draw;
+   void *verts;
+
+   /* XXX: is this flush needed to make prim_vbuf.c give up its
+    * allocation?
+    */
+   draw_do_flush( ctx, DRAW_FLUSH_BACKEND );
+
+   /* Ask the renderer for 'count' output vertices; stride and count
+    * are both narrowed to ushort for this call.
+    */
+   verts = ctx->render->allocate_vertices( ctx->render,
+                                           (ushort)fe->translate->key.output_stride,
+                                           (ushort)count );
+   if (verts == NULL) {
+      assert(0);
+      return;
+   }
+
+   /* One pass over the linear range given by start/count: fetch the
+    * vertices and write them out in hardware format.
+    */
+   fe->translate->run( fe->translate, start, count, verts );
+
+   /* Indexed draw using the caller's element list. */
+   ctx->render->draw( ctx->render, draw_elts, draw_count );
+
+   /* Hand the vertex storage back to the renderer. */
+   ctx->render->release_vertices( ctx->render,
+                                  verts,
+                                  fe->translate->key.output_stride,
+                                  count );
+}
+
+
+
 
 static void fetch_emit_finish( struct draw_pt_middle_end *middle )
 {
@@ -343,6 +390,7 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
    fetch_emit->base.prepare    = fetch_emit_prepare;
    fetch_emit->base.run        = fetch_emit_run;
    fetch_emit->base.run_linear = fetch_emit_run_linear;
+   fetch_emit->base.run_linear_elts = fetch_emit_run_linear_elts;
    fetch_emit->base.finish     = fetch_emit_finish;
    fetch_emit->base.destroy    = fetch_emit_destroy;
 
index 5265a131605a35a26a87490c7cc2f24575e87731..efa6dddbda88d154246dc6f12c096f06b065aaf2 100644 (file)
@@ -310,6 +310,54 @@ fse_run(struct draw_pt_middle_end *middle,
 }
 
 
+
+/* Middle-end entry point: fetch, shade and emit a linear range of
+ * vertices directly into hardware vertex storage, then draw them with
+ * the supplied ushort element list.
+ */
+static void fse_run_linear_elts( struct draw_pt_middle_end *middle,
+                                 unsigned start,
+                                 unsigned count,
+                                 const ushort *draw_elts,
+                                 unsigned draw_count )
+{
+   struct fetch_shade_emit *self = (struct fetch_shade_emit *)middle;
+   struct draw_context *ctx = self->draw;
+   /* presumably 'count' rounded up to a multiple of 4 -- see align() */
+   unsigned padded_count = align(count, 4);
+   char *verts;
+
+   /* XXX: is this flush needed to make prim_vbuf.c give up its
+    * allocation?
+    */
+   draw_do_flush( ctx, DRAW_FLUSH_BACKEND );
+
+   /* Storage is requested for the padded count; stride and count are
+    * narrowed to ushort for this call.
+    */
+   verts = ctx->render->allocate_vertices( ctx->render,
+                                           (ushort)self->key.output_stride,
+                                           (ushort)padded_count );
+   if (verts == NULL) {
+      assert(0);
+      return;
+   }
+
+   /* A single routine fetches the vertices, runs the shader and emits
+    * hardware vertices.  No clipping happens on this path: it is done
+    * elsewhere (by the API or the hardware) or is not required.
+    */
+   self->active->run_linear( self->active, start, count, verts );
+
+   /* Indexed draw using the caller's element list. */
+   ctx->render->draw( ctx->render, draw_elts, draw_count );
+
+   /* The release is told 'count', not the padded allocation count. */
+   ctx->render->release_vertices( ctx->render,
+                                  verts,
+                                  self->key.output_stride,
+                                  count );
+}
+
+
+
 static void fse_finish( struct draw_pt_middle_end *middle )
 {
 }
@@ -330,6 +378,7 @@ struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw )
    fse->base.prepare = fse_prepare;
    fse->base.run = fse_run;
    fse->base.run_linear = fse_run_linear;
+   fse->base.run_linear_elts = fse_run_linear_elts;
    fse->base.finish = fse_finish;
    fse->base.destroy = fse_destroy;
    fse->draw = draw;
index 06718779a5b3e054a0e7e26186195857d2fca7ba..c58a9008679c02147dbbf489aa087b6f407a8e85 100644 (file)
@@ -98,7 +98,6 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
 
 
 
-
 static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
                                 const unsigned *fetch_elts,
                                 unsigned fetch_count,
@@ -251,6 +250,84 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
 
 
 
+/* Middle-end entry point for a linear vertex range drawn through an
+ * element list: fetch the range into a temporary vertex array, shade
+ * it if required, run post-VS processing, then either feed the draw
+ * pipeline or emit straight to the backend.
+ */
+static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
+                                            unsigned start,
+                                            unsigned count,
+                                            const ushort *draw_elts,
+                                            unsigned draw_count )
+{
+   struct fetch_pipeline_middle_end *self = (struct fetch_pipeline_middle_end *)middle;
+   struct draw_context *ctx = self->draw;
+   struct draw_vertex_shader *vs = ctx->vs.vertex_shader;
+   unsigned flags = self->opt;
+   unsigned padded_count = align_int( count, 4 );
+   struct vertex_header *verts;
+
+   /* Scratch storage sized for padded_count vertices. */
+   verts = (struct vertex_header *)MALLOC(self->vertex_size * padded_count);
+   if (verts == NULL) {
+      /* Allocation failed: nothing sensible to do except drop the
+       * draw.
+       */
+      assert(0);
+      return;
+   }
+
+   /* Fill the scratch array from the vertex buffers. */
+   draw_pt_fetch_run_linear( self->fetch,
+                             start,
+                             count,
+                             (char *)verts );
+
+   /* Shading overwrites the data[] part of each vertex in place.  With
+    * no shader (a bypass shader) inputs == outputs and the data is
+    * already where it needs to be.
+    */
+   if (flags & PT_SHADE) {
+      vs->run_linear( vs,
+                      (const float (*)[4])verts->data,
+                      (      float (*)[4])verts->data,
+                      (const float (*)[4])ctx->pt.user.constants,
+                      count,
+                      self->vertex_size,
+                      self->vertex_size );
+   }
+
+   /* A non-zero result from the post-VS stage forces the pipeline
+    * path below.
+    */
+   if (draw_pt_post_vs_run( self->post_vs,
+                            verts,
+                            count,
+                            self->vertex_size )) {
+      flags |= PT_PIPELINE;
+   }
+
+   if (flags & PT_PIPELINE) {
+      /* Full draw pipeline. */
+      draw_pipeline_run( ctx,
+                         self->prim,
+                         verts,
+                         count,
+                         self->vertex_size,
+                         draw_elts,
+                         draw_count );
+   }
+   else {
+      /* Pipeline not needed: emit directly. */
+      draw_pt_emit( self->emit,
+                    (const float (*)[4])verts->data,
+                    count,
+                    self->vertex_size,
+                    draw_elts,
+                    draw_count );
+   }
+
+   FREE(verts);
+}
+
+
+
 static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
 {
    /* nothing to do */
@@ -282,6 +359,7 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
    fpme->base.prepare        = fetch_pipeline_prepare;
    fpme->base.run            = fetch_pipeline_run;
    fpme->base.run_linear     = fetch_pipeline_linear_run;
+   fpme->base.run_linear_elts = fetch_pipeline_linear_run_elts;
    fpme->base.finish         = fetch_pipeline_finish;
    fpme->base.destroy        = fetch_pipeline_destroy;
 
index 96e02fbf3a9178517d8a6b6ed1c88ec04e25d210..720b91b8e64283ff38c7db2d22dce2b403ab49e2 100644 (file)
@@ -36,8 +36,8 @@
 #include "draw/draw_pt.h"
 
 
-#define CACHE_MAX 32
-#define FETCH_MAX 128
+#define CACHE_MAX 1024
+#define FETCH_MAX 4096
 #define DRAW_MAX (16*1024)
 
 struct vcache_frontend {
@@ -201,7 +201,124 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
 #define FUNC vcache_run
 #include "draw_pt_vcache_tmp.h"
 
+/* Copy 'count' 32-bit indices to 'dest', adding 'delta' to each and
+ * narrowing the result to ushort.
+ */
+static void translate_uint_elts( const unsigned *src,
+                                 unsigned count,
+                                 int delta,
+                                 ushort *dest )
+{
+   unsigned remaining;
+
+   for (remaining = count; remaining != 0; remaining--)
+      *dest++ = (ushort)(*src++ + delta);
+}
+
+/* Copy 'count' 16-bit indices to 'dest', adding 'delta' to each and
+ * narrowing the result back to ushort.
+ */
+static void translate_ushort_elts( const ushort *src,
+                                   unsigned count,
+                                   int delta,
+                                   ushort *dest )
+{
+   unsigned remaining;
+
+   for (remaining = count; remaining != 0; remaining--)
+      *dest++ = (ushort)(*src++ + delta);
+}
 
+/* Copy 'count' 8-bit indices to 'dest', adding 'delta' to each and
+ * storing the result as ushort.
+ */
+static void translate_ubyte_elts( const ubyte *src,
+                                  unsigned count,
+                                  int delta,
+                                  ushort *dest )
+{
+   unsigned remaining;
+
+   for (remaining = count; remaining != 0; remaining--)
+      *dest++ = (ushort)(*src++ + delta);
+}
+
+#if 0   /* disabled: the body reads 'draw', which is not a parameter here, and never uses get_elt */
+static enum pipe_format format_from_get_elt( pt_elt_func get_elt )
+{
+   switch (draw->pt.user.eltSize) {   /* index size in bytes selects the format */
+   case 1: return PIPE_FORMAT_R8_UNORM;
+   case 2: return PIPE_FORMAT_R16_UNORM;
+   case 4: return PIPE_FORMAT_R32_UNORM;
+   default: return PIPE_FORMAT_NONE;   /* any other index size */
+   }
+}
+#endif
+
+/* Try to draw an indexed primitive as one linear fetch of the vertex
+ * range [min_index, max_index], followed by an indexed draw whose
+ * element list has been rebased to start at zero and narrowed to
+ * ushort.
+ *
+ * Falls back to vcache_run() when the index range is inverted, spans
+ * FETCH_MAX or more vertices (this includes the "unknown" range
+ * 0..0xffffffff), covers more vertices than there are indices to draw,
+ * or when scratch memory cannot be allocated.
+ *
+ * \param frontend    the vcache front end
+ * \param get_elt     element accessor, only used by the fallback path
+ * \param elts        element list; its index size is draw->pt.user.eltSize
+ * \param draw_count  number of elements in 'elts'
+ */
+static void vcache_check_run( struct draw_pt_front_end *frontend,
+                              pt_elt_func get_elt,
+                              const void *elts,
+                              unsigned draw_count )
+{
+   struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
+   struct draw_context *draw = vcache->draw;
+   unsigned min_index = draw->pt.user.min_index;
+   unsigned max_index = draw->pt.user.max_index;
+   unsigned index_size = draw->pt.user.eltSize;
+   unsigned fetch_count;
+   const ushort *transformed_elts;
+   ushort *storage = NULL;
+
+   /* Test the span (max - min) rather than span + 1, so the check
+    * cannot wrap around for max_index == 0xffffffff.
+    */
+   if (max_index < min_index ||
+       max_index - min_index >= FETCH_MAX - 1)
+      goto fail;
+
+   /* Fetch exactly the vertices min_index..max_index.  Fetching
+    * max_index vertices starting at min_index would run past the end
+    * of the range whenever min_index > 0.
+    */
+   fetch_count = max_index - min_index + 1;
+
+   /* Take this path only when no more vertices are fetched than
+    * indices are drawn.
+    */
+   if (fetch_count > draw_count)
+      goto fail;
+
+   if (min_index == 0 &&
+       index_size == 2)
+   {
+      /* Already zero-based ushort indices: use them in place. */
+      transformed_elts = (const ushort *)elts;
+   }
+   else
+   {
+      /* Build a zero-based ushort copy of the element list. */
+      storage = MALLOC( draw_count * sizeof(ushort) );
+      if (!storage)
+         goto fail;
+
+      switch(index_size) {
+      case 1:
+         translate_ubyte_elts( (const ubyte *)elts,
+                               draw_count,
+                               0 - (int)min_index,
+                               storage );
+         break;
+
+      case 2:
+         translate_ushort_elts( (const ushort *)elts,
+                                draw_count,
+                                0 - (int)min_index,
+                                storage );
+         break;
+
+      case 4:
+         translate_uint_elts( (const uint *)elts,
+                              draw_count,
+                              0 - (int)min_index,
+                              storage );
+         break;
+
+      default:
+         /* Unsupported index size: nothing is drawn.  Free the
+          * scratch list before returning so it is not leaked.
+          */
+         assert(0);
+         FREE(storage);
+         return;
+      }
+      transformed_elts = storage;
+   }
+
+   vcache->middle->run_linear_elts( vcache->middle,
+                                    min_index, /* start */
+                                    fetch_count,
+                                    transformed_elts,
+                                    draw_count );
+
+   /* storage is still NULL when the indices were used in place */
+   FREE(storage);
+   return;
+
+ fail:
+   vcache_run( frontend, get_elt, elts, draw_count );
+}
 
 
 
@@ -219,7 +336,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
    }
    else 
    {
-      vcache->base.run = vcache_run;
+      vcache->base.run = vcache_check_run;
    }
 
    vcache->input_prim = prim;
index 045a1f74a95cd45f1feacb7b829ebff30dabca4f..1e0106b86c740b1fe01a88fe069f6a7030799b09 100644 (file)
@@ -179,6 +179,7 @@ softpipe_create( struct pipe_screen *screen,
 
    softpipe->pipe.draw_arrays = softpipe_draw_arrays;
    softpipe->pipe.draw_elements = softpipe_draw_elements;
+   softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
    softpipe->pipe.set_edgeflags = softpipe_set_edgeflags;
 
 
index 6c58f9909da0573771f7137f7e0f8462c598f38e..dbecf6865f498c669564b646ac1c55ea0624a81d 100644 (file)
@@ -108,11 +108,14 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
  *
  * XXX should the element buffer be specified/bound with a separate function?
  */
+
 boolean
-softpipe_draw_elements(struct pipe_context *pipe,
-                       struct pipe_buffer *indexBuffer,
-                       unsigned indexSize,
-                       unsigned mode, unsigned start, unsigned count)
+softpipe_draw_range_elements(struct pipe_context *pipe,
+                             struct pipe_buffer *indexBuffer,
+                             unsigned indexSize,
+                             unsigned min_index,
+                             unsigned max_index,
+                             unsigned mode, unsigned start, unsigned count)
 {
    struct softpipe_context *sp = softpipe_context(pipe);
    struct draw_context *draw = sp->draw;
@@ -141,11 +144,14 @@ softpipe_draw_elements(struct pipe_context *pipe,
       void *mapped_indexes
          = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
                                     PIPE_BUFFER_USAGE_CPU_READ);
-      draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes);
+      draw_set_mapped_element_buffer_range(draw, indexSize,
+                                           min_index,
+                                           max_index,
+                                           mapped_indexes);
    }
    else {
       /* no index/element buffer */
-      draw_set_mapped_element_buffer(draw, 0, NULL);
+      draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL);
    }
 
 
@@ -171,6 +177,19 @@ softpipe_draw_elements(struct pipe_context *pipe,
    return TRUE;
 }
 
+/* Indexed draw without index-range information: forwards to
+ * softpipe_draw_range_elements() with the bounds 0 .. 0xffffffff and
+ * returns its result.
+ */
+boolean
+softpipe_draw_elements(struct pipe_context *pipe,
+                       struct pipe_buffer *indexBuffer,
+                       unsigned indexSize,
+                       unsigned mode, unsigned start, unsigned count)
+{
+   const unsigned min_index = 0;
+   const unsigned max_index = 0xffffffff;
+
+   return softpipe_draw_range_elements( pipe, indexBuffer, indexSize,
+                                        min_index, max_index,
+                                        mode, start, count );
+}
+
+
 
 void
 softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
index 452e51fa7912902d7f7550435ae97971d42a9cf9..701e02b295e2231e516bf62b5eb44c99cdee8e94 100644 (file)
@@ -171,6 +171,13 @@ boolean softpipe_draw_elements(struct pipe_context *pipe,
                               struct pipe_buffer *indexBuffer,
                               unsigned indexSize,
                               unsigned mode, unsigned start, unsigned count);
+boolean
+softpipe_draw_range_elements(struct pipe_context *pipe,
+                             struct pipe_buffer *indexBuffer,
+                             unsigned indexSize,
+                             unsigned min_index,   /* min/max: handed to draw_set_mapped_element_buffer_range() on the index-buffer path */
+                             unsigned max_index,
+                             unsigned mode, unsigned start, unsigned count);
 
 void
 softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
index 0f68f592f77072859276bc78c17147a7cbc8b4b0..faf112c6d6172aa594d65bffde92e051ffbfba77 100644 (file)
@@ -76,6 +76,20 @@ struct pipe_context {
                             struct pipe_buffer *indexBuffer,
                             unsigned indexSize,
                             unsigned mode, unsigned start, unsigned count);
+
+   /* XXX: this is (probably) a temporary entrypoint, as the range
+    * information should be available from the vertex_buffer state.
+    * Using this to quickly evaluate a specialized path in the draw
+    * module.
+    */
+   boolean (*draw_range_elements)( struct pipe_context *pipe,
+                                   struct pipe_buffer *indexBuffer,
+                                   unsigned indexSize,
+                                   unsigned minIndex,
+                                   unsigned maxIndex,
+                                   unsigned mode, 
+                                   unsigned start, 
+                                   unsigned count);
    /*@}*/
 
 
index a3bffbfc95bb054fcf2109193ab8e1d0c4855cdb..551860452ae3b108ef0acc1aa6c0c9ff4e6d4e02 100644 (file)
@@ -365,14 +365,33 @@ st_draw_vbo(GLcontext *ctx,
       }
 
       /* draw */
-      for (i = 0; i < nr_prims; i++) {
+      if (nr_prims == 1 && pipe->draw_range_elements != NULL) {
+         i = 0;
+
+         /* XXX: exercise temporary path to pass min/max directly
+          * through to driver & draw module.  These interfaces still
+          * need a bit of work...
+          */
          setup_edgeflags(ctx, prims[i].mode,
                          prims[i].start + indexOffset, prims[i].count,
                          arrays[VERT_ATTRIB_EDGEFLAG]);
 
-         pipe->draw_elements(pipe, indexBuf, indexSize,
-                             prims[i].mode,
-                             prims[i].start + indexOffset, prims[i].count);
+         pipe->draw_range_elements(pipe, indexBuf, indexSize,
+                                   min_index,
+                                   max_index,
+                                   prims[i].mode,
+                                   prims[i].start + indexOffset, prims[i].count);
+      }
+      else {
+         for (i = 0; i < nr_prims; i++) {
+            setup_edgeflags(ctx, prims[i].mode,
+                            prims[i].start + indexOffset, prims[i].count,
+                            arrays[VERT_ATTRIB_EDGEFLAG]);
+            
+            pipe->draw_elements(pipe, indexBuf, indexSize,
+                                prims[i].mode,
+                                prims[i].start + indexOffset, prims[i].count);
+         }
       }
 
       pipe_reference_buffer(pipe, &indexBuf, NULL);