Implement draw_arrays_instanced() in softpipe.
author Michal Krol <michal@vmware.com>
Tue, 29 Dec 2009 22:21:01 +0000 (23:21 +0100)
committer Michal Krol <michal@vmware.com>
Tue, 29 Dec 2009 22:21:01 +0000 (23:21 +0100)
Modify the translate module to respect instance divisors and accept
instance id as a parameter to calculate input vertex offset.

17 files changed:
src/gallium/auxiliary/draw/draw_context.h
src/gallium/auxiliary/draw/draw_pipe_vbuf.c
src/gallium/auxiliary/draw/draw_private.h
src/gallium/auxiliary/draw/draw_pt.c
src/gallium/auxiliary/draw/draw_pt_emit.c
src/gallium/auxiliary/draw/draw_pt_fetch.c
src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
src/gallium/auxiliary/draw/draw_vs.h
src/gallium/auxiliary/draw/draw_vs_varient.c
src/gallium/auxiliary/tgsi/tgsi_dump.c
src/gallium/auxiliary/translate/translate.h
src/gallium/auxiliary/translate/translate_generic.c
src/gallium/auxiliary/translate/translate_sse.c
src/gallium/drivers/softpipe/sp_context.c
src/gallium/drivers/softpipe/sp_draw_arrays.c
src/gallium/drivers/softpipe/sp_state.h
src/gallium/drivers/svga/svga_state_vs.c

index 465b8f10c6c3562d5816beb4d4bf3660444c7a09..c0f6a614115069efed570e7e057a521832aefdd7 100644 (file)
@@ -151,6 +151,14 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw,
 void draw_arrays(struct draw_context *draw, unsigned prim,
                 unsigned start, unsigned count);
 
+void
+draw_arrays_instanced(struct draw_context *draw,
+                      unsigned mode,
+                      unsigned start,
+                      unsigned count,
+                      unsigned startInstance,
+                      unsigned instanceCount);
+
 void draw_flush(struct draw_context *draw);
 
 
index 1a5269c0de9393b126f06855f21290cdc945ccb6..bb8a8ff491b30aacf29b867464a0058a94646776 100644 (file)
@@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf,
       /* Note: we really do want data[0] here, not data[pos]: 
        */
       vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0);
-      vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr);
+      vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr);
 
       if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
       
@@ -275,6 +275,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
       hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
       hw_key.element[i].input_buffer = src_buffer;
       hw_key.element[i].input_offset = src_offset;
+      hw_key.element[i].instance_divisor = 0;
       hw_key.element[i].output_format = output_format;
       hw_key.element[i].output_offset = dst_offset;
 
index 3850cede1e87676cf94cc94a959498e9da56e59d..129d919a8466daa3cc281dfd0f1708cc5d096f0b 100644 (file)
@@ -226,6 +226,8 @@ struct draw_context
 
    unsigned reduced_prim;
 
+   unsigned instance_id;
+
    void *driver_private;
 };
 
index 2801dbafe4735b44246d406b310492b0bb190110..1217b9e5d784f326b71bf9f06f146ad334de8093 100644 (file)
@@ -312,5 +312,28 @@ draw_arrays(struct draw_context *draw, unsigned prim,
 #endif
 
    /* drawing done here: */
+   draw->instance_id = 0;
    draw_pt_arrays(draw, prim, start, count);
 }
+
+void
+draw_arrays_instanced(struct draw_context *draw,
+                      unsigned mode,
+                      unsigned start,
+                      unsigned count,
+                      unsigned startInstance,
+                      unsigned instanceCount)
+{  /* Runs draw_pt_arrays() over the same (mode, start, count) once per instance. */
+   unsigned reduced_prim = u_reduced_prim(mode);
+   unsigned instance;  /* zero-based loop counter; startInstance is added when it is published below */
+
+   if (reduced_prim != draw->reduced_prim) {  /* flush queued work before recording a different reduced primitive */
+      draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
+      draw->reduced_prim = reduced_prim;
+   }
+
+   for (instance = 0; instance < instanceCount; instance++) {  /* instanceCount == 0 draws nothing */
+      draw->instance_id = instance + startInstance;  /* read back by the translate->run() callers in this change */
+      draw_pt_arrays(draw, mode, start, count);
+   }
+}
index 064e16c295ca4df2e9c8dd7dbcc2fed36e849fe5..d0abeb93365cee10d22d88d3e753e14b5000e087 100644 (file)
@@ -125,6 +125,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
       hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
       hw_key.element[i].input_buffer = src_buffer;
       hw_key.element[i].input_offset = src_offset;
+      hw_key.element[i].instance_divisor = 0;
       hw_key.element[i].output_format = output_format;
       hw_key.element[i].output_offset = dst_offset;
 
@@ -204,6 +205,7 @@ void draw_pt_emit( struct pt_emit *emit,
    translate->run( translate,
                   0, 
                   vertex_count,
+                   draw->instance_id,
                   hw_verts );
 
    render->unmap_vertices( render, 
@@ -263,6 +265,7 @@ void draw_pt_emit_linear(struct pt_emit *emit,
    translate->run(translate,
                   0,
                   count,
+                  draw->instance_id,
                   hw_verts);
 
    if (0) {
index 305bfef4352b002c60aa06879878d697e20b33b8..e8174a297116301dc62715893329afa714b76f02 100644 (file)
@@ -81,6 +81,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
       key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
       key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
       key.element[nr].input_offset = 0;
+      key.element[nr].instance_divisor = 0;
       key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
       key.element[nr].output_offset = dst_offset;
       dst_offset += 1 * sizeof(float);
@@ -100,6 +101,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
       key.element[nr].input_format = draw->pt.vertex_element[i].src_format;
       key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index;
       key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset;
+      key.element[nr].instance_divisor = draw->pt.vertex_element[i].instance_divisor;
       key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
       key.element[nr].output_offset = dst_offset;
 
@@ -183,6 +185,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
    translate->run( translate,
                    start,
                    count,
+                   draw->instance_id,
                    verts );
 }
 
index e7fe6b3b7687af024f8112b6dfc0b025320de0ea..40bfc0fbb22795504da3603f9813aecce4a4a130 100644 (file)
@@ -169,6 +169,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
       key.element[i].input_format = input_format;
       key.element[i].input_buffer = input_buffer;
       key.element[i].input_offset = input_offset;
+      key.element[i].instance_divisor = src->instance_divisor;
       key.element[i].output_format = output_format;
       key.element[i].output_offset = dst_offset;
       
@@ -314,6 +315,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
    feme->translate->run( feme->translate,
                          start,
                          count,
+                         draw->instance_id,
                          hw_verts );
 
    if (0) {
@@ -374,6 +376,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
    feme->translate->run( feme->translate,
                          start,
                          count,
+                         draw->instance_id,
                          hw_verts );
 
    draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
index e3b807ebd0e1df50ffdf6fa3c4aec523fb9e0b68..00036cfe68ba7d15026a766046137c0d2abb96aa 100644 (file)
@@ -43,6 +43,7 @@ struct draw_varient_input
    enum pipe_format format;
    unsigned buffer;
    unsigned offset; 
+   unsigned instance_divisor;
 };
 
 struct draw_varient_output
index 7ee567d4789823fb1a0a645e8aa87cb997e85d0e..4cc080f8039f320355ec25ab7ffa3eda880d2620 100644 (file)
@@ -180,6 +180,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
 
    vsvg->emit->run( vsvg->emit,
                     0, count,
+                    vsvg->draw->instance_id,
                     output_buffer );
 
    FREE(temp_buffer);
@@ -202,6 +203,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
    vsvg->fetch->run( vsvg->fetch, 
                      start,
                      count,
+                     vsvg->draw->instance_id,
                      temp_buffer );
 
    vsvg->base.vs->run_linear( vsvg->base.vs, 
@@ -238,6 +240,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
    
    vsvg->emit->run( vsvg->emit,
                     0, count,
+                    vsvg->draw->instance_id,
                     output_buffer );
 
    FREE(temp_buffer);
@@ -283,6 +286,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
       fetch.element[i].input_format = key->element[i].in.format;
       fetch.element[i].input_buffer = key->element[i].in.buffer;
       fetch.element[i].input_offset = key->element[i].in.offset;
+      fetch.element[i].instance_divisor = 0;
       fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
       fetch.element[i].output_offset = i * 4 * sizeof(float);
       assert(fetch.element[i].output_offset < fetch.output_stride);
@@ -297,6 +301,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
          emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
          emit.element[i].input_buffer = 0;
          emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
+         emit.element[i].instance_divisor = 0;
          emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
          emit.element[i].output_offset = key->element[i].out.offset;
          assert(emit.element[i].input_offset <= fetch.output_stride);
@@ -305,6 +310,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
          emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
          emit.element[i].input_buffer = 1;
          emit.element[i].input_offset = 0;
+         emit.element[i].instance_divisor = 0;
          emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
          emit.element[i].output_offset = key->element[i].out.offset;
       }
index 5e7e5d2ff9c9b31489385a68a3489ad15f82e21f..4391ca75d1d7afe5acb21b6c2f932b0858f8b06f 100644 (file)
@@ -122,7 +122,8 @@ static const char *semantic_names[] =
    "GENERIC",
    "NORMAL",
    "FACE",
-   "EDGEFLAG"
+   "EDGEFLAG",
+   "INSTANCEID"
 };
 
 static const char *immediate_type_names[] =
index 1afdf194b319e3fbe9f21852039949bcf987623c..fb298471b8bf29aeef71cfc82382fde385c9969c 100644 (file)
@@ -50,6 +50,7 @@ struct translate_element
    enum pipe_format output_format;
    unsigned input_buffer:8;
    unsigned input_offset:24;
+   unsigned instance_divisor;
    unsigned output_offset;
 };
 
@@ -79,6 +80,7 @@ struct translate {
    void (PIPE_CDECL *run)( struct translate *,
                            unsigned start,
                            unsigned count,
+                           unsigned instance_id,
                            void *output_buffer);
 };
 
index 266e7ee81e67fb81fd18084a2fc46a1ce90156f5..0fa99274099b31b07932e00a7ac3bcea968c7473 100644 (file)
@@ -49,6 +49,7 @@ struct translate_generic {
       fetch_func fetch;
       unsigned buffer;
       unsigned input_offset;
+      unsigned instance_divisor;
 
       emit_func emit;
       unsigned output_offset;
@@ -607,6 +608,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
 static void PIPE_CDECL generic_run( struct translate *translate,
                                     unsigned start,
                                     unsigned count,
+                                    unsigned instance_id,
                                     void *output_buffer )
 {
    struct translate_generic *tg = translate_generic(translate);
@@ -622,13 +624,20 @@ static void PIPE_CDECL generic_run( struct translate *translate,
 
       for (attr = 0; attr < nr_attrs; attr++) {
         float data[4];
-
-        const char *src = (tg->attrib[attr].input_ptr + 
-                           tg->attrib[attr].input_stride * elt);
+         const char *src;
 
         char *dst = (vert + 
                      tg->attrib[attr].output_offset);
 
+         if (tg->attrib[attr].instance_divisor) {
+            src = tg->attrib[attr].input_ptr +
+                  tg->attrib[attr].input_stride *
+                  (instance_id / tg->attrib[attr].instance_divisor);
+         } else {
+            src = tg->attrib[attr].input_ptr +
+                  tg->attrib[attr].input_stride * elt;
+         }
+
         tg->attrib[attr].fetch( src, data );
 
          if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
@@ -687,6 +696,7 @@ struct translate *translate_generic_create( const struct translate_key *key )
       tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
       tg->attrib[i].buffer = key->element[i].input_buffer;
       tg->attrib[i].input_offset = key->element[i].input_offset;
+      tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
 
       tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
       tg->attrib[i].output_offset = key->element[i].output_offset;
index b62db8d8f333f8c393bad373a1ba4eeb85f3c014..edd0be17f0bafe825a569f44d691adbe1f77dfa0 100644 (file)
@@ -637,6 +637,7 @@ static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
 static void PIPE_CDECL translate_sse_run( struct translate *translate,
                         unsigned start,
                         unsigned count,
+                         unsigned instance_id,
                         void *output_buffer )
 {
    struct translate_sse *p = (struct translate_sse *)translate;
index 2a33587b5a7fa4e9ee64a1c0a7ad0f884123eae3..406414ae3d2fbf59595e88576a7a43584a6be0c9 100644 (file)
@@ -238,6 +238,7 @@ softpipe_create( struct pipe_screen *screen )
    softpipe->pipe.draw_arrays = softpipe_draw_arrays;
    softpipe->pipe.draw_elements = softpipe_draw_elements;
    softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
+   softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced;
 
    softpipe->pipe.clear = softpipe_clear;
    softpipe->pipe.flush = softpipe_flush;
index 518ef8806e5681ff3eebf61b109618bd1e7354d4..6a593fb06a0a9eb0f3b255b82e4a7960b4b198de 100644 (file)
@@ -184,3 +184,54 @@ softpipe_draw_elements(struct pipe_context *pipe,
                                         0, 0xffffffff,
                                         mode, start, count );
 }
+
+boolean
+softpipe_draw_arrays_instanced(struct pipe_context *pipe,
+                               unsigned mode,
+                               unsigned start,
+                               unsigned count,
+                               unsigned startInstance,
+                               unsigned instanceCount)
+{  /* Maps the bound vertex buffers, forwards to draw_arrays_instanced(), then unmaps them. */
+   struct softpipe_context *sp = softpipe_context(pipe);
+   struct draw_context *draw = sp->draw;
+   unsigned i;
+
+   sp->reduced_api_prim = u_reduced_prim(mode);
+
+   if (sp->dirty) {  /* refresh derived state only when something is flagged dirty */
+      softpipe_update_derived(sp);
+   }
+
+   softpipe_map_transfers(sp);
+   softpipe_map_constant_buffers(sp);
+
+   /* Map vertex buffers */
+   for (i = 0; i < sp->num_vertex_buffers; i++) {
+      void *buf;
+
+      buf = pipe_buffer_map(pipe->screen,
+                            sp->vertex_buffer[i].buffer,
+                            PIPE_BUFFER_USAGE_CPU_READ);
+      draw_set_mapped_vertex_buffer(draw, i, buf);  /* NOTE(review): buf is handed over without a NULL check - confirm the map cannot fail here */
+   }
+
+   draw_set_mapped_element_buffer_range(draw, 0, start,
+                                        start + count - 1, NULL);  /* NOTE(review): unsigned math - count == 0 gives start - 1, which wraps when start == 0; confirm this is harmless */
+
+   /* draw! */
+   draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount);
+
+   /* unmap vertex/index buffers - will cause draw module to flush */
+   for (i = 0; i < sp->num_vertex_buffers; i++) {
+      draw_set_mapped_vertex_buffer(draw, i, NULL);  /* clear the draw module's pointer before the buffer is unmapped */
+      pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer);
+   }
+
+   /* Note: leave drawing surfaces mapped */
+   softpipe_unmap_constant_buffers(sp);
+
+   sp->dirty_render_cache = TRUE;
+
+   return TRUE;  /* unconditional: this function has no failure path */
+}
index 26d5c3fbb2fb0bd3bb3d62c03ffd52dcbfe248e2..13935fd799f918e7f0f902dd19d5323475d57af5 100644 (file)
@@ -189,6 +189,14 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
                              unsigned max_index,
                              unsigned mode, unsigned start, unsigned count);
 
+boolean
+softpipe_draw_arrays_instanced(struct pipe_context *pipe,
+                               unsigned mode,
+                               unsigned start,
+                               unsigned count,
+                               unsigned startInstance,
+                               unsigned instanceCount);
+
 void
 softpipe_map_transfers(struct softpipe_context *sp);
 
index 44b7ceb4fa407ae5657b1c9281336ce9b6792642..114de1a49e125f9089dfb37650bca2d9b13d7e6f 100644 (file)
@@ -198,6 +198,7 @@ static int update_zero_stride( struct svga_context *svga,
          key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
          key.element[0].input_buffer = vel->vertex_buffer_index;
          key.element[0].input_offset = vel->src_offset;
+         key.element[0].instance_divisor = vel->instance_divisor;
          key.element[0].output_offset = const_idx * 4 * sizeof(float);
 
          translate_key_sanitize(&key);