Merge branch 'mesa_7_7_branch'
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_fetch_shade_emit.c
index 85d0bdfcab0752557065554526a0e04ebbe6fffe..c5dfbcfa3cb70802f2869a21db5ffc3e7a71642e 100644 (file)
@@ -31,7 +31,8 @@
   */
 
 
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_vbuf.h"
@@ -39,7 +40,6 @@
 #include "draw/draw_pt.h"
 #include "draw/draw_vs.h"
 
-#include "translate/translate.h"
 
 struct fetch_shade_emit;
 
@@ -70,14 +70,15 @@ struct fetch_shade_emit {
                               
 static void fse_prepare( struct draw_pt_middle_end *middle,
                          unsigned prim, 
-                         unsigned opt )
+                         unsigned opt,
+                         unsigned *max_vertices )
 {
    struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
    struct draw_context *draw = fse->draw;
    unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
    const struct vertex_info *vinfo;
    unsigned i;
-   boolean need_psize = 0;
+   unsigned nr_vbs = 0;
    
 
    if (!draw->render->set_primitive( draw->render, 
@@ -99,9 +100,10 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
    fse->key.nr_elements = MAX2(fse->key.nr_outputs,     /* outputs - translate to hw format */
                                fse->key.nr_inputs);     /* inputs - fetch from api format */
 
-   fse->key.viewport = !draw->identity_viewport;
+   fse->key.viewport = (!draw->rasterizer->bypass_vs_clip_and_viewport &&
+                        !draw->identity_viewport);
    fse->key.clip = !draw->bypass_clipping;
-   fse->key.pad = 0;
+   fse->key.const_vbuffers = 0;
 
    memset(fse->key.element, 0, 
           fse->key.nr_elements * sizeof(fse->key.element[0]));
@@ -115,42 +117,39 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
        */
       fse->key.element[i].in.buffer = src->vertex_buffer_index;
       fse->key.element[i].in.offset = src->src_offset;
+      nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1);
    }
    
+   for (i = 0; i < 5 && i < nr_vbs; i++) {
+      if (draw->pt.vertex_buffer[i].stride == 0)
+         fse->key.const_vbuffers |= (1<<i);
+   }
 
+   if (0) debug_printf("%s: lookup const_vbuffers: %x\n", __FUNCTION__, fse->key.const_vbuffers);
+   
    {
       unsigned dst_offset = 0;
 
       for (i = 0; i < vinfo->num_attribs; i++) {
          unsigned emit_sz = 0;
-         unsigned output_format = PIPE_FORMAT_NONE;
-         unsigned vs_output = vinfo->src_index[i];
 
-         switch (vinfo->emit[i]) {
+         switch (vinfo->attrib[i].emit) {
          case EMIT_4F:
-            output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
             emit_sz = 4 * sizeof(float);
             break;
          case EMIT_3F:
-            output_format = PIPE_FORMAT_R32G32B32_FLOAT;
             emit_sz = 3 * sizeof(float);
             break;
          case EMIT_2F:
-            output_format = PIPE_FORMAT_R32G32_FLOAT;
             emit_sz = 2 * sizeof(float);
             break;
          case EMIT_1F:
-            output_format = PIPE_FORMAT_R32_FLOAT;
             emit_sz = 1 * sizeof(float);
             break;
          case EMIT_1F_PSIZE:
-            need_psize = 1;
-            output_format = PIPE_FORMAT_R32_FLOAT;
             emit_sz = 1 * sizeof(float);
-            vs_output = vinfo->num_attribs + 1;
             break;
          case EMIT_4UB:
-            output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
             emit_sz = 4 * sizeof(ubyte);
             break;
          default:
@@ -162,40 +161,16 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
           * numbers, not to positions in the hw vertex description --
           * that's handled by the output_offset field.
           */
-         fse->key.element[vs_output].out.format = output_format;
-         fse->key.element[vs_output].out.vs_output = vs_output;
-         fse->key.element[vs_output].out.offset = dst_offset;
+         fse->key.element[i].out.format = vinfo->attrib[i].emit;
+         fse->key.element[i].out.vs_output = vinfo->attrib[i].src_index;
+         fse->key.element[i].out.offset = dst_offset;
       
          dst_offset += emit_sz;
          assert(fse->key.output_stride >= dst_offset);
       }
    }
 
-   /* To make psize work, really need to tell the vertex shader to
-    * copy that value from input->output.  For 'translate' this was
-    * implicit for all elements.
-    */
-#if 0
-   if (need_psize) {
-      unsigned input = num_vs_inputs + 1;
-      const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
-      fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
-      fse->key.element[i].input_buffer = 0; //nr_buffers + 1;
-      fse->key.element[i].input_offset = 0; 
-
-      fse->key.nr_inputs += 1;
-      fse->key.nr_elements = MAX2(fse->key.nr_inputs,
-                                  fse->key.nr_outputs);
-      
-   }
-#endif
-
-   /* Would normally look up a vertex shader and peruse its list of
-    * varients somehow.  We omitted that step and put all the
-    * hardcoded "shaders" into an array.  We're just making the
-    * assumption that this happens to be a matching shader...  ie
-    * you're running isosurf, aren't you?
-    */
+   
    fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, 
                                          &fse->key );
 
@@ -204,56 +179,65 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
       return ;
    }
 
+   if (0) debug_printf("%s: found const_vbuffers: %x\n", __FUNCTION__, 
+                       fse->active->key.const_vbuffers);
+
    /* Now set buffer pointers:
     */
-   for (i = 0; i < num_vs_inputs; i++) {
-      unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
-
-      fse->active->set_input( fse->active, 
-                              i, 
-                              
-                              ((const ubyte *) draw->pt.user.vbuffer[buf] + 
-                               draw->pt.vertex_buffer[buf].buffer_offset),
-                              
-                              draw->pt.vertex_buffer[buf].pitch );
+   for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+      fse->active->set_buffer( fse->active, 
+                               i, 
+                               ((const ubyte *) draw->pt.user.vbuffer[i] + 
+                                draw->pt.vertex_buffer[i].buffer_offset),
+                              draw->pt.vertex_buffer[i].stride );
    }
 
-   fse->active->set_constants( fse->active,
-                               (const float (*)[4])draw->pt.user.constants );
+   *max_vertices = (draw->render->max_vertex_buffer_bytes / 
+                    (vinfo->size * 4));
 
-   fse->active->set_viewport( fse->active,
-                              &draw->viewport );
+   /* Return an even number of verts.
+    * This prevents "parity" errors when splitting long triangle strips which
+    * can lead to front/back culling mix-ups.
+    * Every other triangle in a strip has an alternate front/back orientation
+    * so splitting at an odd position can cause the orientation of subsequent
+    * triangles to get reversed.
+    */
+   *max_vertices = *max_vertices & ~1;
 
-   //return TRUE;
+   /* Probably need to do this somewhere (or fix exec shader not to
+    * need it):
+    */
+   if (1) {
+      struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+      vs->prepare(vs, draw);
+   }
 }
 
 
 
-
-
-
-
 static void fse_run_linear( struct draw_pt_middle_end *middle, 
                             unsigned start, 
                             unsigned count )
 {
    struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
    struct draw_context *draw = fse->draw;
-   unsigned alloc_count = align(count, 4);
    char *hw_verts;
 
    /* XXX: need to flush to get prim_vbuf.c to release its allocation??
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   hw_verts = draw->render->allocate_vertices( draw->render,
-                                               (ushort)fse->key.output_stride,
-                                               (ushort)alloc_count );
+   if (count >= UNDEFINED_VERTEX_ID) 
+      goto fail;
 
-   if (!hw_verts) {
-      assert(0);
-      return;
-   }
+   if (!draw->render->allocate_vertices( draw->render,
+                                         (ushort)fse->key.output_stride,
+                                         (ushort)count ))
+      goto fail;
+
+   hw_verts = draw->render->map_vertices( draw->render );
+   if (!hw_verts)
+      goto fail;
 
    /* Single routine to fetch vertices, run shader and emit HW verts.
     * Clipping is done elsewhere -- either by the API or on hardware,
@@ -263,13 +247,7 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
                             start, count,
                             hw_verts );
 
-   /* Draw arrays path to avoid re-emitting index list again and
-    * again.
-    */
-   draw->render->draw_arrays( draw->render,
-                              0,
-                              count );
-   
+
    if (0) {
       unsigned i;
       for (i = 0; i < count; i++) {
@@ -281,12 +259,24 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
                                    (const uint8_t *)hw_verts + fse->key.output_stride * i );
       }
    }
+   
+   draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+
+   /* Draw arrays path to avoid re-emitting index list again and
+    * again.
+    */
+   draw->render->draw_arrays( draw->render,
+                              0,
+                              count );
+   
+
+   draw->render->release_vertices( draw->render );
 
+   return;
 
-   draw->render->release_vertices( draw->render, 
-                                  hw_verts, 
-                                  fse->key.output_stride, 
-                                  count );
+fail:
+   assert(0);
+   return;
 }
 
 
@@ -299,20 +289,23 @@ fse_run(struct draw_pt_middle_end *middle,
 {
    struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
    struct draw_context *draw = fse->draw;
-   unsigned alloc_count = align(fetch_count, 4);
    void *hw_verts;
    
    /* XXX: need to flush to get prim_vbuf.c to release its allocation?? 
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   hw_verts = draw->render->allocate_vertices( draw->render,
-                                               (ushort)fse->key.output_stride,
-                                               (ushort)alloc_count );
-   if (!hw_verts) {
-      assert(0);
-      return;
-   }
+   if (fetch_count >= UNDEFINED_VERTEX_ID) 
+      goto fail;
+
+   if (!draw->render->allocate_vertices( draw->render,
+                                         (ushort)fse->key.output_stride,
+                                         (ushort)fetch_count ))
+      goto fail;
+
+   hw_verts = draw->render->map_vertices( draw->render ); 
+   if (!hw_verts) 
+      goto fail;
          
                                        
    /* Single routine to fetch vertices, run shader and emit HW verts.
@@ -322,9 +315,6 @@ fse_run(struct draw_pt_middle_end *middle,
                           fetch_count,
                           hw_verts );
 
-   draw->render->draw( draw->render, 
-                       draw_elts, 
-                       draw_count );
 
    if (0) {
       unsigned i;
@@ -336,15 +326,72 @@ fse_run(struct draw_pt_middle_end *middle,
       }
    }
 
+   draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) );
+   
+   draw->render->draw( draw->render, 
+                       draw_elts, 
+                       draw_count );
 
-   draw->render->release_vertices( draw->render, 
-                                   hw_verts, 
-                                   fse->key.output_stride, 
-                                   fetch_count );
 
+   draw->render->release_vertices( draw->render );
+   return;
+
+fail:
+   assert(0);
+   return;
 }
 
 
+
+/* Fetch, shade and emit `count` vertices starting at `start`, then draw
+ * them via `draw_elts`.  Returns FALSE if count >= UNDEFINED_VERTEX_ID or
+ * the vertex storage cannot be allocated or mapped; TRUE otherwise.
+ */
+static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
+                                 unsigned start,
+                                 unsigned count,
+                                 const ushort *draw_elts,
+                                 unsigned draw_count )
+{
+   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+   struct draw_context *draw = fse->draw;
+   char *hw_verts;
+
+   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   if (count >= UNDEFINED_VERTEX_ID)
+      return FALSE;
+
+   if (!draw->render->allocate_vertices( draw->render,
+                                         (ushort)fse->key.output_stride,
+                                         (ushort)count ))
+      return FALSE;
+
+   hw_verts = draw->render->map_vertices( draw->render );
+   if (!hw_verts)
+      return FALSE;
+
+   /* Single routine to fetch vertices, run shader and emit HW verts.
+    * Clipping is done elsewhere -- either by the API or on hardware,
+    * or for some other reason not required...
+    */
+   fse->active->run_linear( fse->active, start, count, hw_verts );
+
+   /* Unmap before drawing, in the same order as fse_run() and
+    * fse_run_linear(), so draw() never sees still-mapped vertices. */
+   draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) );
+
+   draw->render->draw( draw->render, draw_elts, draw_count );
+
+   draw->render->release_vertices( draw->render );
+
+   return TRUE;
+}
+
+
+
 static void fse_finish( struct draw_pt_middle_end *middle )
 {
    /* Intentionally empty: this middle end does no work in finish(). */
 }
@@ -365,6 +412,7 @@ struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw )
    fse->base.prepare = fse_prepare;
    fse->base.run = fse_run;
    fse->base.run_linear = fse_run_linear;
+   fse->base.run_linear_elts = fse_run_linear_elts;
    fse->base.finish = fse_finish;
    fse->base.destroy = fse_destroy;
    fse->draw = draw;