util/simple_mtx: add assert_locked()
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_fetch_shade_pipeline.c
index 4e39d553ed900452e531790c4ab6b95857b26977..07838fb7eda45ff0310039c5e87dc9934fc14f30 100644 (file)
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 VMware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -31,6 +31,7 @@
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
 #include "draw/draw_vertex.h"
+#include "draw/draw_prim_assembler.h"
 #include "draw/draw_pt.h"
 #include "draw/draw_vs.h"
 #include "draw/draw_gs.h"
@@ -48,27 +49,45 @@ struct fetch_pipeline_middle_end {
    unsigned vertex_data_offset;
    unsigned vertex_size;
    unsigned input_prim;
-   unsigned output_prim;
    unsigned opt;
 };
 
-static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
-                                    unsigned in_prim,
-                                    unsigned out_prim,
-                                   unsigned opt,
-                                    unsigned *max_vertices )
+
+/** cast wrapper */
+static inline struct fetch_pipeline_middle_end *
+fetch_pipeline_middle_end(struct draw_pt_middle_end *middle)
 {
-   struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+   return (struct fetch_pipeline_middle_end *) middle;
+}
+
+
+/**
+ * Prepare/validate middle part of the vertex pipeline.
+ * NOTE: if you change this function, also look at the LLVM
+ * function llvm_middle_end_prepare() for similar changes.
+ */
+static void
+fetch_pipeline_prepare(struct draw_pt_middle_end *middle,
+                       unsigned prim,
+                       unsigned opt,
+                       unsigned *max_vertices)
+{
+   struct fetch_pipeline_middle_end *fpme = fetch_pipeline_middle_end(middle);
    struct draw_context *draw = fpme->draw;
    struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+   struct draw_geometry_shader *gs = draw->gs.geometry_shader;
    unsigned i;
    unsigned instance_id_index = ~0;
-
-   /* Add one to num_outputs because the pipeline occasionally tags on
-    * an additional texcoord, eg for AA lines.
-    */
-   unsigned nr = MAX2( vs->info.num_inputs,
-                      vs->info.num_outputs + 1 );
+   const unsigned gs_out_prim = (gs ? gs->output_primitive :
+                                 u_assembled_prim(prim));
+   unsigned nr_vs_outputs = draw_total_vs_outputs(draw);
+   unsigned nr = MAX2(vs->info.num_inputs, nr_vs_outputs);
+   unsigned point_clip = draw->rasterizer->fill_front == PIPE_POLYGON_MODE_POINT ||
+                         gs_out_prim == PIPE_PRIM_POINTS;
+
+   if (gs) {
+      nr = MAX2(nr, gs->info.num_outputs + 1);
+   }
 
    /* Scan for instanceID system value.
     */
@@ -79,51 +98,58 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
       }
    }
 
-   fpme->input_prim = in_prim;
-   fpme->output_prim = out_prim;
+   fpme->input_prim = prim;
    fpme->opt = opt;
 
    /* Always leave room for the vertex header whether we need it or
     * not.  It's hard to get rid of it in particular because of the
-    * viewport code in draw_pt_post_vs.c.  
+    * viewport code in draw_pt_post_vs.c.
     */
    fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
 
-   
-
-   draw_pt_fetch_prepare( fpme->fetch, 
+   draw_pt_fetch_prepare( fpme->fetch,
                           vs->info.num_inputs,
                           fpme->vertex_size,
                           instance_id_index );
-   /* XXX: it's not really gl rasterization rules we care about here,
-    * but gl vs dx9 clip spaces.
-    */
    draw_pt_post_vs_prepare( fpme->post_vs,
-                           (boolean)draw->bypass_clipping,
-                           (boolean)draw->identity_viewport,
-                           (boolean)draw->rasterizer->gl_rasterization_rules,
-                           (draw->vs.edgeflag_output ? true : false) );
+                            draw->clip_xy,
+                            draw->clip_z,
+                            draw->clip_user,
+                            point_clip ? draw->guard_band_points_xy :
+                                         draw->guard_band_xy,
+                            draw->bypass_viewport,
+                            draw->rasterizer->clip_halfz,
+                            (draw->vs.edgeflag_output ? TRUE : FALSE) );
 
-   draw_pt_so_emit_prepare( fpme->so_emit, out_prim );
+   draw_pt_so_emit_prepare( fpme->so_emit, FALSE );
 
    if (!(opt & PT_PIPELINE)) {
       draw_pt_emit_prepare( fpme->emit,
-                           out_prim,
+                           gs_out_prim,
                             max_vertices );
 
-      *max_vertices = MAX2( *max_vertices,
-                            DRAW_PIPE_MAX_VERTICES );
+      *max_vertices = MAX2( *max_vertices, 4096 );
    }
    else {
-      *max_vertices = DRAW_PIPE_MAX_VERTICES; 
+      /* limit max fetches by limiting max_vertices */
+      *max_vertices = 4096;
    }
 
-   /* return even number */
-   *max_vertices = *max_vertices & ~1;
-
    /* No need to prepare the shader.
     */
    vs->prepare(vs, draw);
+
+   /* Make sure that the vertex size didn't change at any point above */
+   assert(nr_vs_outputs == draw_total_vs_outputs(draw));
+}
+
+
+static void
+fetch_pipeline_bind_parameters(struct draw_pt_middle_end *middle)
+{
+   /* No-op since the vertex shader executor and drawing pipeline
+    * just grab the constants, viewport, etc. from the draw context state.
+    */
 }
 
 
@@ -139,7 +165,7 @@ static void fetch( struct pt_fetch *fetch,
    }
    else {
       draw_pt_fetch_run( fetch,
-                         fetch_info->elts, 
+                         fetch_info->elts,
                          fetch_info->count,
                          output );
    }
@@ -160,9 +186,11 @@ static void pipeline(struct fetch_pipeline_middle_end *fpme,
                          prim_info );
 }
 
-static void emit(struct pt_emit *emit,
-                 const struct draw_vertex_info *vert_info,
-                 const struct draw_prim_info *prim_info)
+
+static void
+emit(struct pt_emit *emit,
+     const struct draw_vertex_info *vert_info,
+     const struct draw_prim_info *prim_info)
 {
    if (prim_info->linear) {
       draw_pt_emit_linear(emit, vert_info, prim_info);
@@ -173,66 +201,86 @@ static void emit(struct pt_emit *emit,
 }
 
 
-static void draw_vertex_shader_run(struct draw_vertex_shader *vshader,
-                                   const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 
-                                   const struct draw_vertex_info *input_verts,
-                                   struct draw_vertex_info *output_verts )
+static void
+draw_vertex_shader_run(struct draw_vertex_shader *vshader,
+                       const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+                       unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
+                       const struct draw_fetch_info *fetch_info,
+                       const struct draw_vertex_info *input_verts,
+                       struct draw_vertex_info *output_verts)
 {
    output_verts->vertex_size = input_verts->vertex_size;
    output_verts->stride = input_verts->vertex_size;
    output_verts->count = input_verts->count;
    output_verts->verts =
       (struct vertex_header *)MALLOC(output_verts->vertex_size *
-                                     output_verts->count);
+                                     align(output_verts->count, 4));
 
    vshader->run_linear(vshader,
                        (const float (*)[4])input_verts->verts->data,
                        (      float (*)[4])output_verts->verts->data,
                        constants,
+                       const_size,
                        input_verts->count,
                        input_verts->vertex_size,
-                       input_verts->vertex_size);
+                       input_verts->vertex_size,
+                       fetch_info->elts);
 }
 
-static void fetch_pipeline_generic( struct draw_pt_middle_end *middle,
-                                    const struct draw_fetch_info *fetch_info,
-                                    const struct draw_prim_info *prim_info )
+
+static void
+fetch_pipeline_generic(struct draw_pt_middle_end *middle,
+                       const struct draw_fetch_info *fetch_info,
+                       const struct draw_prim_info *in_prim_info)
 {
-   struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+   struct fetch_pipeline_middle_end *fpme = fetch_pipeline_middle_end(middle);
    struct draw_context *draw = fpme->draw;
    struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
    struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
-   struct draw_prim_info gs_prim_info;
+   struct draw_prim_info gs_prim_info[TGSI_MAX_VERTEX_STREAMS];
    struct draw_vertex_info fetched_vert_info;
    struct draw_vertex_info vs_vert_info;
-   struct draw_vertex_info gs_vert_info;
+   struct draw_vertex_info gs_vert_info[TGSI_MAX_VERTEX_STREAMS];
    struct draw_vertex_info *vert_info;
+   struct draw_prim_info ia_prim_info;
+   struct draw_vertex_info ia_vert_info;
+   const struct draw_prim_info *prim_info = in_prim_info;
+   boolean free_prim_info = FALSE;
+   unsigned opt = fpme->opt;
+   int num_vertex_streams = 1;
 
    fetched_vert_info.count = fetch_info->count;
    fetched_vert_info.vertex_size = fpme->vertex_size;
+   fetched_vert_info.stride = fpme->vertex_size;
    fetched_vert_info.verts =
-      (struct vertex_header *)MALLOC(fetched_vert_info.vertex_size *
-                                     fetch_info->count);
+      (struct vertex_header *)MALLOC(fpme->vertex_size *
+                                     align(fetch_info->count,  4));
    if (!fetched_vert_info.verts) {
       assert(0);
       return;
    }
+   if (draw->collect_statistics) {
+      draw->statistics.ia_vertices += prim_info->count;
+      draw->statistics.ia_primitives +=
+         u_decomposed_prims_for_vertices(prim_info->prim, fetch_info->count);
+      draw->statistics.vs_invocations += fetch_info->count;
+   }
 
    /* Fetch into our vertex buffer.
     */
    fetch( fpme->fetch, fetch_info, (char *)fetched_vert_info.verts );
 
-   /* Finished with fetch:
-    */
-   fetch_info = NULL;
    vert_info = &fetched_vert_info;
 
    /* Run the shader, note that this overwrites the data[] parts of
     * the pipeline verts.
+    * The shader needs fetch_info to get the vertex ids correct.
     */
-   if (fpme->opt & PT_SHADE) { 
+   if (fpme->opt & PT_SHADE) {
       draw_vertex_shader_run(vshader,
                              draw->pt.user.vs_constants,
+                             draw->pt.user.vs_constants_size,
+                             fetch_info,
                              vert_info,
                              &vs_vert_info);
 
@@ -240,18 +288,55 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle,
       vert_info = &vs_vert_info;
    }
 
+   /* Finished with fetch:
+    */
+   fetch_info = NULL;
+
    if ((fpme->opt & PT_SHADE) && gshader) {
       draw_geometry_shader_run(gshader,
                                draw->pt.user.gs_constants,
+                               draw->pt.user.gs_constants_size,
                                vert_info,
                                prim_info,
-                               &gs_vert_info,
-                               &gs_prim_info);
-         
-         
+                               &vshader->info,
+                               gs_vert_info,
+                               gs_prim_info);
+
       FREE(vert_info->verts);
-      vert_info = &gs_vert_info;
-      prim_info = &gs_prim_info;
+      vert_info = &gs_vert_info[0];
+      prim_info = &gs_prim_info[0];
+      num_vertex_streams = gshader->num_vertex_streams;
+
+      /*
+       * pt emit can only handle ushort number of vertices (see
+       * render->allocate_vertices).
+       * vsplit guarantees there's never more than 4096, however GS can
+       * easily blow this up (by a factor of 256 (or even 1024) max).
+       */
+      if (vert_info->count > 65535) {
+         opt |= PT_PIPELINE;
+      }
+   } else {
+      if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) {
+         draw_prim_assembler_run(draw, prim_info, vert_info,
+                                 &ia_prim_info, &ia_vert_info);
+
+         if (ia_vert_info.count) {
+            FREE(vert_info->verts);
+            vert_info = &ia_vert_info;
+            prim_info = &ia_prim_info;
+            free_prim_info = TRUE;
+         }
+      }
+   }
+   if (prim_info->count == 0) {
+      debug_printf("GS/IA didn't emit any vertices!\n");
+
+      FREE(vert_info->verts);
+      if (free_prim_info) {
+         FREE(prim_info->primitive_lengths);
+      }
+      return;
    }
 
 
@@ -260,37 +345,56 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle,
     * XXX: Stream output surely needs to respect the prim_info->elt
     *      lists.
     */
-   draw_pt_so_emit( fpme->so_emit,
-                    vert_info,
-                    prim_info );
-
-   if (draw_pt_post_vs_run( fpme->post_vs,
-                            vert_info ))
-   {
-      opt |= PT_PIPELINE;
-   }
+   draw_pt_so_emit( fpme->so_emit, num_vertex_streams, vert_info, prim_info );
+
+   draw_stats_clipper_primitives(draw, prim_info);
 
-   /* Do we need to run the pipeline?
+   /*
+    * if there's no position, need to stop now, or the later stages
+    * will try to access non-existent position output.
     */
-   if (opt & PT_PIPELINE) {
-      pipeline( fpme->draw,
-                vert_info,
-                prim_info );
+   if (draw_current_shader_position_output(draw) != -1) {
+
+      if (draw_pt_post_vs_run( fpme->post_vs, vert_info, prim_info ))
+      {
+         opt |= PT_PIPELINE;
+      }
+
+      /* Do we need to run the pipeline?
+       */
+      if (opt & PT_PIPELINE) {
+         pipeline( fpme, vert_info, prim_info );
+      }
+      else {
+         emit( fpme->emit, vert_info, prim_info );
+      }
    }
-   else {
-      emit( fpme->emit,
-            vert_info,
-            prim_info );
+   FREE(vert_info->verts);
+   if (free_prim_info) {
+      FREE(prim_info->primitive_lengths);
    }
+}
+
 
+static inline unsigned
+prim_type(unsigned prim, unsigned flags)
+{
+   if (flags & DRAW_LINE_LOOP_AS_STRIP)
+      return PIPE_PRIM_LINE_STRIP;
+   else
+      return prim;
 }
 
-static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
-                                const unsigned *fetch_elts,
-                                unsigned fetch_count,
-                                const ushort *draw_elts,
-                                unsigned draw_count )
+
+static void
+fetch_pipeline_run(struct draw_pt_middle_end *middle,
+                   const unsigned *fetch_elts,
+                   unsigned fetch_count,
+                   const ushort *draw_elts,
+                   unsigned draw_count,
+                   unsigned prim_flags)
 {
+   struct fetch_pipeline_middle_end *fpme = fetch_pipeline_middle_end(middle);
    struct draw_fetch_info fetch_info;
    struct draw_prim_info prim_info;
 
@@ -303,15 +407,22 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
    prim_info.start = 0;
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
+   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
+   prim_info.flags = prim_flags;
+   prim_info.primitive_count = 1;
+   prim_info.primitive_lengths = &draw_count;
 
    fetch_pipeline_generic( middle, &fetch_info, &prim_info );
 }
 
 
-static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
-                                       unsigned start,
-                                       unsigned count)
+static void
+fetch_pipeline_linear_run(struct draw_pt_middle_end *middle,
+                          unsigned start,
+                          unsigned count,
+                          unsigned prim_flags)
 {
+   struct fetch_pipeline_middle_end *fpme = fetch_pipeline_middle_end(middle);
    struct draw_fetch_info fetch_info;
    struct draw_prim_info prim_info;
 
@@ -324,18 +435,25 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
    prim_info.start = 0;
    prim_info.count = count;
    prim_info.elts = NULL;
+   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
+   prim_info.flags = prim_flags;
+   prim_info.primitive_count = 1;
+   prim_info.primitive_lengths = &count;
 
    fetch_pipeline_generic( middle, &fetch_info, &prim_info );
 }
 
 
 
-static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle,
-                                            unsigned start,
-                                            unsigned count,
-                                            const ushort *draw_elts,
-                                            unsigned draw_count )
+static boolean
+fetch_pipeline_linear_run_elts(struct draw_pt_middle_end *middle,
+                               unsigned start,
+                               unsigned count,
+                               const ushort *draw_elts,
+                               unsigned draw_count,
+                               unsigned prim_flags )
 {
+   struct fetch_pipeline_middle_end *fpme = fetch_pipeline_middle_end(middle);
    struct draw_fetch_info fetch_info;
    struct draw_prim_info prim_info;
 
@@ -348,20 +466,28 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
    prim_info.start = 0;
    prim_info.count = draw_count;
    prim_info.elts = draw_elts;
+   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
+   prim_info.flags = prim_flags;
+   prim_info.primitive_count = 1;
+   prim_info.primitive_lengths = &draw_count;
 
    fetch_pipeline_generic( middle, &fetch_info, &prim_info );
-}
 
+   return TRUE;
+}
 
 
-static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
+static void
+fetch_pipeline_finish( struct draw_pt_middle_end *middle )
 {
    /* nothing to do */
 }
 
-static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
+
+static void
+fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
 {
-   struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+   struct fetch_pipeline_middle_end *fpme = fetch_pipeline_middle_end(middle);
 
    if (fpme->fetch)
       draw_pt_fetch_destroy( fpme->fetch );
@@ -379,13 +505,16 @@ static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
 }
 
 
-struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw )
+struct draw_pt_middle_end *
+draw_pt_fetch_pipeline_or_emit(struct draw_context *draw)
 {
-   struct fetch_pipeline_middle_end *fpme = CALLOC_STRUCT( fetch_pipeline_middle_end );
+   struct fetch_pipeline_middle_end *fpme =
+      CALLOC_STRUCT( fetch_pipeline_middle_end );
    if (!fpme)
       goto fail;
 
    fpme->base.prepare        = fetch_pipeline_prepare;
+   fpme->base.bind_parameters  = fetch_pipeline_bind_parameters;
    fpme->base.run            = fetch_pipeline_run;
    fpme->base.run_linear     = fetch_pipeline_linear_run;
    fpme->base.run_linear_elts = fetch_pipeline_linear_run_elts;
@@ -403,7 +532,7 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
       goto fail;
 
    fpme->emit = draw_pt_emit_create( draw );
-   if (!fpme->emit) 
+   if (!fpme->emit)
       goto fail;
 
    fpme->so_emit = draw_pt_so_emit_create( draw );