gallium: add PIPE_SHADER_CAP_GLSL_16BIT_TEMPS for LowerPrecisionTemporaries
[mesa.git] / src / gallium / drivers / svga / svga_swtnl_backend.c
index ded8bcbd536ad05858429261343a22e627717eaa..5887a9ad7d737b077356635da9d70af17e2ad9f5 100644 (file)
 #include "svga_reg.h"
 #include "svga3d_reg.h"
 #include "svga_draw.h"
+#include "svga_shader.h"
 #include "svga_swtnl_private.h"
 
 
 static const struct vertex_info *
-svga_vbuf_render_get_vertex_info( struct vbuf_render *render )
+svga_vbuf_render_get_vertex_info(struct vbuf_render *render)
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
    struct svga_context *svga = svga_render->svga;
@@ -56,9 +57,9 @@ svga_vbuf_render_get_vertex_info( struct vbuf_render *render )
 
 
 static boolean
-svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
-                                    ushort vertex_size,
-                                    ushort nr_vertices )
+svga_vbuf_render_allocate_vertices(struct vbuf_render *render,
+                                   ushort vertex_size,
+                                   ushort nr_vertices)
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
    struct svga_context *svga = svga_render->svga;
@@ -67,6 +68,9 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
    boolean new_vbuf = FALSE;
    boolean new_ibuf = FALSE;
 
+   SVGA_STATS_TIME_PUSH(svga_sws(svga),
+                        SVGA_STATS_TIME_VBUFRENDERALLOCVERT);
+
    if (svga_render->vertex_size != vertex_size)
       svga->swtnl.new_vdecl = TRUE;
    svga_render->vertex_size = (size_t)vertex_size;
@@ -75,7 +79,8 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
       new_ibuf = new_vbuf = TRUE;
    svga->swtnl.new_vbuf = FALSE;
 
-   if (svga_render->vbuf_size < svga_render->vbuf_offset + svga_render->vbuf_used + size)
+   if (svga_render->vbuf_size
+       < svga_render->vbuf_offset + svga_render->vbuf_used + size)
       new_vbuf = TRUE;
 
    if (new_vbuf)
@@ -85,11 +90,12 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
 
    if (!svga_render->vbuf) {
       svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size);
-      svga_render->vbuf = pipe_buffer_create(screen,
-                                             PIPE_BIND_VERTEX_BUFFER,
-                                             PIPE_USAGE_STREAM,
-                                             svga_render->vbuf_size);
-      if(!svga_render->vbuf) {
+      svga_render->vbuf = SVGA_TRY_PTR(pipe_buffer_create
+                                       (screen, PIPE_BIND_VERTEX_BUFFER,
+                                        PIPE_USAGE_STREAM,
+                                        svga_render->vbuf_size));
+      if (!svga_render->vbuf) {
+         svga_retry_enter(svga);
          svga_context_flush(svga, NULL);
          assert(!svga_render->vbuf);
          svga_render->vbuf = pipe_buffer_create(screen,
@@ -99,6 +105,7 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
          /* The buffer allocation may fail if we run out of memory.
           * The draw module's vbuf code should handle that without crashing.
           */
+         svga_retry_exit(svga);
       }
 
       svga->swtnl.new_vdecl = TRUE;
@@ -112,14 +119,21 @@ svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
    if (svga->swtnl.new_vdecl)
       svga_render->vdecl_offset = svga_render->vbuf_offset;
 
+   SVGA_STATS_TIME_POP(svga_sws(svga));
+
    return TRUE;
 }
 
+
 static void *
-svga_vbuf_render_map_vertices( struct vbuf_render *render )
+svga_vbuf_render_map_vertices(struct vbuf_render *render)
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
    struct svga_context *svga = svga_render->svga;
+   void * retPtr = NULL;
+
+   SVGA_STATS_TIME_PUSH(svga_sws(svga),
+                        SVGA_STATS_TIME_VBUFRENDERMAPVERT);
 
    if (svga_render->vbuf) {
       char *ptr = (char*)pipe_buffer_map(&svga->pipe,
@@ -129,81 +143,117 @@ svga_vbuf_render_map_vertices( struct vbuf_render *render )
                                          PIPE_TRANSFER_DISCARD_RANGE |
                                          PIPE_TRANSFER_UNSYNCHRONIZED,
                                          &svga_render->vbuf_transfer);
-      if (ptr)
-         return ptr + svga_render->vbuf_offset;
+      if (ptr) {
+         svga_render->vbuf_ptr = ptr;
+         retPtr = ptr + svga_render->vbuf_offset;
+      }
       else {
+         svga_render->vbuf_ptr = NULL;
          svga_render->vbuf_transfer = NULL;
-         return NULL;
+         retPtr = NULL;
       }
    }
    else {
       /* we probably ran out of memory when allocating the vertex buffer */
-      return NULL;
+      retPtr = NULL;
    }
+
+   SVGA_STATS_TIME_POP(svga_sws(svga));
+   return retPtr;
 }
 
+
 static void
-svga_vbuf_render_unmap_vertices( struct vbuf_render *render,
-                                 ushort min_index,
-                                 ushort max_index )
+svga_vbuf_render_unmap_vertices(struct vbuf_render *render,
+                                ushort min_index,
+                                ushort max_index)
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
    struct svga_context *svga = svga_render->svga;
    unsigned offset, length;
    size_t used = svga_render->vertex_size * ((size_t)max_index + 1);
 
+   SVGA_STATS_TIME_PUSH(svga_sws(svga),
+                        SVGA_STATS_TIME_VBUFRENDERUNMAPVERT);
+
    offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index;
    length = svga_render->vertex_size * (max_index + 1 - min_index);
+
+   if (0) {
+      /* dump vertex data */
+      const float *f = (const float *) ((char *) svga_render->vbuf_ptr +
+                                        svga_render->vbuf_offset);
+      unsigned i;
+      debug_printf("swtnl vertex data:\n");
+      for (i = 0; i < length / 4; i += 4) {
+         debug_printf("%u: %f %f %f %f\n", i, f[i], f[i+1], f[i+2], f[i+3]);
+      }
+   }
+
    pipe_buffer_flush_mapped_range(&svga->pipe,
-                                 svga_render->vbuf_transfer,
-                                 offset, length);
+                                  svga_render->vbuf_transfer,
+                                  offset, length);
    pipe_buffer_unmap(&svga->pipe, svga_render->vbuf_transfer);
    svga_render->min_index = min_index;
    svga_render->max_index = max_index;
    svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used);
+
+   SVGA_STATS_TIME_POP(svga_sws(svga));
 }
 
+
 static void
-svga_vbuf_render_set_primitive( struct vbuf_render *render,
-                                unsigned prim )
+svga_vbuf_render_set_primitive(struct vbuf_render *render,
+                               enum pipe_prim_type prim)
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
    svga_render->prim = prim;
 }
 
+
 static void
-svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
+svga_vbuf_submit_state(struct svga_vbuf_render *svga_render)
 {
    struct svga_context *svga = svga_render->svga;
    SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
-   enum pipe_error ret;
    unsigned i;
+   static const unsigned zero[PIPE_MAX_ATTRIBS] = {0};
+   boolean retried;
 
    /* if the vdecl or vbuf hasn't changed do nothing */
    if (!svga->swtnl.new_vdecl)
       return;
 
+   SVGA_STATS_TIME_PUSH(svga_sws(svga),
+                        SVGA_STATS_TIME_VBUFSUBMITSTATE);
+
    memcpy(vdecl, svga_render->vdecl, sizeof(vdecl));
 
    /* flush the hw state */
-   ret = svga_hwtnl_flush(svga->hwtnl);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = svga_hwtnl_flush(svga->hwtnl);
+   SVGA_RETRY_CHECK(svga, svga_hwtnl_flush(svga->hwtnl), retried);
+   if (retried) {
       /* if we hit this path we might become synced with hw */
       svga->swtnl.new_vbuf = TRUE;
-      assert(ret == 0);
    }
 
-   svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count);
-
    for (i = 0; i < svga_render->vdecl_count; i++) {
       vdecl[i].array.offset += svga_render->vdecl_offset;
+   }
 
-      svga_hwtnl_vdecl( svga->hwtnl,
-                        i,
-                        &vdecl[i],
-                        svga_render->vbuf );
+   svga_hwtnl_vertex_decls(svga->hwtnl,
+                           svga_render->vdecl_count,
+                           vdecl,
+                           zero,
+                           svga_render->layout_id);
+
+   /* Specify the vertex buffer (there's only ever one) */
+   {
+      struct pipe_vertex_buffer vb;
+      vb.is_user_buffer = false;
+      vb.buffer.resource = svga_render->vbuf;
+      vb.buffer_offset = svga_render->vdecl_offset;
+      vb.stride = vdecl[0].array.stride;
+      svga_hwtnl_vertex_buffers(svga->hwtnl, 1, &vb);
    }
 
    /* We have already taken care of flatshading, so let the hwtnl
@@ -211,29 +261,36 @@ svga_vbuf_submit_state( struct svga_vbuf_render *svga_render )
     */
    if (svga->state.sw.need_pipeline) {
       svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE);
-      svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
+      svga_hwtnl_set_fillmode(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
    }
    else {
-      svga_hwtnl_set_flatshade( svga->hwtnl,
-                                svga->curr.rast->templ.flatshade,
-                                svga->curr.rast->templ.flatshade_first );
+      svga_hwtnl_set_flatshade(svga->hwtnl,
+                                svga->curr.rast->templ.flatshade ||
+                                svga_is_using_flat_shading(svga),
+                                svga->curr.rast->templ.flatshade_first);
 
-      svga_hwtnl_set_unfilled( svga->hwtnl,
-                               svga->curr.rast->hw_unfilled );
+      svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
    }
 
    svga->swtnl.new_vdecl = FALSE;
+   SVGA_STATS_TIME_POP(svga_sws(svga));
 }
 
+
 static void
-svga_vbuf_render_draw_arrays( struct vbuf_render *render,
-                              unsigned start,
-                              uint nr )
+svga_vbuf_render_draw_arrays(struct vbuf_render *render,
+                              unsigned start, uint nr)
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
    struct svga_context *svga = svga_render->svga;
-   unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
-   enum pipe_error ret = PIPE_OK;
+   unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset)
+      / svga_render->vertex_size;
+   /* instancing will already have been resolved at this point by 'draw' */
+   const unsigned start_instance = 0;
+   const unsigned instance_count = 1;
+   boolean retried;
+
+   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_VBUFDRAWARRAYS);
 
    /* off to hardware */
    svga_vbuf_submit_state(svga_render);
@@ -242,46 +299,47 @@ svga_vbuf_render_draw_arrays( struct vbuf_render *render,
     * altered some of our state behind our backs.  Testcase:
     * redbook/polys.c
     */
-   svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
-
-   ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+   svga_update_state_retry(svga, SVGA_STATE_HW_DRAW);
+   SVGA_RETRY_CHECK(svga, svga_hwtnl_draw_arrays
+                    (svga->hwtnl, svga_render->prim, start + bias,
+                     nr, start_instance, instance_count, 0), retried);
+   if (retried) {
       svga->swtnl.new_vbuf = TRUE;
-      assert(ret == PIPE_OK);
    }
+
+   SVGA_STATS_TIME_POP(svga_sws(svga));
 }
 
 
 static void
-svga_vbuf_render_draw_elements( struct vbuf_render *render,
+svga_vbuf_render_draw_elements(struct vbuf_render *render,
                                 const ushort *indices,
                                 uint nr_indices)
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
    struct svga_context *svga = svga_render->svga;
-   struct pipe_screen *screen = svga->pipe.screen;
-   int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
-   boolean ret;
-   size_t size = 2 * nr_indices;
-
-   assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0);
-   
-   if (svga_render->ibuf_size < svga_render->ibuf_offset + size)
-      pipe_resource_reference(&svga_render->ibuf, NULL);
-
-   if (!svga_render->ibuf) {
-      svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size);
-      svga_render->ibuf = pipe_buffer_create(screen,
-                                             PIPE_BIND_INDEX_BUFFER,
-                                             PIPE_USAGE_STREAM,
-                                             svga_render->ibuf_size);
-      svga_render->ibuf_offset = 0;
-   }
-
-   pipe_buffer_write_nooverlap(&svga->pipe, svga_render->ibuf,
-                              svga_render->ibuf_offset, 2 * nr_indices, indices);
+   int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset)
+      / svga_render->vertex_size;
+   boolean retried;
+   /* instancing will already have been resolved at this point by 'draw' */
+   const struct pipe_draw_info info = {
+      .index_size = 2,
+      .mode = svga_render->prim,
+      .has_user_indices = 1,
+      .index.user = indices,
+      .start_instance = 0,
+      .instance_count = 1,
+      .index_bias = bias,
+      .min_index = svga_render->min_index,
+      .max_index = svga_render->max_index,
+      .start = 0,
+      .count = nr_indices
+   };
+
+   assert((svga_render->vbuf_offset - svga_render->vdecl_offset)
+          % svga_render->vertex_size == 0);
+
+   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_VBUFDRAWELEMENTS);
 
    /* off to hardware */
    svga_vbuf_submit_state(svga_render);
@@ -290,43 +348,26 @@ svga_vbuf_render_draw_elements( struct vbuf_render *render,
     * altered some of our state behind our backs.  Testcase:
     * redbook/polys.c
     */
-   svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
-
-   ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
-                                        svga_render->ibuf,
-                                        2,
-                                        bias,
-                                        svga_render->min_index,
-                                        svga_render->max_index,
-                                        svga_render->prim,
-                                        svga_render->ibuf_offset / 2, nr_indices);
-   if(ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
-                                           svga_render->ibuf,
-                                           2,
-                                           bias,
-                                           svga_render->min_index,
-                                           svga_render->max_index,
-                                           svga_render->prim,
-                                           svga_render->ibuf_offset / 2, nr_indices);
+   svga_update_state_retry(svga, SVGA_STATE_HW_DRAW);
+   SVGA_RETRY_CHECK(svga, svga_hwtnl_draw_range_elements(svga->hwtnl, &info,
+                                                         nr_indices), retried);
+   if (retried) {
       svga->swtnl.new_vbuf = TRUE;
-      assert(ret == PIPE_OK);
    }
 
-   svga_render->ibuf_offset += size;
+   SVGA_STATS_TIME_POP(svga_sws(svga));
 }
 
 
 static void
-svga_vbuf_render_release_vertices( struct vbuf_render *render )
+svga_vbuf_render_release_vertices(struct vbuf_render *render)
 {
 
 }
 
 
 static void
-svga_vbuf_render_destroy( struct vbuf_render *render )
+svga_vbuf_render_destroy(struct vbuf_render *render)
 {
    struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
 
@@ -340,7 +381,7 @@ svga_vbuf_render_destroy( struct vbuf_render *render )
  * Create a new primitive render.
  */
 struct vbuf_render *
-svga_vbuf_render_create( struct svga_context *svga )
+svga_vbuf_render_create(struct svga_context *svga)
 {
    struct svga_vbuf_render *svga_render = CALLOC_STRUCT(svga_vbuf_render);
 
@@ -349,6 +390,7 @@ svga_vbuf_render_create( struct svga_context *svga )
    svga_render->vbuf_size = 0;
    svga_render->ibuf_alloc_size = 4*1024;
    svga_render->vbuf_alloc_size = 64*1024;
+   svga_render->layout_id = SVGA3D_INVALID_ID;
    svga_render->base.max_vertex_buffer_bytes = 64*1024/10;
    svga_render->base.max_indices = 65536;
    svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info;