svga/winsys: implement GBS support

[mesa.git] / src / gallium / winsys / svga / drm / vmw_context.c
diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c

index eb89272a5e6ad4b65480c2e42bf74ff1224bb206..1234a5edce322006e778c050d49ab9b7d68c2c05 100644 (file)
--- a/src/gallium/winsys/svga/drm/vmw_context.c
+++ b/src/gallium/winsys/svga/drm/vmw_context.c
@@ -29,6 +29,8 @@
  #include "util/u_debug.h"
  #include "util/u_memory.h"
  #include "util/u_debug_stack.h"
+#include "util/u_debug_flush.h"
+#include "util/u_hash_table.h"
  #include "pipebuffer/pb_buffer.h"
  #include "pipebuffer/pb_validate.h"
  
@@ -38,19 +40,38 @@
  #include "vmw_buffer.h"
  #include "vmw_surface.h"
  #include "vmw_fence.h"
+#include "vmw_shader.h"
  
  #define VMW_COMMAND_SIZE (64*1024)
  #define VMW_SURFACE_RELOCS (1024)
+#define VMW_SHADER_RELOCS (1024)
  #define VMW_REGION_RELOCS (512)
  
  #define VMW_MUST_FLUSH_STACK 8
  
-struct vmw_region_relocation
+/**
+ * A buffer relocation recorded while building commands and applied to the
+ * command buffer in vmw_swc_flush().
+ *
+ * is_mob selects the valid union member: "region" for entries recorded by
+ * vmw_swc_region_relocation(), "mob" for entries recorded by
+ * vmw_swc_mob_relocation().
+ */
+struct vmw_buffer_relocation
  {
-   struct SVGAGuestPtr *where;
     struct pb_buffer *buffer;
-   /* TODO: put offset info inside where */
+   boolean is_mob;
+   /* Added to the offset of the buffer's guest pointer at flush time. */
     uint32 offset;
+
+   union {
+      struct {
+        /* Receives the complete guest pointer at flush time. */
+        struct SVGAGuestPtr *where;
+      } region;
+      struct {
+        /* Receives the id at flush time; skipped when NULL. */
+        SVGAMobId *id;
+        /*
+         * Receives the final offset at flush time. When NULL, flush
+         * asserts that the final offset is zero.
+         */
+        uint32 *offset_into_mob;
+      } mob;
+   };
+};
+
+/**
+ * One entry of the context's surface or shader list.
+ *
+ * The surface list uses vsurf and the shader list uses vshader.
+ * referenced is TRUE while the entry accounts for one increment of the
+ * object's "validated" counter; flush and destroy decrement the counter
+ * only for entries that have it set.
+ */
+struct vmw_ctx_validate_item {
+   union {
+      struct vmw_svga_winsys_surface *vsurf;
+      struct vmw_svga_winsys_shader *vshader;
+   };
+   boolean referenced;
  };
  
  struct vmw_svga_winsys_context
@@ -58,10 +79,12 @@ struct vmw_svga_winsys_context
     struct svga_winsys_context base;
  
     struct vmw_winsys_screen *vws;
+   struct util_hash_table *hash;
  
  #ifdef DEBUG
     boolean must_flush;
     struct debug_stack_frame must_flush_stack[VMW_MUST_FLUSH_STACK];
+   struct debug_flush_ctx *fctx;
  #endif
  
     struct {
@@ -72,7 +95,7 @@ struct vmw_svga_winsys_context
     } command;
  
     struct {
-      struct vmw_svga_winsys_surface *handles[VMW_SURFACE_RELOCS];
+      struct vmw_ctx_validate_item items[VMW_SURFACE_RELOCS];
        uint32_t size;
        uint32_t used;
        uint32_t staged;
@@ -80,20 +103,30 @@ struct vmw_svga_winsys_context
     } surface;
     
     struct {
-      struct vmw_region_relocation relocs[VMW_REGION_RELOCS];
+      struct vmw_buffer_relocation relocs[VMW_REGION_RELOCS];
        uint32_t size;
        uint32_t used;
        uint32_t staged;
        uint32_t reserved;
     } region;
  
+   struct {
+      struct vmw_ctx_validate_item items[VMW_SHADER_RELOCS];
+      uint32_t size;
+      uint32_t used;
+      uint32_t staged;
+      uint32_t reserved;
+   } shader;
+
     struct pb_validate *validate;
  
     /**
-    * The amount of GMR that is referred by the commands currently batched
-    * in the context.
+    * The amount of surface, GMR or MOB memory that is referred by the commands
+    * currently batched in the context command buffer.
      */
-   uint32_t seen_regions;
+   uint64_t seen_surfaces;
+   uint64_t seen_regions;
+   uint64_t seen_mobs;
  
     /**
      * Whether this context should fail to reserve more commands, not because it
@@ -140,7 +173,7 @@ vmw_swc_flush(struct svga_winsys_context *swc,
     
        /* Apply relocations */
        for(i = 0; i < vswc->region.used; ++i) {
-         struct vmw_region_relocation *reloc = &vswc->region.relocs[i];
+         struct vmw_buffer_relocation *reloc = &vswc->region.relocs[i];
           struct SVGAGuestPtr ptr;
  
           if(!vmw_gmr_bufmgr_region_ptr(reloc->buffer, &ptr))
@@ -148,7 +181,16 @@ vmw_swc_flush(struct svga_winsys_context *swc,
  
           ptr.offset += reloc->offset;
  
-         *reloc->where = ptr;
+        if (reloc->is_mob) {
+           if (reloc->mob.id)
+              *reloc->mob.id = ptr.gmrId;
+           if (reloc->mob.offset_into_mob)
+              *reloc->mob.offset_into_mob = ptr.offset;
+           else {
+              assert(ptr.offset == 0);
+           }
+        } else
+           *reloc->region.where = ptr;
        }
  
        if (vswc->command.used || pfence != NULL)
@@ -166,27 +208,37 @@ vmw_swc_flush(struct svga_winsys_context *swc,
     vswc->command.reserved = 0;
  
     for(i = 0; i < vswc->surface.used + vswc->surface.staged; ++i) {
-      struct vmw_svga_winsys_surface *vsurf =
-        vswc->surface.handles[i];
-      p_atomic_dec(&vsurf->validated);
-      vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL);
+      struct vmw_ctx_validate_item *isurf = &vswc->surface.items[i];
+      if (isurf->referenced)
+         p_atomic_dec(&isurf->vsurf->validated);
+      vmw_svga_winsys_surface_reference(&isurf->vsurf, NULL);
     }
  
+   util_hash_table_clear(vswc->hash);
     vswc->surface.used = 0;
     vswc->surface.reserved = 0;
  
-   for(i = 0; i < vswc->region.used + vswc->region.staged; ++i) {
-      pb_reference(&vswc->region.relocs[i].buffer, NULL);
+   for(i = 0; i < vswc->shader.used + vswc->shader.staged; ++i) {
+      struct vmw_ctx_validate_item *ishader = &vswc->shader.items[i];
+      if (ishader->referenced)
+         p_atomic_dec(&ishader->vshader->validated);
+      vmw_svga_winsys_shader_reference(&ishader->vshader, NULL);
     }
  
+   vswc->shader.used = 0;
+   vswc->shader.reserved = 0;
+
     vswc->region.used = 0;
     vswc->region.reserved = 0;
  
  #ifdef DEBUG
     vswc->must_flush = FALSE;
+   debug_flush_flush(vswc->fctx);
  #endif
     vswc->preemptive_flush = FALSE;
+   vswc->seen_surfaces = 0;
     vswc->seen_regions = 0;
+   vswc->seen_mobs = 0;
  
     if(pfence)
        vmw_fence_reference(vswc->vws, pfence, fence);
@@ -210,6 +262,7 @@ vmw_swc_reserve(struct svga_winsys_context *swc,
        debug_backtrace_dump(vswc->must_flush_stack, VMW_MUST_FLUSH_STACK);
        assert(!vswc->must_flush);
     }
+   debug_flush_might_flush(vswc->fctx);
  #endif
  
     assert(nr_bytes <= vswc->command.size);
@@ -219,6 +272,7 @@ vmw_swc_reserve(struct svga_winsys_context *swc,
     if(vswc->preemptive_flush ||
        vswc->command.used + nr_bytes > vswc->command.size ||
        vswc->surface.used + nr_relocs > vswc->surface.size ||
+      vswc->shader.used + nr_relocs > vswc->shader.size ||
        vswc->region.used + nr_relocs > vswc->region.size) {
  #ifdef DEBUG
        vswc->must_flush = TRUE;
@@ -230,17 +284,190 @@ vmw_swc_reserve(struct svga_winsys_context *swc,
  
     assert(vswc->command.used + nr_bytes <= vswc->command.size);
     assert(vswc->surface.used + nr_relocs <= vswc->surface.size);
+   assert(vswc->shader.used + nr_relocs <= vswc->shader.size);
     assert(vswc->region.used + nr_relocs <= vswc->region.size);
     
     vswc->command.reserved = nr_bytes;
     vswc->surface.reserved = nr_relocs;
     vswc->surface.staged = 0;
+   vswc->shader.reserved = nr_relocs;
+   vswc->shader.staged = 0;
     vswc->region.reserved = nr_relocs;
     vswc->region.staged = 0;
     
     return vswc->command.buffer + vswc->command.used;
  }
  
+/**
+ * vmw_swc_context_relocation - Store the context id at a given location
+ *
+ * @swc: Pointer to an svga_winsys_context
+ * @cid: Location that receives the id of @swc
+ */
+static void
+vmw_swc_context_relocation(struct svga_winsys_context *swc,
+                          uint32 *cid)
+{
+   const uint32 context_id = swc->cid;
+
+   *cid = context_id;
+}
+
+/**
+ * vmw_swc_add_validate_buffer - Put a buffer on the validation list once
+ *
+ * @vswc:   The context whose validation list and hash table are used
+ * @pb_buf: The buffer to add
+ * @flags:  SVGA relocation flags, translated with
+ *          vmw_translate_to_pb_flags() before being passed to pb_validate
+ *
+ * A buffer that has been added is hashed with itself as the value, so a
+ * lookup that returns the buffer means there is nothing left to do.
+ *
+ * Returns TRUE if this call added the buffer, FALSE if it was already
+ * hashed.
+ */
+static boolean
+vmw_swc_add_validate_buffer(struct vmw_svga_winsys_context *vswc,
+                           struct pb_buffer *pb_buf,
+                           unsigned flags)
+{
+   enum pipe_error err;
+   unsigned pb_flags;
+
+   if (util_hash_table_get(vswc->hash, pb_buf) == pb_buf)
+      return FALSE;
+
+   /*
+    * TODO: Update pb_validate to provide a similar functionality
+    * (Check buffer already present before adding)
+    */
+   pb_flags = vmw_translate_to_pb_flags(flags);
+   err = pb_validate_add_buffer(vswc->validate, pb_buf, pb_flags);
+   /* TODO: Update pipebuffer to reserve buffers and not fail here */
+   assert(err == PIPE_OK);
+   (void)err;
+
+   /* Remember the buffer so that later relocations skip the add. */
+   (void)util_hash_table_set(vswc->hash, pb_buf, pb_buf);
+
+   return TRUE;
+}
+
+/**
+ * vmw_swc_region_relocation - Stage a guest-pointer relocation for a buffer
+ *
+ * @swc:    Pointer to an svga_winsys_context
+ * @where:  Location that receives the buffer's guest pointer at flush time
+ * @buffer: The buffer the relocation refers to
+ * @offset: Offset added to the guest pointer's offset at flush time
+ * @flags:  SVGA_RELOC_* flags
+ *
+ * The first time a buffer is added to the validation list, its size is
+ * counted in seen_regions; once that total reaches a fifth of
+ * VMW_GMR_POOL_SIZE the context is marked for a preemptive flush.
+ */
+static void
+vmw_swc_region_relocation(struct svga_winsys_context *swc,
+                          struct SVGAGuestPtr *where,
+                          struct svga_winsys_buffer *buffer,
+                          uint32 offset,
+                          unsigned flags)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_buffer_relocation *slot;
+   struct pb_buffer *pbuf;
+
+   assert(vswc->region.staged < vswc->region.reserved);
+
+   /* Claim the next staged entry of the relocation array. */
+   slot = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
+   vswc->region.staged++;
+
+   /*
+    * pb_validate holds a refcount to the buffer, so no need to
+    * refcount it again in the relocation.
+    */
+   pbuf = vmw_pb_buffer(buffer);
+
+   slot->is_mob = FALSE;
+   slot->buffer = pbuf;
+   slot->offset = offset;
+   slot->region.where = where;
+
+   if (vmw_swc_add_validate_buffer(vswc, pbuf, flags)) {
+      vswc->seen_regions += pbuf->size;
+      if (vswc->seen_regions >= VMW_GMR_POOL_SIZE/5)
+         vswc->preemptive_flush = TRUE;
+   }
+
+#ifdef DEBUG
+   /* Internal relocations are not reported to the flush debugger. */
+   if (!(flags & SVGA_RELOC_INTERNAL))
+      debug_flush_cb_reference(vswc->fctx, vmw_debug_flush_buf(buffer));
+#endif
+}
+
+/**
+ * vmw_swc_mob_relocation - Stage a MOB relocation for a buffer
+ *
+ * @swc:             Pointer to an svga_winsys_context
+ * @id:              Location that receives the id at flush time. May be
+ *                   NULL, in which case no id is written.
+ * @offset_into_mob: Location that receives the final offset at flush time.
+ *                   May be NULL, in which case flush asserts that the final
+ *                   offset is zero.
+ * @buffer:          The buffer the relocation refers to
+ * @offset:          Offset added to the buffer's own offset at flush time
+ * @flags:           SVGA_RELOC_* flags
+ *
+ * The entry is stored in the same array as guest-pointer relocations and
+ * is marked with is_mob. The first time a buffer is added to the validation
+ * list, its size is counted in seen_mobs; once that total reaches a fifth
+ * of ioctl.max_mob_memory the context is marked for a preemptive flush.
+ */
+static void
+vmw_swc_mob_relocation(struct svga_winsys_context *swc,
+                      SVGAMobId *id,
+                      uint32 *offset_into_mob,
+                      struct svga_winsys_buffer *buffer,
+                      uint32 offset,
+                      unsigned flags)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_buffer_relocation *slot;
+   struct pb_buffer *pbuf;
+
+   assert(vswc->region.staged < vswc->region.reserved);
+
+   /* Claim the next staged entry of the shared relocation array. */
+   slot = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
+   vswc->region.staged++;
+
+   /*
+    * pb_validate holds a refcount to the buffer, so no need to
+    * refcount it again in the relocation.
+    */
+   pbuf = vmw_pb_buffer(buffer);
+
+   slot->is_mob = TRUE;
+   slot->buffer = pbuf;
+   slot->offset = offset;
+   slot->mob.id = id;
+   slot->mob.offset_into_mob = offset_into_mob;
+
+   if (vmw_swc_add_validate_buffer(vswc, pbuf, flags)) {
+      vswc->seen_mobs += pbuf->size;
+      /* divide by 5, tested for best performance */
+      if (vswc->seen_mobs >= vswc->vws->ioctl.max_mob_memory / 5)
+         vswc->preemptive_flush = TRUE;
+   }
+
+#ifdef DEBUG
+   /* Internal relocations are not reported to the flush debugger. */
+   if (!(flags & SVGA_RELOC_INTERNAL))
+      debug_flush_cb_reference(vswc->fctx, vmw_debug_flush_buf(buffer));
+#endif
+}
+
+
+/**
+ * vmw_swc_surface_clear_reference - Drop the "referenced" mark of a surface
+ *
+ * @swc:   Pointer to an svga_winsys_context
+ * @vsurf: Pointer to the vmw_svga_winsys_surface whose mark should be
+ *         dropped
+ *
+ * Primarily used by a discard surface map: the surface data is then no
+ * longer referenced by a draw call, so mapping it should no longer cause
+ * a flush. Nothing happens if the surface is not hashed in this context or
+ * is not currently marked as referenced.
+ */
+void
+vmw_swc_surface_clear_reference(struct svga_winsys_context *swc,
+                                struct vmw_svga_winsys_surface *vsurf)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_ctx_validate_item *item;
+
+   item = util_hash_table_get(vswc->hash, vsurf);
+   if (item == NULL || !item->referenced)
+      return;
+
+   /* Give back the validated count this entry was holding. */
+   item->referenced = FALSE;
+   p_atomic_dec(&vsurf->validated);
+}
+
+/**
+ * vmw_swc_surface_only_relocation - Track a surface and write out its id
+ *
+ * @swc:   Pointer to an svga_winsys_context
+ * @where: Location that receives the surface id
+ * @vsurf: The surface being relocated
+ * @flags: SVGA_RELOC_* flags. With SVGA_RELOC_INTERNAL set, the surface is
+ *         not marked as referenced.
+ *
+ * A surface that is not yet hashed in the context takes the next staged
+ * entry of the surface list, which holds a reference to it, and its size
+ * is counted in seen_surfaces.
+ */
+static void
+vmw_swc_surface_only_relocation(struct svga_winsys_context *swc,
+                               uint32 *where,
+                               struct vmw_svga_winsys_surface *vsurf,
+                               unsigned flags)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_ctx_validate_item *item;
+
+   assert(vswc->surface.staged < vswc->surface.reserved);
+
+   item = util_hash_table_get(vswc->hash, vsurf);
+   if (!item) {
+      const uint32_t slot = vswc->surface.used + vswc->surface.staged;
+
+      item = &vswc->surface.items[slot];
+      vmw_svga_winsys_surface_reference(&item->vsurf, vsurf);
+      item->referenced = FALSE;
+
+      /*
+       * Note that a failure here may just fall back to unhashed behavior
+       * and potentially cause unnecessary flushing, so ignore the
+       * return code.
+       */
+      (void) util_hash_table_set(vswc->hash, vsurf, item);
+      vswc->surface.staged++;
+
+      /* divide by 5 not well tuned for performance */
+      vswc->seen_surfaces += vsurf->size;
+      if (vswc->seen_surfaces >= vswc->vws->ioctl.max_surface_memory / 5)
+         vswc->preemptive_flush = TRUE;
+   }
+
+   /* Each entry takes at most one validated count. */
+   if (!item->referenced && !(flags & SVGA_RELOC_INTERNAL)) {
+      p_atomic_inc(&vsurf->validated);
+      item->referenced = TRUE;
+   }
+
+   *where = vsurf->sid;
+}
  
  static void
  vmw_swc_surface_relocation(struct svga_winsys_context *swc,
@@ -249,75 +476,79 @@ vmw_swc_surface_relocation(struct svga_winsys_context *swc,
                             struct svga_winsys_surface *surface,
                             unsigned flags)
  {
-   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
     struct vmw_svga_winsys_surface *vsurf;
  
+   assert(swc->have_gb_objects || mobid == NULL);
+
     if(!surface) {
        *where = SVGA3D_INVALID_ID;
+      if (mobid)
+         *mobid = SVGA3D_INVALID_ID;
        return;
     }
  
-   assert(vswc->surface.staged < vswc->surface.reserved);
-
     vsurf = vmw_svga_winsys_surface(surface);
+   vmw_swc_surface_only_relocation(swc, where, vsurf, flags);
  
-   *where = vsurf->sid;
+   if (swc->have_gb_objects && vsurf->buf != NULL) {
  
-   vmw_svga_winsys_surface_reference(&vswc->surface.handles[vswc->surface.used + vswc->surface.staged], vsurf);
-   p_atomic_inc(&vsurf->validated);
-   ++vswc->surface.staged;
-}
+      /*
+       * Make sure backup buffer ends up fenced.
+       */
  
+      pipe_mutex_lock(vsurf->mutex);
+      assert(vsurf->buf != NULL);
+      
+      vmw_swc_mob_relocation(swc, mobid, NULL, (struct svga_winsys_buffer *)
+                             vsurf->buf, 0, flags);
+      pipe_mutex_unlock(vsurf->mutex);
+   }
+}
  
  static void
-vmw_swc_region_relocation(struct svga_winsys_context *swc,
-                          struct SVGAGuestPtr *where,
-                          struct svga_winsys_buffer *buffer,
-                          uint32 offset,
-                          unsigned flags)
+vmw_swc_shader_relocation(struct svga_winsys_context *swc,
+                         uint32 *shid,
+                         uint32 *mobid,
+                         uint32 *offset,
+                         struct svga_winsys_gb_shader *shader)
  {
+   /*
+    * Track a shader in the context's shader list and write its id to *shid.
+    *
+    * When mobid is non-NULL and the shader has a buffer, a MOB relocation
+    * for that buffer is also staged, with mobid and offset as the locations
+    * to patch and SVGA_RELOC_READ as the flags.
+    */
     struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
-   struct vmw_region_relocation *reloc;
-   unsigned translated_flags;
-   enum pipe_error ret;
-   
-   assert(vswc->region.staged < vswc->region.reserved);
+   struct vmw_svga_winsys_shader *vshader;
+   struct vmw_ctx_validate_item *ishader;
+   /*
+    * A NULL shader only invalidates *shid.
+    * NOTE(review): *mobid and *offset are left untouched on this path,
+    * while vmw_swc_surface_relocation() also invalidates *mobid for a NULL
+    * surface; confirm that callers never pass a NULL shader with a
+    * non-NULL mobid.
+    */
+   if(!shader) {
+      *shid = SVGA3D_INVALID_ID;
+      return;
+   }
  
-   reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
-   reloc->where = where;
-   pb_reference(&reloc->buffer, vmw_pb_buffer(buffer));
-   reloc->offset = offset;
+   assert(vswc->shader.staged < vswc->shader.reserved);
+   vshader = vmw_svga_winsys_shader(shader);
+   ishader = util_hash_table_get(vswc->hash, vshader);
+
+   /*
+    * Not hashed in this context yet: take the next staged entry of the
+    * shader list and let it hold a reference to the shader.
+    */
+   if (ishader == NULL) {
+      ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
+      vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
+      ishader->referenced = FALSE;
+      /*
+       * Note that a failure here may just fall back to unhashed behavior
+       * and potentially cause unnecessary flushing, so ignore the
+       * return code.
+       */
+      (void) util_hash_table_set(vswc->hash, vshader, ishader);
+      ++vswc->shader.staged;
+   }
  
-   ++vswc->region.staged;
+   /*
+    * Each entry takes at most one validated count; flush and destroy give
+    * it back for entries that have referenced set.
+    */
+   if (!ishader->referenced) {
+      ishader->referenced = TRUE;
+      p_atomic_inc(&vshader->validated);
+   }
  
-   translated_flags = vmw_translate_to_pb_flags(flags);
-   ret = pb_validate_add_buffer(vswc->validate, reloc->buffer, translated_flags);
-   /* TODO: Update pipebuffer to reserve buffers and not fail here */
-   assert(ret == PIPE_OK);
-   (void)ret;
+   *shid = vshader->shid;
  
-   /*
-    * Flush preemptively the FIFO commands to keep the GMR working set within
-    * the GMR pool size.
-    *
-    * This is necessary for applications like SPECviewperf that generate huge
-    * amounts of immediate vertex data, so that we don't pile up too much of
-    * that vertex data neither in the guest nor in the host.
-    *
-    * Note that in the current implementation if a region is referred twice in
-    * a command stream, it will be accounted twice. We could detect repeated
-    * regions and count only once, but there is no incentive to do that, since
-    * regions are typically short-lived; always referred in a single command;
-    * and at the worst we just flush the commands a bit sooner, which for the
-    * SVGA virtual device it's not a performance issue since flushing commands
-    * to the FIFO won't cause flushing in the host.
-    */
-   vswc->seen_regions += reloc->buffer->size;
-   if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/3)
-      vswc->preemptive_flush = TRUE;
+   /* The caller asked for the MOB id, so relocate the shader's buffer too. */
+   if (mobid != NULL && vshader->buf)
+      vmw_swc_mob_relocation(swc, mobid, offset, vshader->buf,
+                            0, SVGA_RELOC_READ);
  }
  
-
  static void
  vmw_swc_commit(struct svga_winsys_context *swc)
  {
@@ -334,6 +565,12 @@ vmw_swc_commit(struct svga_winsys_context *swc)
     vswc->surface.staged = 0;
     vswc->surface.reserved = 0;
  
+   assert(vswc->shader.staged <= vswc->shader.reserved);
+   assert(vswc->shader.used + vswc->shader.staged <= vswc->shader.size);
+   vswc->shader.used += vswc->shader.staged;
+   vswc->shader.staged = 0;
+   vswc->shader.reserved = 0;
+
     assert(vswc->region.staged <= vswc->region.reserved);
     assert(vswc->region.used + vswc->region.staged <= vswc->region.size);
     vswc->region.used += vswc->region.staged;
@@ -348,19 +585,38 @@ vmw_swc_destroy(struct svga_winsys_context *swc)
     struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
     unsigned i;
  
-   for(i = 0; i < vswc->region.used; ++i) {
-      pb_reference(&vswc->region.relocs[i].buffer, NULL);
+   for(i = 0; i < vswc->surface.used; ++i) {
+      struct vmw_ctx_validate_item *isurf = &vswc->surface.items[i];
+      if (isurf->referenced)
+         p_atomic_dec(&isurf->vsurf->validated);
+      vmw_svga_winsys_surface_reference(&isurf->vsurf, NULL);
     }
  
-   for(i = 0; i < vswc->surface.used; ++i) {
-      p_atomic_dec(&vswc->surface.handles[i]->validated);
-      vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL);
+   for(i = 0; i < vswc->shader.used; ++i) {
+      struct vmw_ctx_validate_item *ishader = &vswc->shader.items[i];
+      if (ishader->referenced)
+         p_atomic_dec(&ishader->vshader->validated);
+      vmw_svga_winsys_shader_reference(&ishader->vshader, NULL);
     }
+
+   util_hash_table_destroy(vswc->hash);
     pb_validate_destroy(vswc->validate);
     vmw_ioctl_context_destroy(vswc->vws, swc->cid);
+#ifdef DEBUG
+   debug_flush_ctx_destroy(vswc->fctx);
+#endif
     FREE(vswc);
  }
  
+/**
+ * Hash callback for the context hash table: the key pointer's value,
+ * truncated to unsigned, is the hash.
+ *
+ * The pointer is converted through uintptr_t, the integer type meant for
+ * pointer round-trips, rather than unsigned long, which is narrower than a
+ * pointer on LLP64 targets.
+ */
+static unsigned vmw_hash_ptr(void *p)
+{
+   return (unsigned)(uintptr_t)p;
+}
+
+/**
+ * Key comparison callback for the context hash table.
+ *
+ * Returns 0 when both keys are the same pointer and 1 otherwise.
+ */
+static int vmw_ptr_compare(void *key1, void *key2)
+{
+   return key1 != key2;
+}
  
  struct svga_winsys_context *
  vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
@@ -376,22 +632,41 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
     vswc->base.reserve = vmw_swc_reserve;
     vswc->base.surface_relocation = vmw_swc_surface_relocation;
     vswc->base.region_relocation = vmw_swc_region_relocation;
+   vswc->base.mob_relocation = vmw_swc_mob_relocation;
+   vswc->base.context_relocation = vmw_swc_context_relocation;
+   vswc->base.shader_relocation = vmw_swc_shader_relocation;
     vswc->base.commit = vmw_swc_commit;
     vswc->base.flush = vmw_swc_flush;
+   vswc->base.surface_map = vmw_svga_winsys_surface_map;
+   vswc->base.surface_unmap = vmw_svga_winsys_surface_unmap;
  
     vswc->base.cid = vmw_ioctl_context_create(vws);
+   vswc->base.have_gb_objects = sws->have_gb_objects;
  
     vswc->vws = vws;
  
     vswc->command.size = VMW_COMMAND_SIZE;
     vswc->surface.size = VMW_SURFACE_RELOCS;
+   vswc->shader.size = VMW_SHADER_RELOCS;
     vswc->region.size = VMW_REGION_RELOCS;
  
     vswc->validate = pb_validate_create();
-   if(!vswc->validate) {
-      FREE(vswc);
-      return NULL;
-   }
+   if(!vswc->validate)
+      goto out_no_validate;
+
+   vswc->hash = util_hash_table_create(vmw_hash_ptr, vmw_ptr_compare);
+   if (!vswc->hash)
+      goto out_no_hash;
+
+#ifdef DEBUG
+   vswc->fctx = debug_flush_ctx_create(TRUE, VMW_DEBUG_FLUSH_STACK);
+#endif
  
     return &vswc->base;
+
+out_no_hash:
+   pb_validate_destroy(vswc->validate);
+out_no_validate:
+   FREE(vswc);
+   return NULL;
  }