swr: Implement fence attached work queues for deferred deletion.
authorBruce Cherniak <bruce.cherniak@intel.com>
Tue, 13 Dec 2016 01:24:59 +0000 (19:24 -0600)
committerTim Rowley <timothy.o.rowley@intel.com>
Fri, 16 Dec 2016 17:29:02 +0000 (11:29 -0600)
Work can now be added to fences and triggered by fence completion. This
allows for deferred resource deletion, and other asynchronous tasks.

Reviewed-by: George Kyriazis <george.kyriazis@intel.com>
src/gallium/drivers/swr/Makefile.sources
src/gallium/drivers/swr/swr_context.cpp
src/gallium/drivers/swr/swr_fence.cpp
src/gallium/drivers/swr/swr_fence.h
src/gallium/drivers/swr/swr_fence_work.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_fence_work.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_scratch.cpp
src/gallium/drivers/swr/swr_screen.cpp
src/gallium/drivers/swr/swr_state.cpp

index d81d458d3e80f53f0430c9029f39e5859c3508ac..1afb532434c9ae7d8e35c2f04bc1a59fe5388543 100644 (file)
@@ -42,6 +42,8 @@ CXX_SOURCES := \
        swr_memory.h \
        swr_fence.h \
        swr_fence.cpp \
+       swr_fence_work.h \
+       swr_fence_work.cpp \
        swr_query.h \
        swr_query.cpp
 
index b8c87faef733b9eabcc0389bcbd5aa23dc716742..89330857ae01c658c53bf06502708a0e8e4b5fd5 100644 (file)
@@ -355,9 +355,6 @@ swr_destroy(struct pipe_context *pipe)
    if (ctx->blitter)
       util_blitter_destroy(ctx->blitter);
 
-   /* Idle core before deleting context */
-   SwrWaitForIdle(ctx->swrContext);
-
    for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
       pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL);
    }
@@ -372,6 +369,10 @@ swr_destroy(struct pipe_context *pipe)
       pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_VERTEX][i], NULL);
    }
 
+   /* Idle core after destroying buffer resources, but before deleting
+    * context.  Destroying resources has potentially called StoreTiles.*/
+   SwrWaitForIdle(ctx->swrContext);
+
    if (ctx->swrContext)
       SwrDestroyContext(ctx->swrContext);
 
index 7fe24700bfafcb8fe40479783a1f2b69971be402..c73bbbf270803a674887947b8515956106cbc0ec 100644 (file)
  * to SwrSync call.
  */
 static void
-swr_sync_cb(uint64_t userData, uint64_t userData2, uint64_t userData3)
+swr_fence_cb(uint64_t userData, uint64_t userData2, uint64_t userData3)
 {
    struct swr_fence *fence = (struct swr_fence *)userData;
 
+   /* Complete all work attached to the fence */
+   swr_fence_do_work(fence);
+
    /* Correct value is in SwrSync data, and not the fence write field. */
    fence->read = userData2;
 }
@@ -56,7 +59,7 @@ swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fh)
 
    fence->write++;
    fence->pending = TRUE;
-   SwrSync(ctx->swrContext, swr_sync_cb, (uint64_t)fence, fence->write, 0);
+   SwrSync(ctx->swrContext, swr_fence_cb, (uint64_t)fence, fence->write, 0);
 }
 
 /*
@@ -72,6 +75,7 @@ swr_fence_create()
 
    pipe_reference_init(&fence->reference, 1);
    fence->id = fence_id++;
+   fence->work.tail = &fence->work.head;
 
    return (struct pipe_fence_handle *)fence;
 }
@@ -80,6 +84,8 @@ swr_fence_create()
 static void
 swr_fence_destroy(struct swr_fence *fence)
 {
+   /* Complete any work left if fence was not submitted */
+   swr_fence_do_work(fence);
    FREE(fence);
 }
 
@@ -101,8 +107,10 @@ swr_fence_reference(struct pipe_screen *screen,
       old = NULL;
    }
 
-   if (pipe_reference(&old->reference, &fence->reference))
+   if (pipe_reference(&old->reference, &fence->reference)) {
+      swr_fence_finish(screen, NULL, (struct pipe_fence_handle *) old, 0);
       swr_fence_destroy(old);
+   }
 }
 
 
index 80a4345af87d4650ca7ab70e4750eb1bc3966f5d..4766b5b89111bcfee201431afc43f5f61da20135 100644 (file)
@@ -25,6 +25,8 @@
 #include "pipe/p_state.h"
 #include "util/u_inlines.h"
 
+#include "swr_fence_work.h"
+
 struct pipe_screen;
 
 struct swr_fence {
@@ -36,6 +38,12 @@ struct swr_fence {
    unsigned pending;
 
    unsigned id; /* Just for reference */
+   
+   struct {
+      uint32_t count;
+      struct swr_fence_work head;
+      struct swr_fence_work *tail;
+   } work;
 };
 
 
diff --git a/src/gallium/drivers/swr/swr_fence_work.cpp b/src/gallium/drivers/swr/swr_fence_work.cpp
new file mode 100644 (file)
index 0000000..3f83e61
--- /dev/null
@@ -0,0 +1,148 @@
+/****************************************************************************
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "swr_context.h"
+#include "swr_fence.h"
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+/*
+ * Called by swr_fence_cb to complete the work queue
+ */
+void
+swr_fence_do_work(struct swr_fence *fence)
+{
+   struct swr_fence_work *work, *tmp;
+
+   if (fence->work.head.next) {
+      work = fence->work.head.next;
+      /* Immediately clear the head so any new work gets added to a new work
+       * queue */
+      p_atomic_set(&fence->work.head.next, nullptr);
+      p_atomic_set(&fence->work.tail, &fence->work.head);
+      p_atomic_set(&fence->work.count, 0);
+
+      do {
+         tmp = work->next;
+         work->callback(work);
+         FREE(work);
+         work = tmp;
+      } while(work);
+   }
+}
+
+
+/*
+ * Called by one of the specialized work routines below
+ */
+static inline void
+swr_add_fence_work(struct pipe_fence_handle *fh,
+                   struct swr_fence_work *work)
+{
+   /* If no fence, just do the work now */
+   if (!fh) {
+      work->callback(work);
+      FREE(work);
+      return;
+   }
+
+   struct swr_fence *fence  = swr_fence(fh);
+   p_atomic_set(&fence->work.tail->next, work);
+   p_atomic_set(&fence->work.tail, work);
+   p_atomic_inc(&fence->work.count);
+}
+
+
+/*
+ * Generic free/free_aligned, and delete vs/fs
+ */
+template<bool aligned_free>
+static void
+swr_free_cb(struct swr_fence_work *work)
+{
+   if (aligned_free)
+      AlignedFree(work->free.data);
+   else  
+      FREE(work->free.data);
+}
+
+static void
+swr_delete_vs_cb(struct swr_fence_work *work)
+{
+   delete work->free.swr_vs;
+}
+
+static void
+swr_delete_fs_cb(struct swr_fence_work *work)
+{
+   delete work->free.swr_fs;
+}
+
+bool
+swr_fence_work_free(struct pipe_fence_handle *fence, void *data,
+                    bool aligned_free)
+{
+   struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
+   if (!work)
+      return false;
+   if (aligned_free)
+      work->callback = swr_free_cb<true>;
+   else
+      work->callback = swr_free_cb<false>;
+   work->free.data = data;
+
+   swr_add_fence_work(fence, work);
+
+   return true;
+}
+
+bool
+swr_fence_work_delete_vs(struct pipe_fence_handle *fence,
+                         struct swr_vertex_shader *swr_vs)
+{
+   struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
+   if (!work)
+      return false;
+   work->callback = swr_delete_vs_cb;
+   work->free.swr_vs = swr_vs;
+
+   swr_add_fence_work(fence, work);
+
+   return true;
+}
+
+bool
+swr_fence_work_delete_fs(struct pipe_fence_handle *fence,
+                         struct swr_fragment_shader *swr_fs)
+{
+   struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
+   if (!work)
+      return false;
+   work->callback = swr_delete_fs_cb;
+   work->free.swr_fs = swr_fs;
+
+   swr_add_fence_work(fence, work);
+
+   return true;
+}
diff --git a/src/gallium/drivers/swr/swr_fence_work.h b/src/gallium/drivers/swr/swr_fence_work.h
new file mode 100644 (file)
index 0000000..1240360
--- /dev/null
@@ -0,0 +1,47 @@
+/****************************************************************************
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_FENCE_WORK_H
+#define SWR_FENCE_WORK_H
+
+typedef void(*SWR_WORK_CALLBACK_FUNC)(struct swr_fence_work *work);
+
+struct swr_fence_work {
+   SWR_WORK_CALLBACK_FUNC callback;
+
+   union {
+      void *data;
+      struct swr_vertex_shader *swr_vs;
+      struct swr_fragment_shader *swr_fs;
+   } free;
+
+   struct swr_fence_work *next;
+};
+
+void swr_fence_do_work(struct swr_fence *fence);
+
+bool swr_fence_work_free(struct pipe_fence_handle *fence, void *data,
+                         bool aligned_free = false);
+bool swr_fence_work_delete_vs(struct pipe_fence_handle *fence,
+                              struct swr_vertex_shader *swr_vs);
+bool swr_fence_work_delete_fs(struct pipe_fence_handle *fence,
+                              struct swr_fragment_shader *swr_vs);
+#endif
index 2515c8beaf390e64f3f0978a90b085e4d3446e7a..58d18d04fab12970a6b138866d3a2d0ab3d18cad 100644 (file)
@@ -23,7 +23,9 @@
 
 #include "util/u_memory.h"
 #include "swr_context.h"
+#include "swr_screen.h"
 #include "swr_scratch.h"
+#include "swr_fence_work.h"
 #include "api.h"
 
 
@@ -46,18 +48,18 @@ swr_copy_to_scratch_space(struct swr_context *ctx,
 
       /* Need to grow space */
       if (max_size_in_flight > space->current_size) {
-         /* Must idle the pipeline, this is infrequent */
-         SwrWaitForIdle(ctx->swrContext);
-
          space->current_size = max_size_in_flight;
 
          if (space->base) {
-            align_free(space->base);
+            /* defer delete, use aligned-free */
+            struct swr_screen *screen = swr_screen(ctx->pipe.screen);
+            swr_fence_work_free(screen->flush_fence, space->base, true);
             space->base = NULL;
          }
 
          if (!space->base) {
-            space->base = (uint8_t *)align_malloc(space->current_size, 4);
+            space->base = (uint8_t *)AlignedMalloc(space->current_size, 
+                                                   sizeof(void *));
             space->head = (void *)space->base;
          }
       }
@@ -65,14 +67,6 @@ swr_copy_to_scratch_space(struct swr_context *ctx,
       /* Wrap */
       if (((uint8_t *)space->head + size)
           >= ((uint8_t *)space->base + space->current_size)) {
-         /*
-          * TODO XXX: Should add a fence on wrap.  Assumption is that
-          * current_space >> size, and there are at least MAX_DRAWS_IN_FLIGHT
-          * draws in scratch.  So fence would always be met on wrap.  A fence
-          * would ensure that first frame in buffer is done before wrapping.
-          * If fence ever needs to be waited on, can increase buffer size.
-          * So far in testing, this hasn't been necessary.
-          */
          space->head = space->base;
       }
 
@@ -103,14 +97,10 @@ swr_destroy_scratch_buffers(struct swr_context *ctx)
    struct swr_scratch_buffers *scratch = ctx->scratch;
 
    if (scratch) {
-      if (scratch->vs_constants.base)
-         align_free(scratch->vs_constants.base);
-      if (scratch->fs_constants.base)
-         align_free(scratch->fs_constants.base);
-      if (scratch->vertex_buffer.base)
-         align_free(scratch->vertex_buffer.base);
-      if (scratch->index_buffer.base)
-         align_free(scratch->index_buffer.base);
+      AlignedFree(scratch->vs_constants.base);
+      AlignedFree(scratch->fs_constants.base);
+      AlignedFree(scratch->vertex_buffer.base);
+      AlignedFree(scratch->index_buffer.base);
       FREE(scratch);
    }
 }
index 7a46d092f410c8ce6ef5e304b9f0ca72c7c5293d..a9905d79b4ed69f6e5a2c6805d64bb25b4e83e84 100644 (file)
@@ -869,29 +869,28 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt)
    struct swr_resource *spr = swr_resource(pt);
    struct pipe_context *pipe = screen->pipe;
 
-   /* Only wait on fence if the resource is being used */
-   if (pipe && spr->status) {
-      /* But, if there's no fence pending, submit one.
-       * XXX: Remove once draw timestamps are implmented. */
-      if (!swr_is_fence_pending(screen->flush_fence))
-         swr_fence_submit(swr_context(pipe), screen->flush_fence);
-
+   if (spr->display_target) {
+      /* If resource is display target, winsys manages the buffer and will
+       * free it on displaytarget_destroy. */
       swr_fence_finish(p_screen, NULL, screen->flush_fence, 0);
-      swr_resource_unused(pt);
-   }
 
-   /*
-    * Free resource primary surface.  If resource is display target, winsys
-    * manages the buffer and will free it on displaytarget_destroy.
-    */
-   if (spr->display_target) {
-      /* display target */
       struct sw_winsys *winsys = screen->winsys;
       winsys->displaytarget_destroy(winsys, spr->display_target);
-   } else
-      AlignedFree(spr->swr.pBaseAddress);
 
-   AlignedFree(spr->secondary.pBaseAddress);
+   } else {
+      /* For regular resources, if the resource is being used, defer deletion
+       * (use aligned-free) */
+      if (pipe && spr->status) {
+         swr_resource_unused(pt);
+         swr_fence_work_free(screen->flush_fence,
+                             spr->swr.pBaseAddress, true);
+         swr_fence_work_free(screen->flush_fence, 
+                             spr->secondary.pBaseAddress, true);
+      } else {
+         AlignedFree(spr->swr.pBaseAddress);
+         AlignedFree(spr->secondary.pBaseAddress);
+      }
+   }
 
    FREE(spr);
 }
index 4475252e50ef73aa5ad497e7c3f2fc9bd95eeded..41e03560b178d5352314f21aa6a928765dc0c8a8 100644 (file)
@@ -372,10 +372,9 @@ swr_delete_vs_state(struct pipe_context *pipe, void *vs)
    struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
    FREE((void *)swr_vs->pipe.tokens);
    struct swr_screen *screen = swr_screen(pipe->screen);
-   if (!swr_is_fence_pending(screen->flush_fence))
-      swr_fence_submit(swr_context(pipe), screen->flush_fence);
-   swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
-   delete swr_vs;
+
+   /* Defer deletion of vs state */
+   swr_fence_work_delete_vs(screen->flush_fence, swr_vs);
 }
 
 static void *
@@ -412,10 +411,9 @@ swr_delete_fs_state(struct pipe_context *pipe, void *fs)
    struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
    FREE((void *)swr_fs->pipe.tokens);
    struct swr_screen *screen = swr_screen(pipe->screen);
-   if (!swr_is_fence_pending(screen->flush_fence))
-      swr_fence_submit(swr_context(pipe), screen->flush_fence);
-   swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
-   delete swr_fs;
+
+   /* Defer deleton of fs state */
+   swr_fence_work_delete_fs(screen->flush_fence, swr_fs);
 }
 
 
@@ -912,7 +910,7 @@ swr_update_derived(struct pipe_context *pipe,
                    const struct pipe_draw_info *p_draw_info)
 {
    struct swr_context *ctx = swr_context(pipe);
-   struct swr_screen *screen = swr_screen(ctx->pipe.screen);
+   struct swr_screen *screen = swr_screen(pipe->screen);
 
    /* Update screen->pipe to current pipe context. */
    if (screen->pipe != pipe)