gallium/swr: Re-enable scratch space for client-memory buffers
author Krzysztof Raszkowski <krzysztof.raszkowski@intel.com>
Wed, 1 Apr 2020 15:02:06 +0000 (17:02 +0200)
committer Marge Bot <eric+marge@anholt.net>
Wed, 1 Apr 2020 20:00:06 +0000 (20:00 +0000)
Commit 7d33203b446cdfa11c2aaea18caf05b120a16283 fixed a race condition
in the mechanism that frees scratch memory, but that approach causes a
performance regression in some cases. This change reverts the previous
changes and fixes the scratch-memory freeing mechanism instead.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4406>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4406>

src/gallium/drivers/swr/swr_scratch.cpp
src/gallium/drivers/swr/swr_state.cpp

index 83cb319b4108474a85d8f40d07f80fba5bdd6a15..66f18365cc74f1ee33fd1acd50a4e5a445372781 100644 (file)
@@ -25,6 +25,7 @@
 #include "swr_context.h"
 #include "swr_screen.h"
 #include "swr_scratch.h"
+#include "swr_fence.h"
 #include "swr_fence_work.h"
 #include "api.h"
 
@@ -46,8 +47,10 @@ swr_copy_to_scratch_space(struct swr_context *ctx,
       space->current_size = max_size_in_flight;
 
       if (space->base) {
-         /* defer delete, use aligned-free */
+         /* defer delete, use aligned-free, fence finish enforces the defer
+          * delete will be on the *next* fence */
          struct swr_screen *screen = swr_screen(ctx->pipe.screen);
+         swr_fence_finish(ctx->pipe.screen, NULL, screen->flush_fence, 0);
          swr_fence_work_free(screen->flush_fence, space->base, true);
          space->base = NULL;
       }
index 07ff9b46ad2bf0f77a9f334b52c0cd8cfedd509b..81c70b4568d1855d9d109c017b66ddf9924f4f1d 100644 (file)
@@ -1422,12 +1422,20 @@ swr_update_derived(struct pipe_context *pipe,
             partial_inbounds = 0;
             min_vertex_index = info.min_index + info.index_bias;
 
-            /* Use user memory directly. The draw will access user-buffer
-             * directly and then block. It's easier and usually
-             * faster than copying.
-             */
-            post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW;
-            p_data = (const uint8_t *) vb->buffer.user;
+            size = AlignUp(size, 4);
+            /* If size of client memory copy is too large, don't copy. The
+             * draw will access user-buffer directly and then block.  This is
+             * faster than queuing many large client draws. */
+            if (size >= screen->client_copy_limit) {
+               post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW;
+               p_data = (const uint8_t *) vb->buffer.user;
+            } else {
+               /* Copy only needed vertices to scratch space */
+               const void *ptr = (const uint8_t *) vb->buffer.user + base;
+               ptr = (uint8_t *)swr_copy_to_scratch_space(
+                     ctx, &ctx->scratch->vertex_buffer, ptr, size);
+               p_data = (const uint8_t *)ptr - base;
+            }
          } else if (vb->buffer.resource) {
             /* VBO */
             if (!pitch) {
@@ -1488,12 +1496,20 @@ swr_update_derived(struct pipe_context *pipe,
 
             size = info.count * pitch;
 
-            /* Use user memory directly. The draw will access user-buffer
-             * directly and then block. It's easier and usually
-             * faster than copying.
-             */
-            post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW;
-            p_data = (const uint8_t *) info.index.user;
+            size = AlignUp(size, 4);
+            /* If size of client memory copy is too large, don't copy. The
+             * draw will access user-buffer directly and then block.  This is
+             * faster than queuing many large client draws. */
+            if (size >= screen->client_copy_limit) {
+               post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW;
+               p_data = (const uint8_t *) info.index.user;
+            } else {
+               /* Copy indices to scratch space */
+               const void *ptr = info.index.user;
+               ptr = swr_copy_to_scratch_space(
+                     ctx, &ctx->scratch->index_buffer, ptr, size);
+               p_data = (const uint8_t *)ptr;
+            }
          }
 
          SWR_INDEX_BUFFER_STATE swrIndexBuffer;