glthread: don't use atomics for refcounting to decrease overhead on AMD Zen

author Marek Olšák <marek.olsak@amd.com>

Sun, 22 Mar 2020 18:45:14 +0000 (14:45 -0400)

committer Marge Bot <eric+marge@anholt.net>

Thu, 30 Apr 2020 22:01:55 +0000 (22:01 +0000)
author Marek Olšák <marek.olsak@amd.com>
Sun, 22 Mar 2020 18:45:14 +0000 (14:45 -0400)
committer Marge Bot <eric+marge@anholt.net>
Thu, 30 Apr 2020 22:01:55 +0000 (22:01 +0000)
diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h

index 4a8ce5abce6601b2d4179764d36d4641bfd2424a..64b82ac131740ff3e66035fe78aafce3ea1e00c1 100644 (file)
--- a/src/mesa/main/glthread.h
+++ b/src/mesa/main/glthread.h
@@ -114,6 +114,7 @@ struct glthread_state
     struct gl_buffer_object *upload_buffer;
     uint8_t *upload_ptr;
     unsigned upload_offset;
+   int upload_buffer_private_refcount;
  
     /** Caps. */
     GLboolean SupportsBufferUploads;
diff --git a/src/mesa/main/glthread_bufferobj.c b/src/mesa/main/glthread_bufferobj.c

index 3d393af075b110fbfd86dc7dd4b6e5024ca7369f..52828308052e89588396b03b671657ec1ae19aff 100644 (file)
--- a/src/mesa/main/glthread_bufferobj.c
+++ b/src/mesa/main/glthread_bufferobj.c
@@ -97,11 +97,39 @@ _mesa_glthread_upload(struct gl_context *ctx, const void *data,
           return;
        }
  
+      if (glthread->upload_buffer_private_refcount > 0) {
+         p_atomic_add(&glthread->upload_buffer->RefCount,
+                      -glthread->upload_buffer_private_refcount);
+         glthread->upload_buffer_private_refcount = 0;
+      }
        _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
        glthread->upload_buffer =
           new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
        glthread->upload_offset = 0;
        offset = 0;
+
+      /* Atomic operations are very slow when the two threads do not share
+       * an L3 cache (which can happen on AMD Zen), so avoid atomics as
+       * follows:
+       *
+       * This function has to return a buffer reference to the caller.
+       * Instead of doing an atomic_inc on every call, it performs all
+       * possible future increments in advance, when the upload buffer is
+       * allocated. The function can be called at most default_size times per
+       * upload buffer, because the minimum allocation size is 1.
+       * Therefore the function can return at most default_size references,
+       * so we will never need more. This is the number
+       * that is added to RefCount at allocation.
+       *
+       * upload_buffer_private_refcount tracks how many buffer references
+       * are left to return to callers. If the buffer is full and there are
+       * still references left, they are atomically subtracted from RefCount
+       * before the buffer is unreferenced.
+       *
+       * This can increase performance by 20%.
+       */
+      glthread->upload_buffer->RefCount += default_size;
+      glthread->upload_buffer_private_refcount = default_size;
     }
  
     /* Upload data. */
@@ -112,8 +140,11 @@ _mesa_glthread_upload(struct gl_context *ctx, const void *data,
  
     glthread->upload_offset = offset + size;
     *out_offset = offset;
+
     assert(*out_buffer == NULL);
-   _mesa_reference_buffer_object(ctx, out_buffer, glthread->upload_buffer);
+   assert(glthread->upload_buffer_private_refcount > 0);
+   *out_buffer = glthread->upload_buffer;
+   glthread->upload_buffer_private_refcount--;
  }
  
  /** Tracks the current bindings for the vertex array and index array buffers.
author	Marek Olšák <marek.olsak@amd.com>
	Sun, 22 Mar 2020 18:45:14 +0000 (14:45 -0400)
committer	Marge Bot <eric+marge@anholt.net>
	Thu, 30 Apr 2020 22:01:55 +0000 (22:01 +0000)
src/mesa/main/glthread.h		patch \| blob \| history
src/mesa/main/glthread_bufferobj.c		patch \| blob \| history