glthread: do glBufferSubData as unsynchronized upload + GPU copy
author Marek Olšák <marek.olsak@amd.com>
Fri, 6 Mar 2020 02:50:17 +0000 (21:50 -0500)
committer Marge Bot <eric+marge@anholt.net>
Thu, 30 Apr 2020 22:01:55 +0000 (22:01 +0000)
1. glthread has a private upload buffer (as struct gl_buffer_object *)
2. the new function glInternalBufferSubDataCopyMESA is used to execute the copy
   (the source buffer parameter type is struct gl_buffer_object * as GLintptr)

Now glthread can handle arbitrary glBufferSubData sizes without syncing.

This is a good exercise for uploading data outside of the driver thread.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4314>

src/mesa/main/glthread.c
src/mesa/main/glthread.h
src/mesa/main/glthread_bufferobj.c
src/mesa/main/menums.h

index b8e04c9f771893f415bced0c4cf8fe717d8edc37..8769f8b40336c2d1e4201831e5daba15f7acd09d 100644 (file)
@@ -106,6 +106,11 @@ _mesa_glthread_init(struct gl_context *ctx)
 
    glthread->enabled = true;
    glthread->stats.queue = &glthread->queue;
+
+   glthread->SupportsBufferUploads =
+      ctx->Const.BufferCreateMapUnsynchronizedThreadSafe &&
+      ctx->Const.AllowMappedBuffersDuringExecution;
+
    ctx->CurrentClientDispatch = ctx->MarshalExec;
 
    /* Execute the thread initialization function in the thread. */
index 32c7826a8f2b734c3a3e74915eb2fd3a91dbe1f1..4a8ce5abce6601b2d4179764d36d4641bfd2424a 100644 (file)
@@ -50,6 +50,7 @@
 #include "compiler/shader_enums.h"
 
 struct gl_context;
+struct gl_buffer_object;
 struct _mesa_HashTable;
 
 struct glthread_attrib_binding {
@@ -109,6 +110,14 @@ struct glthread_state
    /** Index of the batch being filled and about to be submitted. */
    unsigned next;
 
+   /** Upload buffer. */
+   struct gl_buffer_object *upload_buffer;
+   uint8_t *upload_ptr;
+   unsigned upload_offset;
+
+   /** Caps. */
+   GLboolean SupportsBufferUploads;
+
    /** Vertex Array objects tracked by glthread independently of Mesa. */
    struct _mesa_HashTable *VAOs;
    struct glthread_vao *CurrentVAO;
@@ -129,6 +138,10 @@ void _mesa_glthread_disable(struct gl_context *ctx, const char *func);
 void _mesa_glthread_flush_batch(struct gl_context *ctx);
 void _mesa_glthread_finish(struct gl_context *ctx);
 void _mesa_glthread_finish_before(struct gl_context *ctx, const char *func);
+void _mesa_glthread_upload(struct gl_context *ctx, const void *data,
+                           GLsizeiptr size, unsigned *out_offset,
+                           struct gl_buffer_object **out_buffer,
+                           uint8_t **out_ptr);
 
 void _mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target,
                                GLuint buffer);
index 2a9c913cdfc5a981e810260200ff2717bbb181ec..3d393af075b110fbfd86dc7dd4b6e5024ca7369f 100644 (file)
  * IN THE SOFTWARE.
  */
 
-#include "glthread_marshal.h"
-#include "dispatch.h"
+#include "main/glthread_marshal.h"
+#include "main/dispatch.h"
+#include "main/bufferobj.h"
+
+/**
+ * Create an upload buffer. This is called from the app thread, so everything
+ * has to be thread-safe in the driver.
+ *
+ * The buffer is marked Immutable, given \p size bytes of storage with
+ * GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT, and then mapped over [0, size)
+ * with GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT |
+ * MESA_MAP_THREAD_SAFE_BIT in the MAP_GLTHREAD slot.
+ *
+ * \param ctx   GL context; GLThread.SupportsBufferUploads must be set
+ * \param size  size of the storage to allocate and map, in bytes
+ * \param ptr   receives the CPU address of the mapping on success and NULL
+ *              on every failure path
+ *
+ * \return the new buffer object, or NULL if creating the object, allocating
+ *         its storage, or mapping it failed (the object is deleted in the
+ *         latter two cases)
+ */
+static struct gl_buffer_object *
+new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
+{
+   assert(ctx->GLThread.SupportsBufferUploads);
+
+   /* Never leave the caller's pointer stale or uninitialized on failure. */
+   *ptr = NULL;
+
+   struct gl_buffer_object *obj = ctx->Driver.NewBufferObject(ctx, -1);
+   if (!obj)
+      return NULL;
+
+   obj->Immutable = true;
+
+   if (!ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER, size, NULL,
+                               GL_WRITE_ONLY,
+                               GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
+                               obj)) {
+      ctx->Driver.DeleteBuffer(ctx, obj);
+      return NULL;
+   }
+
+   *ptr = ctx->Driver.MapBufferRange(ctx, 0, size,
+                                     GL_MAP_WRITE_BIT |
+                                     GL_MAP_UNSYNCHRONIZED_BIT |
+                                     MESA_MAP_THREAD_SAFE_BIT,
+                                     obj, MAP_GLTHREAD);
+   if (!*ptr) {
+      ctx->Driver.DeleteBuffer(ctx, obj);
+      return NULL;
+   }
+
+   return obj;
+}
+
+/**
+ * Copy \p data into a glthread upload buffer, or reserve space in one.
+ *
+ * Requests of up to 1 MB are sub-allocated from glthread->upload_buffer at
+ * an 8-byte-aligned offset; a fresh 1 MB buffer replaces it when the request
+ * does not fit in the remaining space. Larger requests get a separate buffer
+ * of exactly \p size bytes that is not kept in glthread state.
+ *
+ * \param ctx         GL context owning the glthread state
+ * \param data        bytes to copy, or NULL to only reserve space and get
+ *                    the write pointer back through \p out_ptr
+ * \param size        number of bytes; negative values and values above
+ *                    INT_MAX are rejected
+ * \param out_offset  receives the byte offset of the region in \p out_buffer
+ * \param out_buffer  must point to NULL on entry; receives the buffer that
+ *                    holds the region, and is left NULL on any failure
+ * \param out_ptr     written only when \p data is NULL; may be NULL otherwise
+ */
+void
+_mesa_glthread_upload(struct gl_context *ctx, const void *data,
+                      GLsizeiptr size, unsigned *out_offset,
+                      struct gl_buffer_object **out_buffer,
+                      uint8_t **out_ptr)
+{
+   struct glthread_state *glthread = &ctx->GLThread;
+   const unsigned default_size = 1024 * 1024;
+
+   /* A negative size would turn into a huge size_t in memcpy below. */
+   if (unlikely(size < 0 || size > INT_MAX))
+      return;
+
+   /* The alignment was chosen arbitrarily. */
+   unsigned offset = align(glthread->upload_offset, 8);
+
+   /* Allocate a new buffer if needed. */
+   if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
+      /* If the size is greater than the buffer size, allocate a separate buffer
+       * just for this upload.
+       */
+      if (unlikely(size > default_size)) {
+         uint8_t *ptr;
+
+         assert(*out_buffer == NULL);
+         *out_buffer = new_upload_buffer(ctx, size, &ptr);
+         if (!*out_buffer)
+            return;
+
+         *out_offset = 0;
+         if (data)
+            memcpy(ptr, data, size);
+         else
+            *out_ptr = ptr;
+         return;
+      }
+
+      _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
+      glthread->upload_offset = 0;
+      offset = 0;
+
+      glthread->upload_buffer =
+         new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
+      if (unlikely(!glthread->upload_buffer)) {
+         /* Allocation or mapping failed: don't write through a stale or
+          * NULL mapping. *out_buffer stays NULL so the caller can detect
+          * the failure; the next call will retry the allocation.
+          */
+         glthread->upload_ptr = NULL;
+         return;
+      }
+   }
+
+   /* Upload data. */
+   if (data)
+      memcpy(glthread->upload_ptr + offset, data, size);
+   else
+      *out_ptr = glthread->upload_ptr + offset;
+
+   glthread->upload_offset = offset + size;
+   *out_offset = offset;
+   assert(*out_buffer == NULL);
+   _mesa_reference_buffer_object(ctx, out_buffer, glthread->upload_buffer);
+}
 
 /** Tracks the current bindings for the vertex array and index array buffers.
  *
@@ -269,6 +361,31 @@ _mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
    GET_CURRENT_CONTEXT(ctx);
    size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size;
 
+   /* Fast path: Copy the data to an upload buffer, and use the GPU
+    * to copy the uploaded data to the destination buffer.
+    */
+   /* TODO: Handle offset == 0 && size < buffer_size.
+    *       If offset == 0 and size == buffer_size, it's better to discard
+    *       the buffer storage, but we don't know the buffer size in glthread.
+    */
+   if (ctx->GLThread.SupportsBufferUploads &&
+       data && offset > 0 && size > 0) {
+      struct gl_buffer_object *upload_buffer = NULL;
+      unsigned upload_offset = 0;
+
+      _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
+                            NULL);
+
+      if (upload_buffer) {
+         _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
+                                                     upload_offset,
+                                                     target_or_name,
+                                                     offset, size, named,
+                                                     ext_dsa);
+         return;
+      }
+   }
+
    if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 ||
                 cmd_size > MARSHAL_MAX_CMD_SIZE || !data ||
                 (named && target_or_name == 0))) {
index 79c14da5ea0062a10e0e5404396ff879e96a8dca..d0a1dc487401d177b48414262fa91a3bfbe95354 100644 (file)
@@ -141,6 +141,7 @@ typedef enum
 {
    MAP_USER,
    MAP_INTERNAL,
+   MAP_GLTHREAD,
    MAP_COUNT
 } gl_map_buffer_index;