From 7f22e0fd29369f478da1d36520049f001cd698d1 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 5 Mar 2020 21:50:17 -0500
Subject: [PATCH] glthread: do glBufferSubData as unsynchronized upload + GPU copy

1. glthread has a private upload buffer (as struct gl_buffer_object *)
2. the new function glInternalBufferSubDataCopyMESA is used to execute the
   copy (the source buffer parameter type is struct gl_buffer_object *
   as GLintptr)

Now glthread can handle arbitrary glBufferSubData sizes without syncing.

This is a good exercise for uploading data outside of the driver thread.

v2: return early from _mesa_glthread_upload when the shared upload buffer
    cannot be created or mapped, so that the caller falls back to the
    regular path instead of copying through an invalid pointer.

Part-of:
---
 src/mesa/main/glthread.c           |   5 ++
 src/mesa/main/glthread.h           |  13 ++++
 src/mesa/main/glthread_bufferobj.c | 141 ++++++++++++++++++++++++++++-
 src/mesa/main/menums.h             |   1 +
 4 files changed, 158 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
index b8e04c9f771..8769f8b4033 100644
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -106,6 +106,11 @@ _mesa_glthread_init(struct gl_context *ctx)
 
    glthread->enabled = true;
    glthread->stats.queue = &glthread->queue;
+
+   glthread->SupportsBufferUploads =
+      ctx->Const.BufferCreateMapUnsynchronizedThreadSafe &&
+      ctx->Const.AllowMappedBuffersDuringExecution;
+
    ctx->CurrentClientDispatch = ctx->MarshalExec;
 
    /* Execute the thread initialization function in the thread. */
diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
index 32c7826a8f2..4a8ce5abce6 100644
--- a/src/mesa/main/glthread.h
+++ b/src/mesa/main/glthread.h
@@ -50,6 +50,7 @@
 #include "compiler/shader_enums.h"
 
 struct gl_context;
+struct gl_buffer_object;
 struct _mesa_HashTable;
 
 struct glthread_attrib_binding {
@@ -109,6 +110,14 @@ struct glthread_state
    /** Index of the batch being filled and about to be submitted. */
    unsigned next;
 
+   /** Upload buffer. */
+   struct gl_buffer_object *upload_buffer;
+   uint8_t *upload_ptr;
+   unsigned upload_offset;
+
+   /** Caps. */
+   GLboolean SupportsBufferUploads;
+
    /** Vertex Array objects tracked by glthread independently of Mesa. */
    struct _mesa_HashTable *VAOs;
    struct glthread_vao *CurrentVAO;
@@ -129,6 +138,10 @@ void _mesa_glthread_disable(struct gl_context *ctx, const char *func);
 void _mesa_glthread_flush_batch(struct gl_context *ctx);
 void _mesa_glthread_finish(struct gl_context *ctx);
 void _mesa_glthread_finish_before(struct gl_context *ctx, const char *func);
+void _mesa_glthread_upload(struct gl_context *ctx, const void *data,
+                           GLsizeiptr size, unsigned *out_offset,
+                           struct gl_buffer_object **out_buffer,
+                           uint8_t **out_ptr);
 
 void _mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target,
                                GLuint buffer);
diff --git a/src/mesa/main/glthread_bufferobj.c b/src/mesa/main/glthread_bufferobj.c
index 2a9c913cdfc..3d393af075b 100644
--- a/src/mesa/main/glthread_bufferobj.c
+++ b/src/mesa/main/glthread_bufferobj.c
@@ -21,8 +21,120 @@
  * IN THE SOFTWARE.
  */
 
-#include "glthread_marshal.h"
-#include "dispatch.h"
+#include "main/glthread_marshal.h"
+#include "main/dispatch.h"
+#include "main/bufferobj.h"
+
+/**
+ * Create an upload buffer. This is called from the app thread, so everything
+ * has to be thread-safe in the driver.
+ */
+static struct gl_buffer_object *
+new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
+{
+   assert(ctx->GLThread.SupportsBufferUploads);
+
+   struct gl_buffer_object *obj = ctx->Driver.NewBufferObject(ctx, -1);
+   if (!obj)
+      return NULL;
+
+   obj->Immutable = true;
+
+   if (!ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER, size, NULL,
+                               GL_WRITE_ONLY,
+                               GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
+                               obj)) {
+      ctx->Driver.DeleteBuffer(ctx, obj);
+      return NULL;
+   }
+
+   *ptr = ctx->Driver.MapBufferRange(ctx, 0, size,
+                                     GL_MAP_WRITE_BIT |
+                                     GL_MAP_UNSYNCHRONIZED_BIT |
+                                     MESA_MAP_THREAD_SAFE_BIT,
+                                     obj, MAP_GLTHREAD);
+   if (!*ptr) {
+      ctx->Driver.DeleteBuffer(ctx, obj);
+      return NULL;
+   }
+
+   return obj;
+}
+
+/**
+ * Copy data into a glthread upload buffer, or reserve space in one.
+ *
+ * If data is non-NULL, size bytes are copied into the buffer; otherwise
+ * the address of the reserved range is stored in *out_ptr. On success,
+ * *out_buffer is set to the buffer containing the range and *out_offset
+ * to the byte offset of the range within that buffer.
+ *
+ * *out_buffer must be NULL on entry. It is left NULL when size exceeds
+ * INT_MAX or an upload buffer cannot be created, so callers must check it
+ * before using the other outputs.
+ */
+void
+_mesa_glthread_upload(struct gl_context *ctx, const void *data,
+                      GLsizeiptr size, unsigned *out_offset,
+                      struct gl_buffer_object **out_buffer,
+                      uint8_t **out_ptr)
+{
+   struct glthread_state *glthread = &ctx->GLThread;
+   const unsigned default_size = 1024 * 1024;
+
+   if (unlikely(size > INT_MAX))
+      return;
+
+   /* The alignment was chosen arbitrarily. */
+   unsigned offset = align(glthread->upload_offset, 8);
+
+   /* Allocate a new buffer if needed. */
+   if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
+      /* If the size is greater than the buffer size, allocate a separate buffer
+       * just for this upload.
+       */
+      if (unlikely(size > default_size)) {
+         uint8_t *ptr;
+
+         assert(*out_buffer == NULL);
+         *out_buffer = new_upload_buffer(ctx, size, &ptr);
+         if (!*out_buffer)
+            return;
+
+         *out_offset = 0;
+         if (data)
+            memcpy(ptr, data, size);
+         else
+            *out_ptr = ptr;
+         return;
+      }
+
+      _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
+      glthread->upload_buffer =
+         new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
+      glthread->upload_offset = 0;
+      offset = 0;
+
+      /* Creating or mapping the new buffer failed. Return with *out_buffer
+       * still NULL instead of writing through an invalid upload_ptr below.
+       */
+      if (unlikely(!glthread->upload_buffer)) {
+         glthread->upload_ptr = NULL;
+         return;
+      }
+   }
+
+   /* Upload data. */
+   if (data)
+      memcpy(glthread->upload_ptr + offset, data, size);
+   else
+      *out_ptr = glthread->upload_ptr + offset;
+
+   glthread->upload_offset = offset + size;
+   *out_offset = offset;
+   assert(*out_buffer == NULL);
+   _mesa_reference_buffer_object(ctx, out_buffer, glthread->upload_buffer);
+}
 
 /** Tracks the current bindings for the vertex array and index array buffers.
  *
@@ -269,6 +381,31 @@ _mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
    GET_CURRENT_CONTEXT(ctx);
    size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size;
 
+   /* Fast path: Copy the data to an upload buffer, and use the GPU
+    * to copy the uploaded data to the destination buffer.
+    */
+   /* TODO: Handle offset == 0 && size < buffer_size.
+    *       If offset == 0 and size == buffer_size, it's better to discard
+    *       the buffer storage, but we don't know the buffer size in glthread.
+    */
+   if (ctx->GLThread.SupportsBufferUploads &&
+       data && offset > 0 && size > 0) {
+      struct gl_buffer_object *upload_buffer = NULL;
+      unsigned upload_offset = 0;
+
+      _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
+                            NULL);
+
+      if (upload_buffer) {
+         _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
+                                                     upload_offset,
+                                                     target_or_name,
+                                                     offset, size, named,
+                                                     ext_dsa);
+         return;
+      }
+   }
+
    if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 ||
                 cmd_size > MARSHAL_MAX_CMD_SIZE || !data ||
                 (named && target_or_name == 0))) {
diff --git a/src/mesa/main/menums.h b/src/mesa/main/menums.h
index 79c14da5ea0..d0a1dc48740 100644
--- a/src/mesa/main/menums.h
+++ b/src/mesa/main/menums.h
@@ -141,6 +141,7 @@ typedef enum
 {
    MAP_USER,
    MAP_INTERNAL,
+   MAP_GLTHREAD,
    MAP_COUNT
 } gl_map_buffer_index;
 
-- 
2.30.2