/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "main/glthread_marshal.h"
25 #include "main/dispatch.h"
26 #include "main/bufferobj.h"
29 * Create an upload buffer. This is called from the app thread, so everything
30 * has to be thread-safe in the driver.
32 static struct gl_buffer_object
*
33 new_upload_buffer(struct gl_context
*ctx
, GLsizeiptr size
, uint8_t **ptr
)
35 assert(ctx
->GLThread
.SupportsBufferUploads
);
37 struct gl_buffer_object
*obj
= ctx
->Driver
.NewBufferObject(ctx
, -1);
41 obj
->Immutable
= true;
43 if (!ctx
->Driver
.BufferData(ctx
, GL_ARRAY_BUFFER
, size
, NULL
,
45 GL_CLIENT_STORAGE_BIT
| GL_MAP_WRITE_BIT
,
47 ctx
->Driver
.DeleteBuffer(ctx
, obj
);
51 *ptr
= ctx
->Driver
.MapBufferRange(ctx
, 0, size
,
53 GL_MAP_UNSYNCHRONIZED_BIT
|
54 MESA_MAP_THREAD_SAFE_BIT
,
57 ctx
->Driver
.DeleteBuffer(ctx
, obj
);
65 _mesa_glthread_upload(struct gl_context
*ctx
, const void *data
,
66 GLsizeiptr size
, unsigned *out_offset
,
67 struct gl_buffer_object
**out_buffer
,
70 struct glthread_state
*glthread
= &ctx
->GLThread
;
71 const unsigned default_size
= 1024 * 1024;
73 if (unlikely(size
> INT_MAX
))
76 /* The alignment was chosen arbitrarily. */
77 unsigned offset
= align(glthread
->upload_offset
, 8);
79 /* Allocate a new buffer if needed. */
80 if (unlikely(!glthread
->upload_buffer
|| offset
+ size
> default_size
)) {
81 /* If the size is greater than the buffer size, allocate a separate buffer
82 * just for this upload.
84 if (unlikely(size
> default_size
)) {
87 assert(*out_buffer
== NULL
);
88 *out_buffer
= new_upload_buffer(ctx
, size
, &ptr
);
94 memcpy(ptr
, data
, size
);
100 if (glthread
->upload_buffer_private_refcount
> 0) {
101 p_atomic_add(&glthread
->upload_buffer
->RefCount
,
102 -glthread
->upload_buffer_private_refcount
);
103 glthread
->upload_buffer_private_refcount
= 0;
105 _mesa_reference_buffer_object(ctx
, &glthread
->upload_buffer
, NULL
);
106 glthread
->upload_buffer
=
107 new_upload_buffer(ctx
, default_size
, &glthread
->upload_ptr
);
108 glthread
->upload_offset
= 0;
111 /* Since atomic operations are very very slow when 2 threads are not
112 * sharing one L3 cache (which can happen on AMD Zen), prevent using
113 * atomics as follows:
115 * This function has to return a buffer reference to the caller.
116 * Instead of atomic_inc for every call, it does all possible future
117 * increments in advance when the upload buffer is allocated.
118 * The maximum number of times the function can be called per upload
119 * buffer is default_size, because the minimum allocation size is 1.
120 * Therefore the function can only return default_size number of
121 * references at most, so we will never need more. This is the number
122 * that is added to RefCount at allocation.
124 * upload_buffer_private_refcount tracks how many buffer references
125 * are left to return to callers. If the buffer is full and there are
126 * still references left, they are atomically subtracted from RefCount
127 * before the buffer is unreferenced.
129 * This can increase performance by 20%.
131 glthread
->upload_buffer
->RefCount
+= default_size
;
132 glthread
->upload_buffer_private_refcount
= default_size
;
137 memcpy(glthread
->upload_ptr
+ offset
, data
, size
);
139 *out_ptr
= glthread
->upload_ptr
+ offset
;
141 glthread
->upload_offset
= offset
+ size
;
142 *out_offset
= offset
;
144 assert(*out_buffer
== NULL
);
145 assert(glthread
->upload_buffer_private_refcount
> 0);
146 *out_buffer
= glthread
->upload_buffer
;
147 glthread
->upload_buffer_private_refcount
--;
150 /** Tracks the current bindings for the vertex array and index array buffers.
152 * This is part of what we need to enable glthread on compat-GL contexts that
153 * happen to use VBOs, without also supporting the full tracking of VBO vs
154 * user vertex array bindings per attribute on each vertex array for
155 * determining what to upload at draw call time.
157 * Note that GL core makes it so that a buffer binding with an invalid handle
158 * in the "buffer" parameter will throw an error, and then a
159 * glVertexAttribPointer() that followsmight not end up pointing at a VBO.
160 * However, in GL core the draw call would throw an error as well, so we don't
161 * really care if our tracking is wrong for this case -- we never need to
162 * marshal user data for draw calls, and the unmarshal will just generate an
163 * error or not as appropriate.
165 * For compatibility GL, we do need to accurately know whether the draw call
166 * on the unmarshal side will dereference a user pointer or load data from a
167 * VBO per vertex. That would make it seem like we need to track whether a
168 * "buffer" is valid, so that we can know when an error will be generated
169 * instead of updating the binding. However, compat GL has the ridiculous
170 * feature that if you pass a bad name, it just gens a buffer object for you,
171 * so we escape without having to know if things are valid or not.
174 _mesa_glthread_BindBuffer(struct gl_context
*ctx
, GLenum target
, GLuint buffer
)
176 struct glthread_state
*glthread
= &ctx
->GLThread
;
179 case GL_ARRAY_BUFFER
:
180 glthread
->CurrentArrayBufferName
= buffer
;
182 case GL_ELEMENT_ARRAY_BUFFER
:
183 /* The current element array buffer binding is actually tracked in the
184 * vertex array object instead of the context, so this would need to
185 * change on vertex array object updates.
187 glthread
->CurrentVAO
->CurrentElementBufferName
= buffer
;
189 case GL_DRAW_INDIRECT_BUFFER
:
190 glthread
->CurrentDrawIndirectBufferName
= buffer
;
196 _mesa_glthread_DeleteBuffers(struct gl_context
*ctx
, GLsizei n
,
197 const GLuint
*buffers
)
199 struct glthread_state
*glthread
= &ctx
->GLThread
;
204 for (unsigned i
= 0; i
< n
; i
++) {
205 GLuint id
= buffers
[i
];
207 if (id
== glthread
->CurrentArrayBufferName
)
208 _mesa_glthread_BindBuffer(ctx
, GL_ARRAY_BUFFER
, 0);
209 if (id
== glthread
->CurrentVAO
->CurrentElementBufferName
)
210 _mesa_glthread_BindBuffer(ctx
, GL_ELEMENT_ARRAY_BUFFER
, 0);
211 if (id
== glthread
->CurrentDrawIndirectBufferName
)
212 _mesa_glthread_BindBuffer(ctx
, GL_DRAW_INDIRECT_BUFFER
, 0);
216 /* BufferData: marshalled asynchronously */
217 struct marshal_cmd_BufferData
219 struct marshal_cmd_base cmd_base
;
220 GLuint target_or_name
;
223 const GLvoid
*data_external_mem
;
224 bool data_null
; /* If set, no data follows for "data" */
227 /* Next size bytes are GLubyte data[size] */
231 _mesa_unmarshal_BufferData(struct gl_context
*ctx
,
232 const struct marshal_cmd_BufferData
*cmd
)
234 const GLuint target_or_name
= cmd
->target_or_name
;
235 const GLsizei size
= cmd
->size
;
236 const GLenum usage
= cmd
->usage
;
241 else if (!cmd
->named
&& target_or_name
== GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
)
242 data
= cmd
->data_external_mem
;
244 data
= (const void *) (cmd
+ 1);
247 CALL_NamedBufferDataEXT(ctx
->CurrentServerDispatch
,
248 (target_or_name
, size
, data
, usage
));
249 } else if (cmd
->named
) {
250 CALL_NamedBufferData(ctx
->CurrentServerDispatch
,
251 (target_or_name
, size
, data
, usage
));
253 CALL_BufferData(ctx
->CurrentServerDispatch
,
254 (target_or_name
, size
, data
, usage
));
/** Never reached: all BufferData variants marshal as DISPATCH_CMD_BufferData. */
void
_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
                                const struct marshal_cmd_NamedBufferData *cmd)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
}
/** Never reached: all BufferData variants marshal as DISPATCH_CMD_BufferData. */
void
_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferDataEXT *cmd)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
}
273 _mesa_marshal_BufferData_merged(GLuint target_or_name
, GLsizeiptr size
,
274 const GLvoid
*data
, GLenum usage
, bool named
,
275 bool ext_dsa
, const char *func
)
277 GET_CURRENT_CONTEXT(ctx
);
278 bool external_mem
= !named
&&
279 target_or_name
== GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
;
280 bool copy_data
= data
&& !external_mem
;
281 int cmd_size
= sizeof(struct marshal_cmd_BufferData
) + (copy_data
? size
: 0);
283 if (unlikely(size
< 0 || size
> INT_MAX
|| cmd_size
< 0 ||
284 cmd_size
> MARSHAL_MAX_CMD_SIZE
||
285 (named
&& target_or_name
== 0))) {
286 _mesa_glthread_finish_before(ctx
, func
);
288 CALL_NamedBufferData(ctx
->CurrentServerDispatch
,
289 (target_or_name
, size
, data
, usage
));
291 CALL_BufferData(ctx
->CurrentServerDispatch
,
292 (target_or_name
, size
, data
, usage
));
297 struct marshal_cmd_BufferData
*cmd
=
298 _mesa_glthread_allocate_command(ctx
, DISPATCH_CMD_BufferData
,
301 cmd
->target_or_name
= target_or_name
;
304 cmd
->data_null
= !data
;
306 cmd
->ext_dsa
= ext_dsa
;
307 cmd
->data_external_mem
= data
;
310 char *variable_data
= (char *) (cmd
+ 1);
311 memcpy(variable_data
, data
, size
);
316 _mesa_marshal_BufferData(GLenum target
, GLsizeiptr size
, const GLvoid
* data
,
319 _mesa_marshal_BufferData_merged(target
, size
, data
, usage
, false, false,
324 _mesa_marshal_NamedBufferData(GLuint buffer
, GLsizeiptr size
,
325 const GLvoid
* data
, GLenum usage
)
327 _mesa_marshal_BufferData_merged(buffer
, size
, data
, usage
, true, false,
332 _mesa_marshal_NamedBufferDataEXT(GLuint buffer
, GLsizeiptr size
,
333 const GLvoid
*data
, GLenum usage
)
335 _mesa_marshal_BufferData_merged(buffer
, size
, data
, usage
, true, true,
336 "NamedBufferDataEXT");
340 /* BufferSubData: marshalled asynchronously */
341 struct marshal_cmd_BufferSubData
343 struct marshal_cmd_base cmd_base
;
344 GLenum target_or_name
;
349 /* Next size bytes are GLubyte data[size] */
353 _mesa_unmarshal_BufferSubData(struct gl_context
*ctx
,
354 const struct marshal_cmd_BufferSubData
*cmd
)
356 const GLenum target_or_name
= cmd
->target_or_name
;
357 const GLintptr offset
= cmd
->offset
;
358 const GLsizeiptr size
= cmd
->size
;
359 const void *data
= (const void *) (cmd
+ 1);
362 CALL_NamedBufferSubDataEXT(ctx
->CurrentServerDispatch
,
363 (target_or_name
, offset
, size
, data
));
364 } else if (cmd
->named
) {
365 CALL_NamedBufferSubData(ctx
->CurrentServerDispatch
,
366 (target_or_name
, offset
, size
, data
));
368 CALL_BufferSubData(ctx
->CurrentServerDispatch
,
369 (target_or_name
, offset
, size
, data
));
/** Never reached: all variants marshal as DISPATCH_CMD_BufferSubData. */
void
_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferSubData *cmd)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
}
/** Never reached: all variants marshal as DISPATCH_CMD_BufferSubData. */
void
_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx,
                                      const struct marshal_cmd_NamedBufferSubDataEXT *cmd)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
}
388 _mesa_marshal_BufferSubData_merged(GLuint target_or_name
, GLintptr offset
,
389 GLsizeiptr size
, const GLvoid
*data
,
390 bool named
, bool ext_dsa
, const char *func
)
392 GET_CURRENT_CONTEXT(ctx
);
393 size_t cmd_size
= sizeof(struct marshal_cmd_BufferSubData
) + size
;
395 /* Fast path: Copy the data to an upload buffer, and use the GPU
396 * to copy the uploaded data to the destination buffer.
398 /* TODO: Handle offset == 0 && size < buffer_size.
399 * If offset == 0 and size == buffer_size, it's better to discard
400 * the buffer storage, but we don't know the buffer size in glthread.
402 if (ctx
->GLThread
.SupportsBufferUploads
&&
403 data
&& offset
> 0 && size
> 0) {
404 struct gl_buffer_object
*upload_buffer
= NULL
;
405 unsigned upload_offset
= 0;
407 _mesa_glthread_upload(ctx
, data
, size
, &upload_offset
, &upload_buffer
,
411 _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr
)upload_buffer
,
420 if (unlikely(size
< 0 || size
> INT_MAX
|| cmd_size
< 0 ||
421 cmd_size
> MARSHAL_MAX_CMD_SIZE
|| !data
||
422 (named
&& target_or_name
== 0))) {
423 _mesa_glthread_finish_before(ctx
, func
);
425 CALL_NamedBufferSubData(ctx
->CurrentServerDispatch
,
426 (target_or_name
, offset
, size
, data
));
428 CALL_BufferSubData(ctx
->CurrentServerDispatch
,
429 (target_or_name
, offset
, size
, data
));
434 struct marshal_cmd_BufferSubData
*cmd
=
435 _mesa_glthread_allocate_command(ctx
, DISPATCH_CMD_BufferSubData
,
437 cmd
->target_or_name
= target_or_name
;
438 cmd
->offset
= offset
;
441 cmd
->ext_dsa
= ext_dsa
;
443 char *variable_data
= (char *) (cmd
+ 1);
444 memcpy(variable_data
, data
, size
);
448 _mesa_marshal_BufferSubData(GLenum target
, GLintptr offset
, GLsizeiptr size
,
451 _mesa_marshal_BufferSubData_merged(target
, offset
, size
, data
, false,
452 false, "BufferSubData");
456 _mesa_marshal_NamedBufferSubData(GLuint buffer
, GLintptr offset
,
457 GLsizeiptr size
, const GLvoid
* data
)
459 _mesa_marshal_BufferSubData_merged(buffer
, offset
, size
, data
, true,
460 false, "NamedBufferSubData");
464 _mesa_marshal_NamedBufferSubDataEXT(GLuint buffer
, GLintptr offset
,
465 GLsizeiptr size
, const GLvoid
* data
)
467 _mesa_marshal_BufferSubData_merged(buffer
, offset
, size
, data
, true,
468 true, "NamedBufferSubDataEXT");