glthread: handle ARB_vertex_attrib_binding
src/mesa/main/glthread_draw.c
/*
 * Copyright © 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/* Draw function marshalling for glthread.
 *
 * The purpose of these glDraw wrappers is to upload non-VBO vertex and
 * index data, so that glthread doesn't have to execute synchronously.
 */

#include "c99_alloca.h"

#include "main/glthread_marshal.h"
#include "main/dispatch.h"
#include "main/varray.h"

static inline unsigned
get_index_size(GLenum type)
{
   /* GL_UNSIGNED_BYTE - GL_UNSIGNED_BYTE = 0
    * GL_UNSIGNED_SHORT - GL_UNSIGNED_BYTE = 2
    * GL_UNSIGNED_INT - GL_UNSIGNED_BYTE = 4
    *
    * Divide by 2 to get n=0,1,2, then the index size is: 1 << n
    */
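   /* A quick sanity check of the trick, using the GL enum values
    * (GL_UNSIGNED_BYTE = 0x1401, GL_UNSIGNED_SHORT = 0x1403,
    * GL_UNSIGNED_INT = 0x1405):
    *   1 << ((0x1401 - 0x1401) >> 1) = 1 << 0 = 1
    *   1 << ((0x1403 - 0x1401) >> 1) = 1 << 1 = 2
    *   1 << ((0x1405 - 0x1401) >> 1) = 1 << 2 = 4
    */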
   return 1 << ((type - GL_UNSIGNED_BYTE) >> 1);
}

static inline bool
is_index_type_valid(GLenum type)
{
   /* GL_UNSIGNED_BYTE = 0x1401
    * GL_UNSIGNED_SHORT = 0x1403
    * GL_UNSIGNED_INT = 0x1405
    *
    * The trick is that bit 1 and bit 2 mean USHORT and UINT, respectively.
    * After clearing those two bits (with ~6), we should get UBYTE.
    * Both bits can't be set, because the enum would be greater than UINT.
    */
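   /* For example, GL_SHORT = 0x1402 gives 0x1402 & ~6 = 0x1400, which is
    * not GL_UNSIGNED_BYTE, so signed index types are correctly rejected.
    */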
   return type <= GL_UNSIGNED_INT && (type & ~6) == GL_UNSIGNED_BYTE;
}

static ALWAYS_INLINE struct gl_buffer_object *
upload_indices(struct gl_context *ctx, unsigned count, unsigned index_size,
               const GLvoid **indices)
{
   struct gl_buffer_object *upload_buffer = NULL;
   unsigned upload_offset = 0;

   assert(count);

   _mesa_glthread_upload(ctx, *indices, index_size * count,
                         &upload_offset, &upload_buffer, NULL);
   assert(upload_buffer);
   *indices = (const GLvoid*)(intptr_t)upload_offset;

   return upload_buffer;
}

static ALWAYS_INLINE struct gl_buffer_object *
upload_multi_indices(struct gl_context *ctx, unsigned total_count,
                     unsigned index_size, unsigned draw_count,
                     const GLsizei *count, const GLvoid *const *indices,
                     const GLvoid **out_indices)
{
   struct gl_buffer_object *upload_buffer = NULL;
   unsigned upload_offset = 0;
   uint8_t *upload_ptr = NULL;

   assert(total_count);

   _mesa_glthread_upload(ctx, NULL, index_size * total_count,
                         &upload_offset, &upload_buffer, &upload_ptr);
   assert(upload_buffer);

   for (unsigned i = 0, offset = 0; i < draw_count; i++) {
      if (count[i] == 0)
         continue;

      unsigned size = count[i] * index_size;

      memcpy(upload_ptr + offset, indices[i], size);
      out_indices[i] = (const GLvoid*)(intptr_t)(upload_offset + offset);
      offset += size;
   }

   return upload_buffer;
}

static ALWAYS_INLINE bool
upload_vertices(struct gl_context *ctx, unsigned user_buffer_mask,
                unsigned start_vertex, unsigned num_vertices,
                unsigned start_instance, unsigned num_instances,
                struct glthread_attrib_binding *buffers)
{
   struct glthread_vao *vao = ctx->GLThread.CurrentVAO;
   unsigned attrib_mask_iter = vao->Enabled;
   unsigned num_buffers = 0;

   assert((num_vertices || !(user_buffer_mask & ~vao->NonZeroDivisorMask)) &&
          (num_instances || !(user_buffer_mask & vao->NonZeroDivisorMask)));

   if (unlikely(vao->BufferInterleaved & user_buffer_mask)) {
      /* Slower upload path where some buffers reference multiple attribs,
       * so we have to use 2 while loops instead of 1.
       */
      unsigned start_offset[VERT_ATTRIB_MAX];
      unsigned end_offset[VERT_ATTRIB_MAX];
      uint32_t buffer_mask = 0;

      while (attrib_mask_iter) {
         unsigned i = u_bit_scan(&attrib_mask_iter);
         unsigned binding_index = vao->Attrib[i].BufferIndex;

         if (!(user_buffer_mask & (1 << binding_index)))
            continue;

         unsigned stride = vao->Attrib[binding_index].Stride;
         unsigned instance_div = vao->Attrib[binding_index].Divisor;
         unsigned element_size = vao->Attrib[i].ElementSize;
         unsigned offset = vao->Attrib[i].RelativeOffset;
         unsigned size;

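         /* Both branches below size the upload as
          * stride * (count - 1) + element_size, i.e. up to the last byte of
          * the last element rather than stride * count, so the upload never
          * reads past the end of the user array.
          */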
         if (instance_div) {
            /* Per-instance attrib. */

            /* Figure out how many instances we'll render given instance_div. We
             * can't use the typical div_round_up() pattern because the CTS uses
             * instance_div = ~0 for a test, which overflows div_round_up()'s
             * addition.
             */
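            /* For example, with num_instances = 2 and instance_div = ~0u,
             * this computes 2 / 0xffffffff = 0 and the check below bumps it
             * to 1, whereas div_round_up()'s 2 + 0xffffffff - 1 would wrap
             * around to 0.
             */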
            unsigned count = num_instances / instance_div;
            if (count * instance_div != num_instances)
               count++;

            offset += stride * start_instance;
            size = stride * (count - 1) + element_size;
         } else {
            /* Per-vertex attrib. */
            offset += stride * start_vertex;
            size = stride * (num_vertices - 1) + element_size;
         }

         unsigned binding_index_bit = 1u << binding_index;

         /* Update upload offsets. */
         if (!(buffer_mask & binding_index_bit)) {
            start_offset[binding_index] = offset;
            end_offset[binding_index] = offset + size;
         } else {
            if (offset < start_offset[binding_index])
               start_offset[binding_index] = offset;
            if (offset + size > end_offset[binding_index])
               end_offset[binding_index] = offset + size;
         }

         buffer_mask |= binding_index_bit;
      }

      /* Upload buffers. */
      while (buffer_mask) {
         struct gl_buffer_object *upload_buffer = NULL;
         unsigned upload_offset = 0;
         unsigned start, end;

         unsigned binding_index = u_bit_scan(&buffer_mask);

         start = start_offset[binding_index];
         end = end_offset[binding_index];
         assert(start < end);

         const void *ptr = vao->Attrib[binding_index].Pointer;
         _mesa_glthread_upload(ctx, (uint8_t*)ptr + start,
                               end - start, &upload_offset,
                               &upload_buffer, NULL);
         assert(upload_buffer);

         buffers[num_buffers].buffer = upload_buffer;
         buffers[num_buffers].offset = upload_offset - start;
         buffers[num_buffers].original_pointer = ptr;
         num_buffers++;
      }

      return true;
   }

   /* Faster path where all attribs are separate. */
   while (attrib_mask_iter) {
      unsigned i = u_bit_scan(&attrib_mask_iter);
      unsigned binding_index = vao->Attrib[i].BufferIndex;

      if (!(user_buffer_mask & (1 << binding_index)))
         continue;

      struct gl_buffer_object *upload_buffer = NULL;
      unsigned upload_offset = 0;
      unsigned stride = vao->Attrib[binding_index].Stride;
      unsigned instance_div = vao->Attrib[binding_index].Divisor;
      unsigned element_size = vao->Attrib[i].ElementSize;
      unsigned offset = vao->Attrib[i].RelativeOffset;
      unsigned size;

      if (instance_div) {
         /* Per-instance attrib. */

         /* Figure out how many instances we'll render given instance_div. We
          * can't use the typical div_round_up() pattern because the CTS uses
          * instance_div = ~0 for a test, which overflows div_round_up()'s
          * addition.
          */
         unsigned count = num_instances / instance_div;
         if (count * instance_div != num_instances)
            count++;

         offset += stride * start_instance;
         size = stride * (count - 1) + element_size;
      } else {
         /* Per-vertex attrib. */
         offset += stride * start_vertex;
         size = stride * (num_vertices - 1) + element_size;
      }

      const void *ptr = vao->Attrib[binding_index].Pointer;
      _mesa_glthread_upload(ctx, (uint8_t*)ptr + offset,
                            size, &upload_offset, &upload_buffer, NULL);
      assert(upload_buffer);

      buffers[num_buffers].buffer = upload_buffer;
      buffers[num_buffers].offset = upload_offset - offset;
      buffers[num_buffers].original_pointer = ptr;
      num_buffers++;
   }

   return true;
}

struct marshal_cmd_DrawArraysInstancedBaseInstance
{
   struct marshal_cmd_base cmd_base;
   GLenum mode;
   GLint first;
   GLsizei count;
   GLsizei instance_count;
   GLuint baseinstance;
   GLuint user_buffer_mask;
};
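/* A glthread_attrib_binding array with one element per bit set in
 * user_buffer_mask immediately follows this struct in the command buffer
 * (see the memcpy in draw_arrays_async).
 */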

void
_mesa_unmarshal_DrawArraysInstancedBaseInstance(struct gl_context *ctx,
                                                const struct marshal_cmd_DrawArraysInstancedBaseInstance *cmd)
{
   const GLenum mode = cmd->mode;
   const GLint first = cmd->first;
   const GLsizei count = cmd->count;
   const GLsizei instance_count = cmd->instance_count;
   const GLuint baseinstance = cmd->baseinstance;
   const GLuint user_buffer_mask = cmd->user_buffer_mask;
   const struct glthread_attrib_binding *buffers =
      (const struct glthread_attrib_binding *)(cmd + 1);

   /* Bind uploaded buffers if needed. */
   if (user_buffer_mask) {
      _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask,
                                      false);
   }

   CALL_DrawArraysInstancedBaseInstance(ctx->CurrentServerDispatch,
                                        (mode, first, count, instance_count,
                                         baseinstance));

   /* Restore states. */
   if (user_buffer_mask) {
      _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask,
                                      true);
   }
}

static ALWAYS_INLINE void
draw_arrays_async(struct gl_context *ctx, GLenum mode, GLint first,
                  GLsizei count, GLsizei instance_count, GLuint baseinstance,
                  unsigned user_buffer_mask,
                  const struct glthread_attrib_binding *buffers)
{
   int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]);
   int cmd_size = sizeof(struct marshal_cmd_DrawArraysInstancedBaseInstance) +
                  buffers_size;
   struct marshal_cmd_DrawArraysInstancedBaseInstance *cmd;

   cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawArraysInstancedBaseInstance,
                                         cmd_size);
   cmd->mode = mode;
   cmd->first = first;
   cmd->count = count;
   cmd->instance_count = instance_count;
   cmd->baseinstance = baseinstance;
   cmd->user_buffer_mask = user_buffer_mask;

   if (user_buffer_mask)
      memcpy(cmd + 1, buffers, buffers_size);
}

static ALWAYS_INLINE void
draw_arrays(GLenum mode, GLint first, GLsizei count, GLsizei instance_count,
            GLuint baseinstance, bool compiled_into_dlist)
{
   GET_CURRENT_CONTEXT(ctx);

   struct glthread_vao *vao = ctx->GLThread.CurrentVAO;
   unsigned user_buffer_mask = vao->UserPointerMask & vao->BufferEnabled;

   if (compiled_into_dlist && ctx->GLThread.inside_dlist) {
      _mesa_glthread_finish_before(ctx, "DrawArrays");
      /* Use the function that's compiled into a display list. */
      CALL_DrawArrays(ctx->CurrentServerDispatch, (mode, first, count));
      return;
   }

   /* Fast path when nothing needs to be done.
    *
    * This is also an error path. Zero counts should still call the driver
    * for possible GL errors.
    */
   if (ctx->API == API_OPENGL_CORE || !user_buffer_mask ||
       count <= 0 || instance_count <= 0) {
      draw_arrays_async(ctx, mode, first, count, instance_count, baseinstance,
                        0, NULL);
      return;
   }

   /* Upload and draw. */
   struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX];
   if (!ctx->GLThread.SupportsNonVBOUploads ||
       !upload_vertices(ctx, user_buffer_mask, first, count, baseinstance,
                        instance_count, buffers)) {
      _mesa_glthread_finish_before(ctx, "DrawArrays");
      CALL_DrawArraysInstancedBaseInstance(ctx->CurrentServerDispatch,
                                           (mode, first, count, instance_count,
                                            baseinstance));
      return;
   }

   draw_arrays_async(ctx, mode, first, count, instance_count, baseinstance,
                     user_buffer_mask, buffers);
}

struct marshal_cmd_MultiDrawArrays
{
   struct marshal_cmd_base cmd_base;
   GLenum mode;
   GLsizei draw_count;
   GLuint user_buffer_mask;
};
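/* The variable-size payload that follows this struct in the command buffer:
 * GLint first[draw_count], then GLsizei count[draw_count], then the
 * glthread_attrib_binding array for user_buffer_mask.
 */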

void
_mesa_unmarshal_MultiDrawArrays(struct gl_context *ctx,
                                const struct marshal_cmd_MultiDrawArrays *cmd)
{
   const GLenum mode = cmd->mode;
   const GLsizei draw_count = cmd->draw_count;
   const GLuint user_buffer_mask = cmd->user_buffer_mask;

   const char *variable_data = (const char *)(cmd + 1);
   const GLint *first = (GLint *)variable_data;
   variable_data += sizeof(GLint) * draw_count;
   const GLsizei *count = (GLsizei *)variable_data;
   variable_data += sizeof(GLsizei) * draw_count;
   const struct glthread_attrib_binding *buffers =
      (const struct glthread_attrib_binding *)variable_data;

   /* Bind uploaded buffers if needed. */
   if (user_buffer_mask) {
      _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask,
                                      false);
   }

   CALL_MultiDrawArrays(ctx->CurrentServerDispatch,
                        (mode, first, count, draw_count));

   /* Restore states. */
   if (user_buffer_mask) {
      _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask,
                                      true);
   }
}

static ALWAYS_INLINE void
multi_draw_arrays_async(struct gl_context *ctx, GLenum mode,
                        const GLint *first, const GLsizei *count,
                        GLsizei draw_count, unsigned user_buffer_mask,
                        const struct glthread_attrib_binding *buffers)
{
   int first_size = sizeof(GLint) * draw_count;
   int count_size = sizeof(GLsizei) * draw_count;
   int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]);
   int cmd_size = sizeof(struct marshal_cmd_MultiDrawArrays) +
                  first_size + count_size + buffers_size;
   struct marshal_cmd_MultiDrawArrays *cmd;

   cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_MultiDrawArrays,
                                         cmd_size);
   cmd->mode = mode;
   cmd->draw_count = draw_count;
   cmd->user_buffer_mask = user_buffer_mask;

   char *variable_data = (char*)(cmd + 1);
   memcpy(variable_data, first, first_size);
   variable_data += first_size;
   memcpy(variable_data, count, count_size);

   if (user_buffer_mask) {
      variable_data += count_size;
      memcpy(variable_data, buffers, buffers_size);
   }
}

void GLAPIENTRY
_mesa_marshal_MultiDrawArrays(GLenum mode, const GLint *first,
                              const GLsizei *count, GLsizei draw_count)
{
   GET_CURRENT_CONTEXT(ctx);

   struct glthread_vao *vao = ctx->GLThread.CurrentVAO;
   unsigned user_buffer_mask = vao->UserPointerMask & vao->BufferEnabled;

   if (ctx->GLThread.inside_dlist)
      goto sync;

   if (draw_count >= 0 &&
       (ctx->API == API_OPENGL_CORE || !user_buffer_mask)) {
      multi_draw_arrays_async(ctx, mode, first, count, draw_count, 0, NULL);
      return;
   }

   /* If the draw count is too high or negative, the queue can't be used. */
   if (!ctx->GLThread.SupportsNonVBOUploads ||
       draw_count < 0 || draw_count > MARSHAL_MAX_CMD_SIZE / 16)
      goto sync;

   unsigned min_index = ~0;
   unsigned max_index_exclusive = 0;

   for (unsigned i = 0; i < draw_count; i++) {
      GLsizei vertex_count = count[i];

      if (vertex_count < 0) {
         /* Just call the driver to set the error. */
         multi_draw_arrays_async(ctx, mode, first, count, draw_count, 0, NULL);
         return;
      }
      if (vertex_count == 0)
         continue;

      min_index = MIN2(min_index, first[i]);
      max_index_exclusive = MAX2(max_index_exclusive, first[i] + vertex_count);
   }

   unsigned num_vertices = max_index_exclusive - min_index;
   if (num_vertices == 0) {
      /* Nothing to do, but call the driver to set possible GL errors. */
      multi_draw_arrays_async(ctx, mode, first, count, draw_count, 0, NULL);
      return;
   }

   /* Upload and draw. */
   struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX];
   if (!upload_vertices(ctx, user_buffer_mask, min_index, num_vertices,
                        0, 1, buffers))
      goto sync;

   multi_draw_arrays_async(ctx, mode, first, count, draw_count,
                           user_buffer_mask, buffers);
   return;

sync:
   _mesa_glthread_finish_before(ctx, "MultiDrawArrays");
   CALL_MultiDrawArrays(ctx->CurrentServerDispatch,
                        (mode, first, count, draw_count));
}

struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance
{
   struct marshal_cmd_base cmd_base;
   bool index_bounds_valid;
   GLenum mode;
   GLenum type;
   GLsizei count;
   GLsizei instance_count;
   GLint basevertex;
   GLuint baseinstance;
   GLuint min_index;
   GLuint max_index;
   GLuint user_buffer_mask;
   const GLvoid *indices;
   struct gl_buffer_object *index_buffer;
};
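/* As above, the glthread_attrib_binding array for user_buffer_mask
 * immediately follows this struct in the command buffer.
 */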

void
_mesa_unmarshal_DrawElementsInstancedBaseVertexBaseInstance(struct gl_context *ctx,
                                                            const struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance *cmd)
{
   const GLenum mode = cmd->mode;
   const GLsizei count = cmd->count;
   const GLenum type = cmd->type;
   const GLvoid *indices = cmd->indices;
   const GLsizei instance_count = cmd->instance_count;
   const GLint basevertex = cmd->basevertex;
   const GLuint baseinstance = cmd->baseinstance;
   const GLuint min_index = cmd->min_index;
   const GLuint max_index = cmd->max_index;
   const GLuint user_buffer_mask = cmd->user_buffer_mask;
   struct gl_buffer_object *index_buffer = cmd->index_buffer;
   const struct glthread_attrib_binding *buffers =
      (const struct glthread_attrib_binding *)(cmd + 1);

   /* Bind uploaded buffers if needed. */
   if (user_buffer_mask) {
      _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask,
                                      false);
   }
   if (index_buffer) {
      _mesa_InternalBindElementBuffer(ctx, index_buffer);
   }

   /* Draw. */
   if (cmd->index_bounds_valid && instance_count == 1 && baseinstance == 0) {
      CALL_DrawRangeElementsBaseVertex(ctx->CurrentServerDispatch,
                                       (mode, min_index, max_index, count,
                                        type, indices, basevertex));
   } else {
      CALL_DrawElementsInstancedBaseVertexBaseInstance(ctx->CurrentServerDispatch,
                                                       (mode, count, type, indices,
                                                        instance_count, basevertex,
                                                        baseinstance));
   }

   /* Restore states. */
   if (index_buffer) {
      _mesa_InternalBindElementBuffer(ctx, NULL);
   }
   if (user_buffer_mask) {
      _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask,
                                      true);
   }
}

static ALWAYS_INLINE void
draw_elements_async(struct gl_context *ctx, GLenum mode, GLsizei count,
                    GLenum type, const GLvoid *indices, GLsizei instance_count,
                    GLint basevertex, GLuint baseinstance,
                    bool index_bounds_valid, GLuint min_index, GLuint max_index,
                    struct gl_buffer_object *index_buffer,
                    unsigned user_buffer_mask,
                    const struct glthread_attrib_binding *buffers)
{
   int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]);
   int cmd_size = sizeof(struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance) +
                  buffers_size;
   struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance *cmd;

   cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawElementsInstancedBaseVertexBaseInstance, cmd_size);
   cmd->mode = mode;
   cmd->count = count;
   cmd->type = type;
   cmd->indices = indices;
   cmd->instance_count = instance_count;
   cmd->basevertex = basevertex;
   cmd->baseinstance = baseinstance;
   cmd->min_index = min_index;
   cmd->max_index = max_index;
   cmd->user_buffer_mask = user_buffer_mask;
   cmd->index_bounds_valid = index_bounds_valid;
   cmd->index_buffer = index_buffer;

   if (user_buffer_mask)
      memcpy(cmd + 1, buffers, buffers_size);
}

static void
draw_elements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices,
              GLsizei instance_count, GLint basevertex, GLuint baseinstance,
              bool index_bounds_valid, GLuint min_index, GLuint max_index,
              bool compiled_into_dlist)
{
   GET_CURRENT_CONTEXT(ctx);

   struct glthread_vao *vao = ctx->GLThread.CurrentVAO;
   unsigned user_buffer_mask = vao->UserPointerMask & vao->BufferEnabled;
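   /* No element buffer bound in the VAO means "indices" is a pointer to
    * client memory rather than an offset into a buffer object.
    */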
   bool has_user_indices = vao->CurrentElementBufferName == 0;

   if (compiled_into_dlist && ctx->GLThread.inside_dlist)
      goto sync;

   /* Fast path when nothing needs to be done.
    *
    * This is also an error path. Zero counts should still call the driver
    * for possible GL errors.
    */
   if (ctx->API == API_OPENGL_CORE ||
       count <= 0 || instance_count <= 0 || max_index < min_index ||
       !is_index_type_valid(type) ||
       (!user_buffer_mask && !has_user_indices)) {
      draw_elements_async(ctx, mode, count, type, indices, instance_count,
                          basevertex, baseinstance, index_bounds_valid,
                          min_index, max_index, 0, 0, NULL);
      return;
   }

   if (!ctx->GLThread.SupportsNonVBOUploads)
      goto sync;

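   /* Index bounds are only needed to upload per-vertex user arrays, i.e.
    * attribs with a zero divisor. Per-instance data is bounded by the
    * instance count, not by index values.
    */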
   bool need_index_bounds = user_buffer_mask & ~vao->NonZeroDivisorMask;
   unsigned index_size = get_index_size(type);

   if (need_index_bounds && !index_bounds_valid) {
      /* Sync if indices come from a buffer and vertices come from memory
       * and index bounds are not valid.
       *
       * We would have to map the indices to compute the index bounds, and
       * for that we would have to sync anyway.
       */
      if (!has_user_indices)
         goto sync;

      /* Compute the index bounds. */
      min_index = ~0;
      max_index = 0;
      vbo_get_minmax_index_mapped(count, index_size,
                                  ctx->GLThread._RestartIndex[index_size - 1],
                                  ctx->GLThread._PrimitiveRestart, indices,
                                  &min_index, &max_index);
      index_bounds_valid = true;
   }

   unsigned start_vertex = min_index + basevertex;
   unsigned num_vertices = max_index + 1 - min_index;

   /* If there is too much data to upload, sync and let the driver unroll
    * indices. */
   if (util_is_vbo_upload_ratio_too_large(count, num_vertices))
      goto sync;

   struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX];
   if (user_buffer_mask &&
       !upload_vertices(ctx, user_buffer_mask, start_vertex, num_vertices,
                        baseinstance, instance_count, buffers))
      goto sync;

   /* Upload indices. */
   struct gl_buffer_object *index_buffer = NULL;
   if (has_user_indices)
      index_buffer = upload_indices(ctx, count, index_size, &indices);

   /* Draw asynchronously. */
   draw_elements_async(ctx, mode, count, type, indices, instance_count,
                       basevertex, baseinstance, index_bounds_valid,
                       min_index, max_index, index_buffer,
                       user_buffer_mask, buffers);
   return;

sync:
   _mesa_glthread_finish_before(ctx, "DrawElements");

   if (compiled_into_dlist && ctx->GLThread.inside_dlist) {
      /* Only use the ones that are compiled into display lists. */
      if (basevertex) {
         CALL_DrawElementsBaseVertex(ctx->CurrentServerDispatch,
                                     (mode, count, type, indices, basevertex));
      } else if (index_bounds_valid) {
         CALL_DrawRangeElements(ctx->CurrentServerDispatch,
                                (mode, min_index, max_index, count, type, indices));
      } else {
         CALL_DrawElements(ctx->CurrentServerDispatch, (mode, count, type, indices));
      }
   } else if (index_bounds_valid && instance_count == 1 && baseinstance == 0) {
      CALL_DrawRangeElementsBaseVertex(ctx->CurrentServerDispatch,
                                       (mode, min_index, max_index, count,
                                        type, indices, basevertex));
   } else {
      CALL_DrawElementsInstancedBaseVertexBaseInstance(ctx->CurrentServerDispatch,
                                                       (mode, count, type, indices,
                                                        instance_count, basevertex,
                                                        baseinstance));
   }
}

struct marshal_cmd_MultiDrawElementsBaseVertex
{
   struct marshal_cmd_base cmd_base;
   bool has_base_vertex;
   GLenum mode;
   GLenum type;
   GLsizei draw_count;
   GLuint user_buffer_mask;
   struct gl_buffer_object *index_buffer;
};
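/* The variable-size payload that follows this struct in the command buffer:
 * GLsizei count[draw_count], then the indices array of draw_count pointers,
 * then optionally GLsizei basevertex[draw_count] when has_base_vertex is
 * set, and finally the glthread_attrib_binding array for user_buffer_mask.
 */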

void
_mesa_unmarshal_MultiDrawElementsBaseVertex(struct gl_context *ctx,
                                            const struct marshal_cmd_MultiDrawElementsBaseVertex *cmd)
{
   const GLenum mode = cmd->mode;
   const GLenum type = cmd->type;
   const GLsizei draw_count = cmd->draw_count;
   const GLuint user_buffer_mask = cmd->user_buffer_mask;
   struct gl_buffer_object *index_buffer = cmd->index_buffer;
   const bool has_base_vertex = cmd->has_base_vertex;

   const char *variable_data = (const char *)(cmd + 1);
   const GLsizei *count = (GLsizei *)variable_data;
   variable_data += sizeof(GLsizei) * draw_count;
   const GLvoid *const *indices = (const GLvoid *const *)variable_data;
   variable_data += sizeof(const GLvoid *const *) * draw_count;
   const GLsizei *basevertex = NULL;
   if (has_base_vertex) {
      basevertex = (GLsizei *)variable_data;
      variable_data += sizeof(GLsizei) * draw_count;
   }
   const struct glthread_attrib_binding *buffers =
      (const struct glthread_attrib_binding *)variable_data;

   /* Bind uploaded buffers if needed. */
   if (user_buffer_mask) {
      _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask,
                                      false);
   }
   if (index_buffer) {
      _mesa_InternalBindElementBuffer(ctx, index_buffer);
   }

   /* Draw. */
   if (has_base_vertex) {
      CALL_MultiDrawElementsBaseVertex(ctx->CurrentServerDispatch,
                                       (mode, count, type, indices, draw_count,
                                        basevertex));
   } else {
      CALL_MultiDrawElementsEXT(ctx->CurrentServerDispatch,
                                (mode, count, type, indices, draw_count));
   }

   /* Restore states. */
   if (index_buffer) {
      _mesa_InternalBindElementBuffer(ctx, NULL);
   }
   if (user_buffer_mask) {
      _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask,
                                      true);
   }
}

static ALWAYS_INLINE void
multi_draw_elements_async(struct gl_context *ctx, GLenum mode,
                          const GLsizei *count, GLenum type,
                          const GLvoid *const *indices, GLsizei draw_count,
                          const GLsizei *basevertex,
                          struct gl_buffer_object *index_buffer,
                          unsigned user_buffer_mask,
                          const struct glthread_attrib_binding *buffers)
{
   int count_size = sizeof(GLsizei) * draw_count;
   int indices_size = sizeof(indices[0]) * draw_count;
   int basevertex_size = basevertex ? sizeof(GLsizei) * draw_count : 0;
   int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]);
   int cmd_size = sizeof(struct marshal_cmd_MultiDrawElementsBaseVertex) +
                  count_size + indices_size + basevertex_size + buffers_size;
   struct marshal_cmd_MultiDrawElementsBaseVertex *cmd;

   cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_MultiDrawElementsBaseVertex, cmd_size);
   cmd->mode = mode;
   cmd->type = type;
   cmd->draw_count = draw_count;
   cmd->user_buffer_mask = user_buffer_mask;
   cmd->index_buffer = index_buffer;
   cmd->has_base_vertex = basevertex != NULL;

   char *variable_data = (char*)(cmd + 1);
   memcpy(variable_data, count, count_size);
   variable_data += count_size;
   memcpy(variable_data, indices, indices_size);
   variable_data += indices_size;

   if (basevertex) {
      memcpy(variable_data, basevertex, basevertex_size);
      variable_data += basevertex_size;
   }

   if (user_buffer_mask)
      memcpy(variable_data, buffers, buffers_size);
}

void GLAPIENTRY
_mesa_marshal_MultiDrawElementsBaseVertex(GLenum mode, const GLsizei *count,
                                          GLenum type,
                                          const GLvoid *const *indices,
                                          GLsizei draw_count,
                                          const GLsizei *basevertex)
{
   GET_CURRENT_CONTEXT(ctx);

   struct glthread_vao *vao = ctx->GLThread.CurrentVAO;
   unsigned user_buffer_mask = vao->UserPointerMask & vao->BufferEnabled;
   bool has_user_indices = vao->CurrentElementBufferName == 0;

   if (ctx->GLThread.inside_dlist)
      goto sync;

   /* Fast path when nothing needs to be done. */
   if (draw_count >= 0 &&
       (ctx->API == API_OPENGL_CORE ||
        !is_index_type_valid(type) ||
        (!user_buffer_mask && !has_user_indices))) {
      multi_draw_elements_async(ctx, mode, count, type, indices, draw_count,
                                basevertex, 0, 0, NULL);
      return;
   }

   bool need_index_bounds = user_buffer_mask & ~vao->NonZeroDivisorMask;

   /* If the draw count is too high or negative, the queue can't be used.
    *
    * Sync if indices come from a buffer and vertices come from memory
    * and index bounds are not valid. We would have to map the indices
    * to compute the index bounds, and for that we would have to sync anyway.
    */
   if (!ctx->GLThread.SupportsNonVBOUploads ||
       draw_count < 0 || draw_count > MARSHAL_MAX_CMD_SIZE / 32 ||
       (need_index_bounds && !has_user_indices))
      goto sync;

   unsigned index_size = get_index_size(type);
   unsigned min_index = ~0;
   unsigned max_index = 0;
   unsigned total_count = 0;
   unsigned num_vertices = 0;

   /* This is always true if there is per-vertex data that needs to be
    * uploaded.
    */
   if (need_index_bounds) {
      /* Compute the index bounds. */
      for (unsigned i = 0; i < draw_count; i++) {
         GLsizei vertex_count = count[i];

         if (vertex_count < 0) {
            /* Just call the driver to set the error. */
            multi_draw_elements_async(ctx, mode, count, type, indices, draw_count,
                                      basevertex, 0, 0, NULL);
            return;
         }
         if (vertex_count == 0)
            continue;

         unsigned min = ~0, max = 0;
         vbo_get_minmax_index_mapped(vertex_count, index_size,
                                     ctx->GLThread._RestartIndex[index_size - 1],
                                     ctx->GLThread._PrimitiveRestart, indices[i],
                                     &min, &max);
         if (basevertex) {
            min += basevertex[i];
            max += basevertex[i];
         }
         min_index = MIN2(min_index, min);
         max_index = MAX2(max_index, max);
         total_count += vertex_count;
      }

      num_vertices = max_index + 1 - min_index;

      if (total_count == 0 || num_vertices == 0) {
         /* Nothing to do, but call the driver to set possible GL errors. */
         multi_draw_elements_async(ctx, mode, count, type, indices, draw_count,
                                   basevertex, 0, 0, NULL);
         return;
      }

      /* If there is too much data to upload, sync and let the driver unroll
       * indices. */
      if (util_is_vbo_upload_ratio_too_large(total_count, num_vertices))
         goto sync;
   } else if (has_user_indices) {
      /* Only compute total_count for the upload of indices. */
      for (unsigned i = 0; i < draw_count; i++) {
         GLsizei vertex_count = count[i];

         if (vertex_count < 0) {
            /* Just call the driver to set the error. */
            multi_draw_elements_async(ctx, mode, count, type, indices, draw_count,
                                      basevertex, 0, 0, NULL);
            return;
         }
         if (vertex_count == 0)
            continue;

         total_count += vertex_count;
      }

      if (total_count == 0) {
         /* Nothing to do, but call the driver to set possible GL errors. */
         multi_draw_elements_async(ctx, mode, count, type, indices, draw_count,
                                   basevertex, 0, 0, NULL);
         return;
      }
   }

   /* Upload vertices. */
   struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX];
   if (user_buffer_mask &&
       !upload_vertices(ctx, user_buffer_mask, min_index, num_vertices,
                        0, 1, buffers))
      goto sync;

   /* Upload indices. */
   struct gl_buffer_object *index_buffer = NULL;
   if (has_user_indices) {
      const GLvoid **out_indices = alloca(sizeof(indices[0]) * draw_count);

      index_buffer = upload_multi_indices(ctx, total_count, index_size,
                                          draw_count, count, indices,
                                          out_indices);
      indices = out_indices;
   }

   /* Draw asynchronously. */
   multi_draw_elements_async(ctx, mode, count, type, indices, draw_count,
                             basevertex, index_buffer, user_buffer_mask,
                             buffers);
   return;

sync:
   _mesa_glthread_finish_before(ctx, "DrawElements");

   if (basevertex) {
      CALL_MultiDrawElementsBaseVertex(ctx->CurrentServerDispatch,
                                       (mode, count, type, indices, draw_count,
                                        basevertex));
   } else {
      CALL_MultiDrawElementsEXT(ctx->CurrentServerDispatch,
                                (mode, count, type, indices, draw_count));
   }
}

void GLAPIENTRY
_mesa_marshal_DrawArrays(GLenum mode, GLint first, GLsizei count)
{
   draw_arrays(mode, first, count, 1, 0, true);
}

void GLAPIENTRY
_mesa_marshal_DrawArraysInstancedARB(GLenum mode, GLint first, GLsizei count,
                                     GLsizei instance_count)
{
   draw_arrays(mode, first, count, instance_count, 0, false);
}

void GLAPIENTRY
_mesa_marshal_DrawArraysInstancedBaseInstance(GLenum mode, GLint first,
                                              GLsizei count, GLsizei instance_count,
                                              GLuint baseinstance)
{
   draw_arrays(mode, first, count, instance_count, baseinstance, false);
}

void GLAPIENTRY
_mesa_marshal_DrawElements(GLenum mode, GLsizei count, GLenum type,
                           const GLvoid *indices)
{
   draw_elements(mode, count, type, indices, 1, 0, 0, false, 0, 0, true);
}

void GLAPIENTRY
_mesa_marshal_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
                                GLsizei count, GLenum type,
                                const GLvoid *indices)
{
   draw_elements(mode, count, type, indices, 1, 0, 0, true, start, end, true);
}

void GLAPIENTRY
_mesa_marshal_DrawElementsInstancedARB(GLenum mode, GLsizei count, GLenum type,
                                       const GLvoid *indices, GLsizei instance_count)
{
   draw_elements(mode, count, type, indices, instance_count, 0, 0, false, 0, 0, false);
}

void GLAPIENTRY
_mesa_marshal_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
                                     const GLvoid *indices, GLint basevertex)
{
   draw_elements(mode, count, type, indices, 1, basevertex, 0, false, 0, 0, true);
}

void GLAPIENTRY
_mesa_marshal_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end,
                                          GLsizei count, GLenum type,
                                          const GLvoid *indices, GLint basevertex)
{
   draw_elements(mode, count, type, indices, 1, basevertex, 0, true, start, end, false);
}

void GLAPIENTRY
_mesa_marshal_DrawElementsInstancedBaseVertex(GLenum mode, GLsizei count,
                                              GLenum type, const GLvoid *indices,
                                              GLsizei instance_count, GLint basevertex)
{
   draw_elements(mode, count, type, indices, instance_count, basevertex, 0, false, 0, 0, false);
}

void GLAPIENTRY
_mesa_marshal_DrawElementsInstancedBaseInstance(GLenum mode, GLsizei count,
                                                GLenum type, const GLvoid *indices,
                                                GLsizei instance_count, GLuint baseinstance)
{
   draw_elements(mode, count, type, indices, instance_count, 0, baseinstance, false, 0, 0, false);
}

void GLAPIENTRY
_mesa_marshal_DrawElementsInstancedBaseVertexBaseInstance(GLenum mode, GLsizei count,
                                                          GLenum type, const GLvoid *indices,
                                                          GLsizei instance_count, GLint basevertex,
                                                          GLuint baseinstance)
{
   draw_elements(mode, count, type, indices, instance_count, basevertex, baseinstance, false, 0, 0, false);
}

void GLAPIENTRY
_mesa_marshal_MultiDrawElementsEXT(GLenum mode, const GLsizei *count,
                                   GLenum type, const GLvoid *const *indices,
                                   GLsizei draw_count)
{
   _mesa_marshal_MultiDrawElementsBaseVertex(mode, count, type, indices,
                                             draw_count, NULL);
}

void
_mesa_unmarshal_DrawArrays(struct gl_context *ctx, const struct marshal_cmd_DrawArrays *cmd)
{
   unreachable("never used - DrawArraysInstancedBaseInstance is used instead");
}

void
_mesa_unmarshal_DrawArraysInstancedARB(struct gl_context *ctx, const struct marshal_cmd_DrawArraysInstancedARB *cmd)
{
   unreachable("never used - DrawArraysInstancedBaseInstance is used instead");
}

void
_mesa_unmarshal_DrawElements(struct gl_context *ctx, const struct marshal_cmd_DrawElements *cmd)
{
   unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}

void
_mesa_unmarshal_DrawRangeElements(struct gl_context *ctx, const struct marshal_cmd_DrawRangeElements *cmd)
{
   unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}

void
_mesa_unmarshal_DrawElementsInstancedARB(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedARB *cmd)
{
   unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}

void
_mesa_unmarshal_DrawElementsBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawElementsBaseVertex *cmd)
{
   unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}

void
_mesa_unmarshal_DrawRangeElementsBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawRangeElementsBaseVertex *cmd)
{
   unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}

void
_mesa_unmarshal_DrawElementsInstancedBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedBaseVertex *cmd)
{
   unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}

void
_mesa_unmarshal_DrawElementsInstancedBaseInstance(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedBaseInstance *cmd)
{
   unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}

void
_mesa_unmarshal_MultiDrawElementsEXT(struct gl_context *ctx, const struct marshal_cmd_MultiDrawElementsEXT *cmd)
{
   unreachable("never used - MultiDrawElementsBaseVertex is used instead");
}