/**************************************************************************
 *
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file intel_buffer_objects.c
 *
 * This provides core GL buffer object functionality.
 */

#include "main/imports.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/bufferobj.h"

#include "brw_context.h"
#include "intel_blit.h"
#include "intel_buffer_objects.h"
#include "intel_batchbuffer.h"

static GLboolean
intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);

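/**
 * Marks a range of the buffer object's BO as having pending GPU usage, so
 * that later CPU writes to that range know they may need to synchronize,
 * blit, or replace the BO instead of writing in place.
 */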
static void
intel_bufferobj_mark_gpu_usage(struct intel_buffer_object *intel_obj,
                               uint32_t offset, uint32_t size)
{
   intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset);
   intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size);
}

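/**
 * Resets the tracked GPU-active range to empty (start > end), so that
 * subsequent CPU writes can take the unsynchronized upload path.
 */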
static void
intel_bufferobj_mark_inactive(struct intel_buffer_object *intel_obj)
{
   intel_obj->gpu_active_start = ~0;
   intel_obj->gpu_active_end = 0;
}

/** Allocates a new drm_intel_bo to store the data for the buffer object. */
static void
intel_bufferobj_alloc_buffer(struct brw_context *brw,
                             struct intel_buffer_object *intel_obj)
{
   intel_obj->buffer = drm_intel_bo_alloc(brw->bufmgr, "bufferobj",
                                          intel_obj->Base.Size, 64);

   /* The buffer might be bound as a uniform buffer, so the uniform buffer
    * state needs to be flagged for update.
    */
   brw->state.dirty.brw |= BRW_NEW_UNIFORM_BUFFER;

   intel_bufferobj_mark_inactive(intel_obj);
}

static void
release_buffer(struct intel_buffer_object *intel_obj)
{
   drm_intel_bo_unreference(intel_obj->buffer);
   intel_obj->buffer = NULL;
}

/**
 * The NewBufferObject() driver hook.
 *
 * Allocates a new intel_buffer_object structure and initializes it.
 *
 * There is some duplication between Mesa's buffer objects and our
 * bufmgr buffers: both have an integer handle and a hashtable to
 * look up an opaque structure.  It would be nice if the handles and
 * internal structures were somehow shared.
 */
static struct gl_buffer_object *
intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target)
{
   struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);

   _mesa_initialize_buffer_object(ctx, &obj->Base, name, target);

   obj->buffer = NULL;

   return &obj->Base;
}

/**
 * The DeleteBuffer() driver hook.
 *
 * Deletes a single OpenGL buffer object.  Used by glDeleteBuffers().
 */
static void
intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
{
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* Buffer objects are automatically unmapped when deleted according to
    * the spec, but Mesa doesn't call UnmapBuffer for us at context destroy
    * time (though it does if you call glDeleteBuffers).
    */
   if (obj->Pointer)
      intel_bufferobj_unmap(ctx, obj);

   drm_intel_bo_unreference(intel_obj->buffer);
   free(intel_obj);
}


/**
 * The BufferData() driver hook.
 *
 * Implements glBufferData(), which recreates a buffer object's data store
 * and populates it with the given data, if present.
 *
 * Any data that was previously stored in the buffer object is lost.
 *
 * \return true for success, false if out of memory
 */
static GLboolean
intel_bufferobj_data(struct gl_context * ctx,
                     GLenum target,
                     GLsizeiptrARB size,
                     const GLvoid * data,
                     GLenum usage, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   /* Part of the ABI, but this function doesn't use it.
    */
   (void) target;

   intel_obj->Base.Size = size;
   intel_obj->Base.Usage = usage;

   assert(!obj->Pointer); /* Mesa should have unmapped it */

   if (intel_obj->buffer != NULL)
      release_buffer(intel_obj);

   if (size != 0) {
      intel_bufferobj_alloc_buffer(brw, intel_obj);
      if (!intel_obj->buffer)
         return false;

      if (data != NULL)
         drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
   }

   return true;
}


/**
 * The BufferSubData() driver hook.
 *
 * Implements glBufferSubData(), which replaces a portion of the data in a
 * buffer object.
 *
 * If the data range specified by (size + offset) extends beyond the end of
 * the buffer or if data is NULL, no copy is performed.
 */
static void
intel_bufferobj_subdata(struct gl_context * ctx,
                        GLintptrARB offset,
                        GLsizeiptrARB size,
                        const GLvoid * data, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   bool busy;

   if (size == 0)
      return;

   assert(intel_obj);

   /* See if we can write the data into the user's BO without synchronizing.
    * This avoids GPU stalls in unfortunately common usage patterns (uploading
    * sequentially into a BO, with draw calls in between each upload).
    *
    * Once we've hit this path, we mark this GL BO as preferring stalling to
    * blits, so that we can hopefully hit this path again in the future.
    * (Otherwise, an app that occasionally stalls but mostly doesn't would end
    * up blitting all the time, at the cost of bandwidth.)
    */
   if (brw->has_llc) {
      if (offset + size <= intel_obj->gpu_active_start ||
          intel_obj->gpu_active_end <= offset) {
         drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
         memcpy(intel_obj->buffer->virtual + offset, data, size);
         drm_intel_bo_unmap(intel_obj->buffer);

         if (intel_obj->gpu_active_end > intel_obj->gpu_active_start)
            intel_obj->prefer_stall_to_blit = true;
         return;
      }
   }

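   /* Otherwise, check whether writing in place would stall: the BO is busy if
    * the GPU is still using it, or if it's referenced by the unflushed batch.
    */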
   busy =
      drm_intel_bo_busy(intel_obj->buffer) ||
      drm_intel_bo_references(brw->batch.bo, intel_obj->buffer);

   if (busy) {
      if (size == intel_obj->Base.Size) {
         /* Replace the current busy bo so the subdata doesn't stall. */
         drm_intel_bo_unreference(intel_obj->buffer);
         intel_bufferobj_alloc_buffer(brw, intel_obj);
      } else if (!intel_obj->prefer_stall_to_blit) {
         perf_debug("Using a blit copy to avoid stalling on "
                    "glBufferSubData(%ld, %ld) (%ldkb) to a busy "
                    "(%d-%d) buffer object.\n",
                    (long)offset, (long)offset + size, (long)(size/1024),
                    intel_obj->gpu_active_start,
                    intel_obj->gpu_active_end);
         drm_intel_bo *temp_bo =
            drm_intel_bo_alloc(brw->bufmgr, "subdata temp", size, 64);

         drm_intel_bo_subdata(temp_bo, 0, size, data);

         intel_emit_linear_blit(brw,
                                intel_obj->buffer, offset,
                                temp_bo, 0,
                                size);

         drm_intel_bo_unreference(temp_bo);
         return;
      } else {
         perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy "
                    "(%d-%d) buffer object.  Use glMapBufferRange() to "
                    "avoid this.\n",
                    (long)offset, (long)offset + size, (long)(size/1024),
                    intel_obj->gpu_active_start,
                    intel_obj->gpu_active_end);
         intel_batchbuffer_flush(brw);
      }
   }

   drm_intel_bo_subdata(intel_obj->buffer, offset, size, data);
   intel_bufferobj_mark_inactive(intel_obj);
}


/**
 * The GetBufferSubData() driver hook.
 *
 * Implements glGetBufferSubData(), which copies a subrange of a buffer
 * object into user memory.
 */
static void
intel_bufferobj_get_subdata(struct gl_context * ctx,
                            GLintptrARB offset,
                            GLsizeiptrARB size,
                            GLvoid * data, struct gl_buffer_object *obj)
{
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   struct brw_context *brw = brw_context(ctx);

   assert(intel_obj);
   if (drm_intel_bo_references(brw->batch.bo, intel_obj->buffer)) {
      intel_batchbuffer_flush(brw);
   }
   drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);

   intel_bufferobj_mark_inactive(intel_obj);
}


/**
 * The MapBufferRange() driver hook.
 *
 * This implements both glMapBufferRange() and glMapBuffer().
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother
 * flushing the batchbuffer before mapping the buffer, which can save blocking
 * in many cases.  If we would still block, and they allow the whole buffer
 * to be invalidated, then just allocate a new buffer to replace the old one.
 * If not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
                          GLintptr offset, GLsizeiptr length,
                          GLbitfield access, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

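   /* A buffer object with no data store (size zero) has no BO to map. */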
   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the access is synchronized (like a normal buffer mapping), then get
    * things flushed out so the later mapping syncs appropriately through GEM.
    * If the user doesn't care about existing buffer contents and mapping would
    * cause us to block, then throw out the old buffer.
    *
    * If they set INVALIDATE_BUFFER, we can pitch the current contents to
    * achieve the required synchronization.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
      if (drm_intel_bo_references(brw->batch.bo, intel_obj->buffer)) {
         if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
            drm_intel_bo_unreference(intel_obj->buffer);
            intel_bufferobj_alloc_buffer(brw, intel_obj);
         } else {
            perf_debug("Stalling on the GPU for mapping a busy buffer "
                       "object\n");
            intel_batchbuffer_flush(brw);
         }
      } else if (drm_intel_bo_busy(intel_obj->buffer) &&
                 (access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
         drm_intel_bo_unreference(intel_obj->buffer);
         intel_bufferobj_alloc_buffer(brw, intel_obj);
      }
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       (access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
         intel_obj->range_map_buffer = malloc(length);
         obj->Pointer = intel_obj->range_map_buffer;
      } else {
         intel_obj->range_map_bo = drm_intel_bo_alloc(brw->bufmgr,
                                                      "range map",
                                                      length, 64);
         if (!(access & GL_MAP_READ_BIT)) {
            drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
         } else {
            drm_intel_bo_map(intel_obj->range_map_bo,
                             (access & GL_MAP_WRITE_BIT) != 0);
         }
         obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

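   /* Map the whole BO directly: unsynchronized if the caller asked for it,
    * through the GTT for write-only access, and with a regular CPU map
    * (read-only or read/write) otherwise.
    */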
   if (access & GL_MAP_UNSYNCHRONIZED_BIT)
      drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
   else if (!(access & GL_MAP_READ_BIT)) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_bufferobj_mark_inactive(intel_obj);
   } else {
      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
      intel_bufferobj_mark_inactive(intel_obj);
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}

/**
 * The FlushMappedBufferRange() driver hook.
 *
 * Implements glFlushMappedBufferRange(), which signifies that modifications
 * have been made to a range of a mapped buffer, and it should be flushed.
 *
 * This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT.
 *
 * Ideally we'd use a BO to avoid taking up cache space for the temporary
 * data, but FlushMappedBufferRange may be followed by further writes to
 * the pointer, so we would have to re-map after emitting our blit, which
 * would defeat the point.
 */
static void
intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
                                   GLintptr offset, GLsizeiptr length,
                                   struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   drm_intel_bo *temp_bo;

   /* Unless the range was mapped through a temporary system memory buffer,
    * there's no work to do here.
    */
   if (intel_obj->range_map_buffer == NULL)
      return;

   if (length == 0)
      return;

   temp_bo = drm_intel_bo_alloc(brw->bufmgr, "range map flush", length, 64);

   /* The flush offset is relative to the start of the mapping, so the
    * modified data lives at range_map_buffer + offset.
    */
   drm_intel_bo_subdata(temp_bo, 0, length,
                        intel_obj->range_map_buffer + offset);

   intel_emit_linear_blit(brw,
                          intel_obj->buffer, obj->Offset + offset,
                          temp_bo, 0,
                          length);
   intel_bufferobj_mark_gpu_usage(intel_obj, obj->Offset + offset, length);

   drm_intel_bo_unreference(temp_bo);
}


/**
 * The UnmapBuffer() driver hook.
 *
 * Implements glUnmapBuffer().
 */
static GLboolean
intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);
   assert(obj->Pointer);
   if (intel_obj->range_map_buffer != NULL) {
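      /* The malloc'ed temporary is only used with GL_MAP_FLUSH_EXPLICIT_BIT;
       * ranges the application flushed have already been blitted into the
       * real BO by intel_bufferobj_flush_mapped_range(), so all that's left
       * is to flush the batch cache and free the temporary.
       */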
      /* Since we've emitted some blits to buffers that will (likely) be used
       * in rendering operations in other cache domains in this batch, emit a
       * flush.  Once again, we wish for a domain tracker in libdrm to cover
       * usage inside of a batchbuffer.
       */
      intel_batchbuffer_emit_mi_flush(brw);
      free(intel_obj->range_map_buffer);
      intel_obj->range_map_buffer = NULL;
   } else if (intel_obj->range_map_bo != NULL) {
      drm_intel_bo_unmap(intel_obj->range_map_bo);

      intel_emit_linear_blit(brw,
                             intel_obj->buffer, obj->Offset,
                             intel_obj->range_map_bo, 0,
                             obj->Length);
      intel_bufferobj_mark_gpu_usage(intel_obj, obj->Offset, obj->Length);

      /* Since we've emitted some blits to buffers that will (likely) be used
       * in rendering operations in other cache domains in this batch, emit a
       * flush.  Once again, we wish for a domain tracker in libdrm to cover
       * usage inside of a batchbuffer.
       */
      intel_batchbuffer_emit_mi_flush(brw);

      drm_intel_bo_unreference(intel_obj->range_map_bo);
      intel_obj->range_map_bo = NULL;
   } else if (intel_obj->buffer != NULL) {
      drm_intel_bo_unmap(intel_obj->buffer);
   }
   obj->Pointer = NULL;
   obj->Offset = 0;
   obj->Length = 0;

   return true;
}

/**
 * Gets a pointer to the object's BO, and marks the given range as being used
 * on the GPU.
 *
 * Anywhere that uses buffer objects in the pipeline should be using this to
 * mark the range of the buffer that is being accessed by the pipeline.
 */
drm_intel_bo *
intel_bufferobj_buffer(struct brw_context *brw,
                       struct intel_buffer_object *intel_obj,
                       uint32_t offset, uint32_t size)
{
   /* This is needed so that things like transform feedback and texture buffer
    * objects, which need a BO but don't want to check whether one exists
    * during draw-time validation, can always get a BO from a GL buffer object.
    */
   if (intel_obj->buffer == NULL)
      intel_bufferobj_alloc_buffer(brw, intel_obj);

   intel_bufferobj_mark_gpu_usage(intel_obj, offset, size);

   return intel_obj->buffer;
}

/**
 * The CopyBufferSubData() driver hook.
 *
 * Implements glCopyBufferSubData(), which copies a portion of one buffer
 * object's data to another.  Independent source and destination offsets
 * are allowed.
 */
static void
intel_bufferobj_copy_subdata(struct gl_context *ctx,
                             struct gl_buffer_object *src,
                             struct gl_buffer_object *dst,
                             GLintptr read_offset, GLintptr write_offset,
                             GLsizeiptr size)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_src = intel_buffer_object(src);
   struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
   drm_intel_bo *src_bo, *dst_bo;

   if (size == 0)
      return;

   dst_bo = intel_bufferobj_buffer(brw, intel_dst, write_offset, size);
   src_bo = intel_bufferobj_buffer(brw, intel_src, read_offset, size);

   intel_emit_linear_blit(brw,
                          dst_bo, write_offset,
                          src_bo, read_offset, size);

   /* Since we've emitted some blits to buffers that will (likely) be used
    * in rendering operations in other cache domains in this batch, emit a
    * flush.  Once again, we wish for a domain tracker in libdrm to cover
    * usage inside of a batchbuffer.
    */
   intel_batchbuffer_emit_mi_flush(brw);
}

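/**
 * Plugs our buffer object implementation into the driver function table
 * that core Mesa dispatches buffer object calls through.
 */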
void
intelInitBufferObjectFuncs(struct dd_function_table *functions)
{
   functions->NewBufferObject = intel_bufferobj_alloc;
   functions->DeleteBuffer = intel_bufferobj_free;
   functions->BufferData = intel_bufferobj_data;
   functions->BufferSubData = intel_bufferobj_subdata;
   functions->GetBufferSubData = intel_bufferobj_get_subdata;
   functions->MapBufferRange = intel_bufferobj_map_range;
   functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
   functions->UnmapBuffer = intel_bufferobj_unmap;
   functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
}