1 /**************************************************************************
3 * Copyright 2003 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * @file intel_buffer_objects.c
31 * This provides core GL buffer object functionality.
34 #include "main/imports.h"
35 #include "main/mtypes.h"
36 #include "main/macros.h"
37 #include "main/bufferobj.h"
39 #include "brw_context.h"
40 #include "intel_blit.h"
41 #include "intel_buffer_objects.h"
42 #include "intel_batchbuffer.h"
45 intel_bufferobj_unmap(struct gl_context
* ctx
, struct gl_buffer_object
*obj
);
48 intel_bufferobj_mark_gpu_usage(struct intel_buffer_object
*intel_obj
,
49 uint32_t offset
, uint32_t size
)
51 intel_obj
->gpu_active_start
= MIN2(intel_obj
->gpu_active_start
, offset
);
52 intel_obj
->gpu_active_end
= MAX2(intel_obj
->gpu_active_end
, offset
+ size
);
56 intel_bufferobj_mark_inactive(struct intel_buffer_object
*intel_obj
)
58 intel_obj
->gpu_active_start
= ~0;
59 intel_obj
->gpu_active_end
= 0;
62 /** Allocates a new drm_intel_bo to store the data for the buffer object. */
64 intel_bufferobj_alloc_buffer(struct brw_context
*brw
,
65 struct intel_buffer_object
*intel_obj
)
67 intel_obj
->buffer
= drm_intel_bo_alloc(brw
->bufmgr
, "bufferobj",
68 intel_obj
->Base
.Size
, 64);
70 /* the buffer might be bound as a uniform buffer, need to update it
72 brw
->state
.dirty
.brw
|= BRW_NEW_UNIFORM_BUFFER
;
74 intel_bufferobj_mark_inactive(intel_obj
);
78 release_buffer(struct intel_buffer_object
*intel_obj
)
80 drm_intel_bo_unreference(intel_obj
->buffer
);
81 intel_obj
->buffer
= NULL
;
/**
 * The NewBufferObject() driver hook.
 *
 * Allocates a new intel_buffer_object structure and initializes it.
 * Returns NULL on allocation failure (core Mesa handles that as
 * GL_OUT_OF_MEMORY).
 *
 * There is some duplication between mesa's bufferobjects and our
 * bufmgr buffers.  Both have an integer handle and a hashtable to
 * lookup an opaque structure.  It would be nice if the handles and
 * internal structure where somehow shared.
 */
static struct gl_buffer_object *
intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target)
{
   struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);

   /* Don't dereference a failed allocation; report OOM to core Mesa. */
   if (obj == NULL)
      return NULL;

   _mesa_initialize_buffer_object(ctx, &obj->Base, name, target);

   return &obj->Base;
}
/**
 * The DeleteBuffer() driver hook.
 *
 * Deletes a single OpenGL buffer object.  Used by glDeleteBuffers().
 * Unmaps the buffer if it is still mapped, drops the BO reference, and
 * frees the driver-side wrapper struct.
 */
static void
intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
{
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* Buffer objects are automatically unmapped when deleting according
    * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy
    * (though it does if you call glDeleteBuffers)
    */
   if (obj->Pointer)
      intel_bufferobj_unmap(ctx, obj);

   drm_intel_bo_unreference(intel_obj->buffer);
   free(intel_obj);
}
/**
 * The BufferData() driver hook.
 *
 * Implements glBufferData(), which recreates a buffer object's data store
 * and populates it with the given data, if present.
 *
 * Any data that was previously stored in the buffer object is lost.
 *
 * \return true for success, false if out of memory
 */
static GLboolean
intel_bufferobj_data(struct gl_context * ctx,
                     GLenum target,
                     GLsizeiptrARB size,
                     const GLvoid * data,
                     GLenum usage, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   /* Part of the ABI, but this function doesn't use it.
    */
   (void) target;

   intel_obj->Base.Size = size;
   intel_obj->Base.Usage = usage;

   assert(!obj->Pointer); /* Mesa should have unmapped it */

   /* Throw away the old store before allocating a new one. */
   if (intel_obj->buffer != NULL)
      release_buffer(intel_obj);

   if (size != 0) {
      intel_bufferobj_alloc_buffer(brw, intel_obj);
      if (!intel_obj->buffer)
         return false;

      /* data may legally be NULL: allocate storage but leave it undefined. */
      if (data != NULL)
         drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
   }

   return true;
}
/**
 * The BufferSubData() driver hook.
 *
 * Implements glBufferSubData(), which replaces a portion of the data in a
 * buffer object.
 *
 * If the data range specified by (size + offset) extends beyond the end of
 * the buffer or if data is NULL, no copy is performed.
 */
static void
intel_bufferobj_subdata(struct gl_context * ctx,
                        GLintptrARB offset,
                        GLsizeiptrARB size,
                        const GLvoid * data, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   bool busy;

   if (size == 0)
      return;

   assert(intel_obj);

   /* See if we can unsynchronized write the data into the user's BO. This
    * avoids GPU stalls in unfortunately common user patterns (uploading
    * sequentially into a BO, with draw calls in between each upload).
    *
    * Once we've hit this path, we mark this GL BO as preferring stalling to
    * blits, so that we can hopefully hit this path again in the future
    * (otherwise, an app that might occasionally stall but mostly not will end
    * up with blitting all the time, at the cost of bandwidth)
    */
   if (offset + size <= intel_obj->gpu_active_start ||
       intel_obj->gpu_active_end <= offset) {
      drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
      memcpy(intel_obj->buffer->virtual + offset, data, size);
      drm_intel_bo_unmap(intel_obj->buffer);

      /* Only record the preference if we've actually seen GPU activity;
       * an always-idle buffer tells us nothing.
       */
      if (intel_obj->gpu_active_end > intel_obj->gpu_active_start)
         intel_obj->prefer_stall_to_blit = true;
      return;
   }

   busy =
      drm_intel_bo_busy(intel_obj->buffer) ||
      drm_intel_bo_references(brw->batch.bo, intel_obj->buffer);

   if (busy) {
      if (size == intel_obj->Base.Size) {
         /* Replace the current busy bo so the subdata doesn't stall. */
         drm_intel_bo_unreference(intel_obj->buffer);
         intel_bufferobj_alloc_buffer(brw, intel_obj);
      } else if (!intel_obj->prefer_stall_to_blit) {
         perf_debug("Using a blit copy to avoid stalling on "
                    "glBufferSubData(%ld, %ld) (%ldkb) to a busy "
                    "(%d-%d) buffer object.\n",
                    (long)offset, (long)offset + size, (long)(size/1024),
                    intel_obj->gpu_active_start,
                    intel_obj->gpu_active_end);
         drm_intel_bo *temp_bo =
            drm_intel_bo_alloc(brw->bufmgr, "subdata temp", size, 64);

         drm_intel_bo_subdata(temp_bo, 0, size, data);

         /* Blit the staged data into place so the GPU copies it after the
          * outstanding work completes, instead of us stalling the CPU.
          */
         intel_emit_linear_blit(brw,
                                intel_obj->buffer, offset,
                                temp_bo, 0,
                                size);

         drm_intel_bo_unreference(temp_bo);
         return;
      } else {
         perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy "
                    "(%d-%d) buffer object.  Use glMapBufferRange() to "
                    "avoid this.\n",
                    (long)offset, (long)offset + size, (long)(size/1024),
                    intel_obj->gpu_active_start,
                    intel_obj->gpu_active_end);
         intel_batchbuffer_flush(brw);
      }
   }

   drm_intel_bo_subdata(intel_obj->buffer, offset, size, data);
   intel_bufferobj_mark_inactive(intel_obj);
}
/**
 * The GetBufferSubData() driver hook.
 *
 * Implements glGetBufferSubData(), which copies a subrange of a buffer
 * object into user memory.
 */
static void
intel_bufferobj_get_subdata(struct gl_context * ctx,
                            GLintptrARB offset,
                            GLsizeiptrARB size,
                            GLvoid * data, struct gl_buffer_object *obj)
{
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   struct brw_context *brw = brw_context(ctx);

   assert(intel_obj);

   /* If pending batched commands write this BO, submit them first so the
    * readback below observes their results.
    */
   if (drm_intel_bo_references(brw->batch.bo, intel_obj->buffer)) {
      intel_batchbuffer_flush(brw);
   }
   drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);

   /* drm_intel_bo_get_subdata waited for the GPU, so the BO is idle now. */
   intel_bufferobj_mark_inactive(intel_obj);
}
/**
 * The MapBufferRange() driver hook.
 *
 * This implements both glMapBufferRange() and glMapBuffer().
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother
 * flushing the batchbuffer before mapping the buffer, which can save blocking
 * in many cases.  If we would still block, and they allow the whole buffer
 * to be invalidated, then just allocate a new buffer to replace the old one.
 * If not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
                          GLintptr offset, GLsizeiptr length,
                          GLbitfield access, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the access is synchronized (like a normal buffer mapping), then get
    * things flushed out so the later mapping syncs appropriately through GEM.
    * If the user doesn't care about existing buffer contents and mapping would
    * cause us to block, then throw out the old buffer.
    *
    * If they set INVALIDATE_BUFFER, we can pitch the current contents to
    * achieve the required synchronization.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
      if (drm_intel_bo_references(brw->batch.bo, intel_obj->buffer)) {
         if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
            drm_intel_bo_unreference(intel_obj->buffer);
            intel_bufferobj_alloc_buffer(brw, intel_obj);
         } else {
            perf_debug("Stalling on the GPU for mapping a busy buffer "
                       "object\n");
            intel_batchbuffer_flush(brw);
         }
      } else if (drm_intel_bo_busy(intel_obj->buffer) &&
                 (access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
         drm_intel_bo_unreference(intel_obj->buffer);
         intel_bufferobj_alloc_buffer(brw, intel_obj);
      }
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       (access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
         /* With explicit flushing, stage writes in malloc'd memory and
          * upload only the ranges the app flushes.
          */
         intel_obj->range_map_buffer = malloc(length);
         obj->Pointer = intel_obj->range_map_buffer;
      } else {
         intel_obj->range_map_bo = drm_intel_bo_alloc(brw->bufmgr,
                                                      "range map",
                                                      length, 64);
         if (!(access & GL_MAP_READ_BIT)) {
            drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
         } else {
            drm_intel_bo_map(intel_obj->range_map_bo,
                             (access & GL_MAP_WRITE_BIT) != 0);
         }
         obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

   if (access & GL_MAP_UNSYNCHRONIZED_BIT)
      drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
   else if (!(access & GL_MAP_READ_BIT)) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_bufferobj_mark_inactive(intel_obj);
   } else {
      /* drm_intel_bo_map waits for outstanding GPU access, so the buffer
       * is idle afterwards.
       */
      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
      intel_bufferobj_mark_inactive(intel_obj);
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}
396 * The FlushMappedBufferRange() driver hook.
398 * Implements glFlushMappedBufferRange(), which signifies that modifications
399 * have been made to a range of a mapped buffer, and it should be flushed.
401 * This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT.
403 * Ideally we'd use a BO to avoid taking up cache space for the temporary
404 * data, but FlushMappedBufferRange may be followed by further writes to
405 * the pointer, so we would have to re-map after emitting our blit, which
406 * would defeat the point.
409 intel_bufferobj_flush_mapped_range(struct gl_context
*ctx
,
410 GLintptr offset
, GLsizeiptr length
,
411 struct gl_buffer_object
*obj
)
413 struct brw_context
*brw
= brw_context(ctx
);
414 struct intel_buffer_object
*intel_obj
= intel_buffer_object(obj
);
415 drm_intel_bo
*temp_bo
;
417 /* Unless we're in the range map using a temporary system buffer,
418 * there's no work to do.
420 if (intel_obj
->range_map_buffer
== NULL
)
426 temp_bo
= drm_intel_bo_alloc(brw
->bufmgr
, "range map flush", length
, 64);
428 drm_intel_bo_subdata(temp_bo
, 0, length
, intel_obj
->range_map_buffer
);
430 intel_emit_linear_blit(brw
,
431 intel_obj
->buffer
, obj
->Offset
+ offset
,
434 intel_bufferobj_mark_gpu_usage(intel_obj
, obj
->Offset
+ offset
, length
);
436 drm_intel_bo_unreference(temp_bo
);
/**
 * The UnmapBuffer() driver hook.
 *
 * Implements glUnmapBuffer().  Tears down whichever mapping strategy
 * MapBufferRange chose: a malloc'd staging buffer (FLUSH_EXPLICIT), a
 * temporary range-map BO (blitted back here), or a direct BO mapping.
 */
static GLboolean
intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);
   assert(obj->Pointer);
   if (intel_obj->range_map_buffer != NULL) {
      /* Since we've emitted some blits to buffers that will (likely) be used
       * in rendering operations in other cache domains in this batch, emit a
       * flush.  Once again, we wish for a domain tracker in libdrm to cover
       * usage inside of a batchbuffer.
       */
      intel_batchbuffer_emit_mi_flush(brw);
      free(intel_obj->range_map_buffer);
      intel_obj->range_map_buffer = NULL;
   } else if (intel_obj->range_map_bo != NULL) {
      drm_intel_bo_unmap(intel_obj->range_map_bo);

      /* Copy the staged writes back into the real BO at the mapped range. */
      intel_emit_linear_blit(brw,
                             intel_obj->buffer, obj->Offset,
                             intel_obj->range_map_bo, 0,
                             obj->Length);
      intel_bufferobj_mark_gpu_usage(intel_obj, obj->Offset, obj->Length);

      /* Since we've emitted some blits to buffers that will (likely) be used
       * in rendering operations in other cache domains in this batch, emit a
       * flush.  Once again, we wish for a domain tracker in libdrm to cover
       * usage inside of a batchbuffer.
       */
      intel_batchbuffer_emit_mi_flush(brw);

      drm_intel_bo_unreference(intel_obj->range_map_bo);
      intel_obj->range_map_bo = NULL;
   } else if (intel_obj->buffer != NULL) {
      drm_intel_bo_unmap(intel_obj->buffer);
   }
   obj->Pointer = NULL;
   obj->Offset = 0;
   obj->Length = 0;

   return true;
}
/**
 * Gets a pointer to the object's BO, and marks the given range as being used
 * on the GPU.
 *
 * Anywhere that uses buffer objects in the pipeline should be using this to
 * mark the range of the buffer that is being accessed by the pipeline.
 */
drm_intel_bo *
intel_bufferobj_buffer(struct brw_context *brw,
                       struct intel_buffer_object *intel_obj,
                       uint32_t offset, uint32_t size)
{
   /* This is needed so that things like transform feedback and texture buffer
    * objects that need a BO but don't want to check that they exist for
    * draw-time validation can just always get a BO from a GL buffer object.
    */
   if (intel_obj->buffer == NULL)
      intel_bufferobj_alloc_buffer(brw, intel_obj);

   intel_bufferobj_mark_gpu_usage(intel_obj, offset, size);

   return intel_obj->buffer;
}
515 * The CopyBufferSubData() driver hook.
517 * Implements glCopyBufferSubData(), which copies a portion of one buffer
518 * object's data to another. Independent source and destination offsets
522 intel_bufferobj_copy_subdata(struct gl_context
*ctx
,
523 struct gl_buffer_object
*src
,
524 struct gl_buffer_object
*dst
,
525 GLintptr read_offset
, GLintptr write_offset
,
528 struct brw_context
*brw
= brw_context(ctx
);
529 struct intel_buffer_object
*intel_src
= intel_buffer_object(src
);
530 struct intel_buffer_object
*intel_dst
= intel_buffer_object(dst
);
531 drm_intel_bo
*src_bo
, *dst_bo
;
536 dst_bo
= intel_bufferobj_buffer(brw
, intel_dst
, write_offset
, size
);
537 src_bo
= intel_bufferobj_buffer(brw
, intel_src
, read_offset
, size
);
539 intel_emit_linear_blit(brw
,
540 dst_bo
, write_offset
,
541 src_bo
, read_offset
, size
);
543 /* Since we've emitted some blits to buffers that will (likely) be used
544 * in rendering operations in other cache domains in this batch, emit a
545 * flush. Once again, we wish for a domain tracker in libdrm to cover
546 * usage inside of a batchbuffer.
548 intel_batchbuffer_emit_mi_flush(brw
);
552 intelInitBufferObjectFuncs(struct dd_function_table
*functions
)
554 functions
->NewBufferObject
= intel_bufferobj_alloc
;
555 functions
->DeleteBuffer
= intel_bufferobj_free
;
556 functions
->BufferData
= intel_bufferobj_data
;
557 functions
->BufferSubData
= intel_bufferobj_subdata
;
558 functions
->GetBufferSubData
= intel_bufferobj_get_subdata
;
559 functions
->MapBufferRange
= intel_bufferobj_map_range
;
560 functions
->FlushMappedBufferRange
= intel_bufferobj_flush_mapped_range
;
561 functions
->UnmapBuffer
= intel_bufferobj_unmap
;
562 functions
->CopyBufferSubData
= intel_bufferobj_copy_subdata
;