/**************************************************************************
 *
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file intel_buffer_objects.c
 *
 * This provides core GL buffer object functionality.
 */

#include "main/imports.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/bufferobj.h"

#include "brw_context.h"
#include "intel_blit.h"
#include "intel_buffer_objects.h"
#include "intel_batchbuffer.h"

static GLboolean
intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);

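/**
 * Marks a range of the buffer object's BO as having pending GPU usage, so
 * that later CPU writes to that range know they may need to synchronize,
 * blit, or replace the BO instead of writing in place.
 */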
static void
intel_bufferobj_mark_gpu_usage(struct intel_buffer_object *intel_obj,
                               uint32_t offset, uint32_t size)
{
   intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset);
   intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size);
}

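/**
 * Resets the tracked GPU-active range to empty (start > end), so that
 * subsequent CPU writes can take the unsynchronized upload path.
 */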
static void
intel_bufferobj_mark_inactive(struct intel_buffer_object *intel_obj)
{
   intel_obj->gpu_active_start = ~0;
   intel_obj->gpu_active_end = 0;
}

/** Allocates a new drm_intel_bo to store the data for the buffer object. */
static void
intel_bufferobj_alloc_buffer(struct brw_context *brw,
                             struct intel_buffer_object *intel_obj)
{
   intel_obj->buffer = drm_intel_bo_alloc(brw->bufmgr, "bufferobj",
                                          intel_obj->Base.Size, 64);

   /* The buffer might be bound as a uniform buffer, so the uniform buffer
    * state needs to be flagged for update.
    */
   brw->state.dirty.brw |= BRW_NEW_UNIFORM_BUFFER;

   intel_bufferobj_mark_inactive(intel_obj);
}

static void
release_buffer(struct intel_buffer_object *intel_obj)
{
   drm_intel_bo_unreference(intel_obj->buffer);
   intel_obj->buffer = NULL;
}

/**
 * The NewBufferObject() driver hook.
 *
 * Allocates a new intel_buffer_object structure and initializes it.
 *
 * There is some duplication between Mesa's buffer objects and our
 * bufmgr buffers: both have an integer handle and a hashtable to
 * look up an opaque structure.  It would be nice if the handles and
 * internal structures were somehow shared.
 */
static struct gl_buffer_object *
intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target)
{
   struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);

   _mesa_initialize_buffer_object(ctx, &obj->Base, name, target);

   obj->buffer = NULL;

   return &obj->Base;
}

/**
 * The DeleteBuffer() driver hook.
 *
 * Deletes a single OpenGL buffer object.  Used by glDeleteBuffers().
 */
static void
intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
{
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* Buffer objects are automatically unmapped when deleted according to
    * the spec, but Mesa doesn't call UnmapBuffer for us at context destroy
    * time (though it does if you call glDeleteBuffers).
    */
   if (obj->Pointer)
      intel_bufferobj_unmap(ctx, obj);

   drm_intel_bo_unreference(intel_obj->buffer);
   free(intel_obj);
}


/**
 * The BufferData() driver hook.
 *
 * Implements glBufferData(), which recreates a buffer object's data store
 * and populates it with the given data, if present.
 *
 * Any data that was previously stored in the buffer object is lost.
 *
 * \return true for success, false if out of memory
 */
static GLboolean
intel_bufferobj_data(struct gl_context * ctx,
                     GLenum target,
                     GLsizeiptrARB size,
                     const GLvoid * data,
                     GLenum usage, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   /* Part of the ABI, but this function doesn't use it.
    */
   (void) target;

   intel_obj->Base.Size = size;
   intel_obj->Base.Usage = usage;

   assert(!obj->Pointer); /* Mesa should have unmapped it */

   if (intel_obj->buffer != NULL)
      release_buffer(intel_obj);

   if (size != 0) {
      intel_bufferobj_alloc_buffer(brw, intel_obj);
      if (!intel_obj->buffer)
         return false;

      if (data != NULL)
         drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
   }

   return true;
}


/**
 * The BufferSubData() driver hook.
 *
 * Implements glBufferSubData(), which replaces a portion of the data in a
 * buffer object.
 *
 * If the data range specified by (size + offset) extends beyond the end of
 * the buffer or if data is NULL, no copy is performed.
 */
static void
intel_bufferobj_subdata(struct gl_context * ctx,
                        GLintptrARB offset,
                        GLsizeiptrARB size,
                        const GLvoid * data, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   bool busy;

   if (size == 0)
      return;

   assert(intel_obj);

   /* See if we can write the data into the user's BO without synchronizing.
    * This avoids GPU stalls in unfortunately common usage patterns (uploading
    * sequentially into a BO, with draw calls in between each upload).
    *
    * Once we've hit this path, we mark this GL BO as preferring stalling to
    * blits, so that we can hopefully hit this path again in the future.
    * (Otherwise, an app that occasionally stalls but mostly doesn't would end
    * up blitting all the time, at the cost of bandwidth.)
    */
   if (brw->has_llc) {
      if (offset + size <= intel_obj->gpu_active_start ||
          intel_obj->gpu_active_end <= offset) {
         drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
         memcpy(intel_obj->buffer->virtual + offset, data, size);
         drm_intel_bo_unmap(intel_obj->buffer);

         if (intel_obj->gpu_active_end > intel_obj->gpu_active_start)
            intel_obj->prefer_stall_to_blit = true;
         return;
      }
   }

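   /* Otherwise, check whether writing in place would stall: the BO is busy if
    * the GPU is still using it, or if it's referenced by the unflushed batch.
    */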
   busy =
      drm_intel_bo_busy(intel_obj->buffer) ||
      drm_intel_bo_references(brw->batch.bo, intel_obj->buffer);

   if (busy) {
      if (size == intel_obj->Base.Size) {
         /* Replace the current busy bo so the subdata doesn't stall. */
         drm_intel_bo_unreference(intel_obj->buffer);
         intel_bufferobj_alloc_buffer(brw, intel_obj);
      } else if (!intel_obj->prefer_stall_to_blit) {
         perf_debug("Using a blit copy to avoid stalling on "
                    "glBufferSubData(%ld, %ld) (%ldkb) to a busy "
                    "(%d-%d) buffer object.\n",
                    (long)offset, (long)offset + size, (long)(size/1024),
                    intel_obj->gpu_active_start,
                    intel_obj->gpu_active_end);
         drm_intel_bo *temp_bo =
            drm_intel_bo_alloc(brw->bufmgr, "subdata temp", size, 64);

         drm_intel_bo_subdata(temp_bo, 0, size, data);

         intel_emit_linear_blit(brw,
                                intel_obj->buffer, offset,
                                temp_bo, 0,
                                size);

         drm_intel_bo_unreference(temp_bo);
         return;
      } else {
         perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy "
                    "(%d-%d) buffer object.  Use glMapBufferRange() to "
                    "avoid this.\n",
                    (long)offset, (long)offset + size, (long)(size/1024),
                    intel_obj->gpu_active_start,
                    intel_obj->gpu_active_end);
         intel_batchbuffer_flush(brw);
      }
   }

   drm_intel_bo_subdata(intel_obj->buffer, offset, size, data);
   intel_bufferobj_mark_inactive(intel_obj);
}


/**
 * The GetBufferSubData() driver hook.
 *
 * Implements glGetBufferSubData(), which copies a subrange of a buffer
 * object into user memory.
 */
static void
intel_bufferobj_get_subdata(struct gl_context * ctx,
                            GLintptrARB offset,
                            GLsizeiptrARB size,
                            GLvoid * data, struct gl_buffer_object *obj)
{
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   struct brw_context *brw = brw_context(ctx);

   assert(intel_obj);
   if (drm_intel_bo_references(brw->batch.bo, intel_obj->buffer)) {
      intel_batchbuffer_flush(brw);
   }
   drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);

   intel_bufferobj_mark_inactive(intel_obj);
}


/**
 * The MapBufferRange() driver hook.
 *
 * This implements both glMapBufferRange() and glMapBuffer().
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother
 * flushing the batchbuffer before mapping the buffer, which can save blocking
 * in many cases.  If we would still block, and they allow the whole buffer
 * to be invalidated, then just allocate a new buffer to replace the old one.
 * If not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
                          GLintptr offset, GLsizeiptr length,
                          GLbitfield access, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

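   /* A buffer object with no data store (size zero) has no BO to map. */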
   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the access is synchronized (like a normal buffer mapping), then get
    * things flushed out so the later mapping syncs appropriately through GEM.
    * If the user doesn't care about existing buffer contents and mapping would
    * cause us to block, then throw out the old buffer.
    *
    * If they set INVALIDATE_BUFFER, we can pitch the current contents to
    * achieve the required synchronization.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
      if (drm_intel_bo_references(brw->batch.bo, intel_obj->buffer)) {
         if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
            drm_intel_bo_unreference(intel_obj->buffer);
            intel_bufferobj_alloc_buffer(brw, intel_obj);
         } else {
            perf_debug("Stalling on the GPU for mapping a busy buffer "
                       "object\n");
            intel_batchbuffer_flush(brw);
         }
      } else if (drm_intel_bo_busy(intel_obj->buffer) &&
                 (access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
         drm_intel_bo_unreference(intel_obj->buffer);
         intel_bufferobj_alloc_buffer(brw, intel_obj);
      }
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       (access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
         intel_obj->range_map_buffer = malloc(length);
         obj->Pointer = intel_obj->range_map_buffer;
      } else {
         intel_obj->range_map_bo = drm_intel_bo_alloc(brw->bufmgr,
                                                      "range map",
                                                      length, 64);
         if (!(access & GL_MAP_READ_BIT)) {
            drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
         } else {
            drm_intel_bo_map(intel_obj->range_map_bo,
                             (access & GL_MAP_WRITE_BIT) != 0);
         }
         obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

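   /* Map the whole BO directly: unsynchronized if the caller asked for it,
    * through the GTT for write-only access, and with a regular CPU map
    * (read-only or read/write) otherwise.
    */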
   if (access & GL_MAP_UNSYNCHRONIZED_BIT)
      drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
   else if (!(access & GL_MAP_READ_BIT)) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_bufferobj_mark_inactive(intel_obj);
   } else {
      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
      intel_bufferobj_mark_inactive(intel_obj);
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}

/**
 * The FlushMappedBufferRange() driver hook.
 *
 * Implements glFlushMappedBufferRange(), which signifies that modifications
 * have been made to a range of a mapped buffer, and it should be flushed.
 *
 * This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT.
 *
 * Ideally we'd use a BO to avoid taking up cache space for the temporary
 * data, but FlushMappedBufferRange may be followed by further writes to
 * the pointer, so we would have to re-map after emitting our blit, which
 * would defeat the point.
 */
static void
intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
                                   GLintptr offset, GLsizeiptr length,
                                   struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   drm_intel_bo *temp_bo;

   /* Unless the range was mapped through a temporary system memory buffer,
    * there's no work to do here.
    */
   if (intel_obj->range_map_buffer == NULL)
      return;

   if (length == 0)
      return;

   temp_bo = drm_intel_bo_alloc(brw->bufmgr, "range map flush", length, 64);

   /* The flush offset is relative to the start of the mapping, so the
    * modified data lives at range_map_buffer + offset.
    */
   drm_intel_bo_subdata(temp_bo, 0, length,
                        intel_obj->range_map_buffer + offset);

   intel_emit_linear_blit(brw,
                          intel_obj->buffer, obj->Offset + offset,
                          temp_bo, 0,
                          length);
   intel_bufferobj_mark_gpu_usage(intel_obj, obj->Offset + offset, length);

   drm_intel_bo_unreference(temp_bo);
}


/**
 * The UnmapBuffer() driver hook.
 *
 * Implements glUnmapBuffer().
 */
static GLboolean
intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);
   assert(obj->Pointer);
   if (intel_obj->range_map_buffer != NULL) {
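      /* The malloc'ed temporary is only used with GL_MAP_FLUSH_EXPLICIT_BIT;
       * ranges the application flushed have already been blitted into the
       * real BO by intel_bufferobj_flush_mapped_range(), so all that's left
       * is to flush the batch cache and free the temporary.
       */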
      /* Since we've emitted some blits to buffers that will (likely) be used
       * in rendering operations in other cache domains in this batch, emit a
       * flush.  Once again, we wish for a domain tracker in libdrm to cover
       * usage inside of a batchbuffer.
       */
      intel_batchbuffer_emit_mi_flush(brw);
      free(intel_obj->range_map_buffer);
      intel_obj->range_map_buffer = NULL;
   } else if (intel_obj->range_map_bo != NULL) {
      drm_intel_bo_unmap(intel_obj->range_map_bo);

      intel_emit_linear_blit(brw,
                             intel_obj->buffer, obj->Offset,
                             intel_obj->range_map_bo, 0,
                             obj->Length);
      intel_bufferobj_mark_gpu_usage(intel_obj, obj->Offset, obj->Length);

      /* Since we've emitted some blits to buffers that will (likely) be used
       * in rendering operations in other cache domains in this batch, emit a
       * flush.  Once again, we wish for a domain tracker in libdrm to cover
       * usage inside of a batchbuffer.
       */
      intel_batchbuffer_emit_mi_flush(brw);

      drm_intel_bo_unreference(intel_obj->range_map_bo);
      intel_obj->range_map_bo = NULL;
   } else if (intel_obj->buffer != NULL) {
      drm_intel_bo_unmap(intel_obj->buffer);
   }
   obj->Pointer = NULL;
   obj->Offset = 0;
   obj->Length = 0;

   return true;
}

/**
 * Gets a pointer to the object's BO, and marks the given range as being used
 * on the GPU.
 *
 * Anywhere that uses buffer objects in the pipeline should be using this to
 * mark the range of the buffer that is being accessed by the pipeline.
 */
drm_intel_bo *
intel_bufferobj_buffer(struct brw_context *brw,
                       struct intel_buffer_object *intel_obj,
                       uint32_t offset, uint32_t size)
{
   /* This is needed so that things like transform feedback and texture buffer
    * objects, which need a BO but don't want to check whether one exists
    * during draw-time validation, can always get a BO from a GL buffer object.
    */
   if (intel_obj->buffer == NULL)
      intel_bufferobj_alloc_buffer(brw, intel_obj);

   intel_bufferobj_mark_gpu_usage(intel_obj, offset, size);

   return intel_obj->buffer;
}

/**
 * The CopyBufferSubData() driver hook.
 *
 * Implements glCopyBufferSubData(), which copies a portion of one buffer
 * object's data to another.  Independent source and destination offsets
 * are allowed.
 */
static void
intel_bufferobj_copy_subdata(struct gl_context *ctx,
                             struct gl_buffer_object *src,
                             struct gl_buffer_object *dst,
                             GLintptr read_offset, GLintptr write_offset,
                             GLsizeiptr size)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_buffer_object *intel_src = intel_buffer_object(src);
   struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
   drm_intel_bo *src_bo, *dst_bo;

   if (size == 0)
      return;

   dst_bo = intel_bufferobj_buffer(brw, intel_dst, write_offset, size);
   src_bo = intel_bufferobj_buffer(brw, intel_src, read_offset, size);

   intel_emit_linear_blit(brw,
                          dst_bo, write_offset,
                          src_bo, read_offset, size);

   /* Since we've emitted some blits to buffers that will (likely) be used
    * in rendering operations in other cache domains in this batch, emit a
    * flush.  Once again, we wish for a domain tracker in libdrm to cover
    * usage inside of a batchbuffer.
    */
   intel_batchbuffer_emit_mi_flush(brw);
}

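/**
 * Plugs our buffer object implementation into the driver function table
 * that core Mesa dispatches buffer object calls through.
 */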
void
intelInitBufferObjectFuncs(struct dd_function_table *functions)
{
   functions->NewBufferObject = intel_bufferobj_alloc;
   functions->DeleteBuffer = intel_bufferobj_free;
   functions->BufferData = intel_bufferobj_data;
   functions->BufferSubData = intel_bufferobj_subdata;
   functions->GetBufferSubData = intel_bufferobj_get_subdata;
   functions->MapBufferRange = intel_bufferobj_map_range;
   functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
   functions->UnmapBuffer = intel_bufferobj_unmap;
   functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
}