#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_surface.h"

#include "nouveau_screen.h"
#include "nouveau_context.h"
#include "nouveau_winsys.h"
#include "nouveau_fence.h"
#include "nouveau_buffer.h"
#include "nouveau_mm.h"

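/* Transfers of at most this many bytes can be staged in malloc'ed memory and
 * written inline through the push buffer (push_data/push_cb) instead of going
 * through a GART staging bo; see nouveau_transfer_staging(). */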
#define NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD 192

struct nouveau_transfer {
   struct pipe_transfer base;

   uint8_t *map;
   struct nouveau_bo *bo;
   struct nouveau_mm_allocation *mm;
   uint32_t offset;
};

static inline struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
   return (struct nouveau_transfer *)transfer;
}

static inline bool
nouveau_buffer_malloc(struct nv04_resource *buf)
{
   if (!buf->data)
      buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
   return !!buf->data;
}

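/* Allocate backing storage for the buffer in the requested domain: VRAM
 * (falling back to GART if the VRAM suballocator fails), GART, or, for
 * domain 0, plain system memory via nouveau_buffer_malloc(). */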
static inline bool
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = align(buf->base.width0, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return false;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return false;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   util_range_set_empty(&buf->valid_buffer_range);

   return true;
}

static inline void
release_allocation(struct nouveau_mm_allocation **mm,
                   struct nouveau_fence *fence)
{
   nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
   (*mm) = NULL;
}

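/* Releases the buffer's GPU storage. If the buffer's fence has not been
 * flushed yet, the bo is unreferenced from a fence callback so it stays alive
 * until the GPU is done with it. */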
void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
   if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
      nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
      buf->bo = NULL;
   } else {
      nouveau_bo_ref(NULL, &buf->bo);
   }

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid,
                           -(uint64_t)buf->base.width0);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_sys,
                           -(uint64_t)buf->base.width0);

   buf->domain = 0;
}

static inline bool
nouveau_buffer_reallocate(struct nouveau_screen *screen,
                          struct nv04_resource *buf, unsigned domain)
{
   nouveau_buffer_release_gpu_storage(buf);

   nouveau_fence_ref(NULL, &buf->fence);
   nouveau_fence_ref(NULL, &buf->fence_wr);

   buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;

   return nouveau_buffer_allocate(screen, buf, domain);
}

static void
nouveau_buffer_destroy(struct pipe_screen *pscreen,
                       struct pipe_resource *presource)
{
   struct nv04_resource *res = nv04_resource(presource);

   nouveau_buffer_release_gpu_storage(res);

   if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      align_free(res->data);

   nouveau_fence_ref(NULL, &res->fence);
   nouveau_fence_ref(NULL, &res->fence_wr);

   util_range_destroy(&res->valid_buffer_range);

   FREE(res);

   NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
}

/* Set up a staging area for the transfer. This is either done in "regular"
 * system memory if the driver supports push_data (nv50+) and the data is
 * small enough (and permit_pb == true), or in GART memory.
 */
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
                         struct nouveau_transfer *tx, bool permit_pb)
{
   const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
   const unsigned size = align(tx->base.box.width, 4) + adj;

   if (!nv->push_data)
      permit_pb = false;

   if ((size <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD) && permit_pb) {
      tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
      if (tx->map)
         tx->map += adj;
   } else {
      tx->mm =
         nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
      if (tx->bo) {
         tx->offset += adj;
         if (!nouveau_bo_map(tx->bo, 0, NULL))
            tx->map = (uint8_t *)tx->bo->map + tx->offset;
      }
   }
   return tx->map;
}

/* Copies data from the resource into the transfer's temporary GART
 * buffer. Also updates buf->data if present.
 *
 * Maybe just migrate to GART right away if we actually need to do this. */
static bool
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
      return false;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return true;
}

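/* Flushes the staged data for [offset, offset + size) back to the resource's
 * bo: refresh the staging copy from buf->data if a system-memory cache
 * exists, then upload it either with a bo-to-bo copy from the GART staging
 * area or inline through the push buffer (push_cb for dword-aligned updates,
 * push_data otherwise), and attach the current fence. */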
static void
nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
                       unsigned offset, unsigned size)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   uint8_t *data = tx->map + offset;
   const unsigned base = tx->base.box.x + offset;
   const bool can_cb = !((base | size) & 3);

   if (buf->data)
      memcpy(data, buf->data + base, size);
   else
      buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);

   if (tx->bo)
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
   if (nv->push_cb && can_cb)
      nv->push_cb(nv, buf,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);

   nouveau_fence_ref(nv->screen->fence.current, &buf->fence);
   nouveau_fence_ref(nv->screen->fence.current, &buf->fence_wr);
}

/* Does a CPU wait for the buffer's backing data to become reliably accessible
 * for write/read by waiting on the buffer's relevant fences.
 */
static inline bool
nouveau_buffer_sync(struct nouveau_context *nv,
                    struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_TRANSFER_READ) {
      if (!buf->fence_wr)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence_wr));
      if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
         return false;
   } else {
      if (!buf->fence)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence));
      if (!nouveau_fence_wait(buf->fence, &nv->debug))
         return false;

      nouveau_fence_ref(NULL, &buf->fence);
   }
   nouveau_fence_ref(NULL, &buf->fence_wr);

   return true;
}

static inline bool
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_TRANSFER_READ)
      return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
   else
      return (buf->fence && !nouveau_fence_signalled(buf->fence));
}

static inline void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
                             struct pipe_resource *resource,
                             const struct pipe_box *box,
                             unsigned usage)
{
   tx->base.resource = resource;
   tx->base.level = 0;
   tx->base.usage = usage;
   tx->base.box.x = box->x;
   tx->base.box.y = 0;
   tx->base.box.z = 0;
   tx->base.box.width = box->width;
   tx->base.box.height = 1;
   tx->base.box.depth = 1;
   tx->base.stride = 0;
   tx->base.layer_stride = 0;

   tx->bo = NULL;
   tx->map = NULL;
}

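/* Releases the transfer's staging storage: a staging bo is kept alive until
 * the current fence signals, while a malloc'ed pushbuf staging area is freed
 * immediately. */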
static inline void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
                            struct nouveau_transfer *tx)
{
   if (tx->map) {
      if (likely(tx->bo)) {
         nouveau_fence_work(nv->screen->fence.current,
                            nouveau_fence_unref_bo, tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->screen->fence.current);
      } else {
         align_free(tx->map -
                    (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
      }
   }
}

/* Creates a cache in system memory of the buffer data. */
static bool
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
   struct nouveau_transfer tx;
   bool ret;
   tx.base.resource = &buf->base;
   tx.base.box.x = 0;
   tx.base.box.width = buf->base.width0;
   tx.bo = NULL;
   tx.map = NULL;

   if (!buf->data)
      if (!nouveau_buffer_malloc(buf))
         return false;
   if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
      return true;
   nv->stats.buf_cache_count++;

   if (!nouveau_transfer_staging(nv, &tx, false))
      return false;

   ret = nouveau_transfer_read(nv, &tx);
   if (ret) {
      buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
      memcpy(buf->data, tx.map, buf->base.width0);
   }
   nouveau_buffer_transfer_del(nv, &tx);
   return ret;
}

#define NOUVEAU_TRANSFER_DISCARD \
   (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)

/* Checks whether it is possible to completely discard the memory backing this
 * resource. This can be useful if we would otherwise have to wait for a read
 * operation to complete on this data.
 */
static inline bool
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
   if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
      return false;
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return false;
   if (unlikely(usage & PIPE_TRANSFER_PERSISTENT))
      return false;
   return buf->mm && nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE);
}

/* Returns a pointer to a memory area representing a window into the
 * resource's data.
 *
 * This may or may not be the _actual_ memory area of the resource. However
 * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
 * area, the contents of the returned map are copied over to the resource.
 *
 * The usage indicates what the caller plans to do with the map:
 *
 *   WRITE means that the user plans to write to it
 *
 *   READ means that the user plans on reading from it
 *
 *   DISCARD_WHOLE_RESOURCE means that the whole resource is going to be
 *   potentially overwritten, and even if it isn't, the parts that aren't
 *   overwritten don't need to be preserved.
 *
 *   DISCARD_RANGE means that all the data in the specified range is going to
 *   be overwritten.
 *
 * The strategy for determining what kind of memory area to return is complex,
 * see comments inside of the function.
 */
static void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_TRANSFER_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_TRANSFER_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   /* If we are trying to write to an uninitialized range, the user shouldn't
    * care what was there before. So we can treat the write as if the target
    * range were being discarded. Furthermore, since we know that even if this
    * buffer is busy due to GPU activity, because the contents were
    * uninitialized, the GPU can't care what was there, and so we can treat
    * the write as being unsynchronized.
    */
   if ((usage & PIPE_TRANSFER_WRITE) &&
       !util_ranges_intersect(&buf->valid_buffer_range,
                              box->x, box->x + box->width))
      usage |= PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_UNSYNCHRONIZED;

   if (usage & PIPE_TRANSFER_PERSISTENT)
      usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         /* Set up a staging area for the user to write to. It will be copied
          * back into VRAM on unmap. */
         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, true);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            /* The GPU is currently writing to this buffer. Copy its current
             * contents to a staging area in the GART. This is necessary since
             * not the whole area being mapped is being discarded.
             */
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, false);
            nouveau_transfer_read(nv, tx);
         } else {
            /* The buffer is currently idle. Create a staging area for writes,
             * and make sure that the cached data is up-to-date. */
            if (usage & PIPE_TRANSFER_WRITE)
               nouveau_transfer_staging(nv, tx, true);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   /* At this point, buf->domain == GART */

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    * relevant flags. If buf->mm is set, that means this resource is part of a
    * larger slab bo that holds multiple resources. So in that case, don't
    * wait on the whole slab and instead use the logic below to return a
    * reasonable buffer for that case.
    */
   ret = nouveau_bo_map(buf->bo,
                        buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                        nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_TRANSFER_UNSYNCHRONIZED) || !buf->mm)
      return map;

   /* If the GPU is currently reading/writing this buffer, we shouldn't
    * interfere with its progress. So instead we either wait for the GPU to
    * complete its operation, or set up a staging area to perform our work in.
    */
   if (nouveau_buffer_busy(buf, usage & PIPE_TRANSFER_READ_WRITE)) {
      if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else
      if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /* The whole range is being discarded, so it doesn't matter what was
          * there before. No need to copy anything over. */
         nouveau_transfer_staging(nv, tx, true);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else {
         /* It is expected that the returned buffer be a representation of the
          * data in question, so we must copy it over from the buffer. */
         nouveau_transfer_staging(nv, tx, true);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}

static void
nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
                                     struct pipe_transfer *transfer,
                                     const struct pipe_box *box)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->map)
      nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);

   util_range_add(&buf->valid_buffer_range,
                  tx->base.box.x + box->x,
                  tx->base.box.x + box->x + box->width);
}

/* Unmap stage of the transfer. If it was a WRITE transfer and the map that
 * was returned was not the real resource's data, this needs to transfer the
 * data back to the resource.
 *
 * Also marks vbo dirty based on the buffer's binding.
 */
static void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
                              struct pipe_transfer *transfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
         if (tx->map)
            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);

         util_range_add(&buf->valid_buffer_range,
                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
      }

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
         /* make sure we invalidate dedicated caches */
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = true;
      }
   }

   if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);

   nouveau_buffer_transfer_del(nv, tx);
   FREE(tx);
}

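/* Copies a range of data from src to dst. If both buffers have GPU storage,
 * the copy is done on the GPU and fences are attached; otherwise this falls
 * back to a CPU copy through util_resource_copy_region. */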
void
nouveau_copy_buffer(struct nouveau_context *nv,
                    struct nv04_resource *dst, unsigned dstx,
                    struct nv04_resource *src, unsigned srcx, unsigned size)
{
   assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER);

   if (likely(dst->domain) && likely(src->domain)) {
      nv->copy_data(nv,
                    dst->bo, dst->offset + dstx, dst->domain,
                    src->bo, src->offset + srcx, src->domain, size);

      dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence);
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence_wr);

      src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
      nouveau_fence_ref(nv->screen->fence.current, &src->fence);
   } else {
      struct pipe_box src_box;
      src_box.x = srcx;
      src_box.y = 0;
      src_box.z = 0;
      src_box.width = size;
      src_box.height = 1;
      src_box.depth = 1;
      util_resource_copy_region(&nv->pipe,
                                &dst->base, 0, dstx, 0, 0,
                                &src->base, 0, &src_box);
   }

   util_range_add(&dst->valid_buffer_range, dstx, dstx + size);
}

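/* Returns a CPU-accessible pointer to the resource's data at the given
 * offset, refreshing the system-memory cache of VRAM buffers and syncing
 * with the GPU for sub-allocated GART buffers where necessary. */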
void *
nouveau_resource_map_offset(struct nouveau_context *nv,
                            struct nv04_resource *res, uint32_t offset,
                            uint32_t flags)
{
   if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      return res->data + offset;

   if (res->domain == NOUVEAU_BO_VRAM) {
      if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
         nouveau_buffer_cache(nv, res);
   }
   if (res->domain != NOUVEAU_BO_GART)
      return res->data + offset;

   if (res->mm) {
      unsigned rw;
      rw = (flags & NOUVEAU_BO_WR) ? PIPE_TRANSFER_WRITE : PIPE_TRANSFER_READ;
      nouveau_buffer_sync(nv, res, rw);
      if (nouveau_bo_map(res->bo, 0, NULL))
         return NULL;
   } else {
      if (nouveau_bo_map(res->bo, flags, nv->client))
         return NULL;
   }
   return (uint8_t *)res->bo->map + res->offset + offset;
}

const struct u_resource_vtbl nouveau_buffer_vtbl =
{
   u_default_resource_get_handle,        /* get_handle */
   nouveau_buffer_destroy,               /* resource_destroy */
   nouveau_buffer_transfer_map,          /* transfer_map */
   nouveau_buffer_transfer_flush_region, /* transfer_flush_region */
   nouveau_buffer_transfer_unmap,        /* transfer_unmap */
   u_default_transfer_inline_write       /* transfer_inline_write */
};

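/* Creates a buffer resource, choosing VRAM or GART placement from the
 * resource's flags, bind and usage fields. */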
struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   bool ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   buffer->vtbl = &nouveau_buffer_vtbl;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                             PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
      buffer->domain = NOUVEAU_BO_GART;
   } else if (buffer->base.bind == 0 || (buffer->base.bind &
              (screen->vidmem_bindings & screen->sysmem_bindings))) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NV_VRAM_DOMAIN(screen);
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }

   /* There can be very special situations where we want non-gpu-mapped
    * buffers, but never through this interface.
    */
   assert(buffer->domain);
   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
   if (ret == false)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   util_range_init(&buffer->valid_buffer_range);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}

struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
                           unsigned bytes, unsigned bind)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   pipe_reference_init(&buffer->base.reference, 1);
   buffer->vtbl = &nouveau_buffer_vtbl;
   buffer->base.screen = pscreen;
   buffer->base.format = PIPE_FORMAT_R8_UNORM;
   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
   buffer->base.bind = bind;
   buffer->base.width0 = bytes;
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;

   util_range_init(&buffer->valid_buffer_range);
   util_range_add(&buffer->valid_buffer_range, 0, bytes);

   return &buffer->base;
}

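/* Reads size bytes at offset from the given bo into the buffer's
 * system-memory copy, allocating that copy if necessary. */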
static inline bool
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
                          struct nouveau_bo *bo, unsigned offset, unsigned size)
{
   if (!nouveau_buffer_malloc(buf))
      return false;
   if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
      return false;
   memcpy(buf->data, (uint8_t *)bo->map + offset, size);
   return true;
}

/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
bool
nouveau_buffer_migrate(struct nouveau_context *nv,
                       struct nv04_resource *buf, const unsigned new_domain)
{
   struct nouveau_screen *screen = nv->screen;
   struct nouveau_bo *bo;
   const unsigned old_domain = buf->domain;
   unsigned size = buf->base.width0;
   unsigned offset;
   int ret;

   assert(new_domain != old_domain);

   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
      if (!nouveau_buffer_allocate(screen, buf, new_domain))
         return false;
      ret = nouveau_bo_map(buf->bo, 0, nv->client);
      if (ret)
         return false;
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      align_free(buf->data);
   } else
   if (old_domain != 0 && new_domain != 0) {
      struct nouveau_mm_allocation *mm = buf->mm;

      if (new_domain == NOUVEAU_BO_VRAM) {
         /* keep a system memory copy of our data in case we hit a fallback */
         if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
            return false;
         if (nouveau_mesa_debug)
            debug_printf("migrating %u KiB to VRAM\n", size / 1024);
      }

      offset = buf->offset;
      bo = buf->bo;
      buf->bo = NULL;
      buf->mm = NULL;
      nouveau_buffer_allocate(screen, buf, new_domain);

      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

      nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
      if (mm)
         release_allocation(&mm, screen->fence.current);
   } else
   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
      struct nouveau_transfer tx;
      if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
         return false;
      tx.base.resource = &buf->base;
      tx.base.box.x = 0;
      tx.base.box.width = buf->base.width0;
      tx.bo = NULL;
      tx.map = NULL;
      if (!nouveau_transfer_staging(nv, &tx, false))
         return false;
      nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
      nouveau_buffer_transfer_del(nv, &tx);
   } else
      return false;

   assert(buf->domain == new_domain);
   return true;
}

/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * We'd like to only allocate @size bytes here, but then we'd have to rebase
 * the vertex indices ...
 */
bool
nouveau_user_buffer_upload(struct nouveau_context *nv,
                           struct nv04_resource *buf,
                           unsigned base, unsigned size)
{
   struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
   int ret;

   assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);

   buf->base.width0 = base + size;
   if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
      return false;

   ret = nouveau_bo_map(buf->bo, 0, nv->client);
   if (ret)
      return false;
   memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);

   return true;
}

/* Invalidate underlying buffer storage, reset fences, reallocate to non-busy
 * buffer.
 */
void
nouveau_buffer_invalidate(struct pipe_context *pipe,
                          struct pipe_resource *resource)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   int ref = buf->base.reference.count - 1;

   /* Shared buffers shouldn't get reallocated */
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return;

   /* We can't touch persistent/coherent buffers */
   if (buf->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                          PIPE_RESOURCE_FLAG_MAP_COHERENT))
      return;

   /* If the buffer is sub-allocated and not currently being written, just
    * wipe the valid buffer range. Otherwise we have to create fresh
    * storage. (We don't keep track of fences for non-sub-allocated BO's.)
    */
   if (buf->mm && !nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE)) {
      util_range_set_empty(&buf->valid_buffer_range);
   } else {
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }
}

/* Scratch data allocation. */

static inline int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                         unsigned size)
{
   return nouveau_bo_new(nv->screen->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
                         4096, size, NULL, pbo);
}

static void
nouveau_scratch_unref_bos(void *d)
{
   struct runout *b = d;
   int i;

   for (i = 0; i < b->nr; ++i)
      nouveau_bo_ref(NULL, &b->bo[i]);

   FREE(b);
}

void
nouveau_scratch_runout_release(struct nouveau_context *nv)
{
   if (!nv->scratch.runout)
      return;

   if (!nouveau_fence_work(nv->screen->fence.current, nouveau_scratch_unref_bos,
                           nv->scratch.runout))
      return;

   nv->scratch.end = 0;
   nv->scratch.runout = NULL;
}

/* Allocate an extra bo if we can't fit everything we need simultaneously.
 * (Could happen for very large user arrays.)
 */
static inline bool
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
   int ret;
   unsigned n;

   if (nv->scratch.runout)
      n = nv->scratch.runout->nr;
   else
      n = 0;
   nv->scratch.runout = REALLOC(nv->scratch.runout, n == 0 ? 0 :
                                (sizeof(*nv->scratch.runout) + (n + 0) * sizeof(void *)),
                                 sizeof(*nv->scratch.runout) + (n + 1) * sizeof(void *));
   nv->scratch.runout->nr = n + 1;
   nv->scratch.runout->bo[n] = NULL;

   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout->bo[n], size);
   if (!ret) {
      ret = nouveau_bo_map(nv->scratch.runout->bo[n], 0, NULL);
      if (ret)
         nouveau_bo_ref(NULL, &nv->scratch.runout->bo[--nv->scratch.runout->nr]);
   }
   if (!ret) {
      nv->scratch.current = nv->scratch.runout->bo[n];
      nv->scratch.offset = 0;
      nv->scratch.end = size;
      nv->scratch.map = nv->scratch.current->map;
   }
   return !ret;
}

/* Continue to next scratch buffer, if available (no wrapping, large enough).
 * Allocate it if it has not yet been created.
 */
static inline bool
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
   struct nouveau_bo *bo;
   int ret;
   const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;

   if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
      return false;
   nv->scratch.id = i;

   bo = nv->scratch.bo[i];
   if (!bo) {
      ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
      if (ret)
         return false;
      nv->scratch.bo[i] = bo;
   }
   nv->scratch.current = bo;
   nv->scratch.offset = 0;
   nv->scratch.end = nv->scratch.bo_size;

   ret = nouveau_bo_map(bo, NOUVEAU_BO_WR, nv->client);
   if (!ret)
      nv->scratch.map = bo->map;
   return !ret;
}

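/* Gets a fresh scratch area that can hold at least min_size bytes, trying
 * the next scratch bo first and falling back to a dedicated runout
 * allocation. */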
static bool
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
   bool ret;

   ret = nouveau_scratch_next(nv, min_size);
   if (!ret)
      ret = nouveau_scratch_runout(nv, min_size);
   return ret;
}

/* Copy data to a scratch buffer and return address & bo the data resides in. */
uint64_t
nouveau_scratch_data(struct nouveau_context *nv,
                     const void *data, unsigned base, unsigned size,
                     struct nouveau_bo **bo)
{
   unsigned bgn = MAX2(base, nv->scratch.offset);
   unsigned end = bgn + size;

   if (end >= nv->scratch.end) {
      end = base + size;
      if (!nouveau_scratch_more(nv, end))
         return 0;
      bgn = base;
   }
   nv->scratch.offset = align(end, 4);

   memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size);

   *bo = nv->scratch.current;
   return (*bo)->offset + (bgn - base);
}

void *
nouveau_scratch_get(struct nouveau_context *nv,
                    unsigned size, uint64_t *gpu_addr, struct nouveau_bo **pbo)
{
   unsigned bgn = nv->scratch.offset;
   unsigned end = nv->scratch.offset + size;

   if (end >= nv->scratch.end) {
      end = size;
      if (!nouveau_scratch_more(nv, end))
         return NULL;
      bgn = 0;
   }
   nv->scratch.offset = align(end, 4);

   *pbo = nv->scratch.current;
   *gpu_addr = nv->scratch.current->offset + bgn;
   return nv->scratch.map + bgn;
}