struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
struct amdgpu_winsys *ws = bo->ws;
int64_t abs_timeout;
- int i;
if (timeout == 0) {
if (p_atomic_read(&bo->num_active_ioctls))
if (timeout == 0) {
pipe_mutex_lock(ws->bo_fence_lock);
- for (i = 0; i < RING_LAST; i++)
- if (bo->fence[i]) {
- if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
- /* Release the idle fence to avoid checking it again later. */
- amdgpu_fence_reference(&bo->fence[i], NULL);
- } else {
- pipe_mutex_unlock(ws->bo_fence_lock);
- return false;
- }
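+ /* With a single fence per buffer, one non-blocking check is enough. */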
+ if (bo->fence) {
+ if (amdgpu_fence_wait(bo->fence, 0, false)) {
+ /* Release the idle fence to avoid checking it again later. */
+ amdgpu_fence_reference(&bo->fence, NULL);
+ } else {
+ pipe_mutex_unlock(ws->bo_fence_lock);
+ return false;
}
+ }
pipe_mutex_unlock(ws->bo_fence_lock);
return true;
} else {
- struct pipe_fence_handle *fence[RING_LAST] = {};
- bool fence_idle[RING_LAST] = {};
+ struct pipe_fence_handle *fence = NULL;
+ bool fence_idle = false;
bool buffer_idle = true;
- /* Take references to all fences, so that we can wait for them
+ /* Take a reference to the fence, so that we can wait for it
* without the lock. */
pipe_mutex_lock(ws->bo_fence_lock);
- for (i = 0; i < RING_LAST; i++)
- amdgpu_fence_reference(&fence[i], bo->fence[i]);
+ amdgpu_fence_reference(&fence, bo->fence);
pipe_mutex_unlock(ws->bo_fence_lock);
- /* Now wait for the fences. */
- for (i = 0; i < RING_LAST; i++) {
- if (fence[i]) {
- if (amdgpu_fence_wait(fence[i], abs_timeout, true))
- fence_idle[i] = true;
- else
- buffer_idle = false;
- }
+ /* Now wait for the fence. */
+ if (fence) {
+ if (amdgpu_fence_wait(fence, abs_timeout, true))
+ fence_idle = true;
+ else
+ buffer_idle = false;
}
/* Release idle fences to avoid checking them again later. */
pipe_mutex_lock(ws->bo_fence_lock);
- for (i = 0; i < RING_LAST; i++) {
- if (fence[i] == bo->fence[i] && fence_idle[i])
- amdgpu_fence_reference(&bo->fence[i], NULL);
-
- amdgpu_fence_reference(&fence[i], NULL);
- }
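+ /* Only clear bo->fence if it is still the fence we waited on; a newer
+  * submission may have replaced it while the lock was dropped. */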
+ if (fence == bo->fence && fence_idle)
+ amdgpu_fence_reference(&bo->fence, NULL);
+ amdgpu_fence_reference(&fence, NULL);
pipe_mutex_unlock(ws->bo_fence_lock);
return buffer_idle;
void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
- int i;
pipe_mutex_lock(bo->ws->global_bo_list_lock);
LIST_DEL(&bo->global_list_item);
amdgpu_va_range_free(bo->va_handle);
amdgpu_bo_free(bo->bo);
- for (i = 0; i < RING_LAST; i++)
- amdgpu_fence_reference(&bo->fence[i], NULL);
+ amdgpu_fence_reference(&bo->fence, NULL);
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->info.gart_page_size);
+
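+ /* If the buffer is still mapped when destroyed, remove its size from
+  * the mapped VRAM/GTT statistics. */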
+ if (bo->map_count >= 1) {
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ bo->ws->mapped_vram -= bo->base.size;
+ else
+ bo->ws->mapped_gtt -= bo->base.size;
+ }
+
FREE(bo);
}
/* Clear the cache and try again. */
pb_cache_release_all_buffers(&bo->ws->bo_cache);
r = amdgpu_bo_cpu_map(bo->bo, &cpu);
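+ /* If mapping fails even after emptying the cache, give up. */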
+ if (r)
+ return NULL;
+ }
+
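+ /* Only the first mapping of the buffer is counted in the mapped
+  * VRAM/GTT statistics. */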
+ if (p_atomic_inc_return(&bo->map_count) == 1) {
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ bo->ws->mapped_vram += bo->base.size;
+ else
+ bo->ws->mapped_gtt += bo->base.size;
}
- return r ? NULL : cpu;
+ return cpu;
}
static void amdgpu_bo_unmap(struct pb_buffer *buf)
{
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
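+ /* Nothing to unmap or account for buffers created from user pointers. */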
+ if (bo->user_ptr)
+ return;
+
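+ /* On the last unmap, subtract the buffer from the mapped memory statistics. */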
+ if (p_atomic_dec_zero(&bo->map_count)) {
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ bo->ws->mapped_vram -= bo->base.size;
+ else
+ bo->ws->mapped_gtt -= bo->base.size;
+ }
+
amdgpu_bo_cpu_unmap(bo->bo);
}
unsigned alignment,
unsigned usage,
enum radeon_bo_domain initial_domain,
- unsigned flags)
+ unsigned flags,
+ unsigned pb_cache_bucket)
{
struct amdgpu_bo_alloc_request request = {0};
amdgpu_bo_handle buf_handle;
uint64_t va = 0;
struct amdgpu_winsys_bo *bo;
amdgpu_va_handle va_handle;
+ unsigned va_gap_size;
int r;
assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
return NULL;
}
- pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base);
+ pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base,
+ pb_cache_bucket);
request.alloc_size = size;
request.phys_alignment = alignment;
goto error_bo_alloc;
}
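+ /* With VM checking enabled, pad the VA allocation with a gap of
+  * MAX2(4 * alignment, 64KB) after the buffer. */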
+ va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- size, alignment, 0, &va, &va_handle, 0);
+ size + va_gap_size, alignment, 0, &va, &va_handle, 0);
if (r)
goto error_va_alloc;
{
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
struct amdgpu_winsys_bo *bo;
- unsigned usage = 0;
+ unsigned usage = 0, pb_cache_bucket;
/* Align size to page size. This is the minimum alignment for normal
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
assert(flags < sizeof(usage) * 8 - 3);
usage |= 1 << (flags + 3);
+ /* Determine the pb_cache bucket for minimizing pb_cache misses. */
+ pb_cache_bucket = 0;
+ if (size <= 4096) /* small buffers */
+ pb_cache_bucket += 1;
+ if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
+ pb_cache_bucket += 2;
+ if (flags == RADEON_FLAG_GTT_WC) /* WC */
+ pb_cache_bucket += 4;
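+ /* The three tests above form a 3-bit bucket index, so at most 8 buckets
+  * are used. */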
+ assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
+
/* Get a buffer from the cache. */
bo = (struct amdgpu_winsys_bo*)
- pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage);
+ pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
+ pb_cache_bucket);
if (bo)
return &bo->base;
/* Create a new one. */
- bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags);
+ bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
+ pb_cache_bucket);
if (!bo) {
/* Clear the cache and try again. */
pb_cache_release_all_buffers(&ws->bo_cache);
- bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags);
+ bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
+ pb_cache_bucket);
if (!bo)
return NULL;
}
return NULL;
}
-static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
- unsigned stride, unsigned offset,
- unsigned slice_size,
- struct winsys_handle *whandle)
+static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
+ unsigned stride, unsigned offset,
+ unsigned slice_size,
+ struct winsys_handle *whandle)
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
enum amdgpu_bo_handle_type type;
type = amdgpu_bo_handle_type_kms;
break;
default:
- return FALSE;
+ return false;
}
r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
if (r)
- return FALSE;
+ return false;
whandle->stride = stride;
whandle->offset = offset;
whandle->offset += slice_size * whandle->layer;
bo->is_shared = true;
- return TRUE;
+ return true;
}
static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,