 static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                  uint64_t size,
                                                  unsigned alignment,
-                                                 unsigned usage,
                                                  enum radeon_bo_domain initial_domain,
                                                  unsigned flags,
-                                                 unsigned pb_cache_bucket)
+                                                 int heap)
 {
    struct amdgpu_bo_alloc_request request = {0};
    amdgpu_bo_handle buf_handle;
    unsigned va_gap_size;
    int r;
-   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
+   /* VRAM or GTT must be specified, but not both at the same time. */
+   assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1);
+
    bo = CALLOC_STRUCT(amdgpu_winsys_bo);
    if (!bo) {
       return NULL;
    }
-   pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
-                       pb_cache_bucket);
+   if (heap >= 0) {
+      pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
+                          heap);
+   }
    request.alloc_size = size;
    request.phys_alignment = alignment;
    if (initial_domain & RADEON_DOMAIN_GTT)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+   /* If VRAM is just stolen system memory, allow both VRAM and
+    * GTT, whichever has free space. If a buffer is evicted from
+    * VRAM to GTT, it will stay there.
+    *
+    * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
+    * placements even with a low amount of stolen VRAM.
+    */
+   if (!ws->info.has_dedicated_vram && ws->info.drm_minor < 6)
+      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
       request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    if (flags & RADEON_FLAG_GTT_WC)
       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+   /* TODO: Enable this once the kernel handles it efficiently. */
+   /*if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
        ws->info.drm_minor >= 20)
-      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
+      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;*/
    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
    if (r) {
    if (r)
       goto error_va_alloc;
-   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
+   unsigned vm_flags = AMDGPU_VM_PAGE_READABLE |
+                       AMDGPU_VM_PAGE_EXECUTABLE;
+
+   if (!(flags & RADEON_FLAG_READ_ONLY))
+      vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+
+   r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
+                           AMDGPU_VA_OP_MAP);
    if (r)
       goto error_va_map;
    pipe_reference_init(&bo->base.reference, 1);
    bo->base.alignment = alignment;
-   bo->base.usage = usage;
+   bo->base.usage = 0;
    bo->base.size = size;
    bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
    bo->ws = ws;
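The single heap index returned by radeon_get_heap_index() replaces the old pair of a one-bit usage mask and a separate pb_cache_bucket: one small integer now identifies the cached heap for both pb_cache_init_entry() above and pb_cache_reclaim_buffer() below, with a negative value meaning the buffer bypasses the cache entirely. A minimal sketch of the idea; the enum values, their ordering, and the helper name are illustrative assumptions, not the actual Mesa definitions:

/* Illustrative sketch only: map (domain, flags) to a small heap index so
 * that one integer can select the pb_cache bucket. This is not the real
 * radeon_get_heap_index() implementation. */
enum example_heap {
   EXAMPLE_HEAP_VRAM,
   EXAMPLE_HEAP_VRAM_GTT,
   EXAMPLE_HEAP_GTT_WC,
   EXAMPLE_HEAP_GTT,
   EXAMPLE_MAX_CACHED_HEAPS,
};

static int example_get_heap_index(enum radeon_bo_domain domain, unsigned flags)
{
   /* Only buffers that are never shared between processes are cached. */
   if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
      return -1;

   if (domain == RADEON_DOMAIN_VRAM)
      return EXAMPLE_HEAP_VRAM;
   if (domain == RADEON_DOMAIN_VRAM_GTT)
      return EXAMPLE_HEAP_VRAM_GTT;
   if (domain == RADEON_DOMAIN_GTT)
      return (flags & RADEON_FLAG_GTT_WC) ? EXAMPLE_HEAP_GTT_WC
                                          : EXAMPLE_HEAP_GTT;
   return -1; /* combination that cannot be cached */
}

With -1 meaning "not cacheable", amdgpu_create_bo() can simply skip pb_cache_init_entry() when heap < 0, which is what the first hunk above does.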
 {
    struct amdgpu_winsys *ws = amdgpu_winsys(rws);
    struct amdgpu_winsys_bo *bo;
-   unsigned usage = 0, pb_cache_bucket = 0;
+   int heap = -1;
    /* VRAM implies WC. This is not optional. */
    assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
    /* NO_CPU_ACCESS is valid with VRAM only. */
    assert(domain == RADEON_DOMAIN_VRAM || !(flags & RADEON_FLAG_NO_CPU_ACCESS));
+   /* Sparse buffers must have NO_CPU_ACCESS set. */
+   assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS);
+
    /* Sub-allocate small buffers from slabs. */
    if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
        size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
    if (flags & RADEON_FLAG_SPARSE) {
       assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0);
-      flags |= RADEON_FLAG_NO_CPU_ACCESS;
-
       return amdgpu_bo_sparse_create(ws, size, domain, flags);
    }
    bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
    if (use_reusable_pool) {
-      int heap = radeon_get_heap_index(domain, flags);
+      heap = radeon_get_heap_index(domain, flags);
       assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
-      usage = 1 << heap; /* Only set one usage bit for each heap. */
-
-      pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
-      assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
       /* Get a buffer from the cache. */
       bo = (struct amdgpu_winsys_bo*)
-           pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
-                                   pb_cache_bucket);
+           pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap);
       if (bo)
          return &bo->base;
    }
    /* Create a new one. */
-   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
-                         pb_cache_bucket);
+   bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
    if (!bo) {
       /* Clear the cache and try again. */
       pb_slabs_reclaim(&ws->bo_slabs);
       pb_cache_release_all_buffers(&ws->bo_cache);
-      bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
-                            pb_cache_bucket);
+      bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap);
       if (!bo)
          return NULL;
    }
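From the caller's side nothing changes except the flag semantics: only allocations marked RADEON_FLAG_NO_INTERPROCESS_SHARING are eligible for the reusable pool and for the clear-and-retry path above. A hypothetical usage sketch, assuming the radeon_winsys buffer_create hook still takes (ws, size, alignment, domain, flags):

/* Hypothetical caller; rws is the driver's struct radeon_winsys pointer.
 * A private, CPU-invisible VRAM buffer: VRAM placement requires GTT_WC
 * (see the assert above), NO_CPU_ACCESS is allowed because the domain is
 * VRAM only, and NO_INTERPROCESS_SHARING makes the buffer eligible for
 * the reusable cache. */
struct pb_buffer *buf =
   rws->buffer_create(rws, 64 * 1024, 4096, RADEON_DOMAIN_VRAM,
                      RADEON_FLAG_GTT_WC |
                      RADEON_FLAG_NO_CPU_ACCESS |
                      RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!buf)
   return NULL; /* allocation failed even after the caches were flushed */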