bo = device->ws->buffer_create(device->ws,
new_size, 4096,
RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS);
+ RADEON_FLAG_CPU_ACCESS|
+ RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!bo) {
cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS);
+ RADEON_FLAG_CPU_ACCESS|
+ RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!device->trace_bo)
return false;
if (bo_size) {
pool->bo = device->ws->buffer_create(device->ws, bo_size,
- 32, RADEON_DOMAIN_VRAM, 0);
+ 32, RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING);
pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo);
}
pool->size = bo_size;
unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
unsigned max_offchip_buffers;
unsigned hs_offchip_param = 0;
+ uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING; /* common creation flags for the scratch, ESGS/GSVS and tess ring buffer_create calls that pass ring_bo_flags */
if (!queue->has_tess_rings) {
if (needs_tess_rings)
add_tess_rings = true;
scratch_size,
4096,
RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
+ ring_bo_flags);
if (!scratch_bo)
goto fail;
} else
compute_scratch_size,
4096,
RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
+ ring_bo_flags);
if (!compute_scratch_bo)
goto fail;
esgs_ring_size,
4096,
RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
+ ring_bo_flags);
if (!esgs_ring_bo)
goto fail;
} else {
gsvs_ring_size,
4096,
RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
+ ring_bo_flags);
if (!gsvs_ring_bo)
goto fail;
} else {
tess_factor_ring_size,
256,
RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
+ ring_bo_flags);
if (!tess_factor_ring_bo)
goto fail;
tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
tess_offchip_ring_size,
256,
RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_CPU_ACCESS);
+ ring_bo_flags);
if (!tess_offchip_ring_bo)
goto fail;
} else {
size,
4096,
RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS);
+ RADEON_FLAG_CPU_ACCESS|RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!descriptor_bo)
goto fail;
} else
if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
flags |= RADEON_FLAG_IMPLICIT_SYNC;
+ if (!dedicate_info && !import_info) /* flag is added only when neither dedicate_info nor import_info is set; NOTE(review): confirm no other shareable allocation (e.g. exported memory) can reach this point with both NULL */
+ flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
domain, flags);
event->bo = device->ws->buffer_create(device->ws, 8, 8,
RADEON_DOMAIN_GTT,
- RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS);
+ RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!event->bo) {
vk_free2(&device->alloc, pAllocator, event);
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
size += 4 * pCreateInfo->queryCount;
pool->bo = device->ws->buffer_create(device->ws, size,
- 64, RADEON_DOMAIN_GTT, 0);
+ 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!pool->bo) {
vk_free2(&device->alloc, pAllocator, pool);
RADEON_FLAG_VIRTUAL = (1 << 3),
RADEON_FLAG_VA_UNCACHED = (1 << 4),
RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
+ RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6), /* buffer_create hint; the amdgpu create path turns it into AMDGPU_GEM_CREATE_VM_ALWAYS_VALID when drm_minor >= 20 */
};
enum radeon_bo_usage { /* bitfield */
slab->size = 256 * 1024;
slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
- RADEON_DOMAIN_VRAM, 0);
+ RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING);
slab->ptr = (char*)device->ws->buffer_map(slab->bo);
list_inithead(&slab->shaders);
device->gfx_init = device->ws->buffer_create(device->ws,
cs->cdw * 4, 4096,
RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS);
+ RADEON_FLAG_CPU_ACCESS|
+ RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!device->gfx_init)
goto fail;
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
+ if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && ws->info.drm_minor >= 20) { /* only applied when drm_minor >= 20; otherwise the flag has no effect in this block */
+ bo->is_local = true; /* checked by the CS add-buffer path, which returns early for local BOs */
+ request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID; /* always set together with is_local */
+ }
/* this won't do anything on pre 4.9 kernels */
if (ws->zero_all_vram_allocs && (initial_domain & RADEON_DOMAIN_VRAM))
uint64_t size;
struct radv_amdgpu_winsys *ws;
bool is_virtual;
+ bool is_local; /* true when the BO was created with AMDGPU_GEM_CREATE_VM_ALWAYS_VALID; such BOs are not added to a CS buffer list */
int ref_count;
union {
if (cs->ws->use_ib_bos) {
cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS);
+ RADEON_FLAG_CPU_ACCESS|
+ RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!cs->ib_buffer) {
free(cs);
return NULL;
cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS);
+ RADEON_FLAG_CPU_ACCESS|
+ RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!cs->ib_buffer) {
cs->base.cdw = 0;
return;
}
+ if (bo->is_local) /* local BOs are skipped: return before radv_amdgpu_cs_add_buffer_internal */
+ return;
+
radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
}
} else if (count == 1 && !extra_bo && !extra_cs &&
!radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
+ if (cs->num_buffers == 0) { /* no buffers tracked by this CS: skip amdgpu_bo_list_create entirely */
+ *bo_list = 0; /* 0 stands for "no list"; the `if (bo_list)` guards skip amdgpu_bo_list_destroy for it */
+ return 0; /* reported as success */
+ }
r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
cs->priorities, bo_list);
} else {
if (extra_cs) {
total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;
}
-
+ if (total_buffer_count == 0) { /* nothing to list: return before the handles/priorities allocations that follow */
+ *bo_list = 0; /* 0 stands for "no list"; the `if (bo_list)` guards skip amdgpu_bo_list_destroy for it */
+ return 0; /* reported as success */
+ }
amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
if (!handles || !priorities) {
"see dmesg for more information.\n");
}
- amdgpu_bo_list_destroy(bo_list);
+ if (bo_list) /* bo_list may be 0 when list creation was skipped for an empty buffer set */
+ amdgpu_bo_list_destroy(bo_list);
if (fence)
radv_amdgpu_request_to_fence(ctx, fence, &request);
"see dmesg for more information.\n");
}
- amdgpu_bo_list_destroy(bo_list);
+ if (bo_list) /* bo_list may be 0 when list creation was skipped for an empty buffer set */
+ amdgpu_bo_list_destroy(bo_list);
if (r)
return r;
}
assert(cnt);
- bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
+ bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS|RADEON_FLAG_NO_INTERPROCESS_SHARING);
ptr = ws->buffer_map(bo);
if (preamble_cs) {
"see dmesg for more information.\n");
}
- amdgpu_bo_list_destroy(bo_list);
+ if (bo_list) /* bo_list may be 0 when list creation was skipped for an empty buffer set */
+ amdgpu_bo_list_destroy(bo_list);
ws->buffer_destroy(bo);
if (r)
assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS);
+ RADEON_FLAG_CPU_ACCESS|
+ RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (ctx->fence_bo)
ctx->fence_map = (uint64_t*)ws->base.buffer_map(ctx->fence_bo);
if (ctx->fence_map)