+++ /dev/null
-[*.{c,h}]
-indent_style = space
-indent_size = 4
static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
- return (struct radeon_bo *)bo;
+ return (struct radeon_bo *)bo;
}
struct radeon_bo_va_hole {
- struct list_head list;
- uint64_t offset;
- uint64_t size;
+ struct list_head list;
+ uint64_t offset;
+ uint64_t size;
};
static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
{
- struct drm_radeon_gem_busy args = {0};
+ struct drm_radeon_gem_busy args = {0};
- args.handle = bo->handle;
- return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
- &args, sizeof(args)) != 0;
+ args.handle = bo->handle;
+ return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
+ &args, sizeof(args)) != 0;
}
static bool radeon_bo_is_busy(struct radeon_bo *bo)
{
- unsigned num_idle;
- bool busy = false;
-
- if (bo->handle)
- return radeon_real_bo_is_busy(bo);
-
- mtx_lock(&bo->rws->bo_fence_lock);
- for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
- if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
- busy = true;
- break;
- }
- radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
- }
- memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
- (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
- bo->u.slab.num_fences -= num_idle;
- mtx_unlock(&bo->rws->bo_fence_lock);
-
- return busy;
+ unsigned num_idle;
+ bool busy = false;
+
+ if (bo->handle)
+ return radeon_real_bo_is_busy(bo);
+
+ mtx_lock(&bo->rws->bo_fence_lock);
+ for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
+ if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
+ busy = true;
+ break;
+ }
+ radeon_bo_reference(&bo->u.slab.fences[num_idle], NULL);
+ }
+ memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
+ (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
+ bo->u.slab.num_fences -= num_idle;
+ mtx_unlock(&bo->rws->bo_fence_lock);
+
+ return busy;
}
static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
{
- struct drm_radeon_gem_wait_idle args = {0};
+ struct drm_radeon_gem_wait_idle args = {0};
- args.handle = bo->handle;
- while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
- &args, sizeof(args)) == -EBUSY);
+ args.handle = bo->handle;
+ while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
+ &args, sizeof(args)) == -EBUSY);
}
static void radeon_bo_wait_idle(struct radeon_bo *bo)
{
- if (bo->handle) {
- radeon_real_bo_wait_idle(bo);
- } else {
- mtx_lock(&bo->rws->bo_fence_lock);
- while (bo->u.slab.num_fences) {
- struct radeon_bo *fence = NULL;
- radeon_bo_reference(&fence, bo->u.slab.fences[0]);
- mtx_unlock(&bo->rws->bo_fence_lock);
-
- /* Wait without holding the fence lock. */
- radeon_real_bo_wait_idle(fence);
-
- mtx_lock(&bo->rws->bo_fence_lock);
- if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
- radeon_bo_reference(&bo->u.slab.fences[0], NULL);
- memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
- (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
- bo->u.slab.num_fences--;
- }
- radeon_bo_reference(&fence, NULL);
- }
- mtx_unlock(&bo->rws->bo_fence_lock);
- }
+ if (bo->handle) {
+ radeon_real_bo_wait_idle(bo);
+ } else {
+ mtx_lock(&bo->rws->bo_fence_lock);
+ while (bo->u.slab.num_fences) {
+ struct radeon_bo *fence = NULL;
+ radeon_bo_reference(&fence, bo->u.slab.fences[0]);
+ mtx_unlock(&bo->rws->bo_fence_lock);
+
+ /* Wait without holding the fence lock. */
+ radeon_real_bo_wait_idle(fence);
+
+ mtx_lock(&bo->rws->bo_fence_lock);
+ if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
+ radeon_bo_reference(&bo->u.slab.fences[0], NULL);
+ memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
+ (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
+ bo->u.slab.num_fences--;
+ }
+ radeon_bo_reference(&fence, NULL);
+ }
+ mtx_unlock(&bo->rws->bo_fence_lock);
+ }
}
static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
enum radeon_bo_usage usage)
{
- struct radeon_bo *bo = radeon_bo(_buf);
- int64_t abs_timeout;
+ struct radeon_bo *bo = radeon_bo(_buf);
+ int64_t abs_timeout;
- /* No timeout. Just query. */
- if (timeout == 0)
- return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
+ /* No timeout. Just query. */
+ if (timeout == 0)
+ return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
- abs_timeout = os_time_get_absolute_timeout(timeout);
+ abs_timeout = os_time_get_absolute_timeout(timeout);
- /* Wait if any ioctl is being submitted with this buffer. */
- if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
- return false;
+ /* Wait if any ioctl is being submitted with this buffer. */
+ if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
+ return false;
- /* Infinite timeout. */
- if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
- radeon_bo_wait_idle(bo);
- return true;
- }
+ /* Infinite timeout. */
+ if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
+ radeon_bo_wait_idle(bo);
+ return true;
+ }
- /* Other timeouts need to be emulated with a loop. */
- while (radeon_bo_is_busy(bo)) {
- if (os_time_get_nano() >= abs_timeout)
- return false;
- os_time_sleep(10);
- }
+ /* Other timeouts need to be emulated with a loop. */
+ while (radeon_bo_is_busy(bo)) {
+ if (os_time_get_nano() >= abs_timeout)
+ return false;
+ os_time_sleep(10);
+ }
- return true;
+ return true;
}
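
[Reviewer note, not part of the patch: the finite-timeout branch of radeon_bo_wait() above emulates a timed wait by polling the busy status against an absolute deadline, sleeping about 10 us between polls (os_time_sleep() takes microseconds). Below is a minimal standalone sketch of that polling pattern; now_ns(), is_busy, wait_idle_with_timeout() and the 10 us back-off are illustrative stand-ins for os_time_get_nano(), radeon_bo_is_busy() and os_time_sleep(10), not code from this patch.]

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

static uint64_t now_ns(void)
{
   struct timespec ts;
   clock_gettime(CLOCK_MONOTONIC, &ts);
   return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/* Poll is_busy(obj) until it clears or timeout_ns elapses. */
static bool wait_idle_with_timeout(bool (*is_busy)(void *), void *obj,
                                   uint64_t timeout_ns)
{
   uint64_t deadline = now_ns() + timeout_ns;      /* absolute deadline */

   while (is_busy(obj)) {
      if (now_ns() >= deadline)
         return false;                             /* timed out, still busy */

      struct timespec backoff = { 0, 10 * 1000 };  /* ~10 us between polls */
      nanosleep(&backoff, NULL);
   }
   return true;                                    /* went idle in time */
}

static bool never_busy(void *obj) { (void)obj; return false; }

int main(void)
{
   /* Trivial demo: an object that is never busy "goes idle" immediately. */
   return wait_idle_with_timeout(never_busy, NULL, 1000000) ? 0 : 1;
}
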
static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
{
- /* Zero domains the driver doesn't understand. */
- domain &= RADEON_DOMAIN_VRAM_GTT;
+ /* Zero domains the driver doesn't understand. */
+ domain &= RADEON_DOMAIN_VRAM_GTT;
- /* If no domain is set, we must set something... */
- if (!domain)
- domain = RADEON_DOMAIN_VRAM_GTT;
+ /* If no domain is set, we must set something... */
+ if (!domain)
+ domain = RADEON_DOMAIN_VRAM_GTT;
- return domain;
+ return domain;
}
static enum radeon_bo_domain radeon_bo_get_initial_domain(
- struct pb_buffer *buf)
+ struct pb_buffer *buf)
{
- struct radeon_bo *bo = (struct radeon_bo*)buf;
- struct drm_radeon_gem_op args;
-
- if (bo->rws->info.drm_minor < 38)
- return RADEON_DOMAIN_VRAM_GTT;
-
- memset(&args, 0, sizeof(args));
- args.handle = bo->handle;
- args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
-
- if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
- &args, sizeof(args))) {
- fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
- bo, bo->handle);
- /* Default domain as returned by get_valid_domain. */
- return RADEON_DOMAIN_VRAM_GTT;
- }
-
- /* GEM domains and winsys domains are defined the same. */
- return get_valid_domain(args.value);
+ struct radeon_bo *bo = (struct radeon_bo*)buf;
+ struct drm_radeon_gem_op args;
+
+ if (bo->rws->info.drm_minor < 38)
+ return RADEON_DOMAIN_VRAM_GTT;
+
+ memset(&args, 0, sizeof(args));
+ args.handle = bo->handle;
+ args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
+
+ if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
+ &args, sizeof(args))) {
+ fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
+ bo, bo->handle);
+ /* Default domain as returned by get_valid_domain. */
+ return RADEON_DOMAIN_VRAM_GTT;
+ }
+
+ /* GEM domains and winsys domains are defined the same. */
+ return get_valid_domain(args.value);
}
static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
struct radeon_vm_heap *heap,
uint64_t size, uint64_t alignment)
{
- struct radeon_bo_va_hole *hole, *n;
- uint64_t offset = 0, waste = 0;
-
- /* All VM address space holes will implicitly start aligned to the
- * size alignment, so we don't need to sanitize the alignment here
- */
- size = align(size, info->gart_page_size);
-
- mtx_lock(&heap->mutex);
- /* first look for a hole */
- LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
- offset = hole->offset;
- waste = offset % alignment;
- waste = waste ? alignment - waste : 0;
- offset += waste;
- if (offset >= (hole->offset + hole->size)) {
- continue;
- }
- if (!waste && hole->size == size) {
- offset = hole->offset;
- list_del(&hole->list);
- FREE(hole);
- mtx_unlock(&heap->mutex);
- return offset;
- }
- if ((hole->size - waste) > size) {
- if (waste) {
- n = CALLOC_STRUCT(radeon_bo_va_hole);
- n->size = waste;
- n->offset = hole->offset;
- list_add(&n->list, &hole->list);
- }
- hole->size -= (size + waste);
- hole->offset += size + waste;
- mtx_unlock(&heap->mutex);
- return offset;
- }
- if ((hole->size - waste) == size) {
- hole->size = waste;
- mtx_unlock(&heap->mutex);
- return offset;
- }
- }
-
- offset = heap->start;
- waste = offset % alignment;
- waste = waste ? alignment - waste : 0;
-
- if (offset + waste + size > heap->end) {
- mtx_unlock(&heap->mutex);
- return 0;
- }
-
- if (waste) {
- n = CALLOC_STRUCT(radeon_bo_va_hole);
- n->size = waste;
- n->offset = offset;
- list_add(&n->list, &heap->holes);
- }
- offset += waste;
- heap->start += size + waste;
- mtx_unlock(&heap->mutex);
- return offset;
+ struct radeon_bo_va_hole *hole, *n;
+ uint64_t offset = 0, waste = 0;
+
+ /* All VM address space holes will implicitly start aligned to the
+ * size alignment, so we don't need to sanitize the alignment here
+ */
+ size = align(size, info->gart_page_size);
+
+ mtx_lock(&heap->mutex);
+ /* first look for a hole */
+ LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
+ offset = hole->offset;
+ waste = offset % alignment;
+ waste = waste ? alignment - waste : 0;
+ offset += waste;
+ if (offset >= (hole->offset + hole->size)) {
+ continue;
+ }
+ if (!waste && hole->size == size) {
+ offset = hole->offset;
+ list_del(&hole->list);
+ FREE(hole);
+ mtx_unlock(&heap->mutex);
+ return offset;
+ }
+ if ((hole->size - waste) > size) {
+ if (waste) {
+ n = CALLOC_STRUCT(radeon_bo_va_hole);
+ n->size = waste;
+ n->offset = hole->offset;
+ list_add(&n->list, &hole->list);
+ }
+ hole->size -= (size + waste);
+ hole->offset += size + waste;
+ mtx_unlock(&heap->mutex);
+ return offset;
+ }
+ if ((hole->size - waste) == size) {
+ hole->size = waste;
+ mtx_unlock(&heap->mutex);
+ return offset;
+ }
+ }
+
+ offset = heap->start;
+ waste = offset % alignment;
+ waste = waste ? alignment - waste : 0;
+
+ if (offset + waste + size > heap->end) {
+ mtx_unlock(&heap->mutex);
+ return 0;
+ }
+
+ if (waste) {
+ n = CALLOC_STRUCT(radeon_bo_va_hole);
+ n->size = waste;
+ n->offset = offset;
+ list_add(&n->list, &heap->holes);
+ }
+ offset += waste;
+ heap->start += size + waste;
+ mtx_unlock(&heap->mutex);
+ return offset;
}
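
[Reviewer note, not part of the patch: radeon_bomgr_find_va() above is a first-fit walk over the free-hole list; the only non-obvious arithmetic is rounding a hole's start up to the requested alignment and counting the skipped bytes as "waste". The standalone sketch below illustrates just that carving step; carve_aligned() is a made-up name for the example, not a helper in this file.]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Carve an aligned block of 'size' bytes out of the free range
 * [hole_offset, hole_offset + hole_size).  Returns the aligned start of the
 * block, or 0 if it does not fit (0 is also what the real allocator returns
 * on failure). */
static uint64_t carve_aligned(uint64_t hole_offset, uint64_t hole_size,
                              uint64_t size, uint64_t alignment)
{
   uint64_t waste = hole_offset % alignment;
   waste = waste ? alignment - waste : 0;   /* bytes skipped for alignment */

   if (hole_size < waste + size)
      return 0;                             /* hole too small once aligned */

   return hole_offset + waste;
}

int main(void)
{
   /* A 64 KiB hole starting at 0x11000: a 4 KiB allocation aligned to
    * 64 KiB lands at 0x20000, leaving 0xf000 bytes of waste at the front. */
   printf("0x%" PRIx64 "\n", carve_aligned(0x11000, 0x10000, 0x1000, 0x10000));
   return 0;
}
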
static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
uint64_t size, uint64_t alignment)
{
- uint64_t va = 0;
-
- /* Try to allocate from the 64-bit address space first.
- * If it doesn't exist (start = 0) or if it doesn't have enough space,
- * fall back to the 32-bit address space.
- */
- if (ws->vm64.start)
- va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
- if (!va)
- va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
- return va;
+ uint64_t va = 0;
+
+ /* Try to allocate from the 64-bit address space first.
+ * If it doesn't exist (start = 0) or if it doesn't have enough space,
+ * fall back to the 32-bit address space.
+ */
+ if (ws->vm64.start)
+ va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
+ if (!va)
+ va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
+ return va;
}
static void radeon_bomgr_free_va(const struct radeon_info *info,
struct radeon_vm_heap *heap,
uint64_t va, uint64_t size)
{
- struct radeon_bo_va_hole *hole = NULL;
-
- size = align(size, info->gart_page_size);
-
- mtx_lock(&heap->mutex);
- if ((va + size) == heap->start) {
- heap->start = va;
- /* Delete uppermost hole if it reaches the new top */
- if (!list_is_empty(&heap->holes)) {
- hole = container_of(heap->holes.next, hole, list);
- if ((hole->offset + hole->size) == va) {
- heap->start = hole->offset;
- list_del(&hole->list);
- FREE(hole);
- }
- }
- } else {
- struct radeon_bo_va_hole *next;
-
- hole = container_of(&heap->holes, hole, list);
- LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
- if (next->offset < va)
- break;
- hole = next;
- }
-
- if (&hole->list != &heap->holes) {
- /* Grow upper hole if it's adjacent */
- if (hole->offset == (va + size)) {
- hole->offset = va;
- hole->size += size;
- /* Merge lower hole if it's adjacent */
- if (next != hole && &next->list != &heap->holes &&
- (next->offset + next->size) == va) {
- next->size += hole->size;
- list_del(&hole->list);
- FREE(hole);
- }
- goto out;
+ struct radeon_bo_va_hole *hole = NULL;
+
+ size = align(size, info->gart_page_size);
+
+ mtx_lock(&heap->mutex);
+ if ((va + size) == heap->start) {
+ heap->start = va;
+ /* Delete uppermost hole if it reaches the new top */
+ if (!list_is_empty(&heap->holes)) {
+ hole = container_of(heap->holes.next, hole, list);
+ if ((hole->offset + hole->size) == va) {
+ heap->start = hole->offset;
+ list_del(&hole->list);
+ FREE(hole);
+ }
+ }
+ } else {
+ struct radeon_bo_va_hole *next;
+
+ hole = container_of(&heap->holes, hole, list);
+ LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
+ if (next->offset < va)
+ break;
+ hole = next;
+ }
+
+ if (&hole->list != &heap->holes) {
+ /* Grow upper hole if it's adjacent */
+ if (hole->offset == (va + size)) {
+ hole->offset = va;
+ hole->size += size;
+ /* Merge lower hole if it's adjacent */
+ if (next != hole && &next->list != &heap->holes &&
+ (next->offset + next->size) == va) {
+ next->size += hole->size;
+ list_del(&hole->list);
+ FREE(hole);
}
- }
-
- /* Grow lower hole if it's adjacent */
- if (next != hole && &next->list != &heap->holes &&
- (next->offset + next->size) == va) {
- next->size += size;
goto out;
- }
-
- /* FIXME on allocation failure we just lose virtual address space
- * maybe print a warning
- */
- next = CALLOC_STRUCT(radeon_bo_va_hole);
- if (next) {
- next->size = size;
- next->offset = va;
- list_add(&next->list, &hole->list);
- }
- }
+ }
+ }
+
+ /* Grow lower hole if it's adjacent */
+ if (next != hole && &next->list != &heap->holes &&
+ (next->offset + next->size) == va) {
+ next->size += size;
+ goto out;
+ }
+
+      /* FIXME: on allocation failure we just lose the virtual address
+       * space; maybe print a warning.
+       */
+ next = CALLOC_STRUCT(radeon_bo_va_hole);
+ if (next) {
+ next->size = size;
+ next->offset = va;
+ list_add(&next->list, &hole->list);
+ }
+ }
out:
- mtx_unlock(&heap->mutex);
+ mtx_unlock(&heap->mutex);
}
void radeon_bo_destroy(struct pb_buffer *_buf)
{
- struct radeon_bo *bo = radeon_bo(_buf);
- struct radeon_drm_winsys *rws = bo->rws;
- struct drm_gem_close args;
-
- assert(bo->handle && "must not be called for slab entries");
-
- memset(&args, 0, sizeof(args));
-
- mtx_lock(&rws->bo_handles_mutex);
- _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
- if (bo->flink_name) {
- _mesa_hash_table_remove_key(rws->bo_names,
- (void*)(uintptr_t)bo->flink_name);
- }
- mtx_unlock(&rws->bo_handles_mutex);
-
- if (bo->u.real.ptr)
- os_munmap(bo->u.real.ptr, bo->base.size);
-
- if (rws->info.r600_has_virtual_memory) {
- if (rws->va_unmap_working) {
- struct drm_radeon_gem_va va;
-
- va.handle = bo->handle;
- va.vm_id = 0;
- va.operation = RADEON_VA_UNMAP;
- va.flags = RADEON_VM_PAGE_READABLE |
- RADEON_VM_PAGE_WRITEABLE |
- RADEON_VM_PAGE_SNOOPED;
- va.offset = bo->va;
-
- if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
- sizeof(va)) != 0 &&
- va.operation == RADEON_VA_RESULT_ERROR) {
- fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
- fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size);
- fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va);
- }
- }
-
- radeon_bomgr_free_va(&rws->info,
- bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
- bo->va, bo->base.size);
- }
-
- /* Close object. */
- args.handle = bo->handle;
- drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
-
- mtx_destroy(&bo->u.real.map_mutex);
-
- if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
- else if (bo->initial_domain & RADEON_DOMAIN_GTT)
- rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);
-
- if (bo->u.real.map_count >= 1) {
- if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- bo->rws->mapped_vram -= bo->base.size;
- else
- bo->rws->mapped_gtt -= bo->base.size;
- bo->rws->num_mapped_buffers--;
- }
-
- FREE(bo);
+ struct radeon_bo *bo = radeon_bo(_buf);
+ struct radeon_drm_winsys *rws = bo->rws;
+ struct drm_gem_close args;
+
+ assert(bo->handle && "must not be called for slab entries");
+
+ memset(&args, 0, sizeof(args));
+
+ mtx_lock(&rws->bo_handles_mutex);
+ _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
+ if (bo->flink_name) {
+ _mesa_hash_table_remove_key(rws->bo_names,
+ (void*)(uintptr_t)bo->flink_name);
+ }
+ mtx_unlock(&rws->bo_handles_mutex);
+
+ if (bo->u.real.ptr)
+ os_munmap(bo->u.real.ptr, bo->base.size);
+
+ if (rws->info.r600_has_virtual_memory) {
+ if (rws->va_unmap_working) {
+ struct drm_radeon_gem_va va;
+
+ va.handle = bo->handle;
+ va.vm_id = 0;
+ va.operation = RADEON_VA_UNMAP;
+ va.flags = RADEON_VM_PAGE_READABLE |
+ RADEON_VM_PAGE_WRITEABLE |
+ RADEON_VM_PAGE_SNOOPED;
+ va.offset = bo->va;
+
+ if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
+ sizeof(va)) != 0 &&
+ va.operation == RADEON_VA_RESULT_ERROR) {
+ fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
+ fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size);
+ fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va);
+ }
+ }
+
+ radeon_bomgr_free_va(&rws->info,
+ bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
+ bo->va, bo->base.size);
+ }
+
+ /* Close object. */
+ args.handle = bo->handle;
+ drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
+
+ mtx_destroy(&bo->u.real.map_mutex);
+
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
+ else if (bo->initial_domain & RADEON_DOMAIN_GTT)
+ rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);
+
+ if (bo->u.real.map_count >= 1) {
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ bo->rws->mapped_vram -= bo->base.size;
+ else
+ bo->rws->mapped_gtt -= bo->base.size;
+ bo->rws->num_mapped_buffers--;
+ }
+
+ FREE(bo);
}
static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf)
{
struct radeon_bo *bo = radeon_bo(_buf);
- assert(bo->handle && "must not be called for slab entries");
+ assert(bo->handle && "must not be called for slab entries");
if (bo->u.real.use_reusable_pool)
pb_cache_add_buffer(&bo->u.real.cache_entry);
void *radeon_bo_do_map(struct radeon_bo *bo)
{
- struct drm_radeon_gem_mmap args = {0};
- void *ptr;
- unsigned offset;
-
- /* If the buffer is created from user memory, return the user pointer. */
- if (bo->user_ptr)
- return bo->user_ptr;
-
- if (bo->handle) {
- offset = 0;
- } else {
- offset = bo->va - bo->u.slab.real->va;
- bo = bo->u.slab.real;
- }
-
- /* Map the buffer. */
- mtx_lock(&bo->u.real.map_mutex);
- /* Return the pointer if it's already mapped. */
- if (bo->u.real.ptr) {
- bo->u.real.map_count++;
- mtx_unlock(&bo->u.real.map_mutex);
- return (uint8_t*)bo->u.real.ptr + offset;
- }
- args.handle = bo->handle;
- args.offset = 0;
- args.size = (uint64_t)bo->base.size;
- if (drmCommandWriteRead(bo->rws->fd,
- DRM_RADEON_GEM_MMAP,
- &args,
- sizeof(args))) {
- mtx_unlock(&bo->u.real.map_mutex);
- fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
- bo, bo->handle);
- return NULL;
- }
-
- ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
- bo->rws->fd, args.addr_ptr);
- if (ptr == MAP_FAILED) {
- /* Clear the cache and try again. */
- pb_cache_release_all_buffers(&bo->rws->bo_cache);
-
- ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
- bo->rws->fd, args.addr_ptr);
- if (ptr == MAP_FAILED) {
- mtx_unlock(&bo->u.real.map_mutex);
- fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
- return NULL;
- }
- }
- bo->u.real.ptr = ptr;
- bo->u.real.map_count = 1;
-
- if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- bo->rws->mapped_vram += bo->base.size;
- else
- bo->rws->mapped_gtt += bo->base.size;
- bo->rws->num_mapped_buffers++;
-
- mtx_unlock(&bo->u.real.map_mutex);
- return (uint8_t*)bo->u.real.ptr + offset;
+ struct drm_radeon_gem_mmap args = {0};
+ void *ptr;
+ unsigned offset;
+
+ /* If the buffer is created from user memory, return the user pointer. */
+ if (bo->user_ptr)
+ return bo->user_ptr;
+
+ if (bo->handle) {
+ offset = 0;
+ } else {
+ offset = bo->va - bo->u.slab.real->va;
+ bo = bo->u.slab.real;
+ }
+
+ /* Map the buffer. */
+ mtx_lock(&bo->u.real.map_mutex);
+ /* Return the pointer if it's already mapped. */
+ if (bo->u.real.ptr) {
+ bo->u.real.map_count++;
+ mtx_unlock(&bo->u.real.map_mutex);
+ return (uint8_t*)bo->u.real.ptr + offset;
+ }
+ args.handle = bo->handle;
+ args.offset = 0;
+ args.size = (uint64_t)bo->base.size;
+ if (drmCommandWriteRead(bo->rws->fd,
+ DRM_RADEON_GEM_MMAP,
+ &args,
+ sizeof(args))) {
+ mtx_unlock(&bo->u.real.map_mutex);
+ fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
+ bo, bo->handle);
+ return NULL;
+ }
+
+ ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
+ bo->rws->fd, args.addr_ptr);
+ if (ptr == MAP_FAILED) {
+ /* Clear the cache and try again. */
+ pb_cache_release_all_buffers(&bo->rws->bo_cache);
+
+ ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
+ bo->rws->fd, args.addr_ptr);
+ if (ptr == MAP_FAILED) {
+ mtx_unlock(&bo->u.real.map_mutex);
+ fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
+ return NULL;
+ }
+ }
+ bo->u.real.ptr = ptr;
+ bo->u.real.map_count = 1;
+
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ bo->rws->mapped_vram += bo->base.size;
+ else
+ bo->rws->mapped_gtt += bo->base.size;
+ bo->rws->num_mapped_buffers++;
+
+ mtx_unlock(&bo->u.real.map_mutex);
+ return (uint8_t*)bo->u.real.ptr + offset;
}
static void *radeon_bo_map(struct pb_buffer *buf,
struct radeon_cmdbuf *rcs,
enum pipe_transfer_usage usage)
{
- struct radeon_bo *bo = (struct radeon_bo*)buf;
- struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
-
- /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
- if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
- /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
- if (usage & PIPE_TRANSFER_DONTBLOCK) {
- if (!(usage & PIPE_TRANSFER_WRITE)) {
- /* Mapping for read.
- *
- * Since we are mapping for read, we don't need to wait
- * if the GPU is using the buffer for read too
- * (neither one is changing it).
- *
- * Only check whether the buffer is being used for write. */
- if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
- cs->flush_cs(cs->flush_data,
- RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
- return NULL;
- }
-
- if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
- RADEON_USAGE_WRITE)) {
- return NULL;
- }
- } else {
- if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
- cs->flush_cs(cs->flush_data,
- RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
- return NULL;
- }
-
- if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
- RADEON_USAGE_READWRITE)) {
- return NULL;
- }
+ struct radeon_bo *bo = (struct radeon_bo*)buf;
+ struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
+
+ /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+ /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
+ if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ if (!(usage & PIPE_TRANSFER_WRITE)) {
+ /* Mapping for read.
+ *
+ * Since we are mapping for read, we don't need to wait
+ * if the GPU is using the buffer for read too
+ * (neither one is changing it).
+ *
+ * Only check whether the buffer is being used for write. */
+ if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
+ cs->flush_cs(cs->flush_data,
+ RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
+ return NULL;
+ }
+
+ if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
+ RADEON_USAGE_WRITE)) {
+ return NULL;
+ }
+ } else {
+ if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
+ cs->flush_cs(cs->flush_data,
+ RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
+ return NULL;
+ }
+
+ if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
+ RADEON_USAGE_READWRITE)) {
+ return NULL;
+ }
+ }
+ } else {
+ uint64_t time = os_time_get_nano();
+
+ if (!(usage & PIPE_TRANSFER_WRITE)) {
+ /* Mapping for read.
+ *
+ * Since we are mapping for read, we don't need to wait
+ * if the GPU is using the buffer for read too
+ * (neither one is changing it).
+ *
+ * Only check whether the buffer is being used for write. */
+ if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
+ cs->flush_cs(cs->flush_data,
+ RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
}
- } else {
- uint64_t time = os_time_get_nano();
-
- if (!(usage & PIPE_TRANSFER_WRITE)) {
- /* Mapping for read.
- *
- * Since we are mapping for read, we don't need to wait
- * if the GPU is using the buffer for read too
- * (neither one is changing it).
- *
- * Only check whether the buffer is being used for write. */
- if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
- cs->flush_cs(cs->flush_data,
- RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
- }
- radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
- RADEON_USAGE_WRITE);
- } else {
- /* Mapping for write. */
- if (cs) {
- if (radeon_bo_is_referenced_by_cs(cs, bo)) {
- cs->flush_cs(cs->flush_data,
- RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
- } else {
- /* Try to avoid busy-waiting in radeon_bo_wait. */
- if (p_atomic_read(&bo->num_active_ioctls))
- radeon_drm_cs_sync_flush(rcs);
- }
- }
-
- radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
- RADEON_USAGE_READWRITE);
+ radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
+ RADEON_USAGE_WRITE);
+ } else {
+ /* Mapping for write. */
+ if (cs) {
+ if (radeon_bo_is_referenced_by_cs(cs, bo)) {
+ cs->flush_cs(cs->flush_data,
+ RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
+ } else {
+ /* Try to avoid busy-waiting in radeon_bo_wait. */
+ if (p_atomic_read(&bo->num_active_ioctls))
+ radeon_drm_cs_sync_flush(rcs);
+ }
}
- bo->rws->buffer_wait_time += os_time_get_nano() - time;
- }
- }
+ radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
+ RADEON_USAGE_READWRITE);
+ }
- return radeon_bo_do_map(bo);
+ bo->rws->buffer_wait_time += os_time_get_nano() - time;
+ }
+ }
+
+ return radeon_bo_do_map(bo);
}
static void radeon_bo_unmap(struct pb_buffer *_buf)
{
- struct radeon_bo *bo = (struct radeon_bo*)_buf;
+ struct radeon_bo *bo = (struct radeon_bo*)_buf;
- if (bo->user_ptr)
- return;
+ if (bo->user_ptr)
+ return;
- if (!bo->handle)
- bo = bo->u.slab.real;
+ if (!bo->handle)
+ bo = bo->u.slab.real;
- mtx_lock(&bo->u.real.map_mutex);
- if (!bo->u.real.ptr) {
- mtx_unlock(&bo->u.real.map_mutex);
- return; /* it's not been mapped */
- }
+ mtx_lock(&bo->u.real.map_mutex);
+ if (!bo->u.real.ptr) {
+ mtx_unlock(&bo->u.real.map_mutex);
+ return; /* it's not been mapped */
+ }
- assert(bo->u.real.map_count);
- if (--bo->u.real.map_count) {
- mtx_unlock(&bo->u.real.map_mutex);
- return; /* it's been mapped multiple times */
- }
+ assert(bo->u.real.map_count);
+ if (--bo->u.real.map_count) {
+ mtx_unlock(&bo->u.real.map_mutex);
+ return; /* it's been mapped multiple times */
+ }
- os_munmap(bo->u.real.ptr, bo->base.size);
- bo->u.real.ptr = NULL;
+ os_munmap(bo->u.real.ptr, bo->base.size);
+ bo->u.real.ptr = NULL;
- if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- bo->rws->mapped_vram -= bo->base.size;
- else
- bo->rws->mapped_gtt -= bo->base.size;
- bo->rws->num_mapped_buffers--;
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ bo->rws->mapped_vram -= bo->base.size;
+ else
+ bo->rws->mapped_gtt -= bo->base.size;
+ bo->rws->num_mapped_buffers--;
- mtx_unlock(&bo->u.real.map_mutex);
+ mtx_unlock(&bo->u.real.map_mutex);
}
static const struct pb_vtbl radeon_bo_vtbl = {
- radeon_bo_destroy_or_cache
- /* other functions are never called */
+ radeon_bo_destroy_or_cache
+ /* other functions are never called */
};
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
unsigned flags,
int heap)
{
- struct radeon_bo *bo;
- struct drm_radeon_gem_create args;
- int r;
-
- memset(&args, 0, sizeof(args));
-
- assert(initial_domains);
- assert((initial_domains &
- ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
-
- args.size = size;
- args.alignment = alignment;
- args.initial_domain = initial_domains;
- args.flags = 0;
-
- /* If VRAM is just stolen system memory, allow both VRAM and
- * GTT, whichever has free space. If a buffer is evicted from
- * VRAM to GTT, it will stay there.
- */
- if (!rws->info.has_dedicated_vram)
- args.initial_domain |= RADEON_DOMAIN_GTT;
-
- if (flags & RADEON_FLAG_GTT_WC)
- args.flags |= RADEON_GEM_GTT_WC;
- if (flags & RADEON_FLAG_NO_CPU_ACCESS)
- args.flags |= RADEON_GEM_NO_CPU_ACCESS;
-
- if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
- &args, sizeof(args))) {
- fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
- fprintf(stderr, "radeon: size : %u bytes\n", size);
- fprintf(stderr, "radeon: alignment : %u bytes\n", alignment);
- fprintf(stderr, "radeon: domains : %u\n", args.initial_domain);
- fprintf(stderr, "radeon: flags : %u\n", args.flags);
- return NULL;
- }
-
- assert(args.handle != 0);
-
- bo = CALLOC_STRUCT(radeon_bo);
- if (!bo)
- return NULL;
-
- pipe_reference_init(&bo->base.reference, 1);
- bo->base.alignment = alignment;
- bo->base.usage = 0;
- bo->base.size = size;
- bo->base.vtbl = &radeon_bo_vtbl;
- bo->rws = rws;
- bo->handle = args.handle;
- bo->va = 0;
- bo->initial_domain = initial_domains;
- bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
- (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
-
- if (heap >= 0) {
- pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
- heap);
- }
-
- if (rws->info.r600_has_virtual_memory) {
- struct drm_radeon_gem_va va;
- unsigned va_gap_size;
-
- va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
-
- if (flags & RADEON_FLAG_32BIT) {
- bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
- size + va_gap_size, alignment);
- assert(bo->va + size < rws->vm32.end);
- } else {
- bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
- }
-
- va.handle = bo->handle;
- va.vm_id = 0;
- va.operation = RADEON_VA_MAP;
- va.flags = RADEON_VM_PAGE_READABLE |
- RADEON_VM_PAGE_WRITEABLE |
- RADEON_VM_PAGE_SNOOPED;
- va.offset = bo->va;
- r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
- if (r && va.operation == RADEON_VA_RESULT_ERROR) {
- fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
- fprintf(stderr, "radeon: size : %d bytes\n", size);
- fprintf(stderr, "radeon: alignment : %d bytes\n", alignment);
- fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
- fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
- radeon_bo_destroy(&bo->base);
- return NULL;
- }
- mtx_lock(&rws->bo_handles_mutex);
- if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
- struct pb_buffer *b = &bo->base;
- struct radeon_bo *old_bo =
- util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset);
-
- mtx_unlock(&rws->bo_handles_mutex);
- pb_reference(&b, &old_bo->base);
- return radeon_bo(b);
- }
-
- _mesa_hash_table_insert(rws->bo_vas, (void*)(uintptr_t)bo->va, bo);
- mtx_unlock(&rws->bo_handles_mutex);
- }
-
- if (initial_domains & RADEON_DOMAIN_VRAM)
- rws->allocated_vram += align(size, rws->info.gart_page_size);
- else if (initial_domains & RADEON_DOMAIN_GTT)
- rws->allocated_gtt += align(size, rws->info.gart_page_size);
-
- return bo;
+ struct radeon_bo *bo;
+ struct drm_radeon_gem_create args;
+ int r;
+
+ memset(&args, 0, sizeof(args));
+
+ assert(initial_domains);
+ assert((initial_domains &
+ ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
+
+ args.size = size;
+ args.alignment = alignment;
+ args.initial_domain = initial_domains;
+ args.flags = 0;
+
+ /* If VRAM is just stolen system memory, allow both VRAM and
+ * GTT, whichever has free space. If a buffer is evicted from
+ * VRAM to GTT, it will stay there.
+ */
+ if (!rws->info.has_dedicated_vram)
+ args.initial_domain |= RADEON_DOMAIN_GTT;
+
+ if (flags & RADEON_FLAG_GTT_WC)
+ args.flags |= RADEON_GEM_GTT_WC;
+ if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+ args.flags |= RADEON_GEM_NO_CPU_ACCESS;
+
+ if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
+ &args, sizeof(args))) {
+ fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
+ fprintf(stderr, "radeon: size : %u bytes\n", size);
+ fprintf(stderr, "radeon: alignment : %u bytes\n", alignment);
+ fprintf(stderr, "radeon: domains : %u\n", args.initial_domain);
+ fprintf(stderr, "radeon: flags : %u\n", args.flags);
+ return NULL;
+ }
+
+ assert(args.handle != 0);
+
+ bo = CALLOC_STRUCT(radeon_bo);
+ if (!bo)
+ return NULL;
+
+ pipe_reference_init(&bo->base.reference, 1);
+ bo->base.alignment = alignment;
+ bo->base.usage = 0;
+ bo->base.size = size;
+ bo->base.vtbl = &radeon_bo_vtbl;
+ bo->rws = rws;
+ bo->handle = args.handle;
+ bo->va = 0;
+ bo->initial_domain = initial_domains;
+ bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
+ (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
+
+ if (heap >= 0) {
+ pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
+ heap);
+ }
+
+ if (rws->info.r600_has_virtual_memory) {
+ struct drm_radeon_gem_va va;
+ unsigned va_gap_size;
+
+ va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
+
+ if (flags & RADEON_FLAG_32BIT) {
+ bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
+ size + va_gap_size, alignment);
+ assert(bo->va + size < rws->vm32.end);
+ } else {
+ bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
+ }
+
+ va.handle = bo->handle;
+ va.vm_id = 0;
+ va.operation = RADEON_VA_MAP;
+ va.flags = RADEON_VM_PAGE_READABLE |
+ RADEON_VM_PAGE_WRITEABLE |
+ RADEON_VM_PAGE_SNOOPED;
+ va.offset = bo->va;
+ r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+ if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+ fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
+ fprintf(stderr, "radeon: size : %d bytes\n", size);
+ fprintf(stderr, "radeon: alignment : %d bytes\n", alignment);
+ fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
+ fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
+ radeon_bo_destroy(&bo->base);
+ return NULL;
+ }
+ mtx_lock(&rws->bo_handles_mutex);
+ if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+ struct pb_buffer *b = &bo->base;
+ struct radeon_bo *old_bo =
+ util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset);
+
+ mtx_unlock(&rws->bo_handles_mutex);
+ pb_reference(&b, &old_bo->base);
+ return radeon_bo(b);
+ }
+
+ _mesa_hash_table_insert(rws->bo_vas, (void*)(uintptr_t)bo->va, bo);
+ mtx_unlock(&rws->bo_handles_mutex);
+ }
+
+ if (initial_domains & RADEON_DOMAIN_VRAM)
+ rws->allocated_vram += align(size, rws->info.gart_page_size);
+ else if (initial_domains & RADEON_DOMAIN_GTT)
+ rws->allocated_gtt += align(size, rws->info.gart_page_size);
+
+ return bo;
}
bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
- struct radeon_bo *bo = NULL; /* fix container_of */
- bo = container_of(entry, bo, u.slab.entry);
+ struct radeon_bo *bo = NULL; /* fix container_of */
+ bo = container_of(entry, bo, u.slab.entry);
- return radeon_bo_can_reclaim(&bo->base);
+ return radeon_bo_can_reclaim(&bo->base);
}
static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
{
- struct radeon_bo *bo = radeon_bo(_buf);
+ struct radeon_bo *bo = radeon_bo(_buf);
- assert(!bo->handle);
+ assert(!bo->handle);
- pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
+ pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
}
static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
- radeon_bo_slab_destroy
- /* other functions are never called */
+ radeon_bo_slab_destroy
+ /* other functions are never called */
};
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
unsigned entry_size,
unsigned group_index)
{
- struct radeon_drm_winsys *ws = priv;
- struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
- enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
- enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
- unsigned base_hash;
+ struct radeon_drm_winsys *ws = priv;
+ struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
+ enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
+ enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
+ unsigned base_hash;
- if (!slab)
- return NULL;
+ if (!slab)
+ return NULL;
- slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
- 64 * 1024, 64 * 1024,
- domains, flags));
- if (!slab->buffer)
- goto fail;
+ slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
+ 64 * 1024, 64 * 1024,
+ domains, flags));
+ if (!slab->buffer)
+ goto fail;
- assert(slab->buffer->handle);
+ assert(slab->buffer->handle);
- slab->base.num_entries = slab->buffer->base.size / entry_size;
- slab->base.num_free = slab->base.num_entries;
- slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
- if (!slab->entries)
- goto fail_buffer;
+ slab->base.num_entries = slab->buffer->base.size / entry_size;
+ slab->base.num_free = slab->base.num_entries;
+ slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
+ if (!slab->entries)
+ goto fail_buffer;
- list_inithead(&slab->base.free);
+ list_inithead(&slab->base.free);
- base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
+ base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
- for (unsigned i = 0; i < slab->base.num_entries; ++i) {
- struct radeon_bo *bo = &slab->entries[i];
+ for (unsigned i = 0; i < slab->base.num_entries; ++i) {
+ struct radeon_bo *bo = &slab->entries[i];
- bo->base.alignment = entry_size;
- bo->base.usage = slab->buffer->base.usage;
- bo->base.size = entry_size;
- bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
- bo->rws = ws;
- bo->va = slab->buffer->va + i * entry_size;
- bo->initial_domain = domains;
- bo->hash = base_hash + i;
- bo->u.slab.entry.slab = &slab->base;
- bo->u.slab.entry.group_index = group_index;
- bo->u.slab.real = slab->buffer;
+ bo->base.alignment = entry_size;
+ bo->base.usage = slab->buffer->base.usage;
+ bo->base.size = entry_size;
+ bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
+ bo->rws = ws;
+ bo->va = slab->buffer->va + i * entry_size;
+ bo->initial_domain = domains;
+ bo->hash = base_hash + i;
+ bo->u.slab.entry.slab = &slab->base;
+ bo->u.slab.entry.group_index = group_index;
+ bo->u.slab.real = slab->buffer;
- list_addtail(&bo->u.slab.entry.head, &slab->base.free);
- }
+ list_addtail(&bo->u.slab.entry.head, &slab->base.free);
+ }
- return &slab->base;
+ return &slab->base;
fail_buffer:
- radeon_bo_reference(&slab->buffer, NULL);
+ radeon_bo_reference(&slab->buffer, NULL);
fail:
- FREE(slab);
- return NULL;
+ FREE(slab);
+ return NULL;
}
void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
{
- struct radeon_slab *slab = (struct radeon_slab *)pslab;
-
- for (unsigned i = 0; i < slab->base.num_entries; ++i) {
- struct radeon_bo *bo = &slab->entries[i];
- for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
- radeon_bo_reference(&bo->u.slab.fences[j], NULL);
- FREE(bo->u.slab.fences);
- }
-
- FREE(slab->entries);
- radeon_bo_reference(&slab->buffer, NULL);
- FREE(slab);
+ struct radeon_slab *slab = (struct radeon_slab *)pslab;
+
+ for (unsigned i = 0; i < slab->base.num_entries; ++i) {
+ struct radeon_bo *bo = &slab->entries[i];
+ for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
+ radeon_bo_reference(&bo->u.slab.fences[j], NULL);
+ FREE(bo->u.slab.fences);
+ }
+
+ FREE(slab->entries);
+ radeon_bo_reference(&slab->buffer, NULL);
+ FREE(slab);
}
static unsigned eg_tile_split(unsigned tile_split)
{
- switch (tile_split) {
- case 0: tile_split = 64; break;
- case 1: tile_split = 128; break;
- case 2: tile_split = 256; break;
- case 3: tile_split = 512; break;
- default:
- case 4: tile_split = 1024; break;
- case 5: tile_split = 2048; break;
- case 6: tile_split = 4096; break;
- }
- return tile_split;
+ switch (tile_split) {
+ case 0: tile_split = 64; break;
+ case 1: tile_split = 128; break;
+ case 2: tile_split = 256; break;
+ case 3: tile_split = 512; break;
+ default:
+ case 4: tile_split = 1024; break;
+ case 5: tile_split = 2048; break;
+ case 6: tile_split = 4096; break;
+ }
+ return tile_split;
}
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
- switch (eg_tile_split) {
- case 64: return 0;
- case 128: return 1;
- case 256: return 2;
- case 512: return 3;
- default:
- case 1024: return 4;
- case 2048: return 5;
- case 4096: return 6;
- }
+ switch (eg_tile_split) {
+ case 64: return 0;
+ case 128: return 1;
+ case 256: return 2;
+ case 512: return 3;
+ default:
+ case 1024: return 4;
+ case 2048: return 5;
+ case 4096: return 6;
+ }
}
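
[Reviewer note, not part of the patch: eg_tile_split() and eg_tile_split_rev() are inverses over the legal values; the 3-bit hardware field 0..6 encodes a tile split of 64 << field bytes (64..4096), and out-of-range values fall back to 1024 / field 4. A small self-contained round-trip check, using local copies of the two mappings so it compiles on its own:]

#include <assert.h>

/* Local copies mirroring eg_tile_split() and eg_tile_split_rev() above,
 * so this check builds standalone. */
static unsigned decode_tile_split(unsigned field)
{
   return field <= 6 ? 64u << field : 1024;
}

static unsigned encode_tile_split(unsigned bytes)
{
   switch (bytes) {
   case 64:   return 0;
   case 128:  return 1;
   case 256:  return 2;
   case 512:  return 3;
   case 2048: return 5;
   case 4096: return 6;
   default:
   case 1024: return 4;
   }
}

int main(void)
{
   /* Every legal field value survives a decode/encode round trip. */
   for (unsigned field = 0; field <= 6; field++)
      assert(encode_tile_split(decode_tile_split(field)) == field);
   return 0;
}
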
static void radeon_bo_get_metadata(struct pb_buffer *_buf,
- struct radeon_bo_metadata *md)
+ struct radeon_bo_metadata *md)
{
- struct radeon_bo *bo = radeon_bo(_buf);
- struct drm_radeon_gem_set_tiling args;
+ struct radeon_bo *bo = radeon_bo(_buf);
+ struct drm_radeon_gem_set_tiling args;
- assert(bo->handle && "must not be called for slab entries");
+ assert(bo->handle && "must not be called for slab entries");
- memset(&args, 0, sizeof(args));
+ memset(&args, 0, sizeof(args));
- args.handle = bo->handle;
+ args.handle = bo->handle;
- drmCommandWriteRead(bo->rws->fd,
- DRM_RADEON_GEM_GET_TILING,
- &args,
- sizeof(args));
+ drmCommandWriteRead(bo->rws->fd,
+ DRM_RADEON_GEM_GET_TILING,
+ &args,
+ sizeof(args));
- md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
- md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
- if (args.tiling_flags & RADEON_TILING_MICRO)
- md->u.legacy.microtile = RADEON_LAYOUT_TILED;
- else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
- md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;
+ md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
+ md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
+ if (args.tiling_flags & RADEON_TILING_MICRO)
+ md->u.legacy.microtile = RADEON_LAYOUT_TILED;
+ else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+ md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;
- if (args.tiling_flags & RADEON_TILING_MACRO)
- md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
+ if (args.tiling_flags & RADEON_TILING_MACRO)
+ md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
- md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
- md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
- md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
- md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
- md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
- md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
+ md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
+ md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
+ md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
+ md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
+ md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
+ md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
}
static void radeon_bo_set_metadata(struct pb_buffer *_buf,
struct radeon_bo_metadata *md)
{
- struct radeon_bo *bo = radeon_bo(_buf);
- struct drm_radeon_gem_set_tiling args;
+ struct radeon_bo *bo = radeon_bo(_buf);
+ struct drm_radeon_gem_set_tiling args;
- assert(bo->handle && "must not be called for slab entries");
+ assert(bo->handle && "must not be called for slab entries");
- memset(&args, 0, sizeof(args));
+ memset(&args, 0, sizeof(args));
- os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
+ os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
- if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
- args.tiling_flags |= RADEON_TILING_MICRO;
- else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
- args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
+ if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
+ args.tiling_flags |= RADEON_TILING_MICRO;
+ else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
+ args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
- if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
- args.tiling_flags |= RADEON_TILING_MACRO;
+ if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+ args.tiling_flags |= RADEON_TILING_MACRO;
- args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
- RADEON_TILING_EG_BANKW_SHIFT;
- args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
- RADEON_TILING_EG_BANKH_SHIFT;
- if (md->u.legacy.tile_split) {
- args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
- RADEON_TILING_EG_TILE_SPLIT_MASK) <<
- RADEON_TILING_EG_TILE_SPLIT_SHIFT;
- }
- args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
- RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
+ args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
+ RADEON_TILING_EG_BANKW_SHIFT;
+ args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
+ RADEON_TILING_EG_BANKH_SHIFT;
+ if (md->u.legacy.tile_split) {
+ args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
+ RADEON_TILING_EG_TILE_SPLIT_MASK) <<
+ RADEON_TILING_EG_TILE_SPLIT_SHIFT;
+ }
+ args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
+ RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
- if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
- args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
+ if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
+ args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
- args.handle = bo->handle;
- args.pitch = md->u.legacy.stride;
+ args.handle = bo->handle;
+ args.pitch = md->u.legacy.stride;
- drmCommandWriteRead(bo->rws->fd,
- DRM_RADEON_GEM_SET_TILING,
- &args,
- sizeof(args));
+ drmCommandWriteRead(bo->rws->fd,
+ DRM_RADEON_GEM_SET_TILING,
+ &args,
+ sizeof(args));
}
static struct pb_buffer *
enum radeon_bo_domain domain,
enum radeon_bo_flag flags)
{
- struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
- struct radeon_bo *bo;
- int heap = -1;
-
- assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
-
- /* Only 32-bit sizes are supported. */
- if (size > UINT_MAX)
- return NULL;
-
- /* VRAM implies WC. This is not optional. */
- if (domain & RADEON_DOMAIN_VRAM)
- flags |= RADEON_FLAG_GTT_WC;
- /* NO_CPU_ACCESS is valid with VRAM only. */
- if (domain != RADEON_DOMAIN_VRAM)
- flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
-
- /* Sub-allocate small buffers from slabs. */
- if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
- size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
- ws->info.r600_has_virtual_memory &&
- alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
- struct pb_slab_entry *entry;
- int heap = radeon_get_heap_index(domain, flags);
-
- if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
- goto no_slab;
-
- entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
- if (!entry) {
- /* Clear the cache and try again. */
- pb_cache_release_all_buffers(&ws->bo_cache);
-
- entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
- }
- if (!entry)
- return NULL;
-
- bo = NULL;
- bo = container_of(entry, bo, u.slab.entry);
-
- pipe_reference_init(&bo->base.reference, 1);
-
- return &bo->base;
- }
+ struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
+ struct radeon_bo *bo;
+ int heap = -1;
+
+ assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
+
+ /* Only 32-bit sizes are supported. */
+ if (size > UINT_MAX)
+ return NULL;
+
+ /* VRAM implies WC. This is not optional. */
+ if (domain & RADEON_DOMAIN_VRAM)
+ flags |= RADEON_FLAG_GTT_WC;
+ /* NO_CPU_ACCESS is valid with VRAM only. */
+ if (domain != RADEON_DOMAIN_VRAM)
+ flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
+
+ /* Sub-allocate small buffers from slabs. */
+ if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
+ size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
+ ws->info.r600_has_virtual_memory &&
+ alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
+ struct pb_slab_entry *entry;
+ int heap = radeon_get_heap_index(domain, flags);
+
+ if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
+ goto no_slab;
+
+ entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+ if (!entry) {
+ /* Clear the cache and try again. */
+ pb_cache_release_all_buffers(&ws->bo_cache);
+
+ entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+ }
+ if (!entry)
+ return NULL;
+
+ bo = NULL;
+ bo = container_of(entry, bo, u.slab.entry);
+
+ pipe_reference_init(&bo->base.reference, 1);
+
+ return &bo->base;
+ }
no_slab:
- /* This flag is irrelevant for the cache. */
- flags &= ~RADEON_FLAG_NO_SUBALLOC;
-
- /* Align size to page size. This is the minimum alignment for normal
- * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
- * like constant/uniform buffers, can benefit from better and more reuse.
- */
- size = align(size, ws->info.gart_page_size);
- alignment = align(alignment, ws->info.gart_page_size);
-
- bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
-
- /* Shared resources don't use cached heaps. */
- if (use_reusable_pool) {
- heap = radeon_get_heap_index(domain, flags);
- assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
-
- bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
- 0, heap));
- if (bo)
- return &bo->base;
- }
-
- bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
- if (!bo) {
- /* Clear the cache and try again. */
- if (ws->info.r600_has_virtual_memory)
- pb_slabs_reclaim(&ws->bo_slabs);
- pb_cache_release_all_buffers(&ws->bo_cache);
- bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
- if (!bo)
- return NULL;
- }
-
- bo->u.real.use_reusable_pool = use_reusable_pool;
-
- mtx_lock(&ws->bo_handles_mutex);
- _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
- mtx_unlock(&ws->bo_handles_mutex);
-
- return &bo->base;
+ /* This flag is irrelevant for the cache. */
+ flags &= ~RADEON_FLAG_NO_SUBALLOC;
+
+ /* Align size to page size. This is the minimum alignment for normal
+ * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
+ * like constant/uniform buffers, can benefit from better and more reuse.
+ */
+ size = align(size, ws->info.gart_page_size);
+ alignment = align(alignment, ws->info.gart_page_size);
+
+ bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
+ /* Shared resources don't use cached heaps. */
+ if (use_reusable_pool) {
+ heap = radeon_get_heap_index(domain, flags);
+ assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
+
+ bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
+ 0, heap));
+ if (bo)
+ return &bo->base;
+ }
+
+ bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
+ if (!bo) {
+ /* Clear the cache and try again. */
+ if (ws->info.r600_has_virtual_memory)
+ pb_slabs_reclaim(&ws->bo_slabs);
+ pb_cache_release_all_buffers(&ws->bo_cache);
+ bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
+ if (!bo)
+ return NULL;
+ }
+
+ bo->u.real.use_reusable_pool = use_reusable_pool;
+
+ mtx_lock(&ws->bo_handles_mutex);
+ _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
+ mtx_unlock(&ws->bo_handles_mutex);
+
+ return &bo->base;
}
static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
void *pointer, uint64_t size)
{
- struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
- struct drm_radeon_gem_userptr args;
- struct radeon_bo *bo;
- int r;
-
- bo = CALLOC_STRUCT(radeon_bo);
- if (!bo)
- return NULL;
-
- memset(&args, 0, sizeof(args));
- args.addr = (uintptr_t)pointer;
- args.size = align(size, ws->info.gart_page_size);
- args.flags = RADEON_GEM_USERPTR_ANONONLY |
- RADEON_GEM_USERPTR_VALIDATE |
- RADEON_GEM_USERPTR_REGISTER;
- if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
- &args, sizeof(args))) {
- FREE(bo);
- return NULL;
- }
-
- assert(args.handle != 0);
-
- mtx_lock(&ws->bo_handles_mutex);
-
- /* Initialize it. */
- pipe_reference_init(&bo->base.reference, 1);
- bo->handle = args.handle;
- bo->base.alignment = 0;
- bo->base.size = size;
- bo->base.vtbl = &radeon_bo_vtbl;
- bo->rws = ws;
- bo->user_ptr = pointer;
- bo->va = 0;
- bo->initial_domain = RADEON_DOMAIN_GTT;
- bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
- (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
-
- _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
-
- mtx_unlock(&ws->bo_handles_mutex);
-
- if (ws->info.r600_has_virtual_memory) {
- struct drm_radeon_gem_va va;
-
- bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
-
- va.handle = bo->handle;
- va.operation = RADEON_VA_MAP;
- va.vm_id = 0;
- va.offset = bo->va;
- va.flags = RADEON_VM_PAGE_READABLE |
- RADEON_VM_PAGE_WRITEABLE |
- RADEON_VM_PAGE_SNOOPED;
- va.offset = bo->va;
- r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
- if (r && va.operation == RADEON_VA_RESULT_ERROR) {
- fprintf(stderr, "radeon: Failed to assign virtual address space\n");
- radeon_bo_destroy(&bo->base);
- return NULL;
- }
- mtx_lock(&ws->bo_handles_mutex);
- if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
- struct pb_buffer *b = &bo->base;
- struct radeon_bo *old_bo =
- util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
-
- mtx_unlock(&ws->bo_handles_mutex);
- pb_reference(&b, &old_bo->base);
- return b;
- }
-
- _mesa_hash_table_insert(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
- mtx_unlock(&ws->bo_handles_mutex);
- }
-
- ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
-
- return (struct pb_buffer*)bo;
+ struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
+ struct drm_radeon_gem_userptr args;
+ struct radeon_bo *bo;
+ int r;
+
+ bo = CALLOC_STRUCT(radeon_bo);
+ if (!bo)
+ return NULL;
+
+ memset(&args, 0, sizeof(args));
+ args.addr = (uintptr_t)pointer;
+ args.size = align(size, ws->info.gart_page_size);
+ args.flags = RADEON_GEM_USERPTR_ANONONLY |
+ RADEON_GEM_USERPTR_VALIDATE |
+ RADEON_GEM_USERPTR_REGISTER;
+ if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
+ &args, sizeof(args))) {
+ FREE(bo);
+ return NULL;
+ }
+
+ assert(args.handle != 0);
+
+ mtx_lock(&ws->bo_handles_mutex);
+
+ /* Initialize it. */
+ pipe_reference_init(&bo->base.reference, 1);
+ bo->handle = args.handle;
+ bo->base.alignment = 0;
+ bo->base.size = size;
+ bo->base.vtbl = &radeon_bo_vtbl;
+ bo->rws = ws;
+ bo->user_ptr = pointer;
+ bo->va = 0;
+ bo->initial_domain = RADEON_DOMAIN_GTT;
+ bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
+ (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
+
+ _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
+
+ mtx_unlock(&ws->bo_handles_mutex);
+
+ if (ws->info.r600_has_virtual_memory) {
+ struct drm_radeon_gem_va va;
+
+ bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
+
+ va.handle = bo->handle;
+ va.operation = RADEON_VA_MAP;
+ va.vm_id = 0;
+ va.offset = bo->va;
+ va.flags = RADEON_VM_PAGE_READABLE |
+ RADEON_VM_PAGE_WRITEABLE |
+ RADEON_VM_PAGE_SNOOPED;
+ va.offset = bo->va;
+ r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+ if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+ fprintf(stderr, "radeon: Failed to assign virtual address space\n");
+ radeon_bo_destroy(&bo->base);
+ return NULL;
+ }
+ mtx_lock(&ws->bo_handles_mutex);
+ if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+ struct pb_buffer *b = &bo->base;
+ struct radeon_bo *old_bo =
+ util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
+
+ mtx_unlock(&ws->bo_handles_mutex);
+ pb_reference(&b, &old_bo->base);
+ return b;
+ }
+
+ _mesa_hash_table_insert(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
+ mtx_unlock(&ws->bo_handles_mutex);
+ }
+
+ ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
+
+ return (struct pb_buffer*)bo;
}
static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
struct winsys_handle *whandle,
unsigned vm_alignment)
{
- struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
- struct radeon_bo *bo;
- int r;
- unsigned handle;
- uint64_t size = 0;
-
- /* We must maintain a list of pairs <handle, bo>, so that we always return
- * the same BO for one particular handle. If we didn't do that and created
- * more than one BO for the same handle and then relocated them in a CS,
- * we would hit a deadlock in the kernel.
- *
- * The list of pairs is guarded by a mutex, of course. */
- mtx_lock(&ws->bo_handles_mutex);
-
- if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
- /* First check if there already is an existing bo for the handle. */
- bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
- } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
- /* We must first get the GEM handle, as fds are unreliable keys */
- r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
- if (r)
- goto fail;
- bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
- } else {
- /* Unknown handle type */
- goto fail;
- }
-
- if (bo) {
- /* Increase the refcount. */
- struct pb_buffer *b = NULL;
- pb_reference(&b, &bo->base);
- goto done;
- }
-
- /* There isn't, create a new one. */
- bo = CALLOC_STRUCT(radeon_bo);
- if (!bo) {
- goto fail;
- }
-
- if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
- struct drm_gem_open open_arg = {};
- memset(&open_arg, 0, sizeof(open_arg));
- /* Open the BO. */
- open_arg.name = whandle->handle;
- if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
- FREE(bo);
- goto fail;
- }
- handle = open_arg.handle;
- size = open_arg.size;
- bo->flink_name = whandle->handle;
- } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
- size = lseek(whandle->handle, 0, SEEK_END);
- /*
- * Could check errno to determine whether the kernel is new enough, but
- * it doesn't really matter why this failed, just that it failed.
- */
- if (size == (off_t)-1) {
- FREE(bo);
- goto fail;
- }
- lseek(whandle->handle, 0, SEEK_SET);
- }
-
- assert(handle != 0);
-
- bo->handle = handle;
-
- /* Initialize it. */
- pipe_reference_init(&bo->base.reference, 1);
- bo->base.alignment = 0;
- bo->base.size = (unsigned) size;
- bo->base.vtbl = &radeon_bo_vtbl;
- bo->rws = ws;
- bo->va = 0;
- bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
- (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
-
- if (bo->flink_name)
- _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
-
- _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
+ struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
+ struct radeon_bo *bo;
+ int r;
+ unsigned handle;
+ uint64_t size = 0;
+
+ /* We must maintain a list of pairs <handle, bo>, so that we always return
+ * the same BO for one particular handle. If we didn't do that and created
+ * more than one BO for the same handle and then relocated them in a CS,
+ * we would hit a deadlock in the kernel.
+ *
+ * The list of pairs is guarded by a mutex, of course. */
+ mtx_lock(&ws->bo_handles_mutex);
+
+ if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
+ /* First check if there already is an existing bo for the handle. */
+ bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
+ } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
+ /* We must first get the GEM handle, as fds are unreliable keys */
+ r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
+ if (r)
+ goto fail;
+ bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
+ } else {
+ /* Unknown handle type */
+ goto fail;
+ }
+
+ if (bo) {
+ /* Increase the refcount. */
+ struct pb_buffer *b = NULL;
+ pb_reference(&b, &bo->base);
+ goto done;
+ }
+
+ /* There isn't, create a new one. */
+ bo = CALLOC_STRUCT(radeon_bo);
+ if (!bo) {
+ goto fail;
+ }
+
+ if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
+ struct drm_gem_open open_arg = {};
+ memset(&open_arg, 0, sizeof(open_arg));
+ /* Open the BO. */
+ open_arg.name = whandle->handle;
+ if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
+ FREE(bo);
+ goto fail;
+ }
+ handle = open_arg.handle;
+ size = open_arg.size;
+ bo->flink_name = whandle->handle;
+ } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
+ size = lseek(whandle->handle, 0, SEEK_END);
+ /*
+ * Could check errno to determine whether the kernel is new enough, but
+ * it doesn't really matter why this failed, just that it failed.
+ */
+ if (size == (off_t)-1) {
+ FREE(bo);
+ goto fail;
+ }
+ lseek(whandle->handle, 0, SEEK_SET);
+ }
+
+ assert(handle != 0);
+
+ bo->handle = handle;
+
+ /* Initialize it. */
+ pipe_reference_init(&bo->base.reference, 1);
+ bo->base.alignment = 0;
+ bo->base.size = (unsigned) size;
+ bo->base.vtbl = &radeon_bo_vtbl;
+ bo->rws = ws;
+ bo->va = 0;
+ bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
+ (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
+
+ if (bo->flink_name)
+ _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
+
+ _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
done:
- mtx_unlock(&ws->bo_handles_mutex);
-
- if (ws->info.r600_has_virtual_memory && !bo->va) {
- struct drm_radeon_gem_va va;
-
- bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);
-
- va.handle = bo->handle;
- va.operation = RADEON_VA_MAP;
- va.vm_id = 0;
- va.offset = bo->va;
- va.flags = RADEON_VM_PAGE_READABLE |
- RADEON_VM_PAGE_WRITEABLE |
- RADEON_VM_PAGE_SNOOPED;
- va.offset = bo->va;
- r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
- if (r && va.operation == RADEON_VA_RESULT_ERROR) {
- fprintf(stderr, "radeon: Failed to assign virtual address space\n");
- radeon_bo_destroy(&bo->base);
- return NULL;
- }
- mtx_lock(&ws->bo_handles_mutex);
- if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
- struct pb_buffer *b = &bo->base;
- struct radeon_bo *old_bo =
- util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
-
- mtx_unlock(&ws->bo_handles_mutex);
- pb_reference(&b, &old_bo->base);
- return b;
- }
-
- _mesa_hash_table_insert(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
- mtx_unlock(&ws->bo_handles_mutex);
- }
-
- bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
-
- if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
- else if (bo->initial_domain & RADEON_DOMAIN_GTT)
- ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
-
- return (struct pb_buffer*)bo;
+ mtx_unlock(&ws->bo_handles_mutex);
+
+ if (ws->info.r600_has_virtual_memory && !bo->va) {
+ struct drm_radeon_gem_va va;
+
+ bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);
+
+ va.handle = bo->handle;
+ va.operation = RADEON_VA_MAP;
+ va.vm_id = 0;
+ va.offset = bo->va;
+ va.flags = RADEON_VM_PAGE_READABLE |
+ RADEON_VM_PAGE_WRITEABLE |
+ RADEON_VM_PAGE_SNOOPED;
+ r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+ if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+ fprintf(stderr, "radeon: Failed to assign virtual address space\n");
+ radeon_bo_destroy(&bo->base);
+ return NULL;
+ }
+ mtx_lock(&ws->bo_handles_mutex);
+ if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+ struct pb_buffer *b = &bo->base;
+ struct radeon_bo *old_bo =
+ util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
+
+ mtx_unlock(&ws->bo_handles_mutex);
+ pb_reference(&b, &old_bo->base);
+ return b;
+ }
+
+ _mesa_hash_table_insert(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
+ mtx_unlock(&ws->bo_handles_mutex);
+ }
+
+ bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
+
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
+ else if (bo->initial_domain & RADEON_DOMAIN_GTT)
+ ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
+
+ return (struct pb_buffer*)bo;
fail:
- mtx_unlock(&ws->bo_handles_mutex);
- return NULL;
+ mtx_unlock(&ws->bo_handles_mutex);
+ return NULL;
}
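
buffer_from_handle keys its lookup table on GEM handles rather than on incoming dma-buf fds, because distinct fds can wrap the same buffer while the kernel returns a single handle per underlying object on a given device fd. The fragment below sketches that property with plain libdrm calls; it assumes an already-open device fd and an already-imported dma-buf fd, and dup() is just one way to obtain a second fd for the same buffer.

/* Sketch: two distinct dma-buf fds wrapping the same buffer resolve to the
 * same GEM handle on a given device fd, which is why the table is keyed on
 * the handle rather than the fd. */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <xf86drm.h>

static void show_handle_dedup(int drm_fd, int dmabuf_fd)
{
   int other_fd = dup(dmabuf_fd);      /* same underlying buffer, different fd */
   uint32_t h1 = 0, h2 = 0;

   if (other_fd >= 0 &&
       drmPrimeFDToHandle(drm_fd, dmabuf_fd, &h1) == 0 &&
       drmPrimeFDToHandle(drm_fd, other_fd, &h2) == 0) {
      /* The kernel returns one GEM handle per underlying object, so h1 == h2
       * and a <handle, bo> table is enough to hand back the same BO. */
      printf("handles: %u and %u (%s)\n", h1, h2,
             h1 == h2 ? "deduplicated" : "different");
   }

   if (other_fd >= 0)
      close(other_fd);
}
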
static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
struct pb_buffer *buffer,
struct winsys_handle *whandle)
{
- struct drm_gem_flink flink;
- struct radeon_bo *bo = radeon_bo(buffer);
- struct radeon_drm_winsys *ws = bo->rws;
+ struct drm_gem_flink flink;
+ struct radeon_bo *bo = radeon_bo(buffer);
+ struct radeon_drm_winsys *ws = bo->rws;
- /* Don't allow exports of slab entries. */
- if (!bo->handle)
- return false;
+ /* Don't allow exports of slab entries. */
+ if (!bo->handle)
+ return false;
- memset(&flink, 0, sizeof(flink));
+ memset(&flink, 0, sizeof(flink));
- bo->u.real.use_reusable_pool = false;
+ bo->u.real.use_reusable_pool = false;
- if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
- if (!bo->flink_name) {
- flink.handle = bo->handle;
+ if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
+ if (!bo->flink_name) {
+ flink.handle = bo->handle;
- if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
- return false;
- }
-
- bo->flink_name = flink.name;
-
- mtx_lock(&ws->bo_handles_mutex);
- _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
- mtx_unlock(&ws->bo_handles_mutex);
- }
- whandle->handle = bo->flink_name;
- } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
- whandle->handle = bo->handle;
- } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
- if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
+ if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
return false;
- }
-
- return true;
+ }
+
+ bo->flink_name = flink.name;
+
+ mtx_lock(&ws->bo_handles_mutex);
+ _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
+ mtx_unlock(&ws->bo_handles_mutex);
+ }
+ whandle->handle = bo->flink_name;
+ } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
+ whandle->handle = bo->handle;
+ } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
+ if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
+ return false;
+ }
+
+ return true;
}
static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
{
- return ((struct radeon_bo*)buf)->va;
+ return ((struct radeon_bo*)buf)->va;
}
static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
{
- struct radeon_bo *bo = radeon_bo(buf);
+ struct radeon_bo *bo = radeon_bo(buf);
- if (bo->handle)
- return 0;
+ if (bo->handle)
+ return 0;
- return bo->va - bo->u.slab.real->va;
+ return bo->va - bo->u.slab.real->va;
}
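
For slab entries (handle == 0) the reloc offset above is just the entry's virtual address relative to the backing real BO. A tiny worked example with made-up addresses:

/* Tiny worked example: a slab entry's reloc offset is its VA relative to the
 * backing real BO. The addresses are made up. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
   uint64_t real_va  = 0x100000;                 /* VA of the backing real BO */
   uint64_t entry_va = real_va + 3 * 4096;       /* fourth 4 KiB suballocation */

   printf("reloc offset: %" PRIu64 " bytes\n", entry_va - real_va);   /* 12288 */
   return 0;
}
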
void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
{
- ws->base.buffer_set_metadata = radeon_bo_set_metadata;
- ws->base.buffer_get_metadata = radeon_bo_get_metadata;
- ws->base.buffer_map = radeon_bo_map;
- ws->base.buffer_unmap = radeon_bo_unmap;
- ws->base.buffer_wait = radeon_bo_wait;
- ws->base.buffer_create = radeon_winsys_bo_create;
- ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
- ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
- ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
- ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
- ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
- ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
- ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
- ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
+ ws->base.buffer_set_metadata = radeon_bo_set_metadata;
+ ws->base.buffer_get_metadata = radeon_bo_get_metadata;
+ ws->base.buffer_map = radeon_bo_map;
+ ws->base.buffer_unmap = radeon_bo_unmap;
+ ws->base.buffer_wait = radeon_bo_wait;
+ ws->base.buffer_create = radeon_winsys_bo_create;
+ ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
+ ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
+ ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
+ ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
+ ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
+ ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
+ ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
+ ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
}
#include "pipebuffer/pb_slab.h"
struct radeon_bo {
- struct pb_buffer base;
- union {
- struct {
- struct pb_cache_entry cache_entry;
+ struct pb_buffer base;
+ union {
+ struct {
+ struct pb_cache_entry cache_entry;
- void *ptr;
- mtx_t map_mutex;
- unsigned map_count;
- bool use_reusable_pool;
- } real;
- struct {
- struct pb_slab_entry entry;
- struct radeon_bo *real;
+ void *ptr;
+ mtx_t map_mutex;
+ unsigned map_count;
+ bool use_reusable_pool;
+ } real;
+ struct {
+ struct pb_slab_entry entry;
+ struct radeon_bo *real;
- unsigned num_fences;
- unsigned max_fences;
- struct radeon_bo **fences;
- } slab;
- } u;
+ unsigned num_fences;
+ unsigned max_fences;
+ struct radeon_bo **fences;
+ } slab;
+ } u;
- struct radeon_drm_winsys *rws;
- void *user_ptr; /* from buffer_from_ptr */
+ struct radeon_drm_winsys *rws;
+ void *user_ptr; /* from buffer_from_ptr */
- uint32_t handle; /* 0 for slab entries */
- uint32_t flink_name;
- uint64_t va;
- uint32_t hash;
- enum radeon_bo_domain initial_domain;
+ uint32_t handle; /* 0 for slab entries */
+ uint32_t flink_name;
+ uint64_t va;
+ uint32_t hash;
+ enum radeon_bo_domain initial_domain;
- /* how many command streams is this bo referenced in? */
- int num_cs_references;
+ /* how many command streams is this bo referenced in? */
+ int num_cs_references;
- /* how many command streams, which are being emitted in a separate
- * thread, is this bo referenced in? */
- int num_active_ioctls;
+ /* how many command streams, which are being emitted in a separate
+ * thread, is this bo referenced in? */
+ int num_active_ioctls;
};
struct radeon_slab {
- struct pb_slab base;
- struct radeon_bo *buffer;
- struct radeon_bo *entries;
+ struct pb_slab base;
+ struct radeon_bo *buffer;
+ struct radeon_bo *entries;
};
void radeon_bo_destroy(struct pb_buffer *_buf);
static inline
void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
{
- pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
+ pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}
void *radeon_bo_do_map(struct radeon_bo *bo);
#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
-static struct pipe_fence_handle *
-radeon_cs_create_fence(struct radeon_cmdbuf *rcs);
+static struct pipe_fence_handle *radeon_cs_create_fence(struct radeon_cmdbuf *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
struct pipe_fence_handle *src);
static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
{
- struct radeon_ctx *ctx = CALLOC_STRUCT(radeon_ctx);
- if (!ctx)
- return NULL;
+ struct radeon_ctx *ctx = CALLOC_STRUCT(radeon_ctx);
+ if (!ctx)
+ return NULL;
- ctx->ws = (struct radeon_drm_winsys*)ws;
- ctx->gpu_reset_counter = radeon_drm_get_gpu_reset_counter(ctx->ws);
- return (struct radeon_winsys_ctx*)ctx;
+ ctx->ws = (struct radeon_drm_winsys*)ws;
+ ctx->gpu_reset_counter = radeon_drm_get_gpu_reset_counter(ctx->ws);
+ return (struct radeon_winsys_ctx*)ctx;
}
static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
{
- FREE(ctx);
+ FREE(ctx);
}
static enum pipe_reset_status
radeon_drm_ctx_query_reset_status(struct radeon_winsys_ctx *rctx)
{
- struct radeon_ctx *ctx = (struct radeon_ctx*)rctx;
+ struct radeon_ctx *ctx = (struct radeon_ctx*)rctx;
- unsigned latest = radeon_drm_get_gpu_reset_counter(ctx->ws);
+ unsigned latest = radeon_drm_get_gpu_reset_counter(ctx->ws);
- if (ctx->gpu_reset_counter == latest)
- return PIPE_NO_RESET;
+ if (ctx->gpu_reset_counter == latest)
+ return PIPE_NO_RESET;
- ctx->gpu_reset_counter = latest;
- return PIPE_UNKNOWN_CONTEXT_RESET;
+ ctx->gpu_reset_counter = latest;
+ return PIPE_UNKNOWN_CONTEXT_RESET;
}
static bool radeon_init_cs_context(struct radeon_cs_context *csc,
struct radeon_drm_winsys *ws)
{
- int i;
-
- csc->fd = ws->fd;
-
- csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
- csc->chunks[0].length_dw = 0;
- csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
- csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
- csc->chunks[1].length_dw = 0;
- csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
- csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
- csc->chunks[2].length_dw = 2;
- csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;
-
- csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
- csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
- csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];
-
- csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
-
- for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
- csc->reloc_indices_hashlist[i] = -1;
- }
- return true;
+ int i;
+
+ csc->fd = ws->fd;
+
+ csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
+ csc->chunks[0].length_dw = 0;
+ csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
+ csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
+ csc->chunks[1].length_dw = 0;
+ csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
+ csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
+ csc->chunks[2].length_dw = 2;
+ csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;
+
+ csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
+ csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
+ csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];
+
+ csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
+
+ for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
+ csc->reloc_indices_hashlist[i] = -1;
+ }
+ return true;
}
static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
- unsigned i;
-
- for (i = 0; i < csc->num_relocs; i++) {
- p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
- radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
- }
- for (i = 0; i < csc->num_slab_buffers; ++i) {
- p_atomic_dec(&csc->slab_buffers[i].bo->num_cs_references);
- radeon_bo_reference(&csc->slab_buffers[i].bo, NULL);
- }
-
- csc->num_relocs = 0;
- csc->num_validated_relocs = 0;
- csc->num_slab_buffers = 0;
- csc->chunks[0].length_dw = 0;
- csc->chunks[1].length_dw = 0;
-
- for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
- csc->reloc_indices_hashlist[i] = -1;
- }
+ unsigned i;
+
+ for (i = 0; i < csc->num_relocs; i++) {
+ p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
+ radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
+ }
+ for (i = 0; i < csc->num_slab_buffers; ++i) {
+ p_atomic_dec(&csc->slab_buffers[i].bo->num_cs_references);
+ radeon_bo_reference(&csc->slab_buffers[i].bo, NULL);
+ }
+
+ csc->num_relocs = 0;
+ csc->num_validated_relocs = 0;
+ csc->num_slab_buffers = 0;
+ csc->chunks[0].length_dw = 0;
+ csc->chunks[1].length_dw = 0;
+
+ for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
+ csc->reloc_indices_hashlist[i] = -1;
+ }
}
static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
- radeon_cs_context_cleanup(csc);
- FREE(csc->slab_buffers);
- FREE(csc->relocs_bo);
- FREE(csc->relocs);
+ radeon_cs_context_cleanup(csc);
+ FREE(csc->slab_buffers);
+ FREE(csc->relocs_bo);
+ FREE(csc->relocs);
}
void *flush_ctx,
bool stop_exec_on_failure)
{
- struct radeon_drm_winsys *ws = ((struct radeon_ctx*)ctx)->ws;
- struct radeon_drm_cs *cs;
-
- cs = CALLOC_STRUCT(radeon_drm_cs);
- if (!cs) {
- return NULL;
- }
- util_queue_fence_init(&cs->flush_completed);
-
- cs->ws = ws;
- cs->flush_cs = flush;
- cs->flush_data = flush_ctx;
-
- if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
- FREE(cs);
- return NULL;
- }
- if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
- radeon_destroy_cs_context(&cs->csc1);
- FREE(cs);
- return NULL;
- }
-
- /* Set the first command buffer as current. */
- cs->csc = &cs->csc1;
- cs->cst = &cs->csc2;
- cs->base.current.buf = cs->csc->buf;
- cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
- cs->ring_type = ring_type;
-
- p_atomic_inc(&ws->num_cs);
- return &cs->base;
+ struct radeon_drm_winsys *ws = ((struct radeon_ctx*)ctx)->ws;
+ struct radeon_drm_cs *cs;
+
+ cs = CALLOC_STRUCT(radeon_drm_cs);
+ if (!cs) {
+ return NULL;
+ }
+ util_queue_fence_init(&cs->flush_completed);
+
+ cs->ws = ws;
+ cs->flush_cs = flush;
+ cs->flush_data = flush_ctx;
+
+ if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
+ FREE(cs);
+ return NULL;
+ }
+ if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
+ radeon_destroy_cs_context(&cs->csc1);
+ FREE(cs);
+ return NULL;
+ }
+
+ /* Set the first command buffer as current. */
+ cs->csc = &cs->csc1;
+ cs->cst = &cs->csc2;
+ cs->base.current.buf = cs->csc->buf;
+ cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
+ cs->ring_type = ring_type;
+
+ p_atomic_inc(&ws->num_cs);
+ return &cs->base;
}
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
- unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
- struct radeon_bo_item *buffers;
- unsigned num_buffers;
- int i = csc->reloc_indices_hashlist[hash];
-
- if (bo->handle) {
- buffers = csc->relocs_bo;
- num_buffers = csc->num_relocs;
- } else {
- buffers = csc->slab_buffers;
- num_buffers = csc->num_slab_buffers;
- }
-
- /* not found or found */
- if (i == -1 || (i < num_buffers && buffers[i].bo == bo))
- return i;
-
- /* Hash collision, look for the BO in the list of relocs linearly. */
- for (i = num_buffers - 1; i >= 0; i--) {
- if (buffers[i].bo == bo) {
- /* Put this reloc in the hash list.
- * This will prevent additional hash collisions if there are
- * several consecutive lookup_buffer calls for the same buffer.
- *
- * Example: Assuming buffers A,B,C collide in the hash list,
- * the following sequence of relocs:
- * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
- * will collide here: ^ and here: ^,
- * meaning that we should get very few collisions in the end. */
- csc->reloc_indices_hashlist[hash] = i;
- return i;
- }
- }
- return -1;
+ unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+ struct radeon_bo_item *buffers;
+ unsigned num_buffers;
+ int i = csc->reloc_indices_hashlist[hash];
+
+ if (bo->handle) {
+ buffers = csc->relocs_bo;
+ num_buffers = csc->num_relocs;
+ } else {
+ buffers = csc->slab_buffers;
+ num_buffers = csc->num_slab_buffers;
+ }
+
+ /* Either a definite miss (-1) or the cached index already points at this BO. */
+ if (i == -1 || (i < num_buffers && buffers[i].bo == bo))
+ return i;
+
+ /* Hash collision, look for the BO in the list of relocs linearly. */
+ for (i = num_buffers - 1; i >= 0; i--) {
+ if (buffers[i].bo == bo) {
+ /* Put this reloc in the hash list.
+ * This will prevent additional hash collisions if there are
+ * several consecutive lookup_buffer calls for the same buffer.
+ *
+ * Example: Assuming buffers A,B,C collide in the hash list,
+ * the following sequence of relocs:
+ * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
+ * will collide here: ^ and here: ^,
+ * meaning that we should get very few collisions in the end. */
+ csc->reloc_indices_hashlist[hash] = i;
+ return i;
+ }
+ }
+ return -1;
}
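
radeon_lookup_buffer puts a single-entry-per-bucket cache of indices (reloc_indices_hashlist) in front of an unsorted array, scans newest-first on a miss, and re-primes the cache so that runs of lookups for the same BO stay on the fast path. The sketch below shows the same pattern in isolation; names and sizes are illustrative.

/* The same lookup pattern in isolation: a direct-mapped cache of indices in
 * front of an unsorted array. Names and sizes are illustrative only. */
#include <stdio.h>

#define CACHE_SIZE 16              /* power of two, like the 4096-entry hashlist */

struct item { void *key; };

static struct item items[64];
static int num_items;
static int cache[CACHE_SIZE];      /* index into items[], or -1 */

static int lookup(void *key, unsigned hash)
{
   int i = cache[hash & (CACHE_SIZE - 1)];

   /* Fast path: definite miss, or the cached index points at this very key. */
   if (i == -1 || (i < num_items && items[i].key == key))
      return i;

   /* Collision: scan newest-first, then re-prime the cache so consecutive
    * lookups for the same key take the fast path again. */
   for (i = num_items - 1; i >= 0; i--) {
      if (items[i].key == key) {
         cache[hash & (CACHE_SIZE - 1)] = i;
         return i;
      }
   }
   return -1;
}

int main(void)
{
   int a, b;                                   /* two dummy keys */

   for (int i = 0; i < CACHE_SIZE; i++)
      cache[i] = -1;

   items[num_items++] = (struct item){ .key = &a };
   items[num_items++] = (struct item){ .key = &b };
   cache[0] = 0;                               /* pretend &a hashed to bucket 0 */

   printf("&b found at index %d\n", lookup(&b, 0));   /* collides, then found: 1 */
   return 0;
}
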
static unsigned radeon_lookup_or_add_real_buffer(struct radeon_drm_cs *cs,
struct radeon_bo *bo)
{
- struct radeon_cs_context *csc = cs->csc;
- struct drm_radeon_cs_reloc *reloc;
- unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
- int i = -1;
-
- i = radeon_lookup_buffer(csc, bo);
-
- if (i >= 0) {
- /* For async DMA, every add_buffer call must add a buffer to the list
- * no matter how many duplicates there are. This is due to the fact
- * the DMA CS checker doesn't use NOP packets for offset patching,
- * but always uses the i-th buffer from the list to patch the i-th
- * offset. If there are N offsets in a DMA CS, there must also be N
- * buffers in the relocation list.
- *
- * This doesn't have to be done if virtual memory is enabled,
- * because there is no offset patching with virtual memory.
- */
- if (cs->ring_type != RING_DMA || cs->ws->info.r600_has_virtual_memory) {
- return i;
- }
- }
-
- /* New relocation, check if the backing array is large enough. */
- if (csc->num_relocs >= csc->max_relocs) {
- uint32_t size;
- csc->max_relocs = MAX2(csc->max_relocs + 16, (unsigned)(csc->max_relocs * 1.3));
-
- size = csc->max_relocs * sizeof(csc->relocs_bo[0]);
- csc->relocs_bo = realloc(csc->relocs_bo, size);
-
- size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc);
- csc->relocs = realloc(csc->relocs, size);
-
- csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
- }
-
- /* Initialize the new relocation. */
- csc->relocs_bo[csc->num_relocs].bo = NULL;
- csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0;
- radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
- p_atomic_inc(&bo->num_cs_references);
- reloc = &csc->relocs[csc->num_relocs];
- reloc->handle = bo->handle;
- reloc->read_domains = 0;
- reloc->write_domain = 0;
- reloc->flags = 0;
-
- csc->reloc_indices_hashlist[hash] = csc->num_relocs;
-
- csc->chunks[1].length_dw += RELOC_DWORDS;
-
- return csc->num_relocs++;
+ struct radeon_cs_context *csc = cs->csc;
+ struct drm_radeon_cs_reloc *reloc;
+ unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+ int i = -1;
+
+ i = radeon_lookup_buffer(csc, bo);
+
+ if (i >= 0) {
+ /* For async DMA, every add_buffer call must add a buffer to the list
+ * no matter how many duplicates there are. This is due to the fact
+ * the DMA CS checker doesn't use NOP packets for offset patching,
+ * but always uses the i-th buffer from the list to patch the i-th
+ * offset. If there are N offsets in a DMA CS, there must also be N
+ * buffers in the relocation list.
+ *
+ * This doesn't have to be done if virtual memory is enabled,
+ * because there is no offset patching with virtual memory.
+ */
+ if (cs->ring_type != RING_DMA || cs->ws->info.r600_has_virtual_memory) {
+ return i;
+ }
+ }
+
+ /* New relocation, check if the backing array is large enough. */
+ if (csc->num_relocs >= csc->max_relocs) {
+ uint32_t size;
+ csc->max_relocs = MAX2(csc->max_relocs + 16, (unsigned)(csc->max_relocs * 1.3));
+
+ size = csc->max_relocs * sizeof(csc->relocs_bo[0]);
+ csc->relocs_bo = realloc(csc->relocs_bo, size);
+
+ size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc);
+ csc->relocs = realloc(csc->relocs, size);
+
+ csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
+ }
+
+ /* Initialize the new relocation. */
+ csc->relocs_bo[csc->num_relocs].bo = NULL;
+ csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0;
+ radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
+ p_atomic_inc(&bo->num_cs_references);
+ reloc = &csc->relocs[csc->num_relocs];
+ reloc->handle = bo->handle;
+ reloc->read_domains = 0;
+ reloc->write_domain = 0;
+ reloc->flags = 0;
+
+ csc->reloc_indices_hashlist[hash] = csc->num_relocs;
+
+ csc->chunks[1].length_dw += RELOC_DWORDS;
+
+ return csc->num_relocs++;
}
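
The reloc array grows by MAX2(max + 16, max * 1.3): fixed steps of 16 while the array is small, then roughly 1.3x geometric growth once 30% of the capacity exceeds 16 entries. A short sketch that prints the resulting capacity sequence:

/* Sketch: the reloc-array growth policy in isolation. */
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   unsigned max_relocs = 0;

   for (int step = 1; step <= 8; step++) {
      max_relocs = MAX2(max_relocs + 16, (unsigned)(max_relocs * 1.3));
      printf("capacity after growth %d: %u\n", step, max_relocs);
      /* prints 16, 32, 48, 64, 83, 107, 139, 180 */
   }
   return 0;
}
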
static int radeon_lookup_or_add_slab_buffer(struct radeon_drm_cs *cs,
struct radeon_bo *bo)
{
- struct radeon_cs_context *csc = cs->csc;
- unsigned hash;
- struct radeon_bo_item *item;
- int idx;
- int real_idx;
-
- idx = radeon_lookup_buffer(csc, bo);
- if (idx >= 0)
- return idx;
-
- real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real);
-
- /* Check if the backing array is large enough. */
- if (csc->num_slab_buffers >= csc->max_slab_buffers) {
- unsigned new_max = MAX2(csc->max_slab_buffers + 16,
- (unsigned)(csc->max_slab_buffers * 1.3));
- struct radeon_bo_item *new_buffers =
+ struct radeon_cs_context *csc = cs->csc;
+ unsigned hash;
+ struct radeon_bo_item *item;
+ int idx;
+ int real_idx;
+
+ idx = radeon_lookup_buffer(csc, bo);
+ if (idx >= 0)
+ return idx;
+
+ real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real);
+
+ /* Check if the backing array is large enough. */
+ if (csc->num_slab_buffers >= csc->max_slab_buffers) {
+ unsigned new_max = MAX2(csc->max_slab_buffers + 16,
+ (unsigned)(csc->max_slab_buffers * 1.3));
+ struct radeon_bo_item *new_buffers =
REALLOC(csc->slab_buffers,
csc->max_slab_buffers * sizeof(*new_buffers),
new_max * sizeof(*new_buffers));
- if (!new_buffers) {
- fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n");
- return -1;
- }
+ if (!new_buffers) {
+ fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n");
+ return -1;
+ }
- csc->max_slab_buffers = new_max;
- csc->slab_buffers = new_buffers;
- }
+ csc->max_slab_buffers = new_max;
+ csc->slab_buffers = new_buffers;
+ }
- /* Initialize the new relocation. */
- idx = csc->num_slab_buffers++;
- item = &csc->slab_buffers[idx];
+ /* Initialize the new relocation. */
+ idx = csc->num_slab_buffers++;
+ item = &csc->slab_buffers[idx];
- item->bo = NULL;
- item->u.slab.real_idx = real_idx;
- radeon_bo_reference(&item->bo, bo);
- p_atomic_inc(&bo->num_cs_references);
+ item->bo = NULL;
+ item->u.slab.real_idx = real_idx;
+ radeon_bo_reference(&item->bo, bo);
+ p_atomic_inc(&bo->num_cs_references);
- hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
- csc->reloc_indices_hashlist[hash] = idx;
+ hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+ csc->reloc_indices_hashlist[hash] = idx;
- return idx;
+ return idx;
}
static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs,
- struct pb_buffer *buf,
- enum radeon_bo_usage usage,
- enum radeon_bo_domain domains,
- enum radeon_bo_priority priority)
+ struct pb_buffer *buf,
+ enum radeon_bo_usage usage,
+ enum radeon_bo_domain domains,
+ enum radeon_bo_priority priority)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- struct radeon_bo *bo = (struct radeon_bo*)buf;
- enum radeon_bo_domain added_domains;
-
- /* If VRAM is just stolen system memory, allow both VRAM and
- * GTT, whichever has free space. If a buffer is evicted from
- * VRAM to GTT, it will stay there.
- */
- if (!cs->ws->info.has_dedicated_vram)
- domains |= RADEON_DOMAIN_GTT;
-
- enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
- enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
- struct drm_radeon_cs_reloc *reloc;
- int index;
-
- if (!bo->handle) {
- index = radeon_lookup_or_add_slab_buffer(cs, bo);
- if (index < 0)
- return 0;
-
- index = cs->csc->slab_buffers[index].u.slab.real_idx;
- } else {
- index = radeon_lookup_or_add_real_buffer(cs, bo);
- }
-
- reloc = &cs->csc->relocs[index];
- added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
- reloc->read_domains |= rd;
- reloc->write_domain |= wd;
- reloc->flags = MAX2(reloc->flags, priority);
- cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority;
-
- if (added_domains & RADEON_DOMAIN_VRAM)
- cs->base.used_vram += bo->base.size;
- else if (added_domains & RADEON_DOMAIN_GTT)
- cs->base.used_gart += bo->base.size;
-
- return index;
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct radeon_bo *bo = (struct radeon_bo*)buf;
+ enum radeon_bo_domain added_domains;
+
+ /* If VRAM is just stolen system memory, allow both VRAM and
+ * GTT, whichever has free space. If a buffer is evicted from
+ * VRAM to GTT, it will stay there.
+ */
+ if (!cs->ws->info.has_dedicated_vram)
+ domains |= RADEON_DOMAIN_GTT;
+
+ enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
+ enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
+ struct drm_radeon_cs_reloc *reloc;
+ int index;
+
+ if (!bo->handle) {
+ index = radeon_lookup_or_add_slab_buffer(cs, bo);
+ if (index < 0)
+ return 0;
+
+ index = cs->csc->slab_buffers[index].u.slab.real_idx;
+ } else {
+ index = radeon_lookup_or_add_real_buffer(cs, bo);
+ }
+
+ reloc = &cs->csc->relocs[index];
+ added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
+ reloc->read_domains |= rd;
+ reloc->write_domain |= wd;
+ reloc->flags = MAX2(reloc->flags, priority);
+ cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority;
+
+ if (added_domains & RADEON_DOMAIN_VRAM)
+ cs->base.used_vram += bo->base.size;
+ else if (added_domains & RADEON_DOMAIN_GTT)
+ cs->base.used_gart += bo->base.size;
+
+ return index;
}
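
cs_add_buffer accumulates read and write domains per relocation and charges used_vram/used_gart only for domains that are new to that relocation, so adding the same buffer repeatedly does not recount its size. A minimal sketch of the added_domains computation; the domain bit values here are assumptions, not the driver's actual enum.

/* Sketch: only domains that are new for this relocation bump the counters. */
#include <stdio.h>

enum { DOM_GTT = 1u << 0, DOM_VRAM = 1u << 1 };   /* assumed bit assignment */

int main(void)
{
   unsigned read_domains = 0, write_domain = 0;   /* state kept per relocation */
   unsigned rd = DOM_VRAM, wd = DOM_VRAM;         /* read+write in VRAM */

   /* First add_buffer call: VRAM is new, so the BO size is counted once. */
   unsigned added = (rd | wd) & ~(read_domains | write_domain);
   read_domains |= rd;
   write_domain |= wd;
   printf("first add:  new domains 0x%x\n", added);   /* 0x2 */

   /* Second call with the same domains: nothing new, nothing recounted. */
   added = (rd | wd) & ~(read_domains | write_domain);
   printf("second add: new domains 0x%x\n", added);   /* 0x0 */
   return 0;
}
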
static int radeon_drm_cs_lookup_buffer(struct radeon_cmdbuf *rcs,
- struct pb_buffer *buf)
+ struct pb_buffer *buf)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
+ return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}
static bool radeon_drm_cs_validate(struct radeon_cmdbuf *rcs)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- bool status =
- cs->base.used_gart < cs->ws->info.gart_size * 0.8 &&
- cs->base.used_vram < cs->ws->info.vram_size * 0.8;
-
- if (status) {
- cs->csc->num_validated_relocs = cs->csc->num_relocs;
- } else {
- /* Remove lately-added buffers. The validation failed with them
- * and the CS is about to be flushed because of that. Keep only
- * the already-validated buffers. */
- unsigned i;
-
- for (i = cs->csc->num_validated_relocs; i < cs->csc->num_relocs; i++) {
- p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
- radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
- }
- cs->csc->num_relocs = cs->csc->num_validated_relocs;
-
- /* Flush if there are any relocs. Clean up otherwise. */
- if (cs->csc->num_relocs) {
- cs->flush_cs(cs->flush_data,
- RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
- } else {
- radeon_cs_context_cleanup(cs->csc);
- cs->base.used_vram = 0;
- cs->base.used_gart = 0;
-
- assert(cs->base.current.cdw == 0);
- if (cs->base.current.cdw != 0) {
- fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
- }
- }
- }
- return status;
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ bool status =
+ cs->base.used_gart < cs->ws->info.gart_size * 0.8 &&
+ cs->base.used_vram < cs->ws->info.vram_size * 0.8;
+
+ if (status) {
+ cs->csc->num_validated_relocs = cs->csc->num_relocs;
+ } else {
+ /* Remove lately-added buffers. The validation failed with them
+ * and the CS is about to be flushed because of that. Keep only
+ * the already-validated buffers. */
+ unsigned i;
+
+ for (i = cs->csc->num_validated_relocs; i < cs->csc->num_relocs; i++) {
+ p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
+ radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
+ }
+ cs->csc->num_relocs = cs->csc->num_validated_relocs;
+
+ /* Flush if there are any relocs. Clean up otherwise. */
+ if (cs->csc->num_relocs) {
+ cs->flush_cs(cs->flush_data,
+ RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
+ } else {
+ radeon_cs_context_cleanup(cs->csc);
+ cs->base.used_vram = 0;
+ cs->base.used_gart = 0;
+
+ assert(cs->base.current.cdw == 0);
+ if (cs->base.current.cdw != 0) {
+ fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
+ }
+ }
+ }
+ return status;
}
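
cs_validate is a plain 80% watermark against the reported GART and VRAM sizes: once the buffers referenced by the CS cross either threshold, the newest relocations are dropped and the CS is flushed. A tiny worked example with made-up sizes that lands on the flush path:

/* Tiny worked example of the 80% watermark; all sizes are made up. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint64_t gart_size = 2ull << 30;          /* 2 GiB of GTT */
   uint64_t vram_size = 4ull << 30;          /* 4 GiB of VRAM */
   uint64_t used_gart = 1700ull << 20;       /* ~1.66 GiB referenced by the CS */
   uint64_t used_vram = 1ull << 30;          /* 1 GiB referenced by the CS */

   bool status = used_gart < gart_size * 0.8 &&   /* watermark: 1.6 GiB */
                 used_vram < vram_size * 0.8;     /* watermark: 3.2 GiB */

   printf("validate %s\n", status ? "passes" : "fails: drop new relocs and flush");
   return 0;
}
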
static bool radeon_drm_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw,
static unsigned radeon_drm_cs_get_buffer_list(struct radeon_cmdbuf *rcs,
struct radeon_bo_list_item *list)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- int i;
-
- if (list) {
- for (i = 0; i < cs->csc->num_relocs; i++) {
- list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
- list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
- list[i].priority_usage = cs->csc->relocs_bo[i].u.real.priority_usage;
- }
- }
- return cs->csc->num_relocs;
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ int i;
+
+ if (list) {
+ for (i = 0; i < cs->csc->num_relocs; i++) {
+ list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
+ list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
+ list[i].priority_usage = cs->csc->relocs_bo[i].u.real.priority_usage;
+ }
+ }
+ return cs->csc->num_relocs;
}
void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index)
{
- struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
- unsigned i;
- int r;
-
- r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
- &csc->cs, sizeof(struct drm_radeon_cs));
- if (r) {
- if (r == -ENOMEM)
- fprintf(stderr, "radeon: Not enough memory for command submission.\n");
- else if (debug_get_bool_option("RADEON_DUMP_CS", false)) {
- unsigned i;
-
- fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
- for (i = 0; i < csc->chunks[0].length_dw; i++) {
- fprintf(stderr, "0x%08X\n", csc->buf[i]);
- }
- } else {
- fprintf(stderr, "radeon: The kernel rejected CS, "
- "see dmesg for more information (%i).\n", r);
- }
- }
-
- for (i = 0; i < csc->num_relocs; i++)
- p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);
- for (i = 0; i < csc->num_slab_buffers; i++)
- p_atomic_dec(&csc->slab_buffers[i].bo->num_active_ioctls);
-
- radeon_cs_context_cleanup(csc);
+ struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
+ unsigned i;
+ int r;
+
+ r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
+ &csc->cs, sizeof(struct drm_radeon_cs));
+ if (r) {
+ if (r == -ENOMEM)
+ fprintf(stderr, "radeon: Not enough memory for command submission.\n");
+ else if (debug_get_bool_option("RADEON_DUMP_CS", false)) {
+ unsigned i;
+
+ fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
+ for (i = 0; i < csc->chunks[0].length_dw; i++) {
+ fprintf(stderr, "0x%08X\n", csc->buf[i]);
+ }
+ } else {
+ fprintf(stderr, "radeon: The kernel rejected CS, "
+ "see dmesg for more information (%i).\n", r);
+ }
+ }
+
+ for (i = 0; i < csc->num_relocs; i++)
+ p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);
+ for (i = 0; i < csc->num_slab_buffers; i++)
+ p_atomic_dec(&csc->slab_buffers[i].bo->num_active_ioctls);
+
+ radeon_cs_context_cleanup(csc);
}
/*
*/
void radeon_drm_cs_sync_flush(struct radeon_cmdbuf *rcs)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- /* Wait for any pending ioctl of this CS to complete. */
- if (util_queue_is_initialized(&cs->ws->cs_queue))
- util_queue_fence_wait(&cs->flush_completed);
+ /* Wait for any pending ioctl of this CS to complete. */
+ if (util_queue_is_initialized(&cs->ws->cs_queue))
+ util_queue_fence_wait(&cs->flush_completed);
}
/* Add the given fence to a slab buffer fence list.
*/
static void radeon_bo_slab_fence(struct radeon_bo *bo, struct radeon_bo *fence)
{
- unsigned dst;
-
- assert(fence->num_cs_references);
-
- /* Cleanup older fences */
- dst = 0;
- for (unsigned src = 0; src < bo->u.slab.num_fences; ++src) {
- if (bo->u.slab.fences[src]->num_cs_references) {
- bo->u.slab.fences[dst] = bo->u.slab.fences[src];
- dst++;
- } else {
- radeon_bo_reference(&bo->u.slab.fences[src], NULL);
- }
- }
- bo->u.slab.num_fences = dst;
-
- /* Check available space for the new fence */
- if (bo->u.slab.num_fences >= bo->u.slab.max_fences) {
- unsigned new_max_fences = bo->u.slab.max_fences + 1;
- struct radeon_bo **new_fences = REALLOC(bo->u.slab.fences,
- bo->u.slab.max_fences * sizeof(*new_fences),
- new_max_fences * sizeof(*new_fences));
- if (!new_fences) {
- fprintf(stderr, "radeon_bo_slab_fence: allocation failure, dropping fence\n");
- return;
- }
-
- bo->u.slab.fences = new_fences;
- bo->u.slab.max_fences = new_max_fences;
- }
-
- /* Add the new fence */
- bo->u.slab.fences[bo->u.slab.num_fences] = NULL;
- radeon_bo_reference(&bo->u.slab.fences[bo->u.slab.num_fences], fence);
- bo->u.slab.num_fences++;
+ unsigned dst;
+
+ assert(fence->num_cs_references);
+
+ /* Cleanup older fences */
+ dst = 0;
+ for (unsigned src = 0; src < bo->u.slab.num_fences; ++src) {
+ if (bo->u.slab.fences[src]->num_cs_references) {
+ bo->u.slab.fences[dst] = bo->u.slab.fences[src];
+ dst++;
+ } else {
+ radeon_bo_reference(&bo->u.slab.fences[src], NULL);
+ }
+ }
+ bo->u.slab.num_fences = dst;
+
+ /* Check available space for the new fence */
+ if (bo->u.slab.num_fences >= bo->u.slab.max_fences) {
+ unsigned new_max_fences = bo->u.slab.max_fences + 1;
+ struct radeon_bo **new_fences = REALLOC(bo->u.slab.fences,
+ bo->u.slab.max_fences * sizeof(*new_fences),
+ new_max_fences * sizeof(*new_fences));
+ if (!new_fences) {
+ fprintf(stderr, "radeon_bo_slab_fence: allocation failure, dropping fence\n");
+ return;
+ }
+
+ bo->u.slab.fences = new_fences;
+ bo->u.slab.max_fences = new_max_fences;
+ }
+
+ /* Add the new fence */
+ bo->u.slab.fences[bo->u.slab.num_fences] = NULL;
+ radeon_bo_reference(&bo->u.slab.fences[bo->u.slab.num_fences], fence);
+ bo->u.slab.num_fences++;
}
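
radeon_bo_slab_fence first compacts the fence array in place, keeping only fences whose CS references are still live, then appends the new fence. The sketch below shows the same two-index compaction on a plain array.

/* The same two-index, in-place compaction on a plain array; negative values
 * stand in for fences whose CS references have dropped to zero. */
#include <stdio.h>

int main(void)
{
   int fences[] = { 3, -1, 7, -1, -1, 9 };
   unsigned num_fences = 6, dst = 0;

   for (unsigned src = 0; src < num_fences; src++) {
      if (fences[src] >= 0)
         fences[dst++] = fences[src];     /* keep fences still in flight */
      /* expired entries are simply not copied (the real code also releases
       * their reference here) */
   }
   num_fences = dst;

   for (unsigned i = 0; i < num_fences; i++)
      printf("%d ", fences[i]);           /* 3 7 9 */
   printf("\n");
   return 0;
}
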
DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
unsigned flags,
struct pipe_fence_handle **pfence)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- struct radeon_cs_context *tmp;
-
- switch (cs->ring_type) {
- case RING_DMA:
- /* pad DMA ring to 8 DWs */
- if (cs->ws->info.chip_class <= GFX6) {
- while (rcs->current.cdw & 7)
- radeon_emit(&cs->base, 0xf0000000); /* NOP packet */
- } else {
- while (rcs->current.cdw & 7)
- radeon_emit(&cs->base, 0x00000000); /* NOP packet */
- }
- break;
- case RING_GFX:
- /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
- * r6xx, requires at least 4 dw alignment to avoid a hw bug.
- */
- if (cs->ws->info.gfx_ib_pad_with_type2) {
- while (rcs->current.cdw & 7)
- radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
- } else {
- while (rcs->current.cdw & 7)
- radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */
- }
- break;
- case RING_UVD:
- while (rcs->current.cdw & 15)
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct radeon_cs_context *tmp;
+
+ switch (cs->ring_type) {
+ case RING_DMA:
+ /* pad DMA ring to 8 DWs */
+ if (cs->ws->info.chip_class <= GFX6) {
+ while (rcs->current.cdw & 7)
+ radeon_emit(&cs->base, 0xf0000000); /* NOP packet */
+ } else {
+ while (rcs->current.cdw & 7)
+ radeon_emit(&cs->base, 0x00000000); /* NOP packet */
+ }
+ break;
+ case RING_GFX:
+ /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
+ * r6xx requires at least 4 dw alignment to avoid a hw bug.
+ */
+ if (cs->ws->info.gfx_ib_pad_with_type2) {
+ while (rcs->current.cdw & 7)
radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
- break;
- default:
- break;
- }
-
- if (rcs->current.cdw > rcs->current.max_dw) {
- fprintf(stderr, "radeon: command stream overflowed\n");
- }
-
- if (pfence || cs->csc->num_slab_buffers) {
- struct pipe_fence_handle *fence;
-
- if (cs->next_fence) {
- fence = cs->next_fence;
- cs->next_fence = NULL;
- } else {
- fence = radeon_cs_create_fence(rcs);
- }
-
- if (fence) {
- if (pfence)
- radeon_fence_reference(pfence, fence);
-
- mtx_lock(&cs->ws->bo_fence_lock);
- for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) {
- struct radeon_bo *bo = cs->csc->slab_buffers[i].bo;
- p_atomic_inc(&bo->num_active_ioctls);
- radeon_bo_slab_fence(bo, (struct radeon_bo *)fence);
- }
- mtx_unlock(&cs->ws->bo_fence_lock);
-
- radeon_fence_reference(&fence, NULL);
- }
- } else {
- radeon_fence_reference(&cs->next_fence, NULL);
- }
-
- radeon_drm_cs_sync_flush(rcs);
-
- /* Swap command streams. */
- tmp = cs->csc;
- cs->csc = cs->cst;
- cs->cst = tmp;
-
- /* If the CS is not empty or overflowed, emit it in a separate thread. */
- if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
- unsigned i, num_relocs;
-
- num_relocs = cs->cst->num_relocs;
-
- cs->cst->chunks[0].length_dw = cs->base.current.cdw;
-
- for (i = 0; i < num_relocs; i++) {
- /* Update the number of active asynchronous CS ioctls for the buffer. */
- p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
- }
-
- switch (cs->ring_type) {
- case RING_DMA:
- cs->cst->flags[0] = 0;
- cs->cst->flags[1] = RADEON_CS_RING_DMA;
- cs->cst->cs.num_chunks = 3;
- if (cs->ws->info.r600_has_virtual_memory) {
- cs->cst->flags[0] |= RADEON_CS_USE_VM;
- }
- break;
-
- case RING_UVD:
- cs->cst->flags[0] = 0;
- cs->cst->flags[1] = RADEON_CS_RING_UVD;
- cs->cst->cs.num_chunks = 3;
- break;
+ } else {
+ while (rcs->current.cdw & 7)
+ radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */
+ }
+ break;
+ case RING_UVD:
+ while (rcs->current.cdw & 15)
+ radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
+ break;
+ default:
+ break;
+ }
- case RING_VCE:
- cs->cst->flags[0] = 0;
- cs->cst->flags[1] = RADEON_CS_RING_VCE;
- cs->cst->cs.num_chunks = 3;
- break;
+ if (rcs->current.cdw > rcs->current.max_dw) {
+ fprintf(stderr, "radeon: command stream overflowed\n");
+ }
+
+ if (pfence || cs->csc->num_slab_buffers) {
+ struct pipe_fence_handle *fence;
+
+ if (cs->next_fence) {
+ fence = cs->next_fence;
+ cs->next_fence = NULL;
+ } else {
+ fence = radeon_cs_create_fence(rcs);
+ }
+
+ if (fence) {
+ if (pfence)
+ radeon_fence_reference(pfence, fence);
+
+ mtx_lock(&cs->ws->bo_fence_lock);
+ for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) {
+ struct radeon_bo *bo = cs->csc->slab_buffers[i].bo;
+ p_atomic_inc(&bo->num_active_ioctls);
+ radeon_bo_slab_fence(bo, (struct radeon_bo *)fence);
+ }
+ mtx_unlock(&cs->ws->bo_fence_lock);
+
+ radeon_fence_reference(&fence, NULL);
+ }
+ } else {
+ radeon_fence_reference(&cs->next_fence, NULL);
+ }
- default:
- case RING_GFX:
- case RING_COMPUTE:
- cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
- cs->cst->flags[1] = RADEON_CS_RING_GFX;
+ radeon_drm_cs_sync_flush(rcs);
+
+ /* Swap command streams. */
+ tmp = cs->csc;
+ cs->csc = cs->cst;
+ cs->cst = tmp;
+
+ /* If the CS is not empty or overflowed, emit it in a separate thread. */
+ if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
+ unsigned i, num_relocs;
+
+ num_relocs = cs->cst->num_relocs;
+
+ cs->cst->chunks[0].length_dw = cs->base.current.cdw;
+
+ for (i = 0; i < num_relocs; i++) {
+ /* Update the number of active asynchronous CS ioctls for the buffer. */
+ p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
+ }
+
+ switch (cs->ring_type) {
+ case RING_DMA:
+ cs->cst->flags[0] = 0;
+ cs->cst->flags[1] = RADEON_CS_RING_DMA;
+ cs->cst->cs.num_chunks = 3;
+ if (cs->ws->info.r600_has_virtual_memory) {
+ cs->cst->flags[0] |= RADEON_CS_USE_VM;
+ }
+ break;
+
+ case RING_UVD:
+ cs->cst->flags[0] = 0;
+ cs->cst->flags[1] = RADEON_CS_RING_UVD;
+ cs->cst->cs.num_chunks = 3;
+ break;
+
+ case RING_VCE:
+ cs->cst->flags[0] = 0;
+ cs->cst->flags[1] = RADEON_CS_RING_VCE;
+ cs->cst->cs.num_chunks = 3;
+ break;
+
+ default:
+ case RING_GFX:
+ case RING_COMPUTE:
+ cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
+ cs->cst->flags[1] = RADEON_CS_RING_GFX;
+ cs->cst->cs.num_chunks = 3;
+
+ if (cs->ws->info.r600_has_virtual_memory) {
+ cs->cst->flags[0] |= RADEON_CS_USE_VM;
+ cs->cst->cs.num_chunks = 3;
+ }
+ if (flags & PIPE_FLUSH_END_OF_FRAME) {
+ cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
+ cs->cst->cs.num_chunks = 3;
+ }
+ if (cs->ring_type == RING_COMPUTE) {
+ cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
cs->cst->cs.num_chunks = 3;
+ }
+ break;
+ }
+
+ if (util_queue_is_initialized(&cs->ws->cs_queue)) {
+ util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
+ radeon_drm_cs_emit_ioctl_oneshot, NULL, 0);
+ if (!(flags & PIPE_FLUSH_ASYNC))
+ radeon_drm_cs_sync_flush(rcs);
+ } else {
+ radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
+ }
+ } else {
+ radeon_cs_context_cleanup(cs->cst);
+ }
- if (cs->ws->info.r600_has_virtual_memory) {
- cs->cst->flags[0] |= RADEON_CS_USE_VM;
- cs->cst->cs.num_chunks = 3;
- }
- if (flags & PIPE_FLUSH_END_OF_FRAME) {
- cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
- cs->cst->cs.num_chunks = 3;
- }
- if (cs->ring_type == RING_COMPUTE) {
- cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
- cs->cst->cs.num_chunks = 3;
- }
- break;
- }
-
- if (util_queue_is_initialized(&cs->ws->cs_queue)) {
- util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
- radeon_drm_cs_emit_ioctl_oneshot, NULL, 0);
- if (!(flags & PIPE_FLUSH_ASYNC))
- radeon_drm_cs_sync_flush(rcs);
- } else {
- radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
- }
- } else {
- radeon_cs_context_cleanup(cs->cst);
- }
-
- /* Prepare a new CS. */
- cs->base.current.buf = cs->csc->buf;
- cs->base.current.cdw = 0;
- cs->base.used_vram = 0;
- cs->base.used_gart = 0;
-
- if (cs->ring_type == RING_GFX)
- cs->ws->num_gfx_IBs++;
- else if (cs->ring_type == RING_DMA)
- cs->ws->num_sdma_IBs++;
- return 0;
+ /* Prepare a new CS. */
+ cs->base.current.buf = cs->csc->buf;
+ cs->base.current.cdw = 0;
+ cs->base.used_vram = 0;
+ cs->base.used_gart = 0;
+
+ if (cs->ring_type == RING_GFX)
+ cs->ws->num_gfx_IBs++;
+ else if (cs->ring_type == RING_DMA)
+ cs->ws->num_sdma_IBs++;
+ return 0;
}
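
The ring-specific padding loops at the top of the flush path emit NOPs until the IB size is a multiple of 8 dwords (16 for UVD), i.e. they round the size up to the next alignment boundary. A tiny sketch of that equivalence:

/* Sketch: the "while (cdw & 7)" padding loops round the IB up to the next
 * multiple of 8 dwords (& 15 pads to 16 for UVD); the closed form is shown
 * for comparison. */
#include <stdio.h>

int main(void)
{
   unsigned cdw = 13;                     /* example IB size in dwords */
   unsigned padded = cdw;

   while (padded & 7)
      padded++;                           /* each iteration would emit one NOP */

   printf("%u -> %u dwords (closed form: %u)\n",
          cdw, padded, (cdw + 7) & ~7u);  /* 13 -> 16 (closed form: 16) */
   return 0;
}
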
static void radeon_drm_cs_destroy(struct radeon_cmdbuf *rcs)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
-
- radeon_drm_cs_sync_flush(rcs);
- util_queue_fence_destroy(&cs->flush_completed);
- radeon_cs_context_cleanup(&cs->csc1);
- radeon_cs_context_cleanup(&cs->csc2);
- p_atomic_dec(&cs->ws->num_cs);
- radeon_destroy_cs_context(&cs->csc1);
- radeon_destroy_cs_context(&cs->csc2);
- radeon_fence_reference(&cs->next_fence, NULL);
- FREE(cs);
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+
+ radeon_drm_cs_sync_flush(rcs);
+ util_queue_fence_destroy(&cs->flush_completed);
+ radeon_cs_context_cleanup(&cs->csc1);
+ radeon_cs_context_cleanup(&cs->csc2);
+ p_atomic_dec(&cs->ws->num_cs);
+ radeon_destroy_cs_context(&cs->csc1);
+ radeon_destroy_cs_context(&cs->csc2);
+ radeon_fence_reference(&cs->next_fence, NULL);
+ FREE(cs);
}
static bool radeon_bo_is_referenced(struct radeon_cmdbuf *rcs,
struct pb_buffer *_buf,
enum radeon_bo_usage usage)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- struct radeon_bo *bo = (struct radeon_bo*)_buf;
- int index;
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct radeon_bo *bo = (struct radeon_bo*)_buf;
+ int index;
- if (!bo->num_cs_references)
- return false;
+ if (!bo->num_cs_references)
+ return false;
- index = radeon_lookup_buffer(cs->csc, bo);
- if (index == -1)
- return false;
+ index = radeon_lookup_buffer(cs->csc, bo);
+ if (index == -1)
+ return false;
- if (!bo->handle)
- index = cs->csc->slab_buffers[index].u.slab.real_idx;
+ if (!bo->handle)
+ index = cs->csc->slab_buffers[index].u.slab.real_idx;
- if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
- return true;
- if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
- return true;
+ if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
+ return true;
+ if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
+ return true;
- return false;
+ return false;
}
/* FENCES */
-static struct pipe_fence_handle *
-radeon_cs_create_fence(struct radeon_cmdbuf *rcs)
+static struct pipe_fence_handle *radeon_cs_create_fence(struct radeon_cmdbuf *rcs)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- struct pb_buffer *fence;
-
- /* Create a fence, which is a dummy BO. */
- fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1,
- RADEON_DOMAIN_GTT,
- RADEON_FLAG_NO_SUBALLOC
- | RADEON_FLAG_NO_INTERPROCESS_SHARING);
- if (!fence)
- return NULL;
-
- /* Add the fence as a dummy relocation. */
- cs->ws->base.cs_add_buffer(rcs, fence,
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+ struct pb_buffer *fence;
+
+ /* Create a fence, which is a dummy BO. */
+ fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1,
+ RADEON_DOMAIN_GTT,
+ RADEON_FLAG_NO_SUBALLOC
+ | RADEON_FLAG_NO_INTERPROCESS_SHARING);
+ if (!fence)
+ return NULL;
+
+ /* Add the fence as a dummy relocation. */
+ cs->ws->base.cs_add_buffer(rcs, fence,
RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
RADEON_PRIO_FENCE);
- return (struct pipe_fence_handle*)fence;
+ return (struct pipe_fence_handle*)fence;
}
static bool radeon_fence_wait(struct radeon_winsys *ws,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
- return ws->buffer_wait((struct pb_buffer*)fence, timeout,
- RADEON_USAGE_READWRITE);
+ return ws->buffer_wait((struct pb_buffer*)fence, timeout,
+ RADEON_USAGE_READWRITE);
}
static void radeon_fence_reference(struct pipe_fence_handle **dst,
struct pipe_fence_handle *src)
{
- pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
+ pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}
-static struct pipe_fence_handle *
-radeon_drm_cs_get_next_fence(struct radeon_cmdbuf *rcs)
+static struct pipe_fence_handle *radeon_drm_cs_get_next_fence(struct radeon_cmdbuf *rcs)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct pipe_fence_handle *fence = NULL;
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
- ws->base.ctx_create = radeon_drm_ctx_create;
- ws->base.ctx_destroy = radeon_drm_ctx_destroy;
- ws->base.ctx_query_reset_status = radeon_drm_ctx_query_reset_status;
- ws->base.cs_create = radeon_drm_cs_create;
- ws->base.cs_destroy = radeon_drm_cs_destroy;
- ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
- ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
- ws->base.cs_validate = radeon_drm_cs_validate;
- ws->base.cs_check_space = radeon_drm_cs_check_space;
- ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
- ws->base.cs_flush = radeon_drm_cs_flush;
- ws->base.cs_get_next_fence = radeon_drm_cs_get_next_fence;
- ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
- ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
- ws->base.cs_add_fence_dependency = radeon_drm_cs_add_fence_dependency;
- ws->base.fence_wait = radeon_fence_wait;
- ws->base.fence_reference = radeon_fence_reference;
+ ws->base.ctx_create = radeon_drm_ctx_create;
+ ws->base.ctx_destroy = radeon_drm_ctx_destroy;
+ ws->base.ctx_query_reset_status = radeon_drm_ctx_query_reset_status;
+ ws->base.cs_create = radeon_drm_cs_create;
+ ws->base.cs_destroy = radeon_drm_cs_destroy;
+ ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
+ ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
+ ws->base.cs_validate = radeon_drm_cs_validate;
+ ws->base.cs_check_space = radeon_drm_cs_check_space;
+ ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
+ ws->base.cs_flush = radeon_drm_cs_flush;
+ ws->base.cs_get_next_fence = radeon_drm_cs_get_next_fence;
+ ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
+ ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
+ ws->base.cs_add_fence_dependency = radeon_drm_cs_add_fence_dependency;
+ ws->base.fence_wait = radeon_fence_wait;
+ ws->base.fence_reference = radeon_fence_reference;
}
};
struct radeon_bo_item {
- struct radeon_bo *bo;
- union {
- struct {
- uint32_t priority_usage;
- } real;
- struct {
- unsigned real_idx;
- } slab;
- } u;
+ struct radeon_bo *bo;
+ union {
+ struct {
+ uint32_t priority_usage;
+ } real;
+ struct {
+ unsigned real_idx;
+ } slab;
+ } u;
};
struct radeon_cs_context {
- uint32_t buf[16 * 1024];
-
- int fd;
- struct drm_radeon_cs cs;
- struct drm_radeon_cs_chunk chunks[3];
- uint64_t chunk_array[3];
- uint32_t flags[2];
-
- /* Buffers. */
- unsigned max_relocs;
- unsigned num_relocs;
- unsigned num_validated_relocs;
- struct radeon_bo_item *relocs_bo;
- struct drm_radeon_cs_reloc *relocs;
-
- unsigned num_slab_buffers;
- unsigned max_slab_buffers;
- struct radeon_bo_item *slab_buffers;
-
- int reloc_indices_hashlist[4096];
+ uint32_t buf[16 * 1024];
+
+ int fd;
+ struct drm_radeon_cs cs;
+ struct drm_radeon_cs_chunk chunks[3];
+ uint64_t chunk_array[3];
+ uint32_t flags[2];
+
+ /* Buffers. */
+ unsigned max_relocs;
+ unsigned num_relocs;
+ unsigned num_validated_relocs;
+ struct radeon_bo_item *relocs_bo;
+ struct drm_radeon_cs_reloc *relocs;
+
+ unsigned num_slab_buffers;
+ unsigned max_slab_buffers;
+ struct radeon_bo_item *slab_buffers;
+
+ int reloc_indices_hashlist[4096];
};
struct radeon_drm_cs {
- struct radeon_cmdbuf base;
- enum ring_type ring_type;
-
- /* We flip between these two CS. While one is being consumed
- * by the kernel in another thread, the other one is being filled
- * by the pipe driver. */
- struct radeon_cs_context csc1;
- struct radeon_cs_context csc2;
- /* The currently-used CS. */
- struct radeon_cs_context *csc;
- /* The CS being currently-owned by the other thread. */
- struct radeon_cs_context *cst;
-
- /* The winsys. */
- struct radeon_drm_winsys *ws;
-
- /* Flush CS. */
- void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
- void *flush_data;
-
- struct util_queue_fence flush_completed;
- struct pipe_fence_handle *next_fence;
+ struct radeon_cmdbuf base;
+ enum ring_type ring_type;
+
+ /* We flip between these two CS. While one is being consumed
+ * by the kernel in another thread, the other one is being filled
+ * by the pipe driver. */
+ struct radeon_cs_context csc1;
+ struct radeon_cs_context csc2;
+ /* The currently-used CS. */
+ struct radeon_cs_context *csc;
+ /* The CS being currently-owned by the other thread. */
+ struct radeon_cs_context *cst;
+
+ /* The winsys. */
+ struct radeon_drm_winsys *ws;
+
+ /* Flush CS. */
+ void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
+ void *flush_data;
+
+ struct util_queue_fence flush_completed;
+ struct pipe_fence_handle *next_fence;
};
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo);
static inline struct radeon_drm_cs *
radeon_drm_cs(struct radeon_cmdbuf *base)
{
- return (struct radeon_drm_cs*)base;
+ return (struct radeon_drm_cs*)base;
}
static inline bool
radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
struct radeon_bo *bo)
{
- int num_refs = bo->num_cs_references;
- return num_refs == bo->rws->num_cs ||
- (num_refs && radeon_lookup_buffer(cs->csc, bo) != -1);
+ int num_refs = bo->num_cs_references;
+ return num_refs == bo->rws->num_cs ||
+ (num_refs && radeon_lookup_buffer(cs->csc, bo) != -1);
}
static inline bool
radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
struct radeon_bo *bo)
{
- int index;
+ int index;
- if (!bo->num_cs_references)
- return false;
+ if (!bo->num_cs_references)
+ return false;
- index = radeon_lookup_buffer(cs->csc, bo);
- if (index == -1)
- return false;
+ index = radeon_lookup_buffer(cs->csc, bo);
+ if (index == -1)
+ return false;
- if (!bo->handle)
- index = cs->csc->slab_buffers[index].u.slab.real_idx;
+ if (!bo->handle)
+ index = cs->csc->slab_buffers[index].u.slab.real_idx;
- return cs->csc->relocs[index].write_domain != 0;
+ return cs->csc->relocs[index].write_domain != 0;
}
static inline bool
radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
{
- return bo->num_cs_references != 0;
+ return bo->num_cs_references != 0;
}
void radeon_drm_cs_sync_flush(struct radeon_cmdbuf *rcs);
static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
{
- unsigned index, tileb;
+ unsigned index, tileb;
- tileb = 8 * 8 * surf->bpe;
- tileb = MIN2(surf->u.legacy.tile_split, tileb);
+ tileb = 8 * 8 * surf->bpe;
+ tileb = MIN2(surf->u.legacy.tile_split, tileb);
- for (index = 0; tileb > 64; index++)
- tileb >>= 1;
+ for (index = 0; tileb > 64; index++)
+ tileb >>= 1;
- assert(index < 16);
- return index;
+ assert(index < 16);
+ return index;
}
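The loop above is just log2(tileb / 64), with tileb capped by the tile split. A stand-alone restatement with two worked examples (the values are illustrative, not taken from real surfaces):

#include <assert.h>

/* Same computation as cik_get_macro_tile_index(), written against plain
 * integers so the arithmetic can be checked in isolation. */
static unsigned macro_tile_index(unsigned bpe, unsigned tile_split)
{
   unsigned tileb = 8 * 8 * bpe;      /* bytes in one 8x8 micro tile */
   unsigned index = 0;

   if (tile_split < tileb)
      tileb = tile_split;
   while (tileb > 64) {
      tileb >>= 1;
      index++;
   }
   return index;                      /* log2(tileb / 64) */
}

int main(void)
{
   assert(macro_tile_index(4, 2048) == 2);   /* 256 -> 128 -> 64 */
   assert(macro_tile_index(16, 128) == 1);   /* split caps 1024 to 128 */
   return 0;
}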
#define G_009910_MICRO_TILE_MODE(x) (((x) >> 0) & 0x03)
static void set_micro_tile_mode(struct radeon_surf *surf,
struct radeon_info *info)
{
- uint32_t tile_mode;
+ uint32_t tile_mode;
- if (info->chip_class < GFX6) {
- surf->micro_tile_mode = 0;
- return;
- }
+ if (info->chip_class < GFX6) {
+ surf->micro_tile_mode = 0;
+ return;
+ }
- tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
+ tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
- if (info->chip_class >= GFX7)
- surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
- else
- surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
+ if (info->chip_class >= GFX7)
+ surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
+ else
+ surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
}
static void surf_level_winsys_to_drm(struct radeon_surface_level *level_drm,
const struct legacy_surf_level *level_ws,
unsigned bpe)
{
- level_drm->offset = level_ws->offset;
- level_drm->slice_size = (uint64_t)level_ws->slice_size_dw * 4;
- level_drm->nblk_x = level_ws->nblk_x;
- level_drm->nblk_y = level_ws->nblk_y;
- level_drm->pitch_bytes = level_ws->nblk_x * bpe;
- level_drm->mode = level_ws->mode;
+ level_drm->offset = level_ws->offset;
+ level_drm->slice_size = (uint64_t)level_ws->slice_size_dw * 4;
+ level_drm->nblk_x = level_ws->nblk_x;
+ level_drm->nblk_y = level_ws->nblk_y;
+ level_drm->pitch_bytes = level_ws->nblk_x * bpe;
+ level_drm->mode = level_ws->mode;
}
static void surf_level_drm_to_winsys(struct legacy_surf_level *level_ws,
const struct radeon_surface_level *level_drm,
unsigned bpe)
{
- level_ws->offset = level_drm->offset;
- level_ws->slice_size_dw = level_drm->slice_size / 4;
- level_ws->nblk_x = level_drm->nblk_x;
- level_ws->nblk_y = level_drm->nblk_y;
- level_ws->mode = level_drm->mode;
- assert(level_drm->nblk_x * bpe == level_drm->pitch_bytes);
+ level_ws->offset = level_drm->offset;
+ level_ws->slice_size_dw = level_drm->slice_size / 4;
+ level_ws->nblk_x = level_drm->nblk_x;
+ level_ws->nblk_y = level_drm->nblk_y;
+ level_ws->mode = level_drm->mode;
+ assert(level_drm->nblk_x * bpe == level_drm->pitch_bytes);
}
static void surf_winsys_to_drm(struct radeon_surface *surf_drm,
enum radeon_surf_mode mode,
const struct radeon_surf *surf_ws)
{
- int i;
-
- memset(surf_drm, 0, sizeof(*surf_drm));
-
- surf_drm->npix_x = tex->width0;
- surf_drm->npix_y = tex->height0;
- surf_drm->npix_z = tex->depth0;
- surf_drm->blk_w = util_format_get_blockwidth(tex->format);
- surf_drm->blk_h = util_format_get_blockheight(tex->format);
- surf_drm->blk_d = 1;
- surf_drm->array_size = 1;
- surf_drm->last_level = tex->last_level;
- surf_drm->bpe = bpe;
- surf_drm->nsamples = tex->nr_samples ? tex->nr_samples : 1;
-
- surf_drm->flags = flags;
- surf_drm->flags = RADEON_SURF_CLR(surf_drm->flags, TYPE);
- surf_drm->flags = RADEON_SURF_CLR(surf_drm->flags, MODE);
- surf_drm->flags |= RADEON_SURF_SET(mode, MODE) |
- RADEON_SURF_HAS_SBUFFER_MIPTREE |
- RADEON_SURF_HAS_TILE_MODE_INDEX;
-
- switch (tex->target) {
- case PIPE_TEXTURE_1D:
- surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
- break;
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D:
- surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
- break;
- case PIPE_TEXTURE_3D:
- surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
- break;
- case PIPE_TEXTURE_1D_ARRAY:
- surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
- surf_drm->array_size = tex->array_size;
- break;
- case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */
- assert(tex->array_size % 6 == 0);
- /* fall through */
- case PIPE_TEXTURE_2D_ARRAY:
- surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
- surf_drm->array_size = tex->array_size;
- break;
- case PIPE_TEXTURE_CUBE:
- surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE);
- break;
- case PIPE_BUFFER:
- default:
- assert(0);
- }
-
- surf_drm->bo_size = surf_ws->surf_size;
- surf_drm->bo_alignment = surf_ws->surf_alignment;
-
- surf_drm->bankw = surf_ws->u.legacy.bankw;
- surf_drm->bankh = surf_ws->u.legacy.bankh;
- surf_drm->mtilea = surf_ws->u.legacy.mtilea;
- surf_drm->tile_split = surf_ws->u.legacy.tile_split;
-
- for (i = 0; i <= surf_drm->last_level; i++) {
- surf_level_winsys_to_drm(&surf_drm->level[i], &surf_ws->u.legacy.level[i],
- bpe * surf_drm->nsamples);
-
- surf_drm->tiling_index[i] = surf_ws->u.legacy.tiling_index[i];
- }
-
- if (flags & RADEON_SURF_SBUFFER) {
- surf_drm->stencil_tile_split = surf_ws->u.legacy.stencil_tile_split;
-
- for (i = 0; i <= surf_drm->last_level; i++) {
- surf_level_winsys_to_drm(&surf_drm->stencil_level[i],
- &surf_ws->u.legacy.stencil_level[i],
- surf_drm->nsamples);
- surf_drm->stencil_tiling_index[i] = surf_ws->u.legacy.stencil_tiling_index[i];
- }
- }
+ int i;
+
+ memset(surf_drm, 0, sizeof(*surf_drm));
+
+ surf_drm->npix_x = tex->width0;
+ surf_drm->npix_y = tex->height0;
+ surf_drm->npix_z = tex->depth0;
+ surf_drm->blk_w = util_format_get_blockwidth(tex->format);
+ surf_drm->blk_h = util_format_get_blockheight(tex->format);
+ surf_drm->blk_d = 1;
+ surf_drm->array_size = 1;
+ surf_drm->last_level = tex->last_level;
+ surf_drm->bpe = bpe;
+ surf_drm->nsamples = tex->nr_samples ? tex->nr_samples : 1;
+
+ surf_drm->flags = flags;
+ surf_drm->flags = RADEON_SURF_CLR(surf_drm->flags, TYPE);
+ surf_drm->flags = RADEON_SURF_CLR(surf_drm->flags, MODE);
+ surf_drm->flags |= RADEON_SURF_SET(mode, MODE) |
+ RADEON_SURF_HAS_SBUFFER_MIPTREE |
+ RADEON_SURF_HAS_TILE_MODE_INDEX;
+
+ switch (tex->target) {
+ case PIPE_TEXTURE_1D:
+ surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
+ break;
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D:
+ surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
+ break;
+ case PIPE_TEXTURE_3D:
+ surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
+ surf_drm->array_size = tex->array_size;
+ break;
+ case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */
+ assert(tex->array_size % 6 == 0);
+ /* fall through */
+ case PIPE_TEXTURE_2D_ARRAY:
+ surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
+ surf_drm->array_size = tex->array_size;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ surf_drm->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE);
+ break;
+ case PIPE_BUFFER:
+ default:
+ assert(0);
+ }
+
+ surf_drm->bo_size = surf_ws->surf_size;
+ surf_drm->bo_alignment = surf_ws->surf_alignment;
+
+ surf_drm->bankw = surf_ws->u.legacy.bankw;
+ surf_drm->bankh = surf_ws->u.legacy.bankh;
+ surf_drm->mtilea = surf_ws->u.legacy.mtilea;
+ surf_drm->tile_split = surf_ws->u.legacy.tile_split;
+
+ for (i = 0; i <= surf_drm->last_level; i++) {
+ surf_level_winsys_to_drm(&surf_drm->level[i], &surf_ws->u.legacy.level[i],
+ bpe * surf_drm->nsamples);
+
+ surf_drm->tiling_index[i] = surf_ws->u.legacy.tiling_index[i];
+ }
+
+ if (flags & RADEON_SURF_SBUFFER) {
+ surf_drm->stencil_tile_split = surf_ws->u.legacy.stencil_tile_split;
+
+ for (i = 0; i <= surf_drm->last_level; i++) {
+ surf_level_winsys_to_drm(&surf_drm->stencil_level[i],
+ &surf_ws->u.legacy.stencil_level[i],
+ surf_drm->nsamples);
+ surf_drm->stencil_tiling_index[i] = surf_ws->u.legacy.stencil_tiling_index[i];
+ }
+ }
}
static void surf_drm_to_winsys(struct radeon_drm_winsys *ws,
struct radeon_surf *surf_ws,
const struct radeon_surface *surf_drm)
{
- int i;
-
- memset(surf_ws, 0, sizeof(*surf_ws));
-
- surf_ws->blk_w = surf_drm->blk_w;
- surf_ws->blk_h = surf_drm->blk_h;
- surf_ws->bpe = surf_drm->bpe;
- surf_ws->is_linear = surf_drm->level[0].mode <= RADEON_SURF_MODE_LINEAR_ALIGNED;
- surf_ws->has_stencil = !!(surf_drm->flags & RADEON_SURF_SBUFFER);
- surf_ws->flags = surf_drm->flags;
-
- surf_ws->surf_size = surf_drm->bo_size;
- surf_ws->surf_alignment = surf_drm->bo_alignment;
-
- surf_ws->u.legacy.bankw = surf_drm->bankw;
- surf_ws->u.legacy.bankh = surf_drm->bankh;
- surf_ws->u.legacy.mtilea = surf_drm->mtilea;
- surf_ws->u.legacy.tile_split = surf_drm->tile_split;
-
- surf_ws->u.legacy.macro_tile_index = cik_get_macro_tile_index(surf_ws);
-
- for (i = 0; i <= surf_drm->last_level; i++) {
- surf_level_drm_to_winsys(&surf_ws->u.legacy.level[i], &surf_drm->level[i],
- surf_drm->bpe * surf_drm->nsamples);
- surf_ws->u.legacy.tiling_index[i] = surf_drm->tiling_index[i];
- }
-
- if (surf_ws->flags & RADEON_SURF_SBUFFER) {
- surf_ws->u.legacy.stencil_tile_split = surf_drm->stencil_tile_split;
-
- for (i = 0; i <= surf_drm->last_level; i++) {
- surf_level_drm_to_winsys(&surf_ws->u.legacy.stencil_level[i],
- &surf_drm->stencil_level[i],
- surf_drm->nsamples);
- surf_ws->u.legacy.stencil_tiling_index[i] = surf_drm->stencil_tiling_index[i];
- }
- }
-
- set_micro_tile_mode(surf_ws, &ws->info);
- surf_ws->is_displayable = surf_ws->is_linear ||
- surf_ws->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
- surf_ws->micro_tile_mode == RADEON_MICRO_MODE_ROTATED;
+ int i;
+
+ memset(surf_ws, 0, sizeof(*surf_ws));
+
+ surf_ws->blk_w = surf_drm->blk_w;
+ surf_ws->blk_h = surf_drm->blk_h;
+ surf_ws->bpe = surf_drm->bpe;
+ surf_ws->is_linear = surf_drm->level[0].mode <= RADEON_SURF_MODE_LINEAR_ALIGNED;
+ surf_ws->has_stencil = !!(surf_drm->flags & RADEON_SURF_SBUFFER);
+ surf_ws->flags = surf_drm->flags;
+
+ surf_ws->surf_size = surf_drm->bo_size;
+ surf_ws->surf_alignment = surf_drm->bo_alignment;
+
+ surf_ws->u.legacy.bankw = surf_drm->bankw;
+ surf_ws->u.legacy.bankh = surf_drm->bankh;
+ surf_ws->u.legacy.mtilea = surf_drm->mtilea;
+ surf_ws->u.legacy.tile_split = surf_drm->tile_split;
+
+ surf_ws->u.legacy.macro_tile_index = cik_get_macro_tile_index(surf_ws);
+
+ for (i = 0; i <= surf_drm->last_level; i++) {
+ surf_level_drm_to_winsys(&surf_ws->u.legacy.level[i], &surf_drm->level[i],
+ surf_drm->bpe * surf_drm->nsamples);
+ surf_ws->u.legacy.tiling_index[i] = surf_drm->tiling_index[i];
+ }
+
+ if (surf_ws->flags & RADEON_SURF_SBUFFER) {
+ surf_ws->u.legacy.stencil_tile_split = surf_drm->stencil_tile_split;
+
+ for (i = 0; i <= surf_drm->last_level; i++) {
+ surf_level_drm_to_winsys(&surf_ws->u.legacy.stencil_level[i],
+ &surf_drm->stencil_level[i],
+ surf_drm->nsamples);
+ surf_ws->u.legacy.stencil_tiling_index[i] = surf_drm->stencil_tiling_index[i];
+ }
+ }
+
+ set_micro_tile_mode(surf_ws, &ws->info);
+ surf_ws->is_displayable = surf_ws->is_linear ||
+ surf_ws->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
+ surf_ws->micro_tile_mode == RADEON_MICRO_MODE_ROTATED;
}
static void si_compute_cmask(const struct radeon_info *info,
- const struct ac_surf_config *config,
- struct radeon_surf *surf)
+ const struct ac_surf_config *config,
+ struct radeon_surf *surf)
{
- unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
- unsigned num_pipes = info->num_tile_pipes;
- unsigned cl_width, cl_height;
-
- if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
- return;
-
- assert(info->chip_class <= GFX8);
-
- switch (num_pipes) {
- case 2:
- cl_width = 32;
- cl_height = 16;
- break;
- case 4:
- cl_width = 32;
- cl_height = 32;
- break;
- case 8:
- cl_width = 64;
- cl_height = 32;
- break;
- case 16: /* Hawaii */
- cl_width = 64;
- cl_height = 64;
- break;
- default:
- assert(0);
- return;
- }
-
- unsigned base_align = num_pipes * pipe_interleave_bytes;
-
- unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8);
- unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8);
- unsigned slice_elements = (width * height) / (8*8);
-
- /* Each element of CMASK is a nibble. */
- unsigned slice_bytes = slice_elements / 2;
-
- surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128);
- if (surf->u.legacy.cmask_slice_tile_max)
- surf->u.legacy.cmask_slice_tile_max -= 1;
-
- unsigned num_layers;
- if (config->is_3d)
- num_layers = config->info.depth;
- else if (config->is_cube)
- num_layers = 6;
- else
- num_layers = config->info.array_size;
-
- surf->cmask_alignment = MAX2(256, base_align);
- surf->cmask_size = align(slice_bytes, base_align) * num_layers;
+ unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
+ unsigned num_pipes = info->num_tile_pipes;
+ unsigned cl_width, cl_height;
+
+ if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
+ return;
+
+ assert(info->chip_class <= GFX8);
+
+ switch (num_pipes) {
+ case 2:
+ cl_width = 32;
+ cl_height = 16;
+ break;
+ case 4:
+ cl_width = 32;
+ cl_height = 32;
+ break;
+ case 8:
+ cl_width = 64;
+ cl_height = 32;
+ break;
+ case 16: /* Hawaii */
+ cl_width = 64;
+ cl_height = 64;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ unsigned base_align = num_pipes * pipe_interleave_bytes;
+
+ unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8);
+ unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8);
+ unsigned slice_elements = (width * height) / (8*8);
+
+ /* Each element of CMASK is a nibble. */
+ unsigned slice_bytes = slice_elements / 2;
+
+ surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128);
+ if (surf->u.legacy.cmask_slice_tile_max)
+ surf->u.legacy.cmask_slice_tile_max -= 1;
+
+ unsigned num_layers;
+ if (config->is_3d)
+ num_layers = config->info.depth;
+ else if (config->is_cube)
+ num_layers = 6;
+ else
+ num_layers = config->info.array_size;
+
+ surf->cmask_alignment = MAX2(256, base_align);
+ surf->cmask_size = align(slice_bytes, base_align) * num_layers;
}
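To make the sizes above concrete: CMASK stores one nibble per 8x8 tile, and the surface is first padded to a multiple of the cache-line footprint. A stand-alone walk-through for a hypothetical 1920x1080, single-layer surface on a 4-pipe GPU with a 256-byte pipe interleave (numbers are an example, not driver output):

#include <stdio.h>

static unsigned align_up(unsigned x, unsigned a)
{
   return (x + a - 1) / a * a;
}

int main(void)
{
   unsigned base_align = 4 * 256;                    /* num_pipes * interleave = 1024 */
   unsigned width  = align_up(1920, 32 * 8);         /* cl_width = 32 -> 2048 */
   unsigned height = align_up(1080, 32 * 8);         /* cl_height = 32 -> 1280 */
   unsigned slice_elements = (width * height) / 64;  /* one element per 8x8 tile */
   unsigned slice_bytes = slice_elements / 2;        /* nibbles -> 20480 bytes */

   printf("cmask slice: %u bytes, aligned: %u bytes\n",
          slice_bytes, align_up(slice_bytes, base_align));
   return 0;
}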
static void si_compute_htile(const struct radeon_info *info,
struct radeon_surf *surf, unsigned num_layers)
{
- unsigned cl_width, cl_height, width, height;
- unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
- unsigned num_pipes = info->num_tile_pipes;
+ unsigned cl_width, cl_height, width, height;
+ unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
+ unsigned num_pipes = info->num_tile_pipes;
- surf->htile_size = 0;
+ surf->htile_size = 0;
- if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) ||
- surf->flags & RADEON_SURF_NO_HTILE)
- return;
+ if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) ||
+ surf->flags & RADEON_SURF_NO_HTILE)
+ return;
- if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
- !info->htile_cmask_support_1d_tiling)
- return;
+ if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
+ !info->htile_cmask_support_1d_tiling)
+ return;
- /* Overalign HTILE on P2 configs to work around GPU hangs in
+ /* Overalign HTILE on P2 configs to work around GPU hangs in
* piglit/depthstencil-render-miplevels 585.
*
* This has been confirmed to help Kabini & Stoney, where the hangs
* are always reproducible. I think I have seen the test hang
* on Carrizo too, though it was very rare there.
*/
- if (info->chip_class >= GFX7 && num_pipes < 4)
- num_pipes = 4;
-
- switch (num_pipes) {
- case 1:
- cl_width = 32;
- cl_height = 16;
- break;
- case 2:
- cl_width = 32;
- cl_height = 32;
- break;
- case 4:
- cl_width = 64;
- cl_height = 32;
- break;
- case 8:
- cl_width = 64;
- cl_height = 64;
- break;
- case 16:
- cl_width = 128;
- cl_height = 64;
- break;
- default:
- assert(0);
- return;
- }
-
- width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8);
- height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8);
-
- slice_elements = (width * height) / (8 * 8);
- slice_bytes = slice_elements * 4;
-
- pipe_interleave_bytes = info->pipe_interleave_bytes;
- base_align = num_pipes * pipe_interleave_bytes;
-
- surf->htile_alignment = base_align;
- surf->htile_size = num_layers * align(slice_bytes, base_align);
+ if (info->chip_class >= GFX7 && num_pipes < 4)
+ num_pipes = 4;
+
+ switch (num_pipes) {
+ case 1:
+ cl_width = 32;
+ cl_height = 16;
+ break;
+ case 2:
+ cl_width = 32;
+ cl_height = 32;
+ break;
+ case 4:
+ cl_width = 64;
+ cl_height = 32;
+ break;
+ case 8:
+ cl_width = 64;
+ cl_height = 64;
+ break;
+ case 16:
+ cl_width = 128;
+ cl_height = 64;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8);
+ height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8);
+
+ slice_elements = (width * height) / (8 * 8);
+ slice_bytes = slice_elements * 4;
+
+ pipe_interleave_bytes = info->pipe_interleave_bytes;
+ base_align = num_pipes * pipe_interleave_bytes;
+
+ surf->htile_alignment = base_align;
+ surf->htile_size = num_layers * align(slice_bytes, base_align);
}
static int radeon_winsys_surface_init(struct radeon_winsys *rws,
enum radeon_surf_mode mode,
struct radeon_surf *surf_ws)
{
- struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
- struct radeon_surface surf_drm;
- int r;
-
- surf_winsys_to_drm(&surf_drm, tex, flags, bpe, mode, surf_ws);
-
- if (!(flags & (RADEON_SURF_IMPORTED | RADEON_SURF_FMASK))) {
- r = radeon_surface_best(ws->surf_man, &surf_drm);
- if (r)
- return r;
- }
-
- r = radeon_surface_init(ws->surf_man, &surf_drm);
- if (r)
- return r;
-
- surf_drm_to_winsys(ws, surf_ws, &surf_drm);
-
- /* Compute FMASK. */
- if (ws->gen == DRV_SI &&
- tex->nr_samples >= 2 &&
- !(flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_FMASK | RADEON_SURF_NO_FMASK))) {
- /* FMASK is allocated like an ordinary texture. */
- struct pipe_resource templ = *tex;
- struct radeon_surf fmask = {};
- unsigned fmask_flags, bpe;
-
- templ.nr_samples = 1;
- fmask_flags = flags | RADEON_SURF_FMASK;
-
- switch (tex->nr_samples) {
- case 2:
- case 4:
- bpe = 1;
- break;
- case 8:
- bpe = 4;
- break;
- default:
- fprintf(stderr, "radeon: Invalid sample count for FMASK allocation.\n");
- return -1;
- }
-
- if (radeon_winsys_surface_init(rws, &templ, fmask_flags, bpe,
- RADEON_SURF_MODE_2D, &fmask)) {
- fprintf(stderr, "Got error in surface_init while allocating FMASK.\n");
- return -1;
- }
-
- assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
-
- surf_ws->fmask_size = fmask.surf_size;
- surf_ws->fmask_alignment = MAX2(256, fmask.surf_alignment);
- surf_ws->fmask_tile_swizzle = fmask.tile_swizzle;
-
- surf_ws->u.legacy.fmask.slice_tile_max =
+ struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
+ struct radeon_surface surf_drm;
+ int r;
+
+ surf_winsys_to_drm(&surf_drm, tex, flags, bpe, mode, surf_ws);
+
+ if (!(flags & (RADEON_SURF_IMPORTED | RADEON_SURF_FMASK))) {
+ r = radeon_surface_best(ws->surf_man, &surf_drm);
+ if (r)
+ return r;
+ }
+
+ r = radeon_surface_init(ws->surf_man, &surf_drm);
+ if (r)
+ return r;
+
+ surf_drm_to_winsys(ws, surf_ws, &surf_drm);
+
+ /* Compute FMASK. */
+ if (ws->gen == DRV_SI &&
+ tex->nr_samples >= 2 &&
+ !(flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_FMASK | RADEON_SURF_NO_FMASK))) {
+ /* FMASK is allocated like an ordinary texture. */
+ struct pipe_resource templ = *tex;
+ struct radeon_surf fmask = {};
+ unsigned fmask_flags, bpe;
+
+ templ.nr_samples = 1;
+ fmask_flags = flags | RADEON_SURF_FMASK;
+
+ switch (tex->nr_samples) {
+ case 2:
+ case 4:
+ bpe = 1;
+ break;
+ case 8:
+ bpe = 4;
+ break;
+ default:
+ fprintf(stderr, "radeon: Invalid sample count for FMASK allocation.\n");
+ return -1;
+ }
+
+ if (radeon_winsys_surface_init(rws, &templ, fmask_flags, bpe,
+ RADEON_SURF_MODE_2D, &fmask)) {
+ fprintf(stderr, "Got error in surface_init while allocating FMASK.\n");
+ return -1;
+ }
+
+ assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
+
+ surf_ws->fmask_size = fmask.surf_size;
+ surf_ws->fmask_alignment = MAX2(256, fmask.surf_alignment);
+ surf_ws->fmask_tile_swizzle = fmask.tile_swizzle;
+
+ surf_ws->u.legacy.fmask.slice_tile_max =
(fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
- if (surf_ws->u.legacy.fmask.slice_tile_max)
- surf_ws->u.legacy.fmask.slice_tile_max -= 1;
-
- surf_ws->u.legacy.fmask.tiling_index = fmask.u.legacy.tiling_index[0];
- surf_ws->u.legacy.fmask.bankh = fmask.u.legacy.bankh;
- surf_ws->u.legacy.fmask.pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
- }
-
- if (ws->gen == DRV_SI &&
- (tex->nr_samples <= 1 || surf_ws->fmask_size)) {
- struct ac_surf_config config;
-
- /* Only these fields need to be set for the CMASK computation. */
- config.info.width = tex->width0;
- config.info.height = tex->height0;
- config.info.depth = tex->depth0;
- config.info.array_size = tex->array_size;
- config.is_3d = !!(tex->target == PIPE_TEXTURE_3D);
- config.is_cube = !!(tex->target == PIPE_TEXTURE_CUBE);
-
- si_compute_cmask(&ws->info, &config, surf_ws);
- }
-
- if (ws->gen == DRV_SI) {
- si_compute_htile(&ws->info, surf_ws, util_num_layers(tex, 0));
-
- /* Determine the memory layout of multiple allocations in one buffer. */
- surf_ws->total_size = surf_ws->surf_size;
-
- if (surf_ws->htile_size) {
- surf_ws->htile_offset = align64(surf_ws->total_size, surf_ws->htile_alignment);
- surf_ws->total_size = surf_ws->htile_offset + surf_ws->htile_size;
- }
-
- if (surf_ws->fmask_size) {
- assert(tex->nr_samples >= 2);
- surf_ws->fmask_offset = align64(surf_ws->total_size, surf_ws->fmask_alignment);
- surf_ws->total_size = surf_ws->fmask_offset + surf_ws->fmask_size;
- }
-
- /* Single-sample CMASK is in a separate buffer. */
- if (surf_ws->cmask_size && tex->nr_samples >= 2) {
- surf_ws->cmask_offset = align64(surf_ws->total_size, surf_ws->cmask_alignment);
- surf_ws->total_size = surf_ws->cmask_offset + surf_ws->cmask_size;
- }
- }
-
- return 0;
+ if (surf_ws->u.legacy.fmask.slice_tile_max)
+ surf_ws->u.legacy.fmask.slice_tile_max -= 1;
+
+ surf_ws->u.legacy.fmask.tiling_index = fmask.u.legacy.tiling_index[0];
+ surf_ws->u.legacy.fmask.bankh = fmask.u.legacy.bankh;
+ surf_ws->u.legacy.fmask.pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
+ }
+
+ if (ws->gen == DRV_SI &&
+ (tex->nr_samples <= 1 || surf_ws->fmask_size)) {
+ struct ac_surf_config config;
+
+ /* Only these fields need to be set for the CMASK computation. */
+ config.info.width = tex->width0;
+ config.info.height = tex->height0;
+ config.info.depth = tex->depth0;
+ config.info.array_size = tex->array_size;
+ config.is_3d = !!(tex->target == PIPE_TEXTURE_3D);
+ config.is_cube = !!(tex->target == PIPE_TEXTURE_CUBE);
+
+ si_compute_cmask(&ws->info, &config, surf_ws);
+ }
+
+ if (ws->gen == DRV_SI) {
+ si_compute_htile(&ws->info, surf_ws, util_num_layers(tex, 0));
+
+ /* Determine the memory layout of multiple allocations in one buffer. */
+ surf_ws->total_size = surf_ws->surf_size;
+
+ if (surf_ws->htile_size) {
+ surf_ws->htile_offset = align64(surf_ws->total_size, surf_ws->htile_alignment);
+ surf_ws->total_size = surf_ws->htile_offset + surf_ws->htile_size;
+ }
+
+ if (surf_ws->fmask_size) {
+ assert(tex->nr_samples >= 2);
+ surf_ws->fmask_offset = align64(surf_ws->total_size, surf_ws->fmask_alignment);
+ surf_ws->total_size = surf_ws->fmask_offset + surf_ws->fmask_size;
+ }
+
+ /* Single-sample CMASK is in a separate buffer. */
+ if (surf_ws->cmask_size && tex->nr_samples >= 2) {
+ surf_ws->cmask_offset = align64(surf_ws->total_size, surf_ws->cmask_alignment);
+ surf_ws->total_size = surf_ws->cmask_offset + surf_ws->cmask_size;
+ }
+ }
+
+ return 0;
}
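The layout step near the end of the function simply appends each metadata surface at the next aligned offset inside one buffer. A small stand-alone illustration with made-up sizes and power-of-two alignments:

#include <stdint.h>
#include <stdio.h>

static uint64_t align64_up(uint64_t v, uint64_t a)   /* a must be a power of two */
{
   return (v + a - 1) & ~(a - 1);
}

int main(void)
{
   uint64_t total = 8 * 1024 * 1024;              /* main surface (example) */
   uint64_t htile_off = align64_up(total, 32768);
   total = htile_off + 65536;                     /* + HTILE (example size) */
   uint64_t fmask_off = align64_up(total, 65536);
   total = fmask_off + 2 * 1024 * 1024;           /* + FMASK (example size) */

   printf("htile at %llu, fmask at %llu, total %llu bytes\n",
          (unsigned long long)htile_off,
          (unsigned long long)fmask_off,
          (unsigned long long)total);
   return 0;
}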
void radeon_surface_init_functions(struct radeon_drm_winsys *ws)
{
- ws->base.surface_init = radeon_winsys_surface_init;
+ ws->base.surface_init = radeon_winsys_surface_init;
}
unsigned request, const char *request_name,
bool enable)
{
- struct drm_radeon_info info;
- unsigned value = enable ? 1 : 0;
-
- memset(&info, 0, sizeof(info));
-
- mtx_lock(&*mutex);
-
- /* Early exit if we are sure the request will fail. */
- if (enable) {
- if (*owner) {
- mtx_unlock(&*mutex);
- return false;
- }
- } else {
- if (*owner != applier) {
- mtx_unlock(&*mutex);
- return false;
- }
- }
-
- /* Pass through the request to the kernel. */
- info.value = (unsigned long)&value;
- info.request = request;
- if (drmCommandWriteRead(applier->ws->fd, DRM_RADEON_INFO,
- &info, sizeof(info)) != 0) {
- mtx_unlock(&*mutex);
- return false;
- }
-
- /* Update the rights in the winsys. */
- if (enable) {
- if (value) {
- *owner = applier;
- mtx_unlock(&*mutex);
- return true;
- }
- } else {
- *owner = NULL;
- }
-
- mtx_unlock(&*mutex);
- return false;
+ struct drm_radeon_info info;
+ unsigned value = enable ? 1 : 0;
+
+ memset(&info, 0, sizeof(info));
+
+ mtx_lock(&*mutex);
+
+ /* Early exit if we are sure the request will fail. */
+ if (enable) {
+ if (*owner) {
+ mtx_unlock(&*mutex);
+ return false;
+ }
+ } else {
+ if (*owner != applier) {
+ mtx_unlock(&*mutex);
+ return false;
+ }
+ }
+
+ /* Pass through the request to the kernel. */
+ info.value = (unsigned long)&value;
+ info.request = request;
+ if (drmCommandWriteRead(applier->ws->fd, DRM_RADEON_INFO,
+ &info, sizeof(info)) != 0) {
+ mtx_unlock(&*mutex);
+ return false;
+ }
+
+ /* Update the rights in the winsys. */
+ if (enable) {
+ if (value) {
+ *owner = applier;
+ mtx_unlock(&*mutex);
+ return true;
+ }
+ } else {
+ *owner = NULL;
+ }
+
+ mtx_unlock(&*mutex);
+ return false;
}
static bool radeon_get_drm_value(int fd, unsigned request,
const char *errname, uint32_t *out)
{
- struct drm_radeon_info info;
- int retval;
-
- memset(&info, 0, sizeof(info));
-
- info.value = (unsigned long)out;
- info.request = request;
-
- retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
- if (retval) {
- if (errname) {
- fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
- errname, retval);
- }
- return false;
- }
- return true;
+ struct drm_radeon_info info;
+ int retval;
+
+ memset(&info, 0, sizeof(info));
+
+ info.value = (unsigned long)out;
+ info.request = request;
+
+ retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
+ if (retval) {
+ if (errname) {
+ fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
+ errname, retval);
+ }
+ return false;
+ }
+ return true;
}
/* Helper function to do the ioctls needed for setup and init. */
static bool do_winsys_init(struct radeon_drm_winsys *ws)
{
- struct drm_radeon_gem_info gem_info;
- int retval;
- drmVersionPtr version;
-
- memset(&gem_info, 0, sizeof(gem_info));
-
- /* We do things in a specific order here.
- *
- * DRM version first. We need to be sure we're running on a KMS chipset.
- * This is also for some features.
- *
- * Then, the PCI ID. This is essential and should return usable numbers
- * for all Radeons. If this fails, we probably got handed an FD for some
- * non-Radeon card.
- *
- * The GEM info is actually bogus on the kernel side, as well as our side
- * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because
- * we don't actually use the info for anything yet.
- *
- * The GB and Z pipe requests should always succeed, but they might not
- * return sensical values for all chipsets, but that's alright because
- * the pipe drivers already know that.
- */
-
- /* Get DRM version. */
- version = drmGetVersion(ws->fd);
- if (version->version_major != 2 ||
- version->version_minor < 12) {
- fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
- "only compatible with 2.12.0 (kernel 3.2) or later.\n",
- __FUNCTION__,
- version->version_major,
- version->version_minor,
- version->version_patchlevel);
- drmFreeVersion(version);
- return false;
- }
-
- ws->info.drm_major = version->version_major;
- ws->info.drm_minor = version->version_minor;
- ws->info.drm_patchlevel = version->version_patchlevel;
- ws->info.is_amdgpu = false;
- drmFreeVersion(version);
-
- /* Get PCI ID. */
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID",
- &ws->info.pci_id))
- return false;
-
- /* Check PCI ID. */
- switch (ws->info.pci_id) {
+ struct drm_radeon_gem_info gem_info;
+ int retval;
+ drmVersionPtr version;
+
+ memset(&gem_info, 0, sizeof(gem_info));
+
+ /* We do things in a specific order here.
+ *
+ * DRM version first. We need to be sure we're running on a KMS chipset.
+ * This is also for some features.
+ *
+ * Then, the PCI ID. This is essential and should return usable numbers
+ * for all Radeons. If this fails, we probably got handed an FD for some
+ * non-Radeon card.
+ *
+ * The GEM info is actually bogus on the kernel side, as well as our side
+ * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because
+ * we don't actually use the info for anything yet.
+ *
+ * The GB and Z pipe requests should always succeed, but they might not
+ * return sensible values for all chipsets; that's alright because
+ * the pipe drivers already know that.
+ */
+
+ /* Get DRM version. */
+ version = drmGetVersion(ws->fd);
+ if (version->version_major != 2 ||
+ version->version_minor < 12) {
+ fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
+ "only compatible with 2.12.0 (kernel 3.2) or later.\n",
+ __FUNCTION__,
+ version->version_major,
+ version->version_minor,
+ version->version_patchlevel);
+ drmFreeVersion(version);
+ return false;
+ }
+
+ ws->info.drm_major = version->version_major;
+ ws->info.drm_minor = version->version_minor;
+ ws->info.drm_patchlevel = version->version_patchlevel;
+ ws->info.is_amdgpu = false;
+ drmFreeVersion(version);
+
+ /* Get PCI ID. */
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID",
+ &ws->info.pci_id))
+ return false;
+
+ /* Check PCI ID. */
+ switch (ws->info.pci_id) {
#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; ws->gen = DRV_R300; break;
#include "pci_ids/r300_pci_ids.h"
#undef CHIPSET
#undef CHIPSET
#define CHIPSET(pci_id, cfamily) \
- case pci_id: \
- ws->info.family = CHIP_##cfamily; \
- ws->info.name = #cfamily; \
- ws->gen = DRV_SI; \
- break;
+ case pci_id: \
+ ws->info.family = CHIP_##cfamily; \
+ ws->info.name = #cfamily; \
+ ws->gen = DRV_SI; \
+ break;
#include "pci_ids/radeonsi_pci_ids.h"
#undef CHIPSET
- default:
- fprintf(stderr, "radeon: Invalid PCI ID.\n");
- return false;
- }
-
- switch (ws->info.family) {
- default:
- case CHIP_UNKNOWN:
- fprintf(stderr, "radeon: Unknown family.\n");
- return false;
- case CHIP_R300:
- case CHIP_R350:
- case CHIP_RV350:
- case CHIP_RV370:
- case CHIP_RV380:
- case CHIP_RS400:
- case CHIP_RC410:
- case CHIP_RS480:
- ws->info.chip_class = R300;
- break;
- case CHIP_R420: /* R4xx-based cores. */
- case CHIP_R423:
- case CHIP_R430:
- case CHIP_R480:
- case CHIP_R481:
- case CHIP_RV410:
- case CHIP_RS600:
- case CHIP_RS690:
- case CHIP_RS740:
- ws->info.chip_class = R400;
- break;
- case CHIP_RV515: /* R5xx-based cores. */
- case CHIP_R520:
- case CHIP_RV530:
- case CHIP_R580:
- case CHIP_RV560:
- case CHIP_RV570:
- ws->info.chip_class = R500;
- break;
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
- ws->info.chip_class = R600;
- break;
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
- ws->info.chip_class = R700;
- break;
- case CHIP_CEDAR:
- case CHIP_REDWOOD:
- case CHIP_JUNIPER:
- case CHIP_CYPRESS:
- case CHIP_HEMLOCK:
- case CHIP_PALM:
- case CHIP_SUMO:
- case CHIP_SUMO2:
- case CHIP_BARTS:
- case CHIP_TURKS:
- case CHIP_CAICOS:
- ws->info.chip_class = EVERGREEN;
- break;
- case CHIP_CAYMAN:
- case CHIP_ARUBA:
- ws->info.chip_class = CAYMAN;
- break;
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
- ws->info.chip_class = GFX6;
- break;
- case CHIP_BONAIRE:
- case CHIP_KAVERI:
- case CHIP_KABINI:
- case CHIP_HAWAII:
- ws->info.chip_class = GFX7;
- break;
- }
-
- /* Set which chips don't have dedicated VRAM. */
- switch (ws->info.family) {
- case CHIP_RS400:
- case CHIP_RC410:
- case CHIP_RS480:
- case CHIP_RS600:
- case CHIP_RS690:
- case CHIP_RS740:
- case CHIP_RS780:
- case CHIP_RS880:
- case CHIP_PALM:
- case CHIP_SUMO:
- case CHIP_SUMO2:
- case CHIP_ARUBA:
- case CHIP_KAVERI:
- case CHIP_KABINI:
- ws->info.has_dedicated_vram = false;
- break;
-
- default:
- ws->info.has_dedicated_vram = true;
- }
-
- ws->info.num_rings[RING_GFX] = 1;
- /* Check for dma */
- ws->info.num_rings[RING_DMA] = 0;
- /* DMA is disabled on R700. There is IB corruption and hangs. */
- if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) {
- ws->info.num_rings[RING_DMA] = 1;
- }
-
- /* Check for UVD and VCE */
- ws->info.has_hw_decode = false;
- ws->info.vce_fw_version = 0x00000000;
- if (ws->info.drm_minor >= 32) {
- uint32_t value = RADEON_CS_RING_UVD;
- if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING,
- "UVD Ring working", &value)) {
- ws->info.has_hw_decode = value;
- ws->info.num_rings[RING_UVD] = 1;
- }
-
- value = RADEON_CS_RING_VCE;
- if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING,
- NULL, &value) && value) {
-
- if (radeon_get_drm_value(ws->fd, RADEON_INFO_VCE_FW_VERSION,
- "VCE FW version", &value)) {
- ws->info.vce_fw_version = value;
- ws->info.num_rings[RING_VCE] = 1;
- }
- }
- }
-
- /* Check for userptr support. */
- {
- struct drm_radeon_gem_userptr args = {0};
-
- /* If the ioctl doesn't exist, -EINVAL is returned.
- *
- * If the ioctl exists, it should return -EACCES
- * if RADEON_GEM_USERPTR_READONLY or RADEON_GEM_USERPTR_REGISTER
- * aren't set.
- */
- ws->info.has_userptr =
+ default:
+ fprintf(stderr, "radeon: Invalid PCI ID.\n");
+ return false;
+ }
+
+ switch (ws->info.family) {
+ default:
+ case CHIP_UNKNOWN:
+ fprintf(stderr, "radeon: Unknown family.\n");
+ return false;
+ case CHIP_R300:
+ case CHIP_R350:
+ case CHIP_RV350:
+ case CHIP_RV370:
+ case CHIP_RV380:
+ case CHIP_RS400:
+ case CHIP_RC410:
+ case CHIP_RS480:
+ ws->info.chip_class = R300;
+ break;
+ case CHIP_R420: /* R4xx-based cores. */
+ case CHIP_R423:
+ case CHIP_R430:
+ case CHIP_R480:
+ case CHIP_R481:
+ case CHIP_RV410:
+ case CHIP_RS600:
+ case CHIP_RS690:
+ case CHIP_RS740:
+ ws->info.chip_class = R400;
+ break;
+ case CHIP_RV515: /* R5xx-based cores. */
+ case CHIP_R520:
+ case CHIP_RV530:
+ case CHIP_R580:
+ case CHIP_RV560:
+ case CHIP_RV570:
+ ws->info.chip_class = R500;
+ break;
+ case CHIP_R600:
+ case CHIP_RV610:
+ case CHIP_RV630:
+ case CHIP_RV670:
+ case CHIP_RV620:
+ case CHIP_RV635:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ ws->info.chip_class = R600;
+ break;
+ case CHIP_RV770:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_RV740:
+ ws->info.chip_class = R700;
+ break;
+ case CHIP_CEDAR:
+ case CHIP_REDWOOD:
+ case CHIP_JUNIPER:
+ case CHIP_CYPRESS:
+ case CHIP_HEMLOCK:
+ case CHIP_PALM:
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
+ case CHIP_BARTS:
+ case CHIP_TURKS:
+ case CHIP_CAICOS:
+ ws->info.chip_class = EVERGREEN;
+ break;
+ case CHIP_CAYMAN:
+ case CHIP_ARUBA:
+ ws->info.chip_class = CAYMAN;
+ break;
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ ws->info.chip_class = GFX6;
+ break;
+ case CHIP_BONAIRE:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_HAWAII:
+ ws->info.chip_class = GFX7;
+ break;
+ }
+
+ /* Set which chips don't have dedicated VRAM. */
+ switch (ws->info.family) {
+ case CHIP_RS400:
+ case CHIP_RC410:
+ case CHIP_RS480:
+ case CHIP_RS600:
+ case CHIP_RS690:
+ case CHIP_RS740:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ case CHIP_PALM:
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
+ case CHIP_ARUBA:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ ws->info.has_dedicated_vram = false;
+ break;
+
+ default:
+ ws->info.has_dedicated_vram = true;
+ }
+
+ ws->info.num_rings[RING_GFX] = 1;
+ /* Check for dma */
+ ws->info.num_rings[RING_DMA] = 0;
+ /* DMA is disabled on R700; it causes IB corruption and hangs. */
+ if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) {
+ ws->info.num_rings[RING_DMA] = 1;
+ }
+
+ /* Check for UVD and VCE */
+ ws->info.has_hw_decode = false;
+ ws->info.vce_fw_version = 0x00000000;
+ if (ws->info.drm_minor >= 32) {
+ uint32_t value = RADEON_CS_RING_UVD;
+ if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING,
+ "UVD Ring working", &value)) {
+ ws->info.has_hw_decode = value;
+ ws->info.num_rings[RING_UVD] = 1;
+ }
+
+ value = RADEON_CS_RING_VCE;
+ if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING,
+ NULL, &value) && value) {
+
+ if (radeon_get_drm_value(ws->fd, RADEON_INFO_VCE_FW_VERSION,
+ "VCE FW version", &value)) {
+ ws->info.vce_fw_version = value;
+ ws->info.num_rings[RING_VCE] = 1;
+ }
+ }
+ }
+
+ /* Check for userptr support. */
+ {
+ struct drm_radeon_gem_userptr args = {0};
+
+ /* If the ioctl doesn't exist, -EINVAL is returned.
+ *
+ * If the ioctl exists, it should return -EACCES
+ * if RADEON_GEM_USERPTR_READONLY or RADEON_GEM_USERPTR_REGISTER
+ * aren't set.
+ */
+ ws->info.has_userptr =
drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
&args, sizeof(args)) == -EACCES;
- }
-
- /* Get GEM info. */
- retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO,
- &gem_info, sizeof(gem_info));
- if (retval) {
- fprintf(stderr, "radeon: Failed to get MM info, error number %d\n",
- retval);
- return false;
- }
- ws->info.gart_size = gem_info.gart_size;
- ws->info.vram_size = gem_info.vram_size;
- ws->info.vram_vis_size = gem_info.vram_visible;
- /* Older versions of the kernel driver reported incorrect values, and
- * didn't support more than 256MB of visible VRAM anyway
- */
- if (ws->info.drm_minor < 49)
- ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024);
-
- /* Radeon allocates all buffers contiguously, which makes large allocations
- * unlikely to succeed. */
- if (ws->info.has_dedicated_vram)
- ws->info.max_alloc_size = ws->info.vram_size * 0.7;
- else
- ws->info.max_alloc_size = ws->info.gart_size * 0.7;
-
- if (ws->info.drm_minor < 40)
- ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024);
- /* Both 32-bit and 64-bit address spaces only have 4GB. */
- ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024);
-
- /* Get max clock frequency info and convert it to MHz */
- radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
- &ws->info.max_shader_clock);
- ws->info.max_shader_clock /= 1000;
-
- ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-
- /* Generation-specific queries. */
- if (ws->gen == DRV_R300) {
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES,
- "GB pipe count",
- &ws->info.r300_num_gb_pipes))
- return false;
-
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES,
- "Z pipe count",
- &ws->info.r300_num_z_pipes))
- return false;
- }
- else if (ws->gen >= DRV_R600) {
- uint32_t tiling_config = 0;
-
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
- "num backends",
- &ws->info.num_render_backends))
- return false;
-
- /* get the GPU counter frequency, failure is not fatal */
- radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL,
- &ws->info.clock_crystal_freq);
-
- radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
- &tiling_config);
-
- ws->info.r600_num_banks =
+ }
+
+ /* Get GEM info. */
+ retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO,
+ &gem_info, sizeof(gem_info));
+ if (retval) {
+ fprintf(stderr, "radeon: Failed to get MM info, error number %d\n",
+ retval);
+ return false;
+ }
+ ws->info.gart_size = gem_info.gart_size;
+ ws->info.vram_size = gem_info.vram_size;
+ ws->info.vram_vis_size = gem_info.vram_visible;
+ /* Older versions of the kernel driver reported incorrect values, and
+ * didn't support more than 256MB of visible VRAM anyway
+ */
+ if (ws->info.drm_minor < 49)
+ ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024);
+
+ /* Radeon allocates all buffers contiguously, which makes large allocations
+ * unlikely to succeed. */
+ if (ws->info.has_dedicated_vram)
+ ws->info.max_alloc_size = ws->info.vram_size * 0.7;
+ else
+ ws->info.max_alloc_size = ws->info.gart_size * 0.7;
+
+ if (ws->info.drm_minor < 40)
+ ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024);
+ /* Both 32-bit and 64-bit address spaces only have 4GB. */
+ ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024);
+
+ /* Get max clock frequency info and convert it to MHz */
+ radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
+ &ws->info.max_shader_clock);
+ ws->info.max_shader_clock /= 1000;
+
+ ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ /* Generation-specific queries. */
+ if (ws->gen == DRV_R300) {
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES,
+ "GB pipe count",
+ &ws->info.r300_num_gb_pipes))
+ return false;
+
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES,
+ "Z pipe count",
+ &ws->info.r300_num_z_pipes))
+ return false;
+ }
+ else if (ws->gen >= DRV_R600) {
+ uint32_t tiling_config = 0;
+
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
+ "num backends",
+ &ws->info.num_render_backends))
+ return false;
+
+ /* get the GPU counter frequency, failure is not fatal */
+ radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL,
+ &ws->info.clock_crystal_freq);
+
+ radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
+ &tiling_config);
+
+ ws->info.r600_num_banks =
ws->info.chip_class >= EVERGREEN ?
- 4 << ((tiling_config & 0xf0) >> 4) :
- 4 << ((tiling_config & 0x30) >> 4);
+ 4 << ((tiling_config & 0xf0) >> 4) :
+ 4 << ((tiling_config & 0x30) >> 4);
- ws->info.pipe_interleave_bytes =
+ ws->info.pipe_interleave_bytes =
ws->info.chip_class >= EVERGREEN ?
- 256 << ((tiling_config & 0xf00) >> 8) :
- 256 << ((tiling_config & 0xc0) >> 6);
-
- if (!ws->info.pipe_interleave_bytes)
- ws->info.pipe_interleave_bytes =
- ws->info.chip_class >= EVERGREEN ? 512 : 256;
-
- radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
- &ws->info.num_tile_pipes);
-
- /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the
- * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti)
- * reports a different value (12). Fix it by setting what's in the
- * GB_TILE_MODE array (8).
- */
- if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12)
- ws->info.num_tile_pipes = 8;
-
- if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
- &ws->info.r600_gb_backend_map))
- ws->info.r600_gb_backend_map_valid = true;
-
- /* Default value. */
- ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends);
- /*
- * This fails (silently) on non-GCN or older kernels, overwriting the
- * default enabled_rb_mask with the result of the last query.
- */
- if (ws->gen >= DRV_SI)
- radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
- &ws->info.enabled_rb_mask);
-
- ws->info.r600_has_virtual_memory = false;
- if (ws->info.drm_minor >= 13) {
- uint32_t ib_vm_max_size;
-
- ws->info.r600_has_virtual_memory = true;
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
- &ws->va_start))
- ws->info.r600_has_virtual_memory = false;
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
- &ib_vm_max_size))
- ws->info.r600_has_virtual_memory = false;
- radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
- &ws->va_unmap_working);
- }
- if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", false))
- ws->info.r600_has_virtual_memory = false;
- }
-
- /* Get max pipes, this is only needed for compute shaders. All evergreen+
- * chips have at least 2 pipes, so we use 2 as a default. */
- ws->info.r600_max_quad_pipes = 2;
- radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL,
- &ws->info.r600_max_quad_pipes);
-
- /* All GPUs have at least one compute unit */
- ws->info.num_good_compute_units = 1;
- radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL,
- &ws->info.num_good_compute_units);
-
- radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL,
- &ws->info.max_se);
-
- switch (ws->info.family) {
- case CHIP_HAINAN:
- case CHIP_KABINI:
- ws->info.num_tcc_blocks = 2;
- break;
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_BONAIRE:
- case CHIP_KAVERI:
- ws->info.num_tcc_blocks = 4;
- break;
- case CHIP_PITCAIRN:
- ws->info.num_tcc_blocks = 8;
- break;
- case CHIP_TAHITI:
- ws->info.num_tcc_blocks = 12;
- break;
- case CHIP_HAWAII:
- ws->info.num_tcc_blocks = 16;
- break;
- default:
- ws->info.num_tcc_blocks = 0;
- break;
- }
-
- if (!ws->info.max_se) {
- switch (ws->info.family) {
- default:
- ws->info.max_se = 1;
- break;
- case CHIP_CYPRESS:
- case CHIP_HEMLOCK:
- case CHIP_BARTS:
- case CHIP_CAYMAN:
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_BONAIRE:
- ws->info.max_se = 2;
- break;
- case CHIP_HAWAII:
- ws->info.max_se = 4;
- break;
- }
- }
-
- radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
- &ws->info.max_sh_per_se);
- if (ws->gen == DRV_SI) {
- ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units /
- (ws->info.max_se * ws->info.max_sh_per_se);
- }
-
- radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL,
- &ws->accel_working2);
- if (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 2) {
- fprintf(stderr, "radeon: GPU acceleration for Hawaii disabled, "
- "returned accel_working2 value %u is smaller than 2. "
- "Please install a newer kernel.\n",
- ws->accel_working2);
- return false;
- }
-
- if (ws->info.chip_class == GFX7) {
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL,
- ws->info.cik_macrotile_mode_array)) {
- fprintf(stderr, "radeon: Kernel 3.13 is required for Sea Islands support.\n");
- return false;
- }
- }
-
- if (ws->info.chip_class >= GFX6) {
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL,
- ws->info.si_tile_mode_array)) {
- fprintf(stderr, "radeon: Kernel 3.10 is required for Southern Islands support.\n");
- return false;
- }
- }
-
- /* Hawaii with old firmware needs type2 nop packet.
- * accel_working2 with value 3 indicates the new firmware.
- */
- ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= GFX6 ||
- (ws->info.family == CHIP_HAWAII &&
- ws->accel_working2 < 3);
- ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
- ws->info.ib_start_alignment = 4096;
- ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40;
- /* HTILE is broken with 1D tiling on old kernels and GFX7. */
- ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != GFX7 ||
- ws->info.drm_minor >= 38;
- ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48;
- ws->info.has_bo_metadata = false;
- ws->info.has_gpu_reset_status_query = ws->info.drm_minor >= 43;
- ws->info.has_eqaa_surface_allocator = false;
- ws->info.has_format_bc1_through_bc7 = ws->info.drm_minor >= 31;
- ws->info.kernel_flushes_tc_l2_after_ib = true;
- /* Old kernels disallowed register writes via COPY_DATA
- * that are used for indirect compute dispatches. */
- ws->info.has_indirect_compute_dispatch = ws->info.chip_class == GFX7 ||
- (ws->info.chip_class == GFX6 &&
- ws->info.drm_minor >= 45);
- /* GFX6 doesn't support unaligned loads. */
- ws->info.has_unaligned_shader_loads = ws->info.chip_class == GFX7 &&
- ws->info.drm_minor >= 50;
- ws->info.has_sparse_vm_mappings = false;
- /* 2D tiling on GFX7 is supported since DRM 2.35.0 */
- ws->info.has_2d_tiling = ws->info.chip_class <= GFX6 || ws->info.drm_minor >= 35;
- ws->info.has_read_registers_query = ws->info.drm_minor >= 42;
- ws->info.max_alignment = 1024*1024;
- ws->info.has_graphics = true;
- ws->info.cpdma_prefetch_writes_memory = true;
- ws->info.max_wave64_per_simd = 10;
- ws->info.num_physical_sgprs_per_simd = 512;
- ws->info.num_physical_wave64_vgprs_per_simd = 256;
- /* Potential hang on Kabini: */
- ws->info.use_late_alloc = ws->info.family != CHIP_KABINI;
-
- ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
- strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
-
- return true;
+ 256 << ((tiling_config & 0xf00) >> 8) :
+ 256 << ((tiling_config & 0xc0) >> 6);
+
+ if (!ws->info.pipe_interleave_bytes)
+ ws->info.pipe_interleave_bytes =
+ ws->info.chip_class >= EVERGREEN ? 512 : 256;
+
+ radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
+ &ws->info.num_tile_pipes);
+
+ /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the
+ * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti)
+ * reports a different value (12). Fix it by setting what's in the
+ * GB_TILE_MODE array (8).
+ */
+ if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12)
+ ws->info.num_tile_pipes = 8;
+
+ if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
+ &ws->info.r600_gb_backend_map))
+ ws->info.r600_gb_backend_map_valid = true;
+
+ /* Default value. */
+ ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends);
+ /*
+ * This fails (silently) on non-GCN or older kernels, overwriting the
+ * default enabled_rb_mask with the result of the last query.
+ */
+ if (ws->gen >= DRV_SI)
+ radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
+ &ws->info.enabled_rb_mask);
+
+ ws->info.r600_has_virtual_memory = false;
+ if (ws->info.drm_minor >= 13) {
+ uint32_t ib_vm_max_size;
+
+ ws->info.r600_has_virtual_memory = true;
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
+ &ws->va_start))
+ ws->info.r600_has_virtual_memory = false;
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
+ &ib_vm_max_size))
+ ws->info.r600_has_virtual_memory = false;
+ radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
+ &ws->va_unmap_working);
+ }
+ if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", false))
+ ws->info.r600_has_virtual_memory = false;
+ }
+
+ /* Get max pipes, this is only needed for compute shaders. All evergreen+
+ * chips have at least 2 pipes, so we use 2 as a default. */
+ ws->info.r600_max_quad_pipes = 2;
+ radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL,
+ &ws->info.r600_max_quad_pipes);
+
+ /* All GPUs have at least one compute unit */
+ ws->info.num_good_compute_units = 1;
+ radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL,
+ &ws->info.num_good_compute_units);
+
+ radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL,
+ &ws->info.max_se);
+
+ switch (ws->info.family) {
+ case CHIP_HAINAN:
+ case CHIP_KABINI:
+ ws->info.num_tcc_blocks = 2;
+ break;
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_BONAIRE:
+ case CHIP_KAVERI:
+ ws->info.num_tcc_blocks = 4;
+ break;
+ case CHIP_PITCAIRN:
+ ws->info.num_tcc_blocks = 8;
+ break;
+ case CHIP_TAHITI:
+ ws->info.num_tcc_blocks = 12;
+ break;
+ case CHIP_HAWAII:
+ ws->info.num_tcc_blocks = 16;
+ break;
+ default:
+ ws->info.num_tcc_blocks = 0;
+ break;
+ }
+
+ if (!ws->info.max_se) {
+ switch (ws->info.family) {
+ default:
+ ws->info.max_se = 1;
+ break;
+ case CHIP_CYPRESS:
+ case CHIP_HEMLOCK:
+ case CHIP_BARTS:
+ case CHIP_CAYMAN:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_BONAIRE:
+ ws->info.max_se = 2;
+ break;
+ case CHIP_HAWAII:
+ ws->info.max_se = 4;
+ break;
+ }
+ }
+
+ radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
+ &ws->info.max_sh_per_se);
+ if (ws->gen == DRV_SI) {
+ ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units /
+ (ws->info.max_se * ws->info.max_sh_per_se);
+ }
+
+ radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL,
+ &ws->accel_working2);
+ if (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 2) {
+ fprintf(stderr, "radeon: GPU acceleration for Hawaii disabled, "
+ "returned accel_working2 value %u is smaller than 2. "
+ "Please install a newer kernel.\n",
+ ws->accel_working2);
+ return false;
+ }
+
+ if (ws->info.chip_class == GFX7) {
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL,
+ ws->info.cik_macrotile_mode_array)) {
+ fprintf(stderr, "radeon: Kernel 3.13 is required for Sea Islands support.\n");
+ return false;
+ }
+ }
+
+ if (ws->info.chip_class >= GFX6) {
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL,
+ ws->info.si_tile_mode_array)) {
+ fprintf(stderr, "radeon: Kernel 3.10 is required for Southern Islands support.\n");
+ return false;
+ }
+ }
+
+ /* Hawaii with old firmware needs type2 nop packet.
+ * accel_working2 with value 3 indicates the new firmware.
+ */
+ ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= GFX6 ||
+ (ws->info.family == CHIP_HAWAII &&
+ ws->accel_working2 < 3);
+ ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
+ ws->info.ib_start_alignment = 4096;
+ ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40;
+ /* HTILE is broken with 1D tiling on old kernels and GFX7. */
+ ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != GFX7 ||
+ ws->info.drm_minor >= 38;
+ ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48;
+ ws->info.has_bo_metadata = false;
+ ws->info.has_gpu_reset_status_query = ws->info.drm_minor >= 43;
+ ws->info.has_eqaa_surface_allocator = false;
+ ws->info.has_format_bc1_through_bc7 = ws->info.drm_minor >= 31;
+ ws->info.kernel_flushes_tc_l2_after_ib = true;
+ /* Old kernels disallowed register writes via COPY_DATA
+ * that are used for indirect compute dispatches. */
+ ws->info.has_indirect_compute_dispatch = ws->info.chip_class == GFX7 ||
+ (ws->info.chip_class == GFX6 &&
+ ws->info.drm_minor >= 45);
+ /* GFX6 doesn't support unaligned loads. */
+ ws->info.has_unaligned_shader_loads = ws->info.chip_class == GFX7 &&
+ ws->info.drm_minor >= 50;
+ ws->info.has_sparse_vm_mappings = false;
+ /* 2D tiling on GFX7 is supported since DRM 2.35.0 */
+ ws->info.has_2d_tiling = ws->info.chip_class <= GFX6 || ws->info.drm_minor >= 35;
+ ws->info.has_read_registers_query = ws->info.drm_minor >= 42;
+ ws->info.max_alignment = 1024*1024;
+ ws->info.has_graphics = true;
+ ws->info.cpdma_prefetch_writes_memory = true;
+ ws->info.max_wave64_per_simd = 10;
+ ws->info.num_physical_sgprs_per_simd = 512;
+ ws->info.num_physical_wave64_vgprs_per_simd = 256;
+ /* Potential hang on Kabini: */
+ ws->info.use_late_alloc = ws->info.family != CHIP_KABINI;
+
+ ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
+ strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
+
+ return true;
}
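The probe order spelled out in the comment near the top of do_winsys_init (DRM version first, then PCI ID) can be exercised on its own. A hedged stand-alone sketch using libdrm; the include paths and the /dev/dri node are assumptions for the example, and this is not the driver's code:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <xf86drm.h>
#include <radeon_drm.h>

static int probe_radeon(int fd)
{
   drmVersionPtr version = drmGetVersion(fd);
   struct drm_radeon_info info;
   uint32_t pci_id = 0;

   if (!version)
      return -1;
   printf("DRM %d.%d.%d\n", version->version_major,
          version->version_minor, version->version_patchlevel);
   if (version->version_major != 2 || version->version_minor < 12) {
      drmFreeVersion(version);
      return -1;   /* too old for this winsys */
   }
   drmFreeVersion(version);

   memset(&info, 0, sizeof(info));
   info.request = RADEON_INFO_DEVICE_ID;
   info.value = (unsigned long)&pci_id;
   if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0)
      return -1;   /* probably not a radeon fd */

   printf("PCI ID: 0x%04x\n", (unsigned)pci_id);
   return 0;
}

int main(void)
{
   int fd = open("/dev/dri/renderD128", O_RDWR);   /* example node */
   int ret;

   if (fd < 0)
      return 1;
   ret = probe_radeon(fd);
   close(fd);
   return ret ? 1 : 0;
}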
static void radeon_winsys_destroy(struct radeon_winsys *rws)
{
- struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
+ struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
- if (util_queue_is_initialized(&ws->cs_queue))
- util_queue_destroy(&ws->cs_queue);
+ if (util_queue_is_initialized(&ws->cs_queue))
+ util_queue_destroy(&ws->cs_queue);
- mtx_destroy(&ws->hyperz_owner_mutex);
- mtx_destroy(&ws->cmask_owner_mutex);
+ mtx_destroy(&ws->hyperz_owner_mutex);
+ mtx_destroy(&ws->cmask_owner_mutex);
- if (ws->info.r600_has_virtual_memory)
- pb_slabs_deinit(&ws->bo_slabs);
- pb_cache_deinit(&ws->bo_cache);
+ if (ws->info.r600_has_virtual_memory)
+ pb_slabs_deinit(&ws->bo_slabs);
+ pb_cache_deinit(&ws->bo_cache);
- if (ws->gen >= DRV_R600) {
- radeon_surface_manager_free(ws->surf_man);
- }
+ if (ws->gen >= DRV_R600) {
+ radeon_surface_manager_free(ws->surf_man);
+ }
- _mesa_hash_table_destroy(ws->bo_names, NULL);
- _mesa_hash_table_destroy(ws->bo_handles, NULL);
- _mesa_hash_table_destroy(ws->bo_vas, NULL);
- mtx_destroy(&ws->bo_handles_mutex);
- mtx_destroy(&ws->vm32.mutex);
- mtx_destroy(&ws->vm64.mutex);
- mtx_destroy(&ws->bo_fence_lock);
+ _mesa_hash_table_destroy(ws->bo_names, NULL);
+ _mesa_hash_table_destroy(ws->bo_handles, NULL);
+ _mesa_hash_table_destroy(ws->bo_vas, NULL);
+ mtx_destroy(&ws->bo_handles_mutex);
+ mtx_destroy(&ws->vm32.mutex);
+ mtx_destroy(&ws->vm64.mutex);
+ mtx_destroy(&ws->bo_fence_lock);
- if (ws->fd >= 0)
- close(ws->fd);
+ if (ws->fd >= 0)
+ close(ws->fd);
- FREE(rws);
+ FREE(rws);
}
static void radeon_query_info(struct radeon_winsys *rws,
struct radeon_info *info)
{
- *info = ((struct radeon_drm_winsys *)rws)->info;
+ *info = ((struct radeon_drm_winsys *)rws)->info;
}
static bool radeon_cs_request_feature(struct radeon_cmdbuf *rcs,
enum radeon_feature_id fid,
bool enable)
{
- struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
-
- switch (fid) {
- case RADEON_FID_R300_HYPERZ_ACCESS:
- return radeon_set_fd_access(cs, &cs->ws->hyperz_owner,
- &cs->ws->hyperz_owner_mutex,
- RADEON_INFO_WANT_HYPERZ, "Hyper-Z",
- enable);
-
- case RADEON_FID_R300_CMASK_ACCESS:
- return radeon_set_fd_access(cs, &cs->ws->cmask_owner,
- &cs->ws->cmask_owner_mutex,
- RADEON_INFO_WANT_CMASK, "AA optimizations",
- enable);
- }
- return false;
+ struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+
+ switch (fid) {
+ case RADEON_FID_R300_HYPERZ_ACCESS:
+ return radeon_set_fd_access(cs, &cs->ws->hyperz_owner,
+ &cs->ws->hyperz_owner_mutex,
+ RADEON_INFO_WANT_HYPERZ, "Hyper-Z",
+ enable);
+
+ case RADEON_FID_R300_CMASK_ACCESS:
+ return radeon_set_fd_access(cs, &cs->ws->cmask_owner,
+ &cs->ws->cmask_owner_mutex,
+ RADEON_INFO_WANT_CMASK, "AA optimizations",
+ enable);
+ }
+ return false;
}
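/* Usage sketch (illustrative only): r300-class drivers request exclusive
 * Hyper-Z access through the cs_request_feature vtable entry installed later
 * in this file; the winsys tracks a single owner per device fd. The helper
 * name is hypothetical and assumes the winsys headers are in scope. */
static bool example_request_hyperz(struct radeon_winsys *ws,
                                   struct radeon_cmdbuf *cs)
{
   return ws->cs_request_feature(cs, RADEON_FID_R300_HYPERZ_ACCESS, true);
}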
uint32_t radeon_drm_get_gpu_reset_counter(struct radeon_drm_winsys *ws)
{
- uint64_t retval = 0;
+ uint64_t retval = 0;
- if (!ws->info.has_gpu_reset_status_query)
- return 0;
+ if (!ws->info.has_gpu_reset_status_query)
+ return 0;
- radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER,
- "gpu-reset-counter", (uint32_t*)&retval);
- return retval;
+ radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER,
+ "gpu-reset-counter", (uint32_t*)&retval);
+ return retval;
}
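/* Illustrative sketch (helper name is hypothetical): a caller can detect a
 * GPU reset by snapshotting the counter when a context is created and
 * comparing it against the current value later. */
static bool example_gpu_was_reset(struct radeon_drm_winsys *ws,
                                  uint32_t counter_at_creation)
{
   return radeon_drm_get_gpu_reset_counter(ws) != counter_at_creation;
}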
static uint64_t radeon_query_value(struct radeon_winsys *rws,
enum radeon_value_id value)
{
- struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
- uint64_t retval = 0;
-
- switch (value) {
- case RADEON_REQUESTED_VRAM_MEMORY:
- return ws->allocated_vram;
- case RADEON_REQUESTED_GTT_MEMORY:
- return ws->allocated_gtt;
- case RADEON_MAPPED_VRAM:
- return ws->mapped_vram;
- case RADEON_MAPPED_GTT:
- return ws->mapped_gtt;
- case RADEON_BUFFER_WAIT_TIME_NS:
- return ws->buffer_wait_time;
- case RADEON_NUM_MAPPED_BUFFERS:
- return ws->num_mapped_buffers;
- case RADEON_TIMESTAMP:
- if (ws->info.drm_minor < 20 || ws->gen < DRV_R600) {
- assert(0);
- return 0;
- }
-
- radeon_get_drm_value(ws->fd, RADEON_INFO_TIMESTAMP, "timestamp",
- (uint32_t*)&retval);
- return retval;
- case RADEON_NUM_GFX_IBS:
- return ws->num_gfx_IBs;
- case RADEON_NUM_SDMA_IBS:
- return ws->num_sdma_IBs;
- case RADEON_NUM_BYTES_MOVED:
- radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BYTES_MOVED,
- "num-bytes-moved", (uint32_t*)&retval);
- return retval;
- case RADEON_NUM_EVICTIONS:
- case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
- case RADEON_VRAM_VIS_USAGE:
- case RADEON_GFX_BO_LIST_COUNTER:
- case RADEON_GFX_IB_SIZE_COUNTER:
- return 0; /* unimplemented */
- case RADEON_VRAM_USAGE:
- radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE,
- "vram-usage", (uint32_t*)&retval);
- return retval;
- case RADEON_GTT_USAGE:
- radeon_get_drm_value(ws->fd, RADEON_INFO_GTT_USAGE,
- "gtt-usage", (uint32_t*)&retval);
- return retval;
- case RADEON_GPU_TEMPERATURE:
- radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_TEMP,
- "gpu-temp", (uint32_t*)&retval);
- return retval;
- case RADEON_CURRENT_SCLK:
- radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_SCLK,
- "current-gpu-sclk", (uint32_t*)&retval);
- return retval;
- case RADEON_CURRENT_MCLK:
- radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK,
- "current-gpu-mclk", (uint32_t*)&retval);
- return retval;
- case RADEON_CS_THREAD_TIME:
- return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
- }
- return 0;
+ struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
+ uint64_t retval = 0;
+
+ switch (value) {
+ case RADEON_REQUESTED_VRAM_MEMORY:
+ return ws->allocated_vram;
+ case RADEON_REQUESTED_GTT_MEMORY:
+ return ws->allocated_gtt;
+ case RADEON_MAPPED_VRAM:
+ return ws->mapped_vram;
+ case RADEON_MAPPED_GTT:
+ return ws->mapped_gtt;
+ case RADEON_BUFFER_WAIT_TIME_NS:
+ return ws->buffer_wait_time;
+ case RADEON_NUM_MAPPED_BUFFERS:
+ return ws->num_mapped_buffers;
+ case RADEON_TIMESTAMP:
+ if (ws->info.drm_minor < 20 || ws->gen < DRV_R600) {
+ assert(0);
+ return 0;
+ }
+
+ radeon_get_drm_value(ws->fd, RADEON_INFO_TIMESTAMP, "timestamp",
+ (uint32_t*)&retval);
+ return retval;
+ case RADEON_NUM_GFX_IBS:
+ return ws->num_gfx_IBs;
+ case RADEON_NUM_SDMA_IBS:
+ return ws->num_sdma_IBs;
+ case RADEON_NUM_BYTES_MOVED:
+ radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BYTES_MOVED,
+ "num-bytes-moved", (uint32_t*)&retval);
+ return retval;
+ case RADEON_NUM_EVICTIONS:
+ case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
+ case RADEON_VRAM_VIS_USAGE:
+ case RADEON_GFX_BO_LIST_COUNTER:
+ case RADEON_GFX_IB_SIZE_COUNTER:
+ return 0; /* unimplemented */
+ case RADEON_VRAM_USAGE:
+ radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE,
+ "vram-usage", (uint32_t*)&retval);
+ return retval;
+ case RADEON_GTT_USAGE:
+ radeon_get_drm_value(ws->fd, RADEON_INFO_GTT_USAGE,
+ "gtt-usage", (uint32_t*)&retval);
+ return retval;
+ case RADEON_GPU_TEMPERATURE:
+ radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_TEMP,
+ "gpu-temp", (uint32_t*)&retval);
+ return retval;
+ case RADEON_CURRENT_SCLK:
+ radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_SCLK,
+ "current-gpu-sclk", (uint32_t*)&retval);
+ return retval;
+ case RADEON_CURRENT_MCLK:
+ radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK,
+ "current-gpu-mclk", (uint32_t*)&retval);
+ return retval;
+ case RADEON_CS_THREAD_TIME:
+ return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
+ }
+ return 0;
}
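/* Usage sketch (illustrative only): drivers and the HUD read these counters
 * through the query_value vtable entry installed further below. The helper
 * name is hypothetical. */
static uint64_t example_query_vram_usage(struct radeon_winsys *rws)
{
   return rws->query_value(rws, RADEON_VRAM_USAGE);
}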
static bool radeon_read_registers(struct radeon_winsys *rws,
unsigned reg_offset,
unsigned num_registers, uint32_t *out)
{
- struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
- unsigned i;
+ struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
+ unsigned i;
- for (i = 0; i < num_registers; i++) {
- uint32_t reg = reg_offset + i*4;
+ for (i = 0; i < num_registers; i++) {
+ uint32_t reg = reg_offset + i*4;
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, NULL, &reg))
- return false;
- out[i] = reg;
- }
- return true;
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, NULL, &reg))
+ return false;
+ out[i] = reg;
+ }
+ return true;
}
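/* Usage sketch (illustrative only): reading a single register through the
 * hook above. 0x8010 (GRBM_STATUS on GCN) is just an example offset and the
 * helper name is hypothetical. */
static bool example_read_grbm_status(struct radeon_winsys *rws, uint32_t *value)
{
   return rws->read_registers(rws, 0x8010 /* GRBM_STATUS */, 1, value);
}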
DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true)
static bool radeon_winsys_unref(struct radeon_winsys *ws)
{
- struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
- bool destroy;
-
- /* When the reference counter drops to zero, remove the fd from the table.
- * This must happen while the mutex is locked, so that
- * radeon_drm_winsys_create in another thread doesn't get the winsys
- * from the table when the counter drops to 0. */
- mtx_lock(&fd_tab_mutex);
-
- destroy = pipe_reference(&rws->reference, NULL);
- if (destroy && fd_tab) {
- _mesa_hash_table_remove_key(fd_tab, intptr_to_pointer(rws->fd));
- if (_mesa_hash_table_num_entries(fd_tab) == 0) {
- _mesa_hash_table_destroy(fd_tab, NULL);
- fd_tab = NULL;
- }
- }
-
- mtx_unlock(&fd_tab_mutex);
- return destroy;
+ struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
+ bool destroy;
+
+ /* When the reference counter drops to zero, remove the fd from the table.
+ * This must happen while the mutex is locked, so that
+ * radeon_drm_winsys_create in another thread doesn't get the winsys
+ * from the table when the counter drops to 0. */
+ mtx_lock(&fd_tab_mutex);
+
+ destroy = pipe_reference(&rws->reference, NULL);
+ if (destroy && fd_tab) {
+ _mesa_hash_table_remove_key(fd_tab, intptr_to_pointer(rws->fd));
+ if (_mesa_hash_table_num_entries(fd_tab) == 0) {
+ _mesa_hash_table_destroy(fd_tab, NULL);
+ fd_tab = NULL;
+ }
+ }
+
+ mtx_unlock(&fd_tab_mutex);
+ return destroy;
}
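/* Usage sketch (illustrative only; helper name is hypothetical): screen
 * destruction typically pairs unref with destroy, so the winsys is torn down
 * only when the last screen sharing the fd goes away. */
static void example_release_winsys(struct radeon_winsys *ws)
{
   if (ws->unref(ws))
      ws->destroy(ws);
}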
static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
unsigned cache)
{
- struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
+ struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
- if (util_queue_is_initialized(&rws->cs_queue)) {
- util_pin_thread_to_L3(rws->cs_queue.threads[0], cache,
- util_cpu_caps.cores_per_L3);
- }
+ if (util_queue_is_initialized(&rws->cs_queue)) {
+ util_pin_thread_to_L3(rws->cs_queue.threads[0], cache,
+ util_cpu_caps.cores_per_L3);
+ }
}
PUBLIC struct radeon_winsys *
radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
- radeon_screen_create_t screen_create)
+ radeon_screen_create_t screen_create)
{
- struct radeon_drm_winsys *ws;
-
- mtx_lock(&fd_tab_mutex);
- if (!fd_tab) {
- fd_tab = util_hash_table_create_fd_keys();
- }
-
- ws = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
- if (ws) {
- pipe_reference(NULL, &ws->reference);
- mtx_unlock(&fd_tab_mutex);
- return &ws->base;
- }
-
- ws = CALLOC_STRUCT(radeon_drm_winsys);
- if (!ws) {
- mtx_unlock(&fd_tab_mutex);
- return NULL;
- }
-
- ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
-
- if (!do_winsys_init(ws))
- goto fail1;
-
- pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS,
- 500000, ws->check_vm ? 1.0f : 2.0f, 0,
- MIN2(ws->info.vram_size, ws->info.gart_size),
- radeon_bo_destroy,
- radeon_bo_can_reclaim);
-
- if (ws->info.r600_has_virtual_memory) {
- /* There is no fundamental obstacle to using slab buffer allocation
- * without GPUVM, but enabling it requires making sure that the drivers
- * honor the address offset.
- */
- if (!pb_slabs_init(&ws->bo_slabs,
- RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2,
- RADEON_MAX_SLAB_HEAPS,
- ws,
- radeon_bo_can_reclaim_slab,
- radeon_bo_slab_alloc,
- radeon_bo_slab_free))
- goto fail_cache;
-
- ws->info.min_alloc_size = 1 << RADEON_SLAB_MIN_SIZE_LOG2;
- } else {
- ws->info.min_alloc_size = ws->info.gart_page_size;
- }
-
- if (ws->gen >= DRV_R600) {
- ws->surf_man = radeon_surface_manager_new(ws->fd);
- if (!ws->surf_man)
- goto fail_slab;
- }
-
- /* init reference */
- pipe_reference_init(&ws->reference, 1);
-
- /* Set functions. */
- ws->base.unref = radeon_winsys_unref;
- ws->base.destroy = radeon_winsys_destroy;
- ws->base.query_info = radeon_query_info;
- ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache;
- ws->base.cs_request_feature = radeon_cs_request_feature;
- ws->base.query_value = radeon_query_value;
- ws->base.read_registers = radeon_read_registers;
-
- radeon_drm_bo_init_functions(ws);
- radeon_drm_cs_init_functions(ws);
- radeon_surface_init_functions(ws);
-
- (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain);
- (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain);
-
- ws->bo_names = util_hash_table_create_ptr_keys();
- ws->bo_handles = util_hash_table_create_ptr_keys();
- ws->bo_vas = util_hash_table_create_ptr_keys();
- (void) mtx_init(&ws->bo_handles_mutex, mtx_plain);
- (void) mtx_init(&ws->vm32.mutex, mtx_plain);
- (void) mtx_init(&ws->vm64.mutex, mtx_plain);
- (void) mtx_init(&ws->bo_fence_lock, mtx_plain);
- list_inithead(&ws->vm32.holes);
- list_inithead(&ws->vm64.holes);
-
- /* The kernel currently returns 8MB. Make sure this doesn't change. */
- if (ws->va_start > 8 * 1024 * 1024) {
- /* Not enough 32-bit address space. */
- radeon_winsys_destroy(&ws->base);
- mtx_unlock(&fd_tab_mutex);
- return NULL;
- }
-
- ws->vm32.start = ws->va_start;
- ws->vm32.end = 1ull << 32;
-
- /* The maximum is 8GB of virtual address space limited by the kernel.
- * It's obviously not enough for bigger cards, like Hawaiis with 4GB
- * and 8GB of physical memory and 4GB of GART.
- *
- * Older kernels set the limit to 4GB, which is even worse, so they only
- * have 32-bit address space.
- */
- if (ws->info.drm_minor >= 41) {
- ws->vm64.start = 1ull << 32;
- ws->vm64.end = 1ull << 33;
- }
-
- /* TTM aligns the BO size to the CPU page size */
- ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
- ws->info.pte_fragment_size = 64 * 1024; /* GPUVM page size */
-
- if (ws->num_cpus > 1 && debug_get_option_thread())
- util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0);
-
- /* Create the screen at the end. The winsys must be initialized
- * completely.
- *
- * Alternatively, we could create the screen based on "ws->gen"
- * and link all drivers into one binary blob. */
- ws->base.screen = screen_create(&ws->base, config);
- if (!ws->base.screen) {
- radeon_winsys_destroy(&ws->base);
- mtx_unlock(&fd_tab_mutex);
- return NULL;
- }
-
- _mesa_hash_table_insert(fd_tab, intptr_to_pointer(ws->fd), ws);
-
- /* We must unlock the mutex once the winsys is fully initialized, so that
- * other threads attempting to create the winsys from the same fd will
- * get a fully initialized winsys and not just half-way initialized. */
- mtx_unlock(&fd_tab_mutex);
-
- return &ws->base;
+ struct radeon_drm_winsys *ws;
+
+ mtx_lock(&fd_tab_mutex);
+ if (!fd_tab) {
+ fd_tab = util_hash_table_create_fd_keys();
+ }
+
+ ws = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
+ if (ws) {
+ pipe_reference(NULL, &ws->reference);
+ mtx_unlock(&fd_tab_mutex);
+ return &ws->base;
+ }
+
+ ws = CALLOC_STRUCT(radeon_drm_winsys);
+ if (!ws) {
+ mtx_unlock(&fd_tab_mutex);
+ return NULL;
+ }
+
+ ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+
+ if (!do_winsys_init(ws))
+ goto fail1;
+
+ pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS,
+ 500000, ws->check_vm ? 1.0f : 2.0f, 0,
+ MIN2(ws->info.vram_size, ws->info.gart_size),
+ radeon_bo_destroy,
+ radeon_bo_can_reclaim);
+
+ if (ws->info.r600_has_virtual_memory) {
+ /* There is no fundamental obstacle to using slab buffer allocation
+ * without GPUVM, but enabling it requires making sure that the drivers
+ * honor the address offset.
+ */
+ if (!pb_slabs_init(&ws->bo_slabs,
+ RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2,
+ RADEON_MAX_SLAB_HEAPS,
+ ws,
+ radeon_bo_can_reclaim_slab,
+ radeon_bo_slab_alloc,
+ radeon_bo_slab_free))
+ goto fail_cache;
+
+ ws->info.min_alloc_size = 1 << RADEON_SLAB_MIN_SIZE_LOG2;
+ } else {
+ ws->info.min_alloc_size = ws->info.gart_page_size;
+ }
+
+ if (ws->gen >= DRV_R600) {
+ ws->surf_man = radeon_surface_manager_new(ws->fd);
+ if (!ws->surf_man)
+ goto fail_slab;
+ }
+
+ /* init reference */
+ pipe_reference_init(&ws->reference, 1);
+
+ /* Set functions. */
+ ws->base.unref = radeon_winsys_unref;
+ ws->base.destroy = radeon_winsys_destroy;
+ ws->base.query_info = radeon_query_info;
+ ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache;
+ ws->base.cs_request_feature = radeon_cs_request_feature;
+ ws->base.query_value = radeon_query_value;
+ ws->base.read_registers = radeon_read_registers;
+
+ radeon_drm_bo_init_functions(ws);
+ radeon_drm_cs_init_functions(ws);
+ radeon_surface_init_functions(ws);
+
+ (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain);
+ (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain);
+
+ ws->bo_names = util_hash_table_create_ptr_keys();
+ ws->bo_handles = util_hash_table_create_ptr_keys();
+ ws->bo_vas = util_hash_table_create_ptr_keys();
+ (void) mtx_init(&ws->bo_handles_mutex, mtx_plain);
+ (void) mtx_init(&ws->vm32.mutex, mtx_plain);
+ (void) mtx_init(&ws->vm64.mutex, mtx_plain);
+ (void) mtx_init(&ws->bo_fence_lock, mtx_plain);
+ list_inithead(&ws->vm32.holes);
+ list_inithead(&ws->vm64.holes);
+
+ /* The kernel currently returns 8MB. Make sure this doesn't change. */
+ if (ws->va_start > 8 * 1024 * 1024) {
+ /* Not enough 32-bit address space. */
+ radeon_winsys_destroy(&ws->base);
+ mtx_unlock(&fd_tab_mutex);
+ return NULL;
+ }
+
+ ws->vm32.start = ws->va_start;
+ ws->vm32.end = 1ull << 32;
+
+ /* The maximum is 8GB of virtual address space limited by the kernel.
+ * It's obviously not enough for bigger cards like Hawaii, which ships
+ * with 4 GB or 8 GB of VRAM plus 4 GB of GART.
+ *
+ * Older kernels set the limit to 4GB, which is even worse, so they only
+ * have 32-bit address space.
+ */
+ if (ws->info.drm_minor >= 41) {
+ ws->vm64.start = 1ull << 32;
+ ws->vm64.end = 1ull << 33;
+ }
+
+ /* TTM aligns the BO size to the CPU page size */
+ ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
+ ws->info.pte_fragment_size = 64 * 1024; /* GPUVM page size */
+
+ if (ws->num_cpus > 1 && debug_get_option_thread())
+ util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0);
+
+ /* Create the screen at the end. The winsys must be initialized
+ * completely.
+ *
+ * Alternatively, we could create the screen based on "ws->gen"
+ * and link all drivers into one binary blob. */
+ ws->base.screen = screen_create(&ws->base, config);
+ if (!ws->base.screen) {
+ radeon_winsys_destroy(&ws->base);
+ mtx_unlock(&fd_tab_mutex);
+ return NULL;
+ }
+
+ _mesa_hash_table_insert(fd_tab, intptr_to_pointer(ws->fd), ws);
+
+ /* We must unlock the mutex once the winsys is fully initialized, so that
+ * other threads attempting to create the winsys from the same fd will
+ * get a fully initialized winsys and not just half-way initialized. */
+ mtx_unlock(&fd_tab_mutex);
+
+ return &ws->base;
fail_slab:
- if (ws->info.r600_has_virtual_memory)
- pb_slabs_deinit(&ws->bo_slabs);
+ if (ws->info.r600_has_virtual_memory)
+ pb_slabs_deinit(&ws->bo_slabs);
fail_cache:
- pb_cache_deinit(&ws->bo_cache);
+ pb_cache_deinit(&ws->bo_cache);
fail1:
- mtx_unlock(&fd_tab_mutex);
- if (ws->surf_man)
- radeon_surface_manager_free(ws->surf_man);
- if (ws->fd >= 0)
- close(ws->fd);
-
- FREE(ws);
- return NULL;
+ mtx_unlock(&fd_tab_mutex);
+ if (ws->surf_man)
+ radeon_surface_manager_free(ws->surf_man);
+ if (ws->fd >= 0)
+ close(ws->fd);
+
+ FREE(ws);
+ return NULL;
}
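/* Illustrative summary, not part of the patch, of the VA layout configured in
 * radeon_drm_winsys_create() above:
 *
 *   vm32: [va_start (at most 8 MB), 1ull << 32)  -- 32-bit addressable heap
 *   vm64: [1ull << 32, 1ull << 33)               -- extra 4 GB, DRM minor >= 41 only
 *
 * On older kernels vm64 is left empty, so only the 32-bit heap is used. */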
struct radeon_drm_cs;
enum radeon_generation {
- DRV_R300,
- DRV_R600,
- DRV_SI
+ DRV_R300,
+ DRV_R600,
+ DRV_SI
};
#define RADEON_SLAB_MIN_SIZE_LOG2 9
#define RADEON_SLAB_MAX_SIZE_LOG2 14
struct radeon_vm_heap {
- mtx_t mutex;
- uint64_t start;
- uint64_t end;
- struct list_head holes;
+ mtx_t mutex;
+ uint64_t start;
+ uint64_t end;
+ struct list_head holes;
};
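/* Descriptive note (an assumption based on how the heap is initialized above,
 * not stated in this hunk): the holes list presumably tracks freed VA ranges
 * inside [start, end) so addresses can be reused, with the mutex guarding the
 * list and the heap bounds. */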
struct radeon_drm_winsys {
- struct radeon_winsys base;
- struct pipe_reference reference;
- struct pb_cache bo_cache;
- struct pb_slabs bo_slabs;
-
- int fd; /* DRM file descriptor */
- int num_cs; /* The number of command streams created. */
- uint64_t allocated_vram;
- uint64_t allocated_gtt;
- uint64_t mapped_vram;
- uint64_t mapped_gtt;
- uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
- uint64_t num_gfx_IBs;
- uint64_t num_sdma_IBs;
- uint64_t num_mapped_buffers;
- uint32_t next_bo_hash;
-
- enum radeon_generation gen;
- struct radeon_info info;
- uint32_t va_start;
- uint32_t va_unmap_working;
- uint32_t accel_working2;
-
- /* List of buffer GEM names. Protected by bo_handles_mutex. */
- struct hash_table *bo_names;
- /* List of buffer handles. Protectded by bo_handles_mutex. */
- struct hash_table *bo_handles;
- /* List of buffer virtual memory ranges. Protectded by bo_handles_mutex. */
- struct hash_table *bo_vas;
- mtx_t bo_handles_mutex;
- mtx_t bo_fence_lock;
-
- struct radeon_vm_heap vm32;
- struct radeon_vm_heap vm64;
-
- bool check_vm;
-
- struct radeon_surface_manager *surf_man;
-
- uint32_t num_cpus; /* Number of CPUs. */
-
- struct radeon_drm_cs *hyperz_owner;
- mtx_t hyperz_owner_mutex;
- struct radeon_drm_cs *cmask_owner;
- mtx_t cmask_owner_mutex;
-
- /* multithreaded command submission */
- struct util_queue cs_queue;
+ struct radeon_winsys base;
+ struct pipe_reference reference;
+ struct pb_cache bo_cache;
+ struct pb_slabs bo_slabs;
+
+ int fd; /* DRM file descriptor */
+ int num_cs; /* The number of command streams created. */
+ uint64_t allocated_vram;
+ uint64_t allocated_gtt;
+ uint64_t mapped_vram;
+ uint64_t mapped_gtt;
+ uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
+ uint64_t num_gfx_IBs;
+ uint64_t num_sdma_IBs;
+ uint64_t num_mapped_buffers;
+ uint32_t next_bo_hash;
+
+ enum radeon_generation gen;
+ struct radeon_info info;
+ uint32_t va_start;
+ uint32_t va_unmap_working;
+ uint32_t accel_working2;
+
+ /* List of buffer GEM names. Protected by bo_handles_mutex. */
+ struct hash_table *bo_names;
+ /* List of buffer handles. Protected by bo_handles_mutex. */
+ struct hash_table *bo_handles;
+ /* List of buffer virtual memory ranges. Protected by bo_handles_mutex. */
+ struct hash_table *bo_vas;
+ mtx_t bo_handles_mutex;
+ mtx_t bo_fence_lock;
+
+ struct radeon_vm_heap vm32;
+ struct radeon_vm_heap vm64;
+
+ bool check_vm;
+
+ struct radeon_surface_manager *surf_man;
+
+ uint32_t num_cpus; /* Number of CPUs. */
+
+ struct radeon_drm_cs *hyperz_owner;
+ mtx_t hyperz_owner_mutex;
+ struct radeon_drm_cs *cmask_owner;
+ mtx_t cmask_owner_mutex;
+
+ /* multithreaded command submission */
+ struct util_queue cs_queue;
};
-static inline struct radeon_drm_winsys *
-radeon_drm_winsys(struct radeon_winsys *base)
+static inline struct radeon_drm_winsys *radeon_drm_winsys(struct radeon_winsys *base)
{
- return (struct radeon_drm_winsys*)base;
+ return (struct radeon_drm_winsys*)base;
}
uint32_t radeon_drm_get_gpu_reset_counter(struct radeon_drm_winsys *ws);