From 89bba44e969f15bf20da6d700c493237b095a588 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 May 2008 11:52:52 -0700 Subject: [PATCH] Add intel_bufmgr_gem for new graphics execution manager. --- src/mesa/drivers/dri/common/dri_bufmgr.c | 4 +- src/mesa/drivers/dri/common/dri_bufmgr.h | 17 +- src/mesa/drivers/dri/common/dri_bufmgr_fake.c | 4 +- src/mesa/drivers/dri/i965/Makefile | 2 +- src/mesa/drivers/dri/i965/intel_bufmgr_gem.c | 1 + .../drivers/dri/intel/intel_batchbuffer.c | 9 +- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 818 ++++++++++++++++++ src/mesa/drivers/dri/intel/intel_bufmgr_gem.h | 19 + src/mesa/drivers/dri/intel/intel_ioctl.c | 32 +- src/mesa/drivers/dri/intel/intel_ioctl.h | 3 +- 10 files changed, 869 insertions(+), 40 deletions(-) create mode 120000 src/mesa/drivers/dri/i965/intel_bufmgr_gem.c create mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_gem.c create mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_gem.h diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 4df006fb9f8..70ae0914992 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -148,9 +148,9 @@ int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, return reloc_buf->bufmgr->emit_reloc(reloc_buf, flags, delta, offset, target_buf); } -void *dri_process_relocs(dri_bo *batch_buf, GLuint *count) +void *dri_process_relocs(dri_bo *batch_buf) { - return batch_buf->bufmgr->process_relocs(batch_buf, count); + return batch_buf->bufmgr->process_relocs(batch_buf); } void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence) diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index 4593eaf9f7a..cbfeb9136db 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -41,7 +41,12 @@ typedef struct _dri_bo dri_bo; typedef struct _dri_fence dri_fence; struct _dri_bo { - /** Size 
in bytes of the buffer object. */ + /** + * Size in bytes of the buffer object. + * + * The size may be larger than the size originally requested for the + * allocation, such as being aligned to page size. + */ unsigned long size; /** * Card virtual address (offset from the beginning of the aperture) for the @@ -169,10 +174,10 @@ struct _dri_bufmgr { * into them the appopriate order. * * \param batch_buf buffer at the root of the tree of relocations - * \param count returns the number of buffers validated. - * \return relocation record for use in command submission. - * */ - void *(*process_relocs)(dri_bo *batch_buf, GLuint *count); + * \return argument to be completed and passed to the execbuffers ioctl + * (if any). + */ + void *(*process_relocs)(dri_bo *batch_buf); void (*post_submit)(dri_bo *batch_buf, dri_fence **fence); @@ -214,7 +219,7 @@ void dri_bufmgr_destroy(dri_bufmgr *bufmgr); int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, GLuint offset, dri_bo *target_buf); -void *dri_process_relocs(dri_bo *batch_buf, uint32_t *count); +void *dri_process_relocs(dri_bo *batch_buf); void dri_post_process_relocs(dri_bo *batch_buf); void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence); int dri_bufmgr_check_aperture_space(dri_bo *bo); diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c index 9bf3f3437ce..9dd06b07eb7 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c @@ -1098,7 +1098,7 @@ dri_fake_reloc_and_validate_buffer(dri_bo *bo) } static void * -dri_fake_process_relocs(dri_bo *batch_buf, GLuint *count_p) +dri_fake_process_relocs(dri_bo *batch_buf) { dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; dri_bo_fake *batch_fake = (dri_bo_fake *)batch_buf; @@ -1126,8 +1126,6 @@ dri_fake_process_relocs(dri_bo *batch_buf, GLuint *count_p) assert(ret == 0); - *count_p = 0; /* junk */ - 
bufmgr_fake->current_total_size = 0; return NULL; } diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index d46b3428f59..ca9b7da40f0 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -9,7 +9,7 @@ DRIVER_SOURCES = \ intel_blit.c \ intel_buffer_objects.c \ intel_buffers.c \ - intel_bufmgr_ttm.c \ + intel_bufmgr_gem.c \ intel_context.c \ intel_decode.c \ intel_depthstencil.c \ diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c b/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c new file mode 120000 index 00000000000..dee0daf9c04 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c @@ -0,0 +1 @@ +../intel/intel_bufmgr_gem.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index a594fb6cc46..c5b0f531d4f 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -131,11 +131,8 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used, GLboolean allow_unlock) { struct intel_context *intel = batch->intel; - void *start; - GLuint count; dri_bo_unmap(batch->buf); - start = dri_process_relocs(batch->buf, &count); batch->map = NULL; batch->ptr = NULL; @@ -148,12 +145,16 @@ do_flush_locked(struct intel_batchbuffer *batch, if (!(intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS)) { if (intel->ttm == GL_TRUE) { + struct drm_i915_gem_execbuffer *execbuf; + + execbuf = dri_process_relocs(batch->buf); intel_exec_ioctl(batch->intel, used, batch->cliprect_mode != LOOP_CLIPRECTS, allow_unlock, - start, count, &batch->last_fence); + execbuf, &batch->last_fence); } else { + dri_process_relocs(batch->buf); intel_batch_ioctl(batch->intel, batch->buf->offset, used, diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c new file mode 100644 index 00000000000..2d8dced214b --- /dev/null 
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c
@@ -0,0 +1,818 @@
+/**************************************************************************
+ *
+ * Copyright © 2007 Red Hat Inc.
+ * Copyright © 2007 Intel Corporation
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellström
+ *          Keith Whitwell
+ *          Eric Anholt
+ *          Dave Airlie
+ */
+
+/* NOTE(review): the eight system header names below were lost when this patch
+ * was extracted (only bare "#include" tokens survived).  The list is
+ * reconstructed from the libc/libdrm calls this file makes (drmFence*,
+ * fprintf, calloc/realloc/free/abort, memset/strerror/ffs, getpagesize,
+ * assert, ioctl, munmap) -- confirm names and order against the upstream
+ * commit.
+ */
+#include <xf86drm.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include "errno.h"
+#include "mtypes.h"
+#include "dri_bufmgr.h"
+#include "string.h"
+#include "imports.h"
+
+#include "i915_drm.h"
+
+#include "intel_bufmgr_gem.h"
+
+/* Debug print to stderr, enabled by bufmgr.debug.  The macro reads a variable
+ * named bufmgr_gem from the calling scope, so every user must have one.
+ */
+#define DBG(...) do {                                   \
+   if (bufmgr_gem->bufmgr.debug)                        \
+      fprintf(stderr, __VA_ARGS__);                     \
+} while (0)
+
+/* NOTE(review): nothing in this file refers to this struct; it looks like a
+ * leftover from the previous buffer manager -- confirm before removing.
+ */
+struct intel_validate_entry {
+   dri_bo *bo;
+   struct drm_i915_op_arg bo_arg;
+};
+
+/** One cached, currently unused GEM object in a bucket's singly linked list */
+struct dri_gem_bo_bucket_entry {
+   uint32_t gem_handle;
+   struct dri_gem_bo_bucket_entry *next;
+};
+
+/** FIFO of cached GEM objects that all share one power-of-two size */
+struct dri_gem_bo_bucket {
+   struct dri_gem_bo_bucket_entry *head;
+   /** Points at the "next" slot of the last entry (or at head when empty) */
+   struct dri_gem_bo_bucket_entry **tail;
+   /**
+    * Limit on the number of entries in this bucket.
+    *
+    * 0 means that caching at this bucket size is disabled.
+    * -1 means that there is no limit to caching at this size.
+    */
+   int max_entries;
+   int num_entries;
+};
+
+/* Arbitrarily chosen.  Bucket i holds objects of (1 << i) pages, so with 16
+ * buckets the largest size cached for reuse is 1 << 15 pages, or 128MB with
+ * 4KB pages.
+ */
+#define INTEL_GEM_BO_BUCKETS 16
+typedef struct _dri_bufmgr_gem {
+   dri_bufmgr bufmgr;
+
+   /** DRM device file descriptor used for all ioctls */
+   int fd;
+
+   /** Capacity of each buffer's relocation arrays */
+   uint32_t max_relocs;
+
+   /** Per-batch list handed to the kernel, parallel to validate_bo */
+   struct drm_i915_gem_validate_entry *validate_array;
+   dri_bo **validate_bo;
+   int validate_array_size;
+   int validate_count;
+
+   /** Array of lists of cached gem objects of power-of-two sizes */
+   struct dri_gem_bo_bucket cache_bucket[INTEL_GEM_BO_BUCKETS];
+
+   /** Execbuffer argument returned by process_relocs */
+   struct drm_i915_gem_execbuffer exec_arg;
+} dri_bufmgr_gem;
+
+typedef struct _dri_bo_gem {
+   dri_bo bo;
+
+   int refcount;
+   unsigned int map_count;
+   uint32_t gem_handle;
+   const char *name;
+
+   /**
+    * Index of the buffer within the validation list while preparing a
+    * batchbuffer execution.
+    */
+   int validate_index;
+
+   /** Array passed to the DRM containing relocation information. */
+   struct drm_i915_gem_relocation_entry *relocs;
+   /** Array of bos corresponding to relocs[i].target_handle */
+   dri_bo **reloc_target_bo;
+   /** Number of entries in relocs */
+   int reloc_count;
+   /** Mapped address for the buffer */
+   void *virtual;
+} dri_bo_gem;
+
+typedef struct _dri_fence_gem
+{
+   dri_fence fence;
+
+   int refcount;
+   const char *name;
+   drmFence drm_fence;
+} dri_fence_gem;
+
+/**
+ * Returns the smallest log2 such that (1 << log2) >= n; 0 for n <= 1.
+ *
+ * Works on int, so callers must not pass values above 1 << 30.
+ */
+static int
+logbase2(int n)
+{
+   GLint i = 1;
+   GLint log2 = 0;
+
+   while (n > i) {
+      i *= 2;
+      log2++;
+   }
+
+   return log2;
+}
+
+/**
+ * Returns the cache bucket holding objects of exactly \p size bytes, or NULL
+ * when that size is not cached (not a power of two, zero, or too large).
+ */
+static struct dri_gem_bo_bucket *
+dri_gem_bo_bucket_for_size(dri_bufmgr_gem *bufmgr_gem, unsigned long size)
+{
+   int i;
+
+   /* We only do buckets in power of two increments */
+   if ((size & (size - 1)) != 0)
+      return NULL;
+
+   /* We should only see sizes rounded to pages. */
+   assert((size % 4096) == 0);
+
+   /* We always allocate in units of pages */
+   i = ffs(size / 4096) - 1;
+   /* ffs() yields 0 when its int argument has no bit set (size 0, or a page
+    * count whose low bits are all zero after conversion to int); that would
+    * index cache_bucket[-1], so treat it as "no bucket".
+    */
+   if (i < 0 || i >= INTEL_GEM_BO_BUCKETS)
+      return NULL;
+
+   return &bufmgr_gem->cache_bucket[i];
+}
+
+
+/** Prints every buffer on the validation list with its relocations via DBG */
+static void dri_gem_dump_validation_list(dri_bufmgr_gem *bufmgr_gem)
+{
+   int i, j;
+
+   for (i = 0; i < bufmgr_gem->validate_count; i++) {
+      dri_bo *bo = bufmgr_gem->validate_bo[i];
+      dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+
+      if (bo_gem->relocs == NULL) {
+         DBG("%2d: %s\n", i, bo_gem->name);
+         continue;
+      }
+
+      for (j = 0; j < bo_gem->reloc_count; j++) {
+         dri_bo *target_bo = bo_gem->reloc_target_bo[j];
+         dri_bo_gem *target_gem = (dri_bo_gem *)target_bo;
+
+         /* Cast so the argument matches %llx whatever the kernel's 64-bit
+          * typedef expands to.
+          */
+         DBG("%2d: %s@0x%08llx -> %s@0x%08lx + 0x%08x\n",
+             i,
+             bo_gem->name, (unsigned long long)bo_gem->relocs[j].offset,
+             target_gem->name, target_bo->offset,
+             bo_gem->relocs[j].delta);
+      }
+   }
+}
+
+/**
+ * Adds the given buffer to the list of buffers to be validated (moved into the
+ * appropriate memory type) with the next batch submission.
+ *
+ * If a buffer is validated multiple times in a batch submission, it ends up
+ * with the intersection of the memory type flags and the union of the
+ * access flags.
+ */
+static void
+intel_add_validate_buffer(dri_bo *bo)
+{
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr;
+   dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+   int index;
+
+   /* Already on the list for this batch (validate_index is reset to -1 when
+    * the batch has been submitted).
+    */
+   if (bo_gem->validate_index != -1)
+      return;
+
+   /* Extend the array of validation entries as necessary. */
+   if (bufmgr_gem->validate_count == bufmgr_gem->validate_array_size) {
+      int new_size = bufmgr_gem->validate_array_size * 2;
+      struct drm_i915_gem_validate_entry *new_array;
+      dri_bo **new_bo;
+
+      if (new_size == 0)
+         new_size = 5;
+
+      /* Grow through temporaries: assigning realloc()'s result straight to
+       * the member would lose the old array on failure and the stores below
+       * would then write through NULL.  This function cannot report an
+       * error, so fail loudly instead.
+       */
+      new_array = realloc(bufmgr_gem->validate_array,
+                          sizeof(*bufmgr_gem->validate_array) * new_size);
+      if (new_array == NULL) {
+         fprintf(stderr, "%s:%d: Out of memory growing validation list.\n",
+                 __FILE__, __LINE__);
+         abort();
+      }
+      bufmgr_gem->validate_array = new_array;
+
+      new_bo = realloc(bufmgr_gem->validate_bo,
+                       sizeof(*bufmgr_gem->validate_bo) * new_size);
+      if (new_bo == NULL) {
+         fprintf(stderr, "%s:%d: Out of memory growing validation list.\n",
+                 __FILE__, __LINE__);
+         abort();
+      }
+      bufmgr_gem->validate_bo = new_bo;
+
+      bufmgr_gem->validate_array_size = new_size;
+   }
+
+   index = bufmgr_gem->validate_count;
+   /* Record the slot so a buffer reached through several relocations is
+    * listed only once.
+    */
+   bo_gem->validate_index = index;
+
+   /* Fill in array entry.  realloc() memory is uninitialized, so clear the
+    * whole entry before setting the fields we know about.
+    */
+   memset(&bufmgr_gem->validate_array[index], 0,
+          sizeof(bufmgr_gem->validate_array[index]));
+   bufmgr_gem->validate_array[index].buffer_handle = bo_gem->gem_handle;
+   bufmgr_gem->validate_array[index].relocation_count = bo_gem->reloc_count;
+   bufmgr_gem->validate_array[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
+   bufmgr_gem->validate_bo[index] = bo;
+   /* The list holds its own reference until the batch has been submitted. */
+   dri_bo_reference(bo);
+   bufmgr_gem->validate_count++;
+}
+
+
+#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + (x) * I915_RELOC0_STRIDE) * \
+        sizeof(uint32_t))
+
+/**
+ * Allocates the relocation arrays for \p bo, each with room for
+ * bufmgr_gem->max_relocs entries.
+ *
+ * \return 0 on success, -ENOMEM if either array could not be allocated (in
+ *         which case both pointers are left NULL).
+ */
+static int
+intel_setup_reloc_list(dri_bo *bo)
+{
+   dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr;
+
+   bo_gem->relocs = calloc(bufmgr_gem->max_relocs,
+                           sizeof(struct drm_i915_gem_relocation_entry));
+   /* reloc_target_bo[] is indexed in step with relocs[], so it needs the
+    * same number of slots.
+    */
+   bo_gem->reloc_target_bo = calloc(bufmgr_gem->max_relocs,
+                                    sizeof(dri_bo *));
+
+   if (bo_gem->relocs == NULL || bo_gem->reloc_target_bo == NULL) {
+      free(bo_gem->relocs);
+      free(bo_gem->reloc_target_bo);
+      bo_gem->relocs = NULL;
+      bo_gem->reloc_target_bo = NULL;
+      return -ENOMEM;
+   }
+
+   return 0;
+}
+
+/**
+ * Allocates a new GEM buffer object of at least \p size bytes.
+ *
+ * \p alignment and \p location_mask are accepted for interface compatibility
+ * and are not used.
+ *
+ * \return the new buffer with one reference held, or NULL on failure.
+ */
+static dri_bo *
+dri_gem_alloc(dri_bufmgr *bufmgr, const char *name,
+              unsigned long size, unsigned int alignment,
+              uint64_t location_mask)
+{
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr;
+   dri_bo_gem *bo_gem;
+   unsigned int page_size = getpagesize();
+   int ret;
+   struct dri_gem_bo_bucket *bucket = NULL;
+   GLboolean alloc_from_cache = GL_FALSE;
+
+   bo_gem = calloc(1, sizeof(*bo_gem));
+   if (!bo_gem)
+      return NULL;
+
+   /* Round the allocated size up to a power of two number of pages.
+    * logbase2() takes an int, so only attempt this (and therefore caching)
+    * for sizes it can represent; shift an unsigned long, not an int.
+    */
+   if (size <= (1UL << 30)) {
+      bo_gem->bo.size = 1UL << logbase2(size);
+      if (bo_gem->bo.size < page_size)
+         bo_gem->bo.size = page_size;
+      bucket = dri_gem_bo_bucket_for_size(bufmgr_gem, bo_gem->bo.size);
+   }
+
+   /* If we don't have caching at this size, don't actually round the
+    * allocation up.
+    */
+   if (bucket == NULL || bucket->max_entries == 0) {
+      bo_gem->bo.size = size;
+      if (bo_gem->bo.size < page_size)
+         bo_gem->bo.size = page_size;
+   }
+
+   /* Get a buffer out of the cache if available */
+   if (bucket != NULL && bucket->num_entries > 0) {
+      struct dri_gem_bo_bucket_entry *entry = bucket->head;
+#if 0
+      int busy;
+
+      /* XXX */
+      /* Check if the buffer is still in flight.  If not, reuse it. */
+      ret = drmBOBusy(bufmgr_gem->fd, &entry->drm_bo, &busy);
+      alloc_from_cache = (ret == 0 && busy == 0);
+#else
+      /* No busy query is wired up yet, so cached objects are never reused. */
+      alloc_from_cache = 0;
+#endif
+
+      if (alloc_from_cache) {
+         bucket->head = entry->next;
+         if (entry->next == NULL)
+            bucket->tail = &bucket->head;
+         bucket->num_entries--;
+
+         bo_gem->gem_handle = entry->gem_handle;
+         free(entry);
+      }
+   }
+
+   if (!alloc_from_cache) {
+      struct drm_gem_alloc alloc;
+
+      memset(&alloc, 0, sizeof(alloc));
+      alloc.size = bo_gem->bo.size;
+
+      ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_ALLOC, &alloc);
+      if (ret != 0) {
+         free(bo_gem);
+         return NULL;
+      }
+      /* Only trust the handle once the ioctl has succeeded. */
+      bo_gem->gem_handle = alloc.handle;
+   }
+
+   bo_gem->bo.offset = 0;
+   bo_gem->bo.virtual = NULL;
+   bo_gem->bo.bufmgr = bufmgr;
+   bo_gem->name = name;
+   bo_gem->refcount = 1;
+   bo_gem->validate_index = -1;
+
+   DBG("bo_create: %p (%s) %lub\n", &bo_gem->bo, bo_gem->name, size);
+
+   return &bo_gem->bo;
+}
+
+/* Our GEM backend doesn't allow creation of static buffers, as that requires
+ * privilege for the non-fake case, and the lock in the fake case where we were
+ * working around the X Server not creating buffers and passing handles to us.
+ */
+static dri_bo *
+dri_gem_alloc_static(dri_bufmgr *bufmgr, const char *name,
+                     unsigned long offset, unsigned long size, void *virtual,
+                     uint64_t location_mask)
+{
+   return NULL;
+}
+
+/**
+ * Returns a dri_bo wrapping the given buffer object handle.
+ *
+ * This can be used when one application needs to pass a buffer object
+ * to another.
+ *
+ * \return the buffer with one reference held, or NULL on failure.
+ */
+dri_bo *
+intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
+                                unsigned int handle)
+{
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr;
+   dri_bo_gem *bo_gem;
+   int ret;
+   struct drm_gem_open open_arg;
+
+   bo_gem = calloc(1, sizeof(*bo_gem));
+   if (!bo_gem)
+      return NULL;
+
+   memset(&open_arg, 0, sizeof(open_arg));
+   open_arg.name = handle;
+   ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
+   if (ret != 0) {
+      /* ioctl() reports failure as -1 with the reason in errno. */
+      fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
+              name, handle, strerror(errno));
+      free(bo_gem);
+      return NULL;
+   }
+   bo_gem->bo.size = open_arg.size;
+   bo_gem->bo.offset = 0;
+   bo_gem->bo.virtual = NULL;
+   bo_gem->bo.bufmgr = bufmgr;
+   bo_gem->name = name;
+   bo_gem->refcount = 1;
+   bo_gem->validate_index = -1;
+   /* Without this every later ioctl on the buffer would use handle 0.
+    * NOTE(review): assumes the open ioctl returns the per-fd handle in
+    * open_arg.handle -- confirm against the drm.h this builds with.
+    */
+   bo_gem->gem_handle = open_arg.handle;
+
+   DBG("bo_create_from_handle: %p %08x (%s)\n",
+       &bo_gem->bo, handle, bo_gem->name);
+
+   return &bo_gem->bo;
+}
+
+/** Takes an additional reference on \p bo */
+static void
+dri_gem_bo_reference(dri_bo *bo)
+{
+   dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+
+   bo_gem->refcount++;
+}
+
+/**
+ * Drops one reference on \p bo (NULL is allowed).  When the last reference
+ * goes away the relocation targets are released and the GEM handle is either
+ * parked in the reuse cache or released to the kernel.
+ */
+static void
+dri_gem_bo_unreference(dri_bo *bo)
+{
+   dri_bufmgr_gem *bufmgr_gem;
+   dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+
+   if (!bo)
+      return;
+
+   /* Only touch bo->bufmgr after the NULL check above. */
+   bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr;
+
+   if (--bo_gem->refcount == 0) {
+      struct dri_gem_bo_bucket *bucket;
+      struct dri_gem_bo_bucket_entry *entry = NULL;
+      int ret;
+
+      assert(bo_gem->map_count == 0);
+
+      if (bo_gem->relocs != NULL) {
+         int i;
+
+         /* Unreference all the target buffers */
+         for (i = 0; i < bo_gem->reloc_count; i++)
+            dri_bo_unreference(bo_gem->reloc_target_bo[i]);
+         free(bo_gem->reloc_target_bo);
+         free(bo_gem->relocs);
+      }
+
+      bucket = dri_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
+      /* Put the buffer into our internal cache for reuse if we can. */
+      if (bucket != NULL &&
+          (bucket->max_entries == -1 ||
+           (bucket->max_entries > 0 &&
+            bucket->num_entries < bucket->max_entries)))
+      {
+         entry = calloc(1, sizeof(*entry));
+      }
+
+      if (entry != NULL) {
+         entry->gem_handle = bo_gem->gem_handle;
+
+         entry->next = NULL;
+         *bucket->tail = entry;
+         bucket->tail = &entry->next;
+         bucket->num_entries++;
+      } else {
+         /* Not cacheable, or no memory for the cache node: release the
+          * handle instead.
+          */
+         struct drm_gem_unreference unref;
+
+         /* Decrement the kernel refcount for the buffer. */
+         memset(&unref, 0, sizeof(unref));
+         unref.handle = bo_gem->gem_handle;
+         ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_UNREFERENCE, &unref);
+         if (ret != 0) {
+            fprintf(stderr, "DRM_IOCTL_GEM_UNREFERENCE failed (%s): %s\n",
+                    bo_gem->name, strerror(errno));
+         }
+      }
+
+      DBG("bo_unreference final: %p (%s)\n", &bo_gem->bo, bo_gem->name);
+
+      free(bo);
+      return;
+   }
+}
+
+/**
+ * Maps \p bo into the process and publishes the address in bo->virtual.
+ * Nested calls are counted; only the first one performs the mapping.
+ * \p write_enable is not used.
+ *
+ * \return 0 on success, a negative errno value if the mapping ioctl fails.
+ */
+static int
+dri_gem_bo_map(dri_bo *bo, GLboolean write_enable)
+{
+   dri_bufmgr_gem *bufmgr_gem;
+   dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+   int ret;
+
+   bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr;
+
+   /* Allow recursive mapping. Mesa may recursively map buffers with
+    * nested display loops.
+    */
+   if (bo_gem->map_count++ != 0)
+      return 0;
+
+   assert(bo->virtual == NULL);
+
+   DBG("bo_map: %p (%s)\n", &bo_gem->bo, bo_gem->name);
+
+   if (bo_gem->virtual == NULL) {
+      struct drm_gem_mmap mmap_arg;
+
+      /* Don't hand stack garbage to the kernel.
+       * NOTE(review): assumes struct drm_gem_mmap carries offset/size
+       * inputs; the whole object is requested, matching the
+       * munmap(..., bo->size) in unmap -- confirm against drm.h.
+       */
+      memset(&mmap_arg, 0, sizeof(mmap_arg));
+      mmap_arg.handle = bo_gem->gem_handle;
+      mmap_arg.offset = 0;
+      mmap_arg.size = bo->size;
+      ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_MMAP, &mmap_arg);
+      if (ret != 0) {
+         int err = errno;
+
+         fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n",
+                 __FILE__, __LINE__, bo_gem->name, strerror(err));
+         /* Undo the count taken above and report the failure rather than
+          * publishing an address the kernel never filled in.
+          */
+         bo_gem->map_count--;
+         return -err;
+      }
+      bo_gem->virtual = (void *)(uintptr_t)mmap_arg.addr_ptr;
+   }
+
+   /* XXX Synchronization with hardware */
+
+   bo->virtual = bo_gem->virtual;
+
+   return 0;
+}
+
+/**
+ * Undoes one dri_gem_bo_map(); the mapping is torn down when the outermost
+ * map is released.  NULL is allowed.
+ */
+static int
+dri_gem_bo_unmap(dri_bo *bo)
+{
+   dri_bufmgr_gem *bufmgr_gem;
+   dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+
+   if (bo == NULL)
+      return 0;
+
+   assert(bo_gem->map_count != 0);
+   if (--bo_gem->map_count != 0)
+      return 0;
+
+   bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr;
+
+   assert(bo->virtual != NULL);
+
+   DBG("bo_unmap: %p (%s)\n", &bo_gem->bo, bo_gem->name);
+
+   munmap(bo_gem->virtual, bo->size);
+   bo_gem->virtual = NULL;
+   bo->virtual = NULL;
+
+   return 0;
+}
+
+/** Takes an additional reference on \p fence */
+static void
+dri_gem_fence_reference(dri_fence *fence)
+{
+   dri_fence_gem *fence_gem = (dri_fence_gem *)fence;
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)fence->bufmgr;
+
+   ++fence_gem->refcount;
+   DBG("fence_reference: %p (%s)\n", &fence_gem->fence, fence_gem->name);
+}
+
+/**
+ * Drops one reference on \p fence (NULL is allowed) and releases the DRM
+ * fence when the last reference goes away.
+ */
+static void
+dri_gem_fence_unreference(dri_fence *fence)
+{
+   dri_fence_gem *fence_gem = (dri_fence_gem *)fence;
+   dri_bufmgr_gem *bufmgr_gem;
+
+   if (!fence)
+      return;
+
+   /* Only touch fence->bufmgr after the NULL check above. */
+   bufmgr_gem = (dri_bufmgr_gem *)fence->bufmgr;
+
+   DBG("fence_unreference: %p (%s)\n", &fence_gem->fence, fence_gem->name);
+
+   if (--fence_gem->refcount == 0) {
+      int ret;
+
+      ret = drmFenceUnreference(bufmgr_gem->fd, &fence_gem->drm_fence);
+      if (ret != 0) {
+         fprintf(stderr, "drmFenceUnreference failed (%s): %s\n",
+                 fence_gem->name, strerror(-ret));
+      }
+
+      free(fence);
+      return;
+   }
+}
+
+/** Blocks until \p fence has signaled; aborts the process if the wait fails */
+static void
+dri_gem_fence_wait(dri_fence *fence)
+{
+   dri_fence_gem *fence_gem = (dri_fence_gem *)fence;
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)fence->bufmgr;
+   int ret;
+
+   ret = drmFenceWait(bufmgr_gem->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_gem->drm_fence, 0);
+   if (ret != 0) {
+      fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n",
+              __FILE__, __LINE__, fence_gem->name, strerror(-ret));
+      abort();
+   }
+
+   DBG("fence_wait: %p (%s)\n", &fence_gem->fence, fence_gem->name);
+}
+
+/**
+ * Frees the buffer manager, its validation arrays and every GEM object still
+ * parked in the reuse cache.
+ */
+static void
+dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr)
+{
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr;
+   int i;
+
+   free(bufmgr_gem->validate_array);
+   free(bufmgr_gem->validate_bo);
+
+   /* Free any cached buffer objects we were going to reuse */
+   for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) {
+      struct dri_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
+      struct dri_gem_bo_bucket_entry *entry;
+
+      while ((entry = bucket->head) != NULL) {
+         struct drm_gem_unreference unref;
+         int ret;
+
+         bucket->head = entry->next;
+         if (entry->next == NULL)
+            bucket->tail = &bucket->head;
+         bucket->num_entries--;
+
+         /* Decrement the kernel refcount for the buffer. */
+         memset(&unref, 0, sizeof(unref));
+         unref.handle = entry->gem_handle;
+         ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_UNREFERENCE, &unref);
+         if (ret != 0) {
+            /* ioctl() reports failure as -1 with the reason in errno. */
+            fprintf(stderr, "DRM_IOCTL_GEM_UNREFERENCE failed: %s\n",
+                    strerror(errno));
+         }
+
+         free(entry);
+      }
+   }
+
+   free(bufmgr);
+}
+
+/**
+ * Adds the target buffer to the validation list and adds the relocation
+ * to the reloc_buffer's relocation list.
+ *
+ * The relocation entry at the given offset must already contain the
+ * precomputed relocation value, because the kernel will optimize out
+ * the relocation entry write when the buffer hasn't moved from the
+ * last known offset in target_bo.
+ */
+static int
+dri_gem_emit_reloc(dri_bo *bo, uint64_t flags, GLuint delta,
+                   GLuint offset, dri_bo *target_bo)
+{
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr;
+   dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+   dri_bo_gem *target_bo_gem = (dri_bo_gem *)target_bo;
+
+   /* Create a new relocation list if needed */
+   if (bo_gem->relocs == NULL) {
+      /* Check the arrays as well as the return code so a failed allocation
+       * is caught here instead of being written through below.
+       */
+      if (intel_setup_reloc_list(bo) != 0 ||
+          bo_gem->relocs == NULL || bo_gem->reloc_target_bo == NULL)
+         return -ENOMEM;
+   }
+
+   /* Check overflow.  The assert documents the invariant; the runtime test
+    * keeps NDEBUG builds from writing past the arrays.
+    */
+   assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
+   if ((uint32_t)bo_gem->reloc_count >= bufmgr_gem->max_relocs)
+      return -ENOMEM;
+
+   bo_gem->relocs[bo_gem->reloc_count].offset = offset;
+   bo_gem->relocs[bo_gem->reloc_count].delta = delta;
+   bo_gem->relocs[bo_gem->reloc_count].target_handle =
+      target_bo_gem->gem_handle;
+
+   /* Keep the target alive for as long as this relocation refers to it. */
+   bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo;
+   dri_bo_reference(target_bo);
+
+   bo_gem->reloc_count++;
+   return 0;
+}
+
+/**
+ * Walk the tree of relocations rooted at BO and accumulate the list of
+ * validations to be performed and update the relocation buffers with
+ * index values into the validation list.
+ */
+static void
+dri_gem_bo_process_reloc(dri_bo *bo)
+{
+   dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+   int i;
+
+   if (bo_gem->relocs == NULL)
+      return;
+
+   for (i = 0; i < bo_gem->reloc_count; i++) {
+      dri_bo *target_bo = bo_gem->reloc_target_bo[i];
+
+      /* Continue walking the tree depth-first. */
+      dri_gem_bo_process_reloc(target_bo);
+
+      /* Add the target to the validate list */
+      intel_add_validate_buffer(target_bo);
+   }
+}
+
+/**
+ * Builds the validation list for \p batch_buf and everything it relocates
+ * against.
+ *
+ * \return pointer to the buffer manager's execbuffer argument with the buffer
+ *         list filled in; the caller completes the remaining fields.
+ */
+static void *
+dri_gem_process_reloc(dri_bo *batch_buf)
+{
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)batch_buf->bufmgr;
+
+   /* Update indices and set up the validate list. */
+   dri_gem_bo_process_reloc(batch_buf);
+
+   /* Add the batch buffer to the validation list.  There are no relocations
+    * pointing to it.
+    */
+   intel_add_validate_buffer(batch_buf);
+
+   bufmgr_gem->exec_arg.buffers_ptr = (uintptr_t)bufmgr_gem->validate_array;
+   bufmgr_gem->exec_arg.buffer_count = bufmgr_gem->validate_count;
+   /* Execution starts at the beginning of the batch.  This field used to be
+    * set to the buffer count; the submit path in intel_ioctl.c stores 0 here.
+    */
+   bufmgr_gem->exec_arg.batch_start_offset = 0;
+
+   return &bufmgr_gem->exec_arg;
+}
+
+/** Copies the offsets reported in the validation list back into each bo */
+static void
+intel_update_buffer_offsets (dri_bufmgr_gem *bufmgr_gem)
+{
+   int i;
+
+   for (i = 0; i < bufmgr_gem->validate_count; i++) {
+      dri_bo *bo = bufmgr_gem->validate_bo[i];
+      dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+
+      /* Update the buffer offset */
+      if (bufmgr_gem->validate_array[i].buffer_offset != bo->offset) {
+         DBG("BO %s migrated: 0x%08lx -> 0x%08llx\n",
+             bo_gem->name, bo->offset,
+             (unsigned long long)bufmgr_gem->validate_array[i].buffer_offset);
+         bo->offset = bufmgr_gem->validate_array[i].buffer_offset;
+      }
+   }
+}
+
+/**
+ * Called after the batch has been handed to the kernel: records the new
+ * buffer offsets and empties the validation list, dropping the reference it
+ * held on each buffer.  \p last_fence is not touched.
+ */
+static void
+dri_gem_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
+{
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)batch_buf->bufmgr;
+   int i;
+
+   intel_update_buffer_offsets (bufmgr_gem);
+
+   if (bufmgr_gem->bufmgr.debug)
+      dri_gem_dump_validation_list(bufmgr_gem);
+
+   for (i = 0; i < bufmgr_gem->validate_count; i++) {
+      dri_bo *bo = bufmgr_gem->validate_bo[i];
+      dri_bo_gem *bo_gem = (dri_bo_gem *)bo;
+
+      /* Disconnect the buffer from the validate list */
+      bo_gem->validate_index = -1;
+      dri_bo_unreference(bo);
+      bufmgr_gem->validate_bo[i] = NULL;
+   }
+   bufmgr_gem->validate_count = 0;
+}
+
+/**
+ * Enables unlimited caching of buffer objects for reuse.
+ *
+ * NOTE: the implementation is currently disabled, so calling this has no
+ * effect.
+ *
+ * This is potentially very memory expensive, as the cache at each bucket
+ * size is only bounded by how many buffers of that size we've managed to have
+ * in flight at once.
+ */
+void
+intel_gem_enable_bo_reuse(dri_bufmgr *bufmgr)
+{
+   /* Deliberately a no-op for now: dri_gem_alloc() never takes buffers back
+    * out of the cache, so filling the buckets would only pin GEM objects
+    * until the buffer manager is destroyed.  The intended body is kept
+    * below.
+    */
+   /*
+   dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr;
+   int i;
+
+   for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) {
+      bufmgr_gem->cache_bucket[i].max_entries = -1;
+   }
+   */
+}
+
+/*
+ * Aperture-space check hook.  Not implemented for GEM: always returns 0.
+ */
+static int
+dri_gem_check_aperture_space(dri_bo *bo)
+{
+   return 0;
+}
+
+/**
+ * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
+ * and manage buffer objects.
+ *
+ * \param fd File descriptor of the opened DRM device.
+ * \param batch_size Size in bytes of a batchbuffer; used to size the
+ *	  per-buffer relocation lists.
+ * \return the new buffer manager, or NULL if it could not be allocated.
+ */
+dri_bufmgr *
+intel_bufmgr_gem_init(int fd, int batch_size)
+{
+   dri_bufmgr_gem *bufmgr_gem;
+   int i;
+
+   bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
+   if (bufmgr_gem == NULL)
+      return NULL;
+   bufmgr_gem->fd = fd;
+
+   /* Let's go with one relocation per every 2 dwords (but round down a bit
+    * since a power of two will mean an extra page allocation for the reloc
+    * buffer).
+    *
+    * Every 4 was too few for the blender benchmark.
+    */
+   bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
+
+   bufmgr_gem->bufmgr.bo_alloc = dri_gem_alloc;
+   bufmgr_gem->bufmgr.bo_alloc_static = dri_gem_alloc_static;
+   bufmgr_gem->bufmgr.bo_reference = dri_gem_bo_reference;
+   bufmgr_gem->bufmgr.bo_unreference = dri_gem_bo_unreference;
+   bufmgr_gem->bufmgr.bo_map = dri_gem_bo_map;
+   bufmgr_gem->bufmgr.bo_unmap = dri_gem_bo_unmap;
+   bufmgr_gem->bufmgr.fence_reference = dri_gem_fence_reference;
+   bufmgr_gem->bufmgr.fence_unreference = dri_gem_fence_unreference;
+   bufmgr_gem->bufmgr.fence_wait = dri_gem_fence_wait;
+   bufmgr_gem->bufmgr.destroy = dri_bufmgr_gem_destroy;
+   bufmgr_gem->bufmgr.emit_reloc = dri_gem_emit_reloc;
+   bufmgr_gem->bufmgr.process_relocs = dri_gem_process_reloc;
+   bufmgr_gem->bufmgr.post_submit = dri_gem_post_submit;
+   bufmgr_gem->bufmgr.debug = GL_FALSE;
+   bufmgr_gem->bufmgr.check_aperture_space = dri_gem_check_aperture_space;
+   /* Initialize the linked lists for BO reuse cache.  max_entries stays 0
+    * from calloc(), i.e. caching is disabled for every bucket.
+    */
+   for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++)
+      bufmgr_gem->cache_bucket[i].tail = &bufmgr_gem->cache_bucket[i].head;
+
+   return &bufmgr_gem->bufmgr;
+}
+
diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h
new file mode 100644
index 00000000000..a28f5ae814b
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h
@@ -0,0 +1,19 @@
+
+#ifndef INTEL_BUFMGR_GEM_H
+#define INTEL_BUFMGR_GEM_H
+
+#include "dri_bufmgr.h"
+
+extern dri_bo *intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
+					       unsigned int handle);
+
+dri_fence *intel_gem_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
+					   drm_fence_arg_t *arg);
+
+
+dri_bufmgr *intel_bufmgr_gem_init(int fd, int batch_size);
+
+void
+intel_gem_enable_bo_reuse(dri_bufmgr *bufmgr);
+
+#endif /* INTEL_BUFMGR_GEM_H */
diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c
index 66e36102b9d..f9624a6abeb 100644
---
a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -151,9 +151,9 @@ void intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence) + struct drm_i915_gem_execbuffer *execbuf, + dri_fence **fence) { - struct drm_i915_execbuffer execbuf; dri_fence *fo; int ret; @@ -169,16 +169,13 @@ intel_exec_ioctl(struct intel_context *intel, memset(&execbuf, 0, sizeof(execbuf)); - execbuf.num_buffers = count; - execbuf.batch.used = used; - execbuf.batch.cliprects = intel->pClipRects; - execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; - execbuf.batch.DR1 = 0; - execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) | - (((GLuint) intel->drawY) << 16)); - - execbuf.ops_list = (unsigned long)start; // TODO - execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED; + execbuf->batch_start_offset = 0; + execbuf->batch_len = used; + execbuf->cliprects_ptr = (uintptr_t)intel->pClipRects; + execbuf->num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + execbuf->DR1 = 0; + execbuf->DR4 = ((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); do { ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf, @@ -191,17 +188,6 @@ intel_exec_ioctl(struct intel_context *intel, exit(1); } - if (execbuf.fence_arg.error != 0) { - - /* - * Fence creation has failed, but the GPU has been - * idled by the kernel. Safe to continue. 
- */ - - *fence = NULL; - return; - } - fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers", &execbuf.fence_arg); if (!fo) { diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.h b/src/mesa/drivers/dri/intel/intel_ioctl.h index 8674aef723d..7691a27f928 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.h +++ b/src/mesa/drivers/dri/intel/intel_ioctl.h @@ -41,6 +41,7 @@ void intel_batch_ioctl( struct intel_context *intel, void intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence); + struct drm_i915_gem_execbuffer *execbuf, + dri_fence **fence); #endif -- 2.30.2