From 4cd3ef58a989f61ff22669648e4117426c6e603c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 3 Oct 2007 15:50:46 +1000 Subject: [PATCH] i915/drmbuf: attempt to push relocations into buffer manager This moves the relocations into the buffer manager in prepration for a superioctl move. --- src/mesa/drivers/dri/common/dri_bufmgr.c | 16 +++ src/mesa/drivers/dri/common/dri_bufmgr.h | 13 ++ src/mesa/drivers/dri/common/dri_bufmgr_fake.c | 131 +++++++++++++++++- src/mesa/drivers/dri/common/dri_bufmgr_ttm.c | 131 +++++++++++++++++- src/mesa/drivers/dri/i915/Makefile | 3 +- src/mesa/drivers/dri/i915/intel_batchbuffer.c | 112 ++------------- src/mesa/drivers/dri/i915/intel_batchbuffer.h | 17 +-- 7 files changed, 305 insertions(+), 118 deletions(-) diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 407409bf06d..359b57863ad 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -150,3 +150,19 @@ dri_bufmgr_destroy(dri_bufmgr *bufmgr) { bufmgr->destroy(bufmgr); } + + +void dri_emit_reloc(dri_bo *batch_buf, GLuint flags, GLuint delta, GLuint offset, dri_bo *relocatee) +{ + batch_buf->bufmgr->emit_reloc(batch_buf, flags, delta, offset, relocatee); +} + +void dri_process_relocs(dri_bo *batch_buf) +{ + batch_buf->bufmgr->process_relocs(batch_buf); +} + +void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +{ + batch_buf->bufmgr->post_submit(batch_buf, last_fence); +} diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index 3be342926f7..aeeb6bdb43c 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -158,6 +158,15 @@ struct _dri_bufmgr { * Tears down the buffer manager instance. */ void (*destroy)(dri_bufmgr *bufmgr); + + /** + * Add relocation + */ + void (*emit_reloc)(dri_bo *batch_buf, GLuint flags, GLuint delta, GLuint offset, dri_bo *relocatee); + + void *(*process_relocs)(dri_bo *batch_buf); + + void (*post_submit)(dri_bo *batch_buf, dri_fence **fence); }; dri_bo *dri_bo_alloc(dri_bufmgr *bufmgr, const char *name, unsigned long size, @@ -195,4 +204,8 @@ void dri_bufmgr_destroy(dri_bufmgr *bufmgr); dri_bo *dri_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, unsigned int handle); +void dri_emit_reloc(dri_bo *batch_buf, GLuint flags, GLuint delta, GLuint offset, dri_bo *relocatee); +void dri_process_relocs(dri_bo *batch_buf); +void dri_post_process_relocs(dri_bo *batch_buf); +void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence); #endif diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c index e0d23a36477..adf0cf5ab72 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c @@ -59,6 +59,16 @@ * processed through the command queue wouldn't need to care about * fences. */ +#define MAX_RELOCS 4096 + +struct fake_buffer_reloc +{ + dri_bo *buf; + GLuint offset; + GLuint delta; /* not needed? */ + GLuint validate_flags; +}; + struct block { struct block *next, *prev; struct mem_block *mem; /* BM_MEM_AGP */ @@ -107,6 +117,12 @@ typedef struct _bufmgr_fake { int (*fence_wait)(void *private, unsigned int fence_cookie); /** Driver-supplied argument to driver callbacks */ void *driver_priv; + + + /** fake relocation list */ + struct fake_buffer_reloc reloc[MAX_RELOCS]; + GLuint nr_relocs; + GLboolean performed_rendering; } dri_bufmgr_fake; typedef struct _dri_bo_fake { @@ -837,6 +853,117 @@ dri_fake_destroy(dri_bufmgr *bufmgr) free(bufmgr); } +static void +dri_fake_emit_reloc(dri_bo *batch_buf, GLuint flags, GLuint delta, GLuint offset, + dri_bo *relocatee) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; + struct fake_buffer_reloc *r = &bufmgr_fake->reloc[bufmgr_fake->nr_relocs++]; + + assert(bufmgr_fake->nr_relocs <= MAX_RELOCS); + + dri_bo_reference(relocatee); + + r->buf = relocatee; + r->offset = offset; + r->delta = delta; + r->validate_flags = flags; + + return; +} + + +static int +relocation_sort(const void *a_in, const void *b_in) { + const struct fake_buffer_reloc *a = a_in, *b = b_in; + + return (intptr_t)a->buf < (intptr_t)b->buf ? -1 : 1; +} + +static void * +dri_fake_process_reloc(dri_bo *batch_buf) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; + GLuint i; + GLuint *ptr; + + assert(batch_buf->virtual != NULL); + ptr = batch_buf->virtual; + + bufmgr_fake->performed_rendering = GL_FALSE; + + /* Sort our relocation list in terms of referenced buffer pointer. + * This lets us uniquely validate the buffers with the sum of all the flags, + * while avoiding O(n^2) on number of relocations. + */ + qsort(bufmgr_fake->reloc, bufmgr_fake->nr_relocs, sizeof(bufmgr_fake->reloc[0]), + relocation_sort); + + /* Perform the necessary validations of buffers, and enter the relocations + * in the batchbuffer. + */ + for (i = 0; i < bufmgr_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bufmgr_fake->reloc[i]; + + if (r->validate_flags & DRM_BO_FLAG_WRITE) + bufmgr_fake->performed_rendering = GL_TRUE; + + /* If this is the first time we've seen this buffer in the relocation + * list, figure out our flags and validate it. + */ + if (i == 0 || bufmgr_fake->reloc[i - 1].buf != r->buf) { + uint32_t validate_flags; + int j, ret; + + /* Accumulate the flags we need for validating this buffer. */ + validate_flags = r->validate_flags; + for (j = i + 1; j < bufmgr_fake->nr_relocs; j++) { + if (bufmgr_fake->reloc[j].buf != r->buf) + break; + validate_flags |= bufmgr_fake->reloc[j].validate_flags; + } + + /* Validate. If we fail, fence to clear the unfenced list and bail + * out. + */ + ret = dri_bo_validate(r->buf, validate_flags); + if (ret != 0) { + dri_fence *fo; + dri_bo_unmap(batch_buf); + fo = dri_fence_validated(batch_buf->bufmgr, + "batchbuffer failure fence", GL_TRUE); + dri_fence_unreference(fo); + goto done; + } + } + ptr[r->offset / 4] = r->buf->offset + r->delta; + dri_bo_unreference(r->buf); + } + dri_bo_unmap(batch_buf); + + dri_bo_validate(batch_buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); + + bufmgr_fake->nr_relocs = 0; + done: + return NULL; +} + +static void +dri_fake_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; + dri_fence *fo; + + fo = dri_fence_validated(batch_buf->bufmgr, "Batch fence", GL_TRUE); + + if (bufmgr_fake->performed_rendering) { + dri_fence_unreference(*last_fence); + *last_fence = fo; + } else { + dri_fence_unreference(fo); + } +} + dri_bufmgr * dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, unsigned long size, @@ -873,7 +1000,9 @@ dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, bufmgr_fake->bufmgr.fence_reference = dri_fake_fence_reference; bufmgr_fake->bufmgr.fence_unreference = dri_fake_fence_unreference; bufmgr_fake->bufmgr.destroy = dri_fake_destroy; - + bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc; + bufmgr_fake->bufmgr.process_relocs = dri_fake_process_reloc; + bufmgr_fake->bufmgr.post_submit = dri_fake_post_submit; bufmgr_fake->fence_emit = fence_emit; bufmgr_fake->fence_wait = fence_wait; bufmgr_fake->driver_priv = driver_priv; diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_ttm.c b/src/mesa/drivers/dri/common/dri_bufmgr_ttm.c index 235398eb872..84ac0b2c0d9 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr_ttm.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr_ttm.c @@ -43,6 +43,15 @@ #include "imports.h" #define BUFMGR_DEBUG 0 +#define MAX_RELOCS 4096 + +struct ttm_buffer_reloc +{ + dri_bo *buf; + GLuint offset; + GLuint delta; /* not needed? */ + GLuint validate_flags; +}; typedef struct _dri_bufmgr_ttm { dri_bufmgr bufmgr; @@ -51,6 +60,12 @@ typedef struct _dri_bufmgr_ttm { _glthread_Mutex mutex; unsigned int fence_type; unsigned int fence_type_flush; + + /** ttm relocation list */ + struct ttm_buffer_reloc reloc[MAX_RELOCS]; + GLuint nr_relocs; + GLboolean performed_rendering; + } dri_bufmgr_ttm; typedef struct _dri_bo_ttm { @@ -431,6 +446,118 @@ dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) free(bufmgr); } + +static void +dri_ttm_emit_reloc(dri_bo *batch_buf, GLuint flags, GLuint delta, GLuint offset, + dri_bo *relocatee) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; + struct ttm_buffer_reloc *r = &bufmgr_ttm->reloc[bufmgr_ttm->nr_relocs++]; + + assert(bufmgr_ttm->nr_relocs <= MAX_RELOCS); + + dri_bo_reference(relocatee); + + r->buf = relocatee; + r->offset = offset; + r->delta = delta; + r->validate_flags = flags; + + return; +} + + +static int +relocation_sort(const void *a_in, const void *b_in) { + const struct ttm_buffer_reloc *a = a_in, *b = b_in; + + return (intptr_t)a->buf < (intptr_t)b->buf ? -1 : 1; +} + +static void * +dri_ttm_process_reloc(dri_bo *batch_buf) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; + GLuint i; + GLuint *ptr; + + assert(batch_buf->virtual != NULL); + ptr = batch_buf->virtual; + + bufmgr_ttm->performed_rendering = GL_FALSE; + + /* Sort our relocation list in terms of referenced buffer pointer. + * This lets us uniquely validate the buffers with the sum of all the flags, + * while avoiding O(n^2) on number of relocations. + */ + qsort(bufmgr_ttm->reloc, bufmgr_ttm->nr_relocs, sizeof(bufmgr_ttm->reloc[0]), + relocation_sort); + + /* Perform the necessary validations of buffers, and enter the relocations + * in the batchbuffer. + */ + for (i = 0; i < bufmgr_ttm->nr_relocs; i++) { + struct ttm_buffer_reloc *r = &bufmgr_ttm->reloc[i]; + + if (r->validate_flags & DRM_BO_FLAG_WRITE) + bufmgr_ttm->performed_rendering = GL_TRUE; + + /* If this is the first time we've seen this buffer in the relocation + * list, figure out our flags and validate it. + */ + if (i == 0 || bufmgr_ttm->reloc[i - 1].buf != r->buf) { + uint32_t validate_flags; + int j, ret; + + /* Accumulate the flags we need for validating this buffer. */ + validate_flags = r->validate_flags; + for (j = i + 1; j < bufmgr_ttm->nr_relocs; j++) { + if (bufmgr_ttm->reloc[j].buf != r->buf) + break; + validate_flags |= bufmgr_ttm->reloc[j].validate_flags; + } + + /* Validate. If we fail, fence to clear the unfenced list and bail + * out. + */ + ret = dri_bo_validate(r->buf, validate_flags); + if (ret != 0) { + dri_fence *fo; + dri_bo_unmap(batch_buf); + fo = dri_fence_validated(batch_buf->bufmgr, + "batchbuffer failure fence", GL_TRUE); + dri_fence_unreference(fo); + goto done; + } + } + ptr[r->offset / 4] = r->buf->offset + r->delta; + dri_bo_unreference(r->buf); + } + dri_bo_unmap(batch_buf); + + dri_bo_validate(batch_buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); + + bufmgr_ttm->nr_relocs = 0; + done: + return NULL; +} + +static void +dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; + dri_fence *fo; + + fo = dri_fence_validated(batch_buf->bufmgr, "Batch fence", GL_TRUE); + + if (bufmgr_ttm->performed_rendering) { + dri_fence_unreference(*last_fence); + *last_fence = fo; + } else { + dri_fence_unreference(fo); + } +} + /** * Initializes the TTM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. @@ -464,6 +591,8 @@ dri_bufmgr_ttm_init(int fd, unsigned int fence_type, bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference; bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait; bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy; - + bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc; + bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc; + bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; return &bufmgr_ttm->bufmgr; } diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 38e40902111..84749f1fd95 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -52,7 +52,8 @@ DRIVER_SOURCES = \ intel_state.c \ intel_tris.c \ intel_fbo.c \ - intel_depthstencil.c + intel_depthstencil.c \ + intel_drmbuf.c C_SOURCES = \ $(COMMON_SOURCES) \ diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.c b/src/mesa/drivers/dri/i915/intel_batchbuffer.c index 045ff0a5b09..6934b8841ea 100644 --- a/src/mesa/drivers/dri/i915/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.c @@ -116,85 +116,21 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch) free(batch); } -static int -relocation_sort(const void *a_in, const void *b_in) { - const struct buffer_reloc *a = a_in, *b = b_in; - - return (intptr_t)a->buf < (intptr_t)b->buf ? -1 : 1; -} /* TODO: Push this whole function into bufmgr. */ static void do_flush_locked(struct intel_batchbuffer *batch, - GLuint used, - GLboolean ignore_cliprects, GLboolean allow_unlock) + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock) { - GLuint *ptr; - GLuint i; struct intel_context *intel = batch->intel; - dri_fence *fo; - GLboolean performed_rendering = GL_FALSE; - - assert(batch->buf->virtual != NULL); - ptr = batch->buf->virtual; - - /* Sort our relocation list in terms of referenced buffer pointer. - * This lets us uniquely validate the buffers with the sum of all the flags, - * while avoiding O(n^2) on number of relocations. - */ - qsort(batch->reloc, batch->nr_relocs, sizeof(batch->reloc[0]), - relocation_sort); - - /* Perform the necessary validations of buffers, and enter the relocations - * in the batchbuffer. - */ - for (i = 0; i < batch->nr_relocs; i++) { - struct buffer_reloc *r = &batch->reloc[i]; - if (r->validate_flags & DRM_BO_FLAG_WRITE) - performed_rendering = GL_TRUE; + dri_process_relocs(batch->buf); - /* If this is the first time we've seen this buffer in the relocation - * list, figure out our flags and validate it. - */ - if (i == 0 || batch->reloc[i - 1].buf != r->buf) { - uint32_t validate_flags; - int j, ret; - - /* Accumulate the flags we need for validating this buffer. */ - validate_flags = r->validate_flags; - for (j = i + 1; j < batch->nr_relocs; j++) { - if (batch->reloc[j].buf != r->buf) - break; - validate_flags |= batch->reloc[j].validate_flags; - } - - /* Validate. If we fail, fence to clear the unfenced list and bail - * out. - */ - ret = dri_bo_validate(r->buf, validate_flags); - if (ret != 0) { - dri_bo_unmap(batch->buf); - fo = dri_fence_validated(intel->intelScreen->bufmgr, - "batchbuffer failure fence", GL_TRUE); - dri_fence_unreference(fo); - goto done; - } - } - ptr[r->offset / 4] = r->buf->offset + r->delta; - dri_bo_unreference(r->buf); - } - - dri_bo_unmap(batch->buf); batch->map = NULL; batch->ptr = NULL; - - dri_bo_validate(batch->buf, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); - - batch->list_count = 0; - batch->nr_relocs = 0; batch->flags = 0; /* Throw away non-effective packets. Won't work once we have @@ -208,21 +144,7 @@ do_flush_locked(struct intel_batchbuffer *batch, used, ignore_cliprects, allow_unlock); } - /* Associate a fence with the validated buffers, and note that we included - * a flush at the end. - */ - fo = dri_fence_validated(intel->intelScreen->bufmgr, - "Batch fence", GL_TRUE); - - if (performed_rendering) { - dri_fence_unreference(batch->last_fence); - batch->last_fence = fo; - } else { - /* If we didn't validate any buffers for writing by the card, we don't - * need to track the fence for glFinish(). - */ - dri_fence_unreference(fo); - } + dri_post_submit(batch->buf, &batch->last_fence); if (intel->numClipRects == 0 && !ignore_cliprects) { if (allow_unlock) { @@ -237,16 +159,14 @@ do_flush_locked(struct intel_batchbuffer *batch, intel->vtbl.lost_hardware(intel); } -done: if (INTEL_DEBUG & DEBUG_BATCH) { - dri_bo_map(batch->buf, GL_FALSE); - intel_decode(ptr, used / 4, batch->buf->offset, - intel->intelScreen->deviceID); - dri_bo_unmap(batch->buf); + // dri_bo_map(batch->buf, GL_FALSE); + // intel_decode(ptr, used / 4, batch->buf->offset, + // intel->intelScreen->deviceID); + // dri_bo_unmap(batch->buf); } } - void intel_batchbuffer_flush(struct intel_batchbuffer *batch) { @@ -280,7 +200,7 @@ intel_batchbuffer_flush(struct intel_batchbuffer *batch) do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS), GL_FALSE); - + if (!was_locked) UNLOCK_HARDWARE(intel); @@ -305,22 +225,12 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, GLuint flags, GLuint delta) { - struct buffer_reloc *r = &batch->reloc[batch->nr_relocs++]; - - assert(batch->nr_relocs <= MAX_RELOCS); - - dri_bo_reference(buffer); - r->buf = buffer; - r->offset = batch->ptr - batch->map; - r->delta = delta; - r->validate_flags = flags; - + dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer); batch->ptr += 4; + return GL_TRUE; } - - void intel_batchbuffer_data(struct intel_batchbuffer *batch, const void *data, GLuint bytes, GLuint flags) diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.h b/src/mesa/drivers/dri/i915/intel_batchbuffer.h index 850a91e1c91..3cb272d7346 100644 --- a/src/mesa/drivers/dri/i915/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.h @@ -2,6 +2,9 @@ #define INTEL_BATCHBUFFER_H #include "mtypes.h" + +#include "intel_drmbuf.h" + #include "dri_bufmgr.h" struct intel_context; @@ -9,19 +12,9 @@ struct intel_context; #define BATCH_SZ 16384 #define BATCH_RESERVED 16 -#define MAX_RELOCS 4096 - #define INTEL_BATCH_NO_CLIPRECTS 0x1 #define INTEL_BATCH_CLIPRECTS 0x2 -struct buffer_reloc -{ - dri_bo *buf; - GLuint offset; - GLuint delta; /* not needed? */ - GLuint validate_flags; -}; - struct intel_batchbuffer { struct intel_context *intel; @@ -30,13 +23,9 @@ struct intel_batchbuffer dri_fence *last_fence; GLuint flags; - drmBOList list; - GLuint list_count; GLubyte *map; GLubyte *ptr; - struct buffer_reloc reloc[MAX_RELOCS]; - GLuint nr_relocs; GLuint size; }; -- 2.30.2