From 7cc7ff7051d427ff45b4d7d3664e2eecd13d0e13 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Apr 2008 16:22:05 +1000 Subject: [PATCH] intel/fake_bufmgr: Attempt to restrict references to objects in a batchbuffer > aperture size. With compiz on Intel hw with the fake bufmgr, opening 4 firefox windows at 1680x1050 and hitting alt-tab could cause the batchbuffer to try to reference more than the 32MB of RAM allocated. Fix 1: Pre-verify the list of buffers against the current batchbuffer and, if it can't possibly fit in the aperture, flush the batchbuffer to the hardware and try again. If the buffers still can't fit, well, then you are hosed, as I'm not sure there is a nice way to tell anyone. Fix 2: The next problem was that even with a simple check for total < aperture, we ran into fragmentation issues; this meant that halfway down a set of buffers we would fail as no blocks were available. Fix this by nuking the memory manager from orbit and letting it start again and re-lay out the blocks in a manner that fits. Fix 3: Finally, the initial problem we were seeing was a memcpy to a NULL backing store. We seem to end up with a texture at some point that never gets mapped but ends up with data in it. compiz alt-tab icons have this property. So I created a card dirty bit that causes any buffer that is !static and has been written to to be memcpy'd back to memory. This is probably wrong, but it makes compiz work for now. Caveats: 965 support still fails. 
--- src/mesa/drivers/dri/common/dri_bufmgr.c | 10 +- src/mesa/drivers/dri/common/dri_bufmgr.h | 9 +- src/mesa/drivers/dri/common/dri_bufmgr_fake.c | 121 ++++++++++++++++-- src/mesa/drivers/dri/i915/i915_vtbl.c | 31 +++++ .../drivers/dri/intel/intel_batchbuffer.c | 10 +- src/mesa/drivers/dri/intel/intel_blit.c | 10 ++ src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c | 14 +- 7 files changed, 187 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 8413a51577c..4df006fb9f8 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -142,10 +142,10 @@ dri_bufmgr_destroy(dri_bufmgr *bufmgr) } -void dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, +int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, GLuint offset, dri_bo *target_buf) { - reloc_buf->bufmgr->emit_reloc(reloc_buf, flags, delta, offset, target_buf); + return reloc_buf->bufmgr->emit_reloc(reloc_buf, flags, delta, offset, target_buf); } void *dri_process_relocs(dri_bo *batch_buf, GLuint *count) @@ -163,3 +163,9 @@ dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug) { bufmgr->debug = enable_debug; } + +int +dri_bufmgr_check_aperture_space(dri_bo *bo) +{ + return bo->bufmgr->check_aperture_space(bo); +} diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index 08cf8ca3e15..4593eaf9f7a 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -156,7 +156,7 @@ struct _dri_bufmgr { * \param target Buffer whose offset should be written into the relocation * entry. 
*/ - void (*emit_reloc)(dri_bo *reloc_buf, uint64_t flags, GLuint delta, + int (*emit_reloc)(dri_bo *reloc_buf, uint64_t flags, GLuint delta, GLuint offset, dri_bo *target); /** @@ -176,6 +176,7 @@ struct _dri_bufmgr { void (*post_submit)(dri_bo *batch_buf, dri_fence **fence); + int (*check_aperture_space)(dri_bo *bo); GLboolean debug; /**< Enables verbose debugging printouts */ }; @@ -211,9 +212,11 @@ void dri_bo_fake_disable_backing_store(dri_bo *bo, void *ptr); void dri_bufmgr_destroy(dri_bufmgr *bufmgr); -void dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, - GLuint offset, dri_bo *target_buf); +int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target_buf); void *dri_process_relocs(dri_bo *batch_buf, uint32_t *count); void dri_post_process_relocs(dri_bo *batch_buf); void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence); +int dri_bufmgr_check_aperture_space(dri_bo *bo); + #endif diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c index 9d94ca3b39e..c3c28afc9fb 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c @@ -133,6 +133,9 @@ typedef struct _bufmgr_fake { GLboolean debug; GLboolean performed_rendering; + + /* keep track of the current total size of objects we have relocs for */ + unsigned long current_total_size; } dri_bufmgr_fake; typedef struct _dri_bo_fake { @@ -142,6 +145,8 @@ typedef struct _dri_bo_fake { const char *name; unsigned dirty:1; + unsigned size_accounted:1; /*this buffers size has been accounted against the aperture */ + unsigned card_dirty:1; /* has the card written to this buffer - we make need to copy it back */ unsigned int refcount; /* Flags may consist of any of the DRM_BO flags, plus * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the first two @@ -177,6 +182,8 @@ typedef struct _dri_fence_fake { static int clear_fenced(dri_bufmgr_fake 
*bufmgr_fake, unsigned int fence_cookie); +static int dri_fake_check_aperture_space(dri_bo *bo); + #define MAXFENCE 0x7fffffff static GLboolean FENCE_LTE( unsigned a, unsigned b ) @@ -264,11 +271,19 @@ alloc_block(dri_bo *bo) */ static void free_block(dri_bufmgr_fake *bufmgr_fake, struct block *block) { + dri_bo_fake *bo_fake; DBG("free block %p\n", block); if (!block) return; + bo_fake = (dri_bo_fake *)block->bo; + if (bo_fake->card_dirty == GL_TRUE) { + memcpy(bo_fake->backing_store, block->virtual, block->bo->size); + bo_fake->card_dirty = GL_FALSE; + bo_fake->dirty = GL_TRUE; + } + if (block->on_hardware) { block->bo = NULL; } @@ -287,11 +302,15 @@ static void free_block(dri_bufmgr_fake *bufmgr_fake, struct block *block) static void alloc_backing_store(dri_bo *bo) { + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; dri_bo_fake *bo_fake = (dri_bo_fake *)bo; assert(!bo_fake->backing_store); assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); bo_fake->backing_store = ALIGN_MALLOC(bo->size, 64); + + DBG("alloc_backing - buf %d %p %d\n", bo_fake->id, bo_fake->backing_store, bo->size); + assert(bo_fake->backing_store); } static void @@ -495,9 +514,6 @@ static GLboolean evict_and_alloc_block(dri_bo *bo) DBG("%s 0x%x bytes failed\n", __FUNCTION__, bo->size); - assert(is_empty_list(&bufmgr_fake->on_hardware)); - assert(is_empty_list(&bufmgr_fake->fenced)); - return GL_FALSE; } @@ -784,6 +800,26 @@ dri_fake_bo_unmap(dri_bo *bo) return 0; } +static void +dri_fake_kick_all(dri_bufmgr_fake *bufmgr_fake) +{ + struct block *block, *tmp; + + bufmgr_fake->performed_rendering = GL_FALSE; + /* okay for ever BO that is on the HW kick it off. 
+ seriously not afraid of the POLICE right now */ + foreach_s(block, tmp, &bufmgr_fake->on_hardware) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + block->on_hardware = 0; + free_block(bufmgr_fake, block); + bo_fake->block = NULL; + bo_fake->validated = GL_FALSE; + bo_fake->dirty = GL_TRUE; + block->bo->offset = -1; + } +} + static int dri_fake_bo_validate(dri_bo *bo, uint64_t flags) { @@ -810,6 +846,9 @@ dri_fake_bo_validate(dri_bo *bo, uint64_t flags) return 0; } + /* reset size accounted */ + bo_fake->size_accounted = 0; + /* Allocate the card memory */ if (!bo_fake->block && !evict_and_alloc_block(bo)) { bufmgr_fake->fail = 1; @@ -836,7 +875,13 @@ dri_fake_bo_validate(dri_bo *bo, uint64_t flags) */ dri_bufmgr_fake_wait_idle(bufmgr_fake); - memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size); + /* we may never have mapped this BO so it might not have any backing store */ + /* if this happens it should be rare, but 0 the card memory in any case */ + if (bo_fake->backing_store) + memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size); + else + memset(bo_fake->block->virtual, 0, bo->size); + bo_fake->dirty = 0; } @@ -917,17 +962,25 @@ dri_fake_destroy(dri_bufmgr *bufmgr) free(bufmgr); } -static void +static int dri_fake_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, GLuint offset, dri_bo *target_buf) { dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)reloc_buf->bufmgr; struct fake_buffer_reloc *r; dri_bo_fake *reloc_fake = (dri_bo_fake *)reloc_buf; - int i; + dri_bo_fake *target_fake = (dri_bo_fake *)target_buf; + int ret, i; assert(reloc_buf); assert(target_buf); + + if (!target_fake->is_static && !target_fake->size_accounted) { + ret = dri_fake_check_aperture_space(target_buf); + if (ret) + return ret; + } + if (reloc_fake->relocs == NULL) { reloc_fake->relocs = malloc(sizeof(struct fake_buffer_reloc) * MAX_RELOCS); @@ -954,7 +1007,7 @@ dri_fake_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, } } - 
return; + return 0; } /** @@ -1008,8 +1061,11 @@ dri_fake_reloc_and_validate_buffer(dri_bo *bo) /* Validate the target buffer if that hasn't been done. */ if (!target_fake->validated) { ret = dri_fake_reloc_and_validate_buffer(r->target_buf); - if (ret != 0) + if (ret != 0) { + if (bo->virtual != NULL) + dri_bo_unmap(bo); return ret; + } } /* Calculate the value of the relocation entry. */ @@ -1028,8 +1084,15 @@ dri_fake_reloc_and_validate_buffer(dri_bo *bo) if (bo->virtual != NULL) dri_bo_unmap(bo); - if (bo_fake->validate_flags & DRM_BO_FLAG_WRITE) + if (bo_fake->validate_flags & DRM_BO_FLAG_WRITE) { + if (!(bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED))) { + if (bo_fake->backing_store == 0) + alloc_backing_store(bo); + + bo_fake->card_dirty = GL_TRUE; + } bufmgr_fake->performed_rendering = GL_TRUE; + } return dri_fake_bo_validate(bo, bo_fake->validate_flags); } @@ -1040,17 +1103,30 @@ dri_fake_process_relocs(dri_bo *batch_buf, GLuint *count_p) dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; dri_bo_fake *batch_fake = (dri_bo_fake *)batch_buf; int ret; + int retry_count = 0; bufmgr_fake->performed_rendering = GL_FALSE; dri_fake_calculate_validate_flags(batch_buf); batch_fake->validate_flags = DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ; + + /* we've ran out of RAM so blow the whole lot away and retry */ + restart: ret = dri_fake_reloc_and_validate_buffer(batch_buf); + if (bufmgr_fake->fail == 1) { + if (retry_count == 0) { + retry_count++; + dri_fake_kick_all(bufmgr_fake); + bufmgr_fake->fail = 0; + goto restart; + } + } assert(ret == 0); *count_p = 0; /* junk */ + bufmgr_fake->current_total_size = 0; return NULL; } @@ -1097,6 +1173,29 @@ dri_fake_post_submit(dri_bo *batch_buf, dri_fence **last_fence) dri_bo_fake_post_submit(batch_buf); } +static int +dri_fake_check_aperture_space(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + GLuint sz; + + sz = (bo->size + 
bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); + + if (bo_fake->size_accounted || bo_fake->is_static) + return 0; + + if (bufmgr_fake->current_total_size + sz > bufmgr_fake->size) { + DBG("check_space: bo %d %d overflowed bufmgr\n", bo_fake->id, sz); + return -1; + } + + bufmgr_fake->current_total_size += sz; + bo_fake->size_accounted = 1; + DBG("check_space: bo %d %d %d\n", bo_fake->id, bo->size, bufmgr_fake->current_total_size); + return 0; +} + dri_bufmgr * dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, unsigned long size, @@ -1132,7 +1231,8 @@ dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc; bufmgr_fake->bufmgr.process_relocs = dri_fake_process_relocs; bufmgr_fake->bufmgr.post_submit = dri_fake_post_submit; - bufmgr_fake->bufmgr.debug = GL_FALSE; + bufmgr_fake->bufmgr.check_aperture_space = dri_fake_check_aperture_space; + bufmgr_fake->bufmgr.debug = GL_TRUE; bufmgr_fake->fence_emit = fence_emit; bufmgr_fake->fence_wait = fence_wait; @@ -1140,3 +1240,4 @@ dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, return &bufmgr_fake->bufmgr; } + diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 94d70be441b..0f246513ddf 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -295,6 +295,7 @@ i915_emit_state(struct intel_context *intel) struct i915_context *i915 = i915_context(&intel->ctx); struct i915_hw_state *state = i915->current; int i; + int ret, count; GLuint dirty; BATCH_LOCALS; @@ -311,7 +312,37 @@ i915_emit_state(struct intel_context *intel) */ intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8, LOOP_CLIPRECTS); + count = 0; + again: + dirty = get_dirty(state); + + ret = 0; + if (dirty & I915_UPLOAD_BUFFERS) { + ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer); + ret |= 
dri_bufmgr_check_aperture_space(state->depth_region->buffer); + } + + if (dirty & I915_UPLOAD_TEX_ALL) { + for (i = 0; i < I915_TEX_UNITS; i++) + if (dirty & I915_UPLOAD_TEX(i)) { + if (state->tex_buffer[i]) { + ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]); + } + } + } + if (ret) { + if (count == 0) { + count++; + intel_batchbuffer_flush(intel->batch); + goto again; + } else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state"); + assert(0); + } + } + /* work out list of buffers to emit */ + /* Do this here as we may have flushed the batchbuffer above, * causing more state to be dirty! */ diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index c1701f06403..d4abbb08608 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -87,6 +87,10 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->ptr = batch->map; batch->dirty_state = ~0; batch->cliprect_mode = IGNORE_CLIPRECTS; + + /* account batchbuffer in aperture */ + dri_bufmgr_check_aperture_space(batch->buf); + } struct intel_batchbuffer * @@ -264,7 +268,11 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, GLuint flags, GLuint delta) { - dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer); + int ret; + int count = 0; + + ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer); + /* * Using the old buffer offset, write in what the right data would be, in case * the buffer doesn't move and we can short-circuit the relocation processing diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index f4358bb3ddf..0f990c00b43 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -54,6 +54,7 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, struct intel_context *intel; const intelScreenPrivate *intelScreen; + 
int ret; DBG("%s\n", __FUNCTION__); @@ -123,6 +124,15 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, } #endif + again: + ret = dri_bufmgr_check_aperture_space(dst->buffer); + ret |= dri_bufmgr_check_aperture_space(src->buffer); + + if (ret) { + intel_batchbuffer_flush(intel->batch); + goto again; + } + for (i = 0; i < nbox; i++, pbox++) { drm_clip_rect_t box = *pbox; diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c index f164b489639..6828425e776 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c @@ -817,7 +817,7 @@ dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) * the relocation entry write when the buffer hasn't moved from the * last known offset in target_buf. */ -static void +static int dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, GLuint offset, dri_bo *target_buf) { @@ -851,6 +851,7 @@ dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */ /* Check wraparound */ assert(reloc_buf_ttm->reloc_buf_data[0] != 0); + return 0; } /** @@ -1039,6 +1040,15 @@ intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) } } +/* + * + */ +static int +dri_ttm_check_aperture_space(dri_bo *bo) +{ + return 0; +} + /** * Initializes the TTM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. @@ -1082,7 +1092,7 @@ intel_bufmgr_ttm_init(int fd, unsigned int fence_type, bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc; bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; bufmgr_ttm->bufmgr.debug = GL_FALSE; - + bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space; /* Initialize the linked lists for BO reuse cache. */ for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head; -- 2.30.2