X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_state_cache.c;h=3988625ea9189466bc75c86d7f3e31fafe5e9a4b;hb=47b556fbcaea4660b21481e40d89167d883d47f5;hp=e4c9ba7d870fccb95137a10ecf8e4c427a5885c2;hpb=e128cc205e30540597ecd57849060a6bf4714d9f;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index e4c9ba7d870..3988625ea91 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -31,65 +31,36 @@ /** @file brw_state_cache.c * - * This file implements a simple static state cache for 965. The consumers - * can query the hash table of state using a cache_id, opaque key data, - * and list of buffers that will be used in relocations, and receive the - * corresponding state buffer object of state (plus associated auxiliary - * data) in return. + * This file implements a simple static state cache for 965. The + * consumers can query the hash table of state using a cache_id, + * opaque key data, and receive the corresponding state buffer object + * of state (plus associated auxiliary data) in return. Objects in + * the cache may not have relocations (pointers to other BOs) in them. * - * The inner workings are a simple hash table based on a CRC of the key data. - * The cache_id and relocation target buffers associated with the state - * buffer are included as auxiliary key data, but are not part of the hash - * value (this should be fixed, but will likely be fixed instead by making - * consumers use structured keys). + * The inner workings are a simple hash table based on a CRC of the + * key data. * - * Replacement is not implemented. Instead, when the cache gets too big, at - * a safe point (unlock) we throw out all of the cache data and let it - * regenerate for the next rendering operation. - * - * The reloc_buf pointers need to be included as key data, otherwise the - * non-unique values stuffed in the offset in key data through - * brw_cache_data() may result in successful probe for state buffers - * even when the buffer being referenced doesn't match. The result would be - * that the same state cache entry is used twice for different buffers, - * only one of the two buffers referenced gets put into the offset, and the - * incorrect program is run for the other instance. + * Replacement is not implemented. Instead, when the cache gets too + * big we throw out all of the cache data and let it get regenerated. */ #include "main/imports.h" -#include "brw_state.h" #include "intel_batchbuffer.h" +#include "brw_state.h" -/* XXX: Fixme - have to include these to get the sizes of the prog_key - * structs: - */ -#include "brw_wm.h" -#include "brw_vs.h" -#include "brw_clip.h" -#include "brw_sf.h" -#include "brw_gs.h" - +#define FILE_DEBUG_FLAG DEBUG_STATE static GLuint -hash_key(const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +hash_key(struct brw_cache_item *item) { - GLuint *ikey = (GLuint *)key; - GLuint hash = 0, i; + GLuint *ikey = (GLuint *)item->key; + GLuint hash = item->cache_id, i; - assert(key_size % 4 == 0); + assert(item->key_size % 4 == 0); /* I'm sure this can be improved on: */ - for (i = 0; i < key_size/4; i++) { - hash ^= ikey[i]; - hash = (hash << 5) | (hash >> 27); - } - - /* Include the BO pointers as key data as well */ - ikey = (GLuint *)reloc_bufs; - key_size = nr_reloc_bufs * sizeof(dri_bo *); - for (i = 0; i < key_size/4; i++) { + for (i = 0; i < item->key_size/4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); } @@ -97,28 +68,19 @@ hash_key(const void *key, GLuint key_size, return hash; } - -/** - * Marks a new buffer as being chosen for the given cache id. - */ -static void -update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, - dri_bo *bo) +static int +brw_cache_item_equals(const struct brw_cache_item *a, + const struct brw_cache_item *b) { - if (bo == cache->last_bo[cache_id]) - return; /* no change */ - - dri_bo_unreference(cache->last_bo[cache_id]); - cache->last_bo[cache_id] = bo; - dri_bo_reference(cache->last_bo[cache_id]); - cache->brw->state.dirty.cache |= 1 << cache_id; + return a->cache_id == b->cache_id && + a->hash == b->hash && + a->key_size == b->key_size && + (memcmp(a->key, b->key, a->key_size) == 0); } - static struct brw_cache_item * -search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - GLuint hash, const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +search_cache(struct brw_cache *cache, GLuint hash, + struct brw_cache_item *lookup) { struct brw_cache_item *c; @@ -133,13 +95,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, #endif for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->cache_id == cache_id && - c->hash == hash && - c->key_size == key_size && - memcmp(c->key, key, key_size) == 0 && - c->nr_reloc_bufs == nr_reloc_bufs && - memcmp(c->reloc_bufs, reloc_bufs, - nr_reloc_bufs * sizeof(dri_bo *)) == 0) + if (brw_cache_item_equals(lookup, c)) return c; } @@ -155,7 +111,7 @@ rehash(struct brw_cache *cache) GLuint size, i; size = cache->size * 3; - items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items)); + items = (struct brw_cache_item**) calloc(1, size * sizeof(*items)); for (i = 0; i < cache->size; i++) for (c = cache->items[i]; c; c = next) { @@ -173,300 +129,224 @@ rehash(struct brw_cache *cache) /** * Returns the buffer object matching cache_id and key, or NULL. */ -dri_bo * +bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return) + const void *key, GLuint key_size, + uint32_t *inout_offset, void *out_aux) { + struct brw_context *brw = cache->brw; struct brw_cache_item *item; - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + struct brw_cache_item lookup; + GLuint hash; - item = search_cache(cache, cache_id, hash, key, key_size, - reloc_bufs, nr_reloc_bufs); + lookup.cache_id = cache_id; + lookup.key = key; + lookup.key_size = key_size; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); if (item == NULL) - return NULL; + return false; - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); + *(void **)out_aux = ((char *)item->key + item->key_size); - update_cache_last(cache, cache_id, item->bo); + if (item->offset != *inout_offset) { + brw->state.dirty.cache |= (1 << cache_id); + *inout_offset = item->offset; + } - dri_bo_reference(item->bo); - return item->bo; + return true; } - -dri_bo * -brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_size, - const void *aux, - void *aux_return ) +static void +brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) { - struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); - GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *); - GLuint aux_size = cache->aux_size[cache_id]; - void *tmp; - dri_bo *bo; - int i; - - /* Create the buffer object to contain the data */ - bo = dri_bo_alloc(cache->brw->intel.bufmgr, - cache->name[cache_id], data_size, 1 << 6); + struct brw_context *brw = cache->brw; + struct intel_context *intel = &brw->intel; + drm_intel_bo *new_bo; + new_bo = drm_intel_bo_alloc(intel->bufmgr, "program cache", new_size, 64); - /* Set up the memory containing the key, aux_data, and reloc_bufs */ - tmp = _mesa_malloc(key_size + aux_size + relocs_size); - - memcpy(tmp, key, key_size); - memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); - memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); - for (i = 0; i < nr_reloc_bufs; i++) { - if (reloc_bufs[i] != NULL) - dri_bo_reference(reloc_bufs[i]); + /* Copy any existing data that needs to be saved. */ + if (cache->next_offset != 0) { + drm_intel_bo_map(cache->bo, false); + drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual); + drm_intel_bo_unmap(cache->bo); } - item->cache_id = cache_id; - item->key = tmp; - item->hash = hash; - item->key_size = key_size; - item->reloc_bufs = tmp + key_size + aux_size; - item->nr_reloc_bufs = nr_reloc_bufs; + drm_intel_bo_unreference(cache->bo); + cache->bo = new_bo; + cache->bo_used_by_gpu = false; - item->bo = bo; - dri_bo_reference(bo); + /* Since we have a new BO in place, we need to signal the units + * that depend on it (state base address on gen5+, or unit state before). + */ + brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE; +} - if (cache->n_items > cache->size * 1.5) - rehash(cache); +/** + * Attempts to find an item in the cache with identical data and aux + * data to use + */ +static bool +brw_try_upload_using_copy(struct brw_cache *cache, + struct brw_cache_item *result_item, + const void *data, + const void *aux) +{ + int i; + struct brw_cache_item *item; - hash %= cache->size; - item->next = cache->items[hash]; - cache->items[hash] = item; - cache->n_items++; + for (i = 0; i < cache->size; i++) { + for (item = cache->items[i]; item; item = item->next) { + const void *item_aux = item->key + item->key_size; + int ret; + + if (item->cache_id != result_item->cache_id || + item->size != result_item->size || + item->aux_size != result_item->aux_size) { + continue; + } - if (aux_return) { - assert(cache->aux_size[cache_id]); - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - } + if (memcmp(item_aux, aux, item->aux_size) != 0) { + continue; + } - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("upload %s: %d bytes to cache id %d\n", - cache->name[cache_id], - data_size, cache_id); + drm_intel_bo_map(cache->bo, false); + ret = memcmp(cache->bo->virtual + item->offset, data, item->size); + drm_intel_bo_unmap(cache->bo); + if (ret) + continue; - /* Copy data to the buffer */ - dri_bo_subdata(bo, 0, data_size, data); + result_item->offset = item->offset; - update_cache_last(cache, cache_id, bo); + return true; + } + } - return bo; + return false; } +static void +brw_upload_item_data(struct brw_cache *cache, + struct brw_cache_item *item, + const void *data) +{ + /* Allocate space in the cache BO for our new program. */ + if (cache->next_offset + item->size > cache->bo->size) { + uint32_t new_size = cache->bo->size * 2; -/** - * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. - * - * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be - * better to use, as the potentially changing offsets in the data-used-as-key - * will result in excessive cache misses. - * - * If aux data is involved, use search/upload instead. + while (cache->next_offset + item->size > new_size) + new_size *= 2; - */ -dri_bo * -brw_cache_data(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint data_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs) -{ - dri_bo *bo; - struct brw_cache_item *item; - GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); - - item = search_cache(cache, cache_id, hash, data, data_size, - reloc_bufs, nr_reloc_bufs); - if (item) { - update_cache_last(cache, cache_id, item->bo); - dri_bo_reference(item->bo); - return item->bo; + brw_cache_new_bo(cache, new_size); + } + + /* If we would block on writing to an in-use program BO, just + * recreate it. + */ + if (cache->bo_used_by_gpu) { + brw_cache_new_bo(cache, cache->bo->size); } - bo = brw_upload_cache(cache, cache_id, - data, data_size, - reloc_bufs, nr_reloc_bufs, - data, data_size, - NULL, NULL); + item->offset = cache->next_offset; - return bo; + /* Programs are always 64-byte aligned, so set up the next one now */ + cache->next_offset = ALIGN(item->offset + item->size, 64); } -enum pool_type { - DW_SURFACE_STATE, - DW_GENERAL_STATE -}; +void +brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + const void *data, + GLuint data_size, + const void *aux, + GLuint aux_size, + uint32_t *out_offset, + void *out_aux) +{ + struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); + GLuint hash; + void *tmp; + item->cache_id = cache_id; + item->size = data_size; + item->key = key; + item->key_size = key_size; + item->aux_size = aux_size; + hash = hash_key(item); + item->hash = hash; -static void -brw_init_cache_id(struct brw_cache *cache, - const char *name, - enum brw_cache_id id, - GLuint aux_size) -{ - cache->name[id] = strdup(name); - cache->aux_size[id] = aux_size; -} + /* If we can find a matching prog/prog_data combo in the cache + * already, then reuse the existing stuff. This will mean not + * flagging CACHE_NEW_* when transitioning between the two + * equivalent hash keys. This is notably useful for programs + * generating shaders at runtime, where multiple shaders may + * compile to the thing in our backend. + */ + if (!brw_try_upload_using_copy(cache, item, data, aux)) { + brw_upload_item_data(cache, item, data); + } + /* Set up the memory containing the key and aux_data */ + tmp = malloc(key_size + aux_size); -static void -brw_init_non_surface_cache(struct brw_context *brw) -{ - struct brw_cache *cache = &brw->cache; + memcpy(tmp, key, key_size); + memcpy(tmp + key_size, aux, aux_size); - cache->brw = brw; + item->key = tmp; - cache->size = 7; - cache->n_items = 0; - cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - - brw_init_cache_id(cache, - "CC_VP", - BRW_CC_VP, - 0); - - brw_init_cache_id(cache, - "CC_UNIT", - BRW_CC_UNIT, - 0); - - brw_init_cache_id(cache, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_data)); - - brw_init_cache_id(cache, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - 0); - - brw_init_cache_id(cache, - "SAMPLER", - BRW_SAMPLER, - 0); - - brw_init_cache_id(cache, - "WM_UNIT", - BRW_WM_UNIT, - 0); - - brw_init_cache_id(cache, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_data)); - - brw_init_cache_id(cache, - "SF_VP", - BRW_SF_VP, - 0); - - brw_init_cache_id(cache, - "SF_UNIT", - BRW_SF_UNIT, - 0); - - brw_init_cache_id(cache, - "VS_UNIT", - BRW_VS_UNIT, - 0); - - brw_init_cache_id(cache, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_data)); - - brw_init_cache_id(cache, - "CLIP_UNIT", - BRW_CLIP_UNIT, - 0); - - brw_init_cache_id(cache, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_data)); - - brw_init_cache_id(cache, - "GS_UNIT", - BRW_GS_UNIT, - 0); - - brw_init_cache_id(cache, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_data)); -} + if (cache->n_items > cache->size * 1.5) + rehash(cache); + + hash %= cache->size; + item->next = cache->items[hash]; + cache->items[hash] = item; + cache->n_items++; + /* Copy data to the buffer */ + drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); -static void -brw_init_surface_cache(struct brw_context *brw) + *out_offset = item->offset; + *(void **)out_aux = (void *)((char *)item->key + item->key_size); + cache->brw->state.dirty.cache |= 1 << cache_id; +} + +void +brw_init_caches(struct brw_context *brw) { - struct brw_cache *cache = &brw->surface_cache; + struct intel_context *intel = &brw->intel; + struct brw_cache *cache = &brw->cache; cache->brw = brw; cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - - brw_init_cache_id(cache, - "SS_SURFACE", - BRW_SS_SURFACE, - 0); - - brw_init_cache_id(cache, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - 0); -} - + calloc(1, cache->size * sizeof(struct brw_cache_item)); -void -brw_init_caches(struct brw_context *brw) -{ - brw_init_non_surface_cache(brw); - brw_init_surface_cache(brw); + cache->bo = drm_intel_bo_alloc(intel->bufmgr, + "program cache", + 4096, 64); } - static void brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { + struct intel_context *intel = &brw->intel; struct brw_cache_item *c, *next; GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + DBG("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { - int j; - next = c->next; - for (j = 0; j < c->nr_reloc_bufs; j++) - dri_bo_unreference(c->reloc_bufs[j]); - dri_bo_unreference(c->bo); free((void *)c->key); free(c); } @@ -475,82 +355,38 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; - if (brw->curbe.last_buf) { - _mesa_free(brw->curbe.last_buf); - brw->curbe.last_buf = NULL; - } + /* Start putting programs into the start of the BO again, since + * we'll never find the old results. + */ + cache->next_offset = 0; + /* We need to make sure that the programs get regenerated, since + * any offsets leftover in brw_context will no longer be valid. + */ brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; -} - -/* Clear all entries from the cache that point to the given bo. - * - * This lets us release memory for reuse earlier for known-dead buffers, - * at the cost of walking the entire hash table. - */ -void -brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) -{ - struct brw_cache_item **prev; - GLuint i; - - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); - - for (i = 0; i < cache->size; i++) { - for (prev = &cache->items[i]; *prev;) { - struct brw_cache_item *c = *prev; - - if (drm_intel_bo_references(c->bo, bo)) { - int j; - - *prev = c->next; - - for (j = 0; j < c->nr_reloc_bufs; j++) - dri_bo_unreference(c->reloc_bufs[j]); - dri_bo_unreference(c->bo); - free((void *)c->key); - free(c); - cache->n_items--; - } else { - prev = &c->next; - } - } - } + intel_batchbuffer_flush(intel); } void brw_state_cache_check_size(struct brw_context *brw) { - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); - - /* un-tuned guess. We've got around 20 state objects for a total of around - * 32k, so 1000 of them is around 1.5MB. + /* un-tuned guess. Each object is generally a page, so 1000 of them is 4 MB of + * state cache. */ if (brw->cache.n_items > 1000) brw_clear_cache(brw, &brw->cache); - - if (brw->surface_cache.n_items > 1000) - brw_clear_cache(brw, &brw->surface_cache); } static void brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { - GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + DBG("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); - for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(cache->last_bo[i]); - free(cache->name[i]); - } free(cache->items); cache->items = NULL; cache->size = 0; @@ -561,5 +397,4 @@ void brw_destroy_caches(struct brw_context *brw) { brw_destroy_cache(brw, &brw->cache); - brw_destroy_cache(brw, &brw->surface_cache); }