#include "main/imports.h"
#include "intel_batchbuffer.h"
#include "brw_state.h"
+#include "brw_vs.h"
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_vec4_gs.h"
#define FILE_DEBUG_FLAG DEBUG_STATE
GLuint size, i;
size = cache->size * 3;
- items = (struct brw_cache_item**) calloc(1, size * sizeof(*items));
+ items = calloc(1, size * sizeof(*items));
for (i = 0; i < cache->size; i++)
for (c = cache->items[i]; c; c = next) {
items[c->hash % size] = c;
}
- FREE(cache->items);
+ free(cache->items);
cache->items = items;
cache->size = size;
}
brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
{
struct brw_context *brw = cache->brw;
- struct intel_context *intel = &brw->intel;
drm_intel_bo *new_bo;
- new_bo = drm_intel_bo_alloc(intel->bufmgr, "program cache", new_size, 64);
+ new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
/* Copy any existing data that needs to be saved. */
if (cache->next_offset != 0) {
brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE;
}
+/**
+ * Attempts to find an item already in the cache whose program data and
+ * aux data are identical to those of \p result_item, so the new item can
+ * point at the existing copy instead of taking up more space in the BO.
+ *
+ * Candidates are rejected cheaply first (cache id, data size, aux size),
+ * then by aux data -- through the aux_compare callback registered for the
+ * cache id when there is one, with memcmp() otherwise -- and only then is
+ * the cache BO mapped to compare the program data itself.
+ *
+ * \param cache        the program cache to search
+ * \param result_item  the item being added; its cache_id, size and
+ *                     aux_size are read, and on success its offset is set
+ *                     to the offset of the matching item
+ * \param data         program data to look for (result_item->size bytes)
+ * \param aux          aux data to look for (result_item->aux_size bytes)
+ *
+ * \return true if a match was found and result_item->offset was filled
+ *         in; false if the caller has to place the data itself.
+ */
+static bool
+brw_try_upload_using_copy(struct brw_cache *cache,
+                          struct brw_cache_item *result_item,
+                          const void *data,
+                          const void *aux)
+{
+   int i;
+   struct brw_cache_item *item;
+
+   for (i = 0; i < cache->size; i++) {
+      for (item = cache->items[i]; item; item = item->next) {
+         /* An item's aux data sits directly behind its key. */
+         const void *item_aux = item->key + item->key_size;
+         int ret;
+
+         if (item->cache_id != result_item->cache_id ||
+             item->size != result_item->size ||
+             item->aux_size != result_item->aux_size) {
+            continue;
+         }
+
+         if (cache->aux_compare[result_item->cache_id]) {
+            if (!cache->aux_compare[result_item->cache_id](item_aux, aux,
+                                                           item->aux_size,
+                                                           item->key))
+               continue;
+         } else if (memcmp(item_aux, aux, item->aux_size) != 0) {
+            continue;
+         }
+
+         /* A failed map leaves no valid CPU pointer to compare against
+          * (and nothing to unmap), so give up on de-duplication and let
+          * the caller upload the data normally.
+          */
+         if (drm_intel_bo_map(cache->bo, false) != 0)
+            return false;
+
+         ret = memcmp(cache->bo->virtual + item->offset, data, item->size);
+         drm_intel_bo_unmap(cache->bo);
+         if (ret)
+            continue;
+
+         result_item->offset = item->offset;
+
+         return true;
+      }
+   }
+
+   return false;
+}
+
+/**
+ * Reserves room for \p item at the end of the cache BO and records the
+ * resulting offset in the item.
+ *
+ * The BO is replaced through brw_cache_new_bo() in two situations: when
+ * the item would not fit (the size is doubled until it does), and when
+ * the current BO is flagged as in use by the GPU.
+ *
+ * \param cache  the program cache; its next_offset is advanced
+ * \param item   the item being placed; item->size is read and
+ *               item->offset is written
+ * \param data   not read by this function.
+ *               NOTE(review): presumably the caller copies the bytes into
+ *               the BO afterwards -- confirm against brw_upload_cache().
+ */
+static void
+brw_upload_item_data(struct brw_cache *cache,
+                     struct brw_cache_item *item,
+                     const void *data)
+{
+   /* Not enough room left: keep doubling the BO size until the new
+    * program fits, then switch over to a BO of that size.
+    */
+   if (cache->next_offset + item->size > cache->bo->size) {
+      uint32_t grown_size;
+
+      for (grown_size = cache->bo->size * 2;
+           cache->next_offset + item->size > grown_size;
+           grown_size *= 2)
+         ;
+
+      brw_cache_new_bo(cache, grown_size);
+   }
+
+   /* Writing to a program BO that is still in use would block, so swap in
+    * a fresh BO of the same size instead.
+    */
+   if (cache->bo_used_by_gpu)
+      brw_cache_new_bo(cache, cache->bo->size);
+
+   /* Place the item at the current end of the cache; programs are always
+    * 64-byte aligned, so line up the slot for the next one right away.
+    */
+   item->offset = cache->next_offset;
+   cache->next_offset = ALIGN(item->offset + item->size, 64);
+}
+
void
brw_upload_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
void *tmp;
item->cache_id = cache_id;
+ item->size = data_size;
item->key = key;
item->key_size = key_size;
+ item->aux_size = aux_size;
hash = hash_key(item);
item->hash = hash;
- /* Allocate space in the cache BO for our new program. */
- if (cache->next_offset + data_size > cache->bo->size) {
- uint32_t new_size = cache->bo->size * 2;
-
- while (cache->next_offset + data_size > new_size)
- new_size *= 2;
-
- brw_cache_new_bo(cache, new_size);
- }
-
- /* If we would block on writing to an in-use program BO, just
- * recreate it.
+ /* If we can find a matching prog/prog_data combo in the cache
+ * already, then reuse the existing stuff. This will mean not
+ * flagging CACHE_NEW_* when transitioning between the two
+ * equivalent hash keys. This is notably useful for programs
+ * generating shaders at runtime, where multiple shaders may
+ * compile to the same thing in our backend.
*/
- if (cache->bo_used_by_gpu) {
- brw_cache_new_bo(cache, cache->bo->size);
+ if (!brw_try_upload_using_copy(cache, item, data, aux)) {
+ brw_upload_item_data(cache, item, data);
}
- item->offset = cache->next_offset;
- item->size = data_size;
-
- /* Programs are always 64-byte aligned, so set up the next one now */
- cache->next_offset = ALIGN(item->offset + data_size, 64);
-
/* Set up the memory containing the key and aux_data */
tmp = malloc(key_size + aux_size);
void
brw_init_caches(struct brw_context *brw)
{
- struct intel_context *intel = &brw->intel;
struct brw_cache *cache = &brw->cache;
cache->brw = brw;
cache->size = 7;
cache->n_items = 0;
- cache->items = (struct brw_cache_item **)
- calloc(1, cache->size * sizeof(struct brw_cache_item));
+ cache->items =
+ calloc(1, cache->size * sizeof(struct brw_cache_item *)); /* zeroed array of cache->size bucket head pointers */
- cache->bo = drm_intel_bo_alloc(intel->bufmgr,
+ cache->bo = drm_intel_bo_alloc(brw->bufmgr,
"program cache",
4096, 64);
+ /* Register the per-program-type aux hooks: aux_compare is consulted by
+ * brw_try_upload_using_copy() when looking for an identical program,
+ * and aux_free is called on each item's aux data by brw_clear_cache().
+ */
+ cache->aux_compare[BRW_VS_PROG] = brw_vs_prog_data_compare;
+ cache->aux_compare[BRW_GS_PROG] = brw_gs_prog_data_compare;
+ cache->aux_compare[BRW_WM_PROG] = brw_wm_prog_data_compare;
+ cache->aux_free[BRW_VS_PROG] = brw_vs_prog_data_free;
+ cache->aux_free[BRW_GS_PROG] = brw_gs_prog_data_free;
+ cache->aux_free[BRW_WM_PROG] = brw_wm_prog_data_free;
}
static void
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
- struct intel_context *intel = &brw->intel;
struct brw_cache_item *c, *next;
GLuint i;
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
next = c->next;
+ if (cache->aux_free[c->cache_id]) {
+ const void *item_aux = c->key + c->key_size;
+ cache->aux_free[c->cache_id](item_aux);
+ }
free((void *)c->key);
free(c);
}
brw->state.dirty.mesa |= ~0;
brw->state.dirty.brw |= ~0;
brw->state.dirty.cache |= ~0;
- intel_batchbuffer_flush(intel);
+ intel_batchbuffer_flush(brw);
}
void
brw_state_cache_check_size(struct brw_context *brw)
{
- /* un-tuned guess. Each object is generally a page, so 1000 of them is 4 MB of
+ /* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of
* state cache.
+ *
+ * Once the number of cached items exceeds that limit, the event is
+ * reported through perf_debug and the whole cache is cleared with
+ * brw_clear_cache(), so later lookups have to recompile.
*/
- if (brw->cache.n_items > 1000)
+ if (brw->cache.n_items > 2000) {
+ perf_debug("Exceeded state cache size limit. Clearing the set "
+ "of compiled programs, which will trigger recompiles\n");
brw_clear_cache(brw, &brw->cache);
+ }
}
DBG("%s\n", __FUNCTION__);
+ drm_intel_bo_unreference(cache->bo);
+ cache->bo = NULL;
brw_clear_cache(brw, cache);
free(cache->items);
cache->items = NULL;