#include "brw_state.h"
#include "brw_vs.h"
#include "brw_wm.h"
-#include "brw_vs.h"
-#include "brw_vec4_gs.h"
+#include "brw_gs.h"
+#include "brw_cs.h"
+#include "brw_program.h"
#define FILE_DEBUG_FLAG DEBUG_STATE
brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key, GLuint key_size,
- uint32_t *inout_offset, void *out_aux)
+ uint32_t *inout_offset, void *inout_aux)
{
struct brw_context *brw = cache->brw;
struct brw_cache_item *item;
if (item == NULL)
return false;
- *(void **)out_aux = ((char *)item->key + item->key_size);
+ void *aux = ((char *) item->key) + item->key_size;
- if (item->offset != *inout_offset) {
- brw->state.dirty.cache |= (1 << cache_id);
+ if (item->offset != *inout_offset || aux != *((void **) inout_aux)) {
+ brw->ctx.NewDriverState |= (1 << cache_id);
*inout_offset = item->offset;
+ *((void **) inout_aux) = aux;
}
return true;
/* Since we have a new BO in place, we need to signal the units
* that depend on it (state base address on gen5+, or unit state before).
*/
- brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE;
+ brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE;
+ brw->batch.state_base_address_emitted = false;
}
/**
- * Attempts to find an item in the cache with identical data and aux
- * data to use
+ * Attempts to find an item in the cache with identical data.
*
* Walks every bucket of cache->items (cache->size buckets) and follows each
* chain through item->next. A candidate is rejected when its cache_id
* differs from \p cache_id or its size differs from \p data_size, and also
* when the local `ret` is non-zero.
*
* NOTE(review): the statement that assigns `ret` is not visible in this
* excerpt -- presumably a byte comparison of \p data against the stored
* program; confirm against the full file.
*
* Returns the first item that passes all checks, or NULL when none does.
*/
-static bool
-brw_try_upload_using_copy(struct brw_cache *cache,
- struct brw_cache_item *result_item,
- const void *data,
- const void *aux)
+static const struct brw_cache_item *
+brw_lookup_prog(const struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data, unsigned data_size)
{
- struct brw_context *brw = cache->brw;
- int i;
- struct brw_cache_item *item;
+ const struct brw_context *brw = cache->brw;
+ unsigned i;
+ const struct brw_cache_item *item;
for (i = 0; i < cache->size; i++) {
for (item = cache->items[i]; item; item = item->next) {
- const void *item_aux = item->key + item->key_size;
int ret;
- if (item->cache_id != result_item->cache_id ||
- item->size != result_item->size ||
- item->aux_size != result_item->aux_size) {
- continue;
- }
-
- if (cache->aux_compare[result_item->cache_id]) {
- if (!cache->aux_compare[result_item->cache_id](item_aux, aux))
- continue;
- } else if (memcmp(item_aux, aux, item->aux_size) != 0) {
/* Cheap filter first: only items of the same kind and size can match. */
+ if (item->cache_id != cache_id || item->size != data_size)
continue;
- }
/* When brw->has_llc is unset, the cache BO is mapped explicitly here. */
if (!brw->has_llc)
drm_intel_bo_map(cache->bo, false);
/* A non-zero ret rejects this candidate (assignment of ret not shown). */
if (ret)
continue;
- result_item->offset = item->offset;
-
- return true;
+ return item;
}
}
- return false;
+ return NULL;
}
-static void
-brw_upload_item_data(struct brw_cache *cache,
- struct brw_cache_item *item,
- const void *data)
/**
 * Reserves \p size bytes in the cache BO and returns the offset at which
 * they start.
 *
 * The returned offset is the value of cache->next_offset on entry to the
 * final assignment; cache->next_offset is then advanced to
 * ALIGN(offset + size, 64) for the next allocation. When the request would
 * run past cache->bo->size, a new size is computed by doubling the current
 * BO size until the request fits, and brw_cache_new_bo() is called with it.
 *
 * This routine only reserves space; it does not copy any data.
 */
+static uint32_t
+brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
{
+ uint32_t offset;
struct brw_context *brw = cache->brw;
/* Allocate space in the cache BO for our new program. */
- if (cache->next_offset + item->size > cache->bo->size) {
+ if (cache->next_offset + size > cache->bo->size) {
uint32_t new_size = cache->bo->size * 2;
- while (cache->next_offset + item->size > new_size)
+ while (cache->next_offset + size > new_size)
new_size *= 2;
brw_cache_new_bo(cache, new_size);
/* NOTE(review): this second call, with the current BO size, directly
 * follows the first in this excerpt; any lines between them (such as a
 * separate guarding condition) are not visible here -- confirm against
 * the full file.
 */
brw_cache_new_bo(cache, cache->bo->size);
}
- item->offset = cache->next_offset;
+ offset = cache->next_offset;
/* Programs are always 64-byte aligned, so set up the next one now */
- cache->next_offset = ALIGN(item->offset + item->size, 64);
+ cache->next_offset = ALIGN(offset + size, 64);
+
+ return offset;
}
void
{
struct brw_context *brw = cache->brw;
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+ const struct brw_cache_item *matching_data =
+ brw_lookup_prog(cache, cache_id, data, data_size);
GLuint hash;
void *tmp;
hash = hash_key(item);
item->hash = hash;
- /* If we can find a matching prog/prog_data combo in the cache
- * already, then reuse the existing stuff. This will mean not
- * flagging CACHE_NEW_* when transitioning between the two
- * equivalent hash keys. This is notably useful for programs
- * generating shaders at runtime, where multiple shaders may
- * compile to the thing in our backend.
+ /* If we can find a matching prog in the cache already, then reuse the
+ * existing data without creating a new copy in the underlying buffer
+ * object. This is notably useful for programs generating shaders at
+ * runtime, where multiple shaders may compile to the same thing in our
+ * backend.
*/
- if (!brw_try_upload_using_copy(cache, item, data, aux)) {
- brw_upload_item_data(cache, item, data);
+ if (matching_data) {
+ item->offset = matching_data->offset;
+ } else {
+ item->offset = brw_alloc_item_data(cache, data_size);
+
+ /* Copy data to the buffer */
+ if (brw->has_llc) {
+ memcpy((char *)cache->bo->virtual + item->offset, data, data_size);
+ } else {
+ drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
+ }
}
/* Set up the memory containing the key and aux_data */
item->key = tmp;
- if (cache->n_items > cache->size * 1.5)
+ if (cache->n_items > cache->size * 1.5f)
rehash(cache);
hash %= cache->size;
cache->items[hash] = item;
cache->n_items++;
- /* Copy data to the buffer */
- if (brw->has_llc) {
- memcpy((char *) cache->bo->virtual + item->offset, data, data_size);
- } else {
- drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
- }
-
*out_offset = item->offset;
*(void **)out_aux = (void *)((char *)item->key + item->key_size);
- cache->brw->state.dirty.cache |= 1 << cache_id;
+ cache->brw->ctx.NewDriverState |= 1 << cache_id;
}
void
4096, 64);
if (brw->has_llc)
drm_intel_gem_bo_map_unsynchronized(cache->bo);
-
- cache->aux_compare[BRW_VS_PROG] = brw_vs_prog_data_compare;
- cache->aux_compare[BRW_GS_PROG] = brw_gs_prog_data_compare;
- cache->aux_compare[BRW_WM_PROG] = brw_wm_prog_data_compare;
- cache->aux_free[BRW_VS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_GS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_WM_PROG] = brw_stage_prog_data_free;
}
static void
struct brw_cache_item *c, *next;
GLuint i;
- DBG("%s\n", __FUNCTION__);
+ DBG("%s\n", __func__);
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
next = c->next;
- if (cache->aux_free[c->cache_id]) {
+ if (c->cache_id == BRW_CACHE_VS_PROG ||
+ c->cache_id == BRW_CACHE_GS_PROG ||
+ c->cache_id == BRW_CACHE_FS_PROG ||
+ c->cache_id == BRW_CACHE_CS_PROG) {
const void *item_aux = c->key + c->key_size;
- cache->aux_free[c->cache_id](item_aux);
+ brw_stage_prog_data_free(item_aux);
}
free((void *)c->key);
free(c);
/* We need to make sure that the programs get regenerated, since
* any offsets leftover in brw_context will no longer be valid.
*/
- brw->state.dirty.mesa |= ~0;
- brw->state.dirty.brw |= ~0ull;
- brw->state.dirty.cache |= ~0;
+ brw->NewGLState = ~0;
+ brw->ctx.NewDriverState = ~0ull;
+ brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
+ brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
+ brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
+ brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;
+
+ /* Also, NULL out any stale program pointers. */
+ brw->vs.prog_data = NULL;
+ brw->vs.base.prog_data = NULL;
+ brw->tcs.prog_data = NULL;
+ brw->tcs.base.prog_data = NULL;
+ brw->tes.prog_data = NULL;
+ brw->tes.base.prog_data = NULL;
+ brw->gs.prog_data = NULL;
+ brw->gs.base.prog_data = NULL;
+ brw->wm.prog_data = NULL;
+ brw->wm.base.prog_data = NULL;
+ brw->cs.prog_data = NULL;
+ brw->cs.base.prog_data = NULL;
+
intel_batchbuffer_flush(brw);
}
brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{
- DBG("%s\n", __FUNCTION__);
+ DBG("%s\n", __func__);
if (brw->has_llc)
drm_intel_bo_unmap(cache->bo);