X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_state_cache.c;h=0e98e654c1c7c67e41afc2de4a59999c5f701d93;hb=22d9a4824baf0bf89bb8e39025ad01fecb213888;hp=bb5047ea4d6b6bf4a5508a1f62e5b14d52a4672d;hpb=c35f14f36880eb20f5e54480444e343520e9bec5;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index bb5047ea4d6..0e98e654c1c 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -49,8 +49,9 @@
 #include "brw_state.h"
 #include "brw_vs.h"
 #include "brw_wm.h"
-#include "brw_vs.h"
-#include "brw_vec4_gs.h"
+#include "brw_gs.h"
+#include "brw_cs.h"
+#include "brw_program.h"
 
 #define FILE_DEBUG_FLAG DEBUG_STATE
 
@@ -137,7 +138,7 @@ bool
 brw_search_cache(struct brw_cache *cache,
                  enum brw_cache_id cache_id,
                  const void *key, GLuint key_size,
-                 uint32_t *inout_offset, void *out_aux)
+                 uint32_t *inout_offset, void *inout_aux)
 {
    struct brw_context *brw = cache->brw;
    struct brw_cache_item *item;
@@ -155,11 +156,12 @@ brw_search_cache(struct brw_cache *cache,
    if (item == NULL)
       return false;
 
-   *(void **)out_aux = ((char *)item->key + item->key_size);
+   void *aux = ((char *) item->key) + item->key_size;
 
-   if (item->offset != *inout_offset) {
-      SET_DIRTY_BIT(cache, 1 << cache_id);
+   if (item->offset != *inout_offset || aux != *((void **) inout_aux)) {
+      brw->ctx.NewDriverState |= (1 << cache_id);
       *inout_offset = item->offset;
+      *((void **) inout_aux) = aux;
    }
 
    return true;
@@ -172,14 +174,23 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
    drm_intel_bo *new_bo;
 
    new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
+   if (brw->has_llc)
+      drm_intel_gem_bo_map_unsynchronized(new_bo);
 
    /* Copy any existing data that needs to be saved. */
    if (cache->next_offset != 0) {
-      drm_intel_bo_map(cache->bo, false);
-      drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual);
-      drm_intel_bo_unmap(cache->bo);
+      if (brw->has_llc) {
+         memcpy(new_bo->virtual, cache->bo->virtual, cache->next_offset);
+      } else {
+         drm_intel_bo_map(cache->bo, false);
+         drm_intel_bo_subdata(new_bo, 0, cache->next_offset,
+                              cache->bo->virtual);
+         drm_intel_bo_unmap(cache->bo);
+      }
    }
 
+   if (brw->has_llc)
+      drm_intel_bo_unmap(cache->bo);
    drm_intel_bo_unreference(cache->bo);
    cache->bo = new_bo;
    cache->bo_used_by_gpu = false;
@@ -187,65 +198,55 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
    /* Since we have a new BO in place, we need to signal the units
     * that depend on it (state base address on gen5+, or unit state before).
     */
-   SET_DIRTY_BIT(brw, BRW_NEW_PROGRAM_CACHE);
+   brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE;
+   brw->batch.state_base_address_emitted = false;
 }
 
 /**
- * Attempts to find an item in the cache with identical data and aux
- * data to use
+ * Attempts to find an item in the cache with identical data.
  */
-static bool
-brw_try_upload_using_copy(struct brw_cache *cache,
-			  struct brw_cache_item *result_item,
-			  const void *data,
-			  const void *aux)
+static const struct brw_cache_item *
+brw_lookup_prog(const struct brw_cache *cache,
+                enum brw_cache_id cache_id,
+                const void *data, unsigned data_size)
 {
-   int i;
-   struct brw_cache_item *item;
+   const struct brw_context *brw = cache->brw;
+   unsigned i;
+   const struct brw_cache_item *item;
 
    for (i = 0; i < cache->size; i++) {
       for (item = cache->items[i]; item; item = item->next) {
-	 const void *item_aux = item->key + item->key_size;
 	 int ret;
 
-	 if (item->cache_id != result_item->cache_id ||
-	     item->size != result_item->size ||
-	     item->aux_size != result_item->aux_size) {
+	 if (item->cache_id != cache_id || item->size != data_size)
 	    continue;
-	 }
 
-         if (cache->aux_compare[result_item->cache_id]) {
-            if (!cache->aux_compare[result_item->cache_id](item_aux, aux))
-               continue;
-         } else if (memcmp(item_aux, aux, item->aux_size) != 0) {
-	    continue;
-	 }
-
-	 drm_intel_bo_map(cache->bo, false);
+         if (!brw->has_llc)
+            drm_intel_bo_map(cache->bo, false);
 	 ret = memcmp(cache->bo->virtual + item->offset, data, item->size);
-	 drm_intel_bo_unmap(cache->bo);
+         if (!brw->has_llc)
+            drm_intel_bo_unmap(cache->bo);
 	 if (ret)
 	    continue;
 
-	 result_item->offset = item->offset;
-
-	 return true;
+	 return item;
       }
    }
 
-   return false;
+   return NULL;
 }
 
-static void
-brw_upload_item_data(struct brw_cache *cache,
-		     struct brw_cache_item *item,
-		     const void *data)
+static uint32_t
+brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
 {
+   uint32_t offset;
+   struct brw_context *brw = cache->brw;
+
    /* Allocate space in the cache BO for our new program. */
-   if (cache->next_offset + item->size > cache->bo->size) {
+   if (cache->next_offset + size > cache->bo->size) {
       uint32_t new_size = cache->bo->size * 2;
 
-      while (cache->next_offset + item->size > new_size)
+      while (cache->next_offset + size > new_size)
 	 new_size *= 2;
 
       brw_cache_new_bo(cache, new_size);
@@ -254,14 +255,17 @@ brw_upload_item_data(struct brw_cache *cache,
    /* If we would block on writing to an in-use program BO, just
     * recreate it.
     */
-   if (cache->bo_used_by_gpu) {
+   if (!brw->has_llc && cache->bo_used_by_gpu) {
+      perf_debug("Copying busy program cache buffer.\n");
       brw_cache_new_bo(cache, cache->bo->size);
    }
 
-   item->offset = cache->next_offset;
+   offset = cache->next_offset;
 
    /* Programs are always 64-byte aligned, so set up the next one now */
-   cache->next_offset = ALIGN(item->offset + item->size, 64);
+   cache->next_offset = ALIGN(offset + size, 64);
+
+   return offset;
 }
 
 void
@@ -278,6 +282,8 @@ brw_upload_cache(struct brw_cache *cache,
 {
    struct brw_context *brw = cache->brw;
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+   const struct brw_cache_item *matching_data =
+      brw_lookup_prog(cache, cache_id, data, data_size);
    GLuint hash;
    void *tmp;
 
@@ -289,15 +295,23 @@ brw_upload_cache(struct brw_cache *cache,
    hash = hash_key(item);
    item->hash = hash;
 
-   /* If we can find a matching prog/prog_data combo in the cache
-    * already, then reuse the existing stuff.  This will mean not
-    * flagging CACHE_NEW_* when transitioning between the two
-    * equivalent hash keys.  This is notably useful for programs
-    * generating shaders at runtime, where multiple shaders may
-    * compile to the thing in our backend.
+   /* If we can find a matching prog in the cache already, then reuse the
+    * existing stuff without creating new copy into the underlying buffer
+    * object. This is notably useful for programs generating shaders at
+    * runtime, where multiple shaders may compile to the same thing in our
+    * backend.
     */
-   if (!brw_try_upload_using_copy(cache, item, data, aux)) {
-      brw_upload_item_data(cache, item, data);
+   if (matching_data) {
+      item->offset = matching_data->offset;
+   } else {
+      item->offset = brw_alloc_item_data(cache, data_size);
+
+      /* Copy data to the buffer */
+      if (brw->has_llc) {
+         memcpy((char *)cache->bo->virtual + item->offset, data, data_size);
+      } else {
+         drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
+      }
    }
 
    /* Set up the memory containing the key and aux_data */
@@ -308,7 +322,7 @@ brw_upload_cache(struct brw_cache *cache,
 
    item->key = tmp;
 
-   if (cache->n_items > cache->size * 1.5)
+   if (cache->n_items > cache->size * 1.5f)
       rehash(cache);
 
    hash %= cache->size;
@@ -316,12 +330,9 @@ brw_upload_cache(struct brw_cache *cache,
    cache->items[hash] = item;
    cache->n_items++;
 
-   /* Copy data to the buffer */
-   drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
-
    *out_offset = item->offset;
    *(void **)out_aux = (void *)((char *)item->key + item->key_size);
-   SET_DIRTY_BIT(cache, 1 << cache_id);
+   cache->brw->ctx.NewDriverState |= 1 << cache_id;
 }
 
 void
@@ -339,13 +350,8 @@ brw_init_caches(struct brw_context *brw)
    cache->bo = drm_intel_bo_alloc(brw->bufmgr,
 				  "program cache",
 				  4096, 64);
-
-   cache->aux_compare[BRW_VS_PROG] = brw_vs_prog_data_compare;
-   cache->aux_compare[BRW_GS_PROG] = brw_gs_prog_data_compare;
-   cache->aux_compare[BRW_WM_PROG] = brw_wm_prog_data_compare;
-   cache->aux_free[BRW_VS_PROG] = brw_stage_prog_data_free;
-   cache->aux_free[BRW_GS_PROG] = brw_stage_prog_data_free;
-   cache->aux_free[BRW_WM_PROG] = brw_stage_prog_data_free;
+   if (brw->has_llc)
+      drm_intel_gem_bo_map_unsynchronized(cache->bo);
 }
 
 static void
@@ -354,14 +360,17 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
    struct brw_cache_item *c, *next;
    GLuint i;
 
-   DBG("%s\n", __FUNCTION__);
+   DBG("%s\n", __func__);
 
    for (i = 0; i < cache->size; i++) {
       for (c = cache->items[i]; c; c = next) {
 	 next = c->next;
-         if (cache->aux_free[c->cache_id]) {
+         if (c->cache_id == BRW_CACHE_VS_PROG ||
+             c->cache_id == BRW_CACHE_GS_PROG ||
+             c->cache_id == BRW_CACHE_FS_PROG ||
+             c->cache_id == BRW_CACHE_CS_PROG) {
             const void *item_aux = c->key + c->key_size;
-            cache->aux_free[c->cache_id](item_aux);
+            brw_stage_prog_data_free(item_aux);
          }
 	 free((void *)c->key);
 	 free(c);
@@ -379,9 +388,27 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
    /* We need to make sure that the programs get regenerated, since
     * any offsets leftover in brw_context will no longer be valid.
     */
-   SET_DIRTY_ALL(mesa);
-   SET_DIRTY64_ALL(brw);
-   SET_DIRTY_ALL(cache);
+   brw->NewGLState = ~0;
+   brw->ctx.NewDriverState = ~0ull;
+   brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
+   brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
+   brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
+   brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;
+
+   /* Also, NULL out any stale program pointers. */
+   brw->vs.prog_data = NULL;
+   brw->vs.base.prog_data = NULL;
+   brw->tcs.prog_data = NULL;
+   brw->tcs.base.prog_data = NULL;
+   brw->tes.prog_data = NULL;
+   brw->tes.base.prog_data = NULL;
+   brw->gs.prog_data = NULL;
+   brw->gs.base.prog_data = NULL;
+   brw->wm.prog_data = NULL;
+   brw->wm.base.prog_data = NULL;
+   brw->cs.prog_data = NULL;
+   brw->cs.base.prog_data = NULL;
+
    intel_batchbuffer_flush(brw);
 }
 
@@ -403,8 +430,10 @@ static void
 brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
 {
 
-   DBG("%s\n", __FUNCTION__);
+   DBG("%s\n", __func__);
 
+   if (brw->has_llc)
+      drm_intel_bo_unmap(cache->bo);
    drm_intel_bo_unreference(cache->bo);
    cache->bo = NULL;
    brw_clear_cache(brw, cache);