iris: Support multiple binder BOs, update Surface State Base Address

author Kenneth Graunke <kenneth@whitecape.org>

Sun, 9 Sep 2018 02:43:34 +0000 (19:43 -0700)

committer Kenneth Graunke <kenneth@whitecape.org>

Thu, 21 Feb 2019 18:26:08 +0000 (10:26 -0800)
author Kenneth Graunke <kenneth@whitecape.org>
Sun, 9 Sep 2018 02:43:34 +0000 (19:43 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 21 Feb 2019 18:26:08 +0000 (10:26 -0800)
diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c

index 2271513f6c9c806b859feaabe2d47d00b7903299..b35466d69f1f597b58dd2f6fddde7d5fe9762eb9 100644 (file)
--- a/src/gallium/drivers/iris/iris_batch.c
+++ b/src/gallium/drivers/iris/iris_batch.c
@@ -38,7 +38,6 @@
   */
  
  #include "iris_batch.h"
-#include "iris_binder.h"
  #include "iris_bufmgr.h"
  #include "iris_context.h"
  
@@ -158,8 +157,6 @@ iris_init_batch(struct iris_batch *batch,
     batch->validation_list =
        malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));
  
-   batch->binder.bo = NULL;
-
     batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                   _mesa_key_pointer_equal);
     batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer,
@@ -254,9 +251,6 @@ iris_batch_reset(struct iris_batch *batch)
     create_batch(batch);
     assert(batch->bo->index == 0);
  
-   iris_destroy_binder(&batch->binder);
-   iris_init_binder(&batch->binder, batch->bo->bufmgr);
-
     if (batch->state_sizes)
        _mesa_hash_table_clear(batch->state_sizes, NULL);
  
@@ -281,8 +275,6 @@ iris_batch_free(struct iris_batch *batch)
     _mesa_hash_table_destroy(batch->cache.render, NULL);
     _mesa_set_destroy(batch->cache.depth, NULL);
  
-   iris_destroy_binder(&batch->binder);
-
     if (batch->state_sizes) {
        _mesa_hash_table_destroy(batch->state_sizes, NULL);
        gen_batch_decode_ctx_finish(&batch->decoder);
@@ -432,18 +424,16 @@ _iris_batch_flush_fence(struct iris_batch *batch,
  
     if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
        int bytes_for_commands = iris_batch_bytes_used(batch);
-      int bytes_for_binder = batch->binder.insert_point;
        int second_bytes = 0;
        if (batch->bo != batch->exec_bos[0]) {
           second_bytes = bytes_for_commands;
           bytes_for_commands += batch->primary_batch_size;
        }
        fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5d+%5db (%0.1f%%) "
-              "(cmds), %5db (%0.1f%%) (binder), %4d BOs (%0.1fMb aperture)\n",
+              "(cmds), %4d BOs (%0.1fMb aperture)\n",
                file, line,
                batch->primary_batch_size, second_bytes,
                100.0f * bytes_for_commands / BATCH_SZ,
-              bytes_for_binder, 100.0f * bytes_for_binder / IRIS_BINDER_SIZE,
                batch->exec_count,
                (float) batch->aperture_space / (1024 * 1024));
        dump_validation_list(batch);
diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h

index 7d446817d3df5f5aa960278610c52e6a4490955e..8ff3f60fa9d2fe544913c36dd002a3956b393009 100644 (file)
--- a/src/gallium/drivers/iris/iris_batch.h
+++ b/src/gallium/drivers/iris/iris_batch.h
@@ -29,7 +29,6 @@
  #include <string.h>
  #include "i915_drm.h"
  #include "common/gen_decoder.h"
-#include "iris_binder.h"
  
  /* The kernel assumes batchbuffers are smaller than 256kB. */
  #define MAX_BATCH_SIZE (256 * 1024)
@@ -58,6 +57,9 @@ struct iris_batch {
     /** Last BO submitted to the hardware.  Used for glFinish(). */
     struct iris_bo *last_bo;
  
+   /** Last Surface State Base Address set in this hardware context. */
+   uint64_t last_surface_base_address;
+
     uint32_t hw_ctx_id;
  
     /** Which engine this batch targets - a I915_EXEC_RING_MASK value */
@@ -72,9 +74,6 @@ struct iris_batch {
     /** The amount of aperture space (in bytes) used by all exec_bos */
     int aperture_space;
  
-   /** Binder (containing binding tables) */
-   struct iris_binder binder;
-
     struct {
        /**
         * Set of struct brw_bo * that have been rendered to within this
diff --git a/src/gallium/drivers/iris/iris_binder.c b/src/gallium/drivers/iris/iris_binder.c

index cba84f5fa5319e153c8021fbf5cb537a12fa985a..2cac1b71256ad15aaf7a1dbd2d7342fdd6ac5969 100644 (file)
--- a/src/gallium/drivers/iris/iris_binder.c
+++ b/src/gallium/drivers/iris/iris_binder.c
@@ -49,6 +49,8 @@
   * and cycling back around where possible to avoid replacing it at all costs.
   *
   * XXX: if we do have to flush, we should emit a performance warning.
+ *
+ * XXX: these comments are out of date
   */
  
  #include <stdlib.h>
@@ -62,98 +64,131 @@
  /* Avoid using offset 0, tools consider it NULL */
  #define INIT_INSERT_POINT BTP_ALIGNMENT
  
-/**
- * Reserve a block of space in the binder, given the raw size in bytes.
- */
-uint32_t
-iris_binder_reserve(struct iris_batch *batch, unsigned size)
+static bool
+binder_has_space(struct iris_binder *binder, unsigned size)
+{
+   return binder->insert_point + size <= IRIS_BINDER_SIZE;
+}
+
+static void
+binder_realloc(struct iris_context *ice)
  {
-   struct iris_binder *binder = &batch->binder;
+   struct iris_screen *screen = (void *) ice->ctx.screen;
+   struct iris_bufmgr *bufmgr = screen->bufmgr;
+   struct iris_binder *binder = &ice->state.binder;
  
-   assert(size > 0);
-   assert((binder->insert_point % BTP_ALIGNMENT) == 0);
+   iris_bo_unreference(binder->bo);
+
+   binder->bo =
+      iris_bo_alloc(bufmgr, "binder", IRIS_BINDER_SIZE, IRIS_MEMZONE_BINDER);
+   binder->map = iris_bo_map(NULL, binder->bo, MAP_WRITE);
+   binder->insert_point = INIT_INSERT_POINT;
  
-   /* If we can't fit all stages in the binder, flush the batch which
-    * will cause us to gain a new empty binder.
+   /* Allocating a new binder requires changing Surface State Base Address,
+    * which also invalidates all our previous binding tables - each entry
+    * in those tables is an offset from the old base.
+    *
+    * We do this here so that iris_binder_reserve_3d correctly gets a new
+    * larger total_size when making the updated reservation.
      */
-   if (binder->insert_point + size > IRIS_BINDER_SIZE)
-      iris_batch_flush(batch);
+   ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS;
+}
  
+static uint32_t
+binder_insert(struct iris_binder *binder, unsigned size)
+{
     uint32_t offset = binder->insert_point;
  
-   /* It had better fit now. */
-   assert(offset + size <= IRIS_BINDER_SIZE);
-
     binder->insert_point = align(binder->insert_point + size, BTP_ALIGNMENT);
  
-   iris_use_pinned_bo(batch, binder->bo, false);
-
     return offset;
  }
  
+/**
+ * Reserve a block of space in the binder, given the raw size in bytes.
+ */
+uint32_t
+iris_binder_reserve(struct iris_context *ice,
+                    unsigned size)
+{
+   struct iris_binder *binder = &ice->state.binder;
+
+   if (!binder_has_space(binder, size))
+      binder_realloc(ice);
+
+   assert(size > 0);
+   return binder_insert(binder, size);
+}
+
  /**
   * Reserve and record binder space for 3D pipeline shader stages.
   *
   * Note that you must actually populate the new binding tables after
   * calling this command - the new area is uninitialized.
   */
-bool
-iris_binder_reserve_3d(struct iris_batch *batch,
-                       struct iris_context *ice)
+void
+iris_binder_reserve_3d(struct iris_context *ice)
  {
     struct iris_compiled_shader **shaders = ice->shaders.prog;
-   struct iris_binder *binder = &batch->binder;
-   unsigned total_size = 0;
+   struct iris_binder *binder = &ice->state.binder;
     unsigned sizes[MESA_SHADER_STAGES] = {};
+   unsigned total_size;
  
-   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
-      if (!(ice->state.dirty & (IRIS_DIRTY_BINDINGS_VS << stage)))
-         continue;
+   /* If nothing is dirty, skip all this. */
+   if (!(ice->state.dirty & IRIS_ALL_DIRTY_BINDINGS))
+      return;
  
+   /* Get the binding table sizes for each stage */
+   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
        if (!shaders[stage])
           continue;
  
        const struct brw_stage_prog_data *prog_data =
           (const void *) shaders[stage]->prog_data;
  
+      /* Round up the size so our next table has an aligned starting offset */
        sizes[stage] = align(prog_data->binding_table.size_bytes, BTP_ALIGNMENT);
-      total_size += sizes[stage];
     }
  
-   if (total_size == 0)
-      return false;
+   /* Make space for the new binding tables...this may take two tries. */
+   while (true) {
+      total_size = 0;
+      for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+         if (ice->state.dirty & (IRIS_DIRTY_BINDINGS_VS << stage))
+            total_size += sizes[stage];
+      }
  
-   uint32_t offset = iris_binder_reserve(batch, total_size);
-   bool flushed = offset == INIT_INSERT_POINT;
+      assert(total_size < IRIS_BINDER_SIZE);
  
-   /* Assign space and record the current binding table. */
-   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
-      if (!(ice->state.dirty & (IRIS_DIRTY_BINDINGS_VS << stage)))
-         continue;
+      if (total_size == 0)
+         return;
+
+      if (binder_has_space(binder, total_size))
+         break;
  
-      binder->bt_offset[stage] = sizes[stage] > 0 ? offset : 0;
-      offset += sizes[stage];
+      /* It didn't fit.  Allocate a new buffer and try again.  Note that
+       * this will flag all bindings dirty, which may increase total_size
+       * on the next iteration.
+       */
+      binder_realloc(ice);
     }
  
-   return flushed;
-}
+   /* Assign space and record the new binding table offsets. */
+   uint32_t offset = binder_insert(binder, total_size);
  
-void
-iris_init_binder(struct iris_binder *binder, struct iris_bufmgr *bufmgr)
-{
-   binder->bo =
-      iris_bo_alloc(bufmgr, "binder", IRIS_BINDER_SIZE, IRIS_MEMZONE_BINDER);
-   binder->map = iris_bo_map(NULL, binder->bo, MAP_WRITE);
-   binder->insert_point = INIT_INSERT_POINT;
+   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+      if (ice->state.dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
+         binder->bt_offset[stage] = sizes[stage] > 0 ? offset : 0;
+         offset += sizes[stage];
+      }
+   }
  }
  
-/**
- * Is the binder empty?  (If so, old binding table pointers are stale.)
- */
-bool
-iris_binder_is_empty(struct iris_binder *binder)
+void
+iris_init_binder(struct iris_context *ice)
  {
-   return binder->insert_point <= INIT_INSERT_POINT;
+   memset(&ice->state.binder, 0, sizeof(struct iris_binder));
+   binder_realloc(ice);
  }
  
  void
diff --git a/src/gallium/drivers/iris/iris_binder.h b/src/gallium/drivers/iris/iris_binder.h

index bd1e17ae4c459bea56f2419e45ca247f3f79ba18..e63170e298f49f61b5327b384d1d99fea9a12b69 100644 (file)
--- a/src/gallium/drivers/iris/iris_binder.h
+++ b/src/gallium/drivers/iris/iris_binder.h
@@ -49,11 +49,9 @@ struct iris_binder
     uint32_t bt_offset[MESA_SHADER_STAGES];
  };
  
-void iris_init_binder(struct iris_binder *binder, struct iris_bufmgr *bufmgr);
-bool iris_binder_is_empty(struct iris_binder *binder);
+void iris_init_binder(struct iris_context *ice);
  void iris_destroy_binder(struct iris_binder *binder);
-uint32_t iris_binder_reserve(struct iris_batch *batch, unsigned size);
-bool iris_binder_reserve_3d(struct iris_batch *batch,
-                            struct iris_context *ice);
+uint32_t iris_binder_reserve(struct iris_context *ice, unsigned size);
+void iris_binder_reserve_3d(struct iris_context *ice);
  
  #endif
diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c

index 3ff48ed00f51e290cbc21f8a15806959ac0f6736..e7718eab7eb63c4adafe33dce0c4b5d67799593d 100644 (file)
--- a/src/gallium/drivers/iris/iris_blorp.c
+++ b/src/gallium/drivers/iris/iris_blorp.c
@@ -120,7 +120,7 @@ blorp_get_surface_address(struct blorp_batch *blorp_batch,
  UNUSED static struct blorp_address
  blorp_get_surface_base_address(UNUSED struct blorp_batch *blorp_batch)
  {
-   return (struct blorp_address) { .offset = IRIS_MEMZONE_SURFACE_START };
+   return (struct blorp_address) { .offset = IRIS_MEMZONE_BINDER_START };
  }
  
  static void *
@@ -146,17 +146,22 @@ blorp_alloc_binding_table(struct blorp_batch *blorp_batch,
                            void **surface_maps)
  {
     struct iris_context *ice = blorp_batch->blorp->driver_ctx;
+   struct iris_binder *binder = &ice->state.binder;
     struct iris_batch *batch = blorp_batch->driver_batch;
  
-   *bt_offset = iris_binder_reserve(batch, num_entries * sizeof(uint32_t));
-   uint32_t *bt_map = batch->binder.map + *bt_offset;
+   *bt_offset = iris_binder_reserve(ice, num_entries * sizeof(uint32_t));
+   uint32_t *bt_map = binder->map + *bt_offset;
  
     for (unsigned i = 0; i < num_entries; i++) {
        surface_maps[i] = stream_state(batch, ice->state.surface_uploader,
                                       state_size, state_alignment,
                                       &surface_offsets[i], NULL);
-      bt_map[i] = surface_offsets[i];
+      bt_map[i] = surface_offsets[i] - (uint32_t) binder->bo->gtt_offset;
     }
+
+   iris_use_pinned_bo(batch, binder->bo, false);
+
+   ice->vtbl.update_surface_base_address(batch, binder);
  }
  
  static void *
diff --git a/src/gallium/drivers/iris/iris_bufmgr.c b/src/gallium/drivers/iris/iris_bufmgr.c

index 058ae15ab81cfadd94af9d2f62593aab129adf81..50e7d4f715da4b11774e4b5ea3b32f5f79deb8bd 100644 (file)
--- a/src/gallium/drivers/iris/iris_bufmgr.c
+++ b/src/gallium/drivers/iris/iris_bufmgr.c
@@ -244,10 +244,10 @@ bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size)
  static enum iris_memory_zone
  memzone_for_address(uint64_t address)
  {
-   STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START);
+   STATIC_ASSERT(IRIS_MEMZONE_OTHER_START   > IRIS_MEMZONE_DYNAMIC_START);
     STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START);
-   STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_SHADER_START);
-   STATIC_ASSERT(IRIS_BINDER_ADDRESS == IRIS_MEMZONE_SURFACE_START);
+   STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_BINDER_START);
+   STATIC_ASSERT(IRIS_MEMZONE_BINDER_START  > IRIS_MEMZONE_SHADER_START);
     STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
  
     if (address >= IRIS_MEMZONE_OTHER_START)
@@ -259,7 +259,7 @@ memzone_for_address(uint64_t address)
     if (address > IRIS_MEMZONE_DYNAMIC_START)
        return IRIS_MEMZONE_DYNAMIC;
  
-   if (address == IRIS_BINDER_ADDRESS)
+   if (address > IRIS_MEMZONE_BINDER_START)
        return IRIS_MEMZONE_BINDER;
  
     if (address > IRIS_MEMZONE_SURFACE_START)
@@ -365,8 +365,14 @@ bucket_vma_free(struct bo_cache_bucket *bucket, uint64_t address)
  }
  
  static struct bo_cache_bucket *
-get_bucket_allocator(struct iris_bufmgr *bufmgr, uint64_t size)
+get_bucket_allocator(struct iris_bufmgr *bufmgr,
+                     enum iris_memory_zone memzone,
+                     uint64_t size)
  {
+   /* Bucketing is not worth using for binders...we'll never have 64... */
+   if (memzone == IRIS_MEMZONE_BINDER)
+      return NULL;
+
     /* Skip using the bucket allocator for very large sizes, as it allocates
      * 64 of them and this can balloon rather quickly.
      */
@@ -393,12 +399,11 @@ vma_alloc(struct iris_bufmgr *bufmgr,
            uint64_t size,
            uint64_t alignment)
  {
-   if (memzone == IRIS_MEMZONE_BINDER)
-      return IRIS_BINDER_ADDRESS;
-   else if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
+   if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
        return IRIS_BORDER_COLOR_POOL_ADDRESS;
  
-   struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
+   struct bo_cache_bucket *bucket =
+      get_bucket_allocator(bufmgr, memzone, size);
     uint64_t addr;
  
     if (bucket) {
@@ -419,8 +424,7 @@ vma_free(struct iris_bufmgr *bufmgr,
           uint64_t address,
           uint64_t size)
  {
-   if (address == IRIS_BINDER_ADDRESS ||
-       address == IRIS_BORDER_COLOR_POOL_ADDRESS)
+   if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
        return;
  
     /* Un-canonicalize the address. */
@@ -429,12 +433,13 @@ vma_free(struct iris_bufmgr *bufmgr,
     if (address == 0ull)
        return;
  
-   struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
+   enum iris_memory_zone memzone = memzone_for_address(address);
+   struct bo_cache_bucket *bucket =
+      get_bucket_allocator(bufmgr, memzone, size);
  
     if (bucket) {
        bucket_vma_free(bucket, address);
     } else {
-      enum iris_memory_zone memzone = memzone_for_address(address);
        util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
     }
  }
@@ -1599,9 +1604,12 @@ iris_bufmgr_init(struct gen_device_info *devinfo, int fd)
  
     util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER],
                        PAGE_SIZE, _4GB);
+   util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_BINDER],
+                      IRIS_MEMZONE_BINDER_START,
+                      IRIS_MAX_BINDERS * IRIS_BINDER_SIZE);
     util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE],
-                      IRIS_MEMZONE_SURFACE_START + IRIS_BINDER_SIZE,
-                      _4GB - IRIS_BINDER_SIZE);
+                      IRIS_MEMZONE_SURFACE_START,
+                      _4GB - IRIS_MAX_BINDERS * IRIS_BINDER_SIZE);
     util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_DYNAMIC],
                        IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,
                        _4GB - IRIS_BORDER_COLOR_POOL_SIZE);
diff --git a/src/gallium/drivers/iris/iris_bufmgr.h b/src/gallium/drivers/iris/iris_bufmgr.h

index 8be545cb04b4d043dc75c97f8a44eff7c0f2a920..9210f44c9449ed1628699a6e1071000f79e61cf4 100644 (file)
--- a/src/gallium/drivers/iris/iris_bufmgr.h
+++ b/src/gallium/drivers/iris/iris_bufmgr.h
@@ -48,14 +48,11 @@ struct pipe_debug_callback;
   *
   * We lay out the virtual address space as follows:
   *
- * - [0,   4K): Nothing  (empty page for null address)
- * - [4K,  4G): Shaders  (Instruction Base Address)
- * - [4G,  8G): Surfaces (Surface State Base Address, Bindless ...)
- * - [8G, 12G): Dynamic  (Dynamic State Base Address)
- * - [12G, *):  Other    (everything else in the full 48-bit VMA)
- *
- * A special 64kB "binder" buffer lives at the start of the surface memory
- * zone, holding binding tables referring to objects in the rest of the zone.
+ * - [0,   4K): Nothing            (empty page for null address)
+ * - [4K,  4G): Shaders            (Instruction Base Address)
+ * - [4G,  8G): Surfaces & Binders (Surface State Base Address, Bindless ...)
+ * - [8G, 12G): Dynamic            (Dynamic State Base Address)
+ * - [12G, *):  Other              (everything else in the full 48-bit VMA)
   *
   * A special buffer for border color lives at the start of the dynamic state
   * memory zone.  This unfortunately has to be handled specially because the
@@ -65,32 +62,29 @@ struct pipe_debug_callback;
   * each a separate VMA.  However, we assign address globally, so buffers will
   * have the same address in all GEM contexts.  This lets us have a single BO
   * field for the address, which is easy and cheap.
- *
- * One exception is the special "binder" BO.  Binders are context-local,
- * so while there are many of them, all binders are stored at the same
- * fixed address (in different VMAs).
   */
  enum iris_memory_zone {
     IRIS_MEMZONE_SHADER,
+   IRIS_MEMZONE_BINDER,
     IRIS_MEMZONE_SURFACE,
     IRIS_MEMZONE_DYNAMIC,
     IRIS_MEMZONE_OTHER,
  
-   IRIS_MEMZONE_BINDER,
     IRIS_MEMZONE_BORDER_COLOR_POOL,
  };
  
  /* Intentionally exclude single buffer "zones" */
  #define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 2)
  
+#define IRIS_BINDER_SIZE (64 * 1024)
+#define IRIS_MAX_BINDERS 100
+
  #define IRIS_MEMZONE_SHADER_START     (0ull * (1ull << 32))
-#define IRIS_MEMZONE_SURFACE_START    (1ull * (1ull << 32))
+#define IRIS_MEMZONE_BINDER_START     (1ull * (1ull << 32))
+#define IRIS_MEMZONE_SURFACE_START    (IRIS_MEMZONE_BINDER_START + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE)
  #define IRIS_MEMZONE_DYNAMIC_START    (2ull * (1ull << 32))
  #define IRIS_MEMZONE_OTHER_START      (3ull * (1ull << 32))
  
-#define IRIS_BINDER_ADDRESS IRIS_MEMZONE_SURFACE_START
-#define IRIS_BINDER_SIZE (64 * 1024)
-
  #define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
  #define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024)
  
diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c

index daaa9409d2e4fece90ff55f821158816aa5639cc..bc637ea04922b869b35e1806ba6ea9c9c5bf73a5 100644 (file)
--- a/src/gallium/drivers/iris/iris_context.c
+++ b/src/gallium/drivers/iris/iris_context.c
@@ -130,6 +130,7 @@ iris_destroy_context(struct pipe_context *ctx)
     slab_destroy_child(&ice->transfer_pool);
  
     iris_batch_free(&ice->render_batch);
+   iris_destroy_binder(&ice->state.binder);
  
     ralloc_free(ice);
  }
@@ -189,14 +190,15 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags)
  
     iris_init_program_cache(ice);
     iris_init_border_color_pool(ice);
+   iris_init_binder(ice);
  
     slab_create_child(&ice->transfer_pool, &screen->transfer_pool);
  
     ice->state.surface_uploader =
-      u_upload_create(&ice->ctx, 16384, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
+      u_upload_create(ctx, 16384, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
                        IRIS_RESOURCE_FLAG_SURFACE_MEMZONE);
     ice->state.dynamic_uploader =
-      u_upload_create(&ice->ctx, 16384, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
+      u_upload_create(ctx, 16384, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
                        IRIS_RESOURCE_FLAG_DYNAMIC_MEMZONE);
  
     genX_call(devinfo, init_state, ice);
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h

index f7411727bb01f3649ba30531b3b0a7bf8646d8c1..a01e0d13eb4276b4eb3ebd3b60b39d03663677a2 100644 (file)
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -30,6 +30,7 @@
  #include "intel/common/gen_debug.h"
  #include "intel/compiler/brw_compiler.h"
  #include "iris_batch.h"
+#include "iris_binder.h"
  #include "iris_resource.h"
  #include "iris_screen.h"
  
@@ -109,6 +110,13 @@ struct blorp_params;
  #define IRIS_DIRTY_VF                       (1ull << 52)
  #define IRIS_DIRTY_VF_TOPOLOGY              (1ull << 53)
  
+#define IRIS_ALL_DIRTY_BINDINGS (IRIS_DIRTY_BINDINGS_VS  | \
+                                 IRIS_DIRTY_BINDINGS_TCS | \
+                                 IRIS_DIRTY_BINDINGS_TES | \
+                                 IRIS_DIRTY_BINDINGS_GS  | \
+                                 IRIS_DIRTY_BINDINGS_FS  | \
+                                 IRIS_DIRTY_BINDINGS_CS)
+
  /**
   * Non-orthogonal state (NOS) dependency flags.
   *
@@ -262,6 +270,8 @@ struct iris_vtable {
     void (*upload_render_state)(struct iris_context *ice,
                                 struct iris_batch *batch,
                                 const struct pipe_draw_info *draw);
+   void (*update_surface_base_address)(struct iris_batch *batch,
+                                       struct iris_binder *binder);
     void (*emit_raw_pipe_control)(struct iris_batch *batch, uint32_t flags,
                                   struct iris_bo *bo, uint32_t offset,
                                   uint64_t imm);
@@ -382,6 +392,8 @@ struct iris_context {
        // "I'm streaming this out at draw time and never want it again!"
        struct u_upload_mgr *dynamic_uploader;
  
+      struct iris_binder binder;
+
        struct iris_border_color_pool border_color_pool;
  
        /**
diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c

index f6911350a7bf40cf69dd75b1f277b2b84e1fce95..0567bbac72ef4bf138a64f413213ef2b2bb161fe 100644 (file)
--- a/src/gallium/drivers/iris/iris_draw.c
+++ b/src/gallium/drivers/iris/iris_draw.c
@@ -80,21 +80,9 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
     iris_predraw_resolve_inputs(ice, batch);
     iris_predraw_resolve_framebuffer(ice, batch);
  
-   if (iris_binder_is_empty(&batch->binder)) {
-      ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS |
-                          IRIS_DIRTY_BINDINGS_TCS |
-                          IRIS_DIRTY_BINDINGS_TES |
-                          IRIS_DIRTY_BINDINGS_GS |
-                          IRIS_DIRTY_BINDINGS_FS;
-   }
+   iris_binder_reserve_3d(ice);
  
-   if (iris_binder_reserve_3d(batch, ice)) {
-      ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS |
-                          IRIS_DIRTY_BINDINGS_TCS |
-                          IRIS_DIRTY_BINDINGS_TES |
-                          IRIS_DIRTY_BINDINGS_GS |
-                          IRIS_DIRTY_BINDINGS_FS;
-   }
+   ice->vtbl.update_surface_base_address(batch, &ice->state.binder);
     ice->vtbl.upload_render_state(ice, batch, info);
  
     ice->state.dirty = 0ull;
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c

index 454e05979e49243b648eb2650292bf69107fd7ac..54bf3fd602356d8add4bd11b1828c37f0e7ae209 100644 (file)
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -445,6 +445,36 @@ emit_state(struct iris_batch *batch,
  #define cso_changed_memcmp(x) \
     (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0)
  
+static void
+flush_for_state_base_change(struct iris_batch *batch)
+{
+   /* Flush before emitting STATE_BASE_ADDRESS.
+    *
+    * This isn't documented anywhere in the PRM.  However, it seems to be
+    * necessary prior to changing the surface state base address.  We've
+    * seen issues in Vulkan where we get GPU hangs when using multi-level
+    * command buffers which clear depth, reset state base address, and then
+    * go render stuff.
+    *
+    * Normally, in GL, we would trust the kernel to do sufficient stalls
+    * and flushes prior to executing our batch.  However, it doesn't seem
+    * as if the kernel's flushing is always sufficient and we don't want to
+    * rely on it.
+    *
+    * We make this an end-of-pipe sync instead of a normal flush because we
+    * do not know the current status of the GPU.  On Haswell at least,
+    * having a fast-clear operation in flight at the same time as a normal
+    * rendering operation can cause hangs.  Since the kernel's flushing is
+    * insufficient, we need to ensure that any rendering operations from
+    * other processes are definitely complete before we try to do our own
+    * rendering.  It's a bit of a big hammer but it appears to work.
+    */
+   iris_emit_end_of_pipe_sync(batch,
+                              PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                              PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                              PIPE_CONTROL_DATA_CACHE_FLUSH);
+}
+
  /**
   * Upload the initial GPU state for a render context.
   *
@@ -459,18 +489,19 @@ iris_init_render_context(struct iris_screen *screen,
  {
     iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER);
  
-   /* XXX: PIPE_CONTROLs */
+   flush_for_state_base_change(batch);
  
     /* We program STATE_BASE_ADDRESS once at context initialization time.
      * Each base address points at a 4GB memory zone, and never needs to
      * change.  See iris_bufmgr.h for a description of the memory zones.
+    *
+    * Except for Surface State Base Address.  That one changes.
      */
     iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
     #if 0
     // XXX: MOCS is stupid for this.
        sba.GeneralStateMemoryObjectControlState            = MOCS_WB;
        sba.StatelessDataPortAccessMemoryObjectControlState = MOCS_WB;
-      sba.SurfaceStateMemoryObjectControlState            = MOCS_WB;
        sba.DynamicStateMemoryObjectControlState            = MOCS_WB;
        sba.IndirectObjectMemoryObjectControlState          = MOCS_WB;
        sba.InstructionMemoryObjectControlState             = MOCS_WB;
@@ -478,7 +509,6 @@ iris_init_render_context(struct iris_screen *screen,
     #endif
  
        sba.GeneralStateBaseAddressModifyEnable   = true;
-      sba.SurfaceStateBaseAddressModifyEnable   = true;
        sba.DynamicStateBaseAddressModifyEnable   = true;
        sba.IndirectObjectBaseAddressModifyEnable = true;
        sba.InstructionBaseAddressModifyEnable    = true;
@@ -489,7 +519,6 @@ iris_init_render_context(struct iris_screen *screen,
        sba.InstructionBuffersizeModifyEnable     = true;
  
        sba.InstructionBaseAddress  = ro_bo(NULL, IRIS_MEMZONE_SHADER_START);
-      sba.SurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SURFACE_START);
        sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START);
  
        sba.GeneralStateBufferSize   = 0xfffff;
@@ -3063,6 +3092,9 @@ use_ssbo(struct iris_batch *batch, struct iris_context *ice,
     return surf_state->offset;
  }
  
+#define push_bt_entry(addr) /* store addr as an offset from binder_addr */ \
+   do { assert((addr) >= binder_addr); bt_map[s++] = (addr) - binder_addr; } while (0)
+
  /**
   * Populate the binding table for a given shader stage.
   *
@@ -3075,13 +3107,14 @@ iris_populate_binding_table(struct iris_context *ice,
                              struct iris_batch *batch,
                              gl_shader_stage stage)
  {
-   const struct iris_binder *binder = &batch->binder;
+   const struct iris_binder *binder = &ice->state.binder;
     struct iris_compiled_shader *shader = ice->shaders.prog[stage];
     if (!shader)
        return;
  
     const struct shader_info *info = iris_get_shader_info(ice, stage);
     struct iris_shader_state *shs = &ice->state.shaders[stage];
+   uint32_t binder_addr = binder->bo->gtt_offset;
  
     //struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
     uint32_t *bt_map = binder->map + binder->bt_offset[stage];
@@ -3092,13 +3125,14 @@ iris_populate_binding_table(struct iris_context *ice,
        /* Note that cso_fb->nr_cbufs == fs_key->nr_color_regions. */
        if (cso_fb->nr_cbufs) {
           for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
-            if (cso_fb->cbufs[i])
-               bt_map[s++] = use_surface(batch, cso_fb->cbufs[i], true);
-            else
-               bt_map[s++] = use_null_fb_surface(batch, ice);
+            uint32_t addr =
+               cso_fb->cbufs[i] ? use_surface(batch, cso_fb->cbufs[i], true)
+                                : use_null_fb_surface(batch, ice);
+            push_bt_entry(addr);
           }
        } else {
-         bt_map[s++] = use_null_fb_surface(batch, ice);
+         uint32_t addr = use_null_fb_surface(batch, ice);
+         push_bt_entry(addr);
        }
     }
  
@@ -3107,8 +3141,9 @@ iris_populate_binding_table(struct iris_context *ice,
  
     for (int i = 0; i < shs->num_textures; i++) {
        struct iris_sampler_view *view = shs->textures[i];
-      bt_map[s++] = view ? use_sampler_view(batch, view)
-                         : use_null_surface(batch, ice);
+      uint32_t addr = view ? use_sampler_view(batch, view)
+                           : use_null_surface(batch, ice);
+      push_bt_entry(addr);
     }
  
     for (int i = 0; i < 1 + info->num_ubos; i++) {
@@ -3116,7 +3151,8 @@ iris_populate_binding_table(struct iris_context *ice,
        if (!cbuf->surface_state.res)
           break;
  
-      bt_map[s++] = use_const_buffer(batch, cbuf);
+      uint32_t addr = use_const_buffer(batch, cbuf);
+      push_bt_entry(addr);
     }
  
     /* XXX: st is wasting 16 binding table slots for ABOs.  Should add a cap
@@ -3126,7 +3162,8 @@ iris_populate_binding_table(struct iris_context *ice,
      */
     if (info->num_abos + info->num_ssbos > 0) {
        for (int i = 0; i < IRIS_MAX_ABOS + info->num_ssbos; i++) {
-         bt_map[s++] = use_ssbo(batch, ice, shs, i);
+         uint32_t addr = use_ssbo(batch, ice, shs, i);
+         push_bt_entry(addr);
        }
     }
  
@@ -3263,6 +3300,27 @@ iris_restore_context_saved_bos(struct iris_context *ice,
     }
  }
  
+/**
+ * Possibly emit STATE_BASE_ADDRESS to update Surface State Base Address.
+ */
+static void
+iris_update_surface_base_address(struct iris_batch *batch,
+                                 struct iris_binder *binder)
+{
+   if (batch->last_surface_base_address == binder->bo->gtt_offset)
+      return;
+
+   flush_for_state_base_change(batch);
+
+   iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
+      // XXX: sba.SurfaceStateMemoryObjectControlState = MOCS_WB;
+      sba.SurfaceStateBaseAddressModifyEnable = true;
+      sba.SurfaceStateBaseAddress = ro_bo(binder->bo, 0);
+   }
+
+   batch->last_surface_base_address = binder->bo->gtt_offset;
+}
+
  static void
  iris_upload_dirty_render_state(struct iris_context *ice,
                                 struct iris_batch *batch,
@@ -3274,6 +3332,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
        return;
  
     struct iris_genx_state *genx = ice->state.genx;
+   struct iris_binder *binder = &ice->state.binder;
     struct brw_wm_prog_data *wm_prog_data = (void *)
        ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
  
@@ -3426,7 +3485,12 @@ iris_upload_dirty_render_state(struct iris_context *ice,
        }
     }
  
-   struct iris_binder *binder = &batch->binder;
+   /* Always pin the binder.  If we're emitting new binding table pointers,
+    * we need it.  If not, we're probably inheriting old tables via the
+    * context, and need it anyway.  Since true zero-bindings cases are
+    * practically non-existent, just pin it and avoid last_res tracking.
+    */
+   iris_use_pinned_bo(batch, binder->bo, false);
  
     for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
        if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
@@ -4309,6 +4373,7 @@ genX(init_state)(struct iris_context *ice)
     ice->vtbl.destroy_state = iris_destroy_state;
     ice->vtbl.init_render_context = iris_init_render_context;
     ice->vtbl.upload_render_state = iris_upload_render_state;
+   ice->vtbl.update_surface_base_address = iris_update_surface_base_address;
     ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
     ice->vtbl.derived_program_state_size = iris_derived_program_state_size;
     ice->vtbl.store_derived_program_state = iris_store_derived_program_state;
author	Kenneth Graunke <kenneth@whitecape.org>
	Sun, 9 Sep 2018 02:43:34 +0000 (19:43 -0700)
committer	Kenneth Graunke <kenneth@whitecape.org>
	Thu, 21 Feb 2019 18:26:08 +0000 (10:26 -0800)
src/gallium/drivers/iris/iris_batch.c		patch \| blob \| history
src/gallium/drivers/iris/iris_batch.h		patch \| blob \| history
src/gallium/drivers/iris/iris_binder.c		patch \| blob \| history
src/gallium/drivers/iris/iris_binder.h		patch \| blob \| history
src/gallium/drivers/iris/iris_blorp.c		patch \| blob \| history
src/gallium/drivers/iris/iris_bufmgr.c		patch \| blob \| history
src/gallium/drivers/iris/iris_bufmgr.h		patch \| blob \| history
src/gallium/drivers/iris/iris_context.c		patch \| blob \| history
src/gallium/drivers/iris/iris_context.h		patch \| blob \| history
src/gallium/drivers/iris/iris_draw.c		patch \| blob \| history
src/gallium/drivers/iris/iris_state.c		patch \| blob \| history