iris: Try to recover from GPU hangs.

author Kenneth Graunke <kenneth@whitecape.org>

Wed, 8 May 2019 06:19:30 +0000 (23:19 -0700)

committer Kenneth Graunke <kenneth@whitecape.org>

Thu, 9 May 2019 23:49:07 +0000 (16:49 -0700)
author Kenneth Graunke <kenneth@whitecape.org>
Wed, 8 May 2019 06:19:30 +0000 (23:19 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 9 May 2019 23:49:07 +0000 (16:49 -0700)
diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c

index d2b4fc88fe686480ecc38a777971525380085784..f3d2e569aa720f2304fa2e361d1f3072c2bb6c80 100644 (file)
--- a/src/gallium/drivers/iris/iris_batch.c
+++ b/src/gallium/drivers/iris/iris_batch.c
@@ -451,6 +451,28 @@ iris_finish_batch(struct iris_batch *batch)
        batch->primary_batch_size = iris_batch_bytes_used(batch);
  }
  
+/**
+ * Replace our GEM context with a new one (e.g. if banned); false on failure.
+ */
+static bool
+replace_hw_ctx(struct iris_batch *batch)
+{
+   struct iris_screen *screen = batch->screen;
+   struct iris_bufmgr *bufmgr = screen->bufmgr;
+
+   uint32_t new_ctx = iris_clone_hw_context(bufmgr, batch->hw_ctx_id);
+   if (!new_ctx)
+      return false;   /* nothing replaced; batch->hw_ctx_id is untouched */
+
+   iris_destroy_hw_context(bufmgr, batch->hw_ctx_id);   /* drop the old id */
+   batch->hw_ctx_id = new_ctx;
+
+   /* Let the owning context redo its setup and mark its state dirty. */
+   iris_lost_context_state(batch);
+
+   return true;
+}
+
  /**
   * Submit the batch to the GPU via execbuffer2.
   */
@@ -583,6 +605,15 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
     /* Start a new batch buffer. */
     iris_batch_reset(batch);
  
+   /* EIO means our context is banned.  In this case, try to replace it
+    * with a new logical context, and inform iris_context that all state
+    * has been lost and needs to be re-initialized.  If this succeeds,
+    * dubiously claim success...
+    */
+   if (ret == -EIO && replace_hw_ctx(batch)) {
+      ret = 0;
+   }
+
     if (ret >= 0) {
        //if (iris->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
           //iris_check_for_reset(ice);
diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c

index a1d11755a24110cfc28ffa18e4ee83e722d66a84..7ed4fdcd8d78671cb60c31dd250d217039d64e56 100644 (file)
--- a/src/gallium/drivers/iris/iris_context.c
+++ b/src/gallium/drivers/iris/iris_context.c
@@ -63,6 +63,44 @@ iris_set_debug_callback(struct pipe_context *ctx,
        memset(&ice->dbg, 0, sizeof(ice->dbg));
  }
  
+/**
+ * Called from the batch module when it detects a GPU hang.  The batch must
+ * be the render or compute batch embedded in an iris_context.
+ *
+ * We've lost our GEM context and can't rely on any existing GPU state: we
+ * run the batch's vtbl init hook, flag all state dirty, and zero last_grid.
+ */
+void
+iris_lost_context_state(struct iris_batch *batch)
+{
+   /* The batch module doesn't have an iris_context, because we want to
+    * avoid introducing lots of layering violations.  Unfortunately, here
+    * we do need to inform the context of batch catastrophe.  We know the
+    * batch is one of our context's batches, so hackily claw our way back.
+    */
+   struct iris_context *ice = NULL;
+   struct iris_screen *screen;
+
+   if (batch->name == IRIS_BATCH_RENDER) {
+      ice = container_of(batch, ice, batches[IRIS_BATCH_RENDER]);
+      assert(&ice->batches[IRIS_BATCH_RENDER] == batch);   /* sanity check */
+      screen = (void *) ice->ctx.screen;
+
+      ice->vtbl.init_render_context(screen, batch, &ice->vtbl, &ice->dbg);
+   } else if (batch->name == IRIS_BATCH_COMPUTE) {
+      ice = container_of(batch, ice, batches[IRIS_BATCH_COMPUTE]);
+      assert(&ice->batches[IRIS_BATCH_COMPUTE] == batch);   /* sanity check */
+      screen = (void *) ice->ctx.screen;
+
+      ice->vtbl.init_compute_context(screen, batch, &ice->vtbl, &ice->dbg);
+   } else {
+      unreachable("unhandled batch reset");   /* ice would still be NULL */
+   }
+
+   ice->state.dirty = ~0ull;   /* set every dirty bit */
+   memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
+}
+
  static void
  iris_get_sample_position(struct pipe_context *ctx,
                           unsigned sample_count,
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h

index 31f345d36b082b5a8fa58bdd000b7dca21d67e24..4501c4fcad9c6642f948ef2d54e57c5418fb5c12 100644 (file)
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -662,6 +662,8 @@ double get_time(void);
  struct pipe_context *
  iris_create_context(struct pipe_screen *screen, void *priv, unsigned flags);
  
+void iris_lost_context_state(struct iris_batch *batch);
+
  void iris_init_blit_functions(struct pipe_context *ctx);
  void iris_init_clear_functions(struct pipe_context *ctx);
  void iris_init_program_functions(struct pipe_context *ctx);
author	Kenneth Graunke <kenneth@whitecape.org>
	Wed, 8 May 2019 06:19:30 +0000 (23:19 -0700)
committer	Kenneth Graunke <kenneth@whitecape.org>
	Thu, 9 May 2019 23:49:07 +0000 (16:49 -0700)
src/gallium/drivers/iris/iris_batch.c		patch \| blob \| history
src/gallium/drivers/iris/iris_context.c		patch \| blob \| history
src/gallium/drivers/iris/iris_context.h		patch \| blob \| history