/* Flush the batch if the batch/state buffers are nearly full. We can
* grow them if needed, but this is not free, so we'd like to avoid it.
*/
- intel_batchbuffer_require_space(brw, 600, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 600);
brw_require_statebuffer_space(brw, 2500);
intel_batchbuffer_save_state(brw);
bool flushed;
};
-enum brw_gpu_ring {
- UNKNOWN_RING,
- RENDER_RING,
- BLT_RING,
-};
-
struct brw_reloc_list {
struct drm_i915_gem_relocation_entry *relocs;
int reloc_count;
uint32_t *map_next;
uint32_t state_used;
- enum brw_gpu_ring ring;
bool use_shadow_copy;
bool use_batch_first;
bool needs_sol_reset;
/* Flush the batch if the batch/state buffers are nearly full. We can
* grow them if needed, but this is not free, so we'd like to avoid it.
*/
- intel_batchbuffer_require_space(brw, 1500, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 1500);
brw_require_statebuffer_space(brw, 2400);
intel_batchbuffer_save_state(brw);
brw_emit_depth_stall_flushes(brw);
const unsigned ds_dwords = brw->isl_dev.ds.size / 4;
- intel_batchbuffer_begin(brw, ds_dwords, RENDER_RING);
+ intel_batchbuffer_begin(brw, ds_dwords);
uint32_t *ds_map = brw->batch.map_next;
const uint32_t ds_offset = (char *)ds_map - (char *)brw->batch.batch.map;
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
- if (brw->batch.ring == BLT_RING && devinfo->gen >= 6) {
- const unsigned n_dwords = devinfo->gen >= 8 ? 5 : 4;
- BEGIN_BATCH_BLT(n_dwords);
- OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- if (n_dwords == 5)
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else {
- int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
- if (devinfo->gen >= 6) {
- flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_CONST_CACHE_INVALIDATE |
- PIPE_CONTROL_DATA_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_VF_CACHE_INVALIDATE |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CS_STALL;
- }
- brw_emit_pipe_control_flush(brw, flags);
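+   /* With the BLT ring gone, this is always a PIPE_CONTROL; on Gen6+ we also
+    * flush/invalidate the other caches and stall the command streamer.
+    */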
+ int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
+ if (devinfo->gen >= 6) {
+ flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+ PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+ PIPE_CONTROL_DATA_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+ PIPE_CONTROL_CS_STALL;
}
+ brw_emit_pipe_control_flush(brw, flags);
}
int
while (--pad);
}
- intel_batchbuffer_data(brw, &uf, sizeof(uf), RENDER_RING);
+ intel_batchbuffer_data(brw, &uf, sizeof(uf));
}
assert(batch->blorp->driver_ctx == batch->driver_batch);
struct brw_context *brw = batch->driver_batch;
- intel_batchbuffer_begin(brw, n, RENDER_RING);
+ intel_batchbuffer_begin(brw, n);
uint32_t *map = brw->batch.map_next;
brw->batch.map_next += n;
intel_batchbuffer_advance(brw);
brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
retry:
- intel_batchbuffer_require_space(brw, 1400, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 1400);
brw_require_statebuffer_space(brw, 600);
intel_batchbuffer_save_state(brw);
brw->batch.no_wrap = true;
UNUSED static void *
emit_dwords(struct brw_context *brw, unsigned n)
{
- intel_batchbuffer_begin(brw, n, RENDER_RING);
+ intel_batchbuffer_begin(brw, n);
uint32_t *map = brw->batch.map_next;
brw->batch.map_next += n;
intel_batchbuffer_advance(brw);
batch->needs_sol_reset = false;
batch->state_base_address_emitted = false;
- /* We don't know what ring the new batch will be sent to until we see the
- * first BEGIN_BATCH or BEGIN_BATCH_BLT. Mark it as unknown.
- */
- batch->ring = UNKNOWN_RING;
-
if (batch->state_batch_sizes)
_mesa_hash_table_clear(batch->state_batch_sizes, NULL);
}
brw->batch.exec_count = brw->batch.saved.exec_count;
brw->batch.map_next = brw->batch.saved.map_next;
- if (USED_BATCH(brw->batch) == 0)
- brw->batch.ring = UNKNOWN_RING;
}
void
}
void
-intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
- enum brw_gpu_ring ring)
+intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz)
{
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct intel_batchbuffer *batch = &brw->batch;
- /* If we're switching rings, implicitly flush the batch. */
- if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING &&
- devinfo->gen >= 6) {
- intel_batchbuffer_flush(brw);
- }
-
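+   /* Only the render ring is used now, so there is no implicit flush for a
+    * ring switch; we just wrap the batch when it is nearly full.
+    */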
const unsigned batch_used = USED_BATCH(*batch) * 4;
if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) {
intel_batchbuffer_flush(brw);
batch->map_next = (void *) batch->batch.map + batch_used;
assert(batch_used + sz < batch->batch.bo->size);
}
-
- /* The intel_batchbuffer_flush() calls above might have changed
- * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end.
- */
- brw->batch.ring = ring;
}
/**
*/
brw_emit_query_end(brw);
- if (brw->batch.ring == RENDER_RING) {
- /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which
- * assume that the L3 cache is configured according to the hardware
- * defaults. On Kernel 4.16+, we no longer need to do this.
- */
- if (devinfo->gen >= 7 &&
- !(brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION))
- gen7_restore_default_l3_config(brw);
-
- if (devinfo->is_haswell) {
- /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
- * 3DSTATE_CC_STATE_POINTERS > "Note":
- *
- * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
- * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
- *
- * From the example in the docs, it seems to expect a regular pipe control
- * flush here as well. We may have done it already, but meh.
- *
- * See also WaAvoidRCZCounterRollover.
- */
- brw_emit_mi_flush(brw);
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
- OUT_BATCH(brw->cc.state_offset | 1);
- ADVANCE_BATCH();
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_CS_STALL);
- }
+   /* Work around L3 state leaking into contexts that set MI_RESTORE_INHIBIT,
+    * which assume that the L3 cache is configured according to the hardware
+    * defaults. On Kernel 4.16+, we no longer need to do this.
+    */
+ if (devinfo->gen >= 7 &&
+ !(brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION))
+ gen7_restore_default_l3_config(brw);
- /* Do not restore push constant packets during context restore. */
- if (devinfo->gen >= 7)
- gen10_emit_isp_disable(brw);
+ if (devinfo->is_haswell) {
+ /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
+ * 3DSTATE_CC_STATE_POINTERS > "Note":
+ *
+ * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
+ * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
+ *
+       * From the example in the docs, a regular pipe control flush seems to be
+       * expected here as well. We may have done it already, but meh.
+ *
+ * See also WaAvoidRCZCounterRollover.
+ */
+ brw_emit_mi_flush(brw);
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
+ OUT_BATCH(brw->cc.state_offset | 1);
+ ADVANCE_BATCH();
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_CS_STALL);
}
+ /* Do not restore push constant packets during context restore. */
+ if (devinfo->gen >= 7)
+ gen10_emit_isp_disable(brw);
+
/* Emit MI_BATCH_BUFFER_END to finish our batch. Note that execbuf2
* requires our batch size to be QWord aligned, so we pad it out if
* necessary by emitting an extra MI_NOOP after the end.
*/
- intel_batchbuffer_require_space(brw, 8, brw->batch.ring);
+ intel_batchbuffer_require_space(brw, 8);
*brw->batch.map_next++ = MI_BATCH_BUFFER_END;
if (USED_BATCH(brw->batch) & 1) {
*brw->batch.map_next++ = MI_NOOP;
static int
submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
{
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
__DRIscreen *dri_screen = brw->screen->driScrnPriv;
struct intel_batchbuffer *batch = &brw->batch;
int ret = 0;
* To avoid stalling, execobject.offset should match the current
* address of that object within the active context.
*/
- assert(devinfo->gen < 6 || batch->ring == RENDER_RING);
int flags = I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
if (batch->needs_sol_reset)
void
intel_batchbuffer_data(struct brw_context *brw,
- const void *data, GLuint bytes, enum brw_gpu_ring ring)
+ const void *data, GLuint bytes)
{
assert((bytes & 3) == 0);
- intel_batchbuffer_require_space(brw, bytes, ring);
+ intel_batchbuffer_require_space(brw, bytes);
memcpy(brw->batch.map_next, data, bytes);
brw->batch.map_next += bytes >> 2;
}
void intel_batchbuffer_free(struct intel_batchbuffer *batch);
void intel_batchbuffer_save_state(struct brw_context *brw);
void intel_batchbuffer_reset_to_saved(struct brw_context *brw);
-void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
- enum brw_gpu_ring ring);
+void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz);
int _intel_batchbuffer_flush_fence(struct brw_context *brw,
int in_fence_fd, int *out_fence_fd,
const char *file, int line);
* intel_buffer_dword() calls.
*/
void intel_batchbuffer_data(struct brw_context *brw,
- const void *data, GLuint bytes,
- enum brw_gpu_ring ring);
+ const void *data, GLuint bytes);
bool brw_batch_has_aperture_space(struct brw_context *brw,
unsigned extra_space_in_bytes);
}
static inline void
-intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring)
+intel_batchbuffer_begin(struct brw_context *brw, int n)
{
- intel_batchbuffer_require_space(brw, n * 4, ring);
+ intel_batchbuffer_require_space(brw, n * 4);
#ifdef DEBUG
brw->batch.emit = USED_BATCH(brw->batch);
}
#define BEGIN_BATCH(n) do { \
- intel_batchbuffer_begin(brw, (n), RENDER_RING); \
+ intel_batchbuffer_begin(brw, (n)); \
uint32_t *__map = brw->batch.map_next; \
brw->batch.map_next += (n)
#define BEGIN_BATCH_BLT(n) do { \
- intel_batchbuffer_begin(brw, (n), BLT_RING); \
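+   /* Blits are only emitted on Gen4-5 now; they go on the same ring as 3D. */ \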
+ assert(brw->screen->devinfo.gen < 6); \
+ intel_batchbuffer_begin(brw, (n)); \
uint32_t *__map = brw->batch.map_next; \
brw->batch.map_next += (n)
unsigned length = devinfo->gen >= 8 ? 10 : 8;
- intel_batchbuffer_require_space(brw, length * 4, BLT_RING);
+ intel_batchbuffer_require_space(brw, length * 4);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__func__,
src_buffer, src_pitch, src_offset, src_x, src_y,
unsigned xy_setup_blt_length = devinfo->gen >= 8 ? 10 : 8;
intel_batchbuffer_require_space(brw, (xy_setup_blt_length * 4) +
- (3 * 4) + dwords * 4, BLT_RING);
+ (3 * 4) + dwords * 4);
opcode = XY_SETUP_BLT_CMD;
if (cpp == 4)
OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X));
ADVANCE_BATCH();
- intel_batchbuffer_data(brw, src_bits, dwords * 4, BLT_RING);
+ intel_batchbuffer_data(brw, src_bits, dwords * 4);
brw_emit_mi_flush(brw);