i965/eu: Take into account the target cache argument in brw_set_dp_read_message.

[mesa.git] / src / mesa / drivers / dri / i965 / gen7_l3_state.c
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c

index 79569358914c80977d7f25fd97074fa150c3f008..49b59040b1cdae1a2b0d843a81387da6f39e79be 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -59,7 +59,9 @@ struct brw_l3_config {
  };
  
  /**
- * IVB/HSW validated L3 configurations.
+ * IVB/HSW validated L3 configurations.  The first entry will be used as
+ * default by gen7_restore_default_l3_config(), otherwise the ordering is
+ * unimportant.
   */
  static const struct brw_l3_config ivb_l3_configs[] = {
     /* SLM URB ALL DC  RO  IS   C   T */
@@ -81,7 +83,7 @@ static const struct brw_l3_config ivb_l3_configs[] = {
  };
  
  /**
- * VLV validated L3 configurations.
+ * VLV validated L3 configurations.  \sa ivb_l3_configs.
   */
  static const struct brw_l3_config vlv_l3_configs[] = {
     /* SLM URB ALL DC  RO  IS   C   T */
@@ -97,7 +99,7 @@ static const struct brw_l3_config vlv_l3_configs[] = {
  };
  
  /**
- * BDW validated L3 configurations.
+ * BDW validated L3 configurations.  \sa ivb_l3_configs.
   */
  static const struct brw_l3_config bdw_l3_configs[] = {
     /* SLM URB ALL DC  RO  IS   C   T */
@@ -113,7 +115,7 @@ static const struct brw_l3_config bdw_l3_configs[] = {
  };
  
  /**
- * CHV/SKL validated L3 configurations.
+ * CHV/SKL validated L3 configurations.  \sa ivb_l3_configs.
   */
  static const struct brw_l3_config chv_l3_configs[] = {
     /* SLM URB ALL DC  RO  IS   C   T */
@@ -296,7 +298,12 @@ static struct brw_l3_weights
  get_pipeline_state_l3_weights(const struct brw_context *brw)
  {
     const struct brw_stage_state *stage_states[] = {
-      &brw->vs.base, &brw->gs.base, &brw->wm.base, &brw->cs.base
+      [MESA_SHADER_VERTEX] = &brw->vs.base,
+      [MESA_SHADER_TESS_CTRL] = &brw->tcs.base,
+      [MESA_SHADER_TESS_EVAL] = &brw->tes.base,
+      [MESA_SHADER_GEOMETRY] = &brw->gs.base,
+      [MESA_SHADER_FRAGMENT] = &brw->wm.base,
+      [MESA_SHADER_COMPUTE] = &brw->cs.base
     };
     bool needs_dc = false, needs_slm = false;
  
@@ -328,23 +335,39 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
  
     /* According to the hardware docs, the L3 partitioning can only be changed
      * while the pipeline is completely drained and the caches are flushed,
-    * which involves a first PIPE_CONTROL flush which stalls the pipeline and
-    * initiates invalidation of the relevant caches...
+    * which involves a first PIPE_CONTROL flush which stalls the pipeline...
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_DATA_CACHE_FLUSH |
+                               PIPE_CONTROL_NO_WRITE |
+                               PIPE_CONTROL_CS_STALL);
+
+   /* ...followed by a second pipelined PIPE_CONTROL that initiates
+    * invalidation of the relevant caches.  Note that because RO invalidation
+    * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
+    * command is processed by the CS) we cannot combine it with the previous
+    * stalling flush as the hardware documentation suggests, because that
+    * would cause the CS to stall on previous rendering *after* RO
+    * invalidation and wouldn't prevent the RO caches from being polluted by
+    * concurrent rendering before the stall completes.  This intentionally
+    * doesn't implement the SKL+ hardware workaround suggesting to enable CS
+    * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
+    * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
+    * already guarantee that there is no concurrent GPGPU kernel execution
+    * (see SKL HSD 2132585).
      */
     brw_emit_pipe_control_flush(brw,
                                 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                 PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                 PIPE_CONTROL_INSTRUCTION_INVALIDATE |
-                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
-                               PIPE_CONTROL_NO_WRITE |
-                               PIPE_CONTROL_CS_STALL);
+                               PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+                               PIPE_CONTROL_NO_WRITE);
  
-   /* ...followed by a second stalling flush which guarantees that
-    * invalidation is complete when the L3 configuration registers are
-    * modified.
+   /* Now send a third stalling flush to make sure that invalidation is
+    * complete when the L3 configuration registers are modified.
      */
     brw_emit_pipe_control_flush(brw,
-                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+                               PIPE_CONTROL_DATA_CACHE_FLUSH |
                                 PIPE_CONTROL_NO_WRITE |
                                 PIPE_CONTROL_CS_STALL);
  
@@ -513,6 +536,7 @@ const struct brw_tracked_state gen7_l3_state = {
     .dirty = {
        .mesa = 0,
        .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP |
               BRW_NEW_CS_PROG_DATA |
               BRW_NEW_FS_PROG_DATA |
               BRW_NEW_GS_PROG_DATA |
@@ -520,3 +544,54 @@ const struct brw_tracked_state gen7_l3_state = {
     },
     .emit = emit_l3_state
  };
+
+/**
+ * Hack to restore the default L3 configuration.
+ *
+ * This will be called at the end of every batch in order to reset the L3
+ * configuration to the default values for the time being until the kernel is
+ * fixed.  Until kernel commit 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b
+ * (included in v4.1) we would set the MI_RESTORE_INHIBIT bit when submitting
+ * batch buffers for the default context used by the DDX, which meant that any
+ * context state changed by the GL would leak into the DDX, the assumption
+ * being that the DDX would initialize any state it cares about manually.  The
+ * DDX is however not careful enough to program an L3 configuration
+ * explicitly, and it makes assumptions about it (URB size) which won't hold
+ * and will cause it to misrender if we let our L3 set-up leak into the DDX.
+ *
+ * Since v4.1 of the Linux kernel the default context is saved and restored
+ * normally, so it's far less likely for our L3 programming to interfere with
+ * other contexts -- In fact restoring the default L3 configuration at the end
+ * of the batch will be redundant most of the time.  A kind of state leak is
+ * still possible though if the context making assumptions about L3 state is
+ * created immediately after our context was active (e.g. without the DDX
+ * default context being scheduled in between) because at present the DRM
+ * doesn't fully initialize the contents of newly created contexts and instead
+ * sets the MI_RESTORE_INHIBIT flag causing it to inherit the state from the
+ * last active context.
+ *
+ * It's possible to realize such a scenario if, say, an X server (or a GL
+ * application using an outdated non-L3-aware Mesa version) is started while
+ * another GL application is running and happens to have modified the L3
+ * configuration, or if no X server is running at all and a GL application
+ * using a non-L3-aware Mesa version is started after another GL application
+ * ran and modified the L3 configuration -- The latter situation can actually
+ * be reproduced easily on IVB in our CI system.
+ */
+void
+gen7_restore_default_l3_config(struct brw_context *brw)
+{
+   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+   /* For efficiency assume that the first entry of the array matches the
+    * default configuration.
+    *
+    * cfg is whatever get_l3_configs() returns for this device, used here
+    * directly as a pointer to a single configuration (presumably the start
+    * of the validated configuration table -- confirm against
+    * get_l3_configs()).  The assert below cross-checks that shortcut
+    * against the result of the regular get_l3_config() lookup for the
+    * default weights; it is compiled out when NDEBUG is defined.
+    * NOTE(review): the two 'false' arguments look like the needs_dc and
+    * needs_slm flags -- confirm against get_default_l3_weights().
+    *
+    * Further down, the hardware is only re-programmed when the default
+    * differs from the configuration currently recorded in brw->l3.config
+    * and brw->can_do_pipelined_register_writes is set.  In that case the
+    * L3 set-up and the URB size are updated and brw->l3.config is changed
+    * to point at the default entry.  Otherwise the function does nothing.
+    */
+   const struct brw_l3_config *const cfg = get_l3_configs(devinfo);
+   assert(cfg == get_l3_config(devinfo,
+                               get_default_l3_weights(devinfo, false, false)));
+
+   if (cfg != brw->l3.config && brw->can_do_pipelined_register_writes) {
+      setup_l3_config(brw, cfg);
+      update_urb_size(brw, cfg);
+      brw->l3.config = cfg;
+   }
+}