i965/eu: Take into account the target cache argument in brw_set_dp_read_message.

[mesa.git] / src / mesa / drivers / dri / i965 / gen7_l3_state.c
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c

index b63e61ca8f0939f1cbd4450b6cd31b7dec4cebbe..49b59040b1cdae1a2b0d843a81387da6f39e79be 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -298,7 +298,12 @@ static struct brw_l3_weights
  get_pipeline_state_l3_weights(const struct brw_context *brw)
  {
     const struct brw_stage_state *stage_states[] = {
-      &brw->vs.base, &brw->gs.base, &brw->wm.base, &brw->cs.base
+      [MESA_SHADER_VERTEX] = &brw->vs.base,
+      [MESA_SHADER_TESS_CTRL] = &brw->tcs.base,
+      [MESA_SHADER_TESS_EVAL] = &brw->tes.base,
+      [MESA_SHADER_GEOMETRY] = &brw->gs.base,
+      [MESA_SHADER_FRAGMENT] = &brw->wm.base,
+      [MESA_SHADER_COMPUTE] = &brw->cs.base
     };
     bool needs_dc = false, needs_slm = false;
  
@@ -330,23 +335,39 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
  
     /* According to the hardware docs, the L3 partitioning can only be changed
      * while the pipeline is completely drained and the caches are flushed,
-    * which involves a first PIPE_CONTROL flush which stalls the pipeline and
-    * initiates invalidation of the relevant caches...
+    * which involves a first PIPE_CONTROL flush which stalls the pipeline...
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_DATA_CACHE_FLUSH |
+                               PIPE_CONTROL_NO_WRITE |
+                               PIPE_CONTROL_CS_STALL);
+
+   /* ...followed by a second pipelined PIPE_CONTROL that initiates
+    * invalidation of the relevant caches.  Note that because RO invalidation
+    * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
+    * command is processed by the CS) we cannot combine it with the previous
+    * stalling flush as the hardware documentation suggests, because that
+    * would cause the CS to stall on previous rendering *after* RO
+    * invalidation and wouldn't prevent the RO caches from being polluted by
+    * concurrent rendering before the stall completes.  This intentionally
+    * doesn't implement the SKL+ hardware workaround suggesting to enable CS
+    * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
+    * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
+    * already guarantee that there is no concurrent GPGPU kernel execution
+    * (see SKL HSD 2132585).
      */
     brw_emit_pipe_control_flush(brw,
                                 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                 PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                 PIPE_CONTROL_INSTRUCTION_INVALIDATE |
-                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
-                               PIPE_CONTROL_NO_WRITE |
-                               PIPE_CONTROL_CS_STALL);
+                               PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+                               PIPE_CONTROL_NO_WRITE);
  
-   /* ...followed by a second stalling flush which guarantees that
-    * invalidation is complete when the L3 configuration registers are
-    * modified.
+   /* Now send a third stalling flush to make sure that invalidation is
+    * complete when the L3 configuration registers are modified.
      */
     brw_emit_pipe_control_flush(brw,
-                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+                               PIPE_CONTROL_DATA_CACHE_FLUSH |
                                 PIPE_CONTROL_NO_WRITE |
                                 PIPE_CONTROL_CS_STALL);
  
@@ -515,6 +536,7 @@ const struct brw_tracked_state gen7_l3_state = {
     .dirty = {
        .mesa = 0,
        .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP |
               BRW_NEW_CS_PROG_DATA |
               BRW_NEW_FS_PROG_DATA |
               BRW_NEW_GS_PROG_DATA |