anv: Fix cache pollution race during L3 partitioning set-up.
author: Jordan Justen <jordan.l.justen@intel.com>
Mon, 28 Mar 2016 19:08:49 +0000 (12:08 -0700)
committer: Jordan Justen <jordan.l.justen@intel.com>
Tue, 29 Mar 2016 00:01:35 +0000 (17:01 -0700)
Port 0aa4f99f562a05880a779707cbcd46be459863bf to anv.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
src/intel/vulkan/gen7_cmd_buffer.c
src/intel/vulkan/gen8_cmd_buffer.c

index 04c1d3b34775e6c549298b9eb621fe48254a293d..06b3a75cbef59ca2945f364c142137c7a6aa67b2 100644 (file)
@@ -323,22 +323,38 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
    bool changed = cmd_buffer->state.current_l3_config != l3cr2_val;
 
    if (changed) {
-      /* According to the hardware docs, the L3 partitioning can only be changed
-       * while the pipeline is completely drained and the caches are flushed,
-       * which involves a first PIPE_CONTROL flush which stalls the pipeline and
-       * initiates invalidation of the relevant caches...
+      /* According to the hardware docs, the L3 partitioning can only be
+       * changed while the pipeline is completely drained and the caches are
+       * flushed, which involves a first PIPE_CONTROL flush which stalls the
+       * pipeline...
        */
       anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .TextureCacheInvalidationEnable = true,
-                     .ConstantCacheInvalidationEnable = true,
-                     .InstructionCacheInvalidateEnable = true,
                      .DCFlushEnable = true,
                      .PostSyncOperation = NoWrite,
                      .CommandStreamerStallEnable = true);
 
-      /* ...followed by a second stalling flush which guarantees that
-       * invalidation is complete when the L3 configuration registers are
-       * modified.
+      /* ...followed by a second pipelined PIPE_CONTROL that initiates
+       * invalidation of the relevant caches. Note that because RO
+       * invalidation happens at the top of the pipeline (i.e. right away as
+       * the PIPE_CONTROL command is processed by the CS) we cannot combine it
+       * with the previous stalling flush as the hardware documentation
+       * suggests, because that would cause the CS to stall on previous
+       * rendering *after* RO invalidation and wouldn't prevent the RO caches
+       * from being polluted by concurrent rendering before the stall
+       * completes. This intentionally doesn't implement the SKL+ hardware
+       * workaround suggesting to enable CS stall on PIPE_CONTROLs with the
+       * texture cache invalidation bit set for GPGPU workloads because the
+       * previous and subsequent PIPE_CONTROLs already guarantee that there is
+       * no concurrent GPGPU kernel execution (see SKL HSD 2132585).
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                     .TextureCacheInvalidationEnable = true,
+                     .ConstantCacheInvalidationEnable = true,
+                     .InstructionCacheInvalidateEnable = true,
+                     .PostSyncOperation = NoWrite);
+
+      /* Now send a third stalling flush to make sure that invalidation is
+       * complete when the L3 configuration registers are modified.
        */
       anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                      .DCFlushEnable = true,
index 3fb5c276107317ffc498aaf7188184e17187cce0..dab1d7411e70d3469bc064e4603a11dc7ad9b4bb 100644 (file)
@@ -134,22 +134,38 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
    bool changed = cmd_buffer->state.current_l3_config != l3cr_val;
 
    if (changed) {
-      /* According to the hardware docs, the L3 partitioning can only be changed
-       * while the pipeline is completely drained and the caches are flushed,
-       * which involves a first PIPE_CONTROL flush which stalls the pipeline and
-       * initiates invalidation of the relevant caches...
+      /* According to the hardware docs, the L3 partitioning can only be
+       * changed while the pipeline is completely drained and the caches are
+       * flushed, which involves a first PIPE_CONTROL flush which stalls the
+       * pipeline...
        */
       anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .TextureCacheInvalidationEnable = true,
-                     .ConstantCacheInvalidationEnable = true,
-                     .InstructionCacheInvalidateEnable = true,
                      .DCFlushEnable = true,
                      .PostSyncOperation = NoWrite,
                      .CommandStreamerStallEnable = true);
 
-      /* ...followed by a second stalling flush which guarantees that
-       * invalidation is complete when the L3 configuration registers are
-       * modified.
+      /* ...followed by a second pipelined PIPE_CONTROL that initiates
+       * invalidation of the relevant caches. Note that because RO
+       * invalidation happens at the top of the pipeline (i.e. right away as
+       * the PIPE_CONTROL command is processed by the CS) we cannot combine it
+       * with the previous stalling flush as the hardware documentation
+       * suggests, because that would cause the CS to stall on previous
+       * rendering *after* RO invalidation and wouldn't prevent the RO caches
+       * from being polluted by concurrent rendering before the stall
+       * completes. This intentionally doesn't implement the SKL+ hardware
+       * workaround suggesting to enable CS stall on PIPE_CONTROLs with the
+       * texture cache invalidation bit set for GPGPU workloads because the
+       * previous and subsequent PIPE_CONTROLs already guarantee that there is
+       * no concurrent GPGPU kernel execution (see SKL HSD 2132585).
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                     .TextureCacheInvalidationEnable = true,
+                     .ConstantCacheInvalidationEnable = true,
+                     .InstructionCacheInvalidateEnable = true,
+                     .PostSyncOperation = NoWrite);
+
+      /* Now send a third stalling flush to make sure that invalidation is
+       * complete when the L3 configuration registers are modified.
        */
       anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                      .DCFlushEnable = true,