anv: Move cmd_buffer_config_l3 into anv_cmd_buffer.c
author    Jason Ekstrand <jason.ekstrand@intel.com>
          Tue, 23 Aug 2016 00:13:51 +0000 (17:13 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
          Sat, 3 Sep 2016 15:23:07 +0000 (08:23 -0700)
This is the only remaining part of genX_l3.c and there's really no good
reason for it to be in its own file.
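As part of the move, genX(cmd_buffer_config_l3) also takes the L3
configuration directly instead of digging it out of the pipeline, so
callers now pass pipeline->urb.l3_config themselves.  A minimal
before/after sketch of a call site, using the names from this diff:

   /* Before: the helper derived the config from the pipeline. */
   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline);

   /* After: callers hand over the gen_l3_config directly. */
   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config);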

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/intel/vulkan/Makefile.sources
src/intel/vulkan/anv_genX.h
src/intel/vulkan/gen7_cmd_buffer.c
src/intel/vulkan/gen8_cmd_buffer.c
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/genX_l3.c [deleted file]

diff --git a/src/intel/vulkan/Makefile.sources b/src/intel/vulkan/Makefile.sources
index 9e56fe70eeac93e722f9f3013aa09ad8aa070d39..8b4b97ffd23a7adc23043efae4bbce9111430f75 100644 (file)
@@ -73,7 +73,6 @@ VULKAN_GENERATED_FILES := \
 
 GEN7_FILES := \
        genX_cmd_buffer.c \
-       genX_l3.c \
        genX_pipeline.c \
        gen7_cmd_buffer.c \
        gen7_pipeline.c \
@@ -81,7 +80,6 @@ GEN7_FILES := \
 
 GEN75_FILES := \
        genX_cmd_buffer.c \
-       genX_l3.c \
        genX_pipeline.c \
        gen7_cmd_buffer.c \
        gen7_pipeline.c \
@@ -89,7 +87,6 @@ GEN75_FILES := \
 
 GEN8_FILES := \
        genX_cmd_buffer.c \
-       genX_l3.c \
        genX_pipeline.c \
        gen8_cmd_buffer.c \
        gen8_pipeline.c \
@@ -97,7 +94,6 @@ GEN8_FILES := \
 
 GEN9_FILES := \
        genX_cmd_buffer.c \
-       genX_l3.c \
        genX_pipeline.c \
        gen8_cmd_buffer.c \
        gen8_pipeline.c \
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 11814dd28339df89274c8f7bc5a8831ea658f37e..bfec1aeca8607010525b9ef92d9d16845314f3ec 100644 (file)
@@ -45,7 +45,7 @@ void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
 void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
 
 void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
-                                const struct anv_pipeline *pipeline);
+                                const struct gen_l3_config *cfg);
 
 void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer);
 void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
index 61778aa25b106433d5919c079fba26ffb54d87d2..b627ef0a6ff9d000778cd422d3dda3030a130065 100644 (file)
@@ -189,7 +189,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
 
    assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
 
-   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline);
+   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config);
 
    genX(flush_pipeline_select_gpgpu)(cmd_buffer);
 
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index e22b4e2132fff726c53c59e49b601e3f11ad849a..70586086efcf919cf6a05cc92f8526539849c190 100644 (file)
@@ -380,7 +380,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
 
    assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
 
-   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline);
+   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config);
 
    genX(flush_pipeline_select_gpgpu)(cmd_buffer);
 
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 2806231ea2aa7c2f5cde37c941cbbc09cc3aa445..b6f93e7740d2d2c2f8b98afc2819acce14cabb7c 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "anv_private.h"
 
+#include "common/gen_l3_config.h"
 #include "genxml/gen_macros.h"
 #include "genxml/genX_pack.h"
 
@@ -149,6 +150,163 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
    }
 }
 
+#define IVB_L3SQCREG1_SQGHPCI_DEFAULT     0x00730000
+#define VLV_L3SQCREG1_SQGHPCI_DEFAULT     0x00d30000
+#define HSW_L3SQCREG1_SQGHPCI_DEFAULT     0x00610000
+
+/**
+ * Program the hardware to use the specified L3 configuration.
+ */
+void
+genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
+                           const struct gen_l3_config *cfg)
+{
+   assert(cfg);
+   if (cfg == cmd_buffer->state.current_l3_config)
+      return;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_L3)) {
+      fprintf(stderr, "L3 config transition: ");
+      gen_dump_l3_config(cfg, stderr);
+   }
+
+   const bool has_slm = cfg->n[GEN_L3P_SLM];
+
+   /* According to the hardware docs, the L3 partitioning can only be changed
+    * while the pipeline is completely drained and the caches are flushed,
+    * which involves a first PIPE_CONTROL flush which stalls the pipeline...
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      pc.DCFlushEnable = true;
+      pc.PostSyncOperation = NoWrite;
+      pc.CommandStreamerStallEnable = true;
+   }
+
+   /* ...followed by a second pipelined PIPE_CONTROL that initiates
+    * invalidation of the relevant caches.  Note that because RO invalidation
+    * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
+    * command is processed by the CS) we cannot combine it with the previous
+    * stalling flush as the hardware documentation suggests, because that
+    * would cause the CS to stall on previous rendering *after* RO
+    * invalidation and wouldn't prevent the RO caches from being polluted by
+    * concurrent rendering before the stall completes.  This intentionally
+    * doesn't implement the SKL+ hardware workaround suggesting to enable CS
+    * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
+    * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
+    * already guarantee that there is no concurrent GPGPU kernel execution
+    * (see SKL HSD 2132585).
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      pc.TextureCacheInvalidationEnable = true;
+      pc.ConstantCacheInvalidationEnable = true;
+      pc.InstructionCacheInvalidateEnable = true;
+      pc.StateCacheInvalidationEnable = true;
+      pc.PostSyncOperation = NoWrite;
+   }
+
+   /* Now send a third stalling flush to make sure that invalidation is
+    * complete when the L3 configuration registers are modified.
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      pc.DCFlushEnable = true;
+      pc.PostSyncOperation = NoWrite;
+      pc.CommandStreamerStallEnable = true;
+   }
+
+#if GEN_GEN >= 8
+
+   assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]);
+
+   uint32_t l3cr;
+   anv_pack_struct(&l3cr, GENX(L3CNTLREG),
+                   .SLMEnable = has_slm,
+                   .URBAllocation = cfg->n[GEN_L3P_URB],
+                   .ROAllocation = cfg->n[GEN_L3P_RO],
+                   .DCAllocation = cfg->n[GEN_L3P_DC],
+                   .AllAllocation = cfg->n[GEN_L3P_ALL]);
+
+   /* Set up the L3 partitioning. */
+   emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG_num), l3cr);
+
+#else
+
+   const bool has_dc = cfg->n[GEN_L3P_DC] || cfg->n[GEN_L3P_ALL];
+   const bool has_is = cfg->n[GEN_L3P_IS] || cfg->n[GEN_L3P_RO] ||
+                       cfg->n[GEN_L3P_ALL];
+   const bool has_c = cfg->n[GEN_L3P_C] || cfg->n[GEN_L3P_RO] ||
+                      cfg->n[GEN_L3P_ALL];
+   const bool has_t = cfg->n[GEN_L3P_T] || cfg->n[GEN_L3P_RO] ||
+                      cfg->n[GEN_L3P_ALL];
+
+   assert(!cfg->n[GEN_L3P_ALL]);
+
+   /* When enabled SLM only uses a portion of the L3 on half of the banks,
+    * the matching space on the remaining banks has to be allocated to a
+    * client (URB for all validated configurations) set to the
+    * lower-bandwidth 2-bank address hashing mode.
+    */
+   const struct gen_device_info *devinfo = &cmd_buffer->device->info;
+   const bool urb_low_bw = has_slm && !devinfo->is_baytrail;
+   assert(!urb_low_bw || cfg->n[GEN_L3P_URB] == cfg->n[GEN_L3P_SLM]);
+
+   /* Minimum number of ways that can be allocated to the URB. */
+   const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0);
+   assert(cfg->n[GEN_L3P_URB] >= n0_urb);
+
+   uint32_t l3sqcr1, l3cr2, l3cr3;
+   anv_pack_struct(&l3sqcr1, GENX(L3SQCREG1),
+                   .ConvertDC_UC = !has_dc,
+                   .ConvertIS_UC = !has_is,
+                   .ConvertC_UC = !has_c,
+                   .ConvertT_UC = !has_t);
+   l3sqcr1 |=
+      GEN_IS_HASWELL ? HSW_L3SQCREG1_SQGHPCI_DEFAULT :
+      devinfo->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT :
+      IVB_L3SQCREG1_SQGHPCI_DEFAULT;
+
+   anv_pack_struct(&l3cr2, GENX(L3CNTLREG2),
+                   .SLMEnable = has_slm,
+                   .URBLowBandwidth = urb_low_bw,
+                   .URBAllocation = cfg->n[GEN_L3P_URB],
+#if !GEN_IS_HASWELL
+                   .ALLAllocation = cfg->n[GEN_L3P_ALL],
+#endif
+                   .ROAllocation = cfg->n[GEN_L3P_RO],
+                   .DCAllocation = cfg->n[GEN_L3P_DC]);
+
+   anv_pack_struct(&l3cr3, GENX(L3CNTLREG3),
+                   .ISAllocation = cfg->n[GEN_L3P_IS],
+                   .ISLowBandwidth = 0,
+                   .CAllocation = cfg->n[GEN_L3P_C],
+                   .CLowBandwidth = 0,
+                   .TAllocation = cfg->n[GEN_L3P_T],
+                   .TLowBandwidth = 0);
+
+   /* Set up the L3 partitioning. */
+   emit_lri(&cmd_buffer->batch, GENX(L3SQCREG1_num), l3sqcr1);
+   emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2_num), l3cr2);
+   emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3_num), l3cr3);
+
+#if GEN_IS_HASWELL
+   if (cmd_buffer->device->instance->physicalDevice.cmd_parser_version >= 4) {
+      /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep
+       * them disabled to avoid crashing the system hard.
+       */
+      uint32_t scratch1, chicken3;
+      anv_pack_struct(&scratch1, GENX(SCRATCH1),
+                      .L3AtomicDisable = !has_dc);
+      anv_pack_struct(&chicken3, GENX(CHICKEN3),
+                      .L3AtomicDisable = !has_dc);
+      emit_lri(&cmd_buffer->batch, GENX(SCRATCH1_num), scratch1);
+      emit_lri(&cmd_buffer->batch, GENX(CHICKEN3_num), chicken3);
+   }
+#endif
+
+#endif
+
+   cmd_buffer->state.current_l3_config = cfg;
+}
+
 void
 genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -471,7 +629,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
 
    assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
 
-   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline);
+   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config);
 
    genX(flush_pipeline_select_3d)(cmd_buffer);
 
diff --git a/src/intel/vulkan/genX_l3.c b/src/intel/vulkan/genX_l3.c
deleted file mode 100644 (file)
index 3a96693..0000000
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "anv_private.h"
-
-#include "common/gen_l3_config.h"
-#include "genxml/gen_macros.h"
-#include "genxml/genX_pack.h"
-
-#define emit_lri(batch, reg, imm)                               \
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {     \
-      lri.RegisterOffset = __anv_reg_num(reg);                  \
-      lri.DataDWord = imm;                                      \
-   }
-
-#define IVB_L3SQCREG1_SQGHPCI_DEFAULT     0x00730000
-#define VLV_L3SQCREG1_SQGHPCI_DEFAULT     0x00d30000
-#define HSW_L3SQCREG1_SQGHPCI_DEFAULT     0x00610000
-
-/**
- * Program the hardware to use the specified L3 configuration.
- */
-static void
-setup_l3_config(struct anv_cmd_buffer *cmd_buffer/*, struct brw_context *brw*/,
-                const struct gen_l3_config *cfg)
-{
-   const bool has_slm = cfg->n[GEN_L3P_SLM];
-
-   /* According to the hardware docs, the L3 partitioning can only be changed
-    * while the pipeline is completely drained and the caches are flushed,
-    * which involves a first PIPE_CONTROL flush which stalls the pipeline...
-    */
-   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-      pc.DCFlushEnable = true;
-      pc.PostSyncOperation = NoWrite;
-      pc.CommandStreamerStallEnable = true;
-   }
-
-   /* ...followed by a second pipelined PIPE_CONTROL that initiates
-    * invalidation of the relevant caches.  Note that because RO invalidation
-    * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
-    * command is processed by the CS) we cannot combine it with the previous
-    * stalling flush as the hardware documentation suggests, because that
-    * would cause the CS to stall on previous rendering *after* RO
-    * invalidation and wouldn't prevent the RO caches from being polluted by
-    * concurrent rendering before the stall completes.  This intentionally
-    * doesn't implement the SKL+ hardware workaround suggesting to enable CS
-    * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
-    * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
-    * already guarantee that there is no concurrent GPGPU kernel execution
-    * (see SKL HSD 2132585).
-    */
-   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-      pc.TextureCacheInvalidationEnable = true;
-      pc.ConstantCacheInvalidationEnable = true;
-      pc.InstructionCacheInvalidateEnable = true;
-      pc.StateCacheInvalidationEnable = true;
-      pc.PostSyncOperation = NoWrite;
-   }
-
-   /* Now send a third stalling flush to make sure that invalidation is
-    * complete when the L3 configuration registers are modified.
-    */
-   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-      pc.DCFlushEnable = true;
-      pc.PostSyncOperation = NoWrite;
-      pc.CommandStreamerStallEnable = true;
-   }
-
-#if GEN_GEN >= 8
-
-   assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]);
-
-   uint32_t l3cr;
-   anv_pack_struct(&l3cr, GENX(L3CNTLREG),
-                   .SLMEnable = has_slm,
-                   .URBAllocation = cfg->n[GEN_L3P_URB],
-                   .ROAllocation = cfg->n[GEN_L3P_RO],
-                   .DCAllocation = cfg->n[GEN_L3P_DC],
-                   .AllAllocation = cfg->n[GEN_L3P_ALL]);
-
-   /* Set up the L3 partitioning. */
-   emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr);
-
-#else
-
-   const bool has_dc = cfg->n[GEN_L3P_DC] || cfg->n[GEN_L3P_ALL];
-   const bool has_is = cfg->n[GEN_L3P_IS] || cfg->n[GEN_L3P_RO] ||
-                       cfg->n[GEN_L3P_ALL];
-   const bool has_c = cfg->n[GEN_L3P_C] || cfg->n[GEN_L3P_RO] ||
-                      cfg->n[GEN_L3P_ALL];
-   const bool has_t = cfg->n[GEN_L3P_T] || cfg->n[GEN_L3P_RO] ||
-                      cfg->n[GEN_L3P_ALL];
-
-   assert(!cfg->n[GEN_L3P_ALL]);
-
-   /* When enabled SLM only uses a portion of the L3 on half of the banks,
-    * the matching space on the remaining banks has to be allocated to a
-    * client (URB for all validated configurations) set to the
-    * lower-bandwidth 2-bank address hashing mode.
-    */
-   const struct gen_device_info *devinfo = &cmd_buffer->device->info;
-   const bool urb_low_bw = has_slm && !devinfo->is_baytrail;
-   assert(!urb_low_bw || cfg->n[GEN_L3P_URB] == cfg->n[GEN_L3P_SLM]);
-
-   /* Minimum number of ways that can be allocated to the URB. */
-   const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0);
-   assert(cfg->n[GEN_L3P_URB] >= n0_urb);
-
-   uint32_t l3sqcr1, l3cr2, l3cr3;
-   anv_pack_struct(&l3sqcr1, GENX(L3SQCREG1),
-                   .ConvertDC_UC = !has_dc,
-                   .ConvertIS_UC = !has_is,
-                   .ConvertC_UC = !has_c,
-                   .ConvertT_UC = !has_t);
-   l3sqcr1 |=
-      GEN_IS_HASWELL ? HSW_L3SQCREG1_SQGHPCI_DEFAULT :
-      devinfo->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT :
-      IVB_L3SQCREG1_SQGHPCI_DEFAULT;
-
-   anv_pack_struct(&l3cr2, GENX(L3CNTLREG2),
-                   .SLMEnable = has_slm,
-                   .URBLowBandwidth = urb_low_bw,
-                   .URBAllocation = cfg->n[GEN_L3P_URB],
-#if !GEN_IS_HASWELL
-                   .ALLAllocation = cfg->n[GEN_L3P_ALL],
-#endif
-                   .ROAllocation = cfg->n[GEN_L3P_RO],
-                   .DCAllocation = cfg->n[GEN_L3P_DC]);
-
-   anv_pack_struct(&l3cr3, GENX(L3CNTLREG3),
-                   .ISAllocation = cfg->n[GEN_L3P_IS],
-                   .ISLowBandwidth = 0,
-                   .CAllocation = cfg->n[GEN_L3P_C],
-                   .CLowBandwidth = 0,
-                   .TAllocation = cfg->n[GEN_L3P_T],
-                   .TLowBandwidth = 0);
-
-   /* Set up the L3 partitioning. */
-   emit_lri(&cmd_buffer->batch, GENX(L3SQCREG1), l3sqcr1);
-   emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2), l3cr2);
-   emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3), l3cr3);
-
-#if GEN_IS_HASWELL
-   if (cmd_buffer->device->instance->physicalDevice.cmd_parser_version >= 4) {
-      /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep
-       * them disabled to avoid crashing the system hard.
-       */
-      uint32_t scratch1, chicken3;
-      anv_pack_struct(&scratch1, GENX(SCRATCH1),
-                      .L3AtomicDisable = !has_dc);
-      anv_pack_struct(&chicken3, GENX(CHICKEN3),
-                      .L3AtomicDisable = !has_dc);
-      emit_lri(&cmd_buffer->batch, GENX(SCRATCH1), scratch1);
-      emit_lri(&cmd_buffer->batch, GENX(CHICKEN3), chicken3);
-   }
-#endif
-
-#endif
-
-}
-
-void
-genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
-                           const struct anv_pipeline *pipeline)
-{
-   struct anv_cmd_state *state = &cmd_buffer->state;
-   const struct gen_l3_config *const cfg = pipeline->urb.l3_config;
-   assert(cfg);
-   if (cfg != state->current_l3_config) {
-      setup_l3_config(cmd_buffer, cfg);
-      state->current_l3_config = cfg;
-
-      if (unlikely(INTEL_DEBUG & DEBUG_L3)) {
-         fprintf(stderr, "L3 config transition: ");
-         gen_dump_l3_config(cfg, stderr);
-      }
-   }
-}