#include "anv_private.h"
+#include "common/gen_l3_config.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
}
}
+#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000
+#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000
+#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000
+
+/**
+ * Program the hardware to use the specified L3 configuration.
+ *
+ * cfg must be non-NULL (asserted).  If cfg is the same pointer as
+ * cmd_buffer->state.current_l3_config the function returns without emitting
+ * anything.  Otherwise it emits three PIPE_CONTROLs into cmd_buffer->batch,
+ * writes the L3 registers for the gen this file is compiled for through
+ * emit_lri(), and finally stores cfg in
+ * cmd_buffer->state.current_l3_config.
+ */
+void
+genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
+ const struct gen_l3_config *cfg)
+{
+ assert(cfg);
+ /* Pointer comparison only: skip everything if this exact config object is
+ * already recorded as current.
+ */
+ if (cfg == cmd_buffer->state.current_l3_config)
+ return;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_L3)) {
+ fprintf(stderr, "L3 config transition: ");
+ gen_dump_l3_config(cfg, stderr);
+ }
+
+ /* True when the SLM entry of the config is non-zero; feeds the SLMEnable
+ * register fields below.
+ */
+ const bool has_slm = cfg->n[GEN_L3P_SLM];
+
+ /* According to the hardware docs, the L3 partitioning can only be changed
+ * while the pipeline is completely drained and the caches are flushed,
+ * which involves a first PIPE_CONTROL flush which stalls the pipeline...
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.DCFlushEnable = true;
+ pc.PostSyncOperation = NoWrite;
+ pc.CommandStreamerStallEnable = true;
+ }
+
+ /* ...followed by a second pipelined PIPE_CONTROL that initiates
+ * invalidation of the relevant caches. Note that because RO invalidation
+ * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
+ * command is processed by the CS) we cannot combine it with the previous
+ * stalling flush as the hardware documentation suggests, because that
+ * would cause the CS to stall on previous rendering *after* RO
+ * invalidation and wouldn't prevent the RO caches from being polluted by
+ * concurrent rendering before the stall completes. This intentionally
+ * doesn't implement the SKL+ hardware workaround suggesting to enable CS
+ * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
+ * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
+ * already guarantee that there is no concurrent GPGPU kernel execution
+ * (see SKL HSD 2132585).
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.TextureCacheInvalidationEnable = true;
+ pc.ConstantCacheInvalidationEnable = true;
+ pc.InstructionCacheInvalidateEnable = true;
+ pc.StateCacheInvalidationEnable = true;
+ pc.PostSyncOperation = NoWrite;
+ }
+
+ /* Now send a third stalling flush to make sure that invalidation is
+ * complete when the L3 configuration registers are modified.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.DCFlushEnable = true;
+ pc.PostSyncOperation = NoWrite;
+ pc.CommandStreamerStallEnable = true;
+ }
+
+#if GEN_GEN >= 8
+
+ /* This path only packs URB/RO/DC/ALL (plus SLMEnable) into L3CNTLREG, so
+ * configs with separate IS, C or T entries are rejected here.
+ */
+ assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]);
+
+ uint32_t l3cr;
+ anv_pack_struct(&l3cr, GENX(L3CNTLREG),
+ .SLMEnable = has_slm,
+ .URBAllocation = cfg->n[GEN_L3P_URB],
+ .ROAllocation = cfg->n[GEN_L3P_RO],
+ .DCAllocation = cfg->n[GEN_L3P_DC],
+ .AllAllocation = cfg->n[GEN_L3P_ALL]);
+
+ /* Set up the L3 partitioning. */
+ emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG_num), l3cr);
+
+#else
+
+ /* Each flag is true when the client's own entry, or a shared RO/ALL entry
+ * listed in its expression, is non-zero.  The negated flags are written
+ * to the Convert*_UC fields of L3SQCREG1 below.
+ */
+ const bool has_dc = cfg->n[GEN_L3P_DC] || cfg->n[GEN_L3P_ALL];
+ const bool has_is = cfg->n[GEN_L3P_IS] || cfg->n[GEN_L3P_RO] ||
+ cfg->n[GEN_L3P_ALL];
+ const bool has_c = cfg->n[GEN_L3P_C] || cfg->n[GEN_L3P_RO] ||
+ cfg->n[GEN_L3P_ALL];
+ const bool has_t = cfg->n[GEN_L3P_T] || cfg->n[GEN_L3P_RO] ||
+ cfg->n[GEN_L3P_ALL];
+
+ assert(!cfg->n[GEN_L3P_ALL]);
+
+ /* When enabled SLM only uses a portion of the L3 on half of the banks,
+ * the matching space on the remaining banks has to be allocated to a
+ * client (URB for all validated configurations) set to the
+ * lower-bandwidth 2-bank address hashing mode.
+ */
+ const struct gen_device_info *devinfo = &cmd_buffer->device->info;
+ const bool urb_low_bw = has_slm && !devinfo->is_baytrail;
+ assert(!urb_low_bw || cfg->n[GEN_L3P_URB] == cfg->n[GEN_L3P_SLM]);
+
+ /* Minimum number of ways that can be allocated to the URB. */
+ const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0);
+ assert(cfg->n[GEN_L3P_URB] >= n0_urb);
+
+ uint32_t l3sqcr1, l3cr2, l3cr3;
+ anv_pack_struct(&l3sqcr1, GENX(L3SQCREG1),
+ .ConvertDC_UC = !has_dc,
+ .ConvertIS_UC = !has_is,
+ .ConvertC_UC = !has_c,
+ .ConvertT_UC = !has_t);
+ /* OR in the per-platform *_L3SQCREG1_SQGHPCI_DEFAULT constant on top of
+ * the packed Convert*_UC bits.
+ */
+ l3sqcr1 |=
+ GEN_IS_HASWELL ? HSW_L3SQCREG1_SQGHPCI_DEFAULT :
+ devinfo->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT :
+ IVB_L3SQCREG1_SQGHPCI_DEFAULT;
+
+ anv_pack_struct(&l3cr2, GENX(L3CNTLREG2),
+ .SLMEnable = has_slm,
+ .URBLowBandwidth = urb_low_bw,
+ .URBAllocation = cfg->n[GEN_L3P_URB],
+#if !GEN_IS_HASWELL
+ .ALLAllocation = cfg->n[GEN_L3P_ALL],
+#endif
+ .ROAllocation = cfg->n[GEN_L3P_RO],
+ .DCAllocation = cfg->n[GEN_L3P_DC]);
+
+ anv_pack_struct(&l3cr3, GENX(L3CNTLREG3),
+ .ISAllocation = cfg->n[GEN_L3P_IS],
+ .ISLowBandwidth = 0,
+ .CAllocation = cfg->n[GEN_L3P_C],
+ .CLowBandwidth = 0,
+ .TAllocation = cfg->n[GEN_L3P_T],
+ .TLowBandwidth = 0);
+
+ /* Set up the L3 partitioning. */
+ emit_lri(&cmd_buffer->batch, GENX(L3SQCREG1_num), l3sqcr1);
+ emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2_num), l3cr2);
+ emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3_num), l3cr3);
+
+#if GEN_IS_HASWELL
+ /* The SCRATCH1/CHICKEN3 writes are only emitted when the physical
+ * device reports cmd_parser_version >= 4.
+ */
+ if (cmd_buffer->device->instance->physicalDevice.cmd_parser_version >= 4) {
+ /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep
+ * them disabled to avoid crashing the system hard.
+ */
+ uint32_t scratch1, chicken3;
+ anv_pack_struct(&scratch1, GENX(SCRATCH1),
+ .L3AtomicDisable = !has_dc);
+ anv_pack_struct(&chicken3, GENX(CHICKEN3),
+ .L3AtomicDisable = !has_dc);
+ emit_lri(&cmd_buffer->batch, GENX(SCRATCH1_num), scratch1);
+ emit_lri(&cmd_buffer->batch, GENX(CHICKEN3_num), chicken3);
+ }
+#endif
+
+#endif
+
+ /* Record the new config so the pointer check at the top short-circuits
+ * repeated calls with the same config.
+ */
+ cmd_buffer->state.current_l3_config = cfg;
+}
+
void
genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
{
assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
- genX(cmd_buffer_config_l3)(cmd_buffer, pipeline);
+ genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config);
genX(flush_pipeline_select_3d)(cmd_buffer);
+++ /dev/null
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "anv_private.h"
-
-#include "common/gen_l3_config.h"
-#include "genxml/gen_macros.h"
-#include "genxml/genX_pack.h"
-
-#define emit_lri(batch, reg, imm) \
- anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
- lri.RegisterOffset = __anv_reg_num(reg); \
- lri.DataDWord = imm; \
- }
-
-#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000
-#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000
-#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000
-
-/**
- * Program the hardware to use the specified L3 configuration.
- */
-static void
-setup_l3_config(struct anv_cmd_buffer *cmd_buffer/*, struct brw_context *brw*/,
- const struct gen_l3_config *cfg)
-{
- const bool has_slm = cfg->n[GEN_L3P_SLM];
-
- /* According to the hardware docs, the L3 partitioning can only be changed
- * while the pipeline is completely drained and the caches are flushed,
- * which involves a first PIPE_CONTROL flush which stalls the pipeline...
- */
- anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.DCFlushEnable = true;
- pc.PostSyncOperation = NoWrite;
- pc.CommandStreamerStallEnable = true;
- }
-
- /* ...followed by a second pipelined PIPE_CONTROL that initiates
- * invalidation of the relevant caches. Note that because RO invalidation
- * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
- * command is processed by the CS) we cannot combine it with the previous
- * stalling flush as the hardware documentation suggests, because that
- * would cause the CS to stall on previous rendering *after* RO
- * invalidation and wouldn't prevent the RO caches from being polluted by
- * concurrent rendering before the stall completes. This intentionally
- * doesn't implement the SKL+ hardware workaround suggesting to enable CS
- * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
- * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
- * already guarantee that there is no concurrent GPGPU kernel execution
- * (see SKL HSD 2132585).
- */
- anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.TextureCacheInvalidationEnable = true;
- pc.ConstantCacheInvalidationEnable = true;
- pc.InstructionCacheInvalidateEnable = true;
- pc.StateCacheInvalidationEnable = true;
- pc.PostSyncOperation = NoWrite;
- }
-
- /* Now send a third stalling flush to make sure that invalidation is
- * complete when the L3 configuration registers are modified.
- */
- anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.DCFlushEnable = true;
- pc.PostSyncOperation = NoWrite;
- pc.CommandStreamerStallEnable = true;
- }
-
-#if GEN_GEN >= 8
-
- assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]);
-
- uint32_t l3cr;
- anv_pack_struct(&l3cr, GENX(L3CNTLREG),
- .SLMEnable = has_slm,
- .URBAllocation = cfg->n[GEN_L3P_URB],
- .ROAllocation = cfg->n[GEN_L3P_RO],
- .DCAllocation = cfg->n[GEN_L3P_DC],
- .AllAllocation = cfg->n[GEN_L3P_ALL]);
-
- /* Set up the L3 partitioning. */
- emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr);
-
-#else
-
- const bool has_dc = cfg->n[GEN_L3P_DC] || cfg->n[GEN_L3P_ALL];
- const bool has_is = cfg->n[GEN_L3P_IS] || cfg->n[GEN_L3P_RO] ||
- cfg->n[GEN_L3P_ALL];
- const bool has_c = cfg->n[GEN_L3P_C] || cfg->n[GEN_L3P_RO] ||
- cfg->n[GEN_L3P_ALL];
- const bool has_t = cfg->n[GEN_L3P_T] || cfg->n[GEN_L3P_RO] ||
- cfg->n[GEN_L3P_ALL];
-
- assert(!cfg->n[GEN_L3P_ALL]);
-
- /* When enabled SLM only uses a portion of the L3 on half of the banks,
- * the matching space on the remaining banks has to be allocated to a
- * client (URB for all validated configurations) set to the
- * lower-bandwidth 2-bank address hashing mode.
- */
- const struct gen_device_info *devinfo = &cmd_buffer->device->info;
- const bool urb_low_bw = has_slm && !devinfo->is_baytrail;
- assert(!urb_low_bw || cfg->n[GEN_L3P_URB] == cfg->n[GEN_L3P_SLM]);
-
- /* Minimum number of ways that can be allocated to the URB. */
- const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0);
- assert(cfg->n[GEN_L3P_URB] >= n0_urb);
-
- uint32_t l3sqcr1, l3cr2, l3cr3;
- anv_pack_struct(&l3sqcr1, GENX(L3SQCREG1),
- .ConvertDC_UC = !has_dc,
- .ConvertIS_UC = !has_is,
- .ConvertC_UC = !has_c,
- .ConvertT_UC = !has_t);
- l3sqcr1 |=
- GEN_IS_HASWELL ? HSW_L3SQCREG1_SQGHPCI_DEFAULT :
- devinfo->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT :
- IVB_L3SQCREG1_SQGHPCI_DEFAULT;
-
- anv_pack_struct(&l3cr2, GENX(L3CNTLREG2),
- .SLMEnable = has_slm,
- .URBLowBandwidth = urb_low_bw,
- .URBAllocation = cfg->n[GEN_L3P_URB],
-#if !GEN_IS_HASWELL
- .ALLAllocation = cfg->n[GEN_L3P_ALL],
-#endif
- .ROAllocation = cfg->n[GEN_L3P_RO],
- .DCAllocation = cfg->n[GEN_L3P_DC]);
-
- anv_pack_struct(&l3cr3, GENX(L3CNTLREG3),
- .ISAllocation = cfg->n[GEN_L3P_IS],
- .ISLowBandwidth = 0,
- .CAllocation = cfg->n[GEN_L3P_C],
- .CLowBandwidth = 0,
- .TAllocation = cfg->n[GEN_L3P_T],
- .TLowBandwidth = 0);
-
- /* Set up the L3 partitioning. */
- emit_lri(&cmd_buffer->batch, GENX(L3SQCREG1), l3sqcr1);
- emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2), l3cr2);
- emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3), l3cr3);
-
-#if GEN_IS_HASWELL
- if (cmd_buffer->device->instance->physicalDevice.cmd_parser_version >= 4) {
- /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep
- * them disabled to avoid crashing the system hard.
- */
- uint32_t scratch1, chicken3;
- anv_pack_struct(&scratch1, GENX(SCRATCH1),
- .L3AtomicDisable = !has_dc);
- anv_pack_struct(&chicken3, GENX(CHICKEN3),
- .L3AtomicDisable = !has_dc);
- emit_lri(&cmd_buffer->batch, GENX(SCRATCH1), scratch1);
- emit_lri(&cmd_buffer->batch, GENX(CHICKEN3), chicken3);
- }
-#endif
-
-#endif
-
-}
-
-void
-genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
- const struct anv_pipeline *pipeline)
-{
- struct anv_cmd_state *state = &cmd_buffer->state;
- const struct gen_l3_config *const cfg = pipeline->urb.l3_config;
- assert(cfg);
- if (cfg != state->current_l3_config) {
- setup_l3_config(cmd_buffer, cfg);
- state->current_l3_config = cfg;
-
- if (unlikely(INTEL_DEBUG & DEBUG_L3)) {
- fprintf(stderr, "L3 config transition: ");
- gen_dump_l3_config(cfg, stderr);
- }
- }
-}