i965: Implement programming of the L3 configuration.
authorFrancisco Jerez <currojerez@riseup.net>
Thu, 3 Sep 2015 15:07:44 +0000 (18:07 +0300)
committerFrancisco Jerez <currojerez@riseup.net>
Wed, 9 Dec 2015 11:46:05 +0000 (13:46 +0200)
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
src/mesa/drivers/dri/i965/gen7_l3_state.c

index ac4c93b1888fe30924b05ff8c57c9d4ff1bf4c94..712c7c730a454591b387237a3ef0598a503ba324 100644 (file)
@@ -165,3 +165,98 @@ get_l3_way_size(const struct brw_device_info *devinfo)
    else
       return 8 * devinfo->num_slices;
 }
+
+/**
+ * Program the hardware to use the specified L3 configuration.
+ */
+static void
+setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
+{
+   const bool has_dc = cfg->n[L3P_DC] || cfg->n[L3P_ALL];
+   const bool has_is = cfg->n[L3P_IS] || cfg->n[L3P_RO] || cfg->n[L3P_ALL];
+   const bool has_c = cfg->n[L3P_C] || cfg->n[L3P_RO] || cfg->n[L3P_ALL];
+   const bool has_t = cfg->n[L3P_T] || cfg->n[L3P_RO] || cfg->n[L3P_ALL];
+   const bool has_slm = cfg->n[L3P_SLM];
+
+   /* According to the hardware docs, the L3 partitioning can only be changed
+    * while the pipeline is completely drained and the caches are flushed,
+    * which involves a first PIPE_CONTROL flush which stalls the pipeline and
+    * initiates invalidation of the relevant caches...
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+                               PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+                               PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+                               PIPE_CONTROL_NO_WRITE |
+                               PIPE_CONTROL_CS_STALL);
+
+   /* ...followed by a second stalling flush which guarantees that
+    * invalidation is complete when the L3 configuration registers are
+    * modified.
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+                               PIPE_CONTROL_NO_WRITE |
+                               PIPE_CONTROL_CS_STALL);
+
+   if (brw->gen >= 8) {
+      assert(!cfg->n[L3P_IS] && !cfg->n[L3P_C] && !cfg->n[L3P_T]);
+
+      BEGIN_BATCH(3);
+      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+
+      /* Set up the L3 partitioning. */
+      OUT_BATCH(GEN8_L3CNTLREG);
+      OUT_BATCH((has_slm ? GEN8_L3CNTLREG_SLM_ENABLE : 0) |
+                SET_FIELD(cfg->n[L3P_URB], GEN8_L3CNTLREG_URB_ALLOC) |
+                SET_FIELD(cfg->n[L3P_RO], GEN8_L3CNTLREG_RO_ALLOC) |
+                SET_FIELD(cfg->n[L3P_DC], GEN8_L3CNTLREG_DC_ALLOC) |
+                SET_FIELD(cfg->n[L3P_ALL], GEN8_L3CNTLREG_ALL_ALLOC));
+
+      ADVANCE_BATCH();
+
+   } else {
+      assert(!cfg->n[L3P_ALL]);
+
+      /* When enabled SLM only uses a portion of the L3 on half of the banks,
+       * the matching space on the remaining banks has to be allocated to a
+       * client (URB for all validated configurations) set to the
+       * lower-bandwidth 2-bank address hashing mode.
+       */
+      const bool urb_low_bw = has_slm && !brw->is_baytrail;
+      assert(!urb_low_bw || cfg->n[L3P_URB] == cfg->n[L3P_SLM]);
+
+      /* Minimum number of ways that can be allocated to the URB. */
+      const unsigned n0_urb = (brw->is_baytrail ? 32 : 0);
+      assert(cfg->n[L3P_URB] >= n0_urb);
+
+      BEGIN_BATCH(7);
+      OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2));
+
+      /* Demote any clients with no ways assigned to LLC. */
+      OUT_BATCH(GEN7_L3SQCREG1);
+      OUT_BATCH((brw->is_haswell ? HSW_L3SQCREG1_SQGHPCI_DEFAULT :
+                 brw->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT :
+                 IVB_L3SQCREG1_SQGHPCI_DEFAULT) |
+                (has_dc ? 0 : GEN7_L3SQCREG1_CONV_DC_UC) |
+                (has_is ? 0 : GEN7_L3SQCREG1_CONV_IS_UC) |
+                (has_c ? 0 : GEN7_L3SQCREG1_CONV_C_UC) |
+                (has_t ? 0 : GEN7_L3SQCREG1_CONV_T_UC));
+
+      /* Set up the L3 partitioning. */
+      OUT_BATCH(GEN7_L3CNTLREG2);
+      OUT_BATCH((has_slm ? GEN7_L3CNTLREG2_SLM_ENABLE : 0) |
+                SET_FIELD(cfg->n[L3P_URB] - n0_urb, GEN7_L3CNTLREG2_URB_ALLOC) |
+                (urb_low_bw ? GEN7_L3CNTLREG2_URB_LOW_BW : 0) |
+                SET_FIELD(cfg->n[L3P_ALL], GEN7_L3CNTLREG2_ALL_ALLOC) |
+                SET_FIELD(cfg->n[L3P_RO], GEN7_L3CNTLREG2_RO_ALLOC) |
+                SET_FIELD(cfg->n[L3P_DC], GEN7_L3CNTLREG2_DC_ALLOC));
+      OUT_BATCH(GEN7_L3CNTLREG3);
+      OUT_BATCH(SET_FIELD(cfg->n[L3P_IS], GEN7_L3CNTLREG3_IS_ALLOC) |
+                SET_FIELD(cfg->n[L3P_C], GEN7_L3CNTLREG3_C_ALLOC) |
+                SET_FIELD(cfg->n[L3P_T], GEN7_L3CNTLREG3_T_ALLOC));
+
+      ADVANCE_BATCH();
+   }
+}