i965/gen9: Optimize slice and subslice load balancing behavior.
[mesa.git] / src / mesa / drivers / dri / i965 / gen8_multisample_state.c
index 4427f15996df097d702d231f6dcef8bde60d28f1..5381d326f2600aa2fac27d33fb4165722d503dc2 100644 (file)
 #include "brw_multisample_state.h"
 
 /**
- * 3DSTATE_MULTISAMPLE
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
+ * markers in the pipeline by programming a PIPE_CONTROL with stall.
  */
-void
-gen8_emit_3dstate_multisample(struct brw_context *brw, unsigned num_samples)
+static void
+gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
 {
-   assert(num_samples <= 16);
+   UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_CS_STALL |
+                               PIPE_CONTROL_STALL_AT_SCOREBOARD);
+}
 
-   unsigned log2_samples = ffs(MAX2(num_samples, 1)) - 1;
+/**
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
+ * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
+ * after the command to ensure the state has been delivered prior to any
+ * command causing a marker in the pipeline.
+ */
+static void
+gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
+{
+   UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
 
-   BEGIN_BATCH(2);
-   OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE << 16 | (2 - 2));
-   OUT_BATCH(MS_PIXEL_LOCATION_CENTER | log2_samples << 1);
-   ADVANCE_BATCH();
+   /* Write to CACHE_MODE_0 (0x7000) */
+   brw_load_register_imm32(brw, GEN7_CACHE_MODE_0, 0);
+
+   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
+    * be idle; i.e., full flush is required.
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_CACHE_FLUSH_BITS |
+                               PIPE_CONTROL_CACHE_INVALIDATE_BITS);
 }
 
 /**
@@ -49,6 +74,11 @@ gen8_emit_3dstate_multisample(struct brw_context *brw, unsigned num_samples)
 void
 gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   if (devinfo->gen == 10)
+      gen10_emit_wa_cs_stall_flush(brw);
+
    BEGIN_BATCH(9);
    OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
 
@@ -68,21 +98,7 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
    /* 1x and 2x MSAA */
    OUT_BATCH(brw_multisample_positions_1x_2x);
    ADVANCE_BATCH();
-}
 
-
-static void
-upload_multisample_state(struct brw_context *brw)
-{
-   gen8_emit_3dstate_multisample(brw, brw->num_samples);
-   gen6_emit_3dstate_sample_mask(brw, gen6_determine_sample_mask(brw));
+   if (devinfo->gen == 10)
+      gen10_emit_wa_lri_to_cache_mode_zero(brw);
 }
-
-const struct brw_tracked_state gen8_multisample_state = {
-   .dirty = {
-      .mesa = _NEW_MULTISAMPLE,
-      .brw = BRW_NEW_CONTEXT |
-             BRW_NEW_NUM_SAMPLES,
-   },
-   .emit = upload_multisample_state
-};