i965/gen10: Implement WaSampleOffsetIZ workaround
authorAnuj Phogat <anuj.phogat@gmail.com>
Sat, 9 Sep 2017 00:23:28 +0000 (17:23 -0700)
committerAnuj Phogat <anuj.phogat@gmail.com>
Fri, 3 Nov 2017 21:30:33 +0000 (14:30 -0700)
There are few other (duplicate) workarounds which have similar recommendations:
WaFlushHangWhenNonPipelineStateAndMarkerStalled
WaCSStallBefore3DSamplePattern
WaPipeControlBefore3DStateSamplePattern

WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
driver is using mid batch context restore. Ignoring it for now because We're
not doing mid-batch context restore in Mesa.

This workaround doesn't fix any of the piglit hangs we've seen
on CNL. But it might be fixing something we haven't tested yet.

V2: Use brw_load_register_imm32() to program CACHE_MODE_0.
    Get rid of brw_flush_gpu_caches().

V3: Make the workaround helper functions static.

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Reviewed-by :Nanley Chery <nanley.g.chery@intel.com>

src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/gen8_multisample_state.c

index 4abb790612d9d1c3a1b739b051b11c7496effc13..270cdf29db334685b818603e00052d9a87d2ab27 100644 (file)
@@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define GEN7_GPGPU_DISPATCHDIMY         0x2504
 #define GEN7_GPGPU_DISPATCHDIMZ         0x2508
 
+#define GEN7_CACHE_MODE_0               0x7000
 #define GEN7_CACHE_MODE_1               0x7004
 # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
 # define GEN8_HIZ_NP_PMA_FIX_ENABLE        (1 << 11)
index 3afa5862757378aeeee87bad8fd459265a96ddc7..9f849d64bbc5176e4aaa9c0e30e49d2b4d42b6fc 100644 (file)
 #include "brw_defines.h"
 #include "brw_multisample_state.h"
 
+/**
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
+ * markers in the pipeline by programming a PIPE_CONTROL with stall.
+ */
+static void
+gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_CS_STALL |
+                               PIPE_CONTROL_STALL_AT_SCOREBOARD);
+}
+
+/**
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
+ * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
+ * after the command to ensure the state has been delivered prior to any
+ * command causing a marker in the pipeline.
+ */
+static void
+gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
+
+   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
+    * be idle; i.e., full flush is required.
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_CACHE_FLUSH_BITS |
+                               PIPE_CONTROL_CACHE_INVALIDATE_BITS);
+
+   /* Write to CACHE_MODE_0 (0x7000) */
+   brw_load_register_imm32(brw, GEN7_CACHE_MODE_0, 0);
+}
+
 /**
  * 3DSTATE_SAMPLE_PATTERN
  */
 void
 gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   if (devinfo->gen == 10)
+      gen10_emit_wa_cs_stall_flush(brw);
+
    BEGIN_BATCH(9);
    OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
 
@@ -52,4 +98,7 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
    /* 1x and 2x MSAA */
    OUT_BATCH(brw_multisample_positions_1x_2x);
    ADVANCE_BATCH();
+
+   if (devinfo->gen == 10)
+      gen10_emit_wa_lri_to_cache_mode_zero(brw);
 }