Initial benchmarking didn't show any performance benefit, but the change might show one eventually.
Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
iris_upload_slice_hashing_state(batch);
#endif
+#if GEN_GEN >= 11
+ /* WA_220160979: Enable Hardware filtering of Semi-Pipelined State in WM */
+ iris_pack_state(GENX(COMMON_SLICE_CHICKEN4), &reg_val, reg) {
+ reg.EnableHardwareFilteringinWM = true;
+ reg.EnableHardwareFilteringinWMMask = true;
+ }
+ iris_emit_lri(batch, COMMON_SLICE_CHICKEN4, reg_val);
+#endif
+
/* 3DSTATE_DRAWING_RECTANGLE is non-pipelined, so we want to avoid
* changing it dynamically. We set it to the maximum size here, and
* instead include the render target dimensions in the viewport, so
lri.DataDWord = cache_mode_0;
}
}
+
+ /* WA_220160979: Enable Hardware filtering of Semi-Pipelined State in WM. */
+ uint32_t common_slice_chicken4;
+ anv_pack_struct(&common_slice_chicken4, GENX(COMMON_SLICE_CHICKEN4),
+ .EnableHardwareFilteringinWM = true,
+ .EnableHardwareFilteringinWMMask = true);
+
+ anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+ lri.RegisterOffset = GENX(COMMON_SLICE_CHICKEN4_num);
+ lri.DataDWord = common_slice_chicken4;
+ }
#endif
/* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
# define GLK_SCEC_BARRIER_MODE_MASK REG_MASK(1 << 7)
# define GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE (1 << 11)
+
+#define COMMON_SLICE_CHICKEN4 0x7300
+# define GEN11_ENABLE_HARDWARE_FILTERING_IN_WM (1 << 5)
+
#define HALF_SLICE_CHICKEN7 0xE194
# define TEXEL_OFFSET_FIX_ENABLE (1 << 1)
# define TEXEL_OFFSET_FIX_MASK REG_MASK(1 << 1)
*/
brw_load_register_imm32(brw, GEN8_L3CNTLREG,
GEN8_L3CNTLREG_EDBC_NO_HANG);
+
+ /* WA_220160979: Enable Hardware filtering of Semi-Pipelined State in WM */
+ brw_load_register_imm32(brw, COMMON_SLICE_CHICKEN4,
+ GEN11_ENABLE_HARDWARE_FILTERING_IN_WM |
+ REG_MASK(GEN11_ENABLE_HARDWARE_FILTERING_IN_WM));
}
/* hardware specification recommends disabling repacking for