#include "blorp_priv.h"
#include "dev/gen_device_info.h"
#include "common/gen_sample_positions.h"
+#include "common/gen_l3_config.h"
#include "genxml/gen_macros.h"
/**
uint32_t *sizes,
unsigned num_vbs);
-#if GEN_GEN >= 8
-static struct blorp_address
+UNUSED static struct blorp_address
blorp_get_workaround_page(struct blorp_batch *batch);
-#endif
static void
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
blorp_get_surface_base_address(struct blorp_batch *batch);
#endif
+#if GEN_GEN >= 7
+static const struct gen_l3_config *
+blorp_get_l3_config(struct blorp_batch *batch);
+# else
static void
blorp_emit_urb_config(struct blorp_batch *batch,
unsigned vs_entry_size, unsigned sf_entry_size);
+#endif
static void
blorp_emit_pipeline(struct blorp_batch *batch,
*/
static void
emit_urb_config(struct blorp_batch *batch,
- const struct blorp_params *params)
+ const struct blorp_params *params,
+ enum gen_urb_deref_block_size *deref_block_size)
{
/* Once vertex fetcher has written full VUE entries with complete
* header the space requirement is as follows per vertex (in bytes):
const unsigned sf_entry_size =
params->sf_prog_data ? params->sf_prog_data->urb_entry_size : 0;
+#if GEN_GEN >= 7
+ assert(sf_entry_size == 0);
+ const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 };
+
+ unsigned entries[4], start[4];
+ gen_get_urb_config(batch->blorp->compiler->devinfo,
+ blorp_get_l3_config(batch),
+ false, false, entry_size,
+ entries, start, deref_block_size);
+
+#if GEN_GEN == 7 && !GEN_IS_HASWELL
+ /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
+ *
+ * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
+ * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
+ * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
+ * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
+ * needs to be sent before any combination of VS associated 3DSTATE."
+ */
+ blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
+ pc.DepthStallEnable = true;
+ pc.PostSyncOperation = WriteImmediateData;
+ pc.Address = blorp_get_workaround_page(batch);
+ }
+#endif
+
+ for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
+ blorp_emit(batch, GENX(3DSTATE_URB_VS), urb) {
+ urb._3DCommandSubOpcode += i;
+ urb.VSURBStartingAddress = start[i];
+ urb.VSURBEntryAllocationSize = entry_size[i] - 1;
+ urb.VSNumberofURBEntries = entries[i];
+ }
+ }
+#else /* GEN_GEN < 7 */
blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size);
+#endif
}
#if GEN_GEN >= 7
static void
blorp_emit_sf_config(struct blorp_batch *batch,
- const struct blorp_params *params)
+ const struct blorp_params *params,
+ enum gen_urb_deref_block_size urb_deref_block_size)
{
const struct brw_wm_prog_data *prog_data = params->wm_prog_data;
blorp_emit(batch, GENX(3DSTATE_SF), sf) {
#if GEN_GEN >= 12
- sf.DerefBlockSize = PerPolyDerefMode;
+ sf.DerefBlockSize = urb_deref_block_size;
#endif
}
psx.PixelShaderValid = true;
psx.AttributeEnable = prog_data->num_varying_inputs > 0;
psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
+ psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
+#if GEN_GEN >= 9
+ psx.PixelShaderComputesStencil = prog_data->computed_stencil;
+#endif
}
if (params->src.enabled)
unreachable("not reached");
}
- if (prog_data)
+ if (prog_data) {
wm.ThreadDispatchEnable = true;
+ wm.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
+ }
if (params->src.enabled)
wm.PixelShaderKillsPixel = true;
uint32_t color_calc_state_offset;
uint32_t depth_stencil_state_offset;
- emit_urb_config(batch, params);
+ enum gen_urb_deref_block_size urb_deref_block_size;
+ emit_urb_config(batch, params, &urb_deref_block_size);
if (params->wm_prog_data) {
blend_state_offset = blorp_emit_blend_state(batch, params);
clip.PerspectiveDivideDisable = true;
}
- blorp_emit_sf_config(batch, params);
+ blorp_emit_sf_config(batch, params, urb_deref_block_size);
blorp_emit_ps_config(batch, params);
blorp_emit_cc_viewport(batch);
+
+#if GEN_GEN >= 12
+ /* Disable Primitive Replication. */
+ blorp_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
+#endif
}
/******** This is the end of the pipeline setup code ********/
blorp_emit_cc_viewport(batch);
}
- /* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the
- * 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch
- * even when WM_HZ_OP is active. However, WM thread dispatch is normally
- * disabled for HiZ ops and it appears that force-enabling it can lead to
- * GPU hangs on at least Skylake. Since we don't know the current state of
- * the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP.
- */
- blorp_emit(batch, GENX(3DSTATE_WM), wm);
+ if (GEN_GEN >= 12 && params->stencil.enabled &&
+ params->hiz_op == ISL_AUX_OP_FULL_RESOLVE) {
+ /* GEN:BUG:1605967699
+ *
+       * This workaround requires the Force Thread Dispatch Enable flag to
+       * be set to ForceOFF on the first WM_HZ_OP state cycle (followed by
+       * a CS Stall):
+ *
+ * "Workaround: There is a potential software workaround for the
+ * issue by doing these 2 steps 1) setting the force thread dispatch
+ * enable(bits 20:19) in the 3dstate_WM_body state to be set to
+ * Force_OFF (value of 1) along with the first WM_HZ_OP state cycle.
+ * The second WM_HZ_OP state which is required by programming
+ * sequencing to complete the HZ_OP operation can reprogram the
+ * 3dstate_WM_body to set to NORMAL(value of 0)."
+ */
+ blorp_emit(batch, GENX(3DSTATE_WM), wm) {
+ wm.ForceThreadDispatchEnable = ForceOff;
+ }
+ blorp_emit(batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.CommandStreamerStallEnable = true;
+ }
+ } else {
+ /* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the
+ * 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch
+ * even when WM_HZ_OP is active. However, WM thread dispatch is normally
+ * disabled for HiZ ops and it appears that force-enabling it can lead to
+ * GPU hangs on at least Skylake. Since we don't know the current state of
+ * the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP.
+ */
+ blorp_emit(batch, GENX(3DSTATE_WM), wm);
+ }
/* If we can't alter the depth stencil config and multiple layers are
* involved, the HiZ op will fail. This is because the op requires that a
hzp.DepthBufferResolveEnable = params->depth.enabled;
#if GEN_GEN >= 12
if (params->stencil.enabled) {
- assert(params->stencil.aux_usage == ISL_AUX_USAGE_CCS_E);
+ assert(params->stencil.aux_usage == ISL_AUX_USAGE_STC_CCS);
hzp.StencilBufferResolveEnable = true;
}
#endif
pc.Address = blorp_get_workaround_page(batch);
}
+
+ if (GEN_GEN >= 12 && params->stencil.enabled &&
+ params->hiz_op == ISL_AUX_OP_FULL_RESOLVE) {
+ /* GEN:BUG:1605967699
+ *
+       * "The second WM_HZ_OP state which is required by programming
+       * sequencing to complete the HZ_OP operation can reprogram the
+       * 3dstate_WM_body to set to NORMAL(value of 0)."
+ */
+ blorp_emit(batch, GENX(3DSTATE_WM), wm);
+ }
+
blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp);
}
#endif