blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
struct blorp_address address, uint32_t delta);
+static uint64_t
+blorp_get_surface_address(struct blorp_batch *batch,
+ struct blorp_address address);
+
#if GEN_GEN >= 7 && GEN_GEN < 10
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch);
vb[idx].BufferPitch = stride;
#if GEN_GEN >= 6
- vb[idx].VertexBufferMOCS = addr.mocs;
+ vb[idx].MOCS = addr.mocs;
#endif
#if GEN_GEN >= 7
blorp_emit_input_varying_data(batch, params, &addrs[1], &size);
blorp_fill_vertex_buffer_state(batch, vb, 1, addrs[1], size, 0);
+ blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, num_vbs);
+
const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length);
uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
if (!dw)
return;
- blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, num_vbs);
-
for (unsigned i = 0; i < num_vbs; i++) {
GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
dw += GENX(VERTEX_BUFFER_STATE_length);
ps.BindingTableEntryCount = 1;
}
+ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests
+ * disabling prefetching of binding tables on A0 and B0 steppings.
+ * TODO: Revisit this WA on C0 stepping.
+ */
+ if (GEN_GEN == 11)
+ ps.BindingTableEntryCount = 0;
+
if (prog_data) {
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
ps._32PixelDispatchEnable = prog_data->dispatch_32;
+ /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
+ *
+ * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
+ * Dispatch must not be enabled for PER_PIXEL dispatch mode."
+ *
+ * Since 16x MSAA was first introduced on SKL, we don't need to apply
+ * the workaround on any older hardware.
+ */
+ if (GEN_GEN >= 9 && !prog_data->persample_dispatch &&
+ params->num_samples == 16) {
+ assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
+ ps._32PixelDispatchEnable = false;
+ }
+
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
static void
blorp_emit_surface_state(struct blorp_batch *batch,
const struct brw_blorp_surface_info *surface,
- enum isl_aux_op op,
+ enum isl_aux_op aux_op,
void *state, uint32_t state_offset,
const bool color_write_disables[4],
bool is_render_target)
isl_surf_fill_state(batch->blorp->isl_dev, state,
.surf = &surf, .view = &surface->view,
.aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
+ .address =
+ blorp_get_surface_address(batch, surface->addr),
+ .aux_address = aux_usage == ISL_AUX_USAGE_NONE ? 0 :
+ blorp_get_surface_address(batch, surface->aux_addr),
+ .clear_address = !use_clear_address ? 0 :
+ blorp_get_surface_address(batch,
+ surface->clear_color_addr),
.mocs = surface->addr.mocs,
.clear_color = surface->clear_color,
.use_clear_address = use_clear_address,
surface->aux_addr, *aux_addr);
}
- if (surface->clear_color_addr.buffer) {
+ if (aux_usage != ISL_AUX_USAGE_NONE && surface->clear_color_addr.buffer) {
#if GEN_GEN >= 10
assert((surface->clear_color_addr.offset & 0x3f) == 0);
uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset;
isl_dev->ss.clear_color_state_offset,
surface->clear_color_addr, *clear_addr);
#elif GEN_GEN >= 7
- if (op == ISL_AUX_OP_FULL_RESOLVE || op == ISL_AUX_OP_PARTIAL_RESOLVE) {
+ /* Fast clears just whack the AUX surface and don't actually use the
+ * clear color for anything. We can avoid the MI memcpy in that case.
+ */
+ if (aux_op != ISL_AUX_OP_FAST_CLEAR) {
struct blorp_address dst_addr = blorp_get_surface_base_address(batch);
dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset;
blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr,
*/
blorp_emit_3dstate_multisample(batch, params);
+ /* From the BDW PRM Volume 7, Depth Buffer Clear:
+ *
+ * "The clear value must be between the min and max depth values
+ * (inclusive) defined in the CC_VIEWPORT. If the depth buffer format is
+ * D32_FLOAT, then +/-DENORM values are also allowed."
+ *
+ * Set the bounds to match our hardware limits, [0.0, 1.0].
+ */
+ if (params->depth.enabled && params->hiz_op == ISL_AUX_OP_FAST_CLEAR) {
+ assert(params->depth.clear_color.f32[0] >= 0.0f);
+ assert(params->depth.clear_color.f32[0] <= 1.0f);
+ blorp_emit_cc_viewport(batch);
+ }
+
+ /* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the
+ * 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch
+ * even when WM_HZ_OP is active. However, WM thread dispatch is normally
+ * disabled for HiZ ops and it appears that force-enabling it can lead to
+ * GPU hangs on at least Skylake. Since we don't know the current state of
+ * the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP.
+ */
+ blorp_emit(batch, GENX(3DSTATE_WM), wm);
+
/* If we can't alter the depth stencil config and multiple layers are
* involved, the HiZ op will fail. This is because the op requires that a
* new config is emitted for each additional layer.