#define BLORP_GENX_EXEC_H
#include "blorp_priv.h"
-#include "common/gen_device_info.h"
+#include "dev/gen_device_info.h"
#include "common/gen_sample_positions.h"
#include "genxml/gen_macros.h"
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
struct blorp_address *addr);
+static void
+blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
+ const struct blorp_address *addrs,
+ unsigned num_vbs);
#if GEN_GEN >= 8
static struct blorp_address
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
struct blorp_address address, uint32_t delta);
+#if GEN_GEN >= 7 && GEN_GEN < 10
+static struct blorp_address
+blorp_get_surface_base_address(struct blorp_batch *batch);
+#endif
+
static void
blorp_emit_urb_config(struct blorp_batch *batch,
unsigned vs_entry_size, unsigned sf_entry_size);
blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size);
}
+#if GEN_GEN >= 7
+static void
+blorp_emit_memcpy(struct blorp_batch *batch,
+ struct blorp_address dst,
+ struct blorp_address src,
+ uint32_t size);
+#endif
+
static void
blorp_emit_vertex_data(struct blorp_batch *batch,
const struct blorp_params *params,
}
blorp_flush_range(batch, data, *size);
+
+ if (params->dst_clear_color_as_input) {
+#if GEN_GEN >= 7
+ /* In this case, the clear color isn't known statically and instead
+ * comes in through an indirect which we have to copy into the vertex
+ * buffer before we execute the 3DPRIMITIVE. We already copied the
+ * value of params->wm_inputs.clear_color into the vertex buffer in the
+ * loop above. Now we emit code to stomp it from the GPU with the
+ * actual clear color value.
+ */
+ assert(num_varyings == 1);
+
+ /* The clear color is the first thing after the header */
+ struct blorp_address clear_color_input_addr = *addr;
+ clear_color_input_addr.offset += 16;
+
+ const unsigned clear_color_size =
+ GEN_GEN < 10 ? batch->blorp->isl_dev->ss.clear_value_size : 4 * 4;
+ blorp_emit_memcpy(batch, clear_color_input_addr,
+ params->dst.clear_color_addr,
+ clear_color_size);
+#else
+ unreachable("MCS partial resolve is not a thing on SNB and earlier");
+#endif
+ }
}
static void
-blorp_emit_vertex_buffers(struct blorp_batch *batch,
- const struct blorp_params *params)
+blorp_fill_vertex_buffer_state(struct blorp_batch *batch, /* fills vb[idx] from addr, size and stride */
+ struct GENX(VERTEX_BUFFER_STATE) *vb,
+ unsigned idx,
+ struct blorp_address addr, uint32_t size,
+ uint32_t stride)
{
- struct GENX(VERTEX_BUFFER_STATE) vb[2];
- memset(vb, 0, sizeof(vb));
+ vb[idx].VertexBufferIndex = idx;
+ vb[idx].BufferStartingAddress = addr;
+ vb[idx].BufferPitch = stride;
- uint32_t size;
- blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, &size);
- vb[0].VertexBufferIndex = 0;
- vb[0].BufferPitch = 3 * sizeof(float);
#if GEN_GEN >= 6
- vb[0].VertexBufferMOCS = batch->blorp->mocs.vb;
-#endif
-#if GEN_GEN >= 7
- vb[0].AddressModifyEnable = true;
-#endif
-#if GEN_GEN >= 8
- vb[0].BufferSize = size;
-#elif GEN_GEN >= 5
- vb[0].BufferAccessType = VERTEXDATA;
- vb[0].EndAddress = vb[0].BufferStartingAddress;
- vb[0].EndAddress.offset += size - 1;
-#elif GEN_GEN == 4
- vb[0].BufferAccessType = VERTEXDATA;
- vb[0].MaxIndex = 2;
+ vb[idx].MOCS = addr.mocs; /* MOCS is read from the supplied address */
#endif
- blorp_emit_input_varying_data(batch, params,
- &vb[1].BufferStartingAddress, &size);
- vb[1].VertexBufferIndex = 1;
- vb[1].BufferPitch = 0;
-#if GEN_GEN >= 6
- vb[1].VertexBufferMOCS = batch->blorp->mocs.vb;
-#endif
#if GEN_GEN >= 7
- vb[1].AddressModifyEnable = true;
+ vb[idx].AddressModifyEnable = true;
#endif
+
#if GEN_GEN >= 8
- vb[1].BufferSize = size;
+ vb[idx].BufferSize = size;
#elif GEN_GEN >= 5
- vb[1].BufferAccessType = INSTANCEDATA;
- vb[1].EndAddress = vb[1].BufferStartingAddress;
- vb[1].EndAddress.offset += size - 1;
+ vb[idx].BufferAccessType = stride > 0 ? VERTEXDATA : INSTANCEDATA; /* a zero stride selects INSTANCEDATA */
+ vb[idx].EndAddress = vb[idx].BufferStartingAddress;
+ vb[idx].EndAddress.offset += size - 1; /* end address is inclusive: start + size - 1 */
#elif GEN_GEN == 4
- vb[1].BufferAccessType = INSTANCEDATA;
- vb[1].MaxIndex = 0;
+ vb[idx].BufferAccessType = stride > 0 ? VERTEXDATA : INSTANCEDATA; /* a zero stride selects INSTANCEDATA */
+ vb[idx].MaxIndex = stride > 0 ? size / stride : 0; /* whole strides in the buffer; 0 when stride is 0 (avoids dividing by zero) */
#endif
+}
- const unsigned num_dwords = 1 + GENX(VERTEX_BUFFER_STATE_length) * 2;
+static void
+blorp_emit_vertex_buffers(struct blorp_batch *batch,
+ const struct blorp_params *params)
+{
+ struct GENX(VERTEX_BUFFER_STATE) vb[3];
+ uint32_t num_vbs = 2;
+ memset(vb, 0, sizeof(vb));
+
+ struct blorp_address addrs[2] = {};
+ uint32_t size;
+ blorp_emit_vertex_data(batch, params, &addrs[0], &size);
+ blorp_fill_vertex_buffer_state(batch, vb, 0, addrs[0], size,
+ 3 * sizeof(float));
+
+ blorp_emit_input_varying_data(batch, params, &addrs[1], &size);
+ blorp_fill_vertex_buffer_state(batch, vb, 1, addrs[1], size, 0);
+
+ blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, num_vbs);
+
+ const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length);
uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
if (!dw)
return;
- for (unsigned i = 0; i < 2; i++) {
+ for (unsigned i = 0; i < num_vbs; i++) {
GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
dw += GENX(VERTEX_BUFFER_STATE_length);
}
#else
.Component1Control = VFCOMP_STORE_0,
#endif
- .Component2Control = VFCOMP_STORE_SRC,
- .Component3Control = VFCOMP_STORE_SRC,
+ .Component2Control = VFCOMP_STORE_0,
+ .Component3Control = VFCOMP_STORE_0,
#if GEN_GEN <= 5
.DestinationElementOffset = slot * 4,
#endif
/* 3DSTATE_VIEWPORT_STATE_POINTERS */
static uint32_t
-blorp_emit_cc_viewport(struct blorp_batch *batch,
- const struct blorp_params *params)
+blorp_emit_cc_viewport(struct blorp_batch *batch)
{
uint32_t cc_vp_offset;
blorp_emit_dynamic(batch, GENX(CC_VIEWPORT), vp, 32, &cc_vp_offset) {
}
static uint32_t
-blorp_emit_sampler_state(struct blorp_batch *batch,
- const struct blorp_params *params)
+blorp_emit_sampler_state(struct blorp_batch *batch)
{
uint32_t offset;
blorp_emit_dynamic(batch, GENX(SAMPLER_STATE), sampler, 32, &offset) {
sampler.VAddressMagFilterRoundingEnable = true;
sampler.UAddressMinFilterRoundingEnable = true;
sampler.UAddressMagFilterRoundingEnable = true;
-#if GEN_GEN >= 6
+#if GEN_GEN > 6
sampler.NonnormalizedCoordinateEnable = true;
#endif
}
const struct blorp_params *params)
{
struct brw_vs_prog_data *vs_prog_data = params->vs_prog_data;
+ assert(!vs_prog_data || GEN_GEN < 11 ||
+ vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8);
blorp_emit(batch, GENX(3DSTATE_VS), vs) {
if (vs_prog_data) {
ps.BindingTableEntryCount = 1;
}
- if (prog_data) {
- ps.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
- ps.DispatchGRFStartRegisterForConstantSetupData2 =
- prog_data->dispatch_grf_start_reg_2;
+ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable recommends
+ * disabling prefetching of binding tables on A0 and B0 steppings.
+ * TODO: Revisit this WA on C0 stepping.
+ */
+ if (GEN_GEN == 11)
+ ps.BindingTableEntryCount = 0;
+ if (prog_data) {
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
+ ps._32PixelDispatchEnable = prog_data->dispatch_32;
+
+ /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
+ *
+ * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
+ * Dispatch must not be enabled for PER_PIXEL dispatch mode."
+ *
+ * Since 16x MSAA is first introduced on SKL, we don't need to apply
+ * the workaround on any older hardware.
+ */
+ if (GEN_GEN >= 9 && !prog_data->persample_dispatch &&
+ params->num_samples == 16) {
+ assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
+ ps._32PixelDispatchEnable = false;
+ }
- ps.KernelStartPointer0 = params->wm_prog_kernel;
- ps.KernelStartPointer2 =
- params->wm_prog_kernel + prog_data->prog_offset_2;
+ ps.DispatchGRFStartRegisterForConstantSetupData0 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
+ ps.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
+ ps.DispatchGRFStartRegisterForConstantSetupData2 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
+
+ ps.KernelStartPointer0 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 0);
+ ps.KernelStartPointer1 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 1);
+ ps.KernelStartPointer2 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 2);
}
- /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
- * it implicitly scales for different GT levels (which have some # of
- * PSDs).
+ /* 3DSTATE_PS expects the number of threads per PSD, which is always 64
+ * before Gen11 and 128 on Gen11+. On Gen11+, a programmed value of k
+ * implies 2(k+1) threads. The value implicitly scales for different GT
+ * levels (which have some # of PSDs).
*
- * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
+ * In Gen8 the format is U8-2 whereas in Gen9+ it is U9-1.
*/
if (GEN_GEN >= 9)
ps.MaximumNumberofThreadsPerPSD = 64 - 1;
ps.MaximumNumberofThreadsPerPSD = 64 - 2;
switch (params->fast_clear_op) {
- case BLORP_FAST_CLEAR_OP_NONE:
+ case ISL_AUX_OP_NONE:
break;
#if GEN_GEN >= 9
- case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
+ case ISL_AUX_OP_PARTIAL_RESOLVE:
ps.RenderTargetResolveType = RESOLVE_PARTIAL;
break;
- case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
+ case ISL_AUX_OP_FULL_RESOLVE:
ps.RenderTargetResolveType = RESOLVE_FULL;
break;
#else
- case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
+ case ISL_AUX_OP_FULL_RESOLVE:
ps.RenderTargetResolveEnable = true;
break;
#endif
- case BLORP_FAST_CLEAR_OP_CLEAR:
+ case ISL_AUX_OP_FAST_CLEAR:
ps.RenderTargetFastClearEnable = true;
break;
default:
blorp_emit(batch, GENX(3DSTATE_WM), wm) {
switch (params->hiz_op) {
- case BLORP_HIZ_OP_DEPTH_CLEAR:
+ case ISL_AUX_OP_FAST_CLEAR:
wm.DepthBufferClear = true;
break;
- case BLORP_HIZ_OP_DEPTH_RESOLVE:
+ case ISL_AUX_OP_FULL_RESOLVE:
wm.DepthBufferResolveEnable = true;
break;
- case BLORP_HIZ_OP_HIZ_RESOLVE:
+ case ISL_AUX_OP_AMBIGUATE:
wm.HierarchicalDepthBufferResolveEnable = true;
break;
- case BLORP_HIZ_OP_NONE:
+ case ISL_AUX_OP_NONE:
break;
default:
unreachable("not reached");
#endif
if (prog_data) {
+ ps._8PixelDispatchEnable = prog_data->dispatch_8;
+ ps._16PixelDispatchEnable = prog_data->dispatch_16;
+ ps._32PixelDispatchEnable = prog_data->dispatch_32;
+
ps.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
+ ps.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
- prog_data->dispatch_grf_start_reg_2;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
- ps.KernelStartPointer0 = params->wm_prog_kernel;
- ps.KernelStartPointer2 =
- params->wm_prog_kernel + prog_data->prog_offset_2;
-
- ps._8PixelDispatchEnable = prog_data->dispatch_8;
- ps._16PixelDispatchEnable = prog_data->dispatch_16;
+ ps.KernelStartPointer0 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 0);
+ ps.KernelStartPointer1 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 1);
+ ps.KernelStartPointer2 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 2);
ps.AttributeEnable = prog_data->num_varying_inputs > 0;
} else {
ps.SamplerCount = 1; /* Up to 4 samplers */
switch (params->fast_clear_op) {
- case BLORP_FAST_CLEAR_OP_NONE:
+ case ISL_AUX_OP_NONE:
break;
- case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
+ case ISL_AUX_OP_FULL_RESOLVE:
ps.RenderTargetResolveEnable = true;
break;
- case BLORP_FAST_CLEAR_OP_CLEAR:
+ case ISL_AUX_OP_FAST_CLEAR:
ps.RenderTargetFastClearEnable = true;
break;
default:
batch->blorp->isl_dev->info->max_wm_threads - 1;
switch (params->hiz_op) {
- case BLORP_HIZ_OP_DEPTH_CLEAR:
+ case ISL_AUX_OP_FAST_CLEAR:
wm.DepthBufferClear = true;
break;
- case BLORP_HIZ_OP_DEPTH_RESOLVE:
+ case ISL_AUX_OP_FULL_RESOLVE:
wm.DepthBufferResolveEnable = true;
break;
- case BLORP_HIZ_OP_HIZ_RESOLVE:
+ case ISL_AUX_OP_AMBIGUATE:
wm.HierarchicalDepthBufferResolveEnable = true;
break;
- case BLORP_HIZ_OP_NONE:
+ case ISL_AUX_OP_NONE:
break;
default:
unreachable("not reached");
if (prog_data) {
wm.ThreadDispatchEnable = true;
+ wm._8PixelDispatchEnable = prog_data->dispatch_8;
+ wm._16PixelDispatchEnable = prog_data->dispatch_16;
+ wm._32PixelDispatchEnable = prog_data->dispatch_32;
+
wm.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 0);
+ wm.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 1);
wm.DispatchGRFStartRegisterForConstantSetupData2 =
- prog_data->dispatch_grf_start_reg_2;
-
- wm.KernelStartPointer0 = params->wm_prog_kernel;
- wm.KernelStartPointer2 =
- params->wm_prog_kernel + prog_data->prog_offset_2;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 2);
- wm._8PixelDispatchEnable = prog_data->dispatch_8;
- wm._16PixelDispatchEnable = prog_data->dispatch_16;
+ wm.KernelStartPointer0 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, wm, 0);
+ wm.KernelStartPointer1 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, wm, 1);
+ wm.KernelStartPointer2 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, wm, 2);
wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
}
static uint32_t
blorp_emit_color_calc_state(struct blorp_batch *batch,
- const struct blorp_params *params)
+ MAYBE_UNUSED const struct blorp_params *params)
{
uint32_t offset;
blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {
ds.DepthBufferWriteEnable = true;
switch (params->hiz_op) {
- case BLORP_HIZ_OP_NONE:
+ case ISL_AUX_OP_NONE:
ds.DepthTestEnable = true;
ds.DepthTestFunction = COMPAREFUNCTION_ALWAYS;
break;
* - 7.5.3.2 Depth Buffer Resolve
* - 7.5.3.3 Hierarchical Depth Buffer Resolve
*/
- case BLORP_HIZ_OP_DEPTH_RESOLVE:
+ case ISL_AUX_OP_FULL_RESOLVE:
ds.DepthTestEnable = true;
ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
break;
- case BLORP_HIZ_OP_DEPTH_CLEAR:
- case BLORP_HIZ_OP_HIZ_RESOLVE:
+ case ISL_AUX_OP_FAST_CLEAR:
+ case ISL_AUX_OP_AMBIGUATE:
ds.DepthTestEnable = false;
break;
+ case ISL_AUX_OP_PARTIAL_RESOLVE:
+ unreachable("Invalid HIZ op");
}
}
blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps);
if (params->src.enabled)
- blorp_emit_sampler_state(batch, params);
+ blorp_emit_sampler_state(batch);
blorp_emit_3dstate_multisample(batch, params);
blorp_emit_sf_config(batch, params);
blorp_emit_ps_config(batch, params);
- blorp_emit_cc_viewport(batch, params);
+ blorp_emit_cc_viewport(batch);
}
/******** This is the end of the pipeline setup code ********/
#endif /* GEN_GEN >= 6 */
+#if GEN_GEN >= 7
+/**
+ * Emit command-streamer commands that copy \p size bytes from \p src to
+ * \p dst, one dword per iteration.
+ *
+ * \p size is asserted to be a multiple of four.  On gen8+ each dword is moved
+ * with MI_COPY_MEM_MEM.  On gen7 each dword is loaded from memory into
+ * register 0x2440 with MI_LOAD_REGISTER_MEM and then written out with
+ * MI_STORE_REGISTER_MEM, so that register's previous contents are
+ * overwritten.
+ */
+static void
+blorp_emit_memcpy(struct blorp_batch *batch,
+                  struct blorp_address dst,
+                  struct blorp_address src,
+                  uint32_t size)
+{
+   assert(size % 4 == 0);
+
+#if GEN_GEN < 8
+   /* IVB does not have a general purpose register for command streamer
+    * commands. Therefore, we use an alternate temporary register.
+    */
+#define BLORP_TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
+#endif
+
+   for (unsigned bytes_done = 0; bytes_done < size; bytes_done += 4) {
+#if GEN_GEN < 8
+      blorp_emit(batch, GENX(MI_LOAD_REGISTER_MEM), load) {
+         load.RegisterAddress = BLORP_TEMP_REG;
+         load.MemoryAddress = src;
+      }
+      blorp_emit(batch, GENX(MI_STORE_REGISTER_MEM), store) {
+         store.RegisterAddress = BLORP_TEMP_REG;
+         store.MemoryAddress = dst;
+      }
+#else
+      blorp_emit(batch, GENX(MI_COPY_MEM_MEM), cp) {
+         cp.DestinationMemoryAddress = dst;
+         cp.SourceMemoryAddress = src;
+      }
+#endif
+      /* Advance both addresses to the next dword. */
+      dst.offset += 4;
+      src.offset += 4;
+   }
+
+#if GEN_GEN < 8
+#undef BLORP_TEMP_REG
+#endif
+}
+#endif
+
static void
blorp_emit_surface_state(struct blorp_batch *batch,
const struct brw_blorp_surface_info *surface,
+ enum isl_aux_op op,
void *state, uint32_t state_offset,
const bool color_write_disables[4],
bool is_render_target)
write_disable_mask |= ISL_CHANNEL_ALPHA_BIT;
}
- const uint32_t mocs =
- is_render_target ? batch->blorp->mocs.rb : batch->blorp->mocs.tex;
+ const bool use_clear_address =
+ GEN_GEN >= 10 && (surface->clear_color_addr.buffer != NULL);
isl_surf_fill_state(batch->blorp->isl_dev, state,
.surf = &surf, .view = &surface->view,
.aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
- .mocs = mocs, .clear_color = surface->clear_color,
+ .mocs = surface->addr.mocs,
+ .clear_color = surface->clear_color,
+ .use_clear_address = use_clear_address,
.write_disables = write_disable_mask);
blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset,
surface->aux_addr, *aux_addr);
}
+ if (surface->clear_color_addr.buffer) {
+#if GEN_GEN >= 10
+ assert((surface->clear_color_addr.offset & 0x3f) == 0);
+ uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset;
+ blorp_surface_reloc(batch, state_offset +
+ isl_dev->ss.clear_color_state_offset,
+ surface->clear_color_addr, *clear_addr);
+#elif GEN_GEN >= 7
+ if (op == ISL_AUX_OP_FULL_RESOLVE || op == ISL_AUX_OP_PARTIAL_RESOLVE) {
+ struct blorp_address dst_addr = blorp_get_surface_base_address(batch);
+ dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset;
+ blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr,
+ isl_dev->ss.clear_value_size);
+ }
+#else
+ unreachable("Fast clears are only supported on gen7+");
+#endif
+ }
+
blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);
}
const struct blorp_params *params)
{
const struct isl_device *isl_dev = batch->blorp->isl_dev;
- uint32_t bind_offset, surface_offsets[2];
+ uint32_t bind_offset = 0, surface_offsets[2];
void *surface_maps[2];
+ MAYBE_UNUSED bool has_indirect_clear_color = false;
if (params->use_pre_baked_binding_table) {
bind_offset = params->pre_baked_binding_table_offset;
} else {
if (params->dst.enabled) {
blorp_emit_surface_state(batch, ¶ms->dst,
+ params->fast_clear_op,
surface_maps[BLORP_RENDERBUFFER_BT_INDEX],
surface_offsets[BLORP_RENDERBUFFER_BT_INDEX],
params->color_write_disable, true);
+ if (params->dst.clear_color_addr.buffer != NULL)
+ has_indirect_clear_color = true;
} else {
assert(params->depth.enabled || params->stencil.enabled);
const struct brw_blorp_surface_info *surface =
if (params->src.enabled) {
blorp_emit_surface_state(batch, ¶ms->src,
+ params->fast_clear_op,
surface_maps[BLORP_TEXTURE_BT_INDEX],
surface_offsets[BLORP_TEXTURE_BT_INDEX],
NULL, false);
+ if (params->src.clear_color_addr.buffer != NULL)
+ has_indirect_clear_color = true;
}
}
+#if GEN_GEN >= 7
+ if (has_indirect_clear_color) {
+ /* Updating a surface state object may require that the state cache be
+ * invalidated. From the SKL PRM, Shared Functions -> State -> State
+ * Caching:
+ *
+ * Whenever the RENDER_SURFACE_STATE object in memory pointed to by
+ * the Binding Table Pointer (BTP) and Binding Table Index (BTI) is
+ * modified [...], the L1 state cache must be invalidated to ensure
+ * the new surface or sampler state is fetched from system memory.
+ */
+ blorp_emit(batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.StateCacheInvalidationEnable = true;
+ }
+ }
+#endif
+
#if GEN_GEN >= 7
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), bt);
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_HS), bt);
if (dw == NULL)
return;
- struct isl_depth_stencil_hiz_emit_info info = {
-#if GEN_GEN >= 7
- .mocs = 1, /* GEN7_MOCS_L3 */
-#else
- .mocs = 0,
-#endif
- };
+ struct isl_depth_stencil_hiz_emit_info info = { };
if (params->depth.enabled) {
info.view = ¶ms->depth.view;
+ info.mocs = params->depth.addr.mocs;
} else if (params->stencil.enabled) {
info.view = ¶ms->stencil.view;
+ info.mocs = params->stencil.addr.mocs;
}
if (params->depth.enabled) {
* requested.
*/
if (params->stencil.enabled)
- assert(params->hiz_op == BLORP_HIZ_OP_DEPTH_CLEAR);
+ assert(params->hiz_op == ISL_AUX_OP_FAST_CLEAR);
+
+ /* From the BDW PRM Volume 2, 3DSTATE_WM_HZ_OP:
+ *
+ * 3DSTATE_MULTISAMPLE packet must be used prior to this packet to change
+ * the Number of Multisamples. This packet must not be used to change
+ * Number of Multisamples in a rendering sequence.
+ *
+ * Since HIZ may be the first thing in a batch buffer, play safe and always
+ * emit 3DSTATE_MULTISAMPLE.
+ */
+ blorp_emit_3dstate_multisample(batch, params);
+
+ /* From the BDW PRM Volume 7, Depth Buffer Clear:
+ *
+ * The clear value must be between the min and max depth values
+ * (inclusive) defined in the CC_VIEWPORT. If the depth buffer format is
+ * D32_FLOAT, then +/-DENORM values are also allowed.
+ *
+ * Set the bounds to match our hardware limits, [0.0, 1.0].
+ */
+ if (params->depth.enabled && params->hiz_op == ISL_AUX_OP_FAST_CLEAR) {
+ assert(params->depth.clear_color.f32[0] >= 0.0f);
+ assert(params->depth.clear_color.f32[0] <= 1.0f);
+ blorp_emit_cc_viewport(batch);
+ }
+
+ /* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the
+ * 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch
+ * even when WM_HZ_OP is active. However, WM thread dispatch is normally
+ * disabled for HiZ ops and it appears that force-enabling it can lead to
+ * GPU hangs on at least Skylake. Since we don't know the current state of
+ * the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP.
+ */
+ blorp_emit(batch, GENX(3DSTATE_WM), wm);
/* If we can't alter the depth stencil config and multiple layers are
* involved, the HiZ op will fail. This is because the op requires that a
blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp) {
switch (params->hiz_op) {
- case BLORP_HIZ_OP_DEPTH_CLEAR:
+ case ISL_AUX_OP_FAST_CLEAR:
hzp.StencilBufferClearEnable = params->stencil.enabled;
hzp.DepthBufferClearEnable = params->depth.enabled;
hzp.StencilClearValue = params->stencil_ref;
hzp.FullSurfaceDepthandStencilClear = params->full_surface_hiz_op;
break;
- case BLORP_HIZ_OP_DEPTH_RESOLVE:
+ case ISL_AUX_OP_FULL_RESOLVE:
assert(params->full_surface_hiz_op);
hzp.DepthBufferResolveEnable = true;
break;
- case BLORP_HIZ_OP_HIZ_RESOLVE:
+ case ISL_AUX_OP_AMBIGUATE:
assert(params->full_surface_hiz_op);
hzp.HierarchicalDepthBufferResolveEnable = true;
break;
- case BLORP_HIZ_OP_NONE:
+ case ISL_AUX_OP_PARTIAL_RESOLVE:
+ case ISL_AUX_OP_NONE:
unreachable("Invalid HIZ op");
}
}
#endif
+/**
+ * Write a surface's clear color to its indirect clear color address.
+ *
+ * Nothing is emitted unless the surface has a clear color address buffer and
+ * \p op is ISL_AUX_OP_FAST_CLEAR.
+ *
+ * On gen9+ the four clear color dwords are stored one at a time with
+ * MI_STORE_DATA_IMM at consecutive 4-byte offsets.  On gen7-8 a single dword
+ * is stored instead: the RGBA channel selects occupy bits 27:16 and bits
+ * 31:28 hold one flag per channel (R, G, B, A) that is set when that channel
+ * of the clear color is non-zero.  Only 0 and 1 (or 0.0f and 1.0f for
+ * non-integer formats) are accepted as channel values on that path.  On
+ * earlier gens no commands are emitted.
+ */
+static void
+blorp_update_clear_color(struct blorp_batch *batch,
+                         const struct brw_blorp_surface_info *info,
+                         enum isl_aux_op op)
+{
+   if (!info->clear_color_addr.buffer || op != ISL_AUX_OP_FAST_CLEAR)
+      return;
+
+#if GEN_GEN >= 9
+   for (int i = 0; i < 4; i++) {
+      blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
+         sdi.Address = info->clear_color_addr;
+         sdi.Address.offset += i * 4;
+         sdi.ImmediateData = info->clear_color.u32[i];
+      }
+   }
+#elif GEN_GEN >= 7
+   blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) {
+      const bool is_int = isl_format_has_int_channel(info->view.format);
+
+      sdi.Address = info->clear_color_addr;
+      sdi.ImmediateData = ISL_CHANNEL_SELECT_RED   << 25 |
+                          ISL_CHANNEL_SELECT_GREEN << 22 |
+                          ISL_CHANNEL_SELECT_BLUE  << 19 |
+                          ISL_CHANNEL_SELECT_ALPHA << 16;
+
+      for (unsigned i = 0; i < 4; i++) {
+         bool channel_set;
+
+         if (is_int) {
+            assert(info->clear_color.u32[i] == 0 ||
+                   info->clear_color.u32[i] == 1);
+            channel_set = info->clear_color.u32[i] != 0;
+         } else {
+            assert(info->clear_color.f32[i] == 0.0f ||
+                   info->clear_color.f32[i] == 1.0f);
+            channel_set = info->clear_color.f32[i] != 0.0f;
+         }
+
+         /* Channel i lands in bit 31 - i.  Shift an unsigned 1: shifting a
+          * signed int 1 into bit 31 is undefined behaviour in C.
+          */
+         if (channel_set)
+            sdi.ImmediateData |= (uint32_t)1 << (31 - i);
+      }
+   }
+#endif
+}
+
/**
* \brief Execute a blit or render pass operation.
*
static void
blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
{
+ if (!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR)) {
+ blorp_update_clear_color(batch, ¶ms->dst, params->fast_clear_op);
+ blorp_update_clear_color(batch, ¶ms->depth, params->hiz_op);
+ }
+
#if GEN_GEN >= 8
- if (params->hiz_op != BLORP_HIZ_OP_NONE) {
+ if (params->hiz_op != ISL_AUX_OP_NONE) {
blorp_emit_gen8_hiz_op(batch, params);
return;
}
blorp_emit(batch, GENX(3DPRIMITIVE), prim) {
prim.VertexAccessType = SEQUENTIAL;
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
+#if GEN_GEN >= 7
+ prim.PredicateEnable = batch->flags & BLORP_BATCH_PREDICATE_ENABLE;
+#endif
prim.VertexCountPerInstance = 3;
prim.InstanceCount = params->num_layers;
}