static void
add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image_view *iview,
+ enum isl_aux_usage aux_usage,
struct anv_state state)
{
const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
state.offset + isl_dev->ss.addr_offset,
iview->bo, iview->offset);
+
+ if (aux_usage != ISL_AUX_USAGE_NONE) {
+ uint32_t aux_offset = iview->offset + iview->image->aux_surface.offset;
+
+ /* On gen7 and prior, the bottom 12 bits of the MCS base address are
+ * used to store other information. This should be ok, however, because
+ * surface buffer addresses are always 4K page alinged.
+ */
+ assert((aux_offset & 0xfff) == 0);
+ uint32_t *aux_addr_dw = state.map + isl_dev->ss.aux_addr_offset;
+ aux_offset += *aux_addr_dw & 0xfff;
+
+ anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+ state.offset + isl_dev->ss.aux_addr_offset,
+ iview->bo, aux_offset);
+ }
+}
+
+static enum isl_aux_usage
+fb_attachment_get_aux_usage(struct anv_device *device,
+ struct anv_framebuffer *fb,
+ uint32_t attachment)
+{
+ struct anv_image_view *iview = fb->attachments[attachment];
+
+ if (iview->image->aux_surface.isl.size == 0)
+ return ISL_AUX_USAGE_NONE; /* No aux surface */
+
+ assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT);
+
+ if (isl_format_supports_lossless_compression(&device->info,
+ iview->isl.format))
+ return ISL_AUX_USAGE_CCS_E;
+
+ return ISL_AUX_USAGE_NONE;
}
/**
assert(iview->vk_format == att->format);
if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ state->attachments[i].aux_usage =
+ fb_attachment_get_aux_usage(cmd_buffer->device, framebuffer, i);
+
struct isl_view view = iview->isl;
view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
isl_surf_fill_state(isl_dev,
state->attachments[i].color_rt_state.map,
.surf = &iview->image->color_surface.isl,
.view = &view,
+ .aux_surf = &iview->image->aux_surface.isl,
+ .aux_usage = state->attachments[i].aux_usage,
.mocs = cmd_buffer->device->default_mocs);
add_image_view_relocs(cmd_buffer, iview,
+ state->attachments[i].aux_usage,
state->attachments[i].color_rt_state);
+ } else {
+ state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
}
}
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_SHADER_READ_BIT:
- case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
surface_state = desc->image_view->sampler_surface_state;
assert(surface_state.alloc_size);
- add_image_view_relocs(cmd_buffer, desc->image_view, surface_state);
+ add_image_view_relocs(cmd_buffer, desc->image_view,
+ desc->image_view->image->aux_usage,
+ surface_state);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
surface_state = desc->image_view->storage_surface_state;
assert(surface_state.alloc_size);
- add_image_view_relocs(cmd_buffer, desc->image_view, surface_state);
+ add_image_view_relocs(cmd_buffer, desc->image_view,
+ desc->image_view->image->aux_usage,
+ surface_state);
struct brw_image_param *image_param =
&cmd_buffer->state.push_constants[stage]->images[image++];
struct anv_state surfaces = { 0, }, samplers = { 0, };
VkResult result;
- result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
- if (result != VK_SUCCESS)
- return result;
result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
- if (result != VK_SUCCESS)
- return result;
+ if (result != VK_SUCCESS) {
+ assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
+ assert(result == VK_SUCCESS);
- struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
+ /* Re-emit state base addresses so we get the new surface state base
+ * address before we start emitting binding tables etc.
+ */
+ genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
- if (push_state.alloc_size) {
- anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
- curbe.CURBETotalDataLength = push_state.alloc_size;
- curbe.CURBEDataStartAddress = push_state.offset;
- }
+ result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
+ assert(result == VK_SUCCESS);
}
+ result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
+ assert(result == VK_SUCCESS);
+
uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
.BindingTablePointer = surfaces.offset,
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
- if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
+ if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) {
+ /* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE:
+ *
+ * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+ * the only bits that are changed are scoreboard related: Scoreboard
+ * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
+ * these scoreboard related states, a MEDIA_STATE_FLUSH is
+ * sufficient."
+ */
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+ }
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
(cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
+ if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
+ struct anv_state push_state =
+ anv_cmd_buffer_cs_push_constants(cmd_buffer);
+
+ if (push_state.alloc_size) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
+ curbe.CURBETotalDataLength = push_state.alloc_size;
+ curbe.CURBEDataStartAddress = push_state.offset;
+ }
+ }
+ }
+
cmd_buffer->state.compute_dirty = 0;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
}
+void
+genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer)
+{
+ if (GEN_GEN >= 8)
+ return;
+
+ /* From the Haswell PRM, documentation for 3DSTATE_DEPTH_BUFFER:
+ *
+ * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e., any
+ * combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
+ * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
+ * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
+ * set), followed by a pipelined depth cache flush (PIPE_CONTROL with
+ * Depth Flush Bit set, followed by another pipelined depth stall
+ * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
+ * guarantee that the pipeline from WM onwards is already flushed (e.g.,
+ * via a preceding MI_FLUSH)."
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthStallEnable = true;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthCacheFlushEnable = true;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthStallEnable = true;
+ }
+}
+
static void
cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
/* FIXME: Implement the PMA stall W/A */
/* FIXME: Width and Height are wrong */
+ genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
+
/* Emit 3DSTATE_DEPTH_BUFFER */
if (has_depth) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), db) {