#include <stdbool.h>
#include "anv_private.h"
+#include "vk_format_info.h"
#include "common/gen_l3_config.h"
#include "genxml/gen_macros.h"
}
}
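+/* Record a relocation on a surface state's address field so the BO's final
+ * address is patched in when the batch is executed.
+ */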
+static void
+add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_state state,
+ struct anv_bo *bo, uint32_t offset)
+{
+ const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
+
+ anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+ state.offset + isl_dev->ss.addr_offset, bo, offset);
+}
+
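+/* Like add_surface_state_reloc, but also adds a relocation for the auxiliary
+ * (CCS/MCS/HiZ) surface address when an aux usage is in effect.
+ */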
+static void
+add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_image_view *iview,
+ enum isl_aux_usage aux_usage,
+ struct anv_state state)
+{
+ const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
+
+ anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+ state.offset + isl_dev->ss.addr_offset,
+ iview->bo, iview->offset);
+
+ if (aux_usage != ISL_AUX_USAGE_NONE) {
+ uint32_t aux_offset = iview->offset + iview->image->aux_surface.offset;
+
+ /* On gen7 and prior, the bottom 12 bits of the MCS base address are
+ * used to store other information. This should be ok, however, because
+ * surface buffer addresses are always 4K page aligned.
+ */
+ assert((aux_offset & 0xfff) == 0);
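+ /* Fold the extra information isl packed into the low 12 bits of the
+ * address dword into the relocation delta so it survives the address
+ * patch.
+ */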
+ uint32_t *aux_addr_dw = state.map + isl_dev->ss.aux_addr_offset;
+ aux_offset += *aux_addr_dw & 0xfff;
+
+ anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+ state.offset + isl_dev->ss.aux_addr_offset,
+ iview->bo, aux_offset);
+ }
+}
+
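+/* Pick the aux usage for a framebuffer attachment: CCS_E if the image has a
+ * CCS surface and the view's format supports lossless compression, otherwise
+ * none.
+ */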
+static enum isl_aux_usage
+fb_attachment_get_aux_usage(struct anv_device *device,
+ struct anv_framebuffer *fb,
+ uint32_t attachment)
+{
+ struct anv_image_view *iview = fb->attachments[attachment];
+
+ if (iview->image->aux_surface.isl.size == 0)
+ return ISL_AUX_USAGE_NONE; /* No aux surface */
+
+ assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT);
+
+ if (isl_format_supports_lossless_compression(&device->info,
+ iview->isl.format))
+ return ISL_AUX_USAGE_CCS_E;
+
+ return ISL_AUX_USAGE_NONE;
+}
+
+/**
+ * Set up anv_cmd_state::attachments for vkCmdBeginRenderPass.
+ */
+static void
+genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_render_pass *pass,
+ struct anv_framebuffer *framebuffer,
+ const VkClearValue *clear_values)
+{
+ const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
+ struct anv_cmd_state *state = &cmd_buffer->state;
+
+ vk_free(&cmd_buffer->pool->alloc, state->attachments);
+
+ if (pass->attachment_count == 0) {
+ state->attachments = NULL;
+ return;
+ }
+
+ state->attachments = vk_alloc(&cmd_buffer->pool->alloc,
+ pass->attachment_count *
+ sizeof(state->attachments[0]),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (state->attachments == NULL) {
+ /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
+ abort();
+ }
+
+ bool need_null_state = false;
+ unsigned num_states = 0;
+ for (uint32_t i = 0; i < pass->attachment_count; ++i) {
+ if (vk_format_is_color(pass->attachments[i].format)) {
+ num_states++;
+ } else {
+ /* We need a null state for any depth-stencil-only subpasses.
+ * Importantly, this includes depth/stencil clears, so we create one
+ * whenever the pass has a depth or stencil attachment.
+ */
+ need_null_state = true;
+ }
+ }
+ num_states += need_null_state;
+
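+ /* All of the render pass surface states are carved out of one stream
+ * allocation, spaced ss_stride apart so each state meets the hardware
+ * alignment requirement.
+ */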
+ const uint32_t ss_stride = align_u32(isl_dev->ss.size, isl_dev->ss.align);
+ state->render_pass_states =
+ anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
+ num_states * ss_stride, isl_dev->ss.align);
+
+ struct anv_state next_state = state->render_pass_states;
+ next_state.alloc_size = isl_dev->ss.size;
+
+ if (need_null_state) {
+ state->null_surface_state = next_state;
+ next_state.offset += ss_stride;
+ next_state.map += ss_stride;
+ }
+
+ for (uint32_t i = 0; i < pass->attachment_count; ++i) {
+ if (vk_format_is_color(pass->attachments[i].format)) {
+ state->attachments[i].color_rt_state = next_state;
+ next_state.offset += ss_stride;
+ next_state.map += ss_stride;
+ }
+ }
+ assert(next_state.offset == state->render_pass_states.offset +
+ state->render_pass_states.alloc_size);
+
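+ /* With no framebuffer (the inherited render pass case in
+ * BeginCommandBuffer), we only reserve the state memory; the actual
+ * surface states are copied in from the primary in CmdExecuteCommands.
+ */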
+ if (framebuffer) {
+ assert(pass->attachment_count == framebuffer->attachment_count);
+
+ if (need_null_state) {
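+ /* A SURFTYPE_NULL surface gives depth-stencil-only subpasses and unused
+ * render target bindings a harmless surface to point at; writes to it
+ * are discarded.
+ */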
+ struct GENX(RENDER_SURFACE_STATE) null_ss = {
+ .SurfaceType = SURFTYPE_NULL,
+ .SurfaceArray = framebuffer->layers > 0,
+ .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM,
+#if GEN_GEN >= 8
+ .TileMode = YMAJOR,
+#else
+ .TiledSurface = true,
+#endif
+ .Width = framebuffer->width - 1,
+ .Height = framebuffer->height - 1,
+ .Depth = framebuffer->layers - 1,
+ .RenderTargetViewExtent = framebuffer->layers - 1,
+ };
+ GENX(RENDER_SURFACE_STATE_pack)(NULL, state->null_surface_state.map,
+ &null_ss);
+ }
+
+ for (uint32_t i = 0; i < pass->attachment_count; ++i) {
+ struct anv_render_pass_attachment *att = &pass->attachments[i];
+ VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
+ VkImageAspectFlags clear_aspects = 0;
+
+ if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ /* color attachment */
+ if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
+ }
+ } else {
+ /* depthstencil attachment */
+ if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+ att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+ }
+ if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+ }
+
+ state->attachments[i].pending_clear_aspects = clear_aspects;
+ if (clear_aspects)
+ state->attachments[i].clear_value = clear_values[i];
+
+ struct anv_image_view *iview = framebuffer->attachments[i];
+ assert(iview->vk_format == att->format);
+
+ if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ state->attachments[i].aux_usage =
+ fb_attachment_get_aux_usage(cmd_buffer->device, framebuffer, i);
+
+ struct isl_view view = iview->isl;
+ view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
+ isl_surf_fill_state(isl_dev,
+ state->attachments[i].color_rt_state.map,
+ .surf = &iview->image->color_surface.isl,
+ .view = &view,
+ .aux_surf = &iview->image->aux_surface.isl,
+ .aux_usage = state->attachments[i].aux_usage,
+ .mocs = cmd_buffer->device->default_mocs);
+
+ add_image_view_relocs(cmd_buffer, iview,
+ state->attachments[i].aux_usage,
+ state->attachments[i].color_rt_state);
+ } else {
+ state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
+ }
+ }
+
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state->render_pass_states);
+ }
+}
+
VkResult
genX(BeginCommandBuffer)(
VkCommandBuffer commandBuffer,
if (cmd_buffer->usage_flags &
VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
- cmd_buffer->state.framebuffer =
- anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
cmd_buffer->state.pass =
anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
cmd_buffer->state.subpass =
&cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
+ cmd_buffer->state.framebuffer = NULL;
+
+ genX(cmd_buffer_setup_attachments)(cmd_buffer, cmd_buffer->state.pass,
+ NULL, NULL);
cmd_buffer->state.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
}
assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
+ if (secondary->usage_flags &
+ VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
+ /* If we're continuing a render pass from the primary, we need to
+ * copy the surface states for the current subpass into the storage
+ * we allocated for them in BeginCommandBuffer.
+ */
+ struct anv_bo *ss_bo = &primary->device->surface_state_block_pool.bo;
+ struct anv_state src_state = primary->state.render_pass_states;
+ struct anv_state dst_state = secondary->state.render_pass_states;
+ assert(src_state.alloc_size == dst_state.alloc_size);
+
+ genX(cmd_buffer_gpu_memcpy)(primary, ss_bo, dst_state.offset,
+ ss_bo, src_state.offset,
+ src_state.alloc_size);
+ }
+
anv_cmd_buffer_add_secondary(primary, secondary);
}
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_SHADER_READ_BIT:
- case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}
-static void
-add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
- struct anv_state state, struct anv_bo *bo,
- uint32_t offset)
-{
- const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
-
- anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
- state.offset + isl_dev->ss.addr_offset, bo, offset);
-}
-
-static struct anv_state
-alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer,
- struct anv_framebuffer *fb)
-{
- struct anv_state state =
- anv_cmd_buffer_alloc_surface_state(cmd_buffer);
-
- struct GENX(RENDER_SURFACE_STATE) null_ss = {
- .SurfaceType = SURFTYPE_NULL,
- .SurfaceArray = fb->layers > 0,
- .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM,
-#if GEN_GEN >= 8
- .TileMode = YMAJOR,
-#else
- .TiledSurface = true,
-#endif
- .Width = fb->width - 1,
- .Height = fb->height - 1,
- .Depth = fb->layers - 1,
- .RenderTargetViewExtent = fb->layers - 1,
- };
-
- GENX(RENDER_SURFACE_STATE_pack)(NULL, state.map, &null_ss);
-
- if (!cmd_buffer->device->info.has_llc)
- anv_state_clflush(state);
-
- return state;
-}
-
-
static VkResult
emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage,
struct anv_state *bt_state)
{
- struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
struct anv_subpass *subpass = cmd_buffer->state.subpass;
struct anv_pipeline *pipeline;
uint32_t bias, state_offset;
struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s];
struct anv_state surface_state;
- struct anv_bo *bo;
- uint32_t bo_offset;
if (binding->set == ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) {
/* Color attachment binding */
assert(stage == MESA_SHADER_FRAGMENT);
assert(binding->binding == 0);
if (binding->index < subpass->color_count) {
- const struct anv_image_view *iview =
- fb->attachments[subpass->color_attachments[binding->index]];
-
- assert(iview->color_rt_surface_state.alloc_size);
- surface_state = iview->color_rt_surface_state;
- add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state,
- iview->bo, iview->offset);
+ const unsigned att = subpass->color_attachments[binding->index];
+ surface_state = cmd_buffer->state.attachments[att].color_rt_state;
} else {
- /* Null render target */
- struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
- surface_state = alloc_null_surface_state(cmd_buffer, fb);
+ surface_state = cmd_buffer->state.null_surface_state;
}
bt_map[bias + s] = surface_state.offset + state_offset;
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
surface_state = desc->image_view->sampler_surface_state;
assert(surface_state.alloc_size);
- bo = desc->image_view->bo;
- bo_offset = desc->image_view->offset;
+ add_image_view_relocs(cmd_buffer, desc->image_view,
+ desc->image_view->image->aux_usage,
+ surface_state);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
surface_state = desc->image_view->storage_surface_state;
assert(surface_state.alloc_size);
- bo = desc->image_view->bo;
- bo_offset = desc->image_view->offset;
+ add_image_view_relocs(cmd_buffer, desc->image_view,
+ desc->image_view->image->aux_usage,
+ surface_state);
struct brw_image_param *image_param =
&cmd_buffer->state.push_constants[stage]->images[image++];
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
surface_state = desc->buffer_view->surface_state;
assert(surface_state.alloc_size);
- bo = desc->buffer_view->bo;
- bo_offset = desc->buffer_view->offset;
+ add_surface_state_reloc(cmd_buffer, surface_state,
+ desc->buffer_view->bo,
+ desc->buffer_view->offset);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
surface_state = desc->buffer_view->storage_surface_state;
assert(surface_state.alloc_size);
- bo = desc->buffer_view->bo;
- bo_offset = desc->buffer_view->offset;
+ add_surface_state_reloc(cmd_buffer, surface_state,
+ desc->buffer_view->bo,
+ desc->buffer_view->offset);
struct brw_image_param *image_param =
&cmd_buffer->state.push_constants[stage]->images[image++];
}
bt_map[bias + s] = surface_state.offset + state_offset;
- add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
}
assert(image == map->image_count);
struct anv_state surfaces = { 0, }, samplers = { 0, };
VkResult result;
- result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
- if (result != VK_SUCCESS)
- return result;
result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
- if (result != VK_SUCCESS)
- return result;
+ if (result != VK_SUCCESS) {
+ assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
+ assert(result == VK_SUCCESS);
- struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
+ /* Re-emit state base addresses so we get the new surface state base
+ * address before we start emitting binding tables etc.
+ */
+ genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
- if (push_state.alloc_size) {
- anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
- curbe.CURBETotalDataLength = push_state.alloc_size;
- curbe.CURBEDataStartAddress = push_state.offset;
- }
+ result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
+ assert(result == VK_SUCCESS);
}
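+ /* Samplers are allocated out of the dynamic state stream, which grows on
+ * demand, so unlike binding tables this is not expected to fail.
+ */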
+ result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
+ assert(result == VK_SUCCESS);
+
uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
.BindingTablePointer = surfaces.offset,
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
- if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
+ if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) {
+ /* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE:
+ *
+ * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+ * the only bits that are changed are scoreboard related: Scoreboard
+ * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard Delta. For
+ * these scoreboard related states, a MEDIA_STATE_FLUSH is
+ * sufficient."
+ */
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+ }
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
(cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
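+ /* Compute push constants go out via MEDIA_CURBE_LOAD rather than the 3D
+ * constant packets, so they are handled here.
+ */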
+ if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
+ struct anv_state push_state =
+ anv_cmd_buffer_cs_push_constants(cmd_buffer);
+
+ if (push_state.alloc_size) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
+ curbe.CURBETotalDataLength = push_state.alloc_size;
+ curbe.CURBEDataStartAddress = push_state.offset;
+ }
+ }
+ }
+
cmd_buffer->state.compute_dirty = 0;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
}
+void
+genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer)
+{
+ if (GEN_GEN >= 8)
+ return;
+
+ /* From the Haswell PRM, documentation for 3DSTATE_DEPTH_BUFFER:
+ *
+ * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e., any
+ * combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
+ * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
+ * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
+ * set), followed by a pipelined depth cache flush (PIPE_CONTROL with
+ * Depth Flush Bit set), followed by another pipelined depth stall
+ * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
+ * guarantee that the pipeline from WM onwards is already flushed (e.g.,
+ * via a preceding MI_FLUSH)."
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthStallEnable = true;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthCacheFlushEnable = true;
+ }
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthStallEnable = true;
+ }
+}
+
static void
cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
/* FIXME: Implement the PMA stall W/A */
/* FIXME: Width and Height are wrong */
+ genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
+
/* Emit 3DSTATE_DEPTH_BUFFER */
if (has_depth) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
if (has_hiz) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hdb) {
hdb.HierarchicalDepthBufferObjectControlState = GENX(MOCS);
- hdb.SurfacePitch = image->hiz_surface.isl.row_pitch - 1;
+ hdb.SurfacePitch = image->aux_surface.isl.row_pitch - 1;
hdb.SurfaceBaseAddress = (struct anv_address) {
.bo = image->bo,
- .offset = image->offset + image->hiz_surface.offset,
+ .offset = image->offset + image->aux_surface.offset,
};
#if GEN_GEN >= 8
/* From the SKL PRM Vol2a:
* - SURFTYPE_3D: distance in rows between R - slices
*/
hdb.SurfaceQPitch =
- image->hiz_surface.isl.dim == ISL_SURF_DIM_1D ?
- isl_surf_get_array_pitch_el(&image->hiz_surface.isl) >> 2 :
- isl_surf_get_array_pitch_el_rows(&image->hiz_surface.isl) >> 2;
+ image->aux_surface.isl.dim == ISL_SURF_DIM_1D ?
+ isl_surf_get_array_pitch_el(&image->aux_surface.isl) >> 2 :
+ isl_surf_get_array_pitch_el_rows(&image->aux_surface.isl) >> 2;
#endif
}
} else {
cmd_buffer->state.framebuffer = framebuffer;
cmd_buffer->state.pass = pass;
cmd_buffer->state.render_area = pRenderPassBegin->renderArea;
- anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin);
+ genX(cmd_buffer_setup_attachments)(cmd_buffer, pass, framebuffer,
+ pRenderPassBegin->pClearValues);
genX(flush_pipeline_select_3d)(cmd_buffer);