+static void
+add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_state state,
+ struct anv_bo *bo, uint32_t offset)
+{
+ const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
+
+ anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+ state.offset + isl_dev->ss.addr_offset, bo, offset);
+}
+
+static void
+add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_image_view *iview,
+ enum isl_aux_usage aux_usage,
+ struct anv_state state)
+{
+ const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
+
+ anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+ state.offset + isl_dev->ss.addr_offset,
+ iview->bo, iview->offset);
+
+ if (aux_usage != ISL_AUX_USAGE_NONE) {
+ uint32_t aux_offset = iview->offset + iview->image->aux_surface.offset;
+
+ /* On gen7 and prior, the bottom 12 bits of the MCS base address are
+ * used to store other information. This should be ok, however, because
+ * surface buffer addresses are always 4K page alinged.
+ */
+ assert((aux_offset & 0xfff) == 0);
+ uint32_t *aux_addr_dw = state.map + isl_dev->ss.aux_addr_offset;
+ aux_offset += *aux_addr_dw & 0xfff;
+
+ anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+ state.offset + isl_dev->ss.aux_addr_offset,
+ iview->bo, aux_offset);
+ }
+}
+
+static bool
+color_is_zero_one(VkClearColorValue value, enum isl_format format)
+{
+ if (isl_format_has_int_channel(format)) {
+ for (unsigned i = 0; i < 4; i++) {
+ if (value.int32[i] != 0 && value.int32[i] != 1)
+ return false;
+ }
+ } else {
+ for (unsigned i = 0; i < 4; i++) {
+ if (value.float32[i] != 0.0f && value.float32[i] != 1.0f)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void
+color_attachment_compute_aux_usage(struct anv_device *device,
+ struct anv_attachment_state *att_state,
+ struct anv_image_view *iview,
+ VkRect2D render_area,
+ union isl_color_value *fast_clear_color)
+{
+ if (iview->image->aux_surface.isl.size == 0) {
+ att_state->aux_usage = ISL_AUX_USAGE_NONE;
+ att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+ att_state->fast_clear = false;
+ return;
+ }
+
+ assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT);
+
+ att_state->clear_color_is_zero_one =
+ color_is_zero_one(att_state->clear_value.color, iview->isl.format);
+
+ if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ /* Start off assuming fast clears are possible */
+ att_state->fast_clear = true;
+
+ /* Potentially, we could do partial fast-clears but doing so has crazy
+ * alignment restrictions. It's easier to just restrict to full size
+ * fast clears for now.
+ */
+ if (render_area.offset.x != 0 ||
+ render_area.offset.y != 0 ||
+ render_area.extent.width != iview->extent.width ||
+ render_area.extent.height != iview->extent.height)
+ att_state->fast_clear = false;
+
+ if (GEN_GEN <= 7) {
+ /* On gen7, we can't do multi-LOD or multi-layer fast-clears. We
+ * technically can, but it comes with crazy restrictions that we
+ * don't want to deal with now.
+ */
+ if (iview->isl.base_level > 0 ||
+ iview->isl.base_array_layer > 0 ||
+ iview->isl.array_len > 1)
+ att_state->fast_clear = false;
+ }
+
+ /* On Broadwell and earlier, we can only handle 0/1 clear colors */
+ if (GEN_GEN <= 8 && !att_state->clear_color_is_zero_one)
+ att_state->fast_clear = false;
+
+ if (att_state->fast_clear) {
+ memcpy(fast_clear_color->u32, att_state->clear_value.color.uint32,
+ sizeof(fast_clear_color->u32));
+ }
+ } else {
+ att_state->fast_clear = false;
+ }
+
+ if (isl_format_supports_lossless_compression(&device->info,
+ iview->isl.format)) {
+ att_state->aux_usage = ISL_AUX_USAGE_CCS_E;
+ att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E;
+ } else if (att_state->fast_clear) {
+ att_state->aux_usage = ISL_AUX_USAGE_CCS_D;
+ if (GEN_GEN >= 9) {
+ /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
+ *
+ * "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
+ * setting is only allowed if Surface Format supported for Fast
+ * Clear. In addition, if the surface is bound to the sampling
+ * engine, Surface Format must be supported for Render Target
+ * Compression for surfaces bound to the sampling engine."
+ *
+ * In other words, we can't sample from a fast-cleared image if it
+ * doesn't also support color compression.
+ */
+ att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+ } else if (GEN_GEN == 8) {
+ /* Broadwell can sample from fast-cleared images */
+ att_state->input_aux_usage = ISL_AUX_USAGE_CCS_D;
+ } else {
+ /* Ivy Bridge and Haswell cannot */
+ att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+ }
+ } else {
+ att_state->aux_usage = ISL_AUX_USAGE_NONE;
+ att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+ }
+}
+
+static bool
+need_input_attachment_state(const struct anv_render_pass_attachment *att)
+{
+ if (!(att->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
+ return false;
+
+ /* We only allocate input attachment states for color and depth surfaces.
+ * Stencil doesn't allow compression so we can just use the texture surface
+ * state from the view
+ */
+ return vk_format_is_color(att->format) || vk_format_has_depth(att->format);
+}
+
+/**
+ * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass.
+ */
+static void
+genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_render_pass *pass,
+ const VkRenderPassBeginInfo *begin)
+{
+ const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
+ struct anv_cmd_state *state = &cmd_buffer->state;
+
+ vk_free(&cmd_buffer->pool->alloc, state->attachments);
+
+ if (pass->attachment_count == 0) {
+ state->attachments = NULL;
+ return;
+ }
+
+ state->attachments = vk_alloc(&cmd_buffer->pool->alloc,
+ pass->attachment_count *
+ sizeof(state->attachments[0]),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (state->attachments == NULL) {
+ /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
+ abort();
+ }
+
+ bool need_null_state = false;
+ unsigned num_states = 0;
+ for (uint32_t i = 0; i < pass->attachment_count; ++i) {
+ if (vk_format_is_color(pass->attachments[i].format)) {
+ num_states++;
+ } else {
+ /* We need a null state for any depth-stencil-only subpasses.
+ * Importantly, this includes depth/stencil clears so we create one
+ * whenever we have depth or stencil
+ */
+ need_null_state = true;
+ }
+
+ if (need_input_attachment_state(&pass->attachments[i]))
+ num_states++;
+ }
+ num_states += need_null_state;
+
+ const uint32_t ss_stride = align_u32(isl_dev->ss.size, isl_dev->ss.align);
+ state->render_pass_states =
+ anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
+ num_states * ss_stride, isl_dev->ss.align);
+
+ struct anv_state next_state = state->render_pass_states;
+ next_state.alloc_size = isl_dev->ss.size;
+
+ if (need_null_state) {
+ state->null_surface_state = next_state;
+ next_state.offset += ss_stride;
+ next_state.map += ss_stride;
+ }
+
+ for (uint32_t i = 0; i < pass->attachment_count; ++i) {
+ if (vk_format_is_color(pass->attachments[i].format)) {
+ state->attachments[i].color_rt_state = next_state;
+ next_state.offset += ss_stride;
+ next_state.map += ss_stride;
+ }
+
+ if (need_input_attachment_state(&pass->attachments[i])) {
+ state->attachments[i].input_att_state = next_state;
+ next_state.offset += ss_stride;
+ next_state.map += ss_stride;
+ }
+ }
+ assert(next_state.offset == state->render_pass_states.offset +
+ state->render_pass_states.alloc_size);
+
+ if (begin) {
+ ANV_FROM_HANDLE(anv_framebuffer, framebuffer, begin->framebuffer);
+ assert(pass->attachment_count == framebuffer->attachment_count);
+
+ if (need_null_state) {
+ struct GENX(RENDER_SURFACE_STATE) null_ss = {
+ .SurfaceType = SURFTYPE_NULL,
+ .SurfaceArray = framebuffer->layers > 0,
+ .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM,
+#if GEN_GEN >= 8
+ .TileMode = YMAJOR,
+#else
+ .TiledSurface = true,
+#endif
+ .Width = framebuffer->width - 1,
+ .Height = framebuffer->height - 1,
+ .Depth = framebuffer->layers - 1,
+ .RenderTargetViewExtent = framebuffer->layers - 1,
+ };
+ GENX(RENDER_SURFACE_STATE_pack)(NULL, state->null_surface_state.map,
+ &null_ss);
+ }
+
+ for (uint32_t i = 0; i < pass->attachment_count; ++i) {
+ struct anv_render_pass_attachment *att = &pass->attachments[i];
+ VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
+ VkImageAspectFlags clear_aspects = 0;
+
+ if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ /* color attachment */
+ if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
+ }
+ } else {
+ /* depthstencil attachment */
+ if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+ att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+ }
+ if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+ }
+
+ state->attachments[i].pending_clear_aspects = clear_aspects;
+ if (clear_aspects)
+ state->attachments[i].clear_value = begin->pClearValues[i];
+
+ struct anv_image_view *iview = framebuffer->attachments[i];
+ assert(iview->vk_format == att->format);
+
+ union isl_color_value clear_color = { .u32 = { 0, } };
+ if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ color_attachment_compute_aux_usage(cmd_buffer->device,
+ &state->attachments[i],
+ iview, begin->renderArea,
+ &clear_color);
+
+ struct isl_view view = iview->isl;
+ view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
+ isl_surf_fill_state(isl_dev,
+ state->attachments[i].color_rt_state.map,
+ .surf = &iview->image->color_surface.isl,
+ .view = &view,
+ .aux_surf = &iview->image->aux_surface.isl,
+ .aux_usage = state->attachments[i].aux_usage,
+ .clear_color = clear_color,
+ .mocs = cmd_buffer->device->default_mocs);
+
+ add_image_view_relocs(cmd_buffer, iview,
+ state->attachments[i].aux_usage,
+ state->attachments[i].color_rt_state);
+ } else {
+ state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
+ state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE;
+ }
+
+ if (need_input_attachment_state(&pass->attachments[i])) {
+ const struct isl_surf *surf;
+ if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ surf = &iview->image->color_surface.isl;
+ } else {
+ surf = &iview->image->depth_surface.isl;
+ }
+
+ struct isl_view view = iview->isl;
+ view.usage |= ISL_SURF_USAGE_TEXTURE_BIT;
+ isl_surf_fill_state(isl_dev,
+ state->attachments[i].input_att_state.map,
+ .surf = surf,
+ .view = &view,
+ .aux_surf = &iview->image->aux_surface.isl,
+ .aux_usage = state->attachments[i].input_aux_usage,
+ .clear_color = clear_color,
+ .mocs = cmd_buffer->device->default_mocs);
+
+ add_image_view_relocs(cmd_buffer, iview,
+ state->attachments[i].input_aux_usage,
+ state->attachments[i].input_att_state);
+ }
+ }
+
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state->render_pass_states);
+ }
+}
+
+VkResult
+genX(BeginCommandBuffer)(
+ VkCommandBuffer commandBuffer,
+ const VkCommandBufferBeginInfo* pBeginInfo)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ /* If this is the first vkBeginCommandBuffer, we must *initialize* the
+ * command buffer's state. Otherwise, we must *reset* its state. In both
+ * cases we reset it.
+ *
+ * From the Vulkan 1.0 spec:
+ *
+ * If a command buffer is in the executable state and the command buffer
+ * was allocated from a command pool with the
+ * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then
+ * vkBeginCommandBuffer implicitly resets the command buffer, behaving
+ * as if vkResetCommandBuffer had been called with
+ * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts
+ * the command buffer in the recording state.
+ */
+ anv_cmd_buffer_reset(cmd_buffer);
+
+ cmd_buffer->usage_flags = pBeginInfo->flags;
+
+ assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ||
+ !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT));
+
+ genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
+
+ if (cmd_buffer->usage_flags &
+ VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
+ cmd_buffer->state.pass =
+ anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
+ cmd_buffer->state.subpass =
+ &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
+ cmd_buffer->state.framebuffer = NULL;
+
+ genX(cmd_buffer_setup_attachments)(cmd_buffer, cmd_buffer->state.pass,
+ NULL);
+
+ cmd_buffer->state.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult
+genX(EndCommandBuffer)(
+ VkCommandBuffer commandBuffer)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
+ anv_cmd_buffer_end_batch_buffer(cmd_buffer);
+
+ return VK_SUCCESS;
+}
+
+void
+genX(CmdExecuteCommands)(
+ VkCommandBuffer commandBuffer,
+ uint32_t commandBufferCount,
+ const VkCommandBuffer* pCmdBuffers)
+{
+ ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer);
+
+ assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+
+ for (uint32_t i = 0; i < commandBufferCount; i++) {
+ ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
+
+ assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
+
+ if (secondary->usage_flags &
+ VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
+ /* If we're continuing a render pass from the primary, we need to
+ * copy the surface states for the current subpass into the storage
+ * we allocated for them in BeginCommandBuffer.
+ */
+ struct anv_bo *ss_bo = &primary->device->surface_state_block_pool.bo;
+ struct anv_state src_state = primary->state.render_pass_states;
+ struct anv_state dst_state = secondary->state.render_pass_states;
+ assert(src_state.alloc_size == dst_state.alloc_size);
+
+ genX(cmd_buffer_gpu_memcpy)(primary, ss_bo, dst_state.offset,
+ ss_bo, src_state.offset,
+ src_state.alloc_size);
+ }
+
+ anv_cmd_buffer_add_secondary(primary, secondary);
+ }
+
+ /* Each of the secondary command buffers will use its own state base
+ * address. We need to re-emit state base address for the primary after
+ * all of the secondaries are done.
+ *
+ * TODO: Maybe we want to make this a dirty bit to avoid extra state base
+ * address calls?
+ */
+ genX(cmd_buffer_emit_state_base_address)(primary);
+}
+