* these fields. However, since we will be growing the BO's live, we
* just set them all to the maximum.
*/
- sba.GeneralStateBufferSize = 0xfffff;
+ sba.GeneralStateBufferSize = 0xfffff;
+ sba.IndirectObjectBufferSize = 0xfffff;
+ if (device->physical->use_softpin) {
+ /* With softpin, we use fixed addresses so we actually know how big
+ * our base addresses are.
+ */
+ sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096;
+ sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096;
+ } else {
+ sba.DynamicStateBufferSize = 0xfffff;
+ sba.InstructionBufferSize = 0xfffff;
+ }
sba.GeneralStateBufferSizeModifyEnable = true;
- sba.DynamicStateBufferSize = 0xfffff;
- sba.DynamicStateBufferSizeModifyEnable = true;
- sba.IndirectObjectBufferSize = 0xfffff;
sba.IndirectObjectBufferSizeModifyEnable = true;
- sba.InstructionBufferSize = 0xfffff;
+ sba.DynamicStateBufferSizeModifyEnable = true;
sba.InstructionBuffersizeModifyEnable = true;
# else
/* On gen7, we have upper bounds instead. According to the docs,
att_state->aux_usage =
anv_layout_to_aux_usage(&device->info, iview->image,
VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
/* If we don't have aux, then we should have returned early in the layer
*/
if (cmd_state->pass->attachments[att].first_subpass_layout ==
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
- anv_perf_warn(device->instance, iview->image,
+ anv_perf_warn(device, iview->image,
"Not temporarily enabling CCS_E.");
}
} else {
if (att_state->fast_clear &&
(iview->planes[0].isl.base_level > 0 ||
iview->planes[0].isl.base_array_layer > 0)) {
- anv_perf_warn(device->instance, iview->image,
+ anv_perf_warn(device, iview->image,
"Rendering with multi-lod or multi-layer framebuffer "
"with LOAD_OP_LOAD and baseMipLevel > 0 or "
"baseArrayLayer > 0. Not fast clearing.");
att_state->fast_clear = false;
} else if (att_state->fast_clear && cmd_state->framebuffer->layers > 1) {
- anv_perf_warn(device->instance, iview->image,
+ anv_perf_warn(device, iview->image,
"Rendering to a multi-layer framebuffer with "
"LOAD_OP_CLEAR. Only fast-clearing the first slice");
}
att_state->aux_usage = ISL_AUX_USAGE_NONE;
att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
+ /* This is unused for depth/stencil but valgrind complains if it
+ * isn't initialized
+ */
+ att_state->clear_color_is_zero_one = false;
+
if (GEN_GEN == 7) {
/* We don't do any HiZ or depth fast-clears on gen7 yet */
att_state->fast_clear = false;
const enum isl_aux_usage first_subpass_aux_usage =
anv_layout_to_aux_usage(&device->info, iview->image,
VK_IMAGE_ASPECT_DEPTH_BIT,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
pass_att->first_subpass_layout);
if (!blorp_can_hiz_clear_depth(&device->info,
&iview->image->planes[0].surface.isl,
VkImageLayout initial_layout,
VkImageLayout final_layout)
{
- const bool hiz_enabled = ISL_AUX_USAGE_HIZ ==
- anv_layout_to_aux_usage(&cmd_buffer->device->info, image,
- VK_IMAGE_ASPECT_DEPTH_BIT, initial_layout);
- const bool enable_hiz = ISL_AUX_USAGE_HIZ ==
- anv_layout_to_aux_usage(&cmd_buffer->device->info, image,
- VK_IMAGE_ASPECT_DEPTH_BIT, final_layout);
-
- enum isl_aux_op hiz_op;
- if (hiz_enabled && !enable_hiz) {
- hiz_op = ISL_AUX_OP_FULL_RESOLVE;
- } else if (!hiz_enabled && enable_hiz) {
- hiz_op = ISL_AUX_OP_AMBIGUATE;
- } else {
- assert(hiz_enabled == enable_hiz);
- /* If the same buffer will be used, no resolves are necessary. */
- hiz_op = ISL_AUX_OP_NONE;
- }
+ uint32_t depth_plane =
+ anv_image_aspect_to_plane(image->aspects, VK_IMAGE_ASPECT_DEPTH_BIT);
+ if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_NONE)
+ return;
+
+ const enum isl_aux_state initial_state =
+ anv_layout_to_aux_state(&cmd_buffer->device->info, image,
+ VK_IMAGE_ASPECT_DEPTH_BIT,
+ initial_layout);
+ const enum isl_aux_state final_state =
+ anv_layout_to_aux_state(&cmd_buffer->device->info, image,
+ VK_IMAGE_ASPECT_DEPTH_BIT,
+ final_layout);
+
+ const bool initial_depth_valid =
+ isl_aux_state_has_valid_primary(initial_state);
+ const bool initial_hiz_valid =
+ isl_aux_state_has_valid_aux(initial_state);
+ const bool final_needs_depth =
+ isl_aux_state_has_valid_primary(final_state);
+ const bool final_needs_hiz =
+ isl_aux_state_has_valid_aux(final_state);
+
+ /* Getting into the pass-through state for Depth is tricky and involves
+ * both a resolve and an ambiguate. We don't handle that state right now
+ * as anv_layout_to_aux_state never returns it.
+ */
+ assert(final_state != ISL_AUX_STATE_PASS_THROUGH);
- if (hiz_op != ISL_AUX_OP_NONE)
+ if (final_needs_depth && !initial_depth_valid) {
+ assert(initial_hiz_valid);
+ anv_image_hiz_op(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
+ 0, 0, 1, ISL_AUX_OP_FULL_RESOLVE);
+ } else if (final_needs_hiz && !initial_hiz_valid) {
+ assert(initial_depth_valid);
anv_image_hiz_op(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
- 0, 0, 1, hiz_op);
+ 0, 0, 1, ISL_AUX_OP_AMBIGUATE);
+ }
}
static inline bool
* to do a partial resolve on a CCS_D surface.
*/
if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE &&
- image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
+ image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_D)
resolve_op = ISL_AUX_OP_FULL_RESOLVE;
anv_image_ccs_op(cmd_buffer, image, format, aspect, level,
}
}
+#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
+
+#if GEN_GEN == 12
+static void
+anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
+ const struct anv_image *image,
+ VkImageAspectFlagBits aspect,
+ uint32_t base_level, uint32_t level_count,
+ uint32_t base_layer, uint32_t layer_count)
+{
+ uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
+
+ uint64_t base_address =
+ anv_address_physical(image->planes[plane].address);
+
+ const struct isl_surf *isl_surf = &image->planes[plane].surface.isl;
+ uint64_t format_bits = gen_aux_map_format_bits_for_isl_surf(isl_surf);
+
+ /* We're about to live-update the AUX-TT. We really don't want anyone else
+ * trying to read it while we're doing this. We could probably get away
+ * with not having this stall in some cases if we were really careful but
+ * it's better to play it safe. Full stall the GPU.
+ */
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
+ struct gen_mi_builder b;
+ gen_mi_builder_init(&b, &cmd_buffer->batch);
+
+ for (uint32_t a = 0; a < layer_count; a++) {
+ const uint32_t layer = base_layer + a;
+
+ uint64_t start_offset_B = UINT64_MAX, end_offset_B = 0;
+ for (uint32_t l = 0; l < level_count; l++) {
+ const uint32_t level = base_level + l;
+
+ uint32_t logical_array_layer, logical_z_offset_px;
+ if (image->type == VK_IMAGE_TYPE_3D) {
+ logical_array_layer = 0;
+
+ /* If the given miplevel does not have this layer, then any higher
+ * miplevels won't either because miplevels only get smaller the
+ * higher the LOD.
+ */
+ assert(layer < image->extent.depth);
+ if (layer >= anv_minify(image->extent.depth, level))
+ break;
+ logical_z_offset_px = layer;
+ } else {
+ assert(layer < image->array_size);
+ logical_array_layer = layer;
+ logical_z_offset_px = 0;
+ }
+
+ uint32_t slice_start_offset_B, slice_end_offset_B;
+ isl_surf_get_image_range_B_tile(isl_surf, level,
+ logical_array_layer,
+ logical_z_offset_px,
+ &slice_start_offset_B,
+ &slice_end_offset_B);
+
+ start_offset_B = MIN2(start_offset_B, slice_start_offset_B);
+ end_offset_B = MAX2(end_offset_B, slice_end_offset_B);
+ }
+
+ /* Aux operates 64K at a time */
+ start_offset_B = align_down_u64(start_offset_B, 64 * 1024);
+ end_offset_B = align_u64(end_offset_B, 64 * 1024);
+
+ for (uint64_t offset = start_offset_B;
+ offset < end_offset_B; offset += 64 * 1024) {
+ uint64_t address = base_address + offset;
+
+ uint64_t aux_entry_addr64, *aux_entry_map;
+ aux_entry_map = gen_aux_map_get_entry(cmd_buffer->device->aux_map_ctx,
+ address, &aux_entry_addr64);
+
+ assert(cmd_buffer->device->physical->use_softpin);
+ struct anv_address aux_entry_address = {
+ .bo = NULL,
+ .offset = aux_entry_addr64,
+ };
+
+ const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
+ uint64_t new_aux_entry =
+ (old_aux_entry & GEN_AUX_MAP_ADDRESS_MASK) | format_bits;
+
+ if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
+ new_aux_entry |= GEN_AUX_MAP_ENTRY_VALID_BIT;
+
+ gen_mi_store(&b, gen_mi_mem64(aux_entry_address),
+ gen_mi_imm(new_aux_entry));
+ }
+ }
+
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT;
+}
+#endif /* GEN_GEN == 12 */
+
/**
* @brief Transitions a color buffer from one layout to another.
*
VkImageLayout initial_layout,
VkImageLayout final_layout)
{
- const struct gen_device_info *devinfo = &cmd_buffer->device->info;
+ struct anv_device *device = cmd_buffer->device;
+ const struct gen_device_info *devinfo = &device->info;
/* Validate the inputs. */
assert(cmd_buffer);
assert(image && image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
+#if GEN_GEN == 12
+ if (device->physical->has_implicit_ccs && devinfo->has_aux_map) {
+ anv_image_init_aux_tt(cmd_buffer, image, aspect,
+ base_level, level_count,
+ base_layer, layer_count);
+ }
+#else
+ assert(!(device->physical->has_implicit_ccs && devinfo->has_aux_map));
+#endif
+
/* A subresource in the undefined layout may have been aliased and
* populated with any arrangement of bits. Therefore, we must initialize
* the related aux buffer and clear buffer entry with desirable values.
}
} else {
if (image->samples == 4 || image->samples == 16) {
- anv_perf_warn(cmd_buffer->device->instance, image,
+ anv_perf_warn(cmd_buffer->device, image,
"Doing a potentially unnecessary fast-clear to "
"define an MCS buffer.");
}
}
const enum isl_aux_usage initial_aux_usage =
- anv_layout_to_aux_usage(devinfo, image, aspect, initial_layout);
+ anv_layout_to_aux_usage(devinfo, image, aspect, 0, initial_layout);
const enum isl_aux_usage final_aux_usage =
- anv_layout_to_aux_usage(devinfo, image, aspect, final_layout);
+ anv_layout_to_aux_usage(devinfo, image, aspect, 0, final_layout);
/* The current code assumes that there is no mixing of CCS_E and CCS_D.
* We can handle transitions between CCS_D/E to and from NONE. What we
*/
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ /* Re-emit the aux table register in every command buffer. This way we're
+ * ensured that we have the table even if this command buffer doesn't
+ * initialize any images.
+ */
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT;
+
/* We send an "Indirect State Pointers Disable" packet at
* EndCommandBuffer, so all push contant packets are ignored during a
* context restore. Documentation says after that command, we need to
enum isl_aux_usage aux_usage =
anv_layout_to_aux_usage(&cmd_buffer->device->info, iview->image,
- VK_IMAGE_ASPECT_DEPTH_BIT, layout);
+ VK_IMAGE_ASPECT_DEPTH_BIT,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ layout);
cmd_buffer->state.hiz_enabled = aux_usage == ISL_AUX_USAGE_HIZ;
}
uint32_t l3cr;
anv_pack_struct(&l3cr, L3_ALLOCATION_REG,
-#if GEN_GEN < 12
+#if GEN_GEN < 11
.SLMEnable = has_slm,
#endif
#if GEN_GEN == 11
bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
}
+ /* GEN:BUG:1409226450, Wait for EU to be idle before pipe control which
+ * invalidates the instruction cache
+ */
+ if (GEN_GEN == 12 && (bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT))
+ bits |= ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
+
if ((GEN_GEN >= 8 && GEN_GEN <= 9) &&
(bits & ANV_PIPE_CS_STALL_BIT) &&
(bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)) {
}
}
+#if GEN_GEN == 12
+ if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) &&
+ cmd_buffer->device->info.has_aux_map) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+ lri.RegisterOffset = GENX(GFX_CCS_AUX_INV_num);
+ lri.DataDWord = 1;
+ }
+ }
+#endif
+
bits &= ~ANV_PIPE_INVALIDATE_BITS;
}
cmd_buffer->state.push_constants_dirty &= ~flushed;
}
-#if GEN_GEN >= 12
-void
-genX(cmd_buffer_aux_map_state)(struct anv_cmd_buffer *cmd_buffer)
-{
- void *aux_map_ctx = cmd_buffer->device->aux_map_ctx;
- if (!aux_map_ctx)
- return;
- uint32_t aux_map_state_num = gen_aux_map_get_state_num(aux_map_ctx);
- if (cmd_buffer->state.last_aux_map_state != aux_map_state_num) {
- /* If the aux-map state number increased, then we need to rewrite the
- * register. Rewriting the register is used to both set the aux-map
- * translation table address, and also to invalidate any previously
- * cached translations.
- */
- uint64_t base_addr = gen_aux_map_get_base(aux_map_ctx);
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
- lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num);
- lri.DataDWord = base_addr & 0xffffffff;
- }
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
- lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num) + 4;
- lri.DataDWord = base_addr >> 32;
- }
- cmd_buffer->state.last_aux_map_state = aux_map_state_num;
- }
-}
-#endif
-
void
genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
{
genX(flush_pipeline_select_3d)(cmd_buffer);
-#if GEN_GEN >= 12
- genX(cmd_buffer_aux_map_state)(cmd_buffer);
-#endif
-
if (vb_emit) {
const uint32_t num_buffers = __builtin_popcount(vb_emit);
const uint32_t num_dwords = 1 + num_buffers * 4;
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
-#if GEN_GEN >= 12
- genX(cmd_buffer_aux_map_state)(cmd_buffer);
-#endif
-
if (cmd_buffer->state.compute.pipeline_dirty) {
/* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE:
*
const char *function)
{
if (device->physical->cmd_parser_version < required_version) {
- return vk_errorf(device->instance, device->instance,
+ return vk_errorf(device, device->physical,
VK_ERROR_FEATURE_NOT_PRESENT,
"cmd parser version %d is required for %s",
required_version, function);
(att_state->fast_clear && !att_state->clear_color_is_zero_one) ||
att_state->input_aux_usage != att_state->aux_usage;
- VkImageLayout target_layout, target_stencil_layout;
+ VkImageLayout target_layout;
if (iview->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV &&
!input_needs_resolve) {
/* Layout transitions before the final only help to enable sampling
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
} else {
target_layout = subpass->attachments[i].layout;
- target_stencil_layout = subpass->attachments[i].stencil_layout;
}
+ VkImageLayout target_stencil_layout =
+ subpass->attachments[i].stencil_layout;
+
uint32_t base_layer, layer_count;
if (image->type == VK_IMAGE_TYPE_3D) {
base_layer = 0;
att_state->current_layout, target_layout);
att_state->aux_usage =
anv_layout_to_aux_usage(&cmd_buffer->device->info, image,
- VK_IMAGE_ASPECT_DEPTH_BIT, target_layout);
+ VK_IMAGE_ASPECT_DEPTH_BIT,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ target_layout);
}
if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
if (GEN_GEN < 10 &&
(att_state->pending_load_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) &&
- image->planes[0].aux_surface.isl.size_B > 0 &&
+ image->planes[0].aux_usage != ISL_AUX_USAGE_NONE &&
iview->planes[0].isl.base_level == 0 &&
iview->planes[0].isl.base_array_layer == 0) {
if (att_state->aux_usage != ISL_AUX_USAGE_NONE) {
* is set due to new association of BTI, PS Scoreboard Stall bit must
* be set in this packet."
*/
- anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.RenderTargetCacheFlushEnable = true;
- pc.StallAtPixelScoreboard = true;
-#if GEN_GEN >= 12
- pc.TileCacheFlushEnable = true;
-#endif
- }
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
+ ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
#endif
}
*/
transition_depth_buffer(cmd_buffer, src_iview->image,
src_state->current_layout,
- VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
src_state->aux_usage =
anv_layout_to_aux_usage(&cmd_buffer->device->info, src_iview->image,
VK_IMAGE_ASPECT_DEPTH_BIT,
- VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
- src_state->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+ src_state->current_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
/* MSAA resolves write to the resolve attachment as if it were any
* other transfer op. Transition the resolve attachment accordingly.
dst_state->aux_usage =
anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_iview->image,
VK_IMAGE_ASPECT_DEPTH_BIT,
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
dst_state->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
if ((src_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
- src_state->current_stencil_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ src_state->current_stencil_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
dst_state->current_stencil_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
enum isl_aux_usage src_aux_usage = ISL_AUX_USAGE_NONE;
* SRGB view & a UNORM image).
*/
if (fast_clear_type != ANV_FAST_CLEAR_NONE) {
- anv_perf_warn(cmd_buffer->device->instance, iview,
+ anv_perf_warn(cmd_buffer->device, iview,
"Doing a partial resolve to get rid of clear color at the "
"end of a renderpass due to an image/view format mismatch");