cmd_buffer->gsvs_ring_size_needed = 0;
cmd_buffer->tess_rings_needed = false;
cmd_buffer->gds_needed = false;
+ cmd_buffer->gds_oa_needed = false;
cmd_buffer->sample_positions_needed = false;
if (cmd_buffer->upload.upload_bo)
cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
+static void
+radv_emit_batch_break_on_new_ps(struct radv_cmd_buffer *cmd_buffer)
+{
+ if (!cmd_buffer->device->pbb_allowed)
+ return;
+
+ struct radv_binning_settings settings =
+ radv_get_binning_settings(cmd_buffer->device->physical_device);
+ bool break_for_new_ps =
+ (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->shaders[MESA_SHADER_FRAGMENT] !=
+ cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) &&
+ (settings.context_states_per_bin > 1 ||
+ settings.persistent_states_per_bin > 1);
+ bool break_for_new_cb_target_mask =
+ (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->graphics.cb_target_mask !=
+ cmd_buffer->state.pipeline->graphics.cb_target_mask) &&
+ settings.context_states_per_bin > 1;
+
+ if (!break_for_new_ps && !break_for_new_cb_target_mask)
+ return;
+
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
+}
+
static void
radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
+ radv_emit_batch_break_on_new_ps(cmd_buffer);
+
for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
if (!pipeline->shaders[i])
continue;
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* OOB_SELECT chooses the out-of-bounds check:
+ * - 1: index >= NUM_RECORDS (Structured)
+ * - 3: offset >= NUM_RECORDS (Raw)
+ */
+ int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW;
+
desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_UINT) |
- S_008F0C_OOB_SELECT(1) |
+ S_008F0C_OOB_SELECT(oob_select) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(3) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
desc[3] |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx,
begin_subpass);
- radv_handle_image_transition(cmd_buffer,
- view->image,
- cmd_buffer->state.attachments[idx].current_layout,
- cmd_buffer->state.attachments[idx].current_in_render_loop,
- att.layout, att.in_render_loop,
- 0, 0, &range, sample_locs);
+ /* Determine if the subpass uses separate depth/stencil layouts. */
+ bool uses_separate_depth_stencil_layouts = false;
+ if ((cmd_buffer->state.attachments[idx].current_layout !=
+ cmd_buffer->state.attachments[idx].current_stencil_layout) ||
+ (att.layout != att.stencil_layout)) {
+ uses_separate_depth_stencil_layouts = true;
+ }
+
+ /* For separate layouts, perform depth and stencil transitions
+ * separately.
+ */
+ if (uses_separate_depth_stencil_layouts &&
+ (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT))) {
+ /* Depth-only transitions. */
+ range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+ radv_handle_image_transition(cmd_buffer,
+ view->image,
+ cmd_buffer->state.attachments[idx].current_layout,
+ cmd_buffer->state.attachments[idx].current_in_render_loop,
+ att.layout, att.in_render_loop,
+ 0, 0, &range, sample_locs);
+
+ /* Stencil-only transitions. */
+ range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+ radv_handle_image_transition(cmd_buffer,
+ view->image,
+ cmd_buffer->state.attachments[idx].current_stencil_layout,
+ cmd_buffer->state.attachments[idx].current_in_render_loop,
+ att.stencil_layout, att.in_render_loop,
+ 0, 0, &range, sample_locs);
+ } else {
+ radv_handle_image_transition(cmd_buffer,
+ view->image,
+ cmd_buffer->state.attachments[idx].current_layout,
+ cmd_buffer->state.attachments[idx].current_in_render_loop,
+ att.layout, att.in_render_loop,
+ 0, 0, &range, sample_locs);
+ }
cmd_buffer->state.attachments[idx].current_layout = att.layout;
+ cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout;
cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop;
const VkRenderPassBeginInfo *info)
{
struct radv_cmd_state *state = &cmd_buffer->state;
- const struct VkRenderPassAttachmentBeginInfoKHR *attachment_info = NULL;
+ const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL;
if (info) {
attachment_info = vk_find_struct_const(info->pNext,
- RENDER_PASS_ATTACHMENT_BEGIN_INFO_KHR);
+ RENDER_PASS_ATTACHMENT_BEGIN_INFO);
}
}
state->attachments[i].current_layout = att->initial_layout;
+ state->attachments[i].current_stencil_layout = att->stencil_initial_layout;
state->attachments[i].sample_location.count = 0;
struct radv_image_view *iview;
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
dst[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(3) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
continue;
VkImageLayout layout = state->pass->attachments[a].final_layout;
- struct radv_subpass_attachment att = { a, layout };
+ VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout;
+ struct radv_subpass_attachment att = { a, layout, stencil_layout };
radv_handle_subpass_image_transition(cmd_buffer, att, false);
}
}
radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
}
-void radv_CmdBeginRenderPass2KHR(
+void radv_CmdBeginRenderPass2(
VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo* pRenderPassBeginInfo,
- const VkSubpassBeginInfoKHR* pSubpassBeginInfo)
+ const VkSubpassBeginInfo* pSubpassBeginInfo)
{
radv_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo,
pSubpassBeginInfo->contents);
radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
}
-void radv_CmdNextSubpass2KHR(
+void radv_CmdNextSubpass2(
VkCommandBuffer commandBuffer,
- const VkSubpassBeginInfoKHR* pSubpassBeginInfo,
- const VkSubpassEndInfoKHR* pSubpassEndInfo)
+ const VkSubpassBeginInfo* pSubpassBeginInfo,
+ const VkSubpassEndInfo* pSubpassEndInfo)
{
radv_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents);
}
radv_draw(cmd_buffer, &info);
}
-void radv_CmdDrawIndirectCountKHR(
+void radv_CmdDrawIndirectCount(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
radv_draw(cmd_buffer, &info);
}
-void radv_CmdDrawIndexedIndirectCountKHR(
+void radv_CmdDrawIndexedIndirectCount(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
cmd_buffer->state.subpass_sample_locs = NULL;
}
-void radv_CmdEndRenderPass2KHR(
+void radv_CmdEndRenderPass2(
VkCommandBuffer commandBuffer,
- const VkSubpassEndInfoKHR* pSubpassEndInfo)
+ const VkSubpassEndInfo* pSubpassEndInfo)
{
radv_CmdEndRenderPass(commandBuffer);
}
*/
static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t clear_word)
+ const VkImageSubresourceRange *range)
{
assert(range->baseMipLevel == 0);
assert(range->levelCount == 1 || range->levelCount == VK_REMAINING_ARRAY_LAYERS);
VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t htile_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
VkClearDepthStencilValue value = {};
state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, clear_word);
+ state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value);
state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
return;
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
- uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
-
- if (radv_layout_is_htile_compressed(image, dst_layout, dst_render_loop,
- dst_queue_mask)) {
- clear_value = 0;
- }
-
- radv_initialize_htile(cmd_buffer, image, range, clear_value);
+ radv_initialize_htile(cmd_buffer, image, range);
} else if (!radv_layout_is_htile_compressed(image, src_layout, src_render_loop, src_queue_mask) &&
radv_layout_is_htile_compressed(image, dst_layout, dst_render_loop, dst_queue_mask)) {
- uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
- radv_initialize_htile(cmd_buffer, image, range, clear_value);
+ radv_initialize_htile(cmd_buffer, image, range);
} else if (radv_layout_is_htile_compressed(image, src_layout, src_render_loop, src_queue_mask) &&
!radv_layout_is_htile_compressed(image, dst_layout, dst_render_loop, dst_queue_mask)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
(old_hw_enabled_mask != so->hw_enabled_mask)))
radv_emit_streamout_enable(cmd_buffer);
- if (cmd_buffer->device->physical_device->use_ngg_streamout)
+ if (cmd_buffer->device->physical_device->use_ngg_streamout) {
cmd_buffer->gds_needed = true;
+ cmd_buffer->gds_oa_needed = true;
+ }
}
static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)