tu_cs_image_flag_ref(cs, iview, layer);
}
+/* Emit 2D-blitter destination state pointing at the separate stencil plane
+ * of a combined depth/stencil image view (single-sampled only, per assert).
+ * The FLAGS bit is masked out of RB_2D_DST_INFO — presumably the separate
+ * stencil plane carries no UBWC flag data; confirm against tu_image layout.
+ */
+static void
+r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+{
+ assert(iview->image->samples == 1);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS);
+ /* dst address: base of the stencil plane plus the selected array layer */
+ tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
+ tu_cs_emit(cs, iview->stencil_PITCH);
+}
+
static void
r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
}
+/* Emit 3D-path (draw-based blit) destination state for the separate stencil
+ * plane of a depth/stencil image view, binding it as color MRT 0.
+ * Unlike r2d_dst_stencil this supports multisampled images (MSAA state is
+ * emitted from iview->image->samples).
+ */
+static void
+r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+{
+ tu6_emit_msaa(cs, iview->image->samples); /* TODO: move to setup */
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO));
+ tu_cs_image_stencil_ref(cs, iview, layer);
+ /* last dword of the 6-register MRT group is left zero */
+ tu_cs_emit(cs, 0);
+
+ /* no flag_mrts: stencil plane is written without UBWC flags */
+ tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
+}
+
static void
r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
return format;
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
return VK_FORMAT_R32_UINT;
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ return VK_FORMAT_S8_UINT;
+ assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT);
+ return VK_FORMAT_D32_SFLOAT;
}
}
clear_image(struct tu_cmd_buffer *cmd,
struct tu_image *image,
const VkClearValue *clear_value,
- const VkImageSubresourceRange *range)
+ const VkImageSubresourceRange *range,
+ VkImageAspectFlags aspect_mask)
{
uint32_t level_count = tu_get_levelCount(image, range);
uint32_t layer_count = tu_get_layerCount(image, range);
struct tu_cs *cs = &cmd->cs;
VkFormat format = image->vk_format;
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- format = VK_FORMAT_R32_UINT;
+ if (format == VK_FORMAT_D32_SFLOAT_S8_UINT || format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ format = copy_format(format, aspect_mask, false);
if (image->type == VK_IMAGE_TYPE_3D) {
assert(layer_count == 1);
const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops;
- ops->setup(cmd, cs, format, range->aspectMask, ROTATE_0, true, image->layout[0].ubwc);
- ops->clear_value(cs, image->vk_format, clear_value);
+ ops->setup(cmd, cs, format, aspect_mask, ROTATE_0, true, image->layout[0].ubwc);
+ if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ ops->clear_value(cs, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, clear_value);
+ else
+ ops->clear_value(cs, format, clear_value);
for (unsigned j = 0; j < level_count; j++) {
if (image->type == VK_IMAGE_TYPE_3D)
struct tu_image_view dst;
tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) {
- .aspectMask = range->aspectMask,
+ .aspectMask = aspect_mask,
.mipLevel = range->baseMipLevel + j,
.baseArrayLayer = range->baseArrayLayer,
.layerCount = 1,
tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
for (unsigned i = 0; i < rangeCount; i++)
- clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i);
+ clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT);
}
void
tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
- for (unsigned i = 0; i < rangeCount; i++)
- clear_image(cmd, image, (const VkClearValue*) pDepthStencil, pRanges + i);
+ for (unsigned i = 0; i < rangeCount; i++) {
+ const VkImageSubresourceRange *range = &pRanges[i];
+
+ if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ /* can't clear both depth and stencil at once, split up the aspect mask */
+ uint32_t b;
+ for_each_bit(b, range->aspectMask)
+ clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b));
+ continue;
+ }
+
+ clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask);
+ }
}
static void
}
+/* Low-level GMEM clear: emits one BLIT event clearing a single plane.
+ * Refactored from the old tu_emit_clear_gmem_attachment to take an explicit
+ * format, write mask and gmem offset instead of an attachment index, so a
+ * D32_SFLOAT_S8_UINT attachment can be cleared as two separate planes.
+ */
static void
-tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
-                              struct tu_cs *cs,
-                              uint32_t attachment,
-                              VkImageAspectFlags mask,
-                              const VkClearValue *value)
+clear_gmem_attachment(struct tu_cmd_buffer *cmd,
+                      struct tu_cs *cs,
+                      VkFormat format,
+                      uint8_t clear_mask,
+                      uint32_t gmem_offset,
+                      const VkClearValue *value)
{
-   VkFormat vk_format = cmd->state.pass->attachments[attachment].format;
-
-
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
-   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(vk_format)));
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format)));
-   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1,
-                                         .clear_mask = aspect_write_mask(vk_format, mask)));
+   /* clear_mask is now computed by the caller (aspect_write_mask or 0xf) */
+   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1, .clear_mask = clear_mask));
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
-   tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);
+   tu_cs_emit(cs, gmem_offset);
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
   tu_cs_emit(cs, 0);
   uint32_t clear_vals[4] = {};
-   pack_gmem_clear_value(value, vk_format, clear_vals);
+   pack_gmem_clear_value(value, format, clear_vals);
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
   tu_cs_emit_array(cs, clear_vals, 4);
   tu6_emit_event_write(cmd, cs, BLIT);
}
+/* Clear a render-pass attachment while rendering to GMEM.
+ * D32_SFLOAT_S8_UINT is stored as two separate GMEM planes (gmem_offset for
+ * depth, gmem_offset_stencil for stencil), so each requested aspect is
+ * cleared by its own BLIT with the full 0xf write mask; all other formats
+ * are a single plane with a per-aspect mask from aspect_write_mask().
+ */
+static void
+tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
+                              struct tu_cs *cs,
+                              uint32_t attachment,
+                              VkImageAspectFlags mask,
+                              const VkClearValue *value)
+{
+   const struct tu_render_pass_attachment *att =
+      &cmd->state.pass->attachments[attachment];
+
+   if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+      if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
+         clear_gmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, 0xf, att->gmem_offset, value);
+      if (mask & VK_IMAGE_ASPECT_STENCIL_BIT)
+         clear_gmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value);
+      return;
+   }
+
+   clear_gmem_attachment(cmd, cs, att->format, aspect_write_mask(att->format, mask), att->gmem_offset, value);
+}
+
+
static void
tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
uint32_t attachment_count,
tu_cond_exec_end(cs);
}
+/* Clear one plane of a render-pass attachment directly in sysmem, once per
+ * framebuffer layer.
+ *
+ * format/clear_mask: the plane's format and blit aspect mask (the D32S8
+ * caller passes D32_SFLOAT or S8_UINT with ASPECT_COLOR, treating each
+ * plane as a color target).
+ * a: attachment index into both the pass and info->pClearValues.
+ * separate_stencil: route the destination through r2d/r3d_dst_stencil so
+ * writes land in the image's separate stencil plane.
+ * Multisampled attachments use the draw-based r3d path; otherwise the 2D
+ * blitter is used.
+ */
+static void
+clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
+                        struct tu_cs *cs,
+                        VkFormat format,
+                        VkImageAspectFlags clear_mask,
+                        const VkRenderPassBeginInfo *info,
+                        uint32_t a,
+                        bool separate_stencil)
+{
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
+   const struct tu_image_view *iview = fb->attachments[a].attachment;
+   const struct blit_ops *ops = &r2d_ops;
+   if (cmd->state.pass->attachments[a].samples > 1)
+      ops = &r3d_ops;
+
+   ops->setup(cmd, cs, format, clear_mask, ROTATE_0, true, iview->ubwc_enabled);
+   ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
+   ops->clear_value(cs, format, &info->pClearValues[a]);
+
+   for (uint32_t i = 0; i < fb->layers; i++) {
+      if (separate_stencil) {
+         /* dispatch manually: the blit_ops vtable has no stencil dst hook */
+         if (ops == &r3d_ops)
+            r3d_dst_stencil(cs, iview, i);
+         else
+            r2d_dst_stencil(cs, iview, i);
+      } else {
+         ops->dst(cs, iview, i);
+      }
+      ops->run(cmd, cs);
+   }
+}
+
+
void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
const VkRenderPassBeginInfo *info)
{
- const struct tu_framebuffer *fb = cmd->state.framebuffer;
- const struct tu_image_view *iview = fb->attachments[a].attachment;
const struct tu_render_pass_attachment *attachment =
&cmd->state.pass->attachments[a];
if (!attachment->clear_mask)
return;
- const struct blit_ops *ops = &r2d_ops;
- if (attachment->samples > 1)
- ops = &r3d_ops;
-
- ops->setup(cmd, cs, attachment->format, attachment->clear_mask, ROTATE_0,
- true, iview->ubwc_enabled);
- ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
- ops->clear_value(cs, attachment->format, &info->pClearValues[a]);
-
/* Wait for any flushes at the beginning of the renderpass to complete */
tu_cs_emit_wfi(cs);
- for (uint32_t i = 0; i < fb->layers; i++) {
- ops->dst(cs, iview, i);
- ops->run(cmd, cs);
+ if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT,
+ info, a, false);
+ }
+ if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT,
+ info, a, true);
+ }
+ } else {
+ clear_sysmem_attachment(cmd, cs, attachment->format, attachment->clear_mask,
+ info, a, false);
}
/* The spec doesn't explicitly say, but presumably the initial renderpass
struct tu_cs *cs,
const struct tu_image_view *iview,
const struct tu_render_pass_attachment *attachment,
- bool resolve)
+ bool resolve,
+ bool separate_stencil)
{
tu_cs_emit_regs(cs,
A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
.integer = vk_format_is_int(attachment->format)));
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
- tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
- tu_cs_image_ref_2d(cs, iview, 0, false);
+ if (separate_stencil) {
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
+ tu_cs_emit_qw(cs, iview->stencil_base_addr);
+ tu_cs_emit(cs, iview->stencil_PITCH);
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
- tu_cs_image_flag_ref(cs, iview, 0);
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil));
+ } else {
+ tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
+ tu_cs_image_ref_2d(cs, iview, 0, false);
- tu_cs_emit_regs(cs,
- A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
+ tu_cs_image_flag_ref(cs, iview, 0);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
+ }
tu6_emit_event_write(cmd, cs, BLIT);
}
&cmd->state.pass->attachments[a];
if (attachment->load || force_load)
- tu_emit_blit(cmd, cs, iview, attachment, false);
+ tu_emit_blit(cmd, cs, iview, attachment, false, false);
+
+ if (attachment->load_stencil || (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load))
+ tu_emit_blit(cmd, cs, iview, attachment, false, true);
+}
+
+/* Store (resolve) one plane of a GMEM attachment out to sysmem with CP_BLIT,
+ * used when the aligned CP_EVENT_WRITE::BLIT fast path cannot be taken.
+ *
+ * The source is GMEM itself, addressed linearly at gmem_base + gmem_offset
+ * with pitch = tile width * cpp; the destination is the image view (its
+ * separate stencil plane when separate_stencil is set).
+ * NOTE(review): the D32S8 stencil caller passes src->samples as cpp —
+ * presumably stencil GMEM packs one byte per sample so the pitch multiplier
+ * equals the sample count; confirm against the GMEM layout code.
+ */
+static void
+store_cp_blit(struct tu_cmd_buffer *cmd,
+              struct tu_cs *cs,
+              struct tu_image_view *iview,
+              uint32_t samples,
+              bool separate_stencil,
+              VkFormat format,
+              uint32_t gmem_offset,
+              uint32_t cpp)
+{
+   r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false,
+                    iview->ubwc_enabled, true);
+   if (separate_stencil)
+      r2d_dst_stencil(cs, iview, 0);
+   else
+      r2d_dst(cs, iview, 0);
+
+   tu_cs_emit_regs(cs,
+                   A6XX_SP_PS_2D_SRC_INFO(
+                      .color_format = tu6_format_texture(format, TILE6_2).fmt,
+                      .tile_mode = TILE6_2,
+                      .srgb = vk_format_is_srgb(format),
+                      .samples = tu_msaa_samples(samples),
+                      .samples_average = !vk_format_is_int(format),
+                      .unk20 = 1,
+                      .unk22 = 1),
+                   /* note: src size does not matter when not scaling */
+                   A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
+                   A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + gmem_offset),
+                   A6XX_SP_PS_2D_SRC_HI(),
+                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.framebuffer->tile0.width * cpp));
+
+   /* sync GMEM writes with CACHE. */
+   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
+
+   /* Wait for CACHE_INVALIDATE to land */
+   tu_cs_emit_wfi(cs);
+
+   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
+   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+   /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
+    * sysmem, and we generally assume that GMEM renderpasses leave their
+    * results in sysmem, so we need to flush manually here.
+    */
+   tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
}
void
uint32_t a,
uint32_t gmem_a)
{
- const struct tu_framebuffer *fb = cmd->state.framebuffer;
const VkRect2D *render_area = &cmd->state.render_area;
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
- struct tu_image_view *iview = fb->attachments[a].attachment;
+ struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
- if (!dst->store)
+ if (!dst->store && !dst->store_stencil)
return;
uint32_t x1 = render_area->offset.x;
/* use fast path when render area is aligned, except for unsupported resolve cases */
if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
- tu_emit_blit(cmd, cs, iview, src, true);
+ if (dst->store)
+ tu_emit_blit(cmd, cs, iview, src, true, false);
+ if (dst->store_stencil)
+ tu_emit_blit(cmd, cs, iview, src, true, true);
return;
}
return;
}
- r2d_setup_common(cmd, cs, dst->format, VK_IMAGE_ASPECT_COLOR_BIT,
- ROTATE_0, false, iview->ubwc_enabled, true);
- r2d_dst(cs, iview, 0);
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
- tu_cs_emit_regs(cs,
- A6XX_SP_PS_2D_SRC_INFO(
- .color_format = tu6_format_texture(src->format, TILE6_2).fmt,
- .tile_mode = TILE6_2,
- .srgb = vk_format_is_srgb(src->format),
- .samples = tu_msaa_samples(src->samples),
- .samples_average = !vk_format_is_int(src->format),
- .unk20 = 1,
- .unk22 = 1),
- /* note: src size does not matter when not scaling */
- A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
- A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
- A6XX_SP_PS_2D_SRC_HI(),
- A6XX_SP_PS_2D_SRC_PITCH(.pitch = fb->tile0.width * src->cpp));
-
- /* sync GMEM writes with CACHE. */
- tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
+ VkFormat format = src->format;
+ if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+ format = VK_FORMAT_D32_SFLOAT;
- /* Wait for CACHE_INVALIDATE to land */
- tu_cs_emit_wfi(cs);
-
- tu_cs_emit_pkt7(cs, CP_BLIT, 1);
- tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
- /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
- * sysmem, and we generally assume that GMEM renderpasses leave their
- * results in sysmem, so we need to flush manually here.
- */
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
+ if (dst->store) {
+ store_cp_blit(cmd, cs, iview, src->samples, false, format,
+ src->gmem_offset, src->cpp);
+ }
+ if (dst->store_stencil) {
+ store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
+ src->gmem_offset_stencil, src->samples);
+ }
}