tu_cs_image_flag_ref(cs, iview, layer);
}
+/* Emit 2D blit engine (BLIT_OP_SCALE) destination state pointing at the
+ * stencil plane of a combined depth/stencil image (separate-stencil layout).
+ * `layer` selects the destination array layer within the stencil plane.
+ */
+static void
+r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+{
+ assert(iview->image->samples == 1);
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
+ /* FLAGS is masked off — presumably the separate stencil plane carries no
+  * UBWC flag data, so compression must be disabled here. TODO confirm.
+  */
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS);
+ tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
+ tu_cs_emit(cs, iview->stencil_PITCH);
+}
+
static void
r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, ubwc, false);
}
+/* Teardown hook for the 2D blit path. The 2D engine requires no cleanup;
+ * this empty implementation exists so blit_ops->teardown can be called
+ * unconditionally for both the 2D and 3D paths.
+ */
+static void
+r2d_teardown(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs)
+{
+ /* nothing to do here */
+}
+
static void
r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
}
+/* Emit 3D (draw-based) blit path destination state targeting the stencil
+ * plane of a combined depth/stencil image, bound as color MRT 0.
+ * `layer` selects the destination array layer.
+ */
+static void
+r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+{
+ tu6_emit_msaa(cs, iview->image->samples); /* TODO: move to setup */
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO));
+ tu_cs_image_stencil_ref(cs, iview, layer);
+ tu_cs_emit(cs, 0);
+
+ /* no .flag_mrts — the stencil plane is written without UBWC flags
+  * (note r3d_dst sets .flag_mrts = iview->ubwc_enabled instead)
+  */
+ tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
+}
+
static void
r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
.component_enable = aspect_write_mask(vk_format, aspect_mask)));
tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format)));
tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format)));
+
+ if (cmd->state.predication_active) {
+ tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
+ tu_cs_emit(cs, 0);
+ }
}
static void
tu_cs_emit(cs, 2); /* vertex count */
}
+/* Teardown hook for the 3D blit path: re-enable local draw predication
+ * (CP_DRAW_PRED_ENABLE_LOCAL = 1) if it was active, undoing the disable
+ * that the corresponding setup emitted so the blit itself would not be
+ * predicated away.
+ */
+static void
+r3d_teardown(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+ if (cmd->state.predication_active) {
+ tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
+ tu_cs_emit(cs, 1);
+ }
+}
+
/* blit ops - common interface for 2d/shader paths */
struct blit_ops {
bool clear,
bool ubwc);
void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
+ void (*teardown)(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs);
};
static const struct blit_ops r2d_ops = {
.dst_buffer = r2d_dst_buffer,
.setup = r2d_setup,
.run = r2d_run,
+ .teardown = r2d_teardown,
};
static const struct blit_ops r3d_ops = {
.dst_buffer = r3d_dst_buffer,
.setup = r3d_setup,
.run = r3d_run,
+ .teardown = r3d_teardown,
};
/* passthrough set coords from 3D extents */
return format;
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
return VK_FORMAT_R32_UINT;
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ return VK_FORMAT_S8_UINT;
+ assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT);
+ return VK_FORMAT_D32_SFLOAT;
}
}
filter == VK_FILTER_CUBIC_EXT)
ops = &r3d_ops;
- /* TODO: shader path fails some of blit_image.all_formats.generate_mipmaps.* tests,
- * figure out why (should be able to pass all tests with only shader path)
- */
-
ops->setup(cmd, cs, dst_image->vk_format, info->dstSubresource.aspectMask,
rotate[mirror_y][mirror_x], false, dst_image->layout[0].ubwc);
ops->src(cmd, cs, &src, i, filter);
ops->run(cmd, cs);
}
+
+ ops->teardown(cmd, cs);
}
void
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
dst_va += width * block_size;
blocks -= width;
}
+
+ ops->teardown(cmd, cs);
}
void
dst_va += width * 4;
blocks -= width;
}
+
+ ops->teardown(cmd, cs);
}
void
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
ops->dst(cs, dst, i);
ops->run(cmd, cs);
}
+
+ ops->teardown(cmd, cs);
}
static void
clear_image(struct tu_cmd_buffer *cmd,
struct tu_image *image,
const VkClearValue *clear_value,
- const VkImageSubresourceRange *range)
+ const VkImageSubresourceRange *range,
+ VkImageAspectFlags aspect_mask)
{
uint32_t level_count = tu_get_levelCount(image, range);
uint32_t layer_count = tu_get_layerCount(image, range);
struct tu_cs *cs = &cmd->cs;
VkFormat format = image->vk_format;
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- format = VK_FORMAT_R32_UINT;
+ if (format == VK_FORMAT_D32_SFLOAT_S8_UINT || format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ format = copy_format(format, aspect_mask, false);
if (image->type == VK_IMAGE_TYPE_3D) {
assert(layer_count == 1);
const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops;
- ops->setup(cmd, cs, format, range->aspectMask, ROTATE_0, true, image->layout[0].ubwc);
- ops->clear_value(cs, image->vk_format, clear_value);
+ ops->setup(cmd, cs, format, aspect_mask, ROTATE_0, true, image->layout[0].ubwc);
+ if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ ops->clear_value(cs, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, clear_value);
+ else
+ ops->clear_value(cs, format, clear_value);
for (unsigned j = 0; j < level_count; j++) {
if (image->type == VK_IMAGE_TYPE_3D)
struct tu_image_view dst;
tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) {
- .aspectMask = range->aspectMask,
+ .aspectMask = aspect_mask,
.mipLevel = range->baseMipLevel + j,
.baseArrayLayer = range->baseArrayLayer,
.layerCount = 1,
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
for (unsigned i = 0; i < rangeCount; i++)
- clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i);
+ clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT);
}
void
tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
- for (unsigned i = 0; i < rangeCount; i++)
- clear_image(cmd, image, (const VkClearValue*) pDepthStencil, pRanges + i);
-}
+ for (unsigned i = 0; i < rangeCount; i++) {
+ const VkImageSubresourceRange *range = &pRanges[i];
-static void
-tu_clear_sysmem_attachments_2d(struct tu_cmd_buffer *cmd,
- uint32_t attachment_count,
- const VkClearAttachment *attachments,
- uint32_t rect_count,
- const VkClearRect *rects)
-{
- const struct tu_subpass *subpass = cmd->state.subpass;
- /* note: cannot use shader path here.. there is a special shader path
- * in tu_clear_sysmem_attachments()
- */
- const struct blit_ops *ops = &r2d_ops;
- struct tu_cs *cs = &cmd->draw_cs;
-
- for (uint32_t j = 0; j < attachment_count; j++) {
- /* The vulkan spec, section 17.2 "Clearing Images Inside a Render
- * Pass Instance" says that:
- *
- * Unlike other clear commands, vkCmdClearAttachments executes as
- * a drawing command, rather than a transfer command, with writes
- * performed by it executing in rasterization order. Clears to
- * color attachments are executed as color attachment writes, by
- * the VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT stage.
- * Clears to depth/stencil attachments are executed as depth
- * writes and writes by the
- * VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT and
- * VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT stages.
- *
- * However, the 2d path here is executed the same way as a
- * transfer command, using the CCU color cache exclusively with
- * a special depth-as-color format for depth clears. This means that
- * we can't rely on the normal pipeline barrier mechanism here, and
- * have to manually flush whenever using a different cache domain
- * from what the 3d path would've used. This happens when we clear
- * depth/stencil, since normally depth attachments use CCU depth, but
- * we clear it using a special depth-as-color format. Since the clear
- * potentially uses a different attachment state we also need to
- * invalidate color beforehand and flush it afterwards.
- */
-
- uint32_t a;
- if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
- } else {
- a = subpass->depth_stencil_attachment.attachment;
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS);
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
- tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
- }
-
- if (a == VK_ATTACHMENT_UNUSED)
- continue;
-
- const struct tu_image_view *iview =
- cmd->state.framebuffer->attachments[a].attachment;
-
- ops->setup(cmd, cs, iview->image->vk_format, attachments[j].aspectMask,
- ROTATE_0, true, iview->ubwc_enabled);
- ops->clear_value(cs, iview->image->vk_format, &attachments[j].clearValue);
-
- /* Wait for the flushes we triggered manually to complete */
- tu_cs_emit_wfi(cs);
-
- for (uint32_t i = 0; i < rect_count; i++) {
- ops->coords(cs, &rects[i].rect.offset, NULL, &rects[i].rect.extent);
- for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
- ops->dst(cs, iview, rects[i].baseArrayLayer + layer);
- ops->run(cmd, cs);
- }
- }
+ if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ /* can't clear both depth and stencil at once, split up the aspect mask */
+ uint32_t b;
+ for_each_bit(b, range->aspectMask)
+ clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b));
+ continue;
+ }
- if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
- tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
- } else {
- /* sync color into depth */
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
- tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
- }
+ clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask);
}
}
max_samples = MAX2(max_samples, pass->attachments[a].samples);
}
- /* prefer to use 2D path for clears
- * 2D can't clear separate depth/stencil and msaa, needs known framebuffer
- */
- if (max_samples == 1 && cmd->state.framebuffer) {
- tu_clear_sysmem_attachments_2d(cmd, attachment_count, attachments, rect_count, rects);
- return;
- }
-
/* disable all draw states so they don't interfere
- * TODO: use and re-use draw states for this path
+ * TODO: use and re-use draw states
* we have to disable draw states individually to preserve
* input attachment states, because a secondary command buffer
* won't be able to restore them
}
static void
-tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t attachment,
- VkImageAspectFlags mask,
- const VkClearValue *value)
+clear_gmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat format,
+ uint8_t clear_mask,
+ uint32_t gmem_offset,
+ const VkClearValue *value)
{
- VkFormat vk_format = cmd->state.pass->attachments[attachment].format;
-
-
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
- tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(vk_format)));
+ tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format)));
- tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1,
- .clear_mask = aspect_write_mask(vk_format, mask)));
+ tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1, .clear_mask = clear_mask));
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
- tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);
+ tu_cs_emit(cs, gmem_offset);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
tu_cs_emit(cs, 0);
uint32_t clear_vals[4] = {};
- pack_gmem_clear_value(value, vk_format, clear_vals);
+ pack_gmem_clear_value(value, format, clear_vals);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
tu_cs_emit_array(cs, clear_vals, 4);
tu6_emit_event_write(cmd, cs, BLIT);
}
+/* Clear a render pass attachment while it resides in GMEM, using the
+ * CP_EVENT_WRITE::BLIT clear path (via clear_gmem_attachment).
+ *
+ * For D32_SFLOAT_S8_UINT the depth and stencil planes live at separate
+ * GMEM offsets, so each requested aspect is cleared by its own blit with
+ * its plane format and a full 0xf write mask; pack_gmem_clear_value picks
+ * the right component of `value` from the plane format.
+ */
+static void
+tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ uint32_t attachment,
+ VkImageAspectFlags mask,
+ const VkClearValue *value)
+{
+ const struct tu_render_pass_attachment *att =
+ &cmd->state.pass->attachments[attachment];
+
+ if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
+ clear_gmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, 0xf, att->gmem_offset, value);
+ if (mask & VK_IMAGE_ASPECT_STENCIL_BIT)
+ clear_gmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value);
+ return;
+ }
+
+ /* single-plane: mask the write to only the requested aspects */
+ clear_gmem_attachment(cmd, cs, att->format, aspect_write_mask(att->format, mask), att->gmem_offset, value);
+}
+
static void
tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
uint32_t attachment_count,
*/
tu_emit_cache_flush_renderpass(cmd, cs);
+ /* vkCmdClearAttachments is supposed to respect the predicate if active.
+ * The easiest way to do this is to always use the 3d path, which always
+ * works even with GMEM because it's just a simple draw using the existing
+ * attachment state. However it seems that IGNORE_VISIBILITY draws must be
+ * skipped in the binning pass, since otherwise they produce binning data
+ * which isn't consumed and leads to the wrong binning data being read, so
+ * condition on GMEM | SYSMEM.
+ */
+ if (cmd->state.predication_active) {
+ tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM |
+ CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
+ tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
+ tu_cond_exec_end(cs);
+ return;
+ }
+
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
tu_cond_exec_end(cs);
tu_cond_exec_end(cs);
}
+/* Clear one plane of attachment `a` in sysmem at renderpass begin.
+ *
+ * `format`/`clear_mask` describe the plane actually written (the caller
+ * passes e.g. VK_FORMAT_D32_SFLOAT or VK_FORMAT_S8_UINT with
+ * VK_IMAGE_ASPECT_COLOR_BIT for depth/stencil-as-color clears);
+ * `separate_stencil` routes the destination to the stencil plane of a
+ * separate-stencil image. Clears every framebuffer layer over the full
+ * renderArea, using the 2D engine, or the 3D path when multisampled.
+ */
+static void
+clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat format,
+ VkImageAspectFlags clear_mask,
+ const VkRenderPassBeginInfo *info,
+ uint32_t a,
+ bool separate_stencil)
+{
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
+ const struct tu_image_view *iview = fb->attachments[a].attachment;
+ const struct blit_ops *ops = &r2d_ops;
+ if (cmd->state.pass->attachments[a].samples > 1)
+ ops = &r3d_ops;
+
+ ops->setup(cmd, cs, format, clear_mask, ROTATE_0, true, iview->ubwc_enabled);
+ ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
+ ops->clear_value(cs, format, &info->pClearValues[a]);
+
+ for (uint32_t i = 0; i < fb->layers; i++) {
+ if (separate_stencil) {
+ /* ops->dst can't address the stencil plane, so dispatch manually */
+ if (ops == &r3d_ops)
+ r3d_dst_stencil(cs, iview, i);
+ else
+ r2d_dst_stencil(cs, iview, i);
+ } else {
+ ops->dst(cs, iview, i);
+ }
+ ops->run(cmd, cs);
+ }
+
+ ops->teardown(cmd, cs);
+}
+
void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
const VkRenderPassBeginInfo *info)
{
- const struct tu_framebuffer *fb = cmd->state.framebuffer;
- const struct tu_image_view *iview = fb->attachments[a].attachment;
const struct tu_render_pass_attachment *attachment =
&cmd->state.pass->attachments[a];
if (!attachment->clear_mask)
return;
- const struct blit_ops *ops = &r2d_ops;
- if (attachment->samples > 1)
- ops = &r3d_ops;
-
- ops->setup(cmd, cs, attachment->format, attachment->clear_mask, ROTATE_0,
- true, iview->ubwc_enabled);
- ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
- ops->clear_value(cs, attachment->format, &info->pClearValues[a]);
-
/* Wait for any flushes at the beginning of the renderpass to complete */
tu_cs_emit_wfi(cs);
- for (uint32_t i = 0; i < fb->layers; i++) {
- ops->dst(cs, iview, i);
- ops->run(cmd, cs);
+ if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT,
+ info, a, false);
+ }
+ if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT,
+ info, a, true);
+ }
+ } else {
+ clear_sysmem_attachment(cmd, cs, attachment->format, attachment->clear_mask,
+ info, a, false);
}
/* The spec doesn't explicitly say, but presumably the initial renderpass
struct tu_cs *cs,
const struct tu_image_view *iview,
const struct tu_render_pass_attachment *attachment,
- bool resolve)
+ bool resolve,
+ bool separate_stencil)
{
tu_cs_emit_regs(cs,
A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
.integer = vk_format_is_int(attachment->format)));
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
- tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
- tu_cs_image_ref_2d(cs, iview, 0, false);
+ if (separate_stencil) {
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
+ tu_cs_emit_qw(cs, iview->stencil_base_addr);
+ tu_cs_emit(cs, iview->stencil_PITCH);
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
- tu_cs_image_flag_ref(cs, iview, 0);
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil));
+ } else {
+ tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
+ tu_cs_image_ref_2d(cs, iview, 0, false);
- tu_cs_emit_regs(cs,
- A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
+ tu_cs_image_flag_ref(cs, iview, 0);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
+ }
tu6_emit_event_write(cmd, cs, BLIT);
}
&cmd->state.pass->attachments[a];
if (attachment->load || force_load)
- tu_emit_blit(cmd, cs, iview, attachment, false);
+ tu_emit_blit(cmd, cs, iview, attachment, false, false);
+
+ if (attachment->load_stencil || (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load))
+ tu_emit_blit(cmd, cs, iview, attachment, false, true);
+}
+
+/* Store one plane of a GMEM attachment out to its sysmem image with a
+ * CP_BLIT (2D engine) copy, used when the CP_EVENT_WRITE::BLIT fast path
+ * can't handle the store (unaligned render area or unsupported resolve).
+ *
+ * `format`/`gmem_offset`/`cpp` describe the GMEM source plane;
+ * `separate_stencil` selects the stencil plane of the destination image.
+ * `cpp` is the per-pixel byte count of the plane's GMEM layout — for the
+ * stencil plane the caller passes the sample count, presumably because
+ * GMEM stencil is 1 byte/sample (TODO confirm).
+ */
+static void
+store_cp_blit(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ struct tu_image_view *iview,
+ uint32_t samples,
+ bool separate_stencil,
+ VkFormat format,
+ uint32_t gmem_offset,
+ uint32_t cpp)
+{
+ r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false,
+ iview->ubwc_enabled, true);
+ if (separate_stencil)
+ r2d_dst_stencil(cs, iview, 0);
+ else
+ r2d_dst(cs, iview, 0);
+
+ /* source: the attachment's tile in GMEM, addressed as a tiled image */
+ tu_cs_emit_regs(cs,
+ A6XX_SP_PS_2D_SRC_INFO(
+ .color_format = tu6_format_texture(format, TILE6_2).fmt,
+ .tile_mode = TILE6_2,
+ .srgb = vk_format_is_srgb(format),
+ .samples = tu_msaa_samples(samples),
+ .samples_average = !vk_format_is_int(format),
+ .unk20 = 1,
+ .unk22 = 1),
+ /* note: src size does not matter when not scaling */
+ A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
+ A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + gmem_offset),
+ A6XX_SP_PS_2D_SRC_HI(),
+ A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.framebuffer->tile0.width * cpp));
+
+ /* sync GMEM writes with CACHE. */
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
+
+ /* Wait for CACHE_INVALIDATE to land */
+ tu_cs_emit_wfi(cs);
+
+ tu_cs_emit_pkt7(cs, CP_BLIT, 1);
+ tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+ /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
+ * sysmem, and we generally assume that GMEM renderpasses leave their
+ * results in sysmem, so we need to flush manually here.
+ */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
+}
void
uint32_t a,
uint32_t gmem_a)
{
- const struct tu_framebuffer *fb = cmd->state.framebuffer;
const VkRect2D *render_area = &cmd->state.render_area;
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
- struct tu_image_view *iview = fb->attachments[a].attachment;
+ struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
- if (!dst->store)
+ if (!dst->store && !dst->store_stencil)
return;
uint32_t x1 = render_area->offset.x;
/* use fast path when render area is aligned, except for unsupported resolve cases */
if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
- tu_emit_blit(cmd, cs, iview, src, true);
+ if (dst->store)
+ tu_emit_blit(cmd, cs, iview, src, true, false);
+ if (dst->store_stencil)
+ tu_emit_blit(cmd, cs, iview, src, true, true);
return;
}
return;
}
- r2d_setup_common(cmd, cs, dst->format, VK_IMAGE_ASPECT_COLOR_BIT,
- ROTATE_0, false, iview->ubwc_enabled, true);
- r2d_dst(cs, iview, 0);
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
- tu_cs_emit_regs(cs,
- A6XX_SP_PS_2D_SRC_INFO(
- .color_format = tu6_format_texture(src->format, TILE6_2).fmt,
- .tile_mode = TILE6_2,
- .srgb = vk_format_is_srgb(src->format),
- .samples = tu_msaa_samples(src->samples),
- .samples_average = !vk_format_is_int(src->format),
- .unk20 = 1,
- .unk22 = 1),
- /* note: src size does not matter when not scaling */
- A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
- A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
- A6XX_SP_PS_2D_SRC_HI(),
- A6XX_SP_PS_2D_SRC_PITCH(.pitch = fb->tile0.width * src->cpp));
-
- /* sync GMEM writes with CACHE. */
- tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
+ VkFormat format = src->format;
+ if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+ format = VK_FORMAT_D32_SFLOAT;
- /* Wait for CACHE_INVALIDATE to land */
- tu_cs_emit_wfi(cs);
-
- tu_cs_emit_pkt7(cs, CP_BLIT, 1);
- tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
- /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
- * sysmem, and we generally assume that GMEM renderpasses leave their
- * results in sysmem, so we need to flush manually here.
- */
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
+ if (dst->store) {
+ store_cp_blit(cmd, cs, iview, src->samples, false, format,
+ src->gmem_offset, src->cpp);
+ }
+ if (dst->store_stencil) {
+ store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
+ src->gmem_offset_stencil, src->samples);
+ }
}