#include "util/format_srgb.h"
#include "util/u_half.h"
-/* helper functions previously in tu_formats.c */
-
-static uint32_t
-tu_pack_mask(int bits)
-{
- assert(bits <= 32);
- return (1ull << bits) - 1;
-}
-
static uint32_t
tu_pack_float32_for_unorm(float val, int bits)
{
- const uint32_t max = tu_pack_mask(bits);
- if (val < 0.0f)
- return 0;
- else if (val > 1.0f)
- return max;
- else
- return _mesa_lroundevenf(val * (float) max);
-}
-
-static uint32_t
-tu_pack_float32_for_snorm(float val, int bits)
-{
- const int32_t max = tu_pack_mask(bits - 1);
- int32_t tmp;
- if (val < -1.0f)
- tmp = -max;
- else if (val > 1.0f)
- tmp = max;
- else
- tmp = _mesa_lroundevenf(val * (float) max);
-
- return tmp & tu_pack_mask(bits);
-}
-
-static uint32_t
-tu_pack_float32_for_uscaled(float val, int bits)
-{
- const uint32_t max = tu_pack_mask(bits);
- if (val < 0.0f)
- return 0;
- else if (val > (float) max)
- return max;
- else
- return (uint32_t) val;
-}
-
-static uint32_t
-tu_pack_float32_for_sscaled(float val, int bits)
-{
- const int32_t max = tu_pack_mask(bits - 1);
- const int32_t min = -max - 1;
- int32_t tmp;
- if (val < (float) min)
- tmp = min;
- else if (val > (float) max)
- tmp = max;
- else
- tmp = (int32_t) val;
-
- return tmp & tu_pack_mask(bits);
-}
-
-static uint32_t
-tu_pack_uint32_for_uint(uint32_t val, int bits)
-{
- return val & tu_pack_mask(bits);
-}
-
-static uint32_t
-tu_pack_int32_for_sint(int32_t val, int bits)
-{
- return val & tu_pack_mask(bits);
-}
-
-static uint32_t
-tu_pack_float32_for_sfloat(float val, int bits)
-{
- assert(bits == 16 || bits == 32);
- return bits == 16 ? util_float_to_half(val) : fui(val);
-}
-
-union tu_clear_component_value {
- float float32;
- int32_t int32;
- uint32_t uint32;
-};
-
-static uint32_t
-tu_pack_clear_component_value(union tu_clear_component_value val,
- const struct util_format_channel_description *ch)
-{
- uint32_t packed;
-
- switch (ch->type) {
- case UTIL_FORMAT_TYPE_UNSIGNED:
- /* normalized, scaled, or pure integer */
- if (ch->normalized)
- packed = tu_pack_float32_for_unorm(val.float32, ch->size);
- else if (ch->pure_integer)
- packed = tu_pack_uint32_for_uint(val.uint32, ch->size);
- else
- packed = tu_pack_float32_for_uscaled(val.float32, ch->size);
- break;
- case UTIL_FORMAT_TYPE_SIGNED:
- /* normalized, scaled, or pure integer */
- if (ch->normalized)
- packed = tu_pack_float32_for_snorm(val.float32, ch->size);
- else if (ch->pure_integer)
- packed = tu_pack_int32_for_sint(val.int32, ch->size);
- else
- packed = tu_pack_float32_for_sscaled(val.float32, ch->size);
- break;
- case UTIL_FORMAT_TYPE_FLOAT:
- packed = tu_pack_float32_for_sfloat(val.float32, ch->size);
- break;
- default:
- unreachable("unexpected channel type");
- packed = 0;
- break;
- }
-
- assert((packed & tu_pack_mask(ch->size)) == packed);
- return packed;
-}
-
-static const struct util_format_channel_description *
-tu_get_format_channel_description(const struct util_format_description *desc,
- int comp)
-{
- switch (desc->swizzle[comp]) {
- case PIPE_SWIZZLE_X:
- return &desc->channel[0];
- case PIPE_SWIZZLE_Y:
- return &desc->channel[1];
- case PIPE_SWIZZLE_Z:
- return &desc->channel[2];
- case PIPE_SWIZZLE_W:
- return &desc->channel[3];
- default:
- return NULL;
- }
-}
-
-static union tu_clear_component_value
-tu_get_clear_component_value(const VkClearValue *val, int comp,
- enum util_format_colorspace colorspace)
-{
- assert(comp < 4);
-
- union tu_clear_component_value tmp;
- switch (colorspace) {
- case UTIL_FORMAT_COLORSPACE_ZS:
- assert(comp < 2);
- if (comp == 0)
- tmp.float32 = val->depthStencil.depth;
- else
- tmp.uint32 = val->depthStencil.stencil;
- break;
- case UTIL_FORMAT_COLORSPACE_SRGB:
- if (comp < 3) {
- tmp.float32 = util_format_linear_to_srgb_float(val->color.float32[comp]);
- break;
- }
- default:
- assert(comp < 4);
- tmp.uint32 = val->color.uint32[comp];
- break;
- }
-
- return tmp;
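+ /* rounds to nearest even, e.g. val = 0.5f with bits = 8 gives
+ * lroundeven(0.5 * 255.0) = lroundeven(127.5) = 128
+ */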
+ return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1 << bits) - 1));
}
/* r2d_ = BLIT_OP_SCALE operations */
static enum a6xx_2d_ifmt
-format_to_ifmt(enum a6xx_format fmt)
-{
- switch (fmt) {
- case FMT6_A8_UNORM:
- case FMT6_8_UNORM:
- case FMT6_8_SNORM:
- case FMT6_8_8_UNORM:
- case FMT6_8_8_SNORM:
- case FMT6_8_8_8_8_UNORM:
- case FMT6_8_8_8_X8_UNORM:
- case FMT6_8_8_8_8_SNORM:
- case FMT6_4_4_4_4_UNORM:
- case FMT6_5_5_5_1_UNORM:
- case FMT6_5_6_5_UNORM:
- case FMT6_Z24_UNORM_S8_UINT:
- case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
+format_to_ifmt(VkFormat format)
+{
+ if (format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ format == VK_FORMAT_X8_D24_UNORM_PACK32)
return R2D_UNORM8;
- case FMT6_32_UINT:
- case FMT6_32_SINT:
- case FMT6_32_32_UINT:
- case FMT6_32_32_SINT:
- case FMT6_32_32_32_32_UINT:
- case FMT6_32_32_32_32_SINT:
- return R2D_INT32;
-
- case FMT6_16_UINT:
- case FMT6_16_SINT:
- case FMT6_16_16_UINT:
- case FMT6_16_16_SINT:
- case FMT6_16_16_16_16_UINT:
- case FMT6_16_16_16_16_SINT:
- case FMT6_10_10_10_2_UINT:
- return R2D_INT16;
-
- case FMT6_8_UINT:
- case FMT6_8_SINT:
- case FMT6_8_8_UINT:
- case FMT6_8_8_SINT:
- case FMT6_8_8_8_8_UINT:
- case FMT6_8_8_8_8_SINT:
- return R2D_INT8;
-
- case FMT6_16_UNORM:
- case FMT6_16_SNORM:
- case FMT6_16_16_UNORM:
- case FMT6_16_16_SNORM:
- case FMT6_16_16_16_16_UNORM:
- case FMT6_16_16_16_16_SNORM:
- case FMT6_32_FLOAT:
- case FMT6_32_32_FLOAT:
- case FMT6_32_32_32_32_FLOAT:
+ /* get_component_bits doesn't work with depth/stencil formats: */
+ if (format == VK_FORMAT_D16_UNORM || format == VK_FORMAT_D32_SFLOAT)
return R2D_FLOAT32;
+ if (format == VK_FORMAT_S8_UINT)
+ return R2D_INT8;
- case FMT6_16_FLOAT:
- case FMT6_16_16_FLOAT:
- case FMT6_16_16_16_16_FLOAT:
- case FMT6_11_11_10_FLOAT:
- case FMT6_10_10_10_2_UNORM:
- case FMT6_10_10_10_2_UNORM_DEST:
- return R2D_FLOAT16;
-
- default:
+ /* use the size of the red channel to find the corresponding "ifmt" */
+ bool is_int = vk_format_is_int(format);
+ switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
+ case 4: case 5: case 8:
+ return is_int ? R2D_INT8 : R2D_UNORM8;
+ case 10: case 11:
+ return is_int ? R2D_INT16 : R2D_FLOAT16;
+ case 16:
+ if (vk_format_is_float(format))
+ return R2D_FLOAT16;
+ return is_int ? R2D_INT16 : R2D_FLOAT32;
+ case 32:
+ return is_int ? R2D_INT32 : R2D_FLOAT32;
+ default:
unreachable("bad format");
return 0;
}
return;
tu_cs_emit_regs(cs,
- A6XX_GRAS_2D_SRC_TL_X(.x = src->x),
- A6XX_GRAS_2D_SRC_BR_X(.x = src->x + extent->width - 1),
- A6XX_GRAS_2D_SRC_TL_Y(.y = src->y),
- A6XX_GRAS_2D_SRC_BR_Y(.y = src->y + extent->height - 1));
+ A6XX_GRAS_2D_SRC_TL_X(src->x),
+ A6XX_GRAS_2D_SRC_BR_X(src->x + extent->width - 1),
+ A6XX_GRAS_2D_SRC_TL_Y(src->y),
+ A6XX_GRAS_2D_SRC_BR_Y(src->y + extent->height - 1));
}
static void
default:
assert(!vk_format_is_depth_or_stencil(format));
const struct util_format_description *desc = vk_format_description(format);
- enum a6xx_2d_ifmt ifmt = format_to_ifmt(tu6_base_format(format));
+ enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));
linear = util_format_linear_to_srgb_float(val->color.float32[i]);
if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
- clear_value[i] = tu_pack_float32_for_snorm(linear, 8);
+ clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f);
else
clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
} else if (ifmt == R2D_FLOAT16) {
tu_cs_image_flag_ref(cs, iview, layer);
}
+static void
+r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+{
+ assert(iview->image->samples == 1);
+
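+ /* the FLAGS (UBWC) bit is masked off from the stencil-aspect DST_INFO:
+ * the separate stencil plane is written without flags
+ */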
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS);
+ tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
+ tu_cs_emit(cs, iview->stencil_PITCH);
+}
+
static void
r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
.srgb = vk_format_is_srgb(vk_format)),
A6XX_RB_2D_DST_LO((uint32_t) va),
A6XX_RB_2D_DST_HI(va >> 32),
- A6XX_RB_2D_DST_SIZE(.pitch = pitch));
+ A6XX_RB_2D_DST_PITCH(pitch));
}
static void
r2d_setup_common(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
VkFormat vk_format,
+ VkImageAspectFlags aspect_mask,
enum a6xx_rotation rotation,
bool clear,
- uint8_t mask,
+ bool ubwc,
bool scissor)
{
enum a6xx_format format = tu6_base_format(vk_format);
- enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
+ enum a6xx_2d_ifmt ifmt = format_to_ifmt(vk_format);
uint32_t unknown_8c01 = 0;
- if (format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8) {
- /* preserve depth channels */
- if (mask == 0x8)
- unknown_8c01 = 0x00084001;
+ if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) {
+ format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+ }
+
+ /* note: the only format with partial clearing is D24S8 */
+ if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
/* preserve stencil channel */
- if (mask == 0x7)
+ if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
unknown_8c01 = 0x08000041;
+ /* preserve depth channels */
+ if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ unknown_8c01 = 0x00084001;
}
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
tu_cs_emit(cs, unknown_8c01);
uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL(
if (format == FMT6_10_10_10_2_UNORM_DEST)
format = FMT6_16_16_16_16_FLOAT;
- tu_cs_emit_regs(cs, A6XX_SP_2D_SRC_FORMAT(
+ tu_cs_emit_regs(cs, A6XX_SP_2D_DST_FORMAT(
.sint = vk_format_is_sint(vk_format),
.uint = vk_format_is_uint(vk_format),
.color_format = format,
r2d_setup(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
VkFormat vk_format,
+ VkImageAspectFlags aspect_mask,
enum a6xx_rotation rotation,
bool clear,
- uint8_t mask)
+ bool ubwc)
{
tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
- r2d_setup_common(cmd, cs, vk_format, rotation, clear, mask, false);
+ r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, ubwc, false);
+}
+
+static void
+r2d_teardown(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs)
+{
+ /* nothing to do here */
}
static void
/* r3d_ = shader path operations */
+void
+tu_init_clear_blit_shaders(struct tu6_global *global)
+{
+#define MOV(args...) { .cat1 = { .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32, args } }
+#define CAT2(op, args...) { .cat2 = { .opc_cat = 2, .opc = (op) & 63, .full = 1, args } }
+#define CAT3(op, args...) { .cat3 = { .opc_cat = 3, .opc = (op) & 63, args } }
+
+ static const instr_t vs_code[] = {
+ /* r0.xyz = r0.w ? c1.xyz : c0.xyz
+ * r1.xy = r0.w ? c1.zw : c0.zw
+ * r0.w = 1.0f
+ */
+ CAT3(OPC_SEL_B32, .repeat = 2, .dst = 0,
+ .c1 = {.src1_c = 1, .src1 = 4}, .src1_r = 1,
+ .src2 = 3,
+ .c2 = {.src3_c = 1, .dummy = 1, .src3 = 0}),
+ CAT3(OPC_SEL_B32, .repeat = 1, .dst = 4,
+ .c1 = {.src1_c = 1, .src1 = 6}, .src1_r = 1,
+ .src2 = 3,
+ .c2 = {.src3_c = 1, .dummy = 1, .src3 = 2}),
+ MOV(.dst = 3, .src_im = 1, .fim_val = 1.0f ),
+ { .cat0 = { .opc = OPC_END } },
+ };
+
+ static const instr_t fs_blit[] = {
+ /* " bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its
+ * blit path (it's not clear what allows it to not have it)
+ */
+ CAT2(OPC_BARY_F, .ei = 1, .full = 1, .dst = 63 * 4, .src1_im = 1),
+ { .cat0 = { .opc = OPC_END } },
+ };
+
+ memcpy(&global->shaders[GLOBAL_SH_VS], vs_code, sizeof(vs_code));
+ memcpy(&global->shaders[GLOBAL_SH_FS_BLIT], fs_blit, sizeof(fs_blit));
+
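+ /* the clear FS for N MRTs is just one (rpt3)mov from consts per RT,
+ * e.g. the generated shader for num_rts = 2 is:
+ * (rpt3)mov.s32s32 r0.x, (r)c0.x
+ * (rpt3)mov.s32s32 r1.x, (r)c1.x
+ * end
+ */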
+ for (uint32_t num_rts = 0; num_rts <= MAX_RTS; num_rts++) {
+ instr_t *code = global->shaders[GLOBAL_SH_FS_CLEAR0 + num_rts];
+ for (uint32_t i = 0; i < num_rts; i++) {
+ /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
+ *code++ = (instr_t) MOV(.repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4);
+ }
+ *code++ = (instr_t) { .cat0 = { .opc = OPC_END } };
+ }
+}
+
static void
r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_rts,
bool layered_clear)
{
+ struct ir3_const_state dummy_const_state = {};
struct ir3_shader dummy_shader = {};
struct ir3_shader_variant vs = {
.type = MESA_SHADER_VERTEX,
.instrlen = 1,
- .constlen = 2,
+ .constlen = 4,
.info.max_reg = 1,
.inputs_count = 1,
.inputs[0] = {
.regid = regid(1, 0),
},
.shader = &dummy_shader,
+ .const_state = &dummy_const_state,
};
if (layered_clear) {
- vs = (struct ir3_shader_variant) {
- .type = MESA_SHADER_VERTEX,
- .instrlen = 1,
- .info.max_reg = 0,
- .shader = &dummy_shader,
- };
+ vs.outputs[1].slot = VARYING_SLOT_LAYER;
+ vs.outputs[1].regid = regid(1, 1);
+ vs.outputs_count = 2;
}
struct ir3_shader_variant fs = {
.type = MESA_SHADER_FRAGMENT,
.instrlen = 1, /* max of 9 instructions with num_rts = 8 */
- .constlen = num_rts,
+ .constlen = align(num_rts, 4),
.info.max_reg = MAX2(num_rts, 1) - 1,
.total_in = blit ? 2 : 0,
.num_samp = blit ? 1 : 0,
.cmd = 4,
},
.shader = &dummy_shader,
+ .const_state = &dummy_const_state,
};
- struct ir3_shader_variant gs_shader = {
- .type = MESA_SHADER_GEOMETRY,
- .instrlen = 1,
- .constlen = 2,
- .info.max_reg = 1,
- .inputs_count = 1,
- .inputs[0] = {
- .slot = SYSTEM_VALUE_GS_HEADER_IR3,
- .regid = regid(0, 0),
- .sysval = true,
- },
- .outputs_count = 3,
- .outputs[0] = {
- .slot = VARYING_SLOT_POS,
- .regid = regid(0, 0),
- },
- .outputs[1] = {
- .slot = VARYING_SLOT_LAYER,
- .regid = regid(1, 1),
- },
- .outputs[2] = {
- .slot = VARYING_SLOT_GS_VERTEX_FLAGS_IR3,
- .regid = regid(1, 0),
- },
- .shader = &dummy_shader,
- }, *gs = layered_clear ? &gs_shader : NULL;
-
-
-#define MOV(args...) { .cat1 = { .opc_cat = 1, .src_type = TYPE_F32, .dst_type = TYPE_F32, args } }
-#define CAT2(op, args...) { .cat2 = { .opc_cat = 2, .opc = (op) & 63, .full = 1, args } }
-#define CAT3(op, args...) { .cat3 = { .opc_cat = 3, .opc = (op) & 63, args } }
-
- static const instr_t vs_code[] = {
- /* r0.xyz = r0.w ? c1.xyz : c0.xyz
- * r1.xy = r0.w ? c1.zw : c0.zw
- * r0.w = 1.0f
- */
- CAT3(OPC_SEL_B32, .repeat = 2, .dst = 0,
- .c1 = {.src1_c = 1, .src1 = 4}, .src1_r = 1,
- .src2 = 3,
- .c2 = {.src3_c = 1, .dummy = 1, .src3 = 0}),
- CAT3(OPC_SEL_B32, .repeat = 1, .dst = 4,
- .c1 = {.src1_c = 1, .src1 = 6}, .src1_r = 1,
- .src2 = 3,
- .c2 = {.src3_c = 1, .dummy = 1, .src3 = 2}),
- MOV(.dst = 3, .src_im = 1, .fim_val = 1.0f ),
- { .cat0 = { .opc = OPC_END } },
- };
-
- static const instr_t vs_layered[] = {
- { .cat0 = { .opc = OPC_CHMASK } },
- { .cat0 = { .opc = OPC_CHSH } },
- };
-
- static const instr_t gs_code[16] = {
- /* (sy)(ss)(nop3)shr.b r0.w, r0.x, 16 (extract local_id) */
- CAT2(OPC_SHR_B, .dst = 3, .src1 = 0, .src2_im = 1, .src2 = 16,
- .src1_r = 1, .src2_r = 1, .ss = 1, .sync = 1),
- /* x = (local_id & 1) ? c1.x : c0.x */
- CAT2(OPC_AND_B, .dst = 0, .src1 = 3, .src2_im = 1, .src2 = 1),
- /* y = (local_id & 2) ? c1.y : c0.y */
- CAT2(OPC_AND_B, .dst = 1, .src1 = 3, .src2_im = 1, .src2 = 2),
- /* pred = (local_id >= 4), used by OPC_KILL */
- CAT2(OPC_CMPS_S, .dst = REG_P0 * 4, .cond = IR3_COND_GE, .src1 = 3, .src2_im = 1, .src2 = 4),
- /* vertex_flags_out = (local_id == 0) ? 4 : 0 - first vertex flag */
- CAT2(OPC_CMPS_S, .dst = 4, .cond = IR3_COND_EQ, .src1 = 3, .src2_im = 1, .src2 = 0),
-
- MOV(.dst = 2, .src_c = 1, .src = 2), /* depth clear value from c0.z */
- MOV(.dst = 3, .src_im = 1, .fim_val = 1.0f),
- MOV(.dst = 5, .src_c = 1, .src = 3), /* layer id from c0.w */
-
- /* (rpt1)sel.b32 r0.x, (r)c1.x, (r)r0.x, (r)c0.x */
- CAT3(OPC_SEL_B32, .repeat = 1, .dst = 0,
- .c1 = {.src1_c = 1, .src1 = 4, .dummy = 4}, .src1_r = 1,
- .src2 = 0,
- .c2 = {.src3_c = 1, .dummy = 1, .src3 = 0}),
-
- CAT2(OPC_SHL_B, .dst = 4, .src1 = 4, .src2_im = 1, .src2 = 2),
-
- { .cat0 = { .opc = OPC_KILL } },
- { .cat0 = { .opc = OPC_END, .ss = 1, .sync = 1 } },
- };
-#define FS_OFFSET (16 * sizeof(instr_t))
-#define GS_OFFSET (32 * sizeof(instr_t))
-
- /* shaders */
- struct ts_cs_memory shaders = { };
- VkResult result = tu_cs_alloc(&cmd->sub_cs, 2 + layered_clear,
- 16 * sizeof(instr_t), &shaders);
- assert(result == VK_SUCCESS);
-
- if (layered_clear) {
- memcpy(shaders.map, vs_layered, sizeof(vs_layered));
- memcpy((uint8_t*) shaders.map + GS_OFFSET, gs_code, sizeof(gs_code));
- } else {
- memcpy(shaders.map, vs_code, sizeof(vs_code));
- }
-
- instr_t *fs_code = (instr_t*) ((uint8_t*) shaders.map + FS_OFFSET);
- for (uint32_t i = 0; i < num_rts; i++) {
- /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
- *fs_code++ = (instr_t) { .cat1 = {
- .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32,
- .repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4
- } };
- }
-
- /* " bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its
- * blit path (its not clear what allows it to not have it)
- */
- if (blit) {
- *fs_code++ = (instr_t) { .cat2 = {
- .opc_cat = 2, .opc = OPC_BARY_F & 63, .ei = 1, .full = 1,
- .dst = regid(63, 0), .src1_im = 1
- } };
- }
- *fs_code++ = (instr_t) { .cat0 = { .opc = OPC_END } };
- /* note: assumed <= 16 instructions (MAX_RTS is 8) */
-
- tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));
-
- tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, shaders.iova);
+ tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+ .vs_state = true,
+ .hs_state = true,
+ .ds_state = true,
+ .gs_state = true,
+ .fs_state = true,
+ .cs_state = true,
+ .gfx_ibo = true,
+ .cs_ibo = true,
+ .gfx_shared_const = true,
+ .gfx_bindless = 0x1f,
+ .cs_bindless = 0x1f));
+
+ tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, global_iova(cmd, shaders[GLOBAL_SH_VS]));
tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0);
tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL, 0);
- tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, gs, shaders.iova + GS_OFFSET);
- tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs, shaders.iova + FS_OFFSET);
+ tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL, 0);
+ tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs,
+ global_iova(cmd, shaders[blit ? GLOBAL_SH_FS_BLIT : (GLOBAL_SH_FS_CLEAR0 + num_rts)]));
tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0());
tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0());
- tu6_emit_vpc(cs, &vs, gs, &fs, NULL);
+ tu6_emit_vpc(cs, &vs, NULL, NULL, NULL, &fs, 0, false);
/* REPL_MODE for varying with RECTLIST (2 vertices only) */
tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0));
.persp_division_disable = 1,
.vp_xform_disable = 1,
.vp_clip_code_ignore = 1,
- .clip_disable = 1),
- A6XX_GRAS_UNKNOWN_8001(0));
+ .clip_disable = 1));
tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?
tu_cs_emit_regs(cs,
- A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0(.x = 0, .y = 0),
- A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = 0, .y = 0),
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff));
tu_cs_emit_regs(cs,
- A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0(.x = 0, .y = 0),
- A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));
+ A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = 0, .y = 0),
+ A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff));
tu_cs_emit_regs(cs,
A6XX_VFD_INDEX_OFFSET(),
}
static void
-r3d_coords_raw(struct tu_cs *cs, bool gs, const float *coords)
+r3d_coords_raw(struct tu_cs *cs, const float *coords)
{
tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8);
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
- CP_LOAD_STATE6_0_STATE_BLOCK(gs ? SB6_GS_SHADER : SB6_VS_SHADER) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
CP_LOAD_STATE6_0_NUM_UNIT(2));
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
{
int32_t src_x1 = src ? src->x : 0;
int32_t src_y1 = src ? src->y : 0;
- r3d_coords_raw(cs, false, (float[]) {
+ r3d_coords_raw(cs, (float[]) {
dst->x, dst->y,
src_x1, src_y1,
dst->x + extent->width, dst->y + extent->height,
uint32_t offset_ubwc,
VkFilter filter)
{
- struct ts_cs_memory texture = { };
+ struct tu_cs_memory texture = { };
VkResult result = tu_cs_alloc(&cmd->sub_cs,
2, /* allocate space for a sampler too */
A6XX_TEX_CONST_DWORDS, &texture);
A6XX_TEX_CONST_0_SWIZ_W(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W);
desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
desc[2] =
- A6XX_TEX_CONST_2_FETCHSIZE(tu6_fetchsize(vk_format)) |
A6XX_TEX_CONST_2_PITCH(pitch) |
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
desc[3] = 0;
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
}
+static void
+r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
+{
+ tu6_emit_msaa(cs, iview->image->samples); /* TODO: move to setup */
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO));
+ tu_cs_image_stencil_ref(cs, iview, layer);
+ tu_cs_emit(cs, 0);
+
+ tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
+}
+
static void
r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
}
+static uint8_t
+aspect_write_mask(VkFormat vk_format, VkImageAspectFlags aspect_mask)
+{
+ uint8_t mask = 0xf;
+ assert(aspect_mask);
+ /* note: the only format with partial writing is D24S8,
+ * clear/blit uses the _AS_R8G8B8A8 format to access it:
+ * depth lives in the RGB channels (write mask 0x7) and
+ * stencil in the A channel (write mask 0x8)
+ */
+ if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
+ mask = 0x7;
+ if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ mask = 0x8;
+ }
+ return mask;
+}
+
static void
r3d_setup(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
VkFormat vk_format,
+ VkImageAspectFlags aspect_mask,
enum a6xx_rotation rotation,
bool clear,
- uint8_t mask)
+ bool ubwc)
{
+ enum a6xx_format format = tu6_base_format(vk_format);
+
+ if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) {
+ format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+ }
+
if (!cmd->state.pass) {
tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
- tu6_emit_window_scissor(cs, 0, 0, 0x7fff, 0x7fff);
+ tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff);
}
tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000));
tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf));
tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0,
- .color_format = tu6_base_format(vk_format),
+ .color_format = format,
.color_sint = vk_format_is_sint(vk_format),
.color_uint = vk_format_is_uint(vk_format)));
- tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, .component_enable = mask));
+ tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0,
+ .component_enable = aspect_write_mask(vk_format, aspect_mask)));
tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format)));
tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format)));
+
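+ /* pause predication for the blit/clear draw: aside from
+ * vkCmdClearAttachments (which doesn't go through r3d_setup()), these
+ * ops are not supposed to be predicated; r3d_teardown() re-enables it
+ */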
+ if (cmd->state.predication_active) {
+ tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
+ tu_cs_emit(cs, 0);
+ }
}
static void
tu_cs_emit(cs, 2); /* vertex count */
}
+static void
+r3d_teardown(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+ if (cmd->state.predication_active) {
+ tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
+ tu_cs_emit(cs, 1);
+ }
+}
+
/* blit ops - common interface for 2d/shader paths */
struct blit_ops {
void (*setup)(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
VkFormat vk_format,
+ VkImageAspectFlags aspect_mask,
enum a6xx_rotation rotation,
bool clear,
- uint8_t mask);
+ bool ubwc);
void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
+ void (*teardown)(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs);
};
static const struct blit_ops r2d_ops = {
.dst_buffer = r2d_dst_buffer,
.setup = r2d_setup,
.run = r2d_run,
+ .teardown = r2d_teardown,
};
static const struct blit_ops r3d_ops = {
.dst_buffer = r3d_dst_buffer,
.setup = r3d_setup,
.run = r3d_run,
+ .teardown = r3d_teardown,
};
/* passthrough set coords from 3D extents */
ops->coords(cs, (const VkOffset2D*) dst, (const VkOffset2D*) src, (const VkExtent2D*) extent);
}
+static VkFormat
+copy_format(VkFormat format, VkImageAspectFlags aspect_mask, bool copy_buffer)
+{
+ if (vk_format_is_compressed(format)) {
+ switch (vk_format_get_blocksize(format)) {
+ case 1: return VK_FORMAT_R8_UINT;
+ case 2: return VK_FORMAT_R16_UINT;
+ case 4: return VK_FORMAT_R32_UINT;
+ case 8: return VK_FORMAT_R32G32_UINT;
+ case 16:return VK_FORMAT_R32G32B32A32_UINT;
+ default:
+ unreachable("unhandled format size");
+ }
+ }
+
+ switch (format) {
+ case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+ if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT)
+ return VK_FORMAT_R8G8_UNORM;
+ /* fallthrough */
+ case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+ return VK_FORMAT_R8_UNORM;
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT && copy_buffer)
+ return VK_FORMAT_R8_UNORM;
+ /* fallthrough */
+ default:
+ return format;
+ case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+ return VK_FORMAT_R32_UINT;
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ return VK_FORMAT_S8_UINT;
+ assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT);
+ return VK_FORMAT_D32_SFLOAT;
+ }
+}
+
static void
-tu_image_view_blit2(struct tu_image_view *iview,
- struct tu_image *image,
- VkFormat format,
- const VkImageSubresourceLayers *subres,
- uint32_t layer,
- bool stencil_read)
+tu_image_view_copy_blit(struct tu_image_view *iview,
+ struct tu_image *image,
+ VkFormat format,
+ const VkImageSubresourceLayers *subres,
+ uint32_t layer,
+ bool stencil_read)
{
VkImageAspectFlags aspect_mask = subres->aspectMask;
.baseArrayLayer = subres->baseArrayLayer + layer,
.layerCount = 1,
},
- });
+ }, false);
+}
+
+static void
+tu_image_view_copy(struct tu_image_view *iview,
+ struct tu_image *image,
+ VkFormat format,
+ const VkImageSubresourceLayers *subres,
+ uint32_t layer,
+ bool stencil_read)
+{
+ format = copy_format(format, subres->aspectMask, false);
+ tu_image_view_copy_blit(iview, image, format, subres, layer, stencil_read);
}
static void
const VkImageSubresourceLayers *subres,
uint32_t layer)
{
- tu_image_view_blit2(iview, image, image->vk_format, subres, layer, false);
+ tu_image_view_copy_blit(iview, image, image->vk_format, subres, layer, false);
}
static void
layers = info->dstSubresource.layerCount;
}
- uint8_t mask = 0xf;
- if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
- assert(info->srcSubresource.aspectMask == info->dstSubresource.aspectMask);
- if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
- mask = 0x7;
- if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
- mask = 0x8;
- }
-
/* BC1_RGB_* formats need to have their last components overridden with 1
* when sampling, which is normally handled with the texture descriptor
* swizzle. The 2d path can't handle that, so use the 3d path.
filter == VK_FILTER_CUBIC_EXT)
ops = &r3d_ops;
- /* TODO: shader path fails some of blit_image.all_formats.generate_mipmaps.* tests,
- * figure out why (should be able to pass all tests with only shader path)
+ /* use the right format in setup() for D32_S8
+ * TODO: this probably should use a helper
*/
+ VkFormat format = dst_image->vk_format;
+ if (format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
+ format = VK_FORMAT_D32_SFLOAT;
+ else if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ format = VK_FORMAT_S8_UINT;
+ else
+ unreachable("unexpected D32_S8 aspect mask in blit_image");
+ }
- ops->setup(cmd, cs, dst_image->vk_format, rotate[mirror_y][mirror_x], false, mask);
+ ops->setup(cmd, cs, format, info->dstSubresource.aspectMask,
+ rotate[mirror_y][mirror_x], false, dst_image->layout[0].ubwc);
if (ops == &r3d_ops) {
- r3d_coords_raw(cs, false, (float[]) {
+ r3d_coords_raw(cs, (float[]) {
info->dstOffsets[0].x, info->dstOffsets[0].y,
info->srcOffsets[0].x, info->srcOffsets[0].y,
info->dstOffsets[1].x, info->dstOffsets[1].y,
A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1,
.y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1));
tu_cs_emit_regs(cs,
- A6XX_GRAS_2D_SRC_TL_X(.x = MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)),
- A6XX_GRAS_2D_SRC_BR_X(.x = MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1),
- A6XX_GRAS_2D_SRC_TL_Y(.y = MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)),
- A6XX_GRAS_2D_SRC_BR_Y(.y = MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1));
+ A6XX_GRAS_2D_SRC_TL_X(MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)),
+ A6XX_GRAS_2D_SRC_BR_X(MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1),
+ A6XX_GRAS_2D_SRC_TL_Y(MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)),
+ A6XX_GRAS_2D_SRC_BR_Y(MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1));
}
struct tu_image_view dst, src;
ops->src(cmd, cs, &src, i, filter);
ops->run(cmd, cs);
}
+
+ ops->teardown(cmd, cs);
}
void
tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
- for (uint32_t i = 0; i < regionCount; ++i)
+ for (uint32_t i = 0; i < regionCount; ++i) {
+ /* can't blit both depth and stencil at once with D32_S8
+ * TODO: more advanced 3D blit path to support it instead?
+ */
+ if (src_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
+ dst_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ VkImageBlit region = pRegions[i];
+ uint32_t b;
+ for_each_bit(b, pRegions[i].dstSubresource.aspectMask) {
+ region.srcSubresource.aspectMask = BIT(b);
+ region.dstSubresource.aspectMask = BIT(b);
+ tu6_blit_image(cmd, src_image, dst_image, &region, filter);
+ }
+ continue;
+ }
tu6_blit_image(cmd, src_image, dst_image, pRegions + i, filter);
-}
-
-static VkFormat
-copy_format(VkFormat format)
-{
- switch (vk_format_get_blocksize(format)) {
- case 1: return VK_FORMAT_R8_UINT;
- case 2: return VK_FORMAT_R16_UINT;
- case 4: return VK_FORMAT_R32_UINT;
- case 8: return VK_FORMAT_R32G32_UINT;
- case 12:return VK_FORMAT_R32G32B32_UINT;
- case 16:return VK_FORMAT_R32G32B32A32_UINT;
- default:
- unreachable("unhandled format size");
}
}
{
struct tu_cs *cs = &cmd->cs;
uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
- VkFormat dst_format = dst_image->vk_format;
- VkFormat src_format = dst_image->vk_format;
+ VkFormat src_format =
+ copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, true);
const struct blit_ops *ops = &r2d_ops;
- uint8_t mask = 0xf;
-
- if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
- switch (info->imageSubresource.aspectMask) {
- case VK_IMAGE_ASPECT_STENCIL_BIT:
- src_format = VK_FORMAT_R8_UNORM; /* changes how src buffer is interpreted */
- mask = 0x8;
- ops = &r3d_ops;
- break;
- case VK_IMAGE_ASPECT_DEPTH_BIT:
- mask = 0x7;
- break;
- }
+ /* special case for buffer to stencil */
+ if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
+ info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ ops = &r3d_ops;
}
+ /* TODO: the G8_B8R8_2PLANE_420_UNORM Y plane has a different hardware
+ * format, which matters for UBWC. buffer_to_image/etc can fail because of this
+ */
+
VkOffset3D offset = info->imageOffset;
VkExtent3D extent = info->imageExtent;
uint32_t src_width = info->bufferRowLength ?: extent.width;
uint32_t src_height = info->bufferImageHeight ?: extent.height;
- if (dst_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 || vk_format_is_compressed(src_format)) {
- assert(src_format == dst_format);
- copy_compressed(dst_format, &offset, &extent, &src_width, &src_height);
- src_format = dst_format = copy_format(dst_format);
- }
+ copy_compressed(dst_image->vk_format, &offset, &extent, &src_width, &src_height);
uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
uint32_t layer_size = src_height * pitch;
- /* note: the src_va/pitch alignment of 64 is for 2D engine,
- * it is also valid for 1cpp format with shader path (stencil aspect path)
- */
-
- ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
+ ops->setup(cmd, cs,
+ copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false),
+ info->imageSubresource.aspectMask, ROTATE_0, false, dst_image->layout[0].ubwc);
struct tu_image_view dst;
- tu_image_view_blit2(&dst, dst_image, dst_format, &info->imageSubresource, offset.z, false);
+ tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false);
for (uint32_t i = 0; i < layers; i++) {
ops->dst(cs, &dst, i);
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
{
struct tu_cs *cs = &cmd->cs;
uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
- VkFormat src_format = src_image->vk_format;
- VkFormat dst_format = src_image->vk_format;
+ VkFormat dst_format =
+ copy_format(src_image->vk_format, info->imageSubresource.aspectMask, true);
bool stencil_read = false;
if (src_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
- dst_format = VK_FORMAT_R8_UNORM;
stencil_read = true;
}
uint32_t dst_width = info->bufferRowLength ?: extent.width;
uint32_t dst_height = info->bufferImageHeight ?: extent.height;
- if (dst_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 || vk_format_is_compressed(dst_format)) {
- assert(src_format == dst_format);
- copy_compressed(dst_format, &offset, &extent, &dst_width, &dst_height);
- src_format = dst_format = copy_format(dst_format);
- }
+ copy_compressed(src_image->vk_format, &offset, &extent, &dst_width, &dst_height);
uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
uint32_t layer_size = pitch * dst_height;
- /* note: the dst_va/pitch alignment of 64 is for 2D engine,
- * it is also valid for 1cpp format with shader path (stencil aspect)
- */
-
- ops->setup(cmd, cs, dst_format, ROTATE_0, false, 0xf);
+ ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false);
struct tu_image_view src;
- tu_image_view_blit2(&src, src_image, src_format, &info->imageSubresource, offset.z, stencil_read);
+ tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read);
for (uint32_t i = 0; i < layers; i++) {
ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
static bool
image_is_r8g8(struct tu_image *image)
{
- return image->layout.cpp == 2 &&
+ return image->layout[0].cpp == 2 &&
vk_format_get_nr_components(image->vk_format) == 2;
}
const struct blit_ops *ops = &r2d_ops;
struct tu_cs *cs = &cmd->cs;
- uint8_t mask = 0xf;
- if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
- if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
- mask = 0x7;
- if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
- mask = 0x8;
- }
-
if (dst_image->samples > 1)
ops = &r3d_ops;
- assert(info->srcSubresource.aspectMask == info->dstSubresource.aspectMask);
-
VkFormat format = VK_FORMAT_UNDEFINED;
VkOffset3D src_offset = info->srcOffset;
VkOffset3D dst_offset = info->dstOffset;
copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);
- VkFormat dst_format = vk_format_is_compressed(dst_image->vk_format) ?
- copy_format(dst_image->vk_format) : dst_image->vk_format;
- VkFormat src_format = vk_format_is_compressed(src_image->vk_format) ?
- copy_format(src_image->vk_format) : src_image->vk_format;
+ VkFormat dst_format = copy_format(dst_image->vk_format, info->dstSubresource.aspectMask, false);
+ VkFormat src_format = copy_format(src_image->vk_format, info->srcSubresource.aspectMask, false);
bool use_staging_blit = false;
* the same as a blit.
*/
format = src_format;
- } else if (!src_image->layout.tile_mode) {
+ } else if (!src_image->layout[0].tile_mode) {
/* If an image is linear, we can always safely reinterpret it with the
* other image's format and then do a regular blit.
*/
format = dst_format;
- } else if (!dst_image->layout.tile_mode) {
+ } else if (!dst_image->layout[0].tile_mode) {
format = src_format;
} else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) {
/* We can't currently copy r8g8 images to/from other cpp=2 images,
* to/from it.
*/
use_staging_blit = true;
- } else if (!src_image->layout.ubwc) {
+ } else if (!src_image->layout[0].ubwc) {
format = dst_format;
- } else if (!dst_image->layout.ubwc) {
+ } else if (!dst_image->layout[0].ubwc) {
format = src_format;
} else {
/* Both formats use UBWC and so neither can be reinterpreted.
struct tu_image_view dst, src;
if (use_staging_blit) {
- tu_image_view_blit2(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false);
- tu_image_view_blit2(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false);
+ tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false);
+ tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false);
struct tu_image staging_image = {
.vk_format = src_format,
VkOffset3D staging_offset = { 0 };
- staging_image.layout.tile_mode = TILE6_LINEAR;
- staging_image.layout.ubwc = false;
+ staging_image.layout[0].tile_mode = TILE6_LINEAR;
+ staging_image.layout[0].ubwc = false;
- fdl6_layout(&staging_image.layout,
+ fdl6_layout(&staging_image.layout[0],
vk_format_to_pipe_format(staging_image.vk_format),
staging_image.samples,
staging_image.extent.width,
NULL);
VkResult result = tu_get_scratch_bo(cmd->device,
- staging_image.layout.size,
+ staging_image.layout[0].size,
&staging_image.bo);
if (result != VK_SUCCESS) {
cmd->record_result = result;
MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
struct tu_image_view staging;
- tu_image_view_blit2(&staging, &staging_image, src_format,
- &staging_subresource, 0, false);
+ tu_image_view_copy(&staging, &staging_image, src_format,
+ &staging_subresource, 0, false);
- ops->setup(cmd, cs, src_format, ROTATE_0, false, mask);
+ ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false);
coords(ops, cs, &staging_offset, &src_offset, &extent);
for (uint32_t i = 0; i < info->extent.depth; i++) {
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
- tu_image_view_blit2(&staging, &staging_image, dst_format,
- &staging_subresource, 0, false);
+ tu_image_view_copy(&staging, &staging_image, dst_format,
+ &staging_subresource, 0, false);
- ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
+ ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask,
+ ROTATE_0, false, dst_image->layout[0].ubwc);
coords(ops, cs, &dst_offset, &staging_offset, &extent);
for (uint32_t i = 0; i < info->extent.depth; i++) {
ops->run(cmd, cs);
}
} else {
- tu_image_view_blit2(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
- tu_image_view_blit2(&src, src_image, format, &info->srcSubresource, src_offset.z, false);
+ tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
+ tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z, false);
- ops->setup(cmd, cs, format, ROTATE_0, false, mask);
+ ops->setup(cmd, cs, format, info->dstSubresource.aspectMask,
+ ROTATE_0, false, dst_image->layout[0].ubwc);
coords(ops, cs, &dst_offset, &src_offset, &extent);
for (uint32_t i = 0; i < info->extent.depth; i++) {
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM;
uint64_t blocks = size / block_size;
- ops->setup(cmd, cs, format, ROTATE_0, false, 0xf);
+ ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false);
while (blocks) {
uint32_t src_x = (src_va & 63) / block_size;
dst_va += width * block_size;
blocks -= width;
}
+
+ ops->teardown(cmd, cs);
}
void
tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);
- struct ts_cs_memory tmp;
+ struct tu_cs_memory tmp;
VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64, &tmp);
if (result != VK_SUCCESS) {
cmd->record_result = result;
uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset;
uint32_t blocks = fillSize / 4;
- ops->setup(cmd, cs, VK_FORMAT_R32_UINT, ROTATE_0, true, 0xf);
+ ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, true, false);
ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});
while (blocks) {
dst_va += width * 4;
blocks -= width;
}
+
+ ops->teardown(cmd, cs);
}
void
tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
- ops->setup(cmd, cs, dst_image->vk_format, ROTATE_0, false, 0xf);
+ ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT,
+ ROTATE_0, false, dst_image->layout[0].ubwc);
for (uint32_t i = 0; i < regionCount; ++i) {
const VkImageResolve *info = &pRegions[i];
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
assert(src->image->vk_format == dst->image->vk_format);
- ops->setup(cmd, cs, dst->image->vk_format, ROTATE_0, false, 0xf);
+ ops->setup(cmd, cs, dst->image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT,
+ ROTATE_0, false, dst->ubwc_enabled);
ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);
for (uint32_t i = 0; i < layers; i++) {
ops->dst(cs, dst, i);
ops->run(cmd, cs);
}
+
+ ops->teardown(cmd, cs);
}
static void
clear_image(struct tu_cmd_buffer *cmd,
struct tu_image *image,
const VkClearValue *clear_value,
- const VkImageSubresourceRange *range)
+ const VkImageSubresourceRange *range,
+ VkImageAspectFlags aspect_mask)
{
uint32_t level_count = tu_get_levelCount(image, range);
uint32_t layer_count = tu_get_layerCount(image, range);
struct tu_cs *cs = &cmd->cs;
VkFormat format = image->vk_format;
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- format = VK_FORMAT_R32_UINT;
+ if (format == VK_FORMAT_D32_SFLOAT_S8_UINT || format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ format = copy_format(format, aspect_mask, false);
if (image->type == VK_IMAGE_TYPE_3D) {
assert(layer_count == 1);
assert(range->baseArrayLayer == 0);
}
- uint8_t mask = 0xf;
- if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
- mask = 0;
- if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
- mask |= 0x7;
- if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
- mask |= 0x8;
- }
-
const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops;
- ops->setup(cmd, cs, format, ROTATE_0, true, mask);
- ops->clear_value(cs, image->vk_format, clear_value);
+ ops->setup(cmd, cs, format, aspect_mask, ROTATE_0, true, image->layout[0].ubwc);
+ if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ ops->clear_value(cs, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, clear_value);
+ else
+ ops->clear_value(cs, format, clear_value);
for (unsigned j = 0; j < level_count; j++) {
if (image->type == VK_IMAGE_TYPE_3D)
});
struct tu_image_view dst;
- tu_image_view_blit2(&dst, image, format, &(VkImageSubresourceLayers) {
- .aspectMask = range->aspectMask,
+ tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) {
+ .aspectMask = aspect_mask,
.mipLevel = range->baseMipLevel + j,
.baseArrayLayer = range->baseArrayLayer,
.layerCount = 1,
ops->run(cmd, cs);
}
}
+
+ ops->teardown(cmd, cs);
}
void
tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
for (unsigned i = 0; i < rangeCount; i++)
- clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i);
+ clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT);
}
void
tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
- for (unsigned i = 0; i < rangeCount; i++)
- clear_image(cmd, image, (const VkClearValue*) pDepthStencil, pRanges + i);
-}
-
-static void
-tu_clear_sysmem_attachments_2d(struct tu_cmd_buffer *cmd,
- uint32_t attachment_count,
- const VkClearAttachment *attachments,
- uint32_t rect_count,
- const VkClearRect *rects)
-{
- const struct tu_subpass *subpass = cmd->state.subpass;
- /* note: cannot use shader path here.. there is a special shader path
- * in tu_clear_sysmem_attachments()
- */
- const struct blit_ops *ops = &r2d_ops;
- struct tu_cs *cs = &cmd->draw_cs;
-
- for (uint32_t j = 0; j < attachment_count; j++) {
- /* The vulkan spec, section 17.2 "Clearing Images Inside a Render
- * Pass Instance" says that:
- *
- * Unlike other clear commands, vkCmdClearAttachments executes as
- * a drawing command, rather than a transfer command, with writes
- * performed by it executing in rasterization order. Clears to
- * color attachments are executed as color attachment writes, by
- * the VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT stage.
- * Clears to depth/stencil attachments are executed as depth
- * writes and writes by the
- * VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT and
- * VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT stages.
- *
- * However, the 2d path here is executed the same way as a
- * transfer command, using the CCU color cache exclusively with
- * a special depth-as-color format for depth clears. This means that
- * we can't rely on the normal pipeline barrier mechanism here, and
- * have to manually flush whenever using a different cache domain
- * from what the 3d path would've used. This happens when we clear
- * depth/stencil, since normally depth attachments use CCU depth, but
- * we clear it using a special depth-as-color format. Since the clear
- * potentially uses a different attachment state we also need to
- * invalidate color beforehand and flush it afterwards.
- */
-
- uint32_t a;
- if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
- } else {
- a = subpass->depth_stencil_attachment.attachment;
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS);
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
- tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
- }
-
- if (a == VK_ATTACHMENT_UNUSED)
- continue;
-
- uint8_t mask = 0xf;
- if (cmd->state.pass->attachments[a].format == VK_FORMAT_D24_UNORM_S8_UINT) {
- if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT))
- mask &= ~0x7;
- if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT))
- mask &= ~0x8;
- }
-
- const struct tu_image_view *iview =
- cmd->state.framebuffer->attachments[a].attachment;
-
- ops->setup(cmd, cs, iview->image->vk_format, ROTATE_0, true, mask);
- ops->clear_value(cs, iview->image->vk_format, &attachments[j].clearValue);
+ for (unsigned i = 0; i < rangeCount; i++) {
+ const VkImageSubresourceRange *range = &pRanges[i];
- /* Wait for the flushes we triggered manually to complete */
- tu_cs_emit_wfi(cs);
-
- for (uint32_t i = 0; i < rect_count; i++) {
- ops->coords(cs, &rects[i].rect.offset, NULL, &rects[i].rect.extent);
- for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
- ops->dst(cs, iview, rects[i].baseArrayLayer + layer);
- ops->run(cmd, cs);
- }
- }
+ if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ /* can't clear both depth and stencil at once, split up the aspect mask */
+ uint32_t b;
+ for_each_bit(b, range->aspectMask)
+ clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b));
+ continue;
+ }
- if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
- tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
- } else {
- /* sync color into depth */
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
- tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
- }
+ clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask);
}
}
max_samples = MAX2(max_samples, pass->attachments[a].samples);
}
- /* prefer to use 2D path for clears
- * 2D can't clear separate depth/stencil and msaa, needs known framebuffer
- */
- if (max_samples == 1 && cmd->state.framebuffer) {
- tu_clear_sysmem_attachments_2d(cmd, attachment_count, attachments, rect_count, rects);
- return;
- }
-
- /* This clear path behaves like a draw, needs the same flush as tu_draw */
- tu_emit_cache_flush_renderpass(cmd, cs);
-
/* disable all draw states so they don't interfere
- * TODO: use and re-use draw states for this path
+ * TODO: use and re-use draw states
* we have to disable draw states individually to preserve
* input attachment states, because a secondary command buffer
* won't be able to restore them
for (uint32_t i = 0; i < rect_count; i++) {
for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
- r3d_coords_raw(cs, layered_clear, (float[]) {
+ r3d_coords_raw(cs, (float[]) {
rects[i].rect.offset.x, rects[i].rect.offset.y,
z_clear_val, uif(rects[i].baseArrayLayer + layer),
rects[i].rect.offset.x + rects[i].rect.extent.width,
rects[i].rect.offset.y + rects[i].rect.extent.height,
z_clear_val, 1.0f,
});
-
- if (layered_clear) {
- tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
- tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_POINTLIST) |
- CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
- CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) |
- CP_DRAW_INDX_OFFSET_0_GS_ENABLE);
- tu_cs_emit(cs, 1); /* instance count */
- tu_cs_emit(cs, 1); /* vertex count */
- } else {
- r3d_run(cmd, cs);
- }
+ r3d_run(cmd, cs);
}
}
}
-/**
- * Pack a VkClearValue into a 128-bit buffer. format is respected except
- * for the component order. The components are always packed in WZYX order,
- * because gmem is tiled and tiled formats always have WZYX swap
- */
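+/**
+ * Pack a VkClearValue into a 128-bit buffer. format is respected except
+ * for the component order: the components are always packed in WZYX order,
+ * because gmem is tiled and tiled formats always have the WZYX swap.
+ */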
static void
-pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t buf[4])
+pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_value[4])
{
- const struct util_format_description *desc = vk_format_description(format);
-
switch (format) {
- case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
- buf[0] = float3_to_r11g11b10f(val->color.float32);
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) |
+ val->depthStencil.stencil << 24;
return;
- case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
- buf[0] = float3_to_rgb9e5(val->color.float32);
+ case VK_FORMAT_D16_UNORM:
+ clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16);
+ return;
+ case VK_FORMAT_D32_SFLOAT:
+ clear_value[0] = fui(val->depthStencil.depth);
+ return;
+ case VK_FORMAT_S8_UINT:
+ clear_value[0] = val->depthStencil.stencil;
return;
default:
break;
}
- assert(desc && desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
-
- /* S8_UINT is special and has no depth */
- const int max_components =
- format == VK_FORMAT_S8_UINT ? 2 : desc->nr_channels;
-
- int buf_offset = 0;
- int bit_shift = 0;
- for (int comp = 0; comp < max_components; comp++) {
- const struct util_format_channel_description *ch =
- tu_get_format_channel_description(desc, comp);
- if (!ch) {
- assert((format == VK_FORMAT_S8_UINT && comp == 0) ||
- (format == VK_FORMAT_X8_D24_UNORM_PACK32 && comp == 1));
- continue;
- }
-
- union tu_clear_component_value v = tu_get_clear_component_value(
- val, comp, desc->colorspace);
-
- /* move to the next uint32_t when there is not enough space */
- assert(ch->size <= 32);
- if (bit_shift + ch->size > 32) {
- buf_offset++;
- bit_shift = 0;
- }
-
- if (bit_shift == 0)
- buf[buf_offset] = 0;
+ float tmp[4];
+ memcpy(tmp, val->color.float32, 4 * sizeof(float));
+ if (vk_format_is_srgb(format)) {
+ for (int i = 0; i < 4; i++)
+ tmp[i] = util_format_linear_to_srgb_float(tmp[i]);
+ }
- buf[buf_offset] |= tu_pack_clear_component_value(v, ch) << bit_shift;
- bit_shift += ch->size;
+#define PACK_F(type) util_format_##type##_pack_rgba_float \
+ ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
+ switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
+ case 4:
+ PACK_F(r4g4b4a4_unorm);
+ break;
+ case 5:
+ if (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6)
+ PACK_F(r5g6b5_unorm);
+ else
+ PACK_F(r5g5b5a1_unorm);
+ break;
+ case 8:
+ if (vk_format_is_snorm(format))
+ PACK_F(r8g8b8a8_snorm);
+ else if (vk_format_is_unorm(format))
+ PACK_F(r8g8b8a8_unorm);
+ else
+ pack_int8(clear_value, val->color.uint32);
+ break;
+ case 10:
+ if (vk_format_is_int(format))
+ pack_int10_2(clear_value, val->color.uint32);
+ else
+ PACK_F(r10g10b10a2_unorm);
+ break;
+ case 11:
+ clear_value[0] = float3_to_r11g11b10f(val->color.float32);
+ break;
+ case 16:
+ if (vk_format_is_snorm(format))
+ PACK_F(r16g16b16a16_snorm);
+ else if (vk_format_is_unorm(format))
+ PACK_F(r16g16b16a16_unorm);
+ else if (vk_format_is_float(format))
+ PACK_F(r16g16b16a16_float);
+ else
+ pack_int16(clear_value, val->color.uint32);
+ break;
+ case 32:
+ memcpy(clear_value, val->color.float32, 4 * sizeof(float));
+ break;
+ default:
+ unreachable("unexpected channel size");
}
+#undef PACK_F
}
static void
-tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
- struct tu_cs *cs,
- uint32_t attachment,
- uint8_t component_mask,
- const VkClearValue *value)
+clear_gmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat format,
+ uint8_t clear_mask,
+ uint32_t gmem_offset,
+ const VkClearValue *value)
{
- VkFormat vk_format = cmd->state.pass->attachments[attachment].format;
- /* note: component_mask is 0x7 for depth and 0x8 for stencil
- * because D24S8 is cleared with AS_R8G8B8A8 format
- */
-
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
- tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(vk_format)));
+ tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format)));
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
- tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(component_mask));
+ tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1, .clear_mask = clear_mask));
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
- tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);
+ tu_cs_emit(cs, gmem_offset);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
tu_cs_emit(cs, 0);
uint32_t clear_vals[4] = {};
- pack_gmem_clear_value(value, vk_format, clear_vals);
+ pack_gmem_clear_value(value, format, clear_vals);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
tu_cs_emit_array(cs, clear_vals, 4);
tu6_emit_event_write(cmd, cs, BLIT);
}
+static void
+tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ uint32_t attachment,
+ VkImageAspectFlags mask,
+ const VkClearValue *value)
+{
+ const struct tu_render_pass_attachment *att =
+ &cmd->state.pass->attachments[attachment];
+
+ if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
+ clear_gmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, 0xf, att->gmem_offset, value);
+ if (mask & VK_IMAGE_ASPECT_STENCIL_BIT)
+ clear_gmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value);
+ return;
+ }
+
+ clear_gmem_attachment(cmd, cs, att->format, aspect_write_mask(att->format, mask), att->gmem_offset, value);
+}
+
static void
tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
uint32_t attachment_count,
if (a == VK_ATTACHMENT_UNUSED)
continue;
- unsigned clear_mask = 0xf;
- if (cmd->state.pass->attachments[a].format == VK_FORMAT_D24_UNORM_S8_UINT) {
- if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT))
- clear_mask &= ~0x7;
- if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT))
- clear_mask &= ~0x8;
- }
-
- tu_emit_clear_gmem_attachment(cmd, cs, a, clear_mask,
+ tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
&attachments[j].clearValue);
}
}
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs *cs = &cmd->draw_cs;
+ /* the sysmem path behaves like a draw; note that we don't have a way of
+ * using different flushes for sysmem/gmem, so this flush needs to be
+ * outside of the cond_exec
+ */
+ tu_emit_cache_flush_renderpass(cmd, cs);
+
+ /* vkCmdClearAttachments is supposed to respect the predicate if active.
+ * The easiest way to do this is to always use the 3d path, which always
+ * works even with GMEM because it's just a simple draw using the existing
+ * attachment state. However, it seems that IGNORE_VISIBILITY draws must be
+ * skipped in the binning pass, since otherwise they produce binning data
+ * which isn't consumed and leads to the wrong binning data being read, so
+ * condition on GMEM | SYSMEM.
+ */
+ if (cmd->state.predication_active) {
+ tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM |
+ CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
+ tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
+ tu_cond_exec_end(cs);
+ return;
+ }
+
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
tu_cond_exec_end(cs);
tu_cond_exec_end(cs);
}
+static void
+clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ VkFormat format,
+ VkImageAspectFlags clear_mask,
+ const VkRenderPassBeginInfo *info,
+ uint32_t a,
+ bool separate_stencil)
+{
+ const struct tu_framebuffer *fb = cmd->state.framebuffer;
+ const struct tu_image_view *iview = fb->attachments[a].attachment;
+ const struct blit_ops *ops = &r2d_ops;
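+ /* the 2D engine can't write multisampled destinations, so fall back to
+ * the 3D path for MSAA attachments
+ */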
+ if (cmd->state.pass->attachments[a].samples > 1)
+ ops = &r3d_ops;
+
+ ops->setup(cmd, cs, format, clear_mask, ROTATE_0, true, iview->ubwc_enabled);
+ ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
+ ops->clear_value(cs, format, &info->pClearValues[a]);
+
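+ /* emit one clear per framebuffer layer */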
+ for (uint32_t i = 0; i < fb->layers; i++) {
+ if (separate_stencil) {
+ if (ops == &r3d_ops)
+ r3d_dst_stencil(cs, iview, i);
+ else
+ r2d_dst_stencil(cs, iview, i);
+ } else {
+ ops->dst(cs, iview, i);
+ }
+ ops->run(cmd, cs);
+ }
+
+ ops->teardown(cmd, cs);
+}
+
void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
const VkRenderPassBeginInfo *info)
{
- const struct tu_framebuffer *fb = cmd->state.framebuffer;
- const struct tu_image_view *iview = fb->attachments[a].attachment;
const struct tu_render_pass_attachment *attachment =
&cmd->state.pass->attachments[a];
- uint8_t mask = 0;
- if (attachment->clear_mask == VK_IMAGE_ASPECT_COLOR_BIT)
- mask = 0xf;
- if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
- mask |= 0x7;
- if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
- mask |= 0x8;
-
- if (!mask)
+ if (!attachment->clear_mask)
return;
- const struct blit_ops *ops = &r2d_ops;
- if (attachment->samples > 1)
- ops = &r3d_ops;
-
- ops->setup(cmd, cs, attachment->format, ROTATE_0, true, mask);
- ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
- ops->clear_value(cs, attachment->format, &info->pClearValues[a]);
-
/* Wait for any flushes at the beginning of the renderpass to complete */
tu_cs_emit_wfi(cs);
- for (uint32_t i = 0; i < fb->layers; i++) {
- ops->dst(cs, iview, i);
- ops->run(cmd, cs);
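+ /* each plane is cleared as a color blit in its own format, so pass the
+ * color aspect to get a full component write mask
+ */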
+ if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT,
+ info, a, false);
+ }
+ if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT,
+ info, a, true);
+ }
+ } else {
+ clear_sysmem_attachment(cmd, cs, attachment->format, attachment->clear_mask,
+ info, a, false);
}
/* The spec doesn't explicitly say, but presumably the initial renderpass
{
const struct tu_render_pass_attachment *attachment =
&cmd->state.pass->attachments[a];
- unsigned clear_mask = 0;
-
- if (attachment->clear_mask == VK_IMAGE_ASPECT_COLOR_BIT)
- clear_mask = 0xf;
- if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
- clear_mask |= 0x7;
- if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
- clear_mask |= 0x8;
- if (!clear_mask)
+ if (!attachment->clear_mask)
return;
tu_cs_emit_regs(cs, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
- tu_emit_clear_gmem_attachment(cmd, cs, a, clear_mask,
+ tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask,
&info->pClearValues[a]);
}
struct tu_cs *cs,
const struct tu_image_view *iview,
const struct tu_render_pass_attachment *attachment,
- bool resolve)
+ bool resolve,
+ bool separate_stencil)
{
tu_cs_emit_regs(cs,
A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
.integer = vk_format_is_int(attachment->format)));
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
- tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
- tu_cs_image_ref_2d(cs, iview, 0, false);
+ if (separate_stencil) {
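+ /* the separate stencil plane is stored uncompressed, so clear the FLAGS
+ * bit and skip the UBWC flag-buffer setup
+ */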
+ tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
+ tu_cs_emit_qw(cs, iview->stencil_base_addr);
+ tu_cs_emit(cs, iview->stencil_PITCH);
- tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
- tu_cs_image_flag_ref(cs, iview, 0);
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil));
+ } else {
+ tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
+ tu_cs_image_ref_2d(cs, iview, 0, false);
- tu_cs_emit_regs(cs,
- A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
+ tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
+ tu_cs_image_flag_ref(cs, iview, 0);
+
+ tu_cs_emit_regs(cs,
+ A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
+ }
tu6_emit_event_write(cmd, cs, BLIT);
}
&cmd->state.pass->attachments[a];
if (attachment->load || force_load)
- tu_emit_blit(cmd, cs, iview, attachment, false);
+ tu_emit_blit(cmd, cs, iview, attachment, false, false);
+
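+ /* the stencil plane of D32_SFLOAT_S8_UINT is loaded with its own blit */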
+ if (attachment->load_stencil || (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load))
+ tu_emit_blit(cmd, cs, iview, attachment, false, true);
+}
+
+static void
+store_cp_blit(struct tu_cmd_buffer *cmd,
+ struct tu_cs *cs,
+ struct tu_image_view *iview,
+ uint32_t samples,
+ bool separate_stencil,
+ VkFormat format,
+ uint32_t gmem_offset,
+ uint32_t cpp)
+{
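+ /* store one plane of the attachment from GMEM to sysmem with the 2D
+ * engine, reading raw GMEM as the blit source
+ */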
+ r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false,
+ iview->ubwc_enabled, true);
+ if (separate_stencil)
+ r2d_dst_stencil(cs, iview, 0);
+ else
+ r2d_dst(cs, iview, 0);
+
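+ /* the source is the attachment's GMEM copy: tiled (TILE6_2) and located
+ * at gmem_base + gmem_offset
+ */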
+ tu_cs_emit_regs(cs,
+ A6XX_SP_PS_2D_SRC_INFO(
+ .color_format = tu6_format_texture(format, TILE6_2).fmt,
+ .tile_mode = TILE6_2,
+ .srgb = vk_format_is_srgb(format),
+ .samples = tu_msaa_samples(samples),
+ .samples_average = !vk_format_is_int(format),
+ .unk20 = 1,
+ .unk22 = 1),
+ /* note: src size does not matter when not scaling */
+ A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
+ A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + gmem_offset),
+ A6XX_SP_PS_2D_SRC_HI(),
+ A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.framebuffer->tile0.width * cpp));
+
+ /* sync GMEM writes with CACHE, so the CP_BLIT below reads the
+ * just-written GMEM contents. */
+ tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
+
+ /* Wait for CACHE_INVALIDATE to land */
+ tu_cs_emit_wfi(cs);
+
+ tu_cs_emit_pkt7(cs, CP_BLIT, 1);
+ tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+ /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
+ * sysmem, and we generally assume that GMEM renderpasses leave their
+ * results in sysmem, so we need to flush manually here.
+ */
+ tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
}
void
uint32_t a,
uint32_t gmem_a)
{
- const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
- const VkRect2D *render_area = &tiling->render_area;
+ const VkRect2D *render_area = &cmd->state.render_area;
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
- if (!dst->store)
+ if (!dst->store && !dst->store_stencil)
return;
uint32_t x1 = render_area->offset.x;
/* use fast path when render area is aligned, except for unsupported resolve cases */
if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
- tu_emit_blit(cmd, cs, iview, src, true);
+ if (dst->store)
+ tu_emit_blit(cmd, cs, iview, src, true, false);
+ if (dst->store_stencil)
+ tu_emit_blit(cmd, cs, iview, src, true, true);
return;
}
return;
}
- r2d_setup_common(cmd, cs, dst->format, ROTATE_0, false, 0xf, true);
- r2d_dst(cs, iview, 0);
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
- tu_cs_emit_regs(cs,
- A6XX_SP_PS_2D_SRC_INFO(
- .color_format = tu6_format_texture(src->format, TILE6_2).fmt,
- .tile_mode = TILE6_2,
- .srgb = vk_format_is_srgb(src->format),
- .samples = tu_msaa_samples(src->samples),
- .samples_average = !vk_format_is_int(src->format),
- .unk20 = 1,
- .unk22 = 1),
- /* note: src size does not matter when not scaling */
- A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
- A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
- A6XX_SP_PS_2D_SRC_HI(),
- A6XX_SP_PS_2D_SRC_PITCH(.pitch = tiling->tile0.extent.width * src->cpp));
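+ /* the depth plane of D32_SFLOAT_S8_UINT lives in GMEM as plain D32; the
+ * stencil plane is stored and stored-out separately as S8
+ */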
+ VkFormat format = src->format;
+ if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+ format = VK_FORMAT_D32_SFLOAT;
- /* sync GMEM writes with CACHE. */
- tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
-
- /* Wait for CACHE_INVALIDATE to land */
- tu_cs_emit_wfi(cs);
-
- tu_cs_emit_pkt7(cs, CP_BLIT, 1);
- tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
- /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
- * sysmem, and we generally assume that GMEM renderpasses leave their
- * results in sysmem, so we need to flush manually here.
- */
- tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
+ if (dst->store) {
+ store_cp_blit(cmd, cs, iview, src->samples, false, format,
+ src->gmem_offset, src->cpp);
+ }
+ if (dst->store_stencil) {
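+ /* note: GMEM cpp for the S8 plane is 1 byte per sample, i.e. equal to
+ * the sample count
+ */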
+ store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
+ src->gmem_offset_stencil, src->samples);
+ }
}