/*
 * Copyright 2019-2020 Valve Corporation
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include "tu_private.h"

#include "tu_cs.h"
#include "vk_format.h"

#include "util/format_r11g11b10f.h"
#include "util/format_rgb9e5.h"
#include "util/format_srgb.h"
#include "util/u_half.h"

/* helper functions previously in tu_formats.c */
static uint32_t
tu_pack_mask(int bits)
{
   assert(bits <= 32);
   return (1ull << bits) - 1;
}
static uint32_t
tu_pack_float32_for_unorm(float val, int bits)
{
   const uint32_t max = tu_pack_mask(bits);
   if (val < 0.0f)
      return 0;
   else if (val > 1.0f)
      return max;
   else
      return _mesa_lroundevenf(val * (float) max);
}
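/* Note: _mesa_lroundevenf is round-to-nearest-even, which matches the
 * rounding the Vulkan spec allows for unorm/snorm conversion. The snorm and
 * sscaled helpers below mask the (possibly negative) result down to the
 * field width, so two's-complement values pack correctly into narrow
 * channels.
 */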
static uint32_t
tu_pack_float32_for_snorm(float val, int bits)
{
   const int32_t max = tu_pack_mask(bits - 1);
   int32_t tmp;
   if (val < -1.0f)
      tmp = -max;
   else if (val > 1.0f)
      tmp = max;
   else
      tmp = _mesa_lroundevenf(val * (float) max);

   return tmp & tu_pack_mask(bits);
}
static uint32_t
tu_pack_float32_for_uscaled(float val, int bits)
{
   const uint32_t max = tu_pack_mask(bits);
   if (val < 0.0f)
      return 0;
   else if (val > (float) max)
      return max;
   else
      return (uint32_t) val;
}
static uint32_t
tu_pack_float32_for_sscaled(float val, int bits)
{
   const int32_t max = tu_pack_mask(bits - 1);
   const int32_t min = -max - 1;
   int32_t tmp;
   if (val < (float) min)
      tmp = min;
   else if (val > (float) max)
      tmp = max;
   else
      tmp = (int32_t) val;

   return tmp & tu_pack_mask(bits);
}
static uint32_t
tu_pack_uint32_for_uint(uint32_t val, int bits)
{
   return val & tu_pack_mask(bits);
}
static uint32_t
tu_pack_int32_for_sint(int32_t val, int bits)
{
   return val & tu_pack_mask(bits);
}
static uint32_t
tu_pack_float32_for_sfloat(float val, int bits)
{
   assert(bits == 16 || bits == 32);
   return bits == 16 ? util_float_to_half(val) : fui(val);
}
union tu_clear_component_value {
   float float32;
   int32_t int32;
   uint32_t uint32;
};
static uint32_t
tu_pack_clear_component_value(union tu_clear_component_value val,
                              const struct util_format_channel_description *ch)
{
   uint32_t packed;

   switch (ch->type) {
   case UTIL_FORMAT_TYPE_UNSIGNED:
      /* normalized, scaled, or pure integer */
      if (ch->normalized)
         packed = tu_pack_float32_for_unorm(val.float32, ch->size);
      else if (ch->pure_integer)
         packed = tu_pack_uint32_for_uint(val.uint32, ch->size);
      else
         packed = tu_pack_float32_for_uscaled(val.float32, ch->size);
      break;
   case UTIL_FORMAT_TYPE_SIGNED:
      /* normalized, scaled, or pure integer */
      if (ch->normalized)
         packed = tu_pack_float32_for_snorm(val.float32, ch->size);
      else if (ch->pure_integer)
         packed = tu_pack_int32_for_sint(val.int32, ch->size);
      else
         packed = tu_pack_float32_for_sscaled(val.float32, ch->size);
      break;
   case UTIL_FORMAT_TYPE_FLOAT:
      packed = tu_pack_float32_for_sfloat(val.float32, ch->size);
      break;
   default:
      unreachable("unexpected channel type");
      packed = 0;
      break;
   }

   assert((packed & tu_pack_mask(ch->size)) == packed);
   return packed;
}
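/* Example: packing 0.5 into a 6-bit unorm channel (e.g. the green channel
 * of B5G6R5) gives _mesa_lroundevenf(0.5 * 63) = 32 = 0x20.
 */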
static const struct util_format_channel_description *
tu_get_format_channel_description(const struct util_format_description *desc,
                                  int comp)
{
   switch (desc->swizzle[comp]) {
   case PIPE_SWIZZLE_X:
      return &desc->channel[0];
   case PIPE_SWIZZLE_Y:
      return &desc->channel[1];
   case PIPE_SWIZZLE_Z:
      return &desc->channel[2];
   case PIPE_SWIZZLE_W:
      return &desc->channel[3];
   default:
      return NULL;
   }
}
static union tu_clear_component_value
tu_get_clear_component_value(const VkClearValue *val, int comp,
                             enum util_format_colorspace colorspace)
{
   assert(comp < 4);

   union tu_clear_component_value tmp;
   switch (colorspace) {
   case UTIL_FORMAT_COLORSPACE_ZS:
      assert(comp < 2);
      if (comp == 0)
         tmp.float32 = val->depthStencil.depth;
      else
         tmp.uint32 = val->depthStencil.stencil;
      break;
   case UTIL_FORMAT_COLORSPACE_SRGB:
      if (comp < 3) {
         tmp.float32 = util_format_linear_to_srgb_float(val->color.float32[comp]);
         break;
      }
      /* fallthrough: the alpha channel is not sRGB-encoded */
   default:
      tmp.uint32 = val->color.uint32[comp];
      break;
   }

   return tmp;
}
/* r2d_ = BLIT_OP_SCALE operations */

static enum a6xx_2d_ifmt
format_to_ifmt(enum a6xx_format fmt)
{
   switch (fmt) {
   case FMT6_A8_UNORM:
   case FMT6_8_UNORM:
   case FMT6_8_SNORM:
   case FMT6_8_8_UNORM:
   case FMT6_8_8_SNORM:
   case FMT6_8_8_8_8_UNORM:
   case FMT6_8_8_8_X8_UNORM:
   case FMT6_8_8_8_8_SNORM:
   case FMT6_4_4_4_4_UNORM:
   case FMT6_5_5_5_1_UNORM:
   case FMT6_5_6_5_UNORM:
   case FMT6_Z24_UNORM_S8_UINT:
   case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
      return R2D_UNORM8;

   case FMT6_32_UINT:
   case FMT6_32_SINT:
   case FMT6_32_32_UINT:
   case FMT6_32_32_SINT:
   case FMT6_32_32_32_32_UINT:
   case FMT6_32_32_32_32_SINT:
      return R2D_INT32;

   case FMT6_16_UINT:
   case FMT6_16_SINT:
   case FMT6_16_16_UINT:
   case FMT6_16_16_SINT:
   case FMT6_16_16_16_16_UINT:
   case FMT6_16_16_16_16_SINT:
   case FMT6_10_10_10_2_UINT:
      return R2D_INT16;

   case FMT6_8_UINT:
   case FMT6_8_SINT:
   case FMT6_8_8_UINT:
   case FMT6_8_8_SINT:
   case FMT6_8_8_8_8_UINT:
   case FMT6_8_8_8_8_SINT:
      return R2D_INT8;

   case FMT6_16_UNORM:
   case FMT6_16_SNORM:
   case FMT6_16_16_UNORM:
   case FMT6_16_16_SNORM:
   case FMT6_16_16_16_16_UNORM:
   case FMT6_16_16_16_16_SNORM:
   case FMT6_32_FLOAT:
   case FMT6_32_32_FLOAT:
   case FMT6_32_32_32_32_FLOAT:
      return R2D_FLOAT32;

   case FMT6_16_FLOAT:
   case FMT6_16_16_FLOAT:
   case FMT6_16_16_16_16_FLOAT:
   case FMT6_11_11_10_FLOAT:
   case FMT6_10_10_10_2_UNORM:
   case FMT6_10_10_10_2_UNORM_DEST:
      return R2D_FLOAT16;

   default:
      unreachable("bad format");
   }
}
static void
r2d_coords(struct tu_cs *cs,
           const VkOffset2D *dst,
           const VkOffset2D *src,
           const VkExtent2D *extent)
{
   tu_cs_emit_regs(cs,
      A6XX_GRAS_2D_DST_TL(.x = dst->x, .y = dst->y),
      A6XX_GRAS_2D_DST_BR(.x = dst->x + extent->width - 1, .y = dst->y + extent->height - 1));

   if (!src)
      return;

   tu_cs_emit_regs(cs,
      A6XX_GRAS_2D_SRC_TL_X(.x = src->x),
      A6XX_GRAS_2D_SRC_BR_X(.x = src->x + extent->width - 1),
      A6XX_GRAS_2D_SRC_TL_Y(.y = src->y),
      A6XX_GRAS_2D_SRC_BR_Y(.y = src->y + extent->height - 1));
}
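/* Note: the DST_BR/SRC_BR registers hold inclusive bottom-right coordinates,
 * hence the "- 1" above.
 */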
static void
r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
{
   uint32_t clear_value[4] = {};

   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      /* cleared as r8g8b8a8_unorm using special format */
      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
      clear_value[1] = clear_value[0] >> 8;
      clear_value[2] = clear_value[0] >> 16;
      clear_value[3] = val->depthStencil.stencil;
      break;
   case VK_FORMAT_D16_UNORM:
   case VK_FORMAT_D32_SFLOAT:
      /* R2D_FLOAT32 */
      clear_value[0] = fui(val->depthStencil.depth);
      break;
   case VK_FORMAT_S8_UINT:
      clear_value[0] = val->depthStencil.stencil;
      break;
   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      /* cleared as UINT32 */
      clear_value[0] = float3_to_rgb9e5(val->color.float32);
      break;
   default:
      assert(!vk_format_is_depth_or_stencil(format));
      const struct util_format_description *desc = vk_format_description(format);
      enum a6xx_2d_ifmt ifmt = format_to_ifmt(tu6_base_format(format));

      assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
                      format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));

      for (unsigned i = 0; i < desc->nr_channels; i++) {
         const struct util_format_channel_description *ch = &desc->channel[i];
         if (ifmt == R2D_UNORM8) {
            float linear = val->color.float32[i];
            if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3)
               linear = util_format_linear_to_srgb_float(val->color.float32[i]);

            if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
               clear_value[i] = tu_pack_float32_for_snorm(linear, 8);
            else
               clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
         } else if (ifmt == R2D_FLOAT16) {
            clear_value[i] = util_float_to_half(val->color.float32[i]);
         } else {
            assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 ||
                   ifmt == R2D_INT16 || ifmt == R2D_INT8);
            clear_value[i] = val->color.uint32[i];
         }
      }
      break;
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
   tu_cs_emit_array(cs, clear_value, 4);
}
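/* Example: clearing D24S8 with depth = 1.0 packs 0xffffff into
 * clear_value[0], with byte-shifted copies in [1]/[2]; with the special
 * r8g8b8a8 clear format the hardware appears to take only the low byte of
 * each channel value, spreading the three depth bytes across r/g/b and the
 * stencil value into a.
 */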
static void
r2d_src(struct tu_cmd_buffer *cmd,
        struct tu_cs *cs,
        const struct tu_image_view *iview,
        uint32_t layer,
        bool linear_filter)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
   tu_cs_emit(cs, iview->SP_PS_2D_SRC_INFO |
                  COND(linear_filter, A6XX_SP_PS_2D_SRC_INFO_FILTER));
   tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE);
   tu_cs_image_ref_2d(cs, iview, layer, true);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 3);
   tu_cs_image_flag_ref(cs, iview, layer);
}
static void
r2d_src_buffer(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               VkFormat vk_format,
               uint64_t va, uint32_t pitch,
               uint32_t width, uint32_t height)
{
   struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);

   tu_cs_emit_regs(cs,
                   A6XX_SP_PS_2D_SRC_INFO(
                      .color_format = format.fmt,
                      .color_swap = format.swap,
                      .srgb = vk_format_is_srgb(vk_format),
                      .unk20 = 1,
                      .unk22 = 1),
                   A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height),
                   A6XX_SP_PS_2D_SRC_LO((uint32_t) va),
                   A6XX_SP_PS_2D_SRC_HI(va >> 32),
                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = pitch));
}
static void
r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
   assert(iview->image->samples == 1);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
   tu_cs_emit(cs, iview->RB_2D_DST_INFO);
   tu_cs_image_ref_2d(cs, iview, layer, false);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 3);
   tu_cs_image_flag_ref(cs, iview, layer);
}
static void
r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
   struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);

   tu_cs_emit_regs(cs,
                   A6XX_RB_2D_DST_INFO(
                      .color_format = format.fmt,
                      .color_swap = format.swap,
                      .srgb = vk_format_is_srgb(vk_format)),
                   A6XX_RB_2D_DST_LO((uint32_t) va),
                   A6XX_RB_2D_DST_HI(va >> 32),
                   A6XX_RB_2D_DST_SIZE(.pitch = pitch));
}
static void
r2d_setup_common(struct tu_cmd_buffer *cmd,
                 struct tu_cs *cs,
                 VkFormat vk_format,
                 enum a6xx_rotation rotation,
                 bool clear,
                 uint8_t mask,
                 bool scissor)
{
   enum a6xx_format format = tu6_base_format(vk_format);
   enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
   uint32_t unknown_8c01 = 0;

   if (format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8) {
      /* preserve depth channels */
      if (mask == 0x8)
         unknown_8c01 = 0x00084001;
      /* preserve stencil channel */
      if (mask == 0x7)
         unknown_8c01 = 0x08000041;
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
   tu_cs_emit(cs, unknown_8c01);

   uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL(
      .scissor = scissor,
      .rotate = rotation,
      .solid_color = clear,
      .d24s8 = format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
      .color_format = format,
      .mask = 0xf,
      .ifmt = vk_format_is_srgb(vk_format) ? R2D_UNORM8_SRGB : ifmt,
   ).value;

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
   tu_cs_emit(cs, blit_cntl);

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
   tu_cs_emit(cs, blit_cntl);

   if (format == FMT6_10_10_10_2_UNORM_DEST)
      format = FMT6_16_16_16_16_FLOAT;

   tu_cs_emit_regs(cs, A6XX_SP_2D_SRC_FORMAT(
      .sint = vk_format_is_sint(vk_format),
      .uint = vk_format_is_uint(vk_format),
      .color_format = format,
      .srgb = vk_format_is_srgb(vk_format),
      .mask = 0xf));
}
static void
r2d_setup(struct tu_cmd_buffer *cmd,
          struct tu_cs *cs,
          VkFormat vk_format,
          enum a6xx_rotation rotation,
          bool clear,
          uint8_t mask)
{
   tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);

   r2d_setup_common(cmd, cs, vk_format, rotation, clear, mask, false);
}
static void
r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
}
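/* CP_BLIT consumes the GRAS_2D_* coordinates and the src/dst state
 * programmed by the functions above; each r2d_run() call performs a single
 * BLIT_OP_SCALE operation.
 */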
/* r3d_ = shader path operations */

static void
r3d_pipeline(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_rts)
{
   struct ir3_shader dummy_shader = {};

   struct ir3_shader_variant vs = {
      .type = MESA_SHADER_VERTEX,
      .instrlen = 1,
      .constlen = 2,
      .info.max_reg = 1,
      .inputs_count = 1,
      .inputs[0] = {
         .slot = SYSTEM_VALUE_VERTEX_ID,
         .regid = regid(0, 3),
         .sysval = true,
      },
      .outputs_count = blit ? 2 : 1,
      .outputs[0] = {
         .slot = VARYING_SLOT_POS,
         .regid = regid(0, 0),
      },
      .outputs[1] = {
         .slot = VARYING_SLOT_VAR0,
         .regid = regid(1, 0),
      },
      .shader = &dummy_shader,
   };

   struct ir3_shader_variant fs = {
      .type = MESA_SHADER_FRAGMENT,
      .instrlen = 1, /* max of 9 instructions with num_rts = 8 */
      .constlen = num_rts,
      .info.max_reg = MAX2(num_rts, 1) - 1,
      .total_in = blit ? 2 : 0,
      .num_samp = blit ? 1 : 0,
      .inputs_count = blit ? 2 : 0,
      .inputs[0] = {
         .slot = VARYING_SLOT_VAR0,
         .inloc = 0,
         .compmask = 3,
         .bary = true,
      },
      .inputs[1] = {
         .slot = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL,
         .regid = regid(0, 0),
         .sysval = true,
      },
      .num_sampler_prefetch = blit ? 1 : 0,
      .sampler_prefetch[0] = {
         .src = 0,
         .wrmask = 0xf,
         .cmd = 4,
      },
      .shader = &dummy_shader,
   };

   static const instr_t vs_code[] = {
      /* r0.xyz = r0.w ? c1.xyz : c0.xyz
       * r1.xy = r0.w ? c1.zw : c0.zw
       * r0.w = 1.0f
       */
      { .cat3 = {
         .opc_cat = 3, .opc = OPC_SEL_B32 & 63, .repeat = 2, .dst = 0,
         .c1 = {.src1_c = 1, .src1 = 4}, .src1_r = 1,
         .src2 = 3,
         .c2 = {.src3_c = 1, .dummy = 1, .src3 = 0},
      } },
      { .cat3 = {
         .opc_cat = 3, .opc = OPC_SEL_B32 & 63, .repeat = 1, .dst = 4,
         .c1 = {.src1_c = 1, .src1 = 6}, .src1_r = 1,
         .src2 = 3,
         .c2 = {.src3_c = 1, .dummy = 1, .src3 = 2},
      } },
      { .cat1 = { .opc_cat = 1, .src_type = TYPE_F32, .dst_type = TYPE_F32, .dst = 3,
                  .src_im = 1, .fim_val = 1.0f } },
      { .cat0 = { .opc = OPC_END } },
   };
#define FS_OFFSET (16 * sizeof(instr_t))
   STATIC_ASSERT(sizeof(vs_code) <= FS_OFFSET);

   /* shaders */
   struct ts_cs_memory shaders = { };
   VkResult result = tu_cs_alloc(&cmd->sub_cs, 2, 16 * sizeof(instr_t), &shaders);
   assert(result == VK_SUCCESS);

   memcpy(shaders.map, vs_code, sizeof(vs_code));

   instr_t *fs_code = (instr_t*) ((uint8_t*) shaders.map + FS_OFFSET);
   for (uint32_t i = 0; i < num_rts; i++) {
      /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
      *fs_code++ = (instr_t) { .cat1 = {
         .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32,
         .repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4
      } };
   }

   /* "bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its
    * blit path (it's not clear what allows it to not have it)
    */
   if (blit) {
      *fs_code++ = (instr_t) { .cat2 = {
         .opc_cat = 2, .opc = OPC_BARY_F & 63, .ei = 1, .full = 1,
         .dst = regid(63, 0), .src1_im = 1
      } };
   }

   *fs_code++ = (instr_t) { .cat0 = { .opc = OPC_END } };
   /* note: assumed <= 16 instructions (MAX_RTS is 8) */

   tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));

   tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, shaders.iova);
   tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0);
   tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL, 0);
   tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL, 0);
   tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs, shaders.iova + FS_OFFSET);

   tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0());
   tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0());

   tu6_emit_vpc(cs, &vs, NULL, &fs, NULL);

   /* REPL_MODE for varying with RECTLIST (2 vertices only) */
   tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0));
   tu_cs_emit_regs(cs, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0));

   tu6_emit_fs_inputs(cs, &fs);

   tu_cs_emit_regs(cs,
                   A6XX_GRAS_CL_CNTL(
                      .persp_division_disable = 1,
                      .vp_xform_disable = 1,
                      .vp_clip_code_ignore = 1,
                      .clip_disable = 1),
                   A6XX_GRAS_UNKNOWN_8001(0));
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?

   tu_cs_emit_regs(cs,
                   A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0(.x = 0, .y = 0),
                   A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));
   tu_cs_emit_regs(cs,
                   A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0(.x = 0, .y = 0),
                   A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));
}
static void
r3d_coords_raw(struct tu_cs *cs, const float *coords)
{
   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(2));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   tu_cs_emit_array(cs, (const uint32_t *) coords, 8);
}
static void
r3d_coords(struct tu_cs *cs,
           const VkOffset2D *dst,
           const VkOffset2D *src,
           const VkExtent2D *extent)
{
   int32_t src_x1 = src ? src->x : 0;
   int32_t src_y1 = src ? src->y : 0;
   r3d_coords_raw(cs, (float[]) {
      dst->x, dst->y,
      src_x1, src_y1,
      dst->x + extent->width, dst->y + extent->height,
      src_x1 + extent->width, src_y1 + extent->height,
   });
}
static void
r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
{
   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(1));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT: {
      /* cleared as r8g8b8a8_unorm using special format */
      uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
      tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f));
      tu_cs_emit(cs, fui((tmp >> 8 & 0xff) / 255.0f));
      tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f));
      tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f));
   } break;
   case VK_FORMAT_D16_UNORM:
   case VK_FORMAT_D32_SFLOAT:
      tu_cs_emit(cs, fui(val->depthStencil.depth));
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      break;
   case VK_FORMAT_S8_UINT:
      tu_cs_emit(cs, val->depthStencil.stencil & 0xff);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      break;
   default:
      /* as color formats use clear value as-is */
      assert(!vk_format_is_depth_or_stencil(format));
      tu_cs_emit_array(cs, val->color.uint32, 4);
      break;
   }
}
static void
r3d_src_common(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               const uint32_t *tex_const,
               uint32_t offset_base,
               uint32_t offset_ubwc,
               bool linear_filter)
{
   struct ts_cs_memory texture = { };
   VkResult result = tu_cs_alloc(&cmd->sub_cs,
                                 2, /* allocate space for a sampler too */
                                 A6XX_TEX_CONST_DWORDS, &texture);
   assert(result == VK_SUCCESS);

   memcpy(texture.map, tex_const, A6XX_TEX_CONST_DWORDS * 4);

   /* patch addresses for layer offset */
   *(uint64_t*) (texture.map + 4) += offset_base;
   uint64_t ubwc_addr = (texture.map[7] | (uint64_t) texture.map[8] << 32) + offset_ubwc;
   texture.map[7] = ubwc_addr;
   texture.map[8] = ubwc_addr >> 32;

   texture.map[A6XX_TEX_CONST_DWORDS + 0] =
      A6XX_TEX_SAMP_0_XY_MAG(linear_filter ? A6XX_TEX_LINEAR : A6XX_TEX_NEAREST) |
      A6XX_TEX_SAMP_0_XY_MIN(linear_filter ? A6XX_TEX_LINEAR : A6XX_TEX_NEAREST) |
      A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE) |
      A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE) |
      A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) |
      0x60000; /* XXX used by blob, doesn't seem necessary */
   texture.map[A6XX_TEX_CONST_DWORDS + 1] =
      0x1 | /* XXX used by blob, doesn't seem necessary */
      A6XX_TEX_SAMP_1_UNNORM_COORDS |
      A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR;
   texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0;
   texture.map[A6XX_TEX_CONST_DWORDS + 3] = 0;

   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
              CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
              CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
              CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
              CP_LOAD_STATE6_0_NUM_UNIT(1));
   tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_SAMP_LO, 2);
   tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4);

   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
              CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
              CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
              CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
              CP_LOAD_STATE6_0_NUM_UNIT(1));
   tu_cs_emit_qw(cs, texture.iova);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_CONST_LO, 2);
   tu_cs_emit_qw(cs, texture.iova);

   tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(1));
}
static void
r3d_src(struct tu_cmd_buffer *cmd,
        struct tu_cs *cs,
        const struct tu_image_view *iview,
        uint32_t layer,
        bool linear_filter)
{
   r3d_src_common(cmd, cs, iview->descriptor,
                  iview->layer_size * layer,
                  iview->ubwc_layer_size * layer,
                  linear_filter);
}
static void
r3d_src_buffer(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               VkFormat vk_format,
               uint64_t va, uint32_t pitch,
               uint32_t width, uint32_t height)
{
   uint32_t desc[A6XX_TEX_CONST_DWORDS];

   struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);

   desc[0] =
      COND(vk_format_is_srgb(vk_format), A6XX_TEX_CONST_0_SRGB) |
      A6XX_TEX_CONST_0_FMT(format.fmt) |
      A6XX_TEX_CONST_0_SWAP(format.swap) |
      A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
      // XXX to swizzle into .w for stencil buffer_to_image
      A6XX_TEX_CONST_0_SWIZ_Y(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) |
      A6XX_TEX_CONST_0_SWIZ_Z(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Z) |
      A6XX_TEX_CONST_0_SWIZ_W(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W);
   desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
   desc[2] =
      A6XX_TEX_CONST_2_FETCHSIZE(tu6_fetchsize(vk_format)) |
      A6XX_TEX_CONST_2_PITCH(pitch) |
      A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
   desc[3] = 0;
   desc[4] = va;
   desc[5] = va >> 32;
   for (uint32_t i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
      desc[i] = 0;

   r3d_src_common(cmd, cs, desc, 0, 0, false);
}
static void
r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
   tu6_emit_msaa(cs, iview->image->samples); /* TODO: move to setup */

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
   tu_cs_emit(cs, iview->RB_MRT_BUF_INFO);
   tu_cs_image_ref(cs, iview, layer);
   tu_cs_emit(cs, 0);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
   tu_cs_image_flag_ref(cs, iview, layer);

   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
}
static void
r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
   struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);

   tu6_emit_msaa(cs, 1); /* TODO: move to setup */

   tu_cs_emit_regs(cs,
                   A6XX_RB_MRT_BUF_INFO(0, .color_format = format.fmt, .color_swap = format.swap),
                   A6XX_RB_MRT_PITCH(0, pitch),
                   A6XX_RB_MRT_ARRAY_PITCH(0, 0),
                   A6XX_RB_MRT_BASE_LO(0, (uint32_t) va),
                   A6XX_RB_MRT_BASE_HI(0, va >> 32),
                   A6XX_RB_MRT_BASE_GMEM(0, 0));

   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
}
static void
r3d_setup(struct tu_cmd_buffer *cmd,
          struct tu_cs *cs,
          VkFormat vk_format,
          enum a6xx_rotation rotation,
          bool clear,
          uint8_t mask)
{
   if (!cmd->state.pass) {
      tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
      tu6_emit_window_scissor(cs, 0, 0, 0x7fff, 0x7fff);
   }

   tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000));
   tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000));

   r3d_pipeline(cmd, cs, !clear, clear ? 1 : 0);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
                  0xfc000000);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 1);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(0));

   tu_cs_emit_regs(cs,
                   A6XX_RB_FS_OUTPUT_CNTL0(),
                   A6XX_RB_FS_OUTPUT_CNTL1(.mrt = 1));

   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.sample_mask = 0xffff));
   tu_cs_emit_regs(cs, A6XX_RB_ALPHA_CONTROL());

   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL());
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK());
   tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK());
   tu_cs_emit_regs(cs, A6XX_RB_STENCILREF());

   tu_cs_emit_regs(cs, A6XX_RB_RENDER_COMPONENTS(.rt0 = 0xf));
   tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf));

   tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0,
                        .color_format = tu6_base_format(vk_format),
                        .color_sint = vk_format_is_sint(vk_format),
                        .color_uint = vk_format_is_uint(vk_format)));

   tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, .component_enable = mask));
   tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format)));
   tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format)));
}
static void
r3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
   tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) |
                  CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
                  CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY));
   tu_cs_emit(cs, 1); /* instance count */
   tu_cs_emit(cs, 2); /* vertex count */
}
/* blit ops - common interface for 2d/shader paths */

struct blit_ops {
   void (*coords)(struct tu_cs *cs,
                  const VkOffset2D *dst,
                  const VkOffset2D *src,
                  const VkExtent2D *extent);
   void (*clear_value)(struct tu_cs *cs, VkFormat format, const VkClearValue *val);
   void (*src)(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               const struct tu_image_view *iview,
               uint32_t layer,
               bool linear_filter);
   void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
                      VkFormat vk_format,
                      uint64_t va, uint32_t pitch,
                      uint32_t width, uint32_t height);
   void (*dst)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
   void (*dst_buffer)(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch);
   void (*setup)(struct tu_cmd_buffer *cmd,
                 struct tu_cs *cs,
                 VkFormat vk_format,
                 enum a6xx_rotation rotation,
                 bool clear,
                 uint8_t mask);
   void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
};

static const struct blit_ops r2d_ops = {
   .coords = r2d_coords,
   .clear_value = r2d_clear_value,
   .src = r2d_src,
   .src_buffer = r2d_src_buffer,
   .dst = r2d_dst,
   .dst_buffer = r2d_dst_buffer,
   .setup = r2d_setup,
   .run = r2d_run,
};

static const struct blit_ops r3d_ops = {
   .coords = r3d_coords,
   .clear_value = r3d_clear_value,
   .src = r3d_src,
   .src_buffer = r3d_src_buffer,
   .dst = r3d_dst,
   .dst_buffer = r3d_dst_buffer,
   .setup = r3d_setup,
   .run = r3d_run,
};
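/* Sketch of how the callers below drive this interface (the exact call order
 * varies per caller):
 *
 *    ops->setup(cmd, cs, format, ROTATE_0, false, 0xf);
 *    ops->coords(cs, &dst_offset, &src_offset, &extent);
 *    for (uint32_t i = 0; i < layers; i++) {
 *       ops->src(cmd, cs, &src_view, i, false);
 *       ops->dst(cs, &dst_view, i);
 *       ops->run(cmd, cs);
 *    }
 */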
/* passthrough set coords from 3D extents */
static void
coords(const struct blit_ops *ops,
       struct tu_cs *cs,
       const VkOffset3D *dst,
       const VkOffset3D *src,
       const VkExtent3D *extent)
{
   ops->coords(cs, (const VkOffset2D*) dst, (const VkOffset2D*) src, (const VkExtent2D*) extent);
}
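/* The casts above rely on VkOffset3D/VkExtent3D laying out x/y
 * (width/height) first, so reading them as the 2D types picks up just the
 * 2D components.
 */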
static void
tu_image_view_blit2(struct tu_image_view *iview,
                    struct tu_image *image,
                    VkFormat format,
                    const VkImageSubresourceLayers *subres,
                    uint32_t layer,
                    bool stencil_read)
{
   VkImageAspectFlags aspect_mask = subres->aspectMask;

   /* always use the AS_R8G8B8A8 format for these */
   if (format == VK_FORMAT_D24_UNORM_S8_UINT ||
       format == VK_FORMAT_X8_D24_UNORM_PACK32) {
      aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
   }

   tu_image_view_init(iview, &(VkImageViewCreateInfo) {
      .image = tu_image_to_handle(image),
      .viewType = VK_IMAGE_VIEW_TYPE_2D,
      .format = format,
      /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
      .components.r = stencil_read ? VK_COMPONENT_SWIZZLE_A : VK_COMPONENT_SWIZZLE_R,
      .subresourceRange = {
         .aspectMask = aspect_mask,
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = subres->baseArrayLayer + layer,
         .layerCount = 1,
      },
   });
}
static void
tu_image_view_blit(struct tu_image_view *iview,
                   struct tu_image *image,
                   const VkImageSubresourceLayers *subres,
                   uint32_t layer)
{
   tu_image_view_blit2(iview, image, image->vk_format, subres, layer, false);
}
static void
tu6_blit_image(struct tu_cmd_buffer *cmd,
               struct tu_image *src_image,
               struct tu_image *dst_image,
               const VkImageBlit *info,
               VkFilter filter)
{
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;
   uint32_t layers;

   /* 2D blit can't do rotation mirroring from just coordinates */
   static const enum a6xx_rotation rotate[2][2] = {
      {ROTATE_0, ROTATE_HFLIP},
      {ROTATE_VFLIP, ROTATE_180},
   };

   bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
                   (info->dstOffsets[1].x < info->dstOffsets[0].x);
   bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
                   (info->dstOffsets[1].y < info->dstOffsets[0].y);
   bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) !=
                   (info->dstOffsets[1].z < info->dstOffsets[0].z);

   if (mirror_z) {
      tu_finishme("blit z mirror\n");
      return;
   }

   if (info->srcOffsets[1].z - info->srcOffsets[0].z !=
       info->dstOffsets[1].z - info->dstOffsets[0].z) {
      tu_finishme("blit z filter\n");
      return;
   }

   layers = info->srcOffsets[1].z - info->srcOffsets[0].z;
   if (info->dstSubresource.layerCount > 1) {
      assert(layers <= 1);
      layers = info->dstSubresource.layerCount;
   }

   uint8_t mask = 0xf;
   if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      assert(info->srcSubresource.aspectMask == info->dstSubresource.aspectMask);
      if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
         mask = 0x7;
      if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
         mask = 0x8;
   }

   /* BC1_RGB_* formats need to have their last components overridden with 1
    * when sampling, which is normally handled with the texture descriptor
    * swizzle. The 2d path can't handle that, so use the 3d path.
    *
    * TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with
    * the 2d path.
    */
   if (dst_image->samples > 1 ||
       src_image->vk_format == VK_FORMAT_BC1_RGB_UNORM_BLOCK ||
       src_image->vk_format == VK_FORMAT_BC1_RGB_SRGB_BLOCK)
      ops = &r3d_ops;

   /* TODO: shader path fails some of blit_image.all_formats.generate_mipmaps.* tests,
    * figure out why (should be able to pass all tests with only shader path)
    */

   ops->setup(cmd, cs, dst_image->vk_format, rotate[mirror_y][mirror_x], false, mask);

   if (ops == &r3d_ops) {
      r3d_coords_raw(cs, (float[]) {
         info->dstOffsets[0].x, info->dstOffsets[0].y,
         info->srcOffsets[0].x, info->srcOffsets[0].y,
         info->dstOffsets[1].x, info->dstOffsets[1].y,
         info->srcOffsets[1].x, info->srcOffsets[1].y
      });
   } else {
      tu_cs_emit_regs(cs,
         A6XX_GRAS_2D_DST_TL(.x = MIN2(info->dstOffsets[0].x, info->dstOffsets[1].x),
                             .y = MIN2(info->dstOffsets[0].y, info->dstOffsets[1].y)),
         A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1,
                             .y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1));
      tu_cs_emit_regs(cs,
         A6XX_GRAS_2D_SRC_TL_X(.x = MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)),
         A6XX_GRAS_2D_SRC_BR_X(.x = MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1),
         A6XX_GRAS_2D_SRC_TL_Y(.y = MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)),
         A6XX_GRAS_2D_SRC_BR_Y(.y = MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1));
   }

   struct tu_image_view dst, src;
   tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffsets[0].z);
   tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z);

   for (uint32_t i = 0; i < layers; i++) {
      ops->dst(cs, &dst, i);
      ops->src(cmd, cs, &src, i, filter == VK_FILTER_LINEAR);
      ops->run(cmd, cs);
   }
}
void
tu_CmdBlitImage(VkCommandBuffer commandBuffer,
                VkImage srcImage,
                VkImageLayout srcImageLayout,
                VkImage dstImage,
                VkImageLayout dstImageLayout,
                uint32_t regionCount,
                const VkImageBlit *pRegions,
                VkFilter filter)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_image, dst_image, dstImage);

   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);

   for (uint32_t i = 0; i < regionCount; ++i)
      tu6_blit_image(cmd, src_image, dst_image, pRegions + i, filter);
}
static VkFormat
copy_format(VkFormat format)
{
   switch (vk_format_get_blocksizebits(format)) {
   case 8: return VK_FORMAT_R8_UINT;
   case 16: return VK_FORMAT_R16_UINT;
   case 32: return VK_FORMAT_R32_UINT;
   case 64: return VK_FORMAT_R32G32_UINT;
   case 96: return VK_FORMAT_R32G32B32_UINT;
   case 128: return VK_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("unhandled format size");
   }
}
static void
copy_compressed(VkFormat format,
                VkOffset3D *offset,
                VkExtent3D *extent,
                uint32_t *width,
                uint32_t *height)
{
   if (!vk_format_is_compressed(format))
      return;

   uint32_t block_width = vk_format_get_blockwidth(format);
   uint32_t block_height = vk_format_get_blockheight(format);

   offset->x /= block_width;
   offset->y /= block_height;

   if (extent) {
      extent->width = DIV_ROUND_UP(extent->width, block_width);
      extent->height = DIV_ROUND_UP(extent->height, block_height);
   }
   if (width)
      *width = DIV_ROUND_UP(*width, block_width);
   if (height)
      *height = DIV_ROUND_UP(*height, block_height);
}
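/* Example: for a BC1 image (4x4 texel blocks, 64 bits per block), an 8x8
 * texel region at (4, 4) becomes a 2x2 block region at (1, 1), and
 * copy_format() maps the data to VK_FORMAT_R32G32_UINT, one "texel" per
 * block.
 */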
static void
tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
                        struct tu_buffer *src_buffer,
                        struct tu_image *dst_image,
                        const VkBufferImageCopy *info)
{
   struct tu_cs *cs = &cmd->cs;
   uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
   VkFormat dst_format = dst_image->vk_format;
   VkFormat src_format = dst_image->vk_format;
   const struct blit_ops *ops = &r2d_ops;

   uint8_t mask = 0xf;

   if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      switch (info->imageSubresource.aspectMask) {
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         src_format = VK_FORMAT_R8_UNORM; /* changes how src buffer is interpreted */
         mask = 0x8;
         ops = &r3d_ops;
         break;
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         mask = 0x7;
         break;
      }
   }

   VkOffset3D offset = info->imageOffset;
   VkExtent3D extent = info->imageExtent;
   uint32_t src_width = info->bufferRowLength ?: extent.width;
   uint32_t src_height = info->bufferImageHeight ?: extent.height;

   if (dst_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 || vk_format_is_compressed(src_format)) {
      assert(src_format == dst_format);
      copy_compressed(dst_format, &offset, &extent, &src_width, &src_height);
      src_format = dst_format = copy_format(dst_format);
   }

   uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
   uint32_t layer_size = src_height * pitch;

   /* note: the src_va/pitch alignment of 64 is for 2D engine,
    * it is also valid for 1cpp format with shader path (stencil aspect path)
    */

   ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);

   struct tu_image_view dst;
   tu_image_view_blit2(&dst, dst_image, dst_format, &info->imageSubresource, offset.z, false);

   for (uint32_t i = 0; i < layers; i++) {
      ops->dst(cs, &dst, i);

      uint64_t src_va = tu_buffer_iova(src_buffer) + info->bufferOffset + layer_size * i;
      if ((src_va & 63) || (pitch & 63)) {
         for (uint32_t y = 0; y < extent.height; y++) {
            uint32_t x = (src_va & 63) / vk_format_get_blocksize(src_format);
            ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch,
                            x + extent.width, 1);
            ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x},
                        &(VkExtent2D) {extent.width, 1});
            ops->run(cmd, cs);
            src_va += pitch;
         }
      } else {
         ops->src_buffer(cmd, cs, src_format, src_va, pitch, extent.width, extent.height);
         coords(ops, cs, &offset, &(VkOffset3D){}, &extent);
         ops->run(cmd, cs);
      }
   }
}
void
tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
                        VkBuffer srcBuffer,
                        VkImage dstImage,
                        VkImageLayout dstImageLayout,
                        uint32_t regionCount,
                        const VkBufferImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, dst_image, dstImage);
   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);

   tu_bo_list_add(&cmd->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < regionCount; ++i)
      tu_copy_buffer_to_image(cmd, src_buffer, dst_image, pRegions + i);
}
static void
tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
                        struct tu_image *src_image,
                        struct tu_buffer *dst_buffer,
                        const VkBufferImageCopy *info)
{
   struct tu_cs *cs = &cmd->cs;
   uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
   VkFormat src_format = src_image->vk_format;
   VkFormat dst_format = src_image->vk_format;
   bool stencil_read = false;

   if (src_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
       info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      dst_format = VK_FORMAT_R8_UNORM;
      stencil_read = true;
   }

   const struct blit_ops *ops = stencil_read ? &r3d_ops : &r2d_ops;
   VkOffset3D offset = info->imageOffset;
   VkExtent3D extent = info->imageExtent;
   uint32_t dst_width = info->bufferRowLength ?: extent.width;
   uint32_t dst_height = info->bufferImageHeight ?: extent.height;

   if (dst_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 || vk_format_is_compressed(dst_format)) {
      assert(src_format == dst_format);
      copy_compressed(dst_format, &offset, &extent, &dst_width, &dst_height);
      src_format = dst_format = copy_format(dst_format);
   }

   uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
   uint32_t layer_size = pitch * dst_height;

   /* note: the dst_va/pitch alignment of 64 is for 2D engine,
    * it is also valid for 1cpp format with shader path (stencil aspect)
    */

   ops->setup(cmd, cs, dst_format, ROTATE_0, false, 0xf);

   struct tu_image_view src;
   tu_image_view_blit2(&src, src_image, src_format, &info->imageSubresource, offset.z, stencil_read);

   for (uint32_t i = 0; i < layers; i++) {
      ops->src(cmd, cs, &src, i, false);

      uint64_t dst_va = tu_buffer_iova(dst_buffer) + info->bufferOffset + layer_size * i;
      if ((dst_va & 63) || (pitch & 63)) {
         for (uint32_t y = 0; y < extent.height; y++) {
            uint32_t x = (dst_va & 63) / vk_format_get_blocksize(dst_format);
            ops->dst_buffer(cs, dst_format, dst_va & ~63, 0);
            ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y},
                        &(VkExtent2D) {extent.width, 1});
            ops->run(cmd, cs);
            dst_va += pitch;
         }
      } else {
         ops->dst_buffer(cs, dst_format, dst_va, pitch);
         coords(ops, cs, &(VkOffset3D) {0, 0}, &offset, &extent);
         ops->run(cmd, cs);
      }
   }
}
void
tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
                        VkImage srcImage,
                        VkImageLayout srcImageLayout,
                        VkBuffer dstBuffer,
                        uint32_t regionCount,
                        const VkBufferImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);

   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < regionCount; ++i)
      tu_copy_image_to_buffer(cmd, src_image, dst_buffer, pRegions + i);
}
/* Tiled formats don't support swapping, which means that we can't support
 * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some
 * formats like B5G5R5A1 have a separate linear-only format when sampling.
 * Currently we fake support for tiled swapped formats and use the unswapped
 * format instead, but this means that reinterpreting copies to and from
 * swapped formats can't be performed correctly unless we can swizzle the
 * components by reinterpreting the other image as the "correct" swapped
 * format, i.e. only when the other image is linear.
 */
static bool
is_swapped_format(VkFormat format)
{
   struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR);
   struct tu_native_format tiled = tu6_format_texture(format, TILE6_3);
   return linear.fmt != tiled.fmt || linear.swap != tiled.swap;
}
/* R8G8_* formats have a different tiling layout than other cpp=2 formats, and
 * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice
 * versa). This should mirror the logic in fdl6_layout.
 */
static bool
image_is_r8g8(struct tu_image *image)
{
   return image->layout.cpp == 2 &&
      vk_format_get_nr_components(image->vk_format) == 2;
}
static void
tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
                       struct tu_image *src_image,
                       struct tu_image *dst_image,
                       const VkImageCopy *info)
{
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;

   uint8_t mask = 0xf;
   if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
         mask = 0x7;
      if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
         mask = 0x8;
   }

   if (dst_image->samples > 1)
      ops = &r3d_ops;

   assert(info->srcSubresource.aspectMask == info->dstSubresource.aspectMask);

   VkFormat format = VK_FORMAT_UNDEFINED;
   VkOffset3D src_offset = info->srcOffset;
   VkOffset3D dst_offset = info->dstOffset;
   VkExtent3D extent = info->extent;

   /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between
    * Images":
    *
    *    When copying between compressed and uncompressed formats the extent
    *    members represent the texel dimensions of the source image and not
    *    the destination. When copying from a compressed image to an
    *    uncompressed image the image texel dimensions written to the
    *    uncompressed image will be source extent divided by the compressed
    *    texel block dimensions. When copying from an uncompressed image to a
    *    compressed image the image texel dimensions written to the compressed
    *    image will be the source extent multiplied by the compressed texel
    *    block dimensions.
    *
    * This means we only have to adjust the extent if the source image is
    * compressed.
    */
   copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
   copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);

   VkFormat dst_format = vk_format_is_compressed(dst_image->vk_format) ?
      copy_format(dst_image->vk_format) : dst_image->vk_format;
   VkFormat src_format = vk_format_is_compressed(src_image->vk_format) ?
      copy_format(src_image->vk_format) : src_image->vk_format;

   bool use_staging_blit = false;

   if (src_format == dst_format) {
      /* Images that share a format can always be copied directly because it's
       * the same as a blit.
       */
      format = src_format;
   } else if (!src_image->layout.tile_mode) {
      /* If an image is linear, we can always safely reinterpret it with the
       * other image's format and then do a regular blit.
       */
      format = dst_format;
   } else if (!dst_image->layout.tile_mode) {
      format = src_format;
   } else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) {
      /* We can't currently copy r8g8 images to/from other cpp=2 images,
       * due to the different tile layout.
       */
      use_staging_blit = true;
   } else if (is_swapped_format(src_format) ||
              is_swapped_format(dst_format)) {
      /* If either format has a non-identity swap, then we can't copy
       * to/from it.
       */
      use_staging_blit = true;
   } else if (!src_image->layout.ubwc) {
      format = dst_format;
   } else if (!dst_image->layout.ubwc) {
      format = src_format;
   } else {
      /* Both formats use UBWC and so neither can be reinterpreted.
       * TODO: We could do an in-place decompression of the dst instead.
       */
      use_staging_blit = true;
   }

   struct tu_image_view dst, src;

   if (use_staging_blit) {
      tu_image_view_blit2(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false);
      tu_image_view_blit2(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false);

      struct tu_image staging_image = {
         .vk_format = src_format,
         .type = src_image->type,
         .tiling = VK_IMAGE_TILING_LINEAR,
         .extent = extent,
         .level_count = 1,
         .layer_count = info->srcSubresource.layerCount,
         .samples = src_image->samples,
      };

      VkImageSubresourceLayers staging_subresource = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .mipLevel = 0,
         .baseArrayLayer = 0,
         .layerCount = info->srcSubresource.layerCount,
      };

      VkOffset3D staging_offset = { 0 };

      staging_image.layout.tile_mode = TILE6_LINEAR;
      staging_image.layout.ubwc = false;

      fdl6_layout(&staging_image.layout,
                  vk_format_to_pipe_format(staging_image.vk_format),
                  staging_image.samples,
                  staging_image.extent.width,
                  staging_image.extent.height,
                  staging_image.extent.depth,
                  staging_image.level_count,
                  staging_image.layer_count,
                  staging_image.type == VK_IMAGE_TYPE_3D,
                  NULL);

      VkResult result = tu_get_scratch_bo(cmd->device,
                                          staging_image.layout.size,
                                          &staging_image.bo);
      if (result != VK_SUCCESS) {
         cmd->record_result = result;
         return;
      }

      tu_bo_list_add(&cmd->bo_list, staging_image.bo,
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);

      struct tu_image_view staging;
      tu_image_view_blit2(&staging, &staging_image, src_format,
                          &staging_subresource, 0, false);

      ops->setup(cmd, cs, src_format, ROTATE_0, false, mask);
      coords(ops, cs, &staging_offset, &src_offset, &extent);

      for (uint32_t i = 0; i < info->extent.depth; i++) {
         ops->src(cmd, cs, &src, i, false);
         ops->dst(cs, &staging, i);
         ops->run(cmd, cs);
      }

      /* When executed by the user there has to be a pipeline barrier here,
       * but since we're doing it manually we'll have to flush ourselves.
       */
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);

      tu_image_view_blit2(&staging, &staging_image, dst_format,
                          &staging_subresource, 0, false);

      ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
      coords(ops, cs, &dst_offset, &staging_offset, &extent);

      for (uint32_t i = 0; i < info->extent.depth; i++) {
         ops->src(cmd, cs, &staging, i, false);
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   } else {
      tu_image_view_blit2(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
      tu_image_view_blit2(&src, src_image, format, &info->srcSubresource, src_offset.z, false);

      ops->setup(cmd, cs, format, ROTATE_0, false, mask);
      coords(ops, cs, &dst_offset, &src_offset, &extent);

      for (uint32_t i = 0; i < info->extent.depth; i++) {
         ops->src(cmd, cs, &src, i, false);
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   }
}
void
tu_CmdCopyImage(VkCommandBuffer commandBuffer,
                VkImage srcImage,
                VkImageLayout srcImageLayout,
                VkImage destImage,
                VkImageLayout destImageLayout,
                uint32_t regionCount,
                const VkImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_image, dst_image, destImage);

   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);

   for (uint32_t i = 0; i < regionCount; ++i)
      tu_copy_image_to_image(cmd, src_image, dst_image, pRegions + i);
}
static void
copy_buffer(struct tu_cmd_buffer *cmd,
            uint64_t dst_va,
            uint64_t src_va,
            uint64_t size,
            uint32_t block_size)
{
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;
   VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM;
   uint64_t blocks = size / block_size;

   ops->setup(cmd, cs, format, ROTATE_0, false, 0xf);

   while (blocks) {
      uint32_t src_x = (src_va & 63) / block_size;
      uint32_t dst_x = (dst_va & 63) / block_size;
      uint32_t width = MIN2(MIN2(blocks, 0x4000 - src_x), 0x4000 - dst_x);

      ops->src_buffer(cmd, cs, format, src_va & ~63, 0, src_x + width, 1);
      ops->dst_buffer(     cs, format, dst_va & ~63, 0);
      ops->coords(cs, &(VkOffset2D) {dst_x}, &(VkOffset2D) {src_x}, &(VkExtent2D) {width, 1});
      ops->run(cmd, cs);

      src_va += width * block_size;
      dst_va += width * block_size;
      blocks -= width;
   }
}
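/* The copy is expressed as a series of one-row blits of at most 0x4000
 * blocks each (apparently the coordinate limit of the 2D engine).
 * Unaligned addresses are rounded down to a 64-byte boundary, with the
 * remainder folded into the x coordinate, and the loop walks the buffer
 * chunk by chunk.
 */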
void
tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
                 VkBuffer srcBuffer,
                 VkBuffer dstBuffer,
                 uint32_t regionCount,
                 const VkBufferCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
   TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);

   tu_bo_list_add(&cmd->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < regionCount; ++i) {
      copy_buffer(cmd,
                  tu_buffer_iova(dst_buffer) + pRegions[i].dstOffset,
                  tu_buffer_iova(src_buffer) + pRegions[i].srcOffset,
                  pRegions[i].size, 1);
   }
}
void
tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                   VkBuffer dstBuffer,
                   VkDeviceSize dstOffset,
                   VkDeviceSize dataSize,
                   const void *pData)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);

   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);

   struct ts_cs_memory tmp;
   VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64, &tmp);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   memcpy(tmp.map, pData, dataSize);
   copy_buffer(cmd, tu_buffer_iova(buffer) + dstOffset, tmp.iova, dataSize, 4);
}
void
tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
                 VkBuffer dstBuffer,
                 VkDeviceSize dstOffset,
                 VkDeviceSize fillSize,
                 uint32_t data)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;

   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);

   if (fillSize == VK_WHOLE_SIZE)
      fillSize = buffer->size - dstOffset;

   uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset;
   uint32_t blocks = fillSize / 4;

   ops->setup(cmd, cs, VK_FORMAT_R32_UINT, ROTATE_0, true, 0xf);
   ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});

   while (blocks) {
      uint32_t dst_x = (dst_va & 63) / 4;
      uint32_t width = MIN2(blocks, 0x4000 - dst_x);

      ops->dst_buffer(cs, VK_FORMAT_R32_UINT, dst_va & ~63, 0);
      ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1});
      ops->run(cmd, cs);

      dst_va += width * 4;
      blocks -= width;
   }
}
void
tu_CmdResolveImage(VkCommandBuffer commandBuffer,
                   VkImage srcImage,
                   VkImageLayout srcImageLayout,
                   VkImage dstImage,
                   VkImageLayout dstImageLayout,
                   uint32_t regionCount,
                   const VkImageResolve *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_image, dst_image, dstImage);
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;

   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);

   ops->setup(cmd, cs, dst_image->vk_format, ROTATE_0, false, 0xf);

   for (uint32_t i = 0; i < regionCount; ++i) {
      const VkImageResolve *info = &pRegions[i];
      uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount);

      assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount);
      /* TODO: aspect masks possible ? */

      coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent);

      struct tu_image_view dst, src;
      tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffset.z);
      tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffset.z);

      for (uint32_t layer = 0; layer < layers; layer++) {
         ops->src(cmd, cs, &src, layer, false);
         ops->dst(cs, &dst, layer);
         ops->run(cmd, cs);
      }
   }
}
void
tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
                  struct tu_cs *cs,
                  struct tu_image_view *src,
                  struct tu_image_view *dst,
                  uint32_t layers,
                  const VkRect2D *rect)
{
   const struct blit_ops *ops = &r2d_ops;

   tu_bo_list_add(&cmd->bo_list, src->image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst->image->bo, MSM_SUBMIT_BO_WRITE);

   assert(src->image->vk_format == dst->image->vk_format);

   ops->setup(cmd, cs, dst->image->vk_format, ROTATE_0, false, 0xf);
   ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);

   for (uint32_t i = 0; i < layers; i++) {
      ops->src(cmd, cs, src, i, false);
      ops->dst(cs, dst, i);
      ops->run(cmd, cs);
   }
}
static void
clear_image(struct tu_cmd_buffer *cmd,
            struct tu_image *image,
            const VkClearValue *clear_value,
            const VkImageSubresourceRange *range)
{
   uint32_t level_count = tu_get_levelCount(image, range);
   uint32_t layer_count = tu_get_layerCount(image, range);
   struct tu_cs *cs = &cmd->cs;
   VkFormat format = image->vk_format;
   if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
      format = VK_FORMAT_R32_UINT;

   if (image->type == VK_IMAGE_TYPE_3D) {
      assert(layer_count == 1);
      assert(range->baseArrayLayer == 0);
   }

   uint8_t mask = 0xf;
   if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      mask = 0;
      if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
         mask |= 0x7;
      if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
         mask |= 0x8;
   }

   const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops;

   ops->setup(cmd, cs, format, ROTATE_0, true, mask);
   ops->clear_value(cs, image->vk_format, clear_value);

   for (unsigned j = 0; j < level_count; j++) {
      if (image->type == VK_IMAGE_TYPE_3D)
         layer_count = u_minify(image->extent.depth, range->baseMipLevel + j);

      ops->coords(cs, &(VkOffset2D){}, NULL, &(VkExtent2D) {
                     u_minify(image->extent.width, range->baseMipLevel + j),
                     u_minify(image->extent.height, range->baseMipLevel + j)
                  });

      struct tu_image_view dst;
      tu_image_view_blit2(&dst, image, format, &(VkImageSubresourceLayers) {
         .aspectMask = range->aspectMask,
         .mipLevel = range->baseMipLevel + j,
         .baseArrayLayer = range->baseArrayLayer,
         .layerCount = 1,
      }, 0, false);

      for (uint32_t i = 0; i < layer_count; i++) {
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   }
}
void
tu_CmdClearColorImage(VkCommandBuffer commandBuffer,
                      VkImage image_h,
                      VkImageLayout imageLayout,
                      const VkClearColorValue *pColor,
                      uint32_t rangeCount,
                      const VkImageSubresourceRange *pRanges)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, image_h);

   tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < rangeCount; i++)
      clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i);
}
void
tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                             VkImage image_h,
                             VkImageLayout imageLayout,
                             const VkClearDepthStencilValue *pDepthStencil,
                             uint32_t rangeCount,
                             const VkImageSubresourceRange *pRanges)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, image_h);

   tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < rangeCount; i++)
      clear_image(cmd, image, (const VkClearValue*) pDepthStencil, pRanges + i);
}
static void
tu_clear_sysmem_attachments_2d(struct tu_cmd_buffer *cmd,
                               uint32_t attachment_count,
                               const VkClearAttachment *attachments,
                               uint32_t rect_count,
                               const VkClearRect *rects)
{
   const struct tu_subpass *subpass = cmd->state.subpass;
   /* note: cannot use shader path here.. there is a special shader path
    * in tu_clear_sysmem_attachments()
    */
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->draw_cs;

   for (uint32_t j = 0; j < attachment_count; j++) {
      /* The vulkan spec, section 17.2 "Clearing Images Inside a Render
       * Pass Instance" says that:
       *
       *    Unlike other clear commands, vkCmdClearAttachments executes as
       *    a drawing command, rather than a transfer command, with writes
       *    performed by it executing in rasterization order. Clears to
       *    color attachments are executed as color attachment writes, by
       *    the VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT stage.
       *    Clears to depth/stencil attachments are executed as depth
       *    writes and writes by the
       *    VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT and
       *    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT stages.
       *
       * However, the 2d path here is executed the same way as a
       * transfer command, using the CCU color cache exclusively with
       * a special depth-as-color format for depth clears. This means that
       * we can't rely on the normal pipeline barrier mechanism here, and
       * have to manually flush whenever using a different cache domain
       * from what the 3d path would've used. This happens when we clear
       * depth/stencil, since normally depth attachments use CCU depth, but
       * we clear it using a special depth-as-color format. Since the clear
       * potentially uses a different attachment state we also need to
       * invalidate color beforehand and flush it afterwards.
       */

      uint32_t a;
      if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      } else {
         a = subpass->depth_stencil_attachment.attachment;
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
      }

      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      uint8_t mask = 0xf;
      if (cmd->state.pass->attachments[a].format == VK_FORMAT_D24_UNORM_S8_UINT) {
         if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT))
            mask &= ~0x7;
         if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT))
            mask &= ~0x8;
      }

      const struct tu_image_view *iview =
         cmd->state.framebuffer->attachments[a].attachment;

      ops->setup(cmd, cs, iview->image->vk_format, ROTATE_0, true, mask);
      ops->clear_value(cs, iview->image->vk_format, &attachments[j].clearValue);

      /* Wait for the flushes we triggered manually to complete */
      tu_cs_emit_wfi(cs);

      for (uint32_t i = 0; i < rect_count; i++) {
         ops->coords(cs, &rects[i].rect.offset, NULL, &rects[i].rect.extent);
         for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
            ops->dst(cs, iview, rects[i].baseArrayLayer + layer);
            ops->run(cmd, cs);
         }
      }

      if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
      } else {
         /* sync color into depth */
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
      }
   }
}
static void
tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
                            uint32_t attachment_count,
                            const VkClearAttachment *attachments,
                            uint32_t rect_count,
                            const VkClearRect *rects)
{
   /* the shader path here is special, it avoids changing MRT/etc state */
   const struct tu_render_pass *pass = cmd->state.pass;
   const struct tu_subpass *subpass = cmd->state.subpass;
   const uint32_t mrt_count = subpass->color_count;
   struct tu_cs *cs = &cmd->draw_cs;
   uint32_t clear_value[MAX_RTS][4];
   float z_clear_val = 0.0f;
   uint8_t s_clear_val = 0;
   uint32_t clear_rts = 0, clear_components = 0, num_rts = 0, b;
   bool z_clear = false;
   bool s_clear = false;
   uint32_t max_samples = 1;
   for (uint32_t i = 0; i < attachment_count; i++) {
      uint32_t a;
      if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         uint32_t c = attachments[i].colorAttachment;
         a = subpass->color_attachments[c].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         clear_rts |= 1 << c;
         clear_components |= 0xf << (c * 4);
         memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t));
      } else {
         a = subpass->depth_stencil_attachment.attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
            z_clear = true;
            z_clear_val = attachments[i].clearValue.depthStencil.depth;
         }

         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            s_clear = true;
            s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
         }
      }

      max_samples = MAX2(max_samples, pass->attachments[a].samples);
   }
   /* prefer to use 2D path for clears
    * 2D can't clear separate depth/stencil and msaa, needs known framebuffer
    */
   if (max_samples == 1 && cmd->state.framebuffer) {
      tu_clear_sysmem_attachments_2d(cmd, attachment_count, attachments, rect_count, rects);
      return;
   }
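   /* Program the FS to output one register per render target being cleared;
    * the clear values themselves are supplied as shader constants below.
    */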
   /* TODO: this path doesn't take into account multilayer rendering */

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
                  0xfc000000);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), mrt_count);
   for (uint32_t i = 0; i < mrt_count; i++) {
      if (clear_rts & (1 << i))
         tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(num_rts++ * 4));
      else
         tu_cs_emit(cs, 0);
   }

   r3d_pipeline(cmd, cs, false, num_rts);

   tu_cs_emit_regs(cs,
                   A6XX_SP_FS_RENDER_COMPONENTS(.dword = clear_components));
   tu_cs_emit_regs(cs,
                   A6XX_RB_RENDER_COMPONENTS(.dword = clear_components));

   tu_cs_emit_regs(cs,
                   A6XX_RB_FS_OUTPUT_CNTL0(),
                   A6XX_RB_FS_OUTPUT_CNTL1(.mrt = mrt_count));

   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.independent_blend = 1, .sample_mask = 0xffff));
   tu_cs_emit_regs(cs, A6XX_RB_ALPHA_CONTROL());

   for (uint32_t i = 0; i < mrt_count; i++) {
      tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(i,
            .component_enable = COND(clear_rts & (1 << i), 0xf)));
   }
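   /* force the depth/stencil tests to always pass with writes enabled, so
    * the clear value lands regardless of the current attachment contents
    */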
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL(
         .z_enable = z_clear,
         .z_write_enable = z_clear,
         .zfunc = FUNC_ALWAYS));
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL(
         .stencil_enable = s_clear,
         .func = FUNC_ALWAYS,
         .zpass = STENCIL_REPLACE));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val));
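   /* emit the clear colors as FS constants: one vec4 per render target being
    * cleared, in the order the SP_FS_OUTPUT_REGs were assigned above
    */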
   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_rts);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(num_rts));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   for_each_bit(b, clear_rts)
      tu_cs_emit_array(cs, clear_value[b], 4);
   for (uint32_t i = 0; i < rect_count; i++) {
      r3d_coords_raw(cs, (float[]) {
         rects[i].rect.offset.x, rects[i].rect.offset.y,
         z_clear_val, 1.0f,
         rects[i].rect.offset.x + rects[i].rect.extent.width,
         rects[i].rect.offset.y + rects[i].rect.extent.height,
         z_clear_val, 1.0f
      });
      r3d_run(cmd, cs);
   }
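   /* the clear draw trashed pipeline and dynamic state, so flag it dirty to
    * get the application's state re-emitted before the next draw
    */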
   cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE |
      TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
      TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
      TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |
      TU_CMD_DIRTY_DYNAMIC_VIEWPORT |
      TU_CMD_DIRTY_DYNAMIC_SCISSOR;
}
/**
 * Pack a VkClearValue into a 128-bit buffer. The format is respected except
 * for the component order: components are always packed in WZYX order,
 * because gmem is tiled and tiled formats always have WZYX swap.
 */
static void
pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t buf[4])
{
   const struct util_format_description *desc = vk_format_description(format);

   switch (format) {
   case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
      buf[0] = float3_to_r11g11b10f(val->color.float32);
      return;
   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      buf[0] = float3_to_rgb9e5(val->color.float32);
      return;
   default:
      break;
   }

   assert(desc && desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);

   /* S8_UINT is special and has no depth */
   const int max_components =
      format == VK_FORMAT_S8_UINT ? 2 : desc->nr_channels;

   int buf_offset = 0;
   int bit_shift = 0;
   for (int comp = 0; comp < max_components; comp++) {
      const struct util_format_channel_description *ch =
         tu_get_format_channel_description(desc, comp);
      if (!ch) {
         assert((format == VK_FORMAT_S8_UINT && comp == 0) ||
                (format == VK_FORMAT_X8_D24_UNORM_PACK32 && comp == 1));
         continue;
      }

      union tu_clear_component_value v = tu_get_clear_component_value(
         val, comp, desc->colorspace);

      /* move to the next uint32_t when there is not enough space */
      assert(ch->size <= 32);
      if (bit_shift + ch->size > 32) {
         buf_offset++;
         bit_shift = 0;
      }

      if (bit_shift == 0)
         buf[buf_offset] = 0;

      buf[buf_offset] |= tu_pack_clear_component_value(v, ch) << bit_shift;
      bit_shift += ch->size;
   }
}
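/* Illustrative example (not exercised by the driver): clearing
 * VK_FORMAT_R5G6B5_UNORM_PACK16 to (r=1.0, g=0.5, b=0.0), assuming the
 * util_format description lists the 5-bit R channel first, then 6-bit G,
 * then 5-bit B:
 *
 *    R: tu_pack_float32_for_unorm(1.0f, 5) = 31  -> bits [0..4]
 *    G: tu_pack_float32_for_unorm(0.5f, 6) = 32  -> bits [5..10]
 *    B: tu_pack_float32_for_unorm(0.0f, 5) = 0   -> bits [11..15]
 *
 * giving buf[0] = 31 | (32 << 5) = 0x041f, with buf[1..3] left untouched.
 */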
static void
tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                              struct tu_cs *cs,
                              uint32_t attachment,
                              uint8_t component_mask,
                              const VkClearValue *value)
{
   VkFormat vk_format = cmd->state.pass->attachments[attachment].format;
   /* note: component_mask is 0x7 for depth and 0x8 for stencil
    * because D24S8 is cleared with AS_R8G8B8A8 format
    */

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(vk_format)));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
   tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(component_mask));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
   tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
   tu_cs_emit(cs, 0);

   uint32_t clear_vals[4] = {};
   pack_gmem_clear_value(value, vk_format, clear_vals);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
   tu_cs_emit_array(cs, clear_vals, 4);

   tu6_emit_event_write(cmd, cs, BLIT);
}
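/* vkCmdClearAttachments implementation for the gmem rendering path: emits a
 * scissored BLIT event per clear rect, writing the packed clear value
 * directly into gmem.
 */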
static void
tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
                          uint32_t attachment_count,
                          const VkClearAttachment *attachments,
                          uint32_t rect_count,
                          const VkClearRect *rects)
{
   const struct tu_subpass *subpass = cmd->state.subpass;
   struct tu_cs *cs = &cmd->draw_cs;

   /* TODO: swap the loops for smaller cmdstream */
   for (unsigned i = 0; i < rect_count; i++) {
      unsigned x1 = rects[i].rect.offset.x;
      unsigned y1 = rects[i].rect.offset.y;
      unsigned x2 = x1 + rects[i].rect.extent.width - 1;
      unsigned y2 = y1 + rects[i].rect.extent.height - 1;

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));

      for (unsigned j = 0; j < attachment_count; j++) {
         uint32_t a;
         if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
            a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
         else
            a = subpass->depth_stencil_attachment.attachment;

         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         unsigned clear_mask = 0xf;
         if (cmd->state.pass->attachments[a].format == VK_FORMAT_D24_UNORM_S8_UINT) {
            if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT))
               clear_mask &= ~0x7;
            if (!(attachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT))
               clear_mask &= ~0x8;
         }

         tu_emit_clear_gmem_attachment(cmd, cs, a, clear_mask,
                                       &attachments[j].clearValue);
      }
   }
}
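/* When vkCmdClearAttachments() is recorded it isn't known yet whether the
 * renderpass will execute in gmem or sysmem mode, so emit both variants and
 * let the CP's conditional execution skip the one that doesn't apply.
 */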
void
tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
                       uint32_t attachmentCount,
                       const VkClearAttachment *pAttachments,
                       uint32_t rectCount,
                       const VkClearRect *pRects)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_cs *cs = &cmd->draw_cs;

   tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
   tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
   tu_cond_exec_end(cs);

   tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
   tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
   tu_cond_exec_end(cs);
}
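/* Clear a sysmem attachment at renderpass begin (VK_ATTACHMENT_LOAD_OP_CLEAR),
 * using the 2d blitter when possible and the 3d path for msaa attachments.
 */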
static void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
                           struct tu_cs *cs,
                           uint32_t a,
                           const VkRenderPassBeginInfo *info)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_image_view *iview = fb->attachments[a].attachment;
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];
   uint8_t mask = 0;

   if (attachment->clear_mask == VK_IMAGE_ASPECT_COLOR_BIT)
      mask = 0xf;
   if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
      mask |= 0x7;
   if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
      mask |= 0x8;

   if (!mask)
      return;

   const struct blit_ops *ops = &r2d_ops;
   if (attachment->samples > 1)
      ops = &r3d_ops;

   ops->setup(cmd, cs, attachment->format, ROTATE_0, true, mask);
   ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
   ops->clear_value(cs, attachment->format, &info->pClearValues[a]);

   /* Wait for any flushes at the beginning of the renderpass to complete */
   tu_cs_emit_wfi(cs);

   for (uint32_t i = 0; i < fb->layers; i++) {
      ops->dst(cs, iview, i);
      ops->run(cmd, cs);
   }

   /* The spec doesn't explicitly say, but presumably the initial renderpass
    * clear is considered part of the renderpass, and therefore barriers
    * aren't required inside the subpass/renderpass. Therefore we need to
    * flush CCU color into CCU depth here, just like with
    * vkCmdClearAttachments(). Note that because this only happens at the
    * beginning of a renderpass, and renderpass writes are considered
    * "incoherent", we shouldn't have to worry about syncing depth into color
    * beforehand as depth should already be flushed.
    */
   if (vk_format_is_depth_or_stencil(attachment->format)) {
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
   } else {
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
   }
}
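/* gmem counterpart of the above: program the sample count and emit a BLIT
 * event clear for each aspect requested by the attachment's clear mask.
 */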
static void
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         const VkRenderPassBeginInfo *info)
{
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];
   unsigned clear_mask = 0;

   if (attachment->clear_mask == VK_IMAGE_ASPECT_COLOR_BIT)
      clear_mask = 0xf;
   if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
      clear_mask |= 0x7;
   if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
      clear_mask |= 0x8;

   if (!clear_mask)
      return;

   tu_cs_emit_regs(cs, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));

   tu_emit_clear_gmem_attachment(cmd, cs, a, clear_mask,
                                 &info->pClearValues[a]);
}
static void
tu_emit_blit(struct tu_cmd_buffer *cmd,
             struct tu_cs *cs,
             const struct tu_image_view *iview,
             const struct tu_render_pass_attachment *attachment,
             bool resolve)
{
   tu_cs_emit_regs(cs,
                   A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));

   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(
      .unk0 = !resolve,
      .gmem = !resolve,
      /* "integer" bit disables msaa resolve averaging */
      .integer = vk_format_is_int(attachment->format)));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
   tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
   tu_cs_image_ref_2d(cs, iview, 0, false);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
   tu_cs_image_flag_ref(cs, iview, 0);

   tu_cs_emit_regs(cs,
                   A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));

   tu6_emit_event_write(cmd, cs, BLIT);
}
static bool
blit_can_resolve(VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   /* blit event can only do resolve for simple cases:
    * averaging samples as unsigned integers or choosing only one sample
    */
   if (vk_format_is_snorm(format) || vk_format_is_srgb(format))
      return false;

   /* can't do formats with larger channel sizes
    * note: this includes all float formats
    * note2: single channel integer formats seem OK
    */
   if (desc->channel[0].size > 10)
      return false;

   switch (format) {
   /* for unknown reasons blit event can't msaa resolve these formats when tiled
    * likely related to these formats having different layout from other cpp=2 formats
    */
   case VK_FORMAT_R8G8_UNORM:
   case VK_FORMAT_R8G8_UINT:
   case VK_FORMAT_R8G8_SINT:
   /* TODO: this one should be able to work? */
   case VK_FORMAT_D24_UNORM_S8_UINT:
      return false;
   default:
      break;
   }

   return true;
}
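/* Load an attachment's contents from sysmem into gmem at the start of tile
 * rendering, when the attachment's load op requires it or force_load is set.
 */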
void
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
                        struct tu_cs *cs,
                        uint32_t a,
                        bool force_load)
{
   const struct tu_image_view *iview =
      cmd->state.framebuffer->attachments[a].attachment;
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   if (attachment->load || force_load)
      tu_emit_blit(cmd, cs, iview, attachment, false);
}
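/* Store a gmem attachment out to sysmem at the end of tile rendering. Uses
 * the fast BLIT event path when the render area is aligned and the format
 * can be resolved by it; otherwise falls back to a 2D engine blit that reads
 * the tile straight out of gmem.
 */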
void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         uint32_t gmem_a)
{
   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
   const VkRect2D *render_area = &tiling->render_area;
   struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
   struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
   struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];

   if (!dst->store)
      return;

   uint32_t x1 = render_area->offset.x;
   uint32_t y1 = render_area->offset.y;
   uint32_t x2 = x1 + render_area->extent.width;
   uint32_t y2 = y1 + render_area->extent.height;
   /* x2/y2 can be unaligned if equal to the size of the image, since it will
    * write into padding space. The one exception is linear levels, which
    * don't have the required y padding in the layout (except for the last
    * level).
    */
   bool need_y2_align =
      y2 != iview->extent.height || iview->need_y2_align;

   bool unaligned =
      x1 % GMEM_ALIGN_W || (x2 % GMEM_ALIGN_W && x2 != iview->extent.width) ||
      y1 % GMEM_ALIGN_H || (y2 % GMEM_ALIGN_H && need_y2_align);

   /* use fast path when render area is aligned, except for unsupported resolve cases */
   if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
      tu_emit_blit(cmd, cs, iview, src, true);
      return;
   }

   if (dst->samples > 1) {
      /* I guess we need to use shader path in this case?
       * need a testcase which fails because of this
       */
      tu_finishme("unaligned store of msaa attachment\n");
      return;
   }

   r2d_setup_common(cmd, cs, dst->format, ROTATE_0, false, 0xf, true);
   r2d_dst(cs, iview, 0);
   r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);

   tu_cs_emit_regs(cs,
                   A6XX_SP_PS_2D_SRC_INFO(
                      .color_format = tu6_format_texture(src->format, TILE6_2).fmt,
                      .tile_mode = TILE6_2,
                      .srgb = vk_format_is_srgb(src->format),
                      .samples = tu_msaa_samples(src->samples),
                      .samples_average = !vk_format_is_int(src->format),
                      .unk20 = 1,
                      .unk22 = 1),
                   /* note: src size does not matter when not scaling */
                   A6XX_SP_PS_2D_SRC_SIZE(.width = 0x3fff, .height = 0x3fff),
                   A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
                   A6XX_SP_PS_2D_SRC_HI(),
                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = tiling->tile0.extent.width * src->cpp));

   /* sync GMEM writes with CACHE */
   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);

   /* Wait for CACHE_INVALIDATE to land */
   tu_cs_emit_wfi(cs);

   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));

   /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
    * sysmem, and we generally assume that GMEM renderpasses leave their
    * results in sysmem, so we need to flush manually here.
    */
   tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
}