2 * Copyright 2019-2020 Valve Corporation
3 * SPDX-License-Identifier: MIT
6 * Jonathan Marek <jonathan@marek.ca>
9 #include "tu_private.h"
12 #include "vk_format.h"
14 #include "util/format_r11g11b10f.h"
15 #include "util/format_rgb9e5.h"
16 #include "util/format_srgb.h"
17 #include "util/u_half.h"
19 /* helper functions previously in tu_formats.c */
22 tu_pack_mask(int bits
)
25 return (1ull << bits
) - 1;
29 tu_pack_float32_for_unorm(float val
, int bits
)
31 const uint32_t max
= tu_pack_mask(bits
);
37 return _mesa_lroundevenf(val
* (float) max
);
41 tu_pack_float32_for_snorm(float val
, int bits
)
43 const int32_t max
= tu_pack_mask(bits
- 1);
50 tmp
= _mesa_lroundevenf(val
* (float) max
);
52 return tmp
& tu_pack_mask(bits
);
56 tu_pack_float32_for_uscaled(float val
, int bits
)
58 const uint32_t max
= tu_pack_mask(bits
);
61 else if (val
> (float) max
)
64 return (uint32_t) val
;
68 tu_pack_float32_for_sscaled(float val
, int bits
)
70 const int32_t max
= tu_pack_mask(bits
- 1);
71 const int32_t min
= -max
- 1;
73 if (val
< (float) min
)
75 else if (val
> (float) max
)
80 return tmp
& tu_pack_mask(bits
);
84 tu_pack_uint32_for_uint(uint32_t val
, int bits
)
86 return val
& tu_pack_mask(bits
);
90 tu_pack_int32_for_sint(int32_t val
, int bits
)
92 return val
& tu_pack_mask(bits
);
96 tu_pack_float32_for_sfloat(float val
, int bits
)
98 assert(bits
== 16 || bits
== 32);
99 return bits
== 16 ? util_float_to_half(val
) : fui(val
);
102 union tu_clear_component_value
{
109 tu_pack_clear_component_value(union tu_clear_component_value val
,
110 const struct util_format_channel_description
*ch
)
115 case UTIL_FORMAT_TYPE_UNSIGNED
:
116 /* normalized, scaled, or pure integer */
118 packed
= tu_pack_float32_for_unorm(val
.float32
, ch
->size
);
119 else if (ch
->pure_integer
)
120 packed
= tu_pack_uint32_for_uint(val
.uint32
, ch
->size
);
122 packed
= tu_pack_float32_for_uscaled(val
.float32
, ch
->size
);
124 case UTIL_FORMAT_TYPE_SIGNED
:
125 /* normalized, scaled, or pure integer */
127 packed
= tu_pack_float32_for_snorm(val
.float32
, ch
->size
);
128 else if (ch
->pure_integer
)
129 packed
= tu_pack_int32_for_sint(val
.int32
, ch
->size
);
131 packed
= tu_pack_float32_for_sscaled(val
.float32
, ch
->size
);
133 case UTIL_FORMAT_TYPE_FLOAT
:
134 packed
= tu_pack_float32_for_sfloat(val
.float32
, ch
->size
);
137 unreachable("unexpected channel type");
142 assert((packed
& tu_pack_mask(ch
->size
)) == packed
);
146 static const struct util_format_channel_description
*
147 tu_get_format_channel_description(const struct util_format_description
*desc
,
150 switch (desc
->swizzle
[comp
]) {
152 return &desc
->channel
[0];
154 return &desc
->channel
[1];
156 return &desc
->channel
[2];
158 return &desc
->channel
[3];
164 static union tu_clear_component_value
165 tu_get_clear_component_value(const VkClearValue
*val
, int comp
,
166 enum util_format_colorspace colorspace
)
170 union tu_clear_component_value tmp
;
171 switch (colorspace
) {
172 case UTIL_FORMAT_COLORSPACE_ZS
:
175 tmp
.float32
= val
->depthStencil
.depth
;
177 tmp
.uint32
= val
->depthStencil
.stencil
;
179 case UTIL_FORMAT_COLORSPACE_SRGB
:
181 tmp
.float32
= util_format_linear_to_srgb_float(val
->color
.float32
[comp
]);
186 tmp
.uint32
= val
->color
.uint32
[comp
];
193 /* r2d_ = BLIT_OP_SCALE operations */
195 static enum a6xx_2d_ifmt
196 format_to_ifmt(enum a6xx_format fmt
)
204 case FMT6_8_8_8_8_UNORM
:
205 case FMT6_8_8_8_X8_UNORM
:
206 case FMT6_8_8_8_8_SNORM
:
207 case FMT6_4_4_4_4_UNORM
:
208 case FMT6_5_5_5_1_UNORM
:
209 case FMT6_5_6_5_UNORM
:
210 case FMT6_Z24_UNORM_S8_UINT
:
211 case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
:
216 case FMT6_32_32_UINT
:
217 case FMT6_32_32_SINT
:
218 case FMT6_32_32_32_32_UINT
:
219 case FMT6_32_32_32_32_SINT
:
224 case FMT6_16_16_UINT
:
225 case FMT6_16_16_SINT
:
226 case FMT6_16_16_16_16_UINT
:
227 case FMT6_16_16_16_16_SINT
:
228 case FMT6_10_10_10_2_UINT
:
235 case FMT6_8_8_8_8_UINT
:
236 case FMT6_8_8_8_8_SINT
:
241 case FMT6_16_16_UNORM
:
242 case FMT6_16_16_SNORM
:
243 case FMT6_16_16_16_16_UNORM
:
244 case FMT6_16_16_16_16_SNORM
:
246 case FMT6_32_32_FLOAT
:
247 case FMT6_32_32_32_32_FLOAT
:
251 case FMT6_16_16_FLOAT
:
252 case FMT6_16_16_16_16_FLOAT
:
253 case FMT6_11_11_10_FLOAT
:
254 case FMT6_10_10_10_2_UNORM
:
255 case FMT6_10_10_10_2_UNORM_DEST
:
259 unreachable("bad format");
265 r2d_coords(struct tu_cs
*cs
,
266 const VkOffset2D
*dst
,
267 const VkOffset2D
*src
,
268 const VkExtent2D
*extent
)
271 A6XX_GRAS_2D_DST_TL(.x
= dst
->x
, .y
= dst
->y
),
272 A6XX_GRAS_2D_DST_BR(.x
= dst
->x
+ extent
->width
- 1, .y
= dst
->y
+ extent
->height
- 1));
278 A6XX_GRAS_2D_SRC_TL_X(.x
= src
->x
),
279 A6XX_GRAS_2D_SRC_BR_X(.x
= src
->x
+ extent
->width
- 1),
280 A6XX_GRAS_2D_SRC_TL_Y(.y
= src
->y
),
281 A6XX_GRAS_2D_SRC_BR_Y(.y
= src
->y
+ extent
->height
- 1));
285 r2d_clear_value(struct tu_cs
*cs
, VkFormat format
, const VkClearValue
*val
)
287 uint32_t clear_value
[4] = {};
290 case VK_FORMAT_X8_D24_UNORM_PACK32
:
291 case VK_FORMAT_D24_UNORM_S8_UINT
:
292 /* cleared as r8g8b8a8_unorm using special format */
293 clear_value
[0] = tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 24);
294 clear_value
[1] = clear_value
[0] >> 8;
295 clear_value
[2] = clear_value
[0] >> 16;
296 clear_value
[3] = val
->depthStencil
.stencil
;
298 case VK_FORMAT_D16_UNORM
:
299 case VK_FORMAT_D32_SFLOAT
:
301 clear_value
[0] = fui(val
->depthStencil
.depth
);
303 case VK_FORMAT_S8_UINT
:
304 clear_value
[0] = val
->depthStencil
.stencil
;
306 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
:
307 /* cleared as UINT32 */
308 clear_value
[0] = float3_to_rgb9e5(val
->color
.float32
);
311 assert(!vk_format_is_depth_or_stencil(format
));
312 const struct util_format_description
*desc
= vk_format_description(format
);
313 enum a6xx_2d_ifmt ifmt
= format_to_ifmt(tu6_base_format(format
));
315 assert(desc
&& (desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
||
316 format
== VK_FORMAT_B10G11R11_UFLOAT_PACK32
));
318 for (unsigned i
= 0; i
< desc
->nr_channels
; i
++) {
319 const struct util_format_channel_description
*ch
= &desc
->channel
[i
];
320 if (ifmt
== R2D_UNORM8
) {
321 float linear
= val
->color
.float32
[i
];
322 if (desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
&& i
< 3)
323 linear
= util_format_linear_to_srgb_float(val
->color
.float32
[i
]);
325 if (ch
->type
== UTIL_FORMAT_TYPE_SIGNED
)
326 clear_value
[i
] = tu_pack_float32_for_snorm(linear
, 8);
328 clear_value
[i
] = tu_pack_float32_for_unorm(linear
, 8);
329 } else if (ifmt
== R2D_FLOAT16
) {
330 clear_value
[i
] = util_float_to_half(val
->color
.float32
[i
]);
332 assert(ifmt
== R2D_FLOAT32
|| ifmt
== R2D_INT32
||
333 ifmt
== R2D_INT16
|| ifmt
== R2D_INT8
);
334 clear_value
[i
] = val
->color
.uint32
[i
];
340 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_SRC_SOLID_C0
, 4);
341 tu_cs_emit_array(cs
, clear_value
, 4);
345 r2d_src(struct tu_cmd_buffer
*cmd
,
347 struct tu_image
*image
,
354 struct tu_native_format format
= tu6_format_image_src(image
, vk_format
, level
);
356 /* stencil readout path fails with UBWC enabled (why?) */
357 assert(!stencil_read
|| !image
->layout
.ubwc_layer_size
);
363 A6XX_SP_PS_2D_SRC_INFO(
364 .color_format
= format
.fmt
,
365 .tile_mode
= format
.tile_mode
,
366 .color_swap
= format
.swap
,
367 .flags
= image
->layout
.ubwc_layer_size
!= 0,
368 .srgb
= vk_format_is_srgb(vk_format
),
369 .samples
= tu_msaa_samples(image
->samples
),
370 .filter
= linear_filter
,
371 .samples_average
= image
->samples
> 1 &&
372 !vk_format_is_int(vk_format
) &&
373 !vk_format_is_depth_or_stencil(vk_format
),
376 A6XX_SP_PS_2D_SRC_SIZE(
377 .width
= tu_minify(image
->extent
.width
, level
),
378 .height
= tu_minify(image
->extent
.height
, level
)),
379 A6XX_SP_PS_2D_SRC(tu_image_base_ref(image
, level
, layer
)),
380 A6XX_SP_PS_2D_SRC_PITCH(.pitch
= tu_image_pitch(image
, level
)));
382 if (image
->layout
.ubwc_layer_size
) {
384 A6XX_SP_PS_2D_SRC_FLAGS(tu_image_ubwc_base_ref(image
, level
, layer
)),
385 A6XX_SP_PS_2D_SRC_FLAGS_PITCH(.pitch
= tu_image_ubwc_pitch(image
, level
)));
390 r2d_src_buffer(struct tu_cmd_buffer
*cmd
,
393 uint64_t va
, uint32_t pitch
,
394 uint32_t width
, uint32_t height
)
396 struct tu_native_format format
= tu6_format_texture(vk_format
, TILE6_LINEAR
);
399 A6XX_SP_PS_2D_SRC_INFO(
400 .color_format
= format
.fmt
,
401 .color_swap
= format
.swap
,
402 .srgb
= vk_format_is_srgb(vk_format
),
405 A6XX_SP_PS_2D_SRC_SIZE(.width
= width
, .height
= height
),
406 A6XX_SP_PS_2D_SRC_LO((uint32_t) va
),
407 A6XX_SP_PS_2D_SRC_HI(va
>> 32),
408 A6XX_SP_PS_2D_SRC_PITCH(.pitch
= pitch
));
412 r2d_dst(struct tu_cs
*cs
,
413 struct tu_image
*image
,
418 struct tu_native_format format
= tu6_format_image(image
, vk_format
, level
);
420 assert(image
->samples
== 1);
424 .color_format
= format
.fmt
,
425 .tile_mode
= format
.tile_mode
,
426 .color_swap
= format
.swap
,
427 .flags
= image
->layout
.ubwc_layer_size
!= 0,
428 .srgb
= vk_format_is_srgb(image
->vk_format
)),
429 A6XX_RB_2D_DST(tu_image_base_ref(image
, level
, layer
)),
430 A6XX_RB_2D_DST_SIZE(.pitch
= tu_image_pitch(image
, level
)));
432 if (image
->layout
.ubwc_layer_size
) {
434 A6XX_RB_2D_DST_FLAGS(tu_image_ubwc_base_ref(image
, level
, layer
)),
435 A6XX_RB_2D_DST_FLAGS_PITCH(.pitch
= tu_image_ubwc_pitch(image
, level
)));
440 r2d_dst_buffer(struct tu_cs
*cs
, VkFormat vk_format
, uint64_t va
, uint32_t pitch
)
442 struct tu_native_format format
= tu6_format_color(vk_format
, TILE6_LINEAR
);
446 .color_format
= format
.fmt
,
447 .color_swap
= format
.swap
,
448 .srgb
= vk_format_is_srgb(vk_format
)),
449 A6XX_RB_2D_DST_LO((uint32_t) va
),
450 A6XX_RB_2D_DST_HI(va
>> 32),
451 A6XX_RB_2D_DST_SIZE(.pitch
= pitch
));
455 r2d_setup_common(struct tu_cmd_buffer
*cmd
,
458 enum a6xx_rotation rotation
,
463 enum a6xx_format format
= tu6_base_format(vk_format
);
464 enum a6xx_2d_ifmt ifmt
= format_to_ifmt(format
);
465 uint32_t unknown_8c01
= 0;
467 if (format
== FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
) {
468 /* preserve depth channels */
470 unknown_8c01
= 0x00084001;
471 /* preserve stencil channel */
473 unknown_8c01
= 0x08000041;
476 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_8C01
, 1);
477 tu_cs_emit(cs
, unknown_8c01
);
479 uint32_t blit_cntl
= A6XX_RB_2D_BLIT_CNTL(
482 .solid_color
= clear
,
483 .d24s8
= format
== FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
&& !clear
,
484 .color_format
= format
,
486 .ifmt
= vk_format_is_srgb(vk_format
) ? R2D_UNORM8_SRGB
: ifmt
,
489 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_BLIT_CNTL
, 1);
490 tu_cs_emit(cs
, blit_cntl
);
492 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_BLIT_CNTL
, 1);
493 tu_cs_emit(cs
, blit_cntl
);
495 if (format
== FMT6_10_10_10_2_UNORM_DEST
)
496 format
= FMT6_16_16_16_16_FLOAT
;
498 tu_cs_emit_regs(cs
, A6XX_SP_2D_SRC_FORMAT(
499 .sint
= vk_format_is_sint(vk_format
),
500 .uint
= vk_format_is_uint(vk_format
),
501 .color_format
= format
,
502 .srgb
= vk_format_is_srgb(vk_format
),
507 r2d_setup(struct tu_cmd_buffer
*cmd
,
510 enum a6xx_rotation rotation
,
514 const struct tu_physical_device
*phys_dev
= cmd
->device
->physical_device
;
516 /* TODO: flushing with barriers instead of blindly always flushing */
517 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
518 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_DEPTH_TS
, true);
519 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_COLOR
, false);
520 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_DEPTH
, false);
521 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
, false);
525 A6XX_RB_CCU_CNTL(.offset
= phys_dev
->ccu_offset_bypass
));
527 r2d_setup_common(cmd
, cs
, vk_format
, rotation
, clear
, mask
, false);
531 r2d_run(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
)
533 tu_cs_emit_pkt7(cs
, CP_BLIT
, 1);
534 tu_cs_emit(cs
, CP_BLIT_0_OP(BLIT_OP_SCALE
));
536 /* TODO: flushing with barriers instead of blindly always flushing */
537 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
538 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_DEPTH_TS
, true);
539 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
, false);
542 /* r3d_ = shader path operations */
545 r3d_pipeline(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
, bool blit
, uint32_t num_rts
)
547 static const instr_t vs_code
[] = {
548 /* r0.xyz = r0.w ? c1.xyz : c0.xyz
549 * r1.xy = r0.w ? c1.zw : c0.zw
553 .opc_cat
= 3, .opc
= OPC_SEL_B32
& 63, .repeat
= 2, .dst
= 0,
554 .c1
= {.src1_c
= 1, .src1
= 4}, .src1_r
= 1,
556 .c2
= {.src3_c
= 1, .dummy
= 1, .src3
= 0},
559 .opc_cat
= 3, .opc
= OPC_SEL_B32
& 63, .repeat
= 1, .dst
= 4,
560 .c1
= {.src1_c
= 1, .src1
= 6}, .src1_r
= 1,
562 .c2
= {.src3_c
= 1, .dummy
= 1, .src3
= 2},
564 { .cat1
= { .opc_cat
= 1, .src_type
= TYPE_F32
, .dst_type
= TYPE_F32
, .dst
= 3,
565 .src_im
= 1, .fim_val
= 1.0f
} },
566 { .cat0
= { .opc
= OPC_END
} },
568 #define FS_OFFSET (16 * sizeof(instr_t))
569 STATIC_ASSERT(sizeof(vs_code
) <= FS_OFFSET
);
571 /* vs inputs: only vtx id in r0.w */
572 tu_cs_emit_pkt4(cs
, REG_A6XX_VFD_CONTROL_0
, 7);
573 tu_cs_emit(cs
, 0x00000000);
574 tu_cs_emit(cs
, 0xfcfcfc00 | A6XX_VFD_CONTROL_1_REGID4VTX(3));
575 tu_cs_emit(cs
, 0x0000fcfc);
576 tu_cs_emit(cs
, 0xfcfcfcfc);
577 tu_cs_emit(cs
, 0x000000fc);
578 tu_cs_emit(cs
, 0x0000fcfc);
579 tu_cs_emit(cs
, 0x00000000);
581 /* vs outputs: position in r0.xyzw, blit coords in r1.xy */
582 tu_cs_emit_pkt4(cs
, REG_A6XX_VPC_VAR_DISABLE(0), 4);
583 tu_cs_emit(cs
, blit
? 0xffffffcf : 0xffffffff);
584 tu_cs_emit(cs
, 0xffffffff);
585 tu_cs_emit(cs
, 0xffffffff);
586 tu_cs_emit(cs
, 0xffffffff);
588 tu_cs_emit_regs(cs
, A6XX_SP_VS_OUT_REG(0,
589 .a_regid
= 0, .a_compmask
= 0xf,
590 .b_regid
= 4, .b_compmask
= 0x3));
591 tu_cs_emit_regs(cs
, A6XX_SP_VS_VPC_DST_REG(0, .outloc0
= 0, .outloc1
= 4));
593 tu_cs_emit_pkt4(cs
, REG_A6XX_VPC_CNTL_0
, 1);
594 tu_cs_emit(cs
, 0xff00ff00 |
595 COND(blit
, A6XX_VPC_CNTL_0_VARYING
) |
596 A6XX_VPC_CNTL_0_NUMNONPOSVAR(blit
? 8 : 0));
598 tu_cs_emit_regs(cs
, A6XX_VPC_PACK(
601 .stride_in_vpc
= blit
? 6 : 4));
602 tu_cs_emit_regs(cs
, A6XX_SP_PRIMITIVE_CNTL(.vsout
= blit
? 2 : 1));
604 A6XX_PC_PRIMITIVE_CNTL_0(),
605 A6XX_PC_PRIMITIVE_CNTL_1(.stride_in_vpc
= blit
? 6 : 4));
608 tu_cs_emit_pkt4(cs
, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
609 tu_cs_emit(cs
, blit
? 0xe000 : 0); // I think this can just be 0
610 for (uint32_t i
= 1; i
< 8; i
++)
613 tu_cs_emit_pkt4(cs
, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
614 for (uint32_t i
= 0; i
< 8; i
++)
615 tu_cs_emit(cs
, 0x99999999);
617 /* fs inputs: none, prefetch in blit case */
618 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_PREFETCH_CNTL
, 1 + blit
);
619 tu_cs_emit(cs
, A6XX_SP_FS_PREFETCH_CNTL_COUNT(blit
) |
620 A6XX_SP_FS_PREFETCH_CNTL_UNK4(0xfc) |
623 tu_cs_emit(cs
, A6XX_SP_FS_PREFETCH_CMD_SRC(4) |
624 A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(0) |
625 A6XX_SP_FS_PREFETCH_CMD_TEX_ID(0) |
626 A6XX_SP_FS_PREFETCH_CMD_DST(0) |
627 A6XX_SP_FS_PREFETCH_CMD_WRMASK(0xf) |
628 A6XX_SP_FS_PREFETCH_CMD_CMD(0x4));
631 tu_cs_emit_pkt4(cs
, REG_A6XX_HLSQ_CONTROL_1_REG
, 5);
632 tu_cs_emit(cs
, 0x3); // XXX blob uses 3 in blit path
633 tu_cs_emit(cs
, 0xfcfcfcfc);
634 tu_cs_emit(cs
, A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(blit
? 0 : 0xfc) |
635 A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_CENTROID(0xfc) |
637 tu_cs_emit(cs
, 0xfcfcfcfc);
638 tu_cs_emit(cs
, 0xfcfc);
640 tu_cs_emit_regs(cs
, A6XX_HLSQ_UNKNOWN_B980(blit
? 3 : 1));
641 tu_cs_emit_regs(cs
, A6XX_GRAS_CNTL(.varying
= blit
));
643 A6XX_RB_RENDER_CONTROL0(.varying
= blit
, .unk10
= blit
),
644 A6XX_RB_RENDER_CONTROL1());
646 tu_cs_emit_regs(cs
, A6XX_RB_SAMPLE_CNTL());
647 tu_cs_emit_regs(cs
, A6XX_GRAS_UNKNOWN_8101());
648 tu_cs_emit_regs(cs
, A6XX_GRAS_SAMPLE_CNTL());
651 struct ts_cs_memory shaders
= { };
652 VkResult result
= tu_cs_alloc(&cmd
->sub_cs
, 2, 16 * sizeof(instr_t
), &shaders
);
653 assert(result
== VK_SUCCESS
);
655 memcpy(shaders
.map
, vs_code
, sizeof(vs_code
));
657 instr_t
*fs
= (instr_t
*) ((uint8_t*) shaders
.map
+ FS_OFFSET
);
658 for (uint32_t i
= 0; i
< num_rts
; i
++) {
659 /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
660 fs
[i
] = (instr_t
) { .cat1
= { .opc_cat
= 1, .src_type
= TYPE_S32
, .dst_type
= TYPE_S32
,
661 .repeat
= 3, .dst
= i
* 4, .src_c
= 1, .src_r
= 1, .src
= i
* 4 } };
663 fs
[num_rts
] = (instr_t
) { .cat0
= { .opc
= OPC_END
} };
664 /* note: assumed <= 16 instructions (MAX_RTS is 8) */
666 tu_cs_emit_regs(cs
, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));
668 A6XX_HLSQ_VS_CNTL(.constlen
= 8, .enabled
= true),
671 A6XX_HLSQ_GS_CNTL());
672 tu_cs_emit_regs(cs
, A6XX_HLSQ_FS_CNTL(.constlen
= 4 * num_rts
, .enabled
= true));
675 A6XX_SP_VS_CONFIG(.enabled
= true),
676 A6XX_SP_VS_INSTRLEN(1));
677 tu_cs_emit_regs(cs
, A6XX_SP_HS_CONFIG());
678 tu_cs_emit_regs(cs
, A6XX_SP_DS_CONFIG());
679 tu_cs_emit_regs(cs
, A6XX_SP_GS_CONFIG());
681 A6XX_SP_FS_CONFIG(.enabled
= true, .ntex
= blit
, .nsamp
= blit
),
682 A6XX_SP_FS_INSTRLEN(1));
684 tu_cs_emit_regs(cs
, A6XX_SP_VS_CTRL_REG0(
685 .threadsize
= FOUR_QUADS
,
686 .fullregfootprint
= 2,
687 .mergedregs
= true));
688 tu_cs_emit_regs(cs
, A6XX_SP_FS_CTRL_REG0(
690 .threadsize
= FOUR_QUADS
,
691 /* could this be 0 in !blit && !num_rts case ? */
692 .fullregfootprint
= MAX2(1, num_rts
),
693 .mergedregs
= true)); /* note: tu_pipeline also sets 0x1000000 bit */
695 tu_cs_emit_regs(cs
, A6XX_SP_IBO_COUNT(0));
697 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_GEOM
, 3);
698 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
699 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER
) |
700 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
701 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER
) |
702 CP_LOAD_STATE6_0_NUM_UNIT(1));
703 tu_cs_emit_qw(cs
, shaders
.iova
);
705 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_VS_OBJ_START_LO
, 2);
706 tu_cs_emit_qw(cs
, shaders
.iova
);
708 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3);
709 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
710 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER
) |
711 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
712 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER
) |
713 CP_LOAD_STATE6_0_NUM_UNIT(1));
714 tu_cs_emit_qw(cs
, shaders
.iova
+ FS_OFFSET
);
716 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OBJ_START_LO
, 2);
717 tu_cs_emit_qw(cs
, shaders
.iova
+ FS_OFFSET
);
721 .persp_division_disable
= 1,
722 .vp_xform_disable
= 1,
723 .vp_clip_code_ignore
= 1,
725 A6XX_GRAS_UNKNOWN_8001(0));
726 tu_cs_emit_regs(cs
, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?
729 A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0(.x
= 0, .y
= 0),
730 A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0(.x
= 0x7fff, .y
= 0x7fff));
732 A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0(.x
= 0, .y
= 0),
733 A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0(.x
= 0x7fff, .y
= 0x7fff));
737 r3d_coords_raw(struct tu_cs
*cs
, const float *coords
)
739 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_GEOM
, 3 + 8);
740 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
741 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
742 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
743 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER
) |
744 CP_LOAD_STATE6_0_NUM_UNIT(2));
745 tu_cs_emit(cs
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
746 tu_cs_emit(cs
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
747 tu_cs_emit_array(cs
, (const uint32_t *) coords
, 8);
751 r3d_coords(struct tu_cs
*cs
,
752 const VkOffset2D
*dst
,
753 const VkOffset2D
*src
,
754 const VkExtent2D
*extent
)
756 int32_t src_x1
= src
? src
->x
: 0;
757 int32_t src_y1
= src
? src
->y
: 0;
758 r3d_coords_raw(cs
, (float[]) {
761 dst
->x
+ extent
->width
, dst
->y
+ extent
->height
,
762 src_x1
+ extent
->width
, src_y1
+ extent
->height
,
767 r3d_clear_value(struct tu_cs
*cs
, VkFormat format
, const VkClearValue
*val
)
769 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3 + 4);
770 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
771 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
772 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
773 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER
) |
774 CP_LOAD_STATE6_0_NUM_UNIT(1));
775 tu_cs_emit(cs
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
776 tu_cs_emit(cs
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
778 case VK_FORMAT_X8_D24_UNORM_PACK32
:
779 case VK_FORMAT_D24_UNORM_S8_UINT
: {
780 /* cleared as r8g8b8a8_unorm using special format */
781 uint32_t tmp
= tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 24);
782 tu_cs_emit(cs
, fui((tmp
& 0xff) / 255.0f
));
783 tu_cs_emit(cs
, fui((tmp
>> 8 & 0xff) / 255.0f
));
784 tu_cs_emit(cs
, fui((tmp
>> 16 & 0xff) / 255.0f
));
785 tu_cs_emit(cs
, fui((val
->depthStencil
.stencil
& 0xff) / 255.0f
));
787 case VK_FORMAT_D16_UNORM
:
788 case VK_FORMAT_D32_SFLOAT
:
789 tu_cs_emit(cs
, fui(val
->depthStencil
.depth
));
794 case VK_FORMAT_S8_UINT
:
795 tu_cs_emit(cs
, val
->depthStencil
.stencil
& 0xff);
801 /* as color formats use clear value as-is */
802 assert(!vk_format_is_depth_or_stencil(format
));
803 tu_cs_emit_array(cs
, val
->color
.uint32
, 4);
809 r3d_src_common(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
, uint32_t *tex_const
, bool linear_filter
)
811 struct ts_cs_memory texture
= { };
812 VkResult result
= tu_cs_alloc(&cmd
->sub_cs
,
813 2, /* allocate space for a sampler too */
814 A6XX_TEX_CONST_DWORDS
, &texture
);
815 assert(result
== VK_SUCCESS
);
817 memcpy(texture
.map
, tex_const
, A6XX_TEX_CONST_DWORDS
* 4);
819 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 0] =
820 A6XX_TEX_SAMP_0_XY_MAG(linear_filter
? A6XX_TEX_LINEAR
: A6XX_TEX_NEAREST
) |
821 A6XX_TEX_SAMP_0_XY_MIN(linear_filter
? A6XX_TEX_LINEAR
: A6XX_TEX_NEAREST
) |
822 A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE
) |
823 A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE
) |
824 A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE
) |
825 0x60000; /* XXX used by blob, doesn't seem necessary */
826 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 1] =
827 0x1 | /* XXX used by blob, doesn't seem necessary */
828 A6XX_TEX_SAMP_1_UNNORM_COORDS
|
829 A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR
;
830 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 2] = 0;
831 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 3] = 0;
833 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3);
834 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
835 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER
) |
836 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
837 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX
) |
838 CP_LOAD_STATE6_0_NUM_UNIT(1));
839 tu_cs_emit_qw(cs
, texture
.iova
+ A6XX_TEX_CONST_DWORDS
* 4);
841 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_TEX_SAMP_LO
, 2);
842 tu_cs_emit_qw(cs
, texture
.iova
+ A6XX_TEX_CONST_DWORDS
* 4);
844 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3);
845 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
846 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
847 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
848 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX
) |
849 CP_LOAD_STATE6_0_NUM_UNIT(1));
850 tu_cs_emit_qw(cs
, texture
.iova
);
852 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_TEX_CONST_LO
, 2);
853 tu_cs_emit_qw(cs
, texture
.iova
);
855 tu_cs_emit_regs(cs
, A6XX_SP_FS_TEX_COUNT(1));
859 r3d_src(struct tu_cmd_buffer
*cmd
,
861 struct tu_image
*image
,
868 struct tu_image_view view
;
870 /* use tu_image_view_init to fill out a view descriptor */
871 tu_image_view_init(&view
, cmd
->device
, &(VkImageViewCreateInfo
) {
872 .image
= tu_image_to_handle(image
),
873 .viewType
= VK_IMAGE_VIEW_TYPE_2D
,
875 /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
876 .components
.r
= stencil_read
? VK_COMPONENT_SWIZZLE_A
: VK_COMPONENT_SWIZZLE_R
,
877 .subresourceRange
= {
878 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
879 .baseMipLevel
= level
,
881 .baseArrayLayer
= layer
,
885 r3d_src_common(cmd
, cs
, view
.descriptor
, linear_filter
);
889 r3d_src_buffer(struct tu_cmd_buffer
*cmd
,
892 uint64_t va
, uint32_t pitch
,
893 uint32_t width
, uint32_t height
)
895 uint32_t desc
[A6XX_TEX_CONST_DWORDS
];
897 struct tu_native_format format
= tu6_format_texture(vk_format
, TILE6_LINEAR
);
900 COND(vk_format_is_srgb(vk_format
), A6XX_TEX_CONST_0_SRGB
) |
901 A6XX_TEX_CONST_0_FMT(format
.fmt
) |
902 A6XX_TEX_CONST_0_SWAP(format
.swap
) |
903 A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X
) |
904 // XXX to swizzle into .w for stencil buffer_to_image
905 A6XX_TEX_CONST_0_SWIZ_Y(vk_format
== VK_FORMAT_R8_UNORM
? A6XX_TEX_X
: A6XX_TEX_Y
) |
906 A6XX_TEX_CONST_0_SWIZ_Z(vk_format
== VK_FORMAT_R8_UNORM
? A6XX_TEX_X
: A6XX_TEX_Z
) |
907 A6XX_TEX_CONST_0_SWIZ_W(vk_format
== VK_FORMAT_R8_UNORM
? A6XX_TEX_X
: A6XX_TEX_W
);
908 desc
[1] = A6XX_TEX_CONST_1_WIDTH(width
) | A6XX_TEX_CONST_1_HEIGHT(height
);
910 A6XX_TEX_CONST_2_FETCHSIZE(tu6_fetchsize(vk_format
)) |
911 A6XX_TEX_CONST_2_PITCH(pitch
) |
912 A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D
);
916 for (uint32_t i
= 6; i
< A6XX_TEX_CONST_DWORDS
; i
++)
919 r3d_src_common(cmd
, cs
, desc
, false);
923 r3d_dst(struct tu_cs
*cs
,
924 struct tu_image
*image
,
929 tu6_emit_msaa(cs
, image
->samples
); /* TODO: move to setup */
931 struct tu_native_format format
= tu6_format_image(image
, vk_format
, level
);
934 A6XX_RB_MRT_BUF_INFO(0,
935 .color_tile_mode
= format
.tile_mode
,
936 .color_format
= format
.fmt
,
937 .color_swap
= format
.swap
),
938 A6XX_RB_MRT_PITCH(0, tu_image_pitch(image
, level
)),
939 A6XX_RB_MRT_ARRAY_PITCH(0, image
->layout
.layer_size
),
940 A6XX_RB_MRT_BASE(0, tu_image_base_ref(image
, level
, layer
)),
941 A6XX_RB_MRT_BASE_GMEM(0, 0));
944 A6XX_RB_MRT_FLAG_BUFFER_ADDR(0, tu_image_ubwc_base_ref(image
, level
, layer
)),
945 A6XX_RB_MRT_FLAG_BUFFER_PITCH(0, .pitch
= tu_image_ubwc_pitch(image
, level
)));
947 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_CNTL(.flag_mrts
= image
->layout
.ubwc_layer_size
!= 0));
951 r3d_dst_buffer(struct tu_cs
*cs
, VkFormat vk_format
, uint64_t va
, uint32_t pitch
)
953 struct tu_native_format format
= tu6_format_color(vk_format
, TILE6_LINEAR
);
955 tu6_emit_msaa(cs
, 1); /* TODO: move to setup */
958 A6XX_RB_MRT_BUF_INFO(0, .color_format
= format
.fmt
, .color_swap
= format
.swap
),
959 A6XX_RB_MRT_PITCH(0, pitch
),
960 A6XX_RB_MRT_ARRAY_PITCH(0, 0),
961 A6XX_RB_MRT_BASE_LO(0, (uint32_t) va
),
962 A6XX_RB_MRT_BASE_HI(0, va
>> 32),
963 A6XX_RB_MRT_BASE_GMEM(0, 0));
965 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_CNTL());
969 r3d_setup(struct tu_cmd_buffer
*cmd
,
972 enum a6xx_rotation rotation
,
976 const struct tu_physical_device
*phys_dev
= cmd
->device
->physical_device
;
978 if (!cmd
->state
.pass
) {
979 /* TODO: flushing with barriers instead of blindly always flushing */
980 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
981 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_DEPTH_TS
, true);
982 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_COLOR
, false);
983 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_DEPTH
, false);
984 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
, false);
987 A6XX_RB_CCU_CNTL(.offset
= phys_dev
->ccu_offset_bypass
));
989 tu6_emit_window_scissor(cs
, 0, 0, 0x7fff, 0x7fff);
991 tu_cs_emit_regs(cs
, A6XX_GRAS_BIN_CONTROL(.dword
= 0xc00000));
992 tu_cs_emit_regs(cs
, A6XX_RB_BIN_CONTROL(.dword
= 0xc00000));
994 r3d_pipeline(cmd
, cs
, !clear
, clear
? 1 : 0);
996 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_CNTL0
, 2);
997 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
998 A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
1000 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1));
1002 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_REG(0), 1);
1003 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_REG_REGID(0));
1006 A6XX_RB_FS_OUTPUT_CNTL0(),
1007 A6XX_RB_FS_OUTPUT_CNTL1(.mrt
= 1));
1009 tu_cs_emit_regs(cs
, A6XX_SP_BLEND_CNTL());
1010 tu_cs_emit_regs(cs
, A6XX_RB_BLEND_CNTL(.sample_mask
= 0xffff));
1011 tu_cs_emit_regs(cs
, A6XX_RB_ALPHA_CONTROL());
1013 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_PLANE_CNTL());
1014 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_CNTL());
1015 tu_cs_emit_regs(cs
, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
1016 tu_cs_emit_regs(cs
, A6XX_RB_STENCIL_CONTROL());
1017 tu_cs_emit_regs(cs
, A6XX_RB_STENCILMASK());
1018 tu_cs_emit_regs(cs
, A6XX_RB_STENCILWRMASK());
1019 tu_cs_emit_regs(cs
, A6XX_RB_STENCILREF());
1021 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_COMPONENTS(.rt0
= 0xf));
1022 tu_cs_emit_regs(cs
, A6XX_SP_FS_RENDER_COMPONENTS(.rt0
= 0xf));
1024 tu_cs_emit_regs(cs
, A6XX_SP_FS_MRT_REG(0,
1025 .color_format
= tu6_base_format(vk_format
),
1026 .color_sint
= vk_format_is_sint(vk_format
),
1027 .color_uint
= vk_format_is_uint(vk_format
)));
1029 tu_cs_emit_regs(cs
, A6XX_RB_MRT_CONTROL(0, .component_enable
= mask
));
1030 tu_cs_emit_regs(cs
, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format
)));
1031 tu_cs_emit_regs(cs
, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format
)));
1035 r3d_run(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
)
1037 tu_cs_emit_pkt7(cs
, CP_DRAW_INDX_OFFSET
, 3);
1038 tu_cs_emit(cs
, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST
) |
1039 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX
) |
1040 CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY
));
1041 tu_cs_emit(cs
, 1); /* instance count */
1042 tu_cs_emit(cs
, 2); /* vertex count */
1044 if (!cmd
->state
.pass
) {
1045 /* TODO: flushing with barriers instead of blindly always flushing */
1046 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
1047 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_DEPTH_TS
, true);
1048 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
, false);
1052 /* blit ops - common interface for 2d/shader paths */
1055 void (*coords
)(struct tu_cs
*cs
,
1056 const VkOffset2D
*dst
,
1057 const VkOffset2D
*src
,
1058 const VkExtent2D
*extent
);
1059 void (*clear_value
)(struct tu_cs
*cs
, VkFormat format
, const VkClearValue
*val
);
1061 struct tu_cmd_buffer
*cmd
,
1063 struct tu_image
*image
,
1069 void (*src_buffer
)(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
,
1071 uint64_t va
, uint32_t pitch
,
1072 uint32_t width
, uint32_t height
);
1073 void (*dst
)(struct tu_cs
*cs
,
1074 struct tu_image
*image
,
1078 void (*dst_buffer
)(struct tu_cs
*cs
, VkFormat vk_format
, uint64_t va
, uint32_t pitch
);
1079 void (*setup
)(struct tu_cmd_buffer
*cmd
,
1082 enum a6xx_rotation rotation
,
1085 void (*run
)(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
);
1088 static const struct blit_ops r2d_ops
= {
1089 .coords
= r2d_coords
,
1090 .clear_value
= r2d_clear_value
,
1092 .src_buffer
= r2d_src_buffer
,
1094 .dst_buffer
= r2d_dst_buffer
,
1099 static const struct blit_ops r3d_ops
= {
1100 .coords
= r3d_coords
,
1101 .clear_value
= r3d_clear_value
,
1103 .src_buffer
= r3d_src_buffer
,
1105 .dst_buffer
= r3d_dst_buffer
,
1110 /* passthrough set coords from 3D extents */
1112 coords(const struct blit_ops
*ops
,
1114 const VkOffset3D
*dst
,
1115 const VkOffset3D
*src
,
1116 const VkExtent3D
*extent
)
1118 ops
->coords(cs
, (const VkOffset2D
*) dst
, (const VkOffset2D
*) src
, (const VkExtent2D
*) extent
);
1122 tu6_blit_image(struct tu_cmd_buffer
*cmd
,
1123 struct tu_image
*src_image
,
1124 struct tu_image
*dst_image
,
1125 const VkImageBlit
*info
,
1128 const struct blit_ops
*ops
= &r2d_ops
;
1129 struct tu_cs
*cs
= &cmd
->cs
;
1132 /* 2D blit can't do rotation mirroring from just coordinates */
1133 static const enum a6xx_rotation rotate
[2][2] = {
1134 {ROTATE_0
, ROTATE_HFLIP
},
1135 {ROTATE_VFLIP
, ROTATE_180
},
1138 bool mirror_x
= (info
->srcOffsets
[1].x
< info
->srcOffsets
[0].x
) !=
1139 (info
->dstOffsets
[1].x
< info
->dstOffsets
[0].x
);
1140 bool mirror_y
= (info
->srcOffsets
[1].y
< info
->srcOffsets
[0].y
) !=
1141 (info
->dstOffsets
[1].y
< info
->dstOffsets
[0].y
);
1142 bool mirror_z
= (info
->srcOffsets
[1].z
< info
->srcOffsets
[0].z
) !=
1143 (info
->dstOffsets
[1].z
< info
->dstOffsets
[0].z
);
1146 tu_finishme("blit z mirror\n");
1150 if (info
->srcOffsets
[1].z
- info
->srcOffsets
[0].z
!=
1151 info
->dstOffsets
[1].z
- info
->dstOffsets
[0].z
) {
1152 tu_finishme("blit z filter\n");
1156 layers
= info
->srcOffsets
[1].z
- info
->srcOffsets
[0].z
;
1157 if (info
->dstSubresource
.layerCount
> 1) {
1158 assert(layers
<= 1);
1159 layers
= info
->dstSubresource
.layerCount
;
1163 if (dst_image
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
) {
1164 assert(info
->srcSubresource
.aspectMask
== info
->dstSubresource
.aspectMask
);
1165 if (info
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_DEPTH_BIT
)
1167 if (info
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
1171 if (dst_image
->samples
> 1)
1174 /* TODO: shader path fails some of blit_image.all_formats.generate_mipmaps.* tests,
1175 * figure out why (should be able to pass all tests with only shader path)
1178 ops
->setup(cmd
, cs
, dst_image
->vk_format
, rotate
[mirror_y
][mirror_x
], false, mask
);
1180 if (ops
== &r3d_ops
) {
1181 r3d_coords_raw(cs
, (float[]) {
1182 info
->dstOffsets
[0].x
, info
->dstOffsets
[0].y
,
1183 info
->srcOffsets
[0].x
, info
->srcOffsets
[0].y
,
1184 info
->dstOffsets
[1].x
, info
->dstOffsets
[1].y
,
1185 info
->srcOffsets
[1].x
, info
->srcOffsets
[1].y
1189 A6XX_GRAS_2D_DST_TL(.x
= MIN2(info
->dstOffsets
[0].x
, info
->dstOffsets
[1].x
),
1190 .y
= MIN2(info
->dstOffsets
[0].y
, info
->dstOffsets
[1].y
)),
1191 A6XX_GRAS_2D_DST_BR(.x
= MAX2(info
->dstOffsets
[0].x
, info
->dstOffsets
[1].x
) - 1,
1192 .y
= MAX2(info
->dstOffsets
[0].y
, info
->dstOffsets
[1].y
) - 1));
1194 A6XX_GRAS_2D_SRC_TL_X(.x
= MIN2(info
->srcOffsets
[0].x
, info
->srcOffsets
[1].x
)),
1195 A6XX_GRAS_2D_SRC_BR_X(.x
= MAX2(info
->srcOffsets
[0].x
, info
->srcOffsets
[1].x
) - 1),
1196 A6XX_GRAS_2D_SRC_TL_Y(.y
= MIN2(info
->srcOffsets
[0].y
, info
->srcOffsets
[1].y
)),
1197 A6XX_GRAS_2D_SRC_BR_Y(.y
= MAX2(info
->srcOffsets
[0].y
, info
->srcOffsets
[1].y
) - 1));
1200 for (uint32_t i
= 0; i
< layers
; i
++) {
1201 ops
->src(cmd
, cs
, src_image
, src_image
->vk_format
,
1202 info
->srcSubresource
.mipLevel
,
1203 info
->srcSubresource
.baseArrayLayer
+ info
->srcOffsets
[0].z
+ i
,
1204 filter
== VK_FILTER_LINEAR
, false);
1205 ops
->dst(cs
, dst_image
, dst_image
->vk_format
,
1206 info
->dstSubresource
.mipLevel
,
1207 info
->dstSubresource
.baseArrayLayer
+ info
->dstOffsets
[0].z
+ i
);
1213 tu_CmdBlitImage(VkCommandBuffer commandBuffer
,
1215 VkImageLayout srcImageLayout
,
1217 VkImageLayout dstImageLayout
,
1218 uint32_t regionCount
,
1219 const VkImageBlit
*pRegions
,
1223 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1224 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1225 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1227 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1228 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1230 for (uint32_t i
= 0; i
< regionCount
; ++i
)
1231 tu6_blit_image(cmd
, src_image
, dst_image
, pRegions
+ i
, filter
);
1235 copy_format(VkFormat format
)
1237 switch (vk_format_get_blocksizebits(format
)) {
1238 case 8: return VK_FORMAT_R8_UINT
;
1239 case 16: return VK_FORMAT_R16_UINT
;
1240 case 32: return VK_FORMAT_R32_UINT
;
1241 case 64: return VK_FORMAT_R32G32_UINT
;
1242 case 96: return VK_FORMAT_R32G32B32_UINT
;
1243 case 128:return VK_FORMAT_R32G32B32A32_UINT
;
1245 unreachable("unhandled format size");
1250 copy_compressed(VkFormat format
,
1254 uint32_t *layer_size
)
1256 if (!vk_format_is_compressed(format
))
1259 uint32_t block_width
= vk_format_get_blockwidth(format
);
1260 uint32_t block_height
= vk_format_get_blockheight(format
);
1262 offset
->x
/= block_width
;
1263 offset
->y
/= block_height
;
1266 extent
->width
= DIV_ROUND_UP(extent
->width
, block_width
);
1267 extent
->height
= DIV_ROUND_UP(extent
->height
, block_height
);
1270 *pitch
/= block_width
;
1272 *layer_size
/= (block_width
* block_height
);
1276 tu_copy_buffer_to_image(struct tu_cmd_buffer
*cmd
,
1277 struct tu_buffer
*src_buffer
,
1278 struct tu_image
*dst_image
,
1279 const VkBufferImageCopy
*info
)
1281 struct tu_cs
*cs
= &cmd
->cs
;
1282 uint32_t layers
= MAX2(info
->imageExtent
.depth
, info
->imageSubresource
.layerCount
);
1283 VkFormat dst_format
= dst_image
->vk_format
;
1284 VkFormat src_format
= dst_image
->vk_format
;
1285 const struct blit_ops
*ops
= &r2d_ops
;
1288 if (dst_image
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
) {
1289 switch (info
->imageSubresource
.aspectMask
) {
1290 case VK_IMAGE_ASPECT_STENCIL_BIT
:
1291 src_format
= VK_FORMAT_R8_UNORM
; /* changes how src buffer is interpreted */
1295 case VK_IMAGE_ASPECT_DEPTH_BIT
:
1301 VkOffset3D offset
= info
->imageOffset
;
1302 VkExtent3D extent
= info
->imageExtent
;
1304 (info
->bufferRowLength
?: extent
.width
) * vk_format_get_blocksize(src_format
);
1305 uint32_t layer_size
= (info
->bufferImageHeight
?: extent
.height
) * pitch
;
1307 if (dst_format
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
|| vk_format_is_compressed(dst_format
)) {
1308 assert(src_format
== dst_format
);
1309 copy_compressed(dst_format
, &offset
, &extent
, &pitch
, &layer_size
);
1310 src_format
= dst_format
= copy_format(dst_format
);
1313 /* note: the src_va/pitch alignment of 64 is for 2D engine,
1314 * it is also valid for 1cpp format with shader path (stencil aspect path)
1317 ops
->setup(cmd
, cs
, dst_format
, ROTATE_0
, false, mask
);
1319 for (uint32_t i
= 0; i
< layers
; i
++) {
1320 ops
->dst(cs
, dst_image
, dst_format
,
1321 info
->imageSubresource
.mipLevel
,
1322 info
->imageSubresource
.baseArrayLayer
+ info
->imageOffset
.z
+ i
);
1324 uint64_t src_va
= tu_buffer_iova(src_buffer
) + info
->bufferOffset
+ layer_size
* i
;
1325 if ((src_va
& 63) || (pitch
& 63)) {
1326 for (uint32_t y
= 0; y
< extent
.height
; y
++) {
1327 uint32_t x
= (src_va
& 63) / vk_format_get_blocksize(src_format
);
1328 ops
->src_buffer(cmd
, cs
, src_format
, src_va
& ~63, pitch
,
1329 x
+ extent
.width
, 1);
1330 ops
->coords(cs
, &(VkOffset2D
){offset
.x
, offset
.y
+ y
}, &(VkOffset2D
){x
},
1331 &(VkExtent2D
) {extent
.width
, 1});
1336 ops
->src_buffer(cmd
, cs
, src_format
, src_va
, pitch
, extent
.width
, extent
.height
);
1337 coords(ops
, cs
, &offset
, &(VkOffset3D
){}, &extent
);
1344 tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer
,
1347 VkImageLayout dstImageLayout
,
1348 uint32_t regionCount
,
1349 const VkBufferImageCopy
*pRegions
)
1351 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1352 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1353 TU_FROM_HANDLE(tu_buffer
, src_buffer
, srcBuffer
);
1355 tu_bo_list_add(&cmd
->bo_list
, src_buffer
->bo
, MSM_SUBMIT_BO_READ
);
1356 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1358 for (unsigned i
= 0; i
< regionCount
; ++i
)
1359 tu_copy_buffer_to_image(cmd
, src_buffer
, dst_image
, pRegions
+ i
);
1363 tu_copy_image_to_buffer(struct tu_cmd_buffer
*cmd
,
1364 struct tu_image
*src_image
,
1365 struct tu_buffer
*dst_buffer
,
1366 const VkBufferImageCopy
*info
)
1368 struct tu_cs
*cs
= &cmd
->cs
;
1369 uint32_t layers
= MAX2(info
->imageExtent
.depth
, info
->imageSubresource
.layerCount
);
1370 VkFormat src_format
= src_image
->vk_format
;
1371 VkFormat dst_format
= src_image
->vk_format
;
1372 bool stencil_read
= false;
1374 if (src_image
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
&&
1375 info
->imageSubresource
.aspectMask
== VK_IMAGE_ASPECT_STENCIL_BIT
) {
1376 dst_format
= VK_FORMAT_R8_UNORM
;
1377 stencil_read
= true;
1380 const struct blit_ops
*ops
= stencil_read
? &r3d_ops
: &r2d_ops
;
1381 VkOffset3D offset
= info
->imageOffset
;
1382 VkExtent3D extent
= info
->imageExtent
;
1383 uint32_t pitch
= (info
->bufferRowLength
?: extent
.width
) * vk_format_get_blocksize(dst_format
);
1384 uint32_t layer_size
= (info
->bufferImageHeight
?: extent
.height
) * pitch
;
1386 if (src_format
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
|| vk_format_is_compressed(src_format
)) {
1387 assert(src_format
== dst_format
);
1388 copy_compressed(dst_format
, &offset
, &extent
, &pitch
, &layer_size
);
1389 src_format
= dst_format
= copy_format(dst_format
);
1392 /* note: the dst_va/pitch alignment of 64 is for 2D engine,
1393 * it is also valid for 1cpp format with shader path (stencil aspect)
1396 ops
->setup(cmd
, cs
, dst_format
, ROTATE_0
, false, 0xf);
1398 for (uint32_t i
= 0; i
< layers
; i
++) {
1399 ops
->src(cmd
, cs
, src_image
, src_format
,
1400 info
->imageSubresource
.mipLevel
,
1401 info
->imageSubresource
.baseArrayLayer
+ info
->imageOffset
.z
+ i
,
1402 false, stencil_read
);
1404 uint64_t dst_va
= tu_buffer_iova(dst_buffer
) + info
->bufferOffset
+ layer_size
* i
;
1405 if ((dst_va
& 63) || (pitch
& 63)) {
1406 for (uint32_t y
= 0; y
< extent
.height
; y
++) {
1407 uint32_t x
= (dst_va
& 63) / vk_format_get_blocksize(dst_format
);
1408 ops
->dst_buffer(cs
, dst_format
, dst_va
& ~63, 0);
1409 ops
->coords(cs
, &(VkOffset2D
) {x
}, &(VkOffset2D
){offset
.x
, offset
.y
+ y
},
1410 &(VkExtent2D
) {extent
.width
, 1});
1415 ops
->dst_buffer(cs
, dst_format
, dst_va
, pitch
);
1416 coords(ops
, cs
, &(VkOffset3D
) {0, 0}, &offset
, &extent
);
1423 tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer
,
1425 VkImageLayout srcImageLayout
,
1427 uint32_t regionCount
,
1428 const VkBufferImageCopy
*pRegions
)
1430 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1431 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1432 TU_FROM_HANDLE(tu_buffer
, dst_buffer
, dstBuffer
);
1434 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1435 tu_bo_list_add(&cmd
->bo_list
, dst_buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1437 for (unsigned i
= 0; i
< regionCount
; ++i
)
1438 tu_copy_image_to_buffer(cmd
, src_image
, dst_buffer
, pRegions
+ i
);
1442 tu_copy_image_to_image(struct tu_cmd_buffer
*cmd
,
1443 struct tu_image
*src_image
,
1444 struct tu_image
*dst_image
,
1445 const VkImageCopy
*info
)
1447 const struct blit_ops
*ops
= &r2d_ops
;
1448 struct tu_cs
*cs
= &cmd
->cs
;
1451 if (dst_image
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
) {
1452 if (info
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_DEPTH_BIT
)
1454 if (info
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
1458 if (dst_image
->samples
> 1)
1461 assert(info
->srcSubresource
.aspectMask
== info
->dstSubresource
.aspectMask
);
1463 VkFormat format
= VK_FORMAT_UNDEFINED
;
1464 VkOffset3D src_offset
= info
->srcOffset
;
1465 VkOffset3D dst_offset
= info
->dstOffset
;
1466 VkExtent3D extent
= info
->extent
;
1468 /* TODO: should check (ubwc || (tile_mode && swap)) instead */
1469 if (src_image
->layout
.tile_mode
&& src_image
->vk_format
!= VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
1470 format
= src_image
->vk_format
;
1472 if (dst_image
->layout
.tile_mode
&& dst_image
->vk_format
!= VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
) {
1473 if (format
!= VK_FORMAT_UNDEFINED
&& format
!= dst_image
->vk_format
) {
1474 /* can be clever in some cases but in some cases we need and intermediate
1477 tu_finishme("image copy between two tiled/ubwc images\n");
1480 format
= dst_image
->vk_format
;
1483 if (format
== VK_FORMAT_UNDEFINED
)
1484 format
= copy_format(src_image
->vk_format
);
1486 copy_compressed(src_image
->vk_format
, &src_offset
, &extent
, NULL
, NULL
);
1487 copy_compressed(dst_image
->vk_format
, &dst_offset
, NULL
, NULL
, NULL
);
1489 ops
->setup(cmd
, cs
, format
, ROTATE_0
, false, mask
);
1490 coords(ops
, cs
, &dst_offset
, &src_offset
, &extent
);
1492 for (uint32_t i
= 0; i
< info
->extent
.depth
; i
++) {
1493 ops
->src(cmd
, cs
, src_image
, format
,
1494 info
->srcSubresource
.mipLevel
,
1495 info
->srcSubresource
.baseArrayLayer
+ info
->srcOffset
.z
+ i
,
1497 ops
->dst(cs
, dst_image
, format
,
1498 info
->dstSubresource
.mipLevel
,
1499 info
->dstSubresource
.baseArrayLayer
+ info
->dstOffset
.z
+ i
);
1505 tu_CmdCopyImage(VkCommandBuffer commandBuffer
,
1507 VkImageLayout srcImageLayout
,
1509 VkImageLayout destImageLayout
,
1510 uint32_t regionCount
,
1511 const VkImageCopy
*pRegions
)
1513 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1514 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1515 TU_FROM_HANDLE(tu_image
, dst_image
, destImage
);
1517 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1518 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1520 for (uint32_t i
= 0; i
< regionCount
; ++i
)
1521 tu_copy_image_to_image(cmd
, src_image
, dst_image
, pRegions
+ i
);
1525 copy_buffer(struct tu_cmd_buffer
*cmd
,
1529 uint32_t block_size
)
1531 const struct blit_ops
*ops
= &r2d_ops
;
1532 struct tu_cs
*cs
= &cmd
->cs
;
1533 VkFormat format
= block_size
== 4 ? VK_FORMAT_R32_UINT
: VK_FORMAT_R8_UNORM
;
1534 uint64_t blocks
= size
/ block_size
;
1536 ops
->setup(cmd
, cs
, format
, ROTATE_0
, false, 0xf);
1539 uint32_t src_x
= (src_va
& 63) / block_size
;
1540 uint32_t dst_x
= (dst_va
& 63) / block_size
;
1541 uint32_t width
= MIN2(MIN2(blocks
, 0x4000 - src_x
), 0x4000 - dst_x
);
1543 ops
->src_buffer(cmd
, cs
, format
, src_va
& ~63, 0, src_x
+ width
, 1);
1544 ops
->dst_buffer( cs
, format
, dst_va
& ~63, 0);
1545 ops
->coords(cs
, &(VkOffset2D
) {dst_x
}, &(VkOffset2D
) {src_x
}, &(VkExtent2D
) {width
, 1});
1548 src_va
+= width
* block_size
;
1549 dst_va
+= width
* block_size
;
1555 tu_CmdCopyBuffer(VkCommandBuffer commandBuffer
,
1558 uint32_t regionCount
,
1559 const VkBufferCopy
*pRegions
)
1561 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1562 TU_FROM_HANDLE(tu_buffer
, src_buffer
, srcBuffer
);
1563 TU_FROM_HANDLE(tu_buffer
, dst_buffer
, dstBuffer
);
1565 tu_bo_list_add(&cmd
->bo_list
, src_buffer
->bo
, MSM_SUBMIT_BO_READ
);
1566 tu_bo_list_add(&cmd
->bo_list
, dst_buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1568 for (unsigned i
= 0; i
< regionCount
; ++i
) {
1570 tu_buffer_iova(dst_buffer
) + pRegions
[i
].dstOffset
,
1571 tu_buffer_iova(src_buffer
) + pRegions
[i
].srcOffset
,
1572 pRegions
[i
].size
, 1);
1577 tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer
,
1579 VkDeviceSize dstOffset
,
1580 VkDeviceSize dataSize
,
1583 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1584 TU_FROM_HANDLE(tu_buffer
, buffer
, dstBuffer
);
1586 tu_bo_list_add(&cmd
->bo_list
, buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1588 struct ts_cs_memory tmp
;
1589 VkResult result
= tu_cs_alloc(&cmd
->sub_cs
, DIV_ROUND_UP(dataSize
, 64), 64, &tmp
);
1590 if (result
!= VK_SUCCESS
) {
1591 cmd
->record_result
= result
;
1595 memcpy(tmp
.map
, pData
, dataSize
);
1596 copy_buffer(cmd
, tu_buffer_iova(buffer
) + dstOffset
, tmp
.iova
, dataSize
, 4);
1600 tu_CmdFillBuffer(VkCommandBuffer commandBuffer
,
1602 VkDeviceSize dstOffset
,
1603 VkDeviceSize fillSize
,
1606 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1607 TU_FROM_HANDLE(tu_buffer
, buffer
, dstBuffer
);
1608 const struct blit_ops
*ops
= &r2d_ops
;
1609 struct tu_cs
*cs
= &cmd
->cs
;
1611 tu_bo_list_add(&cmd
->bo_list
, buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1613 if (fillSize
== VK_WHOLE_SIZE
)
1614 fillSize
= buffer
->size
- dstOffset
;
1616 uint64_t dst_va
= tu_buffer_iova(buffer
) + dstOffset
;
1617 uint32_t blocks
= fillSize
/ 4;
1619 ops
->setup(cmd
, cs
, VK_FORMAT_R32_UINT
, ROTATE_0
, true, 0xf);
1620 ops
->clear_value(cs
, VK_FORMAT_R32_UINT
, &(VkClearValue
){.color
= {.uint32
[0] = data
}});
1623 uint32_t dst_x
= (dst_va
& 63) / 4;
1624 uint32_t width
= MIN2(blocks
, 0x4000 - dst_x
);
1626 ops
->dst_buffer(cs
, VK_FORMAT_R32_UINT
, dst_va
& ~63, 0);
1627 ops
->coords(cs
, &(VkOffset2D
) {dst_x
}, NULL
, &(VkExtent2D
) {width
, 1});
1630 dst_va
+= width
* 4;
1636 tu_CmdResolveImage(VkCommandBuffer commandBuffer
,
1638 VkImageLayout srcImageLayout
,
1640 VkImageLayout dstImageLayout
,
1641 uint32_t regionCount
,
1642 const VkImageResolve
*pRegions
)
1644 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1645 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1646 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1647 const struct blit_ops
*ops
= &r2d_ops
;
1648 struct tu_cs
*cs
= &cmd
->cs
;
1650 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1651 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1653 ops
->setup(cmd
, cs
, dst_image
->vk_format
, ROTATE_0
, false, 0xf);
1655 for (uint32_t i
= 0; i
< regionCount
; ++i
) {
1656 const VkImageResolve
*info
= &pRegions
[i
];
1657 uint32_t layers
= MAX2(info
->extent
.depth
, info
->dstSubresource
.layerCount
);
1659 assert(info
->srcSubresource
.layerCount
== info
->dstSubresource
.layerCount
);
1660 /* TODO: aspect masks possible ? */
1662 coords(ops
, cs
, &info
->dstOffset
, &info
->srcOffset
, &info
->extent
);
1664 for (uint32_t i
= 0; i
< layers
; i
++) {
1665 ops
->src(cmd
, cs
, src_image
, src_image
->vk_format
,
1666 info
->srcSubresource
.mipLevel
,
1667 info
->srcSubresource
.baseArrayLayer
+ info
->srcOffset
.z
+ i
,
1669 ops
->dst(cs
, dst_image
, dst_image
->vk_format
,
1670 info
->dstSubresource
.mipLevel
,
1671 info
->dstSubresource
.baseArrayLayer
+ info
->dstOffset
.z
+ i
);
1678 tu_resolve_sysmem(struct tu_cmd_buffer
*cmd
,
1680 struct tu_image_view
*src
,
1681 struct tu_image_view
*dst
,
1683 const VkRect2D
*rect
)
1685 const struct blit_ops
*ops
= &r2d_ops
;
1687 tu_bo_list_add(&cmd
->bo_list
, src
->image
->bo
, MSM_SUBMIT_BO_READ
);
1688 tu_bo_list_add(&cmd
->bo_list
, dst
->image
->bo
, MSM_SUBMIT_BO_WRITE
);
1690 assert(src
->vk_format
== dst
->vk_format
);
1692 ops
->setup(cmd
, cs
, dst
->vk_format
, ROTATE_0
, false, 0xf);
1693 ops
->coords(cs
, &rect
->offset
, &rect
->offset
, &rect
->extent
);
1695 for (uint32_t i
= 0; i
< layers
; i
++) {
1696 ops
->src(cmd
, cs
, src
->image
, src
->vk_format
,
1698 src
->base_layer
+ i
,
1700 ops
->dst(cs
, dst
->image
, dst
->vk_format
,
1702 dst
->base_layer
+ i
);
1708 clear_image(struct tu_cmd_buffer
*cmd
,
1709 struct tu_image
*image
,
1710 const VkClearValue
*clear_value
,
1711 const VkImageSubresourceRange
*range
)
1713 uint32_t level_count
= tu_get_levelCount(image
, range
);
1714 uint32_t layer_count
= tu_get_layerCount(image
, range
);
1715 struct tu_cs
*cs
= &cmd
->cs
;
1716 VkFormat format
= image
->vk_format
;
1717 if (format
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
1718 format
= VK_FORMAT_R32_UINT
;
1720 if (image
->type
== VK_IMAGE_TYPE_3D
) {
1721 assert(layer_count
== 1);
1722 assert(range
->baseArrayLayer
== 0);
1726 if (image
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
) {
1728 if (range
->aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
)
1730 if (range
->aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
)
1734 const struct blit_ops
*ops
= image
->samples
> 1 ? &r3d_ops
: &r2d_ops
;
1736 ops
->setup(cmd
, cs
, format
, ROTATE_0
, true, mask
);
1737 ops
->clear_value(cs
, image
->vk_format
, clear_value
);
1739 for (unsigned j
= 0; j
< level_count
; j
++) {
1740 if (image
->type
== VK_IMAGE_TYPE_3D
)
1741 layer_count
= u_minify(image
->extent
.depth
, range
->baseMipLevel
+ j
);
1743 ops
->coords(cs
, &(VkOffset2D
){}, NULL
, &(VkExtent2D
) {
1744 u_minify(image
->extent
.width
, range
->baseMipLevel
+ j
),
1745 u_minify(image
->extent
.height
, range
->baseMipLevel
+ j
)
1748 for (uint32_t i
= 0; i
< layer_count
; i
++) {
1749 ops
->dst(cs
, image
, format
, range
->baseMipLevel
+ j
, range
->baseArrayLayer
+ i
);
1756 tu_CmdClearColorImage(VkCommandBuffer commandBuffer
,
1758 VkImageLayout imageLayout
,
1759 const VkClearColorValue
*pColor
,
1760 uint32_t rangeCount
,
1761 const VkImageSubresourceRange
*pRanges
)
1763 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1764 TU_FROM_HANDLE(tu_image
, image
, image_h
);
1766 tu_bo_list_add(&cmd
->bo_list
, image
->bo
, MSM_SUBMIT_BO_WRITE
);
1768 for (unsigned i
= 0; i
< rangeCount
; i
++)
1769 clear_image(cmd
, image
, (const VkClearValue
*) pColor
, pRanges
+ i
);
1773 tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer
,
1775 VkImageLayout imageLayout
,
1776 const VkClearDepthStencilValue
*pDepthStencil
,
1777 uint32_t rangeCount
,
1778 const VkImageSubresourceRange
*pRanges
)
1780 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1781 TU_FROM_HANDLE(tu_image
, image
, image_h
);
1783 tu_bo_list_add(&cmd
->bo_list
, image
->bo
, MSM_SUBMIT_BO_WRITE
);
1785 for (unsigned i
= 0; i
< rangeCount
; i
++)
1786 clear_image(cmd
, image
, (const VkClearValue
*) pDepthStencil
, pRanges
+ i
);
1790 tu_clear_sysmem_attachments_2d(struct tu_cmd_buffer
*cmd
,
1791 uint32_t attachment_count
,
1792 const VkClearAttachment
*attachments
,
1793 uint32_t rect_count
,
1794 const VkClearRect
*rects
)
1796 const struct tu_subpass
*subpass
= cmd
->state
.subpass
;
1797 /* note: cannot use shader path here.. there is a special shader path
1798 * in tu_clear_sysmem_attachments()
1800 const struct blit_ops
*ops
= &r2d_ops
;
1801 struct tu_cs
*cs
= &cmd
->draw_cs
;
1803 for (uint32_t j
= 0; j
< attachment_count
; j
++) {
1807 if (attachments
[j
].aspectMask
& VK_IMAGE_ASPECT_COLOR_BIT
) {
1808 a
= subpass
->color_attachments
[attachments
[j
].colorAttachment
].attachment
;
1810 a
= subpass
->depth_stencil_attachment
.attachment
;
1812 /* sync depth into color */
1813 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_DEPTH_TS
, true);
1814 /* also flush color to avoid losing contents from invalidate */
1815 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
1816 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_COLOR
, false);
1819 if (!(attachments
[j
].aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
))
1821 if (!(attachments
[j
].aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
))
1825 if (a
== VK_ATTACHMENT_UNUSED
)
1828 const struct tu_image_view
*iview
=
1829 cmd
->state
.framebuffer
->attachments
[a
].attachment
;
1831 ops
->setup(cmd
, cs
, iview
->vk_format
, ROTATE_0
, true, mask
);
1832 ops
->clear_value(cs
, iview
->vk_format
, &attachments
[j
].clearValue
);
1834 for (uint32_t i
= 0; i
< rect_count
; i
++) {
1835 ops
->coords(cs
, &rects
[i
].rect
.offset
, NULL
, &rects
[i
].rect
.extent
);
1836 for (uint32_t layer
= 0; layer
< rects
[i
].layerCount
; layer
++) {
1837 ops
->dst(cs
, iview
->image
, iview
->vk_format
, iview
->base_mip
,
1838 iview
->base_layer
+ rects
[i
].baseArrayLayer
+ layer
);
1843 if (attachments
[j
].aspectMask
& VK_IMAGE_ASPECT_COLOR_BIT
) {
1844 /* does not use CCU - flush
1845 * note: cache invalidate might be needed to, and just not covered by test cases
1847 if (attachments
[j
].colorAttachment
> 0)
1848 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
1850 /* sync color into depth */
1851 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
1852 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_DEPTH
, false);
1858 tu_clear_sysmem_attachments(struct tu_cmd_buffer
*cmd
,
1859 uint32_t attachment_count
,
1860 const VkClearAttachment
*attachments
,
1861 uint32_t rect_count
,
1862 const VkClearRect
*rects
)
1864 /* the shader path here is special, it avoids changing MRT/etc state */
1865 const struct tu_render_pass
*pass
= cmd
->state
.pass
;
1866 const struct tu_subpass
*subpass
= cmd
->state
.subpass
;
1867 const uint32_t mrt_count
= subpass
->color_count
;
1868 struct tu_cs
*cs
= &cmd
->draw_cs
;
1869 uint32_t clear_value
[MAX_RTS
][4];
1870 float z_clear_val
= 0.0f
;
1871 uint8_t s_clear_val
= 0;
1872 uint32_t clear_rts
= 0, num_rts
= 0, b
;
1873 bool z_clear
= false;
1874 bool s_clear
= false;
1875 uint32_t max_samples
= 1;
1877 for (uint32_t i
= 0; i
< attachment_count
; i
++) {
1879 if (attachments
[i
].aspectMask
& VK_IMAGE_ASPECT_COLOR_BIT
) {
1880 uint32_t c
= attachments
[i
].colorAttachment
;
1881 a
= subpass
->color_attachments
[c
].attachment
;
1882 if (a
== VK_ATTACHMENT_UNUSED
)
1885 clear_rts
|= 1 << c
;
1886 memcpy(clear_value
[c
], &attachments
[i
].clearValue
, 4 * sizeof(uint32_t));
1888 a
= subpass
->depth_stencil_attachment
.attachment
;
1889 if (a
== VK_ATTACHMENT_UNUSED
)
1892 if (attachments
[i
].aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
) {
1894 z_clear_val
= attachments
[i
].clearValue
.depthStencil
.depth
;
1897 if (attachments
[i
].aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
) {
1899 s_clear_val
= attachments
[i
].clearValue
.depthStencil
.stencil
& 0xff;
1903 max_samples
= MAX2(max_samples
, pass
->attachments
[a
].samples
);
1906 /* prefer to use 2D path for clears
1907 * 2D can't clear separate depth/stencil and msaa, needs known framebuffer
1909 if (max_samples
== 1 && cmd
->state
.framebuffer
) {
1910 tu_clear_sysmem_attachments_2d(cmd
, attachment_count
, attachments
, rect_count
, rects
);
1914 /* TODO: this path doesn't take into account multilayer rendering */
1916 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_CNTL0
, 2);
1917 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
1918 A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
1920 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count
));
1922 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_REG(0), mrt_count
);
1923 for (uint32_t i
= 0; i
< mrt_count
; i
++) {
1924 if (clear_rts
& (1 << i
))
1925 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_REG_REGID(num_rts
++ * 4));
1930 r3d_pipeline(cmd
, cs
, false, num_rts
);
1933 A6XX_RB_FS_OUTPUT_CNTL0(),
1934 A6XX_RB_FS_OUTPUT_CNTL1(.mrt
= mrt_count
));
1936 tu_cs_emit_regs(cs
, A6XX_SP_BLEND_CNTL());
1937 tu_cs_emit_regs(cs
, A6XX_RB_BLEND_CNTL(.independent_blend
= 1, .sample_mask
= 0xffff));
1938 tu_cs_emit_regs(cs
, A6XX_RB_ALPHA_CONTROL());
1939 for (uint32_t i
= 0; i
< mrt_count
; i
++) {
1940 tu_cs_emit_regs(cs
, A6XX_RB_MRT_CONTROL(i
,
1941 .component_enable
= COND(clear_rts
& (1 << i
), 0xf)));
1944 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_PLANE_CNTL());
1945 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_CNTL(
1946 .z_enable
= z_clear
,
1947 .z_write_enable
= z_clear
,
1948 .zfunc
= FUNC_ALWAYS
));
1949 tu_cs_emit_regs(cs
, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
1950 tu_cs_emit_regs(cs
, A6XX_RB_STENCIL_CONTROL(
1951 .stencil_enable
= s_clear
,
1952 .func
= FUNC_ALWAYS
,
1953 .zpass
= VK_STENCIL_OP_REPLACE
));
1954 tu_cs_emit_regs(cs
, A6XX_RB_STENCILMASK(.mask
= 0xff));
1955 tu_cs_emit_regs(cs
, A6XX_RB_STENCILWRMASK(.wrmask
= 0xff));
1956 tu_cs_emit_regs(cs
, A6XX_RB_STENCILREF(.ref
= s_clear_val
));
1958 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3 + 4 * num_rts
);
1959 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
1960 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
1961 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
1962 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER
) |
1963 CP_LOAD_STATE6_0_NUM_UNIT(num_rts
));
1964 tu_cs_emit(cs
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
1965 tu_cs_emit(cs
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
1966 for_each_bit(b
, clear_rts
)
1967 tu_cs_emit_array(cs
, clear_value
[b
], 4);
1969 for (uint32_t i
= 0; i
< rect_count
; i
++) {
1970 r3d_coords_raw(cs
, (float[]) {
1971 rects
[i
].rect
.offset
.x
, rects
[i
].rect
.offset
.y
,
1973 rects
[i
].rect
.offset
.x
+ rects
[i
].rect
.extent
.width
,
1974 rects
[i
].rect
.offset
.y
+ rects
[i
].rect
.extent
.height
,
1980 cmd
->state
.dirty
|= TU_CMD_DIRTY_PIPELINE
|
1981 TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK
|
1982 TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK
|
1983 TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE
|
1984 TU_CMD_DIRTY_DYNAMIC_VIEWPORT
|
1985 TU_CMD_DIRTY_DYNAMIC_SCISSOR
;
1989 * Pack a VkClearValue into a 128-bit buffer. format is respected except
1990 * for the component order. The components are always packed in WZYX order,
1991 * because gmem is tiled and tiled formats always have WZYX swap
1994 pack_gmem_clear_value(const VkClearValue
*val
, VkFormat format
, uint32_t buf
[4])
1996 const struct util_format_description
*desc
= vk_format_description(format
);
1999 case VK_FORMAT_B10G11R11_UFLOAT_PACK32
:
2000 buf
[0] = float3_to_r11g11b10f(val
->color
.float32
);
2002 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
:
2003 buf
[0] = float3_to_rgb9e5(val
->color
.float32
);
2009 assert(desc
&& desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
2011 /* S8_UINT is special and has no depth */
2012 const int max_components
=
2013 format
== VK_FORMAT_S8_UINT
? 2 : desc
->nr_channels
;
2017 for (int comp
= 0; comp
< max_components
; comp
++) {
2018 const struct util_format_channel_description
*ch
=
2019 tu_get_format_channel_description(desc
, comp
);
2021 assert((format
== VK_FORMAT_S8_UINT
&& comp
== 0) ||
2022 (format
== VK_FORMAT_X8_D24_UNORM_PACK32
&& comp
== 1));
2026 union tu_clear_component_value v
= tu_get_clear_component_value(
2027 val
, comp
, desc
->colorspace
);
2029 /* move to the next uint32_t when there is not enough space */
2030 assert(ch
->size
<= 32);
2031 if (bit_shift
+ ch
->size
> 32) {
2037 buf
[buf_offset
] = 0;
2039 buf
[buf_offset
] |= tu_pack_clear_component_value(v
, ch
) << bit_shift
;
2040 bit_shift
+= ch
->size
;
2045 tu_emit_clear_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2047 uint32_t attachment
,
2048 uint8_t component_mask
,
2049 const VkClearValue
*value
)
2051 VkFormat vk_format
= cmd
->state
.pass
->attachments
[attachment
].format
;
2052 /* note: component_mask is 0x7 for depth and 0x8 for stencil
2053 * because D24S8 is cleared with AS_R8G8B8A8 format
2056 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_DST_INFO
, 1);
2057 tu_cs_emit(cs
, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(vk_format
)));
2059 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_INFO
, 1);
2060 tu_cs_emit(cs
, A6XX_RB_BLIT_INFO_GMEM
| A6XX_RB_BLIT_INFO_CLEAR_MASK(component_mask
));
2062 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_BASE_GMEM
, 1);
2063 tu_cs_emit(cs
, cmd
->state
.pass
->attachments
[attachment
].gmem_offset
);
2065 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_88D0
, 1);
2068 uint32_t clear_vals
[4] = {};
2069 pack_gmem_clear_value(value
, vk_format
, clear_vals
);
2071 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0
, 4);
2072 tu_cs_emit_array(cs
, clear_vals
, 4);
2074 tu6_emit_event_write(cmd
, cs
, BLIT
, false);
2078 tu_clear_gmem_attachments(struct tu_cmd_buffer
*cmd
,
2079 uint32_t attachment_count
,
2080 const VkClearAttachment
*attachments
,
2081 uint32_t rect_count
,
2082 const VkClearRect
*rects
)
2084 const struct tu_subpass
*subpass
= cmd
->state
.subpass
;
2085 struct tu_cs
*cs
= &cmd
->draw_cs
;
2087 /* TODO: swap the loops for smaller cmdstream */
2088 for (unsigned i
= 0; i
< rect_count
; i
++) {
2089 unsigned x1
= rects
[i
].rect
.offset
.x
;
2090 unsigned y1
= rects
[i
].rect
.offset
.y
;
2091 unsigned x2
= x1
+ rects
[i
].rect
.extent
.width
- 1;
2092 unsigned y2
= y1
+ rects
[i
].rect
.extent
.height
- 1;
2094 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_SCISSOR_TL
, 2);
2095 tu_cs_emit(cs
, A6XX_RB_BLIT_SCISSOR_TL_X(x1
) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1
));
2096 tu_cs_emit(cs
, A6XX_RB_BLIT_SCISSOR_BR_X(x2
) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2
));
2098 for (unsigned j
= 0; j
< attachment_count
; j
++) {
2100 unsigned clear_mask
= 0;
2101 if (attachments
[j
].aspectMask
& VK_IMAGE_ASPECT_COLOR_BIT
) {
2103 a
= subpass
->color_attachments
[attachments
[j
].colorAttachment
].attachment
;
2105 a
= subpass
->depth_stencil_attachment
.attachment
;
2106 if (attachments
[j
].aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
)
2108 if (attachments
[j
].aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
)
2112 if (a
== VK_ATTACHMENT_UNUSED
)
2115 tu_emit_clear_gmem_attachment(cmd
, cs
, a
, clear_mask
,
2116 &attachments
[j
].clearValue
);
2122 tu_CmdClearAttachments(VkCommandBuffer commandBuffer
,
2123 uint32_t attachmentCount
,
2124 const VkClearAttachment
*pAttachments
,
2126 const VkClearRect
*pRects
)
2128 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
2129 struct tu_cs
*cs
= &cmd
->draw_cs
;
2131 tu_cond_exec_start(cs
, CP_COND_EXEC_0_RENDER_MODE_GMEM
);
2132 tu_clear_gmem_attachments(cmd
, attachmentCount
, pAttachments
, rectCount
, pRects
);
2133 tu_cond_exec_end(cs
);
2135 tu_cond_exec_start(cs
, CP_COND_EXEC_0_RENDER_MODE_SYSMEM
);
2136 tu_clear_sysmem_attachments(cmd
, attachmentCount
, pAttachments
, rectCount
, pRects
);
2137 tu_cond_exec_end(cs
);
2141 tu_clear_sysmem_attachment(struct tu_cmd_buffer
*cmd
,
2144 const VkRenderPassBeginInfo
*info
)
2146 const struct tu_framebuffer
*fb
= cmd
->state
.framebuffer
;
2147 const struct tu_image_view
*iview
= fb
->attachments
[a
].attachment
;
2148 const struct tu_render_pass_attachment
*attachment
=
2149 &cmd
->state
.pass
->attachments
[a
];
2152 if (attachment
->load_op
== VK_ATTACHMENT_LOAD_OP_CLEAR
)
2155 if (iview
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
) {
2157 if (attachment
->stencil_load_op
== VK_ATTACHMENT_LOAD_OP_CLEAR
)
2161 /* gmem_offset<0 means it isn't used by any subpass and shouldn't be cleared */
2162 if (attachment
->gmem_offset
< 0 || !mask
)
2165 const struct blit_ops
*ops
= &r2d_ops
;
2166 if (attachment
->samples
> 1)
2169 ops
->setup(cmd
, cs
, iview
->vk_format
, ROTATE_0
, true, mask
);
2170 ops
->coords(cs
, &info
->renderArea
.offset
, NULL
, &info
->renderArea
.extent
);
2171 ops
->clear_value(cs
, iview
->vk_format
, &info
->pClearValues
[a
]);
2173 for (uint32_t i
= 0; i
< fb
->layers
; i
++) {
2174 ops
->dst(cs
, iview
->image
, iview
->vk_format
, iview
->base_mip
, iview
->base_layer
+ i
);
2180 tu_clear_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2183 const VkRenderPassBeginInfo
*info
)
2185 const struct tu_framebuffer
*fb
= cmd
->state
.framebuffer
;
2186 const struct tu_image_view
*iview
= fb
->attachments
[a
].attachment
;
2187 const struct tu_render_pass_attachment
*attachment
=
2188 &cmd
->state
.pass
->attachments
[a
];
2189 unsigned clear_mask
= 0;
2191 /* note: this means it isn't used by any subpass and shouldn't be cleared anyway */
2192 if (attachment
->gmem_offset
< 0)
2195 if (attachment
->load_op
== VK_ATTACHMENT_LOAD_OP_CLEAR
)
2198 if (vk_format_has_stencil(iview
->vk_format
)) {
2200 if (attachment
->stencil_load_op
== VK_ATTACHMENT_LOAD_OP_CLEAR
)
2206 tu_cs_emit_regs(cs
, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment
->samples
)));
2208 tu_emit_clear_gmem_attachment(cmd
, cs
, a
, clear_mask
,
2209 &info
->pClearValues
[a
]);
2213 tu_emit_blit(struct tu_cmd_buffer
*cmd
,
2215 const struct tu_image_view
*iview
,
2216 struct tu_render_pass_attachment
*attachment
,
2219 const struct tu_native_format format
=
2220 tu6_format_image(iview
->image
, iview
->vk_format
, iview
->base_mip
);
2223 A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment
->samples
)));
2225 tu_cs_emit_regs(cs
, A6XX_RB_BLIT_INFO(
2228 /* "integer" bit disables msaa resolve averaging */
2229 .integer
= vk_format_is_int(iview
->vk_format
)));
2232 A6XX_RB_BLIT_DST_INFO(
2233 .tile_mode
= format
.tile_mode
,
2234 .samples
= tu_msaa_samples(iview
->image
->samples
),
2235 .color_format
= format
.fmt
,
2236 .color_swap
= format
.swap
,
2237 .flags
= iview
->image
->layout
.ubwc_layer_size
!= 0),
2238 A6XX_RB_BLIT_DST(tu_image_view_base_ref(iview
)),
2239 A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview
->image
, iview
->base_mip
)),
2240 A6XX_RB_BLIT_DST_ARRAY_PITCH(iview
->image
->layout
.layer_size
));
2242 if (iview
->image
->layout
.ubwc_layer_size
) {
2244 A6XX_RB_BLIT_FLAG_DST(tu_image_view_ubwc_base_ref(iview
)),
2245 A6XX_RB_BLIT_FLAG_DST_PITCH(tu_image_view_ubwc_pitches(iview
)));
2249 A6XX_RB_BLIT_BASE_GMEM(attachment
->gmem_offset
));
2251 tu6_emit_event_write(cmd
, cs
, BLIT
, false);
2255 blit_can_resolve(VkFormat format
)
2257 const struct util_format_description
*desc
= vk_format_description(format
);
2259 /* blit event can only do resolve for simple cases:
2260 * averaging samples as unsigned integers or choosing only one sample
2262 if (vk_format_is_snorm(format
) || vk_format_is_srgb(format
))
2265 /* can't do formats with larger channel sizes
2266 * note: this includes all float formats
2267 * note2: single channel integer formats seem OK
2269 if (desc
->channel
[0].size
> 10)
2273 /* for unknown reasons blit event can't msaa resolve these formats when tiled
2274 * likely related to these formats having different layout from other cpp=2 formats
2276 case VK_FORMAT_R8G8_UNORM
:
2277 case VK_FORMAT_R8G8_UINT
:
2278 case VK_FORMAT_R8G8_SINT
:
2279 /* TODO: this one should be able to work? */
2280 case VK_FORMAT_D24_UNORM_S8_UINT
:
2290 tu_emit_load_gmem_attachment(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
, uint32_t a
)
2292 tu_emit_blit(cmd
, cs
,
2293 cmd
->state
.framebuffer
->attachments
[a
].attachment
,
2294 &cmd
->state
.pass
->attachments
[a
],
2299 tu_load_gmem_attachment(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
, uint32_t a
)
2301 const struct tu_render_pass_attachment
*attachment
=
2302 &cmd
->state
.pass
->attachments
[a
];
2304 if (attachment
->gmem_offset
< 0)
2307 if (attachment
->load_op
== VK_ATTACHMENT_LOAD_OP_LOAD
||
2308 (vk_format_has_stencil(attachment
->format
) &&
2309 attachment
->stencil_load_op
== VK_ATTACHMENT_LOAD_OP_LOAD
)) {
2310 tu_emit_load_gmem_attachment(cmd
, cs
, a
);
2315 tu_store_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2320 const struct tu_tiling_config
*tiling
= &cmd
->state
.tiling_config
;
2321 const VkRect2D
*render_area
= &tiling
->render_area
;
2322 struct tu_render_pass_attachment
*dst
= &cmd
->state
.pass
->attachments
[a
];
2323 struct tu_image_view
*iview
= cmd
->state
.framebuffer
->attachments
[a
].attachment
;
2324 struct tu_render_pass_attachment
*src
= &cmd
->state
.pass
->attachments
[gmem_a
];
2326 if (dst
->store_op
== VK_ATTACHMENT_STORE_OP_DONT_CARE
)
2329 uint32_t x1
= render_area
->offset
.x
;
2330 uint32_t y1
= render_area
->offset
.y
;
2331 uint32_t x2
= x1
+ render_area
->extent
.width
;
2332 uint32_t y2
= y1
+ render_area
->extent
.height
;
2333 /* x2/y2 can be unaligned if equal to the size of the image,
2334 * since it will write into padding space
2335 * the one exception is linear levels which don't have the
2336 * required y padding in the layout (except for the last level)
2338 bool need_y2_align
=
2339 y2
!= iview
->extent
.height
||
2340 (tu6_get_image_tile_mode(iview
->image
, iview
->base_mip
) == TILE6_LINEAR
&&
2341 iview
->base_mip
!= iview
->image
->level_count
- 1);
2344 x1
% GMEM_ALIGN_W
|| (x2
% GMEM_ALIGN_W
&& x2
!= iview
->extent
.width
) ||
2345 y1
% GMEM_ALIGN_H
|| (y2
% GMEM_ALIGN_H
&& need_y2_align
);
2347 /* use fast path when render area is aligned, except for unsupported resolve cases */
2348 if (!unaligned
&& (a
== gmem_a
|| blit_can_resolve(iview
->vk_format
))) {
2349 tu_emit_blit(cmd
, cs
, iview
, src
, true);
2353 if (dst
->samples
> 1) {
2354 /* I guess we need to use shader path in this case?
2355 * need a testcase which fails because of this
2357 tu_finishme("unaligned store of msaa attachment\n");
2361 r2d_setup_common(cmd
, cs
, iview
->vk_format
, ROTATE_0
, false, 0xf, true);
2362 r2d_dst(cs
, iview
->image
, iview
->vk_format
, iview
->base_mip
, iview
->base_layer
);
2363 r2d_coords(cs
, &render_area
->offset
, &render_area
->offset
, &render_area
->extent
);
2366 A6XX_SP_PS_2D_SRC_INFO(
2367 .color_format
= tu6_format_texture(src
->format
, TILE6_2
).fmt
,
2368 .tile_mode
= TILE6_2
,
2369 .srgb
= vk_format_is_srgb(src
->format
),
2370 .samples
= tu_msaa_samples(src
->samples
),
2371 .samples_average
= !vk_format_is_int(src
->format
),
2374 /* note: src size does not matter when not scaling */
2375 A6XX_SP_PS_2D_SRC_SIZE( .width
= 0x3fff, .height
= 0x3fff),
2376 A6XX_SP_PS_2D_SRC_LO(cmd
->device
->physical_device
->gmem_base
+ src
->gmem_offset
),
2377 A6XX_SP_PS_2D_SRC_HI(),
2378 A6XX_SP_PS_2D_SRC_PITCH(.pitch
= tiling
->tile0
.extent
.width
* src
->cpp
));
2380 /* sync GMEM writes with CACHE */
2381 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
, false);
2383 tu_cs_emit_pkt7(cs
, CP_BLIT
, 1);
2384 tu_cs_emit(cs
, CP_BLIT_0_OP(BLIT_OP_SCALE
));
2386 /* TODO: flushing with barriers instead of blindly always flushing */
2387 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
, true);
2388 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_DEPTH_TS
, true);
2389 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
, false);