2 * Copyright 2019-2020 Valve Corporation
3 * SPDX-License-Identifier: MIT
6 * Jonathan Marek <jonathan@marek.ca>
9 #include "tu_private.h"
12 #include "vk_format.h"
14 #include "util/format_r11g11b10f.h"
15 #include "util/format_rgb9e5.h"
16 #include "util/format_srgb.h"
17 #include "util/u_half.h"
20 tu_pack_float32_for_unorm(float val
, int bits
)
22 return _mesa_lroundevenf(CLAMP(val
, 0.0f
, 1.0f
) * (float) ((1 << bits
) - 1));
25 /* r2d_ = BLIT_OP_SCALE operations */
27 static enum a6xx_2d_ifmt
28 format_to_ifmt(VkFormat format
)
30 if (format
== VK_FORMAT_D24_UNORM_S8_UINT
||
31 format
== VK_FORMAT_X8_D24_UNORM_PACK32
)
34 /* get_component_bits doesn't work with depth/stencil formats: */
35 if (format
== VK_FORMAT_D16_UNORM
|| format
== VK_FORMAT_D32_SFLOAT
)
37 if (format
== VK_FORMAT_S8_UINT
)
40 /* use the size of the red channel to find the corresponding "ifmt" */
41 bool is_int
= vk_format_is_int(format
);
42 switch (vk_format_get_component_bits(format
, UTIL_FORMAT_COLORSPACE_RGB
, PIPE_SWIZZLE_X
)) {
43 case 4: case 5: case 8:
44 return is_int
? R2D_INT8
: R2D_UNORM8
;
46 return is_int
? R2D_INT16
: R2D_FLOAT16
;
48 if (vk_format_is_float(format
))
50 return is_int
? R2D_INT16
: R2D_FLOAT32
;
52 return is_int
? R2D_INT32
: R2D_FLOAT32
;
54 unreachable("bad format");
60 r2d_coords(struct tu_cs
*cs
,
61 const VkOffset2D
*dst
,
62 const VkOffset2D
*src
,
63 const VkExtent2D
*extent
)
66 A6XX_GRAS_2D_DST_TL(.x
= dst
->x
, .y
= dst
->y
),
67 A6XX_GRAS_2D_DST_BR(.x
= dst
->x
+ extent
->width
- 1, .y
= dst
->y
+ extent
->height
- 1));
73 A6XX_GRAS_2D_SRC_TL_X(src
->x
),
74 A6XX_GRAS_2D_SRC_BR_X(src
->x
+ extent
->width
- 1),
75 A6XX_GRAS_2D_SRC_TL_Y(src
->y
),
76 A6XX_GRAS_2D_SRC_BR_Y(src
->y
+ extent
->height
- 1));
80 r2d_clear_value(struct tu_cs
*cs
, VkFormat format
, const VkClearValue
*val
)
82 uint32_t clear_value
[4] = {};
85 case VK_FORMAT_X8_D24_UNORM_PACK32
:
86 case VK_FORMAT_D24_UNORM_S8_UINT
:
87 /* cleared as r8g8b8a8_unorm using special format */
88 clear_value
[0] = tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 24);
89 clear_value
[1] = clear_value
[0] >> 8;
90 clear_value
[2] = clear_value
[0] >> 16;
91 clear_value
[3] = val
->depthStencil
.stencil
;
93 case VK_FORMAT_D16_UNORM
:
94 case VK_FORMAT_D32_SFLOAT
:
96 clear_value
[0] = fui(val
->depthStencil
.depth
);
98 case VK_FORMAT_S8_UINT
:
99 clear_value
[0] = val
->depthStencil
.stencil
;
101 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
:
102 /* cleared as UINT32 */
103 clear_value
[0] = float3_to_rgb9e5(val
->color
.float32
);
106 assert(!vk_format_is_depth_or_stencil(format
));
107 const struct util_format_description
*desc
= vk_format_description(format
);
108 enum a6xx_2d_ifmt ifmt
= format_to_ifmt(format
);
110 assert(desc
&& (desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
||
111 format
== VK_FORMAT_B10G11R11_UFLOAT_PACK32
));
113 for (unsigned i
= 0; i
< desc
->nr_channels
; i
++) {
114 const struct util_format_channel_description
*ch
= &desc
->channel
[i
];
115 if (ifmt
== R2D_UNORM8
) {
116 float linear
= val
->color
.float32
[i
];
117 if (desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
&& i
< 3)
118 linear
= util_format_linear_to_srgb_float(val
->color
.float32
[i
]);
120 if (ch
->type
== UTIL_FORMAT_TYPE_SIGNED
)
121 clear_value
[i
] = _mesa_lroundevenf(CLAMP(linear
, -1.0f
, 1.0f
) * 127.0f
);
123 clear_value
[i
] = tu_pack_float32_for_unorm(linear
, 8);
124 } else if (ifmt
== R2D_FLOAT16
) {
125 clear_value
[i
] = util_float_to_half(val
->color
.float32
[i
]);
127 assert(ifmt
== R2D_FLOAT32
|| ifmt
== R2D_INT32
||
128 ifmt
== R2D_INT16
|| ifmt
== R2D_INT8
);
129 clear_value
[i
] = val
->color
.uint32
[i
];
135 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_SRC_SOLID_C0
, 4);
136 tu_cs_emit_array(cs
, clear_value
, 4);
140 r2d_src(struct tu_cmd_buffer
*cmd
,
142 const struct tu_image_view
*iview
,
146 uint32_t src_info
= iview
->SP_PS_2D_SRC_INFO
;
147 if (filter
!= VK_FILTER_NEAREST
)
148 src_info
|= A6XX_SP_PS_2D_SRC_INFO_FILTER
;
150 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_INFO
, 5);
151 tu_cs_emit(cs
, src_info
);
152 tu_cs_emit(cs
, iview
->SP_PS_2D_SRC_SIZE
);
153 tu_cs_image_ref_2d(cs
, iview
, layer
, true);
155 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO
, 3);
156 tu_cs_image_flag_ref(cs
, iview
, layer
);
160 r2d_src_buffer(struct tu_cmd_buffer
*cmd
,
163 uint64_t va
, uint32_t pitch
,
164 uint32_t width
, uint32_t height
)
166 struct tu_native_format format
= tu6_format_texture(vk_format
, TILE6_LINEAR
);
169 A6XX_SP_PS_2D_SRC_INFO(
170 .color_format
= format
.fmt
,
171 .color_swap
= format
.swap
,
172 .srgb
= vk_format_is_srgb(vk_format
),
175 A6XX_SP_PS_2D_SRC_SIZE(.width
= width
, .height
= height
),
176 A6XX_SP_PS_2D_SRC_LO((uint32_t) va
),
177 A6XX_SP_PS_2D_SRC_HI(va
>> 32),
178 A6XX_SP_PS_2D_SRC_PITCH(.pitch
= pitch
));
182 r2d_dst(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
)
184 assert(iview
->image
->samples
== 1);
186 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_INFO
, 4);
187 tu_cs_emit(cs
, iview
->RB_2D_DST_INFO
);
188 tu_cs_image_ref_2d(cs
, iview
, layer
, false);
190 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_FLAGS_LO
, 3);
191 tu_cs_image_flag_ref(cs
, iview
, layer
);
195 r2d_dst_stencil(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
)
197 assert(iview
->image
->samples
== 1);
199 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_INFO
, 4);
200 tu_cs_emit(cs
, tu_image_view_stencil(iview
, RB_2D_DST_INFO
) & ~A6XX_RB_2D_DST_INFO_FLAGS
);
201 tu_cs_emit_qw(cs
, iview
->stencil_base_addr
+ iview
->stencil_layer_size
* layer
);
202 tu_cs_emit(cs
, iview
->stencil_PITCH
);
206 r2d_dst_buffer(struct tu_cs
*cs
, VkFormat vk_format
, uint64_t va
, uint32_t pitch
)
208 struct tu_native_format format
= tu6_format_color(vk_format
, TILE6_LINEAR
);
212 .color_format
= format
.fmt
,
213 .color_swap
= format
.swap
,
214 .srgb
= vk_format_is_srgb(vk_format
)),
215 A6XX_RB_2D_DST_LO((uint32_t) va
),
216 A6XX_RB_2D_DST_HI(va
>> 32),
217 A6XX_RB_2D_DST_PITCH(pitch
));
221 r2d_setup_common(struct tu_cmd_buffer
*cmd
,
224 VkImageAspectFlags aspect_mask
,
225 enum a6xx_rotation rotation
,
230 enum a6xx_format format
= tu6_base_format(vk_format
);
231 enum a6xx_2d_ifmt ifmt
= format_to_ifmt(vk_format
);
232 uint32_t unknown_8c01
= 0;
234 if ((vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
||
235 vk_format
== VK_FORMAT_X8_D24_UNORM_PACK32
) && ubwc
) {
236 format
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
239 /* note: the only format with partial clearing is D24S8 */
240 if (vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
) {
241 /* preserve stencil channel */
242 if (aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
)
243 unknown_8c01
= 0x08000041;
244 /* preserve depth channels */
245 if (aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
246 unknown_8c01
= 0x00084001;
249 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_UNKNOWN_8C01
, 1);
250 tu_cs_emit(cs
, unknown_8c01
);
252 uint32_t blit_cntl
= A6XX_RB_2D_BLIT_CNTL(
255 .solid_color
= clear
,
256 .d24s8
= format
== FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
&& !clear
,
257 .color_format
= format
,
259 .ifmt
= vk_format_is_srgb(vk_format
) ? R2D_UNORM8_SRGB
: ifmt
,
262 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_BLIT_CNTL
, 1);
263 tu_cs_emit(cs
, blit_cntl
);
265 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_BLIT_CNTL
, 1);
266 tu_cs_emit(cs
, blit_cntl
);
268 if (format
== FMT6_10_10_10_2_UNORM_DEST
)
269 format
= FMT6_16_16_16_16_FLOAT
;
271 tu_cs_emit_regs(cs
, A6XX_SP_2D_DST_FORMAT(
272 .sint
= vk_format_is_sint(vk_format
),
273 .uint
= vk_format_is_uint(vk_format
),
274 .color_format
= format
,
275 .srgb
= vk_format_is_srgb(vk_format
),
280 r2d_setup(struct tu_cmd_buffer
*cmd
,
283 VkImageAspectFlags aspect_mask
,
284 enum a6xx_rotation rotation
,
288 tu_emit_cache_flush_ccu(cmd
, cs
, TU_CMD_CCU_SYSMEM
);
290 r2d_setup_common(cmd
, cs
, vk_format
, aspect_mask
, rotation
, clear
, ubwc
, false);
294 r2d_teardown(struct tu_cmd_buffer
*cmd
,
297 /* nothing to do here */
301 r2d_run(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
)
303 tu_cs_emit_pkt7(cs
, CP_BLIT
, 1);
304 tu_cs_emit(cs
, CP_BLIT_0_OP(BLIT_OP_SCALE
));
307 /* r3d_ = shader path operations */
310 tu_init_clear_blit_shaders(struct tu6_global
*global
)
312 #define MOV(args...) { .cat1 = { .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32, args } }
313 #define CAT2(op, args...) { .cat2 = { .opc_cat = 2, .opc = (op) & 63, .full = 1, args } }
314 #define CAT3(op, args...) { .cat3 = { .opc_cat = 3, .opc = (op) & 63, args } }
316 static const instr_t vs_code
[] = {
317 /* r0.xyz = r0.w ? c1.xyz : c0.xyz
318 * r1.xy = r0.w ? c1.zw : c0.zw
321 CAT3(OPC_SEL_B32
, .repeat
= 2, .dst
= 0,
322 .c1
= {.src1_c
= 1, .src1
= 4}, .src1_r
= 1,
324 .c2
= {.src3_c
= 1, .dummy
= 1, .src3
= 0}),
325 CAT3(OPC_SEL_B32
, .repeat
= 1, .dst
= 4,
326 .c1
= {.src1_c
= 1, .src1
= 6}, .src1_r
= 1,
328 .c2
= {.src3_c
= 1, .dummy
= 1, .src3
= 2}),
329 MOV(.dst
= 3, .src_im
= 1, .fim_val
= 1.0f
),
330 { .cat0
= { .opc
= OPC_END
} },
333 static const instr_t fs_blit
[] = {
334 /* " bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its
335 * blit path (its not clear what allows it to not have it)
337 CAT2(OPC_BARY_F
, .ei
= 1, .full
= 1, .dst
= 63 * 4, .src1_im
= 1),
338 { .cat0
= { .opc
= OPC_END
} },
341 memcpy(&global
->shaders
[GLOBAL_SH_VS
], vs_code
, sizeof(vs_code
));
342 memcpy(&global
->shaders
[GLOBAL_SH_FS_BLIT
], fs_blit
, sizeof(fs_blit
));
344 for (uint32_t num_rts
= 0; num_rts
<= MAX_RTS
; num_rts
++) {
345 instr_t
*code
= global
->shaders
[GLOBAL_SH_FS_CLEAR0
+ num_rts
];
346 for (uint32_t i
= 0; i
< num_rts
; i
++) {
347 /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
348 *code
++ = (instr_t
) MOV(.repeat
= 3, .dst
= i
* 4, .src_c
= 1, .src_r
= 1, .src
= i
* 4);
350 *code
++ = (instr_t
) { .cat0
= { .opc
= OPC_END
} };
355 r3d_common(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
, bool blit
, uint32_t num_rts
,
358 struct ir3_const_state dummy_const_state
= {};
359 struct ir3_shader dummy_shader
= {};
361 struct ir3_shader_variant vs
= {
362 .type
= MESA_SHADER_VERTEX
,
368 .slot
= SYSTEM_VALUE_VERTEX_ID
,
369 .regid
= regid(0, 3),
372 .outputs_count
= blit
? 2 : 1,
374 .slot
= VARYING_SLOT_POS
,
375 .regid
= regid(0, 0),
378 .slot
= VARYING_SLOT_VAR0
,
379 .regid
= regid(1, 0),
381 .shader
= &dummy_shader
,
382 .const_state
= &dummy_const_state
,
385 vs
.outputs
[1].slot
= VARYING_SLOT_LAYER
;
386 vs
.outputs
[1].regid
= regid(1, 1);
387 vs
.outputs_count
= 2;
390 struct ir3_shader_variant fs
= {
391 .type
= MESA_SHADER_FRAGMENT
,
392 .instrlen
= 1, /* max of 9 instructions with num_rts = 8 */
393 .constlen
= align(num_rts
, 4),
394 .info
.max_reg
= MAX2(num_rts
, 1) - 1,
395 .total_in
= blit
? 2 : 0,
396 .num_samp
= blit
? 1 : 0,
397 .inputs_count
= blit
? 2 : 0,
399 .slot
= VARYING_SLOT_VAR0
,
405 .slot
= SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL
,
406 .regid
= regid(0, 0),
409 .num_sampler_prefetch
= blit
? 1 : 0,
410 .sampler_prefetch
[0] = {
415 .shader
= &dummy_shader
,
416 .const_state
= &dummy_const_state
,
419 tu_cs_emit_regs(cs
, A6XX_HLSQ_INVALIDATE_CMD(
428 .gfx_shared_const
= true,
429 .gfx_bindless
= 0x1f,
430 .cs_bindless
= 0x1f));
432 tu6_emit_xs_config(cs
, MESA_SHADER_VERTEX
, &vs
, global_iova(cmd
, shaders
[GLOBAL_SH_VS
]));
433 tu6_emit_xs_config(cs
, MESA_SHADER_TESS_CTRL
, NULL
, 0);
434 tu6_emit_xs_config(cs
, MESA_SHADER_TESS_EVAL
, NULL
, 0);
435 tu6_emit_xs_config(cs
, MESA_SHADER_GEOMETRY
, NULL
, 0);
436 tu6_emit_xs_config(cs
, MESA_SHADER_FRAGMENT
, &fs
,
437 global_iova(cmd
, shaders
[blit
? GLOBAL_SH_FS_BLIT
: (GLOBAL_SH_FS_CLEAR0
+ num_rts
)]));
439 tu_cs_emit_regs(cs
, A6XX_PC_PRIMITIVE_CNTL_0());
440 tu_cs_emit_regs(cs
, A6XX_VFD_CONTROL_0());
442 /* Copy what the blob does here. This will emit an extra 0x3f
443 * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what
444 * this is working around yet.
446 tu_cs_emit_pkt7(cs
, CP_REG_WRITE
, 3);
447 tu_cs_emit(cs
, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE
));
448 tu_cs_emit(cs
, REG_A6XX_PC_MULTIVIEW_CNTL
);
450 tu_cs_emit_regs(cs
, A6XX_VFD_MULTIVIEW_CNTL());
452 tu6_emit_vpc(cs
, &vs
, NULL
, NULL
, NULL
, &fs
, 0, false);
454 /* REPL_MODE for varying with RECTLIST (2 vertices only) */
455 tu_cs_emit_regs(cs
, A6XX_VPC_VARYING_INTERP_MODE(0, 0));
456 tu_cs_emit_regs(cs
, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0));
458 tu6_emit_fs_inputs(cs
, &fs
);
462 .persp_division_disable
= 1,
463 .vp_xform_disable
= 1,
464 .vp_clip_code_ignore
= 1,
466 tu_cs_emit_regs(cs
, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?
469 A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x
= 0, .y
= 0),
470 A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x
= 0x7fff, .y
= 0x7fff));
472 A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x
= 0, .y
= 0),
473 A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x
= 0x7fff, .y
= 0x7fff));
476 A6XX_VFD_INDEX_OFFSET(),
477 A6XX_VFD_INSTANCE_START_OFFSET());
481 r3d_coords_raw(struct tu_cs
*cs
, const float *coords
)
483 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_GEOM
, 3 + 8);
484 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
485 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
486 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
487 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER
) |
488 CP_LOAD_STATE6_0_NUM_UNIT(2));
489 tu_cs_emit(cs
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
490 tu_cs_emit(cs
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
491 tu_cs_emit_array(cs
, (const uint32_t *) coords
, 8);
495 r3d_coords(struct tu_cs
*cs
,
496 const VkOffset2D
*dst
,
497 const VkOffset2D
*src
,
498 const VkExtent2D
*extent
)
500 int32_t src_x1
= src
? src
->x
: 0;
501 int32_t src_y1
= src
? src
->y
: 0;
502 r3d_coords_raw(cs
, (float[]) {
505 dst
->x
+ extent
->width
, dst
->y
+ extent
->height
,
506 src_x1
+ extent
->width
, src_y1
+ extent
->height
,
511 r3d_clear_value(struct tu_cs
*cs
, VkFormat format
, const VkClearValue
*val
)
513 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3 + 4);
514 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
515 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
516 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
517 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER
) |
518 CP_LOAD_STATE6_0_NUM_UNIT(1));
519 tu_cs_emit(cs
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
520 tu_cs_emit(cs
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
522 case VK_FORMAT_X8_D24_UNORM_PACK32
:
523 case VK_FORMAT_D24_UNORM_S8_UINT
: {
524 /* cleared as r8g8b8a8_unorm using special format */
525 uint32_t tmp
= tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 24);
526 tu_cs_emit(cs
, fui((tmp
& 0xff) / 255.0f
));
527 tu_cs_emit(cs
, fui((tmp
>> 8 & 0xff) / 255.0f
));
528 tu_cs_emit(cs
, fui((tmp
>> 16 & 0xff) / 255.0f
));
529 tu_cs_emit(cs
, fui((val
->depthStencil
.stencil
& 0xff) / 255.0f
));
531 case VK_FORMAT_D16_UNORM
:
532 case VK_FORMAT_D32_SFLOAT
:
533 tu_cs_emit(cs
, fui(val
->depthStencil
.depth
));
538 case VK_FORMAT_S8_UINT
:
539 tu_cs_emit(cs
, val
->depthStencil
.stencil
& 0xff);
545 /* as color formats use clear value as-is */
546 assert(!vk_format_is_depth_or_stencil(format
));
547 tu_cs_emit_array(cs
, val
->color
.uint32
, 4);
553 r3d_src_common(struct tu_cmd_buffer
*cmd
,
555 const uint32_t *tex_const
,
556 uint32_t offset_base
,
557 uint32_t offset_ubwc
,
560 struct tu_cs_memory texture
= { };
561 VkResult result
= tu_cs_alloc(&cmd
->sub_cs
,
562 2, /* allocate space for a sampler too */
563 A6XX_TEX_CONST_DWORDS
, &texture
);
564 assert(result
== VK_SUCCESS
);
566 memcpy(texture
.map
, tex_const
, A6XX_TEX_CONST_DWORDS
* 4);
568 /* patch addresses for layer offset */
569 *(uint64_t*) (texture
.map
+ 4) += offset_base
;
570 uint64_t ubwc_addr
= (texture
.map
[7] | (uint64_t) texture
.map
[8] << 32) + offset_ubwc
;
571 texture
.map
[7] = ubwc_addr
;
572 texture
.map
[8] = ubwc_addr
>> 32;
574 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 0] =
575 A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(filter
, false)) |
576 A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(filter
, false)) |
577 A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE
) |
578 A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE
) |
579 A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE
) |
580 0x60000; /* XXX used by blob, doesn't seem necessary */
581 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 1] =
582 0x1 | /* XXX used by blob, doesn't seem necessary */
583 A6XX_TEX_SAMP_1_UNNORM_COORDS
|
584 A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR
;
585 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 2] = 0;
586 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 3] = 0;
588 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3);
589 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
590 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER
) |
591 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
592 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX
) |
593 CP_LOAD_STATE6_0_NUM_UNIT(1));
594 tu_cs_emit_qw(cs
, texture
.iova
+ A6XX_TEX_CONST_DWORDS
* 4);
596 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_TEX_SAMP_LO
, 2);
597 tu_cs_emit_qw(cs
, texture
.iova
+ A6XX_TEX_CONST_DWORDS
* 4);
599 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3);
600 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
601 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
602 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
603 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX
) |
604 CP_LOAD_STATE6_0_NUM_UNIT(1));
605 tu_cs_emit_qw(cs
, texture
.iova
);
607 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_TEX_CONST_LO
, 2);
608 tu_cs_emit_qw(cs
, texture
.iova
);
610 tu_cs_emit_regs(cs
, A6XX_SP_FS_TEX_COUNT(1));
614 r3d_src(struct tu_cmd_buffer
*cmd
,
616 const struct tu_image_view
*iview
,
620 r3d_src_common(cmd
, cs
, iview
->descriptor
,
621 iview
->layer_size
* layer
,
622 iview
->ubwc_layer_size
* layer
,
627 r3d_src_buffer(struct tu_cmd_buffer
*cmd
,
630 uint64_t va
, uint32_t pitch
,
631 uint32_t width
, uint32_t height
)
633 uint32_t desc
[A6XX_TEX_CONST_DWORDS
];
635 struct tu_native_format format
= tu6_format_texture(vk_format
, TILE6_LINEAR
);
638 COND(vk_format_is_srgb(vk_format
), A6XX_TEX_CONST_0_SRGB
) |
639 A6XX_TEX_CONST_0_FMT(format
.fmt
) |
640 A6XX_TEX_CONST_0_SWAP(format
.swap
) |
641 A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X
) |
642 // XXX to swizzle into .w for stencil buffer_to_image
643 A6XX_TEX_CONST_0_SWIZ_Y(vk_format
== VK_FORMAT_R8_UNORM
? A6XX_TEX_X
: A6XX_TEX_Y
) |
644 A6XX_TEX_CONST_0_SWIZ_Z(vk_format
== VK_FORMAT_R8_UNORM
? A6XX_TEX_X
: A6XX_TEX_Z
) |
645 A6XX_TEX_CONST_0_SWIZ_W(vk_format
== VK_FORMAT_R8_UNORM
? A6XX_TEX_X
: A6XX_TEX_W
);
646 desc
[1] = A6XX_TEX_CONST_1_WIDTH(width
) | A6XX_TEX_CONST_1_HEIGHT(height
);
648 A6XX_TEX_CONST_2_PITCH(pitch
) |
649 A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D
);
653 for (uint32_t i
= 6; i
< A6XX_TEX_CONST_DWORDS
; i
++)
656 r3d_src_common(cmd
, cs
, desc
, 0, 0, VK_FILTER_NEAREST
);
660 r3d_dst(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
)
662 tu6_emit_msaa(cs
, iview
->image
->samples
); /* TODO: move to setup */
664 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
665 tu_cs_emit(cs
, iview
->RB_MRT_BUF_INFO
);
666 tu_cs_image_ref(cs
, iview
, layer
);
669 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
670 tu_cs_image_flag_ref(cs
, iview
, layer
);
672 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_CNTL(.flag_mrts
= iview
->ubwc_enabled
));
676 r3d_dst_stencil(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
)
678 tu6_emit_msaa(cs
, iview
->image
->samples
); /* TODO: move to setup */
680 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
681 tu_cs_emit(cs
, tu_image_view_stencil(iview
, RB_MRT_BUF_INFO
));
682 tu_cs_image_stencil_ref(cs
, iview
, layer
);
685 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_CNTL());
689 r3d_dst_buffer(struct tu_cs
*cs
, VkFormat vk_format
, uint64_t va
, uint32_t pitch
)
691 struct tu_native_format format
= tu6_format_color(vk_format
, TILE6_LINEAR
);
693 tu6_emit_msaa(cs
, 1); /* TODO: move to setup */
696 A6XX_RB_MRT_BUF_INFO(0, .color_format
= format
.fmt
, .color_swap
= format
.swap
),
697 A6XX_RB_MRT_PITCH(0, pitch
),
698 A6XX_RB_MRT_ARRAY_PITCH(0, 0),
699 A6XX_RB_MRT_BASE_LO(0, (uint32_t) va
),
700 A6XX_RB_MRT_BASE_HI(0, va
>> 32),
701 A6XX_RB_MRT_BASE_GMEM(0, 0));
703 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_CNTL());
707 aspect_write_mask(VkFormat vk_format
, VkImageAspectFlags aspect_mask
)
711 /* note: the only format with partial writing is D24S8,
712 * clear/blit uses the _AS_R8G8B8A8 format to access it
714 if (vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
) {
715 if (aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
)
717 if (aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
724 r3d_setup(struct tu_cmd_buffer
*cmd
,
727 VkImageAspectFlags aspect_mask
,
728 enum a6xx_rotation rotation
,
732 enum a6xx_format format
= tu6_base_format(vk_format
);
734 if ((vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
||
735 vk_format
== VK_FORMAT_X8_D24_UNORM_PACK32
) && ubwc
) {
736 format
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
739 if (!cmd
->state
.pass
) {
740 tu_emit_cache_flush_ccu(cmd
, cs
, TU_CMD_CCU_SYSMEM
);
741 tu6_emit_window_scissor(cs
, 0, 0, 0x3fff, 0x3fff);
744 tu_cs_emit_regs(cs
, A6XX_GRAS_BIN_CONTROL(.dword
= 0xc00000));
745 tu_cs_emit_regs(cs
, A6XX_RB_BIN_CONTROL(.dword
= 0xc00000));
747 r3d_common(cmd
, cs
, !clear
, clear
? 1 : 0, false);
749 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_CNTL0
, 2);
750 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
751 A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
753 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1));
755 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_REG(0), 1);
756 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_REG_REGID(0));
759 A6XX_RB_FS_OUTPUT_CNTL0(),
760 A6XX_RB_FS_OUTPUT_CNTL1(.mrt
= 1));
762 tu_cs_emit_regs(cs
, A6XX_SP_BLEND_CNTL());
763 tu_cs_emit_regs(cs
, A6XX_RB_BLEND_CNTL(.sample_mask
= 0xffff));
764 tu_cs_emit_regs(cs
, A6XX_RB_ALPHA_CONTROL());
766 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_PLANE_CNTL());
767 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_CNTL());
768 tu_cs_emit_regs(cs
, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
769 tu_cs_emit_regs(cs
, A6XX_RB_STENCIL_CONTROL());
770 tu_cs_emit_regs(cs
, A6XX_RB_STENCILMASK());
771 tu_cs_emit_regs(cs
, A6XX_RB_STENCILWRMASK());
772 tu_cs_emit_regs(cs
, A6XX_RB_STENCILREF());
774 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_COMPONENTS(.rt0
= 0xf));
775 tu_cs_emit_regs(cs
, A6XX_SP_FS_RENDER_COMPONENTS(.rt0
= 0xf));
777 tu_cs_emit_regs(cs
, A6XX_SP_FS_MRT_REG(0,
778 .color_format
= format
,
779 .color_sint
= vk_format_is_sint(vk_format
),
780 .color_uint
= vk_format_is_uint(vk_format
)));
782 tu_cs_emit_regs(cs
, A6XX_RB_MRT_CONTROL(0,
783 .component_enable
= aspect_write_mask(vk_format
, aspect_mask
)));
784 tu_cs_emit_regs(cs
, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format
)));
785 tu_cs_emit_regs(cs
, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format
)));
787 if (cmd
->state
.predication_active
) {
788 tu_cs_emit_pkt7(cs
, CP_DRAW_PRED_ENABLE_LOCAL
, 1);
794 r3d_run(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
)
796 tu_cs_emit_pkt7(cs
, CP_DRAW_INDX_OFFSET
, 3);
797 tu_cs_emit(cs
, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST
) |
798 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX
) |
799 CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY
));
800 tu_cs_emit(cs
, 1); /* instance count */
801 tu_cs_emit(cs
, 2); /* vertex count */
805 r3d_teardown(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
)
807 if (cmd
->state
.predication_active
) {
808 tu_cs_emit_pkt7(cs
, CP_DRAW_PRED_ENABLE_LOCAL
, 1);
813 /* blit ops - common interface for 2d/shader paths */
816 void (*coords
)(struct tu_cs
*cs
,
817 const VkOffset2D
*dst
,
818 const VkOffset2D
*src
,
819 const VkExtent2D
*extent
);
820 void (*clear_value
)(struct tu_cs
*cs
, VkFormat format
, const VkClearValue
*val
);
822 struct tu_cmd_buffer
*cmd
,
824 const struct tu_image_view
*iview
,
827 void (*src_buffer
)(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
,
829 uint64_t va
, uint32_t pitch
,
830 uint32_t width
, uint32_t height
);
831 void (*dst
)(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
);
832 void (*dst_buffer
)(struct tu_cs
*cs
, VkFormat vk_format
, uint64_t va
, uint32_t pitch
);
833 void (*setup
)(struct tu_cmd_buffer
*cmd
,
836 VkImageAspectFlags aspect_mask
,
837 enum a6xx_rotation rotation
,
840 void (*run
)(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
);
841 void (*teardown
)(struct tu_cmd_buffer
*cmd
,
845 static const struct blit_ops r2d_ops
= {
846 .coords
= r2d_coords
,
847 .clear_value
= r2d_clear_value
,
849 .src_buffer
= r2d_src_buffer
,
851 .dst_buffer
= r2d_dst_buffer
,
854 .teardown
= r2d_teardown
,
857 static const struct blit_ops r3d_ops
= {
858 .coords
= r3d_coords
,
859 .clear_value
= r3d_clear_value
,
861 .src_buffer
= r3d_src_buffer
,
863 .dst_buffer
= r3d_dst_buffer
,
866 .teardown
= r3d_teardown
,
869 /* passthrough set coords from 3D extents */
871 coords(const struct blit_ops
*ops
,
873 const VkOffset3D
*dst
,
874 const VkOffset3D
*src
,
875 const VkExtent3D
*extent
)
877 ops
->coords(cs
, (const VkOffset2D
*) dst
, (const VkOffset2D
*) src
, (const VkExtent2D
*) extent
);
881 copy_format(VkFormat format
, VkImageAspectFlags aspect_mask
, bool copy_buffer
)
883 if (vk_format_is_compressed(format
)) {
884 switch (vk_format_get_blocksize(format
)) {
885 case 1: return VK_FORMAT_R8_UINT
;
886 case 2: return VK_FORMAT_R16_UINT
;
887 case 4: return VK_FORMAT_R32_UINT
;
888 case 8: return VK_FORMAT_R32G32_UINT
;
889 case 16:return VK_FORMAT_R32G32B32A32_UINT
;
891 unreachable("unhandled format size");
896 case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM
:
897 if (aspect_mask
== VK_IMAGE_ASPECT_PLANE_1_BIT
)
898 return VK_FORMAT_R8G8_UNORM
;
900 case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM
:
901 return VK_FORMAT_R8_UNORM
;
902 case VK_FORMAT_D24_UNORM_S8_UINT
:
903 if (aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
&& copy_buffer
)
904 return VK_FORMAT_R8_UNORM
;
908 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
:
909 return VK_FORMAT_R32_UINT
;
910 case VK_FORMAT_D32_SFLOAT_S8_UINT
:
911 if (aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
912 return VK_FORMAT_S8_UINT
;
913 assert(aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
);
914 return VK_FORMAT_D32_SFLOAT
;
919 tu_image_view_copy_blit(struct tu_image_view
*iview
,
920 struct tu_image
*image
,
922 const VkImageSubresourceLayers
*subres
,
926 VkImageAspectFlags aspect_mask
= subres
->aspectMask
;
928 /* always use the AS_R8G8B8A8 format for these */
929 if (format
== VK_FORMAT_D24_UNORM_S8_UINT
||
930 format
== VK_FORMAT_X8_D24_UNORM_PACK32
) {
931 aspect_mask
= VK_IMAGE_ASPECT_COLOR_BIT
;
934 tu_image_view_init(iview
, &(VkImageViewCreateInfo
) {
935 .image
= tu_image_to_handle(image
),
936 .viewType
= VK_IMAGE_VIEW_TYPE_2D
,
938 /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
939 .components
.r
= stencil_read
? VK_COMPONENT_SWIZZLE_A
: VK_COMPONENT_SWIZZLE_R
,
940 .subresourceRange
= {
941 .aspectMask
= aspect_mask
,
942 .baseMipLevel
= subres
->mipLevel
,
944 .baseArrayLayer
= subres
->baseArrayLayer
+ layer
,
951 tu_image_view_copy(struct tu_image_view
*iview
,
952 struct tu_image
*image
,
954 const VkImageSubresourceLayers
*subres
,
958 format
= copy_format(format
, subres
->aspectMask
, false);
959 tu_image_view_copy_blit(iview
, image
, format
, subres
, layer
, stencil_read
);
963 tu_image_view_blit(struct tu_image_view
*iview
,
964 struct tu_image
*image
,
965 const VkImageSubresourceLayers
*subres
,
968 tu_image_view_copy_blit(iview
, image
, image
->vk_format
, subres
, layer
, false);
972 tu6_blit_image(struct tu_cmd_buffer
*cmd
,
973 struct tu_image
*src_image
,
974 struct tu_image
*dst_image
,
975 const VkImageBlit
*info
,
978 const struct blit_ops
*ops
= &r2d_ops
;
979 struct tu_cs
*cs
= &cmd
->cs
;
982 /* 2D blit can't do rotation mirroring from just coordinates */
983 static const enum a6xx_rotation rotate
[2][2] = {
984 {ROTATE_0
, ROTATE_HFLIP
},
985 {ROTATE_VFLIP
, ROTATE_180
},
988 bool mirror_x
= (info
->srcOffsets
[1].x
< info
->srcOffsets
[0].x
) !=
989 (info
->dstOffsets
[1].x
< info
->dstOffsets
[0].x
);
990 bool mirror_y
= (info
->srcOffsets
[1].y
< info
->srcOffsets
[0].y
) !=
991 (info
->dstOffsets
[1].y
< info
->dstOffsets
[0].y
);
992 bool mirror_z
= (info
->srcOffsets
[1].z
< info
->srcOffsets
[0].z
) !=
993 (info
->dstOffsets
[1].z
< info
->dstOffsets
[0].z
);
996 tu_finishme("blit z mirror\n");
1000 if (info
->srcOffsets
[1].z
- info
->srcOffsets
[0].z
!=
1001 info
->dstOffsets
[1].z
- info
->dstOffsets
[0].z
) {
1002 tu_finishme("blit z filter\n");
1006 layers
= info
->srcOffsets
[1].z
- info
->srcOffsets
[0].z
;
1007 if (info
->dstSubresource
.layerCount
> 1) {
1008 assert(layers
<= 1);
1009 layers
= info
->dstSubresource
.layerCount
;
1012 /* BC1_RGB_* formats need to have their last components overriden with 1
1013 * when sampling, which is normally handled with the texture descriptor
1014 * swizzle. The 2d path can't handle that, so use the 3d path.
1016 * TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with
1020 if (dst_image
->samples
> 1 ||
1021 src_image
->vk_format
== VK_FORMAT_BC1_RGB_UNORM_BLOCK
||
1022 src_image
->vk_format
== VK_FORMAT_BC1_RGB_SRGB_BLOCK
||
1023 filter
== VK_FILTER_CUBIC_EXT
)
1026 /* use the right format in setup() for D32_S8
1027 * TODO: this probably should use a helper
1029 VkFormat format
= dst_image
->vk_format
;
1030 if (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
1031 if (info
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_DEPTH_BIT
)
1032 format
= VK_FORMAT_D32_SFLOAT
;
1033 else if (info
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
1034 format
= VK_FORMAT_S8_UINT
;
1036 unreachable("unexpected D32_S8 aspect mask in blit_image");
1039 ops
->setup(cmd
, cs
, format
, info
->dstSubresource
.aspectMask
,
1040 rotate
[mirror_y
][mirror_x
], false, dst_image
->layout
[0].ubwc
);
1042 if (ops
== &r3d_ops
) {
1043 r3d_coords_raw(cs
, (float[]) {
1044 info
->dstOffsets
[0].x
, info
->dstOffsets
[0].y
,
1045 info
->srcOffsets
[0].x
, info
->srcOffsets
[0].y
,
1046 info
->dstOffsets
[1].x
, info
->dstOffsets
[1].y
,
1047 info
->srcOffsets
[1].x
, info
->srcOffsets
[1].y
1051 A6XX_GRAS_2D_DST_TL(.x
= MIN2(info
->dstOffsets
[0].x
, info
->dstOffsets
[1].x
),
1052 .y
= MIN2(info
->dstOffsets
[0].y
, info
->dstOffsets
[1].y
)),
1053 A6XX_GRAS_2D_DST_BR(.x
= MAX2(info
->dstOffsets
[0].x
, info
->dstOffsets
[1].x
) - 1,
1054 .y
= MAX2(info
->dstOffsets
[0].y
, info
->dstOffsets
[1].y
) - 1));
1056 A6XX_GRAS_2D_SRC_TL_X(MIN2(info
->srcOffsets
[0].x
, info
->srcOffsets
[1].x
)),
1057 A6XX_GRAS_2D_SRC_BR_X(MAX2(info
->srcOffsets
[0].x
, info
->srcOffsets
[1].x
) - 1),
1058 A6XX_GRAS_2D_SRC_TL_Y(MIN2(info
->srcOffsets
[0].y
, info
->srcOffsets
[1].y
)),
1059 A6XX_GRAS_2D_SRC_BR_Y(MAX2(info
->srcOffsets
[0].y
, info
->srcOffsets
[1].y
) - 1));
1062 struct tu_image_view dst
, src
;
1063 tu_image_view_blit(&dst
, dst_image
, &info
->dstSubresource
, info
->dstOffsets
[0].z
);
1064 tu_image_view_blit(&src
, src_image
, &info
->srcSubresource
, info
->srcOffsets
[0].z
);
1066 for (uint32_t i
= 0; i
< layers
; i
++) {
1067 ops
->dst(cs
, &dst
, i
);
1068 ops
->src(cmd
, cs
, &src
, i
, filter
);
1072 ops
->teardown(cmd
, cs
);
1076 tu_CmdBlitImage(VkCommandBuffer commandBuffer
,
1078 VkImageLayout srcImageLayout
,
1080 VkImageLayout dstImageLayout
,
1081 uint32_t regionCount
,
1082 const VkImageBlit
*pRegions
,
1086 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1087 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1088 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1090 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1091 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1093 for (uint32_t i
= 0; i
< regionCount
; ++i
) {
1094 /* can't blit both depth and stencil at once with D32_S8
1095 * TODO: more advanced 3D blit path to support it instead?
1097 if (src_image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
||
1098 dst_image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
1099 VkImageBlit region
= pRegions
[i
];
1101 for_each_bit(b
, pRegions
[i
].dstSubresource
.aspectMask
) {
1102 region
.srcSubresource
.aspectMask
= BIT(b
);
1103 region
.dstSubresource
.aspectMask
= BIT(b
);
1104 tu6_blit_image(cmd
, src_image
, dst_image
, ®ion
, filter
);
1108 tu6_blit_image(cmd
, src_image
, dst_image
, pRegions
+ i
, filter
);
1113 copy_compressed(VkFormat format
,
1119 if (!vk_format_is_compressed(format
))
1122 uint32_t block_width
= vk_format_get_blockwidth(format
);
1123 uint32_t block_height
= vk_format_get_blockheight(format
);
1125 offset
->x
/= block_width
;
1126 offset
->y
/= block_height
;
1129 extent
->width
= DIV_ROUND_UP(extent
->width
, block_width
);
1130 extent
->height
= DIV_ROUND_UP(extent
->height
, block_height
);
1133 *width
= DIV_ROUND_UP(*width
, block_width
);
1135 *height
= DIV_ROUND_UP(*height
, block_height
);
1139 tu_copy_buffer_to_image(struct tu_cmd_buffer
*cmd
,
1140 struct tu_buffer
*src_buffer
,
1141 struct tu_image
*dst_image
,
1142 const VkBufferImageCopy
*info
)
1144 struct tu_cs
*cs
= &cmd
->cs
;
1145 uint32_t layers
= MAX2(info
->imageExtent
.depth
, info
->imageSubresource
.layerCount
);
1146 VkFormat src_format
=
1147 copy_format(dst_image
->vk_format
, info
->imageSubresource
.aspectMask
, true);
1148 const struct blit_ops
*ops
= &r2d_ops
;
1150 /* special case for buffer to stencil */
1151 if (dst_image
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
&&
1152 info
->imageSubresource
.aspectMask
== VK_IMAGE_ASPECT_STENCIL_BIT
) {
1156 /* TODO: G8_B8R8_2PLANE_420_UNORM Y plane has different hardware format,
1157 * which matters for UBWC. buffer_to_image/etc can fail because of this
1160 VkOffset3D offset
= info
->imageOffset
;
1161 VkExtent3D extent
= info
->imageExtent
;
1162 uint32_t src_width
= info
->bufferRowLength
?: extent
.width
;
1163 uint32_t src_height
= info
->bufferImageHeight
?: extent
.height
;
1165 copy_compressed(dst_image
->vk_format
, &offset
, &extent
, &src_width
, &src_height
);
1167 uint32_t pitch
= src_width
* vk_format_get_blocksize(src_format
);
1168 uint32_t layer_size
= src_height
* pitch
;
1171 copy_format(dst_image
->vk_format
, info
->imageSubresource
.aspectMask
, false),
1172 info
->imageSubresource
.aspectMask
, ROTATE_0
, false, dst_image
->layout
[0].ubwc
);
1174 struct tu_image_view dst
;
1175 tu_image_view_copy(&dst
, dst_image
, dst_image
->vk_format
, &info
->imageSubresource
, offset
.z
, false);
1177 for (uint32_t i
= 0; i
< layers
; i
++) {
1178 ops
->dst(cs
, &dst
, i
);
1180 uint64_t src_va
= tu_buffer_iova(src_buffer
) + info
->bufferOffset
+ layer_size
* i
;
1181 if ((src_va
& 63) || (pitch
& 63)) {
1182 for (uint32_t y
= 0; y
< extent
.height
; y
++) {
1183 uint32_t x
= (src_va
& 63) / vk_format_get_blocksize(src_format
);
1184 ops
->src_buffer(cmd
, cs
, src_format
, src_va
& ~63, pitch
,
1185 x
+ extent
.width
, 1);
1186 ops
->coords(cs
, &(VkOffset2D
){offset
.x
, offset
.y
+ y
}, &(VkOffset2D
){x
},
1187 &(VkExtent2D
) {extent
.width
, 1});
1192 ops
->src_buffer(cmd
, cs
, src_format
, src_va
, pitch
, extent
.width
, extent
.height
);
1193 coords(ops
, cs
, &offset
, &(VkOffset3D
){}, &extent
);
1198 ops
->teardown(cmd
, cs
);
1202 tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer
,
1205 VkImageLayout dstImageLayout
,
1206 uint32_t regionCount
,
1207 const VkBufferImageCopy
*pRegions
)
1209 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1210 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1211 TU_FROM_HANDLE(tu_buffer
, src_buffer
, srcBuffer
);
1213 tu_bo_list_add(&cmd
->bo_list
, src_buffer
->bo
, MSM_SUBMIT_BO_READ
);
1214 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1216 for (unsigned i
= 0; i
< regionCount
; ++i
)
1217 tu_copy_buffer_to_image(cmd
, src_buffer
, dst_image
, pRegions
+ i
);
1221 tu_copy_image_to_buffer(struct tu_cmd_buffer
*cmd
,
1222 struct tu_image
*src_image
,
1223 struct tu_buffer
*dst_buffer
,
1224 const VkBufferImageCopy
*info
)
1226 struct tu_cs
*cs
= &cmd
->cs
;
1227 uint32_t layers
= MAX2(info
->imageExtent
.depth
, info
->imageSubresource
.layerCount
);
1228 VkFormat dst_format
=
1229 copy_format(src_image
->vk_format
, info
->imageSubresource
.aspectMask
, true);
1230 bool stencil_read
= false;
1232 if (src_image
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
&&
1233 info
->imageSubresource
.aspectMask
== VK_IMAGE_ASPECT_STENCIL_BIT
) {
1234 stencil_read
= true;
1237 const struct blit_ops
*ops
= stencil_read
? &r3d_ops
: &r2d_ops
;
1238 VkOffset3D offset
= info
->imageOffset
;
1239 VkExtent3D extent
= info
->imageExtent
;
1240 uint32_t dst_width
= info
->bufferRowLength
?: extent
.width
;
1241 uint32_t dst_height
= info
->bufferImageHeight
?: extent
.height
;
1243 copy_compressed(src_image
->vk_format
, &offset
, &extent
, &dst_width
, &dst_height
);
1245 uint32_t pitch
= dst_width
* vk_format_get_blocksize(dst_format
);
1246 uint32_t layer_size
= pitch
* dst_height
;
1248 ops
->setup(cmd
, cs
, dst_format
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, false, false);
1250 struct tu_image_view src
;
1251 tu_image_view_copy(&src
, src_image
, src_image
->vk_format
, &info
->imageSubresource
, offset
.z
, stencil_read
);
1253 for (uint32_t i
= 0; i
< layers
; i
++) {
1254 ops
->src(cmd
, cs
, &src
, i
, VK_FILTER_NEAREST
);
1256 uint64_t dst_va
= tu_buffer_iova(dst_buffer
) + info
->bufferOffset
+ layer_size
* i
;
1257 if ((dst_va
& 63) || (pitch
& 63)) {
1258 for (uint32_t y
= 0; y
< extent
.height
; y
++) {
1259 uint32_t x
= (dst_va
& 63) / vk_format_get_blocksize(dst_format
);
1260 ops
->dst_buffer(cs
, dst_format
, dst_va
& ~63, 0);
1261 ops
->coords(cs
, &(VkOffset2D
) {x
}, &(VkOffset2D
){offset
.x
, offset
.y
+ y
},
1262 &(VkExtent2D
) {extent
.width
, 1});
1267 ops
->dst_buffer(cs
, dst_format
, dst_va
, pitch
);
1268 coords(ops
, cs
, &(VkOffset3D
) {0, 0}, &offset
, &extent
);
1273 ops
->teardown(cmd
, cs
);
1277 tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer
,
1279 VkImageLayout srcImageLayout
,
1281 uint32_t regionCount
,
1282 const VkBufferImageCopy
*pRegions
)
1284 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1285 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1286 TU_FROM_HANDLE(tu_buffer
, dst_buffer
, dstBuffer
);
1288 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1289 tu_bo_list_add(&cmd
->bo_list
, dst_buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1291 for (unsigned i
= 0; i
< regionCount
; ++i
)
1292 tu_copy_image_to_buffer(cmd
, src_image
, dst_buffer
, pRegions
+ i
);
1295 /* Tiled formats don't support swapping, which means that we can't support
1296 * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some
1297 * formats like B5G5R5A1 have a separate linear-only format when sampling.
1298 * Currently we fake support for tiled swapped formats and use the unswapped
1299 * format instead, but this means that reinterpreting copies to and from
1300 * swapped formats can't be performed correctly unless we can swizzle the
1301 * components by reinterpreting the other image as the "correct" swapped
1302 * format, i.e. only when the other image is linear.
1306 is_swapped_format(VkFormat format
)
1308 struct tu_native_format linear
= tu6_format_texture(format
, TILE6_LINEAR
);
1309 struct tu_native_format tiled
= tu6_format_texture(format
, TILE6_3
);
1310 return linear
.fmt
!= tiled
.fmt
|| linear
.swap
!= tiled
.swap
;
1313 /* R8G8_* formats have a different tiling layout than other cpp=2 formats, and
1314 * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice
1315 * versa). This should mirror the logic in fdl6_layout.
1318 image_is_r8g8(struct tu_image
*image
)
1320 return image
->layout
[0].cpp
== 2 &&
1321 vk_format_get_nr_components(image
->vk_format
) == 2;
1325 tu_copy_image_to_image(struct tu_cmd_buffer
*cmd
,
1326 struct tu_image
*src_image
,
1327 struct tu_image
*dst_image
,
1328 const VkImageCopy
*info
)
1330 const struct blit_ops
*ops
= &r2d_ops
;
1331 struct tu_cs
*cs
= &cmd
->cs
;
1333 if (dst_image
->samples
> 1)
1336 VkFormat format
= VK_FORMAT_UNDEFINED
;
1337 VkOffset3D src_offset
= info
->srcOffset
;
1338 VkOffset3D dst_offset
= info
->dstOffset
;
1339 VkExtent3D extent
= info
->extent
;
1341 /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between
1344 * When copying between compressed and uncompressed formats the extent
1345 * members represent the texel dimensions of the source image and not
1346 * the destination. When copying from a compressed image to an
1347 * uncompressed image the image texel dimensions written to the
1348 * uncompressed image will be source extent divided by the compressed
1349 * texel block dimensions. When copying from an uncompressed image to a
1350 * compressed image the image texel dimensions written to the compressed
1351 * image will be the source extent multiplied by the compressed texel
1354 * This means we only have to adjust the extent if the source image is
1357 copy_compressed(src_image
->vk_format
, &src_offset
, &extent
, NULL
, NULL
);
1358 copy_compressed(dst_image
->vk_format
, &dst_offset
, NULL
, NULL
, NULL
);
1360 VkFormat dst_format
= copy_format(dst_image
->vk_format
, info
->dstSubresource
.aspectMask
, false);
1361 VkFormat src_format
= copy_format(src_image
->vk_format
, info
->srcSubresource
.aspectMask
, false);
1363 bool use_staging_blit
= false;
1365 if (src_format
== dst_format
) {
1366 /* Images that share a format can always be copied directly because it's
1367 * the same as a blit.
1369 format
= src_format
;
1370 } else if (!src_image
->layout
[0].tile_mode
) {
1371 /* If an image is linear, we can always safely reinterpret it with the
1372 * other image's format and then do a regular blit.
1374 format
= dst_format
;
1375 } else if (!dst_image
->layout
[0].tile_mode
) {
1376 format
= src_format
;
1377 } else if (image_is_r8g8(src_image
) != image_is_r8g8(dst_image
)) {
1378 /* We can't currently copy r8g8 images to/from other cpp=2 images,
1379 * due to the different tile layout.
1381 use_staging_blit
= true;
1382 } else if (is_swapped_format(src_format
) ||
1383 is_swapped_format(dst_format
)) {
1384 /* If either format has a non-identity swap, then we can't copy
1387 use_staging_blit
= true;
1388 } else if (!src_image
->layout
[0].ubwc
) {
1389 format
= dst_format
;
1390 } else if (!dst_image
->layout
[0].ubwc
) {
1391 format
= src_format
;
1393 /* Both formats use UBWC and so neither can be reinterpreted.
1394 * TODO: We could do an in-place decompression of the dst instead.
1396 use_staging_blit
= true;
1399 struct tu_image_view dst
, src
;
1401 if (use_staging_blit
) {
1402 tu_image_view_copy(&dst
, dst_image
, dst_format
, &info
->dstSubresource
, dst_offset
.z
, false);
1403 tu_image_view_copy(&src
, src_image
, src_format
, &info
->srcSubresource
, src_offset
.z
, false);
1405 struct tu_image staging_image
= {
1406 .vk_format
= src_format
,
1407 .type
= src_image
->type
,
1408 .tiling
= VK_IMAGE_TILING_LINEAR
,
1411 .layer_count
= info
->srcSubresource
.layerCount
,
1412 .samples
= src_image
->samples
,
1416 VkImageSubresourceLayers staging_subresource
= {
1417 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
1419 .baseArrayLayer
= 0,
1420 .layerCount
= info
->srcSubresource
.layerCount
,
1423 VkOffset3D staging_offset
= { 0 };
1425 staging_image
.layout
[0].tile_mode
= TILE6_LINEAR
;
1426 staging_image
.layout
[0].ubwc
= false;
1428 fdl6_layout(&staging_image
.layout
[0],
1429 vk_format_to_pipe_format(staging_image
.vk_format
),
1430 staging_image
.samples
,
1431 staging_image
.extent
.width
,
1432 staging_image
.extent
.height
,
1433 staging_image
.extent
.depth
,
1434 staging_image
.level_count
,
1435 staging_image
.layer_count
,
1436 staging_image
.type
== VK_IMAGE_TYPE_3D
,
1439 VkResult result
= tu_get_scratch_bo(cmd
->device
,
1440 staging_image
.layout
[0].size
,
1442 if (result
!= VK_SUCCESS
) {
1443 cmd
->record_result
= result
;
1447 tu_bo_list_add(&cmd
->bo_list
, staging_image
.bo
,
1448 MSM_SUBMIT_BO_READ
| MSM_SUBMIT_BO_WRITE
);
1450 struct tu_image_view staging
;
1451 tu_image_view_copy(&staging
, &staging_image
, src_format
,
1452 &staging_subresource
, 0, false);
1454 ops
->setup(cmd
, cs
, src_format
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, false, false);
1455 coords(ops
, cs
, &staging_offset
, &src_offset
, &extent
);
1457 for (uint32_t i
= 0; i
< info
->extent
.depth
; i
++) {
1458 ops
->src(cmd
, cs
, &src
, i
, VK_FILTER_NEAREST
);
1459 ops
->dst(cs
, &staging
, i
);
1463 /* When executed by the user there has to be a pipeline barrier here,
1464 * but since we're doing it manually we'll have to flush ourselves.
1466 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
);
1467 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
);
1469 tu_image_view_copy(&staging
, &staging_image
, dst_format
,
1470 &staging_subresource
, 0, false);
1472 ops
->setup(cmd
, cs
, dst_format
, info
->dstSubresource
.aspectMask
,
1473 ROTATE_0
, false, dst_image
->layout
[0].ubwc
);
1474 coords(ops
, cs
, &dst_offset
, &staging_offset
, &extent
);
1476 for (uint32_t i
= 0; i
< info
->extent
.depth
; i
++) {
1477 ops
->src(cmd
, cs
, &staging
, i
, VK_FILTER_NEAREST
);
1478 ops
->dst(cs
, &dst
, i
);
1482 tu_image_view_copy(&dst
, dst_image
, format
, &info
->dstSubresource
, dst_offset
.z
, false);
1483 tu_image_view_copy(&src
, src_image
, format
, &info
->srcSubresource
, src_offset
.z
, false);
1485 ops
->setup(cmd
, cs
, format
, info
->dstSubresource
.aspectMask
,
1486 ROTATE_0
, false, dst_image
->layout
[0].ubwc
);
1487 coords(ops
, cs
, &dst_offset
, &src_offset
, &extent
);
1489 for (uint32_t i
= 0; i
< info
->extent
.depth
; i
++) {
1490 ops
->src(cmd
, cs
, &src
, i
, VK_FILTER_NEAREST
);
1491 ops
->dst(cs
, &dst
, i
);
1496 ops
->teardown(cmd
, cs
);
1500 tu_CmdCopyImage(VkCommandBuffer commandBuffer
,
1502 VkImageLayout srcImageLayout
,
1504 VkImageLayout destImageLayout
,
1505 uint32_t regionCount
,
1506 const VkImageCopy
*pRegions
)
1508 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1509 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1510 TU_FROM_HANDLE(tu_image
, dst_image
, destImage
);
1512 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1513 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1515 for (uint32_t i
= 0; i
< regionCount
; ++i
)
1516 tu_copy_image_to_image(cmd
, src_image
, dst_image
, pRegions
+ i
);
1520 copy_buffer(struct tu_cmd_buffer
*cmd
,
1524 uint32_t block_size
)
1526 const struct blit_ops
*ops
= &r2d_ops
;
1527 struct tu_cs
*cs
= &cmd
->cs
;
1528 VkFormat format
= block_size
== 4 ? VK_FORMAT_R32_UINT
: VK_FORMAT_R8_UNORM
;
1529 uint64_t blocks
= size
/ block_size
;
1531 ops
->setup(cmd
, cs
, format
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, false, false);
1534 uint32_t src_x
= (src_va
& 63) / block_size
;
1535 uint32_t dst_x
= (dst_va
& 63) / block_size
;
1536 uint32_t width
= MIN2(MIN2(blocks
, 0x4000 - src_x
), 0x4000 - dst_x
);
1538 ops
->src_buffer(cmd
, cs
, format
, src_va
& ~63, 0, src_x
+ width
, 1);
1539 ops
->dst_buffer( cs
, format
, dst_va
& ~63, 0);
1540 ops
->coords(cs
, &(VkOffset2D
) {dst_x
}, &(VkOffset2D
) {src_x
}, &(VkExtent2D
) {width
, 1});
1543 src_va
+= width
* block_size
;
1544 dst_va
+= width
* block_size
;
1548 ops
->teardown(cmd
, cs
);
1552 tu_CmdCopyBuffer(VkCommandBuffer commandBuffer
,
1555 uint32_t regionCount
,
1556 const VkBufferCopy
*pRegions
)
1558 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1559 TU_FROM_HANDLE(tu_buffer
, src_buffer
, srcBuffer
);
1560 TU_FROM_HANDLE(tu_buffer
, dst_buffer
, dstBuffer
);
1562 tu_bo_list_add(&cmd
->bo_list
, src_buffer
->bo
, MSM_SUBMIT_BO_READ
);
1563 tu_bo_list_add(&cmd
->bo_list
, dst_buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1565 for (unsigned i
= 0; i
< regionCount
; ++i
) {
1567 tu_buffer_iova(dst_buffer
) + pRegions
[i
].dstOffset
,
1568 tu_buffer_iova(src_buffer
) + pRegions
[i
].srcOffset
,
1569 pRegions
[i
].size
, 1);
1574 tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer
,
1576 VkDeviceSize dstOffset
,
1577 VkDeviceSize dataSize
,
1580 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1581 TU_FROM_HANDLE(tu_buffer
, buffer
, dstBuffer
);
1583 tu_bo_list_add(&cmd
->bo_list
, buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1585 struct tu_cs_memory tmp
;
1586 VkResult result
= tu_cs_alloc(&cmd
->sub_cs
, DIV_ROUND_UP(dataSize
, 64), 64, &tmp
);
1587 if (result
!= VK_SUCCESS
) {
1588 cmd
->record_result
= result
;
1592 memcpy(tmp
.map
, pData
, dataSize
);
1593 copy_buffer(cmd
, tu_buffer_iova(buffer
) + dstOffset
, tmp
.iova
, dataSize
, 4);
1597 tu_CmdFillBuffer(VkCommandBuffer commandBuffer
,
1599 VkDeviceSize dstOffset
,
1600 VkDeviceSize fillSize
,
1603 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1604 TU_FROM_HANDLE(tu_buffer
, buffer
, dstBuffer
);
1605 const struct blit_ops
*ops
= &r2d_ops
;
1606 struct tu_cs
*cs
= &cmd
->cs
;
1608 tu_bo_list_add(&cmd
->bo_list
, buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1610 if (fillSize
== VK_WHOLE_SIZE
)
1611 fillSize
= buffer
->size
- dstOffset
;
1613 uint64_t dst_va
= tu_buffer_iova(buffer
) + dstOffset
;
1614 uint32_t blocks
= fillSize
/ 4;
1616 ops
->setup(cmd
, cs
, VK_FORMAT_R32_UINT
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, true, false);
1617 ops
->clear_value(cs
, VK_FORMAT_R32_UINT
, &(VkClearValue
){.color
= {.uint32
[0] = data
}});
1620 uint32_t dst_x
= (dst_va
& 63) / 4;
1621 uint32_t width
= MIN2(blocks
, 0x4000 - dst_x
);
1623 ops
->dst_buffer(cs
, VK_FORMAT_R32_UINT
, dst_va
& ~63, 0);
1624 ops
->coords(cs
, &(VkOffset2D
) {dst_x
}, NULL
, &(VkExtent2D
) {width
, 1});
1627 dst_va
+= width
* 4;
1631 ops
->teardown(cmd
, cs
);
1635 tu_CmdResolveImage(VkCommandBuffer commandBuffer
,
1637 VkImageLayout srcImageLayout
,
1639 VkImageLayout dstImageLayout
,
1640 uint32_t regionCount
,
1641 const VkImageResolve
*pRegions
)
1643 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1644 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1645 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1646 const struct blit_ops
*ops
= &r2d_ops
;
1647 struct tu_cs
*cs
= &cmd
->cs
;
1649 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1650 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1652 ops
->setup(cmd
, cs
, dst_image
->vk_format
, VK_IMAGE_ASPECT_COLOR_BIT
,
1653 ROTATE_0
, false, dst_image
->layout
[0].ubwc
);
1655 for (uint32_t i
= 0; i
< regionCount
; ++i
) {
1656 const VkImageResolve
*info
= &pRegions
[i
];
1657 uint32_t layers
= MAX2(info
->extent
.depth
, info
->dstSubresource
.layerCount
);
1659 assert(info
->srcSubresource
.layerCount
== info
->dstSubresource
.layerCount
);
1660 /* TODO: aspect masks possible ? */
1662 coords(ops
, cs
, &info
->dstOffset
, &info
->srcOffset
, &info
->extent
);
1664 struct tu_image_view dst
, src
;
1665 tu_image_view_blit(&dst
, dst_image
, &info
->dstSubresource
, info
->dstOffset
.z
);
1666 tu_image_view_blit(&src
, src_image
, &info
->srcSubresource
, info
->srcOffset
.z
);
1668 for (uint32_t i
= 0; i
< layers
; i
++) {
1669 ops
->src(cmd
, cs
, &src
, i
, VK_FILTER_NEAREST
);
1670 ops
->dst(cs
, &dst
, i
);
1675 ops
->teardown(cmd
, cs
);
/* Iterate "layer" over the layers to process.
 *
 * With a non-zero layer_mask the loop runs from 0 to the index of the
 * highest set bit and the body executes only for set bits; "layers" is
 * ignored.  With a zero layer_mask the body executes for 0 .. layers - 1.
 *
 * The macro expands to "for (...) if (...)", so it must be followed by a
 * single statement or a braced block, and a following "else" would bind to
 * the hidden "if".  layer_mask is evaluated more than once.
 *
 * NOTE(review): the "layer++" line was missing from the extracted listing
 * and is restored; arguments are now parenthesized so that expressions such
 * as "a | b" can be passed safely.
 */
#define for_each_layer(layer, layer_mask, layers)                            \
   for (uint32_t layer = 0;                                                   \
        layer < ((layer_mask) ? (util_logbase2(layer_mask) + 1) : (layers));  \
        layer++)                                                              \
      if (!(layer_mask) || ((layer_mask) & BIT(layer)))
1685 tu_resolve_sysmem(struct tu_cmd_buffer
*cmd
,
1687 struct tu_image_view
*src
,
1688 struct tu_image_view
*dst
,
1689 uint32_t layer_mask
,
1691 const VkRect2D
*rect
)
1693 const struct blit_ops
*ops
= &r2d_ops
;
1695 tu_bo_list_add(&cmd
->bo_list
, src
->image
->bo
, MSM_SUBMIT_BO_READ
);
1696 tu_bo_list_add(&cmd
->bo_list
, dst
->image
->bo
, MSM_SUBMIT_BO_WRITE
);
1698 assert(src
->image
->vk_format
== dst
->image
->vk_format
);
1700 ops
->setup(cmd
, cs
, dst
->image
->vk_format
, VK_IMAGE_ASPECT_COLOR_BIT
,
1701 ROTATE_0
, false, dst
->ubwc_enabled
);
1702 ops
->coords(cs
, &rect
->offset
, &rect
->offset
, &rect
->extent
);
1704 for_each_layer(i
, layer_mask
, layers
) {
1705 ops
->src(cmd
, cs
, src
, i
, VK_FILTER_NEAREST
);
1706 ops
->dst(cs
, dst
, i
);
1710 ops
->teardown(cmd
, cs
);
1714 clear_image(struct tu_cmd_buffer
*cmd
,
1715 struct tu_image
*image
,
1716 const VkClearValue
*clear_value
,
1717 const VkImageSubresourceRange
*range
,
1718 VkImageAspectFlags aspect_mask
)
1720 uint32_t level_count
= tu_get_levelCount(image
, range
);
1721 uint32_t layer_count
= tu_get_layerCount(image
, range
);
1722 struct tu_cs
*cs
= &cmd
->cs
;
1723 VkFormat format
= image
->vk_format
;
1724 if (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
|| format
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
1725 format
= copy_format(format
, aspect_mask
, false);
1727 if (image
->type
== VK_IMAGE_TYPE_3D
) {
1728 assert(layer_count
== 1);
1729 assert(range
->baseArrayLayer
== 0);
1732 const struct blit_ops
*ops
= image
->samples
> 1 ? &r3d_ops
: &r2d_ops
;
1734 ops
->setup(cmd
, cs
, format
, aspect_mask
, ROTATE_0
, true, image
->layout
[0].ubwc
);
1735 if (image
->vk_format
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
1736 ops
->clear_value(cs
, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
, clear_value
);
1738 ops
->clear_value(cs
, format
, clear_value
);
1740 for (unsigned j
= 0; j
< level_count
; j
++) {
1741 if (image
->type
== VK_IMAGE_TYPE_3D
)
1742 layer_count
= u_minify(image
->extent
.depth
, range
->baseMipLevel
+ j
);
1744 ops
->coords(cs
, &(VkOffset2D
){}, NULL
, &(VkExtent2D
) {
1745 u_minify(image
->extent
.width
, range
->baseMipLevel
+ j
),
1746 u_minify(image
->extent
.height
, range
->baseMipLevel
+ j
)
1749 struct tu_image_view dst
;
1750 tu_image_view_copy_blit(&dst
, image
, format
, &(VkImageSubresourceLayers
) {
1751 .aspectMask
= aspect_mask
,
1752 .mipLevel
= range
->baseMipLevel
+ j
,
1753 .baseArrayLayer
= range
->baseArrayLayer
,
1757 for (uint32_t i
= 0; i
< layer_count
; i
++) {
1758 ops
->dst(cs
, &dst
, i
);
1763 ops
->teardown(cmd
, cs
);
1767 tu_CmdClearColorImage(VkCommandBuffer commandBuffer
,
1769 VkImageLayout imageLayout
,
1770 const VkClearColorValue
*pColor
,
1771 uint32_t rangeCount
,
1772 const VkImageSubresourceRange
*pRanges
)
1774 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1775 TU_FROM_HANDLE(tu_image
, image
, image_h
);
1777 tu_bo_list_add(&cmd
->bo_list
, image
->bo
, MSM_SUBMIT_BO_WRITE
);
1779 for (unsigned i
= 0; i
< rangeCount
; i
++)
1780 clear_image(cmd
, image
, (const VkClearValue
*) pColor
, pRanges
+ i
, VK_IMAGE_ASPECT_COLOR_BIT
);
1784 tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer
,
1786 VkImageLayout imageLayout
,
1787 const VkClearDepthStencilValue
*pDepthStencil
,
1788 uint32_t rangeCount
,
1789 const VkImageSubresourceRange
*pRanges
)
1791 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1792 TU_FROM_HANDLE(tu_image
, image
, image_h
);
1794 tu_bo_list_add(&cmd
->bo_list
, image
->bo
, MSM_SUBMIT_BO_WRITE
);
1796 for (unsigned i
= 0; i
< rangeCount
; i
++) {
1797 const VkImageSubresourceRange
*range
= &pRanges
[i
];
1799 if (image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
1800 /* can't clear both depth and stencil at once, split up the aspect mask */
1802 for_each_bit(b
, range
->aspectMask
)
1803 clear_image(cmd
, image
, (const VkClearValue
*) pDepthStencil
, range
, BIT(b
));
1807 clear_image(cmd
, image
, (const VkClearValue
*) pDepthStencil
, range
, range
->aspectMask
);
1812 tu_clear_sysmem_attachments(struct tu_cmd_buffer
*cmd
,
1813 uint32_t attachment_count
,
1814 const VkClearAttachment
*attachments
,
1815 uint32_t rect_count
,
1816 const VkClearRect
*rects
)
1818 /* the shader path here is special, it avoids changing MRT/etc state */
1819 const struct tu_render_pass
*pass
= cmd
->state
.pass
;
1820 const struct tu_subpass
*subpass
= cmd
->state
.subpass
;
1821 const uint32_t mrt_count
= subpass
->color_count
;
1822 struct tu_cs
*cs
= &cmd
->draw_cs
;
1823 uint32_t clear_value
[MAX_RTS
][4];
1824 float z_clear_val
= 0.0f
;
1825 uint8_t s_clear_val
= 0;
1826 uint32_t clear_rts
= 0, clear_components
= 0, num_rts
= 0, b
;
1827 bool z_clear
= false;
1828 bool s_clear
= false;
1829 bool layered_clear
= false;
1830 uint32_t max_samples
= 1;
1832 for (uint32_t i
= 0; i
< attachment_count
; i
++) {
1834 if (attachments
[i
].aspectMask
& VK_IMAGE_ASPECT_COLOR_BIT
) {
1835 uint32_t c
= attachments
[i
].colorAttachment
;
1836 a
= subpass
->color_attachments
[c
].attachment
;
1837 if (a
== VK_ATTACHMENT_UNUSED
)
1840 clear_rts
|= 1 << c
;
1841 clear_components
|= 0xf << (c
* 4);
1842 memcpy(clear_value
[c
], &attachments
[i
].clearValue
, 4 * sizeof(uint32_t));
1844 a
= subpass
->depth_stencil_attachment
.attachment
;
1845 if (a
== VK_ATTACHMENT_UNUSED
)
1848 if (attachments
[i
].aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
) {
1850 z_clear_val
= attachments
[i
].clearValue
.depthStencil
.depth
;
1853 if (attachments
[i
].aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
) {
1855 s_clear_val
= attachments
[i
].clearValue
.depthStencil
.stencil
& 0xff;
1859 max_samples
= MAX2(max_samples
, pass
->attachments
[a
].samples
);
1862 /* disable all draw states so they don't interfere
1863 * TODO: use and re-use draw states
1864 * we have to disable draw states individually to preserve
1865 * input attachment states, because a secondary command buffer
1866 * won't be able to restore them
1868 tu_cs_emit_pkt7(cs
, CP_SET_DRAW_STATE
, 3 * (TU_DRAW_STATE_COUNT
- 2));
1869 for (uint32_t i
= 0; i
< TU_DRAW_STATE_COUNT
; i
++) {
1870 if (i
== TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM
||
1871 i
== TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM
)
1873 tu_cs_emit(cs
, CP_SET_DRAW_STATE__0_GROUP_ID(i
) |
1874 CP_SET_DRAW_STATE__0_DISABLE
);
1875 tu_cs_emit_qw(cs
, 0);
1877 cmd
->state
.dirty
|= TU_CMD_DIRTY_DRAW_STATE
;
1879 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_CNTL0
, 2);
1880 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
1881 A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
1883 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count
));
1885 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_REG(0), mrt_count
);
1886 for (uint32_t i
= 0; i
< mrt_count
; i
++) {
1887 if (clear_rts
& (1 << i
))
1888 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_REG_REGID(num_rts
++ * 4));
1893 for (uint32_t i
= 0; i
< rect_count
; i
++) {
1894 if (rects
[i
].baseArrayLayer
|| rects
[i
].layerCount
> 1)
1895 layered_clear
= true;
1898 /* a630 doesn't support multiview masks, which means that we can't use the
1899 * normal multiview path without potentially recompiling a shader on-demand
1900 * or using a more complicated variant that takes the mask as a const. Just
1901 * use the layered path instead, since it shouldn't be much worse.
1903 if (subpass
->multiview_mask
) {
1904 layered_clear
= true;
1907 r3d_common(cmd
, cs
, false, num_rts
, layered_clear
);
1910 A6XX_SP_FS_RENDER_COMPONENTS(.dword
= clear_components
));
1912 A6XX_RB_RENDER_COMPONENTS(.dword
= clear_components
));
1915 A6XX_RB_FS_OUTPUT_CNTL0(),
1916 A6XX_RB_FS_OUTPUT_CNTL1(.mrt
= mrt_count
));
1918 tu_cs_emit_regs(cs
, A6XX_SP_BLEND_CNTL());
1919 tu_cs_emit_regs(cs
, A6XX_RB_BLEND_CNTL(.independent_blend
= 1, .sample_mask
= 0xffff));
1920 tu_cs_emit_regs(cs
, A6XX_RB_ALPHA_CONTROL());
1921 for (uint32_t i
= 0; i
< mrt_count
; i
++) {
1922 tu_cs_emit_regs(cs
, A6XX_RB_MRT_CONTROL(i
,
1923 .component_enable
= COND(clear_rts
& (1 << i
), 0xf)));
1926 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_PLANE_CNTL());
1927 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_CNTL(
1928 .z_enable
= z_clear
,
1929 .z_write_enable
= z_clear
,
1930 .zfunc
= FUNC_ALWAYS
));
1931 tu_cs_emit_regs(cs
, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
1932 tu_cs_emit_regs(cs
, A6XX_RB_STENCIL_CONTROL(
1933 .stencil_enable
= s_clear
,
1934 .func
= FUNC_ALWAYS
,
1935 .zpass
= STENCIL_REPLACE
));
1936 tu_cs_emit_regs(cs
, A6XX_RB_STENCILMASK(.mask
= 0xff));
1937 tu_cs_emit_regs(cs
, A6XX_RB_STENCILWRMASK(.wrmask
= 0xff));
1938 tu_cs_emit_regs(cs
, A6XX_RB_STENCILREF(.ref
= s_clear_val
));
1940 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3 + 4 * num_rts
);
1941 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
1942 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
1943 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
1944 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER
) |
1945 CP_LOAD_STATE6_0_NUM_UNIT(num_rts
));
1946 tu_cs_emit(cs
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
1947 tu_cs_emit(cs
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
1948 for_each_bit(b
, clear_rts
)
1949 tu_cs_emit_array(cs
, clear_value
[b
], 4);
1951 for (uint32_t i
= 0; i
< rect_count
; i
++) {
1952 /* This should be true because of this valid usage for
1953 * vkCmdClearAttachments:
1955 * "If the render pass instance this is recorded in uses multiview,
1956 * then baseArrayLayer must be zero and layerCount must be one"
1958 assert(!subpass
->multiview_mask
|| rects
[i
].baseArrayLayer
== 0);
1960 for_each_layer(layer
, subpass
->multiview_mask
, rects
[i
].layerCount
) {
1961 r3d_coords_raw(cs
, (float[]) {
1962 rects
[i
].rect
.offset
.x
, rects
[i
].rect
.offset
.y
,
1963 z_clear_val
, uif(rects
[i
].baseArrayLayer
+ layer
),
1964 rects
[i
].rect
.offset
.x
+ rects
[i
].rect
.extent
.width
,
1965 rects
[i
].rect
.offset
.y
+ rects
[i
].rect
.extent
.height
,
1974 pack_gmem_clear_value(const VkClearValue
*val
, VkFormat format
, uint32_t clear_value
[4])
1977 case VK_FORMAT_X8_D24_UNORM_PACK32
:
1978 case VK_FORMAT_D24_UNORM_S8_UINT
:
1979 clear_value
[0] = tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 24) |
1980 val
->depthStencil
.stencil
<< 24;
1982 case VK_FORMAT_D16_UNORM
:
1983 clear_value
[0] = tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 16);
1985 case VK_FORMAT_D32_SFLOAT
:
1986 clear_value
[0] = fui(val
->depthStencil
.depth
);
1988 case VK_FORMAT_S8_UINT
:
1989 clear_value
[0] = val
->depthStencil
.stencil
;
1996 memcpy(tmp
, val
->color
.float32
, 4 * sizeof(float));
1997 if (vk_format_is_srgb(format
)) {
1998 for (int i
= 0; i
< 4; i
++)
1999 tmp
[i
] = util_format_linear_to_srgb_float(tmp
[i
]);
2002 #define PACK_F(type) util_format_##type##_pack_rgba_float \
2003 ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
2004 switch (vk_format_get_component_bits(format
, UTIL_FORMAT_COLORSPACE_RGB
, PIPE_SWIZZLE_X
)) {
2006 PACK_F(r4g4b4a4_unorm
);
2009 if (vk_format_get_component_bits(format
, UTIL_FORMAT_COLORSPACE_RGB
, PIPE_SWIZZLE_Y
) == 6)
2010 PACK_F(r5g6b5_unorm
);
2012 PACK_F(r5g5b5a1_unorm
);
2015 if (vk_format_is_snorm(format
))
2016 PACK_F(r8g8b8a8_snorm
);
2017 else if (vk_format_is_unorm(format
))
2018 PACK_F(r8g8b8a8_unorm
);
2020 pack_int8(clear_value
, val
->color
.uint32
);
2023 if (vk_format_is_int(format
))
2024 pack_int10_2(clear_value
, val
->color
.uint32
);
2026 PACK_F(r10g10b10a2_unorm
);
2029 clear_value
[0] = float3_to_r11g11b10f(val
->color
.float32
);
2032 if (vk_format_is_snorm(format
))
2033 PACK_F(r16g16b16a16_snorm
);
2034 else if (vk_format_is_unorm(format
))
2035 PACK_F(r16g16b16a16_unorm
);
2036 else if (vk_format_is_float(format
))
2037 PACK_F(r16g16b16a16_float
);
2039 pack_int16(clear_value
, val
->color
.uint32
);
2042 memcpy(clear_value
, val
->color
.float32
, 4 * sizeof(float));
2045 unreachable("unexpected channel size");
2051 clear_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2055 uint32_t gmem_offset
,
2056 const VkClearValue
*value
)
2058 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_DST_INFO
, 1);
2059 tu_cs_emit(cs
, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format
)));
2061 tu_cs_emit_regs(cs
, A6XX_RB_BLIT_INFO(.gmem
= 1, .clear_mask
= clear_mask
));
2063 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_BASE_GMEM
, 1);
2064 tu_cs_emit(cs
, gmem_offset
);
2066 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_88D0
, 1);
2069 uint32_t clear_vals
[4] = {};
2070 pack_gmem_clear_value(value
, format
, clear_vals
);
2072 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0
, 4);
2073 tu_cs_emit_array(cs
, clear_vals
, 4);
2075 tu6_emit_event_write(cmd
, cs
, BLIT
);
/* tu_emit_clear_gmem_attachment: clear the requested aspects of render-pass
 * attachment number `attachment` in GMEM.
 *
 * The attachment description is read from cmd->state.pass->attachments.
 * For VK_FORMAT_D32_SFLOAT_S8_UINT the two aspects are cleared separately:
 *   - depth, if VK_IMAGE_ASPECT_DEPTH_BIT is in mask: as D32_SFLOAT at
 *     att->gmem_offset
 *   - stencil, if VK_IMAGE_ASPECT_STENCIL_BIT is in mask: as S8_UINT at
 *     att->gmem_offset_stencil
 * Both use a clear mask of 0xf. For the final call, the clear mask comes
 * from aspect_write_mask(att->format, mask) and the offset is
 * att->gmem_offset.
 *
 * NOTE(review): the cs parameter and whatever separates the D32S8 branch
 * from the final call (presumably a return or else) are not visible in this
 * extract; confirm against the full source.
 */
2079 tu_emit_clear_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2081 uint32_t attachment
,
2082 VkImageAspectFlags mask
,
2083 const VkClearValue
*value
)
2085 const struct tu_render_pass_attachment
*att
=
2086 &cmd
->state
.pass
->attachments
[attachment
];
2088 if (att
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
2089 if (mask
& VK_IMAGE_ASPECT_DEPTH_BIT
)
2090 clear_gmem_attachment(cmd
, cs
, VK_FORMAT_D32_SFLOAT
, 0xf, att
->gmem_offset
, value
);
2091 if (mask
& VK_IMAGE_ASPECT_STENCIL_BIT
)
2092 clear_gmem_attachment(cmd
, cs
, VK_FORMAT_S8_UINT
, 0xf, att
->gmem_offset_stencil
, value
);
2096 clear_gmem_attachment(cmd
, cs
, att
->format
, aspect_write_mask(att
->format
, mask
), att
->gmem_offset
, value
);
/* tu_clear_gmem_attachments: GMEM implementation of an attachment clear
 * over a list of rectangles, emitted into cmd->draw_cs.
 *
 * For every rectangle the blit scissor (RB_BLIT_SCISSOR_TL/BR) is set to
 * the rectangle's inclusive bounds. Then every entry of `attachments` is
 * resolved to a render-pass attachment index:
 *   - colour aspect: subpass->color_attachments[colorAttachment].attachment
 *   - otherwise: subpass->depth_stencil_attachment.attachment
 * That index is handed to tu_emit_clear_gmem_attachment() with the entry's
 * aspectMask and clearValue.
 *
 * NOTE(review): x2/y2 are computed in unsigned arithmetic as
 * offset + extent - 1, so a zero width or height would wrap; this assumes
 * callers pass non-zero extents (TODO confirm against the Vulkan valid
 * usage rules for pRects).
 * NOTE(review): the declaration of `a`, the else between the two
 * assignments, and the action taken for VK_ATTACHMENT_UNUSED (presumably
 * skipping the entry) are not visible in this extract.
 */
2100 tu_clear_gmem_attachments(struct tu_cmd_buffer
*cmd
,
2101 uint32_t attachment_count
,
2102 const VkClearAttachment
*attachments
,
2103 uint32_t rect_count
,
2104 const VkClearRect
*rects
)
2106 const struct tu_subpass
*subpass
= cmd
->state
.subpass
;
2107 struct tu_cs
*cs
= &cmd
->draw_cs
;
2109 /* TODO: swap the loops for smaller cmdstream */
2110 for (unsigned i
= 0; i
< rect_count
; i
++) {
/* Inclusive scissor corners: bottom-right is offset + extent - 1. */
2111 unsigned x1
= rects
[i
].rect
.offset
.x
;
2112 unsigned y1
= rects
[i
].rect
.offset
.y
;
2113 unsigned x2
= x1
+ rects
[i
].rect
.extent
.width
- 1;
2114 unsigned y2
= y1
+ rects
[i
].rect
.extent
.height
- 1;
2116 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_SCISSOR_TL
, 2);
2117 tu_cs_emit(cs
, A6XX_RB_BLIT_SCISSOR_TL_X(x1
) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1
));
2118 tu_cs_emit(cs
, A6XX_RB_BLIT_SCISSOR_BR_X(x2
) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2
));
2120 for (unsigned j
= 0; j
< attachment_count
; j
++) {
2122 if (attachments
[j
].aspectMask
& VK_IMAGE_ASPECT_COLOR_BIT
)
2123 a
= subpass
->color_attachments
[attachments
[j
].colorAttachment
].attachment
;
2125 a
= subpass
->depth_stencil_attachment
.attachment
;
2127 if (a
== VK_ATTACHMENT_UNUSED
)
2130 tu_emit_clear_gmem_attachment(cmd
, cs
, a
, attachments
[j
].aspectMask
,
2131 &attachments
[j
].clearValue
);
/* tu_CmdClearAttachments: driver entry point for vkCmdClearAttachments.
 *
 * Steps visible in this listing:
 *   1. Resolve the command buffer handle; emit into cmd->draw_cs.
 *   2. Emit the render-pass cache flush unconditionally, outside any
 *      cond_exec region.
 *   3. If cmd->state.predication_active is set, run the sysmem (3D draw)
 *      clear inside a cond_exec that matches both GMEM and SYSMEM render
 *      modes.
 *   4. Emit the GMEM clear under a RENDER_MODE_GMEM cond_exec and the
 *      sysmem clear under a RENDER_MODE_SYSMEM cond_exec.
 *
 * NOTE(review): step 4 is presumably skipped when predication is active
 * (an early return or else); that line, the rectCount parameter declaration
 * and the comment terminators are not visible in this extract. Confirm
 * against the full source.
 */
2137 tu_CmdClearAttachments(VkCommandBuffer commandBuffer
,
2138 uint32_t attachmentCount
,
2139 const VkClearAttachment
*pAttachments
,
2141 const VkClearRect
*pRects
)
2143 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
2144 struct tu_cs
*cs
= &cmd
->draw_cs
;
2146 /* sysmem path behaves like a draw, note we don't have a way of using different
2147 * flushes for sysmem/gmem, so this needs to be outside of the cond_exec
2149 tu_emit_cache_flush_renderpass(cmd
, cs
);
2151 /* vkCmdClearAttachments is supposed to respect the predicate if active.
2152 * The easiest way to do this is to always use the 3d path, which always
2153 * works even with GMEM because it's just a simple draw using the existing
2154 * attachment state. However it seems that IGNORE_VISIBILITY draws must be
2155 * skipped in the binning pass, since otherwise they produce binning data
2156 * which isn't consumed and leads to the wrong binning data being read, so
2157 * condition on GMEM | SYSMEM.
2159 if (cmd
->state
.predication_active
) {
2160 tu_cond_exec_start(cs
, CP_COND_EXEC_0_RENDER_MODE_GMEM
|
2161 CP_COND_EXEC_0_RENDER_MODE_SYSMEM
);
2162 tu_clear_sysmem_attachments(cmd
, attachmentCount
, pAttachments
, rectCount
, pRects
);
2163 tu_cond_exec_end(cs
);
2167 tu_cond_exec_start(cs
, CP_COND_EXEC_0_RENDER_MODE_GMEM
);
2168 tu_clear_gmem_attachments(cmd
, attachmentCount
, pAttachments
, rectCount
, pRects
);
2169 tu_cond_exec_end(cs
);
2171 tu_cond_exec_start(cs
, CP_COND_EXEC_0_RENDER_MODE_SYSMEM
);
2172 tu_clear_sysmem_attachments(cmd
, attachmentCount
, pAttachments
, rectCount
, pRects
);
2173 tu_cond_exec_end(cs
);
/* clear_sysmem_attachment: clear framebuffer attachment `a` over the render
 * area given in info->renderArea, using one of the blit_ops backends.
 *
 * Flow visible in this listing:
 *   - iview is the framebuffer's image view for attachment a; clear_views
 *     comes from the render-pass attachment.
 *   - ops starts as &r2d_ops; the attachment's sample count is tested
 *     against 1.
 *   - ops->setup() is called with format, clear_mask, ROTATE_0, true and
 *     the view's ubwc_enabled flag.
 *   - ops->coords() gets the render-area offset as destination, NULL as
 *     source, and the render-area extent.
 *   - ops->clear_value() takes info->pClearValues[a].
 *   - for each layer selected by for_each_layer(i, clear_views,
 *     fb->layers): with separate_stencil the destination is set through
 *     r3d_dst_stencil() or r2d_dst_stencil() depending on ops; ops->dst()
 *     is the non-stencil destination setter.
 *   - ops->teardown() closes the operation.
 *
 * NOTE(review): the statement under the samples > 1 test (presumably
 * switching ops to &r3d_ops, given the later comparison), the per-layer
 * call that actually executes the clear, the else keywords, and the
 * cs / format / a parameter declarations are not visible in this extract.
 */
2177 clear_sysmem_attachment(struct tu_cmd_buffer
*cmd
,
2180 VkImageAspectFlags clear_mask
,
2181 const VkRenderPassBeginInfo
*info
,
2183 bool separate_stencil
)
2185 const struct tu_framebuffer
*fb
= cmd
->state
.framebuffer
;
2186 const struct tu_image_view
*iview
= fb
->attachments
[a
].attachment
;
2187 const uint32_t clear_views
= cmd
->state
.pass
->attachments
[a
].clear_views
;
2188 const struct blit_ops
*ops
= &r2d_ops
;
2189 if (cmd
->state
.pass
->attachments
[a
].samples
> 1)
2192 ops
->setup(cmd
, cs
, format
, clear_mask
, ROTATE_0
, true, iview
->ubwc_enabled
);
2193 ops
->coords(cs
, &info
->renderArea
.offset
, NULL
, &info
->renderArea
.extent
);
2194 ops
->clear_value(cs
, format
, &info
->pClearValues
[a
]);
2196 for_each_layer(i
, clear_views
, fb
->layers
) {
2197 if (separate_stencil
) {
2198 if (ops
== &r3d_ops
)
2199 r3d_dst_stencil(cs
, iview
, i
);
2201 r2d_dst_stencil(cs
, iview
, i
);
2203 ops
->dst(cs
, iview
, i
);
2208 ops
->teardown(cmd
, cs
);
/* tu_clear_sysmem_attachment: perform the render-pass-begin clear of
 * attachment `a` on the sysmem path, then emit the CCU flush/invalidate
 * events that follow it.
 *
 * Flow visible in this listing:
 *   - The render-pass attachment is looked up and its clear_mask tested
 *     for zero.
 *   - VK_FORMAT_D32_SFLOAT_S8_UINT is split into up to two clears, gated
 *     on the DEPTH and STENCIL bits of clear_mask: one as D32_SFLOAT and
 *     one as S8_UINT. Both pass VK_IMAGE_ASPECT_COLOR_BIT as the clear
 *     mask to clear_sysmem_attachment().
 *   - The remaining call uses attachment->format and attachment->clear_mask.
 *   - For depth/stencil formats PC_CCU_FLUSH_COLOR_TS and
 *     PC_CCU_INVALIDATE_DEPTH are emitted; the other event pair is
 *     PC_CCU_FLUSH_COLOR_TS and PC_CCU_INVALIDATE_COLOR.
 *
 * NOTE(review): the early return for an empty clear_mask, the wait that the
 * first in-body comment refers to, the trailing arguments of the three
 * clear_sysmem_attachment() calls, and the else keywords are not visible in
 * this extract; the branch structure above is inferred and should be
 * confirmed against the full source.
 */
2212 tu_clear_sysmem_attachment(struct tu_cmd_buffer
*cmd
,
2215 const VkRenderPassBeginInfo
*info
)
2217 const struct tu_render_pass_attachment
*attachment
=
2218 &cmd
->state
.pass
->attachments
[a
];
2220 if (!attachment
->clear_mask
)
2223 /* Wait for any flushes at the beginning of the renderpass to complete */
2226 if (attachment
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
2227 if (attachment
->clear_mask
& VK_IMAGE_ASPECT_DEPTH_BIT
) {
2228 clear_sysmem_attachment(cmd
, cs
, VK_FORMAT_D32_SFLOAT
, VK_IMAGE_ASPECT_COLOR_BIT
,
2231 if (attachment
->clear_mask
& VK_IMAGE_ASPECT_STENCIL_BIT
) {
2232 clear_sysmem_attachment(cmd
, cs
, VK_FORMAT_S8_UINT
, VK_IMAGE_ASPECT_COLOR_BIT
,
2236 clear_sysmem_attachment(cmd
, cs
, attachment
->format
, attachment
->clear_mask
,
2240 /* The spec doesn't explicitly say, but presumably the initial renderpass
2241 * clear is considered part of the renderpass, and therefore barriers
2242 * aren't required inside the subpass/renderpass. Therefore we need to
2243 * flush CCU color into CCU depth here, just like with
2244 * vkCmdClearAttachments(). Note that because this only happens at the
2245 * beginning of a renderpass, and renderpass writes are considered
2246 * "incoherent", we shouldn't have to worry about syncing depth into color
2247 * beforehand as depth should already be flushed.
2249 if (vk_format_is_depth_or_stencil(attachment
->format
)) {
2250 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
);
2251 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_DEPTH
);
2253 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
);
2254 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_COLOR
);
/* tu_clear_gmem_attachment: perform the render-pass-begin clear of
 * attachment `a` on the GMEM path.
 *
 * The render-pass attachment is looked up and its clear_mask tested for
 * zero. RB_MSAA_CNTL is then programmed from the attachment's sample count,
 * and tu_emit_clear_gmem_attachment() is called with the attachment's
 * clear_mask and info->pClearValues[a].
 *
 * NOTE(review): the early return for an empty clear_mask and the cs / a
 * parameter declarations are not visible in this extract; confirm against
 * the full source.
 */
2259 tu_clear_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2262 const VkRenderPassBeginInfo
*info
)
2264 const struct tu_render_pass_attachment
*attachment
=
2265 &cmd
->state
.pass
->attachments
[a
];
2267 if (!attachment
->clear_mask
)
2270 tu_cs_emit_regs(cs
, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment
->samples
)));
2272 tu_emit_clear_gmem_attachment(cmd
, cs
, a
, attachment
->clear_mask
,
2273 &info
->pClearValues
[a
]);
/* tu_emit_blit: program a BLIT-event transfer between an attachment's GMEM
 * storage and the image behind iview, then fire the BLIT event.
 *
 * Register writes visible in this listing:
 *   - RB_MSAA_CNTL from attachment->samples
 *   - RB_BLIT_INFO, with .integer set for integer formats
 *   - RB_BLIT_DST_INFO (4 dwords) and RB_BLIT_BASE_GMEM, which depend on
 *     separate_stencil:
 *       separate_stencil: the stencil view's RB_BLIT_DST_INFO with the
 *         FLAGS bit masked off, iview->stencil_base_addr,
 *         iview->stencil_PITCH, and base attachment->gmem_offset_stencil
 *       non-stencil writes: iview->RB_BLIT_DST_INFO, the layer-0 image
 *         reference, the 3-dword RB_BLIT_FLAG_DST_LO reference, and base
 *         attachment->gmem_offset
 *
 * NOTE(review): the cs parameter and the parameter between attachment and
 * separate_stencil, the other RB_BLIT_INFO fields, and the else keyword
 * are not visible in this extract, so the direction of the transfer cannot
 * be determined from here; confirm against the full source.
 */
2277 tu_emit_blit(struct tu_cmd_buffer
*cmd
,
2279 const struct tu_image_view
*iview
,
2280 const struct tu_render_pass_attachment
*attachment
,
2282 bool separate_stencil
)
2285 A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment
->samples
)));
2287 tu_cs_emit_regs(cs
, A6XX_RB_BLIT_INFO(
2290 /* "integer" bit disables msaa resolve averaging */
2291 .integer
= vk_format_is_int(attachment
->format
)));
2293 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_DST_INFO
, 4);
2294 if (separate_stencil
) {
2295 tu_cs_emit(cs
, tu_image_view_stencil(iview
, RB_BLIT_DST_INFO
) & ~A6XX_RB_BLIT_DST_INFO_FLAGS
);
2296 tu_cs_emit_qw(cs
, iview
->stencil_base_addr
);
2297 tu_cs_emit(cs
, iview
->stencil_PITCH
);
2300 A6XX_RB_BLIT_BASE_GMEM(attachment
->gmem_offset_stencil
));
2302 tu_cs_emit(cs
, iview
->RB_BLIT_DST_INFO
);
2303 tu_cs_image_ref_2d(cs
, iview
, 0, false);
2305 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_FLAG_DST_LO
, 3);
2306 tu_cs_image_flag_ref(cs
, iview
, 0);
2309 A6XX_RB_BLIT_BASE_GMEM(attachment
->gmem_offset
));
2312 tu6_emit_event_write(cmd
, cs
, BLIT
);
/* blit_can_resolve: classify a format by whether the BLIT event can be used
 * to resolve it.
 *
 * Checks visible in this listing, in order:
 *   - the format is snorm or sRGB
 *   - the first channel of the format description is wider than 10 bits
 *   - the format is one of R8G8_UNORM, R8G8_UINT, R8G8_SINT or
 *     D24_UNORM_S8_UINT
 * The in-body comments describe each of these as a case the blit event
 * cannot resolve.
 *
 * NOTE(review): the return statements, the switch header and the comment
 * terminators are not visible in this extract, so the value returned for
 * each case is inferred from the comments and the function name (presumably
 * false for the cases above, true otherwise); confirm against the full
 * source.
 */
2316 blit_can_resolve(VkFormat format
)
2318 const struct util_format_description
*desc
= vk_format_description(format
);
2320 /* blit event can only do resolve for simple cases:
2321 * averaging samples as unsigned integers or choosing only one sample
2323 if (vk_format_is_snorm(format
) || vk_format_is_srgb(format
))
2326 /* can't do formats with larger channel sizes
2327 * note: this includes all float formats
2328 * note2: single channel integer formats seem OK
2330 if (desc
->channel
[0].size
> 10)
2334 /* for unknown reasons blit event can't msaa resolve these formats when tiled
2335 * likely related to these formats having different layout from other cpp=2 formats
2337 case VK_FORMAT_R8G8_UNORM
:
2338 case VK_FORMAT_R8G8_UINT
:
2339 case VK_FORMAT_R8G8_SINT
:
2340 /* TODO: this one should be able to work? */
2341 case VK_FORMAT_D24_UNORM_S8_UINT
:
/* tu_load_gmem_attachment: emit the blit(s) that load attachment `a`.
 *
 * The image view comes from the framebuffer and the attachment description
 * from the render pass. Two independent tu_emit_blit() calls may be issued,
 * both with false as the fifth argument:
 *   - with separate_stencil = false, when attachment->load is set or
 *     force_load is true
 *   - with separate_stencil = true, when attachment->load_stencil is set,
 *     or when the format is D32_SFLOAT_S8_UINT and force_load is true
 *
 * NOTE(review): the cs, a and force_load parameter declarations are not
 * visible in this extract (embedded line numbers skip 2352-2355); confirm
 * against the full source.
 */
2351 tu_load_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2356 const struct tu_image_view
*iview
=
2357 cmd
->state
.framebuffer
->attachments
[a
].attachment
;
2358 const struct tu_render_pass_attachment
*attachment
=
2359 &cmd
->state
.pass
->attachments
[a
];
2361 if (attachment
->load
|| force_load
)
2362 tu_emit_blit(cmd
, cs
, iview
, attachment
, false, false);
2364 if (attachment
->load_stencil
|| (attachment
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
&& force_load
))
2365 tu_emit_blit(cmd
, cs
, iview
, attachment
, false, true);
/* store_cp_blit: copy an attachment out of GMEM into the image behind iview
 * with a CP_BLIT (BLIT_OP_SCALE), using GMEM as the 2D source.
 *
 * Flow visible in this listing:
 *   - r2d_setup_common() is called with format, VK_IMAGE_ASPECT_COLOR_BIT,
 *     ROTATE_0, false, the view's ubwc_enabled flag and true.
 *   - The destination is layer 0 of iview, set via r2d_dst_stencil() or
 *     r2d_dst() depending on separate_stencil.
 *   - SP_PS_2D_SRC_INFO describes the source as TILE6_2 with the texture
 *     format for that tile mode, the sRGB flag, the sample count, and
 *     samples_average set for non-integer formats.
 *   - Source size is 0x3fff x 0x3fff, the address is
 *     physical_device->gmem_base + gmem_offset, and the pitch is
 *     framebuffer->tile0.width * cpp.
 *   - A CACHE_INVALIDATE event precedes the CP_BLIT packet, and a
 *     PC_CCU_FLUSH_COLOR_TS event follows it.
 *
 * NOTE(review): the cs, samples, format and cpp parameter declarations,
 * the else keyword, the register-emit call that wraps the SP_PS_2D_SRC_*
 * values, and the wait mentioned by the "Wait for CACHE_INVALIDATE"
 * comment are not visible in this extract; confirm against the full source.
 */
2369 store_cp_blit(struct tu_cmd_buffer
*cmd
,
2371 struct tu_image_view
*iview
,
2373 bool separate_stencil
,
2375 uint32_t gmem_offset
,
2378 r2d_setup_common(cmd
, cs
, format
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, false,
2379 iview
->ubwc_enabled
, true);
2380 if (separate_stencil
)
2381 r2d_dst_stencil(cs
, iview
, 0);
2383 r2d_dst(cs
, iview
, 0);
2386 A6XX_SP_PS_2D_SRC_INFO(
2387 .color_format
= tu6_format_texture(format
, TILE6_2
).fmt
,
2388 .tile_mode
= TILE6_2
,
2389 .srgb
= vk_format_is_srgb(format
),
2390 .samples
= tu_msaa_samples(samples
),
2391 .samples_average
= !vk_format_is_int(format
),
2394 /* note: src size does not matter when not scaling */
2395 A6XX_SP_PS_2D_SRC_SIZE( .width
= 0x3fff, .height
= 0x3fff),
2396 A6XX_SP_PS_2D_SRC_LO(cmd
->device
->physical_device
->gmem_base
+ gmem_offset
),
2397 A6XX_SP_PS_2D_SRC_HI(),
2398 A6XX_SP_PS_2D_SRC_PITCH(.pitch
= cmd
->state
.framebuffer
->tile0
.width
* cpp
));
2400 /* sync GMEM writes with CACHE. */
2401 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
);
2403 /* Wait for CACHE_INVALIDATE to land */
2406 tu_cs_emit_pkt7(cs
, CP_BLIT
, 1);
2407 tu_cs_emit(cs
, CP_BLIT_0_OP(BLIT_OP_SCALE
));
2409 /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
2410 * sysmem, and we generally assume that GMEM renderpasses leave their
2411 * results in sysmem, so we need to flush manually here.
2413 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
);
/* tu_store_gmem_attachment: write the GMEM contents of attachment gmem_a
 * out to the image of attachment a, choosing between the BLIT-event fast
 * path and a CP_BLIT fallback.
 *
 * Flow visible in this listing:
 *   - dst and iview describe attachment a; src describes attachment gmem_a.
 *   - The case where neither dst->store nor dst->store_stencil is set is
 *     tested first.
 *   - x1/y1/x2/y2 are the render-area bounds, with x2/y2 exclusive
 *     (offset + extent).
 *   - need_y2_align is true when y2 differs from the image height or when
 *     iview->need_y2_align is set.
 *   - The alignment expression flags x1 or y1 not being multiples of
 *     GMEM_ALIGN_W/H, x2 being unaligned unless it equals the image width,
 *     and y2 being unaligned while need_y2_align holds.
 *   - Fast path, taken when the area is aligned and either a == gmem_a or
 *     blit_can_resolve(dst->format): tu_emit_blit() with true as fifth
 *     argument, once with separate_stencil false and, if
 *     dst->store_stencil, once with it true.
 *   - Fallback: multisampled destinations hit tu_finishme(); the render
 *     area is used as both source and destination coordinates;
 *     D32_SFLOAT_S8_UINT is stored as D32_SFLOAT from src->gmem_offset,
 *     and stencil (if dst->store_stencil) as S8_UINT from
 *     src->gmem_offset_stencil.
 *
 * NOTE(review): the stencil store passes src->samples in the final (cpp)
 * slot where the other call passes src->cpp; presumably this is the S8
 * byte size multiplied by the sample count, but confirm.
 * NOTE(review): the early return, the declaration that receives the
 * alignment expression (used later as `unaligned`), any dst->store guards
 * in front of the first tu_emit_blit() / store_cp_blit() calls, the
 * cs / a / gmem_a parameter declarations and the closing braces are not
 * visible in this extract; confirm against the full source.
 */
2417 tu_store_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2422 const VkRect2D
*render_area
= &cmd
->state
.render_area
;
2423 struct tu_render_pass_attachment
*dst
= &cmd
->state
.pass
->attachments
[a
];
2424 struct tu_image_view
*iview
= cmd
->state
.framebuffer
->attachments
[a
].attachment
;
2425 struct tu_render_pass_attachment
*src
= &cmd
->state
.pass
->attachments
[gmem_a
];
2427 if (!dst
->store
&& !dst
->store_stencil
)
2430 uint32_t x1
= render_area
->offset
.x
;
2431 uint32_t y1
= render_area
->offset
.y
;
2432 uint32_t x2
= x1
+ render_area
->extent
.width
;
2433 uint32_t y2
= y1
+ render_area
->extent
.height
;
2434 /* x2/y2 can be unaligned if equal to the size of the image,
2435 * since it will write into padding space
2436 * the one exception is linear levels which don't have the
2437 * required y padding in the layout (except for the last level)
2439 bool need_y2_align
=
2440 y2
!= iview
->extent
.height
|| iview
->need_y2_align
;
2443 x1
% GMEM_ALIGN_W
|| (x2
% GMEM_ALIGN_W
&& x2
!= iview
->extent
.width
) ||
2444 y1
% GMEM_ALIGN_H
|| (y2
% GMEM_ALIGN_H
&& need_y2_align
);
2446 /* use fast path when render area is aligned, except for unsupported resolve cases */
2447 if (!unaligned
&& (a
== gmem_a
|| blit_can_resolve(dst
->format
))) {
2449 tu_emit_blit(cmd
, cs
, iview
, src
, true, false);
2450 if (dst
->store_stencil
)
2451 tu_emit_blit(cmd
, cs
, iview
, src
, true, true);
2455 if (dst
->samples
> 1) {
2456 /* I guess we need to use shader path in this case?
2457 * need a testcase which fails because of this
2459 tu_finishme("unaligned store of msaa attachment\n");
2463 r2d_coords(cs
, &render_area
->offset
, &render_area
->offset
, &render_area
->extent
);
2465 VkFormat format
= src
->format
;
2466 if (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
2467 format
= VK_FORMAT_D32_SFLOAT
;
2470 store_cp_blit(cmd
, cs
, iview
, src
->samples
, false, format
,
2471 src
->gmem_offset
, src
->cpp
);
2473 if (dst
->store_stencil
) {
2474 store_cp_blit(cmd
, cs
, iview
, src
->samples
, true, VK_FORMAT_S8_UINT
,
2475 src
->gmem_offset_stencil
, src
->samples
);