2 * Copyright 2019-2020 Valve Corporation
3 * SPDX-License-Identifier: MIT
6 * Jonathan Marek <jonathan@marek.ca>
9 #include "tu_private.h"
12 #include "vk_format.h"
14 #include "util/format_r11g11b10f.h"
15 #include "util/format_rgb9e5.h"
16 #include "util/format_srgb.h"
17 #include "util/u_half.h"
/* Quantize a float to an unsigned-normalized value with the given bit width.
 * The input is clamped to [0, 1] and scaled by the maximum representable
 * value ((1 << bits) - 1), using round-half-to-even to match hardware
 * UNORM conversion rules.
 */
static uint32_t
tu_pack_float32_for_unorm(float val, int bits)
{
   const float max_val = (float) ((1 << bits) - 1);
   const float clamped = CLAMP(val, 0.0f, 1.0f);
   return _mesa_lroundevenf(clamped * max_val);
}
25 /* r2d_ = BLIT_OP_SCALE operations */
27 static enum a6xx_2d_ifmt
28 format_to_ifmt(VkFormat format
)
30 if (format
== VK_FORMAT_D24_UNORM_S8_UINT
||
31 format
== VK_FORMAT_X8_D24_UNORM_PACK32
)
34 /* get_component_bits doesn't work with depth/stencil formats: */
35 if (format
== VK_FORMAT_D16_UNORM
|| format
== VK_FORMAT_D32_SFLOAT
)
37 if (format
== VK_FORMAT_S8_UINT
)
40 /* use the size of the red channel to find the corresponding "ifmt" */
41 bool is_int
= vk_format_is_int(format
);
42 switch (vk_format_get_component_bits(format
, UTIL_FORMAT_COLORSPACE_RGB
, PIPE_SWIZZLE_X
)) {
43 case 4: case 5: case 8:
44 return is_int
? R2D_INT8
: R2D_UNORM8
;
46 return is_int
? R2D_INT16
: R2D_FLOAT16
;
48 if (vk_format_is_float(format
))
50 return is_int
? R2D_INT16
: R2D_FLOAT32
;
52 return is_int
? R2D_INT32
: R2D_FLOAT32
;
54 unreachable("bad format");
60 r2d_coords(struct tu_cs
*cs
,
61 const VkOffset2D
*dst
,
62 const VkOffset2D
*src
,
63 const VkExtent2D
*extent
)
66 A6XX_GRAS_2D_DST_TL(.x
= dst
->x
, .y
= dst
->y
),
67 A6XX_GRAS_2D_DST_BR(.x
= dst
->x
+ extent
->width
- 1, .y
= dst
->y
+ extent
->height
- 1));
73 A6XX_GRAS_2D_SRC_TL_X(src
->x
),
74 A6XX_GRAS_2D_SRC_BR_X(src
->x
+ extent
->width
- 1),
75 A6XX_GRAS_2D_SRC_TL_Y(src
->y
),
76 A6XX_GRAS_2D_SRC_BR_Y(src
->y
+ extent
->height
- 1));
80 r2d_clear_value(struct tu_cs
*cs
, VkFormat format
, const VkClearValue
*val
)
82 uint32_t clear_value
[4] = {};
85 case VK_FORMAT_X8_D24_UNORM_PACK32
:
86 case VK_FORMAT_D24_UNORM_S8_UINT
:
87 /* cleared as r8g8b8a8_unorm using special format */
88 clear_value
[0] = tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 24);
89 clear_value
[1] = clear_value
[0] >> 8;
90 clear_value
[2] = clear_value
[0] >> 16;
91 clear_value
[3] = val
->depthStencil
.stencil
;
93 case VK_FORMAT_D16_UNORM
:
94 case VK_FORMAT_D32_SFLOAT
:
96 clear_value
[0] = fui(val
->depthStencil
.depth
);
98 case VK_FORMAT_S8_UINT
:
99 clear_value
[0] = val
->depthStencil
.stencil
;
101 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
:
102 /* cleared as UINT32 */
103 clear_value
[0] = float3_to_rgb9e5(val
->color
.float32
);
106 assert(!vk_format_is_depth_or_stencil(format
));
107 const struct util_format_description
*desc
= vk_format_description(format
);
108 enum a6xx_2d_ifmt ifmt
= format_to_ifmt(format
);
110 assert(desc
&& (desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
||
111 format
== VK_FORMAT_B10G11R11_UFLOAT_PACK32
));
113 for (unsigned i
= 0; i
< desc
->nr_channels
; i
++) {
114 const struct util_format_channel_description
*ch
= &desc
->channel
[i
];
115 if (ifmt
== R2D_UNORM8
) {
116 float linear
= val
->color
.float32
[i
];
117 if (desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
&& i
< 3)
118 linear
= util_format_linear_to_srgb_float(val
->color
.float32
[i
]);
120 if (ch
->type
== UTIL_FORMAT_TYPE_SIGNED
)
121 clear_value
[i
] = _mesa_lroundevenf(CLAMP(linear
, -1.0f
, 1.0f
) * 127.0f
);
123 clear_value
[i
] = tu_pack_float32_for_unorm(linear
, 8);
124 } else if (ifmt
== R2D_FLOAT16
) {
125 clear_value
[i
] = util_float_to_half(val
->color
.float32
[i
]);
127 assert(ifmt
== R2D_FLOAT32
|| ifmt
== R2D_INT32
||
128 ifmt
== R2D_INT16
|| ifmt
== R2D_INT8
);
129 clear_value
[i
] = val
->color
.uint32
[i
];
135 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_SRC_SOLID_C0
, 4);
136 tu_cs_emit_array(cs
, clear_value
, 4);
140 r2d_src(struct tu_cmd_buffer
*cmd
,
142 const struct tu_image_view
*iview
,
146 uint32_t src_info
= iview
->SP_PS_2D_SRC_INFO
;
147 if (filter
!= VK_FILTER_NEAREST
)
148 src_info
|= A6XX_SP_PS_2D_SRC_INFO_FILTER
;
150 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_INFO
, 5);
151 tu_cs_emit(cs
, src_info
);
152 tu_cs_emit(cs
, iview
->SP_PS_2D_SRC_SIZE
);
153 tu_cs_image_ref_2d(cs
, iview
, layer
, true);
155 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO
, 3);
156 tu_cs_image_flag_ref(cs
, iview
, layer
);
160 r2d_src_buffer(struct tu_cmd_buffer
*cmd
,
163 uint64_t va
, uint32_t pitch
,
164 uint32_t width
, uint32_t height
)
166 struct tu_native_format format
= tu6_format_texture(vk_format
, TILE6_LINEAR
);
169 A6XX_SP_PS_2D_SRC_INFO(
170 .color_format
= format
.fmt
,
171 .color_swap
= format
.swap
,
172 .srgb
= vk_format_is_srgb(vk_format
),
175 A6XX_SP_PS_2D_SRC_SIZE(.width
= width
, .height
= height
),
176 A6XX_SP_PS_2D_SRC_LO((uint32_t) va
),
177 A6XX_SP_PS_2D_SRC_HI(va
>> 32),
178 A6XX_SP_PS_2D_SRC_PITCH(.pitch
= pitch
));
182 r2d_dst(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
)
184 assert(iview
->image
->samples
== 1);
186 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_INFO
, 4);
187 tu_cs_emit(cs
, iview
->RB_2D_DST_INFO
);
188 tu_cs_image_ref_2d(cs
, iview
, layer
, false);
190 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_FLAGS_LO
, 3);
191 tu_cs_image_flag_ref(cs
, iview
, layer
);
195 r2d_dst_stencil(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
)
197 assert(iview
->image
->samples
== 1);
199 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_DST_INFO
, 4);
200 tu_cs_emit(cs
, tu_image_view_stencil(iview
, RB_2D_DST_INFO
) & ~A6XX_RB_2D_DST_INFO_FLAGS
);
201 tu_cs_emit_qw(cs
, iview
->stencil_base_addr
+ iview
->stencil_layer_size
* layer
);
202 tu_cs_emit(cs
, iview
->stencil_PITCH
);
206 r2d_dst_buffer(struct tu_cs
*cs
, VkFormat vk_format
, uint64_t va
, uint32_t pitch
)
208 struct tu_native_format format
= tu6_format_color(vk_format
, TILE6_LINEAR
);
212 .color_format
= format
.fmt
,
213 .color_swap
= format
.swap
,
214 .srgb
= vk_format_is_srgb(vk_format
)),
215 A6XX_RB_2D_DST_LO((uint32_t) va
),
216 A6XX_RB_2D_DST_HI(va
>> 32),
217 A6XX_RB_2D_DST_PITCH(pitch
));
221 r2d_setup_common(struct tu_cmd_buffer
*cmd
,
224 VkImageAspectFlags aspect_mask
,
225 enum a6xx_rotation rotation
,
230 enum a6xx_format format
= tu6_base_format(vk_format
);
231 enum a6xx_2d_ifmt ifmt
= format_to_ifmt(vk_format
);
232 uint32_t unknown_8c01
= 0;
234 if ((vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
||
235 vk_format
== VK_FORMAT_X8_D24_UNORM_PACK32
) && ubwc
) {
236 format
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
239 /* note: the only format with partial clearing is D24S8 */
240 if (vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
) {
241 /* preserve stencil channel */
242 if (aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
)
243 unknown_8c01
= 0x08000041;
244 /* preserve depth channels */
245 if (aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
246 unknown_8c01
= 0x00084001;
249 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_UNKNOWN_8C01
, 1);
250 tu_cs_emit(cs
, unknown_8c01
);
252 uint32_t blit_cntl
= A6XX_RB_2D_BLIT_CNTL(
255 .solid_color
= clear
,
256 .d24s8
= format
== FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
&& !clear
,
257 .color_format
= format
,
259 .ifmt
= vk_format_is_srgb(vk_format
) ? R2D_UNORM8_SRGB
: ifmt
,
262 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_2D_BLIT_CNTL
, 1);
263 tu_cs_emit(cs
, blit_cntl
);
265 tu_cs_emit_pkt4(cs
, REG_A6XX_GRAS_2D_BLIT_CNTL
, 1);
266 tu_cs_emit(cs
, blit_cntl
);
268 if (format
== FMT6_10_10_10_2_UNORM_DEST
)
269 format
= FMT6_16_16_16_16_FLOAT
;
271 tu_cs_emit_regs(cs
, A6XX_SP_2D_DST_FORMAT(
272 .sint
= vk_format_is_sint(vk_format
),
273 .uint
= vk_format_is_uint(vk_format
),
274 .color_format
= format
,
275 .srgb
= vk_format_is_srgb(vk_format
),
280 r2d_setup(struct tu_cmd_buffer
*cmd
,
283 VkImageAspectFlags aspect_mask
,
284 enum a6xx_rotation rotation
,
288 tu_emit_cache_flush_ccu(cmd
, cs
, TU_CMD_CCU_SYSMEM
);
290 r2d_setup_common(cmd
, cs
, vk_format
, aspect_mask
, rotation
, clear
, ubwc
, false);
/* blit_ops::teardown for the 2D path: the 2D engine leaves no state that
 * needs restoring, so this is intentionally empty.
 */
static void
r2d_teardown(struct tu_cmd_buffer *cmd,
             struct tu_cs *cs)
{
   /* nothing to do here */
}
301 r2d_run(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
)
303 tu_cs_emit_pkt7(cs
, CP_BLIT
, 1);
304 tu_cs_emit(cs
, CP_BLIT_0_OP(BLIT_OP_SCALE
));
307 /* r3d_ = shader path operations */
310 tu_init_clear_blit_shaders(struct tu6_global
*global
)
312 #define MOV(args...) { .cat1 = { .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32, args } }
313 #define CAT2(op, args...) { .cat2 = { .opc_cat = 2, .opc = (op) & 63, .full = 1, args } }
314 #define CAT3(op, args...) { .cat3 = { .opc_cat = 3, .opc = (op) & 63, args } }
316 static const instr_t vs_code
[] = {
317 /* r0.xyz = r0.w ? c1.xyz : c0.xyz
318 * r1.xy = r0.w ? c1.zw : c0.zw
321 CAT3(OPC_SEL_B32
, .repeat
= 2, .dst
= 0,
322 .c1
= {.src1_c
= 1, .src1
= 4}, .src1_r
= 1,
324 .c2
= {.src3_c
= 1, .dummy
= 1, .src3
= 0}),
325 CAT3(OPC_SEL_B32
, .repeat
= 1, .dst
= 4,
326 .c1
= {.src1_c
= 1, .src1
= 6}, .src1_r
= 1,
328 .c2
= {.src3_c
= 1, .dummy
= 1, .src3
= 2}),
329 MOV(.dst
= 3, .src_im
= 1, .fim_val
= 1.0f
),
330 { .cat0
= { .opc
= OPC_END
} },
333 static const instr_t fs_blit
[] = {
334 /* " bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its
335 * blit path (its not clear what allows it to not have it)
337 CAT2(OPC_BARY_F
, .ei
= 1, .full
= 1, .dst
= 63 * 4, .src1_im
= 1),
338 { .cat0
= { .opc
= OPC_END
} },
341 memcpy(&global
->shaders
[GLOBAL_SH_VS
], vs_code
, sizeof(vs_code
));
342 memcpy(&global
->shaders
[GLOBAL_SH_FS_BLIT
], fs_blit
, sizeof(fs_blit
));
344 for (uint32_t num_rts
= 0; num_rts
<= MAX_RTS
; num_rts
++) {
345 instr_t
*code
= global
->shaders
[GLOBAL_SH_FS_CLEAR0
+ num_rts
];
346 for (uint32_t i
= 0; i
< num_rts
; i
++) {
347 /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
348 *code
++ = (instr_t
) MOV(.repeat
= 3, .dst
= i
* 4, .src_c
= 1, .src_r
= 1, .src
= i
* 4);
350 *code
++ = (instr_t
) { .cat0
= { .opc
= OPC_END
} };
355 r3d_common(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
, bool blit
, uint32_t num_rts
,
358 struct ir3_const_state dummy_const_state
= {};
359 struct ir3_shader dummy_shader
= {};
361 struct ir3_shader_variant vs
= {
362 .type
= MESA_SHADER_VERTEX
,
368 .slot
= SYSTEM_VALUE_VERTEX_ID
,
369 .regid
= regid(0, 3),
372 .outputs_count
= blit
? 2 : 1,
374 .slot
= VARYING_SLOT_POS
,
375 .regid
= regid(0, 0),
378 .slot
= VARYING_SLOT_VAR0
,
379 .regid
= regid(1, 0),
381 .shader
= &dummy_shader
,
382 .const_state
= &dummy_const_state
,
385 vs
.outputs
[1].slot
= VARYING_SLOT_LAYER
;
386 vs
.outputs
[1].regid
= regid(1, 1);
387 vs
.outputs_count
= 2;
390 struct ir3_shader_variant fs
= {
391 .type
= MESA_SHADER_FRAGMENT
,
392 .instrlen
= 1, /* max of 9 instructions with num_rts = 8 */
393 .constlen
= align(num_rts
, 4),
394 .info
.max_reg
= MAX2(num_rts
, 1) - 1,
395 .total_in
= blit
? 2 : 0,
396 .num_samp
= blit
? 1 : 0,
397 .inputs_count
= blit
? 2 : 0,
399 .slot
= VARYING_SLOT_VAR0
,
405 .slot
= SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL
,
406 .regid
= regid(0, 0),
409 .num_sampler_prefetch
= blit
? 1 : 0,
410 .sampler_prefetch
[0] = {
415 .shader
= &dummy_shader
,
416 .const_state
= &dummy_const_state
,
419 tu_cs_emit_regs(cs
, A6XX_HLSQ_INVALIDATE_CMD(
428 .gfx_shared_const
= true,
429 .gfx_bindless
= 0x1f,
430 .cs_bindless
= 0x1f));
432 tu6_emit_xs_config(cs
, MESA_SHADER_VERTEX
, &vs
, global_iova(cmd
, shaders
[GLOBAL_SH_VS
]));
433 tu6_emit_xs_config(cs
, MESA_SHADER_TESS_CTRL
, NULL
, 0);
434 tu6_emit_xs_config(cs
, MESA_SHADER_TESS_EVAL
, NULL
, 0);
435 tu6_emit_xs_config(cs
, MESA_SHADER_GEOMETRY
, NULL
, 0);
436 tu6_emit_xs_config(cs
, MESA_SHADER_FRAGMENT
, &fs
,
437 global_iova(cmd
, shaders
[blit
? GLOBAL_SH_FS_BLIT
: (GLOBAL_SH_FS_CLEAR0
+ num_rts
)]));
439 tu_cs_emit_regs(cs
, A6XX_PC_PRIMITIVE_CNTL_0());
440 tu_cs_emit_regs(cs
, A6XX_VFD_CONTROL_0());
442 tu6_emit_vpc(cs
, &vs
, NULL
, NULL
, NULL
, &fs
, 0, false);
444 /* REPL_MODE for varying with RECTLIST (2 vertices only) */
445 tu_cs_emit_regs(cs
, A6XX_VPC_VARYING_INTERP_MODE(0, 0));
446 tu_cs_emit_regs(cs
, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0));
448 tu6_emit_fs_inputs(cs
, &fs
);
452 .persp_division_disable
= 1,
453 .vp_xform_disable
= 1,
454 .vp_clip_code_ignore
= 1,
456 tu_cs_emit_regs(cs
, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?
459 A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x
= 0, .y
= 0),
460 A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x
= 0x7fff, .y
= 0x7fff));
462 A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x
= 0, .y
= 0),
463 A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x
= 0x7fff, .y
= 0x7fff));
466 A6XX_VFD_INDEX_OFFSET(),
467 A6XX_VFD_INSTANCE_START_OFFSET());
471 r3d_coords_raw(struct tu_cs
*cs
, const float *coords
)
473 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_GEOM
, 3 + 8);
474 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
475 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
476 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
477 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER
) |
478 CP_LOAD_STATE6_0_NUM_UNIT(2));
479 tu_cs_emit(cs
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
480 tu_cs_emit(cs
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
481 tu_cs_emit_array(cs
, (const uint32_t *) coords
, 8);
485 r3d_coords(struct tu_cs
*cs
,
486 const VkOffset2D
*dst
,
487 const VkOffset2D
*src
,
488 const VkExtent2D
*extent
)
490 int32_t src_x1
= src
? src
->x
: 0;
491 int32_t src_y1
= src
? src
->y
: 0;
492 r3d_coords_raw(cs
, (float[]) {
495 dst
->x
+ extent
->width
, dst
->y
+ extent
->height
,
496 src_x1
+ extent
->width
, src_y1
+ extent
->height
,
501 r3d_clear_value(struct tu_cs
*cs
, VkFormat format
, const VkClearValue
*val
)
503 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3 + 4);
504 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
505 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
506 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
507 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER
) |
508 CP_LOAD_STATE6_0_NUM_UNIT(1));
509 tu_cs_emit(cs
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
510 tu_cs_emit(cs
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
512 case VK_FORMAT_X8_D24_UNORM_PACK32
:
513 case VK_FORMAT_D24_UNORM_S8_UINT
: {
514 /* cleared as r8g8b8a8_unorm using special format */
515 uint32_t tmp
= tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 24);
516 tu_cs_emit(cs
, fui((tmp
& 0xff) / 255.0f
));
517 tu_cs_emit(cs
, fui((tmp
>> 8 & 0xff) / 255.0f
));
518 tu_cs_emit(cs
, fui((tmp
>> 16 & 0xff) / 255.0f
));
519 tu_cs_emit(cs
, fui((val
->depthStencil
.stencil
& 0xff) / 255.0f
));
521 case VK_FORMAT_D16_UNORM
:
522 case VK_FORMAT_D32_SFLOAT
:
523 tu_cs_emit(cs
, fui(val
->depthStencil
.depth
));
528 case VK_FORMAT_S8_UINT
:
529 tu_cs_emit(cs
, val
->depthStencil
.stencil
& 0xff);
535 /* as color formats use clear value as-is */
536 assert(!vk_format_is_depth_or_stencil(format
));
537 tu_cs_emit_array(cs
, val
->color
.uint32
, 4);
543 r3d_src_common(struct tu_cmd_buffer
*cmd
,
545 const uint32_t *tex_const
,
546 uint32_t offset_base
,
547 uint32_t offset_ubwc
,
550 struct tu_cs_memory texture
= { };
551 VkResult result
= tu_cs_alloc(&cmd
->sub_cs
,
552 2, /* allocate space for a sampler too */
553 A6XX_TEX_CONST_DWORDS
, &texture
);
554 assert(result
== VK_SUCCESS
);
556 memcpy(texture
.map
, tex_const
, A6XX_TEX_CONST_DWORDS
* 4);
558 /* patch addresses for layer offset */
559 *(uint64_t*) (texture
.map
+ 4) += offset_base
;
560 uint64_t ubwc_addr
= (texture
.map
[7] | (uint64_t) texture
.map
[8] << 32) + offset_ubwc
;
561 texture
.map
[7] = ubwc_addr
;
562 texture
.map
[8] = ubwc_addr
>> 32;
564 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 0] =
565 A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(filter
, false)) |
566 A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(filter
, false)) |
567 A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE
) |
568 A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE
) |
569 A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE
) |
570 0x60000; /* XXX used by blob, doesn't seem necessary */
571 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 1] =
572 0x1 | /* XXX used by blob, doesn't seem necessary */
573 A6XX_TEX_SAMP_1_UNNORM_COORDS
|
574 A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR
;
575 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 2] = 0;
576 texture
.map
[A6XX_TEX_CONST_DWORDS
+ 3] = 0;
578 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3);
579 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
580 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER
) |
581 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
582 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX
) |
583 CP_LOAD_STATE6_0_NUM_UNIT(1));
584 tu_cs_emit_qw(cs
, texture
.iova
+ A6XX_TEX_CONST_DWORDS
* 4);
586 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_TEX_SAMP_LO
, 2);
587 tu_cs_emit_qw(cs
, texture
.iova
+ A6XX_TEX_CONST_DWORDS
* 4);
589 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3);
590 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
591 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
592 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT
) |
593 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX
) |
594 CP_LOAD_STATE6_0_NUM_UNIT(1));
595 tu_cs_emit_qw(cs
, texture
.iova
);
597 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_TEX_CONST_LO
, 2);
598 tu_cs_emit_qw(cs
, texture
.iova
);
600 tu_cs_emit_regs(cs
, A6XX_SP_FS_TEX_COUNT(1));
604 r3d_src(struct tu_cmd_buffer
*cmd
,
606 const struct tu_image_view
*iview
,
610 r3d_src_common(cmd
, cs
, iview
->descriptor
,
611 iview
->layer_size
* layer
,
612 iview
->ubwc_layer_size
* layer
,
617 r3d_src_buffer(struct tu_cmd_buffer
*cmd
,
620 uint64_t va
, uint32_t pitch
,
621 uint32_t width
, uint32_t height
)
623 uint32_t desc
[A6XX_TEX_CONST_DWORDS
];
625 struct tu_native_format format
= tu6_format_texture(vk_format
, TILE6_LINEAR
);
628 COND(vk_format_is_srgb(vk_format
), A6XX_TEX_CONST_0_SRGB
) |
629 A6XX_TEX_CONST_0_FMT(format
.fmt
) |
630 A6XX_TEX_CONST_0_SWAP(format
.swap
) |
631 A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X
) |
632 // XXX to swizzle into .w for stencil buffer_to_image
633 A6XX_TEX_CONST_0_SWIZ_Y(vk_format
== VK_FORMAT_R8_UNORM
? A6XX_TEX_X
: A6XX_TEX_Y
) |
634 A6XX_TEX_CONST_0_SWIZ_Z(vk_format
== VK_FORMAT_R8_UNORM
? A6XX_TEX_X
: A6XX_TEX_Z
) |
635 A6XX_TEX_CONST_0_SWIZ_W(vk_format
== VK_FORMAT_R8_UNORM
? A6XX_TEX_X
: A6XX_TEX_W
);
636 desc
[1] = A6XX_TEX_CONST_1_WIDTH(width
) | A6XX_TEX_CONST_1_HEIGHT(height
);
638 A6XX_TEX_CONST_2_PITCH(pitch
) |
639 A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D
);
643 for (uint32_t i
= 6; i
< A6XX_TEX_CONST_DWORDS
; i
++)
646 r3d_src_common(cmd
, cs
, desc
, 0, 0, VK_FILTER_NEAREST
);
650 r3d_dst(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
)
652 tu6_emit_msaa(cs
, iview
->image
->samples
); /* TODO: move to setup */
654 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
655 tu_cs_emit(cs
, iview
->RB_MRT_BUF_INFO
);
656 tu_cs_image_ref(cs
, iview
, layer
);
659 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
660 tu_cs_image_flag_ref(cs
, iview
, layer
);
662 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_CNTL(.flag_mrts
= iview
->ubwc_enabled
));
666 r3d_dst_stencil(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
)
668 tu6_emit_msaa(cs
, iview
->image
->samples
); /* TODO: move to setup */
670 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
671 tu_cs_emit(cs
, tu_image_view_stencil(iview
, RB_MRT_BUF_INFO
));
672 tu_cs_image_stencil_ref(cs
, iview
, layer
);
675 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_CNTL());
679 r3d_dst_buffer(struct tu_cs
*cs
, VkFormat vk_format
, uint64_t va
, uint32_t pitch
)
681 struct tu_native_format format
= tu6_format_color(vk_format
, TILE6_LINEAR
);
683 tu6_emit_msaa(cs
, 1); /* TODO: move to setup */
686 A6XX_RB_MRT_BUF_INFO(0, .color_format
= format
.fmt
, .color_swap
= format
.swap
),
687 A6XX_RB_MRT_PITCH(0, pitch
),
688 A6XX_RB_MRT_ARRAY_PITCH(0, 0),
689 A6XX_RB_MRT_BASE_LO(0, (uint32_t) va
),
690 A6XX_RB_MRT_BASE_HI(0, va
>> 32),
691 A6XX_RB_MRT_BASE_GMEM(0, 0));
693 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_CNTL());
697 aspect_write_mask(VkFormat vk_format
, VkImageAspectFlags aspect_mask
)
701 /* note: the only format with partial writing is D24S8,
702 * clear/blit uses the _AS_R8G8B8A8 format to access it
704 if (vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
) {
705 if (aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
)
707 if (aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
714 r3d_setup(struct tu_cmd_buffer
*cmd
,
717 VkImageAspectFlags aspect_mask
,
718 enum a6xx_rotation rotation
,
722 enum a6xx_format format
= tu6_base_format(vk_format
);
724 if ((vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
||
725 vk_format
== VK_FORMAT_X8_D24_UNORM_PACK32
) && ubwc
) {
726 format
= FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8
;
729 if (!cmd
->state
.pass
) {
730 tu_emit_cache_flush_ccu(cmd
, cs
, TU_CMD_CCU_SYSMEM
);
731 tu6_emit_window_scissor(cs
, 0, 0, 0x3fff, 0x3fff);
734 tu_cs_emit_regs(cs
, A6XX_GRAS_BIN_CONTROL(.dword
= 0xc00000));
735 tu_cs_emit_regs(cs
, A6XX_RB_BIN_CONTROL(.dword
= 0xc00000));
737 r3d_common(cmd
, cs
, !clear
, clear
? 1 : 0, false);
739 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_CNTL0
, 2);
740 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
741 A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
743 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1));
745 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_REG(0), 1);
746 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_REG_REGID(0));
749 A6XX_RB_FS_OUTPUT_CNTL0(),
750 A6XX_RB_FS_OUTPUT_CNTL1(.mrt
= 1));
752 tu_cs_emit_regs(cs
, A6XX_SP_BLEND_CNTL());
753 tu_cs_emit_regs(cs
, A6XX_RB_BLEND_CNTL(.sample_mask
= 0xffff));
754 tu_cs_emit_regs(cs
, A6XX_RB_ALPHA_CONTROL());
756 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_PLANE_CNTL());
757 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_CNTL());
758 tu_cs_emit_regs(cs
, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
759 tu_cs_emit_regs(cs
, A6XX_RB_STENCIL_CONTROL());
760 tu_cs_emit_regs(cs
, A6XX_RB_STENCILMASK());
761 tu_cs_emit_regs(cs
, A6XX_RB_STENCILWRMASK());
762 tu_cs_emit_regs(cs
, A6XX_RB_STENCILREF());
764 tu_cs_emit_regs(cs
, A6XX_RB_RENDER_COMPONENTS(.rt0
= 0xf));
765 tu_cs_emit_regs(cs
, A6XX_SP_FS_RENDER_COMPONENTS(.rt0
= 0xf));
767 tu_cs_emit_regs(cs
, A6XX_SP_FS_MRT_REG(0,
768 .color_format
= format
,
769 .color_sint
= vk_format_is_sint(vk_format
),
770 .color_uint
= vk_format_is_uint(vk_format
)));
772 tu_cs_emit_regs(cs
, A6XX_RB_MRT_CONTROL(0,
773 .component_enable
= aspect_write_mask(vk_format
, aspect_mask
)));
774 tu_cs_emit_regs(cs
, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format
)));
775 tu_cs_emit_regs(cs
, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format
)));
777 if (cmd
->state
.predication_active
) {
778 tu_cs_emit_pkt7(cs
, CP_DRAW_PRED_ENABLE_LOCAL
, 1);
784 r3d_run(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
)
786 tu_cs_emit_pkt7(cs
, CP_DRAW_INDX_OFFSET
, 3);
787 tu_cs_emit(cs
, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST
) |
788 CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX
) |
789 CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY
));
790 tu_cs_emit(cs
, 1); /* instance count */
791 tu_cs_emit(cs
, 2); /* vertex count */
795 r3d_teardown(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
)
797 if (cmd
->state
.predication_active
) {
798 tu_cs_emit_pkt7(cs
, CP_DRAW_PRED_ENABLE_LOCAL
, 1);
803 /* blit ops - common interface for 2d/shader paths */
806 void (*coords
)(struct tu_cs
*cs
,
807 const VkOffset2D
*dst
,
808 const VkOffset2D
*src
,
809 const VkExtent2D
*extent
);
810 void (*clear_value
)(struct tu_cs
*cs
, VkFormat format
, const VkClearValue
*val
);
812 struct tu_cmd_buffer
*cmd
,
814 const struct tu_image_view
*iview
,
817 void (*src_buffer
)(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
,
819 uint64_t va
, uint32_t pitch
,
820 uint32_t width
, uint32_t height
);
821 void (*dst
)(struct tu_cs
*cs
, const struct tu_image_view
*iview
, uint32_t layer
);
822 void (*dst_buffer
)(struct tu_cs
*cs
, VkFormat vk_format
, uint64_t va
, uint32_t pitch
);
823 void (*setup
)(struct tu_cmd_buffer
*cmd
,
826 VkImageAspectFlags aspect_mask
,
827 enum a6xx_rotation rotation
,
830 void (*run
)(struct tu_cmd_buffer
*cmd
, struct tu_cs
*cs
);
831 void (*teardown
)(struct tu_cmd_buffer
*cmd
,
835 static const struct blit_ops r2d_ops
= {
836 .coords
= r2d_coords
,
837 .clear_value
= r2d_clear_value
,
839 .src_buffer
= r2d_src_buffer
,
841 .dst_buffer
= r2d_dst_buffer
,
844 .teardown
= r2d_teardown
,
847 static const struct blit_ops r3d_ops
= {
848 .coords
= r3d_coords
,
849 .clear_value
= r3d_clear_value
,
851 .src_buffer
= r3d_src_buffer
,
853 .dst_buffer
= r3d_dst_buffer
,
856 .teardown
= r3d_teardown
,
859 /* passthrough set coords from 3D extents */
861 coords(const struct blit_ops
*ops
,
863 const VkOffset3D
*dst
,
864 const VkOffset3D
*src
,
865 const VkExtent3D
*extent
)
867 ops
->coords(cs
, (const VkOffset2D
*) dst
, (const VkOffset2D
*) src
, (const VkExtent2D
*) extent
);
871 copy_format(VkFormat format
, VkImageAspectFlags aspect_mask
, bool copy_buffer
)
873 if (vk_format_is_compressed(format
)) {
874 switch (vk_format_get_blocksize(format
)) {
875 case 1: return VK_FORMAT_R8_UINT
;
876 case 2: return VK_FORMAT_R16_UINT
;
877 case 4: return VK_FORMAT_R32_UINT
;
878 case 8: return VK_FORMAT_R32G32_UINT
;
879 case 16:return VK_FORMAT_R32G32B32A32_UINT
;
881 unreachable("unhandled format size");
886 case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM
:
887 if (aspect_mask
== VK_IMAGE_ASPECT_PLANE_1_BIT
)
888 return VK_FORMAT_R8G8_UNORM
;
890 case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM
:
891 return VK_FORMAT_R8_UNORM
;
892 case VK_FORMAT_D24_UNORM_S8_UINT
:
893 if (aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
&& copy_buffer
)
894 return VK_FORMAT_R8_UNORM
;
898 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
:
899 return VK_FORMAT_R32_UINT
;
900 case VK_FORMAT_D32_SFLOAT_S8_UINT
:
901 if (aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
902 return VK_FORMAT_S8_UINT
;
903 assert(aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
);
904 return VK_FORMAT_D32_SFLOAT
;
909 tu_image_view_copy_blit(struct tu_image_view
*iview
,
910 struct tu_image
*image
,
912 const VkImageSubresourceLayers
*subres
,
916 VkImageAspectFlags aspect_mask
= subres
->aspectMask
;
918 /* always use the AS_R8G8B8A8 format for these */
919 if (format
== VK_FORMAT_D24_UNORM_S8_UINT
||
920 format
== VK_FORMAT_X8_D24_UNORM_PACK32
) {
921 aspect_mask
= VK_IMAGE_ASPECT_COLOR_BIT
;
924 tu_image_view_init(iview
, &(VkImageViewCreateInfo
) {
925 .image
= tu_image_to_handle(image
),
926 .viewType
= VK_IMAGE_VIEW_TYPE_2D
,
928 /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
929 .components
.r
= stencil_read
? VK_COMPONENT_SWIZZLE_A
: VK_COMPONENT_SWIZZLE_R
,
930 .subresourceRange
= {
931 .aspectMask
= aspect_mask
,
932 .baseMipLevel
= subres
->mipLevel
,
934 .baseArrayLayer
= subres
->baseArrayLayer
+ layer
,
941 tu_image_view_copy(struct tu_image_view
*iview
,
942 struct tu_image
*image
,
944 const VkImageSubresourceLayers
*subres
,
948 format
= copy_format(format
, subres
->aspectMask
, false);
949 tu_image_view_copy_blit(iview
, image
, format
, subres
, layer
, stencil_read
);
953 tu_image_view_blit(struct tu_image_view
*iview
,
954 struct tu_image
*image
,
955 const VkImageSubresourceLayers
*subres
,
958 tu_image_view_copy_blit(iview
, image
, image
->vk_format
, subres
, layer
, false);
962 tu6_blit_image(struct tu_cmd_buffer
*cmd
,
963 struct tu_image
*src_image
,
964 struct tu_image
*dst_image
,
965 const VkImageBlit
*info
,
968 const struct blit_ops
*ops
= &r2d_ops
;
969 struct tu_cs
*cs
= &cmd
->cs
;
972 /* 2D blit can't do rotation mirroring from just coordinates */
973 static const enum a6xx_rotation rotate
[2][2] = {
974 {ROTATE_0
, ROTATE_HFLIP
},
975 {ROTATE_VFLIP
, ROTATE_180
},
978 bool mirror_x
= (info
->srcOffsets
[1].x
< info
->srcOffsets
[0].x
) !=
979 (info
->dstOffsets
[1].x
< info
->dstOffsets
[0].x
);
980 bool mirror_y
= (info
->srcOffsets
[1].y
< info
->srcOffsets
[0].y
) !=
981 (info
->dstOffsets
[1].y
< info
->dstOffsets
[0].y
);
982 bool mirror_z
= (info
->srcOffsets
[1].z
< info
->srcOffsets
[0].z
) !=
983 (info
->dstOffsets
[1].z
< info
->dstOffsets
[0].z
);
986 tu_finishme("blit z mirror\n");
990 if (info
->srcOffsets
[1].z
- info
->srcOffsets
[0].z
!=
991 info
->dstOffsets
[1].z
- info
->dstOffsets
[0].z
) {
992 tu_finishme("blit z filter\n");
996 layers
= info
->srcOffsets
[1].z
- info
->srcOffsets
[0].z
;
997 if (info
->dstSubresource
.layerCount
> 1) {
999 layers
= info
->dstSubresource
.layerCount
;
1002 /* BC1_RGB_* formats need to have their last components overriden with 1
1003 * when sampling, which is normally handled with the texture descriptor
1004 * swizzle. The 2d path can't handle that, so use the 3d path.
1006 * TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with
1010 if (dst_image
->samples
> 1 ||
1011 src_image
->vk_format
== VK_FORMAT_BC1_RGB_UNORM_BLOCK
||
1012 src_image
->vk_format
== VK_FORMAT_BC1_RGB_SRGB_BLOCK
||
1013 filter
== VK_FILTER_CUBIC_EXT
)
1016 /* use the right format in setup() for D32_S8
1017 * TODO: this probably should use a helper
1019 VkFormat format
= dst_image
->vk_format
;
1020 if (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
1021 if (info
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_DEPTH_BIT
)
1022 format
= VK_FORMAT_D32_SFLOAT
;
1023 else if (info
->dstSubresource
.aspectMask
== VK_IMAGE_ASPECT_STENCIL_BIT
)
1024 format
= VK_FORMAT_S8_UINT
;
1026 unreachable("unexpected D32_S8 aspect mask in blit_image");
1029 ops
->setup(cmd
, cs
, format
, info
->dstSubresource
.aspectMask
,
1030 rotate
[mirror_y
][mirror_x
], false, dst_image
->layout
[0].ubwc
);
1032 if (ops
== &r3d_ops
) {
1033 r3d_coords_raw(cs
, (float[]) {
1034 info
->dstOffsets
[0].x
, info
->dstOffsets
[0].y
,
1035 info
->srcOffsets
[0].x
, info
->srcOffsets
[0].y
,
1036 info
->dstOffsets
[1].x
, info
->dstOffsets
[1].y
,
1037 info
->srcOffsets
[1].x
, info
->srcOffsets
[1].y
1041 A6XX_GRAS_2D_DST_TL(.x
= MIN2(info
->dstOffsets
[0].x
, info
->dstOffsets
[1].x
),
1042 .y
= MIN2(info
->dstOffsets
[0].y
, info
->dstOffsets
[1].y
)),
1043 A6XX_GRAS_2D_DST_BR(.x
= MAX2(info
->dstOffsets
[0].x
, info
->dstOffsets
[1].x
) - 1,
1044 .y
= MAX2(info
->dstOffsets
[0].y
, info
->dstOffsets
[1].y
) - 1));
1046 A6XX_GRAS_2D_SRC_TL_X(MIN2(info
->srcOffsets
[0].x
, info
->srcOffsets
[1].x
)),
1047 A6XX_GRAS_2D_SRC_BR_X(MAX2(info
->srcOffsets
[0].x
, info
->srcOffsets
[1].x
) - 1),
1048 A6XX_GRAS_2D_SRC_TL_Y(MIN2(info
->srcOffsets
[0].y
, info
->srcOffsets
[1].y
)),
1049 A6XX_GRAS_2D_SRC_BR_Y(MAX2(info
->srcOffsets
[0].y
, info
->srcOffsets
[1].y
) - 1));
1052 struct tu_image_view dst
, src
;
1053 tu_image_view_blit(&dst
, dst_image
, &info
->dstSubresource
, info
->dstOffsets
[0].z
);
1054 tu_image_view_blit(&src
, src_image
, &info
->srcSubresource
, info
->srcOffsets
[0].z
);
1056 for (uint32_t i
= 0; i
< layers
; i
++) {
1057 ops
->dst(cs
, &dst
, i
);
1058 ops
->src(cmd
, cs
, &src
, i
, filter
);
1062 ops
->teardown(cmd
, cs
);
1066 tu_CmdBlitImage(VkCommandBuffer commandBuffer
,
1068 VkImageLayout srcImageLayout
,
1070 VkImageLayout dstImageLayout
,
1071 uint32_t regionCount
,
1072 const VkImageBlit
*pRegions
,
1076 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1077 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1078 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1080 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1081 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1083 for (uint32_t i
= 0; i
< regionCount
; ++i
) {
1084 /* can't blit both depth and stencil at once with D32_S8
1085 * TODO: more advanced 3D blit path to support it instead?
1087 if (src_image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
||
1088 dst_image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
1089 VkImageBlit region
= pRegions
[i
];
1091 for_each_bit(b
, pRegions
[i
].dstSubresource
.aspectMask
) {
1092 region
.srcSubresource
.aspectMask
= BIT(b
);
1093 region
.dstSubresource
.aspectMask
= BIT(b
);
1094 tu6_blit_image(cmd
, src_image
, dst_image
, ®ion
, filter
);
1098 tu6_blit_image(cmd
, src_image
, dst_image
, pRegions
+ i
, filter
);
1103 copy_compressed(VkFormat format
,
1109 if (!vk_format_is_compressed(format
))
1112 uint32_t block_width
= vk_format_get_blockwidth(format
);
1113 uint32_t block_height
= vk_format_get_blockheight(format
);
1115 offset
->x
/= block_width
;
1116 offset
->y
/= block_height
;
1119 extent
->width
= DIV_ROUND_UP(extent
->width
, block_width
);
1120 extent
->height
= DIV_ROUND_UP(extent
->height
, block_height
);
1123 *width
= DIV_ROUND_UP(*width
, block_width
);
1125 *height
= DIV_ROUND_UP(*height
, block_height
);
1129 tu_copy_buffer_to_image(struct tu_cmd_buffer
*cmd
,
1130 struct tu_buffer
*src_buffer
,
1131 struct tu_image
*dst_image
,
1132 const VkBufferImageCopy
*info
)
1134 struct tu_cs
*cs
= &cmd
->cs
;
1135 uint32_t layers
= MAX2(info
->imageExtent
.depth
, info
->imageSubresource
.layerCount
);
1136 VkFormat src_format
=
1137 copy_format(dst_image
->vk_format
, info
->imageSubresource
.aspectMask
, true);
1138 const struct blit_ops
*ops
= &r2d_ops
;
1140 /* special case for buffer to stencil */
1141 if (dst_image
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
&&
1142 info
->imageSubresource
.aspectMask
== VK_IMAGE_ASPECT_STENCIL_BIT
) {
1146 /* TODO: G8_B8R8_2PLANE_420_UNORM Y plane has different hardware format,
1147 * which matters for UBWC. buffer_to_image/etc can fail because of this
1150 VkOffset3D offset
= info
->imageOffset
;
1151 VkExtent3D extent
= info
->imageExtent
;
1152 uint32_t src_width
= info
->bufferRowLength
?: extent
.width
;
1153 uint32_t src_height
= info
->bufferImageHeight
?: extent
.height
;
1155 copy_compressed(dst_image
->vk_format
, &offset
, &extent
, &src_width
, &src_height
);
1157 uint32_t pitch
= src_width
* vk_format_get_blocksize(src_format
);
1158 uint32_t layer_size
= src_height
* pitch
;
1161 copy_format(dst_image
->vk_format
, info
->imageSubresource
.aspectMask
, false),
1162 info
->imageSubresource
.aspectMask
, ROTATE_0
, false, dst_image
->layout
[0].ubwc
);
1164 struct tu_image_view dst
;
1165 tu_image_view_copy(&dst
, dst_image
, dst_image
->vk_format
, &info
->imageSubresource
, offset
.z
, false);
1167 for (uint32_t i
= 0; i
< layers
; i
++) {
1168 ops
->dst(cs
, &dst
, i
);
1170 uint64_t src_va
= tu_buffer_iova(src_buffer
) + info
->bufferOffset
+ layer_size
* i
;
1171 if ((src_va
& 63) || (pitch
& 63)) {
1172 for (uint32_t y
= 0; y
< extent
.height
; y
++) {
1173 uint32_t x
= (src_va
& 63) / vk_format_get_blocksize(src_format
);
1174 ops
->src_buffer(cmd
, cs
, src_format
, src_va
& ~63, pitch
,
1175 x
+ extent
.width
, 1);
1176 ops
->coords(cs
, &(VkOffset2D
){offset
.x
, offset
.y
+ y
}, &(VkOffset2D
){x
},
1177 &(VkExtent2D
) {extent
.width
, 1});
1182 ops
->src_buffer(cmd
, cs
, src_format
, src_va
, pitch
, extent
.width
, extent
.height
);
1183 coords(ops
, cs
, &offset
, &(VkOffset3D
){}, &extent
);
1188 ops
->teardown(cmd
, cs
);
1192 tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer
,
1195 VkImageLayout dstImageLayout
,
1196 uint32_t regionCount
,
1197 const VkBufferImageCopy
*pRegions
)
1199 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1200 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1201 TU_FROM_HANDLE(tu_buffer
, src_buffer
, srcBuffer
);
1203 tu_bo_list_add(&cmd
->bo_list
, src_buffer
->bo
, MSM_SUBMIT_BO_READ
);
1204 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1206 for (unsigned i
= 0; i
< regionCount
; ++i
)
1207 tu_copy_buffer_to_image(cmd
, src_buffer
, dst_image
, pRegions
+ i
);
1211 tu_copy_image_to_buffer(struct tu_cmd_buffer
*cmd
,
1212 struct tu_image
*src_image
,
1213 struct tu_buffer
*dst_buffer
,
1214 const VkBufferImageCopy
*info
)
1216 struct tu_cs
*cs
= &cmd
->cs
;
1217 uint32_t layers
= MAX2(info
->imageExtent
.depth
, info
->imageSubresource
.layerCount
);
1218 VkFormat dst_format
=
1219 copy_format(src_image
->vk_format
, info
->imageSubresource
.aspectMask
, true);
1220 bool stencil_read
= false;
1222 if (src_image
->vk_format
== VK_FORMAT_D24_UNORM_S8_UINT
&&
1223 info
->imageSubresource
.aspectMask
== VK_IMAGE_ASPECT_STENCIL_BIT
) {
1224 stencil_read
= true;
1227 const struct blit_ops
*ops
= stencil_read
? &r3d_ops
: &r2d_ops
;
1228 VkOffset3D offset
= info
->imageOffset
;
1229 VkExtent3D extent
= info
->imageExtent
;
1230 uint32_t dst_width
= info
->bufferRowLength
?: extent
.width
;
1231 uint32_t dst_height
= info
->bufferImageHeight
?: extent
.height
;
1233 copy_compressed(src_image
->vk_format
, &offset
, &extent
, &dst_width
, &dst_height
);
1235 uint32_t pitch
= dst_width
* vk_format_get_blocksize(dst_format
);
1236 uint32_t layer_size
= pitch
* dst_height
;
1238 ops
->setup(cmd
, cs
, dst_format
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, false, false);
1240 struct tu_image_view src
;
1241 tu_image_view_copy(&src
, src_image
, src_image
->vk_format
, &info
->imageSubresource
, offset
.z
, stencil_read
);
1243 for (uint32_t i
= 0; i
< layers
; i
++) {
1244 ops
->src(cmd
, cs
, &src
, i
, VK_FILTER_NEAREST
);
1246 uint64_t dst_va
= tu_buffer_iova(dst_buffer
) + info
->bufferOffset
+ layer_size
* i
;
1247 if ((dst_va
& 63) || (pitch
& 63)) {
1248 for (uint32_t y
= 0; y
< extent
.height
; y
++) {
1249 uint32_t x
= (dst_va
& 63) / vk_format_get_blocksize(dst_format
);
1250 ops
->dst_buffer(cs
, dst_format
, dst_va
& ~63, 0);
1251 ops
->coords(cs
, &(VkOffset2D
) {x
}, &(VkOffset2D
){offset
.x
, offset
.y
+ y
},
1252 &(VkExtent2D
) {extent
.width
, 1});
1257 ops
->dst_buffer(cs
, dst_format
, dst_va
, pitch
);
1258 coords(ops
, cs
, &(VkOffset3D
) {0, 0}, &offset
, &extent
);
1263 ops
->teardown(cmd
, cs
);
1267 tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer
,
1269 VkImageLayout srcImageLayout
,
1271 uint32_t regionCount
,
1272 const VkBufferImageCopy
*pRegions
)
1274 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1275 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1276 TU_FROM_HANDLE(tu_buffer
, dst_buffer
, dstBuffer
);
1278 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1279 tu_bo_list_add(&cmd
->bo_list
, dst_buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1281 for (unsigned i
= 0; i
< regionCount
; ++i
)
1282 tu_copy_image_to_buffer(cmd
, src_image
, dst_buffer
, pRegions
+ i
);
1285 /* Tiled formats don't support swapping, which means that we can't support
1286 * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some
1287 * formats like B5G5R5A1 have a separate linear-only format when sampling.
1288 * Currently we fake support for tiled swapped formats and use the unswapped
1289 * format instead, but this means that reinterpreting copies to and from
1290 * swapped formats can't be performed correctly unless we can swizzle the
1291 * components by reinterpreting the other image as the "correct" swapped
1292 * format, i.e. only when the other image is linear.
1296 is_swapped_format(VkFormat format
)
1298 struct tu_native_format linear
= tu6_format_texture(format
, TILE6_LINEAR
);
1299 struct tu_native_format tiled
= tu6_format_texture(format
, TILE6_3
);
1300 return linear
.fmt
!= tiled
.fmt
|| linear
.swap
!= tiled
.swap
;
1303 /* R8G8_* formats have a different tiling layout than other cpp=2 formats, and
1304 * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice
1305 * versa). This should mirror the logic in fdl6_layout.
1308 image_is_r8g8(struct tu_image
*image
)
1310 return image
->layout
[0].cpp
== 2 &&
1311 vk_format_get_nr_components(image
->vk_format
) == 2;
1315 tu_copy_image_to_image(struct tu_cmd_buffer
*cmd
,
1316 struct tu_image
*src_image
,
1317 struct tu_image
*dst_image
,
1318 const VkImageCopy
*info
)
1320 const struct blit_ops
*ops
= &r2d_ops
;
1321 struct tu_cs
*cs
= &cmd
->cs
;
1323 if (dst_image
->samples
> 1)
1326 VkFormat format
= VK_FORMAT_UNDEFINED
;
1327 VkOffset3D src_offset
= info
->srcOffset
;
1328 VkOffset3D dst_offset
= info
->dstOffset
;
1329 VkExtent3D extent
= info
->extent
;
1331 /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between
1334 * When copying between compressed and uncompressed formats the extent
1335 * members represent the texel dimensions of the source image and not
1336 * the destination. When copying from a compressed image to an
1337 * uncompressed image the image texel dimensions written to the
1338 * uncompressed image will be source extent divided by the compressed
1339 * texel block dimensions. When copying from an uncompressed image to a
1340 * compressed image the image texel dimensions written to the compressed
1341 * image will be the source extent multiplied by the compressed texel
1344 * This means we only have to adjust the extent if the source image is
1347 copy_compressed(src_image
->vk_format
, &src_offset
, &extent
, NULL
, NULL
);
1348 copy_compressed(dst_image
->vk_format
, &dst_offset
, NULL
, NULL
, NULL
);
1350 VkFormat dst_format
= copy_format(dst_image
->vk_format
, info
->dstSubresource
.aspectMask
, false);
1351 VkFormat src_format
= copy_format(src_image
->vk_format
, info
->srcSubresource
.aspectMask
, false);
1353 bool use_staging_blit
= false;
1355 if (src_format
== dst_format
) {
1356 /* Images that share a format can always be copied directly because it's
1357 * the same as a blit.
1359 format
= src_format
;
1360 } else if (!src_image
->layout
[0].tile_mode
) {
1361 /* If an image is linear, we can always safely reinterpret it with the
1362 * other image's format and then do a regular blit.
1364 format
= dst_format
;
1365 } else if (!dst_image
->layout
[0].tile_mode
) {
1366 format
= src_format
;
1367 } else if (image_is_r8g8(src_image
) != image_is_r8g8(dst_image
)) {
1368 /* We can't currently copy r8g8 images to/from other cpp=2 images,
1369 * due to the different tile layout.
1371 use_staging_blit
= true;
1372 } else if (is_swapped_format(src_format
) ||
1373 is_swapped_format(dst_format
)) {
1374 /* If either format has a non-identity swap, then we can't copy
1377 use_staging_blit
= true;
1378 } else if (!src_image
->layout
[0].ubwc
) {
1379 format
= dst_format
;
1380 } else if (!dst_image
->layout
[0].ubwc
) {
1381 format
= src_format
;
1383 /* Both formats use UBWC and so neither can be reinterpreted.
1384 * TODO: We could do an in-place decompression of the dst instead.
1386 use_staging_blit
= true;
1389 struct tu_image_view dst
, src
;
1391 if (use_staging_blit
) {
1392 tu_image_view_copy(&dst
, dst_image
, dst_format
, &info
->dstSubresource
, dst_offset
.z
, false);
1393 tu_image_view_copy(&src
, src_image
, src_format
, &info
->srcSubresource
, src_offset
.z
, false);
1395 struct tu_image staging_image
= {
1396 .vk_format
= src_format
,
1397 .type
= src_image
->type
,
1398 .tiling
= VK_IMAGE_TILING_LINEAR
,
1401 .layer_count
= info
->srcSubresource
.layerCount
,
1402 .samples
= src_image
->samples
,
1406 VkImageSubresourceLayers staging_subresource
= {
1407 .aspectMask
= VK_IMAGE_ASPECT_COLOR_BIT
,
1409 .baseArrayLayer
= 0,
1410 .layerCount
= info
->srcSubresource
.layerCount
,
1413 VkOffset3D staging_offset
= { 0 };
1415 staging_image
.layout
[0].tile_mode
= TILE6_LINEAR
;
1416 staging_image
.layout
[0].ubwc
= false;
1418 fdl6_layout(&staging_image
.layout
[0],
1419 vk_format_to_pipe_format(staging_image
.vk_format
),
1420 staging_image
.samples
,
1421 staging_image
.extent
.width
,
1422 staging_image
.extent
.height
,
1423 staging_image
.extent
.depth
,
1424 staging_image
.level_count
,
1425 staging_image
.layer_count
,
1426 staging_image
.type
== VK_IMAGE_TYPE_3D
,
1429 VkResult result
= tu_get_scratch_bo(cmd
->device
,
1430 staging_image
.layout
[0].size
,
1432 if (result
!= VK_SUCCESS
) {
1433 cmd
->record_result
= result
;
1437 tu_bo_list_add(&cmd
->bo_list
, staging_image
.bo
,
1438 MSM_SUBMIT_BO_READ
| MSM_SUBMIT_BO_WRITE
);
1440 struct tu_image_view staging
;
1441 tu_image_view_copy(&staging
, &staging_image
, src_format
,
1442 &staging_subresource
, 0, false);
1444 ops
->setup(cmd
, cs
, src_format
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, false, false);
1445 coords(ops
, cs
, &staging_offset
, &src_offset
, &extent
);
1447 for (uint32_t i
= 0; i
< info
->extent
.depth
; i
++) {
1448 ops
->src(cmd
, cs
, &src
, i
, VK_FILTER_NEAREST
);
1449 ops
->dst(cs
, &staging
, i
);
1453 /* When executed by the user there has to be a pipeline barrier here,
1454 * but since we're doing it manually we'll have to flush ourselves.
1456 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
);
1457 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
);
1459 tu_image_view_copy(&staging
, &staging_image
, dst_format
,
1460 &staging_subresource
, 0, false);
1462 ops
->setup(cmd
, cs
, dst_format
, info
->dstSubresource
.aspectMask
,
1463 ROTATE_0
, false, dst_image
->layout
[0].ubwc
);
1464 coords(ops
, cs
, &dst_offset
, &staging_offset
, &extent
);
1466 for (uint32_t i
= 0; i
< info
->extent
.depth
; i
++) {
1467 ops
->src(cmd
, cs
, &staging
, i
, VK_FILTER_NEAREST
);
1468 ops
->dst(cs
, &dst
, i
);
1472 tu_image_view_copy(&dst
, dst_image
, format
, &info
->dstSubresource
, dst_offset
.z
, false);
1473 tu_image_view_copy(&src
, src_image
, format
, &info
->srcSubresource
, src_offset
.z
, false);
1475 ops
->setup(cmd
, cs
, format
, info
->dstSubresource
.aspectMask
,
1476 ROTATE_0
, false, dst_image
->layout
[0].ubwc
);
1477 coords(ops
, cs
, &dst_offset
, &src_offset
, &extent
);
1479 for (uint32_t i
= 0; i
< info
->extent
.depth
; i
++) {
1480 ops
->src(cmd
, cs
, &src
, i
, VK_FILTER_NEAREST
);
1481 ops
->dst(cs
, &dst
, i
);
1486 ops
->teardown(cmd
, cs
);
1490 tu_CmdCopyImage(VkCommandBuffer commandBuffer
,
1492 VkImageLayout srcImageLayout
,
1494 VkImageLayout destImageLayout
,
1495 uint32_t regionCount
,
1496 const VkImageCopy
*pRegions
)
1498 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1499 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1500 TU_FROM_HANDLE(tu_image
, dst_image
, destImage
);
1502 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1503 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1505 for (uint32_t i
= 0; i
< regionCount
; ++i
)
1506 tu_copy_image_to_image(cmd
, src_image
, dst_image
, pRegions
+ i
);
1510 copy_buffer(struct tu_cmd_buffer
*cmd
,
1514 uint32_t block_size
)
1516 const struct blit_ops
*ops
= &r2d_ops
;
1517 struct tu_cs
*cs
= &cmd
->cs
;
1518 VkFormat format
= block_size
== 4 ? VK_FORMAT_R32_UINT
: VK_FORMAT_R8_UNORM
;
1519 uint64_t blocks
= size
/ block_size
;
1521 ops
->setup(cmd
, cs
, format
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, false, false);
1524 uint32_t src_x
= (src_va
& 63) / block_size
;
1525 uint32_t dst_x
= (dst_va
& 63) / block_size
;
1526 uint32_t width
= MIN2(MIN2(blocks
, 0x4000 - src_x
), 0x4000 - dst_x
);
1528 ops
->src_buffer(cmd
, cs
, format
, src_va
& ~63, 0, src_x
+ width
, 1);
1529 ops
->dst_buffer( cs
, format
, dst_va
& ~63, 0);
1530 ops
->coords(cs
, &(VkOffset2D
) {dst_x
}, &(VkOffset2D
) {src_x
}, &(VkExtent2D
) {width
, 1});
1533 src_va
+= width
* block_size
;
1534 dst_va
+= width
* block_size
;
1538 ops
->teardown(cmd
, cs
);
1542 tu_CmdCopyBuffer(VkCommandBuffer commandBuffer
,
1545 uint32_t regionCount
,
1546 const VkBufferCopy
*pRegions
)
1548 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1549 TU_FROM_HANDLE(tu_buffer
, src_buffer
, srcBuffer
);
1550 TU_FROM_HANDLE(tu_buffer
, dst_buffer
, dstBuffer
);
1552 tu_bo_list_add(&cmd
->bo_list
, src_buffer
->bo
, MSM_SUBMIT_BO_READ
);
1553 tu_bo_list_add(&cmd
->bo_list
, dst_buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1555 for (unsigned i
= 0; i
< regionCount
; ++i
) {
1557 tu_buffer_iova(dst_buffer
) + pRegions
[i
].dstOffset
,
1558 tu_buffer_iova(src_buffer
) + pRegions
[i
].srcOffset
,
1559 pRegions
[i
].size
, 1);
1564 tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer
,
1566 VkDeviceSize dstOffset
,
1567 VkDeviceSize dataSize
,
1570 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1571 TU_FROM_HANDLE(tu_buffer
, buffer
, dstBuffer
);
1573 tu_bo_list_add(&cmd
->bo_list
, buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1575 struct tu_cs_memory tmp
;
1576 VkResult result
= tu_cs_alloc(&cmd
->sub_cs
, DIV_ROUND_UP(dataSize
, 64), 64, &tmp
);
1577 if (result
!= VK_SUCCESS
) {
1578 cmd
->record_result
= result
;
1582 memcpy(tmp
.map
, pData
, dataSize
);
1583 copy_buffer(cmd
, tu_buffer_iova(buffer
) + dstOffset
, tmp
.iova
, dataSize
, 4);
1587 tu_CmdFillBuffer(VkCommandBuffer commandBuffer
,
1589 VkDeviceSize dstOffset
,
1590 VkDeviceSize fillSize
,
1593 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1594 TU_FROM_HANDLE(tu_buffer
, buffer
, dstBuffer
);
1595 const struct blit_ops
*ops
= &r2d_ops
;
1596 struct tu_cs
*cs
= &cmd
->cs
;
1598 tu_bo_list_add(&cmd
->bo_list
, buffer
->bo
, MSM_SUBMIT_BO_WRITE
);
1600 if (fillSize
== VK_WHOLE_SIZE
)
1601 fillSize
= buffer
->size
- dstOffset
;
1603 uint64_t dst_va
= tu_buffer_iova(buffer
) + dstOffset
;
1604 uint32_t blocks
= fillSize
/ 4;
1606 ops
->setup(cmd
, cs
, VK_FORMAT_R32_UINT
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, true, false);
1607 ops
->clear_value(cs
, VK_FORMAT_R32_UINT
, &(VkClearValue
){.color
= {.uint32
[0] = data
}});
1610 uint32_t dst_x
= (dst_va
& 63) / 4;
1611 uint32_t width
= MIN2(blocks
, 0x4000 - dst_x
);
1613 ops
->dst_buffer(cs
, VK_FORMAT_R32_UINT
, dst_va
& ~63, 0);
1614 ops
->coords(cs
, &(VkOffset2D
) {dst_x
}, NULL
, &(VkExtent2D
) {width
, 1});
1617 dst_va
+= width
* 4;
1621 ops
->teardown(cmd
, cs
);
1625 tu_CmdResolveImage(VkCommandBuffer commandBuffer
,
1627 VkImageLayout srcImageLayout
,
1629 VkImageLayout dstImageLayout
,
1630 uint32_t regionCount
,
1631 const VkImageResolve
*pRegions
)
1633 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1634 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1635 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1636 const struct blit_ops
*ops
= &r2d_ops
;
1637 struct tu_cs
*cs
= &cmd
->cs
;
1639 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1640 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1642 ops
->setup(cmd
, cs
, dst_image
->vk_format
, VK_IMAGE_ASPECT_COLOR_BIT
,
1643 ROTATE_0
, false, dst_image
->layout
[0].ubwc
);
1645 for (uint32_t i
= 0; i
< regionCount
; ++i
) {
1646 const VkImageResolve
*info
= &pRegions
[i
];
1647 uint32_t layers
= MAX2(info
->extent
.depth
, info
->dstSubresource
.layerCount
);
1649 assert(info
->srcSubresource
.layerCount
== info
->dstSubresource
.layerCount
);
1650 /* TODO: aspect masks possible ? */
1652 coords(ops
, cs
, &info
->dstOffset
, &info
->srcOffset
, &info
->extent
);
1654 struct tu_image_view dst
, src
;
1655 tu_image_view_blit(&dst
, dst_image
, &info
->dstSubresource
, info
->dstOffset
.z
);
1656 tu_image_view_blit(&src
, src_image
, &info
->srcSubresource
, info
->srcOffset
.z
);
1658 for (uint32_t i
= 0; i
< layers
; i
++) {
1659 ops
->src(cmd
, cs
, &src
, i
, VK_FILTER_NEAREST
);
1660 ops
->dst(cs
, &dst
, i
);
1665 ops
->teardown(cmd
, cs
);
1669 tu_resolve_sysmem(struct tu_cmd_buffer
*cmd
,
1671 struct tu_image_view
*src
,
1672 struct tu_image_view
*dst
,
1674 const VkRect2D
*rect
)
1676 const struct blit_ops
*ops
= &r2d_ops
;
1678 tu_bo_list_add(&cmd
->bo_list
, src
->image
->bo
, MSM_SUBMIT_BO_READ
);
1679 tu_bo_list_add(&cmd
->bo_list
, dst
->image
->bo
, MSM_SUBMIT_BO_WRITE
);
1681 assert(src
->image
->vk_format
== dst
->image
->vk_format
);
1683 ops
->setup(cmd
, cs
, dst
->image
->vk_format
, VK_IMAGE_ASPECT_COLOR_BIT
,
1684 ROTATE_0
, false, dst
->ubwc_enabled
);
1685 ops
->coords(cs
, &rect
->offset
, &rect
->offset
, &rect
->extent
);
1687 for (uint32_t i
= 0; i
< layers
; i
++) {
1688 ops
->src(cmd
, cs
, src
, i
, VK_FILTER_NEAREST
);
1689 ops
->dst(cs
, dst
, i
);
1693 ops
->teardown(cmd
, cs
);
1697 clear_image(struct tu_cmd_buffer
*cmd
,
1698 struct tu_image
*image
,
1699 const VkClearValue
*clear_value
,
1700 const VkImageSubresourceRange
*range
,
1701 VkImageAspectFlags aspect_mask
)
1703 uint32_t level_count
= tu_get_levelCount(image
, range
);
1704 uint32_t layer_count
= tu_get_layerCount(image
, range
);
1705 struct tu_cs
*cs
= &cmd
->cs
;
1706 VkFormat format
= image
->vk_format
;
1707 if (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
|| format
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
1708 format
= copy_format(format
, aspect_mask
, false);
1710 if (image
->type
== VK_IMAGE_TYPE_3D
) {
1711 assert(layer_count
== 1);
1712 assert(range
->baseArrayLayer
== 0);
1715 const struct blit_ops
*ops
= image
->samples
> 1 ? &r3d_ops
: &r2d_ops
;
1717 ops
->setup(cmd
, cs
, format
, aspect_mask
, ROTATE_0
, true, image
->layout
[0].ubwc
);
1718 if (image
->vk_format
== VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
)
1719 ops
->clear_value(cs
, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32
, clear_value
);
1721 ops
->clear_value(cs
, format
, clear_value
);
1723 for (unsigned j
= 0; j
< level_count
; j
++) {
1724 if (image
->type
== VK_IMAGE_TYPE_3D
)
1725 layer_count
= u_minify(image
->extent
.depth
, range
->baseMipLevel
+ j
);
1727 ops
->coords(cs
, &(VkOffset2D
){}, NULL
, &(VkExtent2D
) {
1728 u_minify(image
->extent
.width
, range
->baseMipLevel
+ j
),
1729 u_minify(image
->extent
.height
, range
->baseMipLevel
+ j
)
1732 struct tu_image_view dst
;
1733 tu_image_view_copy_blit(&dst
, image
, format
, &(VkImageSubresourceLayers
) {
1734 .aspectMask
= aspect_mask
,
1735 .mipLevel
= range
->baseMipLevel
+ j
,
1736 .baseArrayLayer
= range
->baseArrayLayer
,
1740 for (uint32_t i
= 0; i
< layer_count
; i
++) {
1741 ops
->dst(cs
, &dst
, i
);
1746 ops
->teardown(cmd
, cs
);
1750 tu_CmdClearColorImage(VkCommandBuffer commandBuffer
,
1752 VkImageLayout imageLayout
,
1753 const VkClearColorValue
*pColor
,
1754 uint32_t rangeCount
,
1755 const VkImageSubresourceRange
*pRanges
)
1757 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1758 TU_FROM_HANDLE(tu_image
, image
, image_h
);
1760 tu_bo_list_add(&cmd
->bo_list
, image
->bo
, MSM_SUBMIT_BO_WRITE
);
1762 for (unsigned i
= 0; i
< rangeCount
; i
++)
1763 clear_image(cmd
, image
, (const VkClearValue
*) pColor
, pRanges
+ i
, VK_IMAGE_ASPECT_COLOR_BIT
);
1767 tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer
,
1769 VkImageLayout imageLayout
,
1770 const VkClearDepthStencilValue
*pDepthStencil
,
1771 uint32_t rangeCount
,
1772 const VkImageSubresourceRange
*pRanges
)
1774 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1775 TU_FROM_HANDLE(tu_image
, image
, image_h
);
1777 tu_bo_list_add(&cmd
->bo_list
, image
->bo
, MSM_SUBMIT_BO_WRITE
);
1779 for (unsigned i
= 0; i
< rangeCount
; i
++) {
1780 const VkImageSubresourceRange
*range
= &pRanges
[i
];
1782 if (image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
1783 /* can't clear both depth and stencil at once, split up the aspect mask */
1785 for_each_bit(b
, range
->aspectMask
)
1786 clear_image(cmd
, image
, (const VkClearValue
*) pDepthStencil
, range
, BIT(b
));
1790 clear_image(cmd
, image
, (const VkClearValue
*) pDepthStencil
, range
, range
->aspectMask
);
1795 tu_clear_sysmem_attachments(struct tu_cmd_buffer
*cmd
,
1796 uint32_t attachment_count
,
1797 const VkClearAttachment
*attachments
,
1798 uint32_t rect_count
,
1799 const VkClearRect
*rects
)
1801 /* the shader path here is special, it avoids changing MRT/etc state */
1802 const struct tu_render_pass
*pass
= cmd
->state
.pass
;
1803 const struct tu_subpass
*subpass
= cmd
->state
.subpass
;
1804 const uint32_t mrt_count
= subpass
->color_count
;
1805 struct tu_cs
*cs
= &cmd
->draw_cs
;
1806 uint32_t clear_value
[MAX_RTS
][4];
1807 float z_clear_val
= 0.0f
;
1808 uint8_t s_clear_val
= 0;
1809 uint32_t clear_rts
= 0, clear_components
= 0, num_rts
= 0, b
;
1810 bool z_clear
= false;
1811 bool s_clear
= false;
1812 bool layered_clear
= false;
1813 uint32_t max_samples
= 1;
1815 for (uint32_t i
= 0; i
< attachment_count
; i
++) {
1817 if (attachments
[i
].aspectMask
& VK_IMAGE_ASPECT_COLOR_BIT
) {
1818 uint32_t c
= attachments
[i
].colorAttachment
;
1819 a
= subpass
->color_attachments
[c
].attachment
;
1820 if (a
== VK_ATTACHMENT_UNUSED
)
1823 clear_rts
|= 1 << c
;
1824 clear_components
|= 0xf << (c
* 4);
1825 memcpy(clear_value
[c
], &attachments
[i
].clearValue
, 4 * sizeof(uint32_t));
1827 a
= subpass
->depth_stencil_attachment
.attachment
;
1828 if (a
== VK_ATTACHMENT_UNUSED
)
1831 if (attachments
[i
].aspectMask
& VK_IMAGE_ASPECT_DEPTH_BIT
) {
1833 z_clear_val
= attachments
[i
].clearValue
.depthStencil
.depth
;
1836 if (attachments
[i
].aspectMask
& VK_IMAGE_ASPECT_STENCIL_BIT
) {
1838 s_clear_val
= attachments
[i
].clearValue
.depthStencil
.stencil
& 0xff;
1842 max_samples
= MAX2(max_samples
, pass
->attachments
[a
].samples
);
1845 /* disable all draw states so they don't interfere
1846 * TODO: use and re-use draw states
1847 * we have to disable draw states individually to preserve
1848 * input attachment states, because a secondary command buffer
1849 * won't be able to restore them
1851 tu_cs_emit_pkt7(cs
, CP_SET_DRAW_STATE
, 3 * (TU_DRAW_STATE_COUNT
- 2));
1852 for (uint32_t i
= 0; i
< TU_DRAW_STATE_COUNT
; i
++) {
1853 if (i
== TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM
||
1854 i
== TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM
)
1856 tu_cs_emit(cs
, CP_SET_DRAW_STATE__0_GROUP_ID(i
) |
1857 CP_SET_DRAW_STATE__0_DISABLE
);
1858 tu_cs_emit_qw(cs
, 0);
1860 cmd
->state
.dirty
|= TU_CMD_DIRTY_DRAW_STATE
;
1862 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_CNTL0
, 2);
1863 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
1864 A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
1866 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count
));
1868 tu_cs_emit_pkt4(cs
, REG_A6XX_SP_FS_OUTPUT_REG(0), mrt_count
);
1869 for (uint32_t i
= 0; i
< mrt_count
; i
++) {
1870 if (clear_rts
& (1 << i
))
1871 tu_cs_emit(cs
, A6XX_SP_FS_OUTPUT_REG_REGID(num_rts
++ * 4));
1876 for (uint32_t i
= 0; i
< rect_count
; i
++) {
1877 if (rects
[i
].baseArrayLayer
|| rects
[i
].layerCount
> 1)
1878 layered_clear
= true;
1881 r3d_common(cmd
, cs
, false, num_rts
, layered_clear
);
1884 A6XX_SP_FS_RENDER_COMPONENTS(.dword
= clear_components
));
1886 A6XX_RB_RENDER_COMPONENTS(.dword
= clear_components
));
1889 A6XX_RB_FS_OUTPUT_CNTL0(),
1890 A6XX_RB_FS_OUTPUT_CNTL1(.mrt
= mrt_count
));
1892 tu_cs_emit_regs(cs
, A6XX_SP_BLEND_CNTL());
1893 tu_cs_emit_regs(cs
, A6XX_RB_BLEND_CNTL(.independent_blend
= 1, .sample_mask
= 0xffff));
1894 tu_cs_emit_regs(cs
, A6XX_RB_ALPHA_CONTROL());
1895 for (uint32_t i
= 0; i
< mrt_count
; i
++) {
1896 tu_cs_emit_regs(cs
, A6XX_RB_MRT_CONTROL(i
,
1897 .component_enable
= COND(clear_rts
& (1 << i
), 0xf)));
1900 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_PLANE_CNTL());
1901 tu_cs_emit_regs(cs
, A6XX_RB_DEPTH_CNTL(
1902 .z_enable
= z_clear
,
1903 .z_write_enable
= z_clear
,
1904 .zfunc
= FUNC_ALWAYS
));
1905 tu_cs_emit_regs(cs
, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
1906 tu_cs_emit_regs(cs
, A6XX_RB_STENCIL_CONTROL(
1907 .stencil_enable
= s_clear
,
1908 .func
= FUNC_ALWAYS
,
1909 .zpass
= STENCIL_REPLACE
));
1910 tu_cs_emit_regs(cs
, A6XX_RB_STENCILMASK(.mask
= 0xff));
1911 tu_cs_emit_regs(cs
, A6XX_RB_STENCILWRMASK(.wrmask
= 0xff));
1912 tu_cs_emit_regs(cs
, A6XX_RB_STENCILREF(.ref
= s_clear_val
));
1914 tu_cs_emit_pkt7(cs
, CP_LOAD_STATE6_FRAG
, 3 + 4 * num_rts
);
1915 tu_cs_emit(cs
, CP_LOAD_STATE6_0_DST_OFF(0) |
1916 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS
) |
1917 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT
) |
1918 CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER
) |
1919 CP_LOAD_STATE6_0_NUM_UNIT(num_rts
));
1920 tu_cs_emit(cs
, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
1921 tu_cs_emit(cs
, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
1922 for_each_bit(b
, clear_rts
)
1923 tu_cs_emit_array(cs
, clear_value
[b
], 4);
1925 for (uint32_t i
= 0; i
< rect_count
; i
++) {
1926 for (uint32_t layer
= 0; layer
< rects
[i
].layerCount
; layer
++) {
1927 r3d_coords_raw(cs
, (float[]) {
1928 rects
[i
].rect
.offset
.x
, rects
[i
].rect
.offset
.y
,
1929 z_clear_val
, uif(rects
[i
].baseArrayLayer
+ layer
),
1930 rects
[i
].rect
.offset
.x
+ rects
[i
].rect
.extent
.width
,
1931 rects
[i
].rect
.offset
.y
+ rects
[i
].rect
.extent
.height
,
1940 pack_gmem_clear_value(const VkClearValue
*val
, VkFormat format
, uint32_t clear_value
[4])
1943 case VK_FORMAT_X8_D24_UNORM_PACK32
:
1944 case VK_FORMAT_D24_UNORM_S8_UINT
:
1945 clear_value
[0] = tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 24) |
1946 val
->depthStencil
.stencil
<< 24;
1948 case VK_FORMAT_D16_UNORM
:
1949 clear_value
[0] = tu_pack_float32_for_unorm(val
->depthStencil
.depth
, 16);
1951 case VK_FORMAT_D32_SFLOAT
:
1952 clear_value
[0] = fui(val
->depthStencil
.depth
);
1954 case VK_FORMAT_S8_UINT
:
1955 clear_value
[0] = val
->depthStencil
.stencil
;
1962 memcpy(tmp
, val
->color
.float32
, 4 * sizeof(float));
1963 if (vk_format_is_srgb(format
)) {
1964 for (int i
= 0; i
< 4; i
++)
1965 tmp
[i
] = util_format_linear_to_srgb_float(tmp
[i
]);
1968 #define PACK_F(type) util_format_##type##_pack_rgba_float \
1969 ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
1970 switch (vk_format_get_component_bits(format
, UTIL_FORMAT_COLORSPACE_RGB
, PIPE_SWIZZLE_X
)) {
1972 PACK_F(r4g4b4a4_unorm
);
1975 if (vk_format_get_component_bits(format
, UTIL_FORMAT_COLORSPACE_RGB
, PIPE_SWIZZLE_Y
) == 6)
1976 PACK_F(r5g6b5_unorm
);
1978 PACK_F(r5g5b5a1_unorm
);
1981 if (vk_format_is_snorm(format
))
1982 PACK_F(r8g8b8a8_snorm
);
1983 else if (vk_format_is_unorm(format
))
1984 PACK_F(r8g8b8a8_unorm
);
1986 pack_int8(clear_value
, val
->color
.uint32
);
1989 if (vk_format_is_int(format
))
1990 pack_int10_2(clear_value
, val
->color
.uint32
);
1992 PACK_F(r10g10b10a2_unorm
);
1995 clear_value
[0] = float3_to_r11g11b10f(val
->color
.float32
);
1998 if (vk_format_is_snorm(format
))
1999 PACK_F(r16g16b16a16_snorm
);
2000 else if (vk_format_is_unorm(format
))
2001 PACK_F(r16g16b16a16_unorm
);
2002 else if (vk_format_is_float(format
))
2003 PACK_F(r16g16b16a16_float
);
2005 pack_int16(clear_value
, val
->color
.uint32
);
2008 memcpy(clear_value
, val
->color
.float32
, 4 * sizeof(float));
2011 unreachable("unexpected channel size");
2017 clear_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2021 uint32_t gmem_offset
,
2022 const VkClearValue
*value
)
2024 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_DST_INFO
, 1);
2025 tu_cs_emit(cs
, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format
)));
2027 tu_cs_emit_regs(cs
, A6XX_RB_BLIT_INFO(.gmem
= 1, .clear_mask
= clear_mask
));
2029 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_BASE_GMEM
, 1);
2030 tu_cs_emit(cs
, gmem_offset
);
2032 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_UNKNOWN_88D0
, 1);
2035 uint32_t clear_vals
[4] = {};
2036 pack_gmem_clear_value(value
, format
, clear_vals
);
2038 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0
, 4);
2039 tu_cs_emit_array(cs
, clear_vals
, 4);
2041 tu6_emit_event_write(cmd
, cs
, BLIT
);
2045 tu_emit_clear_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2047 uint32_t attachment
,
2048 VkImageAspectFlags mask
,
2049 const VkClearValue
*value
)
2051 const struct tu_render_pass_attachment
*att
=
2052 &cmd
->state
.pass
->attachments
[attachment
];
2054 if (att
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
2055 if (mask
& VK_IMAGE_ASPECT_DEPTH_BIT
)
2056 clear_gmem_attachment(cmd
, cs
, VK_FORMAT_D32_SFLOAT
, 0xf, att
->gmem_offset
, value
);
2057 if (mask
& VK_IMAGE_ASPECT_STENCIL_BIT
)
2058 clear_gmem_attachment(cmd
, cs
, VK_FORMAT_S8_UINT
, 0xf, att
->gmem_offset_stencil
, value
);
2062 clear_gmem_attachment(cmd
, cs
, att
->format
, aspect_write_mask(att
->format
, mask
), att
->gmem_offset
, value
);
2066 tu_clear_gmem_attachments(struct tu_cmd_buffer
*cmd
,
2067 uint32_t attachment_count
,
2068 const VkClearAttachment
*attachments
,
2069 uint32_t rect_count
,
2070 const VkClearRect
*rects
)
2072 const struct tu_subpass
*subpass
= cmd
->state
.subpass
;
2073 struct tu_cs
*cs
= &cmd
->draw_cs
;
2075 /* TODO: swap the loops for smaller cmdstream */
2076 for (unsigned i
= 0; i
< rect_count
; i
++) {
2077 unsigned x1
= rects
[i
].rect
.offset
.x
;
2078 unsigned y1
= rects
[i
].rect
.offset
.y
;
2079 unsigned x2
= x1
+ rects
[i
].rect
.extent
.width
- 1;
2080 unsigned y2
= y1
+ rects
[i
].rect
.extent
.height
- 1;
2082 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_SCISSOR_TL
, 2);
2083 tu_cs_emit(cs
, A6XX_RB_BLIT_SCISSOR_TL_X(x1
) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1
));
2084 tu_cs_emit(cs
, A6XX_RB_BLIT_SCISSOR_BR_X(x2
) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2
));
2086 for (unsigned j
= 0; j
< attachment_count
; j
++) {
2088 if (attachments
[j
].aspectMask
& VK_IMAGE_ASPECT_COLOR_BIT
)
2089 a
= subpass
->color_attachments
[attachments
[j
].colorAttachment
].attachment
;
2091 a
= subpass
->depth_stencil_attachment
.attachment
;
2093 if (a
== VK_ATTACHMENT_UNUSED
)
2096 tu_emit_clear_gmem_attachment(cmd
, cs
, a
, attachments
[j
].aspectMask
,
2097 &attachments
[j
].clearValue
);
2103 tu_CmdClearAttachments(VkCommandBuffer commandBuffer
,
2104 uint32_t attachmentCount
,
2105 const VkClearAttachment
*pAttachments
,
2107 const VkClearRect
*pRects
)
2109 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
2110 struct tu_cs
*cs
= &cmd
->draw_cs
;
2112 /* sysmem path behaves like a draw, note we don't have a way of using different
2113 * flushes for sysmem/gmem, so this needs to be outside of the cond_exec
2115 tu_emit_cache_flush_renderpass(cmd
, cs
);
2117 /* vkCmdClearAttachments is supposed to respect the predicate if active.
2118 * The easiest way to do this is to always use the 3d path, which always
2119 * works even with GMEM because it's just a simple draw using the existing
2120 * attachment state. However it seems that IGNORE_VISIBILITY draws must be
2121 * skipped in the binning pass, since otherwise they produce binning data
2122 * which isn't consumed and leads to the wrong binning data being read, so
2123 * condition on GMEM | SYSMEM.
2125 if (cmd
->state
.predication_active
) {
2126 tu_cond_exec_start(cs
, CP_COND_EXEC_0_RENDER_MODE_GMEM
|
2127 CP_COND_EXEC_0_RENDER_MODE_SYSMEM
);
2128 tu_clear_sysmem_attachments(cmd
, attachmentCount
, pAttachments
, rectCount
, pRects
);
2129 tu_cond_exec_end(cs
);
2133 tu_cond_exec_start(cs
, CP_COND_EXEC_0_RENDER_MODE_GMEM
);
2134 tu_clear_gmem_attachments(cmd
, attachmentCount
, pAttachments
, rectCount
, pRects
);
2135 tu_cond_exec_end(cs
);
2137 tu_cond_exec_start(cs
, CP_COND_EXEC_0_RENDER_MODE_SYSMEM
);
2138 tu_clear_sysmem_attachments(cmd
, attachmentCount
, pAttachments
, rectCount
, pRects
);
2139 tu_cond_exec_end(cs
);
2143 clear_sysmem_attachment(struct tu_cmd_buffer
*cmd
,
2146 VkImageAspectFlags clear_mask
,
2147 const VkRenderPassBeginInfo
*info
,
2149 bool separate_stencil
)
2151 const struct tu_framebuffer
*fb
= cmd
->state
.framebuffer
;
2152 const struct tu_image_view
*iview
= fb
->attachments
[a
].attachment
;
2153 const struct blit_ops
*ops
= &r2d_ops
;
2154 if (cmd
->state
.pass
->attachments
[a
].samples
> 1)
2157 ops
->setup(cmd
, cs
, format
, clear_mask
, ROTATE_0
, true, iview
->ubwc_enabled
);
2158 ops
->coords(cs
, &info
->renderArea
.offset
, NULL
, &info
->renderArea
.extent
);
2159 ops
->clear_value(cs
, format
, &info
->pClearValues
[a
]);
2161 for (uint32_t i
= 0; i
< fb
->layers
; i
++) {
2162 if (separate_stencil
) {
2163 if (ops
== &r3d_ops
)
2164 r3d_dst_stencil(cs
, iview
, i
);
2166 r2d_dst_stencil(cs
, iview
, i
);
2168 ops
->dst(cs
, iview
, i
);
2173 ops
->teardown(cmd
, cs
);
2177 tu_clear_sysmem_attachment(struct tu_cmd_buffer
*cmd
,
2180 const VkRenderPassBeginInfo
*info
)
2182 const struct tu_render_pass_attachment
*attachment
=
2183 &cmd
->state
.pass
->attachments
[a
];
2185 if (!attachment
->clear_mask
)
2188 /* Wait for any flushes at the beginning of the renderpass to complete */
2191 if (attachment
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
) {
2192 if (attachment
->clear_mask
& VK_IMAGE_ASPECT_DEPTH_BIT
) {
2193 clear_sysmem_attachment(cmd
, cs
, VK_FORMAT_D32_SFLOAT
, VK_IMAGE_ASPECT_COLOR_BIT
,
2196 if (attachment
->clear_mask
& VK_IMAGE_ASPECT_STENCIL_BIT
) {
2197 clear_sysmem_attachment(cmd
, cs
, VK_FORMAT_S8_UINT
, VK_IMAGE_ASPECT_COLOR_BIT
,
2201 clear_sysmem_attachment(cmd
, cs
, attachment
->format
, attachment
->clear_mask
,
2205 /* The spec doesn't explicitly say, but presumably the initial renderpass
2206 * clear is considered part of the renderpass, and therefore barriers
2207 * aren't required inside the subpass/renderpass. Therefore we need to
2208 * flush CCU color into CCU depth here, just like with
2209 * vkCmdClearAttachments(). Note that because this only happens at the
2210 * beginning of a renderpass, and renderpass writes are considered
2211 * "incoherent", we shouldn't have to worry about syncing depth into color
2212 * beforehand as depth should already be flushed.
2214 if (vk_format_is_depth_or_stencil(attachment
->format
)) {
2215 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
);
2216 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_DEPTH
);
2218 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
);
2219 tu6_emit_event_write(cmd
, cs
, PC_CCU_INVALIDATE_COLOR
);
2224 tu_clear_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2227 const VkRenderPassBeginInfo
*info
)
2229 const struct tu_render_pass_attachment
*attachment
=
2230 &cmd
->state
.pass
->attachments
[a
];
2232 if (!attachment
->clear_mask
)
2235 tu_cs_emit_regs(cs
, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment
->samples
)));
2237 tu_emit_clear_gmem_attachment(cmd
, cs
, a
, attachment
->clear_mask
,
2238 &info
->pClearValues
[a
]);
2242 tu_emit_blit(struct tu_cmd_buffer
*cmd
,
2244 const struct tu_image_view
*iview
,
2245 const struct tu_render_pass_attachment
*attachment
,
2247 bool separate_stencil
)
2250 A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment
->samples
)));
2252 tu_cs_emit_regs(cs
, A6XX_RB_BLIT_INFO(
2255 /* "integer" bit disables msaa resolve averaging */
2256 .integer
= vk_format_is_int(attachment
->format
)));
2258 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_DST_INFO
, 4);
2259 if (separate_stencil
) {
2260 tu_cs_emit(cs
, tu_image_view_stencil(iview
, RB_BLIT_DST_INFO
) & ~A6XX_RB_BLIT_DST_INFO_FLAGS
);
2261 tu_cs_emit_qw(cs
, iview
->stencil_base_addr
);
2262 tu_cs_emit(cs
, iview
->stencil_PITCH
);
2265 A6XX_RB_BLIT_BASE_GMEM(attachment
->gmem_offset_stencil
));
2267 tu_cs_emit(cs
, iview
->RB_BLIT_DST_INFO
);
2268 tu_cs_image_ref_2d(cs
, iview
, 0, false);
2270 tu_cs_emit_pkt4(cs
, REG_A6XX_RB_BLIT_FLAG_DST_LO
, 3);
2271 tu_cs_image_flag_ref(cs
, iview
, 0);
2274 A6XX_RB_BLIT_BASE_GMEM(attachment
->gmem_offset
));
2277 tu6_emit_event_write(cmd
, cs
, BLIT
);
2281 blit_can_resolve(VkFormat format
)
2283 const struct util_format_description
*desc
= vk_format_description(format
);
2285 /* blit event can only do resolve for simple cases:
2286 * averaging samples as unsigned integers or choosing only one sample
2288 if (vk_format_is_snorm(format
) || vk_format_is_srgb(format
))
2291 /* can't do formats with larger channel sizes
2292 * note: this includes all float formats
2293 * note2: single channel integer formats seem OK
2295 if (desc
->channel
[0].size
> 10)
2299 /* for unknown reasons blit event can't msaa resolve these formats when tiled
2300 * likely related to these formats having different layout from other cpp=2 formats
2302 case VK_FORMAT_R8G8_UNORM
:
2303 case VK_FORMAT_R8G8_UINT
:
2304 case VK_FORMAT_R8G8_SINT
:
2305 /* TODO: this one should be able to work? */
2306 case VK_FORMAT_D24_UNORM_S8_UINT
:
2316 tu_load_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2321 const struct tu_image_view
*iview
=
2322 cmd
->state
.framebuffer
->attachments
[a
].attachment
;
2323 const struct tu_render_pass_attachment
*attachment
=
2324 &cmd
->state
.pass
->attachments
[a
];
2326 if (attachment
->load
|| force_load
)
2327 tu_emit_blit(cmd
, cs
, iview
, attachment
, false, false);
2329 if (attachment
->load_stencil
|| (attachment
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
&& force_load
))
2330 tu_emit_blit(cmd
, cs
, iview
, attachment
, false, true);
2334 store_cp_blit(struct tu_cmd_buffer
*cmd
,
2336 struct tu_image_view
*iview
,
2338 bool separate_stencil
,
2340 uint32_t gmem_offset
,
2343 r2d_setup_common(cmd
, cs
, format
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, false,
2344 iview
->ubwc_enabled
, true);
2345 if (separate_stencil
)
2346 r2d_dst_stencil(cs
, iview
, 0);
2348 r2d_dst(cs
, iview
, 0);
2351 A6XX_SP_PS_2D_SRC_INFO(
2352 .color_format
= tu6_format_texture(format
, TILE6_2
).fmt
,
2353 .tile_mode
= TILE6_2
,
2354 .srgb
= vk_format_is_srgb(format
),
2355 .samples
= tu_msaa_samples(samples
),
2356 .samples_average
= !vk_format_is_int(format
),
2359 /* note: src size does not matter when not scaling */
2360 A6XX_SP_PS_2D_SRC_SIZE( .width
= 0x3fff, .height
= 0x3fff),
2361 A6XX_SP_PS_2D_SRC_LO(cmd
->device
->physical_device
->gmem_base
+ gmem_offset
),
2362 A6XX_SP_PS_2D_SRC_HI(),
2363 A6XX_SP_PS_2D_SRC_PITCH(.pitch
= cmd
->state
.framebuffer
->tile0
.width
* cpp
));
2365 /* sync GMEM writes with CACHE. */
2366 tu6_emit_event_write(cmd
, cs
, CACHE_INVALIDATE
);
2368 /* Wait for CACHE_INVALIDATE to land */
2371 tu_cs_emit_pkt7(cs
, CP_BLIT
, 1);
2372 tu_cs_emit(cs
, CP_BLIT_0_OP(BLIT_OP_SCALE
));
2374 /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
2375 * sysmem, and we generally assume that GMEM renderpasses leave their
2376 * results in sysmem, so we need to flush manually here.
2378 tu6_emit_event_write(cmd
, cs
, PC_CCU_FLUSH_COLOR_TS
);
2382 tu_store_gmem_attachment(struct tu_cmd_buffer
*cmd
,
2387 const VkRect2D
*render_area
= &cmd
->state
.render_area
;
2388 struct tu_render_pass_attachment
*dst
= &cmd
->state
.pass
->attachments
[a
];
2389 struct tu_image_view
*iview
= cmd
->state
.framebuffer
->attachments
[a
].attachment
;
2390 struct tu_render_pass_attachment
*src
= &cmd
->state
.pass
->attachments
[gmem_a
];
2392 if (!dst
->store
&& !dst
->store_stencil
)
2395 uint32_t x1
= render_area
->offset
.x
;
2396 uint32_t y1
= render_area
->offset
.y
;
2397 uint32_t x2
= x1
+ render_area
->extent
.width
;
2398 uint32_t y2
= y1
+ render_area
->extent
.height
;
2399 /* x2/y2 can be unaligned if equal to the size of the image,
2400 * since it will write into padding space
2401 * the one exception is linear levels which don't have the
2402 * required y padding in the layout (except for the last level)
2404 bool need_y2_align
=
2405 y2
!= iview
->extent
.height
|| iview
->need_y2_align
;
2408 x1
% GMEM_ALIGN_W
|| (x2
% GMEM_ALIGN_W
&& x2
!= iview
->extent
.width
) ||
2409 y1
% GMEM_ALIGN_H
|| (y2
% GMEM_ALIGN_H
&& need_y2_align
);
2411 /* use fast path when render area is aligned, except for unsupported resolve cases */
2412 if (!unaligned
&& (a
== gmem_a
|| blit_can_resolve(dst
->format
))) {
2414 tu_emit_blit(cmd
, cs
, iview
, src
, true, false);
2415 if (dst
->store_stencil
)
2416 tu_emit_blit(cmd
, cs
, iview
, src
, true, true);
2420 if (dst
->samples
> 1) {
2421 /* I guess we need to use shader path in this case?
2422 * need a testcase which fails because of this
2424 tu_finishme("unaligned store of msaa attachment\n");
2428 r2d_coords(cs
, &render_area
->offset
, &render_area
->offset
, &render_area
->extent
);
2430 VkFormat format
= src
->format
;
2431 if (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
2432 format
= VK_FORMAT_D32_SFLOAT
;
2435 store_cp_blit(cmd
, cs
, iview
, src
->samples
, false, format
,
2436 src
->gmem_offset
, src
->cpp
);
2438 if (dst
->store_stencil
) {
2439 store_cp_blit(cmd
, cs
, iview
, src
->samples
, true, VK_FORMAT_S8_UINT
,
2440 src
->gmem_offset_stencil
, src
->samples
);