/*
 * Copyright 2019-2020 Valve Corporation
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include "tu_private.h"

#include "tu_cs.h"
#include "vk_format.h"

#include "util/format_r11g11b10f.h"
#include "util/format_rgb9e5.h"
#include "util/format_srgb.h"
#include "util/u_half.h"

static uint32_t
tu_pack_float32_for_unorm(float val, int bits)
{
   return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1 << bits) - 1));
}

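/* Example: tu_pack_float32_for_unorm(0.5f, 8) computes
 * lroundeven(0.5 * 255.0) = lroundeven(127.5) = 128, since ties round to
 * the nearest even integer (round-to-nearest-even UNORM conversion).
 */
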
/* r2d_ = BLIT_OP_SCALE operations */

static enum a6xx_2d_ifmt
format_to_ifmt(enum a6xx_format fmt)
{
   switch (fmt) {
   case FMT6_A8_UNORM:
   case FMT6_8_UNORM:
   case FMT6_8_SNORM:
   case FMT6_8_8_UNORM:
   case FMT6_8_8_SNORM:
   case FMT6_8_8_8_8_UNORM:
   case FMT6_8_8_8_X8_UNORM:
   case FMT6_8_8_8_8_SNORM:
   case FMT6_4_4_4_4_UNORM:
   case FMT6_5_5_5_1_UNORM:
   case FMT6_5_6_5_UNORM:
   case FMT6_Z24_UNORM_S8_UINT:
   case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
      return R2D_UNORM8;

   case FMT6_32_UINT:
   case FMT6_32_SINT:
   case FMT6_32_32_UINT:
   case FMT6_32_32_SINT:
   case FMT6_32_32_32_32_UINT:
   case FMT6_32_32_32_32_SINT:
      return R2D_INT32;

   case FMT6_16_UINT:
   case FMT6_16_SINT:
   case FMT6_16_16_UINT:
   case FMT6_16_16_SINT:
   case FMT6_16_16_16_16_UINT:
   case FMT6_16_16_16_16_SINT:
   case FMT6_10_10_10_2_UINT:
      return R2D_INT16;

   case FMT6_8_UINT:
   case FMT6_8_SINT:
   case FMT6_8_8_UINT:
   case FMT6_8_8_SINT:
   case FMT6_8_8_8_8_UINT:
   case FMT6_8_8_8_8_SINT:
      return R2D_INT8;

   case FMT6_16_UNORM:
   case FMT6_16_SNORM:
   case FMT6_16_16_UNORM:
   case FMT6_16_16_SNORM:
   case FMT6_16_16_16_16_UNORM:
   case FMT6_16_16_16_16_SNORM:
   case FMT6_32_FLOAT:
   case FMT6_32_32_FLOAT:
   case FMT6_32_32_32_32_FLOAT:
      return R2D_FLOAT32;

   case FMT6_16_FLOAT:
   case FMT6_16_16_FLOAT:
   case FMT6_16_16_16_16_FLOAT:
   case FMT6_11_11_10_FLOAT:
   case FMT6_10_10_10_2_UNORM:
   case FMT6_10_10_10_2_UNORM_DEST:
      return R2D_FLOAT16;

   default:
      unreachable("bad format");
      return 0;
   }
}

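/* The ifmt groups above determine how r2d_clear_value() encodes the solid
 * clear color: R2D_UNORM8 packs each channel to 8-bit (s)norm, R2D_FLOAT16
 * converts to half floats, and the remaining variants use the raw 32-bit
 * words of the clear value.
 */
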
static void
r2d_coords(struct tu_cs *cs,
           const VkOffset2D *dst,
           const VkOffset2D *src,
           const VkExtent2D *extent)
{
   tu_cs_emit_regs(cs,
      A6XX_GRAS_2D_DST_TL(.x = dst->x, .y = dst->y),
      A6XX_GRAS_2D_DST_BR(.x = dst->x + extent->width - 1, .y = dst->y + extent->height - 1));

   if (!src)
      return;

   tu_cs_emit_regs(cs,
      A6XX_GRAS_2D_SRC_TL_X(.x = src->x),
      A6XX_GRAS_2D_SRC_BR_X(.x = src->x + extent->width - 1),
      A6XX_GRAS_2D_SRC_TL_Y(.y = src->y),
      A6XX_GRAS_2D_SRC_BR_Y(.y = src->y + extent->height - 1));
}

static void
r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
{
   uint32_t clear_value[4] = {};

   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      /* cleared as r8g8b8a8_unorm using special format */
      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
      clear_value[1] = clear_value[0] >> 8;
      clear_value[2] = clear_value[0] >> 16;
      clear_value[3] = val->depthStencil.stencil;
      break;
   case VK_FORMAT_D16_UNORM:
   case VK_FORMAT_D32_SFLOAT:
      clear_value[0] = fui(val->depthStencil.depth);
      break;
   case VK_FORMAT_S8_UINT:
      clear_value[0] = val->depthStencil.stencil;
      break;
   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      /* cleared as UINT32 */
      clear_value[0] = float3_to_rgb9e5(val->color.float32);
      break;
   default:
      assert(!vk_format_is_depth_or_stencil(format));
      const struct util_format_description *desc = vk_format_description(format);
      enum a6xx_2d_ifmt ifmt = format_to_ifmt(tu6_base_format(format));

      assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
                      format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));

      for (unsigned i = 0; i < desc->nr_channels; i++) {
         const struct util_format_channel_description *ch = &desc->channel[i];
         if (ifmt == R2D_UNORM8) {
            float linear = val->color.float32[i];
            if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3)
               linear = util_format_linear_to_srgb_float(val->color.float32[i]);

            if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
               clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f);
            else
               clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
         } else if (ifmt == R2D_FLOAT16) {
            clear_value[i] = util_float_to_half(val->color.float32[i]);
         } else {
            assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 ||
                   ifmt == R2D_INT16 || ifmt == R2D_INT8);
            clear_value[i] = val->color.uint32[i];
         }
      }
      break;
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
   tu_cs_emit_array(cs, clear_value, 4);
}

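/* e.g. a VK_FORMAT_R16G16B16A16_SFLOAT clear takes the R2D_FLOAT16 branch:
 * each float32 channel is converted with util_float_to_half() before being
 * written to RB_2D_SRC_SOLID_C0..C3.
 */
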
static void
r2d_src(struct tu_cmd_buffer *cmd,
        struct tu_cs *cs,
        const struct tu_image_view *iview,
        uint32_t layer,
        VkFilter filter)
{
   uint32_t src_info = iview->SP_PS_2D_SRC_INFO;
   if (filter != VK_FILTER_NEAREST)
      src_info |= A6XX_SP_PS_2D_SRC_INFO_FILTER;

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
   tu_cs_emit(cs, src_info);
   tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE);
   tu_cs_image_ref_2d(cs, iview, layer, true);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 3);
   tu_cs_image_flag_ref(cs, iview, layer);
}

static void
r2d_src_buffer(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               VkFormat vk_format,
               uint64_t va, uint32_t pitch,
               uint32_t width, uint32_t height)
{
   struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);

   tu_cs_emit_regs(cs,
                   A6XX_SP_PS_2D_SRC_INFO(
                      .color_format = format.fmt,
                      .color_swap = format.swap,
                      .srgb = vk_format_is_srgb(vk_format)),
                   A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height),
                   A6XX_SP_PS_2D_SRC_LO((uint32_t) va),
                   A6XX_SP_PS_2D_SRC_HI(va >> 32),
                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = pitch));
}

static void
r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
   assert(iview->image->samples == 1);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
   tu_cs_emit(cs, iview->RB_2D_DST_INFO);
   tu_cs_image_ref_2d(cs, iview, layer, false);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 3);
   tu_cs_image_flag_ref(cs, iview, layer);
}

static void
r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
   struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);

   tu_cs_emit_regs(cs,
                   A6XX_RB_2D_DST_INFO(
                      .color_format = format.fmt,
                      .color_swap = format.swap,
                      .srgb = vk_format_is_srgb(vk_format)),
                   A6XX_RB_2D_DST_LO((uint32_t) va),
                   A6XX_RB_2D_DST_HI(va >> 32),
                   A6XX_RB_2D_DST_SIZE(.pitch = pitch));
}

static void
r2d_setup_common(struct tu_cmd_buffer *cmd,
                 struct tu_cs *cs,
                 VkFormat vk_format,
                 VkImageAspectFlags aspect_mask,
                 enum a6xx_rotation rotation,
                 bool clear,
                 bool scissor)
{
   enum a6xx_format format = tu6_base_format(vk_format);
   enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
   uint32_t unknown_8c01 = 0;

   /* note: the only format with partial clearing is D24S8 */
   if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      /* preserve stencil channel */
      if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
         unknown_8c01 = 0x08000041;
      /* preserve depth channels */
      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
         unknown_8c01 = 0x00084001;
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
   tu_cs_emit(cs, unknown_8c01);

   uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL(
      .scissor = scissor,
      .rotate = rotation,
      .solid_color = clear,
      .d24s8 = format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
      .color_format = format,
      .mask = 0xf,
      .ifmt = vk_format_is_srgb(vk_format) ? R2D_UNORM8_SRGB : ifmt,
   ).value;

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
   tu_cs_emit(cs, blit_cntl);

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
   tu_cs_emit(cs, blit_cntl);

   if (format == FMT6_10_10_10_2_UNORM_DEST)
      format = FMT6_16_16_16_16_FLOAT;

   tu_cs_emit_regs(cs, A6XX_SP_2D_SRC_FORMAT(
      .sint = vk_format_is_sint(vk_format),
      .uint = vk_format_is_uint(vk_format),
      .color_format = format,
      .srgb = vk_format_is_srgb(vk_format),
      .mask = 0xf));
}

static void
r2d_setup(struct tu_cmd_buffer *cmd,
          struct tu_cs *cs,
          VkFormat vk_format,
          VkImageAspectFlags aspect_mask,
          enum a6xx_rotation rotation,
          bool clear)
{
   tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);

   r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, false);
}

static void
r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
}

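/* A single CP_BLIT packet with BLIT_OP_SCALE executes one blit or clear
 * using all of the RB_2D_, GRAS_2D_ and SP_PS_2D_ state emitted by the
 * helpers above; callers re-emit coords/src/dst between runs (see the
 * per-layer loops below).
 */
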
/* r3d_ = shader path operations */

void
tu_init_clear_blit_shaders(struct tu6_global *global)
{
#define MOV(args...) { .cat1 = { .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32, args } }
#define CAT2(op, args...) { .cat2 = { .opc_cat = 2, .opc = (op) & 63, .full = 1, args } }
#define CAT3(op, args...) { .cat3 = { .opc_cat = 3, .opc = (op) & 63, args } }

   static const instr_t vs_code[] = {
      /* r0.xyz = r0.w ? c1.xyz : c0.xyz
       * r1.xy = r0.w ? c1.zw : c0.zw
       * r0.w = 1.0f
       */
      CAT3(OPC_SEL_B32, .repeat = 2, .dst = 0,
           .c1 = {.src1_c = 1, .src1 = 4}, .src1_r = 1,
           .src2 = 3,
           .c2 = {.src3_c = 1, .dummy = 1, .src3 = 0}),
      CAT3(OPC_SEL_B32, .repeat = 1, .dst = 4,
           .c1 = {.src1_c = 1, .src1 = 6}, .src1_r = 1,
           .src2 = 3,
           .c2 = {.src3_c = 1, .dummy = 1, .src3 = 2}),
      MOV(.dst = 3, .src_im = 1, .fim_val = 1.0f),
      { .cat0 = { .opc = OPC_END } },
   };

   static const instr_t vs_layered[] = {
      { .cat0 = { .opc = OPC_CHMASK } },
      { .cat0 = { .opc = OPC_CHSH } },
   };

   static const instr_t gs_code[] = {
      /* (sy)(ss)(nop3)shr.b r0.w, r0.x, 16 (extract local_id) */
      CAT2(OPC_SHR_B, .dst = 3, .src1 = 0, .src2_im = 1, .src2 = 16,
           .src1_r = 1, .src2_r = 1, .ss = 1, .sync = 1),
      /* x = (local_id & 1) ? c1.x : c0.x */
      CAT2(OPC_AND_B, .dst = 0, .src1 = 3, .src2_im = 1, .src2 = 1),
      /* y = (local_id & 2) ? c1.y : c0.y */
      CAT2(OPC_AND_B, .dst = 1, .src1 = 3, .src2_im = 1, .src2 = 2),
      /* pred = (local_id >= 4), used by OPC_KILL */
      CAT2(OPC_CMPS_S, .dst = REG_P0 * 4, .cond = IR3_COND_GE, .src1 = 3, .src2_im = 1, .src2 = 4),
      /* vertex_flags_out = (local_id == 0) ? 4 : 0 - first vertex flag */
      CAT2(OPC_CMPS_S, .dst = 4, .cond = IR3_COND_EQ, .src1 = 3, .src2_im = 1, .src2 = 0),

      MOV(.dst = 2, .src_c = 1, .src = 2), /* depth clear value from c0.z */
      MOV(.dst = 3, .src_im = 1, .fim_val = 1.0f),
      MOV(.dst = 5, .src_c = 1, .src = 3), /* layer id from c0.w */

      /* (rpt1)sel.b32 r0.x, (r)c1.x, (r)r0.x, (r)c0.x */
      CAT3(OPC_SEL_B32, .repeat = 1, .dst = 0,
           .c1 = {.src1_c = 1, .src1 = 4, .dummy = 4}, .src1_r = 1,
           .src2 = 0,
           .c2 = {.src3_c = 1, .dummy = 1, .src3 = 0}),

      CAT2(OPC_SHL_B, .dst = 4, .src1 = 4, .src2_im = 1, .src2 = 2),

      { .cat0 = { .opc = OPC_KILL } },
      { .cat0 = { .opc = OPC_END, .ss = 1, .sync = 1 } },
   };

   static const instr_t fs_blit[] = {
      /* "bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its
       * blit path (it's not clear what allows it to not have it)
       */
      CAT2(OPC_BARY_F, .ei = 1, .full = 1, .dst = 63 * 4, .src1_im = 1),
      { .cat0 = { .opc = OPC_END } },
   };

   memcpy(&global->shaders[GLOBAL_SH_VS], vs_code, sizeof(vs_code));
   memcpy(&global->shaders[GLOBAL_SH_VS_LAYER], vs_layered, sizeof(vs_layered));
   memcpy(&global->shaders[GLOBAL_SH_GS_LAYER], gs_code, sizeof(gs_code));
   memcpy(&global->shaders[GLOBAL_SH_FS_BLIT], fs_blit, sizeof(fs_blit));

   for (uint32_t num_rts = 0; num_rts <= MAX_RTS; num_rts++) {
      instr_t *code = global->shaders[GLOBAL_SH_FS_CLEAR0 + num_rts];
      for (uint32_t i = 0; i < num_rts; i++) {
         /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
         *code++ = (instr_t) MOV(.repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4);
      }
      *code++ = (instr_t) { .cat0 = { .opc = OPC_END } };
   }
}

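/* These shaders are hand-assembled ir3 machine words written straight into
 * the global BO, so no compiler runs at blit/clear time. The clear fragment
 * shader for N render targets is simply N (rpt3)mov instructions (one vec4
 * constant per MRT) followed by an end.
 */
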
static void
r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_rts,
           bool layered_clear)
{
   struct ir3_const_state dummy_const_state = {};
   struct ir3_shader dummy_shader = {};

   struct ir3_shader_variant vs = {
      .type = MESA_SHADER_VERTEX,
      .instrlen = 1,
      .constlen = 4,
      .inputs_count = 1,
      .inputs[0] = {
         .slot = SYSTEM_VALUE_VERTEX_ID,
         .regid = regid(0, 3),
         .sysval = true,
      },
      .outputs_count = blit ? 2 : 1,
      .outputs[0] = {
         .slot = VARYING_SLOT_POS,
         .regid = regid(0, 0),
      },
      .outputs[1] = {
         .slot = VARYING_SLOT_VAR0,
         .regid = regid(1, 0),
      },
      .shader = &dummy_shader,
      .const_state = &dummy_const_state,
   };
   if (layered_clear) {
      vs = (struct ir3_shader_variant) {
         .type = MESA_SHADER_VERTEX,
         .instrlen = 1,
         .shader = &dummy_shader,
         .const_state = &dummy_const_state,
      };
   }

   struct ir3_shader_variant fs = {
      .type = MESA_SHADER_FRAGMENT,
      .instrlen = 1, /* max of 9 instructions with num_rts = 8 */
      .constlen = align(num_rts, 4),
      .info.max_reg = MAX2(num_rts, 1) - 1,
      .total_in = blit ? 2 : 0,
      .num_samp = blit ? 1 : 0,
      .inputs_count = blit ? 2 : 0,
      .inputs[0] = {
         .slot = VARYING_SLOT_VAR0,
         .inloc = 0,
         .compmask = 3,
         .bary = true,
      },
      .inputs[1] = {
         .slot = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL,
         .regid = regid(0, 0),
         .sysval = 1,
      },
      .num_sampler_prefetch = blit ? 1 : 0,
      .sampler_prefetch[0] = {
         .src = 0,
         .wrmask = 0xf,
         .cmd = 4,
      },
      .shader = &dummy_shader,
      .const_state = &dummy_const_state,
   };

   struct ir3_shader_variant gs_shader = {
      .type = MESA_SHADER_GEOMETRY,
      .instrlen = 1,
      .constlen = 4,
      .inputs_count = 1,
      .inputs[0] = {
         .slot = SYSTEM_VALUE_GS_HEADER_IR3,
         .regid = regid(0, 0),
         .sysval = true,
      },
      .outputs_count = 3,
      .outputs[0] = {
         .slot = VARYING_SLOT_POS,
         .regid = regid(0, 0),
      },
      .outputs[1] = {
         .slot = VARYING_SLOT_LAYER,
         .regid = regid(1, 1),
      },
      .outputs[2] = {
         .slot = VARYING_SLOT_GS_VERTEX_FLAGS_IR3,
         .regid = regid(1, 0),
      },
      .shader = &dummy_shader,
      .const_state = &dummy_const_state,
   }, *gs = layered_clear ? &gs_shader : NULL;

   tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));

   tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs,
                      global_iova(cmd, shaders[gs ? GLOBAL_SH_VS_LAYER : GLOBAL_SH_VS]));
   tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0);
   tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL, 0);
   tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, gs,
                      global_iova(cmd, shaders[GLOBAL_SH_GS_LAYER]));
   tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs,
                      global_iova(cmd, shaders[blit ? GLOBAL_SH_FS_BLIT : (GLOBAL_SH_FS_CLEAR0 + num_rts)]));

   tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0());
   tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0());

   tu6_emit_vpc(cs, &vs, NULL, NULL, gs, &fs);

   /* REPL_MODE for varying with RECTLIST (2 vertices only) */
   tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0));
   tu_cs_emit_regs(cs, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0));

   tu6_emit_fs_inputs(cs, &fs);

   tu_cs_emit_regs(cs,
                   A6XX_GRAS_CL_CNTL(
                      .persp_division_disable = 1,
                      .vp_xform_disable = 1,
                      .vp_clip_code_ignore = 1),
                   A6XX_GRAS_UNKNOWN_8001(0));
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?

   tu_cs_emit_regs(cs,
                   A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0(.x = 0, .y = 0),
                   A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));
   tu_cs_emit_regs(cs,
                   A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0(.x = 0, .y = 0),
                   A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0(.x = 0x7fff, .y = 0x7fff));

   tu_cs_emit_regs(cs,
                   A6XX_VFD_INDEX_OFFSET(),
                   A6XX_VFD_INSTANCE_START_OFFSET());
}

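/* r3d_common() hand-programs a complete minimal pipeline for these draws:
 * the global VS/GS/FS binaries from tu_init_clear_blit_shaders(), plus a
 * GRAS_CL_CNTL setup with perspective division and the viewport transform
 * disabled, so the coordinates fed in below appear to pass through as raw
 * window coordinates.
 */
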
static void
r3d_coords_raw(struct tu_cs *cs, bool gs, const float *coords)
{
   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(gs ? SB6_GS_SHADER : SB6_VS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(2));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   tu_cs_emit_array(cs, (const uint32_t *) coords, 8);
}

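/* The eight floats become c0.xyzw/c1.xyzw of the VS (or the GS for layered
 * clears); the sel.b32 instructions in vs_code/gs_code pick between c0 and
 * c1 per vertex, as described in the shader comments above.
 */
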
static void
r3d_coords(struct tu_cs *cs,
           const VkOffset2D *dst,
           const VkOffset2D *src,
           const VkExtent2D *extent)
{
   int32_t src_x1 = src ? src->x : 0;
   int32_t src_y1 = src ? src->y : 0;
   r3d_coords_raw(cs, false, (float[]) {
      dst->x, dst->y,
      src_x1, src_y1,
      dst->x + extent->width, dst->y + extent->height,
      src_x1 + extent->width, src_y1 + extent->height,
   });
}

static void
r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
{
   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(1));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT: {
      /* cleared as r8g8b8a8_unorm using special format */
      uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
      tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f));
      tu_cs_emit(cs, fui((tmp >> 8 & 0xff) / 255.0f));
      tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f));
      tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f));
   } break;
   case VK_FORMAT_D16_UNORM:
   case VK_FORMAT_D32_SFLOAT:
      tu_cs_emit(cs, fui(val->depthStencil.depth));
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      break;
   case VK_FORMAT_S8_UINT:
      tu_cs_emit(cs, val->depthStencil.stencil & 0xff);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, 0);
      break;
   default:
      /* as color formats use clear value as-is */
      assert(!vk_format_is_depth_or_stencil(format));
      tu_cs_emit_array(cs, val->color.uint32, 4);
      break;
   }
}

static void
r3d_src_common(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               const uint32_t *tex_const,
               uint32_t offset_base,
               uint32_t offset_ubwc,
               VkFilter filter)
{
   struct tu_cs_memory texture = { };
   VkResult result = tu_cs_alloc(&cmd->sub_cs,
                                 2, /* allocate space for a sampler too */
                                 A6XX_TEX_CONST_DWORDS, &texture);
   assert(result == VK_SUCCESS);

   memcpy(texture.map, tex_const, A6XX_TEX_CONST_DWORDS * 4);

   /* patch addresses for layer offset */
   *(uint64_t*) (texture.map + 4) += offset_base;
   uint64_t ubwc_addr = (texture.map[7] | (uint64_t) texture.map[8] << 32) + offset_ubwc;
   texture.map[7] = ubwc_addr;
   texture.map[8] = ubwc_addr >> 32;

   texture.map[A6XX_TEX_CONST_DWORDS + 0] =
      A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(filter, false)) |
      A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(filter, false)) |
      A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE) |
      A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE) |
      A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) |
      0x60000; /* XXX used by blob, doesn't seem necessary */
   texture.map[A6XX_TEX_CONST_DWORDS + 1] =
      0x1 | /* XXX used by blob, doesn't seem necessary */
      A6XX_TEX_SAMP_1_UNNORM_COORDS |
      A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR;
   texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0;
   texture.map[A6XX_TEX_CONST_DWORDS + 3] = 0;

   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
                  CP_LOAD_STATE6_0_NUM_UNIT(1));
   tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_SAMP_LO, 2);
   tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4);

   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
                  CP_LOAD_STATE6_0_NUM_UNIT(1));
   tu_cs_emit_qw(cs, texture.iova);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_TEX_CONST_LO, 2);
   tu_cs_emit_qw(cs, texture.iova);

   tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(1));
}

static void
r3d_src(struct tu_cmd_buffer *cmd,
        struct tu_cs *cs,
        const struct tu_image_view *iview,
        uint32_t layer,
        VkFilter filter)
{
   r3d_src_common(cmd, cs, iview->descriptor,
                  iview->layer_size * layer,
                  iview->ubwc_layer_size * layer,
                  filter);
}

static void
r3d_src_buffer(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               VkFormat vk_format,
               uint64_t va, uint32_t pitch,
               uint32_t width, uint32_t height)
{
   uint32_t desc[A6XX_TEX_CONST_DWORDS];

   struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);

   desc[0] =
      COND(vk_format_is_srgb(vk_format), A6XX_TEX_CONST_0_SRGB) |
      A6XX_TEX_CONST_0_FMT(format.fmt) |
      A6XX_TEX_CONST_0_SWAP(format.swap) |
      A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
      // XXX to swizzle into .w for stencil buffer_to_image
      A6XX_TEX_CONST_0_SWIZ_Y(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) |
      A6XX_TEX_CONST_0_SWIZ_Z(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Z) |
      A6XX_TEX_CONST_0_SWIZ_W(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W);
   desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
   desc[2] =
      A6XX_TEX_CONST_2_PITCH(pitch) |
      A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
   desc[3] = 0;
   desc[4] = va;
   desc[5] = va >> 32;
   for (uint32_t i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
      desc[i] = 0;

   r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
}

static void
r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
   tu6_emit_msaa(cs, iview->image->samples); /* TODO: move to setup */

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
   tu_cs_emit(cs, iview->RB_MRT_BUF_INFO);
   tu_cs_image_ref(cs, iview, layer);
   tu_cs_emit(cs, 0);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
   tu_cs_image_flag_ref(cs, iview, layer);

   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
}

static void
r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
{
   struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);

   tu6_emit_msaa(cs, 1); /* TODO: move to setup */

   tu_cs_emit_regs(cs,
                   A6XX_RB_MRT_BUF_INFO(0, .color_format = format.fmt, .color_swap = format.swap),
                   A6XX_RB_MRT_PITCH(0, pitch),
                   A6XX_RB_MRT_ARRAY_PITCH(0, 0),
                   A6XX_RB_MRT_BASE_LO(0, (uint32_t) va),
                   A6XX_RB_MRT_BASE_HI(0, va >> 32),
                   A6XX_RB_MRT_BASE_GMEM(0, 0));

   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
}

static uint8_t
aspect_write_mask(VkFormat vk_format, VkImageAspectFlags aspect_mask)
{
   uint8_t mask = 0xf;
   assert(aspect_mask);
   /* note: the only format with partial writing is D24S8,
    * clear/blit uses the _AS_R8G8B8A8 format to access it
    */
   if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
         mask = 0x7;
      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
         mask = 0x8;
   }
   return mask;
}

static void
r3d_setup(struct tu_cmd_buffer *cmd,
          struct tu_cs *cs,
          VkFormat vk_format,
          VkImageAspectFlags aspect_mask,
          enum a6xx_rotation rotation,
          bool clear)
{
   if (!cmd->state.pass) {
      tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
      tu6_emit_window_scissor(cs, 0, 0, 0x7fff, 0x7fff);
   }

   tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000));
   tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000));

   r3d_common(cmd, cs, !clear, clear ? 1 : 0, false);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
                  0xfc000000);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 1);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(0));

   tu_cs_emit_regs(cs,
                   A6XX_RB_FS_OUTPUT_CNTL0(),
                   A6XX_RB_FS_OUTPUT_CNTL1(.mrt = 1));

   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.sample_mask = 0xffff));
   tu_cs_emit_regs(cs, A6XX_RB_ALPHA_CONTROL());

   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL());
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK());
   tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK());
   tu_cs_emit_regs(cs, A6XX_RB_STENCILREF());

   tu_cs_emit_regs(cs, A6XX_RB_RENDER_COMPONENTS(.rt0 = 0xf));
   tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf));

   tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0,
                        .color_format = tu6_base_format(vk_format),
                        .color_sint = vk_format_is_sint(vk_format),
                        .color_uint = vk_format_is_uint(vk_format)));

   tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0,
                        .component_enable = aspect_write_mask(vk_format, aspect_mask)));
   tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format)));
   tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format)));
}

static void
r3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
   tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
   tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) |
                  CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
                  CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY));
   tu_cs_emit(cs, 1); /* instance count */
   tu_cs_emit(cs, 2); /* vertex count */
}

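/* Each run is a single two-vertex, auto-indexed RECTLIST draw covering the
 * blit rectangle, with visibility culling ignored (IGNORE_VISIBILITY).
 */
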
/* blit ops - common interface for 2d/shader paths */

struct blit_ops {
   void (*coords)(struct tu_cs *cs,
                  const VkOffset2D *dst,
                  const VkOffset2D *src,
                  const VkExtent2D *extent);
   void (*clear_value)(struct tu_cs *cs, VkFormat format, const VkClearValue *val);
   void (*src)(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               const struct tu_image_view *iview,
               uint32_t layer,
               VkFilter filter);
   void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
                      VkFormat vk_format,
                      uint64_t va, uint32_t pitch,
                      uint32_t width, uint32_t height);
   void (*dst)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
   void (*dst_buffer)(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch);
   void (*setup)(struct tu_cmd_buffer *cmd,
                 struct tu_cs *cs,
                 VkFormat vk_format,
                 VkImageAspectFlags aspect_mask,
                 enum a6xx_rotation rotation,
                 bool clear);
   void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
};

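/* Typical usage of the ops vtable, as in the entry points below:
 *
 *    ops->setup(cmd, cs, format, aspect_mask, ROTATE_0, clear);
 *    ops->coords(cs, &dst_offset, &src_offset, &extent);
 *    for (uint32_t i = 0; i < layers; i++) {
 *       ops->src(cmd, cs, &src, i, filter);
 *       ops->dst(cs, &dst, i);
 *       ops->run(cmd, cs);
 *    }
 */
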
static const struct blit_ops r2d_ops = {
   .coords = r2d_coords,
   .clear_value = r2d_clear_value,
   .src = r2d_src,
   .src_buffer = r2d_src_buffer,
   .dst = r2d_dst,
   .dst_buffer = r2d_dst_buffer,
   .setup = r2d_setup,
   .run = r2d_run,
};

static const struct blit_ops r3d_ops = {
   .coords = r3d_coords,
   .clear_value = r3d_clear_value,
   .src = r3d_src,
   .src_buffer = r3d_src_buffer,
   .dst = r3d_dst,
   .dst_buffer = r3d_dst_buffer,
   .setup = r3d_setup,
   .run = r3d_run,
};

/* passthrough set coords from 3D extents */
static void
coords(const struct blit_ops *ops,
       struct tu_cs *cs,
       const VkOffset3D *dst,
       const VkOffset3D *src,
       const VkExtent3D *extent)
{
   ops->coords(cs, (const VkOffset2D *) dst, (const VkOffset2D *) src, (const VkExtent2D *) extent);
}

static VkFormat
copy_format(VkFormat format, VkImageAspectFlags aspect_mask, bool copy_buffer)
{
   if (vk_format_is_compressed(format)) {
      switch (vk_format_get_blocksize(format)) {
      case 1: return VK_FORMAT_R8_UINT;
      case 2: return VK_FORMAT_R16_UINT;
      case 4: return VK_FORMAT_R32_UINT;
      case 8: return VK_FORMAT_R32G32_UINT;
      case 16:return VK_FORMAT_R32G32B32A32_UINT;
      default:
         unreachable("unhandled format size");
      }
   }

   switch (format) {
   case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
      if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT)
         return VK_FORMAT_R8G8_UNORM;
      /* fallthrough */
   case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
      return VK_FORMAT_R8_UNORM;
   case VK_FORMAT_D24_UNORM_S8_UINT:
      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT && copy_buffer)
         return VK_FORMAT_R8_UNORM;
      else
         return format;
   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      return VK_FORMAT_R32_UINT;
   default:
      return format;
   }
}

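/* e.g. BC1 has 8-byte blocks, so BC1 images are copied as
 * VK_FORMAT_R32G32_UINT with one "texel" per compressed block;
 * copy_compressed() below rescales offsets and extents to block units to
 * match.
 */
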
static void
tu_image_view_copy_blit(struct tu_image_view *iview,
                        struct tu_image *image,
                        VkFormat format,
                        const VkImageSubresourceLayers *subres,
                        uint32_t layer,
                        bool stencil_read)
{
   VkImageAspectFlags aspect_mask = subres->aspectMask;

   /* always use the AS_R8G8B8A8 format for these */
   if (format == VK_FORMAT_D24_UNORM_S8_UINT ||
       format == VK_FORMAT_X8_D24_UNORM_PACK32) {
      aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
   }

   tu_image_view_init(iview, &(VkImageViewCreateInfo) {
      .image = tu_image_to_handle(image),
      .viewType = VK_IMAGE_VIEW_TYPE_2D,
      .format = format,
      /* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
      .components.r = stencil_read ? VK_COMPONENT_SWIZZLE_A : VK_COMPONENT_SWIZZLE_R,
      .subresourceRange = {
         .aspectMask = aspect_mask,
         .baseMipLevel = subres->mipLevel,
         .levelCount = 1,
         .baseArrayLayer = subres->baseArrayLayer + layer,
         .layerCount = 1,
      },
   });
}

static void
tu_image_view_copy(struct tu_image_view *iview,
                   struct tu_image *image,
                   VkFormat format,
                   const VkImageSubresourceLayers *subres,
                   uint32_t layer,
                   bool stencil_read)
{
   format = copy_format(format, subres->aspectMask, false);
   tu_image_view_copy_blit(iview, image, format, subres, layer, stencil_read);
}

static void
tu_image_view_blit(struct tu_image_view *iview,
                   struct tu_image *image,
                   const VkImageSubresourceLayers *subres,
                   uint32_t layer)
{
   tu_image_view_copy_blit(iview, image, image->vk_format, subres, layer, false);
}

static void
tu6_blit_image(struct tu_cmd_buffer *cmd,
               struct tu_image *src_image,
               struct tu_image *dst_image,
               const VkImageBlit *info,
               VkFilter filter)
{
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;
   uint32_t layers;

   /* 2D blit can't do rotation/mirroring from just coordinates */
   static const enum a6xx_rotation rotate[2][2] = {
      {ROTATE_0, ROTATE_HFLIP},
      {ROTATE_VFLIP, ROTATE_180},
   };

   bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
                   (info->dstOffsets[1].x < info->dstOffsets[0].x);
   bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
                   (info->dstOffsets[1].y < info->dstOffsets[0].y);
   bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) !=
                   (info->dstOffsets[1].z < info->dstOffsets[0].z);

   if (mirror_z) {
      tu_finishme("blit z mirror\n");
      return;
   }

   if (info->srcOffsets[1].z - info->srcOffsets[0].z !=
       info->dstOffsets[1].z - info->dstOffsets[0].z) {
      tu_finishme("blit z filter\n");
      return;
   }

   layers = info->srcOffsets[1].z - info->srcOffsets[0].z;
   if (info->dstSubresource.layerCount > 1) {
      assert(layers <= 1);
      layers = info->dstSubresource.layerCount;
   }

   /* BC1_RGB_* formats need to have their last components overridden with 1
    * when sampling, which is normally handled with the texture descriptor
    * swizzle. The 2d path can't handle that, so use the 3d path.
    *
    * TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with
    * the 2d path.
    */
   if (dst_image->samples > 1 ||
       src_image->vk_format == VK_FORMAT_BC1_RGB_UNORM_BLOCK ||
       src_image->vk_format == VK_FORMAT_BC1_RGB_SRGB_BLOCK ||
       filter == VK_FILTER_CUBIC_EXT)
      ops = &r3d_ops;

   /* TODO: shader path fails some of blit_image.all_formats.generate_mipmaps.* tests,
    * figure out why (should be able to pass all tests with only the shader path)
    */

   ops->setup(cmd, cs, dst_image->vk_format, info->dstSubresource.aspectMask,
              rotate[mirror_y][mirror_x], false);

   if (ops == &r3d_ops) {
      r3d_coords_raw(cs, false, (float[]) {
         info->dstOffsets[0].x, info->dstOffsets[0].y,
         info->srcOffsets[0].x, info->srcOffsets[0].y,
         info->dstOffsets[1].x, info->dstOffsets[1].y,
         info->srcOffsets[1].x, info->srcOffsets[1].y
      });
   } else {
      tu_cs_emit_regs(cs,
         A6XX_GRAS_2D_DST_TL(.x = MIN2(info->dstOffsets[0].x, info->dstOffsets[1].x),
                             .y = MIN2(info->dstOffsets[0].y, info->dstOffsets[1].y)),
         A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1,
                             .y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1));
      tu_cs_emit_regs(cs,
         A6XX_GRAS_2D_SRC_TL_X(.x = MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)),
         A6XX_GRAS_2D_SRC_BR_X(.x = MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1),
         A6XX_GRAS_2D_SRC_TL_Y(.y = MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)),
         A6XX_GRAS_2D_SRC_BR_Y(.y = MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1));
   }

   struct tu_image_view dst, src;
   tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffsets[0].z);
   tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z);

   for (uint32_t i = 0; i < layers; i++) {
      ops->dst(cs, &dst, i);
      ops->src(cmd, cs, &src, i, filter);
      ops->run(cmd, cs);
   }
}

void
tu_CmdBlitImage(VkCommandBuffer commandBuffer,
                VkImage srcImage,
                VkImageLayout srcImageLayout,
                VkImage dstImage,
                VkImageLayout dstImageLayout,
                uint32_t regionCount,
                const VkImageBlit *pRegions,
                VkFilter filter)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_image, dst_image, dstImage);

   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);

   for (uint32_t i = 0; i < regionCount; ++i)
      tu6_blit_image(cmd, src_image, dst_image, pRegions + i, filter);
}

static void
copy_compressed(VkFormat format,
                VkOffset3D *offset,
                VkExtent3D *extent,
                uint32_t *width,
                uint32_t *height)
{
   if (!vk_format_is_compressed(format))
      return;

   uint32_t block_width = vk_format_get_blockwidth(format);
   uint32_t block_height = vk_format_get_blockheight(format);

   offset->x /= block_width;
   offset->y /= block_height;

   if (extent) {
      extent->width = DIV_ROUND_UP(extent->width, block_width);
      extent->height = DIV_ROUND_UP(extent->height, block_height);
   }
   if (width)
      *width = DIV_ROUND_UP(*width, block_width);
   if (height)
      *height = DIV_ROUND_UP(*height, block_height);
}

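/* Worked example for a 4x4-block format such as BC1: imageOffset (8, 4)
 * becomes block offset (2, 1), and a 13x7 texel extent becomes
 * DIV_ROUND_UP(13, 4) x DIV_ROUND_UP(7, 4) = 4 x 2 blocks.
 */
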
static void
tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
                        struct tu_buffer *src_buffer,
                        struct tu_image *dst_image,
                        const VkBufferImageCopy *info)
{
   struct tu_cs *cs = &cmd->cs;
   uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
   VkFormat src_format =
      copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, true);
   const struct blit_ops *ops = &r2d_ops;

   /* special case for buffer to stencil */
   if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
       info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      ops = &r3d_ops;
   }

   /* TODO: G8_B8R8_2PLANE_420_UNORM Y plane has different hardware format,
    * which matters for UBWC. buffer_to_image/etc can fail because of this
    */

   VkOffset3D offset = info->imageOffset;
   VkExtent3D extent = info->imageExtent;
   uint32_t src_width = info->bufferRowLength ?: extent.width;
   uint32_t src_height = info->bufferImageHeight ?: extent.height;

   copy_compressed(dst_image->vk_format, &offset, &extent, &src_width, &src_height);

   uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
   uint32_t layer_size = src_height * pitch;

   ops->setup(cmd, cs,
              copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false),
              info->imageSubresource.aspectMask, ROTATE_0, false);

   struct tu_image_view dst;
   tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false);

   for (uint32_t i = 0; i < layers; i++) {
      ops->dst(cs, &dst, i);

      uint64_t src_va = tu_buffer_iova(src_buffer) + info->bufferOffset + layer_size * i;
      if ((src_va & 63) || (pitch & 63)) {
         for (uint32_t y = 0; y < extent.height; y++) {
            uint32_t x = (src_va & 63) / vk_format_get_blocksize(src_format);
            ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch,
                            x + extent.width, 1);
            ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x},
                        &(VkExtent2D) {extent.width, 1});
            ops->run(cmd, cs);
            src_va += pitch;
         }
      } else {
         ops->src_buffer(cmd, cs, src_format, src_va, pitch, extent.width, extent.height);
         coords(ops, cs, &offset, &(VkOffset3D){}, &extent);
         ops->run(cmd, cs);
      }
   }
}

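/* When the buffer address or pitch isn't 64-byte aligned, the copy above
 * falls back to one blit per row: the base is rounded down to a 64-byte
 * boundary and the remainder is folded into the x coordinate, so each row
 * still starts at an aligned address. The aligned case copies the whole
 * layer in a single blit.
 */
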
void
tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
                        VkBuffer srcBuffer,
                        VkImage dstImage,
                        VkImageLayout dstImageLayout,
                        uint32_t regionCount,
                        const VkBufferImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, dst_image, dstImage);
   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);

   tu_bo_list_add(&cmd->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < regionCount; ++i)
      tu_copy_buffer_to_image(cmd, src_buffer, dst_image, pRegions + i);
}

static void
tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
                        struct tu_image *src_image,
                        struct tu_buffer *dst_buffer,
                        const VkBufferImageCopy *info)
{
   struct tu_cs *cs = &cmd->cs;
   uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
   VkFormat dst_format =
      copy_format(src_image->vk_format, info->imageSubresource.aspectMask, true);
   bool stencil_read = false;

   if (src_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
       info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      stencil_read = true;
   }

   const struct blit_ops *ops = stencil_read ? &r3d_ops : &r2d_ops;
   VkOffset3D offset = info->imageOffset;
   VkExtent3D extent = info->imageExtent;
   uint32_t dst_width = info->bufferRowLength ?: extent.width;
   uint32_t dst_height = info->bufferImageHeight ?: extent.height;

   copy_compressed(src_image->vk_format, &offset, &extent, &dst_width, &dst_height);

   uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
   uint32_t layer_size = pitch * dst_height;

   ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);

   struct tu_image_view src;
   tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read);

   for (uint32_t i = 0; i < layers; i++) {
      ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);

      uint64_t dst_va = tu_buffer_iova(dst_buffer) + info->bufferOffset + layer_size * i;
      if ((dst_va & 63) || (pitch & 63)) {
         for (uint32_t y = 0; y < extent.height; y++) {
            uint32_t x = (dst_va & 63) / vk_format_get_blocksize(dst_format);
            ops->dst_buffer(cs, dst_format, dst_va & ~63, 0);
            ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y},
                        &(VkExtent2D) {extent.width, 1});
            ops->run(cmd, cs);
            dst_va += pitch;
         }
      } else {
         ops->dst_buffer(cs, dst_format, dst_va, pitch);
         coords(ops, cs, &(VkOffset3D) {0, 0}, &offset, &extent);
         ops->run(cmd, cs);
      }
   }
}

void
tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
                        VkImage srcImage,
                        VkImageLayout srcImageLayout,
                        VkBuffer dstBuffer,
                        uint32_t regionCount,
                        const VkBufferImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);

   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < regionCount; ++i)
      tu_copy_image_to_buffer(cmd, src_image, dst_buffer, pRegions + i);
}

/* Tiled formats don't support swapping, which means that we can't support
 * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some
 * formats like B5G5R5A1 have a separate linear-only format when sampling.
 * Currently we fake support for tiled swapped formats and use the unswapped
 * format instead, but this means that reinterpreting copies to and from
 * swapped formats can't be performed correctly unless we can swizzle the
 * components by reinterpreting the other image as the "correct" swapped
 * format, i.e. only when the other image is linear.
 */
static bool
is_swapped_format(VkFormat format)
{
   struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR);
   struct tu_native_format tiled = tu6_format_texture(format, TILE6_3);
   return linear.fmt != tiled.fmt || linear.swap != tiled.swap;
}

/* R8G8_* formats have a different tiling layout than other cpp=2 formats, and
 * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice
 * versa). This should mirror the logic in fdl6_layout.
 */
static bool
image_is_r8g8(struct tu_image *image)
{
   return image->layout[0].cpp == 2 &&
          vk_format_get_nr_components(image->vk_format) == 2;
}

static void
tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
                       struct tu_image *src_image,
                       struct tu_image *dst_image,
                       const VkImageCopy *info)
{
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;

   if (dst_image->samples > 1)
      ops = &r3d_ops;

   VkFormat format = VK_FORMAT_UNDEFINED;
   VkOffset3D src_offset = info->srcOffset;
   VkOffset3D dst_offset = info->dstOffset;
   VkExtent3D extent = info->extent;

   /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between
    * Images":
    *
    *    When copying between compressed and uncompressed formats the extent
    *    members represent the texel dimensions of the source image and not
    *    the destination. When copying from a compressed image to an
    *    uncompressed image the image texel dimensions written to the
    *    uncompressed image will be source extent divided by the compressed
    *    texel block dimensions. When copying from an uncompressed image to a
    *    compressed image the image texel dimensions written to the compressed
    *    image will be the source extent multiplied by the compressed texel
    *    block dimensions.
    *
    * This means we only have to adjust the extent if the source image is
    * compressed.
    */
   copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
   copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);

   VkFormat dst_format = copy_format(dst_image->vk_format, info->dstSubresource.aspectMask, false);
   VkFormat src_format = copy_format(src_image->vk_format, info->srcSubresource.aspectMask, false);

   bool use_staging_blit = false;

   if (src_format == dst_format) {
      /* Images that share a format can always be copied directly because it's
       * the same as a blit.
       */
      format = src_format;
   } else if (!src_image->layout[0].tile_mode) {
      /* If an image is linear, we can always safely reinterpret it with the
       * other image's format and then do a regular blit.
       */
      format = dst_format;
   } else if (!dst_image->layout[0].tile_mode) {
      format = src_format;
   } else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) {
      /* We can't currently copy r8g8 images to/from other cpp=2 images,
       * due to the different tile layout.
       */
      use_staging_blit = true;
   } else if (is_swapped_format(src_format) ||
              is_swapped_format(dst_format)) {
      /* If either format has a non-identity swap, then we can't copy
       * to/from it directly.
       */
      use_staging_blit = true;
   } else if (!src_image->layout[0].ubwc) {
      format = dst_format;
   } else if (!dst_image->layout[0].ubwc) {
      format = src_format;
   } else {
      /* Both formats use UBWC and so neither can be reinterpreted.
       * TODO: We could do an in-place decompression of the dst instead.
       */
      use_staging_blit = true;
   }

   struct tu_image_view dst, src;

   if (use_staging_blit) {
      tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false);
      tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false);

      struct tu_image staging_image = {
         .vk_format = src_format,
         .type = src_image->type,
         .tiling = VK_IMAGE_TILING_LINEAR,
         .extent = extent,
         .level_count = 1,
         .layer_count = info->srcSubresource.layerCount,
         .samples = src_image->samples,
      };

      VkImageSubresourceLayers staging_subresource = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .mipLevel = 0,
         .baseArrayLayer = 0,
         .layerCount = info->srcSubresource.layerCount,
      };

      VkOffset3D staging_offset = { 0 };

      staging_image.layout[0].tile_mode = TILE6_LINEAR;
      staging_image.layout[0].ubwc = false;

      fdl6_layout(&staging_image.layout[0],
                  vk_format_to_pipe_format(staging_image.vk_format),
                  staging_image.samples,
                  staging_image.extent.width,
                  staging_image.extent.height,
                  staging_image.extent.depth,
                  staging_image.level_count,
                  staging_image.layer_count,
                  staging_image.type == VK_IMAGE_TYPE_3D,
                  NULL);

      VkResult result = tu_get_scratch_bo(cmd->device,
                                          staging_image.layout[0].size,
                                          &staging_image.bo);
      if (result != VK_SUCCESS) {
         cmd->record_result = result;
         return;
      }

      tu_bo_list_add(&cmd->bo_list, staging_image.bo,
                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);

      struct tu_image_view staging;
      tu_image_view_copy(&staging, &staging_image, src_format,
                         &staging_subresource, 0, false);

      ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);
      coords(ops, cs, &staging_offset, &src_offset, &extent);

      for (uint32_t i = 0; i < info->extent.depth; i++) {
         ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);
         ops->dst(cs, &staging, i);
         ops->run(cmd, cs);
      }

      /* When executed by the user there has to be a pipeline barrier here,
       * but since we're doing it manually we'll have to flush ourselves.
       */
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);

      tu_image_view_copy(&staging, &staging_image, dst_format,
                         &staging_subresource, 0, false);

      ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask, ROTATE_0, false);
      coords(ops, cs, &dst_offset, &staging_offset, &extent);

      for (uint32_t i = 0; i < info->extent.depth; i++) {
         ops->src(cmd, cs, &staging, i, VK_FILTER_NEAREST);
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   } else {
      tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
      tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z, false);

      ops->setup(cmd, cs, format, info->dstSubresource.aspectMask, ROTATE_0, false);
      coords(ops, cs, &dst_offset, &src_offset, &extent);

      for (uint32_t i = 0; i < info->extent.depth; i++) {
         ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   }
}

void
tu_CmdCopyImage(VkCommandBuffer commandBuffer,
                VkImage srcImage,
                VkImageLayout srcImageLayout,
                VkImage destImage,
                VkImageLayout destImageLayout,
                uint32_t regionCount,
                const VkImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_image, dst_image, destImage);

   tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);

   for (uint32_t i = 0; i < regionCount; ++i)
      tu_copy_image_to_image(cmd, src_image, dst_image, pRegions + i);
}

static void
copy_buffer(struct tu_cmd_buffer *cmd,
            uint64_t dst_va,
            uint64_t src_va,
            uint64_t size,
            uint32_t block_size)
{
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;
   VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM;
   uint64_t blocks = size / block_size;

   ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);

   while (blocks) {
      uint32_t src_x = (src_va & 63) / block_size;
      uint32_t dst_x = (dst_va & 63) / block_size;
      uint32_t width = MIN2(MIN2(blocks, 0x4000 - src_x), 0x4000 - dst_x);

      ops->src_buffer(cmd, cs, format, src_va & ~63, 0, src_x + width, 1);
      ops->dst_buffer(     cs, format, dst_va & ~63, 0);
      ops->coords(cs, &(VkOffset2D) {dst_x}, &(VkOffset2D) {src_x}, &(VkExtent2D) {width, 1});
      ops->run(cmd, cs);

      src_va += width * block_size;
      dst_va += width * block_size;
      blocks -= width;
   }
}

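/* Buffer copies are 1D blits: VK_FORMAT_R8_UNORM for byte copies or
 * VK_FORMAT_R32_UINT when block_size is 4, at most 0x4000 texels per pass,
 * using the same round-down-to-64-bytes trick as the image paths to satisfy
 * the blitter's base alignment.
 */
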
void
tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
                 VkBuffer srcBuffer,
                 VkBuffer dstBuffer,
                 uint32_t regionCount,
                 const VkBufferCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
   TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);

   tu_bo_list_add(&cmd->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < regionCount; ++i) {
      copy_buffer(cmd,
                  tu_buffer_iova(dst_buffer) + pRegions[i].dstOffset,
                  tu_buffer_iova(src_buffer) + pRegions[i].srcOffset,
                  pRegions[i].size, 1);
   }
}

void
tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                   VkBuffer dstBuffer,
                   VkDeviceSize dstOffset,
                   VkDeviceSize dataSize,
                   const void *pData)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);

   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);

   struct tu_cs_memory tmp;
   VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64, &tmp);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   memcpy(tmp.map, pData, dataSize);
   copy_buffer(cmd, tu_buffer_iova(buffer) + dstOffset, tmp.iova, dataSize, 4);
}

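/* vkCmdUpdateBuffer guarantees dstOffset and dataSize are multiples of 4,
 * which is why the staged copy above can always use block_size = 4
 * (VK_FORMAT_R32_UINT).
 */
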
void
tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
                 VkBuffer dstBuffer,
                 VkDeviceSize dstOffset,
                 VkDeviceSize fillSize,
                 uint32_t data)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;

   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);

   if (fillSize == VK_WHOLE_SIZE)
      fillSize = buffer->size - dstOffset;

   uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset;
   uint32_t blocks = fillSize / 4;

   ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, true);
   ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});

   while (blocks) {
      uint32_t dst_x = (dst_va & 63) / 4;
      uint32_t width = MIN2(blocks, 0x4000 - dst_x);

      ops->dst_buffer(cs, VK_FORMAT_R32_UINT, dst_va & ~63, 0);
      ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1});
      ops->run(cmd, cs);

      dst_va += width * 4;
      blocks -= width;
   }
}

,
1617 VkImageLayout srcImageLayout
,
1619 VkImageLayout dstImageLayout
,
1620 uint32_t regionCount
,
1621 const VkImageResolve
*pRegions
)
1623 TU_FROM_HANDLE(tu_cmd_buffer
, cmd
, commandBuffer
);
1624 TU_FROM_HANDLE(tu_image
, src_image
, srcImage
);
1625 TU_FROM_HANDLE(tu_image
, dst_image
, dstImage
);
1626 const struct blit_ops
*ops
= &r2d_ops
;
1627 struct tu_cs
*cs
= &cmd
->cs
;
1629 tu_bo_list_add(&cmd
->bo_list
, src_image
->bo
, MSM_SUBMIT_BO_READ
);
1630 tu_bo_list_add(&cmd
->bo_list
, dst_image
->bo
, MSM_SUBMIT_BO_WRITE
);
1632 ops
->setup(cmd
, cs
, dst_image
->vk_format
, VK_IMAGE_ASPECT_COLOR_BIT
, ROTATE_0
, false);
1634 for (uint32_t i
= 0; i
< regionCount
; ++i
) {
1635 const VkImageResolve
*info
= &pRegions
[i
];
1636 uint32_t layers
= MAX2(info
->extent
.depth
, info
->dstSubresource
.layerCount
);
1638 assert(info
->srcSubresource
.layerCount
== info
->dstSubresource
.layerCount
);
1639 /* TODO: aspect masks possible ? */
1641 coords(ops
, cs
, &info
->dstOffset
, &info
->srcOffset
, &info
->extent
);
1643 struct tu_image_view dst
, src
;
1644 tu_image_view_blit(&dst
, dst_image
, &info
->dstSubresource
, info
->dstOffset
.z
);
1645 tu_image_view_blit(&src
, src_image
, &info
->srcSubresource
, info
->srcOffset
.z
);
1647 for (uint32_t i
= 0; i
< layers
; i
++) {
1648 ops
->src(cmd
, cs
, &src
, i
, VK_FILTER_NEAREST
);
1649 ops
->dst(cs
, &dst
, i
);
void
tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
                  struct tu_cs *cs,
                  struct tu_image_view *src,
                  struct tu_image_view *dst,
                  uint32_t layers,
                  const VkRect2D *rect)
{
   const struct blit_ops *ops = &r2d_ops;

   tu_bo_list_add(&cmd->bo_list, src->image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst->image->bo, MSM_SUBMIT_BO_WRITE);

   assert(src->image->vk_format == dst->image->vk_format);

   ops->setup(cmd, cs, dst->image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);
   ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);

   for (uint32_t i = 0; i < layers; i++) {
      ops->src(cmd, cs, src, i, VK_FILTER_NEAREST);
      ops->dst(cs, dst, i);
      ops->run(cmd, cs);
   }
}

static void
clear_image(struct tu_cmd_buffer *cmd,
            struct tu_image *image,
            const VkClearValue *clear_value,
            const VkImageSubresourceRange *range)
{
   uint32_t level_count = tu_get_levelCount(image, range);
   uint32_t layer_count = tu_get_layerCount(image, range);
   struct tu_cs *cs = &cmd->cs;
   VkFormat format = image->vk_format;
   if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
      format = VK_FORMAT_R32_UINT;

   if (image->type == VK_IMAGE_TYPE_3D) {
      assert(layer_count == 1);
      assert(range->baseArrayLayer == 0);
   }

   const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops;

   ops->setup(cmd, cs, format, range->aspectMask, ROTATE_0, true);
   ops->clear_value(cs, image->vk_format, clear_value);

   for (unsigned j = 0; j < level_count; j++) {
      if (image->type == VK_IMAGE_TYPE_3D)
         layer_count = u_minify(image->extent.depth, range->baseMipLevel + j);

      ops->coords(cs, &(VkOffset2D){}, NULL, &(VkExtent2D) {
                     u_minify(image->extent.width, range->baseMipLevel + j),
                     u_minify(image->extent.height, range->baseMipLevel + j)
                  });

      struct tu_image_view dst;
      tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) {
         .aspectMask = range->aspectMask,
         .mipLevel = range->baseMipLevel + j,
         .baseArrayLayer = range->baseArrayLayer,
         .layerCount = 1,
      }, 0, false);

      for (uint32_t i = 0; i < layer_count; i++) {
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   }
}

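/* For 3D images the loop above clears u_minify(extent.depth, level) depth
 * slices per level, addressing each slice as a layer of the 2D view created
 * for that level.
 */
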
void
tu_CmdClearColorImage(VkCommandBuffer commandBuffer,
                      VkImage image_h,
                      VkImageLayout imageLayout,
                      const VkClearColorValue *pColor,
                      uint32_t rangeCount,
                      const VkImageSubresourceRange *pRanges)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, image_h);

   tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < rangeCount; i++)
      clear_image(cmd, image, (const VkClearValue *) pColor, pRanges + i);
}

void
tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                             VkImage image_h,
                             VkImageLayout imageLayout,
                             const VkClearDepthStencilValue *pDepthStencil,
                             uint32_t rangeCount,
                             const VkImageSubresourceRange *pRanges)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, image_h);

   tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned i = 0; i < rangeCount; i++)
      clear_image(cmd, image, (const VkClearValue *) pDepthStencil, pRanges + i);
}

static void
tu_clear_sysmem_attachments_2d(struct tu_cmd_buffer *cmd,
                               uint32_t attachment_count,
                               const VkClearAttachment *attachments,
                               uint32_t rect_count,
                               const VkClearRect *rects)
{
   const struct tu_subpass *subpass = cmd->state.subpass;
   /* note: cannot use shader path here.. there is a special shader path
    * in tu_clear_sysmem_attachments()
    */
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->draw_cs;

   for (uint32_t j = 0; j < attachment_count; j++) {
      /* The Vulkan spec, section 17.2 "Clearing Images Inside a Render
       * Pass Instance" says that:
       *
       *    Unlike other clear commands, vkCmdClearAttachments executes as
       *    a drawing command, rather than a transfer command, with writes
       *    performed by it executing in rasterization order. Clears to
       *    color attachments are executed as color attachment writes, by
       *    the VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT stage.
       *    Clears to depth/stencil attachments are executed as depth
       *    writes and writes by the
       *    VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT and
       *    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT stages.
       *
       * However, the 2d path here is executed the same way as a
       * transfer command, using the CCU color cache exclusively with
       * a special depth-as-color format for depth clears. This means that
       * we can't rely on the normal pipeline barrier mechanism here, and
       * have to manually flush whenever using a different cache domain
       * from what the 3d path would've used. This happens when we clear
       * depth/stencil, since normally depth attachments use CCU depth, but
       * we clear it using a special depth-as-color format. Since the clear
       * potentially uses a different attachment state we also need to
       * invalidate color beforehand and flush it afterwards.
       */
      uint32_t a;
      if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      } else {
         a = subpass->depth_stencil_attachment.attachment;
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
      }

      if (a == VK_ATTACHMENT_UNUSED)
         continue;

      const struct tu_image_view *iview =
         cmd->state.framebuffer->attachments[a].attachment;

      ops->setup(cmd, cs, iview->image->vk_format, attachments[j].aspectMask, ROTATE_0, true);
      ops->clear_value(cs, iview->image->vk_format, &attachments[j].clearValue);

      /* Wait for the flushes we triggered manually to complete */
      tu_cs_emit_wfi(cs);

      for (uint32_t i = 0; i < rect_count; i++) {
         ops->coords(cs, &rects[i].rect.offset, NULL, &rects[i].rect.extent);
         for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
            ops->dst(cs, iview, rects[i].baseArrayLayer + layer);
            ops->run(cmd, cs);
         }
      }

      if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
      } else {
         /* sync color into depth */
         tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
         tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
      }
   }
}

1844 tu_clear_sysmem_attachments(struct tu_cmd_buffer
*cmd
,
1845 uint32_t attachment_count
,
1846 const VkClearAttachment
*attachments
,
1847 uint32_t rect_count
,
1848 const VkClearRect
*rects
)
   /* the shader path here is special, it avoids changing MRT/etc state */
   const struct tu_render_pass *pass = cmd->state.pass;
   const struct tu_subpass *subpass = cmd->state.subpass;
   const uint32_t mrt_count = subpass->color_count;
   struct tu_cs *cs = &cmd->draw_cs;
   uint32_t clear_value[MAX_RTS][4];
   float z_clear_val = 0.0f;
   uint8_t s_clear_val = 0;
   uint32_t clear_rts = 0, clear_components = 0, num_rts = 0, b;
   bool z_clear = false;
   bool s_clear = false;
   bool layered_clear = false;
   uint32_t max_samples = 1;
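
   /* Gather the per-RT clear masks/values and depth/stencil clear state,
    * and track the max sample count, which decides whether the 2D path
    * below can be used.
    */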
   for (uint32_t i = 0; i < attachment_count; i++) {
      uint32_t a;
      if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         uint32_t c = attachments[i].colorAttachment;
         a = subpass->color_attachments[c].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         clear_rts |= 1 << c;
         clear_components |= 0xf << (c * 4);
         memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t));
      } else {
         a = subpass->depth_stencil_attachment.attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
            z_clear = true;
            z_clear_val = attachments[i].clearValue.depthStencil.depth;
         }

         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            s_clear = true;
            s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
         }
      }

      max_samples = MAX2(max_samples, pass->attachments[a].samples);
   }

   /* prefer to use 2D path for clears
    * 2D can't clear separate depth/stencil and msaa, needs known framebuffer
    */
   if (max_samples == 1 && cmd->state.framebuffer) {
      tu_clear_sysmem_attachments_2d(cmd, attachment_count, attachments,
                                     rect_count, rects);
      return;
   }

   /* This clear path behaves like a draw, needs the same flush as tu_draw */
   tu_emit_cache_flush_renderpass(cmd, cs);

   /* disable all draw states so they don't interfere
    * TODO: use and re-use draw states for this path
    * we have to disable draw states individually to preserve
    * input attachment states, because a secondary command buffer
    * won't be able to restore them
    */
   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2));
   for (uint32_t i = 0; i < TU_DRAW_STATE_COUNT; i++) {
      if (i == TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM ||
          i == TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM)
         continue;

      tu_cs_emit(cs, CP_SET_DRAW_STATE__0_GROUP_ID(i) |
                     CP_SET_DRAW_STATE__0_DISABLE);
      tu_cs_emit_qw(cs, 0);
   }

   cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE;
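
   /* Program the FS outputs directly: no depth/samplemask outputs (regid
    * 0xfc meaning none), and one output register per RT being cleared.
    */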
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
                  0xfc000000);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), mrt_count);
   for (uint32_t i = 0; i < mrt_count; i++) {
      if (clear_rts & (1 << i))
         tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(num_rts++ * 4));
      else
         tu_cs_emit(cs, 0);
   }

   for (uint32_t i = 0; i < rect_count; i++) {
      if (rects[i].baseArrayLayer || rects[i].layerCount > 1)
         layered_clear = true;
   }

   r3d_common(cmd, cs, false, num_rts, layered_clear);

   tu_cs_emit_regs(cs,
                   A6XX_SP_FS_RENDER_COMPONENTS(.dword = clear_components));
   tu_cs_emit_regs(cs,
                   A6XX_RB_RENDER_COMPONENTS(.dword = clear_components));

   tu_cs_emit_regs(cs,
                   A6XX_RB_FS_OUTPUT_CNTL0(),
                   A6XX_RB_FS_OUTPUT_CNTL1(.mrt = mrt_count));

   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.independent_blend = 1,
                                          .sample_mask = 0xffff));
   tu_cs_emit_regs(cs, A6XX_RB_ALPHA_CONTROL());
   for (uint32_t i = 0; i < mrt_count; i++) {
      tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(i,
            .component_enable = COND(clear_rts & (1 << i), 0xf)));
   }
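
   /* Depth/stencil test state: write-always for whichever aspects are being
    * cleared, with full write masks and the stencil ref set to the clear value.
    */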
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL(
         .z_enable = z_clear,
         .z_write_enable = z_clear,
         .zfunc = FUNC_ALWAYS));
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL(
         .stencil_enable = s_clear,
         .func = FUNC_ALWAYS,
         .zpass = STENCIL_REPLACE));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val));
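
   /* Upload the clear colors as FS constants, one vec4 per cleared RT, in
    * the same order the output registers were assigned above.
    */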
   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_rts);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(num_rts));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   for_each_bit(b, clear_rts)
      tu_cs_emit_array(cs, clear_value[b], 4);

   for (uint32_t i = 0; i < rect_count; i++) {
      for (uint32_t layer = 0; layer < rects[i].layerCount; layer++) {
         r3d_coords_raw(cs, layered_clear, (float[]) {
            rects[i].rect.offset.x, rects[i].rect.offset.y,
            z_clear_val, uif(rects[i].baseArrayLayer + layer),
            rects[i].rect.offset.x + rects[i].rect.extent.width,
            rects[i].rect.offset.y + rects[i].rect.extent.height,
            z_clear_val, 1.0f,
         });

         if (layered_clear) {
            tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
            tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_POINTLIST) |
                           CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
                           CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) |
                           CP_DRAW_INDX_OFFSET_0_GS_ENABLE);
            tu_cs_emit(cs, 1); /* instance count */
            tu_cs_emit(cs, 1); /* vertex count */
         } else {
            r3d_run(cmd, cs);
         }
      }
   }
}
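
/* Pack a VkClearValue into the raw bit pattern that the BLIT event expects
 * in RB_BLIT_CLEAR_COLOR_DW0-3 for clearing a GMEM attachment.
 */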
static void
pack_gmem_clear_value(const VkClearValue *val, VkFormat format,
                      uint32_t clear_value[4])
{
   enum pipe_format pformat = vk_format_to_pipe_format(format);

   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) |
                       val->depthStencil.stencil << 24;
      return;
   case VK_FORMAT_D16_UNORM:
      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16);
      return;
   case VK_FORMAT_D32_SFLOAT:
      clear_value[0] = fui(val->depthStencil.depth);
      return;
   case VK_FORMAT_S8_UINT:
      clear_value[0] = val->depthStencil.stencil;
      return;
   /* these formats use a different base format when tiled
    * the same format can be used for both because GMEM is always in WZYX order
    */
   case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
   case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
      pformat = PIPE_FORMAT_B5G5R5A1_UNORM;
      break;
   default:
      break;
   }

   VkClearColorValue color;

   /* GMEM is tiled and wants the components in WZYX order,
    * apply swizzle to the color before packing, to counteract
    * deswizzling applied by packing functions
    */
   pipe_swizzle_4f(color.float32, val->color.float32,
                   util_format_description(pformat)->swizzle);

   util_format_pack_rgba(pformat, clear_value, color.uint32, 1);
}
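
/* Emit a BLIT event clearing a single renderpass attachment in GMEM. */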
static void
tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                              struct tu_cs *cs,
                              uint32_t attachment,
                              VkImageAspectFlags mask,
                              const VkClearValue *value)
{
   VkFormat vk_format = cmd->state.pass->attachments[attachment].format;

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(vk_format)));

   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1,
                                         .clear_mask = aspect_write_mask(vk_format, mask)));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
   tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
   tu_cs_emit(cs, 0);

   uint32_t clear_vals[4] = {};
   pack_gmem_clear_value(value, vk_format, clear_vals);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
   tu_cs_emit_array(cs, clear_vals, 4);

   tu6_emit_event_write(cmd, cs, BLIT);
}

static void
tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
                          uint32_t attachment_count,
                          const VkClearAttachment *attachments,
                          uint32_t rect_count,
                          const VkClearRect *rects)
{
   const struct tu_subpass *subpass = cmd->state.subpass;
   struct tu_cs *cs = &cmd->draw_cs;

   /* TODO: swap the loops for smaller cmdstream */
   for (unsigned i = 0; i < rect_count; i++) {
      unsigned x1 = rects[i].rect.offset.x;
      unsigned y1 = rects[i].rect.offset.y;
      unsigned x2 = x1 + rects[i].rect.extent.width - 1;
      unsigned y2 = y1 + rects[i].rect.extent.height - 1;

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
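
      /* Resolve each VkClearAttachment to a renderpass attachment index and
       * emit its GMEM clear; unused attachments are skipped.
       */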
      for (unsigned j = 0; j < attachment_count; j++) {
         uint32_t a;
         if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
            a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
         else
            a = subpass->depth_stencil_attachment.attachment;

         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
                                       &attachments[j].clearValue);
      }
   }
}
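
/* vkCmdClearAttachments: emit both variants under CP_COND_EXEC, so only
 * the one matching the renderpass's eventual GMEM or sysmem execution
 * mode actually runs.
 */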
void
tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
                       uint32_t attachmentCount,
                       const VkClearAttachment *pAttachments,
                       uint32_t rectCount,
                       const VkClearRect *pRects)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_cs *cs = &cmd->draw_cs;

   tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
   tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
   tu_cond_exec_end(cs);

   tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
   tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
   tu_cond_exec_end(cs);
}

void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
                           struct tu_cs *cs,
                           uint32_t a,
                           const VkRenderPassBeginInfo *info)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_image_view *iview = fb->attachments[a].attachment;
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   if (!attachment->clear_mask)
      return;

   const struct blit_ops *ops = &r2d_ops;
   if (attachment->samples > 1)
      ops = &r3d_ops;

   ops->setup(cmd, cs, attachment->format, attachment->clear_mask, ROTATE_0, true);
   ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
   ops->clear_value(cs, attachment->format, &info->pClearValues[a]);

   /* Wait for any flushes at the beginning of the renderpass to complete */
   tu_cs_emit_wfi(cs);

   for (uint32_t i = 0; i < fb->layers; i++) {
      ops->dst(cs, iview, i);
      ops->run(cmd, cs);
   }

   /* The spec doesn't explicitly say, but presumably the initial renderpass
    * clear is considered part of the renderpass, and therefore barriers
    * aren't required inside the subpass/renderpass. Therefore we need to
    * flush CCU color into CCU depth here, just like with
    * vkCmdClearAttachments(). Note that because this only happens at the
    * beginning of a renderpass, and renderpass writes are considered
    * "incoherent", we shouldn't have to worry about syncing depth into color
    * beforehand as depth should already be flushed.
    */
   if (vk_format_is_depth_or_stencil(attachment->format)) {
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
   } else {
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
   }
}

void
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         const VkRenderPassBeginInfo *info)
{
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   if (!attachment->clear_mask)
      return;

   tu_cs_emit_regs(cs, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));

   tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask,
                                 &info->pClearValues[a]);
}
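
/* Emit a BLIT event copying an attachment between GMEM and sysmem:
 * a GMEM load when resolve is false, a store/resolve when it is true.
 */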
static void
tu_emit_blit(struct tu_cmd_buffer *cmd,
             struct tu_cs *cs,
             const struct tu_image_view *iview,
             const struct tu_render_pass_attachment *attachment,
             bool resolve)
{
   tu_cs_emit_regs(cs,
                   A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));

   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(
      .unk0 = !resolve,
      .gmem = !resolve,
      /* "integer" bit disables msaa resolve averaging */
      .integer = vk_format_is_int(attachment->format)));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
   tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
   tu_cs_image_ref_2d(cs, iview, 0, false);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
   tu_cs_image_flag_ref(cs, iview, 0);

   tu_cs_emit_regs(cs,
                   A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));

   tu6_emit_event_write(cmd, cs, BLIT);
}

static bool
blit_can_resolve(VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   /* blit event can only do resolve for simple cases:
    * averaging samples as unsigned integers or choosing only one sample
    */
   if (vk_format_is_snorm(format) || vk_format_is_srgb(format))
      return false;

   /* can't do formats with larger channel sizes
    * note: this includes all float formats
    * note2: single channel integer formats seem OK
    */
   if (desc->channel[0].size > 10)
      return false;

   switch (format) {
   /* for unknown reasons blit event can't msaa resolve these formats when tiled
    * likely related to these formats having different layout from other cpp=2 formats
    */
   case VK_FORMAT_R8G8_UNORM:
   case VK_FORMAT_R8G8_UINT:
   case VK_FORMAT_R8G8_SINT:
   /* TODO: this one should be able to work? */
   case VK_FORMAT_D24_UNORM_S8_UINT:
      return false;
   default:
      break;
   }

   return true;
}

void
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
                        struct tu_cs *cs,
                        uint32_t a,
                        bool force_load)
{
   const struct tu_image_view *iview =
      cmd->state.framebuffer->attachments[a].attachment;
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   if (attachment->load || force_load)
      tu_emit_blit(cmd, cs, iview, attachment, false);
}
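
/* Store a GMEM attachment to sysmem at the end of a renderpass, resolving
 * if a != gmem_a. Uses the BLIT event fast path when the render area is
 * tile-aligned, otherwise falls back to a CP_BLIT reading from GMEM.
 */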
void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         uint32_t gmem_a)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const VkRect2D *render_area = &cmd->state.render_area;
   struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
   struct tu_image_view *iview = fb->attachments[a].attachment;
   struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];

   if (!dst->store)
      return;

   uint32_t x1 = render_area->offset.x;
   uint32_t y1 = render_area->offset.y;
   uint32_t x2 = x1 + render_area->extent.width;
   uint32_t y2 = y1 + render_area->extent.height;

   /* x2/y2 can be unaligned if equal to the size of the image,
    * since it will write into padding space
    * the one exception is linear levels which don't have the
    * required y padding in the layout (except for the last level)
    */
   bool need_y2_align =
      y2 != iview->extent.height || iview->need_y2_align;

   bool unaligned =
      x1 % GMEM_ALIGN_W || (x2 % GMEM_ALIGN_W && x2 != iview->extent.width) ||
      y1 % GMEM_ALIGN_H || (y2 % GMEM_ALIGN_H && need_y2_align);

   /* use fast path when render area is aligned, except for unsupported resolve cases */
   if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
      tu_emit_blit(cmd, cs, iview, src, true);
      return;
   }

   if (dst->samples > 1) {
      /* I guess we need to use shader path in this case?
       * need a testcase which fails because of this
       */
      tu_finishme("unaligned store of msaa attachment\n");
      return;
   }
   r2d_setup_common(cmd, cs, dst->format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0,
                    false, true);
   r2d_dst(cs, iview, 0);
   r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);

   tu_cs_emit_regs(cs,
                   A6XX_SP_PS_2D_SRC_INFO(
                      .color_format = tu6_format_texture(src->format, TILE6_2).fmt,
                      .tile_mode = TILE6_2,
                      .srgb = vk_format_is_srgb(src->format),
                      .samples = tu_msaa_samples(src->samples),
                      .samples_average = !vk_format_is_int(src->format),
                      .unk20 = 1,
                      .unk22 = 1),
                   /* note: src size does not matter when not scaling */
                   A6XX_SP_PS_2D_SRC_SIZE(.width = 0x3fff, .height = 0x3fff),
                   A6XX_SP_PS_2D_SRC_LO(cmd->device->physical_device->gmem_base + src->gmem_offset),
                   A6XX_SP_PS_2D_SRC_HI(),
                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = fb->tile0.width * src->cpp));

   /* sync GMEM writes with CACHE. */
   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);

   /* Wait for CACHE_INVALIDATE to land */
   tu_cs_emit_wfi(cs);

   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));

   /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
    * sysmem, and we generally assume that GMEM renderpasses leave their
    * results in sysmem, so we need to flush manually here.
    */
   tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
}