X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fvulkan%2Ftu_cmd_buffer.c;h=6ad18f18aa77f352648c821de14a6fbe7cc5fb3c;hb=abaaf0b2e731ad4f10ffb2b6b5ae3d5b12d4cc12;hp=ef07bd6c27bcc3c04b46d97c38b6440cc01f54a0;hpb=57327626dc754be46841997c5c2f777e7f326c40;p=mesa.git diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index ef07bd6c27b..6ad18f18aa7 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -34,6 +34,7 @@ #include "vk_format.h" #include "tu_cs.h" +#include "tu_blit.h" void tu_bo_list_init(struct tu_bo_list *list) @@ -314,8 +315,8 @@ tu_tiling_config_get_tile(const struct tu_tiling_config *tiling, : tile->begin.y + tiling->tile0.extent.height; } -static enum a3xx_msaa_samples -tu6_msaa_samples(uint32_t samples) +enum a3xx_msaa_samples +tu_msaa_samples(uint32_t samples) { switch (samples) { case 1: @@ -332,6 +333,20 @@ tu6_msaa_samples(uint32_t samples) } } +static enum a4xx_index_size +tu6_index_size(VkIndexType type) +{ + switch (type) { + case VK_INDEX_TYPE_UINT16: + return INDEX4_SIZE_16_BIT; + case VK_INDEX_TYPE_UINT32: + return INDEX4_SIZE_32_BIT; + default: + unreachable("invalid VkIndexType"); + return INDEX4_SIZE_8_BIT; + } +} + static void tu6_emit_marker(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { @@ -373,10 +388,28 @@ tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } } +static void +tu6_emit_flag_buffer(struct tu_cs *cs, const struct tu_image_view *iview) +{ + uint64_t va = tu_image_ubwc_base(iview->image, iview->base_mip, iview->base_layer); + uint32_t pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip); + uint32_t size = tu_image_ubwc_size(iview->image, iview->base_mip); + if (iview->image->ubwc_size) { + tu_cs_emit_qw(cs, va); + tu_cs_emit(cs, A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_PITCH(pitch) | + A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_ARRAY_PITCH(size >> 2)); + } else { + tu_cs_emit_qw(cs, 0); + tu_cs_emit(cs, 0); + } +} + static void tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { + const struct tu_framebuffer *fb = cmd->state.framebuffer; const struct tu_subpass *subpass = cmd->state.subpass; + const struct tu_tiling_config *tiling = &cmd->state.tiling_config; const uint32_t a = subpass->depth_stencil_attachment.attachment; if (a == VK_ATTACHMENT_UNUSED) { @@ -405,6 +438,32 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs) return; } + const struct tu_image_view *iview = fb->attachments[a].attachment; + enum a6xx_depth_format fmt = tu6_pipe2depth(iview->vk_format); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6); + tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); + tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip))); + tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layer_size)); + tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); + tu_cs_emit(cs, tiling->gmem_offsets[subpass->color_count]); + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); + tu_cs_emit(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3); + tu6_emit_flag_buffer(cs, iview); + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5); + tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */ + tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */ + tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */ + tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 
*/ + tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */ + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1); + tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */ + /* enable zs? */ } @@ -417,18 +476,14 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs) unsigned char mrt_comp[MAX_RTS] = { 0 }; unsigned srgb_cntl = 0; - uint32_t gmem_index = 0; for (uint32_t i = 0; i < subpass->color_count; ++i) { uint32_t a = subpass->color_attachments[i].attachment; if (a == VK_ATTACHMENT_UNUSED) continue; const struct tu_image_view *iview = fb->attachments[a].attachment; - const struct tu_image_level *slice = - &iview->image->levels[iview->base_mip]; - const enum a6xx_tile_mode tile_mode = TILE6_LINEAR; - uint32_t stride = 0; - uint32_t offset = 0; + const enum a6xx_tile_mode tile_mode = + tu6_get_image_tile_mode(iview->image, iview->base_mip); mrt_comp[i] = 0xf; @@ -439,33 +494,23 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu6_get_native_format(iview->vk_format); assert(format && format->rb >= 0); - offset = slice->offset + slice->size * iview->base_layer; - stride = slice->pitch * vk_format_get_blocksize(iview->vk_format); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6); tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) | A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap)); - tu_cs_emit(cs, A6XX_RB_MRT_PITCH(stride)); - tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(slice->size)); - tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset + - offset); /* BASE_LO/HI */ + tu_cs_emit(cs, A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip))); + tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layer_size)); + tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); tu_cs_emit( - cs, tiling->gmem_offsets[gmem_index++]); /* RB_MRT[i].BASE_GMEM */ + cs, tiling->gmem_offsets[i]); /* RB_MRT[i].BASE_GMEM */ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1); - tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb)); + tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb) | + COND(vk_format_is_sint(iview->vk_format), A6XX_SP_FS_MRT_REG_COLOR_SINT) | + COND(vk_format_is_uint(iview->vk_format), A6XX_SP_FS_MRT_REG_COLOR_UINT)); -#if 0 - /* when we support UBWC, these would be the system memory - * addr/pitch/etc: - */ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 4); - tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */ - tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */ - tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_PITCH(0)); - tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0)); -#endif + tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3); + tu6_emit_flag_buffer(cs, iview); } tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1); @@ -500,28 +545,22 @@ tu6_emit_msaa(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { const struct tu_subpass *subpass = cmd->state.subpass; const enum a3xx_msaa_samples samples = - tu6_msaa_samples(subpass->max_sample_count); + tu_msaa_samples(subpass->max_sample_count); tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2); tu_cs_emit(cs, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples)); - tu_cs_emit( - cs, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) | - ((samples == MSAA_ONE) ? 
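The COND() helper used by the replacement lines below collapses this recurring (cond ? flag : 0) ternary. A minimal sketch, assuming it matches the definition freedreno carries in its shared headers:

   #define COND(bool, val) ((bool) ? (val) : 0)

   /* with it, each MSAA_DISABLE ternary reduces to a single expression: */
   tu_cs_emit(cs, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                  COND(samples == MSAA_ONE,
                       A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));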
A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE - : 0)); + tu_cs_emit(cs, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) | + COND(samples == MSAA_ONE, A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE)); tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2); tu_cs_emit(cs, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples)); - tu_cs_emit( - cs, - A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) | - ((samples == MSAA_ONE) ? A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE : 0)); + tu_cs_emit(cs, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) | + COND(samples == MSAA_ONE, A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE)); tu_cs_emit_pkt4(cs, REG_A6XX_RB_RAS_MSAA_CNTL, 2); tu_cs_emit(cs, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples)); - tu_cs_emit( - cs, - A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) | - ((samples == MSAA_ONE) ? A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE : 0)); + tu_cs_emit(cs, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) | + COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE)); tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_CNTL, 1); tu_cs_emit(cs, A6XX_RB_MSAA_CNTL_SAMPLES(samples)); @@ -587,31 +626,29 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd, uint32_t gmem_offset, uint32_t blit_info) { - const struct tu_image_level *slice = - &iview->image->levels[iview->base_mip]; - const uint32_t offset = slice->offset + slice->size * iview->base_layer; - const uint32_t stride = - slice->pitch * vk_format_get_blocksize(iview->vk_format); - const enum a6xx_tile_mode tile_mode = TILE6_LINEAR; - const enum a3xx_msaa_samples samples = tu6_msaa_samples(1); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1); tu_cs_emit(cs, blit_info); - /* tile mode? */ const struct tu_native_format *format = tu6_get_native_format(iview->vk_format); assert(format && format->rb >= 0); + enum a6xx_tile_mode tile_mode = + tu6_get_image_tile_mode(iview->image, iview->base_mip); tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 5); tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) | - A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | + A6XX_RB_BLIT_DST_INFO_SAMPLES(tu_msaa_samples(iview->image->samples)) | A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) | - A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap)); - tu_cs_emit_qw(cs, - iview->image->bo->iova + iview->image->bo_offset + offset); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(stride)); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(slice->size)); + A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap) | + COND(iview->image->ubwc_size, A6XX_RB_BLIT_DST_INFO_FLAGS)); + tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); + tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip))); + tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layer_size)); + + if (iview->image->ubwc_size) { + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3); + tu6_emit_flag_buffer(cs, iview); + } tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); tu_cs_emit(cs, gmem_offset); @@ -624,20 +661,12 @@ tu6_emit_blit_clear(struct tu_cmd_buffer *cmd, uint32_t gmem_offset, const VkClearValue *clear_value) { - const enum a6xx_tile_mode tile_mode = TILE6_LINEAR; - const enum a3xx_msaa_samples samples = tu6_msaa_samples(1); - const struct tu_native_format *format = tu6_get_native_format(iview->vk_format); assert(format && format->rb >= 0); - /* must be WZYX; other values are ignored */ - const enum a3xx_color_swap swap = WZYX; tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) | - A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) | - 
A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(swap)); + tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb)); tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1); tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); @@ -648,7 +677,6 @@ tu6_emit_blit_clear(struct tu_cmd_buffer *cmd, tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); tu_cs_emit(cs, 0); - /* pack clear_value into WZYX order */ uint32_t clear_vals[4] = { 0 }; tu_pack_clear_value(clear_value, iview->vk_format, clear_vals); @@ -715,11 +743,11 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd, const struct tu_tile *tile) { tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1); - tu_cs_emit(cs, A2XX_CP_SET_MARKER_0_MODE(0x7)); + tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x7)); tu6_emit_marker(cmd, cs); tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1); - tu_cs_emit(cs, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); + tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); tu6_emit_marker(cmd, cs); const uint32_t x1 = tile->begin.x; @@ -744,38 +772,46 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd, } static void -tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs) +tu6_emit_tile_load_attachment(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + uint32_t a, + uint32_t gmem_index) { const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_subpass *subpass = cmd->state.subpass; const struct tu_tiling_config *tiling = &cmd->state.tiling_config; const struct tu_attachment_state *attachments = cmd->state.attachments; + const struct tu_image_view *iview = fb->attachments[a].attachment; + const struct tu_attachment_state *att = attachments + a; + if (att->pending_clear_aspects) { + tu6_emit_blit_clear(cmd, cs, iview, + tiling->gmem_offsets[gmem_index], + &att->clear_value); + } else { + tu6_emit_blit_info(cmd, cs, iview, + tiling->gmem_offsets[gmem_index], + A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM); + } + + tu6_emit_blit(cmd, cs); +} + +static void +tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs) +{ + const struct tu_subpass *subpass = cmd->state.subpass; + tu6_emit_blit_scissor(cmd, cs); - uint32_t gmem_index = 0; for (uint32_t i = 0; i < subpass->color_count; ++i) { const uint32_t a = subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - const struct tu_image_view *iview = fb->attachments[a].attachment; - const struct tu_attachment_state *att = attachments + a; - if (att->pending_clear_aspects) { - assert(att->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT); - tu6_emit_blit_clear(cmd, cs, iview, - tiling->gmem_offsets[gmem_index++], - &att->clear_value); - } else { - tu6_emit_blit_info(cmd, cs, iview, - tiling->gmem_offsets[gmem_index++], - A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM); - } - - tu6_emit_blit(cmd, cs); + if (a != VK_ATTACHMENT_UNUSED) + tu6_emit_tile_load_attachment(cmd, cs, a, i); } - /* load/clear zs? 
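The new tu6_emit_tile_load_attachment() path answers this TODO: color attachment i reads GMEM slot i (unused attachments now keep a zero-cpp slot, see the tu_cmd_update_tiling_config hunk further down), and depth/stencil takes the slot right after the last color attachment. The indexing this assumes:

   /* GMEM slot layout shared by tile load and tile store:
    *   slot i                     -> color attachment i (cpp 0 if UNUSED)
    *   slot subpass->color_count  -> depth/stencil attachment
    */
   tu6_emit_tile_load_attachment(cmd, cs, a, i);                    /* color */
   tu6_emit_tile_load_attachment(cmd, cs, a, subpass->color_count); /* zs */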
*/ + const uint32_t a = subpass->depth_stencil_attachment.attachment; + if (a != VK_ATTACHMENT_UNUSED) + tu6_emit_tile_load_attachment(cmd, cs, a, subpass->color_count); } static void @@ -800,24 +836,38 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu6_emit_marker(cmd, cs); tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1); - tu_cs_emit(cs, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); + tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); tu6_emit_marker(cmd, cs); tu6_emit_blit_scissor(cmd, cs); - uint32_t gmem_index = 0; for (uint32_t i = 0; i < cmd->state.subpass->color_count; ++i) { uint32_t a = cmd->state.subpass->color_attachments[i].attachment; if (a == VK_ATTACHMENT_UNUSED) continue; const struct tu_image_view *iview = fb->attachments[a].attachment; - tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[gmem_index++], + tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[i], 0); + tu6_emit_blit(cmd, cs); + } + + const uint32_t a = cmd->state.subpass->depth_stencil_attachment.attachment; + if (a != VK_ATTACHMENT_UNUSED) { + const struct tu_image_view *iview = fb->attachments[a].attachment; + tu6_emit_blit_info(cmd, cs, iview, + tiling->gmem_offsets[cmd->state.subpass->color_count], 0); tu6_emit_blit(cmd, cs); } } +static void +tu6_emit_restart_index(struct tu_cs *cs, uint32_t restart_index) +{ + tu_cs_emit_pkt4(cs, REG_A6XX_PC_RESTART_INDEX, 1); + tu_cs_emit(cs, restart_index); +} + static void tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { @@ -859,13 +909,13 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_RB_SRGB_CNTL, 0); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8101, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8109, 0); + tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 0); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8110, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL0, 0x401); tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL1, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8810, 0); + tu_cs_emit_write_reg(cs, REG_A6XX_RB_SAMPLE_CNTL, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8818, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8819, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881A, 0); @@ -888,8 +938,8 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0); tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9980, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0); + tu_cs_emit_write_reg(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 0); + tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B07, 0); tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0); @@ -1033,7 +1083,8 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs, const struct tu_tile *tile) { - VkResult result = tu_cs_reserve_space(cmd->device, cs, 64); + const uint32_t render_tile_space = 64 + tu_cs_get_call_size(&cmd->draw_cs); + VkResult result = tu_cs_reserve_space(cmd->device, cs, render_tile_space); if (result != VK_SUCCESS) { cmd->record_result = result; return; @@ -1042,8 +1093,7 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, tu6_emit_tile_select(cmd, cs, tile); tu_cs_emit_ib(cs, &cmd->state.tile_load_ib); - /* draw IB? 
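This patch answers the TODO by recording all draws once into cmd->draw_cs and replaying that stream for every tile, which is also why tu6_render_tile() now reserves tu_cs_get_call_size() worth of space up front. The per-tile sequence becomes:

   tu6_emit_tile_select(cmd, cs, tile);
   tu_cs_emit_ib(cs, &cmd->state.tile_load_ib);   /* load/clear GMEM   */
   tu_cs_emit_call(cs, &cmd->draw_cs);            /* replay the draws  */
   tu_cs_emit_ib(cs, &cmd->state.tile_store_ib);  /* resolve to sysmem */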
*/ - + tu_cs_emit_call(cs, &cmd->draw_cs); cmd->wait_for_idle = true; tu_cs_emit_ib(cs, &cmd->state.tile_store_ib); @@ -1054,6 +1104,9 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, static void tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { + const struct tu_subpass *subpass = cmd->state.subpass; + const struct tu_framebuffer *fb = cmd->state.framebuffer; + VkResult result = tu_cs_reserve_space(cmd->device, cs, 16); if (result != VK_SUCCESS) { cmd->record_result = result; @@ -1067,6 +1120,31 @@ tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true); + if (subpass->has_resolve) { + for (uint32_t i = 0; i < subpass->color_count; ++i) { + struct tu_subpass_attachment src_att = subpass->color_attachments[i]; + struct tu_subpass_attachment dst_att = subpass->resolve_attachments[i]; + + if (dst_att.attachment == VK_ATTACHMENT_UNUSED) + continue; + + struct tu_image *src_img = fb->attachments[src_att.attachment].attachment->image; + struct tu_image *dst_img = fb->attachments[dst_att.attachment].attachment->image; + + assert(src_img->extent.width == dst_img->extent.width); + assert(src_img->extent.height == dst_img->extent.height); + + tu_bo_list_add(&cmd->bo_list, src_img->bo, MSM_SUBMIT_BO_READ); + tu_bo_list_add(&cmd->bo_list, dst_img->bo, MSM_SUBMIT_BO_WRITE); + + tu_blit(cmd, &(struct tu_blit) { + .dst = tu_blit_surf_whole(dst_img, 0, 0), + .src = tu_blit_surf_whole(src_img, 0, 0), + .layers = 1, + }); + } + } + tu_cs_sanity_check(cs); } @@ -1148,8 +1226,10 @@ tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd, for (uint32_t i = 0; i < subpass->color_count; ++i) { const uint32_t a = subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) + if (a == VK_ATTACHMENT_UNUSED) { + buffer_cpp[buffer_count++] = 0; continue; + } const struct tu_render_pass_attachment *att = &pass->attachments[a]; buffer_cpp[buffer_count++] = @@ -1343,6 +1423,8 @@ tu_create_cmd_buffer(struct tu_device *device, tu_bo_list_init(&cmd_buffer->bo_list); tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096); + tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096); + tu_cs_init(&cmd_buffer->draw_state, TU_CS_MODE_SUB_STREAM, 2048); tu_cs_init(&cmd_buffer->tile_cs, TU_CS_MODE_SUB_STREAM, 1024); *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer); @@ -1370,6 +1452,8 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer) free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr); tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs); + tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs); + tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_state); tu_cs_finish(cmd_buffer->device, &cmd_buffer->tile_cs); tu_bo_list_destroy(&cmd_buffer->bo_list); @@ -1385,6 +1469,8 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer) tu_bo_list_reset(&cmd_buffer->bo_list); tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs); + tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs); + tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_state); tu_cs_reset(cmd_buffer->device, &cmd_buffer->tile_cs); for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) { @@ -1477,7 +1563,7 @@ tu_AllocateCommandBuffers(VkDevice _device, for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { - if (!list_empty(&pool->free_cmd_buffers)) { + if (!list_is_empty(&pool->free_cmd_buffers)) { struct tu_cmd_buffer *cmd_buffer = list_first_entry( &pool->free_cmd_buffers, struct tu_cmd_buffer, pool_link); @@ -1592,6 +1678,18 @@ tu_CmdBindVertexBuffers(VkCommandBuffer 
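For reference, the call shape the new tu_CmdBindVertexBuffers() implementation handles (the buffer handles here are hypothetical app-side objects):

   VkBuffer bufs[2] = { position_buf, color_buf };
   VkDeviceSize offs[2] = { 0, 0 };
   vkCmdBindVertexBuffers(cmd_buf, 0 /* firstBinding */, 2, bufs, offs);

Note that the function only shadows the bindings and raises TU_CMD_DIRTY_VERTEX_BUFFERS; the VFD_FETCH registers are written later, at draw time, in tu6_bind_draw_states().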
commandBuffer, const VkBuffer *pBuffers, const VkDeviceSize *pOffsets) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + assert(firstBinding + bindingCount <= MAX_VBS); + + for (uint32_t i = 0; i < bindingCount; i++) { + cmd->state.vb.buffers[firstBinding + i] = + tu_buffer_from_handle(pBuffers[i]); + cmd->state.vb.offsets[firstBinding + i] = pOffsets[i]; + } + + /* VB states depend on VkPipelineVertexInputStateCreateInfo */ + cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS; } void @@ -1600,6 +1698,31 @@ tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkDeviceSize offset, VkIndexType indexType) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + TU_FROM_HANDLE(tu_buffer, buf, buffer); + + /* initialize/update the restart index */ + if (!cmd->state.index_buffer || cmd->state.index_type != indexType) { + struct tu_cs *draw_cs = &cmd->draw_cs; + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 2); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + tu6_emit_restart_index( + draw_cs, indexType == VK_INDEX_TYPE_UINT32 ? 0xffffffff : 0xffff); + + tu_cs_sanity_check(draw_cs); + } + + /* track the BO */ + if (cmd->state.index_buffer != buf) + tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ); + + cmd->state.index_buffer = buf; + cmd->state.index_offset = offset; + cmd->state.index_type = indexType; } void @@ -1612,6 +1735,30 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, uint32_t dynamicOffsetCount, const uint32_t *pDynamicOffsets) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_pipeline_layout, layout, _layout); + unsigned dyn_idx = 0; + + struct tu_descriptor_state *descriptors_state = + tu_get_descriptors_state(cmd_buffer, pipelineBindPoint); + + for (unsigned i = 0; i < descriptorSetCount; ++i) { + unsigned idx = i + firstSet; + TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]); + + descriptors_state->sets[idx] = set; + descriptors_state->valid |= (1u << idx); + + for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) { + unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start; + assert(dyn_idx < dynamicOffsetCount); + + descriptors_state->dynamic_buffers[idx] = + set->dynamic_descriptors[j].va + pDynamicOffsets[dyn_idx]; + } + } + + cmd_buffer->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS; } void @@ -1622,6 +1769,8 @@ tu_CmdPushConstants(VkCommandBuffer commandBuffer, uint32_t size, const void *pValues) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + memcpy((void*) cmd_buffer->push_constants + offset, pValues, size); } VkResult @@ -1634,6 +1783,16 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer) MSM_SUBMIT_BO_WRITE); } + for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) { + tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i], + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); + } + + for (uint32_t i = 0; i < cmd_buffer->draw_state.bo_count; i++) { + tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_state.bos[i], + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); + } + for (uint32_t i = 0; i < cmd_buffer->tile_cs.bo_count; i++) { tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->tile_cs.bos[i], MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); @@ -1653,6 +1812,21 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline); + + switch (pipelineBindPoint) { + case 
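tu_CmdPushConstants() above is deliberately thin: it only memcpy()s into the cmd_buffer->push_constants shadow. The data reaches the GPU at draw time in tu6_emit_user_consts(), where range 0 of the UBO analysis state is the push-constant range and is emitted inline:

   /* draw-time upload, SS6_DIRECT, from tu6_emit_user_consts() below: */
   for (unsigned i = 0; i < size / 4; i++)
      tu_cs_emit(cs, push_constants[i + offset / 4]);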
VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd->state.pipeline = pipeline; + cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE; + break; + case VK_PIPELINE_BIND_POINT_COMPUTE: + tu_finishme("binding compute pipeline"); + break; + default: + unreachable("unrecognized pipeline bind point"); + break; + } } void @@ -1661,6 +1835,19 @@ tu_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport *pViewports) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs *draw_cs = &cmd->draw_cs; + + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 12); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + assert(firstViewport == 0 && viewportCount == 1); + tu6_emit_viewport(draw_cs, pViewports); + + tu_cs_sanity_check(draw_cs); } void @@ -1669,11 +1856,30 @@ tu_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D *pScissors) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs *draw_cs = &cmd->draw_cs; + + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 3); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + assert(firstScissor == 0 && scissorCount == 1); + tu6_emit_scissor(draw_cs, pScissors); + + tu_cs_sanity_check(draw_cs); } void tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.dynamic.line_width = lineWidth; + + /* line width depends on VkPipelineRasterizationStateCreateInfo */ + cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH; } void @@ -1682,12 +1888,37 @@ tu_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasClamp, float depthBiasSlopeFactor) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs *draw_cs = &cmd->draw_cs; + + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 4); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp, + depthBiasSlopeFactor); + + tu_cs_sanity_check(draw_cs); } void tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4]) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs *draw_cs = &cmd->draw_cs; + + VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 5); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + tu6_emit_blend_constants(draw_cs, blendConstants); + + tu_cs_sanity_check(draw_cs); } void @@ -1702,6 +1933,15 @@ tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd->state.dynamic.stencil_compare_mask.front = compareMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd->state.dynamic.stencil_compare_mask.back = compareMask; + + /* the front/back compare masks must be updated together */ + cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; } void @@ -1709,6 +1949,15 @@ tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd->state.dynamic.stencil_write_mask.front = writeMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd->state.dynamic.stencil_write_mask.back = writeMask; + + /* the front/back write masks must be updated together */ + cmd->state.dirty |= 
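Two strategies are visible across these vkCmdSet*() entry points: viewport, scissor, depth bias and blend constants are emitted straight into draw_cs, while line width and the stencil values are only shadowed and dirty-flagged, because they must be merged with pipeline-baked fields (gras_su_cntl, the stencil control words) in tu6_bind_draw_states():

   /* immediate: self-contained registers */
   tu6_emit_viewport(draw_cs, pViewports);
   /* deferred: merged with pipeline state at draw time */
   cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH;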
TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; } void @@ -1716,6 +1965,15 @@ tu_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference) { + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd->state.dynamic.stencil_reference.front = reference; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd->state.dynamic.stencil_reference.back = reference; + + /* the front/back references must be updated together */ + cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; } void @@ -1802,7 +2060,7 @@ tu_ResetCommandPool(VkDevice device, void tu_TrimCommandPool(VkDevice device, VkCommandPool commandPool, - VkCommandPoolTrimFlagsKHR flags) + VkCommandPoolTrimFlags flags) { TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool); @@ -1837,6 +2095,10 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer, tu_cmd_update_tiling_config(cmd_buffer, &pRenderPassBegin->renderArea); tu_cmd_prepare_tile_load_ib(cmd_buffer); tu_cmd_prepare_tile_store_ib(cmd_buffer); + + /* draw_cs should contain entries only for this render pass */ + assert(!cmd_buffer->draw_cs.entry_count); + tu_cs_begin(&cmd_buffer->draw_cs); } void @@ -1916,9 +2178,660 @@ struct tu_draw_info uint64_t count_buffer_offset; }; +enum tu_draw_state_group_id +{ + TU_DRAW_STATE_PROGRAM, + TU_DRAW_STATE_PROGRAM_BINNING, + TU_DRAW_STATE_VI, + TU_DRAW_STATE_VI_BINNING, + TU_DRAW_STATE_VP, + TU_DRAW_STATE_RAST, + TU_DRAW_STATE_DS, + TU_DRAW_STATE_BLEND, + TU_DRAW_STATE_VS_CONST, + TU_DRAW_STATE_FS_CONST, + TU_DRAW_STATE_VS_TEX, + TU_DRAW_STATE_FS_TEX, + + TU_DRAW_STATE_COUNT, +}; + +struct tu_draw_state_group +{ + enum tu_draw_state_group_id id; + uint32_t enable_mask; + struct tu_cs_entry ib; +}; + +static struct tu_sampler* +sampler_ptr(struct tu_descriptor_state *descriptors_state, + const struct tu_descriptor_map *map, unsigned i) +{ + assert(descriptors_state->valid & (1 << map->set[i])); + + struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]]; + assert(map->binding[i] < set->layout->binding_count); + + const struct tu_descriptor_set_binding_layout *layout = + &set->layout->binding[map->binding[i]]; + + switch (layout->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4]; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS]; + default: + unreachable("unimplemented descriptor type"); + break; + } +} + +static uint32_t* +texture_ptr(struct tu_descriptor_state *descriptors_state, + const struct tu_descriptor_map *map, unsigned i) +{ + assert(descriptors_state->valid & (1 << map->set[i])); + + struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]]; + assert(map->binding[i] < set->layout->binding_count); + + const struct tu_descriptor_set_binding_layout *layout = + &set->layout->binding[map->binding[i]]; + + switch (layout->type) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return &set->mapped_ptr[layout->offset / 4]; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return &set->mapped_ptr[layout->offset / 4]; + default: + unreachable("unimplemented descriptor type"); + break; + } +} + +static uint64_t +buffer_ptr(struct tu_descriptor_state *descriptors_state, + const struct tu_descriptor_map *map, + unsigned i) +{ + assert(descriptors_state->valid & (1 << map->set[i])); + + struct tu_descriptor_set 
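sampler_ptr(), texture_ptr() and buffer_ptr() all resolve a (set, binding) pair from the pipeline's descriptor map the same way before switching on the descriptor type:

   struct tu_descriptor_set *set = descriptors_state->sets[map->set[i]];
   const struct tu_descriptor_set_binding_layout *layout =
      &set->layout->binding[map->binding[i]];
   /* then read set->mapped_ptr[layout->offset / 4 ...] as the type requires */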
*set = descriptors_state->sets[map->set[i]]; + assert(map->binding[i] < set->layout->binding_count); + + const struct tu_descriptor_set_binding_layout *layout = + &set->layout->binding[map->binding[i]]; + + switch (layout->type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset]; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + return (uint64_t) set->mapped_ptr[layout->offset / 4 + 1] << 32 | + set->mapped_ptr[layout->offset / 4]; + default: + unreachable("unimplemented descriptor type"); + break; + } +} + +static inline uint32_t +tu6_stage2opcode(gl_shader_stage type) +{ + switch (type) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + return CP_LOAD_STATE6_GEOM; + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + return CP_LOAD_STATE6_FRAG; + default: + unreachable("bad shader type"); + } +} + +static inline enum a6xx_state_block +tu6_stage2shadersb(gl_shader_stage type) +{ + switch (type) { + case MESA_SHADER_VERTEX: + return SB6_VS_SHADER; + case MESA_SHADER_FRAGMENT: + return SB6_FS_SHADER; + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + return SB6_CS_SHADER; + default: + unreachable("bad shader type"); + return ~0; + } +} + static void -tu_draw(struct tu_cmd_buffer *cmd_buffer, const struct tu_draw_info *info) +tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline, + struct tu_descriptor_state *descriptors_state, + gl_shader_stage type, + uint32_t *push_constants) +{ + const struct tu_program_descriptor_linkage *link = + &pipeline->program.link[type]; + const struct ir3_ubo_analysis_state *state = &link->ubo_state; + + for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) { + if (state->range[i].start < state->range[i].end) { + uint32_t size = state->range[i].end - state->range[i].start; + uint32_t offset = state->range[i].start; + + /* and even if the start of the const buffer is before + * first_immediate, the end may not be: + */ + size = MIN2(size, (16 * link->constlen) - state->range[i].offset); + + if (size == 0) + continue; + + /* things should be aligned to vec4: */ + debug_assert((state->range[i].offset % 16) == 0); + debug_assert((size % 16) == 0); + debug_assert((offset % 16) == 0); + + if (i == 0) { + /* push constants */ + tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (size / 4)); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(size / 16)); + tu_cs_emit(cs, 0); + tu_cs_emit(cs, 0); + for (unsigned i = 0; i < size / 4; i++) + tu_cs_emit(cs, push_constants[i + offset / 4]); + continue; + } + + uint64_t va = buffer_ptr(descriptors_state, &link->ubo_map, i - 1); + + tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(size / 16)); + tu_cs_emit_qw(cs, va + offset); + } + } +} + +static void +tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline, + struct tu_descriptor_state *descriptors_state, + gl_shader_stage type) +{ + const 
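Only range 0 (the push constants) is copied into the command stream with SS6_DIRECT above; every other range is backed by a bound UBO and is referenced by GPU address instead, so nothing is copied at record time:

   /* indirect const load: point CP_LOAD_STATE6 at the UBO itself */
   uint64_t va = buffer_ptr(descriptors_state, &link->ubo_map, i - 1);
   tu_cs_emit_qw(cs, va + offset);   /* EXT_SRC_ADDR_LO/HI */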
struct tu_program_descriptor_linkage *link = + &pipeline->program.link[type]; + + uint32_t num = MIN2(link->ubo_map.num, link->const_state.num_ubos); + uint32_t anum = align(num, 2); + uint32_t i; + + if (!num) + return; + + tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (2 * anum)); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(link->const_state.offsets.ubo) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(anum/2)); + tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + + for (i = 0; i < num; i++) + tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i)); + + for (; i < anum; i++) { + tu_cs_emit(cs, 0xffffffff); + tu_cs_emit(cs, 0xffffffff); + } +} + +static struct tu_cs_entry +tu6_emit_consts(struct tu_cmd_buffer *cmd, + const struct tu_pipeline *pipeline, + struct tu_descriptor_state *descriptors_state, + gl_shader_stage type) +{ + struct tu_cs cs; + tu_cs_begin_sub_stream(cmd->device, &cmd->draw_state, 512, &cs); /* TODO: maximum size? */ + + tu6_emit_user_consts(&cs, pipeline, descriptors_state, type, cmd->push_constants); + tu6_emit_ubos(&cs, pipeline, descriptors_state, type); + + return tu_cs_end_sub_stream(&cmd->draw_state, &cs); +} + +static struct tu_cs_entry +tu6_emit_textures(struct tu_device *device, struct tu_cs *draw_state, + const struct tu_pipeline *pipeline, + struct tu_descriptor_state *descriptors_state, + gl_shader_stage type, bool *needs_border) +{ + const struct tu_program_descriptor_linkage *link = + &pipeline->program.link[type]; + + uint32_t size = link->texture_map.num * A6XX_TEX_CONST_DWORDS + + link->sampler_map.num * A6XX_TEX_SAMP_DWORDS; + if (!size) + return (struct tu_cs_entry) {}; + + unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg; + enum a6xx_state_block sb; + + switch (type) { + case MESA_SHADER_VERTEX: + sb = SB6_VS_TEX; + opcode = CP_LOAD_STATE6_GEOM; + tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP_LO; + tex_const_reg = REG_A6XX_SP_VS_TEX_CONST_LO; + tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT; + break; + case MESA_SHADER_FRAGMENT: + sb = SB6_FS_TEX; + opcode = CP_LOAD_STATE6_FRAG; + tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP_LO; + tex_const_reg = REG_A6XX_SP_FS_TEX_CONST_LO; + tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT; + break; + case MESA_SHADER_COMPUTE: + sb = SB6_CS_TEX; + opcode = CP_LOAD_STATE6_FRAG; + tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP_LO; + tex_const_reg = REG_A6XX_SP_CS_TEX_CONST_LO; + tex_count_reg = REG_A6XX_SP_CS_TEX_COUNT; + break; + default: + unreachable("bad state block"); + } + + struct tu_cs cs; + tu_cs_begin_sub_stream(device, draw_state, size, &cs); + + for (unsigned i = 0; i < link->texture_map.num; i++) { + uint32_t *ptr = texture_ptr(descriptors_state, &link->texture_map, i); + + for (unsigned j = 0; j < A6XX_TEX_CONST_DWORDS; j++) + tu_cs_emit(&cs, ptr[j]); + } + + for (unsigned i = 0; i < link->sampler_map.num; i++) { + struct tu_sampler *sampler = sampler_ptr(descriptors_state, &link->sampler_map, i); + + for (unsigned j = 0; j < A6XX_TEX_SAMP_DWORDS; j++) + tu_cs_emit(&cs, sampler->state[j]); + + *needs_border |= sampler->needs_border; + } + + struct tu_cs_entry entry = tu_cs_end_sub_stream(draw_state, &cs); + + uint64_t tex_addr = entry.bo->iova + entry.offset; + uint64_t samp_addr = tex_addr + link->texture_map.num * A6XX_TEX_CONST_DWORDS*4; + + tu_cs_begin_sub_stream(device, draw_state, 64, &cs); + + /* output sampler state: */ + 
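The descriptors were just copied into a draw_state sub-stream; the packets that follow point the hardware at that copy twice, once through CP_LOAD_STATE6 with SS6_INDIRECT and once through the SP_..._TEX_SAMP/TEX_CONST base registers, apparently mirroring what freedreno's GL driver emits on a6xx. The two addresses differ only by the size of the texture-constant block:

   uint64_t tex_addr  = entry.bo->iova + entry.offset;
   uint64_t samp_addr = tex_addr +
      link->texture_map.num * A6XX_TEX_CONST_DWORDS * 4;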
tu_cs_emit_pkt7(&cs, opcode, 3); + tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num)); + tu_cs_emit_qw(&cs, samp_addr); /* SRC_ADDR_LO/HI */ + + tu_cs_emit_pkt4(&cs, tex_samp_reg, 2); + tu_cs_emit_qw(&cs, samp_addr); /* SRC_ADDR_LO/HI */ + + /* emit texture state: */ + tu_cs_emit_pkt7(&cs, opcode, 3); + tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num)); + tu_cs_emit_qw(&cs, tex_addr); /* SRC_ADDR_LO/HI */ + + tu_cs_emit_pkt4(&cs, tex_const_reg, 2); + tu_cs_emit_qw(&cs, tex_addr); /* SRC_ADDR_LO/HI */ + + tu_cs_emit_pkt4(&cs, tex_count_reg, 1); + tu_cs_emit(&cs, link->texture_map.num); + + return tu_cs_end_sub_stream(draw_state, &cs); +} + +static void +tu6_emit_border_color(struct tu_cmd_buffer *cmd, + struct tu_cs *cs) { + const struct tu_pipeline *pipeline = cmd->state.pipeline; + +#define A6XX_BORDER_COLOR_DWORDS (128/4) + uint32_t size = A6XX_BORDER_COLOR_DWORDS * + (pipeline->program.link[MESA_SHADER_VERTEX].sampler_map.num + + pipeline->program.link[MESA_SHADER_FRAGMENT].sampler_map.num) + + A6XX_BORDER_COLOR_DWORDS - 1; /* room for alignment */ + + struct tu_cs border_cs; + tu_cs_begin_sub_stream(cmd->device, &cmd->draw_state, size, &border_cs); + + /* TODO: actually fill with border color */ + for (unsigned i = 0; i < size; i++) + tu_cs_emit(&border_cs, 0); + + struct tu_cs_entry entry = tu_cs_end_sub_stream(&cmd->draw_state, &border_cs); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2); + tu_cs_emit_qw(cs, align(entry.bo->iova + entry.offset, 128)); +} + +static void +tu6_bind_draw_states(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_draw_info *draw) +{ + const struct tu_pipeline *pipeline = cmd->state.pipeline; + const struct tu_dynamic_state *dynamic = &cmd->state.dynamic; + struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT]; + uint32_t draw_state_group_count = 0; + + struct tu_descriptor_state *descriptors_state = + &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS]; + + VkResult result = tu_cs_reserve_space(cmd->device, cs, 256); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + /* TODO lrz */ + + uint32_t pc_primitive_cntl = 0; + if (pipeline->ia.primitive_restart && draw->indexed) + pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART; + + tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0); + tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9990, 0); + tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1); + tu_cs_emit(cs, pc_primitive_cntl); + + if (cmd->state.dirty & + (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) && + (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) { + tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl, + dynamic->line_width); + } + + if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) && + (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) { + tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front, + dynamic->stencil_compare_mask.back); + } + + if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) && + (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) { + 
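Every dynamic state in tu6_bind_draw_states() is gated the same way: the command-buffer dirty bit says the value changed, and the pipeline's dynamic_state.mask confirms the pipeline actually left that state dynamic (otherwise the value baked into the pipeline must win). Schematically, with the _X names standing in for any one state:

   if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_X) &&
       (pipeline->dynamic_state.mask & TU_DYNAMIC_X))
      emit_dynamic_value(cs);   /* hypothetical stand-in */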
tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front, + dynamic->stencil_write_mask.back); + } + + if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) && + (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) { + tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front, + dynamic->stencil_reference.back); + } + + if (cmd->state.dirty & + (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) { + for (uint32_t i = 0; i < pipeline->vi.count; i++) { + const uint32_t binding = pipeline->vi.bindings[i]; + const uint32_t stride = pipeline->vi.strides[i]; + const struct tu_buffer *buf = cmd->state.vb.buffers[binding]; + const VkDeviceSize offset = buf->bo_offset + + cmd->state.vb.offsets[binding] + + pipeline->vi.offsets[i]; + const VkDeviceSize size = + offset < buf->bo->size ? buf->bo->size - offset : 0; + + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_FETCH(i), 4); + tu_cs_emit_qw(cs, buf->bo->iova + offset); + tu_cs_emit(cs, size); + tu_cs_emit(cs, stride); + } + } + + if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) { + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_PROGRAM, + .enable_mask = 0x6, + .ib = pipeline->program.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_PROGRAM_BINNING, + .enable_mask = 0x1, + .ib = pipeline->program.binning_state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_VI, + .enable_mask = 0x6, + .ib = pipeline->vi.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_VI_BINNING, + .enable_mask = 0x1, + .ib = pipeline->vi.binning_state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_VP, + .enable_mask = 0x7, + .ib = pipeline->vp.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_RAST, + .enable_mask = 0x7, + .ib = pipeline->rast.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_DS, + .enable_mask = 0x7, + .ib = pipeline->ds.state_ib, + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_BLEND, + .enable_mask = 0x7, + .ib = pipeline->blend.state_ib, + }; + } + + if (cmd->state.dirty & + (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DESCRIPTOR_SETS)) { + bool needs_border = false; + + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_VS_CONST, + .enable_mask = 0x7, + .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX) + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_FS_CONST, + .enable_mask = 0x6, + .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT) + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_VS_TEX, + .enable_mask = 0x7, + .ib = tu6_emit_textures(cmd->device, &cmd->draw_state, pipeline, + descriptors_state, MESA_SHADER_VERTEX, + &needs_border) + }; + draw_state_groups[draw_state_group_count++] = + (struct tu_draw_state_group) { + .id = TU_DRAW_STATE_FS_TEX, + .enable_mask = 0x6, + .ib = tu6_emit_textures(cmd->device, &cmd->draw_state, pipeline, + descriptors_state, MESA_SHADER_FRAGMENT, + &needs_border) + }; + + if (needs_border) + 
tu6_emit_border_color(cmd, cs); + } + + tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count); + for (uint32_t i = 0; i < draw_state_group_count; i++) { + const struct tu_draw_state_group *group = &draw_state_groups[i]; + + uint32_t cp_set_draw_state = + CP_SET_DRAW_STATE__0_COUNT(group->ib.size / 4) | + CP_SET_DRAW_STATE__0_ENABLE_MASK(group->enable_mask) | + CP_SET_DRAW_STATE__0_GROUP_ID(group->id); + uint64_t iova; + if (group->ib.size) { + iova = group->ib.bo->iova + group->ib.offset; + } else { + cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE; + iova = 0; + } + + tu_cs_emit(cs, cp_set_draw_state); + tu_cs_emit_qw(cs, iova); + } + + tu_cs_sanity_check(cs); + + /* track BOs */ + if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) { + tu_bo_list_add(&cmd->bo_list, &pipeline->program.binary_bo, + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); + for (uint32_t i = 0; i < pipeline->cs.bo_count; i++) { + tu_bo_list_add(&cmd->bo_list, pipeline->cs.bos[i], + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); + } + } + if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) { + for (uint32_t i = 0; i < MAX_VBS; i++) { + const struct tu_buffer *buf = cmd->state.vb.buffers[i]; + if (buf) + tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ); + } + } + if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) { + unsigned i; + for_each_bit(i, descriptors_state->valid) { + struct tu_descriptor_set *set = descriptors_state->sets[i]; + for (unsigned j = 0; j < set->layout->buffer_count; ++j) + if (set->descriptors[j]) { + tu_bo_list_add(&cmd->bo_list, set->descriptors[j], + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE); + } + } + } + cmd->state.dirty = 0; +} + +static void +tu6_emit_draw_direct(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_draw_info *draw) +{ + + const enum pc_di_primtype primtype = cmd->state.pipeline->ia.primtype; + + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_INDEX_OFFSET, 2); + tu_cs_emit(cs, draw->vertex_offset); + tu_cs_emit(cs, draw->first_instance); + + /* TODO hw binning */ + if (draw->indexed) { + const enum a4xx_index_size index_size = + tu6_index_size(cmd->state.index_type); + const uint32_t index_bytes = + (cmd->state.index_type == VK_INDEX_TYPE_UINT32) ? 
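The index width feeds both the INDEX_SIZE field of the draw packet and the byte math for the source range below; tu6_index_size() near the top of this patch rejects anything but UINT16/UINT32, so the two-way ternary here is exhaustive:

   /* equivalent helper, hypothetical name, shown for clarity: */
   static uint32_t tu_index_bytes(VkIndexType type)
   {
      return type == VK_INDEX_TYPE_UINT32 ? 4 : 2;
   }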
4 : 2; + const struct tu_buffer *buf = cmd->state.index_buffer; + const VkDeviceSize offset = buf->bo_offset + cmd->state.index_offset + + index_bytes * draw->first_index; + const uint32_t size = index_bytes * draw->count; + + const uint32_t cp_draw_indx = + CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | + CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) | + CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) | + CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000; + + tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7); + tu_cs_emit(cs, cp_draw_indx); + tu_cs_emit(cs, draw->instance_count); + tu_cs_emit(cs, draw->count); + tu_cs_emit(cs, 0x0); /* XXX */ + tu_cs_emit_qw(cs, buf->bo->iova + offset); + tu_cs_emit(cs, size); + } else { + const uint32_t cp_draw_indx = + CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | + CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) | + CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000; + + tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3); + tu_cs_emit(cs, cp_draw_indx); + tu_cs_emit(cs, draw->instance_count); + tu_cs_emit(cs, draw->count); + } +} + +static void +tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw) +{ + struct tu_cs *cs = &cmd->draw_cs; + + tu6_bind_draw_states(cmd, cs, draw); + + VkResult result = tu_cs_reserve_space(cmd->device, cs, 32); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } + + if (draw->indirect) { + tu_finishme("indirect draw"); + return; + } + + /* TODO tu6_emit_marker should pick different regs depending on cs */ + tu6_emit_marker(cmd, cs); + tu6_emit_draw_direct(cmd, cs, draw); + tu6_emit_marker(cmd, cs); + + cmd->wait_for_idle = true; + + tu_cs_sanity_check(cs); } void @@ -2080,8 +2993,13 @@ tu_CmdEndRenderPass(VkCommandBuffer commandBuffer) { TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + tu_cs_end(&cmd_buffer->draw_cs); + tu_cmd_render_tiles(cmd_buffer); + /* discard draw_cs entries now that the tiles are rendered */ + tu_cs_discard_entries(&cmd_buffer->draw_cs); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); cmd_buffer->state.attachments = NULL;
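Taken together, the new draw_cs lifecycle across a render pass:

   /* vkCmdBeginRenderPass:   tu_cs_begin(&cmd->draw_cs)   (must be empty)
    * vkCmdDraw*, vkCmdSet*:  append to draw_cs / draw_state sub-streams
    * vkCmdEndRenderPass:     tu_cs_end(&cmd->draw_cs);
    *                         tu_cmd_render_tiles();       (one
    *                         tu_cs_emit_call() per tile)
    *                         tu_cs_discard_entries(&cmd->draw_cs);
    */

Both new streams are also added to the submit BO list in tu_EndCommandBuffer() so the kernel maps them for the GPU.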