From 233610f8cf8d8810173a7eac0c046574aeb485f9 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Sun, 14 Jun 2020 10:52:37 -0400 Subject: [PATCH] turnip: refactor draw states and dynamic states This reworks dynamic states to use draw states, and reworks draw states. This moves towards doing as little as possible in bind_draw_states. Signed-off-by: Jonathan Marek Part-of: --- src/freedreno/vulkan/tu_clear_blit.c | 18 +- src/freedreno/vulkan/tu_cmd_buffer.c | 457 +++++++++++++-------------- src/freedreno/vulkan/tu_pipeline.c | 249 ++++++--------- src/freedreno/vulkan/tu_private.h | 157 ++++----- 4 files changed, 381 insertions(+), 500 deletions(-) diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 54d12c5bfb0..ab35bc60f14 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -2081,6 +2081,17 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, /* This clear path behaves like a draw, needs the same flush as tu_draw */ tu_emit_cache_flush_renderpass(cmd, cs); + /* disable all draw states so they don't interfere + * TODO: use and re-use draw states for this path + */ + tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3); + tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE; + tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) | A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) | @@ -2167,13 +2178,6 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, } } } - - cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE | - TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | - TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | - TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | - TU_CMD_DIRTY_DYNAMIC_VIEWPORT | - TU_CMD_DIRTY_DYNAMIC_SCISSOR; } /** diff 
--git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index b050550cddb..affd1b3e9db 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -686,6 +686,58 @@ tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1) A6XX_SP_TP_WINDOW_OFFSET(.x = x1, .y = y1)); } +static void +tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state) +{ + uint32_t enable_mask; + switch (id) { + case TU_DRAW_STATE_PROGRAM: + case TU_DRAW_STATE_VI: + case TU_DRAW_STATE_FS_CONST: + /* The blob seems to not enable this (DESC_SETS_LOAD) for binning, even + * when resources would actually be used in the binning shader. + * Presumably the overhead of prefetching the resources isn't + * worth it. + */ + case TU_DRAW_STATE_DESC_SETS_LOAD: + enable_mask = CP_SET_DRAW_STATE__0_GMEM | + CP_SET_DRAW_STATE__0_SYSMEM; + break; + case TU_DRAW_STATE_PROGRAM_BINNING: + case TU_DRAW_STATE_VI_BINNING: + enable_mask = CP_SET_DRAW_STATE__0_BINNING; + break; + case TU_DRAW_STATE_DESC_SETS_GMEM: + enable_mask = CP_SET_DRAW_STATE__0_GMEM; + break; + case TU_DRAW_STATE_DESC_SETS_SYSMEM: + enable_mask = CP_SET_DRAW_STATE__0_BINNING | + CP_SET_DRAW_STATE__0_SYSMEM; + break; + default: + enable_mask = CP_SET_DRAW_STATE__0_GMEM | + CP_SET_DRAW_STATE__0_SYSMEM | + CP_SET_DRAW_STATE__0_BINNING; + break; + } + + tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(state.size) | + enable_mask | + CP_SET_DRAW_STATE__0_GROUP_ID(id) | + COND(!state.size, CP_SET_DRAW_STATE__0_DISABLE)); + tu_cs_emit_qw(cs, state.iova); +} + +/* note: get rid of this eventually */ +static void +tu_cs_emit_sds_ib(struct tu_cs *cs, uint32_t id, struct tu_cs_entry entry) +{ + tu_cs_emit_draw_state(cs, id, (struct tu_draw_state) { + .iova = entry.size ? 
entry.bo->iova + entry.offset : 0, + .size = entry.size / 4, + }); +} + static bool use_hw_binning(struct tu_cmd_buffer *cmd) { @@ -1987,6 +2039,28 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer) return cmd_buffer->record_result; } +static struct tu_cs +tu_cmd_dynamic_state(struct tu_cmd_buffer *cmd, uint32_t id, uint32_t size) +{ + struct ts_cs_memory memory; + struct tu_cs cs; + + /* TODO: share this logic with tu_pipeline_static_state */ + tu_cs_alloc(&cmd->sub_cs, size, 1, &memory); + tu_cs_init_external(&cs, memory.map, memory.map + size); + tu_cs_begin(&cs); + tu_cs_reserve_space(&cs, size); + + assert(id < ARRAY_SIZE(cmd->state.dynamic_state)); + cmd->state.dynamic_state[id].iova = memory.iova; + cmd->state.dynamic_state[id].size = size; + + tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3); + tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DYNAMIC + id, cmd->state.dynamic_state[id]); + + return cs; +} + void tu_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -2011,7 +2085,23 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS); cmd->state.pipeline = pipeline; - cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_SHADER_CONSTS; + cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS; + + struct tu_cs *cs = &cmd->draw_cs; + uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT); + uint32_t i; + + tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (7 + util_bitcount(mask))); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI, pipeline->vi.state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DS, pipeline->ds.state_ib); + 
tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_BLEND, pipeline->blend.state_ib); + + for_each_bit(i, mask) + tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]); /* If the new pipeline requires more VBs than we had previously set up, we * need to re-emit them in SDS. If it requires the same set or fewer, we @@ -2023,6 +2113,18 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, /* If the pipeline needs a dynamic descriptor, re-emit descriptor sets */ if (pipeline->layout->dynamic_offset_count + pipeline->layout->input_attachment_count) cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS; + + /* dynamic linewidth state depends on pipeline state's gras_su_cntl + * so the dynamic state ib must be updated when pipeline changes + */ + if (pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_LINE_WIDTH)) { + struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2); + + cmd->state.dynamic_gras_su_cntl &= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; + cmd->state.dynamic_gras_su_cntl |= pipeline->gras_su_cntl; + + tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl)); + } } void @@ -2032,10 +2134,11 @@ tu_CmdSetViewport(VkCommandBuffer commandBuffer, const VkViewport *pViewports) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 18); assert(firstViewport == 0 && viewportCount == 1); - cmd->state.dynamic.viewport.viewports[0] = pViewports[0]; - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_VIEWPORT; + + tu6_emit_viewport(&cs, pViewports); } void @@ -2045,21 +2148,23 @@ tu_CmdSetScissor(VkCommandBuffer commandBuffer, const VkRect2D *pScissors) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_SCISSOR, 3); assert(firstScissor == 0 && scissorCount == 1); - cmd->state.dynamic.scissor.scissors[0] = pScissors[0]; - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_SCISSOR; + + tu6_emit_scissor(&cs, 
pScissors); } void tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_LINE_WIDTH, 2); - cmd->state.dynamic.line_width = lineWidth; + cmd->state.dynamic_gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; + cmd->state.dynamic_gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f); - /* line width depends on VkPipelineRasterizationStateCreateInfo */ - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH; + tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl)); } void @@ -2069,12 +2174,9 @@ tu_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasSlopeFactor) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs *draw_cs = &cmd->draw_cs; + struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_DEPTH_BIAS, 4); - tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp, - depthBiasSlopeFactor); - - tu_cs_sanity_check(draw_cs); + tu6_emit_depth_bias(&cs, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor); } void @@ -2082,11 +2184,10 @@ tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4]) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs *draw_cs = &cmd->draw_cs; - - tu6_emit_blend_constants(draw_cs, blendConstants); + struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5); - tu_cs_sanity_check(draw_cs); + tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4); + tu_cs_emit_array(&cs, (const uint32_t *) blendConstants, 4); } void @@ -2096,20 +2197,26 @@ tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer, { } +static void +update_stencil_mask(uint32_t *value, VkStencilFaceFlags face, uint32_t mask) +{ + if (face & VK_STENCIL_FACE_FRONT_BIT) + *value = (*value & 0xff00) | (mask & 0xff); /* replace the front-face byte instead of OR-ing into stale bits; back-face byte is preserved */ + if (face & VK_STENCIL_FACE_BACK_BIT) + *value = (*value & 0xff) | ((mask & 0xff) << 8); /* replace the back-face byte; front-face byte is preserved */ +} + void 
tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2); - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd->state.dynamic.stencil_compare_mask.front = compareMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd->state.dynamic.stencil_compare_mask.back = compareMask; + update_stencil_mask(&cmd->state.dynamic_stencil_mask, faceMask, compareMask); - /* the front/back compare masks must be updated together */ - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; + tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.dword = cmd->state.dynamic_stencil_mask)); } void @@ -2118,14 +2225,11 @@ tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, uint32_t writeMask) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2); - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd->state.dynamic.stencil_write_mask.front = writeMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd->state.dynamic.stencil_write_mask.back = writeMask; + update_stencil_mask(&cmd->state.dynamic_stencil_wrmask, faceMask, writeMask); - /* the front/back write masks must be updated together */ - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; + tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.dword = cmd->state.dynamic_stencil_wrmask)); } void @@ -2134,14 +2238,11 @@ tu_CmdSetStencilReference(VkCommandBuffer commandBuffer, uint32_t reference) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2); - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd->state.dynamic.stencil_reference.front = reference; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd->state.dynamic.stencil_reference.back = reference; + 
update_stencil_mask(&cmd->state.dynamic_stencil_ref, faceMask, reference); - /* the front/back references must be updated together */ - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; + tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.dword = cmd->state.dynamic_stencil_ref)); } void @@ -2149,8 +2250,11 @@ tu_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer, const VkSampleLocationsInfoEXT* pSampleLocationsInfo) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, 9); - tu6_emit_sample_locations(&cmd->draw_cs, pSampleLocationsInfo); + assert(pSampleLocationsInfo); + + tu6_emit_sample_locations(&cs, pSampleLocationsInfo); } static void @@ -2578,6 +2682,8 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer, tu_bo_list_add(&cmd->bo_list, iview->image->bo, MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE); } + + cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE; } void @@ -2701,39 +2807,6 @@ struct tu_draw_info uint64_t streamout_buffer_offset; }; -#define ENABLE_ALL (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM) -#define ENABLE_DRAW (CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM) -#define ENABLE_NON_GMEM (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_SYSMEM) - -enum tu_draw_state_group_id -{ - TU_DRAW_STATE_PROGRAM, - TU_DRAW_STATE_PROGRAM_BINNING, - TU_DRAW_STATE_VB, - TU_DRAW_STATE_VI, - TU_DRAW_STATE_VI_BINNING, - TU_DRAW_STATE_VP, - TU_DRAW_STATE_RAST, - TU_DRAW_STATE_DS, - TU_DRAW_STATE_BLEND, - TU_DRAW_STATE_VS_CONST, - TU_DRAW_STATE_GS_CONST, - TU_DRAW_STATE_FS_CONST, - TU_DRAW_STATE_DESC_SETS, - TU_DRAW_STATE_DESC_SETS_GMEM, - TU_DRAW_STATE_DESC_SETS_LOAD, - TU_DRAW_STATE_VS_PARAMS, - - TU_DRAW_STATE_COUNT, -}; - -struct tu_draw_state_group -{ - enum tu_draw_state_group_id id; - uint32_t enable_mask; - struct tu_cs_entry ib; -}; - static void tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline, struct 
tu_descriptor_state *descriptors_state, @@ -3088,9 +3161,6 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw) { const struct tu_pipeline *pipeline = cmd->state.pipeline; - const struct tu_dynamic_state *dynamic = &cmd->state.dynamic; - struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT]; - uint32_t draw_state_group_count = 0; VkResult result; struct tu_descriptor_state *descriptors_state = @@ -3102,120 +3172,13 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd, A6XX_PC_PRIMITIVE_CNTL_0(.primitive_restart = pipeline->ia.primitive_restart && draw->indexed)); - if (cmd->state.dirty & - (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) { - tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl, - dynamic->line_width); - } - - if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) { - tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front, - dynamic->stencil_compare_mask.back); - } - - if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) { - tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front, - dynamic->stencil_write_mask.back); - } - - if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) { - tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front, - dynamic->stencil_reference.back); - } - - if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_VIEWPORT) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) { - tu6_emit_viewport(cs, &cmd->state.dynamic.viewport.viewports[0]); - } - - if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_SCISSOR) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) { - tu6_emit_scissor(cs, &cmd->state.dynamic.scissor.scissors[0]); - } - - if 
(cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) { - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_PROGRAM, - .enable_mask = ENABLE_DRAW, - .ib = pipeline->program.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_PROGRAM_BINNING, - .enable_mask = CP_SET_DRAW_STATE__0_BINNING, - .ib = pipeline->program.binning_state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_VI, - .enable_mask = ENABLE_DRAW, - .ib = pipeline->vi.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_VI_BINNING, - .enable_mask = CP_SET_DRAW_STATE__0_BINNING, - .ib = pipeline->vi.binning_state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_VP, - .enable_mask = ENABLE_ALL, - .ib = pipeline->vp.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_RAST, - .enable_mask = ENABLE_ALL, - .ib = pipeline->rast.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_DS, - .enable_mask = ENABLE_ALL, - .ib = pipeline->ds.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_BLEND, - .enable_mask = ENABLE_ALL, - .ib = pipeline->blend.state_ib, - }; - } - if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) { - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_VS_CONST, - .enable_mask = ENABLE_ALL, - .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX) - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_GS_CONST, - .enable_mask = ENABLE_ALL, - .ib = tu6_emit_consts(cmd, pipeline, 
descriptors_state, MESA_SHADER_GEOMETRY) - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_FS_CONST, - .enable_mask = ENABLE_DRAW, - .ib = tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT) - }; - } - - if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) { - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_VB, - .enable_mask = ENABLE_ALL, - .ib = tu6_emit_vertex_buffers(cmd, pipeline) - }; + cmd->state.shader_const_ib[MESA_SHADER_VERTEX] = + tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_VERTEX); + cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY] = + tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_GEOMETRY); + cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT] = + tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT); } if (cmd->state.dirty & TU_CMD_DIRTY_STREAMOUT_BUFFERS) @@ -3234,35 +3197,26 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd, * could also only re-emit dynamic state. */ if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) { - struct tu_cs_entry desc_sets, desc_sets_gmem; bool need_gmem_desc_set = pipeline->layout->input_attachment_count > 0; result = tu6_emit_descriptor_sets(cmd, pipeline, VK_PIPELINE_BIND_POINT_GRAPHICS, - &desc_sets, false); + &cmd->state.desc_sets_ib, false); if (result != VK_SUCCESS) return result; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_DESC_SETS, - .enable_mask = need_gmem_desc_set ? 
ENABLE_NON_GMEM : ENABLE_ALL, - .ib = desc_sets, - }; - if (need_gmem_desc_set) { + cmd->state.desc_sets_sysmem_ib = cmd->state.desc_sets_ib; + cmd->state.desc_sets_ib.size = 0; + result = tu6_emit_descriptor_sets(cmd, pipeline, VK_PIPELINE_BIND_POINT_GRAPHICS, - &desc_sets_gmem, true); + &cmd->state.desc_sets_gmem_ib, true); if (result != VK_SUCCESS) return result; - - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_DESC_SETS_GMEM, - .enable_mask = CP_SET_DRAW_STATE__0_GMEM, - .ib = desc_sets_gmem, - }; + } else { + cmd->state.desc_sets_gmem_ib.size = 0; + cmd->state.desc_sets_sysmem_ib.size = 0; } /* We need to reload the descriptors every time the descriptor sets @@ -3286,52 +3240,79 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd, tu_cs_emit_array(&load_cs, (uint32_t *)((char *)load_entry->bo->map + load_entry->offset), load_entry->size / 4); - struct tu_cs_entry load_copy = tu_cs_end_sub_stream(&cmd->sub_cs, &load_cs); - - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_DESC_SETS_LOAD, - /* The blob seems to not enable this for binning, even when - * resources would actually be used in the binning shader. - * Presumably the overhead of prefetching the resources isn't - * worth it. 
- */ - .enable_mask = ENABLE_DRAW, - .ib = load_copy, - }; + cmd->state.desc_sets_load_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &load_cs); + } else { + cmd->state.desc_sets_load_ib.size = 0; } } + if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) + cmd->state.vertex_buffers_ib = tu6_emit_vertex_buffers(cmd, pipeline); + struct tu_cs_entry vs_params; result = tu6_emit_vs_params(cmd, draw, &vs_params); if (result != VK_SUCCESS) return result; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_VS_PARAMS, - .enable_mask = ENABLE_ALL, - .ib = vs_params, - }; - - tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count); - for (uint32_t i = 0; i < draw_state_group_count; i++) { - const struct tu_draw_state_group *group = &draw_state_groups[i]; - debug_assert((group->enable_mask & ~ENABLE_ALL) == 0); - uint32_t cp_set_draw_state = - CP_SET_DRAW_STATE__0_COUNT(group->ib.size / 4) | - group->enable_mask | - CP_SET_DRAW_STATE__0_GROUP_ID(group->id); - uint64_t iova; - if (group->ib.size) { - iova = group->ib.bo->iova + group->ib.offset; - } else { - cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE; - iova = 0; + /* for the first draw in a renderpass, re-emit all the draw states + * + * and if a draw-state disabling path (CmdClearAttachments 3D fallback) was + * used, then draw states must be re-emitted. 
note however this only happens + * in the sysmem path, so this can be skipped for the gmem path (TODO) + */ + if (cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE) { + tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * TU_DRAW_STATE_COUNT); + + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI, pipeline->vi.state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DS, pipeline->ds.state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_BLEND, pipeline->blend.state_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const_ib[MESA_SHADER_VERTEX]); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_GMEM, cmd->state.desc_sets_gmem_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_SYSMEM, cmd->state.desc_sets_sysmem_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params); + + for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.dynamic_state); i++) { + tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, + ((pipeline->dynamic_state_mask & BIT(i)) ? 
+ cmd->state.dynamic_state[i] : + pipeline->dynamic_state[i])); } + } else { - tu_cs_emit(cs, cp_set_draw_state); - tu_cs_emit_qw(cs, iova); + /* emit draw states that were just updated + * note we eventually don't want to have to emit anything here + */ + uint32_t draw_state_count = + ((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 3 : 0) + + ((cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) ? 4 : 0) + + ((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) + + 1; /* vs_params */ + + tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_count); + + if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) { + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_CONST, cmd->state.shader_const_ib[MESA_SHADER_VERTEX]); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]); + } + if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) { + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_GMEM, cmd->state.desc_sets_gmem_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_SYSMEM, cmd->state.desc_sets_sysmem_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib); + } + if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib); + tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VS_PARAMS, vs_params); } tu_cs_sanity_check(cs); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index d818f9f807c..e22b301099b 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -281,36 +281,6 @@ struct tu_pipeline_builder uint32_t render_components; }; -static enum tu_dynamic_state_bits -tu_dynamic_state_bit(VkDynamicState state) -{ - switch (state) { - case VK_DYNAMIC_STATE_VIEWPORT: - return TU_DYNAMIC_VIEWPORT; - case 
VK_DYNAMIC_STATE_SCISSOR: - return TU_DYNAMIC_SCISSOR; - case VK_DYNAMIC_STATE_LINE_WIDTH: - return TU_DYNAMIC_LINE_WIDTH; - case VK_DYNAMIC_STATE_DEPTH_BIAS: - return TU_DYNAMIC_DEPTH_BIAS; - case VK_DYNAMIC_STATE_BLEND_CONSTANTS: - return TU_DYNAMIC_BLEND_CONSTANTS; - case VK_DYNAMIC_STATE_DEPTH_BOUNDS: - return TU_DYNAMIC_DEPTH_BOUNDS; - case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: - return TU_DYNAMIC_STENCIL_COMPARE_MASK; - case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: - return TU_DYNAMIC_STENCIL_WRITE_MASK; - case VK_DYNAMIC_STATE_STENCIL_REFERENCE: - return TU_DYNAMIC_STENCIL_REFERENCE; - case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: - return TU_DYNAMIC_SAMPLE_LOCATIONS; - default: - unreachable("invalid dynamic state"); - return 0; - } -} - static bool tu_logic_op_reads_dst(VkLogicOp op) { @@ -1645,22 +1615,6 @@ tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp tu_cs_emit(cs, sample_locations); } -static void -tu6_emit_gras_unknowns(struct tu_cs *cs) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1); - tu_cs_emit(cs, 0x0); -} - -static void -tu6_emit_point_size(struct tu_cs *cs) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2); - tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) | - A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f)); - tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f).value); -} - static uint32_t tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info, VkSampleCountFlagBits samples) @@ -1686,18 +1640,6 @@ tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info, return gras_su_cntl; } -void -tu6_emit_gras_su_cntl(struct tu_cs *cs, - uint32_t gras_su_cntl, - float line_width) -{ - assert((gras_su_cntl & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK) == 0); - gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width / 2.0f); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_CNTL, 1); - tu_cs_emit(cs, gras_su_cntl); -} - void tu6_emit_depth_bias(struct tu_cs *cs, float constant_factor, @@ 
-1710,13 +1652,6 @@ tu6_emit_depth_bias(struct tu_cs *cs, tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value); } -static void -tu6_emit_alpha_control_disable(struct tu_cs *cs) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_ALPHA_CONTROL, 1); - tu_cs_emit(cs, 0); -} - static void tu6_emit_depth_control(struct tu_cs *cs, const VkPipelineDepthStencilStateCreateInfo *ds_info, @@ -1768,30 +1703,6 @@ tu6_emit_stencil_control(struct tu_cs *cs, tu_cs_emit(cs, rb_stencil_control); } -void -tu6_emit_stencil_compare_mask(struct tu_cs *cs, uint32_t front, uint32_t back) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILMASK, 1); - tu_cs_emit( - cs, A6XX_RB_STENCILMASK_MASK(front) | A6XX_RB_STENCILMASK_BFMASK(back)); -} - -void -tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILWRMASK, 1); - tu_cs_emit(cs, A6XX_RB_STENCILWRMASK_WRMASK(front) | - A6XX_RB_STENCILWRMASK_BFWRMASK(back)); -} - -void -tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILREF, 1); - tu_cs_emit(cs, - A6XX_RB_STENCILREF_REF(front) | A6XX_RB_STENCILREF_BFREF(back)); -} - static uint32_t tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att, bool has_alpha) @@ -1912,13 +1823,6 @@ tu6_emit_blend_control(struct tu_cs *cs, .alpha_to_one = msaa_info->alphaToOneEnable)); } -void -tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_RED_F32, 4); - tu_cs_emit_array(cs, (const uint32_t *) constants, 4); -} - static VkResult tu_pipeline_create(struct tu_device *dev, struct tu_pipeline_layout *layout, @@ -2095,8 +1999,18 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, return; for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { - pipeline->dynamic_state.mask |= - tu_dynamic_state_bit(dynamic_info->pDynamicStates[i]); + VkDynamicState state = 
dynamic_info->pDynamicStates[i]; + switch (state) { + case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE: + pipeline->dynamic_state_mask |= BIT(state); + break; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS); + break; + default: + assert(!"unsupported dynamic state"); + break; + } } } @@ -2186,6 +2100,27 @@ tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder, pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable; } +static bool +tu_pipeline_static_state(struct tu_pipeline *pipeline, struct tu_cs *cs, + uint32_t id, uint32_t size) +{ + struct ts_cs_memory memory; + + if (pipeline->dynamic_state_mask & BIT(id)) + return false; + + /* TODO: share this logic with tu_cmd_dynamic_state */ + tu_cs_alloc(&pipeline->cs, size, 1, &memory); + tu_cs_init_external(cs, memory.map, memory.map + size); + tu_cs_begin(cs); + tu_cs_reserve_space(cs, size); + + assert(id < ARRAY_SIZE(pipeline->dynamic_state)); + pipeline->dynamic_state[id].iova = memory.iova; + pipeline->dynamic_state[id].size = size; + return true; +} + static void tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) @@ -2204,20 +2139,13 @@ tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder, const VkPipelineViewportStateCreateInfo *vp_info = builder->create_info->pViewportState; - struct tu_cs vp_cs; - tu_cs_begin_sub_stream(&pipeline->cs, 21, &vp_cs); - - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) { - assert(vp_info->viewportCount == 1); - tu6_emit_viewport(&vp_cs, vp_info->pViewports); - } + struct tu_cs cs; - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) { - assert(vp_info->scissorCount == 1); - tu6_emit_scissor(&vp_cs, vp_info->pScissors); - } + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_VIEWPORT, 18)) + tu6_emit_viewport(&cs, vp_info->pViewports); - pipeline->vp.state_ib = 
tu_cs_end_sub_stream(&pipeline->cs, &vp_cs); + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_SCISSOR, 3)) + tu6_emit_scissor(&cs, vp_info->pScissors); } static void @@ -2229,11 +2157,10 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL); - struct tu_cs rast_cs; - tu_cs_begin_sub_stream(&pipeline->cs, 20, &rast_cs); - + struct tu_cs cs; + tu_cs_begin_sub_stream(&pipeline->cs, 7, &cs); - tu_cs_emit_regs(&rast_cs, + tu_cs_emit_regs(&cs, A6XX_GRAS_CL_CNTL( .znear_clip_disable = rast_info->depthClampEnable, .zfar_clip_disable = rast_info->depthClampEnable, @@ -2241,24 +2168,28 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, .zero_gb_scale_z = 1, .vp_clip_code_ignore = 1)); /* move to hw ctx init? */ - tu6_emit_gras_unknowns(&rast_cs); - tu6_emit_point_size(&rast_cs); + tu_cs_emit_regs(&cs, A6XX_GRAS_UNKNOWN_8001()); + tu_cs_emit_regs(&cs, + A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f), + A6XX_GRAS_SU_POINT_SIZE(1.0f)); + + pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs); - const uint32_t gras_su_cntl = + pipeline->gras_su_cntl = tu6_gras_su_cntl(rast_info, builder->samples); - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) - tu6_emit_gras_su_cntl(&rast_cs, gras_su_cntl, rast_info->lineWidth); + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_LINE_WIDTH, 2)) { + pipeline->gras_su_cntl |= + A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f); + tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = pipeline->gras_su_cntl)); + } - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_DEPTH_BIAS)) { - tu6_emit_depth_bias(&rast_cs, rast_info->depthBiasConstantFactor, + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) { + tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor, rast_info->depthBiasClamp, rast_info->depthBiasSlopeFactor); } - 
pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &rast_cs); - - pipeline->rast.gras_su_cntl = gras_su_cntl; } static void @@ -2286,30 +2217,31 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, builder->depth_attachment_format != VK_FORMAT_S8_UINT ? ds_info : &dummy_ds_info; - struct tu_cs ds_cs; - tu_cs_begin_sub_stream(&pipeline->cs, 12, &ds_cs); + struct tu_cs cs; + tu_cs_begin_sub_stream(&pipeline->cs, 6, &cs); /* move to hw ctx init? */ - tu6_emit_alpha_control_disable(&ds_cs); - - tu6_emit_depth_control(&ds_cs, ds_info_depth, + tu_cs_emit_regs(&cs, A6XX_RB_ALPHA_CONTROL()); + tu6_emit_depth_control(&cs, ds_info_depth, builder->create_info->pRasterizationState); - tu6_emit_stencil_control(&ds_cs, ds_info); + tu6_emit_stencil_control(&cs, ds_info); - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) { - tu6_emit_stencil_compare_mask(&ds_cs, ds_info->front.compareMask, - ds_info->back.compareMask); - } - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) { - tu6_emit_stencil_write_mask(&ds_cs, ds_info->front.writeMask, - ds_info->back.writeMask); + pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs); + + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2)) { + tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.mask = ds_info->front.compareMask & 0xff, + .bfmask = ds_info->back.compareMask & 0xff)); } - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) { - tu6_emit_stencil_reference(&ds_cs, ds_info->front.reference, - ds_info->back.reference); + + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2)) { + tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.wrmask = ds_info->front.writeMask & 0xff, + .bfwrmask = ds_info->back.writeMask & 0xff)); } - pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &ds_cs); + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2)) { + 
tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.ref = ds_info->front.reference & 0xff, + .bfref = ds_info->back.reference & 0xff)); + } } static void @@ -2342,32 +2274,35 @@ tu_pipeline_builder_parse_multisample_and_color_blend( builder->use_color_attachments ? builder->create_info->pColorBlendState : &dummy_blend_info; - struct tu_cs blend_cs; - tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 18, &blend_cs); + struct tu_cs cs; + tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 4, &cs); uint32_t blend_enable_mask; - tu6_emit_rb_mrt_controls(&blend_cs, blend_info, + tu6_emit_rb_mrt_controls(&cs, blend_info, builder->color_attachment_formats, &blend_enable_mask); - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS)) - tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants); - - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SAMPLE_LOCATIONS)) { - const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations = - vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); - const VkSampleLocationsInfoEXT *samp_loc = NULL; + tu6_emit_blend_control(&cs, blend_enable_mask, + builder->use_dual_src_blend, msaa_info); - if (sample_locations && sample_locations->sampleLocationsEnable) - samp_loc = &sample_locations->sampleLocationsInfo; + pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs); - tu6_emit_sample_locations(&blend_cs, samp_loc); + if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5)) { + tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4); + tu_cs_emit_array(&cs, (const uint32_t *) blend_info->blendConstants, 4); } - tu6_emit_blend_control(&blend_cs, blend_enable_mask, - builder->use_dual_src_blend, msaa_info); + const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations = + vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); + const VkSampleLocationsInfoEXT *samp_loc = NULL; + + if (sample_locations && 
sample_locations->sampleLocationsEnable) + samp_loc = &sample_locations->sampleLocationsInfo; - pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs); + if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, + samp_loc ? 9 : 6)) { + tu6_emit_sample_locations(&cs, samp_loc); + } } static void diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 527bf817bbc..ddb25677b48 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -129,6 +129,7 @@ tu_minify(uint32_t n, uint32_t levels) }) #define COND(bool, val) ((bool) ? (val) : 0) +#define BIT(bit) (1u << (bit)) /* Whenever we generate an error, pass it through this function. Useful for * debugging, where we can break on it. Only call at error site, not when @@ -409,6 +410,42 @@ struct ts_cs_memory { uint64_t iova; }; +struct tu_draw_state { + uint64_t iova : 48; + uint32_t size : 16; +}; + +enum tu_dynamic_state +{ + /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */ + TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1, + TU_DYNAMIC_STATE_COUNT, +}; + +enum tu_draw_state_group_id +{ + TU_DRAW_STATE_PROGRAM, + TU_DRAW_STATE_PROGRAM_BINNING, + TU_DRAW_STATE_VB, + TU_DRAW_STATE_VI, + TU_DRAW_STATE_VI_BINNING, + TU_DRAW_STATE_RAST, + TU_DRAW_STATE_DS, + TU_DRAW_STATE_BLEND, + TU_DRAW_STATE_VS_CONST, + TU_DRAW_STATE_GS_CONST, + TU_DRAW_STATE_FS_CONST, + TU_DRAW_STATE_DESC_SETS, + TU_DRAW_STATE_DESC_SETS_GMEM, + TU_DRAW_STATE_DESC_SETS_SYSMEM, + TU_DRAW_STATE_DESC_SETS_LOAD, + TU_DRAW_STATE_VS_PARAMS, + + /* dynamic state related draw states */ + TU_DRAW_STATE_DYNAMIC, + TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT, +}; + enum tu_cs_mode { @@ -578,73 +615,12 @@ tu_buffer_iova(struct tu_buffer *buffer) return buffer->bo->iova + buffer->bo_offset; } -enum tu_dynamic_state_bits -{ - TU_DYNAMIC_VIEWPORT = 1 << 0, - TU_DYNAMIC_SCISSOR = 1 << 1, - 
TU_DYNAMIC_LINE_WIDTH = 1 << 2, - TU_DYNAMIC_DEPTH_BIAS = 1 << 3, - TU_DYNAMIC_BLEND_CONSTANTS = 1 << 4, - TU_DYNAMIC_DEPTH_BOUNDS = 1 << 5, - TU_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, - TU_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, - TU_DYNAMIC_STENCIL_REFERENCE = 1 << 8, - TU_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, - TU_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10, - TU_DYNAMIC_ALL = (1 << 11) - 1, -}; - struct tu_vertex_binding { struct tu_buffer *buffer; VkDeviceSize offset; }; -struct tu_viewport_state -{ - uint32_t count; - VkViewport viewports[MAX_VIEWPORTS]; -}; - -struct tu_scissor_state -{ - uint32_t count; - VkRect2D scissors[MAX_SCISSORS]; -}; - -struct tu_dynamic_state -{ - /** - * Bitmask of (1 << VK_DYNAMIC_STATE_*). - * Defines the set of saved dynamic state. - */ - uint32_t mask; - - struct tu_viewport_state viewport; - - struct tu_scissor_state scissor; - - float line_width; - - struct - { - uint32_t front; - uint32_t back; - } stencil_compare_mask; - - struct - { - uint32_t front; - uint32_t back; - } stencil_write_mask; - - struct - { - uint32_t front; - uint32_t back; - } stencil_reference; -}; - const char * tu_get_debug_option_name(int id); @@ -693,21 +669,14 @@ struct tu_tiling_config enum tu_cmd_dirty_bits { - TU_CMD_DIRTY_PIPELINE = 1 << 0, TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1, TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2, - TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3, TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 4, TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5, TU_CMD_DIRTY_STREAMOUT_BUFFERS = 1 << 6, - - TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 16, - TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 17, - TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 18, - TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 19, - TU_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 20, - TU_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 21, + /* all draw states were disabled and need to be re-enabled: */ + TU_CMD_DIRTY_DRAW_STATE = 1 << 7, }; struct tu_streamout_state { @@ -842,7 +811,17 @@ struct tu_cmd_state VkDeviceSize 
offsets[MAX_VBS]; } vb; - struct tu_dynamic_state dynamic; + /* for dynamic states that can't be emitted directly */ + uint32_t dynamic_stencil_mask; + uint32_t dynamic_stencil_wrmask; + uint32_t dynamic_stencil_ref; + uint32_t dynamic_gras_su_cntl; + + /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */ + struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT]; + struct tu_cs_entry vertex_buffers_ib; + struct tu_cs_entry shader_const_ib[MESA_SHADER_STAGES]; + struct tu_cs_entry desc_sets_ib, desc_sets_gmem_ib, desc_sets_sysmem_ib, desc_sets_load_ib; /* Stream output buffers */ struct @@ -1106,8 +1085,6 @@ struct tu_pipeline { struct tu_cs cs; - struct tu_dynamic_state dynamic_state; - struct tu_pipeline_layout *layout; bool need_indirect_descriptor_sets; @@ -1116,6 +1093,15 @@ struct tu_pipeline struct tu_streamout_state streamout; + /* mask of enabled dynamic states + * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used + */ + uint32_t dynamic_state_mask; + struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT]; + + /* gras_su_cntl without line width, used for dynamic line width state */ + uint32_t gras_su_cntl; + struct { struct tu_bo binary_bo; @@ -1147,12 +1133,6 @@ struct tu_pipeline struct { struct tu_cs_entry state_ib; - } vp; - - struct - { - uint32_t gras_su_cntl; - struct tu_cs_entry state_ib; } rast; struct @@ -1180,31 +1160,12 @@ tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor); void tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc); -void -tu6_emit_gras_su_cntl(struct tu_cs *cs, - uint32_t gras_su_cntl, - float line_width); - void tu6_emit_depth_bias(struct tu_cs *cs, float constant_factor, float clamp, float slope_factor); -void -tu6_emit_stencil_compare_mask(struct tu_cs *cs, - uint32_t front, - uint32_t back); - -void -tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back); - -void -tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t 
back); - -void -tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]); - void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples); void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2); -- 2.30.2