From: Jonathan Marek Date: Mon, 24 Feb 2020 14:02:41 +0000 (-0500) Subject: turnip: automatically reserve cmdstream space in emit_pkt4/emit_pkt7 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=cf94124e1c28c0c5953454ad1dbe71b9de8adc55;p=mesa.git turnip: automatically reserve cmdstream space in emit_pkt4/emit_pkt7 Signed-off-by: Jonathan Marek Reviewed-by: Eric Anholt Part-of: --- diff --git a/src/freedreno/vulkan/tu_blit.c b/src/freedreno/vulkan/tu_blit.c index 73b04b4b0a0..a81a2f0c119 100644 --- a/src/freedreno/vulkan/tu_blit.c +++ b/src/freedreno/vulkan/tu_blit.c @@ -84,8 +84,6 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs, { struct tu_physical_device *phys_dev = cmdbuf->device->physical_device; - tu_cs_reserve_space(cs, 66); - enum a6xx_format fmt = tu6_get_native_format(blt->dst.fmt)->rb; if (fmt == FMT6_Z24_UNORM_S8_UINT) fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; @@ -280,8 +278,6 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs, assert(blt->dst.samples == 1); } - tu_cs_reserve_space(cs, 18); - tu6_emit_event_write(cmdbuf, cs, LRZ_FLUSH, false); tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true); tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true); @@ -365,8 +361,6 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs, blt->src.ubwc_va += blt->src.ubwc_size; } - tu_cs_reserve_space(cs, 17); - tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true); tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true); tu6_emit_event_write(cmdbuf, cs, CACHE_FLUSH_TS, true); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 9403f37b12d..275e14be620 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -639,6 +639,7 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd, * that means the packets we're emitting also happen during binning. So * we need to guard the write on !BINNING at CP execution time. 
*/ + tu_cs_reserve(cs, 3 + 4); tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2); tu_cs_emit(cs, CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM | CP_COND_REG_EXEC_0_SYSMEM); @@ -811,6 +812,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd, A6XX_CP_REG_TEST_0_BIT(0) | A6XX_CP_REG_TEST_0_WAIT_FOR_ME); + tu_cs_reserve(cs, 3 + 11); tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2); tu_cs_emit(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST)); tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(11)); @@ -1061,12 +1063,6 @@ tu6_emit_restart_index(struct tu_cs *cs, uint32_t restart_index) static void tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { - VkResult result = tu_cs_reserve_space(cs, 256); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu6_emit_cache_flush(cmd, cs); tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff); @@ -1334,6 +1330,7 @@ emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs) A6XX_CP_REG_TEST_0_BIT(0) | A6XX_CP_REG_TEST_0_WAIT_FOR_ME); + tu_cs_reserve(cs, 3 + 7); tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2); tu_cs_emit(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST)); tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(7)); @@ -1503,7 +1500,6 @@ tu_cmd_prepare_sysmem_clear_ib(struct tu_cmd_buffer *cmd, /* TODO: We shouldn't need this flush, but without it we'd have an empty IB * when nothing clears which we currently can't handle. 
*/ - tu_cs_reserve_space(&sub_cs, 5); tu6_emit_event_write(cmd, &sub_cs, PC_CCU_FLUSH_COLOR_TS, true); cmd->state.sysmem_clear_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &sub_cs); @@ -1513,12 +1509,6 @@ static void tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs, const struct VkRect2D *renderArea) { - VkResult result = tu_cs_reserve_space(cs, 1024); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - const struct tu_framebuffer *fb = cmd->state.framebuffer; if (fb->width > 0 && fb->height > 0) { tu6_emit_window_scissor(cmd, cs, @@ -1581,13 +1571,6 @@ tu6_sysmem_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } } - const uint32_t space = 14 + tu_cs_get_call_size(&cmd->draw_epilogue_cs); - VkResult result = tu_cs_reserve_space(cs, space); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu_cs_emit_call(cs, &cmd->draw_epilogue_cs); tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1); @@ -1607,12 +1590,6 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { struct tu_physical_device *phys_dev = cmd->device->physical_device; - VkResult result = tu_cs_reserve_space(cs, 1024); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu6_emit_lrz_flush(cmd, cs); /* lrz clear? 
*/ @@ -1667,13 +1644,6 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs, const struct tu_tile *tile) { - const uint32_t render_tile_space = 256 + tu_cs_get_call_size(&cmd->draw_cs); - VkResult result = tu_cs_reserve_space(cs, render_tile_space); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu6_emit_tile_select(cmd, cs, tile); tu_cs_emit_ib(cs, &cmd->state.tile_load_ib); @@ -1686,9 +1656,10 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, A6XX_CP_REG_TEST_0_BIT(0) | A6XX_CP_REG_TEST_0_WAIT_FOR_ME); + tu_cs_reserve(cs, 3 + 2); tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2); - tu_cs_emit(cs, 0x10000000); - tu_cs_emit(cs, 2); /* conditionally execute next 2 dwords */ + tu_cs_emit(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST)); + tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(2)); /* if (no overflow) */ { tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1); @@ -1704,13 +1675,6 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, static void tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { - const uint32_t space = 16 + tu_cs_get_call_size(&cmd->draw_epilogue_cs); - VkResult result = tu_cs_reserve_space(cs, space); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu_cs_emit_call(cs, &cmd->draw_epilogue_cs); tu_cs_emit_regs(cs, @@ -1748,13 +1712,6 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd) tu6_sysmem_render_begin(cmd, &cmd->cs, &tiling->render_area); - const uint32_t space = tu_cs_get_call_size(&cmd->draw_cs); - VkResult result = tu_cs_reserve_space(&cmd->cs, space); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu_cs_emit_call(&cmd->cs, &cmd->draw_cs); cmd->wait_for_idle = true; @@ -2236,11 +2193,6 @@ tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, /* initialize/update the restart index */ if (!cmd->state.index_buffer || cmd->state.index_type != indexType) { struct tu_cs *draw_cs = &cmd->draw_cs; - VkResult result = tu_cs_reserve_space(draw_cs, 2); - if (result != VK_SUCCESS) { - 
cmd->record_result = result; - return; - } tu6_emit_restart_index( draw_cs, indexType == VK_INDEX_TYPE_UINT32 ? 0xffffffff : 0xffff); @@ -2386,12 +2338,6 @@ tu_CmdSetViewport(VkCommandBuffer commandBuffer, TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); struct tu_cs *draw_cs = &cmd->draw_cs; - VkResult result = tu_cs_reserve_space(draw_cs, 12); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - assert(firstViewport == 0 && viewportCount == 1); tu6_emit_viewport(draw_cs, pViewports); @@ -2407,12 +2353,6 @@ tu_CmdSetScissor(VkCommandBuffer commandBuffer, TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); struct tu_cs *draw_cs = &cmd->draw_cs; - VkResult result = tu_cs_reserve_space(draw_cs, 3); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - assert(firstScissor == 0 && scissorCount == 1); tu6_emit_scissor(draw_cs, pScissors); @@ -2439,12 +2379,6 @@ tu_CmdSetDepthBias(VkCommandBuffer commandBuffer, TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); struct tu_cs *draw_cs = &cmd->draw_cs; - VkResult result = tu_cs_reserve_space(draw_cs, 4); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp, depthBiasSlopeFactor); @@ -2458,12 +2392,6 @@ tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer, TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); struct tu_cs *draw_cs = &cmd->draw_cs; - VkResult result = tu_cs_reserve_space(draw_cs, 5); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu6_emit_blend_constants(draw_cs, blendConstants); tu_cs_sanity_check(draw_cs); @@ -2668,12 +2596,6 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer, tu_cmd_prepare_tile_load_ib(cmd, pRenderPassBegin); tu_cmd_prepare_tile_store_ib(cmd); - VkResult result = tu_cs_reserve_space(&cmd->draw_cs, 1024); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu6_emit_zs(cmd, cmd->state.subpass, 
&cmd->draw_cs); tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs); tu6_emit_msaa(cmd, cmd->state.subpass, &cmd->draw_cs); @@ -2722,12 +2644,6 @@ tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents) } } - VkResult result = tu_cs_reserve_space(&cmd->draw_cs, 1024); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - /* invalidate because reading input attachments will cache GMEM and * the cache isn't updated when GMEM is written * TODO: is there a no-cache bit for textures? @@ -3524,14 +3440,11 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd, const struct tu_dynamic_state *dynamic = &cmd->state.dynamic; struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT]; uint32_t draw_state_group_count = 0; + VkResult result; struct tu_descriptor_state *descriptors_state = &cmd->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS]; - VkResult result = tu_cs_reserve_space(cs, 256); - if (result != VK_SUCCESS) - return result; - /* TODO lrz */ tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0); @@ -3840,12 +3753,6 @@ tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw) return; } - result = tu_cs_reserve_space(cs, 32); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - if (draw->indirect) { tu_finishme("indirect draw"); return; @@ -4014,12 +3921,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd, struct tu_pipeline *pipeline = cmd->state.compute_pipeline; struct tu_descriptor_state *descriptors_state = &cmd->descriptors[VK_PIPELINE_BIND_POINT_COMPUTE]; - - VkResult result = tu_cs_reserve_space(cs, 256); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } + VkResult result; if (cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_PIPELINE) tu_cs_emit_ib(cs, &pipeline->program.state_ib); @@ -4247,12 +4149,6 @@ write_event(struct tu_cmd_buffer *cmd, struct tu_event *event, unsigned value) { struct tu_cs *cs = &cmd->cs; - VkResult result = tu_cs_reserve_space(cs, 4); - if (result != 
VK_SUCCESS) { - cmd->record_result = result; - return; - } - tu_bo_list_add(&cmd->bo_list, &event->bo, MSM_SUBMIT_BO_WRITE); /* TODO: any flush required before/after ? */ @@ -4300,12 +4196,6 @@ tu_CmdWaitEvents(VkCommandBuffer commandBuffer, TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); struct tu_cs *cs = &cmd->cs; - VkResult result = tu_cs_reserve_space(cs, eventCount * 7); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - /* TODO: any flush required before/after? (CP_WAIT_FOR_ME?) */ for (uint32_t i = 0; i < eventCount; i++) { diff --git a/src/freedreno/vulkan/tu_cs.c b/src/freedreno/vulkan/tu_cs.c index 6cb96b0926d..072a52cbd3c 100644 --- a/src/freedreno/vulkan/tu_cs.c +++ b/src/freedreno/vulkan/tu_cs.c @@ -90,15 +90,6 @@ tu_cs_get_size(const struct tu_cs *cs) return cs->cur - cs->start; } -/** - * Get the size of the remaining space in the current BO. - */ -static uint32_t -tu_cs_get_space(const struct tu_cs *cs) -{ - return cs->end - cs->cur; -} - /** * Return true if there is no command packet emitted since the last call to * tu_cs_add_entry. diff --git a/src/freedreno/vulkan/tu_cs.h b/src/freedreno/vulkan/tu_cs.h index 45e774c581a..f5c331cdd30 100644 --- a/src/freedreno/vulkan/tu_cs.h +++ b/src/freedreno/vulkan/tu_cs.h @@ -134,12 +134,42 @@ tu_odd_parity_bit(unsigned val) return (~0x6996 >> val) & 1; } +/** + * Get the size of the remaining space in the current BO. 
+ */ +static inline uint32_t +tu_cs_get_space(const struct tu_cs *cs) +{ + return cs->end - cs->cur; +} + +static inline void +tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size) +{ + if (cs->mode != TU_CS_MODE_GROW) { + assert(tu_cs_get_space(cs) >= reserved_size); + assert(cs->reserved_end == cs->end); + return; + } + + if (tu_cs_get_space(cs) >= reserved_size && + cs->entry_count < cs->entry_capacity) { + cs->reserved_end = cs->cur + reserved_size; + return; + } + + VkResult result = tu_cs_reserve_space(cs, reserved_size); + /* TODO: set this error in tu_cs and use it */ + assert(result == VK_SUCCESS); +} + /** * Emit a type-4 command packet header into a command stream. */ static inline void tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt) { + tu_cs_reserve(cs, cnt + 1); tu_cs_emit(cs, CP_TYPE4_PKT | cnt | (tu_odd_parity_bit(cnt) << 7) | ((regindx & 0x3ffff) << 8) | ((tu_odd_parity_bit(regindx) << 27))); @@ -151,6 +181,7 @@ tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt) static inline void tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt) { + tu_cs_reserve(cs, cnt + 1); tu_cs_emit(cs, CP_TYPE7_PKT | cnt | (tu_odd_parity_bit(cnt) << 15) | ((opcode & 0x7f) << 16) | ((tu_odd_parity_bit(opcode) << 23))); @@ -297,12 +328,8 @@ tu_cond_exec_end(struct tu_cs *cs, struct tu_cond_exec_state *state) STATIC_ASSERT(count > 0); \ STATIC_ASSERT(count <= 16); \ \ + tu_cs_emit_pkt4(cs, regs[0].reg, count); \ uint32_t *p = cs->cur; \ - *p++ = CP_TYPE4_PKT | count | \ - (tu_odd_parity_bit(count) << 7) | \ - ((regs[0].reg & 0x3ffff) << 8) | \ - ((tu_odd_parity_bit(regs[0].reg) << 27)); \ - \ __ONE_REG( 0, regs); \ __ONE_REG( 1, regs); \ __ONE_REG( 2, regs); \ diff --git a/src/freedreno/vulkan/tu_query.c b/src/freedreno/vulkan/tu_query.c index 1dad575965d..94921dd2c57 100644 --- a/src/freedreno/vulkan/tu_query.c +++ b/src/freedreno/vulkan/tu_query.c @@ -290,7 +290,6 @@ copy_query_value_gpu(struct tu_cmd_buffer *cmdbuf, 
sizeof(uint64_t) : sizeof(uint32_t); uint64_t write_iova = base_write_iova + (offset * element_size); - tu_cs_reserve_space(cs, 6); tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5); uint32_t mem_to_mem_flags = flags & VK_QUERY_RESULT_64_BIT ? CP_MEM_TO_MEM_0_DOUBLE : 0; @@ -319,7 +318,6 @@ emit_copy_occlusion_query_pool_results(struct tu_cmd_buffer *cmdbuf, * To ensure that previous writes to the available bit are coherent, first * wait for all writes to complete. */ - tu_cs_reserve_space(cs, 1); tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0); for (uint32_t i = 0; i < queryCount; i++) { @@ -330,7 +328,6 @@ emit_copy_occlusion_query_pool_results(struct tu_cmd_buffer *cmdbuf, /* Wait for the available bit to be set if executed with the * VK_QUERY_RESULT_WAIT_BIT flag. */ if (flags & VK_QUERY_RESULT_WAIT_BIT) { - tu_cs_reserve_space(cs, 7); tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6); tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | CP_WAIT_REG_MEM_0_POLL_MEMORY); @@ -356,7 +353,7 @@ emit_copy_occlusion_query_pool_results(struct tu_cmd_buffer *cmdbuf, * tests that ADDR0 != 0 and ADDR1 < REF. The packet here simply tests * that 0 < available < 2, aka available == 1. 
*/ - tu_cs_reserve_space(cs, 7 + 6); + tu_cs_reserve(cs, 7 + 6); tu_cs_emit_pkt7(cs, CP_COND_EXEC, 6); tu_cs_emit_qw(cs, available_iova); tu_cs_emit_qw(cs, available_iova); @@ -419,7 +416,6 @@ emit_reset_occlusion_query_pool(struct tu_cmd_buffer *cmdbuf, uint32_t query = firstQuery + i; uint64_t available_iova = occlusion_query_iova(pool, query, available); uint64_t result_iova = occlusion_query_iova(pool, query, result); - tu_cs_reserve_space(cs, 11); tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4); tu_cs_emit_qw(cs, available_iova); tu_cs_emit_qw(cs, 0x0); @@ -475,7 +471,6 @@ emit_begin_occlusion_query(struct tu_cmd_buffer *cmdbuf, uint64_t begin_iova = occlusion_query_iova(pool, query, begin); - tu_cs_reserve_space(cs, 7); tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.copy = true)); @@ -540,7 +535,6 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf, uint64_t begin_iova = occlusion_query_iova(pool, query, begin); uint64_t end_iova = occlusion_query_iova(pool, query, end); uint64_t result_iova = occlusion_query_iova(pool, query, result); - tu_cs_reserve_space(cs, 31); tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4); tu_cs_emit_qw(cs, end_iova); tu_cs_emit_qw(cs, 0xffffffffffffffffull); @@ -583,7 +577,6 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf, */ cs = &cmdbuf->draw_epilogue_cs; - tu_cs_reserve_space(cs, 5); tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4); tu_cs_emit_qw(cs, available_iova); tu_cs_emit_qw(cs, 0x1);