From 6c4c04690f30af655ad675b590836d3a84440c3a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kristian=20H=C3=B8gsberg=20Kristensen?= Date: Fri, 5 Feb 2016 22:36:53 -0800 Subject: [PATCH] anv: Deduplicate dispatch calls This can all be shared between gen8+ and pre-gen8. --- src/vulkan/anv_private.h | 5 ++ src/vulkan/gen7_cmd_buffer.c | 100 +---------------------------------- src/vulkan/gen8_cmd_buffer.c | 92 +------------------------------- src/vulkan/genX_cmd_buffer.c | 80 ++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 188 deletions(-) diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h index 022aae12007..d26c0b2b88d 100644 --- a/src/vulkan/anv_private.h +++ b/src/vulkan/anv_private.h @@ -1279,6 +1279,11 @@ void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); + struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage); diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c index 8c563c9b5f1..e843b942b1d 100644 --- a/src/vulkan/gen7_cmd_buffer.c +++ b/src/vulkan/gen7_cmd_buffer.c @@ -326,25 +326,8 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } -static void -emit_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_MEM, - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GEN7_MI_LOAD_REGISTER_IMM, - .RegisterOffset = reg, - .DataDWord = imm); -} - -static void -cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; VkResult result; @@ -568,85 +551,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } -void genX(CmdDispatch)( - VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - if (prog_data->uses_num_work_groups) { - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); - uint32_t *sizes = state.map; - sizes[0] = x; - sizes[1] = y; - sizes[2] = z; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - cmd_buffer->state.num_workgroups_offset = state.offset; - cmd_buffer->state.num_workgroups_bo = - &cmd_buffer->device->dynamic_state_block_pool.bo; - } - - cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .ThreadGroupIDXDimension = x, - .ThreadGroupIDYDimension = y, - .ThreadGroupIDZDimension = z, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); -} - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -void genX(CmdDispatchIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - if (prog_data->uses_num_work_groups) { - cmd_buffer->state.num_workgroups_offset = bo_offset; - cmd_buffer->state.num_workgroups_bo = bo; - } - - cmd_buffer_flush_compute_state(cmd_buffer); - - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - - anv_batch_emit(&cmd_buffer->batch, GEN7_GPGPU_WALKER, - .IndirectParameterEnable = true, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH); -} - static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c index b997a2ecf05..3ba5bbcf4a5 100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@ -146,15 +146,6 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) } #endif -static void -emit_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - static void emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) { @@ -577,8 +568,8 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } -static void -cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; VkResult result; @@ -624,85 +615,6 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.compute_dirty = 0; } -void genX(CmdDispatch)( - VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - if (prog_data->uses_num_work_groups) { - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); - uint32_t *sizes = state.map; - sizes[0] = x; - sizes[1] = y; - sizes[2] = z; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - cmd_buffer->state.num_workgroups_offset = state.offset; - cmd_buffer->state.num_workgroups_bo = - &cmd_buffer->device->dynamic_state_block_pool.bo; - } - - cmd_buffer_flush_compute_state(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .ThreadGroupIDXDimension = x, - .ThreadGroupIDYDimension = y, - .ThreadGroupIDZDimension = z, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); -} - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -void genX(CmdDispatchIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - if (prog_data->uses_num_work_groups) { - cmd_buffer->state.num_workgroups_offset = bo_offset; - cmd_buffer->state.num_workgroups_bo = bo; - } - - cmd_buffer_flush_compute_state(cmd_buffer); - - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - - anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), - .IndirectParameterEnable = true, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); -} - static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c index e3fa043c66b..923f2086717 100644 --- a/src/vulkan/genX_cmd_buffer.c +++ b/src/vulkan/genX_cmd_buffer.c @@ -455,3 +455,83 @@ void genX(CmdDrawIndexedIndirect)( .VertexAccessType = RANDOM, .PrimitiveTopologyType = pipeline->topology); } + + +void genX(CmdDispatch)( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + if (prog_data->uses_num_work_groups) { + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); + uint32_t *sizes = state.map; + sizes[0] = x; + sizes[1] = y; + sizes[2] = z; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + cmd_buffer->state.num_workgroups_offset = state.offset; + cmd_buffer->state.num_workgroups_bo = + &cmd_buffer->device->dynamic_state_block_pool.bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void genX(CmdDispatchIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + if (prog_data->uses_num_work_groups) { + cmd_buffer->state.num_workgroups_offset = bo_offset; + cmd_buffer->state.num_workgroups_bo = bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} -- 2.30.2