From: Jason Ekstrand Date: Mon, 17 Oct 2016 17:03:16 +0000 (-0700) Subject: anv/cmd_buffer: Unify flush_compute_state across gens X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1f3e6468d2a3efd9f2da99fa337dfe4b804bcda6;p=mesa.git anv/cmd_buffer: Unify flush_compute_state across gens With one small genxml change, the two versions were basically identical. The only differences were one #define for HSW+ and a field that is missing on Haswell but exists everywhere else. Signed-off-by: Jason Ekstrand Reviewed-by: Anuj Phogat --- diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index ee62614d5af..1455aa18fef 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -214,7 +214,7 @@ - + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 9c81c5aa90c..bf6ce800e8a 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -203,7 +203,7 @@ - + diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 225533cb26a..c1b7724599f 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -121,94 +121,6 @@ void genX(CmdBindIndexBuffer)( cmd_buffer->state.gen7.index_offset = offset; } -static VkResult -flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = anv_cmd_buffer_emit_samplers(cmd_buffer, - MESA_SHADER_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, - MESA_SHADER_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; - - struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - - const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - - if (push_state.alloc_size) { - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) { - curbe.CURBETotalDataLength = push_state.alloc_size; - curbe.CURBEDataStartAddress = push_state.offset; - } - } - - const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared); - - struct anv_state state = - anv_state_pool_emit(&device->dynamic_state_pool, - GENX(INTERFACE_DESCRIPTOR_DATA), 64, - .KernelStartPointer = pipeline->cs_simd, - .BindingTablePointer = surfaces.offset, - .SamplerStatePointer = samplers.offset, - .ConstantURBEntryReadLength = - cs_prog_data->push.per_thread.regs, -#if GEN_IS_HASWELL - .CrossThreadConstantDataReadLength = - cs_prog_data->push.cross_thread.regs, -#else - .ConstantURBEntryReadOffset = 0, -#endif - .BarrierEnable = cs_prog_data->uses_barrier, - .SharedLocalMemorySize = slm_size, - .NumberofThreadsinGPGPUThreadGroup = - cs_prog_data->threads); - - const uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); - anv_batch_emit(&cmd_buffer->batch, - GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), idl) { - idl.InterfaceDescriptorTotalLength = size; - idl.InterfaceDescriptorDataStartAddress = state.offset; - } - - return VK_SUCCESS; -} - -void -genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - MAYBE_UNUSED VkResult result; - - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config); - - genX(flush_pipeline_select_gpgpu)(cmd_buffer); - - if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { - /* FIXME: figure out descriptors for gen7 */ - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; - } - - cmd_buffer->state.compute_dirty = 0; - - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); -} - void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) { diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 44ffcbff12a..0548a5ee730 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -312,93 +312,6 @@ void genX(CmdBindIndexBuffer)( cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; } -static VkResult -flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = anv_cmd_buffer_emit_samplers(cmd_buffer, - MESA_SHADER_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, - MESA_SHADER_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; - - struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - - const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - - if (push_state.alloc_size) { - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) { - curbe.CURBETotalDataLength = push_state.alloc_size; - curbe.CURBEDataStartAddress = push_state.offset; - } - } - - const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared); - - struct anv_state state = - anv_state_pool_emit(&device->dynamic_state_pool, - GENX(INTERFACE_DESCRIPTOR_DATA), 64, - .KernelStartPointer = pipeline->cs_simd, - .KernelStartPointerHigh = 0, - .BindingTablePointer = surfaces.offset, - .BindingTableEntryCount = 0, - .SamplerStatePointer = samplers.offset, - .SamplerCount = 0, - .ConstantIndirectURBEntryReadLength = - cs_prog_data->push.per_thread.regs, - .ConstantURBEntryReadOffset = 0, - .BarrierEnable = cs_prog_data->uses_barrier, - .SharedLocalMemorySize = slm_size, - .NumberofThreadsinGPGPUThreadGroup = - cs_prog_data->threads, - .CrossThreadConstantDataReadLength = - cs_prog_data->push.cross_thread.regs); - - uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); - anv_batch_emit(&cmd_buffer->batch, - GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), mid) { - mid.InterfaceDescriptorTotalLength = size; - mid.InterfaceDescriptorDataStartAddress = state.offset; - } - - return VK_SUCCESS; -} - -void -genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - MAYBE_UNUSED VkResult result; - - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config); - - genX(flush_pipeline_select_gpgpu)(cmd_buffer); - - if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; - } - - cmd_buffer->state.compute_dirty = 0; - - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); -} - /** * Emit the HZ_OP packet in the sequence specified by the BDW PRM section diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index d61b9719e62..a8ecbd24d58 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1022,6 +1022,97 @@ void genX(CmdDrawIndexedIndirect)( } } +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) { + curbe.CURBETotalDataLength = push_state.alloc_size; + curbe.CURBEDataStartAddress = push_state.offset; + } + } + + const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared); + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GENX(INTERFACE_DESCRIPTOR_DATA), 64, + .KernelStartPointer = pipeline->cs_simd, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, +#if !GEN_IS_HASWELL + .ConstantURBEntryReadOffset = 0, +#endif + .ConstantURBEntryReadLength = + cs_prog_data->push.per_thread.regs, +#if GEN_GEN >= 8 || GEN_IS_HASWELL + .CrossThreadConstantDataReadLength = + cs_prog_data->push.cross_thread.regs, +#endif + .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, + .NumberofThreadsinGPGPUThreadGroup = + cs_prog_data->threads); + + uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, + GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), mid) { + mid.InterfaceDescriptorTotalLength = size; + mid.InterfaceDescriptorDataStartAddress = state.offset; + } + + return VK_SUCCESS; +} + +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + MAYBE_UNUSED VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config); + + genX(flush_pipeline_select_gpgpu)(cmd_buffer); + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + /* FIXME: figure out descriptors for gen7 */ + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; + + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); +} + #if GEN_GEN == 7 static bool