-/**
- * Emit the compute-stage descriptor state for the compute pipeline bound to
- * cmd_buffer: sampler table, binding table, push constants (CURBE) and the
- * interface descriptor.
- *
- * Returns the first non-VK_SUCCESS result reported by sampler or binding
- * table emission (nothing further is emitted in that case), otherwise
- * VK_SUCCESS.
- */
-static VkResult
-flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   struct anv_device *device = cmd_buffer->device;
-   struct anv_state sampler_table = { 0, };
-   struct anv_state binding_table = { 0, };
-
-   VkResult result =
-      anv_cmd_buffer_emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE,
-                                   &sampler_table);
-   if (result != VK_SUCCESS)
-      return result;
-
-   result = anv_cmd_buffer_emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE,
-                                              &binding_table);
-   if (result != VK_SUCCESS)
-      return result;
-
-   struct anv_state curbe = anv_cmd_buffer_cs_push_constants(cmd_buffer);
-
-   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
-   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
-
-   /* Push constant payload: the uniform params plus 8 dwords per local
-    * invocation ID register, 4 bytes per dword, rounded up to whole 32-byte
-    * registers.
-    */
-   unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
-   unsigned payload_bytes = (prog_data->nr_params + local_id_dwords) * 4;
-   unsigned push_constant_regs = ALIGN(payload_bytes, 32) / 32;
-
-   /* Only load the CURBE when push constant data was actually allocated. */
-   if (curbe.alloc_size != 0) {
-      anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD),
-                     .CURBETotalDataLength = curbe.alloc_size,
-                     .CURBEDataStartAddress = curbe.offset);
-   }
-
-   assert(prog_data->total_shared <= 64 * 1024);
-
-   /* slm_size is in 4k increments, but must be a power of 2: start at 4k,
-    * double until the requested size fits, then convert bytes to 4k units.
-    * It stays 0 when no shared memory is used.
-    */
-   uint32_t slm_size = 0;
-   if (prog_data->total_shared > 0) {
-      uint32_t slm_bytes;
-      for (slm_bytes = 4 * 1024;
-           slm_bytes < prog_data->total_shared;
-           slm_bytes <<= 1) {
-         /* doubling happens in the loop header */
-      }
-      slm_size = slm_bytes / (4 * 1024);
-   }
-
-   struct anv_state idd_state =
-      anv_state_pool_emit(&device->dynamic_state_pool,
-                          GENX(INTERFACE_DESCRIPTOR_DATA), 64,
-                          .KernelStartPointer = pipeline->cs_simd,
-                          .BindingTablePointer = binding_table.offset,
-                          .SamplerStatePointer = sampler_table.offset,
-                          .ConstantURBEntryReadLength = push_constant_regs,
-#if !GEN_IS_HASWELL
-                          .ConstantURBEntryReadOffset = 0,
-#endif
-                          .BarrierEnable = cs_prog_data->uses_barrier,
-                          .SharedLocalMemorySize = slm_size,
-                          .NumberofThreadsinGPGPUThreadGroup =
-                             pipeline->cs_thread_width_max);
-
-   /* Descriptor length is given in dwords; the load command takes bytes. */
-   const uint32_t idd_bytes =
-      GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
-   anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
-                  .InterfaceDescriptorTotalLength = idd_bytes,
-                  .InterfaceDescriptorDataStartAddress = idd_state.offset);
-
-   return VK_SUCCESS;
-}
-
-/**
- * Emit one MI_LOAD_REGISTER_IMM command into batch, with reg as the register
- * offset and imm as the data dword.
- */
-static void
-emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
-{
-   anv_batch_emit(batch,
-                  GENX(MI_LOAD_REGISTER_IMM),
-                  .RegisterOffset = reg,
-                  .DataDWord      = imm);
-}
-
-/* L3 configuration register offsets used with MI_LOAD_REGISTER_IMM.
- * GEN7_L3SQCREG1 is not written yet; see the anv_finishme in config_l3().
- */
-#define GEN7_L3SQCREG1 0xb010
-#define GEN7_L3CNTLREG2 0xb020
-#define GEN7_L3CNTLREG3 0xb024
-
-/**
- * Select the L3 configuration for cmd_buffer depending on whether SLM is
- * wanted, and reprogram the L3 control registers only when the L3CNTLREG2
- * value differs from the one cached in cmd_buffer->state.current_l3_config.
- *
- * References for GL state:
- *
- * - commits e307cfa..228d5a3
- * - src/mesa/drivers/dri/i965/gen7_l3_state.c
- */
-static void
-config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
-{
-   uint32_t l3c2_val;
-   uint32_t l3c3_val;
-
-   if (enable_slm) {
-      /* All = 0 ways; URB = 16 ways; DC and RO = 16; SLM = 1
-       * (a previous candidate value, 0x02040021, was left commented out)
-       */
-      l3c2_val = 0x010000a1;
-      l3c3_val = 0x00040810;
-   } else {
-      /* All = 0 ways; URB = 32 ways; DC = 0; RO = 32; SLM = 0
-       * (a previous candidate value, 0x04080040, was left commented out)
-       */
-      l3c2_val = 0x02000030;
-      l3c3_val = 0x00040410;
-   }
-
-   /* Nothing to emit when the requested configuration is already current. */
-   if (cmd_buffer->state.current_l3_config == l3c2_val)
-      return;
-
-   /* According to the hardware docs, the L3 partitioning can only be changed
-    * while the pipeline is completely drained and the caches are flushed,
-    * which involves a first PIPE_CONTROL flush which stalls the pipeline and
-    * initiates invalidation of the relevant caches...
-    */
-   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                  .TextureCacheInvalidationEnable = true,
-                  .ConstantCacheInvalidationEnable = true,
-                  .InstructionCacheInvalidateEnable = true,
-                  .DCFlushEnable = true,
-                  .PostSyncOperation = NoWrite,
-                  .CommandStreamerStallEnable = true);
-
-   /* ...followed by a second stalling flush which guarantees that
-    * invalidation is complete when the L3 configuration registers are
-    * modified.
-    */
-   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                  .DCFlushEnable = true,
-                  .PostSyncOperation = NoWrite,
-                  .CommandStreamerStallEnable = true);
-
-   anv_finishme("write GEN7_L3SQCREG1");
-   emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3c2_val);
-   emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3, l3c3_val);
-
-   /* Remember what was programmed so an identical request is skipped. */
-   cmd_buffer->state.current_l3_config = l3c2_val;
-}
-
-void
-genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)