diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
index 26339bbf0d9..c5281e527ef 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -28,138 +28,13 @@
 #include 
 
 #include "anv_private.h"
+#include "vk_format_info.h"
 #include "genxml/gen_macros.h"
 #include "genxml/genX_pack.h"
 
-static uint32_t
-cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
-{
-   static const uint32_t push_constant_opcodes[] = {
-      [MESA_SHADER_VERTEX]    = 21,
-      [MESA_SHADER_TESS_CTRL] = 25, /* HS */
-      [MESA_SHADER_TESS_EVAL] = 26, /* DS */
-      [MESA_SHADER_GEOMETRY]  = 22,
-      [MESA_SHADER_FRAGMENT]  = 23,
-      [MESA_SHADER_COMPUTE]   = 0,
-   };
-
-   VkShaderStageFlags flushed = 0;
-
-   anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) {
-      if (stage == MESA_SHADER_COMPUTE)
-         continue;
-
-      struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);
-
-      if (state.offset == 0)
-         continue;
-
-      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
-                     ._3DCommandSubOpcode = push_constant_opcodes[stage],
-                     .ConstantBody = {
-                        .PointerToConstantBuffer0 = { .offset = state.offset },
-                        .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
-                     });
-
-      flushed |= mesa_to_vk_shader_stage(stage);
-   }
-
-   cmd_buffer->state.push_constants_dirty &= ~flushed;
-
-   return flushed;
-}
-
 #if GEN_GEN == 7 && !GEN_IS_HASWELL
-void
-gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
-                                         uint32_t stages)
-{
-   static const uint32_t sampler_state_opcodes[] = {
-      [MESA_SHADER_VERTEX]    = 43,
-      [MESA_SHADER_TESS_CTRL] = 44, /* HS */
-      [MESA_SHADER_TESS_EVAL] = 45, /* DS */
-      [MESA_SHADER_GEOMETRY]  = 46,
-      [MESA_SHADER_FRAGMENT]  = 47,
-      [MESA_SHADER_COMPUTE]   = 0,
-   };
-
-   static const uint32_t binding_table_opcodes[] = {
-      [MESA_SHADER_VERTEX]    = 38,
-      [MESA_SHADER_TESS_CTRL] = 39,
-      [MESA_SHADER_TESS_EVAL] = 40,
-      [MESA_SHADER_GEOMETRY]  = 41,
-      [MESA_SHADER_FRAGMENT]  = 42,
-      [MESA_SHADER_COMPUTE]   = 0,
-   };
-
-   anv_foreach_stage(s, stages) {
-      if (cmd_buffer->state.samplers[s].alloc_size > 0) {
-         anv_batch_emit(&cmd_buffer->batch,
-                        GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS),
-                        ._3DCommandSubOpcode = sampler_state_opcodes[s],
-                        .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset);
-      }
-
-      /* Always emit binding table pointers if we're asked to, since on SKL
-       * this is what flushes push constants.
-       */
-      anv_batch_emit(&cmd_buffer->batch,
-                     GENX(3DSTATE_BINDING_TABLE_POINTERS_VS),
-                     ._3DCommandSubOpcode = binding_table_opcodes[s],
-                     .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset);
-   }
-}
-
-uint32_t
-gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
-{
-   VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
-                              cmd_buffer->state.pipeline->active_stages;
-
-   VkResult result = VK_SUCCESS;
-   anv_foreach_stage(s, dirty) {
-      result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
-                                            &cmd_buffer->state.samplers[s]);
-      if (result != VK_SUCCESS)
-         break;
-      result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
-                                                 &cmd_buffer->state.binding_tables[s]);
-      if (result != VK_SUCCESS)
-         break;
-   }
-
-   if (result != VK_SUCCESS) {
-      assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
-
-      result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
-      assert(result == VK_SUCCESS);
-
-      /* Re-emit state base addresses so we get the new surface state base
-       * address before we start emitting binding tables etc.
-       */
-      anv_cmd_buffer_emit_state_base_address(cmd_buffer);
-
-      /* Re-emit all active binding tables */
-      dirty |= cmd_buffer->state.pipeline->active_stages;
-      anv_foreach_stage(s, dirty) {
-         result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
-                                               &cmd_buffer->state.samplers[s]);
-         if (result != VK_SUCCESS)
-            return result;
-         result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
-                                                    &cmd_buffer->state.binding_tables[s]);
-         if (result != VK_SUCCESS)
-            return result;
-      }
-   }
-
-   cmd_buffer->state.descriptors_dirty &= ~dirty;
-
-   return dirty;
-}
-#endif /* GEN_GEN == 7 && !GEN_IS_HASWELL */
-
-static inline int64_t
+static int64_t
 clamp_int64(int64_t x, int64_t min, int64_t max)
 {
    if (x < min)
@@ -170,11 +45,12 @@ clamp_int64(int64_t x, int64_t min, int64_t max)
       return max;
 }
 
-#if GEN_GEN == 7 && !GEN_IS_HASWELL
-static void
-emit_scissor_state(struct anv_cmd_buffer *cmd_buffer,
-                   uint32_t count, const VkRect2D *scissors)
+void
+gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
 {
+   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+   uint32_t count = cmd_buffer->state.gfx.dynamic.scissor.count;
+   const VkRect2D *scissors = cmd_buffer->state.gfx.dynamic.scissor.scissors;
    struct anv_state scissor_state =
       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
 
@@ -194,12 +70,36 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer,
       };
 
       const int max = 0xffff;
+
+      uint32_t y_min = s->offset.y;
+      uint32_t x_min = s->offset.x;
+      uint32_t y_max = s->offset.y + s->extent.height - 1;
+      uint32_t x_max = s->offset.x + s->extent.width - 1;
+
+      /* Do this math using int64_t so overflow gets clamped correctly.
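+       * (Illustrative: with offset.y = INT32_MAX and extent.height = 2 the
+       * unclamped maximum is 0x80000000, which 32-bit signed math would
+       * read as negative and clamp to 0; widened to int64_t it stays
+       * positive and clamps to the upper bound instead.)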
+       */
+      if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+         y_min = clamp_int64((uint64_t) y_min,
+                             cmd_buffer->state.render_area.offset.y, max);
+         x_min = clamp_int64((uint64_t) x_min,
+                             cmd_buffer->state.render_area.offset.x, max);
+         y_max = clamp_int64((uint64_t) y_max, 0,
+                             cmd_buffer->state.render_area.offset.y +
+                             cmd_buffer->state.render_area.extent.height - 1);
+         x_max = clamp_int64((uint64_t) x_max, 0,
+                             cmd_buffer->state.render_area.offset.x +
+                             cmd_buffer->state.render_area.extent.width - 1);
+      } else if (fb) {
+         y_min = clamp_int64((uint64_t) y_min, 0, max);
+         x_min = clamp_int64((uint64_t) x_min, 0, max);
+         y_max = clamp_int64((uint64_t) y_max, 0, fb->height - 1);
+         x_max = clamp_int64((uint64_t) x_max, 0, fb->width - 1);
+      }
+
       struct GEN7_SCISSOR_RECT scissor = {
-         /* Do this math using int64_t so overflow gets clamped correctly. */
-         .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
-         .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
-         .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
-         .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
+         .ScissorRectangleYMin = y_min,
+         .ScissorRectangleXMin = x_min,
+         .ScissorRectangleYMax = y_max,
+         .ScissorRectangleXMax = x_max
       };
 
       if (s->extent.width <= 0 || s->extent.height <= 0) {
@@ -210,42 +110,40 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer,
      }
   }
 
-   anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS,
-                  .ScissorRectPointer = scissor_state.offset);
-
-   if (!cmd_buffer->device->info.has_llc)
-      anv_state_clflush(scissor_state);
+   anv_batch_emit(&cmd_buffer->batch,
+                  GEN7_3DSTATE_SCISSOR_STATE_POINTERS, ssp) {
+      ssp.ScissorRectPointer = scissor_state.offset;
+   }
 }
+#endif
 
-void
-gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
+static uint32_t vk_to_gen_index_type(VkIndexType type)
 {
-   if (cmd_buffer->state.dynamic.scissor.count > 0) {
-      emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count,
-                         cmd_buffer->state.dynamic.scissor.scissors);
-   } else {
-      /* Emit a default scissor based on the currently bound framebuffer */
-      emit_scissor_state(cmd_buffer, 1,
-                         &(VkRect2D) {
-                            .offset = { .x = 0, .y = 0, },
-                            .extent = {
-                               .width = cmd_buffer->state.framebuffer->width,
-                               .height = cmd_buffer->state.framebuffer->height,
-                            },
-                         });
+   switch (type) {
+   case VK_INDEX_TYPE_UINT8_EXT:
+      return INDEX_BYTE;
+   case VK_INDEX_TYPE_UINT16:
+      return INDEX_WORD;
+   case VK_INDEX_TYPE_UINT32:
+      return INDEX_DWORD;
+   default:
+      unreachable("invalid index type");
    }
 }
-#endif
-
-static const uint32_t vk_to_gen_index_type[] = {
-   [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
-   [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
-};
 
-static const uint32_t restart_index_for_type[] = {
-   [VK_INDEX_TYPE_UINT16] = UINT16_MAX,
-   [VK_INDEX_TYPE_UINT32] = UINT32_MAX,
-};
+static uint32_t restart_index_for_type(VkIndexType type)
+{
+   switch (type) {
+   case VK_INDEX_TYPE_UINT8_EXT:
+      return UINT8_MAX;
+   case VK_INDEX_TYPE_UINT16:
+      return UINT16_MAX;
+   case VK_INDEX_TYPE_UINT32:
+      return UINT32_MAX;
+   default:
+      unreachable("invalid index type");
+   }
+}
 
 void genX(CmdBindIndexBuffer)(
     VkCommandBuffer commandBuffer,
@@ -256,340 +154,114 @@ void genX(CmdBindIndexBuffer)(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
 
-   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
+   cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
    if (GEN_IS_HASWELL)
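       /* (Note: only Haswell's 3DSTATE_VF has a programmable cut index;
        * other gen7 parts always restart on the all-ones value for the
        * bound index type, so the restart index is only tracked here.) */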
-      cmd_buffer->state.restart_index = restart_index_for_type[indexType];
-   cmd_buffer->state.gen7.index_buffer = buffer;
-   cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType];
-   cmd_buffer->state.gen7.index_offset = offset;
+      cmd_buffer->state.restart_index = restart_index_for_type(indexType);
+   cmd_buffer->state.gfx.gen7.index_buffer = buffer;
+   cmd_buffer->state.gfx.gen7.index_type = vk_to_gen_index_type(indexType);
+   cmd_buffer->state.gfx.gen7.index_offset = offset;
 }
 
-static VkResult
-flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_device *device = cmd_buffer->device;
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   struct anv_state surfaces = { 0, }, samplers = { 0, };
-   VkResult result;
-
-   result = anv_cmd_buffer_emit_samplers(cmd_buffer,
-                                         MESA_SHADER_COMPUTE, &samplers);
-   if (result != VK_SUCCESS)
-      return result;
-   result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
-                                              MESA_SHADER_COMPUTE, &surfaces);
-   if (result != VK_SUCCESS)
-      return result;
-
-   struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
-
-   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
-   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
-
-   unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
-   unsigned push_constant_data_size =
-      (prog_data->nr_params + local_id_dwords) * 4;
-   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
-   unsigned push_constant_regs = reg_aligned_constant_size / 32;
-
-   if (push_state.alloc_size) {
-      anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD),
-                     .CURBETotalDataLength = push_state.alloc_size,
-                     .CURBEDataStartAddress = push_state.offset);
-   }
-
-   assert(prog_data->total_shared <= 64 * 1024);
-   uint32_t slm_size = 0;
-   if (prog_data->total_shared > 0) {
-      /* slm_size is in 4k increments, but must be a power of 2.
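-       * (For example, total_shared = 9 KiB first rounds up to the next
-       * power of two, 16 KiB, and is then encoded as slm_size = 4.)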
-       */
-      slm_size = 4 * 1024;
-      while (slm_size < prog_data->total_shared)
-         slm_size <<= 1;
-      slm_size /= 4 * 1024;
-   }
-
-   struct anv_state state =
-      anv_state_pool_emit(&device->dynamic_state_pool,
-                          GENX(INTERFACE_DESCRIPTOR_DATA), 64,
-                          .KernelStartPointer = pipeline->cs_simd,
-                          .BindingTablePointer = surfaces.offset,
-                          .SamplerStatePointer = samplers.offset,
-                          .ConstantURBEntryReadLength =
-                             push_constant_regs,
-#if !GEN_IS_HASWELL
-                          .ConstantURBEntryReadOffset = 0,
-#endif
-                          .BarrierEnable = cs_prog_data->uses_barrier,
-                          .SharedLocalMemorySize = slm_size,
-                          .NumberofThreadsinGPGPUThreadGroup =
-                             pipeline->cs_thread_width_max);
-
-   const uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
-   anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
-                  .InterfaceDescriptorTotalLength = size,
-                  .InterfaceDescriptorDataStartAddress = state.offset);
-
-   return VK_SUCCESS;
-}
-
-static void
-emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
+static uint32_t
+get_depth_format(struct anv_cmd_buffer *cmd_buffer)
 {
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),
-                  .RegisterOffset = reg,
-                  .DataDWord = imm);
-}
+   const struct anv_render_pass *pass = cmd_buffer->state.pass;
+   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
 
-#define GEN7_L3SQCREG1 0xb010
-#define GEN7_L3CNTLREG2 0xb020
-#define GEN7_L3CNTLREG3 0xb024
+   if (!subpass->depth_stencil_attachment)
+      return D16_UNORM;
 
-static void
-config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
-{
-   /* References for GL state:
-    *
-    * - commits e307cfa..228d5a3
-    * - src/mesa/drivers/dri/i965/gen7_l3_state.c
-    */
-
-   uint32_t l3c2_val = enable_slm ?
-      /* All = 0 ways; URB = 16 ways; DC and RO = 16; SLM = 1 */
-      /*0x02040021*/0x010000a1 :
-      /* All = 0 ways; URB = 32 ways; DC = 0; RO = 32; SLM = 0 */
-      /*0x04080040*/0x02000030;
-   bool changed = cmd_buffer->state.current_l3_config != l3c2_val;
-
-   if (changed) {
-      /* According to the hardware docs, the L3 partitioning can only be
-       * changed while the pipeline is completely drained and the caches are
-       * flushed, which involves a first PIPE_CONTROL flush which stalls the
-       * pipeline and initiates invalidation of the relevant caches...
-       */
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .TextureCacheInvalidationEnable = true,
-                     .ConstantCacheInvalidationEnable = true,
-                     .InstructionCacheInvalidateEnable = true,
-                     .DCFlushEnable = true,
-                     .PostSyncOperation = NoWrite,
-                     .CommandStreamerStallEnable = true);
-
-      /* ...followed by a second stalling flush which guarantees that
-       * invalidation is complete when the L3 configuration registers are
-       * modified.
-       */
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .DCFlushEnable = true,
-                     .PostSyncOperation = NoWrite,
-                     .CommandStreamerStallEnable = true);
-
-      anv_finishme("write GEN7_L3SQCREG1");
-      emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3c2_val);
-      emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3,
-               enable_slm ?
-                  0x00040810 : 0x00040410);
-      cmd_buffer->state.current_l3_config = l3c2_val;
-   }
-}
 
-void
-genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
-   VkResult result;
+   struct anv_render_pass_attachment *att =
+      &pass->attachments[subpass->depth_stencil_attachment->attachment];
 
-   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
+   switch (att->format) {
+   case VK_FORMAT_D16_UNORM:
+   case VK_FORMAT_D16_UNORM_S8_UINT:
+      return D16_UNORM;
 
-   bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0;
-   config_l3(cmd_buffer, needs_slm);
+   case VK_FORMAT_X8_D24_UNORM_PACK32:
+   case VK_FORMAT_D24_UNORM_S8_UINT:
+      return D24_UNORM_X8_UINT;
 
-   if (cmd_buffer->state.current_pipeline != GPGPU) {
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT),
-                     .PipelineSelection = GPGPU);
-      cmd_buffer->state.current_pipeline = GPGPU;
+   case VK_FORMAT_D32_SFLOAT:
+   case VK_FORMAT_D32_SFLOAT_S8_UINT:
+      return D32_FLOAT;
 
-   if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
-      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
-
-   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
-       (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
-      /* FIXME: figure out descriptors for gen7 */
-      result = flush_compute_descriptor_set(cmd_buffer);
-      assert(result == VK_SUCCESS);
-      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
-   }
-
-   cmd_buffer->state.compute_dirty = 0;
+   default:
+      return D16_UNORM;
    }
 }
 
 void
-genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
+genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
 {
-   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
-   uint32_t *p;
-
-   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;
-
-   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
-
-   genX(flush_pipeline_select_3d)(cmd_buffer);
-
-   if (vb_emit) {
-      const uint32_t num_buffers = __builtin_popcount(vb_emit);
-      const uint32_t num_dwords = 1 + num_buffers * 4;
-
-      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
-                          GENX(3DSTATE_VERTEX_BUFFERS));
-      uint32_t vb, i = 0;
-      for_each_bit(vb, vb_emit) {
-         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
-         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;
-
-         struct GENX(VERTEX_BUFFER_STATE) state = {
-            .VertexBufferIndex = vb,
-            .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA,
-            .VertexBufferMemoryObjectControlState = GENX(MOCS),
-            .AddressModifyEnable = true,
-            .BufferPitch = pipeline->binding_stride[vb],
-            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
-            .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1},
-            .InstanceDataStepRate = 1
-         };
-
-         GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state);
-         i++;
-      }
-   }
-
-   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) {
-      /* If somebody compiled a pipeline after starting a command buffer the
-       * scratch bo may have grown since we started this cmd buffer (and
-       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
-       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo.
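-       * (For example, a pipeline compiled mid-recording that needs more
-       * scratch space than was allocated when STATE_BASE_ADDRESS was first
-       * emitted.)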
-       */
-      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
-         gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
-
-      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
-   }
-
-   if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT ||
-       cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) {
-      /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
-       *
-       *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
-       *    stall needs to be sent just prior to any 3DSTATE_VS,
-       *    3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
-       *    3DSTATE_BINDING_TABLE_POINTER_VS,
-       *    3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one
-       *    PIPE_CONTROL needs to be sent before any combination of VS
-       *    associated 3DSTATE."
-       */
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .DepthStallEnable = true,
-                     .PostSyncOperation = WriteImmediateData,
-                     .Address = { &cmd_buffer->device->workaround_bo, 0 });
-   }
-
-   uint32_t dirty = 0;
-   if (cmd_buffer->state.descriptors_dirty) {
-      dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer);
-      gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
-   }
-
-   if (cmd_buffer->state.push_constants_dirty)
-      cmd_buffer_flush_push_constants(cmd_buffer);
-
-   /* We use the gen8 state here because it only contains the additional
-    * min/max fields and, since they occur at the end of the packet and
-    * don't change the stride, they work on gen7 too.
-    */
-   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
-      gen8_cmd_buffer_emit_viewport(cmd_buffer);
-
-   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
-      gen7_cmd_buffer_emit_scissor(cmd_buffer);
-
-   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
-                                  ANV_CMD_DIRTY_RENDER_TARGETS |
-                                  ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |
-                                  ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
-
-      bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f ||
-                         cmd_buffer->state.dynamic.depth_bias.slope != 0.0f;
-
-      const struct anv_image_view *iview =
-         anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
-      const struct anv_image *image = iview ? iview->image : NULL;
-      const struct anv_format *anv_format =
-         iview ? anv_format_for_vk_format(iview->vk_format) : NULL;
-      const bool has_depth = iview && anv_format->has_depth;
-      const uint32_t depth_format = has_depth ?
-         isl_surf_get_depth_format(&cmd_buffer->device->isl_dev,
-                                   &image->depth_surface.isl) : D16_UNORM;
+   struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+   struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;
 
+   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
+                                      ANV_CMD_DIRTY_RENDER_TARGETS |
+                                      ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |
+                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
       uint32_t sf_dw[GENX(3DSTATE_SF_length)];
       struct GENX(3DSTATE_SF) sf = {
         GENX(3DSTATE_SF_header),
-         .DepthBufferSurfaceFormat = depth_format,
-         .LineWidth = cmd_buffer->state.dynamic.line_width,
-         .GlobalDepthOffsetEnableSolid = enable_bias,
-         .GlobalDepthOffsetEnableWireframe = enable_bias,
-         .GlobalDepthOffsetEnablePoint = enable_bias,
-         .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
-         .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
-         .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
+         .DepthBufferSurfaceFormat = get_depth_format(cmd_buffer),
+         .LineWidth = d->line_width,
+         .GlobalDepthOffsetConstant = d->depth_bias.bias,
+         .GlobalDepthOffsetScale = d->depth_bias.slope,
+         .GlobalDepthOffsetClamp = d->depth_bias.clamp
      };
      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
 
      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf);
   }
 
-   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
-                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
+   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
+                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
-         .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
-         .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
-         .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
-         .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
-         .StencilReferenceValue =
-            cmd_buffer->state.dynamic.stencil_reference.front,
-         .BackFaceStencilReferenceValue =
-            cmd_buffer->state.dynamic.stencil_reference.back,
+         .BlendConstantColorRed = d->blend_constants[0],
+         .BlendConstantColorGreen = d->blend_constants[1],
+         .BlendConstantColorBlue = d->blend_constants[2],
+         .BlendConstantColorAlpha = d->blend_constants[3],
+         .StencilReferenceValue = d->stencil_reference.front & 0xff,
+         .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
 
-      if (!cmd_buffer->device->info.has_llc)
-         anv_state_clflush(cc_state);
-
-      anv_batch_emit(&cmd_buffer->batch,
-                     GENX(3DSTATE_CC_STATE_POINTERS),
-                     .ColorCalcStatePointer = cc_state.offset);
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
+         ccp.ColorCalcStatePointer = cc_state.offset;
+      }
   }
 
-   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
-                                  ANV_CMD_DIRTY_RENDER_TARGETS |
-                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
-                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
-      uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
+   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
+         ls.LineStipplePattern = d->line_stipple.pattern;
+         ls.LineStippleInverseRepeatCount =
+            1.0f / MAX2(1, d->line_stipple.factor);
+         ls.LineStippleRepeatCount = d->line_stipple.factor;
+      }
+   }
 
-      const struct anv_image_view *iview =
-         anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
+                                      ANV_CMD_DIRTY_RENDER_TARGETS |
+                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
+                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
+      uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
 
      struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
-         .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT),
+         .StencilTestMask = d->stencil_compare_mask.front & 0xff,
+         .StencilWriteMask = d->stencil_write_mask.front & 0xff,
 
-         .StencilTestMask =
-            cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff,
-         .StencilWriteMask =
-            cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff,
+         .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
+         .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
 
-         .BackfaceStencilTestMask =
-            cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff,
-         .BackfaceStencilWriteMask =
-            cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff,
+         .StencilBufferWriteEnable =
+            (d->stencil_write_mask.front || d->stencil_write_mask.back) &&
+            pipeline->writes_stencil,
      };
      GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
@@ -599,64 +271,45 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
                                         GENX(DEPTH_STENCIL_STATE_length), 64);
 
      anv_batch_emit(&cmd_buffer->batch,
-                     GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS),
-                     .PointertoDEPTH_STENCIL_STATE = ds_state.offset);
+                     GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), dsp) {
+         dsp.PointertoDEPTH_STENCIL_STATE = ds_state.offset;
+      }
   }
 
-   if (cmd_buffer->state.gen7.index_buffer &&
-       cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
-                                  ANV_CMD_DIRTY_INDEX_BUFFER)) {
-      struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer;
-      uint32_t offset = cmd_buffer->state.gen7.index_offset;
+   if (cmd_buffer->state.gfx.gen7.index_buffer &&
+       cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
+                                      ANV_CMD_DIRTY_INDEX_BUFFER)) {
+      struct anv_buffer *buffer = cmd_buffer->state.gfx.gen7.index_buffer;
+      uint32_t offset = cmd_buffer->state.gfx.gen7.index_offset;
 
 #if GEN_IS_HASWELL
-      anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF,
-                     .IndexedDrawCutIndexEnable = pipeline->primitive_restart,
-                     .CutIndex = cmd_buffer->state.restart_index);
+      anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, vf) {
+         vf.IndexedDrawCutIndexEnable = pipeline->primitive_restart;
+         vf.CutIndex = cmd_buffer->state.restart_index;
+      }
 #endif
 
-      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER),
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
 #if !GEN_IS_HASWELL
-                     .CutIndexEnable = pipeline->primitive_restart,
+         ib.CutIndexEnable = pipeline->primitive_restart;
 #endif
-                     .IndexFormat = cmd_buffer->state.gen7.index_type,
-                     .MemoryObjectControlState = GENX(MOCS),
-                     .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
-                     .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size });
+         ib.IndexFormat = cmd_buffer->state.gfx.gen7.index_type;
+         ib.MOCS = anv_mocs_for_bo(cmd_buffer->device,
+                                   buffer->address.bo);
+
+         ib.BufferStartingAddress = anv_address_add(buffer->address,
+                                                    offset);
+         ib.BufferEndingAddress = anv_address_add(buffer->address,
+                                                  buffer->size);
+      }
   }
 
-   cmd_buffer->state.vb_dirty &= ~vb_emit;
-   cmd_buffer->state.dirty = 0;
-}
-
-void genX(CmdSetEvent)(
-    VkCommandBuffer commandBuffer,
-    VkEvent event,
-    VkPipelineStageFlags stageMask)
-{
-   stub();
+   cmd_buffer->state.gfx.dirty = 0;
 }
 
-void genX(CmdResetEvent)(
-    VkCommandBuffer commandBuffer,
-    VkEvent event,
-    VkPipelineStageFlags stageMask)
-{
-   stub();
-}
-
-void genX(CmdWaitEvents)(
-    VkCommandBuffer commandBuffer,
-    uint32_t eventCount,
-    const VkEvent* pEvents,
-    VkPipelineStageFlags srcStageMask,
-    VkPipelineStageFlags destStageMask,
-    uint32_t memoryBarrierCount,
-    const VkMemoryBarrier* pMemoryBarriers,
-    uint32_t bufferMemoryBarrierCount,
-    const VkBufferMemoryBarrier* pBufferMemoryBarriers,
-    uint32_t imageMemoryBarrierCount,
-    const VkImageMemoryBarrier* pImageMemoryBarriers)
+void
+genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
+                                bool enable)
 {
-   stub();
+   /* The NP PMA fix doesn't exist on gen7 */
 }
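
A minimal standalone sketch of the overflow case the scissor clamping above
guards against (illustrative only, not part of the patch; clamp_int64 is
reproduced from the file, with the lines elided between the diff hunks filled
in, and the 0xffff limit is the hardware maximum used in the scissor code):

#include <assert.h>
#include <stdint.h>

static int64_t
clamp_int64(int64_t x, int64_t min, int64_t max)
{
   if (x < min)
      return min;
   else if (x < max)
      return x;
   else
      return max;
}

int
main(void)
{
   /* offset.y = INT32_MAX, extent.height = 2: the unclamped maximum is
    * 0x80000000.  In 32-bit signed math that reads as negative and would
    * clamp to 0, collapsing the scissor; widened first, it clamps to the
    * 0xffff hardware limit as intended. */
   uint32_t y_max = (uint32_t) INT32_MAX + 2 - 1;
   assert(clamp_int64((uint64_t) y_max, 0, 0xffff) == 0xffff);
   return 0;
}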