X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fgen7_cmd_buffer.c;h=8dce586eec7f259f9c932564d8a985a713e3dfbd;hb=3d4f2b0927acaac05e87ed07ae492e39b4c82ff7;hp=9681f22dc3de6078f52aeeb7c3a140b7f1968b67;hpb=ad50896c8769adcf141619774f8c156a2bcf920a;p=mesa.git diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 9681f22dc3d..8dce586eec7 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -52,20 +52,20 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - if (state.offset == 0) - continue; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer0 = { .offset = state.offset }, - .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - - flushed |= mesa_to_vk_shader_stage(stage); + if (state.offset == 0) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage]); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + } } - cmd_buffer->state.push_constants_dirty &= ~flushed; + cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS; return flushed; } @@ -283,7 +283,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; @@ -332,14 +332,77 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) return VK_SUCCESS; } +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +#define GEN7_L3SQCREG1 0xb010 +#define GEN7_L3CNTLREG2 0xb020 +#define GEN7_L3CNTLREG3 0xb024 + +static void +config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +{ + /* References for GL state: + * + * - commits e307cfa..228d5a3 + * - src/mesa/drivers/dri/i965/gen7_l3_state.c + */ + + uint32_t l3c2_val = enable_slm ? + /* All = 0 ways; URB = 16 ways; DC and RO = 16; SLM = 1 */ + /*0x02040021*/0x010000a1 : + /* All = 0 ways; URB = 32 ways; DC = 0; RO = 32; SLM = 0 */ + /*0x04080040*/0x02000030; + bool changed = cmd_buffer->state.current_l3_config != l3c2_val; + + if (changed) { + /* According to the hardware docs, the L3 partitioning can only be changed + * while the pipeline is completely drained and the caches are flushed, + * which involves a first PIPE_CONTROL flush which stalls the pipeline and + * initiates invalidation of the relevant caches... + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + /* ...followed by a second stalling flush which guarantees that + * invalidation is complete when the L3 configuration registers are + * modified. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + anv_finishme("write GEN7_L3SQCREG1"); + emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3c2_val); + emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3, + enable_slm ? 0x00040810 : 0x00040410); + cmd_buffer->state.current_l3_config = l3c2_val; + } +} + void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); VkResult result; assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + bool needs_slm = cs_prog_data->base.total_shared > 0; + config_l3(cmd_buffer, needs_slm); + if (cmd_buffer->state.current_pipeline != GPGPU) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), .PipelineSelection = GPGPU); @@ -408,6 +471,17 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) gen7_cmd_buffer_emit_state_base_address(cmd_buffer); anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS: + * + * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to + * the next 3DPRIMITIVE command after programming the + * 3DSTATE_PUSH_CONSTANT_ALLOC_VS" + * + * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of + * pipeline setup, we need to dirty push constants. + */ + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; } if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || @@ -452,9 +526,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { - bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; - const struct anv_image_view *iview = anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); const struct anv_image *image = iview ? iview->image : NULL; @@ -470,9 +541,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) GENX(3DSTATE_SF_header), .DepthBufferSurfaceFormat = depth_format, .LineWidth = cmd_buffer->state.dynamic.line_width, - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp @@ -484,6 +552,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct anv_state cc_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, GENX(COLOR_CALC_STATE_length) * 4, @@ -493,10 +562,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.front, - .BackFaceStencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.back, + .StencilReferenceValue = d->stencil_reference.front & 0xff, + .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); if (!cmd_buffer->device->info.has_llc) @@ -512,22 +579,14 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)]; - - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; struct GENX(DEPTH_STENCIL_STATE) depth_stencil = { - .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), - - .StencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, - .StencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, - .BackfaceStencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, }; GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);