struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);
- if (state.offset == 0)
- continue;
-
- anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
- ._3DCommandSubOpcode = push_constant_opcodes[stage],
- .ConstantBody = {
- .PointerToConstantBuffer0 = { .offset = state.offset },
- .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
- });
-
- flushed |= mesa_to_vk_shader_stage(stage);
+ if (state.offset == 0) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
+ ._3DCommandSubOpcode = push_constant_opcodes[stage]);
+ } else {
+ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
+ ._3DCommandSubOpcode = push_constant_opcodes[stage],
+ .ConstantBody = {
+ .PointerToConstantBuffer0 = { .offset = state.offset },
+ .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
+ });
+ }
}
- cmd_buffer->state.push_constants_dirty &= ~flushed;
+ cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS;
return flushed;
}
struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
- const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+ const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
return VK_SUCCESS;
}
+static void
+emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
+{ /* Emit a single MI_LOAD_REGISTER_IMM command into `batch`, with its
+   * RegisterOffset field set to `reg` and its DataDWord field set to `imm`.
+   *
+   * NOTE(review): presumably this makes the command streamer write `imm`
+   * to the register at offset `reg` -- confirm against the hardware docs.
+   */
+ anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),
+ .RegisterOffset = reg,
+ .DataDWord = imm);
+}
+
+#define GEN7_L3SQCREG1 0xb010
+#define GEN7_L3CNTLREG2 0xb020
+#define GEN7_L3CNTLREG3 0xb024
+
+static void
+config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
+{
+ /* Select one of two hard-coded L3 configurations based on `enable_slm`
+ * and, only if it differs from cmd_buffer->state.current_l3_config,
+ * emit the flush sequence and register writes that switch to it.
+ *
+ * The value chosen for GEN7_L3CNTLREG2 doubles as the identifier of the
+ * active configuration: it is what gets compared against, and stored
+ * into, current_l3_config. The hex constants inside the small inline
+ * comments next to each value are commented out and therefore unused.
+ *
+ * References for GL state:
+ *
+ * - commits e307cfa..228d5a3
+ * - src/mesa/drivers/dri/i965/gen7_l3_state.c
+ */
+
+ uint32_t l3c2_val = enable_slm ?
+ /* All = 0 ways; URB = 16 ways; DC and RO = 16; SLM = 1 */
+ /*0x02040021*/0x010000a1 :
+ /* All = 0 ways; URB = 32 ways; DC = 0; RO = 32; SLM = 0 */
+ /*0x04080040*/0x02000030;
+ bool changed = cmd_buffer->state.current_l3_config != l3c2_val; /* current_l3_config is refreshed below after each rewrite */
+
+ if (changed) {
+ /* According to the hardware docs, the L3 partitioning can only be changed
+ * while the pipeline is completely drained and the caches are flushed,
+ * which involves a first PIPE_CONTROL flush which stalls the pipeline and
+ * initiates invalidation of the relevant caches...
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+ .TextureCacheInvalidationEnable = true,
+ .ConstantCacheInvalidationEnable = true,
+ .InstructionCacheInvalidateEnable = true,
+ .DCFlushEnable = true,
+ .PostSyncOperation = NoWrite,
+ .CommandStreamerStallEnable = true);
+
+ /* ...followed by a second stalling flush which guarantees that
+ * invalidation is complete when the L3 configuration registers are
+ * modified.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+ .DCFlushEnable = true,
+ .PostSyncOperation = NoWrite,
+ .CommandStreamerStallEnable = true);
+
+ anv_finishme("write GEN7_L3SQCREG1"); /* GEN7_L3SQCREG1 is not programmed by this function yet */
+ emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3c2_val);
+ emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3,
+ enable_slm ? 0x00040810 : 0x00040410);
+ cmd_buffer->state.current_l3_config = l3c2_val; /* remember the new config so an identical request emits nothing */
+ }
+}
+
void
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
VkResult result;
assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
+ bool needs_slm = cs_prog_data->base.total_shared > 0;
+ config_l3(cmd_buffer, needs_slm);
+
if (cmd_buffer->state.current_pipeline != GPGPU) {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT),
.PipelineSelection = GPGPU);
gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+
+ /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS:
+ *
+ * "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to
+ * the next 3DPRIMITIVE command after programming the
+ * 3DSTATE_PUSH_CONSTANT_ALLOC_VS"
+ *
+ * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of
+ * pipeline setup, we need to dirty push constants.
+ */
+ cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}
if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT ||
ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |
ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
- bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f ||
- cmd_buffer->state.dynamic.depth_bias.slope != 0.0f;
-
const struct anv_image_view *iview =
anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
const struct anv_image *image = iview ? iview->image : NULL;
- const uint32_t depth_format = image ?
+ const struct anv_format *anv_format =
+ iview ? anv_format_for_vk_format(iview->vk_format) : NULL;
+ const bool has_depth = iview && anv_format->has_depth;
+ const uint32_t depth_format = has_depth ?
isl_surf_get_depth_format(&cmd_buffer->device->isl_dev,
&image->depth_surface.isl) : D16_UNORM;
GENX(3DSTATE_SF_header),
.DepthBufferSurfaceFormat = depth_format,
.LineWidth = cmd_buffer->state.dynamic.line_width,
- .GlobalDepthOffsetEnableSolid = enable_bias,
- .GlobalDepthOffsetEnableWireframe = enable_bias,
- .GlobalDepthOffsetEnablePoint = enable_bias,
.GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
.GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
.GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
+ struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
.BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
.BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
.BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
- .StencilReferenceValue =
- cmd_buffer->state.dynamic.stencil_reference.front,
- .BackFaceStencilReferenceValue =
- cmd_buffer->state.dynamic.stencil_reference.back,
+ .StencilReferenceValue = d->stencil_reference.front & 0xff,
+ .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff,
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
if (!cmd_buffer->device->info.has_llc)
ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
-
- const struct anv_image_view *iview =
- anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+ struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
- .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT),
-
- .StencilTestMask =
- cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff,
- .StencilWriteMask =
- cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff,
+ .StencilTestMask = d->stencil_compare_mask.front & 0xff,
+ .StencilWriteMask = d->stencil_write_mask.front & 0xff,
- .BackfaceStencilTestMask =
- cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff,
- .BackfaceStencilWriteMask =
- cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff,
+ .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
+ .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
};
GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);