anv/allocator: Move the alignment assert for the pointer free list
[mesa.git] / src / intel / vulkan / gen7_cmd_buffer.c
index 9681f22dc3de6078f52aeeb7c3a140b7f1968b67..8dce586eec7f259f9c932564d8a985a713e3dfbd 100644 (file)
@@ -52,20 +52,20 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
 
       struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);
 
-      if (state.offset == 0)
-         continue;
-
-      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
-                     ._3DCommandSubOpcode = push_constant_opcodes[stage],
-                     .ConstantBody = {
-                        .PointerToConstantBuffer0 = { .offset = state.offset },
-                        .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
-                     });
-
-      flushed |= mesa_to_vk_shader_stage(stage);
+      if (state.offset == 0) {
+         anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
+                        ._3DCommandSubOpcode = push_constant_opcodes[stage]);
+      } else {
+         anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
+                        ._3DCommandSubOpcode = push_constant_opcodes[stage],
+                        .ConstantBody = {
+                           .PointerToConstantBuffer0 = { .offset = state.offset },
+                           .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
+                        });
+      }
    }
 
-   cmd_buffer->state.push_constants_dirty &= ~flushed;
+   cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS;
 
    return flushed;
 }
@@ -283,7 +283,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
 
    struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
 
-   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
 
    unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
@@ -332,14 +332,77 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
    return VK_SUCCESS;
 }
 
+static void
+emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
+{
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),
+                  .RegisterOffset = reg,
+                  .DataDWord = imm);
+}
+
+#define GEN7_L3SQCREG1                     0xb010
+#define GEN7_L3CNTLREG2                    0xb020
+#define GEN7_L3CNTLREG3                    0xb024
+
+static void
+config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
+{
+   /* References for GL state:
+    *
+    * - commits e307cfa..228d5a3
+    * - src/mesa/drivers/dri/i965/gen7_l3_state.c
+    */
+
+   uint32_t l3c2_val = enable_slm ?
+      /* All = 0 ways; URB = 16 ways; DC and RO = 16; SLM = 1 */
+      /*0x02040021*/0x010000a1 :
+      /* All = 0 ways; URB = 32 ways; DC = 0; RO = 32; SLM = 0 */
+      /*0x04080040*/0x02000030;
+   bool changed = cmd_buffer->state.current_l3_config != l3c2_val;
+
+   if (changed) {
+      /* According to the hardware docs, the L3 partitioning can only be changed
+       * while the pipeline is completely drained and the caches are flushed,
+       * which involves a first PIPE_CONTROL flush which stalls the pipeline and
+       * initiates invalidation of the relevant caches...
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                     .TextureCacheInvalidationEnable = true,
+                     .ConstantCacheInvalidationEnable = true,
+                     .InstructionCacheInvalidateEnable = true,
+                     .DCFlushEnable = true,
+                     .PostSyncOperation = NoWrite,
+                     .CommandStreamerStallEnable = true);
+
+      /* ...followed by a second stalling flush which guarantees that
+       * invalidation is complete when the L3 configuration registers are
+       * modified.
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                     .DCFlushEnable = true,
+                     .PostSyncOperation = NoWrite,
+                     .CommandStreamerStallEnable = true);
+
+      anv_finishme("write GEN7_L3SQCREG1");
+      emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG2, l3c2_val);
+      emit_lri(&cmd_buffer->batch, GEN7_L3CNTLREG3,
+               enable_slm ? 0x00040810 : 0x00040410);
+      cmd_buffer->state.current_l3_config = l3c2_val;
+   }
+}
+
 void
 genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    VkResult result;
 
    assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
 
+   bool needs_slm = cs_prog_data->base.total_shared > 0;
+   config_l3(cmd_buffer, needs_slm);
+
    if (cmd_buffer->state.current_pipeline != GPGPU) {
       anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT),
                      .PipelineSelection = GPGPU);
@@ -408,6 +471,17 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
          gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
 
       anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+
+      /* From the BDW PRM for 3DSTATE_PUSH_CONSTANT_ALLOC_VS:
+       *
+       *    "The 3DSTATE_CONSTANT_VS must be reprogrammed prior to
+       *    the next 3DPRIMITIVE command after programming the
+       *    3DSTATE_PUSH_CONSTANT_ALLOC_VS"
+       *
+       * Since 3DSTATE_PUSH_CONSTANT_ALLOC_VS is programmed as part of
+       * pipeline setup, we need to dirty push constants.
+       */
+      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
    }
 
    if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT ||
@@ -452,9 +526,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
                                   ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |
                                   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
 
-      bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f ||
-         cmd_buffer->state.dynamic.depth_bias.slope != 0.0f;
-
       const struct anv_image_view *iview =
          anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
       const struct anv_image *image = iview ? iview->image : NULL;
@@ -470,9 +541,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
          GENX(3DSTATE_SF_header),
          .DepthBufferSurfaceFormat = depth_format,
          .LineWidth = cmd_buffer->state.dynamic.line_width,
-         .GlobalDepthOffsetEnableSolid = enable_bias,
-         .GlobalDepthOffsetEnableWireframe = enable_bias,
-         .GlobalDepthOffsetEnablePoint = enable_bias,
          .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
          .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
          .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
@@ -484,6 +552,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
 
    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
+      struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
       struct anv_state cc_state =
          anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                             GENX(COLOR_CALC_STATE_length) * 4,
@@ -493,10 +562,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
          .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
          .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
          .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
-         .StencilReferenceValue =
-            cmd_buffer->state.dynamic.stencil_reference.front,
-         .BackFaceStencilReferenceValue =
-            cmd_buffer->state.dynamic.stencil_reference.back,
+         .StencilReferenceValue = d->stencil_reference.front & 0xff,
+         .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff,
       };
       GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
       if (!cmd_buffer->device->info.has_llc)
@@ -512,22 +579,14 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
       uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
-
-      const struct anv_image_view *iview =
-         anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+      struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
 
       struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
-         .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT),
-
-         .StencilTestMask =
-            cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff,
-         .StencilWriteMask =
-            cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff,
+         .StencilTestMask = d->stencil_compare_mask.front & 0xff,
+         .StencilWriteMask = d->stencil_write_mask.front & 0xff,
 
-         .BackfaceStencilTestMask =
-            cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff,
-         .BackfaceStencilWriteMask =
-            cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff,
+         .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
+         .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
       };
       GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);