anv: Add pipe_state_for_stage() helper

[mesa.git] / src / intel / vulkan / genX_cmd_buffer.c
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c

index 0b6a78fcda818d125f15e57a85d952d968c4a385..51b14f2cb9d52ac165fd9ab4aaf41e5be8597b11 100644 (file)
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -132,13 +132,21 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
         * these fields.  However, since we will be growing the BO's live, we
         * just set them all to the maximum.
         */
-      sba.GeneralStateBufferSize                = 0xfffff;
+      sba.GeneralStateBufferSize       = 0xfffff;
+      sba.IndirectObjectBufferSize     = 0xfffff;
+      if (device->physical->use_softpin) {
+         /* With softpin, we use fixed addresses so we actually know how big
+          * our base addresses are.
+          */
+         sba.DynamicStateBufferSize    = DYNAMIC_STATE_POOL_SIZE / 4096;
+         sba.InstructionBufferSize     = INSTRUCTION_STATE_POOL_SIZE / 4096;
+      } else {
+         sba.DynamicStateBufferSize    = 0xfffff;
+         sba.InstructionBufferSize     = 0xfffff;
+      }
        sba.GeneralStateBufferSizeModifyEnable    = true;
-      sba.DynamicStateBufferSize                = 0xfffff;
-      sba.DynamicStateBufferSizeModifyEnable    = true;
-      sba.IndirectObjectBufferSize              = 0xfffff;
        sba.IndirectObjectBufferSizeModifyEnable  = true;
-      sba.InstructionBufferSize                 = 0xfffff;
+      sba.DynamicStateBufferSizeModifyEnable    = true;
        sba.InstructionBuffersizeModifyEnable     = true;
  #  else
        /* On gen7, we have upper bounds instead.  According to the docs,
@@ -1001,7 +1009,6 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
                        uint32_t base_layer, uint32_t layer_count)
  {
     uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
-   assert(isl_aux_usage_has_ccs(image->planes[plane].aux_usage));
  
     uint64_t base_address =
        anv_address_physical(image->planes[plane].address);
@@ -1017,6 +1024,9 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
     cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
     genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
  
+   struct gen_mi_builder b;
+   gen_mi_builder_init(&b, &cmd_buffer->batch);
+
     for (uint32_t a = 0; a < layer_count; a++) {
        const uint32_t layer = base_layer + a;
  
@@ -1061,24 +1071,25 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
             offset < end_offset_B; offset += 64 * 1024) {
           uint64_t address = base_address + offset;
  
-         uint64_t aux_entry_address, *aux_entry_map;
+         uint64_t aux_entry_addr64, *aux_entry_map;
           aux_entry_map = gen_aux_map_get_entry(cmd_buffer->device->aux_map_ctx,
-                                               address, &aux_entry_address);
+                                               address, &aux_entry_addr64);
+
+         assert(cmd_buffer->device->physical->use_softpin);
+         struct anv_address aux_entry_address = {
+            .bo = NULL,
+            .offset = aux_entry_addr64,
+         };
  
           const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
           uint64_t new_aux_entry =
-            (old_aux_entry & ~GEN_AUX_MAP_FORMAT_BITS_MASK) | format_bits;
+            (old_aux_entry & GEN_AUX_MAP_ADDRESS_MASK) | format_bits;
  
-         /* We're only going to update the top 32 bits */
-         assert((uint32_t)old_aux_entry == (uint32_t)new_aux_entry);
+         if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
+            new_aux_entry |= GEN_AUX_MAP_ENTRY_VALID_BIT;
  
-         anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
-            sdi.Address = (struct anv_address) {
-               .bo = NULL,
-               .offset = aux_entry_address + 4,
-            };
-            sdi.ImmediateData = new_aux_entry >> 32;
-         }
+         gen_mi_store(&b, gen_mi_mem64(aux_entry_address),
+                          gen_mi_imm(new_aux_entry));
        }
     }
  
@@ -1157,8 +1168,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
     if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
         initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
  #if GEN_GEN == 12
-      if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage) &&
-          device->physical->has_implicit_ccs && devinfo->has_aux_map) {
+      if (device->physical->has_implicit_ccs && devinfo->has_aux_map) {
           anv_image_init_aux_tt(cmd_buffer, image, aspect,
                                 base_level, level_count,
                                 base_layer, layer_count);
@@ -1881,7 +1891,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
  
     uint32_t l3cr;
     anv_pack_struct(&l3cr, L3_ALLOCATION_REG,
-#if GEN_GEN < 12
+#if GEN_GEN < 11
                     .SLMEnable = has_slm,
  #endif
  #if GEN_GEN == 11
@@ -1983,6 +1993,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
  void
  genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
  {
+   UNUSED const struct gen_device_info *devinfo = &cmd_buffer->device->info;
     enum anv_pipe_bits bits = cmd_buffer->state.pending_pipe_bits;
  
     if (cmd_buffer->device->physical->always_flush_cache)
@@ -2040,6 +2051,24 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
               sizeof(cmd_buffer->state.gfx.ib_dirty_range));
     }
  
+   /* Project: SKL / Argument: LRI Post Sync Operation [23]
+    *
+    * "PIPECONTROL command with “Command Streamer Stall Enable” must be
+    *  programmed prior to programming a PIPECONTROL command with "LRI
+    *  Post Sync Operation" in GPGPU mode of operation (i.e when
+    *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
+    *
+    * The same text exists a few rows below for Post Sync Op.
+    *
+    * On Gen12 this is GEN:BUG:1607156449.
+    */
+   if (bits & ANV_PIPE_POST_SYNC_BIT) {
+      if ((GEN_GEN == 9 || (GEN_GEN == 12 && devinfo->revision == 0 /* A0 */)) &&
+          cmd_buffer->state.current_pipeline == GPGPU)
+         bits |= ANV_PIPE_CS_STALL_BIT;
+      bits &= ~ANV_PIPE_POST_SYNC_BIT;
+   }
+
     if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT)) {
        anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
  #if GEN_GEN >= 12
@@ -2322,25 +2351,37 @@ anv_descriptor_set_address(struct anv_cmd_buffer *cmd_buffer,
     }
  }
  
+static struct anv_cmd_pipeline_state * /* base pipeline state in cmd_buffer that serves `stage` */
+pipe_state_for_stage(struct anv_cmd_buffer *cmd_buffer,
+                     gl_shader_stage stage)
+{
+   switch (stage) {
+   case MESA_SHADER_COMPUTE: /* compute has its own state under state.compute */
+      return &cmd_buffer->state.compute.base;
+
+   case MESA_SHADER_VERTEX:
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
+   case MESA_SHADER_GEOMETRY:
+   case MESA_SHADER_FRAGMENT: /* all five stages listed here share state.gfx */
+      return &cmd_buffer->state.gfx.base;
+
+   default: /* any stage not listed above is rejected as invalid */
+      unreachable("invalid stage");
+   }
+}
+
  static VkResult
  emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                     gl_shader_stage stage,
                     struct anv_state *bt_state)
  {
     struct anv_subpass *subpass = cmd_buffer->state.subpass;
-   struct anv_cmd_pipeline_state *pipe_state;
-   struct anv_pipeline *pipeline;
     uint32_t state_offset;
  
-   switch (stage) {
-   case  MESA_SHADER_COMPUTE:
-      pipe_state = &cmd_buffer->state.compute.base;
-      break;
-   default:
-      pipe_state = &cmd_buffer->state.gfx.base;
-      break;
-   }
-   pipeline = pipe_state->pipeline;
+   struct anv_cmd_pipeline_state *pipe_state =
+      pipe_state_for_stage(cmd_buffer, stage);
+   struct anv_pipeline *pipeline = pipe_state->pipeline;
  
     if (!anv_pipeline_has_stage(pipeline, stage)) {
        *bt_state = (struct anv_state) { 0, };
@@ -2589,8 +2630,7 @@ emit_samplers(struct anv_cmd_buffer *cmd_buffer,
                struct anv_state *state)
  {
     struct anv_cmd_pipeline_state *pipe_state =
-      stage == MESA_SHADER_COMPUTE ? &cmd_buffer->state.compute.base :
-                                     &cmd_buffer->state.gfx.base;
+      pipe_state_for_stage(cmd_buffer, stage);
     struct anv_pipeline *pipeline = pipe_state->pipeline;
  
     if (!anv_pipeline_has_stage(pipeline, stage)) {
@@ -2808,6 +2848,10 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
           const struct anv_pipeline_bind_map *bind_map =
              &pipeline->shaders[stage]->bind_map;
  
+#if GEN_GEN >= 12
+         c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
+#endif
+
  #if GEN_GEN >= 8 || GEN_IS_HASWELL
           /* The Skylake PRM contains the following restriction:
            *
@@ -2868,6 +2912,7 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
     if (count == 0) {
        anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
           c.ShaderUpdateEnable = shader_mask;
+         c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
        }
        return;
     }
@@ -2898,7 +2943,8 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
     dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GENX(3DSTATE_CONSTANT_ALL),
                          .ShaderUpdateEnable = shader_mask,
-                        .PointerBufferMask = buffers);
+                        .PointerBufferMask = buffers,
+                        .MOCS = cmd_buffer->device->isl_dev.mocs.internal);
  
     for (int i = 0; i < count; i++) {
        const struct anv_push_range *range = &bind_map->push_ranges[i];
@@ -4609,6 +4655,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
     isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
  
     if (GEN_GEN >= 12) {
+      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
        /* GEN:BUG:1408224581
         *
         * Workaround: Gen12LP Astep only An additional pipe control with
@@ -5560,6 +5609,9 @@ void genX(CmdSetEvent)(
     ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
     ANV_FROM_HANDLE(anv_event, event, _event);
  
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
     anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
        if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
           pc.StallAtPixelScoreboard = true;
@@ -5584,6 +5636,9 @@ void genX(CmdResetEvent)(
     ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
     ANV_FROM_HANDLE(anv_event, event, _event);
  
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
     anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
        if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
           pc.StallAtPixelScoreboard = true;