anv: Add pipe_state_for_stage() helper
[mesa.git] / src / intel / vulkan / genX_cmd_buffer.c
index 0b6a78fcda818d125f15e57a85d952d968c4a385..51b14f2cb9d52ac165fd9ab4aaf41e5be8597b11 100644 (file)
@@ -132,13 +132,21 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
        * these fields.  However, since we will be growing the BO's live, we
        * just set them all to the maximum.
        */
-      sba.GeneralStateBufferSize                = 0xfffff;
+      sba.GeneralStateBufferSize       = 0xfffff;
+      sba.IndirectObjectBufferSize     = 0xfffff;
+      if (device->physical->use_softpin) {
+         /* With softpin, we use fixed addresses so we actually know how big
+          * our base addresses are.
+          */
+         sba.DynamicStateBufferSize    = DYNAMIC_STATE_POOL_SIZE / 4096;
+         sba.InstructionBufferSize     = INSTRUCTION_STATE_POOL_SIZE / 4096;
+      } else {
+         sba.DynamicStateBufferSize    = 0xfffff;
+         sba.InstructionBufferSize     = 0xfffff;
+      }
       sba.GeneralStateBufferSizeModifyEnable    = true;
-      sba.DynamicStateBufferSize                = 0xfffff;
-      sba.DynamicStateBufferSizeModifyEnable    = true;
-      sba.IndirectObjectBufferSize              = 0xfffff;
       sba.IndirectObjectBufferSizeModifyEnable  = true;
-      sba.InstructionBufferSize                 = 0xfffff;
+      sba.DynamicStateBufferSizeModifyEnable    = true;
       sba.InstructionBuffersizeModifyEnable     = true;
 #  else
       /* On gen7, we have upper bounds instead.  According to the docs,
@@ -1001,7 +1009,6 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
                       uint32_t base_layer, uint32_t layer_count)
 {
    uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
-   assert(isl_aux_usage_has_ccs(image->planes[plane].aux_usage));
 
    uint64_t base_address =
       anv_address_physical(image->planes[plane].address);
@@ -1017,6 +1024,9 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
    cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
+   struct gen_mi_builder b;
+   gen_mi_builder_init(&b, &cmd_buffer->batch);
+
    for (uint32_t a = 0; a < layer_count; a++) {
       const uint32_t layer = base_layer + a;
 
@@ -1061,24 +1071,25 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
            offset < end_offset_B; offset += 64 * 1024) {
          uint64_t address = base_address + offset;
 
-         uint64_t aux_entry_address, *aux_entry_map;
+         uint64_t aux_entry_addr64, *aux_entry_map;
          aux_entry_map = gen_aux_map_get_entry(cmd_buffer->device->aux_map_ctx,
-                                               address, &aux_entry_address);
+                                               address, &aux_entry_addr64);
+
+         assert(cmd_buffer->device->physical->use_softpin);
+         struct anv_address aux_entry_address = {
+            .bo = NULL,
+            .offset = aux_entry_addr64,
+         };
 
          const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
          uint64_t new_aux_entry =
-            (old_aux_entry & ~GEN_AUX_MAP_FORMAT_BITS_MASK) | format_bits;
+            (old_aux_entry & GEN_AUX_MAP_ADDRESS_MASK) | format_bits;
 
-         /* We're only going to update the top 32 bits */
-         assert((uint32_t)old_aux_entry == (uint32_t)new_aux_entry);
+         if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
+            new_aux_entry |= GEN_AUX_MAP_ENTRY_VALID_BIT;
 
-         anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
-            sdi.Address = (struct anv_address) {
-               .bo = NULL,
-               .offset = aux_entry_address + 4,
-            };
-            sdi.ImmediateData = new_aux_entry >> 32;
-         }
+         gen_mi_store(&b, gen_mi_mem64(aux_entry_address),
+                          gen_mi_imm(new_aux_entry));
       }
    }
 
@@ -1157,8 +1168,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
    if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
        initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
 #if GEN_GEN == 12
-      if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage) &&
-          device->physical->has_implicit_ccs && devinfo->has_aux_map) {
+      if (device->physical->has_implicit_ccs && devinfo->has_aux_map) {
          anv_image_init_aux_tt(cmd_buffer, image, aspect,
                                base_level, level_count,
                                base_layer, layer_count);
@@ -1881,7 +1891,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
 
    uint32_t l3cr;
    anv_pack_struct(&l3cr, L3_ALLOCATION_REG,
-#if GEN_GEN < 12
+#if GEN_GEN < 11
                    .SLMEnable = has_slm,
 #endif
 #if GEN_GEN == 11
@@ -1983,6 +1993,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
 void
 genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
 {
+   UNUSED const struct gen_device_info *devinfo = &cmd_buffer->device->info;
    enum anv_pipe_bits bits = cmd_buffer->state.pending_pipe_bits;
 
    if (cmd_buffer->device->physical->always_flush_cache)
@@ -2040,6 +2051,24 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
              sizeof(cmd_buffer->state.gfx.ib_dirty_range));
    }
 
+   /* Project: SKL / Argument: LRI Post Sync Operation [23]
+    *
+    * "PIPECONTROL command with “Command Streamer Stall Enable” must be
+    *  programmed prior to programming a PIPECONTROL command with "LRI
+    *  Post Sync Operation" in GPGPU mode of operation (i.e when
+    *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
+    *
+    * The same text exists a few rows below for Post Sync Op.
+    *
+    * On Gen12 this is GEN:BUG:1607156449.
+    */
+   if (bits & ANV_PIPE_POST_SYNC_BIT) {
+      if ((GEN_GEN == 9 || (GEN_GEN == 12 && devinfo->revision == 0 /* A0 */)) &&
+          cmd_buffer->state.current_pipeline == GPGPU)
+         bits |= ANV_PIPE_CS_STALL_BIT;
+      bits &= ~ANV_PIPE_POST_SYNC_BIT;
+   }
+
    if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT)) {
       anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
 #if GEN_GEN >= 12
@@ -2322,25 +2351,37 @@ anv_descriptor_set_address(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
+static struct anv_cmd_pipeline_state *
+pipe_state_for_stage(struct anv_cmd_buffer *cmd_buffer,
+                     gl_shader_stage stage)
+{  /* Return the pipeline state inside cmd_buffer->state that `stage` goes
+    * through: state.compute.base for the compute stage, state.gfx.base for
+    * the vertex, tessellation control/eval, geometry and fragment stages.
+    */
+   switch (stage) {
+   case MESA_SHADER_COMPUTE:
+      return &cmd_buffer->state.compute.base;
+
+   case MESA_SHADER_VERTEX:
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
+   case MESA_SHADER_GEOMETRY:
+   case MESA_SHADER_FRAGMENT:
+      return &cmd_buffer->state.gfx.base;
+
+   default: /* Any stage not listed above ends up in unreachable() instead
+             * of being mapped to the gfx state, which is what the
+             * open-coded selections replaced by this helper did.
+             */
+      unreachable("invalid stage");
+   }
+}
+
 static VkResult
 emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                    gl_shader_stage stage,
                    struct anv_state *bt_state)
 {
    struct anv_subpass *subpass = cmd_buffer->state.subpass;
-   struct anv_cmd_pipeline_state *pipe_state;
-   struct anv_pipeline *pipeline;
    uint32_t state_offset;
 
-   switch (stage) {
-   case  MESA_SHADER_COMPUTE:
-      pipe_state = &cmd_buffer->state.compute.base;
-      break;
-   default:
-      pipe_state = &cmd_buffer->state.gfx.base;
-      break;
-   }
-   pipeline = pipe_state->pipeline;
+   struct anv_cmd_pipeline_state *pipe_state =
+      pipe_state_for_stage(cmd_buffer, stage);
+   struct anv_pipeline *pipeline = pipe_state->pipeline;
 
    if (!anv_pipeline_has_stage(pipeline, stage)) {
       *bt_state = (struct anv_state) { 0, };
@@ -2589,8 +2630,7 @@ emit_samplers(struct anv_cmd_buffer *cmd_buffer,
               struct anv_state *state)
 {
    struct anv_cmd_pipeline_state *pipe_state =
-      stage == MESA_SHADER_COMPUTE ? &cmd_buffer->state.compute.base :
-                                     &cmd_buffer->state.gfx.base;
+      pipe_state_for_stage(cmd_buffer, stage);
    struct anv_pipeline *pipeline = pipe_state->pipeline;
 
    if (!anv_pipeline_has_stage(pipeline, stage)) {
@@ -2808,6 +2848,10 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
          const struct anv_pipeline_bind_map *bind_map =
             &pipeline->shaders[stage]->bind_map;
 
+#if GEN_GEN >= 12
+         c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
+#endif
+
 #if GEN_GEN >= 8 || GEN_IS_HASWELL
          /* The Skylake PRM contains the following restriction:
           *
@@ -2868,6 +2912,7 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
    if (count == 0) {
       anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
          c.ShaderUpdateEnable = shader_mask;
+         c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
       }
       return;
    }
@@ -2898,7 +2943,8 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
    dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                         GENX(3DSTATE_CONSTANT_ALL),
                         .ShaderUpdateEnable = shader_mask,
-                        .PointerBufferMask = buffers);
+                        .PointerBufferMask = buffers,
+                        .MOCS = cmd_buffer->device->isl_dev.mocs.internal);
 
    for (int i = 0; i < count; i++) {
       const struct anv_push_range *range = &bind_map->push_ranges[i];
@@ -4609,6 +4655,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
    isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
 
    if (GEN_GEN >= 12) {
+      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
       /* GEN:BUG:1408224581
        *
        * Workaround: Gen12LP Astep only An additional pipe control with
@@ -5560,6 +5609,9 @@ void genX(CmdSetEvent)(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_event, event, _event);
 
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
       if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
          pc.StallAtPixelScoreboard = true;
@@ -5584,6 +5636,9 @@ void genX(CmdResetEvent)(
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_event, event, _event);
 
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
       if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
          pc.StallAtPixelScoreboard = true;