anv: Move gen8+ push constant packet workaround.
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 567e556f5a5e2a39acaa99b049df27828ca599cc..bceae33cfbcf4ef431ec2516e7cbcf5e94eede6e 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1800,6 +1800,9 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
 {
    enum anv_pipe_bits bits = cmd_buffer->state.pending_pipe_bits;
 
+   if (cmd_buffer->device->instance->physicalDevice.always_flush_cache)
+      bits |= ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS;
+
    /* Flushes are pipelined while invalidations are handled immediately.
     * Therefore, if we're flushing anything then we need to schedule a stall
     * before any invalidations can happen.
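For reference, the ordering rule this comment describes amounts to a few lines of C. A minimal sketch with stand-in bit values (the real anv_pipe_bits enum lives in anv_private.h; these names and values are illustrative only):

#include <stdint.h>

/* Stand-in bit values; not the driver's enum. */
#define ANV_PIPE_FLUSH_BITS_SKETCH      0x00ffu
#define ANV_PIPE_INVALIDATE_BITS_SKETCH 0xff00u
#define ANV_PIPE_CS_STALL_BIT_SKETCH    (1u << 16)

/* Flushes are pipelined while invalidations happen immediately, so if
 * both are pending, fence the flushes with a CS stall before any
 * invalidation is allowed to execute. */
static uint32_t schedule_stall(uint32_t bits)
{
   if ((bits & ANV_PIPE_FLUSH_BITS_SKETCH) &&
       (bits & ANV_PIPE_INVALIDATE_BITS_SKETCH))
      bits |= ANV_PIPE_CS_STALL_BIT_SKETCH;
   return bits;
}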
@@ -2122,8 +2125,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       return VK_SUCCESS;
    }
 
-   struct anv_shader_bin *bin = pipeline->shaders[stage];
-   struct anv_pipeline_bind_map *map = &bin->bind_map;
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
    if (map->surface_count == 0) {
       *bt_state = (struct anv_state) { 0, };
       return VK_SUCCESS;
@@ -2148,7 +2150,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 
       struct anv_state surface_state;
 
-      if (binding->set == ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) {
+      switch (binding->set) {
+      case ANV_DESCRIPTOR_SET_NULL:
+         bt_map[s] = 0;
+         break;
+
+      case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
          /* Color attachment binding */
          assert(stage == MESA_SHADER_FRAGMENT);
          if (binding->index < subpass->color_count) {
@@ -2171,8 +2178,9 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
          }
 
          bt_map[s] = surface_state.offset + state_offset;
-         continue;
-      } else if (binding->set == ANV_DESCRIPTOR_SET_SHADER_CONSTANTS) {
+         break;
+
+      case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS: {
          struct anv_state surface_state =
             anv_cmd_buffer_alloc_surface_state(cmd_buffer);
 
@@ -2191,12 +2199,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 
          bt_map[s] = surface_state.offset + state_offset;
          add_surface_reloc(cmd_buffer, surface_state, constant_data);
-         continue;
-      } else if (binding->set == ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS) {
+         break;
+      }
+
+      case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS: {
          /* This is always the first binding for compute shaders */
          assert(stage == MESA_SHADER_COMPUTE && s == 0);
-         if (!get_cs_prog_data(pipeline)->uses_num_work_groups)
-            continue;
 
          struct anv_state surface_state =
             anv_cmd_buffer_alloc_surface_state(cmd_buffer);
@@ -2212,8 +2220,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             add_surface_reloc(cmd_buffer, surface_state,
                               cmd_buffer->state.compute.num_workgroups);
          }
-         continue;
-      } else if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS) {
+         break;
+      }
+
+      case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
          /* This is a descriptor set buffer so the set index is actually
           * given by binding->binding.  (Yes, that's confusing.)
           */
@@ -2224,134 +2234,128 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
          bt_map[s] = set->desc_surface_state.offset + state_offset;
          add_surface_reloc(cmd_buffer, set->desc_surface_state,
                            anv_descriptor_set_address(cmd_buffer, set));
-         continue;
+         break;
       }
 
-      const struct anv_descriptor *desc =
-         &pipe_state->descriptors[binding->set]->descriptors[binding->index];
+      default: {
+         assert(binding->set < MAX_SETS);
+         const struct anv_descriptor *desc =
+            &pipe_state->descriptors[binding->set]->descriptors[binding->index];
 
-      switch (desc->type) {
-      case VK_DESCRIPTOR_TYPE_SAMPLER:
-         /* Nothing for us to do here */
-         continue;
+         switch (desc->type) {
+         case VK_DESCRIPTOR_TYPE_SAMPLER:
+            /* Nothing for us to do here */
+            continue;
 
-      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
-         struct anv_surface_state sstate =
-            (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
-            desc->image_view->planes[binding->plane].general_sampler_surface_state :
-            desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
-         surface_state = sstate.state;
-         assert(surface_state.alloc_size);
-         if (need_client_mem_relocs)
-            add_surface_state_relocs(cmd_buffer, sstate);
-         break;
-      }
-      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-         assert(stage == MESA_SHADER_FRAGMENT);
-         if ((desc->image_view->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) {
-            /* For depth and stencil input attachments, we treat it like any
-             * old texture that a user may have bound.
-             */
-            assert(desc->image_view->n_planes == 1);
+         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
             struct anv_surface_state sstate =
                (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
-               desc->image_view->planes[0].general_sampler_surface_state :
-               desc->image_view->planes[0].optimal_sampler_surface_state;
+               desc->image_view->planes[binding->plane].general_sampler_surface_state :
+               desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
             surface_state = sstate.state;
             assert(surface_state.alloc_size);
             if (need_client_mem_relocs)
                add_surface_state_relocs(cmd_buffer, sstate);
-         } else {
-            /* For color input attachments, we create the surface state at
-             * vkBeginRenderPass time so that we can include aux and clear
-             * color information.
-             */
-            assert(binding->input_attachment_index < subpass->input_count);
-            const unsigned subpass_att = binding->input_attachment_index;
-            const unsigned att = subpass->input_attachments[subpass_att].attachment;
-            surface_state = cmd_buffer->state.attachments[att].input.state;
+            break;
          }
-         break;
-
-      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
-         struct anv_surface_state sstate = (binding->write_only)
-            ? desc->image_view->planes[binding->plane].writeonly_storage_surface_state
-            : desc->image_view->planes[binding->plane].storage_surface_state;
-         surface_state = sstate.state;
-         assert(surface_state.alloc_size);
-         if (need_client_mem_relocs)
-            add_surface_state_relocs(cmd_buffer, sstate);
-         break;
-      }
+         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+            assert(stage == MESA_SHADER_FRAGMENT);
+            if ((desc->image_view->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) {
+               /* For depth and stencil input attachments, we treat it like any
+                * old texture that a user may have bound.
+                */
+               assert(desc->image_view->n_planes == 1);
+               struct anv_surface_state sstate =
+                  (desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
+                  desc->image_view->planes[0].general_sampler_surface_state :
+                  desc->image_view->planes[0].optimal_sampler_surface_state;
+               surface_state = sstate.state;
+               assert(surface_state.alloc_size);
+               if (need_client_mem_relocs)
+                  add_surface_state_relocs(cmd_buffer, sstate);
+            } else {
+               /* For color input attachments, we create the surface state at
+                * vkBeginRenderPass time so that we can include aux and clear
+                * color information.
+                */
+               assert(binding->input_attachment_index < subpass->input_count);
+               const unsigned subpass_att = binding->input_attachment_index;
+               const unsigned att = subpass->input_attachments[subpass_att].attachment;
+               surface_state = cmd_buffer->state.attachments[att].input.state;
+            }
+            break;
 
-      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-         surface_state = desc->buffer_view->surface_state;
-         assert(surface_state.alloc_size);
-         if (need_client_mem_relocs) {
-            add_surface_reloc(cmd_buffer, surface_state,
-                              desc->buffer_view->address);
+         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
+            struct anv_surface_state sstate = (binding->write_only)
+               ? desc->image_view->planes[binding->plane].writeonly_storage_surface_state
+               : desc->image_view->planes[binding->plane].storage_surface_state;
+            surface_state = sstate.state;
+            assert(surface_state.alloc_size);
+            if (need_client_mem_relocs)
+               add_surface_state_relocs(cmd_buffer, sstate);
+            break;
          }
-         break;
-
-      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-         /* If the shader never does any UBO pulls (this is a fairly common
-          * case) then we don't need to fill out those binding table entries.
-          * The real cost savings here is that we don't have to build the
-          * surface state for them which is surprisingly expensive when it's
-          * on the hot-path.
-          */
-         if (!bin->prog_data->has_ubo_pull)
-            continue;
-         /* Fall through */
-
-      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
-         /* Compute the offset within the buffer */
-         struct anv_push_constants *push =
-            &cmd_buffer->state.push_constants[stage];
-
-         uint32_t dynamic_offset =
-            push->dynamic_offsets[binding->dynamic_offset_index];
-         uint64_t offset = desc->offset + dynamic_offset;
-         /* Clamp to the buffer size */
-         offset = MIN2(offset, desc->buffer->size);
-         /* Clamp the range to the buffer size */
-         uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
-
-         struct anv_address address =
-            anv_address_add(desc->buffer->address, offset);
 
-         surface_state =
-            anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
-         enum isl_format format =
-            anv_isl_format_for_descriptor_type(desc->type);
+         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+            surface_state = desc->buffer_view->surface_state;
+            assert(surface_state.alloc_size);
+            if (need_client_mem_relocs) {
+               add_surface_reloc(cmd_buffer, surface_state,
+                                 desc->buffer_view->address);
+            }
+            break;
+
+         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+            /* Compute the offset within the buffer */
+            struct anv_push_constants *push =
+               &cmd_buffer->state.push_constants[stage];
+
+            uint32_t dynamic_offset =
+               push->dynamic_offsets[binding->dynamic_offset_index];
+            uint64_t offset = desc->offset + dynamic_offset;
+            /* Clamp to the buffer size */
+            offset = MIN2(offset, desc->buffer->size);
+            /* Clamp the range to the buffer size */
+            uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
+
+            struct anv_address address =
+               anv_address_add(desc->buffer->address, offset);
+
+            surface_state =
+               anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
+            enum isl_format format =
+               anv_isl_format_for_descriptor_type(desc->type);
+
+            anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
+                                          format, address, range, 1);
+            if (need_client_mem_relocs)
+               add_surface_reloc(cmd_buffer, surface_state, address);
+            break;
+         }
 
-         anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
-                                       format, address, range, 1);
-         if (need_client_mem_relocs)
-            add_surface_reloc(cmd_buffer, surface_state, address);
-         break;
-      }
+         case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+            surface_state = (binding->write_only)
+               ? desc->buffer_view->writeonly_storage_surface_state
+               : desc->buffer_view->storage_surface_state;
+            assert(surface_state.alloc_size);
+            if (need_client_mem_relocs) {
+               add_surface_reloc(cmd_buffer, surface_state,
+                                 desc->buffer_view->address);
+            }
+            break;
 
-      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-         surface_state = (binding->write_only)
-            ? desc->buffer_view->writeonly_storage_surface_state
-            : desc->buffer_view->storage_surface_state;
-         assert(surface_state.alloc_size);
-         if (need_client_mem_relocs) {
-            add_surface_reloc(cmd_buffer, surface_state,
-                              desc->buffer_view->address);
+         default:
+            assert(!"Invalid descriptor type");
+            continue;
          }
+         bt_map[s] = surface_state.offset + state_offset;
          break;
-
-      default:
-         assert(!"Invalid descriptor type");
-         continue;
       }
-
-      bt_map[s] = surface_state.offset + state_offset;
+      }
    }
 
    return VK_SUCCESS;
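The dynamic-offset clamping in the VK_DESCRIPTOR_TYPE_*_DYNAMIC case above is worth spelling out, since it is what keeps a large dynamic offset from producing a surface state that reads past the end of the buffer. A self-contained sketch of the same arithmetic (MIN2 as in mesa's macros; values are illustrative):

#include <stdint.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Mirror of the clamping above: first clamp the start offset to the
 * buffer size, then clamp the range to whatever is left. */
static uint32_t clamped_range(uint64_t desc_offset, uint32_t dynamic_offset,
                              uint32_t desc_range, uint64_t buffer_size)
{
   uint64_t offset = desc_offset + dynamic_offset;
   offset = MIN2(offset, buffer_size);
   return MIN2(desc_range, buffer_size - offset);
}

For example, with buffer_size = 256, desc_offset = 192 and dynamic_offset = 128, the start clamps to 256 and the returned range is 0, so the resulting surface state never addresses memory past the buffer.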
@@ -2534,10 +2538,32 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
             const struct anv_pipeline_bind_map *bind_map =
                &pipeline->shaders[stage]->bind_map;
 
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+            unsigned buffer_count = 0;
             for (unsigned i = 0; i < 4; i++) {
                const struct anv_push_range *range = &bind_map->push_ranges[i];
-               if (range->length == 0)
-                  continue;
+               if (range->length > 0)
+                  buffer_count++;
+            }
+
+            /* The Skylake PRM contains the following restriction:
+             *
+             *    "The driver must ensure The following case does not occur
+             *     without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+             *     buffer 3 read length equal to zero committed followed by a
+             *     3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+             *     zero committed."
+             *
+             * To avoid this, we program the buffers in the highest slots.
+             * This way, slot 0 is only used if slot 3 is also used.
+             */
+            assert(buffer_count <= 4);
+            const unsigned shift = 4 - buffer_count;
+            for (unsigned i = 0; i < buffer_count; i++) {
+               const struct anv_push_range *range = &bind_map->push_ranges[i];
+
+               /* At this point we only have non-empty ranges */
+               assert(range->length > 0);
 
                struct anv_address addr;
                switch (range->set) {
@@ -2583,10 +2609,26 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
                }
                }
 
-               c.ConstantBody.ReadLength[i] = range->length;
-               c.ConstantBody.Buffer[i] =
+               c.ConstantBody.ReadLength[i + shift] = range->length;
+               c.ConstantBody.Buffer[i + shift] =
                   anv_address_add(addr, range->start * 32);
             }
+#else
+            /* For Ivy Bridge, push constants are relative to dynamic state
+             * base address and we only ever push actual push constants.
+             */
+            if (bind_map->push_ranges[0].length > 0) {
+               assert(bind_map->push_ranges[0].set ==
+                      ANV_DESCRIPTOR_SET_PUSH_CONSTANTS);
+               struct anv_state state =
+                  anv_cmd_buffer_push_constants(cmd_buffer, stage);
+               c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length;
+               c.ConstantBody.Buffer[0].offset = state.offset;
+            }
+            assert(bind_map->push_ranges[1].length == 0);
+            assert(bind_map->push_ranges[2].length == 0);
+            assert(bind_map->push_ranges[3].length == 0);
+#endif
          }
       }
 
@@ -2720,11 +2762,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
    if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
       anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
 
-      /* The exact descriptor layout is pulled from the pipeline, so we need
-       * to re-emit binding tables on every pipeline change.
-       */
-      cmd_buffer->state.descriptors_dirty |= pipeline->active_stages;
-
       /* If the pipeline changed, we may need to re-allocate push constant
        * space in the URB.
        */
@@ -2811,7 +2848,8 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer,
          .VertexBufferIndex = index,
          .AddressModifyEnable = true,
          .BufferPitch = 0,
-         .MOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
+         .MOCS = addr.bo ? anv_mocs_for_bo(cmd_buffer->device, addr.bo) : 0,
+         .NullVertexBuffer = size == 0,
 #if (GEN_GEN >= 8)
          .BufferStartingAddress = addr,
          .BufferSize = size
@@ -2826,25 +2864,29 @@ static void
 emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_address addr)
 {
-   emit_vertex_bo(cmd_buffer, addr, 8, ANV_SVGS_VB_INDEX);
+   emit_vertex_bo(cmd_buffer, addr, addr.bo ? 8 : 0, ANV_SVGS_VB_INDEX);
 }
 
 static void
 emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer,
                           uint32_t base_vertex, uint32_t base_instance)
 {
-   struct anv_state id_state =
-      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4);
+   if (base_vertex == 0 && base_instance == 0) {
+      emit_base_vertex_instance_bo(cmd_buffer, ANV_NULL_ADDRESS);
+   } else {
+      struct anv_state id_state =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4);
 
-   ((uint32_t *)id_state.map)[0] = base_vertex;
-   ((uint32_t *)id_state.map)[1] = base_instance;
+      ((uint32_t *)id_state.map)[0] = base_vertex;
+      ((uint32_t *)id_state.map)[1] = base_instance;
 
-   struct anv_address addr = {
-      .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
-      .offset = id_state.offset,
-   };
+      struct anv_address addr = {
+         .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
+         .offset = id_state.offset,
+      };
 
-   emit_base_vertex_instance_bo(cmd_buffer, addr);
+      emit_base_vertex_instance_bo(cmd_buffer, addr);
+   }
 }
 
 static void
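The two hunks above combine into a small fast path: when base vertex and base instance are both zero, a null vertex buffer already returns the right values (zeros), so the driver can skip allocating and filling 8 bytes of dynamic state. A sketch of the size decision (simplified stand-in types, not the driver's):

#include <stddef.h>

struct addr_sketch { void *bo; unsigned offset; };

/* Mirrors "addr.bo ? 8 : 0" above: a null address yields a size-0,
 * NullVertexBuffer binding; a real address covers the two dwords of
 * base vertex and base instance. */
static unsigned svgs_vb_size(struct addr_sketch addr)
{
   return addr.bo != NULL ? 8 : 0;
}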
@@ -3534,6 +3576,11 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
       anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+
+      /* The workgroup size of the pipeline affects our push constant layout,
+       * so flag push constants as dirty if we change the pipeline.
+       */
+      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
    }
 
    if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
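The new dirty flag above matters because a compute pipeline's workgroup size feeds into the cross-thread push constant layout, so a pipeline switch can invalidate data that was already pushed. A sketch of the resulting re-emit flow (the bit value matches Vulkan's VK_SHADER_STAGE_COMPUTE_BIT; the surrounding state is simplified):

#include <stdbool.h>
#include <stdint.h>

#define VK_SHADER_STAGE_COMPUTE_BIT_SKETCH 0x20u  /* Vulkan's value */

struct cmd_state_sketch { uint32_t push_constants_dirty; };

/* Binding a pipeline sets the bit; the flush path re-emits push
 * constants only when it is set, then clears it. */
static bool flush_compute_push_constants(struct cmd_state_sketch *s)
{
   if (!(s->push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT_SKETCH))
      return false;
   s->push_constants_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT_SKETCH;
   return true;  /* caller re-emits the compute push constant data */
}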
@@ -4090,6 +4137,20 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
 
    isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
 
+   if (GEN_GEN >= 12) {
+      /* GEN:BUG:1408224581
+       *
+       * Workaround (Gen12LP A-step only): an additional pipe control with a
+       * post-sync store-dword operation is required. The workaround is to
+       * emit an additional pipe control after the stencil state whenever
+       * the surface state bits of this state change.
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.PostSyncOperation = WriteImmediateData;
+         pc.Address =
+            (struct anv_address) { cmd_buffer->device->workaround_bo, 0 };
+      }
+   }
    cmd_buffer->state.hiz_enabled = info.hiz_usage == ISL_AUX_USAGE_HIZ;
 }